@midscene/core 0.21.4-beta-20250711063424.0 → 0.21.4-beta-20250715053831.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (32) hide show
  1. package/dist/es/ai-model.d.ts +36 -24
  2. package/dist/es/ai-model.js +5 -1
  3. package/dist/es/{chunk-QT5OZCDN.js → chunk-NQHZHZRA.js} +408 -224
  4. package/dist/es/chunk-NQHZHZRA.js.map +1 -0
  5. package/dist/es/{chunk-2CMOAEAS.js → chunk-NSQ46QDR.js} +3 -3
  6. package/dist/es/index.d.ts +4 -4
  7. package/dist/es/index.js +2 -2
  8. package/dist/es/{llm-planning-fe687364.d.ts → llm-planning-b342ff86.d.ts} +1 -1
  9. package/dist/es/{types-b0b4c68e.d.ts → types-05553e39.d.ts} +32 -1
  10. package/dist/es/utils.d.ts +1 -1
  11. package/dist/es/utils.js +1 -1
  12. package/dist/lib/ai-model.d.ts +36 -24
  13. package/dist/lib/ai-model.js +6 -2
  14. package/dist/lib/{chunk-QT5OZCDN.js → chunk-NQHZHZRA.js} +417 -233
  15. package/dist/lib/chunk-NQHZHZRA.js.map +1 -0
  16. package/dist/lib/{chunk-2CMOAEAS.js → chunk-NSQ46QDR.js} +3 -3
  17. package/dist/lib/index.d.ts +4 -4
  18. package/dist/lib/index.js +12 -12
  19. package/dist/lib/{llm-planning-fe687364.d.ts → llm-planning-b342ff86.d.ts} +1 -1
  20. package/dist/lib/{types-b0b4c68e.d.ts → types-05553e39.d.ts} +32 -1
  21. package/dist/lib/utils.d.ts +1 -1
  22. package/dist/lib/utils.js +2 -2
  23. package/dist/types/ai-model.d.ts +36 -24
  24. package/dist/types/index.d.ts +4 -4
  25. package/dist/types/{llm-planning-fe687364.d.ts → llm-planning-b342ff86.d.ts} +1 -1
  26. package/dist/types/{types-b0b4c68e.d.ts → types-05553e39.d.ts} +32 -1
  27. package/dist/types/utils.d.ts +1 -1
  28. package/package.json +3 -3
  29. package/dist/es/chunk-QT5OZCDN.js.map +0 -1
  30. package/dist/lib/chunk-QT5OZCDN.js.map +0 -1
  31. /package/dist/es/{chunk-2CMOAEAS.js.map → chunk-NSQ46QDR.js.map} +0 -0
  32. /package/dist/lib/{chunk-2CMOAEAS.js.map → chunk-NSQ46QDR.js.map} +0 -0
@@ -1135,13 +1135,17 @@ pageDescription:
1135
1135
 
1136
1136
  // src/ai-model/service-caller/index.ts
1137
1137
  function checkAIConfig() {
1138
- if (_env.getAIConfig.call(void 0, _env.OPENAI_API_KEY))
1138
+ const openaiKey = _env.getAIConfig.call(void 0, _env.OPENAI_API_KEY);
1139
+ const azureConfig = _env.getAIConfig.call(void 0, _env.MIDSCENE_USE_AZURE_OPENAI);
1140
+ const anthropicKey = _env.getAIConfig.call(void 0, _env.ANTHROPIC_API_KEY);
1141
+ const initConfigJson = _env.getAIConfig.call(void 0, _env.MIDSCENE_OPENAI_INIT_CONFIG_JSON);
1142
+ if (openaiKey)
1139
1143
  return true;
1140
- if (_env.getAIConfig.call(void 0, _env.MIDSCENE_USE_AZURE_OPENAI))
1144
+ if (azureConfig)
1141
1145
  return true;
1142
- if (_env.getAIConfig.call(void 0, _env.ANTHROPIC_API_KEY))
1146
+ if (anthropicKey)
1143
1147
  return true;
1144
- return Boolean(_env.getAIConfig.call(void 0, _env.MIDSCENE_OPENAI_INIT_CONFIG_JSON));
1148
+ return Boolean(initConfigJson);
1145
1149
  }
1146
1150
  var debugConfigInitialized = false;
1147
1151
  function initDebugConfig() {
@@ -1191,9 +1195,12 @@ async function createChatClient({
1191
1195
  const socksProxy = _env.getAIConfig.call(void 0, _env.MIDSCENE_OPENAI_SOCKS_PROXY);
1192
1196
  const httpProxy = _env.getAIConfig.call(void 0, _env.MIDSCENE_OPENAI_HTTP_PROXY);
1193
1197
  let proxyAgent = void 0;
1198
+ const debugProxy = _logger.getDebug.call(void 0, "ai:call:proxy");
1194
1199
  if (httpProxy) {
1200
+ debugProxy("using http proxy", httpProxy);
1195
1201
  proxyAgent = new (0, _httpsproxyagent.HttpsProxyAgent)(httpProxy);
1196
1202
  } else if (socksProxy) {
1203
+ debugProxy("using socks proxy", socksProxy);
1197
1204
  proxyAgent = new (0, _socksproxyagent.SocksProxyAgent)(socksProxy);
1198
1205
  }
1199
1206
  if (_env.getAIConfig.call(void 0, _env.OPENAI_USE_AZURE)) {
@@ -1290,7 +1297,7 @@ Please check your config.`
1290
1297
  }
1291
1298
  throw new Error("Openai SDK or Anthropic SDK is not initialized");
1292
1299
  }
1293
- async function call(messages, AIActionTypeValue, responseFormat) {
1300
+ async function call(messages, AIActionTypeValue, responseFormat, options) {
1294
1301
  const { completion, style } = await createChatClient({
1295
1302
  AIActionTypeValue
1296
1303
  });
@@ -1300,91 +1307,217 @@ async function call(messages, AIActionTypeValue, responseFormat) {
1300
1307
  const debugProfileDetail = _logger.getDebug.call(void 0, "ai:profile:detail");
1301
1308
  const startTime = Date.now();
1302
1309
  const model = getModelName();
1310
+ const isStreaming = _optionalChain([options, 'optionalAccess', _22 => _22.stream]) && _optionalChain([options, 'optionalAccess', _23 => _23.onChunk]);
1303
1311
  let content;
1312
+ let accumulated = "";
1304
1313
  let usage;
1305
1314
  let timeCost;
1306
1315
  const commonConfig = {
1307
1316
  temperature: _env.vlLocateMode.call(void 0, ) === "vlm-ui-tars" ? 0 : 0.1,
1308
- stream: false,
1317
+ stream: !!isStreaming,
1309
1318
  max_tokens: typeof maxTokens === "number" ? maxTokens : Number.parseInt(maxTokens || "2048", 10),
1310
1319
  ..._env.vlLocateMode.call(void 0, ) === "qwen-vl" ? {
1311
1320
  vl_high_resolution_images: true
1312
1321
  } : {}
1313
1322
  };
1314
- if (style === "openai") {
1315
- debugCall(`sending request to ${model}`);
1316
- let result;
1317
- try {
1318
- const startTime2 = Date.now();
1319
- result = await completion.create({
1320
- model,
1321
- messages,
1322
- response_format: responseFormat,
1323
- ...commonConfig
1324
- });
1325
- timeCost = Date.now() - startTime2;
1326
- } catch (e) {
1327
- const newError = new Error(
1328
- `failed to call AI model service: ${e.message}. Trouble shooting: https://midscenejs.com/model-provider.html`,
1329
- {
1330
- cause: e
1331
- }
1323
+ try {
1324
+ if (style === "openai") {
1325
+ debugCall(
1326
+ `sending ${isStreaming ? "streaming " : ""}request to ${model}`
1332
1327
  );
1333
- throw newError;
1334
- }
1335
- debugProfileStats(
1336
- `model, ${model}, mode, ${_env.vlLocateMode.call(void 0, ) || "default"}, ui-tars-version, ${_env.uiTarsModelVersion.call(void 0, )}, prompt-tokens, ${_optionalChain([result, 'access', _22 => _22.usage, 'optionalAccess', _23 => _23.prompt_tokens]) || ""}, completion-tokens, ${_optionalChain([result, 'access', _24 => _24.usage, 'optionalAccess', _25 => _25.completion_tokens]) || ""}, total-tokens, ${_optionalChain([result, 'access', _26 => _26.usage, 'optionalAccess', _27 => _27.total_tokens]) || ""}, cost-ms, ${Date.now() - startTime}, requestId, ${result._request_id || ""}`
1337
- );
1338
- debugProfileDetail(`model usage detail: ${JSON.stringify(result.usage)}`);
1339
- _utils.assert.call(void 0,
1340
- result.choices,
1341
- `invalid response from LLM service: ${JSON.stringify(result)}`
1342
- );
1343
- content = result.choices[0].message.content;
1344
- debugCall(`response: ${content}`);
1345
- _utils.assert.call(void 0, content, "empty content");
1346
- usage = result.usage;
1347
- } else if (style === "anthropic") {
1348
- const convertImageContent = (content2) => {
1349
- if (content2.type === "image_url") {
1350
- const imgBase64 = content2.image_url.url;
1351
- _utils.assert.call(void 0, imgBase64, "image_url is required");
1352
- return {
1353
- source: {
1354
- type: "base64",
1355
- media_type: imgBase64.includes("data:image/png;base64,") ? "image/png" : "image/jpeg",
1356
- data: imgBase64.split(",")[1]
1328
+ if (isStreaming) {
1329
+ const stream = await completion.create(
1330
+ {
1331
+ model,
1332
+ messages,
1333
+ response_format: responseFormat,
1334
+ ...commonConfig
1357
1335
  },
1358
- type: "image"
1359
- };
1336
+ {
1337
+ stream: true
1338
+ }
1339
+ );
1340
+ for await (const chunk of stream) {
1341
+ const content2 = _optionalChain([chunk, 'access', _24 => _24.choices, 'optionalAccess', _25 => _25[0], 'optionalAccess', _26 => _26.delta, 'optionalAccess', _27 => _27.content]) || "";
1342
+ const reasoning_content = _optionalChain([chunk, 'access', _28 => _28.choices, 'optionalAccess', _29 => _29[0], 'optionalAccess', _30 => _30.delta, 'optionalAccess', _31 => _31.reasoning_content]) || "";
1343
+ if (chunk.usage) {
1344
+ usage = chunk.usage;
1345
+ }
1346
+ if (content2 || reasoning_content) {
1347
+ accumulated += content2;
1348
+ const chunkData = {
1349
+ content: content2,
1350
+ reasoning_content,
1351
+ accumulated,
1352
+ isComplete: false,
1353
+ usage: void 0
1354
+ };
1355
+ options.onChunk(chunkData);
1356
+ }
1357
+ if (_optionalChain([chunk, 'access', _32 => _32.choices, 'optionalAccess', _33 => _33[0], 'optionalAccess', _34 => _34.finish_reason])) {
1358
+ timeCost = Date.now() - startTime;
1359
+ if (!usage) {
1360
+ const estimatedTokens = Math.max(
1361
+ 1,
1362
+ Math.floor(accumulated.length / 4)
1363
+ );
1364
+ usage = {
1365
+ prompt_tokens: estimatedTokens,
1366
+ completion_tokens: estimatedTokens,
1367
+ total_tokens: estimatedTokens * 2
1368
+ };
1369
+ }
1370
+ const finalChunk = {
1371
+ content: "",
1372
+ accumulated,
1373
+ reasoning_content: "",
1374
+ isComplete: true,
1375
+ usage: {
1376
+ prompt_tokens: _nullishCoalesce(usage.prompt_tokens, () => ( 0)),
1377
+ completion_tokens: _nullishCoalesce(usage.completion_tokens, () => ( 0)),
1378
+ total_tokens: _nullishCoalesce(usage.total_tokens, () => ( 0)),
1379
+ time_cost: _nullishCoalesce(timeCost, () => ( 0))
1380
+ }
1381
+ };
1382
+ options.onChunk(finalChunk);
1383
+ break;
1384
+ }
1385
+ }
1386
+ content = accumulated;
1387
+ debugProfileStats(
1388
+ `streaming model, ${model}, mode, ${_env.vlLocateMode.call(void 0, ) || "default"}, cost-ms, ${timeCost}`
1389
+ );
1390
+ } else {
1391
+ const result = await completion.create({
1392
+ model,
1393
+ messages,
1394
+ response_format: responseFormat,
1395
+ ...commonConfig
1396
+ });
1397
+ timeCost = Date.now() - startTime;
1398
+ debugProfileStats(
1399
+ `model, ${model}, mode, ${_env.vlLocateMode.call(void 0, ) || "default"}, ui-tars-version, ${_env.uiTarsModelVersion.call(void 0, )}, prompt-tokens, ${_optionalChain([result, 'access', _35 => _35.usage, 'optionalAccess', _36 => _36.prompt_tokens]) || ""}, completion-tokens, ${_optionalChain([result, 'access', _37 => _37.usage, 'optionalAccess', _38 => _38.completion_tokens]) || ""}, total-tokens, ${_optionalChain([result, 'access', _39 => _39.usage, 'optionalAccess', _40 => _40.total_tokens]) || ""}, cost-ms, ${timeCost}, requestId, ${result._request_id || ""}`
1400
+ );
1401
+ debugProfileDetail(
1402
+ `model usage detail: ${JSON.stringify(result.usage)}`
1403
+ );
1404
+ _utils.assert.call(void 0,
1405
+ result.choices,
1406
+ `invalid response from LLM service: ${JSON.stringify(result)}`
1407
+ );
1408
+ content = result.choices[0].message.content;
1409
+ usage = result.usage;
1360
1410
  }
1361
- return content2;
1411
+ debugCall(`response: ${content}`);
1412
+ _utils.assert.call(void 0, content, "empty content");
1413
+ } else if (style === "anthropic") {
1414
+ const convertImageContent = (content2) => {
1415
+ if (content2.type === "image_url") {
1416
+ const imgBase64 = content2.image_url.url;
1417
+ _utils.assert.call(void 0, imgBase64, "image_url is required");
1418
+ return {
1419
+ source: {
1420
+ type: "base64",
1421
+ media_type: imgBase64.includes("data:image/png;base64,") ? "image/png" : "image/jpeg",
1422
+ data: imgBase64.split(",")[1]
1423
+ },
1424
+ type: "image"
1425
+ };
1426
+ }
1427
+ return content2;
1428
+ };
1429
+ if (isStreaming) {
1430
+ const stream = await completion.create({
1431
+ model,
1432
+ system: "You are a versatile professional in software UI automation",
1433
+ messages: messages.map((m) => ({
1434
+ role: "user",
1435
+ content: Array.isArray(m.content) ? m.content.map(convertImageContent) : m.content
1436
+ })),
1437
+ response_format: responseFormat,
1438
+ ...commonConfig
1439
+ });
1440
+ for await (const chunk of stream) {
1441
+ const content2 = _optionalChain([chunk, 'access', _41 => _41.delta, 'optionalAccess', _42 => _42.text]) || "";
1442
+ if (content2) {
1443
+ accumulated += content2;
1444
+ const chunkData = {
1445
+ content: content2,
1446
+ accumulated,
1447
+ reasoning_content: "",
1448
+ isComplete: false,
1449
+ usage: void 0
1450
+ };
1451
+ options.onChunk(chunkData);
1452
+ }
1453
+ if (chunk.type === "message_stop") {
1454
+ timeCost = Date.now() - startTime;
1455
+ const anthropicUsage = chunk.usage;
1456
+ const finalChunk = {
1457
+ content: "",
1458
+ accumulated,
1459
+ reasoning_content: "",
1460
+ isComplete: true,
1461
+ usage: anthropicUsage ? {
1462
+ prompt_tokens: _nullishCoalesce(anthropicUsage.input_tokens, () => ( 0)),
1463
+ completion_tokens: _nullishCoalesce(anthropicUsage.output_tokens, () => ( 0)),
1464
+ total_tokens: (_nullishCoalesce(anthropicUsage.input_tokens, () => ( 0))) + (_nullishCoalesce(anthropicUsage.output_tokens, () => ( 0))),
1465
+ time_cost: _nullishCoalesce(timeCost, () => ( 0))
1466
+ } : void 0
1467
+ };
1468
+ options.onChunk(finalChunk);
1469
+ break;
1470
+ }
1471
+ }
1472
+ content = accumulated;
1473
+ } else {
1474
+ const result = await completion.create({
1475
+ model,
1476
+ system: "You are a versatile professional in software UI automation",
1477
+ messages: messages.map((m) => ({
1478
+ role: "user",
1479
+ content: Array.isArray(m.content) ? m.content.map(convertImageContent) : m.content
1480
+ })),
1481
+ response_format: responseFormat,
1482
+ ...commonConfig
1483
+ });
1484
+ timeCost = Date.now() - startTime;
1485
+ content = result.content[0].text;
1486
+ usage = result.usage;
1487
+ }
1488
+ _utils.assert.call(void 0, content, "empty content");
1489
+ }
1490
+ if (isStreaming && !usage) {
1491
+ const estimatedTokens = Math.max(
1492
+ 1,
1493
+ Math.floor((content || "").length / 4)
1494
+ );
1495
+ usage = {
1496
+ prompt_tokens: estimatedTokens,
1497
+ completion_tokens: estimatedTokens,
1498
+ total_tokens: estimatedTokens * 2
1499
+ };
1500
+ }
1501
+ return {
1502
+ content: content || "",
1503
+ usage: usage ? {
1504
+ prompt_tokens: _nullishCoalesce(usage.prompt_tokens, () => ( 0)),
1505
+ completion_tokens: _nullishCoalesce(usage.completion_tokens, () => ( 0)),
1506
+ total_tokens: _nullishCoalesce(usage.total_tokens, () => ( 0)),
1507
+ time_cost: _nullishCoalesce(timeCost, () => ( 0))
1508
+ } : void 0,
1509
+ isStreamed: !!isStreaming
1362
1510
  };
1363
- const startTime2 = Date.now();
1364
- const result = await completion.create({
1365
- model,
1366
- system: "You are a versatile professional in software UI automation",
1367
- messages: messages.map((m) => ({
1368
- role: "user",
1369
- content: Array.isArray(m.content) ? m.content.map(convertImageContent) : m.content
1370
- })),
1371
- response_format: responseFormat,
1372
- ...commonConfig
1373
- });
1374
- timeCost = Date.now() - startTime2;
1375
- content = result.content[0].text;
1376
- _utils.assert.call(void 0, content, "empty content");
1377
- usage = result.usage;
1511
+ } catch (e) {
1512
+ console.error(" call AI error", e);
1513
+ const newError = new Error(
1514
+ `failed to call ${isStreaming ? "streaming " : ""}AI model service: ${e.message}. Trouble shooting: https://midscenejs.com/model-provider.html`,
1515
+ {
1516
+ cause: e
1517
+ }
1518
+ );
1519
+ throw newError;
1378
1520
  }
1379
- return {
1380
- content: content || "",
1381
- usage: {
1382
- prompt_tokens: _nullishCoalesce(_optionalChain([usage, 'optionalAccess', _28 => _28.prompt_tokens]), () => ( 0)),
1383
- completion_tokens: _nullishCoalesce(_optionalChain([usage, 'optionalAccess', _29 => _29.completion_tokens]), () => ( 0)),
1384
- total_tokens: _nullishCoalesce(_optionalChain([usage, 'optionalAccess', _30 => _30.total_tokens]), () => ( 0)),
1385
- time_cost: _nullishCoalesce(timeCost, () => ( 0))
1386
- }
1387
- };
1388
1521
  }
1389
1522
  async function callToGetJSONObject(messages, AIActionTypeValue) {
1390
1523
  let responseFormat;
@@ -1444,8 +1577,8 @@ function preprocessDoubaoBboxJson(input) {
1444
1577
  }
1445
1578
  function safeParseJson(input) {
1446
1579
  const cleanJsonString = extractJSONFromCodeBlock(input);
1447
- if (_optionalChain([cleanJsonString, 'optionalAccess', _31 => _31.match, 'call', _32 => _32(/\((\d+),(\d+)\)/)])) {
1448
- return _optionalChain([cleanJsonString, 'access', _33 => _33.match, 'call', _34 => _34(/\((\d+),(\d+)\)/), 'optionalAccess', _35 => _35.slice, 'call', _36 => _36(1), 'access', _37 => _37.map, 'call', _38 => _38(Number)]);
1580
+ if (_optionalChain([cleanJsonString, 'optionalAccess', _43 => _43.match, 'call', _44 => _44(/\((\d+),(\d+)\)/)])) {
1581
+ return _optionalChain([cleanJsonString, 'access', _45 => _45.match, 'call', _46 => _46(/\((\d+),(\d+)\)/), 'optionalAccess', _47 => _47.slice, 'call', _48 => _48(1), 'access', _49 => _49.map, 'call', _50 => _50(Number)]);
1449
1582
  }
1450
1583
  try {
1451
1584
  return JSON.parse(cleanJsonString);
@@ -1464,6 +1597,9 @@ function safeParseJson(input) {
1464
1597
 
1465
1598
  // src/ai-model/prompt/playwright-generator.ts
1466
1599
 
1600
+
1601
+ // src/ai-model/prompt/yaml-generator.ts
1602
+
1467
1603
  var getScreenshotsForLLM = (events, maxScreenshots = 1) => {
1468
1604
  const eventsWithScreenshots = events.filter(
1469
1605
  (event) => event.screenshotBefore || event.screenshotAfter || event.screenshotWithBox
@@ -1529,7 +1665,6 @@ var prepareEventSummary = (events, options = {}) => {
1529
1665
  const filteredEvents = filterEventsByType(events);
1530
1666
  const eventCounts = createEventCounts(filteredEvents, events.length);
1531
1667
  const startUrl = filteredEvents.navigationEvents.length > 0 ? filteredEvents.navigationEvents[0].url || "" : "";
1532
- const pageTitles = filteredEvents.navigationEvents.map((event) => event.title).filter((title) => Boolean(title)).slice(0, 5);
1533
1668
  const clickDescriptions = filteredEvents.clickEvents.map((event) => event.elementDescription).filter((desc) => Boolean(desc)).slice(0, 10);
1534
1669
  const inputDescriptions = extractInputDescriptions(
1535
1670
  filteredEvents.inputEvents
@@ -1540,7 +1675,6 @@ var prepareEventSummary = (events, options = {}) => {
1540
1675
  testName: options.testName || "Automated test from recorded events",
1541
1676
  startUrl,
1542
1677
  eventCounts,
1543
- pageTitles,
1544
1678
  urls,
1545
1679
  clickDescriptions,
1546
1680
  inputDescriptions,
@@ -1575,6 +1709,141 @@ var validateEvents = (events) => {
1575
1709
  throw new Error("No events provided for test generation");
1576
1710
  }
1577
1711
  };
1712
+ var generateYamlTest = async (events, options = {}) => {
1713
+ try {
1714
+ validateEvents(events);
1715
+ const summary = prepareEventSummary(events, {
1716
+ testName: options.testName,
1717
+ maxScreenshots: options.maxScreenshots || 3
1718
+ });
1719
+ const yamlSummary = {
1720
+ ...summary,
1721
+ includeTimestamps: options.includeTimestamps || false
1722
+ };
1723
+ const screenshots = getScreenshotsForLLM(
1724
+ events,
1725
+ options.maxScreenshots || 3
1726
+ );
1727
+ const prompt = [
1728
+ {
1729
+ role: "system",
1730
+ content: `You are an expert in Midscene.js YAML test generation. Generate clean, accurate YAML following these rules: ${_constants.YAML_EXAMPLE_CODE}`
1731
+ },
1732
+ {
1733
+ role: "user",
1734
+ content: `Generate YAML test for Midscene.js automation from recorded browser events.
1735
+
1736
+ Event Summary:
1737
+ ${JSON.stringify(yamlSummary, null, 2)}
1738
+
1739
+ Convert events:
1740
+ - navigation → target.url
1741
+ - click → aiTap with element description
1742
+ - input → aiInput with value and locate
1743
+ - scroll → aiScroll with appropriate direction
1744
+ - Add aiAssert for important state changes
1745
+
1746
+ Respond with YAML only, no explanations.`
1747
+ }
1748
+ ];
1749
+ if (screenshots.length > 0) {
1750
+ prompt.push({
1751
+ role: "user",
1752
+ content: "Here are screenshots from the recording session to help you understand the context:"
1753
+ });
1754
+ prompt.push({
1755
+ role: "user",
1756
+ content: screenshots.map((screenshot) => ({
1757
+ type: "image_url",
1758
+ image_url: {
1759
+ url: screenshot
1760
+ }
1761
+ }))
1762
+ });
1763
+ }
1764
+ const response = await call(prompt, 2 /* EXTRACT_DATA */);
1765
+ if (_optionalChain([response, 'optionalAccess', _51 => _51.content]) && typeof response.content === "string") {
1766
+ return response.content;
1767
+ }
1768
+ throw new Error("Failed to generate YAML test configuration");
1769
+ } catch (error) {
1770
+ throw new Error(`Failed to generate YAML test: ${error}`);
1771
+ }
1772
+ };
1773
+ var generateYamlTestStream = async (events, options = {}) => {
1774
+ try {
1775
+ validateEvents(events);
1776
+ const summary = prepareEventSummary(events, {
1777
+ testName: options.testName,
1778
+ maxScreenshots: options.maxScreenshots || 3
1779
+ });
1780
+ const yamlSummary = {
1781
+ ...summary,
1782
+ includeTimestamps: options.includeTimestamps || false
1783
+ };
1784
+ const screenshots = getScreenshotsForLLM(
1785
+ events,
1786
+ options.maxScreenshots || 3
1787
+ );
1788
+ const prompt = [
1789
+ {
1790
+ role: "system",
1791
+ content: `You are an expert in Midscene.js YAML test generation. Generate clean, accurate YAML following these rules: ${_constants.YAML_EXAMPLE_CODE}`
1792
+ },
1793
+ {
1794
+ role: "user",
1795
+ content: `Generate YAML test for Midscene.js automation from recorded browser events.
1796
+
1797
+ Event Summary:
1798
+ ${JSON.stringify(yamlSummary, null, 2)}
1799
+
1800
+ Convert events:
1801
+ - navigation → target.url
1802
+ - click → aiTap with element description
1803
+ - input → aiInput with value and locate
1804
+ - scroll → aiScroll with appropriate direction
1805
+ - Add aiAssert for important state changes
1806
+
1807
+ Respond with YAML only, no explanations.`
1808
+ }
1809
+ ];
1810
+ if (screenshots.length > 0) {
1811
+ prompt.push({
1812
+ role: "user",
1813
+ content: "Here are screenshots from the recording session to help you understand the context:"
1814
+ });
1815
+ prompt.push({
1816
+ role: "user",
1817
+ content: screenshots.map((screenshot) => ({
1818
+ type: "image_url",
1819
+ image_url: {
1820
+ url: screenshot
1821
+ }
1822
+ }))
1823
+ });
1824
+ }
1825
+ if (options.stream && options.onChunk) {
1826
+ return await call(prompt, 2 /* EXTRACT_DATA */, void 0, {
1827
+ stream: true,
1828
+ onChunk: options.onChunk
1829
+ });
1830
+ } else {
1831
+ const response = await call(prompt, 2 /* EXTRACT_DATA */);
1832
+ if (_optionalChain([response, 'optionalAccess', _52 => _52.content]) && typeof response.content === "string") {
1833
+ return {
1834
+ content: response.content,
1835
+ usage: response.usage,
1836
+ isStreamed: false
1837
+ };
1838
+ }
1839
+ throw new Error("Failed to generate YAML test configuration");
1840
+ }
1841
+ } catch (error) {
1842
+ throw new Error(`Failed to generate YAML test: ${error}`);
1843
+ }
1844
+ };
1845
+
1846
+ // src/ai-model/prompt/playwright-generator.ts
1578
1847
  var generatePlaywrightTest = async (events, options = {}) => {
1579
1848
  validateEvents(events);
1580
1849
  const summary = prepareEventSummary(events, {
@@ -1623,161 +1892,74 @@ ${_constants.PLAYWRIGHT_EXAMPLE_CODE}`;
1623
1892
  }
1624
1893
  ];
1625
1894
  const response = await call(prompt, 2 /* EXTRACT_DATA */);
1626
- if (_optionalChain([response, 'optionalAccess', _39 => _39.content]) && typeof response.content === "string") {
1895
+ if (_optionalChain([response, 'optionalAccess', _53 => _53.content]) && typeof response.content === "string") {
1627
1896
  return response.content;
1628
1897
  }
1629
1898
  throw new Error("Failed to generate Playwright test code");
1630
1899
  };
1631
-
1632
- // src/ai-model/prompt/yaml-generator.ts
1633
-
1634
- var getScreenshotsForLLM2 = (events, maxScreenshots = 1) => {
1635
- const eventsWithScreenshots = events.filter(
1636
- (event) => event.screenshotBefore || event.screenshotAfter || event.screenshotWithBox
1637
- );
1638
- const sortedEvents = [...eventsWithScreenshots].sort((a, b) => {
1639
- if (a.type === "navigation" && b.type !== "navigation")
1640
- return -1;
1641
- if (a.type !== "navigation" && b.type === "navigation")
1642
- return 1;
1643
- if (a.type === "click" && b.type !== "click")
1644
- return -1;
1645
- if (a.type !== "click" && b.type === "click")
1646
- return 1;
1647
- return 0;
1900
+ var generatePlaywrightTestStream = async (events, options = {}) => {
1901
+ validateEvents(events);
1902
+ const summary = prepareEventSummary(events, {
1903
+ testName: options.testName,
1904
+ maxScreenshots: options.maxScreenshots || 3
1648
1905
  });
1649
- const screenshots = [];
1650
- for (const event of sortedEvents) {
1651
- const screenshot = event.screenshotWithBox || event.screenshotAfter || event.screenshotBefore;
1652
- if (screenshot && !screenshots.includes(screenshot)) {
1653
- screenshots.push(screenshot);
1654
- if (screenshots.length >= maxScreenshots)
1655
- break;
1656
- }
1657
- }
1658
- return screenshots;
1659
- };
1660
- var filterEventsByType2 = (events) => {
1661
- return {
1662
- navigationEvents: events.filter((event) => event.type === "navigation"),
1663
- clickEvents: events.filter((event) => event.type === "click"),
1664
- inputEvents: events.filter((event) => event.type === "input"),
1665
- scrollEvents: events.filter((event) => event.type === "scroll")
1666
- };
1667
- };
1668
- var createEventCounts2 = (filteredEvents, totalEvents) => {
1669
- return {
1670
- navigation: filteredEvents.navigationEvents.length,
1671
- click: filteredEvents.clickEvents.length,
1672
- input: filteredEvents.inputEvents.length,
1673
- scroll: filteredEvents.scrollEvents.length,
1674
- total: totalEvents
1675
- };
1676
- };
1677
- var extractInputDescriptions2 = (inputEvents) => {
1678
- return inputEvents.map((event) => ({
1679
- description: event.elementDescription || "",
1680
- value: event.value || ""
1681
- })).filter((item) => item.description && item.value);
1682
- };
1683
- var processEventsForLLM2 = (events) => {
1684
- return events.map((event) => ({
1685
- type: event.type,
1686
- timestamp: event.timestamp,
1687
- url: event.url,
1688
- title: event.title,
1689
- elementDescription: event.elementDescription,
1690
- value: event.value,
1691
- pageInfo: event.pageInfo,
1692
- elementRect: event.elementRect
1693
- }));
1694
- };
1695
- var prepareEventSummary2 = (events, options = {}) => {
1696
- const filteredEvents = filterEventsByType2(events);
1697
- const eventCounts = createEventCounts2(filteredEvents, events.length);
1698
- const startUrl = filteredEvents.navigationEvents.length > 0 ? filteredEvents.navigationEvents[0].url || "" : "";
1699
- const pageTitles = filteredEvents.navigationEvents.map((event) => event.title).filter((title) => Boolean(title)).slice(0, 5);
1700
- const clickDescriptions = filteredEvents.clickEvents.map((event) => event.elementDescription).filter((desc) => Boolean(desc)).slice(0, 10);
1701
- const inputDescriptions = extractInputDescriptions2(
1702
- filteredEvents.inputEvents
1703
- ).slice(0, 10);
1704
- const urls = filteredEvents.navigationEvents.map((e) => e.url).filter((url) => Boolean(url)).slice(0, 5);
1705
- const processedEvents = processEventsForLLM2(events);
1706
- return {
1707
- testName: options.testName || "Automated test from recorded events",
1708
- startUrl,
1709
- eventCounts,
1710
- pageTitles,
1711
- urls,
1712
- clickDescriptions,
1713
- inputDescriptions,
1714
- events: processedEvents
1906
+ const playwrightSummary = {
1907
+ ...summary,
1908
+ waitForNetworkIdle: options.waitForNetworkIdle !== false,
1909
+ waitForNetworkIdleTimeout: options.waitForNetworkIdleTimeout || 2e3,
1910
+ viewportSize: options.viewportSize || { width: 1280, height: 800 }
1715
1911
  };
1716
- };
1717
- var validateEvents2 = (events) => {
1718
- if (!events.length) {
1719
- throw new Error("No events provided for test generation");
1720
- }
1721
- };
1722
- var generateYamlTest = async (events, options = {}) => {
1723
- try {
1724
- validateEvents2(events);
1725
- const summary = prepareEventSummary2(events, {
1726
- testName: options.testName,
1727
- maxScreenshots: options.maxScreenshots || 3
1728
- });
1729
- const yamlSummary = {
1730
- ...summary,
1731
- includeTimestamps: options.includeTimestamps || false
1732
- };
1733
- const screenshots = getScreenshotsForLLM2(
1734
- events,
1735
- options.maxScreenshots || 3
1736
- );
1737
- const prompt = [
1738
- {
1739
- role: "system",
1740
- content: `You are an expert in Midscene.js YAML test generation. Generate clean, accurate YAML following these rules: ${_constants.YAML_EXAMPLE_CODE}`
1741
- },
1742
- {
1743
- role: "user",
1744
- content: `Generate YAML test for Midscene.js automation from recorded browser events.
1912
+ const screenshots = getScreenshotsForLLM(events, options.maxScreenshots || 3);
1913
+ const promptText = `Generate a Playwright test using @midscene/web/playwright that reproduces this recorded browser session. The test should be based on the following events and follow the structure of the example provided. Make the test descriptive with appropriate assertions and validations.
1745
1914
 
1746
1915
  Event Summary:
1747
- ${JSON.stringify(yamlSummary, null, 2)}
1916
+ ${JSON.stringify(playwrightSummary, null, 2)}
1748
1917
 
1749
- Convert events:
1750
- - navigation target.url
1751
- - click aiTap with element description
1752
- - input aiInput with value and locate
1753
- - scroll aiScroll with appropriate direction
1754
- - Add aiAssert for important state changes
1918
+ Generated code should:
1919
+ 1. Import required dependencies
1920
+ 2. Set up the test with proper configuration
1921
+ 3. Include a beforeEach hook to navigate to the starting URL
1922
+ 4. Implement a test that uses Midscene AI methods (aiTap, aiInput, aiAssert, etc.)
1923
+ 5. Include appropriate assertions and validations
1924
+ 6. Follow best practices for Playwright tests
1925
+ 7. Be ready to execute without further modification
1926
+ 8. can't wrap this test code in markdown code block
1755
1927
 
1756
- Respond with YAML only, no explanations.`
1757
- }
1758
- ];
1759
- if (screenshots.length > 0) {
1760
- prompt.push({
1761
- role: "user",
1762
- content: "Here are screenshots from the recording session to help you understand the context:"
1763
- });
1764
- prompt.push({
1765
- role: "user",
1766
- content: screenshots.map((screenshot) => ({
1767
- type: "image_url",
1768
- image_url: {
1769
- url: screenshot
1770
- }
1771
- }))
1772
- });
1928
+ Respond ONLY with the complete Playwright test code, no explanations.`;
1929
+ const messageContent = createMessageContent(
1930
+ promptText,
1931
+ screenshots,
1932
+ options.includeScreenshots !== false
1933
+ );
1934
+ const systemPrompt = `You are an expert test automation engineer specializing in Playwright and Midscene.
1935
+ Your task is to generate a complete, executable Playwright test using @midscene/web/playwright that reproduces a recorded browser session.
1936
+
1937
+ ${_constants.PLAYWRIGHT_EXAMPLE_CODE}`;
1938
+ const prompt = [
1939
+ {
1940
+ role: "system",
1941
+ content: systemPrompt
1942
+ },
1943
+ {
1944
+ role: "user",
1945
+ content: messageContent
1773
1946
  }
1947
+ ];
1948
+ if (options.stream && options.onChunk) {
1949
+ return await call(prompt, 2 /* EXTRACT_DATA */, void 0, {
1950
+ stream: true,
1951
+ onChunk: options.onChunk
1952
+ });
1953
+ } else {
1774
1954
  const response = await call(prompt, 2 /* EXTRACT_DATA */);
1775
- if (_optionalChain([response, 'optionalAccess', _40 => _40.content]) && typeof response.content === "string") {
1776
- return response.content;
1955
+ if (_optionalChain([response, 'optionalAccess', _54 => _54.content]) && typeof response.content === "string") {
1956
+ return {
1957
+ content: response.content,
1958
+ usage: response.usage,
1959
+ isStreamed: false
1960
+ };
1777
1961
  }
1778
- throw new Error("Failed to generate YAML test configuration");
1779
- } catch (error) {
1780
- throw new Error(`Failed to generate YAML test: ${error}`);
1962
+ throw new Error("Failed to generate Playwright test code");
1781
1963
  }
1782
1964
  };
1783
1965
 
@@ -1976,7 +2158,7 @@ async function AiLocateElement(options) {
1976
2158
  );
1977
2159
  }
1978
2160
  let referenceImagePayload;
1979
- if (_optionalChain([options, 'access', _41 => _41.referenceImage, 'optionalAccess', _42 => _42.rect]) && options.referenceImage.base64) {
2161
+ if (_optionalChain([options, 'access', _55 => _55.referenceImage, 'optionalAccess', _56 => _56.rect]) && options.referenceImage.base64) {
1980
2162
  referenceImagePayload = await _img.cropByRect.call(void 0,
1981
2163
  options.referenceImage.base64,
1982
2164
  options.referenceImage.rect,
@@ -2012,10 +2194,10 @@ async function AiLocateElement(options) {
2012
2194
  if ("bbox" in res.content && Array.isArray(res.content.bbox)) {
2013
2195
  resRect = adaptBboxToRect(
2014
2196
  res.content.bbox,
2015
- _optionalChain([options, 'access', _43 => _43.searchConfig, 'optionalAccess', _44 => _44.rect, 'optionalAccess', _45 => _45.width]) || context.size.width,
2016
- _optionalChain([options, 'access', _46 => _46.searchConfig, 'optionalAccess', _47 => _47.rect, 'optionalAccess', _48 => _48.height]) || context.size.height,
2017
- _optionalChain([options, 'access', _49 => _49.searchConfig, 'optionalAccess', _50 => _50.rect, 'optionalAccess', _51 => _51.left]),
2018
- _optionalChain([options, 'access', _52 => _52.searchConfig, 'optionalAccess', _53 => _53.rect, 'optionalAccess', _54 => _54.top])
2197
+ _optionalChain([options, 'access', _57 => _57.searchConfig, 'optionalAccess', _58 => _58.rect, 'optionalAccess', _59 => _59.width]) || context.size.width,
2198
+ _optionalChain([options, 'access', _60 => _60.searchConfig, 'optionalAccess', _61 => _61.rect, 'optionalAccess', _62 => _62.height]) || context.size.height,
2199
+ _optionalChain([options, 'access', _63 => _63.searchConfig, 'optionalAccess', _64 => _64.rect, 'optionalAccess', _65 => _65.left]),
2200
+ _optionalChain([options, 'access', _66 => _66.searchConfig, 'optionalAccess', _67 => _67.rect, 'optionalAccess', _68 => _68.top])
2019
2201
  );
2020
2202
  debugInspect("resRect", resRect);
2021
2203
  const rectCenter = {
@@ -2034,7 +2216,7 @@ async function AiLocateElement(options) {
2034
2216
  }
2035
2217
  } catch (e) {
2036
2218
  const msg = e instanceof Error ? `Failed to parse bbox: ${e.message}` : "unknown error in locate";
2037
- if (!errors || _optionalChain([errors, 'optionalAccess', _55 => _55.length]) === 0) {
2219
+ if (!errors || _optionalChain([errors, 'optionalAccess', _69 => _69.length]) === 0) {
2038
2220
  errors = [msg];
2039
2221
  } else {
2040
2222
  errors.push(`(${msg})`);
@@ -2125,14 +2307,14 @@ async function AiExtractElementInfo(options) {
2125
2307
  truncateTextLength: 200,
2126
2308
  filterNonTextContent: false,
2127
2309
  visibleOnly: false,
2128
- domIncluded: _optionalChain([extractOption, 'optionalAccess', _56 => _56.domIncluded])
2310
+ domIncluded: _optionalChain([extractOption, 'optionalAccess', _70 => _70.domIncluded])
2129
2311
  });
2130
2312
  const extractDataPromptText = await extractDataQueryPrompt(
2131
2313
  description,
2132
2314
  dataQuery
2133
2315
  );
2134
2316
  const userContent = [];
2135
- if (_optionalChain([extractOption, 'optionalAccess', _57 => _57.screenshotIncluded]) !== false) {
2317
+ if (_optionalChain([extractOption, 'optionalAccess', _71 => _71.screenshotIncluded]) !== false) {
2136
2318
  userContent.push({
2137
2319
  type: "image_url",
2138
2320
  image_url: {
@@ -2260,7 +2442,7 @@ async function plan(userInstruction, opts) {
2260
2442
  const { content, usage } = await call2(msgs, 3 /* PLAN */);
2261
2443
  const rawResponse = JSON.stringify(content, void 0, 2);
2262
2444
  const planFromAI = content;
2263
- const actions = (_optionalChain([planFromAI, 'access', _58 => _58.action, 'optionalAccess', _59 => _59.type]) ? [planFromAI.action] : planFromAI.actions) || [];
2445
+ const actions = (_optionalChain([planFromAI, 'access', _72 => _72.action, 'optionalAccess', _73 => _73.type]) ? [planFromAI.action] : planFromAI.actions) || [];
2264
2446
  const returnValue = {
2265
2447
  ...planFromAI,
2266
2448
  actions,
@@ -2287,7 +2469,7 @@ async function plan(userInstruction, opts) {
2287
2469
  _utils.assert.call(void 0, !planFromAI.error, `Failed to plan actions: ${planFromAI.error}`);
2288
2470
  } else {
2289
2471
  actions.forEach((action) => {
2290
- if (_optionalChain([action, 'access', _60 => _60.locate, 'optionalAccess', _61 => _61.id])) {
2472
+ if (_optionalChain([action, 'access', _74 => _74.locate, 'optionalAccess', _75 => _75.id])) {
2291
2473
  const element = elementById(action.locate.id);
2292
2474
  if (element) {
2293
2475
  action.locate.id = element.id;
@@ -2576,6 +2758,8 @@ async function resizeImageForUiTars(imageBase64, size) {
2576
2758
 
2577
2759
 
2578
2760
 
2579
- exports.systemPromptToLocateElement = systemPromptToLocateElement; exports.elementByPositionWithElementInfo = elementByPositionWithElementInfo; exports.describeUserPage = describeUserPage; exports.call = call; exports.callToGetJSONObject = callToGetJSONObject; exports.AIActionType = AIActionType; exports.callAiFn = callAiFn; exports.adaptBboxToRect = adaptBboxToRect; exports.expandSearchArea = expandSearchArea; exports.generatePlaywrightTest = generatePlaywrightTest; exports.generateYamlTest = generateYamlTest; exports.AiLocateElement = AiLocateElement; exports.AiLocateSection = AiLocateSection; exports.AiExtractElementInfo = AiExtractElementInfo; exports.AiAssert = AiAssert; exports.plan = plan; exports.vlmPlanning = vlmPlanning; exports.resizeImageForUiTars = resizeImageForUiTars;
2580
2761
 
2581
- //# sourceMappingURL=chunk-QT5OZCDN.js.map
2762
+
2763
+ exports.systemPromptToLocateElement = systemPromptToLocateElement; exports.elementByPositionWithElementInfo = elementByPositionWithElementInfo; exports.describeUserPage = describeUserPage; exports.call = call; exports.callToGetJSONObject = callToGetJSONObject; exports.AIActionType = AIActionType; exports.callAiFn = callAiFn; exports.adaptBboxToRect = adaptBboxToRect; exports.expandSearchArea = expandSearchArea; exports.generateYamlTest = generateYamlTest; exports.generateYamlTestStream = generateYamlTestStream; exports.generatePlaywrightTest = generatePlaywrightTest; exports.generatePlaywrightTestStream = generatePlaywrightTestStream; exports.AiLocateElement = AiLocateElement; exports.AiLocateSection = AiLocateSection; exports.AiExtractElementInfo = AiExtractElementInfo; exports.AiAssert = AiAssert; exports.plan = plan; exports.vlmPlanning = vlmPlanning; exports.resizeImageForUiTars = resizeImageForUiTars;
2764
+
2765
+ //# sourceMappingURL=chunk-NQHZHZRA.js.map