@midscene/core 0.21.4-beta-20250711063424.0 → 0.21.4-beta-20250715053831.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (32)
  1. package/dist/es/ai-model.d.ts +36 -24
  2. package/dist/es/ai-model.js +5 -1
  3. package/dist/es/{chunk-QT5OZCDN.js → chunk-NQHZHZRA.js} +408 -224
  4. package/dist/es/chunk-NQHZHZRA.js.map +1 -0
  5. package/dist/es/{chunk-2CMOAEAS.js → chunk-NSQ46QDR.js} +3 -3
  6. package/dist/es/index.d.ts +4 -4
  7. package/dist/es/index.js +2 -2
  8. package/dist/es/{llm-planning-fe687364.d.ts → llm-planning-b342ff86.d.ts} +1 -1
  9. package/dist/es/{types-b0b4c68e.d.ts → types-05553e39.d.ts} +32 -1
  10. package/dist/es/utils.d.ts +1 -1
  11. package/dist/es/utils.js +1 -1
  12. package/dist/lib/ai-model.d.ts +36 -24
  13. package/dist/lib/ai-model.js +6 -2
  14. package/dist/lib/{chunk-QT5OZCDN.js → chunk-NQHZHZRA.js} +417 -233
  15. package/dist/lib/chunk-NQHZHZRA.js.map +1 -0
  16. package/dist/lib/{chunk-2CMOAEAS.js → chunk-NSQ46QDR.js} +3 -3
  17. package/dist/lib/index.d.ts +4 -4
  18. package/dist/lib/index.js +12 -12
  19. package/dist/lib/{llm-planning-fe687364.d.ts → llm-planning-b342ff86.d.ts} +1 -1
  20. package/dist/lib/{types-b0b4c68e.d.ts → types-05553e39.d.ts} +32 -1
  21. package/dist/lib/utils.d.ts +1 -1
  22. package/dist/lib/utils.js +2 -2
  23. package/dist/types/ai-model.d.ts +36 -24
  24. package/dist/types/index.d.ts +4 -4
  25. package/dist/types/{llm-planning-fe687364.d.ts → llm-planning-b342ff86.d.ts} +1 -1
  26. package/dist/types/{types-b0b4c68e.d.ts → types-05553e39.d.ts} +32 -1
  27. package/dist/types/utils.d.ts +1 -1
  28. package/package.json +3 -3
  29. package/dist/es/chunk-QT5OZCDN.js.map +0 -1
  30. package/dist/lib/chunk-QT5OZCDN.js.map +0 -1
  31. package/dist/es/{chunk-2CMOAEAS.js.map → chunk-NSQ46QDR.js.map} +0 -0
  32. package/dist/lib/{chunk-2CMOAEAS.js.map → chunk-NSQ46QDR.js.map} +0 -0

package/dist/es/{chunk-QT5OZCDN.js → chunk-NQHZHZRA.js} +408 -224
@@ -1135,13 +1135,17 @@ pageDescription:

  // src/ai-model/service-caller/index.ts
  function checkAIConfig() {
- if (getAIConfig(OPENAI_API_KEY))
+ const openaiKey = getAIConfig(OPENAI_API_KEY);
+ const azureConfig = getAIConfig(MIDSCENE_USE_AZURE_OPENAI);
+ const anthropicKey = getAIConfig(ANTHROPIC_API_KEY);
+ const initConfigJson = getAIConfig(MIDSCENE_OPENAI_INIT_CONFIG_JSON);
+ if (openaiKey)
  return true;
- if (getAIConfig(MIDSCENE_USE_AZURE_OPENAI))
+ if (azureConfig)
  return true;
- if (getAIConfig(ANTHROPIC_API_KEY))
+ if (anthropicKey)
  return true;
- return Boolean(getAIConfig(MIDSCENE_OPENAI_INIT_CONFIG_JSON));
+ return Boolean(initConfigJson);
  }
  var debugConfigInitialized = false;
  function initDebugConfig() {
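
Note: this hunk only hoists the four config lookups into named locals; the truthiness logic of checkAIConfig() is unchanged. As a minimal sketch, assuming getAIConfig resolves these keys from the process environment (which the constant names suggest), any one of the following makes it return true:

// Minimal sketch (assumed env-based config): any one of these suffices.
process.env.OPENAI_API_KEY = "sk-placeholder";
// process.env.MIDSCENE_USE_AZURE_OPENAI = "1";
// process.env.ANTHROPIC_API_KEY = "placeholder";
// process.env.MIDSCENE_OPENAI_INIT_CONFIG_JSON = '{"baseURL":"http://localhost:11434/v1"}'; // assumed shape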
@@ -1191,9 +1195,12 @@ async function createChatClient({
  const socksProxy = getAIConfig(MIDSCENE_OPENAI_SOCKS_PROXY);
  const httpProxy = getAIConfig(MIDSCENE_OPENAI_HTTP_PROXY);
  let proxyAgent = void 0;
+ const debugProxy = getDebug2("ai:call:proxy");
  if (httpProxy) {
+ debugProxy("using http proxy", httpProxy);
  proxyAgent = new HttpsProxyAgent(httpProxy);
  } else if (socksProxy) {
+ debugProxy("using socks proxy", socksProxy);
  proxyAgent = new SocksProxyAgent(socksProxy);
  }
  if (getAIConfig(OPENAI_USE_AZURE)) {
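
Note: the added debugProxy calls record which proxy branch was taken. A sketch of exercising the http branch, assuming getDebug2 wraps the debug package so output is gated by the DEBUG environment variable (the exact namespace prefix, if any, is an assumption):

// Sketch: surface the new proxy debug lines (namespace matching is assumed).
process.env.DEBUG = "*ai:call:proxy*";
process.env.MIDSCENE_OPENAI_HTTP_PROXY = "http://127.0.0.1:8888";
// createChatClient(...) should now log: using http proxy http://127.0.0.1:8888
// (SOCKS takes effect only when no HTTP proxy is set, per the if/else above.)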
@@ -1290,7 +1297,7 @@ Please check your config.`
  }
  throw new Error("Openai SDK or Anthropic SDK is not initialized");
  }
- async function call(messages, AIActionTypeValue, responseFormat) {
+ async function call(messages, AIActionTypeValue, responseFormat, options) {
  const { completion, style } = await createChatClient({
  AIActionTypeValue
  });
@@ -1300,91 +1307,217 @@ async function call(messages, AIActionTypeValue, responseFormat) {
  const debugProfileDetail = getDebug2("ai:profile:detail");
  const startTime = Date.now();
  const model = getModelName();
+ const isStreaming = options?.stream && options?.onChunk;
  let content;
+ let accumulated = "";
  let usage;
  let timeCost;
  const commonConfig = {
  temperature: vlLocateMode3() === "vlm-ui-tars" ? 0 : 0.1,
- stream: false,
+ stream: !!isStreaming,
  max_tokens: typeof maxTokens === "number" ? maxTokens : Number.parseInt(maxTokens || "2048", 10),
  ...vlLocateMode3() === "qwen-vl" ? {
  vl_high_resolution_images: true
  } : {}
  };
- if (style === "openai") {
- debugCall(`sending request to ${model}`);
- let result;
- try {
- const startTime2 = Date.now();
- result = await completion.create({
- model,
- messages,
- response_format: responseFormat,
- ...commonConfig
- });
- timeCost = Date.now() - startTime2;
- } catch (e) {
- const newError = new Error(
- `failed to call AI model service: ${e.message}. Trouble shooting: https://midscenejs.com/model-provider.html`,
- {
- cause: e
- }
+ try {
+ if (style === "openai") {
+ debugCall(
+ `sending ${isStreaming ? "streaming " : ""}request to ${model}`
  );
- throw newError;
- }
- debugProfileStats(
- `model, ${model}, mode, ${vlLocateMode3() || "default"}, ui-tars-version, ${uiTarsModelVersion()}, prompt-tokens, ${result.usage?.prompt_tokens || ""}, completion-tokens, ${result.usage?.completion_tokens || ""}, total-tokens, ${result.usage?.total_tokens || ""}, cost-ms, ${Date.now() - startTime}, requestId, ${result._request_id || ""}`
- );
- debugProfileDetail(`model usage detail: ${JSON.stringify(result.usage)}`);
- assert3(
- result.choices,
- `invalid response from LLM service: ${JSON.stringify(result)}`
- );
- content = result.choices[0].message.content;
- debugCall(`response: ${content}`);
- assert3(content, "empty content");
- usage = result.usage;
- } else if (style === "anthropic") {
- const convertImageContent = (content2) => {
- if (content2.type === "image_url") {
- const imgBase64 = content2.image_url.url;
- assert3(imgBase64, "image_url is required");
- return {
- source: {
- type: "base64",
- media_type: imgBase64.includes("data:image/png;base64,") ? "image/png" : "image/jpeg",
- data: imgBase64.split(",")[1]
+ if (isStreaming) {
+ const stream = await completion.create(
+ {
+ model,
+ messages,
+ response_format: responseFormat,
+ ...commonConfig
  },
- type: "image"
- };
+ {
+ stream: true
+ }
+ );
+ for await (const chunk of stream) {
+ const content2 = chunk.choices?.[0]?.delta?.content || "";
+ const reasoning_content = chunk.choices?.[0]?.delta?.reasoning_content || "";
+ if (chunk.usage) {
+ usage = chunk.usage;
+ }
+ if (content2 || reasoning_content) {
+ accumulated += content2;
+ const chunkData = {
+ content: content2,
+ reasoning_content,
+ accumulated,
+ isComplete: false,
+ usage: void 0
+ };
+ options.onChunk(chunkData);
+ }
+ if (chunk.choices?.[0]?.finish_reason) {
+ timeCost = Date.now() - startTime;
+ if (!usage) {
+ const estimatedTokens = Math.max(
+ 1,
+ Math.floor(accumulated.length / 4)
+ );
+ usage = {
+ prompt_tokens: estimatedTokens,
+ completion_tokens: estimatedTokens,
+ total_tokens: estimatedTokens * 2
+ };
+ }
+ const finalChunk = {
+ content: "",
+ accumulated,
+ reasoning_content: "",
+ isComplete: true,
+ usage: {
+ prompt_tokens: usage.prompt_tokens ?? 0,
+ completion_tokens: usage.completion_tokens ?? 0,
+ total_tokens: usage.total_tokens ?? 0,
+ time_cost: timeCost ?? 0
+ }
+ };
+ options.onChunk(finalChunk);
+ break;
+ }
+ }
+ content = accumulated;
+ debugProfileStats(
+ `streaming model, ${model}, mode, ${vlLocateMode3() || "default"}, cost-ms, ${timeCost}`
+ );
+ } else {
+ const result = await completion.create({
+ model,
+ messages,
+ response_format: responseFormat,
+ ...commonConfig
+ });
+ timeCost = Date.now() - startTime;
+ debugProfileStats(
+ `model, ${model}, mode, ${vlLocateMode3() || "default"}, ui-tars-version, ${uiTarsModelVersion()}, prompt-tokens, ${result.usage?.prompt_tokens || ""}, completion-tokens, ${result.usage?.completion_tokens || ""}, total-tokens, ${result.usage?.total_tokens || ""}, cost-ms, ${timeCost}, requestId, ${result._request_id || ""}`
+ );
+ debugProfileDetail(
+ `model usage detail: ${JSON.stringify(result.usage)}`
+ );
+ assert3(
+ result.choices,
+ `invalid response from LLM service: ${JSON.stringify(result)}`
+ );
+ content = result.choices[0].message.content;
+ usage = result.usage;
  }
- return content2;
+ debugCall(`response: ${content}`);
+ assert3(content, "empty content");
+ } else if (style === "anthropic") {
+ const convertImageContent = (content2) => {
+ if (content2.type === "image_url") {
+ const imgBase64 = content2.image_url.url;
+ assert3(imgBase64, "image_url is required");
+ return {
+ source: {
+ type: "base64",
+ media_type: imgBase64.includes("data:image/png;base64,") ? "image/png" : "image/jpeg",
+ data: imgBase64.split(",")[1]
+ },
+ type: "image"
+ };
+ }
+ return content2;
+ };
+ if (isStreaming) {
+ const stream = await completion.create({
+ model,
+ system: "You are a versatile professional in software UI automation",
+ messages: messages.map((m) => ({
+ role: "user",
+ content: Array.isArray(m.content) ? m.content.map(convertImageContent) : m.content
+ })),
+ response_format: responseFormat,
+ ...commonConfig
+ });
+ for await (const chunk of stream) {
+ const content2 = chunk.delta?.text || "";
+ if (content2) {
+ accumulated += content2;
+ const chunkData = {
+ content: content2,
+ accumulated,
+ reasoning_content: "",
+ isComplete: false,
+ usage: void 0
+ };
+ options.onChunk(chunkData);
+ }
+ if (chunk.type === "message_stop") {
+ timeCost = Date.now() - startTime;
+ const anthropicUsage = chunk.usage;
+ const finalChunk = {
+ content: "",
+ accumulated,
+ reasoning_content: "",
+ isComplete: true,
+ usage: anthropicUsage ? {
+ prompt_tokens: anthropicUsage.input_tokens ?? 0,
+ completion_tokens: anthropicUsage.output_tokens ?? 0,
+ total_tokens: (anthropicUsage.input_tokens ?? 0) + (anthropicUsage.output_tokens ?? 0),
+ time_cost: timeCost ?? 0
+ } : void 0
+ };
+ options.onChunk(finalChunk);
+ break;
+ }
+ }
+ content = accumulated;
+ } else {
+ const result = await completion.create({
+ model,
+ system: "You are a versatile professional in software UI automation",
+ messages: messages.map((m) => ({
+ role: "user",
+ content: Array.isArray(m.content) ? m.content.map(convertImageContent) : m.content
+ })),
+ response_format: responseFormat,
+ ...commonConfig
+ });
+ timeCost = Date.now() - startTime;
+ content = result.content[0].text;
+ usage = result.usage;
+ }
+ assert3(content, "empty content");
+ }
+ if (isStreaming && !usage) {
+ const estimatedTokens = Math.max(
+ 1,
+ Math.floor((content || "").length / 4)
+ );
+ usage = {
+ prompt_tokens: estimatedTokens,
+ completion_tokens: estimatedTokens,
+ total_tokens: estimatedTokens * 2
+ };
+ }
+ return {
+ content: content || "",
+ usage: usage ? {
+ prompt_tokens: usage.prompt_tokens ?? 0,
+ completion_tokens: usage.completion_tokens ?? 0,
+ total_tokens: usage.total_tokens ?? 0,
+ time_cost: timeCost ?? 0
+ } : void 0,
+ isStreamed: !!isStreaming
  };
- const startTime2 = Date.now();
- const result = await completion.create({
- model,
- system: "You are a versatile professional in software UI automation",
- messages: messages.map((m) => ({
- role: "user",
- content: Array.isArray(m.content) ? m.content.map(convertImageContent) : m.content
- })),
- response_format: responseFormat,
- ...commonConfig
- });
- timeCost = Date.now() - startTime2;
- content = result.content[0].text;
- assert3(content, "empty content");
- usage = result.usage;
+ } catch (e) {
+ console.error(" call AI error", e);
+ const newError = new Error(
+ `failed to call ${isStreaming ? "streaming " : ""}AI model service: ${e.message}. Trouble shooting: https://midscenejs.com/model-provider.html`,
+ {
+ cause: e
+ }
+ );
+ throw newError;
  }
- return {
- content: content || "",
- usage: {
- prompt_tokens: usage?.prompt_tokens ?? 0,
- completion_tokens: usage?.completion_tokens ?? 0,
- total_tokens: usage?.total_tokens ?? 0,
- time_cost: timeCost ?? 0
- }
- };
  }
  async function callToGetJSONObject(messages, AIActionTypeValue) {
  let responseFormat;
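
Net effect of this hunk: call() gains an optional fourth argument. When options.stream and options.onChunk are both set, deltas are pushed to onChunk (with the accumulated text, reasoning_content where the provider supplies it, and a final chunk carrying isComplete: true plus usage, estimated at roughly four characters per token when the provider reports none), and the whole body is now wrapped in a single try/catch. call() is module-internal, so this is only a sketch of the new contract; the messages and action-type values are illustrative:

// Sketch: the new streaming contract of call() (internal function).
const result = await call(
  [{ role: "user", content: "summarize the recorded session" }],
  2 /* EXTRACT_DATA */,
  void 0, // no response_format
  {
    stream: true,
    onChunk: (chunk) => {
      if (!chunk.isComplete) {
        process.stdout.write(chunk.content); // incremental delta
      } else {
        console.log("\ntotal tokens:", chunk.usage?.total_tokens);
      }
    }
  }
);
// result: { content: <accumulated text>, usage: {...}, isStreamed: true }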
@@ -1464,6 +1597,9 @@ function safeParseJson(input) {

  // src/ai-model/prompt/playwright-generator.ts
  import { PLAYWRIGHT_EXAMPLE_CODE } from "@midscene/shared/constants";
+
+ // src/ai-model/prompt/yaml-generator.ts
+ import { YAML_EXAMPLE_CODE } from "@midscene/shared/constants";
  var getScreenshotsForLLM = (events, maxScreenshots = 1) => {
  const eventsWithScreenshots = events.filter(
  (event) => event.screenshotBefore || event.screenshotAfter || event.screenshotWithBox
@@ -1529,7 +1665,6 @@ var prepareEventSummary = (events, options = {}) => {
  const filteredEvents = filterEventsByType(events);
  const eventCounts = createEventCounts(filteredEvents, events.length);
  const startUrl = filteredEvents.navigationEvents.length > 0 ? filteredEvents.navigationEvents[0].url || "" : "";
- const pageTitles = filteredEvents.navigationEvents.map((event) => event.title).filter((title) => Boolean(title)).slice(0, 5);
  const clickDescriptions = filteredEvents.clickEvents.map((event) => event.elementDescription).filter((desc) => Boolean(desc)).slice(0, 10);
  const inputDescriptions = extractInputDescriptions(
  filteredEvents.inputEvents
@@ -1540,7 +1675,6 @@ var prepareEventSummary = (events, options = {}) => {
  testName: options.testName || "Automated test from recorded events",
  startUrl,
  eventCounts,
- pageTitles,
  urls,
  clickDescriptions,
  inputDescriptions,
@@ -1575,6 +1709,141 @@ var validateEvents = (events) => {
  throw new Error("No events provided for test generation");
  }
  };
+ var generateYamlTest = async (events, options = {}) => {
+ try {
+ validateEvents(events);
+ const summary = prepareEventSummary(events, {
+ testName: options.testName,
+ maxScreenshots: options.maxScreenshots || 3
+ });
+ const yamlSummary = {
+ ...summary,
+ includeTimestamps: options.includeTimestamps || false
+ };
+ const screenshots = getScreenshotsForLLM(
+ events,
+ options.maxScreenshots || 3
+ );
+ const prompt = [
+ {
+ role: "system",
+ content: `You are an expert in Midscene.js YAML test generation. Generate clean, accurate YAML following these rules: ${YAML_EXAMPLE_CODE}`
+ },
+ {
+ role: "user",
+ content: `Generate YAML test for Midscene.js automation from recorded browser events.
+
+ Event Summary:
+ ${JSON.stringify(yamlSummary, null, 2)}
+
+ Convert events:
+ - navigation → target.url
+ - click → aiTap with element description
+ - input → aiInput with value and locate
+ - scroll → aiScroll with appropriate direction
+ - Add aiAssert for important state changes
+
+ Respond with YAML only, no explanations.`
+ }
+ ];
+ if (screenshots.length > 0) {
+ prompt.push({
+ role: "user",
+ content: "Here are screenshots from the recording session to help you understand the context:"
+ });
+ prompt.push({
+ role: "user",
+ content: screenshots.map((screenshot) => ({
+ type: "image_url",
+ image_url: {
+ url: screenshot
+ }
+ }))
+ });
+ }
+ const response = await call(prompt, 2 /* EXTRACT_DATA */);
+ if (response?.content && typeof response.content === "string") {
+ return response.content;
+ }
+ throw new Error("Failed to generate YAML test configuration");
+ } catch (error) {
+ throw new Error(`Failed to generate YAML test: ${error}`);
+ }
+ };
+ var generateYamlTestStream = async (events, options = {}) => {
+ try {
+ validateEvents(events);
+ const summary = prepareEventSummary(events, {
+ testName: options.testName,
+ maxScreenshots: options.maxScreenshots || 3
+ });
+ const yamlSummary = {
+ ...summary,
+ includeTimestamps: options.includeTimestamps || false
+ };
+ const screenshots = getScreenshotsForLLM(
+ events,
+ options.maxScreenshots || 3
+ );
+ const prompt = [
+ {
+ role: "system",
+ content: `You are an expert in Midscene.js YAML test generation. Generate clean, accurate YAML following these rules: ${YAML_EXAMPLE_CODE}`
+ },
+ {
+ role: "user",
+ content: `Generate YAML test for Midscene.js automation from recorded browser events.
+
+ Event Summary:
+ ${JSON.stringify(yamlSummary, null, 2)}
+
+ Convert events:
+ - navigation → target.url
+ - click → aiTap with element description
+ - input → aiInput with value and locate
+ - scroll → aiScroll with appropriate direction
+ - Add aiAssert for important state changes
+
+ Respond with YAML only, no explanations.`
+ }
+ ];
+ if (screenshots.length > 0) {
+ prompt.push({
+ role: "user",
+ content: "Here are screenshots from the recording session to help you understand the context:"
+ });
+ prompt.push({
+ role: "user",
+ content: screenshots.map((screenshot) => ({
+ type: "image_url",
+ image_url: {
+ url: screenshot
+ }
+ }))
+ });
+ }
+ if (options.stream && options.onChunk) {
+ return await call(prompt, 2 /* EXTRACT_DATA */, void 0, {
+ stream: true,
+ onChunk: options.onChunk
+ });
+ } else {
+ const response = await call(prompt, 2 /* EXTRACT_DATA */);
+ if (response?.content && typeof response.content === "string") {
+ return {
+ content: response.content,
+ usage: response.usage,
+ isStreamed: false
+ };
+ }
+ throw new Error("Failed to generate YAML test configuration");
+ }
+ } catch (error) {
+ throw new Error(`Failed to generate YAML test: ${error}`);
+ }
+ };
+
+ // src/ai-model/prompt/playwright-generator.ts
  var generatePlaywrightTest = async (events, options = {}) => {
  validateEvents(events);
  const summary = prepareEventSummary(events, {
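
Note: generateYamlTestStream duplicates generateYamlTest's prompt construction and adds the pass-through to call()'s streaming mode; without stream/onChunk it resolves to { content, usage, isStreamed: false }. A usage sketch with illustrative recorded events (field names follow processEventsForLLM above):

// Sketch: streaming YAML generation from recorded events (events are illustrative).
const events = [
  { type: "navigation", url: "https://example.com/login", timestamp: Date.now() },
  { type: "input", elementDescription: "username field", value: "alice", timestamp: Date.now() },
  { type: "click", elementDescription: "login button", timestamp: Date.now() }
];
await generateYamlTestStream(events, {
  testName: "login flow",
  stream: true,
  onChunk: (chunk) => {
    if (chunk.isComplete) console.log("final YAML:\n" + chunk.accumulated);
  }
});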
@@ -1628,163 +1897,76 @@ ${PLAYWRIGHT_EXAMPLE_CODE}`;
  }
  throw new Error("Failed to generate Playwright test code");
  };
-
- // src/ai-model/prompt/yaml-generator.ts
- import { YAML_EXAMPLE_CODE } from "@midscene/shared/constants";
- var getScreenshotsForLLM2 = (events, maxScreenshots = 1) => {
- const eventsWithScreenshots = events.filter(
- (event) => event.screenshotBefore || event.screenshotAfter || event.screenshotWithBox
- );
- const sortedEvents = [...eventsWithScreenshots].sort((a, b) => {
- if (a.type === "navigation" && b.type !== "navigation")
- return -1;
- if (a.type !== "navigation" && b.type === "navigation")
- return 1;
- if (a.type === "click" && b.type !== "click")
- return -1;
- if (a.type !== "click" && b.type === "click")
- return 1;
- return 0;
+ var generatePlaywrightTestStream = async (events, options = {}) => {
+ validateEvents(events);
+ const summary = prepareEventSummary(events, {
+ testName: options.testName,
+ maxScreenshots: options.maxScreenshots || 3
  });
- const screenshots = [];
- for (const event of sortedEvents) {
- const screenshot = event.screenshotWithBox || event.screenshotAfter || event.screenshotBefore;
- if (screenshot && !screenshots.includes(screenshot)) {
- screenshots.push(screenshot);
- if (screenshots.length >= maxScreenshots)
- break;
- }
- }
- return screenshots;
- };
- var filterEventsByType2 = (events) => {
- return {
- navigationEvents: events.filter((event) => event.type === "navigation"),
- clickEvents: events.filter((event) => event.type === "click"),
- inputEvents: events.filter((event) => event.type === "input"),
- scrollEvents: events.filter((event) => event.type === "scroll")
- };
- };
- var createEventCounts2 = (filteredEvents, totalEvents) => {
- return {
- navigation: filteredEvents.navigationEvents.length,
- click: filteredEvents.clickEvents.length,
- input: filteredEvents.inputEvents.length,
- scroll: filteredEvents.scrollEvents.length,
- total: totalEvents
- };
- };
- var extractInputDescriptions2 = (inputEvents) => {
- return inputEvents.map((event) => ({
- description: event.elementDescription || "",
- value: event.value || ""
- })).filter((item) => item.description && item.value);
- };
- var processEventsForLLM2 = (events) => {
- return events.map((event) => ({
- type: event.type,
- timestamp: event.timestamp,
- url: event.url,
- title: event.title,
- elementDescription: event.elementDescription,
- value: event.value,
- pageInfo: event.pageInfo,
- elementRect: event.elementRect
- }));
- };
- var prepareEventSummary2 = (events, options = {}) => {
- const filteredEvents = filterEventsByType2(events);
- const eventCounts = createEventCounts2(filteredEvents, events.length);
- const startUrl = filteredEvents.navigationEvents.length > 0 ? filteredEvents.navigationEvents[0].url || "" : "";
- const pageTitles = filteredEvents.navigationEvents.map((event) => event.title).filter((title) => Boolean(title)).slice(0, 5);
- const clickDescriptions = filteredEvents.clickEvents.map((event) => event.elementDescription).filter((desc) => Boolean(desc)).slice(0, 10);
- const inputDescriptions = extractInputDescriptions2(
- filteredEvents.inputEvents
- ).slice(0, 10);
- const urls = filteredEvents.navigationEvents.map((e) => e.url).filter((url) => Boolean(url)).slice(0, 5);
- const processedEvents = processEventsForLLM2(events);
- return {
- testName: options.testName || "Automated test from recorded events",
- startUrl,
- eventCounts,
- pageTitles,
- urls,
- clickDescriptions,
- inputDescriptions,
- events: processedEvents
+ const playwrightSummary = {
+ ...summary,
+ waitForNetworkIdle: options.waitForNetworkIdle !== false,
+ waitForNetworkIdleTimeout: options.waitForNetworkIdleTimeout || 2e3,
+ viewportSize: options.viewportSize || { width: 1280, height: 800 }
  };
- };
- var validateEvents2 = (events) => {
- if (!events.length) {
- throw new Error("No events provided for test generation");
- }
- };
- var generateYamlTest = async (events, options = {}) => {
- try {
- validateEvents2(events);
- const summary = prepareEventSummary2(events, {
- testName: options.testName,
- maxScreenshots: options.maxScreenshots || 3
- });
- const yamlSummary = {
- ...summary,
- includeTimestamps: options.includeTimestamps || false
- };
- const screenshots = getScreenshotsForLLM2(
- events,
- options.maxScreenshots || 3
- );
- const prompt = [
- {
- role: "system",
- content: `You are an expert in Midscene.js YAML test generation. Generate clean, accurate YAML following these rules: ${YAML_EXAMPLE_CODE}`
- },
- {
- role: "user",
- content: `Generate YAML test for Midscene.js automation from recorded browser events.
+ const screenshots = getScreenshotsForLLM(events, options.maxScreenshots || 3);
+ const promptText = `Generate a Playwright test using @midscene/web/playwright that reproduces this recorded browser session. The test should be based on the following events and follow the structure of the example provided. Make the test descriptive with appropriate assertions and validations.

  Event Summary:
- ${JSON.stringify(yamlSummary, null, 2)}
+ ${JSON.stringify(playwrightSummary, null, 2)}

- Convert events:
- - navigation → target.url
- - click → aiTap with element description
- - input → aiInput with value and locate
- - scroll → aiScroll with appropriate direction
- - Add aiAssert for important state changes
+ Generated code should:
+ 1. Import required dependencies
+ 2. Set up the test with proper configuration
+ 3. Include a beforeEach hook to navigate to the starting URL
+ 4. Implement a test that uses Midscene AI methods (aiTap, aiInput, aiAssert, etc.)
+ 5. Include appropriate assertions and validations
+ 6. Follow best practices for Playwright tests
+ 7. Be ready to execute without further modification
+ 8. can't wrap this test code in markdown code block

- Respond with YAML only, no explanations.`
- }
- ];
- if (screenshots.length > 0) {
- prompt.push({
- role: "user",
- content: "Here are screenshots from the recording session to help you understand the context:"
- });
- prompt.push({
- role: "user",
- content: screenshots.map((screenshot) => ({
- type: "image_url",
- image_url: {
- url: screenshot
- }
- }))
- });
+ Respond ONLY with the complete Playwright test code, no explanations.`;
+ const messageContent = createMessageContent(
+ promptText,
+ screenshots,
+ options.includeScreenshots !== false
+ );
+ const systemPrompt = `You are an expert test automation engineer specializing in Playwright and Midscene.
+ Your task is to generate a complete, executable Playwright test using @midscene/web/playwright that reproduces a recorded browser session.
+
+ ${PLAYWRIGHT_EXAMPLE_CODE}`;
+ const prompt = [
+ {
+ role: "system",
+ content: systemPrompt
+ },
+ {
+ role: "user",
+ content: messageContent
  }
+ ];
+ if (options.stream && options.onChunk) {
+ return await call(prompt, 2 /* EXTRACT_DATA */, void 0, {
+ stream: true,
+ onChunk: options.onChunk
+ });
+ } else {
  const response = await call(prompt, 2 /* EXTRACT_DATA */);
  if (response?.content && typeof response.content === "string") {
- return response.content;
+ return {
+ content: response.content,
+ usage: response.usage,
+ isStreamed: false
+ };
  }
- throw new Error("Failed to generate YAML test configuration");
- } catch (error) {
- throw new Error(`Failed to generate YAML test: ${error}`);
+ throw new Error("Failed to generate Playwright test code");
  }
  };

  // src/ai-model/inspect.ts
  import {
- MIDSCENE_USE_QWEN_VL as MIDSCENE_USE_QWEN_VL2,
- MIDSCENE_USE_VLM_UI_TARS as MIDSCENE_USE_VLM_UI_TARS2,
+ MIDSCENE_USE_QWEN_VL,
+ MIDSCENE_USE_VLM_UI_TARS,
  getAIConfigInBoolean as getAIConfigInBoolean2,
  vlLocateMode as vlLocateMode4
  } from "@midscene/shared/env";
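
Note: this hunk removes the duplicated yaml-generator helpers (the *2 variants) in favor of the shared ones, and adds generatePlaywrightTestStream with the same streaming pass-through. A sketch of both call shapes, with option values taken from the defaults visible above:

// Sketch: both call shapes of generatePlaywrightTestStream.
const streamed = await generatePlaywrightTestStream(events, {
  viewportSize: { width: 1280, height: 800 }, // default shown above
  waitForNetworkIdleTimeout: 2000,            // default is 2e3
  includeScreenshots: true,
  stream: true,
  onChunk: (chunk) => process.stdout.write(chunk.content)
});
// streamed: { content, usage, isStreamed: true }
const plain = await generatePlaywrightTestStream(events, { testName: "checkout" });
// plain: { content, usage, isStreamed: false }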
@@ -1980,7 +2162,7 @@ async function AiLocateElement(options) {
  referenceImagePayload = await cropByRect(
  options.referenceImage.base64,
  options.referenceImage.rect,
- getAIConfigInBoolean2(MIDSCENE_USE_QWEN_VL2)
+ getAIConfigInBoolean2(MIDSCENE_USE_QWEN_VL)
  );
  }
  const msgs = [
@@ -2106,7 +2288,7 @@ async function AiLocateSection(options) {
  imageBase64 = await cropByRect(
  screenshotBase64,
  sectionRect,
- getAIConfigInBoolean2(MIDSCENE_USE_QWEN_VL2)
+ getAIConfigInBoolean2(MIDSCENE_USE_QWEN_VL)
  );
  }
  return {
@@ -2167,7 +2349,7 @@ async function AiAssert(options) {
  assert4(assertion, "assertion should be a string");
  const { screenshotBase64 } = context;
  const systemPrompt = systemPromptToAssert({
- isUITars: getAIConfigInBoolean2(MIDSCENE_USE_VLM_UI_TARS2)
+ isUITars: getAIConfigInBoolean2(MIDSCENE_USE_VLM_UI_TARS)
  });
  const msgs = [
  { role: "system", content: systemPrompt },
@@ -2567,8 +2749,10 @@ export {
  callAiFn,
  adaptBboxToRect,
  expandSearchArea,
- generatePlaywrightTest,
  generateYamlTest,
+ generateYamlTestStream,
+ generatePlaywrightTest,
+ generatePlaywrightTestStream,
  AiLocateElement,
  AiLocateSection,
  AiExtractElementInfo,
@@ -2578,4 +2762,4 @@ export {
  resizeImageForUiTars
  };

- //# sourceMappingURL=chunk-QT5OZCDN.js.map
+ //# sourceMappingURL=chunk-NQHZHZRA.js.map
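
Note: the reordered export list above adds generateYamlTestStream and generatePlaywrightTestStream alongside the existing generators. A hedged import sketch; the @midscene/core/ai-model entry path is an assumption based on the dist/es/ai-model.js changes in this release:

// Sketch (assumed entry path): consuming the new streaming generators.
import {
  generateYamlTest,
  generateYamlTestStream,
  generatePlaywrightTest,
  generatePlaywrightTestStream
} from "@midscene/core/ai-model";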