@midscene/core 0.21.4-beta-20250711063424.0 → 0.21.4-beta-20250714025212.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (32)
  1. package/dist/es/ai-model.d.ts +36 -24
  2. package/dist/es/ai-model.js +5 -1
  3. package/dist/es/{chunk-QT5OZCDN.js → chunk-H3KP5MGB.js} +407 -219
  4. package/dist/es/chunk-H3KP5MGB.js.map +1 -0
  5. package/dist/es/{chunk-2CMOAEAS.js → chunk-QKX52XS3.js} +3 -3
  6. package/dist/es/index.d.ts +4 -4
  7. package/dist/es/index.js +2 -2
  8. package/dist/es/{llm-planning-fe687364.d.ts → llm-planning-b342ff86.d.ts} +1 -1
  9. package/dist/es/{types-b0b4c68e.d.ts → types-05553e39.d.ts} +32 -1
  10. package/dist/es/utils.d.ts +1 -1
  11. package/dist/es/utils.js +1 -1
  12. package/dist/lib/ai-model.d.ts +36 -24
  13. package/dist/lib/ai-model.js +6 -2
  14. package/dist/lib/{chunk-QT5OZCDN.js → chunk-H3KP5MGB.js} +421 -233
  15. package/dist/lib/chunk-H3KP5MGB.js.map +1 -0
  16. package/dist/lib/{chunk-2CMOAEAS.js → chunk-QKX52XS3.js} +3 -3
  17. package/dist/lib/index.d.ts +4 -4
  18. package/dist/lib/index.js +12 -12
  19. package/dist/lib/{llm-planning-fe687364.d.ts → llm-planning-b342ff86.d.ts} +1 -1
  20. package/dist/lib/{types-b0b4c68e.d.ts → types-05553e39.d.ts} +32 -1
  21. package/dist/lib/utils.d.ts +1 -1
  22. package/dist/lib/utils.js +2 -2
  23. package/dist/types/ai-model.d.ts +36 -24
  24. package/dist/types/index.d.ts +4 -4
  25. package/dist/types/{llm-planning-fe687364.d.ts → llm-planning-b342ff86.d.ts} +1 -1
  26. package/dist/types/{types-b0b4c68e.d.ts → types-05553e39.d.ts} +32 -1
  27. package/dist/types/utils.d.ts +1 -1
  28. package/package.json +3 -3
  29. package/dist/es/chunk-QT5OZCDN.js.map +0 -1
  30. package/dist/lib/chunk-QT5OZCDN.js.map +0 -1
  31. package/dist/es/{chunk-2CMOAEAS.js.map → chunk-QKX52XS3.js.map} +0 -0
  32. package/dist/lib/{chunk-2CMOAEAS.js.map → chunk-QKX52XS3.js.map} +0 -0
@@ -1135,13 +1135,24 @@ pageDescription:
1135
1135
 
1136
1136
  // src/ai-model/service-caller/index.ts
1137
1137
  function checkAIConfig() {
1138
- if (_env.getAIConfig.call(void 0, _env.OPENAI_API_KEY))
1138
+ const openaiKey = _env.getAIConfig.call(void 0, _env.OPENAI_API_KEY);
1139
+ const azureConfig = _env.getAIConfig.call(void 0, _env.MIDSCENE_USE_AZURE_OPENAI);
1140
+ const anthropicKey = _env.getAIConfig.call(void 0, _env.ANTHROPIC_API_KEY);
1141
+ const initConfigJson = _env.getAIConfig.call(void 0, _env.MIDSCENE_OPENAI_INIT_CONFIG_JSON);
1142
+ console.log("AI Config Check:", {
1143
+ hasOpenAI: !!openaiKey,
1144
+ hasAzure: !!azureConfig,
1145
+ hasAnthropic: !!anthropicKey,
1146
+ hasInitConfig: !!initConfigJson,
1147
+ openaiKeyPrefix: openaiKey ? `${openaiKey.substring(0, 10)}...` : "none"
1148
+ });
1149
+ if (openaiKey)
1139
1150
  return true;
1140
- if (_env.getAIConfig.call(void 0, _env.MIDSCENE_USE_AZURE_OPENAI))
1151
+ if (azureConfig)
1141
1152
  return true;
1142
- if (_env.getAIConfig.call(void 0, _env.ANTHROPIC_API_KEY))
1153
+ if (anthropicKey)
1143
1154
  return true;
1144
- return Boolean(_env.getAIConfig.call(void 0, _env.MIDSCENE_OPENAI_INIT_CONFIG_JSON));
1155
+ return Boolean(initConfigJson);
1145
1156
  }
1146
1157
  var debugConfigInitialized = false;
1147
1158
  function initDebugConfig() {
@@ -1290,7 +1301,7 @@ Please check your config.`
1290
1301
  }
1291
1302
  throw new Error("Openai SDK or Anthropic SDK is not initialized");
1292
1303
  }
1293
- async function call(messages, AIActionTypeValue, responseFormat) {
1304
+ async function call(messages, AIActionTypeValue, responseFormat, options) {
1294
1305
  const { completion, style } = await createChatClient({
1295
1306
  AIActionTypeValue
1296
1307
  });
@@ -1300,91 +1311,217 @@ async function call(messages, AIActionTypeValue, responseFormat) {
1300
1311
  const debugProfileDetail = _logger.getDebug.call(void 0, "ai:profile:detail");
1301
1312
  const startTime = Date.now();
1302
1313
  const model = getModelName();
1314
+ const isStreaming = _optionalChain([options, 'optionalAccess', _22 => _22.stream]) && _optionalChain([options, 'optionalAccess', _23 => _23.onChunk]);
1303
1315
  let content;
1316
+ let accumulated = "";
1304
1317
  let usage;
1305
1318
  let timeCost;
1306
1319
  const commonConfig = {
1307
1320
  temperature: _env.vlLocateMode.call(void 0, ) === "vlm-ui-tars" ? 0 : 0.1,
1308
- stream: false,
1321
+ stream: !!isStreaming,
1309
1322
  max_tokens: typeof maxTokens === "number" ? maxTokens : Number.parseInt(maxTokens || "2048", 10),
1310
1323
  ..._env.vlLocateMode.call(void 0, ) === "qwen-vl" ? {
1311
1324
  vl_high_resolution_images: true
1312
1325
  } : {}
1313
1326
  };
1314
- if (style === "openai") {
1315
- debugCall(`sending request to ${model}`);
1316
- let result;
1317
- try {
1318
- const startTime2 = Date.now();
1319
- result = await completion.create({
1320
- model,
1321
- messages,
1322
- response_format: responseFormat,
1323
- ...commonConfig
1324
- });
1325
- timeCost = Date.now() - startTime2;
1326
- } catch (e) {
1327
- const newError = new Error(
1328
- `failed to call AI model service: ${e.message}. Trouble shooting: https://midscenejs.com/model-provider.html`,
1329
- {
1330
- cause: e
1331
- }
1327
+ try {
1328
+ if (style === "openai") {
1329
+ debugCall(
1330
+ `sending ${isStreaming ? "streaming " : ""}request to ${model}`
1332
1331
  );
1333
- throw newError;
1334
- }
1335
- debugProfileStats(
1336
- `model, ${model}, mode, ${_env.vlLocateMode.call(void 0, ) || "default"}, ui-tars-version, ${_env.uiTarsModelVersion.call(void 0, )}, prompt-tokens, ${_optionalChain([result, 'access', _22 => _22.usage, 'optionalAccess', _23 => _23.prompt_tokens]) || ""}, completion-tokens, ${_optionalChain([result, 'access', _24 => _24.usage, 'optionalAccess', _25 => _25.completion_tokens]) || ""}, total-tokens, ${_optionalChain([result, 'access', _26 => _26.usage, 'optionalAccess', _27 => _27.total_tokens]) || ""}, cost-ms, ${Date.now() - startTime}, requestId, ${result._request_id || ""}`
1337
- );
1338
- debugProfileDetail(`model usage detail: ${JSON.stringify(result.usage)}`);
1339
- _utils.assert.call(void 0,
1340
- result.choices,
1341
- `invalid response from LLM service: ${JSON.stringify(result)}`
1342
- );
1343
- content = result.choices[0].message.content;
1344
- debugCall(`response: ${content}`);
1345
- _utils.assert.call(void 0, content, "empty content");
1346
- usage = result.usage;
1347
- } else if (style === "anthropic") {
1348
- const convertImageContent = (content2) => {
1349
- if (content2.type === "image_url") {
1350
- const imgBase64 = content2.image_url.url;
1351
- _utils.assert.call(void 0, imgBase64, "image_url is required");
1352
- return {
1353
- source: {
1354
- type: "base64",
1355
- media_type: imgBase64.includes("data:image/png;base64,") ? "image/png" : "image/jpeg",
1356
- data: imgBase64.split(",")[1]
1332
+ if (isStreaming) {
1333
+ const stream = await completion.create(
1334
+ {
1335
+ model,
1336
+ messages,
1337
+ response_format: responseFormat,
1338
+ ...commonConfig
1357
1339
  },
1358
- type: "image"
1359
- };
1340
+ {
1341
+ stream: true
1342
+ }
1343
+ );
1344
+ for await (const chunk of stream) {
1345
+ const content2 = _optionalChain([chunk, 'access', _24 => _24.choices, 'optionalAccess', _25 => _25[0], 'optionalAccess', _26 => _26.delta, 'optionalAccess', _27 => _27.content]) || "";
1346
+ const reasoning_content = _optionalChain([chunk, 'access', _28 => _28.choices, 'optionalAccess', _29 => _29[0], 'optionalAccess', _30 => _30.delta, 'optionalAccess', _31 => _31.reasoning_content]) || "";
1347
+ if (chunk.usage) {
1348
+ usage = chunk.usage;
1349
+ }
1350
+ if (content2 || reasoning_content) {
1351
+ accumulated += content2;
1352
+ const chunkData = {
1353
+ content: content2,
1354
+ reasoning_content,
1355
+ accumulated,
1356
+ isComplete: false,
1357
+ usage: void 0
1358
+ };
1359
+ options.onChunk(chunkData);
1360
+ }
1361
+ if (_optionalChain([chunk, 'access', _32 => _32.choices, 'optionalAccess', _33 => _33[0], 'optionalAccess', _34 => _34.finish_reason])) {
1362
+ timeCost = Date.now() - startTime;
1363
+ if (!usage) {
1364
+ const estimatedTokens = Math.max(
1365
+ 1,
1366
+ Math.floor(accumulated.length / 4)
1367
+ );
1368
+ usage = {
1369
+ prompt_tokens: estimatedTokens,
1370
+ completion_tokens: estimatedTokens,
1371
+ total_tokens: estimatedTokens * 2
1372
+ };
1373
+ }
1374
+ const finalChunk = {
1375
+ content: "",
1376
+ accumulated,
1377
+ reasoning_content: "",
1378
+ isComplete: true,
1379
+ usage: {
1380
+ prompt_tokens: _nullishCoalesce(usage.prompt_tokens, () => ( 0)),
1381
+ completion_tokens: _nullishCoalesce(usage.completion_tokens, () => ( 0)),
1382
+ total_tokens: _nullishCoalesce(usage.total_tokens, () => ( 0)),
1383
+ time_cost: _nullishCoalesce(timeCost, () => ( 0))
1384
+ }
1385
+ };
1386
+ options.onChunk(finalChunk);
1387
+ break;
1388
+ }
1389
+ }
1390
+ content = accumulated;
1391
+ debugProfileStats(
1392
+ `streaming model, ${model}, mode, ${_env.vlLocateMode.call(void 0, ) || "default"}, cost-ms, ${timeCost}`
1393
+ );
1394
+ } else {
1395
+ const result = await completion.create({
1396
+ model,
1397
+ messages,
1398
+ response_format: responseFormat,
1399
+ ...commonConfig
1400
+ });
1401
+ timeCost = Date.now() - startTime;
1402
+ debugProfileStats(
1403
+ `model, ${model}, mode, ${_env.vlLocateMode.call(void 0, ) || "default"}, ui-tars-version, ${_env.uiTarsModelVersion.call(void 0, )}, prompt-tokens, ${_optionalChain([result, 'access', _35 => _35.usage, 'optionalAccess', _36 => _36.prompt_tokens]) || ""}, completion-tokens, ${_optionalChain([result, 'access', _37 => _37.usage, 'optionalAccess', _38 => _38.completion_tokens]) || ""}, total-tokens, ${_optionalChain([result, 'access', _39 => _39.usage, 'optionalAccess', _40 => _40.total_tokens]) || ""}, cost-ms, ${timeCost}, requestId, ${result._request_id || ""}`
1404
+ );
1405
+ debugProfileDetail(
1406
+ `model usage detail: ${JSON.stringify(result.usage)}`
1407
+ );
1408
+ _utils.assert.call(void 0,
1409
+ result.choices,
1410
+ `invalid response from LLM service: ${JSON.stringify(result)}`
1411
+ );
1412
+ content = result.choices[0].message.content;
1413
+ usage = result.usage;
1360
1414
  }
1361
- return content2;
1415
+ debugCall(`response: ${content}`);
1416
+ _utils.assert.call(void 0, content, "empty content");
1417
+ } else if (style === "anthropic") {
1418
+ const convertImageContent = (content2) => {
1419
+ if (content2.type === "image_url") {
1420
+ const imgBase64 = content2.image_url.url;
1421
+ _utils.assert.call(void 0, imgBase64, "image_url is required");
1422
+ return {
1423
+ source: {
1424
+ type: "base64",
1425
+ media_type: imgBase64.includes("data:image/png;base64,") ? "image/png" : "image/jpeg",
1426
+ data: imgBase64.split(",")[1]
1427
+ },
1428
+ type: "image"
1429
+ };
1430
+ }
1431
+ return content2;
1432
+ };
1433
+ if (isStreaming) {
1434
+ const stream = await completion.create({
1435
+ model,
1436
+ system: "You are a versatile professional in software UI automation",
1437
+ messages: messages.map((m) => ({
1438
+ role: "user",
1439
+ content: Array.isArray(m.content) ? m.content.map(convertImageContent) : m.content
1440
+ })),
1441
+ response_format: responseFormat,
1442
+ ...commonConfig
1443
+ });
1444
+ for await (const chunk of stream) {
1445
+ const content2 = _optionalChain([chunk, 'access', _41 => _41.delta, 'optionalAccess', _42 => _42.text]) || "";
1446
+ if (content2) {
1447
+ accumulated += content2;
1448
+ const chunkData = {
1449
+ content: content2,
1450
+ accumulated,
1451
+ reasoning_content: "",
1452
+ isComplete: false,
1453
+ usage: void 0
1454
+ };
1455
+ options.onChunk(chunkData);
1456
+ }
1457
+ if (chunk.type === "message_stop") {
1458
+ timeCost = Date.now() - startTime;
1459
+ const anthropicUsage = chunk.usage;
1460
+ const finalChunk = {
1461
+ content: "",
1462
+ accumulated,
1463
+ reasoning_content: "",
1464
+ isComplete: true,
1465
+ usage: anthropicUsage ? {
1466
+ prompt_tokens: _nullishCoalesce(anthropicUsage.input_tokens, () => ( 0)),
1467
+ completion_tokens: _nullishCoalesce(anthropicUsage.output_tokens, () => ( 0)),
1468
+ total_tokens: (_nullishCoalesce(anthropicUsage.input_tokens, () => ( 0))) + (_nullishCoalesce(anthropicUsage.output_tokens, () => ( 0))),
1469
+ time_cost: _nullishCoalesce(timeCost, () => ( 0))
1470
+ } : void 0
1471
+ };
1472
+ options.onChunk(finalChunk);
1473
+ break;
1474
+ }
1475
+ }
1476
+ content = accumulated;
1477
+ } else {
1478
+ const result = await completion.create({
1479
+ model,
1480
+ system: "You are a versatile professional in software UI automation",
1481
+ messages: messages.map((m) => ({
1482
+ role: "user",
1483
+ content: Array.isArray(m.content) ? m.content.map(convertImageContent) : m.content
1484
+ })),
1485
+ response_format: responseFormat,
1486
+ ...commonConfig
1487
+ });
1488
+ timeCost = Date.now() - startTime;
1489
+ content = result.content[0].text;
1490
+ usage = result.usage;
1491
+ }
1492
+ _utils.assert.call(void 0, content, "empty content");
1493
+ }
1494
+ if (isStreaming && !usage) {
1495
+ const estimatedTokens = Math.max(
1496
+ 1,
1497
+ Math.floor((content || "").length / 4)
1498
+ );
1499
+ usage = {
1500
+ prompt_tokens: estimatedTokens,
1501
+ completion_tokens: estimatedTokens,
1502
+ total_tokens: estimatedTokens * 2
1503
+ };
1504
+ }
1505
+ return {
1506
+ content: content || "",
1507
+ usage: usage ? {
1508
+ prompt_tokens: _nullishCoalesce(usage.prompt_tokens, () => ( 0)),
1509
+ completion_tokens: _nullishCoalesce(usage.completion_tokens, () => ( 0)),
1510
+ total_tokens: _nullishCoalesce(usage.total_tokens, () => ( 0)),
1511
+ time_cost: _nullishCoalesce(timeCost, () => ( 0))
1512
+ } : void 0,
1513
+ isStreamed: !!isStreaming
1362
1514
  };
1363
- const startTime2 = Date.now();
1364
- const result = await completion.create({
1365
- model,
1366
- system: "You are a versatile professional in software UI automation",
1367
- messages: messages.map((m) => ({
1368
- role: "user",
1369
- content: Array.isArray(m.content) ? m.content.map(convertImageContent) : m.content
1370
- })),
1371
- response_format: responseFormat,
1372
- ...commonConfig
1373
- });
1374
- timeCost = Date.now() - startTime2;
1375
- content = result.content[0].text;
1376
- _utils.assert.call(void 0, content, "empty content");
1377
- usage = result.usage;
1515
+ } catch (e) {
1516
+ console.error(" call AI error", e);
1517
+ const newError = new Error(
1518
+ `failed to call ${isStreaming ? "streaming " : ""}AI model service: ${e.message}. Trouble shooting: https://midscenejs.com/model-provider.html`,
1519
+ {
1520
+ cause: e
1521
+ }
1522
+ );
1523
+ throw newError;
1378
1524
  }
1379
- return {
1380
- content: content || "",
1381
- usage: {
1382
- prompt_tokens: _nullishCoalesce(_optionalChain([usage, 'optionalAccess', _28 => _28.prompt_tokens]), () => ( 0)),
1383
- completion_tokens: _nullishCoalesce(_optionalChain([usage, 'optionalAccess', _29 => _29.completion_tokens]), () => ( 0)),
1384
- total_tokens: _nullishCoalesce(_optionalChain([usage, 'optionalAccess', _30 => _30.total_tokens]), () => ( 0)),
1385
- time_cost: _nullishCoalesce(timeCost, () => ( 0))
1386
- }
1387
- };
1388
1525
  }
1389
1526
  async function callToGetJSONObject(messages, AIActionTypeValue) {
1390
1527
  let responseFormat;
@@ -1444,8 +1581,8 @@ function preprocessDoubaoBboxJson(input) {
1444
1581
  }
1445
1582
  function safeParseJson(input) {
1446
1583
  const cleanJsonString = extractJSONFromCodeBlock(input);
1447
- if (_optionalChain([cleanJsonString, 'optionalAccess', _31 => _31.match, 'call', _32 => _32(/\((\d+),(\d+)\)/)])) {
1448
- return _optionalChain([cleanJsonString, 'access', _33 => _33.match, 'call', _34 => _34(/\((\d+),(\d+)\)/), 'optionalAccess', _35 => _35.slice, 'call', _36 => _36(1), 'access', _37 => _37.map, 'call', _38 => _38(Number)]);
1584
+ if (_optionalChain([cleanJsonString, 'optionalAccess', _43 => _43.match, 'call', _44 => _44(/\((\d+),(\d+)\)/)])) {
1585
+ return _optionalChain([cleanJsonString, 'access', _45 => _45.match, 'call', _46 => _46(/\((\d+),(\d+)\)/), 'optionalAccess', _47 => _47.slice, 'call', _48 => _48(1), 'access', _49 => _49.map, 'call', _50 => _50(Number)]);
1449
1586
  }
1450
1587
  try {
1451
1588
  return JSON.parse(cleanJsonString);
@@ -1464,6 +1601,9 @@ function safeParseJson(input) {
1464
1601
 
1465
1602
  // src/ai-model/prompt/playwright-generator.ts
1466
1603
 
1604
+
1605
+ // src/ai-model/prompt/yaml-generator.ts
1606
+
1467
1607
  var getScreenshotsForLLM = (events, maxScreenshots = 1) => {
1468
1608
  const eventsWithScreenshots = events.filter(
1469
1609
  (event) => event.screenshotBefore || event.screenshotAfter || event.screenshotWithBox
@@ -1529,7 +1669,6 @@ var prepareEventSummary = (events, options = {}) => {
1529
1669
  const filteredEvents = filterEventsByType(events);
1530
1670
  const eventCounts = createEventCounts(filteredEvents, events.length);
1531
1671
  const startUrl = filteredEvents.navigationEvents.length > 0 ? filteredEvents.navigationEvents[0].url || "" : "";
1532
- const pageTitles = filteredEvents.navigationEvents.map((event) => event.title).filter((title) => Boolean(title)).slice(0, 5);
1533
1672
  const clickDescriptions = filteredEvents.clickEvents.map((event) => event.elementDescription).filter((desc) => Boolean(desc)).slice(0, 10);
1534
1673
  const inputDescriptions = extractInputDescriptions(
1535
1674
  filteredEvents.inputEvents
@@ -1540,7 +1679,6 @@ var prepareEventSummary = (events, options = {}) => {
1540
1679
  testName: options.testName || "Automated test from recorded events",
1541
1680
  startUrl,
1542
1681
  eventCounts,
1543
- pageTitles,
1544
1682
  urls,
1545
1683
  clickDescriptions,
1546
1684
  inputDescriptions,
@@ -1575,6 +1713,141 @@ var validateEvents = (events) => {
1575
1713
  throw new Error("No events provided for test generation");
1576
1714
  }
1577
1715
  };
1716
+ var generateYamlTest = async (events, options = {}) => {
1717
+ try {
1718
+ validateEvents(events);
1719
+ const summary = prepareEventSummary(events, {
1720
+ testName: options.testName,
1721
+ maxScreenshots: options.maxScreenshots || 3
1722
+ });
1723
+ const yamlSummary = {
1724
+ ...summary,
1725
+ includeTimestamps: options.includeTimestamps || false
1726
+ };
1727
+ const screenshots = getScreenshotsForLLM(
1728
+ events,
1729
+ options.maxScreenshots || 3
1730
+ );
1731
+ const prompt = [
1732
+ {
1733
+ role: "system",
1734
+ content: `You are an expert in Midscene.js YAML test generation. Generate clean, accurate YAML following these rules: ${_constants.YAML_EXAMPLE_CODE}`
1735
+ },
1736
+ {
1737
+ role: "user",
1738
+ content: `Generate YAML test for Midscene.js automation from recorded browser events.
1739
+
1740
+ Event Summary:
1741
+ ${JSON.stringify(yamlSummary, null, 2)}
1742
+
1743
+ Convert events:
1744
+ - navigation → target.url
1745
+ - click → aiTap with element description
1746
+ - input → aiInput with value and locate
1747
+ - scroll → aiScroll with appropriate direction
1748
+ - Add aiAssert for important state changes
1749
+
1750
+ Respond with YAML only, no explanations.`
1751
+ }
1752
+ ];
1753
+ if (screenshots.length > 0) {
1754
+ prompt.push({
1755
+ role: "user",
1756
+ content: "Here are screenshots from the recording session to help you understand the context:"
1757
+ });
1758
+ prompt.push({
1759
+ role: "user",
1760
+ content: screenshots.map((screenshot) => ({
1761
+ type: "image_url",
1762
+ image_url: {
1763
+ url: screenshot
1764
+ }
1765
+ }))
1766
+ });
1767
+ }
1768
+ const response = await call(prompt, 2 /* EXTRACT_DATA */);
1769
+ if (_optionalChain([response, 'optionalAccess', _51 => _51.content]) && typeof response.content === "string") {
1770
+ return response.content;
1771
+ }
1772
+ throw new Error("Failed to generate YAML test configuration");
1773
+ } catch (error) {
1774
+ throw new Error(`Failed to generate YAML test: ${error}`);
1775
+ }
1776
+ };
1777
+ var generateYamlTestStream = async (events, options = {}) => {
1778
+ try {
1779
+ validateEvents(events);
1780
+ const summary = prepareEventSummary(events, {
1781
+ testName: options.testName,
1782
+ maxScreenshots: options.maxScreenshots || 3
1783
+ });
1784
+ const yamlSummary = {
1785
+ ...summary,
1786
+ includeTimestamps: options.includeTimestamps || false
1787
+ };
1788
+ const screenshots = getScreenshotsForLLM(
1789
+ events,
1790
+ options.maxScreenshots || 3
1791
+ );
1792
+ const prompt = [
1793
+ {
1794
+ role: "system",
1795
+ content: `You are an expert in Midscene.js YAML test generation. Generate clean, accurate YAML following these rules: ${_constants.YAML_EXAMPLE_CODE}`
1796
+ },
1797
+ {
1798
+ role: "user",
1799
+ content: `Generate YAML test for Midscene.js automation from recorded browser events.
1800
+
1801
+ Event Summary:
1802
+ ${JSON.stringify(yamlSummary, null, 2)}
1803
+
1804
+ Convert events:
1805
+ - navigation → target.url
1806
+ - click → aiTap with element description
1807
+ - input → aiInput with value and locate
1808
+ - scroll → aiScroll with appropriate direction
1809
+ - Add aiAssert for important state changes
1810
+
1811
+ Respond with YAML only, no explanations.`
1812
+ }
1813
+ ];
1814
+ if (screenshots.length > 0) {
1815
+ prompt.push({
1816
+ role: "user",
1817
+ content: "Here are screenshots from the recording session to help you understand the context:"
1818
+ });
1819
+ prompt.push({
1820
+ role: "user",
1821
+ content: screenshots.map((screenshot) => ({
1822
+ type: "image_url",
1823
+ image_url: {
1824
+ url: screenshot
1825
+ }
1826
+ }))
1827
+ });
1828
+ }
1829
+ if (options.stream && options.onChunk) {
1830
+ return await call(prompt, 2 /* EXTRACT_DATA */, void 0, {
1831
+ stream: true,
1832
+ onChunk: options.onChunk
1833
+ });
1834
+ } else {
1835
+ const response = await call(prompt, 2 /* EXTRACT_DATA */);
1836
+ if (_optionalChain([response, 'optionalAccess', _52 => _52.content]) && typeof response.content === "string") {
1837
+ return {
1838
+ content: response.content,
1839
+ usage: response.usage,
1840
+ isStreamed: false
1841
+ };
1842
+ }
1843
+ throw new Error("Failed to generate YAML test configuration");
1844
+ }
1845
+ } catch (error) {
1846
+ throw new Error(`Failed to generate YAML test: ${error}`);
1847
+ }
1848
+ };
1849
+
1850
+ // src/ai-model/prompt/playwright-generator.ts
1578
1851
  var generatePlaywrightTest = async (events, options = {}) => {
1579
1852
  validateEvents(events);
1580
1853
  const summary = prepareEventSummary(events, {
@@ -1623,161 +1896,74 @@ ${_constants.PLAYWRIGHT_EXAMPLE_CODE}`;
1623
1896
  }
1624
1897
  ];
1625
1898
  const response = await call(prompt, 2 /* EXTRACT_DATA */);
1626
- if (_optionalChain([response, 'optionalAccess', _39 => _39.content]) && typeof response.content === "string") {
1899
+ if (_optionalChain([response, 'optionalAccess', _53 => _53.content]) && typeof response.content === "string") {
1627
1900
  return response.content;
1628
1901
  }
1629
1902
  throw new Error("Failed to generate Playwright test code");
1630
1903
  };
1631
-
1632
- // src/ai-model/prompt/yaml-generator.ts
1633
-
1634
- var getScreenshotsForLLM2 = (events, maxScreenshots = 1) => {
1635
- const eventsWithScreenshots = events.filter(
1636
- (event) => event.screenshotBefore || event.screenshotAfter || event.screenshotWithBox
1637
- );
1638
- const sortedEvents = [...eventsWithScreenshots].sort((a, b) => {
1639
- if (a.type === "navigation" && b.type !== "navigation")
1640
- return -1;
1641
- if (a.type !== "navigation" && b.type === "navigation")
1642
- return 1;
1643
- if (a.type === "click" && b.type !== "click")
1644
- return -1;
1645
- if (a.type !== "click" && b.type === "click")
1646
- return 1;
1647
- return 0;
1904
+ var generatePlaywrightTestStream = async (events, options = {}) => {
1905
+ validateEvents(events);
1906
+ const summary = prepareEventSummary(events, {
1907
+ testName: options.testName,
1908
+ maxScreenshots: options.maxScreenshots || 3
1648
1909
  });
1649
- const screenshots = [];
1650
- for (const event of sortedEvents) {
1651
- const screenshot = event.screenshotWithBox || event.screenshotAfter || event.screenshotBefore;
1652
- if (screenshot && !screenshots.includes(screenshot)) {
1653
- screenshots.push(screenshot);
1654
- if (screenshots.length >= maxScreenshots)
1655
- break;
1656
- }
1657
- }
1658
- return screenshots;
1659
- };
1660
- var filterEventsByType2 = (events) => {
1661
- return {
1662
- navigationEvents: events.filter((event) => event.type === "navigation"),
1663
- clickEvents: events.filter((event) => event.type === "click"),
1664
- inputEvents: events.filter((event) => event.type === "input"),
1665
- scrollEvents: events.filter((event) => event.type === "scroll")
1666
- };
1667
- };
1668
- var createEventCounts2 = (filteredEvents, totalEvents) => {
1669
- return {
1670
- navigation: filteredEvents.navigationEvents.length,
1671
- click: filteredEvents.clickEvents.length,
1672
- input: filteredEvents.inputEvents.length,
1673
- scroll: filteredEvents.scrollEvents.length,
1674
- total: totalEvents
1675
- };
1676
- };
1677
- var extractInputDescriptions2 = (inputEvents) => {
1678
- return inputEvents.map((event) => ({
1679
- description: event.elementDescription || "",
1680
- value: event.value || ""
1681
- })).filter((item) => item.description && item.value);
1682
- };
1683
- var processEventsForLLM2 = (events) => {
1684
- return events.map((event) => ({
1685
- type: event.type,
1686
- timestamp: event.timestamp,
1687
- url: event.url,
1688
- title: event.title,
1689
- elementDescription: event.elementDescription,
1690
- value: event.value,
1691
- pageInfo: event.pageInfo,
1692
- elementRect: event.elementRect
1693
- }));
1694
- };
1695
- var prepareEventSummary2 = (events, options = {}) => {
1696
- const filteredEvents = filterEventsByType2(events);
1697
- const eventCounts = createEventCounts2(filteredEvents, events.length);
1698
- const startUrl = filteredEvents.navigationEvents.length > 0 ? filteredEvents.navigationEvents[0].url || "" : "";
1699
- const pageTitles = filteredEvents.navigationEvents.map((event) => event.title).filter((title) => Boolean(title)).slice(0, 5);
1700
- const clickDescriptions = filteredEvents.clickEvents.map((event) => event.elementDescription).filter((desc) => Boolean(desc)).slice(0, 10);
1701
- const inputDescriptions = extractInputDescriptions2(
1702
- filteredEvents.inputEvents
1703
- ).slice(0, 10);
1704
- const urls = filteredEvents.navigationEvents.map((e) => e.url).filter((url) => Boolean(url)).slice(0, 5);
1705
- const processedEvents = processEventsForLLM2(events);
1706
- return {
1707
- testName: options.testName || "Automated test from recorded events",
1708
- startUrl,
1709
- eventCounts,
1710
- pageTitles,
1711
- urls,
1712
- clickDescriptions,
1713
- inputDescriptions,
1714
- events: processedEvents
1910
+ const playwrightSummary = {
1911
+ ...summary,
1912
+ waitForNetworkIdle: options.waitForNetworkIdle !== false,
1913
+ waitForNetworkIdleTimeout: options.waitForNetworkIdleTimeout || 2e3,
1914
+ viewportSize: options.viewportSize || { width: 1280, height: 800 }
1715
1915
  };
1716
- };
1717
- var validateEvents2 = (events) => {
1718
- if (!events.length) {
1719
- throw new Error("No events provided for test generation");
1720
- }
1721
- };
1722
- var generateYamlTest = async (events, options = {}) => {
1723
- try {
1724
- validateEvents2(events);
1725
- const summary = prepareEventSummary2(events, {
1726
- testName: options.testName,
1727
- maxScreenshots: options.maxScreenshots || 3
1728
- });
1729
- const yamlSummary = {
1730
- ...summary,
1731
- includeTimestamps: options.includeTimestamps || false
1732
- };
1733
- const screenshots = getScreenshotsForLLM2(
1734
- events,
1735
- options.maxScreenshots || 3
1736
- );
1737
- const prompt = [
1738
- {
1739
- role: "system",
1740
- content: `You are an expert in Midscene.js YAML test generation. Generate clean, accurate YAML following these rules: ${_constants.YAML_EXAMPLE_CODE}`
1741
- },
1742
- {
1743
- role: "user",
1744
- content: `Generate YAML test for Midscene.js automation from recorded browser events.
1916
+ const screenshots = getScreenshotsForLLM(events, options.maxScreenshots || 3);
1917
+ const promptText = `Generate a Playwright test using @midscene/web/playwright that reproduces this recorded browser session. The test should be based on the following events and follow the structure of the example provided. Make the test descriptive with appropriate assertions and validations.
1745
1918
 
1746
1919
  Event Summary:
1747
- ${JSON.stringify(yamlSummary, null, 2)}
1920
+ ${JSON.stringify(playwrightSummary, null, 2)}
1748
1921
 
1749
- Convert events:
1750
- - navigation target.url
1751
- - click aiTap with element description
1752
- - input aiInput with value and locate
1753
- - scroll aiScroll with appropriate direction
1754
- - Add aiAssert for important state changes
1922
+ Generated code should:
1923
+ 1. Import required dependencies
1924
+ 2. Set up the test with proper configuration
1925
+ 3. Include a beforeEach hook to navigate to the starting URL
1926
+ 4. Implement a test that uses Midscene AI methods (aiTap, aiInput, aiAssert, etc.)
1927
+ 5. Include appropriate assertions and validations
1928
+ 6. Follow best practices for Playwright tests
1929
+ 7. Be ready to execute without further modification
1930
+ 8. can't wrap this test code in markdown code block
1755
1931
 
1756
- Respond with YAML only, no explanations.`
1757
- }
1758
- ];
1759
- if (screenshots.length > 0) {
1760
- prompt.push({
1761
- role: "user",
1762
- content: "Here are screenshots from the recording session to help you understand the context:"
1763
- });
1764
- prompt.push({
1765
- role: "user",
1766
- content: screenshots.map((screenshot) => ({
1767
- type: "image_url",
1768
- image_url: {
1769
- url: screenshot
1770
- }
1771
- }))
1772
- });
1932
+ Respond ONLY with the complete Playwright test code, no explanations.`;
1933
+ const messageContent = createMessageContent(
1934
+ promptText,
1935
+ screenshots,
1936
+ options.includeScreenshots !== false
1937
+ );
1938
+ const systemPrompt = `You are an expert test automation engineer specializing in Playwright and Midscene.
1939
+ Your task is to generate a complete, executable Playwright test using @midscene/web/playwright that reproduces a recorded browser session.
1940
+
1941
+ ${_constants.PLAYWRIGHT_EXAMPLE_CODE}`;
1942
+ const prompt = [
1943
+ {
1944
+ role: "system",
1945
+ content: systemPrompt
1946
+ },
1947
+ {
1948
+ role: "user",
1949
+ content: messageContent
1773
1950
  }
1951
+ ];
1952
+ if (options.stream && options.onChunk) {
1953
+ return await call(prompt, 2 /* EXTRACT_DATA */, void 0, {
1954
+ stream: true,
1955
+ onChunk: options.onChunk
1956
+ });
1957
+ } else {
1774
1958
  const response = await call(prompt, 2 /* EXTRACT_DATA */);
1775
- if (_optionalChain([response, 'optionalAccess', _40 => _40.content]) && typeof response.content === "string") {
1776
- return response.content;
1959
+ if (_optionalChain([response, 'optionalAccess', _54 => _54.content]) && typeof response.content === "string") {
1960
+ return {
1961
+ content: response.content,
1962
+ usage: response.usage,
1963
+ isStreamed: false
1964
+ };
1777
1965
  }
1778
- throw new Error("Failed to generate YAML test configuration");
1779
- } catch (error) {
1780
- throw new Error(`Failed to generate YAML test: ${error}`);
1966
+ throw new Error("Failed to generate Playwright test code");
1781
1967
  }
1782
1968
  };
1783
1969
 
@@ -1976,7 +2162,7 @@ async function AiLocateElement(options) {
1976
2162
  );
1977
2163
  }
1978
2164
  let referenceImagePayload;
1979
- if (_optionalChain([options, 'access', _41 => _41.referenceImage, 'optionalAccess', _42 => _42.rect]) && options.referenceImage.base64) {
2165
+ if (_optionalChain([options, 'access', _55 => _55.referenceImage, 'optionalAccess', _56 => _56.rect]) && options.referenceImage.base64) {
1980
2166
  referenceImagePayload = await _img.cropByRect.call(void 0,
1981
2167
  options.referenceImage.base64,
1982
2168
  options.referenceImage.rect,
@@ -2012,10 +2198,10 @@ async function AiLocateElement(options) {
2012
2198
  if ("bbox" in res.content && Array.isArray(res.content.bbox)) {
2013
2199
  resRect = adaptBboxToRect(
2014
2200
  res.content.bbox,
2015
- _optionalChain([options, 'access', _43 => _43.searchConfig, 'optionalAccess', _44 => _44.rect, 'optionalAccess', _45 => _45.width]) || context.size.width,
2016
- _optionalChain([options, 'access', _46 => _46.searchConfig, 'optionalAccess', _47 => _47.rect, 'optionalAccess', _48 => _48.height]) || context.size.height,
2017
- _optionalChain([options, 'access', _49 => _49.searchConfig, 'optionalAccess', _50 => _50.rect, 'optionalAccess', _51 => _51.left]),
2018
- _optionalChain([options, 'access', _52 => _52.searchConfig, 'optionalAccess', _53 => _53.rect, 'optionalAccess', _54 => _54.top])
2201
+ _optionalChain([options, 'access', _57 => _57.searchConfig, 'optionalAccess', _58 => _58.rect, 'optionalAccess', _59 => _59.width]) || context.size.width,
2202
+ _optionalChain([options, 'access', _60 => _60.searchConfig, 'optionalAccess', _61 => _61.rect, 'optionalAccess', _62 => _62.height]) || context.size.height,
2203
+ _optionalChain([options, 'access', _63 => _63.searchConfig, 'optionalAccess', _64 => _64.rect, 'optionalAccess', _65 => _65.left]),
2204
+ _optionalChain([options, 'access', _66 => _66.searchConfig, 'optionalAccess', _67 => _67.rect, 'optionalAccess', _68 => _68.top])
2019
2205
  );
2020
2206
  debugInspect("resRect", resRect);
2021
2207
  const rectCenter = {
@@ -2034,7 +2220,7 @@ async function AiLocateElement(options) {
2034
2220
  }
2035
2221
  } catch (e) {
2036
2222
  const msg = e instanceof Error ? `Failed to parse bbox: ${e.message}` : "unknown error in locate";
2037
- if (!errors || _optionalChain([errors, 'optionalAccess', _55 => _55.length]) === 0) {
2223
+ if (!errors || _optionalChain([errors, 'optionalAccess', _69 => _69.length]) === 0) {
2038
2224
  errors = [msg];
2039
2225
  } else {
2040
2226
  errors.push(`(${msg})`);
@@ -2125,14 +2311,14 @@ async function AiExtractElementInfo(options) {
2125
2311
  truncateTextLength: 200,
2126
2312
  filterNonTextContent: false,
2127
2313
  visibleOnly: false,
2128
- domIncluded: _optionalChain([extractOption, 'optionalAccess', _56 => _56.domIncluded])
2314
+ domIncluded: _optionalChain([extractOption, 'optionalAccess', _70 => _70.domIncluded])
2129
2315
  });
2130
2316
  const extractDataPromptText = await extractDataQueryPrompt(
2131
2317
  description,
2132
2318
  dataQuery
2133
2319
  );
2134
2320
  const userContent = [];
2135
- if (_optionalChain([extractOption, 'optionalAccess', _57 => _57.screenshotIncluded]) !== false) {
2321
+ if (_optionalChain([extractOption, 'optionalAccess', _71 => _71.screenshotIncluded]) !== false) {
2136
2322
  userContent.push({
2137
2323
  type: "image_url",
2138
2324
  image_url: {
@@ -2260,7 +2446,7 @@ async function plan(userInstruction, opts) {
2260
2446
  const { content, usage } = await call2(msgs, 3 /* PLAN */);
2261
2447
  const rawResponse = JSON.stringify(content, void 0, 2);
2262
2448
  const planFromAI = content;
2263
- const actions = (_optionalChain([planFromAI, 'access', _58 => _58.action, 'optionalAccess', _59 => _59.type]) ? [planFromAI.action] : planFromAI.actions) || [];
2449
+ const actions = (_optionalChain([planFromAI, 'access', _72 => _72.action, 'optionalAccess', _73 => _73.type]) ? [planFromAI.action] : planFromAI.actions) || [];
2264
2450
  const returnValue = {
2265
2451
  ...planFromAI,
2266
2452
  actions,
@@ -2287,7 +2473,7 @@ async function plan(userInstruction, opts) {
2287
2473
  _utils.assert.call(void 0, !planFromAI.error, `Failed to plan actions: ${planFromAI.error}`);
2288
2474
  } else {
2289
2475
  actions.forEach((action) => {
2290
- if (_optionalChain([action, 'access', _60 => _60.locate, 'optionalAccess', _61 => _61.id])) {
2476
+ if (_optionalChain([action, 'access', _74 => _74.locate, 'optionalAccess', _75 => _75.id])) {
2291
2477
  const element = elementById(action.locate.id);
2292
2478
  if (element) {
2293
2479
  action.locate.id = element.id;
@@ -2576,6 +2762,8 @@ async function resizeImageForUiTars(imageBase64, size) {
2576
2762
 
2577
2763
 
2578
2764
 
2579
- exports.systemPromptToLocateElement = systemPromptToLocateElement; exports.elementByPositionWithElementInfo = elementByPositionWithElementInfo; exports.describeUserPage = describeUserPage; exports.call = call; exports.callToGetJSONObject = callToGetJSONObject; exports.AIActionType = AIActionType; exports.callAiFn = callAiFn; exports.adaptBboxToRect = adaptBboxToRect; exports.expandSearchArea = expandSearchArea; exports.generatePlaywrightTest = generatePlaywrightTest; exports.generateYamlTest = generateYamlTest; exports.AiLocateElement = AiLocateElement; exports.AiLocateSection = AiLocateSection; exports.AiExtractElementInfo = AiExtractElementInfo; exports.AiAssert = AiAssert; exports.plan = plan; exports.vlmPlanning = vlmPlanning; exports.resizeImageForUiTars = resizeImageForUiTars;
2580
2765
 
2581
- //# sourceMappingURL=chunk-QT5OZCDN.js.map
2766
+
2767
+ exports.systemPromptToLocateElement = systemPromptToLocateElement; exports.elementByPositionWithElementInfo = elementByPositionWithElementInfo; exports.describeUserPage = describeUserPage; exports.call = call; exports.callToGetJSONObject = callToGetJSONObject; exports.AIActionType = AIActionType; exports.callAiFn = callAiFn; exports.adaptBboxToRect = adaptBboxToRect; exports.expandSearchArea = expandSearchArea; exports.generateYamlTest = generateYamlTest; exports.generateYamlTestStream = generateYamlTestStream; exports.generatePlaywrightTest = generatePlaywrightTest; exports.generatePlaywrightTestStream = generatePlaywrightTestStream; exports.AiLocateElement = AiLocateElement; exports.AiLocateSection = AiLocateSection; exports.AiExtractElementInfo = AiExtractElementInfo; exports.AiAssert = AiAssert; exports.plan = plan; exports.vlmPlanning = vlmPlanning; exports.resizeImageForUiTars = resizeImageForUiTars;
2768
+
2769
+ //# sourceMappingURL=chunk-H3KP5MGB.js.map