npm - @midscene/core - Versions diffs - 0.9.2 → 0.10.0 - Mend

@midscene/core 0.9.2 → 0.10.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (12)

package/dist/lib/ai-model.js +2 -2
package/dist/lib/{chunk-P2MMY6CZ.js → chunk-CERQVVPJ.js} +193 -191
package/dist/lib/{chunk-6MKLXHAY.js → chunk-MN5JVUKU.js} +1 -1
package/dist/lib/index.js +16 -14
package/dist/lib/types/ai-model.d.ts +3 -3
package/dist/lib/types/index.d.ts +4 -4
package/dist/lib/types/{llm-planning-7247f4e9.d.ts → llm-planning-ca109221.d.ts} +1 -1
package/dist/lib/types/{types-c4bec333.d.ts → types-64c4d87b.d.ts} +1 -0
package/dist/lib/types/utils.d.ts +1 -1
package/dist/lib/utils.js +2 -2
package/package.json +3 -2
package/report/index.html +2 -2

package/dist/lib/ai-model.js CHANGED Viewed

@@ -9,7 +9,7 @@
-var _chunkP2MMY6CZjs = require('./chunk-P2MMY6CZ.js');
+var _chunkCERQVVPJjs = require('./chunk-CERQVVPJ.js');
 require('./chunk-JP3JBDZS.js');
 require('./chunk-YSQDPG26.js');
@@ -23,4 +23,4 @@ require('./chunk-YSQDPG26.js');
-exports.AiAssert = _chunkP2MMY6CZjs.AiAssert; exports.AiExtractElementInfo = _chunkP2MMY6CZjs.AiExtractElementInfo; exports.AiInspectElement = _chunkP2MMY6CZjs.AiInspectElement; exports.callAiFn = _chunkP2MMY6CZjs.callAiFn; exports.callToGetJSONObject = _chunkP2MMY6CZjs.callToGetJSONObject; exports.describeUserPage = _chunkP2MMY6CZjs.describeUserPage; exports.plan = _chunkP2MMY6CZjs.plan; exports.systemPromptToLocateElement = _chunkP2MMY6CZjs.systemPromptToLocateElement; exports.transformElementPositionToId = _chunkP2MMY6CZjs.transformElementPositionToId; exports.vlmPlanning = _chunkP2MMY6CZjs.vlmPlanning;
+exports.AiAssert = _chunkCERQVVPJjs.AiAssert; exports.AiExtractElementInfo = _chunkCERQVVPJjs.AiExtractElementInfo; exports.AiInspectElement = _chunkCERQVVPJjs.AiInspectElement; exports.callAiFn = _chunkCERQVVPJjs.callAiFn; exports.callToGetJSONObject = _chunkCERQVVPJjs.callToGetJSONObject; exports.describeUserPage = _chunkCERQVVPJjs.describeUserPage; exports.plan = _chunkCERQVVPJjs.plan; exports.systemPromptToLocateElement = _chunkCERQVVPJjs.systemPromptToLocateElement; exports.transformElementPositionToId = _chunkCERQVVPJjs.transformElementPositionToId; exports.vlmPlanning = _chunkCERQVVPJjs.vlmPlanning;

package/dist/lib/{chunk-P2MMY6CZ.js → chunk-CERQVVPJ.js} RENAMED Viewed

@@ -1109,32 +1109,192 @@ async function callAiFn(msgs, AIActionTypeValue) {
 // src/ai-model/prompt/llm-locator.ts
 var _prompts = require('@langchain/core/prompts');
+// src/ai-model/prompt/ui-tars-planning.ts
+function getTimeZoneInfo() {
+  const timeZone = Intl.DateTimeFormat().resolvedOptions().timeZone;
+  const offset = -(/* @__PURE__ */ new Date()).getTimezoneOffset() / 60;
+  return {
+    timezone: `UTC${offset >= 0 ? "+" : ""}${offset}`,
+    isChina: timeZone === "Asia/Shanghai"
+  };
+}
+var language = getTimeZoneInfo().isChina ? "Chinese" : "English";
+var uiTarsPlanningPrompt = `
+You are a GUI agent. You are given a task and your action history, with screenshots. You need to perform the next action to complete the task.
+## Output Format
+\`\`\`
+Thought: ...
+Action: ...
+\`\`\`
+## Action Space
+click(start_box='[x1, y1, x2, y2]')
+left_double(start_box='[x1, y1, x2, y2]')
+right_single(start_box='[x1, y1, x2, y2]')
+drag(start_box='[x1, y1, x2, y2]', end_box='[x3, y3, x4, y4]')
+hotkey(key='')
+type(content='') #If you want to submit your input, use "\\n" at the end of \`content\`.
+scroll(start_box='[x1, y1, x2, y2]', direction='down or up or right or left')
+wait() #Sleep for 5s and take a screenshot to check for any changes.
+finished()
+call_user() # Submit the task and call the user when the task is unsolvable, or when you need the user's help.
+## Note
+- Use ${language} in \`Thought\` part.
+- Write a small plan and finally summarize your next action (with its target element) in one sentence in \`Thought\` part.
+## User Instruction
+`;
+var getSummary = (prediction) => prediction.replace(/Reflection:[\s\S]*?(?=Action_Summary:|Action:|$)/g, "").trim();
+function parseActionFromVlm(text, factor = 1e3, mode = "bc") {
+  let reflection = null;
+  let thought = null;
+  let actionStr = "";
+  text = text.trim();
+  if (mode === "bc") {
+    if (text.startsWith("Thought:")) {
+      const thoughtMatch = text.match(/Thought: (.+?)(?=\s*Action:|$)/s);
+      if (thoughtMatch) {
+        thought = thoughtMatch[1].trim();
+      }
+    } else if (text.startsWith("Reflection:")) {
+      const reflectionMatch = text.match(
+        /Reflection: (.+?)Action_Summary: (.+?)(?=\s*Action:|$)/
+      );
+      if (reflectionMatch) {
+        thought = reflectionMatch[2].trim();
+        reflection = reflectionMatch[1].trim();
+      }
+    } else if (text.startsWith("Action_Summary:")) {
+      const summaryMatch = text.match(/Action_Summary: (.+?)(?=\s*Action:|$)/);
+      if (summaryMatch) {
+        thought = summaryMatch[1].trim();
+      }
+    }
+    if (!text.includes("Action:")) {
+      actionStr = text;
+    } else {
+      const actionParts = text.split("Action:");
+      actionStr = actionParts[actionParts.length - 1];
+    }
+  } else if (mode === "o1") {
+    const thoughtMatch = text.match(/<Thought>\s*(.*?)\s*<\/Thought>/);
+    const actionSummaryMatch = text.match(
+      /\nAction_Summary:\s*(.*?)\s*Action:/
+    );
+    const actionMatch = text.match(/\nAction:\s*(.*?)\s*<\/Output>/);
+    const thoughtContent = thoughtMatch ? thoughtMatch[1] : null;
+    const actionSummaryContent = actionSummaryMatch ? actionSummaryMatch[1] : null;
+    const actionContent = actionMatch ? actionMatch[1] : null;
+    thought = `${thoughtContent}
+<Action_Summary>
+${actionSummaryContent}`;
+    actionStr = actionContent || "";
+  }
+  const allActions = actionStr.split("\n\n");
+  const actions = [];
+  for (const rawStr of allActions) {
+    const actionInstance = parseAction(rawStr.replace(/\n/g, "\\n").trim());
+    if (!actionInstance) {
+      console.log(`Action can't parse: ${rawStr}`);
+      continue;
+    }
+    const actionType = actionInstance.function;
+    const params = actionInstance.args;
+    const actionInputs = {};
+    for (const [paramName, param] of Object.entries(params)) {
+      if (!param)
+        continue;
+      const trimmedParam = param.trim();
+      actionInputs[paramName.trim()] = trimmedParam;
+      if (paramName.includes("start_box") || paramName.includes("end_box")) {
+        const oriBox = trimmedParam;
+        const numbers = oriBox.replace(/[()]/g, "").split(",");
+        const floatNumbers = numbers.map(
+          (num) => Number.parseFloat(num) / factor
+        );
+        if (floatNumbers.length === 2) {
+          floatNumbers.push(floatNumbers[0], floatNumbers[1]);
+        }
+        actionInputs[paramName.trim()] = JSON.stringify(floatNumbers);
+      }
+    }
+    if (actionType === "finished") {
+      actions.push({
+        reflection,
+        thought,
+        action_type: "finished",
+        action_inputs: {}
+      });
+    } else {
+      actions.push({
+        reflection,
+        thought,
+        action_type: actionType,
+        action_inputs: actionInputs
+      });
+    }
+  }
+  return actions;
+}
+function parseAction(actionStr) {
+  try {
+    const functionPattern = /^(\w+)\((.*)\)$/;
+    const match = actionStr.trim().match(functionPattern);
+    if (!match) {
+      throw new Error("Not a function call");
+    }
+    const [_, functionName, argsStr] = match;
+    const kwargs = {};
+    if (argsStr.trim()) {
+      const argPairs = argsStr.match(/([^,']|'[^']*')+/g) || [];
+      for (const pair of argPairs) {
+        const [key, ...valueParts] = pair.split("=");
+        if (!key)
+          continue;
+        const value = valueParts.join("=").trim().replace(/^['"]|['"]$/g, "");
+        kwargs[key.trim()] = value;
+      }
+    }
+    return {
+      function: functionName,
+      args: kwargs
+    };
+  } catch (e) {
+    console.error(`Failed to parse action '${actionStr}': ${e}`);
+    return null;
+  }
+}
 // src/ai-model/prompt/ui-tars-locator.ts
 function systemPromptToLocateElementPosition() {
   return `
-  You are a GUI agent. You are given a task and your action history, with screenshots. You need to perform the next action to complete the task.
-  ## Output Format
-  \`\`\`
-  Action_Summary: ...
-  Action: ...
-  \`\`\`
-  ## Action Space
-  click(start_box='[x1, y1, x2, y2]')
-  long_press(start_box='[x1, y1, x2, y2]', time='')
-  type(content='')
-  scroll(direction='down or up or right or left')
-  open_app(app_name='')
-  navigate_back()
-  navigate_home()
-  WAIT()
-  finished() # Submit the task regardless of whether it succeeds or fails.
-  ## Note
-  - Use Chinese in \`Action_Summary\` part.
-  ## User Instruction
+You are a GUI agent. You are given a task and your action history, with screenshots. You need to perform the next action to complete the task.
+## Output Format
+\`\`\`
+Thought: ...
+Action: ...
+\`\`\`
+## Action Space
+click(start_box='[x1, y1, x2, y2]')
+left_double(start_box='[x1, y1, x2, y2]')
+right_single(start_box='[x1, y1, x2, y2]')
+drag(start_box='[x1, y1, x2, y2]', end_box='[x3, y3, x4, y4]')
+hotkey(key='')
+type(content='') #If you want to submit your input, use "\\n" at the end of \`content\`.
+scroll(start_box='[x1, y1, x2, y2]', direction='down or up or right or left')
+wait() #Sleep for 5s and take a screenshot to check for any changes.
+finished()
+call_user() # Submit the task and call the user when the task is unsolvable, or when you need the user's help.
+## Note
+- Use ${language} in \`Thought\` part.
+- Write a small plan and finally summarize your next action (with its target element) in one sentence in \`Thought\` part.
+## User Instruction
     `;
 }
@@ -1360,8 +1520,6 @@ var _img = require('@midscene/shared/img');
 var _constants = require('@midscene/shared/constants');
 var characteristic = "You are a versatile professional in software UI design and testing. Your outstanding contributions will impact the user experience of billions of users.";
-var contextFormatIntro = `
-The user will give you a screenshot and some of the texts on it. There may be some none-English characters (like Chinese) on it, indicating it's an non-English app. If some text is shown on screenshot but not introduced by the JSON description, use the information you see on screenshot.`;
 function systemPromptToExtract() {
   return `
 You are a versatile professional in software UI design and testing. Your outstanding contributions will impact the user experience of billions of users.
@@ -1397,7 +1555,6 @@ DATA_DEMAND start:
 {dataKeys}
 {dataQuery}
 =====================================
 DATA_DEMAND ends.
   `,
@@ -1406,14 +1563,12 @@ DATA_DEMAND ends.
 function systemPromptToAssert() {
   return `
 ${characteristic}
-${contextFormatIntro}
-Based on the information you get, Return assertion judgment:
+User will give an assertion, and some information about the page. Based on the information you get, tell whether the assertion is truthy.
 Return in the following JSON format:
 {
   thought: string, // string, the thought of the assertion. Should in the same language as the assertion.
-  pass: true, // true or false, whether the assertion is passed
+  pass: true, // true or false, whether the assertion is truthy
 }
 `;
 }
@@ -1454,7 +1609,7 @@ function truncateText(text, maxLength = 100) {
 function elementByPositionWithElementInfo(elementsInfo, position) {
   _assert2.default.call(void 0, typeof position !== "undefined", "position is required for query");
   const matchingElements = elementsInfo.filter((item) => {
-    return item.rect.left <= position.x && position.x <= item.rect.left + item.rect.width && item.rect.top <= position.y && position.y <= item.rect.top + item.rect.height;
+    return item.attributes.nodeType !== _constants.NodeType.CONTAINER && item.rect.left <= position.x && position.x <= item.rect.left + item.rect.width && item.rect.top <= position.y && position.y <= item.rect.top + item.rect.height;
   });
   if (matchingElements.length === 0) {
     return void 0;
@@ -1710,7 +1865,7 @@ The JSON format is as follows:
 {{
   "actions": [
     {{
-      "thought": "Reasons for generating this task, and why this task is feasible on this page",
+      "thought": "Reasons for generating this task, and why this task is feasible on this page.", // Use the same language as the user's instruction.
       "type": "Tap",
       "param": null,
       "locate": {sample} | null,
@@ -1718,8 +1873,8 @@ The JSON format is as follows:
     // ... more actions
   ],
   "taskWillBeAccomplished": boolean,
-  "furtherPlan": {{ "whatHaveDone": string, "whatToDoNext": string }} | null,
-  "error"?: string
+  "furtherPlan": {{ "whatHaveDone": string, "whatToDoNext": string }} | null, // Use the same language as the user's instruction.
+  "error"?: string // Use the same language as the user's instruction.
 }}
 Here is an example of how to decompose a task:
@@ -2416,13 +2571,10 @@ async function AiAssert(options) {
         {
           type: "text",
           text: `
-    pageDescription:
-    ${description}
-    Here is the description of the assertion. Just go ahead:
-    =====================================
-    ${assertion}
-    =====================================
+Here is the description of the assertion. Just go ahead:
+=====================================
+${assertion}
+=====================================
   `
         }
       ]
@@ -2485,156 +2637,6 @@ async function plan(userPrompt, opts) {
   return planFromAI;
 }
-// src/ai-model/prompt/ui-tars-planning.ts
-var uiTarsPlanningPrompt = `
-You are a GUI agent. You are given a task and your action history, with screenshots. You need to perform the next action to complete the task.
-## Output Format
-\`\`\`
-Thought: ...
-Action: ...
-\`\`\`
-## Action Space
-click(start_box='[x1, y1, x2, y2]')
-left_double(start_box='[x1, y1, x2, y2]')
-right_single(start_box='[x1, y1, x2, y2]')
-drag(start_box='[x1, y1, x2, y2]', end_box='[x3, y3, x4, y4]')
-hotkey(key='')
-type(content='') #If you want to submit your input, use "\\n" at the end of \`content\`.
-scroll(start_box='[x1, y1, x2, y2]', direction='down or up or right or left')
-wait() #Sleep for 5s and take a screenshot to check for any changes.
-finished()
-call_user() # Submit the task and call the user when the task is unsolvable, or when you need the user's help.
-## Note
-- Use Chinese in \`Thought\` part.
-- Write a small plan and finally summarize your next action (with its target element) in one sentence in \`Thought\` part.
-## User Instruction
-`;
-var getSummary = (prediction) => prediction.replace(/Reflection:[\s\S]*?(?=Action_Summary:|Action:|$)/g, "").trim();
-function parseActionFromVlm(text, factor = 1e3, mode = "bc") {
-  let reflection = null;
-  let thought = null;
-  let actionStr = "";
-  text = text.trim();
-  if (mode === "bc") {
-    if (text.startsWith("Thought:")) {
-      const thoughtMatch = text.match(/Thought: (.+?)(?=\s*Action:|$)/s);
-      if (thoughtMatch) {
-        thought = thoughtMatch[1].trim();
-      }
-    } else if (text.startsWith("Reflection:")) {
-      const reflectionMatch = text.match(
-        /Reflection: (.+?)Action_Summary: (.+?)(?=\s*Action:|$)/
-      );
-      if (reflectionMatch) {
-        thought = reflectionMatch[2].trim();
-        reflection = reflectionMatch[1].trim();
-      }
-    } else if (text.startsWith("Action_Summary:")) {
-      const summaryMatch = text.match(/Action_Summary: (.+?)(?=\s*Action:|$)/);
-      if (summaryMatch) {
-        thought = summaryMatch[1].trim();
-      }
-    }
-    if (!text.includes("Action:")) {
-      actionStr = text;
-    } else {
-      const actionParts = text.split("Action:");
-      actionStr = actionParts[actionParts.length - 1];
-    }
-  } else if (mode === "o1") {
-    const thoughtMatch = text.match(/<Thought>\s*(.*?)\s*<\/Thought>/);
-    const actionSummaryMatch = text.match(
-      /\nAction_Summary:\s*(.*?)\s*Action:/
-    );
-    const actionMatch = text.match(/\nAction:\s*(.*?)\s*<\/Output>/);
-    const thoughtContent = thoughtMatch ? thoughtMatch[1] : null;
-    const actionSummaryContent = actionSummaryMatch ? actionSummaryMatch[1] : null;
-    const actionContent = actionMatch ? actionMatch[1] : null;
-    thought = `${thoughtContent}
-<Action_Summary>
-${actionSummaryContent}`;
-    actionStr = actionContent || "";
-  }
-  const allActions = actionStr.split("\n\n");
-  const actions = [];
-  for (const rawStr of allActions) {
-    const actionInstance = parseAction(rawStr.replace(/\n/g, "\\n").trim());
-    if (!actionInstance) {
-      console.log(`Action can't parse: ${rawStr}`);
-      continue;
-    }
-    const actionType = actionInstance.function;
-    const params = actionInstance.args;
-    const actionInputs = {};
-    for (const [paramName, param] of Object.entries(params)) {
-      if (!param)
-        continue;
-      const trimmedParam = param.trim();
-      actionInputs[paramName.trim()] = trimmedParam;
-      if (paramName.includes("start_box") || paramName.includes("end_box")) {
-        const oriBox = trimmedParam;
-        const numbers = oriBox.replace(/[()]/g, "").split(",");
-        const floatNumbers = numbers.map(
-          (num) => Number.parseFloat(num) / factor
-        );
-        if (floatNumbers.length === 2) {
-          floatNumbers.push(floatNumbers[0], floatNumbers[1]);
-        }
-        actionInputs[paramName.trim()] = JSON.stringify(floatNumbers);
-      }
-    }
-    if (actionType === "finished") {
-      actions.push({
-        reflection,
-        thought,
-        action_type: "finished",
-        action_inputs: {}
-      });
-    } else {
-      actions.push({
-        reflection,
-        thought,
-        action_type: actionType,
-        action_inputs: actionInputs
-      });
-    }
-  }
-  return actions;
-}
-function parseAction(actionStr) {
-  try {
-    const functionPattern = /^(\w+)\((.*)\)$/;
-    const match = actionStr.trim().match(functionPattern);
-    if (!match) {
-      throw new Error("Not a function call");
-    }
-    const [_, functionName, argsStr] = match;
-    const kwargs = {};
-    if (argsStr.trim()) {
-      const argPairs = argsStr.match(/([^,']|'[^']*')+/g) || [];
-      for (const pair of argPairs) {
-        const [key, ...valueParts] = pair.split("=");
-        if (!key)
-          continue;
-        const value = valueParts.join("=").trim().replace(/^['"]|['"]$/g, "");
-        kwargs[key.trim()] = value;
-      }
-    }
-    return {
-      function: functionName,
-      args: kwargs
-    };
-  } catch (e) {
-    console.error(`Failed to parse action '${actionStr}': ${e}`);
-    return null;
-  }
-}
 // src/ai-model/ui-tars-planning.ts
 function capitalize(str) {
   return str.charAt(0).toUpperCase() + str.slice(1);

package/dist/lib/{chunk-6MKLXHAY.js → chunk-MN5JVUKU.js} RENAMED Viewed

@@ -184,7 +184,7 @@ function stringifyDumpData(data, indents) {
   return JSON.stringify(data, replacerForPageObject, indents);
 }
 function getVersion() {
-  return "0.9.2";
+  return "0.10.0";
 }
 function debugLog(...message) {
   const debugMode = _chunkJP3JBDZSjs.getAIConfig.call(void 0, _chunkJP3JBDZSjs.MIDSCENE_DEBUG_MODE);

package/dist/lib/index.js CHANGED Viewed

@@ -6,7 +6,7 @@
-var _chunk6MKLXHAYjs = require('./chunk-6MKLXHAY.js');
+var _chunkMN5JVUKUjs = require('./chunk-MN5JVUKU.js');
@@ -17,7 +17,8 @@ var _chunk6MKLXHAYjs = require('./chunk-6MKLXHAY.js');
-var _chunkP2MMY6CZjs = require('./chunk-P2MMY6CZ.js');
+var _chunkCERQVVPJjs = require('./chunk-CERQVVPJ.js');
@@ -168,7 +169,7 @@ ${(_b = this.latestErrorTask()) == null ? void 0 : _b.errorStack}`
   }
   dump() {
     const dumpData = {
-      sdkVersion: _chunk6MKLXHAYjs.getVersion.call(void 0, ),
+      sdkVersion: _chunkMN5JVUKUjs.getVersion.call(void 0, ),
       model_name: _chunkJP3JBDZSjs.getAIConfig.call(void 0, _chunkJP3JBDZSjs.MIDSCENE_MODEL_NAME) || "",
       logTime: Date.now(),
       name: this.name,
@@ -191,16 +192,17 @@ var logFileName = "";
 var logContent = [];
 var logIdIndexMap = {};
 var { pid } = process;
-var logFileExt = _chunk6MKLXHAYjs.insightDumpFileExt;
+var logFileExt = _chunkMN5JVUKUjs.insightDumpFileExt;
 var ifInBrowser = typeof window !== "undefined";
 function writeInsightDump(data, logId, dumpSubscriber) {
-  const logDir = _chunk6MKLXHAYjs.getLogDir.call(void 0, );
+  const logDir = _chunkMN5JVUKUjs.getLogDir.call(void 0, );
   _assert2.default.call(void 0, logDir, "logDir should be set before writing dump file");
   const id = logId || _utils.uuid.call(void 0, );
   const baseData = {
-    sdkVersion: _chunk6MKLXHAYjs.getVersion.call(void 0, ),
+    sdkVersion: _chunkMN5JVUKUjs.getVersion.call(void 0, ),
     logTime: Date.now(),
-    model_name: _chunkJP3JBDZSjs.getAIConfig.call(void 0, _chunkJP3JBDZSjs.MIDSCENE_MODEL_NAME) || ""
+    model_name: _chunkJP3JBDZSjs.getAIConfig.call(void 0, _chunkJP3JBDZSjs.MIDSCENE_MODEL_NAME) || "",
+    model_description: _chunkJP3JBDZSjs.getAIConfig.call(void 0, _chunkJP3JBDZSjs.MIDSCENE_USE_VLM_UI_TARS) ? "vlm-ui-tars enabled" : ""
   };
   const finalData = {
     logId: id,
@@ -208,7 +210,7 @@ function writeInsightDump(data, logId, dumpSubscriber) {
     ...data
   };
   dumpSubscriber == null ? void 0 : dumpSubscriber(finalData);
-  const dataString = _chunk6MKLXHAYjs.stringifyDumpData.call(void 0, finalData, 2);
+  const dataString = _chunkMN5JVUKUjs.stringifyDumpData.call(void 0, finalData, 2);
   if (typeof logIdIndexMap[id] === "number") {
     logContent[logIdIndexMap[id]] = dataString;
   } else {
@@ -222,7 +224,7 @@ function writeInsightDump(data, logId, dumpSubscriber) {
         logFileName = `${pid}_${baseData.logTime}-${Math.random()}`;
       }
     }
-    _chunk6MKLXHAYjs.writeLogFile.call(void 0, {
+    _chunkMN5JVUKUjs.writeLogFile.call(void 0, {
       fileName: logFileName,
       fileExt: logFileExt,
       fileContent: `[
@@ -237,7 +239,7 @@ ${logContent.join(",\n")}
 // src/insight/index.ts
 var Insight = class {
   constructor(context, opt) {
-    this.aiVendorFn = _chunkP2MMY6CZjs.callAiFn;
+    this.aiVendorFn = _chunkCERQVVPJjs.callAiFn;
     _assert2.default.call(void 0, context, "context is required for Insight");
     if (typeof context === "function") {
       this.contextRetrieverFn = context;
@@ -263,7 +265,7 @@ var Insight = class {
     this.onceDumpUpdatedFn = void 0;
     const context = await this.contextRetrieverFn("locate");
     const startTime = Date.now();
-    const { parseResult, elementById, rawResponse, usage } = await _chunkP2MMY6CZjs.AiInspectElement.call(void 0, {
+    const { parseResult, elementById, rawResponse, usage } = await _chunkCERQVVPJjs.AiInspectElement.call(void 0, {
       callAI: callAI || this.aiVendorFn,
       context,
       multi: Boolean(multi),
@@ -346,7 +348,7 @@ ${parseResult.errors.join("\n")}`;
     this.onceDumpUpdatedFn = void 0;
     const context = await this.contextRetrieverFn("extract");
     const startTime = Date.now();
-    const { parseResult, elementById } = await _chunkP2MMY6CZjs.AiExtractElementInfo.call(void 0, {
+    const { parseResult, elementById } = await _chunkCERQVVPJjs.AiExtractElementInfo.call(void 0, {
       context,
       dataQuery: dataDemand
     });
@@ -400,7 +402,7 @@ ${parseResult.errors.join("\n")}`;
     this.onceDumpUpdatedFn = void 0;
     const context = await this.contextRetrieverFn("assert");
     const startTime = Date.now();
-    const assertResult = await _chunkP2MMY6CZjs.AiAssert.call(void 0, {
+    const assertResult = await _chunkCERQVVPJjs.AiAssert.call(void 0, {
       assertion,
       context
     });
@@ -448,4 +450,4 @@ var src_default = Insight;
-exports.AIResponseFormat = _chunkP2MMY6CZjs.AIResponseFormat; exports.BaseElement = _chunkP2MMY6CZjs.BaseElement; exports.Executor = Executor; exports.Insight = Insight; exports.UIContext = _chunkP2MMY6CZjs.UIContext; exports.default = src_default; exports.getLogDirByType = _chunk6MKLXHAYjs.getLogDirByType; exports.getVersion = _chunk6MKLXHAYjs.getVersion; exports.plan = _chunkP2MMY6CZjs.plan; exports.setLogDir = _chunk6MKLXHAYjs.setLogDir; exports.transformElementPositionToId = _chunkP2MMY6CZjs.transformElementPositionToId;
+exports.AIResponseFormat = _chunkCERQVVPJjs.AIResponseFormat; exports.BaseElement = _chunkCERQVVPJjs.BaseElement; exports.Executor = Executor; exports.Insight = Insight; exports.UIContext = _chunkCERQVVPJjs.UIContext; exports.default = src_default; exports.getLogDirByType = _chunkMN5JVUKUjs.getLogDirByType; exports.getVersion = _chunkMN5JVUKUjs.getVersion; exports.plan = _chunkCERQVVPJjs.plan; exports.setLogDir = _chunkMN5JVUKUjs.setLogDir; exports.transformElementPositionToId = _chunkCERQVVPJjs.transformElementPositionToId;

package/dist/lib/types/ai-model.d.ts CHANGED Viewed

@@ -1,8 +1,8 @@
-import { g as AIUsageInfo, B as BaseElement, U as UIContext, y as PlanningAction } from './types-c4bec333.js';
+import { g as AIUsageInfo, B as BaseElement, U as UIContext, y as PlanningAction } from './types-64c4d87b.js';
 import { ChatCompletionMessageParam } from 'openai/resources';
 export { ChatCompletionMessageParam } from 'openai/resources';
-import { A as AIActionType } from './llm-planning-7247f4e9.js';
-export { d as AiAssert, b as AiExtractElementInfo, a as AiInspectElement, c as callAiFn, p as plan, t as transformElementPositionToId } from './llm-planning-7247f4e9.js';
+import { A as AIActionType } from './llm-planning-ca109221.js';
+export { d as AiAssert, b as AiExtractElementInfo, a as AiInspectElement, c as callAiFn, p as plan, t as transformElementPositionToId } from './llm-planning-ca109221.js';
 import '@midscene/shared/constants';
 declare function callToGetJSONObject<T>(messages: ChatCompletionMessageParam[], AIActionTypeValue: AIActionType): Promise<{

package/dist/lib/types/index.d.ts CHANGED Viewed

@@ -1,7 +1,7 @@
-import { E as ExecutionTask, a as ExecutionTaskProgressOptions, b as ExecutionTaskApply, c as ExecutionDump, B as BaseElement, U as UIContext, I as InsightAction, D as DumpSubscriber, d as InsightOptions, e as InsightTaskInfo, f as InsightAssertionResponse, A as AISingleElementResponse } from './types-c4bec333.js';
-export { n as AIAssertionResponse, k as AIElementIdResponse, l as AIElementResponse, h as AIResponseFormat, m as AISectionParseResponse, i as AISingleElementResponseById, j as AISingleElementResponseByPosition, g as AIUsageInfo, w as AgentAssertOpt, v as AgentWaitForOpt, X as BaseAgentParserOpt, C as CallAIFn, W as Color, q as DumpMeta, u as ElementById, o as EnsureObject, _ as ExecutionRecorderItem, ag as ExecutionTaskAction, af as ExecutionTaskActionApply, ae as ExecutionTaskInsightAssertion, ad as ExecutionTaskInsightAssertionApply, ac as ExecutionTaskInsightAssertionParam, a5 as ExecutionTaskInsightDumpLog, a7 as ExecutionTaskInsightLocate, a6 as ExecutionTaskInsightLocateApply, a4 as ExecutionTaskInsightLocateOutput, a3 as ExecutionTaskInsightLocateParam, ab as ExecutionTaskInsightQuery, aa as ExecutionTaskInsightQueryApply, a9 as ExecutionTaskInsightQueryOutput, a8 as ExecutionTaskInsightQueryParam, ai as ExecutionTaskPlanning, ah as ExecutionTaskPlanningApply, a2 as ExecutionTaskReturn, $ as ExecutionTaskType, a0 as ExecutorContext, aj as GroupedActionDump, s as InsightDump, p as InsightExtractParam, L as LiteUISection, O as OnTaskStartTip, t as PartialInsightDumpFromSDK, z as PlanningAIResponse, y as PlanningAction, N as PlanningActionParamAssert, T as PlanningActionParamError, J as PlanningActionParamHover, K as PlanningActionParamInputOrKeyPress, G as PlanningActionParamPlan, M as PlanningActionParamScroll, Q as PlanningActionParamSleep, H as PlanningActionParamTap, V as PlanningActionParamWaitFor, F as PlanningFurtherPlan, x as PlanningLocateParam, Z as PlaywrightParserOpt, P as Point, Y as PuppeteerParserOpt, R as Rect, r as ReportDumpWithAttributes, S as Size, a1 as TaskCacheInfo } from './types-c4bec333.js';
-import { c as callAiFn } from './llm-planning-7247f4e9.js';
-export { p as plan, t as transformElementPositionToId } from './llm-planning-7247f4e9.js';
+import { E as ExecutionTask, a as ExecutionTaskProgressOptions, b as ExecutionTaskApply, c as ExecutionDump, B as BaseElement, U as UIContext, I as InsightAction, D as DumpSubscriber, d as InsightOptions, e as InsightTaskInfo, f as InsightAssertionResponse, A as AISingleElementResponse } from './types-64c4d87b.js';
+export { n as AIAssertionResponse, k as AIElementIdResponse, l as AIElementResponse, h as AIResponseFormat, m as AISectionParseResponse, i as AISingleElementResponseById, j as AISingleElementResponseByPosition, g as AIUsageInfo, w as AgentAssertOpt, v as AgentWaitForOpt, X as BaseAgentParserOpt, C as CallAIFn, W as Color, q as DumpMeta, u as ElementById, o as EnsureObject, _ as ExecutionRecorderItem, ag as ExecutionTaskAction, af as ExecutionTaskActionApply, ae as ExecutionTaskInsightAssertion, ad as ExecutionTaskInsightAssertionApply, ac as ExecutionTaskInsightAssertionParam, a5 as ExecutionTaskInsightDumpLog, a7 as ExecutionTaskInsightLocate, a6 as ExecutionTaskInsightLocateApply, a4 as ExecutionTaskInsightLocateOutput, a3 as ExecutionTaskInsightLocateParam, ab as ExecutionTaskInsightQuery, aa as ExecutionTaskInsightQueryApply, a9 as ExecutionTaskInsightQueryOutput, a8 as ExecutionTaskInsightQueryParam, ai as ExecutionTaskPlanning, ah as ExecutionTaskPlanningApply, a2 as ExecutionTaskReturn, $ as ExecutionTaskType, a0 as ExecutorContext, aj as GroupedActionDump, s as InsightDump, p as InsightExtractParam, L as LiteUISection, O as OnTaskStartTip, t as PartialInsightDumpFromSDK, z as PlanningAIResponse, y as PlanningAction, N as PlanningActionParamAssert, T as PlanningActionParamError, J as PlanningActionParamHover, K as PlanningActionParamInputOrKeyPress, G as PlanningActionParamPlan, M as PlanningActionParamScroll, Q as PlanningActionParamSleep, H as PlanningActionParamTap, V as PlanningActionParamWaitFor, F as PlanningFurtherPlan, x as PlanningLocateParam, Z as PlaywrightParserOpt, P as Point, Y as PuppeteerParserOpt, R as Rect, r as ReportDumpWithAttributes, S as Size, a1 as TaskCacheInfo } from './types-64c4d87b.js';
+import { c as callAiFn } from './llm-planning-ca109221.js';
+export { p as plan, t as transformElementPositionToId } from './llm-planning-ca109221.js';
 export { getLogDirByType, getVersion, setLogDir } from './utils.js';
 import '@midscene/shared/constants';
 import 'openai/resources';

package/dist/lib/types/{llm-planning-7247f4e9.d.ts → llm-planning-ca109221.d.ts} RENAMED Viewed

@@ -1,4 +1,4 @@
-import { g as AIUsageInfo, l as AIElementResponse, B as BaseElement, U as UIContext, A as AISingleElementResponse, j as AISingleElementResponseByPosition, u as ElementById, m as AISectionParseResponse, n as AIAssertionResponse, z as PlanningAIResponse } from './types-c4bec333.js';
+import { g as AIUsageInfo, l as AIElementResponse, B as BaseElement, U as UIContext, A as AISingleElementResponse, j as AISingleElementResponseByPosition, u as ElementById, m as AISectionParseResponse, n as AIAssertionResponse, z as PlanningAIResponse } from './types-64c4d87b.js';
 import { ChatCompletionSystemMessageParam, ChatCompletionUserMessageParam } from 'openai/resources';
 type AIArgs = [

package/dist/lib/types/{types-c4bec333.d.ts → types-64c4d87b.d.ts} RENAMED Viewed

@@ -101,6 +101,7 @@ interface DumpMeta {
     sdkVersion: string;
     logTime: number;
     model_name: string;
+    model_description?: string;
 }
 interface ReportDumpWithAttributes {
     dumpString: string;

package/dist/lib/types/utils.d.ts CHANGED Viewed

@@ -1,4 +1,4 @@
-import { r as ReportDumpWithAttributes, R as Rect } from './types-c4bec333.js';
+import { r as ReportDumpWithAttributes, R as Rect } from './types-64c4d87b.js';
 import '@midscene/shared/constants';
 import 'openai/resources';

package/dist/lib/utils.js CHANGED Viewed

@@ -16,7 +16,7 @@
-var _chunk6MKLXHAYjs = require('./chunk-6MKLXHAY.js');
+var _chunkMN5JVUKUjs = require('./chunk-MN5JVUKU.js');
 require('./chunk-JP3JBDZS.js');
 require('./chunk-YSQDPG26.js');
@@ -37,4 +37,4 @@ require('./chunk-YSQDPG26.js');
-exports.getLogDir = _chunk6MKLXHAYjs.getLogDir; exports.getLogDirByType = _chunk6MKLXHAYjs.getLogDirByType; exports.getTmpDir = _chunk6MKLXHAYjs.getTmpDir; exports.getTmpFile = _chunk6MKLXHAYjs.getTmpFile; exports.getVersion = _chunk6MKLXHAYjs.getVersion; exports.groupedActionDumpFileExt = _chunk6MKLXHAYjs.groupedActionDumpFileExt; exports.insightDumpFileExt = _chunk6MKLXHAYjs.insightDumpFileExt; exports.overlapped = _chunk6MKLXHAYjs.overlapped; exports.replaceStringWithFirstAppearance = _chunk6MKLXHAYjs.replaceStringWithFirstAppearance; exports.replacerForPageObject = _chunk6MKLXHAYjs.replacerForPageObject; exports.reportHTMLContent = _chunk6MKLXHAYjs.reportHTMLContent; exports.setLogDir = _chunk6MKLXHAYjs.setLogDir; exports.sleep = _chunk6MKLXHAYjs.sleep; exports.stringifyDumpData = _chunk6MKLXHAYjs.stringifyDumpData; exports.uploadTestInfoToServer = _chunk6MKLXHAYjs.uploadTestInfoToServer; exports.writeDumpReport = _chunk6MKLXHAYjs.writeDumpReport; exports.writeLogFile = _chunk6MKLXHAYjs.writeLogFile;
+exports.getLogDir = _chunkMN5JVUKUjs.getLogDir; exports.getLogDirByType = _chunkMN5JVUKUjs.getLogDirByType; exports.getTmpDir = _chunkMN5JVUKUjs.getTmpDir; exports.getTmpFile = _chunkMN5JVUKUjs.getTmpFile; exports.getVersion = _chunkMN5JVUKUjs.getVersion; exports.groupedActionDumpFileExt = _chunkMN5JVUKUjs.groupedActionDumpFileExt; exports.insightDumpFileExt = _chunkMN5JVUKUjs.insightDumpFileExt; exports.overlapped = _chunkMN5JVUKUjs.overlapped; exports.replaceStringWithFirstAppearance = _chunkMN5JVUKUjs.replaceStringWithFirstAppearance; exports.replacerForPageObject = _chunkMN5JVUKUjs.replacerForPageObject; exports.reportHTMLContent = _chunkMN5JVUKUjs.reportHTMLContent; exports.setLogDir = _chunkMN5JVUKUjs.setLogDir; exports.sleep = _chunkMN5JVUKUjs.sleep; exports.stringifyDumpData = _chunkMN5JVUKUjs.stringifyDumpData; exports.uploadTestInfoToServer = _chunkMN5JVUKUjs.uploadTestInfoToServer; exports.writeDumpReport = _chunkMN5JVUKUjs.writeDumpReport; exports.writeLogFile = _chunkMN5JVUKUjs.writeLogFile;