npm - @midscene/core - Versions diffs - 0.10.1 → 0.10.2 - Mend

@midscene/core 0.10.1 → 0.10.2

This diff shows the differences between the contents of two publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those package versions as they appear in their respective public registries.

Files changed (12)

package/dist/lib/ai-model.js +2 -2
package/dist/lib/{chunk-CERQVVPJ.js → chunk-2SOSTOJY.js} +43 -137
package/dist/lib/{chunk-VPW777AD.js → chunk-MTBFUT2H.js} +1 -1
package/dist/lib/index.js +13 -13
package/dist/lib/types/ai-model.d.ts +5 -46
package/dist/lib/types/index.d.ts +5 -4
package/dist/lib/types/{llm-planning-ca109221.d.ts → llm-planning-373f78e9.d.ts} +1 -1
package/dist/lib/types/{types-64c4d87b.d.ts → types-7fe32cfe.d.ts} +1 -1
package/dist/lib/types/utils.d.ts +1 -1
package/dist/lib/utils.js +2 -2
package/package.json +8 -5
package/report/index.html +1 -1

package/dist/lib/ai-model.js CHANGED Viewed

@@ -9,7 +9,7 @@
-var _chunkCERQVVPJjs = require('./chunk-CERQVVPJ.js');
+var _chunk2SOSTOJYjs = require('./chunk-2SOSTOJY.js');
 require('./chunk-JP3JBDZS.js');
 require('./chunk-YSQDPG26.js');
@@ -23,4 +23,4 @@ require('./chunk-YSQDPG26.js');
-exports.AiAssert = _chunkCERQVVPJjs.AiAssert; exports.AiExtractElementInfo = _chunkCERQVVPJjs.AiExtractElementInfo; exports.AiInspectElement = _chunkCERQVVPJjs.AiInspectElement; exports.callAiFn = _chunkCERQVVPJjs.callAiFn; exports.callToGetJSONObject = _chunkCERQVVPJjs.callToGetJSONObject; exports.describeUserPage = _chunkCERQVVPJjs.describeUserPage; exports.plan = _chunkCERQVVPJjs.plan; exports.systemPromptToLocateElement = _chunkCERQVVPJjs.systemPromptToLocateElement; exports.transformElementPositionToId = _chunkCERQVVPJjs.transformElementPositionToId; exports.vlmPlanning = _chunkCERQVVPJjs.vlmPlanning;
+exports.AiAssert = _chunk2SOSTOJYjs.AiAssert; exports.AiExtractElementInfo = _chunk2SOSTOJYjs.AiExtractElementInfo; exports.AiInspectElement = _chunk2SOSTOJYjs.AiInspectElement; exports.callAiFn = _chunk2SOSTOJYjs.callAiFn; exports.callToGetJSONObject = _chunk2SOSTOJYjs.callToGetJSONObject; exports.describeUserPage = _chunk2SOSTOJYjs.describeUserPage; exports.plan = _chunk2SOSTOJYjs.plan; exports.systemPromptToLocateElement = _chunk2SOSTOJYjs.systemPromptToLocateElement; exports.transformElementPositionToId = _chunk2SOSTOJYjs.transformElementPositionToId; exports.vlmPlanning = _chunk2SOSTOJYjs.vlmPlanning;

package/dist/lib/{chunk-CERQVVPJ.js → chunk-2SOSTOJY.js} RENAMED Viewed

@@ -1147,125 +1147,6 @@ call_user() # Submit the task and call the user when the task is unsolvable, or
 ## User Instruction
 `;
 var getSummary = (prediction) => prediction.replace(/Reflection:[\s\S]*?(?=Action_Summary:|Action:|$)/g, "").trim();
-function parseActionFromVlm(text, factor = 1e3, mode = "bc") {
-  let reflection = null;
-  let thought = null;
-  let actionStr = "";
-  text = text.trim();
-  if (mode === "bc") {
-    if (text.startsWith("Thought:")) {
-      const thoughtMatch = text.match(/Thought: (.+?)(?=\s*Action:|$)/s);
-      if (thoughtMatch) {
-        thought = thoughtMatch[1].trim();
-      }
-    } else if (text.startsWith("Reflection:")) {
-      const reflectionMatch = text.match(
-        /Reflection: (.+?)Action_Summary: (.+?)(?=\s*Action:|$)/
-      );
-      if (reflectionMatch) {
-        thought = reflectionMatch[2].trim();
-        reflection = reflectionMatch[1].trim();
-      }
-    } else if (text.startsWith("Action_Summary:")) {
-      const summaryMatch = text.match(/Action_Summary: (.+?)(?=\s*Action:|$)/);
-      if (summaryMatch) {
-        thought = summaryMatch[1].trim();
-      }
-    }
-    if (!text.includes("Action:")) {
-      actionStr = text;
-    } else {
-      const actionParts = text.split("Action:");
-      actionStr = actionParts[actionParts.length - 1];
-    }
-  } else if (mode === "o1") {
-    const thoughtMatch = text.match(/<Thought>\s*(.*?)\s*<\/Thought>/);
-    const actionSummaryMatch = text.match(
-      /\nAction_Summary:\s*(.*?)\s*Action:/
-    );
-    const actionMatch = text.match(/\nAction:\s*(.*?)\s*<\/Output>/);
-    const thoughtContent = thoughtMatch ? thoughtMatch[1] : null;
-    const actionSummaryContent = actionSummaryMatch ? actionSummaryMatch[1] : null;
-    const actionContent = actionMatch ? actionMatch[1] : null;
-    thought = `${thoughtContent}
-<Action_Summary>
-${actionSummaryContent}`;
-    actionStr = actionContent || "";
-  }
-  const allActions = actionStr.split("\n\n");
-  const actions = [];
-  for (const rawStr of allActions) {
-    const actionInstance = parseAction(rawStr.replace(/\n/g, "\\n").trim());
-    if (!actionInstance) {
-      console.log(`Action can't parse: ${rawStr}`);
-      continue;
-    }
-    const actionType = actionInstance.function;
-    const params = actionInstance.args;
-    const actionInputs = {};
-    for (const [paramName, param] of Object.entries(params)) {
-      if (!param)
-        continue;
-      const trimmedParam = param.trim();
-      actionInputs[paramName.trim()] = trimmedParam;
-      if (paramName.includes("start_box") || paramName.includes("end_box")) {
-        const oriBox = trimmedParam;
-        const numbers = oriBox.replace(/[()]/g, "").split(",");
-        const floatNumbers = numbers.map(
-          (num) => Number.parseFloat(num) / factor
-        );
-        if (floatNumbers.length === 2) {
-          floatNumbers.push(floatNumbers[0], floatNumbers[1]);
-        }
-        actionInputs[paramName.trim()] = JSON.stringify(floatNumbers);
-      }
-    }
-    if (actionType === "finished") {
-      actions.push({
-        reflection,
-        thought,
-        action_type: "finished",
-        action_inputs: {}
-      });
-    } else {
-      actions.push({
-        reflection,
-        thought,
-        action_type: actionType,
-        action_inputs: actionInputs
-      });
-    }
-  }
-  return actions;
-}
-function parseAction(actionStr) {
-  try {
-    const functionPattern = /^(\w+)\((.*)\)$/;
-    const match = actionStr.trim().match(functionPattern);
-    if (!match) {
-      throw new Error("Not a function call");
-    }
-    const [_, functionName, argsStr] = match;
-    const kwargs = {};
-    if (argsStr.trim()) {
-      const argPairs = argsStr.match(/([^,']|'[^']*')+/g) || [];
-      for (const pair of argPairs) {
-        const [key, ...valueParts] = pair.split("=");
-        if (!key)
-          continue;
-        const value = valueParts.join("=").trim().replace(/^['"]|['"]$/g, "");
-        kwargs[key.trim()] = value;
-      }
-    }
-    return {
-      function: functionName,
-      args: kwargs
-    };
-  } catch (e) {
-    console.error(`Failed to parse action '${actionStr}': ${e}`);
-    return null;
-  }
-}
 // src/ai-model/prompt/ui-tars-locator.ts
 function systemPromptToLocateElementPosition() {
@@ -2638,9 +2519,9 @@ async function plan(userPrompt, opts) {
 }
 // src/ai-model/ui-tars-planning.ts
-function capitalize(str) {
-  return str.charAt(0).toUpperCase() + str.slice(1);
-}
+var _keyboardlayout = require('@midscene/shared/keyboard-layout');
+var _actionparser = require('@ui-tars/action-parser');
 async function vlmPlanning(options) {
   const { conversationHistory, userInstruction, size } = options;
   const systemPrompt = uiTarsPlanningPrompt + userInstruction;
@@ -2654,10 +2535,14 @@ async function vlmPlanning(options) {
     ],
     1 /* INSPECT_ELEMENT */
   );
-  const actions = parseActionFromVlm(res.content);
+  const { parsed } = _actionparser.actionParser.call(void 0, {
+    prediction: res.content,
+    factor: 1e3
+  });
   const transformActions = [];
-  actions.forEach((action) => {
+  parsed.forEach((action) => {
     if (action.action_type === "click") {
+      _assert2.default.call(void 0, action.action_inputs.start_box, "start_box is required");
       const point = getPoint(action.action_inputs.start_box, size);
       transformActions.push({
         type: "Locate",
@@ -2678,6 +2563,20 @@ async function vlmPlanning(options) {
         },
         param: action.thought || ""
       });
+    } else if (action.action_type === "drag") {
+      _assert2.default.call(void 0, action.action_inputs.start_box, "start_box is required");
+      _assert2.default.call(void 0, action.action_inputs.end_box, "end_box is required");
+      const startPoint = getPoint(action.action_inputs.start_box, size);
+      const endPoint = getPoint(action.action_inputs.end_box, size);
+      transformActions.push({
+        type: "Drag",
+        param: {
+          start_box: { x: startPoint[0], y: startPoint[1] },
+          end_box: { x: endPoint[0], y: endPoint[1] }
+        },
+        locate: null,
+        thought: action.thought || ""
+      });
     } else if (action.action_type === "type") {
       transformActions.push({
         type: "Input",
@@ -2704,31 +2603,38 @@ async function vlmPlanning(options) {
         thought: action.thought || ""
       });
     } else if (action.action_type === "hotkey") {
-      const keys = action.action_inputs.key.split(",");
-      for (const key of keys) {
-        transformActions.push({
-          type: "KeyboardPress",
-          param: {
-            value: capitalize(key)
-          },
-          locate: null,
-          thought: action.thought || ""
-        });
-      }
+      _assert2.default.call(void 0, action.action_inputs.key, "key is required");
+      const keys = _keyboardlayout.transformHotkeyInput.call(void 0, action.action_inputs.key);
+      transformActions.push({
+        type: "KeyboardPress",
+        param: {
+          value: keys
+        },
+        locate: null,
+        thought: action.thought || ""
+      });
     } else if (action.action_type === "wait") {
       transformActions.push({
         type: "Sleep",
         param: {
-          timeMs: action.action_inputs.time
+          timeMs: 1e3
         },
         locate: null,
         thought: action.thought || ""
       });
     }
   });
+  if (transformActions.length === 0) {
+    throw new Error("No actions found", {
+      cause: {
+        prediction: res.content,
+        parsed
+      }
+    });
+  }
   return {
     actions: transformActions,
-    realActions: actions,
+    realActions: parsed,
     action_summary: getSummary(res.content)
   };
 }

package/dist/lib/{chunk-VPW777AD.js → chunk-MTBFUT2H.js} RENAMED Viewed

@@ -188,7 +188,7 @@ function stringifyDumpData(data, indents) {
   return JSON.stringify(data, replacerForPageObject, indents);
 }
 function getVersion() {
-  return "0.10.1";
+  return "0.10.2";
 }
 function debugLog(...message) {
   const debugMode = _chunkJP3JBDZSjs.getAIConfig.call(void 0, _chunkJP3JBDZSjs.MIDSCENE_DEBUG_MODE);

package/dist/lib/index.js CHANGED Viewed

@@ -6,7 +6,7 @@
-var _chunkVPW777ADjs = require('./chunk-VPW777AD.js');
+var _chunkMTBFUT2Hjs = require('./chunk-MTBFUT2H.js');
@@ -17,7 +17,7 @@ var _chunkVPW777ADjs = require('./chunk-VPW777AD.js');
-var _chunkCERQVVPJjs = require('./chunk-CERQVVPJ.js');
+var _chunk2SOSTOJYjs = require('./chunk-2SOSTOJY.js');
@@ -169,7 +169,7 @@ ${(_b = this.latestErrorTask()) == null ? void 0 : _b.errorStack}`
   }
   dump() {
     const dumpData = {
-      sdkVersion: _chunkVPW777ADjs.getVersion.call(void 0, ),
+      sdkVersion: _chunkMTBFUT2Hjs.getVersion.call(void 0, ),
       model_name: _chunkJP3JBDZSjs.getAIConfig.call(void 0, _chunkJP3JBDZSjs.MIDSCENE_MODEL_NAME) || "",
       logTime: Date.now(),
       name: this.name,
@@ -192,14 +192,14 @@ var logFileName = "";
 var logContent = [];
 var logIdIndexMap = {};
 var { pid } = process;
-var logFileExt = _chunkVPW777ADjs.insightDumpFileExt;
+var logFileExt = _chunkMTBFUT2Hjs.insightDumpFileExt;
 var ifInBrowser = typeof window !== "undefined";
 function writeInsightDump(data, logId, dumpSubscriber) {
-  const logDir = _chunkVPW777ADjs.getLogDir.call(void 0, );
+  const logDir = _chunkMTBFUT2Hjs.getLogDir.call(void 0, );
   _assert2.default.call(void 0, logDir, "logDir should be set before writing dump file");
   const id = logId || _utils.uuid.call(void 0, );
   const baseData = {
-    sdkVersion: _chunkVPW777ADjs.getVersion.call(void 0, ),
+    sdkVersion: _chunkMTBFUT2Hjs.getVersion.call(void 0, ),
     logTime: Date.now(),
     model_name: _chunkJP3JBDZSjs.getAIConfig.call(void 0, _chunkJP3JBDZSjs.MIDSCENE_MODEL_NAME) || "",
     model_description: _chunkJP3JBDZSjs.getAIConfig.call(void 0, _chunkJP3JBDZSjs.MIDSCENE_USE_VLM_UI_TARS) ? "vlm-ui-tars enabled" : ""
@@ -210,7 +210,7 @@ function writeInsightDump(data, logId, dumpSubscriber) {
     ...data
   };
   dumpSubscriber == null ? void 0 : dumpSubscriber(finalData);
-  const dataString = _chunkVPW777ADjs.stringifyDumpData.call(void 0, finalData, 2);
+  const dataString = _chunkMTBFUT2Hjs.stringifyDumpData.call(void 0, finalData, 2);
   if (typeof logIdIndexMap[id] === "number") {
     logContent[logIdIndexMap[id]] = dataString;
   } else {
@@ -224,7 +224,7 @@ function writeInsightDump(data, logId, dumpSubscriber) {
         logFileName = `${pid}_${baseData.logTime}-${Math.random()}`;
       }
     }
-    _chunkVPW777ADjs.writeLogFile.call(void 0, {
+    _chunkMTBFUT2Hjs.writeLogFile.call(void 0, {
       fileName: logFileName,
       fileExt: logFileExt,
       fileContent: `[
@@ -239,7 +239,7 @@ ${logContent.join(",\n")}
 // src/insight/index.ts
 var Insight = class {
   constructor(context, opt) {
-    this.aiVendorFn = _chunkCERQVVPJjs.callAiFn;
+    this.aiVendorFn = _chunk2SOSTOJYjs.callAiFn;
     _assert2.default.call(void 0, context, "context is required for Insight");
     if (typeof context === "function") {
       this.contextRetrieverFn = context;
@@ -265,7 +265,7 @@ var Insight = class {
     this.onceDumpUpdatedFn = void 0;
     const context = await this.contextRetrieverFn("locate");
     const startTime = Date.now();
-    const { parseResult, elementById, rawResponse, usage } = await _chunkCERQVVPJjs.AiInspectElement.call(void 0, {
+    const { parseResult, elementById, rawResponse, usage } = await _chunk2SOSTOJYjs.AiInspectElement.call(void 0, {
       callAI: callAI || this.aiVendorFn,
       context,
       multi: Boolean(multi),
@@ -348,7 +348,7 @@ ${parseResult.errors.join("\n")}`;
     this.onceDumpUpdatedFn = void 0;
     const context = await this.contextRetrieverFn("extract");
     const startTime = Date.now();
-    const { parseResult, elementById } = await _chunkCERQVVPJjs.AiExtractElementInfo.call(void 0, {
+    const { parseResult, elementById } = await _chunk2SOSTOJYjs.AiExtractElementInfo.call(void 0, {
       context,
       dataQuery: dataDemand
     });
@@ -402,7 +402,7 @@ ${parseResult.errors.join("\n")}`;
     this.onceDumpUpdatedFn = void 0;
     const context = await this.contextRetrieverFn("assert");
     const startTime = Date.now();
-    const assertResult = await _chunkCERQVVPJjs.AiAssert.call(void 0, {
+    const assertResult = await _chunk2SOSTOJYjs.AiAssert.call(void 0, {
       assertion,
       context
     });
@@ -450,4 +450,4 @@ var src_default = Insight;
-exports.AIResponseFormat = _chunkCERQVVPJjs.AIResponseFormat; exports.BaseElement = _chunkCERQVVPJjs.BaseElement; exports.Executor = Executor; exports.Insight = Insight; exports.UIContext = _chunkCERQVVPJjs.UIContext; exports.default = src_default; exports.getLogDirByType = _chunkVPW777ADjs.getLogDirByType; exports.getVersion = _chunkVPW777ADjs.getVersion; exports.plan = _chunkCERQVVPJjs.plan; exports.setLogDir = _chunkVPW777ADjs.setLogDir; exports.transformElementPositionToId = _chunkCERQVVPJjs.transformElementPositionToId;
+exports.AIResponseFormat = _chunk2SOSTOJYjs.AIResponseFormat; exports.BaseElement = _chunk2SOSTOJYjs.BaseElement; exports.Executor = Executor; exports.Insight = Insight; exports.UIContext = _chunk2SOSTOJYjs.UIContext; exports.default = src_default; exports.getLogDirByType = _chunkMTBFUT2Hjs.getLogDirByType; exports.getVersion = _chunkMTBFUT2Hjs.getVersion; exports.plan = _chunk2SOSTOJYjs.plan; exports.setLogDir = _chunkMTBFUT2Hjs.setLogDir; exports.transformElementPositionToId = _chunk2SOSTOJYjs.transformElementPositionToId;

package/dist/lib/types/ai-model.d.ts CHANGED Viewed

@@ -1,8 +1,9 @@
-import { g as AIUsageInfo, B as BaseElement, U as UIContext, y as PlanningAction } from './types-64c4d87b.js';
+import { g as AIUsageInfo, B as BaseElement, U as UIContext, y as PlanningAction } from './types-7fe32cfe.js';
 import { ChatCompletionMessageParam } from 'openai/resources';
 export { ChatCompletionMessageParam } from 'openai/resources';
-import { A as AIActionType } from './llm-planning-ca109221.js';
-export { d as AiAssert, b as AiExtractElementInfo, a as AiInspectElement, c as callAiFn, p as plan, t as transformElementPositionToId } from './llm-planning-ca109221.js';
+import { A as AIActionType } from './llm-planning-373f78e9.js';
+export { d as AiAssert, b as AiExtractElementInfo, a as AiInspectElement, c as callAiFn, p as plan, t as transformElementPositionToId } from './llm-planning-373f78e9.js';
+import { actionParser } from '@ui-tars/action-parser';
 import '@midscene/shared/constants';
 declare function callToGetJSONObject<T>(messages: ChatCompletionMessageParam[], AIActionTypeValue: AIActionType): Promise<{
@@ -35,7 +36,6 @@ declare function describeUserPage<ElementType extends BaseElement = BaseElement>
     };
 }>;
-type ActionType = 'click' | 'type' | 'hotkey' | 'finished' | 'scroll' | 'wait';
 declare function vlmPlanning(options: {
     userInstruction: string;
     conversationHistory: ChatCompletionMessageParam[];
@@ -45,49 +45,8 @@ declare function vlmPlanning(options: {
     };
 }): Promise<{
     actions: PlanningAction<any>[];
-    realActions: Array<Action>;
+    realActions: ReturnType<typeof actionParser>['parsed'];
     action_summary: string;
 }>;
-interface BaseAction {
-    action_type: ActionType;
-    action_inputs: Record<string, any>;
-    reflection: string | null;
-    thought: string | null;
-}
-interface ClickAction extends BaseAction {
-    action_type: 'click';
-    action_inputs: {
-        start_box: string;
-    };
-}
-interface WaitAction extends BaseAction {
-    action_type: 'wait';
-    action_inputs: {
-        time: string;
-    };
-}
-interface TypeAction extends BaseAction {
-    action_type: 'type';
-    action_inputs: {
-        content: string;
-    };
-}
-interface HotkeyAction extends BaseAction {
-    action_type: 'hotkey';
-    action_inputs: {
-        key: string;
-    };
-}
-interface ScrollAction extends BaseAction {
-    action_type: 'scroll';
-    action_inputs: {
-        direction: 'up' | 'down';
-    };
-}
-interface FinishedAction extends BaseAction {
-    action_type: 'finished';
-    action_inputs: Record<string, never>;
-}
-type Action = ClickAction | TypeAction | HotkeyAction | ScrollAction | FinishedAction | WaitAction;
 export { callToGetJSONObject, describeUserPage, systemPromptToLocateElement, vlmPlanning };

package/dist/lib/types/index.d.ts CHANGED Viewed

@@ -1,7 +1,7 @@
-import { E as ExecutionTask, a as ExecutionTaskProgressOptions, b as ExecutionTaskApply, c as ExecutionDump, B as BaseElement, U as UIContext, I as InsightAction, D as DumpSubscriber, d as InsightOptions, e as InsightTaskInfo, f as InsightAssertionResponse, A as AISingleElementResponse } from './types-64c4d87b.js';
-export { n as AIAssertionResponse, k as AIElementIdResponse, l as AIElementResponse, h as AIResponseFormat, m as AISectionParseResponse, i as AISingleElementResponseById, j as AISingleElementResponseByPosition, g as AIUsageInfo, w as AgentAssertOpt, v as AgentWaitForOpt, X as BaseAgentParserOpt, C as CallAIFn, W as Color, q as DumpMeta, u as ElementById, o as EnsureObject, _ as ExecutionRecorderItem, ag as ExecutionTaskAction, af as ExecutionTaskActionApply, ae as ExecutionTaskInsightAssertion, ad as ExecutionTaskInsightAssertionApply, ac as ExecutionTaskInsightAssertionParam, a5 as ExecutionTaskInsightDumpLog, a7 as ExecutionTaskInsightLocate, a6 as ExecutionTaskInsightLocateApply, a4 as ExecutionTaskInsightLocateOutput, a3 as ExecutionTaskInsightLocateParam, ab as ExecutionTaskInsightQuery, aa as ExecutionTaskInsightQueryApply, a9 as ExecutionTaskInsightQueryOutput, a8 as ExecutionTaskInsightQueryParam, ai as ExecutionTaskPlanning, ah as ExecutionTaskPlanningApply, a2 as ExecutionTaskReturn, $ as ExecutionTaskType, a0 as ExecutorContext, aj as GroupedActionDump, s as InsightDump, p as InsightExtractParam, L as LiteUISection, O as OnTaskStartTip, t as PartialInsightDumpFromSDK, z as PlanningAIResponse, y as PlanningAction, N as PlanningActionParamAssert, T as PlanningActionParamError, J as PlanningActionParamHover, K as PlanningActionParamInputOrKeyPress, G as PlanningActionParamPlan, M as PlanningActionParamScroll, Q as PlanningActionParamSleep, H as PlanningActionParamTap, V as PlanningActionParamWaitFor, F as PlanningFurtherPlan, x as PlanningLocateParam, Z as PlaywrightParserOpt, P as Point, Y as PuppeteerParserOpt, R as Rect, r as ReportDumpWithAttributes, S as Size, a1 as TaskCacheInfo } from './types-64c4d87b.js';
-import { c as callAiFn } from './llm-planning-ca109221.js';
-export { p as plan, t as transformElementPositionToId } from './llm-planning-ca109221.js';
+import { E as ExecutionTask, a as ExecutionTaskProgressOptions, b as ExecutionTaskApply, c as ExecutionDump, B as BaseElement, U as UIContext, I as InsightAction, D as DumpSubscriber, d as InsightOptions, e as InsightTaskInfo, f as InsightAssertionResponse, A as AISingleElementResponse } from './types-7fe32cfe.js';
+export { n as AIAssertionResponse, k as AIElementIdResponse, l as AIElementResponse, h as AIResponseFormat, m as AISectionParseResponse, i as AISingleElementResponseById, j as AISingleElementResponseByPosition, g as AIUsageInfo, w as AgentAssertOpt, v as AgentWaitForOpt, X as BaseAgentParserOpt, C as CallAIFn, W as Color, q as DumpMeta, u as ElementById, o as EnsureObject, _ as ExecutionRecorderItem, ag as ExecutionTaskAction, af as ExecutionTaskActionApply, ae as ExecutionTaskInsightAssertion, ad as ExecutionTaskInsightAssertionApply, ac as ExecutionTaskInsightAssertionParam, a5 as ExecutionTaskInsightDumpLog, a7 as ExecutionTaskInsightLocate, a6 as ExecutionTaskInsightLocateApply, a4 as ExecutionTaskInsightLocateOutput, a3 as ExecutionTaskInsightLocateParam, ab as ExecutionTaskInsightQuery, aa as ExecutionTaskInsightQueryApply, a9 as ExecutionTaskInsightQueryOutput, a8 as ExecutionTaskInsightQueryParam, ai as ExecutionTaskPlanning, ah as ExecutionTaskPlanningApply, a2 as ExecutionTaskReturn, $ as ExecutionTaskType, a0 as ExecutorContext, aj as GroupedActionDump, s as InsightDump, p as InsightExtractParam, L as LiteUISection, O as OnTaskStartTip, t as PartialInsightDumpFromSDK, z as PlanningAIResponse, y as PlanningAction, N as PlanningActionParamAssert, T as PlanningActionParamError, J as PlanningActionParamHover, K as PlanningActionParamInputOrKeyPress, G as PlanningActionParamPlan, M as PlanningActionParamScroll, Q as PlanningActionParamSleep, H as PlanningActionParamTap, V as PlanningActionParamWaitFor, F as PlanningFurtherPlan, x as PlanningLocateParam, Z as PlaywrightParserOpt, P as Point, Y as PuppeteerParserOpt, R as Rect, r as ReportDumpWithAttributes, S as Size, a1 as TaskCacheInfo } from './types-7fe32cfe.js';
+import { c as callAiFn } from './llm-planning-373f78e9.js';
+export { p as plan, t as transformElementPositionToId } from './llm-planning-373f78e9.js';
 export { getLogDirByType, getVersion, setLogDir } from './utils.js';
 import '@midscene/shared/constants';
 import 'openai/resources';
@@ -32,6 +32,7 @@ interface MidsceneYamlScriptEnv {
   };
   cookie?: string;
   output?: string;
+  trackingActiveTab?: boolean; // if track the newly opened tab, true for default in yaml script
   // bridge mode config
   bridgeMode?: false | 'newTabWithUrl' | 'currentTab';

package/dist/lib/types/{llm-planning-ca109221.d.ts → llm-planning-373f78e9.d.ts} RENAMED Viewed

@@ -1,4 +1,4 @@
-import { g as AIUsageInfo, l as AIElementResponse, B as BaseElement, U as UIContext, A as AISingleElementResponse, j as AISingleElementResponseByPosition, u as ElementById, m as AISectionParseResponse, n as AIAssertionResponse, z as PlanningAIResponse } from './types-64c4d87b.js';
+import { g as AIUsageInfo, l as AIElementResponse, B as BaseElement, U as UIContext, A as AISingleElementResponse, j as AISingleElementResponseByPosition, u as ElementById, m as AISectionParseResponse, n as AIAssertionResponse, z as PlanningAIResponse } from './types-7fe32cfe.js';
 import { ChatCompletionSystemMessageParam, ChatCompletionUserMessageParam } from 'openai/resources';
 type AIArgs = [

package/dist/lib/types/{types-64c4d87b.d.ts → types-7fe32cfe.d.ts} RENAMED Viewed

@@ -161,7 +161,7 @@ interface PlanningLocateParam {
 }
 interface PlanningAction<ParamType = any> {
     thought?: string;
-    type: 'Locate' | 'Tap' | 'Hover' | 'Input' | 'KeyboardPress' | 'Scroll' | 'Error' | 'FalsyConditionStatement' | 'Assert' | 'AssertWithoutThrow' | 'Sleep' | 'Finished';
+    type: 'Locate' | 'Tap' | 'Drag' | 'Hover' | 'Input' | 'KeyboardPress' | 'Scroll' | 'Error' | 'FalsyConditionStatement' | 'Assert' | 'AssertWithoutThrow' | 'Sleep' | 'Finished';
     param: ParamType;
     locate: PlanningLocateParam | null;
 }

package/dist/lib/types/utils.d.ts CHANGED Viewed

@@ -1,4 +1,4 @@
-import { r as ReportDumpWithAttributes, R as Rect } from './types-64c4d87b.js';
+import { r as ReportDumpWithAttributes, R as Rect } from './types-7fe32cfe.js';
 import '@midscene/shared/constants';
 import 'openai/resources';

package/dist/lib/utils.js CHANGED Viewed

@@ -16,7 +16,7 @@
-var _chunkVPW777ADjs = require('./chunk-VPW777AD.js');
+var _chunkMTBFUT2Hjs = require('./chunk-MTBFUT2H.js');
 require('./chunk-JP3JBDZS.js');
 require('./chunk-YSQDPG26.js');
@@ -37,4 +37,4 @@ require('./chunk-YSQDPG26.js');
-exports.getLogDir = _chunkVPW777ADjs.getLogDir; exports.getLogDirByType = _chunkVPW777ADjs.getLogDirByType; exports.getTmpDir = _chunkVPW777ADjs.getTmpDir; exports.getTmpFile = _chunkVPW777ADjs.getTmpFile; exports.getVersion = _chunkVPW777ADjs.getVersion; exports.groupedActionDumpFileExt = _chunkVPW777ADjs.groupedActionDumpFileExt; exports.insightDumpFileExt = _chunkVPW777ADjs.insightDumpFileExt; exports.overlapped = _chunkVPW777ADjs.overlapped; exports.replaceStringWithFirstAppearance = _chunkVPW777ADjs.replaceStringWithFirstAppearance; exports.replacerForPageObject = _chunkVPW777ADjs.replacerForPageObject; exports.reportHTMLContent = _chunkVPW777ADjs.reportHTMLContent; exports.setLogDir = _chunkVPW777ADjs.setLogDir; exports.sleep = _chunkVPW777ADjs.sleep; exports.stringifyDumpData = _chunkVPW777ADjs.stringifyDumpData; exports.uploadTestInfoToServer = _chunkVPW777ADjs.uploadTestInfoToServer; exports.writeDumpReport = _chunkVPW777ADjs.writeDumpReport; exports.writeLogFile = _chunkVPW777ADjs.writeLogFile;
+exports.getLogDir = _chunkMTBFUT2Hjs.getLogDir; exports.getLogDirByType = _chunkMTBFUT2Hjs.getLogDirByType; exports.getTmpDir = _chunkMTBFUT2Hjs.getTmpDir; exports.getTmpFile = _chunkMTBFUT2Hjs.getTmpFile; exports.getVersion = _chunkMTBFUT2Hjs.getVersion; exports.groupedActionDumpFileExt = _chunkMTBFUT2Hjs.groupedActionDumpFileExt; exports.insightDumpFileExt = _chunkMTBFUT2Hjs.insightDumpFileExt; exports.overlapped = _chunkMTBFUT2Hjs.overlapped; exports.replaceStringWithFirstAppearance = _chunkMTBFUT2Hjs.replaceStringWithFirstAppearance; exports.replacerForPageObject = _chunkMTBFUT2Hjs.replacerForPageObject; exports.reportHTMLContent = _chunkMTBFUT2Hjs.reportHTMLContent; exports.setLogDir = _chunkMTBFUT2Hjs.setLogDir; exports.sleep = _chunkMTBFUT2Hjs.sleep; exports.stringifyDumpData = _chunkMTBFUT2Hjs.stringifyDumpData; exports.uploadTestInfoToServer = _chunkMTBFUT2Hjs.uploadTestInfoToServer; exports.writeDumpReport = _chunkMTBFUT2Hjs.writeDumpReport; exports.writeLogFile = _chunkMTBFUT2Hjs.writeLogFile;

package/package.json CHANGED Viewed

@@ -1,7 +1,7 @@
 {
   "name": "@midscene/core",
   "description": "Automate browser actions, extract data, and perform assertions using AI. It offers JavaScript SDK, Chrome extension, and support for scripting in YAML. See https://midscenejs.com/ for details.",
-  "version": "0.10.1",
+  "version": "0.10.2",
   "repository": "https://github.com/web-infra-dev/midscene",
   "homepage": "https://midscenejs.com/",
   "jsnext:source": "./src/index.ts",
@@ -37,11 +37,12 @@
   },
   "dependencies": {
     "@azure/identity": "4.5.0",
+    "@ui-tars/action-parser": "1.0.1",
     "@anthropic-ai/sdk": "0.33.1",
     "@langchain/core": "0.3.26",
     "socks-proxy-agent": "8.0.4",
     "openai": "4.57.1",
-    "@midscene/shared": "0.10.1"
+    "@midscene/shared": "0.10.2"
   },
   "devDependencies": {
     "@modern-js/module-tools": "2.60.6",
@@ -67,12 +68,14 @@
     "build:watch": "modern build -w",
     "new": "modern new",
     "upgrade": "modern upgrade",
-    "test": "vitest --run -u",
+    "test": "vitest --run",
     "test:ai": "AITEST=true npm run test",
     "computer": "TEST_COMPUTER=true npm run test:ai -- tests/ai/evaluate/computer.test.ts",
+    "test:parse-action": "npm run test:ai -- tests/ai/parse-action.test.ts",
     "evaluate": "npm run test:ai -- tests/ai/evaluate/inspect.test.ts",
     "evaluate:assertion": "npm run test:ai -- tests/ai/evaluate/assertion.test.ts",
-    "prompt": "npm run test:ai -- tests/ai/parse-action.test.ts",
-    "evaluate:update": "UPDATE_AI_DATA=true npm run test:ai -- tests/ai/evaluate/inspect.test.ts"
+    "evaluate:plan": "npm run test:ai -- tests/ai/evaluate/plan/planning.test.ts",
+    "evaluate:update": "UPDATE_AI_DATA=true npm run test:ai -- tests/ai/evaluate/inspect.test.ts",
+    "prompt": "npm run test:ai -- tests/ai/parse-action.test.ts"
   }
 }