npm - @midscene/core - Versions diffs - 0.3.0 → 0.3.1-beta-20240821105917.0 - Mend

@midscene/core 0.3.0 → 0.3.1-beta-20240821105917.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (15)

package/LICENSE +2 -2
package/dist/es/ai-model.js +6 -5
package/dist/es/image.js +45 -12
package/dist/es/index.js +11 -9
package/dist/lib/ai-model.js +6 -5
package/dist/lib/image.js +35 -5
package/dist/lib/index.js +11 -9
package/dist/types/ai-model.d.ts +3 -3
package/dist/types/image.d.ts +1 -1
package/dist/types/{index-f43935c0.d.ts → index-0479d487.d.ts} +1 -1
package/dist/types/index.d.ts +4 -4
package/dist/types/{types-81f7991c.d.ts → types-3eb61b5c.d.ts} +17 -4
package/dist/types/utils.d.ts +1 -1
package/package.json +8 -7
package/report/index.html +1 -1

package/LICENSE CHANGED Viewed

@@ -1,6 +1,6 @@
 MIT License
-Copyright (c) 2024-present Midscene.js
+Copyright (c) 2024-present Bytedance, Inc. and its affiliates.
 Permission is hereby granted, free of charge, to any person obtaining a copy
 of this software and associated documentation files (the "Software"), to deal
@@ -18,4 +18,4 @@ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
-SOFTWARE.
+SOFTWARE.

package/dist/es/ai-model.js CHANGED Viewed

@@ -4256,7 +4256,8 @@ async function call(messages, responseFormat) {
   const completion = await openai.chat.completions.create({
     model,
     messages,
-    response_format: { type: responseFormat }
+    response_format: { type: responseFormat },
+    temperature: 0.2
   });
   const { content } = completion.choices[0].message;
   assert(content, "empty content");
@@ -4615,8 +4616,8 @@ async function callCozeAi(options) {
   }
   const aiResponse = await completion.json();
   if (aiResponse.code !== 0) {
-    console.error("CozeAI error response", aiResponse);
-    throw new Error("CozeAI error response", aiResponse);
+    console.error("CozeAI error response", aiResponse.msg);
+    throw new Error(`CozeAI error response ${aiResponse.msg}`);
   }
   if (!(aiResponse == null ? void 0 : aiResponse.messages) || !((_a = aiResponse == null ? void 0 : aiResponse.messages[0]) == null ? void 0 : _a.content)) {
     console.error("aiResponse", aiResponse);
@@ -4869,7 +4870,7 @@ function systemPromptToTaskPlanning() {
     * param: { timeMs: number }, wait for timeMs milliseconds
   Here is an example of how to decompose a task.
-  When a user says 'Input "Weather in Shanghai" into the search bar, wait 1 second, hit enter', by viewing the page screenshot and description, you my decompose this task into something like this:
+  When a user says 'Input "Weather in Shanghai" into the search bar, wait 1 second, hit enter', by viewing the page screenshot and description, you may decompose this task into something like this:
   * Find: 'The search bar'
   * Input: 'Weather in Shanghai'
   * Sleep: 1000
@@ -4879,7 +4880,7 @@ function systemPromptToTaskPlanning() {
   1. The actions you composed MUST be based on the page context information you get. Instead of making up actions that are not related to the page context.
   2. In most cases, you should Locate one element first, then do other actions on it. For example, alway Find one element, then hover on it. But if you think it's necessary to do other actions first (like global scroll, global key press), you can do that.
-  If the planned tasks are sequential and tasks may appear only after the execution of previous tasks, this is considered normal. If any errors occur during task planning (such as the page content being irrelevant to the task or the mentioned element not existing), please return the error message with an explanation in the errors field. Thoughts, prompts, and error messages should all be in the same language as the user query.
+  If the planned tasks are sequential and tasks may appear only after the execution of previous tasks, this is considered normal. Thoughts, prompts, and error messages should all be in the same language as the user query.
   Return in the following JSON format:
   {

package/dist/es/image.js CHANGED Viewed

@@ -1,6 +1,23 @@
+var __defProp = Object.defineProperty;
+var __getOwnPropSymbols = Object.getOwnPropertySymbols;
+var __hasOwnProp = Object.prototype.hasOwnProperty;
+var __propIsEnum = Object.prototype.propertyIsEnumerable;
+var __defNormalProp = (obj, key, value) => key in obj ? __defProp(obj, key, { enumerable: true, configurable: true, writable: true, value }) : obj[key] = value;
+var __spreadValues = (a, b) => {
+  for (var prop in b || (b = {}))
+    if (__hasOwnProp.call(b, prop))
+      __defNormalProp(a, prop, b[prop]);
+  if (__getOwnPropSymbols)
+    for (var prop of __getOwnPropSymbols(b)) {
+      if (__propIsEnum.call(b, prop))
+        __defNormalProp(a, prop, b[prop]);
+    }
+  return a;
+};
 // src/image/info.ts
 import assert from "assert";
-import { Buffer } from "buffer";
+import { Buffer as Buffer2 } from "buffer";
 import { readFileSync } from "fs";
 import Sharp from "sharp";
 async function imageInfo(image) {
@@ -10,7 +27,7 @@ async function imageInfo(image) {
 }
 async function imageInfoOfBase64(imageBase64) {
   const base64Data = imageBase64.replace(/^data:image\/\w+;base64,/, "");
-  return imageInfo(Buffer.from(base64Data, "base64"));
+  return imageInfo(Buffer2.from(base64Data, "base64"));
 }
 function base64Encoded(image, withHeader = true) {
   const imageBuffer = readFileSync(image);
@@ -27,12 +44,12 @@ function base64Encoded(image, withHeader = true) {
 }
 // src/image/transform.ts
-import { Buffer as Buffer2 } from "buffer";
+import { Buffer as Buffer3 } from "buffer";
 import Sharp2 from "sharp";
 async function saveBase64Image(options) {
   const { base64Data, outputPath } = options;
   const base64Image = base64Data.split(";base64,").pop() || base64Data;
-  const imageBuffer = Buffer2.from(base64Image, "base64");
+  const imageBuffer = Buffer3.from(base64Image, "base64");
   await Sharp2(imageBuffer).toFile(outputPath);
   console.log("Image successfully written to file.");
 }
@@ -44,7 +61,7 @@ async function transformImgPathToBase64(inputPath) {
 }
 async function resizeImg(base64Data) {
   const base64Image = base64Data.split(";base64,").pop() || base64Data;
-  const imageBuffer = Buffer2.from(base64Image, "base64");
+  const imageBuffer = Buffer3.from(base64Image, "base64");
   const metadata = await Sharp2(imageBuffer).metadata();
   const { width, height } = metadata;
   if (!width || !height) {
@@ -99,26 +116,42 @@ async function alignCoordByTrim(image, centerRect) {
   if (!(imgInfo == null ? void 0 : imgInfo.width) || !imgInfo.height || imgInfo.width <= 3 || imgInfo.height <= 3) {
     return centerRect;
   }
+  const zeroSize = {
+    left: 0,
+    top: 0,
+    width: -1,
+    height: -1
+  };
+  const finalCenterRect = __spreadValues({}, centerRect);
+  if (centerRect.left > imgInfo.width || centerRect.top > imgInfo.height) {
+    return zeroSize;
+  }
+  if (centerRect.left + centerRect.width > imgInfo.width) {
+    finalCenterRect.width = imgInfo.width - centerRect.left;
+  }
+  if (centerRect.top + centerRect.height > imgInfo.height) {
+    finalCenterRect.height = imgInfo.height - centerRect.top;
+  }
   try {
-    const img = await Sharp2(image).extract(centerRect).toBuffer();
+    const img = await Sharp2(image).extract(finalCenterRect).toBuffer();
     const trimInfo = await trimImage(img);
     if (!trimInfo) {
-      return centerRect;
+      return finalCenterRect;
     }
     return {
-      left: centerRect.left - trimInfo.trimOffsetLeft,
-      top: centerRect.top - trimInfo.trimOffsetTop,
+      left: finalCenterRect.left - trimInfo.trimOffsetLeft,
+      top: finalCenterRect.top - trimInfo.trimOffsetTop,
       width: trimInfo.width,
       height: trimInfo.height
     };
   } catch (e) {
-    console.log(imgInfo);
+    console.warn(imgInfo, finalCenterRect);
     throw e;
   }
 }
 // src/image/visualization.ts
-import { Buffer as Buffer3 } from "buffer";
+import { Buffer as Buffer4 } from "buffer";
 // src/utils.ts
 import assert2 from "assert";
@@ -260,7 +293,7 @@ async function composeSectionDiagram(sections, context) {
         ${rects.join("\n")}
         </svg>
     `;
-  const svgBuffer = Buffer3.from(rectangles);
+  const svgBuffer = Buffer4.from(rectangles);
   const file = getTmpFile("png");
   await Sharp3({
     create: {

package/dist/es/index.js CHANGED Viewed

@@ -1228,7 +1228,7 @@ var Executor = class {
           returnValue = await task.executor(param, executorContext);
         }
         Object.assign(task, returnValue);
-        task.status = "success";
+        task.status = "finished";
         task.timing.end = Date.now();
         task.timing.cost = task.timing.end - task.timing.start;
         taskIndex++;
@@ -1247,12 +1247,13 @@ var Executor = class {
     }
     if (successfullyCompleted) {
       this.status = "completed";
-      if (this.tasks.length) {
-        return this.tasks[this.tasks.length - 1].output;
-      }
     } else {
       this.status = "error";
     }
+    if (this.tasks.length) {
+      const outputIndex = Math.min(taskIndex, this.tasks.length - 1);
+      return this.tasks[outputIndex].output;
+    }
   }
   isInErrorState() {
     return this.status === "error";
@@ -4547,7 +4548,8 @@ async function call(messages, responseFormat) {
   const completion = await openai.chat.completions.create({
     model,
     messages,
-    response_format: { type: responseFormat }
+    response_format: { type: responseFormat },
+    temperature: 0.2
   });
   const { content } = completion.choices[0].message;
   assert3(content, "empty content");
@@ -4926,8 +4928,8 @@ async function callCozeAi(options) {
   }
   const aiResponse = await completion.json();
   if (aiResponse.code !== 0) {
-    console.error("CozeAI error response", aiResponse);
-    throw new Error("CozeAI error response", aiResponse);
+    console.error("CozeAI error response", aiResponse.msg);
+    throw new Error(`CozeAI error response ${aiResponse.msg}`);
   }
   if (!(aiResponse == null ? void 0 : aiResponse.messages) || !((_a = aiResponse == null ? void 0 : aiResponse.messages[0]) == null ? void 0 : _a.content)) {
     console.error("aiResponse", aiResponse);
@@ -5180,7 +5182,7 @@ function systemPromptToTaskPlanning() {
     * param: { timeMs: number }, wait for timeMs milliseconds
   Here is an example of how to decompose a task.
-  When a user says 'Input "Weather in Shanghai" into the search bar, wait 1 second, hit enter', by viewing the page screenshot and description, you my decompose this task into something like this:
+  When a user says 'Input "Weather in Shanghai" into the search bar, wait 1 second, hit enter', by viewing the page screenshot and description, you may decompose this task into something like this:
   * Find: 'The search bar'
   * Input: 'Weather in Shanghai'
   * Sleep: 1000
@@ -5190,7 +5192,7 @@ function systemPromptToTaskPlanning() {
   1. The actions you composed MUST be based on the page context information you get. Instead of making up actions that are not related to the page context.
   2. In most cases, you should Locate one element first, then do other actions on it. For example, alway Find one element, then hover on it. But if you think it's necessary to do other actions first (like global scroll, global key press), you can do that.
-  If the planned tasks are sequential and tasks may appear only after the execution of previous tasks, this is considered normal. If any errors occur during task planning (such as the page content being irrelevant to the task or the mentioned element not existing), please return the error message with an explanation in the errors field. Thoughts, prompts, and error messages should all be in the same language as the user query.
+  If the planned tasks are sequential and tasks may appear only after the execution of previous tasks, this is considered normal. Thoughts, prompts, and error messages should all be in the same language as the user query.
   Return in the following JSON format:
   {

package/dist/lib/ai-model.js CHANGED Viewed

@@ -4276,7 +4276,8 @@ async function call(messages, responseFormat) {
   const completion = await openai.chat.completions.create({
     model,
     messages,
-    response_format: { type: responseFormat }
+    response_format: { type: responseFormat },
+    temperature: 0.2
   });
   const { content } = completion.choices[0].message;
   (0, import_node_assert.default)(content, "empty content");
@@ -4630,8 +4631,8 @@ async function callCozeAi(options) {
   }
   const aiResponse = await completion.json();
   if (aiResponse.code !== 0) {
-    console.error("CozeAI error response", aiResponse);
-    throw new Error("CozeAI error response", aiResponse);
+    console.error("CozeAI error response", aiResponse.msg);
+    throw new Error(`CozeAI error response ${aiResponse.msg}`);
   }
   if (!(aiResponse == null ? void 0 : aiResponse.messages) || !((_a = aiResponse == null ? void 0 : aiResponse.messages[0]) == null ? void 0 : _a.content)) {
     console.error("aiResponse", aiResponse);
@@ -4884,7 +4885,7 @@ function systemPromptToTaskPlanning() {
     * param: { timeMs: number }, wait for timeMs milliseconds
   Here is an example of how to decompose a task.
-  When a user says 'Input "Weather in Shanghai" into the search bar, wait 1 second, hit enter', by viewing the page screenshot and description, you my decompose this task into something like this:
+  When a user says 'Input "Weather in Shanghai" into the search bar, wait 1 second, hit enter', by viewing the page screenshot and description, you may decompose this task into something like this:
   * Find: 'The search bar'
   * Input: 'Weather in Shanghai'
   * Sleep: 1000
@@ -4894,7 +4895,7 @@ function systemPromptToTaskPlanning() {
   1. The actions you composed MUST be based on the page context information you get. Instead of making up actions that are not related to the page context.
   2. In most cases, you should Locate one element first, then do other actions on it. For example, alway Find one element, then hover on it. But if you think it's necessary to do other actions first (like global scroll, global key press), you can do that.
-  If the planned tasks are sequential and tasks may appear only after the execution of previous tasks, this is considered normal. If any errors occur during task planning (such as the page content being irrelevant to the task or the mentioned element not existing), please return the error message with an explanation in the errors field. Thoughts, prompts, and error messages should all be in the same language as the user query.
+  If the planned tasks are sequential and tasks may appear only after the execution of previous tasks, this is considered normal. Thoughts, prompts, and error messages should all be in the same language as the user query.
   Return in the following JSON format:
   {

package/dist/lib/image.js CHANGED Viewed

@@ -3,8 +3,22 @@ var __create = Object.create;
 var __defProp = Object.defineProperty;
 var __getOwnPropDesc = Object.getOwnPropertyDescriptor;
 var __getOwnPropNames = Object.getOwnPropertyNames;
+var __getOwnPropSymbols = Object.getOwnPropertySymbols;
 var __getProtoOf = Object.getPrototypeOf;
 var __hasOwnProp = Object.prototype.hasOwnProperty;
+var __propIsEnum = Object.prototype.propertyIsEnumerable;
+var __defNormalProp = (obj, key, value) => key in obj ? __defProp(obj, key, { enumerable: true, configurable: true, writable: true, value }) : obj[key] = value;
+var __spreadValues = (a, b) => {
+  for (var prop in b || (b = {}))
+    if (__hasOwnProp.call(b, prop))
+      __defNormalProp(a, prop, b[prop]);
+  if (__getOwnPropSymbols)
+    for (var prop of __getOwnPropSymbols(b)) {
+      if (__propIsEnum.call(b, prop))
+        __defNormalProp(a, prop, b[prop]);
+    }
+  return a;
+};
 var __export = (target, all) => {
   for (var name in all)
     __defProp(target, name, { get: all[name], enumerable: true });
@@ -144,20 +158,36 @@ async function alignCoordByTrim(image, centerRect) {
   if (!(imgInfo == null ? void 0 : imgInfo.width) || !imgInfo.height || imgInfo.width <= 3 || imgInfo.height <= 3) {
     return centerRect;
   }
+  const zeroSize = {
+    left: 0,
+    top: 0,
+    width: -1,
+    height: -1
+  };
+  const finalCenterRect = __spreadValues({}, centerRect);
+  if (centerRect.left > imgInfo.width || centerRect.top > imgInfo.height) {
+    return zeroSize;
+  }
+  if (centerRect.left + centerRect.width > imgInfo.width) {
+    finalCenterRect.width = imgInfo.width - centerRect.left;
+  }
+  if (centerRect.top + centerRect.height > imgInfo.height) {
+    finalCenterRect.height = imgInfo.height - centerRect.top;
+  }
   try {
-    const img = await (0, import_sharp2.default)(image).extract(centerRect).toBuffer();
+    const img = await (0, import_sharp2.default)(image).extract(finalCenterRect).toBuffer();
     const trimInfo = await trimImage(img);
     if (!trimInfo) {
-      return centerRect;
+      return finalCenterRect;
     }
     return {
-      left: centerRect.left - trimInfo.trimOffsetLeft,
-      top: centerRect.top - trimInfo.trimOffsetTop,
+      left: finalCenterRect.left - trimInfo.trimOffsetLeft,
+      top: finalCenterRect.top - trimInfo.trimOffsetTop,
       width: trimInfo.width,
       height: trimInfo.height
     };
   } catch (e) {
-    console.log(imgInfo);
+    console.warn(imgInfo, finalCenterRect);
     throw e;
   }
 }

package/dist/lib/index.js CHANGED Viewed

@@ -1244,7 +1244,7 @@ var Executor = class {
           returnValue = await task.executor(param, executorContext);
         }
         Object.assign(task, returnValue);
-        task.status = "success";
+        task.status = "finished";
         task.timing.end = Date.now();
         task.timing.cost = task.timing.end - task.timing.start;
         taskIndex++;
@@ -1263,12 +1263,13 @@ var Executor = class {
     }
     if (successfullyCompleted) {
       this.status = "completed";
-      if (this.tasks.length) {
-        return this.tasks[this.tasks.length - 1].output;
-      }
     } else {
       this.status = "error";
     }
+    if (this.tasks.length) {
+      const outputIndex = Math.min(taskIndex, this.tasks.length - 1);
+      return this.tasks[outputIndex].output;
+    }
   }
   isInErrorState() {
     return this.status === "error";
@@ -4563,7 +4564,8 @@ async function call(messages, responseFormat) {
   const completion = await openai.chat.completions.create({
     model,
     messages,
-    response_format: { type: responseFormat }
+    response_format: { type: responseFormat },
+    temperature: 0.2
   });
   const { content } = completion.choices[0].message;
   (0, import_node_assert3.default)(content, "empty content");
@@ -4942,8 +4944,8 @@ async function callCozeAi(options) {
   }
   const aiResponse = await completion.json();
   if (aiResponse.code !== 0) {
-    console.error("CozeAI error response", aiResponse);
-    throw new Error("CozeAI error response", aiResponse);
+    console.error("CozeAI error response", aiResponse.msg);
+    throw new Error(`CozeAI error response ${aiResponse.msg}`);
   }
   if (!(aiResponse == null ? void 0 : aiResponse.messages) || !((_a = aiResponse == null ? void 0 : aiResponse.messages[0]) == null ? void 0 : _a.content)) {
     console.error("aiResponse", aiResponse);
@@ -5196,7 +5198,7 @@ function systemPromptToTaskPlanning() {
     * param: { timeMs: number }, wait for timeMs milliseconds
   Here is an example of how to decompose a task.
-  When a user says 'Input "Weather in Shanghai" into the search bar, wait 1 second, hit enter', by viewing the page screenshot and description, you my decompose this task into something like this:
+  When a user says 'Input "Weather in Shanghai" into the search bar, wait 1 second, hit enter', by viewing the page screenshot and description, you may decompose this task into something like this:
   * Find: 'The search bar'
   * Input: 'Weather in Shanghai'
   * Sleep: 1000
@@ -5206,7 +5208,7 @@ function systemPromptToTaskPlanning() {
   1. The actions you composed MUST be based on the page context information you get. Instead of making up actions that are not related to the page context.
   2. In most cases, you should Locate one element first, then do other actions on it. For example, alway Find one element, then hover on it. But if you think it's necessary to do other actions first (like global scroll, global key press), you can do that.
-  If the planned tasks are sequential and tasks may appear only after the execution of previous tasks, this is considered normal. If any errors occur during task planning (such as the page content being irrelevant to the task or the mentioned element not existing), please return the error message with an explanation in the errors field. Thoughts, prompts, and error messages should all be in the same language as the user query.
+  If the planned tasks are sequential and tasks may appear only after the execution of previous tasks, this is considered normal. Thoughts, prompts, and error messages should all be in the same language as the user query.
   Return in the following JSON format:
   {

package/dist/types/ai-model.d.ts CHANGED Viewed

@@ -1,8 +1,8 @@
 import { ChatCompletionMessageParam } from 'openai/resources';
 export { ChatCompletionMessageParam } from 'openai/resources';
-import { c as callAiFn } from './index-f43935c0.js';
-export { d as describeUserPage, p as plan } from './index-f43935c0.js';
-import { B as BaseElement, U as UIContext, e as AIElementParseResponse, f as AISectionParseResponse, g as AIAssertionResponse } from './types-81f7991c.js';
+import { c as callAiFn } from './index-0479d487.js';
+export { d as describeUserPage, p as plan } from './index-0479d487.js';
+import { B as BaseElement, U as UIContext, e as AIElementParseResponse, f as AISectionParseResponse, g as AIAssertionResponse } from './types-3eb61b5c.js';
 declare function AiInspectElement<ElementType extends BaseElement = BaseElement>(options: {
     context: UIContext<ElementType>;

package/dist/types/image.d.ts CHANGED Viewed

@@ -1,5 +1,5 @@
 import { Buffer } from 'node:buffer';
-import { S as Size, R as Rect, h as UISection, U as UIContext, y as Color } from './types-81f7991c.js';
+import { S as Size, R as Rect, h as UISection, U as UIContext, G as Color } from './types-3eb61b5c.js';
 import 'openai/resources';
 /**

package/dist/types/{index-f43935c0.d.ts → index-0479d487.d.ts} RENAMED Viewed

@@ -1,4 +1,4 @@
-import { B as BaseElement, U as UIContext, q as PlanningAction } from './types-81f7991c.js';
+import { B as BaseElement, U as UIContext, r as PlanningAction } from './types-3eb61b5c.js';
 import { ChatCompletionSystemMessageParam, ChatCompletionUserMessageParam } from 'openai/resources';
 type AIArgs = [

package/dist/types/index.d.ts CHANGED Viewed

@@ -1,7 +1,7 @@
-import { E as ExecutionTask, a as ExecutionTaskApply, b as ExecutionDump, B as BaseElement, U as UIContext, D as DumpSubscriber, I as InsightTaskInfo, c as InsightOptions, d as InsightAssertionResponse } from './types-81f7991c.js';
-export { g as AIAssertionResponse, e as AIElementParseResponse, A as AIResponseFormat, f as AISectionParseResponse, z as BaseAgentParserOpt, j as BasicSectionQuery, C as CallAIFn, y as Color, l as DumpMeta, p as ElementById, i as EnsureObject, H as ExecutionRecorderItem, a3 as ExecutionTaskAction, a2 as ExecutionTaskActionApply, a1 as ExecutionTaskInsightAssertion, a0 as ExecutionTaskInsightAssertionApply, $ as ExecutionTaskInsightAssertionParam, Q as ExecutionTaskInsightDumpLog, W as ExecutionTaskInsightLocate, V as ExecutionTaskInsightLocateApply, O as ExecutionTaskInsightLocateOutput, N as ExecutionTaskInsightLocateParam, _ as ExecutionTaskInsightQuery, Z as ExecutionTaskInsightQueryApply, Y as ExecutionTaskInsightQueryOutput, X as ExecutionTaskInsightQueryParam, a5 as ExecutionTaskPlanning, a4 as ExecutionTaskPlanningApply, M as ExecutionTaskReturn, J as ExecutionTaskType, K as ExecutorContext, a6 as GroupedActionDump, n as InsightDump, k as InsightExtractParam, L as LiteUISection, o as PartialInsightDumpFromSDK, r as PlanningAIResponse, q as PlanningAction, w as PlanningActionParamAssert, t as PlanningActionParamHover, u as PlanningActionParamInputOrKeyPress, v as PlanningActionParamScroll, x as PlanningActionParamSleep, s as PlanningActionParamTap, G as PlaywrightParserOpt, P as Point, F as PuppeteerParserOpt, R as Rect, m as ReportDumpWithAttributes, S as Size, T as TaskCacheInfo, h as UISection } from './types-81f7991c.js';
-import { c as callAiFn, r as retrieveElement, a as retrieveSection } from './index-f43935c0.js';
-export { p as plan } from './index-f43935c0.js';
+import { E as ExecutionTask, a as ExecutionTaskApply, b as ExecutionDump, B as BaseElement, U as UIContext, D as DumpSubscriber, I as InsightTaskInfo, c as InsightOptions, d as InsightAssertionResponse } from './types-3eb61b5c.js';
+export { g as AIAssertionResponse, e as AIElementParseResponse, A as AIResponseFormat, f as AISectionParseResponse, q as AgentWaitForOpt, H as BaseAgentParserOpt, j as BasicSectionQuery, C as CallAIFn, G as Color, l as DumpMeta, p as ElementById, i as EnsureObject, M as ExecutionRecorderItem, a6 as ExecutionTaskAction, a5 as ExecutionTaskActionApply, a4 as ExecutionTaskInsightAssertion, a3 as ExecutionTaskInsightAssertionApply, a2 as ExecutionTaskInsightAssertionParam, X as ExecutionTaskInsightDumpLog, Z as ExecutionTaskInsightLocate, Y as ExecutionTaskInsightLocateApply, W as ExecutionTaskInsightLocateOutput, V as ExecutionTaskInsightLocateParam, a1 as ExecutionTaskInsightQuery, a0 as ExecutionTaskInsightQueryApply, $ as ExecutionTaskInsightQueryOutput, _ as ExecutionTaskInsightQueryParam, a8 as ExecutionTaskPlanning, a7 as ExecutionTaskPlanningApply, Q as ExecutionTaskReturn, N as ExecutionTaskType, O as ExecutorContext, a9 as GroupedActionDump, n as InsightDump, k as InsightExtractParam, L as LiteUISection, o as PartialInsightDumpFromSDK, s as PlanningAIResponse, r as PlanningAction, x as PlanningActionParamAssert, z as PlanningActionParamError, u as PlanningActionParamHover, v as PlanningActionParamInputOrKeyPress, w as PlanningActionParamScroll, y as PlanningActionParamSleep, t as PlanningActionParamTap, F as PlanningActionParamWaitFor, K as PlaywrightParserOpt, P as Point, J as PuppeteerParserOpt, R as Rect, m as ReportDumpWithAttributes, S as Size, T as TaskCacheInfo, h as UISection } from './types-3eb61b5c.js';
+import { c as callAiFn, r as retrieveElement, a as retrieveSection } from './index-0479d487.js';
+export { p as plan } from './index-0479d487.js';
 export { setLogDir } from './utils.js';
 import 'openai/resources';

package/dist/types/{types-81f7991c.d.ts → types-3eb61b5c.d.ts} RENAMED Viewed

@@ -10,7 +10,7 @@ interface Size {
 }
 type Rect = Point & Size;
 declare enum NodeType {
-    INPUT = "INPUT Node",
+    FORM_ITEM = "FORM_ITEM Node",
     BUTTON = "BUTTON Node",
     IMG = "IMG Node",
     TEXT = "TEXT Node"
@@ -122,13 +122,20 @@ interface LiteUISection {
 }
 type ElementById = (id: string) => BaseElement | null;
 type InsightAssertionResponse = AIAssertionResponse;
+/**
+ * agent
+ */
+interface AgentWaitForOpt {
+    checkIntervalMs?: number;
+    timeoutMs?: number;
+}
 /**
  * planning
  *
  */
 interface PlanningAction<ParamType = any> {
     thought?: string;
-    type: 'Locate' | 'Tap' | 'Hover' | 'Input' | 'KeyboardPress' | 'Scroll' | 'Error' | 'Assert' | 'Sleep';
+    type: 'Locate' | 'Tap' | 'Hover' | 'Input' | 'KeyboardPress' | 'Scroll' | 'Error' | 'Assert' | 'AssertWithoutThrow' | 'Sleep';
     param: ParamType;
 }
 interface PlanningAIResponse {
@@ -150,6 +157,12 @@ interface PlanningActionParamAssert {
 interface PlanningActionParamSleep {
     timeMs: number;
 }
+interface PlanningActionParamError {
+    thought: string;
+}
+type PlanningActionParamWaitFor = AgentWaitForOpt & {
+    assertion: string;
+};
 /**
  * misc
  */
@@ -191,7 +204,7 @@ interface ExecutionTaskReturn<TaskOutput = unknown, TaskLog = unknown> {
     cache?: TaskCacheInfo;
 }
 type ExecutionTask<E extends ExecutionTaskApply<any, any, any> = ExecutionTaskApply<any, any, any>> = E & ExecutionTaskReturn<E extends ExecutionTaskApply<any, any, infer TaskOutput, any> ? TaskOutput : unknown, E extends ExecutionTaskApply<any, any, any, infer TaskLog> ? TaskLog : unknown> & {
-    status: 'pending' | 'running' | 'success' | 'failed' | 'cancelled';
+    status: 'pending' | 'running' | 'finished' | 'failed' | 'cancelled';
     error?: string;
     errorStack?: string;
     timing?: {
@@ -243,4 +256,4 @@ interface GroupedActionDump {
     executions: ExecutionDump[];
 }
-export { type ExecutionTaskInsightAssertionParam as $, AIResponseFormat as A, BaseElement as B, type CallAIFn as C, type DumpSubscriber as D, type ExecutionTask as E, type PuppeteerParserOpt as F, type PlaywrightParserOpt as G, type ExecutionRecorderItem as H, type InsightTaskInfo as I, type ExecutionTaskType as J, type ExecutorContext as K, type LiteUISection as L, type ExecutionTaskReturn as M, type ExecutionTaskInsightLocateParam as N, type ExecutionTaskInsightLocateOutput as O, type Point as P, type ExecutionTaskInsightDumpLog as Q, type Rect as R, type Size as S, type TaskCacheInfo as T, UIContext as U, type ExecutionTaskInsightLocateApply as V, type ExecutionTaskInsightLocate as W, type ExecutionTaskInsightQueryParam as X, type ExecutionTaskInsightQueryOutput as Y, type ExecutionTaskInsightQueryApply as Z, type ExecutionTaskInsightQuery as _, type ExecutionTaskApply as a, type ExecutionTaskInsightAssertionApply as a0, type ExecutionTaskInsightAssertion as a1, type ExecutionTaskActionApply as a2, type ExecutionTaskAction as a3, type ExecutionTaskPlanningApply as a4, type ExecutionTaskPlanning as a5, type GroupedActionDump as a6, type ExecutionDump as b, type InsightOptions as c, type InsightAssertionResponse as d, type AIElementParseResponse as e, type AISectionParseResponse as f, type AIAssertionResponse as g, type UISection as h, type EnsureObject as i, type BasicSectionQuery as j, type InsightExtractParam as k, type DumpMeta as l, type ReportDumpWithAttributes as m, type InsightDump as n, type PartialInsightDumpFromSDK as o, type ElementById as p, type PlanningAction as q, type PlanningAIResponse as r, type PlanningActionParamTap as s, type PlanningActionParamHover as t, type PlanningActionParamInputOrKeyPress as u, type PlanningActionParamScroll as v, type PlanningActionParamAssert as w, type PlanningActionParamSleep as x, type Color as y, type BaseAgentParserOpt as z };
+export { type ExecutionTaskInsightQueryOutput as $, AIResponseFormat as A, BaseElement as B, type CallAIFn as C, type DumpSubscriber as D, type ExecutionTask as E, type PlanningActionParamWaitFor as F, type Color as G, type BaseAgentParserOpt as H, type InsightTaskInfo as I, type PuppeteerParserOpt as J, type PlaywrightParserOpt as K, type LiteUISection as L, type ExecutionRecorderItem as M, type ExecutionTaskType as N, type ExecutorContext as O, type Point as P, type ExecutionTaskReturn as Q, type Rect as R, type Size as S, type TaskCacheInfo as T, UIContext as U, type ExecutionTaskInsightLocateParam as V, type ExecutionTaskInsightLocateOutput as W, type ExecutionTaskInsightDumpLog as X, type ExecutionTaskInsightLocateApply as Y, type ExecutionTaskInsightLocate as Z, type ExecutionTaskInsightQueryParam as _, type ExecutionTaskApply as a, type ExecutionTaskInsightQueryApply as a0, type ExecutionTaskInsightQuery as a1, type ExecutionTaskInsightAssertionParam as a2, type ExecutionTaskInsightAssertionApply as a3, type ExecutionTaskInsightAssertion as a4, type ExecutionTaskActionApply as a5, type ExecutionTaskAction as a6, type ExecutionTaskPlanningApply as a7, type ExecutionTaskPlanning as a8, type GroupedActionDump as a9, type ExecutionDump as b, type InsightOptions as c, type InsightAssertionResponse as d, type AIElementParseResponse as e, type AISectionParseResponse as f, type AIAssertionResponse as g, type UISection as h, type EnsureObject as i, type BasicSectionQuery as j, type InsightExtractParam as k, type DumpMeta as l, type ReportDumpWithAttributes as m, type InsightDump as n, type PartialInsightDumpFromSDK as o, type ElementById as p, type AgentWaitForOpt as q, type PlanningAction as r, type PlanningAIResponse as s, type PlanningActionParamTap as t, type PlanningActionParamHover as u, type PlanningActionParamInputOrKeyPress as v, type PlanningActionParamScroll as w, type PlanningActionParamAssert as x, type PlanningActionParamSleep as y, type PlanningActionParamError as z };

package/dist/types/utils.d.ts CHANGED Viewed

@@ -1,4 +1,4 @@
-import { m as ReportDumpWithAttributes, R as Rect } from './types-81f7991c.js';
+import { m as ReportDumpWithAttributes, R as Rect } from './types-3eb61b5c.js';
 import 'openai/resources';
 interface PkgInfo {

package/package.json CHANGED Viewed

@@ -1,7 +1,7 @@
 {
   "name": "@midscene/core",
   "description": "Hello, It's Midscene",
-  "version": "0.3.0",
+  "version": "0.3.1-beta-20240821105917.0",
   "jsnext:source": "./src/index.ts",
   "main": "./dist/lib/index.js",
   "module": "./dist/es/index.js",
@@ -60,18 +60,19 @@
     }
   },
   "dependencies": {
+    "node-fetch": "2.6.7",
     "openai": "4.47.1",
-    "sharp": "0.33.3",
-    "node-fetch": "2.6.7"
+    "optional": "0.1.4",
+    "sharp": "0.33.3"
   },
   "devDependencies": {
-    "@types/node-fetch": "2.6.11",
     "@modern-js/module-tools": "^2.56.1",
     "@types/node": "^18.0.0",
+    "@types/node-fetch": "2.6.11",
+    "dotenv": "16.4.5",
     "langsmith": "0.1.36",
     "typescript": "~5.0.4",
-    "vitest": "^1.6.0",
-    "dotenv": "16.4.5"
+    "vitest": "^1.6.0"
   },
   "engines": {
     "node": ">=16.0.0"
@@ -88,6 +89,6 @@
     "new": "modern new",
     "upgrade": "modern upgrade",
     "test": "vitest --run",
-    "test:all": "AITEST=true vitest --run"
+    "test:ai": "AITEST=true npm run test"
   }
 }