npm - @midscene/core - Versions diffs - 0.3.0 → 0.3.1 - Mend

@midscene/core 0.3.0 → 0.3.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (16) hide show

package/LICENSE +2 -2
package/dist/es/ai-model.js +7 -11
package/dist/es/image.js +1 -1
package/dist/es/index.js +12 -12
package/dist/es/utils.js +5 -1
package/dist/lib/ai-model.js +6 -10
package/dist/lib/index.js +11 -11
package/dist/lib/utils.js +4 -0
package/dist/types/ai-model.d.ts +3 -3
package/dist/types/image.d.ts +1 -1
package/dist/types/{index-f43935c0.d.ts → index-7a9ec3e1.d.ts} +1 -1
package/dist/types/index.d.ts +4 -4
package/dist/types/{types-81f7991c.d.ts → types-ed68710b.d.ts} +16 -3
package/dist/types/utils.d.ts +1 -1
package/package.json +8 -7
package/report/index.html +1 -1

package/LICENSE CHANGED Viewed

@@ -1,6 +1,6 @@
 MIT License
-Copyright (c) 2024-present Midscene.js
+Copyright (c) 2024-present Bytedance, Inc. and its affiliates.
 Permission is hereby granted, free of charge, to any person obtaining a copy
 of this software and associated documentation files (the "Software"), to deal
@@ -18,4 +18,4 @@ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
-SOFTWARE.
+SOFTWARE.

package/dist/es/ai-model.js CHANGED Viewed

@@ -4256,7 +4256,8 @@ async function call(messages, responseFormat) {
   const completion = await openai.chat.completions.create({
     model,
     messages,
-    response_format: { type: responseFormat }
+    response_format: { type: responseFormat },
+    temperature: 0.2
   });
   const { content } = completion.choices[0].message;
   assert(content, "empty content");
@@ -4439,7 +4440,7 @@ import {
   writeFileSync
 } from "fs";
 import { tmpdir } from "os";
-import path, { basename, join } from "path";
+import path, { basename, dirname, join } from "path";
 var logDir = join(process.cwd(), "./midscene_run/");
 // src/image/visualization.ts
@@ -4615,8 +4616,8 @@ async function callCozeAi(options) {
   }
   const aiResponse = await completion.json();
   if (aiResponse.code !== 0) {
-    console.error("CozeAI error response", aiResponse);
-    throw new Error("CozeAI error response", aiResponse);
+    console.error("CozeAI error response", aiResponse.msg);
+    throw new Error(`CozeAI error response ${aiResponse.msg}`);
   }
   if (!(aiResponse == null ? void 0 : aiResponse.messages) || !((_a = aiResponse == null ? void 0 : aiResponse.messages[0]) == null ? void 0 : _a.content)) {
     console.error("aiResponse", aiResponse);
@@ -4869,7 +4870,7 @@ function systemPromptToTaskPlanning() {
     * param: { timeMs: number }, wait for timeMs milliseconds
   Here is an example of how to decompose a task.
-  When a user says 'Input "Weather in Shanghai" into the search bar, wait 1 second, hit enter', by viewing the page screenshot and description, you my decompose this task into something like this:
+  When a user says 'Input "Weather in Shanghai" into the search bar, wait 1 second, hit enter', by viewing the page screenshot and description, you may decompose this task into something like this:
   * Find: 'The search bar'
   * Input: 'Weather in Shanghai'
   * Sleep: 1000
@@ -4879,7 +4880,7 @@ function systemPromptToTaskPlanning() {
   1. The actions you composed MUST be based on the page context information you get. Instead of making up actions that are not related to the page context.
   2. In most cases, you should Locate one element first, then do other actions on it. For example, alway Find one element, then hover on it. But if you think it's necessary to do other actions first (like global scroll, global key press), you can do that.
-  If the planned tasks are sequential and tasks may appear only after the execution of previous tasks, this is considered normal. If any errors occur during task planning (such as the page content being irrelevant to the task or the mentioned element not existing), please return the error message with an explanation in the errors field. Thoughts, prompts, and error messages should all be in the same language as the user query.
+  If the planned tasks are sequential and tasks may appear only after the execution of previous tasks, this is considered normal. Thoughts, prompts, and error messages should all be in the same language as the user query.
   Return in the following JSON format:
   {
@@ -4953,11 +4954,6 @@ async function plan(userPrompt, opts, useModel) {
   if (planFromAI.error) {
     throw new Error(planFromAI.error);
   }
-  actions.forEach((task) => {
-    if (task.type === "Error") {
-      throw new Error(task.thought);
-    }
-  });
   return { plans: actions };
 }
 export {

package/dist/es/image.js CHANGED Viewed

@@ -130,7 +130,7 @@ import {
   writeFileSync
 } from "fs";
 import { tmpdir } from "os";
-import path, { basename, join } from "path";
+import path, { basename, dirname, join } from "path";
 var pkg;
 function getPkgInfo() {
   if (pkg) {

package/dist/es/index.js CHANGED Viewed

@@ -1021,7 +1021,7 @@ import {
   writeFileSync
 } from "fs";
 import { tmpdir } from "os";
-import path, { basename, join } from "path";
+import path, { basename, dirname, join } from "path";
 var pkg;
 function getPkgInfo() {
   if (pkg) {
@@ -1106,6 +1106,10 @@ ${logDirName}/dump
     logEnvReady = true;
   }
   const filePath = join(targetDir, `${fileName}.${fileExt}`);
+  const outputResourceDir = dirname(filePath);
+  if (!existsSync(outputResourceDir)) {
+    mkdirSync(outputResourceDir, { recursive: true });
+  }
   writeFileSync(filePath, fileContent);
   if (opts == null ? void 0 : opts.generateReport) {
     return writeDumpReport(fileName, fileContent);
@@ -1228,7 +1232,7 @@ var Executor = class {
           returnValue = await task.executor(param, executorContext);
         }
         Object.assign(task, returnValue);
-        task.status = "success";
+        task.status = "finished";
         task.timing.end = Date.now();
         task.timing.cost = task.timing.end - task.timing.start;
         taskIndex++;
@@ -4547,7 +4551,8 @@ async function call(messages, responseFormat) {
   const completion = await openai.chat.completions.create({
     model,
     messages,
-    response_format: { type: responseFormat }
+    response_format: { type: responseFormat },
+    temperature: 0.2
   });
   const { content } = completion.choices[0].message;
   assert3(content, "empty content");
@@ -4926,8 +4931,8 @@ async function callCozeAi(options) {
   }
   const aiResponse = await completion.json();
   if (aiResponse.code !== 0) {
-    console.error("CozeAI error response", aiResponse);
-    throw new Error("CozeAI error response", aiResponse);
+    console.error("CozeAI error response", aiResponse.msg);
+    throw new Error(`CozeAI error response ${aiResponse.msg}`);
   }
   if (!(aiResponse == null ? void 0 : aiResponse.messages) || !((_a = aiResponse == null ? void 0 : aiResponse.messages[0]) == null ? void 0 : _a.content)) {
     console.error("aiResponse", aiResponse);
@@ -5180,7 +5185,7 @@ function systemPromptToTaskPlanning() {
     * param: { timeMs: number }, wait for timeMs milliseconds
   Here is an example of how to decompose a task.
-  When a user says 'Input "Weather in Shanghai" into the search bar, wait 1 second, hit enter', by viewing the page screenshot and description, you my decompose this task into something like this:
+  When a user says 'Input "Weather in Shanghai" into the search bar, wait 1 second, hit enter', by viewing the page screenshot and description, you may decompose this task into something like this:
   * Find: 'The search bar'
   * Input: 'Weather in Shanghai'
   * Sleep: 1000
@@ -5190,7 +5195,7 @@ function systemPromptToTaskPlanning() {
   1. The actions you composed MUST be based on the page context information you get. Instead of making up actions that are not related to the page context.
   2. In most cases, you should Locate one element first, then do other actions on it. For example, alway Find one element, then hover on it. But if you think it's necessary to do other actions first (like global scroll, global key press), you can do that.
-  If the planned tasks are sequential and tasks may appear only after the execution of previous tasks, this is considered normal. If any errors occur during task planning (such as the page content being irrelevant to the task or the mentioned element not existing), please return the error message with an explanation in the errors field. Thoughts, prompts, and error messages should all be in the same language as the user query.
+  If the planned tasks are sequential and tasks may appear only after the execution of previous tasks, this is considered normal. Thoughts, prompts, and error messages should all be in the same language as the user query.
   Return in the following JSON format:
   {
@@ -5264,11 +5269,6 @@ async function plan(userPrompt, opts, useModel) {
   if (planFromAI.error) {
     throw new Error(planFromAI.error);
   }
-  actions.forEach((task) => {
-    if (task.type === "Error") {
-      throw new Error(task.thought);
-    }
-  });
   return { plans: actions };
 }

package/dist/es/utils.js CHANGED Viewed

@@ -8,7 +8,7 @@ import {
   writeFileSync
 } from "fs";
 import { tmpdir } from "os";
-import path, { basename, join } from "path";
+import path, { basename, dirname, join } from "path";
 var pkg;
 function getPkgInfo() {
   if (pkg) {
@@ -94,6 +94,10 @@ ${logDirName}/dump
     logEnvReady = true;
   }
   const filePath = join(targetDir, `${fileName}.${fileExt}`);
+  const outputResourceDir = dirname(filePath);
+  if (!existsSync(outputResourceDir)) {
+    mkdirSync(outputResourceDir, { recursive: true });
+  }
   writeFileSync(filePath, fileContent);
   if (opts == null ? void 0 : opts.generateReport) {
     return writeDumpReport(fileName, fileContent);

package/dist/lib/ai-model.js CHANGED Viewed

@@ -4276,7 +4276,8 @@ async function call(messages, responseFormat) {
   const completion = await openai.chat.completions.create({
     model,
     messages,
-    response_format: { type: responseFormat }
+    response_format: { type: responseFormat },
+    temperature: 0.2
   });
   const { content } = completion.choices[0].message;
   (0, import_node_assert.default)(content, "empty content");
@@ -4630,8 +4631,8 @@ async function callCozeAi(options) {
   }
   const aiResponse = await completion.json();
   if (aiResponse.code !== 0) {
-    console.error("CozeAI error response", aiResponse);
-    throw new Error("CozeAI error response", aiResponse);
+    console.error("CozeAI error response", aiResponse.msg);
+    throw new Error(`CozeAI error response ${aiResponse.msg}`);
   }
   if (!(aiResponse == null ? void 0 : aiResponse.messages) || !((_a = aiResponse == null ? void 0 : aiResponse.messages[0]) == null ? void 0 : _a.content)) {
     console.error("aiResponse", aiResponse);
@@ -4884,7 +4885,7 @@ function systemPromptToTaskPlanning() {
     * param: { timeMs: number }, wait for timeMs milliseconds
   Here is an example of how to decompose a task.
-  When a user says 'Input "Weather in Shanghai" into the search bar, wait 1 second, hit enter', by viewing the page screenshot and description, you my decompose this task into something like this:
+  When a user says 'Input "Weather in Shanghai" into the search bar, wait 1 second, hit enter', by viewing the page screenshot and description, you may decompose this task into something like this:
   * Find: 'The search bar'
   * Input: 'Weather in Shanghai'
   * Sleep: 1000
@@ -4894,7 +4895,7 @@ function systemPromptToTaskPlanning() {
   1. The actions you composed MUST be based on the page context information you get. Instead of making up actions that are not related to the page context.
   2. In most cases, you should Locate one element first, then do other actions on it. For example, alway Find one element, then hover on it. But if you think it's necessary to do other actions first (like global scroll, global key press), you can do that.
-  If the planned tasks are sequential and tasks may appear only after the execution of previous tasks, this is considered normal. If any errors occur during task planning (such as the page content being irrelevant to the task or the mentioned element not existing), please return the error message with an explanation in the errors field. Thoughts, prompts, and error messages should all be in the same language as the user query.
+  If the planned tasks are sequential and tasks may appear only after the execution of previous tasks, this is considered normal. Thoughts, prompts, and error messages should all be in the same language as the user query.
   Return in the following JSON format:
   {
@@ -4968,11 +4969,6 @@ async function plan(userPrompt, opts, useModel) {
   if (planFromAI.error) {
     throw new Error(planFromAI.error);
   }
-  actions.forEach((task) => {
-    if (task.type === "Error") {
-      throw new Error(task.thought);
-    }
-  });
   return { plans: actions };
 }
 // Annotate the CommonJS export names for ESM import in node:

package/dist/lib/index.js CHANGED Viewed

@@ -1122,6 +1122,10 @@ ${logDirName}/dump
     logEnvReady = true;
   }
   const filePath = (0, import_node_path.join)(targetDir, `${fileName}.${fileExt}`);
+  const outputResourceDir = (0, import_node_path.dirname)(filePath);
+  if (!(0, import_node_fs.existsSync)(outputResourceDir)) {
+    (0, import_node_fs.mkdirSync)(outputResourceDir, { recursive: true });
+  }
   (0, import_node_fs.writeFileSync)(filePath, fileContent);
   if (opts == null ? void 0 : opts.generateReport) {
     return writeDumpReport(fileName, fileContent);
@@ -1244,7 +1248,7 @@ var Executor = class {
           returnValue = await task.executor(param, executorContext);
         }
         Object.assign(task, returnValue);
-        task.status = "success";
+        task.status = "finished";
         task.timing.end = Date.now();
         task.timing.cost = task.timing.end - task.timing.start;
         taskIndex++;
@@ -4563,7 +4567,8 @@ async function call(messages, responseFormat) {
   const completion = await openai.chat.completions.create({
     model,
     messages,
-    response_format: { type: responseFormat }
+    response_format: { type: responseFormat },
+    temperature: 0.2
   });
   const { content } = completion.choices[0].message;
   (0, import_node_assert3.default)(content, "empty content");
@@ -4942,8 +4947,8 @@ async function callCozeAi(options) {
   }
   const aiResponse = await completion.json();
   if (aiResponse.code !== 0) {
-    console.error("CozeAI error response", aiResponse);
-    throw new Error("CozeAI error response", aiResponse);
+    console.error("CozeAI error response", aiResponse.msg);
+    throw new Error(`CozeAI error response ${aiResponse.msg}`);
   }
   if (!(aiResponse == null ? void 0 : aiResponse.messages) || !((_a = aiResponse == null ? void 0 : aiResponse.messages[0]) == null ? void 0 : _a.content)) {
     console.error("aiResponse", aiResponse);
@@ -5196,7 +5201,7 @@ function systemPromptToTaskPlanning() {
     * param: { timeMs: number }, wait for timeMs milliseconds
   Here is an example of how to decompose a task.
-  When a user says 'Input "Weather in Shanghai" into the search bar, wait 1 second, hit enter', by viewing the page screenshot and description, you my decompose this task into something like this:
+  When a user says 'Input "Weather in Shanghai" into the search bar, wait 1 second, hit enter', by viewing the page screenshot and description, you may decompose this task into something like this:
   * Find: 'The search bar'
   * Input: 'Weather in Shanghai'
   * Sleep: 1000
@@ -5206,7 +5211,7 @@ function systemPromptToTaskPlanning() {
   1. The actions you composed MUST be based on the page context information you get. Instead of making up actions that are not related to the page context.
   2. In most cases, you should Locate one element first, then do other actions on it. For example, alway Find one element, then hover on it. But if you think it's necessary to do other actions first (like global scroll, global key press), you can do that.
-  If the planned tasks are sequential and tasks may appear only after the execution of previous tasks, this is considered normal. If any errors occur during task planning (such as the page content being irrelevant to the task or the mentioned element not existing), please return the error message with an explanation in the errors field. Thoughts, prompts, and error messages should all be in the same language as the user query.
+  If the planned tasks are sequential and tasks may appear only after the execution of previous tasks, this is considered normal. Thoughts, prompts, and error messages should all be in the same language as the user query.
   Return in the following JSON format:
   {
@@ -5280,11 +5285,6 @@ async function plan(userPrompt, opts, useModel) {
   if (planFromAI.error) {
     throw new Error(planFromAI.error);
   }
-  actions.forEach((task) => {
-    if (task.type === "Error") {
-      throw new Error(task.thought);
-    }
-  });
   return { plans: actions };
 }

package/dist/lib/utils.js CHANGED Viewed

@@ -138,6 +138,10 @@ ${logDirName}/dump
     logEnvReady = true;
   }
   const filePath = (0, import_node_path.join)(targetDir, `${fileName}.${fileExt}`);
+  const outputResourceDir = (0, import_node_path.dirname)(filePath);
+  if (!(0, import_node_fs.existsSync)(outputResourceDir)) {
+    (0, import_node_fs.mkdirSync)(outputResourceDir, { recursive: true });
+  }
   (0, import_node_fs.writeFileSync)(filePath, fileContent);
   if (opts == null ? void 0 : opts.generateReport) {
     return writeDumpReport(fileName, fileContent);

package/dist/types/ai-model.d.ts CHANGED Viewed

@@ -1,8 +1,8 @@
 import { ChatCompletionMessageParam } from 'openai/resources';
 export { ChatCompletionMessageParam } from 'openai/resources';
-import { c as callAiFn } from './index-f43935c0.js';
-export { d as describeUserPage, p as plan } from './index-f43935c0.js';
-import { B as BaseElement, U as UIContext, e as AIElementParseResponse, f as AISectionParseResponse, g as AIAssertionResponse } from './types-81f7991c.js';
+import { c as callAiFn } from './index-7a9ec3e1.js';
+export { d as describeUserPage, p as plan } from './index-7a9ec3e1.js';
+import { B as BaseElement, U as UIContext, e as AIElementParseResponse, f as AISectionParseResponse, g as AIAssertionResponse } from './types-ed68710b.js';
 declare function AiInspectElement<ElementType extends BaseElement = BaseElement>(options: {
     context: UIContext<ElementType>;

package/dist/types/image.d.ts CHANGED Viewed

@@ -1,5 +1,5 @@
 import { Buffer } from 'node:buffer';
-import { S as Size, R as Rect, h as UISection, U as UIContext, y as Color } from './types-81f7991c.js';
+import { S as Size, R as Rect, h as UISection, U as UIContext, G as Color } from './types-ed68710b.js';
 import 'openai/resources';
 /**

package/dist/types/{index-f43935c0.d.ts → index-7a9ec3e1.d.ts} RENAMED Viewed

@@ -1,4 +1,4 @@
-import { B as BaseElement, U as UIContext, q as PlanningAction } from './types-81f7991c.js';
+import { B as BaseElement, U as UIContext, r as PlanningAction } from './types-ed68710b.js';
 import { ChatCompletionSystemMessageParam, ChatCompletionUserMessageParam } from 'openai/resources';
 type AIArgs = [

package/dist/types/index.d.ts CHANGED Viewed

@@ -1,7 +1,7 @@
-import { E as ExecutionTask, a as ExecutionTaskApply, b as ExecutionDump, B as BaseElement, U as UIContext, D as DumpSubscriber, I as InsightTaskInfo, c as InsightOptions, d as InsightAssertionResponse } from './types-81f7991c.js';
-export { g as AIAssertionResponse, e as AIElementParseResponse, A as AIResponseFormat, f as AISectionParseResponse, z as BaseAgentParserOpt, j as BasicSectionQuery, C as CallAIFn, y as Color, l as DumpMeta, p as ElementById, i as EnsureObject, H as ExecutionRecorderItem, a3 as ExecutionTaskAction, a2 as ExecutionTaskActionApply, a1 as ExecutionTaskInsightAssertion, a0 as ExecutionTaskInsightAssertionApply, $ as ExecutionTaskInsightAssertionParam, Q as ExecutionTaskInsightDumpLog, W as ExecutionTaskInsightLocate, V as ExecutionTaskInsightLocateApply, O as ExecutionTaskInsightLocateOutput, N as ExecutionTaskInsightLocateParam, _ as ExecutionTaskInsightQuery, Z as ExecutionTaskInsightQueryApply, Y as ExecutionTaskInsightQueryOutput, X as ExecutionTaskInsightQueryParam, a5 as ExecutionTaskPlanning, a4 as ExecutionTaskPlanningApply, M as ExecutionTaskReturn, J as ExecutionTaskType, K as ExecutorContext, a6 as GroupedActionDump, n as InsightDump, k as InsightExtractParam, L as LiteUISection, o as PartialInsightDumpFromSDK, r as PlanningAIResponse, q as PlanningAction, w as PlanningActionParamAssert, t as PlanningActionParamHover, u as PlanningActionParamInputOrKeyPress, v as PlanningActionParamScroll, x as PlanningActionParamSleep, s as PlanningActionParamTap, G as PlaywrightParserOpt, P as Point, F as PuppeteerParserOpt, R as Rect, m as ReportDumpWithAttributes, S as Size, T as TaskCacheInfo, h as UISection } from './types-81f7991c.js';
-import { c as callAiFn, r as retrieveElement, a as retrieveSection } from './index-f43935c0.js';
-export { p as plan } from './index-f43935c0.js';
+import { E as ExecutionTask, a as ExecutionTaskApply, b as ExecutionDump, B as BaseElement, U as UIContext, D as DumpSubscriber, I as InsightTaskInfo, c as InsightOptions, d as InsightAssertionResponse } from './types-ed68710b.js';
+export { g as AIAssertionResponse, e as AIElementParseResponse, A as AIResponseFormat, f as AISectionParseResponse, q as AgentWaitForOpt, H as BaseAgentParserOpt, j as BasicSectionQuery, C as CallAIFn, G as Color, l as DumpMeta, p as ElementById, i as EnsureObject, M as ExecutionRecorderItem, a6 as ExecutionTaskAction, a5 as ExecutionTaskActionApply, a4 as ExecutionTaskInsightAssertion, a3 as ExecutionTaskInsightAssertionApply, a2 as ExecutionTaskInsightAssertionParam, X as ExecutionTaskInsightDumpLog, Z as ExecutionTaskInsightLocate, Y as ExecutionTaskInsightLocateApply, W as ExecutionTaskInsightLocateOutput, V as ExecutionTaskInsightLocateParam, a1 as ExecutionTaskInsightQuery, a0 as ExecutionTaskInsightQueryApply, $ as ExecutionTaskInsightQueryOutput, _ as ExecutionTaskInsightQueryParam, a8 as ExecutionTaskPlanning, a7 as ExecutionTaskPlanningApply, Q as ExecutionTaskReturn, N as ExecutionTaskType, O as ExecutorContext, a9 as GroupedActionDump, n as InsightDump, k as InsightExtractParam, L as LiteUISection, o as PartialInsightDumpFromSDK, s as PlanningAIResponse, r as PlanningAction, x as PlanningActionParamAssert, z as PlanningActionParamError, u as PlanningActionParamHover, v as PlanningActionParamInputOrKeyPress, w as PlanningActionParamScroll, y as PlanningActionParamSleep, t as PlanningActionParamTap, F as PlanningActionParamWaitFor, K as PlaywrightParserOpt, P as Point, J as PuppeteerParserOpt, R as Rect, m as ReportDumpWithAttributes, S as Size, T as TaskCacheInfo, h as UISection } from './types-ed68710b.js';
+import { c as callAiFn, r as retrieveElement, a as retrieveSection } from './index-7a9ec3e1.js';
+export { p as plan } from './index-7a9ec3e1.js';
 export { setLogDir } from './utils.js';
 import 'openai/resources';

package/dist/types/{types-81f7991c.d.ts → types-ed68710b.d.ts} RENAMED Viewed

@@ -122,13 +122,20 @@ interface LiteUISection {
 }
 type ElementById = (id: string) => BaseElement | null;
 type InsightAssertionResponse = AIAssertionResponse;
+/**
+ * agent
+ */
+interface AgentWaitForOpt {
+    checkIntervalMs?: number;
+    timeoutMs?: number;
+}
 /**
  * planning
  *
  */
 interface PlanningAction<ParamType = any> {
     thought?: string;
-    type: 'Locate' | 'Tap' | 'Hover' | 'Input' | 'KeyboardPress' | 'Scroll' | 'Error' | 'Assert' | 'Sleep';
+    type: 'Locate' | 'Tap' | 'Hover' | 'Input' | 'KeyboardPress' | 'Scroll' | 'Error' | 'Assert' | 'AssertWithoutThrow' | 'Sleep';
     param: ParamType;
 }
 interface PlanningAIResponse {
@@ -150,6 +157,12 @@ interface PlanningActionParamAssert {
 interface PlanningActionParamSleep {
     timeMs: number;
 }
+interface PlanningActionParamError {
+    thought: string;
+}
+type PlanningActionParamWaitFor = AgentWaitForOpt & {
+    assertion: string;
+};
 /**
  * misc
  */
@@ -191,7 +204,7 @@ interface ExecutionTaskReturn<TaskOutput = unknown, TaskLog = unknown> {
     cache?: TaskCacheInfo;
 }
 type ExecutionTask<E extends ExecutionTaskApply<any, any, any> = ExecutionTaskApply<any, any, any>> = E & ExecutionTaskReturn<E extends ExecutionTaskApply<any, any, infer TaskOutput, any> ? TaskOutput : unknown, E extends ExecutionTaskApply<any, any, any, infer TaskLog> ? TaskLog : unknown> & {
-    status: 'pending' | 'running' | 'success' | 'failed' | 'cancelled';
+    status: 'pending' | 'running' | 'finished' | 'failed' | 'cancelled';
     error?: string;
     errorStack?: string;
     timing?: {
@@ -243,4 +256,4 @@ interface GroupedActionDump {
     executions: ExecutionDump[];
 }
-export { type ExecutionTaskInsightAssertionParam as $, AIResponseFormat as A, BaseElement as B, type CallAIFn as C, type DumpSubscriber as D, type ExecutionTask as E, type PuppeteerParserOpt as F, type PlaywrightParserOpt as G, type ExecutionRecorderItem as H, type InsightTaskInfo as I, type ExecutionTaskType as J, type ExecutorContext as K, type LiteUISection as L, type ExecutionTaskReturn as M, type ExecutionTaskInsightLocateParam as N, type ExecutionTaskInsightLocateOutput as O, type Point as P, type ExecutionTaskInsightDumpLog as Q, type Rect as R, type Size as S, type TaskCacheInfo as T, UIContext as U, type ExecutionTaskInsightLocateApply as V, type ExecutionTaskInsightLocate as W, type ExecutionTaskInsightQueryParam as X, type ExecutionTaskInsightQueryOutput as Y, type ExecutionTaskInsightQueryApply as Z, type ExecutionTaskInsightQuery as _, type ExecutionTaskApply as a, type ExecutionTaskInsightAssertionApply as a0, type ExecutionTaskInsightAssertion as a1, type ExecutionTaskActionApply as a2, type ExecutionTaskAction as a3, type ExecutionTaskPlanningApply as a4, type ExecutionTaskPlanning as a5, type GroupedActionDump as a6, type ExecutionDump as b, type InsightOptions as c, type InsightAssertionResponse as d, type AIElementParseResponse as e, type AISectionParseResponse as f, type AIAssertionResponse as g, type UISection as h, type EnsureObject as i, type BasicSectionQuery as j, type InsightExtractParam as k, type DumpMeta as l, type ReportDumpWithAttributes as m, type InsightDump as n, type PartialInsightDumpFromSDK as o, type ElementById as p, type PlanningAction as q, type PlanningAIResponse as r, type PlanningActionParamTap as s, type PlanningActionParamHover as t, type PlanningActionParamInputOrKeyPress as u, type PlanningActionParamScroll as v, type PlanningActionParamAssert as w, type PlanningActionParamSleep as x, type Color as y, type BaseAgentParserOpt as z };
+export { type ExecutionTaskInsightQueryOutput as $, AIResponseFormat as A, BaseElement as B, type CallAIFn as C, type DumpSubscriber as D, type ExecutionTask as E, type PlanningActionParamWaitFor as F, type Color as G, type BaseAgentParserOpt as H, type InsightTaskInfo as I, type PuppeteerParserOpt as J, type PlaywrightParserOpt as K, type LiteUISection as L, type ExecutionRecorderItem as M, type ExecutionTaskType as N, type ExecutorContext as O, type Point as P, type ExecutionTaskReturn as Q, type Rect as R, type Size as S, type TaskCacheInfo as T, UIContext as U, type ExecutionTaskInsightLocateParam as V, type ExecutionTaskInsightLocateOutput as W, type ExecutionTaskInsightDumpLog as X, type ExecutionTaskInsightLocateApply as Y, type ExecutionTaskInsightLocate as Z, type ExecutionTaskInsightQueryParam as _, type ExecutionTaskApply as a, type ExecutionTaskInsightQueryApply as a0, type ExecutionTaskInsightQuery as a1, type ExecutionTaskInsightAssertionParam as a2, type ExecutionTaskInsightAssertionApply as a3, type ExecutionTaskInsightAssertion as a4, type ExecutionTaskActionApply as a5, type ExecutionTaskAction as a6, type ExecutionTaskPlanningApply as a7, type ExecutionTaskPlanning as a8, type GroupedActionDump as a9, type ExecutionDump as b, type InsightOptions as c, type InsightAssertionResponse as d, type AIElementParseResponse as e, type AISectionParseResponse as f, type AIAssertionResponse as g, type UISection as h, type EnsureObject as i, type BasicSectionQuery as j, type InsightExtractParam as k, type DumpMeta as l, type ReportDumpWithAttributes as m, type InsightDump as n, type PartialInsightDumpFromSDK as o, type ElementById as p, type AgentWaitForOpt as q, type PlanningAction as r, type PlanningAIResponse as s, type PlanningActionParamTap as t, type PlanningActionParamHover as u, type PlanningActionParamInputOrKeyPress as v, type PlanningActionParamScroll as w, type PlanningActionParamAssert as x, type PlanningActionParamSleep as y, type PlanningActionParamError as z };

package/dist/types/utils.d.ts CHANGED Viewed

@@ -1,4 +1,4 @@
-import { m as ReportDumpWithAttributes, R as Rect } from './types-81f7991c.js';
+import { m as ReportDumpWithAttributes, R as Rect } from './types-ed68710b.js';
 import 'openai/resources';
 interface PkgInfo {

package/package.json CHANGED Viewed

@@ -1,7 +1,7 @@
 {
   "name": "@midscene/core",
   "description": "Hello, It's Midscene",
-  "version": "0.3.0",
+  "version": "0.3.1",
   "jsnext:source": "./src/index.ts",
   "main": "./dist/lib/index.js",
   "module": "./dist/es/index.js",
@@ -60,18 +60,19 @@
     }
   },
   "dependencies": {
+    "node-fetch": "2.6.7",
     "openai": "4.47.1",
-    "sharp": "0.33.3",
-    "node-fetch": "2.6.7"
+    "optional": "0.1.4",
+    "sharp": "0.33.3"
   },
   "devDependencies": {
-    "@types/node-fetch": "2.6.11",
     "@modern-js/module-tools": "^2.56.1",
     "@types/node": "^18.0.0",
+    "@types/node-fetch": "2.6.11",
+    "dotenv": "16.4.5",
     "langsmith": "0.1.36",
     "typescript": "~5.0.4",
-    "vitest": "^1.6.0",
-    "dotenv": "16.4.5"
+    "vitest": "^1.6.0"
   },
   "engines": {
     "node": ">=16.0.0"
@@ -88,6 +89,6 @@
     "new": "modern new",
     "upgrade": "modern upgrade",
     "test": "vitest --run",
-    "test:all": "AITEST=true vitest --run"
+    "test:ai": "AITEST=true npm run test"
   }
 }