@midscene/core 0.3.0 → 0.3.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/LICENSE CHANGED
@@ -1,6 +1,6 @@
1
1
  MIT License
2
2
 
3
- Copyright (c) 2024-present Midscene.js
3
+ Copyright (c) 2024-present Bytedance, Inc. and its affiliates.
4
4
 
5
5
  Permission is hereby granted, free of charge, to any person obtaining a copy
6
6
  of this software and associated documentation files (the "Software"), to deal
@@ -18,4 +18,4 @@ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
18
  AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
19
  LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
20
  OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
- SOFTWARE.
21
+ SOFTWARE.
@@ -4256,7 +4256,8 @@ async function call(messages, responseFormat) {
4256
4256
  const completion = await openai.chat.completions.create({
4257
4257
  model,
4258
4258
  messages,
4259
- response_format: { type: responseFormat }
4259
+ response_format: { type: responseFormat },
4260
+ temperature: 0.2
4260
4261
  });
4261
4262
  const { content } = completion.choices[0].message;
4262
4263
  assert(content, "empty content");
@@ -4439,7 +4440,7 @@ import {
4439
4440
  writeFileSync
4440
4441
  } from "fs";
4441
4442
  import { tmpdir } from "os";
4442
- import path, { basename, join } from "path";
4443
+ import path, { basename, dirname, join } from "path";
4443
4444
  var logDir = join(process.cwd(), "./midscene_run/");
4444
4445
 
4445
4446
  // src/image/visualization.ts
@@ -4615,8 +4616,8 @@ async function callCozeAi(options) {
4615
4616
  }
4616
4617
  const aiResponse = await completion.json();
4617
4618
  if (aiResponse.code !== 0) {
4618
- console.error("CozeAI error response", aiResponse);
4619
- throw new Error("CozeAI error response", aiResponse);
4619
+ console.error("CozeAI error response", aiResponse.msg);
4620
+ throw new Error(`CozeAI error response ${aiResponse.msg}`);
4620
4621
  }
4621
4622
  if (!(aiResponse == null ? void 0 : aiResponse.messages) || !((_a = aiResponse == null ? void 0 : aiResponse.messages[0]) == null ? void 0 : _a.content)) {
4622
4623
  console.error("aiResponse", aiResponse);
@@ -4869,7 +4870,7 @@ function systemPromptToTaskPlanning() {
4869
4870
  * param: { timeMs: number }, wait for timeMs milliseconds
4870
4871
 
4871
4872
  Here is an example of how to decompose a task.
4872
- When a user says 'Input "Weather in Shanghai" into the search bar, wait 1 second, hit enter', by viewing the page screenshot and description, you my decompose this task into something like this:
4873
+ When a user says 'Input "Weather in Shanghai" into the search bar, wait 1 second, hit enter', by viewing the page screenshot and description, you may decompose this task into something like this:
4873
4874
  * Find: 'The search bar'
4874
4875
  * Input: 'Weather in Shanghai'
4875
4876
  * Sleep: 1000
@@ -4879,7 +4880,7 @@ function systemPromptToTaskPlanning() {
4879
4880
  1. The actions you composed MUST be based on the page context information you get. Instead of making up actions that are not related to the page context.
4880
4881
  2. In most cases, you should Locate one element first, then do other actions on it. For example, alway Find one element, then hover on it. But if you think it's necessary to do other actions first (like global scroll, global key press), you can do that.
4881
4882
 
4882
- If the planned tasks are sequential and tasks may appear only after the execution of previous tasks, this is considered normal. If any errors occur during task planning (such as the page content being irrelevant to the task or the mentioned element not existing), please return the error message with an explanation in the errors field. Thoughts, prompts, and error messages should all be in the same language as the user query.
4883
+ If the planned tasks are sequential and tasks may appear only after the execution of previous tasks, this is considered normal. Thoughts, prompts, and error messages should all be in the same language as the user query.
4883
4884
 
4884
4885
  Return in the following JSON format:
4885
4886
  {
@@ -4953,11 +4954,6 @@ async function plan(userPrompt, opts, useModel) {
4953
4954
  if (planFromAI.error) {
4954
4955
  throw new Error(planFromAI.error);
4955
4956
  }
4956
- actions.forEach((task) => {
4957
- if (task.type === "Error") {
4958
- throw new Error(task.thought);
4959
- }
4960
- });
4961
4957
  return { plans: actions };
4962
4958
  }
4963
4959
  export {
package/dist/es/image.js CHANGED
@@ -130,7 +130,7 @@ import {
130
130
  writeFileSync
131
131
  } from "fs";
132
132
  import { tmpdir } from "os";
133
- import path, { basename, join } from "path";
133
+ import path, { basename, dirname, join } from "path";
134
134
  var pkg;
135
135
  function getPkgInfo() {
136
136
  if (pkg) {
package/dist/es/index.js CHANGED
@@ -1021,7 +1021,7 @@ import {
1021
1021
  writeFileSync
1022
1022
  } from "fs";
1023
1023
  import { tmpdir } from "os";
1024
- import path, { basename, join } from "path";
1024
+ import path, { basename, dirname, join } from "path";
1025
1025
  var pkg;
1026
1026
  function getPkgInfo() {
1027
1027
  if (pkg) {
@@ -1106,6 +1106,10 @@ ${logDirName}/dump
1106
1106
  logEnvReady = true;
1107
1107
  }
1108
1108
  const filePath = join(targetDir, `${fileName}.${fileExt}`);
1109
+ const outputResourceDir = dirname(filePath);
1110
+ if (!existsSync(outputResourceDir)) {
1111
+ mkdirSync(outputResourceDir, { recursive: true });
1112
+ }
1109
1113
  writeFileSync(filePath, fileContent);
1110
1114
  if (opts == null ? void 0 : opts.generateReport) {
1111
1115
  return writeDumpReport(fileName, fileContent);
@@ -1228,7 +1232,7 @@ var Executor = class {
1228
1232
  returnValue = await task.executor(param, executorContext);
1229
1233
  }
1230
1234
  Object.assign(task, returnValue);
1231
- task.status = "success";
1235
+ task.status = "finished";
1232
1236
  task.timing.end = Date.now();
1233
1237
  task.timing.cost = task.timing.end - task.timing.start;
1234
1238
  taskIndex++;
@@ -4547,7 +4551,8 @@ async function call(messages, responseFormat) {
4547
4551
  const completion = await openai.chat.completions.create({
4548
4552
  model,
4549
4553
  messages,
4550
- response_format: { type: responseFormat }
4554
+ response_format: { type: responseFormat },
4555
+ temperature: 0.2
4551
4556
  });
4552
4557
  const { content } = completion.choices[0].message;
4553
4558
  assert3(content, "empty content");
@@ -4926,8 +4931,8 @@ async function callCozeAi(options) {
4926
4931
  }
4927
4932
  const aiResponse = await completion.json();
4928
4933
  if (aiResponse.code !== 0) {
4929
- console.error("CozeAI error response", aiResponse);
4930
- throw new Error("CozeAI error response", aiResponse);
4934
+ console.error("CozeAI error response", aiResponse.msg);
4935
+ throw new Error(`CozeAI error response ${aiResponse.msg}`);
4931
4936
  }
4932
4937
  if (!(aiResponse == null ? void 0 : aiResponse.messages) || !((_a = aiResponse == null ? void 0 : aiResponse.messages[0]) == null ? void 0 : _a.content)) {
4933
4938
  console.error("aiResponse", aiResponse);
@@ -5180,7 +5185,7 @@ function systemPromptToTaskPlanning() {
5180
5185
  * param: { timeMs: number }, wait for timeMs milliseconds
5181
5186
 
5182
5187
  Here is an example of how to decompose a task.
5183
- When a user says 'Input "Weather in Shanghai" into the search bar, wait 1 second, hit enter', by viewing the page screenshot and description, you my decompose this task into something like this:
5188
+ When a user says 'Input "Weather in Shanghai" into the search bar, wait 1 second, hit enter', by viewing the page screenshot and description, you may decompose this task into something like this:
5184
5189
  * Find: 'The search bar'
5185
5190
  * Input: 'Weather in Shanghai'
5186
5191
  * Sleep: 1000
@@ -5190,7 +5195,7 @@ function systemPromptToTaskPlanning() {
5190
5195
  1. The actions you composed MUST be based on the page context information you get. Instead of making up actions that are not related to the page context.
5191
5196
  2. In most cases, you should Locate one element first, then do other actions on it. For example, alway Find one element, then hover on it. But if you think it's necessary to do other actions first (like global scroll, global key press), you can do that.
5192
5197
 
5193
- If the planned tasks are sequential and tasks may appear only after the execution of previous tasks, this is considered normal. If any errors occur during task planning (such as the page content being irrelevant to the task or the mentioned element not existing), please return the error message with an explanation in the errors field. Thoughts, prompts, and error messages should all be in the same language as the user query.
5198
+ If the planned tasks are sequential and tasks may appear only after the execution of previous tasks, this is considered normal. Thoughts, prompts, and error messages should all be in the same language as the user query.
5194
5199
 
5195
5200
  Return in the following JSON format:
5196
5201
  {
@@ -5264,11 +5269,6 @@ async function plan(userPrompt, opts, useModel) {
5264
5269
  if (planFromAI.error) {
5265
5270
  throw new Error(planFromAI.error);
5266
5271
  }
5267
- actions.forEach((task) => {
5268
- if (task.type === "Error") {
5269
- throw new Error(task.thought);
5270
- }
5271
- });
5272
5272
  return { plans: actions };
5273
5273
  }
5274
5274
 
package/dist/es/utils.js CHANGED
@@ -8,7 +8,7 @@ import {
8
8
  writeFileSync
9
9
  } from "fs";
10
10
  import { tmpdir } from "os";
11
- import path, { basename, join } from "path";
11
+ import path, { basename, dirname, join } from "path";
12
12
  var pkg;
13
13
  function getPkgInfo() {
14
14
  if (pkg) {
@@ -94,6 +94,10 @@ ${logDirName}/dump
94
94
  logEnvReady = true;
95
95
  }
96
96
  const filePath = join(targetDir, `${fileName}.${fileExt}`);
97
+ const outputResourceDir = dirname(filePath);
98
+ if (!existsSync(outputResourceDir)) {
99
+ mkdirSync(outputResourceDir, { recursive: true });
100
+ }
97
101
  writeFileSync(filePath, fileContent);
98
102
  if (opts == null ? void 0 : opts.generateReport) {
99
103
  return writeDumpReport(fileName, fileContent);
@@ -4276,7 +4276,8 @@ async function call(messages, responseFormat) {
4276
4276
  const completion = await openai.chat.completions.create({
4277
4277
  model,
4278
4278
  messages,
4279
- response_format: { type: responseFormat }
4279
+ response_format: { type: responseFormat },
4280
+ temperature: 0.2
4280
4281
  });
4281
4282
  const { content } = completion.choices[0].message;
4282
4283
  (0, import_node_assert.default)(content, "empty content");
@@ -4630,8 +4631,8 @@ async function callCozeAi(options) {
4630
4631
  }
4631
4632
  const aiResponse = await completion.json();
4632
4633
  if (aiResponse.code !== 0) {
4633
- console.error("CozeAI error response", aiResponse);
4634
- throw new Error("CozeAI error response", aiResponse);
4634
+ console.error("CozeAI error response", aiResponse.msg);
4635
+ throw new Error(`CozeAI error response ${aiResponse.msg}`);
4635
4636
  }
4636
4637
  if (!(aiResponse == null ? void 0 : aiResponse.messages) || !((_a = aiResponse == null ? void 0 : aiResponse.messages[0]) == null ? void 0 : _a.content)) {
4637
4638
  console.error("aiResponse", aiResponse);
@@ -4884,7 +4885,7 @@ function systemPromptToTaskPlanning() {
4884
4885
  * param: { timeMs: number }, wait for timeMs milliseconds
4885
4886
 
4886
4887
  Here is an example of how to decompose a task.
4887
- When a user says 'Input "Weather in Shanghai" into the search bar, wait 1 second, hit enter', by viewing the page screenshot and description, you my decompose this task into something like this:
4888
+ When a user says 'Input "Weather in Shanghai" into the search bar, wait 1 second, hit enter', by viewing the page screenshot and description, you may decompose this task into something like this:
4888
4889
  * Find: 'The search bar'
4889
4890
  * Input: 'Weather in Shanghai'
4890
4891
  * Sleep: 1000
@@ -4894,7 +4895,7 @@ function systemPromptToTaskPlanning() {
4894
4895
  1. The actions you composed MUST be based on the page context information you get. Instead of making up actions that are not related to the page context.
4895
4896
  2. In most cases, you should Locate one element first, then do other actions on it. For example, alway Find one element, then hover on it. But if you think it's necessary to do other actions first (like global scroll, global key press), you can do that.
4896
4897
 
4897
- If the planned tasks are sequential and tasks may appear only after the execution of previous tasks, this is considered normal. If any errors occur during task planning (such as the page content being irrelevant to the task or the mentioned element not existing), please return the error message with an explanation in the errors field. Thoughts, prompts, and error messages should all be in the same language as the user query.
4898
+ If the planned tasks are sequential and tasks may appear only after the execution of previous tasks, this is considered normal. Thoughts, prompts, and error messages should all be in the same language as the user query.
4898
4899
 
4899
4900
  Return in the following JSON format:
4900
4901
  {
@@ -4968,11 +4969,6 @@ async function plan(userPrompt, opts, useModel) {
4968
4969
  if (planFromAI.error) {
4969
4970
  throw new Error(planFromAI.error);
4970
4971
  }
4971
- actions.forEach((task) => {
4972
- if (task.type === "Error") {
4973
- throw new Error(task.thought);
4974
- }
4975
- });
4976
4972
  return { plans: actions };
4977
4973
  }
4978
4974
  // Annotate the CommonJS export names for ESM import in node:
package/dist/lib/index.js CHANGED
@@ -1122,6 +1122,10 @@ ${logDirName}/dump
1122
1122
  logEnvReady = true;
1123
1123
  }
1124
1124
  const filePath = (0, import_node_path.join)(targetDir, `${fileName}.${fileExt}`);
1125
+ const outputResourceDir = (0, import_node_path.dirname)(filePath);
1126
+ if (!(0, import_node_fs.existsSync)(outputResourceDir)) {
1127
+ (0, import_node_fs.mkdirSync)(outputResourceDir, { recursive: true });
1128
+ }
1125
1129
  (0, import_node_fs.writeFileSync)(filePath, fileContent);
1126
1130
  if (opts == null ? void 0 : opts.generateReport) {
1127
1131
  return writeDumpReport(fileName, fileContent);
@@ -1244,7 +1248,7 @@ var Executor = class {
1244
1248
  returnValue = await task.executor(param, executorContext);
1245
1249
  }
1246
1250
  Object.assign(task, returnValue);
1247
- task.status = "success";
1251
+ task.status = "finished";
1248
1252
  task.timing.end = Date.now();
1249
1253
  task.timing.cost = task.timing.end - task.timing.start;
1250
1254
  taskIndex++;
@@ -4563,7 +4567,8 @@ async function call(messages, responseFormat) {
4563
4567
  const completion = await openai.chat.completions.create({
4564
4568
  model,
4565
4569
  messages,
4566
- response_format: { type: responseFormat }
4570
+ response_format: { type: responseFormat },
4571
+ temperature: 0.2
4567
4572
  });
4568
4573
  const { content } = completion.choices[0].message;
4569
4574
  (0, import_node_assert3.default)(content, "empty content");
@@ -4942,8 +4947,8 @@ async function callCozeAi(options) {
4942
4947
  }
4943
4948
  const aiResponse = await completion.json();
4944
4949
  if (aiResponse.code !== 0) {
4945
- console.error("CozeAI error response", aiResponse);
4946
- throw new Error("CozeAI error response", aiResponse);
4950
+ console.error("CozeAI error response", aiResponse.msg);
4951
+ throw new Error(`CozeAI error response ${aiResponse.msg}`);
4947
4952
  }
4948
4953
  if (!(aiResponse == null ? void 0 : aiResponse.messages) || !((_a = aiResponse == null ? void 0 : aiResponse.messages[0]) == null ? void 0 : _a.content)) {
4949
4954
  console.error("aiResponse", aiResponse);
@@ -5196,7 +5201,7 @@ function systemPromptToTaskPlanning() {
5196
5201
  * param: { timeMs: number }, wait for timeMs milliseconds
5197
5202
 
5198
5203
  Here is an example of how to decompose a task.
5199
- When a user says 'Input "Weather in Shanghai" into the search bar, wait 1 second, hit enter', by viewing the page screenshot and description, you my decompose this task into something like this:
5204
+ When a user says 'Input "Weather in Shanghai" into the search bar, wait 1 second, hit enter', by viewing the page screenshot and description, you may decompose this task into something like this:
5200
5205
  * Find: 'The search bar'
5201
5206
  * Input: 'Weather in Shanghai'
5202
5207
  * Sleep: 1000
@@ -5206,7 +5211,7 @@ function systemPromptToTaskPlanning() {
5206
5211
  1. The actions you composed MUST be based on the page context information you get. Instead of making up actions that are not related to the page context.
5207
5212
  2. In most cases, you should Locate one element first, then do other actions on it. For example, alway Find one element, then hover on it. But if you think it's necessary to do other actions first (like global scroll, global key press), you can do that.
5208
5213
 
5209
- If the planned tasks are sequential and tasks may appear only after the execution of previous tasks, this is considered normal. If any errors occur during task planning (such as the page content being irrelevant to the task or the mentioned element not existing), please return the error message with an explanation in the errors field. Thoughts, prompts, and error messages should all be in the same language as the user query.
5214
+ If the planned tasks are sequential and tasks may appear only after the execution of previous tasks, this is considered normal. Thoughts, prompts, and error messages should all be in the same language as the user query.
5210
5215
 
5211
5216
  Return in the following JSON format:
5212
5217
  {
@@ -5280,11 +5285,6 @@ async function plan(userPrompt, opts, useModel) {
5280
5285
  if (planFromAI.error) {
5281
5286
  throw new Error(planFromAI.error);
5282
5287
  }
5283
- actions.forEach((task) => {
5284
- if (task.type === "Error") {
5285
- throw new Error(task.thought);
5286
- }
5287
- });
5288
5288
  return { plans: actions };
5289
5289
  }
5290
5290
 
package/dist/lib/utils.js CHANGED
@@ -138,6 +138,10 @@ ${logDirName}/dump
138
138
  logEnvReady = true;
139
139
  }
140
140
  const filePath = (0, import_node_path.join)(targetDir, `${fileName}.${fileExt}`);
141
+ const outputResourceDir = (0, import_node_path.dirname)(filePath);
142
+ if (!(0, import_node_fs.existsSync)(outputResourceDir)) {
143
+ (0, import_node_fs.mkdirSync)(outputResourceDir, { recursive: true });
144
+ }
141
145
  (0, import_node_fs.writeFileSync)(filePath, fileContent);
142
146
  if (opts == null ? void 0 : opts.generateReport) {
143
147
  return writeDumpReport(fileName, fileContent);
@@ -1,8 +1,8 @@
1
1
  import { ChatCompletionMessageParam } from 'openai/resources';
2
2
  export { ChatCompletionMessageParam } from 'openai/resources';
3
- import { c as callAiFn } from './index-f43935c0.js';
4
- export { d as describeUserPage, p as plan } from './index-f43935c0.js';
5
- import { B as BaseElement, U as UIContext, e as AIElementParseResponse, f as AISectionParseResponse, g as AIAssertionResponse } from './types-81f7991c.js';
3
+ import { c as callAiFn } from './index-7a9ec3e1.js';
4
+ export { d as describeUserPage, p as plan } from './index-7a9ec3e1.js';
5
+ import { B as BaseElement, U as UIContext, e as AIElementParseResponse, f as AISectionParseResponse, g as AIAssertionResponse } from './types-ed68710b.js';
6
6
 
7
7
  declare function AiInspectElement<ElementType extends BaseElement = BaseElement>(options: {
8
8
  context: UIContext<ElementType>;
@@ -1,5 +1,5 @@
1
1
  import { Buffer } from 'node:buffer';
2
- import { S as Size, R as Rect, h as UISection, U as UIContext, y as Color } from './types-81f7991c.js';
2
+ import { S as Size, R as Rect, h as UISection, U as UIContext, G as Color } from './types-ed68710b.js';
3
3
  import 'openai/resources';
4
4
 
5
5
  /**
@@ -1,4 +1,4 @@
1
- import { B as BaseElement, U as UIContext, q as PlanningAction } from './types-81f7991c.js';
1
+ import { B as BaseElement, U as UIContext, r as PlanningAction } from './types-ed68710b.js';
2
2
  import { ChatCompletionSystemMessageParam, ChatCompletionUserMessageParam } from 'openai/resources';
3
3
 
4
4
  type AIArgs = [
@@ -1,7 +1,7 @@
1
- import { E as ExecutionTask, a as ExecutionTaskApply, b as ExecutionDump, B as BaseElement, U as UIContext, D as DumpSubscriber, I as InsightTaskInfo, c as InsightOptions, d as InsightAssertionResponse } from './types-81f7991c.js';
2
- export { g as AIAssertionResponse, e as AIElementParseResponse, A as AIResponseFormat, f as AISectionParseResponse, z as BaseAgentParserOpt, j as BasicSectionQuery, C as CallAIFn, y as Color, l as DumpMeta, p as ElementById, i as EnsureObject, H as ExecutionRecorderItem, a3 as ExecutionTaskAction, a2 as ExecutionTaskActionApply, a1 as ExecutionTaskInsightAssertion, a0 as ExecutionTaskInsightAssertionApply, $ as ExecutionTaskInsightAssertionParam, Q as ExecutionTaskInsightDumpLog, W as ExecutionTaskInsightLocate, V as ExecutionTaskInsightLocateApply, O as ExecutionTaskInsightLocateOutput, N as ExecutionTaskInsightLocateParam, _ as ExecutionTaskInsightQuery, Z as ExecutionTaskInsightQueryApply, Y as ExecutionTaskInsightQueryOutput, X as ExecutionTaskInsightQueryParam, a5 as ExecutionTaskPlanning, a4 as ExecutionTaskPlanningApply, M as ExecutionTaskReturn, J as ExecutionTaskType, K as ExecutorContext, a6 as GroupedActionDump, n as InsightDump, k as InsightExtractParam, L as LiteUISection, o as PartialInsightDumpFromSDK, r as PlanningAIResponse, q as PlanningAction, w as PlanningActionParamAssert, t as PlanningActionParamHover, u as PlanningActionParamInputOrKeyPress, v as PlanningActionParamScroll, x as PlanningActionParamSleep, s as PlanningActionParamTap, G as PlaywrightParserOpt, P as Point, F as PuppeteerParserOpt, R as Rect, m as ReportDumpWithAttributes, S as Size, T as TaskCacheInfo, h as UISection } from './types-81f7991c.js';
3
- import { c as callAiFn, r as retrieveElement, a as retrieveSection } from './index-f43935c0.js';
4
- export { p as plan } from './index-f43935c0.js';
1
+ import { E as ExecutionTask, a as ExecutionTaskApply, b as ExecutionDump, B as BaseElement, U as UIContext, D as DumpSubscriber, I as InsightTaskInfo, c as InsightOptions, d as InsightAssertionResponse } from './types-ed68710b.js';
2
+ export { g as AIAssertionResponse, e as AIElementParseResponse, A as AIResponseFormat, f as AISectionParseResponse, q as AgentWaitForOpt, H as BaseAgentParserOpt, j as BasicSectionQuery, C as CallAIFn, G as Color, l as DumpMeta, p as ElementById, i as EnsureObject, M as ExecutionRecorderItem, a6 as ExecutionTaskAction, a5 as ExecutionTaskActionApply, a4 as ExecutionTaskInsightAssertion, a3 as ExecutionTaskInsightAssertionApply, a2 as ExecutionTaskInsightAssertionParam, X as ExecutionTaskInsightDumpLog, Z as ExecutionTaskInsightLocate, Y as ExecutionTaskInsightLocateApply, W as ExecutionTaskInsightLocateOutput, V as ExecutionTaskInsightLocateParam, a1 as ExecutionTaskInsightQuery, a0 as ExecutionTaskInsightQueryApply, $ as ExecutionTaskInsightQueryOutput, _ as ExecutionTaskInsightQueryParam, a8 as ExecutionTaskPlanning, a7 as ExecutionTaskPlanningApply, Q as ExecutionTaskReturn, N as ExecutionTaskType, O as ExecutorContext, a9 as GroupedActionDump, n as InsightDump, k as InsightExtractParam, L as LiteUISection, o as PartialInsightDumpFromSDK, s as PlanningAIResponse, r as PlanningAction, x as PlanningActionParamAssert, z as PlanningActionParamError, u as PlanningActionParamHover, v as PlanningActionParamInputOrKeyPress, w as PlanningActionParamScroll, y as PlanningActionParamSleep, t as PlanningActionParamTap, F as PlanningActionParamWaitFor, K as PlaywrightParserOpt, P as Point, J as PuppeteerParserOpt, R as Rect, m as ReportDumpWithAttributes, S as Size, T as TaskCacheInfo, h as UISection } from './types-ed68710b.js';
3
+ import { c as callAiFn, r as retrieveElement, a as retrieveSection } from './index-7a9ec3e1.js';
4
+ export { p as plan } from './index-7a9ec3e1.js';
5
5
  export { setLogDir } from './utils.js';
6
6
  import 'openai/resources';
7
7
 
@@ -122,13 +122,20 @@ interface LiteUISection {
122
122
  }
123
123
  type ElementById = (id: string) => BaseElement | null;
124
124
  type InsightAssertionResponse = AIAssertionResponse;
125
+ /**
126
+ * agent
127
+ */
128
+ interface AgentWaitForOpt {
129
+ checkIntervalMs?: number;
130
+ timeoutMs?: number;
131
+ }
125
132
  /**
126
133
  * planning
127
134
  *
128
135
  */
129
136
  interface PlanningAction<ParamType = any> {
130
137
  thought?: string;
131
- type: 'Locate' | 'Tap' | 'Hover' | 'Input' | 'KeyboardPress' | 'Scroll' | 'Error' | 'Assert' | 'Sleep';
138
+ type: 'Locate' | 'Tap' | 'Hover' | 'Input' | 'KeyboardPress' | 'Scroll' | 'Error' | 'Assert' | 'AssertWithoutThrow' | 'Sleep';
132
139
  param: ParamType;
133
140
  }
134
141
  interface PlanningAIResponse {
@@ -150,6 +157,12 @@ interface PlanningActionParamAssert {
150
157
  interface PlanningActionParamSleep {
151
158
  timeMs: number;
152
159
  }
160
+ interface PlanningActionParamError {
161
+ thought: string;
162
+ }
163
+ type PlanningActionParamWaitFor = AgentWaitForOpt & {
164
+ assertion: string;
165
+ };
153
166
  /**
154
167
  * misc
155
168
  */
@@ -191,7 +204,7 @@ interface ExecutionTaskReturn<TaskOutput = unknown, TaskLog = unknown> {
191
204
  cache?: TaskCacheInfo;
192
205
  }
193
206
  type ExecutionTask<E extends ExecutionTaskApply<any, any, any> = ExecutionTaskApply<any, any, any>> = E & ExecutionTaskReturn<E extends ExecutionTaskApply<any, any, infer TaskOutput, any> ? TaskOutput : unknown, E extends ExecutionTaskApply<any, any, any, infer TaskLog> ? TaskLog : unknown> & {
194
- status: 'pending' | 'running' | 'success' | 'failed' | 'cancelled';
207
+ status: 'pending' | 'running' | 'finished' | 'failed' | 'cancelled';
195
208
  error?: string;
196
209
  errorStack?: string;
197
210
  timing?: {
@@ -243,4 +256,4 @@ interface GroupedActionDump {
243
256
  executions: ExecutionDump[];
244
257
  }
245
258
 
246
- export { type ExecutionTaskInsightAssertionParam as $, AIResponseFormat as A, BaseElement as B, type CallAIFn as C, type DumpSubscriber as D, type ExecutionTask as E, type PuppeteerParserOpt as F, type PlaywrightParserOpt as G, type ExecutionRecorderItem as H, type InsightTaskInfo as I, type ExecutionTaskType as J, type ExecutorContext as K, type LiteUISection as L, type ExecutionTaskReturn as M, type ExecutionTaskInsightLocateParam as N, type ExecutionTaskInsightLocateOutput as O, type Point as P, type ExecutionTaskInsightDumpLog as Q, type Rect as R, type Size as S, type TaskCacheInfo as T, UIContext as U, type ExecutionTaskInsightLocateApply as V, type ExecutionTaskInsightLocate as W, type ExecutionTaskInsightQueryParam as X, type ExecutionTaskInsightQueryOutput as Y, type ExecutionTaskInsightQueryApply as Z, type ExecutionTaskInsightQuery as _, type ExecutionTaskApply as a, type ExecutionTaskInsightAssertionApply as a0, type ExecutionTaskInsightAssertion as a1, type ExecutionTaskActionApply as a2, type ExecutionTaskAction as a3, type ExecutionTaskPlanningApply as a4, type ExecutionTaskPlanning as a5, type GroupedActionDump as a6, type ExecutionDump as b, type InsightOptions as c, type InsightAssertionResponse as d, type AIElementParseResponse as e, type AISectionParseResponse as f, type AIAssertionResponse as g, type UISection as h, type EnsureObject as i, type BasicSectionQuery as j, type InsightExtractParam as k, type DumpMeta as l, type ReportDumpWithAttributes as m, type InsightDump as n, type PartialInsightDumpFromSDK as o, type ElementById as p, type PlanningAction as q, type PlanningAIResponse as r, type PlanningActionParamTap as s, type PlanningActionParamHover as t, type PlanningActionParamInputOrKeyPress as u, type PlanningActionParamScroll as v, type PlanningActionParamAssert as w, type PlanningActionParamSleep as x, type Color as y, type BaseAgentParserOpt as z };
259
+ export { type ExecutionTaskInsightQueryOutput as $, AIResponseFormat as A, BaseElement as B, type CallAIFn as C, type DumpSubscriber as D, type ExecutionTask as E, type PlanningActionParamWaitFor as F, type Color as G, type BaseAgentParserOpt as H, type InsightTaskInfo as I, type PuppeteerParserOpt as J, type PlaywrightParserOpt as K, type LiteUISection as L, type ExecutionRecorderItem as M, type ExecutionTaskType as N, type ExecutorContext as O, type Point as P, type ExecutionTaskReturn as Q, type Rect as R, type Size as S, type TaskCacheInfo as T, UIContext as U, type ExecutionTaskInsightLocateParam as V, type ExecutionTaskInsightLocateOutput as W, type ExecutionTaskInsightDumpLog as X, type ExecutionTaskInsightLocateApply as Y, type ExecutionTaskInsightLocate as Z, type ExecutionTaskInsightQueryParam as _, type ExecutionTaskApply as a, type ExecutionTaskInsightQueryApply as a0, type ExecutionTaskInsightQuery as a1, type ExecutionTaskInsightAssertionParam as a2, type ExecutionTaskInsightAssertionApply as a3, type ExecutionTaskInsightAssertion as a4, type ExecutionTaskActionApply as a5, type ExecutionTaskAction as a6, type ExecutionTaskPlanningApply as a7, type ExecutionTaskPlanning as a8, type GroupedActionDump as a9, type ExecutionDump as b, type InsightOptions as c, type InsightAssertionResponse as d, type AIElementParseResponse as e, type AISectionParseResponse as f, type AIAssertionResponse as g, type UISection as h, type EnsureObject as i, type BasicSectionQuery as j, type InsightExtractParam as k, type DumpMeta as l, type ReportDumpWithAttributes as m, type InsightDump as n, type PartialInsightDumpFromSDK as o, type ElementById as p, type AgentWaitForOpt as q, type PlanningAction as r, type PlanningAIResponse as s, type PlanningActionParamTap as t, type PlanningActionParamHover as u, type PlanningActionParamInputOrKeyPress as v, type PlanningActionParamScroll as w, type PlanningActionParamAssert as x, type PlanningActionParamSleep as y, type PlanningActionParamError as z };
@@ -1,4 +1,4 @@
1
- import { m as ReportDumpWithAttributes, R as Rect } from './types-81f7991c.js';
1
+ import { m as ReportDumpWithAttributes, R as Rect } from './types-ed68710b.js';
2
2
  import 'openai/resources';
3
3
 
4
4
  interface PkgInfo {
package/package.json CHANGED
@@ -1,7 +1,7 @@
1
1
  {
2
2
  "name": "@midscene/core",
3
3
  "description": "Hello, It's Midscene",
4
- "version": "0.3.0",
4
+ "version": "0.3.1",
5
5
  "jsnext:source": "./src/index.ts",
6
6
  "main": "./dist/lib/index.js",
7
7
  "module": "./dist/es/index.js",
@@ -60,18 +60,19 @@
60
60
  }
61
61
  },
62
62
  "dependencies": {
63
+ "node-fetch": "2.6.7",
63
64
  "openai": "4.47.1",
64
- "sharp": "0.33.3",
65
- "node-fetch": "2.6.7"
65
+ "optional": "0.1.4",
66
+ "sharp": "0.33.3"
66
67
  },
67
68
  "devDependencies": {
68
- "@types/node-fetch": "2.6.11",
69
69
  "@modern-js/module-tools": "^2.56.1",
70
70
  "@types/node": "^18.0.0",
71
+ "@types/node-fetch": "2.6.11",
72
+ "dotenv": "16.4.5",
71
73
  "langsmith": "0.1.36",
72
74
  "typescript": "~5.0.4",
73
- "vitest": "^1.6.0",
74
- "dotenv": "16.4.5"
75
+ "vitest": "^1.6.0"
75
76
  },
76
77
  "engines": {
77
78
  "node": ">=16.0.0"
@@ -88,6 +89,6 @@
88
89
  "new": "modern new",
89
90
  "upgrade": "modern upgrade",
90
91
  "test": "vitest --run",
91
- "test:all": "AITEST=true vitest --run"
92
+ "test:ai": "AITEST=true npm run test"
92
93
  }
93
94
  }