@midscene/core 0.3.0 → 0.3.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +2 -2
- package/dist/es/ai-model.js +7 -11
- package/dist/es/image.js +1 -1
- package/dist/es/index.js +12 -12
- package/dist/es/utils.js +5 -1
- package/dist/lib/ai-model.js +6 -10
- package/dist/lib/index.js +11 -11
- package/dist/lib/utils.js +4 -0
- package/dist/types/ai-model.d.ts +3 -3
- package/dist/types/image.d.ts +1 -1
- package/dist/types/{index-f43935c0.d.ts → index-7a9ec3e1.d.ts} +1 -1
- package/dist/types/index.d.ts +4 -4
- package/dist/types/{types-81f7991c.d.ts → types-ed68710b.d.ts} +16 -3
- package/dist/types/utils.d.ts +1 -1
- package/package.json +8 -7
- package/report/index.html +1 -1
package/LICENSE
CHANGED
```diff
@@ -1,6 +1,6 @@
 MIT License
 
-Copyright (c) 2024-present
+Copyright (c) 2024-present Bytedance, Inc. and its affiliates.
 
 Permission is hereby granted, free of charge, to any person obtaining a copy
 of this software and associated documentation files (the "Software"), to deal
@@ -18,4 +18,4 @@ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
-SOFTWARE.
+SOFTWARE.
```
package/dist/es/ai-model.js
CHANGED
```diff
@@ -4256,7 +4256,8 @@ async function call(messages, responseFormat) {
   const completion = await openai.chat.completions.create({
     model,
     messages,
-    response_format: { type: responseFormat }
+    response_format: { type: responseFormat },
+    temperature: 0.2
   });
   const { content } = completion.choices[0].message;
   assert(content, "empty content");
@@ -4439,7 +4440,7 @@ import {
   writeFileSync
 } from "fs";
 import { tmpdir } from "os";
-import path, { basename, join } from "path";
+import path, { basename, dirname, join } from "path";
 var logDir = join(process.cwd(), "./midscene_run/");
 
 // src/image/visualization.ts
@@ -4615,8 +4616,8 @@ async function callCozeAi(options) {
   }
   const aiResponse = await completion.json();
   if (aiResponse.code !== 0) {
-    console.error("CozeAI error response", aiResponse);
-    throw new Error(
+    console.error("CozeAI error response", aiResponse.msg);
+    throw new Error(`CozeAI error response ${aiResponse.msg}`);
   }
   if (!(aiResponse == null ? void 0 : aiResponse.messages) || !((_a = aiResponse == null ? void 0 : aiResponse.messages[0]) == null ? void 0 : _a.content)) {
     console.error("aiResponse", aiResponse);
@@ -4869,7 +4870,7 @@ function systemPromptToTaskPlanning() {
 * param: { timeMs: number }, wait for timeMs milliseconds
 
 Here is an example of how to decompose a task.
-When a user says 'Input "Weather in Shanghai" into the search bar, wait 1 second, hit enter', by viewing the page screenshot and description, you
+When a user says 'Input "Weather in Shanghai" into the search bar, wait 1 second, hit enter', by viewing the page screenshot and description, you may decompose this task into something like this:
 * Find: 'The search bar'
 * Input: 'Weather in Shanghai'
 * Sleep: 1000
@@ -4879,7 +4880,7 @@ function systemPromptToTaskPlanning() {
 1. The actions you composed MUST be based on the page context information you get. Instead of making up actions that are not related to the page context.
 2. In most cases, you should Locate one element first, then do other actions on it. For example, alway Find one element, then hover on it. But if you think it's necessary to do other actions first (like global scroll, global key press), you can do that.
 
-If the planned tasks are sequential and tasks may appear only after the execution of previous tasks, this is considered normal.
+If the planned tasks are sequential and tasks may appear only after the execution of previous tasks, this is considered normal. Thoughts, prompts, and error messages should all be in the same language as the user query.
 
 Return in the following JSON format:
 {
@@ -4953,11 +4954,6 @@ async function plan(userPrompt, opts, useModel) {
   if (planFromAI.error) {
     throw new Error(planFromAI.error);
   }
-  actions.forEach((task) => {
-    if (task.type === "Error") {
-      throw new Error(task.thought);
-    }
-  });
   return { plans: actions };
 }
 export {
```
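The functional changes above are pinning the sampling temperature on the OpenAI call, tightening the CozeAI error message, and dropping the eager throw on planned `Error` actions. For reference only, a minimal sketch of the new call pattern with the `openai` v4 SDK; the `callLLM` wrapper and the model name are placeholders, not the package's actual code:

```ts
import OpenAI from "openai";
import assert from "node:assert";
import type { ChatCompletionMessageParam } from "openai/resources";

const openai = new OpenAI(); // reads OPENAI_API_KEY from the environment

// Hypothetical wrapper mirroring the shape of call(messages, responseFormat).
// A low temperature (0.2) makes JSON planning/parsing output more repeatable.
async function callLLM(
  messages: ChatCompletionMessageParam[],
  responseFormat: "text" | "json_object",
): Promise<string> {
  const completion = await openai.chat.completions.create({
    model: "gpt-4o", // placeholder model name
    messages,
    response_format: { type: responseFormat },
    temperature: 0.2,
  });
  const { content } = completion.choices[0].message;
  assert(content, "empty content");
  return content;
}
```

Setting a low temperature trades response diversity for determinism, which suits structured planning and element-parsing prompts.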
package/dist/es/image.js
CHANGED
package/dist/es/index.js
CHANGED
```diff
@@ -1021,7 +1021,7 @@ import {
   writeFileSync
 } from "fs";
 import { tmpdir } from "os";
-import path, { basename, join } from "path";
+import path, { basename, dirname, join } from "path";
 var pkg;
 function getPkgInfo() {
   if (pkg) {
@@ -1106,6 +1106,10 @@ ${logDirName}/dump
     logEnvReady = true;
   }
   const filePath = join(targetDir, `${fileName}.${fileExt}`);
+  const outputResourceDir = dirname(filePath);
+  if (!existsSync(outputResourceDir)) {
+    mkdirSync(outputResourceDir, { recursive: true });
+  }
   writeFileSync(filePath, fileContent);
   if (opts == null ? void 0 : opts.generateReport) {
     return writeDumpReport(fileName, fileContent);
@@ -1228,7 +1232,7 @@ var Executor = class {
         returnValue = await task.executor(param, executorContext);
       }
       Object.assign(task, returnValue);
-      task.status = "
+      task.status = "finished";
       task.timing.end = Date.now();
       task.timing.cost = task.timing.end - task.timing.start;
       taskIndex++;
@@ -4547,7 +4551,8 @@ async function call(messages, responseFormat) {
   const completion = await openai.chat.completions.create({
     model,
     messages,
-    response_format: { type: responseFormat }
+    response_format: { type: responseFormat },
+    temperature: 0.2
   });
   const { content } = completion.choices[0].message;
   assert3(content, "empty content");
@@ -4926,8 +4931,8 @@ async function callCozeAi(options) {
   }
   const aiResponse = await completion.json();
   if (aiResponse.code !== 0) {
-    console.error("CozeAI error response", aiResponse);
-    throw new Error(
+    console.error("CozeAI error response", aiResponse.msg);
+    throw new Error(`CozeAI error response ${aiResponse.msg}`);
   }
   if (!(aiResponse == null ? void 0 : aiResponse.messages) || !((_a = aiResponse == null ? void 0 : aiResponse.messages[0]) == null ? void 0 : _a.content)) {
     console.error("aiResponse", aiResponse);
@@ -5180,7 +5185,7 @@ function systemPromptToTaskPlanning() {
 * param: { timeMs: number }, wait for timeMs milliseconds
 
 Here is an example of how to decompose a task.
-When a user says 'Input "Weather in Shanghai" into the search bar, wait 1 second, hit enter', by viewing the page screenshot and description, you
+When a user says 'Input "Weather in Shanghai" into the search bar, wait 1 second, hit enter', by viewing the page screenshot and description, you may decompose this task into something like this:
 * Find: 'The search bar'
 * Input: 'Weather in Shanghai'
 * Sleep: 1000
@@ -5190,7 +5195,7 @@ function systemPromptToTaskPlanning() {
 1. The actions you composed MUST be based on the page context information you get. Instead of making up actions that are not related to the page context.
 2. In most cases, you should Locate one element first, then do other actions on it. For example, alway Find one element, then hover on it. But if you think it's necessary to do other actions first (like global scroll, global key press), you can do that.
 
-If the planned tasks are sequential and tasks may appear only after the execution of previous tasks, this is considered normal.
+If the planned tasks are sequential and tasks may appear only after the execution of previous tasks, this is considered normal. Thoughts, prompts, and error messages should all be in the same language as the user query.
 
 Return in the following JSON format:
 {
@@ -5264,11 +5269,6 @@ async function plan(userPrompt, opts, useModel) {
   if (planFromAI.error) {
     throw new Error(planFromAI.error);
   }
-  actions.forEach((task) => {
-    if (task.type === "Error") {
-      throw new Error(task.thought);
-    }
-  });
   return { plans: actions };
 }
 
```
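Besides the changes shared with `ai-model.js` and `utils.js`, this bundle's `Executor` now sets `task.status` to `"finished"` once a task's executor resolves, matching the widened status union in the type definitions. A simplified sketch of that bookkeeping, using an assumed `Task` shape rather than the package's real `ExecutionTask` type:

```ts
// Assumed, simplified task shape for illustration; the real ExecutionTask type
// lives in dist/types and carries more fields.
interface Task {
  status: "pending" | "running" | "finished" | "failed" | "cancelled";
  timing: { start?: number; end?: number; cost?: number };
  executor: (param: unknown) => Promise<object | void>;
}

async function runTask(task: Task, param: unknown): Promise<void> {
  task.status = "running";
  task.timing.start = Date.now();
  try {
    const returnValue = await task.executor(param);
    if (returnValue) {
      Object.assign(task, returnValue); // merge output/log fields onto the task
    }
    task.status = "finished";
  } catch (e) {
    task.status = "failed";
    throw e;
  } finally {
    task.timing.end = Date.now();
    task.timing.cost = task.timing.end - (task.timing.start ?? task.timing.end);
  }
}
```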
package/dist/es/utils.js
CHANGED
```diff
@@ -8,7 +8,7 @@ import {
   writeFileSync
 } from "fs";
 import { tmpdir } from "os";
-import path, { basename, join } from "path";
+import path, { basename, dirname, join } from "path";
 var pkg;
 function getPkgInfo() {
   if (pkg) {
@@ -94,6 +94,10 @@ ${logDirName}/dump
     logEnvReady = true;
   }
   const filePath = join(targetDir, `${fileName}.${fileExt}`);
+  const outputResourceDir = dirname(filePath);
+  if (!existsSync(outputResourceDir)) {
+    mkdirSync(outputResourceDir, { recursive: true });
+  }
   writeFileSync(filePath, fileContent);
   if (opts == null ? void 0 : opts.generateReport) {
     return writeDumpReport(fileName, fileContent);
```
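This is the dump-writing change in its smallest form: create the parent directory before writing, so the write cannot fail with ENOENT on a fresh checkout. A standalone sketch of the pattern; the `writeDumpFile` name and its arguments are hypothetical, not the package's API:

```ts
import { existsSync, mkdirSync, writeFileSync } from "node:fs";
import { dirname, join } from "node:path";

// Hypothetical helper: write a dump file, creating its parent directory on demand.
function writeDumpFile(targetDir: string, fileName: string, fileExt: string, fileContent: string): string {
  const filePath = join(targetDir, `${fileName}.${fileExt}`);
  const outputResourceDir = dirname(filePath);
  if (!existsSync(outputResourceDir)) {
    mkdirSync(outputResourceDir, { recursive: true });
  }
  writeFileSync(filePath, fileContent);
  return filePath;
}

// e.g. writeDumpFile("./midscene_run/report", "latest", "html", "<html></html>");
```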
package/dist/lib/ai-model.js
CHANGED
```diff
@@ -4276,7 +4276,8 @@ async function call(messages, responseFormat) {
   const completion = await openai.chat.completions.create({
     model,
     messages,
-    response_format: { type: responseFormat }
+    response_format: { type: responseFormat },
+    temperature: 0.2
   });
   const { content } = completion.choices[0].message;
   (0, import_node_assert.default)(content, "empty content");
@@ -4630,8 +4631,8 @@ async function callCozeAi(options) {
   }
   const aiResponse = await completion.json();
   if (aiResponse.code !== 0) {
-    console.error("CozeAI error response", aiResponse);
-    throw new Error(
+    console.error("CozeAI error response", aiResponse.msg);
+    throw new Error(`CozeAI error response ${aiResponse.msg}`);
   }
   if (!(aiResponse == null ? void 0 : aiResponse.messages) || !((_a = aiResponse == null ? void 0 : aiResponse.messages[0]) == null ? void 0 : _a.content)) {
     console.error("aiResponse", aiResponse);
@@ -4884,7 +4885,7 @@ function systemPromptToTaskPlanning() {
 * param: { timeMs: number }, wait for timeMs milliseconds
 
 Here is an example of how to decompose a task.
-When a user says 'Input "Weather in Shanghai" into the search bar, wait 1 second, hit enter', by viewing the page screenshot and description, you
+When a user says 'Input "Weather in Shanghai" into the search bar, wait 1 second, hit enter', by viewing the page screenshot and description, you may decompose this task into something like this:
 * Find: 'The search bar'
 * Input: 'Weather in Shanghai'
 * Sleep: 1000
@@ -4894,7 +4895,7 @@ function systemPromptToTaskPlanning() {
 1. The actions you composed MUST be based on the page context information you get. Instead of making up actions that are not related to the page context.
 2. In most cases, you should Locate one element first, then do other actions on it. For example, alway Find one element, then hover on it. But if you think it's necessary to do other actions first (like global scroll, global key press), you can do that.
 
-If the planned tasks are sequential and tasks may appear only after the execution of previous tasks, this is considered normal.
+If the planned tasks are sequential and tasks may appear only after the execution of previous tasks, this is considered normal. Thoughts, prompts, and error messages should all be in the same language as the user query.
 
 Return in the following JSON format:
 {
@@ -4968,11 +4969,6 @@ async function plan(userPrompt, opts, useModel) {
   if (planFromAI.error) {
     throw new Error(planFromAI.error);
   }
-  actions.forEach((task) => {
-    if (task.type === "Error") {
-      throw new Error(task.thought);
-    }
-  });
   return { plans: actions };
 }
 // Annotate the CommonJS export names for ESM import in node:
```
package/dist/lib/index.js
CHANGED
```diff
@@ -1122,6 +1122,10 @@ ${logDirName}/dump
     logEnvReady = true;
   }
   const filePath = (0, import_node_path.join)(targetDir, `${fileName}.${fileExt}`);
+  const outputResourceDir = (0, import_node_path.dirname)(filePath);
+  if (!(0, import_node_fs.existsSync)(outputResourceDir)) {
+    (0, import_node_fs.mkdirSync)(outputResourceDir, { recursive: true });
+  }
   (0, import_node_fs.writeFileSync)(filePath, fileContent);
   if (opts == null ? void 0 : opts.generateReport) {
     return writeDumpReport(fileName, fileContent);
@@ -1244,7 +1248,7 @@ var Executor = class {
         returnValue = await task.executor(param, executorContext);
       }
      Object.assign(task, returnValue);
-      task.status = "
+      task.status = "finished";
       task.timing.end = Date.now();
       task.timing.cost = task.timing.end - task.timing.start;
       taskIndex++;
@@ -4563,7 +4567,8 @@ async function call(messages, responseFormat) {
   const completion = await openai.chat.completions.create({
     model,
     messages,
-    response_format: { type: responseFormat }
+    response_format: { type: responseFormat },
+    temperature: 0.2
   });
   const { content } = completion.choices[0].message;
   (0, import_node_assert3.default)(content, "empty content");
@@ -4942,8 +4947,8 @@ async function callCozeAi(options) {
   }
   const aiResponse = await completion.json();
   if (aiResponse.code !== 0) {
-    console.error("CozeAI error response", aiResponse);
-    throw new Error(
+    console.error("CozeAI error response", aiResponse.msg);
+    throw new Error(`CozeAI error response ${aiResponse.msg}`);
   }
   if (!(aiResponse == null ? void 0 : aiResponse.messages) || !((_a = aiResponse == null ? void 0 : aiResponse.messages[0]) == null ? void 0 : _a.content)) {
     console.error("aiResponse", aiResponse);
@@ -5196,7 +5201,7 @@ function systemPromptToTaskPlanning() {
 * param: { timeMs: number }, wait for timeMs milliseconds
 
 Here is an example of how to decompose a task.
-When a user says 'Input "Weather in Shanghai" into the search bar, wait 1 second, hit enter', by viewing the page screenshot and description, you
+When a user says 'Input "Weather in Shanghai" into the search bar, wait 1 second, hit enter', by viewing the page screenshot and description, you may decompose this task into something like this:
 * Find: 'The search bar'
 * Input: 'Weather in Shanghai'
 * Sleep: 1000
@@ -5206,7 +5211,7 @@ function systemPromptToTaskPlanning() {
 1. The actions you composed MUST be based on the page context information you get. Instead of making up actions that are not related to the page context.
 2. In most cases, you should Locate one element first, then do other actions on it. For example, alway Find one element, then hover on it. But if you think it's necessary to do other actions first (like global scroll, global key press), you can do that.
 
-If the planned tasks are sequential and tasks may appear only after the execution of previous tasks, this is considered normal.
+If the planned tasks are sequential and tasks may appear only after the execution of previous tasks, this is considered normal. Thoughts, prompts, and error messages should all be in the same language as the user query.
 
 Return in the following JSON format:
 {
@@ -5280,11 +5285,6 @@ async function plan(userPrompt, opts, useModel) {
   if (planFromAI.error) {
     throw new Error(planFromAI.error);
   }
-  actions.forEach((task) => {
-    if (task.type === "Error") {
-      throw new Error(task.thought);
-    }
-  });
   return { plans: actions };
 }
 
```
package/dist/lib/utils.js
CHANGED
```diff
@@ -138,6 +138,10 @@ ${logDirName}/dump
     logEnvReady = true;
   }
   const filePath = (0, import_node_path.join)(targetDir, `${fileName}.${fileExt}`);
+  const outputResourceDir = (0, import_node_path.dirname)(filePath);
+  if (!(0, import_node_fs.existsSync)(outputResourceDir)) {
+    (0, import_node_fs.mkdirSync)(outputResourceDir, { recursive: true });
+  }
   (0, import_node_fs.writeFileSync)(filePath, fileContent);
   if (opts == null ? void 0 : opts.generateReport) {
     return writeDumpReport(fileName, fileContent);
```
package/dist/types/ai-model.d.ts
CHANGED
```diff
@@ -1,8 +1,8 @@
 import { ChatCompletionMessageParam } from 'openai/resources';
 export { ChatCompletionMessageParam } from 'openai/resources';
-import { c as callAiFn } from './index-
-export { d as describeUserPage, p as plan } from './index-
-import { B as BaseElement, U as UIContext, e as AIElementParseResponse, f as AISectionParseResponse, g as AIAssertionResponse } from './types-
+import { c as callAiFn } from './index-7a9ec3e1.js';
+export { d as describeUserPage, p as plan } from './index-7a9ec3e1.js';
+import { B as BaseElement, U as UIContext, e as AIElementParseResponse, f as AISectionParseResponse, g as AIAssertionResponse } from './types-ed68710b.js';
 
 declare function AiInspectElement<ElementType extends BaseElement = BaseElement>(options: {
     context: UIContext<ElementType>;
```
package/dist/types/image.d.ts
CHANGED
```diff
@@ -1,5 +1,5 @@
 import { Buffer } from 'node:buffer';
-import { S as Size, R as Rect, h as UISection, U as UIContext,
+import { S as Size, R as Rect, h as UISection, U as UIContext, G as Color } from './types-ed68710b.js';
 import 'openai/resources';
 
 /**
```
package/dist/types/{index-f43935c0.d.ts → index-7a9ec3e1.d.ts}
CHANGED
```diff
@@ -1,4 +1,4 @@
-import { B as BaseElement, U as UIContext,
+import { B as BaseElement, U as UIContext, r as PlanningAction } from './types-ed68710b.js';
 import { ChatCompletionSystemMessageParam, ChatCompletionUserMessageParam } from 'openai/resources';
 
 type AIArgs = [
```
package/dist/types/index.d.ts
CHANGED
```diff
@@ -1,7 +1,7 @@
-import { E as ExecutionTask, a as ExecutionTaskApply, b as ExecutionDump, B as BaseElement, U as UIContext, D as DumpSubscriber, I as InsightTaskInfo, c as InsightOptions, d as InsightAssertionResponse } from './types-
-export { g as AIAssertionResponse, e as AIElementParseResponse, A as AIResponseFormat, f as AISectionParseResponse,
-import { c as callAiFn, r as retrieveElement, a as retrieveSection } from './index-
-export { p as plan } from './index-
+import { E as ExecutionTask, a as ExecutionTaskApply, b as ExecutionDump, B as BaseElement, U as UIContext, D as DumpSubscriber, I as InsightTaskInfo, c as InsightOptions, d as InsightAssertionResponse } from './types-ed68710b.js';
+export { g as AIAssertionResponse, e as AIElementParseResponse, A as AIResponseFormat, f as AISectionParseResponse, q as AgentWaitForOpt, H as BaseAgentParserOpt, j as BasicSectionQuery, C as CallAIFn, G as Color, l as DumpMeta, p as ElementById, i as EnsureObject, M as ExecutionRecorderItem, a6 as ExecutionTaskAction, a5 as ExecutionTaskActionApply, a4 as ExecutionTaskInsightAssertion, a3 as ExecutionTaskInsightAssertionApply, a2 as ExecutionTaskInsightAssertionParam, X as ExecutionTaskInsightDumpLog, Z as ExecutionTaskInsightLocate, Y as ExecutionTaskInsightLocateApply, W as ExecutionTaskInsightLocateOutput, V as ExecutionTaskInsightLocateParam, a1 as ExecutionTaskInsightQuery, a0 as ExecutionTaskInsightQueryApply, $ as ExecutionTaskInsightQueryOutput, _ as ExecutionTaskInsightQueryParam, a8 as ExecutionTaskPlanning, a7 as ExecutionTaskPlanningApply, Q as ExecutionTaskReturn, N as ExecutionTaskType, O as ExecutorContext, a9 as GroupedActionDump, n as InsightDump, k as InsightExtractParam, L as LiteUISection, o as PartialInsightDumpFromSDK, s as PlanningAIResponse, r as PlanningAction, x as PlanningActionParamAssert, z as PlanningActionParamError, u as PlanningActionParamHover, v as PlanningActionParamInputOrKeyPress, w as PlanningActionParamScroll, y as PlanningActionParamSleep, t as PlanningActionParamTap, F as PlanningActionParamWaitFor, K as PlaywrightParserOpt, P as Point, J as PuppeteerParserOpt, R as Rect, m as ReportDumpWithAttributes, S as Size, T as TaskCacheInfo, h as UISection } from './types-ed68710b.js';
+import { c as callAiFn, r as retrieveElement, a as retrieveSection } from './index-7a9ec3e1.js';
+export { p as plan } from './index-7a9ec3e1.js';
 export { setLogDir } from './utils.js';
 import 'openai/resources';
 
```
package/dist/types/{types-81f7991c.d.ts → types-ed68710b.d.ts}
CHANGED
```diff
@@ -122,13 +122,20 @@ interface LiteUISection {
 }
 type ElementById = (id: string) => BaseElement | null;
 type InsightAssertionResponse = AIAssertionResponse;
+/**
+ * agent
+ */
+interface AgentWaitForOpt {
+    checkIntervalMs?: number;
+    timeoutMs?: number;
+}
 /**
  * planning
  *
  */
 interface PlanningAction<ParamType = any> {
     thought?: string;
-    type: 'Locate' | 'Tap' | 'Hover' | 'Input' | 'KeyboardPress' | 'Scroll' | 'Error' | 'Assert' | 'Sleep';
+    type: 'Locate' | 'Tap' | 'Hover' | 'Input' | 'KeyboardPress' | 'Scroll' | 'Error' | 'Assert' | 'AssertWithoutThrow' | 'Sleep';
     param: ParamType;
 }
 interface PlanningAIResponse {
@@ -150,6 +157,12 @@ interface PlanningActionParamAssert {
 interface PlanningActionParamSleep {
     timeMs: number;
 }
+interface PlanningActionParamError {
+    thought: string;
+}
+type PlanningActionParamWaitFor = AgentWaitForOpt & {
+    assertion: string;
+};
 /**
  * misc
  */
@@ -191,7 +204,7 @@ interface ExecutionTaskReturn<TaskOutput = unknown, TaskLog = unknown> {
     cache?: TaskCacheInfo;
 }
 type ExecutionTask<E extends ExecutionTaskApply<any, any, any> = ExecutionTaskApply<any, any, any>> = E & ExecutionTaskReturn<E extends ExecutionTaskApply<any, any, infer TaskOutput, any> ? TaskOutput : unknown, E extends ExecutionTaskApply<any, any, any, infer TaskLog> ? TaskLog : unknown> & {
-    status: 'pending' | 'running' | '
+    status: 'pending' | 'running' | 'finished' | 'failed' | 'cancelled';
     error?: string;
     errorStack?: string;
     timing?: {
@@ -243,4 +256,4 @@ interface GroupedActionDump {
     executions: ExecutionDump[];
 }
 
-export { type
+export { type ExecutionTaskInsightQueryOutput as $, AIResponseFormat as A, BaseElement as B, type CallAIFn as C, type DumpSubscriber as D, type ExecutionTask as E, type PlanningActionParamWaitFor as F, type Color as G, type BaseAgentParserOpt as H, type InsightTaskInfo as I, type PuppeteerParserOpt as J, type PlaywrightParserOpt as K, type LiteUISection as L, type ExecutionRecorderItem as M, type ExecutionTaskType as N, type ExecutorContext as O, type Point as P, type ExecutionTaskReturn as Q, type Rect as R, type Size as S, type TaskCacheInfo as T, UIContext as U, type ExecutionTaskInsightLocateParam as V, type ExecutionTaskInsightLocateOutput as W, type ExecutionTaskInsightDumpLog as X, type ExecutionTaskInsightLocateApply as Y, type ExecutionTaskInsightLocate as Z, type ExecutionTaskInsightQueryParam as _, type ExecutionTaskApply as a, type ExecutionTaskInsightQueryApply as a0, type ExecutionTaskInsightQuery as a1, type ExecutionTaskInsightAssertionParam as a2, type ExecutionTaskInsightAssertionApply as a3, type ExecutionTaskInsightAssertion as a4, type ExecutionTaskActionApply as a5, type ExecutionTaskAction as a6, type ExecutionTaskPlanningApply as a7, type ExecutionTaskPlanning as a8, type GroupedActionDump as a9, type ExecutionDump as b, type InsightOptions as c, type InsightAssertionResponse as d, type AIElementParseResponse as e, type AISectionParseResponse as f, type AIAssertionResponse as g, type UISection as h, type EnsureObject as i, type BasicSectionQuery as j, type InsightExtractParam as k, type DumpMeta as l, type ReportDumpWithAttributes as m, type InsightDump as n, type PartialInsightDumpFromSDK as o, type ElementById as p, type AgentWaitForOpt as q, type PlanningAction as r, type PlanningAIResponse as s, type PlanningActionParamTap as t, type PlanningActionParamHover as u, type PlanningActionParamInputOrKeyPress as v, type PlanningActionParamScroll as w, type PlanningActionParamAssert as x, type PlanningActionParamSleep as y, type PlanningActionParamError as z };
```
package/dist/types/utils.d.ts
CHANGED
package/package.json
CHANGED
```diff
@@ -1,7 +1,7 @@
 {
   "name": "@midscene/core",
   "description": "Hello, It's Midscene",
-  "version": "0.3.0",
+  "version": "0.3.1",
   "jsnext:source": "./src/index.ts",
   "main": "./dist/lib/index.js",
   "module": "./dist/es/index.js",
@@ -60,18 +60,19 @@
     }
   },
   "dependencies": {
+    "node-fetch": "2.6.7",
     "openai": "4.47.1",
-    "
-    "
+    "optional": "0.1.4",
+    "sharp": "0.33.3"
   },
   "devDependencies": {
-    "@types/node-fetch": "2.6.11",
     "@modern-js/module-tools": "^2.56.1",
     "@types/node": "^18.0.0",
+    "@types/node-fetch": "2.6.11",
+    "dotenv": "16.4.5",
     "langsmith": "0.1.36",
     "typescript": "~5.0.4",
-    "vitest": "^1.6.0"
-    "dotenv": "16.4.5"
+    "vitest": "^1.6.0"
  },
   "engines": {
     "node": ">=16.0.0"
@@ -88,6 +89,6 @@
     "new": "modern new",
     "upgrade": "modern upgrade",
     "test": "vitest --run",
-    "test:
+    "test:ai": "AITEST=true npm run test"
   }
 }
```