@midscene/core 0.8.7-beta-20241218070032.0 → 0.8.7
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/lib/ai-model.js +23 -27
- package/dist/lib/index.js +24 -28
- package/dist/lib/types/ai-model.d.ts +3 -3
- package/dist/lib/types/{index-12fdcf10.d.ts → index-43fd19f4.d.ts} +2 -3
- package/dist/lib/types/index.d.ts +4 -4
- package/dist/lib/types/{types-20204347.d.ts → types-55182ae1.d.ts} +1 -1
- package/dist/lib/types/utils.d.ts +1 -1
- package/dist/lib/utils.js +1 -1
- package/package.json +2 -2
- package/report/index.html +2 -2
package/dist/lib/ai-model.js
CHANGED
|
@@ -4355,8 +4355,8 @@ var allAIConfig = () => {
|
|
|
4355
4355
|
|
|
4356
4356
|
// src/ai-model/common.ts
|
|
4357
4357
|
async function callAiFn(options) {
|
|
4358
|
-
const {
|
|
4359
|
-
if (preferOpenAIModel(
|
|
4358
|
+
const { msgs, AIActionType: AIActionTypeValue } = options;
|
|
4359
|
+
if (preferOpenAIModel("openAI")) {
|
|
4360
4360
|
const { content, usage } = await callToGetJSONObject(
|
|
4361
4361
|
msgs,
|
|
4362
4362
|
AIActionTypeValue
|
|
@@ -4830,7 +4830,7 @@ You are a versatile professional in software UI automation. Your outstanding con
|
|
|
4830
4830
|
## Workflow
|
|
4831
4831
|
|
|
4832
4832
|
1. Receive the user's element description, screenshot, and instruction.
|
|
4833
|
-
2. Decompose the user's task into a sequence of actions, and place it in the \`actions\` field. There are different types of actions (Tap / Hover / Input / KeyboardPress / Scroll /
|
|
4833
|
+
2. Decompose the user's task into a sequence of actions, and place it in the \`actions\` field. There are different types of actions (Tap / Hover / Input / KeyboardPress / Scroll / FalsyConditionStatement / Sleep). The "About the action" section below will give you more details.
|
|
4834
4834
|
3. Precisely locate the target element if it's already shown in the screenshot, put the location info in the \`locate\` field of the action.
|
|
4835
4835
|
4. If some target elements is not shown in the screenshot, consider the user's instruction is not feasible on this page. Follow the next steps.
|
|
4836
4836
|
5. Consider whether the user's instruction will be accomplished after all the actions
|
|
@@ -4841,7 +4841,8 @@ You are a versatile professional in software UI automation. Your outstanding con
|
|
|
4841
4841
|
|
|
4842
4842
|
- All the actions you composed MUST be based on the page context information you get.
|
|
4843
4843
|
- Trust the "What have been done" field about the task (if any), don't repeat actions in it.
|
|
4844
|
-
-
|
|
4844
|
+
- Respond only with valid JSON. Do not write an introduction or summary.
|
|
4845
|
+
- If you cannot plan any action at all (i.e. empty actions array), set reason in the \`error\` field.
|
|
4845
4846
|
|
|
4846
4847
|
## About the \`actions\` field
|
|
4847
4848
|
|
|
@@ -4877,8 +4878,9 @@ Each action has a \`type\` and corresponding \`param\`. To be detailed:
|
|
|
4877
4878
|
}
|
|
4878
4879
|
* To scroll some specific element, put the element at the center of the region in the \`locate\` field. If it's a page scroll, put \`null\` in the \`locate\` field.
|
|
4879
4880
|
* \`param\` is required in this action. If some fields are not specified, use direction \`down\`, \`once\` scroll type, and \`null\` distance.
|
|
4880
|
-
- type: '
|
|
4881
|
+
- type: 'FalsyConditionStatement'
|
|
4881
4882
|
* { param: null }
|
|
4883
|
+
* use this action when the instruction is an "if" statement and the condition is falsy.
|
|
4882
4884
|
- type: 'Sleep'
|
|
4883
4885
|
* { param: { timeMs: number } }
|
|
4884
4886
|
|
|
@@ -4892,7 +4894,8 @@ Each action has a \`type\` and corresponding \`param\`. To be detailed:
|
|
|
4892
4894
|
|
|
4893
4895
|
## Output JSON Format:
|
|
4894
4896
|
|
|
4895
|
-
|
|
4897
|
+
The JSON format is as follows:
|
|
4898
|
+
|
|
4896
4899
|
{
|
|
4897
4900
|
"actions": [
|
|
4898
4901
|
{
|
|
@@ -4965,13 +4968,13 @@ By viewing the page screenshot and description, you should consider this and out
|
|
|
4965
4968
|
If the user says "If there is a popup, close it", you should consider this and output the JSON:
|
|
4966
4969
|
|
|
4967
4970
|
* By viewing the page screenshot and description, you cannot find the popup, so the condition is falsy.
|
|
4968
|
-
* The instruction itself is an "if" statement, it means the user can tolerate this situation, so you should leave a \`
|
|
4971
|
+
* The instruction itself is an "if" statement, it means the user can tolerate this situation, so you should leave a \`FalsyConditionStatement\` action.
|
|
4969
4972
|
|
|
4970
4973
|
\`\`\`json
|
|
4971
4974
|
{
|
|
4972
4975
|
"actions": [{
|
|
4973
4976
|
"thought": "There is no popup on the page",
|
|
4974
|
-
"type": "
|
|
4977
|
+
"type": "FalsyConditionStatement",
|
|
4975
4978
|
"param": null
|
|
4976
4979
|
}
|
|
4977
4980
|
],
|
|
@@ -5196,11 +5199,9 @@ async function call(messages, responseFormat) {
|
|
|
5196
5199
|
return { content, usage: completion.usage };
|
|
5197
5200
|
}
|
|
5198
5201
|
async function callToGetJSONObject(messages, AIActionTypeValue) {
|
|
5199
|
-
let responseFormat
|
|
5200
|
-
type: "json_object" /* JSON */
|
|
5201
|
-
};
|
|
5202
|
+
let responseFormat;
|
|
5202
5203
|
const model = getModelName();
|
|
5203
|
-
if (model
|
|
5204
|
+
if (model.includes("gpt-4o")) {
|
|
5204
5205
|
switch (AIActionTypeValue) {
|
|
5205
5206
|
case 0 /* ASSERT */:
|
|
5206
5207
|
responseFormat = assertSchema;
|
|
@@ -5214,9 +5215,9 @@ async function callToGetJSONObject(messages, AIActionTypeValue) {
|
|
|
5214
5215
|
responseFormat = planSchema;
|
|
5215
5216
|
break;
|
|
5216
5217
|
}
|
|
5217
|
-
|
|
5218
|
-
|
|
5219
|
-
|
|
5218
|
+
if (model === "gpt-4o-2024-05-13") {
|
|
5219
|
+
responseFormat = { type: "json_object" /* JSON */ };
|
|
5220
|
+
}
|
|
5220
5221
|
}
|
|
5221
5222
|
const safeJsonParse = (input) => {
|
|
5222
5223
|
try {
|
|
@@ -5234,7 +5235,7 @@ async function callToGetJSONObject(messages, AIActionTypeValue) {
|
|
|
5234
5235
|
try {
|
|
5235
5236
|
return { content: JSON.parse(jsonContent), usage: response.usage };
|
|
5236
5237
|
} catch (e) {
|
|
5237
|
-
throw Error(`parse json
|
|
5238
|
+
throw Error(`failed to parse json response: ${response.content}`);
|
|
5238
5239
|
}
|
|
5239
5240
|
}
|
|
5240
5241
|
function extractJSONFromCodeBlock(response) {
|
|
@@ -5281,7 +5282,7 @@ function transformElementPositionToId(aiResult, elementsInfo) {
|
|
|
5281
5282
|
};
|
|
5282
5283
|
}
|
|
5283
5284
|
async function AiInspectElement(options) {
|
|
5284
|
-
const { context, multi, targetElementDescription, callAI
|
|
5285
|
+
const { context, multi, targetElementDescription, callAI } = options;
|
|
5285
5286
|
const { screenshotBase64, screenshotBase64WithElementMarker } = context;
|
|
5286
5287
|
const { description, elementById, elementByPosition: elementByPosition2 } = await describeUserPage(context);
|
|
5287
5288
|
if (options.quickAnswer) {
|
|
@@ -5356,8 +5357,7 @@ ${JSON.stringify({
|
|
|
5356
5357
|
if (callAI) {
|
|
5357
5358
|
const res = await callAI({
|
|
5358
5359
|
msgs,
|
|
5359
|
-
AIActionType: 1 /* INSPECT_ELEMENT */,
|
|
5360
|
-
useModel
|
|
5360
|
+
AIActionType: 1 /* INSPECT_ELEMENT */
|
|
5361
5361
|
});
|
|
5362
5362
|
return {
|
|
5363
5363
|
parseResult: transformElementPositionToId(res.content, context.content),
|
|
@@ -5368,8 +5368,7 @@ ${JSON.stringify({
|
|
|
5368
5368
|
}
|
|
5369
5369
|
const inspectElement = await callAiFn({
|
|
5370
5370
|
msgs,
|
|
5371
|
-
AIActionType: 1 /* INSPECT_ELEMENT */,
|
|
5372
|
-
useModel
|
|
5371
|
+
AIActionType: 1 /* INSPECT_ELEMENT */
|
|
5373
5372
|
});
|
|
5374
5373
|
return {
|
|
5375
5374
|
parseResult: transformElementPositionToId(
|
|
@@ -5419,7 +5418,6 @@ DATA_DEMAND ends.
|
|
|
5419
5418
|
];
|
|
5420
5419
|
const result = await callAiFn({
|
|
5421
5420
|
msgs,
|
|
5422
|
-
useModel,
|
|
5423
5421
|
AIActionType: 2 /* EXTRACT_DATA */
|
|
5424
5422
|
});
|
|
5425
5423
|
return {
|
|
@@ -5462,8 +5460,7 @@ async function AiAssert(options) {
|
|
|
5462
5460
|
];
|
|
5463
5461
|
const { content: assertResult, usage } = await callAiFn({
|
|
5464
5462
|
msgs,
|
|
5465
|
-
AIActionType: 0 /* ASSERT */,
|
|
5466
|
-
useModel
|
|
5463
|
+
AIActionType: 0 /* ASSERT */
|
|
5467
5464
|
});
|
|
5468
5465
|
return {
|
|
5469
5466
|
content: assertResult,
|
|
@@ -5473,7 +5470,7 @@ async function AiAssert(options) {
|
|
|
5473
5470
|
|
|
5474
5471
|
// src/ai-model/automation/index.ts
|
|
5475
5472
|
var import_node_assert4 = __toESM(require("assert"));
|
|
5476
|
-
async function plan(userPrompt, opts
|
|
5473
|
+
async function plan(userPrompt, opts) {
|
|
5477
5474
|
const { callAI, context } = opts || {};
|
|
5478
5475
|
const { screenshotBase64, screenshotBase64WithElementMarker } = context;
|
|
5479
5476
|
const { description: pageDescription, elementByPosition: elementByPosition2 } = await describeUserPage(context);
|
|
@@ -5524,8 +5521,7 @@ ${taskBackgroundContext}
|
|
|
5524
5521
|
const call2 = callAI || callAiFn;
|
|
5525
5522
|
const { content, usage } = await call2({
|
|
5526
5523
|
msgs,
|
|
5527
|
-
AIActionType: 3 /* PLAN */,
|
|
5528
|
-
useModel
|
|
5524
|
+
AIActionType: 3 /* PLAN */
|
|
5529
5525
|
});
|
|
5530
5526
|
const planFromAI = content;
|
|
5531
5527
|
const actions = (planFromAI == null ? void 0 : planFromAI.actions) || [];
|
package/dist/lib/index.js
CHANGED
|
@@ -4506,7 +4506,7 @@ function stringifyDumpData(data, indents) {
|
|
|
4506
4506
|
return JSON.stringify(data, replacerForPageObject, indents);
|
|
4507
4507
|
}
|
|
4508
4508
|
function getVersion() {
|
|
4509
|
-
return "0.8.7-beta-20241218070032.0";
|
|
4509
|
+
return "0.8.7";
|
|
4510
4510
|
}
|
|
4511
4511
|
|
|
4512
4512
|
// src/action/executor.ts
|
|
@@ -5177,7 +5177,7 @@ You are a versatile professional in software UI automation. Your outstanding con
|
|
|
5177
5177
|
## Workflow
|
|
5178
5178
|
|
|
5179
5179
|
1. Receive the user's element description, screenshot, and instruction.
|
|
5180
|
-
2. Decompose the user's task into a sequence of actions, and place it in the \`actions\` field. There are different types of actions (Tap / Hover / Input / KeyboardPress / Scroll /
|
|
5180
|
+
2. Decompose the user's task into a sequence of actions, and place it in the \`actions\` field. There are different types of actions (Tap / Hover / Input / KeyboardPress / Scroll / FalsyConditionStatement / Sleep). The "About the action" section below will give you more details.
|
|
5181
5181
|
3. Precisely locate the target element if it's already shown in the screenshot, put the location info in the \`locate\` field of the action.
|
|
5182
5182
|
4. If some target elements is not shown in the screenshot, consider the user's instruction is not feasible on this page. Follow the next steps.
|
|
5183
5183
|
5. Consider whether the user's instruction will be accomplished after all the actions
|
|
@@ -5188,7 +5188,8 @@ You are a versatile professional in software UI automation. Your outstanding con
|
|
|
5188
5188
|
|
|
5189
5189
|
- All the actions you composed MUST be based on the page context information you get.
|
|
5190
5190
|
- Trust the "What have been done" field about the task (if any), don't repeat actions in it.
|
|
5191
|
-
-
|
|
5191
|
+
- Respond only with valid JSON. Do not write an introduction or summary.
|
|
5192
|
+
- If you cannot plan any action at all (i.e. empty actions array), set reason in the \`error\` field.
|
|
5192
5193
|
|
|
5193
5194
|
## About the \`actions\` field
|
|
5194
5195
|
|
|
@@ -5224,8 +5225,9 @@ Each action has a \`type\` and corresponding \`param\`. To be detailed:
|
|
|
5224
5225
|
}
|
|
5225
5226
|
* To scroll some specific element, put the element at the center of the region in the \`locate\` field. If it's a page scroll, put \`null\` in the \`locate\` field.
|
|
5226
5227
|
* \`param\` is required in this action. If some fields are not specified, use direction \`down\`, \`once\` scroll type, and \`null\` distance.
|
|
5227
|
-
- type: '
|
|
5228
|
+
- type: 'FalsyConditionStatement'
|
|
5228
5229
|
* { param: null }
|
|
5230
|
+
* use this action when the instruction is an "if" statement and the condition is falsy.
|
|
5229
5231
|
- type: 'Sleep'
|
|
5230
5232
|
* { param: { timeMs: number } }
|
|
5231
5233
|
|
|
@@ -5239,7 +5241,8 @@ Each action has a \`type\` and corresponding \`param\`. To be detailed:
|
|
|
5239
5241
|
|
|
5240
5242
|
## Output JSON Format:
|
|
5241
5243
|
|
|
5242
|
-
|
|
5244
|
+
The JSON format is as follows:
|
|
5245
|
+
|
|
5243
5246
|
{
|
|
5244
5247
|
"actions": [
|
|
5245
5248
|
{
|
|
@@ -5312,13 +5315,13 @@ By viewing the page screenshot and description, you should consider this and out
|
|
|
5312
5315
|
If the user says "If there is a popup, close it", you should consider this and output the JSON:
|
|
5313
5316
|
|
|
5314
5317
|
* By viewing the page screenshot and description, you cannot find the popup, so the condition is falsy.
|
|
5315
|
-
* The instruction itself is an "if" statement, it means the user can tolerate this situation, so you should leave a \`
|
|
5318
|
+
* The instruction itself is an "if" statement, it means the user can tolerate this situation, so you should leave a \`FalsyConditionStatement\` action.
|
|
5316
5319
|
|
|
5317
5320
|
\`\`\`json
|
|
5318
5321
|
{
|
|
5319
5322
|
"actions": [{
|
|
5320
5323
|
"thought": "There is no popup on the page",
|
|
5321
|
-
"type": "
|
|
5324
|
+
"type": "FalsyConditionStatement",
|
|
5322
5325
|
"param": null
|
|
5323
5326
|
}
|
|
5324
5327
|
],
|
|
@@ -5543,11 +5546,9 @@ async function call(messages, responseFormat) {
|
|
|
5543
5546
|
return { content, usage: completion.usage };
|
|
5544
5547
|
}
|
|
5545
5548
|
async function callToGetJSONObject(messages, AIActionTypeValue) {
|
|
5546
|
-
let responseFormat
|
|
5547
|
-
type: "json_object" /* JSON */
|
|
5548
|
-
};
|
|
5549
|
+
let responseFormat;
|
|
5549
5550
|
const model = getModelName();
|
|
5550
|
-
if (model
|
|
5551
|
+
if (model.includes("gpt-4o")) {
|
|
5551
5552
|
switch (AIActionTypeValue) {
|
|
5552
5553
|
case 0 /* ASSERT */:
|
|
5553
5554
|
responseFormat = assertSchema;
|
|
@@ -5561,9 +5562,9 @@ async function callToGetJSONObject(messages, AIActionTypeValue) {
|
|
|
5561
5562
|
responseFormat = planSchema;
|
|
5562
5563
|
break;
|
|
5563
5564
|
}
|
|
5564
|
-
|
|
5565
|
-
|
|
5566
|
-
|
|
5565
|
+
if (model === "gpt-4o-2024-05-13") {
|
|
5566
|
+
responseFormat = { type: "json_object" /* JSON */ };
|
|
5567
|
+
}
|
|
5567
5568
|
}
|
|
5568
5569
|
const safeJsonParse = (input) => {
|
|
5569
5570
|
try {
|
|
@@ -5581,7 +5582,7 @@ async function callToGetJSONObject(messages, AIActionTypeValue) {
|
|
|
5581
5582
|
try {
|
|
5582
5583
|
return { content: JSON.parse(jsonContent), usage: response.usage };
|
|
5583
5584
|
} catch (e) {
|
|
5584
|
-
throw Error(`parse json
|
|
5585
|
+
throw Error(`failed to parse json response: ${response.content}`);
|
|
5585
5586
|
}
|
|
5586
5587
|
}
|
|
5587
5588
|
function extractJSONFromCodeBlock(response) {
|
|
@@ -5602,8 +5603,8 @@ function extractJSONFromCodeBlock(response) {
|
|
|
5602
5603
|
|
|
5603
5604
|
// src/ai-model/common.ts
|
|
5604
5605
|
async function callAiFn(options) {
|
|
5605
|
-
const {
|
|
5606
|
-
if (preferOpenAIModel(
|
|
5606
|
+
const { msgs, AIActionType: AIActionTypeValue } = options;
|
|
5607
|
+
if (preferOpenAIModel("openAI")) {
|
|
5607
5608
|
const { content, usage } = await callToGetJSONObject(
|
|
5608
5609
|
msgs,
|
|
5609
5610
|
AIActionTypeValue
|
|
@@ -5654,7 +5655,7 @@ function transformElementPositionToId(aiResult, elementsInfo) {
|
|
|
5654
5655
|
};
|
|
5655
5656
|
}
|
|
5656
5657
|
async function AiInspectElement(options) {
|
|
5657
|
-
const { context, multi, targetElementDescription, callAI
|
|
5658
|
+
const { context, multi, targetElementDescription, callAI } = options;
|
|
5658
5659
|
const { screenshotBase64, screenshotBase64WithElementMarker } = context;
|
|
5659
5660
|
const { description, elementById, elementByPosition: elementByPosition2 } = await describeUserPage(context);
|
|
5660
5661
|
if (options.quickAnswer) {
|
|
@@ -5729,8 +5730,7 @@ ${JSON.stringify({
|
|
|
5729
5730
|
if (callAI) {
|
|
5730
5731
|
const res = await callAI({
|
|
5731
5732
|
msgs,
|
|
5732
|
-
AIActionType: 1 /* INSPECT_ELEMENT */,
|
|
5733
|
-
useModel
|
|
5733
|
+
AIActionType: 1 /* INSPECT_ELEMENT */
|
|
5734
5734
|
});
|
|
5735
5735
|
return {
|
|
5736
5736
|
parseResult: transformElementPositionToId(res.content, context.content),
|
|
@@ -5741,8 +5741,7 @@ ${JSON.stringify({
|
|
|
5741
5741
|
}
|
|
5742
5742
|
const inspectElement = await callAiFn({
|
|
5743
5743
|
msgs,
|
|
5744
|
-
AIActionType: 1 /* INSPECT_ELEMENT */,
|
|
5745
|
-
useModel
|
|
5744
|
+
AIActionType: 1 /* INSPECT_ELEMENT */
|
|
5746
5745
|
});
|
|
5747
5746
|
return {
|
|
5748
5747
|
parseResult: transformElementPositionToId(
|
|
@@ -5792,7 +5791,6 @@ DATA_DEMAND ends.
|
|
|
5792
5791
|
];
|
|
5793
5792
|
const result = await callAiFn({
|
|
5794
5793
|
msgs,
|
|
5795
|
-
useModel,
|
|
5796
5794
|
AIActionType: 2 /* EXTRACT_DATA */
|
|
5797
5795
|
});
|
|
5798
5796
|
return {
|
|
@@ -5835,8 +5833,7 @@ async function AiAssert(options) {
|
|
|
5835
5833
|
];
|
|
5836
5834
|
const { content: assertResult, usage } = await callAiFn({
|
|
5837
5835
|
msgs,
|
|
5838
|
-
AIActionType: 0 /* ASSERT */,
|
|
5839
|
-
useModel
|
|
5836
|
+
AIActionType: 0 /* ASSERT */
|
|
5840
5837
|
});
|
|
5841
5838
|
return {
|
|
5842
5839
|
content: assertResult,
|
|
@@ -5846,7 +5843,7 @@ async function AiAssert(options) {
|
|
|
5846
5843
|
|
|
5847
5844
|
// src/ai-model/automation/index.ts
|
|
5848
5845
|
var import_node_assert6 = __toESM(require("assert"));
|
|
5849
|
-
async function plan(userPrompt, opts
|
|
5846
|
+
async function plan(userPrompt, opts) {
|
|
5850
5847
|
const { callAI, context } = opts || {};
|
|
5851
5848
|
const { screenshotBase64, screenshotBase64WithElementMarker } = context;
|
|
5852
5849
|
const { description: pageDescription, elementByPosition: elementByPosition2 } = await describeUserPage(context);
|
|
@@ -5897,8 +5894,7 @@ ${taskBackgroundContext}
|
|
|
5897
5894
|
const call2 = callAI || callAiFn;
|
|
5898
5895
|
const { content, usage } = await call2({
|
|
5899
5896
|
msgs,
|
|
5900
|
-
AIActionType: 3 /* PLAN */,
|
|
5901
|
-
useModel
|
|
5897
|
+
AIActionType: 3 /* PLAN */
|
|
5902
5898
|
});
|
|
5903
5899
|
const planFromAI = content;
|
|
5904
5900
|
const actions = (planFromAI == null ? void 0 : planFromAI.actions) || [];
|
|
@@ -1,8 +1,8 @@
|
|
|
1
|
-
import { g as AIUsageInfo } from './types-20204347.js';
|
|
1
|
+
import { g as AIUsageInfo } from './types-55182ae1.js';
|
|
2
2
|
import { ChatCompletionMessageParam } from 'openai/resources';
|
|
3
3
|
export { ChatCompletionMessageParam } from 'openai/resources';
|
|
4
|
-
import { A as AIActionType } from './index-12fdcf10.js';
|
|
5
|
-
export { f as AiAssert, e as AiExtractElementInfo, b as AiInspectElement, c as callAiFn, d as describeUserPage, p as plan, t as transformElementPositionToId } from './index-12fdcf10.js';
|
|
4
|
+
import { A as AIActionType } from './index-43fd19f4.js';
|
|
5
|
+
export { f as AiAssert, e as AiExtractElementInfo, b as AiInspectElement, c as callAiFn, d as describeUserPage, p as plan, t as transformElementPositionToId } from './index-43fd19f4.js';
|
|
6
6
|
|
|
7
7
|
declare function callToGetJSONObject<T>(messages: ChatCompletionMessageParam[], AIActionTypeValue: AIActionType): Promise<{
|
|
8
8
|
content: T;
|
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
import { g as AIUsageInfo, B as BaseElement, U as UIContext, m as AIElementResponse, A as AISingleElementResponse, i as AISingleElementResponseById, n as AISectionParseResponse, o as AIAssertionResponse, F as PlanningAIResponse } from './types-20204347.js';
|
|
1
|
+
import { g as AIUsageInfo, B as BaseElement, U as UIContext, m as AIElementResponse, A as AISingleElementResponse, i as AISingleElementResponseById, n as AISectionParseResponse, o as AIAssertionResponse, F as PlanningAIResponse } from './types-55182ae1.js';
|
|
2
2
|
import { ChatCompletionSystemMessageParam, ChatCompletionUserMessageParam } from 'openai/resources';
|
|
3
3
|
|
|
4
4
|
type AIArgs = [
|
|
@@ -14,7 +14,6 @@ declare enum AIActionType {
|
|
|
14
14
|
declare function callAiFn<T>(options: {
|
|
15
15
|
msgs: AIArgs;
|
|
16
16
|
AIActionType: AIActionType;
|
|
17
|
-
useModel?: 'openAI' | 'coze';
|
|
18
17
|
}): Promise<{
|
|
19
18
|
content: T;
|
|
20
19
|
usage?: AIUsageInfo;
|
|
@@ -116,6 +115,6 @@ declare function plan(userPrompt: string, opts: {
|
|
|
116
115
|
originalPrompt?: string;
|
|
117
116
|
context: UIContext;
|
|
118
117
|
callAI?: typeof callAiFn<PlanningAIResponse>;
|
|
119
|
-
}
|
|
118
|
+
}): Promise<PlanningAIResponse>;
|
|
120
119
|
|
|
121
120
|
export { AIActionType as A, retrieveSection as a, AiInspectElement as b, callAiFn as c, describeUserPage as d, AiExtractElementInfo as e, AiAssert as f, plan as p, retrieveElement as r, transformElementPositionToId as t };
|
|
@@ -1,8 +1,8 @@
|
|
|
1
|
-
import { E as ExecutionTask, a as ExecutionTaskProgressOptions, b as ExecutionTaskApply, c as ExecutionDump, B as BaseElement, U as UIContext, I as InsightAction, D as DumpSubscriber, d as InsightOptions, e as InsightTaskInfo, A as AISingleElementResponse, f as InsightAssertionResponse } from './types-20204347.js';
|
|
2
|
-
export { o as AIAssertionResponse, k as AIElementIdResponse, l as AIElementPositionResponse, m as AIElementResponse, h as AIResponseFormat, n as AISectionParseResponse, i as AISingleElementResponseById, j as AISingleElementResponseByPosition, g as AIUsageInfo, x as AgentAssertOpt, w as AgentWaitForOpt, X as BaseAgentParserOpt, C as CallAIFn, W as Color, r as DumpMeta, v as ElementById, p as EnsureObject, _ as ExecutionRecorderItem, ag as ExecutionTaskAction, af as ExecutionTaskActionApply, ae as ExecutionTaskInsightAssertion, ad as ExecutionTaskInsightAssertionApply, ac as ExecutionTaskInsightAssertionParam, a5 as ExecutionTaskInsightDumpLog, a7 as ExecutionTaskInsightLocate, a6 as ExecutionTaskInsightLocateApply, a4 as ExecutionTaskInsightLocateOutput, a3 as ExecutionTaskInsightLocateParam, ab as ExecutionTaskInsightQuery, aa as ExecutionTaskInsightQueryApply, a9 as ExecutionTaskInsightQueryOutput, a8 as ExecutionTaskInsightQueryParam, ai as ExecutionTaskPlanning, ah as ExecutionTaskPlanningApply, a2 as ExecutionTaskReturn, $ as ExecutionTaskType, a0 as ExecutorContext, aj as GroupedActionDump, t as InsightDump, q as InsightExtractParam, L as LiteUISection, u as PartialInsightDumpFromSDK, F as PlanningAIResponse, z as PlanningAction, O as PlanningActionParamAssert, T as PlanningActionParamError, K as PlanningActionParamHover, M as PlanningActionParamInputOrKeyPress, H as PlanningActionParamPlan, N as PlanningActionParamScroll, Q as PlanningActionParamSleep, J as PlanningActionParamTap, V as PlanningActionParamWaitFor, G as PlanningFurtherPlan, y as PlanningLocateParam, Z as PlaywrightParserOpt, P as Point, Y as PuppeteerParserOpt, R as Rect, s as ReportDumpWithAttributes, S as Size, a1 as TaskCacheInfo } from './types-20204347.js';
|
|
1
|
+
import { E as ExecutionTask, a as ExecutionTaskProgressOptions, b as ExecutionTaskApply, c as ExecutionDump, B as BaseElement, U as UIContext, I as InsightAction, D as DumpSubscriber, d as InsightOptions, e as InsightTaskInfo, A as AISingleElementResponse, f as InsightAssertionResponse } from './types-55182ae1.js';
|
|
2
|
+
export { o as AIAssertionResponse, k as AIElementIdResponse, l as AIElementPositionResponse, m as AIElementResponse, h as AIResponseFormat, n as AISectionParseResponse, i as AISingleElementResponseById, j as AISingleElementResponseByPosition, g as AIUsageInfo, x as AgentAssertOpt, w as AgentWaitForOpt, X as BaseAgentParserOpt, C as CallAIFn, W as Color, r as DumpMeta, v as ElementById, p as EnsureObject, _ as ExecutionRecorderItem, ag as ExecutionTaskAction, af as ExecutionTaskActionApply, ae as ExecutionTaskInsightAssertion, ad as ExecutionTaskInsightAssertionApply, ac as ExecutionTaskInsightAssertionParam, a5 as ExecutionTaskInsightDumpLog, a7 as ExecutionTaskInsightLocate, a6 as ExecutionTaskInsightLocateApply, a4 as ExecutionTaskInsightLocateOutput, a3 as ExecutionTaskInsightLocateParam, ab as ExecutionTaskInsightQuery, aa as ExecutionTaskInsightQueryApply, a9 as ExecutionTaskInsightQueryOutput, a8 as ExecutionTaskInsightQueryParam, ai as ExecutionTaskPlanning, ah as ExecutionTaskPlanningApply, a2 as ExecutionTaskReturn, $ as ExecutionTaskType, a0 as ExecutorContext, aj as GroupedActionDump, t as InsightDump, q as InsightExtractParam, L as LiteUISection, u as PartialInsightDumpFromSDK, F as PlanningAIResponse, z as PlanningAction, O as PlanningActionParamAssert, T as PlanningActionParamError, K as PlanningActionParamHover, M as PlanningActionParamInputOrKeyPress, H as PlanningActionParamPlan, N as PlanningActionParamScroll, Q as PlanningActionParamSleep, J as PlanningActionParamTap, V as PlanningActionParamWaitFor, G as PlanningFurtherPlan, y as PlanningLocateParam, Z as PlaywrightParserOpt, P as Point, Y as PuppeteerParserOpt, R as Rect, s as ReportDumpWithAttributes, S as Size, a1 as TaskCacheInfo } from './types-55182ae1.js';
|
|
3
3
|
export { allAIConfig, getAIConfig, overrideAIConfig } from './env.js';
|
|
4
|
-
import { c as callAiFn, r as retrieveElement, a as retrieveSection } from './index-12fdcf10.js';
|
|
5
|
-
export { p as plan, t as transformElementPositionToId } from './index-12fdcf10.js';
|
|
4
|
+
import { c as callAiFn, r as retrieveElement, a as retrieveSection } from './index-43fd19f4.js';
|
|
5
|
+
export { p as plan, t as transformElementPositionToId } from './index-43fd19f4.js';
|
|
6
6
|
export { getLogDirByType, getVersion, setLogDir } from './utils.js';
|
|
7
7
|
import 'openai/resources';
|
|
8
8
|
|
|
@@ -176,7 +176,7 @@ interface PlanningLocateParam {
|
|
|
176
176
|
}
|
|
177
177
|
interface PlanningAction<ParamType = any> {
|
|
178
178
|
thought?: string;
|
|
179
|
-
type: 'Locate' | 'Tap' | 'Hover' | 'Input' | 'KeyboardPress' | 'Scroll' | 'Error' | '
|
|
179
|
+
type: 'Locate' | 'Tap' | 'Hover' | 'Input' | 'KeyboardPress' | 'Scroll' | 'Error' | 'FalsyConditionStatement' | 'Assert' | 'AssertWithoutThrow' | 'Sleep';
|
|
180
180
|
param: ParamType;
|
|
181
181
|
locate: PlanningLocateParam | null;
|
|
182
182
|
}
|
package/dist/lib/utils.js
CHANGED
|
@@ -272,7 +272,7 @@ function stringifyDumpData(data, indents) {
|
|
|
272
272
|
return JSON.stringify(data, replacerForPageObject, indents);
|
|
273
273
|
}
|
|
274
274
|
function getVersion() {
|
|
275
|
-
return "0.8.7-beta-20241218070032.0";
|
|
275
|
+
return "0.8.7";
|
|
276
276
|
}
|
|
277
277
|
function debugLog(...message) {
|
|
278
278
|
const debugMode = getAIConfig(MIDSCENE_DEBUG_MODE);
|
package/package.json
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@midscene/core",
|
|
3
3
|
"description": "An AI-powered automation SDK can control the page, perform assertions, and extract data in JSON format using natural language. See https://midscenejs.com/ for details.",
|
|
4
|
-
"version": "0.8.7
|
|
4
|
+
"version": "0.8.7",
|
|
5
5
|
"repository": "https://github.com/web-infra-dev/midscene",
|
|
6
6
|
"homepage": "https://midscenejs.com/",
|
|
7
7
|
"jsnext:source": "./src/index.ts",
|
|
@@ -39,7 +39,7 @@
|
|
|
39
39
|
"openai": "4.57.1",
|
|
40
40
|
"optional": "0.1.4",
|
|
41
41
|
"socks-proxy-agent": "8.0.4",
|
|
42
|
-
"@midscene/shared": "0.8.7
|
|
42
|
+
"@midscene/shared": "0.8.7"
|
|
43
43
|
},
|
|
44
44
|
"devDependencies": {
|
|
45
45
|
"@modern-js/module-tools": "2.60.6",
|