@donggui/core 1.5.4-donggui.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +21 -0
- package/README.md +9 -0
- package/dist/es/agent/agent.mjs +709 -0
- package/dist/es/agent/agent.mjs.map +1 -0
- package/dist/es/agent/common.mjs +0 -0
- package/dist/es/agent/execution-session.mjs +41 -0
- package/dist/es/agent/execution-session.mjs.map +1 -0
- package/dist/es/agent/index.mjs +6 -0
- package/dist/es/agent/task-builder.mjs +330 -0
- package/dist/es/agent/task-builder.mjs.map +1 -0
- package/dist/es/agent/task-cache.mjs +186 -0
- package/dist/es/agent/task-cache.mjs.map +1 -0
- package/dist/es/agent/tasks.mjs +422 -0
- package/dist/es/agent/tasks.mjs.map +1 -0
- package/dist/es/agent/ui-utils.mjs +91 -0
- package/dist/es/agent/ui-utils.mjs.map +1 -0
- package/dist/es/agent/utils.mjs +198 -0
- package/dist/es/agent/utils.mjs.map +1 -0
- package/dist/es/ai-model/auto-glm/actions.mjs +224 -0
- package/dist/es/ai-model/auto-glm/actions.mjs.map +1 -0
- package/dist/es/ai-model/auto-glm/index.mjs +6 -0
- package/dist/es/ai-model/auto-glm/parser.mjs +239 -0
- package/dist/es/ai-model/auto-glm/parser.mjs.map +1 -0
- package/dist/es/ai-model/auto-glm/planning.mjs +71 -0
- package/dist/es/ai-model/auto-glm/planning.mjs.map +1 -0
- package/dist/es/ai-model/auto-glm/prompt.mjs +222 -0
- package/dist/es/ai-model/auto-glm/prompt.mjs.map +1 -0
- package/dist/es/ai-model/auto-glm/util.mjs +9 -0
- package/dist/es/ai-model/auto-glm/util.mjs.map +1 -0
- package/dist/es/ai-model/conversation-history.mjs +195 -0
- package/dist/es/ai-model/conversation-history.mjs.map +1 -0
- package/dist/es/ai-model/index.mjs +11 -0
- package/dist/es/ai-model/inspect.mjs +386 -0
- package/dist/es/ai-model/inspect.mjs.map +1 -0
- package/dist/es/ai-model/llm-planning.mjs +233 -0
- package/dist/es/ai-model/llm-planning.mjs.map +1 -0
- package/dist/es/ai-model/prompt/common.mjs +7 -0
- package/dist/es/ai-model/prompt/common.mjs.map +1 -0
- package/dist/es/ai-model/prompt/describe.mjs +66 -0
- package/dist/es/ai-model/prompt/describe.mjs.map +1 -0
- package/dist/es/ai-model/prompt/extraction.mjs +129 -0
- package/dist/es/ai-model/prompt/extraction.mjs.map +1 -0
- package/dist/es/ai-model/prompt/llm-locator.mjs +51 -0
- package/dist/es/ai-model/prompt/llm-locator.mjs.map +1 -0
- package/dist/es/ai-model/prompt/llm-planning.mjs +364 -0
- package/dist/es/ai-model/prompt/llm-planning.mjs.map +1 -0
- package/dist/es/ai-model/prompt/llm-section-locator.mjs +44 -0
- package/dist/es/ai-model/prompt/llm-section-locator.mjs.map +1 -0
- package/dist/es/ai-model/prompt/order-sensitive-judge.mjs +35 -0
- package/dist/es/ai-model/prompt/order-sensitive-judge.mjs.map +1 -0
- package/dist/es/ai-model/prompt/playwright-generator.mjs +117 -0
- package/dist/es/ai-model/prompt/playwright-generator.mjs.map +1 -0
- package/dist/es/ai-model/prompt/ui-tars-planning.mjs +36 -0
- package/dist/es/ai-model/prompt/ui-tars-planning.mjs.map +1 -0
- package/dist/es/ai-model/prompt/util.mjs +59 -0
- package/dist/es/ai-model/prompt/util.mjs.map +1 -0
- package/dist/es/ai-model/prompt/yaml-generator.mjs +219 -0
- package/dist/es/ai-model/prompt/yaml-generator.mjs.map +1 -0
- package/dist/es/ai-model/service-caller/index.mjs +466 -0
- package/dist/es/ai-model/service-caller/index.mjs.map +1 -0
- package/dist/es/ai-model/ui-tars-planning.mjs +249 -0
- package/dist/es/ai-model/ui-tars-planning.mjs.map +1 -0
- package/dist/es/common.mjs +371 -0
- package/dist/es/common.mjs.map +1 -0
- package/dist/es/device/device-options.mjs +0 -0
- package/dist/es/device/index.mjs +300 -0
- package/dist/es/device/index.mjs.map +1 -0
- package/dist/es/dump/html-utils.mjs +211 -0
- package/dist/es/dump/html-utils.mjs.map +1 -0
- package/dist/es/dump/image-restoration.mjs +43 -0
- package/dist/es/dump/image-restoration.mjs.map +1 -0
- package/dist/es/dump/index.mjs +3 -0
- package/dist/es/index.mjs +15 -0
- package/dist/es/index.mjs.map +1 -0
- package/dist/es/report-generator.mjs +134 -0
- package/dist/es/report-generator.mjs.map +1 -0
- package/dist/es/report.mjs +111 -0
- package/dist/es/report.mjs.map +1 -0
- package/dist/es/screenshot-item.mjs +105 -0
- package/dist/es/screenshot-item.mjs.map +1 -0
- package/dist/es/service/index.mjs +256 -0
- package/dist/es/service/index.mjs.map +1 -0
- package/dist/es/service/utils.mjs +15 -0
- package/dist/es/service/utils.mjs.map +1 -0
- package/dist/es/skill/index.mjs +38 -0
- package/dist/es/skill/index.mjs.map +1 -0
- package/dist/es/task-runner.mjs +258 -0
- package/dist/es/task-runner.mjs.map +1 -0
- package/dist/es/task-timing.mjs +12 -0
- package/dist/es/task-timing.mjs.map +1 -0
- package/dist/es/tree.mjs +13 -0
- package/dist/es/tree.mjs.map +1 -0
- package/dist/es/types.mjs +196 -0
- package/dist/es/types.mjs.map +1 -0
- package/dist/es/utils.mjs +218 -0
- package/dist/es/utils.mjs.map +1 -0
- package/dist/es/yaml/builder.mjs +13 -0
- package/dist/es/yaml/builder.mjs.map +1 -0
- package/dist/es/yaml/index.mjs +4 -0
- package/dist/es/yaml/player.mjs +418 -0
- package/dist/es/yaml/player.mjs.map +1 -0
- package/dist/es/yaml/utils.mjs +73 -0
- package/dist/es/yaml/utils.mjs.map +1 -0
- package/dist/es/yaml.mjs +0 -0
- package/dist/lib/agent/agent.js +757 -0
- package/dist/lib/agent/agent.js.map +1 -0
- package/dist/lib/agent/common.js +5 -0
- package/dist/lib/agent/execution-session.js +75 -0
- package/dist/lib/agent/execution-session.js.map +1 -0
- package/dist/lib/agent/index.js +81 -0
- package/dist/lib/agent/index.js.map +1 -0
- package/dist/lib/agent/task-builder.js +367 -0
- package/dist/lib/agent/task-builder.js.map +1 -0
- package/dist/lib/agent/task-cache.js +238 -0
- package/dist/lib/agent/task-cache.js.map +1 -0
- package/dist/lib/agent/tasks.js +465 -0
- package/dist/lib/agent/tasks.js.map +1 -0
- package/dist/lib/agent/ui-utils.js +143 -0
- package/dist/lib/agent/ui-utils.js.map +1 -0
- package/dist/lib/agent/utils.js +275 -0
- package/dist/lib/agent/utils.js.map +1 -0
- package/dist/lib/ai-model/auto-glm/actions.js +258 -0
- package/dist/lib/ai-model/auto-glm/actions.js.map +1 -0
- package/dist/lib/ai-model/auto-glm/index.js +66 -0
- package/dist/lib/ai-model/auto-glm/index.js.map +1 -0
- package/dist/lib/ai-model/auto-glm/parser.js +282 -0
- package/dist/lib/ai-model/auto-glm/parser.js.map +1 -0
- package/dist/lib/ai-model/auto-glm/planning.js +105 -0
- package/dist/lib/ai-model/auto-glm/planning.js.map +1 -0
- package/dist/lib/ai-model/auto-glm/prompt.js +259 -0
- package/dist/lib/ai-model/auto-glm/prompt.js.map +1 -0
- package/dist/lib/ai-model/auto-glm/util.js +46 -0
- package/dist/lib/ai-model/auto-glm/util.js.map +1 -0
- package/dist/lib/ai-model/conversation-history.js +229 -0
- package/dist/lib/ai-model/conversation-history.js.map +1 -0
- package/dist/lib/ai-model/index.js +125 -0
- package/dist/lib/ai-model/index.js.map +1 -0
- package/dist/lib/ai-model/inspect.js +429 -0
- package/dist/lib/ai-model/inspect.js.map +1 -0
- package/dist/lib/ai-model/llm-planning.js +270 -0
- package/dist/lib/ai-model/llm-planning.js.map +1 -0
- package/dist/lib/ai-model/prompt/common.js +41 -0
- package/dist/lib/ai-model/prompt/common.js.map +1 -0
- package/dist/lib/ai-model/prompt/describe.js +100 -0
- package/dist/lib/ai-model/prompt/describe.js.map +1 -0
- package/dist/lib/ai-model/prompt/extraction.js +169 -0
- package/dist/lib/ai-model/prompt/extraction.js.map +1 -0
- package/dist/lib/ai-model/prompt/llm-locator.js +88 -0
- package/dist/lib/ai-model/prompt/llm-locator.js.map +1 -0
- package/dist/lib/ai-model/prompt/llm-planning.js +401 -0
- package/dist/lib/ai-model/prompt/llm-planning.js.map +1 -0
- package/dist/lib/ai-model/prompt/llm-section-locator.js +81 -0
- package/dist/lib/ai-model/prompt/llm-section-locator.js.map +1 -0
- package/dist/lib/ai-model/prompt/order-sensitive-judge.js +72 -0
- package/dist/lib/ai-model/prompt/order-sensitive-judge.js.map +1 -0
- package/dist/lib/ai-model/prompt/playwright-generator.js +178 -0
- package/dist/lib/ai-model/prompt/playwright-generator.js.map +1 -0
- package/dist/lib/ai-model/prompt/ui-tars-planning.js +73 -0
- package/dist/lib/ai-model/prompt/ui-tars-planning.js.map +1 -0
- package/dist/lib/ai-model/prompt/util.js +105 -0
- package/dist/lib/ai-model/prompt/util.js.map +1 -0
- package/dist/lib/ai-model/prompt/yaml-generator.js +280 -0
- package/dist/lib/ai-model/prompt/yaml-generator.js.map +1 -0
- package/dist/lib/ai-model/service-caller/index.js +531 -0
- package/dist/lib/ai-model/service-caller/index.js.map +1 -0
- package/dist/lib/ai-model/ui-tars-planning.js +283 -0
- package/dist/lib/ai-model/ui-tars-planning.js.map +1 -0
- package/dist/lib/common.js +480 -0
- package/dist/lib/common.js.map +1 -0
- package/dist/lib/device/device-options.js +20 -0
- package/dist/lib/device/device-options.js.map +1 -0
- package/dist/lib/device/index.js +418 -0
- package/dist/lib/device/index.js.map +1 -0
- package/dist/lib/dump/html-utils.js +281 -0
- package/dist/lib/dump/html-utils.js.map +1 -0
- package/dist/lib/dump/image-restoration.js +77 -0
- package/dist/lib/dump/image-restoration.js.map +1 -0
- package/dist/lib/dump/index.js +60 -0
- package/dist/lib/dump/index.js.map +1 -0
- package/dist/lib/index.js +146 -0
- package/dist/lib/index.js.map +1 -0
- package/dist/lib/report-generator.js +172 -0
- package/dist/lib/report-generator.js.map +1 -0
- package/dist/lib/report.js +145 -0
- package/dist/lib/report.js.map +1 -0
- package/dist/lib/screenshot-item.js +139 -0
- package/dist/lib/screenshot-item.js.map +1 -0
- package/dist/lib/service/index.js +290 -0
- package/dist/lib/service/index.js.map +1 -0
- package/dist/lib/service/utils.js +49 -0
- package/dist/lib/service/utils.js.map +1 -0
- package/dist/lib/skill/index.js +72 -0
- package/dist/lib/skill/index.js.map +1 -0
- package/dist/lib/task-runner.js +295 -0
- package/dist/lib/task-runner.js.map +1 -0
- package/dist/lib/task-timing.js +46 -0
- package/dist/lib/task-timing.js.map +1 -0
- package/dist/lib/tree.js +53 -0
- package/dist/lib/tree.js.map +1 -0
- package/dist/lib/types.js +285 -0
- package/dist/lib/types.js.map +1 -0
- package/dist/lib/utils.js +297 -0
- package/dist/lib/utils.js.map +1 -0
- package/dist/lib/yaml/builder.js +57 -0
- package/dist/lib/yaml/builder.js.map +1 -0
- package/dist/lib/yaml/index.js +81 -0
- package/dist/lib/yaml/index.js.map +1 -0
- package/dist/lib/yaml/player.js +452 -0
- package/dist/lib/yaml/player.js.map +1 -0
- package/dist/lib/yaml/utils.js +126 -0
- package/dist/lib/yaml/utils.js.map +1 -0
- package/dist/lib/yaml.js +20 -0
- package/dist/lib/yaml.js.map +1 -0
- package/dist/types/agent/agent.d.ts +190 -0
- package/dist/types/agent/common.d.ts +0 -0
- package/dist/types/agent/execution-session.d.ts +36 -0
- package/dist/types/agent/index.d.ts +10 -0
- package/dist/types/agent/task-builder.d.ts +34 -0
- package/dist/types/agent/task-cache.d.ts +48 -0
- package/dist/types/agent/tasks.d.ts +70 -0
- package/dist/types/agent/ui-utils.d.ts +14 -0
- package/dist/types/agent/utils.d.ts +29 -0
- package/dist/types/ai-model/auto-glm/actions.d.ts +77 -0
- package/dist/types/ai-model/auto-glm/index.d.ts +6 -0
- package/dist/types/ai-model/auto-glm/parser.d.ts +18 -0
- package/dist/types/ai-model/auto-glm/planning.d.ts +10 -0
- package/dist/types/ai-model/auto-glm/prompt.d.ts +27 -0
- package/dist/types/ai-model/auto-glm/util.d.ts +13 -0
- package/dist/types/ai-model/conversation-history.d.ts +105 -0
- package/dist/types/ai-model/index.d.ts +14 -0
- package/dist/types/ai-model/inspect.d.ts +58 -0
- package/dist/types/ai-model/llm-planning.d.ts +19 -0
- package/dist/types/ai-model/prompt/common.d.ts +2 -0
- package/dist/types/ai-model/prompt/describe.d.ts +1 -0
- package/dist/types/ai-model/prompt/extraction.d.ts +7 -0
- package/dist/types/ai-model/prompt/llm-locator.d.ts +3 -0
- package/dist/types/ai-model/prompt/llm-planning.d.ts +10 -0
- package/dist/types/ai-model/prompt/llm-section-locator.d.ts +3 -0
- package/dist/types/ai-model/prompt/order-sensitive-judge.d.ts +2 -0
- package/dist/types/ai-model/prompt/playwright-generator.d.ts +26 -0
- package/dist/types/ai-model/prompt/ui-tars-planning.d.ts +2 -0
- package/dist/types/ai-model/prompt/util.d.ts +33 -0
- package/dist/types/ai-model/prompt/yaml-generator.d.ts +100 -0
- package/dist/types/ai-model/service-caller/index.d.ts +49 -0
- package/dist/types/ai-model/ui-tars-planning.d.ts +72 -0
- package/dist/types/common.d.ts +288 -0
- package/dist/types/device/device-options.d.ts +142 -0
- package/dist/types/device/index.d.ts +2315 -0
- package/dist/types/dump/html-utils.d.ts +52 -0
- package/dist/types/dump/image-restoration.d.ts +6 -0
- package/dist/types/dump/index.d.ts +5 -0
- package/dist/types/index.d.ts +17 -0
- package/dist/types/report-generator.d.ts +48 -0
- package/dist/types/report.d.ts +15 -0
- package/dist/types/screenshot-item.d.ts +66 -0
- package/dist/types/service/index.d.ts +23 -0
- package/dist/types/service/utils.d.ts +2 -0
- package/dist/types/skill/index.d.ts +25 -0
- package/dist/types/task-runner.d.ts +48 -0
- package/dist/types/task-timing.d.ts +8 -0
- package/dist/types/tree.d.ts +4 -0
- package/dist/types/types.d.ts +645 -0
- package/dist/types/utils.d.ts +40 -0
- package/dist/types/yaml/builder.d.ts +2 -0
- package/dist/types/yaml/index.d.ts +4 -0
- package/dist/types/yaml/player.d.ts +34 -0
- package/dist/types/yaml/utils.d.ts +9 -0
- package/dist/types/yaml.d.ts +203 -0
- package/package.json +111 -0
|
@@ -0,0 +1,364 @@
|
|
|
1
|
+
import { findAllMidsceneLocatorField } from "../../common.mjs";
|
|
2
|
+
import { getPreferredLanguage } from "@midscene/shared/env";
|
|
3
|
+
import { getZodDescription, getZodTypeName } from "@midscene/shared/zod-schema-utils";
|
|
4
|
+
import { bboxDescription } from "./common.mjs";
|
|
5
|
+
/**
 * Builds the type description shown to the model for a "locate" parameter.
 *
 * @param {string|undefined} modelFamily - Model family to describe bbox output for;
 *   any falsy value selects the prompt-only form.
 * @returns {string} A TypeScript-like object type. With a model family it includes a
 *   `bbox` tuple followed by a `//` comment holding whatever `bboxDescription`
 *   (imported from ./common.mjs) returns for that family.
 */
const vlLocateParam = (modelFamily) => {
  if (!modelFamily) {
    return '{ prompt: string /* description of the target element */ }';
  }
  const bboxHint = bboxDescription(modelFamily);
  return `{bbox: [number, number, number, number], prompt: string } // ${bboxHint}`;
};
|
|
9
|
+
/**
 * Looks up the default value declared on a Zod-style schema node.
 *
 * Starting at `field`, the walk follows `_def.innerType` through nodes whose
 * `_def.typeName` is `ZodOptional` or `ZodNullable`. It stops at the first
 * `ZodDefault` node and returns the result of calling its `_def.defaultValue`.
 * Any other node type, a node without `_def.typeName`, or a node that was
 * already visited ends the walk.
 *
 * @param {unknown} field - Schema node to inspect.
 * @returns {*} The default value, or `undefined` when none is found.
 */
const findDefaultValue = (field) => {
  const transparentWrappers = ['ZodOptional', 'ZodNullable'];
  const seen = new Set();
  let node = field;

  while (node && !seen.has(node)) {
    // Remember visited nodes so a self-referencing chain cannot loop forever.
    seen.add(node);

    const def = node._def;
    const kind = def?.typeName;
    if (!kind) {
      return undefined;
    }
    if (kind === 'ZodDefault') {
      return def.defaultValue?.();
    }
    if (!transparentWrappers.includes(kind)) {
      return undefined;
    }
    node = def.innerType;
  }

  return undefined;
};
|
|
21
|
+
// Placeholder bounding boxes (four numbers each) that injectBboxIntoSample cycles
// through when decorating action samples.
// NOTE(review): the values look like [x1, y1, x2, y2] corners — confirm against bboxDescription.
const SAMPLE_BBOXES = [
  [50, 100, 200, 200],
  [300, 400, 500, 500],
  [600, 100, 800, 250],
  [50, 600, 250, 750],
];
|
|
47
|
+
/**
 * Adds a placeholder `bbox` to the locate-style entries of an action sample.
 *
 * @param {Object} sample - Sample parameter object of an action.
 * @param {Iterable<string>} locateFields - Keys of `sample` to consider.
 * @param {boolean} includeBbox - When falsy, `sample` is returned as-is (same reference).
 * @returns {Object} `sample` itself, or a shallow copy in which every listed key whose
 *   value is an object with a truthy `prompt` is replaced by a copy carrying a `bbox`
 *   taken from SAMPLE_BBOXES (round-robin). The input object is not mutated.
 */
const injectBboxIntoSample = (sample, locateFields, includeBbox) => {
  if (!includeBbox) {
    return sample;
  }

  const enriched = { ...sample };
  let usedBoxes = 0;

  for (const fieldName of locateFields) {
    const value = enriched[fieldName];
    const looksLikeLocate = value && typeof value === 'object' && value.prompt;
    if (!looksLikeLocate) {
      continue;
    }
    // Wrap around so any number of locate fields still gets a box.
    const bbox = SAMPLE_BBOXES[usedBoxes % SAMPLE_BBOXES.length];
    enriched[fieldName] = { ...value, bbox };
    usedBoxes += 1;
  }

  return enriched;
};
|
|
62
|
+
/**
 * Renders one action of the action space as a bullet block for the planning prompt.
 *
 * The block starts with `- <name>, <description>` and is followed by indented entries:
 * the action type, its parameters (one line per field of a `ZodObject` schema, or a
 * single `- param:` line for any other schema), and, when `action.sample` is an
 * object, a sample rendered as `<action-type>` / `<action-param-json>` tags.
 *
 * @param {Object} action - Action definition; reads `name`, `description`,
 *   `paramSchema` and `sample`.
 * @param {string} locatorSchemaTypeDescription - Forwarded to `getZodTypeName` for
 *   every field of an object schema.
 * @param {boolean} [includeBbox=false] - Forwarded to `injectBboxIntoSample` when a
 *   sample is rendered.
 * @returns {string} The trimmed description block.
 */
const descriptionForAction = (action, locatorSchemaTypeDescription, includeBbox = false) => {
  const tab = ' ';
  const fields = [`- type: "${action.name}"`];

  const schema = action.paramSchema;
  if (schema) {
    const isZodObject = schema._def?.typeName === 'ZodObject';
    if (isZodObject && schema.shape) {
      const paramLines = [];
      for (const [key, field] of Object.entries(schema.shape)) {
        if (!field || typeof field !== 'object') {
          continue;
        }
        const isOptional = typeof field.isOptional === 'function' && field.isOptional();
        const keyWithOptional = isOptional ? `${key}?` : key;
        const typeName = getZodTypeName(field, locatorSchemaTypeDescription);
        const description = getZodDescription(field);
        const defaultValue = findDefaultValue(field);

        const comments = [];
        if (description) {
          comments.push(description);
        }
        if (defaultValue !== undefined) {
          // JSON.stringify quotes AND escapes string defaults, so embedded quotes or
          // newlines cannot break the single-line `// ...` comment in the prompt.
          comments.push(`default: ${JSON.stringify(defaultValue)}`);
        }

        const trailingComment = comments.length > 0 ? ` // ${comments.join(', ')}` : '';
        paramLines.push(`${keyWithOptional}: ${typeName}${trailingComment}`);
      }
      if (paramLines.length > 0) {
        fields.push('- param:');
        for (const line of paramLines) {
          fields.push(` - ${line}`);
        }
      }
    } else {
      // Non-object schema: the parameter is the value itself.
      const typeName = getZodTypeName(schema);
      const description = getZodDescription(schema);
      let paramDescription = `- param: ${typeName}`;
      if (description) {
        paramDescription += ` // ${description}`;
      }
      paramDescription += ' (pass the value directly, not as an object)';
      fields.push(paramDescription);
    }
  }

  if (action.sample && typeof action.sample === 'object') {
    const locateFields = findAllMidsceneLocatorField(action.paramSchema);
    const sampleWithBbox = injectBboxIntoSample(action.sample, locateFields, includeBbox);
    const indent = `${tab}${tab}`;
    // Re-indent the pretty-printed JSON so it lines up with the surrounding tags.
    const sampleJson = JSON.stringify(sampleWithBbox, null, 2).replace(/\n/g, `\n${indent}`);
    fields.push(
      `- sample:\n${indent}<action-type>${action.name}</action-type>\n${indent}<action-param-json>\n${indent}${sampleJson}\n${indent}</action-param-json>`,
    );
  }

  const header = `- ${action.name}, ${action.description || 'No description provided'}`;
  return `${header}\n${tab}${fields.join(`\n${tab}`)}\n`.trim();
};
|
|
114
|
+
// Prompt fragment telling the model to execute explicit steps literally and to plan
// only for high-level goals. Starts with a newline; one array entry per prompt line.
const EXPLICIT_INSTRUCTION_RULES = [
  '',
  '**Explicit instructions vs. High-level goals:**',
  '- **Explicit steps** (e.g., "click X", "type Y", "fill form"): Execute EXACTLY those steps - nothing more',
  '- **High-level goals** (e.g., "log in", "complete purchase"): Determine necessary steps',
  '',
  '**Examples:**',
  '- "fill out the form" → Fill fields only, do NOT submit',
  '- "click the button" → Click only, do NOT wait for page load',
  `- "type 'hello'" → Type only, do NOT press Enter`,
  '- "select the first item" → Select only, do NOT proceed to checkout',
].join('\n');
|
|
124
|
+
// Prompt fragment describing how assertion-style instructions are judged.
// Starts with a newline; one array entry per prompt line.
const ASSERTION_RULES = [
  '',
  '**Assertion instructions:**',
  '- If assertion condition is NOT satisfied and cannot be satisfied, mark as failed (success="false")',
  '- If page is loading (spinner, skeleton, progress bar), wait before asserting',
].join('\n');
|
|
128
|
+
// Prompt fragment forbidding navigation away from the current page unless asked.
// Starts with a newline; one array entry per prompt line.
const PAGE_NAVIGATION_RULES = [
  '',
  '**Page navigation restriction:**',
  '- Complete task on current page unless explicitly instructed to navigate',
  '- Do NOT click links, use browser navigation, or open new URLs',
  '- If task cannot be accomplished, report failure instead of navigating',
].join('\n');
|
|
133
|
+
/**
 * Returns the prompt section that documents the `<update-plan-content>` and
 * `<mark-sub-goal-done>` tags used in sub-goal mode.
 *
 * @returns {string} Fixed text beginning with a newline.
 */
const generateSubGoalSection = () => {
  const updatePlanDoc = [
    '* <update-plan-content> tag',
    '',
    '<update-plan-content>',
    '<sub-goal index="1" status="finished|pending">description</sub-goal>',
    '<sub-goal index="2" status="finished|pending">description</sub-goal>',
    '</update-plan-content>',
  ];
  const markDoneDoc = [
    '* <mark-sub-goal-done> tag',
    '',
    '<mark-sub-goal-done>',
    '<sub-goal index="1" status="finished" />',
    '</mark-sub-goal-done>',
  ];
  const reminder = 'IMPORTANT: Mark sub-goal as "finished" ONLY AFTER confirming completion in screenshot.';

  return ['', ...updatePlanDoc, '', ...markDoneDoc, '', reminder].join('\n');
};
|
|
148
|
+
/**
 * Produces the worked "Fill registration form" example appended to the planning prompt.
 *
 * @param {boolean} includeSubGoals - When truthy, the example also shows the
 *   `<update-plan-content>` and `<mark-sub-goal-done>` tags.
 * @param {string} locateNameField - Text placed after `"locate": ` for the Name field tap.
 * @param {string} locateEmailField - Text placed after `"locate": ` for the Email field tap.
 * @returns {string} Example text beginning with a newline.
 */
const generateSimplifiedExample = (includeSubGoals, locateNameField, locateEmailField) => {
  // Lines shared by both variants of the example.
  const intro = [
    '',
    '## Example: Fill registration form',
    '',
    `**User:** "fill out the form with name 'John' and email 'xxxx(真实的email地址)', then return the email"`,
    '',
    '**Response:**',
  ];
  // One Tap step: log line, action type, action param, then a blank separator line.
  const tapStep = (logText, locateJson) => [
    `<log>${logText}</log>`,
    '<action-type>Tap</action-type>',
    `<action-param-json>{"locate": ${locateJson}}</action-param-json>`,
    '',
  ];
  const tapName = tapStep('Click Name field', locateNameField);
  const tapEmail = tapStep('Click Email field', locateEmailField);
  const done = '<complete success="true">xxxx(真实的email地址)</complete>';

  let body;
  if (includeSubGoals) {
    body = [
      '<thought>Form has Name and Email fields. Both empty. Break into sub-goals.</thought>',
      '<update-plan-content>',
      `<sub-goal index="1" status="pending">Fill Name with 'John'</sub-goal>`,
      `<sub-goal index="2" status="pending">Fill Email with 'xxxx(真实的email地址)'</sub-goal>`,
      '<sub-goal index="3" status="pending">Return email address</sub-goal>',
      '</update-plan-content>',
      ...tapName,
      '(After typing name)',
      `<thought>Name='John'. Sub-goal 1 complete. Fill Email next.</thought>`,
      '<mark-sub-goal-done><sub-goal index="1" status="finished" /></mark-sub-goal-done>',
      ...tapEmail,
      '(After typing email)',
      '<thought>Both fields filled. All sub-goals complete.</thought>',
      '<mark-sub-goal-done>',
      '<sub-goal index="2" status="finished" />',
      '<sub-goal index="3" status="finished" />',
      '</mark-sub-goal-done>',
      done,
    ];
  } else {
    body = [
      '<thought>Form has Name and Email fields. Both empty. Start with Name.</thought>',
      ...tapName,
      '(After typing name)',
      `<thought>Name='John'. Fill Email next.</thought>`,
      ...tapEmail,
      '(After typing email)',
      '<thought>Both fields filled. Task complete.</thought>',
      done,
    ];
  }

  return [...intro, ...body].join('\n');
};
|
|
200
|
+
/**
 * Assembles the system prompt for the task-planning model.
 *
 * @param {Object} options
 * @param {Array<Object>} options.actionSpace - Actions to list; each one is rendered
 *   with `descriptionForAction`.
 * @param {string} [options.modelFamily] - Must be truthy when `includeBbox` is truthy.
 * @param {boolean} [options.includeBbox] - When truthy, locate examples and action
 *   descriptions include bounding boxes.
 * @param {boolean} [options.includeThought] - Accepted but not read by this function.
 * @param {boolean} [options.includeSubGoals] - When truthy, adds the sub-goal and
 *   memory sections and shifts the step numbers; `null`/`undefined` mean `false`.
 * @returns {Promise<string>} The prompt text. The function is `async` but awaits
 *   nothing, so the promise settles immediately.
 * @throws {Error} (as a rejection) when `includeBbox` is truthy and `modelFamily` is falsy.
 */
async function systemPromptToTaskPlanning({ actionSpace, modelFamily, includeBbox, includeThought, includeSubGoals }) {
  const preferredLanguage = getPreferredLanguage();
  if (includeBbox && !modelFamily) {
    throw new Error('modelFamily cannot be undefined when includeBbox is true. A valid modelFamily is required for bbox-based location.');
  }

  const actionList = actionSpace
    .map((action) => descriptionForAction(action, vlLocateParam(includeBbox ? modelFamily : undefined), includeBbox))
    .join('\n');
  const shouldIncludeSubGoals = includeSubGoals ?? false;

  // Renders the JSON snippet used as a "locate" value in the examples.
  const locateJson = (prompt, bbox) => {
    const promptEntry = `"prompt": "${prompt}"`;
    return includeBbox
      ? `{\n${promptEntry},\n"bbox": [${bbox.join(', ')}]\n}`
      : `{\n${promptEntry}\n}`;
  };
  const locateExample1 = locateJson('Add to cart button for Sauce Labs Backpack', [345, 442, 458, 483]);
  const locateNameField = locateJson('Name input field in the registration form', [120, 180, 380, 210]);
  const locateEmailField = locateJson('Email input field in the registration form', [120, 240, 380, 270]);

  // Everything that differs between sub-goal mode and plain mode, in one place.
  const memoryStepNumber = 2;
  const wording = shouldIncludeSubGoals
    ? {
        step1Title: '## Step 1: Observe and Plan',
        step1Description: 'Observe screenshot and logs, then break down instruction into sub-goals.',
        subGoalQuestion: '- Are all sub-goals completed?',
        checkGoalStepNumber: 3,
        actionStepNumber: 4,
        goalAccomplished: 'goal is accomplished',
        checkHeading: 'Goal is Accomplished',
        completionLead: 'Based on screenshot and sub-goals status, determine',
        andPlan: ' and plan',
        orPlan: ' or plan',
      }
    : {
        step1Title: '## Step 1: Observe',
        step1Description: 'Observe screenshot and logs to understand current state.',
        subGoalQuestion: '',
        checkGoalStepNumber: 2,
        actionStepNumber: 3,
        goalAccomplished: 'instruction is fulfilled',
        checkHeading: 'Instruction is Fulfilled',
        completionLead: 'Determine',
        andPlan: '',
        orPlan: '',
      };
  const { step1Title, step1Description, checkGoalStepNumber, actionStepNumber, goalAccomplished } = wording;

  const thoughtTagDescription = [
    'REQUIRED: Always output the <thought> tag.',
    '',
    'Answer these questions:',
    "- What is the user's requirement?",
    '- What is the current state?',
    wording.subGoalQuestion,
    '- What should be the next action?',
    '',
    `${EXPLICIT_INSTRUCTION_RULES}`,
  ].join('\n');

  const subGoalSection = shouldIncludeSubGoals ? generateSubGoalSection() : '';

  const memorySection = shouldIncludeSubGoals ? `
## Step ${memoryStepNumber}: Memory Data

Record information needed in follow-up actions. Current screenshot will NOT be available later.

Examples: extracted data, element states, content to reference.
` : '';

  // Navigation restrictions are only spelled out in plain (non sub-goal) mode.
  const navigationSection = shouldIncludeSubGoals ? '' : `
${PAGE_NAVIGATION_RULES}
`;

  const subGoalReturnTags = shouldIncludeSubGoals ? `
<update-plan-content>...</update-plan-content>
<mark-sub-goal-done>
<sub-goal index="1" status="finished" />
</mark-sub-goal-done>

<memory>...</memory>
` : '';

  // Negated phrasings derived from the completion wording.
  const failedCondition = goalAccomplished
    .replace('is', 'is not')
    .replace('fulfilled', 'fulfilled and cannot be fulfilled');
  const pendingCondition = goalAccomplished.replace('is', 'is not yet');

  return `
# Role
You are an expert UI automation agent. Accomplish user instructions by observing screenshots and executing actions.

# Steps

${step1Title}

${step1Description}

* <thought> tag (REQUIRED)

${thoughtTagDescription}
${subGoalSection}
${memorySection}
## Step ${checkGoalStepNumber}: Check if ${wording.checkHeading}

${wording.completionLead} if task is completed.

### CRITICAL: User's Instruction is Supreme

The user's instruction defines EXACT scope. Follow it precisely - nothing more, nothing less.

${EXPLICIT_INSTRUCTION_RULES}

**What "${goalAccomplished}" means:**
- Done EXACTLY what user asked - no extra steps, no assumptions
- Do NOT perform actions beyond explicit instruction

${ASSERTION_RULES}
${navigationSection}
### Output Rules

- If NOT complete, continue to Step ${actionStepNumber}
- Use <complete success="true|false">message</complete> to output result
- success="true" if ${goalAccomplished}
- success="false" if ${failedCondition}
- message: information for user (follow requested format)
- If outputting <complete>, do NOT output <action-type> or <action-param-json>

## Step ${actionStepNumber}: Determine Next Action

ONLY if task is not complete: Determine next action based on current screenshot${wording.andPlan}.

- Don't give extra actions beyond instruction${wording.orPlan}
- Consider current screenshot - if element not visible, find it first
- Ensure previous actions completed successfully
- Give just the next ONE action
- If errors persist >3 times, use <error> tag

### Supporting actions list

${actionList}

### Log tag (preamble message)

Brief message to user explaining next action:
- **Use ${preferredLanguage}**
- **Keep concise**: 1-2 sentences, 8-12 words
- **Build on context**: connect with previous actions
- **Friendly tone**: collaborative and engaging

**Examples:**
- <log>Click the login button</log>
- <log>Scroll to find the 'Yes' button</log>

### Action output

Use <action-type> and <action-param-json> tags:
- <action-type> MUST be from supporting actions list
- Use EXACT field names from action description
- 'complete' is NOT a valid action-type

Example:
<action-type>Tap</action-type>
<action-param-json>
{
"locate": ${locateExample1}
}
</action-param-json>

### Error output

Use <error> tag for error messages:
<error>Unable to find the required element on the page</error>

# Return Format

**Always include (REQUIRED):**
<thought>Your thought process here. NEVER skip this tag.</thought>
${subGoalReturnTags}
**Then choose ONE path:**

**Path A: If ${goalAccomplished} or failed**
<complete success="true|false">...</complete>

**Path B: If ${pendingCondition}**
<log>...</log>
<action-type>...</action-type>
<action-param-json>...</action-param-json>

<!-- OR if there's an error -->
<error>...</error>

${generateSimplifiedExample(shouldIncludeSubGoals, locateNameField, locateEmailField)}
`;
}
|
|
362
|
+
export { descriptionForAction, systemPromptToTaskPlanning };
|
|
363
|
+
|
|
364
|
+
//# sourceMappingURL=llm-planning.mjs.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"ai-model/prompt/llm-planning.mjs","sources":["../../../../src/ai-model/prompt/llm-planning.ts"],"sourcesContent":["import { findAllMidsceneLocatorField } from '@/common';\nimport type { DeviceAction } from '@/types';\nimport type { TModelFamily } from '@midscene/shared/env';\nimport { getPreferredLanguage } from '@midscene/shared/env';\nimport {\n getZodDescription,\n getZodTypeName,\n} from '@midscene/shared/zod-schema-utils';\nimport type { z } from 'zod';\nimport { bboxDescription } from './common';\n\nconst vlLocateParam = (modelFamily: TModelFamily | undefined) => {\n if (modelFamily) {\n return `{bbox: [number, number, number, number], prompt: string } // ${bboxDescription(modelFamily)}`;\n }\n return '{ prompt: string /* description of the target element */ }';\n};\n\nconst findDefaultValue = (field: unknown): any | undefined => {\n let current = field;\n const visited = new Set<unknown>();\n\n while (current && !visited.has(current)) {\n visited.add(current);\n const currentWithDef = current as {\n _def?: {\n typeName?: string;\n defaultValue?: () => any;\n innerType?: unknown;\n };\n };\n\n if (!currentWithDef._def?.typeName) break;\n\n if (currentWithDef._def.typeName === 'ZodDefault') {\n return currentWithDef._def.defaultValue?.();\n }\n\n if (\n currentWithDef._def.typeName === 'ZodOptional' ||\n currentWithDef._def.typeName === 'ZodNullable'\n ) {\n current = currentWithDef._def.innerType;\n } else {\n break;\n }\n }\n\n return undefined;\n};\n\nconst SAMPLE_BBOXES: [number, number, number, number][] = [\n [50, 100, 200, 200],\n [300, 400, 500, 500],\n [600, 100, 800, 250],\n [50, 600, 250, 750],\n];\n\nconst injectBboxIntoSample = (\n sample: Record<string, any>,\n locateFields: string[],\n includeBbox: boolean,\n): Record<string, any> => {\n if (!includeBbox) return sample;\n const result = { ...sample };\n let bboxIndex = 0;\n for (const field of locateFields) {\n if (\n result[field] &&\n typeof result[field] === 'object' &&\n 
result[field].prompt\n ) {\n result[field] = {\n ...result[field],\n bbox: SAMPLE_BBOXES[bboxIndex % SAMPLE_BBOXES.length],\n };\n bboxIndex++;\n }\n }\n return result;\n};\n\nexport const descriptionForAction = (\n action: DeviceAction<any>,\n locatorSchemaTypeDescription: string,\n includeBbox = false,\n) => {\n const tab = ' ';\n const fields: string[] = [];\n\n fields.push(`- type: \"${action.name}\"`);\n\n if (action.paramSchema) {\n const paramLines: string[] = [];\n\n const schema = action.paramSchema as {\n _def?: { typeName?: string };\n shape?: Record<string, unknown>;\n };\n const isZodObject = schema._def?.typeName === 'ZodObject';\n\n if (isZodObject && schema.shape) {\n const shape = schema.shape;\n\n for (const [key, field] of Object.entries(shape)) {\n if (field && typeof field === 'object') {\n const isOptional =\n typeof (field as { isOptional?: () => boolean }).isOptional ===\n 'function' &&\n (field as { isOptional: () => boolean }).isOptional();\n const keyWithOptional = isOptional ? `${key}?` : key;\n\n const typeName = getZodTypeName(field, locatorSchemaTypeDescription);\n\n const description = getZodDescription(field as z.ZodTypeAny);\n\n const defaultValue = findDefaultValue(field);\n const hasDefault = defaultValue !== undefined;\n\n let paramLine = `${keyWithOptional}: ${typeName}`;\n const comments: string[] = [];\n if (description) {\n comments.push(description);\n }\n if (hasDefault) {\n const defaultStr =\n typeof defaultValue === 'string'\n ? 
`\"${defaultValue}\"`\n : JSON.stringify(defaultValue);\n comments.push(`default: ${defaultStr}`);\n }\n if (comments.length > 0) {\n paramLine += ` // ${comments.join(', ')}`;\n }\n\n paramLines.push(paramLine);\n }\n }\n\n if (paramLines.length > 0) {\n fields.push('- param:');\n paramLines.forEach((line) => {\n fields.push(` - ${line}`);\n });\n }\n } else {\n const typeName = getZodTypeName(schema);\n const description = getZodDescription(schema as z.ZodTypeAny);\n\n let paramDescription = `- param: ${typeName}`;\n if (description) {\n paramDescription += ` // ${description}`;\n }\n paramDescription += ' (pass the value directly, not as an object)';\n\n fields.push(paramDescription);\n }\n }\n\n if (action.sample && typeof action.sample === 'object') {\n const locateFields = findAllMidsceneLocatorField(action.paramSchema);\n const sampleWithBbox = injectBboxIntoSample(\n action.sample,\n locateFields,\n includeBbox,\n );\n const sampleStr = `- sample:\\n${tab}${tab}<action-type>${action.name}</action-type>\\n${tab}${tab}<action-param-json>\\n${tab}${tab}${JSON.stringify(sampleWithBbox, null, 2).replace(/\\n/g, `\\n${tab}${tab}`)}\\n${tab}${tab}</action-param-json>`;\n fields.push(sampleStr);\n }\n\n return `- ${action.name}, ${action.description || 'No description provided'}\n${tab}${fields.join(`\\n${tab}`)}\n`.trim();\n};\n\nconst EXPLICIT_INSTRUCTION_RULES = `\n**Explicit instructions vs. 
High-level goals:**\n- **Explicit steps** (e.g., \"click X\", \"type Y\", \"fill form\"): Execute EXACTLY those steps - nothing more\n- **High-level goals** (e.g., \"log in\", \"complete purchase\"): Determine necessary steps\n\n**Examples:**\n- \"fill out the form\" → Fill fields only, do NOT submit\n- \"click the button\" → Click only, do NOT wait for page load\n- \"type 'hello'\" → Type only, do NOT press Enter\n- \"select the first item\" → Select only, do NOT proceed to checkout`;\n\nconst ASSERTION_RULES = `\n**Assertion instructions:**\n- If assertion condition is NOT satisfied and cannot be satisfied, mark as failed (success=\"false\")\n- If page is loading (spinner, skeleton, progress bar), wait before asserting`;\n\nconst PAGE_NAVIGATION_RULES = `\n**Page navigation restriction:**\n- Complete task on current page unless explicitly instructed to navigate\n- Do NOT click links, use browser navigation, or open new URLs\n- If task cannot be accomplished, report failure instead of navigating`;\n\nconst generateSubGoalSection = () => `\n* <update-plan-content> tag\n\n<update-plan-content>\n <sub-goal index=\"1\" status=\"finished|pending\">description</sub-goal>\n <sub-goal index=\"2\" status=\"finished|pending\">description</sub-goal>\n</update-plan-content>\n\n* <mark-sub-goal-done> tag\n\n<mark-sub-goal-done>\n <sub-goal index=\"1\" status=\"finished\" />\n</mark-sub-goal-done>\n\nIMPORTANT: Mark sub-goal as \"finished\" ONLY AFTER confirming completion in screenshot.`;\n\nconst generateSimplifiedExample = (\n includeSubGoals: boolean,\n locateNameField: string,\n locateEmailField: string,\n) => {\n if (includeSubGoals) {\n return `\n## Example: Fill registration form\n\n**User:** \"fill out the form with name 'John' and email 'xxxx(真实的email地址)', then return the email\"\n\n**Response:**\n<thought>Form has Name and Email fields. Both empty. 
Break into sub-goals.</thought>\n<update-plan-content>\n <sub-goal index=\"1\" status=\"pending\">Fill Name with 'John'</sub-goal>\n <sub-goal index=\"2\" status=\"pending\">Fill Email with 'xxxx(真实的email地址)'</sub-goal>\n <sub-goal index=\"3\" status=\"pending\">Return email address</sub-goal>\n</update-plan-content>\n<log>Click Name field</log>\n<action-type>Tap</action-type>\n<action-param-json>{\"locate\": ${locateNameField}}</action-param-json>\n\n(After typing name)\n<thought>Name='John'. Sub-goal 1 complete. Fill Email next.</thought>\n<mark-sub-goal-done><sub-goal index=\"1\" status=\"finished\" /></mark-sub-goal-done>\n<log>Click Email field</log>\n<action-type>Tap</action-type>\n<action-param-json>{\"locate\": ${locateEmailField}}</action-param-json>\n\n(After typing email)\n<thought>Both fields filled. All sub-goals complete.</thought>\n<mark-sub-goal-done>\n <sub-goal index=\"2\" status=\"finished\" />\n <sub-goal index=\"3\" status=\"finished\" />\n</mark-sub-goal-done>\n<complete success=\"true\">xxxx(真实的email地址)</complete>`;\n }\n\n return `\n## Example: Fill registration form\n\n**User:** \"fill out the form with name 'John' and email 'xxxx(真实的email地址)', then return the email\"\n\n**Response:**\n<thought>Form has Name and Email fields. Both empty. Start with Name.</thought>\n<log>Click Name field</log>\n<action-type>Tap</action-type>\n<action-param-json>{\"locate\": ${locateNameField}}</action-param-json>\n\n(After typing name)\n<thought>Name='John'. Fill Email next.</thought>\n<log>Click Email field</log>\n<action-type>Tap</action-type>\n<action-param-json>{\"locate\": ${locateEmailField}}</action-param-json>\n\n(After typing email)\n<thought>Both fields filled. 
Task complete.</thought>\n<complete success=\"true\">xxxx(真实的email地址)</complete>`;\n};\n\nexport async function systemPromptToTaskPlanning({\n actionSpace,\n modelFamily,\n includeBbox,\n includeThought,\n includeSubGoals,\n}: {\n actionSpace: DeviceAction<any>[];\n modelFamily: TModelFamily | undefined;\n includeBbox: boolean;\n includeThought?: boolean;\n includeSubGoals?: boolean;\n}) {\n const preferredLanguage = getPreferredLanguage();\n\n if (includeBbox && !modelFamily) {\n throw new Error(\n 'modelFamily cannot be undefined when includeBbox is true. A valid modelFamily is required for bbox-based location.',\n );\n }\n\n const actionDescriptionList = actionSpace.map((action) => {\n return descriptionForAction(\n action,\n vlLocateParam(includeBbox ? modelFamily : undefined),\n includeBbox,\n );\n });\n const actionList = actionDescriptionList.join('\\n');\n\n const shouldIncludeThought = includeThought ?? true;\n const shouldIncludeSubGoals = includeSubGoals ?? false;\n\n const locateExample1 = includeBbox\n ? `{\n \"prompt\": \"Add to cart button for Sauce Labs Backpack\",\n \"bbox\": [345, 442, 458, 483]\n }`\n : `{\n \"prompt\": \"Add to cart button for Sauce Labs Backpack\"\n }`;\n\n const locateNameField = includeBbox\n ? `{\n \"prompt\": \"Name input field in the registration form\",\n \"bbox\": [120, 180, 380, 210]\n }`\n : `{\n \"prompt\": \"Name input field in the registration form\"\n }`;\n\n const locateEmailField = includeBbox\n ? `{\n \"prompt\": \"Email input field in the registration form\",\n \"bbox\": [120, 240, 380, 270]\n }`\n : `{\n \"prompt\": \"Email input field in the registration form\"\n }`;\n\n const step1Title = shouldIncludeSubGoals\n ? '## Step 1: Observe and Plan'\n : '## Step 1: Observe';\n\n const step1Description = shouldIncludeSubGoals\n ? 
\"Observe screenshot and logs, then break down instruction into sub-goals.\"\n : 'Observe screenshot and logs to understand current state.';\n\n const thoughtTagDescription = `REQUIRED: Always output the <thought> tag.\n\nAnswer these questions:\n- What is the user's requirement?\n- What is the current state?\n${shouldIncludeSubGoals ? '- Are all sub-goals completed?' : ''}\n- What should be the next action?\n\n${EXPLICIT_INSTRUCTION_RULES}`;\n\n const subGoalSection = shouldIncludeSubGoals ? generateSubGoalSection() : '';\n\n const memoryStepNumber = 2;\n const checkGoalStepNumber = shouldIncludeSubGoals ? 3 : 2;\n const actionStepNumber = shouldIncludeSubGoals ? 4 : 3;\n\n const goalTerm = shouldIncludeSubGoals ? 'goal' : 'instruction';\n const goalAccomplished = shouldIncludeSubGoals ? 'goal is accomplished' : 'instruction is fulfilled';\n\n return `\n# Role\nYou are an expert UI automation agent. Accomplish user instructions by observing screenshots and executing actions.\n\n# Steps\n\n${step1Title}\n\n${step1Description}\n\n* <thought> tag (REQUIRED)\n\n${thoughtTagDescription}\n${subGoalSection}\n${\n shouldIncludeSubGoals\n ? `\n## Step ${memoryStepNumber}: Memory Data\n\nRecord information needed in follow-up actions. Current screenshot will NOT be available later.\n\nExamples: extracted data, element states, content to reference.\n`\n : ''\n}\n## Step ${checkGoalStepNumber}: Check if ${shouldIncludeSubGoals ? 'Goal is Accomplished' : 'Instruction is Fulfilled'}\n\n${shouldIncludeSubGoals ? 'Based on screenshot and sub-goals status, determine' : 'Determine'} if task is completed.\n\n### CRITICAL: User's Instruction is Supreme\n\nThe user's instruction defines EXACT scope. 
Follow it precisely - nothing more, nothing less.\n\n${EXPLICIT_INSTRUCTION_RULES}\n\n**What \"${goalAccomplished}\" means:**\n- Done EXACTLY what user asked - no extra steps, no assumptions\n- Do NOT perform actions beyond explicit instruction\n\n${ASSERTION_RULES}\n${\n !shouldIncludeSubGoals\n ? `\n${PAGE_NAVIGATION_RULES}\n`\n : ''\n}\n### Output Rules\n\n- If NOT complete, continue to Step ${actionStepNumber}\n- Use <complete success=\"true|false\">message</complete> to output result\n - success=\"true\" if ${goalAccomplished}\n - success=\"false\" if ${goalAccomplished.replace('is', 'is not').replace('fulfilled', 'fulfilled and cannot be fulfilled')}\n - message: information for user (follow requested format)\n- If outputting <complete>, do NOT output <action-type> or <action-param-json>\n\n## Step ${actionStepNumber}: Determine Next Action\n\nONLY if task is not complete: Determine next action based on current screenshot${shouldIncludeSubGoals ? ' and plan' : ''}.\n\n- Don't give extra actions beyond instruction${shouldIncludeSubGoals ? 
' or plan' : ''}\n- Consider current screenshot - if element not visible, find it first\n- Ensure previous actions completed successfully\n- Give just the next ONE action\n- If errors persist >3 times, use <error> tag\n\n### Supporting actions list\n\n${actionList}\n\n### Log tag (preamble message)\n\nBrief message to user explaining next action:\n- **Use ${preferredLanguage}**\n- **Keep concise**: 1-2 sentences, 8-12 words\n- **Build on context**: connect with previous actions\n- **Friendly tone**: collaborative and engaging\n\n**Examples:**\n- <log>Click the login button</log>\n- <log>Scroll to find the 'Yes' button</log>\n\n### Action output\n\nUse <action-type> and <action-param-json> tags:\n- <action-type> MUST be from supporting actions list\n- Use EXACT field names from action description\n- 'complete' is NOT a valid action-type\n\nExample:\n<action-type>Tap</action-type>\n<action-param-json>\n{\n \"locate\": ${locateExample1}\n}\n</action-param-json>\n\n### Error output\n\nUse <error> tag for error messages:\n<error>Unable to find the required element on the page</error>\n\n# Return Format\n\n**Always include (REQUIRED):**\n<thought>Your thought process here. NEVER skip this tag.</thought>\n${\n shouldIncludeSubGoals\n ? `\n<update-plan-content>...</update-plan-content>\n<mark-sub-goal-done>\n <sub-goal index=\"1\" status=\"finished\" />\n</mark-sub-goal-done>\n`\n : ''\n}${\n shouldIncludeSubGoals\n ? 
`\n<memory>...</memory>\n`\n : ''\n}\n**Then choose ONE path:**\n\n**Path A: If ${goalAccomplished} or failed**\n<complete success=\"true|false\">...</complete>\n\n**Path B: If ${goalAccomplished.replace('is', 'is not yet')}**\n<log>...</log>\n<action-type>...</action-type>\n<action-param-json>...</action-param-json>\n\n<!-- OR if there's an error -->\n<error>...</error>\n\n${generateSimplifiedExample(shouldIncludeSubGoals, locateNameField, locateEmailField)}\n`;\n}\n"],"names":["vlLocateParam","modelFamily","bboxDescription","findDefaultValue","field","current","visited","Set","currentWithDef","SAMPLE_BBOXES","injectBboxIntoSample","sample","locateFields","includeBbox","result","bboxIndex","descriptionForAction","action","locatorSchemaTypeDescription","tab","fields","paramLines","schema","isZodObject","shape","key","Object","isOptional","keyWithOptional","typeName","getZodTypeName","description","getZodDescription","defaultValue","hasDefault","undefined","paramLine","comments","defaultStr","JSON","line","paramDescription","findAllMidsceneLocatorField","sampleWithBbox","sampleStr","EXPLICIT_INSTRUCTION_RULES","ASSERTION_RULES","PAGE_NAVIGATION_RULES","generateSubGoalSection","generateSimplifiedExample","includeSubGoals","locateNameField","locateEmailField","systemPromptToTaskPlanning","actionSpace","includeThought","preferredLanguage","getPreferredLanguage","Error","actionDescriptionList","actionList","shouldIncludeSubGoals","locateExample1","step1Title","step1Description","thoughtTagDescription","subGoalSection","memoryStepNumber","checkGoalStepNumber","actionStepNumber","goalAccomplished"],"mappings":";;;;AAWA,MAAMA,gBAAgB,CAACC;IACrB,IAAIA,aACF,OAAO,CAAC,6DAA6D,EAAEC,gBAAgBD,cAAc;IAEvG,OAAO;AACT;AAEA,MAAME,mBAAmB,CAACC;IACxB,IAAIC,UAAUD;IACd,MAAME,UAAU,IAAIC;IAEpB,MAAOF,WAAW,CAACC,QAAQ,GAAG,CAACD,SAAU;QACvCC,QAAQ,GAAG,CAACD;QACZ,MAAMG,iBAAiBH;QAQvB,IAAI,CAACG,eAAe,IAAI,EAAE,UAAU;QAEpC,IAAIA,AAAiC,iBAAjCA,eAAe,IAAI,CAAC,QAAQ,EAC9B,OAAOA,eAAe,IAAI,CAAC,YAAY;QAGzC,I
ACEA,AAAiC,kBAAjCA,eAAe,IAAI,CAAC,QAAQ,IAC5BA,AAAiC,kBAAjCA,eAAe,IAAI,CAAC,QAAQ,EAE5BH,UAAUG,eAAe,IAAI,CAAC,SAAS;aAEvC;IAEJ;AAGF;AAEA,MAAMC,gBAAoD;IACxD;QAAC;QAAI;QAAK;QAAK;KAAI;IACnB;QAAC;QAAK;QAAK;QAAK;KAAI;IACpB;QAAC;QAAK;QAAK;QAAK;KAAI;IACpB;QAAC;QAAI;QAAK;QAAK;KAAI;CACpB;AAED,MAAMC,uBAAuB,CAC3BC,QACAC,cACAC;IAEA,IAAI,CAACA,aAAa,OAAOF;IACzB,MAAMG,SAAS;QAAE,GAAGH,MAAM;IAAC;IAC3B,IAAII,YAAY;IAChB,KAAK,MAAMX,SAASQ,aAClB,IACEE,MAAM,CAACV,MAAM,IACb,AAAyB,YAAzB,OAAOU,MAAM,CAACV,MAAM,IACpBU,MAAM,CAACV,MAAM,CAAC,MAAM,EACpB;QACAU,MAAM,CAACV,MAAM,GAAG;YACd,GAAGU,MAAM,CAACV,MAAM;YAChB,MAAMK,aAAa,CAACM,YAAYN,cAAc,MAAM,CAAC;QACvD;QACAM;IACF;IAEF,OAAOD;AACT;AAEO,MAAME,uBAAuB,CAClCC,QACAC,8BACAL,cAAc,KAAK;IAEnB,MAAMM,MAAM;IACZ,MAAMC,SAAmB,EAAE;IAE3BA,OAAO,IAAI,CAAC,CAAC,SAAS,EAAEH,OAAO,IAAI,CAAC,CAAC,CAAC;IAEtC,IAAIA,OAAO,WAAW,EAAE;QACtB,MAAMI,aAAuB,EAAE;QAE/B,MAAMC,SAASL,OAAO,WAAW;QAIjC,MAAMM,cAAcD,OAAO,IAAI,EAAE,aAAa;QAE9C,IAAIC,eAAeD,OAAO,KAAK,EAAE;YAC/B,MAAME,QAAQF,OAAO,KAAK;YAE1B,KAAK,MAAM,CAACG,KAAKrB,MAAM,IAAIsB,OAAO,OAAO,CAACF,OACxC,IAAIpB,SAAS,AAAiB,YAAjB,OAAOA,OAAoB;gBACtC,MAAMuB,aACJ,AACE,cADF,OAAQvB,MAAyC,UAAU,IAE1DA,MAAwC,UAAU;gBACrD,MAAMwB,kBAAkBD,aAAa,GAAGF,IAAI,CAAC,CAAC,GAAGA;gBAEjD,MAAMI,WAAWC,eAAe1B,OAAOc;gBAEvC,MAAMa,cAAcC,kBAAkB5B;gBAEtC,MAAM6B,eAAe9B,iBAAiBC;gBACtC,MAAM8B,aAAaD,AAAiBE,WAAjBF;gBAEnB,IAAIG,YAAY,GAAGR,gBAAgB,EAAE,EAAEC,UAAU;gBACjD,MAAMQ,WAAqB,EAAE;gBAC7B,IAAIN,aACFM,SAAS,IAAI,CAACN;gBAEhB,IAAIG,YAAY;oBACd,MAAMI,aACJ,AAAwB,YAAxB,OAAOL,eACH,CAAC,CAAC,EAAEA,aAAa,CAAC,CAAC,GACnBM,KAAK,SAAS,CAACN;oBACrBI,SAAS,IAAI,CAAC,CAAC,SAAS,EAAEC,YAAY;gBACxC;gBACA,IAAID,SAAS,MAAM,GAAG,GACpBD,aAAa,CAAC,IAAI,EAAEC,SAAS,IAAI,CAAC,OAAO;gBAG3ChB,WAAW,IAAI,CAACe;YAClB;YAGF,IAAIf,WAAW,MAAM,GAAG,GAAG;gBACzBD,OAAO,IAAI,CAAC;gBACZC,WAAW,OAAO,CAAC,CAACmB;oBAClBpB,OAAO,IAAI,CAAC,CAAC,IAAI,EAAEoB,MAAM;gBAC3B;YACF;QACF,OAAO;YACL,MAAMX,WAAWC,eAAeR;YAChC,MAAMS,cAAcC,kBAAkBV;YAEtC,IAAImB,mBAAmB,CAAC,SAAS,EAAEZ,UAAU;YAC7C,IAAIE,aACFU,oBAAoB,CAAC,IAAI,EAAEV,aAAa;YAE1CU,oBAAoB;
YAEpBrB,OAAO,IAAI,CAACqB;QACd;IACF;IAEA,IAAIxB,OAAO,MAAM,IAAI,AAAyB,YAAzB,OAAOA,OAAO,MAAM,EAAe;QACtD,MAAML,eAAe8B,4BAA4BzB,OAAO,WAAW;QACnE,MAAM0B,iBAAiBjC,qBACrBO,OAAO,MAAM,EACbL,cACAC;QAEF,MAAM+B,YAAY,CAAC,WAAW,EAAEzB,MAAMA,IAAI,aAAa,EAAEF,OAAO,IAAI,CAAC,gBAAgB,EAAEE,MAAMA,IAAI,qBAAqB,EAAEA,MAAMA,MAAMoB,KAAK,SAAS,CAACI,gBAAgB,MAAM,GAAG,OAAO,CAAC,OAAO,CAAC,EAAE,EAAExB,MAAMA,KAAK,EAAE,EAAE,EAAEA,MAAMA,IAAI,oBAAoB,CAAC;QAChPC,OAAO,IAAI,CAACwB;IACd;IAEA,OAAO,CAAC,EAAE,EAAE3B,OAAO,IAAI,CAAC,EAAE,EAAEA,OAAO,WAAW,IAAI,0BAA0B;AAC9E,EAAEE,MAAMC,OAAO,IAAI,CAAC,CAAC,EAAE,EAAED,KAAK,EAAE;AAChC,CAAC,CAAC,IAAI;AACN;AAEA,MAAM0B,6BAA6B,CAAC;;;;;;;;;mEAS+B,CAAC;AAEpE,MAAMC,kBAAkB,CAAC;;;6EAGoD,CAAC;AAE9E,MAAMC,wBAAwB,CAAC;;;;sEAIuC,CAAC;AAEvE,MAAMC,yBAAyB,IAAM,CAAC;;;;;;;;;;;;;;sFAcgD,CAAC;AAEvF,MAAMC,4BAA4B,CAChCC,iBACAC,iBACAC;IAEA,IAAIF,iBACF,OAAO,CAAC;;;;;;;;;;;;;;8BAckB,EAAEC,gBAAgB;;;;;;;8BAOlB,EAAEC,iBAAiB;;;;;;;;oDAQG,CAAC;IAGnD,OAAO,CAAC;;;;;;;;;8BASoB,EAAED,gBAAgB;;;;;;8BAMlB,EAAEC,iBAAiB;;;;oDAIG,CAAC;AACrD;AAEO,eAAeC,2BAA2B,EAC/CC,WAAW,EACXrD,WAAW,EACXY,WAAW,EACX0C,cAAc,EACdL,eAAe,EAOhB;IACC,MAAMM,oBAAoBC;IAE1B,IAAI5C,eAAe,CAACZ,aAClB,MAAM,IAAIyD,MACR;IAIJ,MAAMC,wBAAwBL,YAAY,GAAG,CAAC,CAACrC,SACtCD,qBACLC,QACAjB,cAAca,cAAcZ,cAAckC,SAC1CtB;IAGJ,MAAM+C,aAAaD,sBAAsB,IAAI,CAAC;IAG9C,MAAME,wBAAwBX,mBAAmB;IAEjD,MAAMY,iBAAiBjD,cACnB,CAAC;;;GAGJ,CAAC,GACE,CAAC;;GAEJ,CAAC;IAEF,MAAMsC,kBAAkBtC,cACpB,CAAC;;;GAGJ,CAAC,GACE,CAAC;;GAEJ,CAAC;IAEF,MAAMuC,mBAAmBvC,cACrB,CAAC;;;GAGJ,CAAC,GACE,CAAC;;GAEJ,CAAC;IAEF,MAAMkD,aAAaF,wBACf,gCACA;IAEJ,MAAMG,mBAAmBH,wBACrB,6EACA;IAEJ,MAAMI,wBAAwB,CAAC;;;;;AAKjC,EAAEJ,wBAAwB,mCAAmC,GAAG;;;AAGhE,EAAEhB,4BAA4B;IAE5B,MAAMqB,iBAAiBL,wBAAwBb,2BAA2B;IAE1E,MAAMmB,mBAAmB;IACzB,MAAMC,sBAAsBP,wBAAwB,IAAI;IACxD,MAAMQ,mBAAmBR,wBAAwB,IAAI;IAGrD,MAAMS,mBAAmBT,wBAAwB,yBAAyB;IAE1E,OAAO,CAAC;;;;;;AAMV,EAAEE,WAAW;;AAEb,EAAEC,iBAAiB;;;;AAInB,EAAEC,sBAAsB;AACxB,EAAEC,eAAe;AACjB,EACEL,wBACI,CAAC;QACC,EAAEM,iBAAiB;;;;;AAK3B,CAAC,GACK,GACL;QACO,EAAEC,oBAAoB,WAAW,EAAEP,wB
AAwB,yBAAyB,2BAA2B;;AAEvH,EAAEA,wBAAwB,wDAAwD,YAAY;;;;;;AAM9F,EAAEhB,2BAA2B;;QAErB,EAAEyB,iBAAiB;;;;AAI3B,EAAExB,gBAAgB;AAClB,EACE,CAACe,wBACG,CAAC;AACP,EAAEd,sBAAsB;AACxB,CAAC,GACK,GACL;;;oCAGmC,EAAEsB,iBAAiB;;sBAEjC,EAAEC,iBAAiB;uBAClB,EAAEA,iBAAiB,OAAO,CAAC,MAAM,UAAU,OAAO,CAAC,aAAa,qCAAqC;;;;QAIpH,EAAED,iBAAiB;;+EAEoD,EAAER,wBAAwB,cAAc,GAAG;;6CAE7E,EAAEA,wBAAwB,aAAa,GAAG;;;;;;;;AAQvF,EAAED,WAAW;;;;;QAKL,EAAEJ,kBAAkB;;;;;;;;;;;;;;;;;;;;YAoBhB,EAAEM,eAAe;;;;;;;;;;;;;AAa7B,EACED,wBACI,CAAC;;;;;AAKP,CAAC,GACK,KAEJA,wBACI,CAAC;;AAEP,CAAC,GACK,GACL;;;aAGY,EAAES,iBAAiB;;;aAGnB,EAAEA,iBAAiB,OAAO,CAAC,MAAM,cAAc;;;;;;;;AAQ5D,EAAErB,0BAA0BY,uBAAuBV,iBAAiBC,kBAAkB;AACtF,CAAC;AACD"}
|
|
@@ -0,0 +1,44 @@
|
|
|
1
|
+
import { getPreferredLanguage } from "@midscene/shared/env";
|
|
2
|
+
import { bboxDescription } from "./common.mjs";
|
|
3
|
+
/**
 * Builds the system prompt for the "locate section" model call.
 *
 * The prompt asks the model to answer with a JSON object holding the bounding
 * box (`bbox`) of the section that contains the target element, an optional
 * `references_bbox` array for reference elements mentioned in the description,
 * and an optional `error` message.
 *
 * @param modelFamily - Forwarded unchanged to `bboxDescription`; the returned
 *   text is embedded as the trailing comment on the `bbox` line of the output
 *   format. NOTE(review): the bundled source map declares this parameter as
 *   `TModelFamily | undefined` - confirm how `bboxDescription` treats undefined.
 * @returns The prompt text as a single string.
 */
function systemPromptToLocateSection(modelFamily) {
|
|
4
|
+
// Language the model is told to use for the optional `error` message.
const preferredLanguage = getPreferredLanguage();
|
|
5
|
+
// Text embedded as the comment that follows the `bbox` field in the prompt.
const bboxFormat = bboxDescription(modelFamily);
|
|
6
|
+
return `
|
|
7
|
+
## Role:
|
|
8
|
+
You are an AI assistant that helps identify UI elements.
|
|
9
|
+
|
|
10
|
+
## Objective:
|
|
11
|
+
- Find a section containing the target element
|
|
12
|
+
- If the description mentions reference elements, also locate sections containing those references
|
|
13
|
+
|
|
14
|
+
## Output Format:
|
|
15
|
+
\`\`\`json
|
|
16
|
+
{
|
|
17
|
+
"bbox": [number, number, number, number], // ${bboxFormat}
|
|
18
|
+
"references_bbox"?: [
|
|
19
|
+
[number, number, number, number],
|
|
20
|
+
...
|
|
21
|
+
],
|
|
22
|
+
"error"?: string
|
|
23
|
+
}
|
|
24
|
+
\`\`\`
|
|
25
|
+
|
|
26
|
+
Fields:
|
|
27
|
+
* \`bbox\` - Bounding box of the section containing the target element
|
|
28
|
+
* \`references_bbox\` - Optional array of bounding boxes for reference elements
|
|
29
|
+
* \`error\` - Optional error message if the section cannot be found. Use ${preferredLanguage}.
|
|
30
|
+
|
|
31
|
+
Example:
|
|
32
|
+
If the description is "delete button on the second row with title 'Peter'", return:
|
|
33
|
+
\`\`\`json
|
|
34
|
+
{
|
|
35
|
+
"bbox": [100, 100, 200, 200],
|
|
36
|
+
"references_bbox": [[100, 100, 200, 200]]
|
|
37
|
+
}
|
|
38
|
+
\`\`\`
|
|
39
|
+
`;
|
|
40
|
+
}
|
|
41
|
+
/**
 * Builds the user-message text that asks the model to find the page section
 * matching a description.
 *
 * @param sectionDescription - Description of the section to find; it is
 *   interpolated into the message as-is.
 * @returns The instruction string, `Find section containing: <description>`.
 */
const sectionLocatorInstruction = (sectionDescription) => {
  const instructionPrefix = 'Find section containing: ';
  return `${instructionPrefix}${sectionDescription}`;
};
|
|
42
|
+
export { sectionLocatorInstruction, systemPromptToLocateSection };
|
|
43
|
+
|
|
44
|
+
//# sourceMappingURL=llm-section-locator.mjs.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"ai-model/prompt/llm-section-locator.mjs","sources":["../../../../src/ai-model/prompt/llm-section-locator.ts"],"sourcesContent":["import type { TModelFamily } from '@midscene/shared/env';\nimport { getPreferredLanguage } from '@midscene/shared/env';\nimport { bboxDescription } from './common';\n\nexport function systemPromptToLocateSection(\n modelFamily: TModelFamily | undefined,\n) {\n const preferredLanguage = getPreferredLanguage();\n const bboxFormat = bboxDescription(modelFamily);\n return `\n## Role:\nYou are an AI assistant that helps identify UI elements.\n\n## Objective:\n- Find a section containing the target element\n- If the description mentions reference elements, also locate sections containing those references\n\n## Output Format:\n\\`\\`\\`json\n{\n \"bbox\": [number, number, number, number], // ${bboxFormat}\n \"references_bbox\"?: [\n [number, number, number, number],\n ...\n ],\n \"error\"?: string\n}\n\\`\\`\\`\n\nFields:\n* \\`bbox\\` - Bounding box of the section containing the target element\n* \\`references_bbox\\` - Optional array of bounding boxes for reference elements\n* \\`error\\` - Optional error message if the section cannot be found. Use ${preferredLanguage}.\n\nExample:\nIf the description is \"delete button on the second row with title 'Peter'\", return:\n\\`\\`\\`json\n{\n \"bbox\": [100, 100, 200, 200],\n \"references_bbox\": [[100, 100, 200, 200]]\n}\n\\`\\`\\`\n`;\n}\n\nexport const sectionLocatorInstruction = (sectionDescription: string) =>\n `Find section containing: ${sectionDescription}`;\n"],"names":["systemPromptToLocateSection","modelFamily","preferredLanguage","getPreferredLanguage","bboxFormat","bboxDescription","sectionLocatorInstruction","sectionDescription"],"mappings":";;AAIO,SAASA,4BACdC,WAAqC;IAErC,MAAMC,oBAAoBC;IAC1B,MAAMC,aAAaC,gBAAgBJ;IACnC,OAAO,CAAC;;;;;;;;;;;gDAWsC,EAAEG,WAAW;;;;;;;;;;;;yEAYY,EAAEF,kBAAkB;;;;;;;;;;AAU7F,CAAC;AACD;AAEO,MAAMI,4BAA4B,CAACC,qBACxC,CAAC,yBAAyB,EAAEA,oBAAoB"}
|
|
@@ -0,0 +1,35 @@
|
|
|
1
|
+
/**
 * Returns the fixed system prompt used to classify a UI element description
 * as order-sensitive or not.
 *
 * The prompt describes order-sensitive descriptions as ones that specify a
 * position or sequence (for example "the first button") and asks the model to
 * answer with a JSON object of the form `{ "isOrderSensitive": boolean }`.
 *
 * @returns The prompt text; the function takes no inputs, so the text is the
 *   same on every call.
 */
function systemPromptToJudgeOrderSensitive() {
|
|
2
|
+
return `
|
|
3
|
+
## Role:
|
|
4
|
+
You are an AI assistant that analyzes UI element descriptions.
|
|
5
|
+
|
|
6
|
+
## Objective:
|
|
7
|
+
Determine whether a given element description is order-sensitive.
|
|
8
|
+
|
|
9
|
+
Order-sensitive descriptions contain phrases that specify position or sequence, such as:
|
|
10
|
+
- "the first button"
|
|
11
|
+
- "the second item"
|
|
12
|
+
- "the third row"
|
|
13
|
+
- "the last input"
|
|
14
|
+
- "the 5th element"
|
|
15
|
+
|
|
16
|
+
Order-insensitive descriptions do not specify position:
|
|
17
|
+
- "login button"
|
|
18
|
+
- "search input"
|
|
19
|
+
- "submit button"
|
|
20
|
+
- "user avatar"
|
|
21
|
+
|
|
22
|
+
## Output Format:
|
|
23
|
+
\`\`\`json
|
|
24
|
+
{
|
|
25
|
+
"isOrderSensitive": boolean
|
|
26
|
+
}
|
|
27
|
+
\`\`\`
|
|
28
|
+
|
|
29
|
+
Return true if the description is order-sensitive, false otherwise.
|
|
30
|
+
`;
|
|
31
|
+
}
|
|
32
|
+
/**
 * Builds the user-message text that asks the model to analyze one element
 * description for order sensitivity.
 *
 * @param description - The element description; it is placed between double
 *   quotes in the message without any escaping.
 * @returns The string `Analyze this element description: "<description>"`.
 */
const orderSensitiveJudgePrompt = (description) => {
  const quotedDescription = `"${description}"`;
  return `Analyze this element description: ${quotedDescription}`;
};
|
|
33
|
+
export { orderSensitiveJudgePrompt, systemPromptToJudgeOrderSensitive };
|
|
34
|
+
|
|
35
|
+
//# sourceMappingURL=order-sensitive-judge.mjs.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"ai-model/prompt/order-sensitive-judge.mjs","sources":["../../../../src/ai-model/prompt/order-sensitive-judge.ts"],"sourcesContent":["export function systemPromptToJudgeOrderSensitive() {\n return `\n## Role:\nYou are an AI assistant that analyzes UI element descriptions.\n\n## Objective:\nDetermine whether a given element description is order-sensitive.\n\nOrder-sensitive descriptions contain phrases that specify position or sequence, such as:\n- \"the first button\"\n- \"the second item\"\n- \"the third row\"\n- \"the last input\"\n- \"the 5th element\"\n\nOrder-insensitive descriptions do not specify position:\n- \"login button\"\n- \"search input\"\n- \"submit button\"\n- \"user avatar\"\n\n## Output Format:\n\\`\\`\\`json\n{\n \"isOrderSensitive\": boolean\n}\n\\`\\`\\`\n\nReturn true if the description is order-sensitive, false otherwise.\n`;\n}\n\nexport const orderSensitiveJudgePrompt = (description: string) => {\n return `Analyze this element description: \"${description}\"`;\n};\n"],"names":["systemPromptToJudgeOrderSensitive","orderSensitiveJudgePrompt","description"],"mappings":"AAAO,SAASA;IACd,OAAO,CAAC;;;;;;;;;;;;;;;;;;;;;;;;;;;;AA4BV,CAAC;AACD;AAEO,MAAMC,4BAA4B,CAACC,cACjC,CAAC,mCAAmC,EAAEA,YAAY,CAAC,CAAC"}
|
|
@@ -0,0 +1,117 @@
|
|
|
1
|
+
import { PLAYWRIGHT_EXAMPLE_CODE } from "@midscene/shared/constants";
|
|
2
|
+
import { callAI, callAIWithStringResponse } from "../index.mjs";
|
|
3
|
+
import { createEventCounts, createMessageContent, extractInputDescriptions, filterEventsByType, getScreenshotsForLLM, prepareEventSummary, processEventsForLLM, validateEvents } from "./yaml-generator.mjs";
|
|
4
|
+
/**
 * Asks the model to generate a Playwright test (using @midscene/web/playwright)
 * that reproduces a recorded browser session, and returns the generated code.
 *
 * @param events - Recorded session events; passed to `validateEvents` before
 *   anything else. NOTE(review): `validateEvents` is defined elsewhere;
 *   presumably it throws on invalid input - confirm.
 * @param options - Generation options. Fields read here: `testName`,
 *   `maxScreenshots` (falls back to 3 when falsy), `waitForNetworkIdle`
 *   (treated as true unless exactly `false`), `waitForNetworkIdleTimeout`
 *   (falls back to 2000 when falsy), `viewportSize` (falls back to
 *   1280x800) and `includeScreenshots` (treated as true unless exactly
 *   `false`). Must be an object: its properties are read unconditionally.
 * @param modelConfig - Passed through to `callAIWithStringResponse`.
 * @returns The `content` string of the model response.
 * @throws {Error} 'Failed to generate Playwright test code' when the response
 *   has no non-empty string `content`.
 */
const generatePlaywrightTest = async (events, options, modelConfig)=>{
|
|
5
|
+
validateEvents(events);
|
|
6
|
+
// Summary of the recording; it is spread into the object serialized into the prompt.
const summary = prepareEventSummary(events, {
|
|
7
|
+
testName: options.testName,
|
|
8
|
+
maxScreenshots: options.maxScreenshots || 3
|
|
9
|
+
});
|
|
10
|
+
const playwrightSummary = {
|
|
11
|
+
...summary,
|
|
12
|
+
waitForNetworkIdle: false !== options.waitForNetworkIdle,
|
|
13
|
+
waitForNetworkIdleTimeout: options.waitForNetworkIdleTimeout || 2000,
|
|
14
|
+
viewportSize: options.viewportSize || {
|
|
15
|
+
width: 1280,
|
|
16
|
+
height: 800
|
|
17
|
+
}
|
|
18
|
+
};
|
|
19
|
+
// NOTE(review): `|| 3` also replaces an explicit maxScreenshots of 0 - confirm that is intended.
const screenshots = getScreenshotsForLLM(events, options.maxScreenshots || 3);
|
|
20
|
+
const promptText = `Generate a Playwright test using @midscene/web/playwright that reproduces this recorded browser session. The test should be based on the following events and follow the structure of the example provided. Make the test descriptive with appropriate assertions and validations.
|
|
21
|
+
|
|
22
|
+
Event Summary:
|
|
23
|
+
${JSON.stringify(playwrightSummary, null, 2)}
|
|
24
|
+
|
|
25
|
+
Generated code should:
|
|
26
|
+
1. Import required dependencies
|
|
27
|
+
2. Set up the test with proper configuration
|
|
28
|
+
3. Include a beforeEach hook to navigate to the starting URL
|
|
29
|
+
4. Implement a test that uses Midscene AI methods (aiTap, aiInput, aiAssert, etc.)
|
|
30
|
+
5. Include appropriate assertions and validations
|
|
31
|
+
6. Follow best practices for Playwright tests
|
|
32
|
+
7. Be ready to execute without further modification
|
|
33
|
+
|
|
34
|
+
Important: Return ONLY the raw Playwright test code. Do NOT wrap the response in markdown code blocks (no \`\`\`typescript, \`\`\`javascript or \`\`\`). Start directly with the code content.`;
|
|
35
|
+
// Third argument is true unless includeScreenshots is exactly false; presumably it toggles attaching screenshots - confirm in createMessageContent.
const messageContent = createMessageContent(promptText, screenshots, false !== options.includeScreenshots);
|
|
36
|
+
const systemPrompt = `You are an expert test automation engineer specializing in Playwright and Midscene.
|
|
37
|
+
Your task is to generate a complete, executable Playwright test using @midscene/web/playwright that reproduces a recorded browser session.
|
|
38
|
+
|
|
39
|
+
${PLAYWRIGHT_EXAMPLE_CODE}`;
|
|
40
|
+
const prompt = [
|
|
41
|
+
{
|
|
42
|
+
role: 'system',
|
|
43
|
+
content: systemPrompt
|
|
44
|
+
},
|
|
45
|
+
{
|
|
46
|
+
role: 'user',
|
|
47
|
+
content: messageContent
|
|
48
|
+
}
|
|
49
|
+
];
|
|
50
|
+
const response = await callAIWithStringResponse(prompt, modelConfig);
|
|
51
|
+
// Empty or non-string content falls through to the error below.
if (response?.content && 'string' == typeof response.content) return response.content;
|
|
52
|
+
throw new Error('Failed to generate Playwright test code');
|
|
53
|
+
};
|
|
54
|
+
/**
 * Asks the model for a Playwright test (built on @midscene/web/playwright)
 * that replays a recorded browser session, optionally streaming the answer.
 *
 * Streaming is used only when both `options.stream` and `options.onChunk` are
 * truthy; in that case the result of `callAI` is returned unchanged. Otherwise
 * the request goes through `callAIWithStringResponse` and the reply is wrapped
 * as `{ content, usage, isStreamed: false }`.
 *
 * Defaults applied here: `maxScreenshots` falls back to 3,
 * `waitForNetworkIdleTimeout` to 2000, `viewportSize` to 1280x800, and both
 * `waitForNetworkIdle` and `includeScreenshots` count as enabled unless they
 * are exactly `false`.
 *
 * @param events - Recorded events; checked with `validateEvents` before use.
 * @param options - Generation and streaming options (see defaults above).
 * @param modelConfig - Passed through untouched to the AI call.
 * @returns The streaming result from `callAI`, or the wrapped non-streamed reply.
 * @throws {Error} When the non-streamed reply has no non-empty string content.
 */
const generatePlaywrightTestStream = async (events, options, modelConfig)=>{
    validateEvents(events);

    const {
        testName,
        maxScreenshots,
        waitForNetworkIdle,
        waitForNetworkIdleTimeout,
        viewportSize,
        includeScreenshots,
        stream,
        onChunk
    } = options;
    const screenshotLimit = maxScreenshots || 3;

    // Shared summary first, then the Playwright-specific settings; the key
    // order is kept because the object is serialized into the prompt below.
    const baseSummary = prepareEventSummary(events, {
        testName,
        maxScreenshots: screenshotLimit
    });
    const playwrightSummary = {
        ...baseSummary,
        waitForNetworkIdle: waitForNetworkIdle !== false,
        waitForNetworkIdleTimeout: waitForNetworkIdleTimeout || 2000,
        viewportSize: viewportSize || {
            width: 1280,
            height: 800
        }
    };

    const screenshots = getScreenshotsForLLM(events, screenshotLimit);

    // User prompt, one array entry per output line (joined with "\n").
    const promptText = [
        'Generate a Playwright test using @midscene/web/playwright that reproduces this recorded browser session. The test should be based on the following events and follow the structure of the example provided. Make the test descriptive with appropriate assertions and validations.',
        '',
        'Event Summary:',
        JSON.stringify(playwrightSummary, null, 2),
        '',
        'Generated code should:',
        '1. Import required dependencies',
        '2. Set up the test with proper configuration',
        '3. Include a beforeEach hook to navigate to the starting URL',
        '4. Implement a test that uses Midscene AI methods (aiTap, aiInput, aiAssert, etc.)',
        '5. Include appropriate assertions and validations',
        '6. Follow best practices for Playwright tests',
        '7. Be ready to execute without further modification',
        "8. can't wrap this test code in markdown code block",
        '',
        'Important: Return ONLY the raw Playwright test code. Do NOT wrap the response in markdown code blocks (no ```typescript, ```javascript or ```). Start directly with the code content.'
    ].join('\n');

    const messageContent = createMessageContent(promptText, screenshots, includeScreenshots !== false);

    // The first line intentionally ends with a space, matching the original
    // template literal as recorded in the embedded source.
    const systemIntro = [
        'You are an expert test automation engineer specializing in Playwright and Midscene. ',
        'Your task is to generate a complete, executable Playwright test using @midscene/web/playwright that reproduces a recorded browser session.'
    ].join('\n');
    const systemPrompt = `${systemIntro}\n\n${PLAYWRIGHT_EXAMPLE_CODE}`;

    const prompt = [
        {
            role: 'system',
            content: systemPrompt
        },
        {
            role: 'user',
            content: messageContent
        }
    ];

    // Non-streaming fallback: taken unless the caller asked for streaming AND
    // supplied a chunk handler.
    if (!(stream && onChunk)) {
        const reply = await callAIWithStringResponse(prompt, modelConfig);
        const text = reply?.content;
        if (typeof text !== 'string' || text === '') {
            throw new Error('Failed to generate Playwright test code');
        }
        return {
            content: text,
            usage: reply.usage,
            isStreamed: false
        };
    }

    return await callAI(prompt, modelConfig, {
        stream: true,
        onChunk
    });
};
|
|
115
|
+
// Module exports: the two Playwright generators plus the event helpers that the embedded source re-exports from './yaml-generator'.
export { createEventCounts, createMessageContent, extractInputDescriptions, filterEventsByType, generatePlaywrightTest, generatePlaywrightTestStream, getScreenshotsForLLM, prepareEventSummary, processEventsForLLM, validateEvents };
|
|
116
|
+
|
|
117
|
+
//# sourceMappingURL=playwright-generator.mjs.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"ai-model/prompt/playwright-generator.mjs","sources":["../../../../src/ai-model/prompt/playwright-generator.ts"],"sourcesContent":["import type {\n StreamingAIResponse,\n StreamingCodeGenerationOptions,\n} from '@/types';\nimport { PLAYWRIGHT_EXAMPLE_CODE } from '@midscene/shared/constants';\nimport type { IModelConfig } from '@midscene/shared/env';\nimport type { ChatCompletionMessageParam } from 'openai/resources/index';\nimport { callAI, callAIWithStringResponse } from '../index';\n// Import shared utilities and types from yaml-generator\nimport {\n type ChromeRecordedEvent,\n type EventCounts,\n type EventSummary,\n type InputDescription,\n type ProcessedEvent,\n createEventCounts,\n createMessageContent,\n extractInputDescriptions,\n filterEventsByType,\n getScreenshotsForLLM,\n prepareEventSummary,\n processEventsForLLM,\n validateEvents,\n} from './yaml-generator';\n\n// Playwright-specific interfaces\nexport interface PlaywrightGenerationOptions {\n testName?: string;\n includeScreenshots?: boolean;\n includeTimestamps?: boolean;\n maxScreenshots?: number;\n description?: string;\n viewportSize?: { width: number; height: number };\n waitForNetworkIdle?: boolean;\n waitForNetworkIdleTimeout?: number;\n}\n\n// Re-export shared types for backward compatibility\nexport type {\n ChromeRecordedEvent,\n EventCounts,\n InputDescription,\n ProcessedEvent,\n EventSummary,\n};\n\n// Re-export shared utilities for backward compatibility\nexport {\n getScreenshotsForLLM,\n filterEventsByType,\n createEventCounts,\n extractInputDescriptions,\n processEventsForLLM,\n prepareEventSummary,\n createMessageContent,\n validateEvents,\n};\n\n/**\n * Generates Playwright test code from recorded events\n */\nexport const generatePlaywrightTest = async (\n events: ChromeRecordedEvent[],\n options: PlaywrightGenerationOptions,\n modelConfig: IModelConfig,\n): Promise<string> => {\n // Validate input\n validateEvents(events);\n\n // Prepare event summary using 
shared utilities\n const summary = prepareEventSummary(events, {\n testName: options.testName,\n maxScreenshots: options.maxScreenshots || 3,\n });\n\n // Add Playwright-specific options to summary\n const playwrightSummary = {\n ...summary,\n waitForNetworkIdle: options.waitForNetworkIdle !== false,\n waitForNetworkIdleTimeout: options.waitForNetworkIdleTimeout || 2000,\n viewportSize: options.viewportSize || { width: 1280, height: 800 },\n };\n\n // Get screenshots for visual context\n const screenshots = getScreenshotsForLLM(events, options.maxScreenshots || 3);\n\n // Create prompt text\n const promptText = `Generate a Playwright test using @midscene/web/playwright that reproduces this recorded browser session. The test should be based on the following events and follow the structure of the example provided. Make the test descriptive with appropriate assertions and validations.\n\nEvent Summary:\n${JSON.stringify(playwrightSummary, null, 2)}\n\nGenerated code should:\n1. Import required dependencies\n2. Set up the test with proper configuration\n3. Include a beforeEach hook to navigate to the starting URL\n4. Implement a test that uses Midscene AI methods (aiTap, aiInput, aiAssert, etc.)\n5. Include appropriate assertions and validations\n6. Follow best practices for Playwright tests\n7. Be ready to execute without further modification\n\nImportant: Return ONLY the raw Playwright test code. Do NOT wrap the response in markdown code blocks (no \\`\\`\\`typescript, \\`\\`\\`javascript or \\`\\`\\`). Start directly with the code content.`;\n\n // Create message content with screenshots\n const messageContent = createMessageContent(\n promptText,\n screenshots,\n options.includeScreenshots !== false,\n );\n\n // Create system prompt\n const systemPrompt = `You are an expert test automation engineer specializing in Playwright and Midscene. 
\nYour task is to generate a complete, executable Playwright test using @midscene/web/playwright that reproduces a recorded browser session.\n\n${PLAYWRIGHT_EXAMPLE_CODE}`;\n\n // Use LLM to generate the Playwright test code\n const prompt: ChatCompletionMessageParam[] = [\n {\n role: 'system',\n content: systemPrompt,\n },\n {\n role: 'user',\n content: messageContent,\n },\n ];\n\n const response = await callAIWithStringResponse(prompt, modelConfig);\n\n if (response?.content && typeof response.content === 'string') {\n return response.content;\n }\n\n throw new Error('Failed to generate Playwright test code');\n};\n\n/**\n * Generates Playwright test code from recorded events with streaming support\n */\nexport const generatePlaywrightTestStream = async (\n events: ChromeRecordedEvent[],\n options: PlaywrightGenerationOptions & StreamingCodeGenerationOptions,\n modelConfig: IModelConfig,\n): Promise<StreamingAIResponse> => {\n // Validate input\n validateEvents(events);\n\n // Prepare event summary using shared utilities\n const summary = prepareEventSummary(events, {\n testName: options.testName,\n maxScreenshots: options.maxScreenshots || 3,\n });\n\n // Add Playwright-specific options to summary\n const playwrightSummary = {\n ...summary,\n waitForNetworkIdle: options.waitForNetworkIdle !== false,\n waitForNetworkIdleTimeout: options.waitForNetworkIdleTimeout || 2000,\n viewportSize: options.viewportSize || { width: 1280, height: 800 },\n };\n\n // Get screenshots for visual context\n const screenshots = getScreenshotsForLLM(events, options.maxScreenshots || 3);\n\n // Create prompt text\n const promptText = `Generate a Playwright test using @midscene/web/playwright that reproduces this recorded browser session. The test should be based on the following events and follow the structure of the example provided. 
Make the test descriptive with appropriate assertions and validations.\n\nEvent Summary:\n${JSON.stringify(playwrightSummary, null, 2)}\n\nGenerated code should:\n1. Import required dependencies\n2. Set up the test with proper configuration\n3. Include a beforeEach hook to navigate to the starting URL\n4. Implement a test that uses Midscene AI methods (aiTap, aiInput, aiAssert, etc.)\n5. Include appropriate assertions and validations\n6. Follow best practices for Playwright tests\n7. Be ready to execute without further modification\n8. can't wrap this test code in markdown code block\n\nImportant: Return ONLY the raw Playwright test code. Do NOT wrap the response in markdown code blocks (no \\`\\`\\`typescript, \\`\\`\\`javascript or \\`\\`\\`). Start directly with the code content.`;\n\n // Create message content with screenshots\n const messageContent = createMessageContent(\n promptText,\n screenshots,\n options.includeScreenshots !== false,\n );\n\n // Create system prompt\n const systemPrompt = `You are an expert test automation engineer specializing in Playwright and Midscene. 
\nYour task is to generate a complete, executable Playwright test using @midscene/web/playwright that reproduces a recorded browser session.\n\n${PLAYWRIGHT_EXAMPLE_CODE}`;\n\n // Use LLM to generate the Playwright test code with streaming\n const prompt: ChatCompletionMessageParam[] = [\n {\n role: 'system',\n content: systemPrompt,\n },\n {\n role: 'user',\n content: messageContent,\n },\n ];\n\n if (options.stream && options.onChunk) {\n // Use streaming\n return await callAI(prompt, modelConfig, {\n stream: true,\n onChunk: options.onChunk,\n });\n } else {\n // Fallback to non-streaming\n const response = await callAIWithStringResponse(prompt, modelConfig);\n\n if (response?.content && typeof response.content === 'string') {\n return {\n content: response.content,\n usage: response.usage,\n isStreamed: false,\n };\n }\n\n throw new Error('Failed to generate Playwright test code');\n }\n};\n"],"names":["generatePlaywrightTest","events","options","modelConfig","validateEvents","summary","prepareEventSummary","playwrightSummary","screenshots","getScreenshotsForLLM","promptText","JSON","messageContent","createMessageContent","systemPrompt","PLAYWRIGHT_EXAMPLE_CODE","prompt","response","callAIWithStringResponse","Error","generatePlaywrightTestStream","callAI"],"mappings":";;;AA6DO,MAAMA,yBAAyB,OACpCC,QACAC,SACAC;IAGAC,eAAeH;IAGf,MAAMI,UAAUC,oBAAoBL,QAAQ;QAC1C,UAAUC,QAAQ,QAAQ;QAC1B,gBAAgBA,QAAQ,cAAc,IAAI;IAC5C;IAGA,MAAMK,oBAAoB;QACxB,GAAGF,OAAO;QACV,oBAAoBH,AAA+B,UAA/BA,QAAQ,kBAAkB;QAC9C,2BAA2BA,QAAQ,yBAAyB,IAAI;QAChE,cAAcA,QAAQ,YAAY,IAAI;YAAE,OAAO;YAAM,QAAQ;QAAI;IACnE;IAGA,MAAMM,cAAcC,qBAAqBR,QAAQC,QAAQ,cAAc,IAAI;IAG3E,MAAMQ,aAAa,CAAC;;;AAGtB,EAAEC,KAAK,SAAS,CAACJ,mBAAmB,MAAM,GAAG;;;;;;;;;;;8LAWiJ,CAAC;IAG7L,MAAMK,iBAAiBC,qBACrBH,YACAF,aACAN,AAA+B,UAA/BA,QAAQ,kBAAkB;IAI5B,MAAMY,eAAe,CAAC;;;AAGxB,EAAEC,yBAAyB;IAGzB,MAAMC,SAAuC;QAC3C;YACE,MAAM;YACN,SAASF;QACX;QACA;YACE,MAAM;YACN,SAASF;QACX;KACD;IAED,MAAMK,WAAW,MAAMC,yBAAyBF,QAAQb;IAExD,IAAIc,UAAU,WAAW,AAA4B,YAA5B,OAAO
A,SAAS,OAAO,EAC9C,OAAOA,SAAS,OAAO;IAGzB,MAAM,IAAIE,MAAM;AAClB;AAKO,MAAMC,+BAA+B,OAC1CnB,QACAC,SACAC;IAGAC,eAAeH;IAGf,MAAMI,UAAUC,oBAAoBL,QAAQ;QAC1C,UAAUC,QAAQ,QAAQ;QAC1B,gBAAgBA,QAAQ,cAAc,IAAI;IAC5C;IAGA,MAAMK,oBAAoB;QACxB,GAAGF,OAAO;QACV,oBAAoBH,AAA+B,UAA/BA,QAAQ,kBAAkB;QAC9C,2BAA2BA,QAAQ,yBAAyB,IAAI;QAChE,cAAcA,QAAQ,YAAY,IAAI;YAAE,OAAO;YAAM,QAAQ;QAAI;IACnE;IAGA,MAAMM,cAAcC,qBAAqBR,QAAQC,QAAQ,cAAc,IAAI;IAG3E,MAAMQ,aAAa,CAAC;;;AAGtB,EAAEC,KAAK,SAAS,CAACJ,mBAAmB,MAAM,GAAG;;;;;;;;;;;;8LAYiJ,CAAC;IAG7L,MAAMK,iBAAiBC,qBACrBH,YACAF,aACAN,AAA+B,UAA/BA,QAAQ,kBAAkB;IAI5B,MAAMY,eAAe,CAAC;;;AAGxB,EAAEC,yBAAyB;IAGzB,MAAMC,SAAuC;QAC3C;YACE,MAAM;YACN,SAASF;QACX;QACA;YACE,MAAM;YACN,SAASF;QACX;KACD;IAED,IAAIV,QAAQ,MAAM,IAAIA,QAAQ,OAAO,EAEnC,OAAO,MAAMmB,OAAOL,QAAQb,aAAa;QACvC,QAAQ;QACR,SAASD,QAAQ,OAAO;IAC1B;IACK;QAEL,MAAMe,WAAW,MAAMC,yBAAyBF,QAAQb;QAExD,IAAIc,UAAU,WAAW,AAA4B,YAA5B,OAAOA,SAAS,OAAO,EAC9C,OAAO;YACL,SAASA,SAAS,OAAO;YACzB,OAAOA,SAAS,KAAK;YACrB,YAAY;QACd;QAGF,MAAM,IAAIE,MAAM;IAClB;AACF"}
|