@midscene/core 0.9.1 → 0.9.2-beta-20250114083542.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/lib/ai-model.js +5 -6741
- package/dist/lib/chunk-G7A32JAG.js +36 -0
- package/dist/lib/chunk-SCNIHQKF.js +115 -0
- package/dist/lib/chunk-Z6Q56DTU.js +2790 -0
- package/dist/lib/chunk-ZRCWDGK2.js +251 -0
- package/dist/lib/env.js +65 -164
- package/dist/lib/index.js +189 -7135
- package/dist/lib/types/ai-model.d.ts +60 -4
- package/dist/lib/types/{automation-81d96430.d.ts → automation-d7e10a4e.d.ts} +6 -2
- package/dist/lib/types/env.d.ts +4 -1
- package/dist/lib/types/index.d.ts +11 -9
- package/dist/lib/types/{types-05c1f241.d.ts → types-c4bec333.d.ts} +5 -10
- package/dist/lib/types/utils.d.ts +2 -1
- package/dist/lib/utils.js +40 -370
- package/dist/lib/wrappers-KKGZQXJL.js +4086 -0
- package/package.json +3 -3
- package/report/index.html +23 -7
|
@@ -0,0 +1,2790 @@
|
|
|
1
|
+
"use strict";Object.defineProperty(exports, "__esModule", {value: true}); function _interopRequireDefault(obj) { return obj && obj.__esModule ? obj : { default: obj }; }
|
|
2
|
+
|
|
3
|
+
|
|
4
|
+
|
|
5
|
+
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
|
|
17
|
+
|
|
18
|
+
|
|
19
|
+
|
|
20
|
+
|
|
21
|
+
|
|
22
|
+
|
|
23
|
+
|
|
24
|
+
var _chunkSCNIHQKFjs = require('./chunk-SCNIHQKF.js');
|
|
25
|
+
|
|
26
|
+
|
|
27
|
+
|
|
28
|
+
var _chunkG7A32JAGjs = require('./chunk-G7A32JAG.js');
|
|
29
|
+
|
|
30
|
+
// ../../node_modules/.pnpm/lex@1.7.9/node_modules/lex/lexer.js
|
|
31
|
+
var require_lexer = _chunkG7A32JAGjs.__commonJS.call(void 0, {
|
|
32
|
+
"../../node_modules/.pnpm/lex@1.7.9/node_modules/lex/lexer.js"(exports, module) {
|
|
33
|
+
"use strict";
|
|
34
|
+
if (typeof module === "object" && typeof module.exports === "object")
|
|
35
|
+
module.exports = Lexer;
|
|
36
|
+
/**
 * Default handler for a character that no rule matched. The lexer calls it
 * with itself as `this` after `index` has already moved past the character,
 * hence the `- 1` when reporting the position.
 *
 * @param {string} chr - The unmatched character.
 * @throws {Error} Always.
 */
Lexer.defunct = function(chr) {
  var position = this.index - 1;
  throw new Error("Unexpected character at index " + position + ": " + chr);
};
|
|
39
|
+
/**
 * Rule-driven lexer.
 *
 * Instance fields: `state` (current start state), `index` (read position in
 * `input`), `input` (text being scanned) and `reject` (an action may set it to
 * true to refuse its match, after which the next candidate is tried).
 *
 * @param {Function} [defunct] - Called with the offending character when no
 *   rule matches; falls back to `Lexer.defunct` when not a function.
 */
function Lexer(defunct) {
  if (typeof defunct !== "function") {
    defunct = Lexer.defunct;
  }

  var tokens = []; // tokens queued by an action that returned an array
  var rules = [];
  var remove = 0; // number of leading candidates already tried at this position

  this.state = 0;
  this.index = 0;
  this.input = "";

  function isArray(value) {
    return Object.prototype.toString.call(value) === "[object Array]";
  }

  /**
   * Registers a rule. Non-global patterns are recompiled with the `g` flag
   * (keeping `m` and `i`) so that `lastIndex` can anchor the search.
   * `start` lists the states the rule is active in and defaults to `[0]`.
   * Returns the lexer for chaining.
   */
  this.addRule = function(pattern, action, start) {
    var global = pattern.global;
    if (!global) {
      var flags = "g" + (pattern.multiline ? "m" : "") + (pattern.ignoreCase ? "i" : "");
      pattern = new RegExp(pattern.source, flags);
    }
    if (!isArray(start)) {
      start = [0];
    }
    rules.push({ pattern: pattern, global: global, action: action, start: start });
    return this;
  };

  /** Resets all scanning state and installs new input. Returns the lexer. */
  this.setInput = function(input) {
    remove = 0;
    this.state = 0;
    this.index = 0;
    tokens.length = 0;
    this.input = input;
    return this;
  };

  /**
   * Produces the next token, or `undefined` once the input is exhausted.
   * An action returning an array yields its first element now and queues
   * the rest for subsequent calls.
   */
  this.lex = function() {
    if (tokens.length) {
      return tokens.shift();
    }
    this.reject = true;
    while (this.index <= this.input.length) {
      var candidates = scan.call(this).splice(remove);
      var startIndex = this.index;
      // Try candidates in order for as long as the previous one was rejected.
      while (candidates.length && this.reject) {
        var candidate = candidates.shift();
        var result = candidate.result;
        var consumed = candidate.length;
        this.index += consumed;
        this.reject = false;
        remove++;
        var token = candidate.action.apply(this, result);
        if (this.reject) {
          // The action refused the match: rewind to where it started.
          this.index = result.index;
        } else if (typeof token !== "undefined") {
          if (isArray(token)) {
            tokens = token.slice(1);
            token = token[0];
          }
          if (consumed) {
            remove = 0;
          }
          return token;
        }
      }
      var input = this.input;
      if (startIndex < input.length) {
        if (this.reject) {
          // Nothing accepted this position: hand one character to `defunct`.
          remove = 0;
          var fallback = defunct.call(this, input.charAt(this.index++));
          if (typeof fallback !== "undefined") {
            if (isArray(fallback)) {
              tokens = fallback.slice(1);
              return fallback[0];
            }
            return fallback;
          }
        } else {
          if (this.index !== startIndex) {
            remove = 0;
          }
          this.reject = true;
        }
      } else if (candidates.length) {
        this.reject = true;
      } else {
        break;
      }
    }
  };

  /**
   * Collects every active rule that matches exactly at the current index.
   * Matches are kept longest-first by bubbling each new match towards the
   * front; a match from an originally-global pattern moves the `floor`
   * marker that limits how far later matches may bubble.
   */
  function scan() {
    var found = [];
    var floor = 0;
    var state = this.state;
    var anchor = this.index;
    var input = this.input;
    for (var i = 0, total = rules.length; i < total; i++) {
      var rule = rules[i];
      var start = rule.start;
      var states = start.length;
      var active = !states || start.indexOf(state) >= 0 || state % 2 && states === 1 && !start[0];
      if (!active) {
        continue;
      }
      var pattern = rule.pattern;
      pattern.lastIndex = anchor;
      var result = pattern.exec(input);
      if (!result || result.index !== anchor) {
        continue;
      }
      var j = found.push({
        result: result,
        action: rule.action,
        length: result[0].length
      });
      if (rule.global) {
        floor = j;
      }
      while (--j > floor) {
        var k = j - 1;
        if (found[j].length > found[k].length) {
          var swap = found[j];
          found[j] = found[k];
          found[k] = swap;
        }
      }
    }
    return found;
  }
}
|
|
167
|
+
}
|
|
168
|
+
});
|
|
169
|
+
|
|
170
|
+
// ../../node_modules/.pnpm/string.fromcodepoint@0.2.1/node_modules/string.fromcodepoint/fromcodepoint.js
|
|
171
|
+
var require_fromcodepoint = _chunkG7A32JAGjs.__commonJS.call(void 0, {
|
|
172
|
+
"../../node_modules/.pnpm/string.fromcodepoint@0.2.1/node_modules/string.fromcodepoint/fromcodepoint.js"() {
|
|
173
|
+
"use strict";
|
|
174
|
+
// Installs String.fromCodePoint only when the runtime does not provide one.
if (!String.fromCodePoint) {
  (function() {
    // Resolves to Object.defineProperty when it is callable without throwing,
    // otherwise to undefined.
    var defineProperty = function() {
      var usable;
      try {
        var probe = {};
        var nativeDefine = Object.defineProperty;
        usable = nativeDefine(probe, probe, probe) && nativeDefine;
      } catch (error) {
      }
      return usable;
    }();
    var stringFromCharCode = String.fromCharCode;
    var floor = Math.floor;

    /**
     * Builds a string from the given code points.
     * @throws {RangeError} If an argument is not an integer in 0..0x10FFFF.
     */
    var fromCodePoint = function(_) {
      // Flush threshold so `apply` is never handed an oversized argument list.
      var MAX_SIZE = 0x4000;
      var count = arguments.length;
      if (!count) {
        return "";
      }
      var pending = [];
      var text = "";
      for (var i = 0; i < count; i++) {
        var codePoint = Number(arguments[i]);
        var invalid = !isFinite(codePoint) || // `NaN`, `+Infinity`, or `-Infinity`
          codePoint < 0 ||
          codePoint > 0x10FFFF ||
          floor(codePoint) != codePoint;
        if (invalid) {
          throw RangeError("Invalid code point: " + codePoint);
        }
        if (codePoint <= 0xFFFF) {
          pending.push(codePoint);
        } else {
          // Astral code point: emit a surrogate pair.
          codePoint -= 0x10000;
          pending.push((codePoint >> 10) + 0xD800, codePoint % 0x400 + 0xDC00);
        }
        if (i + 1 == count || pending.length > MAX_SIZE) {
          text += stringFromCharCode.apply(null, pending);
          pending.length = 0;
        }
      }
      return text;
    };

    if (defineProperty) {
      defineProperty(String, "fromCodePoint", {
        "value": fromCodePoint,
        "configurable": true,
        "writable": true
      });
    } else {
      String.fromCodePoint = fromCodePoint;
    }
  })();
}
|
|
232
|
+
}
|
|
233
|
+
});
|
|
234
|
+
|
|
235
|
+
// ../../node_modules/.pnpm/unescape-js@1.1.4/node_modules/unescape-js/dist/index.js
|
|
236
|
+
var require_dist = _chunkG7A32JAGjs.__commonJS.call(void 0, {
|
|
237
|
+
"../../node_modules/.pnpm/unescape-js@1.1.4/node_modules/unescape-js/dist/index.js"(exports, module) {
|
|
238
|
+
"use strict";
|
|
239
|
+
Object.defineProperty(exports, "__esModule", {
|
|
240
|
+
value: true
|
|
241
|
+
});
|
|
242
|
+
exports.default = void 0;
|
|
243
|
+
require_fromcodepoint();
|
|
244
|
+
var jsEscapeRegex = /\\(u\{([0-9A-Fa-f]+)\}|u([0-9A-Fa-f]{4})|x([0-9A-Fa-f]{2})|([1-7][0-7]{0,2}|[0-7]{2,3})|(['"tbrnfv0\\]))|\\U([0-9A-Fa-f]{8})/g;
|
|
245
|
+
// Single-character escapes (the character after the backslash) mapped to the
// character they stand for. Every control character is written as an explicit
// escape: a raw tab inside the literal is invisible and is easily turned into
// a space by editors and tooling, which would make `\t` unescape incorrectly.
var usualEscapeSequences = {
  "0": "\0",
  "b": "\b",
  "f": "\f",
  "n": "\n",
  "r": "\r",
  "t": "\t",
  "v": "\v",
  "'": "'",
  '"': '"',
  "\\": "\\"
};
|
|
257
|
+
/** Converts a string of hexadecimal digits into the character with that code point. */
var fromHex = function fromHex2(str) {
  var codePoint = parseInt(str, 16);
  return String.fromCodePoint(codePoint);
};
|
|
260
|
+
/** Converts a string of octal digits into the character with that code point. */
var fromOct = function fromOct2(str) {
  var codePoint = parseInt(str, 8);
  return String.fromCodePoint(codePoint);
};
|
|
263
|
+
/**
 * Replaces every escape sequence recognised by `jsEscapeRegex` with the
 * character it denotes. The capture groups are, in order: `\u{...}`,
 * `\uXXXX`, `\xXX`, octal, single-character escape, and `\UXXXXXXXX`.
 *
 * @param {string} string - Text containing backslash escapes.
 * @returns {string} The text with escapes resolved.
 */
var _default = function _default2(string) {
  return string.replace(jsEscapeRegex, function(_, __, varHex, longHex, shortHex, octal, specialCharacter, python) {
    // The three JavaScript hex forms are all decoded the same way.
    var hexDigits = varHex !== void 0 ? varHex : longHex !== void 0 ? longHex : shortHex;
    if (hexDigits !== void 0) {
      return fromHex(hexDigits);
    }
    if (octal !== void 0) {
      return fromOct(octal);
    }
    if (python !== void 0) {
      return fromHex(python);
    }
    return usualEscapeSequences[specialCharacter];
  });
};
|
|
280
|
+
exports.default = _default;
|
|
281
|
+
module.exports = exports.default;
|
|
282
|
+
}
|
|
283
|
+
});
|
|
284
|
+
|
|
285
|
+
// ../../node_modules/.pnpm/utf8@3.0.0/node_modules/utf8/utf8.js
|
|
286
|
+
var require_utf8 = _chunkG7A32JAGjs.__commonJS.call(void 0, {
|
|
287
|
+
"../../node_modules/.pnpm/utf8@3.0.0/node_modules/utf8/utf8.js"(exports) {
|
|
288
|
+
"use strict";
|
|
289
|
+
(function(root) {
|
|
290
|
+
var stringFromCharCode = String.fromCharCode;
|
|
291
|
+
/**
 * Splits a string into an array of code points, combining each valid
 * surrogate pair into one value. An unpaired high surrogate is emitted as-is.
 */
function ucs2decode(string) {
  var codePoints = [];
  var total = string.length;
  var pos = 0;
  while (pos < total) {
    var unit = string.charCodeAt(pos++);
    var isHighSurrogate = unit >= 0xD800 && unit <= 0xDBFF;
    if (isHighSurrogate && pos < total) {
      var following = string.charCodeAt(pos);
      if ((following & 0xFC00) == 0xDC00) {
        // Low surrogate follows: consume it and combine the pair.
        pos++;
        codePoints.push(((unit & 0x3FF) << 10) + (following & 0x3FF) + 0x10000);
        continue;
      }
    }
    codePoints.push(unit);
  }
  return codePoints;
}
|
|
313
|
+
/**
 * Joins an array of code points into a string, writing values above 0xFFFF
 * as a surrogate pair.
 */
function ucs2encode(array) {
  var text = "";
  var total = array.length;
  for (var i = 0; i < total; i++) {
    var unit = array[i];
    if (unit > 0xFFFF) {
      unit -= 0x10000;
      text += stringFromCharCode(((unit >>> 10) & 0x3FF) | 0xD800);
      unit = 0xDC00 | (unit & 0x3FF);
    }
    text += stringFromCharCode(unit);
  }
  return text;
}
|
|
329
|
+
/**
 * Rejects code points in the surrogate range 0xD800..0xDFFF.
 * @throws {Error} When `codePoint` is a surrogate.
 */
function checkScalarValue(codePoint) {
  var isSurrogate = codePoint >= 0xD800 && codePoint <= 0xDFFF;
  if (!isSurrogate) {
    return;
  }
  throw Error(
    "Lone surrogate U+" + codePoint.toString(16).toUpperCase() + " is not a scalar value"
  );
}
|
|
336
|
+
/**
 * Builds one continuation byte (as a one-character string): the six bits of
 * `codePoint` starting at bit `shift`, with the high bit set.
 */
function createByte(codePoint, shift) {
  var sixBits = (codePoint >> shift) & 0x3F;
  return stringFromCharCode(sixBits | 0x80);
}
|
|
339
|
+
/**
 * Encodes one code point as a byte string of one to four characters, each
 * character holding one byte value. Code points that need three bytes are
 * first validated with `checkScalarValue`.
 */
function encodeCodePoint(codePoint) {
  // Fits in 7 bits: single byte.
  if ((codePoint & 0xFFFFFF80) == 0) {
    return stringFromCharCode(codePoint);
  }
  var bytes = "";
  if ((codePoint & 0xFFFFF800) == 0) {
    // Up to 11 bits: two bytes.
    bytes = stringFromCharCode(((codePoint >> 6) & 0x1F) | 0xC0);
  } else if ((codePoint & 0xFFFF0000) == 0) {
    // Up to 16 bits: three bytes.
    checkScalarValue(codePoint);
    bytes = stringFromCharCode(((codePoint >> 12) & 0x0F) | 0xE0);
    bytes += createByte(codePoint, 6);
  } else if ((codePoint & 0xFFE00000) == 0) {
    // Up to 21 bits: four bytes.
    bytes = stringFromCharCode(((codePoint >> 18) & 0x07) | 0xF0);
    bytes += createByte(codePoint, 12);
    bytes += createByte(codePoint, 6);
  }
  // Every multi-byte form ends with the low six bits.
  bytes += stringFromCharCode((codePoint & 0x3F) | 0x80);
  return bytes;
}
|
|
358
|
+
/**
 * Encodes a string as a UTF-8 byte string: decodes it to code points with
 * `ucs2decode`, then concatenates `encodeCodePoint` of each one.
 */
function utf8encode(string) {
  var scalars = ucs2decode(string);
  var total = scalars.length;
  var encoded = "";
  for (var i = 0; i < total; i++) {
    encoded += encodeCodePoint(scalars[i]);
  }
  return encoded;
}
|
|
370
|
+
/**
 * Consumes the next byte from the shared decode cursor (`byteArray`,
 * `byteIndex`, `byteCount`) and returns its low six bits.
 * @throws {Error} If the cursor is at the end, or the byte is not of the
 *   form 10xxxxxx.
 */
function readContinuationByte() {
  if (byteIndex >= byteCount) {
    throw Error("Invalid byte index");
  }
  var octet = byteArray[byteIndex++] & 0xFF;
  if ((octet & 0xC0) != 0x80) {
    throw Error("Invalid continuation byte");
  }
  return octet & 0x3F;
}
|
|
381
|
+
/**
 * Decodes one code point from the shared decode cursor.
 *
 * @returns {number|false} The code point, or `false` when the cursor is
 *   exactly at the end of the input.
 * @throws {Error} On an out-of-range cursor, an overlong two- or three-byte
 *   form, a bad continuation byte, or an unrecognised lead byte.
 */
function decodeSymbol() {
  if (byteIndex > byteCount) {
    throw Error("Invalid byte index");
  }
  if (byteIndex == byteCount) {
    return false;
  }

  var lead = byteArray[byteIndex] & 0xFF;
  byteIndex++;
  var second;
  var third;
  var fourth;
  var decoded;

  // 0xxxxxxx: single byte.
  if ((lead & 0x80) == 0) {
    return lead;
  }

  // 110xxxxx: two bytes.
  if ((lead & 0xE0) == 0xC0) {
    second = readContinuationByte();
    decoded = (lead & 0x1F) << 6 | second;
    if (decoded < 0x80) {
      throw Error("Invalid continuation byte");
    }
    return decoded;
  }

  // 1110xxxx: three bytes.
  if ((lead & 0xF0) == 0xE0) {
    second = readContinuationByte();
    third = readContinuationByte();
    decoded = (lead & 0x0F) << 12 | second << 6 | third;
    if (decoded < 0x0800) {
      throw Error("Invalid continuation byte");
    }
    checkScalarValue(decoded);
    return decoded;
  }

  // 11110xxx: four bytes; out-of-range results fall through to the error.
  if ((lead & 0xF8) == 0xF0) {
    second = readContinuationByte();
    third = readContinuationByte();
    fourth = readContinuationByte();
    decoded = (lead & 0x07) << 18 | second << 12 | third << 6 | fourth;
    if (decoded >= 0x010000 && decoded <= 0x10FFFF) {
      return decoded;
    }
  }

  throw Error("Invalid UTF-8 detected");
}
|
|
429
|
+
var byteArray;
|
|
430
|
+
var byteCount;
|
|
431
|
+
var byteIndex;
|
|
432
|
+
/**
 * Decodes a UTF-8 byte string. Loads the shared decode cursor from
 * `byteString`, pulls symbols with `decodeSymbol` until it reports the end,
 * and re-assembles them with `ucs2encode`.
 */
function utf8decode(byteString) {
  byteArray = ucs2decode(byteString);
  byteCount = byteArray.length;
  byteIndex = 0;
  var decoded = [];
  for (var symbol = decodeSymbol(); symbol !== false; symbol = decodeSymbol()) {
    decoded.push(symbol);
  }
  return ucs2encode(decoded);
}
|
|
443
|
+
root.version = "3.0.0";
|
|
444
|
+
root.encode = utf8encode;
|
|
445
|
+
root.decode = utf8decode;
|
|
446
|
+
})(typeof exports === "undefined" ? exports.utf8 = {} : exports);
|
|
447
|
+
}
|
|
448
|
+
});
|
|
449
|
+
|
|
450
|
+
// ../../node_modules/.pnpm/dirty-json@0.9.2/node_modules/dirty-json/lexer.js
|
|
451
|
+
var require_lexer2 = _chunkG7A32JAGjs.__commonJS.call(void 0, {
|
|
452
|
+
"../../node_modules/.pnpm/dirty-json@0.9.2/node_modules/dirty-json/lexer.js"(exports, module) {
|
|
453
|
+
"use strict";
|
|
454
|
+
var Lexer = require_lexer();
|
|
455
|
+
var unescapeJs = require_dist();
|
|
456
|
+
var utf8 = require_utf8();
|
|
457
|
+
var LEX_FLOAT = 6;
|
|
458
|
+
var LEX_INT = 7;
|
|
459
|
+
var LEX_QUOTE = 11;
|
|
460
|
+
var LEX_RB = 12;
|
|
461
|
+
var LEX_RCB = 13;
|
|
462
|
+
var LEX_TOKEN = 14;
|
|
463
|
+
var LEX_COLON = -1;
|
|
464
|
+
var LEX_COMMA = -2;
|
|
465
|
+
var LEX_LCB = -3;
|
|
466
|
+
var LEX_LB = -4;
|
|
467
|
+
var LEX_DOT = -5;
|
|
468
|
+
var lexSpc = [
|
|
469
|
+
[/\s*:\s*/, LEX_COLON],
|
|
470
|
+
[/\s*,\s*/, LEX_COMMA],
|
|
471
|
+
[/\s*{\s*/, LEX_LCB],
|
|
472
|
+
[/\s*}\s*/, LEX_RCB],
|
|
473
|
+
[/\s*\[\s*/, LEX_LB],
|
|
474
|
+
[/\s*\]\s*/, LEX_RB],
|
|
475
|
+
[/\s*\.\s*/, LEX_DOT]
|
|
476
|
+
// TODO: remove?
|
|
477
|
+
];
|
|
478
|
+
/**
 * Resolves the escape sequences inside a quoted string body.
 *
 * `unescapeJs` does not treat `\/` as an escape, so every escaped slash is
 * rewritten to a plain `/` first. The replacement is global: without the `g`
 * flag only the first `\/` was handled and later ones kept their backslash.
 *
 * @param {string} str - Raw text between the quotes.
 * @returns {string} The text with escapes resolved.
 */
function parseString(str) {
  str = str.replace(/\\\//g, "/");
  return unescapeJs(str);
}
|
|
482
|
+
/**
 * Builds a lexer primed with `string`.
 *
 * Every rule advances `col` by the length of the text it consumed and stamps
 * the token with the current `row` and the already-advanced `col`. Only the
 * single-whitespace rule tracks line breaks.
 *
 * @param {string} string - Text to tokenise.
 * @returns {Lexer} A lexer with all rules registered and its input set.
 */
function getLexer(string) {
  const lexer = new Lexer();
  let col = 0;
  let row = 0;

  // Double-quoted string; also accepts end of input in place of the closing quote.
  lexer.addRule(/"((?:\\.|[^"])*?)($|")/, (lexeme, txt) => {
    col += lexeme.length;
    return { type: LEX_QUOTE, value: parseString(txt), row, col, single: false };
  });

  // Single-quoted string; tolerates end of input, or a stray `"` (optionally
  // followed by a comma) at the end of a line, as the terminator.
  lexer.addRule(/'((?:\\.|[^'])*?)($|'|(",?[ \t]*\n))/, (lexeme, txt) => {
    col += lexeme.length;
    return { type: LEX_QUOTE, value: parseString(txt), row, col, single: true };
  });

  // Number containing a decimal point.
  lexer.addRule(/[\-0-9]*\.[0-9]*([eE][\+\-]?)?[0-9]*(?:\s*)/, (lexeme) => {
    col += lexeme.length;
    return { type: LEX_FLOAT, value: parseFloat(lexeme), row, col };
  });

  // Integer mantissa with an exponent.
  lexer.addRule(/\-?[0-9]+([eE][\+\-]?)[0-9]*(?:\s*)/, (lexeme) => {
    col += lexeme.length;
    return { type: LEX_FLOAT, value: parseFloat(lexeme), row, col };
  });

  // Plain integer. The pattern admits decimal digits only, so the radix is
  // stated explicitly rather than left to parseInt's prefix detection.
  lexer.addRule(/\-?[0-9]+(?:\s*)/, (lexeme) => {
    col += lexeme.length;
    return { type: LEX_INT, value: parseInt(lexeme, 10), row, col };
  });

  // Structural characters, together with any surrounding whitespace.
  lexSpc.forEach(([pattern, type]) => {
    lexer.addRule(pattern, (lexeme) => {
      col += lexeme.length;
      return { type, value: lexeme, row, col };
    });
  });

  // Lone whitespace produces no token; a newline starts a new row.
  lexer.addRule(/\s/, (lexeme) => {
    if (lexeme === "\n") {
      col = 0;
      row++;
    } else {
      col += lexeme.length;
    }
  });

  // Anything else: one character plus trailing blanks, as a bare token.
  lexer.addRule(/\S[ \t]*/, (lexeme) => {
    col += lexeme.length;
    return { type: LEX_TOKEN, value: lexeme, row, col };
  });

  lexer.setInput(string);
  return lexer;
}
|
|
529
|
+
module.exports.lexString = lexString;
|
|
530
|
+
/**
 * Tokenises `str` and passes each token to `emit` in order. Stops at the
 * first falsy value returned by the lexer.
 *
 * @param {string} str - Text to tokenise.
 * @param {Function} emit - Receives each token.
 */
function lexString(str, emit) {
  const source = getLexer(str);
  for (let tok = source.lex(); tok; tok = source.lex()) {
    emit(tok);
  }
}
|
|
537
|
+
module.exports.getAllTokens = getAllTokens;
|
|
538
|
+
/**
 * Tokenises `str` and returns all tokens as an array.
 *
 * @param {string} str - Text to tokenise.
 * @returns {Array} Tokens in the order they were produced.
 */
function getAllTokens(str) {
  const collected = [];
  lexString(str, (tok) => {
    collected.push(tok);
  });
  return collected;
}
|
|
546
|
+
}
|
|
547
|
+
});
|
|
548
|
+
|
|
549
|
+
// ../../node_modules/.pnpm/dirty-json@0.9.2/node_modules/dirty-json/parser.js
|
|
550
|
+
var require_parser = _chunkG7A32JAGjs.__commonJS.call(void 0, {
|
|
551
|
+
"../../node_modules/.pnpm/dirty-json@0.9.2/node_modules/dirty-json/parser.js"(exports, module) {
|
|
552
|
+
"use strict";
|
|
553
|
+
var lexer = require_lexer2();
|
|
554
|
+
var LEX_KV = 0;
|
|
555
|
+
var LEX_KVLIST = 1;
|
|
556
|
+
var LEX_VLIST = 2;
|
|
557
|
+
var LEX_BOOLEAN = 3;
|
|
558
|
+
var LEX_COVALUE = 4;
|
|
559
|
+
var LEX_CVALUE = 5;
|
|
560
|
+
var LEX_FLOAT = 6;
|
|
561
|
+
var LEX_INT = 7;
|
|
562
|
+
var LEX_KEY = 8;
|
|
563
|
+
var LEX_LIST = 9;
|
|
564
|
+
var LEX_OBJ = 10;
|
|
565
|
+
var LEX_QUOTE = 11;
|
|
566
|
+
var LEX_RB = 12;
|
|
567
|
+
var LEX_RCB = 13;
|
|
568
|
+
var LEX_TOKEN = 14;
|
|
569
|
+
var LEX_VALUE = 15;
|
|
570
|
+
var LEX_COLON = -1;
|
|
571
|
+
var LEX_COMMA = -2;
|
|
572
|
+
var LEX_LCB = -3;
|
|
573
|
+
var LEX_LB = -4;
|
|
574
|
+
/**
 * Adds two non-enumerable helpers to `arr`, unless a property of that name
 * is already present:
 *   - `peek()`  returns the last element;
 *   - `last(i)` returns the element `i` places from the end (`last(0)` is the
 *     last element).
 *
 * @param {Array} arr - Array to extend in place.
 */
function extendArray(arr) {
  const additions = {
    peek: {
      enumerable: false,
      value: function() {
        return this[this.length - 1];
      }
    },
    last: {
      enumerable: false,
      value: function(i) {
        return this[this.length - (1 + i)];
      }
    }
  };
  Object.keys(additions).forEach((name) => {
    if (arr[name] == null) {
      Object.defineProperty(arr, name, additions[name]);
    }
  });
}
|
|
592
|
+
/**
 * Tests whether `obj` has its own `type` property loosely equal to `prop`.
 * A falsy `obj` is returned unchanged rather than converted to `false`.
 */
function is(obj, prop) {
  if (!obj) {
    return obj;
  }
  if (!obj.hasOwnProperty("type")) {
    return false;
  }
  return obj.type == prop;
}
|
|
595
|
+
/**
 * Trace hook used by the parser. The body is empty, so every message passed
 * to it is discarded.
 */
function log(str) {}
|
|
597
|
+
module.exports.parse = parse;
|
|
598
|
+
/**
 * Parses `text` with a shift-reduce loop: each token is pushed onto the
 * stack, then `reduce` is applied repeatedly until it reports no change.
 *
 * Before parsing, input that opens with `[` or `{` but does not end with the
 * matching closer gets a synthetic closing token appended. Afterwards, a
 * stack holding only a bare key/value list is wrapped as an object.
 *
 * @param {string} text - Source text.
 * @param {*} dupKeys - Passed through unchanged to `compileOST`.
 * @returns {*} The result of `compileOST` on the first stack entry.
 */
function parse(text, dupKeys) {
  let stack = [];
  const tokens = [];
  extendArray(stack);
  extendArray(tokens);

  lexer.lexString(text, (t) => {
    tokens.push(t);
  });

  // Auto-close an unterminated top-level list or object.
  const openerType = tokens[0].type;
  if (openerType == LEX_LB && tokens.last(0).type != LEX_RB) {
    tokens.push({ type: LEX_RB, value: "]", row: -1, col: -1 });
  }
  if (openerType == LEX_LCB && tokens.last(0).type != LEX_RCB) {
    tokens.push({ type: LEX_RCB, value: "}", row: -1, col: -1 });
  }

  for (const token of tokens) {
    log("Shifting " + token.type);
    stack.push(token);
    log(stack);
    log("Reducing...");
    while (reduce(stack)) {
      log(stack);
      log("Reducing...");
    }
  }

  const onlyEntry = stack.length == 1 ? stack[0] : null;
  if (onlyEntry && onlyEntry.type == LEX_KVLIST) {
    log("Pre-compile error fix 1");
    stack = [{ type: LEX_OBJ, value: onlyEntry.value }];
  }
  return compileOST(stack[0], dupKeys);
}
|
|
629
|
+
function reduce(stack) {
|
|
630
|
+
let next = stack.pop();
|
|
631
|
+
switch (next.type) {
|
|
632
|
+
case LEX_KEY:
|
|
633
|
+
if (next.value.trim() == "true") {
|
|
634
|
+
log("Rule 5");
|
|
635
|
+
stack.push({ "type": LEX_BOOLEAN, "value": "true" });
|
|
636
|
+
return true;
|
|
637
|
+
}
|
|
638
|
+
if (next.value.trim() == "false") {
|
|
639
|
+
log("Rule 6");
|
|
640
|
+
stack.push({ "type": LEX_BOOLEAN, "value": "false" });
|
|
641
|
+
return true;
|
|
642
|
+
}
|
|
643
|
+
if (next.value.trim() == "null") {
|
|
644
|
+
log("Rule 7");
|
|
645
|
+
stack.push({ "type": LEX_VALUE, "value": null });
|
|
646
|
+
return true;
|
|
647
|
+
}
|
|
648
|
+
break;
|
|
649
|
+
case LEX_TOKEN:
|
|
650
|
+
if (is(stack.peek(), LEX_KEY)) {
|
|
651
|
+
log("Rule 11a");
|
|
652
|
+
stack.peek().value += next.value;
|
|
653
|
+
return true;
|
|
654
|
+
}
|
|
655
|
+
log("Rule 11c");
|
|
656
|
+
stack.push({ type: LEX_KEY, value: next.value });
|
|
657
|
+
return true;
|
|
658
|
+
case LEX_INT:
|
|
659
|
+
if (is(next, LEX_INT) && is(stack.peek(), LEX_KEY)) {
|
|
660
|
+
log("Rule 11b");
|
|
661
|
+
stack.peek().value += next.value;
|
|
662
|
+
return true;
|
|
663
|
+
}
|
|
664
|
+
log("Rule 11f");
|
|
665
|
+
next.type = LEX_VALUE;
|
|
666
|
+
stack.push(next);
|
|
667
|
+
return true;
|
|
668
|
+
case LEX_QUOTE:
|
|
669
|
+
log("Rule 11d");
|
|
670
|
+
next.type = LEX_VALUE;
|
|
671
|
+
next.value = next.value;
|
|
672
|
+
stack.push(next);
|
|
673
|
+
return true;
|
|
674
|
+
case LEX_BOOLEAN:
|
|
675
|
+
log("Rule 11e");
|
|
676
|
+
next.type = LEX_VALUE;
|
|
677
|
+
if (next.value == "true") {
|
|
678
|
+
next.value = true;
|
|
679
|
+
} else {
|
|
680
|
+
next.value = false;
|
|
681
|
+
}
|
|
682
|
+
stack.push(next);
|
|
683
|
+
return true;
|
|
684
|
+
case LEX_FLOAT:
|
|
685
|
+
log("Rule 11g");
|
|
686
|
+
next.type = LEX_VALUE;
|
|
687
|
+
stack.push(next);
|
|
688
|
+
return true;
|
|
689
|
+
case LEX_VALUE:
|
|
690
|
+
if (is(stack.peek(), LEX_COMMA)) {
|
|
691
|
+
log("Rule 12");
|
|
692
|
+
next.type = LEX_CVALUE;
|
|
693
|
+
stack.pop();
|
|
694
|
+
stack.push(next);
|
|
695
|
+
return true;
|
|
696
|
+
}
|
|
697
|
+
if (is(stack.peek(), LEX_COLON)) {
|
|
698
|
+
log("Rule 13");
|
|
699
|
+
next.type = LEX_COVALUE;
|
|
700
|
+
stack.pop();
|
|
701
|
+
stack.push(next);
|
|
702
|
+
return true;
|
|
703
|
+
}
|
|
704
|
+
if (is(stack.peek(), LEX_KEY) && is(stack.last(1), LEX_VALUE)) {
|
|
705
|
+
log("Error rule 1");
|
|
706
|
+
let middleVal = stack.pop();
|
|
707
|
+
stack.peek().value += '"' + middleVal.value + '"';
|
|
708
|
+
stack.peek().value += next.value;
|
|
709
|
+
return true;
|
|
710
|
+
}
|
|
711
|
+
if (is(stack.peek(), LEX_KEY) && is(stack.last(1), LEX_VLIST)) {
|
|
712
|
+
log("Error rule 2");
|
|
713
|
+
let middleVal = stack.pop();
|
|
714
|
+
let oldLastVal = stack.peek().value.pop();
|
|
715
|
+
oldLastVal += '"' + middleVal.value + '"';
|
|
716
|
+
oldLastVal += next.value;
|
|
717
|
+
stack.peek().value.push(oldLastVal);
|
|
718
|
+
return true;
|
|
719
|
+
}
|
|
720
|
+
if (is(stack.peek(), LEX_KEY) && is(stack.last(1), LEX_KVLIST)) {
|
|
721
|
+
log("Error rule 3");
|
|
722
|
+
let middleVal = stack.pop();
|
|
723
|
+
let oldLastVal = stack.peek().value.pop();
|
|
724
|
+
const qChar = next.single ? "'" : '"';
|
|
725
|
+
oldLastVal.value += qChar + middleVal.value + qChar;
|
|
726
|
+
oldLastVal.value += next.value;
|
|
727
|
+
stack.peek().value.push(oldLastVal);
|
|
728
|
+
return true;
|
|
729
|
+
}
|
|
730
|
+
if (is(stack.peek(), LEX_KEY)) {
|
|
731
|
+
log("Error rule 4");
|
|
732
|
+
let keyValue = stack.pop().value;
|
|
733
|
+
next.value = keyValue + next.value;
|
|
734
|
+
stack.push(next);
|
|
735
|
+
return true;
|
|
736
|
+
}
|
|
737
|
+
break;
|
|
738
|
+
case LEX_LIST:
|
|
739
|
+
if (is(next, LEX_LIST) && is(stack.peek(), LEX_COMMA)) {
|
|
740
|
+
log("Rule 12a");
|
|
741
|
+
next.type = LEX_CVALUE;
|
|
742
|
+
stack.pop();
|
|
743
|
+
stack.push(next);
|
|
744
|
+
return true;
|
|
745
|
+
}
|
|
746
|
+
if (is(stack.peek(), LEX_COLON)) {
|
|
747
|
+
log("Rule 13a");
|
|
748
|
+
next.type = LEX_COVALUE;
|
|
749
|
+
stack.pop();
|
|
750
|
+
stack.push(next);
|
|
751
|
+
return true;
|
|
752
|
+
}
|
|
753
|
+
break;
|
|
754
|
+
case LEX_OBJ:
|
|
755
|
+
if (is(stack.peek(), LEX_COMMA)) {
|
|
756
|
+
log("Rule 12b");
|
|
757
|
+
let toPush = { "type": LEX_CVALUE, "value": next };
|
|
758
|
+
stack.pop();
|
|
759
|
+
stack.push(toPush);
|
|
760
|
+
return true;
|
|
761
|
+
}
|
|
762
|
+
if (is(stack.peek(), LEX_COLON)) {
|
|
763
|
+
log("Rule 13b");
|
|
764
|
+
let toPush = { "type": LEX_COVALUE, "value": next };
|
|
765
|
+
stack.pop();
|
|
766
|
+
stack.push(toPush);
|
|
767
|
+
return true;
|
|
768
|
+
}
|
|
769
|
+
if (is(stack.peek(), LEX_KEY)) {
|
|
770
|
+
log("Error rule 9");
|
|
771
|
+
let key = stack.pop();
|
|
772
|
+
stack.push({ "type": LEX_KV, "key": key.value.trim(), "value": next });
|
|
773
|
+
return true;
|
|
774
|
+
}
|
|
775
|
+
break;
|
|
776
|
+
case LEX_CVALUE:
|
|
777
|
+
if (is(stack.peek(), LEX_VLIST)) {
|
|
778
|
+
log("Rule 14");
|
|
779
|
+
stack.peek().value.push(next.value);
|
|
780
|
+
return true;
|
|
781
|
+
}
|
|
782
|
+
log("Rule 15");
|
|
783
|
+
stack.push({ "type": LEX_VLIST, "value": [next.value] });
|
|
784
|
+
return true;
|
|
785
|
+
case LEX_VLIST:
|
|
786
|
+
if (is(stack.peek(), LEX_VALUE)) {
|
|
787
|
+
log("Rule 15a");
|
|
788
|
+
next.value.unshift(stack.peek().value);
|
|
789
|
+
stack.pop();
|
|
790
|
+
stack.push(next);
|
|
791
|
+
return true;
|
|
792
|
+
}
|
|
793
|
+
if (is(stack.peek(), LEX_LIST)) {
|
|
794
|
+
log("Rule 15b");
|
|
795
|
+
next.value.unshift(stack.peek().value);
|
|
796
|
+
stack.pop();
|
|
797
|
+
stack.push(next);
|
|
798
|
+
return true;
|
|
799
|
+
}
|
|
800
|
+
if (is(stack.peek(), LEX_OBJ)) {
|
|
801
|
+
log("Rule 15c");
|
|
802
|
+
next.value.unshift(stack.peek());
|
|
803
|
+
stack.pop();
|
|
804
|
+
stack.push(next);
|
|
805
|
+
return true;
|
|
806
|
+
}
|
|
807
|
+
if (is(stack.peek(), LEX_KEY) && (stack.last(1), LEX_COMMA)) {
|
|
808
|
+
log("Error rule 7");
|
|
809
|
+
let l = stack.pop();
|
|
810
|
+
stack.push({ type: LEX_VALUE, "value": l.value });
|
|
811
|
+
log("Start subreduce... (" + l.value + ")");
|
|
812
|
+
while (reduce(stack))
|
|
813
|
+
;
|
|
814
|
+
log("End subreduce");
|
|
815
|
+
stack.push(next);
|
|
816
|
+
return true;
|
|
817
|
+
}
|
|
818
|
+
if (is(stack.peek(), LEX_VLIST)) {
|
|
819
|
+
log("Error rule 8");
|
|
820
|
+
stack.peek().value.push(next.value[0]);
|
|
821
|
+
return true;
|
|
822
|
+
}
|
|
823
|
+
break;
|
|
824
|
+
case LEX_COVALUE:
|
|
825
|
+
if (is(stack.peek(), LEX_KEY) || is(stack.peek(), LEX_VALUE) || is(stack.peek(), LEX_VLIST)) {
|
|
826
|
+
log("Rule 16");
|
|
827
|
+
let key = stack.pop();
|
|
828
|
+
stack.push({ "type": LEX_KV, "key": key.value, "value": next.value });
|
|
829
|
+
return true;
|
|
830
|
+
}
|
|
831
|
+
throw new Error("Got a :value that can't be handled at line " + next.row + ":" + next.col);
|
|
832
|
+
case LEX_KV:
|
|
833
|
+
if (is(stack.last(0), LEX_COMMA) && is(stack.last(1), LEX_KVLIST)) {
|
|
834
|
+
log("Rule 17");
|
|
835
|
+
stack.last(1).value.push(next);
|
|
836
|
+
stack.pop();
|
|
837
|
+
return true;
|
|
838
|
+
}
|
|
839
|
+
log("Rule 18");
|
|
840
|
+
stack.push({ "type": LEX_KVLIST, "value": [next] });
|
|
841
|
+
return true;
|
|
842
|
+
case LEX_KVLIST:
|
|
843
|
+
if (is(stack.peek(), LEX_KVLIST)) {
|
|
844
|
+
log("Rule 17a");
|
|
845
|
+
next.value.forEach(function(i) {
|
|
846
|
+
stack.peek().value.push(i);
|
|
847
|
+
});
|
|
848
|
+
return true;
|
|
849
|
+
}
|
|
850
|
+
break;
|
|
851
|
+
case LEX_RB:
|
|
852
|
+
if (is(stack.peek(), LEX_VLIST) && is(stack.last(1), LEX_LB)) {
|
|
853
|
+
log("Rule 19");
|
|
854
|
+
let l = stack.pop();
|
|
855
|
+
stack.pop();
|
|
856
|
+
stack.push({ "type": LEX_LIST, "value": l.value });
|
|
857
|
+
return true;
|
|
858
|
+
}
|
|
859
|
+
if (is(stack.peek(), LEX_LIST) && is(stack.last(1), LEX_LB)) {
|
|
860
|
+
log("Rule 19b");
|
|
861
|
+
let l = stack.pop();
|
|
862
|
+
stack.pop();
|
|
863
|
+
stack.push({ "type": LEX_LIST, "value": [l.value] });
|
|
864
|
+
return true;
|
|
865
|
+
}
|
|
866
|
+
if (is(stack.peek(), LEX_LB)) {
|
|
867
|
+
log("Rule 22");
|
|
868
|
+
stack.pop();
|
|
869
|
+
stack.push({ type: LEX_LIST, "value": [] });
|
|
870
|
+
return true;
|
|
871
|
+
}
|
|
872
|
+
if (is(stack.peek(), LEX_VALUE) && is(stack.last(1), LEX_LB)) {
|
|
873
|
+
log("Rule 23");
|
|
874
|
+
let val = stack.pop().value;
|
|
875
|
+
stack.pop();
|
|
876
|
+
stack.push({ type: LEX_LIST, "value": [val] });
|
|
877
|
+
return true;
|
|
878
|
+
}
|
|
879
|
+
if (is(stack.peek(), LEX_OBJ) && is(stack.last(1), LEX_LB)) {
|
|
880
|
+
log("Rule 23b");
|
|
881
|
+
let val = stack.pop();
|
|
882
|
+
stack.pop();
|
|
883
|
+
stack.push({ type: LEX_LIST, "value": [val] });
|
|
884
|
+
return true;
|
|
885
|
+
}
|
|
886
|
+
if (is(stack.peek(), LEX_KEY) && is(stack.last(1), LEX_COMMA)) {
|
|
887
|
+
log("Error rule 5");
|
|
888
|
+
let l = stack.pop();
|
|
889
|
+
stack.push({ type: LEX_VALUE, "value": l.value });
|
|
890
|
+
log("Start subreduce... (" + l.value + ")");
|
|
891
|
+
while (reduce(stack))
|
|
892
|
+
;
|
|
893
|
+
log("End subreduce");
|
|
894
|
+
stack.push({ type: LEX_RB });
|
|
895
|
+
return true;
|
|
896
|
+
}
|
|
897
|
+
if (is(stack.peek(), LEX_COMMA) && (is(stack.last(1), LEX_KEY) || is(stack.last(1), LEX_OBJ) || is(stack.last(1), LEX_VALUE))) {
|
|
898
|
+
log("Error rule 5a");
|
|
899
|
+
stack.pop();
|
|
900
|
+
stack.push({ type: LEX_RB, "value": "]" });
|
|
901
|
+
log("Start subreduce...");
|
|
902
|
+
log("Content: " + JSON.stringify(stack));
|
|
903
|
+
while (reduce(stack))
|
|
904
|
+
;
|
|
905
|
+
log("End subreduce");
|
|
906
|
+
return true;
|
|
907
|
+
}
|
|
908
|
+
if (is(stack.peek(), LEX_KEY) && is(stack.last(1), LEX_LB)) {
|
|
909
|
+
log("Error rule 5b");
|
|
910
|
+
let v = stack.pop();
|
|
911
|
+
stack.pop();
|
|
912
|
+
stack.push({ type: LEX_LIST, value: [v.value] });
|
|
913
|
+
return true;
|
|
914
|
+
}
|
|
915
|
+
if (is(stack.peek(), LEX_COMMA) && is(stack.last(1), LEX_VLIST)) {
|
|
916
|
+
log("Error rule 5c");
|
|
917
|
+
stack.pop();
|
|
918
|
+
stack.push({ type: LEX_RB });
|
|
919
|
+
log("Start subreduce...");
|
|
920
|
+
log("Content: " + JSON.stringify(stack));
|
|
921
|
+
while (reduce(stack))
|
|
922
|
+
;
|
|
923
|
+
log("End subreduce");
|
|
924
|
+
return true;
|
|
925
|
+
}
|
|
926
|
+
break;
|
|
927
|
+
case LEX_RCB:
|
|
928
|
+
if (is(stack.peek(), LEX_KVLIST) && is(stack.last(1), LEX_LCB)) {
|
|
929
|
+
log("Rule 20");
|
|
930
|
+
let l = stack.pop();
|
|
931
|
+
stack.pop();
|
|
932
|
+
stack.push({ "type": LEX_OBJ, "value": l.value });
|
|
933
|
+
return true;
|
|
934
|
+
}
|
|
935
|
+
if (is(stack.peek(), LEX_LCB)) {
|
|
936
|
+
log("Rule 21");
|
|
937
|
+
stack.pop();
|
|
938
|
+
stack.push({ type: LEX_OBJ, "value": null });
|
|
939
|
+
return true;
|
|
940
|
+
}
|
|
941
|
+
if (is(stack.peek(), LEX_KEY) && is(stack.last(1), LEX_COLON)) {
|
|
942
|
+
log("Error rule 4a");
|
|
943
|
+
let l = stack.pop();
|
|
944
|
+
stack.push({ type: LEX_VALUE, "value": l.value });
|
|
945
|
+
log("Start subreduce... (" + l.value + ")");
|
|
946
|
+
while (reduce(stack))
|
|
947
|
+
;
|
|
948
|
+
log("End subreduce");
|
|
949
|
+
stack.push({ type: LEX_RCB });
|
|
950
|
+
return true;
|
|
951
|
+
}
|
|
952
|
+
if (is(stack.peek(), LEX_COLON)) {
|
|
953
|
+
log("Error rule 4b");
|
|
954
|
+
stack.push({ type: LEX_VALUE, value: null });
|
|
955
|
+
log("Starting subreduce...");
|
|
956
|
+
while (reduce(stack))
|
|
957
|
+
;
|
|
958
|
+
log("End subreduce.");
|
|
959
|
+
stack.push({ type: LEX_RCB });
|
|
960
|
+
return true;
|
|
961
|
+
}
|
|
962
|
+
if (is(stack.peek(), LEX_COMMA)) {
|
|
963
|
+
log("Error rule 10a");
|
|
964
|
+
stack.pop();
|
|
965
|
+
stack.push({ type: LEX_RCB });
|
|
966
|
+
return true;
|
|
967
|
+
}
|
|
968
|
+
throw new Error("Found } that I can't handle at line " + next.row + ":" + next.col);
|
|
969
|
+
case LEX_COMMA:
|
|
970
|
+
if (is(stack.peek(), LEX_COMMA)) {
|
|
971
|
+
log("Comma error rule 1");
|
|
972
|
+
return true;
|
|
973
|
+
}
|
|
974
|
+
if (is(stack.peek(), LEX_KEY)) {
|
|
975
|
+
log("Comma error rule 2");
|
|
976
|
+
const key = stack.pop();
|
|
977
|
+
stack.push({ type: LEX_VALUE, value: key.value });
|
|
978
|
+
log("Starting subreduce...");
|
|
979
|
+
while (reduce(stack))
|
|
980
|
+
;
|
|
981
|
+
log("End subreduce.");
|
|
982
|
+
stack.push(next);
|
|
983
|
+
return true;
|
|
984
|
+
}
|
|
985
|
+
if (is(stack.peek(), LEX_COLON)) {
|
|
986
|
+
log("Comma error rule 3");
|
|
987
|
+
stack.push({ type: LEX_VALUE, value: null });
|
|
988
|
+
log("Starting subreduce...");
|
|
989
|
+
while (reduce(stack))
|
|
990
|
+
;
|
|
991
|
+
log("End subreduce.");
|
|
992
|
+
stack.push(next);
|
|
993
|
+
return true;
|
|
994
|
+
}
|
|
995
|
+
}
|
|
996
|
+
stack.push(next);
|
|
997
|
+
return false;
|
|
998
|
+
}
|
|
999
|
+
/**
 * Compile the parser's object syntax tree (OST) into plain JavaScript values.
 *
 * - booleans, numbers, strings and `null` are returned unchanged;
 * - arrays are compiled element by element (the input array is left intact);
 * - `LEX_OBJ` nodes become plain objects built from their key/value entries;
 * - `LEX_LIST` nodes are compiled through their `value`;
 * - any other node yields its raw `value`.
 *
 * @param {*} tree - OST node, array of nodes, or raw primitive.
 * @param {boolean} [dupKeys] - when truthy, a repeated key does not overwrite
 *   the earlier value; the entry becomes `{ value: <previous>, next: <new> }`.
 *   The flag is applied at every nesting level.
 * @returns {*} the compiled value.
 */
function compileOST(tree, dupKeys) {
  const rawTypes = ["boolean", "number", "string"];
  if (rawTypes.indexOf(typeof tree) != -1)
    return tree;
  if (tree === null)
    return null;
  if (Array.isArray(tree)) {
    // Propagate dupKeys so nested objects honour the same duplicate-key policy.
    return tree.map(function(node) {
      return compileOST(node, dupKeys);
    });
  }
  if (is(tree, LEX_OBJ)) {
    const toR = {};
    if (tree.value === null)
      return {};
    tree.value.forEach(function(i) {
      const key = i.key;
      const val = compileOST(i.value, dupKeys);
      // Own-property check: `key in toR` would also match names inherited from
      // Object.prototype (e.g. "constructor") and report a false duplicate.
      if (dupKeys && Object.prototype.hasOwnProperty.call(toR, key)) {
        toR[key] = {
          "value": toR[key],
          "next": val
        };
      } else {
        toR[key] = val;
      }
    });
    return toR;
  }
  if (is(tree, LEX_LIST)) {
    return compileOST(tree.value, dupKeys);
  }
  return tree.value;
}
|
|
1034
|
+
}
|
|
1035
|
+
});
|
|
1036
|
+
|
|
1037
|
+
// ../../node_modules/.pnpm/dirty-json@0.9.2/node_modules/dirty-json/dirty-json.js
|
|
1038
|
+
var require_dirty_json = _chunkG7A32JAGjs.__commonJS.call(void 0, {
|
|
1039
|
+
"../../node_modules/.pnpm/dirty-json@0.9.2/node_modules/dirty-json/dirty-json.js"(exports, module) {
|
|
1040
|
+
"use strict";
|
|
1041
|
+
var parser = require_parser();
|
|
1042
|
+
module.exports.parse = parse;
|
|
1043
|
+
/**
 * Parse possibly malformed JSON text with the custom parser, optionally
 * falling back to the built-in `JSON.parse`.
 *
 * @param {string} text - the text to parse.
 * @param {object} [config]
 * @param {boolean} [config.fallback] - pass `false` to disable the
 *   `JSON.parse` fallback; any other value keeps it enabled.
 * @param {boolean} [config.duplicateKeys] - pass `true` to forward
 *   duplicate-key handling to the custom parser.
 * @returns {*} the parsed value.
 * @throws the custom parser's error when it fails and the fallback is
 *   disabled or `JSON.parse` fails too.
 */
function parse(text, config) {
  let fallback = true;
  let duplicateKeys = false;
  if (config) {
    // Look up the "fallback" option by name. The previous `config[fallback]`
    // indexed with the boolean variable (i.e. key "true"), so `fallback: false`
    // was silently ignored.
    if (config.fallback === false) {
      fallback = false;
    }
    duplicateKeys = config.duplicateKeys === true;
  }
  try {
    return parser.parse(text, duplicateKeys);
  } catch (e) {
    if (fallback === false) {
      throw e;
    }
    try {
      const json = JSON.parse(text);
      console.warn("dirty-json got valid JSON that failed with the custom parser. We're returning the valid JSON, but please file a bug report here: https://github.com/RyanMarcus/dirty-json/issues -- the JSON that caused the failure was: " + text);
      return json;
    } catch (json_error) {
      // Report the custom parser's error, not the JSON.parse one.
      throw e;
    }
  }
}
|
|
1067
|
+
}
|
|
1068
|
+
});
|
|
1069
|
+
|
|
1070
|
+
// src/ai-model/openai/index.ts
|
|
1071
|
+
var _assert = require('assert'); var _assert2 = _interopRequireDefault(_assert);
|
|
1072
|
+
|
|
1073
|
+
// src/types.ts
|
|
1074
|
+
// Class with no members of its own in this bundle (emitted from src/types.ts).
var BaseElement = class {};
|
|
1076
|
+
// Enum-like map of response-format names to the string values used for them.
var AIResponseFormat = /* @__PURE__ */ ((formats) => {
  const entries = [
    ["JSON", "json_object"],
    ["TEXT", "text"]
  ];
  for (const [name, value] of entries) {
    formats[name] = value;
  }
  return formats;
})(AIResponseFormat || {});
|
|
1081
|
+
// Class with no members of its own in this bundle (emitted from src/types.ts).
var UIContext = class {};
|
|
1083
|
+
|
|
1084
|
+
// src/ai-model/openai/index.ts
|
|
1085
|
+
var import_dirty_json = _chunkG7A32JAGjs.__toESM.call(void 0, require_dirty_json());
|
|
1086
|
+
var _sdk = require('@anthropic-ai/sdk');
|
|
1087
|
+
|
|
1088
|
+
|
|
1089
|
+
|
|
1090
|
+
var _identity = require('@azure/identity');
|
|
1091
|
+
var _utils = require('@midscene/shared/utils');
|
|
1092
|
+
var _openai = require('openai'); var _openai2 = _interopRequireDefault(_openai);
|
|
1093
|
+
var _socksproxyagent = require('socks-proxy-agent');
|
|
1094
|
+
|
|
1095
|
+
// src/ai-model/common.ts
|
|
1096
|
+
|
|
1097
|
+
/**
 * Assert that an AI model configuration is present, then forward the request
 * to `callToGetJSONObject`.
 *
 * @param {*} msgs - passed through unchanged to `callToGetJSONObject`.
 * @param {*} AIActionTypeValue - passed through unchanged as second argument.
 * @returns {Promise<{content: *, usage: *}>} only the `content` and `usage`
 *   fields of the underlying result.
 * @throws assertion error when `checkAIConfig()` is falsy.
 */
async function callAiFn(msgs, AIActionTypeValue) {
  const missingConfigMessage = "Cannot find config for AI model service. You should set it before using. https://midscenejs.com/model-provider.html";
  _assert2.default.call(void 0, checkAIConfig(), missingConfigMessage);
  const response = await callToGetJSONObject(msgs, AIActionTypeValue);
  return { content: response.content, usage: response.usage };
}
|
|
1108
|
+
|
|
1109
|
+
// src/ai-model/prompt/element-inspector.ts
|
|
1110
|
+
var _prompts = require('@langchain/core/prompts');
|
|
1111
|
+
|
|
1112
|
+
// src/ai-model/prompt/element-point.ts
|
|
1113
|
+
/**
 * Build the system prompt for the "GUI agent" mode: output format, the
 * available action space, a note on language, and a trailing
 * "User Instruction" heading.
 *
 * @returns {string} the prompt text (begins and ends with a newline).
 */
function systemPromptToFindElementPosition() {
  const promptLines = [
    "",
    "You are a GUI agent. You are given a task and your action history, with screenshots. You need to perform the next action to complete the task.",
    "",
    "## Output Format",
    "```",
    "Action_Summary: ...",
    "Action: ...",
    "```",
    "",
    "## Action Space",
    "click(start_box='[x1, y1, x2, y2]')",
    "long_press(start_box='[x1, y1, x2, y2]', time='')",
    "type(content='')",
    "scroll(direction='down or up or right or left')",
    "open_app(app_name='')",
    "navigate_back()",
    "navigate_home()",
    "WAIT()",
    "finished() # Submit the task regardless of whether it succeeds or fails.",
    "",
    "## Note",
    "- Use Chinese in `Action_Summary` part.",
    "",
    "## User Instruction",
    ""
  ];
  return promptLines.join("\n");
}
|
|
1140
|
+
|
|
1141
|
+
// src/ai-model/prompt/element-inspector.ts
|
|
1142
|
+
/**
 * Build the system prompt used to locate elements.
 *
 * When the MATCH_BY_POSITION config value is truthy, this delegates to
 * `systemPromptToFindElementPosition()`. Otherwise it returns the id-based
 * prompt, which describes the role, workflow, constraints, the JSON output
 * format and one worked example.
 *
 * @returns {string} the prompt text.
 */
function systemPromptToFindElement() {
  const matchByPosition = _chunkSCNIHQKFjs.getAIConfig.call(void 0, _chunkSCNIHQKFjs.MATCH_BY_POSITION);
  if (matchByPosition) {
    return systemPromptToFindElementPosition();
  }
  const idBasedPrompt = `
## Role:
You are an expert in software page image (2D) and page element text analysis.

## Objective:
- Identify elements in screenshots and text that match the user's description.
- Return JSON data containing the selection reason and element ID.

## Skills:
- Image analysis and recognition
- Multilingual text understanding
- Software UI design and testing

## Workflow:
1. Receive the user's element description, screenshot, and element description information. Note that the text may contain non-English characters (e.g., Chinese), indicating that the application may be non-English.
2. Based on the user's description, locate the target element ID in the list of element descriptions and the screenshot.
3. Found the required number of elements
4. Return JSON data containing the selection reason and element ID.

## Constraints:
- Strictly adhere to the specified location when describing the required element; do not select elements from other locations.
- Elements in the image with NodeType other than "TEXT Node" have been highlighted to identify the element among multiple non-text elements.
- Accurately identify element information based on the user's description and return the corresponding element ID from the element description information, not extracted from the image.
- If no elements are found, the "elements" array should be empty.
- The returned data must conform to the specified JSON format.
- The returned value id information must use the id from element info (important: **use id not indexId, id is hash content**)

## Output Format:

Please return the result in JSON format as follows:

\`\`\`json
{
"elements": [
// If no matching elements are found, return an empty array []
{
"reason": "PLACEHOLDER", // The thought process for finding the element, replace PLACEHOLDER with your thought process
"text": "PLACEHOLDER", // Replace PLACEHOLDER with the text of elementInfo, if none, leave empty
"id": "PLACEHOLDER" // Replace PLACEHOLDER with the ID (important: **use id not indexId, id is hash content**) of elementInfo
}
// More elements...
],
"errors": [] // Array of strings containing any error messages
}
\`\`\`

## Example:
Example 1:
Input Example:
\`\`\`json
// Description: "Shopping cart icon in the upper right corner"
{
"description": "PLACEHOLDER", // Description of the target element
"screenshot": "path/screenshot.png",
"text": '{
"pageSize": {
"width": 400, // Width of the page
"height": 905 // Height of the page
},
"elementInfos": [
{
"id": "1231", // ID of the element
"indexId": "0", // Index of the element,The image is labeled to the left of the element
"attributes": { // Attributes of the element
"nodeType": "IMG Node", // Type of element, types include: TEXT Node, IMG Node, BUTTON Node, INPUT Node
"src": "https://ap-southeast-3.m",
"class": ".img"
},
"content": "", // Text content of the element
"rect": {
"left": 280, // Distance from the left side of the page
"top": 8, // Distance from the top of the page
"width": 44, // Width of the element
"height": 44 // Height of the element
}
},
{
"id": "66551", // ID of the element
"indexId": "1", // Index of the element,The image is labeled to the left of the element
"attributes": { // Attributes of the element
"nodeType": "IMG Node", // Type of element, types include: TEXT Node, IMG Node, BUTTON Node, INPUT Node
"src": "data:image/png;base64,iVBORw0KGgoAAAANSU...",
"class": ".icon"
},
"content": "", // Text content of the element
"rect": {
"left": 350, // Distance from the left side of the page
"top": 16, // Distance from the top of the page
"width": 25, // Width of the element
"height": 25 // Height of the element
}
},
...
{
"id": "12344",
"indexId": "2", // Index of the element,The image is labeled to the left of the element
"attributes": {
"nodeType": "TEXT Node",
"class": ".product-name"
},
"center": [
288,
834
],
"content": "Mango Drink",
"rect": {
"left": 188,
"top": 827,
"width": 199,
"height": 13
}
},
...
]
}
'
}
\`\`\`
Output Example:
\`\`\`json
{
"elements": [
{
// Describe the reason for finding this element, replace with actual value in practice
"reason": "Reason for finding element 4: It is located in the upper right corner, is an image type, and according to the screenshot, it is a shopping cart icon button",
"text": "",
// ID(**use id not indexId**) of this element, replace with actual value in practice, **use id not indexId**
"id": "1231"
}
],
"errors": []
}
\`\`\`

`;
  return idBasedPrompt;
}
|
|
1282
|
+
// JSON-schema response format ("find_elements", strict) describing the
// expected reply: an `elements` array of { reason, text, id } objects and an
// `errors` array of strings. No additional properties are allowed.
var findElementSchema = (() => {
  const stringField = (description) => ({ type: "string", description });
  const foundElement = {
    type: "object",
    properties: {
      reason: stringField("Reason for finding this element"),
      text: stringField("Text content of the element"),
      id: stringField("ID of this element")
    },
    required: ["reason", "text", "id"],
    additionalProperties: false
  };
  const responseShape = {
    type: "object",
    properties: {
      elements: {
        type: "array",
        items: foundElement,
        description: "List of found elements"
      },
      errors: {
        type: "array",
        items: { type: "string" },
        description: "List of error messages, if any"
      }
    },
    required: ["elements", "errors"],
    additionalProperties: false
  };
  return {
    type: "json_schema",
    json_schema: {
      name: "find_elements",
      strict: true,
      schema: responseShape
    }
  };
})();
|
|
1326
|
+
// User-message template for element lookup. Placeholders:
// {targetElementDescription}, {multi} and {pageDescription}. The doubled
// braces ({{ and }}) appear literally in the template text.
var findElementPrompt = new _prompts.PromptTemplate({
  template: `
Here is the item user want to find. Just go ahead:
=====================================
{{
"description": "{targetElementDescription}",
"multi": {multi}
}}
=====================================

pageDescription: {pageDescription}
`,
  inputVariables: ["pageDescription", "targetElementDescription", "multi"]
});
|
|
1340
|
+
|
|
1341
|
+
// src/ai-model/prompt/planning.ts
|
|
1342
|
+
|
|
1343
|
+
|
|
1344
|
+
// src/ai-model/prompt/util.ts
|
|
1345
|
+
|
|
1346
|
+
|
|
1347
|
+
// src/image/index.ts
|
|
1348
|
+
|
|
1349
|
+
|
|
1350
|
+
|
|
1351
|
+
|
|
1352
|
+
|
|
1353
|
+
|
|
1354
|
+
|
|
1355
|
+
|
|
1356
|
+
var _img = require('@midscene/shared/img');
|
|
1357
|
+
|
|
1358
|
+
// src/ai-model/prompt/util.ts
|
|
1359
|
+
|
|
1360
|
+
var _constants = require('@midscene/shared/constants');
|
|
1361
|
+
|
|
1362
|
+
// Persona sentence interpolated into system prompts.
var characteristic = "You are a versatile professional in software UI design and testing. Your outstanding contributions will impact the user experience of billions of users.";
// Explanation of the screenshot-plus-text context; note the leading newline.
var contextFormatIntro = "\n" + "The user will give you a screenshot and some of the texts on it. There may be some none-English characters (like Chinese) on it, indicating it's an non-English app. If some text is shown on screenshot but not introduced by the JSON description, use the information you see on screenshot.";
// Prefixes that mark element-locator requests and responses.
var ONE_ELEMENT_LOCATOR_PREFIX = "LOCATE_ONE_ELEMENT";
var ELEMENTS_LOCATOR_PREFIX = "LOCATE_ONE_OR_MORE_ELEMENTS";
// Prefix prepended to section prompts by retrieveSection.
var SECTION_MATCHER_FLAG = "SECTION_MATCHER_FLAG/";
|
|
1368
|
+
/**
 * Build the system prompt for data extraction: it introduces the
 * `extract_data_from_UI` skill, the LOCATE_ONE_ELEMENT /
 * LOCATE_ONE_OR_MORE_ELEMENTS commands, and the JSON reply format
 * (`language`, `data`, `errors`).
 *
 * @returns {string} the prompt text (begins and ends with a newline).
 */
function systemPromptToExtract() {
  const extractionPrompt = `
You are a versatile professional in software UI design and testing. Your outstanding contributions will impact the user experience of billions of users.
The user will give you a screenshot and the contents of it. There may be some none-English characters (like Chinese) on it, indicating it's an non-English app.

You have the following skills:

skill name: extract_data_from_UI
related input: DATA_DEMAND
skill content:
* User will give you some data requirements in DATA_DEMAND. Consider the UI context, follow the user's instructions, and provide comprehensive data accordingly.
* There may be some special commands in DATA_DEMAND, please pay extra attention
- LOCATE_ONE_ELEMENT and LOCATE_ONE_OR_MORE_ELEMENTS: if you see a description that mentions the keyword LOCATE_ONE_ELEMENT
- LOCATE_ONE_OR_MORE_ELEMENTS(e.g. follow LOCATE_ONE_ELEMENT : i want to find ...), it means user wants to locate a specific element meets the description.

Return in this way: prefix + the id / comma-separated ids, for example: LOCATE_ONE_ELEMENT/1 , LOCATE_ONE_OR_MORE_ELEMENTS/1,2,3 . If not found, keep the prefix and leave the suffix empty, like LOCATE_ONE_ELEMENT/ .

Return in the following JSON format:
{
language: "en", // "en" or "zh", the language of the page. Use the same language to describe section name, description, and similar fields.
data: any, // the extracted data from extract_data_from_UI skill. Make sure both the value and scheme meet the DATA_DEMAND.
errors: [], // string[], error message if any
}
`;
  return extractionPrompt;
}
|
|
1393
|
+
// User-message template for data extraction. Placeholders: {pageDescription},
// {dataKeys} and {dataQuery}; the demand is framed by the
// "DATA_DEMAND start:" / "DATA_DEMAND ends." markers.
var extractDataPrompt = new _prompts.PromptTemplate({
  template: `
pageDescription: {pageDescription}

Use your extract_data_from_UI skill to find the following data, placing it in the \`data\` field
DATA_DEMAND start:
=====================================
{dataKeys}

{dataQuery}

=====================================
DATA_DEMAND ends.
`,
  inputVariables: ["pageDescription", "dataKeys", "dataQuery"]
});
|
|
1409
|
+
/**
 * Build the system prompt for assertions: the shared `characteristic` and
 * `contextFormatIntro` texts followed by the expected JSON reply format
 * (`thought`, `pass`).
 *
 * @returns {string} the prompt text (begins and ends with a newline).
 */
function systemPromptToAssert() {
  const header = `\n${characteristic}\n${contextFormatIntro}\n`;
  const replyFormat = [
    "",
    "Based on the information you get, Return assertion judgment:",
    "",
    "Return in the following JSON format:",
    "{",
    "thought: string, // string, the thought of the assertion. Should in the same language as the assertion.",
    "pass: true, // true or false, whether the assertion is passed",
    "}",
    ""
  ].join("\n");
  return `${header}${replyFormat}`;
}
|
|
1423
|
+
// JSON-schema response format ("assert", strict) describing the expected
// reply: a string `thought` and a boolean `pass`, both required, with no
// additional properties allowed.
var assertSchema = (() => {
  const properties = {
    thought: {
      type: "string",
      description: "The thought process behind the assertion"
    },
    pass: {
      type: "boolean",
      description: "Whether the assertion passed or failed"
    }
  };
  const schema = {
    type: "object",
    properties,
    required: ["thought", "pass"],
    additionalProperties: false
  };
  return {
    type: "json_schema",
    json_schema: { name: "assert", strict: true, schema }
  };
})();
|
|
1445
|
+
/**
 * Format a size as "<width> x <height>".
 *
 * @param {{width: *, height: *}} size
 * @returns {string}
 */
function describeSize(size) {
  const { width, height } = size;
  return `${width} x ${height}`;
}
|
|
1448
|
+
/**
 * Shorten text for use in prompts.
 *
 * - A truthy value whose `length` exceeds `maxLength` is cut to `maxLength`
 *   characters and suffixed with "..." (it is not trimmed).
 * - Any other string is returned trimmed.
 * - Everything else yields "".
 *
 * @param {*} text
 * @param {number} [maxLength=100]
 * @returns {string}
 */
function truncateText(text, maxLength = 100) {
  const isTooLong = Boolean(text) && text.length > maxLength;
  if (isTooLong) {
    const head = text.slice(0, maxLength);
    return `${head}...`;
  }
  return typeof text === "string" ? text.trim() : "";
}
|
|
1457
|
+
/**
 * Find the first element whose rectangle contains the given point (edges
 * included).
 *
 * @param {Array<{rect: {left: number, top: number, width: number, height: number}}>} elementsInfo
 * @param {{x: number, y: number}} position
 * @returns {*} the matching element, or `undefined` when none contains the point.
 * @throws assertion error when `position` is `undefined`.
 */
function elementByPositionWithElementInfo(elementsInfo, position) {
  _assert2.default.call(void 0, typeof position !== "undefined", "position is required for query");
  const containsPosition = ({ rect }) => {
    if (!(rect.left <= position.x && position.x <= rect.left + rect.width)) {
      return false;
    }
    return rect.top <= position.y && position.y <= rect.top + rect.height;
  };
  return elementsInfo.find(containsPosition);
}
|
|
1464
|
+
// Sample page description text: the page size line followed by two example
// element entries in "id=<id>: {...}" form. Starts with a newline and ends
// with "...many more".
var samplePageDescription = [
  "",
  "The size of the page: 1280 x 720",
  "Some of the elements are marked with a rectangle in the screenshot, some are not.",
  "",
  "JSON description of all the elements in screenshot:",
  "id=c81c4e9a33: {",
  '"markerId": 2, // The number indicated by the rectangle label in the screenshot',
  '"attributes": // Attributes of the element',
  '{"data-id":"@submit s0","class":".gh-search","aria-label":"搜索","nodeType":"IMG", "src": "image_url"},',
  '"rect": { "left": 16, "top": 378, "width": 89, "height": 16 } // Position of the element in the page',
  "}",
  "",
  "id=5a29bf6419bd: {",
  '"content": "获取优惠券",',
  '"attributes": { "nodeType": "TEXT" },',
  '"rect": { "left": 32, "top": 332, "width": 70, "height": 18 }',
  "}",
  "",
  "...many more"
].join("\n");
|
|
1483
|
+
/**
 * Build a textual description of the page plus lookup helpers for its elements.
 *
 * The page size is taken from `context.size` when present; otherwise it is
 * read from the screenshot via `imageInfoOfBase64`.
 *
 * @param {object} context - reads `screenshotBase64`, optional `size`, and
 *   `content` (the array of element infos).
 * @param {object} [opt] - `truncateTextLength` and `filterNonTextContent` are
 *   forwarded to `cropFieldInformation`.
 * @returns {Promise<object>} an object with:
 *   - `description`: page size line followed by the element listing (the
 *     listing is omitted when the MATCH_BY_POSITION config value is truthy);
 *   - `elementById(id)`: look an element up by `id` or `indexId`;
 *   - `elementByPosition(position, size)`: first element containing the point
 *     (`size` is accepted but unused);
 *   - `insertElementByPosition(position)`: register and return a synthetic
 *     8x8 element around the point;
 *   - `size`: `{ width, height }`.
 */
async function describeUserPage(context, opt) {
  const { screenshotBase64 } = context;
  let width;
  let height;
  if (context.size) {
    ({ width, height } = context.size);
  } else {
    const imgSize = await _img.imageInfoOfBase64.call(void 0, screenshotBase64);
    ({ width, height } = imgSize);
  }
  const elementsInfo = context.content;
  const idElementMap = {};
  elementsInfo.forEach((item) => {
    idElementMap[item.id] = item;
    // Test for presence rather than truthiness, so an indexId of 0 is
    // registered too (assumes 0 is a valid marker index; a truthiness check
    // silently skipped it).
    const hasIndexId = item.indexId !== void 0 && item.indexId !== null && item.indexId !== "";
    if (hasIndexId) {
      idElementMap[item.indexId] = item;
    }
  });
  const elementInfosDescription = cropFieldInformation(
    elementsInfo,
    opt == null ? void 0 : opt.truncateTextLength,
    opt == null ? void 0 : opt.filterNonTextContent
  );
  const contentList = elementInfosDescription.map((item) => {
    const { id, ...rest } = item;
    return `id=${id}: ${JSON.stringify(rest)}`;
  }).join("\n\n");
  const pageJSONDescription = _chunkSCNIHQKFjs.getAIConfig.call(void 0, _chunkSCNIHQKFjs.MATCH_BY_POSITION) ? "" : `Some of the elements are marked with a rectangle in the screenshot, some are not.
Json description of all the page elements:
${contentList}`;
  const sizeDescription = describeSize({ width, height });
  return {
    description: `The size of the page: ${sizeDescription}
${pageJSONDescription}`,
    elementById(id) {
      _assert2.default.call(void 0, typeof id !== "undefined", "id is required for query");
      const item = idElementMap[`${id}`];
      return item;
    },
    // `size` stays in the signature for callers; the stray debug console.log
    // that used to run on every lookup has been removed.
    elementByPosition(position, size) {
      return elementByPositionWithElementInfo(elementsInfo, position);
    },
    insertElementByPosition(position) {
      // 8x8 box centred on the point, clamped to the top/left page edges.
      const rect = {
        left: Math.max(position.x - 4, 0),
        top: Math.max(position.y - 4, 0),
        width: 8,
        height: 8
      };
      const id = _utils.generateHashId.call(void 0, rect);
      const element = {
        id,
        attributes: { nodeType: _constants.NodeType.POSITION },
        rect,
        content: "",
        center: [position.x, position.y]
      };
      // Make the synthetic element visible to both lookup helpers.
      elementsInfo.push(element);
      idElementMap[id] = element;
      return element;
    },
    size: { width, height }
  };
}
|
|
1549
|
+
/**
 * Reduce element infos to the compact records used in page descriptions.
 *
 * For every element the result holds `id` and `rect` (left/top/width/height
 * only), plus, depending on the element:
 * - `markerId` (the element's `indexId`) when it has no text content and
 *   `filterNonTextContent` is off;
 * - `content`, truncated via `truncateText(content, truncateTextLength)`, when non-empty;
 * - `attributes` when there is no text content and at least one attribute
 *   survives filtering ("style" and "src" are dropped, "nodeType" loses a
 *   trailing " Node" and is omitted entirely when `filterNonTextContent` is
 *   on, other values are truncated);
 * - `htmlTagName` unless it is "<span>", "<p>" or "<div>".
 *
 * @param {Array<object>} elementsInfo
 * @param {number} [truncateTextLength] - max content length passed to `truncateText`.
 * @param {boolean} [filterNonTextContent=false] - when true, only elements
 *   with text content are returned.
 * @returns {Array<object>}
 */
function cropFieldInformation(elementsInfo, truncateTextLength, filterNonTextContent = false) {
  const describeElement = (item) => {
    const { id, attributes = {}, rect, content } = item;
    const tailorContent = truncateText(content, truncateTextLength);
    const tailorAttributes = {};
    let htmlTagName = "";
    for (const attrName of Object.keys(attributes)) {
      const attrValue = attributes[attrName];
      switch (attrName) {
        case "style":
        case "src":
          break;
        case "nodeType":
          if (!filterNonTextContent) {
            tailorAttributes[attrName] = attrValue.replace(/\sNode$/, "");
          }
          break;
        case "htmlTagName":
          if (!["<span>", "<p>", "<div>"].includes(attrValue)) {
            htmlTagName = attrValue;
          }
          break;
        default:
          tailorAttributes[attrName] = truncateText(attrValue);
      }
    }
    // Properties are added in a fixed order so serialized output stays stable.
    const described = { id };
    if (!(filterNonTextContent || tailorContent)) {
      described.markerId = item.indexId;
    }
    if (tailorContent) {
      described.content = tailorContent;
    }
    if (Object.keys(tailorAttributes).length && !tailorContent) {
      described.attributes = tailorAttributes;
    }
    if (htmlTagName) {
      described.htmlTagName = htmlTagName;
    }
    // Copy only the four geometry fields; any other rect property is dropped.
    described.rect = {
      left: rect.left,
      top: rect.top,
      width: rect.width,
      height: rect.height
    };
    return described;
  };
  const elementInfosDescription = elementsInfo.map(describeElement);
  return filterNonTextContent ? elementInfosDescription.filter((item) => item.content) : elementInfosDescription;
}
|
|
1596
|
+
/**
 * Wrap an element description in a locator command.
 *
 * @param {string} prompt - description of the element(s) to locate.
 * @param {{multi?: boolean}} [opt] - a truthy `multi` selects the
 *   multi-element prefix; otherwise the single-element prefix is used.
 * @returns {string} "follow <prefix>: <prompt>".
 */
function retrieveElement(prompt, opt) {
  const wantsMany = opt == null ? void 0 : opt.multi;
  const prefix = wantsMany ? ELEMENTS_LOCATOR_PREFIX : ONE_ELEMENT_LOCATOR_PREFIX;
  return `follow ${prefix}: ${prompt}`;
}
|
|
1602
|
+
/**
 * Tell whether a response is a string that begins with one of the
 * element-locator prefixes.
 *
 * @param {*} response
 * @returns {boolean}
 */
function ifElementTypeResponse(response) {
  if (typeof response === "string") {
    const locatorPrefixes = [ONE_ELEMENT_LOCATOR_PREFIX, ELEMENTS_LOCATOR_PREFIX];
    return locatorPrefixes.some((prefix) => response.startsWith(prefix));
  }
  return false;
}
|
|
1608
|
+
/**
 * Extract element id(s) from a locator response.
 *
 * - "<ONE_ELEMENT_LOCATOR_PREFIX>/<id>" gives the id string, or `null` when
 *   the suffix is empty or whitespace only.
 * - "<ELEMENTS_LOCATOR_PREFIX>/<id>,<id>,..." gives an array of ids (empty
 *   array when there is no suffix).
 * - Anything else gives `null`.
 *
 * Ids are trimmed and blank list entries are dropped, so incidental
 * whitespace or a trailing comma in a model reply (e.g. "LOCATE_ONE_ELEMENT/ "
 * or ".../1, 2,") does not produce ids that can never match an element.
 *
 * @param {string} response
 * @returns {string | string[] | null}
 */
function splitElementResponse(response) {
  const oneElementSplitter = `${ONE_ELEMENT_LOCATOR_PREFIX}/`;
  if (response.startsWith(oneElementSplitter)) {
    const id = response.slice(oneElementSplitter.length).trim();
    if (id.includes(",")) {
      // A comma hints that several ids were returned where one was expected.
      console.warn(`unexpected comma in one element response: ${id}`);
    }
    return id ? id : null;
  }
  const elementsSplitter = `${ELEMENTS_LOCATOR_PREFIX}/`;
  if (response.startsWith(elementsSplitter)) {
    const idsString = response.slice(elementsSplitter.length);
    return idsString.split(",").map((id) => id.trim()).filter((id) => id !== "");
  }
  return null;
}
|
|
1627
|
+
/**
 * Prefix a section prompt with SECTION_MATCHER_FLAG.
 *
 * @param {string} prompt
 * @returns {string}
 */
function retrieveSection(prompt) {
  const flaggedPrompt = `${SECTION_MATCHER_FLAG}${prompt}`;
  return flaggedPrompt;
}
|
|
1630
|
+
|
|
1631
|
+
// src/ai-model/prompt/planning.ts
|
|
1632
|
+
/**
 * Pick the prompt fragments that describe how an element is referenced in
 * planning output: by position when the MATCH_BY_POSITION config value is
 * truthy, by id otherwise.
 *
 * @returns {{description: string, format: string, sample: string, locateParam: string}}
 */
var quickAnswerFormat = () => {
  const byPosition = {
    description: `"position": { x: number; y: number } // Represents the position of the element; replace with actual values in practice (ensure it reflects the element's position)`,
    format: '"position": { x: number; y: number }',
    sample: '{"prompt": "the search bar" // Use language consistent with the information on the page}',
    locateParam: `{
"prompt"?: string // the description of the element to find. It can only be omitted when locate is null.
} | null // If it's not on the page, the LocateParam should be null`
  };
  const byId = {
    description: '"id": string // Represents the ID of the element; replace with actual values in practice',
    format: '"id": string',
    sample: `{"id": "c81c4e9a33", "prompt": "the search bar"}`,
    locateParam: `{
"id": string, // the id of the element found. It should either be the id marked with a rectangle in the screenshot or the id described in the description.
"prompt"?: string // the description of the element to find. It can only be omitted when locate is null.
} | null // If it's not on the page, the LocateParam should be null`
  };
  const matchByPosition = _chunkSCNIHQKFjs.getAIConfig.call(void 0, _chunkSCNIHQKFjs.MATCH_BY_POSITION);
  const { description, format, sample, locateParam } = matchByPosition ? byPosition : byId;
  // Return a fresh object so callers never share the local tables.
  return { description, format, sample, locateParam };
};
|
|
1662
|
+
var systemTemplate = `
|
|
1663
|
+
## Role
|
|
1664
|
+
|
|
1665
|
+
You are a versatile professional in software UI automation. Your outstanding contributions will impact the user experience of billions of users.
|
|
1666
|
+
|
|
1667
|
+
## Objective
|
|
1668
|
+
|
|
1669
|
+
- Decompose the instruction user asked into a series of actions
|
|
1670
|
+
- Locate the target element if possible
|
|
1671
|
+
- If the instruction cannot be accomplished, give a further plan.
|
|
1672
|
+
|
|
1673
|
+
## Workflow
|
|
1674
|
+
|
|
1675
|
+
1. Receive the user's element description, screenshot, and instruction.
|
|
1676
|
+
2. Decompose the user's task into a sequence of actions, and place it in the \`actions\` field. There are different types of actions (Tap / Hover / Input / KeyboardPress / Scroll / FalsyConditionStatement / Sleep). The "About the action" section below will give you more details.
|
|
1677
|
+
3. Precisely locate the target element if it's already shown in the screenshot, put the location info in the \`locate\` field of the action.
|
|
1678
|
+
4. If some target elements is not shown in the screenshot, consider the user's instruction is not feasible on this page. Follow the next steps.
|
|
1679
|
+
5. Consider whether the user's instruction will be accomplished after all the actions
|
|
1680
|
+
- If yes, set \`taskWillBeAccomplished\` to true
|
|
1681
|
+
- If no, don't plan more actions by closing the array. Get ready to reevaluate the task. Some talent people like you will handle this. Give him a clear description of what have been done and what to do next. Put your new plan in the \`furtherPlan\` field. The "How to compose the \`taskWillBeAccomplished\` and \`furtherPlan\` fields" section will give you more details.
|
|
1682
|
+
|
|
1683
|
+
## Constraints
|
|
1684
|
+
|
|
1685
|
+
- All the actions you composed MUST be based on the page context information you get.
|
|
1686
|
+
- Trust the "What have been done" field about the task (if any), don't repeat actions in it.
|
|
1687
|
+
- Respond only with valid JSON. Do not write an introduction or summary or markdown prefix like \`\`\`json\`.
|
|
1688
|
+
- If you cannot plan any action at all (i.e. empty actions array), set reason in the \`error\` field.
|
|
1689
|
+
|
|
1690
|
+
## About the \`actions\` field
|
|
1691
|
+
|
|
1692
|
+
### The common \`locate\` param
|
|
1693
|
+
|
|
1694
|
+
The \`locate\` param is commonly used in the \`param\` field of the action, means to locate the target element to perform the action, it follows the following scheme:
|
|
1695
|
+
|
|
1696
|
+
type LocateParam = {locateParam}
|
|
1697
|
+
|
|
1698
|
+
### Supported actions
|
|
1699
|
+
|
|
1700
|
+
Each action has a \`type\` and corresponding \`param\`. To be detailed:
|
|
1701
|
+
- type: 'Tap', tap the located element
|
|
1702
|
+
* {{ locate: {sample}, param: null }}
|
|
1703
|
+
- type: 'Hover', move mouse over to the located element
|
|
1704
|
+
* {{ locate: LocateParam, param: null }}
|
|
1705
|
+
- type: 'Input', replace the value in the input field
|
|
1706
|
+
* {{ locate: LocateParam, param: {{ value: string }} }}
|
|
1707
|
+
* \`value\` is the final required input value based on the existing input. No matter what modifications are required, just provide the final value to replace the existing input value.
|
|
1708
|
+
- type: 'KeyboardPress', press a key
|
|
1709
|
+
* {{ param: {{ value: string }} }}
|
|
1710
|
+
- type: 'Scroll', scroll up or down.
|
|
1711
|
+
* {{
|
|
1712
|
+
locate: LocateParam | null,
|
|
1713
|
+
param: {{
|
|
1714
|
+
direction: 'down'(default) | 'up' | 'right' | 'left',
|
|
1715
|
+
scrollType: 'once' (default) | 'untilBottom' | 'untilTop' | 'untilRight' | 'untilLeft',
|
|
1716
|
+
distance: null | number
|
|
1717
|
+
}}
|
|
1718
|
+
}}
|
|
1719
|
+
* To scroll some specific element, put the element at the center of the region in the \`locate\` field. If it's a page scroll, put \`null\` in the \`locate\` field.
|
|
1720
|
+
* \`param\` is required in this action. If some fields are not specified, use direction \`down\`, \`once\` scroll type, and \`null\` distance.
|
|
1721
|
+
- type: 'FalsyConditionStatement'
|
|
1722
|
+
* {{ param: null }}
|
|
1723
|
+
* use this action when the instruction is an "if" statement and the condition is falsy.
|
|
1724
|
+
- type: 'Sleep'
|
|
1725
|
+
* {{ param: {{ timeMs: number }} }}
|
|
1726
|
+
|
|
1727
|
+
## How to compose the \`taskWillBeAccomplished\` and \`furtherPlan\` fields ?
|
|
1728
|
+
|
|
1729
|
+
\`taskWillBeAccomplished\` is a boolean field, means whether the task will be accomplished after all the actions.
|
|
1730
|
+
|
|
1731
|
+
\`furtherPlan\` is used when the task cannot be accomplished. It follows the scheme {{ whatHaveDone: string, whatToDoNext: string }}:
|
|
1732
|
+
- \`whatHaveDone\`: a string, describe what have been done after the previous actions.
|
|
1733
|
+
- \`whatToDoNext\`: a string, describe what should be done next after the previous actions has finished. It should be a concise and clear description of the actions to be performed. Make sure you don't lose any necessary steps user asked.
|
|
1734
|
+
`;
|
|
1735
|
+
var outputTemplate = `
|
|
1736
|
+
## Output JSON Format:
|
|
1737
|
+
|
|
1738
|
+
The JSON format is as follows:
|
|
1739
|
+
|
|
1740
|
+
{{
|
|
1741
|
+
"actions": [
|
|
1742
|
+
{{
|
|
1743
|
+
"thought": "Reasons for generating this task, and why this task is feasible on this page",
|
|
1744
|
+
"type": "Tap",
|
|
1745
|
+
"param": null,
|
|
1746
|
+
"locate": {sample} | null,
|
|
1747
|
+
}},
|
|
1748
|
+
// ... more actions
|
|
1749
|
+
],
|
|
1750
|
+
"taskWillBeAccomplished": boolean,
|
|
1751
|
+
"furtherPlan": {{ "whatHaveDone": string, "whatToDoNext": string }} | null,
|
|
1752
|
+
"error"?: string
|
|
1753
|
+
}}
|
|
1754
|
+
Here is an example of how to decompose a task:
|
|
1755
|
+
|
|
1756
|
+
When a user says 'Click the language switch button, wait 1s, click "English"', the user will give you the description like this:
|
|
1757
|
+
|
|
1758
|
+
====================
|
|
1759
|
+
{pageDescription}
|
|
1760
|
+
====================
|
|
1761
|
+
|
|
1762
|
+
By viewing the page screenshot and description, you should consider this and output the JSON:
|
|
1763
|
+
|
|
1764
|
+
* The main steps should be: tap the switch button, sleep, and tap the 'English' option
|
|
1765
|
+
* The language switch button is shown in the screenshot, but it's not marked with a rectangle. So we have to use the page description to find the element. By carefully checking the context information (coordinates, attributes, content, etc.), you can find the element.
|
|
1766
|
+
* The "English" option button is not shown in the screenshot now, it means it may only show after the previous actions are finished. So the last action will have a \`null\` value in the \`locate\` field.
|
|
1767
|
+
* The task cannot be accomplished (because we cannot see the "English" option now), so a \`furtherPlan\` field is needed.
|
|
1768
|
+
|
|
1769
|
+
{{
|
|
1770
|
+
"actions":[
|
|
1771
|
+
{{
|
|
1772
|
+
"type": "Tap",
|
|
1773
|
+
"thought": "Click the language switch button to open the language options.",
|
|
1774
|
+
"param": null,
|
|
1775
|
+
"locate": {sample},
|
|
1776
|
+
}},
|
|
1777
|
+
{{
|
|
1778
|
+
"type": "Sleep",
|
|
1779
|
+
"thought": "Wait for 1 second to ensure the language options are displayed.",
|
|
1780
|
+
"param": {{ "timeMs": 1000 }},
|
|
1781
|
+
}},
|
|
1782
|
+
{{
|
|
1783
|
+
"type": "Tap",
|
|
1784
|
+
"thought": "Locate the 'English' option in the language menu.",
|
|
1785
|
+
"param": null,
|
|
1786
|
+
"locate": null
|
|
1787
|
+
}},
|
|
1788
|
+
],
|
|
1789
|
+
"error": null,
|
|
1790
|
+
"taskWillBeAccomplished": false,
|
|
1791
|
+
"furtherPlan": {{
|
|
1792
|
+
"whatToDoNext": "find the 'English' option and click on it",
|
|
1793
|
+
"whatHaveDone": "Click the language switch button and wait 1s"
|
|
1794
|
+
}}
|
|
1795
|
+
}}
|
|
1796
|
+
|
|
1797
|
+
Here is another example of how to tolerate error situations only when the instruction is an "if" statement:
|
|
1798
|
+
|
|
1799
|
+
If the user says "If there is a popup, close it", you should consider this and output the JSON:
|
|
1800
|
+
|
|
1801
|
+
* By viewing the page screenshot and description, you cannot find the popup, so the condition is falsy.
|
|
1802
|
+
* The instruction itself is an "if" statement, it means the user can tolerate this situation, so you should leave a \`FalsyConditionStatement\` action.
|
|
1803
|
+
|
|
1804
|
+
{{
|
|
1805
|
+
"actions": [{{
|
|
1806
|
+
"type": "FalsyConditionStatement",
|
|
1807
|
+
"thought": "There is no popup on the page",
|
|
1808
|
+
"param": null
|
|
1809
|
+
}}
|
|
1810
|
+
],
|
|
1811
|
+
"taskWillBeAccomplished": true,
|
|
1812
|
+
"furtherPlan": null
|
|
1813
|
+
}}
|
|
1814
|
+
|
|
1815
|
+
For contrast, if the user says "Close the popup" in this situation, you should consider this and output the JSON:
|
|
1816
|
+
|
|
1817
|
+
{{
|
|
1818
|
+
"actions": [],
|
|
1819
|
+
"error": "The instruction and page context are irrelevant, there is no popup on the page",
|
|
1820
|
+
"taskWillBeAccomplished": true,
|
|
1821
|
+
"furtherPlan": null
|
|
1822
|
+
}}
|
|
1823
|
+
|
|
1824
|
+
Here is an example of when task is accomplished, don't plan more actions:
|
|
1825
|
+
|
|
1826
|
+
When the user ask to "Wait 4s", you should consider this:
|
|
1827
|
+
|
|
1828
|
+
{{
|
|
1829
|
+
"actions": [
|
|
1830
|
+
{{
|
|
1831
|
+
"type": "Sleep",
|
|
1832
|
+
"thought": "Wait for 4 seconds",
|
|
1833
|
+
"param": {{ "timeMs": 4000 }},
|
|
1834
|
+
}},
|
|
1835
|
+
],
|
|
1836
|
+
"taskWillBeAccomplished": true,
|
|
1837
|
+
"furtherPlan": null // All steps have been included in the actions, so no further plan is needed
|
|
1838
|
+
}}
|
|
1839
|
+
|
|
1840
|
+
Here is an example of what NOT to do:
|
|
1841
|
+
|
|
1842
|
+
Wrong output:
|
|
1843
|
+
|
|
1844
|
+
{{
|
|
1845
|
+
"actions":[
|
|
1846
|
+
{{
|
|
1847
|
+
"type": "Tap",
|
|
1848
|
+
"thought": "Click the language switch button to open the language options.",
|
|
1849
|
+
"param": null,
|
|
1850
|
+
"locate": {{
|
|
1851
|
+
{sample}, // WRONG:prompt is missing
|
|
1852
|
+
}}
|
|
1853
|
+
}},
|
|
1854
|
+
{{
|
|
1855
|
+
"type": "Tap",
|
|
1856
|
+
"thought": "Click the English option",
|
|
1857
|
+
"param": null,
|
|
1858
|
+
"locate": null, // This means the 'English' option is not shown in the screenshot, the task cannot be accomplished
|
|
1859
|
+
}}
|
|
1860
|
+
],
|
|
1861
|
+
"taskWillBeAccomplished": false,
|
|
1862
|
+
// WRONG: should not be null
|
|
1863
|
+
"furtherPlan": null,
|
|
1864
|
+
}}
|
|
1865
|
+
|
|
1866
|
+
Reason:
|
|
1867
|
+
* The \`prompt\` is missing in the first 'Locate' action
|
|
1868
|
+
* Since the option button is not shown in the screenshot, the task cannot be accomplished, so a \`furtherPlan\` field is needed.
|
|
1869
|
+
`;
|
|
1870
|
+
/**
 * Renders the system prompt used for task planning.
 *
 * Joins `systemTemplate` and `outputTemplate` into one `PromptTemplate` and
 * fills in the sample page description plus the locate `sample` and
 * `locateParam` fragments from `quickAnswerFormat()`.
 *
 * @returns {Promise<string>} The value produced by `PromptTemplate#format`.
 */
async function systemPromptToTaskPlanning() {
  const promptTemplate = new (0, _prompts.PromptTemplate)({
    template: `${systemTemplate}

${outputTemplate}`,
    inputVariables: ["pageDescription", "sample", "locateParam"]
  });
  // Read the format once so `sample` and `locateParam` always come from the
  // same variant (and the config is not looked up twice per render).
  const { sample, locateParam } = quickAnswerFormat();
  return await promptTemplate.format({
    pageDescription: samplePageDescription,
    sample,
    locateParam
  });
}
|
|
1883
|
+
/**
 * `response_format` payload (type "json_schema") describing the planning
 * response: an `actions` array plus `taskWillBeAccomplished`, `furtherPlan`
 * and `error`. It is passed to the model by `callToGetJSONObject` for the
 * PLAN action type.
 *
 * The `locate` sub-schema depends on the `MATCH_BY_POSITION` AI config, which
 * is read while this object literal is evaluated — i.e. once, when this
 * declaration runs, not on every request.
 */
var planSchema = {
  type: "json_schema",
  json_schema: {
    name: "action_items",
    strict: true,
    schema: {
      type: "object",
      strict: true,
      properties: {
        actions: {
          type: "array",
          items: {
            type: "object",
            strict: true,
            properties: {
              thought: {
                type: "string",
                description: "Reasons for generating this task, and why this task is feasible on this page"
              },
              type: {
                type: "string",
                description: 'Type of action, like "Tap", "Hover", etc.'
              },
              param: {
                type: ["object", "null"],
                description: "Parameter of the action, can be null ONLY when the type field is Tap or Hover"
              },
              locate: {
                type: ["object", "null"],
                properties: {
                  // Either a `position` {x, y} object or an `id` string, depending on config.
                  ..._chunkSCNIHQKFjs.getAIConfig.call(void 0, _chunkSCNIHQKFjs.MATCH_BY_POSITION) ? {
                    position: {
                      type: "object",
                      properties: {
                        x: { type: "number" },
                        y: { type: "number" }
                      },
                      required: ["x", "y"],
                      additionalProperties: false
                    }
                  } : {
                    id: { type: "string" }
                  },
                  prompt: { type: "string" }
                },
                // The required key mirrors whichever property was emitted above.
                required: [
                  _chunkSCNIHQKFjs.getAIConfig.call(void 0, _chunkSCNIHQKFjs.MATCH_BY_POSITION) ? "position" : "id",
                  "prompt"
                ],
                additionalProperties: false,
                description: "Location information for the target element"
              }
            },
            required: ["thought", "type", "param", "locate"],
            additionalProperties: false
          },
          description: "List of actions to be performed"
        },
        taskWillBeAccomplished: {
          type: "boolean",
          description: "Whether the task will be accomplished after the actions"
        },
        furtherPlan: {
          type: ["object", "null"],
          properties: {
            whatHaveDone: { type: "string" },
            whatToDoNext: { type: "string" }
          },
          required: ["whatHaveDone", "whatToDoNext"],
          additionalProperties: false,
          description: "Plan the task when the task cannot be accomplished"
        },
        error: {
          type: ["string", "null"],
          description: "Overall error messages"
        }
      },
      required: ["actions", "taskWillBeAccomplished", "furtherPlan", "error"],
      additionalProperties: false
    }
  }
};
|
|
1965
|
+
/**
 * Builds the "instruction" section of the planning user prompt.
 *
 * @param {string} userPrompt - The instruction for the current round.
 * @param {string} [originalPrompt] - The original task description.
 * @param {string} [whatHaveDone] - Summary of what previous rounds did.
 * @returns {string} The instruction block; when both `originalPrompt` and
 *   `whatHaveDone` are truthy, a background block with those two values is
 *   appended after it.
 */
var generateTaskBackgroundContext = (userPrompt, originalPrompt, whatHaveDone) => {
  const instructionSection = `
Here is the instruction:
=====================================
${userPrompt}
=====================================
`;
  // Without both pieces of history there is no background to add.
  if (!originalPrompt || !whatHaveDone) {
    return instructionSection;
  }
  return `${instructionSection}
For your information, this is a task that some important person handed to you. Here is the original task description and what have been done after the previous actions:
=====================================
Original task description: ${originalPrompt}
=====================================
What have been done: ${whatHaveDone}
=====================================
`;
};
|
|
1988
|
+
/**
 * User-message template for planning: the page description framed by
 * separator lines, followed by the task background context.
 * Expects the `pageDescription` and `taskBackgroundContext` variables.
 */
var automationUserPrompt = new (0, _prompts.PromptTemplate)({
  inputVariables: ["pageDescription", "taskBackgroundContext"],
  template: `
pageDescription:
=====================================
{pageDescription}
=====================================

{taskBackgroundContext}
`
});
|
|
1999
|
+
|
|
2000
|
+
// src/ai-model/openai/index.ts
|
|
2001
|
+
/**
 * Reports whether enough AI configuration is present for this client.
 *
 * @param {string} [preferVendor] - When set to anything other than "openAI",
 *   the check fails immediately.
 * @returns {boolean} `true` when any of `OPENAI_API_KEY`,
 *   `MIDSCENE_USE_AZURE_OPENAI`, `ANTHROPIC_API_KEY` or
 *   `MIDSCENE_OPENAI_INIT_CONFIG_JSON` has a truthy config value.
 */
function checkAIConfig(preferVendor) {
  const vendorMismatch = Boolean(preferVendor) && preferVendor !== "openAI";
  if (vendorMismatch) {
    return false;
  }
  const { getAIConfig } = _chunkSCNIHQKFjs;
  // Checked in this order; the first truthy value settles the answer.
  const configKeys = [
    _chunkSCNIHQKFjs.OPENAI_API_KEY,
    _chunkSCNIHQKFjs.MIDSCENE_USE_AZURE_OPENAI,
    _chunkSCNIHQKFjs.ANTHROPIC_API_KEY,
    _chunkSCNIHQKFjs.MIDSCENE_OPENAI_INIT_CONFIG_JSON
  ];
  return configKeys.some((key) => Boolean(getAIConfig(key)));
}
|
|
2012
|
+
// Model used when `MIDSCENE_MODEL_NAME` is not configured.
var defaultModel = "gpt-4o-2024-08-06";

/**
 * Resolves the model name to send with requests.
 *
 * @returns {string} The `MIDSCENE_MODEL_NAME` config value when it is truthy,
 *   otherwise `defaultModel`.
 */
function getModelName() {
  const configured = _chunkSCNIHQKFjs.getAIConfig.call(void 0, _chunkSCNIHQKFjs.MIDSCENE_MODEL_NAME);
  return configured || defaultModel;
}
|
|
2021
|
+
/**
 * Creates the SDK client used for chat completions, chosen from AI config.
 *
 * Selection order:
 *   1. `OPENAI_USE_AZURE`            -> `AzureOpenAI` with base URL + API key.
 *   2. `MIDSCENE_USE_AZURE_OPENAI`   -> `AzureOpenAI` with endpoint/deployment,
 *      authenticated by a bearer-token provider when
 *      `MIDSCENE_AZURE_OPENAI_SCOPE` is set, otherwise by `AZURE_OPENAI_KEY`.
 *   3. not `MIDSCENE_USE_ANTHROPIC_SDK` -> plain OpenAI client.
 *   4. `MIDSCENE_USE_ANTHROPIC_SDK`  -> Anthropic client.
 *
 * @param {object} param0
 * @param {*} param0.AIActionTypeValue - Action type; its `toString()` is sent
 *   as the `MIDSCENE_API_TYPE` default header on the plain OpenAI client.
 * @returns {Promise<{completion: object, style: "openai"|"anthropic"}>}
 *   `completion` is `openai.chat.completions` or the Anthropic `messages` API.
 * @throws {Error} When langsmith debugging is requested in a browser, when a
 *   required setting is missing (assertions), or when no client was created.
 */
async function createChatClient({
  AIActionTypeValue
}) {
  let openai;
  const extraConfig = _chunkSCNIHQKFjs.getAIConfigInJson.call(void 0, _chunkSCNIHQKFjs.MIDSCENE_OPENAI_INIT_CONFIG_JSON);
  const socksProxy = _chunkSCNIHQKFjs.getAIConfig.call(void 0, _chunkSCNIHQKFjs.MIDSCENE_OPENAI_SOCKS_PROXY);
  const socksAgent = socksProxy ? new (0, _socksproxyagent.SocksProxyAgent)(socksProxy) : void 0;
  if (_chunkSCNIHQKFjs.getAIConfig.call(void 0, _chunkSCNIHQKFjs.OPENAI_USE_AZURE)) {
    openai = new (0, _openai.AzureOpenAI)({
      baseURL: _chunkSCNIHQKFjs.getAIConfig.call(void 0, _chunkSCNIHQKFjs.OPENAI_BASE_URL),
      apiKey: _chunkSCNIHQKFjs.getAIConfig.call(void 0, _chunkSCNIHQKFjs.OPENAI_API_KEY),
      httpAgent: socksAgent,
      ...extraConfig,
      dangerouslyAllowBrowser: true
    });
  } else if (_chunkSCNIHQKFjs.getAIConfig.call(void 0, _chunkSCNIHQKFjs.MIDSCENE_USE_AZURE_OPENAI)) {
    // NOTE(review): `socksAgent` is not passed to either client in this branch — confirm that is intended.
    const extraAzureConfig = _chunkSCNIHQKFjs.getAIConfigInJson.call(void 0,
      _chunkSCNIHQKFjs.MIDSCENE_AZURE_OPENAI_INIT_CONFIG_JSON
    );
    const scope = _chunkSCNIHQKFjs.getAIConfig.call(void 0, _chunkSCNIHQKFjs.MIDSCENE_AZURE_OPENAI_SCOPE);
    let tokenProvider = void 0;
    if (scope) {
      // Token-based auth: refuse to run in a browser before creating the credential.
      _assert2.default.call(void 0,
        !_utils.ifInBrowser,
        "Azure OpenAI is not supported in browser with Midscene."
      );
      const credential = new (0, _identity.DefaultAzureCredential)();
      // `scope` is already known to be truthy here, so this assertion cannot fail.
      _assert2.default.call(void 0, scope, "MIDSCENE_AZURE_OPENAI_SCOPE is required");
      tokenProvider = _identity.getBearerTokenProvider.call(void 0, credential, scope);
      openai = new (0, _openai.AzureOpenAI)({
        azureADTokenProvider: tokenProvider,
        endpoint: _chunkSCNIHQKFjs.getAIConfig.call(void 0, _chunkSCNIHQKFjs.AZURE_OPENAI_ENDPOINT),
        apiVersion: _chunkSCNIHQKFjs.getAIConfig.call(void 0, _chunkSCNIHQKFjs.AZURE_OPENAI_API_VERSION),
        deployment: _chunkSCNIHQKFjs.getAIConfig.call(void 0, _chunkSCNIHQKFjs.AZURE_OPENAI_DEPLOYMENT),
        ...extraConfig,
        ...extraAzureConfig
      });
    } else {
      // Key-based auth.
      openai = new (0, _openai.AzureOpenAI)({
        apiKey: _chunkSCNIHQKFjs.getAIConfig.call(void 0, _chunkSCNIHQKFjs.AZURE_OPENAI_KEY),
        endpoint: _chunkSCNIHQKFjs.getAIConfig.call(void 0, _chunkSCNIHQKFjs.AZURE_OPENAI_ENDPOINT),
        apiVersion: _chunkSCNIHQKFjs.getAIConfig.call(void 0, _chunkSCNIHQKFjs.AZURE_OPENAI_API_VERSION),
        deployment: _chunkSCNIHQKFjs.getAIConfig.call(void 0, _chunkSCNIHQKFjs.AZURE_OPENAI_DEPLOYMENT),
        dangerouslyAllowBrowser: true,
        ...extraConfig,
        ...extraAzureConfig
      });
    }
  } else if (!_chunkSCNIHQKFjs.getAIConfig.call(void 0, _chunkSCNIHQKFjs.MIDSCENE_USE_ANTHROPIC_SDK)) {
    openai = new (0, _openai2.default)({
      baseURL: _chunkSCNIHQKFjs.getAIConfig.call(void 0, _chunkSCNIHQKFjs.OPENAI_BASE_URL),
      apiKey: _chunkSCNIHQKFjs.getAIConfig.call(void 0, _chunkSCNIHQKFjs.OPENAI_API_KEY),
      httpAgent: socksAgent,
      ...extraConfig,
      // Merge user-supplied default headers with the action-type header.
      defaultHeaders: {
        ...(extraConfig == null ? void 0 : extraConfig.defaultHeaders) || {},
        [_chunkSCNIHQKFjs.MIDSCENE_API_TYPE]: AIActionTypeValue.toString()
      },
      dangerouslyAllowBrowser: true
    });
  }
  if (openai && _chunkSCNIHQKFjs.getAIConfig.call(void 0, _chunkSCNIHQKFjs.MIDSCENE_LANGSMITH_DEBUG)) {
    if (_utils.ifInBrowser) {
      throw new Error("langsmith is not supported in browser");
    }
    console.log("DEBUGGING MODE: langsmith wrapper enabled");
    // Loaded lazily so the wrapper chunk is only required when debugging is on.
    const { wrapOpenAI } = await Promise.resolve().then(() => require("./wrappers-KKGZQXJL.js"));
    openai = wrapOpenAI(openai);
  }
  if (typeof openai !== "undefined") {
    return {
      completion: openai.chat.completions,
      style: "openai"
    };
  }
  // No OpenAI-style client was created: fall back to the Anthropic SDK if enabled.
  if (_chunkSCNIHQKFjs.getAIConfig.call(void 0, _chunkSCNIHQKFjs.MIDSCENE_USE_ANTHROPIC_SDK)) {
    const apiKey = _chunkSCNIHQKFjs.getAIConfig.call(void 0, _chunkSCNIHQKFjs.ANTHROPIC_API_KEY);
    _assert2.default.call(void 0, apiKey, "ANTHROPIC_API_KEY is required");
    openai = new (0, _sdk.Anthropic)({
      apiKey,
      dangerouslyAllowBrowser: true
    });
  }
  if (typeof openai !== "undefined" && openai.messages) {
    return {
      completion: openai.messages,
      style: "anthropic"
    };
  }
  throw new Error("Openai SDK or Anthropic SDK is not initialized");
}
|
|
2112
|
+
/**
 * Sends one chat request through the configured SDK and returns the text of
 * the first reply.
 *
 * @param {Array<object>} messages - OpenAI-style chat messages. On the
 *   Anthropic path every message is sent with role "user" and `image_url`
 *   content parts are converted to base64 `image` parts.
 * @param {*} AIActionTypeValue - Action type forwarded to `createChatClient`.
 * @param {object} [responseFormat] - Forwarded as `response_format`.
 * @returns {Promise<{content: string, usage: *}>} Reply text and the usage
 *   object reported by the SDK.
 * @throws When the client cannot be created or the reply text is empty
 *   ("empty content" assertion).
 */
async function call(messages, AIActionTypeValue, responseFormat) {
  const { completion, style } = await createChatClient({
    AIActionTypeValue
  });
  const shouldPrintTiming = typeof _chunkSCNIHQKFjs.getAIConfig.call(void 0, _chunkSCNIHQKFjs.MIDSCENE_DEBUG_AI_PROFILE) === "string";
  const maxTokens = _chunkSCNIHQKFjs.getAIConfig.call(void 0, _chunkSCNIHQKFjs.OPENAI_MAX_TOKENS);
  const startTime = Date.now();
  const model = getModelName();
  let content;
  let usage;

  // A non-numeric OPENAI_MAX_TOKENS used to parse to NaN and be sent as-is;
  // fall back to the default instead.
  const defaultMaxTokens = 2048;
  const requestedMaxTokens = typeof maxTokens === "number" ? maxTokens : Number.parseInt(maxTokens || String(defaultMaxTokens), 10);
  const commonConfig = {
    temperature: 0.1,
    stream: false,
    max_tokens: Number.isFinite(requestedMaxTokens) ? requestedMaxTokens : defaultMaxTokens
  };

  // Shared by both SDK paths so profiling output is consistent.
  const logTiming = (tokenUsage) => {
    if (shouldPrintTiming) {
      console.log(
        "Midscene - AI call",
        model,
        tokenUsage,
        `${Date.now() - startTime}ms`
      );
    }
  };

  if (style === "openai") {
    const result = await completion.create({
      model,
      messages,
      response_format: responseFormat,
      ...commonConfig
    });
    logTiming(result.usage);
    content = result.choices[0].message.content;
    _assert2.default.call(void 0, content, "empty content");
    usage = result.usage;
  } else if (style === "anthropic") {
    // Convert an OpenAI `image_url` part (data URL) into an Anthropic base64 image part.
    const convertImageContent = (content2) => {
      if (content2.type === "image_url") {
        const imgBase64 = content2.image_url.url;
        _assert2.default.call(void 0, imgBase64, "image_url is required");
        return {
          source: {
            type: "base64",
            media_type: imgBase64.includes("data:image/png;base64,") ? "image/png" : "image/jpeg",
            data: imgBase64.split(",")[1]
          },
          type: "image"
        };
      }
      return content2;
    };
    const result = await completion.create({
      model,
      system: "You are a versatile professional in software UI automation",
      messages: messages.map((m) => ({
        role: "user",
        content: Array.isArray(m.content) ? m.content.map(convertImageContent) : m.content
      })),
      response_format: responseFormat,
      ...commonConfig
    });
    logTiming(result.usage);
    content = result.content[0].text;
    _assert2.default.call(void 0, content, "empty content");
    usage = result.usage;
  }
  return { content: content || "", usage };
}
|
|
2176
|
+
/**
 * Calls the model and parses its reply as JSON.
 *
 * For model names containing "gpt-4o", a `response_format` is chosen from the
 * action type (0 ASSERT, 1 INSPECT_ELEMENT, 2 EXTRACT_DATA, 3 PLAN). The plain
 * `json_object` format is used for EXTRACT_DATA, for unknown action types and
 * for the "gpt-4o-2024-05-13" model. Other models get no `response_format`.
 *
 * @param {Array<object>} messages - Chat messages forwarded to `call`.
 * @param {number} AIActionTypeValue - Action type.
 * @returns {Promise<{content: *, usage: *}>} Parsed reply and usage info.
 */
async function callToGetJSONObject(messages, AIActionTypeValue) {
  const model = getModelName();
  let responseFormat;
  if (model.includes("gpt-4o")) {
    // Lazy getters: only the schema for the matching action type is read.
    const schemaGetters = new Map([
      [0 /* ASSERT */, () => assertSchema],
      [1 /* INSPECT_ELEMENT */, () => findElementSchema],
      [2 /* EXTRACT_DATA */, () => ({ type: "json_object" /* JSON */ })],
      [3 /* PLAN */, () => planSchema]
    ]);
    const getSchema = schemaGetters.get(AIActionTypeValue);
    if (getSchema) {
      responseFormat = getSchema();
    }
    const needsPlainJson = model === "gpt-4o-2024-05-13" || !responseFormat;
    if (needsPlainJson) {
      responseFormat = { type: "json_object" /* JSON */ };
    }
  }
  const response = await call(messages, AIActionTypeValue, responseFormat);
  _assert2.default.call(void 0, response, "empty response");
  return {
    content: safeParseJson(response.content),
    usage: response.usage
  };
}
|
|
2203
|
+
/**
 * Pulls the JSON-object text out of a model reply.
 *
 * Tries, in order: the whole reply being a `{...}` object (surrounding
 * whitespace removed), a `{...}` object inside a ``` / ```json fenced block,
 * and finally the span from the first `{` to the last `}`.
 *
 * @param {*} response - Reply text.
 * @returns {*} The extracted object text, or `response` unchanged when nothing
 *   matches or when matching throws (e.g. `response` is not a string).
 */
function extractJSONFromCodeBlock(response) {
  const extractors = [
    { pattern: /^\s*(\{[\s\S]*\})\s*$/, group: 1 },
    { pattern: /```(?:json)?\s*(\{[\s\S]*?\})\s*```/, group: 1 },
    { pattern: /\{[\s\S]*\}/, group: 0 }
  ];
  try {
    for (const { pattern, group } of extractors) {
      const found = response.match(pattern);
      if (found) {
        return found[group];
      }
    }
  } catch (e) {
    // Best effort: fall through and hand the input back untouched.
  }
  return response;
}
|
|
2223
|
+
/**
 * Parses a model reply into a value, tolerating non-strict output.
 *
 * Order of attempts:
 *   1. Strict `JSON.parse` — an object or array result is returned as-is.
 *   2. A `(x,y)` coordinate pair anywhere in the text -> `[x, y]` (numbers).
 *   3. Any other strict JSON result (string, number, ...).
 *   4. Lenient parsing via `dirty-json`.
 *
 * Strict parsing runs first so that a valid JSON object whose text merely
 * mentions something like "(10,20)" is no longer mistaken for a bare
 * coordinate reply.
 *
 * @param {string} input - Raw reply text.
 * @returns {*} Parsed value, or `[x, y]` for coordinate replies.
 * @throws {Error} `failed to parse json response: <input>` when every
 *   attempt fails.
 */
function safeParseJson(input) {
  const cleanJsonString = extractJSONFromCodeBlock(input);

  let strictValue;
  let strictParsed = false;
  try {
    strictValue = JSON.parse(cleanJsonString);
    strictParsed = true;
  } catch (e) {
    // Not strict JSON; the fallbacks below decide what to do.
  }
  if (strictParsed && strictValue !== null && typeof strictValue === "object") {
    return strictValue;
  }

  // Bare coordinate replies such as "(123,456)".
  const coordinateMatch = cleanJsonString.match(/\((\d+),(\d+)\)/);
  if (coordinateMatch) {
    return coordinateMatch.slice(1).map(Number);
  }
  if (strictParsed) {
    return strictValue;
  }

  try {
    return import_dirty_json.default.parse(cleanJsonString);
  } catch (e) {
    console.log("e:", e);
  }
  throw Error(`failed to parse json response: ${input}`);
}
|
|
2240
|
+
|
|
2241
|
+
// src/ai-model/inspect.ts
|
|
2242
|
+
|
|
2243
|
+
// Options object passed as the second argument to `describeUserPage` by
// `AiExtractElementInfo`.
var liteContextConfig = {
  filterNonTextContent: true,
  truncateTextLength: 200
};
|
|
2247
|
+
/**
 * Converts a position expressed on a 0–1000 scale into coordinates within
 * `size`, rounded to three decimal places.
 *
 * @param {{x: number, y: number}} relativePosition - Position on the 0–1000 scale.
 * @param {{width: number, height: number}} size - Target extent.
 * @returns {{x: number, y: number}} Scaled coordinates.
 */
function transformToAbsoluteCoords(relativePosition, size) {
  const scale = (ratio, extent) => {
    const scaled = ratio / 1e3 * extent;
    return Number(scaled.toFixed(3));
  };
  const x = scale(relativePosition.x, size.width);
  const y = scale(relativePosition.y, size.height);
  return { x, y };
}
|
|
2253
|
+
/**
 * Normalizes an inspect reply into `{ errors, elements }`.
 *
 * When the reply is an array it is read as `[x, y]` on the 0–1000 scale: the
 * pair is converted with `transformToAbsoluteCoords`, the element at that
 * point is looked up, and its id is returned. Any other reply is expected to
 * carry `errors` and `elements` already, which are passed through.
 *
 * @param {Array<number>|{errors: *, elements: *}} aiResult - Parsed reply.
 * @param {Array<object>} elementsInfo - Elements searched by position.
 * @param {{width: number, height: number}} size - Page size for scaling.
 * @param {string} screenshotBase64 - Not used by this function.
 * @returns {Promise<{errors: *, elements: *}>}
 * @throws When no element is found at the computed position (assertion).
 */
async function transformElementPositionToId(aiResult, elementsInfo, size, screenshotBase64) {
  if (!Array.isArray(aiResult)) {
    const { errors, elements } = aiResult;
    return { errors, elements };
  }

  const absolutePosition = transformToAbsoluteCoords(
    { x: aiResult[0], y: aiResult[1] },
    size
  );
  const matched = elementByPositionWithElementInfo(elementsInfo, absolutePosition);
  _assert2.default.call(void 0,
    matched,
    `inspect: no id found with position: ${JSON.stringify({ absolutePosition })}`
  );
  return {
    errors: [],
    elements: [{ id: matched.id }]
  };
}
|
|
2285
|
+
/**
 * Turns an already-known locate hint into an inspect result, so no model call
 * is needed.
 *
 * @param {{id?: string, position?: object}|undefined|null} quickAnswer - Hint
 *   with an element `id` and/or a `position`.
 * @param {Array<object>} elementsInfo - Elements searched by position.
 * @param {function(string): *} elementById - Lookup used to validate the id.
 * @param {function(object): object} insertElementByPosition - Called with the
 *   position when no existing element is found there.
 * @returns {{parseResult: {elements: Array, errors: Array}, rawResponse: object, elementById: Function}|undefined}
 *   `undefined` when there is no hint, or the hint has neither a resolvable id
 *   nor a position.
 */
function getQuickAnswer(quickAnswer, elementsInfo, elementById, insertElementByPosition) {
  if (!quickAnswer) {
    return void 0;
  }
  const asResult = (element) => ({
    parseResult: {
      elements: [element],
      errors: []
    },
    rawResponse: quickAnswer,
    elementById
  });

  // An id only counts when it resolves to a known element.
  const idResolves = "id" in quickAnswer && quickAnswer.id && elementById(quickAnswer.id);
  if (idResolves) {
    return asResult(quickAnswer);
  }

  const hasPosition = "position" in quickAnswer && quickAnswer.position;
  if (!hasPosition) {
    return void 0;
  }
  const located = elementByPositionWithElementInfo(elementsInfo, quickAnswer.position) || insertElementByPosition(quickAnswer.position);
  return asResult(located);
}
|
|
2317
|
+
/**
 * Locates element(s) on the page that match a description.
 *
 * If `options.quickAnswer` can be resolved by `getQuickAnswer`, that result is
 * returned without calling the model. Otherwise the screenshot (the
 * marker-annotated one when present) and the find-element prompt are sent to
 * the model, and the reply is normalized by `transformElementPositionToId`.
 *
 * @param {object} options
 * @param {object} options.context - Page context: `screenshotBase64`,
 *   optional `screenshotBase64WithElementMarker`, and `content`.
 * @param {boolean} [options.multi] - Forwarded to the find-element prompt.
 * @param {string} [options.targetElementDescription] - Required unless the
 *   quick answer resolves.
 * @param {Function} [options.callAI] - Replaces `callToGetJSONObject`.
 * @param {object} [options.quickAnswer] - Pre-computed locate hint.
 * @returns {Promise<object>} The quick-answer result, or
 *   `{ parseResult, rawResponse, elementById, usage }`.
 */
async function AiInspectElement(options) {
  const { context, multi, targetElementDescription, callAI } = options;
  const { screenshotBase64, screenshotBase64WithElementMarker } = context;
  const pageInfo = await describeUserPage(context);
  const { description, elementById, insertElementByPosition, size } = pageInfo;

  const shortcut = getQuickAnswer(
    options.quickAnswer,
    context.content,
    elementById,
    insertElementByPosition
  );
  if (shortcut) {
    return shortcut;
  }

  _assert2.default.call(void 0,
    targetElementDescription,
    "cannot find the target element description"
  );
  const instructionText = await findElementPrompt.format({
    pageDescription: description,
    targetElementDescription,
    multi
  });

  const screenshotPart = {
    type: "image_url",
    image_url: {
      url: screenshotBase64WithElementMarker || screenshotBase64,
      detail: "high"
    }
  };
  const instructionPart = {
    type: "text",
    text: instructionText
  };
  const conversation = [
    { role: "system", content: systemPromptToFindElement() },
    { role: "user", content: [screenshotPart, instructionPart] }
  ];

  const askModel = callAI || callToGetJSONObject;
  const reply = await askModel(conversation, 1 /* INSPECT_ELEMENT */);
  const parseResult = await transformElementPositionToId(
    reply.content,
    context.content,
    size,
    screenshotBase64
  );
  return {
    parseResult,
    rawResponse: reply.content,
    elementById,
    usage: reply.usage
  };
}
|
|
2373
|
+
/**
 * Ask the AI model to extract data from the current page.
 *
 * @param {object} options
 * @param {string|object} options.dataQuery - Free-text query, or an object whose keys name the
 *   fields to return (serialized as pretty-printed JSON for the prompt).
 * @param {object} options.context - UI context; its `screenshotBase64` is sent to the model.
 * @returns {Promise<object>} `{ parseResult, elementById, usage }` from the model response.
 */
async function AiExtractElementInfo(options) {
  const { dataQuery, context } = options;
  const systemPrompt = systemPromptToExtract();
  const { screenshotBase64 } = context;
  const { description, elementById } = await describeUserPage(context, liteContextConfig);

  // A string query is passed through as-is; an object query also announces its keys.
  const isTextQuery = typeof dataQuery === "string";
  const dataKeys = isTextQuery ? "" : `return in key-value style object, keys are ${Object.keys(dataQuery).join(",")}`;
  const dataQueryText = isTextQuery ? dataQuery : JSON.stringify(dataQuery, null, 2);

  const extractDataPromptText = await extractDataPrompt.format({
    pageDescription: description,
    dataKeys,
    dataQuery: dataQueryText
  });

  const screenshotPart = {
    type: "image_url",
    image_url: {
      url: screenshotBase64,
      detail: "high"
    }
  };
  const queryPart = {
    type: "text",
    text: extractDataPromptText
  };
  const msgs = [
    { role: "system", content: systemPrompt },
    { role: "user", content: [screenshotPart, queryPart] }
  ];

  const result = await callAiFn(msgs, 2 /* EXTRACT_DATA */);
  return {
    parseResult: result.content,
    elementById,
    usage: result.usage
  };
}
|
|
2424
|
+
/**
 * Ask the AI model to evaluate an assertion against the current page.
 *
 * @param {object} options
 * @param {string} options.assertion - The assertion text; must be truthy.
 * @param {object} options.context - UI context; its `screenshotBase64` is sent to the model.
 * @returns {Promise<object>} `{ content, usage }` taken from the model response.
 */
async function AiAssert(options) {
  const { assertion, context } = options;
  _assert2.default.call(void 0, assertion, "assertion should be a string");
  const { screenshotBase64 } = context;
  const { description } = await describeUserPage(context, liteContextConfig);
  const systemPrompt = systemPromptToAssert();

  // The prompt text is kept flush-left so the string sent to the model is unchanged.
  const assertionPromptText = `
pageDescription:

${description}
Here is the description of the assertion. Just go ahead:
=====================================
${assertion}
=====================================
`;
  const screenshotPart = {
    type: "image_url",
    image_url: {
      url: screenshotBase64,
      detail: "high"
    }
  };
  const assertionPart = {
    type: "text",
    text: assertionPromptText
  };
  const msgs = [
    { role: "system", content: systemPrompt },
    { role: "user", content: [screenshotPart, assertionPart] }
  ];

  const response = await callAiFn(msgs, 0 /* ASSERT */);
  return {
    content: response.content,
    usage: response.usage
  };
}
|
|
2466
|
+
|
|
2467
|
+
// src/ai-model/automation.ts
|
|
2468
|
+
|
|
2469
|
+
/**
 * Ask the AI model to plan the actions needed to fulfil a user prompt.
 *
 * @param {string} userPrompt - The instruction to plan for.
 * @param {object} opts
 * @param {object} opts.context - UI context; its screenshots are sent to the model.
 * @param {Function} [opts.callAI] - Optional override for the model call; defaults to `callAiFn`.
 * @param {*} [opts.originalPrompt] - Forwarded to `generateTaskBackgroundContext`.
 * @param {*} [opts.whatHaveDone] - Forwarded to `generateTaskBackgroundContext`.
 * @returns {Promise<object>} The plan object returned by the model.
 * @throws When the model returns no plan or a plan without actions.
 */
async function plan(userPrompt, opts) {
  const { callAI, context } = opts || {};
  const { screenshotBase64, screenshotBase64WithElementMarker } = context;
  const { description: pageDescription } = await describeUserPage(context);
  const systemPrompt = await systemPromptToTaskPlanning();
  const taskBackgroundContextText = generateTaskBackgroundContext(userPrompt, opts.originalPrompt, opts.whatHaveDone);
  const userInstructionPrompt = await automationUserPrompt.format({
    pageDescription,
    taskBackgroundContext: taskBackgroundContextText
  });

  // Prefer the screenshot with element markers when the context provides one.
  const screenshotPart = {
    type: "image_url",
    image_url: {
      url: screenshotBase64WithElementMarker || screenshotBase64,
      detail: "high"
    }
  };
  const instructionPart = {
    type: "text",
    text: userInstructionPrompt
  };
  const msgs = [
    { role: "system", content: systemPrompt },
    { role: "user", content: [screenshotPart, instructionPart] }
  ];

  const invokeModel = callAI || callAiFn;
  const { content: planFromAI } = await invokeModel(msgs, 3 /* PLAN */);

  _assert2.default.call(void 0, planFromAI, "can't get plans from AI");
  const plannedActions = planFromAI.actions || [];
  _assert2.default.call(void 0,
    plannedActions.length > 0,
    `Failed to plan actions: ${planFromAI.error || "(no error details)"}`
  );
  return planFromAI;
}
|
|
2513
|
+
|
|
2514
|
+
// src/ai-model/prompt/ui-tars-planning.ts
|
|
2515
|
+
/**
 * Instruction preamble for the UI-TARS style planning model.
 *
 * It declares the expected "Thought / Action" output format and the action
 * space that the response parser in this file understands. The caller appends
 * the user's instruction directly after the trailing "## User Instruction" line.
 */
var uiTarsPlanningPrompt = `
You are a GUI agent. You are given a task and your action history, with screenshots. You need to perform the next action to complete the task.

## Output Format

\`\`\`
Thought: ...
Action: ...
\`\`\`

## Action Space
click(start_box='[x1, y1, x2, y2]')
left_double(start_box='[x1, y1, x2, y2]')
right_single(start_box='[x1, y1, x2, y2]')
drag(start_box='[x1, y1, x2, y2]', end_box='[x3, y3, x4, y4]')
hotkey(key='')
type(content='') #If you want to submit your input, use "\\n" at the end of \`content\`.
scroll(start_box='[x1, y1, x2, y2]', direction='down or up or right or left')
wait() #Sleep for 5s and take a screenshot to check for any changes.
finished()
call_user() # Submit the task and call the user when the task is unsolvable, or when you need the user's help.

## Note
- Use Chinese in \`Thought\` part.
- Write a small plan and finally summarize your next action (with its target element) in one sentence in \`Thought\` part.

## User Instruction
`;
|
|
2543
|
+
/**
 * Strip every "Reflection:" section from a model prediction.
 *
 * A section runs from "Reflection:" up to (not including) the next
 * "Action_Summary:" or "Action:" marker, or to the end of the text.
 *
 * @param {string} prediction - Raw model output.
 * @returns {string} The prediction without reflection sections, trimmed.
 */
var getSummary = function (prediction) {
  const reflectionSection = /Reflection:[\s\S]*?(?=Action_Summary:|Action:|$)/g;
  const withoutReflection = prediction.replace(reflectionSection, "");
  return withoutReflection.trim();
};
|
|
2544
|
+
/**
 * Parse raw UI-TARS style model output into structured actions.
 *
 * In "bc" mode the text may start with `Thought:`, `Reflection:` (followed by
 * `Action_Summary:`) or `Action_Summary:`; everything after the last
 * `Action:` marker is treated as the action list. In "o1" mode the thought is
 * read from `<Thought>...</Thought>` and the action from the text between
 * `Action:` and `</Output>`.
 *
 * Actions are separated by blank lines. `start_box` / `end_box` arguments are
 * converted to a JSON array of numbers divided by `factor`; a two-number point
 * is duplicated so the result always has the `[x1, y1, x2, y2]` shape.
 *
 * @param {string} text - Raw model output.
 * @param {number} [factor=1000] - Divisor applied to every box coordinate.
 * @param {string} [mode="bc"] - Output dialect, "bc" or "o1".
 * @returns {Array<object>} One `{ reflection, thought, action_type, action_inputs }`
 *   entry per action that could be parsed; unparseable entries are logged and skipped.
 */
function parseActionFromVlm(text, factor = 1e3, mode = "bc") {
  let reflection = null;
  let thought = null;
  let actionStr = "";
  text = text.trim();
  if (mode === "bc") {
    // [\s\S] is used instead of "." so that sections spanning several lines
    // are captured; "." stops at the first newline and made the match fail.
    if (text.startsWith("Thought:")) {
      const thoughtMatch = text.match(/Thought: ([\s\S]+?)(?=\s*Action:|$)/);
      if (thoughtMatch) {
        thought = thoughtMatch[1].trim();
      }
    } else if (text.startsWith("Reflection:")) {
      const reflectionMatch = text.match(
        /Reflection: ([\s\S]+?)Action_Summary: ([\s\S]+?)(?=\s*Action:|$)/
      );
      if (reflectionMatch) {
        thought = reflectionMatch[2].trim();
        reflection = reflectionMatch[1].trim();
      }
    } else if (text.startsWith("Action_Summary:")) {
      const summaryMatch = text.match(/Action_Summary: ([\s\S]+?)(?=\s*Action:|$)/);
      if (summaryMatch) {
        thought = summaryMatch[1].trim();
      }
    }
    if (!text.includes("Action:")) {
      actionStr = text;
    } else {
      const actionParts = text.split("Action:");
      actionStr = actionParts[actionParts.length - 1];
    }
  } else if (mode === "o1") {
    const thoughtMatch = text.match(/<Thought>\s*(.*?)\s*<\/Thought>/);
    const actionSummaryMatch = text.match(
      /\nAction_Summary:\s*(.*?)\s*Action:/
    );
    const actionMatch = text.match(/\nAction:\s*(.*?)\s*<\/Output>/);
    const thoughtContent = thoughtMatch ? thoughtMatch[1] : null;
    const actionSummaryContent = actionSummaryMatch ? actionSummaryMatch[1] : null;
    const actionContent = actionMatch ? actionMatch[1] : null;
    thought = `${thoughtContent}
<Action_Summary>
${actionSummaryContent}`;
    actionStr = actionContent || "";
  }
  const actions = [];
  for (const rawStr of actionStr.split("\n\n")) {
    // Real newlines are turned into a literal "\n" so one action stays on one line.
    const actionInstance = parseAction(rawStr.replace(/\n/g, "\\n").trim());
    if (!actionInstance) {
      console.log(`Action can't parse: ${rawStr}`);
      continue;
    }
    const actionType = actionInstance.function;
    const actionInputs = {};
    for (const [paramName, param] of Object.entries(actionInstance.args)) {
      if (!param)
        continue;
      const inputName = paramName.trim();
      const trimmedParam = param.trim();
      if (paramName.includes("start_box") || paramName.includes("end_box")) {
        // The prompt advertises '[x1, y1, x2, y2]' while models also emit
        // '(x,y)'; strip both bracket styles before reading the numbers.
        const floatNumbers = trimmedParam.replace(/[()[\]]/g, "").split(",").map(
          (num) => Number.parseFloat(num) / factor
        );
        if (floatNumbers.length === 2) {
          floatNumbers.push(floatNumbers[0], floatNumbers[1]);
        }
        actionInputs[inputName] = JSON.stringify(floatNumbers);
      } else {
        actionInputs[inputName] = trimmedParam;
      }
    }
    actions.push({
      reflection,
      thought,
      action_type: actionType,
      // "finished" never carries inputs, whatever the model wrote.
      action_inputs: actionType === "finished" ? {} : actionInputs
    });
  }
  return actions;
}

/**
 * Parse a single `name(key='value', ...)` call expression.
 *
 * Commas inside single- or double-quoted values do not split arguments.
 *
 * @param {string} actionStr - One action, e.g. `click(start_box='(1,2)')`.
 * @returns {{function: string, args: Object<string, string>}|null} The parsed
 *   call, or `null` (after logging) when the text is not a function call.
 */
function parseAction(actionStr) {
  try {
    const functionPattern = /^(\w+)\((.*)\)$/;
    const match = actionStr.trim().match(functionPattern);
    if (!match) {
      throw new Error("Not a function call");
    }
    const [, functionName, argsStr] = match;
    const kwargs = {};
    if (argsStr.trim()) {
      // A run of: plain characters, a complete '...' or "..." literal, or a
      // stray double quote (kept as an ordinary character, as before).
      const argPairs = argsStr.match(/(?:[^,'"]|'[^']*'|"[^"]*"|")+/g) || [];
      for (const pair of argPairs) {
        const [key, ...valueParts] = pair.split("=");
        if (!key)
          continue;
        // Re-join on "=" so values that themselves contain "=" survive.
        const value = valueParts.join("=").trim().replace(/^['"]|['"]$/g, "");
        kwargs[key.trim()] = value;
      }
    }
    return {
      function: functionName,
      args: kwargs
    };
  } catch (e) {
    console.error(`Failed to parse action '${actionStr}': ${e}`);
    return null;
  }
}
|
|
2663
|
+
|
|
2664
|
+
// src/ai-model/vlm-planning.ts
|
|
2665
|
+
/**
 * Upper-case the first character of a string, leaving the rest untouched.
 *
 * @param {string} str - Text to capitalize.
 * @returns {string} The text with its first character upper-cased.
 */
function capitalize(str) {
  const firstChar = str.charAt(0);
  const remainder = str.slice(1);
  return `${firstChar.toUpperCase()}${remainder}`;
}
|
|
2668
|
+
/**
 * Ask the UI-TARS style model for the next step and translate its actions
 * into planning actions.
 *
 * Translation table (model action -> emitted action types):
 *   click -> Locate + Tap, type -> Input, scroll -> Scroll,
 *   finished -> Finished, hotkey -> one KeyboardPress per key, wait -> Sleep.
 * Any other model action is left out of `actions` but stays in `realActions`.
 *
 * @param {object} options
 * @param {Array} options.conversationHistory - Prior messages appended after the prompt.
 * @param {string} options.userInstruction - Appended to the planning prompt.
 * @param {{width: number, height: number}} options.size - Used to scale click positions.
 * @returns {Promise<object>} `{ actions, realActions, action_summary }`.
 */
async function vlmPlanning(options) {
  const { conversationHistory, userInstruction, size } = options;
  const systemPrompt = uiTarsPlanningPrompt + userInstruction;
  const res = await call(
    [
      {
        role: "user",
        content: systemPrompt
      },
      ...conversationHistory
    ],
    1 /* INSPECT_ELEMENT */
  );
  const actions = parseActionFromVlm(res.content);
  const transformActions = [];
  for (const action of actions) {
    const thought = action.thought || "";
    const inputs = action.action_inputs || {};
    switch (action.action_type) {
      case "click": {
        const point = getPoint(inputs.start_box, size);
        transformActions.push({
          type: "Locate",
          locate: {
            prompt: thought,
            position: { x: point[0], y: point[1] }
          },
          param: {}
        });
        transformActions.push({
          type: "Tap",
          locate: {
            prompt: thought,
            position: { x: point[0], y: point[1] }
          },
          param: thought
        });
        break;
      }
      case "type":
        transformActions.push({
          type: "Input",
          param: {
            value: inputs.content
          },
          locate: null,
          thought
        });
        break;
      case "scroll":
        transformActions.push({
          type: "Scroll",
          param: {
            direction: inputs.direction
          },
          locate: null,
          thought
        });
        break;
      case "finished":
        transformActions.push({
          type: "Finished",
          param: {},
          locate: null,
          thought
        });
        break;
      case "hotkey": {
        // The parser drops empty arguments, so `hotkey(key='')` arrives with
        // no `key` at all; treat that as "no keys" instead of throwing.
        const keys = typeof inputs.key === "string" ? inputs.key.split(",") : [];
        for (const rawKey of keys) {
          // "ctrl, c" must yield "C", not " c".
          const key = rawKey.trim();
          if (!key)
            continue;
          transformActions.push({
            type: "KeyboardPress",
            param: {
              value: capitalize(key)
            },
            locate: null,
            thought
          });
        }
        break;
      }
      case "wait": {
        // The prompt defines `wait()` without arguments as a 5s sleep, so
        // fall back to 5000ms when the model supplies no explicit time.
        const hasTime = inputs.time !== void 0 && inputs.time !== null && inputs.time !== "";
        transformActions.push({
          type: "Sleep",
          param: {
            timeMs: hasTime ? inputs.time : 5000
          },
          locate: null,
          thought
        });
        break;
      }
      default:
        break;
    }
  }
  return {
    actions: transformActions,
    realActions: actions,
    action_summary: getSummary(res.content)
  };
}
|
|
2759
|
+
/**
 * Convert a normalized box into a pixel position to act on.
 *
 * The box is a JSON array `[x1, y1, x2, y2]` of fractions of the page size.
 * The centre of the box is returned, so a click lands inside the element
 * rather than on its top-left corner. When the second corner is missing or
 * not a finite number, the first corner is used as the point; a point that
 * was duplicated into `[x, y, x, y]` therefore maps to the same position.
 *
 * @param {string} startBox - JSON-encoded array of normalized coordinates.
 * @param {{width: number, height: number}} size - Page size in pixels.
 * @returns {[number, number]} `[x, y]` in pixels.
 * @throws {SyntaxError} When `startBox` is not valid JSON.
 */
function getPoint(startBox, size) {
  const [x1, y1, x2, y2] = JSON.parse(startBox);
  const centerX = Number.isFinite(x2) ? (x1 + x2) / 2 : x1;
  const centerY = Number.isFinite(y2) ? (y1 + y2) / 2 : y1;
  return [centerX * size.width, centerY * size.height];
}
|
|
2763
|
+
|
|
2764
|
+
|
|
2765
|
+
|
|
2766
|
+
|
|
2767
|
+
|
|
2768
|
+
|
|
2769
|
+
|
|
2770
|
+
|
|
2771
|
+
|
|
2772
|
+
|
|
2773
|
+
|
|
2774
|
+
|
|
2775
|
+
|
|
2776
|
+
|
|
2777
|
+
|
|
2778
|
+
|
|
2779
|
+
|
|
2780
|
+
|
|
2781
|
+
|
|
2782
|
+
exports.BaseElement = BaseElement; exports.AIResponseFormat = AIResponseFormat; exports.UIContext = UIContext; exports.systemPromptToFindElement = systemPromptToFindElement; exports.describeUserPage = describeUserPage; exports.retrieveElement = retrieveElement; exports.ifElementTypeResponse = ifElementTypeResponse; exports.splitElementResponse = splitElementResponse; exports.retrieveSection = retrieveSection; exports.callToGetJSONObject = callToGetJSONObject; exports.callAiFn = callAiFn; exports.transformElementPositionToId = transformElementPositionToId; exports.AiInspectElement = AiInspectElement; exports.AiExtractElementInfo = AiExtractElementInfo; exports.AiAssert = AiAssert; exports.plan = plan; exports.vlmPlanning = vlmPlanning;
|
|
2783
|
+
/*! Bundled license information:
|
|
2784
|
+
|
|
2785
|
+
string.fromcodepoint/fromcodepoint.js:
|
|
2786
|
+
(*! http://mths.be/fromcodepoint v0.2.1 by @mathias *)
|
|
2787
|
+
|
|
2788
|
+
utf8/utf8.js:
|
|
2789
|
+
(*! https://mths.be/utf8js v3.0.0 by @mathias *)
|
|
2790
|
+
*/
|