@ai-sdk-tool/eval 1.0.0 → 1.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +9 -3
- package/data/BFCL_v4_multi_turn_base.jsonl +200 -0
- package/data/BFCL_v4_multi_turn_long_context.jsonl +200 -0
- package/data/BFCL_v4_multi_turn_miss_func.jsonl +200 -0
- package/data/BFCL_v4_multi_turn_miss_param.jsonl +200 -0
- package/data/multi_turn_func_doc/gorilla_file_system.jsonl +18 -0
- package/data/multi_turn_func_doc/math_api.jsonl +17 -0
- package/data/multi_turn_func_doc/memory_kv.jsonl +15 -0
- package/data/multi_turn_func_doc/memory_rec_sum.jsonl +5 -0
- package/data/multi_turn_func_doc/memory_vector.jsonl +12 -0
- package/data/multi_turn_func_doc/message_api.jsonl +10 -0
- package/data/multi_turn_func_doc/posting_api.jsonl +14 -0
- package/data/multi_turn_func_doc/ticket_api.jsonl +9 -0
- package/data/multi_turn_func_doc/trading_bot.jsonl +20 -0
- package/data/multi_turn_func_doc/travel_booking.jsonl +18 -0
- package/data/multi_turn_func_doc/vehicle_control.jsonl +22 -0
- package/data/multi_turn_func_doc/web_search.jsonl +2 -0
- package/data/possible_answer/BFCL_v4_multi_turn_base.jsonl +200 -0
- package/data/possible_answer/BFCL_v4_multi_turn_long_context.jsonl +200 -0
- package/data/possible_answer/BFCL_v4_multi_turn_miss_func.jsonl +200 -0
- package/data/possible_answer/BFCL_v4_multi_turn_miss_param.jsonl +200 -0
- package/dist/index.cjs +4526 -62
- package/dist/index.cjs.map +1 -1
- package/dist/index.d.cts +17 -1
- package/dist/index.d.ts +17 -1
- package/dist/index.js +4525 -62
- package/dist/index.js.map +1 -1
- package/package.json +5 -4
package/dist/index.cjs
CHANGED
|
@@ -30,12 +30,17 @@ var __toCommonJS = (mod) => __copyProps(__defProp({}, "__esModule", { value: tru
|
|
|
30
30
|
// src/index.ts
|
|
31
31
|
var index_exports = {};
|
|
32
32
|
__export(index_exports, {
|
|
33
|
+
bfclMultiTurnBaseBenchmark: () => bfclMultiTurnBaseBenchmark,
|
|
34
|
+
bfclMultiTurnLongContextBenchmark: () => bfclMultiTurnLongContextBenchmark,
|
|
35
|
+
bfclMultiTurnMissFuncBenchmark: () => bfclMultiTurnMissFuncBenchmark,
|
|
36
|
+
bfclMultiTurnMissParamBenchmark: () => bfclMultiTurnMissParamBenchmark,
|
|
33
37
|
bfclMultipleBenchmark: () => bfclMultipleBenchmark,
|
|
34
38
|
bfclParallelBenchmark: () => bfclParallelBenchmark,
|
|
35
39
|
bfclParallelMultipleBenchmark: () => bfclParallelMultipleBenchmark,
|
|
36
40
|
bfclSimpleBenchmark: () => bfclSimpleBenchmark,
|
|
37
41
|
complexFuncBenchBenchmark: () => complexFuncBenchBenchmark,
|
|
38
42
|
evaluate: () => evaluate,
|
|
43
|
+
executeMultiTurnFuncCall: () => executeMultiTurnFuncCall,
|
|
39
44
|
jsonGenerationBenchmark: () => jsonGenerationBenchmark,
|
|
40
45
|
jsonGenerationSchemaOnlyBenchmark: () => jsonGenerationSchemaOnlyBenchmark
|
|
41
46
|
});
|
|
@@ -565,33 +570,33 @@ function createBfclBenchmark(name, description, testDataFile, answerDataFile) {
|
|
|
565
570
|
);
|
|
566
571
|
}
|
|
567
572
|
};
|
|
568
|
-
const
|
|
573
|
+
const fixSchema3 = (schema) => {
|
|
569
574
|
if (!schema || typeof schema !== "object") {
|
|
570
575
|
return { type: "object", properties: {} };
|
|
571
576
|
}
|
|
572
|
-
const copy = Array.isArray(schema) ? schema.map((v) =>
|
|
577
|
+
const copy = Array.isArray(schema) ? schema.map((v) => fixSchema3(v)) : { ...schema };
|
|
573
578
|
if (!Array.isArray(copy)) {
|
|
574
579
|
fixSchemaType2(copy);
|
|
575
|
-
fixSchemaProperties(copy,
|
|
580
|
+
fixSchemaProperties(copy, fixSchema3);
|
|
576
581
|
if (copy.items) {
|
|
577
|
-
copy.items =
|
|
582
|
+
copy.items = fixSchema3(copy.items);
|
|
578
583
|
}
|
|
579
584
|
return copy;
|
|
580
585
|
}
|
|
581
586
|
return copy;
|
|
582
587
|
};
|
|
583
588
|
/**
 * Flattens a message list by one level when at least one entry is itself an
 * array; any other value (including non-arrays) is returned untouched.
 */
const flattenMessages = (messages) => {
  if (!Array.isArray(messages)) {
    return messages;
  }
  const containsNestedList = messages.some((entry) => Array.isArray(entry));
  return containsNestedList ? messages.flat(1) : messages;
};
|
|
584
|
-
const
|
|
589
|
+
/**
 * Produces a tool name restricted to letters, digits, "_" and "-", truncated
 * to 64 characters. Falls back to "tool" when nothing is left.
 */
const sanitizeName2 = (toolName) => {
  const MAX_NAME_LENGTH = 64;
  const replaced = toolName.replace(/[^a-zA-Z0-9_-]/g, "_");
  const truncated = replaced.slice(0, MAX_NAME_LENGTH);
  if (truncated.length === 0) {
    return "tool";
  }
  return truncated;
};
|
|
588
|
-
const
|
|
593
|
+
const buildTransformedTools2 = (tools, fixSchemaFn) => {
|
|
589
594
|
const nameMap = /* @__PURE__ */ new Map();
|
|
590
595
|
const transformedTools = tools.map((t) => {
|
|
591
596
|
const fixed = fixSchemaFn(t.parameters);
|
|
592
597
|
const isObjectSchema = fixed && typeof fixed === "object" && fixed.type === "object";
|
|
593
598
|
const inputSchema = isObjectSchema ? fixed : { type: "object", properties: {} };
|
|
594
|
-
const sanitized =
|
|
599
|
+
const sanitized = sanitizeName2(t.name);
|
|
595
600
|
nameMap.set(sanitized, t.name);
|
|
596
601
|
return {
|
|
597
602
|
type: "function",
|
|
@@ -613,14 +618,14 @@ function createBfclBenchmark(name, description, testDataFile, answerDataFile) {
|
|
|
613
618
|
return [];
|
|
614
619
|
}
|
|
615
620
|
};
|
|
616
|
-
const
|
|
621
|
+
/**
 * Resolves a tool name reported by the model. When `rawName` is a string
 * accepted by NUMERIC_STRING_REGEX it is used as an index into
 * `transformedTools` and that tool's `name` is returned (falling back to
 * `rawName` when the slot or its name is missing); otherwise `rawName` is
 * returned unchanged.
 */
const getSanitizedName2 = (rawName, transformedTools) => {
  const looksLikeIndex = typeof rawName === "string" && NUMERIC_STRING_REGEX.test(rawName);
  if (!looksLikeIndex) {
    return rawName;
  }
  const indexedTool = transformedTools[Number(rawName)];
  const indexedName = indexedTool == null ? void 0 : indexedTool.name;
  return indexedName != null ? indexedName : rawName;
};
|
|
623
|
-
const
|
|
628
|
+
const parseToolArgs2 = (extractedArgs) => {
|
|
624
629
|
if (typeof extractedArgs !== "string") {
|
|
625
630
|
return extractedArgs;
|
|
626
631
|
}
|
|
@@ -630,17 +635,17 @@ function createBfclBenchmark(name, description, testDataFile, answerDataFile) {
|
|
|
630
635
|
return extractedArgs;
|
|
631
636
|
}
|
|
632
637
|
};
|
|
633
|
-
const
|
|
638
|
+
const restoreToolCalls2 = (toolCalls, nameMap, transformedTools) => (toolCalls || []).map((c) => {
|
|
634
639
|
var _a, _b, _c, _d, _e, _f;
|
|
635
640
|
const call = c;
|
|
636
641
|
const rawName = (_a = call.toolName) != null ? _a : call.name;
|
|
637
|
-
const sanitizedFromIndex =
|
|
642
|
+
const sanitizedFromIndex = getSanitizedName2(
|
|
638
643
|
rawName,
|
|
639
644
|
transformedTools
|
|
640
645
|
);
|
|
641
646
|
const originalName = (_b = nameMap.get(sanitizedFromIndex)) != null ? _b : sanitizedFromIndex;
|
|
642
647
|
const extractedArgs = (_f = (_e = (_d = (_c = call.args) != null ? _c : call.arguments) != null ? _d : call.input) != null ? _e : call.params) != null ? _f : call.parameters;
|
|
643
|
-
const parsedArgs =
|
|
648
|
+
const parsedArgs = parseToolArgs2(extractedArgs);
|
|
644
649
|
return {
|
|
645
650
|
...call,
|
|
646
651
|
toolName: originalName,
|
|
@@ -1036,7 +1041,7 @@ function createBfclBenchmark(name, description, testDataFile, answerDataFile) {
|
|
|
1036
1041
|
caseLogs.push(`[DEBUG] ${testCase.id}: failed to build debug diff`);
|
|
1037
1042
|
}
|
|
1038
1043
|
};
|
|
1039
|
-
const
|
|
1044
|
+
const buildToolsMap2 = (transformedTools) => Object.fromEntries(
|
|
1040
1045
|
transformedTools.map((t) => [
|
|
1041
1046
|
t.name,
|
|
1042
1047
|
(0, import_ai.tool)({
|
|
@@ -1109,11 +1114,11 @@ function createBfclBenchmark(name, description, testDataFile, answerDataFile) {
|
|
|
1109
1114
|
const prepareTestCaseData = (testCase) => {
|
|
1110
1115
|
const { function: tools, question: messages } = testCase;
|
|
1111
1116
|
const flatMessages = flattenMessages(messages);
|
|
1112
|
-
const { transformedTools, nameMap } =
|
|
1117
|
+
const { transformedTools, nameMap } = buildTransformedTools2(
|
|
1113
1118
|
tools,
|
|
1114
|
-
|
|
1119
|
+
fixSchema3
|
|
1115
1120
|
);
|
|
1116
|
-
const toolsMap =
|
|
1121
|
+
const toolsMap = buildToolsMap2(transformedTools);
|
|
1117
1122
|
return { flatMessages, transformedTools, nameMap, toolsMap };
|
|
1118
1123
|
};
|
|
1119
1124
|
const processModelResponse = (options) => {
|
|
@@ -1160,7 +1165,7 @@ function createBfclBenchmark(name, description, testDataFile, answerDataFile) {
|
|
|
1160
1165
|
testCaseId: testCase.id,
|
|
1161
1166
|
caseLogs
|
|
1162
1167
|
});
|
|
1163
|
-
const restoredCalls =
|
|
1168
|
+
const restoredCalls = restoreToolCalls2(
|
|
1164
1169
|
toolCalls || [],
|
|
1165
1170
|
nameMap,
|
|
1166
1171
|
transformedTools
|
|
@@ -1310,11 +1315,4416 @@ var bfclParallelMultipleBenchmark = createBfclBenchmark(
|
|
|
1310
1315
|
"BFCL_v4_parallel_multiple_possible_answer.jsonl"
|
|
1311
1316
|
);
|
|
1312
1317
|
|
|
1318
|
+
// src/benchmarks/bfcl-multi-turn.ts
|
|
1319
|
+
var import_node_fs3 = require("fs");
|
|
1320
|
+
var import_node_path3 = __toESM(require("path"), 1);
|
|
1321
|
+
var import_ai2 = require("ai");
|
|
1322
|
+
|
|
1323
|
+
// src/multi-turn/classes/gorilla-file-system.ts
|
|
1324
|
+
var TRAILING_SLASHES_REGEX = /\/+$/;
|
|
1325
|
+
var PATH_TRAILING_SLASHES_REGEX = /\/+$/;
|
|
1326
|
+
var WHITESPACE_REGEX = /\s+/;
|
|
1327
|
+
/**
 * Leaf node of the in-memory file system: a name plus its text content.
 */
var File = class {
  /**
   * @param name    Entry name of the file.
   * @param content Initial text; defaults to the empty string.
   */
  constructor(name, content = "") {
    // Assigned in this order so the own-property order stays name, content.
    Object.assign(this, { name, content });
  }

  /** Replaces the whole content of the file. */
  _write(newContent) {
    this.content = newContent;
  }

  /** Returns the current content of the file. */
  _read() {
    const { content } = this;
    return content;
  }
};
|
|
1339
|
+
/**
 * Directory node of the in-memory file system.
 *
 * `contents` maps entry names to File or Directory nodes and `parent` links
 * back to the containing directory (null for a root).
 */
var Directory = class _Directory {
  /**
   * @param name   Entry name of the directory.
   * @param parent Containing directory, or null for a root.
   */
  constructor(name, parent = null) {
    this.name = name;
    this.parent = parent;
    this.contents = {};
  }

  /** Adds (or overwrites) a file entry with the given content. */
  _addFile(fileName, content = "") {
    this.contents[fileName] = new File(fileName, content);
  }

  /** Adds (or overwrites) an empty sub-directory whose parent is this directory. */
  _addDirectory(dirName) {
    this.contents[dirName] = new _Directory(dirName, this);
  }

  /**
   * Looks up an entry by name. "." resolves to this directory; unknown names
   * yield null.
   */
  _getItem(itemName) {
    if (itemName === ".") {
      return this;
    }
    // `contents` is a plain object: without the own-property guard a name such
    // as "toString" or "constructor" would return an Object.prototype member.
    if (!Object.prototype.hasOwnProperty.call(this.contents, itemName)) {
      return null;
    }
    return this.contents[itemName] || null;
  }

  /** Returns the entry names in insertion order. */
  _listContents() {
    return Object.keys(this.contents);
  }

  /**
   * JSON view without the `parent` back-reference. `parent` and `contents`
   * reference each other, so serializing the raw object throws a
   * circular-structure TypeError as soon as a sub-directory is involved.
   */
  toJSON() {
    return { name: this.name, contents: this.contents };
  }
};
|
|
1361
|
+
/**
 * In-memory, shell-like file system used by the multi-turn benchmark.
 *
 * Every command works on the current directory and returns a plain result
 * object; failures are reported as `{ error: string }` instead of throwing.
 */
var GorillaFileSystem = class _GorillaFileSystem {
  /** Starts with an empty root directory named "/" as the current directory. */
  constructor() {
    this.root = new Directory("/", null);
    this._currentDir = this.root;
  }

  /**
   * Rebuilds the tree from a scenario object. Only the first key of
   * `scenario.root` is used; it becomes the root directory and its `contents`
   * are loaded recursively. The current directory is reset to the new root.
   */
  // biome-ignore lint/suspicious/noExplicitAny: Dynamic scenario loading from JSON
  _loadScenario(scenario, _longContext = false) {
    this.root = new Directory("/", null);
    if (scenario == null ? void 0 : scenario.root) {
      const rootKeys = Object.keys(scenario.root);
      if (rootKeys.length > 0) {
        const rootDirName = rootKeys[0];
        const rootDir = new Directory(rootDirName, null);
        this.root = this._loadDirectory(
          scenario.root[rootDirName].contents || {},
          rootDir
        );
      }
    }
    this._currentDir = this.root;
  }

  /**
   * Populates `parent` from a `{ name: { type, contents | content } }` map and
   * returns `parent`. Entries whose type is neither "directory" nor "file"
   * are ignored.
   */
  _loadDirectory(current, parent) {
    for (const [name, data] of Object.entries(current)) {
      if (data.type === "directory") {
        const newDir = new Directory(name, parent);
        parent.contents[name] = this._loadDirectory(data.contents || {}, newDir);
      } else if (data.type === "file") {
        parent.contents[name] = new File(name, data.content || "");
      }
    }
    return parent;
  }

  /**
   * Returns the entry stored under `name` in `directory`, or null. Only own
   * properties count: `contents` is a plain object, so an `in` test would
   * also match inherited keys such as "constructor" or "toString".
   */
  _ownItem(directory, name) {
    const { contents } = directory;
    return Object.prototype.hasOwnProperty.call(contents, name) ? contents[name] : null;
  }

  /** Returns `{ current_working_directory }` as an absolute path. */
  pwd() {
    const names = [];
    for (let dir = this._currentDir; dir !== null; dir = dir.parent) {
      // A root literally named "/" is already covered by the leading slash;
      // pushing it would produce "//" for the default root.
      if (!(dir.parent === null && dir.name === "/")) {
        names.push(dir.name);
      }
    }
    return { current_working_directory: `/${names.reverse().join("/")}` };
  }

  /** Lists entry names of the current directory; dot-entries only when `a` is truthy. */
  ls(a = false) {
    let contents = this._currentDir._listContents();
    if (!a) {
      contents = contents.filter((item) => !item.startsWith("."));
    }
    return { current_directory_content: contents };
  }

  /**
   * Changes the current directory by one level ("..", "/", "." or a direct
   * child name). Multi-level paths are rejected.
   */
  cd(folder) {
    let normalizedFolder = folder.replace(TRAILING_SLASHES_REGEX, "");
    if (normalizedFolder === "") {
      // The input consisted only of slashes.
      normalizedFolder = "/";
    }
    if (normalizedFolder !== "." && normalizedFolder !== ".." && normalizedFolder !== "/" && normalizedFolder.includes("/")) {
      return {
        error: `cd: ${normalizedFolder}: Unsupported path. Only one folder level at a time is supported.`
      };
    }
    if (normalizedFolder === "..") {
      if (this._currentDir.parent) {
        this._currentDir = this._currentDir.parent;
        return {};
      }
      if (this.root === this._currentDir) {
        return {
          error: "Current directory is already the root. Cannot go back."
        };
      }
      return { error: "cd: ..: No such directory" };
    }
    const targetDir = this._navigateToDirectory(normalizedFolder);
    if (targetDir && "error" in targetDir) {
      return targetDir;
    }
    if (targetDir instanceof Directory) {
      this._currentDir = targetDir;
      return { current_working_directory: targetDir.name };
    }
    return { error: `cd: ${normalizedFolder}: No such file or directory` };
  }

  /** Creates an empty directory in the current directory; returns null on success. */
  mkdir(dir_name) {
    if (this._ownItem(this._currentDir, dir_name) !== null) {
      return {
        error: `mkdir: cannot create directory '${dir_name}': File exists`
      };
    }
    this._currentDir._addDirectory(dir_name);
    return null;
  }

  /** Creates an empty file in the current directory; returns null on success. */
  touch(file_name) {
    if (this._ownItem(this._currentDir, file_name) !== null) {
      return { error: `touch: cannot touch '${file_name}': File exists` };
    }
    this._currentDir._addFile(file_name);
    return null;
  }

  /**
   * Without `file_name`, echoes `content` back as terminal output. With it,
   * overwrites an existing file of the current directory (returns null).
   */
  echo(content, file_name) {
    if (file_name === void 0 || file_name === null) {
      return { terminal_output: content };
    }
    const item = this._ownItem(this._currentDir, file_name);
    if (item instanceof File) {
      item._write(content);
      return null;
    }
    return { error: `echo: cannot write to '${file_name}': No such file` };
  }

  /** Returns the content of a file in the current directory. */
  cat(file_name) {
    const item = this._ownItem(this._currentDir, file_name);
    if (item === null) {
      return { error: `cat: '${file_name}': No such file or directory` };
    }
    if (item instanceof File) {
      return { file_content: item._read() };
    }
    return { error: `cat: '${file_name}': Is a directory` };
  }

  /**
   * Recursively lists paths under `path6` whose entry name contains `name`
   * (all entries when `name` is null/undefined). A null path means ".".
   */
  find(path6 = ".", name) {
    // The default parameter only covers undefined; null used to crash on
    // `.replace` further down.
    const searchPath = path6 === null ? "." : path6;
    const targetDir = this._navigateToDirectory(searchPath);
    if (targetDir && "error" in targetDir) {
      const errMsg = targetDir.error || "";
      if (errMsg.startsWith("cd:")) {
        return { error: errMsg.replace("cd:", "find:") };
      }
      return targetDir;
    }
    const matches = [];
    const recursiveSearch = (directory, basePath) => {
      for (const [itemName, item] of Object.entries(directory.contents)) {
        const itemPath = `${basePath}/${itemName}`;
        if (name === void 0 || name === null || itemName.includes(name)) {
          matches.push(itemPath);
        }
        if (item instanceof Directory) {
          recursiveSearch(item, itemPath);
        }
      }
    };
    if (targetDir instanceof Directory) {
      recursiveSearch(targetDir, searchPath.replace(PATH_TRAILING_SLASHES_REGEX, ""));
    }
    return { matches };
  }

  /** Counts lines ("l"), words ("w") or characters ("c") of a file. */
  wc(file_name, mode = "l") {
    if (!["l", "w", "c"].includes(mode)) {
      return { error: `wc: invalid mode '${mode}'` };
    }
    const file = this._ownItem(this._currentDir, file_name);
    if (file instanceof File) {
      const content = file._read();
      if (mode === "l") {
        return { count: content.split("\n").length, type: "lines" };
      }
      if (mode === "w") {
        return {
          count: content.split(WHITESPACE_REGEX).filter(Boolean).length,
          type: "words"
        };
      }
      return { count: content.length, type: "characters" };
    }
    return { error: `wc: ${file_name}: No such file or directory` };
  }

  /** Returns the lines of a file sorted with the default string ordering. */
  sort(file_name) {
    const file = this._ownItem(this._currentDir, file_name);
    if (file instanceof File) {
      const sortedContent = file._read().split("\n").sort().join("\n");
      return { sorted_content: sortedContent };
    }
    return { error: `sort: ${file_name}: No such file or directory` };
  }

  /** Returns the lines of a file that contain `pattern` as a plain substring. */
  grep(file_name, pattern) {
    const file = this._ownItem(this._currentDir, file_name);
    if (file instanceof File) {
      const matchingLines = file._read().split("\n").filter((line) => line.includes(pattern));
      return { matching_lines: matchingLines };
    }
    return { error: `grep: ${file_name}: No such file or directory` };
  }

  /**
   * Reports the UTF-8 byte size of every file below the current directory,
   * either as "<n> bytes" or, when `human_readable`, scaled to B/KB/MB/GB/TB.
   */
  du(human_readable = false) {
    const getSize = (item) => {
      if (item instanceof File) {
        return new TextEncoder().encode(item._read()).length;
      }
      if (item instanceof Directory) {
        return Object.values(item.contents).reduce(
          (acc, child) => acc + getSize(child),
          0
        );
      }
      return 0;
    };
    const totalSize = getSize(this._currentDir);
    if (human_readable) {
      const units = ["B", "KB", "MB", "GB", "TB"];
      let unitIndex = 0;
      let size = totalSize;
      while (size >= 1024 && unitIndex < units.length - 1) {
        size /= 1024;
        unitIndex++;
      }
      return { disk_usage: `${size.toFixed(2)} ${units[unitIndex]}` };
    }
    return { disk_usage: `${totalSize} bytes` };
  }

  /** Returns the last `lines` lines of a file (nothing when `lines` <= 0). */
  tail(file_name, lines = 10) {
    const file = this._ownItem(this._currentDir, file_name);
    if (file instanceof File) {
      const content = file._read().split("\n");
      const actualLines = Math.min(lines, content.length);
      // slice(-0) is slice(0) and would return the whole file, so a
      // non-positive count is handled explicitly.
      const lastLines = actualLines > 0 ? content.slice(-actualLines) : [];
      return { last_lines: lastLines.join("\n") };
    }
    return { error: `tail: ${file_name}: No such file or directory` };
  }

  /** Line-by-line comparison of two files; differing lines are reported as "- a\n+ b". */
  diff(file_name1, file_name2) {
    const file1 = this._ownItem(this._currentDir, file_name1);
    const file2 = this._ownItem(this._currentDir, file_name2);
    if (file1 instanceof File && file2 instanceof File) {
      const content1 = file1._read().split("\n");
      const content2 = file2._read().split("\n");
      const diffLines = [];
      const maxLen = Math.max(content1.length, content2.length);
      for (let i = 0; i < maxLen; i++) {
        const line1 = content1[i] || "";
        const line2 = content2[i] || "";
        if (line1 !== line2) {
          diffLines.push(`- ${line1}
+ ${line2}`);
        }
      }
      return { diff_lines: diffLines.join("\n") };
    }
    return {
      error: `diff: ${file_name1} or ${file_name2}: No such file or directory`
    };
  }

  /**
   * Places `item` under `name` in `directory` as part of a move. Files are
   * re-created from their content. Directories are re-linked as the same
   * object, so the `parent` pointers of everything below stay valid.
   */
  _attachMoved(directory, name, item) {
    if (item instanceof File) {
      directory._addFile(name, item.content);
      return;
    }
    item.name = name;
    item.parent = directory;
    directory.contents[name] = item;
  }

  /**
   * Moves or renames an entry of the current directory. If `destination` is
   * an existing directory the entry is moved into it, otherwise the entry is
   * renamed to `destination`.
   */
  mv(source, destination) {
    const cwd = this._currentDir;
    const item = this._ownItem(cwd, source);
    if (item === null) {
      return {
        error: `mv: cannot move '${source}': No such file or directory`
      };
    }
    if (!(item instanceof File || item instanceof Directory)) {
      return { error: `mv: cannot move '${source}': Not a file or directory` };
    }
    if (destination.includes("/")) {
      return {
        error: "mv: path not allowed in destination. Provide only a file or directory name."
      };
    }
    const destItem = this._ownItem(cwd, destination);
    if (destItem !== null) {
      if (!(destItem instanceof Directory)) {
        return {
          error: `mv: cannot move '${source}' to '${destination}': Not a directory`
        };
      }
      if (this._ownItem(destItem, source) !== null) {
        return {
          error: `mv: cannot move '${source}' to '${destination}/${source}': File exists`
        };
      }
      if (destItem === item) {
        // Moving a directory into itself would detach it from the tree and
        // make it contain itself.
        return {
          error: `mv: cannot move '${source}' to a subdirectory of itself, '${destination}/${source}'`
        };
      }
      delete cwd.contents[source];
      this._attachMoved(destItem, source, item);
      return { result: `'${source}' moved to '${destination}/${source}'` };
    }
    delete cwd.contents[source];
    this._attachMoved(cwd, destination, item);
    return { result: `'${source}' moved to '${destination}'` };
  }

  /** Removes an entry (file or directory) from the current directory. */
  rm(file_name) {
    if (this._ownItem(this._currentDir, file_name) !== null) {
      delete this._currentDir.contents[file_name];
      return { result: `'${file_name}' removed` };
    }
    return {
      error: `rm: cannot remove '${file_name}': No such file or directory`
    };
  }

  /** Removes an empty directory from the current directory. */
  rmdir(dir_name) {
    const item = this._ownItem(this._currentDir, dir_name);
    if (item === null) {
      return {
        error: `rmdir: cannot remove '${dir_name}': No such file or directory`
      };
    }
    if (!(item instanceof Directory)) {
      return { error: `rmdir: cannot remove '${dir_name}': Not a directory` };
    }
    if (Object.keys(item.contents).length > 0) {
      return {
        error: `rmdir: cannot remove '${dir_name}': Directory not empty`
      };
    }
    delete this._currentDir.contents[dir_name];
    return { result: `'${dir_name}' removed` };
  }

  /**
   * Deep-copies a directory under a new name and parent. Files are duplicated
   * and sub-directories are cloned with `parent` set to their clone, so the
   * copy shares no mutable node with the source.
   */
  _cloneDirectory(sourceDir, name, parent) {
    const clone = new Directory(name, parent);
    for (const [childName, child] of Object.entries(sourceDir.contents)) {
      if (child instanceof Directory) {
        clone.contents[childName] = this._cloneDirectory(child, child.name, clone);
      } else if (child instanceof File) {
        clone.contents[childName] = new File(child.name, child.content);
      } else {
        clone.contents[childName] = child;
      }
    }
    return clone;
  }

  /** Stores a copy of `item` under `name` in `directory`. */
  _attachCopy(directory, name, item) {
    if (item instanceof File) {
      directory._addFile(name, item.content);
      return;
    }
    // The clone is built before it is inserted, so copying a directory into
    // itself cannot produce a self-containing node.
    const clone = this._cloneDirectory(item, name, directory);
    directory.contents[name] = clone;
  }

  /**
   * Copies an entry of the current directory. If `destination` is an existing
   * directory the copy is placed inside it under the source name, otherwise
   * the copy is created as `destination`.
   */
  cp(source, destination) {
    const cwd = this._currentDir;
    const item = this._ownItem(cwd, source);
    if (item === null) {
      return {
        error: `cp: cannot copy '${source}': No such file or directory`
      };
    }
    if (!(item instanceof File || item instanceof Directory)) {
      return { error: `cp: cannot copy '${source}': Not a file or directory` };
    }
    if (destination.includes("/")) {
      return {
        error: "cp: path not allowed in destination. Provide only a file or directory name."
      };
    }
    const destItem = this._ownItem(cwd, destination);
    if (destItem !== null) {
      if (!(destItem instanceof Directory)) {
        return {
          error: `cp: cannot copy '${source}' to '${destination}': Not a directory`
        };
      }
      if (this._ownItem(destItem, source) !== null) {
        return {
          error: `cp: cannot copy '${source}' to '${destination}/${source}': File exists`
        };
      }
      this._attachCopy(destItem, source, item);
      return { result: `'${source}' copied to '${destination}/${source}'` };
    }
    this._attachCopy(cwd, destination, item);
    return { result: `'${source}' copied to '${destination}'` };
  }

  /**
   * Resolves a path to a Directory. null/undefined/"." mean the current
   * directory, "/" the root; a leading slash makes the walk start at the root.
   * Returns `{ error }` when a component is missing or is not a directory.
   */
  _navigateToDirectory(path6) {
    if (path6 == null || path6 === ".") {
      return this._currentDir;
    }
    if (path6 === "/") {
      return this.root;
    }
    const dirs = path6.replace(/^\/+|\/+$/g, "").split("/");
    let tempDir = path6.startsWith("/") ? this.root : this._currentDir;
    for (const dirName of dirs) {
      const nextDir = tempDir._getItem(dirName);
      if (nextDir instanceof Directory) {
        tempDir = nextDir;
      } else {
        return { error: `cd: '${path6}': No such file or directory` };
      }
    }
    return tempDir;
  }

  /**
   * Builds a cycle-free, order-independent description of a subtree:
   * directories become `{ name, contents: [[entryName, snapshot], ...] }` with
   * entries sorted by name; anything else is returned as is.
   */
  _snapshot(node) {
    if (!(node instanceof Directory)) {
      return node;
    }
    const entries = Object.keys(node.contents).sort().map((key) => [key, this._snapshot(node.contents[key])]);
    return { name: node.name, contents: entries };
  }

  /**
   * Structural equality of two file systems, compared from their roots. The
   * current directory is not part of the comparison.
   */
  equals(other) {
    if (!(other instanceof _GorillaFileSystem)) {
      return false;
    }
    // Serializing the raw tree throws on the parent <-> contents cycle and
    // depends on creation order, hence the snapshot.
    return JSON.stringify(this._snapshot(this.root)) === JSON.stringify(this._snapshot(other.root));
  }
};
|
|
1762
|
+
|
|
1763
|
+
// src/multi-turn/classes/math-api.ts
|
|
1764
|
+
var import_decimal = require("decimal.js");
|
|
1765
|
+
var MathAPI = class {
|
|
1766
|
+
logarithm(value, base, precision) {
|
|
1767
|
+
try {
|
|
1768
|
+
import_decimal.Decimal.set({ precision });
|
|
1769
|
+
const result = new import_decimal.Decimal(value).ln().div(new import_decimal.Decimal(base).ln());
|
|
1770
|
+
return { result: result.toNumber() };
|
|
1771
|
+
} catch (e) {
|
|
1772
|
+
return { error: String(e) };
|
|
1773
|
+
}
|
|
1774
|
+
}
|
|
1775
|
+
mean(numbers) {
|
|
1776
|
+
if (!numbers.length) {
|
|
1777
|
+
return { error: "Cannot calculate mean of an empty list" };
|
|
1778
|
+
}
|
|
1779
|
+
try {
|
|
1780
|
+
return { result: numbers.reduce((a, b) => a + b, 0) / numbers.length };
|
|
1781
|
+
} catch (_e) {
|
|
1782
|
+
return { error: "All elements in the list must be numbers" };
|
|
1783
|
+
}
|
|
1784
|
+
}
|
|
1785
|
+
standardDeviation(numbers) {
|
|
1786
|
+
if (!numbers.length) {
|
|
1787
|
+
return { error: "Cannot calculate standard deviation of an empty list" };
|
|
1788
|
+
}
|
|
1789
|
+
try {
|
|
1790
|
+
const mean = numbers.reduce((a, b) => a + b, 0) / numbers.length;
|
|
1791
|
+
const variance = numbers.reduce((sum, x) => sum + (x - mean) ** 2, 0) / numbers.length;
|
|
1792
|
+
return { result: Math.sqrt(variance) };
|
|
1793
|
+
} catch (_e) {
|
|
1794
|
+
return { error: "All elements in the list must be numbers" };
|
|
1795
|
+
}
|
|
1796
|
+
}
|
|
1797
|
+
siUnitConversion(value, unitIn, unitOut) {
|
|
1798
|
+
const toMeters = {
|
|
1799
|
+
km: 1e3,
|
|
1800
|
+
m: 1,
|
|
1801
|
+
cm: 0.01,
|
|
1802
|
+
mm: 1e-3,
|
|
1803
|
+
um: 1e-6,
|
|
1804
|
+
nm: 1e-9
|
|
1805
|
+
};
|
|
1806
|
+
const fromMeters = Object.fromEntries(
|
|
1807
|
+
Object.entries(toMeters).map(([k, v]) => [k, 1 / v])
|
|
1808
|
+
);
|
|
1809
|
+
if (typeof value !== "number") {
|
|
1810
|
+
return { error: "Value must be a number" };
|
|
1811
|
+
}
|
|
1812
|
+
if (!(unitIn in toMeters && unitOut in fromMeters)) {
|
|
1813
|
+
return {
|
|
1814
|
+
error: `Conversion from '${unitIn}' to '${unitOut}' is not supported`
|
|
1815
|
+
};
|
|
1816
|
+
}
|
|
1817
|
+
try {
|
|
1818
|
+
const valueInMeters = value * toMeters[unitIn];
|
|
1819
|
+
const result = valueInMeters * fromMeters[unitOut];
|
|
1820
|
+
return { result };
|
|
1821
|
+
} catch (_e) {
|
|
1822
|
+
return { error: "Conversion resulted in a value too large to represent" };
|
|
1823
|
+
}
|
|
1824
|
+
}
|
|
1825
|
+
imperialSiConversion(value, unitIn, unitOut) {
|
|
1826
|
+
const conversion = {
|
|
1827
|
+
cm_to_in: 0.393701,
|
|
1828
|
+
in_to_cm: 2.54,
|
|
1829
|
+
m_to_ft: 3.28084,
|
|
1830
|
+
ft_to_m: 0.3048,
|
|
1831
|
+
m_to_yd: 1.09361,
|
|
1832
|
+
yd_to_m: 0.9144,
|
|
1833
|
+
km_to_miles: 0.621371,
|
|
1834
|
+
miles_to_km: 1.60934,
|
|
1835
|
+
kg_to_lb: 2.20462,
|
|
1836
|
+
lb_to_kg: 0.453592,
|
|
1837
|
+
celsius_to_fahrenheit: 1.8,
|
|
1838
|
+
fahrenheit_to_celsius: 5 / 9
|
|
1839
|
+
};
|
|
1840
|
+
if (typeof value !== "number") {
|
|
1841
|
+
return { error: "Value must be a number" };
|
|
1842
|
+
}
|
|
1843
|
+
if (unitIn === unitOut) {
|
|
1844
|
+
return { result: value };
|
|
1845
|
+
}
|
|
1846
|
+
const conversionKey = `${unitIn}_to_${unitOut}`;
|
|
1847
|
+
if (!(conversionKey in conversion)) {
|
|
1848
|
+
return {
|
|
1849
|
+
error: `Conversion from '${unitIn}' to '${unitOut}' is not supported`
|
|
1850
|
+
};
|
|
1851
|
+
}
|
|
1852
|
+
try {
|
|
1853
|
+
let result;
|
|
1854
|
+
if (unitIn === "celsius" && unitOut === "fahrenheit") {
|
|
1855
|
+
result = value * conversion[conversionKey] + 32;
|
|
1856
|
+
} else if (unitIn === "fahrenheit" && unitOut === "celsius") {
|
|
1857
|
+
result = (value - 32) * conversion[conversionKey];
|
|
1858
|
+
} else {
|
|
1859
|
+
result = value * conversion[conversionKey];
|
|
1860
|
+
}
|
|
1861
|
+
return { result };
|
|
1862
|
+
} catch (_e) {
|
|
1863
|
+
return { error: "Conversion resulted in a value too large to represent" };
|
|
1864
|
+
}
|
|
1865
|
+
}
|
|
1866
|
+
add(a, b) {
|
|
1867
|
+
try {
|
|
1868
|
+
return { result: a + b };
|
|
1869
|
+
} catch (_e) {
|
|
1870
|
+
return { error: "Both inputs must be numbers" };
|
|
1871
|
+
}
|
|
1872
|
+
}
|
|
1873
|
+
subtract(a, b) {
|
|
1874
|
+
try {
|
|
1875
|
+
return { result: a - b };
|
|
1876
|
+
} catch (_e) {
|
|
1877
|
+
return { error: "Both inputs must be numbers" };
|
|
1878
|
+
}
|
|
1879
|
+
}
|
|
1880
|
+
multiply(a, b) {
|
|
1881
|
+
if (typeof a !== "number" || typeof b !== "number") {
|
|
1882
|
+
return { error: "Both inputs must be numbers" };
|
|
1883
|
+
}
|
|
1884
|
+
try {
|
|
1885
|
+
return { result: a * b };
|
|
1886
|
+
} catch (_e) {
|
|
1887
|
+
return { error: "Both inputs must be numbers" };
|
|
1888
|
+
}
|
|
1889
|
+
}
|
|
1890
|
+
divide(a, b) {
|
|
1891
|
+
try {
|
|
1892
|
+
if (b === 0) {
|
|
1893
|
+
return { error: "Cannot divide by zero" };
|
|
1894
|
+
}
|
|
1895
|
+
return { result: a / b };
|
|
1896
|
+
} catch (_e) {
|
|
1897
|
+
return { error: "Both inputs must be numbers" };
|
|
1898
|
+
}
|
|
1899
|
+
}
|
|
1900
|
+
power(base, exponent) {
|
|
1901
|
+
try {
|
|
1902
|
+
return { result: base ** exponent };
|
|
1903
|
+
} catch (_e) {
|
|
1904
|
+
return { error: "Both inputs must be numbers" };
|
|
1905
|
+
}
|
|
1906
|
+
}
|
|
1907
|
+
squareRoot(number, precision) {
|
|
1908
|
+
try {
|
|
1909
|
+
if (number < 0) {
|
|
1910
|
+
return { error: "Cannot calculate square root of a negative number" };
|
|
1911
|
+
}
|
|
1912
|
+
import_decimal.Decimal.set({ precision });
|
|
1913
|
+
const decimalNumber = new import_decimal.Decimal(number);
|
|
1914
|
+
const result = decimalNumber.sqrt();
|
|
1915
|
+
return { result: result.toNumber() };
|
|
1916
|
+
} catch (_e) {
|
|
1917
|
+
return {
|
|
1918
|
+
error: "Input must be a number or computation resulted in an invalid operation"
|
|
1919
|
+
};
|
|
1920
|
+
}
|
|
1921
|
+
}
|
|
1922
|
+
absoluteValue(number) {
|
|
1923
|
+
try {
|
|
1924
|
+
return { result: Math.abs(number) };
|
|
1925
|
+
} catch (_e) {
|
|
1926
|
+
return { error: "Input must be a number" };
|
|
1927
|
+
}
|
|
1928
|
+
}
|
|
1929
|
+
roundNumber(number, decimalPlaces = 0) {
|
|
1930
|
+
try {
|
|
1931
|
+
return {
|
|
1932
|
+
result: Math.round(number * 10 ** decimalPlaces) / 10 ** decimalPlaces
|
|
1933
|
+
};
|
|
1934
|
+
} catch (_e) {
|
|
1935
|
+
return {
|
|
1936
|
+
error: "First input must be a number, second input must be an integer"
|
|
1937
|
+
};
|
|
1938
|
+
}
|
|
1939
|
+
}
|
|
1940
|
+
percentage(part, whole) {
|
|
1941
|
+
try {
|
|
1942
|
+
if (whole === 0) {
|
|
1943
|
+
return { error: "Whole value cannot be zero" };
|
|
1944
|
+
}
|
|
1945
|
+
return { result: part / whole * 100 };
|
|
1946
|
+
} catch (_e) {
|
|
1947
|
+
return { error: "Both inputs must be numbers" };
|
|
1948
|
+
}
|
|
1949
|
+
}
|
|
1950
|
+
minValue(numbers) {
|
|
1951
|
+
if (!numbers.length) {
|
|
1952
|
+
return { error: "Cannot find minimum of an empty list" };
|
|
1953
|
+
}
|
|
1954
|
+
try {
|
|
1955
|
+
return { result: Math.min(...numbers) };
|
|
1956
|
+
} catch (_e) {
|
|
1957
|
+
return { error: "All elements in the list must be numbers" };
|
|
1958
|
+
}
|
|
1959
|
+
}
|
|
1960
|
+
maxValue(numbers) {
|
|
1961
|
+
if (!numbers.length) {
|
|
1962
|
+
return { error: "Cannot find maximum of an empty list" };
|
|
1963
|
+
}
|
|
1964
|
+
try {
|
|
1965
|
+
return { result: Math.max(...numbers) };
|
|
1966
|
+
} catch (_e) {
|
|
1967
|
+
return { error: "All elements in the list must be numbers" };
|
|
1968
|
+
}
|
|
1969
|
+
}
|
|
1970
|
+
sumValues(numbers) {
|
|
1971
|
+
if (!numbers.length) {
|
|
1972
|
+
return { error: "Cannot calculate sum of an empty list" };
|
|
1973
|
+
}
|
|
1974
|
+
try {
|
|
1975
|
+
return { result: numbers.reduce((a, b) => a + b, 0) };
|
|
1976
|
+
} catch (_e) {
|
|
1977
|
+
return { error: "All elements in the list must be numbers" };
|
|
1978
|
+
}
|
|
1979
|
+
}
|
|
1980
|
+
};
|
|
1981
|
+
|
|
1982
|
+
// src/multi-turn/classes/message-api.ts
|
|
1983
|
+
// Baseline messaging state. MessageAPI._loadScenario deep-copies this object
// and uses its fields for anything the scenario does not supply.
var DEFAULT_STATE = {
  generated_ids: [],
  user_count: 4,
  // Display name -> user id.
  user_map: {
    Alice: "USR001",
    Bob: "USR002",
    Catherine: "USR003",
    Daniel: "USR004"
  },
  // Each entry is a single-key object: { <receiver id>: <message text> }.
  inbox: [
    { USR002: "My name is Alice. I want to connect." },
    { USR003: "Could you upload the file?" },
    { USR004: "Could you upload the file?" }
  ],
  message_count: 3,
  // null means nobody is logged in.
  current_user: null
};
|
|
2000
|
+
/**
 * In-memory simulation of a workspace messaging service.
 *
 * State consists of a name -> id user map, an inbox of sent messages (each a
 * single-key `{ receiverId: text }` object), the set of message ids handed
 * out so far, and the currently logged-in user id. Every public method
 * returns a plain result object; failures are reported as `{ error }` (or a
 * `false` status flag) rather than thrown.
 */
var MessageAPI = class _MessageAPI {
  constructor() {
    this.generatedIds = /* @__PURE__ */ new Set();
    this.userCount = 4;
    this.userMap = {};
    this.inbox = [];
    this.messageCount = 0;
    this.currentUser = null;
    // Seeded generator state for message ids. Underscore-prefixed so that
    // equals() ignores it.
    this._random = { seed: 200191 };
  }
  /**
   * Reset this instance from a scenario object, falling back to DEFAULT_STATE
   * for every field the scenario leaves out.
   *
   * @param {object} scenario - Scenario with optional `generated_ids`,
   *   `user_count`, `user_map`, `inbox`, `message_count`, `current_user`.
   * @param {boolean} [_longContext=false] - Unused.
   */
  _loadScenario(scenario, _longContext = false) {
    const defaults = JSON.parse(JSON.stringify(DEFAULT_STATE));
    const source = scenario != null ? scenario : {};
    // Only null/undefined fall back to the default, so an explicit 0 in the
    // scenario is honoured.
    const pick = (key) => source[key] != null ? source[key] : defaults[key];
    this.generatedIds = new Set(source.generated_ids || []);
    this.userCount = pick("user_count");
    this.userMap = { ...defaults.user_map, ...source.user_map };
    // Copy the inbox so sendMessage/deleteMessage never mutate the caller's
    // scenario (which may be reused to initialise another instance).
    this.inbox = pick("inbox").map((entry) => ({ ...entry }));
    this.messageCount = pick("message_count");
    this.currentUser = source.current_user || defaults.current_user;
    // Restart the id sequence so two instances loaded from the same scenario
    // hand out the same ids for the same sequence of calls.
    this._random = { seed: 200191 };
  }
  /**
   * Structural equality of the observable state of two instances.
   * Underscore-prefixed members are ignored.
   *
   * @param {*} other - Value to compare with.
   * @returns {boolean} True when `other` is a MessageAPI holding equal state.
   */
  equals(other) {
    if (!(other instanceof _MessageAPI)) {
      return false;
    }
    // State members are Sets, arrays and plain objects, so they have to be
    // compared by content; `!==` would only compare references.
    const sameValue = (left, right) => {
      if (left === right) {
        return true;
      }
      if (left instanceof Set || right instanceof Set) {
        return left instanceof Set && right instanceof Set && left.size === right.size && [...left].every((item) => right.has(item));
      }
      if (Array.isArray(left) || Array.isArray(right)) {
        return Array.isArray(left) && Array.isArray(right) && left.length === right.length && left.every((item, index) => sameValue(item, right[index]));
      }
      if (left !== null && right !== null && typeof left === "object" && typeof right === "object") {
        const leftKeys = Object.keys(left);
        return leftKeys.length === Object.keys(right).length && leftKeys.every(
          (key) => Object.prototype.hasOwnProperty.call(right, key) && sameValue(left[key], right[key])
        );
      }
      return false;
    };
    for (const key of Object.keys(this)) {
      if (key.startsWith("_")) {
        continue;
      }
      if (!sameValue(this[key], other[key])) {
        return false;
      }
    }
    return true;
  }
  /**
   * Produce a five-digit id (10000-99999) not handed out before and record it.
   *
   * @returns {{new_id: number}} The newly reserved id.
   */
  _generateId() {
    // Linear congruential step; deterministic so that instances replaying the
    // same calls end up with equal generatedIds.
    const draw = () => {
      this._random.seed = (this._random.seed * 9301 + 49297) % 233280;
      return Math.floor(this._random.seed / 233280 * 9e4) + 1e4;
    };
    let newId = draw();
    while (this.generatedIds.has(newId)) {
      newId = draw();
    }
    this.generatedIds.add(newId);
    return { new_id: newId };
  }
  /** @returns {{user_list: string[]}} Names of all known users. */
  listUsers() {
    return { user_list: Object.keys(this.userMap) };
  }
  /** snake_case alias of listUsers(). */
  list_users() {
    return this.listUsers();
  }
  /**
   * Look up the id for a user name.
   *
   * @param {string} user - User name.
   * @returns {{user_id: string} | {error: string}}
   */
  getUserId(user) {
    // Own-property check: `in` would also match inherited keys such as
    // "toString" or "constructor".
    if (!Object.prototype.hasOwnProperty.call(this.userMap, user)) {
      return { error: `User '${user}' not found in the workspace.` };
    }
    return { user_id: this.userMap[user] };
  }
  /**
   * Log in as the user with the given id.
   *
   * @param {string} userId - Id that must exist in the user map.
   * @returns {{login_status: boolean, message: string}}
   */
  messageLogin(userId) {
    if (!Object.values(this.userMap).includes(userId)) {
      return { login_status: false, message: `User ID '${userId}' not found.` };
    }
    this.currentUser = userId;
    return {
      login_status: true,
      message: `User '${userId}' logged in successfully.`
    };
  }
  /** @returns {{login_status: boolean}} Whether a user is logged in. */
  messageGetLoginStatus() {
    return { login_status: !!this.currentUser };
  }
  /**
   * Send a message to a known user. Requires a logged-in user.
   *
   * @param {string} receiverId - Id of the recipient.
   * @param {string} message - Message text.
   * @returns {object} `{ sent_status, message_id, message }` on success, or
   *   `{ error }`.
   */
  sendMessage(receiverId, message) {
    if (!this.currentUser) {
      return { error: "No user is currently logged in." };
    }
    if (!Object.values(this.userMap).includes(receiverId)) {
      return { error: `Receiver ID '${receiverId}' not found.` };
    }
    // message_id is the whole `{ new_id }` object returned by _generateId().
    const messageId = this._generateId();
    this.inbox.push({ [receiverId]: message });
    this.messageCount += 1;
    return {
      sent_status: true,
      message_id: messageId,
      message: `Message sent to '${receiverId}' successfully.`
    };
  }
  /**
   * Delete the most recent message addressed to a receiver. Requires a
   * logged-in user.
   *
   * @param {string} receiverId - Id of the recipient.
   * @returns {object} `{ deleted_status, receiver_id, message }` on success,
   *   or `{ error }`.
   */
  deleteMessage(receiverId) {
    if (!this.currentUser) {
      return { error: "No user is currently logged in." };
    }
    // Walk backwards so the latest matching message is the one removed.
    for (let i = this.inbox.length - 1; i >= 0; i--) {
      const [receiver] = Object.keys(this.inbox[i]);
      if (receiver === receiverId) {
        this.inbox.splice(i, 1);
        return {
          deleted_status: true,
          receiver_id: receiver,
          message: `Receiver ${receiverId}'s latest message deleted successfully.`
        };
      }
    }
    return { error: `Receiver ID ${receiverId} not found.` };
  }
  /**
   * Group all inbox messages by receiver. Requires a logged-in user.
   *
   * @returns {{messages: Object<string, string[]>} | {error: string}}
   */
  viewMessagesSent() {
    if (!this.currentUser) {
      return { error: "No user is currently logged in." };
    }
    const sentMessages = {};
    for (const entry of this.inbox) {
      const [receiver, content] = Object.entries(entry)[0];
      if (!sentMessages[receiver]) {
        sentMessages[receiver] = [];
      }
      sentMessages[receiver].push(content);
    }
    return { messages: sentMessages };
  }
  /**
   * Register a new user name and assign it the next `USRnnn` id.
   *
   * @param {string} userName - Name to add.
   * @returns {object} `{ added_status, user_id, message }` on success, or
   *   `{ error }`.
   */
  addContact(userName) {
    // Own-property check so names like "constructor" are not mistaken for
    // existing users.
    if (Object.prototype.hasOwnProperty.call(this.userMap, userName)) {
      return { error: `User name '${userName}' already exists.` };
    }
    this.userCount += 1;
    const userId = `USR${String(this.userCount).padStart(3, "0")}`;
    if (Object.values(this.userMap).includes(userId)) {
      return { error: `User ID '${userId}' already exists.` };
    }
    this.userMap[userName] = userId;
    return {
      added_status: true,
      user_id: userId,
      message: `Contact '${userName}' added successfully.`
    };
  }
  /**
   * Case-insensitive substring search over all inbox messages. Requires a
   * logged-in user.
   *
   * @param {string} keyword - Text to look for.
   * @returns {{results: Array<{receiver_id: string, message: string}>} | {error: string}}
   */
  searchMessages(keyword) {
    if (!this.currentUser) {
      return { error: "No user is currently logged in." };
    }
    const keywordLower = keyword.toLowerCase();
    const results = [];
    for (const entry of this.inbox) {
      const [receiverId, messageContent] = Object.entries(entry)[0];
      if (messageContent.toLowerCase().includes(keywordLower)) {
        results.push({
          receiver_id: receiverId,
          message: messageContent
        });
      }
    }
    return { results };
  }
  /**
   * Report the inbox size and the number of distinct receivers. Requires a
   * logged-in user.
   *
   * @returns {{stats: {received_count: number, total_contacts: number}} | {error: string}}
   */
  getMessageStats() {
    if (!this.currentUser) {
      return { error: "No user is currently logged in." };
    }
    const receivers = new Set(this.inbox.map((entry) => Object.keys(entry)[0]));
    return {
      stats: {
        received_count: this.inbox.length,
        total_contacts: receivers.size
      }
    };
  }
};
|
|
2165
|
+
|
|
2166
|
+
// src/multi-turn/classes/ticket-api.ts
|
|
2167
|
+
// Baseline ticketing state. TicketAPI._loadScenario deep-copies this object
// and uses its fields for anything the scenario does not supply.
var DEFAULT_STATE2 = {
  ticket_queue: [],
  // Id that will be assigned to the next created ticket.
  ticket_counter: 1,
  // null means nobody is logged in.
  current_user: null
};
|
|
2172
|
+
/**
 * In-memory simulation of a support-ticket system.
 *
 * State is a queue of ticket objects, a counter supplying the next ticket id,
 * and the name of the logged-in user. Methods return plain result objects and
 * report failures as `{ error }` rather than throwing.
 */
var TicketAPI = class {
  constructor() {
    this.ticketQueue = [];
    this.ticketCounter = 1;
    this.currentUser = null;
  }
  /**
   * Reset this instance from a scenario object, falling back to
   * DEFAULT_STATE2 for every field the scenario leaves out.
   *
   * @param {object} scenario - Scenario with optional `ticket_queue`,
   *   `ticket_counter`, `current_user`.
   * @param {boolean} [_longContext=false] - Unused.
   */
  _loadScenario(scenario, _longContext = false) {
    const defaults = JSON.parse(JSON.stringify(DEFAULT_STATE2));
    const source = scenario != null ? scenario : {};
    const pick = (key, fallback) => {
      if (source[key] != null) {
        return source[key];
      }
      return defaults[key] != null ? defaults[key] : fallback;
    };
    // Deep-copy the queue: tickets are mutated in place (status, resolution,
    // edits), and the same scenario object may be used to initialise more
    // than one instance.
    this.ticketQueue = JSON.parse(JSON.stringify(pick("ticket_queue", [])));
    this.ticketCounter = pick("ticket_counter", 1);
    this.currentUser = pick("current_user", null);
  }
  /**
   * Create a ticket owned by the logged-in user.
   *
   * @param {string} title - Ticket title.
   * @param {string} [description=""] - Ticket description.
   * @param {number} [priority=1] - Priority from 1 to 5.
   * @returns {object} The created ticket, or `{ error }`.
   */
  create_ticket(title, description = "", priority = 1) {
    if (!this.currentUser) {
      return {
        error: "User not authenticated. Please log in to create a ticket."
      };
    }
    if (priority < 1 || priority > 5) {
      return { error: "Invalid priority. Priority must be between 1 and 5." };
    }
    const ticket = {
      id: this.ticketCounter,
      title,
      description,
      status: "Open",
      priority,
      created_by: this.currentUser
    };
    this.ticketQueue.push(ticket);
    this.ticketCounter += 1;
    return ticket;
  }
  /**
   * Fetch a ticket by id.
   *
   * @param {number} ticket_id - Ticket id.
   * @returns {object} The ticket, or `{ error }` when it does not exist.
   */
  get_ticket(ticket_id) {
    const ticket = this._findTicket(ticket_id);
    if (!ticket) {
      return { error: `Ticket with ID ${ticket_id} not found.` };
    }
    return ticket;
  }
  /**
   * Mark a ticket as closed.
   *
   * @param {number} ticket_id - Ticket id.
   * @returns {{status: string} | {error: string}}
   */
  close_ticket(ticket_id) {
    const ticket = this._findTicket(ticket_id);
    if (!ticket) {
      return { error: `Ticket with ID ${ticket_id} not found.` };
    }
    if (ticket.status === "Closed") {
      return { error: `Ticket with ID ${ticket_id} is already closed.` };
    }
    ticket.status = "Closed";
    return { status: `Ticket ${ticket_id} has been closed successfully.` };
  }
  /**
   * Mark a ticket as resolved and record the resolution text.
   *
   * @param {number} ticket_id - Ticket id.
   * @param {string} resolution - Resolution details.
   * @returns {{status: string} | {error: string}}
   */
  resolve_ticket(ticket_id, resolution) {
    const ticket = this._findTicket(ticket_id);
    if (!ticket) {
      return { error: `Ticket with ID ${ticket_id} not found.` };
    }
    if (ticket.status === "Resolved") {
      return { error: `Ticket with ID ${ticket_id} is already resolved.` };
    }
    ticket.status = "Resolved";
    ticket.resolution = resolution;
    return { status: `Ticket ${ticket_id} has been resolved successfully.` };
  }
  /**
   * Update selected fields of a ticket.
   *
   * @param {number} ticket_id - Ticket id.
   * @param {object} updates - Any of `title`, `description`, `status`,
   *   `priority`; null/undefined values are skipped.
   * @returns {{status: string} | {error: string}}
   */
  edit_ticket(ticket_id, updates) {
    const ticket = this._findTicket(ticket_id);
    if (!ticket) {
      return { error: `Ticket with ID ${ticket_id} not found.` };
    }
    const validFields = /* @__PURE__ */ new Set(["title", "description", "status", "priority"]);
    const invalidFields = Object.keys(updates).filter(
      (field) => !validFields.has(field)
    );
    if (invalidFields.length > 0) {
      return {
        error: `Invalid fields for update: ${invalidFields.join(", ")}`
      };
    }
    for (const [key, value] of Object.entries(updates)) {
      // Loose check so an `undefined` value cannot wipe an existing field.
      if (value != null) {
        ticket[key] = value;
      }
    }
    return { status: `Ticket ${ticket_id} has been updated successfully.` };
  }
  /**
   * Locate a ticket in the queue by strict id equality.
   *
   * @param {number} ticket_id - Ticket id.
   * @returns {object | undefined} The ticket, or undefined when absent.
   */
  _findTicket(ticket_id) {
    return this.ticketQueue.find((ticket) => ticket.id === ticket_id);
  }
  /**
   * Log in. Any non-empty username/password pair is accepted.
   *
   * @param {string} username - User name.
   * @param {string} password - Password (only checked for truthiness).
   * @returns {{success: boolean}}
   */
  ticket_login(username, password) {
    if (username && password) {
      this.currentUser = username;
      return { success: true };
    }
    return { success: false };
  }
  /** @returns {{login_status: boolean}} Whether a user is logged in. */
  ticket_get_login_status() {
    return { login_status: !!this.currentUser };
  }
  /**
   * Log the current user out.
   *
   * @returns {{success: boolean}} False when nobody was logged in.
   */
  logout() {
    if (this.currentUser) {
      this.currentUser = null;
      return { success: true };
    }
    return { success: false };
  }
  /**
   * List tickets created by the logged-in user, optionally filtered by
   * status (case-insensitive).
   *
   * @param {string} [status] - Status to filter on.
   * @returns {object[]} Matching tickets, or a one-element array holding an
   *   `{ error }` object when nobody is logged in.
   */
  get_user_tickets(status) {
    if (!this.currentUser) {
      return [
        { error: "User not authenticated. Please log in to view tickets." }
      ];
    }
    let userTickets = this.ticketQueue.filter(
      (ticket) => ticket.created_by === this.currentUser
    );
    if (status) {
      userTickets = userTickets.filter(
        (ticket) => ticket.status.toLowerCase() === status.toLowerCase()
      );
    }
    return userTickets;
  }
};
|
|
2296
|
+
|
|
2297
|
+
// src/multi-turn/classes/trading-bot.ts
|
|
2298
|
+
// Fixed simulated "now" for the trading bot: 1 September 2024, 10:30 local
// time (the Date constructor's month argument is zero-based, so 8 = September).
var CURRENT_TIME = new Date(2024, 8, 1, 10, 30);
|
|
2299
|
+
// Baseline trading state. TradingBot._loadScenario deep-copies this object,
// merges scenario orders/account_info/stocks over it, and uses the remaining
// fields for anything the scenario does not supply.
var DEFAULT_STATE3 = {
  // Orders keyed by order id.
  orders: {
    12345: {
      id: 12345,
      order_type: "Buy",
      symbol: "AAPL",
      price: 210.65,
      amount: 10,
      status: "Completed"
    },
    12446: {
      id: 12446,
      order_type: "Sell",
      symbol: "GOOG",
      price: 2840.56,
      amount: 5,
      status: "Pending"
    }
  },
  account_info: {
    account_id: 12345,
    balance: 1e4,
    binding_card: 1974202140965533
  },
  authenticated: false,
  market_status: "Closed",
  // Id given to the next placed order.
  // NOTE(review): this equals the id of the existing order 12446 above, so
  // the first order placed on pure defaults would replace it — confirm intended.
  order_counter: 12446,
  // Quote data keyed by ticker symbol.
  stocks: {
    AAPL: {
      price: 227.16,
      percent_change: 0.17,
      volume: 2.552,
      "MA(5)": 227.11,
      "MA(20)": 227.09
    },
    GOOG: {
      price: 2840.34,
      percent_change: 0.24,
      volume: 1.123,
      "MA(5)": 2835.67,
      "MA(20)": 2842.15
    },
    TSLA: {
      price: 667.92,
      percent_change: -0.12,
      volume: 1.654,
      "MA(5)": 671.15,
      "MA(20)": 668.2
    },
    MSFT: {
      price: 310.23,
      percent_change: 0.09,
      volume: 3.234,
      "MA(5)": 309.88,
      "MA(20)": 310.11
    },
    NVDA: {
      price: 220.34,
      percent_change: 0.34,
      volume: 1.234,
      "MA(5)": 220.45,
      "MA(20)": 220.67
    },
    ALPH: {
      price: 1320.45,
      percent_change: -0.08,
      volume: 1.567,
      "MA(5)": 1321.12,
      "MA(20)": 1325.78
    },
    OMEG: {
      price: 457.23,
      percent_change: 0.12,
      volume: 2.345,
      "MA(5)": 456.78,
      "MA(20)": 458.12
    },
    QUAS: {
      price: 725.89,
      percent_change: -0.03,
      volume: 1.789,
      "MA(5)": 726.45,
      "MA(20)": 728
    },
    NEPT: {
      price: 88.34,
      percent_change: 0.19,
      volume: 0.654,
      "MA(5)": 88.21,
      "MA(20)": 88.67
    },
    SYNX: {
      price: 345.67,
      percent_change: 0.11,
      volume: 2.112,
      "MA(5)": 345.34,
      "MA(20)": 346.12
    },
    ZETA: {
      price: 22.09,
      percent_change: -0.05,
      volume: 0.789,
      "MA(5)": 22.12,
      "MA(20)": 22.34
    }
  },
  watch_list: ["NVDA"],
  transaction_history: [],
  // Seed for the generator that produces transaction timestamps.
  random_seed: 1053520
};
|
|
2409
|
+
/**
 * Small deterministic pseudo-random generator (linear congruential step).
 * The same seed always yields the same sequence.
 */
var SeededRandom = class {
  /** @param {number} seed - Initial generator state. */
  constructor(seed) {
    this.seed = seed;
  }
  /**
   * Advance the generator and return an integer in `[min, max]`, inclusive.
   *
   * @param {number} min - Lower bound.
   * @param {number} max - Upper bound.
   * @returns {number} Pseudo-random integer between min and max.
   */
  randint(min, max) {
    const modulus = 233280;
    const nextSeed = (this.seed * 9301 + 49297) % modulus;
    this.seed = nextSeed;
    const span = max - min + 1;
    // nextSeed / modulus is a fraction in [0, 1) once the state is reduced.
    return Math.floor(min + nextSeed / modulus * span);
  }
};
|
|
2418
|
+
/**
 * In-memory simulation of a stock-trading account.
 *
 * State consists of orders keyed by numeric id, account info, an
 * authentication flag, the market status, quote data per symbol, a watch
 * list and a transaction history. Methods return plain result objects and
 * report failures as `{ error }` rather than throwing.
 */
var TradingBot = class _TradingBot {
  constructor() {
    // Aliases for camelCase compatibility
    this.placeOrder = this.place_order.bind(this);
    this.getAccountInfo = this.get_account_info.bind(this);
    this.tradingLogin = this.trading_login.bind(this);
    this.tradingGetLoginStatus = this.trading_get_login_status.bind(this);
    this.getOrderHistory = this.get_order_history.bind(this);
    this.orders = {};
    this.accountInfo = { account_id: 0, balance: 0, binding_card: 0 };
    this.authenticated = false;
    this.marketStatus = "Closed";
    this.orderCounter = 0;
    this.stocks = {};
    this.watchList = [];
    this.transactionHistory = [];
    this._random = new SeededRandom(1053520);
  }
  /**
   * Reset this instance from a scenario object, falling back to
   * DEFAULT_STATE3 for every field the scenario leaves out. Orders, account
   * info and stocks are merged over the defaults.
   *
   * @param {object} scenario - Scenario state (JSON-compatible).
   * @param {boolean} [_longContext=false] - Unused.
   */
  _loadScenario(scenario, _longContext = false) {
    const defaults = JSON.parse(JSON.stringify(DEFAULT_STATE3));
    // Deep-copy the scenario: orders, the watch list and the transaction
    // history are mutated in place, and the same scenario object may be used
    // to initialise more than one instance.
    const source = scenario != null ? JSON.parse(JSON.stringify(scenario)) : {};
    const pick = (key) => source[key] != null ? source[key] : defaults[key];
    const mergedOrders = { ...defaults.orders, ...source.orders };
    const convertedOrders = {};
    for (const [key, order] of Object.entries(mergedOrders)) {
      // Keep only entries whose key parses as an integer order id.
      const numericKey = Number.parseInt(key, 10);
      if (!Number.isNaN(numericKey)) {
        convertedOrders[numericKey] = order;
      }
    }
    this.orders = convertedOrders;
    this.accountInfo = {
      ...defaults.account_info,
      ...source.account_info
    };
    this.authenticated = pick("authenticated");
    this.marketStatus = pick("market_status");
    this.orderCounter = pick("order_counter");
    this.stocks = { ...defaults.stocks, ...source.stocks };
    this.watchList = pick("watch_list");
    this.transactionHistory = pick("transaction_history");
    this._random = new SeededRandom(pick("random_seed"));
  }
  /**
   * Produce a pseudo-random timestamp within the 24 hours following
   * CURRENT_TIME, formatted as `YYYY-MM-DD HH:MM:SS` in local time.
   *
   * @returns {string} Formatted timestamp.
   */
  _generateTransactionTimestamp() {
    const startDate = CURRENT_TIME;
    const endDate = new Date(startDate.getTime() + 24 * 60 * 60 * 1e3);
    const startTimestamp = Math.floor(startDate.getTime() / 1e3);
    const endTimestamp = Math.floor(endDate.getTime() / 1e3);
    const randomTimestamp = this._random.randint(startTimestamp, endTimestamp);
    const randomDate = new Date(randomTimestamp * 1e3);
    const pad = (n) => n.toString().padStart(2, "0");
    return `${randomDate.getFullYear()}-${pad(randomDate.getMonth() + 1)}-${pad(randomDate.getDate())} ${pad(randomDate.getHours())}:${pad(randomDate.getMinutes())}:${pad(randomDate.getSeconds())}`;
  }
  /**
   * Compare orders, account info, authentication, market status and watch
   * list with another instance.
   *
   * @param {*} other - Value to compare with.
   * @returns {boolean} True when `other` is a TradingBot with equal state.
   */
  equals(other) {
    if (!(other instanceof _TradingBot)) {
      return false;
    }
    return JSON.stringify(this.orders) === JSON.stringify(other.orders) && JSON.stringify(this.accountInfo) === JSON.stringify(other.accountInfo) && this.authenticated === other.authenticated && this.marketStatus === other.marketStatus && JSON.stringify(this.watchList) === JSON.stringify(other.watchList);
  }
  /** @returns {{current_time: string}} CURRENT_TIME as `h:MM AM/PM`. */
  get_current_time() {
    const hours = CURRENT_TIME.getHours();
    const minutes = CURRENT_TIME.getMinutes();
    const ampm = hours >= 12 ? "PM" : "AM";
    const formattedHours = hours % 12 || 12;
    const formattedMinutes = minutes.toString().padStart(2, "0");
    return { current_time: `${formattedHours}:${formattedMinutes} ${ampm}` };
  }
  /**
   * Map a company name to its ticker symbol.
   *
   * @param {string} name - Company name.
   * @returns {{symbol: string}} The symbol, or "Stock not found".
   */
  get_symbol_by_name(name) {
    const symbolMap = {
      Apple: "AAPL",
      Google: "GOOG",
      Tesla: "TSLA",
      Microsoft: "MSFT",
      Nvidia: "NVDA",
      "Zeta Corp": "ZETA",
      "Alpha Tech": "ALPH",
      "Omega Industries": "OMEG",
      "Quasar Ltd.": "QUAS",
      "Neptune Systems": "NEPT",
      "Synex Solutions": "SYNX",
      Amazon: "AMZN",
      Gorilla: "GORI"
    };
    // Own-property check so names like "constructor" do not resolve to
    // inherited Object members.
    const known = Object.prototype.hasOwnProperty.call(symbolMap, name);
    return { symbol: known ? symbolMap[name] : "Stock not found" };
  }
  /**
   * Return the quote data for a symbol.
   *
   * @param {string} symbol - Ticker symbol.
   * @returns {object} The stock entry, or `{ error }`.
   */
  get_stock_info(symbol) {
    if (!Object.prototype.hasOwnProperty.call(this.stocks, symbol)) {
      return { error: `Stock with symbol '${symbol}' not found.` };
    }
    return this.stocks[symbol];
  }
  /**
   * Return the stored order for an id.
   *
   * @param {number|string} order_id - Order id (coerced with Number()).
   * @returns {object} The order, or `{ error }` listing the known ids.
   */
  get_order_details(order_id) {
    const orderId = Number(order_id);
    if (!Object.prototype.hasOwnProperty.call(this.orders, orderId)) {
      return {
        error: `Order with ID ${orderId} not found. Here is the list of orders_id: ${Object.keys(this.orders).join(", ")}`
      };
    }
    return this.orders[orderId];
  }
  /**
   * Cancel an order that is not already completed.
   *
   * @param {number|string} order_id - Order id (coerced with Number()).
   * @returns {{order_id: number, status: string} | {error: string}}
   */
  cancel_order(order_id) {
    const orderId = Number(order_id);
    if (!Object.prototype.hasOwnProperty.call(this.orders, orderId)) {
      return { error: `Order with ID ${orderId} not found.` };
    }
    if (this.orders[orderId].status === "Completed") {
      return {
        error: `Can't cancel order ${orderId}. Order is already completed.`
      };
    }
    this.orders[orderId].status = "Cancelled";
    return { order_id: orderId, status: "Cancelled" };
  }
  /**
   * Place a buy or sell order. Requires authentication; buy orders also
   * require sufficient balance.
   *
   * @param {string} order_type - "Buy" or "Sell" (buy check is case-insensitive).
   * @param {string} symbol - Ticker symbol present in the stock table.
   * @param {number} price - Price per share; must be positive.
   * @param {number} amount - Number of shares; must be positive.
   * @returns {object} Order summary, or `{ error }`.
   */
  place_order(order_type, symbol, price, amount) {
    if (!this.authenticated) {
      return {
        error: "User not authenticated. Please log in to place an order."
      };
    }
    if (!Object.prototype.hasOwnProperty.call(this.stocks, symbol)) {
      return { error: `Invalid stock symbol: ${symbol}` };
    }
    if (price <= 0 || amount <= 0) {
      return { error: "Price and amount must be positive values." };
    }
    if (order_type.toLowerCase() === "buy") {
      const totalCost = Number(price) * Number(amount);
      const balance = this.accountInfo.balance != null ? this.accountInfo.balance : 0;
      if (totalCost > balance) {
        return {
          error: `Insufficient funds: required $${totalCost.toFixed(2)} but only $${balance.toFixed(2)} available.`
        };
      }
    }
    const orderId = this.orderCounter;
    this.orders[orderId] = {
      id: orderId,
      order_type,
      symbol,
      price: Number(price),
      amount: Number(amount),
      status: "Open"
    };
    this.orderCounter += 1;
    // NOTE(review): the stored order is "Open" while the response reports
    // "Pending"; this looks deliberate (placed but not yet executed) — confirm.
    return {
      order_id: orderId,
      order_type,
      status: "Pending",
      price: Number(price),
      amount: Number(amount)
    };
  }
  /**
   * Withdraw funds. Requires authentication and an open market.
   *
   * @param {number} amount - Positive amount not exceeding the balance.
   * @returns {{status: string, new_balance: number} | {error: string}}
   */
  withdraw_funds(amount) {
    if (!this.authenticated) {
      return {
        error: "User not authenticated. Please log in to make a transaction."
      };
    }
    if (this.marketStatus !== "Open") {
      return { error: "Market is closed. Transactions are not allowed." };
    }
    // Normalise to a number (as place_order does) and reject NaN, which
    // would otherwise slip past a plain `<= 0` comparison.
    const value = Number(amount);
    if (!(value > 0)) {
      return { error: "Transaction amount must be positive." };
    }
    if (value > this.accountInfo.balance) {
      return { error: "Insufficient funds for withdrawal." };
    }
    this.accountInfo.balance -= value;
    this.transactionHistory.push({
      type: "withdrawal",
      amount: value,
      timestamp: this._generateTransactionTimestamp()
    });
    return {
      status: "Withdrawal successful",
      new_balance: this.accountInfo.balance
    };
  }
  /** @returns {object} The account info, or `{ error }` when logged out. */
  get_account_info() {
    if (!this.authenticated) {
      return {
        error: "User not authenticated. Please log in to view account information."
      };
    }
    return this.accountInfo;
  }
  /**
   * Log in. The credentials are not inspected.
   *
   * @param {string} _username - Unused.
   * @param {string} _password - Unused.
   * @returns {{status: string}}
   */
  trading_login(_username, _password) {
    if (this.authenticated) {
      return { status: "Already logged in" };
    }
    this.authenticated = true;
    return { status: "Logged in successfully" };
  }
  /** @returns {{status: boolean}} Whether a user is authenticated. */
  trading_get_login_status() {
    return { status: this.authenticated };
  }
  /** @returns {{status: string}} Outcome of the logout attempt. */
  trading_logout() {
    if (!this.authenticated) {
      return { status: "No user is currently logged in" };
    }
    this.authenticated = false;
    return { status: "Logged out successfully" };
  }
  /**
   * Deposit funds. Requires authentication.
   *
   * @param {number} amount - Positive amount to add.
   * @returns {{status: string, new_balance: number} | {error: string}}
   */
  fund_account(amount) {
    if (!this.authenticated) {
      return {
        error: "User not authenticated. Please log in to fund the account."
      };
    }
    // Normalise to a number: `balance += "50"` would concatenate instead of
    // adding, and NaN would slip past a plain `<= 0` comparison.
    const value = Number(amount);
    if (!(value > 0)) {
      return { error: "Funding amount must be positive." };
    }
    this.accountInfo.balance += value;
    this.transactionHistory.push({
      type: "deposit",
      amount: value,
      timestamp: this._generateTransactionTimestamp()
    });
    return {
      status: "Account funded successfully",
      new_balance: this.accountInfo.balance
    };
  }
  /**
   * Remove a symbol from the watch list. Requires authentication.
   *
   * @param {string} symbol - Ticker symbol.
   * @returns {{status: string} | {error: string}}
   */
  remove_stock_from_watchlist(symbol) {
    if (!this.authenticated) {
      return {
        error: "User not authenticated. Please log in to modify the watchlist."
      };
    }
    const idx = this.watchList.indexOf(symbol);
    if (idx === -1) {
      return { error: `Stock ${symbol} not found in watchlist.` };
    }
    this.watchList.splice(idx, 1);
    return { status: `Stock ${symbol} removed from watchlist successfully.` };
  }
  /** @returns {{watchlist: string[]} | {error: string}} */
  get_watchlist() {
    if (!this.authenticated) {
      return {
        error: "User not authenticated. Please log in to view the watchlist."
      };
    }
    return { watchlist: this.watchList };
  }
  /** @returns {{history: number[]} | {error: string}} Ids of all orders. */
  get_order_history() {
    if (!this.authenticated) {
      return {
        error: "User not authenticated. Please log in to view order history."
      };
    }
    return { history: Object.keys(this.orders).map(Number) };
  }
  /**
   * Return transactions whose timestamp lies within an optional date range.
   * Requires authentication.
   *
   * @param {string} [start_date] - Inclusive lower bound, parsed with `new Date`.
   * @param {string} [end_date] - Inclusive upper bound, parsed with `new Date`.
   * @returns {{transaction_history: object[]} | {error: string}}
   */
  get_transaction_history(start_date, end_date) {
    if (!this.authenticated) {
      return {
        error: "User not authenticated. Please log in to view transaction history."
      };
    }
    // Missing bounds default to the widest representable Date range.
    const start = start_date ? new Date(start_date) : /* @__PURE__ */ new Date(0);
    const end = end_date ? new Date(end_date) : /* @__PURE__ */ new Date(864e13);
    const filteredHistory = this.transactionHistory.filter((t) => {
      const txDate = new Date(t.timestamp);
      return txDate >= start && txDate <= end;
    });
    return { transaction_history: filteredHistory };
  }
  /**
   * List the symbols belonging to a sector.
   *
   * @param {string} sector - Sector name.
   * @returns {{stock_list: string[]}} Symbols, or an empty list for an
   *   unknown sector.
   */
  get_available_stocks(sector) {
    const sectorMap = {
      Technology: ["AAPL", "GOOG", "MSFT", "NVDA"],
      Automobile: ["TSLA", "F", "GM"]
    };
    // Own-property check so names like "constructor" do not resolve to
    // inherited Object members.
    const known = Object.prototype.hasOwnProperty.call(sectorMap, sector);
    return { stock_list: known ? sectorMap[sector] : [] };
  }
  /**
   * Keep the symbols whose current price lies within `[min_price, max_price]`.
   *
   * @param {string[]} stocks - Symbols to test; unknown symbols are dropped.
   * @param {number} min_price - Inclusive lower bound.
   * @param {number} max_price - Inclusive upper bound.
   * @returns {{filtered_stocks: string[]}}
   */
  filter_stocks_by_price(stocks, min_price, max_price) {
    const filteredStocks = stocks.filter((symbol) => {
      const stock = this.stocks[symbol];
      if (!stock) {
        return false;
      }
      return stock.price >= min_price && stock.price <= max_price;
    });
    return { filtered_stocks: filteredStocks };
  }
  /**
   * Add a known symbol to the watch list if it is not already present.
   *
   * @param {string} stock - Ticker symbol.
   * @returns {{watchlist: string[]}} The watch list after the attempt.
   */
  add_to_watchlist(stock) {
    const isKnown = Object.prototype.hasOwnProperty.call(this.stocks, stock);
    if (!this.watchList.includes(stock) && isKnown) {
      this.watchList.push(stock);
    }
    return { watchlist: this.watchList };
  }
  /**
   * Report which of the given symbols moved by at least `threshold` percent
   * (absolute value).
   *
   * @param {string[]} stocks - Symbols to inspect; unknown symbols are ignored.
   * @param {number} threshold - Minimum absolute percent change.
   * @returns {{notification: string}}
   */
  notify_price_change(stocks, threshold) {
    const changedStocks = stocks.filter((symbol) => {
      const stock = this.stocks[symbol];
      if (!stock) {
        return false;
      }
      return Math.abs(stock.percent_change) >= threshold;
    });
    if (changedStocks.length > 0) {
      return {
        notification: `Stocks ${changedStocks.join(", ")} have significant price changes.`
      };
    }
    return {
      notification: "No significant price changes in the selected stocks."
    };
  }
};
|
|
2728
|
+
|
|
2729
|
+
// src/multi-turn/classes/travel-api.ts
|
|
2730
|
+
// Matches any string containing at least one decimal digit.
var DIGIT_REGEX = /\d/;
// Matches a whole string shaped like YYYY-MM-DD (digit pattern only; the
// calendar validity of the date is not checked by the regex).
var DATE_FORMAT_REGEX = /^\d{4}-\d{2}-\d{2}$/;
// Baseline travel-API state. Fields set to `void 0` are undefined here;
// presumably they are filled in from a scenario or at authentication —
// verify against the consuming class.
var DEFAULT_STATE4 = {
  random_seed: 141053,
  credit_card_list: {},
  booking_record: {},
  access_token: void 0,
  token_type: void 0,
  token_expires_in: void 0,
  token_scope: void 0,
  user_first_name: void 0,
  user_last_name: void 0,
  budget_limit: void 0
};
|
|
2744
|
+
// Base fare per directed route, keyed "FROM|TO". Routes are listed per origin
// and flattened in order, so Map iteration order is exactly the listing order
// (OKD appears twice on purpose: its LAX route comes last).
var BASE_COSTS = /* @__PURE__ */ new Map(
  [
    ["SFO", { LAX: 200, JFK: 500, ORD: 400, BOS: 450, RMS: 300, SBK: 350, MPC: 370, SVP: 320, SHD: 330, SSV: 340, OKD: 360, WLB: 310, CRH: 380, ATV: 390, PHV: 420, GFD: 430, CIA: 700 }],
    ["LAX", { SFO: 100, JFK: 600, ORD: 500, BOS: 550, RMS: 310, SBK: 320, MPC: 330, SVP: 340, SHD: 350, SSV: 360, OKD: 370, WLB: 380, CRH: 390, ATV: 400, PHV: 410, GFD: 420, HND: 430 }],
    ["JFK", { ORD: 300, BOS: 250, RMS: 450, SBK: 460, MPC: 470, SVP: 480, SHD: 490, SSV: 500, OKD: 510, WLB: 520, CRH: 530, ATV: 540, PHV: 550, GFD: 560, LAX: 570, HND: 800, PVG: 950, PEK: 1000 }],
    ["ORD", { LAX: 180, BOS: 200, RMS: 350, SBK: 360, MPC: 370, SVP: 380, SHD: 390, SSV: 400, OKD: 410, WLB: 420, CRH: 430, ATV: 440, PHV: 450, GFD: 460 }],
    ["BOS", { RMS: 400, SBK: 410, MPC: 420, SVP: 430, SHD: 440, SSV: 450, OKD: 460, WLB: 470, CRH: 480, ATV: 490, PHV: 500, GFD: 510 }],
    ["RMS", { BOS: 200, JFK: 210, SBK: 220, MPC: 230, SVP: 240, SHD: 250, SSV: 260, OKD: 270, WLB: 280, CRH: 290, ATV: 300, PHV: 310, GFD: 320, LAX: 330 }],
    ["SBK", { MPC: 200, SVP: 210, SHD: 220, SSV: 230, OKD: 240, WLB: 250, CRH: 260, ATV: 270, PHV: 280, GFD: 290 }],
    ["MPC", { SVP: 210, SHD: 220, SSV: 230, OKD: 240, WLB: 250, CRH: 260, ATV: 270, PHV: 280, GFD: 290 }],
    ["SVP", { SHD: 230, SSV: 240, OKD: 250, WLB: 260, CRH: 270, ATV: 280, PHV: 290, GFD: 300 }],
    ["SHD", { SSV: 220, OKD: 230, WLB: 240, CRH: 250, ATV: 260, PHV: 270, GFD: 280 }],
    ["SSV", { OKD: 240, WLB: 250, CRH: 260, ATV: 270, PHV: 280, GFD: 290 }],
    ["OKD", { WLB: 230, CRH: 240, ATV: 250, PHV: 260, GFD: 270 }],
    ["WLB", { CRH: 250, ATV: 260, PHV: 270, GFD: 280 }],
    ["CRH", { ATV: 240, PHV: 250, GFD: 260, SFO: 270, RMS: 280, HKG: 290, JFK: 300 }],
    ["ATV", { PHV: 230, GFD: 240 }],
    ["PHV", { GFD: 220 }],
    ["LHR", { CDG: 100 }],
    ["OKD", { LAX: 220 }]
  ].flatMap(
    ([origin, fares]) => Object.entries(fares).map(([destination, fare]) => [`${origin}|${destination}`, fare])
  )
);
|
|
2899
|
+
// City name -> airport code, consulted by getNearestAirportByCity.
var AIRPORT_MAP = Object.fromEntries([
  ["Rivermist", "RMS"],
  ["Stonebrook", "SBK"],
  ["Maplecrest", "MPC"],
  ["Silverpine", "SVP"],
  ["Shadowridge", "SHD"],
  ["London", "LHR"],
  ["Paris", "CDG"],
  ["Sunset Valley", "SSV"],
  ["Oakendale", "OKD"],
  ["Willowbend", "WLB"],
  ["Crescent Hollow", "CRH"],
  ["Autumnville", "ATV"],
  ["Pinehaven", "PHV"],
  ["Greenfield", "GFD"],
  ["San Francisco", "SFO"],
  ["Los Angeles", "LAX"],
  ["New York", "JFK"],
  ["Chicago", "ORD"],
  ["Boston", "BOS"],
  ["Beijing", "PEK"],
  ["Hong Kong", "HKG"],
  ["Rome", "CIA"],
  ["Tokyo", "HND"]
]);
|
|
2924
|
+
// Conversion rates out of USD, keyed "USD|<currency>". computeExchangeRate
// divides by the same rate for the reverse direction.
var EXCHANGE_RATES = /* @__PURE__ */ new Map(
  Object.entries({
    RMB: 7,
    EUR: 0.8,
    JPY: 110,
    GBP: 0.7,
    CAD: 1.3,
    AUD: 1.4,
    INR: 70,
    RUB: 60,
    BRL: 3.8,
    MXN: 20
  }).map(([currency, rate]) => [`USD|${currency}`, rate])
);
|
|
2936
|
+
// Every airport code bookFlight accepts as a departure or destination.
var ALL_AIRPORTS = (
  "RMS SBK MPC SVP SHD CDG LHR SSV OKD WLB PEK HND " +
  "HKG CIA CRH ATV PHV GFD SFO LAX JFK ORD BOS"
).split(" ");
|
|
2961
|
+
/**
 * Small deterministic linear congruential generator:
 *   seed <- (seed * 1103515245 + 12345) mod 2^31
 *
 * The multiplication is done with Math.imul. A plain `seed * 1103515245`
 * exceeds 2^53 once the seed has more than ~23 bits, so the double result is
 * rounded and the low bits that survive the 31-bit mask are wrong.
 * Math.imul keeps the low 32 bits of the product exact.
 */
var SeededRandom2 = class {
  /** @param {number} seed - Integer starting state. */
  constructor(seed) {
    this.seed = seed;
  }
  /**
   * Advance the generator.
   * @returns {number} The new state, an integer in [0, 2^31 - 1].
   */
  next() {
    this.seed = (Math.imul(this.seed, 1103515245) + 12345) & 2147483647;
    return this.seed;
  }
  /**
   * Draw an integer starting at `min`.
   *
   * The offset is `next() % (max - min + 1)`. Because next() never exceeds
   * 2^31 - 1, spans wider than that are not covered up to `max`.
   *
   * @param {number} min - Inclusive lower bound.
   * @param {number} max - Inclusive upper bound.
   * @returns {number}
   */
  randInt(min, max) {
    return min + this.next() % (max - min + 1);
  }
};
|
|
2973
|
+
// Own-key membership test for the id-keyed lookup tables below. Unlike the
// `in` operator it ignores inherited names such as "constructor"/"toString",
// which callers could otherwise pass as a card id, booking id or city name.
var travelHasOwn = (table, key) => Object.prototype.hasOwnProperty.call(table, key);

/**
 * In-memory travel booking simulator: token handling, credit cards, flight
 * quotes, bookings, invoices, insurance and a few lookup utilities.
 *
 * Most token-guarded methods only reject an explicitly expired token
 * (`tokenExpiresIn === 0`) or a mismatching access token; registerCreditCard
 * additionally rejects an uninitialised token.
 */
var TravelAPI = class {
  constructor() {
    var _a;
    this.longContext = false;
    // Quoted fares keyed "FROM|TO|class|date"; bookFlight reads its price back from here.
    this._flightCostLookup = /* @__PURE__ */ new Map();
    this.creditCardList = {};
    this.bookingRecord = {};
    this._random = new SeededRandom2((_a = DEFAULT_STATE4.random_seed) != null ? _a : 141053);
  }
  /**
   * Reset the instance from a scenario object, falling back to DEFAULT_STATE4
   * for every field that is null/undefined.
   *
   * The scenario is deep-copied (JSON round trip) first, so later bookings,
   * refunds and insurance charges never write through into the caller's
   * object. Scenario data is therefore expected to be JSON-serialisable.
   *
   * @param {object} scenario - Initial state using snake_case keys.
   * @param {boolean} [longContext=false] - Enables the verbose responses of
   *   getFlightCost and bookFlight.
   */
  _loadScenario(scenario, longContext = false) {
    const defaults = JSON.parse(JSON.stringify(DEFAULT_STATE4));
    const provided = scenario != null ? JSON.parse(JSON.stringify(scenario)) : {};
    const pick = (key) => (provided[key] != null ? provided[key] : defaults[key]);
    this._random = new SeededRandom2(pick("random_seed"));
    this.creditCardList = pick("credit_card_list");
    this.bookingRecord = pick("booking_record");
    this.accessToken = pick("access_token");
    this.tokenType = pick("token_type");
    this.tokenExpiresIn = pick("token_expires_in");
    this.tokenScope = pick("token_scope");
    this.userFirstName = pick("user_first_name");
    this.userLastName = pick("user_last_name");
    this.budgetLimit = pick("budget_limit");
    this.longContext = longContext;
  }
  /**
   * Issue a fresh bearer token and remember the traveller's name.
   * The client id, secret and refresh token are accepted but not inspected.
   * @returns {{expires_in: number, access_token: string, token_type: string, scope: *}}
   */
  authenticateTravel(_clientId, _clientSecret, _refreshToken, grantType, userFirstName, userLastName) {
    this.tokenExpiresIn = 2;
    this.accessToken = String(this._random.randInt(1e5, 999999));
    this.tokenType = "Bearer";
    this.tokenScope = grantType;
    this.userFirstName = userFirstName;
    this.userLastName = userLastName;
    return {
      expires_in: 2,
      access_token: this.accessToken,
      token_type: "Bearer",
      scope: grantType
    };
  }
  /** @returns {{status: boolean}} True unless the token is unset or expired (0). */
  travelGetLoginStatus() {
    const isNotLoggedIn = this.tokenExpiresIn === void 0 || this.tokenExpiresIn === 0;
    return { status: !isNotLoggedIn };
  }
  /** Always reports fiscal year "2018"; both arguments are ignored. */
  getBudgetFiscalYear(_lastModifiedAfter, _includeRemoved) {
    return { budget_fiscal_year: "2018" };
  }
  /**
   * Register a credit card under a generated id with a generated balance.
   * @returns {{card_id: string}|{error: string}}
   */
  registerCreditCard(accessToken, cardNumber, expirationDate, cardholderName, cardVerificationNumber) {
    if (this.tokenExpiresIn === void 0) {
      return { error: "Token not initialized" };
    }
    if (this.tokenExpiresIn === 0) {
      return { error: "Token expired" };
    }
    if (accessToken !== this.accessToken) {
      return { error: "Invalid access token" };
    }
    // NOTE(review): creditCardList is keyed by card_id (see the assignment
    // below), so this guard only fires when a card number coincides with an
    // existing id. Confirm whether a card_number comparison was intended.
    if (travelHasOwn(this.creditCardList, cardNumber)) {
      return { error: "Card already registered" };
    }
    // Draw order matters for reproducibility: id first, then balance.
    const cardId = String(this._random.randInt(1e11, 999999999999));
    this.creditCardList[cardId] = {
      card_number: cardNumber,
      expiration_date: expirationDate,
      cardholder_name: cardholderName,
      card_verification_number: cardVerificationNumber,
      balance: this._random.randInt(1e4, 99999)
    };
    return { card_id: cardId };
  }
  /**
   * Quote a fare: base cost x class factor (economy 1, business 2, first 5)
   * x date multiplier (2 when the date's digit sum is even, otherwise 1).
   *
   * In long-context mode the quote cache is rebuilt for every known route and
   * all of those fares are returned; otherwise only the requested route is
   * quoted and cached.
   *
   * @returns {{travel_cost_list: number[]}}
   * @throws {Error} Unknown route (checked first) or unknown travel class.
   */
  getFlightCost(travelFrom, travelTo, travelDate, travelClass) {
    const baseCost = BASE_COSTS.get(`${travelFrom}|${travelTo}`);
    if (baseCost === void 0) {
      throw new Error("No available route for the given airports.");
    }
    const classFactors = /* @__PURE__ */ new Map([
      ["economy", 1],
      ["business", 2],
      ["first", 5]
    ]);
    const factor = classFactors.get(travelClass);
    if (factor === void 0) {
      throw new Error(
        "Invalid travel class. Options are: economy, business, first."
      );
    }
    const digitSum = travelDate.split("").filter((c) => DIGIT_REGEX.test(c)).reduce((sum, c) => sum + Number.parseInt(c, 10), 0);
    const travelDateMultiplier = digitSum % 2 === 0 ? 2 : 1;
    const travelCostList = [];
    if (this.longContext) {
      this._flightCostLookup.clear();
      for (const [route, base] of BASE_COSTS.entries()) {
        const cost = base * factor * travelDateMultiplier;
        const [from, to] = route.split("|");
        this._flightCostLookup.set(`${from}|${to}|${travelClass}|${travelDate}`, { cost });
        travelCostList.push(cost);
      }
    } else {
      const travelCost = baseCost * factor * travelDateMultiplier;
      travelCostList.push(travelCost);
      this._flightCostLookup.set(`${travelFrom}|${travelTo}|${travelClass}|${travelDate}`, { cost: travelCost });
    }
    return { travel_cost_list: travelCostList };
  }
  /** @returns {{card_balance: number}|{error: string}} */
  getCreditCardBalance(accessToken, cardId) {
    if (this.tokenExpiresIn === 0) {
      return { error: "Token expired" };
    }
    if (accessToken !== this.accessToken) {
      return { error: "Invalid access token" };
    }
    if (!travelHasOwn(this.creditCardList, cardId)) {
      return {
        error: `Card not registered. Here are a list of card_id's: ${Object.keys(this.creditCardList).join(", ")}`
      };
    }
    return { card_balance: this.creditCardList[cardId].balance };
  }
  /**
   * Book a flight and charge the card.
   *
   * Validation order: token, card, airports, date shape, class, route/fare,
   * funds, budget limit. Any exception raised while pricing is reported as
   * `{ booking_status: false, error }` instead of propagating.
   *
   * @returns {object} On success: booking_id, transaction_id,
   *   booking_status true and booking_history (the full record in long-context
   *   mode, an empty object otherwise).
   */
  bookFlight(accessToken, cardId, travelDate, travelFrom, travelTo, travelClass) {
    const reject = (error) => ({ booking_status: false, error });
    if (this.tokenExpiresIn === 0) {
      return reject("Token expired");
    }
    if (accessToken !== this.accessToken) {
      return reject("Invalid access token");
    }
    if (!travelHasOwn(this.creditCardList, cardId)) {
      return reject("Card not registered");
    }
    const card = this.creditCardList[cardId];
    if (!travelHasOwn(card, "balance")) {
      return reject("Balance not found");
    }
    const allAirports = this.listAllAirports();
    if (!allAirports.includes(travelFrom)) {
      return reject(`Invalid departure airport code: ${travelFrom}`);
    }
    if (!allAirports.includes(travelTo)) {
      return reject(`Invalid destination airport code: ${travelTo}`);
    }
    if (!DATE_FORMAT_REGEX.test(travelDate)) {
      return reject("Invalid date format. Use YYYY-MM-DD.");
    }
    const validClasses = /* @__PURE__ */ new Set(["economy", "business", "first"]);
    if (!validClasses.has(travelClass)) {
      return reject(`Invalid travel class. Must be one of ${[...validClasses].join(", ")}`);
    }
    try {
      // Pricing goes through getFlightCost so the fare is cached, then read back.
      this.getFlightCost(travelFrom, travelTo, travelDate, travelClass);
      const travelCostEntry = this._flightCostLookup.get(`${travelFrom}|${travelTo}|${travelClass}|${travelDate}`);
      if (!travelCostEntry) {
        return reject("No available route for the given parameters");
      }
      const travelCost = travelCostEntry.cost;
      if (card.balance < travelCost) {
        return reject("Insufficient funds");
      }
      if (this.budgetLimit !== void 0 && card.balance < this.budgetLimit) {
        return reject("Balance is less than budget limit");
      }
      card.balance -= travelCost;
      // Draw order matters for reproducibility: booking id, then transaction id.
      const bookingId = String(this._random.randInt(1e6, 9999999));
      const transactionId = String(this._random.randInt(1e7, 99999999));
      this.bookingRecord[bookingId] = {
        card_id: cardId,
        travel_date: travelDate,
        travel_from: travelFrom,
        travel_to: travelTo,
        travel_class: travelClass,
        travel_cost: travelCost,
        transaction_id: transactionId
      };
      return {
        booking_id: bookingId,
        transaction_id: transactionId,
        booking_status: true,
        booking_history: this.longContext ? this.bookingRecord : {}
      };
    } catch (e) {
      return reject(String(e));
    }
  }
  /** @returns {{invoice: object}|{error: string}} The insurance id is ignored. */
  retrieveInvoice(accessToken, bookingId, _insuranceId) {
    if (this.tokenExpiresIn === 0) {
      return { error: "Token expired" };
    }
    if (accessToken !== this.accessToken) {
      return { error: "Invalid access token" };
    }
    if (!bookingId || !travelHasOwn(this.bookingRecord, bookingId)) {
      return { error: "Booking not found" };
    }
    const booking = this.bookingRecord[bookingId];
    return {
      invoice: {
        booking_id: bookingId,
        travel_date: booking.travel_date,
        travel_from: booking.travel_from,
        travel_to: booking.travel_to,
        travel_class: booking.travel_class,
        travel_cost: booking.travel_cost,
        transaction_id: booking.transaction_id
      }
    };
  }
  /** @returns {{booking_history: object}|{error: string}} A deep copy of all bookings. */
  getBookingHistory(accessToken) {
    if (this.tokenExpiresIn === 0) {
      return { error: "Token expired" };
    }
    if (accessToken !== this.accessToken) {
      return { error: "Invalid access token" };
    }
    return { booking_history: JSON.parse(JSON.stringify(this.bookingRecord)) };
  }
  /** @returns {string[]} The shared ALL_AIRPORTS array (not a copy). */
  listAllAirports() {
    return ALL_AIRPORTS;
  }
  /**
   * Cancel a booking and refund its cost to the card that paid for it.
   * @returns {{cancel_status: boolean, error?: string}}
   */
  cancelBooking(accessToken, bookingId) {
    if (this.tokenExpiresIn === 0) {
      return { cancel_status: false, error: "Token expired" };
    }
    if (accessToken !== this.accessToken) {
      return { cancel_status: false, error: "Invalid access token" };
    }
    if (!travelHasOwn(this.bookingRecord, bookingId)) {
      return { cancel_status: false, error: "Booking not found" };
    }
    const { card_id: cardId, travel_cost: travelCost } = this.bookingRecord[bookingId];
    this.creditCardList[cardId].balance += travelCost;
    delete this.bookingRecord[bookingId];
    return { cancel_status: true };
  }
  /**
   * Convert `value` between currencies using EXCHANGE_RATES. A listed pair
   * multiplies; the reverse of a listed pair divides and rounds to 2 decimals.
   * @throws {Error} When neither direction is listed.
   */
  computeExchangeRate(baseCurrency, targetCurrency, value) {
    const forwardRate = EXCHANGE_RATES.get(`${baseCurrency}|${targetCurrency}`);
    if (forwardRate !== void 0) {
      return { exchanged_value: value * forwardRate };
    }
    const reverseRate = EXCHANGE_RATES.get(`${targetCurrency}|${baseCurrency}`);
    if (reverseRate !== void 0) {
      return { exchanged_value: Math.round(value / reverseRate * 100) / 100 };
    }
    throw new Error("No available exchange rate for the given currencies.");
  }
  /**
   * Check that the traveller is the authenticated user, is at least 18 and
   * holds a passport number starting with "US".
   *
   * The date of birth is split into its numeric parts and compared with
   * today's local date. Going through `new Date("YYYY-MM-DD")` would parse
   * the string as UTC and then read it back in local time, shifting the
   * birthday by a day in zones behind UTC. Dates that match the pattern but
   * do not exist on the calendar (e.g. month 13, Feb 30) are rejected with
   * the same message as a malformed date; previously such input could yield
   * a NaN age that slipped past the age check.
   *
   * @returns {{verification_status: boolean, verification_failure?: string}}
   */
  verifyTravelerInformation(firstName, lastName, dateOfBirth, passportNumber) {
    const fail = (reason) => ({ verification_status: false, verification_failure: reason });
    if (this.userFirstName !== firstName || this.userLastName !== lastName) {
      return fail(`Cannot book flight information for another user. Expected ${this.userFirstName} ${this.userLastName}, got ${firstName} ${lastName}`);
    }
    const invalidDateMessage = "Invalid date of birth format. Please use YYYY-MM-DD.";
    if (!DATE_FORMAT_REGEX.test(dateOfBirth)) {
      return fail(invalidDateMessage);
    }
    const [birthYear, birthMonth, birthDay] = String(dateOfBirth).split("-").map((part) => Number.parseInt(part, 10));
    // Round-trip through a UTC date: an impossible day/month rolls over and no longer matches.
    const calendarProbe = /* @__PURE__ */ new Date(0);
    calendarProbe.setUTCFullYear(birthYear, birthMonth - 1, birthDay);
    const isRealDate = calendarProbe.getUTCFullYear() === birthYear && calendarProbe.getUTCMonth() === birthMonth - 1 && calendarProbe.getUTCDate() === birthDay;
    if (!isRealDate) {
      return fail(invalidDateMessage);
    }
    const today = /* @__PURE__ */ new Date();
    let age = today.getFullYear() - birthYear;
    const monthDiff = today.getMonth() + 1 - birthMonth;
    if (monthDiff < 0 || monthDiff === 0 && today.getDate() < birthDay) {
      age--;
    }
    if (age < 18) {
      return fail("Traveler must be at least 18 years old.");
    }
    if (!passportNumber.startsWith("US")) {
      return fail("Passport must be issued by the United States.");
    }
    return { verification_status: true };
  }
  /** Store a numeric budget limit. @returns {{budget_limit: number}|{error: string}} */
  setBudgetLimit(accessToken, budgetLimit) {
    if (this.tokenExpiresIn === 0) {
      return { error: "Token expired" };
    }
    if (accessToken !== this.accessToken) {
      return { error: "Invalid access token" };
    }
    this.budgetLimit = Number(budgetLimit);
    return { budget_limit: this.budgetLimit };
  }
  /** @returns {{nearest_airport: string}} The mapped code, or "Unknown" for unlisted cities. */
  getNearestAirportByCity(location) {
    return {
      nearest_airport: travelHasOwn(AIRPORT_MAP, location) ? AIRPORT_MAP[location] : "Unknown"
    };
  }
  /**
   * Charge an insurance premium to a card for an existing booking.
   * The insurance type is ignored and the card balance is not checked.
   * @returns {{insurance_id?: string, insurance_status: boolean, error?: string}}
   */
  purchaseInsurance(accessToken, _insuranceType, bookingId, insuranceCost, cardId) {
    const reject = (error) => ({ insurance_status: false, error });
    if (this.tokenExpiresIn === 0) {
      return reject("Token expired");
    }
    if (accessToken !== this.accessToken) {
      return reject("Invalid access token");
    }
    if (this.budgetLimit !== void 0 && this.budgetLimit < insuranceCost) {
      return reject("Exceeded budget limit");
    }
    if (!travelHasOwn(this.bookingRecord, bookingId)) {
      return reject("Booking not found");
    }
    if (!travelHasOwn(this.creditCardList, cardId)) {
      return reject("Credit card not registered");
    }
    this.creditCardList[cardId].balance -= insuranceCost;
    return {
      insurance_id: String(this._random.randInt(1e8, 999999999)),
      insurance_status: true
    };
  }
  /** Acknowledge a support message for an existing booking; the message itself is ignored. */
  contactCustomerSupport(bookingId, _message) {
    if (!travelHasOwn(this.bookingRecord, bookingId)) {
      return { error: "Booking not found" };
    }
    return {
      customer_support_message: "Thank you for contacting customer support. Your message has been received and we will get back to you shortly."
    };
  }
  /** @returns {{credit_card_list: object}} The live card table (not a copy). */
  getAllCreditCards() {
    return { credit_card_list: this.creditCardList };
  }
};
|
|
3334
|
+
|
|
3335
|
+
// src/multi-turn/classes/twitter-api.ts
|
|
3336
|
+
// Baseline TwitterAPI state; _loadScenario falls back to these values for any
// field a scenario leaves null/undefined.
var DEFAULT_STATE5 = {
  // Credentials checked by authenticate_twitter.
  username: "john",
  password: "john123",
  authenticated: false,
  // Content tables start empty.
  tweets: {},
  comments: {},
  retweets: {},
  following_list: ["alice", "bob"],
  // Id handed to the next posted tweet.
  tweet_counter: 0
};
|
|
3346
|
+
// Own-key membership test for the id/username-keyed tables below. Unlike the
// `in` operator (or a bare `table[key]` read) it ignores inherited names such
// as "constructor", which callers could otherwise pass as an id or username.
var twitterHasOwn = (table, key) => Object.prototype.hasOwnProperty.call(table, key);

/**
 * In-memory micro-blogging simulator for a single logged-in account:
 * posting, retweeting, commenting, mentions, follow lists and simple queries.
 */
var TwitterAPI = class {
  constructor() {
    this.username = "john";
    this.password = "john123";
    this.authenticated = false;
    this.tweets = {};
    this.comments = {};
    this.retweets = {};
    this.followingList = [];
    this.tweetCounter = 0;
  }
  /**
   * Reset the instance from a scenario object, falling back to DEFAULT_STATE5
   * for every field that is null/undefined.
   *
   * The scenario is deep-copied (JSON round trip) first, so mention(),
   * follow_user(), comment() and retweet() never write through into the
   * caller's object. Scenario data is therefore expected to be
   * JSON-serialisable. Tweet keys are normalised through parseInt.
   *
   * @param {object} scenario - Initial state using snake_case keys.
   * @param {boolean} [_longContext=false] - Accepted but unused.
   */
  _loadScenario(scenario, _longContext = false) {
    const defaults = JSON.parse(JSON.stringify(DEFAULT_STATE5));
    const provided = scenario != null ? JSON.parse(JSON.stringify(scenario)) : {};
    const pick = (key) => (provided[key] != null ? provided[key] : defaults[key]);
    this.username = pick("username");
    this.password = pick("password");
    this.authenticated = pick("authenticated");
    this.tweets = Object.fromEntries(
      Object.entries(pick("tweets")).map(([id, tweet]) => [Number.parseInt(id, 10), tweet])
    );
    this.comments = pick("comments");
    this.retweets = pick("retweets");
    this.followingList = pick("following_list");
    this.tweetCounter = pick("tweet_counter");
  }
  /** Log in when both credentials match. @returns {{authentication_status: boolean}} */
  authenticate_twitter(username, password) {
    const credentialsMatch = username === this.username && password === this.password;
    if (credentialsMatch) {
      this.authenticated = true;
    }
    return { authentication_status: credentialsMatch };
  }
  /** @returns {{login_status: boolean}} */
  posting_get_login_status() {
    return { login_status: !!this.authenticated };
  }
  /**
   * Post a tweet under the next counter value.
   * @returns {object} The stored tweet, or `{ error }` when not authenticated.
   */
  post_tweet(content, tags = [], mentions = []) {
    if (!this.authenticated) {
      return {
        error: "User not authenticated. Please authenticate before posting."
      };
    }
    const tweet = {
      id: this.tweetCounter,
      username: this.username,
      content,
      tags,
      mentions
    };
    this.tweets[this.tweetCounter] = tweet;
    this.tweetCounter += 1;
    return tweet;
  }
  /** Record a retweet for the current user, once per tweet. */
  retweet(tweetId) {
    if (!this.authenticated) {
      return {
        error: "User not authenticated. Please authenticate before retweeting."
      };
    }
    if (!twitterHasOwn(this.tweets, tweetId)) {
      return { error: `Tweet with ID ${tweetId} not found.` };
    }
    if (!twitterHasOwn(this.retweets, this.username)) {
      this.retweets[this.username] = [];
    }
    const mine = this.retweets[this.username];
    if (mine.includes(tweetId)) {
      return { retweet_status: "Already retweeted" };
    }
    mine.push(tweetId);
    return { retweet_status: "Successfully retweeted" };
  }
  /** Append a comment by the current user to an existing tweet. */
  comment(tweetId, commentContent) {
    if (!this.authenticated) {
      return {
        error: "User not authenticated. Please authenticate before commenting."
      };
    }
    if (!twitterHasOwn(this.tweets, tweetId)) {
      return { error: `Tweet with ID ${tweetId} not found.` };
    }
    if (!twitterHasOwn(this.comments, tweetId)) {
      this.comments[tweetId] = [];
    }
    this.comments[tweetId].push({
      username: this.username,
      content: commentContent
    });
    return { comment_status: "Comment added successfully" };
  }
  /** Add usernames to an existing tweet's mentions. No authentication check is made. */
  mention(tweetId, mentionedUsernames) {
    if (!twitterHasOwn(this.tweets, tweetId)) {
      return { error: `Tweet with ID ${tweetId} not found.` };
    }
    this.tweets[tweetId].mentions.push(...mentionedUsernames);
    return { mention_status: "Users mentioned successfully" };
  }
  /** @returns {{follow_status: boolean}|{error: string}} false when already followed. */
  follow_user(username_to_follow) {
    if (!this.authenticated) {
      return {
        error: "User not authenticated. Please authenticate before following."
      };
    }
    if (this.followingList.includes(username_to_follow)) {
      return { follow_status: false };
    }
    this.followingList.push(username_to_follow);
    return { follow_status: true };
  }
  /** @returns {{following_list: string[]}|{error: string}} The live list (not a copy). */
  list_all_following() {
    if (!this.authenticated) {
      return {
        error: "User not authenticated. Please authenticate before listing following."
      };
    }
    return { following_list: this.followingList };
  }
  /** @returns {{unfollow_status: boolean}|{error: string}} false when not followed. */
  unfollow_user(username_to_unfollow) {
    if (!this.authenticated) {
      return {
        error: "User not authenticated. Please authenticate before unfollowing."
      };
    }
    if (!this.followingList.includes(username_to_unfollow)) {
      return { unfollow_status: false };
    }
    this.followingList = this.followingList.filter(
      (followed) => followed !== username_to_unfollow
    );
    return { unfollow_status: true };
  }
  /** @returns {object} The stored tweet or `{ error }`. */
  get_tweet(tweet_id) {
    if (!twitterHasOwn(this.tweets, tweet_id)) {
      return { error: `Tweet with ID ${tweet_id} not found.` };
    }
    return this.tweets[tweet_id];
  }
  /** @returns {object[]} All tweets posted by `username`. */
  get_user_tweets(username) {
    return Object.values(this.tweets).filter(
      (tweet) => tweet.username === username
    );
  }
  /** Case-insensitive substring search over tweet content and tags. */
  search_tweets(keyword) {
    const needle = keyword.toLowerCase();
    return Object.values(this.tweets).filter(
      (tweet) => tweet.content.toLowerCase().includes(needle) || tweet.tags.some(
        (tag) => tag.toLowerCase().includes(needle)
      )
    );
  }
  /** @returns {object[]} The tweet's comments, or a one-element error list for unknown ids. */
  get_tweet_comments(tweet_id) {
    if (!twitterHasOwn(this.tweets, tweet_id)) {
      return [{ error: `Tweet with ID ${tweet_id} not found.` }];
    }
    const stored = twitterHasOwn(this.comments, tweet_id) ? this.comments[tweet_id] : void 0;
    return stored || [];
  }
  /**
   * Count a user's tweets and retweets. The following count is only known
   * for the current account and is reported as 0 for everyone else.
   */
  get_user_stats(username) {
    const tweetCount = Object.values(this.tweets).filter(
      (tweet) => tweet.username === username
    ).length;
    const followingCount = username === this.username ? this.followingList.length : 0;
    const retweeted = twitterHasOwn(this.retweets, username) ? this.retweets[username] : void 0;
    return {
      tweet_count: tweetCount,
      following_count: followingCount,
      retweet_count: (retweeted || []).length
    };
  }
};
|
|
3517
|
+
|
|
3518
|
+
// src/multi-turn/classes/vehicle-control-api.ts
|
|
3519
|
+
var MAX_FUEL_LEVEL = 50;
|
|
3520
|
+
var MIN_FUEL_LEVEL = 0;
|
|
3521
|
+
var MILE_PER_GALLON = 20;
|
|
3522
|
+
var DEFAULT_STATE6 = {
|
|
3523
|
+
random_seed: 141053,
|
|
3524
|
+
fuelLevel: 0,
|
|
3525
|
+
batteryVoltage: 12.6,
|
|
3526
|
+
engineState: "stopped",
|
|
3527
|
+
remainingUnlockedDoors: 4,
|
|
3528
|
+
doorStatus: {
|
|
3529
|
+
driver: "unlocked",
|
|
3530
|
+
passenger: "unlocked",
|
|
3531
|
+
rear_left: "unlocked",
|
|
3532
|
+
rear_right: "unlocked"
|
|
3533
|
+
},
|
|
3534
|
+
acTemperature: 25,
|
|
3535
|
+
fanSpeed: 50,
|
|
3536
|
+
acMode: "auto",
|
|
3537
|
+
humidityLevel: 50,
|
|
3538
|
+
headLightStatus: "off",
|
|
3539
|
+
parkingBrakeStatus: "released",
|
|
3540
|
+
parkingBrakeForce: 0,
|
|
3541
|
+
slopeAngle: 0,
|
|
3542
|
+
brakePedalStatus: "released",
|
|
3543
|
+
brakePedalForce: 0,
|
|
3544
|
+
distanceToNextVehicle: 50,
|
|
3545
|
+
cruiseStatus: "inactive",
|
|
3546
|
+
destination: "None",
|
|
3547
|
+
frontLeftTirePressure: 32,
|
|
3548
|
+
frontRightTirePressure: 32,
|
|
3549
|
+
rearLeftTirePressure: 30,
|
|
3550
|
+
rearRightTirePressure: 30
|
|
3551
|
+
};
|
|
3552
|
+
/**
 * Small deterministic pseudo-random generator (linear congruential step on
 * `this.seed`), so that repeated runs with the same seed yield the same values.
 */
var SeededRandom3 = class {
  /** @param seed Initial generator state. */
  constructor(seed) {
    this.seed = seed;
  }
  /**
   * Advance the generator one step and map the new state onto [min, max).
   *
   * @param min Lower bound of the returned value.
   * @param max Upper bound (scaled by state / modulus, which is below 1 for
   *   non-negative integer seeds).
   * @returns The next pseudo-random number.
   */
  uniform(min, max) {
    const multiplier = 9301;
    const increment = 49297;
    const modulus = 233280;
    const nextSeed = (this.seed * multiplier + increment) % modulus;
    this.seed = nextSeed;
    const fraction = nextSeed / modulus;
    return min + fraction * (max - min);
  }
};
|
|
3562
|
+
/**
 * In-memory vehicle simulator exposed as a tool API (engine, doors, climate,
 * lights, brakes, cruise control, navigation, tires).
 *
 * Fields whose name starts with "_" are skipped by `equals`; all other own
 * fields make up the comparable vehicle state.
 */
var VehicleControlAPI = class _VehicleControlAPI {
  /** Creates a vehicle in the built-in default state (same values as DEFAULT_STATE6). */
  constructor() {
    this.longContext = false;
    this._apiDescription = "This tool belongs to the vehicle control system, which allows users to control various aspects of the car such as engine, doors, climate control, lights, and more.";
    this.fuelLevel = 0;
    this.batteryVoltage = 12.6;
    this.engineState = "stopped";
    this.remainingUnlockedDoors = 4;
    this.doorStatus = {
      driver: "unlocked",
      passenger: "unlocked",
      rear_left: "unlocked",
      rear_right: "unlocked"
    };
    this.acTemperature = 25;
    this.fanSpeed = 50;
    this.acMode = "auto";
    this.humidityLevel = 50;
    this.headLightStatus = "off";
    this.parkingBrakeStatus = "released";
    this._parkingBrakeForce = 0;
    this._slopeAngle = 0;
    this.brakePedalStatus = "released";
    this._brakePedalForce = 0;
    this.distanceToNextVehicle = 50;
    this.cruiseStatus = "inactive";
    this.destination = "None";
    this.frontLeftTirePressure = 32;
    this.frontRightTirePressure = 32;
    this.rearLeftTirePressure = 30;
    this.rearRightTirePressure = 30;
    this._random = new SeededRandom3(141053);
  }
  /**
   * Reset the vehicle from a scenario object; any field that is missing (or
   * null) falls back to DEFAULT_STATE6.
   *
   * The door map is copied rather than stored by reference: lockDoors mutates
   * `this.doorStatus` in place, and without the copy that mutation would leak
   * into the caller's scenario object and into every other instance loaded
   * from the same scenario.
   *
   * @param scenario Partial state overrides; null/undefined means "all defaults".
   * @param longContext Stored on the instance as `longContext`.
   */
  _loadScenario(scenario, longContext = false) {
    const source = scenario != null ? scenario : {};
    const defaults = JSON.parse(JSON.stringify(DEFAULT_STATE6));
    const pick = (key) => {
      const provided = source[key];
      return provided != null ? provided : defaults[key];
    };
    this._random = new SeededRandom3(pick("random_seed"));
    this.fuelLevel = pick("fuelLevel");
    this.batteryVoltage = pick("batteryVoltage");
    this.engineState = pick("engineState");
    this.doorStatus = Object.assign({}, pick("doorStatus"));
    // Derived from the door map; a remainingUnlockedDoors value in the scenario is not consulted.
    this.remainingUnlockedDoors = 4 - Object.values(this.doorStatus).filter((status) => status === "locked").length;
    this.acTemperature = pick("acTemperature");
    this.fanSpeed = pick("fanSpeed");
    this.acMode = pick("acMode");
    this.humidityLevel = pick("humidityLevel");
    this.headLightStatus = pick("headLightStatus");
    this.parkingBrakeStatus = pick("parkingBrakeStatus");
    this._parkingBrakeForce = pick("parkingBrakeForce");
    this._slopeAngle = pick("slopeAngle");
    this.brakePedalStatus = pick("brakePedalStatus");
    this._brakePedalForce = pick("brakePedalForce");
    this.distanceToNextVehicle = pick("distanceToNextVehicle");
    this.cruiseStatus = pick("cruiseStatus");
    this.destination = pick("destination");
    this.frontLeftTirePressure = pick("frontLeftTirePressure");
    this.frontRightTirePressure = pick("frontRightTirePressure");
    this.rearLeftTirePressure = pick("rearLeftTirePressure");
    this.rearRightTirePressure = pick("rearRightTirePressure");
    this.longContext = longContext;
  }
  /**
   * Compare the public state of two vehicles.
   *
   * @param other Candidate to compare against.
   * @returns true when `other` is a VehicleControlAPI and every own field of
   *   `this` not starting with "_" serialises to the same JSON in both.
   */
  equals(other) {
    if (!(other instanceof _VehicleControlAPI)) {
      return false;
    }
    for (const attrName of Object.keys(this)) {
      if (attrName.startsWith("_")) {
        continue;
      }
      const modelAttr = this[attrName];
      const groundTruthAttr = other[attrName];
      if (JSON.stringify(modelAttr) !== JSON.stringify(groundTruthAttr)) {
        return false;
      }
    }
    return true;
  }
  /**
   * Start (or stop) the engine.
   *
   * "STOP" sets the engine to stopped first, but the remaining checks still
   * run, so a successful return only happens for "START".
   *
   * @param ignitionMode "START" or "STOP".
   * @returns Engine/fuel/battery status, or `{ error }` when a precondition
   *   fails (unlocked doors, brake not fully pressed, fuel below minimum,
   *   mode other than "START").
   */
  startEngine(ignitionMode) {
    if (ignitionMode === "STOP") {
      this.engineState = "stopped";
    }
    if (this.remainingUnlockedDoors > 0) {
      const unlockedDoors = Object.entries(this.doorStatus).filter(([, status]) => status === "unlocked").map(([door]) => door).join(", ");
      return {
        error: `All doors must be locked before starting the engine. Here are the unlocked doors: ${unlockedDoors}`
      };
    }
    if (this.brakePedalStatus !== "pressed") {
      return {
        error: "Brake pedal needs to be pressed when starting the engine."
      };
    }
    // 1e3 is the force pressBrakePedal records for pedal position 1 (fully pressed).
    if (this._brakePedalForce !== 1e3) {
      return {
        error: "Must press the brake fully before starting the engine."
      };
    }
    if (this.fuelLevel < MIN_FUEL_LEVEL) {
      return { error: "Fuel tank is empty." };
    }
    if (ignitionMode === "START") {
      this.engineState = "running";
    } else {
      return { error: "Invalid ignition mode." };
    }
    return {
      engineState: this.engineState,
      fuelLevel: this.fuelLevel,
      batteryVoltage: this.batteryVoltage
    };
  }
  /**
   * Add fuel to the tank.
   *
   * @param fuelAmount Amount to add; must be non-negative and must not push
   *   the level above MAX_FUEL_LEVEL.
   * @returns `{ fuelLevel }` after filling, or `{ error }`.
   */
  fillFuelTank(fuelAmount) {
    if (fuelAmount < 0) {
      return { error: "Fuel amount cannot be negative." };
    }
    if (this.fuelLevel + fuelAmount > MAX_FUEL_LEVEL) {
      return { error: "Cannot fill gas above the tank capacity." };
    }
    if (this.fuelLevel + fuelAmount < MIN_FUEL_LEVEL) {
      return { error: "Fuel tank is empty. Min fuel level is 0 gallons." };
    }
    this.fuelLevel += fuelAmount;
    return { fuelLevel: this.fuelLevel };
  }
  /**
   * Lock or unlock a set of doors and keep remainingUnlockedDoors in sync.
   *
   * @param unlock true to unlock, false to lock.
   * @param door Iterable of door names; doors already in the requested state
   *   are skipped so the counter is not adjusted twice.
   * @returns `{ lockStatus, remainingUnlockedDoors }`.
   */
  lockDoors(unlock, door) {
    if (unlock) {
      for (const d of door) {
        if (this.doorStatus[d] === "unlocked") {
          continue;
        }
        this.doorStatus[d] = "unlocked";
        this.remainingUnlockedDoors += 1;
      }
      return {
        lockStatus: "unlocked",
        remainingUnlockedDoors: this.remainingUnlockedDoors
      };
    }
    for (const d of door) {
      if (this.doorStatus[d] === "locked") {
        continue;
      }
      this.doorStatus[d] = "locked";
      this.remainingUnlockedDoors -= 1;
    }
    return {
      lockStatus: "locked",
      remainingUnlockedDoors: this.remainingUnlockedDoors
    };
  }
  /**
   * Set the A/C temperature, fan speed and mode.
   *
   * The stored temperature is converted to Celsius when `unit` is
   * "fahrenheit"; the returned `currentACTemperature` echoes the value as
   * passed in (unconverted).
   *
   * @param temperature Target temperature in `unit`.
   * @param unit "celsius" (default) or "fahrenheit".
   * @param fanSpeed 0-100 inclusive.
   * @param mode Stored as-is in `acMode`.
   * @returns Climate summary, or `{ error }` for an out-of-range fan speed.
   */
  adjustClimateControl(temperature, unit = "celsius", fanSpeed = 50, mode = "auto") {
    if (fanSpeed < 0 || fanSpeed > 100) {
      return { error: "Fan speed must be between 0 and 100." };
    }
    this.acTemperature = temperature;
    if (unit === "fahrenheit") {
      this.acTemperature = (temperature - 32) * 5 / 9;
    }
    this.fanSpeed = fanSpeed;
    this.acMode = mode;
    return {
      currentACTemperature: temperature,
      climateMode: mode,
      humidityLevel: this.humidityLevel
    };
  }
  /** @returns `{ outsideTemperature }` drawn from the seeded generator in [-10, 40). */
  get_outside_temperature_from_google() {
    return { outsideTemperature: this._random.uniform(-10, 40) };
  }
  /** @returns Always `{ error: 404 }`. */
  get_outside_temperature_from_weather_com() {
    return { error: 404 };
  }
  /**
   * Switch the headlights.
   *
   * @param mode "on", "off" or "auto"; anything but "on" stores "off".
   * @returns `{ headlightStatus }` or `{ error }` for an unknown mode.
   */
  setHeadlights(mode) {
    if (!["on", "off", "auto"].includes(mode)) {
      return { error: "Invalid headlight mode." };
    }
    this.headLightStatus = mode === "on" ? "on" : "off";
    return { headlightStatus: this.headLightStatus };
  }
  /**
   * Report one section of the vehicle state.
   *
   * @param option One of "fuel", "battery", "doors", "climate", "headlights",
   *   "parkingBrake", "brakePedal", "engine".
   * @returns The matching fields, or `{ error: "Invalid option" }`.
   */
  displayCarStatus(option) {
    const status = {};
    if (option === "fuel") {
      status.fuelLevel = this.fuelLevel;
    } else if (option === "battery") {
      status.batteryVoltage = this.batteryVoltage;
    } else if (option === "doors") {
      status.doorStatus = this.doorStatus;
    } else if (option === "climate") {
      status.currentACTemperature = this.acTemperature;
      status.fanSpeed = this.fanSpeed;
      status.climateMode = this.acMode;
      status.humidityLevel = this.humidityLevel;
    } else if (option === "headlights") {
      status.headlightStatus = this.headLightStatus;
    } else if (option === "parkingBrake") {
      status.parkingBrakeStatus = this.parkingBrakeStatus;
      status.parkingBrakeForce = this._parkingBrakeForce;
      status.slopeAngle = this._slopeAngle;
    } else if (option === "brakePedal") {
      status.brakePedalStatus = this.brakePedalStatus;
      status.brakePedalForce = this._brakePedalForce;
    } else if (option === "engine") {
      status.engineState = this.engineState;
    } else {
      status.error = "Invalid option";
    }
    return status;
  }
  /**
   * Engage or release the parking brake.
   *
   * @param mode "engage" or "release".
   * @returns Brake status with the recorded force and slope angle, or `{ error }`.
   */
  activateParkingBrake(mode) {
    if (!["engage", "release"].includes(mode)) {
      return { error: "Invalid mode" };
    }
    if (mode === "engage") {
      this.parkingBrakeStatus = "engaged";
      this._parkingBrakeForce = 500;
      this._slopeAngle = 10;
      return {
        parkingBrakeStatus: "engaged",
        _parkingBrakeForce: 500,
        _slopeAngle: 10
      };
    }
    this.parkingBrakeStatus = "released";
    this._parkingBrakeForce = 0;
    this._slopeAngle = 10;
    return {
      parkingBrakeStatus: "released",
      _parkingBrakeForce: 0,
      _slopeAngle: 10
    };
  }
  /**
   * Press the brake pedal.
   *
   * @param pedalPosition 0 (released) to 1 (fully pressed); force is
   *   pedalPosition * 1000.
   * @returns `{ brakePedalStatus, brakePedalForce }` or `{ error }`.
   */
  pressBrakePedal(pedalPosition) {
    if (pedalPosition < 0 || pedalPosition > 1) {
      return { error: "Pedal position must be between 0 and 1." };
    }
    if (pedalPosition === 0) {
      this.brakePedalStatus = "released";
      this._brakePedalForce = 0;
      return { brakePedalStatus: "released", brakePedalForce: 0 };
    }
    const maxBrakeForce = 1e3;
    const force = pedalPosition * maxBrakeForce;
    this.brakePedalStatus = "pressed";
    this._brakePedalForce = force;
    return { brakePedalStatus: "pressed", brakePedalForce: force };
  }
  /** Release the brake pedal and zero its force. */
  releaseBrakePedal() {
    this.brakePedalStatus = "released";
    this._brakePedalForce = 0;
    return { brakePedalStatus: "released", brakePedalForce: 0 };
  }
  /**
   * Activate or deactivate cruise control.
   *
   * `distanceToNextVehicle` is stored before the speed is validated, so an
   * "Invalid speed" error still updates the distance.
   *
   * @param speed Must be 0-120 and a multiple of 5 when activating.
   * @param activate true to activate, false to deactivate.
   * @param distanceToNextVehicle Stored on the instance (coerced with Number).
   * @returns Cruise status summary, or `{ error }`.
   */
  setCruiseControl(speed, activate, distanceToNextVehicle) {
    const dist = Number(distanceToNextVehicle);
    const spd = Number(speed);
    if (this.engineState === "stopped") {
      return {
        error: "Start the engine before activating the cruise control."
      };
    }
    if (activate) {
      this.distanceToNextVehicle = dist;
      if (spd < 0 || spd > 120 || spd % 5 !== 0) {
        return { error: "Invalid speed" };
      }
      this.cruiseStatus = "active";
      return {
        cruiseStatus: "active",
        currentSpeed: spd,
        distanceToNextVehicle: dist
      };
    }
    this.cruiseStatus = "inactive";
    this.distanceToNextVehicle = dist;
    return {
      cruiseStatus: "inactive",
      currentSpeed: spd,
      distanceToNextVehicle: dist
    };
  }
  /** @returns `{ currentSpeed }` drawn from the seeded generator in [0, 120). */
  get_current_speed() {
    return { currentSpeed: this._random.uniform(0, 120) };
  }
  /** Echo the given messages back as `{ log }`. */
  display_log(messages) {
    return { log: messages };
  }
  /**
   * @param distance Distance to cover (same unit as MILE_PER_GALLON's miles).
   * @returns `{ canDrive }`: whether fuelLevel * MILE_PER_GALLON covers it.
   */
  estimate_drive_feasibility_by_mileage(distance) {
    if (this.fuelLevel * MILE_PER_GALLON < distance) {
      return { canDrive: false };
    }
    return { canDrive: true };
  }
  /** Convert liters to gallons (factor 0.264172). */
  liter_to_gallon(liter) {
    return { gallon: liter * 0.264172 };
  }
  /** Convert gallons to liters (factor 3.78541). */
  gallon_to_liter(gallon) {
    return { liter: gallon * 3.78541 };
  }
  /**
   * Look up the distance between two zip codes in a fixed table; the pair is
   * matched in either order.
   *
   * @returns `{ distance }` or `{ error }` when the pair is not in the table.
   */
  estimate_distance(cityA, cityB) {
    const distances = {
      "83214-74532": 750,
      "56108-62947": 320,
      "71354-83462": 450,
      "47329-52013": 290,
      "69238-51479": 630,
      "94016-83214": 980,
      "94016-94704": 600,
      "94704-08540": 2550,
      "94016-08540": 1950,
      "62947-47329": 1053,
      "94016-62947": 780,
      "74532-94016": 880
    };
    const key1 = `${cityA}-${cityB}`;
    const key2 = `${cityB}-${cityA}`;
    if (distances[key1] !== void 0) {
      return { distance: distances[key1] };
    }
    if (distances[key2] !== void 0) {
      return { distance: distances[key2] };
    }
    return { error: "distance not found in database." };
  }
  /**
   * @param city City name (exact match against a fixed table).
   * @returns `{ zipcode }`; "00000" when the city is unknown.
   */
  get_zipcode_based_on_city(city) {
    const zipcodes = {
      Rivermist: "83214",
      Stonebrook: "74532",
      Maplecrest: "56108",
      Silverpine: "62947",
      Shadowridge: "71354",
      "Sunset Valley": "83462",
      Oakendale: "47329",
      Willowbend: "52013",
      "Crescent Hollow": "69238",
      Autumnville: "51479",
      "San Francisco": "94016"
    };
    const known = zipcodes[city];
    return { zipcode: known != null ? known : "00000" };
  }
  /** Store the navigation destination and report it. */
  set_navigation(destination) {
    this.destination = destination;
    return { status: `Navigating to ${destination}` };
  }
  /**
   * @returns The four tire pressures plus `healthy_tire_pressure`, which is
   *   true when their average lies in [30, 35].
   */
  check_tire_pressure() {
    const avgPressure = (this.frontLeftTirePressure + this.frontRightTirePressure + this.rearLeftTirePressure + this.rearRightTirePressure) / 4;
    const healthyTirePressure = avgPressure >= 30 && avgPressure <= 35;
    return {
      frontLeftTirePressure: this.frontLeftTirePressure,
      frontRightTirePressure: this.frontRightTirePressure,
      rearLeftTirePressure: this.rearLeftTirePressure,
      rearRightTirePressure: this.rearRightTirePressure,
      healthy_tire_pressure: healthyTirePressure,
      car_info: {}
    };
  }
  /** @returns A fixed shop location. */
  find_nearest_tire_shop() {
    return { shopLocation: "456 Oakwood Avenue, Rivermist, 83214" };
  }
};
|
|
3924
|
+
|
|
3925
|
+
// src/multi-turn/constants.ts
|
|
3926
|
+
// Lookup from the class name used in test data to the simulator class that
// implements it. MethodRegistry instantiates classes through this table.
var CLASS_NAME_TO_CLASS = {
  MathAPI: MathAPI,
  MessageAPI: MessageAPI,
  TicketAPI: TicketAPI,
  TwitterAPI: TwitterAPI,
  TravelAPI: TravelAPI,
  TradingBot: TradingBot,
  VehicleControlAPI: VehicleControlAPI,
  GorillaFileSystem: GorillaFileSystem
};
|
|
3936
|
+
// Class names that MethodRegistry instantiates once up front and never feeds a scenario to (no _loadScenario call).
var STATELESS_CLASSES = /* @__PURE__ */ new Set(["MathAPI"]);
|
|
3937
|
+
|
|
3938
|
+
// src/multi-turn/method-registry.ts
|
|
3939
|
+
/**
 * Holds the simulator instances used while evaluating multi-turn test cases
 * and a method-name lookup into them.
 *
 * - `instances`: instance key -> class instance. Per-test keys have the form
 *   `${modelName}_${testEntryId}_${className}_${"eval"|"model"}_instance`;
 *   stateless classes use `${className}_stateless_instance`.
 * - `methodMapping`: method name -> instance key. The mapping is keyed by
 *   method name only, so the most recently registered instance wins.
 */
var MethodRegistry = class {
  constructor() {
    this.instances = {};
    this.methodMapping = {};
    this._registerStatelessInstances();
  }
  /** Instantiate every class listed in STATELESS_CLASSES and map all of its prototype methods. */
  _registerStatelessInstances() {
    for (const className of STATELESS_CLASSES) {
      const ClassConstructor = CLASS_NAME_TO_CLASS[className];
      if (!ClassConstructor) {
        continue;
      }
      const instanceKey = `${className}_stateless_instance`;
      const instance = new ClassConstructor();
      this.instances[instanceKey] = instance;
      this._mapMethods(instance, instanceKey, true);
    }
  }
  /**
   * Point every prototype method of `instance` at `instanceKey`.
   *
   * @param includePrivate When false, names starting with "_" are not mapped.
   */
  _mapMethods(instance, instanceKey, includePrivate) {
    const names = Object.getOwnPropertyNames(Object.getPrototypeOf(instance));
    for (const name of names) {
      if (name === "constructor" || typeof instance[name] !== "function") {
        continue;
      }
      if (!includePrivate && name.startsWith("_")) {
        continue;
      }
      this.methodMapping[name] = instanceKey;
    }
  }
  /**
   * Decide whether an instance key belongs to exactly this (model, test) pair.
   *
   * A plain substring test is not enough: test id "case_1" is a substring of
   * the keys for "case_10", "case_11", ... so the key must start with the
   * pair's prefix and the remainder must be a known class name followed by
   * the eval/model suffix.
   */
  _isKeyForTest(key, testEntryId, modelName) {
    const prefix = `${modelName}_${testEntryId}_`;
    if (!key.startsWith(prefix)) {
      return false;
    }
    const match = /^(.+)_(?:eval|model)_instance$/.exec(key.slice(prefix.length));
    return match !== null && Boolean(CLASS_NAME_TO_CLASS[match[1]]);
  }
  /**
   * Return the instance for the given (model, test, class, run kind),
   * creating and registering it on first use.
   *
   * @param className Key into CLASS_NAME_TO_CLASS.
   * @param scenario Passed to `_loadScenario` on creation (non-stateless classes only).
   * @param longContext Passed to `_loadScenario` on creation.
   * @param isEvalRun Selects the "eval" vs "model" instance.
   * @throws Error when `className` is not in CLASS_NAME_TO_CLASS.
   */
  getOrCreateInstance(className, testEntryId, modelName, scenario, longContext = false, isEvalRun = false) {
    const runKind = isEvalRun ? "eval" : "model";
    const instanceKey = `${modelName}_${testEntryId}_${className}_${runKind}_instance`;
    if (this.instances[instanceKey]) {
      return this.instances[instanceKey];
    }
    const ClassConstructor = CLASS_NAME_TO_CLASS[className];
    if (!ClassConstructor) {
      throw new Error(`Unknown class: ${className}`);
    }
    const instance = new ClassConstructor();
    if (!STATELESS_CLASSES.has(className) && typeof instance._loadScenario === "function") {
      instance._loadScenario(scenario, longContext);
    }
    this.instances[instanceKey] = instance;
    this._mapMethods(instance, instanceKey, false);
    return this.instances[instanceKey];
  }
  // biome-ignore lint/suspicious/noExplicitAny: Returns dynamically typed class instance
  /**
   * @returns The instance currently mapped to `methodName`.
   * @throws Error when no instance is mapped to that method.
   */
  getInstanceByMethod(methodName) {
    const instanceKey = this.methodMapping[methodName];
    if (!instanceKey) {
      throw new Error(`Method not found: ${methodName}`);
    }
    return this.instances[instanceKey];
  }
  /** Get all instances (eval and model) registered for a specific test case. */
  getInstancesForTest(testEntryId, modelName) {
    const result = {};
    for (const [key, instance] of Object.entries(this.instances)) {
      if (this._isKeyForTest(key, testEntryId, modelName)) {
        result[key] = instance;
      }
    }
    return result;
  }
  /** Remove all instances of a specific test case and the method mappings that point at them. */
  clearInstancesForTest(testEntryId, modelName) {
    const removedKeys = new Set();
    for (const key of Object.keys(this.instances)) {
      if (this._isKeyForTest(key, testEntryId, modelName)) {
        removedKeys.add(key);
      }
    }
    for (const key of removedKeys) {
      delete this.instances[key];
    }
    for (const [method, instanceKey] of Object.entries(this.methodMapping)) {
      if (removedKeys.has(instanceKey)) {
        delete this.methodMapping[method];
      }
    }
  }
  /** Reset all instances (for testing); stateless instances are re-created. */
  reset() {
    this.instances = {};
    this.methodMapping = {};
    this._registerStatelessInstances();
  }
};
|
|
4045
|
+
// Shared registry instance; _SafeExecutor.findInstanceAndMethod falls back to it when no involved instance has the method.
var globalMethodRegistry = new MethodRegistry();
|
|
4046
|
+
|
|
4047
|
+
// src/multi-turn/safe-executor.ts
|
|
4048
|
+
// Single-character whitespace test used by the argument parser to skip blanks.
var WHITESPACE_REGEX2 = /\s/;
|
|
4049
|
+
// Characters that end a bare (unquoted) token in parseValue: comma or a closing ) ] }.
var DELIMITER_REGEX = /[,)\]}]/;
|
|
4050
|
+
// Matches `name(args)` or `Owner.name(args)`; group 1 is the (optionally dotted) name, group 2 the raw argument text (may span lines via the `s` flag).
var FUNCTION_CALL_REGEX = /^(\w+(?:\.\w+)?)\((.*)\)$/s;
|
|
4051
|
+
// Matches a leading `key =` of a keyword argument; group 1 is the key.
var KEY_EQUALS_REGEX = /^(\w+)\s*=/;
|
|
4052
|
+
/**
 * Executes tool calls against simulator instances and parses Python-style
 * call strings (`name(arg, key=value)`) into `{ toolName, args }` objects.
 *
 * The static tables `DANGEROUS_METHODS` and `METHOD_PARAM_ORDER` are attached
 * to the class after its definition.
 */
var _SafeExecutor = class _SafeExecutor {
  /** @returns The part of `toolName` after the last "." (the whole name when there is no dot). */
  static extractMethodName(toolName) {
    const last = toolName.split(".").pop();
    return last != null ? last : toolName;
  }
  /** Convert `snake_case` to `camelCase` (only `_` followed by a lowercase letter is rewritten). */
  static snakeToCamel(str) {
    return str.replace(/_([a-z])/g, (_, letter) => letter.toUpperCase());
  }
  /** @returns The name as given, plus its camelCase form when that differs. */
  static getMethodVariants(methodName) {
    const camelCase = _SafeExecutor.snakeToCamel(methodName);
    if (camelCase === methodName) {
      return [methodName];
    }
    return [methodName, camelCase];
  }
  /**
   * @returns The positional parameter order registered for `methodName`, or
   *   undefined. Only own entries of METHOD_PARAM_ORDER count, so names such
   *   as "toString" cannot resolve to inherited Object.prototype members.
   */
  static getParamOrder(methodName) {
    const table = _SafeExecutor.METHOD_PARAM_ORDER;
    if (table != null && Object.prototype.hasOwnProperty.call(table, methodName)) {
      return table[methodName];
    }
    return void 0;
  }
  // biome-ignore lint/complexity/noExcessiveCognitiveComplexity: Method resolution requires nested loops
  /**
   * Find an instance that implements `methodName` (or its camelCase variant).
   * The involved instances are searched first, then the global registry.
   *
   * @returns `{ instance, resolvedMethodName }`, or undefined when nothing matches.
   */
  static findInstanceAndMethod(methodName, involvedInstances) {
    const variants = _SafeExecutor.getMethodVariants(methodName);
    if (involvedInstances) {
      for (const instance of Object.values(involvedInstances)) {
        if (instance && typeof instance === "object") {
          for (const variant of variants) {
            if (variant in instance && typeof instance[variant] === "function") {
              return { instance, resolvedMethodName: variant };
            }
          }
        }
      }
    }
    for (const variant of variants) {
      try {
        const instance = globalMethodRegistry.getInstanceByMethod(variant);
        if (instance) {
          return { instance, resolvedMethodName: variant };
        }
      } catch (e) {
        // getInstanceByMethod throws for unmapped names; treat that as "try the next variant".
      }
    }
    return void 0;
  }
  /**
   * Turn a named-argument object into a positional argument list.
   *
   * @param methodName Used to look up the registered parameter order.
   * @param args Named arguments; a missing value is treated as "no arguments".
   * @returns Values in registered order, or in the object's own order when no
   *   order is registered.
   */
  static buildArgs(methodName, args) {
    const provided = args != null ? args : {};
    const paramOrder = _SafeExecutor.getParamOrder(methodName);
    if (!paramOrder) {
      return Object.values(provided);
    }
    return paramOrder.map((param) => provided[param]);
  }
  /**
   * @returns true for names in DANGEROUS_METHODS and for any name starting
   *   with "_". Underscore-prefixed members (e.g. `_loadScenario`) are internal:
   *   the method registry never maps them, so they must not be reachable as
   *   tools through the involved-instances lookup either.
   */
  static isDangerous(methodName) {
    return _SafeExecutor.DANGEROUS_METHODS.has(methodName) || methodName.startsWith("_");
  }
  /**
   * Execute one tool call.
   *
   * @param toolCall `{ toolName, args }`.
   * @param involvedInstances Optional map of instances to search before the global registry.
   * @returns `{ success: true, result }` or `{ success: false, error }`; errors
   *   thrown by the method are caught and reported, never rethrown.
   */
  static async execute(toolCall, involvedInstances) {
    const methodName = _SafeExecutor.extractMethodName(toolCall.toolName);
    if (_SafeExecutor.isDangerous(methodName)) {
      return {
        success: false,
        error: `Dangerous method blocked: ${methodName}`
      };
    }
    const found = _SafeExecutor.findInstanceAndMethod(
      methodName,
      involvedInstances
    );
    if (!found) {
      return {
        success: false,
        error: `Instance not found for method: ${methodName}`
      };
    }
    const { instance, resolvedMethodName } = found;
    const method = instance[resolvedMethodName];
    if (typeof method !== "function") {
      return {
        success: false,
        error: `Method not found: ${methodName}`
      };
    }
    try {
      const args = _SafeExecutor.buildArgs(methodName, toolCall.args);
      const result = method.apply(instance, args);
      const finalResult = result instanceof Promise ? await result : result;
      return { success: true, result: finalResult };
    } catch (error) {
      return {
        success: false,
        error: `Execution error: ${error instanceof Error ? error.message : String(error)}`
      };
    }
  }
  /** Execute tool calls one after another, in order, and collect their results. */
  static async executeMany(toolCalls, involvedInstances) {
    const results = [];
    for (const toolCall of toolCalls) {
      results.push(await _SafeExecutor.execute(toolCall, involvedInstances));
    }
    return results;
  }
  /**
   * Render a result as text: "None" for null/undefined, strings unchanged,
   * objects as JSON (falling back to String() when JSON fails), everything
   * else via String().
   */
  static serializeResult(result) {
    if (result === null || result === void 0) {
      return "None";
    }
    if (typeof result === "string") {
      return result;
    }
    if (typeof result === "object") {
      try {
        return JSON.stringify(result);
      } catch (e) {
        return String(result);
      }
    }
    return String(result);
  }
  // biome-ignore lint/complexity/noExcessiveCognitiveComplexity: Python call parsing requires complex branching
  /**
   * Parse a Python-style call string into a tool call.
   *
   * Keyword arguments are stored under their key. Positional arguments are
   * assigned, in order, to the registered parameter names for the method
   * (extras beyond the registered list are dropped); when no order is
   * registered they are stored as `arg0`, `arg1`, ... Positional arguments
   * are kept even when keyword arguments are also present.
   *
   * @param pythonCall e.g. `GorillaFileSystem.echo('hi', file_name='a.txt')`.
   * @returns `{ toolName, args }`, with `toolName` stripped of any owner prefix.
   * @throws Error when the string is not a call or its arguments cannot be parsed.
   */
  static parsePythonCall(pythonCall) {
    const match = pythonCall.trim().match(FUNCTION_CALL_REGEX);
    if (!match) {
      throw new Error(`Invalid function call format: ${pythonCall}`);
    }
    const [, fullMethodName, argsString] = match;
    const methodName = fullMethodName.split(".").pop() || fullMethodName;
    const args = {};
    if (!argsString.trim()) {
      return { toolName: methodName, args };
    }
    const parsedArgs = _SafeExecutor.parseArgsString(argsString);
    const paramOrder = _SafeExecutor.getParamOrder(methodName);
    let positionalIndex = 0;
    for (const arg of parsedArgs) {
      if (arg.key) {
        args[arg.key] = arg.value;
      } else if (!paramOrder) {
        args[`arg${positionalIndex}`] = arg.value;
        positionalIndex++;
      } else if (positionalIndex < paramOrder.length) {
        args[paramOrder[positionalIndex]] = arg.value;
        positionalIndex++;
      }
    }
    return { toolName: methodName, args };
  }
  /**
   * Split a raw argument string into `{ key, value }` entries (`key` is
   * undefined for positional arguments).
   *
   * @throws Error when the parser cannot advance (e.g. a stray closing
   *   bracket left over from an unsupported nested call or tuple); without
   *   this check such input would loop forever.
   */
  static parseArgsString(argsString) {
    const results = [];
    let i = 0;
    const s = argsString.trim();
    while (i < s.length) {
      const iterationStart = i;
      while (i < s.length && WHITESPACE_REGEX2.test(s[i])) {
        i++;
      }
      if (i >= s.length) {
        break;
      }
      let key;
      const keyMatch = s.slice(i).match(KEY_EQUALS_REGEX);
      if (keyMatch) {
        key = keyMatch[1];
        i += keyMatch[0].length;
      }
      while (i < s.length && WHITESPACE_REGEX2.test(s[i])) {
        i++;
      }
      const value = _SafeExecutor.parseValue(s, i);
      i = value.endIndex;
      results.push({ key, value: value.value });
      while (i < s.length && WHITESPACE_REGEX2.test(s[i])) {
        i++;
      }
      if (i < s.length && s[i] === ",") {
        i++;
      }
      if (i === iterationStart) {
        throw new Error(`Unexpected character '${s[i]}' at position ${i} in arguments: ${s}`);
      }
    }
    return results;
  }
  // biome-ignore lint/complexity/noExcessiveCognitiveComplexity: Python value parsing requires complex branching for strings, lists, dicts, etc.
  /**
   * Parse one Python literal starting at `start`.
   *
   * Handles quoted strings (with \n, \t, \r and pass-through escapes), lists,
   * dicts, True/False/None and numbers; any other bare token is returned as a
   * string.
   *
   * @returns `{ value, endIndex }` where `endIndex` is the index just past the literal.
   */
  static parseValue(s, start) {
    let i = start;
    while (i < s.length && WHITESPACE_REGEX2.test(s[i])) {
      i++;
    }
    if (s[i] === "'" || s[i] === '"') {
      const quote = s[i];
      i++;
      let value = "";
      while (i < s.length && s[i] !== quote) {
        if (s[i] === "\\" && i + 1 < s.length) {
          const next = s[i + 1];
          if (next === "n") {
            value += "\n";
          } else if (next === "t") {
            value += "\t";
          } else if (next === "r") {
            value += "\r";
          } else {
            value += next;
          }
          i += 2;
        } else {
          value += s[i];
          i++;
        }
      }
      // Skip the closing quote.
      return { value, endIndex: i + 1 };
    }
    if (s[i] === "[") {
      return _SafeExecutor.parseList(s, i);
    }
    if (s[i] === "{") {
      return _SafeExecutor.parseDict(s, i);
    }
    let token = "";
    while (i < s.length && !DELIMITER_REGEX.test(s[i])) {
      token += s[i];
      i++;
    }
    token = token.trim();
    if (token === "True") {
      return { value: true, endIndex: i };
    }
    if (token === "False") {
      return { value: false, endIndex: i };
    }
    if (token === "None") {
      return { value: null, endIndex: i };
    }
    const num = Number(token);
    if (!Number.isNaN(num)) {
      return { value: num, endIndex: i };
    }
    return { value: token, endIndex: i };
  }
  /**
   * Parse a `[...]` list whose opening bracket is at `start`.
   *
   * @throws Error when the parser cannot advance inside the list.
   */
  static parseList(s, start) {
    const items = [];
    let i = start + 1;
    while (i < s.length && s[i] !== "]") {
      const iterationStart = i;
      while (i < s.length && WHITESPACE_REGEX2.test(s[i])) {
        i++;
      }
      if (s[i] === "]") {
        break;
      }
      const item = _SafeExecutor.parseValue(s, i);
      items.push(item.value);
      i = item.endIndex;
      while (i < s.length && WHITESPACE_REGEX2.test(s[i])) {
        i++;
      }
      if (s[i] === ",") {
        i++;
      }
      if (i === iterationStart) {
        throw new Error(`Unexpected character '${s[i]}' at position ${i} in list: ${s}`);
      }
    }
    // Skip the closing bracket.
    return { value: items, endIndex: i + 1 };
  }
  /**
   * Parse a `{key: value, ...}` dict whose opening brace is at `start`; keys
   * are converted to strings.
   *
   * @throws Error when the parser cannot advance inside the dict.
   */
  static parseDict(s, start) {
    const obj = {};
    let i = start + 1;
    while (i < s.length && s[i] !== "}") {
      const iterationStart = i;
      while (i < s.length && WHITESPACE_REGEX2.test(s[i])) {
        i++;
      }
      if (s[i] === "}") {
        break;
      }
      const keyResult = _SafeExecutor.parseValue(s, i);
      i = keyResult.endIndex;
      while (i < s.length && WHITESPACE_REGEX2.test(s[i])) {
        i++;
      }
      if (s[i] === ":") {
        i++;
      }
      const valResult = _SafeExecutor.parseValue(s, i);
      obj[String(keyResult.value)] = valResult.value;
      i = valResult.endIndex;
      while (i < s.length && WHITESPACE_REGEX2.test(s[i])) {
        i++;
      }
      if (s[i] === ",") {
        i++;
      }
      if (i === iterationStart) {
        throw new Error(`Unexpected character '${s[i]}' at position ${i} in dict: ${s}`);
      }
    }
    // Skip the closing brace.
    return { value: obj, endIndex: i + 1 };
  }
};
|
|
4348
|
+
// Method names flagged as dangerous.
// NOTE(review): presumably consulted by the executor to refuse these calls;
// the lookup site is outside this chunk - confirm.
_SafeExecutor.DANGEROUS_METHODS = /* @__PURE__ */ new Set(
  ["kill", "exit", "quit", "system", "exec", "eval", "import"]
);
|
|
4357
|
+
// Maps a method name to the ordered list of its parameter names.
// NOTE(review): presumably used to bind positional arguments of parsed calls
// to names; the consumer is outside this chunk - confirm.
_SafeExecutor.METHOD_PARAM_ORDER = {
  // File-system commands (presumably GorillaFileSystem - confirm)
  cd: ["folder"],
  mkdir: ["dir_name"],
  touch: ["file_name"],
  echo: ["content", "file_name"],
  cat: ["file_name"],
  grep: ["file_name", "pattern"],
  sort: ["file_name"],
  tail: ["file_name", "lines"],
  diff: ["file_name1", "file_name2"],
  mv: ["source", "destination"],
  cp: ["source", "destination"],
  rm: ["file_name"],
  rmdir: ["dir_name"],
  find: ["path", "name"],
  ls: ["a"],
  pwd: [],
  wc: ["file_name", "mode"],
  du: ["human_readable"],

  // TwitterAPI (PostingAPI)
  authenticate_twitter: ["username", "password"],
  posting_get_login_status: [],
  post_tweet: ["content", "tags", "mentions"],
  retweet: ["tweet_id"],
  comment: ["tweet_id", "comment_content"],
  mention: ["tweet_id", "mentioned_usernames"],
  follow_user: ["username_to_follow"],
  list_all_following: [],
  unfollow_user: ["username_to_unfollow"],
  get_tweet: ["tweet_id"],
  get_user_tweets: ["username"],
  search_tweets: ["keyword"],
  get_tweet_comments: ["tweet_id"],
  get_user_stats: ["username"],

  // TicketAPI
  create_ticket: ["title", "description", "priority"],
  get_ticket: ["ticket_id"],
  close_ticket: ["ticket_id"],
  resolve_ticket: ["ticket_id", "resolution"],
  edit_ticket: ["ticket_id", "updates"],
  ticket_login: ["username", "password"],
  ticket_get_login_status: [],
  logout: [],
  get_user_tickets: ["status"],

  // MathAPI
  mean: ["numbers"],
  std: ["numbers"],
  add: ["a", "b"],
  subtract: ["a", "b"],
  multiply: ["a", "b"],
  divide: ["a", "b"],
  absolute: ["number"],
  absolute_value: ["number"],
  power: ["base", "exponent"],
  logarithm: ["value", "base", "precision"],
  log: ["value", "base"],
  max_value: ["numbers"],
  min_value: ["numbers"],
  percentage: ["part", "whole"],
  round_number: ["number", "decimal_places"],
  square_root: ["number", "precision"],
  standard_deviation: ["numbers"],
  sum_values: ["numbers"],
  imperial_si_conversion: ["value", "unit_in", "unit_out"],
  si_unit_conversion: ["value", "unit_in", "unit_out"],

  // MessageAPI
  send_message: ["receiver_id", "message"],
  view_messages_received: [],
  view_messages_sent: [],
  add_contact: ["user_name", "user_id"],
  delete_contact: ["user_id"],
  delete_message: ["receiver_id"],
  search_messages: ["keyword"],
  get_message_stats: [],
  get_user_id: ["user"],
  message_login: ["user_id"],
  message_get_login_status: [],
  list_users: [],

  // TradingBot
  add_to_watchlist: ["stock"],
  cancel_order: ["order_id"],
  filter_stocks_by_price: ["stocks", "min_price", "max_price"],
  fund_account: ["amount"],
  get_account_info: [],
  get_available_stocks: ["sector"],
  get_current_time: [],
  get_order_details: ["order_id"],
  get_order_history: [],
  get_stock_info: ["symbol"],
  get_symbol_by_name: ["name"],
  get_transaction_history: ["start_date", "end_date"],
  get_watchlist: [],
  notify_price_change: ["stocks", "threshold"],
  place_order: ["order_type", "symbol", "price", "amount"],
  remove_stock_from_watchlist: ["symbol"],
  trading_get_login_status: [],
  trading_login: ["username", "password"],
  trading_logout: [],
  withdraw_funds: ["amount"],

  // TravelAPI
  authenticate_travel: ["client_id", "client_secret", "refresh_token", "grant_type", "user_first_name", "user_last_name"],
  book_flight: ["access_token", "card_id", "travel_date", "travel_from", "travel_to", "travel_class"],
  cancel_booking: ["access_token", "booking_id"],
  compute_exchange_rate: ["base_currency", "target_currency", "value"],
  contact_customer_support: ["booking_id", "message"],
  get_all_credit_cards: [],
  get_booking_history: ["access_token"],
  get_budget_fiscal_year: ["lastModifiedAfter", "includeRemoved"],
  get_credit_card_balance: ["access_token", "card_id"],
  get_flight_cost: ["travel_from", "travel_to", "travel_date", "travel_class"],
  get_nearest_airport_by_city: ["location"],
  list_all_airports: [],
  purchase_insurance: ["access_token", "insurance_type", "insurance_cost", "booking_id", "card_id"],
  register_credit_card: ["access_token", "card_number", "expiration_date", "cardholder_name", "card_verification_number"],
  retrieve_invoice: ["access_token", "booking_id", "insurance_id"],
  set_budget_limit: ["access_token", "budget_limit"],
  travel_get_login_status: [],
  verify_traveler_information: ["first_name", "last_name", "date_of_birth", "passport_number"],

  // VehicleControlAPI
  activateParkingBrake: ["mode"],
  adjustClimateControl: ["temperature", "unit", "fanSpeed", "mode"],
  check_tire_pressure: [],
  displayCarStatus: ["option"],
  display_log: ["messages"],
  estimate_distance: ["cityA", "cityB"],
  estimate_drive_feasibility_by_mileage: ["distance"],
  fillFuelTank: ["fuelAmount"],
  find_nearest_tire_shop: [],
  gallon_to_liter: ["gallon"],
  get_current_speed: [],
  get_outside_temperature_from_google: [],
  get_outside_temperature_from_weather_com: [],
  get_zipcode_based_on_city: ["city"],
  liter_to_gallon: ["liter"],
  lockDoors: ["unlock", "door"],
  pressBrakePedal: ["pedalPosition"],
  releaseBrakePedal: [],
  setCruiseControl: ["speed", "activate", "distanceToNextVehicle"],
  setHeadlights: ["mode"],
  set_navigation: ["destination"],
  startEngine: ["ignitionMode"]
};
// Module-level alias for the class.
var SafeExecutor = _SafeExecutor;
|
|
4536
|
+
|
|
4537
|
+
// src/multi-turn/execution-engine.ts
|
|
4538
|
+
/**
 * Runs a batch of tool calls against the class instances registered for one
 * test entry / model pair.
 *
 * Instances are fetched (or created) through `globalMethodRegistry`, each one
 * receiving its own slice of a deep-copied `initialConfig`.
 *
 * @returns {Promise<{executionResults: string[], involvedInstances: Object}>}
 *   One entry per call: the serialized result on success, otherwise a string
 *   starting with "Error during execution: ".
 */
async function executeMultiTurnFuncCall(toolCalls, initialConfig, involvedClasses, modelName, testEntryId, longContext = false, isEvalRun = false) {
  // JSON round-trip so the instances never share state with the caller's config.
  const isolatedConfig = JSON.parse(JSON.stringify(initialConfig));
  const involvedInstances = {};
  for (const className of involvedClasses) {
    const classConfig = isolatedConfig[className] || {};
    involvedInstances[className] = globalMethodRegistry.getOrCreateInstance(
      className,
      testEntryId,
      modelName,
      classConfig,
      longContext,
      isEvalRun
    );
  }
  const outcomes = await SafeExecutor.executeMany(toolCalls, involvedInstances);
  const executionResults = outcomes.map((outcome) => {
    if (outcome.success) {
      return SafeExecutor.serializeResult(outcome.result);
    }
    return `Error during execution: ${outcome.error}`;
  });
  return { executionResults, involvedInstances };
}
|
|
4557
|
+
/**
 * Asks the global method registry to clear the instances held for the given
 * test entry and model name.
 */
function resetInstancesForTest(testEntryId, modelName) {
  const registry = globalMethodRegistry;
  registry.clearInstancesForTest(testEntryId, modelName);
}
|
|
4560
|
+
/**
 * Reports whether every response in the list is "empty": one of the literal
 * strings "", "None", "{}", "[]", or any response containing
 * "Error during execution". An empty list counts as empty.
 *
 * @param {string[]} responseList
 * @returns {boolean}
 */
function isEmptyExecuteResponse(responseList) {
  const emptyLiterals = ["", "None", "{}", "[]"];
  for (const response of responseList) {
    const isEmpty = emptyLiterals.includes(response) || response.includes("Error during execution");
    if (!isEmpty) {
      return false;
    }
  }
  return true;
}
|
|
4565
|
+
|
|
4566
|
+
// src/multi-turn/response-checker.ts
|
|
4567
|
+
/**
 * Verifies that every ground-truth execution response also appears among the
 * model's execution responses (order-insensitive).
 *
 * @param {Array} modelResponseList
 * @param {Array} groundTruthResponseList
 * @param {number} _turnIndex - Unused.
 * @returns {{valid: true} | {valid: false, error_type: string, details: Object}}
 */
function responseChecker(modelResponseList, groundTruthResponseList, _turnIndex) {
  const { isSubsequence, missingItems } = isSubsequenceUnordered(
    groundTruthResponseList,
    modelResponseList
  );
  if (isSubsequence) {
    return { valid: true };
  }
  const details = {
    missing_items: missingItems,
    model_response: modelResponseList,
    ground_truth_response: groundTruthResponseList
  };
  return {
    valid: false,
    error_type: "multi_turn:execution_response_mismatch",
    details
  };
}
|
|
4585
|
+
/**
 * Checks that each ground-truth item can be matched (via `itemsEqual`) to a
 * distinct item of the model list, ignoring order.
 *
 * @param {Array} groundTruthList
 * @param {Array} modelList
 * @returns {{isSubsequence: boolean, missingItems: Array}} `missingItems`
 *   holds the ground-truth items that found no partner.
 */
function isSubsequenceUnordered(groundTruthList, modelList) {
  if (groundTruthList.length === 0) {
    return { isSubsequence: true, missingItems: [] };
  }
  if (modelList.length === 0) {
    return { isSubsequence: false, missingItems: [...groundTruthList] };
  }
  // Work on a copy so matched entries can be consumed one at a time.
  const unmatchedModel = [...modelList];
  const missingItems = [];
  for (const expected of groundTruthList) {
    const matchIndex = unmatchedModel.findIndex(
      (candidate) => itemsEqual(expected, candidate)
    );
    if (matchIndex === -1) {
      missingItems.push(expected);
    } else {
      unmatchedModel.splice(matchIndex, 1);
    }
  }
  return {
    isSubsequence: missingItems.length === 0,
    missingItems
  };
}
|
|
4612
|
+
/**
 * Loose equality used when matching execution responses.
 *
 * - null/undefined only equal each other;
 * - two strings are compared after whitespace normalization;
 * - two objects are compared through their normalized JSON form (falling back
 *   to `String(...)` comparison if serialization throws);
 * - everything else uses strict equality.
 */
function itemsEqual(a, b) {
  const aIsNil = a == null;
  const bIsNil = b == null;
  if (aIsNil || bIsNil) {
    return aIsNil && bIsNil;
  }
  const kindA = typeof a;
  const kindB = typeof b;
  if (kindA === "string" && kindB === "string") {
    return normalizeResponse(a) === normalizeResponse(b);
  }
  if (kindA === "object" && kindB === "object") {
    try {
      const left = JSON.stringify(normalizeObject2(a));
      const right = JSON.stringify(normalizeObject2(b));
      return left === right;
    } catch (e) {
      return String(a) === String(b);
    }
  }
  return a === b;
}
|
|
4631
|
+
/**
 * Trims the string and collapses every run of whitespace into a single space.
 *
 * @param {string} response
 * @returns {string}
 */
function normalizeResponse(response) {
  const trimmed = response.trim();
  return trimmed.split(/\s+/).join(" ");
}
|
|
4634
|
+
/**
 * Produces a canonical copy of a JSON-like value: arrays are normalized
 * element-wise, objects get their keys sorted and `undefined`-valued
 * properties dropped. Primitives and null/undefined are returned as-is.
 */
function normalizeObject2(obj) {
  if (obj == null || typeof obj !== "object") {
    return obj;
  }
  if (Array.isArray(obj)) {
    return obj.map((element) => normalizeObject2(element));
  }
  const kept = Object.entries(obj).filter((entry) => entry[1] !== void 0);
  kept.sort((left, right) => {
    if (left[0] < right[0]) {
      return -1;
    }
    return left[0] > right[0] ? 1 : 0;
  });
  const canonical = {};
  for (const [key, value] of kept) {
    canonical[key] = normalizeObject2(value);
  }
  return canonical;
}
|
|
4656
|
+
|
|
4657
|
+
// src/multi-turn/state-checker.ts
|
|
4658
|
+
/**
 * Compares, class by class, the model's instances against the ground-truth
 * instances and stops at the first mismatch.
 *
 * On a state mismatch the differences and both serialized states are written
 * to the console before the failure is returned.
 *
 * @returns {{valid: true} | {valid: false, error_type: string, details: Object}}
 */
function stateChecker(modelInstances, groundTruthInstances) {
  const mismatch = (details) => ({
    valid: false,
    error_type: "multi_turn:instance_state_mismatch",
    details
  });
  for (const [className, groundTruthInstance] of Object.entries(groundTruthInstances)) {
    const modelInstance = modelInstances[className];
    if (!modelInstance) {
      return mismatch({
        differences: { [className]: "Instance not found in model instances" }
      });
    }
    const comparison = compareInstances(modelInstance, groundTruthInstance);
    if (comparison.valid) {
      continue;
    }
    console.log("[DEBUG] State mismatch detected!");
    console.log(
      "[DEBUG] Differences:",
      JSON.stringify(comparison.differences, null, 2)
    );
    const modelState = serializeInstanceState(modelInstance);
    console.log(
      "[DEBUG] Model instance state:",
      JSON.stringify(modelState, null, 2)
    );
    const groundTruthState = serializeInstanceState(groundTruthInstance);
    console.log(
      "[DEBUG] Ground truth instance state:",
      JSON.stringify(groundTruthState, null, 2)
    );
    return mismatch({
      differences: comparison.differences,
      model_instance_state: modelState,
      ground_truth_instance_state: groundTruthState
    });
  }
  return { valid: true };
}
|
|
4703
|
+
/**
 * Compares the attributes of a ground-truth instance with the same-named
 * attributes of the model instance using `deepEqual`.
 *
 * Attributes whose name starts with "_" or is "parent"/"_parent" are ignored.
 * Only the ground-truth object's own keys drive the comparison.
 *
 * @returns {{valid: boolean, differences: Object}} `differences` maps each
 *   mismatching attribute to `{ model, ground_truth }`.
 */
function compareInstances(modelObject, groundTruthObject) {
  const ignoredAttrs = /* @__PURE__ */ new Set(["parent", "_parent"]);
  const comparableAttrs = Object.keys(groundTruthObject).filter(
    (attrName) => !(attrName.startsWith("_") || ignoredAttrs.has(attrName))
  );
  const differences = {};
  for (const attrName of comparableAttrs) {
    const model = modelObject[attrName];
    const groundTruth = groundTruthObject[attrName];
    if (deepEqual(model, groundTruth)) {
      continue;
    }
    differences[attrName] = { model, ground_truth: groundTruth };
  }
  const valid = Object.keys(differences).length === 0;
  return { valid, differences };
}
|
|
4724
|
+
// Keys that never take part in state comparison (in addition to any key that
// starts with "_").
var SKIP_KEYS = /* @__PURE__ */ new Set(["parent", "_parent"]);
/** True when `key` must be ignored by the deep comparison. */
function shouldSkipKey(key) {
  return key.startsWith("_") || SKIP_KEYS.has(key);
}
/** Own enumerable keys of `obj` minus the ignored ones. */
function getFilteredKeys(obj) {
  return Object.keys(obj).filter((k) => !shouldSkipKey(k));
}
/** Element-wise comparison of two arrays; lengths must match. */
function deepEqualArrays(a, b, seen) {
  if (a.length !== b.length) {
    return false;
  }
  for (let i = 0; i < a.length; i++) {
    if (!deepEqual(a[i], b[i], seen)) {
      return false;
    }
  }
  return true;
}
/** Compares two non-array objects on their non-ignored keys. */
function deepEqualObjects(a, b, seen) {
  const keysA = getFilteredKeys(a);
  // Set lookup instead of Array#includes inside the loop.
  const keysB = new Set(getFilteredKeys(b));
  if (keysA.length !== keysB.size) {
    return false;
  }
  for (const key of keysA) {
    if (!keysB.has(key)) {
      return false;
    }
    if (!deepEqual(a[key], b[key], seen)) {
      return false;
    }
  }
  return true;
}
/**
 * Structural equality that ignores "_"-prefixed and parent keys and tolerates
 * cyclic references.
 *
 * `seen` holds only the objects on the CURRENT recursion path. A pair is
 * assumed equal without further descent only when both operands are already
 * being compared higher up the stack (i.e. both sides have looped back).
 * Objects are removed from `seen` once their comparison finishes, so a value
 * that is merely referenced from two places is compared in full each time
 * instead of being reported equal on its second visit.
 *
 * @param {*} a
 * @param {*} b
 * @param {WeakSet<object>} [seen] - Internal recursion state.
 * @returns {boolean}
 */
function deepEqual(a, b, seen = /* @__PURE__ */ new WeakSet()) {
  if (a === b) {
    return true;
  }
  // Past the identity check, a nullish operand, differing types, or any pair
  // of non-objects (primitives, functions) can no longer be equal.
  if (a == null || b == null) {
    return false;
  }
  if (typeof a !== typeof b || typeof a !== "object") {
    return false;
  }
  const aOnPath = seen.has(a);
  const bOnPath = seen.has(b);
  if (aOnPath && bOnPath) {
    return true;
  }
  if (!aOnPath) {
    seen.add(a);
  }
  if (!bOnPath) {
    seen.add(b);
  }
  try {
    const aIsArray = Array.isArray(a);
    if (aIsArray !== Array.isArray(b)) {
      return false;
    }
    return aIsArray ? deepEqualArrays(a, b, seen) : deepEqualObjects(a, b, seen);
  } finally {
    // Only undo what this frame added, so ancestors stay on the path.
    if (!aOnPath) {
      seen.delete(a);
    }
    if (!bOnPath) {
      seen.delete(b);
    }
  }
}
|
|
4786
|
+
/**
 * Builds a plain snapshot of an instance's public (non "_"-prefixed) own
 * properties for reporting.
 *
 * An object stored under "root" goes through `serializeDirectory`; other
 * objects are cloned via a JSON round-trip; primitives are copied as-is. If
 * serialization throws, the value is stored as `String(value)`.
 */
function serializeInstanceState(instance) {
  const state = {};
  const publicEntries = Object.entries(instance).filter(
    ([key]) => !key.startsWith("_")
  );
  for (const [key, value] of publicEntries) {
    const isObject = typeof value === "object";
    try {
      if (key === "root" && isObject) {
        state[key] = serializeDirectory(value);
      } else if (isObject) {
        state[key] = JSON.parse(JSON.stringify(value));
      } else {
        state[key] = value;
      }
    } catch (e) {
      state[key] = String(value);
    }
  }
  return state;
}
|
|
4803
|
+
/**
 * Recursively converts a directory node into `{ name, contents }`, where each
 * `File` child becomes `{ type: "file", content }` and each `Directory` child
 * is serialized in turn. Children of any other kind are omitted.
 *
 * Recursion stops once `depth` exceeds 5, returning a placeholder object.
 */
function serializeDirectory(dir, depth = 0) {
  if (depth > 5) {
    return { value: "[Max depth reached]" };
  }
  const contents = {};
  const children = dir.contents || {};
  for (const childName of Object.keys(children)) {
    const child = children[childName];
    if (child instanceof File) {
      contents[childName] = { type: "file", content: child.content };
      continue;
    }
    if (child instanceof Directory) {
      contents[childName] = serializeDirectory(child, depth + 1);
    }
  }
  return { name: dir.name, contents };
}
|
|
4821
|
+
|
|
4822
|
+
// src/multi-turn/checker.ts
|
|
4823
|
+
/**
 * Converts each ground-truth call string into a parsed call by delegating to
 * `SafeExecutor.parsePythonCall`.
 */
function parseGroundTruth(pythonCalls) {
  const parsedCalls = [];
  for (const call of pythonCalls) {
    parsedCalls.push(SafeExecutor.parsePythonCall(call));
  }
  return parsedCalls;
}
|
|
4826
|
+
/**
 * Replays the model's tool calls and the ground-truth calls turn by turn and
 * checks, after every turn that has ground-truth calls, that
 *   1. the resulting instance states match (`stateChecker`), and
 *   2. the ground-truth responses of this turn all appear among the model
 *      responses accumulated so far (`responseChecker`).
 *
 * Model and ground truth run against separate instance sets; the ground-truth
 * set is keyed by `${modelName}_ground_truth`. Both are reset first.
 *
 * @returns {Promise<{valid: true} | {valid: false, error_type: string, details: Object}>}
 */
async function multiTurnChecker(modelToolCalls, groundTruthPythonCalls, testEntry, testCategory, modelName) {
  const initialConfig = testEntry.initial_config || {};
  const involvedClasses = testEntry.involved_classes || [];
  const groundTruthRunner = `${modelName}_ground_truth`;
  resetInstancesForTest(testEntry.id, modelName);
  resetInstancesForTest(testEntry.id, groundTruthRunner);
  // Every execution in this checker shares the same config/classes/flags and
  // only differs by the calls and by whose instances are targeted.
  const execute = (calls, runnerName) => executeMultiTurnFuncCall(
    calls,
    initialConfig,
    involvedClasses,
    runnerName,
    testEntry.id,
    testCategory.includes("long_context"),
    true
  );
  const fail = (outcome) => ({
    valid: false,
    error_type: outcome.error_type,
    details: outcome.details
  });
  // Executing an empty batch instantiates both instance sets up front.
  const modelInit = await execute([], modelName);
  const groundTruthInit = await execute([], groundTruthRunner);
  let finalModelInstances = modelInit.involvedInstances;
  let finalGroundTruthInstances = groundTruthInit.involvedInstances;
  const allTurnModelExecutionResults = [];
  for (let turnIndex = 0; turnIndex < groundTruthPythonCalls.length; turnIndex++) {
    const groundTruthCalls = parseGroundTruth(groundTruthPythonCalls[turnIndex]);
    const turnModelResults = [];
    for (const stepToolCalls of modelToolCalls[turnIndex] || []) {
      const stepOutcome = await execute(stepToolCalls, modelName);
      turnModelResults.push(...stepOutcome.executionResults);
      finalModelInstances = stepOutcome.involvedInstances;
    }
    const groundTruthOutcome = await execute(groundTruthCalls, groundTruthRunner);
    allTurnModelExecutionResults.push(...turnModelResults);
    finalGroundTruthInstances = groundTruthOutcome.involvedInstances;
    // Turns without ground-truth calls are not checked here.
    if (groundTruthCalls.length === 0) {
      continue;
    }
    const stateOutcome = stateChecker(finalModelInstances, finalGroundTruthInstances);
    if (!stateOutcome.valid) {
      return fail(stateOutcome);
    }
    const responseOutcome = responseChecker(
      allTurnModelExecutionResults,
      groundTruthOutcome.executionResults,
      turnIndex
    );
    if (!responseOutcome.valid) {
      return fail(responseOutcome);
    }
  }
  return { valid: true };
}
|
|
4911
|
+
/**
 * Fails when the model issued a non-empty tool call in a turn for which the
 * ground truth expects none.
 *
 * Each model call is rendered as its JSON text when it has truthy `args`,
 * otherwise as "None"; `isEmptyExecuteResponse` then decides whether the turn
 * counts as empty.
 *
 * @returns {{valid: true} | {valid: false, error_type: string, details: Object}}
 */
function multiTurnIrrelevanceChecker(modelToolCalls, groundTruthPythonCalls) {
  const describeCall = (toolCall) => {
    if (toolCall.args) {
      return JSON.stringify(toolCall);
    }
    return "None";
  };
  for (let turnIndex = 0; turnIndex < groundTruthPythonCalls.length; turnIndex++) {
    const expectedCalls = groundTruthPythonCalls[turnIndex];
    const modelSteps = modelToolCalls[turnIndex] || [];
    const modelCallStrings = modelSteps.flat().map(describeCall);
    const expectsNoCalls = expectedCalls.length === 0;
    if (expectsNoCalls && !isEmptyExecuteResponse(modelCallStrings)) {
      return {
        valid: false,
        error_type: "multi_turn:irrelevance_error:decoder_success",
        details: { model_response_decoded: modelSteps }
      };
    }
  }
  return { valid: true };
}
|
|
4929
|
+
/**
 * Clears both instance sets of a test entry: the model's own and the
 * ground-truth one keyed by `${modelName}_ground_truth`.
 */
function resetTestInstances(testEntryId, modelName) {
  const runnerNames = [modelName, `${modelName}_ground_truth`];
  for (const runnerName of runnerNames) {
    resetInstancesForTest(testEntryId, runnerName);
  }
}
|
|
4933
|
+
|
|
4934
|
+
// src/benchmarks/bfcl-multi-turn.ts
|
|
4935
|
+
// Line separator accepting both LF and CRLF.
var LINE_SPLIT_REGEX2 = /\r?\n/;
// Matches strings consisting solely of decimal digits.
var NUMERIC_STRING_REGEX2 = /^\d+$/;
var MAXIMUM_STEP_LIMIT = 20;
var DEFAULT_USER_PROMPT_FOR_ADDITIONAL_FUNCTION_FC = "I have updated some more functions you can choose from. What about now?";
var MULTI_TURN_SYSTEM_PROMPT = `You are an expert in composing functions. You are given a question and a set of possible functions. Based on the question, you will need to make one or more function/tool calls to achieve the purpose. If none of the functions can be used, point it out. If the given question lacks the parameters required by the function, also point it out.

At each turn, you should try your best to complete the tasks requested by the user within the current turn. Continue to output functions to call until you have fulfilled the user's request to the best of your ability. Once you have no more functions to call, the system will consider the current turn complete and proceed to the next turn or task.`;
// Retry tuning knobs (delays are in milliseconds, per their names).
var RETRY_MAX_ATTEMPTS = 5;
var RETRY_INITIAL_DELAY_MS = 1000;
var RETRY_MAX_DELAY_MS = 60000;
var RETRY_BACKOFF_MULTIPLIER = 2;
// One entry per multi-turn dataset; the answer file lives under
// "possible_answer/" and shares the test file's name.
var MULTI_TURN_DATASETS = [
  ["bfcl-multi-turn-base", "BFCL v4 Multi-Turn Base Function Calling", "BFCL_v4_multi_turn_base.jsonl"],
  ["bfcl-multi-turn-long-context", "BFCL v4 Multi-Turn Long-Context Function Calling", "BFCL_v4_multi_turn_long_context.jsonl"],
  ["bfcl-multi-turn-miss-func", "BFCL v4 Multi-Turn Missing Function Calling", "BFCL_v4_multi_turn_miss_func.jsonl"],
  ["bfcl-multi-turn-miss-param", "BFCL v4 Multi-Turn Missing Parameter Function Calling", "BFCL_v4_multi_turn_miss_param.jsonl"]
].map(([name, description, testFile]) => ({
  name,
  description,
  testFile,
  answerFile: `possible_answer/${testFile}`
}));
// Class name -> path (relative to the data directory) of its tool docs.
var MULTI_TURN_DOCS = Object.fromEntries(
  [
    ["GorillaFileSystem", "gorilla_file_system"],
    ["MathAPI", "math_api"],
    ["MessageAPI", "message_api"],
    ["TwitterAPI", "posting_api"],
    ["TicketAPI", "ticket_api"],
    ["TradingBot", "trading_bot"],
    ["TravelAPI", "travel_booking"],
    ["VehicleControlAPI", "vehicle_control"]
  ].map(([className, stem]) => [className, `multi_turn_func_doc/${stem}.jsonl`])
);
// Parsed tool docs keyed by class name.
var toolDocCache = /* @__PURE__ */ new Map();
|
|
4983
|
+
/**
 * Ensures the question is a list of turns: an array that already contains at
 * least one nested array is returned untouched; anything else is wrapped in a
 * one-element array.
 */
var normalizeTurns = (question) => {
  const alreadyNested = Array.isArray(question) && question.some((turn) => Array.isArray(turn));
  return alreadyNested ? question : [question];
};
|
|
4989
|
+
/**
 * Returns a cleaned-up copy of a parameter schema:
 *   - non-object input becomes `{ type: "object", properties: {} }`;
 *   - arrays are fixed element-wise;
 *   - a missing `type` defaults to "object";
 *   - the type names "dict", "tuple", "integer" and "float" are rewritten to
 *     "object", "array", "number" and "number";
 *   - `properties` and `items` are fixed recursively.
 *
 * The input is never modified: `properties` is rebuilt into a fresh container
 * rather than patched in place, so schemas shared with a cache or with the
 * caller keep their original content.
 *
 * @param {*} schema
 * @returns {Object|Array} The fixed schema.
 */
var fixSchema = (schema) => {
  if (!schema || typeof schema !== "object") {
    return { type: "object", properties: {} };
  }
  if (Array.isArray(schema)) {
    return schema.map((v) => fixSchema(v));
  }
  const copy = { ...schema };
  if (!copy.type) {
    copy.type = "object";
  } else if (copy.type === "dict") {
    copy.type = "object";
  } else if (copy.type === "tuple") {
    copy.type = "array";
  } else if (copy.type === "integer" || copy.type === "float") {
    copy.type = "number";
  }
  const props = copy.properties;
  if (props && typeof props === "object") {
    // `copy` is only a shallow clone, so `props` is still the caller's object;
    // build a new one instead of assigning into it.
    copy.properties = Array.isArray(props) ? props.map((v) => fixSchema(v)) : Object.fromEntries(
      Object.entries(props).map(([key, value]) => [key, fixSchema(value)])
    );
  }
  if (copy.items) {
    copy.items = fixSchema(copy.items);
  }
  return copy;
};
|
|
5021
|
+
/**
 * Replaces every character outside [a-zA-Z0-9_-] with "_" and truncates the
 * result to 64 characters; an empty result becomes "tool".
 */
var sanitizeName = (toolName) => {
  const cleaned = toolName.replace(/[^\w-]/g, "_").substring(0, 64);
  if (cleaned.length === 0) {
    return "tool";
  }
  return cleaned;
};
|
|
5025
|
+
/**
 * Converts tool definitions into function-tool descriptors with a sanitized
 * name and a fixed-up input schema.
 *
 * A schema that is not of type "object" after fixing is replaced by an empty
 * object schema.
 *
 * @returns {{transformedTools: Object[], nameMap: Map<string, string>}}
 *   `nameMap` maps each sanitized name back to the original tool name.
 */
var buildTransformedTools = (tools) => {
  const nameMap = /* @__PURE__ */ new Map();
  const transformedTools = [];
  for (const toolDef of tools) {
    const fixedSchema = fixSchema(toolDef.parameters);
    const usable = fixedSchema && typeof fixedSchema === "object" && fixedSchema.type === "object";
    const safeName = sanitizeName(toolDef.name);
    nameMap.set(safeName, toolDef.name);
    transformedTools.push({
      type: "function",
      name: safeName,
      description: toolDef.description,
      inputSchema: usable ? fixedSchema : { type: "object", properties: {} }
    });
  }
  return { transformedTools, nameMap };
};
|
|
5042
|
+
/**
 * Builds the `{ [toolName]: tool }` object from the transformed descriptors,
 * creating each tool with `import_ai2.tool` and wrapping its schema with
 * `import_ai2.jsonSchema`. Non-string descriptions are dropped.
 */
var buildToolsMap = (transformedTools) => {
  const entries = transformedTools.map((descriptor) => {
    const description = typeof descriptor.description === "string" ? descriptor.description : void 0;
    const definition = (0, import_ai2.tool)({
      description,
      inputSchema: (0, import_ai2.jsonSchema)(descriptor.inputSchema)
    });
    return [descriptor.name, definition];
  });
  return Object.fromEntries(entries);
};
|
|
5051
|
+
/**
 * Best-effort decoding of tool-call arguments.
 *
 * Non-strings are returned unchanged. Strings are parsed as JSON; if that
 * fails, a second attempt is made after escaping raw newline, carriage-return
 * and tab characters. If both attempts fail, the original string is returned.
 */
var parseToolArgs = (extractedArgs) => {
  if (typeof extractedArgs !== "string") {
    return extractedArgs;
  }
  const attempts = [
    () => extractedArgs,
    () => extractedArgs.replace(/\n/g, "\\n").replace(/\r/g, "\\r").replace(/\t/g, "\\t")
  ];
  for (const buildCandidate of attempts) {
    try {
      return JSON.parse(buildCandidate());
    } catch (e) {
      // Deliberately ignored: fall through to the next, more lenient attempt.
    }
  }
  return extractedArgs;
};
|
|
5066
|
+
/**
 * Resolves a tool name that was given as a numeric string by treating it as
 * an index into `transformedTools`. Falls back to `rawName` when the index
 * has no entry (or the entry has no name), and returns anything that is not
 * an all-digit string unchanged.
 */
var getSanitizedName = (rawName, transformedTools) => {
  const isIndex = typeof rawName === "string" && NUMERIC_STRING_REGEX2.test(rawName);
  if (!isIndex) {
    return rawName;
  }
  const indexedTool = transformedTools[Number(rawName)];
  const indexedName = indexedTool == null ? void 0 : indexedTool.name;
  return indexedName != null ? indexedName : rawName;
};
|
|
5073
|
+
/**
 * Normalizes raw tool calls into `{ toolCallId, toolName, args }`.
 *
 * The name is read from `toolName` (or `name`), resolved through
 * `getSanitizedName`, and mapped back to the original tool name via `nameMap`
 * when a mapping exists. Arguments come from the first non-nullish of `args`,
 * `arguments`, `input`, `params`, `parameters`, are decoded with
 * `parseToolArgs`, and default to `{}`. A missing list yields `[]`.
 */
var restoreToolCalls = (toolCalls, nameMap, transformedTools) => {
  const ARG_FIELDS = ["args", "arguments", "input", "params", "parameters"];
  return (toolCalls || []).map((call) => {
    const rawName = call.toolName != null ? call.toolName : call.name;
    const sanitized = getSanitizedName(rawName, transformedTools);
    const mapped = nameMap.get(sanitized);
    const originalName = mapped != null ? mapped : sanitized;
    let extractedArgs;
    for (const field of ARG_FIELDS) {
      extractedArgs = call[field];
      if (extractedArgs != null) {
        break;
      }
    }
    const parsedArgs = parseToolArgs(extractedArgs);
    return {
      toolCallId: typeof call.toolCallId === "string" ? call.toolCallId : void 0,
      toolName: String(originalName),
      args: parsedArgs != null ? parsedArgs : {}
    };
  });
};
|
|
5087
|
+
/**
 * Loads the tool definitions of one class from its JSONL doc file under
 * `dataDir`, naming each tool `${className}.${methodName}`.
 *
 * Results are memoized in `toolDocCache`; setting the environment variable
 * BFCL_FORCE_CACHE_REFRESH to "true" bypasses the cache lookup (the fresh
 * result is still stored).
 *
 * @throws {Error} If `className` has no entry in `MULTI_TURN_DOCS`.
 */
var loadToolsForClass = async (className, dataDir) => {
  const bypassCache = process.env.BFCL_FORCE_CACHE_REFRESH === "true";
  const cachedTools = bypassCache ? void 0 : toolDocCache.get(className);
  if (cachedTools) {
    return cachedTools;
  }
  const relPath = MULTI_TURN_DOCS[className];
  if (!relPath) {
    throw new Error(`Missing tool doc mapping for class: ${className}`);
  }
  const docPath = import_node_path3.default.join(dataDir, relPath);
  const raw = await import_node_fs3.promises.readFile(docPath, "utf-8");
  const toTool = (entry) => {
    const methodName = typeof entry.name === "string" ? entry.name : "tool";
    const declared = entry.parameters;
    return {
      name: `${className}.${methodName}`,
      description: typeof entry.description === "string" ? entry.description : void 0,
      parameters: declared != null ? declared : { type: "object", properties: {} }
    };
  };
  // One JSON document per non-blank line.
  const entries = raw.split(LINE_SPLIT_REGEX2).filter((line) => line.trim().length > 0).map((line) => JSON.parse(line));
  const tools = entries.map(toTool);
  toolDocCache.set(className, tools);
  return tools;
};
|
|
5116
|
+
/**
 * Loads tool definitions for several classes concurrently and returns them as
 * one flat list, in the same order as `classes`.
 *
 * @param {string[]} classes - Class names to load.
 * @param {string} dataDir - Data directory forwarded to loadToolsForClass.
 * @returns {Promise<object[]>} All tools of all requested classes.
 */
var loadToolsForClasses = async (classes, dataDir) => {
  const pendingLoads = classes.map((cls) => loadToolsForClass(cls, dataDir));
  const toolsPerClass = await Promise.all(pendingLoads);
  const allTools = [];
  for (const classTools of toolsPerClass) {
    allTools.push(...classTools);
  }
  return allTools;
};
|
|
5122
|
+
/**
 * Returns the part of a dotted tool name after the last ".".
 *
 * @param {string} toolName - e.g. "ClassName.method".
 * @returns {string} The final dot-separated segment (the whole name when it
 *   contains no dot).
 */
var getMethodName = (toolName) => {
  const segments = toolName.split(".");
  const lastSegment = segments[segments.length - 1];
  return lastSegment != null ? lastSegment : toolName;
};
|
|
5126
|
+
/**
 * Decides whether a thrown value represents an HTTP 429 / rate-limit failure.
 *
 * Only `Error` instances qualify. A match is reported when the lower-cased
 * message contains "429" or "rate limit", when `status` or `statusCode` is
 * 429, or when `cause` is an object whose `status` is 429.
 *
 * @param {unknown} error - The caught value.
 * @returns {boolean} True when the error looks like a rate-limit error.
 */
var isRateLimitError = (error) => {
  if (!(error instanceof Error)) {
    return false;
  }
  const text = error.message.toLowerCase();
  const mentionsRateLimit = ["429", "rate limit"].some((needle) => text.includes(needle));
  if (mentionsRateLimit) {
    return true;
  }
  if (error.status === 429 || error.statusCode === 429) {
    return true;
  }
  const cause = error.cause;
  return Boolean(cause) && typeof cause === "object" && cause.status === 429;
};
|
|
5143
|
+
/** Returns a promise that resolves (with no value) after `ms` milliseconds. */
var sleep = (ms) => new Promise((wake) => {
  setTimeout(wake, ms);
});
|
|
5144
|
+
/**
 * Runs `fn`, retrying with exponential backoff when it fails with a
 * rate-limit error (as judged by isRateLimitError).
 *
 * Any other error, or a rate-limit error on the final attempt, is rethrown
 * as-is. The wait starts at RETRY_INITIAL_DELAY_MS and is multiplied by
 * RETRY_BACKOFF_MULTIPLIER after each retry, capped at RETRY_MAX_DELAY_MS.
 *
 * @param {() => Promise<unknown>} fn - Operation to attempt.
 * @param {{debug?: boolean}} [options] - `debug` logs each retry to the console.
 * @returns {Promise<unknown>} Whatever `fn` resolves to.
 */
var withRetry = async (fn, options = {}) => {
  let lastError;
  let delay = RETRY_INITIAL_DELAY_MS;
  let attempt = 1;
  while (attempt <= RETRY_MAX_ATTEMPTS) {
    try {
      return await fn();
    } catch (error) {
      lastError = error;
      // Give up on non-retryable errors and once the attempt budget is spent.
      if (!isRateLimitError(error) || attempt === RETRY_MAX_ATTEMPTS) {
        throw error;
      }
      if (options.debug) {
        console.log(
          `[DEBUG] Rate limit hit, retrying in ${delay}ms (attempt ${attempt}/${RETRY_MAX_ATTEMPTS})`
        );
      }
      await sleep(delay);
      delay = Math.min(delay * RETRY_BACKOFF_MULTIPLIER, RETRY_MAX_DELAY_MS);
    }
    attempt += 1;
  }
  throw lastError;
};
|
|
5169
|
+
/**
 * Builds a BFCL multi-turn benchmark definition for one dataset.
 *
 * The returned object exposes `name`, `version`, `description` and an async
 * `run(model, config)` that:
 *   1. reads the test cases and possible answers (JSONL) from the resolved
 *      data directory,
 *   2. replays every test case turn by turn against `model`, executing the
 *      tool calls the model emits and feeding the results back,
 *   3. checks the recorded tool calls with multiTurnChecker and
 *      multiTurnIrrelevanceChecker, and
 *   4. returns `{ score, success, metrics, logs }` (plus `error` on failure).
 *
 * Environment variables read by `run`:
 *   - BFCL_LIMIT: positive number; only the first N test cases are run.
 *   - BFCL_CONCURRENCY: number of cases run in parallel (default 4, min 1).
 *   - BFCL_DEBUG: "true" enables verbose `[DEBUG]` console output.
 *
 * @param {string} name - Benchmark name.
 * @param {string} description - Benchmark description.
 * @param {string} testDataFile - Test-case JSONL file, relative to the data dir.
 * @param {string} answerDataFile - Answer JSONL file, relative to the data dir.
 * @returns {{name: string, version: string, description: string, run: Function}}
 */
var createBfclMultiTurnBenchmark = (name, description, testDataFile, answerDataFile) => ({
  name,
  version: "1.0.0",
  description,
  async run(model, config) {
    console.log("Starting BFCL multi-turn benchmark...");
    // Clear the tool-doc cache and reset the method registry before each run.
    toolDocCache.clear();
    globalMethodRegistry.reset();
    const logs = [];
    let correctCount = 0;
    let testCases = [];
    const dataPath = resolveDataDir();
    const configHash = JSON.stringify(config || {});
    // The run id is handed to the executor/checker and to resetTestInstances.
    const runId = `bfcl_${Date.now()}_${Math.random().toString(36).slice(2, 8)}_${configHash.slice(0, 10)}`;
    try {
      logs.push(`[INFO] Using data dir: ${dataPath}`);
      const testCasesJson = await import_node_fs3.promises.readFile(
        import_node_path3.default.join(dataPath, testDataFile),
        "utf-8"
      );
      const possibleAnswersJson = await import_node_fs3.promises.readFile(
        import_node_path3.default.join(dataPath, answerDataFile),
        "utf-8"
      );
      // One JSON document per non-blank line.
      const parseJsonLines = (raw) => raw.split(LINE_SPLIT_REGEX2).filter((line) => line.trim().length > 0).map((line) => JSON.parse(line));
      testCases = parseJsonLines(testCasesJson);
      const possibleAnswers = parseJsonLines(possibleAnswersJson);
      const possibleAnswersMap = new Map(
        possibleAnswers.map((ans) => [ans.id, ans])
      );
      const limitEnv = process.env.BFCL_LIMIT;
      const limit = limitEnv ? Number(limitEnv) : void 0;
      if (limit && Number.isFinite(limit) && limit > 0) {
        testCases = testCases.slice(0, limit);
        logs.push(`[INFO] Limiting test cases to ${limit} due to BFCL_LIMIT.`);
      }
      const concurrencyEnv = process.env.BFCL_CONCURRENCY;
      // Floor the value: it sizes an array of workers below, and a fractional
      // array length would throw a RangeError.
      const concurrency = concurrencyEnv && Number.isFinite(Number(concurrencyEnv)) ? Math.max(1, Math.floor(Number(concurrencyEnv))) : 4;
      logs.push(
        `[INFO] Running ${testCases.length} test cases with concurrency=${concurrency}`
      );
      const temp = config == null ? void 0 : config.temperature;
      const temperature = typeof temp === "number" ? temp : void 0;
      const maxTok = config == null ? void 0 : config.maxTokens;
      const maxTokens = typeof maxTok === "number" ? maxTok : void 0;
      const debugMode = process.env.BFCL_DEBUG === "true";
      // Returns the first non-nullish argument field of a tool-call record,
      // or `record.parameters` when none of the earlier fields is set.
      const pickToolArgs = (record) => {
        for (const key of ["args", "arguments", "input", "params"]) {
          if (record[key] != null) {
            return record[key];
          }
        }
        return record.parameters;
      };
      // Calls the model once (with rate-limit retries) for the given history.
      const executeModelGeneration = async (options) => {
        const { messages, toolsMap } = options;
        const providerOptions = {
          toolCallMiddleware: {
            debugSummary: {}
          }
        };
        const { toolCalls, text, finishReason } = await withRetry(
          () => (0, import_ai2.generateText)({
            model,
            system: MULTI_TURN_SYSTEM_PROMPT,
            messages,
            tools: toolsMap,
            toolChoice: "auto",
            providerOptions,
            ...(temperature !== void 0 ? { temperature } : {}),
            ...(maxTokens !== void 0 ? { maxOutputTokens: maxTokens } : {})
          }),
          { debug: debugMode }
        );
        if (debugMode) {
          const serializedCalls = JSON.stringify(toolCalls, null, 2);
          console.log("[DEBUG] generateText response:");
          console.log("  finishReason:", finishReason);
          console.log("  text:", typeof text === "string" ? text.slice(0, 200) : text);
          console.log(
            "  toolCalls:",
            serializedCalls == null ? void 0 : serializedCalls.slice(0, 500)
          );
        }
        return { toolCalls, text, finishReason };
      };
      // Maps each function listed in `missed_function` to the turn index under
      // which it is listed; getAvailableTools hides it before that turn.
      const buildWithholdUntil = (missedFunctionMap) => {
        const withholdUntil = /* @__PURE__ */ new Map();
        for (const [turnStr, funcs] of Object.entries(missedFunctionMap)) {
          const turnIndex = Number(turnStr);
          if (!Number.isFinite(turnIndex)) {
            continue;
          }
          for (const fn of funcs) {
            withholdUntil.set(fn, turnIndex);
          }
        }
        return withholdUntil;
      };
      // Messages for one turn. An empty turn that has missed functions listed
      // for it gets the default "additional function" user prompt instead.
      const getTurnMessages = (turns, turnIndex, missedFunctionMap) => {
        const turnMessages = turns[turnIndex] != null ? turns[turnIndex] : [];
        const missedForTurn = missedFunctionMap[String(turnIndex)];
        const missedFunctionsForTurn = missedForTurn != null ? missedForTurn : [];
        if (turnMessages.length === 0 && missedFunctionsForTurn.length > 0) {
          return [
            {
              role: "user",
              content: DEFAULT_USER_PROMPT_FOR_ADDITIONAL_FUNCTION_FC
            }
          ];
        }
        return turnMessages;
      };
      // Drops excluded tools and tools still withheld at `turnIndex`; both the
      // bare method name and the full tool name are honoured.
      const getAvailableTools = (tools, excludedFunctions, withholdUntil, turnIndex) => tools.filter((toolSpec) => {
        const methodName = getMethodName(toolSpec.name);
        if (excludedFunctions.has(methodName) || excludedFunctions.has(toolSpec.name)) {
          return false;
        }
        const byMethod = withholdUntil.get(methodName);
        const availableFrom = byMethod != null ? byMethod : withholdUntil.get(toolSpec.name);
        if (availableFrom !== void 0 && turnIndex < availableFrom) {
          return false;
        }
        return true;
      });
      // One model call plus execution of whatever tool calls it produced.
      // Returns the extended history, the executed calls and whether the turn
      // is finished.
      const runToolStep = async (options) => {
        const {
          history,
          toolsMap,
          transformedTools,
          nameMap,
          turnIndex,
          stepCount,
          initialConfig,
          involvedClasses,
          isLongContext,
          testCaseId
        } = options;
        const { toolCalls, text, finishReason } = await executeModelGeneration({
          messages: history,
          toolsMap
        });
        const toolCallsArray = Array.isArray(toolCalls) ? toolCalls : [];
        if (debugMode) {
          console.log(`[DEBUG] TestCase ${testCaseId} Step ${stepCount}:`);
          console.log(`  History length: ${history.length}`);
          console.log("  Last message:", history.at(-1));
          console.log(`  Finish reason: ${finishReason}`);
          console.log(`  Text response: "${text}"`);
          console.log(`  Tool calls count: ${toolCallsArray.length}`);
          if (toolCallsArray.length > 0) {
            console.log(
              "  Tool calls:",
              toolCallsArray.map((tc) => ({
                name: tc.toolName,
                args: tc.args
              }))
            );
          }
        }
        if (toolCallsArray.length === 0) {
          // No tool calls: the turn ends; keep any assistant text in history.
          const textContent = typeof text === "string" ? text : "";
          const updatedHistory = textContent ? [
            ...history,
            {
              role: "assistant",
              content: [{ type: "text", text: textContent }]
            }
          ] : history;
          return { done: true, history: updatedHistory, toolCalls: [] };
        }
        const isLastStep = finishReason === "stop" || finishReason === "end_turn" || finishReason === "length";
        const restoredCalls = restoreToolCalls(
          toolCallsArray,
          nameMap,
          transformedTools
        );
        // History keeps the names exactly as the model used them; execution
        // (below) uses the restored original names.
        const toolCallParts = toolCallsArray.map((call, idx) => {
          const record = call;
          const toolCallId = typeof record.toolCallId === "string" ? record.toolCallId : `toolcall-${turnIndex}-${stepCount}-${idx}`;
          const rawName = record.toolName != null ? record.toolName : record.name;
          let toolName;
          if (typeof rawName === "string") {
            toolName = rawName;
          } else {
            const fallbackTool = transformedTools[idx];
            toolName = fallbackTool != null && fallbackTool.name != null ? fallbackTool.name : "tool";
          }
          const extractedArgs = pickToolArgs(record);
          const parsedArgs = parseToolArgs(extractedArgs);
          const parsedInput = parsedArgs != null ? parsedArgs : {};
          if (debugMode) {
            const extractedJson = JSON.stringify(extractedArgs);
            const parsedJson = JSON.stringify(parsedInput);
            console.log(`[DEBUG] Tool call ${idx} args processing:`);
            console.log(`  record.args type: ${typeof record.args}`);
            console.log(`  record.arguments type: ${typeof record.arguments}`);
            console.log(
              `  extractedArgs type: ${typeof extractedArgs}, value: ${extractedJson == null ? void 0 : extractedJson.slice(0, 200)}`
            );
            console.log(
              `  parsedInput type: ${typeof parsedInput}, value: ${parsedJson == null ? void 0 : parsedJson.slice(0, 200)}`
            );
          }
          return {
            type: "tool-call",
            toolCallId,
            toolName,
            input: parsedInput
          };
        });
        const historyWithToolCalls = [
          ...history,
          {
            role: "assistant",
            content: toolCallParts
          }
        ];
        const toolCallsForExecution = restoredCalls.map((call) => ({
          toolName: call.toolName,
          args: call.args != null ? call.args : {}
        }));
        const executionResult = await executeMultiTurnFuncCall(
          toolCallsForExecution,
          initialConfig,
          involvedClasses,
          runId,
          testCaseId,
          isLongContext,
          false
        );
        const executionResults = executionResult.executionResults;
        if (debugMode) {
          console.log("[DEBUG] Tool call execution results:", executionResults);
          console.log("[DEBUG] Tool calls:", toolCallsForExecution);
        }
        const toolResultParts = executionResults.map((result, idx) => {
          const toolCallPart = toolCallParts[idx];
          return {
            type: "tool-result",
            toolCallId: toolCallPart.toolCallId,
            toolName: toolCallPart.toolName,
            output: {
              type: "text",
              value: result
            }
          };
        });
        const historyWithToolResults = [
          ...historyWithToolCalls,
          {
            role: "tool",
            content: toolResultParts
          }
        ];
        return {
          done: isLastStep,
          history: historyWithToolResults,
          toolCalls: toolCallsForExecution
        };
      };
      // Runs model/tool steps for one turn until the model stops calling
      // tools or the step budget is exhausted (forceQuit).
      const runTurn = async (options) => {
        const {
          testCase,
          turnIndex,
          history,
          tools,
          excludedFunctions,
          withholdUntil,
          initialConfig,
          involvedClasses,
          isLongContext
        } = options;
        const availableTools = getAvailableTools(
          tools,
          excludedFunctions,
          withholdUntil,
          turnIndex
        );
        const { transformedTools, nameMap } = buildTransformedTools(availableTools);
        const toolsMap = buildToolsMap(transformedTools);
        const turnResults = [];
        let stepCount = 0;
        let updatedHistory = history;
        while (stepCount <= MAXIMUM_STEP_LIMIT) {
          const stepResult = await runToolStep({
            history: updatedHistory,
            toolsMap,
            transformedTools,
            nameMap,
            turnIndex,
            stepCount,
            initialConfig,
            involvedClasses,
            isLongContext,
            testCaseId: testCase.id
          });
          // Record every step whose calls were actually executed, including a
          // final step that ended with a terminal finish reason; otherwise the
          // checker would see fewer calls than were applied during the run.
          if (stepResult.toolCalls.length > 0) {
            turnResults.push(stepResult.toolCalls);
          }
          if (stepResult.done) {
            return {
              history: stepResult.history,
              turnResults,
              forceQuit: false
            };
          }
          updatedHistory = stepResult.history;
          stepCount += 1;
        }
        return { history: updatedHistory, turnResults, forceQuit: true };
      };
      // Gathers everything a test case needs, applying defaults for the
      // optional test-case fields.
      const buildCaseContext = async (testCase, possibleAnswer) => {
        const orDefault = (value, fallback) => value != null ? value : fallback;
        const turns = normalizeTurns(orDefault(testCase.question, []));
        const expectedGroundTruth = possibleAnswer.ground_truth;
        const involvedClasses = orDefault(testCase.involved_classes, []);
        const initialConfig = orDefault(testCase.initial_config, {});
        const excludedFunctions = new Set(orDefault(testCase.excluded_function, []));
        const missedFunctionMap = orDefault(testCase.missed_function, {});
        const tools = await loadToolsForClasses(involvedClasses, dataPath);
        const withholdUntil = buildWithholdUntil(missedFunctionMap);
        const isLongContext = testCase.id.includes("long_context") || testCase.id.includes("composite");
        return {
          turns,
          expectedGroundTruth,
          involvedClasses,
          initialConfig,
          excludedFunctions,
          missedFunctionMap,
          tools,
          withholdUntil,
          isLongContext
        };
      };
      // Plays all turns of a test case in order, carrying history forward.
      const runConversation = async (context) => {
        const {
          turns,
          testCase,
          tools,
          excludedFunctions,
          withholdUntil,
          initialConfig,
          involvedClasses,
          missedFunctionMap,
          isLongContext
        } = context;
        let history = [];
        const modelResultsByTurn = [];
        for (let turnIndex = 0; turnIndex < turns.length; turnIndex += 1) {
          const turnMessages = getTurnMessages(
            turns,
            turnIndex,
            missedFunctionMap
          );
          if (turnMessages.length > 0) {
            history = [
              ...history,
              ...turnMessages
            ];
          }
          const turnOutcome = await runTurn({
            testCase,
            turnIndex,
            history,
            tools,
            excludedFunctions,
            withholdUntil,
            initialConfig,
            involvedClasses,
            isLongContext
          });
          history = turnOutcome.history;
          modelResultsByTurn.push(turnOutcome.turnResults);
          if (turnOutcome.forceQuit) {
            return { modelResultsByTurn, forceQuit: true };
          }
        }
        return { modelResultsByTurn, forceQuit: false };
      };
      // Combines the main checker and the irrelevance checker; the case is
      // valid only when both agree.
      const checkCase = async (testCase, modelResultsByTurn, expectedGroundTruth) => {
        // Category = test-case id without its last "_"-separated segment.
        const testCategory = testCase.id.split("_").slice(0, -1).join("_");
        const checkResult = await multiTurnChecker(
          modelResultsByTurn,
          expectedGroundTruth,
          testCase,
          testCategory,
          runId
        );
        const irrelevanceResult = multiTurnIrrelevanceChecker(
          modelResultsByTurn,
          expectedGroundTruth
        );
        return {
          valid: checkResult.valid && irrelevanceResult.valid,
          error_type: checkResult.error_type || irrelevanceResult.error_type,
          details: checkResult.details || irrelevanceResult.details
        };
      };
      // Runs and checks one test case; failures become `[FAIL]` log lines.
      const runSingleCase2 = async (testCase) => {
        try {
          const caseLogs = [];
          const possibleAnswer = possibleAnswersMap.get(testCase.id);
          if (!possibleAnswer) {
            caseLogs.push(`[FAIL] ${testCase.id}: missing possible answer`);
            return { valid: false, logs: caseLogs };
          }
          const context = await buildCaseContext(testCase, possibleAnswer);
          const conversationResult = await runConversation({
            ...context,
            testCase
          });
          if (conversationResult.forceQuit) {
            caseLogs.push(
              `[FAIL] ${testCase.id}: force-terminated after ${MAXIMUM_STEP_LIMIT} steps`
            );
            return { valid: false, logs: caseLogs };
          }
          const checkerResult = await checkCase(
            testCase,
            conversationResult.modelResultsByTurn,
            context.expectedGroundTruth
          );
          if (debugMode) {
            const modelTurns = conversationResult.modelResultsByTurn;
            const groundTruthTurns = context.expectedGroundTruth;
            console.log(`[DEBUG] Test case: ${testCase.id}`);
            console.log(
              `[DEBUG] Model results (${modelTurns.length} turns):`,
              modelTurns
            );
            console.log(
              `[DEBUG] Ground truth (${groundTruthTurns.length} turns):`,
              groundTruthTurns
            );
            const turnCount = Math.max(modelTurns.length, groundTruthTurns.length);
            for (let turn = 0; turn < turnCount; turn++) {
              const modelTurn = modelTurns[turn] || [];
              const gtTurn = groundTruthTurns[turn] || [];
              console.log(`[DEBUG] Turn ${turn}:`);
              console.log(`  Model: ${JSON.stringify(modelTurn)}`);
              console.log(`  Ground Truth: ${JSON.stringify(gtTurn)}`);
              console.log(
                `  Match: ${JSON.stringify(modelTurn) === JSON.stringify(gtTurn)}`
              );
            }
            console.log("[DEBUG] Checker result:", checkerResult);
          }
          if (checkerResult.valid === true) {
            caseLogs.push(`[PASS] ${testCase.id}`);
            return { valid: true, logs: caseLogs };
          }
          caseLogs.push(
            `[FAIL] ${testCase.id}: ${checkerResult.error_type != null ? checkerResult.error_type : "unknown error"}`
          );
          return { valid: false, logs: caseLogs };
        } catch (e) {
          const errorMsg = e instanceof Error ? e.message : "unknown error in runSingleCase";
          return {
            valid: false,
            logs: [`[FAIL] ${testCase.id}: ${errorMsg}`]
          };
        }
      };
      // Second safety net so that one case can never reject the whole batch.
      const runSingleCaseSafe = async (testCase) => {
        try {
          return await runSingleCase2(testCase);
        } catch (e) {
          const errorMsg = e instanceof Error ? e.message : "unknown error occurred";
          if (debugMode) {
            console.error(`[DEBUG] Error in test case ${testCase.id}:`, e);
          }
          return {
            valid: false,
            logs: [`[FAIL] ${testCase.id}: ${errorMsg}`]
          };
        }
      };
      // Order-preserving map with at most `concurrencyLimit` mappers in
      // flight: each worker keeps claiming the next unprocessed index.
      const mapWithConcurrency2 = async (items, concurrencyLimit, mapper) => {
        const results = new Array(items.length);
        let idx = 0;
        const workers = new Array(Math.min(concurrencyLimit, items.length)).fill(0).map(async () => {
          while (true) {
            const current = idx;
            idx += 1;
            if (current >= items.length) {
              break;
            }
            results[current] = await mapper(items[current], current);
          }
        });
        await Promise.all(workers);
        return results;
      };
      const resultsPerCase = await mapWithConcurrency2(
        testCases,
        concurrency,
        async (tc) => runSingleCaseSafe(tc)
      );
      correctCount = resultsPerCase.reduce(
        (acc, r) => acc + (r.valid ? 1 : 0),
        0
      );
      for (const r of resultsPerCase) {
        logs.push(...r.logs);
      }
      if (testCases.length === 0) {
        return {
          score: 0,
          success: false,
          metrics: {},
          logs: ["No test cases found."]
        };
      }
      const score = correctCount / testCases.length;
      const caseResults = resultsPerCase.map((r, i) => ({
        id: testCases[i].id,
        valid: r.valid
      }));
      return {
        score,
        success: score > 0.95,
        metrics: {
          correct_count: correctCount,
          total_cases: testCases.length,
          accuracy: score,
          case_results: JSON.stringify(caseResults)
        },
        logs
      };
    } catch (e) {
      // Anything can be thrown in JavaScript; only Errors carry `.message`.
      const reason = e instanceof Error ? e.message : String(e);
      return {
        score: 0,
        success: false,
        metrics: {},
        error: e,
        logs: [
          `[ERROR] Failed to run BFCL multi-turn benchmark: ${reason}`
        ]
      };
    } finally {
      resetTestInstances("", runId);
    }
  }
});
|
|
5698
|
+
// BFCL multi-turn benchmark built from the first MULTI_TURN_DATASETS entry.
var bfclMultiTurnBaseBenchmark = ((dataset) => createBfclMultiTurnBenchmark(
  dataset.name,
  dataset.description,
  dataset.testFile,
  dataset.answerFile
))(MULTI_TURN_DATASETS[0]);
|
|
5704
|
+
// BFCL multi-turn benchmark built from the second MULTI_TURN_DATASETS entry.
var bfclMultiTurnLongContextBenchmark = ((dataset) => createBfclMultiTurnBenchmark(
  dataset.name,
  dataset.description,
  dataset.testFile,
  dataset.answerFile
))(MULTI_TURN_DATASETS[1]);
|
|
5710
|
+
// BFCL multi-turn benchmark built from the third MULTI_TURN_DATASETS entry.
var bfclMultiTurnMissFuncBenchmark = ((dataset) => createBfclMultiTurnBenchmark(
  dataset.name,
  dataset.description,
  dataset.testFile,
  dataset.answerFile
))(MULTI_TURN_DATASETS[2]);
|
|
5716
|
+
// BFCL multi-turn benchmark built from the fourth MULTI_TURN_DATASETS entry.
var bfclMultiTurnMissParamBenchmark = ((dataset) => createBfclMultiTurnBenchmark(
  dataset.name,
  dataset.description,
  dataset.testFile,
  dataset.answerFile
))(MULTI_TURN_DATASETS[3]);
|
|
5722
|
+
|
|
1313
5723
|
// src/benchmarks/complex-func-bench.ts
|
|
1314
|
-
var
|
|
1315
|
-
var
|
|
1316
|
-
var
|
|
1317
|
-
var
|
|
5724
|
+
var import_node_fs4 = require("fs");
|
|
5725
|
+
var import_node_path4 = __toESM(require("path"), 1);
|
|
5726
|
+
var import_ai3 = require("ai");
|
|
5727
|
+
var LINE_SPLIT_REGEX3 = /\r?\n/;
|
|
1318
5728
|
function standardizeString2(input) {
|
|
1319
5729
|
if (typeof input !== "string") {
|
|
1320
5730
|
return input;
|
|
@@ -1460,22 +5870,22 @@ var fixSchemaType = (copy) => {
|
|
|
1460
5870
|
copy.type = "number";
|
|
1461
5871
|
}
|
|
1462
5872
|
};
|
|
1463
|
-
var
|
|
5873
|
+
var fixSchema2 = (schema) => {
|
|
1464
5874
|
if (!schema || typeof schema !== "object") {
|
|
1465
5875
|
return { type: "object", properties: {} };
|
|
1466
5876
|
}
|
|
1467
|
-
const copy = Array.isArray(schema) ? schema.map((v) =>
|
|
5877
|
+
const copy = Array.isArray(schema) ? schema.map((v) => fixSchema2(v)) : { ...schema };
|
|
1468
5878
|
if (!Array.isArray(copy)) {
|
|
1469
5879
|
fixSchemaType(copy);
|
|
1470
5880
|
if (copy.properties && typeof copy.properties === "object") {
|
|
1471
5881
|
for (const k of Object.keys(copy.properties)) {
|
|
1472
|
-
copy.properties[k] =
|
|
5882
|
+
copy.properties[k] = fixSchema2(
|
|
1473
5883
|
copy.properties[k]
|
|
1474
5884
|
);
|
|
1475
5885
|
}
|
|
1476
5886
|
}
|
|
1477
5887
|
if (copy.items) {
|
|
1478
|
-
copy.items =
|
|
5888
|
+
copy.items = fixSchema2(copy.items);
|
|
1479
5889
|
}
|
|
1480
5890
|
}
|
|
1481
5891
|
return copy;
|
|
@@ -1483,7 +5893,7 @@ var fixSchema = (schema) => {
|
|
|
1483
5893
|
function buildTools(tools) {
|
|
1484
5894
|
const nameMap = /* @__PURE__ */ new Map();
|
|
1485
5895
|
const transformedTools = tools.map((t) => {
|
|
1486
|
-
const fixed =
|
|
5896
|
+
const fixed = fixSchema2(t.parameters);
|
|
1487
5897
|
const inputSchema = fixed && typeof fixed === "object" && fixed.type === "object" ? fixed : { type: "object", properties: {} };
|
|
1488
5898
|
const sanitized = t.name.replace(/[^a-zA-Z0-9_-]/g, "_").slice(0, 64) || "tool";
|
|
1489
5899
|
nameMap.set(sanitized, t.name);
|
|
@@ -1497,9 +5907,9 @@ function buildTools(tools) {
|
|
|
1497
5907
|
const toolsMap = Object.fromEntries(
|
|
1498
5908
|
transformedTools.map((t) => [
|
|
1499
5909
|
t.name,
|
|
1500
|
-
(0,
|
|
5910
|
+
(0, import_ai3.tool)({
|
|
1501
5911
|
description: typeof t.description === "string" ? t.description : void 0,
|
|
1502
|
-
inputSchema: (0,
|
|
5912
|
+
inputSchema: (0, import_ai3.jsonSchema)(t.inputSchema)
|
|
1503
5913
|
})
|
|
1504
5914
|
])
|
|
1505
5915
|
);
|
|
@@ -1530,7 +5940,7 @@ async function runSingleCase(testCase, model, possibleAnswersMap, temperature, m
|
|
|
1530
5940
|
const providerOptions = {
|
|
1531
5941
|
toolCallMiddleware: { debugSummary: debugSummaryRef }
|
|
1532
5942
|
};
|
|
1533
|
-
const { toolCalls, finishReason } = await (0,
|
|
5943
|
+
const { toolCalls, finishReason } = await (0, import_ai3.generateText)({
|
|
1534
5944
|
model,
|
|
1535
5945
|
messages,
|
|
1536
5946
|
tools: toolsMap,
|
|
@@ -1573,18 +5983,18 @@ async function runSingleCase(testCase, model, possibleAnswersMap, temperature, m
|
|
|
1573
5983
|
}
|
|
1574
5984
|
}
|
|
1575
5985
|
/**
 * Reads a JSONL test-data file and parses every non-blank line as JSON.
 *
 * @param {string} dataPath - Directory containing the data file.
 * @param {string} testDataFile - File name, joined onto `dataPath`.
 * @returns {Promise<unknown[]>} Parsed records in file order.
 */
async function loadTestData(dataPath, testDataFile) {
  const filePath = import_node_path4.default.join(dataPath, testDataFile);
  const raw = await import_node_fs4.promises.readFile(filePath, "utf-8");
  const records = [];
  for (const line of raw.split(LINE_SPLIT_REGEX3)) {
    if (line.trim().length > 0) {
      records.push(JSON.parse(line));
    }
  }
  return records;
}
|
|
1582
5992
|
/**
 * Reads a JSONL answer file and indexes the parsed records by their `id`.
 *
 * @param {string} dataPath - Directory containing the answer file.
 * @param {string} answerDataFile - File name, joined onto `dataPath`.
 * @returns {Promise<Map<unknown, object>>} Map from record id to record; a
 *   later record with the same id replaces an earlier one.
 */
async function loadAnswerData(dataPath, answerDataFile) {
  const filePath = import_node_path4.default.join(dataPath, answerDataFile);
  const raw = await import_node_fs4.promises.readFile(filePath, "utf-8");
  const answers = raw.split(LINE_SPLIT_REGEX3).filter((line) => line.trim().length > 0).map((line) => JSON.parse(line));
  const answersById = /* @__PURE__ */ new Map();
  for (const answer of answers) {
    answersById.set(answer.id, answer);
  }
  return answersById;
}
|
|
1590
6000
|
function getConfigValues(config) {
|
|
@@ -1679,14 +6089,14 @@ var complexFuncBenchBenchmark = createComplexFuncBenchBenchmark(
|
|
|
1679
6089
|
);
|
|
1680
6090
|
|
|
1681
6091
|
// src/benchmarks/json-generation.ts
|
|
1682
|
-
var
|
|
1683
|
-
var
|
|
1684
|
-
var
|
|
6092
|
+
var import_node_fs5 = require("fs");
|
|
6093
|
+
var import_node_path5 = __toESM(require("path"), 1);
|
|
6094
|
+
var import_ai4 = require("ai");
|
|
1685
6095
|
var import_ajv = __toESM(require("ajv"), 1);
|
|
1686
6096
|
var JSON_FENCE_REGEX = /```json\s*([\s\S]*?)```/i;
|
|
1687
6097
|
var CODE_FENCE_REGEX = /```\s*([\s\S]*?)```/i;
|
|
1688
6098
|
var NEWLINE_REGEX = /\r?\n/;
|
|
1689
|
-
var
|
|
6099
|
+
var LINE_SPLIT_REGEX4 = /\r?\n/;
|
|
1690
6100
|
function tryDirectParse(text) {
|
|
1691
6101
|
try {
|
|
1692
6102
|
return JSON.parse(text);
|
|
@@ -1775,12 +6185,12 @@ function subsetMatch(expected, actual) {
|
|
|
1775
6185
|
async function loadDatasets() {
|
|
1776
6186
|
try {
|
|
1777
6187
|
const dataDir = resolveDataDir();
|
|
1778
|
-
const testsJsonl = await
|
|
1779
|
-
|
|
6188
|
+
const testsJsonl = await import_node_fs5.promises.readFile(
|
|
6189
|
+
import_node_path5.default.join(dataDir, "json_generation_tests.jsonl"),
|
|
1780
6190
|
"utf-8"
|
|
1781
6191
|
);
|
|
1782
|
-
const expectedJsonl = await
|
|
1783
|
-
|
|
6192
|
+
const expectedJsonl = await import_node_fs5.promises.readFile(
|
|
6193
|
+
import_node_path5.default.join(dataDir, "json_generation_expected.jsonl"),
|
|
1784
6194
|
"utf-8"
|
|
1785
6195
|
);
|
|
1786
6196
|
const tests = testsJsonl.split(NEWLINE_REGEX).filter((line) => line.trim().length > 0).map((line) => JSON.parse(line));
|
|
@@ -1840,7 +6250,7 @@ async function processTestCase(tc, context) {
|
|
|
1840
6250
|
const messages = buildMessages(tc);
|
|
1841
6251
|
const temp = (_a = context.config) == null ? void 0 : _a.temperature;
|
|
1842
6252
|
const temperature = typeof temp === "number" ? temp : void 0;
|
|
1843
|
-
const { text } = await (0,
|
|
6253
|
+
const { text } = await (0, import_ai4.generateText)({
|
|
1844
6254
|
model: context.model,
|
|
1845
6255
|
messages,
|
|
1846
6256
|
...temperature !== void 0 ? { temperature } : {}
|
|
@@ -1942,11 +6352,11 @@ function buildBenchmarkResult(total, counts, logs) {
|
|
|
1942
6352
|
async function loadSchemaOnlyTests() {
|
|
1943
6353
|
try {
|
|
1944
6354
|
const dataDir = resolveDataDir();
|
|
1945
|
-
const testsJsonl = await
|
|
1946
|
-
|
|
6355
|
+
const testsJsonl = await import_node_fs5.promises.readFile(
|
|
6356
|
+
import_node_path5.default.join(dataDir, "json_generation_tests.jsonl"),
|
|
1947
6357
|
"utf-8"
|
|
1948
6358
|
);
|
|
1949
|
-
const tests = testsJsonl.split(
|
|
6359
|
+
const tests = testsJsonl.split(LINE_SPLIT_REGEX4).filter((line) => line.trim().length > 0).map((line) => JSON.parse(line));
|
|
1950
6360
|
return { tests };
|
|
1951
6361
|
} catch (e) {
|
|
1952
6362
|
return { tests: [], error: e };
|
|
@@ -1957,7 +6367,7 @@ async function processSchemaOnlyTestCase(tc, context) {
|
|
|
1957
6367
|
const messages = buildMessages(tc);
|
|
1958
6368
|
const temp = (_a = context.config) == null ? void 0 : _a.temperature;
|
|
1959
6369
|
const temperature = typeof temp === "number" ? temp : void 0;
|
|
1960
|
-
const { text } = await (0,
|
|
6370
|
+
const { text } = await (0, import_ai4.generateText)({
|
|
1961
6371
|
model: context.model,
|
|
1962
6372
|
messages,
|
|
1963
6373
|
...temperature !== void 0 ? { temperature } : {}
|
|
@@ -2036,7 +6446,7 @@ var jsonGenerationSchemaOnlyBenchmark = {
|
|
|
2036
6446
|
|
|
2037
6447
|
// src/evaluate.ts
|
|
2038
6448
|
var import_middleware = require("@ai-sdk-tool/middleware");
|
|
2039
|
-
var
|
|
6449
|
+
var import_ai5 = require("ai");
|
|
2040
6450
|
|
|
2041
6451
|
// src/reporters/console.ts
|
|
2042
6452
|
var colors = {
|
|
@@ -2051,6 +6461,8 @@ var colors = {
|
|
|
2051
6461
|
white: "\x1B[37m"
|
|
2052
6462
|
};
|
|
2053
6463
|
var DEBUG_FAIL_REGEX = /^\[DEBUG-FAIL\] /;
|
|
6464
|
+
var PASS_REGEX = /^\[PASS\] (.+)$/;
|
|
6465
|
+
var FAIL_REGEX = /^\[FAIL\] ([^:]+)(?:: (.+))?$/;
|
|
2054
6466
|
function formatDiff(diff) {
|
|
2055
6467
|
if (!diff || diff.length === 0) {
|
|
2056
6468
|
return "";
|
|
@@ -2142,6 +6554,42 @@ function printFailureSummary(failures) {
|
|
|
2142
6554
|
);
|
|
2143
6555
|
}
|
|
2144
6556
|
}
|
|
6557
|
+
function parseTestResults(logs) {
|
|
6558
|
+
const results = [];
|
|
6559
|
+
for (const log of logs) {
|
|
6560
|
+
const passMatch = log.match(PASS_REGEX);
|
|
6561
|
+
if (passMatch) {
|
|
6562
|
+
results.push({ id: passMatch[1], passed: true });
|
|
6563
|
+
continue;
|
|
6564
|
+
}
|
|
6565
|
+
const failMatch = log.match(FAIL_REGEX);
|
|
6566
|
+
if (failMatch) {
|
|
6567
|
+
results.push({
|
|
6568
|
+
id: failMatch[1],
|
|
6569
|
+
passed: false,
|
|
6570
|
+
reason: failMatch[2]
|
|
6571
|
+
});
|
|
6572
|
+
}
|
|
6573
|
+
}
|
|
6574
|
+
return results;
|
|
6575
|
+
}
|
|
6576
|
+
function printTestResults(testResults) {
|
|
6577
|
+
const passed = testResults.filter((r) => r.passed);
|
|
6578
|
+
const failed = testResults.filter((r) => !r.passed);
|
|
6579
|
+
if (passed.length > 0) {
|
|
6580
|
+
const passedIds = passed.map((r) => r.id).join(", ");
|
|
6581
|
+
console.log(
|
|
6582
|
+
` ${colors.green}\u2714 Passed (${passed.length}):${colors.reset} ${passedIds}`
|
|
6583
|
+
);
|
|
6584
|
+
}
|
|
6585
|
+
if (failed.length > 0) {
|
|
6586
|
+
console.log(` ${colors.red}\u2716 Failed (${failed.length}):${colors.reset}`);
|
|
6587
|
+
for (const f of failed) {
|
|
6588
|
+
const reason = f.reason ? `: ${colors.gray}${f.reason}${colors.reset}` : "";
|
|
6589
|
+
console.log(` ${colors.red}${f.id}${colors.reset}${reason}`);
|
|
6590
|
+
}
|
|
6591
|
+
}
|
|
6592
|
+
}
|
|
2145
6593
|
function printResult(result) {
|
|
2146
6594
|
const { model, modelKey, benchmark, result: benchmarkResult } = result;
|
|
2147
6595
|
const passed = benchmarkResult.metrics.correct_count;
|
|
@@ -2161,15 +6609,14 @@ function printResult(result) {
|
|
|
2161
6609
|
` ${colors.red}Error: ${benchmarkResult.error.message}${colors.reset}`
|
|
2162
6610
|
);
|
|
2163
6611
|
}
|
|
2164
|
-
if (
|
|
2165
|
-
const
|
|
2166
|
-
|
|
2167
|
-
|
|
2168
|
-
|
|
2169
|
-
|
|
2170
|
-
|
|
2171
|
-
|
|
2172
|
-
}
|
|
6612
|
+
if (benchmarkResult.logs && benchmarkResult.logs.length > 0) {
|
|
6613
|
+
const structuredFailures = parseFailures(benchmarkResult.logs);
|
|
6614
|
+
const testResults = parseTestResults(benchmarkResult.logs);
|
|
6615
|
+
if (testResults.length > 0) {
|
|
6616
|
+
printTestResults(testResults);
|
|
6617
|
+
}
|
|
6618
|
+
if (structuredFailures.length > 0) {
|
|
6619
|
+
printFailureSummary(structuredFailures);
|
|
2173
6620
|
}
|
|
2174
6621
|
}
|
|
2175
6622
|
}
|
|
@@ -2960,7 +7407,7 @@ function buildEffectiveModel(baseModel, userMiddleware, cacheOptions) {
|
|
|
2960
7407
|
if (middlewares.length === 0) {
|
|
2961
7408
|
return baseModel;
|
|
2962
7409
|
}
|
|
2963
|
-
return (0,
|
|
7410
|
+
return (0, import_ai5.wrapLanguageModel)({
|
|
2964
7411
|
// biome-ignore lint/suspicious/noExplicitAny: AI SDK v5/v6 type mismatch
|
|
2965
7412
|
model: baseModel,
|
|
2966
7413
|
middleware: middlewares.length === 1 ? middlewares[0] : middlewares
|
|
@@ -2970,11 +7417,19 @@ async function runSingleBenchmark(model, benchmark, modelKey, config) {
|
|
|
2970
7417
|
const modelId = typeof model === "object" && model !== null && "modelId" in model && typeof model.modelId === "string" ? model.modelId : "unknown-model";
|
|
2971
7418
|
const prefix = `[${modelId}]${modelKey ? ` (${modelKey})` : ""} ${benchmark.name}`;
|
|
2972
7419
|
try {
|
|
2973
|
-
process.stdout.
|
|
7420
|
+
if (process.stdout.isTTY) {
|
|
7421
|
+
process.stdout.write(`${prefix}: ...`);
|
|
7422
|
+
} else {
|
|
7423
|
+
console.log(`${prefix}: ...`);
|
|
7424
|
+
}
|
|
2974
7425
|
const result = await benchmark.run(model, config);
|
|
2975
7426
|
const scoreDisplay = result.score.toFixed(2);
|
|
2976
|
-
process.stdout.
|
|
7427
|
+
if (process.stdout.isTTY) {
|
|
7428
|
+
process.stdout.write(`\r${prefix}: .... Score: ${scoreDisplay}
|
|
2977
7429
|
`);
|
|
7430
|
+
} else {
|
|
7431
|
+
console.log(`${prefix}: .... Score: ${scoreDisplay}`);
|
|
7432
|
+
}
|
|
2978
7433
|
return {
|
|
2979
7434
|
model: modelId,
|
|
2980
7435
|
modelKey,
|
|
@@ -2982,8 +7437,12 @@ async function runSingleBenchmark(model, benchmark, modelKey, config) {
|
|
|
2982
7437
|
result
|
|
2983
7438
|
};
|
|
2984
7439
|
} catch (error) {
|
|
2985
|
-
process.stdout.
|
|
7440
|
+
if (process.stdout.isTTY) {
|
|
7441
|
+
process.stdout.write(`\r${prefix}: .... Score: ERROR
|
|
2986
7442
|
`);
|
|
7443
|
+
} else {
|
|
7444
|
+
console.log(`${prefix}: .... Score: ERROR`);
|
|
7445
|
+
}
|
|
2987
7446
|
console.error(error);
|
|
2988
7447
|
return {
|
|
2989
7448
|
model: modelId,
|
|
@@ -3031,12 +7490,17 @@ async function evaluate(options) {
|
|
|
3031
7490
|
}
|
|
3032
7491
|
// Annotate the CommonJS export names for ESM import in node:
|
|
3033
7492
|
0 && (module.exports = {
|
|
7493
|
+
bfclMultiTurnBaseBenchmark,
|
|
7494
|
+
bfclMultiTurnLongContextBenchmark,
|
|
7495
|
+
bfclMultiTurnMissFuncBenchmark,
|
|
7496
|
+
bfclMultiTurnMissParamBenchmark,
|
|
3034
7497
|
bfclMultipleBenchmark,
|
|
3035
7498
|
bfclParallelBenchmark,
|
|
3036
7499
|
bfclParallelMultipleBenchmark,
|
|
3037
7500
|
bfclSimpleBenchmark,
|
|
3038
7501
|
complexFuncBenchBenchmark,
|
|
3039
7502
|
evaluate,
|
|
7503
|
+
executeMultiTurnFuncCall,
|
|
3040
7504
|
jsonGenerationBenchmark,
|
|
3041
7505
|
jsonGenerationSchemaOnlyBenchmark
|
|
3042
7506
|
});
|