llmtester 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +156 -0
- package/bin/cli.js +2 -0
- package/bin/tui.js +2 -0
- package/dist/benchmarks.d.ts +17 -0
- package/dist/benchmarks.d.ts.map +1 -0
- package/dist/benchmarks.js +612 -0
- package/dist/benchmarks.js.map +1 -0
- package/dist/client.d.ts +69 -0
- package/dist/client.d.ts.map +1 -0
- package/dist/client.js +103 -0
- package/dist/client.js.map +1 -0
- package/dist/evaluator.d.ts +57 -0
- package/dist/evaluator.d.ts.map +1 -0
- package/dist/evaluator.js +410 -0
- package/dist/evaluator.js.map +1 -0
- package/dist/index.d.ts +3 -0
- package/dist/index.d.ts.map +1 -0
- package/dist/index.js +515 -0
- package/dist/index.js.map +1 -0
- package/dist/logger.d.ts +16 -0
- package/dist/logger.d.ts.map +1 -0
- package/dist/logger.js +30 -0
- package/dist/logger.js.map +1 -0
- package/dist/paths.d.ts +6 -0
- package/dist/paths.d.ts.map +1 -0
- package/dist/paths.js +49 -0
- package/dist/paths.js.map +1 -0
- package/dist/progress.d.ts +13 -0
- package/dist/progress.d.ts.map +1 -0
- package/dist/progress.js +47 -0
- package/dist/progress.js.map +1 -0
- package/dist/tui.d.ts +3 -0
- package/dist/tui.d.ts.map +1 -0
- package/dist/tui.js +326 -0
- package/dist/tui.js.map +1 -0
- package/package.json +45 -0
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"logger.js","sourceRoot":"","sources":["../src/logger.ts"],"names":[],"mappings":";;;;;;AAAA,wDAA0B;AAC1B,gDAAwB;AACxB,kDAA0B;AAa1B,MAAa,MAAM;IACT,OAAO,CAAS;IAExB,YAAY,OAAe;QACzB,IAAI,CAAC,OAAO,GAAG,OAAO,CAAC;QACvB,kBAAE,CAAC,aAAa,CAAC,OAAO,CAAC,CAAC;IAC5B,CAAC;IAED,QAAQ,CAAC,OAAmB,EAAE,MAAe;QAC3C,IAAI,CAAC;YACH,MAAM,QAAQ,GAAG,MAAM,IAAI,SAAS,IAAI,CAAC,GAAG,EAAE,EAAE,CAAC;YACjD,MAAM,QAAQ,GAAG,cAAI,CAAC,IAAI,CAAC,IAAI,CAAC,OAAO,EAAE,GAAG,QAAQ,QAAQ,CAAC,CAAC;YAC9D,MAAM,KAAK,GAAG,OAAO,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,IAAI,CAAC,SAAS,CAAC,CAAC,CAAC,CAAC,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;YAC/D,kBAAE,CAAC,aAAa,CAAC,QAAQ,EAAE,KAAK,CAAC,CAAC;YAClC,OAAO,CAAC,GAAG,CAAC,eAAK,CAAC,IAAI,CAAC,WAAW,OAAO,CAAC,MAAM,eAAe,QAAQ,QAAQ,CAAC,CAAC,CAAC;QACpF,CAAC;QAAC,OAAO,KAAK,EAAE,CAAC;YACf,OAAO,CAAC,KAAK,CAAC,8BAA8B,KAAK,EAAE,CAAC,CAAC;QACvD,CAAC;IACH,CAAC;CACF;AAnBD,wBAmBC"}
|
package/dist/paths.d.ts
ADDED
|
@@ -0,0 +1,6 @@
|
|
|
1
|
+
export declare function getAppDataDir(): string;
|
|
2
|
+
export declare function getConfigDir(): string;
|
|
3
|
+
export declare function getDetailedLogsDir(): string;
|
|
4
|
+
export declare function getResultsDir(): string;
|
|
5
|
+
export declare function getProgressDir(): string;
|
|
6
|
+
//# sourceMappingURL=paths.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"paths.d.ts","sourceRoot":"","sources":["../src/paths.ts"],"names":[],"mappings":"AAKA,wBAAgB,aAAa,IAAI,MAAM,CAetC;AAED,wBAAgB,YAAY,IAAI,MAAM,CAerC;AAED,wBAAgB,kBAAkB,IAAI,MAAM,CAE3C;AAED,wBAAgB,aAAa,IAAI,MAAM,CAEtC;AAED,wBAAgB,cAAc,IAAI,MAAM,CAEvC"}
|
package/dist/paths.js
ADDED
|
@@ -0,0 +1,49 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
var __importDefault = (this && this.__importDefault) || function (mod) {
|
|
3
|
+
return (mod && mod.__esModule) ? mod : { "default": mod };
|
|
4
|
+
};
|
|
5
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
6
|
+
exports.getAppDataDir = getAppDataDir;
|
|
7
|
+
exports.getConfigDir = getConfigDir;
|
|
8
|
+
exports.getDetailedLogsDir = getDetailedLogsDir;
|
|
9
|
+
exports.getResultsDir = getResultsDir;
|
|
10
|
+
exports.getProgressDir = getProgressDir;
|
|
11
|
+
const path_1 = __importDefault(require("path"));
|
|
12
|
+
const os_1 = __importDefault(require("os"));
|
|
13
|
+
const APP_NAME = 'llmtester';
|
|
14
|
+
/**
 * Resolve the per-user base directory for application data.
 *
 * @returns {string} `~/Library/Application Support/<APP_NAME>` on macOS,
 *   `~/.local/share/<APP_NAME>` on Linux, and `%APPDATA%/<APP_NAME>` on every
 *   other platform (the home directory is used when APPDATA is unset or empty).
 */
function getAppDataDir() {
    const home = os_1.default.homedir();
    switch (process.platform) {
        case 'darwin':
            return path_1.default.join(home, 'Library', 'Application Support', APP_NAME);
        case 'linux':
            return path_1.default.join(home, '.local', 'share', APP_NAME);
        default: {
            // Windows, and any platform not handled above.
            const base = process.env.APPDATA || home;
            return path_1.default.join(base, APP_NAME);
        }
    }
}
|
|
27
|
+
/**
 * Resolve the per-user directory for configuration files.
 *
 * @returns {string} `~/.config/<APP_NAME>` on Linux,
 *   `~/Library/Application Support/<APP_NAME>` on macOS, and
 *   `%APPDATA%/<APP_NAME>` on every other platform (the home directory is
 *   used when APPDATA is unset or empty).
 */
function getConfigDir() {
    const home = os_1.default.homedir();
    switch (process.platform) {
        case 'linux':
            return path_1.default.join(home, '.config', APP_NAME);
        case 'darwin':
            return path_1.default.join(home, 'Library', 'Application Support', APP_NAME);
        default: {
            // Windows, and any platform not handled above.
            const base = process.env.APPDATA || home;
            return path_1.default.join(base, APP_NAME);
        }
    }
}
|
|
40
|
+
/**
 * @returns {string} The `detailed_logs` directory inside the app data directory.
 */
function getDetailedLogsDir() {
    const appDataDir = getAppDataDir();
    return path_1.default.join(appDataDir, 'detailed_logs');
}
|
|
43
|
+
/**
 * @returns {string} The `results` directory inside the app data directory.
 */
function getResultsDir() {
    const appDataDir = getAppDataDir();
    return path_1.default.join(appDataDir, 'results');
}
|
|
46
|
+
/**
 * @returns {string} The `eval_progress` directory inside the app data directory.
 */
function getProgressDir() {
    const appDataDir = getAppDataDir();
    return path_1.default.join(appDataDir, 'eval_progress');
}
|
|
49
|
+
//# sourceMappingURL=paths.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"paths.js","sourceRoot":"","sources":["../src/paths.ts"],"names":[],"mappings":";;;;;AAKA,sCAeC;AAED,oCAeC;AAED,gDAEC;AAED,sCAEC;AAED,wCAEC;AAjDD,gDAAwB;AACxB,4CAAoB;AAEpB,MAAM,QAAQ,GAAG,WAAW,CAAC;AAE7B,SAAgB,aAAa;IAC3B,MAAM,OAAO,GAAG,YAAE,CAAC,OAAO,EAAE,CAAC;IAE7B,qDAAqD;IACrD,IAAI,OAAO,CAAC,QAAQ,KAAK,QAAQ,EAAE,CAAC;QAClC,OAAO,cAAI,CAAC,IAAI,CAAC,OAAO,EAAE,SAAS,EAAE,qBAAqB,EAAE,QAAQ,CAAC,CAAC;IACxE,CAAC;IAED,sCAAsC;IACtC,IAAI,OAAO,CAAC,QAAQ,KAAK,OAAO,EAAE,CAAC;QACjC,OAAO,cAAI,CAAC,IAAI,CAAC,OAAO,EAAE,QAAQ,EAAE,OAAO,EAAE,QAAQ,CAAC,CAAC;IACzD,CAAC;IAED,mCAAmC;IACnC,OAAO,cAAI,CAAC,IAAI,CAAC,OAAO,CAAC,GAAG,CAAC,OAAO,IAAI,OAAO,EAAE,QAAQ,CAAC,CAAC;AAC7D,CAAC;AAED,SAAgB,YAAY;IAC1B,MAAM,OAAO,GAAG,YAAE,CAAC,OAAO,EAAE,CAAC;IAE7B,iCAAiC;IACjC,IAAI,OAAO,CAAC,QAAQ,KAAK,OAAO,EAAE,CAAC;QACjC,OAAO,cAAI,CAAC,IAAI,CAAC,OAAO,EAAE,SAAS,EAAE,QAAQ,CAAC,CAAC;IACjD,CAAC;IAED,qDAAqD;IACrD,IAAI,OAAO,CAAC,QAAQ,KAAK,QAAQ,EAAE,CAAC;QAClC,OAAO,cAAI,CAAC,IAAI,CAAC,OAAO,EAAE,SAAS,EAAE,qBAAqB,EAAE,QAAQ,CAAC,CAAC;IACxE,CAAC;IAED,mCAAmC;IACnC,OAAO,cAAI,CAAC,IAAI,CAAC,OAAO,CAAC,GAAG,CAAC,OAAO,IAAI,OAAO,EAAE,QAAQ,CAAC,CAAC;AAC7D,CAAC;AAED,SAAgB,kBAAkB;IAChC,OAAO,cAAI,CAAC,IAAI,CAAC,aAAa,EAAE,EAAE,eAAe,CAAC,CAAC;AACrD,CAAC;AAED,SAAgB,aAAa;IAC3B,OAAO,cAAI,CAAC,IAAI,CAAC,aAAa,EAAE,EAAE,SAAS,CAAC,CAAC;AAC/C,CAAC;AAED,SAAgB,cAAc;IAC5B,OAAO,cAAI,CAAC,IAAI,CAAC,aAAa,EAAE,EAAE,eAAe,CAAC,CAAC;AACrD,CAAC"}
|
|
@@ -0,0 +1,13 @@
|
|
|
1
|
+
export interface ProgressData {
|
|
2
|
+
completed: number;
|
|
3
|
+
total: number;
|
|
4
|
+
seed?: number;
|
|
5
|
+
}
|
|
6
|
+
export declare class ProgressTracker {
|
|
7
|
+
private progressDir;
|
|
8
|
+
constructor(progressDir: string);
|
|
9
|
+
save(filename: string, data: ProgressData): void;
|
|
10
|
+
load(filename: string): ProgressData | null;
|
|
11
|
+
clear(filename: string): void;
|
|
12
|
+
}
|
|
13
|
+
//# sourceMappingURL=progress.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"progress.d.ts","sourceRoot":"","sources":["../src/progress.ts"],"names":[],"mappings":"AAGA,MAAM,WAAW,YAAY;IAC3B,SAAS,EAAE,MAAM,CAAC;IAClB,KAAK,EAAE,MAAM,CAAC;IACd,IAAI,CAAC,EAAE,MAAM,CAAC;CACf;AAED,qBAAa,eAAe;IAC1B,OAAO,CAAC,WAAW,CAAS;gBAEhB,WAAW,EAAE,MAAM;IAI/B,IAAI,CAAC,QAAQ,EAAE,MAAM,EAAE,IAAI,EAAE,YAAY,GAAG,IAAI;IAQhD,IAAI,CAAC,QAAQ,EAAE,MAAM,GAAG,YAAY,GAAG,IAAI;IAY3C,KAAK,CAAC,QAAQ,EAAE,MAAM,GAAG,IAAI;CAU9B"}
|
package/dist/progress.js
ADDED
|
@@ -0,0 +1,47 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
var __importDefault = (this && this.__importDefault) || function (mod) {
|
|
3
|
+
return (mod && mod.__esModule) ? mod : { "default": mod };
|
|
4
|
+
};
|
|
5
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
6
|
+
exports.ProgressTracker = void 0;
|
|
7
|
+
const fs_extra_1 = __importDefault(require("fs-extra"));
|
|
8
|
+
const path_1 = __importDefault(require("path"));
|
|
9
|
+
/**
 * Stores progress snapshots as JSON files inside a single directory.
 *
 * Every method is best-effort: filesystem errors are reported through
 * console.error and never propagate to the caller.
 */
class ProgressTracker {
    /** Directory that holds the progress files. */
    progressDir;
    /**
     * @param {string} progressDir Directory in which progress files are kept.
     *   It is not touched until the first save.
     */
    constructor(progressDir) {
        this.progressDir = progressDir;
    }
    /**
     * Write `data` as 2-space-indented JSON to `filename` in the progress directory.
     *
     * @param {string} filename File name relative to the progress directory.
     * @param {object} data Progress snapshot to persist.
     * @returns {void}
     */
    save(filename, data) {
        try {
            // Create the directory on demand; without this the first save into
            // a directory that does not exist yet fails and the snapshot is lost.
            fs_extra_1.default.ensureDirSync(this.progressDir);
            fs_extra_1.default.writeJsonSync(path_1.default.join(this.progressDir, filename), data, { spaces: 2 });
        }
        catch (error) {
            console.error(`Failed to save progress: ${error}`);
        }
    }
    /**
     * Read a previously saved snapshot.
     *
     * @param {string} filename File name relative to the progress directory.
     * @returns {object|null} The parsed JSON, or null when the file does not
     *   exist or cannot be read/parsed (the failure is logged).
     */
    load(filename) {
        try {
            const filePath = path_1.default.join(this.progressDir, filename);
            if (fs_extra_1.default.existsSync(filePath)) {
                return fs_extra_1.default.readJsonSync(filePath);
            }
        }
        catch (error) {
            console.error(`Failed to load progress: ${error}`);
        }
        return null;
    }
    /**
     * Delete a saved snapshot if it exists.
     *
     * @param {string} filename File name relative to the progress directory.
     * @returns {void}
     */
    clear(filename) {
        try {
            const filePath = path_1.default.join(this.progressDir, filename);
            if (fs_extra_1.default.existsSync(filePath)) {
                fs_extra_1.default.removeSync(filePath);
            }
        }
        catch (error) {
            console.error(`Failed to clear progress: ${error}`);
        }
    }
}
|
|
46
|
+
exports.ProgressTracker = ProgressTracker;
|
|
47
|
+
//# sourceMappingURL=progress.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"progress.js","sourceRoot":"","sources":["../src/progress.ts"],"names":[],"mappings":";;;;;;AAAA,wDAA0B;AAC1B,gDAAwB;AAQxB,MAAa,eAAe;IAClB,WAAW,CAAS;IAE5B,YAAY,WAAmB;QAC7B,IAAI,CAAC,WAAW,GAAG,WAAW,CAAC;IACjC,CAAC;IAED,IAAI,CAAC,QAAgB,EAAE,IAAkB;QACvC,IAAI,CAAC;YACH,kBAAE,CAAC,aAAa,CAAC,cAAI,CAAC,IAAI,CAAC,IAAI,CAAC,WAAW,EAAE,QAAQ,CAAC,EAAE,IAAI,EAAE,EAAE,MAAM,EAAE,CAAC,EAAE,CAAC,CAAC;QAC/E,CAAC;QAAC,OAAO,KAAK,EAAE,CAAC;YACf,OAAO,CAAC,KAAK,CAAC,4BAA4B,KAAK,EAAE,CAAC,CAAC;QACrD,CAAC;IACH,CAAC;IAED,IAAI,CAAC,QAAgB;QACnB,IAAI,CAAC;YACH,MAAM,QAAQ,GAAG,cAAI,CAAC,IAAI,CAAC,IAAI,CAAC,WAAW,EAAE,QAAQ,CAAC,CAAC;YACvD,IAAI,kBAAE,CAAC,UAAU,CAAC,QAAQ,CAAC,EAAE,CAAC;gBAC5B,OAAO,kBAAE,CAAC,YAAY,CAAC,QAAQ,CAAC,CAAC;YACnC,CAAC;QACH,CAAC;QAAC,OAAO,KAAK,EAAE,CAAC;YACf,OAAO,CAAC,KAAK,CAAC,4BAA4B,KAAK,EAAE,CAAC,CAAC;QACrD,CAAC;QACD,OAAO,IAAI,CAAC;IACd,CAAC;IAED,KAAK,CAAC,QAAgB;QACpB,IAAI,CAAC;YACH,MAAM,QAAQ,GAAG,cAAI,CAAC,IAAI,CAAC,IAAI,CAAC,WAAW,EAAE,QAAQ,CAAC,CAAC;YACvD,IAAI,kBAAE,CAAC,UAAU,CAAC,QAAQ,CAAC,EAAE,CAAC;gBAC5B,kBAAE,CAAC,UAAU,CAAC,QAAQ,CAAC,CAAC;YAC1B,CAAC;QACH,CAAC;QAAC,OAAO,KAAK,EAAE,CAAC;YACf,OAAO,CAAC,KAAK,CAAC,6BAA6B,KAAK,EAAE,CAAC,CAAC;QACtD,CAAC;IACH,CAAC;CACF;AArCD,0CAqCC"}
|
package/dist/tui.d.ts
ADDED
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"tui.d.ts","sourceRoot":"","sources":["../src/tui.ts"],"names":[],"mappings":";AAgVA,wBAAsB,SAAS,kBAG9B"}
|
package/dist/tui.js
ADDED
|
@@ -0,0 +1,326 @@
|
|
|
1
|
+
#!/usr/bin/env node
|
|
2
|
+
"use strict";
|
|
3
|
+
var __importDefault = (this && this.__importDefault) || function (mod) {
|
|
4
|
+
return (mod && mod.__esModule) ? mod : { "default": mod };
|
|
5
|
+
};
|
|
6
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
7
|
+
exports.launchTUI = launchTUI;
|
|
8
|
+
const blessed_1 = __importDefault(require("blessed"));
|
|
9
|
+
const fs_extra_1 = __importDefault(require("fs-extra"));
|
|
10
|
+
const path_1 = __importDefault(require("path"));
|
|
11
|
+
const paths_js_1 = require("./paths.js");
|
|
12
|
+
// Directories and files the explorer reads from / writes to.
// (The former `resultsDir` binding was never referenced in this module and has been dropped.)
const detailedLogsDir = (0, paths_js_1.getDetailedLogsDir)();
const evalResultsDir = (0, paths_js_1.getResultsDir)();
const configDir = (0, paths_js_1.getConfigDir)();
const configPath = path_1.default.join(configDir, 'tui_config.json');
// Runs loaded by loadData(), newest file name first.
let runs = [];
// Detailed log entries of the run currently being browsed.
let allEntries = [];
// Index into `runs` of the selected run.
let currentRun = 0;
// Index of the selected row in the entries list.
let currentEntry = 0;
// Which view is active: 'runs', 'entries' or 'detail'.
let state = 'runs';
// View toggles; persisted through saveConfig()/loadConfig().
let showThinking = true;
let showFailedOnly = false;
|
|
24
|
+
/**
 * Restore the view toggles from the TUI config file.
 *
 * Only boolean values are accepted; a missing or wrongly-typed key falls back
 * to its default (showFailedOnly = false, showThinking = true). When the file
 * is missing, unreadable, or does not contain a JSON object, the current
 * settings are left untouched.
 *
 * @returns {void}
 */
function loadConfig() {
    let config;
    try {
        config = fs_extra_1.default.readJsonSync(configPath);
    }
    catch (e) {
        // Best-effort: no config yet or invalid JSON, keep the current settings.
        return;
    }
    if (config === null || typeof config !== 'object') {
        return;
    }
    // Guard against hand-edited values such as "false" (a truthy string).
    showFailedOnly = typeof config.showFailedOnly === 'boolean' ? config.showFailedOnly : false;
    showThinking = typeof config.showThinking === 'boolean' ? config.showThinking : true;
}
|
|
32
|
+
/**
 * Persist the current view toggles to the TUI config file, creating the
 * config directory first. Failures are ignored.
 *
 * @returns {void}
 */
function saveConfig() {
    const settings = { showFailedOnly, showThinking };
    try {
        fs_extra_1.default.ensureDirSync(configDir);
        fs_extra_1.default.writeJsonSync(configPath, settings, { spaces: 2 });
    }
    catch (e) {
        // Best-effort: a failed write only means the toggles are not remembered.
    }
}
|
|
39
|
+
// Root blessed screen; created in main() and reset to null by cleanup().
let screen = null;
|
|
40
|
+
// List widget with one row per loaded run; created in main().
let runsList = null;
|
|
41
|
+
// List widget with one row per log entry of the selected run; created hidden in main().
let entriesList = null;
|
|
42
|
+
// Scrollable text widget showing a single entry; created hidden in main().
let detailBox = null;
|
|
43
|
+
// One-line text widget on the top row.
let header = null;
|
|
44
|
+
// One-line text widget on the bottom row; shows the key hints.
let footer = null;
|
|
45
|
+
/**
 * Format a run timestamp for display using the user's locale.
 *
 * @param {*} timestamp Any value accepted by the Date constructor.
 * @returns {string} The locale date, a space, then the locale time limited to
 *   hours and minutes; 'unknown date' when the value is not a valid date
 *   (for example a result file without a timestamp).
 */
function formatDate(timestamp) {
    const d = new Date(timestamp);
    if (Number.isNaN(d.getTime())) {
        // Avoid rendering "Invalid Date Invalid Date" in the runs list.
        return 'unknown date';
    }
    return d.toLocaleDateString() + ' ' + d.toLocaleTimeString([], { hour: '2-digit', minute: '2-digit' });
}
|
|
49
|
+
/**
 * Turn a logged model response into text marked up with blessed tags.
 *
 * When `showThinking` is off, `<think>…</think>` and `<thinking>…</thinking>`
 * blocks are removed. When it is on, the thinking tags themselves are wrapped
 * in gray markup. Fenced code blocks are always coloured cyan.
 *
 * @param {*} r The response: a string, or an object whose `content` is used
 *   (the whole object is JSON-stringified when `content` is empty or missing).
 * @returns {string} The marked-up text; '' when the response is null or undefined.
 */
function getResponseText(r) {
    // typeof null === 'object', so a missing response must be handled before
    // the property access below.
    if (r === null || r === undefined) {
        return '';
    }
    const text = typeof r === 'object' ? (r.content || JSON.stringify(r)) : r;
    let str = String(text);
    if (!showThinking) {
        // Accept both tag spellings on either side; the previous second pattern
        // opened with <think>, so <thinking>…</thinking> blocks were never hidden.
        str = str.replace(/<think(?:ing)?>[\s\S]*?<\/think(?:ing)?>/gi, '');
    }
    else {
        // `$&` re-inserts the matched tag between the markup.
        str = str.replace(/<think(?:ing)?>/g, '{gray-fg}$&{/gray-fg}');
        str = str.replace(/<\/think(?:ing)?>/g, '{/gray-fg}$&{/gray-fg}');
    }
    str = str.replace(/```(\w*)\n?([\s\S]*?)```/g, (match, lang, code) => {
        return '{gray-fg}```{/gray-fg}' + lang + '\n{cyan-fg}' + code + '{/cyan-fg}{gray-fg}```{/gray-fg}';
    });
    return str;
}
|
|
66
|
+
/**
 * Load every `eval_results_*.json` file from the results directory into `runs`.
 *
 * Files are visited in reverse lexicographic file-name order. A file that
 * cannot be read or parsed is skipped.
 *
 * @returns {Promise<boolean>} true when at least one run was loaded; false when
 *   the directory cannot be listed, holds no result files, or none could be read.
 */
async function loadData() {
    let resultFiles;
    try {
        const files = await fs_extra_1.default.readdir(evalResultsDir);
        resultFiles = files
            .filter((f) => f.startsWith('eval_results_') && f.endsWith('.json'))
            .sort()
            .reverse();
    }
    catch (e) {
        return false;
    }
    if (resultFiles.length === 0) {
        return false;
    }
    runs = [];
    for (const file of resultFiles) {
        try {
            const data = await fs_extra_1.default.readJson(path_1.default.join(evalResultsDir, file));
            // The rest of the module calls run.results.map(...); a truthy
            // non-array value would crash there, so normalise it here.
            const results = Array.isArray(data.results) ? data.results : [];
            runs.push({ file, timestamp: data.timestamp, results });
        }
        catch (e) {
            // Best-effort: skip result files that are unreadable or malformed.
        }
    }
    return runs.length > 0;
}
|
|
87
|
+
/**
 * Collect the detailed log entries that belong to the currently selected run.
 *
 * A log file matches when its lower-cased name starts with the lower-cased
 * name of one of the run's benchmarks. Each non-blank line of a matching file
 * is parsed as one JSON entry.
 *
 * Blank lines, lines that are not valid JSON objects, and files that cannot
 * be read are skipped individually, so one damaged line no longer discards
 * every entry of the run.
 *
 * @returns {Promise<Array<object>>} Entries with `benchName`, `index` (position
 *   among the parsed entries of their file), `question`, `response`,
 *   `isCorrect` and `judgeResponse`; an empty array when nothing matches.
 */
async function loadAllEntries() {
    const run = runs[currentRun];
    if (!run) {
        return [];
    }
    // Results without a string benchmark name cannot match any log file.
    const runBenchmarks = run.results
        .map((r) => (r ? r.benchmark : undefined))
        .filter((name) => typeof name === 'string')
        .map((name) => name.toLowerCase());
    let logFiles = [];
    try {
        logFiles = await fs_extra_1.default.readdir(detailedLogsDir);
    }
    catch (e) {
        return [];
    }
    const matchingLogs = logFiles.filter((f) => {
        const lowerName = f.toLowerCase();
        return runBenchmarks.some((b) => lowerName.startsWith(b));
    });
    const collected = [];
    for (const logFile of matchingLogs) {
        let content;
        try {
            content = await fs_extra_1.default.readFile(path_1.default.join(detailedLogsDir, logFile), 'utf-8');
        }
        catch (e) {
            continue;
        }
        let index = 0;
        for (const line of content.split('\n')) {
            if (line.trim() === '') {
                continue;
            }
            let parsed;
            try {
                parsed = JSON.parse(line);
            }
            catch (e) {
                continue;
            }
            if (parsed === null || typeof parsed !== 'object') {
                continue;
            }
            collected.push({
                benchName: parsed.benchmark,
                index,
                question: parsed.question,
                response: parsed.response,
                isCorrect: parsed.isCorrect,
                judgeResponse: parsed.judgeResponse
            });
            index += 1;
        }
    }
    return collected;
}
|
|
118
|
+
/**
 * Switch to the runs view: show the runs list, hide the other panes, reset the
 * header and footer text, focus the list and redraw.
 *
 * @returns {void}
 */
function showRuns() {
    const title = '{center}{bold}{cyan-fg}LLM Benchmark Explorer{/cyan-fg}{/bold}{/center}';
    const hints = '{center}Enter: Select | Q: Quit{/center}';
    state = 'runs';
    runsList.show();
    entriesList.hide();
    if (detailBox) {
        detailBox.hide();
    }
    header.setContent(title);
    footer.setContent(hints);
    runsList.focus();
    render();
}
|
|
129
|
+
/**
 * Switch to the entries view for the selected run.
 *
 * Reloads the run's entries, hides the other panes, then either renders the
 * entries (selection reset to the first row) or shows a placeholder row when
 * the run has no detailed entries.
 *
 * @returns {Promise<void>}
 */
async function showEntries() {
    state = 'entries';
    allEntries = await loadAllEntries();
    runsList.hide();
    if (detailBox) {
        detailBox.hide();
    }
    if (allEntries.length > 0) {
        currentEntry = 0;
        renderEntries();
        return;
    }
    // Nothing to list: show a placeholder instead of an empty pane.
    header.setContent('{center}{yellow-fg}No Detailed Entries{/yellow-fg}{/center}');
    entriesList.setItems(['No detailed entries for this run']);
    entriesList.show();
    footer.setContent('{center}Esc: Back | Q: Quit{/center}');
    render();
}
|
|
146
|
+
/**
 * Fill and show the entries list for the selected run.
 *
 * The header shows the run's benchmark names and the accuracy over ALL
 * entries; the list itself shows only failed entries when `showFailedOnly`
 * is set.
 *
 * @returns {void}
 */
function renderEntries() {
    const run = runs[currentRun];
    const total = allEntries.length;
    const correct = allEntries.filter((e) => e.isCorrect).length;
    const filteredEntries = showFailedOnly ? allEntries.filter((e) => !e.isCorrect) : allEntries;
    // With no entries the ratio is 0/0; show 0.0 rather than "NaN".
    const pct = (total === 0 ? 0 : (correct / total) * 100).toFixed(1);
    const filterLabel = showFailedOnly ? ' (failed only)' : '';
    header.setContent('{center}' + run.results.map((r) => r.benchmark).join(', ') + filterLabel + ' | ' + pct + '% (' + correct + ' of ' + total + '){/center}');
    const items = filteredEntries.map((e) => {
        const snippet = getQuestionSnippet(e.question);
        const status = e.isCorrect ? '{green-fg}[OK]{/green-fg}' : '{red-fg}[X]{/red-fg}';
        return status + ' [' + e.benchName + '] ' + snippet;
    });
    entriesList.setItems(items);
    entriesList.show();
    footer.setContent('{center}F: ' + (showFailedOnly ? 'Show All' : 'Show Failed') + ' | Enter: View Detail | Esc: Back | Q: Quit{/center}');
    entriesList.focus();
    render();
}
|
|
164
|
+
/**
 * Show the detail pane for the entry selected in the entries list.
 *
 * `currentEntry` is the row index in the list as displayed, so it is resolved
 * against the same (possibly failed-only) view that renderEntries() builds;
 * indexing `allEntries` directly opened the wrong entry while the filter was on.
 * Does nothing, and leaves the view state unchanged, when no entry exists at
 * that index (for example the placeholder row of an empty run).
 *
 * @returns {void}
 */
function showDetail() {
    const visibleEntries = showFailedOnly ? allEntries.filter((e) => !e.isCorrect) : allEntries;
    const entry = visibleEntries[currentEntry];
    if (!entry) {
        return;
    }
    state = 'detail';
    entriesList.hide();
    const responseText = getResponseText(entry.response);
    const status = entry.isCorrect ? '{green-fg}[CORRECT]{/green-fg}' : '{red-fg}[INCORRECT]{/red-fg}';
    const thinkStatus = showThinking ? '{yellow-fg}[Thinking: ON]{/yellow-fg}' : '{gray-fg}[Thinking: OFF]{/gray-fg}';
    // Log entries come from disk; do not assume these fields are strings.
    const benchLabel = String(entry.benchName ?? '').toUpperCase();
    let content = '{center}{bold}Entry ' + (currentEntry + 1) + '/' + visibleEntries.length + ' | ' + benchLabel + '{/bold}{/center}\n\n';
    content += '{center}' + status + '{/center}\n\n';
    content += '{bold}Question:{/bold}\n';
    content += getQuestionText(entry.question) + '\n\n';
    content += '{bold}Model Response ' + thinkStatus + ':{/bold}\n';
    content += responseText + '\n';
    if (entry.judgeResponse) {
        const judgeText = typeof entry.judgeResponse === 'string'
            ? entry.judgeResponse
            : JSON.stringify(entry.judgeResponse);
        // Green when the judge text contains "YES" (case-insensitive), red otherwise.
        const judgeColor = judgeText.toUpperCase().includes('YES') ? 'green-fg' : 'red-fg';
        content += '\n{bold}Judge Feedback:{/bold}\n';
        content += '{' + judgeColor + '}' + judgeText + '{/' + judgeColor + '}';
    }
    if (detailBox) {
        detailBox.setContent(content);
        detailBox.show();
        detailBox.focus();
    }
    footer.setContent('{center}Up/Down: Scroll | T: Toggle Thinking | Esc: Back | Q: Quit{/center}');
    render();
}
|
|
192
|
+
/**
 * Build a one-line preview of a question for the entries list.
 *
 * @param {*} q The question: a string, or an object whose `question`
 *   (falling back to `target`) is used.
 * @returns {string} The text with newlines replaced by spaces, trimmed and cut
 *   to at most 200 characters; '' when the question is null or undefined.
 */
function getQuestionSnippet(q) {
    // typeof null === 'object', so guard before touching properties.
    if (q === null || q === undefined) {
        return '';
    }
    const text = typeof q === 'object' ? (q.question || q.target || '') : q;
    return String(text).replace(/\n/g, ' ').trim().substring(0, 200);
}
|
|
196
|
+
/**
 * Return the full question text for the detail pane.
 *
 * @param {*} q The question: a string, or an object whose `question`
 *   (falling back to `target`, then to the JSON of the whole object) is used.
 * @returns {string} The question text; '' when the question is null or undefined.
 */
function getQuestionText(q) {
    // typeof null === 'object', so guard before touching properties.
    if (q === null || q === undefined) {
        return '';
    }
    const text = typeof q === 'object' ? (q.question || q.target || JSON.stringify(q)) : q;
    return String(text);
}
|
|
200
|
+
/**
 * Redraw the screen; a no-op while no screen exists.
 *
 * @returns {void}
 */
function render() {
    if (!screen) {
        return;
    }
    screen.render();
}
|
|
204
|
+
/**
 * Destroy the screen, if one exists, and forget it so that later calls to
 * cleanup() and render() become no-ops.
 *
 * @returns {void}
 */
function cleanup() {
    if (!screen) {
        return;
    }
    screen.destroy();
    screen = null;
}
|
|
210
|
+
/**
 * Load the stored runs, build the blessed UI, wire the key bindings and show
 * the runs list.
 *
 * Prints 'No results found.' and exits with code 0 when loadData() finds
 * nothing to show.
 *
 * Key bindings: Enter selects a row, B/Esc go back one view (Esc quits from
 * the runs view), Q/Ctrl-C quit, T toggles thinking blocks, F toggles the
 * failed-only filter in the entries view.
 *
 * @returns {Promise<void>}
 */
async function main() {
    const hasData = await loadData();
    if (!hasData) {
        console.log('No results found.');
        process.exit(0);
        return;
    }
    screen = blessed_1.default.screen({ smartCSR: true, fullUnicode: true });
    screen.title = 'LLM Benchmark Explorer';
    const container = blessed_1.default.box({
        parent: screen,
        width: '100%', height: '100%',
        style: { fg: 'white', bg: 'black' }
    });
    header = blessed_1.default.text({
        parent: container,
        top: 0, left: 0, width: '100%', height: 1,
        align: 'center', valign: 'middle',
        tags: true,
        style: { fg: 'white', bg: 'black', bold: true, tags: true }
    });
    runsList = blessed_1.default.list({
        parent: container,
        top: 2, left: 0, right: 0, bottom: 1,
        border: { type: 'line', fg: 'cyan' },
        style: { fg: 'white', border: { fg: 'cyan' }, selected: { bg: 'cyan', fg: 'black' } },
        items: runs.map((r) => {
            const summary = r.results.map((x) => {
                // Result files come from disk; a missing accuracy must not crash startup.
                const accuracy = typeof x.accuracy === 'number' ? x.accuracy.toFixed(0) : '?';
                return x.benchmark + ' - ' + x.correct + ' of ' + x.total + ' (' + accuracy + '%)';
            }).join(' | ');
            return formatDate(r.timestamp) + ' | ' + summary;
        }),
        keys: true, vi: true, mouse: true
    });
    entriesList = blessed_1.default.list({
        parent: container,
        top: 2, left: 0, right: 0, bottom: 1,
        border: { type: 'line', fg: 'yellow' },
        style: { fg: 'white', border: { fg: 'yellow' }, selected: { bg: 'yellow', fg: 'black' } },
        tags: true,
        keys: true, vi: true, mouse: true,
        hidden: true
    });
    detailBox = blessed_1.default.scrollabletext({
        parent: container,
        top: 2, left: 0, right: 0, bottom: 1,
        border: { type: 'line', fg: 'magenta' },
        style: { fg: 'white', border: { fg: 'magenta' }, tags: true },
        tags: true,
        scrollable: true, alwaysScroll: true,
        mouse: true, keys: true, vi: true,
        scrollbar: { ch: '|', track: { bg: 'gray' }, style: { fg: 'cyan' } },
        wrap: true, hidden: true
    });
    footer = blessed_1.default.text({
        parent: container,
        bottom: 0, left: 0, width: '100%', height: 1,
        align: 'center', tags: true,
        style: { fg: 'gray' }
    });
    const quit = () => {
        cleanup();
        process.exit(0);
    };
    // showEntries() is async and is started from event handlers, so nothing
    // awaits it. Handle a rejection here instead of leaving it unhandled: tear
    // the screen down first, then report the error and exit.
    const openEntries = () => {
        showEntries().catch((e) => {
            cleanup();
            console.error('Error:', e);
            process.exit(1);
        });
    };
    // Go back one view; returns false when already on the runs view.
    const goBack = () => {
        if (state === 'detail') {
            openEntries();
            return true;
        }
        if (state === 'entries') {
            showRuns();
            return true;
        }
        return false;
    };
    runsList.on('select', (_, i) => {
        currentRun = i;
        currentEntry = 0;
        openEntries();
    });
    entriesList.on('select', (_, i) => {
        currentEntry = i;
        showDetail();
    });
    screen.key(['b', 'B'], () => {
        goBack();
    });
    screen.key(['escape'], () => {
        if (!goBack()) {
            quit();
        }
    });
    screen.key(['q', 'Q', 'C-c'], quit);
    screen.key(['t', 'T'], () => {
        showThinking = !showThinking;
        saveConfig();
        if (state === 'detail') {
            showDetail();
        }
    });
    screen.key(['f', 'F'], () => {
        if (state === 'entries') {
            showFailedOnly = !showFailedOnly;
            saveConfig();
            currentEntry = 0;
            renderEntries();
        }
    });
    loadConfig();
    showRuns();
}
|
|
313
|
+
/**
 * Start the explorer. If main() fails, the error is printed, the screen is
 * torn down and the process exits with code 1.
 *
 * @returns {Promise<void>}
 */
async function launchTUI() {
    await main().catch((e) => {
        console.error('Error:', e);
        cleanup();
        process.exit(1);
    });
}
|
|
323
|
+
// Start the explorer when this file is executed directly rather than required.
// launchTUI() wraps main() with the same error handling that was inlined here.
if (require.main === module) {
    launchTUI();
}
|
|
326
|
+
//# sourceMappingURL=tui.js.map
|
package/dist/tui.js.map
ADDED
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"tui.js","sourceRoot":"","sources":["../src/tui.ts"],"names":[],"mappings":";;;;;;AAgVA,8BAGC;AAlVD,sDAA8B;AAC9B,wDAA0B;AAC1B,gDAAwB;AACxB,yCAA4F;AAO5F,MAAM,UAAU,GAAG,IAAA,wBAAa,GAAE,CAAC;AACnC,MAAM,eAAe,GAAG,IAAA,6BAAkB,GAAE,CAAC;AAC7C,MAAM,cAAc,GAAG,IAAA,wBAAa,GAAE,CAAC;AACvC,MAAM,SAAS,GAAG,IAAA,uBAAY,GAAE,CAAC;AACjC,MAAM,UAAU,GAAG,cAAI,CAAC,IAAI,CAAC,SAAS,EAAE,iBAAiB,CAAC,CAAC;AAE3D,IAAI,IAAI,GAAU,EAAE,CAAC;AACrB,IAAI,UAAU,GAAY,EAAE,CAAC;AAC7B,IAAI,UAAU,GAAG,CAAC,CAAC;AACnB,IAAI,YAAY,GAAG,CAAC,CAAC;AACrB,IAAI,KAAK,GAAgB,MAAM,CAAC;AAChC,IAAI,YAAY,GAAG,IAAI,CAAC;AACxB,IAAI,cAAc,GAAG,KAAK,CAAC;AAE3B,SAAS,UAAU;IACjB,IAAI,CAAC;QACH,MAAM,MAAM,GAAG,kBAAE,CAAC,YAAY,CAAC,UAAU,CAAC,CAAC;QAC3C,cAAc,GAAG,MAAM,CAAC,cAAc,IAAI,KAAK,CAAC;QAChD,YAAY,GAAG,MAAM,CAAC,YAAY,IAAI,IAAI,CAAC;IAC7C,CAAC;IAAC,OAAO,CAAC,EAAE,CAAC,CAAA,CAAC;AAChB,CAAC;AAED,SAAS,UAAU;IACjB,IAAI,CAAC;QACH,kBAAE,CAAC,aAAa,CAAC,SAAS,CAAC,CAAC;QAC5B,kBAAE,CAAC,aAAa,CAAC,UAAU,EAAE,EAAE,cAAc,EAAE,YAAY,EAAE,EAAE,EAAE,MAAM,EAAE,CAAC,EAAE,CAAC,CAAC;IAChF,CAAC;IAAC,OAAO,CAAC,EAAE,CAAC,CAAA,CAAC;AAChB,CAAC;AAED,IAAI,MAAM,GAAQ,IAAI,CAAC;AACvB,IAAI,QAAQ,GAAQ,IAAI,CAAC;AACzB,IAAI,WAAW,GAAQ,IAAI,CAAC;AAC5B,IAAI,SAAS,GAAQ,IAAI,CAAC;AAC1B,IAAI,MAAM,GAAQ,IAAI,CAAC;AACvB,IAAI,MAAM,GAAQ,IAAI,CAAC;AAEvB,SAAS,UAAU,CAAC,SAAiB;IACnC,MAAM,CAAC,GAAG,IAAI,IAAI,CAAC,SAAS,CAAC,CAAC;IAC9B,OAAO,CAAC,CAAC,kBAAkB,EAAE,GAAG,GAAG,GAAG,CAAC,CAAC,kBAAkB,CAAC,EAAE,EAAE,EAAE,IAAI,EAAE,SAAS,EAAE,MAAM,EAAE,SAAS,EAAE,CAAC,CAAC;AACzG,CAAC;AAED,SAAS,eAAe,CAAC,CAAM;IAC7B,MAAM,IAAI,GAAG,OAAO,CAAC,KAAK,QAAQ,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,OAAO,IAAI,IAAI,CAAC,SAAS,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC;IAC1E,IAAI,GAAG,GAAG,IAAI,CAAC,QAAQ,EAAE,CAAC;IAE1B,IAAI,CAAC,YAAY,EAAE,CAAC;QAClB,GAAG,GAAG,GAAG,CAAC,OAAO,CAAC,4BAA4B,EAAE,EAAE,CAAC,CAAC;QACpD,GAAG,GAAG,GAAG,CAAC,OAAO,CAAC,+BAA+B,EAAE,EAAE,CAAC,CAAC;IACzD,CAAC;SAAM,CAAC;QACN,GAAG,GAAG,GAAG,CAAC,OAAO,CAAC,UAAU,EAAE,4BAA4B,CAAC,CAAC;QAC5D,GAAG,GAAG,GAAG,CAAC,OAAO,CAAC,YAAY,EAAE,8BAA
8B,CAAC,CAAC;QAChE,GAAG,GAAG,GAAG,CAAC,OAAO,CAAC,eAAe,EAAE,iCAAiC,CAAC,CAAC;IACxE,CAAC;IAED,GAAG,GAAG,GAAG,CAAC,OAAO,CAAC,2BAA2B,EAAE,CAAC,KAAa,EAAE,IAAY,EAAE,IAAY,EAAE,EAAE;QAC3F,OAAO,wBAAwB,GAAG,IAAI,GAAG,aAAa,GAAG,IAAI,GAAG,kCAAkC,CAAC;IACrG,CAAC,CAAC,CAAC;IAEH,OAAO,GAAG,CAAC;AACb,CAAC;AAED,KAAK,UAAU,QAAQ;IACrB,IAAI,WAAW,GAAa,EAAE,CAAC;IAC/B,IAAI,CAAC;QACH,MAAM,KAAK,GAAG,MAAM,kBAAE,CAAC,OAAO,CAAC,cAAc,CAAC,CAAC;QAC/C,WAAW,GAAG,KAAK,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,UAAU,CAAC,eAAe,CAAC,IAAI,CAAC,CAAC,QAAQ,CAAC,OAAO,CAAC,CAAC,CAAC,IAAI,EAAE,CAAC,OAAO,EAAE,CAAC;IACzG,CAAC;IAAC,OAAO,CAAC,EAAE,CAAC;QAAC,OAAO,KAAK,CAAC;IAAC,CAAC;IAC7B,IAAI,WAAW,CAAC,MAAM,KAAK,CAAC;QAAE,OAAO,KAAK,CAAC;IAE3C,IAAI,GAAG,EAAE,CAAC;IACV,KAAK,MAAM,IAAI,IAAI,WAAW,EAAE,CAAC;QAC/B,IAAI,CAAC;YACH,MAAM,IAAI,GAAG,MAAM,kBAAE,CAAC,QAAQ,CAAC,cAAI,CAAC,IAAI,CAAC,cAAc,EAAE,IAAI,CAAC,CAAC,CAAC;YAChE,IAAI,CAAC,IAAI,CAAC,EAAE,IAAI,EAAE,SAAS,EAAE,IAAI,CAAC,SAAS,EAAE,OAAO,EAAE,IAAI,CAAC,OAAO,IAAI,EAAE,EAAE,CAAC,CAAC;QAC9E,CAAC;QAAC,OAAO,CAAC,EAAE,CAAC,CAAA,CAAC;IAChB,CAAC;IACD,OAAO,IAAI,CAAC,MAAM,GAAG,CAAC,CAAC;AACzB,CAAC;AAED,KAAK,UAAU,cAAc;IAC3B,MAAM,GAAG,GAAG,IAAI,CAAC,UAAU,CAAC,CAAC;IAC7B,MAAM,aAAa,GAAG,GAAG,CAAC,OAAO,CAAC,GAAG,CAAC,CAAC,CAAM,EAAE,EAAE,CAAC,CAAC,CAAC,SAAS,CAAC,WAAW,EAAE,CAAC,CAAC;IAE7E,IAAI,QAAQ,GAAa,EAAE,CAAC;IAC5B,IAAI,CAAC;QAAC,QAAQ,GAAG,MAAM,kBAAE,CAAC,OAAO,CAAC,eAAe,CAAC,CAAC;IAAC,CAAC;IAAC,OAAO,CAAC,EAAE,CAAC,CAAA,CAAC;IAElE,MAAM,YAAY,GAAG,QAAQ,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,CACvC,aAAa,CAAC,IAAI,CAAC,CAAC,CAAS,EAAE,EAAE,CAAC,CAAC,CAAC,WAAW,EAAE,CAAC,UAAU,CAAC,CAAC,CAAC,CAAC,CACjE,CAAC;IACF,IAAI,YAAY,CAAC,MAAM,KAAK,CAAC;QAAE,OAAO,EAAE,CAAC;IAEzC,IAAI,CAAC;QACH,MAAM,UAAU,GAAY,EAAE,CAAC;QAC/B,KAAK,MAAM,OAAO,IAAI,YAAY,EAAE,CAAC;YACnC,MAAM,OAAO,GAAG,MAAM,kBAAE,CAAC,QAAQ,CAAC,cAAI,CAAC,IAAI,CAAC,eAAe,EAAE,OAAO,CAAC,EAAE,OAAO,CAAC,CAAC;YAChF,MAAM,OAAO,GAAG,OAAO,CAAC,IAAI,EAAE,CAAC,KAAK,CAAC,IAAI,CAAC,CAAC,GAAG,CAAC,CAAC,IAAY,EAAE,EAAE,CAAC,IAAI,CAAC,KAAK,CAAC,IAAI,CAAC,CAAC,CAAC
;YACnF,UAAU,CAAC,IAAI,CAAC,GAAG,OAAO,CAAC,GAAG,CAAC,CAAC,CAAM,EAAE,CAAS,EAAE,EAAE,CAAC,CAAC;gBACrD,SAAS,EAAE,CAAC,CAAC,SAAS;gBACtB,KAAK,EAAE,CAAC;gBACR,QAAQ,EAAE,CAAC,CAAC,QAAQ;gBACpB,QAAQ,EAAE,CAAC,CAAC,QAAQ;gBACpB,SAAS,EAAE,CAAC,CAAC,SAAS;gBACtB,aAAa,EAAE,CAAC,CAAC,aAAa;aAC/B,CAAC,CAAC,CAAC,CAAC;QACP,CAAC;QACD,OAAO,UAAU,CAAC;IACpB,CAAC;IAAC,OAAO,CAAC,EAAE,CAAC;QAAC,OAAO,EAAE,CAAC;IAAC,CAAC;AAC5B,CAAC;AAED,SAAS,QAAQ;IACf,KAAK,GAAG,MAAM,CAAC;IACf,QAAQ,CAAC,IAAI,EAAE,CAAC;IAChB,WAAW,CAAC,IAAI,EAAE,CAAC;IACnB,IAAI,SAAS;QAAE,SAAS,CAAC,IAAI,EAAE,CAAC;IAChC,MAAM,CAAC,UAAU,CAAC,yEAAyE,CAAC,CAAC;IAC7F,MAAM,CAAC,UAAU,CAAC,0CAA0C,CAAC,CAAC;IAC9D,QAAQ,CAAC,KAAK,EAAE,CAAC;IACjB,MAAM,EAAE,CAAC;AACX,CAAC;AAED,KAAK,UAAU,WAAW;IACxB,KAAK,GAAG,SAAS,CAAC;IAClB,UAAU,GAAG,MAAM,cAAc,EAAE,CAAC;IAEpC,QAAQ,CAAC,IAAI,EAAE,CAAC;IAChB,IAAI,SAAS;QAAE,SAAS,CAAC,IAAI,EAAE,CAAC;IAEhC,IAAI,UAAU,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;QAC5B,MAAM,CAAC,UAAU,CAAC,6DAA6D,CAAC,CAAC;QACjF,WAAW,CAAC,QAAQ,CAAC,CAAC,kCAAkC,CAAC,CAAC,CAAC;QAC3D,WAAW,CAAC,IAAI,EAAE,CAAC;QACnB,MAAM,CAAC,UAAU,CAAC,sCAAsC,CAAC,CAAC;QAC1D,MAAM,EAAE,CAAC;QACT,OAAO;IACT,CAAC;IAED,YAAY,GAAG,CAAC,CAAC;IACjB,aAAa,EAAE,CAAC;AAClB,CAAC;AAED,SAAS,aAAa;IACpB,MAAM,GAAG,GAAG,IAAI,CAAC,UAAU,CAAC,CAAC;IAC7B,MAAM,eAAe,GAAG,cAAc,CAAC,CAAC,CAAC,UAAU,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,CAAC,SAAS,CAAC,CAAC,CAAC,CAAC,UAAU,CAAC;IAC3F,MAAM,OAAO,GAAG,UAAU,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,SAAS,CAAC,CAAC,MAAM,CAAC;IAC3D,MAAM,GAAG,GAAG,CAAC,CAAC,OAAO,GAAG,UAAU,CAAC,MAAM,CAAC,GAAG,GAAG,CAAC,CAAC,OAAO,CAAC,CAAC,CAAC,CAAC;IAC7D,MAAM,WAAW,GAAG,cAAc,CAAC,CAAC,CAAC,gBAAgB,CAAC,CAAC,CAAC,EAAE,CAAC;IAC3D,MAAM,CAAC,UAAU,CAAC,UAAU,GAAG,GAAG,CAAC,OAAO,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,SAAS,CAAC,CAAC,IAAI,CAAC,IAAI,CAAC,GAAG,WAAW,GAAG,KAAK,GAAG,GAAG,GAAG,KAAK,GAAG,OAAO,GAAG,MAAM,GAAG,UAAU,CAAC,MAAM,GAAG,YAAY,CAAC,CAAC;IAEvK,MAAM,KAAK,GAAG,eAAe,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,CAAC,EAAE,EAAE;QACzC,MAAM,OAAO,GAAG,kBAAkB,CAAC,CAAC,CAAC,QAAQ,CAAC,CAAC
;QAC/C,MAAM,MAAM,GAAG,CAAC,CAAC,SAAS,CAAC,CAAC,CAAC,2BAA2B,CAAC,CAAC,CAAC,sBAAsB,CAAC;QAClF,OAAO,MAAM,GAAG,IAAI,GAAG,CAAC,CAAC,SAAS,GAAG,IAAI,GAAG,OAAO,CAAC;IACtD,CAAC,CAAC,CAAC;IACH,WAAW,CAAC,QAAQ,CAAC,KAAK,CAAC,CAAC;IAC5B,WAAW,CAAC,IAAI,EAAE,CAAC;IACnB,MAAM,CAAC,UAAU,CAAC,aAAa,GAAG,CAAC,cAAc,CAAC,CAAC,CAAC,UAAU,CAAC,CAAC,CAAC,aAAa,CAAC,GAAG,sDAAsD,CAAC,CAAC;IAC1I,WAAW,CAAC,KAAK,EAAE,CAAC;IACpB,MAAM,EAAE,CAAC;AACX,CAAC;AAED,SAAS,UAAU;IACjB,KAAK,GAAG,QAAQ,CAAC;IACjB,MAAM,KAAK,GAAG,UAAU,CAAC,YAAY,CAAC,CAAC;IACvC,IAAI,CAAC,KAAK;QAAE,OAAO;IAEnB,WAAW,CAAC,IAAI,EAAE,CAAC;IAEnB,MAAM,YAAY,GAAG,eAAe,CAAC,KAAK,CAAC,QAAQ,CAAC,CAAC;IACrD,MAAM,MAAM,GAAG,KAAK,CAAC,SAAS,CAAC,CAAC,CAAC,gCAAgC,CAAC,CAAC,CAAC,8BAA8B,CAAC;IACnG,MAAM,WAAW,GAAG,YAAY,CAAC,CAAC,CAAC,uCAAuC,CAAC,CAAC,CAAC,oCAAoC,CAAC;IAElH,IAAI,OAAO,GAAG,sBAAsB,GAAG,CAAC,YAAY,GAAG,CAAC,CAAC,GAAG,GAAG,GAAG,UAAU,CAAC,MAAM,GAAG,KAAK,GAAG,KAAK,CAAC,SAAS,CAAC,WAAW,EAAE,GAAG,sBAAsB,CAAC;IACrJ,OAAO,IAAI,UAAU,GAAG,MAAM,GAAG,eAAe,CAAC;IACjD,OAAO,IAAI,0BAA0B,CAAC;IACtC,OAAO,IAAI,eAAe,CAAC,KAAK,CAAC,QAAQ,CAAC,GAAG,MAAM,CAAC;IACpD,OAAO,IAAI,uBAAuB,GAAG,WAAW,GAAG,YAAY,CAAC;IAChE,OAAO,IAAI,YAAY,GAAG,IAAI,CAAC;IAC/B,IAAI,KAAK,CAAC,aAAa,EAAE,CAAC;QACxB,MAAM,UAAU,GAAG,KAAK,CAAC,aAAa,CAAC,WAAW,EAAE,CAAC,QAAQ,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC,UAAU,CAAC,CAAC,CAAC,QAAQ,CAAC;QAC7F,OAAO,IAAI,kCAAkC,CAAC;QAC9C,OAAO,IAAI,GAAG,GAAG,UAAU,GAAG,GAAG,GAAG,KAAK,CAAC,aAAa,GAAG,IAAI,GAAG,UAAU,GAAG,GAAG,CAAC;IACpF,CAAC;IAED,IAAI,SAAS,EAAE,CAAC;QACd,SAAS,CAAC,UAAU,CAAC,OAAO,CAAC,CAAC;QAC9B,SAAS,CAAC,IAAI,EAAE,CAAC;QACjB,SAAS,CAAC,KAAK,EAAE,CAAC;IACpB,CAAC;IACD,MAAM,CAAC,UAAU,CAAC,6EAA6E,CAAC,CAAC;IACjG,MAAM,EAAE,CAAC;AACX,CAAC;AAED,SAAS,kBAAkB,CAAC,CAAM;IAChC,MAAM,IAAI,GAAG,OAAO,CAAC,KAAK,QAAQ,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,QAAQ,IAAI,CAAC,CAAC,MAAM,IAAI,EAAE,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC;IACxE,OAAO,IAAI,CAAC,QAAQ,EAAE,CAAC,OAAO,CAAC,KAAK,EAAE,GAAG,CAAC,CAAC,IAAI,EAAE,CAAC,SAAS,CAAC,CAAC,EAAE,GAAG,CAAC,CAAC;AACtE,CAAC;AAED,SAAS,eAAe,CAAC,CAAM;IAC7B,MAAM,IAAI,GAA
G,OAAO,CAAC,KAAK,QAAQ,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,QAAQ,IAAI,CAAC,CAAC,MAAM,IAAI,IAAI,CAAC,SAAS,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC;IACvF,OAAO,IAAI,CAAC,QAAQ,EAAE,CAAC;AACzB,CAAC;AAED,SAAS,MAAM;IACb,IAAI,MAAM;QAAE,MAAM,CAAC,MAAM,EAAE,CAAC;AAC9B,CAAC;AAED,SAAS,OAAO;IACd,IAAI,MAAM,EAAE,CAAC;QACX,MAAM,CAAC,OAAO,EAAE,CAAC;QACjB,MAAM,GAAG,IAAI,CAAC;IAChB,CAAC;AACH,CAAC;AAED,KAAK,UAAU,IAAI;IACjB,MAAM,OAAO,GAAG,MAAM,QAAQ,EAAE,CAAC;IAEjC,IAAI,CAAC,OAAO,EAAE,CAAC;QACb,OAAO,CAAC,GAAG,CAAC,mBAAmB,CAAC,CAAC;QACjC,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;IAClB,CAAC;IAED,MAAM,GAAG,iBAAO,CAAC,MAAM,CAAC,EAAE,QAAQ,EAAE,IAAI,EAAE,WAAW,EAAE,IAAI,EAAE,CAAC,CAAC;IAC/D,MAAM,CAAC,KAAK,GAAG,wBAAwB,CAAC;IAExC,MAAM,SAAS,GAAG,iBAAO,CAAC,GAAG,CAAC;QAC5B,MAAM,EAAE,MAAM;QACd,KAAK,EAAE,MAAM,EAAE,MAAM,EAAE,MAAM;QAC7B,KAAK,EAAE,EAAE,EAAE,EAAE,OAAO,EAAE,EAAE,EAAE,OAAO,EAAE;KACpC,CAAC,CAAC;IAEH,MAAM,GAAG,iBAAO,CAAC,IAAI,CAAC;QACpB,MAAM,EAAE,SAAS;QACjB,GAAG,EAAE,CAAC,EAAE,IAAI,EAAE,CAAC,EAAE,KAAK,EAAE,MAAM,EAAE,MAAM,EAAE,CAAC;QACzC,KAAK,EAAE,QAAQ,EAAE,MAAM,EAAE,QAAQ;QACjC,IAAI,EAAE,IAAI;QACV,KAAK,EAAE,EAAE,EAAE,EAAE,OAAO,EAAE,EAAE,EAAE,OAAO,EAAE,IAAI,EAAE,IAAI,EAAE,IAAI,EAAE,IAAI,EAAE;KAC5D,CAAC,CAAC;IAEH,QAAQ,GAAG,iBAAO,CAAC,IAAI,CAAC;QACtB,MAAM,EAAE,SAAS;QACjB,GAAG,EAAE,CAAC,EAAE,IAAI,EAAE,CAAC,EAAE,KAAK,EAAE,CAAC,EAAE,MAAM,EAAE,CAAC;QACpC,MAAM,EAAE,EAAE,IAAI,EAAE,MAAa,EAAE,EAAE,EAAE,MAAa,EAAE;QAClD,KAAK,EAAE,EAAE,EAAE,EAAE,OAAO,EAAE,MAAM,EAAE,EAAE,EAAE,EAAE,MAAM,EAAE,EAAE,QAAQ,EAAE,EAAE,EAAE,EAAE,MAAM,EAAE,EAAE,EAAE,OAAO,EAAE,EAAE;QACrF,KAAK,EAAE,IAAI,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE;YAClB,MAAM,OAAO,GAAG,CAAC,CAAC,OAAO,CAAC,GAAG,CAAC,CAAC,CAAM,EAAE,EAAE,CAAC,CAAC,CAAC,SAAS,GAAG,KAAK,GAAG,CAAC,CAAC,OAAO,GAAG,MAAM,GAAG,CAAC,CAAC,KAAK,GAAG,IAAI,GAAG,CAAC,CAAC,QAAQ,CAAC,OAAO,CAAC,CAAC,CAAC,GAAG,IAAI,CAAC,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC;YAChJ,OAAO,UAAU,CAAC,CAAC,CAAC,SAAS,CAAC,GAAG,KAAK,GAAG,OAAO,CAAC;QACnD,CAAC,CAAC;QACF,IAAI,EAAE,IAAI,EAAE,EAAE,EAAE,IAAI,EAAE,KAAK,EAAE,IAAI;KAClC,CAAC,CAAC;IAEH,WAAW,GA
AG,iBAAO,CAAC,IAAI,CAAC;QACzB,MAAM,EAAE,SAAS;QACjB,GAAG,EAAE,CAAC,EAAE,IAAI,EAAE,CAAC,EAAE,KAAK,EAAE,CAAC,EAAE,MAAM,EAAE,CAAC;QACpC,MAAM,EAAE,EAAE,IAAI,EAAE,MAAa,EAAE,EAAE,EAAE,QAAe,EAAE;QACpD,KAAK,EAAE,EAAE,EAAE,EAAE,OAAO,EAAE,MAAM,EAAE,EAAE,EAAE,EAAE,QAAQ,EAAE,EAAE,QAAQ,EAAE,EAAE,EAAE,EAAE,QAAQ,EAAE,EAAE,EAAE,OAAO,EAAE,EAAE;QACzF,IAAI,EAAE,IAAI;QACV,IAAI,EAAE,IAAI,EAAE,EAAE,EAAE,IAAI,EAAE,KAAK,EAAE,IAAI;QACjC,MAAM,EAAE,IAAI;KACb,CAAC,CAAC;IAEH,SAAS,GAAG,iBAAO,CAAC,cAAc,CAAC;QACjC,MAAM,EAAE,SAAS;QACjB,GAAG,EAAE,CAAC,EAAE,IAAI,EAAE,CAAC,EAAE,KAAK,EAAE,CAAC,EAAE,MAAM,EAAE,CAAC;QACpC,MAAM,EAAE,EAAE,IAAI,EAAE,MAAa,EAAE,EAAE,EAAE,SAAgB,EAAE;QACrD,KAAK,EAAE,EAAE,EAAE,EAAE,OAAO,EAAE,MAAM,EAAE,EAAE,EAAE,EAAE,SAAS,EAAE,EAAE,IAAI,EAAE,IAAI,EAAE;QAC7D,IAAI,EAAE,IAAI;QACV,UAAU,EAAE,IAAI,EAAE,YAAY,EAAE,IAAI;QACpC,KAAK,EAAE,IAAI,EAAE,IAAI,EAAE,IAAI,EAAE,EAAE,EAAE,IAAI;QACjC,SAAS,EAAE,EAAE,EAAE,EAAE,GAAU,EAAE,KAAK,EAAE,EAAE,EAAE,EAAE,MAAM,EAAE,EAAE,KAAK,EAAE,EAAE,EAAE,EAAE,MAAM,EAAE,EAAE;QAC3E,IAAI,EAAE,IAAI,EAAE,MAAM,EAAE,IAAI;KACzB,CAAC,CAAC;IAEH,MAAM,GAAG,iBAAO,CAAC,IAAI,CAAC;QACpB,MAAM,EAAE,SAAS;QACjB,MAAM,EAAE,CAAC,EAAE,IAAI,EAAE,CAAC,EAAE,KAAK,EAAE,MAAM,EAAE,MAAM,EAAE,CAAC;QAC5C,KAAK,EAAE,QAAQ,EAAE,IAAI,EAAE,IAAI;QAC3B,KAAK,EAAE,EAAE,EAAE,EAAE,MAAM,EAAE;KACtB,CAAC,CAAC;IAEH,QAAQ,CAAC,EAAE,CAAC,QAAQ,EAAE,CAAC,CAAM,EAAE,CAAS,EAAE,EAAE;QAC1C,UAAU,GAAG,CAAC,CAAC;QACf,YAAY,GAAG,CAAC,CAAC;QACjB,WAAW,EAAE,CAAC;IAChB,CAAC,CAAC,CAAC;IAEH,WAAW,CAAC,EAAE,CAAC,QAAQ,EAAE,CAAC,CAAM,EAAE,CAAS,EAAE,EAAE;QAC7C,YAAY,GAAG,CAAC,CAAC;QACjB,UAAU,EAAE,CAAC;IACf,CAAC,CAAC,CAAC;IAEH,MAAM,CAAC,GAAG,CAAC,CAAC,GAAG,EAAE,GAAG,CAAC,EAAE,GAAG,EAAE;QAC1B,IAAI,KAAK,KAAK,QAAQ;YAAE,WAAW,EAAE,CAAC;aACjC,IAAI,KAAK,KAAK,SAAS;YAAE,QAAQ,EAAE,CAAC;IAC3C,CAAC,CAAC,CAAC;IAEH,MAAM,CAAC,GAAG,CAAC,CAAC,QAAQ,CAAC,EAAE,GAAG,EAAE;QAC1B,IAAI,KAAK,KAAK,QAAQ;YAAE,WAAW,EAAE,CAAC;aACjC,IAAI,KAAK,KAAK,SAAS;YAAE,QAAQ,EAAE,CAAC;aACpC,CAAC;YAAC,OAAO,EAAE,CAAC;YAAC,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;QAAC,CAAC;IACtC,CAAC,CAAC,CAAC;I
AEH,MAAM,CAAC,GAAG,CAAC,CAAC,GAAG,EAAE,GAAG,EAAE,KAAK,CAAC,EAAE,GAAG,EAAE;QACjC,OAAO,EAAE,CAAC;QACV,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;IAClB,CAAC,CAAC,CAAC;IAEH,MAAM,CAAC,GAAG,CAAC,CAAC,GAAG,EAAE,GAAG,CAAC,EAAE,GAAG,EAAE;QAC1B,YAAY,GAAG,CAAC,YAAY,CAAC;QAC7B,UAAU,EAAE,CAAC;QACb,IAAI,KAAK,KAAK,QAAQ;YAAE,UAAU,EAAE,CAAC;IACvC,CAAC,CAAC,CAAC;IAEH,MAAM,CAAC,GAAG,CAAC,CAAC,GAAG,EAAE,GAAG,CAAC,EAAE,GAAG,EAAE;QAC1B,IAAI,KAAK,KAAK,SAAS,EAAE,CAAC;YACxB,cAAc,GAAG,CAAC,cAAc,CAAC;YACjC,UAAU,EAAE,CAAC;YACb,YAAY,GAAG,CAAC,CAAC;YACjB,aAAa,EAAE,CAAC;QAClB,CAAC;IACH,CAAC,CAAC,CAAC;IAEH,UAAU,EAAE,CAAC;IACb,QAAQ,EAAE,CAAC;AACb,CAAC;AAEM,KAAK,UAAU,SAAS;IAC7B,IAAI,CAAC;QAAC,MAAM,IAAI,EAAE,CAAC;IAAC,CAAC;IACrB,OAAO,CAAC,EAAE,CAAC;QAAC,OAAO,CAAC,KAAK,CAAC,QAAQ,EAAE,CAAC,CAAC,CAAC;QAAC,OAAO,EAAE,CAAC;QAAC,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;IAAC,CAAC;AACvE,CAAC;AAED,IAAI,OAAO,CAAC,IAAI,KAAK,MAAM,EAAE,CAAC;IAC5B,IAAI,EAAE,CAAC,KAAK,CAAC,CAAC,CAAC,EAAE,GAAG,OAAO,CAAC,KAAK,CAAC,QAAQ,EAAE,CAAC,CAAC,CAAC,CAAC,OAAO,EAAE,CAAC,CAAC,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC;AACjF,CAAC"}
|
package/package.json
ADDED
|
@@ -0,0 +1,45 @@
|
|
|
1
|
+
{
|
|
2
|
+
"name": "llmtester",
|
|
3
|
+
"version": "1.0.0",
|
|
4
|
+
"description": "Interactive LLM Evaluation Runner for benchmarking LLMs across multiple benchmarks",
|
|
5
|
+
"repository": "https://github.com/officiallymarky/llmtester",
|
|
6
|
+
"main": "dist/index.js",
|
|
7
|
+
"bin": {
|
|
8
|
+
"llmtester": "./bin/cli.js"
|
|
9
|
+
},
|
|
10
|
+
"files": [
|
|
11
|
+
"dist/",
|
|
12
|
+
"bin/"
|
|
13
|
+
],
|
|
14
|
+
"scripts": {
|
|
15
|
+
"build": "tsc",
|
|
16
|
+
"start": "node bin/cli.js",
|
|
17
|
+
"dev": "tsc && node bin/cli.js"
|
|
18
|
+
},
|
|
19
|
+
"keywords": [
|
|
20
|
+
"llm",
|
|
21
|
+
"benchmark",
|
|
22
|
+
"evaluation",
|
|
23
|
+
"openai",
|
|
24
|
+
"ai"
|
|
25
|
+
],
|
|
26
|
+
"author": "Marky",
|
|
27
|
+
"license": "MIT",
|
|
28
|
+
"dependencies": {
|
|
29
|
+
"@dsnp/parquetjs": "^1.8.7",
|
|
30
|
+
"axios": "^1.7.9",
|
|
31
|
+
"blessed": "^0.1.81",
|
|
32
|
+
"chalk": "^4.1.2",
|
|
33
|
+
"dotenv": "^16.4.5",
|
|
34
|
+
"fs-extra": "^11.2.0"
|
|
35
|
+
},
|
|
36
|
+
"devDependencies": {
|
|
37
|
+
"@types/blessed": "^0.1.27",
|
|
38
|
+
"@types/fs-extra": "^11.0.4",
|
|
39
|
+
"@types/node": "^22.10.0",
|
|
40
|
+
"typescript": "^5.7.0"
|
|
41
|
+
},
|
|
42
|
+
"engines": {
|
|
43
|
+
"node": ">=18.0.0"
|
|
44
|
+
}
|
|
45
|
+
}
|