@cliwatch/cli-bench 0.6.3 → 0.7.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +14 -0
- package/README.md +3 -0
- package/dist/assertions.d.ts +1 -1
- package/dist/assertions.d.ts.map +1 -1
- package/dist/assertions.js +6 -6
- package/dist/assertions.js.map +1 -1
- package/dist/ci.d.ts.map +1 -1
- package/dist/ci.js +14 -0
- package/dist/ci.js.map +1 -1
- package/dist/client/index.d.ts +1 -1
- package/dist/client/index.d.ts.map +1 -1
- package/dist/client/types.gen.d.ts +143 -93
- package/dist/client/types.gen.d.ts.map +1 -1
- package/dist/client/zod.gen.d.ts +75 -42
- package/dist/client/zod.gen.d.ts.map +1 -1
- package/dist/client/zod.gen.js +86 -54
- package/dist/client/zod.gen.js.map +1 -1
- package/dist/config.d.ts +2 -3
- package/dist/config.d.ts.map +1 -1
- package/dist/config.js +8 -15
- package/dist/config.js.map +1 -1
- package/dist/exec.d.ts +2 -0
- package/dist/exec.d.ts.map +1 -1
- package/dist/exec.js +6 -2
- package/dist/exec.js.map +1 -1
- package/dist/github-comment.d.ts +16 -0
- package/dist/github-comment.d.ts.map +1 -0
- package/dist/github-comment.js +90 -0
- package/dist/github-comment.js.map +1 -0
- package/dist/index.d.ts +2 -3
- package/dist/index.d.ts.map +1 -1
- package/dist/index.js +31 -36
- package/dist/index.js.map +1 -1
- package/dist/init.js +1 -1
- package/dist/models.d.ts +9 -9
- package/dist/models.d.ts.map +1 -1
- package/dist/models.js +1 -1
- package/dist/models.js.map +1 -1
- package/dist/project.d.ts +11 -2
- package/dist/project.d.ts.map +1 -1
- package/dist/project.js +108 -9
- package/dist/project.js.map +1 -1
- package/dist/prompt.d.ts +2 -8
- package/dist/prompt.d.ts.map +1 -1
- package/dist/prompt.js +2 -35
- package/dist/prompt.js.map +1 -1
- package/dist/providers.d.ts +9 -7
- package/dist/providers.d.ts.map +1 -1
- package/dist/providers.js +26 -8
- package/dist/providers.js.map +1 -1
- package/dist/runner.d.ts +32 -4
- package/dist/runner.d.ts.map +1 -1
- package/dist/runner.js +177 -177
- package/dist/runner.js.map +1 -1
- package/dist/schemas.d.ts +20 -1
- package/dist/schemas.d.ts.map +1 -1
- package/dist/schemas.js +8 -1
- package/dist/schemas.js.map +1 -1
- package/dist/suite-generator.d.ts.map +1 -1
- package/dist/suite-generator.js +93 -10
- package/dist/suite-generator.js.map +1 -1
- package/package.json +2 -2
- package/dist/help-loader.d.ts +0 -17
- package/dist/help-loader.d.ts.map +0 -1
- package/dist/help-loader.js +0 -65
- package/dist/help-loader.js.map +0 -1
- package/task_suites/curl.yaml +0 -138
- package/task_suites/docker.yaml +0 -163
- package/task_suites/gh.yaml +0 -118
- package/task_suites/jq.yaml +0 -172
- package/task_suites/kubectl.yaml +0 -74
package/dist/config.d.ts
CHANGED
|
@@ -1,22 +1,21 @@
|
|
|
1
1
|
/**
|
|
2
2
|
* CLI argument parsing for @cliwatch/cli-bench.
|
|
3
3
|
*/
|
|
4
|
-
import { type ContextMode } from './models.js';
|
|
5
4
|
export interface Config {
|
|
6
5
|
filter: string[];
|
|
7
6
|
models: string[];
|
|
8
7
|
output?: string;
|
|
9
8
|
dryRun: boolean;
|
|
10
|
-
helpCacheDir: string;
|
|
11
9
|
concurrency: number;
|
|
12
10
|
upload: boolean;
|
|
13
11
|
backendUrl: string;
|
|
14
12
|
apiKey: string;
|
|
15
|
-
contextModes: ContextMode[];
|
|
16
13
|
configFile?: string;
|
|
17
14
|
initMode: boolean;
|
|
18
15
|
workdir?: string;
|
|
19
16
|
repeat?: number;
|
|
17
|
+
tags: string[];
|
|
18
|
+
githubCommentPath?: string;
|
|
20
19
|
}
|
|
21
20
|
export declare function parseArgs(argv: string[]): Config;
|
|
22
21
|
//# sourceMappingURL=config.d.ts.map
|
package/dist/config.d.ts.map
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"config.d.ts","sourceRoot":"","sources":["../src/config.ts"],"names":[],"mappings":"AAAA;;GAEG;AAEH,
|
|
1
|
+
{"version":3,"file":"config.d.ts","sourceRoot":"","sources":["../src/config.ts"],"names":[],"mappings":"AAAA;;GAEG;AAEH,MAAM,WAAW,MAAM;IACrB,MAAM,EAAE,MAAM,EAAE,CAAC;IACjB,MAAM,EAAE,MAAM,EAAE,CAAC;IACjB,MAAM,CAAC,EAAE,MAAM,CAAC;IAChB,MAAM,EAAE,OAAO,CAAC;IAChB,WAAW,EAAE,MAAM,CAAC;IACpB,MAAM,EAAE,OAAO,CAAC;IAChB,UAAU,EAAE,MAAM,CAAC;IACnB,MAAM,EAAE,MAAM,CAAC;IACf,UAAU,CAAC,EAAE,MAAM,CAAC;IACpB,QAAQ,EAAE,OAAO,CAAC;IAClB,OAAO,CAAC,EAAE,MAAM,CAAC;IACjB,MAAM,CAAC,EAAE,MAAM,CAAC;IAChB,IAAI,EAAE,MAAM,EAAE,CAAC;IACf,iBAAiB,CAAC,EAAE,MAAM,CAAC;CAC5B;AAED,wBAAgB,SAAS,CAAC,IAAI,EAAE,MAAM,EAAE,GAAG,MAAM,CAgEhD"}
|
package/dist/config.js
CHANGED
|
@@ -1,21 +1,19 @@
|
|
|
1
1
|
/**
|
|
2
2
|
* CLI argument parsing for @cliwatch/cli-bench.
|
|
3
3
|
*/
|
|
4
|
-
import { CONTEXT_MODES } from './models.js';
|
|
5
4
|
export function parseArgs(argv) {
|
|
6
5
|
const args = argv.slice(2);
|
|
7
6
|
const config = {
|
|
8
7
|
filter: [],
|
|
9
8
|
models: [],
|
|
10
9
|
dryRun: false,
|
|
11
|
-
helpCacheDir: './help_cache',
|
|
12
10
|
concurrency: 3,
|
|
13
11
|
output: undefined,
|
|
14
12
|
upload: false,
|
|
15
13
|
backendUrl: process.env['CLIWATCH_BACKEND_URL'] ?? 'https://api.cliwatch.com',
|
|
16
14
|
apiKey: process.env['CLIWATCH_API_KEY'] ?? '',
|
|
17
|
-
contextModes: ['zero-shot'],
|
|
18
15
|
initMode: false,
|
|
16
|
+
tags: [],
|
|
19
17
|
};
|
|
20
18
|
for (let i = 0; i < args.length; i++) {
|
|
21
19
|
switch (args[i]) {
|
|
@@ -37,9 +35,6 @@ export function parseArgs(argv) {
|
|
|
37
35
|
case '--dry-run':
|
|
38
36
|
config.dryRun = true;
|
|
39
37
|
break;
|
|
40
|
-
case '--help-cache':
|
|
41
|
-
config.helpCacheDir = args[++i] ?? './help_cache';
|
|
42
|
-
break;
|
|
43
38
|
case '--concurrency':
|
|
44
39
|
config.concurrency = parseInt(args[++i] ?? '3', 10);
|
|
45
40
|
break;
|
|
@@ -52,13 +47,11 @@ export function parseArgs(argv) {
|
|
|
52
47
|
case '--repeat':
|
|
53
48
|
config.repeat = parseInt(args[++i] ?? '1', 10);
|
|
54
49
|
break;
|
|
55
|
-
case '--
|
|
56
|
-
config.
|
|
57
|
-
|
|
58
|
-
|
|
59
|
-
|
|
60
|
-
if (config.contextModes.length === 0)
|
|
61
|
-
config.contextModes = ['zero-shot'];
|
|
50
|
+
case '--tags':
|
|
51
|
+
config.tags = (args[++i] ?? '').split(',').map((s) => s.trim()).filter(Boolean);
|
|
52
|
+
break;
|
|
53
|
+
case '--github-comment':
|
|
54
|
+
config.githubCommentPath = args[++i];
|
|
62
55
|
break;
|
|
63
56
|
case '--help':
|
|
64
57
|
printUsage();
|
|
@@ -83,13 +76,13 @@ Options:
|
|
|
83
76
|
--models <models> Comma-separated model IDs (default: all in config/registry)
|
|
84
77
|
--output <file> Write JSON GridReport to file
|
|
85
78
|
--dry-run Print prompt for first task without calling API
|
|
86
|
-
--help-cache <dir> Directory with cached help text JSON files
|
|
87
|
-
--context <modes> Comma-separated context modes: zero-shot,help,docs (default: zero-shot)
|
|
88
79
|
--concurrency <n> Max concurrent API calls (default: 3)
|
|
89
80
|
--workdir <dir> Working directory for commands
|
|
90
81
|
--repeat <n> Run each task N times for statistical confidence (default: 1)
|
|
91
82
|
Note: tasks with non-idempotent setup may collide across repeats
|
|
83
|
+
--tags <tags> Comma-separated task tags to include (default: all tasks)
|
|
92
84
|
--upload POST GridReport to backend after run
|
|
85
|
+
--github-comment <path> Write PR comment markdown to file
|
|
93
86
|
--help Show this help message`);
|
|
94
87
|
}
|
|
95
88
|
//# sourceMappingURL=config.js.map
|
package/dist/config.js.map
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"config.js","sourceRoot":"","sources":["../src/config.ts"],"names":[],"mappings":"AAAA;;GAEG;
|
|
1
|
+
{"version":3,"file":"config.js","sourceRoot":"","sources":["../src/config.ts"],"names":[],"mappings":"AAAA;;GAEG;AAmBH,MAAM,UAAU,SAAS,CAAC,IAAc;IACtC,MAAM,IAAI,GAAG,IAAI,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC;IAC3B,MAAM,MAAM,GAAW;QACrB,MAAM,EAAE,EAAE;QACV,MAAM,EAAE,EAAE;QACV,MAAM,EAAE,KAAK;QACb,WAAW,EAAE,CAAC;QACd,MAAM,EAAE,SAAS;QACjB,MAAM,EAAE,KAAK;QACb,UAAU,EAAE,OAAO,CAAC,GAAG,CAAC,sBAAsB,CAAC,IAAI,0BAA0B;QAC7E,MAAM,EAAE,OAAO,CAAC,GAAG,CAAC,kBAAkB,CAAC,IAAI,EAAE;QAC7C,QAAQ,EAAE,KAAK;QACf,IAAI,EAAE,EAAE;KACT,CAAC;IAEF,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,IAAI,CAAC,MAAM,EAAE,CAAC,EAAE,EAAE,CAAC;QACrC,QAAQ,IAAI,CAAC,CAAC,CAAC,EAAE,CAAC;YAChB,KAAK,MAAM;gBACT,MAAM,CAAC,QAAQ,GAAG,IAAI,CAAC;gBACvB,MAAM;YACR,KAAK,UAAU;gBACb,MAAM,CAAC,UAAU,GAAG,IAAI,CAAC,EAAE,CAAC,CAAC,CAAC;gBAC9B,MAAM;YACR,KAAK,UAAU;gBACb,MAAM,CAAC,MAAM,GAAG,CAAC,IAAI,CAAC,EAAE,CAAC,CAAC,IAAI,EAAE,CAAC,CAAC,KAAK,CAAC,GAAG,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,IAAI,EAAE,CAAC,CAAC,MAAM,CAAC,OAAO,CAAC,CAAC;gBAClF,MAAM;YACR,KAAK,UAAU;gBACb,MAAM,CAAC,MAAM,GAAG,CAAC,IAAI,CAAC,EAAE,CAAC,CAAC,IAAI,EAAE,CAAC,CAAC,KAAK,CAAC,GAAG,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,IAAI,EAAE,CAAC,CAAC,MAAM,CAAC,OAAO,CAAC,CAAC;gBAClF,MAAM;YACR,KAAK,UAAU;gBACb,MAAM,CAAC,MAAM,GAAG,IAAI,CAAC,EAAE,CAAC,CAAC,CAAC;gBAC1B,MAAM;YACR,KAAK,WAAW;gBACd,MAAM,CAAC,MAAM,GAAG,IAAI,CAAC;gBACrB,MAAM;YACR,KAAK,eAAe;gBAClB,MAAM,CAAC,WAAW,GAAG,QAAQ,CAAC,IAAI,CAAC,EAAE,CAAC,CAAC,IAAI,GAAG,EAAE,EAAE,CAAC,CAAC;gBACpD,MAAM;YACR,KAAK,UAAU;gBACb,MAAM,CAAC,MAAM,GAAG,IAAI,CAAC;gBACrB,MAAM;YACR,KAAK,WAAW;gBACd,MAAM,CAAC,OAAO,GAAG,IAAI,CAAC,EAAE,CAAC,CAAC,CAAC;gBAC3B,MAAM;YACR,KAAK,UAAU;gBACb,MAAM,CAAC,MAAM,GAAG,QAAQ,CAAC,IAAI,CAAC,EAAE,CAAC,CAAC,IAAI,GAAG,EAAE,EAAE,CAAC,CAAC;gBAC/C,MAAM;YACR,KAAK,QAAQ;gBACX,MAAM,CAAC,IAAI,GAAG,CAAC,IAAI,CAAC,EAAE,CAAC,CAAC,IAAI,EAAE,CAAC,CAAC,KAAK,CAAC,GAAG,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,IAAI,EAAE,CAAC,CAAC,MAAM,CAAC,OAAO,CAAC,CAAC;gBAChF,MAAM;YACR,KAAK,kBAAkB;gBACrB,MAAM,CAAC,iBAAiB,GAAG,IAAI,CAAC,EAAE,CAAC,CAAC,CAAC;gBACrC,MAAM;YACR,KAAK,QAAQ;gBACX,UAAU,EAAE,CAAC;gBACb,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;YAClB;gBACE,OAAO,CAAC,KAAK,CAAC,mBAAmB,IAAI,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC;gBAC5C,UAAU,EAAE,CAAC;gBACb,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;QACpB,CAAC;IACH,CAAC;IAED,OAAO,MAAM,CAAC;AAChB,CAAC;AAED,SAAS,UAAU;IACjB,OAAO,CAAC,GAAG,CAAC;;;;;;;;;;;;;;;;;;+CAkBiC,CAAC,CAAC;AACjD,CAAC"}
|
package/dist/exec.d.ts
CHANGED
|
@@ -11,8 +11,10 @@ export interface ExecResult {
|
|
|
11
11
|
export declare function execCommand(command: string, opts?: {
|
|
12
12
|
cwd?: string;
|
|
13
13
|
timeout?: number;
|
|
14
|
+
env?: Record<string, string>;
|
|
14
15
|
}): Promise<ExecResult>;
|
|
15
16
|
export declare function runSetup(commands: string[], opts?: {
|
|
16
17
|
cwd?: string;
|
|
18
|
+
env?: Record<string, string>;
|
|
17
19
|
}): Promise<void>;
|
|
18
20
|
//# sourceMappingURL=exec.d.ts.map
|
package/dist/exec.d.ts.map
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"exec.d.ts","sourceRoot":"","sources":["../src/exec.ts"],"names":[],"mappings":"AAAA;;;;GAIG;AAOH,MAAM,WAAW,UAAU;IACzB,MAAM,EAAE,MAAM,CAAC;IACf,MAAM,EAAE,MAAM,CAAC;IACf,QAAQ,EAAE,MAAM,CAAC;CAClB;AAED,wBAAsB,WAAW,CAC/B,OAAO,EAAE,MAAM,EACf,IAAI,CAAC,EAAE;IAAE,GAAG,CAAC,EAAE,MAAM,CAAC;IAAC,OAAO,CAAC,EAAE,MAAM,CAAA;CAAE,
|
|
1
|
+
{"version":3,"file":"exec.d.ts","sourceRoot":"","sources":["../src/exec.ts"],"names":[],"mappings":"AAAA;;;;GAIG;AAOH,MAAM,WAAW,UAAU;IACzB,MAAM,EAAE,MAAM,CAAC;IACf,MAAM,EAAE,MAAM,CAAC;IACf,QAAQ,EAAE,MAAM,CAAC;CAClB;AAED,wBAAsB,WAAW,CAC/B,OAAO,EAAE,MAAM,EACf,IAAI,CAAC,EAAE;IAAE,GAAG,CAAC,EAAE,MAAM,CAAC;IAAC,OAAO,CAAC,EAAE,MAAM,CAAC;IAAC,GAAG,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,MAAM,CAAC,CAAA;CAAE,GACtE,OAAO,CAAC,UAAU,CAAC,CAuBrB;AAED,wBAAsB,QAAQ,CAC5B,QAAQ,EAAE,MAAM,EAAE,EAClB,IAAI,CAAC,EAAE;IAAE,GAAG,CAAC,EAAE,MAAM,CAAC;IAAC,GAAG,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,MAAM,CAAC,CAAA;CAAE,GACpD,OAAO,CAAC,IAAI,CAAC,CAOf"}
|
package/dist/exec.js
CHANGED
|
@@ -8,9 +8,13 @@ const EXEC_TIMEOUT_MS = 30_000;
|
|
|
8
8
|
const MAX_OUTPUT_CHARS = 2_000;
|
|
9
9
|
export async function execCommand(command, opts) {
|
|
10
10
|
const timeout = opts?.timeout ?? EXEC_TIMEOUT_MS;
|
|
11
|
+
const env = opts?.env ? { ...process.env, ...opts.env } : undefined;
|
|
11
12
|
return new Promise((resolve) => {
|
|
12
|
-
execFile('sh', ['-c', command], { timeout, maxBuffer: 256 * 1024, cwd: opts?.cwd }, (err, stdout, stderr) => {
|
|
13
|
-
const exitCode = err
|
|
13
|
+
execFile('sh', ['-c', command], { timeout, maxBuffer: 256 * 1024, cwd: opts?.cwd, env }, (err, stdout, stderr) => {
|
|
14
|
+
const exitCode = err == null ? 0
|
|
15
|
+
: typeof err.code === 'number' ? err.code
|
|
16
|
+
: err.killed ? 137
|
|
17
|
+
: 1;
|
|
14
18
|
resolve({
|
|
15
19
|
stdout: (stdout ?? '').toString().slice(0, MAX_OUTPUT_CHARS),
|
|
16
20
|
stderr: (stderr ?? '').toString().slice(0, MAX_OUTPUT_CHARS),
|
package/dist/exec.js.map
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"exec.js","sourceRoot":"","sources":["../src/exec.ts"],"names":[],"mappings":"AAAA;;;;GAIG;AAEH,OAAO,EAAE,QAAQ,EAAE,MAAM,oBAAoB,CAAC;AAE9C,MAAM,eAAe,GAAG,MAAM,CAAC;AAC/B,MAAM,gBAAgB,GAAG,KAAK,CAAC;AAQ/B,MAAM,CAAC,KAAK,UAAU,WAAW,CAC/B,OAAe,EACf,
|
|
1
|
+
{"version":3,"file":"exec.js","sourceRoot":"","sources":["../src/exec.ts"],"names":[],"mappings":"AAAA;;;;GAIG;AAEH,OAAO,EAAE,QAAQ,EAAE,MAAM,oBAAoB,CAAC;AAE9C,MAAM,eAAe,GAAG,MAAM,CAAC;AAC/B,MAAM,gBAAgB,GAAG,KAAK,CAAC;AAQ/B,MAAM,CAAC,KAAK,UAAU,WAAW,CAC/B,OAAe,EACf,IAAuE;IAEvE,MAAM,OAAO,GAAG,IAAI,EAAE,OAAO,IAAI,eAAe,CAAC;IACjD,MAAM,GAAG,GAAG,IAAI,EAAE,GAAG,CAAC,CAAC,CAAC,EAAE,GAAG,OAAO,CAAC,GAAG,EAAE,GAAG,IAAI,CAAC,GAAG,EAAE,CAAC,CAAC,CAAC,SAAS,CAAC;IAEpE,OAAO,IAAI,OAAO,CAAC,CAAC,OAAO,EAAE,EAAE;QAC7B,QAAQ,CACN,IAAI,EACJ,CAAC,IAAI,EAAE,OAAO,CAAC,EACf,EAAE,OAAO,EAAE,SAAS,EAAE,GAAG,GAAG,IAAI,EAAE,GAAG,EAAE,IAAI,EAAE,GAAG,EAAE,GAAG,EAAE,EACvD,CAAC,GAAG,EAAE,MAAM,EAAE,MAAM,EAAE,EAAE;YACtB,MAAM,QAAQ,GACZ,GAAG,IAAI,IAAI,CAAC,CAAC,CAAC,CAAC;gBACf,CAAC,CAAC,OAAO,GAAG,CAAC,IAAI,KAAK,QAAQ,CAAC,CAAC,CAAC,GAAG,CAAC,IAAI;oBACzC,CAAC,CAAC,GAAG,CAAC,MAAM,CAAC,CAAC,CAAC,GAAG;wBAClB,CAAC,CAAC,CAAC,CAAC;YACN,OAAO,CAAC;gBACN,MAAM,EAAE,CAAC,MAAM,IAAI,EAAE,CAAC,CAAC,QAAQ,EAAE,CAAC,KAAK,CAAC,CAAC,EAAE,gBAAgB,CAAC;gBAC5D,MAAM,EAAE,CAAC,MAAM,IAAI,EAAE,CAAC,CAAC,QAAQ,EAAE,CAAC,KAAK,CAAC,CAAC,EAAE,gBAAgB,CAAC;gBAC5D,QAAQ;aACT,CAAC,CAAC;QACL,CAAC,CACF,CAAC;IACJ,CAAC,CAAC,CAAC;AACL,CAAC;AAED,MAAM,CAAC,KAAK,UAAU,QAAQ,CAC5B,QAAkB,EAClB,IAAqD;IAErD,KAAK,MAAM,GAAG,IAAI,QAAQ,EAAE,CAAC;QAC3B,MAAM,MAAM,GAAG,MAAM,WAAW,CAAC,GAAG,EAAE,IAAI,CAAC,CAAC;QAC5C,IAAI,MAAM,CAAC,QAAQ,KAAK,CAAC,EAAE,CAAC;YAC1B,OAAO,CAAC,IAAI,CAAC,uCAAuC,GAAG,eAAe,MAAM,CAAC,MAAM,EAAE,CAAC,CAAC;QACzF,CAAC;IACH,CAAC;AACH,CAAC"}
|
|
@@ -0,0 +1,16 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Generate a GitHub PR comment summarizing benchmark results.
|
|
3
|
+
*
|
|
4
|
+
* Pure function, no side effects. The caller is responsible for writing
|
|
5
|
+
* the returned markdown to a file; the CI workflow posts it as a comment.
|
|
6
|
+
*/
|
|
7
|
+
import type { GridReport } from './models.js';
|
|
8
|
+
/**
|
|
9
|
+
* Build a markdown PR comment from a GridReport.
|
|
10
|
+
*
|
|
11
|
+
* @param report - The completed grid report
|
|
12
|
+
* @param dashboardUrl - Optional link to the CLIWatch dashboard for this CLI
|
|
13
|
+
* @returns Markdown string ready to be posted as a GitHub PR comment
|
|
14
|
+
*/
|
|
15
|
+
export declare function formatPrComment(report: GridReport, dashboardUrl?: string): string;
|
|
16
|
+
//# sourceMappingURL=github-comment.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"github-comment.d.ts","sourceRoot":"","sources":["../src/github-comment.ts"],"names":[],"mappings":"AAAA;;;;;GAKG;AAEH,OAAO,KAAK,EAAE,UAAU,EAAyB,MAAM,aAAa,CAAC;AAQrE;;;;;;GAMG;AACH,wBAAgB,eAAe,CAAC,MAAM,EAAE,UAAU,EAAE,YAAY,CAAC,EAAE,MAAM,GAAG,MAAM,CAyDjF"}
|
|
@@ -0,0 +1,90 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Generate a GitHub PR comment summarizing benchmark results.
|
|
3
|
+
*
|
|
4
|
+
* Pure function, no side effects. The caller is responsible for writing
|
|
5
|
+
* the returned markdown to a file; the CI workflow posts it as a comment.
|
|
6
|
+
*/
|
|
7
|
+
/**
|
|
8
|
+
* Build a markdown PR comment from a GridReport.
|
|
9
|
+
*
|
|
10
|
+
* @param report - The completed grid report
|
|
11
|
+
* @param dashboardUrl - Optional link to the CLIWatch dashboard for this CLI
|
|
12
|
+
* @returns Markdown string ready to be posted as a GitHub PR comment
|
|
13
|
+
*/
|
|
14
|
+
export function formatPrComment(report, dashboardUrl) {
|
|
15
|
+
const lines = [];
|
|
16
|
+
// HTML comment marker for upsert (find-and-replace existing comment)
|
|
17
|
+
lines.push(`<!-- cliwatch-bench-${report.cli} -->`);
|
|
18
|
+
// Header
|
|
19
|
+
const name = report.displayName ?? report.cli;
|
|
20
|
+
const version = report.cliVersion ? ` ${report.cliVersion}` : '';
|
|
21
|
+
lines.push(`### CLIWatch | ${name}${version}`);
|
|
22
|
+
lines.push('');
|
|
23
|
+
// Metadata line
|
|
24
|
+
const parts = [];
|
|
25
|
+
parts.push(`${report.taskCount} tasks`);
|
|
26
|
+
if (report.gitSha) {
|
|
27
|
+
parts.push(`\`${report.gitSha.slice(0, 7)}\``);
|
|
28
|
+
}
|
|
29
|
+
if (dashboardUrl) {
|
|
30
|
+
parts.push(`[View details](${dashboardUrl})`);
|
|
31
|
+
}
|
|
32
|
+
lines.push(parts.join(' | '));
|
|
33
|
+
lines.push('');
|
|
34
|
+
// Model summary table
|
|
35
|
+
lines.push('| Model | Pass Rate | Avg Turns |');
|
|
36
|
+
lines.push('|:------|----------:|----------:|');
|
|
37
|
+
for (const mr of report.modelResults) {
|
|
38
|
+
const total = mr.taskResults.length;
|
|
39
|
+
const passed = mr.taskResults.filter((t) => t.passed).length;
|
|
40
|
+
const pct = total > 0 ? Math.round(mr.passRate * 100) : 0;
|
|
41
|
+
const turns = mr.avgTurnsToSuccess.toFixed(1);
|
|
42
|
+
lines.push(`| ${mr.displayName} | **${pct}%** (${passed}/${total}) | ${turns} |`);
|
|
43
|
+
}
|
|
44
|
+
lines.push('');
|
|
45
|
+
// Collect all failing tasks
|
|
46
|
+
const failures = collectFailures(report.modelResults);
|
|
47
|
+
if (failures.length === 0) {
|
|
48
|
+
lines.push('All tasks passed.');
|
|
49
|
+
}
|
|
50
|
+
else {
|
|
51
|
+
lines.push(`<details>`);
|
|
52
|
+
lines.push(`<summary>${failures.length} failing task${failures.length === 1 ? '' : 's'}</summary>`);
|
|
53
|
+
lines.push('');
|
|
54
|
+
lines.push('| Task | Model | Reason |');
|
|
55
|
+
lines.push('|:-----|:------|:-------|');
|
|
56
|
+
for (const f of failures) {
|
|
57
|
+
lines.push(`| \`${f.taskId}\` | ${f.modelName} | ${f.reason} |`);
|
|
58
|
+
}
|
|
59
|
+
lines.push('');
|
|
60
|
+
lines.push('</details>');
|
|
61
|
+
}
|
|
62
|
+
return lines.join('\n');
|
|
63
|
+
}
|
|
64
|
+
function collectFailures(modelResults) {
|
|
65
|
+
const failures = [];
|
|
66
|
+
for (const mr of modelResults) {
|
|
67
|
+
for (const tr of mr.taskResults) {
|
|
68
|
+
if (!tr.passed) {
|
|
69
|
+
failures.push({
|
|
70
|
+
taskId: tr.taskId,
|
|
71
|
+
modelName: mr.displayName,
|
|
72
|
+
reason: truncateReason(tr),
|
|
73
|
+
});
|
|
74
|
+
}
|
|
75
|
+
}
|
|
76
|
+
}
|
|
77
|
+
return failures;
|
|
78
|
+
}
|
|
79
|
+
const MAX_REASON_LENGTH = 100;
|
|
80
|
+
function truncateReason(tr) {
|
|
81
|
+
const raw = tr.failureReason ?? 'unknown';
|
|
82
|
+
// Collapse newlines first (would break table rows), then truncate, then escape pipes.
|
|
83
|
+
// Truncate before escaping so we never slice through a \| escape sequence.
|
|
84
|
+
const flat = raw.replace(/\n/g, ' ');
|
|
85
|
+
const truncated = flat.length > MAX_REASON_LENGTH
|
|
86
|
+
? flat.slice(0, MAX_REASON_LENGTH - 3) + '...'
|
|
87
|
+
: flat;
|
|
88
|
+
return truncated.replace(/\|/g, '\\|');
|
|
89
|
+
}
|
|
90
|
+
//# sourceMappingURL=github-comment.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"github-comment.js","sourceRoot":"","sources":["../src/github-comment.ts"],"names":[],"mappings":"AAAA;;;;;GAKG;AAUH;;;;;;GAMG;AACH,MAAM,UAAU,eAAe,CAAC,MAAkB,EAAE,YAAqB;IACvE,MAAM,KAAK,GAAa,EAAE,CAAC;IAE3B,qEAAqE;IACrE,KAAK,CAAC,IAAI,CAAC,uBAAuB,MAAM,CAAC,GAAG,MAAM,CAAC,CAAC;IAEpD,SAAS;IACT,MAAM,IAAI,GAAG,MAAM,CAAC,WAAW,IAAI,MAAM,CAAC,GAAG,CAAC;IAC9C,MAAM,OAAO,GAAG,MAAM,CAAC,UAAU,CAAC,CAAC,CAAC,IAAI,MAAM,CAAC,UAAU,EAAE,CAAC,CAAC,CAAC,EAAE,CAAC;IACjE,KAAK,CAAC,IAAI,CAAC,kBAAkB,IAAI,GAAG,OAAO,EAAE,CAAC,CAAC;IAC/C,KAAK,CAAC,IAAI,CAAC,EAAE,CAAC,CAAC;IAEf,gBAAgB;IAChB,MAAM,KAAK,GAAa,EAAE,CAAC;IAC3B,KAAK,CAAC,IAAI,CAAC,GAAG,MAAM,CAAC,SAAS,QAAQ,CAAC,CAAC;IACxC,IAAI,MAAM,CAAC,MAAM,EAAE,CAAC;QAClB,KAAK,CAAC,IAAI,CAAC,KAAK,MAAM,CAAC,MAAM,CAAC,KAAK,CAAC,CAAC,EAAE,CAAC,CAAC,IAAI,CAAC,CAAC;IACjD,CAAC;IACD,IAAI,YAAY,EAAE,CAAC;QACjB,KAAK,CAAC,IAAI,CAAC,kBAAkB,YAAY,GAAG,CAAC,CAAC;IAChD,CAAC;IACD,KAAK,CAAC,IAAI,CAAC,KAAK,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC,CAAC;IAC9B,KAAK,CAAC,IAAI,CAAC,EAAE,CAAC,CAAC;IAEf,sBAAsB;IACtB,KAAK,CAAC,IAAI,CAAC,mCAAmC,CAAC,CAAC;IAChD,KAAK,CAAC,IAAI,CAAC,mCAAmC,CAAC,CAAC;IAEhD,KAAK,MAAM,EAAE,IAAI,MAAM,CAAC,YAAY,EAAE,CAAC;QACrC,MAAM,KAAK,GAAG,EAAE,CAAC,WAAW,CAAC,MAAM,CAAC;QACpC,MAAM,MAAM,GAAG,EAAE,CAAC,WAAW,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,MAAM,CAAC,CAAC,MAAM,CAAC;QAC7D,MAAM,GAAG,GAAG,KAAK,GAAG,CAAC,CAAC,CAAC,CAAC,IAAI,CAAC,KAAK,CAAC,EAAE,CAAC,QAAQ,GAAG,GAAG,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC;QAC1D,MAAM,KAAK,GAAG,EAAE,CAAC,iBAAiB,CAAC,OAAO,CAAC,CAAC,CAAC,CAAC;QAC9C,KAAK,CAAC,IAAI,CAAC,KAAK,EAAE,CAAC,WAAW,QAAQ,GAAG,QAAQ,MAAM,IAAI,KAAK,OAAO,KAAK,IAAI,CAAC,CAAC;IACpF,CAAC;IAED,KAAK,CAAC,IAAI,CAAC,EAAE,CAAC,CAAC;IAEf,4BAA4B;IAC5B,MAAM,QAAQ,GAAG,eAAe,CAAC,MAAM,CAAC,YAAY,CAAC,CAAC;IAEtD,IAAI,QAAQ,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;QAC1B,KAAK,CAAC,IAAI,CAAC,mBAAmB,CAAC,CAAC;IAClC,CAAC;SAAM,CAAC;QACN,KAAK,CAAC,IAAI,CAAC,WAAW,CAAC,CAAC;QACxB,KAAK,CAAC,IAAI,CAAC,YAAY,QAAQ,CAAC,MAAM,gBAAgB,QAAQ,CAAC,MAAM,KAAK,CAAC,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,GAAG,YAAY,CAAC,CAAC;QACpG,KAAK,CAAC,IAAI,CAAC,EAAE,CAAC,CAAC;QACf,KAAK,CAAC,IAAI,CAAC,2BAA2B,CAAC,CAAC;QACxC,KAAK,CAAC,IAAI,CAAC,2BAA2B,CAAC,CAAC;QACxC,KAAK,MAAM,CAAC,IAAI,QAAQ,EAAE,CAAC;YACzB,KAAK,CAAC,IAAI,CAAC,OAAO,CAAC,CAAC,MAAM,QAAQ,CAAC,CAAC,SAAS,MAAM,CAAC,CAAC,MAAM,IAAI,CAAC,CAAC;QACnE,CAAC;QACD,KAAK,CAAC,IAAI,CAAC,EAAE,CAAC,CAAC;QACf,KAAK,CAAC,IAAI,CAAC,YAAY,CAAC,CAAC;IAC3B,CAAC;IAED,OAAO,KAAK,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;AAC1B,CAAC;AAED,SAAS,eAAe,CAAC,YAA2B;IAClD,MAAM,QAAQ,GAAkB,EAAE,CAAC;IACnC,KAAK,MAAM,EAAE,IAAI,YAAY,EAAE,CAAC;QAC9B,KAAK,MAAM,EAAE,IAAI,EAAE,CAAC,WAAW,EAAE,CAAC;YAChC,IAAI,CAAC,EAAE,CAAC,MAAM,EAAE,CAAC;gBACf,QAAQ,CAAC,IAAI,CAAC;oBACZ,MAAM,EAAE,EAAE,CAAC,MAAM;oBACjB,SAAS,EAAE,EAAE,CAAC,WAAW;oBACzB,MAAM,EAAE,cAAc,CAAC,EAAE,CAAC;iBAC3B,CAAC,CAAC;YACL,CAAC;QACH,CAAC;IACH,CAAC;IACD,OAAO,QAAQ,CAAC;AAClB,CAAC;AAED,MAAM,iBAAiB,GAAG,GAAG,CAAC;AAE9B,SAAS,cAAc,CAAC,EAAY;IAClC,MAAM,GAAG,GAAG,EAAE,CAAC,aAAa,IAAI,SAAS,CAAC;IAC1C,sFAAsF;IACtF,2EAA2E;IAC3E,MAAM,IAAI,GAAG,GAAG,CAAC,OAAO,CAAC,KAAK,EAAE,GAAG,CAAC,CAAC;IACrC,MAAM,SAAS,GAAG,IAAI,CAAC,MAAM,GAAG,iBAAiB;QAC/C,CAAC,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC,EAAE,iBAAiB,GAAG,CAAC,CAAC,GAAG,KAAK;QAC9C,CAAC,CAAC,IAAI,CAAC;IACT,OAAO,SAAS,CAAC,OAAO,CAAC,KAAK,EAAE,KAAK,CAAC,CAAC;AACzC,CAAC"}
|
package/dist/index.d.ts
CHANGED
|
@@ -5,10 +5,9 @@
|
|
|
5
5
|
* Tests CLI agent-readiness by having LLMs execute tasks,
|
|
6
6
|
* then validating results with assertion-based checks.
|
|
7
7
|
*
|
|
8
|
-
*
|
|
8
|
+
* Entry modes:
|
|
9
9
|
* 1. Config file mode: cli-bench.yaml found → load config → run grid
|
|
10
|
-
* 2.
|
|
11
|
-
* 3. Init mode: scaffold cli-bench.yaml
|
|
10
|
+
* 2. Init mode: scaffold cli-bench.yaml
|
|
12
11
|
*/
|
|
13
12
|
export {};
|
|
14
13
|
//# sourceMappingURL=index.d.ts.map
|
package/dist/index.d.ts.map
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../src/index.ts"],"names":[],"mappings":";AAEA
|
|
1
|
+
{"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../src/index.ts"],"names":[],"mappings":";AAEA;;;;;;;;;GASG"}
|
package/dist/index.js
CHANGED
|
@@ -5,20 +5,18 @@
|
|
|
5
5
|
* Tests CLI agent-readiness by having LLMs execute tasks,
|
|
6
6
|
* then validating results with assertion-based checks.
|
|
7
7
|
*
|
|
8
|
-
*
|
|
8
|
+
* Entry modes:
|
|
9
9
|
* 1. Config file mode: cli-bench.yaml found → load config → run grid
|
|
10
|
-
* 2.
|
|
11
|
-
* 3. Init mode: scaffold cli-bench.yaml
|
|
10
|
+
* 2. Init mode: scaffold cli-bench.yaml
|
|
12
11
|
*/
|
|
13
12
|
import { writeFile } from 'node:fs/promises';
|
|
14
|
-
import { dirname } from 'node:path';
|
|
15
13
|
import { parseArgs } from './config.js';
|
|
16
14
|
import { runGrid, uploadReport } from './runner.js';
|
|
17
15
|
import { resolveConfigFile, loadProject } from './project.js';
|
|
18
16
|
import { scaffoldProject } from './init.js';
|
|
19
|
-
import {
|
|
20
|
-
import { CONTEXT_MODES } from './models.js';
|
|
17
|
+
import { validateApiKeys, resolveProviders } from './providers.js';
|
|
21
18
|
import { checkThresholds, printThresholdResults } from './thresholds.js';
|
|
19
|
+
import { formatPrComment } from './github-comment.js';
|
|
22
20
|
async function main() {
|
|
23
21
|
const config = parseArgs(process.argv);
|
|
24
22
|
// Init mode — scaffold and exit
|
|
@@ -34,7 +32,7 @@ async function main() {
|
|
|
34
32
|
}
|
|
35
33
|
return;
|
|
36
34
|
}
|
|
37
|
-
console.log('@cliwatch/cli-bench v0.
|
|
35
|
+
console.log('@cliwatch/cli-bench v0.7.1');
|
|
38
36
|
// Try to find a config file
|
|
39
37
|
const configPath = await resolveConfigFile(config.configFile);
|
|
40
38
|
let reports;
|
|
@@ -43,15 +41,20 @@ async function main() {
|
|
|
43
41
|
if (configPath) {
|
|
44
42
|
// Config file mode
|
|
45
43
|
console.log(`Config: ${configPath}`);
|
|
46
|
-
const { config: fileConfig, tasks, taskSuiteContent } = await loadProject(configPath);
|
|
44
|
+
const { config: fileConfig, tasks: allTasks, taskSuiteContent, projectFiles } = await loadProject(configPath);
|
|
47
45
|
thresholdsConfig = fileConfig.thresholds;
|
|
46
|
+
// Filter tasks by tags if --tags was provided
|
|
47
|
+
const tasks = config.tags.length > 0
|
|
48
|
+
? allTasks.filter((t) => t.tags?.some((tag) => config.tags.includes(tag)))
|
|
49
|
+
: allTasks;
|
|
50
|
+
if (tasks.length === 0) {
|
|
51
|
+
console.error(`No tasks match tags: ${config.tags.join(', ')}`);
|
|
52
|
+
process.exit(1);
|
|
53
|
+
}
|
|
48
54
|
// Merge CLI args with file config
|
|
49
55
|
const providers = config.models.length > 0
|
|
50
56
|
? config.models
|
|
51
57
|
: fileConfig.providers ?? ['anthropic/claude-sonnet-4-20250514'];
|
|
52
|
-
const contextModes = fileConfig.context
|
|
53
|
-
? fileConfig.context.filter((s) => CONTEXT_MODES.includes(s))
|
|
54
|
-
: config.contextModes;
|
|
55
58
|
const concurrency = fileConfig.concurrency ?? config.concurrency;
|
|
56
59
|
// Determine upload behavior
|
|
57
60
|
const uploadMode = fileConfig.upload ?? 'auto';
|
|
@@ -61,16 +64,15 @@ async function main() {
|
|
|
61
64
|
console.log(`CLI: ${fileConfig.cli}`);
|
|
62
65
|
console.log(`Providers: ${providers.join(', ')}`);
|
|
63
66
|
console.log(`Tasks: ${tasks.length}`);
|
|
64
|
-
console.log(`Context: ${contextModes.join(', ')}`);
|
|
65
67
|
console.log(`Dry run: ${config.dryRun}`);
|
|
66
|
-
// Validate
|
|
68
|
+
// Validate API keys before running
|
|
67
69
|
if (!config.dryRun) {
|
|
68
|
-
|
|
70
|
+
validateApiKeys(providers);
|
|
69
71
|
}
|
|
70
72
|
const models = resolveProviders(providers);
|
|
71
73
|
const globalRepeat = config.repeat ?? fileConfig.repeat;
|
|
72
74
|
reports = await runGrid({
|
|
73
|
-
config: { ...config, concurrency
|
|
75
|
+
config: { ...config, concurrency },
|
|
74
76
|
tasks,
|
|
75
77
|
cliName: fileConfig.cli,
|
|
76
78
|
models,
|
|
@@ -83,9 +85,13 @@ async function main() {
|
|
|
83
85
|
websiteUrl: fileConfig.website_url,
|
|
84
86
|
githubUrl: fileConfig.github_url,
|
|
85
87
|
taskSuiteContent,
|
|
86
|
-
configDir: dirname(configPath),
|
|
87
88
|
redactEnvVars: fileConfig.redact_env,
|
|
88
89
|
redactPatterns: fileConfig.redact_patterns,
|
|
90
|
+
fileEnv: fileConfig.env,
|
|
91
|
+
fileSetup: fileConfig.setup,
|
|
92
|
+
fileCleanup: fileConfig.cleanup,
|
|
93
|
+
fileScaffold: fileConfig.scaffold,
|
|
94
|
+
projectFiles,
|
|
89
95
|
});
|
|
90
96
|
// Check thresholds before upload so results are included in the payload
|
|
91
97
|
if (thresholdsConfig && reports.length > 0 && !config.dryRun) {
|
|
@@ -111,27 +117,16 @@ async function main() {
|
|
|
111
117
|
}
|
|
112
118
|
}
|
|
113
119
|
}
|
|
120
|
+
// Write PR comment markdown if requested
|
|
121
|
+
if (config.githubCommentPath && reports.length > 0 && !config.dryRun) {
|
|
122
|
+
const markdown = formatPrComment(reports[0]);
|
|
123
|
+
await writeFile(config.githubCommentPath, markdown, 'utf-8');
|
|
124
|
+
console.log(`\nPR comment written to ${config.githubCommentPath}`);
|
|
125
|
+
}
|
|
114
126
|
}
|
|
115
127
|
else {
|
|
116
|
-
|
|
117
|
-
|
|
118
|
-
console.log(`Models: ${config.models.length > 0 ? config.models.join(', ') : 'all'}`);
|
|
119
|
-
console.log(`Context: ${config.contextModes.join(', ')}`);
|
|
120
|
-
console.log(`Dry run: ${config.dryRun}`);
|
|
121
|
-
if (!config.dryRun && config.models.length > 0) {
|
|
122
|
-
validateGatewayKey();
|
|
123
|
-
}
|
|
124
|
-
reports = await runGrid({ config, globalRepeat: config.repeat });
|
|
125
|
-
if (config.upload) {
|
|
126
|
-
for (const report of reports) {
|
|
127
|
-
try {
|
|
128
|
-
await uploadReport(report, config.backendUrl, config.apiKey);
|
|
129
|
-
}
|
|
130
|
-
catch (e) {
|
|
131
|
-
console.error(`Failed to upload report for ${report.cli}: ${e instanceof Error ? e.message : e}`);
|
|
132
|
-
}
|
|
133
|
-
}
|
|
134
|
-
}
|
|
128
|
+
console.error('No cli-bench.yaml found. Run `npx @cliwatch/cli-bench --init` to create one.');
|
|
129
|
+
process.exit(1);
|
|
135
130
|
}
|
|
136
131
|
if (config.output && reports.length > 0) {
|
|
137
132
|
const output = JSON.stringify(reports.length === 1 ? reports[0] : reports, null, 2);
|
|
@@ -143,7 +138,7 @@ async function main() {
|
|
|
143
138
|
console.log('\n=== Final Summary ===');
|
|
144
139
|
for (const report of reports) {
|
|
145
140
|
for (const mr of report.modelResults) {
|
|
146
|
-
console.log(`${report.cli} x ${mr.displayName}
|
|
141
|
+
console.log(`${report.cli} x ${mr.displayName}: ${(mr.passRate * 100).toFixed(0)}% pass, avgTurns=${mr.avgTurnsToSuccess.toFixed(1)}`);
|
|
147
142
|
}
|
|
148
143
|
}
|
|
149
144
|
}
|
package/dist/index.js.map
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"index.js","sourceRoot":"","sources":["../src/index.ts"],"names":[],"mappings":";AAEA
|
|
1
|
+
{"version":3,"file":"index.js","sourceRoot":"","sources":["../src/index.ts"],"names":[],"mappings":";AAEA;;;;;;;;;GASG;AAEH,OAAO,EAAE,SAAS,EAAE,MAAM,kBAAkB,CAAC;AAC7C,OAAO,EAAE,SAAS,EAAE,MAAM,aAAa,CAAC;AACxC,OAAO,EAAE,OAAO,EAAE,YAAY,EAAE,MAAM,aAAa,CAAC;AACpD,OAAO,EAAE,iBAAiB,EAAE,WAAW,EAAE,MAAM,cAAc,CAAC;AAC9D,OAAO,EAAE,eAAe,EAAE,MAAM,WAAW,CAAC;AAC5C,OAAO,EAAE,eAAe,EAAE,gBAAgB,EAAgB,MAAM,gBAAgB,CAAC;AACjF,OAAO,EAAE,eAAe,EAAE,qBAAqB,EAAE,MAAM,iBAAiB,CAAC;AACzE,OAAO,EAAE,eAAe,EAAE,MAAM,qBAAqB,CAAC;AAEtD,KAAK,UAAU,IAAI;IACjB,MAAM,MAAM,GAAG,SAAS,CAAC,OAAO,CAAC,IAAI,CAAC,CAAC;IAEvC,gCAAgC;IAChC,IAAI,MAAM,CAAC,QAAQ,EAAE,CAAC;QACpB,IAAI,CAAC;YACH,MAAM,IAAI,GAAG,MAAM,eAAe,CAAC,OAAO,CAAC,GAAG,EAAE,CAAC,CAAC;YAClD,OAAO,CAAC,GAAG,CAAC,WAAW,IAAI,EAAE,CAAC,CAAC;YAC/B,OAAO,CAAC,GAAG,CAAC,oFAAoF,CAAC,CAAC;QACpG,CAAC;QAAC,OAAO,CAAC,EAAE,CAAC;YACX,OAAO,CAAC,KAAK,CAAC,CAAC,YAAY,KAAK,CAAC,CAAC,CAAC,CAAC,CAAC,OAAO,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC;YAClD,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;QAClB,CAAC;QACD,OAAO;IACT,CAAC;IAED,OAAO,CAAC,GAAG,CAAC,4BAA4B,CAAC,CAAC;IAE1C,4BAA4B;IAC5B,MAAM,UAAU,GAAG,MAAM,iBAAiB,CAAC,MAAM,CAAC,UAAU,CAAC,CAAC;IAE9D,IAAI,OAAO,CAAC;IACZ,IAAI,gBAAoE,CAAC;IACzE,IAAI,eAAe,GAAG,KAAK,CAAC;IAE5B,IAAI,UAAU,EAAE,CAAC;QACf,mBAAmB;QACnB,OAAO,CAAC,GAAG,CAAC,WAAW,UAAU,EAAE,CAAC,CAAC;QACrC,MAAM,EAAE,MAAM,EAAE,UAAU,EAAE,KAAK,EAAE,QAAQ,EAAE,gBAAgB,EAAE,YAAY,EAAE,GAAG,MAAM,WAAW,CAAC,UAAU,CAAC,CAAC;QAC9G,gBAAgB,GAAG,UAAU,CAAC,UAAU,CAAC;QAEzC,8CAA8C;QAC9C,MAAM,KAAK,GAAG,MAAM,CAAC,IAAI,CAAC,MAAM,GAAG,CAAC;YAClC,CAAC,CAAC,QAAQ,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,IAAI,EAAE,IAAI,CAAC,CAAC,GAAG,EAAE,EAAE,CAAC,MAAM,CAAC,IAAI,CAAC,QAAQ,CAAC,GAAG,CAAC,CAAC,CAAC;YAC1E,CAAC,CAAC,QAAQ,CAAC;QACb,IAAI,KAAK,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;YACvB,OAAO,CAAC,KAAK,CAAC,wBAAwB,MAAM,CAAC,IAAI,CAAC,IAAI,CAAC,IAAI,CAAC,EAAE,CAAC,CAAC;YAChE,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;QAClB,CAAC;QAED,kCAAkC;QAClC,MAAM,SAAS,GAAG,MAAM,CAAC,MAAM,CAAC,MAAM,GAAG,CAAC;YACxC,CAAC,CAAC,MAAM,CAAC,MAAM;YACf,CAAC,CAAC,UAAU,CAAC,SAAS,IAAI,CAAC,oCAAoC,CAAC,CAAC;QACnE,MAAM,WAAW,GAAG,UAAU,CAAC,WAAW,IAAI,MAAM,CAAC,WAAW,CAAC;QAEjE,4BAA4B;QAC5B,MAAM,UAAU,GAAG,UAAU,CAAC,MAAM,IAAI,MAAM,CAAC;QAC/C,MAAM,YAAY,GAAG,MAAM,CAAC,MAAM;eAC7B,UAAU,KAAK,QAAQ;eACvB,CAAC,UAAU,KAAK,MAAM,IAAI,CAAC,CAAC,MAAM,CAAC,MAAM,CAAC,CAAC;QAEhD,OAAO,CAAC,GAAG,CAAC,QAAQ,UAAU,CAAC,GAAG,EAAE,CAAC,CAAC;QACtC,OAAO,CAAC,GAAG,CAAC,cAAc,SAAS,CAAC,IAAI,CAAC,IAAI,CAAC,EAAE,CAAC,CAAC;QAClD,OAAO,CAAC,GAAG,CAAC,UAAU,KAAK,CAAC,MAAM,EAAE,CAAC,CAAC;QACtC,OAAO,CAAC,GAAG,CAAC,YAAY,MAAM,CAAC,MAAM,EAAE,CAAC,CAAC;QAEzC,mCAAmC;QACnC,IAAI,CAAC,MAAM,CAAC,MAAM,EAAE,CAAC;YACnB,eAAe,CAAC,SAAS,CAAC,CAAC;QAC7B,CAAC;QAED,MAAM,MAAM,GAAG,gBAAgB,CAAC,SAAS,CAAC,CAAC;QAE3C,MAAM,YAAY,GAAG,MAAM,CAAC,MAAM,IAAI,UAAU,CAAC,MAAM,CAAC;QAExD,OAAO,GAAG,MAAM,OAAO,CAAC;YACtB,MAAM,EAAE,EAAE,GAAG,MAAM,EAAE,WAAW,EAAE;YAClC,KAAK;YACL,OAAO,EAAE,UAAU,CAAC,GAAG;YACvB,MAAM;YACN,cAAc,EAAE,UAAU,CAAC,eAAe;YAC1C,OAAO,EAAE,UAAU,CAAC,OAAO,IAAI,MAAM,CAAC,OAAO;YAC7C,YAAY;YACZ,YAAY,EAAE,UAAU,CAAC,aAAa;YACtC,WAAW,EAAE,UAAU,CAAC,YAAY;YACpC,QAAQ,EAAE,UAAU,CAAC,QAAQ;YAC7B,UAAU,EAAE,UAAU,CAAC,WAAW;YAClC,SAAS,EAAE,UAAU,CAAC,UAAU;YAChC,gBAAgB;YAChB,aAAa,EAAE,UAAU,CAAC,UAAU;YACpC,cAAc,EAAE,UAAU,CAAC,eAAe;YAC1C,OAAO,EAAE,UAAU,CAAC,GAAG;YACvB,SAAS,EAAE,UAAU,CAAC,KAAK;YAC3B,WAAW,EAAE,UAAU,CAAC,OAAO;YAC/B,YAAY,EAAE,UAAU,CAAC,QAAQ;YACjC,YAAY;SACb,CAAC,CAAC;QAEH,wEAAwE;QACxE,IAAI,gBAAgB,IAAI,OAAO,CAAC,MAAM,GAAG,CAAC,IAAI,CAAC,MAAM,CAAC,MAAM,EAAE,CAAC;YAC7D,MAAM,KAAK,GAAG,eAAe,CAC3B,OAAO,CAAC,OAAO,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,YAAY,CAAC,EACtC,gBAAgB,CACjB,CAAC;YACF,qBAAqB,CAAC,KAAK,CAAC,CAAC;YAC7B,KAAK,MAAM,MAAM,IAAI,OAAO,EAAE,CAAC;gBAC7B,MAAM,CAAC,gBAAgB,GAAG,KAAK,CAAC;YAClC,CAAC;YACD,+DAA+D;YAC/D,IAAI,CAAC,KAAK,CAAC,SAAS,IAAI,KAAK,CAAC,QAAQ,KAAK,OAAO,EAAE,CAAC;gBACnD,eAAe,GAAG,IAAI,CAAC;YACzB,CAAC;QACH,CAAC;QAED,uBAAuB;QACvB,IAAI,YAAY,IAAI,CAAC,MAAM,CAAC,MAAM,EAAE,CAAC;YACnC,MAAM,UAAU,GAAG,UAAU,CAAC,WAAW,IAAI,MAAM,CAAC,UAAU,CAAC;YAC/D,KAAK,MAAM,MAAM,IAAI,OAAO,EAAE,CAAC;gBAC7B,IAAI,CAAC;oBACH,MAAM,YAAY,CAAC,MAAM,EAAE,UAAU,EAAE,MAAM,CAAC,MAAM,CAAC,CAAC;gBACxD,CAAC;gBAAC,OAAO,CAAC,EAAE,CAAC;oBACX,OAAO,CAAC,KAAK,CACX,+BAA+B,MAAM,CAAC,GAAG,KAAK,CAAC,YAAY,KAAK,CAAC,CAAC,CAAC,CAAC,CAAC,OAAO,CAAC,CAAC,CAAC,CAAC,EAAE,CACnF,CAAC;gBACJ,CAAC;YACH,CAAC;QACH,CAAC;QACD,yCAAyC;QACzC,IAAI,MAAM,CAAC,iBAAiB,IAAI,OAAO,CAAC,MAAM,GAAG,CAAC,IAAI,CAAC,MAAM,CAAC,MAAM,EAAE,CAAC;YACrE,MAAM,QAAQ,GAAG,eAAe,CAAC,OAAO,CAAC,CAAC,CAAE,CAAC,CAAC;YAC9C,MAAM,SAAS,CAAC,MAAM,CAAC,iBAAiB,EAAE,QAAQ,EAAE,OAAO,CAAC,CAAC;YAC7D,OAAO,CAAC,GAAG,CAAC,2BAA2B,MAAM,CAAC,iBAAiB,EAAE,CAAC,CAAC;QACrE,CAAC;IACH,CAAC;SAAM,CAAC;QACN,OAAO,CAAC,KAAK,CAAC,8EAA8E,CAAC,CAAC;QAC9F,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;IAClB,CAAC;IAED,IAAI,MAAM,CAAC,MAAM,IAAI,OAAO,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;QACxC,MAAM,MAAM,GAAG,IAAI,CAAC,SAAS,CAC3B,OAAO,CAAC,MAAM,KAAK,CAAC,CAAC,CAAC,CAAC,OAAO,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,OAAO,EAC3C,IAAI,EACJ,CAAC,CACF,CAAC;QACF,MAAM,SAAS,CAAC,MAAM,CAAC,MAAM,EAAE,MAAM,EAAE,OAAO,CAAC,CAAC;QAChD,OAAO,CAAC,GAAG,CAAC,wBAAwB,MAAM,CAAC,MAAM,EAAE,CAAC,CAAC;IACvD,CAAC;IAED,eAAe;IACf,IAAI,OAAO,CAAC,MAAM,GAAG,CAAC,IAAI,CAAC,MAAM,CAAC,MAAM,EAAE,CAAC;QACzC,OAAO,CAAC,GAAG,CAAC,yBAAyB,CAAC,CAAC;QACvC,KAAK,MAAM,MAAM,IAAI,OAAO,EAAE,CAAC;YAC7B,KAAK,MAAM,EAAE,IAAI,MAAM,CAAC,YAAY,EAAE,CAAC;gBACrC,OAAO,CAAC,GAAG,CACT,GAAG,MAAM,CAAC,GAAG,MAAM,EAAE,CAAC,WAAW,KAAK,CAAC,EAAE,CAAC,QAAQ,GAAG,GAAG,CAAC,CAAC,OAAO,CAAC,CAAC,CAAC,oBAAoB,EAAE,CAAC,iBAAiB,CAAC,OAAO,CAAC,CAAC,CAAC,EAAE,CAC1H,CAAC;YACJ,CAAC;QACH,CAAC;IACH,CAAC;IAED,uEAAuE;IACvE,IAAI,eAAe,EAAE,CAAC;QACpB,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;IAClB,CAAC;AACH,CAAC;AAED,IAAI,EAAE,CAAC,KAAK,CAAC,CAAC,CAAC,EAAE,EAAE;IACjB,OAAO,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC;IACjB,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;AAClB,CAAC,CAAC,CAAC"}
|
package/dist/init.js
CHANGED
|
@@ -17,9 +17,9 @@ providers:
|
|
|
17
17
|
# - google/gemini-2.5-pro
|
|
18
18
|
|
|
19
19
|
# Optional settings
|
|
20
|
-
# context: [zero-shot] # zero-shot | help | docs
|
|
21
20
|
# concurrency: 3 # max concurrent API calls
|
|
22
21
|
# workdir: ./workspace # working directory for commands (default: temp dir)
|
|
22
|
+
# scaffold: scaffolds/my-project # directory copied into workdir before each task
|
|
23
23
|
# upload: auto # auto | always | never (auto uploads if CLIWATCH_API_KEY is set)
|
|
24
24
|
|
|
25
25
|
tasks:
|
package/dist/models.d.ts
CHANGED
|
@@ -4,8 +4,6 @@
|
|
|
4
4
|
* Assertion-based evaluation: tasks define assertions that are checked
|
|
5
5
|
* against the agent's execution trace.
|
|
6
6
|
*/
|
|
7
|
-
export type ContextMode = 'zero-shot' | 'help' | 'docs';
|
|
8
|
-
export declare const CONTEXT_MODES: ContextMode[];
|
|
9
7
|
export type Assertion = {
|
|
10
8
|
output_contains: string;
|
|
11
9
|
} | {
|
|
@@ -49,10 +47,14 @@ export interface Task {
|
|
|
49
47
|
intent: string;
|
|
50
48
|
assert: Assertion[];
|
|
51
49
|
setup?: string[];
|
|
50
|
+
cleanup?: string[];
|
|
51
|
+
env?: Record<string, string>;
|
|
52
52
|
max_turns?: number;
|
|
53
53
|
difficulty?: 'easy' | 'medium' | 'hard';
|
|
54
54
|
category?: string;
|
|
55
55
|
repeat?: number;
|
|
56
|
+
tags?: string[];
|
|
57
|
+
scaffold?: string | false;
|
|
56
58
|
}
|
|
57
59
|
export interface TaskSuite {
|
|
58
60
|
cli: string;
|
|
@@ -78,7 +80,6 @@ export interface ConfigFile {
|
|
|
78
80
|
website_url?: string;
|
|
79
81
|
github_url?: string;
|
|
80
82
|
providers?: string[];
|
|
81
|
-
context?: string[];
|
|
82
83
|
system_prompt?: string;
|
|
83
84
|
concurrency?: number;
|
|
84
85
|
workdir?: string;
|
|
@@ -88,6 +89,10 @@ export interface ConfigFile {
|
|
|
88
89
|
redact_env?: string[];
|
|
89
90
|
redact_patterns?: string[];
|
|
90
91
|
thresholds?: ThresholdsConfig;
|
|
92
|
+
env?: Record<string, string>;
|
|
93
|
+
setup?: string[];
|
|
94
|
+
cleanup?: string[];
|
|
95
|
+
scaffold?: string;
|
|
91
96
|
tasks: (Task | string)[];
|
|
92
97
|
}
|
|
93
98
|
export type Provider = string;
|
|
@@ -118,7 +123,6 @@ export interface ModelResult {
|
|
|
118
123
|
provider: Provider;
|
|
119
124
|
modelId: string;
|
|
120
125
|
displayName: string;
|
|
121
|
-
contextMode: ContextMode;
|
|
122
126
|
taskResults: TaskEval[];
|
|
123
127
|
passRate: number;
|
|
124
128
|
avgTurnsToSuccess: number;
|
|
@@ -166,11 +170,7 @@ export interface GridReport {
|
|
|
166
170
|
tags?: string[];
|
|
167
171
|
taskSuiteHash?: string;
|
|
168
172
|
taskSuiteContent?: string;
|
|
173
|
+
projectFiles?: Record<string, string>;
|
|
169
174
|
thresholdResults?: ThresholdCheckResult;
|
|
170
175
|
}
|
|
171
|
-
export interface HelpCache {
|
|
172
|
-
cli_name: string;
|
|
173
|
-
help_texts: Record<string, string>;
|
|
174
|
-
version?: string;
|
|
175
|
-
}
|
|
176
176
|
//# sourceMappingURL=models.d.ts.map
|
package/dist/models.d.ts.map
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"models.d.ts","sourceRoot":"","sources":["../src/models.ts"],"names":[],"mappings":"AAAA;;;;;GAKG;AAMH,MAAM,MAAM,
|
|
1
|
+
{"version":3,"file":"models.d.ts","sourceRoot":"","sources":["../src/models.ts"],"names":[],"mappings":"AAAA;;;;;GAKG;AAMH,MAAM,MAAM,SAAS,GACjB;IAAE,eAAe,EAAE,MAAM,CAAA;CAAE,GAC3B;IAAE,aAAa,EAAE,MAAM,CAAA;CAAE,GACzB;IAAE,cAAc,EAAE,MAAM,CAAA;CAAE,GAC1B;IAAE,SAAS,EAAE,MAAM,CAAA;CAAE,GACrB;IAAE,WAAW,EAAE,MAAM,CAAA;CAAE,GACvB;IAAE,aAAa,EAAE;QAAE,IAAI,EAAE,MAAM,CAAC;QAAC,IAAI,EAAE,MAAM,CAAA;KAAE,CAAA;CAAE,GACjD;IAAE,GAAG,EAAE,MAAM,CAAA;CAAE,GACf;IAAE,OAAO,EAAE,MAAM,CAAA;CAAE,GACnB;IAAE,SAAS,EAAE;QAAE,OAAO,EAAE,MAAM,CAAC;QAAC,GAAG,CAAC,EAAE,MAAM,CAAC;QAAC,GAAG,CAAC,EAAE,MAAM,CAAA;KAAE,CAAA;CAAE,GAC9D;IAAE,MAAM,EAAE;QAAE,GAAG,EAAE,MAAM,CAAC;QAAC,eAAe,CAAC,EAAE,MAAM,CAAC;QAAC,aAAa,CAAC,EAAE,MAAM,CAAA;KAAE,CAAA;CAAE,CAAC;AAElF,MAAM,WAAW,eAAe;IAC9B,SAAS,EAAE,SAAS,CAAC;IACrB,MAAM,EAAE,OAAO,CAAC;IAChB,MAAM,CAAC,EAAE,MAAM,CAAC;IAChB,QAAQ,CAAC,EAAE,MAAM,CAAC;CACnB;AAED,MAAM,WAAW,IAAI;IACnB,EAAE,EAAE,MAAM,CAAC;IACX,MAAM,EAAE,MAAM,CAAC;IACf,MAAM,EAAE,SAAS,EAAE,CAAC;IACpB,KAAK,CAAC,EAAE,MAAM,EAAE,CAAC;IACjB,OAAO,CAAC,EAAE,MAAM,EAAE,CAAC;IACnB,GAAG,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,MAAM,CAAC,CAAC;IAC7B,SAAS,CAAC,EAAE,MAAM,CAAC;IACnB,UAAU,CAAC,EAAE,MAAM,GAAG,QAAQ,GAAG,MAAM,CAAC;IACxC,QAAQ,CAAC,EAAE,MAAM,CAAC;IAClB,MAAM,CAAC,EAAE,MAAM,CAAC;IAChB,IAAI,CAAC,EAAE,MAAM,EAAE,CAAC;IAChB,QAAQ,CAAC,EAAE,MAAM,GAAG,KAAK,CAAC;CAC3B;AAED,MAAM,WAAW,SAAS;IACxB,GAAG,EAAE,MAAM,CAAC;IACZ,eAAe,CAAC,EAAE,MAAM,CAAC;IACzB,YAAY,CAAC,EAAE,MAAM,CAAC;IACtB,QAAQ,CAAC,EAAE,MAAM,CAAC;IAClB,WAAW,CAAC,EAAE,MAAM,CAAC;IACrB,UAAU,CAAC,EAAE,MAAM,CAAC;IACpB,KAAK,EAAE,IAAI,EAAE,CAAC;CACf;AAMD,MAAM,MAAM,UAAU,GAAG,MAAM,GAAG,QAAQ,GAAG,OAAO,CAAC;AAErD,MAAM,WAAW,gBAAgB;IAC/B,OAAO,CAAC,EAAE,MAAM,CAAC;IACjB,MAAM,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,MAAM,CAAC,CAAC;IAChC,SAAS,CAAC,EAAE,MAAM,CAAC;IACnB,QAAQ,CAAC,EAAE,OAAO,GAAG,eAAe,CAAC;CACtC;AAED,MAAM,WAAW,UAAU;IACzB,GAAG,EAAE,MAAM,CAAC;IACZ,eAAe,CAAC,EAAE,MAAM,CAAC;IACzB,YAAY,CAAC,EAAE,MAAM,CAAC;IACtB,QAAQ,CAAC,EAAE,MAAM,CAAC;IAClB,WAAW,CAAC,EAAE,MAAM,CAAC;IACrB,UAAU,CAAC,EAAE,MAAM,CAAC;IACpB,SAAS,CAAC,EAAE,MAAM,EAAE,CAAC;IACrB,aAAa,CAAC,EAAE,MAAM,CAAC;IACvB,WAAW,CAAC,EAAE,MAAM,CAAC;IACrB,OAAO,CAAC,EAAE,MAAM,CAAC;IACjB,MAAM,CAAC,EAAE,UAAU,CAAC;IACpB,WAAW,CAAC,EAAE,MAAM,CAAC;IACrB,MAAM,CAAC,EAAE,MAAM,CAAC;IAChB,UAAU,CAAC,EAAE,MAAM,EAAE,CAAC;IACtB,eAAe,CAAC,EAAE,MAAM,EAAE,CAAC;IAC3B,UAAU,CAAC,EAAE,gBAAgB,CAAC;IAC9B,GAAG,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,MAAM,CAAC,CAAC;IAC7B,KAAK,CAAC,EAAE,MAAM,EAAE,CAAC;IACjB,OAAO,CAAC,EAAE,MAAM,EAAE,CAAC;IACnB,QAAQ,CAAC,EAAE,MAAM,CAAC;IAClB,KAAK,EAAE,CAAC,IAAI,GAAG,MAAM,CAAC,EAAE,CAAC;CAC1B;AAMD,MAAM,MAAM,QAAQ,GAAG,MAAM,CAAC;AAE9B,MAAM,WAAW,WAAW;IAC1B,QAAQ,EAAE,QAAQ,CAAC;IACnB,OAAO,EAAE,MAAM,CAAC;IAChB,WAAW,EAAE,MAAM,CAAC;IACpB,WAAW,EAAE,MAAM,CAAC;IACpB,SAAS,EAAE,MAAM,CAAC;CACnB;AAMD,MAAM,WAAW,QAAQ;IACvB,MAAM,EAAE,MAAM,CAAC;IACf,MAAM,EAAE,OAAO,CAAC;IAChB,aAAa,CAAC,EAAE,MAAM,CAAC;IACvB,gBAAgB,EAAE,eAAe,EAAE,CAAC;IACpC,SAAS,EAAE,MAAM,CAAC;IAClB,gBAAgB,EAAE,MAAM,CAAC;IACzB,iBAAiB,EAAE,MAAM,CAAC;IAC1B,cAAc,EAAE,MAAM,CAAC;IACvB,WAAW,EAAE,MAAM,EAAE,CAAC;IACtB,UAAU,CAAC,EAAE,MAAM,GAAG,QAAQ,GAAG,MAAM,CAAC;IACxC,QAAQ,CAAC,EAAE,MAAM,CAAC;IAClB,iBAAiB,CAAC,EAAE,OAAO,EAAE,CAAC;IAC9B,cAAc,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC,CAAC;IACzC,WAAW,CAAC,EAAE,MAAM,CAAC;CACtB;AAMD,MAAM,WAAW,WAAW;IAC1B,QAAQ,EAAE,QAAQ,CAAC;IACnB,OAAO,EAAE,MAAM,CAAC;IAChB,WAAW,EAAE,MAAM,CAAC;IACpB,WAAW,EAAE,QAAQ,EAAE,CAAC;IACxB,QAAQ,EAAE,MAAM,CAAC;IACjB,iBAAiB,EAAE,MAAM,CAAC;IAC1B,cAAc,EAAE,MAAM,CAAC;IACvB,YAAY,EAAE,MAAM,CAAC;CACtB;AAED,MAAM,WAAW,eAAe;IAC9B,KAAK,EAAE,MAAM,CAAC;IACd,QAAQ,EAAE,MAAM,CAAC;IACjB,SAAS,EAAE,MAAM,CAAC;IAClB,SAAS,EAAE,MAAM,CAAC;IAClB,YAAY,EAAE,MAAM,CAAC;IACrB,MAAM,EAAE,OAAO,CAAC;CACjB;AAED,MAAM,WAAW,oBAAoB;IACnC,SAAS,EAAE,OAAO,CAAC;IACnB,OAAO,EAAE,eAAe,EAAE,CAAC;IAC3B,QAAQ,EAAE,OAAO,GAAG,eAAe,CAAC;CACrC;AAED,MAAM,WAAW,UAAU;IACzB,GAAG,EAAE,MAAM,CAAC;IACZ,UAAU,CAAC,EAAE,MAAM,CAAC;IACpB,gBAAgB,EAAE,MAAM,CAAC;IACzB,WAAW,EAAE,MAAM,CAAC;IACpB,SAAS,EAAE,MAAM,CAAC;IAClB,UAAU,CAAC,EAAE,MAAM,CAAC;IACpB,WAAW,EAAE,MAAM,CAAC;IACpB,YAAY,EAAE,WAAW,EAAE,CAAC;IAC5B,YAAY,CAAC,EAAE,MAAM,CAAC;IAEtB,WAAW,CAAC,EAAE,MAAM,CAAC;IACrB,QAAQ,CAAC,EAAE,MAAM,CAAC;IAClB,UAAU,CAAC,EAAE,MAAM,CAAC;IACpB,SAAS,CAAC,EAAE,MAAM,CAAC;IAEnB,MAAM,CAAC,EAAE,MAAM,CAAC;IAChB,MAAM,CAAC,EAAE,MAAM,CAAC;IAChB,UAAU,CAAC,EAAE,MAAM,CAAC;IACpB,UAAU,CAAC,EAAE,MAAM,CAAC;IACpB,aAAa,CAAC,EAAE,MAAM,CAAC;IACvB,SAAS,CAAC,EAAE,MAAM,CAAC;IACnB,UAAU,CAAC,EAAE,MAAM,CAAC;IACpB,UAAU,CAAC,EAAE,MAAM,CAAC;IACpB,OAAO,CAAC,EAAE,MAAM,CAAC;IACjB,QAAQ,CAAC,EAAE,MAAM,CAAC;IAClB,UAAU,CAAC,EAAE,MAAM,CAAC;IACpB,IAAI,CAAC,EAAE,MAAM,EAAE,CAAC;IAChB,aAAa,CAAC,EAAE,MAAM,CAAC;IACvB,gBAAgB,CAAC,EAAE,MAAM,CAAC;IAC1B,YAAY,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,MAAM,CAAC,CAAC;IACtC,gBAAgB,CAAC,EAAE,oBAAoB,CAAC;CACzC"}
|
package/dist/models.js
CHANGED
package/dist/models.js.map
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"models.js","sourceRoot":"","sources":["../src/models.ts"],"names":[],"mappings":"AAAA;;;;;GAKG
|
|
1
|
+
{"version":3,"file":"models.js","sourceRoot":"","sources":["../src/models.ts"],"names":[],"mappings":"AAAA;;;;;GAKG"}
|
package/dist/project.d.ts
CHANGED
|
@@ -9,13 +9,21 @@ import type { Task, ConfigFile } from './models.js';
|
|
|
9
9
|
export declare function resolveConfigFile(explicitPath?: string): Promise<string | null>;
|
|
10
10
|
/**
|
|
11
11
|
* Parse and validate a cli-bench.yaml config file.
|
|
12
|
+
* Returns the parsed config and the raw file content.
|
|
12
13
|
*/
|
|
13
|
-
export declare function loadConfigFile(path: string): Promise<
|
|
14
|
+
export declare function loadConfigFile(path: string): Promise<{
|
|
15
|
+
config: ConfigFile;
|
|
16
|
+
rawContent: string;
|
|
17
|
+
}>;
|
|
14
18
|
/**
|
|
15
19
|
* Resolve file:// references and inline tasks into a flat task array.
|
|
16
20
|
* Deduplicates by task ID (first occurrence wins).
|
|
21
|
+
* Also tracks all loaded file paths and their raw contents.
|
|
17
22
|
*/
|
|
18
|
-
export declare function resolveTaskRefs(tasks: (Task | string)[], baseDir: string): Promise<
|
|
23
|
+
export declare function resolveTaskRefs(tasks: (Task | string)[], baseDir: string): Promise<{
|
|
24
|
+
tasks: Task[];
|
|
25
|
+
loadedFiles: Map<string, string>;
|
|
26
|
+
}>;
|
|
19
27
|
/**
|
|
20
28
|
* Load config file and resolve all task references.
|
|
21
29
|
*/
|
|
@@ -23,5 +31,6 @@ export declare function loadProject(configPath: string): Promise<{
|
|
|
23
31
|
config: ConfigFile;
|
|
24
32
|
tasks: Task[];
|
|
25
33
|
taskSuiteContent: string;
|
|
34
|
+
projectFiles: Record<string, string>;
|
|
26
35
|
}>;
|
|
27
36
|
//# sourceMappingURL=project.d.ts.map
|
package/dist/project.d.ts.map
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"project.d.ts","sourceRoot":"","sources":["../src/project.ts"],"names":[],"mappings":"AAAA;;;GAGG;AAOH,OAAO,KAAK,EAAE,IAAI,EAAE,UAAU,EAAE,MAAM,aAAa,CAAC;AAIpD;;GAEG;AACH,wBAAsB,iBAAiB,CAAC,YAAY,CAAC,EAAE,MAAM,GAAG,OAAO,CAAC,MAAM,GAAG,IAAI,CAAC,CAqBrF;AAED
|
|
1
|
+
{"version":3,"file":"project.d.ts","sourceRoot":"","sources":["../src/project.ts"],"names":[],"mappings":"AAAA;;;GAGG;AAOH,OAAO,KAAK,EAAE,IAAI,EAAE,UAAU,EAAE,MAAM,aAAa,CAAC;AAIpD;;GAEG;AACH,wBAAsB,iBAAiB,CAAC,YAAY,CAAC,EAAE,MAAM,GAAG,OAAO,CAAC,MAAM,GAAG,IAAI,CAAC,CAqBrF;AAED;;;GAGG;AACH,wBAAsB,cAAc,CAAC,IAAI,EAAE,MAAM,GAAG,OAAO,CAAC;IAAE,MAAM,EAAE,UAAU,CAAC;IAAC,UAAU,EAAE,MAAM,CAAA;CAAE,CAAC,CAKtG;AAED;;;;GAIG;AACH,wBAAsB,eAAe,CACnC,KAAK,EAAE,CAAC,IAAI,GAAG,MAAM,CAAC,EAAE,EACxB,OAAO,EAAE,MAAM,GACd,OAAO,CAAC;IAAE,KAAK,EAAE,IAAI,EAAE,CAAC;IAAC,WAAW,EAAE,GAAG,CAAC,MAAM,EAAE,MAAM,CAAC,CAAA;CAAE,CAAC,CAgD9D;AAsDD;;GAEG;AACH,wBAAsB,WAAW,CAAC,UAAU,EAAE,MAAM,GAAG,OAAO,CAAC;IAC7D,MAAM,EAAE,UAAU,CAAC;IACnB,KAAK,EAAE,IAAI,EAAE,CAAC;IACd,gBAAgB,EAAE,MAAM,CAAC;IACzB,YAAY,EAAE,MAAM,CAAC,MAAM,EAAE,MAAM,CAAC,CAAC;CACtC,CAAC,CA+CD"}
|