@cliwatch/cli-bench 0.6.3 → 0.7.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (71) hide show
  1. package/CHANGELOG.md +14 -0
  2. package/README.md +3 -0
  3. package/dist/assertions.d.ts +1 -1
  4. package/dist/assertions.d.ts.map +1 -1
  5. package/dist/assertions.js +6 -6
  6. package/dist/assertions.js.map +1 -1
  7. package/dist/ci.d.ts.map +1 -1
  8. package/dist/ci.js +14 -0
  9. package/dist/ci.js.map +1 -1
  10. package/dist/client/index.d.ts +1 -1
  11. package/dist/client/index.d.ts.map +1 -1
  12. package/dist/client/types.gen.d.ts +143 -93
  13. package/dist/client/types.gen.d.ts.map +1 -1
  14. package/dist/client/zod.gen.d.ts +75 -42
  15. package/dist/client/zod.gen.d.ts.map +1 -1
  16. package/dist/client/zod.gen.js +86 -54
  17. package/dist/client/zod.gen.js.map +1 -1
  18. package/dist/config.d.ts +2 -3
  19. package/dist/config.d.ts.map +1 -1
  20. package/dist/config.js +8 -15
  21. package/dist/config.js.map +1 -1
  22. package/dist/exec.d.ts +2 -0
  23. package/dist/exec.d.ts.map +1 -1
  24. package/dist/exec.js +6 -2
  25. package/dist/exec.js.map +1 -1
  26. package/dist/github-comment.d.ts +16 -0
  27. package/dist/github-comment.d.ts.map +1 -0
  28. package/dist/github-comment.js +90 -0
  29. package/dist/github-comment.js.map +1 -0
  30. package/dist/index.d.ts +2 -3
  31. package/dist/index.d.ts.map +1 -1
  32. package/dist/index.js +31 -36
  33. package/dist/index.js.map +1 -1
  34. package/dist/init.js +1 -1
  35. package/dist/models.d.ts +9 -9
  36. package/dist/models.d.ts.map +1 -1
  37. package/dist/models.js +1 -1
  38. package/dist/models.js.map +1 -1
  39. package/dist/project.d.ts +11 -2
  40. package/dist/project.d.ts.map +1 -1
  41. package/dist/project.js +108 -9
  42. package/dist/project.js.map +1 -1
  43. package/dist/prompt.d.ts +2 -8
  44. package/dist/prompt.d.ts.map +1 -1
  45. package/dist/prompt.js +2 -35
  46. package/dist/prompt.js.map +1 -1
  47. package/dist/providers.d.ts +9 -7
  48. package/dist/providers.d.ts.map +1 -1
  49. package/dist/providers.js +26 -8
  50. package/dist/providers.js.map +1 -1
  51. package/dist/runner.d.ts +32 -4
  52. package/dist/runner.d.ts.map +1 -1
  53. package/dist/runner.js +177 -177
  54. package/dist/runner.js.map +1 -1
  55. package/dist/schemas.d.ts +20 -1
  56. package/dist/schemas.d.ts.map +1 -1
  57. package/dist/schemas.js +8 -1
  58. package/dist/schemas.js.map +1 -1
  59. package/dist/suite-generator.d.ts.map +1 -1
  60. package/dist/suite-generator.js +93 -10
  61. package/dist/suite-generator.js.map +1 -1
  62. package/package.json +2 -2
  63. package/dist/help-loader.d.ts +0 -17
  64. package/dist/help-loader.d.ts.map +0 -1
  65. package/dist/help-loader.js +0 -65
  66. package/dist/help-loader.js.map +0 -1
  67. package/task_suites/curl.yaml +0 -138
  68. package/task_suites/docker.yaml +0 -163
  69. package/task_suites/gh.yaml +0 -118
  70. package/task_suites/jq.yaml +0 -172
  71. package/task_suites/kubectl.yaml +0 -74
package/dist/config.d.ts CHANGED
@@ -1,22 +1,21 @@
1
1
  /**
2
2
  * CLI argument parsing for @cliwatch/cli-bench.
3
3
  */
4
- import { type ContextMode } from './models.js';
5
4
  export interface Config {
6
5
  filter: string[];
7
6
  models: string[];
8
7
  output?: string;
9
8
  dryRun: boolean;
10
- helpCacheDir: string;
11
9
  concurrency: number;
12
10
  upload: boolean;
13
11
  backendUrl: string;
14
12
  apiKey: string;
15
- contextModes: ContextMode[];
16
13
  configFile?: string;
17
14
  initMode: boolean;
18
15
  workdir?: string;
19
16
  repeat?: number;
17
+ tags: string[];
18
+ githubCommentPath?: string;
20
19
  }
21
20
  export declare function parseArgs(argv: string[]): Config;
22
21
  //# sourceMappingURL=config.d.ts.map
@@ -1 +1 @@
1
- {"version":3,"file":"config.d.ts","sourceRoot":"","sources":["../src/config.ts"],"names":[],"mappings":"AAAA;;GAEG;AAEH,OAAO,EAAE,KAAK,WAAW,EAAiB,MAAM,aAAa,CAAC;AAE9D,MAAM,WAAW,MAAM;IACrB,MAAM,EAAE,MAAM,EAAE,CAAC;IACjB,MAAM,EAAE,MAAM,EAAE,CAAC;IACjB,MAAM,CAAC,EAAE,MAAM,CAAC;IAChB,MAAM,EAAE,OAAO,CAAC;IAChB,YAAY,EAAE,MAAM,CAAC;IACrB,WAAW,EAAE,MAAM,CAAC;IACpB,MAAM,EAAE,OAAO,CAAC;IAChB,UAAU,EAAE,MAAM,CAAC;IACnB,MAAM,EAAE,MAAM,CAAC;IACf,YAAY,EAAE,WAAW,EAAE,CAAC;IAC5B,UAAU,CAAC,EAAE,MAAM,CAAC;IACpB,QAAQ,EAAE,OAAO,CAAC;IAClB,OAAO,CAAC,EAAE,MAAM,CAAC;IACjB,MAAM,CAAC,EAAE,MAAM,CAAC;CACjB;AAED,wBAAgB,SAAS,CAAC,IAAI,EAAE,MAAM,EAAE,GAAG,MAAM,CAqEhD"}
1
+ {"version":3,"file":"config.d.ts","sourceRoot":"","sources":["../src/config.ts"],"names":[],"mappings":"AAAA;;GAEG;AAEH,MAAM,WAAW,MAAM;IACrB,MAAM,EAAE,MAAM,EAAE,CAAC;IACjB,MAAM,EAAE,MAAM,EAAE,CAAC;IACjB,MAAM,CAAC,EAAE,MAAM,CAAC;IAChB,MAAM,EAAE,OAAO,CAAC;IAChB,WAAW,EAAE,MAAM,CAAC;IACpB,MAAM,EAAE,OAAO,CAAC;IAChB,UAAU,EAAE,MAAM,CAAC;IACnB,MAAM,EAAE,MAAM,CAAC;IACf,UAAU,CAAC,EAAE,MAAM,CAAC;IACpB,QAAQ,EAAE,OAAO,CAAC;IAClB,OAAO,CAAC,EAAE,MAAM,CAAC;IACjB,MAAM,CAAC,EAAE,MAAM,CAAC;IAChB,IAAI,EAAE,MAAM,EAAE,CAAC;IACf,iBAAiB,CAAC,EAAE,MAAM,CAAC;CAC5B;AAED,wBAAgB,SAAS,CAAC,IAAI,EAAE,MAAM,EAAE,GAAG,MAAM,CAgEhD"}
package/dist/config.js CHANGED
@@ -1,21 +1,19 @@
1
1
  /**
2
2
  * CLI argument parsing for @cliwatch/cli-bench.
3
3
  */
4
- import { CONTEXT_MODES } from './models.js';
5
4
  export function parseArgs(argv) {
6
5
  const args = argv.slice(2);
7
6
  const config = {
8
7
  filter: [],
9
8
  models: [],
10
9
  dryRun: false,
11
- helpCacheDir: './help_cache',
12
10
  concurrency: 3,
13
11
  output: undefined,
14
12
  upload: false,
15
13
  backendUrl: process.env['CLIWATCH_BACKEND_URL'] ?? 'https://api.cliwatch.com',
16
14
  apiKey: process.env['CLIWATCH_API_KEY'] ?? '',
17
- contextModes: ['zero-shot'],
18
15
  initMode: false,
16
+ tags: [],
19
17
  };
20
18
  for (let i = 0; i < args.length; i++) {
21
19
  switch (args[i]) {
@@ -37,9 +35,6 @@ export function parseArgs(argv) {
37
35
  case '--dry-run':
38
36
  config.dryRun = true;
39
37
  break;
40
- case '--help-cache':
41
- config.helpCacheDir = args[++i] ?? './help_cache';
42
- break;
43
38
  case '--concurrency':
44
39
  config.concurrency = parseInt(args[++i] ?? '3', 10);
45
40
  break;
@@ -52,13 +47,11 @@ export function parseArgs(argv) {
52
47
  case '--repeat':
53
48
  config.repeat = parseInt(args[++i] ?? '1', 10);
54
49
  break;
55
- case '--context':
56
- config.contextModes = (args[++i] ?? 'zero-shot')
57
- .split(',')
58
- .map((s) => s.trim())
59
- .filter((s) => CONTEXT_MODES.includes(s));
60
- if (config.contextModes.length === 0)
61
- config.contextModes = ['zero-shot'];
50
+ case '--tags':
51
+ config.tags = (args[++i] ?? '').split(',').map((s) => s.trim()).filter(Boolean);
52
+ break;
53
+ case '--github-comment':
54
+ config.githubCommentPath = args[++i];
62
55
  break;
63
56
  case '--help':
64
57
  printUsage();
@@ -83,13 +76,13 @@ Options:
83
76
  --models <models> Comma-separated model IDs (default: all in config/registry)
84
77
  --output <file> Write JSON GridReport to file
85
78
  --dry-run Print prompt for first task without calling API
86
- --help-cache <dir> Directory with cached help text JSON files
87
- --context <modes> Comma-separated context modes: zero-shot,help,docs (default: zero-shot)
88
79
  --concurrency <n> Max concurrent API calls (default: 3)
89
80
  --workdir <dir> Working directory for commands
90
81
  --repeat <n> Run each task N times for statistical confidence (default: 1)
91
82
  Note: tasks with non-idempotent setup may collide across repeats
83
+ --tags <tags> Comma-separated task tags to include (default: all tasks)
92
84
  --upload POST GridReport to backend after run
85
+ --github-comment <path> Write PR comment markdown to file
93
86
  --help Show this help message`);
94
87
  }
95
88
  //# sourceMappingURL=config.js.map
@@ -1 +1 @@
1
- {"version":3,"file":"config.js","sourceRoot":"","sources":["../src/config.ts"],"names":[],"mappings":"AAAA;;GAEG;AAEH,OAAO,EAAoB,aAAa,EAAE,MAAM,aAAa,CAAC;AAmB9D,MAAM,UAAU,SAAS,CAAC,IAAc;IACtC,MAAM,IAAI,GAAG,IAAI,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC;IAC3B,MAAM,MAAM,GAAW;QACrB,MAAM,EAAE,EAAE;QACV,MAAM,EAAE,EAAE;QACV,MAAM,EAAE,KAAK;QACb,YAAY,EAAE,cAAc;QAC5B,WAAW,EAAE,CAAC;QACd,MAAM,EAAE,SAAS;QACjB,MAAM,EAAE,KAAK;QACb,UAAU,EAAE,OAAO,CAAC,GAAG,CAAC,sBAAsB,CAAC,IAAI,0BAA0B;QAC7E,MAAM,EAAE,OAAO,CAAC,GAAG,CAAC,kBAAkB,CAAC,IAAI,EAAE;QAC7C,YAAY,EAAE,CAAC,WAAW,CAAC;QAC3B,QAAQ,EAAE,KAAK;KAChB,CAAC;IAEF,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,IAAI,CAAC,MAAM,EAAE,CAAC,EAAE,EAAE,CAAC;QACrC,QAAQ,IAAI,CAAC,CAAC,CAAC,EAAE,CAAC;YAChB,KAAK,MAAM;gBACT,MAAM,CAAC,QAAQ,GAAG,IAAI,CAAC;gBACvB,MAAM;YACR,KAAK,UAAU;gBACb,MAAM,CAAC,UAAU,GAAG,IAAI,CAAC,EAAE,CAAC,CAAC,CAAC;gBAC9B,MAAM;YACR,KAAK,UAAU;gBACb,MAAM,CAAC,MAAM,GAAG,CAAC,IAAI,CAAC,EAAE,CAAC,CAAC,IAAI,EAAE,CAAC,CAAC,KAAK,CAAC,GAAG,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,IAAI,EAAE,CAAC,CAAC,MAAM,CAAC,OAAO,CAAC,CAAC;gBAClF,MAAM;YACR,KAAK,UAAU;gBACb,MAAM,CAAC,MAAM,GAAG,CAAC,IAAI,CAAC,EAAE,CAAC,CAAC,IAAI,EAAE,CAAC,CAAC,KAAK,CAAC,GAAG,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,IAAI,EAAE,CAAC,CAAC,MAAM,CAAC,OAAO,CAAC,CAAC;gBAClF,MAAM;YACR,KAAK,UAAU;gBACb,MAAM,CAAC,MAAM,GAAG,IAAI,CAAC,EAAE,CAAC,CAAC,CAAC;gBAC1B,MAAM;YACR,KAAK,WAAW;gBACd,MAAM,CAAC,MAAM,GAAG,IAAI,CAAC;gBACrB,MAAM;YACR,KAAK,cAAc;gBACjB,MAAM,CAAC,YAAY,GAAG,IAAI,CAAC,EAAE,CAAC,CAAC,IAAI,cAAc,CAAC;gBAClD,MAAM;YACR,KAAK,eAAe;gBAClB,MAAM,CAAC,WAAW,GAAG,QAAQ,CAAC,IAAI,CAAC,EAAE,CAAC,CAAC,IAAI,GAAG,EAAE,EAAE,CAAC,CAAC;gBACpD,MAAM;YACR,KAAK,UAAU;gBACb,MAAM,CAAC,MAAM,GAAG,IAAI,CAAC;gBACrB,MAAM;YACR,KAAK,WAAW;gBACd,MAAM,CAAC,OAAO,GAAG,IAAI,CAAC,EAAE,CAAC,CAAC,CAAC;gBAC3B,MAAM;YACR,KAAK,UAAU;gBACb,MAAM,CAAC,MAAM,GAAG,QAAQ,CAAC,IAAI,CAAC,EAAE,CAAC,CAAC,IAAI,GAAG,EAAE,EAAE,CAAC,CAAC;gBAC/C,MAAM;YACR,KAAK,WAAW;gBACd,MAAM,CAAC,YAAY,GAAG,CAAC,IAAI,CAAC,EAAE,CAAC,CAAC,IAAI,WAAW,CAAC;qBAC7C,KAAK,CAAC,GAAG,CAAC;qBACV,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,IAAI,EAAE,CAAC;qBACpB,MAAM,CAAC,CAAC,CAAC,EAAoB,EAAE,CAAE,aAA0B,CAAC,QAAQ,CAAC,CAAC,CAAC,CAAC,CAAC;gBAC5E,IAAI,MAAM,CAAC,YAAY,CAAC,MAAM,KAAK,CAAC;oBAAE,MAAM,CAAC,YAAY,GAAG,CAAC,WAAW,CAAC,CAAC;gBAC1E,MAAM;YACR,KAAK,QAAQ;gBACX,UAAU,EAAE,CAAC;gBACb,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;YAClB;gBACE,OAAO,CAAC,KAAK,CAAC,mBAAmB,IAAI,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC;gBAC5C,UAAU,EAAE,CAAC;gBACb,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;QACpB,CAAC;IACH,CAAC;IAED,OAAO,MAAM,CAAC;AAChB,CAAC;AAED,SAAS,UAAU;IACjB,OAAO,CAAC,GAAG,CAAC;;;;;;;;;;;;;;;;;;+CAkBiC,CAAC,CAAC;AACjD,CAAC"}
1
+ {"version":3,"file":"config.js","sourceRoot":"","sources":["../src/config.ts"],"names":[],"mappings":"AAAA;;GAEG;AAmBH,MAAM,UAAU,SAAS,CAAC,IAAc;IACtC,MAAM,IAAI,GAAG,IAAI,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC;IAC3B,MAAM,MAAM,GAAW;QACrB,MAAM,EAAE,EAAE;QACV,MAAM,EAAE,EAAE;QACV,MAAM,EAAE,KAAK;QACb,WAAW,EAAE,CAAC;QACd,MAAM,EAAE,SAAS;QACjB,MAAM,EAAE,KAAK;QACb,UAAU,EAAE,OAAO,CAAC,GAAG,CAAC,sBAAsB,CAAC,IAAI,0BAA0B;QAC7E,MAAM,EAAE,OAAO,CAAC,GAAG,CAAC,kBAAkB,CAAC,IAAI,EAAE;QAC7C,QAAQ,EAAE,KAAK;QACf,IAAI,EAAE,EAAE;KACT,CAAC;IAEF,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,IAAI,CAAC,MAAM,EAAE,CAAC,EAAE,EAAE,CAAC;QACrC,QAAQ,IAAI,CAAC,CAAC,CAAC,EAAE,CAAC;YAChB,KAAK,MAAM;gBACT,MAAM,CAAC,QAAQ,GAAG,IAAI,CAAC;gBACvB,MAAM;YACR,KAAK,UAAU;gBACb,MAAM,CAAC,UAAU,GAAG,IAAI,CAAC,EAAE,CAAC,CAAC,CAAC;gBAC9B,MAAM;YACR,KAAK,UAAU;gBACb,MAAM,CAAC,MAAM,GAAG,CAAC,IAAI,CAAC,EAAE,CAAC,CAAC,IAAI,EAAE,CAAC,CAAC,KAAK,CAAC,GAAG,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,IAAI,EAAE,CAAC,CAAC,MAAM,CAAC,OAAO,CAAC,CAAC;gBAClF,MAAM;YACR,KAAK,UAAU;gBACb,MAAM,CAAC,MAAM,GAAG,CAAC,IAAI,CAAC,EAAE,CAAC,CAAC,IAAI,EAAE,CAAC,CAAC,KAAK,CAAC,GAAG,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,IAAI,EAAE,CAAC,CAAC,MAAM,CAAC,OAAO,CAAC,CAAC;gBAClF,MAAM;YACR,KAAK,UAAU;gBACb,MAAM,CAAC,MAAM,GAAG,IAAI,CAAC,EAAE,CAAC,CAAC,CAAC;gBAC1B,MAAM;YACR,KAAK,WAAW;gBACd,MAAM,CAAC,MAAM,GAAG,IAAI,CAAC;gBACrB,MAAM;YACR,KAAK,eAAe;gBAClB,MAAM,CAAC,WAAW,GAAG,QAAQ,CAAC,IAAI,CAAC,EAAE,CAAC,CAAC,IAAI,GAAG,EAAE,EAAE,CAAC,CAAC;gBACpD,MAAM;YACR,KAAK,UAAU;gBACb,MAAM,CAAC,MAAM,GAAG,IAAI,CAAC;gBACrB,MAAM;YACR,KAAK,WAAW;gBACd,MAAM,CAAC,OAAO,GAAG,IAAI,CAAC,EAAE,CAAC,CAAC,CAAC;gBAC3B,MAAM;YACR,KAAK,UAAU;gBACb,MAAM,CAAC,MAAM,GAAG,QAAQ,CAAC,IAAI,CAAC,EAAE,CAAC,CAAC,IAAI,GAAG,EAAE,EAAE,CAAC,CAAC;gBAC/C,MAAM;YACR,KAAK,QAAQ;gBACX,MAAM,CAAC,IAAI,GAAG,CAAC,IAAI,CAAC,EAAE,CAAC,CAAC,IAAI,EAAE,CAAC,CAAC,KAAK,CAAC,GAAG,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,IAAI,EAAE,CAAC,CAAC,MAAM,CAAC,OAAO,CAAC,CAAC;gBAChF,MAAM;YACR,KAAK,kBAAkB;gBACrB,MAAM,CAAC,iBAAiB,GAAG,IAAI,CAAC,EAAE,CAAC,CAAC,CAAC;gBACrC,MAAM;YACR,KAAK,QAAQ;gBACX,UAAU,EAAE,CAAC;gBACb,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;YAClB;gBACE,OAAO,CAAC,KAAK,CAAC,mBAAmB,IAAI,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC;gBAC5C,UAAU,EAAE,CAAC;gBACb,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;QACpB,CAAC;IACH,CAAC;IAED,OAAO,MAAM,CAAC;AAChB,CAAC;AAED,SAAS,UAAU;IACjB,OAAO,CAAC,GAAG,CAAC;;;;;;;;;;;;;;;;;;+CAkBiC,CAAC,CAAC;AACjD,CAAC"}
package/dist/exec.d.ts CHANGED
@@ -11,8 +11,10 @@ export interface ExecResult {
11
11
  export declare function execCommand(command: string, opts?: {
12
12
  cwd?: string;
13
13
  timeout?: number;
14
+ env?: Record<string, string>;
14
15
  }): Promise<ExecResult>;
15
16
  export declare function runSetup(commands: string[], opts?: {
16
17
  cwd?: string;
18
+ env?: Record<string, string>;
17
19
  }): Promise<void>;
18
20
  //# sourceMappingURL=exec.d.ts.map
@@ -1 +1 @@
1
- {"version":3,"file":"exec.d.ts","sourceRoot":"","sources":["../src/exec.ts"],"names":[],"mappings":"AAAA;;;;GAIG;AAOH,MAAM,WAAW,UAAU;IACzB,MAAM,EAAE,MAAM,CAAC;IACf,MAAM,EAAE,MAAM,CAAC;IACf,QAAQ,EAAE,MAAM,CAAC;CAClB;AAED,wBAAsB,WAAW,CAC/B,OAAO,EAAE,MAAM,EACf,IAAI,CAAC,EAAE;IAAE,GAAG,CAAC,EAAE,MAAM,CAAC;IAAC,OAAO,CAAC,EAAE,MAAM,CAAA;CAAE,GACxC,OAAO,CAAC,UAAU,CAAC,CAkBrB;AAED,wBAAsB,QAAQ,CAC5B,QAAQ,EAAE,MAAM,EAAE,EAClB,IAAI,CAAC,EAAE;IAAE,GAAG,CAAC,EAAE,MAAM,CAAA;CAAE,GACtB,OAAO,CAAC,IAAI,CAAC,CAOf"}
1
+ {"version":3,"file":"exec.d.ts","sourceRoot":"","sources":["../src/exec.ts"],"names":[],"mappings":"AAAA;;;;GAIG;AAOH,MAAM,WAAW,UAAU;IACzB,MAAM,EAAE,MAAM,CAAC;IACf,MAAM,EAAE,MAAM,CAAC;IACf,QAAQ,EAAE,MAAM,CAAC;CAClB;AAED,wBAAsB,WAAW,CAC/B,OAAO,EAAE,MAAM,EACf,IAAI,CAAC,EAAE;IAAE,GAAG,CAAC,EAAE,MAAM,CAAC;IAAC,OAAO,CAAC,EAAE,MAAM,CAAC;IAAC,GAAG,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,MAAM,CAAC,CAAA;CAAE,GACtE,OAAO,CAAC,UAAU,CAAC,CAuBrB;AAED,wBAAsB,QAAQ,CAC5B,QAAQ,EAAE,MAAM,EAAE,EAClB,IAAI,CAAC,EAAE;IAAE,GAAG,CAAC,EAAE,MAAM,CAAC;IAAC,GAAG,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,MAAM,CAAC,CAAA;CAAE,GACpD,OAAO,CAAC,IAAI,CAAC,CAOf"}
package/dist/exec.js CHANGED
@@ -8,9 +8,13 @@ const EXEC_TIMEOUT_MS = 30_000;
8
8
  const MAX_OUTPUT_CHARS = 2_000;
9
9
  export async function execCommand(command, opts) {
10
10
  const timeout = opts?.timeout ?? EXEC_TIMEOUT_MS;
11
+ const env = opts?.env ? { ...process.env, ...opts.env } : undefined;
11
12
  return new Promise((resolve) => {
12
- execFile('sh', ['-c', command], { timeout, maxBuffer: 256 * 1024, cwd: opts?.cwd }, (err, stdout, stderr) => {
13
- const exitCode = err && 'code' in err ? err.code : err ? 1 : 0;
13
+ execFile('sh', ['-c', command], { timeout, maxBuffer: 256 * 1024, cwd: opts?.cwd, env }, (err, stdout, stderr) => {
14
+ const exitCode = err == null ? 0
15
+ : typeof err.code === 'number' ? err.code
16
+ : err.killed ? 137
17
+ : 1;
14
18
  resolve({
15
19
  stdout: (stdout ?? '').toString().slice(0, MAX_OUTPUT_CHARS),
16
20
  stderr: (stderr ?? '').toString().slice(0, MAX_OUTPUT_CHARS),
package/dist/exec.js.map CHANGED
@@ -1 +1 @@
1
- {"version":3,"file":"exec.js","sourceRoot":"","sources":["../src/exec.ts"],"names":[],"mappings":"AAAA;;;;GAIG;AAEH,OAAO,EAAE,QAAQ,EAAE,MAAM,oBAAoB,CAAC;AAE9C,MAAM,eAAe,GAAG,MAAM,CAAC;AAC/B,MAAM,gBAAgB,GAAG,KAAK,CAAC;AAQ/B,MAAM,CAAC,KAAK,UAAU,WAAW,CAC/B,OAAe,EACf,IAAyC;IAEzC,MAAM,OAAO,GAAG,IAAI,EAAE,OAAO,IAAI,eAAe,CAAC;IAEjD,OAAO,IAAI,OAAO,CAAC,CAAC,OAAO,EAAE,EAAE;QAC7B,QAAQ,CACN,IAAI,EACJ,CAAC,IAAI,EAAE,OAAO,CAAC,EACf,EAAE,OAAO,EAAE,SAAS,EAAE,GAAG,GAAG,IAAI,EAAE,GAAG,EAAE,IAAI,EAAE,GAAG,EAAE,EAClD,CAAC,GAAG,EAAE,MAAM,EAAE,MAAM,EAAE,EAAE;YACtB,MAAM,QAAQ,GAAG,GAAG,IAAI,MAAM,IAAI,GAAG,CAAC,CAAC,CAAE,GAAG,CAAC,IAAe,CAAC,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC;YAC3E,OAAO,CAAC;gBACN,MAAM,EAAE,CAAC,MAAM,IAAI,EAAE,CAAC,CAAC,QAAQ,EAAE,CAAC,KAAK,CAAC,CAAC,EAAE,gBAAgB,CAAC;gBAC5D,MAAM,EAAE,CAAC,MAAM,IAAI,EAAE,CAAC,CAAC,QAAQ,EAAE,CAAC,KAAK,CAAC,CAAC,EAAE,gBAAgB,CAAC;gBAC5D,QAAQ;aACT,CAAC,CAAC;QACL,CAAC,CACF,CAAC;IACJ,CAAC,CAAC,CAAC;AACL,CAAC;AAED,MAAM,CAAC,KAAK,UAAU,QAAQ,CAC5B,QAAkB,EAClB,IAAuB;IAEvB,KAAK,MAAM,GAAG,IAAI,QAAQ,EAAE,CAAC;QAC3B,MAAM,MAAM,GAAG,MAAM,WAAW,CAAC,GAAG,EAAE,IAAI,CAAC,CAAC;QAC5C,IAAI,MAAM,CAAC,QAAQ,KAAK,CAAC,EAAE,CAAC;YAC1B,OAAO,CAAC,IAAI,CAAC,uCAAuC,GAAG,eAAe,MAAM,CAAC,MAAM,EAAE,CAAC,CAAC;QACzF,CAAC;IACH,CAAC;AACH,CAAC"}
1
+ {"version":3,"file":"exec.js","sourceRoot":"","sources":["../src/exec.ts"],"names":[],"mappings":"AAAA;;;;GAIG;AAEH,OAAO,EAAE,QAAQ,EAAE,MAAM,oBAAoB,CAAC;AAE9C,MAAM,eAAe,GAAG,MAAM,CAAC;AAC/B,MAAM,gBAAgB,GAAG,KAAK,CAAC;AAQ/B,MAAM,CAAC,KAAK,UAAU,WAAW,CAC/B,OAAe,EACf,IAAuE;IAEvE,MAAM,OAAO,GAAG,IAAI,EAAE,OAAO,IAAI,eAAe,CAAC;IACjD,MAAM,GAAG,GAAG,IAAI,EAAE,GAAG,CAAC,CAAC,CAAC,EAAE,GAAG,OAAO,CAAC,GAAG,EAAE,GAAG,IAAI,CAAC,GAAG,EAAE,CAAC,CAAC,CAAC,SAAS,CAAC;IAEpE,OAAO,IAAI,OAAO,CAAC,CAAC,OAAO,EAAE,EAAE;QAC7B,QAAQ,CACN,IAAI,EACJ,CAAC,IAAI,EAAE,OAAO,CAAC,EACf,EAAE,OAAO,EAAE,SAAS,EAAE,GAAG,GAAG,IAAI,EAAE,GAAG,EAAE,IAAI,EAAE,GAAG,EAAE,GAAG,EAAE,EACvD,CAAC,GAAG,EAAE,MAAM,EAAE,MAAM,EAAE,EAAE;YACtB,MAAM,QAAQ,GACZ,GAAG,IAAI,IAAI,CAAC,CAAC,CAAC,CAAC;gBACf,CAAC,CAAC,OAAO,GAAG,CAAC,IAAI,KAAK,QAAQ,CAAC,CAAC,CAAC,GAAG,CAAC,IAAI;oBACzC,CAAC,CAAC,GAAG,CAAC,MAAM,CAAC,CAAC,CAAC,GAAG;wBAClB,CAAC,CAAC,CAAC,CAAC;YACN,OAAO,CAAC;gBACN,MAAM,EAAE,CAAC,MAAM,IAAI,EAAE,CAAC,CAAC,QAAQ,EAAE,CAAC,KAAK,CAAC,CAAC,EAAE,gBAAgB,CAAC;gBAC5D,MAAM,EAAE,CAAC,MAAM,IAAI,EAAE,CAAC,CAAC,QAAQ,EAAE,CAAC,KAAK,CAAC,CAAC,EAAE,gBAAgB,CAAC;gBAC5D,QAAQ;aACT,CAAC,CAAC;QACL,CAAC,CACF,CAAC;IACJ,CAAC,CAAC,CAAC;AACL,CAAC;AAED,MAAM,CAAC,KAAK,UAAU,QAAQ,CAC5B,QAAkB,EAClB,IAAqD;IAErD,KAAK,MAAM,GAAG,IAAI,QAAQ,EAAE,CAAC;QAC3B,MAAM,MAAM,GAAG,MAAM,WAAW,CAAC,GAAG,EAAE,IAAI,CAAC,CAAC;QAC5C,IAAI,MAAM,CAAC,QAAQ,KAAK,CAAC,EAAE,CAAC;YAC1B,OAAO,CAAC,IAAI,CAAC,uCAAuC,GAAG,eAAe,MAAM,CAAC,MAAM,EAAE,CAAC,CAAC;QACzF,CAAC;IACH,CAAC;AACH,CAAC"}
@@ -0,0 +1,16 @@
1
+ /**
2
+ * Generate a GitHub PR comment summarizing benchmark results.
3
+ *
4
+ * Pure function, no side effects. The caller is responsible for writing
5
+ * the returned markdown to a file; the CI workflow posts it as a comment.
6
+ */
7
+ import type { GridReport } from './models.js';
8
+ /**
9
+ * Build a markdown PR comment from a GridReport.
10
+ *
11
+ * @param report - The completed grid report
12
+ * @param dashboardUrl - Optional link to the CLIWatch dashboard for this CLI
13
+ * @returns Markdown string ready to be posted as a GitHub PR comment
14
+ */
15
+ export declare function formatPrComment(report: GridReport, dashboardUrl?: string): string;
16
+ //# sourceMappingURL=github-comment.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"github-comment.d.ts","sourceRoot":"","sources":["../src/github-comment.ts"],"names":[],"mappings":"AAAA;;;;;GAKG;AAEH,OAAO,KAAK,EAAE,UAAU,EAAyB,MAAM,aAAa,CAAC;AAQrE;;;;;;GAMG;AACH,wBAAgB,eAAe,CAAC,MAAM,EAAE,UAAU,EAAE,YAAY,CAAC,EAAE,MAAM,GAAG,MAAM,CAyDjF"}
@@ -0,0 +1,90 @@
1
+ /**
2
+ * Generate a GitHub PR comment summarizing benchmark results.
3
+ *
4
+ * Pure function, no side effects. The caller is responsible for writing
5
+ * the returned markdown to a file; the CI workflow posts it as a comment.
6
+ */
7
+ /**
8
+ * Build a markdown PR comment from a GridReport.
9
+ *
10
+ * @param report - The completed grid report
11
+ * @param dashboardUrl - Optional link to the CLIWatch dashboard for this CLI
12
+ * @returns Markdown string ready to be posted as a GitHub PR comment
13
+ */
14
+ export function formatPrComment(report, dashboardUrl) {
15
+ const lines = [];
16
+ // HTML comment marker for upsert (find-and-replace existing comment)
17
+ lines.push(`<!-- cliwatch-bench-${report.cli} -->`);
18
+ // Header
19
+ const name = report.displayName ?? report.cli;
20
+ const version = report.cliVersion ? ` ${report.cliVersion}` : '';
21
+ lines.push(`### CLIWatch | ${name}${version}`);
22
+ lines.push('');
23
+ // Metadata line
24
+ const parts = [];
25
+ parts.push(`${report.taskCount} tasks`);
26
+ if (report.gitSha) {
27
+ parts.push(`\`${report.gitSha.slice(0, 7)}\``);
28
+ }
29
+ if (dashboardUrl) {
30
+ parts.push(`[View details](${dashboardUrl})`);
31
+ }
32
+ lines.push(parts.join(' | '));
33
+ lines.push('');
34
+ // Model summary table
35
+ lines.push('| Model | Pass Rate | Avg Turns |');
36
+ lines.push('|:------|----------:|----------:|');
37
+ for (const mr of report.modelResults) {
38
+ const total = mr.taskResults.length;
39
+ const passed = mr.taskResults.filter((t) => t.passed).length;
40
+ const pct = total > 0 ? Math.round(mr.passRate * 100) : 0;
41
+ const turns = mr.avgTurnsToSuccess.toFixed(1);
42
+ lines.push(`| ${mr.displayName} | **${pct}%** (${passed}/${total}) | ${turns} |`);
43
+ }
44
+ lines.push('');
45
+ // Collect all failing tasks
46
+ const failures = collectFailures(report.modelResults);
47
+ if (failures.length === 0) {
48
+ lines.push('All tasks passed.');
49
+ }
50
+ else {
51
+ lines.push(`<details>`);
52
+ lines.push(`<summary>${failures.length} failing task${failures.length === 1 ? '' : 's'}</summary>`);
53
+ lines.push('');
54
+ lines.push('| Task | Model | Reason |');
55
+ lines.push('|:-----|:------|:-------|');
56
+ for (const f of failures) {
57
+ lines.push(`| \`${f.taskId}\` | ${f.modelName} | ${f.reason} |`);
58
+ }
59
+ lines.push('');
60
+ lines.push('</details>');
61
+ }
62
+ return lines.join('\n');
63
+ }
64
+ function collectFailures(modelResults) {
65
+ const failures = [];
66
+ for (const mr of modelResults) {
67
+ for (const tr of mr.taskResults) {
68
+ if (!tr.passed) {
69
+ failures.push({
70
+ taskId: tr.taskId,
71
+ modelName: mr.displayName,
72
+ reason: truncateReason(tr),
73
+ });
74
+ }
75
+ }
76
+ }
77
+ return failures;
78
+ }
79
+ const MAX_REASON_LENGTH = 100;
80
+ function truncateReason(tr) {
81
+ const raw = tr.failureReason ?? 'unknown';
82
+ // Collapse newlines first (would break table rows), then truncate, then escape pipes.
83
+ // Truncate before escaping so we never slice through a \| escape sequence.
84
+ const flat = raw.replace(/\n/g, ' ');
85
+ const truncated = flat.length > MAX_REASON_LENGTH
86
+ ? flat.slice(0, MAX_REASON_LENGTH - 3) + '...'
87
+ : flat;
88
+ return truncated.replace(/\|/g, '\\|');
89
+ }
90
+ //# sourceMappingURL=github-comment.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"github-comment.js","sourceRoot":"","sources":["../src/github-comment.ts"],"names":[],"mappings":"AAAA;;;;;GAKG;AAUH;;;;;;GAMG;AACH,MAAM,UAAU,eAAe,CAAC,MAAkB,EAAE,YAAqB;IACvE,MAAM,KAAK,GAAa,EAAE,CAAC;IAE3B,qEAAqE;IACrE,KAAK,CAAC,IAAI,CAAC,uBAAuB,MAAM,CAAC,GAAG,MAAM,CAAC,CAAC;IAEpD,SAAS;IACT,MAAM,IAAI,GAAG,MAAM,CAAC,WAAW,IAAI,MAAM,CAAC,GAAG,CAAC;IAC9C,MAAM,OAAO,GAAG,MAAM,CAAC,UAAU,CAAC,CAAC,CAAC,IAAI,MAAM,CAAC,UAAU,EAAE,CAAC,CAAC,CAAC,EAAE,CAAC;IACjE,KAAK,CAAC,IAAI,CAAC,kBAAkB,IAAI,GAAG,OAAO,EAAE,CAAC,CAAC;IAC/C,KAAK,CAAC,IAAI,CAAC,EAAE,CAAC,CAAC;IAEf,gBAAgB;IAChB,MAAM,KAAK,GAAa,EAAE,CAAC;IAC3B,KAAK,CAAC,IAAI,CAAC,GAAG,MAAM,CAAC,SAAS,QAAQ,CAAC,CAAC;IACxC,IAAI,MAAM,CAAC,MAAM,EAAE,CAAC;QAClB,KAAK,CAAC,IAAI,CAAC,KAAK,MAAM,CAAC,MAAM,CAAC,KAAK,CAAC,CAAC,EAAE,CAAC,CAAC,IAAI,CAAC,CAAC;IACjD,CAAC;IACD,IAAI,YAAY,EAAE,CAAC;QACjB,KAAK,CAAC,IAAI,CAAC,kBAAkB,YAAY,GAAG,CAAC,CAAC;IAChD,CAAC;IACD,KAAK,CAAC,IAAI,CAAC,KAAK,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC,CAAC;IAC9B,KAAK,CAAC,IAAI,CAAC,EAAE,CAAC,CAAC;IAEf,sBAAsB;IACtB,KAAK,CAAC,IAAI,CAAC,mCAAmC,CAAC,CAAC;IAChD,KAAK,CAAC,IAAI,CAAC,mCAAmC,CAAC,CAAC;IAEhD,KAAK,MAAM,EAAE,IAAI,MAAM,CAAC,YAAY,EAAE,CAAC;QACrC,MAAM,KAAK,GAAG,EAAE,CAAC,WAAW,CAAC,MAAM,CAAC;QACpC,MAAM,MAAM,GAAG,EAAE,CAAC,WAAW,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,MAAM,CAAC,CAAC,MAAM,CAAC;QAC7D,MAAM,GAAG,GAAG,KAAK,GAAG,CAAC,CAAC,CAAC,CAAC,IAAI,CAAC,KAAK,CAAC,EAAE,CAAC,QAAQ,GAAG,GAAG,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC;QAC1D,MAAM,KAAK,GAAG,EAAE,CAAC,iBAAiB,CAAC,OAAO,CAAC,CAAC,CAAC,CAAC;QAC9C,KAAK,CAAC,IAAI,CAAC,KAAK,EAAE,CAAC,WAAW,QAAQ,GAAG,QAAQ,MAAM,IAAI,KAAK,OAAO,KAAK,IAAI,CAAC,CAAC;IACpF,CAAC;IAED,KAAK,CAAC,IAAI,CAAC,EAAE,CAAC,CAAC;IAEf,4BAA4B;IAC5B,MAAM,QAAQ,GAAG,eAAe,CAAC,MAAM,CAAC,YAAY,CAAC,CAAC;IAEtD,IAAI,QAAQ,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;QAC1B,KAAK,CAAC,IAAI,CAAC,mBAAmB,CAAC,CAAC;IAClC,CAAC;SAAM,CAAC;QACN,KAAK,CAAC,IAAI,CAAC,WAAW,CAAC,CAAC;QACxB,KAAK,CAAC,IAAI,CAAC,YAAY,QAAQ,CAAC,MAAM,gBAAgB,QAAQ,CAAC,MAAM,KAAK,CAAC,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,GAAG,YAAY,CAAC,CAAC;QACpG,KAAK,CAAC,IAAI,CAAC,EAAE,CAAC,CAAC;QACf,KAAK,CAAC,IAAI,CAAC,2BAA2B,CAAC,CAAC;QACxC,KAAK,CAAC,IAAI,CAAC,2BAA2B,CAAC,CAAC;QACxC,KAAK,MAAM,CAAC,IAAI,QAAQ,EAAE,CAAC;YACzB,KAAK,CAAC,IAAI,CAAC,OAAO,CAAC,CAAC,MAAM,QAAQ,CAAC,CAAC,SAAS,MAAM,CAAC,CAAC,MAAM,IAAI,CAAC,CAAC;QACnE,CAAC;QACD,KAAK,CAAC,IAAI,CAAC,EAAE,CAAC,CAAC;QACf,KAAK,CAAC,IAAI,CAAC,YAAY,CAAC,CAAC;IAC3B,CAAC;IAED,OAAO,KAAK,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;AAC1B,CAAC;AAED,SAAS,eAAe,CAAC,YAA2B;IAClD,MAAM,QAAQ,GAAkB,EAAE,CAAC;IACnC,KAAK,MAAM,EAAE,IAAI,YAAY,EAAE,CAAC;QAC9B,KAAK,MAAM,EAAE,IAAI,EAAE,CAAC,WAAW,EAAE,CAAC;YAChC,IAAI,CAAC,EAAE,CAAC,MAAM,EAAE,CAAC;gBACf,QAAQ,CAAC,IAAI,CAAC;oBACZ,MAAM,EAAE,EAAE,CAAC,MAAM;oBACjB,SAAS,EAAE,EAAE,CAAC,WAAW;oBACzB,MAAM,EAAE,cAAc,CAAC,EAAE,CAAC;iBAC3B,CAAC,CAAC;YACL,CAAC;QACH,CAAC;IACH,CAAC;IACD,OAAO,QAAQ,CAAC;AAClB,CAAC;AAED,MAAM,iBAAiB,GAAG,GAAG,CAAC;AAE9B,SAAS,cAAc,CAAC,EAAY;IAClC,MAAM,GAAG,GAAG,EAAE,CAAC,aAAa,IAAI,SAAS,CAAC;IAC1C,sFAAsF;IACtF,2EAA2E;IAC3E,MAAM,IAAI,GAAG,GAAG,CAAC,OAAO,CAAC,KAAK,EAAE,GAAG,CAAC,CAAC;IACrC,MAAM,SAAS,GAAG,IAAI,CAAC,MAAM,GAAG,iBAAiB;QAC/C,CAAC,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC,EAAE,iBAAiB,GAAG,CAAC,CAAC,GAAG,KAAK;QAC9C,CAAC,CAAC,IAAI,CAAC;IACT,OAAO,SAAS,CAAC,OAAO,CAAC,KAAK,EAAE,KAAK,CAAC,CAAC;AACzC,CAAC"}
package/dist/index.d.ts CHANGED
@@ -5,10 +5,9 @@
5
5
  * Tests CLI agent-readiness by having LLMs execute tasks,
6
6
  * then validating results with assertion-based checks.
7
7
  *
8
- * Dual-mode entry:
8
+ * Entry modes:
9
9
  * 1. Config file mode: cli-bench.yaml found → load config → run grid
10
- * 2. Legacy mode: no config → discover task_suites/ → run grid
11
- * 3. Init mode: scaffold cli-bench.yaml
10
+ * 2. Init mode: scaffold cli-bench.yaml
12
11
  */
13
12
  export {};
14
13
  //# sourceMappingURL=index.d.ts.map
@@ -1 +1 @@
1
- {"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../src/index.ts"],"names":[],"mappings":";AAEA;;;;;;;;;;GAUG"}
1
+ {"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../src/index.ts"],"names":[],"mappings":";AAEA;;;;;;;;;GASG"}
package/dist/index.js CHANGED
@@ -5,20 +5,18 @@
5
5
  * Tests CLI agent-readiness by having LLMs execute tasks,
6
6
  * then validating results with assertion-based checks.
7
7
  *
8
- * Dual-mode entry:
8
+ * Entry modes:
9
9
  * 1. Config file mode: cli-bench.yaml found → load config → run grid
10
- * 2. Legacy mode: no config → discover task_suites/ → run grid
11
- * 3. Init mode: scaffold cli-bench.yaml
10
+ * 2. Init mode: scaffold cli-bench.yaml
12
11
  */
13
12
  import { writeFile } from 'node:fs/promises';
14
- import { dirname } from 'node:path';
15
13
  import { parseArgs } from './config.js';
16
14
  import { runGrid, uploadReport } from './runner.js';
17
15
  import { resolveConfigFile, loadProject } from './project.js';
18
16
  import { scaffoldProject } from './init.js';
19
- import { validateGatewayKey, resolveProviders } from './providers.js';
20
- import { CONTEXT_MODES } from './models.js';
17
+ import { validateApiKeys, resolveProviders } from './providers.js';
21
18
  import { checkThresholds, printThresholdResults } from './thresholds.js';
19
+ import { formatPrComment } from './github-comment.js';
22
20
  async function main() {
23
21
  const config = parseArgs(process.argv);
24
22
  // Init mode — scaffold and exit
@@ -34,7 +32,7 @@ async function main() {
34
32
  }
35
33
  return;
36
34
  }
37
- console.log('@cliwatch/cli-bench v0.6.3');
35
+ console.log('@cliwatch/cli-bench v0.7.1');
38
36
  // Try to find a config file
39
37
  const configPath = await resolveConfigFile(config.configFile);
40
38
  let reports;
@@ -43,15 +41,20 @@ async function main() {
43
41
  if (configPath) {
44
42
  // Config file mode
45
43
  console.log(`Config: ${configPath}`);
46
- const { config: fileConfig, tasks, taskSuiteContent } = await loadProject(configPath);
44
+ const { config: fileConfig, tasks: allTasks, taskSuiteContent, projectFiles } = await loadProject(configPath);
47
45
  thresholdsConfig = fileConfig.thresholds;
46
+ // Filter tasks by tags if --tags was provided
47
+ const tasks = config.tags.length > 0
48
+ ? allTasks.filter((t) => t.tags?.some((tag) => config.tags.includes(tag)))
49
+ : allTasks;
50
+ if (tasks.length === 0) {
51
+ console.error(`No tasks match tags: ${config.tags.join(', ')}`);
52
+ process.exit(1);
53
+ }
48
54
  // Merge CLI args with file config
49
55
  const providers = config.models.length > 0
50
56
  ? config.models
51
57
  : fileConfig.providers ?? ['anthropic/claude-sonnet-4-20250514'];
52
- const contextModes = fileConfig.context
53
- ? fileConfig.context.filter((s) => CONTEXT_MODES.includes(s))
54
- : config.contextModes;
55
58
  const concurrency = fileConfig.concurrency ?? config.concurrency;
56
59
  // Determine upload behavior
57
60
  const uploadMode = fileConfig.upload ?? 'auto';
@@ -61,16 +64,15 @@ async function main() {
61
64
  console.log(`CLI: ${fileConfig.cli}`);
62
65
  console.log(`Providers: ${providers.join(', ')}`);
63
66
  console.log(`Tasks: ${tasks.length}`);
64
- console.log(`Context: ${contextModes.join(', ')}`);
65
67
  console.log(`Dry run: ${config.dryRun}`);
66
- // Validate gateway key before running
68
+ // Validate API keys before running
67
69
  if (!config.dryRun) {
68
- validateGatewayKey();
70
+ validateApiKeys(providers);
69
71
  }
70
72
  const models = resolveProviders(providers);
71
73
  const globalRepeat = config.repeat ?? fileConfig.repeat;
72
74
  reports = await runGrid({
73
- config: { ...config, concurrency, contextModes },
75
+ config: { ...config, concurrency },
74
76
  tasks,
75
77
  cliName: fileConfig.cli,
76
78
  models,
@@ -83,9 +85,13 @@ async function main() {
83
85
  websiteUrl: fileConfig.website_url,
84
86
  githubUrl: fileConfig.github_url,
85
87
  taskSuiteContent,
86
- configDir: dirname(configPath),
87
88
  redactEnvVars: fileConfig.redact_env,
88
89
  redactPatterns: fileConfig.redact_patterns,
90
+ fileEnv: fileConfig.env,
91
+ fileSetup: fileConfig.setup,
92
+ fileCleanup: fileConfig.cleanup,
93
+ fileScaffold: fileConfig.scaffold,
94
+ projectFiles,
89
95
  });
90
96
  // Check thresholds before upload so results are included in the payload
91
97
  if (thresholdsConfig && reports.length > 0 && !config.dryRun) {
@@ -111,27 +117,16 @@ async function main() {
111
117
  }
112
118
  }
113
119
  }
120
+ // Write PR comment markdown if requested
121
+ if (config.githubCommentPath && reports.length > 0 && !config.dryRun) {
122
+ const markdown = formatPrComment(reports[0]);
123
+ await writeFile(config.githubCommentPath, markdown, 'utf-8');
124
+ console.log(`\nPR comment written to ${config.githubCommentPath}`);
125
+ }
114
126
  }
115
127
  else {
116
- // Legacy task_suites/ discovery mode
117
- console.log(`Filter: ${config.filter.length > 0 ? config.filter.join(', ') : 'all'}`);
118
- console.log(`Models: ${config.models.length > 0 ? config.models.join(', ') : 'all'}`);
119
- console.log(`Context: ${config.contextModes.join(', ')}`);
120
- console.log(`Dry run: ${config.dryRun}`);
121
- if (!config.dryRun && config.models.length > 0) {
122
- validateGatewayKey();
123
- }
124
- reports = await runGrid({ config, globalRepeat: config.repeat });
125
- if (config.upload) {
126
- for (const report of reports) {
127
- try {
128
- await uploadReport(report, config.backendUrl, config.apiKey);
129
- }
130
- catch (e) {
131
- console.error(`Failed to upload report for ${report.cli}: ${e instanceof Error ? e.message : e}`);
132
- }
133
- }
134
- }
128
+ console.error('No cli-bench.yaml found. Run `npx @cliwatch/cli-bench --init` to create one.');
129
+ process.exit(1);
135
130
  }
136
131
  if (config.output && reports.length > 0) {
137
132
  const output = JSON.stringify(reports.length === 1 ? reports[0] : reports, null, 2);
@@ -143,7 +138,7 @@ async function main() {
143
138
  console.log('\n=== Final Summary ===');
144
139
  for (const report of reports) {
145
140
  for (const mr of report.modelResults) {
146
- console.log(`${report.cli} x ${mr.displayName} [${mr.contextMode}]: ${(mr.passRate * 100).toFixed(0)}% pass, avgTurns=${mr.avgTurnsToSuccess.toFixed(1)}`);
141
+ console.log(`${report.cli} x ${mr.displayName}: ${(mr.passRate * 100).toFixed(0)}% pass, avgTurns=${mr.avgTurnsToSuccess.toFixed(1)}`);
147
142
  }
148
143
  }
149
144
  }
package/dist/index.js.map CHANGED
@@ -1 +1 @@
1
- {"version":3,"file":"index.js","sourceRoot":"","sources":["../src/index.ts"],"names":[],"mappings":";AAEA;;;;;;;;;;GAUG;AAEH,OAAO,EAAE,SAAS,EAAE,MAAM,kBAAkB,CAAC;AAC7C,OAAO,EAAE,OAAO,EAAE,MAAM,WAAW,CAAC;AACpC,OAAO,EAAE,SAAS,EAAE,MAAM,aAAa,CAAC;AACxC,OAAO,EAAE,OAAO,EAAE,YAAY,EAAE,MAAM,aAAa,CAAC;AACpD,OAAO,EAAE,iBAAiB,EAAE,WAAW,EAAE,MAAM,cAAc,CAAC;AAC9D,OAAO,EAAE,eAAe,EAAE,MAAM,WAAW,CAAC;AAC5C,OAAO,EAAE,kBAAkB,EAAE,gBAAgB,EAAgB,MAAM,gBAAgB,CAAC;AACpF,OAAO,EAAoB,aAAa,EAAE,MAAM,aAAa,CAAC;AAC9D,OAAO,EAAE,eAAe,EAAE,qBAAqB,EAAE,MAAM,iBAAiB,CAAC;AAEzE,KAAK,UAAU,IAAI;IACjB,MAAM,MAAM,GAAG,SAAS,CAAC,OAAO,CAAC,IAAI,CAAC,CAAC;IAEvC,gCAAgC;IAChC,IAAI,MAAM,CAAC,QAAQ,EAAE,CAAC;QACpB,IAAI,CAAC;YACH,MAAM,IAAI,GAAG,MAAM,eAAe,CAAC,OAAO,CAAC,GAAG,EAAE,CAAC,CAAC;YAClD,OAAO,CAAC,GAAG,CAAC,WAAW,IAAI,EAAE,CAAC,CAAC;YAC/B,OAAO,CAAC,GAAG,CAAC,oFAAoF,CAAC,CAAC;QACpG,CAAC;QAAC,OAAO,CAAC,EAAE,CAAC;YACX,OAAO,CAAC,KAAK,CAAC,CAAC,YAAY,KAAK,CAAC,CAAC,CAAC,CAAC,CAAC,OAAO,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC;YAClD,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;QAClB,CAAC;QACD,OAAO;IACT,CAAC;IAED,OAAO,CAAC,GAAG,CAAC,4BAA4B,CAAC,CAAC;IAE1C,4BAA4B;IAC5B,MAAM,UAAU,GAAG,MAAM,iBAAiB,CAAC,MAAM,CAAC,UAAU,CAAC,CAAC;IAE9D,IAAI,OAAO,CAAC;IACZ,IAAI,gBAAoE,CAAC;IACzE,IAAI,eAAe,GAAG,KAAK,CAAC;IAE5B,IAAI,UAAU,EAAE,CAAC;QACf,mBAAmB;QACnB,OAAO,CAAC,GAAG,CAAC,WAAW,UAAU,EAAE,CAAC,CAAC;QACrC,MAAM,EAAE,MAAM,EAAE,UAAU,EAAE,KAAK,EAAE,gBAAgB,EAAE,GAAG,MAAM,WAAW,CAAC,UAAU,CAAC,CAAC;QACtF,gBAAgB,GAAG,UAAU,CAAC,UAAU,CAAC;QAEzC,kCAAkC;QAClC,MAAM,SAAS,GAAG,MAAM,CAAC,MAAM,CAAC,MAAM,GAAG,CAAC;YACxC,CAAC,CAAC,MAAM,CAAC,MAAM;YACf,CAAC,CAAC,UAAU,CAAC,SAAS,IAAI,CAAC,oCAAoC,CAAC,CAAC;QACnE,MAAM,YAAY,GAAG,UAAU,CAAC,OAAO;YACrC,CAAC,CAAC,UAAU,CAAC,OAAO,CAAC,MAAM,CAAC,CAAC,CAAC,EAAoB,EAAE,CAAE,aAA0B,CAAC,QAAQ,CAAC,CAAC,CAAC,CAAC;YAC7F,CAAC,CAAC,MAAM,CAAC,YAAY,CAAC;QACxB,MAAM,WAAW,GAAG,UAAU,CAAC,WAAW,IAAI,MAAM,CAAC,WAAW,CAAC;QAEjE,4BAA4B;QAC5B,MAAM,UAAU,GAAG,UAAU,CAAC,MAAM,IAAI,MAAM,CAAC;QAC/C,MAAM,YAAY,GAAG,MAAM,CAAC,MAAM;eAC7B,UAAU,KAAK,QAAQ;eACvB,CAAC,UAAU,KAAK,MAAM,IAAI,CAAC,CAAC,MAAM,CAAC,MAAM,CAAC,CAAC;QAEhD,OAAO,CAAC,GAAG,CAAC,QAAQ,UAAU,CAAC,GAAG,EAAE,CAAC,CAAC;QACtC,OAAO,CAAC,GAAG,CAAC,cAAc,SAAS,CAAC,IAAI,CAAC,IAAI,CAAC,EAAE,CAAC,CAAC;QAClD,OAAO,CAAC,GAAG,CAAC,UAAU,KAAK,CAAC,MAAM,EAAE,CAAC,CAAC;QACtC,OAAO,CAAC,GAAG,CAAC,YAAY,YAAY,CAAC,IAAI,CAAC,IAAI,CAAC,EAAE,CAAC,CAAC;QACnD,OAAO,CAAC,GAAG,CAAC,YAAY,MAAM,CAAC,MAAM,EAAE,CAAC,CAAC;QAEzC,sCAAsC;QACtC,IAAI,CAAC,MAAM,CAAC,MAAM,EAAE,CAAC;YACnB,kBAAkB,EAAE,CAAC;QACvB,CAAC;QAED,MAAM,MAAM,GAAG,gBAAgB,CAAC,SAAS,CAAC,CAAC;QAE3C,MAAM,YAAY,GAAG,MAAM,CAAC,MAAM,IAAI,UAAU,CAAC,MAAM,CAAC;QAExD,OAAO,GAAG,MAAM,OAAO,CAAC;YACtB,MAAM,EAAE,EAAE,GAAG,MAAM,EAAE,WAAW,EAAE,YAAY,EAAE;YAChD,KAAK;YACL,OAAO,EAAE,UAAU,CAAC,GAAG;YACvB,MAAM;YACN,cAAc,EAAE,UAAU,CAAC,eAAe;YAC1C,OAAO,EAAE,UAAU,CAAC,OAAO,IAAI,MAAM,CAAC,OAAO;YAC7C,YAAY;YACZ,YAAY,EAAE,UAAU,CAAC,aAAa;YACtC,WAAW,EAAE,UAAU,CAAC,YAAY;YACpC,QAAQ,EAAE,UAAU,CAAC,QAAQ;YAC7B,UAAU,EAAE,UAAU,CAAC,WAAW;YAClC,SAAS,EAAE,UAAU,CAAC,UAAU;YAChC,gBAAgB;YAChB,SAAS,EAAE,OAAO,CAAC,UAAU,CAAC;YAC9B,aAAa,EAAE,UAAU,CAAC,UAAU;YACpC,cAAc,EAAE,UAAU,CAAC,eAAe;SAC3C,CAAC,CAAC;QAEH,wEAAwE;QACxE,IAAI,gBAAgB,IAAI,OAAO,CAAC,MAAM,GAAG,CAAC,IAAI,CAAC,MAAM,CAAC,MAAM,EAAE,CAAC;YAC7D,MAAM,KAAK,GAAG,eAAe,CAC3B,OAAO,CAAC,OAAO,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,YAAY,CAAC,EACtC,gBAAgB,CACjB,CAAC;YACF,qBAAqB,CAAC,KAAK,CAAC,CAAC;YAC7B,KAAK,MAAM,MAAM,IAAI,OAAO,EAAE,CAAC;gBAC7B,MAAM,CAAC,gBAAgB,GAAG,KAAK,CAAC;YAClC,CAAC;YACD,+DAA+D;YAC/D,IAAI,CAAC,KAAK,CAAC,SAAS,IAAI,KAAK,CAAC,QAAQ,KAAK,OAAO,EAAE,CAAC;gBACnD,eAAe,GAAG,IAAI,CAAC;YACzB,CAAC;QACH,CAAC;QAED,uBAAuB;QACvB,IAAI,YAAY,IAAI,CAAC,MAAM,CAAC,MAAM,EAAE,CAAC;YACnC,MAAM,UAAU,GAAG,UAAU,CAAC,WAAW,IAAI,MAAM,CAAC,UAAU,CAAC;YAC/D,KAAK,MAAM,MAAM,IAAI,OAAO,EAAE,CAAC;gBAC7B,IAAI,CAAC;oBACH,MAAM,YAAY,CAAC,MAAM,EAAE,UAAU,EAAE,MAAM,CAAC,MAAM,CAAC,CAAC;gBACxD,CAAC;gBAAC,OAAO,CAAC,EAAE,CAAC;oBACX,OAAO,CAAC,KAAK,CACX,+BAA+B,MAAM,CAAC,GAAG,KAAK,CAAC,YAAY,KAAK,CAAC,CAAC,CAAC,CAAC,CAAC,OAAO,CAAC,CAAC,CAAC,CAAC,EAAE,CACnF,CAAC;gBACJ,CAAC;YACH,CAAC;QACH,CAAC;IACH,CAAC;SAAM,CAAC;QACN,qCAAqC;QACrC,OAAO,CAAC,GAAG,CAAC,WAAW,MAAM,CAAC,MAAM,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC,CAAC,MAAM,CAAC,MAAM,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC,KAAK,EAAE,CAAC,CAAC;QACtF,OAAO,CAAC,GAAG,CAAC,WAAW,MAAM,CAAC,MAAM,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC,CAAC,MAAM,CAAC,MAAM,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC,KAAK,EAAE,CAAC,CAAC;QACtF,OAAO,CAAC,GAAG,CAAC,YAAY,MAAM,CAAC,YAAY,CAAC,IAAI,CAAC,IAAI,CAAC,EAAE,CAAC,CAAC;QAC1D,OAAO,CAAC,GAAG,CAAC,YAAY,MAAM,CAAC,MAAM,EAAE,CAAC,CAAC;QAEzC,IAAI,CAAC,MAAM,CAAC,MAAM,IAAI,MAAM,CAAC,MAAM,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;YAC/C,kBAAkB,EAAE,CAAC;QACvB,CAAC;QAED,OAAO,GAAG,MAAM,OAAO,CAAC,EAAE,MAAM,EAAE,YAAY,EAAE,MAAM,CAAC,MAAM,EAAE,CAAC,CAAC;QAEjE,IAAI,MAAM,CAAC,MAAM,EAAE,CAAC;YAClB,KAAK,MAAM,MAAM,IAAI,OAAO,EAAE,CAAC;gBAC7B,IAAI,CAAC;oBACH,MAAM,YAAY,CAAC,MAAM,EAAE,MAAM,CAAC,UAAU,EAAE,MAAM,CAAC,MAAM,CAAC,CAAC;gBAC/D,CAAC;gBAAC,OAAO,CAAC,EAAE,CAAC;oBACX,OAAO,CAAC,KAAK,CACX,+BAA+B,MAAM,CAAC,GAAG,KAAK,CAAC,YAAY,KAAK,CAAC,CAAC,CAAC,CAAC,CAAC,OAAO,CAAC,CAAC,CAAC,CAAC,EAAE,CACnF,CAAC;gBACJ,CAAC;YACH,CAAC;QACH,CAAC;IACH,CAAC;IAED,IAAI,MAAM,CAAC,MAAM,IAAI,OAAO,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;QACxC,MAAM,MAAM,GAAG,IAAI,CAAC,SAAS,CAC3B,OAAO,CAAC,MAAM,KAAK,CAAC,CAAC,CAAC,CAAC,OAAO,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,OAAO,EAC3C,IAAI,EACJ,CAAC,CACF,CAAC;QACF,MAAM,SAAS,CAAC,MAAM,CAAC,MAAM,EAAE,MAAM,EAAE,OAAO,CAAC,CAAC;QAChD,OAAO,CAAC,GAAG,CAAC,wBAAwB,MAAM,CAAC,MAAM,EAAE,CAAC,CAAC;IACvD,CAAC;IAED,eAAe;IACf,IAAI,OAAO,CAAC,MAAM,GAAG,CAAC,IAAI,CAAC,MAAM,CAAC,MAAM,EAAE,CAAC;QACzC,OAAO,CAAC,GAAG,CAAC,yBAAyB,CAAC,CAAC;QACvC,KAAK,MAAM,MAAM,IAAI,OAAO,EAAE,CAAC;YAC7B,KAAK,MAAM,EAAE,IAAI,MAAM,CAAC,YAAY,EAAE,CAAC;gBACrC,OAAO,CAAC,GAAG,CACT,GAAG,MAAM,CAAC,GAAG,MAAM,EAAE,CAAC,WAAW,KAAK,EAAE,CAAC,WAAW,MAAM,CAAC,EAAE,CAAC,QAAQ,GAAG,GAAG,CAAC,CAAC,OAAO,CAAC,CAAC,CAAC,oBAAoB,EAAE,CAAC,iBAAiB,CAAC,OAAO,CAAC,CAAC,CAAC,EAAE,CAC9I,CAAC;YACJ,CAAC;QACH,CAAC;IACH,CAAC;IAED,uEAAuE;IACvE,IAAI,eAAe,EAAE,CAAC;QACpB,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;IAClB,CAAC;AACH,CAAC;AAED,IAAI,EAAE,CAAC,KAAK,CAAC,CAAC,CAAC,EAAE,EAAE;IACjB,OAAO,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC;IACjB,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;AAClB,CAAC,CAAC,CAAC"}
1
+ {"version":3,"file":"index.js","sourceRoot":"","sources":["../src/index.ts"],"names":[],"mappings":";AAEA;;;;;;;;;GASG;AAEH,OAAO,EAAE,SAAS,EAAE,MAAM,kBAAkB,CAAC;AAC7C,OAAO,EAAE,SAAS,EAAE,MAAM,aAAa,CAAC;AACxC,OAAO,EAAE,OAAO,EAAE,YAAY,EAAE,MAAM,aAAa,CAAC;AACpD,OAAO,EAAE,iBAAiB,EAAE,WAAW,EAAE,MAAM,cAAc,CAAC;AAC9D,OAAO,EAAE,eAAe,EAAE,MAAM,WAAW,CAAC;AAC5C,OAAO,EAAE,eAAe,EAAE,gBAAgB,EAAgB,MAAM,gBAAgB,CAAC;AACjF,OAAO,EAAE,eAAe,EAAE,qBAAqB,EAAE,MAAM,iBAAiB,CAAC;AACzE,OAAO,EAAE,eAAe,EAAE,MAAM,qBAAqB,CAAC;AAEtD,KAAK,UAAU,IAAI;IACjB,MAAM,MAAM,GAAG,SAAS,CAAC,OAAO,CAAC,IAAI,CAAC,CAAC;IAEvC,gCAAgC;IAChC,IAAI,MAAM,CAAC,QAAQ,EAAE,CAAC;QACpB,IAAI,CAAC;YACH,MAAM,IAAI,GAAG,MAAM,eAAe,CAAC,OAAO,CAAC,GAAG,EAAE,CAAC,CAAC;YAClD,OAAO,CAAC,GAAG,CAAC,WAAW,IAAI,EAAE,CAAC,CAAC;YAC/B,OAAO,CAAC,GAAG,CAAC,oFAAoF,CAAC,CAAC;QACpG,CAAC;QAAC,OAAO,CAAC,EAAE,CAAC;YACX,OAAO,CAAC,KAAK,CAAC,CAAC,YAAY,KAAK,CAAC,CAAC,CAAC,CAAC,CAAC,OAAO,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC;YAClD,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;QAClB,CAAC;QACD,OAAO;IACT,CAAC;IAED,OAAO,CAAC,GAAG,CAAC,4BAA4B,CAAC,CAAC;IAE1C,4BAA4B;IAC5B,MAAM,UAAU,GAAG,MAAM,iBAAiB,CAAC,MAAM,CAAC,UAAU,CAAC,CAAC;IAE9D,IAAI,OAAO,CAAC;IACZ,IAAI,gBAAoE,CAAC;IACzE,IAAI,eAAe,GAAG,KAAK,CAAC;IAE5B,IAAI,UAAU,EAAE,CAAC;QACf,mBAAmB;QACnB,OAAO,CAAC,GAAG,CAAC,WAAW,UAAU,EAAE,CAAC,CAAC;QACrC,MAAM,EAAE,MAAM,EAAE,UAAU,EAAE,KAAK,EAAE,QAAQ,EAAE,gBAAgB,EAAE,YAAY,EAAE,GAAG,MAAM,WAAW,CAAC,UAAU,CAAC,CAAC;QAC9G,gBAAgB,GAAG,UAAU,CAAC,UAAU,CAAC;QAEzC,8CAA8C;QAC9C,MAAM,KAAK,GAAG,MAAM,CAAC,IAAI,CAAC,MAAM,GAAG,CAAC;YAClC,CAAC,CAAC,QAAQ,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,IAAI,EAAE,IAAI,CAAC,CAAC,GAAG,EAAE,EAAE,CAAC,MAAM,CAAC,IAAI,CAAC,QAAQ,CAAC,GAAG,CAAC,CAAC,CAAC;YAC1E,CAAC,CAAC,QAAQ,CAAC;QACb,IAAI,KAAK,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;YACvB,OAAO,CAAC,KAAK,CAAC,wBAAwB,MAAM,CAAC,IAAI,CAAC,IAAI,CAAC,IAAI,CAAC,EAAE,CAAC,CAAC;YAChE,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;QAClB,CAAC;QAED,kCAAkC;QAClC,MAAM,SAAS,GAAG,MAAM,CAAC,MAAM,CAAC,MAAM,GAAG,CAAC;YACxC,CAAC,CAAC,MAAM,CAAC,MAAM;YACf,CAAC,CAAC,UAAU,CAAC,SAAS,IAAI,CAAC,oCAAoC,CAAC,CAAC;QACnE,MAAM,WAAW,GAAG,UAAU,CAAC,WAAW,IAAI,MAAM,CAAC,WAAW,CAAC;QAEjE,4BAA4B;QAC5B,MAAM,UAAU,GAAG,UAAU,CAAC,MAAM,IAAI,MAAM,CAAC;QAC/C,MAAM,YAAY,GAAG,MAAM,CAAC,MAAM;eAC7B,UAAU,KAAK,QAAQ;eACvB,CAAC,UAAU,KAAK,MAAM,IAAI,CAAC,CAAC,MAAM,CAAC,MAAM,CAAC,CAAC;QAEhD,OAAO,CAAC,GAAG,CAAC,QAAQ,UAAU,CAAC,GAAG,EAAE,CAAC,CAAC;QACtC,OAAO,CAAC,GAAG,CAAC,cAAc,SAAS,CAAC,IAAI,CAAC,IAAI,CAAC,EAAE,CAAC,CAAC;QAClD,OAAO,CAAC,GAAG,CAAC,UAAU,KAAK,CAAC,MAAM,EAAE,CAAC,CAAC;QACtC,OAAO,CAAC,GAAG,CAAC,YAAY,MAAM,CAAC,MAAM,EAAE,CAAC,CAAC;QAEzC,mCAAmC;QACnC,IAAI,CAAC,MAAM,CAAC,MAAM,EAAE,CAAC;YACnB,eAAe,CAAC,SAAS,CAAC,CAAC;QAC7B,CAAC;QAED,MAAM,MAAM,GAAG,gBAAgB,CAAC,SAAS,CAAC,CAAC;QAE3C,MAAM,YAAY,GAAG,MAAM,CAAC,MAAM,IAAI,UAAU,CAAC,MAAM,CAAC;QAExD,OAAO,GAAG,MAAM,OAAO,CAAC;YACtB,MAAM,EAAE,EAAE,GAAG,MAAM,EAAE,WAAW,EAAE;YAClC,KAAK;YACL,OAAO,EAAE,UAAU,CAAC,GAAG;YACvB,MAAM;YACN,cAAc,EAAE,UAAU,CAAC,eAAe;YAC1C,OAAO,EAAE,UAAU,CAAC,OAAO,IAAI,MAAM,CAAC,OAAO;YAC7C,YAAY;YACZ,YAAY,EAAE,UAAU,CAAC,aAAa;YACtC,WAAW,EAAE,UAAU,CAAC,YAAY;YACpC,QAAQ,EAAE,UAAU,CAAC,QAAQ;YAC7B,UAAU,EAAE,UAAU,CAAC,WAAW;YAClC,SAAS,EAAE,UAAU,CAAC,UAAU;YAChC,gBAAgB;YAChB,aAAa,EAAE,UAAU,CAAC,UAAU;YACpC,cAAc,EAAE,UAAU,CAAC,eAAe;YAC1C,OAAO,EAAE,UAAU,CAAC,GAAG;YACvB,SAAS,EAAE,UAAU,CAAC,KAAK;YAC3B,WAAW,EAAE,UAAU,CAAC,OAAO;YAC/B,YAAY,EAAE,UAAU,CAAC,QAAQ;YACjC,YAAY;SACb,CAAC,CAAC;QAEH,wEAAwE;QACxE,IAAI,gBAAgB,IAAI,OAAO,CAAC,MAAM,GAAG,CAAC,IAAI,CAAC,MAAM,CAAC,MAAM,EAAE,CAAC;YAC7D,MAAM,KAAK,GAAG,eAAe,CAC3B,OAAO,CAAC,OAAO,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,YAAY,CAAC,EACtC,gBAAgB,CACjB,CAAC;YACF,qBAAqB,CAAC,KAAK,CAAC,CAAC;YAC7B,KAAK,MAAM,MAAM,IAAI,OAAO,EAAE,CAAC;gBAC7B,MAAM,CAAC,gBAAgB,GAAG,KAAK,CAAC;YAClC,CAAC;YACD,+DAA+D;YAC/D,IAAI,CAAC,KAAK,CAAC,SAAS,IAAI,KAAK,CAAC,QAAQ,KAAK,OAAO,EAAE,CAAC;gBACnD,eAAe,GAAG,IAAI,CAAC;YACzB,CAAC;QACH,CAAC;QAED,uBAAuB;QACvB,IAAI,YAAY,IAAI,CAAC,MAAM,CAAC,MAAM,EAAE,CAAC;YACnC,MAAM,UAAU,GAAG,UAAU,CAAC,WAAW,IAAI,MAAM,CAAC,UAAU,CAAC;YAC/D,KAAK,MAAM,MAAM,IAAI,OAAO,EAAE,CAAC;gBAC7B,IAAI,CAAC;oBACH,MAAM,YAAY,CAAC,MAAM,EAAE,UAAU,EAAE,MAAM,CAAC,MAAM,CAAC,CAAC;gBACxD,CAAC;gBAAC,OAAO,CAAC,EAAE,CAAC;oBACX,OAAO,CAAC,KAAK,CACX,+BAA+B,MAAM,CAAC,GAAG,KAAK,CAAC,YAAY,KAAK,CAAC,CAAC,CAAC,CAAC,CAAC,OAAO,CAAC,CAAC,CAAC,CAAC,EAAE,CACnF,CAAC;gBACJ,CAAC;YACH,CAAC;QACH,CAAC;QACD,yCAAyC;QACzC,IAAI,MAAM,CAAC,iBAAiB,IAAI,OAAO,CAAC,MAAM,GAAG,CAAC,IAAI,CAAC,MAAM,CAAC,MAAM,EAAE,CAAC;YACrE,MAAM,QAAQ,GAAG,eAAe,CAAC,OAAO,CAAC,CAAC,CAAE,CAAC,CAAC;YAC9C,MAAM,SAAS,CAAC,MAAM,CAAC,iBAAiB,EAAE,QAAQ,EAAE,OAAO,CAAC,CAAC;YAC7D,OAAO,CAAC,GAAG,CAAC,2BAA2B,MAAM,CAAC,iBAAiB,EAAE,CAAC,CAAC;QACrE,CAAC;IACH,CAAC;SAAM,CAAC;QACN,OAAO,CAAC,KAAK,CAAC,8EAA8E,CAAC,CAAC;QAC9F,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;IAClB,CAAC;IAED,IAAI,MAAM,CAAC,MAAM,IAAI,OAAO,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;QACxC,MAAM,MAAM,GAAG,IAAI,CAAC,SAAS,CAC3B,OAAO,CAAC,MAAM,KAAK,CAAC,CAAC,CAAC,CAAC,OAAO,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,OAAO,EAC3C,IAAI,EACJ,CAAC,CACF,CAAC;QACF,MAAM,SAAS,CAAC,MAAM,CAAC,MAAM,EAAE,MAAM,EAAE,OAAO,CAAC,CAAC;QAChD,OAAO,CAAC,GAAG,CAAC,wBAAwB,MAAM,CAAC,MAAM,EAAE,CAAC,CAAC;IACvD,CAAC;IAED,eAAe;IACf,IAAI,OAAO,CAAC,MAAM,GAAG,CAAC,IAAI,CAAC,MAAM,CAAC,MAAM,EAAE,CAAC;QACzC,OAAO,CAAC,GAAG,CAAC,yBAAyB,CAAC,CAAC;QACvC,KAAK,MAAM,MAAM,IAAI,OAAO,EAAE,CAAC;YAC7B,KAAK,MAAM,EAAE,IAAI,MAAM,CAAC,YAAY,EAAE,CAAC;gBACrC,OAAO,CAAC,GAAG,CACT,GAAG,MAAM,CAAC,GAAG,MAAM,EAAE,CAAC,WAAW,KAAK,CAAC,EAAE,CAAC,QAAQ,GAAG,GAAG,CAAC,CAAC,OAAO,CAAC,CAAC,CAAC,oBAAoB,EAAE,CAAC,iBAAiB,CAAC,OAAO,CAAC,CAAC,CAAC,EAAE,CAC1H,CAAC;YACJ,CAAC;QACH,CAAC;IACH,CAAC;IAED,uEAAuE;IACvE,IAAI,eAAe,EAAE,CAAC;QACpB,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;IAClB,CAAC;AACH,CAAC;AAED,IAAI,EAAE,CAAC,KAAK,CAAC,CAAC,CAAC,EAAE,EAAE;IACjB,OAAO,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC;IACjB,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;AAClB,CAAC,CAAC,CAAC"}
package/dist/init.js CHANGED
@@ -17,9 +17,9 @@ providers:
17
17
  # - google/gemini-2.5-pro
18
18
 
19
19
  # Optional settings
20
- # context: [zero-shot] # zero-shot | help | docs
21
20
  # concurrency: 3 # max concurrent API calls
22
21
  # workdir: ./workspace # working directory for commands (default: temp dir)
22
+ # scaffold: scaffolds/my-project # directory copied into workdir before each task
23
23
  # upload: auto # auto | always | never (auto uploads if CLIWATCH_API_KEY is set)
24
24
 
25
25
  tasks:
package/dist/models.d.ts CHANGED
@@ -4,8 +4,6 @@
4
4
  * Assertion-based evaluation: tasks define assertions that are checked
5
5
  * against the agent's execution trace.
6
6
  */
7
- export type ContextMode = 'zero-shot' | 'help' | 'docs';
8
- export declare const CONTEXT_MODES: ContextMode[];
9
7
  export type Assertion = {
10
8
  output_contains: string;
11
9
  } | {
@@ -49,10 +47,14 @@ export interface Task {
49
47
  intent: string;
50
48
  assert: Assertion[];
51
49
  setup?: string[];
50
+ cleanup?: string[];
51
+ env?: Record<string, string>;
52
52
  max_turns?: number;
53
53
  difficulty?: 'easy' | 'medium' | 'hard';
54
54
  category?: string;
55
55
  repeat?: number;
56
+ tags?: string[];
57
+ scaffold?: string | false;
56
58
  }
57
59
  export interface TaskSuite {
58
60
  cli: string;
@@ -78,7 +80,6 @@ export interface ConfigFile {
78
80
  website_url?: string;
79
81
  github_url?: string;
80
82
  providers?: string[];
81
- context?: string[];
82
83
  system_prompt?: string;
83
84
  concurrency?: number;
84
85
  workdir?: string;
@@ -88,6 +89,10 @@ export interface ConfigFile {
88
89
  redact_env?: string[];
89
90
  redact_patterns?: string[];
90
91
  thresholds?: ThresholdsConfig;
92
+ env?: Record<string, string>;
93
+ setup?: string[];
94
+ cleanup?: string[];
95
+ scaffold?: string;
91
96
  tasks: (Task | string)[];
92
97
  }
93
98
  export type Provider = string;
@@ -118,7 +123,6 @@ export interface ModelResult {
118
123
  provider: Provider;
119
124
  modelId: string;
120
125
  displayName: string;
121
- contextMode: ContextMode;
122
126
  taskResults: TaskEval[];
123
127
  passRate: number;
124
128
  avgTurnsToSuccess: number;
@@ -166,11 +170,7 @@ export interface GridReport {
166
170
  tags?: string[];
167
171
  taskSuiteHash?: string;
168
172
  taskSuiteContent?: string;
173
+ projectFiles?: Record<string, string>;
169
174
  thresholdResults?: ThresholdCheckResult;
170
175
  }
171
- export interface HelpCache {
172
- cli_name: string;
173
- help_texts: Record<string, string>;
174
- version?: string;
175
- }
176
176
  //# sourceMappingURL=models.d.ts.map
@@ -1 +1 @@
1
- {"version":3,"file":"models.d.ts","sourceRoot":"","sources":["../src/models.ts"],"names":[],"mappings":"AAAA;;;;;GAKG;AAMH,MAAM,MAAM,WAAW,GAAG,WAAW,GAAG,MAAM,GAAG,MAAM,CAAC;AACxD,eAAO,MAAM,aAAa,EAAE,WAAW,EAAkC,CAAC;AAM1E,MAAM,MAAM,SAAS,GACjB;IAAE,eAAe,EAAE,MAAM,CAAA;CAAE,GAC3B;IAAE,aAAa,EAAE,MAAM,CAAA;CAAE,GACzB;IAAE,cAAc,EAAE,MAAM,CAAA;CAAE,GAC1B;IAAE,SAAS,EAAE,MAAM,CAAA;CAAE,GACrB;IAAE,WAAW,EAAE,MAAM,CAAA;CAAE,GACvB;IAAE,aAAa,EAAE;QAAE,IAAI,EAAE,MAAM,CAAC;QAAC,IAAI,EAAE,MAAM,CAAA;KAAE,CAAA;CAAE,GACjD;IAAE,GAAG,EAAE,MAAM,CAAA;CAAE,GACf;IAAE,OAAO,EAAE,MAAM,CAAA;CAAE,GACnB;IAAE,SAAS,EAAE;QAAE,OAAO,EAAE,MAAM,CAAC;QAAC,GAAG,CAAC,EAAE,MAAM,CAAC;QAAC,GAAG,CAAC,EAAE,MAAM,CAAA;KAAE,CAAA;CAAE,GAC9D;IAAE,MAAM,EAAE;QAAE,GAAG,EAAE,MAAM,CAAC;QAAC,eAAe,CAAC,EAAE,MAAM,CAAC;QAAC,aAAa,CAAC,EAAE,MAAM,CAAA;KAAE,CAAA;CAAE,CAAC;AAElF,MAAM,WAAW,eAAe;IAC9B,SAAS,EAAE,SAAS,CAAC;IACrB,MAAM,EAAE,OAAO,CAAC;IAChB,MAAM,CAAC,EAAE,MAAM,CAAC;IAChB,QAAQ,CAAC,EAAE,MAAM,CAAC;CACnB;AAED,MAAM,WAAW,IAAI;IACnB,EAAE,EAAE,MAAM,CAAC;IACX,MAAM,EAAE,MAAM,CAAC;IACf,MAAM,EAAE,SAAS,EAAE,CAAC;IACpB,KAAK,CAAC,EAAE,MAAM,EAAE,CAAC;IACjB,SAAS,CAAC,EAAE,MAAM,CAAC;IACnB,UAAU,CAAC,EAAE,MAAM,GAAG,QAAQ,GAAG,MAAM,CAAC;IACxC,QAAQ,CAAC,EAAE,MAAM,CAAC;IAClB,MAAM,CAAC,EAAE,MAAM,CAAC;CACjB;AAED,MAAM,WAAW,SAAS;IACxB,GAAG,EAAE,MAAM,CAAC;IACZ,eAAe,CAAC,EAAE,MAAM,CAAC;IACzB,YAAY,CAAC,EAAE,MAAM,CAAC;IACtB,QAAQ,CAAC,EAAE,MAAM,CAAC;IAClB,WAAW,CAAC,EAAE,MAAM,CAAC;IACrB,UAAU,CAAC,EAAE,MAAM,CAAC;IACpB,KAAK,EAAE,IAAI,EAAE,CAAC;CACf;AAMD,MAAM,MAAM,UAAU,GAAG,MAAM,GAAG,QAAQ,GAAG,OAAO,CAAC;AAErD,MAAM,WAAW,gBAAgB;IAC/B,OAAO,CAAC,EAAE,MAAM,CAAC;IACjB,MAAM,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,MAAM,CAAC,CAAC;IAChC,SAAS,CAAC,EAAE,MAAM,CAAC;IACnB,QAAQ,CAAC,EAAE,OAAO,GAAG,eAAe,CAAC;CACtC;AAED,MAAM,WAAW,UAAU;IACzB,GAAG,EAAE,MAAM,CAAC;IACZ,eAAe,CAAC,EAAE,MAAM,CAAC;IACzB,YAAY,CAAC,EAAE,MAAM,CAAC;IACtB,QAAQ,CAAC,EAAE,MAAM,CAAC;IAClB,WAAW,CAAC,EAAE,MAAM,CAAC;IACrB,UAAU,CAAC,EAAE,MAAM,CAAC;IACpB,SAAS,CAAC,EAAE,MAAM,EAAE,CAAC;IACrB,OAAO,CAAC,EAAE,MAAM,EAAE,CAAC;IACnB,aAAa,CAAC,EAAE,MAAM,CAAC;IACvB,WAAW,CAAC,EAAE,MAAM,CAAC;IACrB,OAAO,CAAC,EAAE,MAAM,CAAC;IACjB,MAAM,CAAC,EAAE,UAAU,CAAC;IACpB,WAAW,CAAC,EAAE,MAAM,CAAC;IACrB,MAAM,CAAC,EAAE,MAAM,CAAC;IAChB,UAAU,CAAC,EAAE,MAAM,EAAE,CAAC;IACtB,eAAe,CAAC,EAAE,MAAM,EAAE,CAAC;IAC3B,UAAU,CAAC,EAAE,gBAAgB,CAAC;IAC9B,KAAK,EAAE,CAAC,IAAI,GAAG,MAAM,CAAC,EAAE,CAAC;CAC1B;AAMD,MAAM,MAAM,QAAQ,GAAG,MAAM,CAAC;AAE9B,MAAM,WAAW,WAAW;IAC1B,QAAQ,EAAE,QAAQ,CAAC;IACnB,OAAO,EAAE,MAAM,CAAC;IAChB,WAAW,EAAE,MAAM,CAAC;IACpB,WAAW,EAAE,MAAM,CAAC;IACpB,SAAS,EAAE,MAAM,CAAC;CACnB;AAMD,MAAM,WAAW,QAAQ;IACvB,MAAM,EAAE,MAAM,CAAC;IACf,MAAM,EAAE,OAAO,CAAC;IAChB,aAAa,CAAC,EAAE,MAAM,CAAC;IACvB,gBAAgB,EAAE,eAAe,EAAE,CAAC;IACpC,SAAS,EAAE,MAAM,CAAC;IAClB,gBAAgB,EAAE,MAAM,CAAC;IACzB,iBAAiB,EAAE,MAAM,CAAC;IAC1B,cAAc,EAAE,MAAM,CAAC;IACvB,WAAW,EAAE,MAAM,EAAE,CAAC;IACtB,UAAU,CAAC,EAAE,MAAM,GAAG,QAAQ,GAAG,MAAM,CAAC;IACxC,QAAQ,CAAC,EAAE,MAAM,CAAC;IAClB,iBAAiB,CAAC,EAAE,OAAO,EAAE,CAAC;IAC9B,cAAc,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC,CAAC;IACzC,WAAW,CAAC,EAAE,MAAM,CAAC;CACtB;AAMD,MAAM,WAAW,WAAW;IAC1B,QAAQ,EAAE,QAAQ,CAAC;IACnB,OAAO,EAAE,MAAM,CAAC;IAChB,WAAW,EAAE,MAAM,CAAC;IACpB,WAAW,EAAE,WAAW,CAAC;IACzB,WAAW,EAAE,QAAQ,EAAE,CAAC;IACxB,QAAQ,EAAE,MAAM,CAAC;IACjB,iBAAiB,EAAE,MAAM,CAAC;IAC1B,cAAc,EAAE,MAAM,CAAC;IACvB,YAAY,EAAE,MAAM,CAAC;CACtB;AAED,MAAM,WAAW,eAAe;IAC9B,KAAK,EAAE,MAAM,CAAC;IACd,QAAQ,EAAE,MAAM,CAAC;IACjB,SAAS,EAAE,MAAM,CAAC;IAClB,SAAS,EAAE,MAAM,CAAC;IAClB,YAAY,EAAE,MAAM,CAAC;IACrB,MAAM,EAAE,OAAO,CAAC;CACjB;AAED,MAAM,WAAW,oBAAoB;IACnC,SAAS,EAAE,OAAO,CAAC;IACnB,OAAO,EAAE,eAAe,EAAE,CAAC;IAC3B,QAAQ,EAAE,OAAO,GAAG,eAAe,CAAC;CACrC;AAED,MAAM,WAAW,UAAU;IACzB,GAAG,EAAE,MAAM,CAAC;IACZ,UAAU,CAAC,EAAE,MAAM,CAAC;IACpB,gBAAgB,EAAE,MAAM,CAAC;IACzB,WAAW,EAAE,MAAM,CAAC;IACpB,SAAS,EAAE,MAAM,CAAC;IAClB,UAAU,CAAC,EAAE,MAAM,CAAC;IACpB,WAAW,EAAE,MAAM,CAAC;IACpB,YAAY,EAAE,WAAW,EAAE,CAAC;IAC5B,YAAY,CAAC,EAAE,MAAM,CAAC;IAEtB,WAAW,CAAC,EAAE,MAAM,CAAC;IACrB,QAAQ,CAAC,EAAE,MAAM,CAAC;IAClB,UAAU,CAAC,EAAE,MAAM,CAAC;IACpB,SAAS,CAAC,EAAE,MAAM,CAAC;IAEnB,MAAM,CAAC,EAAE,MAAM,CAAC;IAChB,MAAM,CAAC,EAAE,MAAM,CAAC;IAChB,UAAU,CAAC,EAAE,MAAM,CAAC;IACpB,UAAU,CAAC,EAAE,MAAM,CAAC;IACpB,aAAa,CAAC,EAAE,MAAM,CAAC;IACvB,SAAS,CAAC,EAAE,MAAM,CAAC;IACnB,UAAU,CAAC,EAAE,MAAM,CAAC;IACpB,UAAU,CAAC,EAAE,MAAM,CAAC;IACpB,OAAO,CAAC,EAAE,MAAM,CAAC;IACjB,QAAQ,CAAC,EAAE,MAAM,CAAC;IAClB,UAAU,CAAC,EAAE,MAAM,CAAC;IACpB,IAAI,CAAC,EAAE,MAAM,EAAE,CAAC;IAChB,aAAa,CAAC,EAAE,MAAM,CAAC;IACvB,gBAAgB,CAAC,EAAE,MAAM,CAAC;IAC1B,gBAAgB,CAAC,EAAE,oBAAoB,CAAC;CACzC;AAMD,MAAM,WAAW,SAAS;IACxB,QAAQ,EAAE,MAAM,CAAC;IACjB,UAAU,EAAE,MAAM,CAAC,MAAM,EAAE,MAAM,CAAC,CAAC;IACnC,OAAO,CAAC,EAAE,MAAM,CAAC;CAClB"}
1
+ {"version":3,"file":"models.d.ts","sourceRoot":"","sources":["../src/models.ts"],"names":[],"mappings":"AAAA;;;;;GAKG;AAMH,MAAM,MAAM,SAAS,GACjB;IAAE,eAAe,EAAE,MAAM,CAAA;CAAE,GAC3B;IAAE,aAAa,EAAE,MAAM,CAAA;CAAE,GACzB;IAAE,cAAc,EAAE,MAAM,CAAA;CAAE,GAC1B;IAAE,SAAS,EAAE,MAAM,CAAA;CAAE,GACrB;IAAE,WAAW,EAAE,MAAM,CAAA;CAAE,GACvB;IAAE,aAAa,EAAE;QAAE,IAAI,EAAE,MAAM,CAAC;QAAC,IAAI,EAAE,MAAM,CAAA;KAAE,CAAA;CAAE,GACjD;IAAE,GAAG,EAAE,MAAM,CAAA;CAAE,GACf;IAAE,OAAO,EAAE,MAAM,CAAA;CAAE,GACnB;IAAE,SAAS,EAAE;QAAE,OAAO,EAAE,MAAM,CAAC;QAAC,GAAG,CAAC,EAAE,MAAM,CAAC;QAAC,GAAG,CAAC,EAAE,MAAM,CAAA;KAAE,CAAA;CAAE,GAC9D;IAAE,MAAM,EAAE;QAAE,GAAG,EAAE,MAAM,CAAC;QAAC,eAAe,CAAC,EAAE,MAAM,CAAC;QAAC,aAAa,CAAC,EAAE,MAAM,CAAA;KAAE,CAAA;CAAE,CAAC;AAElF,MAAM,WAAW,eAAe;IAC9B,SAAS,EAAE,SAAS,CAAC;IACrB,MAAM,EAAE,OAAO,CAAC;IAChB,MAAM,CAAC,EAAE,MAAM,CAAC;IAChB,QAAQ,CAAC,EAAE,MAAM,CAAC;CACnB;AAED,MAAM,WAAW,IAAI;IACnB,EAAE,EAAE,MAAM,CAAC;IACX,MAAM,EAAE,MAAM,CAAC;IACf,MAAM,EAAE,SAAS,EAAE,CAAC;IACpB,KAAK,CAAC,EAAE,MAAM,EAAE,CAAC;IACjB,OAAO,CAAC,EAAE,MAAM,EAAE,CAAC;IACnB,GAAG,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,MAAM,CAAC,CAAC;IAC7B,SAAS,CAAC,EAAE,MAAM,CAAC;IACnB,UAAU,CAAC,EAAE,MAAM,GAAG,QAAQ,GAAG,MAAM,CAAC;IACxC,QAAQ,CAAC,EAAE,MAAM,CAAC;IAClB,MAAM,CAAC,EAAE,MAAM,CAAC;IAChB,IAAI,CAAC,EAAE,MAAM,EAAE,CAAC;IAChB,QAAQ,CAAC,EAAE,MAAM,GAAG,KAAK,CAAC;CAC3B;AAED,MAAM,WAAW,SAAS;IACxB,GAAG,EAAE,MAAM,CAAC;IACZ,eAAe,CAAC,EAAE,MAAM,CAAC;IACzB,YAAY,CAAC,EAAE,MAAM,CAAC;IACtB,QAAQ,CAAC,EAAE,MAAM,CAAC;IAClB,WAAW,CAAC,EAAE,MAAM,CAAC;IACrB,UAAU,CAAC,EAAE,MAAM,CAAC;IACpB,KAAK,EAAE,IAAI,EAAE,CAAC;CACf;AAMD,MAAM,MAAM,UAAU,GAAG,MAAM,GAAG,QAAQ,GAAG,OAAO,CAAC;AAErD,MAAM,WAAW,gBAAgB;IAC/B,OAAO,CAAC,EAAE,MAAM,CAAC;IACjB,MAAM,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,MAAM,CAAC,CAAC;IAChC,SAAS,CAAC,EAAE,MAAM,CAAC;IACnB,QAAQ,CAAC,EAAE,OAAO,GAAG,eAAe,CAAC;CACtC;AAED,MAAM,WAAW,UAAU;IACzB,GAAG,EAAE,MAAM,CAAC;IACZ,eAAe,CAAC,EAAE,MAAM,CAAC;IACzB,YAAY,CAAC,EAAE,MAAM,CAAC;IACtB,QAAQ,CAAC,EAAE,MAAM,CAAC;IAClB,WAAW,CAAC,EAAE,MAAM,CAAC;IACrB,UAAU,CAAC,EAAE,MAAM,CAAC;IACpB,SAAS,CAAC,EAAE,MAAM,EAAE,CAAC;IACrB,aAAa,CAAC,EAAE,MAAM,CAAC;IACvB,WAAW,CAAC,EAAE,MAAM,CAAC;IACrB,OAAO,CAAC,EAAE,MAAM,CAAC;IACjB,MAAM,CAAC,EAAE,UAAU,CAAC;IACpB,WAAW,CAAC,EAAE,MAAM,CAAC;IACrB,MAAM,CAAC,EAAE,MAAM,CAAC;IAChB,UAAU,CAAC,EAAE,MAAM,EAAE,CAAC;IACtB,eAAe,CAAC,EAAE,MAAM,EAAE,CAAC;IAC3B,UAAU,CAAC,EAAE,gBAAgB,CAAC;IAC9B,GAAG,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,MAAM,CAAC,CAAC;IAC7B,KAAK,CAAC,EAAE,MAAM,EAAE,CAAC;IACjB,OAAO,CAAC,EAAE,MAAM,EAAE,CAAC;IACnB,QAAQ,CAAC,EAAE,MAAM,CAAC;IAClB,KAAK,EAAE,CAAC,IAAI,GAAG,MAAM,CAAC,EAAE,CAAC;CAC1B;AAMD,MAAM,MAAM,QAAQ,GAAG,MAAM,CAAC;AAE9B,MAAM,WAAW,WAAW;IAC1B,QAAQ,EAAE,QAAQ,CAAC;IACnB,OAAO,EAAE,MAAM,CAAC;IAChB,WAAW,EAAE,MAAM,CAAC;IACpB,WAAW,EAAE,MAAM,CAAC;IACpB,SAAS,EAAE,MAAM,CAAC;CACnB;AAMD,MAAM,WAAW,QAAQ;IACvB,MAAM,EAAE,MAAM,CAAC;IACf,MAAM,EAAE,OAAO,CAAC;IAChB,aAAa,CAAC,EAAE,MAAM,CAAC;IACvB,gBAAgB,EAAE,eAAe,EAAE,CAAC;IACpC,SAAS,EAAE,MAAM,CAAC;IAClB,gBAAgB,EAAE,MAAM,CAAC;IACzB,iBAAiB,EAAE,MAAM,CAAC;IAC1B,cAAc,EAAE,MAAM,CAAC;IACvB,WAAW,EAAE,MAAM,EAAE,CAAC;IACtB,UAAU,CAAC,EAAE,MAAM,GAAG,QAAQ,GAAG,MAAM,CAAC;IACxC,QAAQ,CAAC,EAAE,MAAM,CAAC;IAClB,iBAAiB,CAAC,EAAE,OAAO,EAAE,CAAC;IAC9B,cAAc,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC,CAAC;IACzC,WAAW,CAAC,EAAE,MAAM,CAAC;CACtB;AAMD,MAAM,WAAW,WAAW;IAC1B,QAAQ,EAAE,QAAQ,CAAC;IACnB,OAAO,EAAE,MAAM,CAAC;IAChB,WAAW,EAAE,MAAM,CAAC;IACpB,WAAW,EAAE,QAAQ,EAAE,CAAC;IACxB,QAAQ,EAAE,MAAM,CAAC;IACjB,iBAAiB,EAAE,MAAM,CAAC;IAC1B,cAAc,EAAE,MAAM,CAAC;IACvB,YAAY,EAAE,MAAM,CAAC;CACtB;AAED,MAAM,WAAW,eAAe;IAC9B,KAAK,EAAE,MAAM,CAAC;IACd,QAAQ,EAAE,MAAM,CAAC;IACjB,SAAS,EAAE,MAAM,CAAC;IAClB,SAAS,EAAE,MAAM,CAAC;IAClB,YAAY,EAAE,MAAM,CAAC;IACrB,MAAM,EAAE,OAAO,CAAC;CACjB;AAED,MAAM,WAAW,oBAAoB;IACnC,SAAS,EAAE,OAAO,CAAC;IACnB,OAAO,EAAE,eAAe,EAAE,CAAC;IAC3B,QAAQ,EAAE,OAAO,GAAG,eAAe,CAAC;CACrC;AAED,MAAM,WAAW,UAAU;IACzB,GAAG,EAAE,MAAM,CAAC;IACZ,UAAU,CAAC,EAAE,MAAM,CAAC;IACpB,gBAAgB,EAAE,MAAM,CAAC;IACzB,WAAW,EAAE,MAAM,CAAC;IACpB,SAAS,EAAE,MAAM,CAAC;IAClB,UAAU,CAAC,EAAE,MAAM,CAAC;IACpB,WAAW,EAAE,MAAM,CAAC;IACpB,YAAY,EAAE,WAAW,EAAE,CAAC;IAC5B,YAAY,CAAC,EAAE,MAAM,CAAC;IAEtB,WAAW,CAAC,EAAE,MAAM,CAAC;IACrB,QAAQ,CAAC,EAAE,MAAM,CAAC;IAClB,UAAU,CAAC,EAAE,MAAM,CAAC;IACpB,SAAS,CAAC,EAAE,MAAM,CAAC;IAEnB,MAAM,CAAC,EAAE,MAAM,CAAC;IAChB,MAAM,CAAC,EAAE,MAAM,CAAC;IAChB,UAAU,CAAC,EAAE,MAAM,CAAC;IACpB,UAAU,CAAC,EAAE,MAAM,CAAC;IACpB,aAAa,CAAC,EAAE,MAAM,CAAC;IACvB,SAAS,CAAC,EAAE,MAAM,CAAC;IACnB,UAAU,CAAC,EAAE,MAAM,CAAC;IACpB,UAAU,CAAC,EAAE,MAAM,CAAC;IACpB,OAAO,CAAC,EAAE,MAAM,CAAC;IACjB,QAAQ,CAAC,EAAE,MAAM,CAAC;IAClB,UAAU,CAAC,EAAE,MAAM,CAAC;IACpB,IAAI,CAAC,EAAE,MAAM,EAAE,CAAC;IAChB,aAAa,CAAC,EAAE,MAAM,CAAC;IACvB,gBAAgB,CAAC,EAAE,MAAM,CAAC;IAC1B,YAAY,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,MAAM,CAAC,CAAC;IACtC,gBAAgB,CAAC,EAAE,oBAAoB,CAAC;CACzC"}
package/dist/models.js CHANGED
@@ -4,5 +4,5 @@
4
4
  * Assertion-based evaluation: tasks define assertions that are checked
5
5
  * against the agent's execution trace.
6
6
  */
7
- export const CONTEXT_MODES = ['zero-shot', 'help', 'docs'];
7
+ export {};
8
8
  //# sourceMappingURL=models.js.map
@@ -1 +1 @@
1
- {"version":3,"file":"models.js","sourceRoot":"","sources":["../src/models.ts"],"names":[],"mappings":"AAAA;;;;;GAKG;AAOH,MAAM,CAAC,MAAM,aAAa,GAAkB,CAAC,WAAW,EAAE,MAAM,EAAE,MAAM,CAAC,CAAC"}
1
+ {"version":3,"file":"models.js","sourceRoot":"","sources":["../src/models.ts"],"names":[],"mappings":"AAAA;;;;;GAKG"}
package/dist/project.d.ts CHANGED
@@ -9,13 +9,21 @@ import type { Task, ConfigFile } from './models.js';
9
9
  export declare function resolveConfigFile(explicitPath?: string): Promise<string | null>;
10
10
  /**
11
11
  * Parse and validate a cli-bench.yaml config file.
12
+ * Returns the parsed config and the raw file content.
12
13
  */
13
- export declare function loadConfigFile(path: string): Promise<ConfigFile>;
14
+ export declare function loadConfigFile(path: string): Promise<{
15
+ config: ConfigFile;
16
+ rawContent: string;
17
+ }>;
14
18
  /**
15
19
  * Resolve file:// references and inline tasks into a flat task array.
16
20
  * Deduplicates by task ID (first occurrence wins).
21
+ * Also tracks all loaded file paths and their raw contents.
17
22
  */
18
- export declare function resolveTaskRefs(tasks: (Task | string)[], baseDir: string): Promise<Task[]>;
23
+ export declare function resolveTaskRefs(tasks: (Task | string)[], baseDir: string): Promise<{
24
+ tasks: Task[];
25
+ loadedFiles: Map<string, string>;
26
+ }>;
19
27
  /**
20
28
  * Load config file and resolve all task references.
21
29
  */
@@ -23,5 +31,6 @@ export declare function loadProject(configPath: string): Promise<{
23
31
  config: ConfigFile;
24
32
  tasks: Task[];
25
33
  taskSuiteContent: string;
34
+ projectFiles: Record<string, string>;
26
35
  }>;
27
36
  //# sourceMappingURL=project.d.ts.map
@@ -1 +1 @@
1
- {"version":3,"file":"project.d.ts","sourceRoot":"","sources":["../src/project.ts"],"names":[],"mappings":"AAAA;;;GAGG;AAOH,OAAO,KAAK,EAAE,IAAI,EAAE,UAAU,EAAE,MAAM,aAAa,CAAC;AAIpD;;GAEG;AACH,wBAAsB,iBAAiB,CAAC,YAAY,CAAC,EAAE,MAAM,GAAG,OAAO,CAAC,MAAM,GAAG,IAAI,CAAC,CAqBrF;AAED;;GAEG;AACH,wBAAsB,cAAc,CAAC,IAAI,EAAE,MAAM,GAAG,OAAO,CAAC,UAAU,CAAC,CAItE;AAED;;;GAGG;AACH,wBAAsB,eAAe,CACnC,KAAK,EAAE,CAAC,IAAI,GAAG,MAAM,CAAC,EAAE,EACxB,OAAO,EAAE,MAAM,GACd,OAAO,CAAC,IAAI,EAAE,CAAC,CA6CjB;AAED;;GAEG;AACH,wBAAsB,WAAW,CAAC,UAAU,EAAE,MAAM,GAAG,OAAO,CAAC;IAAE,MAAM,EAAE,UAAU,CAAC;IAAC,KAAK,EAAE,IAAI,EAAE,CAAC;IAAC,gBAAgB,EAAE,MAAM,CAAA;CAAE,CAAC,CAM9H"}
1
+ {"version":3,"file":"project.d.ts","sourceRoot":"","sources":["../src/project.ts"],"names":[],"mappings":"AAAA;;;GAGG;AAOH,OAAO,KAAK,EAAE,IAAI,EAAE,UAAU,EAAE,MAAM,aAAa,CAAC;AAIpD;;GAEG;AACH,wBAAsB,iBAAiB,CAAC,YAAY,CAAC,EAAE,MAAM,GAAG,OAAO,CAAC,MAAM,GAAG,IAAI,CAAC,CAqBrF;AAED;;;GAGG;AACH,wBAAsB,cAAc,CAAC,IAAI,EAAE,MAAM,GAAG,OAAO,CAAC;IAAE,MAAM,EAAE,UAAU,CAAC;IAAC,UAAU,EAAE,MAAM,CAAA;CAAE,CAAC,CAKtG;AAED;;;;GAIG;AACH,wBAAsB,eAAe,CACnC,KAAK,EAAE,CAAC,IAAI,GAAG,MAAM,CAAC,EAAE,EACxB,OAAO,EAAE,MAAM,GACd,OAAO,CAAC;IAAE,KAAK,EAAE,IAAI,EAAE,CAAC;IAAC,WAAW,EAAE,GAAG,CAAC,MAAM,EAAE,MAAM,CAAC,CAAA;CAAE,CAAC,CAgD9D;AAsDD;;GAEG;AACH,wBAAsB,WAAW,CAAC,UAAU,EAAE,MAAM,GAAG,OAAO,CAAC;IAC7D,MAAM,EAAE,UAAU,CAAC;IACnB,KAAK,EAAE,IAAI,EAAE,CAAC;IACd,gBAAgB,EAAE,MAAM,CAAC;IACzB,YAAY,EAAE,MAAM,CAAC,MAAM,EAAE,MAAM,CAAC,CAAC;CACtC,CAAC,CA+CD"}