@cliwatch/cli-bench 0.6.3 → 0.7.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (71) hide show
  1. package/CHANGELOG.md +14 -0
  2. package/README.md +3 -0
  3. package/dist/assertions.d.ts +1 -1
  4. package/dist/assertions.d.ts.map +1 -1
  5. package/dist/assertions.js +6 -6
  6. package/dist/assertions.js.map +1 -1
  7. package/dist/ci.d.ts.map +1 -1
  8. package/dist/ci.js +14 -0
  9. package/dist/ci.js.map +1 -1
  10. package/dist/client/index.d.ts +1 -1
  11. package/dist/client/index.d.ts.map +1 -1
  12. package/dist/client/types.gen.d.ts +143 -93
  13. package/dist/client/types.gen.d.ts.map +1 -1
  14. package/dist/client/zod.gen.d.ts +75 -42
  15. package/dist/client/zod.gen.d.ts.map +1 -1
  16. package/dist/client/zod.gen.js +86 -54
  17. package/dist/client/zod.gen.js.map +1 -1
  18. package/dist/config.d.ts +2 -3
  19. package/dist/config.d.ts.map +1 -1
  20. package/dist/config.js +8 -15
  21. package/dist/config.js.map +1 -1
  22. package/dist/exec.d.ts +2 -0
  23. package/dist/exec.d.ts.map +1 -1
  24. package/dist/exec.js +6 -2
  25. package/dist/exec.js.map +1 -1
  26. package/dist/github-comment.d.ts +16 -0
  27. package/dist/github-comment.d.ts.map +1 -0
  28. package/dist/github-comment.js +90 -0
  29. package/dist/github-comment.js.map +1 -0
  30. package/dist/index.d.ts +2 -3
  31. package/dist/index.d.ts.map +1 -1
  32. package/dist/index.js +31 -36
  33. package/dist/index.js.map +1 -1
  34. package/dist/init.js +1 -1
  35. package/dist/models.d.ts +9 -9
  36. package/dist/models.d.ts.map +1 -1
  37. package/dist/models.js +1 -1
  38. package/dist/models.js.map +1 -1
  39. package/dist/project.d.ts +11 -2
  40. package/dist/project.d.ts.map +1 -1
  41. package/dist/project.js +108 -9
  42. package/dist/project.js.map +1 -1
  43. package/dist/prompt.d.ts +2 -8
  44. package/dist/prompt.d.ts.map +1 -1
  45. package/dist/prompt.js +2 -35
  46. package/dist/prompt.js.map +1 -1
  47. package/dist/providers.d.ts +9 -7
  48. package/dist/providers.d.ts.map +1 -1
  49. package/dist/providers.js +26 -8
  50. package/dist/providers.js.map +1 -1
  51. package/dist/runner.d.ts +32 -4
  52. package/dist/runner.d.ts.map +1 -1
  53. package/dist/runner.js +177 -177
  54. package/dist/runner.js.map +1 -1
  55. package/dist/schemas.d.ts +20 -1
  56. package/dist/schemas.d.ts.map +1 -1
  57. package/dist/schemas.js +8 -1
  58. package/dist/schemas.js.map +1 -1
  59. package/dist/suite-generator.d.ts.map +1 -1
  60. package/dist/suite-generator.js +93 -10
  61. package/dist/suite-generator.js.map +1 -1
  62. package/package.json +2 -2
  63. package/dist/help-loader.d.ts +0 -17
  64. package/dist/help-loader.d.ts.map +0 -1
  65. package/dist/help-loader.js +0 -65
  66. package/dist/help-loader.js.map +0 -1
  67. package/task_suites/curl.yaml +0 -138
  68. package/task_suites/docker.yaml +0 -163
  69. package/task_suites/gh.yaml +0 -118
  70. package/task_suites/jq.yaml +0 -172
  71. package/task_suites/kubectl.yaml +0 -74
package/dist/project.js CHANGED
@@ -2,8 +2,8 @@
2
2
  * Config file loader — discovers and parses cli-bench.yaml,
3
3
  * resolves file:// task references with glob support.
4
4
  */
5
- import { readFile, access } from 'node:fs/promises';
6
- import { join, dirname, resolve } from 'node:path';
5
+ import { readFile, readdir, access, stat, open } from 'node:fs/promises';
6
+ import { join, dirname, resolve, relative } from 'node:path';
7
7
  import { glob } from 'node:fs/promises';
8
8
  import { parse as parseYaml, stringify as stringifyYaml } from 'yaml';
9
9
  import { ConfigFileSchema, TaskFileSchema, TaskSchema } from './schemas.js';
@@ -35,19 +35,23 @@ export async function resolveConfigFile(explicitPath) {
35
35
  }
36
36
  /**
37
37
  * Parse and validate a cli-bench.yaml config file.
38
+ * Returns the parsed config and the raw file content.
38
39
  */
39
40
  export async function loadConfigFile(path) {
40
- const raw = await readFile(path, 'utf-8');
41
- const parsed = parseYaml(raw);
42
- return ConfigFileSchema.parse(parsed);
41
+ const rawContent = await readFile(path, 'utf-8');
42
+ const parsed = parseYaml(rawContent);
43
+ const config = ConfigFileSchema.parse(parsed);
44
+ return { config, rawContent };
43
45
  }
44
46
  /**
45
47
  * Resolve file:// references and inline tasks into a flat task array.
46
48
  * Deduplicates by task ID (first occurrence wins).
49
+ * Also tracks all loaded file paths and their raw contents.
47
50
  */
48
51
  export async function resolveTaskRefs(tasks, baseDir) {
49
52
  const resolved = [];
50
53
  const seenIds = new Set();
54
+ const loadedFiles = new Map();
51
55
  for (const entry of tasks) {
52
56
  if (typeof entry === 'string') {
53
57
  // file:// reference
@@ -68,6 +72,8 @@ export async function resolveTaskRefs(tasks, baseDir) {
68
72
  }
69
73
  for (const filePath of paths) {
70
74
  const raw = await readFile(filePath, 'utf-8');
75
+ const relPath = relative(baseDir, filePath);
76
+ loadedFiles.set(relPath, raw);
71
77
  const parsed = parseYaml(raw);
72
78
  const fileTasks = TaskFileSchema.parse(parsed);
73
79
  for (const task of fileTasks) {
@@ -87,16 +93,109 @@ export async function resolveTaskRefs(tasks, baseDir) {
87
93
  }
88
94
  }
89
95
  }
90
- return resolved;
96
+ return { tasks: resolved, loadedFiles };
97
+ }
98
+ /**
99
+ * Assert that a path exists and is a directory.
100
+ */
101
+ async function assertDirectory(absPath, label) {
102
+ let s;
103
+ try {
104
+ s = await stat(absPath);
105
+ }
106
+ catch {
107
+ throw new Error(`${label}: path does not exist: ${absPath}`);
108
+ }
109
+ if (!s.isDirectory()) {
110
+ throw new Error(`${label}: path is not a directory: ${absPath}`);
111
+ }
112
+ }
113
+ /**
114
+ * Recursively walk a directory, yielding absolute file paths.
115
+ */
116
+ async function* walkDir(dir) {
117
+ const entries = await readdir(dir, { withFileTypes: true });
118
+ for (const entry of entries) {
119
+ const fullPath = join(dir, entry.name);
120
+ if (entry.isDirectory()) {
121
+ yield* walkDir(fullPath);
122
+ }
123
+ else if (entry.isFile()) {
124
+ yield fullPath;
125
+ }
126
+ }
127
+ }
128
+ /**
129
+ * Check if a file is likely a text file by reading the first 512 bytes
130
+ * and looking for null bytes.
131
+ */
132
+ async function isTextFile(filePath) {
133
+ try {
134
+ const fh = await open(filePath, 'r');
135
+ try {
136
+ const buf = Buffer.alloc(512);
137
+ const { bytesRead } = await fh.read(buf, 0, 512, 0);
138
+ for (let i = 0; i < bytesRead; i++) {
139
+ if (buf[i] === 0)
140
+ return false;
141
+ }
142
+ return true;
143
+ }
144
+ finally {
145
+ await fh.close();
146
+ }
147
+ }
148
+ catch {
149
+ return false;
150
+ }
91
151
  }
92
152
  /**
93
153
  * Load config file and resolve all task references.
94
154
  */
95
155
  export async function loadProject(configPath) {
96
- const config = await loadConfigFile(configPath);
156
+ const { config, rawContent } = await loadConfigFile(configPath);
97
157
  const baseDir = dirname(configPath);
98
- const tasks = await resolveTaskRefs(config.tasks, baseDir);
158
+ const { tasks, loadedFiles } = await resolveTaskRefs(config.tasks, baseDir);
159
+ // Serialize before resolving scaffold paths to avoid leaking absolute host paths
99
160
  const taskSuiteContent = stringifyYaml(tasks);
100
- return { config, tasks, taskSuiteContent };
161
+ // Resolve scaffold paths to absolute and validate
162
+ if (config.scaffold) {
163
+ const abs = resolve(baseDir, config.scaffold);
164
+ await assertDirectory(abs, `config scaffold "${config.scaffold}"`);
165
+ config.scaffold = abs;
166
+ }
167
+ for (const task of tasks) {
168
+ if (typeof task.scaffold === 'string') {
169
+ const abs = resolve(baseDir, task.scaffold);
170
+ await assertDirectory(abs, `task "${task.id}" scaffold "${task.scaffold}"`);
171
+ task.scaffold = abs;
172
+ }
173
+ }
174
+ // Build projectFiles map: config file + all loaded task files
175
+ const configFileName = configPath.endsWith('.yml') ? 'cli-bench.yml' : 'cli-bench.yaml';
176
+ const projectFiles = {
177
+ [configFileName]: rawContent,
178
+ };
179
+ for (const [relPath, content] of loadedFiles) {
180
+ projectFiles[relPath] = content;
181
+ }
182
+ // Read all scaffold directories referenced by config or tasks
183
+ const scaffoldDirs = new Set();
184
+ if (config.scaffold)
185
+ scaffoldDirs.add(config.scaffold);
186
+ for (const task of tasks) {
187
+ if (typeof task.scaffold === 'string')
188
+ scaffoldDirs.add(task.scaffold);
189
+ }
190
+ for (const absDir of scaffoldDirs) {
191
+ const relDir = relative(baseDir, absDir);
192
+ for await (const entry of walkDir(absDir)) {
193
+ const relPath = join(relDir, relative(absDir, entry));
194
+ if (!(await isTextFile(entry)))
195
+ continue;
196
+ projectFiles[relPath] = await readFile(entry, 'utf-8');
197
+ }
198
+ }
199
+ return { config, tasks, taskSuiteContent, projectFiles };
101
200
  }
102
201
  //# sourceMappingURL=project.js.map
@@ -1 +1 @@
1
- {"version":3,"file":"project.js","sourceRoot":"","sources":["../src/project.ts"],"names":[],"mappings":"AAAA;;;GAGG;AAEH,OAAO,EAAE,QAAQ,EAAE,MAAM,EAAE,MAAM,kBAAkB,CAAC;AACpD,OAAO,EAAE,IAAI,EAAE,OAAO,EAAE,OAAO,EAAE,MAAM,WAAW,CAAC;AACnD,OAAO,EAAE,IAAI,EAAE,MAAM,kBAAkB,CAAC;AACxC,OAAO,EAAE,KAAK,IAAI,SAAS,EAAE,SAAS,IAAI,aAAa,EAAE,MAAM,MAAM,CAAC;AACtE,OAAO,EAAE,gBAAgB,EAAE,cAAc,EAAE,UAAU,EAAE,MAAM,cAAc,CAAC;AAG5E,MAAM,gBAAgB,GAAG,CAAC,gBAAgB,EAAE,eAAe,CAAC,CAAC;AAE7D;;GAEG;AACH,MAAM,CAAC,KAAK,UAAU,iBAAiB,CAAC,YAAqB;IAC3D,IAAI,YAAY,EAAE,CAAC;QACjB,IAAI,CAAC;YACH,MAAM,MAAM,CAAC,YAAY,CAAC,CAAC;YAC3B,OAAO,OAAO,CAAC,YAAY,CAAC,CAAC;QAC/B,CAAC;QAAC,MAAM,CAAC;YACP,OAAO,IAAI,CAAC;QACd,CAAC;IACH,CAAC;IAED,KAAK,MAAM,IAAI,IAAI,gBAAgB,EAAE,CAAC;QACpC,MAAM,SAAS,GAAG,OAAO,CAAC,IAAI,CAAC,CAAC;QAChC,IAAI,CAAC;YACH,MAAM,MAAM,CAAC,SAAS,CAAC,CAAC;YACxB,OAAO,SAAS,CAAC;QACnB,CAAC;QAAC,MAAM,CAAC;YACP,SAAS;QACX,CAAC;IACH,CAAC;IAED,OAAO,IAAI,CAAC;AACd,CAAC;AAED;;GAEG;AACH,MAAM,CAAC,KAAK,UAAU,cAAc,CAAC,IAAY;IAC/C,MAAM,GAAG,GAAG,MAAM,QAAQ,CAAC,IAAI,EAAE,OAAO,CAAC,CAAC;IAC1C,MAAM,MAAM,GAAG,SAAS,CAAC,GAAG,CAAC,CAAC;IAC9B,OAAO,gBAAgB,CAAC,KAAK,CAAC,MAAM,CAAe,CAAC;AACtD,CAAC;AAED;;;GAGG;AACH,MAAM,CAAC,KAAK,UAAU,eAAe,CACnC,KAAwB,EACxB,OAAe;IAEf,MAAM,QAAQ,GAAW,EAAE,CAAC;IAC5B,MAAM,OAAO,GAAG,IAAI,GAAG,EAAU,CAAC;IAElC,KAAK,MAAM,KAAK,IAAI,KAAK,EAAE,CAAC;QAC1B,IAAI,OAAO,KAAK,KAAK,QAAQ,EAAE,CAAC;YAC9B,oBAAoB;YACpB,MAAM,GAAG,GAAG,KAAK,CAAC,OAAO,CAAC,YAAY,EAAE,EAAE,CAAC,CAAC;YAC5C,MAAM,OAAO,GAAG,IAAI,CAAC,OAAO,EAAE,GAAG,CAAC,CAAC;YAEnC,gCAAgC;YAChC,MAAM,KAAK,GAAa,EAAE,CAAC;YAC3B,IAAI,GAAG,CAAC,QAAQ,CAAC,GAAG,CAAC,EAAE,CAAC;gBACtB,IAAI,KAAK,EAAE,MAAM,KAAK,IAAI,IAAI,CAAC,OAAO,CAAC,EAAE,CAAC;oBACxC,IAAI,KAAK,CAAC,QAAQ,CAAC,OAAO,CAAC,IAAI,KAAK,CAAC,QAAQ,CAAC,MAAM,CAAC,EAAE,CAAC;wBACtD,KAAK,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC;oBACpB,CAAC;gBACH,CAAC;gBACD,KAAK,CAAC,IAAI,EAAE,CAAC;YACf,CAAC;iBAAM,CAAC;gBACN,KAAK,CAAC,IAAI,CAAC,OAAO,CAAC,CAAC;YACtB,CAAC;YAED,KAAK,MAAM,QAAQ,IAAI,KAAK,EAAE,CAAC;gBAC7B,MAAM,GAAG,GAAG,MAAM,QA
AQ,CAAC,QAAQ,EAAE,OAAO,CAAC,CAAC;gBAC9C,MAAM,MAAM,GAAG,SAAS,CAAC,GAAG,CAAC,CAAC;gBAC9B,MAAM,SAAS,GAAG,cAAc,CAAC,KAAK,CAAC,MAAM,CAAW,CAAC;gBACzD,KAAK,MAAM,IAAI,IAAI,SAAS,EAAE,CAAC;oBAC7B,IAAI,CAAC,OAAO,CAAC,GAAG,CAAC,IAAI,CAAC,EAAE,CAAC,EAAE,CAAC;wBAC1B,OAAO,CAAC,GAAG,CAAC,IAAI,CAAC,EAAE,CAAC,CAAC;wBACrB,QAAQ,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;oBACtB,CAAC;gBACH,CAAC;YACH,CAAC;QACH,CAAC;aAAM,CAAC;YACN,wCAAwC;YACxC,MAAM,IAAI,GAAG,UAAU,CAAC,KAAK,CAAC,KAAK,CAAS,CAAC;YAC7C,IAAI,CAAC,OAAO,CAAC,GAAG,CAAC,IAAI,CAAC,EAAE,CAAC,EAAE,CAAC;gBAC1B,OAAO,CAAC,GAAG,CAAC,IAAI,CAAC,EAAE,CAAC,CAAC;gBACrB,QAAQ,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;YACtB,CAAC;QACH,CAAC;IACH,CAAC;IAED,OAAO,QAAQ,CAAC;AAClB,CAAC;AAED;;GAEG;AACH,MAAM,CAAC,KAAK,UAAU,WAAW,CAAC,UAAkB;IAClD,MAAM,MAAM,GAAG,MAAM,cAAc,CAAC,UAAU,CAAC,CAAC;IAChD,MAAM,OAAO,GAAG,OAAO,CAAC,UAAU,CAAC,CAAC;IACpC,MAAM,KAAK,GAAG,MAAM,eAAe,CAAC,MAAM,CAAC,KAAK,EAAE,OAAO,CAAC,CAAC;IAC3D,MAAM,gBAAgB,GAAG,aAAa,CAAC,KAAK,CAAC,CAAC;IAC9C,OAAO,EAAE,MAAM,EAAE,KAAK,EAAE,gBAAgB,EAAE,CAAC;AAC7C,CAAC"}
1
+ {"version":3,"file":"project.js","sourceRoot":"","sources":["../src/project.ts"],"names":[],"mappings":"AAAA;;;GAGG;AAEH,OAAO,EAAE,QAAQ,EAAE,OAAO,EAAE,MAAM,EAAE,IAAI,EAAE,IAAI,EAAE,MAAM,kBAAkB,CAAC;AACzE,OAAO,EAAE,IAAI,EAAE,OAAO,EAAE,OAAO,EAAE,QAAQ,EAAE,MAAM,WAAW,CAAC;AAC7D,OAAO,EAAE,IAAI,EAAE,MAAM,kBAAkB,CAAC;AACxC,OAAO,EAAE,KAAK,IAAI,SAAS,EAAE,SAAS,IAAI,aAAa,EAAE,MAAM,MAAM,CAAC;AACtE,OAAO,EAAE,gBAAgB,EAAE,cAAc,EAAE,UAAU,EAAE,MAAM,cAAc,CAAC;AAG5E,MAAM,gBAAgB,GAAG,CAAC,gBAAgB,EAAE,eAAe,CAAC,CAAC;AAE7D;;GAEG;AACH,MAAM,CAAC,KAAK,UAAU,iBAAiB,CAAC,YAAqB;IAC3D,IAAI,YAAY,EAAE,CAAC;QACjB,IAAI,CAAC;YACH,MAAM,MAAM,CAAC,YAAY,CAAC,CAAC;YAC3B,OAAO,OAAO,CAAC,YAAY,CAAC,CAAC;QAC/B,CAAC;QAAC,MAAM,CAAC;YACP,OAAO,IAAI,CAAC;QACd,CAAC;IACH,CAAC;IAED,KAAK,MAAM,IAAI,IAAI,gBAAgB,EAAE,CAAC;QACpC,MAAM,SAAS,GAAG,OAAO,CAAC,IAAI,CAAC,CAAC;QAChC,IAAI,CAAC;YACH,MAAM,MAAM,CAAC,SAAS,CAAC,CAAC;YACxB,OAAO,SAAS,CAAC;QACnB,CAAC;QAAC,MAAM,CAAC;YACP,SAAS;QACX,CAAC;IACH,CAAC;IAED,OAAO,IAAI,CAAC;AACd,CAAC;AAED;;;GAGG;AACH,MAAM,CAAC,KAAK,UAAU,cAAc,CAAC,IAAY;IAC/C,MAAM,UAAU,GAAG,MAAM,QAAQ,CAAC,IAAI,EAAE,OAAO,CAAC,CAAC;IACjD,MAAM,MAAM,GAAG,SAAS,CAAC,UAAU,CAAC,CAAC;IACrC,MAAM,MAAM,GAAG,gBAAgB,CAAC,KAAK,CAAC,MAAM,CAAe,CAAC;IAC5D,OAAO,EAAE,MAAM,EAAE,UAAU,EAAE,CAAC;AAChC,CAAC;AAED;;;;GAIG;AACH,MAAM,CAAC,KAAK,UAAU,eAAe,CACnC,KAAwB,EACxB,OAAe;IAEf,MAAM,QAAQ,GAAW,EAAE,CAAC;IAC5B,MAAM,OAAO,GAAG,IAAI,GAAG,EAAU,CAAC;IAClC,MAAM,WAAW,GAAG,IAAI,GAAG,EAAkB,CAAC;IAE9C,KAAK,MAAM,KAAK,IAAI,KAAK,EAAE,CAAC;QAC1B,IAAI,OAAO,KAAK,KAAK,QAAQ,EAAE,CAAC;YAC9B,oBAAoB;YACpB,MAAM,GAAG,GAAG,KAAK,CAAC,OAAO,CAAC,YAAY,EAAE,EAAE,CAAC,CAAC;YAC5C,MAAM,OAAO,GAAG,IAAI,CAAC,OAAO,EAAE,GAAG,CAAC,CAAC;YAEnC,gCAAgC;YAChC,MAAM,KAAK,GAAa,EAAE,CAAC;YAC3B,IAAI,GAAG,CAAC,QAAQ,CAAC,GAAG,CAAC,EAAE,CAAC;gBACtB,IAAI,KAAK,EAAE,MAAM,KAAK,IAAI,IAAI,CAAC,OAAO,CAAC,EAAE,CAAC;oBACxC,IAAI,KAAK,CAAC,QAAQ,CAAC,OAAO,CAAC,IAAI,KAAK,CAAC,QAAQ,CAAC,MAAM,CAAC,EAAE,CAAC;wBACtD,KAAK,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC;oBACpB,CAAC;gBACH,CAAC;gBACD,KAAK,CAAC,IAAI,EAAE,CAAC;YACf,CA
AC;iBAAM,CAAC;gBACN,KAAK,CAAC,IAAI,CAAC,OAAO,CAAC,CAAC;YACtB,CAAC;YAED,KAAK,MAAM,QAAQ,IAAI,KAAK,EAAE,CAAC;gBAC7B,MAAM,GAAG,GAAG,MAAM,QAAQ,CAAC,QAAQ,EAAE,OAAO,CAAC,CAAC;gBAC9C,MAAM,OAAO,GAAG,QAAQ,CAAC,OAAO,EAAE,QAAQ,CAAC,CAAC;gBAC5C,WAAW,CAAC,GAAG,CAAC,OAAO,EAAE,GAAG,CAAC,CAAC;gBAC9B,MAAM,MAAM,GAAG,SAAS,CAAC,GAAG,CAAC,CAAC;gBAC9B,MAAM,SAAS,GAAG,cAAc,CAAC,KAAK,CAAC,MAAM,CAAW,CAAC;gBACzD,KAAK,MAAM,IAAI,IAAI,SAAS,EAAE,CAAC;oBAC7B,IAAI,CAAC,OAAO,CAAC,GAAG,CAAC,IAAI,CAAC,EAAE,CAAC,EAAE,CAAC;wBAC1B,OAAO,CAAC,GAAG,CAAC,IAAI,CAAC,EAAE,CAAC,CAAC;wBACrB,QAAQ,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;oBACtB,CAAC;gBACH,CAAC;YACH,CAAC;QACH,CAAC;aAAM,CAAC;YACN,wCAAwC;YACxC,MAAM,IAAI,GAAG,UAAU,CAAC,KAAK,CAAC,KAAK,CAAS,CAAC;YAC7C,IAAI,CAAC,OAAO,CAAC,GAAG,CAAC,IAAI,CAAC,EAAE,CAAC,EAAE,CAAC;gBAC1B,OAAO,CAAC,GAAG,CAAC,IAAI,CAAC,EAAE,CAAC,CAAC;gBACrB,QAAQ,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;YACtB,CAAC;QACH,CAAC;IACH,CAAC;IAED,OAAO,EAAE,KAAK,EAAE,QAAQ,EAAE,WAAW,EAAE,CAAC;AAC1C,CAAC;AAED;;GAEG;AACH,KAAK,UAAU,eAAe,CAAC,OAAe,EAAE,KAAa;IAC3D,IAAI,CAAC,CAAC;IACN,IAAI,CAAC;QACH,CAAC,GAAG,MAAM,IAAI,CAAC,OAAO,CAAC,CAAC;IAC1B,CAAC;IAAC,MAAM,CAAC;QACP,MAAM,IAAI,KAAK,CAAC,GAAG,KAAK,0BAA0B,OAAO,EAAE,CAAC,CAAC;IAC/D,CAAC;IACD,IAAI,CAAC,CAAC,CAAC,WAAW,EAAE,EAAE,CAAC;QACrB,MAAM,IAAI,KAAK,CAAC,GAAG,KAAK,8BAA8B,OAAO,EAAE,CAAC,CAAC;IACnE,CAAC;AACH,CAAC;AAED;;GAEG;AACH,KAAK,SAAS,CAAC,CAAC,OAAO,CAAC,GAAW;IACjC,MAAM,OAAO,GAAG,MAAM,OAAO,CAAC,GAAG,EAAE,EAAE,aAAa,EAAE,IAAI,EAAE,CAAC,CAAC;IAC5D,KAAK,MAAM,KAAK,IAAI,OAAO,EAAE,CAAC;QAC5B,MAAM,QAAQ,GAAG,IAAI,CAAC,GAAG,EAAE,KAAK,CAAC,IAAI,CAAC,CAAC;QACvC,IAAI,KAAK,CAAC,WAAW,EAAE,EAAE,CAAC;YACxB,KAAK,CAAC,CAAC,OAAO,CAAC,QAAQ,CAAC,CAAC;QAC3B,CAAC;aAAM,IAAI,KAAK,CAAC,MAAM,EAAE,EAAE,CAAC;YAC1B,MAAM,QAAQ,CAAC;QACjB,CAAC;IACH,CAAC;AACH,CAAC;AAED;;;GAGG;AACH,KAAK,UAAU,UAAU,CAAC,QAAgB;IACxC,IAAI,CAAC;QACH,MAAM,EAAE,GAAG,MAAM,IAAI,CAAC,QAAQ,EAAE,GAAG,CAAC,CAAC;QACrC,IAAI,CAAC;YACH,MAAM,GAAG,GAAG,MAAM,CAAC,KAAK,CAAC,GAAG,CAAC,CAAC;YAC9B,MAAM,EAAE,SAAS,EAAE,GAAG,MAAM,EAAE,CAAC,IAAI,CAAC,GAAG,EAA
E,CAAC,EAAE,GAAG,EAAE,CAAC,CAAC,CAAC;YACpD,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,SAAS,EAAE,CAAC,EAAE,EAAE,CAAC;gBACnC,IAAI,GAAG,CAAC,CAAC,CAAC,KAAK,CAAC;oBAAE,OAAO,KAAK,CAAC;YACjC,CAAC;YACD,OAAO,IAAI,CAAC;QACd,CAAC;gBAAS,CAAC;YACT,MAAM,EAAE,CAAC,KAAK,EAAE,CAAC;QACnB,CAAC;IACH,CAAC;IAAC,MAAM,CAAC;QACP,OAAO,KAAK,CAAC;IACf,CAAC;AACH,CAAC;AAED;;GAEG;AACH,MAAM,CAAC,KAAK,UAAU,WAAW,CAAC,UAAkB;IAMlD,MAAM,EAAE,MAAM,EAAE,UAAU,EAAE,GAAG,MAAM,cAAc,CAAC,UAAU,CAAC,CAAC;IAChE,MAAM,OAAO,GAAG,OAAO,CAAC,UAAU,CAAC,CAAC;IACpC,MAAM,EAAE,KAAK,EAAE,WAAW,EAAE,GAAG,MAAM,eAAe,CAAC,MAAM,CAAC,KAAK,EAAE,OAAO,CAAC,CAAC;IAE5E,iFAAiF;IACjF,MAAM,gBAAgB,GAAG,aAAa,CAAC,KAAK,CAAC,CAAC;IAE9C,kDAAkD;IAClD,IAAI,MAAM,CAAC,QAAQ,EAAE,CAAC;QACpB,MAAM,GAAG,GAAG,OAAO,CAAC,OAAO,EAAE,MAAM,CAAC,QAAQ,CAAC,CAAC;QAC9C,MAAM,eAAe,CAAC,GAAG,EAAE,oBAAoB,MAAM,CAAC,QAAQ,GAAG,CAAC,CAAC;QACnE,MAAM,CAAC,QAAQ,GAAG,GAAG,CAAC;IACxB,CAAC;IACD,KAAK,MAAM,IAAI,IAAI,KAAK,EAAE,CAAC;QACzB,IAAI,OAAO,IAAI,CAAC,QAAQ,KAAK,QAAQ,EAAE,CAAC;YACtC,MAAM,GAAG,GAAG,OAAO,CAAC,OAAO,EAAE,IAAI,CAAC,QAAQ,CAAC,CAAC;YAC5C,MAAM,eAAe,CAAC,GAAG,EAAE,SAAS,IAAI,CAAC,EAAE,eAAe,IAAI,CAAC,QAAQ,GAAG,CAAC,CAAC;YAC5E,IAAI,CAAC,QAAQ,GAAG,GAAG,CAAC;QACtB,CAAC;IACH,CAAC;IAED,8DAA8D;IAC9D,MAAM,cAAc,GAAG,UAAU,CAAC,QAAQ,CAAC,MAAM,CAAC,CAAC,CAAC,CAAC,eAAe,CAAC,CAAC,CAAC,gBAAgB,CAAC;IACxF,MAAM,YAAY,GAA2B;QAC3C,CAAC,cAAc,CAAC,EAAE,UAAU;KAC7B,CAAC;IACF,KAAK,MAAM,CAAC,OAAO,EAAE,OAAO,CAAC,IAAI,WAAW,EAAE,CAAC;QAC7C,YAAY,CAAC,OAAO,CAAC,GAAG,OAAO,CAAC;IAClC,CAAC;IAED,8DAA8D;IAC9D,MAAM,YAAY,GAAG,IAAI,GAAG,EAAU,CAAC;IACvC,IAAI,MAAM,CAAC,QAAQ;QAAE,YAAY,CAAC,GAAG,CAAC,MAAM,CAAC,QAAQ,CAAC,CAAC;IACvD,KAAK,MAAM,IAAI,IAAI,KAAK,EAAE,CAAC;QACzB,IAAI,OAAO,IAAI,CAAC,QAAQ,KAAK,QAAQ;YAAE,YAAY,CAAC,GAAG,CAAC,IAAI,CAAC,QAAQ,CAAC,CAAC;IACzE,CAAC;IACD,KAAK,MAAM,MAAM,IAAI,YAAY,EAAE,CAAC;QAClC,MAAM,MAAM,GAAG,QAAQ,CAAC,OAAO,EAAE,MAAM,CAAC,CAAC;QACzC,IAAI,KAAK,EAAE,MAAM,KAAK,IAAI,OAAO,CAAC,MAAM,CAAC,EAAE,CAAC;YAC1C,MAAM,OAAO,GAAG,IAAI,CAAC,MAAM,EAAE,QAAQ,CAAC,MAAM,EAAE,KAAK,CAAC,CAAC,CAAC;YACtD,IAA
I,CAAC,CAAC,MAAM,UAAU,CAAC,KAAK,CAAC,CAAC;gBAAE,SAAS;YACzC,YAAY,CAAC,OAAO,CAAC,GAAG,MAAM,QAAQ,CAAC,KAAK,EAAE,OAAO,CAAC,CAAC;QACzD,CAAC;IACH,CAAC;IAED,OAAO,EAAE,MAAM,EAAE,KAAK,EAAE,gBAAgB,EAAE,YAAY,EAAE,CAAC;AAC3D,CAAC"}
package/dist/prompt.d.ts CHANGED
@@ -1,13 +1,7 @@
1
1
  /**
2
2
  * Builds prompts for CLI benchmark agents.
3
- *
4
- * One unified system message for all context modes.
5
- * The user message varies by context mode:
6
- * - zero-shot: CLI name + task intent only
7
- * - help: CLI name + top-level --help output + task intent
8
- * - docs: CLI name + documentation contents + task intent
9
3
  */
10
- import type { ContextMode, Task } from './models.js';
4
+ import type { Task } from './models.js';
11
5
  export declare function buildSystemMessage(customPrompt?: string): string;
12
- export declare function buildUserMessage(cliName: string, task: Task, contextMode: ContextMode, contextPayload?: string | null): string;
6
+ export declare function buildUserMessage(cliName: string, task: Task): string;
13
7
  //# sourceMappingURL=prompt.d.ts.map
@@ -1 +1 @@
1
- {"version":3,"file":"prompt.d.ts","sourceRoot":"","sources":["../src/prompt.ts"],"names":[],"mappings":"AAAA;;;;;;;;GAQG;AAEH,OAAO,KAAK,EAAE,WAAW,EAAE,IAAI,EAAE,MAAM,aAAa,CAAC;AASrD,wBAAgB,kBAAkB,CAAC,YAAY,CAAC,EAAE,MAAM,GAAG,MAAM,CAKhE;AAED,wBAAgB,gBAAgB,CAC9B,OAAO,EAAE,MAAM,EACf,IAAI,EAAE,IAAI,EACV,WAAW,EAAE,WAAW,EACxB,cAAc,CAAC,EAAE,MAAM,GAAG,IAAI,GAC7B,MAAM,CA+BR"}
1
+ {"version":3,"file":"prompt.d.ts","sourceRoot":"","sources":["../src/prompt.ts"],"names":[],"mappings":"AAAA;;GAEG;AAEH,OAAO,KAAK,EAAE,IAAI,EAAE,MAAM,aAAa,CAAC;AASxC,wBAAgB,kBAAkB,CAAC,YAAY,CAAC,EAAE,MAAM,GAAG,MAAM,CAKhE;AAED,wBAAgB,gBAAgB,CAAC,OAAO,EAAE,MAAM,EAAE,IAAI,EAAE,IAAI,GAAG,MAAM,CAEpE"}
package/dist/prompt.js CHANGED
@@ -1,11 +1,5 @@
1
1
  /**
2
2
  * Builds prompts for CLI benchmark agents.
3
- *
4
- * One unified system message for all context modes.
5
- * The user message varies by context mode:
6
- * - zero-shot: CLI name + task intent only
7
- * - help: CLI name + top-level --help output + task intent
8
- * - docs: CLI name + documentation contents + task intent
9
3
  */
10
4
  const AGENT_SYSTEM_MESSAGE = `You are a CLI expert. You have a run_command tool to execute shell commands. Use it to accomplish the user's task.
11
5
 
@@ -19,34 +13,7 @@ export function buildSystemMessage(customPrompt) {
19
13
  }
20
14
  return AGENT_SYSTEM_MESSAGE;
21
15
  }
22
- export function buildUserMessage(cliName, task, contextMode, contextPayload) {
23
- switch (contextMode) {
24
- case 'help':
25
- if (contextPayload) {
26
- return `CLI: ${cliName}
27
-
28
- Help text:
29
- $ ${cliName} --help
30
- ${contextPayload}
31
-
32
- Task: ${task.intent}`;
33
- }
34
- // Fall through to zero-shot if no help available
35
- return `CLI: ${cliName}\n\nTask: ${task.intent}`;
36
- case 'docs':
37
- if (contextPayload) {
38
- return `CLI: ${cliName}
39
-
40
- Documentation:
41
- ${contextPayload}
42
-
43
- Task: ${task.intent}`;
44
- }
45
- // Fall through to zero-shot if no docs available
46
- return `CLI: ${cliName}\n\nTask: ${task.intent}`;
47
- case 'zero-shot':
48
- default:
49
- return `CLI: ${cliName}\n\nTask: ${task.intent}`;
50
- }
16
+ export function buildUserMessage(cliName, task) {
17
+ return `CLI: ${cliName}\n\nTask: ${task.intent}`;
51
18
  }
52
19
  //# sourceMappingURL=prompt.js.map
@@ -1 +1 @@
1
- {"version":3,"file":"prompt.js","sourceRoot":"","sources":["../src/prompt.ts"],"names":[],"mappings":"AAAA;;;;;;;;GAQG;AAIH,MAAM,oBAAoB,GAAG;;;;;gDAKmB,CAAC;AAEjD,MAAM,UAAU,kBAAkB,CAAC,YAAqB;IACtD,IAAI,YAAY,EAAE,CAAC;QACjB,OAAO,GAAG,oBAAoB,OAAO,YAAY,EAAE,CAAC;IACtD,CAAC;IACD,OAAO,oBAAoB,CAAC;AAC9B,CAAC;AAED,MAAM,UAAU,gBAAgB,CAC9B,OAAe,EACf,IAAU,EACV,WAAwB,EACxB,cAA8B;IAE9B,QAAQ,WAAW,EAAE,CAAC;QACpB,KAAK,MAAM;YACT,IAAI,cAAc,EAAE,CAAC;gBACnB,OAAO,QAAQ,OAAO;;;IAG1B,OAAO;EACT,cAAc;;QAER,IAAI,CAAC,MAAM,EAAE,CAAC;YAChB,CAAC;YACD,iDAAiD;YACjD,OAAO,QAAQ,OAAO,aAAa,IAAI,CAAC,MAAM,EAAE,CAAC;QAEnD,KAAK,MAAM;YACT,IAAI,cAAc,EAAE,CAAC;gBACnB,OAAO,QAAQ,OAAO;;;EAG5B,cAAc;;QAER,IAAI,CAAC,MAAM,EAAE,CAAC;YAChB,CAAC;YACD,iDAAiD;YACjD,OAAO,QAAQ,OAAO,aAAa,IAAI,CAAC,MAAM,EAAE,CAAC;QAEnD,KAAK,WAAW,CAAC;QACjB;YACE,OAAO,QAAQ,OAAO,aAAa,IAAI,CAAC,MAAM,EAAE,CAAC;IACrD,CAAC;AACH,CAAC"}
1
+ {"version":3,"file":"prompt.js","sourceRoot":"","sources":["../src/prompt.ts"],"names":[],"mappings":"AAAA;;GAEG;AAIH,MAAM,oBAAoB,GAAG;;;;;gDAKmB,CAAC;AAEjD,MAAM,UAAU,kBAAkB,CAAC,YAAqB;IACtD,IAAI,YAAY,EAAE,CAAC;QACjB,OAAO,GAAG,oBAAoB,OAAO,YAAY,EAAE,CAAC;IACtD,CAAC;IACD,OAAO,oBAAoB,CAAC;AAC9B,CAAC;AAED,MAAM,UAAU,gBAAgB,CAAC,OAAe,EAAE,IAAU;IAC1D,OAAO,QAAQ,OAAO,aAAa,IAAI,CAAC,MAAM,EAAE,CAAC;AACnD,CAAC"}
package/dist/providers.d.ts CHANGED
@@ -1,16 +1,17 @@
1
1
  /**
2
- * Model registry + AI Gateway configuration.
2
+ * Model registry + provider routing.
3
3
  *
4
- * Uses Vercel AI SDK v6 gateway() which is re-exported from 'ai'.
5
- * Model IDs use the gateway format: "provider/model-id".
4
+ * Google models use @ai-sdk/google directly when GOOGLE_GENERATIVE_AI_API_KEY
5
+ * is set, bypassing the gateway. All other models use the Vercel AI Gateway.
6
6
  */
7
+ import { type LanguageModel } from 'ai';
7
8
  export interface ModelEntry {
8
9
  id: string;
9
10
  displayName: string;
10
11
  provider: string;
11
12
  }
12
13
  export declare const MODELS: readonly ModelEntry[];
13
- export declare function getModel(modelId: string): import("@ai-sdk/provider").LanguageModelV3;
14
+ export declare function getModel(modelId: string): LanguageModel;
14
15
  export declare function getModelEntry(modelId: string): ModelEntry | undefined;
15
16
  export declare function filterModels(modelIds: string[]): ModelEntry[];
16
17
  /**
@@ -19,8 +20,9 @@ export declare function filterModels(modelIds: string[]): ModelEntry[];
19
20
  */
20
21
  export declare function resolveProviders(providerIds: string[]): ModelEntry[];
21
22
  /**
22
- * Validate that AI_GATEWAY_API_KEY is set.
23
- * All model calls go through the AI Gateway — no per-provider keys are needed.
23
+ * Validate that the required API keys are set for the given models.
24
+ * Google models can use either GOOGLE_GENERATIVE_AI_API_KEY (direct) or AI_GATEWAY_API_KEY.
25
+ * All other models require AI_GATEWAY_API_KEY.
24
26
  */
25
- export declare function validateGatewayKey(): void;
27
+ export declare function validateApiKeys(modelIds: string[]): void;
26
28
  //# sourceMappingURL=providers.d.ts.map
@@ -1 +1 @@
1
- {"version":3,"file":"providers.d.ts","sourceRoot":"","sources":["../src/providers.ts"],"names":[],"mappings":"AAAA;;;;;GAKG;AAIH,MAAM,WAAW,UAAU;IACzB,EAAE,EAAE,MAAM,CAAC;IACX,WAAW,EAAE,MAAM,CAAC;IACpB,QAAQ,EAAE,MAAM,CAAC;CAClB;AAED,eAAO,MAAM,MAAM,EAAE,SAAS,UAAU,EAc9B,CAAC;AAEX,wBAAgB,QAAQ,CAAC,OAAO,EAAE,MAAM,8CAEvC;AAED,wBAAgB,aAAa,CAAC,OAAO,EAAE,MAAM,GAAG,UAAU,GAAG,SAAS,CAErE;AAED,wBAAgB,YAAY,CAAC,QAAQ,EAAE,MAAM,EAAE,GAAG,UAAU,EAAE,CAG7D;AAED;;;GAGG;AACH,wBAAgB,gBAAgB,CAAC,WAAW,EAAE,MAAM,EAAE,GAAG,UAAU,EAAE,CAWpE;AAED;;;GAGG;AACH,wBAAgB,kBAAkB,IAAI,IAAI,CAMzC"}
1
+ {"version":3,"file":"providers.d.ts","sourceRoot":"","sources":["../src/providers.ts"],"names":[],"mappings":"AAAA;;;;;GAKG;AAEH,OAAO,EAAW,KAAK,aAAa,EAAE,MAAM,IAAI,CAAC;AAGjD,MAAM,WAAW,UAAU;IACzB,EAAE,EAAE,MAAM,CAAC;IACX,WAAW,EAAE,MAAM,CAAC;IACpB,QAAQ,EAAE,MAAM,CAAC;CAClB;AAED,eAAO,MAAM,MAAM,EAAE,SAAS,UAAU,EAiB9B,CAAC;AAEX,wBAAgB,QAAQ,CAAC,OAAO,EAAE,MAAM,GAAG,aAAa,CAQvD;AAED,wBAAgB,aAAa,CAAC,OAAO,EAAE,MAAM,GAAG,UAAU,GAAG,SAAS,CAErE;AAED,wBAAgB,YAAY,CAAC,QAAQ,EAAE,MAAM,EAAE,GAAG,UAAU,EAAE,CAG7D;AAED;;;GAGG;AACH,wBAAgB,gBAAgB,CAAC,WAAW,EAAE,MAAM,EAAE,GAAG,UAAU,EAAE,CAWpE;AAED;;;;GAIG;AACH,wBAAgB,eAAe,CAAC,QAAQ,EAAE,MAAM,EAAE,GAAG,IAAI,CAgBxD"}
package/dist/providers.js CHANGED
@@ -1,10 +1,11 @@
1
1
  /**
2
- * Model registry + AI Gateway configuration.
2
+ * Model registry + provider routing.
3
3
  *
4
- * Uses Vercel AI SDK v6 gateway() which is re-exported from 'ai'.
5
- * Model IDs use the gateway format: "provider/model-id".
4
+ * Google models use @ai-sdk/google directly when GOOGLE_GENERATIVE_AI_API_KEY
5
+ * is set, bypassing the gateway. All other models use the Vercel AI Gateway.
6
6
  */
7
7
  import { gateway } from 'ai';
8
+ import { createGoogleGenerativeAI } from '@ai-sdk/google';
8
9
  export const MODELS = [
9
10
  { id: 'anthropic/claude-opus-4.6', displayName: 'Claude Opus 4.6', provider: 'anthropic' },
10
11
  { id: 'anthropic/claude-sonnet-4.6', displayName: 'Claude Sonnet 4.6', provider: 'anthropic' },
@@ -14,13 +15,22 @@ export const MODELS = [
14
15
  { id: 'openai/gpt-5.2', displayName: 'GPT-5.2', provider: 'openai' },
15
16
  { id: 'openai/gpt-4o', displayName: 'GPT-4o', provider: 'openai' },
16
17
  { id: 'openai/gpt-4o-mini', displayName: 'GPT-4o Mini', provider: 'openai' },
18
+ { id: 'google/gemini-3-pro', displayName: 'Gemini 3 Pro', provider: 'google' },
17
19
  { id: 'google/gemini-3-flash', displayName: 'Gemini 3 Flash', provider: 'google' },
18
20
  { id: 'google/gemini-2.5-pro', displayName: 'Gemini 2.5 Pro', provider: 'google' },
19
21
  { id: 'google/gemini-2.5-flash', displayName: 'Gemini 2.5 Flash', provider: 'google' },
22
+ { id: 'google/gemini-2.5-flash-lite', displayName: 'Gemini 2.5 Flash Lite', provider: 'google' },
23
+ { id: 'google/gemini-2.0-flash', displayName: 'Gemini 2.0 Flash', provider: 'google' },
20
24
  { id: 'meta/llama-3.1-8b', displayName: 'Llama 3.1 8B', provider: 'meta' },
21
25
  { id: 'mistral/ministral-3b', displayName: 'Ministral 3B', provider: 'mistral' },
22
26
  ];
23
27
  export function getModel(modelId) {
28
+ // Direct Google API — takes priority when key is set
29
+ if (modelId.startsWith('google/') && process.env['GOOGLE_GENERATIVE_AI_API_KEY']) {
30
+ const google = createGoogleGenerativeAI();
31
+ return google(modelId.replace('google/', ''));
32
+ }
33
+ // All other models go through Vercel AI Gateway
24
34
  return gateway(modelId);
25
35
  }
26
36
  export function getModelEntry(modelId) {
@@ -49,12 +59,20 @@ export function resolveProviders(providerIds) {
49
59
  });
50
60
  }
51
61
  /**
52
- * Validate that AI_GATEWAY_API_KEY is set.
53
- * All model calls go through the AI Gateway — no per-provider keys are needed.
62
+ * Validate that the required API keys are set for the given models.
63
+ * Google models can use either GOOGLE_GENERATIVE_AI_API_KEY (direct) or AI_GATEWAY_API_KEY.
64
+ * All other models require AI_GATEWAY_API_KEY.
54
65
  */
55
- export function validateGatewayKey() {
56
- if (!process.env['AI_GATEWAY_API_KEY']) {
57
- throw new Error('Missing AI_GATEWAY_API_KEY. All model calls require the AI Gateway.\nSet AI_GATEWAY_API_KEY in your environment before running cli-bench.');
66
+ export function validateApiKeys(modelIds) {
67
+ const hasGoogleKey = !!process.env['GOOGLE_GENERATIVE_AI_API_KEY'];
68
+ const hasGatewayKey = !!process.env['AI_GATEWAY_API_KEY'];
69
+ const googleModels = modelIds.filter((id) => id.startsWith('google/'));
70
+ const otherModels = modelIds.filter((id) => !id.startsWith('google/'));
71
+ if (googleModels.length > 0 && !hasGoogleKey && !hasGatewayKey) {
72
+ throw new Error('Google models require GOOGLE_GENERATIVE_AI_API_KEY or AI_GATEWAY_API_KEY.');
73
+ }
74
+ if (otherModels.length > 0 && !hasGatewayKey) {
75
+ throw new Error(`Non-Google models (${otherModels.join(', ')}) require AI_GATEWAY_API_KEY.`);
58
76
  }
59
77
  }
60
78
  //# sourceMappingURL=providers.js.map
@@ -1 +1 @@
1
- {"version":3,"file":"providers.js","sourceRoot":"","sources":["../src/providers.ts"],"names":[],"mappings":"AAAA;;;;;GAKG;AAEH,OAAO,EAAE,OAAO,EAAE,MAAM,IAAI,CAAC;AAQ7B,MAAM,CAAC,MAAM,MAAM,GAA0B;IAC3C,EAAE,EAAE,EAAE,2BAA2B,EAAE,WAAW,EAAE,iBAAiB,EAAE,QAAQ,EAAE,WAAW,EAAE;IAC1F,EAAE,EAAE,EAAE,6BAA6B,EAAE,WAAW,EAAE,mBAAmB,EAAE,QAAQ,EAAE,WAAW,EAAE;IAC9F,EAAE,EAAE,EAAE,oCAAoC,EAAE,WAAW,EAAE,iBAAiB,EAAE,QAAQ,EAAE,WAAW,EAAE;IACnG,EAAE,EAAE,EAAE,4BAA4B,EAAE,WAAW,EAAE,kBAAkB,EAAE,QAAQ,EAAE,WAAW,EAAE;IAC5F,EAAE,EAAE,EAAE,qCAAqC,EAAE,WAAW,EAAE,kBAAkB,EAAE,QAAQ,EAAE,WAAW,EAAE;IACrG,EAAE,EAAE,EAAE,gBAAgB,EAAE,WAAW,EAAE,SAAS,EAAE,QAAQ,EAAE,QAAQ,EAAE;IACpE,EAAE,EAAE,EAAE,eAAe,EAAE,WAAW,EAAE,QAAQ,EAAE,QAAQ,EAAE,QAAQ,EAAE;IAClE,EAAE,EAAE,EAAE,oBAAoB,EAAE,WAAW,EAAE,aAAa,EAAE,QAAQ,EAAE,QAAQ,EAAE;IAC5E,EAAE,EAAE,EAAE,uBAAuB,EAAE,WAAW,EAAE,gBAAgB,EAAE,QAAQ,EAAE,QAAQ,EAAE;IAClF,EAAE,EAAE,EAAE,uBAAuB,EAAE,WAAW,EAAE,gBAAgB,EAAE,QAAQ,EAAE,QAAQ,EAAE;IAClF,EAAE,EAAE,EAAE,yBAAyB,EAAE,WAAW,EAAE,kBAAkB,EAAE,QAAQ,EAAE,QAAQ,EAAE;IACtF,EAAE,EAAE,EAAE,mBAAmB,EAAE,WAAW,EAAE,cAAc,EAAE,QAAQ,EAAE,MAAM,EAAE;IAC1E,EAAE,EAAE,EAAE,sBAAsB,EAAE,WAAW,EAAE,cAAc,EAAE,QAAQ,EAAE,SAAS,EAAE;CACxE,CAAC;AAEX,MAAM,UAAU,QAAQ,CAAC,OAAe;IACtC,OAAO,OAAO,CAAC,OAAO,CAAC,CAAC;AAC1B,CAAC;AAED,MAAM,UAAU,aAAa,CAAC,OAAe;IAC3C,OAAO,MAAM,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,EAAE,KAAK,OAAO,CAAC,CAAC;AAC9C,CAAC;AAED,MAAM,UAAU,YAAY,CAAC,QAAkB;IAC7C,IAAI,QAAQ,CAAC,MAAM,KAAK,CAAC;QAAE,OAAO,CAAC,GAAG,MAAM,CAAC,CAAC;IAC9C,OAAO,MAAM,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,QAAQ,CAAC,QAAQ,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC;AACvD,CAAC;AAED;;;GAGG;AACH,MAAM,UAAU,gBAAgB,CAAC,WAAqB;IACpD,OAAO,WAAW,CAAC,GAAG,CAAC,CAAC,EAAE,EAAE,EAAE;QAC5B,MAAM,QAAQ,GAAG,MAAM,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,EAAE,KAAK,EAAE,CAAC,CAAC;QACjD,IAAI,QAAQ;YAAE,OAAO,EAAE,GAAG,QAAQ,EAAE,CAAC;QACrC,MAAM,CAAC,QAAQ,EAAE,GAAG,IAAI,CAAC,GAAG,EAAE,CAAC,KAAK,CAAC,GAAG,CAAC,CAAC;QAC1C,OAAO;YACL,EAAE;YACF,WAAW,EAAE,IAAI,CAAC,IAAI,CAAC,GAAG,CAAC;YAC3B,Q
AAQ,EAAE,QAAkC;SAC7C,CAAC;IACJ,CAAC,CAAC,CAAC;AACL,CAAC;AAED;;;GAGG;AACH,MAAM,UAAU,kBAAkB;IAChC,IAAI,CAAC,OAAO,CAAC,GAAG,CAAC,oBAAoB,CAAC,EAAE,CAAC;QACvC,MAAM,IAAI,KAAK,CACb,2IAA2I,CAC5I,CAAC;IACJ,CAAC;AACH,CAAC"}
1
+ {"version":3,"file":"providers.js","sourceRoot":"","sources":["../src/providers.ts"],"names":[],"mappings":"AAAA;;;;;GAKG;AAEH,OAAO,EAAE,OAAO,EAAsB,MAAM,IAAI,CAAC;AACjD,OAAO,EAAE,wBAAwB,EAAE,MAAM,gBAAgB,CAAC;AAQ1D,MAAM,CAAC,MAAM,MAAM,GAA0B;IAC3C,EAAE,EAAE,EAAE,2BAA2B,EAAE,WAAW,EAAE,iBAAiB,EAAE,QAAQ,EAAE,WAAW,EAAE;IAC1F,EAAE,EAAE,EAAE,6BAA6B,EAAE,WAAW,EAAE,mBAAmB,EAAE,QAAQ,EAAE,WAAW,EAAE;IAC9F,EAAE,EAAE,EAAE,oCAAoC,EAAE,WAAW,EAAE,iBAAiB,EAAE,QAAQ,EAAE,WAAW,EAAE;IACnG,EAAE,EAAE,EAAE,4BAA4B,EAAE,WAAW,EAAE,kBAAkB,EAAE,QAAQ,EAAE,WAAW,EAAE;IAC5F,EAAE,EAAE,EAAE,qCAAqC,EAAE,WAAW,EAAE,kBAAkB,EAAE,QAAQ,EAAE,WAAW,EAAE;IACrG,EAAE,EAAE,EAAE,gBAAgB,EAAE,WAAW,EAAE,SAAS,EAAE,QAAQ,EAAE,QAAQ,EAAE;IACpE,EAAE,EAAE,EAAE,eAAe,EAAE,WAAW,EAAE,QAAQ,EAAE,QAAQ,EAAE,QAAQ,EAAE;IAClE,EAAE,EAAE,EAAE,oBAAoB,EAAE,WAAW,EAAE,aAAa,EAAE,QAAQ,EAAE,QAAQ,EAAE;IAC5E,EAAE,EAAE,EAAE,qBAAqB,EAAE,WAAW,EAAE,cAAc,EAAE,QAAQ,EAAE,QAAQ,EAAE;IAC9E,EAAE,EAAE,EAAE,uBAAuB,EAAE,WAAW,EAAE,gBAAgB,EAAE,QAAQ,EAAE,QAAQ,EAAE;IAClF,EAAE,EAAE,EAAE,uBAAuB,EAAE,WAAW,EAAE,gBAAgB,EAAE,QAAQ,EAAE,QAAQ,EAAE;IAClF,EAAE,EAAE,EAAE,yBAAyB,EAAE,WAAW,EAAE,kBAAkB,EAAE,QAAQ,EAAE,QAAQ,EAAE;IACtF,EAAE,EAAE,EAAE,8BAA8B,EAAE,WAAW,EAAE,uBAAuB,EAAE,QAAQ,EAAE,QAAQ,EAAE;IAChG,EAAE,EAAE,EAAE,yBAAyB,EAAE,WAAW,EAAE,kBAAkB,EAAE,QAAQ,EAAE,QAAQ,EAAE;IACtF,EAAE,EAAE,EAAE,mBAAmB,EAAE,WAAW,EAAE,cAAc,EAAE,QAAQ,EAAE,MAAM,EAAE;IAC1E,EAAE,EAAE,EAAE,sBAAsB,EAAE,WAAW,EAAE,cAAc,EAAE,QAAQ,EAAE,SAAS,EAAE;CACxE,CAAC;AAEX,MAAM,UAAU,QAAQ,CAAC,OAAe;IACtC,qDAAqD;IACrD,IAAI,OAAO,CAAC,UAAU,CAAC,SAAS,CAAC,IAAI,OAAO,CAAC,GAAG,CAAC,8BAA8B,CAAC,EAAE,CAAC;QACjF,MAAM,MAAM,GAAG,wBAAwB,EAAE,CAAC;QAC1C,OAAO,MAAM,CAAC,OAAO,CAAC,OAAO,CAAC,SAAS,EAAE,EAAE,CAAC,CAAC,CAAC;IAChD,CAAC;IACD,gDAAgD;IAChD,OAAO,OAAO,CAAC,OAAO,CAAC,CAAC;AAC1B,CAAC;AAED,MAAM,UAAU,aAAa,CAAC,OAAe;IAC3C,OAAO,MAAM,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,EAAE,KAAK,OAAO,CAAC,CAAC;AAC9C,CAAC;AAED,MAAM,UAAU,YAAY,CAAC,QAAkB;IAC7C,IAAI,QAAQ,CAAC,MAAM,KAAK,CAAC;QAAE,OAAO,CAAC,GAAG,MAAM,CAAC,CAAC;IAC9C,OAAO,MAAM,C
AAC,MAAM,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,QAAQ,CAAC,QAAQ,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC;AACvD,CAAC;AAED;;;GAGG;AACH,MAAM,UAAU,gBAAgB,CAAC,WAAqB;IACpD,OAAO,WAAW,CAAC,GAAG,CAAC,CAAC,EAAE,EAAE,EAAE;QAC5B,MAAM,QAAQ,GAAG,MAAM,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,EAAE,KAAK,EAAE,CAAC,CAAC;QACjD,IAAI,QAAQ;YAAE,OAAO,EAAE,GAAG,QAAQ,EAAE,CAAC;QACrC,MAAM,CAAC,QAAQ,EAAE,GAAG,IAAI,CAAC,GAAG,EAAE,CAAC,KAAK,CAAC,GAAG,CAAC,CAAC;QAC1C,OAAO;YACL,EAAE;YACF,WAAW,EAAE,IAAI,CAAC,IAAI,CAAC,GAAG,CAAC;YAC3B,QAAQ,EAAE,QAAkC;SAC7C,CAAC;IACJ,CAAC,CAAC,CAAC;AACL,CAAC;AAED;;;;GAIG;AACH,MAAM,UAAU,eAAe,CAAC,QAAkB;IAChD,MAAM,YAAY,GAAG,CAAC,CAAC,OAAO,CAAC,GAAG,CAAC,8BAA8B,CAAC,CAAC;IACnE,MAAM,aAAa,GAAG,CAAC,CAAC,OAAO,CAAC,GAAG,CAAC,oBAAoB,CAAC,CAAC;IAC1D,MAAM,YAAY,GAAG,QAAQ,CAAC,MAAM,CAAC,CAAC,EAAE,EAAE,EAAE,CAAC,EAAE,CAAC,UAAU,CAAC,SAAS,CAAC,CAAC,CAAC;IACvE,MAAM,WAAW,GAAG,QAAQ,CAAC,MAAM,CAAC,CAAC,EAAE,EAAE,EAAE,CAAC,CAAC,EAAE,CAAC,UAAU,CAAC,SAAS,CAAC,CAAC,CAAC;IAEvE,IAAI,YAAY,CAAC,MAAM,GAAG,CAAC,IAAI,CAAC,YAAY,IAAI,CAAC,aAAa,EAAE,CAAC;QAC/D,MAAM,IAAI,KAAK,CACb,2EAA2E,CAC5E,CAAC;IACJ,CAAC;IACD,IAAI,WAAW,CAAC,MAAM,GAAG,CAAC,IAAI,CAAC,aAAa,EAAE,CAAC;QAC7C,MAAM,IAAI,KAAK,CACb,sBAAsB,WAAW,CAAC,IAAI,CAAC,IAAI,CAAC,+BAA+B,CAC5E,CAAC;IACJ,CAAC;AACH,CAAC"}
package/dist/runner.d.ts CHANGED
@@ -1,5 +1,5 @@
1
1
  /**
2
- * Orchestrates the task x model x contextMode matrix with tool-calling agents.
2
+ * Orchestrates the task x model matrix with tool-calling agents.
3
3
  *
4
4
  * For each task:
5
5
  * - Create temp workdir (or use configured one)
@@ -9,11 +9,29 @@
9
9
  * - After maxSteps or completion: run assertions
10
10
  * - Aggregate into GridReport
11
11
  */
12
+ import { type LanguageModel } from 'ai';
12
13
  import type { TaskSuite, Task, GridReport } from './models.js';
13
14
  import type { Config } from './config.js';
14
15
  import type { ModelEntry } from './providers.js';
15
16
  export declare function loadTaskSuite(filePath: string): Promise<TaskSuite>;
16
- export declare function discoverTaskSuites(suiteDir: string): Promise<Map<string, string>>;
17
+ /**
18
+ * Replace {{workdir}} in a string with the actual path.
19
+ */
20
+ export declare function resolveTemplate(s: string, vars: {
21
+ workdir: string;
22
+ }): string;
23
+ /**
24
+ * Resolve env var templates: replace {{workdir}} with actual path.
25
+ */
26
+ export declare function resolveEnv(env: Record<string, string> | undefined, vars: {
27
+ workdir: string;
28
+ }): Record<string, string> | undefined;
29
+ /**
30
+ * Resolve {{workdir}} in an array of command strings.
31
+ */
32
+ export declare function resolveCommands(commands: string[], vars: {
33
+ workdir: string;
34
+ }): string[];
17
35
  export interface RunGridOptions {
18
36
  config: Config;
19
37
  /** Override tasks (from config file mode). */
@@ -37,12 +55,22 @@ export interface RunGridOptions {
37
55
  githubUrl?: string;
38
56
  /** Serialized YAML of the resolved task suite. */
39
57
  taskSuiteContent?: string;
40
- /** Directory containing the config file (for resolving docs.md). */
41
- configDir?: string;
42
58
  /** Env var names whose values should be redacted from results. */
43
59
  redactEnvVars?: string[];
44
60
  /** Regex patterns to redact from results (e.g. API-echoed account IDs). */
45
61
  redactPatterns?: string[];
62
+ /** Config-level env vars (merged with per-task env, supports {{workdir}}). */
63
+ fileEnv?: Record<string, string>;
64
+ /** Config-level setup commands (run before each task). */
65
+ fileSetup?: string[];
66
+ /** Config-level cleanup commands (run after each task). */
67
+ fileCleanup?: string[];
68
+ /** Config-level scaffold directory (copied into workdir before setup). */
69
+ fileScaffold?: string;
70
+ /** Original project files (config + task files) for upload. */
71
+ projectFiles?: Record<string, string>;
72
+ /** Override the language model (for testing with mock models). */
73
+ modelOverride?: LanguageModel;
46
74
  }
47
75
  export declare function runGrid(opts: RunGridOptions): Promise<GridReport[]>;
48
76
  export declare function uploadReport(report: GridReport, backendUrl: string, apiKey: string): Promise<void>;
@@ -1 +1 @@
1
- {"version":3,"file":"runner.d.ts","sourceRoot":"","sources":["../src/runner.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;GAUG;AASH,OAAO,KAAK,EACV,SAAS,EACT,IAAI,EACJ,UAAU,EAKX,MAAM,aAAa,CAAC;AACrB,OAAO,KAAK,EAAE,MAAM,EAAE,MAAM,aAAa,CAAC;AAC1C,OAAO,KAAK,EAAE,UAAU,EAAE,MAAM,gBAAgB,CAAC;AAUjD,wBAAsB,aAAa,CAAC,QAAQ,EAAE,MAAM,GAAG,OAAO,CAAC,SAAS,CAAC,CAKxE;AAED,wBAAsB,kBAAkB,CACtC,QAAQ,EAAE,MAAM,GACf,OAAO,CAAC,GAAG,CAAC,MAAM,EAAE,MAAM,CAAC,CAAC,CAc9B;AAiOD,MAAM,WAAW,cAAc;IAC7B,MAAM,EAAE,MAAM,CAAC;IACf,8CAA8C;IAC9C,KAAK,CAAC,EAAE,IAAI,EAAE,CAAC;IACf,iDAAiD;IACjD,OAAO,CAAC,EAAE,MAAM,CAAC;IACjB,+CAA+C;IAC/C,MAAM,CAAC,EAAE,UAAU,EAAE,CAAC;IACtB,wDAAwD;IACxD,cAAc,CAAC,EAAE,MAAM,CAAC;IACxB,0DAA0D;IAC1D,OAAO,CAAC,EAAE,MAAM,CAAC;IACjB,uDAAuD;IACvD,YAAY,CAAC,EAAE,MAAM,CAAC;IACtB,oDAAoD;IACpD,YAAY,CAAC,EAAE,MAAM,CAAC;IACtB,mDAAmD;IACnD,WAAW,CAAC,EAAE,MAAM,CAAC;IACrB,QAAQ,CAAC,EAAE,MAAM,CAAC;IAClB,UAAU,CAAC,EAAE,MAAM,CAAC;IACpB,SAAS,CAAC,EAAE,MAAM,CAAC;IACnB,kDAAkD;IAClD,gBAAgB,CAAC,EAAE,MAAM,CAAC;IAC1B,oEAAoE;IACpE,SAAS,CAAC,EAAE,MAAM,CAAC;IACnB,kEAAkE;IAClE,aAAa,CAAC,EAAE,MAAM,EAAE,CAAC;IACzB,2EAA2E;IAC3E,cAAc,CAAC,EAAE,MAAM,EAAE,CAAC;CAC3B;AAED,wBAAsB,OAAO,CAAC,IAAI,EAAE,cAAc,GAAG,OAAO,CAAC,UAAU,EAAE,CAAC,CAsEzE;AA+PD,wBAAsB,YAAY,CAChC,MAAM,EAAE,UAAU,EAClB,UAAU,EAAE,MAAM,EAClB,MAAM,EAAE,MAAM,GACb,OAAO,CAAC,IAAI,CAAC,CAsBf"}
1
+ {"version":3,"file":"runner.d.ts","sourceRoot":"","sources":["../src/runner.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;GAUG;AAMH,OAAO,EAA6B,KAAK,aAAa,EAAE,MAAM,IAAI,CAAC;AAGnE,OAAO,KAAK,EACV,SAAS,EACT,IAAI,EACJ,UAAU,EAIX,MAAM,aAAa,CAAC;AACrB,OAAO,KAAK,EAAE,MAAM,EAAE,MAAM,aAAa,CAAC;AAC1C,OAAO,KAAK,EAAE,UAAU,EAAE,MAAM,gBAAgB,CAAC;AASjD,wBAAsB,aAAa,CAAC,QAAQ,EAAE,MAAM,GAAG,OAAO,CAAC,SAAS,CAAC,CAKxE;AAGD;;GAEG;AACH,wBAAgB,eAAe,CAAC,CAAC,EAAE,MAAM,EAAE,IAAI,EAAE;IAAE,OAAO,EAAE,MAAM,CAAA;CAAE,GAAG,MAAM,CAE5E;AAED;;GAEG;AACH,wBAAgB,UAAU,CACxB,GAAG,EAAE,MAAM,CAAC,MAAM,EAAE,MAAM,CAAC,GAAG,SAAS,EACvC,IAAI,EAAE;IAAE,OAAO,EAAE,MAAM,CAAA;CAAE,GACxB,MAAM,CAAC,MAAM,EAAE,MAAM,CAAC,GAAG,SAAS,CAOpC;AAED;;GAEG;AACH,wBAAgB,eAAe,CAC7B,QAAQ,EAAE,MAAM,EAAE,EAClB,IAAI,EAAE;IAAE,OAAO,EAAE,MAAM,CAAA;CAAE,GACxB,MAAM,EAAE,CAEV;AAkOD,MAAM,WAAW,cAAc;IAC7B,MAAM,EAAE,MAAM,CAAC;IACf,8CAA8C;IAC9C,KAAK,CAAC,EAAE,IAAI,EAAE,CAAC;IACf,iDAAiD;IACjD,OAAO,CAAC,EAAE,MAAM,CAAC;IACjB,+CAA+C;IAC/C,MAAM,CAAC,EAAE,UAAU,EAAE,CAAC;IACtB,wDAAwD;IACxD,cAAc,CAAC,EAAE,MAAM,CAAC;IACxB,0DAA0D;IAC1D,OAAO,CAAC,EAAE,MAAM,CAAC;IACjB,uDAAuD;IACvD,YAAY,CAAC,EAAE,MAAM,CAAC;IACtB,oDAAoD;IACpD,YAAY,CAAC,EAAE,MAAM,CAAC;IACtB,mDAAmD;IACnD,WAAW,CAAC,EAAE,MAAM,CAAC;IACrB,QAAQ,CAAC,EAAE,MAAM,CAAC;IAClB,UAAU,CAAC,EAAE,MAAM,CAAC;IACpB,SAAS,CAAC,EAAE,MAAM,CAAC;IACnB,kDAAkD;IAClD,gBAAgB,CAAC,EAAE,MAAM,CAAC;IAC1B,kEAAkE;IAClE,aAAa,CAAC,EAAE,MAAM,EAAE,CAAC;IACzB,2EAA2E;IAC3E,cAAc,CAAC,EAAE,MAAM,EAAE,CAAC;IAC1B,8EAA8E;IAC9E,OAAO,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,MAAM,CAAC,CAAC;IACjC,0DAA0D;IAC1D,SAAS,CAAC,EAAE,MAAM,EAAE,CAAC;IACrB,2DAA2D;IAC3D,WAAW,CAAC,EAAE,MAAM,EAAE,CAAC;IACvB,0EAA0E;IAC1E,YAAY,CAAC,EAAE,MAAM,CAAC;IACtB,+DAA+D;IAC/D,YAAY,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,MAAM,CAAC,CAAC;IACtC,kEAAkE;IAClE,aAAa,CAAC,EAAE,aAAa,CAAC;CAC/B;AAED,wBAAsB,OAAO,CAAC,IAAI,EAAE,cAAc,GAAG,OAAO,CAAC,UAAU,EAAE,CAAC,CAmCzE;AA8RD,wBAAsB,YAAY,CAChC,MAAM,EAAE,UAAU,EAClB,UAAU,EAAE,MAAM,EAClB,MAAM,EAAE,MAAM,GACb,OAAO,CAAC,IAAI,CAAC,CAsBf"}