@cliwatch/cli-bench 0.5.3 → 0.6.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.js CHANGED
@@ -11,11 +11,13 @@
11
11
  * 3. Init mode: scaffold cli-bench.yaml
12
12
  */
13
13
  import { writeFile } from 'node:fs/promises';
14
+ import { dirname } from 'node:path';
14
15
  import { parseArgs } from './config.js';
15
16
  import { runGrid, uploadReport } from './runner.js';
16
17
  import { resolveConfigFile, loadProject } from './project.js';
17
18
  import { scaffoldProject } from './init.js';
18
19
  import { validateGatewayKey, resolveProviders } from './providers.js';
20
+ import { CONTEXT_MODES } from './models.js';
19
21
  import { checkThresholds, printThresholdResults } from './thresholds.js';
20
22
  async function main() {
21
23
  const config = parseArgs(process.argv);
@@ -41,15 +43,15 @@ async function main() {
41
43
  if (configPath) {
42
44
  // Config file mode
43
45
  console.log(`Config: ${configPath}`);
44
- const { config: fileConfig, tasks } = await loadProject(configPath);
46
+ const { config: fileConfig, tasks, taskSuiteContent } = await loadProject(configPath);
45
47
  thresholdsConfig = fileConfig.thresholds;
46
48
  // Merge CLI args with file config
47
49
  const providers = config.models.length > 0
48
50
  ? config.models
49
51
  : fileConfig.providers ?? ['anthropic/claude-sonnet-4-20250514'];
50
- const helpModes = fileConfig.help_modes
51
- ? fileConfig.help_modes.filter((s) => ['injected', 'discoverable', 'none'].includes(s))
52
- : config.helpModes;
52
+ const contextModes = fileConfig.context
53
+ ? fileConfig.context.filter((s) => CONTEXT_MODES.includes(s))
54
+ : config.contextModes;
53
55
  const concurrency = fileConfig.concurrency ?? config.concurrency;
54
56
  // Determine upload behavior
55
57
  const uploadMode = fileConfig.upload ?? 'auto';
@@ -59,7 +61,7 @@ async function main() {
59
61
  console.log(`CLI: ${fileConfig.cli}`);
60
62
  console.log(`Providers: ${providers.join(', ')}`);
61
63
  console.log(`Tasks: ${tasks.length}`);
62
- console.log(`Help modes: ${helpModes.join(', ')}`);
64
+ console.log(`Context: ${contextModes.join(', ')}`);
63
65
  console.log(`Dry run: ${config.dryRun}`);
64
66
  // Validate gateway key before running
65
67
  if (!config.dryRun) {
@@ -68,7 +70,7 @@ async function main() {
68
70
  const models = resolveProviders(providers);
69
71
  const globalRepeat = config.repeat ?? fileConfig.repeat;
70
72
  reports = await runGrid({
71
- config: { ...config, concurrency, helpModes },
73
+ config: { ...config, concurrency, contextModes },
72
74
  tasks,
73
75
  cliName: fileConfig.cli,
74
76
  models,
@@ -76,6 +78,12 @@ async function main() {
76
78
  workdir: fileConfig.workdir ?? config.workdir,
77
79
  globalRepeat,
78
80
  systemPrompt: fileConfig.system_prompt,
81
+ displayName: fileConfig.display_name,
82
+ category: fileConfig.category,
83
+ websiteUrl: fileConfig.website_url,
84
+ githubUrl: fileConfig.github_url,
85
+ taskSuiteContent,
86
+ configDir: dirname(configPath),
79
87
  });
80
88
  // Check thresholds before upload so results are included in the payload
81
89
  if (thresholdsConfig && reports.length > 0 && !config.dryRun) {
@@ -106,7 +114,7 @@ async function main() {
106
114
  // Legacy task_suites/ discovery mode
107
115
  console.log(`Filter: ${config.filter.length > 0 ? config.filter.join(', ') : 'all'}`);
108
116
  console.log(`Models: ${config.models.length > 0 ? config.models.join(', ') : 'all'}`);
109
- console.log(`Help modes: ${config.helpModes.join(', ')}`);
117
+ console.log(`Context: ${config.contextModes.join(', ')}`);
110
118
  console.log(`Dry run: ${config.dryRun}`);
111
119
  if (!config.dryRun && config.models.length > 0) {
112
120
  validateGatewayKey();
@@ -133,7 +141,7 @@ async function main() {
133
141
  console.log('\n=== Final Summary ===');
134
142
  for (const report of reports) {
135
143
  for (const mr of report.modelResults) {
136
- console.log(`${report.cli} x ${mr.displayName} [${mr.helpMode}]: ${(mr.passRate * 100).toFixed(0)}% pass, avgTurns=${mr.avgTurnsToSuccess.toFixed(1)}`);
144
+ console.log(`${report.cli} x ${mr.displayName} [${mr.contextMode}]: ${(mr.passRate * 100).toFixed(0)}% pass, avgTurns=${mr.avgTurnsToSuccess.toFixed(1)}`);
137
145
  }
138
146
  }
139
147
  }
package/dist/index.js.map CHANGED
@@ -1 +1 @@
1
- {"version":3,"file":"index.js","sourceRoot":"","sources":["../src/index.ts"],"names":[],"mappings":";AAEA;;;;;;;;;;GAUG;AAEH,OAAO,EAAE,SAAS,EAAE,MAAM,kBAAkB,CAAC;AAC7C,OAAO,EAAE,SAAS,EAAE,MAAM,aAAa,CAAC;AACxC,OAAO,EAAE,OAAO,EAAE,YAAY,EAAE,MAAM,aAAa,CAAC;AACpD,OAAO,EAAE,iBAAiB,EAAE,WAAW,EAAE,MAAM,cAAc,CAAC;AAC9D,OAAO,EAAE,eAAe,EAAE,MAAM,WAAW,CAAC;AAC5C,OAAO,EAAE,kBAAkB,EAAE,gBAAgB,EAAgB,MAAM,gBAAgB,CAAC;AAEpF,OAAO,EAAE,eAAe,EAAE,qBAAqB,EAAE,MAAM,iBAAiB,CAAC;AAEzE,KAAK,UAAU,IAAI;IACjB,MAAM,MAAM,GAAG,SAAS,CAAC,OAAO,CAAC,IAAI,CAAC,CAAC;IAEvC,gCAAgC;IAChC,IAAI,MAAM,CAAC,QAAQ,EAAE,CAAC;QACpB,IAAI,CAAC;YACH,MAAM,IAAI,GAAG,MAAM,eAAe,CAAC,OAAO,CAAC,GAAG,EAAE,CAAC,CAAC;YAClD,OAAO,CAAC,GAAG,CAAC,WAAW,IAAI,EAAE,CAAC,CAAC;YAC/B,OAAO,CAAC,GAAG,CAAC,oFAAoF,CAAC,CAAC;QACpG,CAAC;QAAC,OAAO,CAAC,EAAE,CAAC;YACX,OAAO,CAAC,KAAK,CAAC,CAAC,YAAY,KAAK,CAAC,CAAC,CAAC,CAAC,CAAC,OAAO,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC;YAClD,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;QAClB,CAAC;QACD,OAAO;IACT,CAAC;IAED,OAAO,CAAC,GAAG,CAAC,4BAA4B,CAAC,CAAC;IAE1C,4BAA4B;IAC5B,MAAM,UAAU,GAAG,MAAM,iBAAiB,CAAC,MAAM,CAAC,UAAU,CAAC,CAAC;IAE9D,IAAI,OAAO,CAAC;IACZ,IAAI,gBAAoE,CAAC;IACzE,IAAI,eAAe,GAAG,KAAK,CAAC;IAE5B,IAAI,UAAU,EAAE,CAAC;QACf,mBAAmB;QACnB,OAAO,CAAC,GAAG,CAAC,WAAW,UAAU,EAAE,CAAC,CAAC;QACrC,MAAM,EAAE,MAAM,EAAE,UAAU,EAAE,KAAK,EAAE,GAAG,MAAM,WAAW,CAAC,UAAU,CAAC,CAAC;QACpE,gBAAgB,GAAG,UAAU,CAAC,UAAU,CAAC;QAEzC,kCAAkC;QAClC,MAAM,SAAS,GAAG,MAAM,CAAC,MAAM,CAAC,MAAM,GAAG,CAAC;YACxC,CAAC,CAAC,MAAM,CAAC,MAAM;YACf,CAAC,CAAC,UAAU,CAAC,SAAS,IAAI,CAAC,oCAAoC,CAAC,CAAC;QACnE,MAAM,SAAS,GAAG,UAAU,CAAC,UAAU;YACrC,CAAC,CAAC,UAAU,CAAC,UAAU,CAAC,MAAM,CAAC,CAAC,CAAC,EAAiB,EAAE,CAAC,CAAC,UAAU,EAAE,cAAc,EAAE,MAAM,CAAC,CAAC,QAAQ,CAAC,CAAC,CAAC,CAAC;YACtG,CAAC,CAAC,MAAM,CAAC,SAAS,CAAC;QACrB,MAAM,WAAW,GAAG,UAAU,CAAC,WAAW,IAAI,MAAM,CAAC,WAAW,CAAC;QAEjE,4BAA4B;QAC5B,MAAM,UAAU,GAAG,UAAU,CAAC,MAAM,IAAI,MAAM,CAAC;QAC/C,MAAM,YAAY,GAAG,MAAM,CAAC,MAAM;eAC7B,UAAU,KAAK,QAAQ;eACvB,CAAC,UAAU,KAAK,MAAM,IAAI,CAAC,CAAC,MAAM,CAAC,MAAM,CAAC,CAAC;QAEhD,OAAO,CAAC,GAAG,CAAC,QAAQ,UAAU,CAAC,GAAG,EAAE,CAAC,CAAC;QACtC,OAAO,CAAC,GAAG,CAAC,cAAc,SAAS,CAAC,IAAI,CAAC,IAAI,CAAC,EAAE,CAAC,CAAC;QAClD,OAAO,CAAC,GAAG,CAAC,UAAU,KAAK,CAAC,MAAM,EAAE,CAAC,CAAC;QACtC,OAAO,CAAC,GAAG,CAAC,eAAe,SAAS,CAAC,IAAI,CAAC,IAAI,CAAC,EAAE,CAAC,CAAC;QACnD,OAAO,CAAC,GAAG,CAAC,YAAY,MAAM,CAAC,MAAM,EAAE,CAAC,CAAC;QAEzC,sCAAsC;QACtC,IAAI,CAAC,MAAM,CAAC,MAAM,EAAE,CAAC;YACnB,kBAAkB,EAAE,CAAC;QACvB,CAAC;QAED,MAAM,MAAM,GAAG,gBAAgB,CAAC,SAAS,CAAC,CAAC;QAE3C,MAAM,YAAY,GAAG,MAAM,CAAC,MAAM,IAAI,UAAU,CAAC,MAAM,CAAC;QAExD,OAAO,GAAG,MAAM,OAAO,CAAC;YACtB,MAAM,EAAE,EAAE,GAAG,MAAM,EAAE,WAAW,EAAE,SAAS,EAAE;YAC7C,KAAK;YACL,OAAO,EAAE,UAAU,CAAC,GAAG;YACvB,MAAM;YACN,cAAc,EAAE,UAAU,CAAC,eAAe;YAC1C,OAAO,EAAE,UAAU,CAAC,OAAO,IAAI,MAAM,CAAC,OAAO;YAC7C,YAAY;YACZ,YAAY,EAAE,UAAU,CAAC,aAAa;SACvC,CAAC,CAAC;QAEH,wEAAwE;QACxE,IAAI,gBAAgB,IAAI,OAAO,CAAC,MAAM,GAAG,CAAC,IAAI,CAAC,MAAM,CAAC,MAAM,EAAE,CAAC;YAC7D,MAAM,KAAK,GAAG,eAAe,CAC3B,OAAO,CAAC,OAAO,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,YAAY,CAAC,EACtC,gBAAgB,CACjB,CAAC;YACF,qBAAqB,CAAC,KAAK,CAAC,CAAC;YAC7B,KAAK,MAAM,MAAM,IAAI,OAAO,EAAE,CAAC;gBAC7B,MAAM,CAAC,gBAAgB,GAAG,KAAK,CAAC;YAClC,CAAC;YACD,+DAA+D;YAC/D,IAAI,CAAC,KAAK,CAAC,SAAS,IAAI,KAAK,CAAC,QAAQ,KAAK,OAAO,EAAE,CAAC;gBACnD,eAAe,GAAG,IAAI,CAAC;YACzB,CAAC;QACH,CAAC;QAED,uBAAuB;QACvB,IAAI,YAAY,IAAI,CAAC,MAAM,CAAC,MAAM,EAAE,CAAC;YACnC,MAAM,UAAU,GAAG,UAAU,CAAC,WAAW,IAAI,MAAM,CAAC,UAAU,CAAC;YAC/D,KAAK,MAAM,MAAM,IAAI,OAAO,EAAE,CAAC;gBAC7B,IAAI,CAAC;oBACH,MAAM,YAAY,CAAC,MAAM,EAAE,UAAU,EAAE,MAAM,CAAC,MAAM,CAAC,CAAC;gBACxD,CAAC;gBAAC,OAAO,CAAC,EAAE,CAAC;oBACX,OAAO,CAAC,KAAK,CACX,+BAA+B,MAAM,CAAC,GAAG,KAAK,CAAC,YAAY,KAAK,CAAC,CAAC,CAAC,CAAC,CAAC,OAAO,CAAC,CAAC,CAAC,CAAC,EAAE,CACnF,CAAC;gBACJ,CAAC;YACH,CAAC;QACH,CAAC;IACH,CAAC;SAAM,CAAC;QACN,qCAAqC;QACrC,OAAO,CAAC,GAAG,CAAC,WAAW,MAAM,CAAC,MAAM,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC,CAAC,MAAM,CAAC,MAAM,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC,KAAK,EAAE,CAAC,CAAC;QACtF,OAAO,CAAC,GAAG,CAAC,WAAW,MAAM,CAAC,MAAM,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC,CAAC,MAAM,CAAC,MAAM,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC,KAAK,EAAE,CAAC,CAAC;QACtF,OAAO,CAAC,GAAG,CAAC,eAAe,MAAM,CAAC,SAAS,CAAC,IAAI,CAAC,IAAI,CAAC,EAAE,CAAC,CAAC;QAC1D,OAAO,CAAC,GAAG,CAAC,YAAY,MAAM,CAAC,MAAM,EAAE,CAAC,CAAC;QAEzC,IAAI,CAAC,MAAM,CAAC,MAAM,IAAI,MAAM,CAAC,MAAM,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;YAC/C,kBAAkB,EAAE,CAAC;QACvB,CAAC;QAED,OAAO,GAAG,MAAM,OAAO,CAAC,EAAE,MAAM,EAAE,YAAY,EAAE,MAAM,CAAC,MAAM,EAAE,CAAC,CAAC;QAEjE,IAAI,MAAM,CAAC,MAAM,EAAE,CAAC;YAClB,KAAK,MAAM,MAAM,IAAI,OAAO,EAAE,CAAC;gBAC7B,IAAI,CAAC;oBACH,MAAM,YAAY,CAAC,MAAM,EAAE,MAAM,CAAC,UAAU,EAAE,MAAM,CAAC,MAAM,CAAC,CAAC;gBAC/D,CAAC;gBAAC,OAAO,CAAC,EAAE,CAAC;oBACX,OAAO,CAAC,KAAK,CACX,+BAA+B,MAAM,CAAC,GAAG,KAAK,CAAC,YAAY,KAAK,CAAC,CAAC,CAAC,CAAC,CAAC,OAAO,CAAC,CAAC,CAAC,CAAC,EAAE,CACnF,CAAC;gBACJ,CAAC;YACH,CAAC;QACH,CAAC;IACH,CAAC;IAED,IAAI,MAAM,CAAC,MAAM,IAAI,OAAO,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;QACxC,MAAM,MAAM,GAAG,IAAI,CAAC,SAAS,CAC3B,OAAO,CAAC,MAAM,KAAK,CAAC,CAAC,CAAC,CAAC,OAAO,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,OAAO,EAC3C,IAAI,EACJ,CAAC,CACF,CAAC;QACF,MAAM,SAAS,CAAC,MAAM,CAAC,MAAM,EAAE,MAAM,EAAE,OAAO,CAAC,CAAC;QAChD,OAAO,CAAC,GAAG,CAAC,wBAAwB,MAAM,CAAC,MAAM,EAAE,CAAC,CAAC;IACvD,CAAC;IAED,eAAe;IACf,IAAI,OAAO,CAAC,MAAM,GAAG,CAAC,IAAI,CAAC,MAAM,CAAC,MAAM,EAAE,CAAC;QACzC,OAAO,CAAC,GAAG,CAAC,yBAAyB,CAAC,CAAC;QACvC,KAAK,MAAM,MAAM,IAAI,OAAO,EAAE,CAAC;YAC7B,KAAK,MAAM,EAAE,IAAI,MAAM,CAAC,YAAY,EAAE,CAAC;gBACrC,OAAO,CAAC,GAAG,CACT,GAAG,MAAM,CAAC,GAAG,MAAM,EAAE,CAAC,WAAW,KAAK,EAAE,CAAC,QAAQ,MAAM,CAAC,EAAE,CAAC,QAAQ,GAAG,GAAG,CAAC,CAAC,OAAO,CAAC,CAAC,CAAC,oBAAoB,EAAE,CAAC,iBAAiB,CAAC,OAAO,CAAC,CAAC,CAAC,EAAE,CAC3I,CAAC;YACJ,CAAC;QACH,CAAC;IACH,CAAC;IAED,uEAAuE;IACvE,IAAI,eAAe,EAAE,CAAC;QACpB,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;IAClB,CAAC;AACH,CAAC;AAED,IAAI,EAAE,CAAC,KAAK,CAAC,CAAC,CAAC,EAAE,EAAE;IACjB,OAAO,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC;IACjB,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;AAClB,CAAC,CAAC,CAAC"}
1
+ {"version":3,"file":"index.js","sourceRoot":"","sources":["../src/index.ts"],"names":[],"mappings":";AAEA;;;;;;;;;;GAUG;AAEH,OAAO,EAAE,SAAS,EAAE,MAAM,kBAAkB,CAAC;AAC7C,OAAO,EAAE,OAAO,EAAE,MAAM,WAAW,CAAC;AACpC,OAAO,EAAE,SAAS,EAAE,MAAM,aAAa,CAAC;AACxC,OAAO,EAAE,OAAO,EAAE,YAAY,EAAE,MAAM,aAAa,CAAC;AACpD,OAAO,EAAE,iBAAiB,EAAE,WAAW,EAAE,MAAM,cAAc,CAAC;AAC9D,OAAO,EAAE,eAAe,EAAE,MAAM,WAAW,CAAC;AAC5C,OAAO,EAAE,kBAAkB,EAAE,gBAAgB,EAAgB,MAAM,gBAAgB,CAAC;AACpF,OAAO,EAAoB,aAAa,EAAE,MAAM,aAAa,CAAC;AAC9D,OAAO,EAAE,eAAe,EAAE,qBAAqB,EAAE,MAAM,iBAAiB,CAAC;AAEzE,KAAK,UAAU,IAAI;IACjB,MAAM,MAAM,GAAG,SAAS,CAAC,OAAO,CAAC,IAAI,CAAC,CAAC;IAEvC,gCAAgC;IAChC,IAAI,MAAM,CAAC,QAAQ,EAAE,CAAC;QACpB,IAAI,CAAC;YACH,MAAM,IAAI,GAAG,MAAM,eAAe,CAAC,OAAO,CAAC,GAAG,EAAE,CAAC,CAAC;YAClD,OAAO,CAAC,GAAG,CAAC,WAAW,IAAI,EAAE,CAAC,CAAC;YAC/B,OAAO,CAAC,GAAG,CAAC,oFAAoF,CAAC,CAAC;QACpG,CAAC;QAAC,OAAO,CAAC,EAAE,CAAC;YACX,OAAO,CAAC,KAAK,CAAC,CAAC,YAAY,KAAK,CAAC,CAAC,CAAC,CAAC,CAAC,OAAO,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC;YAClD,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;QAClB,CAAC;QACD,OAAO;IACT,CAAC;IAED,OAAO,CAAC,GAAG,CAAC,4BAA4B,CAAC,CAAC;IAE1C,4BAA4B;IAC5B,MAAM,UAAU,GAAG,MAAM,iBAAiB,CAAC,MAAM,CAAC,UAAU,CAAC,CAAC;IAE9D,IAAI,OAAO,CAAC;IACZ,IAAI,gBAAoE,CAAC;IACzE,IAAI,eAAe,GAAG,KAAK,CAAC;IAE5B,IAAI,UAAU,EAAE,CAAC;QACf,mBAAmB;QACnB,OAAO,CAAC,GAAG,CAAC,WAAW,UAAU,EAAE,CAAC,CAAC;QACrC,MAAM,EAAE,MAAM,EAAE,UAAU,EAAE,KAAK,EAAE,gBAAgB,EAAE,GAAG,MAAM,WAAW,CAAC,UAAU,CAAC,CAAC;QACtF,gBAAgB,GAAG,UAAU,CAAC,UAAU,CAAC;QAEzC,kCAAkC;QAClC,MAAM,SAAS,GAAG,MAAM,CAAC,MAAM,CAAC,MAAM,GAAG,CAAC;YACxC,CAAC,CAAC,MAAM,CAAC,MAAM;YACf,CAAC,CAAC,UAAU,CAAC,SAAS,IAAI,CAAC,oCAAoC,CAAC,CAAC;QACnE,MAAM,YAAY,GAAG,UAAU,CAAC,OAAO;YACrC,CAAC,CAAC,UAAU,CAAC,OAAO,CAAC,MAAM,CAAC,CAAC,CAAC,EAAoB,EAAE,CAAE,aAA0B,CAAC,QAAQ,CAAC,CAAC,CAAC,CAAC;YAC7F,CAAC,CAAC,MAAM,CAAC,YAAY,CAAC;QACxB,MAAM,WAAW,GAAG,UAAU,CAAC,WAAW,IAAI,MAAM,CAAC,WAAW,CAAC;QAEjE,4BAA4B;QAC5B,MAAM,UAAU,GAAG,UAAU,CAAC,MAAM,IAAI,MAAM,CAAC;QAC/C,MAAM,YAAY,GAAG,MAAM,CAAC,MAAM;eAC7B,UAAU,KAAK,QAAQ;eACvB,CAAC,UAAU,KAAK,MAAM,IAAI,CAAC,CAAC,MAAM,CAAC,MAAM,CAAC,CAAC;QAEhD,OAAO,CAAC,GAAG,CAAC,QAAQ,UAAU,CAAC,GAAG,EAAE,CAAC,CAAC;QACtC,OAAO,CAAC,GAAG,CAAC,cAAc,SAAS,CAAC,IAAI,CAAC,IAAI,CAAC,EAAE,CAAC,CAAC;QAClD,OAAO,CAAC,GAAG,CAAC,UAAU,KAAK,CAAC,MAAM,EAAE,CAAC,CAAC;QACtC,OAAO,CAAC,GAAG,CAAC,YAAY,YAAY,CAAC,IAAI,CAAC,IAAI,CAAC,EAAE,CAAC,CAAC;QACnD,OAAO,CAAC,GAAG,CAAC,YAAY,MAAM,CAAC,MAAM,EAAE,CAAC,CAAC;QAEzC,sCAAsC;QACtC,IAAI,CAAC,MAAM,CAAC,MAAM,EAAE,CAAC;YACnB,kBAAkB,EAAE,CAAC;QACvB,CAAC;QAED,MAAM,MAAM,GAAG,gBAAgB,CAAC,SAAS,CAAC,CAAC;QAE3C,MAAM,YAAY,GAAG,MAAM,CAAC,MAAM,IAAI,UAAU,CAAC,MAAM,CAAC;QAExD,OAAO,GAAG,MAAM,OAAO,CAAC;YACtB,MAAM,EAAE,EAAE,GAAG,MAAM,EAAE,WAAW,EAAE,YAAY,EAAE;YAChD,KAAK;YACL,OAAO,EAAE,UAAU,CAAC,GAAG;YACvB,MAAM;YACN,cAAc,EAAE,UAAU,CAAC,eAAe;YAC1C,OAAO,EAAE,UAAU,CAAC,OAAO,IAAI,MAAM,CAAC,OAAO;YAC7C,YAAY;YACZ,YAAY,EAAE,UAAU,CAAC,aAAa;YACtC,WAAW,EAAE,UAAU,CAAC,YAAY;YACpC,QAAQ,EAAE,UAAU,CAAC,QAAQ;YAC7B,UAAU,EAAE,UAAU,CAAC,WAAW;YAClC,SAAS,EAAE,UAAU,CAAC,UAAU;YAChC,gBAAgB;YAChB,SAAS,EAAE,OAAO,CAAC,UAAU,CAAC;SAC/B,CAAC,CAAC;QAEH,wEAAwE;QACxE,IAAI,gBAAgB,IAAI,OAAO,CAAC,MAAM,GAAG,CAAC,IAAI,CAAC,MAAM,CAAC,MAAM,EAAE,CAAC;YAC7D,MAAM,KAAK,GAAG,eAAe,CAC3B,OAAO,CAAC,OAAO,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,YAAY,CAAC,EACtC,gBAAgB,CACjB,CAAC;YACF,qBAAqB,CAAC,KAAK,CAAC,CAAC;YAC7B,KAAK,MAAM,MAAM,IAAI,OAAO,EAAE,CAAC;gBAC7B,MAAM,CAAC,gBAAgB,GAAG,KAAK,CAAC;YAClC,CAAC;YACD,+DAA+D;YAC/D,IAAI,CAAC,KAAK,CAAC,SAAS,IAAI,KAAK,CAAC,QAAQ,KAAK,OAAO,EAAE,CAAC;gBACnD,eAAe,GAAG,IAAI,CAAC;YACzB,CAAC;QACH,CAAC;QAED,uBAAuB;QACvB,IAAI,YAAY,IAAI,CAAC,MAAM,CAAC,MAAM,EAAE,CAAC;YACnC,MAAM,UAAU,GAAG,UAAU,CAAC,WAAW,IAAI,MAAM,CAAC,UAAU,CAAC;YAC/D,KAAK,MAAM,MAAM,IAAI,OAAO,EAAE,CAAC;gBAC7B,IAAI,CAAC;oBACH,MAAM,YAAY,CAAC,MAAM,EAAE,UAAU,EAAE,MAAM,CAAC,MAAM,CAAC,CAAC;gBACxD,CAAC;gBAAC,OAAO,CAAC,EAAE,CAAC;oBACX,OAAO,CAAC,KAAK,CACX,+BAA+B,MAAM,CAAC,GAAG,KAAK,CAAC,YAAY,KAAK,CAAC,CAAC,CAAC,CAAC,CAAC,OAAO,CAAC,CAAC,CAAC,CAAC,EAAE,CACnF,CAAC;gBACJ,CAAC;YACH,CAAC;QACH,CAAC;IACH,CAAC;SAAM,CAAC;QACN,qCAAqC;QACrC,OAAO,CAAC,GAAG,CAAC,WAAW,MAAM,CAAC,MAAM,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC,CAAC,MAAM,CAAC,MAAM,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC,KAAK,EAAE,CAAC,CAAC;QACtF,OAAO,CAAC,GAAG,CAAC,WAAW,MAAM,CAAC,MAAM,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC,CAAC,MAAM,CAAC,MAAM,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC,KAAK,EAAE,CAAC,CAAC;QACtF,OAAO,CAAC,GAAG,CAAC,YAAY,MAAM,CAAC,YAAY,CAAC,IAAI,CAAC,IAAI,CAAC,EAAE,CAAC,CAAC;QAC1D,OAAO,CAAC,GAAG,CAAC,YAAY,MAAM,CAAC,MAAM,EAAE,CAAC,CAAC;QAEzC,IAAI,CAAC,MAAM,CAAC,MAAM,IAAI,MAAM,CAAC,MAAM,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;YAC/C,kBAAkB,EAAE,CAAC;QACvB,CAAC;QAED,OAAO,GAAG,MAAM,OAAO,CAAC,EAAE,MAAM,EAAE,YAAY,EAAE,MAAM,CAAC,MAAM,EAAE,CAAC,CAAC;QAEjE,IAAI,MAAM,CAAC,MAAM,EAAE,CAAC;YAClB,KAAK,MAAM,MAAM,IAAI,OAAO,EAAE,CAAC;gBAC7B,IAAI,CAAC;oBACH,MAAM,YAAY,CAAC,MAAM,EAAE,MAAM,CAAC,UAAU,EAAE,MAAM,CAAC,MAAM,CAAC,CAAC;gBAC/D,CAAC;gBAAC,OAAO,CAAC,EAAE,CAAC;oBACX,OAAO,CAAC,KAAK,CACX,+BAA+B,MAAM,CAAC,GAAG,KAAK,CAAC,YAAY,KAAK,CAAC,CAAC,CAAC,CAAC,CAAC,OAAO,CAAC,CAAC,CAAC,CAAC,EAAE,CACnF,CAAC;gBACJ,CAAC;YACH,CAAC;QACH,CAAC;IACH,CAAC;IAED,IAAI,MAAM,CAAC,MAAM,IAAI,OAAO,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;QACxC,MAAM,MAAM,GAAG,IAAI,CAAC,SAAS,CAC3B,OAAO,CAAC,MAAM,KAAK,CAAC,CAAC,CAAC,CAAC,OAAO,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,OAAO,EAC3C,IAAI,EACJ,CAAC,CACF,CAAC;QACF,MAAM,SAAS,CAAC,MAAM,CAAC,MAAM,EAAE,MAAM,EAAE,OAAO,CAAC,CAAC;QAChD,OAAO,CAAC,GAAG,CAAC,wBAAwB,MAAM,CAAC,MAAM,EAAE,CAAC,CAAC;IACvD,CAAC;IAED,eAAe;IACf,IAAI,OAAO,CAAC,MAAM,GAAG,CAAC,IAAI,CAAC,MAAM,CAAC,MAAM,EAAE,CAAC;QACzC,OAAO,CAAC,GAAG,CAAC,yBAAyB,CAAC,CAAC;QACvC,KAAK,MAAM,MAAM,IAAI,OAAO,EAAE,CAAC;YAC7B,KAAK,MAAM,EAAE,IAAI,MAAM,CAAC,YAAY,EAAE,CAAC;gBACrC,OAAO,CAAC,GAAG,CACT,GAAG,MAAM,CAAC,GAAG,MAAM,EAAE,CAAC,WAAW,KAAK,EAAE,CAAC,WAAW,MAAM,CAAC,EAAE,CAAC,QAAQ,GAAG,GAAG,CAAC,CAAC,OAAO,CAAC,CAAC,CAAC,oBAAoB,EAAE,CAAC,iBAAiB,CAAC,OAAO,CAAC,CAAC,CAAC,EAAE,CAC9I,CAAC;YACJ,CAAC;QACH,CAAC;IACH,CAAC;IAED,uEAAuE;IACvE,IAAI,eAAe,EAAE,CAAC;QACpB,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;IAClB,CAAC;AACH,CAAC;AAED,IAAI,EAAE,CAAC,KAAK,CAAC,CAAC,CAAC,EAAE,EAAE;IACjB,OAAO,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC;IACjB,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;AAClB,CAAC,CAAC,CAAC"}
package/dist/init.js CHANGED
@@ -17,7 +17,7 @@ providers:
17
17
  # - google/gemini-2.5-pro
18
18
 
19
19
  # Optional settings
20
- # help_modes: [injected] # injected | discoverable | none
20
+ # context: [zero-shot] # zero-shot | help | docs
21
21
  # concurrency: 3 # max concurrent API calls
22
22
  # workdir: ./workspace # working directory for commands (default: temp dir)
23
23
  # upload: auto # auto | always | never (auto uploads if CLIWATCH_API_KEY is set)
package/dist/models.d.ts CHANGED
@@ -4,7 +4,8 @@
4
4
  * Assertion-based evaluation: tasks define assertions that are checked
5
5
  * against the agent's execution trace.
6
6
  */
7
- export type HelpMode = 'injected' | 'discoverable' | 'none';
7
+ export type ContextMode = 'zero-shot' | 'help' | 'docs';
8
+ export declare const CONTEXT_MODES: ContextMode[];
8
9
  export type Assertion = {
9
10
  output_contains: string;
10
11
  } | {
@@ -56,6 +57,10 @@ export interface Task {
56
57
  export interface TaskSuite {
57
58
  cli: string;
58
59
  version_command?: string;
60
+ display_name?: string;
61
+ category?: string;
62
+ website_url?: string;
63
+ github_url?: string;
59
64
  tasks: Task[];
60
65
  }
61
66
  export type UploadMode = 'auto' | 'always' | 'never';
@@ -68,8 +73,12 @@ export interface ThresholdsConfig {
68
73
  export interface ConfigFile {
69
74
  cli: string;
70
75
  version_command?: string;
76
+ display_name?: string;
77
+ category?: string;
78
+ website_url?: string;
79
+ github_url?: string;
71
80
  providers?: string[];
72
- help_modes?: string[];
81
+ context?: string[];
73
82
  system_prompt?: string;
74
83
  concurrency?: number;
75
84
  workdir?: string;
@@ -107,7 +116,7 @@ export interface ModelResult {
107
116
  provider: Provider;
108
117
  modelId: string;
109
118
  displayName: string;
110
- helpMode: HelpMode;
119
+ contextMode: ContextMode;
111
120
  taskResults: TaskEval[];
112
121
  passRate: number;
113
122
  avgTurnsToSuccess: number;
@@ -137,6 +146,10 @@ export interface GridReport {
137
146
  generatedAt: string;
138
147
  modelResults: ModelResult[];
139
148
  systemPrompt?: string;
149
+ displayName?: string;
150
+ category?: string;
151
+ websiteUrl?: string;
152
+ githubUrl?: string;
140
153
  gitSha?: string;
141
154
  gitRef?: string;
142
155
  gitHeadRef?: string;
@@ -150,6 +163,7 @@ export interface GridReport {
150
163
  repository?: string;
151
164
  tags?: string[];
152
165
  taskSuiteHash?: string;
166
+ taskSuiteContent?: string;
153
167
  thresholdResults?: ThresholdCheckResult;
154
168
  }
155
169
  export interface HelpCache {
@@ -1 +1 @@
1
- {"version":3,"file":"models.d.ts","sourceRoot":"","sources":["../src/models.ts"],"names":[],"mappings":"AAAA;;;;;GAKG;AAMH,MAAM,MAAM,QAAQ,GAAG,UAAU,GAAG,cAAc,GAAG,MAAM,CAAC;AAM5D,MAAM,MAAM,SAAS,GACjB;IAAE,eAAe,EAAE,MAAM,CAAA;CAAE,GAC3B;IAAE,aAAa,EAAE,MAAM,CAAA;CAAE,GACzB;IAAE,cAAc,EAAE,MAAM,CAAA;CAAE,GAC1B;IAAE,SAAS,EAAE,MAAM,CAAA;CAAE,GACrB;IAAE,WAAW,EAAE,MAAM,CAAA;CAAE,GACvB;IAAE,aAAa,EAAE;QAAE,IAAI,EAAE,MAAM,CAAC;QAAC,IAAI,EAAE,MAAM,CAAA;KAAE,CAAA;CAAE,GACjD;IAAE,GAAG,EAAE,MAAM,CAAA;CAAE,GACf;IAAE,OAAO,EAAE,MAAM,CAAA;CAAE,GACnB;IAAE,SAAS,EAAE;QAAE,OAAO,EAAE,MAAM,CAAC;QAAC,GAAG,CAAC,EAAE,MAAM,CAAC;QAAC,GAAG,CAAC,EAAE,MAAM,CAAA;KAAE,CAAA;CAAE,GAC9D;IAAE,MAAM,EAAE;QAAE,GAAG,EAAE,MAAM,CAAC;QAAC,eAAe,CAAC,EAAE,MAAM,CAAC;QAAC,aAAa,CAAC,EAAE,MAAM,CAAA;KAAE,CAAA;CAAE,CAAC;AAElF,MAAM,WAAW,eAAe;IAC9B,SAAS,EAAE,SAAS,CAAC;IACrB,MAAM,EAAE,OAAO,CAAC;IAChB,MAAM,CAAC,EAAE,MAAM,CAAC;IAChB,QAAQ,CAAC,EAAE,MAAM,CAAC;CACnB;AAED,MAAM,WAAW,IAAI;IACnB,EAAE,EAAE,MAAM,CAAC;IACX,MAAM,EAAE,MAAM,CAAC;IACf,MAAM,EAAE,SAAS,EAAE,CAAC;IACpB,KAAK,CAAC,EAAE,MAAM,EAAE,CAAC;IACjB,SAAS,CAAC,EAAE,MAAM,CAAC;IACnB,UAAU,CAAC,EAAE,MAAM,GAAG,QAAQ,GAAG,MAAM,CAAC;IACxC,QAAQ,CAAC,EAAE,MAAM,CAAC;IAClB,MAAM,CAAC,EAAE,MAAM,CAAC;CACjB;AAED,MAAM,WAAW,SAAS;IACxB,GAAG,EAAE,MAAM,CAAC;IACZ,eAAe,CAAC,EAAE,MAAM,CAAC;IACzB,KAAK,EAAE,IAAI,EAAE,CAAC;CACf;AAMD,MAAM,MAAM,UAAU,GAAG,MAAM,GAAG,QAAQ,GAAG,OAAO,CAAC;AAErD,MAAM,WAAW,gBAAgB;IAC/B,OAAO,CAAC,EAAE,MAAM,CAAC;IACjB,MAAM,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,MAAM,CAAC,CAAC;IAChC,SAAS,CAAC,EAAE,MAAM,CAAC;IACnB,QAAQ,CAAC,EAAE,OAAO,GAAG,eAAe,CAAC;CACtC;AAED,MAAM,WAAW,UAAU;IACzB,GAAG,EAAE,MAAM,CAAC;IACZ,eAAe,CAAC,EAAE,MAAM,CAAC;IACzB,SAAS,CAAC,EAAE,MAAM,EAAE,CAAC;IACrB,UAAU,CAAC,EAAE,MAAM,EAAE,CAAC;IACtB,aAAa,CAAC,EAAE,MAAM,CAAC;IACvB,WAAW,CAAC,EAAE,MAAM,CAAC;IACrB,OAAO,CAAC,EAAE,MAAM,CAAC;IACjB,MAAM,CAAC,EAAE,UAAU,CAAC;IACpB,WAAW,CAAC,EAAE,MAAM,CAAC;IACrB,MAAM,CAAC,EAAE,MAAM,CAAC;IAChB,UAAU,CAAC,EAAE,gBAAgB,CAAC;IAC9B,KAAK,EAAE,CAAC,IAAI,GAAG,MAAM,CAAC,EAAE,CAAC;CAC1B;AAMD,MAAM,MAAM,QAAQ,GAAG,MAAM,CAAC;AAE9B,MAAM,WAAW,WAAW;IAC1B,QAAQ,EAAE,QAAQ,CAAC;IACnB,OAAO,EAAE,MAAM,CAAC;IAChB,WAAW,EAAE,MAAM,CAAC;IACpB,WAAW,EAAE,MAAM,CAAC;IACpB,SAAS,EAAE,MAAM,CAAC;CACnB;AAMD,MAAM,WAAW,QAAQ;IACvB,MAAM,EAAE,MAAM,CAAC;IACf,MAAM,EAAE,OAAO,CAAC;IAChB,aAAa,CAAC,EAAE,MAAM,CAAC;IACvB,gBAAgB,EAAE,eAAe,EAAE,CAAC;IACpC,SAAS,EAAE,MAAM,CAAC;IAClB,gBAAgB,EAAE,MAAM,CAAC;IACzB,iBAAiB,EAAE,MAAM,CAAC;IAC1B,cAAc,EAAE,MAAM,CAAC;IACvB,WAAW,EAAE,MAAM,EAAE,CAAC;IACtB,UAAU,CAAC,EAAE,MAAM,GAAG,QAAQ,GAAG,MAAM,CAAC;IACxC,QAAQ,CAAC,EAAE,MAAM,CAAC;IAClB,iBAAiB,CAAC,EAAE,OAAO,EAAE,CAAC;IAC9B,cAAc,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC,CAAC;IACzC,WAAW,CAAC,EAAE,MAAM,CAAC;CACtB;AAMD,MAAM,WAAW,WAAW;IAC1B,QAAQ,EAAE,QAAQ,CAAC;IACnB,OAAO,EAAE,MAAM,CAAC;IAChB,WAAW,EAAE,MAAM,CAAC;IACpB,QAAQ,EAAE,QAAQ,CAAC;IACnB,WAAW,EAAE,QAAQ,EAAE,CAAC;IACxB,QAAQ,EAAE,MAAM,CAAC;IACjB,iBAAiB,EAAE,MAAM,CAAC;IAC1B,cAAc,EAAE,MAAM,CAAC;IACvB,YAAY,EAAE,MAAM,CAAC;CACtB;AAED,MAAM,WAAW,eAAe;IAC9B,KAAK,EAAE,MAAM,CAAC;IACd,QAAQ,EAAE,MAAM,CAAC;IACjB,SAAS,EAAE,MAAM,CAAC;IAClB,SAAS,EAAE,MAAM,CAAC;IAClB,YAAY,EAAE,MAAM,CAAC;IACrB,MAAM,EAAE,OAAO,CAAC;CACjB;AAED,MAAM,WAAW,oBAAoB;IACnC,SAAS,EAAE,OAAO,CAAC;IACnB,OAAO,EAAE,eAAe,EAAE,CAAC;IAC3B,QAAQ,EAAE,OAAO,GAAG,eAAe,CAAC;CACrC;AAED,MAAM,WAAW,UAAU;IACzB,GAAG,EAAE,MAAM,CAAC;IACZ,UAAU,CAAC,EAAE,MAAM,CAAC;IACpB,gBAAgB,EAAE,MAAM,CAAC;IACzB,WAAW,EAAE,MAAM,CAAC;IACpB,SAAS,EAAE,MAAM,CAAC;IAClB,UAAU,CAAC,EAAE,MAAM,CAAC;IACpB,WAAW,EAAE,MAAM,CAAC;IACpB,YAAY,EAAE,WAAW,EAAE,CAAC;IAC5B,YAAY,CAAC,EAAE,MAAM,CAAC;IAEtB,MAAM,CAAC,EAAE,MAAM,CAAC;IAChB,MAAM,CAAC,EAAE,MAAM,CAAC;IAChB,UAAU,CAAC,EAAE,MAAM,CAAC;IACpB,UAAU,CAAC,EAAE,MAAM,CAAC;IACpB,aAAa,CAAC,EAAE,MAAM,CAAC;IACvB,SAAS,CAAC,EAAE,MAAM,CAAC;IACnB,UAAU,CAAC,EAAE,MAAM,CAAC;IACpB,UAAU,CAAC,EAAE,MAAM,CAAC;IACpB,OAAO,CAAC,EAAE,MAAM,CAAC;IACjB,QAAQ,CAAC,EAAE,MAAM,CAAC;IAClB,UAAU,CAAC,EAAE,MAAM,CAAC;IACpB,IAAI,CAAC,EAAE,MAAM,EAAE,CAAC;IAChB,aAAa,CAAC,EAAE,MAAM,CAAC;IACvB,gBAAgB,CAAC,EAAE,oBAAoB,CAAC;CACzC;AAMD,MAAM,WAAW,SAAS;IACxB,QAAQ,EAAE,MAAM,CAAC;IACjB,UAAU,EAAE,MAAM,CAAC,MAAM,EAAE,MAAM,CAAC,CAAC;IACnC,OAAO,CAAC,EAAE,MAAM,CAAC;CAClB"}
1
+ {"version":3,"file":"models.d.ts","sourceRoot":"","sources":["../src/models.ts"],"names":[],"mappings":"AAAA;;;;;GAKG;AAMH,MAAM,MAAM,WAAW,GAAG,WAAW,GAAG,MAAM,GAAG,MAAM,CAAC;AACxD,eAAO,MAAM,aAAa,EAAE,WAAW,EAAkC,CAAC;AAM1E,MAAM,MAAM,SAAS,GACjB;IAAE,eAAe,EAAE,MAAM,CAAA;CAAE,GAC3B;IAAE,aAAa,EAAE,MAAM,CAAA;CAAE,GACzB;IAAE,cAAc,EAAE,MAAM,CAAA;CAAE,GAC1B;IAAE,SAAS,EAAE,MAAM,CAAA;CAAE,GACrB;IAAE,WAAW,EAAE,MAAM,CAAA;CAAE,GACvB;IAAE,aAAa,EAAE;QAAE,IAAI,EAAE,MAAM,CAAC;QAAC,IAAI,EAAE,MAAM,CAAA;KAAE,CAAA;CAAE,GACjD;IAAE,GAAG,EAAE,MAAM,CAAA;CAAE,GACf;IAAE,OAAO,EAAE,MAAM,CAAA;CAAE,GACnB;IAAE,SAAS,EAAE;QAAE,OAAO,EAAE,MAAM,CAAC;QAAC,GAAG,CAAC,EAAE,MAAM,CAAC;QAAC,GAAG,CAAC,EAAE,MAAM,CAAA;KAAE,CAAA;CAAE,GAC9D;IAAE,MAAM,EAAE;QAAE,GAAG,EAAE,MAAM,CAAC;QAAC,eAAe,CAAC,EAAE,MAAM,CAAC;QAAC,aAAa,CAAC,EAAE,MAAM,CAAA;KAAE,CAAA;CAAE,CAAC;AAElF,MAAM,WAAW,eAAe;IAC9B,SAAS,EAAE,SAAS,CAAC;IACrB,MAAM,EAAE,OAAO,CAAC;IAChB,MAAM,CAAC,EAAE,MAAM,CAAC;IAChB,QAAQ,CAAC,EAAE,MAAM,CAAC;CACnB;AAED,MAAM,WAAW,IAAI;IACnB,EAAE,EAAE,MAAM,CAAC;IACX,MAAM,EAAE,MAAM,CAAC;IACf,MAAM,EAAE,SAAS,EAAE,CAAC;IACpB,KAAK,CAAC,EAAE,MAAM,EAAE,CAAC;IACjB,SAAS,CAAC,EAAE,MAAM,CAAC;IACnB,UAAU,CAAC,EAAE,MAAM,GAAG,QAAQ,GAAG,MAAM,CAAC;IACxC,QAAQ,CAAC,EAAE,MAAM,CAAC;IAClB,MAAM,CAAC,EAAE,MAAM,CAAC;CACjB;AAED,MAAM,WAAW,SAAS;IACxB,GAAG,EAAE,MAAM,CAAC;IACZ,eAAe,CAAC,EAAE,MAAM,CAAC;IACzB,YAAY,CAAC,EAAE,MAAM,CAAC;IACtB,QAAQ,CAAC,EAAE,MAAM,CAAC;IAClB,WAAW,CAAC,EAAE,MAAM,CAAC;IACrB,UAAU,CAAC,EAAE,MAAM,CAAC;IACpB,KAAK,EAAE,IAAI,EAAE,CAAC;CACf;AAMD,MAAM,MAAM,UAAU,GAAG,MAAM,GAAG,QAAQ,GAAG,OAAO,CAAC;AAErD,MAAM,WAAW,gBAAgB;IAC/B,OAAO,CAAC,EAAE,MAAM,CAAC;IACjB,MAAM,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,MAAM,CAAC,CAAC;IAChC,SAAS,CAAC,EAAE,MAAM,CAAC;IACnB,QAAQ,CAAC,EAAE,OAAO,GAAG,eAAe,CAAC;CACtC;AAED,MAAM,WAAW,UAAU;IACzB,GAAG,EAAE,MAAM,CAAC;IACZ,eAAe,CAAC,EAAE,MAAM,CAAC;IACzB,YAAY,CAAC,EAAE,MAAM,CAAC;IACtB,QAAQ,CAAC,EAAE,MAAM,CAAC;IAClB,WAAW,CAAC,EAAE,MAAM,CAAC;IACrB,UAAU,CAAC,EAAE,MAAM,CAAC;IACpB,SAAS,CAAC,EAAE,MAAM,EAAE,CAAC;IACrB,OAAO,CAAC,EAAE,MAAM,EAAE,CAAC;IACnB,aAAa,CAAC,EAAE,MAAM,CAAC;IACvB,WAAW,CAAC,EAAE,MAAM,CAAC;IACrB,OAAO,CAAC,EAAE,MAAM,CAAC;IACjB,MAAM,CAAC,EAAE,UAAU,CAAC;IACpB,WAAW,CAAC,EAAE,MAAM,CAAC;IACrB,MAAM,CAAC,EAAE,MAAM,CAAC;IAChB,UAAU,CAAC,EAAE,gBAAgB,CAAC;IAC9B,KAAK,EAAE,CAAC,IAAI,GAAG,MAAM,CAAC,EAAE,CAAC;CAC1B;AAMD,MAAM,MAAM,QAAQ,GAAG,MAAM,CAAC;AAE9B,MAAM,WAAW,WAAW;IAC1B,QAAQ,EAAE,QAAQ,CAAC;IACnB,OAAO,EAAE,MAAM,CAAC;IAChB,WAAW,EAAE,MAAM,CAAC;IACpB,WAAW,EAAE,MAAM,CAAC;IACpB,SAAS,EAAE,MAAM,CAAC;CACnB;AAMD,MAAM,WAAW,QAAQ;IACvB,MAAM,EAAE,MAAM,CAAC;IACf,MAAM,EAAE,OAAO,CAAC;IAChB,aAAa,CAAC,EAAE,MAAM,CAAC;IACvB,gBAAgB,EAAE,eAAe,EAAE,CAAC;IACpC,SAAS,EAAE,MAAM,CAAC;IAClB,gBAAgB,EAAE,MAAM,CAAC;IACzB,iBAAiB,EAAE,MAAM,CAAC;IAC1B,cAAc,EAAE,MAAM,CAAC;IACvB,WAAW,EAAE,MAAM,EAAE,CAAC;IACtB,UAAU,CAAC,EAAE,MAAM,GAAG,QAAQ,GAAG,MAAM,CAAC;IACxC,QAAQ,CAAC,EAAE,MAAM,CAAC;IAClB,iBAAiB,CAAC,EAAE,OAAO,EAAE,CAAC;IAC9B,cAAc,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC,CAAC;IACzC,WAAW,CAAC,EAAE,MAAM,CAAC;CACtB;AAMD,MAAM,WAAW,WAAW;IAC1B,QAAQ,EAAE,QAAQ,CAAC;IACnB,OAAO,EAAE,MAAM,CAAC;IAChB,WAAW,EAAE,MAAM,CAAC;IACpB,WAAW,EAAE,WAAW,CAAC;IACzB,WAAW,EAAE,QAAQ,EAAE,CAAC;IACxB,QAAQ,EAAE,MAAM,CAAC;IACjB,iBAAiB,EAAE,MAAM,CAAC;IAC1B,cAAc,EAAE,MAAM,CAAC;IACvB,YAAY,EAAE,MAAM,CAAC;CACtB;AAED,MAAM,WAAW,eAAe;IAC9B,KAAK,EAAE,MAAM,CAAC;IACd,QAAQ,EAAE,MAAM,CAAC;IACjB,SAAS,EAAE,MAAM,CAAC;IAClB,SAAS,EAAE,MAAM,CAAC;IAClB,YAAY,EAAE,MAAM,CAAC;IACrB,MAAM,EAAE,OAAO,CAAC;CACjB;AAED,MAAM,WAAW,oBAAoB;IACnC,SAAS,EAAE,OAAO,CAAC;IACnB,OAAO,EAAE,eAAe,EAAE,CAAC;IAC3B,QAAQ,EAAE,OAAO,GAAG,eAAe,CAAC;CACrC;AAED,MAAM,WAAW,UAAU;IACzB,GAAG,EAAE,MAAM,CAAC;IACZ,UAAU,CAAC,EAAE,MAAM,CAAC;IACpB,gBAAgB,EAAE,MAAM,CAAC;IACzB,WAAW,EAAE,MAAM,CAAC;IACpB,SAAS,EAAE,MAAM,CAAC;IAClB,UAAU,CAAC,EAAE,MAAM,CAAC;IACpB,WAAW,EAAE,MAAM,CAAC;IACpB,YAAY,EAAE,WAAW,EAAE,CAAC;IAC5B,YAAY,CAAC,EAAE,MAAM,CAAC;IAEtB,WAAW,CAAC,EAAE,MAAM,CAAC;IACrB,QAAQ,CAAC,EAAE,MAAM,CAAC;IAClB,UAAU,CAAC,EAAE,MAAM,CAAC;IACpB,SAAS,CAAC,EAAE,MAAM,CAAC;IAEnB,MAAM,CAAC,EAAE,MAAM,CAAC;IAChB,MAAM,CAAC,EAAE,MAAM,CAAC;IAChB,UAAU,CAAC,EAAE,MAAM,CAAC;IACpB,UAAU,CAAC,EAAE,MAAM,CAAC;IACpB,aAAa,CAAC,EAAE,MAAM,CAAC;IACvB,SAAS,CAAC,EAAE,MAAM,CAAC;IACnB,UAAU,CAAC,EAAE,MAAM,CAAC;IACpB,UAAU,CAAC,EAAE,MAAM,CAAC;IACpB,OAAO,CAAC,EAAE,MAAM,CAAC;IACjB,QAAQ,CAAC,EAAE,MAAM,CAAC;IAClB,UAAU,CAAC,EAAE,MAAM,CAAC;IACpB,IAAI,CAAC,EAAE,MAAM,EAAE,CAAC;IAChB,aAAa,CAAC,EAAE,MAAM,CAAC;IACvB,gBAAgB,CAAC,EAAE,MAAM,CAAC;IAC1B,gBAAgB,CAAC,EAAE,oBAAoB,CAAC;CACzC;AAMD,MAAM,WAAW,SAAS;IACxB,QAAQ,EAAE,MAAM,CAAC;IACjB,UAAU,EAAE,MAAM,CAAC,MAAM,EAAE,MAAM,CAAC,CAAC;IACnC,OAAO,CAAC,EAAE,MAAM,CAAC;CAClB"}
package/dist/models.js CHANGED
@@ -4,5 +4,5 @@
4
4
  * Assertion-based evaluation: tasks define assertions that are checked
5
5
  * against the agent's execution trace.
6
6
  */
7
- export {};
7
+ export const CONTEXT_MODES = ['zero-shot', 'help', 'docs'];
8
8
  //# sourceMappingURL=models.js.map
@@ -1 +1 @@
1
- {"version":3,"file":"models.js","sourceRoot":"","sources":["../src/models.ts"],"names":[],"mappings":"AAAA;;;;;GAKG"}
1
+ {"version":3,"file":"models.js","sourceRoot":"","sources":["../src/models.ts"],"names":[],"mappings":"AAAA;;;;;GAKG;AAOH,MAAM,CAAC,MAAM,aAAa,GAAkB,CAAC,WAAW,EAAE,MAAM,EAAE,MAAM,CAAC,CAAC"}
package/dist/project.d.ts CHANGED
@@ -22,5 +22,6 @@ export declare function resolveTaskRefs(tasks: (Task | string)[], baseDir: strin
22
22
  export declare function loadProject(configPath: string): Promise<{
23
23
  config: ConfigFile;
24
24
  tasks: Task[];
25
+ taskSuiteContent: string;
25
26
  }>;
26
27
  //# sourceMappingURL=project.d.ts.map
@@ -1 +1 @@
1
- {"version":3,"file":"project.d.ts","sourceRoot":"","sources":["../src/project.ts"],"names":[],"mappings":"AAAA;;;GAGG;AAOH,OAAO,KAAK,EAAE,IAAI,EAAE,UAAU,EAAE,MAAM,aAAa,CAAC;AAIpD;;GAEG;AACH,wBAAsB,iBAAiB,CAAC,YAAY,CAAC,EAAE,MAAM,GAAG,OAAO,CAAC,MAAM,GAAG,IAAI,CAAC,CAqBrF;AAED;;GAEG;AACH,wBAAsB,cAAc,CAAC,IAAI,EAAE,MAAM,GAAG,OAAO,CAAC,UAAU,CAAC,CAItE;AAED;;;GAGG;AACH,wBAAsB,eAAe,CACnC,KAAK,EAAE,CAAC,IAAI,GAAG,MAAM,CAAC,EAAE,EACxB,OAAO,EAAE,MAAM,GACd,OAAO,CAAC,IAAI,EAAE,CAAC,CA6CjB;AAED;;GAEG;AACH,wBAAsB,WAAW,CAAC,UAAU,EAAE,MAAM,GAAG,OAAO,CAAC;IAAE,MAAM,EAAE,UAAU,CAAC;IAAC,KAAK,EAAE,IAAI,EAAE,CAAA;CAAE,CAAC,CAKpG"}
1
+ {"version":3,"file":"project.d.ts","sourceRoot":"","sources":["../src/project.ts"],"names":[],"mappings":"AAAA;;;GAGG;AAOH,OAAO,KAAK,EAAE,IAAI,EAAE,UAAU,EAAE,MAAM,aAAa,CAAC;AAIpD;;GAEG;AACH,wBAAsB,iBAAiB,CAAC,YAAY,CAAC,EAAE,MAAM,GAAG,OAAO,CAAC,MAAM,GAAG,IAAI,CAAC,CAqBrF;AAED;;GAEG;AACH,wBAAsB,cAAc,CAAC,IAAI,EAAE,MAAM,GAAG,OAAO,CAAC,UAAU,CAAC,CAItE;AAED;;;GAGG;AACH,wBAAsB,eAAe,CACnC,KAAK,EAAE,CAAC,IAAI,GAAG,MAAM,CAAC,EAAE,EACxB,OAAO,EAAE,MAAM,GACd,OAAO,CAAC,IAAI,EAAE,CAAC,CA6CjB;AAED;;GAEG;AACH,wBAAsB,WAAW,CAAC,UAAU,EAAE,MAAM,GAAG,OAAO,CAAC;IAAE,MAAM,EAAE,UAAU,CAAC;IAAC,KAAK,EAAE,IAAI,EAAE,CAAC;IAAC,gBAAgB,EAAE,MAAM,CAAA;CAAE,CAAC,CAM9H"}
package/dist/project.js CHANGED
@@ -5,7 +5,7 @@
5
5
  import { readFile, access } from 'node:fs/promises';
6
6
  import { join, dirname, resolve } from 'node:path';
7
7
  import { glob } from 'node:fs/promises';
8
- import { parse as parseYaml } from 'yaml';
8
+ import { parse as parseYaml, stringify as stringifyYaml } from 'yaml';
9
9
  import { ConfigFileSchema, TaskFileSchema, TaskSchema } from './schemas.js';
10
10
  const CONFIG_FILENAMES = ['cli-bench.yaml', 'cli-bench.yml'];
11
11
  /**
@@ -96,6 +96,7 @@ export async function loadProject(configPath) {
96
96
  const config = await loadConfigFile(configPath);
97
97
  const baseDir = dirname(configPath);
98
98
  const tasks = await resolveTaskRefs(config.tasks, baseDir);
99
- return { config, tasks };
99
+ const taskSuiteContent = stringifyYaml(tasks);
100
+ return { config, tasks, taskSuiteContent };
100
101
  }
101
102
  //# sourceMappingURL=project.js.map
@@ -1 +1 @@
1
- {"version":3,"file":"project.js","sourceRoot":"","sources":["../src/project.ts"],"names":[],"mappings":"AAAA;;;GAGG;AAEH,OAAO,EAAE,QAAQ,EAAE,MAAM,EAAE,MAAM,kBAAkB,CAAC;AACpD,OAAO,EAAE,IAAI,EAAE,OAAO,EAAE,OAAO,EAAE,MAAM,WAAW,CAAC;AACnD,OAAO,EAAE,IAAI,EAAE,MAAM,kBAAkB,CAAC;AACxC,OAAO,EAAE,KAAK,IAAI,SAAS,EAAE,MAAM,MAAM,CAAC;AAC1C,OAAO,EAAE,gBAAgB,EAAE,cAAc,EAAE,UAAU,EAAE,MAAM,cAAc,CAAC;AAG5E,MAAM,gBAAgB,GAAG,CAAC,gBAAgB,EAAE,eAAe,CAAC,CAAC;AAE7D;;GAEG;AACH,MAAM,CAAC,KAAK,UAAU,iBAAiB,CAAC,YAAqB;IAC3D,IAAI,YAAY,EAAE,CAAC;QACjB,IAAI,CAAC;YACH,MAAM,MAAM,CAAC,YAAY,CAAC,CAAC;YAC3B,OAAO,OAAO,CAAC,YAAY,CAAC,CAAC;QAC/B,CAAC;QAAC,MAAM,CAAC;YACP,OAAO,IAAI,CAAC;QACd,CAAC;IACH,CAAC;IAED,KAAK,MAAM,IAAI,IAAI,gBAAgB,EAAE,CAAC;QACpC,MAAM,SAAS,GAAG,OAAO,CAAC,IAAI,CAAC,CAAC;QAChC,IAAI,CAAC;YACH,MAAM,MAAM,CAAC,SAAS,CAAC,CAAC;YACxB,OAAO,SAAS,CAAC;QACnB,CAAC;QAAC,MAAM,CAAC;YACP,SAAS;QACX,CAAC;IACH,CAAC;IAED,OAAO,IAAI,CAAC;AACd,CAAC;AAED;;GAEG;AACH,MAAM,CAAC,KAAK,UAAU,cAAc,CAAC,IAAY;IAC/C,MAAM,GAAG,GAAG,MAAM,QAAQ,CAAC,IAAI,EAAE,OAAO,CAAC,CAAC;IAC1C,MAAM,MAAM,GAAG,SAAS,CAAC,GAAG,CAAC,CAAC;IAC9B,OAAO,gBAAgB,CAAC,KAAK,CAAC,MAAM,CAAe,CAAC;AACtD,CAAC;AAED;;;GAGG;AACH,MAAM,CAAC,KAAK,UAAU,eAAe,CACnC,KAAwB,EACxB,OAAe;IAEf,MAAM,QAAQ,GAAW,EAAE,CAAC;IAC5B,MAAM,OAAO,GAAG,IAAI,GAAG,EAAU,CAAC;IAElC,KAAK,MAAM,KAAK,IAAI,KAAK,EAAE,CAAC;QAC1B,IAAI,OAAO,KAAK,KAAK,QAAQ,EAAE,CAAC;YAC9B,oBAAoB;YACpB,MAAM,GAAG,GAAG,KAAK,CAAC,OAAO,CAAC,YAAY,EAAE,EAAE,CAAC,CAAC;YAC5C,MAAM,OAAO,GAAG,IAAI,CAAC,OAAO,EAAE,GAAG,CAAC,CAAC;YAEnC,gCAAgC;YAChC,MAAM,KAAK,GAAa,EAAE,CAAC;YAC3B,IAAI,GAAG,CAAC,QAAQ,CAAC,GAAG,CAAC,EAAE,CAAC;gBACtB,IAAI,KAAK,EAAE,MAAM,KAAK,IAAI,IAAI,CAAC,OAAO,CAAC,EAAE,CAAC;oBACxC,IAAI,KAAK,CAAC,QAAQ,CAAC,OAAO,CAAC,IAAI,KAAK,CAAC,QAAQ,CAAC,MAAM,CAAC,EAAE,CAAC;wBACtD,KAAK,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC;oBACpB,CAAC;gBACH,CAAC;gBACD,KAAK,CAAC,IAAI,EAAE,CAAC;YACf,CAAC;iBAAM,CAAC;gBACN,KAAK,CAAC,IAAI,CAAC,OAAO,CAAC,CAAC;YACtB,CAAC;YAED,KAAK,MAAM,QAAQ,IAAI,KAAK,EAAE,CAAC;gBAC7B,MAAM,GAAG,GAAG,MAAM,QAAQ,CAAC,QAAQ,EAAE,OAAO,CAAC,CAAC;gBAC9C,MAAM,MAAM,GAAG,SAAS,CAAC,GAAG,CAAC,CAAC;gBAC9B,MAAM,SAAS,GAAG,cAAc,CAAC,KAAK,CAAC,MAAM,CAAW,CAAC;gBACzD,KAAK,MAAM,IAAI,IAAI,SAAS,EAAE,CAAC;oBAC7B,IAAI,CAAC,OAAO,CAAC,GAAG,CAAC,IAAI,CAAC,EAAE,CAAC,EAAE,CAAC;wBAC1B,OAAO,CAAC,GAAG,CAAC,IAAI,CAAC,EAAE,CAAC,CAAC;wBACrB,QAAQ,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;oBACtB,CAAC;gBACH,CAAC;YACH,CAAC;QACH,CAAC;aAAM,CAAC;YACN,wCAAwC;YACxC,MAAM,IAAI,GAAG,UAAU,CAAC,KAAK,CAAC,KAAK,CAAS,CAAC;YAC7C,IAAI,CAAC,OAAO,CAAC,GAAG,CAAC,IAAI,CAAC,EAAE,CAAC,EAAE,CAAC;gBAC1B,OAAO,CAAC,GAAG,CAAC,IAAI,CAAC,EAAE,CAAC,CAAC;gBACrB,QAAQ,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;YACtB,CAAC;QACH,CAAC;IACH,CAAC;IAED,OAAO,QAAQ,CAAC;AAClB,CAAC;AAED;;GAEG;AACH,MAAM,CAAC,KAAK,UAAU,WAAW,CAAC,UAAkB;IAClD,MAAM,MAAM,GAAG,MAAM,cAAc,CAAC,UAAU,CAAC,CAAC;IAChD,MAAM,OAAO,GAAG,OAAO,CAAC,UAAU,CAAC,CAAC;IACpC,MAAM,KAAK,GAAG,MAAM,eAAe,CAAC,MAAM,CAAC,KAAK,EAAE,OAAO,CAAC,CAAC;IAC3D,OAAO,EAAE,MAAM,EAAE,KAAK,EAAE,CAAC;AAC3B,CAAC"}
1
+ {"version":3,"file":"project.js","sourceRoot":"","sources":["../src/project.ts"],"names":[],"mappings":"AAAA;;;GAGG;AAEH,OAAO,EAAE,QAAQ,EAAE,MAAM,EAAE,MAAM,kBAAkB,CAAC;AACpD,OAAO,EAAE,IAAI,EAAE,OAAO,EAAE,OAAO,EAAE,MAAM,WAAW,CAAC;AACnD,OAAO,EAAE,IAAI,EAAE,MAAM,kBAAkB,CAAC;AACxC,OAAO,EAAE,KAAK,IAAI,SAAS,EAAE,SAAS,IAAI,aAAa,EAAE,MAAM,MAAM,CAAC;AACtE,OAAO,EAAE,gBAAgB,EAAE,cAAc,EAAE,UAAU,EAAE,MAAM,cAAc,CAAC;AAG5E,MAAM,gBAAgB,GAAG,CAAC,gBAAgB,EAAE,eAAe,CAAC,CAAC;AAE7D;;GAEG;AACH,MAAM,CAAC,KAAK,UAAU,iBAAiB,CAAC,YAAqB;IAC3D,IAAI,YAAY,EAAE,CAAC;QACjB,IAAI,CAAC;YACH,MAAM,MAAM,CAAC,YAAY,CAAC,CAAC;YAC3B,OAAO,OAAO,CAAC,YAAY,CAAC,CAAC;QAC/B,CAAC;QAAC,MAAM,CAAC;YACP,OAAO,IAAI,CAAC;QACd,CAAC;IACH,CAAC;IAED,KAAK,MAAM,IAAI,IAAI,gBAAgB,EAAE,CAAC;QACpC,MAAM,SAAS,GAAG,OAAO,CAAC,IAAI,CAAC,CAAC;QAChC,IAAI,CAAC;YACH,MAAM,MAAM,CAAC,SAAS,CAAC,CAAC;YACxB,OAAO,SAAS,CAAC;QACnB,CAAC;QAAC,MAAM,CAAC;YACP,SAAS;QACX,CAAC;IACH,CAAC;IAED,OAAO,IAAI,CAAC;AACd,CAAC;AAED;;GAEG;AACH,MAAM,CAAC,KAAK,UAAU,cAAc,CAAC,IAAY;IAC/C,MAAM,GAAG,GAAG,MAAM,QAAQ,CAAC,IAAI,EAAE,OAAO,CAAC,CAAC;IAC1C,MAAM,MAAM,GAAG,SAAS,CAAC,GAAG,CAAC,CAAC;IAC9B,OAAO,gBAAgB,CAAC,KAAK,CAAC,MAAM,CAAe,CAAC;AACtD,CAAC;AAED;;;GAGG;AACH,MAAM,CAAC,KAAK,UAAU,eAAe,CACnC,KAAwB,EACxB,OAAe;IAEf,MAAM,QAAQ,GAAW,EAAE,CAAC;IAC5B,MAAM,OAAO,GAAG,IAAI,GAAG,EAAU,CAAC;IAElC,KAAK,MAAM,KAAK,IAAI,KAAK,EAAE,CAAC;QAC1B,IAAI,OAAO,KAAK,KAAK,QAAQ,EAAE,CAAC;YAC9B,oBAAoB;YACpB,MAAM,GAAG,GAAG,KAAK,CAAC,OAAO,CAAC,YAAY,EAAE,EAAE,CAAC,CAAC;YAC5C,MAAM,OAAO,GAAG,IAAI,CAAC,OAAO,EAAE,GAAG,CAAC,CAAC;YAEnC,gCAAgC;YAChC,MAAM,KAAK,GAAa,EAAE,CAAC;YAC3B,IAAI,GAAG,CAAC,QAAQ,CAAC,GAAG,CAAC,EAAE,CAAC;gBACtB,IAAI,KAAK,EAAE,MAAM,KAAK,IAAI,IAAI,CAAC,OAAO,CAAC,EAAE,CAAC;oBACxC,IAAI,KAAK,CAAC,QAAQ,CAAC,OAAO,CAAC,IAAI,KAAK,CAAC,QAAQ,CAAC,MAAM,CAAC,EAAE,CAAC;wBACtD,KAAK,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC;oBACpB,CAAC;gBACH,CAAC;gBACD,KAAK,CAAC,IAAI,EAAE,CAAC;YACf,CAAC;iBAAM,CAAC;gBACN,KAAK,CAAC,IAAI,CAAC,OAAO,CAAC,CAAC;YACtB,CAAC;YAED,KAAK,MAAM,QAAQ,IAAI,KAAK,EAAE,CAAC;gBAC7B,MAAM,GAAG,GAAG,MAAM,QAAQ,CAAC,QAAQ,EAAE,OAAO,CAAC,CAAC;gBAC9C,MAAM,MAAM,GAAG,SAAS,CAAC,GAAG,CAAC,CAAC;gBAC9B,MAAM,SAAS,GAAG,cAAc,CAAC,KAAK,CAAC,MAAM,CAAW,CAAC;gBACzD,KAAK,MAAM,IAAI,IAAI,SAAS,EAAE,CAAC;oBAC7B,IAAI,CAAC,OAAO,CAAC,GAAG,CAAC,IAAI,CAAC,EAAE,CAAC,EAAE,CAAC;wBAC1B,OAAO,CAAC,GAAG,CAAC,IAAI,CAAC,EAAE,CAAC,CAAC;wBACrB,QAAQ,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;oBACtB,CAAC;gBACH,CAAC;YACH,CAAC;QACH,CAAC;aAAM,CAAC;YACN,wCAAwC;YACxC,MAAM,IAAI,GAAG,UAAU,CAAC,KAAK,CAAC,KAAK,CAAS,CAAC;YAC7C,IAAI,CAAC,OAAO,CAAC,GAAG,CAAC,IAAI,CAAC,EAAE,CAAC,EAAE,CAAC;gBAC1B,OAAO,CAAC,GAAG,CAAC,IAAI,CAAC,EAAE,CAAC,CAAC;gBACrB,QAAQ,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;YACtB,CAAC;QACH,CAAC;IACH,CAAC;IAED,OAAO,QAAQ,CAAC;AAClB,CAAC;AAED;;GAEG;AACH,MAAM,CAAC,KAAK,UAAU,WAAW,CAAC,UAAkB;IAClD,MAAM,MAAM,GAAG,MAAM,cAAc,CAAC,UAAU,CAAC,CAAC;IAChD,MAAM,OAAO,GAAG,OAAO,CAAC,UAAU,CAAC,CAAC;IACpC,MAAM,KAAK,GAAG,MAAM,eAAe,CAAC,MAAM,CAAC,KAAK,EAAE,OAAO,CAAC,CAAC;IAC3D,MAAM,gBAAgB,GAAG,aAAa,CAAC,KAAK,CAAC,CAAC;IAC9C,OAAO,EAAE,MAAM,EAAE,KAAK,EAAE,gBAAgB,EAAE,CAAC;AAC7C,CAAC"}
package/dist/prompt.d.ts CHANGED
@@ -1,12 +1,13 @@
1
1
  /**
2
- * Builds prompts from help text + task definition.
2
+ * Builds prompts for CLI benchmark agents.
3
3
  *
4
- * Three help modes:
5
- * - injected: help text included in prompt (current behavior)
6
- * - discoverable: agent must run --help to discover commands
7
- * - none: agent relies on training knowledge only
4
+ * One unified system message for all context modes.
5
+ * The user message varies by context mode:
6
+ * - zero-shot: CLI name + task intent only
7
+ * - help: CLI name + top-level --help output + task intent
8
+ * - docs: CLI name + documentation contents + task intent
8
9
  */
9
- import type { HelpMode, Task } from './models.js';
10
- export declare function buildSystemMessage(helpMode: HelpMode, customPrompt?: string): string;
11
- export declare function buildUserMessage(cliName: string, helpTexts: Record<string, string> | null, task: Task, helpMode: HelpMode): string;
10
+ import type { ContextMode, Task } from './models.js';
11
+ export declare function buildSystemMessage(customPrompt?: string): string;
12
+ export declare function buildUserMessage(cliName: string, task: Task, contextMode: ContextMode, contextPayload?: string | null): string;
12
13
  //# sourceMappingURL=prompt.d.ts.map
@@ -1 +1 @@
1
- {"version":3,"file":"prompt.d.ts","sourceRoot":"","sources":["../src/prompt.ts"],"names":[],"mappings":"AAAA;;;;;;;GAOG;AAEH,OAAO,KAAK,EAAE,QAAQ,EAAE,IAAI,EAAE,MAAM,aAAa,CAAC;AA6BlD,wBAAgB,kBAAkB,CAAC,QAAQ,EAAE,QAAQ,EAAE,YAAY,CAAC,EAAE,MAAM,GAAG,MAAM,CAkBpF;AAED,wBAAgB,gBAAgB,CAC9B,OAAO,EAAE,MAAM,EACf,SAAS,EAAE,MAAM,CAAC,MAAM,EAAE,MAAM,CAAC,GAAG,IAAI,EACxC,IAAI,EAAE,IAAI,EACV,QAAQ,EAAE,QAAQ,GACjB,MAAM,CAaR"}
1
+ {"version":3,"file":"prompt.d.ts","sourceRoot":"","sources":["../src/prompt.ts"],"names":[],"mappings":"AAAA;;;;;;;;GAQG;AAEH,OAAO,KAAK,EAAE,WAAW,EAAE,IAAI,EAAE,MAAM,aAAa,CAAC;AASrD,wBAAgB,kBAAkB,CAAC,YAAY,CAAC,EAAE,MAAM,GAAG,MAAM,CAKhE;AAED,wBAAgB,gBAAgB,CAC9B,OAAO,EAAE,MAAM,EACf,IAAI,EAAE,IAAI,EACV,WAAW,EAAE,WAAW,EACxB,cAAc,CAAC,EAAE,MAAM,GAAG,IAAI,GAC7B,MAAM,CA+BR"}
package/dist/prompt.js CHANGED
@@ -1,96 +1,52 @@
1
1
  /**
2
- * Builds prompts from help text + task definition.
2
+ * Builds prompts for CLI benchmark agents.
3
3
  *
4
- * Three help modes:
5
- * - injected: help text included in prompt (current behavior)
6
- * - discoverable: agent must run --help to discover commands
7
- * - none: agent relies on training knowledge only
4
+ * One unified system message for all context modes.
5
+ * The user message varies by context mode:
6
+ * - zero-shot: CLI name + task intent only
7
+ * - help: CLI name + top-level --help output + task intent
8
+ * - docs: CLI name + documentation contents + task intent
8
9
  */
9
10
  const AGENT_SYSTEM_MESSAGE = `You are a CLI expert. You have a run_command tool to execute shell commands. Use it to accomplish the user's task.
10
11
 
11
12
  Rules:
12
- - Read the help text carefully to understand available subcommands and flags
13
13
  - Execute commands using the run_command tool
14
14
  - If a command fails, read the error and retry with corrected flags
15
- - Do NOT invent flags that don't exist in the help text
16
15
  - When the task is complete, stop calling tools`;
17
- const AGENT_SYSTEM_MESSAGE_DISCOVERABLE = `You are a CLI expert. You have a run_command tool to execute shell commands. Use it to accomplish the user's task.
18
-
19
- Rules:
20
- - Use <cli> --help and <cli> <subcommand> --help to discover available commands and flags
21
- - Execute commands using the run_command tool
22
- - If a command fails, read the error and retry with corrected flags
23
- - Do NOT invent flags — always check --help first
24
- - When the task is complete, stop calling tools`;
25
- const AGENT_SYSTEM_MESSAGE_NONE = `You are a CLI expert. You have a run_command tool to execute shell commands. Use it to accomplish the user's task.
26
-
27
- Rules:
28
- - Use your training knowledge of CLI tools to construct commands
29
- - Execute commands using the run_command tool
30
- - If a command fails, read the error and retry with corrected flags
31
- - Do NOT run --help commands — rely on your knowledge
32
- - When the task is complete, stop calling tools`;
33
- export function buildSystemMessage(helpMode, customPrompt) {
34
- let base;
35
- switch (helpMode) {
36
- case 'injected':
37
- base = AGENT_SYSTEM_MESSAGE;
38
- break;
39
- case 'discoverable':
40
- base = AGENT_SYSTEM_MESSAGE_DISCOVERABLE;
41
- break;
42
- case 'none':
43
- base = AGENT_SYSTEM_MESSAGE_NONE;
44
- break;
45
- }
16
+ export function buildSystemMessage(customPrompt) {
46
17
  if (customPrompt) {
47
- return `${base}\n\n${customPrompt}`;
18
+ return `${AGENT_SYSTEM_MESSAGE}\n\n${customPrompt}`;
48
19
  }
49
- return base;
20
+ return AGENT_SYSTEM_MESSAGE;
50
21
  }
51
- export function buildUserMessage(cliName, helpTexts, task, helpMode) {
52
- if (helpMode === 'injected' && helpTexts) {
53
- const relevantHelp = selectRelevantHelp(cliName, helpTexts, task);
54
- return `CLI: ${cliName}
22
+ export function buildUserMessage(cliName, task, contextMode, contextPayload) {
23
+ switch (contextMode) {
24
+ case 'help':
25
+ if (contextPayload) {
26
+ return `CLI: ${cliName}
55
27
 
56
28
  Help text:
57
- ${relevantHelp}
29
+ $ ${cliName} --help
30
+ ${contextPayload}
58
31
 
59
32
  Task: ${task.intent}`;
60
- }
61
- return `CLI: ${cliName}
33
+ }
34
+ // Fall through to zero-shot if no help available
35
+ return `CLI: ${cliName}\n\nTask: ${task.intent}`;
36
+ case 'docs':
37
+ if (contextPayload) {
38
+ return `CLI: ${cliName}
39
+
40
+ Documentation:
41
+ ${contextPayload}
62
42
 
63
43
  Task: ${task.intent}`;
64
- }
65
- /**
66
- * Select the most relevant help text sections for a task.
67
- * Trims to stay within reasonable context limits (~4K chars).
68
- */
69
- function selectRelevantHelp(cliName, helpTexts, task) {
70
- const sections = [];
71
- const maxChars = 4000;
72
- let totalChars = 0;
73
- // Always include root help
74
- const rootHelp = helpTexts[''];
75
- if (rootHelp) {
76
- sections.push(`$ ${cliName} --help\n${rootHelp}`);
77
- totalChars += rootHelp.length;
78
- }
79
- // Add help sections that match task keywords
80
- if (totalChars < maxChars * 0.7) {
81
- const taskWords = task.intent.toLowerCase().split(/\s+/);
82
- for (const [key, help] of Object.entries(helpTexts)) {
83
- if (key === '' || sections.some((s) => s.includes(`${cliName} ${key} --help`))) {
84
- continue;
85
- }
86
- const keyWords = key.split(' ');
87
- const matches = keyWords.some((kw) => taskWords.includes(kw));
88
- if (matches && totalChars + help.length < maxChars) {
89
- sections.push(`$ ${cliName} ${key} --help\n${help}`);
90
- totalChars += help.length;
91
44
  }
92
- }
45
+ // Fall through to zero-shot if no docs available
46
+ return `CLI: ${cliName}\n\nTask: ${task.intent}`;
47
+ case 'zero-shot':
48
+ default:
49
+ return `CLI: ${cliName}\n\nTask: ${task.intent}`;
93
50
  }
94
- return sections.join('\n\n');
95
51
  }
96
52
  //# sourceMappingURL=prompt.js.map
@@ -1 +1 @@
1
- {"version":3,"file":"prompt.js","sourceRoot":"","sources":["../src/prompt.ts"],"names":[],"mappings":"AAAA;;;;;;;GAOG;AAIH,MAAM,oBAAoB,GAAG;;;;;;;gDAOmB,CAAC;AAEjD,MAAM,iCAAiC,GAAG;;;;;;;gDAOM,CAAC;AAEjD,MAAM,yBAAyB,GAAG;;;;;;;gDAOc,CAAC;AAEjD,MAAM,UAAU,kBAAkB,CAAC,QAAkB,EAAE,YAAqB;IAC1E,IAAI,IAAY,CAAC;IACjB,QAAQ,QAAQ,EAAE,CAAC;QACjB,KAAK,UAAU;YACb,IAAI,GAAG,oBAAoB,CAAC;YAC5B,MAAM;QACR,KAAK,cAAc;YACjB,IAAI,GAAG,iCAAiC,CAAC;YACzC,MAAM;QACR,KAAK,MAAM;YACT,IAAI,GAAG,yBAAyB,CAAC;YACjC,MAAM;IACV,CAAC;IAED,IAAI,YAAY,EAAE,CAAC;QACjB,OAAO,GAAG,IAAI,OAAO,YAAY,EAAE,CAAC;IACtC,CAAC;IACD,OAAO,IAAI,CAAC;AACd,CAAC;AAED,MAAM,UAAU,gBAAgB,CAC9B,OAAe,EACf,SAAwC,EACxC,IAAU,EACV,QAAkB;IAElB,IAAI,QAAQ,KAAK,UAAU,IAAI,SAAS,EAAE,CAAC;QACzC,MAAM,YAAY,GAAG,kBAAkB,CAAC,OAAO,EAAE,SAAS,EAAE,IAAI,CAAC,CAAC;QAClE,OAAO,QAAQ,OAAO;;;EAGxB,YAAY;;QAEN,IAAI,CAAC,MAAM,EAAE,CAAC;IACpB,CAAC;IACD,OAAO,QAAQ,OAAO;;QAEhB,IAAI,CAAC,MAAM,EAAE,CAAC;AACtB,CAAC;AAED;;;GAGG;AACH,SAAS,kBAAkB,CACzB,OAAe,EACf,SAAiC,EACjC,IAAU;IAEV,MAAM,QAAQ,GAAa,EAAE,CAAC;IAC9B,MAAM,QAAQ,GAAG,IAAI,CAAC;IACtB,IAAI,UAAU,GAAG,CAAC,CAAC;IAEnB,2BAA2B;IAC3B,MAAM,QAAQ,GAAG,SAAS,CAAC,EAAE,CAAC,CAAC;IAC/B,IAAI,QAAQ,EAAE,CAAC;QACb,QAAQ,CAAC,IAAI,CAAC,KAAK,OAAO,YAAY,QAAQ,EAAE,CAAC,CAAC;QAClD,UAAU,IAAI,QAAQ,CAAC,MAAM,CAAC;IAChC,CAAC;IAED,6CAA6C;IAC7C,IAAI,UAAU,GAAG,QAAQ,GAAG,GAAG,EAAE,CAAC;QAChC,MAAM,SAAS,GAAG,IAAI,CAAC,MAAM,CAAC,WAAW,EAAE,CAAC,KAAK,CAAC,KAAK,CAAC,CAAC;QACzD,KAAK,MAAM,CAAC,GAAG,EAAE,IAAI,CAAC,IAAI,MAAM,CAAC,OAAO,CAAC,SAAS,CAAC,EAAE,CAAC;YACpD,IAAI,GAAG,KAAK,EAAE,IAAI,QAAQ,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,QAAQ,CAAC,GAAG,OAAO,IAAI,GAAG,SAAS,CAAC,CAAC,EAAE,CAAC;gBAC/E,SAAS;YACX,CAAC;YACD,MAAM,QAAQ,GAAG,GAAG,CAAC,KAAK,CAAC,GAAG,CAAC,CAAC;YAChC,MAAM,OAAO,GAAG,QAAQ,CAAC,IAAI,CAAC,CAAC,EAAE,EAAE,EAAE,CAAC,SAAS,CAAC,QAAQ,CAAC,EAAE,CAAC,CAAC,CAAC;YAC9D,IAAI,OAAO,IAAI,UAAU,GAAG,IAAI,CAAC,MAAM,GAAG,QAAQ,EAAE,CAAC;gBACnD,QAAQ,CAAC,IAAI,CAAC,KAAK,OAAO,IAAI,GAAG,YAAY,IAAI,EAAE,CAAC,CAAC;gBACrD,UAAU,IAAI,IAAI,CAAC,MAAM,CAAC;YAC5B,CAAC;QACH,CAAC;IACH,CAAC;IAED,OAAO,QAAQ,CAAC,IAAI,CAAC,MAAM,CAAC,CAAC;AAC/B,CAAC"}
1
+ {"version":3,"file":"prompt.js","sourceRoot":"","sources":["../src/prompt.ts"],"names":[],"mappings":"AAAA;;;;;;;;GAQG;AAIH,MAAM,oBAAoB,GAAG;;;;;gDAKmB,CAAC;AAEjD,MAAM,UAAU,kBAAkB,CAAC,YAAqB;IACtD,IAAI,YAAY,EAAE,CAAC;QACjB,OAAO,GAAG,oBAAoB,OAAO,YAAY,EAAE,CAAC;IACtD,CAAC;IACD,OAAO,oBAAoB,CAAC;AAC9B,CAAC;AAED,MAAM,UAAU,gBAAgB,CAC9B,OAAe,EACf,IAAU,EACV,WAAwB,EACxB,cAA8B;IAE9B,QAAQ,WAAW,EAAE,CAAC;QACpB,KAAK,MAAM;YACT,IAAI,cAAc,EAAE,CAAC;gBACnB,OAAO,QAAQ,OAAO;;;IAG1B,OAAO;EACT,cAAc;;QAER,IAAI,CAAC,MAAM,EAAE,CAAC;YAChB,CAAC;YACD,iDAAiD;YACjD,OAAO,QAAQ,OAAO,aAAa,IAAI,CAAC,MAAM,EAAE,CAAC;QAEnD,KAAK,MAAM;YACT,IAAI,cAAc,EAAE,CAAC;gBACnB,OAAO,QAAQ,OAAO;;;EAG5B,cAAc;;QAER,IAAI,CAAC,MAAM,EAAE,CAAC;YAChB,CAAC;YACD,iDAAiD;YACjD,OAAO,QAAQ,OAAO,aAAa,IAAI,CAAC,MAAM,EAAE,CAAC;QAEnD,KAAK,WAAW,CAAC;QACjB;YACE,OAAO,QAAQ,OAAO,aAAa,IAAI,CAAC,MAAM,EAAE,CAAC;IACrD,CAAC;AACH,CAAC"}
package/dist/runner.d.ts CHANGED
@@ -1,5 +1,5 @@
1
1
  /**
2
- * Orchestrates the task x model x helpMode matrix with tool-calling agents.
2
+ * Orchestrates the task x model x contextMode matrix with tool-calling agents.
3
3
  *
4
4
  * For each task:
5
5
  * - Create temp workdir (or use configured one)
@@ -30,6 +30,15 @@ export interface RunGridOptions {
30
30
  globalRepeat?: number;
31
31
  /** Custom system prompt appended to the default. */
32
32
  systemPrompt?: string;
33
+ /** Profile metadata (passed through to upload). */
34
+ displayName?: string;
35
+ category?: string;
36
+ websiteUrl?: string;
37
+ githubUrl?: string;
38
+ /** Serialized YAML of the resolved task suite. */
39
+ taskSuiteContent?: string;
40
+ /** Directory containing the config file (for resolving docs.md). */
41
+ configDir?: string;
33
42
  }
34
43
  export declare function runGrid(opts: RunGridOptions): Promise<GridReport[]>;
35
44
  export declare function uploadReport(report: GridReport, backendUrl: string, apiKey: string): Promise<void>;
@@ -1 +1 @@
1
- {"version":3,"file":"runner.d.ts","sourceRoot":"","sources":["../src/runner.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;GAUG;AASH,OAAO,KAAK,EACV,SAAS,EACT,IAAI,EACJ,UAAU,EAKX,MAAM,aAAa,CAAC;AACrB,OAAO,KAAK,EAAE,MAAM,EAAE,MAAM,aAAa,CAAC;AAC1C,OAAO,KAAK,EAAE,UAAU,EAAE,MAAM,gBAAgB,CAAC;AASjD,wBAAsB,aAAa,CAAC,QAAQ,EAAE,MAAM,GAAG,OAAO,CAAC,SAAS,CAAC,CAKxE;AAED,wBAAsB,kBAAkB,CACtC,QAAQ,EAAE,MAAM,GACf,OAAO,CAAC,GAAG,CAAC,MAAM,EAAE,MAAM,CAAC,CAAC,CAc9B;AAkND,MAAM,WAAW,cAAc;IAC7B,MAAM,EAAE,MAAM,CAAC;IACf,8CAA8C;IAC9C,KAAK,CAAC,EAAE,IAAI,EAAE,CAAC;IACf,iDAAiD;IACjD,OAAO,CAAC,EAAE,MAAM,CAAC;IACjB,+CAA+C;IAC/C,MAAM,CAAC,EAAE,UAAU,EAAE,CAAC;IACtB,wDAAwD;IACxD,cAAc,CAAC,EAAE,MAAM,CAAC;IACxB,0DAA0D;IAC1D,OAAO,CAAC,EAAE,MAAM,CAAC;IACjB,uDAAuD;IACvD,YAAY,CAAC,EAAE,MAAM,CAAC;IACtB,oDAAoD;IACpD,YAAY,CAAC,EAAE,MAAM,CAAC;CACvB;AAED,wBAAsB,OAAO,CAAC,IAAI,EAAE,cAAc,GAAG,OAAO,CAAC,UAAU,EAAE,CAAC,CA0DzE;AAoND,wBAAsB,YAAY,CAChC,MAAM,EAAE,UAAU,EAClB,UAAU,EAAE,MAAM,EAClB,MAAM,EAAE,MAAM,GACb,OAAO,CAAC,IAAI,CAAC,CAsBf"}
1
+ {"version":3,"file":"runner.d.ts","sourceRoot":"","sources":["../src/runner.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;GAUG;AASH,OAAO,KAAK,EACV,SAAS,EACT,IAAI,EACJ,UAAU,EAKX,MAAM,aAAa,CAAC;AACrB,OAAO,KAAK,EAAE,MAAM,EAAE,MAAM,aAAa,CAAC;AAC1C,OAAO,KAAK,EAAE,UAAU,EAAE,MAAM,gBAAgB,CAAC;AASjD,wBAAsB,aAAa,CAAC,QAAQ,EAAE,MAAM,GAAG,OAAO,CAAC,SAAS,CAAC,CAKxE;AAED,wBAAsB,kBAAkB,CACtC,QAAQ,EAAE,MAAM,GACf,OAAO,CAAC,GAAG,CAAC,MAAM,EAAE,MAAM,CAAC,CAAC,CAc9B;AAkND,MAAM,WAAW,cAAc;IAC7B,MAAM,EAAE,MAAM,CAAC;IACf,8CAA8C;IAC9C,KAAK,CAAC,EAAE,IAAI,EAAE,CAAC;IACf,iDAAiD;IACjD,OAAO,CAAC,EAAE,MAAM,CAAC;IACjB,+CAA+C;IAC/C,MAAM,CAAC,EAAE,UAAU,EAAE,CAAC;IACtB,wDAAwD;IACxD,cAAc,CAAC,EAAE,MAAM,CAAC;IACxB,0DAA0D;IAC1D,OAAO,CAAC,EAAE,MAAM,CAAC;IACjB,uDAAuD;IACvD,YAAY,CAAC,EAAE,MAAM,CAAC;IACtB,oDAAoD;IACpD,YAAY,CAAC,EAAE,MAAM,CAAC;IACtB,mDAAmD;IACnD,WAAW,CAAC,EAAE,MAAM,CAAC;IACrB,QAAQ,CAAC,EAAE,MAAM,CAAC;IAClB,UAAU,CAAC,EAAE,MAAM,CAAC;IACpB,SAAS,CAAC,EAAE,MAAM,CAAC;IACnB,kDAAkD;IAClD,gBAAgB,CAAC,EAAE,MAAM,CAAC;IAC1B,oEAAoE;IACpE,SAAS,CAAC,EAAE,MAAM,CAAC;CACpB;AAED,wBAAsB,OAAO,CAAC,IAAI,EAAE,cAAc,GAAG,OAAO,CAAC,UAAU,EAAE,CAAC,CAoEzE;AAiPD,wBAAsB,YAAY,CAChC,MAAM,EAAE,UAAU,EAClB,UAAU,EAAE,MAAM,EAClB,MAAM,EAAE,MAAM,GACb,OAAO,CAAC,IAAI,CAAC,CAsBf"}