@kradle/cli 0.0.17 → 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (62)
  1. package/README.md +93 -65
  2. package/dist/commands/agent/list.d.ts +4 -0
  3. package/dist/commands/agent/list.js +6 -4
  4. package/dist/commands/challenge/build.d.ts +9 -1
  5. package/dist/commands/challenge/build.js +40 -12
  6. package/dist/commands/challenge/create.d.ts +5 -1
  7. package/dist/commands/challenge/create.js +17 -18
  8. package/dist/commands/challenge/delete.d.ts +4 -1
  9. package/dist/commands/challenge/delete.js +5 -5
  10. package/dist/commands/challenge/list.d.ts +5 -0
  11. package/dist/commands/challenge/list.js +11 -10
  12. package/dist/commands/challenge/run.d.ts +8 -1
  13. package/dist/commands/challenge/run.js +13 -8
  14. package/dist/commands/challenge/watch.d.ts +4 -1
  15. package/dist/commands/challenge/watch.js +8 -8
  16. package/dist/commands/{evaluation → experiment}/create.d.ts +4 -0
  17. package/dist/commands/{evaluation → experiment}/create.js +22 -21
  18. package/dist/commands/{evaluation → experiment}/list.js +17 -19
  19. package/dist/commands/experiment/recordings.d.ts +19 -0
  20. package/dist/commands/experiment/recordings.js +416 -0
  21. package/dist/commands/experiment/run.d.ts +17 -0
  22. package/dist/commands/experiment/run.js +67 -0
  23. package/dist/commands/init.js +2 -2
  24. package/dist/lib/api-client.d.ts +51 -10
  25. package/dist/lib/api-client.js +108 -39
  26. package/dist/lib/arguments.d.ts +3 -2
  27. package/dist/lib/arguments.js +5 -3
  28. package/dist/lib/challenge.d.ts +13 -18
  29. package/dist/lib/challenge.js +58 -62
  30. package/dist/lib/experiment/experimenter.d.ts +92 -0
  31. package/dist/lib/experiment/experimenter.js +368 -0
  32. package/dist/lib/{evaluation → experiment}/index.d.ts +1 -1
  33. package/dist/lib/{evaluation → experiment}/index.js +1 -1
  34. package/dist/lib/{evaluation → experiment}/runner.d.ts +2 -0
  35. package/dist/lib/{evaluation → experiment}/runner.js +21 -2
  36. package/dist/lib/{evaluation → experiment}/tui.d.ts +1 -1
  37. package/dist/lib/{evaluation → experiment}/tui.js +3 -3
  38. package/dist/lib/{evaluation → experiment}/types.d.ts +10 -4
  39. package/dist/lib/{evaluation → experiment}/types.js +5 -3
  40. package/dist/lib/flags.d.ts +47 -0
  41. package/dist/lib/flags.js +63 -0
  42. package/dist/lib/schemas.d.ts +63 -2
  43. package/dist/lib/schemas.js +27 -1
  44. package/dist/lib/utils.d.ts +9 -10
  45. package/dist/lib/utils.js +12 -12
  46. package/oclif.manifest.json +423 -64
  47. package/package.json +11 -8
  48. package/static/challenge.ts +12 -13
  49. package/static/experiment_template.ts +114 -0
  50. package/static/project_template/dev.env +5 -5
  51. package/static/project_template/prod.env +4 -4
  52. package/static/project_template/tsconfig.json +1 -1
  53. package/dist/commands/challenge/multi-upload.d.ts +0 -6
  54. package/dist/commands/challenge/multi-upload.js +0 -80
  55. package/dist/commands/evaluation/run.d.ts +0 -13
  56. package/dist/commands/evaluation/run.js +0 -61
  57. package/dist/lib/config.d.ts +0 -12
  58. package/dist/lib/config.js +0 -49
  59. package/dist/lib/evaluation/evaluator.d.ts +0 -88
  60. package/dist/lib/evaluation/evaluator.js +0 -268
  61. package/static/evaluation_template.ts +0 -69
  62. /package/dist/commands/{evaluation → experiment}/list.d.ts +0 -0
@@ -0,0 +1,368 @@
1
+ import fs from "node:fs/promises";
2
+ import path from "node:path";
3
+ import pc from "picocolors";
4
+ import { executeNodeCommand, openInBrowser } from "../utils.js";
5
+ import { Runner } from "./runner.js";
6
+ import { TUI } from "./tui.js";
7
+ import { ExperimentMetadataSchema, ManifestSchema, ProgressSchema } from "./types.js";
8
+ // Sanitize timestamp string for use in filenames
9
+ function sanitizeTimestamp(timestamp) {
10
+ // Replace colons, spaces, and other problematic characters
11
+ return timestamp
12
+ .replace(/:/g, "-")
13
+ .replace(/\s+/g, "_")
14
+ .replace(/[<>:"|?*]/g, "_");
15
+ }
16
+ export class Experimenter {
17
+ name;
18
+ webUrl;
19
+ api;
20
+ experimentDir;
21
+ metadataPath;
22
+ runner;
23
+ tui;
24
+ currentVersion;
25
+ constructor(name, webUrl, api) {
26
+ this.name = name;
27
+ this.webUrl = webUrl;
28
+ this.api = api;
29
+ this.experimentDir = path.resolve(process.cwd(), "experiments", name);
30
+ this.metadataPath = path.join(this.experimentDir, ".experiment.json");
31
+ }
32
+ /**
33
+ * Get paths for a specific version
34
+ */
35
+ getVersionPaths(version) {
36
+ const versionDir = path.join(this.experimentDir, "versions", version.toString().padStart(3, "0"));
37
+ return {
38
+ versionDir,
39
+ configPath: path.join(versionDir, "config.ts"),
40
+ manifestPath: path.join(versionDir, "manifest.json"),
41
+ progressPath: path.join(versionDir, "progress.json"),
42
+ };
43
+ }
44
+ get configPath() {
45
+ return path.join(this.experimentDir, "config.ts");
46
+ }
47
+ /**
48
+ * Get the current version directory path
49
+ */
50
+ getCurrentVersionDir() {
51
+ if (this.currentVersion === undefined) {
52
+ throw new Error("No version set");
53
+ }
54
+ return this.getVersionPaths(this.currentVersion).versionDir;
55
+ }
56
+ /**
57
+ * Check if experiment exists
58
+ */
59
+ async exists() {
60
+ try {
61
+ await fs.access(this.experimentDir);
62
+ return true;
63
+ }
64
+ catch {
65
+ return false;
66
+ }
67
+ }
68
+ /**
69
+ * Check if config.ts exists (master config)
70
+ */
71
+ async configExists() {
72
+ try {
73
+ await fs.access(this.configPath);
74
+ return true;
75
+ }
76
+ catch {
77
+ return false;
78
+ }
79
+ }
80
+ /**
81
+ * Load experiment metadata
82
+ */
83
+ async loadMetadata() {
84
+ try {
85
+ const content = await fs.readFile(this.metadataPath, "utf-8");
86
+ const data = JSON.parse(content);
87
+ return ExperimentMetadataSchema.parse(data);
88
+ }
89
+ catch {
90
+ return null;
91
+ }
92
+ }
93
+ /**
94
+ * Save experiment metadata
95
+ */
96
+ async saveMetadata(metadata) {
97
+ await fs.writeFile(this.metadataPath, JSON.stringify(metadata, null, 2));
98
+ }
99
+ /**
100
+ * Get the current version number, or -1 if none exists
101
+ */
102
+ async getCurrentVersionNumber() {
103
+ const metadata = await this.loadMetadata();
104
+ return metadata?.currentVersion ?? -1;
105
+ }
106
+ /**
107
+ * Create a new version
108
+ */
109
+ async createNewVersion() {
110
+ const currentVersion = await this.getCurrentVersionNumber();
111
+ const newVersion = currentVersion + 1;
112
+ const paths = this.getVersionPaths(newVersion);
113
+ // Create version directory
114
+ await fs.mkdir(paths.versionDir, { recursive: true });
115
+ // Copy master config to version
116
+ const masterConfigPath = path.join(this.experimentDir, "config.ts");
117
+ await fs.copyFile(masterConfigPath, paths.configPath);
118
+ // Generate manifest from config
119
+ const manifest = await this.generateManifest(paths.configPath);
120
+ await fs.writeFile(paths.manifestPath, JSON.stringify(manifest, null, 2));
121
+ // Update metadata
122
+ await this.saveMetadata({ currentVersion: newVersion });
123
+ this.currentVersion = newVersion;
124
+ return newVersion;
125
+ }
126
+ /**
127
+ * Get or create a version
128
+ * @param createNew - If true, always create a new version. Otherwise, use current version or create first one if none exists.
129
+ */
130
+ async getOrCreateVersion(createNew) {
131
+ if (createNew) {
132
+ return await this.createNewVersion();
133
+ }
134
+ const currentVersion = await this.getCurrentVersionNumber();
135
+ if (currentVersion < 0) {
136
+ return await this.createNewVersion();
137
+ }
138
+ this.currentVersion = currentVersion;
139
+ return currentVersion;
140
+ }
141
+ /**
142
+ * Load manifest from version
143
+ */
144
+ async loadManifest(version) {
145
+ const paths = this.getVersionPaths(version);
146
+ const content = await fs.readFile(paths.manifestPath, "utf-8");
147
+ const data = JSON.parse(content);
148
+ return ManifestSchema.parse(data);
149
+ }
150
+ /**
151
+ * Load progress from version
152
+ */
153
+ async loadProgress(version) {
154
+ try {
155
+ const paths = this.getVersionPaths(version);
156
+ const content = await fs.readFile(paths.progressPath, "utf-8");
157
+ const data = JSON.parse(content);
158
+ return ProgressSchema.parse(data);
159
+ }
160
+ catch {
161
+ return null;
162
+ }
163
+ }
164
+ /**
165
+ * Save progress to current version
166
+ */
167
+ async saveProgress() {
168
+ if (!this.runner || this.currentVersion === undefined)
169
+ return;
170
+ const paths = this.getVersionPaths(this.currentVersion);
171
+ const progress = {
172
+ entries: this.runner.getProgressEntries(),
173
+ lastUpdated: Date.now(),
174
+ };
175
+ await fs.writeFile(paths.progressPath, JSON.stringify(progress, null, 2));
176
+ }
177
+ /**
178
+ * Execute config.ts to generate manifest
179
+ */
180
+ async generateManifest(configPath) {
181
+ const manifest = await this.executeConfigFile(configPath);
182
+ return ManifestSchema.parse(manifest);
183
+ }
184
+ /**
185
+ * Execute config.ts file and return the manifest
186
+ */
187
+ async executeConfigFile(configPath) {
188
+ // We spawn a new Node.js process that runs the config file's main() and prints the resulting manifest to stdout as JSON.
189
+ // We can't import the file directly because the module would be cached, and the import cache can't be invalidated.
190
+ const stdout = await executeNodeCommand([
191
+ "--experimental-transform-types",
192
+ "--no-warnings",
193
+ "-e",
194
+ `console.log(JSON.stringify(require("${configPath}").main()));`,
195
+ ], {});
196
+ return JSON.parse(stdout.trim());
197
+ }
198
+ /**
199
+ * Run the experiment
200
+ */
201
+ async run(options) {
202
+ const version = await this.getOrCreateVersion(options.new);
203
+ this.currentVersion = version;
204
+ // Load manifest
205
+ const manifest = await this.loadManifest(version);
206
+ // We have 2 mandatory tags: "exp-<experiment-name>" and "exp-<experiment-name>-v<version>"
207
+ const experimentTag = `exp-${this.name}`;
208
+ const versionTag = `${experimentTag}-v${version}`;
209
+ const tags = [experimentTag, versionTag, ...(manifest.tags ?? [])];
210
+ // Create runner
211
+ this.runner = new Runner(manifest.runs, this.api, this.webUrl, {
212
+ maxConcurrent: options.maxConcurrent,
213
+ tags: tags,
214
+ onStateChange: () => this.onRunStateChange(),
215
+ onRunComplete: options.downloadRecordings
216
+ ? async (index, runId) => {
217
+ const state = this.runner?.getRunState(index);
218
+ if (!state?.participantIds) {
219
+ console.error(pc.yellow(`Warning: Participant IDs not available for run ${runId}, skipping recording download.`));
220
+ return;
221
+ }
222
+ await this.downloadRecordingsForRun(runId, state.participantIds, version);
223
+ }
224
+ : undefined,
225
+ });
226
+ // Restore progress if applicable
227
+ const progress = await this.loadProgress(version);
228
+ if (progress) {
229
+ this.runner.restoreProgress(progress.entries);
230
+ }
231
+ // Create TUI
232
+ this.tui = new TUI({
233
+ experimentName: `${this.name} (v${version})`,
234
+ onQuit: () => this.handleQuit(),
235
+ onOpenRun: (index) => this.openRun(index),
236
+ });
237
+ // Initial state update
238
+ this.tui.updateStates(this.runner.getAllStates());
239
+ this.tui.updateStatusCounts(this.runner.getStatusCounts());
240
+ // Start TUI
241
+ this.tui.start();
242
+ try {
243
+ // Execute runs
244
+ await this.runner.execute();
245
+ // Final save
246
+ await this.saveProgress();
247
+ }
248
+ finally {
249
+ this.tui.stop();
250
+ console.log("");
251
+ }
252
+ if (options.openMetabase ?? true) {
253
+ openInBrowser(`https://daunt-fair.metabaseapp.com/dashboard/10-runs-analysis?run_tags=${versionTag}`);
254
+ }
255
+ const errors = this.runner?.getAllStates().filter((state) => state.status === "error");
256
+ if (errors?.length > 0) {
257
+ throw new Error(`${errors.map((error) => error.error).join("\n\n")}`);
258
+ }
259
+ }
260
+ /**
261
+ * Handle state change from runner
262
+ */
263
+ onRunStateChange() {
264
+ if (this.tui && this.runner) {
265
+ this.tui.updateStates(this.runner.getAllStates());
266
+ this.tui.updateStatusCounts(this.runner.getStatusCounts());
267
+ }
268
+ // Save progress on every state change (best-effort: write failures are ignored)
269
+ this.saveProgress().catch(() => { });
270
+ }
271
+ /**
272
+ * Handle quit request
273
+ */
274
+ handleQuit() {
275
+ this.runner?.stop();
276
+ this.tui?.stop();
277
+ console.log(pc.yellow(`\nThe experiment has been interrupted. You can resume it later by running "kradle experiment run ${this.name}".`));
278
+ process.exit(0);
279
+ }
280
+ /**
281
+ * Open run in browser
282
+ */
283
+ openRun(index) {
284
+ const url = this.runner?.getRunUrl(index);
285
+ if (url) {
286
+ openInBrowser(url);
287
+ }
288
+ }
289
+ /**
290
+ * Download recordings for a completed run with smart polling
291
+ * Polls every 5 seconds for 90 seconds after run completion (matching the pod grace period), then makes one final attempt
292
+ */
293
+ async downloadRecordingsForRun(runId, participantIds, version) {
294
+ const POLL_INTERVAL_MS = 5000; // Check every 5 seconds
295
+ const TOTAL_POLL_DURATION_MS = 90000; // Poll for 90 seconds total (pod grace period)
296
+ const downloadedRecordings = new Set(); // Track downloaded recordings by timestamp
297
+ const failedDownloads = new Set(); // Track failed downloads to avoid spamming logs
298
+ const startTime = Date.now();
299
+ // Keep polling until grace period expires, then do one final check
300
+ let isLastAttempt = false;
301
+ while (true) {
302
+ // For each participant in the run
303
+ for (const participantId of participantIds) {
304
+ try {
305
+ // Fetch current available recordings
306
+ const recordings = await this.api.getRunRecordings(runId, participantId);
307
+ // Download any new recordings
308
+ for (const recording of recordings) {
309
+ const recordingKey = `${participantId}-${recording.timestamp}`;
310
+ // Skip if already downloaded
311
+ if (downloadedRecordings.has(recordingKey)) {
312
+ continue;
313
+ }
314
+ const outputPath = path.join(this.experimentDir, "versions", version.toString().padStart(3, "0"), "recordings", runId, participantId, `${sanitizeTimestamp(recording.timestamp)}.mcpr`);
315
+ // Check if file already exists on disk
316
+ try {
317
+ await fs.access(outputPath);
318
+ downloadedRecordings.add(recordingKey);
319
+ continue;
320
+ }
321
+ catch { }
322
+ try {
323
+ // Download the recording
324
+ const { downloadUrl } = await this.api.getRecordingDownloadUrl(runId, participantId, recording.timestamp);
325
+ const response = await fetch(downloadUrl);
326
+ if (!response.ok) {
327
+ throw new Error(`HTTP ${response.status}: ${response.statusText}`);
328
+ }
329
+ const buffer = await response.arrayBuffer();
330
+ await fs.mkdir(path.dirname(outputPath), { recursive: true });
331
+ await fs.writeFile(outputPath, Buffer.from(buffer));
332
+ downloadedRecordings.add(recordingKey);
333
+ // Remove from failed set if it was previously failing
334
+ if (failedDownloads.has(recordingKey)) {
335
+ failedDownloads.delete(recordingKey);
336
+ }
337
+ }
338
+ catch (error) {
339
+ // Only log each failure once to avoid spam
340
+ if (!failedDownloads.has(recordingKey)) {
341
+ console.error(pc.yellow(`Warning: Failed to download recording ${recording.timestamp} for participant ${participantId}: ${error instanceof Error ? error.message : String(error)}`));
342
+ failedDownloads.add(recordingKey);
343
+ }
344
+ }
345
+ }
346
+ }
347
+ catch (error) {
348
+ // Log API errors (e.g., fetching recordings list)
349
+ console.error(pc.yellow(`Warning: Failed to fetch recordings for participant ${participantId}: ${error instanceof Error ? error.message : String(error)}`));
350
+ }
351
+ }
352
+ // Exit if this was the last attempt
353
+ if (isLastAttempt) {
354
+ break;
355
+ }
356
+ // Check if we've exceeded the polling duration
357
+ const elapsed = Date.now() - startTime;
358
+ if (elapsed >= TOTAL_POLL_DURATION_MS) {
359
+ // Do one final attempt before exiting
360
+ isLastAttempt = true;
361
+ }
362
+ else {
363
+ // Wait before next poll
364
+ await new Promise((resolve) => setTimeout(resolve, POLL_INTERVAL_MS));
365
+ }
366
+ }
367
+ }
368
+ }
@@ -1,4 +1,4 @@
1
- export { Evaluator } from "./evaluator.js";
1
+ export { Experimenter } from "./experimenter.js";
2
2
  export { Runner } from "./runner.js";
3
3
  export { TUI } from "./tui.js";
4
4
  export * from "./types.js";
@@ -1,4 +1,4 @@
1
- export { Evaluator } from "./evaluator.js";
1
+ export { Experimenter } from "./experimenter.js";
2
2
  export { Runner } from "./runner.js";
3
3
  export { TUI } from "./tui.js";
4
4
  export * from "./types.js";
@@ -12,10 +12,12 @@ export declare class Runner {
12
12
  private maxConcurrent;
13
13
  private tags;
14
14
  private onStateChange?;
15
+ private onRunComplete?;
15
16
  constructor(runs: RunConfig[], api: ApiClient, baseUrl: string, options?: {
16
17
  maxConcurrent?: number;
17
18
  tags?: string[];
18
19
  onStateChange?: (index: number, state: RunState) => void;
20
+ onRunComplete?: (index: number, runId: string) => Promise<void>;
19
21
  });
20
22
  /**
21
23
  * Restore progress from a previous run
@@ -13,6 +13,7 @@ export class Runner {
13
13
  maxConcurrent;
14
14
  tags;
15
15
  onStateChange;
16
+ onRunComplete;
16
17
  constructor(runs, api, baseUrl, options = {}) {
17
18
  this.runs = runs;
18
19
  this.api = api;
@@ -26,6 +27,7 @@ export class Runner {
26
27
  }
27
28
  }
28
29
  this.onStateChange = options.onStateChange;
30
+ this.onRunComplete = options.onRunComplete;
29
31
  // Initialize all run states as queued
30
32
  this.states = runs.map((config, index) => ({
31
33
  index,
@@ -155,9 +157,18 @@ export class Runner {
155
157
  throw new Error("No run ID returned from API");
156
158
  }
157
159
  const runId = response.runIds[0];
158
- this.updateState(index, { runId, status: "running" });
160
+ // Extract participant IDs from response and sort by inputOrder
161
+ const participantIds = response.participants
162
+ ? Object.keys(response.participants).sort((a, b) => {
163
+ const aOrder = response.participants?.[a]?.inputOrder ?? 0;
164
+ const bOrder = response.participants?.[b]?.inputOrder ?? 0;
165
+ return aOrder - bOrder;
166
+ })
167
+ : undefined;
168
+ this.updateState(index, { runId, participantIds, status: "running" });
159
169
  // Tag the run with all configured tags
160
- await Promise.all(this.tags.map((tag) => this.api.tagRun(runId, tag)));
170
+ const tags = [...this.tags, ...(state.config.tags ?? [])];
171
+ await Promise.all(tags.map((tag) => this.api.tagRun(runId, tag)));
161
172
  // Poll for completion
162
173
  await this.pollRunStatus(index, runId);
163
174
  }
@@ -191,6 +202,13 @@ export class Runner {
191
202
  if (normalizedStatus === "completed" || normalizedStatus === "finished" || normalizedStatus === "game_over") {
192
203
  this.completedRuns.add(index);
193
204
  this.activeRuns.delete(index);
205
+ // Trigger recording download if callback provided
206
+ if (this.onRunComplete) {
207
+ // Don't await - run in background to avoid blocking
208
+ this.onRunComplete(index, runId).catch(() => {
209
+ // Error already logged in experimenter, just continue
210
+ });
211
+ }
194
212
  return;
195
213
  }
196
214
  if (normalizedStatus === "error") {
@@ -271,6 +289,7 @@ export class Runner {
271
289
  index: state.index,
272
290
  status: state.status,
273
291
  runId: state.runId,
292
+ participantIds: state.participantIds,
274
293
  startTime: state.startTime,
275
294
  endTime: this.completedRuns.has(state.index) ? Date.now() : undefined,
276
295
  error: state.error,
@@ -1,6 +1,6 @@
1
1
  import type { RunState, StatusCounts } from "./types.js";
2
2
  export interface TUIOptions {
3
- evaluationName: string;
3
+ experimentName: string;
4
4
  onQuit: () => void;
5
5
  onOpenRun: (index: number) => void;
6
6
  }
@@ -39,7 +39,7 @@ const RenderRunLine = ({ state, total, isSelected, padding, }) => {
39
39
  const summaryText = summary.length > maxSummaryLength ? summary.slice(0, maxSummaryLength - 1) + "…" : summary;
40
40
  return (_jsxs(Text, { inverse: isSelected, children: [_jsx(Text, { color: color, children: icon }), " ", indexLabel, " ", _jsx(Text, { color: color, children: statusLabel }), elapsedLabel ? (_jsxs(_Fragment, { children: [" ", _jsx(Text, { dimColor: true, children: elapsedLabel })] })) : null, " ", _jsx(Text, { dimColor: true, children: summaryText })] }));
41
41
  };
42
- const EvaluationUI = ({ evaluationName, states, statusCounts, onQuit, onOpenRun }) => {
42
+ const ExperimentUI = ({ experimentName, states, statusCounts, onQuit, onOpenRun }) => {
43
43
  const [selectedIndex, setSelectedIndex] = useState(0);
44
44
  const [scrollOffset, setScrollOffset] = useState(0);
45
45
  const [tick, setTick] = useState(0); // force elapsed-time updates
@@ -89,7 +89,7 @@ const EvaluationUI = ({ evaluationName, states, statusCounts, onQuit, onOpenRun
89
89
  const rangeEnd = Math.min(scrollOffset + rowsAvailable, states.length);
90
90
  const horizontalRule = "─".repeat(Math.min(process.stdout.columns || 80, 80));
91
91
  const padding = states.length.toString().length;
92
- return (_jsxs(Box, { flexDirection: "column", children: [_jsx(Text, { bold: true, children: `Evaluation: ${evaluationName}` }), _jsx(Text, { dimColor: true, children: horizontalRule }), _jsx(Text, { dimColor: true, children: "q:quit \u2191/\u2193/j/k:select o:open in browser" }), _jsx(Text, { children: " " }), _jsxs(Box, { flexDirection: "column", children: [visibleRuns.map((state, index) => (_jsx(RenderRunLine, { state: state, total: states.length, isSelected: scrollOffset + index === selectedIndex, padding: padding }, state.index))), visibleRuns.length < rowsAvailable
92
+ return (_jsxs(Box, { flexDirection: "column", children: [_jsx(Text, { bold: true, children: `Experiment: ${experimentName}` }), _jsx(Text, { dimColor: true, children: horizontalRule }), _jsx(Text, { dimColor: true, children: "q:quit \u2191/\u2193/j/k:select o:open in browser" }), _jsx(Text, { children: " " }), _jsxs(Box, { flexDirection: "column", children: [visibleRuns.map((state, index) => (_jsx(RenderRunLine, { state: state, total: states.length, isSelected: scrollOffset + index === selectedIndex, padding: padding }, state.index))), visibleRuns.length < rowsAvailable
93
93
  ? Array.from({ length: rowsAvailable - visibleRuns.length }).map((_, index) => (_jsx(Text, { children: " " }, `empty-${index}`)))
94
94
  : null] }), showScroll ? _jsx(Text, { dimColor: true, children: `[${rangeStart}-${rangeEnd} of ${states.length}]` }) : _jsx(Text, { children: " " }), _jsx(Text, { dimColor: true, children: horizontalRule }), _jsxs(Text, { children: [_jsx(Text, { children: "Completed: " }), _jsx(Text, { color: "green", children: statusCounts.completed }), _jsx(Text, { children: ` | Active: ` }), _jsx(Text, { color: "yellow", children: statusCounts.active }), _jsx(Text, { children: ` | Queued: ` }), _jsx(Text, { dimColor: true, children: statusCounts.queued }), statusCounts.errors > 0 ? (_jsxs(_Fragment, { children: [_jsx(Text, { children: ` | Errors: ` }), _jsx(Text, { color: "red", children: statusCounts.errors })] })) : null] })] }));
95
95
  };
@@ -125,6 +125,6 @@ export class TUI {
125
125
  this.app.rerender(this.renderApp());
126
126
  }
127
127
  renderApp() {
128
- return (_jsx(EvaluationUI, { evaluationName: this.options.evaluationName, states: this.states, statusCounts: this.statusCounts, onQuit: this.options.onQuit, onOpenRun: this.options.onOpenRun }));
128
+ return (_jsx(ExperimentUI, { experimentName: this.options.experimentName, states: this.states, statusCounts: this.statusCounts, onQuit: this.options.onQuit, onOpenRun: this.options.onOpenRun }));
129
129
  }
130
130
  }
@@ -10,6 +10,7 @@ export declare const RunConfigSchema: z.ZodObject<{
10
10
  agent: z.ZodString;
11
11
  role: z.ZodOptional<z.ZodString>;
12
12
  }, z.core.$strip>>;
13
+ tags: z.ZodOptional<z.ZodArray<z.ZodString>>;
13
14
  }, z.core.$strip>;
14
15
  export type RunConfig = z.infer<typeof RunConfigSchema>;
15
16
  export declare const ManifestSchema: z.ZodObject<{
@@ -19,6 +20,7 @@ export declare const ManifestSchema: z.ZodObject<{
19
20
  agent: z.ZodString;
20
21
  role: z.ZodOptional<z.ZodString>;
21
22
  }, z.core.$strip>>;
23
+ tags: z.ZodOptional<z.ZodArray<z.ZodString>>;
22
24
  }, z.core.$strip>>;
23
25
  tags: z.ZodOptional<z.ZodArray<z.ZodString>>;
24
26
  }, z.core.$strip>;
@@ -40,6 +42,7 @@ export declare const ProgressEntrySchema: z.ZodObject<{
40
42
  finished: "finished";
41
43
  }>;
42
44
  runId: z.ZodOptional<z.ZodString>;
45
+ participantIds: z.ZodOptional<z.ZodArray<z.ZodString>>;
43
46
  startTime: z.ZodOptional<z.ZodNumber>;
44
47
  endTime: z.ZodOptional<z.ZodNumber>;
45
48
  error: z.ZodOptional<z.ZodString>;
@@ -62,6 +65,7 @@ export declare const ProgressSchema: z.ZodObject<{
62
65
  finished: "finished";
63
66
  }>;
64
67
  runId: z.ZodOptional<z.ZodString>;
68
+ participantIds: z.ZodOptional<z.ZodArray<z.ZodString>>;
65
69
  startTime: z.ZodOptional<z.ZodNumber>;
66
70
  endTime: z.ZodOptional<z.ZodNumber>;
67
71
  error: z.ZodOptional<z.ZodString>;
@@ -92,6 +96,7 @@ export interface RunState {
92
96
  config: RunConfig;
93
97
  status: RunStatus;
94
98
  runId?: string;
99
+ participantIds?: string[];
95
100
  startTime?: number;
96
101
  error?: string;
97
102
  }
@@ -113,14 +118,15 @@ export declare const RunLogsResponseSchema: z.ZodObject<{
113
118
  logs: z.ZodArray<z.ZodUnknown>;
114
119
  }, z.core.$strip>;
115
120
  export type RunLogsResponse = z.infer<typeof RunLogsResponseSchema>;
116
- export declare const EvaluationMetadataSchema: z.ZodObject<{
117
- currentIteration: z.ZodNumber;
121
+ export declare const ExperimentMetadataSchema: z.ZodObject<{
122
+ currentVersion: z.ZodNumber;
118
123
  }, z.core.$strip>;
119
- export type EvaluationMetadata = z.infer<typeof EvaluationMetadataSchema>;
120
- export interface EvaluationOptions {
124
+ export type ExperimentMetadata = z.infer<typeof ExperimentMetadataSchema>;
125
+ export interface ExperimentOptions {
121
126
  new: boolean;
122
127
  maxConcurrent: number;
123
128
  openMetabase?: boolean;
129
+ downloadRecordings?: boolean;
124
130
  }
125
131
  export declare const STATUS_ICONS: Record<RunStatus, {
126
132
  icon: string;
@@ -8,6 +8,7 @@ export const ParticipantSchema = z.object({
8
8
  export const RunConfigSchema = z.object({
9
9
  challenge_slug: z.string(),
10
10
  participants: z.array(ParticipantSchema),
11
+ tags: z.array(z.string()).optional(),
11
12
  });
12
13
  // Manifest returned by config.ts main()
13
14
  export const ManifestSchema = z.object({
@@ -31,6 +32,7 @@ export const ProgressEntrySchema = z.object({
31
32
  "error",
32
33
  ]),
33
34
  runId: z.string().optional(),
35
+ participantIds: z.array(z.string()).optional(),
34
36
  startTime: z.number().optional(),
35
37
  endTime: z.number().optional(),
36
38
  error: z.string().optional(),
@@ -66,9 +68,9 @@ export const RunStatusResponseSchema = z.object({
66
68
  export const RunLogsResponseSchema = z.object({
67
69
  logs: z.array(z.unknown()),
68
70
  });
69
- // Evaluation metadata stored in .evaluation.json
70
- export const EvaluationMetadataSchema = z.object({
71
- currentIteration: z.number(),
71
+ // Experiment metadata stored in .experiment.json
72
+ export const ExperimentMetadataSchema = z.object({
73
+ currentVersion: z.number(),
72
74
  });
73
75
  // Icons and colors for TUI
74
76
  export const STATUS_ICONS = {
@@ -0,0 +1,47 @@
1
+ /**
2
+ * All available config flags that can be used by commands.
3
+ * Each flag has an `env` property that allows it to be set via environment variable.
4
+ */
5
+ export declare const ALL_CONFIG_FLAGS: {
6
+ readonly "api-url": import("@oclif/core/interfaces").OptionFlag<string, import("@oclif/core/interfaces").CustomOptions>;
7
+ readonly "web-url": import("@oclif/core/interfaces").OptionFlag<string, import("@oclif/core/interfaces").CustomOptions>;
8
+ readonly "studio-api-url": import("@oclif/core/interfaces").OptionFlag<string, import("@oclif/core/interfaces").CustomOptions>;
9
+ readonly "studio-url": import("@oclif/core/interfaces").OptionFlag<string, import("@oclif/core/interfaces").CustomOptions>;
10
+ readonly "api-key": import("@oclif/core/interfaces").OptionFlag<string, import("@oclif/core/interfaces").CustomOptions>;
11
+ readonly "challenges-path": import("@oclif/core/interfaces").OptionFlag<string, import("@oclif/core/interfaces").CustomOptions>;
12
+ };
13
+ /**
14
+ * Type representing all config flag keys.
15
+ */
16
+ export type ConfigFlagKey = keyof typeof ALL_CONFIG_FLAGS;
17
+ /**
18
+ * Type representing the parsed values of all config flags.
19
+ */
20
+ export type AllConfigFlagValues = {
21
+ [K in ConfigFlagKey]: string;
22
+ };
23
+ /**
24
+ * Returns a subset of config flags for use in a command's static flags definition.
25
+ *
26
+ * @example
27
+ * // In a command file:
28
+ * static override flags = {
29
+ * ...getConfigFlags("api-key", "api-url", "studio-api-url"),
30
+ * // other command-specific flags
31
+ * };
32
+ *
33
+ * @param keys - The config flag keys to include.
34
+ * @returns An object containing only the specified flags
35
+ */
36
+ export declare function getConfigFlags<K extends ConfigFlagKey>(...keys: K[]): Pick<typeof ALL_CONFIG_FLAGS, K>;
37
+ /**
38
+ * Helper type to extract the parsed flag values for a subset of config flags.
39
+ * Use this to type the flags object after parsing.
40
+ *
41
+ * @example
42
+ * type MyFlags = ConfigFlagValues<"api-key" | "api-url">;
43
+ * // Results in: { "api-key": string; "api-url": string }
44
+ */
45
+ export type ConfigFlagValues<K extends ConfigFlagKey> = {
46
+ [P in K]: AllConfigFlagValues[P];
47
+ };