@kradle/cli 0.0.17 → 0.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +93 -65
- package/dist/commands/agent/list.d.ts +4 -0
- package/dist/commands/agent/list.js +6 -4
- package/dist/commands/challenge/build.d.ts +9 -1
- package/dist/commands/challenge/build.js +40 -12
- package/dist/commands/challenge/create.d.ts +5 -1
- package/dist/commands/challenge/create.js +17 -18
- package/dist/commands/challenge/delete.d.ts +4 -1
- package/dist/commands/challenge/delete.js +5 -5
- package/dist/commands/challenge/list.d.ts +5 -0
- package/dist/commands/challenge/list.js +11 -10
- package/dist/commands/challenge/run.d.ts +8 -1
- package/dist/commands/challenge/run.js +13 -8
- package/dist/commands/challenge/watch.d.ts +4 -1
- package/dist/commands/challenge/watch.js +8 -8
- package/dist/commands/{evaluation → experiment}/create.d.ts +4 -0
- package/dist/commands/{evaluation → experiment}/create.js +22 -21
- package/dist/commands/{evaluation → experiment}/list.js +17 -19
- package/dist/commands/experiment/recordings.d.ts +19 -0
- package/dist/commands/experiment/recordings.js +416 -0
- package/dist/commands/experiment/run.d.ts +17 -0
- package/dist/commands/experiment/run.js +67 -0
- package/dist/commands/init.js +2 -2
- package/dist/lib/api-client.d.ts +51 -10
- package/dist/lib/api-client.js +108 -39
- package/dist/lib/arguments.d.ts +3 -2
- package/dist/lib/arguments.js +5 -3
- package/dist/lib/challenge.d.ts +13 -18
- package/dist/lib/challenge.js +58 -62
- package/dist/lib/experiment/experimenter.d.ts +92 -0
- package/dist/lib/experiment/experimenter.js +368 -0
- package/dist/lib/{evaluation → experiment}/index.d.ts +1 -1
- package/dist/lib/{evaluation → experiment}/index.js +1 -1
- package/dist/lib/{evaluation → experiment}/runner.d.ts +2 -0
- package/dist/lib/{evaluation → experiment}/runner.js +21 -2
- package/dist/lib/{evaluation → experiment}/tui.d.ts +1 -1
- package/dist/lib/{evaluation → experiment}/tui.js +3 -3
- package/dist/lib/{evaluation → experiment}/types.d.ts +10 -4
- package/dist/lib/{evaluation → experiment}/types.js +5 -3
- package/dist/lib/flags.d.ts +47 -0
- package/dist/lib/flags.js +63 -0
- package/dist/lib/schemas.d.ts +63 -2
- package/dist/lib/schemas.js +27 -1
- package/dist/lib/utils.d.ts +9 -10
- package/dist/lib/utils.js +12 -12
- package/oclif.manifest.json +423 -64
- package/package.json +11 -8
- package/static/challenge.ts +12 -13
- package/static/experiment_template.ts +114 -0
- package/static/project_template/dev.env +5 -5
- package/static/project_template/prod.env +4 -4
- package/static/project_template/tsconfig.json +1 -1
- package/dist/commands/challenge/multi-upload.d.ts +0 -6
- package/dist/commands/challenge/multi-upload.js +0 -80
- package/dist/commands/evaluation/run.d.ts +0 -13
- package/dist/commands/evaluation/run.js +0 -61
- package/dist/lib/config.d.ts +0 -12
- package/dist/lib/config.js +0 -49
- package/dist/lib/evaluation/evaluator.d.ts +0 -88
- package/dist/lib/evaluation/evaluator.js +0 -268
- package/static/evaluation_template.ts +0 -69
- /package/dist/commands/{evaluation → experiment}/list.d.ts +0 -0
|
@@ -0,0 +1,368 @@
|
|
|
1
|
+
import fs from "node:fs/promises";
|
|
2
|
+
import path from "node:path";
|
|
3
|
+
import pc from "picocolors";
|
|
4
|
+
import { executeNodeCommand, openInBrowser } from "../utils.js";
|
|
5
|
+
import { Runner } from "./runner.js";
|
|
6
|
+
import { TUI } from "./tui.js";
|
|
7
|
+
import { ExperimentMetadataSchema, ManifestSchema, ProgressSchema } from "./types.js";
|
|
8
|
+
/**
 * Turn a timestamp string into something safe to use as a single file name.
 *
 * - every `:` becomes `-`
 * - every run of whitespace becomes a single `_`
 * - each of `< > " | ? *` and the path separators `/` and `\` becomes `_`
 *
 * Path separators are replaced so the result can never introduce extra
 * directory levels when it is joined onto a recordings directory.
 *
 * @param timestamp - Timestamp string to sanitize.
 * @returns The sanitized string.
 */
function sanitizeTimestamp(timestamp) {
    return timestamp
        .replace(/:/g, "-")
        .replace(/\s+/g, "_")
        // Colons are already gone at this point, so they are not listed again here.
        .replace(/[<>"|?*\/\\]/g, "_");
}
|
|
16
|
+
/**
 * Manages one named experiment stored under `<cwd>/experiments/<name>`.
 *
 * On-disk layout used by this class:
 *   experiments/<name>/config.ts                  master config
 *   experiments/<name>/.experiment.json           metadata ({ currentVersion })
 *   experiments/<name>/versions/NNN/config.ts     snapshot of the master config
 *   experiments/<name>/versions/NNN/manifest.json manifest produced by the config
 *   experiments/<name>/versions/NNN/progress.json saved run progress
 *   experiments/<name>/versions/NNN/recordings/<runId>/<participantId>/<timestamp>.mcpr
 */
export class Experimenter {
    // Experiment name; also used to build the "exp-<name>" tags.
    name;
    // Base web URL handed to the Runner.
    webUrl;
    // API client handed to the Runner and used for recording downloads.
    api;
    // Absolute path of experiments/<name>.
    experimentDir;
    // Absolute path of experiments/<name>/.experiment.json.
    metadataPath;
    // Runner instance; set by run().
    runner;
    // TUI instance; set by run().
    tui;
    // Version number currently in use; undefined until a version is selected.
    currentVersion;
    /**
     * @param name - Experiment name (directory name under `experiments/`).
     * @param webUrl - Base web URL passed to the Runner.
     * @param api - API client.
     */
    constructor(name, webUrl, api) {
        this.name = name;
        this.webUrl = webUrl;
        this.api = api;
        this.experimentDir = path.resolve(process.cwd(), "experiments", name);
        this.metadataPath = path.join(this.experimentDir, ".experiment.json");
    }
    /**
     * Get paths for a specific version.
     *
     * @param version - Version number; rendered as a zero-padded 3-character directory name.
     * @returns `{ versionDir, configPath, manifestPath, progressPath }`.
     */
    getVersionPaths(version) {
        const versionDir = path.join(this.experimentDir, "versions", version.toString().padStart(3, "0"));
        return {
            versionDir,
            configPath: path.join(versionDir, "config.ts"),
            manifestPath: path.join(versionDir, "manifest.json"),
            progressPath: path.join(versionDir, "progress.json"),
        };
    }
    /** Path of the master config (`experiments/<name>/config.ts`). */
    get configPath() {
        return path.join(this.experimentDir, "config.ts");
    }
    /**
     * Get the current version directory path.
     *
     * @throws Error("No version set") when no version has been selected yet.
     */
    getCurrentVersionDir() {
        if (this.currentVersion === undefined) {
            throw new Error("No version set");
        }
        return this.getVersionPaths(this.currentVersion).versionDir;
    }
    /**
     * Check if the experiment directory is accessible.
     */
    async exists() {
        try {
            await fs.access(this.experimentDir);
            return true;
        }
        catch {
            return false;
        }
    }
    /**
     * Check if the master config.ts is accessible.
     */
    async configExists() {
        try {
            await fs.access(this.configPath);
            return true;
        }
        catch {
            return false;
        }
    }
    /**
     * Load experiment metadata.
     *
     * @returns The parsed metadata, or `null` when the file cannot be read,
     *   is not valid JSON, or fails schema validation.
     */
    async loadMetadata() {
        // NOTE(review): a corrupt metadata file is indistinguishable from a missing
        // one here, so callers will treat it as "no version yet" — confirm that is intended.
        try {
            const content = await fs.readFile(this.metadataPath, "utf-8");
            const data = JSON.parse(content);
            return ExperimentMetadataSchema.parse(data);
        }
        catch {
            return null;
        }
    }
    /**
     * Save experiment metadata as pretty-printed JSON.
     */
    async saveMetadata(metadata) {
        await fs.writeFile(this.metadataPath, JSON.stringify(metadata, null, 2));
    }
    /**
     * Get the current version number, or -1 if no metadata could be loaded.
     */
    async getCurrentVersionNumber() {
        const metadata = await this.loadMetadata();
        return metadata?.currentVersion ?? -1;
    }
    /**
     * Create a new version: snapshot the master config, generate its manifest,
     * then record the new version number in the metadata file.
     *
     * @returns The new version number (previous version + 1, starting at 0).
     */
    async createNewVersion() {
        const previousVersion = await this.getCurrentVersionNumber();
        const newVersion = previousVersion + 1;
        const paths = this.getVersionPaths(newVersion);
        // Create version directory
        await fs.mkdir(paths.versionDir, { recursive: true });
        // Copy master config to version
        await fs.copyFile(this.configPath, paths.configPath);
        // Generate manifest from the snapshot (not from the master config)
        const manifest = await this.generateManifest(paths.configPath);
        await fs.writeFile(paths.manifestPath, JSON.stringify(manifest, null, 2));
        // Metadata is updated last, so a failure above leaves the previous version current.
        await this.saveMetadata({ currentVersion: newVersion });
        this.currentVersion = newVersion;
        return newVersion;
    }
    /**
     * Get or create a version.
     *
     * @param createNew - If true, always create a new version. Otherwise, use the
     *   current version, or create the first one if none exists.
     * @returns The selected version number (also stored in `this.currentVersion`).
     */
    async getOrCreateVersion(createNew) {
        if (createNew) {
            return await this.createNewVersion();
        }
        const currentVersion = await this.getCurrentVersionNumber();
        if (currentVersion < 0) {
            return await this.createNewVersion();
        }
        this.currentVersion = currentVersion;
        return currentVersion;
    }
    /**
     * Load and validate the manifest of a version.
     * Read, JSON and schema errors propagate to the caller.
     */
    async loadManifest(version) {
        const { manifestPath } = this.getVersionPaths(version);
        const content = await fs.readFile(manifestPath, "utf-8");
        return ManifestSchema.parse(JSON.parse(content));
    }
    /**
     * Load progress of a version.
     *
     * @returns The parsed progress, or `null` when it cannot be read or validated.
     */
    async loadProgress(version) {
        try {
            const { progressPath } = this.getVersionPaths(version);
            const content = await fs.readFile(progressPath, "utf-8");
            return ProgressSchema.parse(JSON.parse(content));
        }
        catch {
            return null;
        }
    }
    /**
     * Save the runner's progress entries to the current version.
     * Does nothing when there is no runner or no version selected.
     */
    async saveProgress() {
        if (!this.runner || this.currentVersion === undefined)
            return;
        const { progressPath } = this.getVersionPaths(this.currentVersion);
        const progress = {
            entries: this.runner.getProgressEntries(),
            lastUpdated: Date.now(),
        };
        await fs.writeFile(progressPath, JSON.stringify(progress, null, 2));
    }
    /**
     * Execute a config file and validate its output as a manifest.
     */
    async generateManifest(configPath) {
        const manifest = await this.executeConfigFile(configPath);
        return ManifestSchema.parse(manifest);
    }
    /**
     * Execute a config.ts file and return the value produced by its `main()`.
     *
     * @param configPath - Path of the config file to `require`.
     * @returns The JSON-decoded stdout of the child process.
     */
    async executeConfigFile(configPath) {
        // We spawn a new NodeJS process to execute & log the config file.
        // We can't directly import the file because it would be cached, and import cache can't be invalidated.
        // The path is embedded with JSON.stringify so backslashes (Windows paths) and
        // quotes are escaped instead of corrupting the generated script.
        const script = `console.log(JSON.stringify(require(${JSON.stringify(configPath)}).main()));`;
        const stdout = await executeNodeCommand([
            "--experimental-transform-types",
            "--no-warnings",
            "-e",
            script,
        ], {});
        return JSON.parse(stdout.trim());
    }
    /**
     * Run the experiment: select a version, execute all runs through a Runner
     * while showing a TUI, then optionally open the Metabase dashboard.
     *
     * @param options - `new`, `maxConcurrent`, optional `openMetabase` (defaults to
     *   true) and optional `downloadRecordings`.
     * @throws Error whose message joins the `error` of every run that ended in
     *   the "error" status.
     */
    async run(options) {
        const version = await this.getOrCreateVersion(options.new);
        this.currentVersion = version;
        // Load manifest
        const manifest = await this.loadManifest(version);
        // We have 2 mandatory tags: "exp-<experiment-name>" and "exp-<experiment-name>-v<version>"
        const experimentTag = `exp-${this.name}`;
        const versionTag = `${experimentTag}-v${version}`;
        const tags = [experimentTag, versionTag, ...(manifest.tags ?? [])];
        // Only wire up the completion callback when recordings were requested.
        const onRunComplete = options.downloadRecordings
            ? async (index, runId) => {
                const state = this.runner?.getRunState(index);
                if (!state?.participantIds) {
                    console.error(pc.yellow(`Warning: Participant IDs not available for run ${runId}, skipping recording download.`));
                    return;
                }
                await this.downloadRecordingsForRun(runId, state.participantIds, version);
            }
            : undefined;
        // Create runner
        this.runner = new Runner(manifest.runs, this.api, this.webUrl, {
            maxConcurrent: options.maxConcurrent,
            tags: tags,
            onStateChange: () => this.onRunStateChange(),
            onRunComplete,
        });
        // Restore progress if applicable
        const progress = await this.loadProgress(version);
        if (progress) {
            this.runner.restoreProgress(progress.entries);
        }
        // Create TUI
        this.tui = new TUI({
            experimentName: `${this.name} (v${version})`,
            onQuit: () => this.handleQuit(),
            onOpenRun: (index) => this.openRun(index),
        });
        // Initial state update
        this.tui.updateStates(this.runner.getAllStates());
        this.tui.updateStatusCounts(this.runner.getStatusCounts());
        // Start TUI
        this.tui.start();
        try {
            // Execute runs
            await this.runner.execute();
            // Final save
            await this.saveProgress();
        }
        finally {
            // Always stop the TUI, even when execution or the final save throws.
            this.tui.stop();
            console.log("");
        }
        if (options.openMetabase ?? true) {
            // The tag contains the user-chosen experiment name, so encode it for the query string.
            openInBrowser(`https://daunt-fair.metabaseapp.com/dashboard/10-runs-analysis?run_tags=${encodeURIComponent(versionTag)}`);
        }
        const errors = this.runner.getAllStates().filter((state) => state.status === "error");
        if (errors.length > 0) {
            throw new Error(`${errors.map((error) => error.error).join("\n\n")}`);
        }
    }
    /**
     * Handle state change from runner: refresh the TUI and persist progress.
     */
    onRunStateChange() {
        if (this.tui && this.runner) {
            this.tui.updateStates(this.runner.getAllStates());
            this.tui.updateStatusCounts(this.runner.getStatusCounts());
        }
        // Best-effort save on every state change; failures are deliberately ignored here
        // because run() performs an awaited final save.
        this.saveProgress().catch(() => { });
    }
    /**
     * Handle quit request: stop the runner and the TUI, print a resume hint, exit with code 0.
     */
    handleQuit() {
        this.runner?.stop();
        this.tui?.stop();
        console.log(pc.yellow(`\nThe experiment has been interrupted. You can resume it later by running "kradle experiment run ${this.name}".`));
        // NOTE(review): exits without awaiting a progress save; an in-flight
        // saveProgress() write may be cut short — confirm this is acceptable.
        process.exit(0);
    }
    /**
     * Open run in browser, if the runner has a URL for it.
     */
    openRun(index) {
        const url = this.runner?.getRunUrl(index);
        if (url) {
            openInBrowser(url);
        }
    }
    /**
     * Download recordings for a completed run with smart polling.
     * Polls every 5 seconds for 90 seconds after run completion (matching pod
     * grace period), then performs one final pass.
     *
     * Failures are logged as warnings and never thrown: a failed download is
     * retried on the next pass and logged only once per recording.
     *
     * @param runId - ID of the completed run.
     * @param participantIds - Participants whose recordings are fetched.
     * @param version - Experiment version the recordings are stored under.
     */
    async downloadRecordingsForRun(runId, participantIds, version) {
        const POLL_INTERVAL_MS = 5000; // Check every 5 seconds
        const TOTAL_POLL_DURATION_MS = 90000; // Poll for 90 seconds total (pod grace period)
        const downloadedRecordings = new Set(); // Keys "<participantId>-<timestamp>" already on disk
        const failedDownloads = new Set(); // Keys whose failure was already logged
        const recordingsDir = path.join(this.getVersionPaths(version).versionDir, "recordings", runId);
        const startTime = Date.now();
        // Keep polling until grace period expires, then do one final check
        let isLastAttempt = false;
        while (true) {
            for (const participantId of participantIds) {
                try {
                    // Fetch current available recordings
                    const recordings = await this.api.getRunRecordings(runId, participantId);
                    for (const recording of recordings) {
                        const recordingKey = `${participantId}-${recording.timestamp}`;
                        if (downloadedRecordings.has(recordingKey)) {
                            continue;
                        }
                        const outputPath = path.join(recordingsDir, participantId, `${sanitizeTimestamp(recording.timestamp)}.mcpr`);
                        // A file already at the final path counts as downloaded.
                        try {
                            await fs.access(outputPath);
                            downloadedRecordings.add(recordingKey);
                            continue;
                        }
                        catch {
                            // Not on disk yet: fall through and download it.
                        }
                        try {
                            const { downloadUrl } = await this.api.getRecordingDownloadUrl(runId, participantId, recording.timestamp);
                            const response = await fetch(downloadUrl);
                            if (!response.ok) {
                                throw new Error(`HTTP ${response.status}: ${response.statusText}`);
                            }
                            const buffer = await response.arrayBuffer();
                            await fs.mkdir(path.dirname(outputPath), { recursive: true });
                            // Write to a temporary name and rename afterwards, so an interrupted
                            // write never leaves a truncated file at the final path (which the
                            // fs.access check above would otherwise accept as complete).
                            const partialPath = `${outputPath}.part`;
                            await fs.writeFile(partialPath, Buffer.from(buffer));
                            await fs.rename(partialPath, outputPath);
                            downloadedRecordings.add(recordingKey);
                            // Forget any earlier failure for this recording.
                            failedDownloads.delete(recordingKey);
                        }
                        catch (error) {
                            // Only log each failure once to avoid spam
                            if (!failedDownloads.has(recordingKey)) {
                                console.error(pc.yellow(`Warning: Failed to download recording ${recording.timestamp} for participant ${participantId}: ${error instanceof Error ? error.message : String(error)}`));
                                failedDownloads.add(recordingKey);
                            }
                        }
                    }
                }
                catch (error) {
                    // Log API errors (e.g., fetching recordings list)
                    console.error(pc.yellow(`Warning: Failed to fetch recordings for participant ${participantId}: ${error instanceof Error ? error.message : String(error)}`));
                }
            }
            // Exit if this was the last attempt
            if (isLastAttempt) {
                break;
            }
            // Check if we've exceeded the polling duration
            const elapsed = Date.now() - startTime;
            if (elapsed >= TOTAL_POLL_DURATION_MS) {
                // Do one final attempt before exiting
                isLastAttempt = true;
            }
            else {
                // Wait before next poll
                await new Promise((resolve) => setTimeout(resolve, POLL_INTERVAL_MS));
            }
        }
    }
}
|
|
@@ -12,10 +12,12 @@ export declare class Runner {
|
|
|
12
12
|
private maxConcurrent;
|
|
13
13
|
private tags;
|
|
14
14
|
private onStateChange?;
|
|
15
|
+
private onRunComplete?;
|
|
15
16
|
constructor(runs: RunConfig[], api: ApiClient, baseUrl: string, options?: {
|
|
16
17
|
maxConcurrent?: number;
|
|
17
18
|
tags?: string[];
|
|
18
19
|
onStateChange?: (index: number, state: RunState) => void;
|
|
20
|
+
onRunComplete?: (index: number, runId: string) => Promise<void>;
|
|
19
21
|
});
|
|
20
22
|
/**
|
|
21
23
|
* Restore progress from a previous run
|
|
@@ -13,6 +13,7 @@ export class Runner {
|
|
|
13
13
|
maxConcurrent;
|
|
14
14
|
tags;
|
|
15
15
|
onStateChange;
|
|
16
|
+
onRunComplete;
|
|
16
17
|
constructor(runs, api, baseUrl, options = {}) {
|
|
17
18
|
this.runs = runs;
|
|
18
19
|
this.api = api;
|
|
@@ -26,6 +27,7 @@ export class Runner {
|
|
|
26
27
|
}
|
|
27
28
|
}
|
|
28
29
|
this.onStateChange = options.onStateChange;
|
|
30
|
+
this.onRunComplete = options.onRunComplete;
|
|
29
31
|
// Initialize all run states as queued
|
|
30
32
|
this.states = runs.map((config, index) => ({
|
|
31
33
|
index,
|
|
@@ -155,9 +157,18 @@ export class Runner {
|
|
|
155
157
|
throw new Error("No run ID returned from API");
|
|
156
158
|
}
|
|
157
159
|
const runId = response.runIds[0];
|
|
158
|
-
|
|
160
|
+
// Extract participant IDs from response and sort by inputOrder
|
|
161
|
+
const participantIds = response.participants
|
|
162
|
+
? Object.keys(response.participants).sort((a, b) => {
|
|
163
|
+
const aOrder = response.participants?.[a]?.inputOrder ?? 0;
|
|
164
|
+
const bOrder = response.participants?.[b]?.inputOrder ?? 0;
|
|
165
|
+
return aOrder - bOrder;
|
|
166
|
+
})
|
|
167
|
+
: undefined;
|
|
168
|
+
this.updateState(index, { runId, participantIds, status: "running" });
|
|
159
169
|
// Tag the run with all configured tags
|
|
160
|
-
|
|
170
|
+
const tags = [...this.tags, ...(state.config.tags ?? [])];
|
|
171
|
+
await Promise.all(tags.map((tag) => this.api.tagRun(runId, tag)));
|
|
161
172
|
// Poll for completion
|
|
162
173
|
await this.pollRunStatus(index, runId);
|
|
163
174
|
}
|
|
@@ -191,6 +202,13 @@ export class Runner {
|
|
|
191
202
|
if (normalizedStatus === "completed" || normalizedStatus === "finished" || normalizedStatus === "game_over") {
|
|
192
203
|
this.completedRuns.add(index);
|
|
193
204
|
this.activeRuns.delete(index);
|
|
205
|
+
// Trigger recording download if callback provided
|
|
206
|
+
if (this.onRunComplete) {
|
|
207
|
+
// Don't await - run in background to avoid blocking
|
|
208
|
+
this.onRunComplete(index, runId).catch(() => {
|
|
209
|
+
// Error already logged in experimenter, just continue
|
|
210
|
+
});
|
|
211
|
+
}
|
|
194
212
|
return;
|
|
195
213
|
}
|
|
196
214
|
if (normalizedStatus === "error") {
|
|
@@ -271,6 +289,7 @@ export class Runner {
|
|
|
271
289
|
index: state.index,
|
|
272
290
|
status: state.status,
|
|
273
291
|
runId: state.runId,
|
|
292
|
+
participantIds: state.participantIds,
|
|
274
293
|
startTime: state.startTime,
|
|
275
294
|
endTime: this.completedRuns.has(state.index) ? Date.now() : undefined,
|
|
276
295
|
error: state.error,
|
|
@@ -39,7 +39,7 @@ const RenderRunLine = ({ state, total, isSelected, padding, }) => {
|
|
|
39
39
|
const summaryText = summary.length > maxSummaryLength ? summary.slice(0, maxSummaryLength - 1) + "…" : summary;
|
|
40
40
|
return (_jsxs(Text, { inverse: isSelected, children: [_jsx(Text, { color: color, children: icon }), " ", indexLabel, " ", _jsx(Text, { color: color, children: statusLabel }), elapsedLabel ? (_jsxs(_Fragment, { children: [" ", _jsx(Text, { dimColor: true, children: elapsedLabel })] })) : null, " ", _jsx(Text, { dimColor: true, children: summaryText })] }));
|
|
41
41
|
};
|
|
42
|
-
const
|
|
42
|
+
const ExperimentUI = ({ experimentName, states, statusCounts, onQuit, onOpenRun }) => {
|
|
43
43
|
const [selectedIndex, setSelectedIndex] = useState(0);
|
|
44
44
|
const [scrollOffset, setScrollOffset] = useState(0);
|
|
45
45
|
const [tick, setTick] = useState(0); // force elapsed-time updates
|
|
@@ -89,7 +89,7 @@ const EvaluationUI = ({ evaluationName, states, statusCounts, onQuit, onOpenRun
|
|
|
89
89
|
const rangeEnd = Math.min(scrollOffset + rowsAvailable, states.length);
|
|
90
90
|
const horizontalRule = "─".repeat(Math.min(process.stdout.columns || 80, 80));
|
|
91
91
|
const padding = states.length.toString().length;
|
|
92
|
-
return (_jsxs(Box, { flexDirection: "column", children: [_jsx(Text, { bold: true, children: `
|
|
92
|
+
return (_jsxs(Box, { flexDirection: "column", children: [_jsx(Text, { bold: true, children: `Experiment: ${experimentName}` }), _jsx(Text, { dimColor: true, children: horizontalRule }), _jsx(Text, { dimColor: true, children: "q:quit \u2191/\u2193/j/k:select o:open in browser" }), _jsx(Text, { children: " " }), _jsxs(Box, { flexDirection: "column", children: [visibleRuns.map((state, index) => (_jsx(RenderRunLine, { state: state, total: states.length, isSelected: scrollOffset + index === selectedIndex, padding: padding }, state.index))), visibleRuns.length < rowsAvailable
|
|
93
93
|
? Array.from({ length: rowsAvailable - visibleRuns.length }).map((_, index) => (_jsx(Text, { children: " " }, `empty-${index}`)))
|
|
94
94
|
: null] }), showScroll ? _jsx(Text, { dimColor: true, children: `[${rangeStart}-${rangeEnd} of ${states.length}]` }) : _jsx(Text, { children: " " }), _jsx(Text, { dimColor: true, children: horizontalRule }), _jsxs(Text, { children: [_jsx(Text, { children: "Completed: " }), _jsx(Text, { color: "green", children: statusCounts.completed }), _jsx(Text, { children: ` | Active: ` }), _jsx(Text, { color: "yellow", children: statusCounts.active }), _jsx(Text, { children: ` | Queued: ` }), _jsx(Text, { dimColor: true, children: statusCounts.queued }), statusCounts.errors > 0 ? (_jsxs(_Fragment, { children: [_jsx(Text, { children: ` | Errors: ` }), _jsx(Text, { color: "red", children: statusCounts.errors })] })) : null] })] }));
|
|
95
95
|
};
|
|
@@ -125,6 +125,6 @@ export class TUI {
|
|
|
125
125
|
this.app.rerender(this.renderApp());
|
|
126
126
|
}
|
|
127
127
|
renderApp() {
|
|
128
|
-
return (_jsx(
|
|
128
|
+
return (_jsx(ExperimentUI, { experimentName: this.options.experimentName, states: this.states, statusCounts: this.statusCounts, onQuit: this.options.onQuit, onOpenRun: this.options.onOpenRun }));
|
|
129
129
|
}
|
|
130
130
|
}
|
|
@@ -10,6 +10,7 @@ export declare const RunConfigSchema: z.ZodObject<{
|
|
|
10
10
|
agent: z.ZodString;
|
|
11
11
|
role: z.ZodOptional<z.ZodString>;
|
|
12
12
|
}, z.core.$strip>>;
|
|
13
|
+
tags: z.ZodOptional<z.ZodArray<z.ZodString>>;
|
|
13
14
|
}, z.core.$strip>;
|
|
14
15
|
export type RunConfig = z.infer<typeof RunConfigSchema>;
|
|
15
16
|
export declare const ManifestSchema: z.ZodObject<{
|
|
@@ -19,6 +20,7 @@ export declare const ManifestSchema: z.ZodObject<{
|
|
|
19
20
|
agent: z.ZodString;
|
|
20
21
|
role: z.ZodOptional<z.ZodString>;
|
|
21
22
|
}, z.core.$strip>>;
|
|
23
|
+
tags: z.ZodOptional<z.ZodArray<z.ZodString>>;
|
|
22
24
|
}, z.core.$strip>>;
|
|
23
25
|
tags: z.ZodOptional<z.ZodArray<z.ZodString>>;
|
|
24
26
|
}, z.core.$strip>;
|
|
@@ -40,6 +42,7 @@ export declare const ProgressEntrySchema: z.ZodObject<{
|
|
|
40
42
|
finished: "finished";
|
|
41
43
|
}>;
|
|
42
44
|
runId: z.ZodOptional<z.ZodString>;
|
|
45
|
+
participantIds: z.ZodOptional<z.ZodArray<z.ZodString>>;
|
|
43
46
|
startTime: z.ZodOptional<z.ZodNumber>;
|
|
44
47
|
endTime: z.ZodOptional<z.ZodNumber>;
|
|
45
48
|
error: z.ZodOptional<z.ZodString>;
|
|
@@ -62,6 +65,7 @@ export declare const ProgressSchema: z.ZodObject<{
|
|
|
62
65
|
finished: "finished";
|
|
63
66
|
}>;
|
|
64
67
|
runId: z.ZodOptional<z.ZodString>;
|
|
68
|
+
participantIds: z.ZodOptional<z.ZodArray<z.ZodString>>;
|
|
65
69
|
startTime: z.ZodOptional<z.ZodNumber>;
|
|
66
70
|
endTime: z.ZodOptional<z.ZodNumber>;
|
|
67
71
|
error: z.ZodOptional<z.ZodString>;
|
|
@@ -92,6 +96,7 @@ export interface RunState {
|
|
|
92
96
|
config: RunConfig;
|
|
93
97
|
status: RunStatus;
|
|
94
98
|
runId?: string;
|
|
99
|
+
participantIds?: string[];
|
|
95
100
|
startTime?: number;
|
|
96
101
|
error?: string;
|
|
97
102
|
}
|
|
@@ -113,14 +118,15 @@ export declare const RunLogsResponseSchema: z.ZodObject<{
|
|
|
113
118
|
logs: z.ZodArray<z.ZodUnknown>;
|
|
114
119
|
}, z.core.$strip>;
|
|
115
120
|
export type RunLogsResponse = z.infer<typeof RunLogsResponseSchema>;
|
|
116
|
-
export declare const
|
|
117
|
-
|
|
121
|
+
export declare const ExperimentMetadataSchema: z.ZodObject<{
|
|
122
|
+
currentVersion: z.ZodNumber;
|
|
118
123
|
}, z.core.$strip>;
|
|
119
|
-
export type
|
|
120
|
-
export interface
|
|
124
|
+
export type ExperimentMetadata = z.infer<typeof ExperimentMetadataSchema>;
|
|
125
|
+
export interface ExperimentOptions {
|
|
121
126
|
new: boolean;
|
|
122
127
|
maxConcurrent: number;
|
|
123
128
|
openMetabase?: boolean;
|
|
129
|
+
downloadRecordings?: boolean;
|
|
124
130
|
}
|
|
125
131
|
export declare const STATUS_ICONS: Record<RunStatus, {
|
|
126
132
|
icon: string;
|
|
@@ -8,6 +8,7 @@ export const ParticipantSchema = z.object({
|
|
|
8
8
|
export const RunConfigSchema = z.object({
|
|
9
9
|
challenge_slug: z.string(),
|
|
10
10
|
participants: z.array(ParticipantSchema),
|
|
11
|
+
tags: z.array(z.string()).optional(),
|
|
11
12
|
});
|
|
12
13
|
// Manifest returned by config.ts main()
|
|
13
14
|
export const ManifestSchema = z.object({
|
|
@@ -31,6 +32,7 @@ export const ProgressEntrySchema = z.object({
|
|
|
31
32
|
"error",
|
|
32
33
|
]),
|
|
33
34
|
runId: z.string().optional(),
|
|
35
|
+
participantIds: z.array(z.string()).optional(),
|
|
34
36
|
startTime: z.number().optional(),
|
|
35
37
|
endTime: z.number().optional(),
|
|
36
38
|
error: z.string().optional(),
|
|
@@ -66,9 +68,9 @@ export const RunStatusResponseSchema = z.object({
|
|
|
66
68
|
export const RunLogsResponseSchema = z.object({
|
|
67
69
|
logs: z.array(z.unknown()),
|
|
68
70
|
});
|
|
69
|
-
//
|
|
70
|
-
export const
|
|
71
|
-
|
|
71
|
+
// Experiment metadata stored in .experiment.json
|
|
72
|
+
export const ExperimentMetadataSchema = z.object({
|
|
73
|
+
currentVersion: z.number(),
|
|
72
74
|
});
|
|
73
75
|
// Icons and colors for TUI
|
|
74
76
|
export const STATUS_ICONS = {
|
|
@@ -0,0 +1,47 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* All available config flags that can be used by commands.
|
|
3
|
+
* Each flag has an `env` property that allows it to be set via environment variable.
|
|
4
|
+
*/
|
|
5
|
+
export declare const ALL_CONFIG_FLAGS: {
|
|
6
|
+
readonly "api-url": import("@oclif/core/interfaces").OptionFlag<string, import("@oclif/core/interfaces").CustomOptions>;
|
|
7
|
+
readonly "web-url": import("@oclif/core/interfaces").OptionFlag<string, import("@oclif/core/interfaces").CustomOptions>;
|
|
8
|
+
readonly "studio-api-url": import("@oclif/core/interfaces").OptionFlag<string, import("@oclif/core/interfaces").CustomOptions>;
|
|
9
|
+
readonly "studio-url": import("@oclif/core/interfaces").OptionFlag<string, import("@oclif/core/interfaces").CustomOptions>;
|
|
10
|
+
readonly "api-key": import("@oclif/core/interfaces").OptionFlag<string, import("@oclif/core/interfaces").CustomOptions>;
|
|
11
|
+
readonly "challenges-path": import("@oclif/core/interfaces").OptionFlag<string, import("@oclif/core/interfaces").CustomOptions>;
|
|
12
|
+
};
|
|
13
|
+
/**
|
|
14
|
+
* Type representing all config flag keys.
|
|
15
|
+
*/
|
|
16
|
+
export type ConfigFlagKey = keyof typeof ALL_CONFIG_FLAGS;
|
|
17
|
+
/**
|
|
18
|
+
* Type representing the parsed values of all config flags.
|
|
19
|
+
*/
|
|
20
|
+
export type AllConfigFlagValues = {
|
|
21
|
+
[K in ConfigFlagKey]: string;
|
|
22
|
+
};
|
|
23
|
+
/**
|
|
24
|
+
* Returns a subset of config flags for use in a command's static flags definition.
|
|
25
|
+
*
|
|
26
|
+
* @example
|
|
27
|
+
* // In a command file:
|
|
28
|
+
* static override flags = {
|
|
29
|
+
* ...getConfigFlags("api-key", "api-url", "studio-api-url"),
|
|
30
|
+
* // other command-specific flags
|
|
31
|
+
* };
|
|
32
|
+
*
|
|
33
|
+
* @param keys - The config flag keys to include.
|
|
34
|
+
* @returns An object containing only the specified flags
|
|
35
|
+
*/
|
|
36
|
+
export declare function getConfigFlags<K extends ConfigFlagKey>(...keys: K[]): Pick<typeof ALL_CONFIG_FLAGS, K>;
|
|
37
|
+
/**
|
|
38
|
+
* Helper type to extract the parsed flag values for a subset of config flags.
|
|
39
|
+
* Use this to type the flags object after parsing.
|
|
40
|
+
*
|
|
41
|
+
* @example
|
|
42
|
+
* type MyFlags = ConfigFlagValues<"api-key" | "api-url">;
|
|
43
|
+
* // Results in: { "api-key": string; "api-url": string }
|
|
44
|
+
*/
|
|
45
|
+
export type ConfigFlagValues<K extends ConfigFlagKey> = {
|
|
46
|
+
[P in K]: AllConfigFlagValues[P];
|
|
47
|
+
};
|