@kradle/cli 0.0.17 → 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (62)
  1. package/README.md +93 -65
  2. package/dist/commands/agent/list.d.ts +4 -0
  3. package/dist/commands/agent/list.js +6 -4
  4. package/dist/commands/challenge/build.d.ts +9 -1
  5. package/dist/commands/challenge/build.js +40 -12
  6. package/dist/commands/challenge/create.d.ts +5 -1
  7. package/dist/commands/challenge/create.js +17 -18
  8. package/dist/commands/challenge/delete.d.ts +4 -1
  9. package/dist/commands/challenge/delete.js +5 -5
  10. package/dist/commands/challenge/list.d.ts +5 -0
  11. package/dist/commands/challenge/list.js +11 -10
  12. package/dist/commands/challenge/run.d.ts +8 -1
  13. package/dist/commands/challenge/run.js +13 -8
  14. package/dist/commands/challenge/watch.d.ts +4 -1
  15. package/dist/commands/challenge/watch.js +8 -8
  16. package/dist/commands/{evaluation → experiment}/create.d.ts +4 -0
  17. package/dist/commands/{evaluation → experiment}/create.js +22 -21
  18. package/dist/commands/{evaluation → experiment}/list.js +17 -19
  19. package/dist/commands/experiment/recordings.d.ts +19 -0
  20. package/dist/commands/experiment/recordings.js +416 -0
  21. package/dist/commands/experiment/run.d.ts +17 -0
  22. package/dist/commands/experiment/run.js +67 -0
  23. package/dist/commands/init.js +2 -2
  24. package/dist/lib/api-client.d.ts +51 -10
  25. package/dist/lib/api-client.js +108 -39
  26. package/dist/lib/arguments.d.ts +3 -2
  27. package/dist/lib/arguments.js +5 -3
  28. package/dist/lib/challenge.d.ts +13 -18
  29. package/dist/lib/challenge.js +58 -62
  30. package/dist/lib/experiment/experimenter.d.ts +92 -0
  31. package/dist/lib/experiment/experimenter.js +368 -0
  32. package/dist/lib/{evaluation → experiment}/index.d.ts +1 -1
  33. package/dist/lib/{evaluation → experiment}/index.js +1 -1
  34. package/dist/lib/{evaluation → experiment}/runner.d.ts +2 -0
  35. package/dist/lib/{evaluation → experiment}/runner.js +21 -2
  36. package/dist/lib/{evaluation → experiment}/tui.d.ts +1 -1
  37. package/dist/lib/{evaluation → experiment}/tui.js +3 -3
  38. package/dist/lib/{evaluation → experiment}/types.d.ts +10 -4
  39. package/dist/lib/{evaluation → experiment}/types.js +5 -3
  40. package/dist/lib/flags.d.ts +47 -0
  41. package/dist/lib/flags.js +63 -0
  42. package/dist/lib/schemas.d.ts +63 -2
  43. package/dist/lib/schemas.js +27 -1
  44. package/dist/lib/utils.d.ts +9 -10
  45. package/dist/lib/utils.js +12 -12
  46. package/oclif.manifest.json +423 -64
  47. package/package.json +11 -8
  48. package/static/challenge.ts +12 -13
  49. package/static/experiment_template.ts +114 -0
  50. package/static/project_template/dev.env +5 -5
  51. package/static/project_template/prod.env +4 -4
  52. package/static/project_template/tsconfig.json +1 -1
  53. package/dist/commands/challenge/multi-upload.d.ts +0 -6
  54. package/dist/commands/challenge/multi-upload.js +0 -80
  55. package/dist/commands/evaluation/run.d.ts +0 -13
  56. package/dist/commands/evaluation/run.js +0 -61
  57. package/dist/lib/config.d.ts +0 -12
  58. package/dist/lib/config.js +0 -49
  59. package/dist/lib/evaluation/evaluator.d.ts +0 -88
  60. package/dist/lib/evaluation/evaluator.js +0 -268
  61. package/static/evaluation_template.ts +0 -69
  62. package/dist/commands/{evaluation → experiment}/list.d.ts +0 -0
@@ -0,0 +1,416 @@
1
+ import fs from "node:fs/promises";
2
+ import path from "node:path";
3
+ import { Args, Command, Flags } from "@oclif/core";
4
+ import enquirer from "enquirer";
5
+ import { Listr } from "listr2";
6
+ import pc from "picocolors";
7
+ import { ApiClient } from "../../lib/api-client.js";
8
+ import { Experimenter } from "../../lib/experiment/experimenter.js";
9
+ import { getConfigFlags } from "../../lib/flags.js";
10
/**
 * Reports whether at least one `.mcpr` recording is already on disk for a run.
 *
 * Looks under `<experimentDir>/versions/<NNN>/recordings/<runId>` (the version
 * is zero-padded to three digits) and searches that directory recursively.
 *
 * @param experimentDir - Root directory of the experiment.
 * @param version - Experiment version number.
 * @param runId - ID of the run whose recordings folder is inspected.
 * @returns `true` when a `.mcpr` file is found; `false` when none is found or
 *          the folder cannot be accessed or read.
 */
async function checkRecordingsExist(experimentDir, version, runId) {
    const versionFolder = version.toString().padStart(3, "0");
    const runRecordingsDir = path.join(experimentDir, "versions", versionFolder, "recordings", runId);
    let entries;
    try {
        await fs.access(runRecordingsDir);
        entries = await fs.readdir(runRecordingsDir, { recursive: true });
    }
    catch {
        // Missing or unreadable folder simply means "nothing downloaded yet".
        return false;
    }
    for (const entry of entries) {
        if (String(entry).endsWith(".mcpr")) {
            return true;
        }
    }
    return false;
}
22
/**
 * Formats a byte count for display, e.g. `1536` -> `"1.5 KB"`.
 *
 * The value is scaled by 1024 until it drops below 1024 or the largest known
 * unit is reached, then rounded to at most two decimals. Repeated division is
 * used instead of a logarithm so exact powers of 1024 never land in the wrong
 * unit because of floating-point rounding, and the unit index can never run
 * past the end of the unit table.
 *
 * @param bytes - Size in bytes.
 * @returns Human-readable size. Zero, negative and non-finite inputs yield
 *          `"0 Bytes"`.
 */
function formatBytes(bytes) {
    if (!Number.isFinite(bytes) || bytes <= 0)
        return "0 Bytes";
    const k = 1024;
    const sizes = ["Bytes", "KB", "MB", "GB", "TB"];
    let value = bytes;
    let unitIndex = 0;
    // Stop at the last unit so very large values stay labelled (e.g. "3072 TB").
    while (value >= k && unitIndex < sizes.length - 1) {
        value /= k;
        unitIndex++;
    }
    return `${Math.round(value * 100) / 100} ${sizes[unitIndex]}`;
}
31
/**
 * Makes a timestamp string safe to use as a single file name.
 *
 * - `:` becomes `-` (so `12:30:45` reads as `12-30-45`),
 * - each run of whitespace becomes one `_`,
 * - the remaining characters that are invalid or unsafe in file names
 *   (`< > " | ? *` and the path separators `/` and `\`) become `_`, so the
 *   result can never introduce extra path segments when joined into a path.
 *
 * @param timestamp - Timestamp text; non-string values are converted with `String()`.
 * @returns The sanitized text.
 */
function sanitizeTimestamp(timestamp) {
    return String(timestamp)
        .replace(/:/g, "-")
        .replace(/\s+/g, "_")
        .replace(/[<>"|?*/\\]/g, "_");
}
39
/**
 * Downloads `url` to `outputPath`, retrying failed attempts.
 *
 * The payload is first written to `<outputPath>.part` and renamed into place
 * only after the write succeeded, so an interrupted or failed download never
 * leaves a truncated file at `outputPath` (callers treat an existing file as
 * "already downloaded"). Between attempts the function waits 1s, 2s, 3s, ...
 *
 * @param url - URL to fetch.
 * @param outputPath - Destination file; parent directories are created.
 * @param maxRetries - Maximum number of attempts. Values below 1 (or not a
 *                     number) are treated as 1 so the download is always tried.
 * @throws The error of the last attempt (network failure, non-2xx status as
 *         `HTTP <status>: <statusText>`, or a file-system error).
 */
async function downloadFile(url, outputPath, maxRetries = 3) {
    const attempts = Math.max(1, Math.floor(Number(maxRetries)) || 1);
    const partialPath = `${outputPath}.part`;
    await fs.mkdir(path.dirname(outputPath), { recursive: true });
    for (let attempt = 1; attempt <= attempts; attempt++) {
        try {
            const response = await fetch(url);
            if (!response.ok) {
                throw new Error(`HTTP ${response.status}: ${response.statusText}`);
            }
            const payload = Buffer.from(await response.arrayBuffer());
            await fs.writeFile(partialPath, payload);
            await fs.rename(partialPath, outputPath);
            return;
        }
        catch (error) {
            // Best-effort cleanup; never let it mask the real failure.
            await fs.rm(partialPath, { force: true }).catch(() => { });
            if (attempt === attempts)
                throw error;
            await new Promise((resolve) => setTimeout(resolve, 1000 * attempt)); // linear backoff: 1s, 2s, 3s
        }
    }
}
59
/**
 * Lists the version numbers that exist for an experiment.
 *
 * A version is any sub-directory of `<experimentDir>/versions` whose name is
 * made of digits only (e.g. `000`, `012`). Names that merely start with digits
 * (such as `3-old`) are ignored, because the rest of this file rebuilds the
 * directory name from the number and would point at a different folder.
 *
 * @param experimentDir - Root directory of the experiment.
 * @returns Unique version numbers in ascending order; an empty array when the
 *          `versions` directory cannot be read.
 */
async function getAllVersions(experimentDir) {
    const versionsDir = path.join(experimentDir, "versions");
    let entries;
    try {
        entries = await fs.readdir(versionsDir, { withFileTypes: true });
    }
    catch {
        return [];
    }
    const versions = new Set();
    for (const entry of entries) {
        if (entry.isDirectory() && /^\d+$/.test(entry.name)) {
            versions.add(Number.parseInt(entry.name, 10));
        }
    }
    return [...versions].sort((a, b) => a - b);
}
74
/**
 * `experiment recordings` command: downloads the `.mcpr` recordings of the
 * completed runs of one experiment version into
 * `<experimentDir>/versions/<NNN>/recordings/<runId>/<participantId>/`.
 *
 * Runs and participants are chosen from the positional run ID, the `--all`
 * flag, or interactive prompts. Runs that ended less than 90 seconds ago are
 * downloaded last, after waiting out the remainder of that window.
 */
export default class Recordings extends Command {
    static description = "Download recordings from an experiment run";
    static examples = [
        "<%= config.bin %> <%= command.id %> my-experiment",
        "<%= config.bin %> <%= command.id %> my-experiment <run-id>",
        "<%= config.bin %> <%= command.id %> my-experiment --all",
        "<%= config.bin %> <%= command.id %> my-experiment <run-id> --all",
        "<%= config.bin %> <%= command.id %> my-experiment --version 2",
        "<%= config.bin %> <%= command.id %> my-experiment --version 1 --all",
    ];
    static args = {
        experimentName: Args.string({
            description: "Experiment name",
            required: true,
        }),
        runId: Args.string({
            description: "Specific run ID to download recordings from (optional)",
            required: false,
        }),
    };
    static flags = {
        all: Flags.boolean({
            description: "Download all runs and participants (if no run specified), or all participants (if run specified)",
            default: false,
        }),
        version: Flags.integer({
            description: "Specific experiment version to download recordings from (e.g., 0, 1, 2)",
            required: false,
        }),
        ...getConfigFlags("api-key", "api-url"),
    };
    /** Parses the command line and delegates to {@link downloadForExperiment}. */
    async run() {
        const { args, flags } = await this.parse(Recordings);
        const api = new ApiClient(flags["api-url"], flags["api-key"]);
        const { experimentName, runId } = args;
        await this.downloadForExperiment(experimentName, runId, api, flags.all, flags.version);
    }
    /**
     * Resolves the version, runs and participants to download, then downloads
     * their recordings.
     *
     * @param experimentName - Name of the experiment.
     * @param runId - Optional run ID; when given, only that run is considered.
     * @param api - API client used to list and download recordings.
     * @param all - When true, no prompts are shown: all runs (if no `runId`)
     *              and all participants are downloaded.
     * @param version - Optional version number; defaults to the highest one on disk.
     */
    async downloadForExperiment(experimentName, runId, api, all, version) {
        const experimenter = new Experimenter(experimentName, "", api);
        if (!(await experimenter.exists())) {
            this.error(pc.red(`Experiment '${experimentName}' does not exist. Run 'kradle experiment list' to see available experiments.`));
        }
        const experimentDir = experimenter.experimentDir;
        const allVersions = await getAllVersions(experimentDir);
        if (allVersions.length === 0) {
            this.error(pc.red("No experiment versions found. Run the experiment first."));
        }
        let targetVersion;
        if (version !== undefined) {
            if (!allVersions.includes(version)) {
                this.error(pc.red(`Version ${version} not found in experiment '${experimentName}'. ` +
                    `Available versions: ${allVersions.join(", ")}`));
            }
            targetVersion = version;
            this.log(pc.blue(`>> Filtering to version ${version}`));
        }
        else {
            // Default to latest version
            targetVersion = Math.max(...allVersions);
        }
        const allRunInfos = await this.loadCompletedRuns(experimentDir, targetVersion);
        if (allRunInfos.length === 0) {
            this.error(pc.yellow("No completed runs found. Wait for runs to finish or run the experiment first."));
        }
        const selectedRuns = await this.selectRuns(allRunInfos, experimentName, runId, all);
        const downloadTargets = await this.buildDownloadTargets(selectedRuns, experimentDir, all);
        if (downloadTargets.length === 0) {
            this.log(pc.yellow("No recordings to download."));
            return;
        }
        // Recordings can take a while to upload after a run ends, so runs that
        // ended less than 90 seconds ago are handled last.
        // NOTE(review): the arithmetic below assumes `endTime` is epoch
        // milliseconds — confirm against whatever writes progress.json.
        const MIN_WAIT_AFTER_COMPLETION_MS = 90000; // 90 seconds
        const endTimeByRun = new Map();
        for (const run of selectedRuns) {
            if (run.endTime) {
                endTimeByRun.set(run.runId, run.endTime);
            }
        }
        const partitionedAt = Date.now();
        const readyTargets = [];
        const deferredTargets = [];
        for (const target of downloadTargets) {
            const endTime = endTimeByRun.get(target.runId);
            // Targets without an end time are processed immediately.
            const tooRecent = Boolean(endTime) && partitionedAt - endTime < MIN_WAIT_AFTER_COMPLETION_MS;
            (tooRecent ? deferredTargets : readyTargets).push(target);
        }
        let recordingsFound = 0;
        if (readyTargets.length > 0) {
            this.log(pc.blue(`>> Fetching and downloading recordings for ${readyTargets.length} ready target(s)...`));
            recordingsFound += (await this.fetchAndDownloadTargets(api, readyTargets)) ?? 0;
        }
        if (deferredTargets.length > 0) {
            const deferredRunIds = new Set(deferredTargets.map((t) => t.runId));
            // Measure against the current time: downloading the ready targets
            // may already have used up part (or all) of the waiting window.
            const nowMs = Date.now();
            const remainingMs = Math.max(0, ...Array.from(deferredRunIds, (id) => MIN_WAIT_AFTER_COMPLETION_MS - (nowMs - endTimeByRun.get(id))));
            if (remainingMs > 0) {
                this.log(pc.yellow(`>> Waiting ${Math.ceil(remainingMs / 1000)}s for ${deferredRunIds.size} recent run(s) to ensure all recordings are uploaded...`));
                await new Promise((resolve) => setTimeout(resolve, remainingMs));
            }
            this.log(pc.blue(`>> Fetching and downloading recordings for ${deferredTargets.length} deferred target(s)...`));
            recordingsFound += (await this.fetchAndDownloadTargets(api, deferredTargets)) ?? 0;
        }
        if (recordingsFound === 0) {
            // Each batch already reported that it found nothing; do not claim success.
            return;
        }
        const uniqueRuns = new Set(downloadTargets.map((t) => t.runId));
        // All targets are from the same version
        const recordingsDir = path.join(experimentDir, "versions", targetVersion.toString().padStart(3, "0"), "recordings");
        this.log(pc.green(`\n✓ Downloaded recordings for ${downloadTargets.length} participant(s) across ${uniqueRuns.size} run(s) to ${recordingsDir}`));
    }
    /**
     * Reads `progress.json` of the given version and returns its completed runs
     * (status `completed`, `finished` or `game_over`, with a run ID).
     *
     * A missing file yields an empty list. Any other read/parse problem is
     * reported as a warning and the runs collected so far are returned.
     */
    async loadCompletedRuns(experimentDir, targetVersion) {
        const completedStatuses = new Set(["completed", "finished", "game_over"]);
        const progressPath = path.join(experimentDir, "versions", targetVersion.toString().padStart(3, "0"), "progress.json");
        const runs = [];
        try {
            const progress = JSON.parse(await fs.readFile(progressPath, "utf-8"));
            for (const entry of progress.entries ?? []) {
                // Only include runs that are completed (exclude in-progress, queued, or error runs)
                if (!entry.runId || !completedStatuses.has(entry.status)) {
                    continue;
                }
                runs.push({
                    version: targetVersion,
                    runId: entry.runId,
                    index: entry.index,
                    status: entry.status,
                    hasRecordings: await checkRecordingsExist(experimentDir, targetVersion, entry.runId),
                    participantIds: entry.participantIds,
                    endTime: entry.endTime,
                });
            }
        }
        catch (error) {
            const code = error && typeof error === "object" && "code" in error ? error.code : undefined;
            if (code !== "ENOENT") {
                const reason = error instanceof Error ? error.message : String(error);
                this.log(pc.yellow(`Warning: Could not read ${progressPath}: ${reason}`));
            }
        }
        return runs;
    }
    /**
     * Picks the runs to download: the run matching `runId`, every run when
     * `all` is set, or whatever the user chooses in an interactive prompt.
     */
    async selectRuns(allRunInfos, experimentName, runId, all) {
        if (runId) {
            const matchingRun = allRunInfos.find((r) => r.runId === runId);
            if (!matchingRun) {
                this.error(pc.red(`Run ID '${runId}' not found in experiment '${experimentName}'. ` +
                    `Run 'kradle experiment recordings ${experimentName}' to see available runs.`));
            }
            this.log(pc.blue(`>> Downloading recordings for run: ${runId}`));
            return [matchingRun];
        }
        if (all) {
            this.log(pc.blue(`>> Downloading recordings for all ${allRunInfos.length} runs`));
            return allRunInfos;
        }
        const choices = [];
        // Only show "All runs" option if there are multiple runs
        if (allRunInfos.length > 1) {
            choices.push({
                name: "all",
                message: `All runs (${allRunInfos.length} total)`,
                hint: "Download all",
            });
        }
        for (const run of allRunInfos) {
            const indicator = run.hasRecordings ? pc.blue("·") : "☐";
            const participants = run.participantIds?.join(", ") || "No participants";
            choices.push({
                name: run.runId,
                message: `${indicator} ${participants} - ${run.runId}`,
                hint: run.status,
            });
        }
        const { selectedRunId } = await enquirer.prompt({
            type: "select",
            name: "selectedRunId",
            message: "Select a run to download recordings from",
            choices,
        });
        if (selectedRunId === "all") {
            this.log(pc.blue(`>> Downloading recordings for all ${allRunInfos.length} runs`));
            return allRunInfos;
        }
        const selectedRun = allRunInfos.find((r) => r.runId === selectedRunId);
        if (!selectedRun) {
            this.error(pc.red("Selected run not found."));
        }
        return [selectedRun];
    }
    /**
     * Expands the selected runs into one download target per participant.
     *
     * With exactly one run and no `--all`, the user is asked which participant
     * (or all of them) to download. Otherwise every participant of every run is
     * included and runs without participant IDs are skipped with a warning.
     */
    async buildDownloadTargets(selectedRuns, experimentDir, all) {
        const targetFor = (run, participantId) => ({
            version: run.version,
            runId: run.runId,
            participantId,
            experimentDir,
        });
        if (selectedRuns.length === 1 && !all) {
            const [run] = selectedRuns;
            if (!run.participantIds || run.participantIds.length === 0) {
                this.error(pc.red("Participant IDs not available for this run. This may be an old run created before recording support was added."));
            }
            const participantChoices = [
                { name: "all", message: "All participants", value: "all" },
                ...run.participantIds.map((id, idx) => ({
                    name: id,
                    message: `Participant ${idx}: ${id}`,
                    value: id,
                })),
            ];
            const { participantSelection } = await enquirer.prompt({
                type: "select",
                name: "participantSelection",
                message: "Select participant(s)",
                choices: participantChoices,
            });
            if (participantSelection === "all") {
                return run.participantIds.map((id) => targetFor(run, id));
            }
            return [targetFor(run, participantSelection)];
        }
        // Multiple runs OR --all flag: automatically download all participants
        const targets = [];
        for (const run of selectedRuns) {
            if (!run.participantIds || run.participantIds.length === 0) {
                this.log(pc.yellow(`Warning: Skipping run ${run.runId} - no participant IDs available`));
                continue;
            }
            for (const participantId of run.participantIds) {
                targets.push(targetFor(run, participantId));
            }
        }
        return targets;
    }
    /**
     * Lists the recordings of every target, prints a count/size summary and
     * downloads them. The listings obtained here are handed to
     * {@link downloadRecordings} so the API is not queried a second time.
     *
     * @param api - API client.
     * @param targets - Download targets (`version`, `runId`, `participantId`, `experimentDir`).
     * @returns Number of recordings found across all targets (0 when none).
     */
    async fetchAndDownloadTargets(api, targets) {
        const listings = new Map();
        let totalCount = 0;
        let totalSize = 0;
        for (const target of targets) {
            try {
                const recordings = await api.getRunRecordings(target.runId, target.participantId);
                listings.set(target, recordings);
                totalCount += recordings.length;
                totalSize += recordings.reduce((sum, r) => sum + r.sizeBytes, 0);
            }
            catch {
                // Skip targets with no recordings
            }
        }
        if (totalCount === 0) {
            this.log(pc.yellow(" No recordings found for these targets."));
            return 0;
        }
        this.log(pc.blue(` Found ${totalCount} recordings (Total: ${formatBytes(totalSize)})`));
        await this.downloadRecordings(api, targets, listings);
        return totalCount;
    }
    /**
     * Downloads every recording of every target, three at a time, skipping
     * files that already exist. A failed download does not stop the others.
     *
     * @param api - API client.
     * @param targets - Download targets.
     * @param prefetched - Optional `Map` from target object to its recording
     *                     list; targets missing from it are listed via the API.
     */
    async downloadRecordings(api, targets, prefetched) {
        const allTasks = [];
        for (const target of targets) {
            const { version, runId, participantId, experimentDir } = target;
            let recordings = prefetched?.get(target);
            if (recordings === undefined) {
                try {
                    recordings = await api.getRunRecordings(runId, participantId);
                }
                catch {
                    // Skip targets with no recordings
                    continue;
                }
            }
            if (recordings.length === 0)
                continue;
            for (const recording of recordings) {
                const sanitizedFilename = `${sanitizeTimestamp(recording.timestamp)}.mcpr`;
                allTasks.push({
                    title: `${participantId} - ${runId} - ${sanitizedFilename}`,
                    task: async (_, task) => {
                        const outputPath = path.join(experimentDir, "versions", version.toString().padStart(3, "0"), "recordings", runId, participantId, sanitizedFilename);
                        // Skip if already exists
                        try {
                            await fs.access(outputPath);
                            task.skip("Already downloaded");
                            return;
                        }
                        catch { }
                        // The download URL is requested per file, right before it is used.
                        const { downloadUrl } = await api.getRecordingDownloadUrl(runId, participantId, recording.timestamp);
                        await downloadFile(downloadUrl, outputPath);
                        task.title = `${participantId} - ${runId} - ${sanitizedFilename} (${formatBytes(recording.sizeBytes)})`;
                    },
                });
            }
        }
        if (allTasks.length === 0) {
            return;
        }
        const tasks = new Listr(allTasks, {
            concurrent: 3,
            exitOnError: false,
        });
        await tasks.run();
    }
}
@@ -0,0 +1,17 @@
1
+ import { Command } from "@oclif/core";
2
/** Type declarations for the `Run` oclif command. */
export default class Run extends Command {
    /** Command description text. */
    static description: string;
    /** Example invocations. */
    static examples: string[];
    /** Positional arguments: a single string `name`. */
    static args: {
        name: import("@oclif/core/interfaces").Arg<string, Record<string, unknown>>;
    };
    /** Flags: three string options, one numeric option and two boolean switches. */
    static flags: {
        "api-key": import("@oclif/core/interfaces").OptionFlag<string, import("@oclif/core/interfaces").CustomOptions>;
        "api-url": import("@oclif/core/interfaces").OptionFlag<string, import("@oclif/core/interfaces").CustomOptions>;
        "web-url": import("@oclif/core/interfaces").OptionFlag<string, import("@oclif/core/interfaces").CustomOptions>;
        "new-version": import("@oclif/core/interfaces").BooleanFlag<boolean>;
        "max-concurrent": import("@oclif/core/interfaces").OptionFlag<number, import("@oclif/core/interfaces").CustomOptions>;
        "download-recordings": import("@oclif/core/interfaces").BooleanFlag<boolean>;
    };
    /** Command entry point. */
    run(): Promise<void>;
}
@@ -0,0 +1,67 @@
1
+ import { Args, Command, Flags } from "@oclif/core";
2
+ import pc from "picocolors";
3
+ import { ApiClient } from "../../lib/api-client.js";
4
+ import { Experimenter } from "../../lib/experiment/experimenter.js";
5
+ import { getConfigFlags } from "../../lib/flags.js";
6
+ const DEFAULT_MAX_CONCURRENT = 5;
7
/**
 * `experiment run` command: runs an experiment through `Experimenter`,
 * optionally starting a new version and downloading recordings as runs finish.
 */
export default class Run extends Command {
    static description = "Run an experiment. If the experiment had an ongoing version, it will resume from the last state.";
    static examples = [
        "<%= config.bin %> <%= command.id %> my-experiment",
        "<%= config.bin %> <%= command.id %> my-experiment --new-version",
        "<%= config.bin %> <%= command.id %> my-experiment --max-concurrent 10",
    ];
    static args = {
        name: Args.string({
            description: "Name of the experiment to run",
            required: true,
        }),
    };
    static flags = {
        "new-version": Flags.boolean({
            char: "n",
            description: "Start a new version of the experiment",
            default: false,
        }),
        "max-concurrent": Flags.integer({
            char: "m",
            description: "Maximum concurrent runs",
            default: DEFAULT_MAX_CONCURRENT,
            // Reject 0 and negative values at parse time instead of passing
            // an unusable concurrency limit to the experimenter.
            min: 1,
        }),
        "download-recordings": Flags.boolean({
            char: "d",
            description: "Automatically download recordings after each run finishes",
            default: false,
        }),
        ...getConfigFlags("api-key", "api-url", "web-url"),
    };
    /**
     * Validates that the experiment and its config file exist, then runs it.
     * Any failure is reported through `this.error`, prefixed with
     * "Experiment failed: " when it comes from the run itself.
     */
    async run() {
        const { args, flags } = await this.parse(Run);
        const api = new ApiClient(flags["api-url"], flags["api-key"]);
        const experimenter = new Experimenter(args.name, flags["web-url"], api);
        if (!(await experimenter.exists())) {
            this.error(pc.red(`Experiment '${args.name}' does not exist. Run 'kradle experiment create ${args.name}' first.`));
        }
        if (!(await experimenter.configExists())) {
            this.error(pc.red(`Config file not found at ${experimenter.configPath}`));
        }
        this.log(pc.blue(`>> Starting experiment: ${args.name}`));
        if (flags["new-version"]) {
            this.log(pc.yellow(" Starting a new version of the experiment."));
        }
        try {
            await experimenter.run({
                new: flags["new-version"],
                maxConcurrent: flags["max-concurrent"],
                openMetabase: true,
                downloadRecordings: flags["download-recordings"],
            });
            this.log(pc.green("\n✓ Experiment complete!"));
        }
        catch (error) {
            const reason = error instanceof Error ? error.message : String(error);
            this.error(pc.red(`Experiment failed: ${reason}`));
        }
    }
}
@@ -34,10 +34,10 @@ export default class Init extends Command {
34
34
  const nonHiddenFiles = files.filter((f) => !f.startsWith("."));
35
35
  const useCurrentDir = nonHiddenFiles.length === 0;
36
36
  if (useCurrentDir) {
37
- this.log(pc.yellow("Current directory is empty, it will be used to store challenges and evaluations."));
37
+ this.log(pc.yellow("Current directory is empty, it will be used to store challenges and experiments."));
38
38
  }
39
39
  else {
40
- this.log(pc.yellow("Current directory is not empty, a subdirectory will be created to store challenges and evaluations."));
40
+ this.log(pc.yellow("Current directory is not empty, a subdirectory will be created to store challenges and experiments."));
41
41
  }
42
42
  let projectName;
43
43
  if (flags.name) {
@@ -1,10 +1,10 @@
1
1
  import type z from "zod";
2
- import type { Challenge } from "./challenge.js";
3
- import type { Config } from "./config.js";
4
- import { type AgentSchemaType, type ChallengeSchemaType, HumanSchema, type RunStatusSchemaType } from "./schemas.js";
2
+ import { type AgentSchemaType, type ChallengeConfigSchemaType, type ChallengeSchemaType, HumanSchema, type RecordingDownloadUrlResponse, type RecordingMetadata, type RunStatusSchemaType } from "./schemas.js";
5
3
  export declare class ApiClient {
6
- private config;
7
- constructor(config: Config);
4
+ private apiUrl;
5
+ private kradleApiKey;
6
+ private isStudio;
7
+ constructor(apiUrl: string, kradleApiKey: string, isStudio?: boolean);
8
8
  private request;
9
9
  private get;
10
10
  private post;
@@ -40,17 +40,43 @@ export declare class ApiClient {
40
40
  createChallenge(slug: string): Promise<unknown>;
41
41
  /**
42
42
  * Update a challenge definition in the cloud.
43
- * @param challenge - The challenge to update.
44
- * @param challengeConfig - The challenge config to upload. If not provided, the config will be loaded from the challenge.
43
+ * @param challengeSlug - The slug of the challenge.
44
+ * @param challengeConfig - The challenge config to upload.
45
+ * @param visibility - The visibility to set.
45
46
  * @returns The updated challenge.
46
47
  */
47
- updateChallenge(challenge: Challenge, challengeConfig?: ChallengeSchemaType): Promise<void>;
48
- getChallengeUploadUrl(challenge: Challenge): Promise<string>;
48
+ updateChallenge(challengeSlug: string, challengeConfig: ChallengeConfigSchemaType, visibility: "private" | "public"): Promise<void>;
49
+ /**
50
+ * Update the visibility of a challenge.
51
+ * @param challengeSlug - The slug of the challenge.
52
+ * @param visibility - The visibility to set.
53
+ * @returns The updated challenge.
54
+ */
55
+ updateChallengeVisibility(challengeSlug: string, visibility: "private" | "public"): Promise<void>;
56
+ /**
57
+ * Upload a challenge datapack to Google Cloud Storage.
58
+ * @param slug - The slug of the challenge.
59
+ * @param tarballPath - The path to the tarball file.
60
+ * @returns The upload URL.
61
+ */
62
+ uploadChallengeDatapack(slug: string, tarballPath: string): Promise<void>;
63
+ /**
64
+ * Get the upload URL for a challenge datapack.
65
+ * @param slug - The slug of the challenge.
66
+ * @returns The upload URL.
67
+ */
68
+ getChallengeUploadUrl(slug: string): Promise<string>;
49
69
  runChallenge(runData: {
50
70
  challenge: string;
51
71
  participants: unknown[];
52
- }, studio?: boolean): Promise<{
72
+ }): Promise<{
53
73
  runIds?: string[] | undefined;
74
+ participants?: Record<string, {
75
+ agent: string;
76
+ role: string;
77
+ inputOrder: number;
78
+ }> | undefined;
79
+ id?: string | undefined;
54
80
  }>;
55
81
  deleteChallenge(challengeId: string): Promise<void>;
56
82
  /**
@@ -66,4 +92,19 @@ export declare class ApiClient {
66
92
  * @throws an error if the tag fails to be added.
67
93
  */
68
94
  tagRun(runId: string, tag: string): Promise<void>;
95
+ /**
96
+ * Get recordings for a specific participant in a run.
97
+ * @param runId - The ID of the run.
98
+ * @param participantId - The ID of the participant.
99
+ * @returns Array of recording metadata.
100
+ */
101
+ getRunRecordings(runId: string, participantId: string): Promise<RecordingMetadata[]>;
102
+ /**
103
+ * Get a signed download URL for a specific recording.
104
+ * @param runId - The ID of the run.
105
+ * @param participantId - The ID of the participant.
106
+ * @param timestamp - The timestamp of the recording.
107
+ * @returns Download URL and expiration time.
108
+ */
109
+ getRecordingDownloadUrl(runId: string, participantId: string, timestamp: string): Promise<RecordingDownloadUrlResponse>;
69
110
  }