@kradle/cli 0.0.17 → 0.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +93 -65
- package/dist/commands/agent/list.d.ts +4 -0
- package/dist/commands/agent/list.js +6 -4
- package/dist/commands/challenge/build.d.ts +9 -1
- package/dist/commands/challenge/build.js +40 -12
- package/dist/commands/challenge/create.d.ts +5 -1
- package/dist/commands/challenge/create.js +17 -18
- package/dist/commands/challenge/delete.d.ts +4 -1
- package/dist/commands/challenge/delete.js +5 -5
- package/dist/commands/challenge/list.d.ts +5 -0
- package/dist/commands/challenge/list.js +11 -10
- package/dist/commands/challenge/run.d.ts +8 -1
- package/dist/commands/challenge/run.js +13 -8
- package/dist/commands/challenge/watch.d.ts +4 -1
- package/dist/commands/challenge/watch.js +8 -8
- package/dist/commands/{evaluation → experiment}/create.d.ts +4 -0
- package/dist/commands/{evaluation → experiment}/create.js +22 -21
- package/dist/commands/{evaluation → experiment}/list.js +17 -19
- package/dist/commands/experiment/recordings.d.ts +19 -0
- package/dist/commands/experiment/recordings.js +416 -0
- package/dist/commands/experiment/run.d.ts +17 -0
- package/dist/commands/experiment/run.js +67 -0
- package/dist/commands/init.js +2 -2
- package/dist/lib/api-client.d.ts +51 -10
- package/dist/lib/api-client.js +108 -39
- package/dist/lib/arguments.d.ts +3 -2
- package/dist/lib/arguments.js +5 -3
- package/dist/lib/challenge.d.ts +13 -18
- package/dist/lib/challenge.js +58 -62
- package/dist/lib/experiment/experimenter.d.ts +92 -0
- package/dist/lib/experiment/experimenter.js +368 -0
- package/dist/lib/{evaluation → experiment}/index.d.ts +1 -1
- package/dist/lib/{evaluation → experiment}/index.js +1 -1
- package/dist/lib/{evaluation → experiment}/runner.d.ts +2 -0
- package/dist/lib/{evaluation → experiment}/runner.js +21 -2
- package/dist/lib/{evaluation → experiment}/tui.d.ts +1 -1
- package/dist/lib/{evaluation → experiment}/tui.js +3 -3
- package/dist/lib/{evaluation → experiment}/types.d.ts +10 -4
- package/dist/lib/{evaluation → experiment}/types.js +5 -3
- package/dist/lib/flags.d.ts +47 -0
- package/dist/lib/flags.js +63 -0
- package/dist/lib/schemas.d.ts +63 -2
- package/dist/lib/schemas.js +27 -1
- package/dist/lib/utils.d.ts +9 -10
- package/dist/lib/utils.js +12 -12
- package/oclif.manifest.json +423 -64
- package/package.json +11 -8
- package/static/challenge.ts +12 -13
- package/static/experiment_template.ts +114 -0
- package/static/project_template/dev.env +5 -5
- package/static/project_template/prod.env +4 -4
- package/static/project_template/tsconfig.json +1 -1
- package/dist/commands/challenge/multi-upload.d.ts +0 -6
- package/dist/commands/challenge/multi-upload.js +0 -80
- package/dist/commands/evaluation/run.d.ts +0 -13
- package/dist/commands/evaluation/run.js +0 -61
- package/dist/lib/config.d.ts +0 -12
- package/dist/lib/config.js +0 -49
- package/dist/lib/evaluation/evaluator.d.ts +0 -88
- package/dist/lib/evaluation/evaluator.js +0 -268
- package/static/evaluation_template.ts +0 -69
- /package/dist/commands/{evaluation → experiment}/list.d.ts +0 -0
|
@@ -0,0 +1,416 @@
|
|
|
1
|
+
import fs from "node:fs/promises";
|
|
2
|
+
import path from "node:path";
|
|
3
|
+
import { Args, Command, Flags } from "@oclif/core";
|
|
4
|
+
import enquirer from "enquirer";
|
|
5
|
+
import { Listr } from "listr2";
|
|
6
|
+
import pc from "picocolors";
|
|
7
|
+
import { ApiClient } from "../../lib/api-client.js";
|
|
8
|
+
import { Experimenter } from "../../lib/experiment/experimenter.js";
|
|
9
|
+
import { getConfigFlags } from "../../lib/flags.js";
|
|
10
|
+
// Check if recordings exist locally for a run
|
|
11
|
+
/**
 * Report whether at least one `.mcpr` recording file is already on disk for a run.
 *
 * Searches `<experimentDir>/versions/<NNN>/recordings/<runId>` (version
 * zero-padded to three digits) and every directory below it. The tree is
 * walked explicitly instead of relying on `readdir({ recursive: true })`,
 * because Node releases that predate that option ignore it silently and
 * recordings live one level down (in a per-participant directory).
 *
 * @param {string} experimentDir - Root directory of the experiment.
 * @param {number} version - Experiment version number.
 * @param {string} runId - Run whose recordings directory is inspected.
 * @returns {Promise<boolean>} `true` when a non-directory entry ending in
 *   `.mcpr` exists; `false` otherwise, including when the directory is
 *   missing or unreadable.
 */
async function checkRecordingsExist(experimentDir, version, runId) {
    const recordingsPath = path.join(experimentDir, "versions", version.toString().padStart(3, "0"), "recordings", runId);
    // Depth-first search that stops at the first recording found.
    const containsRecording = async (dir) => {
        const entries = await fs.readdir(dir, { withFileTypes: true });
        for (const entry of entries) {
            if (entry.isDirectory()) {
                if (await containsRecording(path.join(dir, entry.name))) {
                    return true;
                }
            }
            else if (entry.name.endsWith(".mcpr")) {
                return true;
            }
        }
        return false;
    };
    try {
        return await containsRecording(recordingsPath);
    }
    catch {
        // Missing or unreadable directory: treat as "nothing downloaded yet".
        return false;
    }
}
|
|
22
|
+
// Format bytes for display
|
|
23
|
+
/**
 * Format a byte count as a short human-readable string using 1024-based units.
 *
 * The value is rounded to at most two decimals. The unit is chosen by repeated
 * division rather than a floating-point logarithm, so exact powers of 1024
 * always land on the right unit, and the unit index is clamped so very large
 * inputs stay in the largest known unit instead of printing "undefined".
 *
 * @param {number} bytes - Size in bytes.
 * @returns {string} e.g. `"1.5 KB"`; `"0 Bytes"` for zero, negative or
 *   non-finite input.
 */
function formatBytes(bytes) {
    if (!Number.isFinite(bytes) || bytes <= 0)
        return "0 Bytes";
    const k = 1024;
    const sizes = ["Bytes", "KB", "MB", "GB", "TB", "PB"];
    let value = bytes;
    let unit = 0;
    while (value >= k && unit < sizes.length - 1) {
        value /= k;
        unit++;
    }
    return `${Math.round(value * 100) / 100} ${sizes[unit]}`;
}
|
|
31
|
+
// Sanitize timestamp string for use in filenames
|
|
32
|
+
/**
 * Turn a recording timestamp into a string that is safe to use as a single
 * file-name component.
 *
 * - colons become `-`
 * - runs of whitespace become a single `_`
 * - `< > " | ? *` and the path separators `/` and `\` become `_`
 *
 * Path separators are included because the timestamp comes from the API and is
 * joined into a local path; leaving them in would let a value create nested
 * directories or climb out of the recordings directory.
 *
 * @param {string} timestamp - Timestamp as reported for the recording.
 * @returns {string} Sanitized text (no extension added).
 */
function sanitizeTimestamp(timestamp) {
    return String(timestamp)
        .replace(/:/g, "-")
        .replace(/\s+/g, "_")
        .replace(/[<>"|?*/\\]/g, "_");
}
|
|
39
|
+
// Download file with retry logic
|
|
40
|
+
/**
 * Download `url` to `outputPath`, retrying on failure.
 *
 * The body is written to `<outputPath>.part` and renamed into place only after
 * the write succeeds, so an interrupted download never leaves a truncated file
 * at the final path (callers treat an existing file as "already downloaded").
 * Parent directories are created as needed. Between attempts the function
 * sleeps `1000 * attempt` ms (1s, 2s, ...).
 *
 * @param {string} url - Location to fetch.
 * @param {string} outputPath - Destination file path.
 * @param {number} [maxRetries=3] - Total number of attempts; values below 1
 *   are treated as 1 so the download is always attempted.
 * @returns {Promise<void>}
 * @throws The error from the last attempt (network failure, non-2xx status,
 *   or file-system error).
 */
async function downloadFile(url, outputPath, maxRetries = 3) {
    const attempts = Math.max(1, Math.floor(Number(maxRetries)) || 1);
    const partialPath = `${outputPath}.part`;
    for (let attempt = 1; attempt <= attempts; attempt++) {
        try {
            const response = await fetch(url);
            if (!response.ok) {
                throw new Error(`HTTP ${response.status}: ${response.statusText}`);
            }
            const buffer = await response.arrayBuffer();
            await fs.mkdir(path.dirname(outputPath), { recursive: true });
            await fs.writeFile(partialPath, Buffer.from(buffer));
            await fs.rename(partialPath, outputPath);
            return;
        }
        catch (error) {
            // Best-effort cleanup of a half-written temp file; never mask the real error.
            try {
                await fs.rm(partialPath, { force: true });
            }
            catch {
                // ignore cleanup failures
            }
            if (attempt === attempts)
                throw error;
            await new Promise((resolve) => setTimeout(resolve, 1000 * attempt)); // linear backoff: 1s, 2s, 3s
        }
    }
}
|
|
59
|
+
// Get all versions for an experiment
|
|
60
|
+
/**
 * List the version numbers that exist on disk for an experiment.
 *
 * A version is any sub-directory of `<experimentDir>/versions` whose name
 * consists solely of decimal digits (e.g. `000`, `012`). Names that merely
 * start with digits (`2-backup`, `001.old`) are ignored; `parseInt` alone would
 * accept them and report versions whose zero-padded directory may not exist.
 *
 * @param {string} experimentDir - Root directory of the experiment.
 * @returns {Promise<number[]>} Distinct version numbers in ascending order;
 *   empty when the `versions` directory is missing or unreadable.
 */
async function getAllVersions(experimentDir) {
    const versionsDir = path.join(experimentDir, "versions");
    let entries;
    try {
        entries = await fs.readdir(versionsDir, { withFileTypes: true });
    }
    catch {
        return [];
    }
    const versions = new Set();
    for (const entry of entries) {
        if (entry.isDirectory() && /^\d+$/.test(entry.name)) {
            versions.add(Number.parseInt(entry.name, 10));
        }
    }
    return Array.from(versions).sort((a, b) => a - b);
}
|
|
74
|
+
/**
 * `experiment recordings` command: downloads the `.mcpr` recordings of
 * completed runs of an experiment into
 * `<experimentDir>/versions/<NNN>/recordings/<runId>/<participantId>/`.
 *
 * Runs are read from the selected version's `progress.json`. Runs and
 * participants are chosen through the `runId` argument, the `--all` flag, or
 * interactive prompts.
 */
export default class Recordings extends Command {
    static description = "Download recordings from an experiment run";
    static examples = [
        "<%= config.bin %> <%= command.id %> my-experiment",
        "<%= config.bin %> <%= command.id %> my-experiment <run-id>",
        "<%= config.bin %> <%= command.id %> my-experiment --all",
        "<%= config.bin %> <%= command.id %> my-experiment <run-id> --all",
        "<%= config.bin %> <%= command.id %> my-experiment --version 2",
        "<%= config.bin %> <%= command.id %> my-experiment --version 1 --all",
    ];
    static args = {
        experimentName: Args.string({
            description: "Experiment name",
            required: true,
        }),
        runId: Args.string({
            description: "Specific run ID to download recordings from (optional)",
            required: false,
        }),
    };
    static flags = {
        all: Flags.boolean({
            description: "Download all runs and participants (if no run specified), or all participants (if run specified)",
            default: false,
        }),
        version: Flags.integer({
            description: "Specific experiment version to download recordings from (e.g., 0, 1, 2)",
            required: false,
        }),
        ...getConfigFlags("api-key", "api-url"),
    };
    /** Parse CLI input and delegate to {@link Recordings.downloadForExperiment}. */
    async run() {
        const { args, flags } = await this.parse(Recordings);
        const api = new ApiClient(flags["api-url"], flags["api-key"]);
        const { experimentName, runId } = args;
        await this.downloadForExperiment(experimentName, runId, api, flags.all, flags.version);
    }
    /**
     * Resolve which runs/participants to download for an experiment and
     * download their recordings.
     *
     * @param experimentName - Name of the experiment.
     * @param runId - Optional run to restrict the download to.
     * @param api - API client used to list and fetch recordings.
     * @param all - When true, skip prompts and take every run (no `runId`) or
     *   every participant (with `runId`).
     * @param version - Experiment version to use; defaults to the latest on disk.
     */
    async downloadForExperiment(experimentName, runId, api, all, version) {
        const experimenter = new Experimenter(experimentName, "", api);
        // Check if experiment exists
        if (!(await experimenter.exists())) {
            this.error(pc.red(`Experiment '${experimentName}' does not exist. Run 'kradle experiment list' to see available experiments.`));
        }
        const experimentDir = experimenter.experimentDir;
        const allVersions = await getAllVersions(experimentDir);
        if (allVersions.length === 0) {
            this.error(pc.red("No experiment versions found. Run the experiment first."));
        }
        // Default to latest version if not specified
        let targetVersion;
        if (version !== undefined) {
            if (!allVersions.includes(version)) {
                this.error(pc.red(`Version ${version} not found in experiment '${experimentName}'. ` +
                    `Available versions: ${allVersions.join(", ")}`));
            }
            targetVersion = version;
            this.log(pc.blue(`>> Filtering to version ${version}`));
        }
        else {
            targetVersion = Math.max(...allVersions);
        }
        const versionDir = path.join(experimentDir, "versions", targetVersion.toString().padStart(3, "0"));
        const allRunInfos = [];
        const completedStatuses = new Set(["completed", "finished", "game_over"]);
        const progressPath = path.join(versionDir, "progress.json");
        try {
            const progress = JSON.parse(await fs.readFile(progressPath, "utf-8"));
            for (const entry of progress.entries) {
                // Only include runs that are completed (exclude in-progress, queued, or error runs)
                if (entry.runId && completedStatuses.has(entry.status)) {
                    const hasRecordings = await checkRecordingsExist(experimentDir, targetVersion, entry.runId);
                    allRunInfos.push({
                        version: targetVersion,
                        runId: entry.runId,
                        index: entry.index,
                        status: entry.status,
                        hasRecordings,
                        participantIds: entry.participantIds,
                        endTime: entry.endTime,
                    });
                }
            }
        }
        catch (error) {
            // A missing progress file simply means nothing has run yet. Anything
            // else (corrupt JSON, unexpected shape, permissions) is reported so the
            // "no completed runs" message below is not misleading.
            const isMissingFile = error instanceof Error && "code" in error && error.code === "ENOENT";
            if (!isMissingFile) {
                this.warn(`Could not read ${progressPath}: ${error instanceof Error ? error.message : String(error)}`);
            }
        }
        if (allRunInfos.length === 0) {
            this.error(pc.yellow("No completed runs found. Wait for runs to finish or run the experiment first."));
        }
        let selectedRuns;
        if (all && !runId) {
            // Download all runs and all participants (--all without specific run)
            selectedRuns = allRunInfos;
            this.log(pc.blue(`>> Downloading recordings for all ${selectedRuns.length} runs`));
        }
        else if (runId) {
            // Find specific run by ID
            const matchingRun = allRunInfos.find((r) => r.runId === runId);
            if (!matchingRun) {
                this.error(pc.red(`Run ID '${runId}' not found in experiment '${experimentName}'. ` +
                    `Run 'kradle experiment recordings ${experimentName}' to see available runs.`));
            }
            selectedRuns = [matchingRun];
            this.log(pc.blue(`>> Downloading recordings for run: ${runId}`));
        }
        else {
            // Interactive run selection
            const choices = [
                // Only show "All runs" option if there are multiple runs
                ...(allRunInfos.length > 1
                    ? [
                        {
                            name: "all",
                            message: `All runs (${allRunInfos.length} total)`,
                            hint: "Download all",
                        },
                    ]
                    : []),
                ...allRunInfos.map((run) => {
                    const indicator = run.hasRecordings ? pc.blue("·") : "☐";
                    const participants = run.participantIds?.join(", ") || "No participants";
                    return {
                        name: run.runId,
                        message: `${indicator} ${participants} - ${run.runId}`,
                        hint: run.status,
                    };
                }),
            ];
            const { selectedRunId } = await enquirer.prompt({
                type: "select",
                name: "selectedRunId",
                message: "Select a run to download recordings from",
                choices,
            });
            if (selectedRunId === "all") {
                selectedRuns = allRunInfos;
                this.log(pc.blue(`>> Downloading recordings for all ${selectedRuns.length} runs`));
            }
            else {
                const selectedRun = allRunInfos.find((r) => r.runId === selectedRunId);
                if (!selectedRun) {
                    this.error(pc.red("Selected run not found."));
                }
                selectedRuns = [selectedRun];
            }
        }
        // Build download targets: one per (run, participant) pair.
        const downloadTargets = [];
        const addTargets = (run, participantIds) => {
            for (const participantId of participantIds) {
                downloadTargets.push({
                    version: run.version,
                    runId: run.runId,
                    participantId,
                    experimentDir,
                });
            }
        };
        // Prompt for participant selection only if: single run selected AND not --all flag
        if (selectedRuns.length === 1 && !all) {
            const run = selectedRuns[0];
            const participantIds = run.participantIds;
            if (!participantIds || participantIds.length === 0) {
                this.error(pc.red("Participant IDs not available for this run. This may be an old run created before recording support was added."));
            }
            const participantChoices = [
                { name: "all", message: "All participants", value: "all" },
                ...participantIds.map((id, idx) => ({
                    name: id,
                    message: `Participant ${idx}: ${id}`,
                    value: id,
                })),
            ];
            const { participantSelection } = await enquirer.prompt({
                type: "select",
                name: "participantSelection",
                message: "Select participant(s)",
                choices: participantChoices,
            });
            addTargets(run, participantSelection === "all" ? participantIds : [participantSelection]);
        }
        else {
            // Multiple runs OR --all flag: automatically download all participants
            for (const run of selectedRuns) {
                if (!run.participantIds || run.participantIds.length === 0) {
                    this.log(pc.yellow(`Warning: Skipping run ${run.runId} - no participant IDs available`));
                    continue;
                }
                addTargets(run, run.participantIds);
            }
        }
        if (downloadTargets.length === 0) {
            this.log(pc.yellow("No recordings to download."));
            return;
        }
        // Ensure we wait at least 90 seconds after run completion before downloading
        // to avoid missing recordings that take time to upload
        const MIN_WAIT_AFTER_COMPLETION_MS = 90000; // 90 seconds
        const now = Date.now();
        // NOTE(review): the stored format of `endTime` is not visible from this
        // file. Epoch milliseconds (number or numeric string) and date strings are
        // both accepted, so the subtraction below can never silently become NaN.
        const toEpochMs = (value) => {
            if (value === undefined || value === null || value === "")
                return undefined;
            const numeric = Number(value);
            const ms = Number.isFinite(numeric) ? numeric : Date.parse(String(value));
            return Number.isFinite(ms) && ms > 0 ? ms : undefined;
        };
        // Remaining wait per run; runs without a usable end time are processed immediately.
        const remainingWaitByRun = new Map();
        for (const run of selectedRuns) {
            const endedAt = toEpochMs(run.endTime);
            if (endedAt !== undefined) {
                remainingWaitByRun.set(run.runId, MIN_WAIT_AFTER_COMPLETION_MS - (now - endedAt));
            }
        }
        const mustWait = (target) => (remainingWaitByRun.get(target.runId) ?? 0) > 0;
        const readyTargets = downloadTargets.filter((t) => !mustWait(t));
        const deferredTargets = downloadTargets.filter((t) => mustWait(t));
        const totals = { found: 0, downloaded: 0, skipped: 0, failed: 0 };
        const addToTotals = (batch) => {
            totals.found += batch?.found ?? 0;
            totals.downloaded += batch?.downloaded ?? 0;
            totals.skipped += batch?.skipped ?? 0;
            totals.failed += batch?.failed ?? 0;
        };
        // Process ready targets first
        if (readyTargets.length > 0) {
            this.log(pc.blue(`>> Fetching and downloading recordings for ${readyTargets.length} ready target(s)...`));
            addToTotals(await this.fetchAndDownloadTargets(api, readyTargets));
        }
        // Wait for deferred targets, then process them
        if (deferredTargets.length > 0) {
            const uniqueDeferredRuns = new Set(deferredTargets.map((t) => t.runId));
            // Every deferred run has a positive remaining wait by construction.
            const maxWaitNeeded = Math.max(...Array.from(uniqueDeferredRuns, (id) => remainingWaitByRun.get(id)));
            this.log(pc.yellow(`>> Waiting ${Math.ceil(maxWaitNeeded / 1000)}s for ${uniqueDeferredRuns.size} recent run(s) to ensure all recordings are uploaded...`));
            await new Promise((resolve) => setTimeout(resolve, maxWaitNeeded));
            this.log(pc.blue(`>> Fetching and downloading recordings for ${deferredTargets.length} deferred target(s)...`));
            addToTotals(await this.fetchAndDownloadTargets(api, deferredTargets));
        }
        if (totals.found === 0) {
            // Each batch already reported that it found nothing; do not claim success.
            return;
        }
        if (totals.failed > 0) {
            this.warn(`${totals.failed} of ${totals.found} recording(s) failed to download. Re-run the command to retry.`);
        }
        if (totals.downloaded + totals.skipped === 0) {
            return;
        }
        const uniqueRuns = new Set(downloadTargets.map((t) => t.runId));
        // All targets are from the same version
        const recordingsDir = path.join(versionDir, "recordings");
        this.log(pc.green(`\n✓ Downloaded recordings for ${downloadTargets.length} participant(s) across ${uniqueRuns.size} run(s) to ${recordingsDir}`));
    }
    /**
     * List the recordings of every target, print a count/size summary and
     * download them.
     *
     * @param api - API client.
     * @param targets - `{ version, runId, participantId, experimentDir }` items.
     * @returns Counts for this batch: `found` (recordings listed) plus the
     *   `downloaded` / `skipped` / `failed` counts from the download step.
     */
    async fetchAndDownloadTargets(api, targets) {
        // Keep the listings so the download step does not request them again.
        const recordingsByTarget = new Map();
        let totalCount = 0;
        let totalSize = 0;
        for (const target of targets) {
            try {
                const recordings = await api.getRunRecordings(target.runId, target.participantId);
                totalCount += recordings.length;
                totalSize += recordings.reduce((sum, r) => sum + r.sizeBytes, 0);
                recordingsByTarget.set(target, recordings);
            }
            catch {
                // Skip targets with no recordings
            }
        }
        if (totalCount === 0) {
            this.log(pc.yellow("   No recordings found for these targets."));
            return { found: 0, downloaded: 0, skipped: 0, failed: 0 };
        }
        this.log(pc.blue(`   Found ${totalCount} recordings (Total: ${formatBytes(totalSize)})`));
        const outcome = await this.downloadRecordings(api, targets, recordingsByTarget);
        return { found: totalCount, ...outcome };
    }
    /**
     * Download every recording of the given targets, three at a time, skipping
     * files that already exist. Individual failures do not abort the batch.
     *
     * @param api - API client.
     * @param targets - `{ version, runId, participantId, experimentDir }` items.
     * @param recordingsByTarget - Optional map from target object to its
     *   already-fetched recording list; targets missing from it are listed via
     *   the API.
     * @returns `{ downloaded, skipped, failed }` counts.
     */
    async downloadRecordings(api, targets, recordingsByTarget) {
        const outcome = { downloaded: 0, skipped: 0, failed: 0 };
        const allTasks = [];
        for (const target of targets) {
            const { version, runId, participantId, experimentDir } = target;
            let recordings = recordingsByTarget?.get(target);
            if (recordings === undefined) {
                try {
                    recordings = await api.getRunRecordings(runId, participantId);
                }
                catch {
                    // Skip targets with no recordings
                    continue;
                }
            }
            if (recordings.length === 0)
                continue;
            const participantDir = path.join(experimentDir, "versions", version.toString().padStart(3, "0"), "recordings", runId, participantId);
            // Create tasks for each recording
            for (const recording of recordings) {
                const sanitizedFilename = `${sanitizeTimestamp(recording.timestamp)}.mcpr`;
                const baseTitle = `${participantId} - ${runId} - ${sanitizedFilename}`;
                allTasks.push({
                    title: baseTitle,
                    task: async (_, task) => {
                        const outputPath = path.join(participantDir, sanitizedFilename);
                        // Skip if already exists
                        let alreadyDownloaded = true;
                        try {
                            await fs.access(outputPath);
                        }
                        catch {
                            alreadyDownloaded = false;
                        }
                        if (alreadyDownloaded) {
                            outcome.skipped++;
                            task.skip("Already downloaded");
                            return;
                        }
                        try {
                            const { downloadUrl } = await api.getRecordingDownloadUrl(runId, participantId, recording.timestamp);
                            await downloadFile(downloadUrl, outputPath);
                        }
                        catch (error) {
                            outcome.failed++;
                            throw error; // let Listr mark the task as failed
                        }
                        outcome.downloaded++;
                        task.title = `${baseTitle} (${formatBytes(recording.sizeBytes)})`;
                    },
                });
            }
        }
        if (allTasks.length === 0) {
            return outcome;
        }
        const tasks = new Listr(allTasks, {
            concurrent: 3,
            exitOnError: false,
        });
        await tasks.run();
        return outcome;
    }
}
|
|
@@ -0,0 +1,17 @@
|
|
|
1
|
+
import { Command } from "@oclif/core";
|
|
2
|
+
/**
 * Type declaration of the oclif `Run` command class: static metadata
 * (description, examples), one string argument `name`, the option flags listed
 * below, and the async `run()` entry point.
 */
export default class Run extends Command {
|
|
3
|
+
static description: string;
|
|
4
|
+
static examples: string[];
|
|
5
|
+
// Positional arguments: a single string argument, `name`.
static args: {
|
|
6
|
+
name: import("@oclif/core/interfaces").Arg<string, Record<string, unknown>>;
|
|
7
|
+
};
|
|
8
|
+
// Flags: three string options, one numeric option and two booleans.
static flags: {
|
|
9
|
+
"api-key": import("@oclif/core/interfaces").OptionFlag<string, import("@oclif/core/interfaces").CustomOptions>;
|
|
10
|
+
"api-url": import("@oclif/core/interfaces").OptionFlag<string, import("@oclif/core/interfaces").CustomOptions>;
|
|
11
|
+
"web-url": import("@oclif/core/interfaces").OptionFlag<string, import("@oclif/core/interfaces").CustomOptions>;
|
|
12
|
+
"new-version": import("@oclif/core/interfaces").BooleanFlag<boolean>;
|
|
13
|
+
"max-concurrent": import("@oclif/core/interfaces").OptionFlag<number, import("@oclif/core/interfaces").CustomOptions>;
|
|
14
|
+
"download-recordings": import("@oclif/core/interfaces").BooleanFlag<boolean>;
|
|
15
|
+
};
|
|
16
|
+
/** Command entry point; resolves with no value. */
run(): Promise<void>;
|
|
17
|
+
}
|
|
@@ -0,0 +1,67 @@
|
|
|
1
|
+
import { Args, Command, Flags } from "@oclif/core";
|
|
2
|
+
import pc from "picocolors";
|
|
3
|
+
import { ApiClient } from "../../lib/api-client.js";
|
|
4
|
+
import { Experimenter } from "../../lib/experiment/experimenter.js";
|
|
5
|
+
import { getConfigFlags } from "../../lib/flags.js";
|
|
6
|
+
const DEFAULT_MAX_CONCURRENT = 5;
|
|
7
|
+
/**
 * `experiment run` command: validates that the experiment and its config file
 * exist, then hands off to `Experimenter.run` with the options taken from the
 * CLI flags.
 */
export default class Run extends Command {
    static description = "Run an experiment. If the experiment had an ongoing version, it will resume from the last state.";
    static examples = [
        "<%= config.bin %> <%= command.id %> my-experiment",
        "<%= config.bin %> <%= command.id %> my-experiment --new-version",
        "<%= config.bin %> <%= command.id %> my-experiment --max-concurrent 10",
    ];
    static args = {
        name: Args.string({
            description: "Name of the experiment to run",
            required: true,
        }),
    };
    static flags = {
        "new-version": Flags.boolean({
            char: "n",
            description: "Start a new version of the experiment",
            default: false,
        }),
        "max-concurrent": Flags.integer({
            char: "m",
            description: "Maximum concurrent runs",
            default: DEFAULT_MAX_CONCURRENT,
            // Zero or negative concurrency is meaningless; reject it at parse time
            // instead of passing it on to the experimenter.
            min: 1,
        }),
        "download-recordings": Flags.boolean({
            char: "d",
            description: "Automatically download recordings after each run finishes",
            default: false,
        }),
        ...getConfigFlags("api-key", "api-url", "web-url"),
    };
    /**
     * Parse CLI input, verify the experiment and its config exist, and run it.
     * Any error raised by the experimenter is reported through `this.error`
     * prefixed with "Experiment failed:".
     */
    async run() {
        const { args, flags } = await this.parse(Run);
        const api = new ApiClient(flags["api-url"], flags["api-key"]);
        const experimenter = new Experimenter(args.name, flags["web-url"], api);
        // Check if experiment exists
        if (!(await experimenter.exists())) {
            this.error(pc.red(`Experiment '${args.name}' does not exist. Run 'kradle experiment create ${args.name}' first.`));
        }
        // Check if config.ts exists
        if (!(await experimenter.configExists())) {
            this.error(pc.red(`Config file not found at ${experimenter.configPath}`));
        }
        this.log(pc.blue(`>> Starting experiment: ${args.name}`));
        if (flags["new-version"]) {
            this.log(pc.yellow("   Starting a new version of the experiment."));
        }
        try {
            await experimenter.run({
                new: flags["new-version"],
                maxConcurrent: flags["max-concurrent"],
                openMetabase: true,
                downloadRecordings: flags["download-recordings"],
            });
            this.log(pc.green("\n✓ Experiment complete!"));
        }
        catch (error) {
            this.error(pc.red(`Experiment failed: ${error instanceof Error ? error.message : String(error)}`));
        }
    }
}
|
package/dist/commands/init.js
CHANGED
|
@@ -34,10 +34,10 @@ export default class Init extends Command {
|
|
|
34
34
|
const nonHiddenFiles = files.filter((f) => !f.startsWith("."));
|
|
35
35
|
const useCurrentDir = nonHiddenFiles.length === 0;
|
|
36
36
|
if (useCurrentDir) {
|
|
37
|
-
this.log(pc.yellow("Current directory is empty, it will be used to store challenges and
|
|
37
|
+
this.log(pc.yellow("Current directory is empty, it will be used to store challenges and experiments."));
|
|
38
38
|
}
|
|
39
39
|
else {
|
|
40
|
-
this.log(pc.yellow("Current directory is not empty, a subdirectory will be created to store challenges and
|
|
40
|
+
this.log(pc.yellow("Current directory is not empty, a subdirectory will be created to store challenges and experiments."));
|
|
41
41
|
}
|
|
42
42
|
let projectName;
|
|
43
43
|
if (flags.name) {
|
package/dist/lib/api-client.d.ts
CHANGED
|
@@ -1,10 +1,10 @@
|
|
|
1
1
|
import type z from "zod";
|
|
2
|
-
import type
|
|
3
|
-
import type { Config } from "./config.js";
|
|
4
|
-
import { type AgentSchemaType, type ChallengeSchemaType, HumanSchema, type RunStatusSchemaType } from "./schemas.js";
|
|
2
|
+
import { type AgentSchemaType, type ChallengeConfigSchemaType, type ChallengeSchemaType, HumanSchema, type RecordingDownloadUrlResponse, type RecordingMetadata, type RunStatusSchemaType } from "./schemas.js";
|
|
5
3
|
export declare class ApiClient {
|
|
6
|
-
private
|
|
7
|
-
|
|
4
|
+
private apiUrl;
|
|
5
|
+
private kradleApiKey;
|
|
6
|
+
private isStudio;
|
|
7
|
+
constructor(apiUrl: string, kradleApiKey: string, isStudio?: boolean);
|
|
8
8
|
private request;
|
|
9
9
|
private get;
|
|
10
10
|
private post;
|
|
@@ -40,17 +40,43 @@ export declare class ApiClient {
|
|
|
40
40
|
createChallenge(slug: string): Promise<unknown>;
|
|
41
41
|
/**
|
|
42
42
|
* Update a challenge definition in the cloud.
|
|
43
|
-
* @param
|
|
44
|
-
* @param challengeConfig - The challenge config to upload.
|
|
43
|
+
* @param challengeSlug - The slug of the challenge.
|
|
44
|
+
* @param challengeConfig - The challenge config to upload.
|
|
45
|
+
* @param visibility - The visibility to set.
|
|
45
46
|
* @returns A promise that resolves with no value.
|
|
46
47
|
*/
|
|
47
|
-
updateChallenge(
|
|
48
|
-
|
|
48
|
+
updateChallenge(challengeSlug: string, challengeConfig: ChallengeConfigSchemaType, visibility: "private" | "public"): Promise<void>;
|
|
49
|
+
/**
|
|
50
|
+
* Update the visibility of a challenge.
|
|
51
|
+
* @param challengeSlug - The slug of the challenge.
|
|
52
|
+
* @param visibility - The visibility to set.
|
|
53
|
+
* @returns A promise that resolves with no value.
|
|
54
|
+
*/
|
|
55
|
+
updateChallengeVisibility(challengeSlug: string, visibility: "private" | "public"): Promise<void>;
|
|
56
|
+
/**
|
|
57
|
+
* Upload a challenge datapack to Google Cloud Storage.
|
|
58
|
+
* @param slug - The slug of the challenge.
|
|
59
|
+
* @param tarballPath - The path to the tarball file.
|
|
60
|
+
* @returns A promise that resolves with no value.
|
|
61
|
+
*/
|
|
62
|
+
uploadChallengeDatapack(slug: string, tarballPath: string): Promise<void>;
|
|
63
|
+
/**
|
|
64
|
+
* Get the upload URL for a challenge datapack.
|
|
65
|
+
* @param slug - The slug of the challenge.
|
|
66
|
+
* @returns The upload URL.
|
|
67
|
+
*/
|
|
68
|
+
getChallengeUploadUrl(slug: string): Promise<string>;
|
|
49
69
|
runChallenge(runData: {
|
|
50
70
|
challenge: string;
|
|
51
71
|
participants: unknown[];
|
|
52
|
-
}
|
|
72
|
+
}): Promise<{
|
|
53
73
|
runIds?: string[] | undefined;
|
|
74
|
+
participants?: Record<string, {
|
|
75
|
+
agent: string;
|
|
76
|
+
role: string;
|
|
77
|
+
inputOrder: number;
|
|
78
|
+
}> | undefined;
|
|
79
|
+
id?: string | undefined;
|
|
54
80
|
}>;
|
|
55
81
|
deleteChallenge(challengeId: string): Promise<void>;
|
|
56
82
|
/**
|
|
@@ -66,4 +92,19 @@ export declare class ApiClient {
|
|
|
66
92
|
* @throws an error if the tag fails to be added.
|
|
67
93
|
*/
|
|
68
94
|
tagRun(runId: string, tag: string): Promise<void>;
|
|
95
|
+
/**
|
|
96
|
+
* Get recordings for a specific participant in a run.
|
|
97
|
+
* @param runId - The ID of the run.
|
|
98
|
+
* @param participantId - The ID of the participant.
|
|
99
|
+
* @returns Array of recording metadata.
|
|
100
|
+
*/
|
|
101
|
+
getRunRecordings(runId: string, participantId: string): Promise<RecordingMetadata[]>;
|
|
102
|
+
/**
|
|
103
|
+
* Get a signed download URL for a specific recording.
|
|
104
|
+
* @param runId - The ID of the run.
|
|
105
|
+
* @param participantId - The ID of the participant.
|
|
106
|
+
* @param timestamp - The timestamp of the recording.
|
|
107
|
+
* @returns Download URL and expiration time.
|
|
108
|
+
*/
|
|
109
|
+
getRecordingDownloadUrl(runId: string, participantId: string, timestamp: string): Promise<RecordingDownloadUrlResponse>;
|
|
69
110
|
}
|