@kradle/cli 0.1.0 → 0.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +32 -1
- package/dist/commands/challenge/create.js +1 -1
- package/dist/commands/challenge/list.js +3 -1
- package/dist/commands/challenge/run.js +1 -1
- package/dist/commands/experiment/recordings.d.ts +19 -0
- package/dist/commands/experiment/recordings.js +416 -0
- package/dist/commands/experiment/run.d.ts +1 -0
- package/dist/commands/experiment/run.js +6 -0
- package/dist/lib/api-client.d.ts +24 -2
- package/dist/lib/api-client.js +29 -4
- package/dist/lib/experiment/experimenter.d.ts +5 -0
- package/dist/lib/experiment/experimenter.js +98 -0
- package/dist/lib/experiment/runner.d.ts +2 -0
- package/dist/lib/experiment/runner.js +19 -1
- package/dist/lib/experiment/types.d.ts +4 -0
- package/dist/lib/experiment/types.js +1 -0
- package/dist/lib/schemas.d.ts +31 -2
- package/dist/lib/schemas.js +19 -1
- package/oclif.manifest.json +82 -1
- package/package.json +7 -3
package/README.md
CHANGED
|
@@ -173,7 +173,7 @@ Execute or resume an experiment:
|
|
|
173
173
|
|
|
174
174
|
```bash
|
|
175
175
|
kradle experiment run <name> # Resume current version or create first one
|
|
176
|
-
kradle experiment run <name> --new # Start a new version
|
|
176
|
+
kradle experiment run <name> --new-version # Start a new version
|
|
177
177
|
kradle experiment run <name> --max-concurrent 10 # Control parallelism (default: 5)
|
|
178
178
|
```
|
|
179
179
|
|
|
@@ -242,6 +242,32 @@ npm run lint:fix # Auto-fix linting issues
|
|
|
242
242
|
npm run format # Format code with Biome
|
|
243
243
|
```
|
|
244
244
|
|
|
245
|
+
### Running Tests
|
|
246
|
+
|
|
247
|
+
The CLI has integration tests that verify commands work correctly with the dev API.
|
|
248
|
+
|
|
249
|
+
**Setup:**
|
|
250
|
+
|
|
251
|
+
1. Copy `.env.test.example` to `.env.test`
|
|
252
|
+
2. Add your Kradle API key (from https://dev.kradle.ai/settings/api-keys)
|
|
253
|
+
|
|
254
|
+
```bash
|
|
255
|
+
cp .env.test.example .env.test
|
|
256
|
+
# Edit .env.test and add your API key
|
|
257
|
+
```
|
|
258
|
+
|
|
259
|
+
**Run tests:**
|
|
260
|
+
|
|
261
|
+
```bash
|
|
262
|
+
npm test # Run all tests
|
|
263
|
+
npm run test:watch # Run tests in watch mode
|
|
264
|
+
npm run test:integration # Run integration tests
|
|
265
|
+
```
|
|
266
|
+
|
|
267
|
+
**Note:** Integration tests make real API calls to the dev environment and may create/delete challenges.
|
|
268
|
+
|
|
269
|
+
**CI Configuration:** Integration tests run in GitHub Actions on PRs. The `KRADLE_API_KEY` secret must be configured in the repository settings.
|
|
270
|
+
|
|
245
271
|
### Challenge Structure
|
|
246
272
|
|
|
247
273
|
Each challenge is a folder in `challenges/<slug>/` containing:
|
|
@@ -281,6 +307,11 @@ kradle-cli/
|
|
|
281
307
|
│ │ └── experiment/ # Experiment commands
|
|
282
308
|
│ └── lib/ # Core libraries
|
|
283
309
|
│ └── experiment/ # Experiment system
|
|
310
|
+
├── tests/ # Integration tests
|
|
311
|
+
│ ├── helpers/ # Test utilities
|
|
312
|
+
│ └── integration/ # Integration test suites
|
|
313
|
+
│ ├── challenge/ # Challenge command tests
|
|
314
|
+
│ └── experiment/ # Experiment command tests
|
|
284
315
|
└── static/ # Template files
|
|
285
316
|
└── project_template/ # Files for kradle init
|
|
286
317
|
```
|
|
@@ -70,7 +70,7 @@ export const config = ${configStr};
|
|
|
70
70
|
{
|
|
71
71
|
title: "Uploading initial datapack",
|
|
72
72
|
task: async (_, task) => {
|
|
73
|
-
api.uploadChallengeDatapack(args.challengeSlug, challenge.tarballPath);
|
|
73
|
+
await api.uploadChallengeDatapack(args.challengeSlug, challenge.tarballPath);
|
|
74
74
|
task.title = `Uploaded initial datapack`;
|
|
75
75
|
},
|
|
76
76
|
},
|
|
@@ -27,7 +27,9 @@ export default class List extends Command {
|
|
|
27
27
|
for (const slug of Array.from(allSlugs).sort()) {
|
|
28
28
|
const challenge = new Challenge(slug, flags["challenges-path"]);
|
|
29
29
|
const inCloud = cloudMap.has(slug);
|
|
30
|
-
|
|
30
|
+
// Extract short slug (after the colon) for local comparison
|
|
31
|
+
const shortSlug = slug.includes(":") ? slug.split(":")[1] : slug;
|
|
32
|
+
const inLocal = localChallenges.includes(shortSlug);
|
|
31
33
|
let status;
|
|
32
34
|
if (inCloud && inLocal) {
|
|
33
35
|
status = pc.green("✓ synced");
|
|
@@ -22,7 +22,7 @@ export default class Run extends Command {
|
|
|
22
22
|
async run() {
|
|
23
23
|
const { args, flags } = await this.parse(Run);
|
|
24
24
|
const apiUrl = flags.studio ? flags["studio-api-url"] : flags["api-url"];
|
|
25
|
-
const studioApi = new ApiClient(apiUrl, flags["api-key"]);
|
|
25
|
+
const studioApi = new ApiClient(apiUrl, flags["api-key"], flags.studio);
|
|
26
26
|
const challenge = new Challenge(args.challengeSlug, flags["challenges-path"]);
|
|
27
27
|
try {
|
|
28
28
|
const { participants } = (await loadTemplateRun());
|
|
@@ -0,0 +1,19 @@
|
|
|
1
|
+
import { Command } from "@oclif/core";
|
|
2
|
+
export default class Recordings extends Command {
|
|
3
|
+
static description: string;
|
|
4
|
+
static examples: string[];
|
|
5
|
+
static args: {
|
|
6
|
+
experimentName: import("@oclif/core/interfaces").Arg<string, Record<string, unknown>>;
|
|
7
|
+
runId: import("@oclif/core/interfaces").Arg<string | undefined, Record<string, unknown>>;
|
|
8
|
+
};
|
|
9
|
+
static flags: {
|
|
10
|
+
"api-key": import("@oclif/core/interfaces").OptionFlag<string, import("@oclif/core/interfaces").CustomOptions>;
|
|
11
|
+
"api-url": import("@oclif/core/interfaces").OptionFlag<string, import("@oclif/core/interfaces").CustomOptions>;
|
|
12
|
+
all: import("@oclif/core/interfaces").BooleanFlag<boolean>;
|
|
13
|
+
version: import("@oclif/core/interfaces").OptionFlag<number | undefined, import("@oclif/core/interfaces").CustomOptions>;
|
|
14
|
+
};
|
|
15
|
+
run(): Promise<void>;
|
|
16
|
+
private downloadForExperiment;
|
|
17
|
+
private fetchAndDownloadTargets;
|
|
18
|
+
private downloadRecordings;
|
|
19
|
+
}
|
|
@@ -0,0 +1,416 @@
|
|
|
1
|
+
import fs from "node:fs/promises";
|
|
2
|
+
import path from "node:path";
|
|
3
|
+
import { Args, Command, Flags } from "@oclif/core";
|
|
4
|
+
import enquirer from "enquirer";
|
|
5
|
+
import { Listr } from "listr2";
|
|
6
|
+
import pc from "picocolors";
|
|
7
|
+
import { ApiClient } from "../../lib/api-client.js";
|
|
8
|
+
import { Experimenter } from "../../lib/experiment/experimenter.js";
|
|
9
|
+
import { getConfigFlags } from "../../lib/flags.js";
|
|
10
|
+
// Check if recordings exist locally for a run
|
|
11
|
+
async function checkRecordingsExist(experimentDir, version, runId) {
|
|
12
|
+
const recordingsPath = path.join(experimentDir, "versions", version.toString().padStart(3, "0"), "recordings", runId);
|
|
13
|
+
try {
|
|
14
|
+
await fs.access(recordingsPath);
|
|
15
|
+
const files = await fs.readdir(recordingsPath, { recursive: true });
|
|
16
|
+
return files.some((f) => String(f).endsWith(".mcpr"));
|
|
17
|
+
}
|
|
18
|
+
catch {
|
|
19
|
+
return false;
|
|
20
|
+
}
|
|
21
|
+
}
|
|
22
|
+
// Format bytes for display
|
|
23
|
+
function formatBytes(bytes) {
|
|
24
|
+
if (bytes === 0)
|
|
25
|
+
return "0 Bytes";
|
|
26
|
+
const k = 1024;
|
|
27
|
+
const sizes = ["Bytes", "KB", "MB", "GB"];
|
|
28
|
+
const i = Math.floor(Math.log(bytes) / Math.log(k));
|
|
29
|
+
return `${Math.round((bytes / k ** i) * 100) / 100} ${sizes[i]}`;
|
|
30
|
+
}
|
|
31
|
+
// Sanitize timestamp string for use in filenames
|
|
32
|
+
function sanitizeTimestamp(timestamp) {
|
|
33
|
+
// Replace colons, spaces, and other problematic characters
|
|
34
|
+
return timestamp
|
|
35
|
+
.replace(/:/g, "-")
|
|
36
|
+
.replace(/\s+/g, "_")
|
|
37
|
+
.replace(/[<>:"|?*]/g, "_");
|
|
38
|
+
}
|
|
39
|
+
// Download file with retry logic
|
|
40
|
+
async function downloadFile(url, outputPath, maxRetries = 3) {
|
|
41
|
+
for (let attempt = 1; attempt <= maxRetries; attempt++) {
|
|
42
|
+
try {
|
|
43
|
+
const response = await fetch(url);
|
|
44
|
+
if (!response.ok) {
|
|
45
|
+
throw new Error(`HTTP ${response.status}: ${response.statusText}`);
|
|
46
|
+
}
|
|
47
|
+
const buffer = await response.arrayBuffer();
|
|
48
|
+
await fs.mkdir(path.dirname(outputPath), { recursive: true });
|
|
49
|
+
await fs.writeFile(outputPath, Buffer.from(buffer));
|
|
50
|
+
return;
|
|
51
|
+
}
|
|
52
|
+
catch (error) {
|
|
53
|
+
if (attempt === maxRetries)
|
|
54
|
+
throw error;
|
|
55
|
+
await new Promise((resolve) => setTimeout(resolve, 1000 * attempt)); // linear backoff: 1s, 2s, 3s
|
|
56
|
+
}
|
|
57
|
+
}
|
|
58
|
+
}
|
|
59
|
+
// Get all versions for an experiment
|
|
60
|
+
async function getAllVersions(experimentDir) {
|
|
61
|
+
const versionsDir = path.join(experimentDir, "versions");
|
|
62
|
+
try {
|
|
63
|
+
const entries = await fs.readdir(versionsDir, { withFileTypes: true });
|
|
64
|
+
return entries
|
|
65
|
+
.filter((e) => e.isDirectory())
|
|
66
|
+
.map((e) => parseInt(e.name, 10))
|
|
67
|
+
.filter((n) => !Number.isNaN(n))
|
|
68
|
+
.sort((a, b) => a - b);
|
|
69
|
+
}
|
|
70
|
+
catch {
|
|
71
|
+
return [];
|
|
72
|
+
}
|
|
73
|
+
}
|
|
74
|
+
export default class Recordings extends Command {
|
|
75
|
+
static description = "Download recordings from an experiment run";
|
|
76
|
+
static examples = [
|
|
77
|
+
"<%= config.bin %> <%= command.id %> my-experiment",
|
|
78
|
+
"<%= config.bin %> <%= command.id %> my-experiment <run-id>",
|
|
79
|
+
"<%= config.bin %> <%= command.id %> my-experiment --all",
|
|
80
|
+
"<%= config.bin %> <%= command.id %> my-experiment <run-id> --all",
|
|
81
|
+
"<%= config.bin %> <%= command.id %> my-experiment --version 2",
|
|
82
|
+
"<%= config.bin %> <%= command.id %> my-experiment --version 1 --all",
|
|
83
|
+
];
|
|
84
|
+
static args = {
|
|
85
|
+
experimentName: Args.string({
|
|
86
|
+
description: "Experiment name",
|
|
87
|
+
required: true,
|
|
88
|
+
}),
|
|
89
|
+
runId: Args.string({
|
|
90
|
+
description: "Specific run ID to download recordings from (optional)",
|
|
91
|
+
required: false,
|
|
92
|
+
}),
|
|
93
|
+
};
|
|
94
|
+
static flags = {
|
|
95
|
+
all: Flags.boolean({
|
|
96
|
+
description: "Download all runs and participants (if no run specified), or all participants (if run specified)",
|
|
97
|
+
default: false,
|
|
98
|
+
}),
|
|
99
|
+
version: Flags.integer({
|
|
100
|
+
description: "Specific experiment version to download recordings from (e.g., 0, 1, 2)",
|
|
101
|
+
required: false,
|
|
102
|
+
}),
|
|
103
|
+
...getConfigFlags("api-key", "api-url"),
|
|
104
|
+
};
|
|
105
|
+
async run() {
|
|
106
|
+
const { args, flags } = await this.parse(Recordings);
|
|
107
|
+
const api = new ApiClient(flags["api-url"], flags["api-key"]);
|
|
108
|
+
const { experimentName, runId } = args;
|
|
109
|
+
await this.downloadForExperiment(experimentName, runId, api, flags.all, flags.version);
|
|
110
|
+
}
|
|
111
|
+
async downloadForExperiment(experimentName, runId, api, all, version) {
|
|
112
|
+
const experimenter = new Experimenter(experimentName, "", api);
|
|
113
|
+
// Check if experiment exists
|
|
114
|
+
if (!(await experimenter.exists())) {
|
|
115
|
+
this.error(pc.red(`Experiment '${experimentName}' does not exist. Run 'kradle experiment list' to see available experiments.`));
|
|
116
|
+
}
|
|
117
|
+
const experimentDir = experimenter.experimentDir;
|
|
118
|
+
// Get all versions
|
|
119
|
+
const allVersions = await getAllVersions(experimentDir);
|
|
120
|
+
if (allVersions.length === 0) {
|
|
121
|
+
this.error(pc.red("No experiment versions found. Run the experiment first."));
|
|
122
|
+
}
|
|
123
|
+
// Default to latest version if not specified
|
|
124
|
+
let targetVersion;
|
|
125
|
+
if (version !== undefined) {
|
|
126
|
+
if (!allVersions.includes(version)) {
|
|
127
|
+
this.error(pc.red(`Version ${version} not found in experiment '${experimentName}'. ` +
|
|
128
|
+
`Available versions: ${allVersions.join(", ")}`));
|
|
129
|
+
}
|
|
130
|
+
targetVersion = version;
|
|
131
|
+
this.log(pc.blue(`>> Filtering to version ${version}`));
|
|
132
|
+
}
|
|
133
|
+
else {
|
|
134
|
+
// Default to latest version
|
|
135
|
+
targetVersion = Math.max(...allVersions);
|
|
136
|
+
}
|
|
137
|
+
const allRunInfos = [];
|
|
138
|
+
const completedStatuses = new Set(["completed", "finished", "game_over"]);
|
|
139
|
+
const progressPath = path.join(experimentDir, "versions", targetVersion.toString().padStart(3, "0"), "progress.json");
|
|
140
|
+
try {
|
|
141
|
+
const progressData = await fs.readFile(progressPath, "utf-8");
|
|
142
|
+
const progress = JSON.parse(progressData);
|
|
143
|
+
for (const entry of progress.entries) {
|
|
144
|
+
// Only include runs that are completed (exclude in-progress, queued, or error runs)
|
|
145
|
+
if (entry.runId && completedStatuses.has(entry.status)) {
|
|
146
|
+
const hasRecordings = await checkRecordingsExist(experimentDir, targetVersion, entry.runId);
|
|
147
|
+
allRunInfos.push({
|
|
148
|
+
version: targetVersion,
|
|
149
|
+
runId: entry.runId,
|
|
150
|
+
index: entry.index,
|
|
151
|
+
status: entry.status,
|
|
152
|
+
hasRecordings,
|
|
153
|
+
participantIds: entry.participantIds,
|
|
154
|
+
endTime: entry.endTime,
|
|
155
|
+
});
|
|
156
|
+
}
|
|
157
|
+
}
|
|
158
|
+
}
|
|
159
|
+
catch { }
|
|
160
|
+
if (allRunInfos.length === 0) {
|
|
161
|
+
this.error(pc.yellow("No completed runs found. Wait for runs to finish or run the experiment first."));
|
|
162
|
+
}
|
|
163
|
+
let selectedRuns;
|
|
164
|
+
if (all && !runId) {
|
|
165
|
+
// Download all runs and all participants (--all without specific run)
|
|
166
|
+
selectedRuns = allRunInfos;
|
|
167
|
+
this.log(pc.blue(`>> Downloading recordings for all ${selectedRuns.length} runs`));
|
|
168
|
+
}
|
|
169
|
+
else if (runId) {
|
|
170
|
+
// Find specific run by ID
|
|
171
|
+
const matchingRun = allRunInfos.find((r) => r.runId === runId);
|
|
172
|
+
if (!matchingRun) {
|
|
173
|
+
this.error(pc.red(`Run ID '${runId}' not found in experiment '${experimentName}'. ` +
|
|
174
|
+
`Run 'kradle experiment recordings ${experimentName}' to see available runs.`));
|
|
175
|
+
}
|
|
176
|
+
selectedRuns = [matchingRun];
|
|
177
|
+
this.log(pc.blue(`>> Downloading recordings for run: ${runId}`));
|
|
178
|
+
}
|
|
179
|
+
else {
|
|
180
|
+
// Interactive run selection
|
|
181
|
+
const choices = [
|
|
182
|
+
// Only show "All runs" option if there are multiple runs
|
|
183
|
+
...(allRunInfos.length > 1
|
|
184
|
+
? [
|
|
185
|
+
{
|
|
186
|
+
name: "all",
|
|
187
|
+
message: `All runs (${allRunInfos.length} total)`,
|
|
188
|
+
hint: "Download all",
|
|
189
|
+
},
|
|
190
|
+
]
|
|
191
|
+
: []),
|
|
192
|
+
...allRunInfos.map((run) => {
|
|
193
|
+
const indicator = run.hasRecordings ? pc.blue("·") : "☐";
|
|
194
|
+
const participants = run.participantIds?.join(", ") || "No participants";
|
|
195
|
+
return {
|
|
196
|
+
name: run.runId,
|
|
197
|
+
message: `${indicator} ${participants} - ${run.runId}`,
|
|
198
|
+
hint: run.status,
|
|
199
|
+
};
|
|
200
|
+
}),
|
|
201
|
+
];
|
|
202
|
+
const { selectedRunId } = await enquirer.prompt({
|
|
203
|
+
type: "select",
|
|
204
|
+
name: "selectedRunId",
|
|
205
|
+
message: "Select a run to download recordings from",
|
|
206
|
+
choices,
|
|
207
|
+
});
|
|
208
|
+
if (selectedRunId === "all") {
|
|
209
|
+
selectedRuns = allRunInfos;
|
|
210
|
+
this.log(pc.blue(`>> Downloading recordings for all ${selectedRuns.length} runs`));
|
|
211
|
+
}
|
|
212
|
+
else {
|
|
213
|
+
const selectedRun = allRunInfos.find((r) => r.runId === selectedRunId);
|
|
214
|
+
if (!selectedRun) {
|
|
215
|
+
this.error(pc.red("Selected run not found."));
|
|
216
|
+
}
|
|
217
|
+
selectedRuns = [selectedRun];
|
|
218
|
+
}
|
|
219
|
+
}
|
|
220
|
+
// Build download targets
|
|
221
|
+
const downloadTargets = [];
|
|
222
|
+
// Prompt for participant selection only if: single run selected AND not --all flag
|
|
223
|
+
if (selectedRuns.length === 1 && !all) {
|
|
224
|
+
const firstRunInfo = allRunInfos.find((r) => r.runId === selectedRuns[0].runId);
|
|
225
|
+
if (!firstRunInfo?.participantIds || firstRunInfo.participantIds.length === 0) {
|
|
226
|
+
this.error(pc.red("Participant IDs not available for this run. This may be an old run created before recording support was added."));
|
|
227
|
+
}
|
|
228
|
+
// Interactive participant selection for single run
|
|
229
|
+
const participantChoices = [
|
|
230
|
+
{ name: "all", message: "All participants", value: "all" },
|
|
231
|
+
...firstRunInfo.participantIds.map((id, idx) => ({
|
|
232
|
+
name: id,
|
|
233
|
+
message: `Participant ${idx}: ${id}`,
|
|
234
|
+
value: id,
|
|
235
|
+
})),
|
|
236
|
+
];
|
|
237
|
+
const { participantSelection } = await enquirer.prompt({
|
|
238
|
+
type: "select",
|
|
239
|
+
name: "participantSelection",
|
|
240
|
+
message: "Select participant(s)",
|
|
241
|
+
choices: participantChoices,
|
|
242
|
+
});
|
|
243
|
+
const run = selectedRuns[0];
|
|
244
|
+
if (participantSelection === "all") {
|
|
245
|
+
// Download all participants
|
|
246
|
+
for (const participantId of firstRunInfo.participantIds) {
|
|
247
|
+
downloadTargets.push({
|
|
248
|
+
version: run.version,
|
|
249
|
+
runId: run.runId,
|
|
250
|
+
participantId,
|
|
251
|
+
experimentDir,
|
|
252
|
+
});
|
|
253
|
+
}
|
|
254
|
+
}
|
|
255
|
+
else {
|
|
256
|
+
// Download single participant
|
|
257
|
+
downloadTargets.push({
|
|
258
|
+
version: run.version,
|
|
259
|
+
runId: run.runId,
|
|
260
|
+
participantId: participantSelection,
|
|
261
|
+
experimentDir,
|
|
262
|
+
});
|
|
263
|
+
}
|
|
264
|
+
}
|
|
265
|
+
else {
|
|
266
|
+
// Multiple runs OR --all flag: automatically download all participants
|
|
267
|
+
for (const run of selectedRuns) {
|
|
268
|
+
const runInfo = allRunInfos.find((r) => r.runId === run.runId);
|
|
269
|
+
if (!runInfo?.participantIds || runInfo.participantIds.length === 0) {
|
|
270
|
+
this.log(pc.yellow(`Warning: Skipping run ${run.runId} - no participant IDs available`));
|
|
271
|
+
continue;
|
|
272
|
+
}
|
|
273
|
+
// Download all participants for this run
|
|
274
|
+
for (const participantId of runInfo.participantIds) {
|
|
275
|
+
downloadTargets.push({
|
|
276
|
+
version: run.version,
|
|
277
|
+
runId: run.runId,
|
|
278
|
+
participantId,
|
|
279
|
+
experimentDir,
|
|
280
|
+
});
|
|
281
|
+
}
|
|
282
|
+
}
|
|
283
|
+
}
|
|
284
|
+
// Ensure we wait at least 90 seconds after run completion before downloading
|
|
285
|
+
// to avoid missing recordings that take time to upload
|
|
286
|
+
const MIN_WAIT_AFTER_COMPLETION_MS = 90000; // 90 seconds
|
|
287
|
+
const now = Date.now();
|
|
288
|
+
// Group targets by run and check wait times
|
|
289
|
+
const runEndTimes = new Map();
|
|
290
|
+
for (const target of downloadTargets) {
|
|
291
|
+
const runInfo = allRunInfos.find((r) => r.runId === target.runId);
|
|
292
|
+
if (runInfo?.endTime) {
|
|
293
|
+
runEndTimes.set(target.runId, runInfo.endTime);
|
|
294
|
+
}
|
|
295
|
+
}
|
|
296
|
+
// Separate into ready and deferred targets
|
|
297
|
+
const readyTargets = [];
|
|
298
|
+
const deferredTargets = [];
|
|
299
|
+
for (const target of downloadTargets) {
|
|
300
|
+
const endTime = runEndTimes.get(target.runId);
|
|
301
|
+
if (endTime) {
|
|
302
|
+
const timeSinceCompletion = now - endTime;
|
|
303
|
+
if (timeSinceCompletion < MIN_WAIT_AFTER_COMPLETION_MS) {
|
|
304
|
+
deferredTargets.push(target);
|
|
305
|
+
}
|
|
306
|
+
else {
|
|
307
|
+
readyTargets.push(target);
|
|
308
|
+
}
|
|
309
|
+
}
|
|
310
|
+
else {
|
|
311
|
+
// No end time available, process immediately
|
|
312
|
+
readyTargets.push(target);
|
|
313
|
+
}
|
|
314
|
+
}
|
|
315
|
+
// Process ready targets first
|
|
316
|
+
if (readyTargets.length > 0) {
|
|
317
|
+
this.log(pc.blue(`>> Fetching and downloading recordings for ${readyTargets.length} ready target(s)...`));
|
|
318
|
+
await this.fetchAndDownloadTargets(api, readyTargets);
|
|
319
|
+
}
|
|
320
|
+
// Wait for deferred targets, then process them
|
|
321
|
+
if (deferredTargets.length > 0) {
|
|
322
|
+
const uniqueDeferredRuns = new Set(deferredTargets.map((t) => t.runId));
|
|
323
|
+
const maxWaitNeeded = Math.max(...Array.from(uniqueDeferredRuns)
|
|
324
|
+
.map((runId) => {
|
|
325
|
+
const endTime = runEndTimes.get(runId);
|
|
326
|
+
if (!endTime)
|
|
327
|
+
return 0;
|
|
328
|
+
return MIN_WAIT_AFTER_COMPLETION_MS - (now - endTime);
|
|
329
|
+
})
|
|
330
|
+
.filter((wait) => wait > 0));
|
|
331
|
+
this.log(pc.yellow(`>> Waiting ${Math.ceil(maxWaitNeeded / 1000)}s for ${uniqueDeferredRuns.size} recent run(s) to ensure all recordings are uploaded...`));
|
|
332
|
+
// Wait the necessary time
|
|
333
|
+
await new Promise((resolve) => setTimeout(resolve, maxWaitNeeded));
|
|
334
|
+
this.log(pc.blue(`>> Fetching and downloading recordings for ${deferredTargets.length} deferred target(s)...`));
|
|
335
|
+
await this.fetchAndDownloadTargets(api, deferredTargets);
|
|
336
|
+
}
|
|
337
|
+
if (readyTargets.length === 0 && deferredTargets.length === 0) {
|
|
338
|
+
this.log(pc.yellow("No recordings to download."));
|
|
339
|
+
return;
|
|
340
|
+
}
|
|
341
|
+
const totalTargets = readyTargets.length + deferredTargets.length;
|
|
342
|
+
const uniqueRuns = new Set([...readyTargets, ...deferredTargets].map((t) => t.runId));
|
|
343
|
+
// All targets are from the same version
|
|
344
|
+
const recordingsDir = path.join(experimentDir, "versions", targetVersion.toString().padStart(3, "0"), "recordings");
|
|
345
|
+
this.log(pc.green(`\n✓ Downloaded recordings for ${totalTargets} participant(s) across ${uniqueRuns.size} run(s) to ${recordingsDir}`));
|
|
346
|
+
}
|
|
347
|
+
async fetchAndDownloadTargets(api, targets) {
|
|
348
|
+
// Fetch all recordings to show summary
|
|
349
|
+
let totalCount = 0;
|
|
350
|
+
let totalSize = 0;
|
|
351
|
+
for (const target of targets) {
|
|
352
|
+
try {
|
|
353
|
+
const recordings = await api.getRunRecordings(target.runId, target.participantId);
|
|
354
|
+
totalCount += recordings.length;
|
|
355
|
+
totalSize += recordings.reduce((sum, r) => sum + r.sizeBytes, 0);
|
|
356
|
+
}
|
|
357
|
+
catch {
|
|
358
|
+
// Skip targets with no recordings
|
|
359
|
+
}
|
|
360
|
+
}
|
|
361
|
+
if (totalCount === 0) {
|
|
362
|
+
this.log(pc.yellow(" No recordings found for these targets."));
|
|
363
|
+
return;
|
|
364
|
+
}
|
|
365
|
+
this.log(pc.blue(` Found ${totalCount} recordings (Total: ${formatBytes(totalSize)})`));
|
|
366
|
+
// Download all recordings
|
|
367
|
+
await this.downloadRecordings(api, targets);
|
|
368
|
+
}
|
|
369
|
+
async downloadRecordings(api, targets) {
|
|
370
|
+
const allTasks = [];
|
|
371
|
+
for (const target of targets) {
|
|
372
|
+
const { version, runId, participantId, experimentDir } = target;
|
|
373
|
+
// Fetch recordings for this target
|
|
374
|
+
let recordings;
|
|
375
|
+
try {
|
|
376
|
+
recordings = await api.getRunRecordings(runId, participantId);
|
|
377
|
+
}
|
|
378
|
+
catch {
|
|
379
|
+
// Skip targets with no recordings
|
|
380
|
+
continue;
|
|
381
|
+
}
|
|
382
|
+
if (recordings.length === 0)
|
|
383
|
+
continue;
|
|
384
|
+
// Create tasks for each recording
|
|
385
|
+
for (const recording of recordings) {
|
|
386
|
+
const sanitizedFilename = `${sanitizeTimestamp(recording.timestamp)}.mcpr`;
|
|
387
|
+
allTasks.push({
|
|
388
|
+
title: `${participantId} - ${runId} - ${sanitizedFilename}`,
|
|
389
|
+
task: async (_, task) => {
|
|
390
|
+
const outputPath = path.join(experimentDir, "versions", version.toString().padStart(3, "0"), "recordings", runId, participantId, sanitizedFilename);
|
|
391
|
+
// Skip if already exists
|
|
392
|
+
try {
|
|
393
|
+
await fs.access(outputPath);
|
|
394
|
+
task.skip("Already downloaded");
|
|
395
|
+
return;
|
|
396
|
+
}
|
|
397
|
+
catch { }
|
|
398
|
+
// Get download URL
|
|
399
|
+
const { downloadUrl } = await api.getRecordingDownloadUrl(runId, participantId, recording.timestamp);
|
|
400
|
+
// Download file
|
|
401
|
+
await downloadFile(downloadUrl, outputPath);
|
|
402
|
+
task.title = `${participantId} - ${runId} - ${sanitizedFilename} (${formatBytes(recording.sizeBytes)})`;
|
|
403
|
+
},
|
|
404
|
+
});
|
|
405
|
+
}
|
|
406
|
+
}
|
|
407
|
+
if (allTasks.length === 0) {
|
|
408
|
+
return;
|
|
409
|
+
}
|
|
410
|
+
const tasks = new Listr(allTasks, {
|
|
411
|
+
concurrent: 3,
|
|
412
|
+
exitOnError: false,
|
|
413
|
+
});
|
|
414
|
+
await tasks.run();
|
|
415
|
+
}
|
|
416
|
+
}
|
|
@@ -11,6 +11,7 @@ export default class Run extends Command {
|
|
|
11
11
|
"web-url": import("@oclif/core/interfaces").OptionFlag<string, import("@oclif/core/interfaces").CustomOptions>;
|
|
12
12
|
"new-version": import("@oclif/core/interfaces").BooleanFlag<boolean>;
|
|
13
13
|
"max-concurrent": import("@oclif/core/interfaces").OptionFlag<number, import("@oclif/core/interfaces").CustomOptions>;
|
|
14
|
+
"download-recordings": import("@oclif/core/interfaces").BooleanFlag<boolean>;
|
|
14
15
|
};
|
|
15
16
|
run(): Promise<void>;
|
|
16
17
|
}
|
|
@@ -28,6 +28,11 @@ export default class Run extends Command {
|
|
|
28
28
|
description: "Maximum concurrent runs",
|
|
29
29
|
default: DEFAULT_MAX_CONCURRENT,
|
|
30
30
|
}),
|
|
31
|
+
"download-recordings": Flags.boolean({
|
|
32
|
+
char: "d",
|
|
33
|
+
description: "Automatically download recordings after each run finishes",
|
|
34
|
+
default: false,
|
|
35
|
+
}),
|
|
31
36
|
...getConfigFlags("api-key", "api-url", "web-url"),
|
|
32
37
|
};
|
|
33
38
|
async run() {
|
|
@@ -51,6 +56,7 @@ export default class Run extends Command {
|
|
|
51
56
|
new: flags["new-version"],
|
|
52
57
|
maxConcurrent: flags["max-concurrent"],
|
|
53
58
|
openMetabase: true,
|
|
59
|
+
downloadRecordings: flags["download-recordings"],
|
|
54
60
|
});
|
|
55
61
|
this.log(pc.green("\n✓ Experiment complete!"));
|
|
56
62
|
}
|
package/dist/lib/api-client.d.ts
CHANGED
|
@@ -1,9 +1,10 @@
|
|
|
1
1
|
import type z from "zod";
|
|
2
|
-
import { type AgentSchemaType, type ChallengeConfigSchemaType, type ChallengeSchemaType, HumanSchema, type RunStatusSchemaType } from "./schemas.js";
|
|
2
|
+
import { type AgentSchemaType, type ChallengeConfigSchemaType, type ChallengeSchemaType, HumanSchema, type RecordingDownloadUrlResponse, type RecordingMetadata, type RunStatusSchemaType } from "./schemas.js";
|
|
3
3
|
export declare class ApiClient {
|
|
4
4
|
private apiUrl;
|
|
5
5
|
private kradleApiKey;
|
|
6
|
-
|
|
6
|
+
private isStudio;
|
|
7
|
+
constructor(apiUrl: string, kradleApiKey: string, isStudio?: boolean);
|
|
7
8
|
private request;
|
|
8
9
|
private get;
|
|
9
10
|
private post;
|
|
@@ -70,6 +71,12 @@ export declare class ApiClient {
|
|
|
70
71
|
participants: unknown[];
|
|
71
72
|
}): Promise<{
|
|
72
73
|
runIds?: string[] | undefined;
|
|
74
|
+
participants?: Record<string, {
|
|
75
|
+
agent: string;
|
|
76
|
+
role: string;
|
|
77
|
+
inputOrder: number;
|
|
78
|
+
}> | undefined;
|
|
79
|
+
id?: string | undefined;
|
|
73
80
|
}>;
|
|
74
81
|
deleteChallenge(challengeId: string): Promise<void>;
|
|
75
82
|
/**
|
|
@@ -85,4 +92,19 @@ export declare class ApiClient {
|
|
|
85
92
|
* @throws an error if the tag fails to be added.
|
|
86
93
|
*/
|
|
87
94
|
tagRun(runId: string, tag: string): Promise<void>;
|
|
95
|
+
/**
|
|
96
|
+
* Get recordings for a specific participant in a run.
|
|
97
|
+
* @param runId - The ID of the run.
|
|
98
|
+
* @param participantId - The ID of the participant.
|
|
99
|
+
* @returns Array of recording metadata.
|
|
100
|
+
*/
|
|
101
|
+
getRunRecordings(runId: string, participantId: string): Promise<RecordingMetadata[]>;
|
|
102
|
+
/**
|
|
103
|
+
* Get a signed download URL for a specific recording.
|
|
104
|
+
* @param runId - The ID of the run.
|
|
105
|
+
* @param participantId - The ID of the participant.
|
|
106
|
+
* @param timestamp - The timestamp of the recording.
|
|
107
|
+
* @returns Download URL and expiration time.
|
|
108
|
+
*/
|
|
109
|
+
getRecordingDownloadUrl(runId: string, participantId: string, timestamp: string): Promise<RecordingDownloadUrlResponse>;
|
|
88
110
|
}
|
package/dist/lib/api-client.js
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
import { existsSync } from "node:fs";
|
|
2
2
|
import fs from "node:fs/promises";
|
|
3
|
-
import { AgentsResponseSchema, ChallengeSchema, ChallengesResponseSchema, HumanSchema,
|
|
3
|
+
import { AgentsResponseSchema, ChallengeSchema, ChallengesResponseSchema, HumanSchema, JobResponseSchema, RecordingDownloadUrlResponseSchema, RecordingsListResponseSchema, RunStatusSchema, UploadUrlResponseSchema, } from "./schemas.js";
|
|
4
4
|
const DEFAULT_PAGE_SIZE = 30;
|
|
5
5
|
const DEFAULT_CHALLENGE_SCHEMA = {
|
|
6
6
|
slug: "",
|
|
@@ -25,9 +25,11 @@ const DEFAULT_CHALLENGE_SCHEMA = {
|
|
|
25
25
|
export class ApiClient {
|
|
26
26
|
apiUrl;
|
|
27
27
|
kradleApiKey;
|
|
28
|
-
|
|
28
|
+
isStudio;
|
|
29
|
+
constructor(apiUrl, kradleApiKey, isStudio = false) {
|
|
29
30
|
this.apiUrl = apiUrl;
|
|
30
31
|
this.kradleApiKey = kradleApiKey;
|
|
32
|
+
this.isStudio = isStudio;
|
|
31
33
|
}
|
|
32
34
|
async request(endpoint, options) {
|
|
33
35
|
const fullUrl = `${this.apiUrl}/${endpoint}`;
|
|
@@ -210,9 +212,10 @@ export class ApiClient {
|
|
|
210
212
|
}
|
|
211
213
|
async runChallenge(runData) {
|
|
212
214
|
const url = "jobs";
|
|
215
|
+
const payload = this.isStudio ? runData : { ...runData, jobType: "background" };
|
|
213
216
|
return this.post(url, {
|
|
214
|
-
body: JSON.stringify(
|
|
215
|
-
},
|
|
217
|
+
body: JSON.stringify(payload),
|
|
218
|
+
}, JobResponseSchema);
|
|
216
219
|
}
|
|
217
220
|
async deleteChallenge(challengeId) {
|
|
218
221
|
const url = `challenges/${challengeId}`;
|
|
@@ -239,4 +242,26 @@ export class ApiClient {
|
|
|
239
242
|
body: JSON.stringify({ tag }),
|
|
240
243
|
});
|
|
241
244
|
}
|
|
245
|
+
/**
|
|
246
|
+
* Get recordings for a specific participant in a run.
|
|
247
|
+
* @param runId - The ID of the run.
|
|
248
|
+
* @param participantId - The ID of the participant.
|
|
249
|
+
* @returns Array of recording metadata.
|
|
250
|
+
*/
|
|
251
|
+
async getRunRecordings(runId, participantId) {
|
|
252
|
+
const url = `runs/${runId}/recordings/${participantId}`;
|
|
253
|
+
const response = await this.get(url, {}, RecordingsListResponseSchema);
|
|
254
|
+
return response.recordings;
|
|
255
|
+
}
|
|
256
|
+
/**
|
|
257
|
+
* Get a signed download URL for a specific recording.
|
|
258
|
+
* @param runId - The ID of the run.
|
|
259
|
+
* @param participantId - The ID of the participant.
|
|
260
|
+
* @param timestamp - The timestamp of the recording.
|
|
261
|
+
* @returns Download URL and expiration time.
|
|
262
|
+
*/
|
|
263
|
+
async getRecordingDownloadUrl(runId, participantId, timestamp) {
|
|
264
|
+
const url = `runs/${runId}/recordings/${participantId}/downloadUrl?timestamp=${encodeURIComponent(timestamp)}`;
|
|
265
|
+
return this.get(url, {}, RecordingDownloadUrlResponseSchema);
|
|
266
|
+
}
|
|
242
267
|
}
|
|
@@ -84,4 +84,9 @@ export declare class Experimenter {
|
|
|
84
84
|
* Open run in browser
|
|
85
85
|
*/
|
|
86
86
|
private openRun;
|
|
87
|
+
/**
|
|
88
|
+
* Download recordings for a completed run with smart polling
|
|
89
|
+
* Polls for 90 seconds after run completion (matching pod grace period)
|
|
90
|
+
*/
|
|
91
|
+
private downloadRecordingsForRun;
|
|
87
92
|
}
|
|
@@ -5,6 +5,14 @@ import { executeNodeCommand, openInBrowser } from "../utils.js";
|
|
|
5
5
|
import { Runner } from "./runner.js";
|
|
6
6
|
import { TUI } from "./tui.js";
|
|
7
7
|
import { ExperimentMetadataSchema, ManifestSchema, ProgressSchema } from "./types.js";
|
|
8
|
+
/**
 * Turn a recording timestamp into a string that is safe to use as a single
 * file name component.
 *
 * Substitutions, applied in order:
 *  1. every `:` becomes `-` (so `12:30:00` reads as `12-30-00`);
 *  2. every run of whitespace becomes a single `_`;
 *  3. path separators (forward slash, backslash) and the remaining characters
 *     that are not allowed in Windows file names (`<`, `>`, `"`, `|`, `?`, `*`)
 *     each become `_`.
 *
 * Replacing the path separators matters because the result is joined into an
 * output path: a timestamp containing a separator must not be able to create
 * nested directories or point outside the intended directory.
 *
 * @param timestamp - Raw timestamp string.
 * @returns The sanitized string; an empty input yields an empty string.
 */
function sanitizeTimestamp(timestamp) {
    return timestamp
        .replace(/:/g, "-")
        .replace(/\s+/g, "_")
        .replace(/[<>"|?*\/\\]/g, "_");
}
|
|
8
16
|
export class Experimenter {
|
|
9
17
|
name;
|
|
10
18
|
webUrl;
|
|
@@ -192,6 +200,7 @@ export class Experimenter {
|
|
|
192
200
|
*/
|
|
193
201
|
async run(options) {
|
|
194
202
|
const version = await this.getOrCreateVersion(options.new);
|
|
203
|
+
this.currentVersion = version;
|
|
195
204
|
// Load manifest
|
|
196
205
|
const manifest = await this.loadManifest(version);
|
|
197
206
|
// We have 2 mandatory tags: "exp-<experiment-name>" and "exp-<experiment-name>-v<version>"
|
|
@@ -203,6 +212,16 @@ export class Experimenter {
|
|
|
203
212
|
maxConcurrent: options.maxConcurrent,
|
|
204
213
|
tags: tags,
|
|
205
214
|
onStateChange: () => this.onRunStateChange(),
|
|
215
|
+
onRunComplete: options.downloadRecordings
|
|
216
|
+
? async (index, runId) => {
|
|
217
|
+
const state = this.runner?.getRunState(index);
|
|
218
|
+
if (!state?.participantIds) {
|
|
219
|
+
console.error(pc.yellow(`Warning: Participant IDs not available for run ${runId}, skipping recording download.`));
|
|
220
|
+
return;
|
|
221
|
+
}
|
|
222
|
+
await this.downloadRecordingsForRun(runId, state.participantIds, version);
|
|
223
|
+
}
|
|
224
|
+
: undefined,
|
|
206
225
|
});
|
|
207
226
|
// Restore progress if applicable
|
|
208
227
|
const progress = await this.loadProgress(version);
|
|
@@ -267,4 +286,83 @@ export class Experimenter {
|
|
|
267
286
|
openInBrowser(url);
|
|
268
287
|
}
|
|
269
288
|
}
|
|
289
|
+
/**
|
|
290
|
+
* Download recordings for a completed run with smart polling
|
|
291
|
+
* Polls for 90 seconds after run completion (matching pod grace period)
|
|
292
|
+
*/
|
|
293
|
+
async downloadRecordingsForRun(runId, participantIds, version) {
|
|
294
|
+
const POLL_INTERVAL_MS = 5000; // Check every 5 seconds
|
|
295
|
+
const TOTAL_POLL_DURATION_MS = 90000; // Poll for 90 seconds total (pod grace period)
|
|
296
|
+
const downloadedRecordings = new Set(); // Track downloaded recordings by timestamp
|
|
297
|
+
const failedDownloads = new Set(); // Track failed downloads to avoid spamming logs
|
|
298
|
+
const startTime = Date.now();
|
|
299
|
+
// Keep polling until grace period expires, then do one final check
|
|
300
|
+
let isLastAttempt = false;
|
|
301
|
+
while (true) {
|
|
302
|
+
// For each participant in the run
|
|
303
|
+
for (const participantId of participantIds) {
|
|
304
|
+
try {
|
|
305
|
+
// Fetch current available recordings
|
|
306
|
+
const recordings = await this.api.getRunRecordings(runId, participantId);
|
|
307
|
+
// Download any new recordings
|
|
308
|
+
for (const recording of recordings) {
|
|
309
|
+
const recordingKey = `${participantId}-${recording.timestamp}`;
|
|
310
|
+
// Skip if already downloaded
|
|
311
|
+
if (downloadedRecordings.has(recordingKey)) {
|
|
312
|
+
continue;
|
|
313
|
+
}
|
|
314
|
+
const outputPath = path.join(this.experimentDir, "versions", version.toString().padStart(3, "0"), "recordings", runId, participantId, `${sanitizeTimestamp(recording.timestamp)}.mcpr`);
|
|
315
|
+
// Check if file already exists on disk
|
|
316
|
+
try {
|
|
317
|
+
await fs.access(outputPath);
|
|
318
|
+
downloadedRecordings.add(recordingKey);
|
|
319
|
+
continue;
|
|
320
|
+
}
|
|
321
|
+
catch { }
|
|
322
|
+
try {
|
|
323
|
+
// Download the recording
|
|
324
|
+
const { downloadUrl } = await this.api.getRecordingDownloadUrl(runId, participantId, recording.timestamp);
|
|
325
|
+
const response = await fetch(downloadUrl);
|
|
326
|
+
if (!response.ok) {
|
|
327
|
+
throw new Error(`HTTP ${response.status}: ${response.statusText}`);
|
|
328
|
+
}
|
|
329
|
+
const buffer = await response.arrayBuffer();
|
|
330
|
+
await fs.mkdir(path.dirname(outputPath), { recursive: true });
|
|
331
|
+
await fs.writeFile(outputPath, Buffer.from(buffer));
|
|
332
|
+
downloadedRecordings.add(recordingKey);
|
|
333
|
+
// Remove from failed set if it was previously failing
|
|
334
|
+
if (failedDownloads.has(recordingKey)) {
|
|
335
|
+
failedDownloads.delete(recordingKey);
|
|
336
|
+
}
|
|
337
|
+
}
|
|
338
|
+
catch (error) {
|
|
339
|
+
// Only log each failure once to avoid spam
|
|
340
|
+
if (!failedDownloads.has(recordingKey)) {
|
|
341
|
+
console.error(pc.yellow(`Warning: Failed to download recording ${recording.timestamp} for participant ${participantId}: ${error instanceof Error ? error.message : String(error)}`));
|
|
342
|
+
failedDownloads.add(recordingKey);
|
|
343
|
+
}
|
|
344
|
+
}
|
|
345
|
+
}
|
|
346
|
+
}
|
|
347
|
+
catch (error) {
|
|
348
|
+
// Log API errors (e.g., fetching recordings list)
|
|
349
|
+
console.error(pc.yellow(`Warning: Failed to fetch recordings for participant ${participantId}: ${error instanceof Error ? error.message : String(error)}`));
|
|
350
|
+
}
|
|
351
|
+
}
|
|
352
|
+
// Exit if this was the last attempt
|
|
353
|
+
if (isLastAttempt) {
|
|
354
|
+
break;
|
|
355
|
+
}
|
|
356
|
+
// Check if we've exceeded the polling duration
|
|
357
|
+
const elapsed = Date.now() - startTime;
|
|
358
|
+
if (elapsed >= TOTAL_POLL_DURATION_MS) {
|
|
359
|
+
// Do one final attempt before exiting
|
|
360
|
+
isLastAttempt = true;
|
|
361
|
+
}
|
|
362
|
+
else {
|
|
363
|
+
// Wait before next poll
|
|
364
|
+
await new Promise((resolve) => setTimeout(resolve, POLL_INTERVAL_MS));
|
|
365
|
+
}
|
|
366
|
+
}
|
|
367
|
+
}
|
|
270
368
|
}
|
|
@@ -12,10 +12,12 @@ export declare class Runner {
|
|
|
12
12
|
private maxConcurrent;
|
|
13
13
|
private tags;
|
|
14
14
|
private onStateChange?;
|
|
15
|
+
private onRunComplete?;
|
|
15
16
|
constructor(runs: RunConfig[], api: ApiClient, baseUrl: string, options?: {
|
|
16
17
|
maxConcurrent?: number;
|
|
17
18
|
tags?: string[];
|
|
18
19
|
onStateChange?: (index: number, state: RunState) => void;
|
|
20
|
+
onRunComplete?: (index: number, runId: string) => Promise<void>;
|
|
19
21
|
});
|
|
20
22
|
/**
|
|
21
23
|
* Restore progress from a previous run
|
|
@@ -13,6 +13,7 @@ export class Runner {
|
|
|
13
13
|
maxConcurrent;
|
|
14
14
|
tags;
|
|
15
15
|
onStateChange;
|
|
16
|
+
onRunComplete;
|
|
16
17
|
constructor(runs, api, baseUrl, options = {}) {
|
|
17
18
|
this.runs = runs;
|
|
18
19
|
this.api = api;
|
|
@@ -26,6 +27,7 @@ export class Runner {
|
|
|
26
27
|
}
|
|
27
28
|
}
|
|
28
29
|
this.onStateChange = options.onStateChange;
|
|
30
|
+
this.onRunComplete = options.onRunComplete;
|
|
29
31
|
// Initialize all run states as queued
|
|
30
32
|
this.states = runs.map((config, index) => ({
|
|
31
33
|
index,
|
|
@@ -155,7 +157,15 @@ export class Runner {
|
|
|
155
157
|
throw new Error("No run ID returned from API");
|
|
156
158
|
}
|
|
157
159
|
const runId = response.runIds[0];
|
|
158
|
-
|
|
160
|
+
// Extract participant IDs from response and sort by inputOrder
|
|
161
|
+
const participantIds = response.participants
|
|
162
|
+
? Object.keys(response.participants).sort((a, b) => {
|
|
163
|
+
const aOrder = response.participants?.[a]?.inputOrder ?? 0;
|
|
164
|
+
const bOrder = response.participants?.[b]?.inputOrder ?? 0;
|
|
165
|
+
return aOrder - bOrder;
|
|
166
|
+
})
|
|
167
|
+
: undefined;
|
|
168
|
+
this.updateState(index, { runId, participantIds, status: "running" });
|
|
159
169
|
// Tag the run with all configured tags
|
|
160
170
|
const tags = [...this.tags, ...(state.config.tags ?? [])];
|
|
161
171
|
await Promise.all(tags.map((tag) => this.api.tagRun(runId, tag)));
|
|
@@ -192,6 +202,13 @@ export class Runner {
|
|
|
192
202
|
if (normalizedStatus === "completed" || normalizedStatus === "finished" || normalizedStatus === "game_over") {
|
|
193
203
|
this.completedRuns.add(index);
|
|
194
204
|
this.activeRuns.delete(index);
|
|
205
|
+
// Trigger recording download if callback provided
|
|
206
|
+
if (this.onRunComplete) {
|
|
207
|
+
// Don't await - run in background to avoid blocking
|
|
208
|
+
this.onRunComplete(index, runId).catch(() => {
|
|
209
|
+
// Error already logged in experimenter, just continue
|
|
210
|
+
});
|
|
211
|
+
}
|
|
195
212
|
return;
|
|
196
213
|
}
|
|
197
214
|
if (normalizedStatus === "error") {
|
|
@@ -272,6 +289,7 @@ export class Runner {
|
|
|
272
289
|
index: state.index,
|
|
273
290
|
status: state.status,
|
|
274
291
|
runId: state.runId,
|
|
292
|
+
participantIds: state.participantIds,
|
|
275
293
|
startTime: state.startTime,
|
|
276
294
|
endTime: this.completedRuns.has(state.index) ? Date.now() : undefined,
|
|
277
295
|
error: state.error,
|
|
@@ -42,6 +42,7 @@ export declare const ProgressEntrySchema: z.ZodObject<{
|
|
|
42
42
|
finished: "finished";
|
|
43
43
|
}>;
|
|
44
44
|
runId: z.ZodOptional<z.ZodString>;
|
|
45
|
+
participantIds: z.ZodOptional<z.ZodArray<z.ZodString>>;
|
|
45
46
|
startTime: z.ZodOptional<z.ZodNumber>;
|
|
46
47
|
endTime: z.ZodOptional<z.ZodNumber>;
|
|
47
48
|
error: z.ZodOptional<z.ZodString>;
|
|
@@ -64,6 +65,7 @@ export declare const ProgressSchema: z.ZodObject<{
|
|
|
64
65
|
finished: "finished";
|
|
65
66
|
}>;
|
|
66
67
|
runId: z.ZodOptional<z.ZodString>;
|
|
68
|
+
participantIds: z.ZodOptional<z.ZodArray<z.ZodString>>;
|
|
67
69
|
startTime: z.ZodOptional<z.ZodNumber>;
|
|
68
70
|
endTime: z.ZodOptional<z.ZodNumber>;
|
|
69
71
|
error: z.ZodOptional<z.ZodString>;
|
|
@@ -94,6 +96,7 @@ export interface RunState {
|
|
|
94
96
|
config: RunConfig;
|
|
95
97
|
status: RunStatus;
|
|
96
98
|
runId?: string;
|
|
99
|
+
participantIds?: string[];
|
|
97
100
|
startTime?: number;
|
|
98
101
|
error?: string;
|
|
99
102
|
}
|
|
@@ -123,6 +126,7 @@ export interface ExperimentOptions {
|
|
|
123
126
|
new: boolean;
|
|
124
127
|
maxConcurrent: number;
|
|
125
128
|
openMetabase?: boolean;
|
|
129
|
+
downloadRecordings?: boolean;
|
|
126
130
|
}
|
|
127
131
|
export declare const STATUS_ICONS: Record<RunStatus, {
|
|
128
132
|
icon: string;
|
package/dist/lib/schemas.d.ts
CHANGED
|
@@ -118,8 +118,19 @@ export declare const ChallengesResponseSchema: z.ZodObject<{
|
|
|
118
118
|
export declare const HumanSchema: z.ZodObject<{
|
|
119
119
|
username: z.ZodString;
|
|
120
120
|
}, z.core.$strip>;
|
|
121
|
-
export declare const
|
|
121
|
+
export declare const RunParticipantSchema: z.ZodObject<{
|
|
122
|
+
agent: z.ZodString;
|
|
123
|
+
role: z.ZodString;
|
|
124
|
+
inputOrder: z.ZodNumber;
|
|
125
|
+
}, z.core.$strip>;
|
|
126
|
+
export declare const JobResponseSchema: z.ZodObject<{
|
|
122
127
|
runIds: z.ZodOptional<z.ZodArray<z.ZodString>>;
|
|
128
|
+
participants: z.ZodOptional<z.ZodRecord<z.ZodString, z.ZodObject<{
|
|
129
|
+
agent: z.ZodString;
|
|
130
|
+
role: z.ZodString;
|
|
131
|
+
inputOrder: z.ZodNumber;
|
|
132
|
+
}, z.core.$strip>>>;
|
|
133
|
+
id: z.ZodOptional<z.ZodString>;
|
|
123
134
|
}, z.core.$strip>;
|
|
124
135
|
export declare const RunStatusSchema: z.ZodObject<{
|
|
125
136
|
id: z.ZodString;
|
|
@@ -165,11 +176,29 @@ export declare const AgentsResponseSchema: z.ZodObject<{
|
|
|
165
176
|
}, z.core.$strip>>;
|
|
166
177
|
nextPageToken: z.ZodOptional<z.ZodString>;
|
|
167
178
|
}, z.core.$strip>;
|
|
179
|
+
export declare const RecordingMetadataSchema: z.ZodObject<{
|
|
180
|
+
timestamp: z.ZodString;
|
|
181
|
+
sizeBytes: z.ZodNumber;
|
|
182
|
+
}, z.core.$strip>;
|
|
183
|
+
export declare const RecordingsListResponseSchema: z.ZodObject<{
|
|
184
|
+
recordings: z.ZodArray<z.ZodObject<{
|
|
185
|
+
timestamp: z.ZodString;
|
|
186
|
+
sizeBytes: z.ZodNumber;
|
|
187
|
+
}, z.core.$strip>>;
|
|
188
|
+
}, z.core.$strip>;
|
|
189
|
+
export declare const RecordingDownloadUrlResponseSchema: z.ZodObject<{
|
|
190
|
+
downloadUrl: z.ZodString;
|
|
191
|
+
expiresAt: z.ZodString;
|
|
192
|
+
}, z.core.$strip>;
|
|
168
193
|
export type ChallengeSchemaType = z.infer<typeof ChallengeSchema>;
|
|
169
194
|
export type ChallengeConfigSchemaType = z.infer<typeof ChallengeConfigSchema>;
|
|
170
195
|
export type ChallengesResponseType = z.infer<typeof ChallengesResponseSchema>;
|
|
171
196
|
export type HumanSchemaType = z.infer<typeof HumanSchema>;
|
|
172
|
-
export type
|
|
197
|
+
export type JobResponseType = z.infer<typeof JobResponseSchema>;
|
|
173
198
|
export type RunStatusSchemaType = z.infer<typeof RunStatusSchema>;
|
|
174
199
|
export type AgentSchemaType = z.infer<typeof AgentSchema>;
|
|
175
200
|
export type AgentsResponseType = z.infer<typeof AgentsResponseSchema>;
|
|
201
|
+
export type RecordingMetadata = z.infer<typeof RecordingMetadataSchema>;
|
|
202
|
+
export type RecordingsListResponse = z.infer<typeof RecordingsListResponseSchema>;
|
|
203
|
+
export type RecordingDownloadUrlResponse = z.infer<typeof RecordingDownloadUrlResponseSchema>;
|
|
204
|
+
export type RunParticipant = z.infer<typeof RunParticipantSchema>;
|
package/dist/lib/schemas.js
CHANGED
|
@@ -43,8 +43,15 @@ export const ChallengesResponseSchema = z.object({
|
|
|
43
43
|
export const HumanSchema = z.object({
|
|
44
44
|
username: z.string(),
|
|
45
45
|
});
|
|
46
|
-
export const
|
|
46
|
+
/**
 * One participant entry of a job response: three required fields, `agent` and
 * `role` as strings and `inputOrder` as a number.
 */
export const RunParticipantSchema = z.object({
    agent: z.string(),
    role: z.string(),
    inputOrder: z.number(),
});
/**
 * Response returned when a job is created. Every field is optional:
 * `runIds` is an array of strings, `participants` is a record from string keys
 * to `RunParticipantSchema` entries, and `id` is a string.
 */
export const JobResponseSchema = z.object({
    runIds: z.array(z.string()).optional(),
    participants: z.record(z.string(), RunParticipantSchema).optional(),
    id: z.string().optional(),
});
|
|
49
56
|
export const RunStatusSchema = z.object({
|
|
50
57
|
id: z.string(),
|
|
@@ -72,3 +79,14 @@ export const AgentsResponseSchema = z.object({
|
|
|
72
79
|
agents: z.array(AgentSchema),
|
|
73
80
|
nextPageToken: z.string().optional(),
|
|
74
81
|
});
|
|
82
|
+
/** Metadata of a single recording: a `timestamp` string and a numeric `sizeBytes`. */
export const RecordingMetadataSchema = z.object({
    timestamp: z.string(),
    sizeBytes: z.number(),
});
/** Response of the recordings listing: a `recordings` array of `RecordingMetadataSchema` items. */
export const RecordingsListResponseSchema = z.object({
    recordings: z.array(RecordingMetadataSchema),
});
/** Response of the download-URL request: `downloadUrl` and `expiresAt`, both strings. */
export const RecordingDownloadUrlResponseSchema = z.object({
    downloadUrl: z.string(),
    expiresAt: z.string(),
});
|
package/oclif.manifest.json
CHANGED
|
@@ -600,6 +600,80 @@
|
|
|
600
600
|
"list.js"
|
|
601
601
|
]
|
|
602
602
|
},
|
|
603
|
+
"experiment:recordings": {
|
|
604
|
+
"aliases": [],
|
|
605
|
+
"args": {
|
|
606
|
+
"experimentName": {
|
|
607
|
+
"description": "Experiment name",
|
|
608
|
+
"name": "experimentName",
|
|
609
|
+
"required": true
|
|
610
|
+
},
|
|
611
|
+
"runId": {
|
|
612
|
+
"description": "Specific run ID to download recordings from (optional)",
|
|
613
|
+
"name": "runId",
|
|
614
|
+
"required": false
|
|
615
|
+
}
|
|
616
|
+
},
|
|
617
|
+
"description": "Download recordings from an experiment run",
|
|
618
|
+
"examples": [
|
|
619
|
+
"<%= config.bin %> <%= command.id %> my-experiment",
|
|
620
|
+
"<%= config.bin %> <%= command.id %> my-experiment <run-id>",
|
|
621
|
+
"<%= config.bin %> <%= command.id %> my-experiment --all",
|
|
622
|
+
"<%= config.bin %> <%= command.id %> my-experiment <run-id> --all",
|
|
623
|
+
"<%= config.bin %> <%= command.id %> my-experiment --version 2",
|
|
624
|
+
"<%= config.bin %> <%= command.id %> my-experiment --version 1 --all"
|
|
625
|
+
],
|
|
626
|
+
"flags": {
|
|
627
|
+
"all": {
|
|
628
|
+
"description": "Download all runs and participants (if no run specified), or all participants (if run specified)",
|
|
629
|
+
"name": "all",
|
|
630
|
+
"allowNo": false,
|
|
631
|
+
"type": "boolean"
|
|
632
|
+
},
|
|
633
|
+
"version": {
|
|
634
|
+
"description": "Specific experiment version to download recordings from (e.g., 0, 1, 2)",
|
|
635
|
+
"name": "version",
|
|
636
|
+
"required": false,
|
|
637
|
+
"hasDynamicHelp": false,
|
|
638
|
+
"multiple": false,
|
|
639
|
+
"type": "option"
|
|
640
|
+
},
|
|
641
|
+
"api-key": {
|
|
642
|
+
"description": "Kradle API key",
|
|
643
|
+
"env": "KRADLE_API_KEY",
|
|
644
|
+
"name": "api-key",
|
|
645
|
+
"required": true,
|
|
646
|
+
"hasDynamicHelp": false,
|
|
647
|
+
"multiple": false,
|
|
648
|
+
"type": "option"
|
|
649
|
+
},
|
|
650
|
+
"api-url": {
|
|
651
|
+
"description": "Kradle Web API URL",
|
|
652
|
+
"env": "KRADLE_API_URL",
|
|
653
|
+
"name": "api-url",
|
|
654
|
+
"required": true,
|
|
655
|
+
"default": "https://api.kradle.ai/v0",
|
|
656
|
+
"hasDynamicHelp": false,
|
|
657
|
+
"multiple": false,
|
|
658
|
+
"type": "option"
|
|
659
|
+
}
|
|
660
|
+
},
|
|
661
|
+
"hasDynamicHelp": false,
|
|
662
|
+
"hiddenAliases": [],
|
|
663
|
+
"id": "experiment:recordings",
|
|
664
|
+
"pluginAlias": "@kradle/cli",
|
|
665
|
+
"pluginName": "@kradle/cli",
|
|
666
|
+
"pluginType": "core",
|
|
667
|
+
"strict": true,
|
|
668
|
+
"enableJsonFlag": false,
|
|
669
|
+
"isESM": true,
|
|
670
|
+
"relativePath": [
|
|
671
|
+
"dist",
|
|
672
|
+
"commands",
|
|
673
|
+
"experiment",
|
|
674
|
+
"recordings.js"
|
|
675
|
+
]
|
|
676
|
+
},
|
|
603
677
|
"experiment:run": {
|
|
604
678
|
"aliases": [],
|
|
605
679
|
"args": {
|
|
@@ -632,6 +706,13 @@
|
|
|
632
706
|
"multiple": false,
|
|
633
707
|
"type": "option"
|
|
634
708
|
},
|
|
709
|
+
"download-recordings": {
|
|
710
|
+
"char": "d",
|
|
711
|
+
"description": "Automatically download recordings after each run finishes",
|
|
712
|
+
"name": "download-recordings",
|
|
713
|
+
"allowNo": false,
|
|
714
|
+
"type": "boolean"
|
|
715
|
+
},
|
|
635
716
|
"api-key": {
|
|
636
717
|
"description": "Kradle API key",
|
|
637
718
|
"env": "KRADLE_API_KEY",
|
|
@@ -679,5 +760,5 @@
|
|
|
679
760
|
]
|
|
680
761
|
}
|
|
681
762
|
},
|
|
682
|
-
"version": "0.
|
|
763
|
+
"version": "0.2.0"
|
|
683
764
|
}
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@kradle/cli",
|
|
3
|
-
"version": "0.
|
|
3
|
+
"version": "0.2.0",
|
|
4
4
|
"description": "Kradle's CLI. Manage challenges, experiments, agents and more!",
|
|
5
5
|
"keywords": [
|
|
6
6
|
"cli"
|
|
@@ -26,7 +26,10 @@
|
|
|
26
26
|
"lint": "biome check .",
|
|
27
27
|
"format": "biome format --write . && biome check --write .",
|
|
28
28
|
"prepack": "npm run build && npm run version",
|
|
29
|
-
"version": "oclif manifest && oclif readme && git add README.md"
|
|
29
|
+
"version": "oclif manifest && oclif readme && git add README.md",
|
|
30
|
+
"test": "vitest run",
|
|
31
|
+
"test:watch": "vitest",
|
|
32
|
+
"test:integration": "vitest run --config vitest.config.ts"
|
|
30
33
|
},
|
|
31
34
|
"dependencies": {
|
|
32
35
|
"@google-cloud/storage": "^7.17.3",
|
|
@@ -54,7 +57,8 @@
|
|
|
54
57
|
"chai": "^4",
|
|
55
58
|
"oclif": "^4",
|
|
56
59
|
"tsx": "^4.20.6",
|
|
57
|
-
"typescript": "^5.9.3"
|
|
60
|
+
"typescript": "^5.9.3",
|
|
61
|
+
"vitest": "^2.1.9"
|
|
58
62
|
},
|
|
59
63
|
"engines": {
|
|
60
64
|
"node": ">=22.18.0"
|