npm - @kradle/cli - Versions diffs - 0.1.0 → 0.2.0 - Mend

@kradle/cli 0.1.0 → 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (20)

package/README.md +32 -1
package/dist/commands/challenge/create.js +1 -1
package/dist/commands/challenge/list.js +3 -1
package/dist/commands/challenge/run.js +1 -1
package/dist/commands/experiment/recordings.d.ts +19 -0
package/dist/commands/experiment/recordings.js +416 -0
package/dist/commands/experiment/run.d.ts +1 -0
package/dist/commands/experiment/run.js +6 -0
package/dist/lib/api-client.d.ts +24 -2
package/dist/lib/api-client.js +29 -4
package/dist/lib/experiment/experimenter.d.ts +5 -0
package/dist/lib/experiment/experimenter.js +98 -0
package/dist/lib/experiment/runner.d.ts +2 -0
package/dist/lib/experiment/runner.js +19 -1
package/dist/lib/experiment/types.d.ts +4 -0
package/dist/lib/experiment/types.js +1 -0
package/dist/lib/schemas.d.ts +31 -2
package/dist/lib/schemas.js +19 -1
package/oclif.manifest.json +82 -1
package/package.json +7 -3

package/README.md CHANGED Viewed

@@ -173,7 +173,7 @@ Execute or resume an experiment:
 ```bash
 kradle experiment run <name>                  # Resume current version or create first one
-kradle experiment run <name> --new            # Start a new version
+kradle experiment run <name> --new-version            # Start a new version
 kradle experiment run <name> --max-concurrent 10  # Control parallelism (default: 5)
 ```
@@ -242,6 +242,32 @@ npm run lint:fix           # Auto-fix linting issues
 npm run format             # Format code with Biome
 ```
+### Running Tests
+The CLI has integration tests that verify commands work correctly with the dev API.
+**Setup:**
+1. Copy `.env.test.example` to `.env.test`
+2. Add your Kradle API key (from https://dev.kradle.ai/settings/api-keys)
+```bash
+cp .env.test.example .env.test
+# Edit .env.test and add your API key
+```
+**Run tests:**
+```bash
+npm test                   # Run all tests
+npm run test:watch         # Run tests in watch mode
+npm run test:integration   # Run integration tests
+```
+**Note:** Integration tests make real API calls to the dev environment and may create/delete challenges.
+**CI Configuration:** Integration tests run in GitHub Actions on PRs. The `KRADLE_API_KEY` secret must be configured in the repository settings.
 ### Challenge Structure
 Each challenge is a folder in `challenges/<slug>/` containing:
@@ -281,6 +307,11 @@ kradle-cli/
 │   │   └── experiment/       # Experiment commands
 │   └── lib/                  # Core libraries
 │       └── experiment/       # Experiment system
+├── tests/                    # Integration tests
+│   ├── helpers/              # Test utilities
+│   └── integration/          # Integration test suites
+│       ├── challenge/        # Challenge command tests
+│       └── experiment/       # Experiment command tests
 └── static/                   # Template files
     └── project_template/     # Files for kradle init
 ```

package/dist/commands/challenge/create.js CHANGED Viewed

@@ -70,7 +70,7 @@ export const config = ${configStr};
             {
                 title: "Uploading initial datapack",
                 task: async (_, task) => {
-                    api.uploadChallengeDatapack(args.challengeSlug, challenge.tarballPath);
+                    await api.uploadChallengeDatapack(args.challengeSlug, challenge.tarballPath);
                     task.title = `Uploaded initial datapack`;
                 },
             },

package/dist/commands/challenge/list.js CHANGED Viewed

@@ -27,7 +27,9 @@ export default class List extends Command {
         for (const slug of Array.from(allSlugs).sort()) {
             const challenge = new Challenge(slug, flags["challenges-path"]);
             const inCloud = cloudMap.has(slug);
-            const inLocal = localChallenges.includes(slug);
+            // Extract short slug (after the colon) for local comparison
+            const shortSlug = slug.includes(":") ? slug.split(":")[1] : slug;
+            const inLocal = localChallenges.includes(shortSlug);
             let status;
             if (inCloud && inLocal) {
                 status = pc.green("✓ synced");

package/dist/commands/challenge/run.js CHANGED Viewed

@@ -22,7 +22,7 @@ export default class Run extends Command {
     async run() {
         const { args, flags } = await this.parse(Run);
         const apiUrl = flags.studio ? flags["studio-api-url"] : flags["api-url"];
-        const studioApi = new ApiClient(apiUrl, flags["api-key"]);
+        const studioApi = new ApiClient(apiUrl, flags["api-key"], flags.studio);
         const challenge = new Challenge(args.challengeSlug, flags["challenges-path"]);
         try {
             const { participants } = (await loadTemplateRun());

package/dist/commands/experiment/recordings.d.ts ADDED Viewed

@@ -0,0 +1,19 @@
+import { Command } from "@oclif/core";
+export default class Recordings extends Command {
+    static description: string;
+    static examples: string[];
+    static args: {
+        experimentName: import("@oclif/core/interfaces").Arg<string, Record<string, unknown>>;
+        runId: import("@oclif/core/interfaces").Arg<string | undefined, Record<string, unknown>>;
+    };
+    static flags: {
+        "api-key": import("@oclif/core/interfaces").OptionFlag<string, import("@oclif/core/interfaces").CustomOptions>;
+        "api-url": import("@oclif/core/interfaces").OptionFlag<string, import("@oclif/core/interfaces").CustomOptions>;
+        all: import("@oclif/core/interfaces").BooleanFlag<boolean>;
+        version: import("@oclif/core/interfaces").OptionFlag<number | undefined, import("@oclif/core/interfaces").CustomOptions>;
+    };
+    run(): Promise<void>;
+    private downloadForExperiment;
+    private fetchAndDownloadTargets;
+    private downloadRecordings;
+}

package/dist/commands/experiment/recordings.js ADDED Viewed

@@ -0,0 +1,416 @@
+import fs from "node:fs/promises";
+import path from "node:path";
+import { Args, Command, Flags } from "@oclif/core";
+import enquirer from "enquirer";
+import { Listr } from "listr2";
+import pc from "picocolors";
+import { ApiClient } from "../../lib/api-client.js";
+import { Experimenter } from "../../lib/experiment/experimenter.js";
+import { getConfigFlags } from "../../lib/flags.js";
+// Check if recordings exist locally for a run
+async function checkRecordingsExist(experimentDir, version, runId) {
+    const recordingsPath = path.join(experimentDir, "versions", version.toString().padStart(3, "0"), "recordings", runId);
+    try {
+        await fs.access(recordingsPath);
+        const files = await fs.readdir(recordingsPath, { recursive: true });
+        return files.some((f) => String(f).endsWith(".mcpr"));
+    }
+    catch {
+        return false;
+    }
+}
+// Format bytes for display
+function formatBytes(bytes) {
+    if (bytes === 0)
+        return "0 Bytes";
+    const k = 1024;
+    const sizes = ["Bytes", "KB", "MB", "GB"];
+    const i = Math.floor(Math.log(bytes) / Math.log(k));
+    return `${Math.round((bytes / k ** i) * 100) / 100} ${sizes[i]}`;
+}
+// Sanitize timestamp string for use in filenames
+function sanitizeTimestamp(timestamp) {
+    // Replace colons, spaces, and other problematic characters
+    return timestamp
+        .replace(/:/g, "-")
+        .replace(/\s+/g, "_")
+        .replace(/[<>:"|?*]/g, "_");
+}
+// Download file with retry logic
+async function downloadFile(url, outputPath, maxRetries = 3) {
+    for (let attempt = 1; attempt <= maxRetries; attempt++) {
+        try {
+            const response = await fetch(url);
+            if (!response.ok) {
+                throw new Error(`HTTP ${response.status}: ${response.statusText}`);
+            }
+            const buffer = await response.arrayBuffer();
+            await fs.mkdir(path.dirname(outputPath), { recursive: true });
+            await fs.writeFile(outputPath, Buffer.from(buffer));
+            return;
+        }
+        catch (error) {
+            if (attempt === maxRetries)
+                throw error;
+            await new Promise((resolve) => setTimeout(resolve, 1000 * attempt)); // linear backoff: 1s, 2s, 3s
+        }
+    }
+}
+// Get all versions for an experiment
+async function getAllVersions(experimentDir) {
+    const versionsDir = path.join(experimentDir, "versions");
+    try {
+        const entries = await fs.readdir(versionsDir, { withFileTypes: true });
+        return entries
+            .filter((e) => e.isDirectory())
+            .map((e) => parseInt(e.name, 10))
+            .filter((n) => !Number.isNaN(n))
+            .sort((a, b) => a - b);
+    }
+    catch {
+        return [];
+    }
+}
+export default class Recordings extends Command {
+    static description = "Download recordings from an experiment run";
+    static examples = [
+        "<%= config.bin %> <%= command.id %> my-experiment",
+        "<%= config.bin %> <%= command.id %> my-experiment <run-id>",
+        "<%= config.bin %> <%= command.id %> my-experiment --all",
+        "<%= config.bin %> <%= command.id %> my-experiment <run-id> --all",
+        "<%= config.bin %> <%= command.id %> my-experiment --version 2",
+        "<%= config.bin %> <%= command.id %> my-experiment --version 1 --all",
+    ];
+    static args = {
+        experimentName: Args.string({
+            description: "Experiment name",
+            required: true,
+        }),
+        runId: Args.string({
+            description: "Specific run ID to download recordings from (optional)",
+            required: false,
+        }),
+    };
+    static flags = {
+        all: Flags.boolean({
+            description: "Download all runs and participants (if no run specified), or all participants (if run specified)",
+            default: false,
+        }),
+        version: Flags.integer({
+            description: "Specific experiment version to download recordings from (e.g., 0, 1, 2)",
+            required: false,
+        }),
+        ...getConfigFlags("api-key", "api-url"),
+    };
+    async run() {
+        const { args, flags } = await this.parse(Recordings);
+        const api = new ApiClient(flags["api-url"], flags["api-key"]);
+        const { experimentName, runId } = args;
+        await this.downloadForExperiment(experimentName, runId, api, flags.all, flags.version);
+    }
+    async downloadForExperiment(experimentName, runId, api, all, version) {
+        const experimenter = new Experimenter(experimentName, "", api);
+        // Check if experiment exists
+        if (!(await experimenter.exists())) {
+            this.error(pc.red(`Experiment '${experimentName}' does not exist. Run 'kradle experiment list' to see available experiments.`));
+        }
+        const experimentDir = experimenter.experimentDir;
+        // Get all versions
+        const allVersions = await getAllVersions(experimentDir);
+        if (allVersions.length === 0) {
+            this.error(pc.red("No experiment versions found. Run the experiment first."));
+        }
+        // Default to latest version if not specified
+        let targetVersion;
+        if (version !== undefined) {
+            if (!allVersions.includes(version)) {
+                this.error(pc.red(`Version ${version} not found in experiment '${experimentName}'. ` +
+                    `Available versions: ${allVersions.join(", ")}`));
+            }
+            targetVersion = version;
+            this.log(pc.blue(`>> Filtering to version ${version}`));
+        }
+        else {
+            // Default to latest version
+            targetVersion = Math.max(...allVersions);
+        }
+        const allRunInfos = [];
+        const completedStatuses = new Set(["completed", "finished", "game_over"]);
+        const progressPath = path.join(experimentDir, "versions", targetVersion.toString().padStart(3, "0"), "progress.json");
+        try {
+            const progressData = await fs.readFile(progressPath, "utf-8");
+            const progress = JSON.parse(progressData);
+            for (const entry of progress.entries) {
+                // Only include runs that are completed (exclude in-progress, queued, or error runs)
+                if (entry.runId && completedStatuses.has(entry.status)) {
+                    const hasRecordings = await checkRecordingsExist(experimentDir, targetVersion, entry.runId);
+                    allRunInfos.push({
+                        version: targetVersion,
+                        runId: entry.runId,
+                        index: entry.index,
+                        status: entry.status,
+                        hasRecordings,
+                        participantIds: entry.participantIds,
+                        endTime: entry.endTime,
+                    });
+                }
+            }
+        }
+        catch { }
+        if (allRunInfos.length === 0) {
+            this.error(pc.yellow("No completed runs found. Wait for runs to finish or run the experiment first."));
+        }
+        let selectedRuns;
+        if (all && !runId) {
+            // Download all runs and all participants (--all without specific run)
+            selectedRuns = allRunInfos;
+            this.log(pc.blue(`>> Downloading recordings for all ${selectedRuns.length} runs`));
+        }
+        else if (runId) {
+            // Find specific run by ID
+            const matchingRun = allRunInfos.find((r) => r.runId === runId);
+            if (!matchingRun) {
+                this.error(pc.red(`Run ID '${runId}' not found in experiment '${experimentName}'. ` +
+                    `Run 'kradle experiment recordings ${experimentName}' to see available runs.`));
+            }
+            selectedRuns = [matchingRun];
+            this.log(pc.blue(`>> Downloading recordings for run: ${runId}`));
+        }
+        else {
+            // Interactive run selection
+            const choices = [
+                // Only show "All runs" option if there are multiple runs
+                ...(allRunInfos.length > 1
+                    ? [
+                        {
+                            name: "all",
+                            message: `All runs (${allRunInfos.length} total)`,
+                            hint: "Download all",
+                        },
+                    ]
+                    : []),
+                ...allRunInfos.map((run) => {
+                    const indicator = run.hasRecordings ? pc.blue("·") : "☐";
+                    const participants = run.participantIds?.join(", ") || "No participants";
+                    return {
+                        name: run.runId,
+                        message: `${indicator} ${participants} - ${run.runId}`,
+                        hint: run.status,
+                    };
+                }),
+            ];
+            const { selectedRunId } = await enquirer.prompt({
+                type: "select",
+                name: "selectedRunId",
+                message: "Select a run to download recordings from",
+                choices,
+            });
+            if (selectedRunId === "all") {
+                selectedRuns = allRunInfos;
+                this.log(pc.blue(`>> Downloading recordings for all ${selectedRuns.length} runs`));
+            }
+            else {
+                const selectedRun = allRunInfos.find((r) => r.runId === selectedRunId);
+                if (!selectedRun) {
+                    this.error(pc.red("Selected run not found."));
+                }
+                selectedRuns = [selectedRun];
+            }
+        }
+        // Build download targets
+        const downloadTargets = [];
+        // Prompt for participant selection only if: single run selected AND not --all flag
+        if (selectedRuns.length === 1 && !all) {
+            const firstRunInfo = allRunInfos.find((r) => r.runId === selectedRuns[0].runId);
+            if (!firstRunInfo?.participantIds || firstRunInfo.participantIds.length === 0) {
+                this.error(pc.red("Participant IDs not available for this run. This may be an old run created before recording support was added."));
+            }
+            // Interactive participant selection for single run
+            const participantChoices = [
+                { name: "all", message: "All participants", value: "all" },
+                ...firstRunInfo.participantIds.map((id, idx) => ({
+                    name: id,
+                    message: `Participant ${idx}: ${id}`,
+                    value: id,
+                })),
+            ];
+            const { participantSelection } = await enquirer.prompt({
+                type: "select",
+                name: "participantSelection",
+                message: "Select participant(s)",
+                choices: participantChoices,
+            });
+            const run = selectedRuns[0];
+            if (participantSelection === "all") {
+                // Download all participants
+                for (const participantId of firstRunInfo.participantIds) {
+                    downloadTargets.push({
+                        version: run.version,
+                        runId: run.runId,
+                        participantId,
+                        experimentDir,
+                    });
+                }
+            }
+            else {
+                // Download single participant
+                downloadTargets.push({
+                    version: run.version,
+                    runId: run.runId,
+                    participantId: participantSelection,
+                    experimentDir,
+                });
+            }
+        }
+        else {
+            // Multiple runs OR --all flag: automatically download all participants
+            for (const run of selectedRuns) {
+                const runInfo = allRunInfos.find((r) => r.runId === run.runId);
+                if (!runInfo?.participantIds || runInfo.participantIds.length === 0) {
+                    this.log(pc.yellow(`Warning: Skipping run ${run.runId} - no participant IDs available`));
+                    continue;
+                }
+                // Download all participants for this run
+                for (const participantId of runInfo.participantIds) {
+                    downloadTargets.push({
+                        version: run.version,
+                        runId: run.runId,
+                        participantId,
+                        experimentDir,
+                    });
+                }
+            }
+        }
+        // Ensure we wait at least 90 seconds after run completion before downloading
+        // to avoid missing recordings that take time to upload
+        const MIN_WAIT_AFTER_COMPLETION_MS = 90000; // 90 seconds
+        const now = Date.now();
+        // Group targets by run and check wait times
+        const runEndTimes = new Map();
+        for (const target of downloadTargets) {
+            const runInfo = allRunInfos.find((r) => r.runId === target.runId);
+            if (runInfo?.endTime) {
+                runEndTimes.set(target.runId, runInfo.endTime);
+            }
+        }
+        // Separate into ready and deferred targets
+        const readyTargets = [];
+        const deferredTargets = [];
+        for (const target of downloadTargets) {
+            const endTime = runEndTimes.get(target.runId);
+            if (endTime) {
+                const timeSinceCompletion = now - endTime;
+                if (timeSinceCompletion < MIN_WAIT_AFTER_COMPLETION_MS) {
+                    deferredTargets.push(target);
+                }
+                else {
+                    readyTargets.push(target);
+                }
+            }
+            else {
+                // No end time available, process immediately
+                readyTargets.push(target);
+            }
+        }
+        // Process ready targets first
+        if (readyTargets.length > 0) {
+            this.log(pc.blue(`>> Fetching and downloading recordings for ${readyTargets.length} ready target(s)...`));
+            await this.fetchAndDownloadTargets(api, readyTargets);
+        }
+        // Wait for deferred targets, then process them
+        if (deferredTargets.length > 0) {
+            const uniqueDeferredRuns = new Set(deferredTargets.map((t) => t.runId));
+            const maxWaitNeeded = Math.max(...Array.from(uniqueDeferredRuns)
+                .map((runId) => {
+                const endTime = runEndTimes.get(runId);
+                if (!endTime)
+                    return 0;
+                return MIN_WAIT_AFTER_COMPLETION_MS - (now - endTime);
+            })
+                .filter((wait) => wait > 0));
+            this.log(pc.yellow(`>> Waiting ${Math.ceil(maxWaitNeeded / 1000)}s for ${uniqueDeferredRuns.size} recent run(s) to ensure all recordings are uploaded...`));
+            // Wait the necessary time
+            await new Promise((resolve) => setTimeout(resolve, maxWaitNeeded));
+            this.log(pc.blue(`>> Fetching and downloading recordings for ${deferredTargets.length} deferred target(s)...`));
+            await this.fetchAndDownloadTargets(api, deferredTargets);
+        }
+        if (readyTargets.length === 0 && deferredTargets.length === 0) {
+            this.log(pc.yellow("No recordings to download."));
+            return;
+        }
+        const totalTargets = readyTargets.length + deferredTargets.length;
+        const uniqueRuns = new Set([...readyTargets, ...deferredTargets].map((t) => t.runId));
+        // All targets are from the same version
+        const recordingsDir = path.join(experimentDir, "versions", targetVersion.toString().padStart(3, "0"), "recordings");
+        this.log(pc.green(`\n✓ Downloaded recordings for ${totalTargets} participant(s) across ${uniqueRuns.size} run(s) to ${recordingsDir}`));
+    }
+    async fetchAndDownloadTargets(api, targets) {
+        // Fetch all recordings to show summary
+        let totalCount = 0;
+        let totalSize = 0;
+        for (const target of targets) {
+            try {
+                const recordings = await api.getRunRecordings(target.runId, target.participantId);
+                totalCount += recordings.length;
+                totalSize += recordings.reduce((sum, r) => sum + r.sizeBytes, 0);
+            }
+            catch {
+                // Skip targets with no recordings
+            }
+        }
+        if (totalCount === 0) {
+            this.log(pc.yellow("   No recordings found for these targets."));
+            return;
+        }
+        this.log(pc.blue(`   Found ${totalCount} recordings (Total: ${formatBytes(totalSize)})`));
+        // Download all recordings
+        await this.downloadRecordings(api, targets);
+    }
+    async downloadRecordings(api, targets) {
+        const allTasks = [];
+        for (const target of targets) {
+            const { version, runId, participantId, experimentDir } = target;
+            // Fetch recordings for this target
+            let recordings;
+            try {
+                recordings = await api.getRunRecordings(runId, participantId);
+            }
+            catch {
+                // Skip targets with no recordings
+                continue;
+            }
+            if (recordings.length === 0)
+                continue;
+            // Create tasks for each recording
+            for (const recording of recordings) {
+                const sanitizedFilename = `${sanitizeTimestamp(recording.timestamp)}.mcpr`;
+                allTasks.push({
+                    title: `${participantId} - ${runId} - ${sanitizedFilename}`,
+                    task: async (_, task) => {
+                        const outputPath = path.join(experimentDir, "versions", version.toString().padStart(3, "0"), "recordings", runId, participantId, sanitizedFilename);
+                        // Skip if already exists
+                        try {
+                            await fs.access(outputPath);
+                            task.skip("Already downloaded");
+                            return;
+                        }
+                        catch { }
+                        // Get download URL
+                        const { downloadUrl } = await api.getRecordingDownloadUrl(runId, participantId, recording.timestamp);
+                        // Download file
+                        await downloadFile(downloadUrl, outputPath);
+                        task.title = `${participantId} - ${runId} - ${sanitizedFilename} (${formatBytes(recording.sizeBytes)})`;
+                    },
+                });
+            }
+        }
+        if (allTasks.length === 0) {
+            return;
+        }
+        const tasks = new Listr(allTasks, {
+            concurrent: 3,
+            exitOnError: false,
+        });
+        await tasks.run();
+    }
+}

package/dist/commands/experiment/run.d.ts CHANGED Viewed

@@ -11,6 +11,7 @@ export default class Run extends Command {
         "web-url": import("@oclif/core/interfaces").OptionFlag<string, import("@oclif/core/interfaces").CustomOptions>;
         "new-version": import("@oclif/core/interfaces").BooleanFlag<boolean>;
         "max-concurrent": import("@oclif/core/interfaces").OptionFlag<number, import("@oclif/core/interfaces").CustomOptions>;
+        "download-recordings": import("@oclif/core/interfaces").BooleanFlag<boolean>;
     };
     run(): Promise<void>;
 }

package/dist/commands/experiment/run.js CHANGED Viewed

@@ -28,6 +28,11 @@ export default class Run extends Command {
             description: "Maximum concurrent runs",
             default: DEFAULT_MAX_CONCURRENT,
         }),
+        "download-recordings": Flags.boolean({
+            char: "d",
+            description: "Automatically download recordings after each run finishes",
+            default: false,
+        }),
         ...getConfigFlags("api-key", "api-url", "web-url"),
     };
     async run() {
@@ -51,6 +56,7 @@ export default class Run extends Command {
                 new: flags["new-version"],
                 maxConcurrent: flags["max-concurrent"],
                 openMetabase: true,
+                downloadRecordings: flags["download-recordings"],
             });
             this.log(pc.green("\n✓ Experiment complete!"));
         }

package/dist/lib/api-client.d.ts CHANGED Viewed

@@ -1,9 +1,10 @@
 import type z from "zod";
-import { type AgentSchemaType, type ChallengeConfigSchemaType, type ChallengeSchemaType, HumanSchema, type RunStatusSchemaType } from "./schemas.js";
+import { type AgentSchemaType, type ChallengeConfigSchemaType, type ChallengeSchemaType, HumanSchema, type RecordingDownloadUrlResponse, type RecordingMetadata, type RunStatusSchemaType } from "./schemas.js";
 export declare class ApiClient {
     private apiUrl;
     private kradleApiKey;
-    constructor(apiUrl: string, kradleApiKey: string);
+    private isStudio;
+    constructor(apiUrl: string, kradleApiKey: string, isStudio?: boolean);
     private request;
     private get;
     private post;
@@ -70,6 +71,12 @@ export declare class ApiClient {
         participants: unknown[];
     }): Promise<{
         runIds?: string[] | undefined;
+        participants?: Record<string, {
+            agent: string;
+            role: string;
+            inputOrder: number;
+        }> | undefined;
+        id?: string | undefined;
     }>;
     deleteChallenge(challengeId: string): Promise<void>;
     /**
@@ -85,4 +92,19 @@ export declare class ApiClient {
      * @throws an error if the tag fails to be added.
      */
     tagRun(runId: string, tag: string): Promise<void>;
+    /**
+     * Get recordings for a specific participant in a run.
+     * @param runId - The ID of the run.
+     * @param participantId - The ID of the participant.
+     * @returns Array of recording metadata.
+     */
+    getRunRecordings(runId: string, participantId: string): Promise<RecordingMetadata[]>;
+    /**
+     * Get a signed download URL for a specific recording.
+     * @param runId - The ID of the run.
+     * @param participantId - The ID of the participant.
+     * @param timestamp - The timestamp of the recording.
+     * @returns Download URL and expiration time.
+     */
+    getRecordingDownloadUrl(runId: string, participantId: string, timestamp: string): Promise<RecordingDownloadUrlResponse>;
 }

package/dist/lib/api-client.js CHANGED Viewed

@@ -1,6 +1,6 @@
 import { existsSync } from "node:fs";
 import fs from "node:fs/promises";
-import { AgentsResponseSchema, ChallengeSchema, ChallengesResponseSchema, HumanSchema, RunResponseSchema, RunStatusSchema, UploadUrlResponseSchema, } from "./schemas.js";
+import { AgentsResponseSchema, ChallengeSchema, ChallengesResponseSchema, HumanSchema, JobResponseSchema, RecordingDownloadUrlResponseSchema, RecordingsListResponseSchema, RunStatusSchema, UploadUrlResponseSchema, } from "./schemas.js";
 const DEFAULT_PAGE_SIZE = 30;
 const DEFAULT_CHALLENGE_SCHEMA = {
     slug: "",
@@ -25,9 +25,11 @@ const DEFAULT_CHALLENGE_SCHEMA = {
 export class ApiClient {
     apiUrl;
     kradleApiKey;
-    constructor(apiUrl, kradleApiKey) {
+    isStudio;
+    constructor(apiUrl, kradleApiKey, isStudio = false) {
         this.apiUrl = apiUrl;
         this.kradleApiKey = kradleApiKey;
+        this.isStudio = isStudio;
     }
     async request(endpoint, options) {
         const fullUrl = `${this.apiUrl}/${endpoint}`;
@@ -210,9 +212,10 @@ export class ApiClient {
     }
     async runChallenge(runData) {
         const url = "jobs";
+        const payload = this.isStudio ? runData : { ...runData, jobType: "background" };
         return this.post(url, {
-            body: JSON.stringify(runData),
-        }, RunResponseSchema);
+            body: JSON.stringify(payload),
+        }, JobResponseSchema);
     }
     async deleteChallenge(challengeId) {
         const url = `challenges/${challengeId}`;
@@ -239,4 +242,26 @@ export class ApiClient {
             body: JSON.stringify({ tag }),
         });
     }
+    /**
+     * Get recordings for a specific participant in a run.
+     * @param runId - The ID of the run.
+     * @param participantId - The ID of the participant.
+     * @returns Array of recording metadata.
+     */
+    async getRunRecordings(runId, participantId) {
+        const url = `runs/${runId}/recordings/${participantId}`;
+        const response = await this.get(url, {}, RecordingsListResponseSchema);
+        return response.recordings;
+    }
+    /**
+     * Get a signed download URL for a specific recording.
+     * @param runId - The ID of the run.
+     * @param participantId - The ID of the participant.
+     * @param timestamp - The timestamp of the recording.
+     * @returns Download URL and expiration time.
+     */
+    async getRecordingDownloadUrl(runId, participantId, timestamp) {
+        const url = `runs/${runId}/recordings/${participantId}/downloadUrl?timestamp=${encodeURIComponent(timestamp)}`;
+        return this.get(url, {}, RecordingDownloadUrlResponseSchema);
+    }
 }

package/dist/lib/experiment/experimenter.d.ts CHANGED Viewed

@@ -84,4 +84,9 @@ export declare class Experimenter {
      * Open run in browser
      */
     private openRun;
+    /**
+     * Download recordings for a completed run with smart polling
+     * Polls for 90 seconds after run completion (matching pod grace period)
+     */
+    private downloadRecordingsForRun;
 }

package/dist/lib/experiment/experimenter.js CHANGED Viewed

@@ -5,6 +5,14 @@ import { executeNodeCommand, openInBrowser } from "../utils.js";
 import { Runner } from "./runner.js";
 import { TUI } from "./tui.js";
 import { ExperimentMetadataSchema, ManifestSchema, ProgressSchema } from "./types.js";
+/**
+ * Sanitize a timestamp string so it can be used as a single filename component.
+ *
+ * Colons become "-", whitespace runs become "_", and path separators, control
+ * characters and the remaining Windows-reserved characters become "_", so the
+ * result cannot introduce extra path segments when joined into a path.
+ *
+ * @param timestamp - Timestamp string (e.g. as reported for a recording).
+ * @returns The sanitized string.
+ */
+function sanitizeTimestamp(timestamp) {
+    return timestamp
+        .replace(/:/g, "-")
+        .replace(/\s+/g, "_")
+        // ":" is already handled above; "/" and "\" are included so the name stays one path segment
+        .replace(/[<>"|?*\/\\\x00-\x1f]/g, "_");
+}
 export class Experimenter {
     name;
     webUrl;
@@ -192,6 +200,7 @@ export class Experimenter {
      */
     async run(options) {
         const version = await this.getOrCreateVersion(options.new);
+        this.currentVersion = version;
         // Load manifest
         const manifest = await this.loadManifest(version);
         // We have 2 mandatory tags: "exp-<experiment-name>" and "exp-<experiment-name>-v<version>"
@@ -203,6 +212,16 @@ export class Experimenter {
             maxConcurrent: options.maxConcurrent,
             tags: tags,
             onStateChange: () => this.onRunStateChange(),
+            onRunComplete: options.downloadRecordings
+                ? async (index, runId) => {
+                    const state = this.runner?.getRunState(index);
+                    if (!state?.participantIds) {
+                        console.error(pc.yellow(`Warning: Participant IDs not available for run ${runId}, skipping recording download.`));
+                        return;
+                    }
+                    await this.downloadRecordingsForRun(runId, state.participantIds, version);
+                }
+                : undefined,
         });
         // Restore progress if applicable
         const progress = await this.loadProgress(version);
@@ -267,4 +286,83 @@ export class Experimenter {
             openInBrowser(url);
         }
     }
+    /**
+     * Download recordings for a completed run with smart polling.
+     *
+     * Every 5 seconds, lists each participant's recordings and downloads those not yet
+     * saved; once 90 seconds have elapsed (pod grace period) one final pass is made.
+     * Files are stored under
+     * <experimentDir>/versions/<version padded to 3 digits>/recordings/<runId>/<participantId>/<sanitized timestamp>.mcpr.
+     * Listing and download errors are caught, logged as warnings and retried on the next pass.
+     *
+     * @param runId - ID of the completed run.
+     * @param participantIds - Participant IDs whose recordings are fetched.
+     * @param version - Experiment version number, used to build the output directory.
+     */
+    async downloadRecordingsForRun(runId, participantIds, version) {
+        const POLL_INTERVAL_MS = 5000; // Check every 5 seconds
+        const TOTAL_POLL_DURATION_MS = 90000; // Poll for 90 seconds total (pod grace period)
+        const downloadedRecordings = new Set(); // Recordings known to be on disk, keyed by participant + timestamp
+        const failedDownloads = new Set(); // Downloads already warned about, to avoid spamming logs
+        const failedListings = new Set(); // Participants whose listing failure was already warned about
+        const startTime = Date.now();
+        // Keep polling until grace period expires, then do one final check
+        let isLastAttempt = false;
+        while (true) {
+            // For each participant in the run
+            for (const participantId of participantIds) {
+                try {
+                    // Fetch current available recordings
+                    const recordings = await this.api.getRunRecordings(runId, participantId);
+                    // Listing works (again), so a later failure should be reported anew
+                    failedListings.delete(participantId);
+                    // Download any new recordings
+                    for (const recording of recordings) {
+                        const recordingKey = `${participantId}-${recording.timestamp}`;
+                        // Skip if already downloaded
+                        if (downloadedRecordings.has(recordingKey)) {
+                            continue;
+                        }
+                        const outputPath = path.join(this.experimentDir, "versions", version.toString().padStart(3, "0"), "recordings", runId, participantId, `${sanitizeTimestamp(recording.timestamp)}.mcpr`);
+                        // A file at the final path is treated as already downloaded
+                        try {
+                            await fs.access(outputPath);
+                            downloadedRecordings.add(recordingKey);
+                            continue;
+                        }
+                        catch { }
+                        // Write to a temporary sibling and rename it into place, so an interrupted
+                        // download never leaves a truncated file that the check above would accept
+                        const partialPath = `${outputPath}.part`;
+                        try {
+                            // Download the recording
+                            const { downloadUrl } = await this.api.getRecordingDownloadUrl(runId, participantId, recording.timestamp);
+                            const response = await fetch(downloadUrl);
+                            if (!response.ok) {
+                                throw new Error(`HTTP ${response.status}: ${response.statusText}`);
+                            }
+                            const buffer = await response.arrayBuffer();
+                            await fs.mkdir(path.dirname(outputPath), { recursive: true });
+                            await fs.writeFile(partialPath, Buffer.from(buffer));
+                            await fs.rename(partialPath, outputPath);
+                            downloadedRecordings.add(recordingKey);
+                            // Forget any earlier failure for this recording
+                            failedDownloads.delete(recordingKey);
+                        }
+                        catch (error) {
+                            // Best-effort cleanup; the partial file may not exist
+                            await fs.unlink(partialPath).catch(() => { });
+                            // Only log each failure once to avoid spam
+                            if (!failedDownloads.has(recordingKey)) {
+                                console.error(pc.yellow(`Warning: Failed to download recording ${recording.timestamp} for participant ${participantId}: ${error instanceof Error ? error.message : String(error)}`));
+                                failedDownloads.add(recordingKey);
+                            }
+                        }
+                    }
+                }
+                catch (error) {
+                    // Log API errors (e.g., fetching recordings list) once per participant until listing succeeds again
+                    if (!failedListings.has(participantId)) {
+                        console.error(pc.yellow(`Warning: Failed to fetch recordings for participant ${participantId}: ${error instanceof Error ? error.message : String(error)}`));
+                        failedListings.add(participantId);
+                    }
+                }
+            }
+            // Exit if this was the last attempt
+            if (isLastAttempt) {
+                break;
+            }
+            // Check if we've exceeded the polling duration
+            const elapsed = Date.now() - startTime;
+            if (elapsed >= TOTAL_POLL_DURATION_MS) {
+                // Do one final attempt before exiting (no wait before it)
+                isLastAttempt = true;
+            }
+            else {
+                // Wait before next poll
+                await new Promise((resolve) => setTimeout(resolve, POLL_INTERVAL_MS));
+            }
+        }
+    }
 }

package/dist/lib/experiment/runner.d.ts CHANGED Viewed

@@ -12,10 +12,12 @@ export declare class Runner {
     private maxConcurrent;
     private tags;
     private onStateChange?;
+    private onRunComplete?;
     constructor(runs: RunConfig[], api: ApiClient, baseUrl: string, options?: {
         maxConcurrent?: number;
         tags?: string[];
         onStateChange?: (index: number, state: RunState) => void;
+        onRunComplete?: (index: number, runId: string) => Promise<void>;
     });
     /**
      * Restore progress from a previous run

package/dist/lib/experiment/runner.js CHANGED Viewed

@@ -13,6 +13,7 @@ export class Runner {
     maxConcurrent;
     tags;
     onStateChange;
+    onRunComplete;
     constructor(runs, api, baseUrl, options = {}) {
         this.runs = runs;
         this.api = api;
@@ -26,6 +27,7 @@ export class Runner {
             }
         }
         this.onStateChange = options.onStateChange;
+        this.onRunComplete = options.onRunComplete;
         // Initialize all run states as queued
         this.states = runs.map((config, index) => ({
             index,
@@ -155,7 +157,15 @@ export class Runner {
                 throw new Error("No run ID returned from API");
             }
             const runId = response.runIds[0];
-            this.updateState(index, { runId, status: "running" });
+            // Extract participant IDs from response and sort by inputOrder
+            const participantIds = response.participants
+                ? Object.keys(response.participants).sort((a, b) => {
+                    const aOrder = response.participants?.[a]?.inputOrder ?? 0;
+                    const bOrder = response.participants?.[b]?.inputOrder ?? 0;
+                    return aOrder - bOrder;
+                })
+                : undefined;
+            this.updateState(index, { runId, participantIds, status: "running" });
             // Tag the run with all configured tags
             const tags = [...this.tags, ...(state.config.tags ?? [])];
             await Promise.all(tags.map((tag) => this.api.tagRun(runId, tag)));
@@ -192,6 +202,13 @@ export class Runner {
                 if (normalizedStatus === "completed" || normalizedStatus === "finished" || normalizedStatus === "game_over") {
                     this.completedRuns.add(index);
                     this.activeRuns.delete(index);
+                    // Trigger recording download if callback provided
+                    if (this.onRunComplete) {
+                        // Don't await - run in background to avoid blocking
+                        this.onRunComplete(index, runId).catch(() => {
+                            // Error already logged in experimenter, just continue
+                        });
+                    }
                     return;
                 }
                 if (normalizedStatus === "error") {
@@ -272,6 +289,7 @@ export class Runner {
             index: state.index,
             status: state.status,
             runId: state.runId,
+            participantIds: state.participantIds,
             startTime: state.startTime,
             endTime: this.completedRuns.has(state.index) ? Date.now() : undefined,
             error: state.error,

package/dist/lib/experiment/types.d.ts CHANGED Viewed

@@ -42,6 +42,7 @@ export declare const ProgressEntrySchema: z.ZodObject<{
         finished: "finished";
     }>;
     runId: z.ZodOptional<z.ZodString>;
+    participantIds: z.ZodOptional<z.ZodArray<z.ZodString>>;
     startTime: z.ZodOptional<z.ZodNumber>;
     endTime: z.ZodOptional<z.ZodNumber>;
     error: z.ZodOptional<z.ZodString>;
@@ -64,6 +65,7 @@ export declare const ProgressSchema: z.ZodObject<{
             finished: "finished";
         }>;
         runId: z.ZodOptional<z.ZodString>;
+        participantIds: z.ZodOptional<z.ZodArray<z.ZodString>>;
         startTime: z.ZodOptional<z.ZodNumber>;
         endTime: z.ZodOptional<z.ZodNumber>;
         error: z.ZodOptional<z.ZodString>;
@@ -94,6 +96,7 @@ export interface RunState {
     config: RunConfig;
     status: RunStatus;
     runId?: string;
+    participantIds?: string[];
     startTime?: number;
     error?: string;
 }
@@ -123,6 +126,7 @@ export interface ExperimentOptions {
     new: boolean;
     maxConcurrent: number;
     openMetabase?: boolean;
+    downloadRecordings?: boolean;
 }
 export declare const STATUS_ICONS: Record<RunStatus, {
     icon: string;

package/dist/lib/experiment/types.js CHANGED Viewed

@@ -32,6 +32,7 @@ export const ProgressEntrySchema = z.object({
         "error",
     ]),
     runId: z.string().optional(),
+    participantIds: z.array(z.string()).optional(),
     startTime: z.number().optional(),
     endTime: z.number().optional(),
     error: z.string().optional(),

package/dist/lib/schemas.d.ts CHANGED Viewed

@@ -118,8 +118,19 @@ export declare const ChallengesResponseSchema: z.ZodObject<{
 export declare const HumanSchema: z.ZodObject<{
     username: z.ZodString;
 }, z.core.$strip>;
-export declare const RunResponseSchema: z.ZodObject<{
+export declare const RunParticipantSchema: z.ZodObject<{
+    agent: z.ZodString;
+    role: z.ZodString;
+    inputOrder: z.ZodNumber;
+}, z.core.$strip>;
+export declare const JobResponseSchema: z.ZodObject<{
     runIds: z.ZodOptional<z.ZodArray<z.ZodString>>;
+    participants: z.ZodOptional<z.ZodRecord<z.ZodString, z.ZodObject<{
+        agent: z.ZodString;
+        role: z.ZodString;
+        inputOrder: z.ZodNumber;
+    }, z.core.$strip>>>;
+    id: z.ZodOptional<z.ZodString>;
 }, z.core.$strip>;
 export declare const RunStatusSchema: z.ZodObject<{
     id: z.ZodString;
@@ -165,11 +176,29 @@ export declare const AgentsResponseSchema: z.ZodObject<{
     }, z.core.$strip>>;
     nextPageToken: z.ZodOptional<z.ZodString>;
 }, z.core.$strip>;
+export declare const RecordingMetadataSchema: z.ZodObject<{
+    timestamp: z.ZodString;
+    sizeBytes: z.ZodNumber;
+}, z.core.$strip>;
+export declare const RecordingsListResponseSchema: z.ZodObject<{
+    recordings: z.ZodArray<z.ZodObject<{
+        timestamp: z.ZodString;
+        sizeBytes: z.ZodNumber;
+    }, z.core.$strip>>;
+}, z.core.$strip>;
+export declare const RecordingDownloadUrlResponseSchema: z.ZodObject<{
+    downloadUrl: z.ZodString;
+    expiresAt: z.ZodString;
+}, z.core.$strip>;
 export type ChallengeSchemaType = z.infer<typeof ChallengeSchema>;
 export type ChallengeConfigSchemaType = z.infer<typeof ChallengeConfigSchema>;
 export type ChallengesResponseType = z.infer<typeof ChallengesResponseSchema>;
 export type HumanSchemaType = z.infer<typeof HumanSchema>;
-export type RunResponseType = z.infer<typeof RunResponseSchema>;
+export type JobResponseType = z.infer<typeof JobResponseSchema>;
 export type RunStatusSchemaType = z.infer<typeof RunStatusSchema>;
 export type AgentSchemaType = z.infer<typeof AgentSchema>;
 export type AgentsResponseType = z.infer<typeof AgentsResponseSchema>;
+export type RecordingMetadata = z.infer<typeof RecordingMetadataSchema>;
+export type RecordingsListResponse = z.infer<typeof RecordingsListResponseSchema>;
+export type RecordingDownloadUrlResponse = z.infer<typeof RecordingDownloadUrlResponseSchema>;
+export type RunParticipant = z.infer<typeof RunParticipantSchema>;

package/dist/lib/schemas.js CHANGED Viewed

@@ -43,8 +43,15 @@ export const ChallengesResponseSchema = z.object({
 export const HumanSchema = z.object({
     username: z.string(),
 });
-export const RunResponseSchema = z.object({
+export const RunParticipantSchema = z.object({ // value type of JobResponseSchema.participants
+    agent: z.string(),
+    role: z.string(),
+    inputOrder: z.number(), // the runner sorts participant IDs by this value, ascending
+});
+export const JobResponseSchema = z.object({ // response shape used by ApiClient.runChallenge
     runIds: z.array(z.string()).optional(),
+    participants: z.record(z.string(), RunParticipantSchema).optional(), // the runner uses the record keys as participant IDs
+    id: z.string().optional(),
 });
 export const RunStatusSchema = z.object({
     id: z.string(),
@@ -72,3 +79,14 @@ export const AgentsResponseSchema = z.object({
     agents: z.array(AgentSchema),
     nextPageToken: z.string().optional(),
 });
+export const RecordingMetadataSchema = z.object({ // element type of RecordingsListResponseSchema.recordings
+    timestamp: z.string(), // passed back to getRecordingDownloadUrl and, once sanitized, used as the local filename
+    sizeBytes: z.number(),
+});
+export const RecordingsListResponseSchema = z.object({ // response shape used by ApiClient.getRunRecordings
+    recordings: z.array(RecordingMetadataSchema),
+});
+export const RecordingDownloadUrlResponseSchema = z.object({ // response shape used by ApiClient.getRecordingDownloadUrl
+    downloadUrl: z.string(), // fetched directly by the experimenter to obtain the recording bytes
+    expiresAt: z.string(), // kept as a plain string; no date format is enforced here
+});

package/oclif.manifest.json CHANGED Viewed

@@ -600,6 +600,80 @@
         "list.js"
       ]
     },
+    "experiment:recordings": {
+      "aliases": [],
+      "args": {
+        "experimentName": {
+          "description": "Experiment name",
+          "name": "experimentName",
+          "required": true
+        },
+        "runId": {
+          "description": "Specific run ID to download recordings from (optional)",
+          "name": "runId",
+          "required": false
+        }
+      },
+      "description": "Download recordings from an experiment run",
+      "examples": [
+        "<%= config.bin %> <%= command.id %> my-experiment",
+        "<%= config.bin %> <%= command.id %> my-experiment <run-id>",
+        "<%= config.bin %> <%= command.id %> my-experiment --all",
+        "<%= config.bin %> <%= command.id %> my-experiment <run-id> --all",
+        "<%= config.bin %> <%= command.id %> my-experiment --version 2",
+        "<%= config.bin %> <%= command.id %> my-experiment --version 1 --all"
+      ],
+      "flags": {
+        "all": {
+          "description": "Download all runs and participants (if no run specified), or all participants (if run specified)",
+          "name": "all",
+          "allowNo": false,
+          "type": "boolean"
+        },
+        "version": {
+          "description": "Specific experiment version to download recordings from (e.g., 0, 1, 2)",
+          "name": "version",
+          "required": false,
+          "hasDynamicHelp": false,
+          "multiple": false,
+          "type": "option"
+        },
+        "api-key": {
+          "description": "Kradle API key",
+          "env": "KRADLE_API_KEY",
+          "name": "api-key",
+          "required": true,
+          "hasDynamicHelp": false,
+          "multiple": false,
+          "type": "option"
+        },
+        "api-url": {
+          "description": "Kradle Web API URL",
+          "env": "KRADLE_API_URL",
+          "name": "api-url",
+          "required": true,
+          "default": "https://api.kradle.ai/v0",
+          "hasDynamicHelp": false,
+          "multiple": false,
+          "type": "option"
+        }
+      },
+      "hasDynamicHelp": false,
+      "hiddenAliases": [],
+      "id": "experiment:recordings",
+      "pluginAlias": "@kradle/cli",
+      "pluginName": "@kradle/cli",
+      "pluginType": "core",
+      "strict": true,
+      "enableJsonFlag": false,
+      "isESM": true,
+      "relativePath": [
+        "dist",
+        "commands",
+        "experiment",
+        "recordings.js"
+      ]
+    },
     "experiment:run": {
       "aliases": [],
       "args": {
@@ -632,6 +706,13 @@
           "multiple": false,
           "type": "option"
         },
+        "download-recordings": {
+          "char": "d",
+          "description": "Automatically download recordings after each run finishes",
+          "name": "download-recordings",
+          "allowNo": false,
+          "type": "boolean"
+        },
         "api-key": {
           "description": "Kradle API key",
           "env": "KRADLE_API_KEY",
@@ -679,5 +760,5 @@
       ]
     }
   },
-  "version": "0.1.0"
+  "version": "0.2.0"
 }

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
 	"name": "@kradle/cli",
-	"version": "0.1.0",
+	"version": "0.2.0",
 	"description": "Kradle's CLI. Manage challenges, experiments, agents and more!",
 	"keywords": [
 		"cli"
@@ -26,7 +26,10 @@
 		"lint": "biome check .",
 		"format": "biome format --write . && biome check --write .",
 		"prepack": "npm run build && npm run version",
-		"version": "oclif manifest && oclif readme && git add README.md"
+		"version": "oclif manifest && oclif readme && git add README.md",
+		"test": "vitest run",
+		"test:watch": "vitest",
+		"test:integration": "vitest run --config vitest.config.ts"
 	},
 	"dependencies": {
 		"@google-cloud/storage": "^7.17.3",
@@ -54,7 +57,8 @@
 		"chai": "^4",
 		"oclif": "^4",
 		"tsx": "^4.20.6",
-		"typescript": "^5.9.3"
+		"typescript": "^5.9.3",
+		"vitest": "^2.1.9"
 	},
 	"engines": {
 		"node": ">=22.18.0"