npm - @kradle/cli - Versions diffs - 0.0.17 → 0.2.0 - Mend

@kradle/cli 0.0.17 → 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (62) hide show

package/README.md +93 -65
package/dist/commands/agent/list.d.ts +4 -0
package/dist/commands/agent/list.js +6 -4
package/dist/commands/challenge/build.d.ts +9 -1
package/dist/commands/challenge/build.js +40 -12
package/dist/commands/challenge/create.d.ts +5 -1
package/dist/commands/challenge/create.js +17 -18
package/dist/commands/challenge/delete.d.ts +4 -1
package/dist/commands/challenge/delete.js +5 -5
package/dist/commands/challenge/list.d.ts +5 -0
package/dist/commands/challenge/list.js +11 -10
package/dist/commands/challenge/run.d.ts +8 -1
package/dist/commands/challenge/run.js +13 -8
package/dist/commands/challenge/watch.d.ts +4 -1
package/dist/commands/challenge/watch.js +8 -8
package/dist/commands/{evaluation → experiment}/create.d.ts +4 -0
package/dist/commands/{evaluation → experiment}/create.js +22 -21
package/dist/commands/{evaluation → experiment}/list.js +17 -19
package/dist/commands/experiment/recordings.d.ts +19 -0
package/dist/commands/experiment/recordings.js +416 -0
package/dist/commands/experiment/run.d.ts +17 -0
package/dist/commands/experiment/run.js +67 -0
package/dist/commands/init.js +2 -2
package/dist/lib/api-client.d.ts +51 -10
package/dist/lib/api-client.js +108 -39
package/dist/lib/arguments.d.ts +3 -2
package/dist/lib/arguments.js +5 -3
package/dist/lib/challenge.d.ts +13 -18
package/dist/lib/challenge.js +58 -62
package/dist/lib/experiment/experimenter.d.ts +92 -0
package/dist/lib/experiment/experimenter.js +368 -0
package/dist/lib/{evaluation → experiment}/index.d.ts +1 -1
package/dist/lib/{evaluation → experiment}/index.js +1 -1
package/dist/lib/{evaluation → experiment}/runner.d.ts +2 -0
package/dist/lib/{evaluation → experiment}/runner.js +21 -2
package/dist/lib/{evaluation → experiment}/tui.d.ts +1 -1
package/dist/lib/{evaluation → experiment}/tui.js +3 -3
package/dist/lib/{evaluation → experiment}/types.d.ts +10 -4
package/dist/lib/{evaluation → experiment}/types.js +5 -3
package/dist/lib/flags.d.ts +47 -0
package/dist/lib/flags.js +63 -0
package/dist/lib/schemas.d.ts +63 -2
package/dist/lib/schemas.js +27 -1
package/dist/lib/utils.d.ts +9 -10
package/dist/lib/utils.js +12 -12
package/oclif.manifest.json +423 -64
package/package.json +11 -8
package/static/challenge.ts +12 -13
package/static/experiment_template.ts +114 -0
package/static/project_template/dev.env +5 -5
package/static/project_template/prod.env +4 -4
package/static/project_template/tsconfig.json +1 -1
package/dist/commands/challenge/multi-upload.d.ts +0 -6
package/dist/commands/challenge/multi-upload.js +0 -80
package/dist/commands/evaluation/run.d.ts +0 -13
package/dist/commands/evaluation/run.js +0 -61
package/dist/lib/config.d.ts +0 -12
package/dist/lib/config.js +0 -49
package/dist/lib/evaluation/evaluator.d.ts +0 -88
package/dist/lib/evaluation/evaluator.js +0 -268
package/static/evaluation_template.ts +0 -69
/package/dist/commands/{evaluation → experiment}/list.d.ts +0 -0

package/dist/lib/experiment/experimenter.js ADDED Viewed

@@ -0,0 +1,368 @@
+import fs from "node:fs/promises";
+import path from "node:path";
+import pc from "picocolors";
+import { executeNodeCommand, openInBrowser } from "../utils.js";
+import { Runner } from "./runner.js";
+import { TUI } from "./tui.js";
+import { ExperimentMetadataSchema, ManifestSchema, ProgressSchema } from "./types.js";
+// Sanitize timestamp string for use in filenames
+function sanitizeTimestamp(timestamp) {
+    // Replace colons, spaces, and other problematic characters
+    return timestamp
+        .replace(/:/g, "-")
+        .replace(/\s+/g, "_")
+        .replace(/[<>:"|?*]/g, "_");
+}
+export class Experimenter {
+    name;
+    webUrl;
+    api;
+    experimentDir;
+    metadataPath;
+    runner;
+    tui;
+    currentVersion;
+    constructor(name, webUrl, api) {
+        this.name = name;
+        this.webUrl = webUrl;
+        this.api = api;
+        this.experimentDir = path.resolve(process.cwd(), "experiments", name);
+        this.metadataPath = path.join(this.experimentDir, ".experiment.json");
+    }
+    /**
+     * Get paths for a specific version
+     */
+    getVersionPaths(version) {
+        const versionDir = path.join(this.experimentDir, "versions", version.toString().padStart(3, "0"));
+        return {
+            versionDir,
+            configPath: path.join(versionDir, "config.ts"),
+            manifestPath: path.join(versionDir, "manifest.json"),
+            progressPath: path.join(versionDir, "progress.json"),
+        };
+    }
+    get configPath() {
+        return path.join(this.experimentDir, "config.ts");
+    }
+    /**
+     * Get the current version directory path
+     */
+    getCurrentVersionDir() {
+        if (this.currentVersion === undefined) {
+            throw new Error("No version set");
+        }
+        return this.getVersionPaths(this.currentVersion).versionDir;
+    }
+    /**
+     * Check if experiment exists
+     */
+    async exists() {
+        try {
+            await fs.access(this.experimentDir);
+            return true;
+        }
+        catch {
+            return false;
+        }
+    }
+    /**
+     * Check if config.ts exists (master config)
+     */
+    async configExists() {
+        try {
+            await fs.access(this.configPath);
+            return true;
+        }
+        catch {
+            return false;
+        }
+    }
+    /**
+     * Load experiment metadata
+     */
+    async loadMetadata() {
+        try {
+            const content = await fs.readFile(this.metadataPath, "utf-8");
+            const data = JSON.parse(content);
+            return ExperimentMetadataSchema.parse(data);
+        }
+        catch {
+            return null;
+        }
+    }
+    /**
+     * Save experiment metadata
+     */
+    async saveMetadata(metadata) {
+        await fs.writeFile(this.metadataPath, JSON.stringify(metadata, null, 2));
+    }
+    /**
+     * Get the current version number, or -1 if none exists
+     */
+    async getCurrentVersionNumber() {
+        const metadata = await this.loadMetadata();
+        return metadata?.currentVersion ?? -1;
+    }
+    /**
+     * Create a new version
+     */
+    async createNewVersion() {
+        const currentVersion = await this.getCurrentVersionNumber();
+        const newVersion = currentVersion + 1;
+        const paths = this.getVersionPaths(newVersion);
+        // Create version directory
+        await fs.mkdir(paths.versionDir, { recursive: true });
+        // Copy master config to version
+        const masterConfigPath = path.join(this.experimentDir, "config.ts");
+        await fs.copyFile(masterConfigPath, paths.configPath);
+        // Generate manifest from config
+        const manifest = await this.generateManifest(paths.configPath);
+        await fs.writeFile(paths.manifestPath, JSON.stringify(manifest, null, 2));
+        // Update metadata
+        await this.saveMetadata({ currentVersion: newVersion });
+        this.currentVersion = newVersion;
+        return newVersion;
+    }
+    /**
+     * Get or create a version
+     * @param createNew - If true, always create a new version. Otherwise, use current version or create first one if none exists.
+     */
+    async getOrCreateVersion(createNew) {
+        if (createNew) {
+            return await this.createNewVersion();
+        }
+        const currentVersion = await this.getCurrentVersionNumber();
+        if (currentVersion < 0) {
+            return await this.createNewVersion();
+        }
+        this.currentVersion = currentVersion;
+        return currentVersion;
+    }
+    /**
+     * Load manifest from version
+     */
+    async loadManifest(version) {
+        const paths = this.getVersionPaths(version);
+        const content = await fs.readFile(paths.manifestPath, "utf-8");
+        const data = JSON.parse(content);
+        return ManifestSchema.parse(data);
+    }
+    /**
+     * Load progress from version
+     */
+    async loadProgress(version) {
+        try {
+            const paths = this.getVersionPaths(version);
+            const content = await fs.readFile(paths.progressPath, "utf-8");
+            const data = JSON.parse(content);
+            return ProgressSchema.parse(data);
+        }
+        catch {
+            return null;
+        }
+    }
+    /**
+     * Save progress to current version
+     */
+    async saveProgress() {
+        if (!this.runner || this.currentVersion === undefined)
+            return;
+        const paths = this.getVersionPaths(this.currentVersion);
+        const progress = {
+            entries: this.runner.getProgressEntries(),
+            lastUpdated: Date.now(),
+        };
+        await fs.writeFile(paths.progressPath, JSON.stringify(progress, null, 2));
+    }
+    /**
+     * Execute config.ts to generate manifest
+     */
+    async generateManifest(configPath) {
+        const manifest = await this.executeConfigFile(configPath);
+        return ManifestSchema.parse(manifest);
+    }
+    /**
+     * Execute config.ts file and return the manifest
+     */
+    async executeConfigFile(configPath) {
+        // We spawn a new NodeJS process to execute & log the config file.
+        // We can't directly import the file because it would be cached, and import cache can't be invalidated.
+        const stdout = await executeNodeCommand([
+            "--experimental-transform-types",
+            "--no-warnings",
+            "-e",
+            `console.log(JSON.stringify(require("${configPath}").main()));`,
+        ], {});
+        return JSON.parse(stdout.trim());
+    }
+    /**
+     * Run the experiment
+     */
+    async run(options) {
+        const version = await this.getOrCreateVersion(options.new);
+        this.currentVersion = version;
+        // Load manifest
+        const manifest = await this.loadManifest(version);
+        // We have 2 mandatory tags: "exp-<experiment-name>" and "exp-<experiment-name>-v<version>"
+        const experimentTag = `exp-${this.name}`;
+        const versionTag = `${experimentTag}-v${version}`;
+        const tags = [experimentTag, versionTag, ...(manifest.tags ?? [])];
+        // Create runner
+        this.runner = new Runner(manifest.runs, this.api, this.webUrl, {
+            maxConcurrent: options.maxConcurrent,
+            tags: tags,
+            onStateChange: () => this.onRunStateChange(),
+            onRunComplete: options.downloadRecordings
+                ? async (index, runId) => {
+                    const state = this.runner?.getRunState(index);
+                    if (!state?.participantIds) {
+                        console.error(pc.yellow(`Warning: Participant IDs not available for run ${runId}, skipping recording download.`));
+                        return;
+                    }
+                    await this.downloadRecordingsForRun(runId, state.participantIds, version);
+                }
+                : undefined,
+        });
+        // Restore progress if applicable
+        const progress = await this.loadProgress(version);
+        if (progress) {
+            this.runner.restoreProgress(progress.entries);
+        }
+        // Create TUI
+        this.tui = new TUI({
+            experimentName: `${this.name} (v${version})`,
+            onQuit: () => this.handleQuit(),
+            onOpenRun: (index) => this.openRun(index),
+        });
+        // Initial state update
+        this.tui.updateStates(this.runner.getAllStates());
+        this.tui.updateStatusCounts(this.runner.getStatusCounts());
+        // Start TUI
+        this.tui.start();
+        try {
+            // Execute runs
+            await this.runner.execute();
+            // Final save
+            await this.saveProgress();
+        }
+        finally {
+            this.tui.stop();
+            console.log("");
+        }
+        if (options.openMetabase ?? true) {
+            openInBrowser(`https://daunt-fair.metabaseapp.com/dashboard/10-runs-analysis?run_tags=${versionTag}`);
+        }
+        const errors = this.runner?.getAllStates().filter((state) => state.status === "error");
+        if (errors?.length > 0) {
+            throw new Error(`${errors.map((error) => error.error).join("\n\n")}`);
+        }
+    }
+    /**
+     * Handle state change from runner
+     */
+    onRunStateChange() {
+        if (this.tui && this.runner) {
+            this.tui.updateStates(this.runner.getAllStates());
+            this.tui.updateStatusCounts(this.runner.getStatusCounts());
+        }
+        // Periodically save progress
+        this.saveProgress().catch(() => { });
+    }
+    /**
+     * Handle quit request
+     */
+    handleQuit() {
+        this.runner?.stop();
+        this.tui?.stop();
+        console.log(pc.yellow(`\nThe experiment has been interrupted. You can resume it later by running "kradle experiment run ${this.name}".`));
+        process.exit(0);
+    }
+    /**
+     * Open run in browser
+     */
+    openRun(index) {
+        const url = this.runner?.getRunUrl(index);
+        if (url) {
+            openInBrowser(url);
+        }
+    }
+    /**
+     * Download recordings for a completed run with smart polling
+     * Polls for 90 seconds after run completion (matching pod grace period)
+     */
+    async downloadRecordingsForRun(runId, participantIds, version) {
+        const POLL_INTERVAL_MS = 5000; // Check every 5 seconds
+        const TOTAL_POLL_DURATION_MS = 90000; // Poll for 90 seconds total (pod grace period)
+        const downloadedRecordings = new Set(); // Track downloaded recordings by timestamp
+        const failedDownloads = new Set(); // Track failed downloads to avoid spamming logs
+        const startTime = Date.now();
+        // Keep polling until grace period expires, then do one final check
+        let isLastAttempt = false;
+        while (true) {
+            // For each participant in the run
+            for (const participantId of participantIds) {
+                try {
+                    // Fetch current available recordings
+                    const recordings = await this.api.getRunRecordings(runId, participantId);
+                    // Download any new recordings
+                    for (const recording of recordings) {
+                        const recordingKey = `${participantId}-${recording.timestamp}`;
+                        // Skip if already downloaded
+                        if (downloadedRecordings.has(recordingKey)) {
+                            continue;
+                        }
+                        const outputPath = path.join(this.experimentDir, "versions", version.toString().padStart(3, "0"), "recordings", runId, participantId, `${sanitizeTimestamp(recording.timestamp)}.mcpr`);
+                        // Check if file already exists on disk
+                        try {
+                            await fs.access(outputPath);
+                            downloadedRecordings.add(recordingKey);
+                            continue;
+                        }
+                        catch { }
+                        try {
+                            // Download the recording
+                            const { downloadUrl } = await this.api.getRecordingDownloadUrl(runId, participantId, recording.timestamp);
+                            const response = await fetch(downloadUrl);
+                            if (!response.ok) {
+                                throw new Error(`HTTP ${response.status}: ${response.statusText}`);
+                            }
+                            const buffer = await response.arrayBuffer();
+                            await fs.mkdir(path.dirname(outputPath), { recursive: true });
+                            await fs.writeFile(outputPath, Buffer.from(buffer));
+                            downloadedRecordings.add(recordingKey);
+                            // Remove from failed set if it was previously failing
+                            if (failedDownloads.has(recordingKey)) {
+                                failedDownloads.delete(recordingKey);
+                            }
+                        }
+                        catch (error) {
+                            // Only log each failure once to avoid spam
+                            if (!failedDownloads.has(recordingKey)) {
+                                console.error(pc.yellow(`Warning: Failed to download recording ${recording.timestamp} for participant ${participantId}: ${error instanceof Error ? error.message : String(error)}`));
+                                failedDownloads.add(recordingKey);
+                            }
+                        }
+                    }
+                }
+                catch (error) {
+                    // Log API errors (e.g., fetching recordings list)
+                    console.error(pc.yellow(`Warning: Failed to fetch recordings for participant ${participantId}: ${error instanceof Error ? error.message : String(error)}`));
+                }
+            }
+            // Exit if this was the last attempt
+            if (isLastAttempt) {
+                break;
+            }
+            // Check if we've exceeded the polling duration
+            const elapsed = Date.now() - startTime;
+            if (elapsed >= TOTAL_POLL_DURATION_MS) {
+                // Do one final attempt before exiting
+                isLastAttempt = true;
+            }
+            else {
+                // Wait before next poll
+                await new Promise((resolve) => setTimeout(resolve, POLL_INTERVAL_MS));
+            }
+        }
+    }
+}

package/dist/lib/{evaluation → experiment}/index.d.ts RENAMED Viewed

@@ -1,4 +1,4 @@
-export { Evaluator } from "./evaluator.js";
+export { Experimenter } from "./experimenter.js";
 export { Runner } from "./runner.js";
 export { TUI } from "./tui.js";
 export * from "./types.js";

package/dist/lib/{evaluation → experiment}/index.js RENAMED Viewed

@@ -1,4 +1,4 @@
-export { Evaluator } from "./evaluator.js";
+export { Experimenter } from "./experimenter.js";
 export { Runner } from "./runner.js";
 export { TUI } from "./tui.js";
 export * from "./types.js";

package/dist/lib/{evaluation → experiment}/runner.d.ts RENAMED Viewed

@@ -12,10 +12,12 @@ export declare class Runner {
     private maxConcurrent;
     private tags;
     private onStateChange?;
+    private onRunComplete?;
     constructor(runs: RunConfig[], api: ApiClient, baseUrl: string, options?: {
         maxConcurrent?: number;
         tags?: string[];
         onStateChange?: (index: number, state: RunState) => void;
+        onRunComplete?: (index: number, runId: string) => Promise<void>;
     });
     /**
      * Restore progress from a previous run

package/dist/lib/{evaluation → experiment}/runner.js RENAMED Viewed

@@ -13,6 +13,7 @@ export class Runner {
     maxConcurrent;
     tags;
     onStateChange;
+    onRunComplete;
     constructor(runs, api, baseUrl, options = {}) {
         this.runs = runs;
         this.api = api;
@@ -26,6 +27,7 @@ export class Runner {
             }
         }
         this.onStateChange = options.onStateChange;
+        this.onRunComplete = options.onRunComplete;
         // Initialize all run states as queued
         this.states = runs.map((config, index) => ({
             index,
@@ -155,9 +157,18 @@ export class Runner {
                 throw new Error("No run ID returned from API");
             }
             const runId = response.runIds[0];
-            this.updateState(index, { runId, status: "running" });
+            // Extract participant IDs from response and sort by inputOrder
+            const participantIds = response.participants
+                ? Object.keys(response.participants).sort((a, b) => {
+                    const aOrder = response.participants?.[a]?.inputOrder ?? 0;
+                    const bOrder = response.participants?.[b]?.inputOrder ?? 0;
+                    return aOrder - bOrder;
+                })
+                : undefined;
+            this.updateState(index, { runId, participantIds, status: "running" });
             // Tag the run with all configured tags
-            await Promise.all(this.tags.map((tag) => this.api.tagRun(runId, tag)));
+            const tags = [...this.tags, ...(state.config.tags ?? [])];
+            await Promise.all(tags.map((tag) => this.api.tagRun(runId, tag)));
             // Poll for completion
             await this.pollRunStatus(index, runId);
         }
@@ -191,6 +202,13 @@ export class Runner {
                 if (normalizedStatus === "completed" || normalizedStatus === "finished" || normalizedStatus === "game_over") {
                     this.completedRuns.add(index);
                     this.activeRuns.delete(index);
+                    // Trigger recording download if callback provided
+                    if (this.onRunComplete) {
+                        // Don't await - run in background to avoid blocking
+                        this.onRunComplete(index, runId).catch(() => {
+                            // Error already logged in experimenter, just continue
+                        });
+                    }
                     return;
                 }
                 if (normalizedStatus === "error") {
@@ -271,6 +289,7 @@ export class Runner {
             index: state.index,
             status: state.status,
             runId: state.runId,
+            participantIds: state.participantIds,
             startTime: state.startTime,
             endTime: this.completedRuns.has(state.index) ? Date.now() : undefined,
             error: state.error,

package/dist/lib/{evaluation → experiment}/tui.d.ts RENAMED Viewed

@@ -1,6 +1,6 @@
 import type { RunState, StatusCounts } from "./types.js";
 export interface TUIOptions {
-    evaluationName: string;
+    experimentName: string;
     onQuit: () => void;
     onOpenRun: (index: number) => void;
 }

package/dist/lib/{evaluation → experiment}/tui.js RENAMED Viewed

@@ -39,7 +39,7 @@ const RenderRunLine = ({ state, total, isSelected, padding, }) => {
     const summaryText = summary.length > maxSummaryLength ? summary.slice(0, maxSummaryLength - 1) + "…" : summary;
     return (_jsxs(Text, { inverse: isSelected, children: [_jsx(Text, { color: color, children: icon }), " ", indexLabel, " ", _jsx(Text, { color: color, children: statusLabel }), elapsedLabel ? (_jsxs(_Fragment, { children: [" ", _jsx(Text, { dimColor: true, children: elapsedLabel })] })) : null, " ", _jsx(Text, { dimColor: true, children: summaryText })] }));
 };
-const EvaluationUI = ({ evaluationName, states, statusCounts, onQuit, onOpenRun }) => {
+const ExperimentUI = ({ experimentName, states, statusCounts, onQuit, onOpenRun }) => {
     const [selectedIndex, setSelectedIndex] = useState(0);
     const [scrollOffset, setScrollOffset] = useState(0);
     const [tick, setTick] = useState(0); // force elapsed-time updates
@@ -89,7 +89,7 @@ const EvaluationUI = ({ evaluationName, states, statusCounts, onQuit, onOpenRun
     const rangeEnd = Math.min(scrollOffset + rowsAvailable, states.length);
     const horizontalRule = "─".repeat(Math.min(process.stdout.columns || 80, 80));
     const padding = states.length.toString().length;
-    return (_jsxs(Box, { flexDirection: "column", children: [_jsx(Text, { bold: true, children: `Evaluation: ${evaluationName}` }), _jsx(Text, { dimColor: true, children: horizontalRule }), _jsx(Text, { dimColor: true, children: "q:quit \u2191/\u2193/j/k:select o:open in browser" }), _jsx(Text, { children: " " }), _jsxs(Box, { flexDirection: "column", children: [visibleRuns.map((state, index) => (_jsx(RenderRunLine, { state: state, total: states.length, isSelected: scrollOffset + index === selectedIndex, padding: padding }, state.index))), visibleRuns.length < rowsAvailable
+    return (_jsxs(Box, { flexDirection: "column", children: [_jsx(Text, { bold: true, children: `Experiment: ${experimentName}` }), _jsx(Text, { dimColor: true, children: horizontalRule }), _jsx(Text, { dimColor: true, children: "q:quit \u2191/\u2193/j/k:select o:open in browser" }), _jsx(Text, { children: " " }), _jsxs(Box, { flexDirection: "column", children: [visibleRuns.map((state, index) => (_jsx(RenderRunLine, { state: state, total: states.length, isSelected: scrollOffset + index === selectedIndex, padding: padding }, state.index))), visibleRuns.length < rowsAvailable
                         ? Array.from({ length: rowsAvailable - visibleRuns.length }).map((_, index) => (_jsx(Text, { children: " " }, `empty-${index}`)))
                         : null] }), showScroll ? _jsx(Text, { dimColor: true, children: `[${rangeStart}-${rangeEnd} of ${states.length}]` }) : _jsx(Text, { children: " " }), _jsx(Text, { dimColor: true, children: horizontalRule }), _jsxs(Text, { children: [_jsx(Text, { children: "Completed: " }), _jsx(Text, { color: "green", children: statusCounts.completed }), _jsx(Text, { children: ` | Active: ` }), _jsx(Text, { color: "yellow", children: statusCounts.active }), _jsx(Text, { children: ` | Queued: ` }), _jsx(Text, { dimColor: true, children: statusCounts.queued }), statusCounts.errors > 0 ? (_jsxs(_Fragment, { children: [_jsx(Text, { children: ` | Errors: ` }), _jsx(Text, { color: "red", children: statusCounts.errors })] })) : null] })] }));
 };
@@ -125,6 +125,6 @@ export class TUI {
         this.app.rerender(this.renderApp());
     }
     renderApp() {
-        return (_jsx(EvaluationUI, { evaluationName: this.options.evaluationName, states: this.states, statusCounts: this.statusCounts, onQuit: this.options.onQuit, onOpenRun: this.options.onOpenRun }));
+        return (_jsx(ExperimentUI, { experimentName: this.options.experimentName, states: this.states, statusCounts: this.statusCounts, onQuit: this.options.onQuit, onOpenRun: this.options.onOpenRun }));
     }
 }

package/dist/lib/{evaluation → experiment}/types.d.ts RENAMED Viewed

@@ -10,6 +10,7 @@ export declare const RunConfigSchema: z.ZodObject<{
         agent: z.ZodString;
         role: z.ZodOptional<z.ZodString>;
     }, z.core.$strip>>;
+    tags: z.ZodOptional<z.ZodArray<z.ZodString>>;
 }, z.core.$strip>;
 export type RunConfig = z.infer<typeof RunConfigSchema>;
 export declare const ManifestSchema: z.ZodObject<{
@@ -19,6 +20,7 @@ export declare const ManifestSchema: z.ZodObject<{
             agent: z.ZodString;
             role: z.ZodOptional<z.ZodString>;
         }, z.core.$strip>>;
+        tags: z.ZodOptional<z.ZodArray<z.ZodString>>;
     }, z.core.$strip>>;
     tags: z.ZodOptional<z.ZodArray<z.ZodString>>;
 }, z.core.$strip>;
@@ -40,6 +42,7 @@ export declare const ProgressEntrySchema: z.ZodObject<{
         finished: "finished";
     }>;
     runId: z.ZodOptional<z.ZodString>;
+    participantIds: z.ZodOptional<z.ZodArray<z.ZodString>>;
     startTime: z.ZodOptional<z.ZodNumber>;
     endTime: z.ZodOptional<z.ZodNumber>;
     error: z.ZodOptional<z.ZodString>;
@@ -62,6 +65,7 @@ export declare const ProgressSchema: z.ZodObject<{
             finished: "finished";
         }>;
         runId: z.ZodOptional<z.ZodString>;
+        participantIds: z.ZodOptional<z.ZodArray<z.ZodString>>;
         startTime: z.ZodOptional<z.ZodNumber>;
         endTime: z.ZodOptional<z.ZodNumber>;
         error: z.ZodOptional<z.ZodString>;
@@ -92,6 +96,7 @@ export interface RunState {
     config: RunConfig;
     status: RunStatus;
     runId?: string;
+    participantIds?: string[];
     startTime?: number;
     error?: string;
 }
@@ -113,14 +118,15 @@ export declare const RunLogsResponseSchema: z.ZodObject<{
     logs: z.ZodArray<z.ZodUnknown>;
 }, z.core.$strip>;
 export type RunLogsResponse = z.infer<typeof RunLogsResponseSchema>;
-export declare const EvaluationMetadataSchema: z.ZodObject<{
-    currentIteration: z.ZodNumber;
+export declare const ExperimentMetadataSchema: z.ZodObject<{
+    currentVersion: z.ZodNumber;
 }, z.core.$strip>;
-export type EvaluationMetadata = z.infer<typeof EvaluationMetadataSchema>;
-export interface EvaluationOptions {
+export type ExperimentMetadata = z.infer<typeof ExperimentMetadataSchema>;
+export interface ExperimentOptions {
     new: boolean;
     maxConcurrent: number;
     openMetabase?: boolean;
+    downloadRecordings?: boolean;
 }
 export declare const STATUS_ICONS: Record<RunStatus, {
     icon: string;

package/dist/lib/{evaluation → experiment}/types.js RENAMED Viewed

@@ -8,6 +8,7 @@ export const ParticipantSchema = z.object({
 export const RunConfigSchema = z.object({
     challenge_slug: z.string(),
     participants: z.array(ParticipantSchema),
+    tags: z.array(z.string()).optional(),
 });
 // Manifest returned by config.ts main()
 export const ManifestSchema = z.object({
@@ -31,6 +32,7 @@ export const ProgressEntrySchema = z.object({
         "error",
     ]),
     runId: z.string().optional(),
+    participantIds: z.array(z.string()).optional(),
     startTime: z.number().optional(),
     endTime: z.number().optional(),
     error: z.string().optional(),
@@ -66,9 +68,9 @@ export const RunStatusResponseSchema = z.object({
 export const RunLogsResponseSchema = z.object({
     logs: z.array(z.unknown()),
 });
-// Evaluation metadata stored in .evaluation.json
-export const EvaluationMetadataSchema = z.object({
-    currentIteration: z.number(),
+// Experiment metadata stored in .experiment.json
+export const ExperimentMetadataSchema = z.object({
+    currentVersion: z.number(),
 });
 // Icons and colors for TUI
 export const STATUS_ICONS = {

package/dist/lib/flags.d.ts ADDED Viewed

@@ -0,0 +1,47 @@
+/**
+ * All available config flags that can be used by commands.
+ * Each flag has an `env` property that allows it to be set via environment variable.
+ *
+ * Every entry is declared as an oclif `OptionFlag<string>`, keyed by the flag
+ * name as written on the command line (e.g. "api-url", "api-key").
+ */
+export declare const ALL_CONFIG_FLAGS: {
+    readonly "api-url": import("@oclif/core/interfaces").OptionFlag<string, import("@oclif/core/interfaces").CustomOptions>;
+    readonly "web-url": import("@oclif/core/interfaces").OptionFlag<string, import("@oclif/core/interfaces").CustomOptions>;
+    readonly "studio-api-url": import("@oclif/core/interfaces").OptionFlag<string, import("@oclif/core/interfaces").CustomOptions>;
+    readonly "studio-url": import("@oclif/core/interfaces").OptionFlag<string, import("@oclif/core/interfaces").CustomOptions>;
+    readonly "api-key": import("@oclif/core/interfaces").OptionFlag<string, import("@oclif/core/interfaces").CustomOptions>;
+    readonly "challenges-path": import("@oclif/core/interfaces").OptionFlag<string, import("@oclif/core/interfaces").CustomOptions>;
+};
+/**
+ * Type representing all config flag keys, i.e. the union of the property
+ * names of `ALL_CONFIG_FLAGS` ("api-url" | "web-url" | "studio-api-url" |
+ * "studio-url" | "api-key" | "challenges-path").
+ */
+export type ConfigFlagKey = keyof typeof ALL_CONFIG_FLAGS;
+/**
+ * Type representing the parsed values of all config flags.
+ * Every config flag key maps to a `string` value.
+ */
+export type AllConfigFlagValues = {
+    [K in ConfigFlagKey]: string;
+};
+/**
+ * Returns a subset of config flags for use in a command's static flags definition.
+ *
+ * @example
+ * // In a command file:
+ * static override flags = {
+ *   ...getConfigFlags("api-key", "api-url", "studio-api-url"),
+ *   // other command-specific flags
+ * };
+ *
+ * @param keys - The config flag keys to include; each must be a key of `ALL_CONFIG_FLAGS`.
+ * @returns An object containing only the specified flags
+ */
+export declare function getConfigFlags<K extends ConfigFlagKey>(...keys: K[]): Pick<typeof ALL_CONFIG_FLAGS, K>;
+/**
+ * Helper type to extract the parsed flag values for a subset of config flags.
+ * Use this to type the flags object after parsing.
+ *
+ * @example
+ * type MyFlags = ConfigFlagValues<"api-key" | "api-url">;
+ * // Results in: { "api-key": string; "api-url": string }
+ */
+export type ConfigFlagValues<K extends ConfigFlagKey> = {
+    [P in K]: AllConfigFlagValues[P];
+};