npm - @kradle/cli - Versions diffs - 0.0.4 → 0.0.5 - Mend

@kradle/cli 0.0.4 → 0.0.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (30)

package/README.md +23 -1
package/dist/commands/challenge/watch.js +2 -1
package/dist/commands/evaluation/init.d.ts +9 -0
package/dist/commands/evaluation/init.js +58 -0
package/dist/commands/evaluation/list.d.ts +7 -0
package/dist/commands/evaluation/list.js +55 -0
package/dist/commands/evaluation/run.d.ts +13 -0
package/dist/commands/evaluation/run.js +60 -0
package/dist/lib/api-client.d.ts +14 -1
package/dist/lib/api-client.js +31 -5
package/dist/lib/challenge.js +5 -0
package/dist/lib/config.d.ts +0 -1
package/dist/lib/config.js +0 -2
package/dist/lib/evaluation/evaluator.d.ts +88 -0
package/dist/lib/evaluation/evaluator.js +275 -0
package/dist/lib/evaluation/index.d.ts +4 -0
package/dist/lib/evaluation/index.js +4 -0
package/dist/lib/evaluation/runner.d.ts +80 -0
package/dist/lib/evaluation/runner.js +280 -0
package/dist/lib/evaluation/tui.d.ts +20 -0
package/dist/lib/evaluation/tui.js +129 -0
package/dist/lib/evaluation/types.d.ts +127 -0
package/dist/lib/evaluation/types.js +86 -0
package/dist/lib/schemas.d.ts +14 -0
package/dist/lib/schemas.js +10 -0
package/oclif.manifest.json +104 -1
package/package.json +8 -1
package/static/evaluation_template.ts +69 -0
package/static/project_template/dev.env +0 -1
package/static/project_template/prod.env +0 -1

package/dist/lib/evaluation/tui.js ADDED Viewed

@@ -0,0 +1,129 @@
+import { jsx as _jsx, Fragment as _Fragment, jsxs as _jsxs } from "react/jsx-runtime";
+import { Box, render, Text, useInput } from "ink";
+import { useEffect, useMemo, useState } from "react";
+import { STATUS_ICONS } from "./types.js";
+const formatElapsed = (startTime) => {
+    const elapsed = Date.now() - startTime;
+    const seconds = Math.floor(elapsed / 1000);
+    const minutes = Math.floor(seconds / 60);
+    const hours = Math.floor(minutes / 60);
+    if (hours > 0)
+        return `${hours}h ${minutes % 60}m`;
+    if (minutes > 0)
+        return `${minutes}m ${seconds % 60}s`;
+    return `${seconds}s`;
+};
+const getVisibleRows = () => {
+    const terminalHeight = process.stdout.rows || 24;
+    return Math.max(1, terminalHeight - 6 - 1); // Header (3 lines) + footer (3 lines) + 1 line of offset
+};
+const getVisibleColumns = () => {
+    const terminalWidth = process.stdout.columns || 80;
+    return Math.max(1, terminalWidth - 1);
+};
+const RenderRunLine = ({ state, total, isSelected, padding, }) => {
+    const { icon, color } = STATUS_ICONS[state.status] ?? STATUS_ICONS.queued;
+    const indexLabel = `${String(state.index + 1).padStart(padding, " ")}/${total}`;
+    const statusLabel = state.status.padEnd(12);
+    const startTime = state.startTime ?? null;
+    const showElapsed = startTime !== null &&
+        state.status !== "completed" &&
+        state.status !== "finished" &&
+        state.status !== "game_over" &&
+        state.status !== "error";
+    const elapsedLabel = showElapsed && startTime !== null ? formatElapsed(startTime) : null;
+    const agents = state.config.participants.map((p) => p.agent.split(":").pop() ?? p.agent).join(", ");
+    const summary = `${state.config.challenge_slug} (${agents})`;
+    const maxSummaryLength = getVisibleColumns() - indexLabel.length - statusLabel.length - (elapsedLabel ? elapsedLabel.length : 0) - 4; // 4 for the spaces and emoji
+    const summaryText = summary.length > maxSummaryLength ? summary.slice(0, maxSummaryLength - 1) + "…" : summary;
+    return (_jsxs(Text, { inverse: isSelected, children: [_jsx(Text, { color: color, children: icon }), " ", indexLabel, " ", _jsx(Text, { color: color, children: statusLabel }), elapsedLabel ? (_jsxs(_Fragment, { children: [" ", _jsx(Text, { dimColor: true, children: elapsedLabel })] })) : null, " ", _jsx(Text, { dimColor: true, children: summaryText })] }));
+};
+/**
+ * Ink component for the evaluation dashboard: a header (title, rule, key help),
+ * a scrollable list of runs with a keyboard-driven selection, and a footer with status counts.
+ *
+ * Props:
+ * - `evaluationName`: name shown in the title line.
+ * - `states`: run states to list; each entry is rendered by RenderRunLine.
+ * - `statusCounts`: `completed` / `active` / `queued` / `errors` numbers shown in the footer.
+ * - `onQuit`: called when "q" or Ctrl+C is pressed.
+ * - `onOpenRun`: called with the selected index when "o" is pressed.
+ */
+const EvaluationUI = ({ evaluationName, states, statusCounts, onQuit, onOpenRun }) => {
+    const [selectedIndex, setSelectedIndex] = useState(0);
+    const [scrollOffset, setScrollOffset] = useState(0);
+    const [tick, setTick] = useState(0); // force elapsed-time updates
+    // Keep the selection inside the list whenever the number of runs changes.
+    useEffect(() => {
+        if (states.length === 0) {
+            setSelectedIndex(0);
+            setScrollOffset(0);
+            return;
+        }
+        setSelectedIndex((current) => Math.min(current, states.length - 1));
+    }, [states.length]);
+    // Scroll so that the selected row stays inside the visible window, without
+    // scrolling past the end of the list.
+    useEffect(() => {
+        const rows = getVisibleRows();
+        setScrollOffset((offset) => {
+            if (selectedIndex < offset)
+                return selectedIndex;
+            if (selectedIndex >= offset + rows)
+                return Math.min(selectedIndex - rows + 1, Math.max(0, states.length - rows));
+            return Math.min(offset, Math.max(0, states.length - rows));
+        });
+    }, [selectedIndex, states.length]);
+    // Bump `tick` once per second; the state change re-renders the component so
+    // elapsed-time labels refresh. The interval is cleared on unmount.
+    useEffect(() => {
+        const interval = setInterval(() => setTick((value) => value + 1), 1000);
+        return () => clearInterval(interval);
+    }, []);
+    // Keyboard handling: q / Ctrl+C quit, up / k and down / j move the selection, o opens the selected run.
+    useInput((input, key) => {
+        if (input === "q" || (key.ctrl && input === "c")) {
+            onQuit();
+            return;
+        }
+        // Navigation and "open" are no-ops while the list is empty.
+        if (states.length === 0)
+            return;
+        if (key.upArrow || input === "k") {
+            setSelectedIndex((current) => Math.max(0, current - 1));
+        }
+        else if (key.downArrow || input === "j") {
+            setSelectedIndex((current) => Math.min(states.length - 1, current + 1));
+        }
+        else if (input === "o") {
+            onOpenRun(selectedIndex);
+        }
+    });
+    const rowsAvailable = getVisibleRows();
+    // Slice of runs currently inside the scroll window.
+    const visibleRuns = useMemo(() => states.slice(scrollOffset, scrollOffset + rowsAvailable), [states, scrollOffset, rowsAvailable]);
+    // The "[a-b of n]" indicator is only shown when the list does not fit on screen.
+    const showScroll = states.length > rowsAvailable;
+    const rangeStart = states.length === 0 ? 0 : scrollOffset + 1;
+    const rangeEnd = Math.min(scrollOffset + rowsAvailable, states.length);
+    // Rule width follows the terminal width but is capped at 80 characters.
+    const horizontalRule = "─".repeat(Math.min(process.stdout.columns || 80, 80));
+    // Digits in the run count, used to right-align the run indices.
+    const padding = states.length.toString().length;
+    // Layout: title, rule, key help, blank line, run rows (padded with blank rows up to
+    // rowsAvailable), scroll indicator or blank line, rule, status counts. The "Errors"
+    // segment is only rendered when statusCounts.errors > 0.
+    return (_jsxs(Box, { flexDirection: "column", children: [_jsx(Text, { bold: true, children: `Evaluation: ${evaluationName}` }), _jsx(Text, { dimColor: true, children: horizontalRule }), _jsx(Text, { dimColor: true, children: "q:quit \u2191/\u2193/j/k:select o:open in browser" }), _jsx(Text, { children: " " }), _jsxs(Box, { flexDirection: "column", children: [visibleRuns.map((state, index) => (_jsx(RenderRunLine, { state: state, total: states.length, isSelected: scrollOffset + index === selectedIndex, padding: padding }, state.index))), visibleRuns.length < rowsAvailable
+                        ? Array.from({ length: rowsAvailable - visibleRuns.length }).map((_, index) => (_jsx(Text, { children: " " }, `empty-${index}`)))
+                        : null] }), showScroll ? _jsx(Text, { dimColor: true, children: `[${rangeStart}-${rangeEnd} of ${states.length}]` }) : _jsx(Text, { children: " " }), _jsx(Text, { dimColor: true, children: horizontalRule }), _jsxs(Text, { children: [_jsx(Text, { children: "Completed: " }), _jsx(Text, { color: "green", children: statusCounts.completed }), _jsx(Text, { children: ` | Active: ` }), _jsx(Text, { color: "yellow", children: statusCounts.active }), _jsx(Text, { children: ` | Queued: ` }), _jsx(Text, { dimColor: true, children: statusCounts.queued }), statusCounts.errors > 0 ? (_jsxs(_Fragment, { children: [_jsx(Text, { children: ` | Errors: ` }), _jsx(Text, { color: "red", children: statusCounts.errors })] })) : null] })] }));
+};
+export class TUI {
+    options;
+    states = [];
+    statusCounts = { completed: 0, active: 0, queued: 0, errors: 0 };
+    app;
+    running = false;
+    constructor(options) {
+        this.options = options;
+    }
+    start() {
+        this.running = true;
+        this.app = render(this.renderApp());
+    }
+    stop() {
+        this.running = false;
+        this.app?.unmount();
+        this.app = undefined;
+    }
+    updateStates(states) {
+        this.states = states;
+        this.rerender();
+    }
+    updateStatusCounts(counts) {
+        this.statusCounts = counts;
+        this.rerender();
+    }
+    rerender() {
+        if (!this.running || !this.app)
+            return;
+        this.app.rerender(this.renderApp());
+    }
+    renderApp() {
+        return (_jsx(EvaluationUI, { evaluationName: this.options.evaluationName, states: this.states, statusCounts: this.statusCounts, onQuit: this.options.onQuit, onOpenRun: this.options.onOpenRun }));
+    }
+}

package/dist/lib/evaluation/types.d.ts ADDED Viewed

@@ -0,0 +1,127 @@
+import { z } from "zod";
+/** Zod schema for a participant in a run: an `agent` string and an optional `role` string. */
+export declare const ParticipantSchema: z.ZodObject<{
+    agent: z.ZodString;
+    role: z.ZodOptional<z.ZodString>;
+}, z.core.$strip>;
+/** Participant in a run, inferred from {@link ParticipantSchema}. */
+export type Participant = z.infer<typeof ParticipantSchema>;
+/** Zod schema for a single run configuration: a challenge slug and its participants. */
+export declare const RunConfigSchema: z.ZodObject<{
+    challenge_slug: z.ZodString;
+    participants: z.ZodArray<z.ZodObject<{
+        agent: z.ZodString;
+        role: z.ZodOptional<z.ZodString>;
+    }, z.core.$strip>>;
+}, z.core.$strip>;
+/** Single run configuration, inferred from {@link RunConfigSchema}. */
+export type RunConfig = z.infer<typeof RunConfigSchema>;
+/** Zod schema for an evaluation manifest: the list of runs plus optional string tags. */
+export declare const ManifestSchema: z.ZodObject<{
+    runs: z.ZodArray<z.ZodObject<{
+        challenge_slug: z.ZodString;
+        participants: z.ZodArray<z.ZodObject<{
+            agent: z.ZodString;
+            role: z.ZodOptional<z.ZodString>;
+        }, z.core.$strip>>;
+    }, z.core.$strip>>;
+    tags: z.ZodOptional<z.ZodArray<z.ZodString>>;
+}, z.core.$strip>;
+/** Evaluation manifest, inferred from {@link ManifestSchema}. */
+export type Manifest = z.infer<typeof ManifestSchema>;
+/** Every status a run can be in; the same literals are accepted by the `status` enum of {@link ProgressEntrySchema}. */
+export type RunStatus = "queued" | "initializing" | "watcher_connected" | "participants_connected" | "started" | "running" | "recovering" | "completed" | "game_over" | "finished" | "error";
+/**
+ * Zod schema for the progress entry of a single run: its numeric `index`, its
+ * `status`, and optional `runId`, `startTime`, `endTime` and `error`.
+ */
+export declare const ProgressEntrySchema: z.ZodObject<{
+    index: z.ZodNumber;
+    status: z.ZodEnum<{
+        error: "error";
+        queued: "queued";
+        initializing: "initializing";
+        watcher_connected: "watcher_connected";
+        participants_connected: "participants_connected";
+        started: "started";
+        running: "running";
+        recovering: "recovering";
+        completed: "completed";
+        game_over: "game_over";
+        finished: "finished";
+    }>;
+    runId: z.ZodOptional<z.ZodString>;
+    startTime: z.ZodOptional<z.ZodNumber>;
+    endTime: z.ZodOptional<z.ZodNumber>;
+    error: z.ZodOptional<z.ZodString>;
+}, z.core.$strip>;
+/** Progress entry for a single run, inferred from {@link ProgressEntrySchema}. */
+export type ProgressEntry = z.infer<typeof ProgressEntrySchema>;
+/** Zod schema for the progress file: the list of progress entries plus a numeric `lastUpdated`. */
+export declare const ProgressSchema: z.ZodObject<{
+    entries: z.ZodArray<z.ZodObject<{
+        index: z.ZodNumber;
+        status: z.ZodEnum<{
+            error: "error";
+            queued: "queued";
+            initializing: "initializing";
+            watcher_connected: "watcher_connected";
+            participants_connected: "participants_connected";
+            started: "started";
+            running: "running";
+            recovering: "recovering";
+            completed: "completed";
+            game_over: "game_over";
+            finished: "finished";
+        }>;
+        runId: z.ZodOptional<z.ZodString>;
+        startTime: z.ZodOptional<z.ZodNumber>;
+        endTime: z.ZodOptional<z.ZodNumber>;
+        error: z.ZodOptional<z.ZodString>;
+    }, z.core.$strip>>;
+    lastUpdated: z.ZodNumber;
+}, z.core.$strip>;
+/** Progress file contents, inferred from {@link ProgressSchema}. */
+export type Progress = z.infer<typeof ProgressSchema>;
+/**
+ * Zod schema for the result of a run: identification (`index`, `runId`,
+ * `challenge_slug`, `participants`), a free-form `status` string, numeric timing
+ * fields (`startTime`, `endTime`, `duration`) and optional `logs`, `summary`,
+ * `error` and `outcome`.
+ */
+export declare const RunResultSchema: z.ZodObject<{
+    index: z.ZodNumber;
+    runId: z.ZodString;
+    challenge_slug: z.ZodString;
+    participants: z.ZodArray<z.ZodObject<{
+        agent: z.ZodString;
+        role: z.ZodOptional<z.ZodString>;
+    }, z.core.$strip>>;
+    status: z.ZodString;
+    startTime: z.ZodNumber;
+    endTime: z.ZodNumber;
+    duration: z.ZodNumber;
+    logs: z.ZodOptional<z.ZodArray<z.ZodUnknown>>;
+    summary: z.ZodOptional<z.ZodString>;
+    error: z.ZodOptional<z.ZodString>;
+    outcome: z.ZodOptional<z.ZodRecord<z.ZodString, z.ZodUnknown>>;
+}, z.core.$strip>;
+/** Run result, inferred from {@link RunResultSchema}. */
+export type RunResult = z.infer<typeof RunResultSchema>;
+/** In-memory state of one run; this is the shape of the entries the TUI lists. */
+export interface RunState {
+    /** Zero-based position of the run (the TUI displays `index + 1`). */
+    index: number;
+    /** Configuration the run was created from. */
+    config: RunConfig;
+    /** Current status of the run. */
+    status: RunStatus;
+    /** Identifier of the run, when one is known. */
+    runId?: string;
+    /** Start timestamp; the TUI derives elapsed time as `Date.now() - startTime`, i.e. epoch milliseconds. */
+    startTime?: number;
+    /** Error message, when one is recorded. */
+    error?: string;
+}
+/** Counters shown in the TUI footer ("Completed", "Active", "Queued", "Errors"). */
+export interface StatusCounts {
+    completed: number;
+    active: number;
+    queued: number;
+    errors: number;
+}
+/**
+ * Zod schema for a run-status API response: `id`, a free-form `status` string,
+ * optional `createdAt` / `updatedAt` strings and an optional `outcome` record.
+ */
+export declare const RunStatusResponseSchema: z.ZodObject<{
+    id: z.ZodString;
+    status: z.ZodString;
+    createdAt: z.ZodOptional<z.ZodString>;
+    updatedAt: z.ZodOptional<z.ZodString>;
+    outcome: z.ZodOptional<z.ZodRecord<z.ZodString, z.ZodUnknown>>;
+}, z.core.$strip>;
+/** Run-status response, inferred from {@link RunStatusResponseSchema}. */
+export type RunStatusResponse = z.infer<typeof RunStatusResponseSchema>;
+/** Zod schema for a run-logs response: an array of log entries of unknown shape. */
+export declare const RunLogsResponseSchema: z.ZodObject<{
+    logs: z.ZodArray<z.ZodUnknown>;
+}, z.core.$strip>;
+/** Run-logs response, inferred from {@link RunLogsResponseSchema}. */
+export type RunLogsResponse = z.infer<typeof RunLogsResponseSchema>;
+/** Zod schema for the evaluation metadata: a numeric `currentIteration`. */
+export declare const EvaluationMetadataSchema: z.ZodObject<{
+    currentIteration: z.ZodNumber;
+}, z.core.$strip>;
+/** Evaluation metadata, inferred from {@link EvaluationMetadataSchema}. */
+export type EvaluationMetadata = z.infer<typeof EvaluationMetadataSchema>;
+/** Options for running an evaluation. */
+export interface EvaluationOptions {
+    /** NOTE(review): presumably mirrors the `--new` flag ("Start a new iteration of the evaluation") - confirm against the run command. */
+    new: boolean;
+    /** NOTE(review): presumably mirrors the `--max-concurrent` flag ("Maximum concurrent runs") - confirm against the run command. */
+    maxConcurrent: number;
+}
+/** Icon and color for every {@link RunStatus}; read by the TUI when rendering a run row. */
+export declare const STATUS_ICONS: Record<RunStatus, {
+    icon: string;
+    color: "white" | "yellow" | "blue" | "magenta" | "cyan" | "green" | "red";
+}>;

package/dist/lib/evaluation/types.js ADDED Viewed

@@ -0,0 +1,86 @@
+import { z } from "zod";
+/** Participant in a run: the `agent` string is required, `role` is optional. */
+export const ParticipantSchema = z.object({
+    agent: z.string(),
+    role: z.string().optional(),
+});
+/** Single run configuration: the challenge slug and the participants taking part. */
+export const RunConfigSchema = z.object({
+    challenge_slug: z.string(),
+    participants: z.array(ParticipantSchema),
+});
+/** Manifest returned by config.ts main(): the runs to execute and optional tags. */
+export const ManifestSchema = z.object({
+    runs: z.array(RunConfigSchema),
+    tags: z.array(z.string()).optional(),
+});
+/**
+ * Progress entry for a single run. `status` must be one of the listed literals;
+ * `runId`, `startTime`, `endTime` and `error` are optional.
+ */
+export const ProgressEntrySchema = z.object({
+    index: z.number(),
+    status: z.enum([
+        "queued",
+        "initializing",
+        "watcher_connected",
+        "participants_connected",
+        "started",
+        "running",
+        "recovering",
+        "completed",
+        "game_over",
+        "finished",
+        "error",
+    ]),
+    runId: z.string().optional(),
+    startTime: z.number().optional(),
+    endTime: z.number().optional(),
+    error: z.string().optional(),
+});
+/** Progress file schema: all progress entries plus a numeric `lastUpdated`. */
+export const ProgressSchema = z.object({
+    entries: z.array(ProgressEntrySchema),
+    lastUpdated: z.number(),
+});
+/**
+ * Run result with logs and summary. `status` is a free-form string here (not the
+ * enum used by ProgressEntrySchema); `logs`, `summary`, `error` and `outcome` are optional.
+ */
+export const RunResultSchema = z.object({
+    index: z.number(),
+    runId: z.string(),
+    challenge_slug: z.string(),
+    participants: z.array(ParticipantSchema),
+    status: z.string(),
+    startTime: z.number(),
+    endTime: z.number(),
+    duration: z.number(),
+    logs: z.array(z.unknown()).optional(),
+    summary: z.string().optional(),
+    error: z.string().optional(),
+    outcome: z.record(z.string(), z.unknown()).optional(),
+});
+/** API response schema for run status: `id` and `status` are required, the rest is optional. */
+export const RunStatusResponseSchema = z.object({
+    id: z.string(),
+    status: z.string(),
+    createdAt: z.string().optional(),
+    updatedAt: z.string().optional(),
+    outcome: z.record(z.string(), z.unknown()).optional(),
+});
+/** API response schema for run logs: an array of entries whose shape is not validated. */
+export const RunLogsResponseSchema = z.object({
+    logs: z.array(z.unknown()),
+});
+/** Evaluation metadata stored in .evaluation.json. */
+export const EvaluationMetadataSchema = z.object({
+    currentIteration: z.number(),
+});
+/**
+ * Icons and colors for the TUI, keyed by run status.
+ * "started"/"running" share one entry shape, as do "completed"/"game_over"/"finished".
+ */
+export const STATUS_ICONS = {
+    queued: { icon: "·", color: "white" },
+    initializing: { icon: "○", color: "yellow" },
+    watcher_connected: { icon: "◐", color: "blue" },
+    participants_connected: { icon: "◉", color: "blue" },
+    started: { icon: "▶", color: "magenta" },
+    running: { icon: "▶", color: "magenta" },
+    recovering: { icon: "⟳", color: "cyan" },
+    completed: { icon: "✓", color: "green" },
+    game_over: { icon: "✓", color: "green" },
+    finished: { icon: "✓", color: "green" },
+    error: { icon: "✗", color: "red" },
+};

package/dist/lib/schemas.d.ts CHANGED Viewed

@@ -20,10 +20,13 @@ export declare const ChallengeSchema: z.ZodObject<{
             spectator: "spectator";
         }>;
     }, z.core.$strip>;
+    description: z.ZodOptional<z.ZodString>;
     task: z.ZodString;
     roles: z.ZodRecord<z.ZodString, z.ZodObject<{
         description: z.ZodString;
         specificTask: z.ZodString;
+        minParticipants: z.ZodOptional<z.ZodNumber>;
+        maxParticipants: z.ZodOptional<z.ZodNumber>;
     }, z.core.$strip>>;
     objective: z.ZodObject<{
         fieldName: z.ZodString;
@@ -58,10 +61,13 @@ export declare const ChallengesResponseSchema: z.ZodObject<{
                 spectator: "spectator";
             }>;
         }, z.core.$strip>;
+        description: z.ZodOptional<z.ZodString>;
         task: z.ZodString;
         roles: z.ZodRecord<z.ZodString, z.ZodObject<{
             description: z.ZodString;
             specificTask: z.ZodString;
+            minParticipants: z.ZodOptional<z.ZodNumber>;
+            maxParticipants: z.ZodOptional<z.ZodNumber>;
         }, z.core.$strip>>;
         objective: z.ZodObject<{
             fieldName: z.ZodString;
@@ -82,6 +88,13 @@ export declare const HumanSchema: z.ZodObject<{
 export declare const RunResponseSchema: z.ZodObject<{
     runIds: z.ZodOptional<z.ZodArray<z.ZodString>>;
 }, z.core.$strip>;
+/**
+ * Zod schema for the status of a run: `id`, a free-form `status` string, optional
+ * `createdAt` / `updatedAt` strings and an optional `outcome` record.
+ */
+export declare const RunStatusSchema: z.ZodObject<{
+    id: z.ZodString;
+    status: z.ZodString;
+    createdAt: z.ZodOptional<z.ZodString>;
+    updatedAt: z.ZodOptional<z.ZodString>;
+    outcome: z.ZodOptional<z.ZodRecord<z.ZodString, z.ZodUnknown>>;
+}, z.core.$strip>;
 export declare const UploadUrlResponseSchema: z.ZodObject<{
     uploadUrl: z.ZodString;
     expiresAt: z.ZodString;
@@ -123,5 +136,6 @@ export type ChallengeSchemaType = z.infer<typeof ChallengeSchema>;
 export type ChallengesResponseType = z.infer<typeof ChallengesResponseSchema>;
 export type HumanSchemaType = z.infer<typeof HumanSchema>;
 export type RunResponseType = z.infer<typeof RunResponseSchema>;
+/** Run status object, inferred from `RunStatusSchema`. */
+export type RunStatusSchemaType = z.infer<typeof RunStatusSchema>;
 export type AgentSchemaType = z.infer<typeof AgentSchema>;
 export type AgentsResponseType = z.infer<typeof AgentsResponseSchema>;

package/dist/lib/schemas.js CHANGED Viewed

@@ -11,10 +11,13 @@ export const ChallengeSchema = z.object({
         datapack: z.boolean(),
         gameMode: z.enum(["survival", "creative", "adventure", "spectator"]),
     }),
+    description: z.string().optional(),
     task: z.string(),
     roles: z.record(z.string(), z.object({
         description: z.string(),
         specificTask: z.string(),
+        minParticipants: z.number().optional(),
+        maxParticipants: z.number().optional(),
     })),
     objective: z.object({
         fieldName: z.string(),
@@ -34,6 +37,13 @@ export const HumanSchema = z.object({
 export const RunResponseSchema = z.object({
     runIds: z.array(z.string()).optional(),
 });
+/** Status of a run: `id` and `status` are required; timestamps and `outcome` are optional. */
+export const RunStatusSchema = z.object({
+    id: z.string(),
+    status: z.string(),
+    createdAt: z.string().optional(),
+    updatedAt: z.string().optional(),
+    outcome: z.record(z.string(), z.unknown()).optional(),
+});
 export const UploadUrlResponseSchema = z.object({
     uploadUrl: z.string(),
     expiresAt: z.string(),

package/oclif.manifest.json CHANGED Viewed

@@ -304,7 +304,110 @@
         "challenge",
         "watch.js"
       ]
+    },
+    "evaluation:init": {
+      "aliases": [],
+      "args": {
+        "name": {
+          "description": "Name of the evaluation",
+          "name": "name",
+          "required": true
+        }
+      },
+      "description": "Initialize a new evaluation",
+      "examples": [
+        "<%= config.bin %> <%= command.id %> my-evaluation"
+      ],
+      "flags": {},
+      "hasDynamicHelp": false,
+      "hiddenAliases": [],
+      "id": "evaluation:init",
+      "pluginAlias": "@kradle/cli",
+      "pluginName": "@kradle/cli",
+      "pluginType": "core",
+      "strict": true,
+      "enableJsonFlag": false,
+      "isESM": true,
+      "relativePath": [
+        "dist",
+        "commands",
+        "evaluation",
+        "init.js"
+      ]
+    },
+    "evaluation:list": {
+      "aliases": [],
+      "args": {},
+      "description": "List all evaluations",
+      "examples": [
+        "<%= config.bin %> <%= command.id %>"
+      ],
+      "flags": {},
+      "hasDynamicHelp": false,
+      "hiddenAliases": [],
+      "id": "evaluation:list",
+      "pluginAlias": "@kradle/cli",
+      "pluginName": "@kradle/cli",
+      "pluginType": "core",
+      "strict": true,
+      "enableJsonFlag": false,
+      "isESM": true,
+      "relativePath": [
+        "dist",
+        "commands",
+        "evaluation",
+        "list.js"
+      ]
+    },
+    "evaluation:run": {
+      "aliases": [],
+      "args": {
+        "name": {
+          "description": "Name of the evaluation to run",
+          "name": "name",
+          "required": true
+        }
+      },
+      "description": "Run an evaluation. If the evaluation had an ongoing iteration, it will resume from the last state.",
+      "examples": [
+        "<%= config.bin %> <%= command.id %> my-evaluation",
+        "<%= config.bin %> <%= command.id %> my-evaluation --new",
+        "<%= config.bin %> <%= command.id %> my-evaluation --max-concurrent 10"
+      ],
+      "flags": {
+        "new": {
+          "char": "n",
+          "description": "Start a new iteration of the evaluation",
+          "name": "new",
+          "allowNo": false,
+          "type": "boolean"
+        },
+        "max-concurrent": {
+          "char": "m",
+          "description": "Maximum concurrent runs",
+          "name": "max-concurrent",
+          "default": 5,
+          "hasDynamicHelp": false,
+          "multiple": false,
+          "type": "option"
+        }
+      },
+      "hasDynamicHelp": false,
+      "hiddenAliases": [],
+      "id": "evaluation:run",
+      "pluginAlias": "@kradle/cli",
+      "pluginName": "@kradle/cli",
+      "pluginType": "core",
+      "strict": true,
+      "enableJsonFlag": false,
+      "isESM": true,
+      "relativePath": [
+        "dist",
+        "commands",
+        "evaluation",
+        "run.js"
+      ]
     }
   },
-  "version": "0.0.4"
+  "version": "0.0.5"
 }

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
 	"name": "@kradle/cli",
-	"version": "0.0.4",
+	"version": "0.0.5",
 	"description": "Kradle's CLI. Manage challenges, evaluations, agents and more!",
 	"keywords": [
 		"cli"
@@ -38,8 +38,10 @@
 		"chokidar": "^4.0.3",
 		"dotenv": "^17.2.3",
 		"enquirer": "^2.4.1",
+		"ink": "^4.4.1",
 		"listr2": "^9.0.5",
 		"picocolors": "^1.1.1",
+		"react": "^18.2.0",
 		"tar": "^7.5.2",
 		"zod": "^4.1.12"
 	},
@@ -48,6 +50,8 @@
 		"@oclif/test": "^4",
 		"@types/chai": "^4",
 		"@types/node": "^18",
+		"@types/react": "^19.2.7",
+		"@types/react-dom": "^19.2.3",
 		"@types/tar": "^6.1.13",
 		"chai": "^4",
 		"oclif": "^4",
@@ -72,6 +76,9 @@
 			},
 			"agent": {
 				"description": "Manage agents"
+			},
+			"evaluation": {
+				"description": "Manage and run evaluations"
 			}
 		}
 	}

package/static/evaluation_template.ts ADDED Viewed

@@ -0,0 +1,69 @@
+/**
+ * Builds the evaluation manifest: NUM_RUNS runs of a single challenge, each with
+ * NUM_AGENTS_PER_RUN agents drawn at random (without repeats) from AGENTS.
+ *
+ * @returns The manifest holding the generated runs and the additional tags.
+ */
+export function main(): Manifest {
+	const CHALLENGE_SLUG = "[INSERT CHALLENGE SLUG HERE]";
+	const AGENTS: string[] = [
+		"team-kradle:claude-sonnet-4",
+		"team-kradle:qwen3-coder",
+		"team-kradle:deepseek-chat-v3-1",
+		"team-kradle:grok-4",
+		"team-kradle:grok-code-fast-1",
+		"team-kradle:gpt-5",
+		"team-kradle:kimi-k2",
+		"team-kradle:gemini-2-5-flash",
+		"team-kradle:gemini-2-5-pro",
+		"team-kradle:glm-4-5-air",
+		"team-kradle:gpt-5-mini",
+		"team-kradle:o3-mini",
+		"team-kradle:codestral-2508",
+	];
+	const NUM_RUNS = 200;
+	const NUM_AGENTS_PER_RUN = 4;
+	const ADDITIONAL_TAGS: string[] = [];
+	// One entry per run; every run gets its own independent random draw of agents.
+	const runs: RunConfig[] = Array.from({ length: NUM_RUNS }, () => {
+		const participants = sampleWithoutReplacement(AGENTS, NUM_AGENTS_PER_RUN).map((agent) => ({ agent }));
+		return { challenge_slug: CHALLENGE_SLUG, participants };
+	});
+	return { runs, tags: ADDITIONAL_TAGS };
+}
+/**
+ * Picks `count` distinct elements of `arr` at random, leaving `arr` untouched.
+ *
+ * @param arr Elements to draw from.
+ * @param count Number of elements to draw.
+ * @returns The drawn elements, in the order they were drawn.
+ * @throws Error when `count` is larger than `arr.length`.
+ */
+function sampleWithoutReplacement<T>(arr: T[], count: number): T[] {
+	if (count > arr.length) {
+		throw new Error("Sample size cannot be larger than array length.");
+	}
+	const pool = [...arr];
+	const picked: T[] = [];
+	for (let remaining = count; remaining > 0; remaining--) {
+		const at = Math.floor(Math.random() * pool.length);
+		// splice() removes the chosen element from the pool and hands it back in one step.
+		picked.push(...pool.splice(at, 1));
+	}
+	return picked;
+}
+/** A single participant of a run. */
+type Participant = {
+	/** Agent identifier; this template uses names of the form "team-kradle:<model>". */
+	agent: string;
+	/** Optional role for the agent (this template does not set it). */
+	role?: string;
+};
+/** Configuration of one run: the challenge to play and who takes part. */
+type RunConfig = {
+	challenge_slug: string;
+	participants: Participant[];
+};
+/** Value returned by main(): every run of the evaluation plus optional tags. */
+type Manifest = {
+	runs: RunConfig[];
+	tags?: string[];
+};

package/static/project_template/dev.env CHANGED Viewed

@@ -2,5 +2,4 @@ WEB_API_URL=https://dev-api.kradle.ai/v0 #https://api.kradle.ai/v0
 WEB_URL=https://dev.kradle.ai #https:/.kradle.ai/workbench
 STUDIO_API_URL=http://localhost:2999/api/v0
 STUDIO_URL=kradle-dev://open #kradle://://open
-GCS_BUCKET=mckradle-3c267.firebasestorage.app #kradle-prod-storage
 KRADLE_CHALLENGES_PATH=~/Documents/kradle-studio/challenges

package/static/project_template/prod.env CHANGED Viewed

@@ -2,5 +2,4 @@ WEB_API_URL=https://api.kradle.ai/v0 #https://dev-api.kradle.ai/v0
 WEB_URL=https://kradle.ai #https://dev.kradle.ai
 STUDIO_API_URL=http://localhost:2999/api/v0
 STUDIO_URL=kradle://open #kradle-dev://://open
-GCS_BUCKET=kradle-prod-storage #mckradle-3c267.firebasestorage.app
 KRADLE_CHALLENGES_PATH=~/Documents/kradle-studio/challenges