npm - @kradle/cli - Versions diffs - 0.0.4 → 0.0.5 - Mend

@kradle/cli 0.0.4 → 0.0.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (30)

package/README.md +23 -1
package/dist/commands/challenge/watch.js +2 -1
package/dist/commands/evaluation/init.d.ts +9 -0
package/dist/commands/evaluation/init.js +58 -0
package/dist/commands/evaluation/list.d.ts +7 -0
package/dist/commands/evaluation/list.js +55 -0
package/dist/commands/evaluation/run.d.ts +13 -0
package/dist/commands/evaluation/run.js +60 -0
package/dist/lib/api-client.d.ts +14 -1
package/dist/lib/api-client.js +31 -5
package/dist/lib/challenge.js +5 -0
package/dist/lib/config.d.ts +0 -1
package/dist/lib/config.js +0 -2
package/dist/lib/evaluation/evaluator.d.ts +88 -0
package/dist/lib/evaluation/evaluator.js +275 -0
package/dist/lib/evaluation/index.d.ts +4 -0
package/dist/lib/evaluation/index.js +4 -0
package/dist/lib/evaluation/runner.d.ts +80 -0
package/dist/lib/evaluation/runner.js +280 -0
package/dist/lib/evaluation/tui.d.ts +20 -0
package/dist/lib/evaluation/tui.js +129 -0
package/dist/lib/evaluation/types.d.ts +127 -0
package/dist/lib/evaluation/types.js +86 -0
package/dist/lib/schemas.d.ts +14 -0
package/dist/lib/schemas.js +10 -0
package/oclif.manifest.json +104 -1
package/package.json +8 -1
package/static/evaluation_template.ts +69 -0
package/static/project_template/dev.env +0 -1
package/static/project_template/prod.env +0 -1

package/README.md CHANGED

@@ -45,7 +45,6 @@ WEB_URL=https://kradle.ai
 STUDIO_API_URL=http://localhost:8080
 STUDIO_URL=kradle-studio://
 KRADLE_API_KEY=your-api-key
-GCS_BUCKET=your-gcs-bucket
 KRADLE_CHALLENGES_PATH=~/Documents/kradle-studio/challenges
 ```
@@ -126,6 +125,29 @@ kradle challenge multi-upload
 Provides an interactive UI to select multiple challenges and uploads them in parallel.
+### Evaluations (beta)
+Plan and execute batches of runs across challenges/agents, with resumable iterations and a TUI.
+- **Init**: scaffold an evaluation config `evaluations/<name>/config.ts`
+  ```bash
+  kradle evaluation init <name>
+  ```
+- **List**: list local evaluations
+  ```bash
+  kradle evaluation list
+  ```
+- **Run**: execute or resume an evaluation (iterations stored under `evaluations/<name>/iterations/`)
+  ```bash
+  kradle evaluation run <name> [--new] [--max-concurrent N]
+  ```
+Features:
+- Iterations: `--new` starts a new iteration; otherwise resumes the latest.
+- Resumable state: progress is persisted per iteration; in-flight runs are re-polled on resume, completed runs stay completed.
+- Ink TUI: live status counts, elapsed times, scrollable run list; keys `q/Ctrl+C` quit, `↑/↓/j/k` move, `o` open run URL.
+- Per-iteration manifest: generated from the evaluation `config.ts` into `manifest.json` before runs start.
 ## Development
 ### Setup

package/dist/commands/challenge/watch.js CHANGED

@@ -90,7 +90,8 @@ export default class Watch extends Command {
         this.log(pc.blue(`\nStarting watch mode for ${pc.bold(challenge.shortSlug)}\n`));
         this.log(pc.dim("Watching for changes... (Ctrl+C to stop)\n"));
         const watcher = chokidar.watch([challenge.challengeDir], {
-            ignored: /(^|[/\\])\../, // ignore dotfiles
+            // ⚠️ WE IGNORE THE DATAPACK FOLDER FOR NOW, BUT IT'S A SHORT TERM FIX.
+            ignored: [/(^|[/\\])\../, (p) => p.includes("/datapack")], // ignore dotfiles and datapack folder
             persistent: true,
             ignoreInitial: true,
         });

package/dist/commands/evaluation/init.d.ts ADDED

@@ -0,0 +1,9 @@
+import { Command } from "@oclif/core";
+export default class Init extends Command {
+    static description: string;
+    static examples: string[];
+    static args: {
+        name: import("@oclif/core/interfaces").Arg<string, Record<string, unknown>>;
+    };
+    run(): Promise<void>;
+}

package/dist/commands/evaluation/init.js ADDED

@@ -0,0 +1,58 @@
+import { exec } from "node:child_process";
+import fs from "node:fs/promises";
+import path from "node:path";
+import { Args, Command } from "@oclif/core";
+import pc from "picocolors";
+import { loadConfig } from "../../lib/config.js";
+import { getStaticResourcePath } from "../../lib/utils.js";
+export default class Init extends Command {
+    static description = "Initialize a new evaluation";
+    static examples = ["<%= config.bin %> <%= command.id %> my-evaluation"];
+    static args = {
+        name: Args.string({
+            description: "Name of the evaluation",
+            required: true,
+        }),
+    };
+    async run() {
+        const { args } = await this.parse(Init);
+        loadConfig(); // Validate config is available
+        const evaluationDir = path.resolve(process.cwd(), "evaluations", args.name);
+        const configPath = path.join(evaluationDir, "config.ts");
+        // Check if evaluation already exists
+        try {
+            await fs.access(evaluationDir);
+            this.error(pc.red(`Evaluation '${args.name}' already exists at ${evaluationDir}`));
+        }
+        catch {
+            // Directory doesn't exist, which is what we want
+        }
+        // Create evaluation directory
+        await fs.mkdir(evaluationDir, { recursive: true });
+        // Copy template
+        const templatePath = getStaticResourcePath("evaluation_template.ts");
+        await fs.copyFile(templatePath, configPath);
+        this.log(pc.green(`✓ Created evaluation '${args.name}'`));
+        this.log(pc.dim(`  Config: ${configPath}`));
+        // Offer to open in editor on macOS
+        if (process.platform === "darwin") {
+            this.log("");
+            this.log(pc.blue(">> Opening config.ts in your editor..."));
+            // Try Cursor first, then VS Code, then fallback to default
+            exec(`cursor "${configPath}" || code "${configPath}" || open "${configPath}"`, (error) => {
+                if (error) {
+                    this.log(pc.dim(`  Could not open editor automatically. Please open: ${configPath}`));
+                }
+            });
+        }
+        else {
+            this.log("");
+            this.log(pc.blue(`>> Edit the config file to define your runs:`));
+            this.log(pc.dim(`   ${configPath}`));
+        }
+        this.log("");
+        this.log(pc.blue(">> Next steps:"));
+        this.log(pc.dim(`   1. Edit ${path.basename(configPath)} to define your evaluation runs`));
+        this.log(pc.dim(`   2. Run: kradle evaluation run ${args.name}`));
+    }
+}

package/dist/commands/evaluation/list.d.ts ADDED

@@ -0,0 +1,7 @@
+import { Command } from "@oclif/core";
+export default class List extends Command {
+    static description: string;
+    static examples: string[];
+    run(): Promise<void>;
+    private fileExists;
+}

package/dist/commands/evaluation/list.js ADDED

@@ -0,0 +1,55 @@
+import fs from "node:fs/promises";
+import path from "node:path";
+import { Command } from "@oclif/core";
+import pc from "picocolors";
+import { loadConfig } from "../../lib/config.js";
+export default class List extends Command {
+    static description = "List all evaluations";
+    static examples = ["<%= config.bin %> <%= command.id %>"];
+    async run() {
+        this.parse(List);
+        loadConfig(); // Validate config is available
+        const evaluationsDir = path.resolve(process.cwd(), "evaluations");
+        try {
+            const entries = await fs.readdir(evaluationsDir, { withFileTypes: true });
+            const evaluations = entries.filter((e) => e.isDirectory());
+            if (evaluations.length === 0) {
+                this.log(pc.yellow("No evaluations found."));
+                this.log(pc.dim(`  Run 'kradle evaluation init <name>' to create one.`));
+                return;
+            }
+            this.log(pc.blue(">> Evaluations:"));
+            this.log("");
+            for (const evaluation of evaluations) {
+                const evalDir = path.join(evaluationsDir, evaluation.name);
+                const hasConfig = await this.fileExists(path.join(evalDir, "config.ts"));
+                const hasManifest = await this.fileExists(path.join(evalDir, "manifest.json"));
+                const hasProgress = await this.fileExists(path.join(evalDir, "progress.json"));
+                let status = "";
+                if (hasProgress) {
+                    status = pc.yellow(" (in progress)");
+                }
+                else if (hasManifest) {
+                    status = pc.green(" (ready)");
+                }
+                else if (hasConfig) {
+                    status = pc.dim(" (config only)");
+                }
+                this.log(`  ${pc.bold(evaluation.name)}${status}`);
+            }
+        }
+        catch {
+            this.log(pc.yellow("No evaluations directory found."));
+            this.log(pc.dim(`  Run 'kradle evaluation init <name>' to create your first evaluation.`));
+        }
+    }
+    async fileExists(filePath) {
+        try {
+            await fs.access(filePath);
+            return true;
+        }
+        catch {
+            return false;
+        }
+    }
+}

package/dist/commands/evaluation/run.d.ts ADDED

@@ -0,0 +1,13 @@
+import { Command } from "@oclif/core";
+export default class Run extends Command {
+    static description: string;
+    static examples: string[];
+    static args: {
+        name: import("@oclif/core/interfaces").Arg<string, Record<string, unknown>>;
+    };
+    static flags: {
+        new: import("@oclif/core/interfaces").BooleanFlag<boolean>;
+        "max-concurrent": import("@oclif/core/interfaces").OptionFlag<number, import("@oclif/core/interfaces").CustomOptions>;
+    };
+    run(): Promise<void>;
+}

package/dist/commands/evaluation/run.js ADDED

@@ -0,0 +1,60 @@
+import { Args, Command, Flags } from "@oclif/core";
+import pc from "picocolors";
+import { ApiClient } from "../../lib/api-client.js";
+import { loadConfig } from "../../lib/config.js";
+import { Evaluator } from "../../lib/evaluation/evaluator.js";
+const DEFAULT_MAX_CONCURRENT = 5;
+export default class Run extends Command {
+    static description = "Run an evaluation. If the evaluation had an ongoing iteration, it will resume from the last state.";
+    static examples = [
+        "<%= config.bin %> <%= command.id %> my-evaluation",
+        "<%= config.bin %> <%= command.id %> my-evaluation --new",
+        "<%= config.bin %> <%= command.id %> my-evaluation --max-concurrent 10",
+    ];
+    static args = {
+        name: Args.string({
+            description: "Name of the evaluation to run",
+            required: true,
+        }),
+    };
+    static flags = {
+        new: Flags.boolean({
+            char: "n",
+            description: "Start a new iteration of the evaluation",
+            default: false,
+        }),
+        "max-concurrent": Flags.integer({
+            char: "m",
+            description: "Maximum concurrent runs",
+            default: DEFAULT_MAX_CONCURRENT,
+        }),
+    };
+    async run() {
+        const { args, flags } = await this.parse(Run);
+        const config = loadConfig();
+        const api = new ApiClient(config);
+        const evaluator = new Evaluator(args.name, config, api);
+        // Check if evaluation exists
+        if (!(await evaluator.exists())) {
+            this.error(pc.red(`Evaluation '${args.name}' does not exist. Run 'kradle evaluation init ${args.name}' first.`));
+        }
+        // Check if config.ts exists
+        if (!(await evaluator.configExists())) {
+            this.error(pc.red(`Config file not found at ${evaluator.configPath}`));
+        }
+        this.log(pc.blue(`>> Starting evaluation: ${args.name}`));
+        if (flags.new) {
+            this.log(pc.yellow("   --new: Starting a new iteration of the evaluation"));
+        }
+        try {
+            await evaluator.run({
+                new: flags.new,
+                maxConcurrent: flags["max-concurrent"],
+            });
+            this.log(pc.green("\n✓ Evaluation complete!"));
+        }
+        catch (error) {
+            this.error(pc.red(`Evaluation failed: ${error instanceof Error ? error.message : String(error)}`));
+        }
+    }
+}

package/dist/lib/api-client.d.ts CHANGED

@@ -1,7 +1,7 @@
 import type z from "zod";
 import type { Challenge } from "./challenge.js";
 import type { Config } from "./config.js";
-import { type AgentSchemaType, type ChallengeSchemaType, HumanSchema } from "./schemas.js";
+import { type AgentSchemaType, type ChallengeSchemaType, HumanSchema, type RunStatusSchemaType } from "./schemas.js";
 export declare class ApiClient {
     private config;
     constructor(config: Config);
@@ -52,4 +52,17 @@ export declare class ApiClient {
         runIds?: string[] | undefined;
     }>;
     deleteChallenge(challengeId: string): Promise<void>;
+    /**
+     * Get the status of a run.
+     * @param runId - The ID of the run.
+     * @returns The run status.
+     */
+    getRunStatus(runId: string): Promise<RunStatusSchemaType>;
+    /**
+     * Add a tag to a run.
+     * @param runId - The ID of the run to tag.
+     * @param tag - The tag string to add.
+     * @throws an error if the tag fails to be added.
+     */
+    tagRun(runId: string, tag: string): Promise<void>;
 }

package/dist/lib/api-client.js CHANGED

@@ -1,4 +1,4 @@
-import { AgentsResponseSchema, ChallengeSchema, ChallengesResponseSchema, HumanSchema, RunResponseSchema, UploadUrlResponseSchema, } from "./schemas.js";
+import { AgentsResponseSchema, ChallengeSchema, ChallengesResponseSchema, HumanSchema, RunResponseSchema, RunStatusSchema, UploadUrlResponseSchema, } from "./schemas.js";
 const DEFAULT_PAGE_SIZE = 30;
 const DEFAULT_CHALLENGE_SCHEMA = {
     slug: "",
@@ -49,17 +49,21 @@ export class ApiClient {
             method: "POST",
             ...options,
         });
-        const data = await response.json();
+        const text = await response.text();
+        if (!text) {
+            return undefined;
+        }
+        const data = JSON.parse(text);
         return schema ? schema.parse(data) : data;
     }
     async put(target, url, options = {}) {
-        await this.request(target, url, {
+        return await this.request(target, url, {
             method: "PUT",
             ...options,
         });
     }
     async delete(target, url, options = {}) {
-        await this.request(target, url, {
+        return await this.request(target, url, {
             method: "DELETE",
             ...options,
         });
@@ -140,8 +144,9 @@ export class ApiClient {
      */
     async updateChallenge(challenge, challengeConfig) {
         const url = `challenges/${challenge.shortSlug}`;
+        console.log(url);
         const config = challengeConfig ?? (await challenge.loadConfig());
-        return this.put("web", url, {
+        await this.put("web", url, {
             body: JSON.stringify(config),
         });
     }
@@ -159,4 +164,25 @@ export class ApiClient {
         const url = `challenges/${challengeId}`;
         await this.delete("web", url);
     }
+    /**
+     * Get the status of a run.
+     * @param runId - The ID of the run.
+     * @returns The run status.
+     */
+    async getRunStatus(runId) {
+        const url = `runs/${runId}`;
+        return this.get("web", url, {}, RunStatusSchema);
+    }
+    /**
+     * Add a tag to a run.
+     * @param runId - The ID of the run to tag.
+     * @param tag - The tag string to add.
+     * @throws an error if the tag fails to be added.
+     */
+    async tagRun(runId, tag) {
+        const url = `runs/${runId}/tag`;
+        await this.post("web", url, {
+            body: JSON.stringify({ tag }),
+        });
+    }
 }

package/dist/lib/challenge.js CHANGED

@@ -98,6 +98,11 @@ export class Challenge {
         catch (error) {
             throw new Error(`Failed to build datapack: ${error instanceof Error ? error.message : error}`);
         }
+        // @TODO - re-enable once we have a proper build pipeline
+        // Recursively copy the challenge dir to target directory, under src/
+        // await fs.cp(this.challengeDir, path.join(this.config.KRADLE_CHALLENGES_PATH, this.shortSlug, "src"), {
+        // 	recursive: true,
+        // });
     }
     /**
      * Load the challenge configuration from config.ts

package/dist/lib/config.d.ts CHANGED

@@ -5,7 +5,6 @@ export declare const ConfigSchema: z.ZodObject<{
     STUDIO_API_URL: z.ZodString;
     STUDIO_URL: z.ZodString;
     KRADLE_API_KEY: z.ZodString;
-    GCS_BUCKET: z.ZodString;
     KRADLE_CHALLENGES_PATH: z.ZodDefault<z.ZodString>;
     NAMESPACE: z.ZodDefault<z.ZodString>;
 }, z.core.$strip>;

package/dist/lib/config.js CHANGED

@@ -8,7 +8,6 @@ export const ConfigSchema = z.object({
     STUDIO_API_URL: z.string().url(),
     STUDIO_URL: z.string(),
     KRADLE_API_KEY: z.string(),
-    GCS_BUCKET: z.string(),
     /**
      * Absolute path to the challenges directory. Defaults to ~/Documents/kradle-studio/challenges.
      */
@@ -30,7 +29,6 @@ export function loadConfig() {
             STUDIO_API_URL: process.env.STUDIO_API_URL,
             STUDIO_URL: process.env.STUDIO_URL,
             KRADLE_API_KEY: process.env.KRADLE_API_KEY,
-            GCS_BUCKET: process.env.GCS_BUCKET,
             KRADLE_CHALLENGES_PATH: challengesPath,
             NAMESPACE: process.env.NAMESPACE,
         });

package/dist/lib/evaluation/evaluator.d.ts ADDED

@@ -0,0 +1,88 @@
+import type { ApiClient } from "../api-client.js";
+import type { Config } from "../config.js";
+import type { EvaluationMetadata, EvaluationOptions, Manifest, Progress } from "./types.js";
+export declare class Evaluator {
+    private name;
+    private config;
+    private api;
+    evaluationDir: string;
+    metadataPath: string;
+    private runner?;
+    private tui?;
+    private currentIteration?;
+    constructor(name: string, config: Config, api: ApiClient);
+    /**
+     * Get paths for a specific iteration
+     */
+    private getIterationPaths;
+    get configPath(): string;
+    /**
+     * Get the current iteration directory path
+     */
+    getCurrentIterationDir(): string;
+    /**
+     * Check if evaluation exists
+     */
+    exists(): Promise<boolean>;
+    /**
+     * Check if config.ts exists (master config)
+     */
+    configExists(): Promise<boolean>;
+    /**
+     * Load evaluation metadata
+     */
+    loadMetadata(): Promise<EvaluationMetadata | null>;
+    /**
+     * Save evaluation metadata
+     */
+    saveMetadata(metadata: EvaluationMetadata): Promise<void>;
+    /**
+     * Get the current iteration number, or -1 if none exists
+     */
+    getCurrentIterationNumber(): Promise<number>;
+    /**
+     * Create a new iteration
+     */
+    createNewIteration(): Promise<number>;
+    /**
+     * Get or create an iteration
+     * @param createNew - If true, always create a new iteration. Otherwise, use current iteration or create first one if none exists.
+     */
+    getOrCreateIteration(createNew: boolean): Promise<number>;
+    /**
+     * Load manifest from iteration
+     */
+    loadManifest(iteration: number): Promise<Manifest>;
+    /**
+     * Load progress from iteration
+     */
+    loadProgress(iteration: number): Promise<Progress | null>;
+    /**
+     * Save progress to current iteration
+     */
+    saveProgress(): Promise<void>;
+    /**
+     * Execute config.ts to generate manifest
+     */
+    generateManifest(configPath: string): Promise<Manifest>;
+    /**
+     * Execute config.ts file and return the manifest
+     */
+    private executeConfigFile;
+    /**
+     * Run the evaluation
+     */
+    run(options: EvaluationOptions): Promise<void>;
+    /**
+     * Handle state change from runner
+     */
+    private onRunStateChange;
+    /**
+     * Handle quit request
+     */
+    private handleQuit;
+    /**
+     * Open run in browser
+     */
+    private openRun;
+}