@kradle/cli 0.0.4 → 0.0.6

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -2,7 +2,7 @@
2
2
 
3
3
  Kradle's CLI for managing Minecraft challenges, evaluations, agents, and more!
4
4
 
5
- ## Kradle - private installation
5
+ ## Installation
6
6
 
7
7
  1. Install Kradle's CLI globally
8
8
  ```
@@ -45,11 +45,10 @@ WEB_URL=https://kradle.ai
45
45
  STUDIO_API_URL=http://localhost:8080
46
46
  STUDIO_URL=kradle-studio://
47
47
  KRADLE_API_KEY=your-api-key
48
- GCS_BUCKET=your-gcs-bucket
49
48
  KRADLE_CHALLENGES_PATH=~/Documents/kradle-studio/challenges
50
49
  ```
51
50
 
52
- ## Commands
51
+ ## Challenge Commands
53
52
 
54
53
  ### Create Challenge
55
54
 
@@ -126,6 +125,46 @@ kradle challenge multi-upload
126
125
 
127
126
  Provides an interactive UI to select multiple challenges and uploads them in parallel.
128
127
 
128
+ ## Evaluation Commands
129
+
130
+ Plan and execute batches of runs across challenges/agents, with resumable iterations and a TUI.
131
+
132
+ - **Init**: scaffold an evaluation config `evaluations/<name>/config.ts`
133
+ ```bash
134
+ kradle evaluation init <name>
135
+ ```
136
+ - **List**: list local evaluations
137
+ ```bash
138
+ kradle evaluation list
139
+ ```
140
+ - **Run**: execute or resume an evaluation (iterations stored under `evaluations/<name>/iterations/`)
141
+ ```bash
142
+ kradle evaluation run <name> [--new] [--max-concurrent N]
143
+ ```
144
+
145
+ Features:
146
+ - Iterations: `--new` starts a new iteration; otherwise resumes the latest.
147
+ - Resumable state: progress is persisted per iteration; in-flight runs are re-polled on resume, completed runs stay completed.
148
+ - Ink TUI: live status counts, elapsed times, scrollable run list; keys `q/Ctrl+C` quit, `↑/↓/j/k` move, `o` open run URL.
149
+ - Per-iteration manifest: generated from the evaluation `config.ts` into `manifest.json` before runs start.
150
+
151
+ ## Publishing a New Version
152
+
153
+ The CLI uses GitHub Actions for automated releases. To publish a new version:
154
+
155
+ 1. **Go to Actions** in the GitHub repository
156
+ 2. **Select "Create Release PR"** workflow from the sidebar
157
+ 3. **Click "Run workflow"** and choose the release type:
158
+ - `patch` - Bug fixes (0.0.5 → 0.0.6)
159
+ - `minor` - New features (0.0.5 → 0.1.0)
160
+ - `major` - Breaking changes (0.0.5 → 1.0.0)
161
+ 4. **Review and merge** the automatically created PR
162
+ 5. **Done!** The package is automatically published to npm when the PR is merged
163
+
164
+ ### Setup (one-time)
165
+
166
+ The publish workflow authenticates to npm through [npm Trusted Publishers](https://docs.npmjs.com/trusted-publishers), which must be configured once for this package.
167
+
129
168
  ## Development
130
169
 
131
170
  ### Setup
@@ -90,7 +90,8 @@ export default class Watch extends Command {
90
90
  this.log(pc.blue(`\nStarting watch mode for ${pc.bold(challenge.shortSlug)}\n`));
91
91
  this.log(pc.dim("Watching for changes... (Ctrl+C to stop)\n"));
92
92
  const watcher = chokidar.watch([challenge.challengeDir], {
93
- ignored: /(^|[/\\])\../, // ignore dotfiles
93
+ // ⚠️ WE IGNORE THE DATAPACK FOLDER FOR NOW, BUT IT'S A SHORT TERM FIX.
94
+ ignored: [/(^|[/\\])\../, (p) => p.includes("/datapack")], // ignore dotfiles and datapack folder
94
95
  persistent: true,
95
96
  ignoreInitial: true,
96
97
  });
@@ -0,0 +1,9 @@
1
+ import { Command } from "@oclif/core";
2
/**
 * Type declaration for the evaluation `init` command.
 *
 * The command takes a single positional `name` argument and exposes the
 * usual oclif static metadata (`description`, `examples`).
 */
export default class Init extends Command {
    /** Positional arguments accepted by the command. */
    static args: {
        /** Name of the evaluation. */
        name: import("@oclif/core/interfaces").Arg<string, Record<string, unknown>>;
    };
    /** Help text for the command. */
    static description: string;
    /** Usage examples shown in the command help. */
    static examples: string[];
    /** Entry point invoked by oclif; resolves once the command has finished. */
    run(): Promise<void>;
}
@@ -0,0 +1,58 @@
1
+ import { exec } from "node:child_process";
2
+ import fs from "node:fs/promises";
3
+ import path from "node:path";
4
+ import { Args, Command } from "@oclif/core";
5
+ import pc from "picocolors";
6
+ import { loadConfig } from "../../lib/config.js";
7
+ import { getStaticResourcePath } from "../../lib/utils.js";
8
/**
 * `evaluation init <name>` command.
 *
 * Creates `evaluations/<name>/` under the current working directory and
 * copies the static `evaluation_template.ts` resource into it as `config.ts`.
 * On macOS it additionally tries to open the new config in an editor.
 */
export default class Init extends Command {
    static description = "Initialize a new evaluation";
    static examples = ["<%= config.bin %> <%= command.id %> my-evaluation"];
    static args = {
        name: Args.string({
            description: "Name of the evaluation",
            required: true,
        }),
    };
    /**
     * Scaffold the evaluation directory and its `config.ts`.
     *
     * Fails through `this.error` when the evaluation directory already exists,
     * so an existing `config.ts` is never overwritten by the template.
     *
     * @returns A promise that resolves once the scaffold has been written and
     * the follow-up hints have been logged.
     */
    async run() {
        const { args } = await this.parse(Init);
        loadConfig(); // Validate config is available
        const evaluationDir = path.resolve(process.cwd(), "evaluations", args.name);
        const configPath = path.join(evaluationDir, "config.ts");
        // Probe for an existing evaluation. Only the probe lives inside the
        // try/catch: this.error() reports by throwing, so calling it inside the
        // try block would have its error swallowed by the catch and the command
        // would carry on and overwrite the existing config.
        let alreadyExists = true;
        try {
            await fs.access(evaluationDir);
        }
        catch {
            // Directory doesn't exist, which is what we want
            alreadyExists = false;
        }
        if (alreadyExists) {
            this.error(pc.red(`Evaluation '${args.name}' already exists at ${evaluationDir}`));
        }
        // Create evaluation directory
        await fs.mkdir(evaluationDir, { recursive: true });
        // Copy template
        const templatePath = getStaticResourcePath("evaluation_template.ts");
        await fs.copyFile(templatePath, configPath);
        this.log(pc.green(`✓ Created evaluation '${args.name}'`));
        this.log(pc.dim(` Config: ${configPath}`));
        if (process.platform === "darwin") {
            // On macOS, open the config in an editor automatically
            this.log("");
            this.log(pc.blue(">> Opening config.ts in your editor..."));
            // Try Cursor first, then VS Code, then fallback to default.
            // NOTE(review): configPath is interpolated into a shell command line;
            // a name containing quotes or shell metacharacters would be
            // interpreted by the shell. Consider execFile with an argument array.
            exec(`cursor "${configPath}" || code "${configPath}" || open "${configPath}"`, (error) => {
                if (error) {
                    this.log(pc.dim(` Could not open editor automatically. Please open: ${configPath}`));
                }
            });
        }
        else {
            this.log("");
            this.log(pc.blue(`>> Edit the config file to define your runs:`));
            this.log(pc.dim(` ${configPath}`));
        }
        this.log("");
        this.log(pc.blue(">> Next steps:"));
        this.log(pc.dim(` 1. Edit ${path.basename(configPath)} to define your evaluation runs`));
        this.log(pc.dim(` 2. Run: kradle evaluation run ${args.name}`));
    }
}
@@ -0,0 +1,7 @@
1
+ import { Command } from "@oclif/core";
2
/**
 * Type declaration for the evaluation `list` command.
 *
 * The command takes no arguments or flags; it only exposes the oclif static
 * metadata and a private file-existence helper.
 */
export default class List extends Command {
    /** Usage examples shown in the command help. */
    static examples: string[];
    /** Help text for the command. */
    static description: string;
    /** Internal helper used by `run`; not part of the public surface. */
    private fileExists;
    /** Entry point invoked by oclif; resolves once the listing has been printed. */
    run(): Promise<void>;
}
@@ -0,0 +1,55 @@
1
+ import fs from "node:fs/promises";
2
+ import path from "node:path";
3
+ import { Command } from "@oclif/core";
4
+ import pc from "picocolors";
5
+ import { loadConfig } from "../../lib/config.js";
6
/**
 * `evaluation list` command.
 *
 * Prints every sub-directory of `./evaluations` (relative to the current
 * working directory) together with a status derived from which of
 * `config.ts`, `manifest.json` and `progress.json` exist directly inside it.
 */
export default class List extends Command {
    static description = "List all evaluations";
    static examples = ["<%= config.bin %> <%= command.id %>"];
    /**
     * List local evaluations and their status.
     *
     * @returns A promise that resolves once the listing has been logged.
     * @throws Rethrows any error from reading the evaluations directory other
     * than "does not exist" / "is not a directory".
     */
    async run() {
        // Await the parse so that argument/flag errors are reported by the
        // command instead of surfacing as an unhandled promise rejection.
        await this.parse(List);
        loadConfig(); // Validate config is available
        const evaluationsDir = path.resolve(process.cwd(), "evaluations");
        let entries;
        try {
            entries = await fs.readdir(evaluationsDir, { withFileTypes: true });
        }
        catch (error) {
            // Only a missing (or non-directory) path means "no evaluations yet";
            // anything else (e.g. a permission problem) must not be masked.
            const code = typeof error === "object" && error !== null && "code" in error ? error.code : undefined;
            if (code !== "ENOENT" && code !== "ENOTDIR") {
                throw error;
            }
            this.log(pc.yellow("No evaluations directory found."));
            this.log(pc.dim(` Run 'kradle evaluation init <name>' to create your first evaluation.`));
            return;
        }
        const evaluations = entries.filter((e) => e.isDirectory());
        if (evaluations.length === 0) {
            this.log(pc.yellow("No evaluations found."));
            this.log(pc.dim(` Run 'kradle evaluation init <name>' to create one.`));
            return;
        }
        this.log(pc.blue(">> Evaluations:"));
        this.log("");
        for (const evaluation of evaluations) {
            const evalDir = path.join(evaluationsDir, evaluation.name);
            // The three probes are independent, so run them concurrently.
            const [hasConfig, hasManifest, hasProgress] = await Promise.all([
                this.fileExists(path.join(evalDir, "config.ts")),
                this.fileExists(path.join(evalDir, "manifest.json")),
                this.fileExists(path.join(evalDir, "progress.json")),
            ]);
            // Most advanced state wins: progress > manifest > config.
            let status = "";
            if (hasProgress) {
                status = pc.yellow(" (in progress)");
            }
            else if (hasManifest) {
                status = pc.green(" (ready)");
            }
            else if (hasConfig) {
                status = pc.dim(" (config only)");
            }
            this.log(` ${pc.bold(evaluation.name)}${status}`);
        }
    }
    /**
     * Report whether a path is accessible.
     *
     * @param filePath - Path to probe with `fs.access`.
     * @returns `true` when `fs.access` succeeds, `false` when it rejects for any reason.
     */
    async fileExists(filePath) {
        try {
            await fs.access(filePath);
            return true;
        }
        catch {
            return false;
        }
    }
}
@@ -0,0 +1,13 @@
1
+ import { Command } from "@oclif/core";
2
/**
 * Type declaration for the evaluation `run` command.
 *
 * The command takes a positional `name` argument plus the `new` and
 * `max-concurrent` flags.
 */
export default class Run extends Command {
    /** Flags accepted by the command. */
    static flags: {
        /** Boolean flag: start a new iteration of the evaluation. */
        new: import("@oclif/core/interfaces").BooleanFlag<boolean>;
        /** Numeric flag: maximum concurrent runs. */
        "max-concurrent": import("@oclif/core/interfaces").OptionFlag<number, import("@oclif/core/interfaces").CustomOptions>;
    };
    /** Positional arguments accepted by the command. */
    static args: {
        /** Name of the evaluation to run. */
        name: import("@oclif/core/interfaces").Arg<string, Record<string, unknown>>;
    };
    /** Help text for the command. */
    static description: string;
    /** Usage examples shown in the command help. */
    static examples: string[];
    /** Entry point invoked by oclif; resolves once the command has finished. */
    run(): Promise<void>;
}
@@ -0,0 +1,60 @@
1
+ import { Args, Command, Flags } from "@oclif/core";
2
+ import pc from "picocolors";
3
+ import { ApiClient } from "../../lib/api-client.js";
4
+ import { loadConfig } from "../../lib/config.js";
5
+ import { Evaluator } from "../../lib/evaluation/evaluator.js";
6
/** Value used for `--max-concurrent` when the flag is not given. */
const DEFAULT_MAX_CONCURRENT = 5;
/**
 * `evaluation run <name>` command.
 *
 * Checks that the evaluation and its config file exist, then hands control to
 * the Evaluator with the parsed `--new` and `--max-concurrent` options.
 */
export default class Run extends Command {
    static description = "Run an evaluation. If the evaluation had an ongoing iteration, it will resume from the last state.";
    static examples = [
        "<%= config.bin %> <%= command.id %> my-evaluation",
        "<%= config.bin %> <%= command.id %> my-evaluation --new",
        "<%= config.bin %> <%= command.id %> my-evaluation --max-concurrent 10",
    ];
    static args = {
        name: Args.string({
            description: "Name of the evaluation to run",
            required: true,
        }),
    };
    static flags = {
        new: Flags.boolean({
            char: "n",
            description: "Start a new iteration of the evaluation",
            default: false,
        }),
        "max-concurrent": Flags.integer({
            char: "m",
            description: "Maximum concurrent runs",
            default: DEFAULT_MAX_CONCURRENT,
        }),
    };
    /**
     * Validate the evaluation, then run it and report the outcome.
     *
     * @returns A promise that resolves after the evaluator has finished
     * successfully; any failure is reported through `this.error`.
     */
    async run() {
        const parsed = await this.parse(Run);
        const evaluationName = parsed.args.name;
        const startNewIteration = parsed.flags.new;
        const config = loadConfig();
        const api = new ApiClient(config);
        const evaluator = new Evaluator(evaluationName, config, api);
        // Preconditions: the evaluation must exist ...
        const evaluationFound = await evaluator.exists();
        if (!evaluationFound) {
            this.error(pc.red(`Evaluation '${evaluationName}' does not exist. Run 'kradle evaluation init ${evaluationName}' first.`));
        }
        // ... and so must its config.ts
        const configFound = await evaluator.configExists();
        if (!configFound) {
            this.error(pc.red(`Config file not found at ${evaluator.configPath}`));
        }
        this.log(pc.blue(`>> Starting evaluation: ${evaluationName}`));
        if (startNewIteration) {
            this.log(pc.yellow(" --new: Starting a new iteration of the evaluation"));
        }
        try {
            const runOptions = {
                new: startNewIteration,
                maxConcurrent: parsed.flags["max-concurrent"],
            };
            await evaluator.run(runOptions);
            this.log(pc.green("\n✓ Evaluation complete!"));
        }
        catch (error) {
            // Non-Error throwables are stringified so the message is never empty.
            const reason = error instanceof Error ? error.message : String(error);
            this.error(pc.red(`Evaluation failed: ${reason}`));
        }
    }
}
@@ -1,7 +1,7 @@
1
1
  import type z from "zod";
2
2
  import type { Challenge } from "./challenge.js";
3
3
  import type { Config } from "./config.js";
4
- import { type AgentSchemaType, type ChallengeSchemaType, HumanSchema } from "./schemas.js";
4
+ import { type AgentSchemaType, type ChallengeSchemaType, HumanSchema, type RunStatusSchemaType } from "./schemas.js";
5
5
  export declare class ApiClient {
6
6
  private config;
7
7
  constructor(config: Config);
@@ -52,4 +52,17 @@ export declare class ApiClient {
52
52
  runIds?: string[] | undefined;
53
53
  }>;
54
54
  deleteChallenge(challengeId: string): Promise<void>;
55
+ /**
56
+ * Get the status of a run.
57
+ * @param runId - The ID of the run.
58
+ * @returns The run status.
59
+ */
60
+ getRunStatus(runId: string): Promise<RunStatusSchemaType>;
61
+ /**
62
+ * Add a tag to a run.
63
+ * @param runId - The ID of the run to tag.
64
+ * @param tag - The tag string to add.
65
+ * @throws an error if the tag fails to be added.
66
+ */
67
+ tagRun(runId: string, tag: string): Promise<void>;
55
68
  }
@@ -1,4 +1,4 @@
1
- import { AgentsResponseSchema, ChallengeSchema, ChallengesResponseSchema, HumanSchema, RunResponseSchema, UploadUrlResponseSchema, } from "./schemas.js";
1
+ import { AgentsResponseSchema, ChallengeSchema, ChallengesResponseSchema, HumanSchema, RunResponseSchema, RunStatusSchema, UploadUrlResponseSchema, } from "./schemas.js";
2
2
  const DEFAULT_PAGE_SIZE = 30;
3
3
  const DEFAULT_CHALLENGE_SCHEMA = {
4
4
  slug: "",
@@ -12,7 +12,13 @@ const DEFAULT_CHALLENGE_SCHEMA = {
12
12
  objective: {
13
13
  fieldName: "success_rate",
14
14
  direction: "maximize",
15
- },
15
+ } /*
16
+ endStates: {
17
+ "red": "Red team only wins",
18
+ "blue": "Blue team only wins",
19
+ "both": "Both teams win",
20
+ "none": "No team wins",
21
+ },*/,
16
22
  };
17
23
  export class ApiClient {
18
24
  config;
@@ -49,17 +55,21 @@ export class ApiClient {
49
55
  method: "POST",
50
56
  ...options,
51
57
  });
52
- const data = await response.json();
58
+ const text = await response.text();
59
+ if (!text) {
60
+ return undefined;
61
+ }
62
+ const data = JSON.parse(text);
53
63
  return schema ? schema.parse(data) : data;
54
64
  }
55
65
  async put(target, url, options = {}) {
56
- await this.request(target, url, {
66
+ return await this.request(target, url, {
57
67
  method: "PUT",
58
68
  ...options,
59
69
  });
60
70
  }
61
71
  async delete(target, url, options = {}) {
62
- await this.request(target, url, {
72
+ return await this.request(target, url, {
63
73
  method: "DELETE",
64
74
  ...options,
65
75
  });
@@ -140,8 +150,9 @@ export class ApiClient {
140
150
  */
141
151
  async updateChallenge(challenge, challengeConfig) {
142
152
  const url = `challenges/${challenge.shortSlug}`;
153
+ console.log(url);
143
154
  const config = challengeConfig ?? (await challenge.loadConfig());
144
- return this.put("web", url, {
155
+ await this.put("web", url, {
145
156
  body: JSON.stringify(config),
146
157
  });
147
158
  }
@@ -159,4 +170,25 @@ export class ApiClient {
159
170
  const url = `challenges/${challengeId}`;
160
171
  await this.delete("web", url);
161
172
  }
173
+ /**
174
+ * Get the status of a run.
175
+ * @param runId - The ID of the run.
176
+ * @returns The run status.
177
+ */
178
+ async getRunStatus(runId) {
179
+ const url = `runs/${runId}`;
180
+ return this.get("web", url, {}, RunStatusSchema);
181
+ }
182
+ /**
183
+ * Add a tag to a run.
184
+ * @param runId - The ID of the run to tag.
185
+ * @param tag - The tag string to add.
186
+ * @throws an error if the tag fails to be added.
187
+ */
188
+ async tagRun(runId, tag) {
189
+ const url = `runs/${runId}/tag`;
190
+ await this.post("web", url, {
191
+ body: JSON.stringify({ tag }),
192
+ });
193
+ }
162
194
  }
@@ -98,6 +98,11 @@ export class Challenge {
98
98
  catch (error) {
99
99
  throw new Error(`Failed to build datapack: ${error instanceof Error ? error.message : error}`);
100
100
  }
101
+ // @TODO - re-enable once we have a proper build pipeline
102
+ // Recursively copy the challenge dir to target directory, under src/
103
+ // await fs.cp(this.challengeDir, path.join(this.config.KRADLE_CHALLENGES_PATH, this.shortSlug, "src"), {
104
+ // recursive: true,
105
+ // });
101
106
  }
102
107
  /**
103
108
  * Load the challenge configuration from config.ts
@@ -5,7 +5,6 @@ export declare const ConfigSchema: z.ZodObject<{
5
5
  STUDIO_API_URL: z.ZodString;
6
6
  STUDIO_URL: z.ZodString;
7
7
  KRADLE_API_KEY: z.ZodString;
8
- GCS_BUCKET: z.ZodString;
9
8
  KRADLE_CHALLENGES_PATH: z.ZodDefault<z.ZodString>;
10
9
  NAMESPACE: z.ZodDefault<z.ZodString>;
11
10
  }, z.core.$strip>;
@@ -8,7 +8,6 @@ export const ConfigSchema = z.object({
8
8
  STUDIO_API_URL: z.string().url(),
9
9
  STUDIO_URL: z.string(),
10
10
  KRADLE_API_KEY: z.string(),
11
- GCS_BUCKET: z.string(),
12
11
  /**
13
12
  * Absolute path to the challenges directory. Defaults to ~/Documents/kradle-studio/challenges.
14
13
  */
@@ -30,7 +29,6 @@ export function loadConfig() {
30
29
  STUDIO_API_URL: process.env.STUDIO_API_URL,
31
30
  STUDIO_URL: process.env.STUDIO_URL,
32
31
  KRADLE_API_KEY: process.env.KRADLE_API_KEY,
33
- GCS_BUCKET: process.env.GCS_BUCKET,
34
32
  KRADLE_CHALLENGES_PATH: challengesPath,
35
33
  NAMESPACE: process.env.NAMESPACE,
36
34
  });
@@ -0,0 +1,88 @@
1
+ import type { ApiClient } from "../api-client.js";
2
+ import type { Config } from "../config.js";
3
+ import type { EvaluationMetadata, EvaluationOptions, Manifest, Progress } from "./types.js";
4
/**
 * Type declaration for the evaluation driver.
 *
 * An Evaluator is constructed for one named evaluation and exposes helpers
 * for its metadata, iterations, manifest and progress, plus `run` to execute it.
 */
export declare class Evaluator {
    private name;
    private config;
    private api;
    evaluationDir: string;
    metadataPath: string;
    private runner?;
    private tui?;
    private currentIteration?;
    constructor(name: string, config: Config, api: ApiClient);
    /** Resolve the paths belonging to a specific iteration. */
    private getIterationPaths;
    get configPath(): string;
    /** Directory path of the current iteration. */
    getCurrentIterationDir(): string;
    /** Whether the evaluation exists. */
    exists(): Promise<boolean>;
    /** Whether the master `config.ts` exists. */
    configExists(): Promise<boolean>;
    /** Load the evaluation metadata. */
    loadMetadata(): Promise<EvaluationMetadata | null>;
    /** Save the evaluation metadata. */
    saveMetadata(metadata: EvaluationMetadata): Promise<void>;
    /** Current iteration number, or -1 if none exists. */
    getCurrentIterationNumber(): Promise<number>;
    /** Create a new iteration. */
    createNewIteration(): Promise<number>;
    /**
     * Get or create an iteration.
     * @param createNew - When true, a new iteration is always created. Otherwise the current iteration is used, or the first one is created if none exists.
     */
    getOrCreateIteration(createNew: boolean): Promise<number>;
    /** Load the manifest of the given iteration. */
    loadManifest(iteration: number): Promise<Manifest>;
    /** Load the progress of the given iteration. */
    loadProgress(iteration: number): Promise<Progress | null>;
    /** Save progress to the current iteration. */
    saveProgress(): Promise<void>;
    /** Execute `config.ts` to generate the manifest. */
    generateManifest(configPath: string): Promise<Manifest>;
    /** Execute the `config.ts` file and return the manifest. */
    private executeConfigFile;
    /** Run the evaluation. */
    run(options: EvaluationOptions): Promise<void>;
    /** Handler for state changes reported by the runner. */
    private onRunStateChange;
    /** Handler for a quit request. */
    private handleQuit;
    /** Open a run in the browser. */
    private openRun;
}