@kradle/cli 0.0.5 → 0.0.7

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -2,7 +2,7 @@
2
2
 
3
3
  Kradle's CLI for managing Minecraft challenges, evaluations, agents, and more!
4
4
 
5
- ## Kradle - private installation
5
+ ## Installation
6
6
 
7
7
  1. Install Kradle's CLI globally
8
8
  ```
@@ -48,7 +48,7 @@ KRADLE_API_KEY=your-api-key
48
48
  KRADLE_CHALLENGES_PATH=~/Documents/kradle-studio/challenges
49
49
  ```
50
50
 
51
- ## Commands
51
+ ## Challenge Commands
52
52
 
53
53
  ### Create Challenge
54
54
 
@@ -125,7 +125,7 @@ kradle challenge multi-upload
125
125
 
126
126
  Provides an interactive UI to select multiple challenges and uploads them in parallel.
127
127
 
128
- ### Evaluations (beta)
128
+ ## Evaluation Commands
129
129
 
130
130
  Plan and execute batches of runs across challenges/agents, with resumable iterations and a TUI.
131
131
 
@@ -148,6 +148,23 @@ Features:
148
148
  - Ink TUI: live status counts, elapsed times, scrollable run list; keys `q/Ctrl+C` quit, `↑/↓/j/k` move, `o` open run URL.
149
149
  - Per-iteration manifest: generated from the evaluation `config.ts` into `manifest.json` before runs start.
150
150
 
151
+ ## Publishing a New Version
152
+
153
+ The CLI uses GitHub Actions for automated releases. To publish a new version:
154
+
155
+ 1. **Go to Actions** in the GitHub repository
156
+ 2. **Select "Create Release PR"** workflow from the sidebar
157
+ 3. **Click "Run workflow"** and choose the release type:
158
+ - `patch` - Bug fixes (0.0.5 → 0.0.6)
159
+ - `minor` - New features (0.0.5 → 0.1.0)
160
+ - `major` - Breaking changes (0.0.5 → 1.0.0)
161
+ 4. **Review and merge** the automatically created PR
162
+ 5. **Done!** The package is automatically published to npm when the PR is merged
163
+
164
+ ### Setup (one-time)
165
+
166
+ The publish workflow relies on [npm Trusted Publishers](https://docs.npmjs.com/trusted-publishers), which needs to be configured once for the package.
167
+
151
168
  ## Development
152
169
 
153
170
  ### Setup
@@ -0,0 +1,9 @@
1
+ import { Command } from "@oclif/core";
2
+ export default class Create extends Command {
3
+ static description: string;
4
+ static examples: string[];
5
+ static args: {
6
+ name: import("@oclif/core/interfaces").Arg<string, Record<string, unknown>>;
7
+ };
8
+ run(): Promise<void>;
9
+ }
@@ -0,0 +1,58 @@
1
+ import { exec } from "node:child_process";
2
+ import fs from "node:fs/promises";
3
+ import path from "node:path";
4
+ import { Args, Command } from "@oclif/core";
5
+ import pc from "picocolors";
6
+ import { loadConfig } from "../../lib/config.js";
7
+ import { getStaticResourcePath } from "../../lib/utils.js";
8
+ export default class Create extends Command {
9
+ static description = "Create a new evaluation";
10
+ static examples = ["<%= config.bin %> <%= command.id %> my-evaluation"];
11
+ static args = {
12
+ name: Args.string({
13
+ description: "Name of the evaluation",
14
+ required: true,
15
+ }),
16
+ };
17
+ async run() {
18
+ const { args } = await this.parse(Create);
19
+ loadConfig(); // Validate config is available
20
+ const evaluationDir = path.resolve(process.cwd(), "evaluations", args.name);
21
+ const configPath = path.join(evaluationDir, "config.ts");
22
+ // Check if evaluation already exists
23
+ try {
24
+ await fs.access(evaluationDir);
25
+ this.error(pc.red(`Evaluation '${args.name}' already exists at ${evaluationDir}`));
26
+ }
27
+ catch {
28
+ // Directory doesn't exist, which is what we want
29
+ }
30
+ // Create evaluation directory
31
+ await fs.mkdir(evaluationDir, { recursive: true });
32
+ // Copy template
33
+ const templatePath = getStaticResourcePath("evaluation_template.ts");
34
+ await fs.copyFile(templatePath, configPath);
35
+ this.log(pc.green(`✓ Created evaluation '${args.name}'`));
36
+ this.log(pc.dim(` Config: ${configPath}`));
37
+ // Offer to open in editor on macOS
38
+ if (process.platform === "darwin") {
39
+ this.log("");
40
+ this.log(pc.blue(">> Opening config.ts in your editor..."));
41
+ // Try Cursor first, then VS Code, then fallback to default
42
+ exec(`cursor "${configPath}" || code "${configPath}" || open "${configPath}"`, (error) => {
43
+ if (error) {
44
+ this.log(pc.dim(` Could not open editor automatically. Please open: ${configPath}`));
45
+ }
46
+ });
47
+ }
48
+ else {
49
+ this.log("");
50
+ this.log(pc.blue(`>> Edit the config file to define your runs:`));
51
+ this.log(pc.dim(` ${configPath}`));
52
+ }
53
+ this.log("");
54
+ this.log(pc.blue(">> Next steps:"));
55
+ this.log(pc.dim(` 1. Edit ${path.basename(configPath)} to define your evaluation runs`));
56
+ this.log(pc.dim(` 2. Run: kradle evaluation run ${args.name}`));
57
+ }
58
+ }
@@ -52,7 +52,7 @@ export default class Init extends Command {
52
52
  }
53
53
  this.log("");
54
54
  this.log(pc.blue(">> Next steps:"));
55
- this.log(pc.dim(` 1. Edit ${path.basename(configPath)} to define your evaluation runs`));
55
+ this.log(pc.dim(` 1. Edit ${path.basename(configPath)} to define your evaluation runs, and `));
56
56
  this.log(pc.dim(` 2. Run: kradle evaluation run ${args.name}`));
57
57
  }
58
58
  }
@@ -4,7 +4,6 @@ export default class Init extends Command {
4
4
  static examples: string[];
5
5
  static flags: {
6
6
  name: import("@oclif/core/interfaces").OptionFlag<string | undefined, import("@oclif/core/interfaces").CustomOptions>;
7
- dev: import("@oclif/core/interfaces").BooleanFlag<boolean>;
8
7
  "api-key": import("@oclif/core/interfaces").OptionFlag<string | undefined, import("@oclif/core/interfaces").CustomOptions>;
9
8
  };
10
9
  run(): Promise<void>;
@@ -14,11 +14,11 @@ export default class Init extends Command {
14
14
  description: "Project name",
15
15
  required: false,
16
16
  }),
17
- dev: Flags.boolean({
18
- char: "d",
19
- description: "Use Kradle's development environment instead of production",
20
- required: false,
21
- }),
17
+ // dev: Flags.boolean({
18
+ // char: "d",
19
+ // description: "Use Kradle's development environment instead of production",
20
+ // required: false,
21
+ // }),
22
22
  "api-key": Flags.string({
23
23
  char: "k",
24
24
  description: "Kradle API key",
@@ -34,10 +34,10 @@ export default class Init extends Command {
34
34
  const nonHiddenFiles = files.filter((f) => !f.startsWith("."));
35
35
  const useCurrentDir = nonHiddenFiles.length === 0;
36
36
  if (useCurrentDir) {
37
- this.log(pc.yellow("Current directory is empty, it will be used as the project directory."));
37
+ this.log(pc.yellow("Current directory is empty, it will be used to store challenges and evaluations."));
38
38
  }
39
39
  else {
40
- this.log(pc.yellow("Current directory is not empty, a subdirectory will be created for the project."));
40
+ this.log(pc.yellow("Current directory is not empty, a subdirectory will be created to store challenges and evaluations."));
41
41
  }
42
42
  let projectName;
43
43
  if (flags.name) {
@@ -51,34 +51,36 @@ export default class Init extends Command {
51
51
  const { name } = await enquirer.prompt({
52
52
  type: "input",
53
53
  name: "name",
54
- message: "Enter the project name:",
54
+ message: "What should the directory be called?",
55
55
  initial: initial,
56
56
  });
57
57
  projectName = name;
58
58
  }
59
- let useDev = flags.dev;
60
- if (!useDev) {
61
- const { confirm } = await enquirer.prompt({
62
- type: "confirm",
63
- name: "confirm",
64
- message: "Do you want to use Kradle's development environment?",
65
- initial: false,
66
- });
67
- useDev = confirm;
68
- }
69
- if (useDev) {
70
- this.log(pc.yellow("Using Kradle's development environment."));
71
- }
72
- else {
73
- this.log(pc.green("Using Kradle's production environment."));
74
- }
59
+ // let useDev = flags.dev;
60
+ // if (!useDev) {
61
+ // const { confirm } = await enquirer.prompt<{ confirm: boolean }>({
62
+ // type: "confirm",
63
+ // name: "confirm",
64
+ // message: "Do you want to use Kradle's development environment?",
65
+ // initial: false,
66
+ // });
67
+ // useDev = confirm;
68
+ // }
69
+ // if (useDev) {
70
+ // this.log(pc.yellow("Using Kradle's development environment."));
71
+ // } else {
72
+ // this.log(pc.green("Using Kradle's production environment."));
73
+ // }
74
+ this.log();
75
+ this.log(pc.yellow("Cloud Analytics are only available in the development environment for now. Development environment will be used."));
76
+ const useDev = true;
75
77
  const domain = useDev ? "dev.kradle.ai" : "kradle.ai";
76
78
  let apiKey;
77
79
  if (flags["api-key"]) {
78
80
  apiKey = flags["api-key"];
79
81
  }
80
82
  else {
81
- this.log(pc.dim(`\nGet your API key at: https://${domain}/settings#api-keys`));
83
+ this.log(pc.dim(`Get your API key at: https://${domain}/settings#api-keys`));
82
84
  const { key } = await enquirer.prompt({
83
85
  type: "password",
84
86
  name: "key",
@@ -12,7 +12,13 @@ const DEFAULT_CHALLENGE_SCHEMA = {
12
12
  objective: {
13
13
  fieldName: "success_rate",
14
14
  direction: "maximize",
15
- },
15
+ } /*
16
+ endStates: {
17
+ "red": "Red team only wins",
18
+ "blue": "Blue team only wins",
19
+ "both": "Both teams win",
20
+ "none": "No team wins",
21
+ },*/,
16
22
  };
17
23
  export class ApiClient {
18
24
  config;
@@ -1,7 +1,6 @@
1
- import { exec } from "node:child_process";
2
1
  import fs from "node:fs/promises";
3
2
  import path from "node:path";
4
- import { executeNodeCommand } from "../utils.js";
3
+ import { executeNodeCommand, openInBrowser } from "../utils.js";
5
4
  import { Runner } from "./runner.js";
6
5
  import { TUI } from "./tui.js";
7
6
  import { EvaluationMetadataSchema, ManifestSchema, ProgressSchema } from "./types.js";
@@ -195,7 +194,9 @@ export class Evaluator {
195
194
  // Load manifest
196
195
  const manifest = await this.loadManifest(iteration);
197
196
  // We have 2 mandatory tags: "eval-<evaluation-name>" and "eval-<evaluation-name>-iteration-<iteration>"
198
- const tags = [`eval-${this.name}`, `eval-${this.name}-iteration-${iteration}`, ...(manifest.tags ?? [])];
197
+ const evaluationTag = `eval-${this.name}`;
198
+ const iterationTag = `${evaluationTag}-iteration-${iteration}`;
199
+ const tags = [evaluationTag, iterationTag, ...(manifest.tags ?? [])];
199
200
  // Create runner
200
201
  this.runner = new Runner(manifest.runs, this.api, this.config.WEB_URL, {
201
202
  maxConcurrent: options.maxConcurrent,
@@ -232,6 +233,9 @@ export class Evaluator {
232
233
  if (errors?.length > 0) {
233
234
  throw new Error(`${errors.map((error) => error.error).join("\n\n")}`);
234
235
  }
236
+ if (options.openMetabase ?? true) {
237
+ openInBrowser(`https://daunt-fair.metabaseapp.com/dashboard/10-runs-analysis&tags=${iterationTag}`);
238
+ }
235
239
  }
236
240
  /**
237
241
  * Handle state change from runner
@@ -258,18 +262,7 @@ export class Evaluator {
258
262
  openRun(index) {
259
263
  const url = this.runner?.getRunUrl(index);
260
264
  if (url) {
261
- const platform = process.platform;
262
- let command;
263
- if (platform === "darwin") {
264
- command = `open "${url}"`;
265
- }
266
- else if (platform === "win32") {
267
- command = `start "${url}"`;
268
- }
269
- else {
270
- command = `xdg-open "${url}"`;
271
- }
272
- exec(command);
265
+ openInBrowser(url);
273
266
  }
274
267
  }
275
268
  }
@@ -1,4 +1,4 @@
1
- export * from "./types.js";
2
1
  export { Evaluator } from "./evaluator.js";
3
2
  export { Runner } from "./runner.js";
4
3
  export { TUI } from "./tui.js";
4
+ export * from "./types.js";
@@ -1,4 +1,4 @@
1
- export * from "./types.js";
2
1
  export { Evaluator } from "./evaluator.js";
3
2
  export { Runner } from "./runner.js";
4
3
  export { TUI } from "./tui.js";
4
+ export * from "./types.js";
@@ -35,6 +35,7 @@ const RenderRunLine = ({ state, total, isSelected, padding, }) => {
35
35
  const agents = state.config.participants.map((p) => p.agent.split(":").pop() ?? p.agent).join(", ");
36
36
  const summary = `${state.config.challenge_slug} (${agents})`;
37
37
  const maxSummaryLength = getVisibleColumns() - indexLabel.length - statusLabel.length - (elapsedLabel ? elapsedLabel.length : 0) - 4; // 4 for the spaces and emoji
38
+ // biome-ignore lint/style/useTemplate: template literal would be less readable
38
39
  const summaryText = summary.length > maxSummaryLength ? summary.slice(0, maxSummaryLength - 1) + "…" : summary;
39
40
  return (_jsxs(Text, { inverse: isSelected, children: [_jsx(Text, { color: color, children: icon }), " ", indexLabel, " ", _jsx(Text, { color: color, children: statusLabel }), elapsedLabel ? (_jsxs(_Fragment, { children: [" ", _jsx(Text, { dimColor: true, children: elapsedLabel })] })) : null, " ", _jsx(Text, { dimColor: true, children: summaryText })] }));
40
41
  };
@@ -120,6 +120,7 @@ export type EvaluationMetadata = z.infer<typeof EvaluationMetadataSchema>;
120
120
  export interface EvaluationOptions {
121
121
  new: boolean;
122
122
  maxConcurrent: number;
123
+ openMetabase?: boolean;
123
124
  }
124
125
  export declare const STATUS_ICONS: Record<RunStatus, {
125
126
  icon: string;
@@ -35,6 +35,7 @@ export declare const ChallengeSchema: z.ZodObject<{
35
35
  minimize: "minimize";
36
36
  }>;
37
37
  }, z.core.$strip>;
38
+ endStates: z.ZodOptional<z.ZodRecord<z.ZodString, z.ZodString>>;
38
39
  creationTime: z.ZodOptional<z.ZodString>;
39
40
  updateTime: z.ZodOptional<z.ZodString>;
40
41
  creator: z.ZodOptional<z.ZodString>;
@@ -76,6 +77,7 @@ export declare const ChallengesResponseSchema: z.ZodObject<{
76
77
  minimize: "minimize";
77
78
  }>;
78
79
  }, z.core.$strip>;
80
+ endStates: z.ZodOptional<z.ZodRecord<z.ZodString, z.ZodString>>;
79
81
  creationTime: z.ZodOptional<z.ZodString>;
80
82
  updateTime: z.ZodOptional<z.ZodString>;
81
83
  creator: z.ZodOptional<z.ZodString>;
@@ -23,6 +23,7 @@ export const ChallengeSchema = z.object({
23
23
  fieldName: z.string(),
24
24
  direction: z.enum(["maximize", "minimize"]),
25
25
  }),
26
+ endStates: z.record(z.string(), z.string()).optional(),
26
27
  creationTime: z.string().optional(),
27
28
  updateTime: z.string().optional(),
28
29
  creator: z.string().optional(),
@@ -87,3 +87,10 @@ export declare function executeCommand(command: string, args: string[], options?
87
87
  * @returns A promise that resolves with the stdout of the command.
88
88
  */
89
89
  export declare function executeNodeCommand(args: string[], config: Config): Promise<string>;
90
+ /**
91
+ * Open a URL in the default browser.
92
+ * This is fire-and-forget, so we don't wait for it to complete.
93
+ *
94
+ * @param url The URL to open.
95
+ */
96
+ export declare function openInBrowser(url: string): void;
package/dist/lib/utils.js CHANGED
@@ -1,4 +1,4 @@
1
- import { fork, spawn } from "node:child_process";
1
+ import { exec, fork, spawn } from "node:child_process";
2
2
  import fs from "node:fs/promises";
3
3
  import os from "node:os";
4
4
  import path from "node:path";
@@ -168,3 +168,23 @@ export async function executeCommand(command, args, options) {
168
168
  export async function executeNodeCommand(args, config) {
169
169
  return executeCommand(process.execPath, args, { env: config });
170
170
  }
171
+ /**
172
+ * Open a URL in the default browser.
173
+ * This is fire-and-forget, so we don't wait for it to complete.
174
+ *
175
+ * @param url The URL to open.
176
+ */
177
+ export function openInBrowser(url) {
178
+ const platform = process.platform;
179
+ let command;
180
+ if (platform === "darwin") {
181
+ command = `open "${url}"`;
182
+ }
183
+ else if (platform === "win32") {
184
+ command = `start "${url}"`;
185
+ }
186
+ else {
187
+ command = `xdg-open "${url}"`;
188
+ }
189
+ exec(command);
190
+ }
@@ -17,14 +17,6 @@
17
17
  "multiple": false,
18
18
  "type": "option"
19
19
  },
20
- "dev": {
21
- "char": "d",
22
- "description": "Use Kradle's development environment instead of production",
23
- "name": "dev",
24
- "required": false,
25
- "allowNo": false,
26
- "type": "boolean"
27
- },
28
20
  "api-key": {
29
21
  "char": "k",
30
22
  "description": "Kradle API key",
@@ -305,6 +297,36 @@
305
297
  "watch.js"
306
298
  ]
307
299
  },
300
+ "evaluation:create": {
301
+ "aliases": [],
302
+ "args": {
303
+ "name": {
304
+ "description": "Name of the evaluation",
305
+ "name": "name",
306
+ "required": true
307
+ }
308
+ },
309
+ "description": "Create a new evaluation",
310
+ "examples": [
311
+ "<%= config.bin %> <%= command.id %> my-evaluation"
312
+ ],
313
+ "flags": {},
314
+ "hasDynamicHelp": false,
315
+ "hiddenAliases": [],
316
+ "id": "evaluation:create",
317
+ "pluginAlias": "@kradle/cli",
318
+ "pluginName": "@kradle/cli",
319
+ "pluginType": "core",
320
+ "strict": true,
321
+ "enableJsonFlag": false,
322
+ "isESM": true,
323
+ "relativePath": [
324
+ "dist",
325
+ "commands",
326
+ "evaluation",
327
+ "create.js"
328
+ ]
329
+ },
308
330
  "evaluation:init": {
309
331
  "aliases": [],
310
332
  "args": {
@@ -409,5 +431,5 @@
409
431
  ]
410
432
  }
411
433
  },
412
- "version": "0.0.5"
434
+ "version": "0.0.7"
413
435
  }
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@kradle/cli",
3
- "version": "0.0.5",
3
+ "version": "0.0.7",
4
4
  "description": "Kradle's CLI. Manage challenges, evaluations, agents and more!",
5
5
  "keywords": [
6
6
  "cli"
@@ -24,11 +24,10 @@
24
24
  "build": "rm -rf dist && tsc",
25
25
  "watch": "rm -rf dist && tsc --watch",
26
26
  "lint": "biome check .",
27
- "lint:fix": "biome check --write .",
28
- "format": "biome format --write .",
27
+ "format": "biome format --write . && biome check --write .",
29
28
  "prepack": "sh scripts/prepack.sh",
30
29
  "postpack": "sh scripts/postpack.sh",
31
- "version": "oclif readme && git add README.md"
30
+ "version": "oclif manifest && oclif readme && git add README.md"
32
31
  },
33
32
  "dependencies": {
34
33
  "@google-cloud/storage": "^7.17.3",