kradle 0.6.14 → 0.7.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -233,6 +233,8 @@ This displays:
233
233
  - Participant results (agent, winner status, score)
234
234
  - Log entries with timestamps and levels (unless `--no-logs` is used)
235
235
 
236
+ If the run is still finalising (status `game_over`), the command waits automatically for scores and end state to be written before displaying results.
237
+
236
238
  ## Experiment Commands
237
239
 
238
240
  Experiments allow you to run batches of challenge runs with different agents and configurations, then analyze the results. This is useful for benchmarking agents, testing challenge difficulty, or gathering statistics across many runs.
@@ -462,6 +464,8 @@ The CLI uses GitHub Actions for automated releases. To publish a new version:
462
464
  4. **Review and merge** the automatically created PR
463
465
  5. **Done!** The package is automatically published to npm when the PR is merged
464
466
 
467
+ For full deployment instructions, see this [notion doc](https://www.notion.so/CLI-SDK-How-to-test-release-2f13a5a82da68040818fe4a545afa0af)
468
+
465
469
 
466
470
  ## Development
467
471
 
package/bin/run.js CHANGED
@@ -5,6 +5,9 @@ import path from "node:path";
5
5
  import fs from "node:fs";
6
6
  import dotenv from "dotenv";
7
7
 
8
+ process.stdout.setDefaultEncoding("utf8");
9
+ process.stderr.setDefaultEncoding("utf8");
10
+
8
11
  // Load .env file from cwd if it exists
9
12
  const envPath = path.join(process.cwd(), ".env");
10
13
  if (fs.existsSync(envPath)) {
@@ -5,10 +5,9 @@ import { ApiClient } from "../../lib/api-client.js";
5
5
  import { extractShortSlug, getChallengeSlugArgument } from "../../lib/arguments.js";
6
6
  import { Challenge } from "../../lib/challenge.js";
7
7
  import { getConfigFlags } from "../../lib/flags.js";
8
- import { clearScreen, formatDuration, fuzzyHighlight, getRunStatusDisplay, openInBrowser } from "../../lib/utils.js";
8
+ import { clearScreen, formatDuration, fuzzyHighlight, getRunStatusDisplay, openInBrowser, TERMINAL_STATUSES, } from "../../lib/utils.js";
9
9
  const POLL_INTERVAL_MS = 2000;
10
10
  const MAX_POLL_TIME_MS = 30 * 60 * 1000; // 30 minutes
11
- const TERMINAL_STATUSES = ["finished", "game_over", "error", "completed", "cancelled", "timeout", "failed"];
12
11
  export default class Run extends Command {
13
12
  static description = "Run a challenge";
14
13
  // Allow variadic arguments for inline agent specification
@@ -2,7 +2,9 @@ import { Args, Command, Flags } from "@oclif/core";
2
2
  import pc from "picocolors";
3
3
  import { ApiClient } from "../../../lib/api-client.js";
4
4
  import { getConfigFlags } from "../../../lib/flags.js";
5
- import { formatDuration, formatTime } from "../../../lib/utils.js";
5
+ import { formatDuration, formatTime, sleep, TERMINAL_STATUSES } from "../../../lib/utils.js";
6
+ const FINALISE_POLL_MS = 2000;
7
+ const FINALISE_TIMEOUT_MS = 120_000;
6
8
  function getLogLevelColor(level) {
7
9
  switch (level.toLowerCase()) {
8
10
  case "error":
@@ -54,6 +56,24 @@ export default class GetRun extends Command {
54
56
  const api = new ApiClient(flags["api-url"], flags["api-key"]);
55
57
  const showLogs = !flags["no-logs"];
56
58
  this.log(pc.blue(`>> Loading run ${args.runId}...`));
59
+ // Scores and endState aren't written until the run transitions from
60
+ // game_over → finished. Poll briefly to avoid returning incomplete data.
61
+ const initial = await api.getRunStatus(args.runId);
62
+ if (initial.status === "game_over") {
63
+ this.log(pc.dim(" Run is finalising, waiting..."));
64
+ const deadline = Date.now() + FINALISE_TIMEOUT_MS;
65
+ let timedOut = true;
66
+ while (Date.now() < deadline) {
67
+ const status = await api.getRunStatus(args.runId);
68
+ if (TERMINAL_STATUSES.includes(status.status)) {
69
+ timedOut = false;
70
+ break;
71
+ }
72
+ await sleep(FINALISE_POLL_MS);
73
+ }
74
+ if (timedOut)
75
+ this.log(pc.yellow(" Timed out waiting for finalisation — results may be incomplete."));
76
+ }
57
77
  try {
58
78
  const [runResult, logs] = await Promise.all([
59
79
  api.getRunResult(args.runId),
@@ -13,6 +13,7 @@ export default class Run extends Command {
13
13
  "max-concurrent": import("@oclif/core/interfaces").OptionFlag<number, import("@oclif/core/interfaces").CustomOptions>;
14
14
  "download-recordings": import("@oclif/core/interfaces").BooleanFlag<boolean>;
15
15
  "download-logs": import("@oclif/core/interfaces").BooleanFlag<boolean>;
16
+ "hide-participant-names": import("@oclif/core/interfaces").BooleanFlag<boolean>;
16
17
  };
17
18
  run(): Promise<void>;
18
19
  }
@@ -38,6 +38,10 @@ export default class Run extends Command {
38
38
  description: "Automatically download logs after each run finishes",
39
39
  default: false,
40
40
  }),
41
+ "hide-participant-names": Flags.boolean({
42
+ description: "Replace participant names with Player1, Player2, etc. to prevent models inferring opponents from their names",
43
+ default: false,
44
+ }),
41
45
  ...getConfigFlags("api-key", "api-url", "web-url"),
42
46
  };
43
47
  async run() {
@@ -63,6 +67,7 @@ export default class Run extends Command {
63
67
  openMetabase: true,
64
68
  downloadRecordings: flags["download-recordings"],
65
69
  downloadLogs: flags["download-logs"],
70
+ hideParticipantNames: flags["hide-participant-names"],
66
71
  });
67
72
  this.log(pc.green("\n✓ Experiment complete!"));
68
73
  }
@@ -1 +1 @@
1
- export declare const MINECRAFT_ARENA_MANAGER_TAG = "14f7331";
1
+ export declare const MINECRAFT_ARENA_MANAGER_TAG = "69ce836";
@@ -1,2 +1,2 @@
1
1
  // Managed by https://github.com/Kradle-ai/arena-minecraft/actions/workflows/release.yaml
2
- export const MINECRAFT_ARENA_MANAGER_TAG = "14f7331";
2
+ export const MINECRAFT_ARENA_MANAGER_TAG = "69ce836";
@@ -77,6 +77,7 @@ export declare class ApiClient {
77
77
  participants: unknown[];
78
78
  jobType: "background" | "foreground" | "foreground_with_recording";
79
79
  env?: "cloud" | "studio";
80
+ hideParticipantNames?: boolean;
80
81
  }): Promise<{
81
82
  runIds?: string[] | undefined;
82
83
  participants?: Record<string, {
@@ -260,14 +260,16 @@ export class Experimenter {
260
260
  // Fetch the user's slug to prefix tags for uniqueness
261
261
  const human = await this.api.getHuman();
262
262
  const prefix = human.username;
263
- // We have 2 mandatory tags: "<user>(<exp-name>)" and "<user>(<exp-name>-v<version>)"
264
- const experimentTag = `${prefix}(exp-${this.name})`;
265
- const versionTag = `${prefix}(exp-${this.name}-v${version})`;
263
+ // We have 2 mandatory tags: "<user>-exp-<name>" and "<user>-exp-<name>-v<version>"
264
+ // Use hyphens as delimiters — the API requires tags to start and end with alphanumerics.
265
+ const experimentTag = `${prefix}-exp-${this.name}`;
266
+ const versionTag = `${prefix}-exp-${this.name}-v${version}`;
266
267
  const tags = [experimentTag, versionTag, ...(manifest.tags ?? [])];
267
268
  // Create runner
268
269
  this.runner = new Runner(manifest.runs, this.api, this.webUrl, {
269
270
  maxConcurrent: options.maxConcurrent,
270
271
  tags: tags,
272
+ hideParticipantNames: options.hideParticipantNames,
271
273
  onStateChange: () => this.onRunStateChange(),
272
274
  onRunComplete: options.downloadRecordings || options.downloadLogs
273
275
  ? async (index, runId) => {
@@ -11,11 +11,13 @@ export declare class Runner {
11
11
  private stopped;
12
12
  private maxConcurrent;
13
13
  private tags;
14
+ private hideParticipantNames;
14
15
  private onStateChange?;
15
16
  private onRunComplete?;
16
17
  constructor(runs: RunConfig[], api: ApiClient, baseUrl: string, options?: {
17
18
  maxConcurrent?: number;
18
19
  tags?: string[];
20
+ hideParticipantNames?: boolean;
19
21
  onStateChange?: (index: number, state: RunState) => void;
20
22
  onRunComplete?: (index: number, runId: string) => Promise<void>;
21
23
  });
@@ -67,10 +69,6 @@ export declare class Runner {
67
69
  * Main execution loop
68
70
  */
69
71
  execute(): Promise<void>;
70
- /**
71
- * Helper for delays
72
- */
73
- private delay;
74
72
  /**
75
73
  * Get progress entries for saving
76
74
  */
@@ -1,3 +1,4 @@
1
+ import { sleep } from "../utils.js";
1
2
  const DEFAULT_MAX_CONCURRENT = 5;
2
3
  const STATUS_POLL_INTERVAL_MS = 2000;
3
4
  const MAX_START_RUN_TRIES = 10;
@@ -20,6 +21,7 @@ export class Runner {
20
21
  stopped = false;
21
22
  maxConcurrent;
22
23
  tags;
24
+ hideParticipantNames;
23
25
  onStateChange;
24
26
  onRunComplete;
25
27
  constructor(runs, api, baseUrl, options = {}) {
@@ -28,10 +30,11 @@ export class Runner {
28
30
  this.baseUrl = baseUrl;
29
31
  this.maxConcurrent = options.maxConcurrent ?? DEFAULT_MAX_CONCURRENT;
30
32
  this.tags = options.tags ?? [];
31
- // Validate tags respect regex
33
+ this.hideParticipantNames = options.hideParticipantNames ?? false;
34
+ // Validate tags match API rules: start and end with alphanumeric, 3-36 chars total.
32
35
  for (const tag of this.tags) {
33
- if (!/^[a-zA-Z0-9()][a-zA-Z0-9()-]{1,34}[a-zA-Z0-9()]$/.test(tag)) {
34
- throw new Error(`Invalid tag: ${tag}. Tags must start and end with a letter or number, and can only contain letters, numbers, hyphens, underscores, and parentheses.`);
36
+ if (!/^[a-zA-Z0-9][a-zA-Z0-9()-]{1,34}[a-zA-Z0-9]$/.test(tag)) {
37
+ throw new Error(`Invalid tag: ${tag}. Tags must start and end with a letter or number, and can only contain letters, numbers, hyphens, and parentheses.`);
35
38
  }
36
39
  }
37
40
  this.onStateChange = options.onStateChange;
@@ -54,7 +57,9 @@ export class Runner {
54
57
  state.runId = entry.runId;
55
58
  state.participantIds = entry.participantIds;
56
59
  state.startTime = entry.startTime;
57
- if (entry.status === "completed" || entry.status === "finished" || entry.status === "game_over") {
60
+ // game_over is not terminal — data isn't written until finished.
61
+ // Leave game_over runs out of completedRuns so resumeInFlightRuns re-polls them.
62
+ if (entry.status === "completed" || entry.status === "finished") {
58
63
  this.completedRuns.add(entry.index);
59
64
  }
60
65
  else if (entry.status === "error") {
@@ -108,7 +113,7 @@ export class Runner {
108
113
  let queued = 0;
109
114
  let errors = 0;
110
115
  for (const state of this.states) {
111
- if (state.status === "completed" || state.status === "finished" || state.status === "game_over") {
116
+ if (state.status === "completed" || state.status === "finished") {
112
117
  completed++;
113
118
  }
114
119
  else if (state.status === "error") {
@@ -162,6 +167,7 @@ export class Runner {
162
167
  challenge: state.config.challenge_slug,
163
168
  participants: state.config.participants,
164
169
  jobType: "background",
170
+ hideParticipantNames: this.hideParticipantNames || undefined,
165
171
  });
166
172
  if (!response.runIds || response.runIds.length === 0) {
167
173
  throw new Error("No run ID returned from API");
@@ -191,7 +197,7 @@ export class Runner {
191
197
  // Re-queue for later
192
198
  this.updateState(index, { status: "queued", error: undefined });
193
199
  this.activeRuns.delete(index);
194
- await this.delay(getBackoffTime(tries));
200
+ await sleep(getBackoffTime(tries));
195
201
  await this.startRun(index, tries + 1);
196
202
  return;
197
203
  }
@@ -209,10 +215,11 @@ export class Runner {
209
215
  const status = await this.api.getRunStatus(runId);
210
216
  const normalizedStatus = this.normalizeStatus(status.status);
211
217
  this.updateState(index, { status: normalizedStatus });
212
- if (normalizedStatus === "completed" || normalizedStatus === "finished" || normalizedStatus === "game_over") {
218
+ // game_over means gameplay ended but the PUT /end hasn't fired yet —
219
+ // keep polling so onRunComplete receives complete data.
220
+ if (normalizedStatus === "completed" || normalizedStatus === "finished") {
213
221
  this.completedRuns.add(index);
214
222
  this.activeRuns.delete(index);
215
- // Trigger recording download if callback provided
216
223
  if (this.onRunComplete) {
217
224
  // Don't await - run in background to avoid blocking
218
225
  this.onRunComplete(index, runId).catch(() => {
@@ -226,11 +233,11 @@ export class Runner {
226
233
  this.activeRuns.delete(index);
227
234
  return;
228
235
  }
229
- await this.delay(STATUS_POLL_INTERVAL_MS);
236
+ await sleep(STATUS_POLL_INTERVAL_MS);
230
237
  }
231
238
  catch (error) {
232
239
  // Network error, continue polling
233
- await this.delay(STATUS_POLL_INTERVAL_MS * 2);
240
+ await sleep(STATUS_POLL_INTERVAL_MS * 2);
234
241
  }
235
242
  }
236
243
  this.activeRuns.delete(index);
@@ -275,22 +282,16 @@ export class Runner {
275
282
  // Don't await - run concurrently
276
283
  this.startRun(index);
277
284
  // Wait a bit to avoid overwhelming the API
278
- await this.delay(350);
285
+ await sleep(350);
279
286
  }
280
287
  // Wait a bit before checking again
281
- await this.delay(500);
288
+ await sleep(500);
282
289
  }
283
290
  // Wait for active runs to complete
284
291
  while (this.activeRuns.size > 0 && !this.stopped) {
285
- await this.delay(500);
292
+ await sleep(500);
286
293
  }
287
294
  }
288
- /**
289
- * Helper for delays
290
- */
291
- delay(ms) {
292
- return new Promise((resolve) => setTimeout(resolve, ms));
293
- }
294
295
  /**
295
296
  * Get progress entries for saving
296
297
  */
@@ -30,11 +30,11 @@ export declare const ProgressEntrySchema: z.ZodObject<{
30
30
  index: z.ZodNumber;
31
31
  status: z.ZodEnum<{
32
32
  finished: "finished";
33
- game_over: "game_over";
33
+ error: "error";
34
34
  completed: "completed";
35
+ game_over: "game_over";
35
36
  started: "started";
36
37
  initializing: "initializing";
37
- error: "error";
38
38
  queued: "queued";
39
39
  watcher_connected: "watcher_connected";
40
40
  participants_connected: "participants_connected";
@@ -53,11 +53,11 @@ export declare const ProgressSchema: z.ZodObject<{
53
53
  index: z.ZodNumber;
54
54
  status: z.ZodEnum<{
55
55
  finished: "finished";
56
- game_over: "game_over";
56
+ error: "error";
57
57
  completed: "completed";
58
+ game_over: "game_over";
58
59
  started: "started";
59
60
  initializing: "initializing";
60
- error: "error";
61
61
  queued: "queued";
62
62
  watcher_connected: "watcher_connected";
63
63
  participants_connected: "participants_connected";
@@ -106,6 +106,7 @@ export interface ExperimentOptions {
106
106
  openMetabase?: boolean;
107
107
  downloadRecordings?: boolean;
108
108
  downloadLogs?: boolean;
109
+ hideParticipantNames?: boolean;
109
110
  }
110
111
  export declare const STATUS_ICONS: Record<RunStatus, {
111
112
  icon: string;
@@ -64,7 +64,7 @@ export const STATUS_ICONS = {
64
64
  running: { icon: "▶", color: "magenta" },
65
65
  recovering: { icon: "⟳", color: "cyan" },
66
66
  completed: { icon: "✓", color: "green" },
67
- game_over: { icon: "", color: "green" },
67
+ game_over: { icon: "", color: "cyan" },
68
68
  finished: { icon: "✓", color: "green" },
69
69
  error: { icon: "✗", color: "red" },
70
70
  };
@@ -67,6 +67,7 @@ export declare const ChallengeSchema: z.ZodObject<{
67
67
  tickRate: z.ZodOptional<z.ZodNumber>;
68
68
  startLives: z.ZodOptional<z.ZodNumber>;
69
69
  watcherCommands: z.ZodOptional<z.ZodString>;
70
+ hideRoleAssignments: z.ZodOptional<z.ZodBoolean>;
70
71
  }, z.core.$strip>;
71
72
  description: z.ZodOptional<z.ZodString>;
72
73
  task: z.ZodOptional<z.ZodString>;
@@ -114,6 +115,7 @@ export declare const ChallengeConfigSchema: z.ZodObject<{
114
115
  tickRate: z.ZodOptional<z.ZodNumber>;
115
116
  startLives: z.ZodOptional<z.ZodNumber>;
116
117
  watcherCommands: z.ZodOptional<z.ZodString>;
118
+ hideRoleAssignments: z.ZodOptional<z.ZodBoolean>;
117
119
  }, z.core.$strip>;
118
120
  task: z.ZodOptional<z.ZodString>;
119
121
  scoreLabel: z.ZodOptional<z.ZodString>;
@@ -164,6 +166,7 @@ export declare const ChallengesResponseSchema: z.ZodObject<{
164
166
  tickRate: z.ZodOptional<z.ZodNumber>;
165
167
  startLives: z.ZodOptional<z.ZodNumber>;
166
168
  watcherCommands: z.ZodOptional<z.ZodString>;
169
+ hideRoleAssignments: z.ZodOptional<z.ZodBoolean>;
167
170
  }, z.core.$strip>;
168
171
  description: z.ZodOptional<z.ZodString>;
169
172
  task: z.ZodOptional<z.ZodString>;
@@ -34,6 +34,7 @@ export const ChallengeSchema = z.object({
34
34
  tickRate: z.number().min(5).max(25).optional(),
35
35
  startLives: z.number().optional(),
36
36
  watcherCommands: z.string().optional(),
37
+ hideRoleAssignments: z.boolean().optional(),
37
38
  }),
38
39
  description: z.string().optional(),
39
40
  task: z.string().optional(),
@@ -93,6 +93,10 @@ export declare function loadTypescriptExport(filePath: string, exportName: strin
93
93
  * @param url The URL to open.
94
94
  */
95
95
  export declare function openInBrowser(url: string): void;
96
+ /** Run statuses that indicate a run is fully complete and data is written. */
97
+ export declare const TERMINAL_STATUSES: string[];
98
+ /** Sleep for the given number of milliseconds. */
99
+ export declare function sleep(ms: number): Promise<void>;
96
100
  /**
97
101
  * Format a duration in milliseconds to a human-readable string.
98
102
  * @param ms Duration in milliseconds
package/dist/lib/utils.js CHANGED
@@ -209,6 +209,12 @@ export function openInBrowser(url) {
209
209
  }
210
210
  exec(command);
211
211
  }
212
+ /** Run statuses that indicate a run is fully complete and data is written. */
213
+ export const TERMINAL_STATUSES = ["finished", "error", "completed", "cancelled", "timeout", "failed"];
214
+ /** Sleep for the given number of milliseconds. */
215
+ export function sleep(ms) {
216
+ return new Promise((resolve) => setTimeout(resolve, ms));
217
+ }
212
218
  /**
213
219
  * Format a duration in milliseconds to a human-readable string.
214
220
  * @param ms Duration in milliseconds
@@ -243,9 +249,10 @@ export function formatTime(isoString) {
243
249
  export function getRunStatusDisplay(status) {
244
250
  switch (status) {
245
251
  case "finished":
246
- case "game_over":
247
252
  case "completed":
248
253
  return pc.green(status);
254
+ case "game_over":
255
+ return pc.cyan(status);
249
256
  case "started":
250
257
  return pc.blue(status);
251
258
  case "initializing":
@@ -1046,6 +1046,12 @@
1046
1046
  "allowNo": false,
1047
1047
  "type": "boolean"
1048
1048
  },
1049
+ "hide-participant-names": {
1050
+ "description": "Replace participant names with Player1, Player2, etc. to prevent models inferring opponents from their names",
1051
+ "name": "hide-participant-names",
1052
+ "allowNo": false,
1053
+ "type": "boolean"
1054
+ },
1049
1055
  "api-key": {
1050
1056
  "description": "Kradle API key",
1051
1057
  "env": "KRADLE_API_KEY",
@@ -1543,5 +1549,5 @@
1543
1549
  ]
1544
1550
  }
1545
1551
  },
1546
- "version": "0.6.14"
1552
+ "version": "0.7.0"
1547
1553
  }
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "kradle",
3
- "version": "0.6.14",
3
+ "version": "0.7.0",
4
4
  "description": "Kradle's CLI. Manage challenges, experiments, agents and more!",
5
5
  "keywords": [
6
6
  "cli"