@kradle/cli 0.0.4 → 0.0.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,129 @@
1
+ import { jsx as _jsx, Fragment as _Fragment, jsxs as _jsxs } from "react/jsx-runtime";
2
+ import { Box, render, Text, useInput } from "ink";
3
+ import { useEffect, useMemo, useState } from "react";
4
+ import { STATUS_ICONS } from "./types.js";
5
/**
 * Formats the time elapsed since `startTime` as a compact label.
 *
 * @param startTime Epoch timestamp in milliseconds, compared against `Date.now()`.
 * @returns `"<h>h <m>m"` once at least one hour has elapsed, `"<m>m <s>s"` once at
 *          least one minute has elapsed, otherwise `"<s>s"`.
 */
const formatElapsed = (startTime) => {
    const rawElapsed = Date.now() - startTime;
    // Clamp to zero: a start time that lies in the future (clock skew) or a
    // non-finite value would otherwise produce labels such as "-5s" or "NaNs".
    const elapsed = Number.isFinite(rawElapsed) && rawElapsed > 0 ? rawElapsed : 0;
    const seconds = Math.floor(elapsed / 1000);
    const minutes = Math.floor(seconds / 60);
    const hours = Math.floor(minutes / 60);
    if (hours > 0)
        return `${hours}h ${minutes % 60}m`;
    if (minutes > 0)
        return `${minutes}m ${seconds % 60}s`;
    return `${seconds}s`;
};
16
+ const getVisibleRows = () => {
17
+ const terminalHeight = process.stdout.rows || 24;
18
+ return Math.max(1, terminalHeight - 6 - 1); // Header (3 lines) + footer (3 lines) + 1 line of offset
19
+ };
20
+ const getVisibleColumns = () => {
21
+ const terminalWidth = process.stdout.columns || 80;
22
+ return Math.max(1, terminalWidth - 1);
23
+ };
24
/**
 * Renders one row of the run list: status icon, "index/total", status label,
 * an optional elapsed-time label and a summary truncated to the terminal width.
 *
 * @param props.state      Run state; reads `index`, `status`, `startTime`,
 *                         `config.challenge_slug` and `config.participants`.
 * @param props.total      Total number of runs, shown after the slash.
 * @param props.isSelected Whether the row is drawn with inverse colors.
 * @param props.padding    Width to which the 1-based index is left-padded.
 */
const RenderRunLine = ({ state, total, isSelected, padding, }) => {
    // Unknown statuses fall back to the "queued" icon and color.
    const { icon, color } = STATUS_ICONS[state.status] ?? STATUS_ICONS.queued;
    const indexLabel = `${String(state.index + 1).padStart(padding, " ")}/${total}`;
    const statusLabel = state.status.padEnd(12);
    const startTime = state.startTime ?? null;
    // Elapsed time is only shown while the run has started and has not reached
    // one of these end statuses.
    const hasEnded = state.status === "completed" ||
        state.status === "finished" ||
        state.status === "game_over" ||
        state.status === "error";
    const elapsedLabel = startTime !== null && !hasEnded ? formatElapsed(startTime) : null;
    // Show only the part of each agent id after the last ":".
    const agents = state.config.participants.map((p) => p.agent.split(":").pop() ?? p.agent).join(", ");
    const summary = `${state.config.challenge_slug} (${agents})`;
    // The elapsed label is preceded by its own separator space, which must be
    // budgeted too or the row ends up one column too wide.
    const elapsedWidth = elapsedLabel ? elapsedLabel.length + 1 : 0;
    // 4 = icon (assumed one column wide) plus the three remaining separator spaces.
    // Clamp to at least 1: with a zero or negative budget, slice(0, negative)
    // would cut from the end of the string instead of truncating to nothing.
    const maxSummaryLength = Math.max(1, getVisibleColumns() - indexLabel.length - statusLabel.length - elapsedWidth - 4);
    const summaryText = summary.length > maxSummaryLength ? summary.slice(0, maxSummaryLength - 1) + "…" : summary;
    return (_jsxs(Text, { inverse: isSelected, children: [_jsx(Text, { color: color, children: icon }), " ", indexLabel, " ", _jsx(Text, { color: color, children: statusLabel }), elapsedLabel ? (_jsxs(_Fragment, { children: [" ", _jsx(Text, { dimColor: true, children: elapsedLabel })] })) : null, " ", _jsx(Text, { dimColor: true, children: summaryText })] }));
};
41
/**
 * Ink component for an evaluation: a title and key help at the top, a
 * scrollable list of runs in the middle, and a status-count footer.
 *
 * Keys: `q` or Ctrl+C calls `onQuit`; up/`k` and down/`j` move the selection;
 * `o` calls `onOpenRun` with the selected position in `states`.
 *
 * @param props.evaluationName Name shown in the title line.
 * @param props.states         Run states to list.
 * @param props.statusCounts   `completed`, `active`, `queued` and `errors` totals for the footer.
 * @param props.onQuit         Called when the user asks to quit.
 * @param props.onOpenRun      Called with the selected index when `o` is pressed.
 */
const EvaluationUI = ({ evaluationName, states, statusCounts, onQuit, onOpenRun }) => {
    const [selectedIndex, setSelectedIndex] = useState(0);
    const [scrollOffset, setScrollOffset] = useState(0);
    // The value is never read; bumping it once a second forces a re-render so
    // the elapsed-time labels keep updating.
    const [, setTick] = useState(0);
    // Keep the selection inside the list when the number of runs changes.
    useEffect(() => {
        if (states.length > 0) {
            setSelectedIndex((current) => Math.min(current, states.length - 1));
            return;
        }
        setSelectedIndex(0);
        setScrollOffset(0);
    }, [states.length]);
    // Scroll just enough to keep the selected row inside the visible window.
    useEffect(() => {
        const rows = getVisibleRows();
        setScrollOffset((offset) => {
            if (selectedIndex < offset)
                return selectedIndex;
            const wanted = selectedIndex >= offset + rows ? selectedIndex - rows + 1 : offset;
            return Math.min(wanted, Math.max(0, states.length - rows));
        });
    }, [selectedIndex, states.length]);
    useEffect(() => {
        const timer = setInterval(() => {
            setTick((value) => value + 1);
        }, 1000);
        return () => {
            clearInterval(timer);
        };
    }, []);
    useInput((input, key) => {
        const wantsQuit = input === "q" || (key.ctrl && input === "c");
        if (wantsQuit) {
            onQuit();
            return;
        }
        if (states.length === 0)
            return;
        if (key.upArrow || input === "k") {
            setSelectedIndex((current) => Math.max(0, current - 1));
            return;
        }
        if (key.downArrow || input === "j") {
            setSelectedIndex((current) => Math.min(states.length - 1, current + 1));
            return;
        }
        if (input === "o") {
            onOpenRun(selectedIndex);
        }
    });
    const rowsAvailable = getVisibleRows();
    const visibleRuns = useMemo(() => states.slice(scrollOffset, scrollOffset + rowsAvailable), [states, scrollOffset, rowsAvailable]);
    const showScroll = states.length > rowsAvailable;
    const rangeStart = states.length === 0 ? 0 : scrollOffset + 1;
    const rangeEnd = Math.min(scrollOffset + rowsAvailable, states.length);
    const horizontalRule = "─".repeat(Math.min(process.stdout.columns || 80, 80));
    const padding = states.length.toString().length;
    const runRows = visibleRuns.map((state, index) => (_jsx(RenderRunLine, { state: state, total: states.length, isSelected: scrollOffset + index === selectedIndex, padding: padding }, state.index)));
    // Pad the list with blank rows so the footer stays at a fixed position.
    const blankCount = rowsAvailable - visibleRuns.length;
    const blankRows = blankCount > 0
        ? Array.from({ length: blankCount }).map((_, index) => (_jsx(Text, { children: " " }, `empty-${index}`)))
        : null;
    const scrollLine = showScroll
        ? _jsx(Text, { dimColor: true, children: `[${rangeStart}-${rangeEnd} of ${states.length}]` })
        : _jsx(Text, { children: " " });
    // The error count is only shown when there is at least one error.
    const errorsPart = statusCounts.errors > 0
        ? (_jsxs(_Fragment, { children: [_jsx(Text, { children: ` | Errors: ` }), _jsx(Text, { color: "red", children: statusCounts.errors })] }))
        : null;
    return (_jsxs(Box, {
        flexDirection: "column",
        children: [
            _jsx(Text, { bold: true, children: `Evaluation: ${evaluationName}` }),
            _jsx(Text, { dimColor: true, children: horizontalRule }),
            _jsx(Text, { dimColor: true, children: "q:quit \u2191/\u2193/j/k:select o:open in browser" }),
            _jsx(Text, { children: " " }),
            _jsxs(Box, { flexDirection: "column", children: [runRows, blankRows] }),
            scrollLine,
            _jsx(Text, { dimColor: true, children: horizontalRule }),
            _jsxs(Text, {
                children: [
                    _jsx(Text, { children: "Completed: " }),
                    _jsx(Text, { color: "green", children: statusCounts.completed }),
                    _jsx(Text, { children: ` | Active: ` }),
                    _jsx(Text, { color: "yellow", children: statusCounts.active }),
                    _jsx(Text, { children: ` | Queued: ` }),
                    _jsx(Text, { dimColor: true, children: statusCounts.queued }),
                    errorsPart,
                ],
            }),
        ],
    }));
};
95
/**
 * Imperative wrapper around the Ink-rendered `EvaluationUI`.
 *
 * Holds the latest run states and status counts and re-renders the mounted
 * app whenever either is replaced. Updates made while the UI is not running
 * are stored and used by the next `start()`.
 */
export class TUI {
    /** Options given to the constructor; reads `evaluationName`, `onQuit`, `onOpenRun`. */
    options;
    /** Most recent run states passed to `updateStates`. */
    states = [];
    /** Most recent totals passed to `updateStatusCounts`. */
    statusCounts = { completed: 0, active: 0, queued: 0, errors: 0 };
    /** Ink instance returned by `render`, or `undefined` while not mounted. */
    app;
    /** True between `start()` and `stop()`. */
    running = false;
    constructor(options) {
        this.options = options;
    }
    /** Marks the UI as running and mounts it. */
    start() {
        this.running = true;
        const element = this.renderApp();
        this.app = render(element);
    }
    /** Marks the UI as stopped, unmounts it if mounted and drops the Ink instance. */
    stop() {
        this.running = false;
        if (this.app != null) {
            this.app.unmount();
        }
        this.app = undefined;
    }
    /** Replaces the run states and refreshes the screen. */
    updateStates(states) {
        this.states = states;
        this.rerender();
    }
    /** Replaces the status totals and refreshes the screen. */
    updateStatusCounts(counts) {
        this.statusCounts = counts;
        this.rerender();
    }
    /** Re-renders the mounted app; does nothing unless running and mounted. */
    rerender() {
        if (this.running && this.app) {
            this.app.rerender(this.renderApp());
        }
    }
    /** Builds the `EvaluationUI` element from the current options and data. */
    renderApp() {
        const props = {
            evaluationName: this.options.evaluationName,
            states: this.states,
            statusCounts: this.statusCounts,
            onQuit: this.options.onQuit,
            onOpenRun: this.options.onOpenRun,
        };
        return _jsx(EvaluationUI, props);
    }
}
@@ -0,0 +1,127 @@
1
+ import { z } from "zod";
2
+ export declare const ParticipantSchema: z.ZodObject<{
3
+ agent: z.ZodString;
4
+ role: z.ZodOptional<z.ZodString>;
5
+ }, z.core.$strip>;
6
+ export type Participant = z.infer<typeof ParticipantSchema>;
7
+ export declare const RunConfigSchema: z.ZodObject<{
8
+ challenge_slug: z.ZodString;
9
+ participants: z.ZodArray<z.ZodObject<{
10
+ agent: z.ZodString;
11
+ role: z.ZodOptional<z.ZodString>;
12
+ }, z.core.$strip>>;
13
+ }, z.core.$strip>;
14
+ export type RunConfig = z.infer<typeof RunConfigSchema>;
15
+ export declare const ManifestSchema: z.ZodObject<{
16
+ runs: z.ZodArray<z.ZodObject<{
17
+ challenge_slug: z.ZodString;
18
+ participants: z.ZodArray<z.ZodObject<{
19
+ agent: z.ZodString;
20
+ role: z.ZodOptional<z.ZodString>;
21
+ }, z.core.$strip>>;
22
+ }, z.core.$strip>>;
23
+ tags: z.ZodOptional<z.ZodArray<z.ZodString>>;
24
+ }, z.core.$strip>;
25
+ export type Manifest = z.infer<typeof ManifestSchema>;
26
+ export type RunStatus = "queued" | "initializing" | "watcher_connected" | "participants_connected" | "started" | "running" | "recovering" | "completed" | "game_over" | "finished" | "error";
27
+ export declare const ProgressEntrySchema: z.ZodObject<{
28
+ index: z.ZodNumber;
29
+ status: z.ZodEnum<{
30
+ error: "error";
31
+ queued: "queued";
32
+ initializing: "initializing";
33
+ watcher_connected: "watcher_connected";
34
+ participants_connected: "participants_connected";
35
+ started: "started";
36
+ running: "running";
37
+ recovering: "recovering";
38
+ completed: "completed";
39
+ game_over: "game_over";
40
+ finished: "finished";
41
+ }>;
42
+ runId: z.ZodOptional<z.ZodString>;
43
+ startTime: z.ZodOptional<z.ZodNumber>;
44
+ endTime: z.ZodOptional<z.ZodNumber>;
45
+ error: z.ZodOptional<z.ZodString>;
46
+ }, z.core.$strip>;
47
+ export type ProgressEntry = z.infer<typeof ProgressEntrySchema>;
48
+ export declare const ProgressSchema: z.ZodObject<{
49
+ entries: z.ZodArray<z.ZodObject<{
50
+ index: z.ZodNumber;
51
+ status: z.ZodEnum<{
52
+ error: "error";
53
+ queued: "queued";
54
+ initializing: "initializing";
55
+ watcher_connected: "watcher_connected";
56
+ participants_connected: "participants_connected";
57
+ started: "started";
58
+ running: "running";
59
+ recovering: "recovering";
60
+ completed: "completed";
61
+ game_over: "game_over";
62
+ finished: "finished";
63
+ }>;
64
+ runId: z.ZodOptional<z.ZodString>;
65
+ startTime: z.ZodOptional<z.ZodNumber>;
66
+ endTime: z.ZodOptional<z.ZodNumber>;
67
+ error: z.ZodOptional<z.ZodString>;
68
+ }, z.core.$strip>>;
69
+ lastUpdated: z.ZodNumber;
70
+ }, z.core.$strip>;
71
+ export type Progress = z.infer<typeof ProgressSchema>;
72
+ export declare const RunResultSchema: z.ZodObject<{
73
+ index: z.ZodNumber;
74
+ runId: z.ZodString;
75
+ challenge_slug: z.ZodString;
76
+ participants: z.ZodArray<z.ZodObject<{
77
+ agent: z.ZodString;
78
+ role: z.ZodOptional<z.ZodString>;
79
+ }, z.core.$strip>>;
80
+ status: z.ZodString;
81
+ startTime: z.ZodNumber;
82
+ endTime: z.ZodNumber;
83
+ duration: z.ZodNumber;
84
+ logs: z.ZodOptional<z.ZodArray<z.ZodUnknown>>;
85
+ summary: z.ZodOptional<z.ZodString>;
86
+ error: z.ZodOptional<z.ZodString>;
87
+ outcome: z.ZodOptional<z.ZodRecord<z.ZodString, z.ZodUnknown>>;
88
+ }, z.core.$strip>;
89
+ export type RunResult = z.infer<typeof RunResultSchema>;
90
+ export interface RunState {
91
+ index: number;
92
+ config: RunConfig;
93
+ status: RunStatus;
94
+ runId?: string;
95
+ startTime?: number;
96
+ error?: string;
97
+ }
98
+ export interface StatusCounts {
99
+ completed: number;
100
+ active: number;
101
+ queued: number;
102
+ errors: number;
103
+ }
104
+ export declare const RunStatusResponseSchema: z.ZodObject<{
105
+ id: z.ZodString;
106
+ status: z.ZodString;
107
+ createdAt: z.ZodOptional<z.ZodString>;
108
+ updatedAt: z.ZodOptional<z.ZodString>;
109
+ outcome: z.ZodOptional<z.ZodRecord<z.ZodString, z.ZodUnknown>>;
110
+ }, z.core.$strip>;
111
+ export type RunStatusResponse = z.infer<typeof RunStatusResponseSchema>;
112
+ export declare const RunLogsResponseSchema: z.ZodObject<{
113
+ logs: z.ZodArray<z.ZodUnknown>;
114
+ }, z.core.$strip>;
115
+ export type RunLogsResponse = z.infer<typeof RunLogsResponseSchema>;
116
+ export declare const EvaluationMetadataSchema: z.ZodObject<{
117
+ currentIteration: z.ZodNumber;
118
+ }, z.core.$strip>;
119
+ export type EvaluationMetadata = z.infer<typeof EvaluationMetadataSchema>;
120
+ export interface EvaluationOptions {
121
+ new: boolean;
122
+ maxConcurrent: number;
123
+ }
124
+ export declare const STATUS_ICONS: Record<RunStatus, {
125
+ icon: string;
126
+ color: "white" | "yellow" | "blue" | "magenta" | "cyan" | "green" | "red";
127
+ }>;
@@ -0,0 +1,86 @@
1
+ import { z } from "zod";
2
+ // Participant in a run
3
+ export const ParticipantSchema = z.object({
4
+ agent: z.string(),
5
+ role: z.string().optional(),
6
+ });
7
+ // Single run configuration
8
+ export const RunConfigSchema = z.object({
9
+ challenge_slug: z.string(),
10
+ participants: z.array(ParticipantSchema),
11
+ });
12
+ // Manifest returned by config.ts main()
13
+ export const ManifestSchema = z.object({
14
+ runs: z.array(RunConfigSchema),
15
+ tags: z.array(z.string()).optional(),
16
+ });
17
+ // Progress entry for a single run
18
+ export const ProgressEntrySchema = z.object({
19
+ index: z.number(),
20
+ status: z.enum([
21
+ "queued",
22
+ "initializing",
23
+ "watcher_connected",
24
+ "participants_connected",
25
+ "started",
26
+ "running",
27
+ "recovering",
28
+ "completed",
29
+ "game_over",
30
+ "finished",
31
+ "error",
32
+ ]),
33
+ runId: z.string().optional(),
34
+ startTime: z.number().optional(),
35
+ endTime: z.number().optional(),
36
+ error: z.string().optional(),
37
+ });
38
+ // Progress file schema
39
+ export const ProgressSchema = z.object({
40
+ entries: z.array(ProgressEntrySchema),
41
+ lastUpdated: z.number(),
42
+ });
43
+ // Run result with logs and summary
44
+ export const RunResultSchema = z.object({
45
+ index: z.number(),
46
+ runId: z.string(),
47
+ challenge_slug: z.string(),
48
+ participants: z.array(ParticipantSchema),
49
+ status: z.string(),
50
+ startTime: z.number(),
51
+ endTime: z.number(),
52
+ duration: z.number(),
53
+ logs: z.array(z.unknown()).optional(),
54
+ summary: z.string().optional(),
55
+ error: z.string().optional(),
56
+ outcome: z.record(z.string(), z.unknown()).optional(),
57
+ });
58
+ // API response schemas for run status
59
+ export const RunStatusResponseSchema = z.object({
60
+ id: z.string(),
61
+ status: z.string(),
62
+ createdAt: z.string().optional(),
63
+ updatedAt: z.string().optional(),
64
+ outcome: z.record(z.string(), z.unknown()).optional(),
65
+ });
66
+ export const RunLogsResponseSchema = z.object({
67
+ logs: z.array(z.unknown()),
68
+ });
69
+ // Evaluation metadata stored in .evaluation.json
70
+ export const EvaluationMetadataSchema = z.object({
71
+ currentIteration: z.number(),
72
+ });
73
/**
 * Icon and color used by the TUI for each run status.
 *
 * Built from a table of `[status, icon, color]` rows; every status gets its
 * own `{ icon, color }` object, in the order listed here.
 */
export const STATUS_ICONS = Object.fromEntries([
    ["queued", "·", "white"],
    ["initializing", "○", "yellow"],
    ["watcher_connected", "◐", "blue"],
    ["participants_connected", "◉", "blue"],
    ["started", "▶", "magenta"],
    ["running", "▶", "magenta"],
    ["recovering", "⟳", "cyan"],
    ["completed", "✓", "green"],
    ["game_over", "✓", "green"],
    ["finished", "✓", "green"],
    ["error", "✗", "red"],
].map(([status, icon, color]) => [status, { icon, color }]));
@@ -20,10 +20,13 @@ export declare const ChallengeSchema: z.ZodObject<{
20
20
  spectator: "spectator";
21
21
  }>;
22
22
  }, z.core.$strip>;
23
+ description: z.ZodOptional<z.ZodString>;
23
24
  task: z.ZodString;
24
25
  roles: z.ZodRecord<z.ZodString, z.ZodObject<{
25
26
  description: z.ZodString;
26
27
  specificTask: z.ZodString;
28
+ minParticipants: z.ZodOptional<z.ZodNumber>;
29
+ maxParticipants: z.ZodOptional<z.ZodNumber>;
27
30
  }, z.core.$strip>>;
28
31
  objective: z.ZodObject<{
29
32
  fieldName: z.ZodString;
@@ -58,10 +61,13 @@ export declare const ChallengesResponseSchema: z.ZodObject<{
58
61
  spectator: "spectator";
59
62
  }>;
60
63
  }, z.core.$strip>;
64
+ description: z.ZodOptional<z.ZodString>;
61
65
  task: z.ZodString;
62
66
  roles: z.ZodRecord<z.ZodString, z.ZodObject<{
63
67
  description: z.ZodString;
64
68
  specificTask: z.ZodString;
69
+ minParticipants: z.ZodOptional<z.ZodNumber>;
70
+ maxParticipants: z.ZodOptional<z.ZodNumber>;
65
71
  }, z.core.$strip>>;
66
72
  objective: z.ZodObject<{
67
73
  fieldName: z.ZodString;
@@ -82,6 +88,13 @@ export declare const HumanSchema: z.ZodObject<{
82
88
  export declare const RunResponseSchema: z.ZodObject<{
83
89
  runIds: z.ZodOptional<z.ZodArray<z.ZodString>>;
84
90
  }, z.core.$strip>;
91
+ export declare const RunStatusSchema: z.ZodObject<{
92
+ id: z.ZodString;
93
+ status: z.ZodString;
94
+ createdAt: z.ZodOptional<z.ZodString>;
95
+ updatedAt: z.ZodOptional<z.ZodString>;
96
+ outcome: z.ZodOptional<z.ZodRecord<z.ZodString, z.ZodUnknown>>;
97
+ }, z.core.$strip>;
85
98
  export declare const UploadUrlResponseSchema: z.ZodObject<{
86
99
  uploadUrl: z.ZodString;
87
100
  expiresAt: z.ZodString;
@@ -123,5 +136,6 @@ export type ChallengeSchemaType = z.infer<typeof ChallengeSchema>;
123
136
  export type ChallengesResponseType = z.infer<typeof ChallengesResponseSchema>;
124
137
  export type HumanSchemaType = z.infer<typeof HumanSchema>;
125
138
  export type RunResponseType = z.infer<typeof RunResponseSchema>;
139
+ export type RunStatusSchemaType = z.infer<typeof RunStatusSchema>;
126
140
  export type AgentSchemaType = z.infer<typeof AgentSchema>;
127
141
  export type AgentsResponseType = z.infer<typeof AgentsResponseSchema>;
@@ -11,10 +11,13 @@ export const ChallengeSchema = z.object({
11
11
  datapack: z.boolean(),
12
12
  gameMode: z.enum(["survival", "creative", "adventure", "spectator"]),
13
13
  }),
14
+ description: z.string().optional(),
14
15
  task: z.string(),
15
16
  roles: z.record(z.string(), z.object({
16
17
  description: z.string(),
17
18
  specificTask: z.string(),
19
+ minParticipants: z.number().optional(),
20
+ maxParticipants: z.number().optional(),
18
21
  })),
19
22
  objective: z.object({
20
23
  fieldName: z.string(),
@@ -34,6 +37,13 @@ export const HumanSchema = z.object({
34
37
  export const RunResponseSchema = z.object({
35
38
  runIds: z.array(z.string()).optional(),
36
39
  });
40
+ export const RunStatusSchema = z.object({
41
+ id: z.string(),
42
+ status: z.string(),
43
+ createdAt: z.string().optional(),
44
+ updatedAt: z.string().optional(),
45
+ outcome: z.record(z.string(), z.unknown()).optional(),
46
+ });
37
47
  export const UploadUrlResponseSchema = z.object({
38
48
  uploadUrl: z.string(),
39
49
  expiresAt: z.string(),
@@ -304,7 +304,110 @@
304
304
  "challenge",
305
305
  "watch.js"
306
306
  ]
307
+ },
308
+ "evaluation:init": {
309
+ "aliases": [],
310
+ "args": {
311
+ "name": {
312
+ "description": "Name of the evaluation",
313
+ "name": "name",
314
+ "required": true
315
+ }
316
+ },
317
+ "description": "Initialize a new evaluation",
318
+ "examples": [
319
+ "<%= config.bin %> <%= command.id %> my-evaluation"
320
+ ],
321
+ "flags": {},
322
+ "hasDynamicHelp": false,
323
+ "hiddenAliases": [],
324
+ "id": "evaluation:init",
325
+ "pluginAlias": "@kradle/cli",
326
+ "pluginName": "@kradle/cli",
327
+ "pluginType": "core",
328
+ "strict": true,
329
+ "enableJsonFlag": false,
330
+ "isESM": true,
331
+ "relativePath": [
332
+ "dist",
333
+ "commands",
334
+ "evaluation",
335
+ "init.js"
336
+ ]
337
+ },
338
+ "evaluation:list": {
339
+ "aliases": [],
340
+ "args": {},
341
+ "description": "List all evaluations",
342
+ "examples": [
343
+ "<%= config.bin %> <%= command.id %>"
344
+ ],
345
+ "flags": {},
346
+ "hasDynamicHelp": false,
347
+ "hiddenAliases": [],
348
+ "id": "evaluation:list",
349
+ "pluginAlias": "@kradle/cli",
350
+ "pluginName": "@kradle/cli",
351
+ "pluginType": "core",
352
+ "strict": true,
353
+ "enableJsonFlag": false,
354
+ "isESM": true,
355
+ "relativePath": [
356
+ "dist",
357
+ "commands",
358
+ "evaluation",
359
+ "list.js"
360
+ ]
361
+ },
362
+ "evaluation:run": {
363
+ "aliases": [],
364
+ "args": {
365
+ "name": {
366
+ "description": "Name of the evaluation to run",
367
+ "name": "name",
368
+ "required": true
369
+ }
370
+ },
371
+ "description": "Run an evaluation. If the evaluation had an ongoing iteration, it will resume from the last state.",
372
+ "examples": [
373
+ "<%= config.bin %> <%= command.id %> my-evaluation",
374
+ "<%= config.bin %> <%= command.id %> my-evaluation --new",
375
+ "<%= config.bin %> <%= command.id %> my-evaluation --max-concurrent 10"
376
+ ],
377
+ "flags": {
378
+ "new": {
379
+ "char": "n",
380
+ "description": "Start a new iteration of the evaluation",
381
+ "name": "new",
382
+ "allowNo": false,
383
+ "type": "boolean"
384
+ },
385
+ "max-concurrent": {
386
+ "char": "m",
387
+ "description": "Maximum concurrent runs",
388
+ "name": "max-concurrent",
389
+ "default": 5,
390
+ "hasDynamicHelp": false,
391
+ "multiple": false,
392
+ "type": "option"
393
+ }
394
+ },
395
+ "hasDynamicHelp": false,
396
+ "hiddenAliases": [],
397
+ "id": "evaluation:run",
398
+ "pluginAlias": "@kradle/cli",
399
+ "pluginName": "@kradle/cli",
400
+ "pluginType": "core",
401
+ "strict": true,
402
+ "enableJsonFlag": false,
403
+ "isESM": true,
404
+ "relativePath": [
405
+ "dist",
406
+ "commands",
407
+ "evaluation",
408
+ "run.js"
409
+ ]
307
410
  }
308
411
  },
309
- "version": "0.0.4"
412
+ "version": "0.0.5"
310
413
  }
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@kradle/cli",
3
- "version": "0.0.4",
3
+ "version": "0.0.5",
4
4
  "description": "Kradle's CLI. Manage challenges, evaluations, agents and more!",
5
5
  "keywords": [
6
6
  "cli"
@@ -38,8 +38,10 @@
38
38
  "chokidar": "^4.0.3",
39
39
  "dotenv": "^17.2.3",
40
40
  "enquirer": "^2.4.1",
41
+ "ink": "^4.4.1",
41
42
  "listr2": "^9.0.5",
42
43
  "picocolors": "^1.1.1",
44
+ "react": "^18.2.0",
43
45
  "tar": "^7.5.2",
44
46
  "zod": "^4.1.12"
45
47
  },
@@ -48,6 +50,8 @@
48
50
  "@oclif/test": "^4",
49
51
  "@types/chai": "^4",
50
52
  "@types/node": "^18",
53
+ "@types/react": "^19.2.7",
54
+ "@types/react-dom": "^19.2.3",
51
55
  "@types/tar": "^6.1.13",
52
56
  "chai": "^4",
53
57
  "oclif": "^4",
@@ -72,6 +76,9 @@
72
76
  },
73
77
  "agent": {
74
78
  "description": "Manage agents"
79
+ },
80
+ "evaluation": {
81
+ "description": "Manage and run evaluations"
75
82
  }
76
83
  }
77
84
  }
@@ -0,0 +1,69 @@
1
/**
 * Builds the evaluation manifest.
 *
 * Produces `NUM_RUNS` runs of `CHALLENGE_SLUG`; each run gets
 * `NUM_AGENTS_PER_RUN` agents drawn at random from `AGENTS`, with no agent
 * repeated within a run. Edit the constants below to configure the evaluation.
 *
 * @returns The list of runs plus `ADDITIONAL_TAGS` as the manifest tags.
 */
export function main(): Manifest {
    const CHALLENGE_SLUG = "[INSERT CHALLENGE SLUG HERE]";

    const AGENTS: string[] = [
        "team-kradle:claude-sonnet-4",
        "team-kradle:qwen3-coder",
        "team-kradle:deepseek-chat-v3-1",
        "team-kradle:grok-4",
        "team-kradle:grok-code-fast-1",
        "team-kradle:gpt-5",
        "team-kradle:kimi-k2",
        "team-kradle:gemini-2-5-flash",
        "team-kradle:gemini-2-5-pro",
        "team-kradle:glm-4-5-air",
        "team-kradle:gpt-5-mini",
        "team-kradle:o3-mini",
        "team-kradle:codestral-2508",
    ];

    const NUM_RUNS = 200;
    const NUM_AGENTS_PER_RUN = 4;

    const ADDITIONAL_TAGS: string[] = [];

    // One entry per run; each draws its own random set of participants.
    const runs: RunConfig[] = Array.from({ length: NUM_RUNS }, () => {
        const participants = sampleWithoutReplacement(AGENTS, NUM_AGENTS_PER_RUN).map((agent) => ({ agent }));
        return { challenge_slug: CHALLENGE_SLUG, participants };
    });

    return { runs, tags: ADDITIONAL_TAGS };
}
38
+
39
/**
 * Draws `count` elements from `arr` at random, never picking the same
 * position twice. The input array is left untouched.
 *
 * @param arr   Elements to draw from.
 * @param count How many elements to draw.
 * @returns The drawn elements, in the order they were picked.
 * @throws Error when `count` is larger than `arr.length`.
 */
function sampleWithoutReplacement<T>(arr: T[], count: number): T[] {
    if (arr.length < count) {
        throw new Error("Sample size cannot be larger than array length.");
    }

    // Work on a copy so that removing picked elements does not affect the caller.
    const pool = Array.from(arr);
    const picked: T[] = [];

    while (picked.length < count) {
        const position = Math.floor(Math.random() * pool.length);
        // splice removes the chosen element from the pool and hands it back.
        picked.push(...pool.splice(position, 1));
    }

    return picked;
}
55
+
56
+ type Participant = {
57
+ agent: string;
58
+ role?: string;
59
+ };
60
+
61
+ type RunConfig = {
62
+ challenge_slug: string;
63
+ participants: Participant[];
64
+ };
65
+
66
+ type Manifest = {
67
+ runs: RunConfig[];
68
+ tags?: string[];
69
+ };
@@ -2,5 +2,4 @@ WEB_API_URL=https://dev-api.kradle.ai/v0 #https://api.kradle.ai/v0
2
2
  WEB_URL=https://dev.kradle.ai #https://kradle.ai/workbench
3
3
  STUDIO_API_URL=http://localhost:2999/api/v0
4
4
  STUDIO_URL=kradle-dev://open #kradle://open
5
- GCS_BUCKET=mckradle-3c267.firebasestorage.app #kradle-prod-storage
6
5
  KRADLE_CHALLENGES_PATH=~/Documents/kradle-studio/challenges
@@ -2,5 +2,4 @@ WEB_API_URL=https://api.kradle.ai/v0 #https://dev-api.kradle.ai/v0
2
2
  WEB_URL=https://kradle.ai #https://dev.kradle.ai
3
3
  STUDIO_API_URL=http://localhost:2999/api/v0
4
4
  STUDIO_URL=kradle://open #kradle-dev://open
5
- GCS_BUCKET=kradle-prod-storage #mckradle-3c267.firebasestorage.app
6
5
  KRADLE_CHALLENGES_PATH=~/Documents/kradle-studio/challenges