@kradle/cli 0.0.3 → 0.0.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,275 @@
1
+ import { exec } from "node:child_process";
2
+ import fs from "node:fs/promises";
3
+ import path from "node:path";
4
+ import { executeNodeCommand } from "../utils.js";
5
+ import { Runner } from "./runner.js";
6
+ import { TUI } from "./tui.js";
7
+ import { EvaluationMetadataSchema, ManifestSchema, ProgressSchema } from "./types.js";
8
/**
 * Drives one named evaluation stored under `<cwd>/evaluations/<name>`.
 *
 * An evaluation owns a master `config.ts`, a `.evaluation.json` metadata file that
 * records the current iteration number, and one directory per iteration
 * (`iterations/000`, `iterations/001`, ...) holding a copy of the config, the
 * manifest generated from it, and the progress of its runs.
 */
export class Evaluator {
    name;
    config;
    api;
    evaluationDir;
    metadataPath;
    runner;
    tui;
    currentIteration;
    /** Tail of the chain of progress writes. It never rejects, so it is always safe to chain on. */
    pendingSave = Promise.resolve();
    /** A save that has been scheduled but has not started writing yet, if any. */
    queuedSave;
    /**
     * @param name - Evaluation name; also the directory name under `evaluations/`.
     * @param config - CLI configuration; `WEB_URL` is read from it and the whole object is
     *   forwarded to `executeNodeCommand`.
     * @param api - API client handed to the `Runner`.
     */
    constructor(name, config, api) {
        this.name = name;
        this.config = config;
        this.api = api;
        this.evaluationDir = path.resolve(process.cwd(), "evaluations", name);
        this.metadataPath = path.join(this.evaluationDir, ".evaluation.json");
    }
    /**
     * Get paths for a specific iteration.
     * The iteration number is zero-padded to three digits (7 -> "007").
     */
    getIterationPaths(iteration) {
        const iterationDir = path.join(this.evaluationDir, "iterations", iteration.toString().padStart(3, "0"));
        return {
            iterationDir,
            configPath: path.join(iterationDir, "config.ts"),
            manifestPath: path.join(iterationDir, "manifest.json"),
            progressPath: path.join(iterationDir, "progress.json"),
        };
    }
    /** Path of the master `config.ts` of this evaluation. */
    get configPath() {
        return path.join(this.evaluationDir, "config.ts");
    }
    /**
     * Get the current iteration directory path.
     * @throws Error if no iteration has been selected or created yet.
     */
    getCurrentIterationDir() {
        if (this.currentIteration === undefined) {
            throw new Error("No iteration set");
        }
        return this.getIterationPaths(this.currentIteration).iterationDir;
    }
    /**
     * Check if the evaluation directory exists (more precisely: is accessible).
     */
    async exists() {
        try {
            await fs.access(this.evaluationDir);
            return true;
        }
        catch {
            return false;
        }
    }
    /**
     * Check if config.ts exists (master config).
     */
    async configExists() {
        try {
            await fs.access(this.configPath);
            return true;
        }
        catch {
            return false;
        }
    }
    /**
     * Load evaluation metadata.
     * @returns The validated metadata, or `null` when the file is missing, is not valid
     *   JSON, or does not match `EvaluationMetadataSchema`.
     */
    async loadMetadata() {
        try {
            const content = await fs.readFile(this.metadataPath, "utf-8");
            return EvaluationMetadataSchema.parse(JSON.parse(content));
        }
        catch {
            return null;
        }
    }
    /**
     * Save evaluation metadata as pretty-printed JSON.
     */
    async saveMetadata(metadata) {
        await fs.writeFile(this.metadataPath, JSON.stringify(metadata, null, 2));
    }
    /**
     * Get the current iteration number, or -1 if none exists.
     */
    async getCurrentIterationNumber() {
        const metadata = await this.loadMetadata();
        return metadata?.currentIteration ?? -1;
    }
    /**
     * Create a new iteration: make its directory, snapshot the master config into it,
     * generate the manifest from that snapshot and finally record the new iteration
     * number in the metadata. The metadata is only updated once everything else succeeded.
     * @returns The number of the newly created iteration.
     */
    async createNewIteration() {
        const newIteration = (await this.getCurrentIterationNumber()) + 1;
        const paths = this.getIterationPaths(newIteration);
        await fs.mkdir(paths.iterationDir, { recursive: true });
        // Copy master config to iteration
        await fs.copyFile(this.configPath, paths.configPath);
        // Generate manifest from the iteration's own copy of the config
        const manifest = await this.generateManifest(paths.configPath);
        await fs.writeFile(paths.manifestPath, JSON.stringify(manifest, null, 2));
        await this.saveMetadata({ currentIteration: newIteration });
        this.currentIteration = newIteration;
        return newIteration;
    }
    /**
     * Get or create an iteration.
     * @param createNew - If true, always create a new iteration. Otherwise, use current iteration or create first one if none exists.
     */
    async getOrCreateIteration(createNew) {
        if (createNew) {
            return await this.createNewIteration();
        }
        const currentIteration = await this.getCurrentIterationNumber();
        if (currentIteration < 0) {
            return await this.createNewIteration();
        }
        this.currentIteration = currentIteration;
        return currentIteration;
    }
    /**
     * Load manifest from iteration.
     * Unlike `loadProgress`, failures (missing file, invalid JSON, schema mismatch) propagate.
     */
    async loadManifest(iteration) {
        const { manifestPath } = this.getIterationPaths(iteration);
        const content = await fs.readFile(manifestPath, "utf-8");
        return ManifestSchema.parse(JSON.parse(content));
    }
    /**
     * Load progress from iteration.
     * @returns The validated progress, or `null` when it is missing or unreadable.
     */
    async loadProgress(iteration) {
        try {
            const { progressPath } = this.getIterationPaths(iteration);
            const content = await fs.readFile(progressPath, "utf-8");
            return ProgressSchema.parse(JSON.parse(content));
        }
        catch {
            return null;
        }
    }
    /**
     * Save progress to current iteration.
     *
     * Saves are requested on every run state change, so several can be requested while a
     * previous write is still in flight. Writing the same file concurrently is unsafe, so
     * writes are chained one after another. A save that is still waiting for its turn
     * takes its snapshot only when it starts writing, so later requests can simply share it.
     *
     * @returns A promise that settles when the progress covering this request has been
     *   written; it rejects if that write fails. Resolves without writing when there is
     *   no runner or no current iteration.
     */
    saveProgress() {
        if (this.queuedSave) {
            return this.queuedSave;
        }
        const queued = this.pendingSave.then(() => {
            // From here on the snapshot is fixed, so later requests need a fresh save.
            this.queuedSave = undefined;
            return this.writeProgress();
        });
        this.queuedSave = queued;
        this.pendingSave = queued.catch(() => { });
        return queued;
    }
    /** Snapshot the runner's progress entries and write them to the current iteration. */
    async writeProgress() {
        if (!this.runner || this.currentIteration === undefined)
            return;
        const { progressPath } = this.getIterationPaths(this.currentIteration);
        const progress = {
            entries: this.runner.getProgressEntries(),
            lastUpdated: Date.now(),
        };
        await fs.writeFile(progressPath, JSON.stringify(progress, null, 2));
    }
    /**
     * Execute config.ts to generate manifest, validated against `ManifestSchema`.
     */
    async generateManifest(configPath) {
        const manifest = await this.executeConfigFile(configPath);
        return ManifestSchema.parse(manifest);
    }
    /**
     * Execute config.ts file and return the manifest.
     * The config module is expected to export a `main()` whose return value is JSON-serializable.
     */
    async executeConfigFile(configPath) {
        // We spawn a new NodeJS process to execute & log the config file.
        // We can't directly import the file because it would be cached, and import cache can't be invalidated.
        // JSON.stringify produces a correctly escaped string literal, so paths containing
        // backslashes (Windows) or quotes survive being embedded in the script.
        const script = `console.log(JSON.stringify(require(${JSON.stringify(configPath)}).main()));`;
        const stdout = await executeNodeCommand(["--experimental-transform-types", "--no-warnings", "-e", script], this.config);
        return JSON.parse(stdout.trim());
    }
    /**
     * Run the evaluation: pick (or create) the iteration, start a `Runner` for the runs
     * in its manifest, restore any saved progress and show the TUI until all runs are done.
     * @param options - `new` forces a fresh iteration; `maxConcurrent` is passed to the runner.
     * @throws Error listing every run error (joined by blank lines) if any run ended in error.
     */
    async run(options) {
        const iteration = await this.getOrCreateIteration(options.new);
        const manifest = await this.loadManifest(iteration);
        // We have 2 mandatory tags: "eval-<evaluation-name>" and "eval-<evaluation-name>-iteration-<iteration>"
        const tags = [`eval-${this.name}`, `eval-${this.name}-iteration-${iteration}`, ...(manifest.tags ?? [])];
        const runner = new Runner(manifest.runs, this.api, this.config.WEB_URL, {
            maxConcurrent: options.maxConcurrent,
            tags: tags,
            onStateChange: () => this.onRunStateChange(),
        });
        this.runner = runner;
        // Restore progress if applicable
        const progress = await this.loadProgress(iteration);
        if (progress) {
            runner.restoreProgress(progress.entries);
        }
        const tui = new TUI({
            evaluationName: `${this.name} (iteration ${iteration})`,
            onQuit: () => this.handleQuit(),
            onOpenRun: (index) => this.openRun(index),
        });
        this.tui = tui;
        // Initial state update
        tui.updateStates(runner.getAllStates());
        tui.updateStatusCounts(runner.getStatusCounts());
        tui.start();
        try {
            await runner.execute();
            // Final save
            await this.saveProgress();
        }
        finally {
            tui.stop();
            console.log("");
        }
        const errors = runner.getAllStates().filter((state) => state.status === "error");
        if (errors.length > 0) {
            throw new Error(errors.map((error) => error.error).join("\n\n"));
        }
    }
    /**
     * Handle state change from runner: refresh the TUI and persist progress.
     */
    onRunStateChange() {
        if (this.tui && this.runner) {
            this.tui.updateStates(this.runner.getAllStates());
            this.tui.updateStatusCounts(this.runner.getStatusCounts());
        }
        // Best-effort periodic save; a failure here must not interrupt the runs.
        this.saveProgress().catch(() => { });
    }
    /**
     * Handle quit request: stop the runner and the TUI, then exit the process.
     */
    handleQuit() {
        this.runner?.stop();
        this.tui?.stop();
        // Exiting while a write is in flight could leave a truncated progress file, which
        // would later be discarded as unreadable. Flush first, then exit whatever the outcome.
        this.saveProgress()
            .catch(() => { })
            .finally(() => process.exit(0));
    }
    /**
     * Open run in browser using the platform's opener. Does nothing if the run has no URL yet.
     */
    openRun(index) {
        const url = this.runner?.getRunUrl(index);
        if (!url) {
            return;
        }
        let command;
        if (process.platform === "darwin") {
            command = `open "${url}"`;
        }
        else if (process.platform === "win32") {
            // `start` treats its first quoted argument as the window title, so an empty
            // title must precede the URL for the URL to be opened.
            command = `start "" "${url}"`;
        }
        else {
            command = `xdg-open "${url}"`;
        }
        exec(command);
    }
}
@@ -0,0 +1,4 @@
1
+ export * from "./types.js";
2
+ export { Evaluator } from "./evaluator.js";
3
+ export { Runner } from "./runner.js";
4
+ export { TUI } from "./tui.js";
@@ -0,0 +1,4 @@
1
+ export * from "./types.js";
2
+ export { Evaluator } from "./evaluator.js";
3
+ export { Runner } from "./runner.js";
4
+ export { TUI } from "./tui.js";
@@ -0,0 +1,80 @@
1
+ import type { ApiClient } from "../api-client.js";
2
+ import type { ProgressEntry, RunConfig, RunState, StatusCounts } from "./types.js";
3
+ export declare class Runner { // Starts the configured runs through the API client, with bounded concurrency, and tracks one state per run
4
+ private runs; // run configurations; one state entry is kept per element
5
+ private api; // API client used to create, tag and poll runs
6
+ private baseUrl; // prefix used by getRunUrl to build "<baseUrl>/runs/<runId>"
7
+ private states; // per-run state, in the same order as `runs`; every run starts as "queued"
8
+ private activeRuns; // indices of runs currently being started or polled
9
+ private completedRuns; // indices of runs that reached a terminal status (completed, finished, game_over or error)
10
+ private nextIndex; // index of the next run the scheduler will consider starting
11
+ private stopped; // set by stop(); checked by the scheduling and polling loops
12
+ private maxConcurrent; // upper bound on simultaneously active runs (the implementation defaults to 5)
13
+ private tags; // tags applied to every run after it is created
14
+ private onStateChange?; // optional listener invoked after each state update
15
+ constructor(runs: RunConfig[], api: ApiClient, baseUrl: string, options?: { // throws if any tag fails the tag format check
16
+ maxConcurrent?: number;
17
+ tags?: string[];
18
+ onStateChange?: (index: number, state: RunState) => void;
19
+ });
20
+ /**
21
+ * Restore per-run state (status, run ID, start time, error) from the progress entries of a previous invocation; entries whose index is beyond the configured runs are ignored
22
+ */
23
+ restoreProgress(entries: ProgressEntry[]): void;
24
+ /**
25
+ * Resume polling for runs that were in-flight before a crash/restart (they already have a run ID, so no new run is created)
26
+ */
27
+ private resumeInFlightRuns;
28
+ /**
29
+ * Get current run state by index (undefined when the index is out of range)
30
+ */
31
+ getRunState(index: number): RunState | undefined;
32
+ /**
33
+ * Get all run states as a new array (the state objects themselves are shared, not copied)
34
+ */
35
+ getAllStates(): RunState[];
36
+ /**
37
+ * Get status counts for TUI footer: completed, active, queued and errors
38
+ */
39
+ getStatusCounts(): StatusCounts;
40
+ /**
41
+ * Merge the given fields into a run's state and notify the onStateChange listener
42
+ */
43
+ private updateState;
44
+ /**
45
+ * Check if all runs are complete, i.e. every run reached a terminal status (runs that ended in error count as complete)
46
+ */
47
+ isComplete(): boolean;
48
+ /**
49
+ * Request a stop: no new runs are scheduled and the polling loops exit on their next check; runs already created on the server are not cancelled by this call
50
+ */
51
+ stop(): void;
52
+ /**
53
+ * Start a single run: create it through the API, tag it, then poll it until it finishes
54
+ */
55
+ private startRun;
56
+ /**
57
+ * Poll run status until the run reaches a terminal status or the runner is stopped
58
+ */
59
+ private pollRunStatus;
60
+ /**
61
+ * Normalize API status to RunStatus (case-insensitive; unrecognized values are treated as "running")
62
+ */
63
+ private normalizeStatus;
64
+ /**
65
+ * Main execution loop: resumes in-flight runs, then starts queued runs while capacity allows; resolves when every run is complete or the runner is stopped
66
+ */
67
+ execute(): Promise<void>;
68
+ /**
69
+ * Helper for delays (resolves after the given number of milliseconds)
70
+ */
71
+ private delay;
72
+ /**
73
+ * Get progress entries for saving (one entry per run, in run order)
74
+ */
75
+ getProgressEntries(): ProgressEntry[];
76
+ /**
77
+ * Get URL for a run, or undefined when the run has no run ID yet
78
+ */
79
+ getRunUrl(index: number): string | undefined;
80
+ }
@@ -0,0 +1,280 @@
1
/** Number of runs allowed to be active at once when the caller does not specify a limit. */
const DEFAULT_MAX_CONCURRENT = 5;
/** How long a rate-limited run waits before it becomes eligible to start again. */
const RATE_LIMIT_BACKOFF_MS = 5000;
/** Interval between two status polls of the same run (doubled after a failed poll). */
const STATUS_POLL_INTERVAL_MS = 2000;
/** Pause between two consecutive run creations, to avoid overwhelming the API. */
const START_STAGGER_MS = 150;
/** Pause between two passes of the scheduling loop. */
const SCHEDULER_TICK_MS = 500;
/** Tags are 3 to 36 characters of letters, digits, parentheses and (not first or last) hyphens. */
const TAG_PATTERN = /^[a-zA-Z0-9()][a-zA-Z0-9()-]{1,34}[a-zA-Z0-9()]$/;
/** Statuses that mean a run ended without error. */
const SUCCESS_STATUSES = new Set(["completed", "finished", "game_over"]);
/** Lower-cased API status -> run status. A Map avoids matching inherited object members. */
const API_STATUS_MAP = new Map([
    ["queued", "queued"],
    ["pending", "queued"],
    ["initializing", "initializing"],
    ["watcher_connected", "watcher_connected"],
    ["participants_connected", "participants_connected"],
    ["started", "started"],
    ["running", "running"],
    ["recovering", "recovering"],
    ["completed", "completed"],
    ["game_over", "game_over"],
    ["finished", "finished"],
    ["error", "error"],
    ["failed", "error"],
]);
/**
 * Starts a list of runs through the API with bounded concurrency, polls each run until
 * it reaches a terminal status and keeps one state object per run.
 */
export class Runner {
    runs;
    api;
    baseUrl;
    states = [];
    activeRuns = new Set();
    completedRuns = new Set();
    /** Indices of rate-limited runs that are waiting to be started again. */
    retryQueue = [];
    /** Run index -> timestamp at which the run reached a terminal status. */
    endTimes = new Map();
    nextIndex = 0;
    stopped = false;
    maxConcurrent;
    tags;
    onStateChange;
    /**
     * @param runs - Run configurations (`challenge_slug` and `participants` are read from each).
     * @param api - Client providing `runChallenge`, `tagRun` and `getRunStatus`.
     * @param baseUrl - Prefix used to build run URLs.
     * @param options - `maxConcurrent` (default 5), `tags` applied to every run, and an
     *   optional `onStateChange(index, state)` listener.
     * @throws Error if `maxConcurrent` is not a number >= 1, or if a tag is malformed.
     */
    constructor(runs, api, baseUrl, options = {}) {
        this.runs = runs;
        this.api = api;
        this.baseUrl = baseUrl;
        this.maxConcurrent = options.maxConcurrent ?? DEFAULT_MAX_CONCURRENT;
        // A limit below 1 (or NaN) would make execute() wait forever without starting anything.
        if (!(this.maxConcurrent >= 1)) {
            throw new Error(`Invalid maxConcurrent: ${this.maxConcurrent}. It must be a number greater than or equal to 1.`);
        }
        this.tags = options.tags ?? [];
        for (const tag of this.tags) {
            if (!TAG_PATTERN.test(tag)) {
                throw new Error(`Invalid tag: ${tag}. Tags must be 3 to 36 characters long, start and end with a letter, number, or parenthesis, and can only contain letters, numbers, hyphens, and parentheses.`);
            }
        }
        this.onStateChange = options.onStateChange;
        // Initialize all run states as queued
        this.states = runs.map((config, index) => ({
            index,
            config,
            status: "queued",
        }));
    }
    /**
     * Restore progress from a previous run.
     *
     * Entries that do not correspond to a configured run are ignored. Finished entries
     * (success or error) are recorded as completed together with their saved end time.
     * In-flight entries keep their status so that `execute()` resumes polling them,
     * except when they have no run ID: such a run can be neither resumed nor found
     * again, so it is put back in the queue to be started afresh.
     */
    restoreProgress(entries) {
        for (const entry of entries) {
            const state = this.states[entry.index];
            if (!state) {
                continue;
            }
            state.runId = entry.runId;
            state.startTime = entry.startTime;
            if (SUCCESS_STATUSES.has(entry.status) || entry.status === "error") {
                state.status = entry.status;
                if (entry.status === "error") {
                    state.error = entry.error;
                }
                this.completedRuns.add(entry.index);
                if (entry.endTime !== undefined) {
                    this.endTimes.set(entry.index, entry.endTime);
                }
            }
            else if (entry.status !== "queued" && !entry.runId) {
                state.status = "queued";
                state.startTime = undefined;
            }
            else {
                state.status = entry.status;
            }
        }
        // Find next index to process (first queued, non-completed)
        const nextQueued = this.states.findIndex((state) => !this.completedRuns.has(state.index) && state.status === "queued");
        this.nextIndex = nextQueued === -1 ? this.runs.length : nextQueued;
    }
    /**
     * Resume polling for runs that were in-flight before a crash/restart.
     * Only runs that have a run ID and are neither queued, errored, completed nor
     * already active are picked up; no new run is created for them.
     */
    resumeInFlightRuns() {
        for (const state of this.states) {
            const resumable = !this.completedRuns.has(state.index) &&
                state.status !== "error" &&
                state.status !== "queued" &&
                Boolean(state.runId) &&
                !this.activeRuns.has(state.index);
            if (!resumable) {
                continue;
            }
            this.activeRuns.add(state.index);
            this.pollRunStatus(state.index, state.runId).catch(() => { });
        }
    }
    /**
     * Get current run state by index.
     */
    getRunState(index) {
        return this.states[index];
    }
    /**
     * Get all run states (a new array holding the same state objects).
     */
    getAllStates() {
        return [...this.states];
    }
    /**
     * Get status counts for TUI footer.
     * Every status that is not a success, "error" or "queued" counts as active.
     */
    getStatusCounts() {
        const counts = { completed: 0, active: 0, queued: 0, errors: 0 };
        for (const { status } of this.states) {
            if (SUCCESS_STATUSES.has(status)) {
                counts.completed++;
            }
            else if (status === "error") {
                counts.errors++;
            }
            else if (status === "queued") {
                counts.queued++;
            }
            else {
                counts.active++;
            }
        }
        return counts;
    }
    /**
     * Update state and notify listener.
     */
    updateState(index, updates) {
        const state = this.states[index];
        Object.assign(state, updates);
        this.onStateChange?.(index, state);
    }
    /**
     * Record that a run reached a terminal status. The end time is stamped only once.
     */
    markFinished(index) {
        this.completedRuns.add(index);
        this.activeRuns.delete(index);
        if (!this.endTimes.has(index)) {
            this.endTimes.set(index, Date.now());
        }
    }
    /**
     * Check if all runs are complete (runs that ended in error count as complete).
     */
    isComplete() {
        return this.completedRuns.size === this.runs.length;
    }
    /**
     * Stop scheduling new runs. Polling loops also exit on their next check.
     */
    stop() {
        this.stopped = true;
    }
    /**
     * Start a single run: create it, tag it and poll it until it finishes.
     * API failures are recorded on the run's state instead of being thrown; a
     * rate-limit failure puts the run back in the queue after a back-off.
     */
    async startRun(index) {
        const state = this.states[index];
        this.activeRuns.add(index);
        this.updateState(index, { status: "initializing", startTime: Date.now() });
        try {
            const response = await this.api.runChallenge({
                challenge: state.config.challenge_slug,
                participants: state.config.participants,
            });
            if (!response.runIds || response.runIds.length === 0) {
                throw new Error("No run ID returned from API");
            }
            const runId = response.runIds[0];
            this.updateState(index, { runId, status: "running" });
            // Tag the run with all configured tags
            await Promise.all(this.tags.map((tag) => this.api.tagRun(runId, tag)));
            await this.pollRunStatus(index, runId);
        }
        catch (error) {
            const errorMessage = error instanceof Error ? error.message : String(error);
            if (errorMessage.includes("429") || errorMessage.toLowerCase().includes("rate limit")) {
                this.updateState(index, { status: "queued", error: undefined });
                this.activeRuns.delete(index);
                await this.delay(RATE_LIMIT_BACKOFF_MS);
                // The scheduler has already moved past this index, so it must be handed
                // back explicitly; otherwise the run would stay queued forever.
                this.retryQueue.push(index);
                return;
            }
            this.markFinished(index);
            this.updateState(index, { status: "error", error: errorMessage });
        }
    }
    /**
     * Poll run status until complete, or until the runner is stopped.
     * A failed poll is treated as transient: polling continues after a longer pause.
     */
    async pollRunStatus(index, runId) {
        while (!this.stopped) {
            try {
                const response = await this.api.getRunStatus(runId);
                const status = this.normalizeStatus(response.status);
                if (status === "error") {
                    this.markFinished(index);
                    // Make sure the failure carries a message callers can report.
                    const error = this.states[index].error ?? `Run ${runId} reported status "${response.status}"`;
                    this.updateState(index, { status, error });
                    return;
                }
                if (SUCCESS_STATUSES.has(status)) {
                    this.markFinished(index);
                    this.updateState(index, { status });
                    return;
                }
                this.updateState(index, { status });
                await this.delay(STATUS_POLL_INTERVAL_MS);
            }
            catch {
                // Network error, continue polling
                await this.delay(STATUS_POLL_INTERVAL_MS * 2);
            }
        }
        this.activeRuns.delete(index);
    }
    /**
     * Normalize API status to RunStatus.
     * Matching is case-insensitive; a missing or unrecognized status is treated as "running".
     */
    normalizeStatus(apiStatus) {
        return API_STATUS_MAP.get(String(apiStatus ?? "").toLowerCase()) ?? "running";
    }
    /**
     * Whether a run may be started now: it is queued and neither finished nor in flight.
     */
    isStartable(index) {
        return (!this.completedRuns.has(index) &&
            !this.activeRuns.has(index) &&
            this.states[index].status === "queued");
    }
    /**
     * Pick the next run to start: re-queued (rate-limited) runs first, then runs that
     * were never started. Returns undefined when nothing is currently startable.
     */
    takeNextIndex() {
        while (this.retryQueue.length > 0) {
            const index = this.retryQueue.shift();
            if (this.isStartable(index)) {
                return index;
            }
        }
        while (this.nextIndex < this.runs.length) {
            const index = this.nextIndex++;
            // Skips runs restored as finished as well as runs that are being resumed.
            if (this.isStartable(index)) {
                return index;
            }
        }
        return undefined;
    }
    /**
     * Main execution loop. Resolves once every run is complete or the runner is stopped.
     */
    async execute() {
        // First, resume any runs that were already in-flight
        this.resumeInFlightRuns();
        while (!this.stopped && !this.isComplete()) {
            // Start new runs if we have capacity
            while (!this.stopped && this.activeRuns.size < this.maxConcurrent) {
                const index = this.takeNextIndex();
                if (index === undefined) {
                    break;
                }
                // Don't await - run concurrently. startRun records API failures itself;
                // this handler only fires if something unexpected throws (for example
                // the state listener), in which case the run must still be closed out
                // so that this loop can terminate.
                this.startRun(index).catch((error) => {
                    const state = this.states[index];
                    state.status = "error";
                    state.error = error instanceof Error ? error.message : String(error);
                    this.markFinished(index);
                });
                // Wait a bit to avoid overwhelming the API
                await this.delay(START_STAGGER_MS);
            }
            // Wait a bit before checking again
            await this.delay(SCHEDULER_TICK_MS);
        }
        // Wait for active runs to complete
        while (this.activeRuns.size > 0 && !this.stopped) {
            await this.delay(SCHEDULER_TICK_MS);
        }
    }
    /**
     * Helper for delays.
     */
    delay(ms) {
        return new Promise((resolve) => setTimeout(resolve, ms));
    }
    /**
     * Get progress entries for saving.
     * `endTime` is the moment the run finished (or the value restored from a previous
     * save), not the moment of the save; it is undefined for unfinished runs.
     */
    getProgressEntries() {
        return this.states.map((state) => ({
            index: state.index,
            status: state.status,
            runId: state.runId,
            startTime: state.startTime,
            endTime: this.endTimes.get(state.index),
            error: state.error,
        }));
    }
    /**
     * Get URL for a run, or undefined if the run has no run ID yet.
     */
    getRunUrl(index) {
        const state = this.states[index];
        if (state?.runId) {
            return `${this.baseUrl}/runs/${state.runId}`;
        }
        return undefined;
    }
}
@@ -0,0 +1,20 @@
1
+ import type { RunState, StatusCounts } from "./types.js";
2
+ export interface TUIOptions { // Options accepted by the TUI constructor
3
+ evaluationName: string; // label for the evaluation; presumably shown as the title — confirm in the TUI implementation
4
+ onQuit: () => void; // callback for a quit request; when it is triggered is decided by the TUI implementation (not visible here)
5
+ onOpenRun: (index: number) => void; // callback receiving the index of the run to open
6
+ }
7
+ export declare class TUI { // Terminal UI for an evaluation; only the declaration is visible here, so notes on private members are assumptions to confirm
8
+ private options; // the TUIOptions passed to the constructor (presumably stored as-is)
9
+ private states; // presumably the run states last passed to updateStates — TODO confirm
10
+ private statusCounts; // presumably the counts last passed to updateStatusCounts — TODO confirm
11
+ private app?; // optional handle of the rendered UI; its type is not visible in this declaration
12
+ private running; // presumably true between start() and stop() — TODO confirm
13
+ constructor(options: TUIOptions);
14
+ start(): void; // starts the UI
15
+ stop(): void; // stops the UI
16
+ updateStates(states: RunState[]): void; // supplies the current per-run states to display
17
+ updateStatusCounts(counts: StatusCounts): void; // supplies the aggregate status counts to display
18
+ private rerender; // NOTE(review): presumably redraws after an update — confirm in the implementation
19
+ private renderApp; // NOTE(review): presumably builds the UI tree — confirm in the implementation
20
+ }