@kradle/cli 0.0.3 → 0.0.5
- package/README.md +23 -1
- package/dist/commands/challenge/create.js +2 -2
- package/dist/commands/challenge/watch.js +2 -1
- package/dist/commands/evaluation/init.d.ts +9 -0
- package/dist/commands/evaluation/init.js +58 -0
- package/dist/commands/evaluation/list.d.ts +7 -0
- package/dist/commands/evaluation/list.js +55 -0
- package/dist/commands/evaluation/run.d.ts +13 -0
- package/dist/commands/evaluation/run.js +60 -0
- package/dist/lib/api-client.d.ts +14 -1
- package/dist/lib/api-client.js +31 -5
- package/dist/lib/challenge.js +5 -0
- package/dist/lib/config.d.ts +0 -1
- package/dist/lib/config.js +0 -2
- package/dist/lib/evaluation/evaluator.d.ts +88 -0
- package/dist/lib/evaluation/evaluator.js +275 -0
- package/dist/lib/evaluation/index.d.ts +4 -0
- package/dist/lib/evaluation/index.js +4 -0
- package/dist/lib/evaluation/runner.d.ts +80 -0
- package/dist/lib/evaluation/runner.js +280 -0
- package/dist/lib/evaluation/tui.d.ts +20 -0
- package/dist/lib/evaluation/tui.js +129 -0
- package/dist/lib/evaluation/types.d.ts +127 -0
- package/dist/lib/evaluation/types.js +86 -0
- package/dist/lib/schemas.d.ts +14 -0
- package/dist/lib/schemas.js +10 -0
- package/oclif.manifest.json +104 -1
- package/package.json +8 -1
- package/static/evaluation_template.ts +69 -0
- package/static/project_template/dev.env +0 -1
- package/static/project_template/prod.env +0 -1
package/README.md
CHANGED
@@ -45,7 +45,6 @@ WEB_URL=https://kradle.ai
 STUDIO_API_URL=http://localhost:8080
 STUDIO_URL=kradle-studio://
 KRADLE_API_KEY=your-api-key
-GCS_BUCKET=your-gcs-bucket
 KRADLE_CHALLENGES_PATH=~/Documents/kradle-studio/challenges
 ```
 
@@ -126,6 +125,29 @@ kradle challenge multi-upload
 
 Provides an interactive UI to select multiple challenges and uploads them in parallel.
 
+### Evaluations (beta)
+
+Plan and execute batches of runs across challenges/agents, with resumable iterations and a TUI.
+
+- **Init**: scaffold an evaluation config `evaluations/<name>/config.ts`
+```bash
+kradle evaluation init <name>
+```
+- **List**: list local evaluations
+```bash
+kradle evaluation list
+```
+- **Run**: execute or resume an evaluation (iterations stored under `evaluations/<name>/iterations/`)
+```bash
+kradle evaluation run <name> [--new] [--max-concurrent N]
+```
+
+Features:
+- Iterations: `--new` starts a new iteration; otherwise resumes the latest.
+- Resumable state: progress is persisted per iteration; in-flight runs are re-polled on resume, completed runs stay completed.
+- Ink TUI: live status counts, elapsed times, scrollable run list; keys `q/Ctrl+C` quit, `↑/↓/j/k` move, `o` open run URL.
+- Per-iteration manifest: generated from the evaluation `config.ts` into `manifest.json` before runs start.
+
 ## Development
 
 ### Setup
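
Note: the scaffolded config template (`static/evaluation_template.ts`) is added in this release but not reproduced in this section, so the following is only a rough sketch of what an `evaluations/<name>/config.ts` could look like. The `runs`, `challenge`, and `agent` field names are illustrative assumptions, not the package's actual schema; the point is simply a config that plans a batch of runs across challenges and agents, which the CLI then expands into the per-iteration `manifest.json`.

```ts
// Hypothetical evaluations/my-evaluation/config.ts (illustrative only; the
// real shape is defined by static/evaluation_template.ts, not shown here).
const challenges = ["build-a-house", "mine-diamonds"];
const agents = ["baseline-agent", "experimental-agent"];

// One planned run per challenge/agent pair; the CLI generates manifest.json
// from the config before any runs start.
export default {
  runs: challenges.flatMap((challenge) =>
    agents.map((agent) => ({ challenge, agent })),
  ),
};
```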

package/dist/commands/challenge/create.js
CHANGED
@@ -50,9 +50,9 @@ export default class Create extends Command {
 task: async (_, task) => {
 const challengeData = await api.getChallenge(args.challenge);
 // Remove fields that shouldn't be in the config file
-const { id, creationTime, updateTime, creator,
+const { id, creationTime, updateTime, creator, ...cleanChallenge } = challengeData;
 // We remove the username prefix from the slug, to make the challenge easy to share with others
-
+cleanChallenge.slug = cleanChallenge.slug.split(":")[1];
 // Remove quotes from keys
 const configStr = JSON.stringify(cleanChallenge, null, 2).replace(/"([a-zA-Z0-9_]+)":/g, "$1:");
 await fs.writeFile(challenge.configPath, `

package/dist/commands/challenge/watch.js
CHANGED
@@ -90,7 +90,8 @@ export default class Watch extends Command {
 this.log(pc.blue(`\nStarting watch mode for ${pc.bold(challenge.shortSlug)}\n`));
 this.log(pc.dim("Watching for changes... (Ctrl+C to stop)\n"));
 const watcher = chokidar.watch([challenge.challengeDir], {
-
+// ⚠️ WE IGNORE THE DATAPACK FOLDER FOR NOW, BUT IT'S A SHORT TERM FIX.
+ignored: [/(^|[/\\])\../, (p) => p.includes("/datapack")], // ignore dotfiles and datapack folder
 persistent: true,
 ignoreInitial: true,
 });

package/dist/commands/evaluation/init.d.ts
ADDED
@@ -0,0 +1,9 @@
+import { Command } from "@oclif/core";
+export default class Init extends Command {
+static description: string;
+static examples: string[];
+static args: {
+name: import("@oclif/core/interfaces").Arg<string, Record<string, unknown>>;
+};
+run(): Promise<void>;
+}

package/dist/commands/evaluation/init.js
ADDED
@@ -0,0 +1,58 @@
+import { exec } from "node:child_process";
+import fs from "node:fs/promises";
+import path from "node:path";
+import { Args, Command } from "@oclif/core";
+import pc from "picocolors";
+import { loadConfig } from "../../lib/config.js";
+import { getStaticResourcePath } from "../../lib/utils.js";
+export default class Init extends Command {
+static description = "Initialize a new evaluation";
+static examples = ["<%= config.bin %> <%= command.id %> my-evaluation"];
+static args = {
+name: Args.string({
+description: "Name of the evaluation",
+required: true,
+}),
+};
+async run() {
+const { args } = await this.parse(Init);
+loadConfig(); // Validate config is available
+const evaluationDir = path.resolve(process.cwd(), "evaluations", args.name);
+const configPath = path.join(evaluationDir, "config.ts");
+// Check if evaluation already exists
+try {
+await fs.access(evaluationDir);
+this.error(pc.red(`Evaluation '${args.name}' already exists at ${evaluationDir}`));
+}
+catch {
+// Directory doesn't exist, which is what we want
+}
+// Create evaluation directory
+await fs.mkdir(evaluationDir, { recursive: true });
+// Copy template
+const templatePath = getStaticResourcePath("evaluation_template.ts");
+await fs.copyFile(templatePath, configPath);
+this.log(pc.green(`✓ Created evaluation '${args.name}'`));
+this.log(pc.dim(` Config: ${configPath}`));
+// Offer to open in editor on macOS
+if (process.platform === "darwin") {
+this.log("");
+this.log(pc.blue(">> Opening config.ts in your editor..."));
+// Try Cursor first, then VS Code, then fallback to default
+exec(`cursor "${configPath}" || code "${configPath}" || open "${configPath}"`, (error) => {
+if (error) {
+this.log(pc.dim(` Could not open editor automatically. Please open: ${configPath}`));
+}
+});
+}
+else {
+this.log("");
+this.log(pc.blue(`>> Edit the config file to define your runs:`));
+this.log(pc.dim(` ${configPath}`));
+}
+this.log("");
+this.log(pc.blue(">> Next steps:"));
+this.log(pc.dim(` 1. Edit ${path.basename(configPath)} to define your evaluation runs`));
+this.log(pc.dim(` 2. Run: kradle evaluation run ${args.name}`));
+}
+}

package/dist/commands/evaluation/list.js
ADDED
@@ -0,0 +1,55 @@
+import fs from "node:fs/promises";
+import path from "node:path";
+import { Command } from "@oclif/core";
+import pc from "picocolors";
+import { loadConfig } from "../../lib/config.js";
+export default class List extends Command {
+static description = "List all evaluations";
+static examples = ["<%= config.bin %> <%= command.id %>"];
+async run() {
+this.parse(List);
+loadConfig(); // Validate config is available
+const evaluationsDir = path.resolve(process.cwd(), "evaluations");
+try {
+const entries = await fs.readdir(evaluationsDir, { withFileTypes: true });
+const evaluations = entries.filter((e) => e.isDirectory());
+if (evaluations.length === 0) {
+this.log(pc.yellow("No evaluations found."));
+this.log(pc.dim(` Run 'kradle evaluation init <name>' to create one.`));
+return;
+}
+this.log(pc.blue(">> Evaluations:"));
+this.log("");
+for (const evaluation of evaluations) {
+const evalDir = path.join(evaluationsDir, evaluation.name);
+const hasConfig = await this.fileExists(path.join(evalDir, "config.ts"));
+const hasManifest = await this.fileExists(path.join(evalDir, "manifest.json"));
+const hasProgress = await this.fileExists(path.join(evalDir, "progress.json"));
+let status = "";
+if (hasProgress) {
+status = pc.yellow(" (in progress)");
+}
+else if (hasManifest) {
+status = pc.green(" (ready)");
+}
+else if (hasConfig) {
+status = pc.dim(" (config only)");
+}
+this.log(` ${pc.bold(evaluation.name)}${status}`);
+}
+}
+catch {
+this.log(pc.yellow("No evaluations directory found."));
+this.log(pc.dim(` Run 'kradle evaluation init <name>' to create your first evaluation.`));
+}
+}
+async fileExists(filePath) {
+try {
+await fs.access(filePath);
+return true;
+}
+catch {
+return false;
+}
+}
+}

package/dist/commands/evaluation/run.d.ts
ADDED
@@ -0,0 +1,13 @@
+import { Command } from "@oclif/core";
+export default class Run extends Command {
+static description: string;
+static examples: string[];
+static args: {
+name: import("@oclif/core/interfaces").Arg<string, Record<string, unknown>>;
+};
+static flags: {
+new: import("@oclif/core/interfaces").BooleanFlag<boolean>;
+"max-concurrent": import("@oclif/core/interfaces").OptionFlag<number, import("@oclif/core/interfaces").CustomOptions>;
+};
+run(): Promise<void>;
+}

package/dist/commands/evaluation/run.js
ADDED
@@ -0,0 +1,60 @@
+import { Args, Command, Flags } from "@oclif/core";
+import pc from "picocolors";
+import { ApiClient } from "../../lib/api-client.js";
+import { loadConfig } from "../../lib/config.js";
+import { Evaluator } from "../../lib/evaluation/evaluator.js";
+const DEFAULT_MAX_CONCURRENT = 5;
+export default class Run extends Command {
+static description = "Run an evaluation. If the evaluation had an ongoing iteration, it will resume from the last state.";
+static examples = [
+"<%= config.bin %> <%= command.id %> my-evaluation",
+"<%= config.bin %> <%= command.id %> my-evaluation --new",
+"<%= config.bin %> <%= command.id %> my-evaluation --max-concurrent 10",
+];
+static args = {
+name: Args.string({
+description: "Name of the evaluation to run",
+required: true,
+}),
+};
+static flags = {
+new: Flags.boolean({
+char: "n",
+description: "Start a new iteration of the evaluation",
+default: false,
+}),
+"max-concurrent": Flags.integer({
+char: "m",
+description: "Maximum concurrent runs",
+default: DEFAULT_MAX_CONCURRENT,
+}),
+};
+async run() {
+const { args, flags } = await this.parse(Run);
+const config = loadConfig();
+const api = new ApiClient(config);
+const evaluator = new Evaluator(args.name, config, api);
+// Check if evaluation exists
+if (!(await evaluator.exists())) {
+this.error(pc.red(`Evaluation '${args.name}' does not exist. Run 'kradle evaluation init ${args.name}' first.`));
+}
+// Check if config.ts exists
+if (!(await evaluator.configExists())) {
+this.error(pc.red(`Config file not found at ${evaluator.configPath}`));
+}
+this.log(pc.blue(`>> Starting evaluation: ${args.name}`));
+if (flags.new) {
+this.log(pc.yellow(" --new: Starting a new iteration of the evaluation"));
+}
+try {
+await evaluator.run({
+new: flags.new,
+maxConcurrent: flags["max-concurrent"],
+});
+this.log(pc.green("\n✓ Evaluation complete!"));
+}
+catch (error) {
+this.error(pc.red(`Evaluation failed: ${error instanceof Error ? error.message : String(error)}`));
+}
+}
+}

package/dist/lib/api-client.d.ts
CHANGED
@@ -1,7 +1,7 @@
 import type z from "zod";
 import type { Challenge } from "./challenge.js";
 import type { Config } from "./config.js";
-import { type AgentSchemaType, type ChallengeSchemaType, HumanSchema } from "./schemas.js";
+import { type AgentSchemaType, type ChallengeSchemaType, HumanSchema, type RunStatusSchemaType } from "./schemas.js";
 export declare class ApiClient {
 private config;
 constructor(config: Config);
@@ -52,4 +52,17 @@ export declare class ApiClient {
 runIds?: string[] | undefined;
 }>;
 deleteChallenge(challengeId: string): Promise<void>;
+/**
+* Get the status of a run.
+* @param runId - The ID of the run.
+* @returns The run status.
+*/
+getRunStatus(runId: string): Promise<RunStatusSchemaType>;
+/**
+* Add a tag to a run.
+* @param runId - The ID of the run to tag.
+* @param tag - The tag string to add.
+* @throws an error if the tag fails to be added.
+*/
+tagRun(runId: string, tag: string): Promise<void>;
 }

package/dist/lib/api-client.js
CHANGED
@@ -1,4 +1,4 @@
-import { AgentsResponseSchema, ChallengeSchema, ChallengesResponseSchema, HumanSchema, RunResponseSchema, UploadUrlResponseSchema, } from "./schemas.js";
+import { AgentsResponseSchema, ChallengeSchema, ChallengesResponseSchema, HumanSchema, RunResponseSchema, RunStatusSchema, UploadUrlResponseSchema, } from "./schemas.js";
 const DEFAULT_PAGE_SIZE = 30;
 const DEFAULT_CHALLENGE_SCHEMA = {
 slug: "",
@@ -49,17 +49,21 @@ export class ApiClient {
 method: "POST",
 ...options,
 });
-const
+const text = await response.text();
+if (!text) {
+return undefined;
+}
+const data = JSON.parse(text);
 return schema ? schema.parse(data) : data;
 }
 async put(target, url, options = {}) {
-await this.request(target, url, {
+return await this.request(target, url, {
 method: "PUT",
 ...options,
 });
 }
 async delete(target, url, options = {}) {
-await this.request(target, url, {
+return await this.request(target, url, {
 method: "DELETE",
 ...options,
 });
@@ -140,8 +144,9 @@ export class ApiClient {
 */
 async updateChallenge(challenge, challengeConfig) {
 const url = `challenges/${challenge.shortSlug}`;
+console.log(url);
 const config = challengeConfig ?? (await challenge.loadConfig());
-
+await this.put("web", url, {
 body: JSON.stringify(config),
 });
 }
@@ -159,4 +164,25 @@ export class ApiClient {
 const url = `challenges/${challengeId}`;
 await this.delete("web", url);
 }
+/**
+* Get the status of a run.
+* @param runId - The ID of the run.
+* @returns The run status.
+*/
+async getRunStatus(runId) {
+const url = `runs/${runId}`;
+return this.get("web", url, {}, RunStatusSchema);
+}
+/**
+* Add a tag to a run.
+* @param runId - The ID of the run to tag.
+* @param tag - The tag string to add.
+* @throws an error if the tag fails to be added.
+*/
+async tagRun(runId, tag) {
+const url = `runs/${runId}/tag`;
+await this.post("web", url, {
+body: JSON.stringify({ tag }),
+});
+}
 }
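
The run endpoints added above (`getRunStatus` and `tagRun`) are what the evaluation runner re-polls when an iteration resumes. As a minimal sketch of driving them directly, assuming an already-constructed client: the shape of `RunStatusSchemaType` is not part of this diff, so the `status` field and the terminal-state names below are assumptions.

```ts
// Hedged sketch: poll a run until it reaches a terminal state, then tag it.
// Only getRunStatus() and tagRun() come from the diff above; everything else,
// including the `status` field and its values, is assumed for illustration.
type RunStatusLike = { status?: string };

interface RunApi {
  getRunStatus(runId: string): Promise<RunStatusLike>;
  tagRun(runId: string, tag: string): Promise<void>;
}

async function waitForRun(api: RunApi, runId: string, intervalMs = 5000): Promise<RunStatusLike> {
  for (;;) {
    const run = await api.getRunStatus(runId);
    if (run.status && ["completed", "failed", "cancelled"].includes(run.status)) {
      await api.tagRun(runId, "scripted-poll"); // example tag
      return run;
    }
    await new Promise((resolve) => setTimeout(resolve, intervalMs));
  }
}
```
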
package/dist/lib/challenge.js
CHANGED
@@ -98,6 +98,11 @@ export class Challenge {
 catch (error) {
 throw new Error(`Failed to build datapack: ${error instanceof Error ? error.message : error}`);
 }
+// @TODO - re-enable once we have a proper build pipeline
+// Recursively copy the challenge dir to target directory, under src/
+// await fs.cp(this.challengeDir, path.join(this.config.KRADLE_CHALLENGES_PATH, this.shortSlug, "src"), {
+// recursive: true,
+// });
 }
 /**
 * Load the challenge configuration from config.ts

package/dist/lib/config.d.ts
CHANGED
@@ -5,7 +5,6 @@ export declare const ConfigSchema: z.ZodObject<{
 STUDIO_API_URL: z.ZodString;
 STUDIO_URL: z.ZodString;
 KRADLE_API_KEY: z.ZodString;
-GCS_BUCKET: z.ZodString;
 KRADLE_CHALLENGES_PATH: z.ZodDefault<z.ZodString>;
 NAMESPACE: z.ZodDefault<z.ZodString>;
 }, z.core.$strip>;

package/dist/lib/config.js
CHANGED
@@ -8,7 +8,6 @@ export const ConfigSchema = z.object({
 STUDIO_API_URL: z.string().url(),
 STUDIO_URL: z.string(),
 KRADLE_API_KEY: z.string(),
-GCS_BUCKET: z.string(),
 /**
 * Absolute path to the challenges directory. Defaults to ~/Documents/kradle-studio/challenges.
 */
@@ -30,7 +29,6 @@ export function loadConfig() {
 STUDIO_API_URL: process.env.STUDIO_API_URL,
 STUDIO_URL: process.env.STUDIO_URL,
 KRADLE_API_KEY: process.env.KRADLE_API_KEY,
-GCS_BUCKET: process.env.GCS_BUCKET,
 KRADLE_CHALLENGES_PATH: challengesPath,
 NAMESPACE: process.env.NAMESPACE,
 });

package/dist/lib/evaluation/evaluator.d.ts
ADDED
@@ -0,0 +1,88 @@
+import type { ApiClient } from "../api-client.js";
+import type { Config } from "../config.js";
+import type { EvaluationMetadata, EvaluationOptions, Manifest, Progress } from "./types.js";
+export declare class Evaluator {
+private name;
+private config;
+private api;
+evaluationDir: string;
+metadataPath: string;
+private runner?;
+private tui?;
+private currentIteration?;
+constructor(name: string, config: Config, api: ApiClient);
+/**
+* Get paths for a specific iteration
+*/
+private getIterationPaths;
+get configPath(): string;
+/**
+* Get the current iteration directory path
+*/
+getCurrentIterationDir(): string;
+/**
+* Check if evaluation exists
+*/
+exists(): Promise<boolean>;
+/**
+* Check if config.ts exists (master config)
+*/
+configExists(): Promise<boolean>;
+/**
+* Load evaluation metadata
+*/
+loadMetadata(): Promise<EvaluationMetadata | null>;
+/**
+* Save evaluation metadata
+*/
+saveMetadata(metadata: EvaluationMetadata): Promise<void>;
+/**
+* Get the current iteration number, or -1 if none exists
+*/
+getCurrentIterationNumber(): Promise<number>;
+/**
+* Create a new iteration
+*/
+createNewIteration(): Promise<number>;
+/**
+* Get or create an iteration
+* @param createNew - If true, always create a new iteration. Otherwise, use current iteration or create first one if none exists.
+*/
+getOrCreateIteration(createNew: boolean): Promise<number>;
+/**
+* Load manifest from iteration
+*/
+loadManifest(iteration: number): Promise<Manifest>;
+/**
+* Load progress from iteration
+*/
+loadProgress(iteration: number): Promise<Progress | null>;
+/**
+* Save progress to current iteration
+*/
+saveProgress(): Promise<void>;
+/**
+* Execute config.ts to generate manifest
+*/
+generateManifest(configPath: string): Promise<Manifest>;
+/**
+* Execute config.ts file and return the manifest
+*/
+private executeConfigFile;
+/**
+* Run the evaluation
+*/
+run(options: EvaluationOptions): Promise<void>;
+/**
+* Handle state change from runner
+*/
+private onRunStateChange;
+/**
+* Handle quit request
+*/
+private handleQuit;
+/**
+* Open run in browser
+*/
+private openRun;
+}