kradle 0.6.3 → 0.6.5
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/commands/ai-docs/workflow.d.ts +6 -0
- package/dist/commands/ai-docs/workflow.js +13 -0
- package/dist/commands/challenge/run.d.ts +1 -0
- package/dist/commands/challenge/run.js +5 -2
- package/dist/commands/update.d.ts +13 -0
- package/dist/commands/update.js +165 -0
- package/dist/lib/api-client.d.ts +1 -1
- package/oclif.manifest.json +78 -1
- package/package.json +1 -1
- package/static/ai_docs/WORKFLOW.md +130 -0
- package/static/project_template/AGENTS.md +6 -101
|
@@ -0,0 +1,13 @@
|
|
|
1
|
+
import fs from "node:fs/promises";
|
|
2
|
+
import { Command } from "@oclif/core";
|
|
3
|
+
import { getStaticResourcePath } from "../../lib/utils.js";
|
|
4
|
+
export default class Workflow extends Command {
|
|
5
|
+
static description = "Output the challenge creation and edition workflow documentation for LLMs";
|
|
6
|
+
static examples = ["<%= config.bin %> <%= command.id %>"];
|
|
7
|
+
async run() {
|
|
8
|
+
await this.parse(Workflow);
|
|
9
|
+
const docPath = getStaticResourcePath("ai_docs/WORKFLOW.md");
|
|
10
|
+
const content = await fs.readFile(docPath, "utf-8");
|
|
11
|
+
this.log(content);
|
|
12
|
+
}
|
|
13
|
+
}
|
|
@@ -13,6 +13,7 @@ export default class Run extends Command {
|
|
|
13
13
|
"studio-api-url": import("@oclif/core/interfaces").OptionFlag<string, import("@oclif/core/interfaces").CustomOptions>;
|
|
14
14
|
"studio-url": import("@oclif/core/interfaces").OptionFlag<string, import("@oclif/core/interfaces").CustomOptions>;
|
|
15
15
|
studio: import("@oclif/core/interfaces").BooleanFlag<boolean>;
|
|
16
|
+
record: import("@oclif/core/interfaces").BooleanFlag<boolean>;
|
|
16
17
|
"no-open": import("@oclif/core/interfaces").BooleanFlag<boolean>;
|
|
17
18
|
"no-wait": import("@oclif/core/interfaces").BooleanFlag<boolean>;
|
|
18
19
|
"no-summary": import("@oclif/core/interfaces").BooleanFlag<boolean>;
|
|
@@ -15,6 +15,7 @@ export default class Run extends Command {
|
|
|
15
15
|
static examples = [
|
|
16
16
|
"<%= config.bin %> <%= command.id %> my-challenge",
|
|
17
17
|
"<%= config.bin %> <%= command.id %> my-challenge --studio",
|
|
18
|
+
"<%= config.bin %> <%= command.id %> my-challenge --record",
|
|
18
19
|
"<%= config.bin %> <%= command.id %> team-name:my-challenge",
|
|
19
20
|
"<%= config.bin %> <%= command.id %> my-challenge --no-open",
|
|
20
21
|
"<%= config.bin %> <%= command.id %> my-challenge --no-wait",
|
|
@@ -28,6 +29,7 @@ export default class Run extends Command {
|
|
|
28
29
|
};
|
|
29
30
|
static flags = {
|
|
30
31
|
studio: Flags.boolean({ char: "s", description: "Run in studio environment", default: false }),
|
|
32
|
+
record: Flags.boolean({ char: "r", description: "Record the run (enables video recording)", default: false }),
|
|
31
33
|
"no-open": Flags.boolean({
|
|
32
34
|
description: "Don't open the run URL in the browser",
|
|
33
35
|
default: false,
|
|
@@ -279,11 +281,12 @@ export default class Run extends Command {
|
|
|
279
281
|
this.error("No participants specified. Use inline syntax or select agents interactively.");
|
|
280
282
|
}
|
|
281
283
|
try {
|
|
282
|
-
|
|
284
|
+
const jobType = flags.record ? "foreground_with_recording" : "foreground";
|
|
285
|
+
this.log(pc.blue(`\n>> Running challenge: ${challengeSlug}${flags.studio ? " (studio)" : ""}${flags.record ? " (recording)" : ""}...`));
|
|
283
286
|
const response = await api.runChallenge({
|
|
284
287
|
challenge: challengeSlug,
|
|
285
288
|
participants,
|
|
286
|
-
jobType
|
|
289
|
+
jobType,
|
|
287
290
|
});
|
|
288
291
|
if (response.runIds && response.runIds.length > 0) {
|
|
289
292
|
const runId = response.runIds[0];
|
|
@@ -0,0 +1,13 @@
|
|
|
1
|
+
import { Command } from "@oclif/core";
|
|
2
|
+
export default class Update extends Command {
|
|
3
|
+
static description: string;
|
|
4
|
+
static examples: string[];
|
|
5
|
+
static flags: {
|
|
6
|
+
"dry-run": import("@oclif/core/interfaces").BooleanFlag<boolean>;
|
|
7
|
+
yes: import("@oclif/core/interfaces").BooleanFlag<boolean>;
|
|
8
|
+
"add-missing": import("@oclif/core/interfaces").BooleanFlag<boolean>;
|
|
9
|
+
};
|
|
10
|
+
private compareFiles;
|
|
11
|
+
private showDiff;
|
|
12
|
+
run(): Promise<void>;
|
|
13
|
+
}
|
|
@@ -0,0 +1,165 @@
|
|
|
1
|
+
import fs from "node:fs/promises";
|
|
2
|
+
import path from "node:path";
|
|
3
|
+
import { Command, Flags } from "@oclif/core";
|
|
4
|
+
import enquirer from "enquirer";
|
|
5
|
+
import pc from "picocolors";
|
|
6
|
+
import { getStaticResourcePath } from "../lib/utils.js";
|
|
7
|
+
// Files that should be synced on update (documentation files only)
|
|
8
|
+
const SYNC_FILES = ["AGENTS.md", "CLAUDE.md"];
|
|
9
|
+
export default class Update extends Command {
|
|
10
|
+
static description = "Update project template files (AGENTS.md, CLAUDE.md) to the latest version from the CLI";
|
|
11
|
+
static examples = [
|
|
12
|
+
"<%= config.bin %> <%= command.id %>",
|
|
13
|
+
"<%= config.bin %> <%= command.id %> --dry-run",
|
|
14
|
+
"<%= config.bin %> <%= command.id %> --yes",
|
|
15
|
+
];
|
|
16
|
+
static flags = {
|
|
17
|
+
"dry-run": Flags.boolean({
|
|
18
|
+
description: "Preview changes without applying them",
|
|
19
|
+
default: false,
|
|
20
|
+
}),
|
|
21
|
+
yes: Flags.boolean({
|
|
22
|
+
char: "y",
|
|
23
|
+
description: "Skip confirmation prompts",
|
|
24
|
+
default: false,
|
|
25
|
+
}),
|
|
26
|
+
"add-missing": Flags.boolean({
|
|
27
|
+
description: "Add files that exist in template but not locally",
|
|
28
|
+
default: false,
|
|
29
|
+
}),
|
|
30
|
+
};
|
|
31
|
+
async compareFiles() {
|
|
32
|
+
const templateDir = getStaticResourcePath("project_template");
|
|
33
|
+
const cwd = process.cwd();
|
|
34
|
+
const comparisons = [];
|
|
35
|
+
for (const filename of SYNC_FILES) {
|
|
36
|
+
const templatePath = path.join(templateDir, filename);
|
|
37
|
+
const localPath = path.join(cwd, filename);
|
|
38
|
+
let templateContent;
|
|
39
|
+
let localContent = null;
|
|
40
|
+
try {
|
|
41
|
+
templateContent = await fs.readFile(templatePath, "utf-8");
|
|
42
|
+
}
|
|
43
|
+
catch {
|
|
44
|
+
// Template file doesn't exist (shouldn't happen)
|
|
45
|
+
continue;
|
|
46
|
+
}
|
|
47
|
+
try {
|
|
48
|
+
localContent = await fs.readFile(localPath, "utf-8");
|
|
49
|
+
}
|
|
50
|
+
catch {
|
|
51
|
+
// Local file doesn't exist
|
|
52
|
+
}
|
|
53
|
+
let status;
|
|
54
|
+
if (localContent === null) {
|
|
55
|
+
status = "missing_local";
|
|
56
|
+
}
|
|
57
|
+
else if (templateContent === localContent) {
|
|
58
|
+
status = "identical";
|
|
59
|
+
}
|
|
60
|
+
else {
|
|
61
|
+
status = "different";
|
|
62
|
+
}
|
|
63
|
+
comparisons.push({
|
|
64
|
+
filename,
|
|
65
|
+
templatePath,
|
|
66
|
+
localPath,
|
|
67
|
+
templateContent,
|
|
68
|
+
localContent,
|
|
69
|
+
status,
|
|
70
|
+
});
|
|
71
|
+
}
|
|
72
|
+
return comparisons;
|
|
73
|
+
}
|
|
74
|
+
showDiff(comparison) {
|
|
75
|
+
this.log(pc.bold(`\n--- ${comparison.filename} ---`));
|
|
76
|
+
if (comparison.status === "missing_local") {
|
|
77
|
+
this.log(pc.green("+ (new file)"));
|
|
78
|
+
const lines = comparison.templateContent.split("\n").slice(0, 10);
|
|
79
|
+
for (const line of lines) {
|
|
80
|
+
this.log(pc.green(`+ ${line}`));
|
|
81
|
+
}
|
|
82
|
+
if (comparison.templateContent.split("\n").length > 10) {
|
|
83
|
+
this.log(pc.dim(` ... and ${comparison.templateContent.split("\n").length - 10} more lines`));
|
|
84
|
+
}
|
|
85
|
+
return;
|
|
86
|
+
}
|
|
87
|
+
if (comparison.status === "identical") {
|
|
88
|
+
this.log(pc.dim("(no changes)"));
|
|
89
|
+
return;
|
|
90
|
+
}
|
|
91
|
+
// Show simple line count diff for different files
|
|
92
|
+
const templateLines = comparison.templateContent.split("\n").length;
|
|
93
|
+
const localLines = comparison.localContent?.split("\n").length || 0;
|
|
94
|
+
this.log(pc.yellow(` Local: ${localLines} lines`));
|
|
95
|
+
this.log(pc.green(` Template: ${templateLines} lines`));
|
|
96
|
+
this.log(pc.dim(" (file contents differ - will be replaced with template version)"));
|
|
97
|
+
}
|
|
98
|
+
async run() {
|
|
99
|
+
const { flags } = await this.parse(Update);
|
|
100
|
+
const isDryRun = flags["dry-run"];
|
|
101
|
+
const skipPrompts = flags.yes;
|
|
102
|
+
const addMissing = flags["add-missing"];
|
|
103
|
+
this.log(pc.blue(">> Checking for template updates...\n"));
|
|
104
|
+
const comparisons = await this.compareFiles();
|
|
105
|
+
// Categorize files
|
|
106
|
+
const identical = comparisons.filter((c) => c.status === "identical");
|
|
107
|
+
const different = comparisons.filter((c) => c.status === "different");
|
|
108
|
+
const missingLocal = comparisons.filter((c) => c.status === "missing_local");
|
|
109
|
+
// Show summary
|
|
110
|
+
if (identical.length > 0) {
|
|
111
|
+
this.log(pc.green(`✓ Up to date: ${identical.map((c) => c.filename).join(", ")}`));
|
|
112
|
+
}
|
|
113
|
+
if (different.length === 0 && (missingLocal.length === 0 || !addMissing)) {
|
|
114
|
+
this.log(pc.green("\n✓ All template files are up to date!"));
|
|
115
|
+
if (missingLocal.length > 0) {
|
|
116
|
+
this.log(pc.dim(` (${missingLocal.length} file(s) missing locally, use --add-missing to create them)`));
|
|
117
|
+
}
|
|
118
|
+
return;
|
|
119
|
+
}
|
|
120
|
+
// Show files that need updating
|
|
121
|
+
if (different.length > 0) {
|
|
122
|
+
this.log(pc.yellow(`\n⚠ Files to update: ${different.map((c) => c.filename).join(", ")}`));
|
|
123
|
+
for (const comparison of different) {
|
|
124
|
+
this.showDiff(comparison);
|
|
125
|
+
}
|
|
126
|
+
}
|
|
127
|
+
if (missingLocal.length > 0 && addMissing) {
|
|
128
|
+
this.log(pc.cyan(`\n+ Files to create: ${missingLocal.map((c) => c.filename).join(", ")}`));
|
|
129
|
+
for (const comparison of missingLocal) {
|
|
130
|
+
this.showDiff(comparison);
|
|
131
|
+
}
|
|
132
|
+
}
|
|
133
|
+
if (isDryRun) {
|
|
134
|
+
this.log(pc.dim("\n(dry run - no changes applied)"));
|
|
135
|
+
return;
|
|
136
|
+
}
|
|
137
|
+
// Determine which files to update
|
|
138
|
+
const filesToUpdate = [...different];
|
|
139
|
+
if (addMissing) {
|
|
140
|
+
filesToUpdate.push(...missingLocal);
|
|
141
|
+
}
|
|
142
|
+
if (filesToUpdate.length === 0) {
|
|
143
|
+
return;
|
|
144
|
+
}
|
|
145
|
+
// Confirm update
|
|
146
|
+
if (!skipPrompts) {
|
|
147
|
+
const { confirm } = await enquirer.prompt({
|
|
148
|
+
type: "confirm",
|
|
149
|
+
name: "confirm",
|
|
150
|
+
message: `Update ${filesToUpdate.length} file(s)?`,
|
|
151
|
+
initial: true,
|
|
152
|
+
});
|
|
153
|
+
if (!confirm) {
|
|
154
|
+
this.log(pc.dim("Update cancelled."));
|
|
155
|
+
return;
|
|
156
|
+
}
|
|
157
|
+
}
|
|
158
|
+
// Apply updates
|
|
159
|
+
for (const comparison of filesToUpdate) {
|
|
160
|
+
await fs.writeFile(comparison.localPath, comparison.templateContent);
|
|
161
|
+
this.log(pc.green(`✓ Updated ${comparison.filename}`));
|
|
162
|
+
}
|
|
163
|
+
this.log(pc.green(`\n✓ Successfully updated ${filesToUpdate.length} file(s)!`));
|
|
164
|
+
}
|
|
165
|
+
}
|
package/dist/lib/api-client.d.ts
CHANGED
|
@@ -75,7 +75,7 @@ export declare class ApiClient {
|
|
|
75
75
|
runChallenge(runData: {
|
|
76
76
|
challenge: string;
|
|
77
77
|
participants: unknown[];
|
|
78
|
-
jobType: "background" | "foreground";
|
|
78
|
+
jobType: "background" | "foreground" | "foreground_with_recording";
|
|
79
79
|
}): Promise<{
|
|
80
80
|
runIds?: string[] | undefined;
|
|
81
81
|
participants?: Record<string, {
|
package/oclif.manifest.json
CHANGED
|
@@ -42,6 +42,51 @@
|
|
|
42
42
|
"init.js"
|
|
43
43
|
]
|
|
44
44
|
},
|
|
45
|
+
"update": {
|
|
46
|
+
"aliases": [],
|
|
47
|
+
"args": {},
|
|
48
|
+
"description": "Update project template files (AGENTS.md, CLAUDE.md) to the latest version from the CLI",
|
|
49
|
+
"examples": [
|
|
50
|
+
"<%= config.bin %> <%= command.id %>",
|
|
51
|
+
"<%= config.bin %> <%= command.id %> --dry-run",
|
|
52
|
+
"<%= config.bin %> <%= command.id %> --yes"
|
|
53
|
+
],
|
|
54
|
+
"flags": {
|
|
55
|
+
"dry-run": {
|
|
56
|
+
"description": "Preview changes without applying them",
|
|
57
|
+
"name": "dry-run",
|
|
58
|
+
"allowNo": false,
|
|
59
|
+
"type": "boolean"
|
|
60
|
+
},
|
|
61
|
+
"yes": {
|
|
62
|
+
"char": "y",
|
|
63
|
+
"description": "Skip confirmation prompts",
|
|
64
|
+
"name": "yes",
|
|
65
|
+
"allowNo": false,
|
|
66
|
+
"type": "boolean"
|
|
67
|
+
},
|
|
68
|
+
"add-missing": {
|
|
69
|
+
"description": "Add files that exist in template but not locally",
|
|
70
|
+
"name": "add-missing",
|
|
71
|
+
"allowNo": false,
|
|
72
|
+
"type": "boolean"
|
|
73
|
+
}
|
|
74
|
+
},
|
|
75
|
+
"hasDynamicHelp": false,
|
|
76
|
+
"hiddenAliases": [],
|
|
77
|
+
"id": "update",
|
|
78
|
+
"pluginAlias": "kradle",
|
|
79
|
+
"pluginName": "kradle",
|
|
80
|
+
"pluginType": "core",
|
|
81
|
+
"strict": true,
|
|
82
|
+
"enableJsonFlag": false,
|
|
83
|
+
"isESM": true,
|
|
84
|
+
"relativePath": [
|
|
85
|
+
"dist",
|
|
86
|
+
"commands",
|
|
87
|
+
"update.js"
|
|
88
|
+
]
|
|
89
|
+
},
|
|
45
90
|
"agent:list": {
|
|
46
91
|
"aliases": [],
|
|
47
92
|
"args": {},
|
|
@@ -142,6 +187,30 @@
|
|
|
142
187
|
"cli.js"
|
|
143
188
|
]
|
|
144
189
|
},
|
|
190
|
+
"ai-docs:workflow": {
|
|
191
|
+
"aliases": [],
|
|
192
|
+
"args": {},
|
|
193
|
+
"description": "Output the challenge creation and edition workflow documentation for LLMs",
|
|
194
|
+
"examples": [
|
|
195
|
+
"<%= config.bin %> <%= command.id %>"
|
|
196
|
+
],
|
|
197
|
+
"flags": {},
|
|
198
|
+
"hasDynamicHelp": false,
|
|
199
|
+
"hiddenAliases": [],
|
|
200
|
+
"id": "ai-docs:workflow",
|
|
201
|
+
"pluginAlias": "kradle",
|
|
202
|
+
"pluginName": "kradle",
|
|
203
|
+
"pluginType": "core",
|
|
204
|
+
"strict": true,
|
|
205
|
+
"enableJsonFlag": false,
|
|
206
|
+
"isESM": true,
|
|
207
|
+
"relativePath": [
|
|
208
|
+
"dist",
|
|
209
|
+
"commands",
|
|
210
|
+
"ai-docs",
|
|
211
|
+
"workflow.js"
|
|
212
|
+
]
|
|
213
|
+
},
|
|
145
214
|
"challenge:build": {
|
|
146
215
|
"aliases": [],
|
|
147
216
|
"args": {
|
|
@@ -495,6 +564,7 @@
|
|
|
495
564
|
"examples": [
|
|
496
565
|
"<%= config.bin %> <%= command.id %> my-challenge",
|
|
497
566
|
"<%= config.bin %> <%= command.id %> my-challenge --studio",
|
|
567
|
+
"<%= config.bin %> <%= command.id %> my-challenge --record",
|
|
498
568
|
"<%= config.bin %> <%= command.id %> team-name:my-challenge",
|
|
499
569
|
"<%= config.bin %> <%= command.id %> my-challenge --no-open",
|
|
500
570
|
"<%= config.bin %> <%= command.id %> my-challenge --no-wait",
|
|
@@ -509,6 +579,13 @@
|
|
|
509
579
|
"allowNo": false,
|
|
510
580
|
"type": "boolean"
|
|
511
581
|
},
|
|
582
|
+
"record": {
|
|
583
|
+
"char": "r",
|
|
584
|
+
"description": "Record the run (enables video recording)",
|
|
585
|
+
"name": "record",
|
|
586
|
+
"allowNo": false,
|
|
587
|
+
"type": "boolean"
|
|
588
|
+
},
|
|
512
589
|
"no-open": {
|
|
513
590
|
"description": "Don't open the run URL in the browser",
|
|
514
591
|
"name": "no-open",
|
|
@@ -1435,5 +1512,5 @@
|
|
|
1435
1512
|
]
|
|
1436
1513
|
}
|
|
1437
1514
|
},
|
|
1438
|
-
"version": "0.6.3"
|
|
1515
|
+
"version": "0.6.5"
|
|
1439
1516
|
}
|
package/package.json
CHANGED
|
@@ -0,0 +1,130 @@
|
|
|
1
|
+
# Challenge Creation and Edition Workflow
|
|
2
|
+
|
|
3
|
+
This document provides guidance on how to create and edit a challenge using the CLI.
|
|
4
|
+
|
|
5
|
+
## Challenge Creation
|
|
6
|
+
|
|
7
|
+
### Phase 1: Challenge Definition
|
|
8
|
+
|
|
9
|
+
Gather requirements by asking these questions. Save answers in a markdown summary in the challenge's `config.ts` description field.
|
|
10
|
+
|
|
11
|
+
#### Questions to Ask
|
|
12
|
+
|
|
13
|
+
| # | Question | Notes |
|
|
14
|
+
|---|----------|-------|
|
|
15
|
+
| 1 | **Solo or multi-agent?** | Determines roles configuration |
|
|
16
|
+
| 2 | **World selection?** | New world → use flat world (ground Y = -60). Existing world → run `kradle world list` |
|
|
17
|
+
| 3 | **Research goal?** | Agent efficiency? Emergent behavior? Collaboration vs competition? |
|
|
18
|
+
| 4 | **Expected insights?** | What would be surprising or interesting to discover? |
|
|
19
|
+
| 5 | **Success conditions?** | When does an agent "win"? |
|
|
20
|
+
| 6 | **Optimization target?** | What metric should agents optimize? |
|
|
21
|
+
| 7 | **Possible end states?** | All ways a run can terminate |
|
|
22
|
+
| 8 | **Initial positioning?** | Where should agents spawn? |
|
|
23
|
+
|
|
24
|
+
#### Objective Configuration Reference
|
|
25
|
+
|
|
26
|
+
**`objective.fieldName`** (what to measure):
|
|
27
|
+
|
|
28
|
+
| Value | Description |
|
|
29
|
+
|-------|-------------|
|
|
30
|
+
| `successful_run_count` | Number of successful runs |
|
|
31
|
+
| `success_rate` | Percentage of successful runs |
|
|
32
|
+
| `min_time_to_success` | Fastest completion time |
|
|
33
|
+
| `max_time_to_success` | Slowest completion time |
|
|
34
|
+
| `mean_time_to_success` | Average completion time |
|
|
35
|
+
| `min_score` | Lowest score achieved |
|
|
36
|
+
| `max_score` | Highest score achieved |
|
|
37
|
+
| `mean_score` | Average score |
|
|
38
|
+
|
|
39
|
+
|
|
40
|
+
**`objective.direction`**: `minimize` or `maximize`
|
|
41
|
+
|
|
42
|
+
**`objective.thresholdFieldName`**: `runCount`
|
|
43
|
+
|
|
44
|
+
> **Important:** If optimizing for score, define the score logic and report it with `main_score.set(value);`
|
|
45
|
+
|
|
46
|
+
**Important**: roles should be alphanumeric only, no hyphen and no underscore
|
|
47
|
+
|
|
48
|
+
### Things to consider when building a challenge:
|
|
49
|
+
|
|
50
|
+
- Agents don't "see" the world with images, they get JSON observations about the world, so if you want them to take a specific path, you need to put a marker and tell the agent to follow that marker. The agent will use A* algorithm to go to the marker.
|
|
51
|
+
|
|
52
|
+
- If you want your marker to be configurable by the user, use the location mechanism: create a location for each marker, and reference these locations by id in your code.
|
|
53
|
+
|
|
54
|
+
---
|
|
55
|
+
|
|
56
|
+
### Phase 2: Challenge Building
|
|
57
|
+
|
|
58
|
+
#### Build Checklist
|
|
59
|
+
|
|
60
|
+
1. **Create challenge** using CLI:
|
|
61
|
+
```bash
|
|
62
|
+
kradle challenge create <name>
|
|
63
|
+
```
|
|
64
|
+
|
|
65
|
+
2. **Run TypeScript checks** to ensure compilation:
|
|
66
|
+
```bash
|
|
67
|
+
npx tsc --noEmit
|
|
68
|
+
```
|
|
69
|
+
|
|
70
|
+
3. **Build and inspect datapack**:
|
|
71
|
+
```bash
|
|
72
|
+
npx tsx challenges/<name>/challenge.ts
|
|
73
|
+
```
|
|
74
|
+
Review generated `.mcfunction` files in `kradle-studio/challenges/<name>/datapack/`
|
|
75
|
+
|
|
76
|
+
IMPORTANT: do not change datapack files directly, always edit `challenge.ts` file
|
|
77
|
+
|
|
78
|
+
4. **Update the `description` field in config.ts** with a markdown summary containing:
|
|
79
|
+
- Overview (what the challenge is)
|
|
80
|
+
- Setup (world, agents, duration)
|
|
81
|
+
- Mechanics (how the challenge works)
|
|
82
|
+
- Objective (what to optimize)
|
|
83
|
+
- Research goal (what insights we're looking for)
|
|
84
|
+
- What would be fun, surprising, or bewildering to observe (what would go viral)
|
|
85
|
+
- End states (all ways the challenge can end)
|
|
86
|
+
|
|
87
|
+
> **Important:** Always update this field when mechanics change during development.
|
|
88
|
+
|
|
89
|
+
5. **locations**
|
|
90
|
+
Worlds have locations. You can find out about them by running `kradle world info <world_slug>`
|
|
91
|
+
Make sure you use these coordinates when building things.
|
|
92
|
+
|
|
93
|
+
---
|
|
94
|
+
|
|
95
|
+
### Phase 3: Challenge Testing
|
|
96
|
+
|
|
97
|
+
#### Test Run Configuration
|
|
98
|
+
- **Agent:** Use `gemini-2-5-flash` for all test runs
|
|
99
|
+
- by default don't use the --no-open flag so the user can follow the runs
|
|
100
|
+
- You can also ask the user whether they want to record the run; if so, add the `--record` flag to the `kradle challenge run` command
|
|
101
|
+
|
|
102
|
+
#### Test Checklist
|
|
103
|
+
|
|
104
|
+
1. **Check spawn positions** — Review `initial_state` observation in run logs to confirm agents spawn at expected coordinates
|
|
105
|
+
|
|
106
|
+
2. **Verify world setup** — `kradle challenge run <challenge_slug> --screenshot` will generate screenshots of the scene. You will find the URL of the screenshots in the logs. Use these screenshots to confirm that you have built the right structures and that the agents are spawned at the right location.
|
|
107
|
+
|
|
108
|
+
3. **Validate success conditions** — Run tests to confirm win conditions trigger correctly
|
|
109
|
+
|
|
110
|
+
4. **Validate end states** — Test each possible end state fires appropriately
|
|
111
|
+
|
|
112
|
+
|
|
113
|
+
#### Debugging Tips
|
|
114
|
+
|
|
115
|
+
- Use `Actions.log_variable()` to create debug traces
|
|
116
|
+
- As soon as the challenge starts, download the logs and start looking into them
|
|
117
|
+
```bash
|
|
118
|
+
kradle challenge runs get <run_id>
|
|
119
|
+
```
|
|
120
|
+
- if the challenge doesn't end at the maximum set duration or on certain conditions, it is likely that the datapack failed at runtime and the timer doesn't work
|
|
121
|
+
- if you don't see the structure you think you've built, it could be that the coordinates are off, or again that the datapack failed at runtime
|
|
122
|
+
|
|
123
|
+
|
|
124
|
+
## Challenge Edition
|
|
125
|
+
|
|
126
|
+
When editing a challenge first look at the `description` field in the `config.ts` file for instructions.
|
|
127
|
+
|
|
128
|
+
If the challenge is remixed from another challenge, ask the user what they want to change/update, and why?
|
|
129
|
+
|
|
130
|
+
Otherwise ask the user where they want to start. It's a good idea to look for past runs to understand what was the summary of these runs, and suggest further directions.
|
|
@@ -1,8 +1,8 @@
|
|
|
1
1
|
# Kradle Challenge Development Guide
|
|
2
2
|
|
|
3
|
-
## API
|
|
3
|
+
## API, CLI, Workflow References
|
|
4
4
|
|
|
5
|
-
Always read
|
|
5
|
+
Always read all 3 references before starting:
|
|
6
6
|
|
|
7
7
|
```bash
|
|
8
8
|
# CLI reference (commands, flags, workflows)
|
|
@@ -10,108 +10,13 @@ kradle ai-docs cli
|
|
|
10
10
|
|
|
11
11
|
# SDK reference (@kradle/challenges-sdk package documentation)
|
|
12
12
|
kradle ai-docs challenges-sdk
|
|
13
|
+
|
|
14
|
+
# Challenge creation and edition workflows
|
|
15
|
+
kradle ai-docs workflow
|
|
13
16
|
```
|
|
14
17
|
|
|
15
18
|
**Key principles:**
|
|
16
19
|
- Use CLI commands to create challenges/experiments (avoid creating from scratch)
|
|
17
20
|
- Comment all generated code for clarity
|
|
18
21
|
|
|
19
|
-
---
|
|
20
|
-
|
|
21
|
-
## Workflow
|
|
22
|
-
|
|
23
|
-
### Phase 1: Challenge Definition
|
|
24
|
-
|
|
25
|
-
Gather requirements by asking these questions. Save answers in a markdown summary in the challenge's `config.ts` description field.
|
|
26
|
-
|
|
27
|
-
#### Questions to Ask
|
|
28
|
-
|
|
29
|
-
| # | Question | Notes |
|
|
30
|
-
|---|----------|-------|
|
|
31
|
-
| 1 | **Solo or multi-agent?** | Determines roles configuration |
|
|
32
|
-
| 2 | **World selection?** | New world → use flat world (ground Y = -60). Existing world → run `kradle world list` |
|
|
33
|
-
| 3 | **Research goal?** | Agent efficiency? Emergent behavior? Collaboration vs competition? |
|
|
34
|
-
| 4 | **Expected insights?** | What would be surprising or interesting to discover? |
|
|
35
|
-
| 5 | **Success conditions?** | When does an agent "win"? |
|
|
36
|
-
| 6 | **Optimization target?** | What metric should agents optimize? |
|
|
37
|
-
| 7 | **Possible end states?** | All ways a run can terminate |
|
|
38
|
-
| 8 | **Initial positioning?** | Where should agents spawn? |
|
|
39
|
-
|
|
40
|
-
#### Objective Configuration Reference
|
|
41
|
-
|
|
42
|
-
**`objective.fieldName`** (what to measure):
|
|
43
|
-
|
|
44
|
-
| Value | Description |
|
|
45
|
-
|-------|-------------|
|
|
46
|
-
| `successfulRunCount` | Number of successful runs |
|
|
47
|
-
| `successRate` | Percentage of successful runs |
|
|
48
|
-
| `minTimeToSuccess` | Fastest completion time |
|
|
49
|
-
| `maxTimeToSuccess` | Slowest completion time |
|
|
50
|
-
| `meanTimeToSuccess` | Average completion time |
|
|
51
|
-
| `minScore` | Lowest score achieved |
|
|
52
|
-
| `maxScore` | Highest score achieved |
|
|
53
|
-
| `meanScore` | Average score |
|
|
54
|
-
|
|
55
|
-
**`objective.direction`**: `minimize` or `maximize`
|
|
56
|
-
|
|
57
|
-
**`objective.thresholdFieldName`**: `runCount`
|
|
58
|
-
|
|
59
|
-
> **Important:** If optimizing for score, define the score logic and report it with `main_score.set(value);`
|
|
60
|
-
|
|
61
|
-
**Important**: roles should just be alphanumeric, no hypen no underscore
|
|
62
|
-
|
|
63
|
-
---
|
|
64
|
-
|
|
65
|
-
### Phase 2: Challenge Building
|
|
66
|
-
|
|
67
|
-
#### Build Checklist
|
|
68
|
-
|
|
69
|
-
1. **Create challenge** using CLI:
|
|
70
|
-
```bash
|
|
71
|
-
kradle challenge create <name>
|
|
72
|
-
```
|
|
73
|
-
|
|
74
|
-
2. **Run TypeScript checks** to ensure compilation:
|
|
75
|
-
```bash
|
|
76
|
-
npx tsc --noEmit
|
|
77
|
-
```
|
|
78
|
-
|
|
79
|
-
3. **Build and inspect datapack**:
|
|
80
|
-
```bash
|
|
81
|
-
npx tsx challenges/<name>/challenge.ts
|
|
82
|
-
```
|
|
83
|
-
Review generated `.mcfunction` files in `kradle-studio/challenges/<name>/datapack/`
|
|
84
|
-
|
|
85
|
-
4. **Make sure the description field in config.ts is up to date**
|
|
86
|
-
|
|
87
|
-
5. **locations**
|
|
88
|
-
Worlds have locations. you can find out able them by running `kradle world info <world_slug>`
|
|
89
|
-
Make sure you use these coordinates when building things.
|
|
90
|
-
|
|
91
|
-
---
|
|
92
|
-
|
|
93
|
-
### Phase 3: Challenge Testing
|
|
94
|
-
|
|
95
|
-
#### Test Configuration
|
|
96
|
-
- **Agent:** Use `gemini-2-5-flash` for all test runs
|
|
97
|
-
|
|
98
|
-
#### Test Checklist
|
|
99
|
-
|
|
100
|
-
1. **Check spawn positions** — Review `initial_state` observation in run logs to confirm agents spawn at expected coordinates
|
|
101
|
-
|
|
102
|
-
2. **Verify world setup** — `kradle challenge run <challenge_slug> --screenshot` will generate screenshots of the scene. you will find the url of the screenshots in the logs. use these screenshots to confirm that you have built the right structures and that the agents are spawned at the right location.
|
|
103
|
-
|
|
104
|
-
3. **Validate success conditions** — Run tests to confirm win conditions trigger correctly
|
|
105
|
-
|
|
106
|
-
4. **Validate end states** — Test each possible end state fires appropriately
|
|
107
|
-
|
|
108
|
-
|
|
109
|
-
#### Debugging Tips
|
|
110
|
-
|
|
111
|
-
- Use `Actions.log_variable()` to create debug traces
|
|
112
|
-
- As soon as the challenge starts, download the logs and start looking into them
|
|
113
|
-
```bash
|
|
114
|
-
kradle challenge runs get <run_id>
|
|
115
|
-
```
|
|
116
|
-
- if the challenge doesn't end at the maximum set duration or on certain conditions, it is likely that the datapack failed at runtime and the timer doesn't work
|
|
117
|
-
- if you don't see the structure you think you've built, it could be that the coordinates are off, or again that the datapack failed at runtime
|
|
22
|
+
---
|