@fastino-ai/pioneer-cli 0.1.0 → 0.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +161 -22
- package/bun.lock +82 -0
- package/cache/cache.db +0 -0
- package/cache/cache.db-shm +0 -0
- package/cache/cache.db-wal +0 -0
- package/fastino-ai-pioneer-cli-0.2.0.tgz +0 -0
- package/package.json +6 -3
- package/src/agent/Agent.ts +342 -0
- package/src/agent/BudgetManager.ts +167 -0
- package/src/agent/FileResolver.ts +321 -0
- package/src/agent/LLMClient.ts +435 -0
- package/src/agent/ToolRegistry.ts +97 -0
- package/src/agent/index.ts +15 -0
- package/src/agent/types.ts +84 -0
- package/src/chat/ChatApp.tsx +701 -0
- package/src/chat/index.ts +7 -0
- package/src/config.ts +185 -3
- package/src/evolution/EvalRunner.ts +301 -0
- package/src/evolution/EvolutionEngine.ts +319 -0
- package/src/evolution/FeedbackCollector.ts +197 -0
- package/src/evolution/ModelTrainer.ts +371 -0
- package/src/evolution/index.ts +18 -0
- package/src/evolution/types.ts +110 -0
- package/src/index.tsx +101 -2
- package/src/tools/bash.ts +184 -0
- package/src/tools/filesystem.ts +444 -0
- package/src/tools/index.ts +29 -0
- package/src/tools/modal.ts +269 -0
- package/src/tools/sandbox.ts +310 -0
- package/src/tools/training.ts +443 -0
- package/src/tools/wandb.ts +348 -0
|
@@ -0,0 +1,348 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Weights & Biases integration for experiment tracking
|
|
3
|
+
*/
|
|
4
|
+
|
|
5
|
+
import { spawn } from "child_process";
|
|
6
|
+
import type { Tool, ToolResult } from "../agent/types.js";
|
|
7
|
+
|
|
8
|
+
/**
 * Optional settings shared by the Weights & Biases helpers in this module.
 * Every field may be omitted.
 */
export interface WandbConfig {
  /** When set, handed to spawned processes as the `WANDB_API_KEY` environment variable. */
  apiKey?: string;
  /** W&B entity (user or team); the query tool prefixes the project path with it. */
  entity?: string;
  /** Default W&B project; the log tool uses it when a call does not name a project. */
  project?: string;
  /** Time limit in milliseconds for a spawned command; `runWandb` falls back to 60000 when unset. */
  timeout?: number;
}
|
|
14
|
+
|
|
15
|
+
/**
 * Check whether the `wandb` CLI can be launched.
 *
 * Runs `wandb --version` and reports whether it exited with status 0.
 * Never rejects: a spawn failure (for example, the binary is missing)
 * resolves to `false`.
 *
 * @returns `true` when `wandb --version` exits with code 0, otherwise `false`.
 */
export async function isWandbAvailable(): Promise<boolean> {
  return new Promise<boolean>((resolve) => {
    // Only the exit status is inspected, so discard the child's output rather
    // than opening pipes that are never read.
    const proc = spawn("wandb", ["--version"], { stdio: "ignore" });
    // Both events can fire for one child; the first resolve() wins.
    proc.once("error", () => resolve(false));
    proc.once("close", (code) => resolve(code === 0));
  });
}
|
|
23
|
+
|
|
24
|
+
/**
 * Launch the `wandb` CLI with `args` and collect everything it prints.
 *
 * The child inherits the current environment, with `WANDB_API_KEY`,
 * `WANDB_ENTITY` and `WANDB_PROJECT` overridden by any non-empty values in
 * `options`. After `options.timeout` milliseconds (60000 when unset or 0)
 * the child is sent SIGTERM.
 *
 * @param args - Command-line arguments passed to `wandb`.
 * @param options - Credentials, defaults and time limit for the invocation.
 * @returns Captured stdout/stderr and the exit code. The exit code is -1 when
 *   the command timed out, could not be spawned, or ended without a code.
 *   The promise never rejects.
 */
async function runWandb(
  args: string[],
  options: WandbConfig = {}
): Promise<{ stdout: string; stderr: string; exitCode: number }> {
  const limitMs = options.timeout || 60000;

  // Start from the parent environment and layer the configured overrides on top.
  const childEnv: NodeJS.ProcessEnv = { ...process.env };
  const overrides: Array<[string, string | undefined]> = [
    ["WANDB_API_KEY", options.apiKey],
    ["WANDB_ENTITY", options.entity],
    ["WANDB_PROJECT", options.project],
  ];
  for (const [key, value] of overrides) {
    if (value) childEnv[key] = value;
  }

  return new Promise((done) => {
    const outParts: string[] = [];
    const errParts: string[] = [];
    let expired = false;

    const child = spawn("wandb", args, {
      stdio: ["pipe", "pipe", "pipe"],
      env: childEnv,
    });

    const watchdog = setTimeout(() => {
      expired = true;
      child.kill("SIGTERM");
    }, limitMs);

    child.stdout.on("data", (chunk: Buffer) => {
      outParts.push(chunk.toString());
    });
    child.stderr.on("data", (chunk: Buffer) => {
      errParts.push(chunk.toString());
    });

    child.on("close", (status) => {
      clearTimeout(watchdog);
      // A timeout always reports -1, regardless of how the child ended.
      const exitCode = expired ? -1 : (status ?? -1);
      done({ stdout: outParts.join(""), stderr: errParts.join(""), exitCode });
    });

    child.on("error", (failure) => {
      clearTimeout(watchdog);
      // Spawn failures surface the error message in place of stderr.
      done({ stdout: "", stderr: failure.message, exitCode: -1 });
    });
  });
}
|
|
78
|
+
|
|
79
|
+
/**
 * Static Python program used by the `wandb_log` tool.
 *
 * It reads a JSON payload (`project`, `run_name`, `metrics`) from stdin, so no
 * caller-supplied value is ever spliced into Python source code.
 */
const WANDB_LOG_SCRIPT = `
import json
import sys

import wandb

payload = json.load(sys.stdin)
run = wandb.init(
    project=payload["project"],
    name=payload["run_name"],
    reinit=True,
)
wandb.log(payload["metrics"])
wandb.finish()
print(f"Logged metrics to W&B run: {run.url}")
`;

/** Build a failed `ToolResult` for the `wandb_log` tool. */
function wandbLogFailure(error: string): ToolResult {
  return { toolCallId: "", output: "", success: false, error };
}

/**
 * Run WANDB_LOG_SCRIPT under `python3`, feeding `payload` on stdin.
 * Resolves (never rejects) once the interpreter exits or fails to start.
 */
function runWandbLogScript(
  payload: string,
  env: NodeJS.ProcessEnv,
  timeoutMs?: number
): Promise<ToolResult> {
  return new Promise<ToolResult>((resolve) => {
    let stdout = "";
    let stderr = "";
    let timedOut = false;

    const proc = spawn("python3", ["-c", WANDB_LOG_SCRIPT], {
      stdio: ["pipe", "pipe", "pipe"],
      env,
    });

    // A time limit is enforced only when the caller configured one.
    const timer =
      timeoutMs !== undefined && timeoutMs > 0
        ? setTimeout(() => {
            timedOut = true;
            proc.kill("SIGTERM");
          }, timeoutMs)
        : undefined;

    proc.stdout.on("data", (data: Buffer) => {
      stdout += data.toString();
    });
    proc.stderr.on("data", (data: Buffer) => {
      stderr += data.toString();
    });

    // If the child dies before reading stdin the write fails (EPIPE); the
    // failure is already reported through "close"/"error", so swallow it here.
    proc.stdin.on("error", () => {});
    proc.stdin.end(payload);

    proc.on("close", (code) => {
      if (timer) clearTimeout(timer);
      const failed = code !== 0;

      let output = stdout;
      // On success, stderr that contains wandb's own "wandb:" progress lines is
      // treated as noise; on failure it is always kept so tracebacks are visible.
      if (stderr && (failed || !stderr.includes("wandb:"))) {
        output += (output ? "\n" : "") + stderr;
      }

      let error: string | undefined;
      if (timedOut) {
        error = `Timed out after ${timeoutMs}ms`;
      } else if (failed) {
        error = `Exit code: ${code}`;
      }

      resolve({
        toolCallId: "",
        output: output || (failed ? "" : "Metrics logged successfully"),
        success: !failed,
        error,
      });
    });

    proc.on("error", (err) => {
      if (timer) clearTimeout(timer);
      resolve(wandbLogFailure(err.message));
    });
  });
}

/**
 * Create the `wandb_log` tool, which logs a metrics object to a W&B run.
 *
 * @param config - Optional API key and entity (exported to the child as
 *   `WANDB_API_KEY` / `WANDB_ENTITY`), default project, and time limit in
 *   milliseconds for the Python process.
 * @returns A tool whose `execute` expects `run_name` (string), `metrics`
 *   (JSON object) and an optional `project`. `execute` never throws; failures
 *   are reported through `success: false` and `error`.
 */
export function createWandbLogTool(config: WandbConfig = {}): Tool {
  return {
    name: "wandb_log",
    description: "Log metrics, artifacts, or data to Weights & Biases for experiment tracking.",
    parameters: [
      {
        name: "run_name",
        type: "string",
        description: "Name for the W&B run",
        required: true,
      },
      {
        name: "metrics",
        type: "object",
        description: "Metrics to log as JSON object (e.g., {\"loss\": 0.5, \"accuracy\": 0.9})",
        required: true,
      },
      {
        name: "project",
        type: "string",
        description: "W&B project name (optional, uses config default)",
        required: false,
      },
    ],

    async execute(args: Record<string, unknown>): Promise<ToolResult> {
      const runName = args.run_name;
      const metrics = args.metrics;

      if (!runName || !metrics) {
        return wandbLogFailure("run_name and metrics are required");
      }
      if (
        typeof runName !== "string" ||
        typeof metrics !== "object" ||
        Array.isArray(metrics)
      ) {
        return wandbLogFailure("run_name must be a string and metrics must be a JSON object");
      }

      const available = await isWandbAvailable();
      if (!available) {
        return wandbLogFailure("wandb CLI not installed. Run: pip install wandb");
      }

      // Per-call project wins, then the configured default, then the built-in one.
      const project =
        (typeof args.project === "string" && args.project) ||
        config.project ||
        "pioneer-agent";

      const env: NodeJS.ProcessEnv = { ...process.env };
      if (config.apiKey) env.WANDB_API_KEY = config.apiKey;
      if (config.entity) env.WANDB_ENTITY = config.entity;

      const payload = JSON.stringify({ project, run_name: runName, metrics });
      return runWandbLogScript(payload, env, config.timeout);
    },
  };
}
|
|
208
|
+
|
|
209
|
+
/**
 * Static Python program used by the `wandb_query` tool.
 *
 * It reads a JSON payload (`entity`, `project`, `filters`, `limit`) from stdin,
 * so no caller-supplied value is ever spliced into Python source code.
 */
const WANDB_QUERY_SCRIPT = `
import itertools
import json
import sys

import wandb

payload = json.load(sys.stdin)
api = wandb.Api()
entity = payload.get("entity")
project = payload["project"]

path = f"{entity}/{project}" if entity else project
runs = api.runs(path, filters=payload["filters"])

results = []
for run in itertools.islice(runs, payload["limit"]):
    results.append({
        "id": run.id,
        "name": run.name,
        "state": run.state,
        "url": run.url,
        "summary": dict(run.summary),
        "config": dict(run.config),
    })

print(json.dumps(results, indent=2, default=str))
`;

/** Build a failed `ToolResult` for the `wandb_query` tool. */
function wandbQueryFailure(error: string): ToolResult {
  return { toolCallId: "", output: "", success: false, error };
}

/**
 * Run WANDB_QUERY_SCRIPT under `python3`, feeding `payload` on stdin.
 * Resolves (never rejects) once the interpreter exits or fails to start.
 */
function runWandbQueryScript(
  payload: string,
  env: NodeJS.ProcessEnv,
  timeoutMs?: number
): Promise<ToolResult> {
  return new Promise<ToolResult>((resolve) => {
    let stdout = "";
    let stderr = "";
    let timedOut = false;

    const proc = spawn("python3", ["-c", WANDB_QUERY_SCRIPT], {
      stdio: ["pipe", "pipe", "pipe"],
      env,
    });

    // A time limit is enforced only when the caller configured one.
    const timer =
      timeoutMs !== undefined && timeoutMs > 0
        ? setTimeout(() => {
            timedOut = true;
            proc.kill("SIGTERM");
          }, timeoutMs)
        : undefined;

    proc.stdout.on("data", (data: Buffer) => {
      stdout += data.toString();
    });
    proc.stderr.on("data", (data: Buffer) => {
      stderr += data.toString();
    });

    // If the child dies before reading stdin the write fails (EPIPE); the
    // failure is already reported through "close"/"error", so swallow it here.
    proc.stdin.on("error", () => {});
    proc.stdin.end(payload);

    proc.on("close", (code) => {
      if (timer) clearTimeout(timer);
      const failed = code !== 0;

      // On success prefer the JSON on stdout; on failure show everything that
      // was printed so the Python traceback is not lost.
      const output = failed
        ? [stdout, stderr].filter((part) => part.length > 0).join("\n")
        : stdout || stderr;

      let error: string | undefined;
      if (timedOut) {
        error = `Timed out after ${timeoutMs}ms`;
      } else if (failed) {
        error = `Exit code: ${code}`;
      }

      resolve({
        toolCallId: "",
        output: output || "(no results)",
        success: !failed,
        error,
      });
    });

    proc.on("error", (err) => {
      if (timer) clearTimeout(timer);
      resolve(wandbQueryFailure(err.message));
    });
  });
}

/**
 * Create the `wandb_query` tool, which lists runs of a W&B project as JSON.
 *
 * @param config - Optional API key (exported to the child as `WANDB_API_KEY`),
 *   entity used to prefix the project path, and time limit in milliseconds
 *   for the Python process.
 * @returns A tool whose `execute` expects `project` (string), optional
 *   `filters` (JSON object) and optional `limit` (positive number, default 10).
 *   `execute` never throws; failures are reported through `success: false`
 *   and `error`.
 */
export function createWandbQueryTool(config: WandbConfig = {}): Tool {
  return {
    name: "wandb_query",
    description: "Query runs and metrics from Weights & Biases.",
    parameters: [
      {
        name: "project",
        type: "string",
        description: "W&B project name to query",
        required: true,
      },
      {
        name: "filters",
        type: "object",
        description: "Filters for the query as JSON (e.g., {\"state\": \"finished\"})",
        required: false,
      },
      {
        name: "limit",
        type: "number",
        description: "Maximum number of runs to return (default: 10)",
        required: false,
      },
    ],

    async execute(args: Record<string, unknown>): Promise<ToolResult> {
      const project = args.project;
      if (!project) {
        return wandbQueryFailure("project is required");
      }
      if (typeof project !== "string") {
        return wandbQueryFailure("project must be a string");
      }

      // Missing/empty filters mean "no filtering"; anything else must be an object.
      const rawFilters = args.filters;
      let filters: Record<string, unknown> = {};
      if (rawFilters) {
        if (typeof rawFilters !== "object" || Array.isArray(rawFilters)) {
          return wandbQueryFailure("filters must be a JSON object");
        }
        filters = rawFilters as Record<string, unknown>;
      }

      // Fall back to 10 for missing, non-numeric, or non-positive limits.
      const requested = Number(args.limit);
      const limit =
        Number.isFinite(requested) && requested >= 1 ? Math.floor(requested) : 10;

      const available = await isWandbAvailable();
      if (!available) {
        return wandbQueryFailure("wandb CLI not installed. Run: pip install wandb");
      }

      const env: NodeJS.ProcessEnv = { ...process.env };
      if (config.apiKey) env.WANDB_API_KEY = config.apiKey;

      const payload = JSON.stringify({
        entity: config.entity || null,
        project,
        filters,
        limit,
      });
      return runWandbQueryScript(payload, env, config.timeout);
    },
  };
}
|
|
344
|
+
|
|
345
|
+
/**
 * Build every W&B tool defined in this module from one shared configuration.
 *
 * @param config - Settings forwarded unchanged to each tool factory.
 * @returns The log tool followed by the query tool.
 */
export function createWandbTools(config: WandbConfig = {}): Tool[] {
  const factories = [createWandbLogTool, createWandbQueryTool];
  return factories.map((makeTool) => makeTool(config));
}
|
|
348
|
+
|