@fastino-ai/pioneer-cli 0.2.2 → 0.2.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.claude/settings.local.json +7 -1
- package/.cursor/rules/api-documentation.mdc +14 -0
- package/.cursor/rules/backend-location-rule.mdc +5 -0
- package/Medical_NER_Dataset_1.jsonl +50 -0
- package/README.md +4 -1
- package/bun.lock +52 -0
- package/package.json +5 -2
- package/src/api.ts +551 -22
- package/src/chat/ChatApp.tsx +548 -263
- package/src/client/ToolExecutor.ts +175 -0
- package/src/client/WebSocketClient.ts +333 -0
- package/src/client/index.ts +2 -0
- package/src/config.ts +49 -139
- package/src/index.tsx +796 -106
- package/src/telemetry.ts +173 -0
- package/src/tests/config.test.ts +19 -0
- package/src/tools/bash.ts +1 -1
- package/src/tools/filesystem.ts +1 -1
- package/src/tools/index.ts +2 -9
- package/src/tools/sandbox.ts +1 -1
- package/src/tools/types.ts +25 -0
- package/src/utils/index.ts +6 -0
- package/fastino-ai-pioneer-cli-0.2.0.tgz +0 -0
- package/ner_dataset.json +0 -111
- package/src/agent/Agent.ts +0 -342
- package/src/agent/BudgetManager.ts +0 -167
- package/src/agent/LLMClient.ts +0 -435
- package/src/agent/ToolRegistry.ts +0 -97
- package/src/agent/index.ts +0 -15
- package/src/agent/types.ts +0 -84
- package/src/evolution/EvalRunner.ts +0 -301
- package/src/evolution/EvolutionEngine.ts +0 -319
- package/src/evolution/FeedbackCollector.ts +0 -197
- package/src/evolution/ModelTrainer.ts +0 -371
- package/src/evolution/index.ts +0 -18
- package/src/evolution/types.ts +0 -110
- package/src/tools/modal.ts +0 -269
- package/src/tools/training.ts +0 -443
- package/src/tools/wandb.ts +0 -348
- /package/src/{agent → utils}/FileResolver.ts +0 -0
package/src/tools/modal.ts
DELETED
|
@@ -1,269 +0,0 @@
|
|
|
1
|
-
/**
|
|
2
|
-
* Modal.com integration for serverless ML workloads
|
|
3
|
-
*/
|
|
4
|
-
|
|
5
|
-
import { spawn } from "child_process";
|
|
6
|
-
import type { Tool, ToolResult } from "../agent/types.js";
|
|
7
|
-
|
|
8
|
-
/**
 * Options shared by the Modal tools in this module.
 *
 * Every field is optional; when credentials are omitted the spawned `modal`
 * process simply inherits whatever is already in `process.env`.
 */
export interface ModalConfig {
  /** Exported to the `modal` child process as `MODAL_TOKEN_ID` when set. */
  tokenId?: string;
  /** Exported to the `modal` child process as `MODAL_TOKEN_SECRET` when set. */
  tokenSecret?: string;
  /** Milliseconds to wait before the `modal` process is sent SIGTERM (5 minutes when unset). */
  timeout?: number;
}
|
|
13
|
-
|
|
14
|
-
/**
 * Probe for the Modal CLI by running `modal --version`.
 *
 * @returns `true` when the command exits with status 0; `false` when it exits
 *   with any other status or cannot be started at all.
 */
export async function isModalAvailable(): Promise<boolean> {
  return new Promise<boolean>((settle) => {
    const probe = spawn("modal", ["--version"], { stdio: "pipe" });
    // A spawn failure (for example, the binary is not on PATH) means "not available".
    probe.on("error", () => {
      settle(false);
    });
    probe.on("close", (exitCode) => {
      settle(exitCode === 0);
    });
  });
}
|
|
22
|
-
|
|
23
|
-
/**
 * Run the `modal` CLI with the given arguments and capture its output.
 *
 * The returned promise never rejects: a process that cannot be started, is
 * killed by a signal, or exceeds the timeout is reported as `exitCode: -1`
 * with an explanation appended to `stderr`.
 *
 * @param args - Arguments handed verbatim to the `modal` executable (no shell is involved).
 * @param options - Optional credentials (exported to the child as
 *   `MODAL_TOKEN_ID` / `MODAL_TOKEN_SECRET`) and a timeout in milliseconds
 *   (5 minutes when unset or 0).
 * @returns Everything the process wrote to stdout and stderr, plus its exit code.
 */
async function runModal(
  args: string[],
  options: ModalConfig = {}
): Promise<{ stdout: string; stderr: string; exitCode: number }> {
  const timeout = options.timeout || 300000; // 5 min default

  const env: NodeJS.ProcessEnv = { ...process.env };
  if (options.tokenId) env.MODAL_TOKEN_ID = options.tokenId;
  if (options.tokenSecret) env.MODAL_TOKEN_SECRET = options.tokenSecret;

  return new Promise((resolve) => {
    let stdout = "";
    let stderr = "";
    let timedOut = false;

    const proc = spawn("modal", args, {
      stdio: ["pipe", "pipe", "pipe"],
      env,
    });

    // Let the streams decode UTF-8 themselves: converting each Buffer chunk
    // separately corrupts any multi-byte character that straddles two chunks.
    proc.stdout.setEncoding("utf8");
    proc.stderr.setEncoding("utf8");
    proc.stdout.on("data", (chunk: string) => {
      stdout += chunk;
    });
    proc.stderr.on("data", (chunk: string) => {
      stderr += chunk;
    });

    const timer = setTimeout(() => {
      timedOut = true;
      proc.kill("SIGTERM");
    }, timeout);

    proc.on("close", (code) => {
      clearTimeout(timer);
      if (timedOut) {
        // Without this note a timeout looks like any other -1 exit to the caller.
        stderr += `${stderr ? "\n" : ""}(modal timed out after ${timeout} ms)`;
      }
      resolve({
        stdout,
        stderr,
        // `code` is null when the process was terminated by a signal.
        exitCode: timedOut ? -1 : code ?? -1,
      });
    });

    proc.on("error", (err) => {
      clearTimeout(timer);
      // Keep whatever was captured before the failure instead of discarding it.
      resolve({
        stdout,
        stderr: stderr ? `${stderr}\n${err.message}` : err.message,
        exitCode: -1,
      });
    });
  });
}
|
|
76
|
-
|
|
77
|
-
/**
 * Create the `modal_run` tool, which writes the supplied Python source to a
 * temporary `app.py` and executes one of its functions with `modal run`.
 *
 * The tool never throws for bad input or a failed run; problems are reported
 * through `success: false` and `error` on the returned result.
 *
 * @param config - Credentials and timeout forwarded to the `modal` process.
 * @returns A tool whose `execute` accepts `code`, `function_name` and an
 *   optional `args` JSON object (as a string, or already parsed).
 */
export function createModalRunTool(config: ModalConfig = {}): Tool {
  /** Build a failed result that carries only an error message. */
  const failure = (error: string): ToolResult => ({
    toolCallId: "",
    output: "",
    success: false,
    error,
  });

  /**
   * Turn the `args` input into `--key value` CLI pairs.
   * Returns `undefined` when the input is not a JSON object.
   */
  const toCliFlags = (raw: unknown): string[] | undefined => {
    if (!raw) return [];
    let parsed: unknown = raw;
    if (typeof raw === "string") {
      try {
        parsed = JSON.parse(raw);
      } catch {
        return undefined;
      }
    }
    if (parsed === null || typeof parsed !== "object" || Array.isArray(parsed)) {
      return undefined;
    }
    const flags: string[] = [];
    for (const [key, value] of Object.entries(parsed)) {
      // String(obj) would yield "[object Object]"; keep nested values as JSON.
      const text =
        value !== null && typeof value === "object" ? JSON.stringify(value) : String(value);
      flags.push(`--${key}`, text);
    }
    return flags;
  };

  return {
    name: "modal_run",
    description: "Run a Python function on Modal.com serverless GPU/CPU infrastructure. Useful for ML inference, training, and compute-intensive tasks.",
    parameters: [
      {
        name: "code",
        type: "string",
        description: "Python code with Modal app definition. Must include @app.function decorator.",
        required: true,
      },
      {
        name: "function_name",
        type: "string",
        description: "Name of the Modal function to run (the one with @app.local_entrypoint or to call)",
        required: true,
      },
      {
        name: "args",
        type: "string",
        description: "Arguments to pass to the function (as JSON string)",
        required: false,
      },
    ],

    async execute(args: Record<string, unknown>): Promise<ToolResult> {
      const code = typeof args.code === "string" ? args.code : "";
      const functionName = typeof args.function_name === "string" ? args.function_name : "";

      if (!code || !functionName) {
        return failure("code and function_name are required");
      }

      // Reject malformed arguments up front: silently dropping them would run
      // the function with different inputs than the caller asked for.
      const cliFlags = toCliFlags(args.args);
      if (cliFlags === undefined) {
        return failure('args must be a JSON object, e.g. {"epochs": 3}');
      }

      if (!(await isModalAvailable())) {
        return failure("Modal CLI not installed. Run: pip install modal");
      }

      const fs = await import("fs");
      const path = await import("path");
      const os = await import("os");

      const tempDir = fs.mkdtempSync(path.join(os.tmpdir(), "modal-"));

      try {
        const tempFile = path.join(tempDir, "app.py");

        // Ensure the code has the Modal import it needs.
        const fullCode = code.includes("import modal") ? code : "import modal\n\n" + code;
        fs.writeFileSync(tempFile, fullCode);

        // `modal run` takes the target as ONE reference of the form
        // `path/to/app.py::function`; a separate "::function" argument is not
        // part of the function reference.
        const runArgs = ["run", `${tempFile}::${functionName}`, ...cliFlags];
        const result = await runModal(runArgs, config);

        let output = result.stdout;
        if (result.stderr) {
          output += (output ? "\n\nStderr:\n" : "") + result.stderr;
        }

        return {
          toolCallId: "",
          output: output || "(no output)",
          success: result.exitCode === 0,
          error: result.exitCode !== 0 ? `Exit code: ${result.exitCode}` : undefined,
        };
      } finally {
        // Best-effort cleanup; a leftover temp dir must not mask the run result.
        try {
          fs.rmSync(tempDir, { recursive: true, force: true });
        } catch {
          // Ignore cleanup errors
        }
      }
    },
  };
}
|
|
184
|
-
|
|
185
|
-
/**
 * Create the `modal_deploy` tool, which writes the supplied Python source to a
 * temporary `app.py` and runs `modal deploy <file> --name <app_name>`.
 *
 * Failures are reported through `success: false` and `error` on the result
 * rather than by throwing.
 *
 * @param config - Credentials and timeout forwarded to the `modal` process.
 * @returns A tool whose `execute` requires `code` and `app_name`.
 */
export function createModalDeployTool(config: ModalConfig = {}): Tool {
  /** Build a failed result that carries only an error message. */
  const failure = (error: string): ToolResult => ({
    toolCallId: "",
    output: "",
    success: false,
    error,
  });

  return {
    name: "modal_deploy",
    description: "Deploy a Modal app for persistent endpoints or scheduled jobs.",
    parameters: [
      {
        name: "code",
        type: "string",
        description: "Python code with Modal app definition",
        required: true,
      },
      {
        name: "app_name",
        type: "string",
        description: "Name for the deployed app",
        required: true,
      },
    ],

    async execute(args: Record<string, unknown>): Promise<ToolResult> {
      // Non-string inputs are treated as missing so `.includes` below cannot throw.
      const code = typeof args.code === "string" ? args.code : "";
      const appName = typeof args.app_name === "string" ? args.app_name : "";

      if (!code || !appName) {
        return failure("code and app_name are required");
      }

      if (!(await isModalAvailable())) {
        return failure("Modal CLI not installed. Run: pip install modal");
      }

      const fs = await import("fs");
      const path = await import("path");
      const os = await import("os");

      const tempDir = fs.mkdtempSync(path.join(os.tmpdir(), "modal-deploy-"));

      // Everything after the directory exists runs under try/finally, so a
      // failed write cannot leave the temp dir behind.
      try {
        const tempFile = path.join(tempDir, "app.py");
        const fullCode = code.includes("import modal") ? code : "import modal\n\n" + code;
        fs.writeFileSync(tempFile, fullCode);

        const result = await runModal(["deploy", tempFile, "--name", appName], config);

        let output = result.stdout;
        if (result.stderr) {
          output += (output ? "\n\nStderr:\n" : "") + result.stderr;
        }

        return {
          toolCallId: "",
          output: output || "(no output)",
          success: result.exitCode === 0,
          error: result.exitCode !== 0 ? `Exit code: ${result.exitCode}` : undefined,
        };
      } finally {
        // Best-effort cleanup; never let it replace the deploy result.
        try {
          fs.rmSync(tempDir, { recursive: true, force: true });
        } catch {
          // Ignore
        }
      }
    },
  };
}
|
|
265
|
-
|
|
266
|
-
/**
 * Build every Modal tool with one shared configuration.
 *
 * @param config - Passed unchanged to each tool factory.
 * @returns The run tool followed by the deploy tool.
 */
export function createModalTools(config: ModalConfig = {}): Tool[] {
  const runTool = createModalRunTool(config);
  const deployTool = createModalDeployTool(config);
  return [runTool, deployTool];
}
|
|
269
|
-
|
package/src/tools/training.ts
DELETED
|
@@ -1,443 +0,0 @@
|
|
|
1
|
-
/**
|
|
2
|
-
* Model training orchestration tool
|
|
3
|
-
* Combines Modal for compute and W&B for tracking
|
|
4
|
-
*/
|
|
5
|
-
|
|
6
|
-
import type { Tool, ToolResult } from "../agent/types.js";
|
|
7
|
-
import { spawn } from "child_process";
|
|
8
|
-
import * as fs from "fs";
|
|
9
|
-
import * as path from "path";
|
|
10
|
-
import * as os from "os";
|
|
11
|
-
|
|
12
|
-
/**
 * Options shared by the training and fine-tuning tools in this module.
 *
 * All fields are optional; unset credentials fall back to whatever the
 * spawned `modal` process inherits from `process.env`.
 */
export interface TrainingConfig {
  /** Exported to the `modal` child process as `MODAL_TOKEN_ID` when set. */
  modalTokenId?: string;
  /** Exported to the `modal` child process as `MODAL_TOKEN_SECRET` when set. */
  modalTokenSecret?: string;
  /** Exported to the `modal` child process as `WANDB_API_KEY` when set. */
  wandbApiKey?: string;
  /** W&B entity. NOTE(review): not read by the tool factories visible in this file. */
  wandbEntity?: string;
  /** W&B project name; each tool substitutes its own default when unset. */
  defaultProject?: string;
  /** Milliseconds before the `modal` process is sent SIGTERM; each tool has its own default. */
  timeout?: number;
}
|
|
20
|
-
|
|
21
|
-
/**
 * Create the `train_model` tool. It wraps the caller's Python training code in
 * a generated Modal app (with a W&B run around it), writes that app to a
 * temporary file and executes it with `modal run`.
 *
 * Failures are reported through `success: false` and `error` on the result
 * rather than by throwing.
 *
 * Note: `model_type` and `dataset_path` are required and validated, but the
 * generated app does not reference them; the supplied training code is
 * responsible for loading its own data.
 *
 * @param config - Credentials, default W&B project and timeout (1 hour when unset).
 * @returns A tool whose `execute` accepts the parameters listed in `parameters`.
 */
export function createTrainingTool(config: TrainingConfig = {}): Tool {
  /** Build a failed result that carries only an error message. */
  const failure = (error: string): ToolResult => ({
    toolCallId: "",
    output: "",
    success: false,
    error,
  });

  /**
   * Render a value as a Python string literal. A JSON string literal is also
   * a valid Python one, so quotes, backslashes and newlines in user input
   * cannot terminate the literal or inject code.
   */
  const pyStr = (value: string): string => JSON.stringify(value);

  return {
    name: "train_model",
    description: "Train a machine learning model using Modal for compute and W&B for tracking. Supports fine-tuning and training from scratch.",
    parameters: [
      {
        name: "training_code",
        type: "string",
        description: "Python training code. Should include model definition, data loading, training loop, and logging to W&B.",
        required: true,
      },
      {
        name: "model_type",
        type: "string",
        description: "Type of model: 'transformer', 'cnn', 'custom'",
        required: true,
      },
      {
        name: "dataset_path",
        type: "string",
        description: "Path to training dataset (local or remote URL)",
        required: true,
      },
      {
        name: "hyperparameters",
        type: "object",
        description: "Training hyperparameters as JSON (e.g., {\"learning_rate\": 0.001, \"epochs\": 10})",
        required: false,
      },
      {
        name: "gpu",
        type: "string",
        description: "GPU type for Modal: 'T4', 'A10G', 'A100' (default: T4)",
        required: false,
      },
      {
        name: "run_name",
        type: "string",
        description: "Name for the W&B run",
        required: true,
      },
    ],

    async execute(args: Record<string, unknown>): Promise<ToolResult> {
      const trainingCode = typeof args.training_code === "string" ? args.training_code : "";
      const modelType = typeof args.model_type === "string" ? args.model_type : "";
      const datasetPath = typeof args.dataset_path === "string" ? args.dataset_path : "";
      const runName = typeof args.run_name === "string" ? args.run_name : "";
      const gpu = (typeof args.gpu === "string" && args.gpu) || "T4";

      if (!trainingCode || !modelType || !datasetPath || !runName) {
        return failure("training_code, model_type, dataset_path, and run_name are required");
      }

      // Accept either an object or its JSON text; anything else is rejected
      // before any file is written or process started.
      let hyperparameters: unknown = args.hyperparameters || {};
      if (typeof hyperparameters === "string") {
        try {
          hyperparameters = JSON.parse(hyperparameters);
        } catch {
          hyperparameters = undefined;
        }
      }
      if (
        hyperparameters === null ||
        typeof hyperparameters !== "object" ||
        Array.isArray(hyperparameters)
      ) {
        return failure("hyperparameters must be a JSON object");
      }

      // The user's code becomes the body of `train()`, so every line is
      // shifted to the function-body indentation.
      const indentedUserCode = trainingCode
        .split("\n")
        .map((line) => "    " + line)
        .join("\n");

      // Hyperparameters go through json.loads: raw JSON is not valid Python
      // source whenever it contains true / false / null.
      const hyperparametersLiteral = pyStr(JSON.stringify(hyperparameters));

      // Generate Modal training app
      const modalCode = `
import modal
import wandb
import json

app = modal.App(${pyStr("training-" + runName)})

# Define the training image with dependencies
image = modal.Image.debian_slim(python_version="3.11").pip_install([
    "torch",
    "transformers",
    "datasets",
    "wandb",
    "numpy",
    "scikit-learn",
])

@app.function(
    image=image,
    gpu=${pyStr(gpu)},
    timeout=3600,
    secrets=[
        modal.Secret.from_name("wandb-secret", required=False),
    ],
)
def train():
    import os

    # Initialize W&B
    wandb.init(
        project=${pyStr(config.defaultProject || "pioneer-training")},
        name=${pyStr(runName)},
        config=json.loads(${hyperparametersLiteral}),
    )

    # User's training code
${indentedUserCode}

    wandb.finish()
    return {"status": "completed", "run_name": ${pyStr(runName)}}

@app.local_entrypoint()
def main():
    result = train.remote()
    print(json.dumps(result))
`;

      const timeout = config.timeout || 3600000; // 1 hour default

      const env: NodeJS.ProcessEnv = { ...process.env };
      if (config.modalTokenId) env.MODAL_TOKEN_ID = config.modalTokenId;
      if (config.modalTokenSecret) env.MODAL_TOKEN_SECRET = config.modalTokenSecret;
      if (config.wandbApiKey) env.WANDB_API_KEY = config.wandbApiKey;

      const tempDir = fs.mkdtempSync(path.join(os.tmpdir(), "training-"));

      try {
        const appFile = path.join(tempDir, "training_app.py");
        fs.writeFileSync(appFile, modalCode);

        // `await` is essential: returning the bare promise would run the
        // `finally` cleanup right after spawning, deleting the app file while
        // `modal run` may not have read it yet.
        return await new Promise<ToolResult>((resolve) => {
          let stdout = "";
          let stderr = "";
          let timedOut = false;

          const proc = spawn("modal", ["run", appFile], {
            stdio: ["pipe", "pipe", "pipe"],
            env,
          });

          const timer = setTimeout(() => {
            timedOut = true;
            proc.kill("SIGTERM");
          }, timeout);

          // Stream-level decoding keeps multi-byte characters intact across chunks.
          proc.stdout.setEncoding("utf8");
          proc.stderr.setEncoding("utf8");
          proc.stdout.on("data", (chunk: string) => {
            stdout += chunk;
          });
          proc.stderr.on("data", (chunk: string) => {
            stderr += chunk;
          });

          proc.on("close", (code) => {
            clearTimeout(timer);

            let output = stdout;
            if (stderr) {
              output += (output ? "\n\nLogs:\n" : "") + stderr;
            }
            if (timedOut) {
              output += "\n\n(Training timed out)";
            }

            resolve({
              toolCallId: "",
              output: output || "(no output)",
              success: !timedOut && code === 0,
              error: timedOut
                ? "Training timed out"
                : code !== 0
                  ? `Exit code: ${code}`
                  : undefined,
            });
          });

          proc.on("error", (err) => {
            clearTimeout(timer);
            resolve(failure(err.message));
          });
        });
      } finally {
        // Best-effort cleanup once the process has finished.
        try {
          fs.rmSync(tempDir, { recursive: true, force: true });
        } catch {
          // Ignore
        }
      }
    },
  };
}
|
|
208
|
-
|
|
209
|
-
/**
 * Create the `finetune_model` tool. It generates a Modal app that loads a
 * pre-trained causal language model, optionally wraps it with LoRA, trains it
 * with `SFTTrainer` on a JSON dataset and saves the result, then executes that
 * app with `modal run`.
 *
 * Failures are reported through `success: false` and `error` on the result
 * rather than by throwing.
 *
 * NOTE(review): `dataset_path` and `output_dir` are used inside the
 * `@app.function` body of the generated app, so they presumably resolve on the
 * Modal side rather than on the local machine — confirm against how the tool
 * is expected to be used.
 *
 * @param config - Credentials, default W&B project and timeout (2 hours when unset).
 * @returns A tool whose `execute` accepts the parameters listed in `parameters`.
 */
export function createFineTuningTool(config: TrainingConfig = {}): Tool {
  /** Build a failed result that carries only an error message. */
  const failure = (error: string): ToolResult => ({
    toolCallId: "",
    output: "",
    success: false,
    error,
  });

  /**
   * Render a value as a Python string literal. A JSON string literal is also
   * a valid Python one, so quotes, backslashes and newlines in user input
   * cannot terminate the literal or inject code.
   */
  const pyStr = (value: string): string => JSON.stringify(value);

  /** Defaults for the hyperparameters that are written into the generated app. */
  const defaultHyperparameters = {
    learning_rate: 2e-5,
    num_epochs: 3,
    batch_size: 8,
    warmup_steps: 100,
  };

  return {
    name: "finetune_model",
    description: "Fine-tune a pre-trained model (GPT, BERT, etc.) on custom data using Modal for compute.",
    parameters: [
      {
        name: "base_model",
        type: "string",
        description: "Base model to fine-tune (e.g., 'gpt2', 'bert-base-uncased', 'meta-llama/Llama-2-7b')",
        required: true,
      },
      {
        name: "dataset_path",
        type: "string",
        description: "Path to training dataset (JSONL format with 'text' or 'prompt'/'completion' fields)",
        required: true,
      },
      {
        name: "output_dir",
        type: "string",
        description: "Directory to save the fine-tuned model",
        required: true,
      },
      {
        name: "hyperparameters",
        type: "object",
        description: "Training hyperparameters (learning_rate, epochs, batch_size, etc.)",
        required: false,
      },
      {
        name: "run_name",
        type: "string",
        description: "Name for the training run",
        required: true,
      },
      {
        name: "use_lora",
        type: "boolean",
        description: "Use LoRA for efficient fine-tuning (default: true for large models)",
        required: false,
      },
    ],

    async execute(args: Record<string, unknown>): Promise<ToolResult> {
      const baseModel = typeof args.base_model === "string" ? args.base_model : "";
      const datasetPath = typeof args.dataset_path === "string" ? args.dataset_path : "";
      const outputDir = typeof args.output_dir === "string" ? args.output_dir : "";
      const runName = typeof args.run_name === "string" ? args.run_name : "";
      // LoRA stays on unless the caller explicitly passes `false`.
      const useLora = args.use_lora !== false;

      if (!baseModel || !datasetPath || !outputDir || !runName) {
        return failure("base_model, dataset_path, output_dir, and run_name are required");
      }

      // Accept either an object or its JSON text.
      let hyperparameters: unknown = args.hyperparameters || {};
      if (typeof hyperparameters === "string") {
        try {
          hyperparameters = JSON.parse(hyperparameters);
        } catch {
          hyperparameters = undefined;
        }
      }
      if (
        hyperparameters === null ||
        typeof hyperparameters !== "object" ||
        Array.isArray(hyperparameters)
      ) {
        return failure("hyperparameters must be a JSON object");
      }
      const overrides = hyperparameters as Record<string, unknown>;

      // These values are written into Python source as bare numbers, so each
      // one must be a finite number; anything else is rejected up front.
      const hp = { ...defaultHyperparameters };
      for (const key of Object.keys(defaultHyperparameters) as Array<keyof typeof hp>) {
        const raw = overrides[key];
        if (raw === undefined || raw === null) continue;
        const value =
          typeof raw === "number"
            ? raw
            : typeof raw === "string" && raw.trim() !== ""
              ? Number(raw)
              : Number.NaN;
        if (!Number.isFinite(value)) {
          return failure(`hyperparameters.${key} must be a finite number`);
        }
        hp[key] = value;
      }

      const loraSection = useLora
        ? `
    # Apply LoRA
    lora_config = LoraConfig(
        r=16,
        lora_alpha=32,
        target_modules=["q_proj", "v_proj"],
        lora_dropout=0.05,
        bias="none",
    )
    model = get_peft_model(model, lora_config)
`
        : "";

      const modalCode = `
import modal

app = modal.App(${pyStr("finetune-" + runName)})

image = modal.Image.debian_slim(python_version="3.11").pip_install([
    "torch",
    "transformers",
    "datasets",
    "accelerate",
    "peft",
    "bitsandbytes",
    "wandb",
    "trl",
])

@app.function(
    image=image,
    gpu="A10G",
    timeout=7200,
    secrets=[modal.Secret.from_name("wandb-secret", required=False)],
)
def finetune():
    import torch
    import wandb
    from transformers import AutoModelForCausalLM, AutoTokenizer, TrainingArguments
    from datasets import load_dataset
    from peft import LoraConfig, get_peft_model
    from trl import SFTTrainer

    wandb.init(project=${pyStr(config.defaultProject || "pioneer-finetune")}, name=${pyStr(runName)})

    # Load model and tokenizer
    model = AutoModelForCausalLM.from_pretrained(
        ${pyStr(baseModel)},
        torch_dtype=torch.float16,
        device_map="auto",
    )
    tokenizer = AutoTokenizer.from_pretrained(${pyStr(baseModel)})
    tokenizer.pad_token = tokenizer.eos_token
${loraSection}
    # Load dataset
    dataset = load_dataset("json", data_files=${pyStr(datasetPath)})["train"]

    # Training arguments
    training_args = TrainingArguments(
        output_dir=${pyStr(outputDir)},
        num_train_epochs=${hp.num_epochs},
        per_device_train_batch_size=${hp.batch_size},
        learning_rate=${hp.learning_rate},
        warmup_steps=${hp.warmup_steps},
        logging_steps=10,
        save_steps=500,
        report_to="wandb",
    )

    # Train
    trainer = SFTTrainer(
        model=model,
        train_dataset=dataset,
        args=training_args,
        tokenizer=tokenizer,
    )
    trainer.train()

    # Save model
    trainer.save_model(${pyStr(outputDir)})
    tokenizer.save_pretrained(${pyStr(outputDir)})

    wandb.finish()
    return {"status": "completed", "output_dir": ${pyStr(outputDir)}}

@app.local_entrypoint()
def main():
    import json
    result = finetune.remote()
    print(json.dumps(result))
`;

      const timeout = config.timeout || 7200000; // 2 hours

      const env: NodeJS.ProcessEnv = { ...process.env };
      if (config.modalTokenId) env.MODAL_TOKEN_ID = config.modalTokenId;
      if (config.modalTokenSecret) env.MODAL_TOKEN_SECRET = config.modalTokenSecret;
      if (config.wandbApiKey) env.WANDB_API_KEY = config.wandbApiKey;

      const tempDir = fs.mkdtempSync(path.join(os.tmpdir(), "finetune-"));

      try {
        const appFile = path.join(tempDir, "finetune_app.py");
        fs.writeFileSync(appFile, modalCode);

        // `await` is essential: returning the bare promise would run the
        // `finally` cleanup right after spawning, deleting the app file while
        // `modal run` may not have read it yet.
        return await new Promise<ToolResult>((resolve) => {
          let stdout = "";
          let stderr = "";
          let timedOut = false;

          const proc = spawn("modal", ["run", appFile], {
            stdio: ["pipe", "pipe", "pipe"],
            env,
          });

          const timer = setTimeout(() => {
            timedOut = true;
            proc.kill("SIGTERM");
          }, timeout);

          // Stream-level decoding keeps multi-byte characters intact across chunks.
          proc.stdout.setEncoding("utf8");
          proc.stderr.setEncoding("utf8");
          proc.stdout.on("data", (chunk: string) => {
            stdout += chunk;
          });
          proc.stderr.on("data", (chunk: string) => {
            stderr += chunk;
          });

          proc.on("close", (code) => {
            clearTimeout(timer);

            let output = stdout;
            if (stderr) output += (output ? "\n\n" : "") + stderr;
            if (timedOut) output += "\n\n(Fine-tuning timed out)";

            resolve({
              toolCallId: "",
              output: output || "(no output)",
              success: !timedOut && code === 0,
              error: timedOut ? "Timed out" : code !== 0 ? `Exit code: ${code}` : undefined,
            });
          });

          proc.on("error", (err) => {
            clearTimeout(timer);
            resolve(failure(err.message));
          });
        });
      } finally {
        // Best-effort cleanup once the process has finished.
        try {
          fs.rmSync(tempDir, { recursive: true, force: true });
        } catch {
          // Ignore
        }
      }
    },
  };
}
|
|
439
|
-
|
|
440
|
-
/**
 * Build every training-related tool with one shared configuration.
 *
 * @param config - Passed unchanged to each tool factory.
 * @returns The training tool followed by the fine-tuning tool.
 */
export function createTrainingTools(config: TrainingConfig = {}): Tool[] {
  const trainingTool = createTrainingTool(config);
  const fineTuningTool = createFineTuningTool(config);
  return [trainingTool, fineTuningTool];
}
|
|
443
|
-
|