@fastino-ai/pioneer-cli 0.2.1 → 0.2.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.claude/settings.local.json +7 -1
- package/.cursor/rules/api-documentation.mdc +14 -0
- package/.cursor/rules/backend-location-rule.mdc +5 -0
- package/Medical_NER_Dataset_1.jsonl +50 -0
- package/README.md +4 -1
- package/bun.lock +52 -0
- package/package.json +5 -2
- package/src/api.ts +551 -22
- package/src/chat/ChatApp.tsx +548 -263
- package/src/client/ToolExecutor.ts +175 -0
- package/src/client/WebSocketClient.ts +333 -0
- package/src/client/index.ts +2 -0
- package/src/config.ts +49 -139
- package/src/index.tsx +815 -107
- package/src/telemetry.ts +173 -0
- package/src/tests/config.test.ts +19 -0
- package/src/tools/bash.ts +1 -1
- package/src/tools/filesystem.ts +1 -1
- package/src/tools/index.ts +2 -9
- package/src/tools/sandbox.ts +1 -1
- package/src/tools/types.ts +25 -0
- package/src/utils/index.ts +6 -0
- package/fastino-ai-pioneer-cli-0.2.0.tgz +0 -0
- package/ner_dataset.json +0 -111
- package/src/agent/Agent.ts +0 -342
- package/src/agent/BudgetManager.ts +0 -167
- package/src/agent/LLMClient.ts +0 -435
- package/src/agent/ToolRegistry.ts +0 -97
- package/src/agent/index.ts +0 -15
- package/src/agent/types.ts +0 -84
- package/src/evolution/EvalRunner.ts +0 -301
- package/src/evolution/EvolutionEngine.ts +0 -319
- package/src/evolution/FeedbackCollector.ts +0 -197
- package/src/evolution/ModelTrainer.ts +0 -371
- package/src/evolution/index.ts +0 -18
- package/src/evolution/types.ts +0 -110
- package/src/tools/modal.ts +0 -269
- package/src/tools/training.ts +0 -443
- package/src/tools/wandb.ts +0 -348
- /package/src/{agent → utils}/FileResolver.ts +0 -0
|
@@ -1,197 +0,0 @@
|
|
|
1
|
-
/**
|
|
2
|
-
* FeedbackCollector - Collects and stores training feedback from interactions
|
|
3
|
-
*/
|
|
4
|
-
|
|
5
|
-
import * as fs from "fs";
|
|
6
|
-
import * as path from "path";
|
|
7
|
-
import * as os from "os";
|
|
8
|
-
import type { Feedback, TrainingData } from "./types.js";
|
|
9
|
-
|
|
10
|
-
/**
 * Optional settings accepted by the `FeedbackCollector` constructor.
 */
export interface FeedbackCollectorConfig {
  /**
   * Directory that holds `feedback.json`.
   * The collector falls back to `~/.pioneer/feedback` when this is not set.
   */
  storagePath?: string;

  /**
   * Upper bound on the number of stored feedback entries; older entries are
   * dropped first. The collector falls back to 1000 when this is not set.
   */
  maxFeedbackItems?: number;
}
|
|
14
|
-
|
|
15
|
-
/**
 * Stores interaction feedback in `<storagePath>/feedback.json` and converts it
 * into training examples.
 *
 * All persistence is synchronous: every mutating call rewrites the whole file.
 */
export class FeedbackCollector {
  private storagePath: string;
  private maxItems: number;
  private feedback: Feedback[] = [];

  /**
   * @param config Optional storage location and retention limit. A missing,
   *   non-finite or non-positive `maxFeedbackItems` falls back to 1000.
   */
  constructor(config: FeedbackCollectorConfig = {}) {
    this.storagePath =
      config.storagePath || path.join(os.homedir(), ".pioneer", "feedback");

    // A limit below 1 would make `slice(-limit)` keep everything (or drop from
    // the wrong end), so anything that is not a usable count uses the default.
    const requested = config.maxFeedbackItems;
    this.maxItems =
      typeof requested === "number" && Number.isFinite(requested) && requested >= 1
        ? Math.floor(requested)
        : 1000;

    this.ensureStorageDir();
    this.loadFeedback();
  }

  /** Creates the storage directory (and parents) if it does not exist yet. */
  private ensureStorageDir(): void {
    // `recursive: true` makes this a no-op when the directory already exists.
    fs.mkdirSync(this.storagePath, { recursive: true });
  }

  /**
   * Loads `feedback.json` into memory. A missing, unreadable or malformed file
   * leaves the collector empty instead of throwing.
   */
  private loadFeedback(): void {
    const feedbackFile = path.join(this.storagePath, "feedback.json");
    try {
      if (!fs.existsSync(feedbackFile)) {
        return;
      }
      const parsed: unknown = JSON.parse(fs.readFileSync(feedbackFile, "utf-8"));
      if (!Array.isArray(parsed)) {
        // Valid JSON but not a list of entries: treat like a corrupt file.
        this.feedback = [];
        return;
      }
      this.feedback = parsed
        .filter((entry): entry is Feedback => typeof entry === "object" && entry !== null)
        .map((entry) => this.reviveEntry(entry));
    } catch {
      this.feedback = [];
    }
  }

  /**
   * Restores the in-memory shape of an entry read back from JSON:
   * `timestamp` is serialized as a string and must become a `Date` again, and
   * a non-numeric rating (e.g. `null` written for `NaN`) is dropped.
   */
  private reviveEntry(raw: Feedback): Feedback {
    const revived: Feedback = { ...raw, timestamp: new Date(raw.timestamp) };
    if (typeof revived.rating !== "number" || Number.isNaN(revived.rating)) {
      delete revived.rating;
    }
    return revived;
  }

  /** Writes the full in-memory list to `feedback.json` (pretty-printed). */
  private saveFeedback(): void {
    const feedbackFile = path.join(this.storagePath, "feedback.json");
    fs.writeFileSync(feedbackFile, JSON.stringify(this.feedback, null, 2));
  }

  /**
   * Appends one interaction, trims the list to the retention limit and saves.
   *
   * @returns The generated feedback id (`fb_<epoch-ms>_<random>`), usable with
   *   {@link addRating}.
   */
  recordInteraction(params: {
    sessionId: string;
    userMessage: string;
    agentResponse: string;
    toolCalls: string[];
    wasSuccessful: boolean;
    metadata?: Record<string, unknown>;
  }): string {
    const id = `fb_${Date.now()}_${Math.random().toString(36).slice(2, 8)}`;

    const feedback: Feedback = {
      id,
      sessionId: params.sessionId,
      timestamp: new Date(),
      userMessage: params.userMessage,
      agentResponse: params.agentResponse,
      toolCalls: params.toolCalls,
      wasSuccessful: params.wasSuccessful,
      metadata: params.metadata,
    };

    this.feedback.push(feedback);

    // Keep only the newest `maxItems` entries.
    if (this.feedback.length > this.maxItems) {
      this.feedback = this.feedback.slice(-this.maxItems);
    }

    this.saveFeedback();
    return id;
  }

  /**
   * Attaches a rating (clamped to 1..5) and optional corrected response to an
   * existing entry. Unknown ids are ignored. A `NaN` rating is ignored so it
   * cannot be persisted as `null`; corrections are still applied in that case.
   */
  addRating(feedbackId: string, rating: number, corrections?: string): void {
    const item = this.feedback.find((f) => f.id === feedbackId);
    if (!item) {
      return;
    }
    if (!Number.isNaN(rating)) {
      item.rating = Math.max(1, Math.min(5, rating));
    }
    if (corrections) {
      item.corrections = corrections;
    }
    this.saveFeedback();
  }

  /**
   * Returns up to `limit` of the most recent entries, oldest first.
   * A limit below 1 (or `NaN`) yields an empty array.
   */
  getRecentFeedback(limit = 100): Feedback[] {
    const count = Math.floor(limit);
    // `slice(-0)` is `slice(0)` and would return the whole list.
    if (!(count >= 1)) {
      return [];
    }
    return this.feedback.slice(-count);
  }

  /** Entries whose rating is set and at least `minRating`. */
  getPositiveFeedback(minRating = 4): Feedback[] {
    return this.feedback.filter(
      (f) => f.rating !== undefined && f.rating >= minRating
    );
  }

  /** Entries whose rating is set and at most `maxRating`. */
  getNegativeFeedback(maxRating = 2): Feedback[] {
    return this.feedback.filter(
      (f) => f.rating !== undefined && f.rating <= maxRating
    );
  }

  /** Entries recorded with `wasSuccessful: true`. */
  getSuccessfulInteractions(): Feedback[] {
    return this.feedback.filter((f) => f.wasSuccessful);
  }

  /** Entries that carry a corrected response. */
  getCorrectedInteractions(): Feedback[] {
    return this.feedback.filter((f) => f.corrections !== undefined);
  }

  /**
   * Converts feedback into training examples.
   *
   * An entry is used when it was successful or rated 4 or higher. The
   * assistant turn is the correction when one exists, otherwise the original
   * response. Tool calls are carried over by name only (empty arguments and
   * result), because that is all the feedback entry records.
   */
  toTrainingData(): TrainingData[] {
    const trainingData: TrainingData[] = [];

    for (const fb of this.feedback) {
      const highlyRated = fb.rating !== undefined && fb.rating >= 4;
      if (!fb.wasSuccessful && !highlyRated) {
        continue;
      }

      const assistantContent = fb.corrections || fb.agentResponse;

      trainingData.push({
        id: fb.id,
        messages: [
          { role: "user", content: fb.userMessage },
          { role: "assistant", content: assistantContent },
        ],
        toolCalls: fb.toolCalls.map((name) => ({
          name,
          arguments: {},
          result: "",
        })),
        metadata: fb.metadata,
      });
    }

    return trainingData;
  }

  /** Writes the training examples to `outputPath`, one JSON object per line. */
  exportAsJsonl(outputPath: string): void {
    const lines = this.toTrainingData().map((d) => JSON.stringify(d));
    fs.writeFileSync(outputPath, lines.join("\n"));
  }

  /**
   * Writes the training examples to `outputPath` as JSONL where each line
   * holds only `{ messages }`.
   */
  exportAsOpenAIFormat(outputPath: string): void {
    const lines = this.toTrainingData().map((d) =>
      JSON.stringify({ messages: d.messages })
    );
    fs.writeFileSync(outputPath, lines.join("\n"));
  }

  /**
   * Summary counters over the stored feedback.
   *
   * `avgRating` is rounded to two decimals (0 when nothing is rated) and
   * `successRate` is a whole-number percentage (0 when empty).
   */
  getStats(): {
    total: number;
    rated: number;
    avgRating: number;
    successRate: number;
    corrected: number;
  } {
    const total = this.feedback.length;
    const rated = this.feedback.filter((f) => f.rating !== undefined);
    const avgRating =
      rated.length > 0
        ? rated.reduce((sum, f) => sum + (f.rating || 0), 0) / rated.length
        : 0;
    const successful = this.feedback.filter((f) => f.wasSuccessful);
    const corrected = this.feedback.filter((f) => f.corrections !== undefined);

    return {
      total,
      rated: rated.length,
      avgRating: Math.round(avgRating * 100) / 100,
      successRate: total > 0 ? Math.round((successful.length / total) * 100) : 0,
      corrected: corrected.length,
    };
  }

  /** Removes every entry and persists the empty list. */
  clear(): void {
    this.feedback = [];
    this.saveFeedback();
  }
}
|
|
197
|
-
|
|
@@ -1,371 +0,0 @@
|
|
|
1
|
-
/**
|
|
2
|
-
* ModelTrainer - Fine-tune or train models based on feedback
|
|
3
|
-
*/
|
|
4
|
-
|
|
5
|
-
import * as fs from "fs";
|
|
6
|
-
import * as path from "path";
|
|
7
|
-
import * as os from "os";
|
|
8
|
-
import { spawn } from "child_process";
|
|
9
|
-
import type { TrainingData } from "./types.js";
|
|
10
|
-
|
|
11
|
-
/**
 * Settings for `ModelTrainer`.
 */
export interface ModelTrainerConfig {
  /**
   * Training backend. `ModelTrainer.train` dispatches on `"openai"`,
   * `"modal"` and `"local"`; any other value is reported as unsupported.
   */
  provider: "openai" | "anthropic" | "modal" | "local";

  /** Identifier of the model to fine-tune. */
  baseModel: string;

  /** Output directory; the trainer falls back to `~/.pioneer/models`. */
  outputDir?: string;

  /** OpenAI key; the trainer falls back to the `OPENAI_API_KEY` env variable. */
  openaiApiKey?: string;

  /** Anthropic key. */
  anthropicApiKey?: string;

  /** Passed to the `modal` CLI as `MODAL_TOKEN_ID` when set. */
  modalTokenId?: string;

  /** Passed to the `modal` CLI as `MODAL_TOKEN_SECRET` when set. */
  modalTokenSecret?: string;
}
|
|
20
|
-
|
|
21
|
-
/**
 * Outcome of a `ModelTrainer.train` call.
 */
export interface TrainingResult {
  /** `true` when the training step was started or completed without error. */
  success: boolean;

  /** Provider-side identifier (for OpenAI, the id returned for the fine-tuning job). */
  modelId?: string;

  /**
   * Where the result lives: the fine-tuned model name (OpenAI), the model path
   * reported by the Modal job, or the written training file (local).
   */
  modelPath?: string;

  /** Optional training metrics. */
  metrics?: {
    loss?: number;
    accuracy?: number;
    epochs?: number;
  };

  /** Human-readable failure reason when `success` is `false`. */
  error?: string;
}
|
|
32
|
-
|
|
33
|
-
/**
 * Starts a fine-tuning run (or prepares the data for one) from a list of
 * training examples, using the backend selected in the configuration.
 */
export class ModelTrainer {
  private config: ModelTrainerConfig;
  private outputDir: string;

  /**
   * @param config Backend selection and credentials. `outputDir` falls back
   *   to `~/.pioneer/models`; the directory is created if needed.
   */
  constructor(config: ModelTrainerConfig) {
    this.config = config;
    this.outputDir =
      config.outputDir || path.join(os.homedir(), ".pioneer", "models");
    this.ensureOutputDir();
  }

  /** Creates the output directory (and parents) if it does not exist yet. */
  private ensureOutputDir(): void {
    // `recursive: true` makes this a no-op when the directory already exists.
    fs.mkdirSync(this.outputDir, { recursive: true });
  }

  /**
   * Runs training with the configured provider.
   *
   * @param trainingData At least 10 examples are required.
   * @returns A result object; validation and provider failures are reported
   *   through `success: false` and `error`.
   */
  async train(trainingData: TrainingData[]): Promise<TrainingResult> {
    if (trainingData.length < 10) {
      return {
        success: false,
        error: "Insufficient training data. Need at least 10 examples.",
      };
    }

    switch (this.config.provider) {
      case "openai":
        return this.trainOpenAI(trainingData);
      case "modal":
        return this.trainModal(trainingData);
      case "local":
        return this.trainLocal(trainingData);
      default:
        return {
          success: false,
          error: `Unsupported provider: ${this.config.provider}`,
        };
    }
  }

  /**
   * Uploads the examples to OpenAI as a JSONL file and creates a fine-tuning
   * job for it.
   *
   * @returns On success, `modelId` is the id from the job-creation response
   *   and `modelPath` is its `fine_tuned_model` field (may be undefined).
   */
  private async trainOpenAI(trainingData: TrainingData[]): Promise<TrainingResult> {
    const apiKey = this.config.openaiApiKey || process.env.OPENAI_API_KEY;
    if (!apiKey) {
      return { success: false, error: "OpenAI API key not provided" };
    }

    // Only the `messages` field is uploaded. The payload is built in memory;
    // there is no need to round-trip it through a temporary file.
    const payload = trainingData
      .map((d) => JSON.stringify({ messages: d.messages }))
      .join("\n");

    const formData = new FormData();
    formData.append("purpose", "fine-tune");
    formData.append("file", new Blob([payload]), "training.jsonl");

    // Upload file
    const uploadResponse = await fetch("https://api.openai.com/v1/files", {
      method: "POST",
      headers: {
        Authorization: `Bearer ${apiKey}`,
      },
      body: formData,
    });

    if (!uploadResponse.ok) {
      const error = await uploadResponse.text();
      return { success: false, error: `File upload failed: ${error}` };
    }

    const uploadResult = (await uploadResponse.json()) as { id: string };

    // Create fine-tuning job
    const ftResponse = await fetch("https://api.openai.com/v1/fine_tuning/jobs", {
      method: "POST",
      headers: {
        Authorization: `Bearer ${apiKey}`,
        "Content-Type": "application/json",
      },
      body: JSON.stringify({
        training_file: uploadResult.id,
        model: this.config.baseModel || "gpt-4o-mini-2024-07-18",
      }),
    });

    if (!ftResponse.ok) {
      const error = await ftResponse.text();
      return { success: false, error: `Fine-tuning failed: ${error}` };
    }

    const ftResult = (await ftResponse.json()) as {
      id: string;
      fine_tuned_model?: string;
    };

    return {
      success: true,
      modelId: ftResult.id,
      modelPath: ftResult.fine_tuned_model,
    };
  }

  /**
   * Writes the examples and a generated Modal app into a temporary directory
   * and runs it with the `modal` CLI.
   *
   * The temporary directory is removed only after the CLI process has
   * finished, and also when writing the files fails.
   */
  private async trainModal(trainingData: TrainingData[]): Promise<TrainingResult> {
    const tempDir = fs.mkdtempSync(path.join(os.tmpdir(), "modal-train-"));
    const dataFile = path.join(tempDir, "training_data.jsonl");
    const appFile = path.join(tempDir, "train_app.py");

    try {
      // Save training data
      fs.writeFileSync(
        dataFile,
        trainingData.map((d) => JSON.stringify(d)).join("\n")
      );

      // Create Modal training app
      fs.writeFileSync(appFile, this.buildModalApp());

      // `await` is required here: returning the bare promise would run the
      // `finally` cleanup immediately, deleting the files while the child
      // process still needs them.
      return await this.runModal(appFile, dataFile);
    } finally {
      fs.rmSync(tempDir, { recursive: true, force: true });
    }
  }

  /**
   * Builds the Python source of the Modal fine-tuning app.
   *
   * The base model name is embedded through `JSON.stringify`, which yields a
   * double-quoted literal with quotes, backslashes and control characters
   * escaped, so an unusual model name cannot break out of the Python string.
   */
  private buildModalApp(): string {
    const baseModelLiteral = JSON.stringify(
      this.config.baseModel || "meta-llama/Llama-2-7b-hf"
    );

    return `
import modal
import json

app = modal.App("pioneer-finetune")

volume = modal.Volume.from_name("pioneer-models", create_if_missing=True)

image = modal.Image.debian_slim(python_version="3.11").pip_install([
    "torch",
    "transformers",
    "datasets",
    "accelerate",
    "peft",
    "bitsandbytes",
])

@app.function(
    image=image,
    gpu="A10G",
    timeout=7200,
    volumes={"/models": volume},
)
def finetune(data_json: str, base_model: str, output_name: str):
    import torch
    from transformers import AutoModelForCausalLM, AutoTokenizer, TrainingArguments, Trainer
    from datasets import Dataset
    from peft import LoraConfig, get_peft_model

    # Parse data
    data = [json.loads(line) for line in data_json.strip().split("\\n")]

    # Convert to dataset
    texts = []
    for item in data:
        text = ""
        for msg in item.get("messages", []):
            text += f"{msg['role']}: {msg['content']}\\n"
        texts.append(text)

    dataset = Dataset.from_dict({"text": texts})

    # Load model
    model = AutoModelForCausalLM.from_pretrained(
        base_model,
        torch_dtype=torch.float16,
        device_map="auto",
    )
    tokenizer = AutoTokenizer.from_pretrained(base_model)
    tokenizer.pad_token = tokenizer.eos_token

    # Apply LoRA
    lora_config = LoraConfig(r=16, lora_alpha=32, target_modules=["q_proj", "v_proj"])
    model = get_peft_model(model, lora_config)

    # Tokenize
    def tokenize(examples):
        return tokenizer(examples["text"], truncation=True, max_length=512, padding="max_length")

    tokenized = dataset.map(tokenize, batched=True)

    # Train
    training_args = TrainingArguments(
        output_dir=f"/models/{output_name}",
        num_train_epochs=3,
        per_device_train_batch_size=4,
        learning_rate=2e-5,
        save_steps=500,
        logging_steps=10,
    )

    trainer = Trainer(
        model=model,
        args=training_args,
        train_dataset=tokenized,
    )
    trainer.train()

    # Save
    trainer.save_model(f"/models/{output_name}")
    volume.commit()

    return {"success": True, "model_path": f"/models/{output_name}"}

@app.local_entrypoint()
def main():
    import sys
    data_file = sys.argv[1] if len(sys.argv) > 1 else "training_data.jsonl"
    base_model = ${baseModelLiteral}
    output_name = "pioneer-ft-" + str(int(__import__("time").time()))

    with open(data_file) as f:
        data_json = f.read()

    result = finetune.remote(data_json, base_model, output_name)
    print(json.dumps(result))
`;
  }

  /**
   * Runs `modal run <appFile> -- <dataFile>` and resolves once the process
   * has exited (or failed to start). Never rejects.
   *
   * On exit code 0 the last stdout line is parsed as JSON and its
   * `model_path` is returned; if that line is not JSON, the raw stdout is
   * returned as `modelPath`.
   */
  private runModal(appFile: string, dataFile: string): Promise<TrainingResult> {
    return new Promise<TrainingResult>((resolve) => {
      let stdout = "";
      let stderr = "";

      const env: NodeJS.ProcessEnv = { ...process.env };
      if (this.config.modalTokenId) {
        env.MODAL_TOKEN_ID = this.config.modalTokenId;
      }
      if (this.config.modalTokenSecret) {
        env.MODAL_TOKEN_SECRET = this.config.modalTokenSecret;
      }

      const proc = spawn("modal", ["run", appFile, "--", dataFile], {
        stdio: ["pipe", "pipe", "pipe"],
        env,
      });

      proc.stdout.on("data", (data: Buffer) => {
        stdout += data.toString();
      });

      proc.stderr.on("data", (data: Buffer) => {
        stderr += data.toString();
      });

      proc.on("close", (code) => {
        if (code !== 0) {
          resolve({
            success: false,
            error: stderr || `Exit code: ${code}`,
          });
          return;
        }

        try {
          const result = JSON.parse(stdout.trim().split("\n").pop() || "{}") as {
            model_path?: string;
          };
          resolve({
            success: true,
            modelPath: result.model_path,
          });
        } catch {
          resolve({ success: true, modelPath: stdout });
        }
      });

      // Emitted e.g. when the `modal` executable cannot be started.
      proc.on("error", (err) => {
        resolve({ success: false, error: err.message });
      });
    });
  }

  /**
   * "Local" training does not train anything: it writes the examples as JSONL
   * into the output directory and returns that file's path as `modelPath`,
   * with `metrics.epochs` set to 0.
   */
  private async trainLocal(trainingData: TrainingData[]): Promise<TrainingResult> {
    const outputPath = path.join(
      this.outputDir,
      `training_${Date.now()}.jsonl`
    );

    fs.writeFileSync(
      outputPath,
      trainingData.map((d) => JSON.stringify(d)).join("\n")
    );

    return {
      success: true,
      modelPath: outputPath,
      metrics: {
        epochs: 0,
      },
    };
  }

  /**
   * Prompt optimization (alternative to full fine-tuning).
   *
   * Currently a placeholder: the optimization prompt is assembled from up to
   * five positive and five negative examples, but no model is called and the
   * original prompt is returned unchanged.
   */
  async optimizePrompt(
    currentPrompt: string,
    feedback: { positive: string[]; negative: string[] }
  ): Promise<string> {
    const optimizationPrompt = `You are a prompt engineer. Given the current system prompt and feedback, create an improved version.

Current Prompt:
${currentPrompt}

Positive Examples (what worked well):
${feedback.positive.slice(0, 5).join("\n---\n")}

Negative Examples (what didn't work):
${feedback.negative.slice(0, 5).join("\n---\n")}

Create an improved system prompt that:
1. Keeps what's working well
2. Addresses the issues in negative examples
3. Maintains the core capabilities
4. Is clear and actionable

Improved Prompt:`;

    // Kept for the future LLM call; intentionally unused for now.
    void optimizationPrompt;

    // For now, return the original prompt
    // In a full implementation, this would call an LLM to generate the improved prompt
    return currentPrompt;
  }
}
|
|
371
|
-
|
package/src/evolution/index.ts
DELETED
|
@@ -1,18 +0,0 @@
|
|
|
1
|
-
/**
|
|
2
|
-
* Evolution module exports
|
|
3
|
-
*/
|
|
4
|
-
|
|
5
|
-
export { EvolutionEngine } from "./EvolutionEngine.js";
|
|
6
|
-
export type { EvolutionEngineConfig, EvolutionEvents } from "./EvolutionEngine.js";
|
|
7
|
-
|
|
8
|
-
export { FeedbackCollector } from "./FeedbackCollector.js";
|
|
9
|
-
export type { FeedbackCollectorConfig } from "./FeedbackCollector.js";
|
|
10
|
-
|
|
11
|
-
export { EvalRunner, DEFAULT_EVAL_CASES } from "./EvalRunner.js";
|
|
12
|
-
export type { EvalRunnerConfig } from "./EvalRunner.js";
|
|
13
|
-
|
|
14
|
-
export { ModelTrainer } from "./ModelTrainer.js";
|
|
15
|
-
export type { ModelTrainerConfig, TrainingResult } from "./ModelTrainer.js";
|
|
16
|
-
|
|
17
|
-
export * from "./types.js";
|
|
18
|
-
|
package/src/evolution/types.ts
DELETED
|
@@ -1,110 +0,0 @@
|
|
|
1
|
-
/**
|
|
2
|
-
* Types for the self-evolution system
|
|
3
|
-
*/
|
|
4
|
-
|
|
5
|
-
/**
 * One recorded interaction together with any rating or correction that was
 * attached to it afterwards.
 */
export interface Feedback {
  /** Unique id of this entry. */
  id: string;
  /** Session the interaction belongs to. */
  sessionId: string;
  /** When the interaction was recorded. */
  timestamp: Date;
  /** What the user sent. */
  userMessage: string;
  /** What the agent answered. */
  agentResponse: string;
  /** Names of the tools invoked during the interaction. */
  toolCalls: string[];
  /** Optional rating on a 1-5 scale. */
  rating?: number;
  /** Optional corrected response supplied with a rating. */
  corrections?: string;
  /** Whether the interaction was considered successful when recorded. */
  wasSuccessful: boolean;
  /** Free-form extra data. */
  metadata?: Record<string, unknown>;
}
|
|
17
|
-
|
|
18
|
-
/**
 * A single evaluation scenario: an input plus the criteria its outcome is
 * checked against.
 */
export interface EvalCase {
  /** Unique id of the case. */
  id: string;
  /** Short display name. */
  name: string;
  /** What the case is meant to verify. */
  description: string;
  /** Input given to the agent. */
  input: string;
  /** Optional reference output. */
  expectedOutput?: string;
  /** Optional names of tools expected to be called. */
  expectedToolCalls?: string[];
  /** Criteria evaluated for this case. */
  successCriteria: EvalCriteria[];
  /** Optional weight of this case. */
  weight?: number;
}
|
|
28
|
-
|
|
29
|
-
/**
 * One check applied to an evaluation outcome.
 */
export interface EvalCriteria {
  /** Kind of check to perform. */
  type:
    | "contains"
    | "not_contains"
    | "tool_called"
    | "tool_not_called"
    | "regex"
    | "custom";
  /** Operand of the check; its meaning depends on `type`. */
  value: string;
  /** Optional human-readable explanation. */
  description?: string;
}
|
|
34
|
-
|
|
35
|
-
/**
 * Outcome of running one evaluation case.
 */
export interface EvalResult {
  /** Id of the evaluated case. */
  caseId: string;
  /** Whether the case passed. */
  passed: boolean;
  /** Score assigned to the case. */
  score: number;
  /** Output the agent actually produced. */
  actualOutput: string;
  /** Names of the tools that were called. */
  toolsCalled: string[];
  /** Optional error messages collected during the run. */
  errors?: string[];
  /** Time the case took. */
  duration: number;
  /** Tokens consumed by the case. */
  tokenUsage: number;
}
|
|
45
|
-
|
|
46
|
-
/**
 * Aggregate of one evaluation run over a set of cases.
 */
export interface EvalRunSummary {
  /** Unique id of the run. */
  runId: string;
  /** When the run took place. */
  timestamp: Date;
  /** Number of cases in the run. */
  totalCases: number;
  /** Number of cases that passed. */
  passedCases: number;
  /** Number of cases that failed. */
  failedCases: number;
  /** Average score across cases. */
  averageScore: number;
  /** Tokens consumed by the whole run. */
  totalTokens: number;
  /** Time taken by the whole run. */
  totalDuration: number;
  /** Per-case results. */
  results: EvalResult[];
}
|
|
57
|
-
|
|
58
|
-
/**
 * Settings that drive an evolution run.
 */
export interface EvolutionConfig {
  /** Cases used to score each iteration. */
  evalCases: EvalCase[];
  /** Score to reach. */
  targetScore: number;
  /** Maximum number of iterations. */
  maxIterations: number;
  /** Optional resource limits applied to each iteration. */
  budgetPerIteration: {
    maxTokens?: number;
    maxCost?: number;
    maxTime?: number;
  };
  /** Number of recent feedback items to consider. */
  feedbackWindow: number;
  /** Optional training settings. */
  trainingConfig?: {
    provider: "openai" | "anthropic" | "modal";
    baseModel: string;
    fineTuneMethod: "full" | "lora" | "prompt";
  };
}
|
|
74
|
-
|
|
75
|
-
/**
 * Progress snapshot of an evolution run.
 */
export interface EvolutionState {
  /** Current iteration number. */
  iteration: number;
  /** Score of the current iteration. */
  currentScore: number;
  /** Best score seen so far. */
  bestScore: number;
  /** Prompt that produced the best score. */
  bestPrompt: string;
  /** Record of past iterations. */
  history: EvolutionHistory[];
  /** Tokens used so far. */
  totalTokensUsed: number;
  /** Cost incurred so far. */
  totalCostUsed: number;
  /** Time spent so far. */
  totalTimeUsed: number;
  /** When the run started. */
  startTime: Date;
  /** When the run ended, once it has. */
  endTime?: Date;
  /** Lifecycle status of the run. */
  status: "running" | "completed" | "failed" | "budget_exhausted";
}
|
|
88
|
-
|
|
89
|
-
/**
 * Record of a single evolution iteration.
 */
export interface EvolutionHistory {
  /** Iteration number this record describes. */
  iteration: number;
  /** Prompt used in the iteration. */
  prompt: string;
  /** Evaluation score the iteration achieved. */
  evalScore: number;
  /** Description of what changed in the iteration. */
  changes: string;
  /** When the iteration was recorded. */
  timestamp: Date;
}
|
|
96
|
-
|
|
97
|
-
/**
 * One training example: a short conversation plus optional tool-call records.
 */
export interface TrainingData {
  /** Unique id of the example. */
  id: string;
  /** Conversation turns, in order. */
  messages: {
    role: "user" | "assistant" | "system";
    content: string;
  }[];
  /** Optional tool invocations associated with the example. */
  toolCalls?: {
    name: string;
    arguments: Record<string, unknown>;
    result: string;
  }[];
  /** Free-form extra data. */
  metadata?: Record<string, unknown>;
}
|
|
110
|
-
|