@fastino-ai/pioneer-cli 0.2.1 → 0.2.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (40)
  1. package/.claude/settings.local.json +7 -1
  2. package/.cursor/rules/api-documentation.mdc +14 -0
  3. package/.cursor/rules/backend-location-rule.mdc +5 -0
  4. package/Medical_NER_Dataset_1.jsonl +50 -0
  5. package/README.md +4 -1
  6. package/bun.lock +52 -0
  7. package/package.json +5 -2
  8. package/src/api.ts +551 -22
  9. package/src/chat/ChatApp.tsx +548 -263
  10. package/src/client/ToolExecutor.ts +175 -0
  11. package/src/client/WebSocketClient.ts +333 -0
  12. package/src/client/index.ts +2 -0
  13. package/src/config.ts +49 -139
  14. package/src/index.tsx +815 -107
  15. package/src/telemetry.ts +173 -0
  16. package/src/tests/config.test.ts +19 -0
  17. package/src/tools/bash.ts +1 -1
  18. package/src/tools/filesystem.ts +1 -1
  19. package/src/tools/index.ts +2 -9
  20. package/src/tools/sandbox.ts +1 -1
  21. package/src/tools/types.ts +25 -0
  22. package/src/utils/index.ts +6 -0
  23. package/fastino-ai-pioneer-cli-0.2.0.tgz +0 -0
  24. package/ner_dataset.json +0 -111
  25. package/src/agent/Agent.ts +0 -342
  26. package/src/agent/BudgetManager.ts +0 -167
  27. package/src/agent/LLMClient.ts +0 -435
  28. package/src/agent/ToolRegistry.ts +0 -97
  29. package/src/agent/index.ts +0 -15
  30. package/src/agent/types.ts +0 -84
  31. package/src/evolution/EvalRunner.ts +0 -301
  32. package/src/evolution/EvolutionEngine.ts +0 -319
  33. package/src/evolution/FeedbackCollector.ts +0 -197
  34. package/src/evolution/ModelTrainer.ts +0 -371
  35. package/src/evolution/index.ts +0 -18
  36. package/src/evolution/types.ts +0 -110
  37. package/src/tools/modal.ts +0 -269
  38. package/src/tools/training.ts +0 -443
  39. package/src/tools/wandb.ts +0 -348
  40. package/src/{agent → utils}/FileResolver.ts +0 -0
@@ -1,197 +0,0 @@
1
- /**
2
- * FeedbackCollector - Collects and stores training feedback from interactions
3
- */
4
-
5
- import * as fs from "fs";
6
- import * as path from "path";
7
- import * as os from "os";
8
- import type { Feedback, TrainingData } from "./types.js";
9
-
10
- export interface FeedbackCollectorConfig {
11
- storagePath?: string;
12
- maxFeedbackItems?: number;
13
- }
14
-
15
- export class FeedbackCollector {
16
- private storagePath: string;
17
- private maxItems: number;
18
- private feedback: Feedback[] = [];
19
-
20
- constructor(config: FeedbackCollectorConfig = {}) {
21
- this.storagePath = config.storagePath || path.join(os.homedir(), ".pioneer", "feedback");
22
- this.maxItems = config.maxFeedbackItems || 1000;
23
- this.ensureStorageDir();
24
- this.loadFeedback();
25
- }
26
-
27
- private ensureStorageDir(): void {
28
- if (!fs.existsSync(this.storagePath)) {
29
- fs.mkdirSync(this.storagePath, { recursive: true });
30
- }
31
- }
32
-
33
- private loadFeedback(): void {
34
- const feedbackFile = path.join(this.storagePath, "feedback.json");
35
- try {
36
- if (fs.existsSync(feedbackFile)) {
37
- const data = fs.readFileSync(feedbackFile, "utf-8");
38
- this.feedback = JSON.parse(data);
39
- }
40
- } catch {
41
- this.feedback = [];
42
- }
43
- }
44
-
45
- private saveFeedback(): void {
46
- const feedbackFile = path.join(this.storagePath, "feedback.json");
47
- fs.writeFileSync(feedbackFile, JSON.stringify(this.feedback, null, 2));
48
- }
49
-
50
- recordInteraction(params: {
51
- sessionId: string;
52
- userMessage: string;
53
- agentResponse: string;
54
- toolCalls: string[];
55
- wasSuccessful: boolean;
56
- metadata?: Record<string, unknown>;
57
- }): string {
58
- const id = `fb_${Date.now()}_${Math.random().toString(36).slice(2, 8)}`;
59
-
60
- const feedback: Feedback = {
61
- id,
62
- sessionId: params.sessionId,
63
- timestamp: new Date(),
64
- userMessage: params.userMessage,
65
- agentResponse: params.agentResponse,
66
- toolCalls: params.toolCalls,
67
- wasSuccessful: params.wasSuccessful,
68
- metadata: params.metadata,
69
- };
70
-
71
- this.feedback.push(feedback);
72
-
73
- // Trim to max items
74
- if (this.feedback.length > this.maxItems) {
75
- this.feedback = this.feedback.slice(-this.maxItems);
76
- }
77
-
78
- this.saveFeedback();
79
- return id;
80
- }
81
-
82
- addRating(feedbackId: string, rating: number, corrections?: string): void {
83
- const item = this.feedback.find((f) => f.id === feedbackId);
84
- if (item) {
85
- item.rating = Math.max(1, Math.min(5, rating));
86
- if (corrections) {
87
- item.corrections = corrections;
88
- }
89
- this.saveFeedback();
90
- }
91
- }
92
-
93
- getRecentFeedback(limit = 100): Feedback[] {
94
- return this.feedback.slice(-limit);
95
- }
96
-
97
- getPositiveFeedback(minRating = 4): Feedback[] {
98
- return this.feedback.filter(
99
- (f) => f.rating !== undefined && f.rating >= minRating
100
- );
101
- }
102
-
103
- getNegativeFeedback(maxRating = 2): Feedback[] {
104
- return this.feedback.filter(
105
- (f) => f.rating !== undefined && f.rating <= maxRating
106
- );
107
- }
108
-
109
- getSuccessfulInteractions(): Feedback[] {
110
- return this.feedback.filter((f) => f.wasSuccessful);
111
- }
112
-
113
- getCorrectedInteractions(): Feedback[] {
114
- return this.feedback.filter((f) => f.corrections !== undefined);
115
- }
116
-
117
- // Convert feedback to training data format
118
- toTrainingData(): TrainingData[] {
119
- const trainingData: TrainingData[] = [];
120
-
121
- for (const fb of this.feedback) {
122
- // Only use successful or highly-rated interactions
123
- if (!fb.wasSuccessful && (fb.rating === undefined || fb.rating < 4)) {
124
- continue;
125
- }
126
-
127
- // Use corrections if available
128
- const assistantContent = fb.corrections || fb.agentResponse;
129
-
130
- trainingData.push({
131
- id: fb.id,
132
- messages: [
133
- { role: "user", content: fb.userMessage },
134
- { role: "assistant", content: assistantContent },
135
- ],
136
- toolCalls: fb.toolCalls.map((name) => ({
137
- name,
138
- arguments: {},
139
- result: "",
140
- })),
141
- metadata: fb.metadata,
142
- });
143
- }
144
-
145
- return trainingData;
146
- }
147
-
148
- // Export training data in various formats
149
- exportAsJsonl(outputPath: string): void {
150
- const data = this.toTrainingData();
151
- const lines = data.map((d) => JSON.stringify(d));
152
- fs.writeFileSync(outputPath, lines.join("\n"));
153
- }
154
-
155
- exportAsOpenAIFormat(outputPath: string): void {
156
- const data = this.toTrainingData();
157
- const formatted = data.map((d) => ({
158
- messages: d.messages,
159
- }));
160
- const lines = formatted.map((d) => JSON.stringify(d));
161
- fs.writeFileSync(outputPath, lines.join("\n"));
162
- }
163
-
164
- // Get statistics
165
- getStats(): {
166
- total: number;
167
- rated: number;
168
- avgRating: number;
169
- successRate: number;
170
- corrected: number;
171
- } {
172
- const rated = this.feedback.filter((f) => f.rating !== undefined);
173
- const avgRating =
174
- rated.length > 0
175
- ? rated.reduce((sum, f) => sum + (f.rating || 0), 0) / rated.length
176
- : 0;
177
- const successful = this.feedback.filter((f) => f.wasSuccessful);
178
- const corrected = this.feedback.filter((f) => f.corrections !== undefined);
179
-
180
- return {
181
- total: this.feedback.length,
182
- rated: rated.length,
183
- avgRating: Math.round(avgRating * 100) / 100,
184
- successRate:
185
- this.feedback.length > 0
186
- ? Math.round((successful.length / this.feedback.length) * 100)
187
- : 0,
188
- corrected: corrected.length,
189
- };
190
- }
191
-
192
- clear(): void {
193
- this.feedback = [];
194
- this.saveFeedback();
195
- }
196
- }
197
-
@@ -1,371 +0,0 @@
1
- /**
2
- * ModelTrainer - Fine-tune or train models based on feedback
3
- */
4
-
5
- import * as fs from "fs";
6
- import * as path from "path";
7
- import * as os from "os";
8
- import { spawn } from "child_process";
9
- import type { TrainingData } from "./types.js";
10
-
11
- export interface ModelTrainerConfig {
12
- provider: "openai" | "anthropic" | "modal" | "local";
13
- baseModel: string;
14
- outputDir?: string;
15
- openaiApiKey?: string;
16
- anthropicApiKey?: string;
17
- modalTokenId?: string;
18
- modalTokenSecret?: string;
19
- }
20
-
21
- export interface TrainingResult {
22
- success: boolean;
23
- modelId?: string;
24
- modelPath?: string;
25
- metrics?: {
26
- loss?: number;
27
- accuracy?: number;
28
- epochs?: number;
29
- };
30
- error?: string;
31
- }
32
-
33
- export class ModelTrainer {
34
- private config: ModelTrainerConfig;
35
- private outputDir: string;
36
-
37
- constructor(config: ModelTrainerConfig) {
38
- this.config = config;
39
- this.outputDir =
40
- config.outputDir || path.join(os.homedir(), ".pioneer", "models");
41
- this.ensureOutputDir();
42
- }
43
-
44
- private ensureOutputDir(): void {
45
- if (!fs.existsSync(this.outputDir)) {
46
- fs.mkdirSync(this.outputDir, { recursive: true });
47
- }
48
- }
49
-
50
- async train(trainingData: TrainingData[]): Promise<TrainingResult> {
51
- if (trainingData.length < 10) {
52
- return {
53
- success: false,
54
- error: "Insufficient training data. Need at least 10 examples.",
55
- };
56
- }
57
-
58
- switch (this.config.provider) {
59
- case "openai":
60
- return this.trainOpenAI(trainingData);
61
- case "modal":
62
- return this.trainModal(trainingData);
63
- case "local":
64
- return this.trainLocal(trainingData);
65
- default:
66
- return {
67
- success: false,
68
- error: `Unsupported provider: ${this.config.provider}`,
69
- };
70
- }
71
- }
72
-
73
- private async trainOpenAI(trainingData: TrainingData[]): Promise<TrainingResult> {
74
- const apiKey = this.config.openaiApiKey || process.env.OPENAI_API_KEY;
75
- if (!apiKey) {
76
- return { success: false, error: "OpenAI API key not provided" };
77
- }
78
-
79
- // Prepare training file
80
- const tempDir = fs.mkdtempSync(path.join(os.tmpdir(), "openai-train-"));
81
- const trainingFile = path.join(tempDir, "training.jsonl");
82
-
83
- const formattedData = trainingData.map((d) => ({
84
- messages: d.messages,
85
- }));
86
-
87
- fs.writeFileSync(
88
- trainingFile,
89
- formattedData.map((d) => JSON.stringify(d)).join("\n")
90
- );
91
-
92
- try {
93
- // Upload file
94
- const uploadResponse = await fetch(
95
- "https://api.openai.com/v1/files",
96
- {
97
- method: "POST",
98
- headers: {
99
- Authorization: `Bearer ${apiKey}`,
100
- },
101
- body: (() => {
102
- const formData = new FormData();
103
- formData.append("purpose", "fine-tune");
104
- formData.append(
105
- "file",
106
- new Blob([fs.readFileSync(trainingFile)]),
107
- "training.jsonl"
108
- );
109
- return formData;
110
- })(),
111
- }
112
- );
113
-
114
- if (!uploadResponse.ok) {
115
- const error = await uploadResponse.text();
116
- return { success: false, error: `File upload failed: ${error}` };
117
- }
118
-
119
- const uploadResult = await uploadResponse.json() as { id: string };
120
-
121
- // Create fine-tuning job
122
- const ftResponse = await fetch(
123
- "https://api.openai.com/v1/fine_tuning/jobs",
124
- {
125
- method: "POST",
126
- headers: {
127
- Authorization: `Bearer ${apiKey}`,
128
- "Content-Type": "application/json",
129
- },
130
- body: JSON.stringify({
131
- training_file: uploadResult.id,
132
- model: this.config.baseModel || "gpt-4o-mini-2024-07-18",
133
- }),
134
- }
135
- );
136
-
137
- if (!ftResponse.ok) {
138
- const error = await ftResponse.text();
139
- return { success: false, error: `Fine-tuning failed: ${error}` };
140
- }
141
-
142
- const ftResult = await ftResponse.json() as { id: string; fine_tuned_model?: string };
143
-
144
- return {
145
- success: true,
146
- modelId: ftResult.id,
147
- modelPath: ftResult.fine_tuned_model,
148
- };
149
- } finally {
150
- fs.rmSync(tempDir, { recursive: true, force: true });
151
- }
152
- }
153
-
154
- private async trainModal(trainingData: TrainingData[]): Promise<TrainingResult> {
155
- const tempDir = fs.mkdtempSync(path.join(os.tmpdir(), "modal-train-"));
156
- const dataFile = path.join(tempDir, "training_data.jsonl");
157
- const appFile = path.join(tempDir, "train_app.py");
158
-
159
- // Save training data
160
- fs.writeFileSync(
161
- dataFile,
162
- trainingData.map((d) => JSON.stringify(d)).join("\n")
163
- );
164
-
165
- // Create Modal training app
166
- const modalCode = `
167
- import modal
168
- import json
169
-
170
- app = modal.App("pioneer-finetune")
171
-
172
- volume = modal.Volume.from_name("pioneer-models", create_if_missing=True)
173
-
174
- image = modal.Image.debian_slim(python_version="3.11").pip_install([
175
- "torch",
176
- "transformers",
177
- "datasets",
178
- "accelerate",
179
- "peft",
180
- "bitsandbytes",
181
- ])
182
-
183
- @app.function(
184
- image=image,
185
- gpu="A10G",
186
- timeout=7200,
187
- volumes={"/models": volume},
188
- )
189
- def finetune(data_json: str, base_model: str, output_name: str):
190
- import torch
191
- from transformers import AutoModelForCausalLM, AutoTokenizer, TrainingArguments, Trainer
192
- from datasets import Dataset
193
- from peft import LoraConfig, get_peft_model
194
-
195
- # Parse data
196
- data = [json.loads(line) for line in data_json.strip().split("\\n")]
197
-
198
- # Convert to dataset
199
- texts = []
200
- for item in data:
201
- text = ""
202
- for msg in item.get("messages", []):
203
- text += f"{msg['role']}: {msg['content']}\\n"
204
- texts.append(text)
205
-
206
- dataset = Dataset.from_dict({"text": texts})
207
-
208
- # Load model
209
- model = AutoModelForCausalLM.from_pretrained(
210
- base_model,
211
- torch_dtype=torch.float16,
212
- device_map="auto",
213
- )
214
- tokenizer = AutoTokenizer.from_pretrained(base_model)
215
- tokenizer.pad_token = tokenizer.eos_token
216
-
217
- # Apply LoRA
218
- lora_config = LoraConfig(r=16, lora_alpha=32, target_modules=["q_proj", "v_proj"])
219
- model = get_peft_model(model, lora_config)
220
-
221
- # Tokenize
222
- def tokenize(examples):
223
- return tokenizer(examples["text"], truncation=True, max_length=512, padding="max_length")
224
-
225
- tokenized = dataset.map(tokenize, batched=True)
226
-
227
- # Train
228
- training_args = TrainingArguments(
229
- output_dir=f"/models/{output_name}",
230
- num_train_epochs=3,
231
- per_device_train_batch_size=4,
232
- learning_rate=2e-5,
233
- save_steps=500,
234
- logging_steps=10,
235
- )
236
-
237
- trainer = Trainer(
238
- model=model,
239
- args=training_args,
240
- train_dataset=tokenized,
241
- )
242
- trainer.train()
243
-
244
- # Save
245
- trainer.save_model(f"/models/{output_name}")
246
- volume.commit()
247
-
248
- return {"success": True, "model_path": f"/models/{output_name}"}
249
-
250
- @app.local_entrypoint()
251
- def main():
252
- import sys
253
- data_file = sys.argv[1] if len(sys.argv) > 1 else "training_data.jsonl"
254
- base_model = "${this.config.baseModel || "meta-llama/Llama-2-7b-hf"}"
255
- output_name = "pioneer-ft-" + str(int(__import__("time").time()))
256
-
257
- with open(data_file) as f:
258
- data_json = f.read()
259
-
260
- result = finetune.remote(data_json, base_model, output_name)
261
- print(json.dumps(result))
262
- `;
263
-
264
- fs.writeFileSync(appFile, modalCode);
265
-
266
- try {
267
- return new Promise((resolve) => {
268
- let stdout = "";
269
- let stderr = "";
270
-
271
- const env: NodeJS.ProcessEnv = { ...process.env };
272
- if (this.config.modalTokenId) {
273
- env.MODAL_TOKEN_ID = this.config.modalTokenId;
274
- }
275
- if (this.config.modalTokenSecret) {
276
- env.MODAL_TOKEN_SECRET = this.config.modalTokenSecret;
277
- }
278
-
279
- const proc = spawn("modal", ["run", appFile, "--", dataFile], {
280
- stdio: ["pipe", "pipe", "pipe"],
281
- env,
282
- });
283
-
284
- proc.stdout.on("data", (data: Buffer) => {
285
- stdout += data.toString();
286
- });
287
-
288
- proc.stderr.on("data", (data: Buffer) => {
289
- stderr += data.toString();
290
- });
291
-
292
- proc.on("close", (code) => {
293
- if (code === 0) {
294
- try {
295
- const result = JSON.parse(stdout.trim().split("\n").pop() || "{}");
296
- resolve({
297
- success: true,
298
- modelPath: result.model_path,
299
- });
300
- } catch {
301
- resolve({ success: true, modelPath: stdout });
302
- }
303
- } else {
304
- resolve({
305
- success: false,
306
- error: stderr || `Exit code: ${code}`,
307
- });
308
- }
309
- });
310
-
311
- proc.on("error", (err) => {
312
- resolve({ success: false, error: err.message });
313
- });
314
- });
315
- } finally {
316
- fs.rmSync(tempDir, { recursive: true, force: true });
317
- }
318
- }
319
-
320
- private async trainLocal(trainingData: TrainingData[]): Promise<TrainingResult> {
321
- // For local training, we'll save the data and provide instructions
322
- const outputPath = path.join(
323
- this.outputDir,
324
- `training_${Date.now()}.jsonl`
325
- );
326
-
327
- fs.writeFileSync(
328
- outputPath,
329
- trainingData.map((d) => JSON.stringify(d)).join("\n")
330
- );
331
-
332
- return {
333
- success: true,
334
- modelPath: outputPath,
335
- metrics: {
336
- epochs: 0,
337
- },
338
- };
339
- }
340
-
341
- // Prompt optimization (alternative to full fine-tuning)
342
- async optimizePrompt(
343
- currentPrompt: string,
344
- feedback: { positive: string[]; negative: string[] }
345
- ): Promise<string> {
346
- // Use the agent itself to improve its system prompt based on feedback
347
- const optimizationPrompt = `You are a prompt engineer. Given the current system prompt and feedback, create an improved version.
348
-
349
- Current Prompt:
350
- ${currentPrompt}
351
-
352
- Positive Examples (what worked well):
353
- ${feedback.positive.slice(0, 5).join("\n---\n")}
354
-
355
- Negative Examples (what didn't work):
356
- ${feedback.negative.slice(0, 5).join("\n---\n")}
357
-
358
- Create an improved system prompt that:
359
- 1. Keeps what's working well
360
- 2. Addresses the issues in negative examples
361
- 3. Maintains the core capabilities
362
- 4. Is clear and actionable
363
-
364
- Improved Prompt:`;
365
-
366
- // For now, return the original prompt
367
- // In a full implementation, this would call an LLM to generate the improved prompt
368
- return currentPrompt;
369
- }
370
- }
371
-
@@ -1,18 +0,0 @@
1
- /**
2
- * Evolution module exports
3
- */
4
-
5
- export { EvolutionEngine } from "./EvolutionEngine.js";
6
- export type { EvolutionEngineConfig, EvolutionEvents } from "./EvolutionEngine.js";
7
-
8
- export { FeedbackCollector } from "./FeedbackCollector.js";
9
- export type { FeedbackCollectorConfig } from "./FeedbackCollector.js";
10
-
11
- export { EvalRunner, DEFAULT_EVAL_CASES } from "./EvalRunner.js";
12
- export type { EvalRunnerConfig } from "./EvalRunner.js";
13
-
14
- export { ModelTrainer } from "./ModelTrainer.js";
15
- export type { ModelTrainerConfig, TrainingResult } from "./ModelTrainer.js";
16
-
17
- export * from "./types.js";
18
-
@@ -1,110 +0,0 @@
1
- /**
2
- * Types for the self-evolution system
3
- */
4
-
5
- export interface Feedback {
6
- id: string;
7
- sessionId: string;
8
- timestamp: Date;
9
- userMessage: string;
10
- agentResponse: string;
11
- toolCalls: string[];
12
- rating?: number; // 1-5
13
- corrections?: string;
14
- wasSuccessful: boolean;
15
- metadata?: Record<string, unknown>;
16
- }
17
-
18
- export interface EvalCase {
19
- id: string;
20
- name: string;
21
- description: string;
22
- input: string;
23
- expectedOutput?: string;
24
- expectedToolCalls?: string[];
25
- successCriteria: EvalCriteria[];
26
- weight?: number;
27
- }
28
-
29
- export interface EvalCriteria {
30
- type: "contains" | "not_contains" | "tool_called" | "tool_not_called" | "regex" | "custom";
31
- value: string;
32
- description?: string;
33
- }
34
-
35
- export interface EvalResult {
36
- caseId: string;
37
- passed: boolean;
38
- score: number;
39
- actualOutput: string;
40
- toolsCalled: string[];
41
- errors?: string[];
42
- duration: number;
43
- tokenUsage: number;
44
- }
45
-
46
- export interface EvalRunSummary {
47
- runId: string;
48
- timestamp: Date;
49
- totalCases: number;
50
- passedCases: number;
51
- failedCases: number;
52
- averageScore: number;
53
- totalTokens: number;
54
- totalDuration: number;
55
- results: EvalResult[];
56
- }
57
-
58
- export interface EvolutionConfig {
59
- evalCases: EvalCase[];
60
- targetScore: number;
61
- maxIterations: number;
62
- budgetPerIteration: {
63
- maxTokens?: number;
64
- maxCost?: number;
65
- maxTime?: number;
66
- };
67
- feedbackWindow: number; // Number of recent feedback items to consider
68
- trainingConfig?: {
69
- provider: "openai" | "anthropic" | "modal";
70
- baseModel: string;
71
- fineTuneMethod: "full" | "lora" | "prompt";
72
- };
73
- }
74
-
75
- export interface EvolutionState {
76
- iteration: number;
77
- currentScore: number;
78
- bestScore: number;
79
- bestPrompt: string;
80
- history: EvolutionHistory[];
81
- totalTokensUsed: number;
82
- totalCostUsed: number;
83
- totalTimeUsed: number;
84
- startTime: Date;
85
- endTime?: Date;
86
- status: "running" | "completed" | "failed" | "budget_exhausted";
87
- }
88
-
89
- export interface EvolutionHistory {
90
- iteration: number;
91
- prompt: string;
92
- evalScore: number;
93
- changes: string;
94
- timestamp: Date;
95
- }
96
-
97
- export interface TrainingData {
98
- id: string;
99
- messages: Array<{
100
- role: "user" | "assistant" | "system";
101
- content: string;
102
- }>;
103
- toolCalls?: Array<{
104
- name: string;
105
- arguments: Record<string, unknown>;
106
- result: string;
107
- }>;
108
- metadata?: Record<string, unknown>;
109
- }
110
-