@decido/kernel-bridge 1.0.0 → 4.0.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +31 -0
- package/dist/index.js +2593 -0
- package/dist/index.mjs +2518 -0
- package/package.json +13 -7
- package/.turbo/turbo-build.log +0 -13
- package/.turbo/turbo-lint.log +0 -30
- package/src/ai/components/PeerNetworkPanel.tsx +0 -219
- package/src/ai/components/TokenWalletPanel.tsx +0 -172
- package/src/ai/hooks/usePeerMesh.ts +0 -79
- package/src/ai/hooks/useTokenWallet.ts +0 -35
- package/src/ai/index.ts +0 -96
- package/src/ai/services/EmbeddingService.ts +0 -119
- package/src/ai/services/InferenceRouter.ts +0 -347
- package/src/ai/services/LocalAgentResponder.ts +0 -199
- package/src/ai/services/MLXBridge.ts +0 -278
- package/src/ai/services/OllamaService.ts +0 -326
- package/src/ai/services/PeerMesh.ts +0 -373
- package/src/ai/services/TokenWallet.ts +0 -237
- package/src/ai/services/providers/AnthropicProvider.ts +0 -229
- package/src/ai/services/providers/GeminiProvider.ts +0 -121
- package/src/ai/services/providers/LLMProvider.ts +0 -72
- package/src/ai/services/providers/OllamaProvider.ts +0 -84
- package/src/ai/services/providers/OpenAIProvider.ts +0 -178
- package/src/crypto.ts +0 -54
- package/src/index.ts +0 -4
- package/src/kernel.ts +0 -376
- package/src/rehydration.ts +0 -52
- package/tsconfig.json +0 -18
|
@@ -1,199 +0,0 @@
|
|
|
1
|
-
/**
|
|
2
|
-
* LocalAgentResponder — Local-first AI responses for DecidoOS
|
|
3
|
-
*
|
|
4
|
-
* Handles user input locally when Cortex is not connected.
|
|
5
|
-
* Provides greeting responses, tool execution, system info queries,
|
|
6
|
-
* and basic conversational ability without requiring a remote AI.
|
|
7
|
-
*/
|
|
8
|
-
|
|
9
|
-
|
|
10
|
-
// ─── Types ──────────────────────────────────────────────────
|
|
11
|
-
|
|
12
|
-
/** Result of handling a user message locally. */
interface LocalResponse {
  /** Text to show to the user. */
  text: string;
  /** Tool invocations requested by the responder; omitted when none are needed. */
  toolCalls?: { name: string; args: Record<string, unknown> }[];
}
|
|
16
|
-
|
|
17
|
-
// ─── Greeting Patterns ──────────────────────────────────────
|
|
18
|
-
|
|
19
|
-
/**
 * Patterns that identify a greeting at the start of the user's message.
 *
 * Each keyword is followed by a negative lookahead for a letter, digit or
 * underscore so that it only matches as a whole word: without it "history"
 * or "hide" would match `hi` and "eyes" would match `ey`. The `u` flag is
 * required for `\p{...}` and keeps accented letters (e.g. "días") treated as
 * letters, which the ASCII-only `\b` would not do.
 */
const GREETING_PATTERNS = [
  /^(hola|hey|hi|hello|buenas|qué tal|que tal|saludos)(?![\p{L}\p{N}_])\s*(decido|decidoos|agente|asistente)?/iu,
  /^(buenos?\s*(días|tardes|noches))(?![\p{L}\p{N}_])/iu,
  /^(órale|oye|ey)(?![\p{L}\p{N}_])\s*(decido)?/iu,
];
|
|
24
|
-
|
|
25
|
-
/**
 * Patterns that identify a request for help / capability listing.
 *
 * The negative lookahead after each keyword group requires the keyword to end
 * at a word end, so inputs that merely start with the same letters (e.g.
 * "helpers crashed", "toolshed") are not treated as help requests. The `u`
 * flag enables `\p{...}` so accented letters count as letters.
 */
const HELP_PATTERNS = [
  /^(ayuda|help|qué puedes hacer|que puedes hacer|comandos|tools|herramientas)(?![\p{L}\p{N}_])/iu,
  /^(qué|que)\s*(sabes|haces|puedes)(?![\p{L}\p{N}_])/iu,
];
|
|
29
|
-
|
|
30
|
-
/**
 * Patterns that identify a system-status query.
 *
 * The first pattern requires the keyword to end at a word end (negative
 * lookahead for a letter, digit or underscore) so that inputs such as
 * "systemctl restart nginx" or "estadounidense" are not mistaken for a status
 * request. The second pattern is intentionally left prefix-based so that
 * conjugations such as "cómo vas" keep matching.
 */
const SYSTEM_PATTERNS = [
  /^(estado|status|sistema|system)(?![\p{L}\p{N}_])\s*(del\s*sistema)?/iu,
  /^(cómo|como)\s*(estás|estas|está|va)/i,
];
|
|
34
|
-
|
|
35
|
-
// ─── Response Templates ─────────────────────────────────────
|
|
36
|
-
|
|
37
|
-
/**
 * Picks the Spanish salutation that matches the hour of day.
 *
 * @param now - Moment to evaluate. Defaults to the current time, so existing
 *   zero-argument callers behave as before; passing a date makes the result
 *   deterministic.
 * @returns 'Buenos días' for local hours before 12, 'Buenas tardes' for hours
 *   before 18, and 'Buenas noches' otherwise.
 */
function getTimeGreeting(now: Date = new Date()): string {
  const hour = now.getHours();
  if (hour < 12) return 'Buenos días';
  if (hour < 18) return 'Buenas tardes';
  return 'Buenas noches';
}
|
|
43
|
-
|
|
44
|
-
/**
 * Builds the welcome banner shown in reply to a greeting.
 *
 * @returns A multi-line markdown string that opens with the time-of-day
 *   salutation from `getTimeGreeting()`.
 */
function getWelcomeMessage(): string {
  const salutation = getTimeGreeting();
  const lines = [
    `${salutation}, operador. Soy **DecidoOS Agent** 🧠`,
    '',
    'Estoy en línea y operativo. Aquí está mi reporte de estado:',
    '',
    '▸ **Modo**: Local-first (sin dependencia de Cortex)',
    '▸ **Estado**: ✅ ONLINE',
    '',
    'Puedo auditar tu seguridad, escanear tu flota de procesos y más.',
    '',
    'Escribe **"ayuda"** para ver mis comandos principales.',
  ];
  return lines.join('\n');
}
|
|
58
|
-
|
|
59
|
-
/**
 * Builds the static help text listing the commands the local responder
 * understands.
 *
 * @returns A multi-line markdown string.
 */
function getHelpMessage(): string {
  const lines = [
    '🛠️ **Capacidades de DecidoOS Agent**',
    '',
    '• "escanea puertos" — Auditar puertos abiertos',
    '• "escanea red" — Monitorear conexiones de red',
    '• "escanea flota" — Ver procesos Agent / Node / Python',
    '• "lista tareas" — Ver tareas / playbooks',
    '• "estado" — Estado del sistema',
    '',
    '**Voice Mode:**',
    'Presiona el botón verde 📞 para activar conversación por voz con Cortex.',
  ];
  return lines.join('\n');
}
|
|
71
|
-
|
|
72
|
-
/**
 * Builds the system-status report.
 *
 * CPU and memory figures are requested through Tauri's `invoke` using the
 * `get_cpu_usage` and `get_memory_usage` commands. If the Tauri module cannot
 * be loaded or either command fails, both figures read '(requiere Tauri)'.
 * If a command succeeds but a field is missing or not a finite number, that
 * figure stays 'N/A' instead of rendering "undefined%" or "NaN GB".
 *
 * @returns A multi-line markdown string with CPU, RAM, Cortex mode and
 *   session uptime.
 */
async function getSystemStatus(): Promise<string> {
  const isFiniteNumber = (value: unknown): value is number =>
    typeof value === 'number' && Number.isFinite(value);
  // The memory command is assumed to report bytes (the original code divides by 1024³).
  const toGb = (bytes: number): string => (bytes / 1024 / 1024 / 1024).toFixed(1);

  let cpuInfo = 'N/A';
  let memInfo = 'N/A';

  try {
    // Loaded lazily so the module can still be imported outside Tauri.
    const { invoke } = await import('@tauri-apps/api/core');
    const cpu = await invoke<{ usage: number }>('get_cpu_usage');
    const mem = await invoke<{ used: number; total: number }>('get_memory_usage');

    const usage: unknown = cpu?.usage;
    const used: unknown = mem?.used;
    const total: unknown = mem?.total;

    if (isFiniteNumber(usage)) {
      cpuInfo = `${usage.toFixed(1)}%`;
    }
    if (isFiniteNumber(used) && isFiniteNumber(total)) {
      memInfo = `${toGb(used)} GB / ${toGb(total)} GB`;
    }
  } catch {
    cpuInfo = '(requiere Tauri)';
    memInfo = '(requiere Tauri)';
  }

  return `📊 **Estado del Sistema DecidoOS**

▸ **CPU**: ${cpuInfo}
▸ **RAM**: ${memInfo}
▸ **Cortex**: Local mode
▸ **Uptime Session**: ${getSessionUptime()}

Todo operativo. ¿Necesitas algo más?`;
}
|
|
96
|
-
|
|
97
|
-
/**
 * Formats the value of `performance.now()` as a short duration.
 *
 * @returns `"<h>h <m>m"` once at least one hour has elapsed, otherwise `"<m>m"`.
 */
function getSessionUptime(): string {
  const totalMinutes = Math.floor(performance.now() / 60000);
  const wholeHours = Math.floor(totalMinutes / 60);
  return wholeHours > 0 ? `${wholeHours}h ${totalMinutes % 60}m` : `${totalMinutes}m`;
}
|
|
104
|
-
|
|
105
|
-
// ─── Main Responder ─────────────────────────────────────────
|
|
106
|
-
|
|
107
|
-
/**
 * Handles a user message locally, without a remote AI.
 *
 * The trimmed input is checked, in order, against: greetings, help requests,
 * system-status queries, command execution, the fixed tool commands (port
 * audit, network monitor, fleet scan, task list), forensic analysis and
 * vulnerability scan. The first match wins; anything else gets a fallback
 * message that echoes at most the first 100 characters of the input.
 *
 * @param input - Raw text typed by the user.
 * @returns The reply text and, for tool commands, the tool calls to perform.
 */
export async function processLocalMessage(input: string): Promise<LocalResponse> {
  const trimmed = input.trim();
  const matchesAny = (patterns: RegExp[]): boolean => patterns.some(p => p.test(trimmed));

  // 1. Greeting
  if (matchesAny(GREETING_PATTERNS)) {
    return { text: getWelcomeMessage() };
  }

  // 2. Help
  if (matchesAny(HELP_PATTERNS)) {
    return { text: getHelpMessage() };
  }

  // 3. System status
  if (matchesAny(SYSTEM_PATTERNS)) {
    return { text: await getSystemStatus() };
  }

  // 4. Command execution: everything after the verb is passed through as the command.
  const execMatch = trimmed.match(/^(ejecuta|run|exec|corre)\s+(.+)/i);
  if (execMatch) {
    const command = execMatch[2];
    return {
      text: `⚡ Ejecutando: \`${command}\`...`,
      toolCalls: [{ name: 'tactical.execute_command', args: { command } }],
    };
  }

  // 5. Tool commands with fixed arguments. Built per call so every response
  //    owns its own `args` object; evaluated in declaration order.
  const fixedToolRules: Array<{
    pattern: RegExp;
    text: string;
    tool: string;
    args: Record<string, unknown>;
  }> = [
    {
      pattern: /^(escanea|scan|audita|audit)\s*(puertos|ports)/i,
      text: '🔍 Escaneando puertos del sistema...',
      tool: 'security.audit_ports',
      args: { includeLoopback: false },
    },
    {
      pattern: /^(escanea|scan|monitorea|monitor)\s*(red|network|conexiones|connections)/i,
      text: '🌐 Analizando conexiones de red...',
      tool: 'security.network_monitor',
      args: {},
    },
    {
      pattern: /^(escanea|scan)\s*(flota|fleet|procesos|processes)/i,
      text: '🛸 Escaneando flotilla de agentes...',
      tool: 'tactical.scan_fleet',
      args: {},
    },
    {
      pattern: /^(lista|list|muestra|show)\s*(tareas|tasks|playbooks)/i,
      text: '📋 Listando tareas...',
      tool: 'tactical.list_tasks',
      args: { status: 'all' },
    },
  ];
  for (const rule of fixedToolRules) {
    if (rule.pattern.test(trimmed)) {
      return { text: rule.text, toolCalls: [{ name: rule.tool, args: rule.args }] };
    }
  }

  // 6. Forensic scan, optionally targeting a PID.
  const forensicMatch = trimmed.match(/^(analiza|analyze|forense|forensic)\s*(proceso|process)?\s*(\d+)?/i);
  if (forensicMatch) {
    const pidText = forensicMatch[3];
    const pid = pidText !== undefined ? Number.parseInt(pidText, 10) : undefined;
    return {
      // Compare against undefined rather than truthiness so PID 0 is reported
      // consistently with the `pid` value sent in the tool call.
      text: pid !== undefined ? `🔬 Analizando proceso PID ${pid}...` : '🔬 Escaneando procesos sospechosos...',
      toolCalls: [{ name: 'security.forensic_scan', args: { pid, deep: true } }],
    };
  }

  // 7. Vulnerability scan
  if (/^(vulnerabilidades|vulnerabilities|vuln|audit\s*npm)/i.test(trimmed)) {
    return {
      text: '🛡️ Escaneando vulnerabilidades en dependencias...',
      toolCalls: [{ name: 'security.scan_vulnerabilities', args: {} }],
    };
  }

  // 8. Default - unrecognized
  return {
    text: `🤖 Entendido: "${trimmed.slice(0, 100)}"

No tengo una respuesta local para esto. Para respuestas inteligentes con IA, conecta a **Cortex** (mindframe-cortex).

Mientras tanto, prueba:
• "ayuda" — ver mis capacidades
• "ejecuta ls" — ejecutar un comando
• "escanea puertos" — auditar seguridad
• "estado" — ver estado del sistema`,
  };
}
|
|
@@ -1,278 +0,0 @@
|
|
|
1
|
-
/**
|
|
2
|
-
* MLXBridge — Bridge between Tauri desktop app and MLX Python scripts
|
|
3
|
-
*
|
|
4
|
-
* Spawns Python processes for:
|
|
5
|
-
* - Model inference (generate text, transcribe audio, generate images)
|
|
6
|
-
* - LoRA fine-tuning with live progress streaming
|
|
7
|
-
* - Model benchmarking and comparison
|
|
8
|
-
*
|
|
9
|
-
* Uses Tauri shell plugin to manage Python child processes.
|
|
10
|
-
*/
|
|
11
|
-
|
|
12
|
-
// Tauri core is loaded dynamically for browser compatibility
|
|
13
|
-
|
|
14
|
-
// ─── Types ───────────────────────────────────────────────────
|
|
15
|
-
|
|
16
|
-
/** Catalog entry describing a model that can be listed or run. */
export interface MLXModelInfo {
  /** Display name of the model. */
  name: string;
  /** Model family: 'llm' | 'vision' | 'audio' | 'image-gen'. */
  family: string;
  /** Parameter count label, e.g. "7B", "13B". */
  paramCount?: string;
  /** Quantization label, e.g. "4bit", "8bit". */
  quantization?: string;
  /** HuggingFace path or local path. */
  path: string;
  /** Whether the model is flagged as loaded. */
  loaded: boolean;
}
|
|
24
|
-
|
|
25
|
-
/** Outcome of a single inference call. */
export interface InferenceResult {
  /** Generated text (or an error description when the call failed). */
  text: string;
  /** Generation throughput in tokens per second. */
  tokensPerSecond: number;
  /** Number of tokens counted for the generated output. */
  totalTokens: number;
  /** Elapsed time of the call in milliseconds. */
  latencyMs: number;
  /** Identifier of the model that was used. */
  model: string;
}
|
|
32
|
-
|
|
33
|
-
/** Snapshot of a training run's progress. */
export interface TrainingProgress {
  /** Current epoch number. */
  epoch: number;
  /** Total number of epochs planned. */
  totalEpochs: number;
  /** Current step number. */
  step: number;
  /** Total number of steps planned. */
  totalSteps: number;
  /** Loss value at this step. */
  loss: number;
  /** Learning rate in use. */
  learningRate: number;
  /** Training throughput in tokens per second. */
  tokensPerSecond: number;
  /** Milliseconds elapsed since training started. */
  elapsedMs: number;
}
|
|
43
|
-
|
|
44
|
-
/** Aggregated figures produced by benchmarking a model. */
export interface BenchmarkResult {
  /** Identifier of the benchmarked model. */
  model: string;
  /** Number of tokens counted in the prompts. */
  promptTokens: number;
  /** Number of tokens counted in the generated output. */
  generatedTokens: number;
  /** Generation throughput in tokens per second. */
  tokensPerSecond: number;
  /** Latency in milliseconds. */
  latencyMs: number;
  /** Memory usage in megabytes. */
  memoryMb: number;
}
|
|
52
|
-
|
|
53
|
-
// ─── Helper: Run Shell Command ───────────────────────────────
|
|
54
|
-
|
|
55
|
-
/**
 * Invokes the Tauri `run_shell_command` command and returns its string result.
 *
 * @param cmd - Executable to run.
 * @param args - Arguments passed to the executable.
 * @returns The string returned by the Tauri command.
 * @throws Rethrows whatever the module import or `invoke` call rejects with,
 *   after logging it to the console.
 */
async function runShellCommand(cmd: string, args: string[]): Promise<string> {
  try {
    // Loaded lazily so the module can still be imported outside Tauri.
    const tauriCore = await import('@tauri-apps/api/core');
    const payload = { command: cmd, args };
    // `return await` keeps a rejection inside this try block so it is logged.
    return await tauriCore.invoke<string>('run_shell_command', payload);
  } catch (failure) {
    console.error('[MLXBridge] Shell command failed:', failure);
    throw failure;
  }
}
|
|
68
|
-
|
|
69
|
-
// ─── Ollama API (extends OllamaService) ──────────────────────
|
|
70
|
-
|
|
71
|
-
/** Base URL used for all Ollama HTTP API requests in this module. */
const OLLAMA_URL = 'http://localhost:11434';
|
|
72
|
-
|
|
73
|
-
/**
 * Sends a single-turn, non-streaming chat request to Ollama.
 *
 * Never rejects: failures are reported through the `text` field with zeroed
 * token figures, matching how callers already consume the result.
 *
 * @param model - Ollama model name.
 * @param prompt - User message content.
 * @param temperature - Sampling temperature forwarded in `options` (default 0.7).
 * @returns The generated text and timing/throughput figures.
 */
async function ollamaChat(model: string, prompt: string, temperature = 0.7): Promise<InferenceResult> {
  const start = Date.now();
  const failure = (text: string): InferenceResult => ({
    text,
    tokensPerSecond: 0,
    totalTokens: 0,
    latencyMs: Date.now() - start,
    model,
  });

  try {
    const res = await fetch(`${OLLAMA_URL}/api/chat`, {
      method: 'POST',
      headers: { 'Content-Type': 'application/json' },
      body: JSON.stringify({
        model,
        messages: [{ role: 'user', content: prompt }],
        stream: false,
        options: { temperature },
      }),
    });

    // An HTTP error (e.g. unknown model) carries no message; report it instead
    // of returning an empty, successful-looking result.
    if (!res.ok) {
      return failure(`Error: Ollama respondió HTTP ${res.status}`);
    }

    const data = (await res.json()) as {
      message?: { content?: string };
      eval_count?: number;
      eval_duration?: number;
    };
    const latencyMs = Date.now() - start;

    const evalCount = typeof data.eval_count === 'number' ? data.eval_count : 0;
    // Ollama reports eval_duration in nanoseconds; prefer it over wall-clock
    // latency, which also includes model load and prompt evaluation.
    const evalSeconds =
      typeof data.eval_duration === 'number' && data.eval_duration > 0
        ? data.eval_duration / 1e9
        : latencyMs / 1000;

    return {
      text: data.message?.content ?? '',
      // Guard against a zero duration so the rate is never Infinity.
      tokensPerSecond: evalCount > 0 && evalSeconds > 0 ? evalCount / evalSeconds : 0,
      totalTokens: evalCount,
      latencyMs,
      model,
    };
  } catch {
    return failure('Error: Ollama no disponible');
  }
}
|
|
99
|
-
|
|
100
|
-
/**
 * Fetches the names of the models reported by Ollama's `/api/tags` endpoint.
 *
 * @returns The model names, or an empty array when the request fails, the
 *   response is not OK, or the payload cannot be processed.
 */
async function ollamaListModels(): Promise<string[]> {
  try {
    const response = await fetch(`${OLLAMA_URL}/api/tags`);
    if (response.ok) {
      const payload = await response.json();
      const entries: Array<{ name: string }> = payload.models || [];
      return entries.map(entry => entry.name);
    }
    return [];
  } catch {
    return [];
  }
}
|
|
110
|
-
|
|
111
|
-
/**
 * Asks Ollama to download a model and resolves once the pull has finished.
 *
 * @param model - Ollama model name to pull.
 * @throws Error when Ollama answers with a non-OK HTTP status. Network
 *   failures from `fetch` propagate unchanged, as before.
 */
async function ollamaPull(model: string): Promise<void> {
  const res = await fetch(`${OLLAMA_URL}/api/pull`, {
    method: 'POST',
    headers: { 'Content-Type': 'application/json' },
    // `stream: false` makes Ollama reply with a single object when the pull is
    // done; with the default streaming reply, `fetch` resolves as soon as the
    // headers arrive, i.e. before the download has completed.
    body: JSON.stringify({ name: model, stream: false }),
  });

  if (!res.ok) {
    throw new Error(`[MLXBridge] Ollama pull failed for "${model}": HTTP ${res.status}`);
  }

  // Consume the body so the request is fully completed before resolving.
  await res.text();
}
|
|
118
|
-
|
|
119
|
-
// ─── MLX Python Bridge ──────────────────────────────────────
|
|
120
|
-
|
|
121
|
-
/**
 * Generates text by running `python3 -m mlx_lm.generate` through
 * `runShellCommand`, capped at 512 tokens.
 *
 * Never rejects: a failed command is reported through the `text` field as
 * "MLX Error: …" with zeroed token figures.
 *
 * @param model - Model path passed to `--model`.
 * @param prompt - Prompt passed to `--prompt`.
 * @returns The command output as text. `totalTokens` is an approximation
 *   (whitespace-separated word count) and `tokensPerSecond` is always 0.
 */
async function mlxGenerate(model: string, prompt: string): Promise<InferenceResult> {
  const start = Date.now();
  try {
    const output = await runShellCommand('python3', [
      '-m', 'mlx_lm.generate',
      '--model', model,
      '--prompt', prompt,
      '--max-tokens', '512',
    ]);
    // Split on any whitespace run and treat blank output as zero tokens;
    // ''.split(' ') would otherwise report one token.
    const words = output.trim();
    return {
      text: output,
      tokensPerSecond: 0, // parsed from output in production
      totalTokens: words === '' ? 0 : words.split(/\s+/).length,
      latencyMs: Date.now() - start,
      model,
    };
  } catch (err) {
    // Avoid "[object Object]" and a doubled "Error:" prefix in the message.
    let reason: string;
    if (err instanceof Error) {
      reason = err.message;
    } else if (typeof err === 'string') {
      reason = err;
    } else {
      try {
        reason = JSON.stringify(err) ?? String(err);
      } catch {
        reason = String(err);
      }
    }
    return { text: `MLX Error: ${reason}`, tokensPerSecond: 0, totalTokens: 0, latencyMs: Date.now() - start, model };
  }
}
|
|
141
|
-
|
|
142
|
-
// ─── Public API ──────────────────────────────────────────────
|
|
143
|
-
|
|
144
|
-
/**
 * List all available models (Ollama + MLX catalog).
 *
 * @returns The models reported by `ollamaListModels()` (tagged as family
 *   'llm', path `ollama:<name>`, loaded) followed by the built-in MLX catalog
 *   entries (all flagged as not loaded).
 */
export async function listAvailableModels(): Promise<MLXModelInfo[]> {
  // Ollama models
  const installedNames = await ollamaListModels();
  const fromOllama = installedNames.map(
    (name): MLXModelInfo => ({
      name,
      family: 'llm',
      path: `ollama:${name}`,
      loaded: true,
    }),
  );

  // MLX model catalog (known models from decido-mlx)
  const mlxCatalog: MLXModelInfo[] = [
    { name: 'Qwen2.5-7B-Instruct-4bit', family: 'llm', paramCount: '7B', quantization: '4bit', path: 'mlx-community/Qwen2.5-7B-Instruct-4bit', loaded: false },
    { name: 'Mistral-7B-Instruct-v0.3-4bit', family: 'llm', paramCount: '7B', quantization: '4bit', path: 'mlx-community/Mistral-7B-Instruct-v0.3-4bit', loaded: false },
    { name: 'Llama-3.2-3B-Instruct-4bit', family: 'llm', paramCount: '3B', quantization: '4bit', path: 'mlx-community/Llama-3.2-3B-Instruct-4bit', loaded: false },
    { name: 'Mixtral-8x7B-Instruct-v0.1-4bit', family: 'llm', paramCount: '46.7B', quantization: '4bit', path: 'mlx-community/Mixtral-8x7B-Instruct-v0.1-4bit', loaded: false },
    { name: 'Whisper-large-v3', family: 'audio', path: 'mlx-community/whisper-large-v3', loaded: false },
    { name: 'CLIP-ViT-B-32', family: 'vision', path: 'openai/clip-vit-base-patch32', loaded: false },
    { name: 'FLUX.1-schnell-4bit', family: 'image-gen', path: 'mlx-community/FLUX.1-schnell-4bit-quantized', loaded: false },
    { name: 'Stable-Diffusion-XL', family: 'image-gen', path: 'mlx-community/sdxl-turbo', loaded: false },
  ];

  return [...fromOllama, ...mlxCatalog];
}
|
|
174
|
-
|
|
175
|
-
/**
 * Run inference on a model (auto-routes Ollama vs MLX).
 *
 * @param modelPath - Paths starting with `ollama:` go to Ollama with the
 *   prefix removed; anything else goes to the MLX generator.
 * @param prompt - Prompt text.
 * @param options - Optional settings; `temperature` is only forwarded on the
 *   Ollama route.
 * @returns The inference result from the selected backend.
 */
export async function runInference(
  modelPath: string,
  prompt: string,
  options?: { temperature?: number }
): Promise<InferenceResult> {
  const ollamaPrefix = 'ollama:';
  if (!modelPath.startsWith(ollamaPrefix)) {
    return mlxGenerate(modelPath, prompt);
  }
  const ollamaModel = modelPath.slice(ollamaPrefix.length);
  return ollamaChat(ollamaModel, prompt, options?.temperature);
}
|
|
187
|
-
|
|
188
|
-
/**
 * Compare two models side-by-side.
 *
 * Both inferences are started together and awaited with `Promise.all`.
 *
 * @param modelA - Model path for result `a`.
 * @param modelB - Model path for result `b`.
 * @param prompt - Prompt sent to both models.
 * @returns The two inference results keyed as `a` and `b`.
 */
export async function compareModels(
  modelA: string,
  modelB: string,
  prompt: string
): Promise<{ a: InferenceResult; b: InferenceResult }> {
  const runs: [Promise<InferenceResult>, Promise<InferenceResult>] = [
    runInference(modelA, prompt),
    runInference(modelB, prompt),
  ];
  const [first, second] = await Promise.all(runs);
  return { a: first, b: second };
}
|
|
200
|
-
|
|
201
|
-
/**
 * Pull/download a model.
 *
 * @param model - Names starting with `ollama:` are pulled through Ollama with
 *   the prefix removed; anything else is downloaded with
 *   `huggingface-cli download`.
 */
export async function pullModel(model: string): Promise<void> {
  const isOllamaModel = model.startsWith('ollama:');
  if (!isOllamaModel) {
    // MLX models via huggingface-cli
    await runShellCommand('huggingface-cli', ['download', model]);
    return;
  }
  const ollamaName = model.replace('ollama:', '');
  await ollamaPull(ollamaName);
}
|
|
210
|
-
|
|
211
|
-
/**
 * Benchmark a model with standard prompts.
 *
 * Runs three fixed prompts one after another (sequentially, so the runs do
 * not compete with each other) and aggregates token counts and latency.
 *
 * @param modelPath - Model path, routed the same way as `runInference`.
 * @returns Aggregated figures: `promptTokens` is a space-separated word count
 *   of the prompts, `latencyMs` is the mean per prompt, `memoryMb` is always 0,
 *   and `tokensPerSecond` is 0 when no latency was recorded.
 */
export async function benchmarkModel(modelPath: string): Promise<BenchmarkResult> {
  const testPrompts = [
    'Explain quantum computing in one paragraph.',
    'Write a Python function to sort a list.',
    'What is the capital of Colombia?',
  ];
  let totalTokens = 0;
  let totalMs = 0;

  for (const prompt of testPrompts) {
    const result = await runInference(modelPath, prompt);
    totalTokens += result.totalTokens;
    totalMs += result.latencyMs;
  }

  return {
    model: modelPath,
    promptTokens: testPrompts.join(' ').split(' ').length,
    generatedTokens: totalTokens,
    // Guard the division: a zero total latency would yield NaN or Infinity.
    tokensPerSecond: totalMs > 0 ? totalTokens / (totalMs / 1000) : 0,
    latencyMs: totalMs / testPrompts.length,
    memoryMb: 0, // would need system metrics
  };
}
|
|
236
|
-
|
|
237
|
-
/**
 * Start LoRA training — returns a handle to monitor progress.
 *
 * This implementation simulates training: every 500 ms it emits one synthetic
 * progress step (100 steps per epoch, 10 epochs by default) until all steps
 * have been emitted or `stop()` is called. Apart from `epochs` and
 * `learningRate`, the configuration fields are not read here.
 *
 * @param config - Training configuration.
 * @returns A handle with `stop()` to cancel the run and `onProgress(cb)` to
 *   register the single progress listener (a later call replaces the earlier
 *   listener).
 */
export function startLoRATraining(config: {
  baseModel: string;
  dataPath: string;
  outputPath: string;
  epochs?: number;
  batchSize?: number;
  learningRate?: number;
  loraLayers?: number;
}): { stop: () => void; onProgress: (cb: (p: TrainingProgress) => void) => void } {
  const STEPS_PER_EPOCH = 100;
  const TICK_MS = 500;

  let stopped = false;
  let progressCallback: ((p: TrainingProgress) => void) | null = null;

  // Simulate progress (in production, parse Python stdout)
  const totalEpochs = config.epochs ?? 10;
  const totalSteps = totalEpochs * STEPS_PER_EPOCH;
  let step = 0;
  const start = Date.now();

  const interval = setInterval(() => {
    if (stopped || step >= totalSteps) {
      clearInterval(interval);
      return;
    }
    step++;
    const progress: TrainingProgress = {
      // Steps are 1-based, so steps 1..100 belong to epoch 1. Using
      // floor(step / 100) + 1 would report step 100 as epoch 2 and the final
      // step as totalEpochs + 1.
      epoch: Math.floor((step - 1) / STEPS_PER_EPOCH) + 1,
      totalEpochs,
      step,
      totalSteps,
      loss: 2.5 * Math.exp(-step / 200) + 0.3 + Math.random() * 0.1,
      learningRate: config.learningRate ?? 1e-5,
      tokensPerSecond: 150 + Math.random() * 50,
      elapsedMs: Date.now() - start,
    };
    progressCallback?.(progress);

    // Release the timer as soon as the last step has been reported instead of
    // waiting for one more tick.
    if (step >= totalSteps) {
      clearInterval(interval);
    }
  }, TICK_MS);

  return {
    stop: () => { stopped = true; clearInterval(interval); },
    onProgress: (cb) => { progressCallback = cb; },
  };
}
|