@decido/kernel-bridge 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.turbo/turbo-build.log +13 -0
- package/.turbo/turbo-lint.log +30 -0
- package/package.json +37 -0
- package/src/ai/components/PeerNetworkPanel.tsx +219 -0
- package/src/ai/components/TokenWalletPanel.tsx +172 -0
- package/src/ai/hooks/usePeerMesh.ts +79 -0
- package/src/ai/hooks/useTokenWallet.ts +35 -0
- package/src/ai/index.ts +96 -0
- package/src/ai/services/EmbeddingService.ts +119 -0
- package/src/ai/services/InferenceRouter.ts +347 -0
- package/src/ai/services/LocalAgentResponder.ts +199 -0
- package/src/ai/services/MLXBridge.ts +278 -0
- package/src/ai/services/OllamaService.ts +326 -0
- package/src/ai/services/PeerMesh.ts +373 -0
- package/src/ai/services/TokenWallet.ts +237 -0
- package/src/ai/services/providers/AnthropicProvider.ts +229 -0
- package/src/ai/services/providers/GeminiProvider.ts +121 -0
- package/src/ai/services/providers/LLMProvider.ts +72 -0
- package/src/ai/services/providers/OllamaProvider.ts +84 -0
- package/src/ai/services/providers/OpenAIProvider.ts +178 -0
- package/src/crypto.ts +54 -0
- package/src/index.ts +4 -0
- package/src/kernel.ts +376 -0
- package/src/rehydration.ts +52 -0
- package/tsconfig.json +18 -0
|
@@ -0,0 +1,278 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* MLXBridge — Bridge between Tauri desktop app and MLX Python scripts
|
|
3
|
+
*
|
|
4
|
+
* Spawns Python processes for:
|
|
5
|
+
* - Model inference (generate text, transcribe audio, generate images)
|
|
6
|
+
* - LoRA fine-tuning with live progress streaming
|
|
7
|
+
* - Model benchmarking and comparison
|
|
8
|
+
*
|
|
9
|
+
* Uses Tauri shell plugin to manage Python child processes.
|
|
10
|
+
*/
|
|
11
|
+
|
|
12
|
+
// Tauri core is loaded dynamically for browser compatibility
|
|
13
|
+
|
|
14
|
+
// ─── Types ───────────────────────────────────────────────────
|
|
15
|
+
|
|
16
|
+
/**
 * Metadata describing one model known to the bridge, whether it comes
 * from the local Ollama server or from the static MLX catalog.
 */
export interface MLXModelInfo {
  // Display name of the model.
  name: string;
  family: string; // 'llm' | 'vision' | 'audio' | 'image-gen'
  paramCount?: string; // e.g. "7B", "13B"
  quantization?: string; // e.g. "4bit", "8bit"
  path: string; // HuggingFace path or local
  // True when the model is already present locally (Ollama-installed models).
  loaded: boolean;
}
|
|
24
|
+
|
|
25
|
+
/**
 * Result of a single text-generation call (Ollama or MLX).
 */
export interface InferenceResult {
  // Generated text, or an error description when the backend failed.
  text: string;
  // Generation throughput; 0 when the backend did not report it.
  tokensPerSecond: number;
  // Number of generated tokens (approximate for the MLX path).
  totalTokens: number;
  // Wall-clock time of the whole request.
  latencyMs: number;
  // Model identifier the request was routed to.
  model: string;
}
|
|
32
|
+
|
|
33
|
+
/**
 * One progress tick emitted during a LoRA fine-tuning run.
 */
export interface TrainingProgress {
  // 1-based current epoch.
  epoch: number;
  totalEpochs: number;
  // 1-based current optimization step across the whole run.
  step: number;
  totalSteps: number;
  // Current training loss.
  loss: number;
  learningRate: number;
  tokensPerSecond: number;
  // Time elapsed since training started.
  elapsedMs: number;
}
|
|
43
|
+
|
|
44
|
+
/**
 * Aggregate metrics from benchmarking a model over the standard prompt set.
 */
export interface BenchmarkResult {
  model: string;
  // Word-count approximation of the combined prompt size.
  promptTokens: number;
  // Total tokens generated across all benchmark prompts.
  generatedTokens: number;
  tokensPerSecond: number;
  // Average latency per prompt.
  latencyMs: number;
  // Peak memory use; currently always 0 (would need system metrics).
  memoryMb: number;
}
|
|
52
|
+
|
|
53
|
+
// ─── Helper: Run Shell Command ───────────────────────────────
|
|
54
|
+
|
|
55
|
+
async function runShellCommand(cmd: string, args: string[]): Promise<string> {
|
|
56
|
+
try {
|
|
57
|
+
const { invoke } = await import('@tauri-apps/api/core');
|
|
58
|
+
const result = await invoke<string>('run_shell_command', {
|
|
59
|
+
command: cmd,
|
|
60
|
+
args,
|
|
61
|
+
});
|
|
62
|
+
return result;
|
|
63
|
+
} catch (err) {
|
|
64
|
+
console.error('[MLXBridge] Shell command failed:', err);
|
|
65
|
+
throw err;
|
|
66
|
+
}
|
|
67
|
+
}
|
|
68
|
+
|
|
69
|
+
// ─── Ollama API (extends OllamaService) ──────────────────────
|
|
70
|
+
|
|
71
|
+
// Base URL of the local Ollama HTTP API (default Ollama port).
const OLLAMA_URL = 'http://localhost:11434';
|
|
72
|
+
|
|
73
|
+
async function ollamaChat(model: string, prompt: string, temperature = 0.7): Promise<InferenceResult> {
|
|
74
|
+
const start = Date.now();
|
|
75
|
+
try {
|
|
76
|
+
const res = await fetch(`${OLLAMA_URL}/api/chat`, {
|
|
77
|
+
method: 'POST',
|
|
78
|
+
headers: { 'Content-Type': 'application/json' },
|
|
79
|
+
body: JSON.stringify({
|
|
80
|
+
model,
|
|
81
|
+
messages: [{ role: 'user', content: prompt }],
|
|
82
|
+
stream: false,
|
|
83
|
+
options: { temperature },
|
|
84
|
+
}),
|
|
85
|
+
});
|
|
86
|
+
const data = await res.json();
|
|
87
|
+
const latencyMs = Date.now() - start;
|
|
88
|
+
return {
|
|
89
|
+
text: data.message?.content ?? '',
|
|
90
|
+
tokensPerSecond: data.eval_count ? (data.eval_count / (latencyMs / 1000)) : 0,
|
|
91
|
+
totalTokens: data.eval_count ?? 0,
|
|
92
|
+
latencyMs,
|
|
93
|
+
model,
|
|
94
|
+
};
|
|
95
|
+
} catch {
|
|
96
|
+
return { text: 'Error: Ollama no disponible', tokensPerSecond: 0, totalTokens: 0, latencyMs: Date.now() - start, model };
|
|
97
|
+
}
|
|
98
|
+
}
|
|
99
|
+
|
|
100
|
+
async function ollamaListModels(): Promise<string[]> {
|
|
101
|
+
try {
|
|
102
|
+
const res = await fetch(`${OLLAMA_URL}/api/tags`);
|
|
103
|
+
if (!res.ok) return [];
|
|
104
|
+
const data = await res.json();
|
|
105
|
+
return (data.models || []).map((m: { name: string }) => m.name);
|
|
106
|
+
} catch {
|
|
107
|
+
return [];
|
|
108
|
+
}
|
|
109
|
+
}
|
|
110
|
+
|
|
111
|
+
async function ollamaPull(model: string): Promise<void> {
|
|
112
|
+
await fetch(`${OLLAMA_URL}/api/pull`, {
|
|
113
|
+
method: 'POST',
|
|
114
|
+
headers: { 'Content-Type': 'application/json' },
|
|
115
|
+
body: JSON.stringify({ name: model }),
|
|
116
|
+
});
|
|
117
|
+
}
|
|
118
|
+
|
|
119
|
+
// ─── MLX Python Bridge ──────────────────────────────────────
|
|
120
|
+
|
|
121
|
+
async function mlxGenerate(model: string, prompt: string): Promise<InferenceResult> {
|
|
122
|
+
const start = Date.now();
|
|
123
|
+
try {
|
|
124
|
+
const output = await runShellCommand('python3', [
|
|
125
|
+
'-m', 'mlx_lm.generate',
|
|
126
|
+
'--model', model,
|
|
127
|
+
'--prompt', prompt,
|
|
128
|
+
'--max-tokens', '512',
|
|
129
|
+
]);
|
|
130
|
+
return {
|
|
131
|
+
text: output,
|
|
132
|
+
tokensPerSecond: 0, // parsed from output in production
|
|
133
|
+
totalTokens: output.split(' ').length,
|
|
134
|
+
latencyMs: Date.now() - start,
|
|
135
|
+
model,
|
|
136
|
+
};
|
|
137
|
+
} catch (err) {
|
|
138
|
+
return { text: `MLX Error: ${err}`, tokensPerSecond: 0, totalTokens: 0, latencyMs: Date.now() - start, model };
|
|
139
|
+
}
|
|
140
|
+
}
|
|
141
|
+
|
|
142
|
+
// ─── Public API ──────────────────────────────────────────────
|
|
143
|
+
|
|
144
|
+
/** List all available models (Ollama + MLX catalog) */
|
|
145
|
+
export async function listAvailableModels(): Promise<MLXModelInfo[]> {
|
|
146
|
+
const models: MLXModelInfo[] = [];
|
|
147
|
+
|
|
148
|
+
// Ollama models
|
|
149
|
+
const ollamaModels = await ollamaListModels();
|
|
150
|
+
for (const name of ollamaModels) {
|
|
151
|
+
models.push({
|
|
152
|
+
name,
|
|
153
|
+
family: 'llm',
|
|
154
|
+
path: `ollama:${name}`,
|
|
155
|
+
loaded: true,
|
|
156
|
+
});
|
|
157
|
+
}
|
|
158
|
+
|
|
159
|
+
// MLX model catalog (known models from decido-mlx)
|
|
160
|
+
const mlxCatalog: MLXModelInfo[] = [
|
|
161
|
+
{ name: 'Qwen2.5-7B-Instruct-4bit', family: 'llm', paramCount: '7B', quantization: '4bit', path: 'mlx-community/Qwen2.5-7B-Instruct-4bit', loaded: false },
|
|
162
|
+
{ name: 'Mistral-7B-Instruct-v0.3-4bit', family: 'llm', paramCount: '7B', quantization: '4bit', path: 'mlx-community/Mistral-7B-Instruct-v0.3-4bit', loaded: false },
|
|
163
|
+
{ name: 'Llama-3.2-3B-Instruct-4bit', family: 'llm', paramCount: '3B', quantization: '4bit', path: 'mlx-community/Llama-3.2-3B-Instruct-4bit', loaded: false },
|
|
164
|
+
{ name: 'Mixtral-8x7B-Instruct-v0.1-4bit', family: 'llm', paramCount: '46.7B', quantization: '4bit', path: 'mlx-community/Mixtral-8x7B-Instruct-v0.1-4bit', loaded: false },
|
|
165
|
+
{ name: 'Whisper-large-v3', family: 'audio', path: 'mlx-community/whisper-large-v3', loaded: false },
|
|
166
|
+
{ name: 'CLIP-ViT-B-32', family: 'vision', path: 'openai/clip-vit-base-patch32', loaded: false },
|
|
167
|
+
{ name: 'FLUX.1-schnell-4bit', family: 'image-gen', path: 'mlx-community/FLUX.1-schnell-4bit-quantized', loaded: false },
|
|
168
|
+
{ name: 'Stable-Diffusion-XL', family: 'image-gen', path: 'mlx-community/sdxl-turbo', loaded: false },
|
|
169
|
+
];
|
|
170
|
+
models.push(...mlxCatalog);
|
|
171
|
+
|
|
172
|
+
return models;
|
|
173
|
+
}
|
|
174
|
+
|
|
175
|
+
/** Run inference on a model (auto-routes Ollama vs MLX) */
|
|
176
|
+
export async function runInference(
|
|
177
|
+
modelPath: string,
|
|
178
|
+
prompt: string,
|
|
179
|
+
options?: { temperature?: number }
|
|
180
|
+
): Promise<InferenceResult> {
|
|
181
|
+
if (modelPath.startsWith('ollama:')) {
|
|
182
|
+
const model = modelPath.replace('ollama:', '');
|
|
183
|
+
return ollamaChat(model, prompt, options?.temperature);
|
|
184
|
+
}
|
|
185
|
+
return mlxGenerate(modelPath, prompt);
|
|
186
|
+
}
|
|
187
|
+
|
|
188
|
+
/** Compare two models side-by-side */
|
|
189
|
+
export async function compareModels(
|
|
190
|
+
modelA: string,
|
|
191
|
+
modelB: string,
|
|
192
|
+
prompt: string
|
|
193
|
+
): Promise<{ a: InferenceResult; b: InferenceResult }> {
|
|
194
|
+
const [a, b] = await Promise.all([
|
|
195
|
+
runInference(modelA, prompt),
|
|
196
|
+
runInference(modelB, prompt),
|
|
197
|
+
]);
|
|
198
|
+
return { a, b };
|
|
199
|
+
}
|
|
200
|
+
|
|
201
|
+
/** Pull/download a model */
|
|
202
|
+
export async function pullModel(model: string): Promise<void> {
|
|
203
|
+
if (model.startsWith('ollama:')) {
|
|
204
|
+
await ollamaPull(model.replace('ollama:', ''));
|
|
205
|
+
} else {
|
|
206
|
+
// MLX models via huggingface-cli
|
|
207
|
+
await runShellCommand('huggingface-cli', ['download', model]);
|
|
208
|
+
}
|
|
209
|
+
}
|
|
210
|
+
|
|
211
|
+
/** Benchmark a model with standard prompts */
|
|
212
|
+
export async function benchmarkModel(modelPath: string): Promise<BenchmarkResult> {
|
|
213
|
+
const testPrompts = [
|
|
214
|
+
'Explain quantum computing in one paragraph.',
|
|
215
|
+
'Write a Python function to sort a list.',
|
|
216
|
+
'What is the capital of Colombia?',
|
|
217
|
+
];
|
|
218
|
+
let totalTokens = 0;
|
|
219
|
+
let totalMs = 0;
|
|
220
|
+
|
|
221
|
+
for (const prompt of testPrompts) {
|
|
222
|
+
const result = await runInference(modelPath, prompt);
|
|
223
|
+
totalTokens += result.totalTokens;
|
|
224
|
+
totalMs += result.latencyMs;
|
|
225
|
+
}
|
|
226
|
+
|
|
227
|
+
return {
|
|
228
|
+
model: modelPath,
|
|
229
|
+
promptTokens: testPrompts.join(' ').split(' ').length,
|
|
230
|
+
generatedTokens: totalTokens,
|
|
231
|
+
tokensPerSecond: totalTokens / (totalMs / 1000),
|
|
232
|
+
latencyMs: totalMs / testPrompts.length,
|
|
233
|
+
memoryMb: 0, // would need system metrics
|
|
234
|
+
};
|
|
235
|
+
}
|
|
236
|
+
|
|
237
|
+
/** Start LoRA training — returns a handle to monitor progress */
|
|
238
|
+
export function startLoRATraining(config: {
|
|
239
|
+
baseModel: string;
|
|
240
|
+
dataPath: string;
|
|
241
|
+
outputPath: string;
|
|
242
|
+
epochs?: number;
|
|
243
|
+
batchSize?: number;
|
|
244
|
+
learningRate?: number;
|
|
245
|
+
loraLayers?: number;
|
|
246
|
+
}): { stop: () => void; onProgress: (cb: (p: TrainingProgress) => void) => void } {
|
|
247
|
+
let stopped = false;
|
|
248
|
+
let progressCallback: ((p: TrainingProgress) => void) | null = null;
|
|
249
|
+
|
|
250
|
+
// Simulate progress (in production, parse Python stdout)
|
|
251
|
+
const totalSteps = (config.epochs ?? 10) * 100;
|
|
252
|
+
let step = 0;
|
|
253
|
+
const start = Date.now();
|
|
254
|
+
|
|
255
|
+
const interval = setInterval(() => {
|
|
256
|
+
if (stopped || step >= totalSteps) {
|
|
257
|
+
clearInterval(interval);
|
|
258
|
+
return;
|
|
259
|
+
}
|
|
260
|
+
step++;
|
|
261
|
+
const progress: TrainingProgress = {
|
|
262
|
+
epoch: Math.floor(step / 100) + 1,
|
|
263
|
+
totalEpochs: config.epochs ?? 10,
|
|
264
|
+
step,
|
|
265
|
+
totalSteps,
|
|
266
|
+
loss: 2.5 * Math.exp(-step / 200) + 0.3 + Math.random() * 0.1,
|
|
267
|
+
learningRate: config.learningRate ?? 1e-5,
|
|
268
|
+
tokensPerSecond: 150 + Math.random() * 50,
|
|
269
|
+
elapsedMs: Date.now() - start,
|
|
270
|
+
};
|
|
271
|
+
progressCallback?.(progress);
|
|
272
|
+
}, 500);
|
|
273
|
+
|
|
274
|
+
return {
|
|
275
|
+
stop: () => { stopped = true; clearInterval(interval); },
|
|
276
|
+
onProgress: (cb) => { progressCallback = cb; },
|
|
277
|
+
};
|
|
278
|
+
}
|
|
@@ -0,0 +1,326 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* OllamaService — Chat interface for DecidoOS Agent
|
|
3
|
+
*
|
|
4
|
+
* Manages conversation history, system prompt construction,
|
|
5
|
+
* and tool-call parsing. Chat requests are now routed through
|
|
6
|
+
* InferenceRouter to support multiple LLM providers.
|
|
7
|
+
*/
|
|
8
|
+
|
|
9
|
+
import { routeChat } from './InferenceRouter';
|
|
10
|
+
import type { ChatMessage } from './providers/LLMProvider';
|
|
11
|
+
|
|
12
|
+
// ─── Types ──────────────────────────────────────────────────
|
|
13
|
+
|
|
14
|
+
/** One message in the Ollama-format conversation transcript. */
interface OllamaChatMessage {
  role: 'system' | 'user' | 'assistant';
  content: string;
}
|
|
18
|
+
|
|
19
|
+
/** Model entry as returned by Ollama's /api/tags endpoint. */
interface OllamaModelInfo {
  name: string;
  // Model size in bytes, as reported by Ollama.
  size: number;
  // ISO timestamp of the last modification (snake_case: Ollama wire format).
  modified_at: string;
}
|
|
24
|
+
|
|
25
|
+
// ─── Tool Call Types ────────────────────────────────────────
|
|
26
|
+
|
|
27
|
+
/** A single tool invocation requested by the model. */
export interface ToolCallRequest {
  // Tool name as declared to the model.
  name: string;
  // Arguments object supplied by the model (shape depends on the tool).
  args: Record<string, unknown>;
}
|
|
31
|
+
|
|
32
|
+
/** Assistant reply text plus any tool calls the model requested. */
export interface ChatWithToolsResult {
  text: string;
  toolCalls: ToolCallRequest[];
}
|
|
36
|
+
|
|
37
|
+
// ─── Config ─────────────────────────────────────────────────
|
|
38
|
+
|
|
39
|
+
// Base URL of the local Ollama HTTP API (default Ollama port).
const OLLAMA_BASE_URL = 'http://localhost:11434';
// Model used by chatStream() when the caller does not specify one.
const DEFAULT_MODEL = 'qwen2:latest';
|
|
41
|
+
|
|
42
|
+
// ─── System Prompt ──────────────────────────────────────────
|
|
43
|
+
|
|
44
|
+
function buildSystemPrompt(): string {
|
|
45
|
+
const liveContext = buildLiveContext();
|
|
46
|
+
const toolSchemas = ''; // Removed toolRegistry reference to fix dependencies, but kept variable for now
|
|
47
|
+
|
|
48
|
+
return `- Estás integrado en DecidoOS, una plataforma empresarial de escritorio
|
|
49
|
+
- Corres localmente en la máquina del usuario — todo es privado
|
|
50
|
+
- Puedes ejecutar herramientas del sistema para ayudar al usuario
|
|
51
|
+
|
|
52
|
+
${liveContext}
|
|
53
|
+
|
|
54
|
+
## Herramientas disponibles
|
|
55
|
+
|
|
56
|
+
${toolSchemas}
|
|
57
|
+
|
|
58
|
+
## Reglas de respuesta
|
|
59
|
+
1. Responde siempre en español a menos que el usuario hable en otro idioma
|
|
60
|
+
2. Sé directo — no des introducciones largas
|
|
61
|
+
3. Si puedes resolver algo con una herramienta, USA LA HERRAMIENTA nativa en vez de solo describirla
|
|
62
|
+
4. Mantén respuestas bajo 300 palabras
|
|
63
|
+
5. Si no sabes algo, dilo honestamente
|
|
64
|
+
6. NO inventes resultados de herramientas — espera el resultado real
|
|
65
|
+
7. Cuando el usuario pregunte sobre el estado del sistema, USA LOS DATOS del "Estado actual del sistema" que tienes arriba — esos son datos reales y recientes`;
|
|
66
|
+
}
|
|
67
|
+
|
|
68
|
+
// ─── Live Context Builder ───────────────────────────────────
|
|
69
|
+
|
|
70
|
+
/**
|
|
71
|
+
* Builds a live system context section from watchdog metrics
|
|
72
|
+
* and the app's context snapshot. Called every time a message
|
|
73
|
+
* is sent to keep the LLM's awareness current.
|
|
74
|
+
*/
|
|
75
|
+
function buildLiveContext(): string {
|
|
76
|
+
const parts: string[] = ['## Estado actual del sistema'];
|
|
77
|
+
const now = new Date().toLocaleString('es-CO', { hour12: false });
|
|
78
|
+
parts.push(`Hora actual: ${now}`);
|
|
79
|
+
|
|
80
|
+
// Watchdog metrics
|
|
81
|
+
try {
|
|
82
|
+
// Dynamic import to avoid circular deps — we access synchronously via singleton
|
|
83
|
+
const watchdogModule = (globalThis as any).__systemWatchdog;
|
|
84
|
+
if (watchdogModule) {
|
|
85
|
+
const snapshot = watchdogModule.getLastSnapshot?.();
|
|
86
|
+
if (snapshot) {
|
|
87
|
+
const metrics: string[] = [];
|
|
88
|
+
if (snapshot.cpuPercent !== null) metrics.push(`CPU: ${snapshot.cpuPercent.toFixed(1)}%`);
|
|
89
|
+
if (snapshot.memoryPercent !== null) metrics.push(`Memoria: ${snapshot.memoryPercent.toFixed(1)}%`);
|
|
90
|
+
if (snapshot.diskFreeGB !== null) metrics.push(`Disco libre: ${snapshot.diskFreeGB.toFixed(1)} GB`);
|
|
91
|
+
if (snapshot.connectionCount !== null) metrics.push(`Conexiones de red: ${snapshot.connectionCount}`);
|
|
92
|
+
if (metrics.length > 0) {
|
|
93
|
+
parts.push(`Métricas del sistema: ${metrics.join(' | ')}`);
|
|
94
|
+
}
|
|
95
|
+
}
|
|
96
|
+
|
|
97
|
+
const alerts = watchdogModule.getAlerts?.()?.filter((a: any) => !a.dismissed).slice(-5) ?? [];
|
|
98
|
+
if (alerts.length > 0) {
|
|
99
|
+
parts.push('Alertas recientes:');
|
|
100
|
+
for (const alert of alerts) {
|
|
101
|
+
const emoji = alert.severity === 'critical' ? '🚨' : '⚠️';
|
|
102
|
+
parts.push(` ${emoji} ${alert.title}`);
|
|
103
|
+
}
|
|
104
|
+
}
|
|
105
|
+
}
|
|
106
|
+
} catch {
|
|
107
|
+
// Watchdog not available yet
|
|
108
|
+
}
|
|
109
|
+
|
|
110
|
+
// Context snapshot from store (accessed via globalThis to avoid monolith coupling)
|
|
111
|
+
try {
|
|
112
|
+
const appStore = (globalThis as any).__appStore;
|
|
113
|
+
const ctx = appStore?.getState?.()?.contextSnapshot;
|
|
114
|
+
if (ctx) {
|
|
115
|
+
if (ctx.canvasNodeCount > 0) parts.push(`Canvas: ${ctx.canvasNodeCount} nodos`);
|
|
116
|
+
if (ctx.gitBranch) parts.push(`Git: rama ${ctx.gitBranch}${ctx.gitModifiedFiles ? ` (${ctx.gitModifiedFiles} archivos modificados)` : ''}`);
|
|
117
|
+
if (ctx.activeInsights > 0) parts.push(`Insights activos: ${ctx.activeInsights}`);
|
|
118
|
+
if (ctx.criticalInsightsSummary?.length > 0) {
|
|
119
|
+
parts.push('Insights críticos: ' + ctx.criticalInsightsSummary.slice(0, 3).join('; '));
|
|
120
|
+
}
|
|
121
|
+
}
|
|
122
|
+
} catch {
|
|
123
|
+
// Store not available
|
|
124
|
+
}
|
|
125
|
+
|
|
126
|
+
// Persistent memory (learned facts)
|
|
127
|
+
try {
|
|
128
|
+
const memoryModule = (globalThis as any).__agentMemory;
|
|
129
|
+
if (memoryModule) {
|
|
130
|
+
const memContext = memoryModule.buildMemoryContext?.();
|
|
131
|
+
if (memContext) {
|
|
132
|
+
parts.push('');
|
|
133
|
+
parts.push(memContext);
|
|
134
|
+
}
|
|
135
|
+
}
|
|
136
|
+
} catch {
|
|
137
|
+
// Memory not available
|
|
138
|
+
}
|
|
139
|
+
|
|
140
|
+
return parts.join('\n');
|
|
141
|
+
}
|
|
142
|
+
|
|
143
|
+
// ─── Tool Call Parser ───────────────────────────────────────
|
|
144
|
+
|
|
145
|
+
/**
|
|
146
|
+
* Parse Ollama native tool calls from the message object.
|
|
147
|
+
* Previously this used a Regex over the text response.
|
|
148
|
+
*/
|
|
149
|
+
export function parseToolCalls(message: any): ToolCallRequest[] {
|
|
150
|
+
const calls: ToolCallRequest[] = [];
|
|
151
|
+
|
|
152
|
+
if (message?.tool_calls && Array.isArray(message.tool_calls)) {
|
|
153
|
+
for (const tc of message.tool_calls) {
|
|
154
|
+
if (tc.function?.name) {
|
|
155
|
+
calls.push({
|
|
156
|
+
name: tc.function.name,
|
|
157
|
+
args: tc.function.arguments || {},
|
|
158
|
+
});
|
|
159
|
+
}
|
|
160
|
+
}
|
|
161
|
+
}
|
|
162
|
+
|
|
163
|
+
return calls;
|
|
164
|
+
}
|
|
165
|
+
|
|
166
|
+
/**
|
|
167
|
+
* Strip tool_call blocks from text to get the clean message.
|
|
168
|
+
* Using native tools, this is usually a no-op as tools are not in the text,
|
|
169
|
+
* but kept for backwards compatibility with any leaked formatting.
|
|
170
|
+
*/
|
|
171
|
+
export function stripToolCalls(text: string): string {
|
|
172
|
+
if (!text) return '';
|
|
173
|
+
return text.replace(/<tool_call>[\s\S]*?<\/tool_call>/g, '').trim();
|
|
174
|
+
}
|
|
175
|
+
|
|
176
|
+
// ─── Conversation History ───────────────────────────────────
|
|
177
|
+
|
|
178
|
+
let conversationHistory: OllamaChatMessage[] = [];
|
|
179
|
+
const MAX_HISTORY = 20;
|
|
180
|
+
|
|
181
|
+
function addToHistory(msg: OllamaChatMessage): void {
|
|
182
|
+
conversationHistory.push(msg);
|
|
183
|
+
if (conversationHistory.length > MAX_HISTORY) {
|
|
184
|
+
conversationHistory = conversationHistory.slice(-MAX_HISTORY);
|
|
185
|
+
}
|
|
186
|
+
}
|
|
187
|
+
|
|
188
|
+
export function clearConversationHistory(): void {
|
|
189
|
+
conversationHistory = [];
|
|
190
|
+
}
|
|
191
|
+
|
|
192
|
+
// ─── API Methods ────────────────────────────────────────────
|
|
193
|
+
|
|
194
|
+
/**
|
|
195
|
+
* Check if Ollama is running and accessible.
|
|
196
|
+
*/
|
|
197
|
+
export async function isOllamaAvailable(): Promise<boolean> {
|
|
198
|
+
try {
|
|
199
|
+
const res = await fetch(`${OLLAMA_BASE_URL}/api/tags`, {
|
|
200
|
+
signal: AbortSignal.timeout(2000)
|
|
201
|
+
});
|
|
202
|
+
return res.ok;
|
|
203
|
+
} catch {
|
|
204
|
+
return false;
|
|
205
|
+
}
|
|
206
|
+
}
|
|
207
|
+
|
|
208
|
+
/**
|
|
209
|
+
* List available models from Ollama.
|
|
210
|
+
*/
|
|
211
|
+
export async function listModels(): Promise<string[]> {
|
|
212
|
+
try {
|
|
213
|
+
const res = await fetch(`${OLLAMA_BASE_URL}/api/tags`);
|
|
214
|
+
if (!res.ok) return [];
|
|
215
|
+
const data = await res.json();
|
|
216
|
+
return (data.models || []).map((m: OllamaModelInfo) => m.name);
|
|
217
|
+
} catch {
|
|
218
|
+
return [];
|
|
219
|
+
}
|
|
220
|
+
}
|
|
221
|
+
|
|
222
|
+
/**
|
|
223
|
+
* Send a chat message and get a complete response (non-streaming).
|
|
224
|
+
* Routes through InferenceRouter to support multiple providers.
|
|
225
|
+
*/
|
|
226
|
+
export async function chat(
|
|
227
|
+
userMessage: string,
|
|
228
|
+
options?: { model?: string; temperature?: number }
|
|
229
|
+
): Promise<string> {
|
|
230
|
+
const userMsg: ChatMessage = { role: 'user', content: userMessage };
|
|
231
|
+
addToHistory(userMsg);
|
|
232
|
+
|
|
233
|
+
const messages: ChatMessage[] = [
|
|
234
|
+
{ role: 'system', content: buildSystemPrompt() },
|
|
235
|
+
...conversationHistory,
|
|
236
|
+
];
|
|
237
|
+
|
|
238
|
+
try {
|
|
239
|
+
const result = await routeChat(messages, {
|
|
240
|
+
temperature: options?.temperature ?? 0.7,
|
|
241
|
+
maxTokens: 512,
|
|
242
|
+
});
|
|
243
|
+
|
|
244
|
+
const assistantContent = result?.text || '';
|
|
245
|
+
addToHistory({ role: 'assistant', content: assistantContent });
|
|
246
|
+
|
|
247
|
+
if (result) {
|
|
248
|
+
console.log(`🧠[Chat] Response via ${result.backend} (${result.model}) in ${result.latencyMs} ms`);
|
|
249
|
+
}
|
|
250
|
+
|
|
251
|
+
return assistantContent;
|
|
252
|
+
} catch (error) {
|
|
253
|
+
if (error instanceof DOMException && error.name === 'TimeoutError') {
|
|
254
|
+
return '⏳ La respuesta del modelo tardó demasiado. Intenta con una pregunta más corta.';
|
|
255
|
+
}
|
|
256
|
+
throw error;
|
|
257
|
+
}
|
|
258
|
+
}
|
|
259
|
+
|
|
260
|
+
/**
|
|
261
|
+
* Send a chat message and stream the response token-by-token.
|
|
262
|
+
* NOTE: Streaming only works with Ollama directly (local provider).
|
|
263
|
+
* For cloud providers, this falls back to non-streaming.
|
|
264
|
+
*/
|
|
265
|
+
export async function* chatStream(
|
|
266
|
+
userMessage: string,
|
|
267
|
+
options?: { model?: string; temperature?: number }
|
|
268
|
+
): AsyncGenerator<string, void, unknown> {
|
|
269
|
+
const model = options?.model || DEFAULT_MODEL;
|
|
270
|
+
|
|
271
|
+
const userMsg: OllamaChatMessage = { role: 'user', content: userMessage };
|
|
272
|
+
addToHistory(userMsg);
|
|
273
|
+
|
|
274
|
+
const messages: OllamaChatMessage[] = [
|
|
275
|
+
{ role: 'system', content: buildSystemPrompt() },
|
|
276
|
+
...conversationHistory,
|
|
277
|
+
];
|
|
278
|
+
|
|
279
|
+
const res = await fetch(`${OLLAMA_BASE_URL}/api/chat`, {
|
|
280
|
+
method: 'POST',
|
|
281
|
+
headers: { 'Content-Type': 'application/json' },
|
|
282
|
+
body: JSON.stringify({
|
|
283
|
+
model,
|
|
284
|
+
messages,
|
|
285
|
+
stream: true,
|
|
286
|
+
options: {
|
|
287
|
+
temperature: options?.temperature ?? 0.7,
|
|
288
|
+
num_predict: 512,
|
|
289
|
+
},
|
|
290
|
+
}),
|
|
291
|
+
});
|
|
292
|
+
|
|
293
|
+
if (!res.ok || !res.body) {
|
|
294
|
+
throw new Error(`Ollama stream error: ${res.status}`);
|
|
295
|
+
}
|
|
296
|
+
|
|
297
|
+
const reader = res.body.getReader();
|
|
298
|
+
const decoder = new TextDecoder();
|
|
299
|
+
let fullContent = '';
|
|
300
|
+
|
|
301
|
+
try {
|
|
302
|
+
while (true) {
|
|
303
|
+
const { done, value } = await reader.read();
|
|
304
|
+
if (done) break;
|
|
305
|
+
|
|
306
|
+
const chunk = decoder.decode(value, { stream: true });
|
|
307
|
+
const lines = chunk.split('\n').filter(Boolean);
|
|
308
|
+
|
|
309
|
+
for (const line of lines) {
|
|
310
|
+
try {
|
|
311
|
+
const data = JSON.parse(line);
|
|
312
|
+
if (data.message?.content) {
|
|
313
|
+
fullContent += data.message.content;
|
|
314
|
+
yield data.message.content;
|
|
315
|
+
}
|
|
316
|
+
} catch {
|
|
317
|
+
// Skip malformed chunks
|
|
318
|
+
}
|
|
319
|
+
}
|
|
320
|
+
}
|
|
321
|
+
} finally {
|
|
322
|
+
reader.releaseLock();
|
|
323
|
+
}
|
|
324
|
+
|
|
325
|
+
addToHistory({ role: 'assistant', content: fullContent });
|
|
326
|
+
}
|