@pheem49/mint 1.5.2 → 1.5.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/GUIDE_TH.md +23 -11
- package/README.md +148 -66
- package/assets/Agent_Mint.png +0 -0
- package/assets/Settings.png +0 -0
- package/install.ps1 +64 -0
- package/install.sh +54 -0
- package/main.js +12 -0
- package/package.json +5 -3
- package/preload.js +4 -0
- package/scripts/install_linux_desktop_entry.js +48 -0
- package/src/AI_Brain/Gemini_API.js +231 -498
- package/src/AI_Brain/autonomous_brain.js +46 -19
- package/src/AI_Brain/headless_agent.js +21 -2
- package/src/AI_Brain/provider_adapter.js +358 -0
- package/src/Automation_Layer/file_operations.js +17 -5
- package/src/CLI/approval_handler.js +5 -0
- package/src/CLI/chat_router.js +7 -0
- package/src/CLI/chat_ui.js +397 -76
- package/src/CLI/cli_colors.js +86 -3
- package/src/CLI/cli_formatters.js +6 -1
- package/src/CLI/code_agent.js +706 -273
- package/src/CLI/interactive_chat.js +311 -149
- package/src/CLI/slash_command_handler.js +2 -2
- package/src/CLI/updater.js +21 -1
- package/src/System/config_manager.js +5 -1
- package/src/System/ipc_handlers.js +95 -1
- package/src/System/picture_store.js +109 -0
- package/src/System/smart_context.js +227 -0
- package/src/System/task_manager.js +127 -0
- package/src/System/tool_registry.js +13 -0
- package/src/System/window_manager.js +16 -8
- package/src/UI/live2d_manager.js +42 -8
- package/src/UI/preload-spotlight.js +1 -0
- package/src/UI/renderer.js +837 -63
- package/src/UI/settings.css +160 -96
- package/src/UI/settings.html +9 -0
- package/src/UI/settings.js +35 -2
- package/src/UI/spotlight.js +13 -9
- package/src/UI/styles.css +1592 -165
- package/privacy.txt +0 -1
|
@@ -1,17 +1,17 @@
|
|
|
1
1
|
const { GoogleGenAI } = require('@google/genai');
|
|
2
2
|
const { readChatHistory, writeChatHistory, clearChatHistory } = require('../System/chat_history_manager');
|
|
3
|
-
const { readConfig, getAvailableProviders
|
|
3
|
+
const { readConfig, getAvailableProviders } = require('../System/config_manager');
|
|
4
4
|
const pluginManager = require('../Plugins/plugin_manager');
|
|
5
5
|
const mcpManager = require('../Plugins/mcp_manager');
|
|
6
6
|
const memoryStore = require('./memory_store');
|
|
7
7
|
const agentOrchestrator = require('./agent_orchestrator');
|
|
8
8
|
const workspaceManager = require('../CLI/workspace_manager');
|
|
9
9
|
const toolRegistry = require('../System/tool_registry');
|
|
10
|
+
const providerAdapter = require('./provider_adapter');
|
|
10
11
|
|
|
11
12
|
let ai = null;
|
|
12
13
|
let activeApiKey = '';
|
|
13
14
|
const initialEnvKey = (process.env.GEMINI_API_KEY || '').trim();
|
|
14
|
-
const axios = require('axios');
|
|
15
15
|
const DEFAULT_GEMINI_MODEL = 'gemini-2.5-flash';
|
|
16
16
|
|
|
17
17
|
function decodeUnicode(str) {
|
|
@@ -42,15 +42,39 @@ function imageDataUriToInlineData(base64Image) {
|
|
|
42
42
|
};
|
|
43
43
|
}
|
|
44
44
|
|
|
45
|
-
function imageDataUriToBase64(base64Image) {
|
|
46
|
-
return imageDataUriToInlineData(base64Image).data;
|
|
47
|
-
}
|
|
48
|
-
|
|
49
45
|
/**
 * Coerces the image argument into a list.
 *
 * @param {*} base64Image - A single image value, an array of image values,
 *   or a falsy value when no image was supplied (presumably data-URI
 *   strings; verify against callers).
 * @returns {Array} An empty array for a falsy argument, the truthy entries
 *   of an array argument, or a one-element array wrapping a single value.
 */
function normalizeImageList(base64Image) {
    if (Array.isArray(base64Image)) {
        // Drop empty slots/values so callers only iterate real images.
        return base64Image.filter((entry) => Boolean(entry));
    }
    return base64Image ? [base64Image] : [];
}
|
|
53
49
|
|
|
50
|
+
const CHAT_MODE_ACTION_POLICY = `GOAL:
|
|
51
|
+
Your goal is to help the user with their queries. This Electron app is Chat Mode: use at most ONE simple action per user message, only when the latest message explicitly asks for that local action. If the user asks a question or asks you to provide text/commands, answer with action "none".
|
|
52
|
+
|
|
53
|
+
ACTION DISCIPLINE:
|
|
54
|
+
- Always return a single JSON object. Never return a JSON array or multiple actions.
|
|
55
|
+
- If the user asks "พิมพ์คำสั่งให้หน่อย", "บอกคำสั่ง", "ขอคำสั่ง", "what command", or "type the command", provide the command in "response" and set action "none". Do NOT use "type_text" or "key_tap".
|
|
56
|
+
- Use "type_text", "key_tap", "mouse_click", or "mouse_move" only when the user explicitly asks you to control the currently focused UI, not when they ask for a command to copy/type themselves.
|
|
57
|
+
- If the user asks to run terminal commands or code, Chat Mode should provide the command or tell them to use the Mint CLI agent. Do not type or press Enter on their behalf.
|
|
58
|
+
- Never say you opened, checked, inspected, or verified a file/folder unless the selected action actually does it and the app will execute that action.
|
|
59
|
+
- If the request needs workspace code inspection, edits, tests, or shell execution, tell the user to use the Mint CLI agent instead of pretending to inspect files.`;
|
|
60
|
+
|
|
61
|
+
const AGENT_MODE_ACTION_POLICY = `GOAL:
|
|
62
|
+
Your goal is to act as Mint's Desktop Agent Mode. You may use ONE concrete desktop action per response when it directly advances the user's latest request or a clear desktop task implied by Smart Context. Prefer useful action over explaining when the user asked Mint to do something.
|
|
63
|
+
|
|
64
|
+
ACTION DISCIPLINE:
|
|
65
|
+
- Always return a single JSON object. Never return a JSON array or multiple actions.
|
|
66
|
+
- Choose exactly one action when a desktop action is useful and the user's intent is clear; otherwise use action "none" and ask a concise follow-up.
|
|
67
|
+
- You may use safe desktop actions such as open_url, search, open_app, find_path, open_file, open_folder, create_folder, clipboard_write, learn_file, learn_folder, plugin, mcp_tool, web_automation, system_info, mouse_move, mouse_click, type_text, and key_tap when they match the request.
|
|
68
|
+
- Approval and dangerous actions are handled by Mint's UI. You may propose system_automation or delete_file only when the user clearly requested it; the app will ask for permission before running.
|
|
69
|
+
- For UI-control actions (mouse_click, mouse_move, type_text, key_tap), rely on Smart Context or the attached screenshot. If the target is ambiguous, ask before acting.
|
|
70
|
+
- If the user asks "พิมพ์คำสั่งให้หน่อย", "บอกคำสั่ง", "ขอคำสั่ง", "what command", or "type the command", provide the command in "response" and set action "none" unless they explicitly ask Mint to type it into the active UI.
|
|
71
|
+
- If the request needs workspace code inspection, edits, tests, or shell execution, tell the user to use the Mint CLI agent instead of pretending to inspect files or run commands from Chat UI.
|
|
72
|
+
- Never say you opened, checked, inspected, or verified something unless the selected action actually does it and the app will execute that action.`;
|
|
73
|
+
|
|
74
|
+
/**
 * Selects the action-policy prompt fragment for the configured assistant mode.
 *
 * @param {object} [config] - Configuration object; obtained from readConfig()
 *   when the argument is omitted.
 * @returns {string} AGENT_MODE_ACTION_POLICY when config.assistantMode is
 *   exactly 'agent', otherwise CHAT_MODE_ACTION_POLICY.
 */
function buildActionModeInstruction(config = readConfig()) {
    if (config.assistantMode === 'agent') {
        return AGENT_MODE_ACTION_POLICY;
    }
    // Any other (or missing) mode falls back to the stricter chat policy.
    return CHAT_MODE_ACTION_POLICY;
}
|
|
77
|
+
|
|
54
78
|
const systemInstruction = `You are "Mint" (มิ้นท์), a cute, cheerful, and highly helpful female Local AI Desktop Agent.
|
|
55
79
|
|
|
56
80
|
PERSONALITY & TONE:
|
|
@@ -72,9 +96,10 @@ NATURAL CHAT FLOW:
|
|
|
72
96
|
- You have the autonomy to suggest better ways to achieve a goal, provide alternative perspectives, and take initiative in helping the user.
|
|
73
97
|
- Separate distinct points with blank lines (double newline) for readability.
|
|
74
98
|
- Ask follow-up questions only when they add significant value to the task or conversation.
|
|
99
|
+
- The latest user message is authoritative. Do not continue or describe older tasks unless the latest message explicitly asks you to continue them.
|
|
100
|
+
- For greetings, name-calls, acknowledgements, and backchannels such as "มิ้น", "มิ้นๆ", "อ๋อ", "โอเค", "ขอบคุณ", "hi", "hello", "ok", or "thanks", return action "none" and a short reply only.
|
|
75
101
|
|
|
76
|
-
|
|
77
|
-
Your goal is to help the user with their queries. If they ask to open an application, open a website, search, manage files, or get system info, you must trigger an action in the structured JSON format below. **NEVER provide a conversational response about performing an action without including the actual "action" object in your JSON.**
|
|
102
|
+
{{ACTION_MODE_INSTRUCTION}}
|
|
78
103
|
|
|
79
104
|
CREATOR INFO:
|
|
80
105
|
- The creator is Pheem49.
|
|
@@ -138,6 +163,7 @@ ${toolRegistry.buildToolPromptSection()}
|
|
|
138
163
|
// Replaces 5 previously duplicated mcpPrompt blocks.
|
|
139
164
|
// ─────────────────────────────────────────────────────────────────────────────
|
|
140
165
|
function buildSystemPrompt() {
|
|
166
|
+
const config = readConfig();
|
|
141
167
|
pluginManager.loadPlugins();
|
|
142
168
|
const mcpTools = mcpManager.getAllTools();
|
|
143
169
|
|
|
@@ -165,7 +191,9 @@ function buildSystemPrompt() {
|
|
|
165
191
|
workspaceSection = `\n\n[WORKSPACE DETECTED: ${ws.name}]\nPath: ${ws.path}\nProject Instructions: ${ws.instructions}\n`;
|
|
166
192
|
}
|
|
167
193
|
|
|
168
|
-
|
|
194
|
+
const modeInstruction = buildActionModeInstruction(config);
|
|
195
|
+
const baseInstruction = systemInstruction.replace('{{ACTION_MODE_INSTRUCTION}}', modeInstruction);
|
|
196
|
+
return baseInstruction + personaInstruction + workspaceSection + pluginManager.getPromptDescriptions() + mcpSection + userContext;
|
|
169
197
|
}
|
|
170
198
|
|
|
171
199
|
function buildMessageWithRelevantMemory(finalMessage) {
|
|
@@ -191,10 +219,15 @@ function stripRelevantMemoryBlock(text) {
|
|
|
191
219
|
return input
|
|
192
220
|
.replace(/\n?\[Relevant long-term memory for this user message\][\s\S]*?\[End relevant memory\]\n?/g, '\n')
|
|
193
221
|
.replace(/^\s*\[Relevant long-term memory for this user message\][\s\S]*?\[End relevant memory\]\s*/g, '')
|
|
222
|
+
.replace(/\n?\[SMART_CONTEXT\][\s\S]*?\[\/SMART_CONTEXT\]\n?/g, '\n')
|
|
194
223
|
.replace(/\n?\[LOCAL KNOWLEDGE BASE - USE THIS CONTEXT TO ANSWER\][\s\S]*/g, '')
|
|
195
224
|
.trim();
|
|
196
225
|
}
|
|
197
226
|
|
|
227
|
+
/**
 * Reports whether the text contains a complete
 * [SMART_CONTEXT] ... [/SMART_CONTEXT] block.
 *
 * @param {*} text - Value to inspect; falsy values are treated as an empty string.
 * @returns {boolean} True when both the opening and closing markers are
 *   present, in that order.
 */
function hasSmartContextBlock(text) {
    const haystack = String(text || '');
    const smartContextPattern = /\[SMART_CONTEXT\][\s\S]*?\[\/SMART_CONTEXT\]/;
    return smartContextPattern.test(haystack);
}
|
|
230
|
+
|
|
198
231
|
function cleanHistoryForStorage(history) {
|
|
199
232
|
if (!Array.isArray(history)) return [];
|
|
200
233
|
return history.map(msg => ({
|
|
@@ -202,7 +235,13 @@ function cleanHistoryForStorage(history) {
|
|
|
202
235
|
parts: Array.isArray(msg.parts)
|
|
203
236
|
? msg.parts.map(part => {
|
|
204
237
|
if (part.text) {
|
|
205
|
-
return {
|
|
238
|
+
return {
|
|
239
|
+
text: stripRelevantMemoryBlock(part.text)
|
|
240
|
+
.replace(/data:image\/[\w.+-]+;base64,[A-Za-z0-9+/=]+/g, '[Image omitted from chat history]')
|
|
241
|
+
};
|
|
242
|
+
}
|
|
243
|
+
if (part.inlineData || part.fileData || part.image_url || part.imageUrl) {
|
|
244
|
+
return { text: '[Image omitted from chat history; saved locally when sent by the user.]' };
|
|
206
245
|
}
|
|
207
246
|
return part;
|
|
208
247
|
})
|
|
@@ -210,6 +249,20 @@ function cleanHistoryForStorage(history) {
|
|
|
210
249
|
}));
|
|
211
250
|
}
|
|
212
251
|
|
|
252
|
+
/**
 * Returns a copy of nextHistory in which every entry carries `timestamp`
 * and `providerInfo` fields.
 *
 * Each field is taken from the entry itself when truthy, otherwise from the
 * entry at the same index in previousHistory. When neither has a timestamp,
 * the last two entries receive `now` and all earlier entries receive null.
 * A missing providerInfo becomes null.
 *
 * @param {Array<object>} nextHistory - History to annotate.
 * @param {Array<object>} previousHistory - Earlier history used as a
 *   same-index source of metadata; non-arrays are treated as empty.
 * @param {string} now - Timestamp applied to the last two entries.
 * @returns {Array<object>} New array of shallow-copied entries, or an empty
 *   array when nextHistory is not an array.
 */
function preserveHistoryMetadata(nextHistory, previousHistory, now) {
    if (!Array.isArray(nextHistory)) {
        return [];
    }

    const earlier = Array.isArray(previousHistory) ? previousHistory : [];
    // Entries at or after this index are the ones stamped with `now`.
    const firstFreshIndex = nextHistory.length - 2;

    return nextHistory.map((entry, position) => {
        const counterpart = earlier[position] || {};
        const fallbackTimestamp = position >= firstFreshIndex ? now : null;
        const timestamp = entry.timestamp || counterpart.timestamp || fallbackTimestamp;
        const providerInfo = entry.providerInfo || counterpart.providerInfo || null;
        return { ...entry, timestamp, providerInfo };
    });
}
|
|
265
|
+
|
|
213
266
|
function validateParsedAction(parsedResult) {
|
|
214
267
|
if (!parsedResult || !parsedResult.action) {
|
|
215
268
|
return parsedResult;
|
|
@@ -223,6 +276,42 @@ function validateParsedAction(parsedResult) {
|
|
|
223
276
|
return parsedResult;
|
|
224
277
|
}
|
|
225
278
|
|
|
279
|
+
/**
 * Coerces a parsed model reply into the single-object shape
 * `{ response, action: { type, target } }`.
 *
 * - Array replies are collapsed into one text-only reply: the target of the
 *   first `type_text` action if there is one, otherwise the first object's
 *   `response`, otherwise a fixed notice. The action is always "none".
 * - Non-object replies are stringified into `response` with action "none".
 * - Object replies are mutated in place: a missing or non-object `action`
 *   becomes "none", and a `type_text` / `key_tap` action is downgraded to
 *   "none" when the user's text asked for a command to be told to them.
 *
 * @param {*} parsedResult - Value produced by parsing the model output.
 * @param {string} [originalText=''] - The user's message, used to detect
 *   "tell me the command" style requests.
 * @returns {object} The normalized result (the same object instance when
 *   parsedResult was a plain object).
 */
function normalizeParsedResult(parsedResult, originalText = '') {
    const noAction = () => ({ type: 'none', target: '' });

    if (Array.isArray(parsedResult)) {
        const firstObject = parsedResult.find(item => item && typeof item === 'object') || {};
        const typedCommand = parsedResult.find(item =>
            item && item.action && item.action.type === 'type_text' && item.action.target
        );

        let response;
        if (typedCommand) {
            response = `คำสั่งคือ:\n${typedCommand.action.target}`;
        } else {
            response = firstObject.response || 'มิ้นท์ตอบได้ทีละ action ต่อข้อความนะคะ ลองสั่งใหม่อีกครั้งได้เลยค่ะ';
        }
        return { response, action: noAction() };
    }

    const isObject = Boolean(parsedResult) && typeof parsedResult === 'object';
    if (!isObject) {
        return { response: String(parsedResult || ''), action: noAction() };
    }

    const hasActionObject = Boolean(parsedResult.action) && typeof parsedResult.action === 'object';
    if (!hasActionObject) {
        parsedResult.action = noAction();
    }

    // Phrases (Thai and English) that mean "give me the command as text".
    const commandRequestPattern = /พิมพ์คำสั่ง|บอกคำสั่ง|ขอคำสั่ง|คำสั่ง.*ให้หน่อย|type.*command|what command|give.*command/;
    const requestText = String(originalText || '').toLowerCase();
    const wantsCommandAsText = commandRequestPattern.test(requestText);

    const { type: actionType } = parsedResult.action;
    const isKeyboardAction = actionType === 'type_text' || actionType === 'key_tap';
    if (!wantsCommandAsText || !isKeyboardAction) {
        return parsedResult;
    }

    // The model tried to type/press keys although the user only wanted the
    // command text: surface the text and cancel the keyboard action.
    let typed = '';
    if (actionType === 'type_text') {
        typed = String(parsedResult.action.target || '').trim();
    }
    if (typed) {
        parsedResult.response = `คำสั่งคือ:\n${typed}`;
    } else {
        parsedResult.response = parsedResult.response || 'ได้ค่ะ แต่คำขอนี้ควรตอบเป็นข้อความ ไม่ควรพิมพ์หรือกดปุ่มแทนค่ะ';
    }
    parsedResult.action = noAction();
    return parsedResult;
}
|
|
314
|
+
|
|
226
315
|
function resolveApiKey() {
|
|
227
316
|
let settingsKey = '';
|
|
228
317
|
try {
|
|
@@ -259,63 +348,15 @@ function resolveGeminiModel() {
|
|
|
259
348
|
}
|
|
260
349
|
|
|
261
350
|
/**
 * Computes the order in which AI providers should be tried.
 *
 * Delegates to providerAdapter.getProviderAttemptOrder, passing the list
 * returned by getAvailableProviders(config) as both the set of available
 * providers and the priority list.
 *
 * @param {object} config - Application configuration.
 * @returns {*} Whatever providerAdapter.getProviderAttemptOrder returns
 *   (presumably an ordered array of provider ids; verify in provider_adapter).
 */
function getProviderAttemptOrder(config) {
    const providers = getAvailableProviders(config);
    const options = {
        availableProviders: providers,
        priority: providers
    };
    return providerAdapter.getProviderAttemptOrder(config, options);
}
|
|
269
357
|
|
|
270
358
|
/**
 * Resolves the model name configured for a provider.
 *
 * Thin wrapper that forwards to providerAdapter.getProviderModel so existing
 * callers in this module keep working.
 *
 * @param {string} provider - Provider identifier.
 * @param {object} [config={}] - Application configuration.
 * @returns {*} The value returned by providerAdapter.getProviderModel
 *   (presumably the model name string; verify in provider_adapter).
 */
function getProviderModel(provider, config = {}) {
    const modelName = providerAdapter.getProviderModel(provider, config);
    return modelName;
}
|
|
320
361
|
|
|
321
362
|
// Chat session — maintains conversation history within the session
|
|
@@ -370,16 +411,98 @@ function shouldUseKnowledgeSearch(message) {
|
|
|
370
411
|
return knowledgeHints.some(hint => text.includes(hint));
|
|
371
412
|
}
|
|
372
413
|
|
|
414
|
+
/**
 * Converts stored chat history into the `{ role, content }` message list
 * handed to the provider client.
 *
 * Only the last MAX_HISTORY_MESSAGES stored entries are considered. For each
 * entry the string `text` parts are passed through stripRelevantMemoryBlock
 * and joined with newlines; non-text parts are ignored. Entries that end up
 * with no text are dropped. The stored role 'model' maps to 'assistant';
 * every other role maps to 'user'.
 *
 * Malformed entries (null / non-object messages, null parts) are skipped
 * instead of throwing, so one damaged record cannot break every request.
 *
 * @param {Array<object>} [history=[]] - Stored history entries of the form
 *   `{ role, parts: [{ text }] }`; non-arrays are treated as empty.
 * @returns {Array<{role: string, content: string}>} Provider-ready messages.
 */
function chatHistoryToProviderHistory(history = []) {
    const entries = Array.isArray(history) ? history : [];
    const converted = [];

    for (const msg of entries.slice(-MAX_HISTORY_MESSAGES)) {
        // Skip anything that is not a message object with a parts array.
        if (!msg || typeof msg !== 'object' || !Array.isArray(msg.parts)) {
            continue;
        }

        const text = msg.parts
            .map(part => (part && typeof part.text === 'string') ? stripRelevantMemoryBlock(part.text) : '')
            .filter(Boolean)
            .join('\n');

        // Whitespace-only turns carry no information for the provider.
        if (!text.trim()) {
            continue;
        }

        converted.push({
            role: msg.role === 'model' ? 'assistant' : 'user',
            content: text
        });
    }

    return converted;
}
|
|
427
|
+
|
|
428
|
+
/**
 * Builds the observation object sent to the provider client for one user turn.
 *
 * Text selection:
 * - a non-empty finalMessage is passed through buildMessageWithRelevantMemory;
 * - audio with no images gets a fixed "listen to this voice command" prompt;
 * - every other case gets the generic 'Analyze this input.' prompt.
 *
 * @param {string} finalMessage - The user's message (may be empty).
 * @param {Array} [images=[]] - Image data URIs; null/undefined is treated as
 *   an empty list.
 * @param {string|null} [base64Audio=null] - Audio data URI, if any.
 * @returns {{text: string, imageDataUris: Array, audioDataUri: (string|null)}}
 */
function buildChatObservation(finalMessage, images = [], base64Audio = null) {
    // A default parameter only covers `undefined`; an explicit null must not
    // crash the `.length` check below.
    const imageList = images == null ? [] : images;

    let text;
    if (finalMessage) {
        text = buildMessageWithRelevantMemory(finalMessage);
    } else if (base64Audio && imageList.length === 0) {
        text = 'Please listen to this voice command and respond in Thai with the appropriate JSON action if needed.';
    } else {
        text = 'Analyze this input.';
    }

    return {
        text,
        imageDataUris: imageList,
        audioDataUri: base64Audio || null
    };
}
|
|
446
|
+
|
|
447
|
+
/**
 * Turns raw provider output into the result object returned to the UI.
 *
 * Parsing strategy, in order:
 *   1. parse the whole (memory-stripped) text as JSON;
 *   2. otherwise parse the contents of a ```json fenced block, or failing
 *      that the span from the first `{` to the last `}`;
 *   3. otherwise — including when step 2 extracts text that is not valid
 *      JSON — treat the whole text as a plain reply with action "none".
 *
 * The parsed value is then passed through normalizeParsedResult, a string
 * `response` is unicode-decoded and memory-stripped, validateParsedAction is
 * applied, and `timestamp` is set.
 *
 * @param {string} outputText - Raw text returned by the provider.
 * @param {string} [originalText=''] - The user's message, forwarded to
 *   normalizeParsedResult.
 * @param {string} [now] - Timestamp to attach; defaults to the current time
 *   in ISO format.
 * @returns {object} Normalized result carrying `response`, `action` and
 *   `timestamp`.
 */
function parseChatProviderResponse(outputText, originalText = '', now = new Date().toISOString()) {
    const cleaned = stripRelevantMemoryBlock(String(outputText || ''));
    const asPlainText = () => ({
        response: cleaned,
        action: { type: 'none', target: '' }
    });

    let parsedResult;
    try {
        parsedResult = JSON.parse(cleaned);
    } catch (e) {
        const jsonMatch = cleaned.match(/```json\n([\s\S]*?)\n```/) || cleaned.match(/\{[\s\S]*\}/);
        if (jsonMatch) {
            try {
                // Index 1 is the fenced block's capture group; the brace
                // regex has no group, so its match lives at index 0.
                parsedResult = JSON.parse(jsonMatch[jsonMatch.length > 1 ? 1 : 0]);
            } catch (extractError) {
                // Braces in ordinary prose are not JSON: fall back to the
                // plain-text reply rather than failing the whole chat turn.
                parsedResult = asPlainText();
            }
        } else {
            parsedResult = asPlainText();
        }
    }

    parsedResult = normalizeParsedResult(parsedResult, originalText);
    if (parsedResult && typeof parsedResult.response === 'string') {
        parsedResult.response = stripRelevantMemoryBlock(decodeUnicode(parsedResult.response));
    }
    validateParsedAction(parsedResult);
    parsedResult.timestamp = now;
    return parsedResult;
}
|
|
472
|
+
|
|
473
|
+
/**
 * Appends one user/model exchange to the stored chat history and persists it.
 *
 * The user turn stores finalMessage (or 'Analyze this input.' when it is
 * empty); the model turn stores the raw provider output and the provider
 * metadata. Both turns share the same timestamp. Only the most recent
 * MAX_STORED_HISTORY_MESSAGES entries are kept, and the list is passed
 * through cleanHistoryForStorage before writeChatHistory is called.
 *
 * @param {Array<object>} previousHistory - History before this exchange;
 *   non-arrays are treated as empty.
 * @param {string} finalMessage - User text to record.
 * @param {string} outputText - Raw provider output to record.
 * @param {object} providerInfo - Provider metadata stored on the model turn.
 * @param {string} now - Timestamp for both new entries.
 * @returns {void}
 */
function appendChatProviderHistory(previousHistory, finalMessage, outputText, providerInfo, now) {
    const earlierTurns = Array.isArray(previousHistory) ? previousHistory : [];

    const userTurn = {
        role: 'user',
        parts: [{ text: finalMessage || 'Analyze this input.' }],
        timestamp: now
    };
    const modelTurn = {
        role: 'model',
        parts: [{ text: String(outputText || '') }],
        timestamp: now,
        providerInfo
    };

    const combined = [...earlierTurns, userTurn, modelTurn];
    const retained = combined.slice(-MAX_STORED_HISTORY_MESSAGES);
    writeChatHistory(cleanHistoryForStorage(retained));
}
|
|
491
|
+
|
|
373
492
|
async function handleChat(message, base64Image = null, base64Audio = null) {
|
|
374
493
|
try {
|
|
375
494
|
const config = readConfig();
|
|
495
|
+
const images = normalizeImageList(base64Image);
|
|
496
|
+
const previousHistory = readChatHistory();
|
|
497
|
+
const userVisibleMessage = stripRelevantMemoryBlock(message);
|
|
498
|
+
const containsSmartContext = hasSmartContextBlock(message);
|
|
376
499
|
|
|
377
500
|
let finalMessage = message;
|
|
378
501
|
|
|
379
502
|
// Inject Local RAG Context
|
|
380
|
-
if (
|
|
503
|
+
if (userVisibleMessage && userVisibleMessage.trim().length > 0 && shouldUseKnowledgeSearch(userVisibleMessage)) {
|
|
381
504
|
const { searchKnowledge } = require('./knowledge_base');
|
|
382
|
-
const retrievedDocs = await searchKnowledge(
|
|
505
|
+
const retrievedDocs = await searchKnowledge(userVisibleMessage);
|
|
383
506
|
if (retrievedDocs && retrievedDocs.length > 0) {
|
|
384
507
|
let contextString = `\n\n[LOCAL KNOWLEDGE BASE - USE THIS CONTEXT TO ANSWER]\n`;
|
|
385
508
|
retrievedDocs.forEach(doc => {
|
|
@@ -389,179 +512,47 @@ async function handleChat(message, base64Image = null, base64Audio = null) {
|
|
|
389
512
|
}
|
|
390
513
|
}
|
|
391
514
|
|
|
392
|
-
|
|
393
|
-
|
|
394
|
-
|
|
395
|
-
const currentProv = providersToTry[i];
|
|
396
|
-
try {
|
|
397
|
-
if (currentProv === 'ollama') {
|
|
398
|
-
return withProviderInfo(await handleOllamaChat(finalMessage, base64Image, base64Audio, config), currentProv, config);
|
|
399
|
-
}
|
|
400
|
-
if (currentProv === 'anthropic') {
|
|
401
|
-
return withProviderInfo(await handleAnthropicChat(finalMessage, base64Image, config), currentProv, config);
|
|
402
|
-
}
|
|
403
|
-
if (currentProv === 'openai') {
|
|
404
|
-
return withProviderInfo(await handleOpenAIChat(finalMessage, base64Image, config), currentProv, config);
|
|
405
|
-
}
|
|
406
|
-
if (currentProv === 'local_openai') {
|
|
407
|
-
return withProviderInfo(await handleLocalOpenAIChat(finalMessage, base64Image, config), currentProv, config);
|
|
408
|
-
}
|
|
409
|
-
if (currentProv === 'huggingface') {
|
|
410
|
-
return withProviderInfo(await handleHuggingFaceChat(finalMessage, base64Image, config), currentProv, config);
|
|
411
|
-
}
|
|
412
|
-
|
|
413
|
-
const currentKey = resolveApiKey();
|
|
414
|
-
if (!currentKey) {
|
|
415
|
-
if (i === providersToTry.length - 1) {
|
|
416
|
-
return withProviderInfo({
|
|
417
|
-
response: "I couldn't find your Gemini API Key. Please run 'mint onboard' to set it up!",
|
|
418
|
-
action: { type: "none", target: "" }
|
|
419
|
-
}, currentProv, config);
|
|
420
|
-
}
|
|
421
|
-
console.warn("[Fallback System] Gemini API key missing. Skipping Gemini provider.");
|
|
422
|
-
continue;
|
|
423
|
-
}
|
|
424
|
-
|
|
425
|
-
if (!ai || activeApiKey !== currentKey) {
|
|
426
|
-
initAiClient();
|
|
427
|
-
createChat(readChatHistory());
|
|
428
|
-
}
|
|
429
|
-
|
|
430
|
-
return withProviderInfo(await handleGeminiChat(finalMessage, base64Image, base64Audio), currentProv, config);
|
|
431
|
-
} catch (error) {
|
|
432
|
-
console.error(`[Fallback System] Provider '${currentProv}' failed:`, error.message);
|
|
433
|
-
if (i === providersToTry.length - 1) {
|
|
434
|
-
console.error("[Fallback System] All available providers failed.");
|
|
435
|
-
throw error; // No more providers to fallback to
|
|
436
|
-
}
|
|
437
|
-
console.log(`[Fallback System] Switching to next available provider: '${providersToTry[i+1]}'`);
|
|
438
|
-
// Continue the loop to try the next provider
|
|
439
|
-
}
|
|
440
|
-
}
|
|
441
|
-
} catch (globalError) {
|
|
442
|
-
console.error("handleChat error:", globalError);
|
|
443
|
-
throw globalError;
|
|
444
|
-
}
|
|
445
|
-
}
|
|
446
|
-
|
|
447
|
-
async function handleGeminiChat(finalMessage, base64Image, base64Audio) {
|
|
448
|
-
try {
|
|
449
|
-
const images = normalizeImageList(base64Image);
|
|
450
|
-
// 1. Check cache first for text-only messages
|
|
451
|
-
if (finalMessage && images.length === 0 && !base64Audio) {
|
|
452
|
-
const cached = memoryStore.getCachedResponse(finalMessage);
|
|
453
|
-
if (cached) return cached;
|
|
454
|
-
}
|
|
455
|
-
|
|
456
|
-
const desiredModel = resolveGeminiModel();
|
|
457
|
-
if (!chat || activeModel !== desiredModel) {
|
|
458
|
-
createChat(readChatHistory());
|
|
515
|
+
if (!containsSmartContext && userVisibleMessage && images.length === 0 && !base64Audio) {
|
|
516
|
+
const cached = memoryStore.getCachedResponse(userVisibleMessage);
|
|
517
|
+
if (cached) return cached;
|
|
459
518
|
}
|
|
460
519
|
|
|
461
|
-
|
|
462
|
-
const
|
|
463
|
-
|
|
464
|
-
|
|
465
|
-
|
|
466
|
-
|
|
467
|
-
|
|
468
|
-
|
|
469
|
-
|
|
470
|
-
}
|
|
471
|
-
|
|
472
|
-
|
|
473
|
-
const image = imageDataUriToInlineData(item);
|
|
474
|
-
parts.push({
|
|
475
|
-
inlineData: image
|
|
476
|
-
});
|
|
477
|
-
}
|
|
478
|
-
|
|
479
|
-
if (base64Audio) {
|
|
480
|
-
// Extract MIME type from the data URI if present, fallback to audio/webm
|
|
481
|
-
let mimeType = "audio/webm";
|
|
482
|
-
const mimeMatch = base64Audio.match(/^data:(audio\/\w+);base64,/);
|
|
483
|
-
if (mimeMatch) {
|
|
484
|
-
mimeType = mimeMatch[1];
|
|
485
|
-
}
|
|
486
|
-
|
|
487
|
-
const base64Data = base64Audio.replace(/^data:audio\/\w+;base64,/, '');
|
|
488
|
-
parts.push({
|
|
489
|
-
inlineData: { mimeType: mimeType, data: base64Data }
|
|
490
|
-
});
|
|
491
|
-
}
|
|
492
|
-
|
|
493
|
-
aiResponse = await chat.sendMessage({ message: parts });
|
|
494
|
-
|
|
495
|
-
// Save history with timestamps
|
|
496
|
-
const history = await chat.getHistory();
|
|
520
|
+
const providersToTry = getProviderAttemptOrder(config);
|
|
521
|
+
const client = new providerAdapter.AgentProviderClient({
|
|
522
|
+
provider: providersToTry[0],
|
|
523
|
+
providerOrder: providersToTry,
|
|
524
|
+
config,
|
|
525
|
+
history: chatHistoryToProviderHistory(previousHistory),
|
|
526
|
+
systemInstruction: buildSystemPrompt(),
|
|
527
|
+
responseMimeType: 'application/json',
|
|
528
|
+
maxTokens: 4096
|
|
529
|
+
});
|
|
530
|
+
const observation = buildChatObservation(finalMessage, images, base64Audio);
|
|
531
|
+
const outputText = await client.sendMessage(observation);
|
|
497
532
|
const now = new Date().toISOString();
|
|
498
|
-
|
|
499
|
-
|
|
500
|
-
|
|
501
|
-
|
|
502
|
-
|
|
503
|
-
|
|
504
|
-
|
|
505
|
-
|
|
506
|
-
|
|
507
|
-
|
|
508
|
-
|
|
509
|
-
|
|
510
|
-
|
|
511
|
-
|
|
512
|
-
|
|
513
|
-
|
|
514
|
-
|
|
515
|
-
outputText = (typeof aiResponse.text === 'function') ? aiResponse.text() : (aiResponse.text || '');
|
|
516
|
-
} catch (e) {
|
|
517
|
-
outputText = String(aiResponse || '');
|
|
518
|
-
}
|
|
519
|
-
|
|
520
|
-
outputText = stripRelevantMemoryBlock(outputText);
|
|
521
|
-
|
|
522
|
-
let parsedResult;
|
|
523
|
-
try {
|
|
524
|
-
parsedResult = JSON.parse(outputText);
|
|
525
|
-
} catch (e) {
|
|
526
|
-
// Fallback in case the model failed to return pure JSON
|
|
527
|
-
console.error("Failed to parse JSON directly:", e);
|
|
528
|
-
const jsonMatch = outputText.match(/```json\n([\s\S]*?)\n```/) || outputText.match(/\{[\s\S]*\}/);
|
|
529
|
-
if (jsonMatch) {
|
|
530
|
-
parsedResult = JSON.parse(jsonMatch[jsonMatch.length > 1 ? 1 : 0]);
|
|
531
|
-
} else {
|
|
532
|
-
parsedResult = {
|
|
533
|
-
response: outputText,
|
|
534
|
-
action: { type: "none", target: "" }
|
|
535
|
-
};
|
|
536
|
-
}
|
|
537
|
-
}
|
|
538
|
-
|
|
539
|
-
// Decode any remaining unicode escapes in the response text
|
|
540
|
-
if (parsedResult && typeof parsedResult.response === 'string') {
|
|
541
|
-
parsedResult.response = decodeUnicode(parsedResult.response);
|
|
542
|
-
parsedResult.response = stripRelevantMemoryBlock(parsedResult.response);
|
|
543
|
-
}
|
|
544
|
-
|
|
545
|
-
// Attach timestamp to the result
|
|
546
|
-
validateParsedAction(parsedResult);
|
|
547
|
-
parsedResult.timestamp = now;
|
|
548
|
-
|
|
549
|
-
// Record interaction for long-term memory (non-blocking)
|
|
550
|
-
if (finalMessage && parsedResult.response) {
|
|
551
|
-
setImmediate(() => {
|
|
552
|
-
memoryStore.recordInteraction(finalMessage, parsedResult.response);
|
|
553
|
-
// Cache text-only responses
|
|
554
|
-
if (images.length === 0 && !base64Audio) {
|
|
555
|
-
memoryStore.cacheResponse(finalMessage, parsedResult);
|
|
556
|
-
}
|
|
557
|
-
});
|
|
533
|
+
const provider = client.lastSuccessfulProvider || client.provider || providersToTry[0];
|
|
534
|
+
const providerInfo = {
|
|
535
|
+
provider,
|
|
536
|
+
model: getProviderModel(provider, config),
|
|
537
|
+
usage: client.getUsageSummary()
|
|
538
|
+
};
|
|
539
|
+
const parsedResult = parseChatProviderResponse(outputText, userVisibleMessage || finalMessage, now);
|
|
540
|
+
parsedResult.providerInfo = providerInfo;
|
|
541
|
+
appendChatProviderHistory(previousHistory, userVisibleMessage || finalMessage, outputText, providerInfo, now);
|
|
542
|
+
|
|
543
|
+
if ((userVisibleMessage || finalMessage) && parsedResult.response) {
|
|
544
|
+
setImmediate(() => {
|
|
545
|
+
memoryStore.recordInteraction(userVisibleMessage || finalMessage, parsedResult.response);
|
|
546
|
+
if (!containsSmartContext && images.length === 0 && !base64Audio) {
|
|
547
|
+
memoryStore.cacheResponse(userVisibleMessage || finalMessage, parsedResult);
|
|
548
|
+
}
|
|
549
|
+
});
|
|
558
550
|
}
|
|
559
551
|
|
|
560
552
|
return parsedResult;
|
|
561
|
-
|
|
562
|
-
|
|
563
|
-
|
|
564
|
-
throw error;
|
|
553
|
+
} catch (globalError) {
|
|
554
|
+
console.error("handleChat error:", globalError);
|
|
555
|
+
throw globalError;
|
|
565
556
|
}
|
|
566
557
|
}
|
|
567
558
|
|
|
@@ -573,6 +564,7 @@ async function handleGeminiChat(finalMessage, base64Image, base64Audio) {
|
|
|
573
564
|
async function* handleGeminiChatStream(finalMessage, base64Image, base64Audio) {
|
|
574
565
|
try {
|
|
575
566
|
const images = normalizeImageList(base64Image);
|
|
567
|
+
const previousHistory = readChatHistory();
|
|
576
568
|
// 1. Check cache first
|
|
577
569
|
if (finalMessage && images.length === 0 && !base64Audio) {
|
|
578
570
|
const cached = memoryStore.getCachedResponse(finalMessage);
|
|
@@ -624,7 +616,7 @@ async function* handleGeminiChatStream(finalMessage, base64Image, base64Audio) {
|
|
|
624
616
|
fullText = stripRelevantMemoryBlock(fullText);
|
|
625
617
|
|
|
626
618
|
// Save history
|
|
627
|
-
const history = await chat.getHistory();
|
|
619
|
+
const history = preserveHistoryMetadata(await chat.getHistory(), previousHistory, new Date().toISOString());
|
|
628
620
|
const now = new Date().toISOString();
|
|
629
621
|
if (history.length >= 2) {
|
|
630
622
|
const modelMsg = history[history.length - 1];
|
|
@@ -646,6 +638,8 @@ async function* handleGeminiChatStream(finalMessage, base64Image, base64Audio) {
|
|
|
646
638
|
parsedResult = { response: fullText, action: { type: 'none', target: '' } };
|
|
647
639
|
}
|
|
648
640
|
}
|
|
641
|
+
parsedResult = normalizeParsedResult(parsedResult, finalMessage);
|
|
642
|
+
|
|
649
643
|
if (parsedResult && typeof parsedResult.response === 'string') {
|
|
650
644
|
parsedResult.response = decodeUnicode(parsedResult.response);
|
|
651
645
|
parsedResult.response = stripRelevantMemoryBlock(parsedResult.response);
|
|
@@ -672,269 +666,6 @@ async function* handleGeminiChatStream(finalMessage, base64Image, base64Audio) {
|
|
|
672
666
|
}
|
|
673
667
|
}
|
|
674
668
|
|
|
675
|
-
async function handleAnthropicChat(finalMessage, base64Image, config) {
|
|
676
|
-
const history = readChatHistory() || [];
|
|
677
|
-
const images = normalizeImageList(base64Image);
|
|
678
|
-
const apiKey = config.anthropicApiKey || process.env.ANTHROPIC_API_KEY;
|
|
679
|
-
if (isPlaceholder(apiKey)) return { response: "กรุณาใส่ Anthropic API Key ในการตั้งค่าก่อนนะคะ", action: { type: "none" } };
|
|
680
|
-
|
|
681
|
-
const systemPrompt = buildSystemPrompt();
|
|
682
|
-
|
|
683
|
-
const messages = [];
|
|
684
|
-
for (const msg of history.slice(-MAX_HISTORY_MESSAGES)) {
|
|
685
|
-
const role = msg.role === 'model' ? 'assistant' : 'user';
|
|
686
|
-
let text = Array.isArray(msg.parts) ? msg.parts.map(p => p.text || '').join('\n') : '';
|
|
687
|
-
if (text) messages.push({ role, content: text });
|
|
688
|
-
}
|
|
689
|
-
|
|
690
|
-
const content = [];
|
|
691
|
-
for (const item of images) {
|
|
692
|
-
const image = imageDataUriToInlineData(item);
|
|
693
|
-
content.push({
|
|
694
|
-
type: "image",
|
|
695
|
-
source: { type: "base64", media_type: image.mimeType, data: image.data }
|
|
696
|
-
});
|
|
697
|
-
}
|
|
698
|
-
content.push({ type: "text", text: finalMessage || "Analyze this." });
|
|
699
|
-
messages.push({ role: "user", content });
|
|
700
|
-
|
|
701
|
-
const response = await axios.post('https://api.anthropic.com/v1/messages', {
|
|
702
|
-
model: config.anthropicModel || 'claude-3-5-sonnet-latest',
|
|
703
|
-
max_tokens: 4096,
|
|
704
|
-
system: systemPrompt,
|
|
705
|
-
messages: messages
|
|
706
|
-
}, {
|
|
707
|
-
headers: {
|
|
708
|
-
'x-api-key': apiKey,
|
|
709
|
-
'anthropic-version': '2023-06-01',
|
|
710
|
-
'content-type': 'application/json'
|
|
711
|
-
}
|
|
712
|
-
});
|
|
713
|
-
|
|
714
|
-
const outputText = response.data.content[0].text;
|
|
715
|
-
history.push({ role: 'user', parts: [{ text: finalMessage }] });
|
|
716
|
-
history.push({ role: 'model', parts: [{ text: outputText }] });
|
|
717
|
-
writeChatHistory(cleanHistoryForStorage(history.slice(-MAX_STORED_HISTORY_MESSAGES)));
|
|
718
|
-
|
|
719
|
-
return parseAiResponse(outputText);
|
|
720
|
-
}
|
|
721
|
-
|
|
722
|
-
async function handleOpenAIChat(finalMessage, base64Image, config) {
|
|
723
|
-
const history = readChatHistory() || [];
|
|
724
|
-
const images = normalizeImageList(base64Image);
|
|
725
|
-
const apiKey = config.openaiApiKey || process.env.OPENAI_API_KEY;
|
|
726
|
-
if (isPlaceholder(apiKey)) return { response: "กรุณาใส่ OpenAI API Key ในการตั้งค่าก่อนนะคะ", action: { type: "none" } };
|
|
727
|
-
|
|
728
|
-
const systemPrompt = buildSystemPrompt();
|
|
729
|
-
|
|
730
|
-
const messages = [{ role: "system", content: systemPrompt }];
|
|
731
|
-
for (const msg of history.slice(-MAX_HISTORY_MESSAGES)) {
|
|
732
|
-
const role = msg.role === 'model' ? 'assistant' : 'user';
|
|
733
|
-
let text = Array.isArray(msg.parts) ? msg.parts.map(p => p.text || '').join('\n') : '';
|
|
734
|
-
if (text) messages.push({ role, content: text });
|
|
735
|
-
}
|
|
736
|
-
|
|
737
|
-
const content = [{ type: "text", text: finalMessage || "Analyze this." }];
|
|
738
|
-
for (const item of images) {
|
|
739
|
-
content.push({
|
|
740
|
-
type: "image_url",
|
|
741
|
-
image_url: { url: item }
|
|
742
|
-
});
|
|
743
|
-
}
|
|
744
|
-
messages.push({ role: "user", content });
|
|
745
|
-
|
|
746
|
-
const response = await axios.post('https://api.openai.com/v1/chat/completions', {
|
|
747
|
-
model: config.openaiModel || 'gpt-4o',
|
|
748
|
-
messages: messages,
|
|
749
|
-
response_format: { type: "json_object" }
|
|
750
|
-
}, {
|
|
751
|
-
headers: {
|
|
752
|
-
'Authorization': `Bearer ${apiKey}`,
|
|
753
|
-
'Content-Type': 'application/json'
|
|
754
|
-
}
|
|
755
|
-
});
|
|
756
|
-
|
|
757
|
-
const outputText = response.data.choices[0].message.content;
|
|
758
|
-
history.push({ role: 'user', parts: [{ text: finalMessage }] });
|
|
759
|
-
history.push({ role: 'model', parts: [{ text: outputText }] });
|
|
760
|
-
writeChatHistory(cleanHistoryForStorage(history.slice(-MAX_STORED_HISTORY_MESSAGES)));
|
|
761
|
-
|
|
762
|
-
return parseAiResponse(outputText);
|
|
763
|
-
}
|
|
764
|
-
|
|
765
|
-
async function handleLocalOpenAIChat(finalMessage, base64Image, config) {
|
|
766
|
-
const history = readChatHistory() || [];
|
|
767
|
-
const images = normalizeImageList(base64Image);
|
|
768
|
-
const apiKey = 'lm-studio';
|
|
769
|
-
const baseUrl = config.localApiBaseUrl || 'http://localhost:1234/v1';
|
|
770
|
-
|
|
771
|
-
const systemPrompt = buildSystemPrompt();
|
|
772
|
-
|
|
773
|
-
const messages = [{ role: "system", content: systemPrompt }];
|
|
774
|
-
for (const msg of history.slice(-MAX_HISTORY_MESSAGES)) {
|
|
775
|
-
const role = msg.role === 'model' ? 'assistant' : 'user';
|
|
776
|
-
let text = Array.isArray(msg.parts) ? msg.parts.map(p => p.text || '').join('\n') : '';
|
|
777
|
-
if (text) messages.push({ role, content: text });
|
|
778
|
-
}
|
|
779
|
-
|
|
780
|
-
const content = [{ type: "text", text: finalMessage || "Analyze this." }];
|
|
781
|
-
for (const item of images) {
|
|
782
|
-
content.push({
|
|
783
|
-
type: "image_url",
|
|
784
|
-
image_url: { url: item }
|
|
785
|
-
});
|
|
786
|
-
}
|
|
787
|
-
messages.push({ role: "user", content });
|
|
788
|
-
|
|
789
|
-
const response = await axios.post(`${baseUrl.replace(/\/$/, '')}/chat/completions`, {
|
|
790
|
-
model: config.localModelName || 'local-model',
|
|
791
|
-
messages: messages,
|
|
792
|
-
// response_format json_object is sometimes problematic on weak local models, but required by our prompt.
|
|
793
|
-
// We'll keep it as some local servers like LM Studio support it for specific models.
|
|
794
|
-
// If not supported, the system prompt usually coerces it anyway.
|
|
795
|
-
response_format: { type: "json_object" }
|
|
796
|
-
}, {
|
|
797
|
-
headers: {
|
|
798
|
-
'Authorization': `Bearer ${apiKey}`,
|
|
799
|
-
'Content-Type': 'application/json'
|
|
800
|
-
}
|
|
801
|
-
});
|
|
802
|
-
|
|
803
|
-
const outputText = response.data.choices[0].message.content;
|
|
804
|
-
history.push({ role: 'user', parts: [{ text: finalMessage }] });
|
|
805
|
-
history.push({ role: 'model', parts: [{ text: outputText }] });
|
|
806
|
-
writeChatHistory(cleanHistoryForStorage(history.slice(-MAX_STORED_HISTORY_MESSAGES)));
|
|
807
|
-
|
|
808
|
-
return parseAiResponse(outputText);
|
|
809
|
-
}
|
|
810
|
-
|
|
811
|
-
async function handleHuggingFaceChat(finalMessage, base64Image, config) {
|
|
812
|
-
const history = readChatHistory() || [];
|
|
813
|
-
const images = normalizeImageList(base64Image);
|
|
814
|
-
const apiKey = config.hfApiKey || process.env.HF_API_KEY;
|
|
815
|
-
if (isPlaceholder(apiKey)) return { response: "กรุณาใส่ Hugging Face API Key ในการตั้งค่าก่อนนะคะ", action: { type: "none" } };
|
|
816
|
-
|
|
817
|
-
const modelId = config.hfModel || 'meta-llama/Meta-Llama-3-8B-Instruct';
|
|
818
|
-
const baseUrl = `https://api-inference.huggingface.co/models/${modelId}/v1/chat/completions`;
|
|
819
|
-
|
|
820
|
-
const systemPrompt = buildSystemPrompt();
|
|
821
|
-
|
|
822
|
-
const messages = [{ role: "system", content: systemPrompt }];
|
|
823
|
-
for (const msg of history.slice(-MAX_HISTORY_MESSAGES)) {
|
|
824
|
-
const role = msg.role === 'model' ? 'assistant' : 'user';
|
|
825
|
-
let text = Array.isArray(msg.parts) ? msg.parts.map(p => p.text || '').join('\n') : '';
|
|
826
|
-
if (text) messages.push({ role, content: text });
|
|
827
|
-
}
|
|
828
|
-
|
|
829
|
-
const content = [{ type: "text", text: finalMessage || "Analyze this." }];
|
|
830
|
-
for (const item of images) {
|
|
831
|
-
content.push({
|
|
832
|
-
type: "image_url",
|
|
833
|
-
image_url: { url: item }
|
|
834
|
-
});
|
|
835
|
-
}
|
|
836
|
-
messages.push({ role: "user", content });
|
|
837
|
-
|
|
838
|
-
const response = await axios.post(baseUrl, {
|
|
839
|
-
model: modelId,
|
|
840
|
-
messages: messages,
|
|
841
|
-
max_tokens: 4096
|
|
842
|
-
}, {
|
|
843
|
-
headers: {
|
|
844
|
-
'Authorization': `Bearer ${apiKey}`,
|
|
845
|
-
'Content-Type': 'application/json'
|
|
846
|
-
}
|
|
847
|
-
});
|
|
848
|
-
|
|
849
|
-
const outputText = response.data.choices[0].message.content;
|
|
850
|
-
history.push({ role: 'user', parts: [{ text: finalMessage }] });
|
|
851
|
-
history.push({ role: 'model', parts: [{ text: outputText }] });
|
|
852
|
-
writeChatHistory(cleanHistoryForStorage(history.slice(-MAX_STORED_HISTORY_MESSAGES)));
|
|
853
|
-
|
|
854
|
-
return parseAiResponse(outputText);
|
|
855
|
-
}
|
|
856
|
-
|
|
857
|
-
function parseAiResponse(outputText) {
|
|
858
|
-
let parsedResult;
|
|
859
|
-
try {
|
|
860
|
-
parsedResult = JSON.parse(outputText);
|
|
861
|
-
} catch (e) {
|
|
862
|
-
const jsonMatch = outputText.match(/```json\n([\s\S]*?)\n```/) || outputText.match(/\{[\s\S]*\}/);
|
|
863
|
-
if (jsonMatch) {
|
|
864
|
-
parsedResult = JSON.parse(jsonMatch[jsonMatch.length > 1 ? 1 : 0]);
|
|
865
|
-
} else {
|
|
866
|
-
parsedResult = { response: outputText, action: { type: "none", target: "" } };
|
|
867
|
-
}
|
|
868
|
-
}
|
|
869
|
-
if (parsedResult && typeof parsedResult.response === 'string') {
|
|
870
|
-
parsedResult.response = decodeUnicode(parsedResult.response);
|
|
871
|
-
}
|
|
872
|
-
validateParsedAction(parsedResult);
|
|
873
|
-
parsedResult.timestamp = new Date().toISOString();
|
|
874
|
-
return parsedResult;
|
|
875
|
-
}
|
|
876
|
-
|
|
877
|
-
async function handleOllamaChat(finalMessage, base64Image, base64Audio, config) {
|
|
878
|
-
const history = readChatHistory() || [];
|
|
879
|
-
const imageInputs = normalizeImageList(base64Image);
|
|
880
|
-
|
|
881
|
-
const ollamaMessages = [
|
|
882
|
-
{ role: 'system', content: buildSystemPrompt() }
|
|
883
|
-
];
|
|
884
|
-
|
|
885
|
-
for (const msg of history.slice(-MAX_HISTORY_MESSAGES)) {
|
|
886
|
-
const role = msg.role === 'model' ? 'assistant' : 'user';
|
|
887
|
-
let text = '';
|
|
888
|
-
if (Array.isArray(msg.parts)) {
|
|
889
|
-
text = msg.parts.map(p => p.text || '').join('\n');
|
|
890
|
-
}
|
|
891
|
-
if (text) ollamaMessages.push({ role, content: text });
|
|
892
|
-
}
|
|
893
|
-
|
|
894
|
-
let currentContent = finalMessage || 'Analyze this input.';
|
|
895
|
-
let images = [];
|
|
896
|
-
for (const item of imageInputs) {
|
|
897
|
-
images.push(imageDataUriToBase64(item));
|
|
898
|
-
}
|
|
899
|
-
|
|
900
|
-
if (base64Audio && imageInputs.length === 0 && !finalMessage) {
|
|
901
|
-
currentContent = "Please analyze this audio requirement based on text if any was transacted, otherwise reply with appropriate action.";
|
|
902
|
-
}
|
|
903
|
-
|
|
904
|
-
const userMessage = { role: 'user', content: currentContent };
|
|
905
|
-
if (images.length > 0) userMessage.images = images;
|
|
906
|
-
|
|
907
|
-
ollamaMessages.push(userMessage);
|
|
908
|
-
|
|
909
|
-
const ollamaBaseUrl = (config.ollamaHost || 'http://localhost:11434').replace(/\/$/, '');
|
|
910
|
-
const response = await axios.post(`${ollamaBaseUrl}/api/chat`, {
|
|
911
|
-
model: config.ollamaModel || 'llama3:latest',
|
|
912
|
-
messages: ollamaMessages,
|
|
913
|
-
format: 'json',
|
|
914
|
-
stream: false
|
|
915
|
-
});
|
|
916
|
-
|
|
917
|
-
const outputText = response.data.message.content;
|
|
918
|
-
|
|
919
|
-
history.push({ role: 'user', parts: [{ text: currentContent }] });
|
|
920
|
-
history.push({ role: 'model', parts: [{ text: outputText }] });
|
|
921
|
-
writeChatHistory(cleanHistoryForStorage(history.slice(-MAX_STORED_HISTORY_MESSAGES)));
|
|
922
|
-
|
|
923
|
-
let parsedResult;
|
|
924
|
-
try {
|
|
925
|
-
parsedResult = JSON.parse(outputText);
|
|
926
|
-
} catch(e) {
|
|
927
|
-
const jsonMatch = outputText.match(/```json\n([\s\S]*?)\n```/) || outputText.match(/\{[\s\S]*\}/);
|
|
928
|
-
if (jsonMatch) {
|
|
929
|
-
parsedResult = JSON.parse(jsonMatch[jsonMatch.length > 1 ? 1 : 0]);
|
|
930
|
-
} else {
|
|
931
|
-
parsedResult = { response: outputText, action: { type: "none", target: "" } };
|
|
932
|
-
}
|
|
933
|
-
}
|
|
934
|
-
validateParsedAction(parsedResult);
|
|
935
|
-
return parsedResult;
|
|
936
|
-
}
|
|
937
|
-
|
|
938
669
|
function resetChat() {
|
|
939
670
|
clearChatHistory();
|
|
940
671
|
memoryStore.clearConversationScopedProfile();
|
|
@@ -980,7 +711,7 @@ function historyToTranscript(history) {
|
|
|
980
711
|
transcript.push({
|
|
981
712
|
sender,
|
|
982
713
|
text,
|
|
983
|
-
timestamp: content.timestamp ||
|
|
714
|
+
timestamp: content.timestamp || null,
|
|
984
715
|
providerInfo: content.providerInfo || null
|
|
985
716
|
});
|
|
986
717
|
}
|
|
@@ -1058,6 +789,8 @@ module.exports = {
|
|
|
1058
789
|
translateImageContent,
|
|
1059
790
|
refreshApiKeyFromConfig,
|
|
1060
791
|
_helpers: {
|
|
1061
|
-
getProviderAttemptOrder
|
|
792
|
+
getProviderAttemptOrder,
|
|
793
|
+
normalizeParsedResult,
|
|
794
|
+
buildActionModeInstruction
|
|
1062
795
|
}
|
|
1063
796
|
};
|