@pheem49/mint 1.5.1 → 1.5.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (52) hide show
  1. package/GUIDE_TH.md +7 -7
  2. package/README.md +140 -66
  3. package/assets/Agent_Mint.png +0 -0
  4. package/assets/Settings.png +0 -0
  5. package/main.js +12 -0
  6. package/mint-cli.js +148 -921
  7. package/models/Shiroko_Model/Shiroko/Shiroko_Core//345/221/206/347/214/253.exp3.json +31 -1
  8. package/models/Shiroko_Model/Shiroko/Shiroko_Core//347/202/271/344/270/200/344/270/213.exp3.json +6 -1
  9. package/package.json +20 -21
  10. package/preload.js +2 -0
  11. package/scripts/install_linux_desktop_entry.js +48 -0
  12. package/src/AI_Brain/Gemini_API.js +194 -491
  13. package/src/AI_Brain/autonomous_brain.js +46 -19
  14. package/src/AI_Brain/headless_agent.js +21 -2
  15. package/src/AI_Brain/proactive_engine.js +12 -2
  16. package/src/AI_Brain/provider_adapter.js +358 -0
  17. package/src/Automation_Layer/browser_automation.js +26 -24
  18. package/src/CLI/approval_handler.js +47 -0
  19. package/src/CLI/chat_router.js +7 -0
  20. package/src/CLI/chat_ui.js +586 -80
  21. package/src/CLI/cli_colors.js +115 -0
  22. package/src/CLI/cli_formatters.js +94 -0
  23. package/src/CLI/code_agent.js +825 -283
  24. package/src/CLI/intent_detectors.js +181 -0
  25. package/src/CLI/interactive_chat.js +641 -0
  26. package/src/CLI/list_features.js +3 -0
  27. package/src/CLI/repo_summarizer.js +282 -0
  28. package/src/CLI/semantic_code_search.js +312 -0
  29. package/src/CLI/skill_manager.js +41 -0
  30. package/src/CLI/slash_command_handler.js +418 -0
  31. package/src/CLI/symbol_indexer.js +231 -0
  32. package/src/CLI/updater.js +21 -1
  33. package/src/Channels/discord_bridge.js +11 -13
  34. package/src/Channels/line_bridge.js +10 -10
  35. package/src/Channels/slack_bridge.js +7 -12
  36. package/src/Channels/telegram_bridge.js +6 -14
  37. package/src/Channels/whatsapp_bridge.js +11 -9
  38. package/src/System/chat_history_manager.js +20 -12
  39. package/src/System/config_manager.js +4 -1
  40. package/src/System/ipc_handlers.js +10 -0
  41. package/src/System/optional_require.js +23 -0
  42. package/src/System/picture_store.js +109 -0
  43. package/src/System/task_manager.js +127 -0
  44. package/src/System/tool_registry.js +13 -0
  45. package/src/System/window_manager.js +16 -8
  46. package/src/UI/live2d_manager.js +246 -14
  47. package/src/UI/renderer.js +620 -45
  48. package/src/UI/settings.css +738 -439
  49. package/src/UI/settings.html +487 -432
  50. package/src/UI/settings.js +44 -10
  51. package/src/UI/styles.css +1403 -106
  52. package/privacy.txt +0 -1
@@ -1,17 +1,17 @@
1
1
  const { GoogleGenAI } = require('@google/genai');
2
2
  const { readChatHistory, writeChatHistory, clearChatHistory } = require('../System/chat_history_manager');
3
- const { readConfig, getAvailableProviders, isPlaceholder } = require('../System/config_manager');
3
+ const { readConfig, getAvailableProviders } = require('../System/config_manager');
4
4
  const pluginManager = require('../Plugins/plugin_manager');
5
5
  const mcpManager = require('../Plugins/mcp_manager');
6
6
  const memoryStore = require('./memory_store');
7
7
  const agentOrchestrator = require('./agent_orchestrator');
8
8
  const workspaceManager = require('../CLI/workspace_manager');
9
9
  const toolRegistry = require('../System/tool_registry');
10
+ const providerAdapter = require('./provider_adapter');
10
11
 
11
12
  let ai = null;
12
13
  let activeApiKey = '';
13
14
  const initialEnvKey = (process.env.GEMINI_API_KEY || '').trim();
14
- const axios = require('axios');
15
15
  const DEFAULT_GEMINI_MODEL = 'gemini-2.5-flash';
16
16
 
17
17
  function decodeUnicode(str) {
@@ -42,10 +42,6 @@ function imageDataUriToInlineData(base64Image) {
42
42
  };
43
43
  }
44
44
 
45
- function imageDataUriToBase64(base64Image) {
46
- return imageDataUriToInlineData(base64Image).data;
47
- }
48
-
49
45
  function normalizeImageList(base64Image) {
50
46
  if (!base64Image) return [];
51
47
  return Array.isArray(base64Image) ? base64Image.filter(Boolean) : [base64Image];
@@ -72,9 +68,19 @@ NATURAL CHAT FLOW:
72
68
  - You have the autonomy to suggest better ways to achieve a goal, provide alternative perspectives, and take initiative in helping the user.
73
69
  - Separate distinct points with blank lines (double newline) for readability.
74
70
  - Ask follow-up questions only when they add significant value to the task or conversation.
71
+ - The latest user message is authoritative. Do not continue or describe older tasks unless the latest message explicitly asks you to continue them.
72
+ - For greetings, name-calls, acknowledgements, and backchannels such as "มิ้น", "มิ้นๆ", "อ๋อ", "โอเค", "ขอบคุณ", "hi", "hello", "ok", or "thanks", return action "none" and a short reply only.
75
73
 
76
74
  GOAL:
77
- Your goal is to help the user with their queries. If they ask to open an application, open a website, search, manage files, or get system info, you must trigger an action in the structured JSON format below. **NEVER provide a conversational response about performing an action without including the actual "action" object in your JSON.**
75
+ Your goal is to help the user with their queries. This Electron app is Chat Mode, not Code Agent Mode: use at most ONE simple action per user message, only when the latest message explicitly asks for that local action. If the user asks a question or asks you to provide text/commands, answer with action "none".
76
+
77
+ ACTION DISCIPLINE:
78
+ - Always return a single JSON object. Never return a JSON array or multiple actions.
79
+ - If the user asks "พิมพ์คำสั่งให้หน่อย", "บอกคำสั่ง", "ขอคำสั่ง", "what command", or "type the command", provide the command in "response" and set action "none". Do NOT use "type_text" or "key_tap".
80
+ - Use "type_text", "key_tap", "mouse_click", or "mouse_move" only when the user explicitly asks you to control the currently focused UI, not when they ask for a command to copy/type themselves.
81
+ - If the user asks to run terminal commands or code, Chat Mode should provide the command or tell them to use the CLI agent. Do not type or press Enter on their behalf.
82
+ - Never say you opened, checked, inspected, or verified a file/folder unless the selected action actually does it and the app will execute that action.
83
+ - If the request needs workspace code inspection, edits, tests, or shell execution, tell the user to use the Mint CLI agent instead of pretending to inspect files.
78
84
 
79
85
  CREATOR INFO:
80
86
  - The creator is Pheem49.
@@ -202,7 +208,13 @@ function cleanHistoryForStorage(history) {
202
208
  parts: Array.isArray(msg.parts)
203
209
  ? msg.parts.map(part => {
204
210
  if (part.text) {
205
- return { ...part, text: stripRelevantMemoryBlock(part.text) };
211
+ return {
212
+ text: stripRelevantMemoryBlock(part.text)
213
+ .replace(/data:image\/[\w.+-]+;base64,[A-Za-z0-9+/=]+/g, '[Image omitted from chat history]')
214
+ };
215
+ }
216
+ if (part.inlineData || part.fileData || part.image_url || part.imageUrl) {
217
+ return { text: '[Image omitted from chat history; saved locally when sent by the user.]' };
206
218
  }
207
219
  return part;
208
220
  })
@@ -210,6 +222,20 @@ function cleanHistoryForStorage(history) {
210
222
  }));
211
223
  }
212
224
 
225
/**
 * Re-attach locally stored metadata to a freshly produced chat history.
 *
 * Entries are paired with `previousHistory` by array index. For each entry the
 * entry's own `timestamp` / `providerInfo` wins, then the value found at the
 * same index in `previousHistory`. Only the last two entries (the newest
 * user/model turn) fall back to `now` when no timestamp is known; older
 * entries get `null`.
 *
 * @param {Array<object>} nextHistory - History to decorate; a non-array yields [].
 * @param {Array<object>} previousHistory - Previously stored history (may be missing).
 * @param {string} now - Timestamp used for the newest turn when none is known.
 * @returns {Array<object>} New array of shallow copies; the inputs are not mutated.
 */
function preserveHistoryMetadata(nextHistory, previousHistory, now) {
  if (!Array.isArray(nextHistory)) return [];
  const previous = Array.isArray(previousHistory) ? previousHistory : [];
  const newestTurnStart = nextHistory.length - 2;
  const asRecord = (value) => (value && typeof value === 'object' ? value : {});

  return nextHistory.map((msg, index) => {
    // A null / non-object entry would otherwise throw on property access
    // and abort the whole history save.
    const current = asRecord(msg);
    const prior = asRecord(previous[index]);
    return {
      ...current,
      timestamp: current.timestamp || prior.timestamp || (index >= newestTurnStart ? now : null),
      providerInfo: current.providerInfo || prior.providerInfo || null
    };
  });
}
238
+
213
239
  function validateParsedAction(parsedResult) {
214
240
  if (!parsedResult || !parsedResult.action) {
215
241
  return parsedResult;
@@ -223,6 +249,42 @@ function validateParsedAction(parsedResult) {
223
249
  return parsedResult;
224
250
  }
225
251
 
252
/**
 * Coerce whatever the model returned into the single `{ response, action }`
 * shape the chat pipeline expects.
 *
 * - An array (the model emitted several actions) is collapsed into one reply
 *   with action "none". If any element is a `type_text` action with a target,
 *   that target is surfaced as the command text.
 * - A primitive or null/undefined value becomes a plain-text reply.
 * - An object with a missing or malformed `action` gets action "none".
 * - When the user asked for a command as text, a `type_text` / `key_tap`
 *   action is downgraded to a textual answer with action "none".
 *
 * Object inputs are normalized in place and returned (same reference).
 *
 * @param {*} parsedResult - Parsed model output.
 * @param {string} [originalText=''] - The user's message, used for intent detection.
 * @returns {{response: *, action: {type: string, target: string}}} Normalized result.
 */
function normalizeParsedResult(parsedResult, originalText = '') {
  const noAction = () => ({ type: 'none', target: '' });

  if (Array.isArray(parsedResult)) {
    const first = parsedResult.find(item => item && typeof item === 'object') || {};
    const commandAction = parsedResult.find(item =>
      item && item.action && item.action.type === 'type_text' && item.action.target
    );
    return {
      response: commandAction
        ? `คำสั่งคือ:\n${commandAction.action.target}`
        : (first.response || 'มิ้นท์ตอบได้ทีละ action ต่อข้อความนะคะ ลองสั่งใหม่อีกครั้งได้เลยค่ะ'),
      action: noAction()
    };
  }

  if (!parsedResult || typeof parsedResult !== 'object') {
    // Only null/undefined collapse to an empty reply; valid primitive output
    // such as 0 or false must keep its text instead of being blanked out.
    return {
      response: parsedResult == null ? '' : String(parsedResult),
      action: noAction()
    };
  }

  // An array is typeof 'object' too, but multiple actions are never allowed.
  if (
    !parsedResult.action ||
    typeof parsedResult.action !== 'object' ||
    Array.isArray(parsedResult.action)
  ) {
    parsedResult.action = noAction();
  }

  const input = String(originalText || '').toLowerCase();
  const asksForCommandText = /พิมพ์คำสั่ง|บอกคำสั่ง|ขอคำสั่ง|คำสั่ง.*ให้หน่อย|type.*command|what command|give.*command/.test(input);
  const actionType = parsedResult.action.type;
  if (asksForCommandText && (actionType === 'type_text' || actionType === 'key_tap')) {
    // The user wants the command shown, not typed or pressed on their behalf.
    const typed = actionType === 'type_text' ? String(parsedResult.action.target || '').trim() : '';
    parsedResult.response = typed
      ? `คำสั่งคือ:\n${typed}`
      : (parsedResult.response || 'ได้ค่ะ แต่คำขอนี้ควรตอบเป็นข้อความ ไม่ควรพิมพ์หรือกดปุ่มแทนค่ะ');
    parsedResult.action = noAction();
  }

  return parsedResult;
}
287
+
226
288
  function resolveApiKey() {
227
289
  let settingsKey = '';
228
290
  try {
@@ -259,63 +321,15 @@ function resolveGeminiModel() {
259
321
  }
260
322
 
261
323
  function getProviderAttemptOrder(config) {
262
- const provider = config.aiProvider || 'gemini';
263
324
  const availableProviders = getAvailableProviders(config);
264
- const ordered = availableProviders.includes(provider)
265
- ? [provider, ...availableProviders.filter(p => p !== provider)]
266
- : availableProviders;
267
- return ordered.length > 0 ? ordered : ['gemini'];
325
+ return providerAdapter.getProviderAttemptOrder(config, {
326
+ availableProviders,
327
+ priority: availableProviders
328
+ });
268
329
  }
269
330
 
270
331
  function getProviderModel(provider, config = {}) {
271
- switch (provider) {
272
- case 'gemini':
273
- return (config.geminiModel || DEFAULT_GEMINI_MODEL).trim() || DEFAULT_GEMINI_MODEL;
274
- case 'anthropic':
275
- return config.anthropicModel || 'claude-3-5-sonnet-latest';
276
- case 'openai':
277
- return config.openaiModel || 'gpt-4o';
278
- case 'local_openai':
279
- return config.localModelName || 'local-model';
280
- case 'huggingface':
281
- return config.hfModel || 'meta-llama/Meta-Llama-3-8B-Instruct';
282
- case 'ollama':
283
- return config.ollamaModel || 'llama3:latest';
284
- default:
285
- return '';
286
- }
287
- }
288
-
289
- function withProviderInfo(result, provider, config = {}) {
290
- const normalized = (result && typeof result === 'object')
291
- ? result
292
- : { response: String(result || ''), action: { type: 'none', target: '' } };
293
- const providerInfo = {
294
- provider,
295
- model: getProviderModel(provider, config)
296
- };
297
-
298
- attachProviderInfoToLatestHistory(providerInfo);
299
-
300
- return {
301
- ...normalized,
302
- providerInfo
303
- };
304
- }
305
-
306
- function attachProviderInfoToLatestHistory(providerInfo) {
307
- try {
308
- const history = readChatHistory();
309
- for (let i = history.length - 1; i >= 0; i -= 1) {
310
- if (history[i] && history[i].role === 'model') {
311
- history[i].providerInfo = providerInfo;
312
- writeChatHistory(cleanHistoryForStorage(history));
313
- return;
314
- }
315
- }
316
- } catch (error) {
317
- console.warn('[Provider Info] Failed to persist provider metadata:', error.message);
318
- }
332
+ return providerAdapter.getProviderModel(provider, config);
319
333
  }
320
334
 
321
335
  // Chat session — maintains conversation history within the session
@@ -370,9 +384,89 @@ function shouldUseKnowledgeSearch(message) {
370
384
  return knowledgeHints.some(hint => text.includes(hint));
371
385
  }
372
386
 
387
/**
 * Convert stored chat history into `{ role, content }` messages for the
 * provider client.
 *
 * Only the last MAX_HISTORY_MESSAGES entries are considered. The stored role
 * 'model' maps to 'assistant'; every other role maps to 'user'. String text
 * parts have their relevant-memory block stripped and are joined with
 * newlines. Entries that end up with no visible text are dropped.
 *
 * @param {Array<object>} [history=[]] - Stored history; a non-array is treated as empty.
 * @returns {Array<{role: string, content: string}>} Provider-ready messages.
 */
function chatHistoryToProviderHistory(history = []) {
  const recent = (Array.isArray(history) ? history : []).slice(-MAX_HISTORY_MESSAGES);
  const converted = [];

  for (const entry of recent) {
    const role = entry.role === 'model' ? 'assistant' : 'user';

    let content = '';
    if (Array.isArray(entry.parts)) {
      const pieces = entry.parts.map((part) => {
        if (typeof part.text !== 'string') return '';
        return stripRelevantMemoryBlock(part.text);
      });
      content = pieces.filter(Boolean).join('\n');
    }

    // Skip turns that carry no text (for example image-only parts).
    if (!content.trim()) continue;
    converted.push({ role, content });
  }

  return converted;
}
400
+
401
/**
 * Build the observation payload sent to the provider client for one chat turn.
 *
 * Prompt text selection:
 * - a non-empty `finalMessage` is passed through buildMessageWithRelevantMemory;
 * - audio with no images gets the voice-command guidance prompt;
 * - every other combination gets the generic "Analyze this input." prompt.
 *
 * @param {string} finalMessage - The user's text, possibly empty.
 * @param {Array<string>|string|null} [images=[]] - Image data URIs; a single
 *   value is wrapped in an array and null/undefined is treated as no images.
 * @param {string|null} [base64Audio=null] - Audio data URI, if any.
 * @returns {{text: string, imageDataUris: Array<string>, audioDataUri: (string|null)}}
 */
function buildChatObservation(finalMessage, images = [], base64Audio = null) {
  // Guard against null or a bare string so `.length` below cannot throw.
  let imageDataUris = [];
  if (Array.isArray(images)) {
    imageDataUris = images;
  } else if (images) {
    imageDataUris = [images];
  }

  let text;
  if (finalMessage) {
    text = buildMessageWithRelevantMemory(finalMessage);
  } else if (base64Audio && imageDataUris.length === 0) {
    text = 'Please listen to this voice command and respond in Thai with the appropriate JSON action if needed.';
  } else {
    text = 'Analyze this input.';
  }

  return {
    text,
    imageDataUris,
    audioDataUri: base64Audio || null
  };
}
419
+
420
/**
 * Turn raw provider output into the normalized chat result object.
 *
 * Parsing is attempted, in order, on: the whole cleaned text, the body of a
 * ```json fenced block, and the outermost `{ ... }` span. If none of them is
 * valid JSON the cleaned text itself becomes the reply with action "none",
 * so malformed model output degrades to plain text instead of throwing.
 *
 * The parsed value is then passed through normalizeParsedResult, its string
 * `response` is unicode-decoded and stripped of the relevant-memory block,
 * the action is validated, and `timestamp` is set to `now`.
 *
 * @param {string} outputText - Raw text returned by the provider.
 * @param {string} [originalText=''] - The user's message (for intent checks).
 * @param {string} [now] - ISO timestamp to attach; defaults to the current time.
 * @returns {object} Normalized result with `response`, `action` and `timestamp`.
 */
function parseChatProviderResponse(outputText, originalText = '', now = new Date().toISOString()) {
  const cleaned = stripRelevantMemoryBlock(String(outputText || ''));

  const candidates = [cleaned];
  const fenced = cleaned.match(/```json\r?\n([\s\S]*?)\r?\n```/);
  if (fenced) candidates.push(fenced[1]);
  const braced = cleaned.match(/\{[\s\S]*\}/);
  if (braced) candidates.push(braced[0]);

  let parsedResult;
  let parsed = false;
  for (const candidate of candidates) {
    try {
      parsedResult = JSON.parse(candidate);
      parsed = true;
      break;
    } catch (e) {
      // Not valid JSON in this form; fall through to the next candidate.
    }
  }

  if (!parsed) {
    parsedResult = {
      response: cleaned,
      action: { type: 'none', target: '' }
    };
  }

  parsedResult = normalizeParsedResult(parsedResult, originalText);
  if (parsedResult && typeof parsedResult.response === 'string') {
    parsedResult.response = stripRelevantMemoryBlock(decodeUnicode(parsedResult.response));
  }
  validateParsedAction(parsedResult);
  parsedResult.timestamp = now;
  return parsedResult;
}
445
+
446
/**
 * Append the latest user/model exchange to the stored chat history and
 * write it out.
 *
 * The user turn stores `finalMessage` (or "Analyze this input." when it is
 * empty). The model turn stores the raw provider output text together with
 * `providerInfo`. Both turns share the `now` timestamp. The combined list is
 * trimmed to the last MAX_STORED_HISTORY_MESSAGES entries and passed through
 * cleanHistoryForStorage before writeChatHistory is called.
 *
 * @param {Array<object>} previousHistory - History read before the request; a
 *   non-array is treated as empty.
 * @param {string} finalMessage - The user's text for this turn.
 * @param {string} outputText - Raw provider output for this turn.
 * @param {object} providerInfo - Provider metadata recorded on the model turn.
 * @param {string} now - Timestamp recorded on both turns.
 * @returns {void}
 */
function appendChatProviderHistory(previousHistory, finalMessage, outputText, providerInfo, now) {
  const earlier = Array.isArray(previousHistory) ? previousHistory : [];

  const userTurn = {
    role: 'user',
    parts: [{ text: finalMessage || 'Analyze this input.' }],
    timestamp: now
  };
  const modelTurn = {
    role: 'model',
    parts: [{ text: String(outputText || '') }],
    timestamp: now,
    providerInfo
  };

  const combined = [...earlier, userTurn, modelTurn];
  const trimmed = combined.slice(-MAX_STORED_HISTORY_MESSAGES);
  writeChatHistory(cleanHistoryForStorage(trimmed));
}
464
+
373
465
  async function handleChat(message, base64Image = null, base64Audio = null) {
374
466
  try {
375
467
  const config = readConfig();
468
+ const images = normalizeImageList(base64Image);
469
+ const previousHistory = readChatHistory();
376
470
 
377
471
  let finalMessage = message;
378
472
 
@@ -389,179 +483,47 @@ async function handleChat(message, base64Image = null, base64Audio = null) {
389
483
  }
390
484
  }
391
485
 
392
- const providersToTry = getProviderAttemptOrder(config);
393
-
394
- for (let i = 0; i < providersToTry.length; i++) {
395
- const currentProv = providersToTry[i];
396
- try {
397
- if (currentProv === 'ollama') {
398
- return withProviderInfo(await handleOllamaChat(finalMessage, base64Image, base64Audio, config), currentProv, config);
399
- }
400
- if (currentProv === 'anthropic') {
401
- return withProviderInfo(await handleAnthropicChat(finalMessage, base64Image, config), currentProv, config);
402
- }
403
- if (currentProv === 'openai') {
404
- return withProviderInfo(await handleOpenAIChat(finalMessage, base64Image, config), currentProv, config);
405
- }
406
- if (currentProv === 'local_openai') {
407
- return withProviderInfo(await handleLocalOpenAIChat(finalMessage, base64Image, config), currentProv, config);
408
- }
409
- if (currentProv === 'huggingface') {
410
- return withProviderInfo(await handleHuggingFaceChat(finalMessage, base64Image, config), currentProv, config);
411
- }
412
-
413
- const currentKey = resolveApiKey();
414
- if (!currentKey) {
415
- if (i === providersToTry.length - 1) {
416
- return withProviderInfo({
417
- response: "I couldn't find your Gemini API Key. Please run 'mint onboard' to set it up!",
418
- action: { type: "none", target: "" }
419
- }, currentProv, config);
420
- }
421
- console.warn("[Fallback System] Gemini API key missing. Skipping Gemini provider.");
422
- continue;
423
- }
424
-
425
- if (!ai || activeApiKey !== currentKey) {
426
- initAiClient();
427
- createChat(readChatHistory());
428
- }
429
-
430
- return withProviderInfo(await handleGeminiChat(finalMessage, base64Image, base64Audio), currentProv, config);
431
- } catch (error) {
432
- console.error(`[Fallback System] Provider '${currentProv}' failed:`, error.message);
433
- if (i === providersToTry.length - 1) {
434
- console.error("[Fallback System] All available providers failed.");
435
- throw error; // No more providers to fallback to
436
- }
437
- console.log(`[Fallback System] Switching to next available provider: '${providersToTry[i+1]}'`);
438
- // Continue the loop to try the next provider
439
- }
440
- }
441
- } catch (globalError) {
442
- console.error("handleChat error:", globalError);
443
- throw globalError;
444
- }
445
- }
446
-
447
- async function handleGeminiChat(finalMessage, base64Image, base64Audio) {
448
- try {
449
- const images = normalizeImageList(base64Image);
450
- // 1. Check cache first for text-only messages
451
486
  if (finalMessage && images.length === 0 && !base64Audio) {
452
- const cached = memoryStore.getCachedResponse(finalMessage);
453
- if (cached) return cached;
454
- }
455
-
456
- const desiredModel = resolveGeminiModel();
457
- if (!chat || activeModel !== desiredModel) {
458
- createChat(readChatHistory());
459
- }
460
-
461
- let aiResponse;
462
- const parts = [];
463
- if (finalMessage) {
464
- parts.push({ text: buildMessageWithRelevantMemory(finalMessage) });
465
- } else if (base64Audio && images.length === 0) {
466
- // Provide a guiding prompt when only audio is provided to ensure Gemini follows instructions
467
- parts.push({ text: "Please listen to this voice command and respond in Thai with the appropriate JSON action if needed." });
468
- } else if (images.length === 0 && !base64Audio) {
469
- parts.push({ text: "Analyze this input." });
470
- }
471
-
472
- for (const item of images) {
473
- const image = imageDataUriToInlineData(item);
474
- parts.push({
475
- inlineData: image
476
- });
477
- }
478
-
479
- if (base64Audio) {
480
- // Extract MIME type from the data URI if present, fallback to audio/webm
481
- let mimeType = "audio/webm";
482
- const mimeMatch = base64Audio.match(/^data:(audio\/\w+);base64,/);
483
- if (mimeMatch) {
484
- mimeType = mimeMatch[1];
485
- }
486
-
487
- const base64Data = base64Audio.replace(/^data:audio\/\w+;base64,/, '');
488
- parts.push({
489
- inlineData: { mimeType: mimeType, data: base64Data }
490
- });
487
+ const cached = memoryStore.getCachedResponse(finalMessage);
488
+ if (cached) return cached;
491
489
  }
492
490
 
493
- aiResponse = await chat.sendMessage({ message: parts });
494
-
495
- // Save history with timestamps
496
- const history = await chat.getHistory();
491
+ const providersToTry = getProviderAttemptOrder(config);
492
+ const client = new providerAdapter.AgentProviderClient({
493
+ provider: providersToTry[0],
494
+ providerOrder: providersToTry,
495
+ config,
496
+ history: chatHistoryToProviderHistory(previousHistory),
497
+ systemInstruction: buildSystemPrompt(),
498
+ responseMimeType: 'application/json',
499
+ maxTokens: 4096
500
+ });
501
+ const observation = buildChatObservation(finalMessage, images, base64Audio);
502
+ const outputText = await client.sendMessage(observation);
497
503
  const now = new Date().toISOString();
498
-
499
- // Add timestamp to the last two messages (User and Model) if they don't have one
500
- if (history.length >= 2) {
501
- const modelMsg = history[history.length - 1];
502
- const userMsg = history[history.length - 2];
503
- if (!modelMsg.timestamp) modelMsg.timestamp = now;
504
- if (!userMsg.timestamp) userMsg.timestamp = now;
505
- } else if (history.length === 1) {
506
- const msg = history[0];
507
- if (!msg.timestamp) msg.timestamp = now;
508
- }
509
-
510
- writeChatHistory(cleanHistoryForStorage(history));
511
-
512
- let outputText = '';
513
- try {
514
- // Robust text extraction
515
- outputText = (typeof aiResponse.text === 'function') ? aiResponse.text() : (aiResponse.text || '');
516
- } catch (e) {
517
- outputText = String(aiResponse || '');
518
- }
519
-
520
- outputText = stripRelevantMemoryBlock(outputText);
521
-
522
- let parsedResult;
523
- try {
524
- parsedResult = JSON.parse(outputText);
525
- } catch (e) {
526
- // Fallback in case the model failed to return pure JSON
527
- console.error("Failed to parse JSON directly:", e);
528
- const jsonMatch = outputText.match(/```json\n([\s\S]*?)\n```/) || outputText.match(/\{[\s\S]*\}/);
529
- if (jsonMatch) {
530
- parsedResult = JSON.parse(jsonMatch[jsonMatch.length > 1 ? 1 : 0]);
531
- } else {
532
- parsedResult = {
533
- response: outputText,
534
- action: { type: "none", target: "" }
535
- };
536
- }
537
- }
538
-
539
- // Decode any remaining unicode escapes in the response text
540
- if (parsedResult && typeof parsedResult.response === 'string') {
541
- parsedResult.response = decodeUnicode(parsedResult.response);
542
- parsedResult.response = stripRelevantMemoryBlock(parsedResult.response);
543
- }
544
-
545
- // Attach timestamp to the result
546
- validateParsedAction(parsedResult);
547
- parsedResult.timestamp = now;
504
+ const provider = client.lastSuccessfulProvider || client.provider || providersToTry[0];
505
+ const providerInfo = {
506
+ provider,
507
+ model: getProviderModel(provider, config),
508
+ usage: client.getUsageSummary()
509
+ };
510
+ const parsedResult = parseChatProviderResponse(outputText, finalMessage, now);
511
+ parsedResult.providerInfo = providerInfo;
512
+ appendChatProviderHistory(previousHistory, finalMessage, outputText, providerInfo, now);
548
513
 
549
- // Record interaction for long-term memory (non-blocking)
550
514
  if (finalMessage && parsedResult.response) {
551
- setImmediate(() => {
552
- memoryStore.recordInteraction(finalMessage, parsedResult.response);
553
- // Cache text-only responses
554
- if (images.length === 0 && !base64Audio) {
555
- memoryStore.cacheResponse(finalMessage, parsedResult);
556
- }
557
- });
515
+ setImmediate(() => {
516
+ memoryStore.recordInteraction(finalMessage, parsedResult.response);
517
+ if (images.length === 0 && !base64Audio) {
518
+ memoryStore.cacheResponse(finalMessage, parsedResult);
519
+ }
520
+ });
558
521
  }
559
522
 
560
523
  return parsedResult;
561
-
562
- } catch (error) {
563
- console.error("AI API Error:", error);
564
- throw error;
524
+ } catch (globalError) {
525
+ console.error("handleChat error:", globalError);
526
+ throw globalError;
565
527
  }
566
528
  }
567
529
 
@@ -573,6 +535,7 @@ async function handleGeminiChat(finalMessage, base64Image, base64Audio) {
573
535
  async function* handleGeminiChatStream(finalMessage, base64Image, base64Audio) {
574
536
  try {
575
537
  const images = normalizeImageList(base64Image);
538
+ const previousHistory = readChatHistory();
576
539
  // 1. Check cache first
577
540
  if (finalMessage && images.length === 0 && !base64Audio) {
578
541
  const cached = memoryStore.getCachedResponse(finalMessage);
@@ -624,7 +587,7 @@ async function* handleGeminiChatStream(finalMessage, base64Image, base64Audio) {
624
587
  fullText = stripRelevantMemoryBlock(fullText);
625
588
 
626
589
  // Save history
627
- const history = await chat.getHistory();
590
+ const history = preserveHistoryMetadata(await chat.getHistory(), previousHistory, new Date().toISOString());
628
591
  const now = new Date().toISOString();
629
592
  if (history.length >= 2) {
630
593
  const modelMsg = history[history.length - 1];
@@ -646,6 +609,8 @@ async function* handleGeminiChatStream(finalMessage, base64Image, base64Audio) {
646
609
  parsedResult = { response: fullText, action: { type: 'none', target: '' } };
647
610
  }
648
611
  }
612
+ parsedResult = normalizeParsedResult(parsedResult, finalMessage);
613
+
649
614
  if (parsedResult && typeof parsedResult.response === 'string') {
650
615
  parsedResult.response = decodeUnicode(parsedResult.response);
651
616
  parsedResult.response = stripRelevantMemoryBlock(parsedResult.response);
@@ -672,269 +637,6 @@ async function* handleGeminiChatStream(finalMessage, base64Image, base64Audio) {
672
637
  }
673
638
  }
674
639
 
675
- async function handleAnthropicChat(finalMessage, base64Image, config) {
676
- const history = readChatHistory() || [];
677
- const images = normalizeImageList(base64Image);
678
- const apiKey = config.anthropicApiKey || process.env.ANTHROPIC_API_KEY;
679
- if (isPlaceholder(apiKey)) return { response: "กรุณาใส่ Anthropic API Key ในการตั้งค่าก่อนนะคะ", action: { type: "none" } };
680
-
681
- const systemPrompt = buildSystemPrompt();
682
-
683
- const messages = [];
684
- for (const msg of history.slice(-MAX_HISTORY_MESSAGES)) {
685
- const role = msg.role === 'model' ? 'assistant' : 'user';
686
- let text = Array.isArray(msg.parts) ? msg.parts.map(p => p.text || '').join('\n') : '';
687
- if (text) messages.push({ role, content: text });
688
- }
689
-
690
- const content = [];
691
- for (const item of images) {
692
- const image = imageDataUriToInlineData(item);
693
- content.push({
694
- type: "image",
695
- source: { type: "base64", media_type: image.mimeType, data: image.data }
696
- });
697
- }
698
- content.push({ type: "text", text: finalMessage || "Analyze this." });
699
- messages.push({ role: "user", content });
700
-
701
- const response = await axios.post('https://api.anthropic.com/v1/messages', {
702
- model: config.anthropicModel || 'claude-3-5-sonnet-latest',
703
- max_tokens: 4096,
704
- system: systemPrompt,
705
- messages: messages
706
- }, {
707
- headers: {
708
- 'x-api-key': apiKey,
709
- 'anthropic-version': '2023-06-01',
710
- 'content-type': 'application/json'
711
- }
712
- });
713
-
714
- const outputText = response.data.content[0].text;
715
- history.push({ role: 'user', parts: [{ text: finalMessage }] });
716
- history.push({ role: 'model', parts: [{ text: outputText }] });
717
- writeChatHistory(cleanHistoryForStorage(history.slice(-MAX_STORED_HISTORY_MESSAGES)));
718
-
719
- return parseAiResponse(outputText);
720
- }
721
-
722
- async function handleOpenAIChat(finalMessage, base64Image, config) {
723
- const history = readChatHistory() || [];
724
- const images = normalizeImageList(base64Image);
725
- const apiKey = config.openaiApiKey || process.env.OPENAI_API_KEY;
726
- if (isPlaceholder(apiKey)) return { response: "กรุณาใส่ OpenAI API Key ในการตั้งค่าก่อนนะคะ", action: { type: "none" } };
727
-
728
- const systemPrompt = buildSystemPrompt();
729
-
730
- const messages = [{ role: "system", content: systemPrompt }];
731
- for (const msg of history.slice(-MAX_HISTORY_MESSAGES)) {
732
- const role = msg.role === 'model' ? 'assistant' : 'user';
733
- let text = Array.isArray(msg.parts) ? msg.parts.map(p => p.text || '').join('\n') : '';
734
- if (text) messages.push({ role, content: text });
735
- }
736
-
737
- const content = [{ type: "text", text: finalMessage || "Analyze this." }];
738
- for (const item of images) {
739
- content.push({
740
- type: "image_url",
741
- image_url: { url: item }
742
- });
743
- }
744
- messages.push({ role: "user", content });
745
-
746
- const response = await axios.post('https://api.openai.com/v1/chat/completions', {
747
- model: config.openaiModel || 'gpt-4o',
748
- messages: messages,
749
- response_format: { type: "json_object" }
750
- }, {
751
- headers: {
752
- 'Authorization': `Bearer ${apiKey}`,
753
- 'Content-Type': 'application/json'
754
- }
755
- });
756
-
757
- const outputText = response.data.choices[0].message.content;
758
- history.push({ role: 'user', parts: [{ text: finalMessage }] });
759
- history.push({ role: 'model', parts: [{ text: outputText }] });
760
- writeChatHistory(cleanHistoryForStorage(history.slice(-MAX_STORED_HISTORY_MESSAGES)));
761
-
762
- return parseAiResponse(outputText);
763
- }
764
-
765
- async function handleLocalOpenAIChat(finalMessage, base64Image, config) {
766
- const history = readChatHistory() || [];
767
- const images = normalizeImageList(base64Image);
768
- const apiKey = 'lm-studio';
769
- const baseUrl = config.localApiBaseUrl || 'http://localhost:1234/v1';
770
-
771
- const systemPrompt = buildSystemPrompt();
772
-
773
- const messages = [{ role: "system", content: systemPrompt }];
774
- for (const msg of history.slice(-MAX_HISTORY_MESSAGES)) {
775
- const role = msg.role === 'model' ? 'assistant' : 'user';
776
- let text = Array.isArray(msg.parts) ? msg.parts.map(p => p.text || '').join('\n') : '';
777
- if (text) messages.push({ role, content: text });
778
- }
779
-
780
- const content = [{ type: "text", text: finalMessage || "Analyze this." }];
781
- for (const item of images) {
782
- content.push({
783
- type: "image_url",
784
- image_url: { url: item }
785
- });
786
- }
787
- messages.push({ role: "user", content });
788
-
789
- const response = await axios.post(`${baseUrl.replace(/\/$/, '')}/chat/completions`, {
790
- model: config.localModelName || 'local-model',
791
- messages: messages,
792
- // response_format json_object is sometimes problematic on weak local models, but required by our prompt.
793
- // We'll keep it as some local servers like LM Studio support it for specific models.
794
- // If not supported, the system prompt usually coerces it anyway.
795
- response_format: { type: "json_object" }
796
- }, {
797
- headers: {
798
- 'Authorization': `Bearer ${apiKey}`,
799
- 'Content-Type': 'application/json'
800
- }
801
- });
802
-
803
- const outputText = response.data.choices[0].message.content;
804
- history.push({ role: 'user', parts: [{ text: finalMessage }] });
805
- history.push({ role: 'model', parts: [{ text: outputText }] });
806
- writeChatHistory(cleanHistoryForStorage(history.slice(-MAX_STORED_HISTORY_MESSAGES)));
807
-
808
- return parseAiResponse(outputText);
809
- }
810
-
811
- async function handleHuggingFaceChat(finalMessage, base64Image, config) {
812
- const history = readChatHistory() || [];
813
- const images = normalizeImageList(base64Image);
814
- const apiKey = config.hfApiKey || process.env.HF_API_KEY;
815
- if (isPlaceholder(apiKey)) return { response: "กรุณาใส่ Hugging Face API Key ในการตั้งค่าก่อนนะคะ", action: { type: "none" } };
816
-
817
- const modelId = config.hfModel || 'meta-llama/Meta-Llama-3-8B-Instruct';
818
- const baseUrl = `https://api-inference.huggingface.co/models/${modelId}/v1/chat/completions`;
819
-
820
- const systemPrompt = buildSystemPrompt();
821
-
822
- const messages = [{ role: "system", content: systemPrompt }];
823
- for (const msg of history.slice(-MAX_HISTORY_MESSAGES)) {
824
- const role = msg.role === 'model' ? 'assistant' : 'user';
825
- let text = Array.isArray(msg.parts) ? msg.parts.map(p => p.text || '').join('\n') : '';
826
- if (text) messages.push({ role, content: text });
827
- }
828
-
829
- const content = [{ type: "text", text: finalMessage || "Analyze this." }];
830
- for (const item of images) {
831
- content.push({
832
- type: "image_url",
833
- image_url: { url: item }
834
- });
835
- }
836
- messages.push({ role: "user", content });
837
-
838
- const response = await axios.post(baseUrl, {
839
- model: modelId,
840
- messages: messages,
841
- max_tokens: 4096
842
- }, {
843
- headers: {
844
- 'Authorization': `Bearer ${apiKey}`,
845
- 'Content-Type': 'application/json'
846
- }
847
- });
848
-
849
- const outputText = response.data.choices[0].message.content;
850
- history.push({ role: 'user', parts: [{ text: finalMessage }] });
851
- history.push({ role: 'model', parts: [{ text: outputText }] });
852
- writeChatHistory(cleanHistoryForStorage(history.slice(-MAX_STORED_HISTORY_MESSAGES)));
853
-
854
- return parseAiResponse(outputText);
855
- }
856
-
857
- function parseAiResponse(outputText) {
858
- let parsedResult;
859
- try {
860
- parsedResult = JSON.parse(outputText);
861
- } catch (e) {
862
- const jsonMatch = outputText.match(/```json\n([\s\S]*?)\n```/) || outputText.match(/\{[\s\S]*\}/);
863
- if (jsonMatch) {
864
- parsedResult = JSON.parse(jsonMatch[jsonMatch.length > 1 ? 1 : 0]);
865
- } else {
866
- parsedResult = { response: outputText, action: { type: "none", target: "" } };
867
- }
868
- }
869
- if (parsedResult && typeof parsedResult.response === 'string') {
870
- parsedResult.response = decodeUnicode(parsedResult.response);
871
- }
872
- validateParsedAction(parsedResult);
873
- parsedResult.timestamp = new Date().toISOString();
874
- return parsedResult;
875
- }
876
-
877
- async function handleOllamaChat(finalMessage, base64Image, base64Audio, config) {
878
- const history = readChatHistory() || [];
879
- const imageInputs = normalizeImageList(base64Image);
880
-
881
- const ollamaMessages = [
882
- { role: 'system', content: buildSystemPrompt() }
883
- ];
884
-
885
- for (const msg of history.slice(-MAX_HISTORY_MESSAGES)) {
886
- const role = msg.role === 'model' ? 'assistant' : 'user';
887
- let text = '';
888
- if (Array.isArray(msg.parts)) {
889
- text = msg.parts.map(p => p.text || '').join('\n');
890
- }
891
- if (text) ollamaMessages.push({ role, content: text });
892
- }
893
-
894
- let currentContent = finalMessage || 'Analyze this input.';
895
- let images = [];
896
- for (const item of imageInputs) {
897
- images.push(imageDataUriToBase64(item));
898
- }
899
-
900
- if (base64Audio && imageInputs.length === 0 && !finalMessage) {
901
- currentContent = "Please analyze this audio requirement based on text if any was transacted, otherwise reply with appropriate action.";
902
- }
903
-
904
- const userMessage = { role: 'user', content: currentContent };
905
- if (images.length > 0) userMessage.images = images;
906
-
907
- ollamaMessages.push(userMessage);
908
-
909
- const ollamaBaseUrl = (config.ollamaHost || 'http://localhost:11434').replace(/\/$/, '');
910
- const response = await axios.post(`${ollamaBaseUrl}/api/chat`, {
911
- model: config.ollamaModel || 'llama3:latest',
912
- messages: ollamaMessages,
913
- format: 'json',
914
- stream: false
915
- });
916
-
917
- const outputText = response.data.message.content;
918
-
919
- history.push({ role: 'user', parts: [{ text: currentContent }] });
920
- history.push({ role: 'model', parts: [{ text: outputText }] });
921
- writeChatHistory(cleanHistoryForStorage(history.slice(-MAX_STORED_HISTORY_MESSAGES)));
922
-
923
- let parsedResult;
924
- try {
925
- parsedResult = JSON.parse(outputText);
926
- } catch(e) {
927
- const jsonMatch = outputText.match(/```json\n([\s\S]*?)\n```/) || outputText.match(/\{[\s\S]*\}/);
928
- if (jsonMatch) {
929
- parsedResult = JSON.parse(jsonMatch[jsonMatch.length > 1 ? 1 : 0]);
930
- } else {
931
- parsedResult = { response: outputText, action: { type: "none", target: "" } };
932
- }
933
- }
934
- validateParsedAction(parsedResult);
935
- return parsedResult;
936
- }
937
-
938
640
  function resetChat() {
939
641
  clearChatHistory();
940
642
  memoryStore.clearConversationScopedProfile();
@@ -980,7 +682,7 @@ function historyToTranscript(history) {
980
682
  transcript.push({
981
683
  sender,
982
684
  text,
983
- timestamp: content.timestamp || new Date().toISOString(),
685
+ timestamp: content.timestamp || null,
984
686
  providerInfo: content.providerInfo || null
985
687
  });
986
688
  }
@@ -1058,6 +760,7 @@ module.exports = {
1058
760
  translateImageContent,
1059
761
  refreshApiKeyFromConfig,
1060
762
  _helpers: {
1061
- getProviderAttemptOrder
763
+ getProviderAttemptOrder,
764
+ normalizeParsedResult
1062
765
  }
1063
766
  };