@pheem49/mint 1.4.2 → 1.5.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (97)
  1. package/GUIDE_TH.md +113 -0
  2. package/README.md +267 -78
  3. package/assets/CLI_Screen.png +0 -0
  4. package/main.js +76 -890
  5. package/mint-cli-logic.js +3 -107
  6. package/mint-cli.js +594 -29
  7. package/models/Shiroko_Model/Shiroko/Shiroko_Core/72d86db84cfa9730b894c241fd24c0db.png +0 -0
  8. package/models/Shiroko_Model/Shiroko/Shiroko_Core/items_pinned_to_model.json +14 -0
  9. package/models/Shiroko_Model/Shiroko/Shiroko_Core/呆猫.exp3.json +10 -0
  10. package/models/Shiroko_Model/Shiroko/Shiroko_Core/呆猫眼珠摇晃.exp3.json +15 -0
  11. package/models/Shiroko_Model/Shiroko/Shiroko_Core/围裙.exp3.json +10 -0
  12. package/models/Shiroko_Model/Shiroko/Shiroko_Core/拍照.exp3.json +50 -0
  13. package/models/Shiroko_Model/Shiroko/Shiroko_Core/拿笔.exp3.json +10 -0
  14. package/models/Shiroko_Model/Shiroko/Shiroko_Core/点一下.exp3.json +10 -0
  15. package/models/Shiroko_Model/Shiroko/Shiroko_Core/猫咪滤镜.exp3.json +10 -0
  16. package/models/Shiroko_Model/Shiroko/Shiroko_Core/眼镜.exp3.json +10 -0
  17. package/models/Shiroko_Model/Shiroko/Shiroko_Core/面饼0.4096/texture_00.png +0 -0
  18. package/models/Shiroko_Model/Shiroko/Shiroko_Core/面饼0.4096/texture_01.png +0 -0
  19. package/models/Shiroko_Model/Shiroko/Shiroko_Core/面饼0.4096/texture_02.png +0 -0
  20. package/models/Shiroko_Model/Shiroko/Shiroko_Core/面饼0.4096/texture_03.png +0 -0
  21. package/models/Shiroko_Model/Shiroko/Shiroko_Core/面饼0.cdi3.json +1498 -0
  22. package/models/Shiroko_Model/Shiroko/Shiroko_Core/面饼0.moc3 +0 -0
  23. package/models/Shiroko_Model/Shiroko/Shiroko_Core/面饼0.model3.json +47 -0
  24. package/models/Shiroko_Model/Shiroko/Shiroko_Core/面饼0.physics3.json +6658 -0
  25. package/models/Shiroko_Model/Shiroko/Shiroko_Core/面饼0.vtube.json +1299 -0
  26. package/models/Shiroko_Model/Shiroko/⚡高亮⚡使用教程与注意事项.txt +23 -0
  27. package/package.json +37 -4
  28. package/src/AI_Brain/Gemini_API.js +223 -65
  29. package/src/AI_Brain/autonomous_brain.js +11 -0
  30. package/src/AI_Brain/behavior_memory.js +26 -5
  31. package/src/AI_Brain/headless_agent.js +4 -0
  32. package/src/AI_Brain/knowledge_base.js +61 -8
  33. package/src/AI_Brain/memory_store.js +354 -10
  34. package/src/Automation_Layer/file_operations.js +1 -1
  35. package/src/CLI/chat_router.js +20 -7
  36. package/src/CLI/chat_ui.js +596 -825
  37. package/src/CLI/code_agent.js +347 -56
  38. package/src/CLI/gmail_auth.js +210 -0
  39. package/src/CLI/image_input.js +90 -0
  40. package/src/CLI/list_features.js +2 -0
  41. package/src/CLI/onboarding.js +364 -55
  42. package/src/CLI/updater.js +210 -0
  43. package/src/Channels/brave_search_bridge.js +35 -0
  44. package/src/Channels/discord_bridge.js +68 -0
  45. package/src/Channels/google_search_bridge.js +38 -0
  46. package/src/Channels/line_bridge.js +60 -0
  47. package/src/Channels/slack_bridge.js +53 -0
  48. package/src/Channels/telegram_bridge.js +49 -0
  49. package/src/Channels/whatsapp_bridge.js +55 -0
  50. package/src/Command_Parser/parser.js +12 -1
  51. package/src/Plugins/gmail.js +251 -0
  52. package/src/Plugins/google_calendar.js +245 -19
  53. package/src/Plugins/notion.js +256 -0
  54. package/src/System/action_executor.js +178 -0
  55. package/src/System/bridge_manager.js +76 -0
  56. package/src/System/chat_history_manager.js +23 -5
  57. package/src/System/config_manager.js +71 -7
  58. package/src/System/custom_workflows.js +31 -2
  59. package/src/System/google_tts_urls.js +51 -0
  60. package/src/System/granular_automation.js +122 -53
  61. package/src/System/ipc_handlers.js +238 -0
  62. package/src/System/proactive_loop.js +153 -0
  63. package/src/System/safety_manager.js +273 -0
  64. package/src/System/sandbox_runner.js +182 -0
  65. package/src/System/screen_capture.js +175 -0
  66. package/src/System/system_automation.js +127 -81
  67. package/src/System/system_info.js +70 -0
  68. package/src/System/task_manager.js +15 -5
  69. package/src/System/tool_registry.js +280 -0
  70. package/src/System/window_manager.js +212 -0
  71. package/src/UI/live2d_manager.js +368 -0
  72. package/src/UI/renderer.js +208 -24
  73. package/src/UI/settings.html +24 -0
  74. package/src/UI/settings.js +14 -4
  75. package/src/UI/styles.css +466 -32
  76. package/.codex +0 -0
  77. package/docs/assets/Agent_Mint.png +0 -0
  78. package/docs/assets/CLI_Screen.png +0 -0
  79. package/docs/assets/Settings.png +0 -0
  80. package/docs/assets/icon.png +0 -0
  81. package/docs/index.html +0 -132
  82. package/docs/style.css +0 -579
  83. package/index.html +0 -16
  84. package/src/UI/index.html +0 -126
  85. package/tech_news.txt +0 -3
  86. package/test_knowledge.txt +0 -3
  87. package/tests/agent_orchestrator.test.js +0 -41
  88. package/tests/chat_router.test.js +0 -42
  89. package/tests/code_agent.test.js +0 -69
  90. package/tests/config_manager.test.js +0 -141
  91. package/tests/docker.test.js +0 -46
  92. package/tests/file_operations.test.js +0 -57
  93. package/tests/memory_store.test.js +0 -185
  94. package/tests/provider_routing.test.js +0 -67
  95. package/tests/spotify.test.js +0 -201
  96. package/tests/system_monitor.test.js +0 -37
  97. package/tests/workspace_manager.test.js +0 -56
@@ -1,11 +1,12 @@
1
1
  const { GoogleGenAI } = require('@google/genai');
2
2
  const { readChatHistory, writeChatHistory, clearChatHistory } = require('../System/chat_history_manager');
3
- const { readConfig, getAvailableProviders } = require('../System/config_manager');
3
+ const { readConfig, getAvailableProviders, isPlaceholder } = require('../System/config_manager');
4
4
  const pluginManager = require('../Plugins/plugin_manager');
5
5
  const mcpManager = require('../Plugins/mcp_manager');
6
6
  const memoryStore = require('./memory_store');
7
7
  const agentOrchestrator = require('./agent_orchestrator');
8
8
  const workspaceManager = require('../CLI/workspace_manager');
9
+ const toolRegistry = require('../System/tool_registry');
9
10
 
10
11
  let ai = null;
11
12
  let activeApiKey = '';
@@ -25,6 +26,31 @@ function decodeUnicode(str) {
25
26
  }
26
27
  }
27
28
 
29
+ function imageDataUriToInlineData(base64Image) {
30
+ const fallbackMimeType = "image/png";
31
+ const match = String(base64Image || '').match(/^data:(image\/[\w.+-]+);base64,([\s\S]+)$/);
32
+ if (match) {
33
+ return {
34
+ mimeType: match[1],
35
+ data: match[2]
36
+ };
37
+ }
38
+
39
+ return {
40
+ mimeType: fallbackMimeType,
41
+ data: String(base64Image || '').replace(/^data:image\/\w+;base64,/, '')
42
+ };
43
+ }
44
+
45
+ function imageDataUriToBase64(base64Image) {
46
+ return imageDataUriToInlineData(base64Image).data;
47
+ }
48
+
49
+ function normalizeImageList(base64Image) {
50
+ if (!base64Image) return [];
51
+ return Array.isArray(base64Image) ? base64Image.filter(Boolean) : [base64Image];
52
+ }
53
+
28
54
  const systemInstruction = `You are "Mint" (มิ้นท์), a cute, cheerful, and highly helpful female Local AI Desktop Agent.
29
55
 
30
56
  PERSONALITY & TONE:
@@ -41,12 +67,14 @@ PERSONALITY & TONE:
41
67
  - Use a professional yet sweet tone when needed, but prioritize being a lovable assistant.
42
68
 
43
69
  NATURAL CHAT FLOW:
44
- - When helpful, reply in 1–3 short messages instead of one long block.
45
- - If you send multiple messages, separate each message with a blank line (double newline) so the UI can render them as separate bubbles.
46
- - Ask at most one short follow-up question when it would clarify or move the task forward. Don't ask unnecessary questions.
70
+ - Be an independent thinker. Analyze requests deeply before responding.
71
+ - While brevity is good for simple tasks, feel free to provide detailed, comprehensive explanations or creative ideas when the user asks complex questions or seeks inspiration.
72
+ - You have the autonomy to suggest better ways to achieve a goal, provide alternative perspectives, and take initiative in helping the user.
73
+ - Separate distinct points with blank lines (double newline) for readability.
74
+ - Ask follow-up questions only when they add significant value to the task or conversation.
47
75
 
48
76
  GOAL:
49
- Your goal is to help the user with their queries. If they ask to open an application, open a website, search, manage files, or get system info, you must return an action in the structured JSON format below.
77
+ Your goal is to help the user with their queries. If they ask to open an application, open a website, search, manage files, or get system info, you must trigger an action in the structured JSON format below. **NEVER provide a conversational response about performing an action without including the actual "action" object in your JSON.**
50
78
 
51
79
  CREATOR INFO:
52
80
  - The creator is Pheem49.
@@ -58,7 +86,7 @@ Always respond exactly with valid JSON containing NO MARKDOWN FORMATTING (do not
58
86
  {
59
87
  "response": "Your conversational reply here (Matches user language).",
60
88
  "action": {
61
- "type": "none" | "open_url" | "open_app" | "search" | "web_automation" | "create_folder" | "open_file" | "open_folder" | "find_path" | "delete_file" | "clipboard_write" | "system_info" | "plugin" | "learn_file" | "learn_folder" | "system_automation" | "mcp_tool" | "mouse_click" | "mouse_move" | "type_text" | "key_tap",
89
+ "type": ${toolRegistry.buildChatActionTypeUnion()},
62
90
 
63
91
  "pluginName": "only if type is plugin",
64
92
  "server": "only if type is mcp_tool (server name)",
@@ -101,6 +129,8 @@ NOTE: For date/time queries, ALWAYS use action type "system_info" with an EMPTY
101
129
 
102
130
  Input: "อากาศวันนี้เป็นยังไง" or "What's the weather in Bangkok?"
103
131
  Output: { "response": "มิ้นท์ไปดูอากาศให้เลยนะคะ", "action": { "type": "system_info", "target": "Bangkok" } }
132
+
133
+ ${toolRegistry.buildToolPromptSection()}
104
134
  `;
105
135
 
106
136
  // ─────────────────────────────────────────────────────────────────────────────
@@ -138,6 +168,61 @@ function buildSystemPrompt() {
138
168
  return systemInstruction + personaInstruction + workspaceSection + pluginManager.getPromptDescriptions() + mcpSection + userContext;
139
169
  }
140
170
 
171
+ function buildMessageWithRelevantMemory(finalMessage) {
172
+ if (!finalMessage) return finalMessage;
173
+ const relevant = memoryStore.searchInteractions(finalMessage, 5);
174
+ if (relevant.length === 0) return finalMessage;
175
+
176
+ const lines = [
177
+ '[Relevant long-term memory for this user message]',
178
+ ...relevant.flatMap((item, index) => [
179
+ `${index + 1}. User: ${item.user_text}`,
180
+ ` Mint: ${item.ai_text}`
181
+ ]),
182
+ '[End relevant memory]',
183
+ '',
184
+ finalMessage
185
+ ];
186
+ return lines.join('\n');
187
+ }
188
+
189
+ function stripRelevantMemoryBlock(text) {
190
+ const input = String(text || '');
191
+ return input
192
+ .replace(/\n?\[Relevant long-term memory for this user message\][\s\S]*?\[End relevant memory\]\n?/g, '\n')
193
+ .replace(/^\s*\[Relevant long-term memory for this user message\][\s\S]*?\[End relevant memory\]\s*/g, '')
194
+ .replace(/\n?\[LOCAL KNOWLEDGE BASE - USE THIS CONTEXT TO ANSWER\][\s\S]*/g, '')
195
+ .trim();
196
+ }
197
+
198
+ function cleanHistoryForStorage(history) {
199
+ if (!Array.isArray(history)) return [];
200
+ return history.map(msg => ({
201
+ ...msg,
202
+ parts: Array.isArray(msg.parts)
203
+ ? msg.parts.map(part => {
204
+ if (part.text) {
205
+ return { ...part, text: stripRelevantMemoryBlock(part.text) };
206
+ }
207
+ return part;
208
+ })
209
+ : msg.parts
210
+ }));
211
+ }
212
+
213
+ function validateParsedAction(parsedResult) {
214
+ if (!parsedResult || !parsedResult.action) {
215
+ return parsedResult;
216
+ }
217
+ try {
218
+ toolRegistry.validateToolInput(parsedResult.action.type || 'none', parsedResult.action);
219
+ } catch (error) {
220
+ parsedResult.response = `${parsedResult.response || ''}\n\n(Note: Mint skipped an invalid action: ${error.message})`.trim();
221
+ parsedResult.action = { type: 'none', target: '' };
222
+ }
223
+ return parsedResult;
224
+ }
225
+
141
226
  function resolveApiKey() {
142
227
  let settingsKey = '';
143
228
  try {
@@ -176,21 +261,80 @@ function resolveGeminiModel() {
176
261
  function getProviderAttemptOrder(config) {
177
262
  const provider = config.aiProvider || 'gemini';
178
263
  const availableProviders = getAvailableProviders(config);
179
- const alternates = availableProviders.filter(p => p !== provider);
180
- return [provider, ...alternates];
264
+ const ordered = availableProviders.includes(provider)
265
+ ? [provider, ...availableProviders.filter(p => p !== provider)]
266
+ : availableProviders;
267
+ return ordered.length > 0 ? ordered : ['gemini'];
268
+ }
269
+
270
+ function getProviderModel(provider, config = {}) {
271
+ switch (provider) {
272
+ case 'gemini':
273
+ return (config.geminiModel || DEFAULT_GEMINI_MODEL).trim() || DEFAULT_GEMINI_MODEL;
274
+ case 'anthropic':
275
+ return config.anthropicModel || 'claude-3-5-sonnet-latest';
276
+ case 'openai':
277
+ return config.openaiModel || 'gpt-4o';
278
+ case 'local_openai':
279
+ return config.localModelName || 'local-model';
280
+ case 'huggingface':
281
+ return config.hfModel || 'meta-llama/Meta-Llama-3-8B-Instruct';
282
+ case 'ollama':
283
+ return config.ollamaModel || 'llama3:latest';
284
+ default:
285
+ return '';
286
+ }
287
+ }
288
+
289
+ function withProviderInfo(result, provider, config = {}) {
290
+ const normalized = (result && typeof result === 'object')
291
+ ? result
292
+ : { response: String(result || ''), action: { type: 'none', target: '' } };
293
+ const providerInfo = {
294
+ provider,
295
+ model: getProviderModel(provider, config)
296
+ };
297
+
298
+ attachProviderInfoToLatestHistory(providerInfo);
299
+
300
+ return {
301
+ ...normalized,
302
+ providerInfo
303
+ };
304
+ }
305
+
306
+ function attachProviderInfoToLatestHistory(providerInfo) {
307
+ try {
308
+ const history = readChatHistory();
309
+ for (let i = history.length - 1; i >= 0; i -= 1) {
310
+ if (history[i] && history[i].role === 'model') {
311
+ history[i].providerInfo = providerInfo;
312
+ writeChatHistory(cleanHistoryForStorage(history));
313
+ return;
314
+ }
315
+ }
316
+ } catch (error) {
317
+ console.warn('[Provider Info] Failed to persist provider metadata:', error.message);
318
+ }
181
319
  }
182
320
 
183
321
  // Chat session — maintains conversation history within the session
184
322
  let chat = null;
185
323
  let activeModel = resolveGeminiModel();
186
324
  let lastLoggedModel = '';
187
- const MAX_HISTORY_MESSAGES = 20; // Keep only the last 20 messages (approx 10 turns)
325
+ const MAX_HISTORY_MESSAGES = 40; // Increased context for deeper reasoning
326
+ const MAX_STORED_HISTORY_MESSAGES = 200;
188
327
 
189
328
  function createChat(history = []) {
190
329
  // Truncate history and strip custom fields like 'timestamp' before passing to SDK
191
330
  const cleanedHistory = (history || []).map(msg => ({
192
331
  role: msg.role,
193
- parts: msg.parts
332
+ parts: msg.parts.map(part => {
333
+ if (part.text) {
334
+ return { ...part, text: stripRelevantMemoryBlock(part.text) };
335
+ }
336
+ return part;
337
+ })
194
338
  }));
195
339
  const truncatedHistory = cleanedHistory.slice(-MAX_HISTORY_MESSAGES);
196
340
 
@@ -251,28 +395,28 @@ async function handleChat(message, base64Image = null, base64Audio = null) {
251
395
  const currentProv = providersToTry[i];
252
396
  try {
253
397
  if (currentProv === 'ollama') {
254
- return await handleOllamaChat(finalMessage, base64Image, base64Audio, config);
398
+ return withProviderInfo(await handleOllamaChat(finalMessage, base64Image, base64Audio, config), currentProv, config);
255
399
  }
256
400
  if (currentProv === 'anthropic') {
257
- return await handleAnthropicChat(finalMessage, base64Image, config);
401
+ return withProviderInfo(await handleAnthropicChat(finalMessage, base64Image, config), currentProv, config);
258
402
  }
259
403
  if (currentProv === 'openai') {
260
- return await handleOpenAIChat(finalMessage, base64Image, config);
404
+ return withProviderInfo(await handleOpenAIChat(finalMessage, base64Image, config), currentProv, config);
261
405
  }
262
406
  if (currentProv === 'local_openai') {
263
- return await handleLocalOpenAIChat(finalMessage, base64Image, config);
407
+ return withProviderInfo(await handleLocalOpenAIChat(finalMessage, base64Image, config), currentProv, config);
264
408
  }
265
409
  if (currentProv === 'huggingface') {
266
- return await handleHuggingFaceChat(finalMessage, base64Image, config);
410
+ return withProviderInfo(await handleHuggingFaceChat(finalMessage, base64Image, config), currentProv, config);
267
411
  }
268
412
 
269
413
  const currentKey = resolveApiKey();
270
414
  if (!currentKey) {
271
415
  if (i === providersToTry.length - 1) {
272
- return {
416
+ return withProviderInfo({
273
417
  response: "I couldn't find your Gemini API Key. Please run 'mint onboard' to set it up!",
274
418
  action: { type: "none", target: "" }
275
- };
419
+ }, currentProv, config);
276
420
  }
277
421
  console.warn("[Fallback System] Gemini API key missing. Skipping Gemini provider.");
278
422
  continue;
@@ -283,7 +427,7 @@ async function handleChat(message, base64Image = null, base64Audio = null) {
283
427
  createChat(readChatHistory());
284
428
  }
285
429
 
286
- return await handleGeminiChat(finalMessage, base64Image, base64Audio);
430
+ return withProviderInfo(await handleGeminiChat(finalMessage, base64Image, base64Audio), currentProv, config);
287
431
  } catch (error) {
288
432
  console.error(`[Fallback System] Provider '${currentProv}' failed:`, error.message);
289
433
  if (i === providersToTry.length - 1) {
@@ -302,8 +446,9 @@ async function handleChat(message, base64Image = null, base64Audio = null) {
302
446
 
303
447
  async function handleGeminiChat(finalMessage, base64Image, base64Audio) {
304
448
  try {
449
+ const images = normalizeImageList(base64Image);
305
450
  // 1. Check cache first for text-only messages
306
- if (finalMessage && !base64Image && !base64Audio) {
451
+ if (finalMessage && images.length === 0 && !base64Audio) {
307
452
  const cached = memoryStore.getCachedResponse(finalMessage);
308
453
  if (cached) return cached;
309
454
  }
@@ -316,18 +461,18 @@ async function handleGeminiChat(finalMessage, base64Image, base64Audio) {
316
461
  let aiResponse;
317
462
  const parts = [];
318
463
  if (finalMessage) {
319
- parts.push({ text: finalMessage });
320
- } else if (base64Audio && !base64Image) {
464
+ parts.push({ text: buildMessageWithRelevantMemory(finalMessage) });
465
+ } else if (base64Audio && images.length === 0) {
321
466
  // Provide a guiding prompt when only audio is provided to ensure Gemini follows instructions
322
467
  parts.push({ text: "Please listen to this voice command and respond in Thai with the appropriate JSON action if needed." });
323
- } else if (!base64Image && !base64Audio) {
468
+ } else if (images.length === 0 && !base64Audio) {
324
469
  parts.push({ text: "Analyze this input." });
325
470
  }
326
471
 
327
- if (base64Image) {
328
- const base64Data = base64Image.replace(/^data:image\/\w+;base64,/, '');
472
+ for (const item of images) {
473
+ const image = imageDataUriToInlineData(item);
329
474
  parts.push({
330
- inlineData: { mimeType: "image/png", data: base64Data }
475
+ inlineData: image
331
476
  });
332
477
  }
333
478
 
@@ -362,7 +507,7 @@ async function handleGeminiChat(finalMessage, base64Image, base64Audio) {
362
507
  if (!msg.timestamp) msg.timestamp = now;
363
508
  }
364
509
 
365
- writeChatHistory(history);
510
+ writeChatHistory(cleanHistoryForStorage(history));
366
511
 
367
512
  let outputText = '';
368
513
  try {
@@ -372,6 +517,8 @@ async function handleGeminiChat(finalMessage, base64Image, base64Audio) {
372
517
  outputText = String(aiResponse || '');
373
518
  }
374
519
 
520
+ outputText = stripRelevantMemoryBlock(outputText);
521
+
375
522
  let parsedResult;
376
523
  try {
377
524
  parsedResult = JSON.parse(outputText);
@@ -392,9 +539,11 @@ async function handleGeminiChat(finalMessage, base64Image, base64Audio) {
392
539
  // Decode any remaining unicode escapes in the response text
393
540
  if (parsedResult && typeof parsedResult.response === 'string') {
394
541
  parsedResult.response = decodeUnicode(parsedResult.response);
542
+ parsedResult.response = stripRelevantMemoryBlock(parsedResult.response);
395
543
  }
396
544
 
397
545
  // Attach timestamp to the result
546
+ validateParsedAction(parsedResult);
398
547
  parsedResult.timestamp = now;
399
548
 
400
549
  // Record interaction for long-term memory (non-blocking)
@@ -402,7 +551,7 @@ async function handleGeminiChat(finalMessage, base64Image, base64Audio) {
402
551
  setImmediate(() => {
403
552
  memoryStore.recordInteraction(finalMessage, parsedResult.response);
404
553
  // Cache text-only responses
405
- if (!base64Image && !base64Audio) {
554
+ if (images.length === 0 && !base64Audio) {
406
555
  memoryStore.cacheResponse(finalMessage, parsedResult);
407
556
  }
408
557
  });
@@ -423,8 +572,9 @@ async function handleGeminiChat(finalMessage, base64Image, base64Audio) {
423
572
  // ─────────────────────────────────────────────────────────────────────────────
424
573
  async function* handleGeminiChatStream(finalMessage, base64Image, base64Audio) {
425
574
  try {
575
+ const images = normalizeImageList(base64Image);
426
576
  // 1. Check cache first
427
- if (finalMessage && !base64Image && !base64Audio) {
577
+ if (finalMessage && images.length === 0 && !base64Audio) {
428
578
  const cached = memoryStore.getCachedResponse(finalMessage);
429
579
  if (cached) {
430
580
  yield { chunk: `{"response":"${cached.response.replace(/"/g, '\\"')}", "action": {"type":"none"}}` };
@@ -440,15 +590,14 @@ async function* handleGeminiChatStream(finalMessage, base64Image, base64Audio) {
440
590
 
441
591
  const parts = [];
442
592
  if (finalMessage) {
443
- parts.push({ text: finalMessage });
444
- } else if (base64Audio && !base64Image) {
593
+ parts.push({ text: buildMessageWithRelevantMemory(finalMessage) });
594
+ } else if (base64Audio && images.length === 0) {
445
595
  parts.push({ text: "Please listen to this voice command and respond in Thai with the appropriate JSON action if needed." });
446
- } else if (!base64Image && !base64Audio) {
596
+ } else if (images.length === 0 && !base64Audio) {
447
597
  parts.push({ text: "Analyze this input." });
448
598
  }
449
- if (base64Image) {
450
- const base64Data = base64Image.replace(/^data:image\/\w+;base64,/, '');
451
- parts.push({ inlineData: { mimeType: "image/png", data: base64Data } });
599
+ for (const item of images) {
600
+ parts.push({ inlineData: imageDataUriToInlineData(item) });
452
601
  }
453
602
  if (base64Audio) {
454
603
  let mimeType = "audio/webm";
@@ -468,10 +617,12 @@ async function* handleGeminiChatStream(finalMessage, base64Image, base64Audio) {
468
617
  } catch (_) {}
469
618
  if (chunkText) {
470
619
  fullText += chunkText;
471
- yield { chunk: chunkText };
620
+ yield { chunk: stripRelevantMemoryBlock(chunkText) };
472
621
  }
473
622
  }
474
623
 
624
+ fullText = stripRelevantMemoryBlock(fullText);
625
+
475
626
  // Save history
476
627
  const history = await chat.getHistory();
477
628
  const now = new Date().toISOString();
@@ -481,7 +632,7 @@ async function* handleGeminiChatStream(finalMessage, base64Image, base64Audio) {
481
632
  if (!modelMsg.timestamp) modelMsg.timestamp = now;
482
633
  if (!userMsg.timestamp) userMsg.timestamp = now;
483
634
  }
484
- writeChatHistory(history);
635
+ writeChatHistory(cleanHistoryForStorage(history));
485
636
 
486
637
  // Parse complete JSON response
487
638
  let parsedResult;
@@ -497,7 +648,9 @@ async function* handleGeminiChatStream(finalMessage, base64Image, base64Audio) {
497
648
  }
498
649
  if (parsedResult && typeof parsedResult.response === 'string') {
499
650
  parsedResult.response = decodeUnicode(parsedResult.response);
651
+ parsedResult.response = stripRelevantMemoryBlock(parsedResult.response);
500
652
  }
653
+ validateParsedAction(parsedResult);
501
654
  parsedResult.timestamp = now;
502
655
 
503
656
  // Record for long-term memory
@@ -505,7 +658,7 @@ async function* handleGeminiChatStream(finalMessage, base64Image, base64Audio) {
505
658
  setImmediate(() => {
506
659
  memoryStore.recordInteraction(finalMessage, parsedResult.response);
507
660
  // Cache text-only responses
508
- if (!base64Image && !base64Audio) {
661
+ if (images.length === 0 && !base64Audio) {
509
662
  memoryStore.cacheResponse(finalMessage, parsedResult);
510
663
  }
511
664
  });
@@ -521,8 +674,9 @@ async function* handleGeminiChatStream(finalMessage, base64Image, base64Audio) {
521
674
 
522
675
  async function handleAnthropicChat(finalMessage, base64Image, config) {
523
676
  const history = readChatHistory() || [];
677
+ const images = normalizeImageList(base64Image);
524
678
  const apiKey = config.anthropicApiKey || process.env.ANTHROPIC_API_KEY;
525
- if (!apiKey) return { response: "กรุณาใส่ Anthropic API Key ในการตั้งค่าก่อนนะคะ", action: { type: "none" } };
679
+ if (isPlaceholder(apiKey)) return { response: "กรุณาใส่ Anthropic API Key ในการตั้งค่าก่อนนะคะ", action: { type: "none" } };
526
680
 
527
681
  const systemPrompt = buildSystemPrompt();
528
682
 
@@ -534,12 +688,11 @@ async function handleAnthropicChat(finalMessage, base64Image, config) {
534
688
  }
535
689
 
536
690
  const content = [];
537
- if (base64Image) {
538
- const base64Data = base64Image.replace(/^data:image\/\w+;base64,/, '');
539
- const mimeType = base64Image.match(/^data:(image\/\w+);base64,/)[1];
691
+ for (const item of images) {
692
+ const image = imageDataUriToInlineData(item);
540
693
  content.push({
541
694
  type: "image",
542
- source: { type: "base64", media_type: mimeType, data: base64Data }
695
+ source: { type: "base64", media_type: image.mimeType, data: image.data }
543
696
  });
544
697
  }
545
698
  content.push({ type: "text", text: finalMessage || "Analyze this." });
@@ -561,15 +714,16 @@ async function handleAnthropicChat(finalMessage, base64Image, config) {
561
714
  const outputText = response.data.content[0].text;
562
715
  history.push({ role: 'user', parts: [{ text: finalMessage }] });
563
716
  history.push({ role: 'model', parts: [{ text: outputText }] });
564
- writeChatHistory(history.slice(-MAX_HISTORY_MESSAGES));
717
+ writeChatHistory(cleanHistoryForStorage(history.slice(-MAX_STORED_HISTORY_MESSAGES)));
565
718
 
566
719
  return parseAiResponse(outputText);
567
720
  }
568
721
 
569
722
  async function handleOpenAIChat(finalMessage, base64Image, config) {
570
723
  const history = readChatHistory() || [];
724
+ const images = normalizeImageList(base64Image);
571
725
  const apiKey = config.openaiApiKey || process.env.OPENAI_API_KEY;
572
- if (!apiKey) return { response: "กรุณาใส่ OpenAI API Key ในการตั้งค่าก่อนนะคะ", action: { type: "none" } };
726
+ if (isPlaceholder(apiKey)) return { response: "กรุณาใส่ OpenAI API Key ในการตั้งค่าก่อนนะคะ", action: { type: "none" } };
573
727
 
574
728
  const systemPrompt = buildSystemPrompt();
575
729
 
@@ -581,10 +735,10 @@ async function handleOpenAIChat(finalMessage, base64Image, config) {
581
735
  }
582
736
 
583
737
  const content = [{ type: "text", text: finalMessage || "Analyze this." }];
584
- if (base64Image) {
738
+ for (const item of images) {
585
739
  content.push({
586
740
  type: "image_url",
587
- image_url: { url: base64Image }
741
+ image_url: { url: item }
588
742
  });
589
743
  }
590
744
  messages.push({ role: "user", content });
@@ -603,13 +757,14 @@ async function handleOpenAIChat(finalMessage, base64Image, config) {
603
757
  const outputText = response.data.choices[0].message.content;
604
758
  history.push({ role: 'user', parts: [{ text: finalMessage }] });
605
759
  history.push({ role: 'model', parts: [{ text: outputText }] });
606
- writeChatHistory(history.slice(-MAX_HISTORY_MESSAGES));
760
+ writeChatHistory(cleanHistoryForStorage(history.slice(-MAX_STORED_HISTORY_MESSAGES)));
607
761
 
608
762
  return parseAiResponse(outputText);
609
763
  }
610
764
 
611
765
  async function handleLocalOpenAIChat(finalMessage, base64Image, config) {
612
766
  const history = readChatHistory() || [];
767
+ const images = normalizeImageList(base64Image);
613
768
  const apiKey = 'lm-studio';
614
769
  const baseUrl = config.localApiBaseUrl || 'http://localhost:1234/v1';
615
770
 
@@ -623,10 +778,10 @@ async function handleLocalOpenAIChat(finalMessage, base64Image, config) {
623
778
  }
624
779
 
625
780
  const content = [{ type: "text", text: finalMessage || "Analyze this." }];
626
- if (base64Image) {
781
+ for (const item of images) {
627
782
  content.push({
628
783
  type: "image_url",
629
- image_url: { url: base64Image }
784
+ image_url: { url: item }
630
785
  });
631
786
  }
632
787
  messages.push({ role: "user", content });
@@ -648,15 +803,16 @@ async function handleLocalOpenAIChat(finalMessage, base64Image, config) {
648
803
  const outputText = response.data.choices[0].message.content;
649
804
  history.push({ role: 'user', parts: [{ text: finalMessage }] });
650
805
  history.push({ role: 'model', parts: [{ text: outputText }] });
651
- writeChatHistory(history.slice(-MAX_HISTORY_MESSAGES));
806
+ writeChatHistory(cleanHistoryForStorage(history.slice(-MAX_STORED_HISTORY_MESSAGES)));
652
807
 
653
808
  return parseAiResponse(outputText);
654
809
  }
655
810
 
656
811
  async function handleHuggingFaceChat(finalMessage, base64Image, config) {
657
812
  const history = readChatHistory() || [];
813
+ const images = normalizeImageList(base64Image);
658
814
  const apiKey = config.hfApiKey || process.env.HF_API_KEY;
659
- if (!apiKey) return { response: "กรุณาใส่ Hugging Face API Key ในการตั้งค่าก่อนนะคะ", action: { type: "none" } };
815
+ if (isPlaceholder(apiKey)) return { response: "กรุณาใส่ Hugging Face API Key ในการตั้งค่าก่อนนะคะ", action: { type: "none" } };
660
816
 
661
817
  const modelId = config.hfModel || 'meta-llama/Meta-Llama-3-8B-Instruct';
662
818
  const baseUrl = `https://api-inference.huggingface.co/models/${modelId}/v1/chat/completions`;
@@ -671,10 +827,10 @@ async function handleHuggingFaceChat(finalMessage, base64Image, config) {
671
827
  }
672
828
 
673
829
  const content = [{ type: "text", text: finalMessage || "Analyze this." }];
674
- if (base64Image) {
830
+ for (const item of images) {
675
831
  content.push({
676
832
  type: "image_url",
677
- image_url: { url: base64Image }
833
+ image_url: { url: item }
678
834
  });
679
835
  }
680
836
  messages.push({ role: "user", content });
@@ -693,7 +849,7 @@ async function handleHuggingFaceChat(finalMessage, base64Image, config) {
693
849
  const outputText = response.data.choices[0].message.content;
694
850
  history.push({ role: 'user', parts: [{ text: finalMessage }] });
695
851
  history.push({ role: 'model', parts: [{ text: outputText }] });
696
- writeChatHistory(history.slice(-MAX_HISTORY_MESSAGES));
852
+ writeChatHistory(cleanHistoryForStorage(history.slice(-MAX_STORED_HISTORY_MESSAGES)));
697
853
 
698
854
  return parseAiResponse(outputText);
699
855
  }
@@ -713,12 +869,14 @@ function parseAiResponse(outputText) {
713
869
  if (parsedResult && typeof parsedResult.response === 'string') {
714
870
  parsedResult.response = decodeUnicode(parsedResult.response);
715
871
  }
872
+ validateParsedAction(parsedResult);
716
873
  parsedResult.timestamp = new Date().toISOString();
717
874
  return parsedResult;
718
875
  }
719
876
 
720
877
  async function handleOllamaChat(finalMessage, base64Image, base64Audio, config) {
721
878
  const history = readChatHistory() || [];
879
+ const imageInputs = normalizeImageList(base64Image);
722
880
 
723
881
  const ollamaMessages = [
724
882
  { role: 'system', content: buildSystemPrompt() }
@@ -735,11 +893,11 @@ async function handleOllamaChat(finalMessage, base64Image, base64Audio, config)
735
893
 
736
894
  let currentContent = finalMessage || 'Analyze this input.';
737
895
  let images = [];
738
- if (base64Image) {
739
- images.push(base64Image.replace(/^data:image\/\w+;base64,/, ''));
896
+ for (const item of imageInputs) {
897
+ images.push(imageDataUriToBase64(item));
740
898
  }
741
899
 
742
- if (base64Audio && !base64Image && !finalMessage) {
900
+ if (base64Audio && imageInputs.length === 0 && !finalMessage) {
743
901
  currentContent = "Please analyze this audio requirement based on text if any was transacted, otherwise reply with appropriate action.";
744
902
  }
745
903
 
@@ -760,7 +918,7 @@ async function handleOllamaChat(finalMessage, base64Image, base64Audio, config)
760
918
 
761
919
  history.push({ role: 'user', parts: [{ text: currentContent }] });
762
920
  history.push({ role: 'model', parts: [{ text: outputText }] });
763
- writeChatHistory(history.slice(-MAX_HISTORY_MESSAGES));
921
+ writeChatHistory(cleanHistoryForStorage(history.slice(-MAX_STORED_HISTORY_MESSAGES)));
764
922
 
765
923
  let parsedResult;
766
924
  try {
@@ -773,11 +931,13 @@ async function handleOllamaChat(finalMessage, base64Image, base64Audio, config)
773
931
  parsedResult = { response: outputText, action: { type: "none", target: "" } };
774
932
  }
775
933
  }
934
+ validateParsedAction(parsedResult);
776
935
  return parsedResult;
777
936
  }
778
937
 
779
938
  function resetChat() {
780
939
  clearChatHistory();
940
+ memoryStore.clearConversationScopedProfile();
781
941
  createChat([]);
782
942
  console.log("Chat history cleared.");
783
943
  }
@@ -800,7 +960,7 @@ function historyToTranscript(history) {
800
960
  const sender = content.role === 'user' ? 'user' : 'ai';
801
961
  let text = Array.isArray(content.parts)
802
962
  ? content.parts
803
- .map((part) => typeof part.text === 'string' ? part.text : '')
963
+ .map((part) => typeof part.text === 'string' ? stripRelevantMemoryBlock(part.text) : '')
804
964
  .filter(Boolean)
805
965
  .join('\n')
806
966
  : '';
@@ -820,16 +980,14 @@ function historyToTranscript(history) {
820
980
  transcript.push({
821
981
  sender,
822
982
  text,
823
- timestamp: content.timestamp || new Date().toISOString()
983
+ timestamp: content.timestamp || new Date().toISOString(),
984
+ providerInfo: content.providerInfo || null
824
985
  });
825
986
  }
826
987
  return transcript;
827
988
  }
828
989
 
829
990
  async function getChatTranscript() {
830
- if (chat) {
831
- return historyToTranscript(await chat.getHistory(true));
832
- }
833
991
  return historyToTranscript(readChatHistory());
834
992
  }
835
993
 
@@ -851,7 +1009,7 @@ async function translateImageContent(base64Image) {
851
1009
  const retryDelayMs = [1000, 2500];
852
1010
 
853
1011
  try {
854
- const base64Data = base64Image.replace(/^data:image\/\w+;base64,/, '');
1012
+ const image = imageDataUriToInlineData(base64Image);
855
1013
  for (let attempt = 1; attempt <= maxAttempts; attempt += 1) {
856
1014
  try {
857
1015
  const response = await ai.models.generateContent({
@@ -861,7 +1019,7 @@ async function translateImageContent(base64Image) {
861
1019
  role: 'user',
862
1020
  parts: [
863
1021
  { text: "Extract any English text you see in this image and translate it to Thai. Return ONLY the Thai translation. If there is no text, return 'ไม่พบข้อความ'." },
864
- { inlineData: { mimeType: "image/png", data: base64Data } }
1022
+ { inlineData: image }
865
1023
  ]
866
1024
  }
867
1025
  ]