@pheem49/mint 1.5.0 → 1.5.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (101)
  1. package/README.md +35 -1
  2. package/main.js +28 -14
  3. package/mint-cli-logic.js +3 -119
  4. package/mint-cli.js +201 -500
  5. package/models/Shiroko_Model/Shiroko/Shiroko_Core/72d86db84cfa9730b894c241fd24c0db.png +0 -0
  6. package/models/Shiroko_Model/Shiroko/Shiroko_Core/items_pinned_to_model.json +14 -0
  7. package/models/Shiroko_Model/Shiroko/Shiroko_Core/呆猫.exp3.json +40 -0
  8. package/models/Shiroko_Model/Shiroko/Shiroko_Core/呆猫眼珠摇晃.exp3.json +15 -0
  9. package/models/Shiroko_Model/Shiroko/Shiroko_Core/围裙.exp3.json +10 -0
  10. package/models/Shiroko_Model/Shiroko/Shiroko_Core/拍照.exp3.json +50 -0
  11. package/models/Shiroko_Model/Shiroko/Shiroko_Core/拿笔.exp3.json +10 -0
  12. package/models/Shiroko_Model/Shiroko/Shiroko_Core/点一下.exp3.json +15 -0
  13. package/models/Shiroko_Model/Shiroko/Shiroko_Core/猫咪滤镜.exp3.json +10 -0
  14. package/models/Shiroko_Model/Shiroko/Shiroko_Core/眼镜.exp3.json +10 -0
  15. package/models/Shiroko_Model/Shiroko/Shiroko_Core/面饼0.4096/texture_00.png +0 -0
  16. package/models/Shiroko_Model/Shiroko/Shiroko_Core/面饼0.4096/texture_01.png +0 -0
  17. package/models/Shiroko_Model/Shiroko/Shiroko_Core/面饼0.4096/texture_02.png +0 -0
  18. package/models/Shiroko_Model/Shiroko/Shiroko_Core/面饼0.4096/texture_03.png +0 -0
  19. package/models/Shiroko_Model/Shiroko/Shiroko_Core/面饼0.cdi3.json +1498 -0
  20. package/models/Shiroko_Model/Shiroko/Shiroko_Core/面饼0.moc3 +0 -0
  21. package/models/Shiroko_Model/Shiroko/Shiroko_Core/面饼0.model3.json +47 -0
  22. package/models/Shiroko_Model/Shiroko/Shiroko_Core/面饼0.physics3.json +6658 -0
  23. package/models/Shiroko_Model/Shiroko/Shiroko_Core/面饼0.vtube.json +1299 -0
  24. package/models/Shiroko_Model/Shiroko/⚡高亮⚡使用教程与注意事项.txt +23 -0
  25. package/package.json +40 -17
  26. package/src/AI_Brain/Gemini_API.js +147 -46
  27. package/src/AI_Brain/autonomous_brain.js +2 -1
  28. package/src/AI_Brain/memory_store.js +299 -3
  29. package/src/AI_Brain/proactive_engine.js +12 -2
  30. package/src/Automation_Layer/browser_automation.js +26 -24
  31. package/src/CLI/approval_handler.js +42 -0
  32. package/src/CLI/chat_router.js +18 -6
  33. package/src/CLI/chat_ui.js +583 -52
  34. package/src/CLI/cli_colors.js +32 -0
  35. package/src/CLI/cli_formatters.js +89 -0
  36. package/src/CLI/code_agent.js +369 -71
  37. package/src/CLI/image_input.js +90 -0
  38. package/src/CLI/intent_detectors.js +181 -0
  39. package/src/CLI/interactive_chat.js +479 -0
  40. package/src/CLI/list_features.js +3 -0
  41. package/src/CLI/onboarding.js +72 -15
  42. package/src/CLI/repo_summarizer.js +282 -0
  43. package/src/CLI/semantic_code_search.js +312 -0
  44. package/src/CLI/skill_manager.js +41 -0
  45. package/src/CLI/slash_command_handler.js +418 -0
  46. package/src/CLI/symbol_indexer.js +231 -0
  47. package/src/CLI/updater.js +6 -4
  48. package/src/Channels/discord_bridge.js +11 -13
  49. package/src/Channels/line_bridge.js +10 -10
  50. package/src/Channels/slack_bridge.js +7 -12
  51. package/src/Channels/telegram_bridge.js +6 -14
  52. package/src/Channels/whatsapp_bridge.js +11 -9
  53. package/src/System/action_executor.js +59 -10
  54. package/src/System/chat_history_manager.js +20 -12
  55. package/src/System/config_manager.js +31 -1
  56. package/src/System/granular_automation.js +122 -53
  57. package/src/System/optional_require.js +23 -0
  58. package/src/System/proactive_loop.js +19 -3
  59. package/src/System/safety_manager.js +108 -0
  60. package/src/System/sandbox_runner.js +182 -0
  61. package/src/System/system_automation.js +127 -81
  62. package/src/System/system_info.js +70 -0
  63. package/src/System/tool_registry.js +280 -0
  64. package/src/System/window_manager.js +4 -2
  65. package/src/UI/live2d_manager.js +566 -0
  66. package/src/UI/renderer.js +339 -21
  67. package/src/UI/settings.css +655 -420
  68. package/src/UI/settings.html +478 -432
  69. package/src/UI/settings.js +10 -8
  70. package/src/UI/styles.css +516 -31
  71. package/.codex +0 -0
  72. package/docs/assets/Agent_Mint.png +0 -0
  73. package/docs/assets/CLI_Screen.png +0 -0
  74. package/docs/assets/Settings.png +0 -0
  75. package/docs/assets/icon.png +0 -0
  76. package/docs/guide.html +0 -632
  77. package/docs/index.html +0 -133
  78. package/docs/style.css +0 -579
  79. package/index.html +0 -16
  80. package/src/UI/index.html +0 -126
  81. package/tech_news.txt +0 -3
  82. package/test_knowledge.txt +0 -3
  83. package/tests/action_executor_safety.test.js +0 -67
  84. package/tests/agent_orchestrator.test.js +0 -41
  85. package/tests/chat_router.test.js +0 -42
  86. package/tests/code_agent.test.js +0 -69
  87. package/tests/config_manager.test.js +0 -141
  88. package/tests/docker.test.js +0 -46
  89. package/tests/file_operations.test.js +0 -57
  90. package/tests/gmail.test.js +0 -135
  91. package/tests/gmail_auth.test.js +0 -129
  92. package/tests/google_calendar.test.js +0 -113
  93. package/tests/google_tts_urls.test.js +0 -24
  94. package/tests/memory_store.test.js +0 -185
  95. package/tests/notion.test.js +0 -121
  96. package/tests/provider_routing.test.js +0 -83
  97. package/tests/safety_manager.test.js +0 -40
  98. package/tests/spotify.test.js +0 -201
  99. package/tests/system_monitor.test.js +0 -37
  100. package/tests/updater.test.js +0 -32
  101. package/tests/workspace_manager.test.js +0 -56
@@ -0,0 +1,23 @@
1
+ 免费模型、免费发布,禁止转手、倒卖、免费模型、免费发布,禁止转手、倒卖、免费模型、免费发布,禁止转手、倒卖、
2
+
3
+ 本模型开放直播收益权利 可以通过直播营业收礼物舰长盈利 可以二创 可以以次形象约稿作为粉丝回馈or舰长礼物
4
+
5
+
6
+ *注意:本形象已注册版权 如过渡期间使用本模型的在制作属于私人的新形象时不可参考本模型立绘的设计 否则将追究其责任*
7
+
8
+
9
+
10
+ 唯一发布渠道为哔哩哔哩账号:神宫凉子 发布者账号UID:13737731
11
+ 唯一发布渠道为哔哩哔哩账号:神宫凉子 发布者账号UID:13737731
12
+ 唯一发布渠道为哔哩哔哩账号:神宫凉子 发布者账号UID:13737731
13
+ 唯一发布渠道为哔哩哔哩账号:神宫凉子 发布者账号UID:13737731
14
+
15
+ 【简单使用流程】
16
+
17
+ 1、通过链接下载压缩包
18
+
19
+ 2、根据自己使用的软件(vts),选择相应模型文件,不同模型文件都经过简单配置,打开即用
20
+
21
+ 3、把模型导入面捕软件,开启面捕即可使用
22
+
23
+ 4、直播推流与面捕软件的具体使用方法,可在b站自行搜索
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@pheem49/mint",
3
- "version": "1.5.0",
3
+ "version": "1.5.2",
4
4
  "description": "A powerful Electron-based AI desktop assistant powered by Google Gemini, featuring screen vision, web automation, and proactive suggestions.",
5
5
  "main": "main.js",
6
6
  "scripts": {
@@ -8,6 +8,9 @@
8
8
  "test": "jest --testPathPatterns=tests/",
9
9
  "test:watch": "jest --testPathPatterns=tests/ --watch",
10
10
  "build:linux": "electron-builder --linux",
11
+ "build:mac": "electron-builder --mac",
12
+ "build:win": "electron-builder --win",
13
+ "build:all": "electron-builder --linux --mac --win",
11
14
  "cli": "node mint-cli.js"
12
15
  },
13
16
  "jest": {
@@ -34,36 +37,37 @@
34
37
  "dependencies": {
35
38
  "@google/genai": "^1.44.0",
36
39
  "@inkjs/ui": "^2.0.0",
37
- "@line/bot-sdk": "^11.0.0",
38
- "@modelcontextprotocol/sdk": "^1.29.0",
39
- "@slack/bolt": "^4.7.2",
40
40
  "axios": "^1.13.6",
41
- "blessed": "^0.1.81",
42
41
  "cheerio": "^1.2.0",
43
42
  "commander": "^14.0.3",
44
- "discord.js": "^14.26.4",
45
43
  "dotenv": "^17.3.1",
46
- "express": "^5.2.1",
47
- "framer-motion": "^12.38.0",
48
44
  "ink": "^7.0.1",
49
45
  "ink-text-input": "^6.0.0",
50
- "inquirer": "^13.4.1",
51
- "lucide-react": "^1.9.0",
52
46
  "mammoth": "^1.12.0",
53
47
  "pdf-parse": "^2.4.5",
54
- "puppeteer": "^24.38.0",
55
- "qrcode-terminal": "^0.12.0",
56
- "react": "^19.2.5",
57
- "react-dom": "^19.2.5",
58
- "read-excel-file": "^9.0.9",
59
- "telegraf": "^4.16.3",
60
- "whatsapp-web.js": "^1.34.7"
48
+ "react": "^19.2.5"
49
+ },
50
+ "peerDependenciesOptional": {
51
+ "puppeteer": ">=22.0.0",
52
+ "whatsapp-web.js": ">=1.0.0",
53
+ "qrcode-terminal": ">=0.12.0",
54
+ "discord.js": ">=14.0.0",
55
+ "@slack/bolt": ">=4.0.0",
56
+ "telegraf": ">=4.0.0",
57
+ "@line/bot-sdk": ">=11.0.0",
58
+ "express": ">=4.0.0"
61
59
  },
62
60
  "devDependencies": {
61
+ "@hazart-pkg/live2d-core": "^1.0.1",
63
62
  "@vitejs/plugin-react": "^6.0.1",
64
63
  "electron": "^40.7.0",
65
64
  "electron-builder": "^26.8.1",
65
+ "framer-motion": "^12.38.0",
66
66
  "jest": "^30.4.0",
67
+ "lucide-react": "^1.9.0",
68
+ "pixi-live2d-display": "^0.4.0",
69
+ "pixi.js": "^6.5.10",
70
+ "react-dom": "^19.2.5",
67
71
  "vite": "^8.0.10"
68
72
  },
69
73
  "build": {
@@ -84,6 +88,25 @@
84
88
  ],
85
89
  "category": "Utility"
86
90
  },
91
+ "mac": {
92
+ "icon": "assets/icon.png",
93
+ "target": [
94
+ "dmg",
95
+ "zip"
96
+ ],
97
+ "category": "public.app-category.productivity"
98
+ },
99
+ "win": {
100
+ "icon": "assets/icon.png",
101
+ "target": [
102
+ "nsis",
103
+ "portable"
104
+ ]
105
+ },
106
+ "nsis": {
107
+ "oneClick": false,
108
+ "allowToChangeInstallationDirectory": true
109
+ },
87
110
  "deb": {
88
111
  "packageName": "mint-ai",
89
112
  "artifactName": "mint-ai_${version}_${arch}.${ext}"
@@ -6,6 +6,7 @@ const mcpManager = require('../Plugins/mcp_manager');
6
6
  const memoryStore = require('./memory_store');
7
7
  const agentOrchestrator = require('./agent_orchestrator');
8
8
  const workspaceManager = require('../CLI/workspace_manager');
9
+ const toolRegistry = require('../System/tool_registry');
9
10
 
10
11
  let ai = null;
11
12
  let activeApiKey = '';
@@ -25,6 +26,31 @@ function decodeUnicode(str) {
25
26
  }
26
27
  }
27
28
 
29
+ function imageDataUriToInlineData(base64Image) {
30
+ const fallbackMimeType = "image/png";
31
+ const match = String(base64Image || '').match(/^data:(image\/[\w.+-]+);base64,([\s\S]+)$/);
32
+ if (match) {
33
+ return {
34
+ mimeType: match[1],
35
+ data: match[2]
36
+ };
37
+ }
38
+
39
+ return {
40
+ mimeType: fallbackMimeType,
41
+ data: String(base64Image || '').replace(/^data:image\/\w+;base64,/, '')
42
+ };
43
+ }
44
+
45
+ function imageDataUriToBase64(base64Image) {
46
+ return imageDataUriToInlineData(base64Image).data;
47
+ }
48
+
49
+ function normalizeImageList(base64Image) {
50
+ if (!base64Image) return [];
51
+ return Array.isArray(base64Image) ? base64Image.filter(Boolean) : [base64Image];
52
+ }
53
+
28
54
  const systemInstruction = `You are "Mint" (มิ้นท์), a cute, cheerful, and highly helpful female Local AI Desktop Agent.
29
55
 
30
56
  PERSONALITY & TONE:
@@ -60,7 +86,7 @@ Always respond exactly with valid JSON containing NO MARKDOWN FORMATTING (do not
60
86
  {
61
87
  "response": "Your conversational reply here (Matches user language).",
62
88
  "action": {
63
- "type": "none" | "open_url" | "open_app" | "search" | "web_automation" | "create_folder" | "open_file" | "open_folder" | "find_path" | "delete_file" | "clipboard_write" | "system_info" | "plugin" | "learn_file" | "learn_folder" | "system_automation" | "mcp_tool" | "mouse_click" | "mouse_move" | "type_text" | "key_tap",
89
+ "type": ${toolRegistry.buildChatActionTypeUnion()},
64
90
 
65
91
  "pluginName": "only if type is plugin",
66
92
  "server": "only if type is mcp_tool (server name)",
@@ -103,6 +129,8 @@ NOTE: For date/time queries, ALWAYS use action type "system_info" with an EMPTY
103
129
 
104
130
  Input: "อากาศวันนี้เป็นยังไง" or "What's the weather in Bangkok?"
105
131
  Output: { "response": "มิ้นท์ไปดูอากาศให้เลยนะคะ", "action": { "type": "system_info", "target": "Bangkok" } }
132
+
133
+ ${toolRegistry.buildToolPromptSection()}
106
134
  `;
107
135
 
108
136
  // ─────────────────────────────────────────────────────────────────────────────
@@ -140,6 +168,61 @@ function buildSystemPrompt() {
140
168
  return systemInstruction + personaInstruction + workspaceSection + pluginManager.getPromptDescriptions() + mcpSection + userContext;
141
169
  }
142
170
 
171
+ function buildMessageWithRelevantMemory(finalMessage) {
172
+ if (!finalMessage) return finalMessage;
173
+ const relevant = memoryStore.searchInteractions(finalMessage, 5);
174
+ if (relevant.length === 0) return finalMessage;
175
+
176
+ const lines = [
177
+ '[Relevant long-term memory for this user message]',
178
+ ...relevant.flatMap((item, index) => [
179
+ `${index + 1}. User: ${item.user_text}`,
180
+ ` Mint: ${item.ai_text}`
181
+ ]),
182
+ '[End relevant memory]',
183
+ '',
184
+ finalMessage
185
+ ];
186
+ return lines.join('\n');
187
+ }
188
+
189
+ function stripRelevantMemoryBlock(text) {
190
+ const input = String(text || '');
191
+ return input
192
+ .replace(/\n?\[Relevant long-term memory for this user message\][\s\S]*?\[End relevant memory\]\n?/g, '\n')
193
+ .replace(/^\s*\[Relevant long-term memory for this user message\][\s\S]*?\[End relevant memory\]\s*/g, '')
194
+ .replace(/\n?\[LOCAL KNOWLEDGE BASE - USE THIS CONTEXT TO ANSWER\][\s\S]*/g, '')
195
+ .trim();
196
+ }
197
+
198
+ function cleanHistoryForStorage(history) {
199
+ if (!Array.isArray(history)) return [];
200
+ return history.map(msg => ({
201
+ ...msg,
202
+ parts: Array.isArray(msg.parts)
203
+ ? msg.parts.map(part => {
204
+ if (part.text) {
205
+ return { ...part, text: stripRelevantMemoryBlock(part.text) };
206
+ }
207
+ return part;
208
+ })
209
+ : msg.parts
210
+ }));
211
+ }
212
+
213
+ function validateParsedAction(parsedResult) {
214
+ if (!parsedResult || !parsedResult.action) {
215
+ return parsedResult;
216
+ }
217
+ try {
218
+ toolRegistry.validateToolInput(parsedResult.action.type || 'none', parsedResult.action);
219
+ } catch (error) {
220
+ parsedResult.response = `${parsedResult.response || ''}\n\n(Note: Mint skipped an invalid action: ${error.message})`.trim();
221
+ parsedResult.action = { type: 'none', target: '' };
222
+ }
223
+ return parsedResult;
224
+ }
225
+
143
226
  function resolveApiKey() {
144
227
  let settingsKey = '';
145
228
  try {
@@ -226,7 +309,7 @@ function attachProviderInfoToLatestHistory(providerInfo) {
226
309
  for (let i = history.length - 1; i >= 0; i -= 1) {
227
310
  if (history[i] && history[i].role === 'model') {
228
311
  history[i].providerInfo = providerInfo;
229
- writeChatHistory(history);
312
+ writeChatHistory(cleanHistoryForStorage(history));
230
313
  return;
231
314
  }
232
315
  }
@@ -240,12 +323,18 @@ let chat = null;
240
323
  let activeModel = resolveGeminiModel();
241
324
  let lastLoggedModel = '';
242
325
  const MAX_HISTORY_MESSAGES = 40; // Increased context for deeper reasoning
326
+ const MAX_STORED_HISTORY_MESSAGES = 200;
243
327
 
244
328
  function createChat(history = []) {
245
329
  // Truncate history and strip custom fields like 'timestamp' before passing to SDK
246
330
  const cleanedHistory = (history || []).map(msg => ({
247
331
  role: msg.role,
248
- parts: msg.parts
332
+ parts: msg.parts.map(part => {
333
+ if (part.text) {
334
+ return { ...part, text: stripRelevantMemoryBlock(part.text) };
335
+ }
336
+ return part;
337
+ })
249
338
  }));
250
339
  const truncatedHistory = cleanedHistory.slice(-MAX_HISTORY_MESSAGES);
251
340
 
@@ -357,8 +446,9 @@ async function handleChat(message, base64Image = null, base64Audio = null) {
357
446
 
358
447
  async function handleGeminiChat(finalMessage, base64Image, base64Audio) {
359
448
  try {
449
+ const images = normalizeImageList(base64Image);
360
450
  // 1. Check cache first for text-only messages
361
- if (finalMessage && !base64Image && !base64Audio) {
451
+ if (finalMessage && images.length === 0 && !base64Audio) {
362
452
  const cached = memoryStore.getCachedResponse(finalMessage);
363
453
  if (cached) return cached;
364
454
  }
@@ -371,18 +461,18 @@ async function handleGeminiChat(finalMessage, base64Image, base64Audio) {
371
461
  let aiResponse;
372
462
  const parts = [];
373
463
  if (finalMessage) {
374
- parts.push({ text: finalMessage });
375
- } else if (base64Audio && !base64Image) {
464
+ parts.push({ text: buildMessageWithRelevantMemory(finalMessage) });
465
+ } else if (base64Audio && images.length === 0) {
376
466
  // Provide a guiding prompt when only audio is provided to ensure Gemini follows instructions
377
467
  parts.push({ text: "Please listen to this voice command and respond in Thai with the appropriate JSON action if needed." });
378
- } else if (!base64Image && !base64Audio) {
468
+ } else if (images.length === 0 && !base64Audio) {
379
469
  parts.push({ text: "Analyze this input." });
380
470
  }
381
471
 
382
- if (base64Image) {
383
- const base64Data = base64Image.replace(/^data:image\/\w+;base64,/, '');
472
+ for (const item of images) {
473
+ const image = imageDataUriToInlineData(item);
384
474
  parts.push({
385
- inlineData: { mimeType: "image/png", data: base64Data }
475
+ inlineData: image
386
476
  });
387
477
  }
388
478
 
@@ -417,7 +507,7 @@ async function handleGeminiChat(finalMessage, base64Image, base64Audio) {
417
507
  if (!msg.timestamp) msg.timestamp = now;
418
508
  }
419
509
 
420
- writeChatHistory(history);
510
+ writeChatHistory(cleanHistoryForStorage(history));
421
511
 
422
512
  let outputText = '';
423
513
  try {
@@ -427,6 +517,8 @@ async function handleGeminiChat(finalMessage, base64Image, base64Audio) {
427
517
  outputText = String(aiResponse || '');
428
518
  }
429
519
 
520
+ outputText = stripRelevantMemoryBlock(outputText);
521
+
430
522
  let parsedResult;
431
523
  try {
432
524
  parsedResult = JSON.parse(outputText);
@@ -447,9 +539,11 @@ async function handleGeminiChat(finalMessage, base64Image, base64Audio) {
447
539
  // Decode any remaining unicode escapes in the response text
448
540
  if (parsedResult && typeof parsedResult.response === 'string') {
449
541
  parsedResult.response = decodeUnicode(parsedResult.response);
542
+ parsedResult.response = stripRelevantMemoryBlock(parsedResult.response);
450
543
  }
451
544
 
452
545
  // Attach timestamp to the result
546
+ validateParsedAction(parsedResult);
453
547
  parsedResult.timestamp = now;
454
548
 
455
549
  // Record interaction for long-term memory (non-blocking)
@@ -457,7 +551,7 @@ async function handleGeminiChat(finalMessage, base64Image, base64Audio) {
457
551
  setImmediate(() => {
458
552
  memoryStore.recordInteraction(finalMessage, parsedResult.response);
459
553
  // Cache text-only responses
460
- if (!base64Image && !base64Audio) {
554
+ if (images.length === 0 && !base64Audio) {
461
555
  memoryStore.cacheResponse(finalMessage, parsedResult);
462
556
  }
463
557
  });
@@ -478,8 +572,9 @@ async function handleGeminiChat(finalMessage, base64Image, base64Audio) {
478
572
  // ─────────────────────────────────────────────────────────────────────────────
479
573
  async function* handleGeminiChatStream(finalMessage, base64Image, base64Audio) {
480
574
  try {
575
+ const images = normalizeImageList(base64Image);
481
576
  // 1. Check cache first
482
- if (finalMessage && !base64Image && !base64Audio) {
577
+ if (finalMessage && images.length === 0 && !base64Audio) {
483
578
  const cached = memoryStore.getCachedResponse(finalMessage);
484
579
  if (cached) {
485
580
  yield { chunk: `{"response":"${cached.response.replace(/"/g, '\\"')}", "action": {"type":"none"}}` };
@@ -495,15 +590,14 @@ async function* handleGeminiChatStream(finalMessage, base64Image, base64Audio) {
495
590
 
496
591
  const parts = [];
497
592
  if (finalMessage) {
498
- parts.push({ text: finalMessage });
499
- } else if (base64Audio && !base64Image) {
593
+ parts.push({ text: buildMessageWithRelevantMemory(finalMessage) });
594
+ } else if (base64Audio && images.length === 0) {
500
595
  parts.push({ text: "Please listen to this voice command and respond in Thai with the appropriate JSON action if needed." });
501
- } else if (!base64Image && !base64Audio) {
596
+ } else if (images.length === 0 && !base64Audio) {
502
597
  parts.push({ text: "Analyze this input." });
503
598
  }
504
- if (base64Image) {
505
- const base64Data = base64Image.replace(/^data:image\/\w+;base64,/, '');
506
- parts.push({ inlineData: { mimeType: "image/png", data: base64Data } });
599
+ for (const item of images) {
600
+ parts.push({ inlineData: imageDataUriToInlineData(item) });
507
601
  }
508
602
  if (base64Audio) {
509
603
  let mimeType = "audio/webm";
@@ -523,10 +617,12 @@ async function* handleGeminiChatStream(finalMessage, base64Image, base64Audio) {
523
617
  } catch (_) {}
524
618
  if (chunkText) {
525
619
  fullText += chunkText;
526
- yield { chunk: chunkText };
620
+ yield { chunk: stripRelevantMemoryBlock(chunkText) };
527
621
  }
528
622
  }
529
623
 
624
+ fullText = stripRelevantMemoryBlock(fullText);
625
+
530
626
  // Save history
531
627
  const history = await chat.getHistory();
532
628
  const now = new Date().toISOString();
@@ -536,7 +632,7 @@ async function* handleGeminiChatStream(finalMessage, base64Image, base64Audio) {
536
632
  if (!modelMsg.timestamp) modelMsg.timestamp = now;
537
633
  if (!userMsg.timestamp) userMsg.timestamp = now;
538
634
  }
539
- writeChatHistory(history);
635
+ writeChatHistory(cleanHistoryForStorage(history));
540
636
 
541
637
  // Parse complete JSON response
542
638
  let parsedResult;
@@ -552,7 +648,9 @@ async function* handleGeminiChatStream(finalMessage, base64Image, base64Audio) {
552
648
  }
553
649
  if (parsedResult && typeof parsedResult.response === 'string') {
554
650
  parsedResult.response = decodeUnicode(parsedResult.response);
651
+ parsedResult.response = stripRelevantMemoryBlock(parsedResult.response);
555
652
  }
653
+ validateParsedAction(parsedResult);
556
654
  parsedResult.timestamp = now;
557
655
 
558
656
  // Record for long-term memory
@@ -560,7 +658,7 @@ async function* handleGeminiChatStream(finalMessage, base64Image, base64Audio) {
560
658
  setImmediate(() => {
561
659
  memoryStore.recordInteraction(finalMessage, parsedResult.response);
562
660
  // Cache text-only responses
563
- if (!base64Image && !base64Audio) {
661
+ if (images.length === 0 && !base64Audio) {
564
662
  memoryStore.cacheResponse(finalMessage, parsedResult);
565
663
  }
566
664
  });
@@ -576,6 +674,7 @@ async function* handleGeminiChatStream(finalMessage, base64Image, base64Audio) {
576
674
 
577
675
  async function handleAnthropicChat(finalMessage, base64Image, config) {
578
676
  const history = readChatHistory() || [];
677
+ const images = normalizeImageList(base64Image);
579
678
  const apiKey = config.anthropicApiKey || process.env.ANTHROPIC_API_KEY;
580
679
  if (isPlaceholder(apiKey)) return { response: "กรุณาใส่ Anthropic API Key ในการตั้งค่าก่อนนะคะ", action: { type: "none" } };
581
680
 
@@ -589,12 +688,11 @@ async function handleAnthropicChat(finalMessage, base64Image, config) {
589
688
  }
590
689
 
591
690
  const content = [];
592
- if (base64Image) {
593
- const base64Data = base64Image.replace(/^data:image\/\w+;base64,/, '');
594
- const mimeType = base64Image.match(/^data:(image\/\w+);base64,/)[1];
691
+ for (const item of images) {
692
+ const image = imageDataUriToInlineData(item);
595
693
  content.push({
596
694
  type: "image",
597
- source: { type: "base64", media_type: mimeType, data: base64Data }
695
+ source: { type: "base64", media_type: image.mimeType, data: image.data }
598
696
  });
599
697
  }
600
698
  content.push({ type: "text", text: finalMessage || "Analyze this." });
@@ -616,13 +714,14 @@ async function handleAnthropicChat(finalMessage, base64Image, config) {
616
714
  const outputText = response.data.content[0].text;
617
715
  history.push({ role: 'user', parts: [{ text: finalMessage }] });
618
716
  history.push({ role: 'model', parts: [{ text: outputText }] });
619
- writeChatHistory(history.slice(-MAX_HISTORY_MESSAGES));
717
+ writeChatHistory(cleanHistoryForStorage(history.slice(-MAX_STORED_HISTORY_MESSAGES)));
620
718
 
621
719
  return parseAiResponse(outputText);
622
720
  }
623
721
 
624
722
  async function handleOpenAIChat(finalMessage, base64Image, config) {
625
723
  const history = readChatHistory() || [];
724
+ const images = normalizeImageList(base64Image);
626
725
  const apiKey = config.openaiApiKey || process.env.OPENAI_API_KEY;
627
726
  if (isPlaceholder(apiKey)) return { response: "กรุณาใส่ OpenAI API Key ในการตั้งค่าก่อนนะคะ", action: { type: "none" } };
628
727
 
@@ -636,10 +735,10 @@ async function handleOpenAIChat(finalMessage, base64Image, config) {
636
735
  }
637
736
 
638
737
  const content = [{ type: "text", text: finalMessage || "Analyze this." }];
639
- if (base64Image) {
738
+ for (const item of images) {
640
739
  content.push({
641
740
  type: "image_url",
642
- image_url: { url: base64Image }
741
+ image_url: { url: item }
643
742
  });
644
743
  }
645
744
  messages.push({ role: "user", content });
@@ -658,13 +757,14 @@ async function handleOpenAIChat(finalMessage, base64Image, config) {
658
757
  const outputText = response.data.choices[0].message.content;
659
758
  history.push({ role: 'user', parts: [{ text: finalMessage }] });
660
759
  history.push({ role: 'model', parts: [{ text: outputText }] });
661
- writeChatHistory(history.slice(-MAX_HISTORY_MESSAGES));
760
+ writeChatHistory(cleanHistoryForStorage(history.slice(-MAX_STORED_HISTORY_MESSAGES)));
662
761
 
663
762
  return parseAiResponse(outputText);
664
763
  }
665
764
 
666
765
  async function handleLocalOpenAIChat(finalMessage, base64Image, config) {
667
766
  const history = readChatHistory() || [];
767
+ const images = normalizeImageList(base64Image);
668
768
  const apiKey = 'lm-studio';
669
769
  const baseUrl = config.localApiBaseUrl || 'http://localhost:1234/v1';
670
770
 
@@ -678,10 +778,10 @@ async function handleLocalOpenAIChat(finalMessage, base64Image, config) {
678
778
  }
679
779
 
680
780
  const content = [{ type: "text", text: finalMessage || "Analyze this." }];
681
- if (base64Image) {
781
+ for (const item of images) {
682
782
  content.push({
683
783
  type: "image_url",
684
- image_url: { url: base64Image }
784
+ image_url: { url: item }
685
785
  });
686
786
  }
687
787
  messages.push({ role: "user", content });
@@ -703,13 +803,14 @@ async function handleLocalOpenAIChat(finalMessage, base64Image, config) {
703
803
  const outputText = response.data.choices[0].message.content;
704
804
  history.push({ role: 'user', parts: [{ text: finalMessage }] });
705
805
  history.push({ role: 'model', parts: [{ text: outputText }] });
706
- writeChatHistory(history.slice(-MAX_HISTORY_MESSAGES));
806
+ writeChatHistory(cleanHistoryForStorage(history.slice(-MAX_STORED_HISTORY_MESSAGES)));
707
807
 
708
808
  return parseAiResponse(outputText);
709
809
  }
710
810
 
711
811
  async function handleHuggingFaceChat(finalMessage, base64Image, config) {
712
812
  const history = readChatHistory() || [];
813
+ const images = normalizeImageList(base64Image);
713
814
  const apiKey = config.hfApiKey || process.env.HF_API_KEY;
714
815
  if (isPlaceholder(apiKey)) return { response: "กรุณาใส่ Hugging Face API Key ในการตั้งค่าก่อนนะคะ", action: { type: "none" } };
715
816
 
@@ -726,10 +827,10 @@ async function handleHuggingFaceChat(finalMessage, base64Image, config) {
726
827
  }
727
828
 
728
829
  const content = [{ type: "text", text: finalMessage || "Analyze this." }];
729
- if (base64Image) {
830
+ for (const item of images) {
730
831
  content.push({
731
832
  type: "image_url",
732
- image_url: { url: base64Image }
833
+ image_url: { url: item }
733
834
  });
734
835
  }
735
836
  messages.push({ role: "user", content });
@@ -748,7 +849,7 @@ async function handleHuggingFaceChat(finalMessage, base64Image, config) {
748
849
  const outputText = response.data.choices[0].message.content;
749
850
  history.push({ role: 'user', parts: [{ text: finalMessage }] });
750
851
  history.push({ role: 'model', parts: [{ text: outputText }] });
751
- writeChatHistory(history.slice(-MAX_HISTORY_MESSAGES));
852
+ writeChatHistory(cleanHistoryForStorage(history.slice(-MAX_STORED_HISTORY_MESSAGES)));
752
853
 
753
854
  return parseAiResponse(outputText);
754
855
  }
@@ -768,12 +869,14 @@ function parseAiResponse(outputText) {
768
869
  if (parsedResult && typeof parsedResult.response === 'string') {
769
870
  parsedResult.response = decodeUnicode(parsedResult.response);
770
871
  }
872
+ validateParsedAction(parsedResult);
771
873
  parsedResult.timestamp = new Date().toISOString();
772
874
  return parsedResult;
773
875
  }
774
876
 
775
877
  async function handleOllamaChat(finalMessage, base64Image, base64Audio, config) {
776
878
  const history = readChatHistory() || [];
879
+ const imageInputs = normalizeImageList(base64Image);
777
880
 
778
881
  const ollamaMessages = [
779
882
  { role: 'system', content: buildSystemPrompt() }
@@ -790,11 +893,11 @@ async function handleOllamaChat(finalMessage, base64Image, base64Audio, config)
790
893
 
791
894
  let currentContent = finalMessage || 'Analyze this input.';
792
895
  let images = [];
793
- if (base64Image) {
794
- images.push(base64Image.replace(/^data:image\/\w+;base64,/, ''));
896
+ for (const item of imageInputs) {
897
+ images.push(imageDataUriToBase64(item));
795
898
  }
796
899
 
797
- if (base64Audio && !base64Image && !finalMessage) {
900
+ if (base64Audio && imageInputs.length === 0 && !finalMessage) {
798
901
  currentContent = "Please analyze this audio requirement based on text if any was transacted, otherwise reply with appropriate action.";
799
902
  }
800
903
 
@@ -815,7 +918,7 @@ async function handleOllamaChat(finalMessage, base64Image, base64Audio, config)
815
918
 
816
919
  history.push({ role: 'user', parts: [{ text: currentContent }] });
817
920
  history.push({ role: 'model', parts: [{ text: outputText }] });
818
- writeChatHistory(history.slice(-MAX_HISTORY_MESSAGES));
921
+ writeChatHistory(cleanHistoryForStorage(history.slice(-MAX_STORED_HISTORY_MESSAGES)));
819
922
 
820
923
  let parsedResult;
821
924
  try {
@@ -828,6 +931,7 @@ async function handleOllamaChat(finalMessage, base64Image, base64Audio, config)
828
931
  parsedResult = { response: outputText, action: { type: "none", target: "" } };
829
932
  }
830
933
  }
934
+ validateParsedAction(parsedResult);
831
935
  return parsedResult;
832
936
  }
833
937
 
@@ -856,7 +960,7 @@ function historyToTranscript(history) {
856
960
  const sender = content.role === 'user' ? 'user' : 'ai';
857
961
  let text = Array.isArray(content.parts)
858
962
  ? content.parts
859
- .map((part) => typeof part.text === 'string' ? part.text : '')
963
+ .map((part) => typeof part.text === 'string' ? stripRelevantMemoryBlock(part.text) : '')
860
964
  .filter(Boolean)
861
965
  .join('\n')
862
966
  : '';
@@ -884,9 +988,6 @@ function historyToTranscript(history) {
884
988
  }
885
989
 
886
990
  async function getChatTranscript() {
887
- if (chat) {
888
- return historyToTranscript(await chat.getHistory(true));
889
- }
890
991
  return historyToTranscript(readChatHistory());
891
992
  }
892
993
 
@@ -908,7 +1009,7 @@ async function translateImageContent(base64Image) {
908
1009
  const retryDelayMs = [1000, 2500];
909
1010
 
910
1011
  try {
911
- const base64Data = base64Image.replace(/^data:image\/\w+;base64,/, '');
1012
+ const image = imageDataUriToInlineData(base64Image);
912
1013
  for (let attempt = 1; attempt <= maxAttempts; attempt += 1) {
913
1014
  try {
914
1015
  const response = await ai.models.generateContent({
@@ -918,7 +1019,7 @@ async function translateImageContent(base64Image) {
918
1019
  role: 'user',
919
1020
  parts: [
920
1021
  { text: "Extract any English text you see in this image and translate it to Thai. Return ONLY the Thai translation. If there is no text, return 'ไม่พบข้อความ'." },
921
- { inlineData: { mimeType: "image/png", data: base64Data } }
1022
+ { inlineData: image }
922
1023
  ]
923
1024
  }
924
1025
  ]
@@ -100,7 +100,7 @@ async function executeAutonomousTask(taskDescription, notifyCallback) {
100
100
  break;
101
101
  case 'write_file':
102
102
  const filePath = expandHome(actionObj.target);
103
- safetyManager.resolveWithinRoot(os.homedir(), filePath);
103
+ safetyManager.assertPathCapability(filePath, 'write');
104
104
  if (notifyCallback) notifyCallback(`✍️ กำลังบันทึกไฟล์: ${actionObj.target}`);
105
105
  try {
106
106
  safetyManager.appendActionLog({
@@ -119,6 +119,7 @@ async function executeAutonomousTask(taskDescription, notifyCallback) {
119
119
  case 'delete_file':
120
120
  const delPath = expandHome(actionObj.target);
121
121
  safetyManager.assertActionAllowed({ type: 'delete_file', target: delPath });
122
+ safetyManager.assertPathCapability(delPath, 'write');
122
123
  if (notifyCallback) notifyCallback(`🗑️ มิ้นท์ขอย้ายไฟล์ไปที่ถังขยะ: ${actionObj.target}`);
123
124
  const resDel = await deleteFile(delPath);
124
125
  observation = resDel.success ? "File moved to trash." : `Failed: ${resDel.message}`;