@pheem49/mint 1.2.3 → 1.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -2,11 +2,13 @@ const { GoogleGenAI } = require('@google/genai');
2
2
  const { readChatHistory, writeChatHistory, clearChatHistory } = require('../System/chat_history_manager');
3
3
  const { readConfig } = require('../System/config_manager');
4
4
  const pluginManager = require('../Plugins/plugin_manager');
5
+ const mcpManager = require('../Plugins/mcp_manager');
5
6
 
6
7
  let ai = null;
7
8
  let activeApiKey = '';
8
9
  const initialEnvKey = (process.env.GEMINI_API_KEY || '').trim();
9
- const DEFAULT_GEMINI_MODEL = 'gemini-2.5-flash'; // Optimized model
10
+ const axios = require('axios');
11
+ const DEFAULT_GEMINI_MODEL = 'gemini-2.5-flash';
10
12
 
11
13
  function decodeUnicode(str) {
12
14
  if (!str) return '';
@@ -53,12 +55,24 @@ Always respond exactly with valid JSON containing NO MARKDOWN FORMATTING (do not
53
55
  {
54
56
  "response": "Your conversational reply here (Matches user language).",
55
57
  "action": {
56
- "type": "none" | "open_url" | "open_app" | "search" | "web_automation" | "create_folder" | "open_file" | "delete_file" | "clipboard_write" | "system_info" | "plugin" | "learn_file" | "system_automation",
58
+ "type": "none" | "open_url" | "open_app" | "search" | "web_automation" | "create_folder" | "open_file" | "open_folder" | "delete_file" | "clipboard_write" | "system_info" | "plugin" | "learn_file" | "learn_folder" | "system_automation" | "mcp_tool" | "mouse_click" | "mouse_move" | "type_text" | "key_tap",
59
+
57
60
  "pluginName": "only if type is plugin",
58
- "target": "target string based on type or plugin instruction"
61
+ "server": "only if type is mcp_tool (server name)",
62
+ "target": "target string based on type (tool name if mcp_tool, text to type if type_text, key name if key_tap)",
63
+ "x": 0-1000, // required for mouse_click and mouse_move
64
+ "y": 0-1000, // required for mouse_click and mouse_move
65
+ "button": 1 | 2 | 3, // optional for mouse_click, 1=left, 2=middle, 3=right
66
+ "args": { "param": "value" } // only if type is mcp_tool
59
67
  }
60
68
  }
61
69
 
70
+ COORDINATE SYSTEM:
71
+ - When analyzing an image, use a coordinate system from 0 to 1000.
72
+ - (0, 0) is the Top-Left corner.
73
+ - (1000, 1000) is the Bottom-Right corner.
74
+ - To click an element, estimate its center point and provide x and y.
75
+
62
76
  Examples:
63
77
  Input: "Hi, what is your name?"
64
78
  Output: { "response": "Hello! My name is Mint, your personal AI assistant. How can I help you today?", "action": { "type": "none", "target": "" } }
@@ -122,7 +136,19 @@ const MAX_HISTORY_MESSAGES = 20; // Keep only the last 20 messages (approx 10 tu
122
136
  function createChat(history = []) {
123
137
  // Load plugins and get dynamic description for the prompt
124
138
  pluginManager.loadPlugins();
125
- const dynamicPrompt = systemInstruction + pluginManager.getPromptDescriptions();
139
+ // Inject MCP Tools
140
+ const mcpTools = mcpManager.getAllTools();
141
+ let mcpPrompt = "\n\nAVAILABLE MCP TOOLS (Model Context Protocol):\n";
142
+ if (mcpTools.length > 0) {
143
+ mcpTools.forEach(tool => {
144
+ mcpPrompt += `- Server: ${tool.serverName}, Tool: ${tool.name}\n Desc: ${tool.description}\n Args: ${JSON.stringify(tool.inputSchema.properties)}\n`;
145
+ });
146
+ mcpPrompt += "\nTo use these tools, use action type 'mcp_tool', specify the 'server' name, set 'target' to the tool name, and provide 'args'.\n";
147
+ } else {
148
+ mcpPrompt += "No MCP tools currently connected.\n";
149
+ }
150
+
151
+ const dynamicPrompt = systemInstruction + pluginManager.getPromptDescriptions() + mcpPrompt;
126
152
 
127
153
  // Truncate history and strip custom fields like 'timestamp' before passing to SDK
128
154
  const cleanedHistory = (history || []).map(msg => ({
@@ -151,7 +177,18 @@ resolveApiKey();
151
177
  initAiClient();
152
178
  createChat(readChatHistory());
153
179
 
154
- const { searchKnowledge } = require('./knowledge_base');
180
/**
 * Decide whether a chat message should trigger a local knowledge-base lookup.
 *
 * The check is a case-insensitive substring match against a fixed list of
 * English and Thai hint words.
 *
 * @param {*} message - Raw user message; anything that is not a string is
 *   treated as "no hint" instead of throwing on `.trim()`.
 * @returns {boolean} true when at least one hint occurs in the message.
 */
function shouldUseKnowledgeSearch(message) {
  // Non-string input (null, numbers, objects) cannot contain a hint.
  if (typeof message !== 'string') return false;

  const text = message.trim().toLowerCase();
  if (!text) return false;

  const knowledgeHints = [
    'readme', 'docs', 'documentation', 'manual', 'guide', 'knowledge', 'rag',
    'search local', 'search files', 'learn file', 'project files', 'source code',
    'ไฟล์', 'เอกสาร', 'คู่มือ', 'ค้นหาในเครื่อง', 'ค้นหาไฟล์', 'ข้อมูลในเครื่อง', 'โค้ดโปรเจค'
  ];

  return knowledgeHints.some((hint) => text.includes(hint));
}
155
192
 
156
193
  async function handleChat(message, base64Image = null, base64Audio = null) {
157
194
  try {
@@ -175,7 +212,8 @@ async function handleChat(message, base64Image = null, base64Audio = null) {
175
212
  let finalMessage = message;
176
213
 
177
214
  // Inject Local RAG Context
178
- if (message && message.trim().length > 0) {
215
+ if (message && message.trim().length > 0 && shouldUseKnowledgeSearch(message)) {
216
+ const { searchKnowledge } = require('./knowledge_base');
179
217
  const retrievedDocs = await searchKnowledge(message);
180
218
  if (retrievedDocs && retrievedDocs.length > 0) {
181
219
  let contextString = `\n\n[LOCAL KNOWLEDGE BASE - USE THIS CONTEXT TO ANSWER]\n`;
@@ -187,9 +225,17 @@ async function handleChat(message, base64Image = null, base64Audio = null) {
187
225
  }
188
226
 
189
227
  if (provider === 'ollama') {
190
- const axios = require('axios');
191
- return await handleOllamaChat(finalMessage, base64Image, base64Audio, config, axios);
228
+ return await handleOllamaChat(finalMessage, base64Image, base64Audio, config);
229
+ }
230
+
231
+ if (provider === 'anthropic') {
232
+ return await handleAnthropicChat(finalMessage, base64Image, config);
192
233
  }
234
+
235
+ if (provider === 'openai') {
236
+ return await handleOpenAIChat(finalMessage, base64Image, config);
237
+ }
238
+
193
239
 
194
240
  const desiredModel = resolveGeminiModel();
195
241
  if (!chat || activeModel !== desiredModel) {
@@ -288,7 +334,127 @@ async function handleChat(message, base64Image = null, base64Audio = null) {
288
334
  }
289
335
  }
290
336
 
291
- async function handleOllamaChat(finalMessage, base64Image, base64Audio, config, axios) {
337
/**
 * Send one chat turn to the Anthropic Messages API and return the parsed reply.
 *
 * Builds the system prompt from the shared system instruction, the plugin
 * descriptions and the connected MCP tools, replays the most recent stored
 * history, appends the new user turn (optionally with an image), and persists
 * the new user/model pair to the chat history afterwards.
 *
 * @param {string} finalMessage - User text for this turn.
 * @param {string|null} base64Image - Optional image, either a
 *   `data:image/...;base64,` URL or a bare base64 payload.
 * @param {object} config - Settings object; `anthropicApiKey` and
 *   `anthropicModel` are read from it.
 * @returns {Promise<object>} Result of `parseAiResponse`, or a canned reply
 *   when no API key is configured.
 * @throws {Error} When the HTTP request fails or the reply has no text block.
 */
async function handleAnthropicChat(finalMessage, base64Image, config) {
  const history = readChatHistory() || [];
  const apiKey = config.anthropicApiKey || process.env.ANTHROPIC_API_KEY;
  if (!apiKey) return { response: "กรุณาใส่ Anthropic API Key ในการตั้งค่าก่อนนะคะ", action: { type: "none" } };

  let mcpPrompt = "\n\nAVAILABLE MCP TOOLS:\n";
  for (const tool of mcpManager.getAllTools()) {
    // A tool without an input schema must not crash prompt construction.
    const argSchema = (tool.inputSchema && tool.inputSchema.properties) || {};
    mcpPrompt += `- Server: ${tool.serverName}, Tool: ${tool.name}\n Desc: ${tool.description}\n Args: ${JSON.stringify(argSchema)}\n`;
  }

  const systemPrompt = systemInstruction + pluginManager.getPromptDescriptions() + mcpPrompt;

  // Stored history uses 'model'/'user' roles with `parts`; map it to
  // Anthropic's 'assistant'/'user' roles and drop turns without text.
  const messages = [];
  for (const msg of history.slice(-MAX_HISTORY_MESSAGES)) {
    const role = msg.role === 'model' ? 'assistant' : 'user';
    const text = Array.isArray(msg.parts) ? msg.parts.map((p) => p.text || '').join('\n') : '';
    if (text) messages.push({ role, content: text });
  }

  const content = [];
  if (base64Image) {
    // Parse the data URL once. The previous code indexed the match result
    // directly and threw a TypeError when the prefix was missing.
    const dataUrl = /^data:(image\/[\w.+-]+);base64,([\s\S]*)$/.exec(base64Image);
    content.push({
      type: "image",
      source: {
        type: "base64",
        // NOTE(review): bare payloads carry no type information; PNG is an
        // assumption (screenshots) - confirm against the callers.
        media_type: dataUrl ? dataUrl[1] : 'image/png',
        data: dataUrl ? dataUrl[2] : base64Image
      }
    });
  }
  content.push({ type: "text", text: finalMessage || "Analyze this." });
  messages.push({ role: "user", content });

  const response = await axios.post('https://api.anthropic.com/v1/messages', {
    model: config.anthropicModel || 'claude-3-5-sonnet-latest',
    max_tokens: 4096,
    system: systemPrompt,
    messages: messages
  }, {
    headers: {
      'x-api-key': apiKey,
      'anthropic-version': '2023-06-01',
      'content-type': 'application/json'
    }
  });

  // Use the first text block instead of assuming content[0] exists and is text.
  const blocks = response.data && Array.isArray(response.data.content) ? response.data.content : [];
  const textBlock = blocks.find((block) => block && block.type === 'text' && typeof block.text === 'string');
  if (!textBlock) {
    throw new Error('Anthropic response did not contain a text block');
  }
  const outputText = textBlock.text;

  history.push({ role: 'user', parts: [{ text: finalMessage }] });
  history.push({ role: 'model', parts: [{ text: outputText }] });
  writeChatHistory(history.slice(-MAX_HISTORY_MESSAGES));

  return parseAiResponse(outputText);
}
389
+
390
/**
 * Send one chat turn to the OpenAI Chat Completions API and return the parsed reply.
 *
 * The system prompt (system instruction + plugin descriptions + MCP tool
 * list) is sent as the first message, followed by the most recent stored
 * history and the new user turn. The request asks for a JSON object reply.
 * The new user/model pair is appended to the persisted chat history.
 *
 * @param {string} finalMessage - User text for this turn.
 * @param {string|null} base64Image - Optional image; passed through unchanged
 *   as the `image_url` value.
 * @param {object} config - Settings object; `openaiApiKey` and `openaiModel`
 *   are read from it.
 * @returns {Promise<object>} Result of `parseAiResponse`, or a canned reply
 *   when no API key is configured.
 * @throws {Error} When the HTTP request fails or the reply has no message content.
 */
async function handleOpenAIChat(finalMessage, base64Image, config) {
  const history = readChatHistory() || [];
  const apiKey = config.openaiApiKey || process.env.OPENAI_API_KEY;
  if (!apiKey) return { response: "กรุณาใส่ OpenAI API Key ในการตั้งค่าก่อนนะคะ", action: { type: "none" } };

  let mcpPrompt = "\n\nAVAILABLE MCP TOOLS:\n";
  for (const tool of mcpManager.getAllTools()) {
    // A tool without an input schema must not crash prompt construction.
    const argSchema = (tool.inputSchema && tool.inputSchema.properties) || {};
    mcpPrompt += `- Server: ${tool.serverName}, Tool: ${tool.name}\n Desc: ${tool.description}\n Args: ${JSON.stringify(argSchema)}\n`;
  }

  const systemPrompt = systemInstruction + pluginManager.getPromptDescriptions() + mcpPrompt;

  // Stored history uses 'model'/'user' roles with `parts`; map it to
  // OpenAI's 'assistant'/'user' roles and drop turns without text.
  const messages = [{ role: "system", content: systemPrompt }];
  for (const msg of history.slice(-MAX_HISTORY_MESSAGES)) {
    const role = msg.role === 'model' ? 'assistant' : 'user';
    const text = Array.isArray(msg.parts) ? msg.parts.map((p) => p.text || '').join('\n') : '';
    if (text) messages.push({ role, content: text });
  }

  const content = [{ type: "text", text: finalMessage || "Analyze this." }];
  if (base64Image) {
    content.push({
      type: "image_url",
      image_url: { url: base64Image }
    });
  }
  messages.push({ role: "user", content });

  const response = await axios.post('https://api.openai.com/v1/chat/completions', {
    model: config.openaiModel || 'gpt-4o',
    messages: messages,
    response_format: { type: "json_object" }
  }, {
    headers: {
      'Authorization': `Bearer ${apiKey}`,
      'Content-Type': 'application/json'
    }
  });

  // Fail with a descriptive error instead of a TypeError when the reply has
  // no choices or no textual content.
  const choices = response.data && Array.isArray(response.data.choices) ? response.data.choices : [];
  const firstMessage = choices.length > 0 && choices[0] ? choices[0].message : null;
  const outputText = firstMessage ? firstMessage.content : null;
  if (typeof outputText !== 'string') {
    throw new Error('OpenAI response did not contain message content');
  }

  history.push({ role: 'user', parts: [{ text: finalMessage }] });
  history.push({ role: 'model', parts: [{ text: outputText }] });
  writeChatHistory(history.slice(-MAX_HISTORY_MESSAGES));

  return parseAiResponse(outputText);
}
437
+
438
/**
 * Turn raw model output into the `{ response, action, timestamp }` result object.
 *
 * Parsing is attempted, in order, on the whole text, on the contents of a
 * fenced code block, and on the outermost `{ ... }` span. The first candidate
 * that parses to a plain object wins. When none does, the raw text becomes
 * the response with a "none" action, so this function never throws on
 * malformed model output.
 *
 * @param {string} outputText - Text returned by the model.
 * @returns {object} Parsed result with `response` passed through
 *   `decodeUnicode` (when it is a string) and an ISO `timestamp` added.
 */
function parseAiResponse(outputText) {
  const rawText = outputText == null ? '' : String(outputText);

  const candidates = [rawText];
  const fenced = rawText.match(/```(?:json)?\s*([\s\S]*?)\s*```/);
  if (fenced) candidates.push(fenced[1]);
  const braced = rawText.match(/\{[\s\S]*\}/);
  if (braced) candidates.push(braced[0]);

  let parsedResult = null;
  for (const candidate of candidates) {
    try {
      const value = JSON.parse(candidate);
      // Only a plain object can carry response/action/timestamp; `null`,
      // numbers, strings and arrays are valid JSON but not a usable reply.
      if (value !== null && typeof value === 'object' && !Array.isArray(value)) {
        parsedResult = value;
        break;
      }
    } catch (e) {
      // Expected for non-JSON candidates; fall through to the next one.
    }
  }

  if (!parsedResult) {
    parsedResult = { response: rawText, action: { type: "none", target: "" } };
  }
  if (typeof parsedResult.response === 'string') {
    parsedResult.response = decodeUnicode(parsedResult.response);
  }
  parsedResult.timestamp = new Date().toISOString();
  return parsedResult;
}
456
+
457
+ async function handleOllamaChat(finalMessage, base64Image, base64Audio, config) {
292
458
  const history = readChatHistory() || [];
293
459
  pluginManager.loadPlugins();
294
460
 
@@ -1,15 +1,14 @@
1
1
  const fs = require('fs');
2
2
  const path = require('path');
3
3
  const os = require('os');
4
+ const crypto = require('crypto');
4
5
  const { GoogleGenAI } = require('@google/genai');
5
6
  const pdf = require('pdf-parse');
6
7
  const mammoth = require('mammoth');
7
8
  const xlsx = require('xlsx');
8
- const axios = require('axios');
9
- const cheerio = require('cheerio');
10
9
  const { readConfig } = require('../System/config_manager');
11
10
 
12
- // Handle electron dependency safely for benchmarks/tests
11
+ // Handle electron dependency safely
13
12
  let app;
14
13
  try {
15
14
  const electron = require('electron');
@@ -20,7 +19,7 @@ try {
20
19
 
21
20
  let ai = null;
22
21
  let activeApiKey = '';
23
- const initialEnvKey = (process.env.GEMINI_API_KEY || '').trim();
22
+ let DatabaseSync = null;
24
23
 
25
24
  function resolveApiKey() {
26
25
  let settingsKey = '';
@@ -30,53 +29,63 @@ function resolveApiKey() {
30
29
  } catch (e) {
31
30
  settingsKey = '';
32
31
  }
33
-
34
- const envKey = initialEnvKey;
35
- const selectedKey = settingsKey || envKey || '';
36
-
37
- if (selectedKey !== (process.env.GEMINI_API_KEY || '')) {
38
- process.env.GEMINI_API_KEY = selectedKey;
39
- }
40
-
32
+ const selectedKey = settingsKey || process.env.GEMINI_API_KEY || '';
41
33
  activeApiKey = selectedKey;
42
34
  return selectedKey;
43
35
  }
44
36
 
45
37
  function getAiClient() {
46
- const prevKey = activeApiKey;
47
- const nextKey = resolveApiKey();
48
- if (!ai || nextKey !== prevKey) {
49
- ai = new GoogleGenAI({ apiKey: nextKey });
38
+ const key = resolveApiKey();
39
+ if (!ai || activeApiKey !== key) {
40
+ ai = new GoogleGenAI({ apiKey: key });
50
41
  }
51
42
  return ai;
52
43
  }
53
44
 
54
45
  function getDbPath() {
46
+ const fileName = 'mint-knowledge.sqlite';
55
47
  if (app && app.getPath) {
56
- return path.join(app.getPath('userData'), 'mint-knowledge.json');
48
+ return path.join(app.getPath('userData'), fileName);
57
49
  }
58
- // Use global .mint directory for CLI/Benchmarking
59
50
  const mintDir = path.join(os.homedir(), '.mint');
60
- if (!fs.existsSync(mintDir)) {
61
- fs.mkdirSync(mintDir, { recursive: true });
62
- }
63
- return path.join(mintDir, 'mint-knowledge.json');
51
+ if (!fs.existsSync(mintDir)) fs.mkdirSync(mintDir, { recursive: true });
52
+ return path.join(mintDir, fileName);
64
53
  }
65
54
 
66
- function loadDb() {
67
- try {
68
- const p = getDbPath();
69
- if (fs.existsSync(p)) {
70
- return JSON.parse(fs.readFileSync(p, 'utf8'));
71
- }
72
- } catch (err) {
73
- console.error('[KnowledgeBase] Load Error:', err);
55
+ function getDatabaseSync() {
56
+ if (!DatabaseSync) {
57
+ ({ DatabaseSync } = require('node:sqlite'));
74
58
  }
75
- return { documents: [] };
59
+ return DatabaseSync;
76
60
  }
77
61
 
78
- function saveDb(db) {
79
- fs.writeFileSync(getDbPath(), JSON.stringify(db, null, 2));
62
// Shared connection, created lazily by getDb().
let dbInstance = null;

/**
 * Open the knowledge-base database (once) and make sure its schema exists.
 *
 * The connection is cached only after the schema statements succeed. If
 * schema creation throws, the connection is closed and nothing is cached, so
 * the next call retries instead of handing out a database without tables.
 *
 * @returns {object} The shared database connection.
 * @throws Whatever opening the database or creating the schema throws.
 */
function getDb() {
  if (dbInstance) return dbInstance;

  const dbPath = getDbPath();
  const Database = getDatabaseSync();
  const db = new Database(dbPath);

  try {
    // Create tables
    db.exec(`
      CREATE TABLE IF NOT EXISTS sources (
        id INTEGER PRIMARY KEY AUTOINCREMENT,
        path TEXT UNIQUE,
        name TEXT,
        hash TEXT,
        last_indexed DATETIME DEFAULT CURRENT_TIMESTAMP
      );
      CREATE TABLE IF NOT EXISTS chunks (
        id INTEGER PRIMARY KEY AUTOINCREMENT,
        source_id INTEGER,
        text TEXT,
        embedding BLOB,
        FOREIGN KEY(source_id) REFERENCES sources(id) ON DELETE CASCADE
      );
      CREATE INDEX IF NOT EXISTS idx_chunks_source ON chunks(source_id);
    `);
  } catch (err) {
    try {
      db.close();
    } catch (closeErr) {
      // Ignored on purpose: the schema error below is the one worth reporting.
    }
    throw err;
  }

  dbInstance = db;
  return dbInstance;
}
81
90
 
82
91
  async function generateEmbedding(text) {
@@ -85,138 +94,203 @@ async function generateEmbedding(text) {
85
94
  model: 'gemini-embedding-001',
86
95
  contents: text,
87
96
  });
88
- // The google/genai package returns an array of embeddings
89
97
  return response.embeddings[0].values;
90
98
  }
91
99
 
100
+
92
101
  function cosineSimilarity(vecA, vecB) {
93
- let dotProduct = 0.0;
94
- let normA = 0.0;
95
- let normB = 0.0;
102
+ let dotProduct = 0, normA = 0, normB = 0;
96
103
  for (let i = 0; i < vecA.length; i++) {
97
104
  dotProduct += vecA[i] * vecB[i];
98
105
  normA += vecA[i] * vecA[i];
99
106
  normB += vecB[i] * vecB[i];
100
107
  }
101
- if (normA === 0 || normB === 0) return 0;
102
108
  return dotProduct / (Math.sqrt(normA) * Math.sqrt(normB));
103
109
  }
104
110
 
111
/**
 * Compute the MD5 digest of a file's bytes.
 *
 * The file is read synchronously and in full.
 *
 * @param {string} filePath - Path of the file to hash.
 * @returns {string} Hex-encoded MD5 digest of the file content.
 */
function getFileHash(filePath) {
  const hasher = crypto.createHash('md5');
  hasher.update(fs.readFileSync(filePath));
  return hasher.digest('hex');
}
115
+
105
116
  function chunkText(text, maxChars = 1000, overlap = 200) {
106
117
  const chunks = [];
107
118
  let current = 0;
108
- const step = maxChars - overlap;
109
119
  while (current < text.length) {
110
120
  chunks.push(text.slice(current, current + maxChars));
111
- current += step;
121
+ current += (maxChars - overlap);
122
+ if (current >= text.length) break;
112
123
  }
113
124
  return chunks;
114
125
  }
115
126
 
116
- /**
117
- * Reads a local file or URL, chunks its text, generates embeddings, and saves to knowledge base.
118
- */
119
- async function indexFile(resourcePath) {
127
/**
 * Index one local file into the knowledge base.
 *
 * Directories are delegated to `indexFolder`. Files larger than 10 MB and
 * files whose content hash matches the stored hash are skipped. Text is
 * extracted (PDF, DOCX, XLSX, otherwise read as UTF-8), chunked, embedded,
 * and stored, replacing any chunks previously stored for the same path.
 *
 * All embeddings are generated BEFORE the database transaction is opened.
 * The transaction body is therefore fully synchronous and cannot interleave
 * with another `indexFile` call on the shared connection (`indexFolder` runs
 * several calls concurrently). Holding the transaction open across awaited
 * embedding requests would let a second call issue a nested BEGIN.
 *
 * @param {string} filePath - Path of the file (or folder) to index.
 * @returns {Promise<string>} Human-readable status message; errors are
 *   logged and reported in the message rather than thrown.
 */
async function indexFile(filePath) {
  try {
    if (!fs.existsSync(filePath)) return `ไม่พบไฟล์: ${filePath}`;
    const stats = fs.statSync(filePath);
    if (stats.isDirectory()) return await indexFolder(filePath);
    if (stats.size > 10 * 1024 * 1024) return `ไฟล์ใหญ่เกินไป (> 10MB): ${filePath}`;

    const hash = getFileHash(filePath);
    const db = getDb();

    // Check if already indexed and unchanged
    const findSource = db.prepare("SELECT id, hash FROM sources WHERE path = ?");
    const known = findSource.get(filePath);
    if (known && known.hash === hash) {
      return `⏩ ${path.basename(filePath)} ไม่มีการเปลี่ยนแปลง (ข้ามการอ่าน)`;
    }

    console.log(`[RAG] Indexing ${filePath}...`);
    let content = '';
    const ext = path.extname(filePath).toLowerCase();

    // Extraction logic
    if (ext === '.pdf') {
      const data = await pdf(fs.readFileSync(filePath));
      content = data.text;
    } else if (ext === '.docx') {
      const res = await mammoth.extractRawText({ path: filePath });
      content = res.value;
    } else if (ext === '.xlsx') {
      const wb = xlsx.readFile(filePath);
      content = wb.SheetNames.map((n) => xlsx.utils.sheet_to_csv(wb.Sheets[n])).join('\n');
    } else {
      content = fs.readFileSync(filePath, 'utf8');
    }

    if (!content.trim()) return `⚠️ ไฟล์ไม่มีข้อความ: ${filePath}`;

    // Asynchronous phase: embed every chunk up front. This keeps all
    // embeddings of one file in memory until they are written.
    const chunks = chunkText(content);
    const embedded = [];
    for (const chunk of chunks) {
      const embedding = await generateEmbedding(chunk);
      embedded.push({
        text: chunk,
        blob: Buffer.from(new Float32Array(embedding).buffer)
      });
    }

    // Synchronous phase: no await between BEGIN and COMMIT/ROLLBACK.
    db.exec("BEGIN TRANSACTION");
    try {
      // Look the source up again: another call may have inserted the same
      // path while the embeddings were being generated.
      const existing = findSource.get(filePath);
      if (existing) {
        db.prepare("DELETE FROM chunks WHERE source_id = ?").run(existing.id);
        db.prepare("UPDATE sources SET hash = ?, last_indexed = CURRENT_TIMESTAMP WHERE id = ?").run(hash, existing.id);
      } else {
        db.prepare("INSERT INTO sources (path, name, hash) VALUES (?, ?, ?)").run(filePath, path.basename(filePath), hash);
      }

      const sourceId = existing ? existing.id : db.prepare("SELECT last_insert_rowid() as id").get().id;

      const insertChunk = db.prepare("INSERT INTO chunks (source_id, text, embedding) VALUES (?, ?, ?)");
      for (const item of embedded) {
        insertChunk.run(sourceId, item.text, item.blob);
      }
      db.exec("COMMIT");
    } catch (e) {
      db.exec("ROLLBACK");
      throw e;
    }

    return `✅ Successfully indexed ${path.basename(filePath)} (${chunks.length} chunks)`;
  } catch (err) {
    console.error('[RAG] Error:', err);
    return `❌ Failed to index: ${err.message}`;
  }
}
195
195
 
196
196
  /**
197
- * Searches the local knowledge base for relevant chunks.
197
+ * Recursively gets all files in a directory asynchronously
198
198
  */
199
+ async function getAllFiles(dirPath, arrayOfFiles = []) {
200
+ const files = await fs.promises.readdir(dirPath, { withFileTypes: true });
201
+
202
+ for (const file of files) {
203
+ const fullPath = path.join(dirPath, file.name);
204
+ if (file.isDirectory()) {
205
+ await getAllFiles(fullPath, arrayOfFiles);
206
+ } else {
207
+ arrayOfFiles.push(fullPath);
208
+ }
209
+ }
210
+ return arrayOfFiles;
211
+ }
212
+
213
/**
 * Index every file found below a folder.
 *
 * Files are handed to `indexFile` in batches of five that run concurrently.
 * A result message starting with '✅' counts as indexed; every other result
 * counts as skipped.
 *
 * @param {string} folderPath - Folder to index recursively.
 * @returns {Promise<string>} Summary message with the indexed/skipped counts.
 */
async function indexFolder(folderPath) {
  console.log(`[RAG] Indexing folder: ${folderPath}`);
  const files = await getAllFiles(folderPath);
  console.log(`[RAG] Found ${files.length} files to check.`);

  // Small batches keep the number of in-flight indexFile calls bounded.
  const BATCH_SIZE = 5;
  const tally = { indexed: 0, skipped: 0 };

  let offset = 0;
  while (offset < files.length) {
    const batch = files.slice(offset, offset + BATCH_SIZE);
    const outcomes = await Promise.all(batch.map((file) => indexFile(file)));
    for (const outcome of outcomes) {
      if (outcome && outcome.startsWith('✅')) {
        tally.indexed += 1;
      } else {
        tally.skipped += 1;
      }
    }
    offset += BATCH_SIZE;
  }

  console.log(`[RAG] Indexing complete. ${tally.indexed} new/updated, ${tally.skipped} skipped.`);
  return `📂 Folder indexing complete: ${tally.indexed} learned, ${tally.skipped} skipped.`;
}
235
+
199
236
  async function searchKnowledge(query, topK = 3) {
200
- const db = loadDb();
201
- if (!db.documents || db.documents.length === 0) return null;
237
+ const startTime = Date.now();
238
+ const db = getDb();
239
+ const MAX_CHUNKS_TO_SEARCH = 2000; // Limit search to keep it fast
202
240
 
241
+ const countRes = db.prepare("SELECT COUNT(*) as count FROM chunks").get();
242
+ if (!countRes || countRes.count === 0) return null;
243
+
203
244
  try {
204
245
  const queryVector = await generateEmbedding(query);
205
- const results = db.documents.map(doc => ({
206
- ...doc,
207
- score: cosineSimilarity(queryVector, doc.embedding)
208
- })).sort((a, b) => b.score - a.score);
209
-
210
- // Return top results above a threshold
211
- const top = results.slice(0, topK).filter(r => r.score > 0.65);
212
- if (top.length > 0) {
213
- console.log(`[KnowledgeBase] Found ${top.length} matches for query.`);
214
- return top;
246
+ const queryTyped = new Float32Array(queryVector);
247
+ const results = [];
248
+
249
+ // Search most recent or top chunks first, but limit the total scan
250
+ const stmt = db.prepare("SELECT text, embedding, source_id FROM chunks LIMIT ?");
251
+ let processed = 0;
252
+
253
+ for (const c of stmt.iterate(MAX_CHUNKS_TO_SEARCH)) {
254
+ if (!c.embedding) continue;
255
+ processed++;
256
+
257
+ const chunkVector = new Float32Array(c.embedding.buffer, c.embedding.byteOffset, c.embedding.byteLength / 4);
258
+
259
+ let dotProduct = 0, normA = 0, normB = 0;
260
+ for (let i = 0; i < queryTyped.length; i++) {
261
+ const a = queryTyped[i];
262
+ const b = chunkVector[i];
263
+ dotProduct += a * b;
264
+ normA += a * a;
265
+ normB += b * b;
266
+ }
267
+ const score = dotProduct / (Math.sqrt(normA) * Math.sqrt(normB));
268
+
269
+ if (score > 0.65) {
270
+ results.push({ text: c.text, score, source_id: c.source_id });
271
+ }
215
272
  }
216
- } catch(err) {
217
- console.error("[KnowledgeBase] Search error:", err);
273
+
274
+ if (results.length > 0) {
275
+ results.sort((a, b) => b.score - a.score);
276
+ const top = results.slice(0, topK);
277
+
278
+ const sourceIds = [...new Set(top.map(t => t.source_id))];
279
+ const sources = db.prepare(`SELECT id, name FROM sources WHERE id IN (${sourceIds.join(',')})`).all();
280
+ const sourceMap = Object.fromEntries(sources.map(s => [s.id, s.name]));
281
+
282
+ console.log(`[RAG] Search took ${Date.now() - startTime}ms for ${processed} chunks.`);
283
+ return top.map(t => ({
284
+ text: t.text,
285
+ source: sourceMap[t.source_id],
286
+ score: t.score
287
+ }));
288
+ }
289
+ } catch (e) {
290
+ console.error("[RAG] Search Error:", e);
218
291
  }
219
292
  return null;
220
293
  }
221
294
 
222
- module.exports = { indexFile, searchKnowledge };
295
+
296
+ module.exports = { indexFile, indexFolder, searchKnowledge };