@pheem49/mint 1.2.4 → 1.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +28 -0
- package/index.html +16 -0
- package/main.js +36 -83
- package/mint-cli-logic.js +19 -0
- package/mint-cli.js +117 -15
- package/package.json +8 -2
- package/src/AI_Brain/Gemini_API.js +175 -9
- package/src/AI_Brain/knowledge_base.js +199 -125
- package/src/Automation_Layer/file_operations.js +41 -19
- package/src/CLI/chat_router.js +166 -0
- package/src/CLI/chat_ui.js +239 -110
- package/src/CLI/code_agent.js +443 -0
- package/src/CLI/code_session_memory.js +62 -0
- package/src/CLI/list_features.js +1 -0
- package/src/Plugins/mcp_manager.js +95 -0
- package/src/Plugins/plugin_manager.js +2 -2
- package/src/System/config_manager.js +27 -7
- package/src/System/granular_automation.js +88 -0
- package/src/UI/settings.html +24 -0
- package/src/UI/settings.js +98 -1
|
@@ -2,11 +2,13 @@ const { GoogleGenAI } = require('@google/genai');
|
|
|
2
2
|
const { readChatHistory, writeChatHistory, clearChatHistory } = require('../System/chat_history_manager');
|
|
3
3
|
const { readConfig } = require('../System/config_manager');
|
|
4
4
|
const pluginManager = require('../Plugins/plugin_manager');
|
|
5
|
+
const mcpManager = require('../Plugins/mcp_manager');
|
|
5
6
|
|
|
6
7
|
let ai = null;
|
|
7
8
|
let activeApiKey = '';
|
|
8
9
|
const initialEnvKey = (process.env.GEMINI_API_KEY || '').trim();
|
|
9
|
-
const
|
|
10
|
+
const axios = require('axios');
|
|
11
|
+
const DEFAULT_GEMINI_MODEL = 'gemini-2.5-flash';
|
|
10
12
|
|
|
11
13
|
function decodeUnicode(str) {
|
|
12
14
|
if (!str) return '';
|
|
@@ -53,12 +55,24 @@ Always respond exactly with valid JSON containing NO MARKDOWN FORMATTING (do not
|
|
|
53
55
|
{
|
|
54
56
|
"response": "Your conversational reply here (Matches user language).",
|
|
55
57
|
"action": {
|
|
56
|
-
"type": "none" | "open_url" | "open_app" | "search" | "web_automation" | "create_folder" | "open_file" | "delete_file" | "clipboard_write" | "system_info" | "plugin" | "learn_file" | "system_automation",
|
|
58
|
+
"type": "none" | "open_url" | "open_app" | "search" | "web_automation" | "create_folder" | "open_file" | "open_folder" | "delete_file" | "clipboard_write" | "system_info" | "plugin" | "learn_file" | "learn_folder" | "system_automation" | "mcp_tool" | "mouse_click" | "mouse_move" | "type_text" | "key_tap",
|
|
59
|
+
|
|
57
60
|
"pluginName": "only if type is plugin",
|
|
58
|
-
"
|
|
61
|
+
"server": "only if type is mcp_tool (server name)",
|
|
62
|
+
"target": "target string based on type (tool name if mcp_tool, text to type if type_text, key name if key_tap)",
|
|
63
|
+
"x": 0-1000, // required for mouse_click and mouse_move
|
|
64
|
+
"y": 0-1000, // required for mouse_click and mouse_move
|
|
65
|
+
"button": 1 | 2 | 3, // optional for mouse_click, 1=left, 2=middle, 3=right
|
|
66
|
+
"args": { "param": "value" } // only if type is mcp_tool
|
|
59
67
|
}
|
|
60
68
|
}
|
|
61
69
|
|
|
70
|
+
COORDINATE SYSTEM:
|
|
71
|
+
- When analyzing an image, use a coordinate system from 0 to 1000.
|
|
72
|
+
- (0, 0) is the Top-Left corner.
|
|
73
|
+
- (1000, 1000) is the Bottom-Right corner.
|
|
74
|
+
- To click an element, estimate its center point and provide x and y.
|
|
75
|
+
|
|
62
76
|
Examples:
|
|
63
77
|
Input: "Hi, what is your name?"
|
|
64
78
|
Output: { "response": "Hello! My name is Mint, your personal AI assistant. How can I help you today?", "action": { "type": "none", "target": "" } }
|
|
@@ -122,7 +136,19 @@ const MAX_HISTORY_MESSAGES = 20; // Keep only the last 20 messages (approx 10 tu
|
|
|
122
136
|
function createChat(history = []) {
|
|
123
137
|
// Load plugins and get dynamic description for the prompt
|
|
124
138
|
pluginManager.loadPlugins();
|
|
125
|
-
|
|
139
|
+
// Inject MCP Tools
|
|
140
|
+
const mcpTools = mcpManager.getAllTools();
|
|
141
|
+
let mcpPrompt = "\n\nAVAILABLE MCP TOOLS (Model Context Protocol):\n";
|
|
142
|
+
if (mcpTools.length > 0) {
|
|
143
|
+
mcpTools.forEach(tool => {
|
|
144
|
+
mcpPrompt += `- Server: ${tool.serverName}, Tool: ${tool.name}\n Desc: ${tool.description}\n Args: ${JSON.stringify(tool.inputSchema.properties)}\n`;
|
|
145
|
+
});
|
|
146
|
+
mcpPrompt += "\nTo use these tools, use action type 'mcp_tool', specify the 'server' name, set 'target' to the tool name, and provide 'args'.\n";
|
|
147
|
+
} else {
|
|
148
|
+
mcpPrompt += "No MCP tools currently connected.\n";
|
|
149
|
+
}
|
|
150
|
+
|
|
151
|
+
const dynamicPrompt = systemInstruction + pluginManager.getPromptDescriptions() + mcpPrompt;
|
|
126
152
|
|
|
127
153
|
// Truncate history and strip custom fields like 'timestamp' before passing to SDK
|
|
128
154
|
const cleanedHistory = (history || []).map(msg => ({
|
|
@@ -151,7 +177,18 @@ resolveApiKey();
|
|
|
151
177
|
initAiClient();
|
|
152
178
|
createChat(readChatHistory());
|
|
153
179
|
|
|
154
|
-
|
|
180
|
+
/**
 * Decide whether a chat message should trigger a local knowledge-base (RAG) lookup.
 *
 * Latin-script hints are matched as whole words/phrases (an optional plural
 * suffix is tolerated) so that short hints such as "rag" do not fire inside
 * unrelated words like "drag" or "storage". Thai hints keep plain substring
 * matching because Thai text is written without spaces between words.
 *
 * @param {string} message - Raw user message.
 * @returns {boolean} True when the message contains at least one knowledge hint;
 *   false for blank or non-string input.
 */
function shouldUseKnowledgeSearch(message) {
    if (typeof message !== 'string') return false;
    const text = message.trim().toLowerCase();
    if (!text) return false;

    const knowledgeHints = [
        'readme', 'docs', 'documentation', 'manual', 'guide', 'knowledge', 'rag',
        'search local', 'search files', 'learn file', 'project files', 'source code',
        'ไฟล์', 'เอกสาร', 'คู่มือ', 'ค้นหาในเครื่อง', 'ค้นหาไฟล์', 'ข้อมูลในเครื่อง', 'โค้ดโปรเจค'
    ];

    return knowledgeHints.some((hint) => {
        // Non-Latin hints: substring match is the only reliable option.
        if (!/^[a-z ]+$/.test(hint)) return text.includes(hint);
        // Latin hints contain only [a-z ] so they are safe to embed in a pattern unescaped.
        const wholeWord = new RegExp(`(?<![a-z0-9])${hint}(?:s|es)?(?![a-z0-9])`);
        return wholeWord.test(text);
    });
}
|
|
155
192
|
|
|
156
193
|
async function handleChat(message, base64Image = null, base64Audio = null) {
|
|
157
194
|
try {
|
|
@@ -175,7 +212,8 @@ async function handleChat(message, base64Image = null, base64Audio = null) {
|
|
|
175
212
|
let finalMessage = message;
|
|
176
213
|
|
|
177
214
|
// Inject Local RAG Context
|
|
178
|
-
if (message && message.trim().length > 0) {
|
|
215
|
+
if (message && message.trim().length > 0 && shouldUseKnowledgeSearch(message)) {
|
|
216
|
+
const { searchKnowledge } = require('./knowledge_base');
|
|
179
217
|
const retrievedDocs = await searchKnowledge(message);
|
|
180
218
|
if (retrievedDocs && retrievedDocs.length > 0) {
|
|
181
219
|
let contextString = `\n\n[LOCAL KNOWLEDGE BASE - USE THIS CONTEXT TO ANSWER]\n`;
|
|
@@ -187,9 +225,17 @@ async function handleChat(message, base64Image = null, base64Audio = null) {
|
|
|
187
225
|
}
|
|
188
226
|
|
|
189
227
|
if (provider === 'ollama') {
|
|
190
|
-
|
|
191
|
-
|
|
228
|
+
return await handleOllamaChat(finalMessage, base64Image, base64Audio, config);
|
|
229
|
+
}
|
|
230
|
+
|
|
231
|
+
if (provider === 'anthropic') {
|
|
232
|
+
return await handleAnthropicChat(finalMessage, base64Image, config);
|
|
192
233
|
}
|
|
234
|
+
|
|
235
|
+
if (provider === 'openai') {
|
|
236
|
+
return await handleOpenAIChat(finalMessage, base64Image, config);
|
|
237
|
+
}
|
|
238
|
+
|
|
193
239
|
|
|
194
240
|
const desiredModel = resolveGeminiModel();
|
|
195
241
|
if (!chat || activeModel !== desiredModel) {
|
|
@@ -288,7 +334,127 @@ async function handleChat(message, base64Image = null, base64Audio = null) {
|
|
|
288
334
|
}
|
|
289
335
|
}
|
|
290
336
|
|
|
291
|
-
async function
|
|
337
|
+
/**
 * Send one chat turn to the Anthropic Messages API and return the parsed reply.
 *
 * The system prompt is the shared system instruction plus plugin descriptions
 * and a listing of the MCP tools currently reported by the MCP manager. The
 * stored chat history (last MAX_HISTORY_MESSAGES entries) is replayed as plain
 * text turns, then the new user turn (optional image + text) is appended.
 * After a successful call, the user/model turn pair is persisted.
 *
 * @param {string} finalMessage - User text (possibly with RAG context appended).
 * @param {string|null} base64Image - Optional image, as a data URL or bare base64.
 * @param {object} config - App configuration (reads anthropicApiKey, anthropicModel).
 * @returns {Promise<object>} Result of parseAiResponse, or a "missing key" reply.
 *   HTTP/network failures from axios propagate to the caller.
 */
async function handleAnthropicChat(finalMessage, base64Image, config) {
    const history = readChatHistory() || [];
    const apiKey = config.anthropicApiKey || process.env.ANTHROPIC_API_KEY;
    if (!apiKey) return { response: "กรุณาใส่ Anthropic API Key ในการตั้งค่าก่อนนะคะ", action: { type: "none" } };

    let mcpPrompt = "\n\nAVAILABLE MCP TOOLS:\n";
    for (const tool of mcpManager.getAllTools()) {
        // Guard against tools that expose no input schema; previously this threw a TypeError.
        const argSchema = tool.inputSchema?.properties ?? {};
        mcpPrompt += `- Server: ${tool.serverName}, Tool: ${tool.name}\n  Desc: ${tool.description}\n  Args: ${JSON.stringify(argSchema)}\n`;
    }

    const systemPrompt = systemInstruction + pluginManager.getPromptDescriptions() + mcpPrompt;

    const messages = [];
    for (const msg of history.slice(-MAX_HISTORY_MESSAGES)) {
        const role = msg.role === 'model' ? 'assistant' : 'user';
        const text = Array.isArray(msg.parts) ? msg.parts.map(p => p.text || '').join('\n') : '';
        if (text) messages.push({ role, content: text });
    }

    const content = [];
    if (base64Image) {
        // Accept any image subtype (e.g. "svg+xml") and also bare base64 payloads.
        const prefix = /^data:(image\/[\w.+-]+);base64,/.exec(base64Image);
        content.push({
            type: "image",
            source: {
                type: "base64",
                // NOTE(review): PNG is an assumed default when no data-URL prefix is present.
                media_type: prefix ? prefix[1] : 'image/png',
                data: prefix ? base64Image.slice(prefix[0].length) : base64Image
            }
        });
    }
    content.push({ type: "text", text: finalMessage || "Analyze this." });
    messages.push({ role: "user", content });

    const response = await axios.post('https://api.anthropic.com/v1/messages', {
        model: config.anthropicModel || 'claude-3-5-sonnet-latest',
        max_tokens: 4096,
        system: systemPrompt,
        messages: messages
    }, {
        headers: {
            'x-api-key': apiKey,
            'anthropic-version': '2023-06-01',
            'content-type': 'application/json'
        }
    });

    // Collect every text block instead of assuming content[0] exists and is text.
    const blocks = Array.isArray(response.data?.content) ? response.data.content : [];
    const outputText = blocks
        .filter(block => block && typeof block.text === 'string')
        .map(block => block.text)
        .join('');

    history.push({ role: 'user', parts: [{ text: finalMessage }] });
    history.push({ role: 'model', parts: [{ text: outputText }] });
    writeChatHistory(history.slice(-MAX_HISTORY_MESSAGES));

    return parseAiResponse(outputText);
}
|
|
389
|
+
|
|
390
|
+
/**
 * Send one chat turn to the OpenAI Chat Completions API and return the parsed reply.
 *
 * Builds a system message from the shared system instruction, plugin
 * descriptions and the MCP tool listing, replays the last
 * MAX_HISTORY_MESSAGES stored turns as text, appends the new user turn
 * (text plus optional image) and requests a JSON-object response.
 * After a successful call, the user/model turn pair is persisted.
 *
 * @param {string} finalMessage - User text (possibly with RAG context appended).
 * @param {string|null} base64Image - Optional image, as a data URL or bare base64.
 * @param {object} config - App configuration (reads openaiApiKey, openaiModel).
 * @returns {Promise<object>} Result of parseAiResponse, or a "missing key" reply.
 *   HTTP/network failures from axios propagate to the caller.
 */
async function handleOpenAIChat(finalMessage, base64Image, config) {
    const history = readChatHistory() || [];
    const apiKey = config.openaiApiKey || process.env.OPENAI_API_KEY;
    if (!apiKey) return { response: "กรุณาใส่ OpenAI API Key ในการตั้งค่าก่อนนะคะ", action: { type: "none" } };

    let mcpPrompt = "\n\nAVAILABLE MCP TOOLS:\n";
    for (const tool of mcpManager.getAllTools()) {
        // Guard against tools that expose no input schema; previously this threw a TypeError.
        const argSchema = tool.inputSchema?.properties ?? {};
        mcpPrompt += `- Server: ${tool.serverName}, Tool: ${tool.name}\n  Desc: ${tool.description}\n  Args: ${JSON.stringify(argSchema)}\n`;
    }

    const systemPrompt = systemInstruction + pluginManager.getPromptDescriptions() + mcpPrompt;

    const messages = [{ role: "system", content: systemPrompt }];
    for (const msg of history.slice(-MAX_HISTORY_MESSAGES)) {
        const role = msg.role === 'model' ? 'assistant' : 'user';
        const text = Array.isArray(msg.parts) ? msg.parts.map(p => p.text || '').join('\n') : '';
        if (text) messages.push({ role, content: text });
    }

    const content = [{ type: "text", text: finalMessage || "Analyze this." }];
    if (base64Image) {
        // image_url expects a URL; wrap bare base64 payloads in a data URL.
        // NOTE(review): PNG is an assumed default when no prefix is present.
        const imageUrl = /^(data:|https?:)/i.test(base64Image)
            ? base64Image
            : `data:image/png;base64,${base64Image}`;
        content.push({
            type: "image_url",
            image_url: { url: imageUrl }
        });
    }
    messages.push({ role: "user", content });

    const response = await axios.post('https://api.openai.com/v1/chat/completions', {
        model: config.openaiModel || 'gpt-4o',
        messages: messages,
        response_format: { type: "json_object" }
    }, {
        headers: {
            'Authorization': `Bearer ${apiKey}`,
            'Content-Type': 'application/json'
        }
    });

    // Tolerate an empty choices array or a null message content instead of throwing.
    const outputText = response.data?.choices?.[0]?.message?.content ?? '';

    history.push({ role: 'user', parts: [{ text: finalMessage }] });
    history.push({ role: 'model', parts: [{ text: outputText }] });
    writeChatHistory(history.slice(-MAX_HISTORY_MESSAGES));

    return parseAiResponse(outputText);
}
|
|
437
|
+
|
|
438
|
+
/**
 * Turn raw model output into the `{ response, action, timestamp }` object used by the chat layer.
 *
 * Parsing strategy, in order:
 *   1. the whole text as JSON;
 *   2. the body of a ```json fenced block;
 *   3. the widest `{ ... }` span in the text.
 * If none of these yields a plain object, the raw text is returned as the
 * response with a "none" action. This function never throws on malformed output.
 *
 * @param {string} outputText - Raw text returned by the model.
 * @returns {object} Parsed result with `response` unicode-decoded (when it is a
 *   string) and an ISO-8601 `timestamp` attached.
 */
function parseAiResponse(outputText) {
    const rawText = typeof outputText === 'string' ? outputText : String(outputText ?? '');

    // Returns undefined (never throws) when the candidate is not valid JSON.
    const tryParse = (candidate) => {
        try {
            return JSON.parse(candidate);
        } catch {
            return undefined;
        }
    };

    let parsedResult = tryParse(rawText);
    if (parsedResult === undefined) {
        const fenced = rawText.match(/```json\n([\s\S]*?)\n```/);
        if (fenced) parsedResult = tryParse(fenced[1]);
    }
    if (parsedResult === undefined) {
        const braced = rawText.match(/\{[\s\S]*\}/);
        if (braced) parsedResult = tryParse(braced[0]);
    }

    // JSON such as `null`, `42`, `"text"` or `[...]` is valid but not a usable reply object.
    const isPlainObject = parsedResult !== null
        && typeof parsedResult === 'object'
        && !Array.isArray(parsedResult);
    if (!isPlainObject) {
        parsedResult = { response: rawText, action: { type: "none", target: "" } };
    }

    if (typeof parsedResult.response === 'string') {
        parsedResult.response = decodeUnicode(parsedResult.response);
    }
    parsedResult.timestamp = new Date().toISOString();
    return parsedResult;
}
|
|
456
|
+
|
|
457
|
+
async function handleOllamaChat(finalMessage, base64Image, base64Audio, config) {
|
|
292
458
|
const history = readChatHistory() || [];
|
|
293
459
|
pluginManager.loadPlugins();
|
|
294
460
|
|
|
@@ -1,15 +1,14 @@
|
|
|
1
1
|
const fs = require('fs');
|
|
2
2
|
const path = require('path');
|
|
3
3
|
const os = require('os');
|
|
4
|
+
const crypto = require('crypto');
|
|
4
5
|
const { GoogleGenAI } = require('@google/genai');
|
|
5
6
|
const pdf = require('pdf-parse');
|
|
6
7
|
const mammoth = require('mammoth');
|
|
7
8
|
const xlsx = require('xlsx');
|
|
8
|
-
const axios = require('axios');
|
|
9
|
-
const cheerio = require('cheerio');
|
|
10
9
|
const { readConfig } = require('../System/config_manager');
|
|
11
10
|
|
|
12
|
-
// Handle electron dependency safely
|
|
11
|
+
// Handle electron dependency safely
|
|
13
12
|
let app;
|
|
14
13
|
try {
|
|
15
14
|
const electron = require('electron');
|
|
@@ -20,7 +19,7 @@ try {
|
|
|
20
19
|
|
|
21
20
|
let ai = null;
|
|
22
21
|
let activeApiKey = '';
|
|
23
|
-
|
|
22
|
+
let DatabaseSync = null;
|
|
24
23
|
|
|
25
24
|
function resolveApiKey() {
|
|
26
25
|
let settingsKey = '';
|
|
@@ -30,53 +29,63 @@ function resolveApiKey() {
|
|
|
30
29
|
} catch (e) {
|
|
31
30
|
settingsKey = '';
|
|
32
31
|
}
|
|
33
|
-
|
|
34
|
-
const envKey = initialEnvKey;
|
|
35
|
-
const selectedKey = settingsKey || envKey || '';
|
|
36
|
-
|
|
37
|
-
if (selectedKey !== (process.env.GEMINI_API_KEY || '')) {
|
|
38
|
-
process.env.GEMINI_API_KEY = selectedKey;
|
|
39
|
-
}
|
|
40
|
-
|
|
32
|
+
const selectedKey = settingsKey || process.env.GEMINI_API_KEY || '';
|
|
41
33
|
activeApiKey = selectedKey;
|
|
42
34
|
return selectedKey;
|
|
43
35
|
}
|
|
44
36
|
|
|
45
37
|
/**
 * Return the shared GoogleGenAI client, (re)creating it when none exists yet
 * or when the resolved API key differs from the one used previously.
 *
 * resolveApiKey() overwrites the module-level `activeApiKey` as a side effect,
 * so the previous key must be captured BEFORE calling it; comparing afterwards
 * would always see equal values and a changed key would never take effect.
 *
 * @returns {GoogleGenAI} Client configured with the currently resolved API key.
 */
function getAiClient() {
    const previousKey = activeApiKey;
    const key = resolveApiKey();
    if (!ai || previousKey !== key) {
        ai = new GoogleGenAI({ apiKey: key });
    }
    return ai;
}
|
|
53
44
|
|
|
54
45
|
/**
 * Resolve the on-disk location of the knowledge-base SQLite file.
 *
 * When the `app` handle exposes `getPath`, the file lives in its 'userData'
 * directory. Otherwise it lives in `~/.mint`, which is created on demand.
 *
 * @returns {string} Absolute path to `mint-knowledge.sqlite`.
 */
function getDbPath() {
    const DB_FILE = 'mint-knowledge.sqlite';

    const hasAppPaths = Boolean(app && app.getPath);
    if (!hasAppPaths) {
        const fallbackDir = path.join(os.homedir(), '.mint');
        if (!fs.existsSync(fallbackDir)) {
            fs.mkdirSync(fallbackDir, { recursive: true });
        }
        return path.join(fallbackDir, DB_FILE);
    }

    return path.join(app.getPath('userData'), DB_FILE);
}
|
|
65
54
|
|
|
66
|
-
function
|
|
67
|
-
|
|
68
|
-
|
|
69
|
-
if (fs.existsSync(p)) {
|
|
70
|
-
return JSON.parse(fs.readFileSync(p, 'utf8'));
|
|
71
|
-
}
|
|
72
|
-
} catch (err) {
|
|
73
|
-
console.error('[KnowledgeBase] Load Error:', err);
|
|
55
|
+
/**
 * Lazily load the `DatabaseSync` class from `node:sqlite` and cache it in the
 * module-level `DatabaseSync` variable, so the module is only required the
 * first time a database is actually needed.
 *
 * @returns {Function} The `DatabaseSync` constructor.
 */
function getDatabaseSync() {
    if (DatabaseSync) return DatabaseSync;
    DatabaseSync = require('node:sqlite').DatabaseSync;
    return DatabaseSync;
}
|
|
77
61
|
|
|
78
|
-
|
|
79
|
-
|
|
62
|
+
// Lazily-created SQLite handle reused by every knowledge-base call in this module.
let dbInstance = null;

/**
 * Return the shared knowledge-base database, opening it and creating the
 * schema on first use.
 *
 * Schema: `sources` (one row per indexed path, with content hash) and `chunks`
 * (text + embedding BLOB per chunk, cascading on source deletion).
 *
 * The handle is only cached after the schema statements succeed; if schema
 * creation fails the connection is closed and the error rethrown, so the next
 * call retries instead of reusing a half-initialised database.
 *
 * @returns {object} Open `DatabaseSync` instance.
 * @throws Whatever opening the file or executing the schema throws.
 */
function getDb() {
    if (dbInstance) return dbInstance;

    const dbPath = getDbPath();
    const Database = getDatabaseSync();
    const db = new Database(dbPath);

    try {
        // Create Tables
        db.exec(`
            CREATE TABLE IF NOT EXISTS sources (
                id INTEGER PRIMARY KEY AUTOINCREMENT,
                path TEXT UNIQUE,
                name TEXT,
                hash TEXT,
                last_indexed DATETIME DEFAULT CURRENT_TIMESTAMP
            );
            CREATE TABLE IF NOT EXISTS chunks (
                id INTEGER PRIMARY KEY AUTOINCREMENT,
                source_id INTEGER,
                text TEXT,
                embedding BLOB,
                FOREIGN KEY(source_id) REFERENCES sources(id) ON DELETE CASCADE
            );
            CREATE INDEX IF NOT EXISTS idx_chunks_source ON chunks(source_id);
        `);
    } catch (err) {
        try {
            db.close();
        } catch (closeErr) {
            // Best effort: the schema error below is the one worth reporting.
        }
        throw err;
    }

    dbInstance = db;
    return dbInstance;
}
|
|
81
90
|
|
|
82
91
|
async function generateEmbedding(text) {
|
|
@@ -85,138 +94,203 @@ async function generateEmbedding(text) {
|
|
|
85
94
|
model: 'gemini-embedding-001',
|
|
86
95
|
contents: text,
|
|
87
96
|
});
|
|
88
|
-
// The google/genai package returns an array of embeddings
|
|
89
97
|
return response.embeddings[0].values;
|
|
90
98
|
}
|
|
91
99
|
|
|
100
|
+
|
|
92
101
|
/**
 * Cosine similarity between two numeric vectors.
 *
 * @param {ArrayLike<number>} vecA - First vector.
 * @param {ArrayLike<number>} vecB - Second vector.
 * @returns {number} Similarity in [-1, 1]. Returns 0 (rather than NaN) when the
 *   vectors have different lengths or when either has zero magnitude, so
 *   callers can safely compare the result against a threshold or sort by it.
 */
function cosineSimilarity(vecA, vecB) {
    if (vecA.length !== vecB.length) return 0;

    let dotProduct = 0;
    let normA = 0;
    let normB = 0;
    for (let i = 0; i < vecA.length; i++) {
        dotProduct += vecA[i] * vecB[i];
        normA += vecA[i] * vecA[i];
        normB += vecB[i] * vecB[i];
    }

    // A zero vector has no direction; avoid 0/0 producing NaN.
    if (normA === 0 || normB === 0) return 0;
    return dotProduct / (Math.sqrt(normA) * Math.sqrt(normB));
}
|
|
104
110
|
|
|
111
|
+
/**
 * Compute the MD5 digest of a file's bytes, used as a change-detection fingerprint.
 *
 * @param {string} filePath - Path of the file to read (read synchronously, in full).
 * @returns {string} Lower-case hexadecimal MD5 digest.
 */
function getFileHash(filePath) {
    const md5 = crypto.createHash('md5');
    md5.update(fs.readFileSync(filePath));
    return md5.digest('hex');
}
|
|
115
|
+
|
|
105
116
|
/**
 * Split text into overlapping, fixed-size character windows.
 *
 * @param {string} text - Text to split; empty/nullish input yields [].
 * @param {number} [maxChars=1000] - Maximum characters per chunk.
 * @param {number} [overlap=200] - Characters shared between consecutive chunks.
 * @returns {string[]} Chunks in document order. The last chunk always ends at
 *   the end of the text; no further chunk is emitted once the text is covered.
 */
function chunkText(text, maxChars = 1000, overlap = 200) {
    const chunks = [];
    if (!text || maxChars <= 0) return chunks;

    // An overlap >= maxChars would give a zero/negative stride and loop forever;
    // clamp so the window always advances by at least one character.
    const step = Math.max(1, maxChars - overlap);

    for (let current = 0; current < text.length; current += step) {
        chunks.push(text.slice(current, current + maxChars));
        // This window already reached the end; another chunk would only repeat
        // text that is fully contained in this one.
        if (current + maxChars >= text.length) break;
    }
    return chunks;
}
|
|
115
126
|
|
|
116
|
-
|
|
117
|
-
* Reads a local file or URL, chunks its text, generates embeddings, and saves to knowledge base.
|
|
118
|
-
*/
|
|
119
|
-
async function indexFile(resourcePath) {
|
|
127
|
+
/**
 * Index one file into the knowledge base: extract its text, split it into
 * chunks, embed each chunk and store the chunks under a `sources` row.
 *
 * Directories are delegated to indexFolder. Files larger than 10 MB, files
 * with no text and files whose content hash is unchanged are skipped.
 *
 * All embeddings are generated BEFORE the database transaction is opened, and
 * the transaction itself contains no `await`. The connection is shared, so a
 * transaction held open across an await could interleave with another
 * in-flight indexFile call (a second BEGIN fails, and a ROLLBACK would discard
 * the other call's rows). The trade-off is that one file's embeddings are held
 * in memory until they are written.
 *
 * @param {string} filePath - Path to a file or directory.
 * @returns {Promise<string>} Human-readable status message; a message starting
 *   with '✅' means the file was (re)indexed. Errors are caught, logged and
 *   reported as a '❌' message rather than thrown.
 */
async function indexFile(filePath) {
    try {
        if (!fs.existsSync(filePath)) return `ไม่พบไฟล์: ${filePath}`;
        const stats = fs.statSync(filePath);
        if (stats.isDirectory()) return await indexFolder(filePath);
        if (stats.size > 10 * 1024 * 1024) return `ไฟล์ใหญ่เกินไป (> 10MB): ${filePath}`;

        const hash = getFileHash(filePath);
        const db = getDb();

        // Check if already indexed and unchanged
        const findSource = db.prepare("SELECT id, hash FROM sources WHERE path = ?");
        const known = findSource.get(filePath);
        if (known && known.hash === hash) {
            return `⏩ ${path.basename(filePath)} ไม่มีการเปลี่ยนแปลง (ข้ามการอ่าน)`;
        }

        console.log(`[RAG] Indexing ${filePath}...`);
        let content = '';
        const ext = path.extname(filePath).toLowerCase();

        // Extraction logic
        if (ext === '.pdf') {
            const data = await pdf(fs.readFileSync(filePath));
            content = data.text;
        } else if (ext === '.docx') {
            const res = await mammoth.extractRawText({ path: filePath });
            content = res.value;
        } else if (ext === '.xlsx') {
            const wb = xlsx.readFile(filePath);
            content = wb.SheetNames.map(n => xlsx.utils.sheet_to_csv(wb.Sheets[n])).join('\n');
        } else {
            content = fs.readFileSync(filePath, 'utf8');
        }

        if (!content || !content.trim()) return `⚠️ ไฟล์ไม่มีข้อความ: ${filePath}`;

        // Phase 1 (async): embed every chunk while no transaction is open.
        const chunks = chunkText(content);
        const embedded = [];
        for (const chunk of chunks) {
            const embedding = await generateEmbedding(chunk);
            embedded.push({
                text: chunk,
                blob: Buffer.from(new Float32Array(embedding).buffer)
            });
        }

        // Phase 2 (sync): write everything in one uninterrupted transaction.
        db.exec("BEGIN TRANSACTION");
        try {
            // Re-read the source row: another call may have inserted it while we were embedding.
            const current = findSource.get(filePath);
            let sourceId;
            if (current) {
                sourceId = current.id;
                db.prepare("DELETE FROM chunks WHERE source_id = ?").run(sourceId);
                db.prepare("UPDATE sources SET hash = ?, last_indexed = CURRENT_TIMESTAMP WHERE id = ?").run(hash, sourceId);
            } else {
                const inserted = db.prepare("INSERT INTO sources (path, name, hash) VALUES (?, ?, ?)")
                    .run(filePath, path.basename(filePath), hash);
                sourceId = inserted.lastInsertRowid;
            }

            const insertChunk = db.prepare("INSERT INTO chunks (source_id, text, embedding) VALUES (?, ?, ?)");
            for (const item of embedded) {
                insertChunk.run(sourceId, item.text, item.blob);
            }
            db.exec("COMMIT");
        } catch (e) {
            try {
                db.exec("ROLLBACK");
            } catch (rollbackErr) {
                // Keep the original failure as the reported error.
                console.error('[RAG] Rollback failed:', rollbackErr);
            }
            throw e;
        }

        return `✅ Successfully indexed ${path.basename(filePath)} (${chunks.length} chunks)`;
    } catch (err) {
        console.error('[RAG] Error:', err);
        return `❌ Failed to index: ${err.message}`;
    }
}
|
|
195
195
|
|
|
196
196
|
/**
 * Recursively collect the paths of all files under a directory (asynchronously).
 *
 * - Regular files and symbolic links are collected; other entry types
 *   (FIFOs, sockets, devices) are skipped because they cannot be read as documents.
 * - A nested directory that cannot be read is logged and skipped so one bad
 *   folder does not abort the whole walk. Failure to read `dirPath` itself
 *   still rejects.
 *
 * @param {string} dirPath - Directory to walk.
 * @param {string[]} [arrayOfFiles=[]] - Accumulator; found paths are appended to it.
 * @returns {Promise<string[]>} The accumulator, containing every collected path.
 */
async function getAllFiles(dirPath, arrayOfFiles = []) {
    const entries = await fs.promises.readdir(dirPath, { withFileTypes: true });

    for (const entry of entries) {
        const fullPath = path.join(dirPath, entry.name);
        if (entry.isDirectory()) {
            try {
                await getAllFiles(fullPath, arrayOfFiles);
            } catch (err) {
                console.warn(`[RAG] Skipping unreadable folder ${fullPath}: ${err.message}`);
            }
        } else if (entry.isFile() || entry.isSymbolicLink()) {
            arrayOfFiles.push(fullPath);
        }
    }
    return arrayOfFiles;
}
|
|
212
|
+
|
|
213
|
+
/**
 * Index every file found under a folder and report how many were learned.
 *
 * Files are handed to indexFile in groups of five; each group runs
 * concurrently and the next group starts only after the previous one settles.
 * A file counts as "learned" when indexFile's message starts with '✅';
 * every other outcome (unchanged, empty, too large, failed) counts as "skipped".
 *
 * NOTE(review): indexFile is invoked concurrently here — confirm it is safe
 * to run several calls at once against the shared database handle.
 *
 * @param {string} folderPath - Root folder to index recursively.
 * @returns {Promise<string>} Summary message with learned/skipped counts.
 */
async function indexFolder(folderPath) {
    console.log(`[RAG] Indexing folder: ${folderPath}`);
    const allFiles = await getAllFiles(folderPath);
    console.log(`[RAG] Found ${allFiles.length} files to check.`);

    // Process in small batches to avoid blocking
    const BATCH_SIZE = 5;
    let indexedCount = 0;
    let skippedCount = 0;

    let offset = 0;
    while (offset < allFiles.length) {
        const group = allFiles.slice(offset, offset + BATCH_SIZE);
        const outcomes = await Promise.all(group.map((file) => indexFile(file)));
        for (const outcome of outcomes) {
            if (outcome && outcome.startsWith('✅')) {
                indexedCount += 1;
            } else {
                skippedCount += 1;
            }
        }
        offset += BATCH_SIZE;
    }

    console.log(`[RAG] Indexing complete. ${indexedCount} new/updated, ${skippedCount} skipped.`);
    return `📂 Folder indexing complete: ${indexedCount} learned, ${skippedCount} skipped.`;
}
|
|
235
|
+
|
|
199
236
|
/**
 * Find the stored chunks most similar to a query using cosine similarity
 * over embeddings.
 *
 * At most MAX_CHUNKS_TO_SEARCH chunks are scanned, newest first (highest id),
 * so recently indexed material is never the part that gets cut off. Chunks
 * whose embedding has a different dimension than the query embedding are
 * ignored. Only matches scoring above 0.65 are returned.
 *
 * Any failure (database unavailable, embedding request failed, ...) is logged
 * and reported as `null` so the caller can continue without local context.
 *
 * @param {string} query - Text to search for.
 * @param {number} [topK=3] - Maximum number of matches to return.
 * @returns {Promise<Array<{text: string, source: string, score: number}>|null>}
 *   Best matches ordered by descending score, or null when there is nothing to return.
 */
async function searchKnowledge(query, topK = 3) {
    const startTime = Date.now();
    const MAX_CHUNKS_TO_SEARCH = 2000; // Limit search to keep it fast
    const MIN_SCORE = 0.65;

    try {
        const db = getDb();

        const countRes = db.prepare("SELECT COUNT(*) as count FROM chunks").get();
        if (!countRes || countRes.count === 0) return null;

        const queryVector = await generateEmbedding(query);
        const queryTyped = new Float32Array(queryVector);
        const results = [];

        // Newest chunks first; without ORDER BY the LIMIT would keep an arbitrary subset.
        const stmt = db.prepare("SELECT text, embedding, source_id FROM chunks ORDER BY id DESC LIMIT ?");
        let processed = 0;

        for (const c of stmt.iterate(MAX_CHUNKS_TO_SEARCH)) {
            if (!c.embedding) continue;
            processed++;

            const bytes = c.embedding;
            // Embeddings of a different dimension are not comparable with the query.
            if (bytes.byteLength !== queryTyped.length * 4) continue;

            // Float32Array views need a 4-byte aligned offset; copy when the BLOB is not aligned.
            const chunkVector = bytes.byteOffset % 4 === 0
                ? new Float32Array(bytes.buffer, bytes.byteOffset, queryTyped.length)
                : new Float32Array(new Uint8Array(bytes).buffer);

            let dotProduct = 0, normA = 0, normB = 0;
            for (let i = 0; i < queryTyped.length; i++) {
                const a = queryTyped[i];
                const b = chunkVector[i];
                dotProduct += a * b;
                normA += a * a;
                normB += b * b;
            }
            // Zero-magnitude vectors cannot be scored.
            if (normA === 0 || normB === 0) continue;
            const score = dotProduct / (Math.sqrt(normA) * Math.sqrt(normB));

            if (score > MIN_SCORE) {
                results.push({ text: c.text, score, source_id: c.source_id });
            }
        }

        if (results.length > 0) {
            results.sort((a, b) => b.score - a.score);
            const top = results.slice(0, topK);

            const sourceIds = [...new Set(top.map(t => t.source_id))];
            let sources = [];
            if (sourceIds.length > 0) {
                // Bind ids as parameters instead of splicing them into the SQL text.
                const placeholders = sourceIds.map(() => '?').join(',');
                sources = db.prepare(`SELECT id, name FROM sources WHERE id IN (${placeholders})`).all(...sourceIds);
            }
            const sourceMap = Object.fromEntries(sources.map(s => [s.id, s.name]));

            console.log(`[RAG] Search took ${Date.now() - startTime}ms for ${processed} chunks.`);
            return top.map(t => ({
                text: t.text,
                source: sourceMap[t.source_id],
                score: t.score
            }));
        }
    } catch (e) {
        console.error("[RAG] Search Error:", e);
    }
    return null;
}
|
|
221
294
|
|
|
222
|
-
|
|
295
|
+
|
|
296
|
+
module.exports = { indexFile, indexFolder, searchKnowledge };
|