pinpoint-bot 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/package.json ADDED
@@ -0,0 +1,43 @@
1
+ {
2
+ "name": "pinpoint-bot",
3
+ "version": "1.0.0",
4
+ "description": "WhatsApp bot for Pinpoint local file assistant",
5
+ "main": "index.js",
6
+ "bin": {
7
+ "pinpoint-bot": "./bin/pinpoint-bot.js"
8
+ },
9
+ "files": [
10
+ "bin/",
11
+ "index.js",
12
+ "src/",
13
+ "test/"
14
+ ],
15
+ "scripts": {
16
+ "lint": "eslint . && prettier --check .",
17
+ "lint:fix": "eslint --fix . && prettier --write .",
18
+ "test": "node --test"
19
+ },
20
+ "keywords": ["whatsapp", "bot", "baileys", "pinpoint", "local-search", "assistant"],
21
+ "author": "Pinpoint contributors",
22
+ "license": "MIT",
23
+ "homepage": "https://github.com/vijishmadhavan/pinpoint",
24
+ "repository": {
25
+ "type": "git",
26
+ "url": "git+https://github.com/vijishmadhavan/pinpoint.git",
27
+ "directory": "bot"
28
+ },
29
+ "bugs": {
30
+ "url": "https://github.com/vijishmadhavan/pinpoint/issues"
31
+ },
32
+ "dependencies": {
33
+ "@google/genai": "^1.43.0",
34
+ "@whiskeysockets/baileys": "^7.0.0-rc.9",
35
+ "dotenv": "^17.3.1",
36
+ "qrcode-terminal": "^0.12.0"
37
+ },
38
+ "devDependencies": {
39
+ "@eslint/js": "^10.0.1",
40
+ "eslint": "^10.0.2",
41
+ "prettier": "^3.8.1"
42
+ }
43
+ }
package/src/llm.js ADDED
@@ -0,0 +1,254 @@
1
+ /**
2
+ * LLM adapter layer — Gemini (default) + Ollama (optional)
3
+ *
4
+ * Extracted from bot/index.js (Seg 22C).
5
+ * Provides unified llmGenerate() that routes to Gemini or Ollama,
6
+ * plus token tracking and cost summaries.
7
+ *
8
+ * Usage:
9
+ * const llm = require("./src/llm");
10
+ * llm.init({ ai, OLLAMA_MODEL, OLLAMA_URL, OLLAMA_THINK, USE_OLLAMA, GEMINI_MODEL, sessionCosts, TOKEN_COST_INPUT, TOKEN_COST_OUTPUT });
11
+ * const response = await llm.llmGenerate({ model, contents, config, tools });
12
+ */
13
+
14
// Module-level references, populated once by init()
let _ai, _USE_OLLAMA, _OLLAMA_MODEL, _OLLAMA_URL, _OLLAMA_THINK, _GEMINI_MODEL;
let _sessionCosts, _TOKEN_COST_INPUT, _TOKEN_COST_OUTPUT;

/**
 * Wire the adapter to its runtime dependencies and settings.
 * Must be called once (by the host, e.g. bot/index.js) before any of the
 * other functions in this module are used.
 *
 * @param {object} config - { ai, USE_OLLAMA, OLLAMA_MODEL, OLLAMA_URL,
 *   OLLAMA_THINK, GEMINI_MODEL, sessionCosts, TOKEN_COST_INPUT, TOKEN_COST_OUTPUT }
 */
function init(config) {
  const {
    ai,
    USE_OLLAMA,
    OLLAMA_MODEL,
    OLLAMA_URL,
    OLLAMA_THINK,
    GEMINI_MODEL,
    sessionCosts,
    TOKEN_COST_INPUT,
    TOKEN_COST_OUTPUT,
  } = config;
  _ai = ai;
  _USE_OLLAMA = USE_OLLAMA;
  _OLLAMA_MODEL = OLLAMA_MODEL;
  _OLLAMA_URL = OLLAMA_URL;
  _OLLAMA_THINK = OLLAMA_THINK;
  _GEMINI_MODEL = GEMINI_MODEL;
  _sessionCosts = sessionCosts;
  _TOKEN_COST_INPUT = TOKEN_COST_INPUT;
  _TOKEN_COST_OUTPUT = TOKEN_COST_OUTPUT;
}
29
+
30
// --- Ollama adapter: translates Gemini format <-> Ollama format ---
// So the rest of the code stays identical regardless of which LLM is used.

/**
 * Convert Gemini tool declarations into Ollama's tool format.
 * Gemini: [{ functionDeclarations: [{ name, description, parameters }] }]
 * Ollama: [{ type: "function", function: { name, description, parameters } }]
 * Returns [] when no declarations are present.
 */
function geminiToolsToOllama(geminiTools) {
  const declarations = geminiTools?.[0]?.functionDeclarations;
  if (!declarations) return [];
  const tools = [];
  for (const decl of declarations) {
    tools.push({
      type: "function",
      function: {
        name: decl.name,
        description: decl.description,
        parameters: lowerTypes(decl.parameters),
      },
    });
  }
  return tools;
}
45
+
46
/**
 * Recursively lowercase JSON-schema "type" fields (Gemini emits "OBJECT",
 * Ollama expects "object"). Recurses into `properties` and `items`.
 * Returns a new schema object; the input schema is not mutated.
 */
function lowerTypes(schema) {
  if (!schema) return schema;
  const result = { ...schema };
  if (result.type) result.type = result.type.toLowerCase();
  if (result.properties) {
    const lowered = {};
    for (const key of Object.keys(schema.properties)) {
      lowered[key] = lowerTypes(schema.properties[key]);
    }
    result.properties = lowered;
  }
  if (result.items) result.items = lowerTypes(result.items);
  return result;
}
59
+
60
/**
 * Translate a Gemini `contents` array into an Ollama chat `messages` array.
 *
 * Mapping:
 * - systemInstruction           → leading "system" message
 * - model parts w/ functionCall → one "assistant" message with tool_calls
 * - parts w/ functionResponse   → one "tool" message per response (JSON body),
 *                                 plus any text parts as "system" nudges
 * - plain text / inlineData     → "user"/"assistant" message, base64 images
 *                                 in an `images` array (Ollama convention)
 * Entries without `parts` are skipped.
 */
function geminiContentsToOllama(contents, systemInstruction) {
  const messages = [];
  if (systemInstruction) messages.push({ role: "system", content: systemInstruction });

  for (const entry of contents) {
    const parts = entry?.parts;
    if (!parts) continue;

    const joinedText = parts
      .filter((p) => p.text)
      .map((p) => p.text)
      .join("\n");

    // Model turn containing tool calls → single assistant message with tool_calls.
    const callParts = parts.filter((p) => p.functionCall);
    if (callParts.length > 0) {
      messages.push({
        role: "assistant",
        content: joinedText || "",
        tool_calls: callParts.map((p) => ({
          id: `call_${p.functionCall.name}_${Date.now()}`,
          type: "function",
          function: { name: p.functionCall.name, arguments: p.functionCall.args || {} },
        })),
      });
      continue;
    }

    // Tool results → one "tool" message each; text nudges ride along as system msgs.
    const responseParts = parts.filter((p) => p.functionResponse);
    if (responseParts.length > 0) {
      for (const p of responseParts) {
        messages.push({
          role: "tool",
          content: JSON.stringify(p.functionResponse.response?.result ?? ""),
        });
      }
      for (const p of parts) {
        if (p.text) messages.push({ role: "system", content: p.text });
      }
      continue;
    }

    // Plain text and/or inline images.
    const images = [];
    for (const p of parts) {
      if (p.inlineData) images.push(p.inlineData.data);
    }
    if (joinedText || images.length > 0) {
      const message = {
        role: entry.role === "model" ? "assistant" : "user",
        content: joinedText || "",
      };
      if (images.length > 0) message.images = images;
      messages.push(message);
    }
  }
  return messages;
}
120
+
121
/**
 * Call the local Ollama /api/chat endpoint and adapt its reply into the
 * Gemini response shape (text / functionCalls / candidates / usageMetadata)
 * that the rest of the bot consumes.
 *
 * @param {Array} contents - Gemini-style contents array
 * @param {object} [config] - Gemini-style config (systemInstruction is used)
 * @param {Array} [toolsDefs] - Gemini-style tool declarations, optional
 * @throws {Error} when the HTTP response is not ok
 */
async function ollamaGenerate(contents, config, toolsDefs) {
  const payload = {
    model: _OLLAMA_MODEL,
    messages: geminiContentsToOllama(contents, config?.systemInstruction),
    stream: false,
    // Optional thinking — smarter tool picks but slower (~30s vs ~0.8s)
    think: _OLLAMA_THINK,
  };
  if (toolsDefs) payload.tools = geminiToolsToOllama(toolsDefs);

  const resp = await fetch(`${_OLLAMA_URL}/api/chat`, {
    method: "POST",
    headers: { "Content-Type": "application/json" },
    body: JSON.stringify(payload),
  });
  if (!resp.ok) throw new Error(`Ollama error: ${resp.status} ${await resp.text()}`);

  const data = await resp.json();
  const msg = data.message || {};

  // Ollama tool_calls -> Gemini functionCalls (arguments may arrive as a JSON string).
  const functionCalls = (msg.tool_calls || []).map((tc) => {
    const rawArgs = tc.function.arguments;
    return {
      name: tc.function.name,
      args: typeof rawArgs === "string" ? JSON.parse(rawArgs) : rawArgs,
    };
  });

  // Strip <think>...</think> blocks (Qwen3 style) from the visible reply,
  // keeping a rough token estimate (~4 chars/token) for cost tracking.
  let visibleText = msg.content || "";
  let thinkingTokens = 0;
  if (_OLLAMA_THINK && visibleText.includes("<think>")) {
    const thinkMatch = visibleText.match(/<think>([\s\S]*?)<\/think>/);
    if (thinkMatch) {
      thinkingTokens = Math.ceil(thinkMatch[1].length / 4);
      visibleText = visibleText.replace(/<think>[\s\S]*?<\/think>\s*/g, "").trim();
    }
  }

  const parts = [];
  if (visibleText) parts.push({ text: visibleText });
  for (const fc of functionCalls) parts.push({ functionCall: { name: fc.name, args: fc.args } });

  return {
    text: visibleText,
    functionCalls: functionCalls.length > 0 ? functionCalls : null,
    candidates: [
      {
        content: { role: "model", parts },
        finishReason: functionCalls.length > 0 ? "TOOL_CALLS" : "STOP",
      },
    ],
    usageMetadata: {
      promptTokenCount: data.prompt_eval_count || 0,
      candidatesTokenCount: data.eval_count || 0,
      thoughtsTokenCount: thinkingTokens,
    },
  };
}
180
+
181
/**
 * Unified LLM entry point. Routes to Ollama when enabled; otherwise calls
 * Gemini with up to two retries (2s then 4s backoff) on transient errors
 * (429 / 500 / 503 / quota exhaustion / "Internal error").
 *
 * @param {{model: string, contents: Array, config: object, tools: Array}} req
 * @returns {Promise<object>} Gemini-shaped response
 * @throws rethrows the last error when retries are exhausted or the error
 *   is not transient
 */
async function llmGenerate({ model, contents, config, tools: toolsDefs }) {
  if (_USE_OLLAMA) return ollamaGenerate(contents, config, toolsDefs);

  const maxRetries = 2;
  let attempt = 0;
  for (;;) {
    try {
      return await _ai.models.generateContent({ model, contents, config: { ...config, tools: toolsDefs } });
    } catch (err) {
      const msg = String(err.message || err);
      const transientMarkers = ["429", "503", "500", "RESOURCE_EXHAUSTED", "Internal error"];
      const isTransient = transientMarkers.some((marker) => msg.includes(marker));
      if (!isTransient || attempt >= maxRetries) throw err;
      const wait = 2 ** (attempt + 1) * 1000; // 2s, then 4s
      console.warn(`[Gemini] Transient error (${msg.slice(0, 60)}), retry in ${wait / 1000}s...`);
      await new Promise((r) => setTimeout(r, wait));
      attempt++;
    }
  }
}
208
+
209
+ // --- Token tracking & cost ---
210
+
211
/**
 * Fold a response's token usage into the per-chat session ledger
 * (_sessionCosts, keyed by chat JID).
 *
 * @param {string} chatJid - chat identifier
 * @param {object} response - LLM response; usageMetadata is read if present
 * @returns {{input: number, output: number, thinking: number}|undefined}
 *   this round's counts, or undefined when no usage metadata is present
 */
function trackTokens(chatJid, response) {
  const usage = response.usageMetadata;
  if (!usage) return;
  const round = {
    input: usage.promptTokenCount || 0,
    output: usage.candidatesTokenCount || 0,
    thinking: usage.thoughtsTokenCount || 0,
  };
  const session = (_sessionCosts[chatJid] ??= {
    input: 0,
    output: 0,
    thinking: 0,
    rounds: 0,
    started: Date.now(),
  });
  session.input += round.input;
  session.output += round.output;
  session.thinking += round.thinking;
  session.rounds += 1;
  return round;
}
228
+
229
/** Human-readable token count: 1536 → "1.5K", 2500000 → "2.5M", 999 → "999". */
function formatTokens(n) {
  const scales = [
    [1_000_000, "M"],
    [1_000, "K"],
  ];
  for (const [limit, suffix] of scales) {
    if (n >= limit) return (n / limit).toFixed(1) + suffix;
  }
  return String(n);
}
234
+
235
/**
 * Build a WhatsApp-formatted summary (bold markers, newline-separated) of
 * one chat's session: total/input/output tokens, rounds, estimated cost and
 * elapsed minutes. Reads the ledger populated by trackTokens().
 */
function getCostSummary(chatJid) {
  const s = _sessionCosts[chatJid];
  if (!s || s.rounds === 0) return "No token usage in this session.";
  const cost = s.input * _TOKEN_COST_INPUT + s.output * _TOKEN_COST_OUTPUT;
  const elapsed = Math.round((Date.now() - s.started) / 60000);
  const thinkStr = s.thinking ? `, thinking: ${formatTokens(s.thinking)}` : "";
  const lines = [
    `*Session tokens:* ${formatTokens(s.input + s.output)} (input: ${formatTokens(s.input)}, output: ${formatTokens(s.output)}${thinkStr})`,
    `*Rounds:* ${s.rounds}`,
    `*Estimated cost:* $${cost.toFixed(4)}`,
    `*Duration:* ${elapsed} min`,
  ];
  return lines.join("\n");
}
243
+
244
// Public API of the LLM adapter. init() must be called first — it supplies
// the Gemini client, Ollama settings and cost-tracking state that
// llmGenerate/ollamaGenerate/trackTokens/getCostSummary read.
module.exports = {
  init,
  geminiToolsToOllama,
  lowerTypes,
  geminiContentsToOllama,
  ollamaGenerate,
  llmGenerate,
  trackTokens,
  formatTokens,
  getCostSummary,
};
package/src/skills.js ADDED
@@ -0,0 +1,163 @@
1
const { readFileSync, readdirSync, existsSync } = require("fs");
const pathModule = require("path");
const os = require("os");
const { INTENT_KEYWORDS, SKILL_CATEGORIES } = require("./tools");

// Per-user data directory; defaults to ~/.pinpoint (PINPOINT_USER_DIR overrides).
const USER_DATA_DIR = process.env.PINPOINT_USER_DIR || pathModule.join(os.homedir(), ".pinpoint");

// --- System paths (WSL-aware) ---
// Under WSL, prefer the Windows profile (/mnt/c/Users/<name>) so "Downloads"
// etc. resolve to the folders the user actually sees in Windows.
// NOTE(review): assumes the WSL username matches the Windows username — confirm.
const HOME_DIR = os.homedir();
let WIN_HOME = null;
const wslUserPath = `/mnt/c/Users/${pathModule.basename(HOME_DIR)}`;
if (existsSync(wslUserPath)) {
  WIN_HOME = wslUserPath;
}
const USER_HOME = WIN_HOME || HOME_DIR;
const DOWNLOADS = pathModule.join(USER_HOME, "Downloads");
const DOCUMENTS = pathModule.join(USER_HOME, "Documents");
const DESKTOP = pathModule.join(USER_HOME, "Desktop");
const PICTURES = pathModule.join(USER_HOME, "Pictures");

// --- Load skills from skills/*.md at startup (hierarchical: general + task-specific) ---
// Default location is <repo>/skills, two levels above this file (src/ lives in bot/).
const SKILLS_DIR = process.env.PINPOINT_SKILLS_DIR || pathModule.join(__dirname, "..", "..", "skills");

// General skills: always injected (core rules, batch awareness, common mistakes)
const GENERAL_SKILL_FILES = ["batch-awareness.md", "common-mistakes.md", "core-rules.md"];
26
+
27
const _skillCache = {}; // filename → content ("" when the file is missing/unreadable)

/**
 * Read a skill markdown file from SKILLS_DIR, caching the result.
 * Missing, unreadable, or empty files are cached too, so each file is read
 * from disk at most once. (The previous truthiness check `!_skillCache[f]`
 * re-read empty files on every call; a membership check fixes that.)
 *
 * @param {string} filename - e.g. "core-rules.md"
 * @returns {string} file content, or "" on failure
 */
function _loadSkill(filename) {
  if (!(filename in _skillCache)) {
    try {
      _skillCache[filename] = readFileSync(pathModule.join(SKILLS_DIR, filename), "utf-8");
    } catch {
      // Best-effort: a missing skill simply contributes nothing to the prompt.
      _skillCache[filename] = "";
    }
  }
  return _skillCache[filename];
}
38
+
39
// Preload every skills/*.md at startup so later prompt builds are pure cache
// hits; logs the loaded skill names (sans .md extension).
try {
  const allFiles = readdirSync(SKILLS_DIR)
    .filter((f) => f.endsWith(".md"))
    .sort();
  for (const file of allFiles) _loadSkill(file);
  console.log(`[Pinpoint] Loaded ${allFiles.length} skills: ${allFiles.map((f) => f.replace(".md", "")).join(", ")}`);
} catch (err) {
  // Best-effort: SKILLS_DIR may not exist; the bot still works without skills.
  console.log("[Pinpoint] No skills loaded:", err.message);
}

// Build general skills content once — this string is always interpolated into
// the system prompt (task-specific skills are added per message).
const generalSkillsContent = GENERAL_SKILL_FILES.map((f) => _loadSkill(f))
  .filter(Boolean)
  .join("\n\n");
54
+
55
/**
 * Match a user message against INTENT_KEYWORDS regexes and return the Set of
 * matching skill category names. Falls back to {"search"} (core
 * functionality) when nothing matches.
 * NOTE(review): assumes the INTENT_KEYWORDS regexes are not /g-flagged —
 * a /g regex's test() is stateful; confirm in ./tools.
 */
function detectIntentCategories(message) {
  const matched = Object.entries(INTENT_KEYWORDS)
    .filter(([, regex]) => regex.test(message))
    .map(([category]) => category);
  return new Set(matched.length > 0 ? matched : ["search"]);
}
65
+
66
/**
 * Resolve the task-specific skill markdown for one message: detect intent
 * categories, collect their skill files (minus the always-injected general
 * ones), load each, and concatenate the non-empty contents.
 */
function getTaskSkills(message) {
  const files = new Set();
  for (const category of detectIntentCategories(message)) {
    for (const file of SKILL_CATEGORIES[category] || []) {
      files.add(file);
    }
  }
  // General skills are injected unconditionally elsewhere — don't repeat them.
  GENERAL_SKILL_FILES.forEach((f) => files.delete(f));
  return [...files]
    .map((f) => _loadSkill(f))
    .filter(Boolean)
    .join("\n\n");
}
80
+
81
// Base system prompt, built once at module load. Interpolates the always-on
// general skills (generalSkillsContent) and the resolved OS paths; task-
// specific skills and dynamic context are appended later by getSystemPrompt().
// NOTE: this is runtime prompt text sent to the LLM — edit with care.
const SYSTEM_PROMPT_BASE = `You are Pinpoint, a local file assistant with full power over the user's files.
You search, read, analyze, organize, and manage files on their computer.

## How to Work
Do what has been asked; nothing more. Go straight to the point without going in circles.
1. GATHER — call 1-2 tools to collect info. If results are sufficient, skip to step 3.
2. ACT — if user wants something done (move, create, convert), do it in one call.
3. ANSWER — respond concisely with what you have. Stop.

When user asks you to DO something (organize, move, sort, create, convert) — do it. Don't stop to ask permission.
Gather what you need, then act, then report. Complete the full task in one turn.

Rules:
- Never call the same tool with identical arguments twice.
- Prefer batch tools (folder param, batch_move) over loops.
- If user sends a file/image with NO instruction — ask what they want.
- If an image is already inline, you can SEE it — don't re-read it.

## Honesty
- Report ONLY what tool results show. Quote exact numbers (moved_count, error_count, etc.).
- If batch_move returned moved_count: 0, tell the user "0 files were moved" — never claim files were moved.
- Check "Actions Taken This Session" before claiming you did something — it has the real outcomes.
- Never claim you performed an action unless the tool result confirms it.

## Context Priority
When multiple sources of info conflict, trust in this order:
1. Current user message (highest)
2. Recent conversation turns
3. Active tool results
4. Persistent memories
5. Older conversation history

${generalSkillsContent}

## System Paths
- Home: ${USER_HOME}
- Downloads: ${DOWNLOADS}
- Documents: ${DOCUMENTS}
- Desktop: ${DESKTOP}
- Pictures: ${PICTURES}
Use these full ABSOLUTE paths when the user says "Downloads" or "my Documents".

## Result References
When a tool returns many items (files, images, faces), the result is stored server-side and you receive a reference like @ref:1 with a preview.
To use these results in another tool (batch_move, compress_files, etc.), pass the @ref:N as the value — it will be resolved automatically.
Example: list_files returns @ref:1 (500 files) → batch_move({ sources: "@ref:1", destination: "/path" }) moves all 500.
`;
128
+
129
// Timezone for the prompt's timestamp: explicit TZ env var wins, then the
// system-resolved zone, then a hard-coded fallback.
const USER_TZ = process.env.TZ || Intl.DateTimeFormat().resolvedOptions().timeZone || "Asia/Kolkata";

/**
 * Assemble the full system prompt for one request:
 * base prompt → task-specific skills (by message intent) → current local
 * date/time → memory section → action ledger.
 *
 * @param {string} [userMessage] - used for intent detection; "" skips task skills
 * @param {string} [chatJid] - currently unused; kept for interface stability
 * @param {object} [options]
 * @param {boolean} [options.memoryEnabled] - whether persistent memory is on
 * @param {string} [options.memoryContext] - saved memories text, if any
 * @param {string} [options.actionLedgerText] - real outcomes of mutating actions
 * @returns {string} the complete system prompt
 */
function getSystemPrompt(userMessage = "", chatJid = "", { memoryEnabled, memoryContext, actionLedgerText } = {}) {
  const tz = USER_TZ;
  // Inject task-specific skills based on user message intent
  const taskSkills = userMessage ? getTaskSkills(userMessage) : "";
  let prompt = SYSTEM_PROMPT_BASE;
  if (taskSkills) prompt += `\n${taskSkills}\n`;
  prompt += `\nCurrent date and time: ${new Date().toLocaleString("en-IN", { timeZone: tz, day: "numeric", month: "long", year: "numeric", hour: "2-digit", minute: "2-digit", hour12: true, timeZoneName: "short" })}`;
  if (memoryEnabled && memoryContext) {
    prompt += `\n\n## Saved memories\n${memoryContext}`;
  } else if (memoryEnabled) {
    prompt += `\n\n## Saved memories\nNo memories saved yet.`;
  } else {
    // Fix: this branch previously re-tested `!memoryEnabled`, which is always
    // true when reached — a plain else is equivalent and clearer.
    prompt += `\n\n## Memory\nMemory is currently OFF. If user asks to remember something, tell them to enable it with /memory on.`;
  }
  // Action ledger: inject real outcomes of every mutating action (OpenClaw pattern)
  if (actionLedgerText) prompt += actionLedgerText;
  return prompt;
}
149
+
150
// Public surface: skill loading/selection, system-prompt construction, and
// the resolved filesystem paths that are interpolated into the prompt.
module.exports = {
  SKILLS_DIR,
  USER_DATA_DIR,
  GENERAL_SKILL_FILES,
  detectIntentCategories,
  getTaskSkills,
  SYSTEM_PROMPT_BASE,
  getSystemPrompt,
  USER_HOME,
  DOWNLOADS,
  DOCUMENTS,
  DESKTOP,
  PICTURES,
};