pinpoint-bot 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/package.json ADDED
@@ -0,0 +1,43 @@
1
+ {
2
+ "name": "pinpoint-bot",
3
+ "version": "1.0.0",
4
+ "description": "WhatsApp bot for Pinpoint local file assistant",
5
+ "main": "index.js",
6
+ "bin": {
7
+ "pinpoint-bot": "./bin/pinpoint-bot.js"
8
+ },
9
+ "files": [
10
+ "bin/",
11
+ "index.js",
12
+ "src/",
13
+ "test/"
14
+ ],
15
+ "scripts": {
16
+ "lint": "eslint . && prettier --check .",
17
+ "lint:fix": "eslint --fix . && prettier --write .",
18
+ "test": "node --test"
19
+ },
20
+ "keywords": ["whatsapp", "bot", "baileys", "pinpoint", "local-search", "assistant"],
21
+ "author": "Pinpoint contributors",
22
+ "license": "MIT",
23
+ "homepage": "https://github.com/vijishmadhavan/pinpoint",
24
+ "repository": {
25
+ "type": "git",
26
+ "url": "git+https://github.com/vijishmadhavan/pinpoint.git",
27
+ "directory": "bot"
28
+ },
29
+ "bugs": {
30
+ "url": "https://github.com/vijishmadhavan/pinpoint/issues"
31
+ },
32
+ "dependencies": {
33
+ "@google/genai": "^1.43.0",
34
+ "@whiskeysockets/baileys": "^7.0.0-rc.9",
35
+ "dotenv": "^17.3.1",
36
+ "qrcode-terminal": "^0.12.0"
37
+ },
38
+ "devDependencies": {
39
+ "@eslint/js": "^10.0.1",
40
+ "eslint": "^10.0.2",
41
+ "prettier": "^3.8.1"
42
+ }
43
+ }
package/src/llm.js ADDED
@@ -0,0 +1,254 @@
1
+ /**
2
+ * LLM adapter layer — Gemini (default) + Ollama (optional)
3
+ *
4
+ * Extracted from bot/index.js (Seg 22C).
5
+ * Provides unified llmGenerate() that routes to Gemini or Ollama,
6
+ * plus token tracking and cost summaries.
7
+ *
8
+ * Usage:
9
+ * const llm = require("./src/llm");
10
+ * llm.init({ ai, OLLAMA_MODEL, OLLAMA_URL, OLLAMA_THINK, USE_OLLAMA, GEMINI_MODEL, sessionCosts, TOKEN_COST_INPUT, TOKEN_COST_OUTPUT });
11
+ * const response = await llm.llmGenerate({ model, contents, config, tools });
12
+ */
13
+
14
// Module-level references, populated once by init()
let _ai, _USE_OLLAMA, _OLLAMA_MODEL, _OLLAMA_URL, _OLLAMA_THINK, _GEMINI_MODEL;
let _sessionCosts, _TOKEN_COST_INPUT, _TOKEN_COST_OUTPUT;

/**
 * Wire the adapter to its runtime dependencies and settings.
 * Must be called once (by the host, e.g. bot/index.js) before any of the
 * other functions in this module are used.
 *
 * @param {object} config - { ai, USE_OLLAMA, OLLAMA_MODEL, OLLAMA_URL,
 *   OLLAMA_THINK, GEMINI_MODEL, sessionCosts, TOKEN_COST_INPUT, TOKEN_COST_OUTPUT }
 */
function init(config) {
  const {
    ai,
    USE_OLLAMA,
    OLLAMA_MODEL,
    OLLAMA_URL,
    OLLAMA_THINK,
    GEMINI_MODEL,
    sessionCosts,
    TOKEN_COST_INPUT,
    TOKEN_COST_OUTPUT,
  } = config;
  _ai = ai;
  _USE_OLLAMA = USE_OLLAMA;
  _OLLAMA_MODEL = OLLAMA_MODEL;
  _OLLAMA_URL = OLLAMA_URL;
  _OLLAMA_THINK = OLLAMA_THINK;
  _GEMINI_MODEL = GEMINI_MODEL;
  _sessionCosts = sessionCosts;
  _TOKEN_COST_INPUT = TOKEN_COST_INPUT;
  _TOKEN_COST_OUTPUT = TOKEN_COST_OUTPUT;
}
29
+
30
// --- Ollama adapter: translates Gemini format <-> Ollama format ---
// So the rest of the code stays identical regardless of which LLM is used.

/**
 * Convert Gemini tool declarations into Ollama's tool format.
 * Gemini: [{ functionDeclarations: [{ name, description, parameters }] }]
 * Ollama: [{ type: "function", function: { name, description, parameters } }]
 * Returns [] when no declarations are present.
 */
function geminiToolsToOllama(geminiTools) {
  const declarations = geminiTools?.[0]?.functionDeclarations;
  if (!declarations) return [];
  const tools = [];
  for (const decl of declarations) {
    tools.push({
      type: "function",
      function: {
        name: decl.name,
        description: decl.description,
        parameters: lowerTypes(decl.parameters),
      },
    });
  }
  return tools;
}
45
+
46
/**
 * Recursively lowercase JSON-schema "type" fields (Gemini emits "OBJECT",
 * Ollama expects "object"). Recurses into `properties` and `items`.
 * Returns a new schema object; the input schema is not mutated.
 */
function lowerTypes(schema) {
  if (!schema) return schema;
  const result = { ...schema };
  if (result.type) result.type = result.type.toLowerCase();
  if (result.properties) {
    const lowered = {};
    for (const key of Object.keys(schema.properties)) {
      lowered[key] = lowerTypes(schema.properties[key]);
    }
    result.properties = lowered;
  }
  if (result.items) result.items = lowerTypes(result.items);
  return result;
}
59
+
60
/**
 * Translate a Gemini `contents` array into an Ollama chat `messages` array.
 *
 * Mapping:
 * - systemInstruction           → leading "system" message
 * - model parts w/ functionCall → one "assistant" message with tool_calls
 * - parts w/ functionResponse   → one "tool" message per response (JSON body),
 *                                 plus any text parts as "system" nudges
 * - plain text / inlineData     → "user"/"assistant" message, base64 images
 *                                 in an `images` array (Ollama convention)
 * Entries without `parts` are skipped.
 */
function geminiContentsToOllama(contents, systemInstruction) {
  const messages = [];
  if (systemInstruction) messages.push({ role: "system", content: systemInstruction });

  for (const entry of contents) {
    const parts = entry?.parts;
    if (!parts) continue;

    const joinedText = parts
      .filter((p) => p.text)
      .map((p) => p.text)
      .join("\n");

    // Model turn containing tool calls → single assistant message with tool_calls.
    const callParts = parts.filter((p) => p.functionCall);
    if (callParts.length > 0) {
      messages.push({
        role: "assistant",
        content: joinedText || "",
        tool_calls: callParts.map((p) => ({
          id: `call_${p.functionCall.name}_${Date.now()}`,
          type: "function",
          function: { name: p.functionCall.name, arguments: p.functionCall.args || {} },
        })),
      });
      continue;
    }

    // Tool results → one "tool" message each; text nudges ride along as system msgs.
    const responseParts = parts.filter((p) => p.functionResponse);
    if (responseParts.length > 0) {
      for (const p of responseParts) {
        messages.push({
          role: "tool",
          content: JSON.stringify(p.functionResponse.response?.result ?? ""),
        });
      }
      for (const p of parts) {
        if (p.text) messages.push({ role: "system", content: p.text });
      }
      continue;
    }

    // Plain text and/or inline images.
    const images = [];
    for (const p of parts) {
      if (p.inlineData) images.push(p.inlineData.data);
    }
    if (joinedText || images.length > 0) {
      const message = {
        role: entry.role === "model" ? "assistant" : "user",
        content: joinedText || "",
      };
      if (images.length > 0) message.images = images;
      messages.push(message);
    }
  }
  return messages;
}
120
+
121
/**
 * Call the local Ollama /api/chat endpoint and adapt its reply into the
 * Gemini response shape (text / functionCalls / candidates / usageMetadata)
 * that the rest of the bot consumes.
 *
 * @param {Array} contents - Gemini-style contents array
 * @param {object} [config] - Gemini-style config (systemInstruction is used)
 * @param {Array} [toolsDefs] - Gemini-style tool declarations, optional
 * @throws {Error} when the HTTP response is not ok
 */
async function ollamaGenerate(contents, config, toolsDefs) {
  const payload = {
    model: _OLLAMA_MODEL,
    messages: geminiContentsToOllama(contents, config?.systemInstruction),
    stream: false,
    // Optional thinking — smarter tool picks but slower (~30s vs ~0.8s)
    think: _OLLAMA_THINK,
  };
  if (toolsDefs) payload.tools = geminiToolsToOllama(toolsDefs);

  const resp = await fetch(`${_OLLAMA_URL}/api/chat`, {
    method: "POST",
    headers: { "Content-Type": "application/json" },
    body: JSON.stringify(payload),
  });
  if (!resp.ok) throw new Error(`Ollama error: ${resp.status} ${await resp.text()}`);

  const data = await resp.json();
  const msg = data.message || {};

  // Ollama tool_calls -> Gemini functionCalls (arguments may arrive as a JSON string).
  const functionCalls = (msg.tool_calls || []).map((tc) => {
    const rawArgs = tc.function.arguments;
    return {
      name: tc.function.name,
      args: typeof rawArgs === "string" ? JSON.parse(rawArgs) : rawArgs,
    };
  });

  // Strip <think>...</think> blocks (Qwen3 style) from the visible reply,
  // keeping a rough token estimate (~4 chars/token) for cost tracking.
  let visibleText = msg.content || "";
  let thinkingTokens = 0;
  if (_OLLAMA_THINK && visibleText.includes("<think>")) {
    const thinkMatch = visibleText.match(/<think>([\s\S]*?)<\/think>/);
    if (thinkMatch) {
      thinkingTokens = Math.ceil(thinkMatch[1].length / 4);
      visibleText = visibleText.replace(/<think>[\s\S]*?<\/think>\s*/g, "").trim();
    }
  }

  const parts = [];
  if (visibleText) parts.push({ text: visibleText });
  for (const fc of functionCalls) parts.push({ functionCall: { name: fc.name, args: fc.args } });

  return {
    text: visibleText,
    functionCalls: functionCalls.length > 0 ? functionCalls : null,
    candidates: [
      {
        content: { role: "model", parts },
        finishReason: functionCalls.length > 0 ? "TOOL_CALLS" : "STOP",
      },
    ],
    usageMetadata: {
      promptTokenCount: data.prompt_eval_count || 0,
      candidatesTokenCount: data.eval_count || 0,
      thoughtsTokenCount: thinkingTokens,
    },
  };
}
180
+
181
/**
 * Unified LLM entry point. Routes to Ollama when enabled; otherwise calls
 * Gemini with up to two retries (2s then 4s backoff) on transient errors
 * (429 / 500 / 503 / quota exhaustion / "Internal error").
 *
 * @param {{model: string, contents: Array, config: object, tools: Array}} req
 * @returns {Promise<object>} Gemini-shaped response
 * @throws rethrows the last error when retries are exhausted or the error
 *   is not transient
 */
async function llmGenerate({ model, contents, config, tools: toolsDefs }) {
  if (_USE_OLLAMA) return ollamaGenerate(contents, config, toolsDefs);

  const maxRetries = 2;
  let attempt = 0;
  for (;;) {
    try {
      return await _ai.models.generateContent({ model, contents, config: { ...config, tools: toolsDefs } });
    } catch (err) {
      const msg = String(err.message || err);
      const transientMarkers = ["429", "503", "500", "RESOURCE_EXHAUSTED", "Internal error"];
      const isTransient = transientMarkers.some((marker) => msg.includes(marker));
      if (!isTransient || attempt >= maxRetries) throw err;
      const wait = 2 ** (attempt + 1) * 1000; // 2s, then 4s
      console.warn(`[Gemini] Transient error (${msg.slice(0, 60)}), retry in ${wait / 1000}s...`);
      await new Promise((r) => setTimeout(r, wait));
      attempt++;
    }
  }
}
208
+
209
+ // --- Token tracking & cost ---
210
+
211
/**
 * Fold a response's token usage into the per-chat session ledger
 * (_sessionCosts, keyed by chat JID).
 *
 * @param {string} chatJid - chat identifier
 * @param {object} response - LLM response; usageMetadata is read if present
 * @returns {{input: number, output: number, thinking: number}|undefined}
 *   this round's counts, or undefined when no usage metadata is present
 */
function trackTokens(chatJid, response) {
  const usage = response.usageMetadata;
  if (!usage) return;
  const round = {
    input: usage.promptTokenCount || 0,
    output: usage.candidatesTokenCount || 0,
    thinking: usage.thoughtsTokenCount || 0,
  };
  const session = (_sessionCosts[chatJid] ??= {
    input: 0,
    output: 0,
    thinking: 0,
    rounds: 0,
    started: Date.now(),
  });
  session.input += round.input;
  session.output += round.output;
  session.thinking += round.thinking;
  session.rounds += 1;
  return round;
}
228
+
229
/** Human-readable token count: 1536 → "1.5K", 2500000 → "2.5M", 999 → "999". */
function formatTokens(n) {
  const scales = [
    [1_000_000, "M"],
    [1_000, "K"],
  ];
  for (const [limit, suffix] of scales) {
    if (n >= limit) return (n / limit).toFixed(1) + suffix;
  }
  return String(n);
}
234
+
235
/**
 * Build a WhatsApp-formatted summary (bold markers, newline-separated) of
 * one chat's session: total/input/output tokens, rounds, estimated cost and
 * elapsed minutes. Reads the ledger populated by trackTokens().
 */
function getCostSummary(chatJid) {
  const s = _sessionCosts[chatJid];
  if (!s || s.rounds === 0) return "No token usage in this session.";
  const cost = s.input * _TOKEN_COST_INPUT + s.output * _TOKEN_COST_OUTPUT;
  const elapsed = Math.round((Date.now() - s.started) / 60000);
  const thinkStr = s.thinking ? `, thinking: ${formatTokens(s.thinking)}` : "";
  const lines = [
    `*Session tokens:* ${formatTokens(s.input + s.output)} (input: ${formatTokens(s.input)}, output: ${formatTokens(s.output)}${thinkStr})`,
    `*Rounds:* ${s.rounds}`,
    `*Estimated cost:* $${cost.toFixed(4)}`,
    `*Duration:* ${elapsed} min`,
  ];
  return lines.join("\n");
}
243
+
244
// Public API of the LLM adapter. init() must be called first — it supplies
// the Gemini client, Ollama settings and cost-tracking state that
// llmGenerate/ollamaGenerate/trackTokens/getCostSummary read.
module.exports = {
  init,
  geminiToolsToOllama,
  lowerTypes,
  geminiContentsToOllama,
  ollamaGenerate,
  llmGenerate,
  trackTokens,
  formatTokens,
  getCostSummary,
};
package/src/skills.js ADDED
@@ -0,0 +1,163 @@
1
const { readFileSync, readdirSync, existsSync } = require("fs");
const pathModule = require("path");
const os = require("os");
const { INTENT_KEYWORDS, SKILL_CATEGORIES } = require("./tools");

// Per-user data directory; defaults to ~/.pinpoint (PINPOINT_USER_DIR overrides).
const USER_DATA_DIR = process.env.PINPOINT_USER_DIR || pathModule.join(os.homedir(), ".pinpoint");

// --- System paths (WSL-aware) ---
// Under WSL, prefer the Windows profile (/mnt/c/Users/<name>) so "Downloads"
// etc. resolve to the folders the user actually sees in Windows.
// NOTE(review): assumes the WSL username matches the Windows username — confirm.
const HOME_DIR = os.homedir();
let WIN_HOME = null;
const wslUserPath = `/mnt/c/Users/${pathModule.basename(HOME_DIR)}`;
if (existsSync(wslUserPath)) {
  WIN_HOME = wslUserPath;
}
const USER_HOME = WIN_HOME || HOME_DIR;
const DOWNLOADS = pathModule.join(USER_HOME, "Downloads");
const DOCUMENTS = pathModule.join(USER_HOME, "Documents");
const DESKTOP = pathModule.join(USER_HOME, "Desktop");
const PICTURES = pathModule.join(USER_HOME, "Pictures");

// --- Load skills from skills/*.md at startup (hierarchical: general + task-specific) ---
// Default location is <repo>/skills, two levels above this file (src/ lives in bot/).
const SKILLS_DIR = process.env.PINPOINT_SKILLS_DIR || pathModule.join(__dirname, "..", "..", "skills");

// General skills: always injected (core rules, batch awareness, common mistakes)
const GENERAL_SKILL_FILES = ["batch-awareness.md", "common-mistakes.md", "core-rules.md"];
26
+
27
const _skillCache = {}; // filename → content ("" when the file is missing/unreadable)

/**
 * Read a skill markdown file from SKILLS_DIR, caching the result.
 * Missing, unreadable, or empty files are cached too, so each file is read
 * from disk at most once. (The previous truthiness check `!_skillCache[f]`
 * re-read empty files on every call; a membership check fixes that.)
 *
 * @param {string} filename - e.g. "core-rules.md"
 * @returns {string} file content, or "" on failure
 */
function _loadSkill(filename) {
  if (!(filename in _skillCache)) {
    try {
      _skillCache[filename] = readFileSync(pathModule.join(SKILLS_DIR, filename), "utf-8");
    } catch {
      // Best-effort: a missing skill simply contributes nothing to the prompt.
      _skillCache[filename] = "";
    }
  }
  return _skillCache[filename];
}
38
+
39
// Preload every skills/*.md at startup so later prompt builds are pure cache
// hits; logs the loaded skill names (sans .md extension).
try {
  const allFiles = readdirSync(SKILLS_DIR)
    .filter((f) => f.endsWith(".md"))
    .sort();
  for (const file of allFiles) _loadSkill(file);
  console.log(`[Pinpoint] Loaded ${allFiles.length} skills: ${allFiles.map((f) => f.replace(".md", "")).join(", ")}`);
} catch (err) {
  // Best-effort: SKILLS_DIR may not exist; the bot still works without skills.
  console.log("[Pinpoint] No skills loaded:", err.message);
}

// Build general skills content once — this string is always interpolated into
// the system prompt (task-specific skills are added per message).
const generalSkillsContent = GENERAL_SKILL_FILES.map((f) => _loadSkill(f))
  .filter(Boolean)
  .join("\n\n");
54
+
55
/**
 * Match a user message against INTENT_KEYWORDS regexes and return the Set of
 * matching skill category names. Falls back to {"search"} (core
 * functionality) when nothing matches.
 * NOTE(review): assumes the INTENT_KEYWORDS regexes are not /g-flagged —
 * a /g regex's test() is stateful; confirm in ./tools.
 */
function detectIntentCategories(message) {
  const matched = Object.entries(INTENT_KEYWORDS)
    .filter(([, regex]) => regex.test(message))
    .map(([category]) => category);
  return new Set(matched.length > 0 ? matched : ["search"]);
}
65
+
66
/**
 * Resolve the task-specific skill markdown for one message: detect intent
 * categories, collect their skill files (minus the always-injected general
 * ones), load each, and concatenate the non-empty contents.
 */
function getTaskSkills(message) {
  const files = new Set();
  for (const category of detectIntentCategories(message)) {
    for (const file of SKILL_CATEGORIES[category] || []) {
      files.add(file);
    }
  }
  // General skills are injected unconditionally elsewhere — don't repeat them.
  GENERAL_SKILL_FILES.forEach((f) => files.delete(f));
  return [...files]
    .map((f) => _loadSkill(f))
    .filter(Boolean)
    .join("\n\n");
}
80
+
81
// Base system prompt, built once at module load. Interpolates the always-on
// general skills (generalSkillsContent) and the resolved OS paths; task-
// specific skills and dynamic context are appended later by getSystemPrompt().
// NOTE: this is runtime prompt text sent to the LLM — edit with care.
const SYSTEM_PROMPT_BASE = `You are Pinpoint, a local file assistant with full power over the user's files.
You search, read, analyze, organize, and manage files on their computer.

## How to Work
Do what has been asked; nothing more. Go straight to the point without going in circles.
1. GATHER — call 1-2 tools to collect info. If results are sufficient, skip to step 3.
2. ACT — if user wants something done (move, create, convert), do it in one call.
3. ANSWER — respond concisely with what you have. Stop.

When user asks you to DO something (organize, move, sort, create, convert) — do it. Don't stop to ask permission.
Gather what you need, then act, then report. Complete the full task in one turn.

Rules:
- Never call the same tool with identical arguments twice.
- Prefer batch tools (folder param, batch_move) over loops.
- If user sends a file/image with NO instruction — ask what they want.
- If an image is already inline, you can SEE it — don't re-read it.

## Honesty
- Report ONLY what tool results show. Quote exact numbers (moved_count, error_count, etc.).
- If batch_move returned moved_count: 0, tell the user "0 files were moved" — never claim files were moved.
- Check "Actions Taken This Session" before claiming you did something — it has the real outcomes.
- Never claim you performed an action unless the tool result confirms it.

## Context Priority
When multiple sources of info conflict, trust in this order:
1. Current user message (highest)
2. Recent conversation turns
3. Active tool results
4. Persistent memories
5. Older conversation history

${generalSkillsContent}

## System Paths
- Home: ${USER_HOME}
- Downloads: ${DOWNLOADS}
- Documents: ${DOCUMENTS}
- Desktop: ${DESKTOP}
- Pictures: ${PICTURES}
Use these full ABSOLUTE paths when the user says "Downloads" or "my Documents".

## Result References
When a tool returns many items (files, images, faces), the result is stored server-side and you receive a reference like @ref:1 with a preview.
To use these results in another tool (batch_move, compress_files, etc.), pass the @ref:N as the value — it will be resolved automatically.
Example: list_files returns @ref:1 (500 files) → batch_move({ sources: "@ref:1", destination: "/path" }) moves all 500.
`;
128
+
129
// Timezone for the prompt's timestamp: explicit TZ env var wins, then the
// system-resolved zone, then a hard-coded fallback.
const USER_TZ = process.env.TZ || Intl.DateTimeFormat().resolvedOptions().timeZone || "Asia/Kolkata";

/**
 * Assemble the full system prompt for one request:
 * base prompt → task-specific skills (by message intent) → current local
 * date/time → memory section → action ledger.
 *
 * @param {string} [userMessage] - used for intent detection; "" skips task skills
 * @param {string} [chatJid] - currently unused; kept for interface stability
 * @param {object} [options]
 * @param {boolean} [options.memoryEnabled] - whether persistent memory is on
 * @param {string} [options.memoryContext] - saved memories text, if any
 * @param {string} [options.actionLedgerText] - real outcomes of mutating actions
 * @returns {string} the complete system prompt
 */
function getSystemPrompt(userMessage = "", chatJid = "", { memoryEnabled, memoryContext, actionLedgerText } = {}) {
  const tz = USER_TZ;
  // Inject task-specific skills based on user message intent
  const taskSkills = userMessage ? getTaskSkills(userMessage) : "";
  let prompt = SYSTEM_PROMPT_BASE;
  if (taskSkills) prompt += `\n${taskSkills}\n`;
  prompt += `\nCurrent date and time: ${new Date().toLocaleString("en-IN", { timeZone: tz, day: "numeric", month: "long", year: "numeric", hour: "2-digit", minute: "2-digit", hour12: true, timeZoneName: "short" })}`;
  if (memoryEnabled && memoryContext) {
    prompt += `\n\n## Saved memories\n${memoryContext}`;
  } else if (memoryEnabled) {
    prompt += `\n\n## Saved memories\nNo memories saved yet.`;
  } else {
    // Fix: this branch previously re-tested `!memoryEnabled`, which is always
    // true when reached — a plain else is equivalent and clearer.
    prompt += `\n\n## Memory\nMemory is currently OFF. If user asks to remember something, tell them to enable it with /memory on.`;
  }
  // Action ledger: inject real outcomes of every mutating action (OpenClaw pattern)
  if (actionLedgerText) prompt += actionLedgerText;
  return prompt;
}
149
+
150
// Public surface: skill loading/selection, system-prompt construction, and
// the resolved filesystem paths that are interpolated into the prompt.
module.exports = {
  SKILLS_DIR,
  USER_DATA_DIR,
  GENERAL_SKILL_FILES,
  detectIntentCategories,
  getTaskSkills,
  SYSTEM_PROMPT_BASE,
  getSystemPrompt,
  USER_HOME,
  DOWNLOADS,
  DOCUMENTS,
  DESKTOP,
  PICTURES,
};