npm - @bamdra/bamdra-memory-vector - Versions diffs - 0.1.7 → 0.1.9 - Mend

@bamdra/bamdra-memory-vector 0.1.7 → 0.1.9

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (4)

package/dist/index.js +349 -42
package/openclaw.plugin.json +1 -1
package/package.json +1 -1
package/skills/bamdra-memory-vector-operator/SKILL.md +3 -0

package/dist/index.js CHANGED

@@ -30,6 +30,7 @@ var getImportMetaUrl = () => typeof document === "undefined" ? new URL(`file:${_
 var importMetaUrl = /* @__PURE__ */ getImportMetaUrl();
 // src/index.ts
+var import_node_child_process = require("child_process");
 var import_node_crypto = require("crypto");
 var import_node_fs = require("fs");
 var import_node_os = require("os");
@@ -38,51 +39,52 @@ var import_node_url = require("url");
 var GLOBAL_VECTOR_API_KEY = "__OPENCLAW_BAMDRA_MEMORY_VECTOR__";
 var PLUGIN_ID = "bamdra-memory-vector";
 var SKILL_ID = "bamdra-memory-vector-operator";
-var TOOL_NAME = "bamdra_memory_vector_search";
+var SEARCH_TOOL_NAME = "bamdra_memory_vector_search";
+var REINDEX_TOOL_NAME = "bamdra_memory_vector_reindex";
+var DEFAULT_LIBRARY_DIRS = ["knowledge", "docs", "notes", "ideas"];
+var RUNTIME_DIR = "_runtime";
+var SUPPORTED_TEXT_EXTENSIONS = /* @__PURE__ */ new Set([
+  ".md",
+  ".mdx",
+  ".txt",
+  ".text",
+  ".json",
+  ".yaml",
+  ".yml",
+  ".csv",
+  ".tsv",
+  ".docx",
+  ".pdf"
+]);
 var LocalVectorIndex = class {
   config;
   records = /* @__PURE__ */ new Map();
   constructor(inputConfig) {
     this.config = normalizeConfig(inputConfig);
     (0, import_node_fs.mkdirSync)((0, import_node_path.dirname)(this.config.indexPath), { recursive: true });
-    (0, import_node_fs.mkdirSync)(this.config.privateMarkdownRoot, { recursive: true });
-    (0, import_node_fs.mkdirSync)(this.config.sharedMarkdownRoot, { recursive: true });
-    this.load();
+    this.ensureLibraryRoots();
+    this.syncFilesystemIndex();
   }
   upsert(args) {
-    const id = hashId(`${args.userId ?? "shared"}:${args.sourcePath}:${args.title}`);
-    const record = {
-      id,
+    const visibility = args.userId == null ? "shared" : "private";
+    const runtimeRoot = visibility === "shared" ? this.config.sharedMarkdownRoot : this.config.privateMarkdownRoot;
+    const runtimeRelativePath = normalizeRuntimeSourcePath({
+      visibility,
       userId: args.userId,
-      sessionId: args.sessionId,
       topicId: args.topicId,
       sourcePath: args.sourcePath,
-      title: args.title,
-      text: args.text,
-      tags: args.tags ?? [],
-      embedding: embed(`${args.title}
-${args.text}`, this.config.dimensions),
-      updatedAt: (/* @__PURE__ */ new Date()).toISOString()
-    };
-    this.records.set(id, record);
-    const markdownRoot = args.userId == null ? this.config.sharedMarkdownRoot : this.config.privateMarkdownRoot;
-    const markdownPath = (0, import_node_path.join)(markdownRoot, args.sourcePath);
-    (0, import_node_fs.mkdirSync)((0, import_node_path.dirname)(markdownPath), { recursive: true });
-    (0, import_node_fs.writeFileSync)(markdownPath, `# ${args.title}
-${args.text}
-`, "utf8");
-    this.flush();
+      title: args.title
+    });
+    const absolutePath = (0, import_node_path.join)(runtimeRoot, runtimeRelativePath);
+    (0, import_node_fs.mkdirSync)((0, import_node_path.dirname)(absolutePath), { recursive: true });
+    (0, import_node_fs.writeFileSync)(absolutePath, renderRuntimeMarkdown(args.title, args.text, args.tags ?? []), "utf8");
+    this.syncFilesystemIndex();
   }
   search(args) {
+    this.syncFilesystemIndex();
     const limit = args.limit ?? 5;
     const queryEmbedding = embed(args.query, this.config.dimensions);
-    return [...this.records.values()].filter((record) => {
-      if (args.userId == null) {
-        return record.userId == null;
-      }
-      return record.userId === args.userId || record.userId == null;
-    }).map((record) => ({
+    return [...this.records.values()].filter((record) => canAccessRecord(record, args.userId)).filter((record) => !args.topicId || record.topicId === args.topicId || record.topicId == null).map((record) => ({
       id: record.id,
       userId: record.userId,
       topicId: record.topicId,
@@ -96,20 +98,60 @@ ${args.text}
       source: "vector"
     })).sort((a, b) => b.score - a.score).slice(0, limit);
   }
+  rebuild() {
+    this.syncFilesystemIndex();
+    return {
+      records: this.records.size,
+      privateRoot: this.config.privateMarkdownRoot,
+      sharedRoot: this.config.sharedMarkdownRoot
+    };
+  }
+  ensureLibraryRoots() {
+    (0, import_node_fs.mkdirSync)(this.config.privateMarkdownRoot, { recursive: true });
+    (0, import_node_fs.mkdirSync)(this.config.sharedMarkdownRoot, { recursive: true });
+    for (const root of [this.config.privateMarkdownRoot, this.config.sharedMarkdownRoot]) {
+      for (const dirName of DEFAULT_LIBRARY_DIRS) {
+        (0, import_node_fs.mkdirSync)((0, import_node_path.join)(root, dirName), { recursive: true });
+      }
+      (0, import_node_fs.mkdirSync)((0, import_node_path.join)(root, RUNTIME_DIR), { recursive: true });
+    }
+  }
+  syncFilesystemIndex() {
+    const nextRecords = /* @__PURE__ */ new Map();
+    const documents = [
+      ...scanRoot(this.config.privateMarkdownRoot, "private"),
+      ...scanRoot(this.config.sharedMarkdownRoot, "shared")
+    ];
+    for (const document2 of documents) {
+      const chunks = chunkDocument(document2);
+      chunks.forEach((chunk, index) => {
+        const id = hashId(`${document2.visibility}:${document2.relativePath}:${index}`);
+        nextRecords.set(id, {
+          id,
+          userId: document2.userId,
+          topicId: document2.topicId,
+          sessionId: document2.sessionId,
+          sourcePath: document2.relativePath,
+          title: chunk.title,
+          text: chunk.text,
+          tags: dedupeTextItems([...document2.tags, ...chunk.tags]),
+          embedding: embed(`${chunk.title}
+${chunk.text}`, this.config.dimensions),
+          updatedAt: document2.updatedAt,
+          visibility: document2.visibility,
+          sourceKind: document2.sourceKind,
+          absolutePath: document2.absolutePath
+        });
+      });
+    }
+    this.records = nextRecords;
+    this.flush();
+  }
   flush() {
     const payload = JSON.stringify([...this.records.values()], null, 2);
     (0, import_node_fs.writeFileSync)(this.config.indexPath, `${payload}
 `, "utf8");
   }
-  load() {
-    if (!(0, import_node_fs.existsSync)(this.config.indexPath)) {
-      return;
-    }
-    const payload = JSON.parse((0, import_node_fs.readFileSync)(this.config.indexPath, "utf8"));
-    for (const record of payload) {
-      this.records.set(record.id, record);
-    }
-  }
 };
 function register(api) {
   queueMicrotask(() => {
@@ -121,8 +163,8 @@ function register(api) {
   const runtime = new LocalVectorIndex(api.pluginConfig ?? api.config ?? api.plugin?.config);
   exposeVectorApi(runtime);
   api.registerTool?.({
-    name: TOOL_NAME,
-    description: "Search the current user's vector memory index",
+    name: SEARCH_TOOL_NAME,
+    description: "Search the current user's vector memory and knowledge index",
     parameters: {
       type: "object",
       additionalProperties: false,
@@ -150,6 +192,25 @@ function register(api) {
       };
     }
   });
+  api.registerTool?.({
+    name: REINDEX_TOOL_NAME,
+    description: "Rebuild the vector knowledge index from the private and shared library roots",
+    parameters: {
+      type: "object",
+      additionalProperties: false,
+      properties: {}
+    },
+    async execute() {
+      return {
+        content: [
+          {
+            type: "text",
+            text: JSON.stringify(runtime.rebuild(), null, 2)
+          }
+        ]
+      };
+    }
+  });
 }
 async function activate(api) {
   register(api);
@@ -161,6 +222,9 @@ function exposeVectorApi(runtime) {
     },
     search(args) {
       return runtime.search(args);
+    },
+    rebuild() {
+      return runtime.rebuild();
     }
   };
 }
@@ -216,7 +280,8 @@ function ensureHostConfig(config) {
   changed = ensureArrayIncludes(plugins, "allow", PLUGIN_ID) || changed;
   changed = ensureArrayIncludes(load, "paths", (0, import_node_path.join)((0, import_node_os.homedir)(), ".openclaw", "extensions")) || changed;
   changed = ensureArrayIncludes(skillsLoad, "extraDirs", (0, import_node_path.join)((0, import_node_os.homedir)(), ".openclaw", "skills")) || changed;
-  changed = ensureArrayIncludes(tools, "allow", TOOL_NAME) || changed;
+  changed = ensureArrayIncludes(tools, "allow", SEARCH_TOOL_NAME) || changed;
+  changed = ensureArrayIncludes(tools, "allow", REINDEX_TOOL_NAME) || changed;
   if (typeof entry.enabled !== "boolean") {
     entry.enabled = false;
     changed = true;
@@ -278,6 +343,236 @@ function ensureAgentSkills(agents, skillId) {
   }
   return changed;
 }
+function scanRoot(rootDir, visibility) {
+  if (!(0, import_node_fs.existsSync)(rootDir)) {
+    return [];
+  }
+  const files = walkFiles(rootDir);
+  const documents = [];
+  for (const absolutePath of files) {
+    const extension = (0, import_node_path.extname)(absolutePath).toLowerCase();
+    if (!SUPPORTED_TEXT_EXTENSIONS.has(extension)) {
+      continue;
+    }
+    const relativePath = (0, import_node_path.relative)(rootDir, absolutePath).split(import_node_path.sep).join("/");
+    const stat = (0, import_node_fs.statSync)(absolutePath);
+    const text = extractFileText(absolutePath);
+    if (!text || !text.trim()) {
+      continue;
+    }
+    const metadata = inferDocumentMetadata(relativePath, visibility);
+    documents.push({
+      absolutePath,
+      relativePath,
+      visibility,
+      sourceKind: relativePath.startsWith(`${RUNTIME_DIR}/`) ? "runtime" : "knowledge",
+      userId: metadata.userId,
+      topicId: metadata.topicId,
+      sessionId: metadata.sessionId,
+      updatedAt: stat.mtime.toISOString(),
+      title: inferDocumentTitle(relativePath, text),
+      tags: metadata.tags,
+      text
+    });
+  }
+  return documents;
+}
+function walkFiles(rootDir) {
+  const results = [];
+  const stack = [rootDir];
+  while (stack.length > 0) {
+    const current = stack.pop();
+    if (!current) {
+      continue;
+    }
+    for (const entry of (0, import_node_fs.readdirSync)(current, { withFileTypes: true })) {
+      if (entry.name.startsWith(".") || entry.name === "node_modules") {
+        continue;
+      }
+      const absolutePath = (0, import_node_path.join)(current, entry.name);
+      if (entry.isDirectory()) {
+        stack.push(absolutePath);
+      } else if (entry.isFile()) {
+        results.push(absolutePath);
+      }
+    }
+  }
+  return results;
+}
+function inferDocumentMetadata(relativePath, visibility) {
+  const segments = relativePath.split("/");
+  const tags = segments.filter((segment) => segment && segment !== RUNTIME_DIR).slice(0, 4).map((segment) => sanitizeTag(segment));
+  if (visibility === "shared") {
+    return { userId: null, topicId: extractTopicId(segments), sessionId: extractSessionId(segments), tags };
+  }
+  const userSegmentIndex = segments.findIndex((segment) => segment === "user");
+  const userId = userSegmentIndex >= 0 ? segments[userSegmentIndex + 1] ?? null : null;
+  return {
+    userId,
+    topicId: extractTopicId(segments),
+    sessionId: extractSessionId(segments),
+    tags
+  };
+}
+function extractTopicId(segments) {
+  const topicSegment = segments.find((segment) => segment.startsWith("topic-"));
+  return topicSegment ?? null;
+}
+function extractSessionId(segments) {
+  const sessionIndex = segments.findIndex((segment) => segment === "sessions");
+  if (sessionIndex < 0) {
+    return null;
+  }
+  return segments[sessionIndex + 1] ?? null;
+}
+function inferDocumentTitle(relativePath, text) {
+  const headingMatch = text.match(/^#\s+(.+)$/m);
+  if (headingMatch?.[1]) {
+    return headingMatch[1].trim();
+  }
+  const firstNonEmpty = text.split(/\r?\n/).map((line) => line.trim()).find(Boolean);
+  if (firstNonEmpty) {
+    return firstNonEmpty.slice(0, 80);
+  }
+  return (0, import_node_path.basename)(relativePath, (0, import_node_path.extname)(relativePath));
+}
+function chunkDocument(document2) {
+  if ((0, import_node_path.extname)(document2.absolutePath).toLowerCase().startsWith(".md")) {
+    return chunkMarkdown(document2.text, document2.title, document2.tags);
+  }
+  return chunkPlainText(document2.text, document2.title, document2.tags);
+}
+function chunkMarkdown(text, fallbackTitle, baseTags) {
+  const lines = text.split(/\r?\n/);
+  const chunks = [];
+  let headingTrail = [];
+  let buffer = [];
+  const flush = () => {
+    const content = buffer.join("\n").trim();
+    if (!content) {
+      buffer = [];
+      return;
+    }
+    chunks.push({
+      title: headingTrail.length > 0 ? headingTrail.join(" / ") : fallbackTitle,
+      text: content,
+      tags: baseTags
+    });
+    buffer = [];
+  };
+  for (const line of lines) {
+    const heading = line.match(/^(#{1,6})\s+(.+)$/);
+    if (heading) {
+      flush();
+      const depth = heading[1].length;
+      headingTrail = [...headingTrail.slice(0, depth - 1), heading[2].trim()];
+      continue;
+    }
+    buffer.push(line);
+    if (buffer.join("\n").length > 900) {
+      flush();
+    }
+  }
+  flush();
+  return chunks.length > 0 ? chunks : chunkPlainText(text, fallbackTitle, baseTags);
+}
+function chunkPlainText(text, title, tags) {
+  const normalized = text.replace(/\r/g, "").trim();
+  if (!normalized) {
+    return [];
+  }
+  const paragraphs = normalized.split(/\n{2,}/).map((item) => item.trim()).filter(Boolean);
+  const chunks = [];
+  let buffer = "";
+  for (const paragraph of paragraphs.length > 0 ? paragraphs : [normalized]) {
+    const next = buffer ? `${buffer}
+${paragraph}` : paragraph;
+    if (next.length > 900 && buffer) {
+      chunks.push({ title, text: buffer, tags });
+      buffer = paragraph;
+    } else {
+      buffer = next;
+    }
+  }
+  if (buffer) {
+    chunks.push({ title, text: buffer, tags });
+  }
+  return chunks;
+}
+function extractFileText(absolutePath) {
+  const extension = (0, import_node_path.extname)(absolutePath).toLowerCase();
+  if (extension === ".docx") {
+    return extractDocxText(absolutePath);
+  }
+  if (extension === ".pdf") {
+    return extractPdfText(absolutePath);
+  }
+  return (0, import_node_fs.readFileSync)(absolutePath, "utf8");
+}
+function extractDocxText(absolutePath) {
+  try {
+    const xml = (0, import_node_child_process.execFileSync)("unzip", ["-p", absolutePath, "word/document.xml"], {
+      encoding: "utf8",
+      stdio: ["ignore", "pipe", "ignore"]
+    });
+    return stripXmlText(xml);
+  } catch {
+    return "";
+  }
+}
+function extractPdfText(absolutePath) {
+  try {
+    return (0, import_node_child_process.execFileSync)("pdftotext", ["-layout", "-nopgbrk", absolutePath, "-"], {
+      encoding: "utf8",
+      stdio: ["ignore", "pipe", "ignore"]
+    }).trim();
+  } catch {
+    try {
+      return (0, import_node_child_process.execFileSync)("mdls", ["-raw", "-name", "kMDItemTextContent", absolutePath], {
+        encoding: "utf8",
+        stdio: ["ignore", "pipe", "ignore"]
+      }).trim();
+    } catch {
+      return "";
+    }
+  }
+}
+function stripXmlText(xml) {
+  return xml.replace(/<\/w:p>/g, "\n").replace(/<[^>]+>/g, " ").replace(/\s+\n/g, "\n").replace(/\n{3,}/g, "\n\n").replace(/[ \t]{2,}/g, " ").trim();
+}
+function normalizeRuntimeSourcePath(args) {
+  const topicSegment = args.topicId ?? "general";
+  const slug = slugify(args.title) || "memory-note";
+  const baseName = `${slug}.md`;
+  if (args.visibility === "shared") {
+    return (0, import_node_path.join)(RUNTIME_DIR, "shared", "topics", topicSegment, baseName);
+  }
+  return (0, import_node_path.join)(RUNTIME_DIR, "user", args.userId ?? "current", "topics", topicSegment, baseName);
+}
+function renderRuntimeMarkdown(title, text, tags) {
+  const frontmatter = [
+    "---",
+    `title: ${JSON.stringify(title)}`,
+    `tags: ${JSON.stringify(tags)}`,
+    "---"
+  ].join("\n");
+  return `${frontmatter}
+# ${title}
+${text.trim()}
+`;
+}
+function canAccessRecord(record, userId) {
+  if (record.visibility === "shared") {
+    return true;
+  }
+  if (record.userId == null) {
+    return userId != null;
+  }
+  return record.userId === userId;
+}
 function embed(text, dimensions) {
   const vector = Array.from({ length: dimensions }, () => 0);
   const tokens = text.toLowerCase().split(/[^a-z0-9_\u4e00-\u9fff]+/i).filter(Boolean);
@@ -305,11 +600,23 @@ function inferMatchReasons(query, record) {
   if (record.text.toLowerCase().includes(normalized)) {
     reasons.push("text");
   }
+  if (record.sourcePath.toLowerCase().includes(normalized)) {
+    reasons.push("path");
+  }
   if (reasons.length === 0) {
     reasons.push("semantic");
   }
   return reasons;
 }
+function dedupeTextItems(items) {
+  return [...new Set(items.map((item) => item.trim()).filter(Boolean))];
+}
+function sanitizeTag(value) {
+  return value.replace(/\.[^.]+$/, "").replace(/[_-]+/g, " ").trim();
+}
+function slugify(value) {
+  return value.toLowerCase().replace(/[^a-z0-9\u4e00-\u9fff]+/gi, "-").replace(/^-+|-+$/g, "").slice(0, 60);
+}
 function hashId(value) {
   return (0, import_node_crypto.createHash)("sha1").update(value).digest("hex").slice(0, 24);
 }

package/openclaw.plugin.json CHANGED

@@ -3,7 +3,7 @@
   "type": "tool",
   "name": "Bamdra Memory Vector",
   "description": "Local vector-style semantic retrieval enhancement for Bamdra memory.",
-  "version": "0.1.7",
+  "version": "0.1.9",
   "main": "./dist/index.js",
   "skills": ["./skills"],
   "configSchema": {

package/package.json CHANGED

@@ -1,6 +1,6 @@
 {
   "name": "@bamdra/bamdra-memory-vector",
-  "version": "0.1.7",
+  "version": "0.1.9",
   "description": "Lightweight local semantic retrieval enhancement for the Bamdra OpenClaw memory suite.",
   "license": "MIT",
   "homepage": "https://www.bamdra.com",

package/skills/bamdra-memory-vector-operator/SKILL.md CHANGED

@@ -24,12 +24,15 @@ It complements topic memory. Use it when the user remembers something fuzzily, w
 - keep cross-user boundaries intact
 - do not flood the prompt with low-signal chunks
 - prefer a few strong recalls over many weak ones
+- when the question plausibly targets local docs, notes, ideas, or knowledge files, check local vector-backed knowledge before using web search
 ## Markdown Knowledge Model
 - private Markdown is for one user's durable notes and memory fragments
 - shared Markdown is for team or reusable knowledge
 - both are editable by humans outside the runtime
+- common human-managed directories include `knowledge/`, `docs/`, `notes/`, and `ideas/`
+- `_runtime/` is system-managed and should not be treated as the main editing area
 ## Shared vs Private