@bamdra/bamdra-memory-vector 0.1.7 → 0.1.9

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.js CHANGED
@@ -30,6 +30,7 @@ var getImportMetaUrl = () => typeof document === "undefined" ? new URL(`file:${_
30
30
  var importMetaUrl = /* @__PURE__ */ getImportMetaUrl();
31
31
 
32
32
  // src/index.ts
33
+ var import_node_child_process = require("child_process");
33
34
  var import_node_crypto = require("crypto");
34
35
  var import_node_fs = require("fs");
35
36
  var import_node_os = require("os");
@@ -38,51 +39,52 @@ var import_node_url = require("url");
38
39
  var GLOBAL_VECTOR_API_KEY = "__OPENCLAW_BAMDRA_MEMORY_VECTOR__";
39
40
  var PLUGIN_ID = "bamdra-memory-vector";
40
41
  var SKILL_ID = "bamdra-memory-vector-operator";
41
- var TOOL_NAME = "bamdra_memory_vector_search";
42
+ var SEARCH_TOOL_NAME = "bamdra_memory_vector_search";
43
+ var REINDEX_TOOL_NAME = "bamdra_memory_vector_reindex";
44
+ var DEFAULT_LIBRARY_DIRS = ["knowledge", "docs", "notes", "ideas"];
45
+ var RUNTIME_DIR = "_runtime";
46
+ var SUPPORTED_TEXT_EXTENSIONS = /* @__PURE__ */ new Set([
47
+ ".md",
48
+ ".mdx",
49
+ ".txt",
50
+ ".text",
51
+ ".json",
52
+ ".yaml",
53
+ ".yml",
54
+ ".csv",
55
+ ".tsv",
56
+ ".docx",
57
+ ".pdf"
58
+ ]);
42
59
  var LocalVectorIndex = class {
43
60
  config;
44
61
  records = /* @__PURE__ */ new Map();
45
62
  constructor(inputConfig) {
46
63
  this.config = normalizeConfig(inputConfig);
47
64
  (0, import_node_fs.mkdirSync)((0, import_node_path.dirname)(this.config.indexPath), { recursive: true });
48
- (0, import_node_fs.mkdirSync)(this.config.privateMarkdownRoot, { recursive: true });
49
- (0, import_node_fs.mkdirSync)(this.config.sharedMarkdownRoot, { recursive: true });
50
- this.load();
65
+ this.ensureLibraryRoots();
66
+ this.syncFilesystemIndex();
51
67
  }
52
68
  upsert(args) {
53
- const id = hashId(`${args.userId ?? "shared"}:${args.sourcePath}:${args.title}`);
54
- const record = {
55
- id,
69
+ const visibility = args.userId == null ? "shared" : "private";
70
+ const runtimeRoot = visibility === "shared" ? this.config.sharedMarkdownRoot : this.config.privateMarkdownRoot;
71
+ const runtimeRelativePath = normalizeRuntimeSourcePath({
72
+ visibility,
56
73
  userId: args.userId,
57
- sessionId: args.sessionId,
58
74
  topicId: args.topicId,
59
75
  sourcePath: args.sourcePath,
60
- title: args.title,
61
- text: args.text,
62
- tags: args.tags ?? [],
63
- embedding: embed(`${args.title}
64
- ${args.text}`, this.config.dimensions),
65
- updatedAt: (/* @__PURE__ */ new Date()).toISOString()
66
- };
67
- this.records.set(id, record);
68
- const markdownRoot = args.userId == null ? this.config.sharedMarkdownRoot : this.config.privateMarkdownRoot;
69
- const markdownPath = (0, import_node_path.join)(markdownRoot, args.sourcePath);
70
- (0, import_node_fs.mkdirSync)((0, import_node_path.dirname)(markdownPath), { recursive: true });
71
- (0, import_node_fs.writeFileSync)(markdownPath, `# ${args.title}
72
-
73
- ${args.text}
74
- `, "utf8");
75
- this.flush();
76
+ title: args.title
77
+ });
78
+ const absolutePath = (0, import_node_path.join)(runtimeRoot, runtimeRelativePath);
79
+ (0, import_node_fs.mkdirSync)((0, import_node_path.dirname)(absolutePath), { recursive: true });
80
+ (0, import_node_fs.writeFileSync)(absolutePath, renderRuntimeMarkdown(args.title, args.text, args.tags ?? []), "utf8");
81
+ this.syncFilesystemIndex();
76
82
  }
77
83
  search(args) {
84
+ this.syncFilesystemIndex();
78
85
  const limit = args.limit ?? 5;
79
86
  const queryEmbedding = embed(args.query, this.config.dimensions);
80
- return [...this.records.values()].filter((record) => {
81
- if (args.userId == null) {
82
- return record.userId == null;
83
- }
84
- return record.userId === args.userId || record.userId == null;
85
- }).map((record) => ({
87
+ return [...this.records.values()].filter((record) => canAccessRecord(record, args.userId)).filter((record) => !args.topicId || record.topicId === args.topicId || record.topicId == null).map((record) => ({
86
88
  id: record.id,
87
89
  userId: record.userId,
88
90
  topicId: record.topicId,
@@ -96,20 +98,60 @@ ${args.text}
96
98
  source: "vector"
97
99
  })).sort((a, b) => b.score - a.score).slice(0, limit);
98
100
  }
101
+ rebuild() {
102
+ this.syncFilesystemIndex();
103
+ return {
104
+ records: this.records.size,
105
+ privateRoot: this.config.privateMarkdownRoot,
106
+ sharedRoot: this.config.sharedMarkdownRoot
107
+ };
108
+ }
109
+ ensureLibraryRoots() {
110
+ (0, import_node_fs.mkdirSync)(this.config.privateMarkdownRoot, { recursive: true });
111
+ (0, import_node_fs.mkdirSync)(this.config.sharedMarkdownRoot, { recursive: true });
112
+ for (const root of [this.config.privateMarkdownRoot, this.config.sharedMarkdownRoot]) {
113
+ for (const dirName of DEFAULT_LIBRARY_DIRS) {
114
+ (0, import_node_fs.mkdirSync)((0, import_node_path.join)(root, dirName), { recursive: true });
115
+ }
116
+ (0, import_node_fs.mkdirSync)((0, import_node_path.join)(root, RUNTIME_DIR), { recursive: true });
117
+ }
118
+ }
119
+ syncFilesystemIndex() {
120
+ const nextRecords = /* @__PURE__ */ new Map();
121
+ const documents = [
122
+ ...scanRoot(this.config.privateMarkdownRoot, "private"),
123
+ ...scanRoot(this.config.sharedMarkdownRoot, "shared")
124
+ ];
125
+ for (const document2 of documents) {
126
+ const chunks = chunkDocument(document2);
127
+ chunks.forEach((chunk, index) => {
128
+ const id = hashId(`${document2.visibility}:${document2.relativePath}:${index}`);
129
+ nextRecords.set(id, {
130
+ id,
131
+ userId: document2.userId,
132
+ topicId: document2.topicId,
133
+ sessionId: document2.sessionId,
134
+ sourcePath: document2.relativePath,
135
+ title: chunk.title,
136
+ text: chunk.text,
137
+ tags: dedupeTextItems([...document2.tags, ...chunk.tags]),
138
+ embedding: embed(`${chunk.title}
139
+ ${chunk.text}`, this.config.dimensions),
140
+ updatedAt: document2.updatedAt,
141
+ visibility: document2.visibility,
142
+ sourceKind: document2.sourceKind,
143
+ absolutePath: document2.absolutePath
144
+ });
145
+ });
146
+ }
147
+ this.records = nextRecords;
148
+ this.flush();
149
+ }
99
150
  flush() {
100
151
  const payload = JSON.stringify([...this.records.values()], null, 2);
101
152
  (0, import_node_fs.writeFileSync)(this.config.indexPath, `${payload}
102
153
  `, "utf8");
103
154
  }
104
- load() {
105
- if (!(0, import_node_fs.existsSync)(this.config.indexPath)) {
106
- return;
107
- }
108
- const payload = JSON.parse((0, import_node_fs.readFileSync)(this.config.indexPath, "utf8"));
109
- for (const record of payload) {
110
- this.records.set(record.id, record);
111
- }
112
- }
113
155
  };
114
156
  function register(api) {
115
157
  queueMicrotask(() => {
@@ -121,8 +163,8 @@ function register(api) {
121
163
  const runtime = new LocalVectorIndex(api.pluginConfig ?? api.config ?? api.plugin?.config);
122
164
  exposeVectorApi(runtime);
123
165
  api.registerTool?.({
124
- name: TOOL_NAME,
125
- description: "Search the current user's vector memory index",
166
+ name: SEARCH_TOOL_NAME,
167
+ description: "Search the current user's vector memory and knowledge index",
126
168
  parameters: {
127
169
  type: "object",
128
170
  additionalProperties: false,
@@ -150,6 +192,25 @@ function register(api) {
150
192
  };
151
193
  }
152
194
  });
195
+ api.registerTool?.({
196
+ name: REINDEX_TOOL_NAME,
197
+ description: "Rebuild the vector knowledge index from the private and shared library roots",
198
+ parameters: {
199
+ type: "object",
200
+ additionalProperties: false,
201
+ properties: {}
202
+ },
203
+ async execute() {
204
+ return {
205
+ content: [
206
+ {
207
+ type: "text",
208
+ text: JSON.stringify(runtime.rebuild(), null, 2)
209
+ }
210
+ ]
211
+ };
212
+ }
213
+ });
153
214
  }
154
215
  async function activate(api) {
155
216
  register(api);
@@ -161,6 +222,9 @@ function exposeVectorApi(runtime) {
161
222
  },
162
223
  search(args) {
163
224
  return runtime.search(args);
225
+ },
226
+ rebuild() {
227
+ return runtime.rebuild();
164
228
  }
165
229
  };
166
230
  }
@@ -216,7 +280,8 @@ function ensureHostConfig(config) {
216
280
  changed = ensureArrayIncludes(plugins, "allow", PLUGIN_ID) || changed;
217
281
  changed = ensureArrayIncludes(load, "paths", (0, import_node_path.join)((0, import_node_os.homedir)(), ".openclaw", "extensions")) || changed;
218
282
  changed = ensureArrayIncludes(skillsLoad, "extraDirs", (0, import_node_path.join)((0, import_node_os.homedir)(), ".openclaw", "skills")) || changed;
219
- changed = ensureArrayIncludes(tools, "allow", TOOL_NAME) || changed;
283
+ changed = ensureArrayIncludes(tools, "allow", SEARCH_TOOL_NAME) || changed;
284
+ changed = ensureArrayIncludes(tools, "allow", REINDEX_TOOL_NAME) || changed;
220
285
  if (typeof entry.enabled !== "boolean") {
221
286
  entry.enabled = false;
222
287
  changed = true;
@@ -278,6 +343,236 @@ function ensureAgentSkills(agents, skillId) {
278
343
  }
279
344
  return changed;
280
345
  }
346
+ function scanRoot(rootDir, visibility) {
347
+ if (!(0, import_node_fs.existsSync)(rootDir)) {
348
+ return [];
349
+ }
350
+ const files = walkFiles(rootDir);
351
+ const documents = [];
352
+ for (const absolutePath of files) {
353
+ const extension = (0, import_node_path.extname)(absolutePath).toLowerCase();
354
+ if (!SUPPORTED_TEXT_EXTENSIONS.has(extension)) {
355
+ continue;
356
+ }
357
+ const relativePath = (0, import_node_path.relative)(rootDir, absolutePath).split(import_node_path.sep).join("/");
358
+ const stat = (0, import_node_fs.statSync)(absolutePath);
359
+ const text = extractFileText(absolutePath);
360
+ if (!text || !text.trim()) {
361
+ continue;
362
+ }
363
+ const metadata = inferDocumentMetadata(relativePath, visibility);
364
+ documents.push({
365
+ absolutePath,
366
+ relativePath,
367
+ visibility,
368
+ sourceKind: relativePath.startsWith(`${RUNTIME_DIR}/`) ? "runtime" : "knowledge",
369
+ userId: metadata.userId,
370
+ topicId: metadata.topicId,
371
+ sessionId: metadata.sessionId,
372
+ updatedAt: stat.mtime.toISOString(),
373
+ title: inferDocumentTitle(relativePath, text),
374
+ tags: metadata.tags,
375
+ text
376
+ });
377
+ }
378
+ return documents;
379
+ }
380
+ function walkFiles(rootDir) {
381
+ const results = [];
382
+ const stack = [rootDir];
383
+ while (stack.length > 0) {
384
+ const current = stack.pop();
385
+ if (!current) {
386
+ continue;
387
+ }
388
+ for (const entry of (0, import_node_fs.readdirSync)(current, { withFileTypes: true })) {
389
+ if (entry.name.startsWith(".") || entry.name === "node_modules") {
390
+ continue;
391
+ }
392
+ const absolutePath = (0, import_node_path.join)(current, entry.name);
393
+ if (entry.isDirectory()) {
394
+ stack.push(absolutePath);
395
+ } else if (entry.isFile()) {
396
+ results.push(absolutePath);
397
+ }
398
+ }
399
+ }
400
+ return results;
401
+ }
402
+ function inferDocumentMetadata(relativePath, visibility) {
403
+ const segments = relativePath.split("/");
404
+ const tags = segments.filter((segment) => segment && segment !== RUNTIME_DIR).slice(0, 4).map((segment) => sanitizeTag(segment));
405
+ if (visibility === "shared") {
406
+ return { userId: null, topicId: extractTopicId(segments), sessionId: extractSessionId(segments), tags };
407
+ }
408
+ const userSegmentIndex = segments.findIndex((segment) => segment === "user");
409
+ const userId = userSegmentIndex >= 0 ? segments[userSegmentIndex + 1] ?? null : null;
410
+ return {
411
+ userId,
412
+ topicId: extractTopicId(segments),
413
+ sessionId: extractSessionId(segments),
414
+ tags
415
+ };
416
+ }
417
+ function extractTopicId(segments) {
418
+ const topicSegment = segments.find((segment) => segment.startsWith("topic-"));
419
+ return topicSegment ?? null;
420
+ }
421
+ function extractSessionId(segments) {
422
+ const sessionIndex = segments.findIndex((segment) => segment === "sessions");
423
+ if (sessionIndex < 0) {
424
+ return null;
425
+ }
426
+ return segments[sessionIndex + 1] ?? null;
427
+ }
428
+ function inferDocumentTitle(relativePath, text) {
429
+ const headingMatch = text.match(/^#\s+(.+)$/m);
430
+ if (headingMatch?.[1]) {
431
+ return headingMatch[1].trim();
432
+ }
433
+ const firstNonEmpty = text.split(/\r?\n/).map((line) => line.trim()).find(Boolean);
434
+ if (firstNonEmpty) {
435
+ return firstNonEmpty.slice(0, 80);
436
+ }
437
+ return (0, import_node_path.basename)(relativePath, (0, import_node_path.extname)(relativePath));
438
+ }
439
+ function chunkDocument(document2) {
440
+ if ((0, import_node_path.extname)(document2.absolutePath).toLowerCase().startsWith(".md")) {
441
+ return chunkMarkdown(document2.text, document2.title, document2.tags);
442
+ }
443
+ return chunkPlainText(document2.text, document2.title, document2.tags);
444
+ }
445
+ function chunkMarkdown(text, fallbackTitle, baseTags) {
446
+ const lines = text.split(/\r?\n/);
447
+ const chunks = [];
448
+ let headingTrail = [];
449
+ let buffer = [];
450
+ const flush = () => {
451
+ const content = buffer.join("\n").trim();
452
+ if (!content) {
453
+ buffer = [];
454
+ return;
455
+ }
456
+ chunks.push({
457
+ title: headingTrail.length > 0 ? headingTrail.join(" / ") : fallbackTitle,
458
+ text: content,
459
+ tags: baseTags
460
+ });
461
+ buffer = [];
462
+ };
463
+ for (const line of lines) {
464
+ const heading = line.match(/^(#{1,6})\s+(.+)$/);
465
+ if (heading) {
466
+ flush();
467
+ const depth = heading[1].length;
468
+ headingTrail = [...headingTrail.slice(0, depth - 1), heading[2].trim()];
469
+ continue;
470
+ }
471
+ buffer.push(line);
472
+ if (buffer.join("\n").length > 900) {
473
+ flush();
474
+ }
475
+ }
476
+ flush();
477
+ return chunks.length > 0 ? chunks : chunkPlainText(text, fallbackTitle, baseTags);
478
+ }
479
+ function chunkPlainText(text, title, tags) {
480
+ const normalized = text.replace(/\r/g, "").trim();
481
+ if (!normalized) {
482
+ return [];
483
+ }
484
+ const paragraphs = normalized.split(/\n{2,}/).map((item) => item.trim()).filter(Boolean);
485
+ const chunks = [];
486
+ let buffer = "";
487
+ for (const paragraph of paragraphs.length > 0 ? paragraphs : [normalized]) {
488
+ const next = buffer ? `${buffer}
489
+
490
+ ${paragraph}` : paragraph;
491
+ if (next.length > 900 && buffer) {
492
+ chunks.push({ title, text: buffer, tags });
493
+ buffer = paragraph;
494
+ } else {
495
+ buffer = next;
496
+ }
497
+ }
498
+ if (buffer) {
499
+ chunks.push({ title, text: buffer, tags });
500
+ }
501
+ return chunks;
502
+ }
503
+ function extractFileText(absolutePath) {
504
+ const extension = (0, import_node_path.extname)(absolutePath).toLowerCase();
505
+ if (extension === ".docx") {
506
+ return extractDocxText(absolutePath);
507
+ }
508
+ if (extension === ".pdf") {
509
+ return extractPdfText(absolutePath);
510
+ }
511
+ return (0, import_node_fs.readFileSync)(absolutePath, "utf8");
512
+ }
513
+ function extractDocxText(absolutePath) {
514
+ try {
515
+ const xml = (0, import_node_child_process.execFileSync)("unzip", ["-p", absolutePath, "word/document.xml"], {
516
+ encoding: "utf8",
517
+ stdio: ["ignore", "pipe", "ignore"]
518
+ });
519
+ return stripXmlText(xml);
520
+ } catch {
521
+ return "";
522
+ }
523
+ }
524
+ function extractPdfText(absolutePath) {
525
+ try {
526
+ return (0, import_node_child_process.execFileSync)("pdftotext", ["-layout", "-nopgbrk", absolutePath, "-"], {
527
+ encoding: "utf8",
528
+ stdio: ["ignore", "pipe", "ignore"]
529
+ }).trim();
530
+ } catch {
531
+ try {
532
+ return (0, import_node_child_process.execFileSync)("mdls", ["-raw", "-name", "kMDItemTextContent", absolutePath], {
533
+ encoding: "utf8",
534
+ stdio: ["ignore", "pipe", "ignore"]
535
+ }).trim();
536
+ } catch {
537
+ return "";
538
+ }
539
+ }
540
+ }
541
+ function stripXmlText(xml) {
542
+ return xml.replace(/<\/w:p>/g, "\n").replace(/<[^>]+>/g, " ").replace(/\s+\n/g, "\n").replace(/\n{3,}/g, "\n\n").replace(/[ \t]{2,}/g, " ").trim();
543
+ }
544
+ function normalizeRuntimeSourcePath(args) {
545
+ const topicSegment = args.topicId ?? "general";
546
+ const slug = slugify(args.title) || "memory-note";
547
+ const baseName = `${slug}.md`;
548
+ if (args.visibility === "shared") {
549
+ return (0, import_node_path.join)(RUNTIME_DIR, "shared", "topics", topicSegment, baseName);
550
+ }
551
+ return (0, import_node_path.join)(RUNTIME_DIR, "user", args.userId ?? "current", "topics", topicSegment, baseName);
552
+ }
553
+ function renderRuntimeMarkdown(title, text, tags) {
554
+ const frontmatter = [
555
+ "---",
556
+ `title: ${JSON.stringify(title)}`,
557
+ `tags: ${JSON.stringify(tags)}`,
558
+ "---"
559
+ ].join("\n");
560
+ return `${frontmatter}
561
+
562
+ # ${title}
563
+
564
+ ${text.trim()}
565
+ `;
566
+ }
567
+ function canAccessRecord(record, userId) {
568
+ if (record.visibility === "shared") {
569
+ return true;
570
+ }
571
+ if (record.userId == null) {
572
+ return userId != null;
573
+ }
574
+ return record.userId === userId;
575
+ }
281
576
  function embed(text, dimensions) {
282
577
  const vector = Array.from({ length: dimensions }, () => 0);
283
578
  const tokens = text.toLowerCase().split(/[^a-z0-9_\u4e00-\u9fff]+/i).filter(Boolean);
@@ -305,11 +600,23 @@ function inferMatchReasons(query, record) {
305
600
  if (record.text.toLowerCase().includes(normalized)) {
306
601
  reasons.push("text");
307
602
  }
603
+ if (record.sourcePath.toLowerCase().includes(normalized)) {
604
+ reasons.push("path");
605
+ }
308
606
  if (reasons.length === 0) {
309
607
  reasons.push("semantic");
310
608
  }
311
609
  return reasons;
312
610
  }
611
+ function dedupeTextItems(items) {
612
+ return [...new Set(items.map((item) => item.trim()).filter(Boolean))];
613
+ }
614
+ function sanitizeTag(value) {
615
+ return value.replace(/\.[^.]+$/, "").replace(/[_-]+/g, " ").trim();
616
+ }
617
+ function slugify(value) {
618
+ return value.toLowerCase().replace(/[^a-z0-9\u4e00-\u9fff]+/gi, "-").replace(/^-+|-+$/g, "").slice(0, 60);
619
+ }
313
620
  function hashId(value) {
314
621
  return (0, import_node_crypto.createHash)("sha1").update(value).digest("hex").slice(0, 24);
315
622
  }
@@ -3,7 +3,7 @@
3
3
  "type": "tool",
4
4
  "name": "Bamdra Memory Vector",
5
5
  "description": "Local vector-style semantic retrieval enhancement for Bamdra memory.",
6
- "version": "0.1.7",
6
+ "version": "0.1.9",
7
7
  "main": "./dist/index.js",
8
8
  "skills": ["./skills"],
9
9
  "configSchema": {
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@bamdra/bamdra-memory-vector",
3
- "version": "0.1.7",
3
+ "version": "0.1.9",
4
4
  "description": "Lightweight local semantic retrieval enhancement for the Bamdra OpenClaw memory suite.",
5
5
  "license": "MIT",
6
6
  "homepage": "https://www.bamdra.com",
@@ -24,12 +24,15 @@ It complements topic memory. Use it when the user remembers something fuzzily, w
24
24
  - keep cross-user boundaries intact
25
25
  - do not flood the prompt with low-signal chunks
26
26
  - prefer a few strong recalls over many weak ones
27
+ - when the question plausibly targets local docs, notes, ideas, or knowledge files, check local vector-backed knowledge before using web search
27
28
 
28
29
  ## Markdown Knowledge Model
29
30
 
30
31
  - private Markdown is for one user's durable notes and memory fragments
31
32
  - shared Markdown is for team or reusable knowledge
32
33
  - both are editable by humans outside the runtime
34
+ - common human-managed directories include `knowledge/`, `docs/`, `notes/`, and `ideas/`
35
+ - `_runtime/` is system-managed and should not be treated as the main editing area
33
36
 
34
37
  ## Shared vs Private
35
38