membot 0.1.2 → 0.2.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -36,6 +36,15 @@ membot add ./docs --refresh-frequency 24h # auto-refresh every day
36
36
 
37
37
  Each entry becomes a new version under its own `logical_path`. PDFs/DOCX/HTML are converted to markdown; images get vision captions; original bytes are kept and reachable via `membot read --bytes`.
38
38
 
39
+ The default `logical_path` mirrors the source path so files with the same basename in different projects don't collide:
40
+
41
+ - Local file → absolute path with leading `/` stripped (e.g. `/Users/me/projA/README.md` → `Users/me/projA/README.md`).
42
+ - Local directory or glob → each entry's absolute path under the same shape.
43
+ - URL → `remotes/{host}/{path}` with `/`'s preserved (e.g. `https://github.com/userA/projA/blob/main/README.md` → `remotes/github.com/userA/projA/blob/main/README.md`). Query strings and fragments are dropped from the logical_path (the full URL is still stored for refresh).
44
+ - `inline:<text>` → `inline/{timestamp}.md`.
45
+
46
+ Pass `-p <path>` (or `--logical-path`) to override. On a directory walk it's treated as a *prefix* — entries land at `{prefix}/{path-relative-to-walk-base}`. Re-running `membot add` on the same source reuses the same logical_path and creates a new version (correct refresh behavior).
47
+
39
48
  ## 3. Read
40
49
 
41
50
  ```bash
@@ -101,7 +110,7 @@ Tombstones hide a path from `ls` / `tree` / `search` but `versions` and `read --
101
110
  | ------------------------------------- | ------------------------------------------------------------------------------ |
102
111
  | `membot add <source>` | Ingest file, directory, glob, URL, or `inline:<text>`. Skips unchanged sources; pass `--force` to re-ingest |
103
112
  | `membot ls [prefix]` | List current files (size, mime, refresh status) |
104
- | `membot tree [prefix]` | Render the synthesised logical-path tree |
113
+ | `membot tree [prefix]` | Render the synthesised logical-path tree (`--max-depth`, `--max-items` cap output) |
105
114
  | `membot read <path>` | Read current markdown surrogate (or `--bytes` for original) |
106
115
  | `membot write <path> --content <txt>` | Write inline agent-authored markdown as a new version |
107
116
  | `membot search <query>` | Hybrid search (semantic + BM25); add `--include-history` to search older versions |
@@ -36,6 +36,15 @@ membot add ./docs --refresh-frequency 24h # auto-refresh every day
36
36
 
37
37
  Each entry becomes a new version under its own `logical_path`. PDFs/DOCX/HTML are converted to markdown; images get vision captions; original bytes are kept and reachable via `membot read --bytes`.
38
38
 
39
+ The default `logical_path` mirrors the source path so files with the same basename in different projects don't collide:
40
+
41
+ - Local file → absolute path with leading `/` stripped (e.g. `/Users/me/projA/README.md` → `Users/me/projA/README.md`).
42
+ - Local directory or glob → each entry's absolute path under the same shape.
43
+ - URL → `remotes/{host}/{path}` with `/`'s preserved (e.g. `https://github.com/userA/projA/blob/main/README.md` → `remotes/github.com/userA/projA/blob/main/README.md`). Query strings and fragments are dropped from the logical_path (the full URL is still stored for refresh).
44
+ - `inline:<text>` → `inline/{timestamp}.md`.
45
+
46
+ Pass `-p <path>` (or `--logical-path`) to override. On a directory walk it's treated as a *prefix* — entries land at `{prefix}/{path-relative-to-walk-base}`. Re-running `membot add` on the same source reuses the same logical_path and creates a new version (correct refresh behavior).
47
+
39
48
  ## 3. Read
40
49
 
41
50
  ```bash
@@ -101,7 +110,7 @@ Tombstones hide a path from `ls` / `tree` / `search` but `versions` and `read --
101
110
  | ------------------------------------- | ------------------------------------------------------------------------------ |
102
111
  | `membot add <source>` | Ingest file, directory, glob, URL, or `inline:<text>`. Skips unchanged sources; pass `--force` to re-ingest |
103
112
  | `membot ls [prefix]` | List current files (size, mime, refresh status) |
104
- | `membot tree [prefix]` | Render the synthesised logical-path tree |
113
+ | `membot tree [prefix]` | Render the synthesised logical-path tree (`--max-depth`, `--max-items` cap output) |
105
114
  | `membot read <path>` | Read current markdown surrogate (or `--bytes` for original) |
106
115
  | `membot write <path> --content <txt>` | Write inline agent-authored markdown as a new version |
107
116
  | `membot search <query>` | Hybrid search (semantic + BM25); add `--include-history` to search older versions |
package/README.md CHANGED
@@ -50,9 +50,9 @@ The skill files describe the discover → ingest → search → read → write w
50
50
 
51
51
  | Command | Description |
52
52
  | ------------------------------- | --------------------------------------------------------------------------------- |
53
- | `membot add <source>` | Ingest a file, directory, glob, URL, or `inline:<text>`. Skips on unchanged source bytes; pass `--force` to re-ingest |
53
+ | `membot add <source>` | Ingest a file, directory, glob, URL, or `inline:<text>`. Default `logical_path` mirrors the source (absolute path for local files, `remotes/{host}/{path}` for URLs) so files with the same basename in different projects don't collide. Pass `-p <path>` to override or, on a directory walk, to set a prefix. Skips on unchanged source bytes; pass `--force` to re-ingest. |
54
54
  | `membot ls [prefix]` | List current files (size, mime, refresh status) |
55
- | `membot tree [prefix]` | Render the synthesised logical-path tree |
55
+ | `membot tree [prefix]` | Render the synthesised logical-path tree (`--max-depth`, `--max-items` cap output) |
56
56
  | `membot read <path>` | Read the markdown surrogate (or `--bytes` for original bytes, base64) |
57
57
  | `membot search <query>` | Hybrid search (semantic + BM25); `--include-history` searches older versions |
58
58
  | `membot info <path>` | Inspect metadata (source, fetcher, schedule, digests) without content |
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "membot",
3
- "version": "0.1.2",
3
+ "version": "0.2.1",
4
4
  "description": "Versioned context store with hybrid search for AI agents. Stdio + HTTP MCP server and CLI.",
5
5
  "type": "module",
6
6
  "exports": {
@@ -209,7 +209,7 @@ async function ingestLocalFiles(
209
209
  const isMulti = resolved.entries.length > 1;
210
210
 
211
211
  for (const entry of resolved.entries) {
212
- ctx.progress.tick(entry.relPath);
212
+ ctx.progress.tick(entry.relPathFromBase);
213
213
  const logicalPath = pickLogicalPath(input.logical_path, entry, isMulti);
214
214
  const result: IngestEntryResult = {
215
215
  source_path: entry.absPath,
@@ -404,26 +404,47 @@ async function persistVersion(ctx: AppContext, p: PersistParams): Promise<string
404
404
  }
405
405
 
406
406
  /**
407
- * Pick the logical path for a single matched entry. For a single-file
408
- * ingest with explicit `logical_path`, use it as-is. For multi-entry
409
- * ingests with `logical_path` set, treat it as a *prefix* under which
410
- * each entry's relative path is placed.
407
+ * Pick the logical path for a single matched entry.
408
+ *
409
+ * - Default (no explicit logical_path): use the entry's absolute filesystem
410
+ * path with `\` normalized to `/` and the leading `/` stripped. This
411
+ * keeps `~/projA/README.md` and `~/projB/README.md` from colliding under
412
+ * a shared `README.md`. Two adds of the same absolute path produce the
413
+ * same logical_path, so the second add correctly creates a new version.
414
+ * - Single-source with explicit logical_path: use it verbatim.
415
+ * - Multi-entry (directory/glob) with explicit logical_path: treat as a
416
+ * prefix and append each entry's path relative to the walk base.
411
417
  */
412
- function pickLogicalPath(explicit: string | undefined, entry: ResolvedLocalEntry, isMulti: boolean): string {
413
- if (!explicit) return entry.relPath.replaceAll("\\", "/");
418
+ export function pickLogicalPath(explicit: string | undefined, entry: ResolvedLocalEntry, isMulti: boolean): string {
419
+ if (!explicit) return normalizeAbs(entry.absPath);
414
420
  if (!isMulti) return explicit;
415
421
  const prefix = explicit.endsWith("/") ? explicit.slice(0, -1) : explicit;
416
- return `${prefix}/${entry.relPath.replaceAll("\\", "/")}`;
422
+ return `${prefix}/${entry.relPathFromBase.replaceAll("\\", "/")}`;
417
423
  }
418
424
 
419
- /** Default logical path for an ingested URL — host + path, sanitized. */
420
- function defaultLogicalForUrl(url: string): string {
425
+ /**
426
+ * Normalize an absolute filesystem path into a logical_path:
427
+ * `\` → `/`, leading `/` stripped. Drive letters (Windows `C:`) are kept
428
+ * as the first path segment.
429
+ */
430
+ export function normalizeAbs(absPath: string): string {
431
+ return absPath.replaceAll("\\", "/").replace(/^\/+/, "");
432
+ }
433
+
434
+ /**
435
+ * Default logical path for an ingested URL: `remotes/{host}/{pathname}`
436
+ * with slashes preserved so two projects on the same host (e.g.,
437
+ * github.com) don't collide. Query string and fragment are dropped from
438
+ * the logical_path for stable identity — the full URL is still preserved
439
+ * on the row in `source_path` and used for refresh.
440
+ */
441
+ export function defaultLogicalForUrl(url: string): string {
421
442
  try {
422
443
  const u = new URL(url);
423
- const tail = u.pathname.replace(/^\/+/, "").replaceAll("/", "_") || "root";
424
- return `urls/${u.hostname}/${tail || "root"}`;
444
+ const tail = u.pathname.replace(/^\/+/, "").replace(/\/+$/, "") || "index";
445
+ return `remotes/${u.hostname}/${tail}`;
425
446
  } catch {
426
- return `urls/${url.replace(/[^a-z0-9.-]/gi, "_")}`;
447
+ return `remotes/${url.replace(/[^a-z0-9.-]/gi, "_")}`;
427
448
  }
428
449
  }
429
450
 
@@ -9,9 +9,15 @@ export type ResolvedSource =
9
9
  | { kind: "local-files"; entries: ResolvedLocalEntry[]; basePath: string };
10
10
 
11
11
  export interface ResolvedLocalEntry {
12
+ /** Absolute filesystem path (post-realpath). */
12
13
  absPath: string;
13
- /** Path relative to the base; used to derive a default logical_path. */
14
- relPath: string;
14
+ /**
15
+ * Path relative to the walk base. Used when the caller passes an
16
+ * explicit `logical_path` *prefix* (directory/glob mode) — entries land
17
+ * at `{prefix}/{relPathFromBase}`. For default logical_paths we use
18
+ * `absPath` directly so same-named files in different directories don't collide.
19
+ */
20
+ relPathFromBase: string;
15
21
  }
16
22
 
17
23
  export interface ResolveOptions {
@@ -91,10 +97,11 @@ export async function resolveSource(source: string, options: ResolveOptions = {}
91
97
  }
92
98
 
93
99
  if (st.isFile()) {
100
+ const real = await realpath(abs);
94
101
  return {
95
102
  kind: "local-files",
96
- basePath: abs,
97
- entries: [{ absPath: abs, relPath: source.split(sep).pop() ?? source }],
103
+ basePath: real,
104
+ entries: [{ absPath: real, relPathFromBase: real.split(sep).pop() ?? real }],
98
105
  };
99
106
  }
100
107
 
@@ -201,7 +208,7 @@ async function walk(
201
208
  if (isExclude?.(relForMatch)) continue;
202
209
  if (!isInclude(relForMatch)) continue;
203
210
  if (extraMatchers.some((m) => !m(relForMatch))) continue;
204
- entries.push({ absPath: real, relPath: relForMatch });
211
+ entries.push({ absPath: real, relPathFromBase: relForMatch });
205
212
  }
206
213
 
207
214
  return { kind: "local-files", basePath: base, entries };
@@ -14,7 +14,14 @@ export const addOperation = defineOperation({
14
14
  - a glob pattern (e.g. "docs/**/*.md")
15
15
  - a URL (fetched via mcpx if configured, otherwise plain HTTP)
16
16
  - "inline:<text>" literal
17
- PDF, DOCX, HTML, images, and other binaries are converted to markdown — native libraries first, vision/OCR for images, LLM fallback for messy or scanned input. Original bytes are kept in the blobs table; \`membot_read bytes=true\` returns them. Setting \`refresh_frequency\` enables automatic refresh from the daemon. By default, re-ingesting an unchanged source (same source_sha256 as the current version) is a no-op and reports \`status: "unchanged"\`; pass \`force=true\` to always create a new version. Each newly-ingested file becomes a new version under its own logical_path; existing versions stay queryable via membot_versions. Directory/glob ingests stream one file at a time — partial failures do not abort the rest; the response lists per-entry status.`,
17
+ PDF, DOCX, HTML, images, and other binaries are converted to markdown — native libraries first, vision/OCR for images, LLM fallback for messy or scanned input. Original bytes are kept in the blobs table; \`membot_read bytes=true\` returns them. Setting \`refresh_frequency\` enables automatic refresh from the daemon. By default, re-ingesting an unchanged source (same source_sha256 as the current version) is a no-op and reports \`status: "unchanged"\`; pass \`force=true\` to always create a new version. Each newly-ingested file becomes a new version under its own logical_path; existing versions stay queryable via membot_versions. Directory/glob ingests stream one file at a time — partial failures do not abort the rest; the response lists per-entry status.
18
+
19
+ When \`logical_path\` is omitted, it is derived from the source so files with the same basename in different projects do not collide:
20
+ - Local sources use the entry's absolute filesystem path with the leading "/" stripped (e.g. "/Users/me/projA/README.md" → "Users/me/projA/README.md").
21
+ - URLs use "remotes/{host}/{path}" with slashes preserved (e.g. "https://github.com/u/p/blob/main/README.md" → "remotes/github.com/u/p/blob/main/README.md"). Query strings and fragments are dropped from the logical_path; the full URL is still stored on the row for refresh.
22
+ - "inline:<text>" defaults to "inline/{timestamp}.md".
23
+
24
+ Pass \`logical_path\` to override. For a directory or glob walk it is treated as a PREFIX — each entry is placed at "{prefix}/{path-relative-to-walk-base}". Re-running \`membot_add\` on the same source resolves to the same logical_path; if bytes are unchanged the call is a no-op (status \`unchanged\`), otherwise a new version is created.`,
18
25
  inputSchema: z.object({
19
26
  source: z.string().describe("Local path, directory, glob, URL, or `inline:<text>` literal"),
20
27
  logical_path: z.string().optional().describe("Destination logical_path (single source) or prefix (directory/glob)"),
@@ -8,6 +8,7 @@ interface TreeNode {
8
8
  full_path: string;
9
9
  is_file: boolean;
10
10
  children?: TreeNode[];
11
+ children_truncated?: number;
11
12
  }
12
13
 
13
14
  export const treeOperation = defineOperation({
@@ -18,6 +19,10 @@ export const treeOperation = defineOperation({
18
19
  inputSchema: z.object({
19
20
  prefix: z.string().optional().describe("Only show paths starting with this prefix"),
20
21
  max_depth: z.number().default(4).describe("How many path segments deep to render"),
22
+ max_items: z
23
+ .number()
24
+ .default(20)
25
+ .describe("Max children to render at each level; remainder is summarised as '+N more'"),
21
26
  }),
22
27
  outputSchema: z.object({
23
28
  root: z.string(),
@@ -27,77 +32,116 @@ export const treeOperation = defineOperation({
27
32
  full_path: z.string(),
28
33
  is_file: z.boolean(),
29
34
  children: z.array(z.unknown()).optional(),
35
+ children_truncated: z.number().optional(),
30
36
  }),
31
37
  ),
38
+ truncated: z.number().optional(),
32
39
  }),
33
40
  cli: { positional: ["prefix"] },
34
41
  console_formatter: (result) => {
35
42
  const lines: string[] = [colors.bold(result.root)];
36
43
  const nodes = result.tree as TreeNode[];
37
- renderNodes(nodes, "", lines);
44
+ const topTruncated = (result as { truncated?: number }).truncated ?? 0;
45
+ renderNodes(nodes, "", lines, topTruncated);
38
46
  if (lines.length === 1) lines.push(colors.dim("(empty)"));
39
47
  return lines.join("\n");
40
48
  },
41
49
  handler: async (input, ctx) => {
42
50
  const allPaths = await listAllCurrentPaths(ctx.db);
43
51
  const filtered = input.prefix ? allPaths.filter((p) => p.startsWith(input.prefix!)) : allPaths;
44
- return { root: input.prefix ?? "/", tree: buildTree(filtered, input.max_depth) };
52
+ const tree = buildTree(filtered, input.max_depth);
53
+ const truncated = truncateTree(tree, input.max_items);
54
+ return {
55
+ root: input.prefix ?? "/",
56
+ tree,
57
+ ...(truncated > 0 ? { truncated } : {}),
58
+ };
45
59
  },
46
60
  });
47
61
 
48
62
  /**
49
63
  * Build a tree of TreeNode objects from a flat list of `/`-delimited paths.
50
- * Splits each path into segments and groups by common prefix; nodes deeper
51
- * than `maxDepth` are folded into their parent's `children` summary count.
64
+ * Splits each path into segments and groups by common prefix. Segments
65
+ * deeper than `maxDepth` are folded into the deepest visible ancestor;
66
+ * that ancestor is marked `is_file=true` so the renderer surfaces it as a
67
+ * leaf even though longer paths exist underneath. Children are sorted by
68
+ * name within each level so downstream truncation is deterministic.
52
69
  */
53
- function buildTree(paths: string[], maxDepth: number): TreeNode[] {
54
- const root: Map<string, TreeNode> = new Map();
70
+ export function buildTree(paths: string[], maxDepth: number): TreeNode[] {
71
+ interface MutableNode {
72
+ name: string;
73
+ full_path: string;
74
+ is_file: boolean;
75
+ children: Map<string, MutableNode>;
76
+ }
77
+ const root = new Map<string, MutableNode>();
55
78
  for (const path of paths) {
56
79
  const segs = path.split("/").filter(Boolean);
80
+ if (segs.length === 0) continue;
57
81
  let level = root;
58
82
  const trail: string[] = [];
59
- for (let i = 0; i < segs.length && i < maxDepth; i++) {
83
+ const stop = Math.min(segs.length, maxDepth);
84
+ for (let i = 0; i < stop; i++) {
60
85
  const seg = segs[i]!;
61
86
  trail.push(seg);
62
- const fullPath = trail.join("/");
63
87
  let node = level.get(seg);
64
88
  if (!node) {
65
- node = { name: seg, full_path: fullPath, is_file: i === segs.length - 1 };
89
+ node = { name: seg, full_path: trail.join("/"), is_file: false, children: new Map() };
66
90
  level.set(seg, node);
67
- } else if (i === segs.length - 1) {
68
- node.is_file = true;
69
- }
70
- if (i < segs.length - 1) {
71
- if (!node.children) node.children = [];
72
- const childMap = new Map(node.children.map((c) => [c.name, c] as const));
73
- node.children = [...childMap.values()];
74
- level = childMap;
75
- if (childMap.size === 0) {
76
- level = new Map();
77
- node.children = [];
78
- } else {
79
- // rebuild level pointer
80
- level = new Map(node.children.map((c) => [c.name, c] as const));
81
- }
82
91
  }
92
+ const isTerminal = i === segs.length - 1 || i === maxDepth - 1;
93
+ if (isTerminal) node.is_file = true;
94
+ level = node.children;
83
95
  }
84
96
  }
85
- return [...root.values()].sort((a, b) => a.name.localeCompare(b.name));
97
+ const finalize = (m: Map<string, MutableNode>): TreeNode[] => {
98
+ const arr = [...m.values()].sort((a, b) => a.name.localeCompare(b.name));
99
+ return arr.map((n) => {
100
+ const out: TreeNode = { name: n.name, full_path: n.full_path, is_file: n.is_file };
101
+ if (n.children.size > 0) out.children = finalize(n.children);
102
+ return out;
103
+ });
104
+ };
105
+ return finalize(root);
106
+ }
107
+
108
+ /**
109
+ * Trim each child list (and the root list) to `maxItems`, mutating in place.
110
+ * Returns the number of root entries dropped; per-node drops are recorded on
111
+ * `node.children_truncated`. Input is assumed pre-sorted (by `buildTree`) so
112
+ * "first N" is stable.
113
+ */
114
+ export function truncateTree(nodes: TreeNode[], maxItems: number): number {
115
+ for (const node of nodes) {
116
+ if (node.children?.length) {
117
+ const dropped = truncateTree(node.children, maxItems);
118
+ if (dropped > 0) node.children_truncated = dropped;
119
+ }
120
+ }
121
+ if (nodes.length > maxItems) {
122
+ const dropped = nodes.length - maxItems;
123
+ nodes.length = maxItems;
124
+ return dropped;
125
+ }
126
+ return 0;
86
127
  }
87
128
 
88
129
  /**
89
130
  * Walk a tree and append `├── name` / `└── name` lines with proper continuation
90
- * prefixes. Directories are rendered in cyan-bold; files in plain text.
131
+ * prefixes. Directories are rendered in cyan-bold; files in plain text. When a
132
+ * level was truncated, a dim trailing `+N more` line is appended at that level.
91
133
  */
92
- function renderNodes(nodes: TreeNode[], prefix: string, out: string[]): void {
93
- const sorted = [...nodes].sort((a, b) => a.name.localeCompare(b.name));
94
- sorted.forEach((node, i) => {
95
- const last = i === sorted.length - 1;
134
+ function renderNodes(nodes: TreeNode[], prefix: string, out: string[], truncatedCount = 0): void {
135
+ nodes.forEach((node, i) => {
136
+ const last = i === nodes.length - 1 && truncatedCount === 0;
96
137
  const branch = last ? "└── " : "├── ";
97
138
  const label = node.is_file && !node.children?.length ? node.name : colors.cyan(colors.bold(node.name));
98
139
  out.push(`${prefix}${branch}${label}`);
99
140
  if (node.children?.length) {
100
- renderNodes(node.children, prefix + (last ? " " : "│ "), out);
141
+ renderNodes(node.children, prefix + (last ? " " : "│ "), out, node.children_truncated ?? 0);
101
142
  }
102
143
  });
144
+ if (truncatedCount > 0) {
145
+ out.push(`${prefix}└── ${colors.dim(`+${truncatedCount} more`)}`);
146
+ }
103
147
  }