membot 0.2.1 → 0.3.1

This diff shows the changes between two publicly released versions of this package, as they appear in the public registry they were published to. It is provided for informational purposes only.
@@ -29,6 +29,7 @@ membot search "<question>" # hybrid search (semantic + keyword)
29
29
  membot add ./README.md # single file
30
30
  membot add ./docs # recursive directory walk
31
31
  membot add "docs/**/*.md" # glob
32
+ membot add a.md b.md "docs/**/*.md" # any number of args; each resolved independently
32
33
  membot add https://example.com/spec.pdf # URL (auto-converted to markdown)
33
34
  membot add "inline:Decision: use X because Y" # literal text
34
35
  membot add ./docs --refresh-frequency 24h # auto-refresh every day
@@ -74,7 +75,8 @@ Inline writes create a new `(logical_path, version_id)` row just like file inges
74
75
  membot refresh <logical_path> # re-read source; new version only if bytes changed
75
76
  membot refresh # refresh all rows whose schedule has elapsed
76
77
  membot mv old/path new/path # rename (history preserved under both)
77
- membot rm <logical_path> # tombstone (history still queryable)
78
+ membot rm <paths...> # tombstone one or more paths/globs (history still queryable)
79
+ membot rm "docs/**/*.md" notes/old.md # globs match logical_paths in the DB; literals + globs can mix
78
80
  membot prune --before <iso-ts> # drop non-current versions older than cutoff (irreversible)
79
81
  ```
80
82
 
@@ -108,7 +110,7 @@ Tombstones hide a path from `ls` / `tree` / `search` but `versions` and `read --
108
110
 
109
111
  | Command | Purpose |
110
112
  | ------------------------------------- | ------------------------------------------------------------------------------ |
111
- | `membot add <source>` | Ingest file, directory, glob, URL, or `inline:<text>`. Skips unchanged sources; pass `--force` to re-ingest |
113
+ | `membot add <sources...>` | Ingest one or more files, directories, globs, URLs, or `inline:<text>`. Skips unchanged sources; pass `--force` to re-ingest |
112
114
  | `membot ls [prefix]` | List current files (size, mime, refresh status) |
113
115
  | `membot tree [prefix]` | Render the synthesised logical-path tree (`--max-depth`, `--max-items` cap output) |
114
116
  | `membot read <path>` | Read current markdown surrogate (or `--bytes` for original) |
@@ -118,7 +120,7 @@ Tombstones hide a path from `ls` / `tree` / `search` but `versions` and `read --
118
120
  | `membot versions <path>` | List every version newest-first with version_id and change notes |
119
121
  | `membot diff <path> --a <ts>` | Unified diff between two versions |
120
122
  | `membot mv <old> <new>` | Rename a logical_path (history preserved) |
121
- | `membot rm <path>` | Tombstone a logical_path (history still queryable) |
123
+ | `membot rm <paths...>` | Tombstone one or more logical_paths or globs (e.g. `"docs/**/*.md"`); history kept |
122
124
  | `membot refresh [path]` | Re-read source; create new version only if bytes changed |
123
125
  | `membot prune --before <ts>` | Permanently drop non-current versions older than cutoff (irreversible) |
124
126
  | `membot serve` | Start MCP server (stdio default, `--http <port>` for HTTP) |
@@ -29,6 +29,7 @@ membot search "<question>" # hybrid search (semantic + keyword)
29
29
  membot add ./README.md # single file
30
30
  membot add ./docs # recursive directory walk
31
31
  membot add "docs/**/*.md" # glob
32
+ membot add a.md b.md "docs/**/*.md" # any number of args; each resolved independently
32
33
  membot add https://example.com/spec.pdf # URL (auto-converted to markdown)
33
34
  membot add "inline:Decision: use X because Y" # literal text
34
35
  membot add ./docs --refresh-frequency 24h # auto-refresh every day
@@ -74,7 +75,8 @@ Inline writes create a new `(logical_path, version_id)` row just like file inges
74
75
  membot refresh <logical_path> # re-read source; new version only if bytes changed
75
76
  membot refresh # refresh all rows whose schedule has elapsed
76
77
  membot mv old/path new/path # rename (history preserved under both)
77
- membot rm <logical_path> # tombstone (history still queryable)
78
+ membot rm <paths...> # tombstone one or more paths/globs (history still queryable)
79
+ membot rm "docs/**/*.md" notes/old.md # globs match logical_paths in the DB; literals + globs can mix
78
80
  membot prune --before <iso-ts> # drop non-current versions older than cutoff (irreversible)
79
81
  ```
80
82
 
@@ -108,7 +110,7 @@ Tombstones hide a path from `ls` / `tree` / `search` but `versions` and `read --
108
110
 
109
111
  | Command | Purpose |
110
112
  | ------------------------------------- | ------------------------------------------------------------------------------ |
111
- | `membot add <source>` | Ingest file, directory, glob, URL, or `inline:<text>`. Skips unchanged sources; pass `--force` to re-ingest |
113
+ | `membot add <sources...>` | Ingest one or more files, directories, globs, URLs, or `inline:<text>`. Skips unchanged sources; pass `--force` to re-ingest |
112
114
  | `membot ls [prefix]` | List current files (size, mime, refresh status) |
113
115
  | `membot tree [prefix]` | Render the synthesised logical-path tree (`--max-depth`, `--max-items` cap output) |
114
116
  | `membot read <path>` | Read current markdown surrogate (or `--bytes` for original) |
@@ -118,7 +120,7 @@ Tombstones hide a path from `ls` / `tree` / `search` but `versions` and `read --
118
120
  | `membot versions <path>` | List every version newest-first with version_id and change notes |
119
121
  | `membot diff <path> --a <ts>` | Unified diff between two versions |
120
122
  | `membot mv <old> <new>` | Rename a logical_path (history preserved) |
121
- | `membot rm <path>` | Tombstone a logical_path (history still queryable) |
123
+ | `membot rm <paths...>` | Tombstone one or more logical_paths or globs (e.g. `"docs/**/*.md"`); history kept |
122
124
  | `membot refresh [path]` | Re-read source; create new version only if bytes changed |
123
125
  | `membot prune --before <ts>` | Permanently drop non-current versions older than cutoff (irreversible) |
124
126
  | `membot serve` | Start MCP server (stdio default, `--http <port>` for HTTP) |
package/README.md CHANGED
@@ -25,12 +25,13 @@ This pulls in DuckDB's per-platform native bindings alongside membot. The build
25
25
  ## Quick start
26
26
 
27
27
  ```bash
28
- membot add ./docs # ingest a directory recursively
29
- membot add https://example.com/spec.pdf # ingest a URL (auto-converted to markdown)
30
- membot ls # list current files
31
- membot search "how does refresh work?" # hybrid search
32
- membot read docs/refresh.md # read the markdown surrogate
33
- membot serve # expose the same operations as MCP tools (stdio)
28
+ membot add ./docs # ingest a directory recursively
29
+ membot add https://example.com/spec.pdf # ingest a URL (auto-converted to markdown)
30
+ membot add a.md b.md "docs/**/*.md" # any number of files / globs in one call
31
+ membot ls # list current files
32
+ membot search "how does refresh work?" # hybrid search
33
+ membot read docs/refresh.md # read the markdown surrogate
34
+ membot serve # expose the same operations as MCP tools (stdio)
34
35
  ```
35
36
 
36
37
  ## Use with Claude Code or Cursor
@@ -50,7 +51,7 @@ The skill files describe the discover → ingest → search → read → write w
50
51
 
51
52
  | Command | Description |
52
53
  | ------------------------------- | --------------------------------------------------------------------------------- |
53
- | `membot add <source>` | Ingest a file, directory, glob, URL, or `inline:<text>`. Default `logical_path` mirrors the source (absolute path for local files, `remotes/{host}/{path}` for URLs) so files with the same basename in different projects don't collide. Pass `-p <path>` to override or, on a directory walk, to set a prefix. Skips on unchanged source bytes; pass `--force` to re-ingest. |
54
+ | `membot add <sources...>` | Ingest one or more files, directories, globs, URLs, or `inline:<text>`. Default `logical_path` mirrors the source (absolute path for local files, `remotes/{host}/{path}` for URLs) so files with the same basename in different projects don't collide. Pass `-p <path>` to override or set a prefix. Skips unchanged source bytes; pass `--force` to re-ingest. |
54
55
  | `membot ls [prefix]` | List current files (size, mime, refresh status) |
55
56
  | `membot tree [prefix]` | Render the synthesised logical-path tree (`--max-depth`, `--max-items` cap output) |
56
57
  | `membot read <path>` | Read the markdown surrogate (or `--bytes` for original bytes, base64) |
@@ -60,7 +61,7 @@ The skill files describe the discover → ingest → search → read → write w
60
61
  | `membot diff <path> <a> [b]` | Unified diff between two versions |
61
62
  | `membot write <path>` | Write inline agent-authored markdown as a new version |
62
63
  | `membot mv <from> <to>` | Rename a logical_path (history preserved under both) |
63
- | `membot rm <path>` | Tombstone a logical_path (history still queryable) |
64
+ | `membot rm <paths...>` | Tombstone one or more logical_paths or globs (e.g. `"docs/**/*.md"`); history kept |
64
65
  | `membot refresh [path]` | Re-read source; new version only if bytes changed |
65
66
  | `membot prune --before <ts>` | Permanently drop non-current versions older than cutoff (irreversible) |
66
67
  | `membot serve` | Run the MCP server (stdio default; `--http <port>` for HTTP) |
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "membot",
3
- "version": "0.2.1",
3
+ "version": "0.3.1",
4
4
  "description": "Versioned context store with hybrid search for AI agents. Stdio + HTTP MCP server and CLI.",
5
5
  "type": "module",
6
6
  "exports": {
@@ -22,11 +22,22 @@ export function applySchemaToCommand<S extends z.ZodObject>(
22
22
  const shape = schema.shape;
23
23
  const positionalOrder = options.positional ?? [];
24
24
 
25
- for (const fieldName of positionalOrder) {
25
+ for (let i = 0; i < positionalOrder.length; i++) {
26
+ const fieldName = positionalOrder[i];
27
+ if (fieldName === undefined) continue;
26
28
  const fieldSchema = shape[fieldName];
27
29
  if (!fieldSchema) continue;
28
30
  const required = !isOptional(fieldSchema);
29
- const label = required ? `<${fieldName}>` : `[${fieldName}]`;
31
+ const variadic = unwrap(fieldSchema) instanceof z.ZodArray;
32
+ if (variadic && i !== positionalOrder.length - 1) {
33
+ throw new HelpfulError({
34
+ kind: "internal_error",
35
+ message: `variadic positional \`${fieldName}\` must be the last positional argument`,
36
+ hint: "Reorder the operation's `cli.positional` so the array-typed field comes last.",
37
+ });
38
+ }
39
+ const inner = variadic ? `${fieldName}...` : fieldName;
40
+ const label = required ? `<${inner}>` : `[${inner}]`;
30
41
  cmd.argument(label, describeOf(fieldSchema));
31
42
  }
32
43
 
@@ -8,23 +8,33 @@ const FetcherKindEnum = z.enum(["http", "mcpx", "local", "inline"]);
8
8
  export const addOperation = defineOperation({
9
9
  name: "membot_add",
10
10
  cliName: "add",
11
- description: `Ingest one or many sources into the store. \`source\` accepts:
11
+ description: `Ingest one or many sources into the store. Each \`sources\` arg accepts:
12
12
  - a local file path
13
13
  - a local directory (recursive walk, symlinks followed)
14
14
  - a glob pattern (e.g. "docs/**/*.md")
15
15
  - a URL (fetched via mcpx if configured, otherwise plain HTTP)
16
16
  - "inline:<text>" literal
17
- PDF, DOCX, HTML, images, and other binaries are converted to markdown — native libraries first, vision/OCR for images, LLM fallback for messy or scanned input. Original bytes are kept in the blobs table; \`membot_read bytes=true\` returns them. Setting \`refresh_frequency\` enables automatic refresh from the daemon. By default, re-ingesting an unchanged source (same source_sha256 as the current version) is a no-op and reports \`status: "unchanged"\`; pass \`force=true\` to always create a new version. Each newly-ingested file becomes a new version under its own logical_path; existing versions stay queryable via membot_versions. Directory/glob ingests stream one file at a time — partial failures do not abort the rest; the response lists per-entry status.
17
+ Pass any number of args; each is resolved independently and the matched entries are concatenated into one response. PDF, DOCX, HTML, images, and other binaries are converted to markdown — native libraries first, vision/OCR for images, LLM fallback for messy or scanned input. Original bytes are kept in the blobs table; \`membot_read bytes=true\` returns them. Setting \`refresh_frequency\` enables automatic refresh from the daemon. By default, re-ingesting an unchanged source (same source_sha256 as the current version) is a no-op and reports \`status: "unchanged"\`; pass \`force=true\` to always create a new version. Each newly-ingested file becomes a new version under its own logical_path; existing versions stay queryable via membot_versions. Directory/glob ingests stream one file at a time — partial failures do not abort the rest; the response lists per-entry status.
18
18
 
19
19
  When \`logical_path\` is omitted, it is derived from the source so files with the same basename in different projects do not collide:
20
20
  - Local sources use the entry's absolute filesystem path with the leading "/" stripped (e.g. "/Users/me/projA/README.md" → "Users/me/projA/README.md").
21
21
  - URLs use "remotes/{host}/{path}" with slashes preserved (e.g. "https://github.com/u/p/blob/main/README.md" → "remotes/github.com/u/p/blob/main/README.md"). Query strings and fragments are dropped from the logical_path; the full URL is still stored on the row for refresh.
22
22
  - "inline:<text>" defaults to "inline/{timestamp}.md".
23
23
 
24
- Pass \`logical_path\` to override. For a directory or glob walk it is treated as a PREFIX — each entry is placed at "{prefix}/{path-relative-to-walk-base}". Re-running \`membot_add\` on the same source resolves to the same logical_path; if bytes are unchanged the call is a no-op (status \`unchanged\`), otherwise a new version is created.`,
24
+ Pass \`logical_path\` to override. For a multi-source / directory / glob walk it is treated as a PREFIX — each entry is placed at "{prefix}/{path-relative-to-walk-base}". Re-running \`membot_add\` on the same source resolves to the same logical_path; if bytes are unchanged the call is a no-op (status \`unchanged\`), otherwise a new version is created.`,
25
25
  inputSchema: z.object({
26
- source: z.string().describe("Local path, directory, glob, URL, or `inline:<text>` literal"),
27
- logical_path: z.string().optional().describe("Destination logical_path (single source) or prefix (directory/glob)"),
26
+ sources: z
27
+ .array(z.string())
28
+ .min(1)
29
+ .describe(
30
+ "One or more sources. Each arg is independently resolved as a local path, directory, glob, URL, or `inline:<text>` literal.",
31
+ ),
32
+ logical_path: z
33
+ .string()
34
+ .optional()
35
+ .describe(
36
+ "Destination logical_path (single source resolving to a single entry) or prefix (multi-arg / directory / glob)",
37
+ ),
28
38
  include: z
29
39
  .string()
30
40
  .optional()
@@ -67,7 +77,7 @@ Pass \`logical_path\` to override. For a directory or glob walk it is treated as
67
77
  failed: z.number(),
68
78
  }),
69
79
  cli: {
70
- positional: ["source"],
80
+ positional: ["sources"],
71
81
  aliases: { logical_path: "-p", refresh_frequency: "-r", change_note: "-m", force: "-f" },
72
82
  },
73
83
  console_formatter: (result) => {
@@ -85,5 +95,23 @@ Pass \`logical_path\` to override. For a directory or glob walk it is treated as
85
95
  if (result.failed > 0) parts.push(colors.red(`failed ${result.failed}`));
86
96
  return `${lines.join("\n")}\n${parts.join(", ")}`;
87
97
  },
88
- handler: async (input, ctx) => ingest(input, ctx),
98
+ handler: async (input, ctx) => {
99
+ const { sources, ...rest } = input;
100
+ const aggregated = {
101
+ ingested: [] as Awaited<ReturnType<typeof ingest>>["ingested"],
102
+ total: 0,
103
+ ok: 0,
104
+ unchanged: 0,
105
+ failed: 0,
106
+ };
107
+ for (const source of sources) {
108
+ const r = await ingest({ ...rest, source }, ctx);
109
+ aggregated.ingested.push(...r.ingested);
110
+ aggregated.total += r.total;
111
+ aggregated.ok += r.ok;
112
+ aggregated.unchanged += r.unchanged;
113
+ aggregated.failed += r.failed;
114
+ }
115
+ return aggregated;
116
+ },
89
117
  });
@@ -1,6 +1,8 @@
1
+ import picomatch from "picomatch";
1
2
  import { z } from "zod";
2
- import { getCurrent, tombstone } from "../db/files.ts";
3
- import { HelpfulError } from "../errors.ts";
3
+ import { listAllCurrentPaths, tombstone } from "../db/files.ts";
4
+ import { asHelpful, HelpfulError } from "../errors.ts";
5
+ import { isGlob } from "../ingest/source-resolver.ts";
4
6
  import { colors } from "../output/formatter.ts";
5
7
  import { defineOperation } from "./types.ts";
6
8
 
@@ -8,28 +10,85 @@ export const removeOperation = defineOperation({
8
10
  name: "membot_delete",
9
11
  cliName: "rm",
10
12
  bashEquivalent: "rm",
11
- description: `Tombstone a logical_path so it no longer appears in membot_list / membot_tree / membot_search. Old versions remain queryable via membot_versions and membot_read with an explicit version. Use membot_prune to permanently drop history.`,
13
+ description: `Tombstone one or more logical_paths so they no longer appear in membot_list / membot_tree / membot_search. Each \`paths\` arg is independently treated as either a literal logical_path or a glob pattern (e.g. "docs/**/*.md"); globs are matched against current logical_paths in the DB, not the filesystem. The union of matches is deduplicated, then tombstoned one at a time — partial failures are reported per-entry without aborting the rest. An input arg that matches zero current files is an error (the response includes which arg). Old versions remain queryable via membot_versions and membot_read with an explicit version. Use membot_prune to permanently drop history.`,
12
14
  inputSchema: z.object({
13
- logical_path: z.string().describe("Path to tombstone"),
15
+ paths: z
16
+ .array(z.string())
17
+ .min(1)
18
+ .describe(
19
+ 'One or more logical_paths or glob patterns (e.g. "docs/**/*.md"). Each arg is matched independently against current logical_paths in the DB.',
20
+ ),
14
21
  change_note: z.string().optional().describe("Why this is being deleted"),
15
22
  }),
16
23
  outputSchema: z.object({
17
- logical_path: z.string(),
18
- tombstone_version_id: z.string(),
24
+ removed: z.array(
25
+ z.object({
26
+ logical_path: z.string(),
27
+ version_id: z.string().nullable(),
28
+ status: z.enum(["ok", "failed"]),
29
+ error: z.string().optional(),
30
+ }),
31
+ ),
32
+ total: z.number(),
33
+ ok: z.number(),
34
+ failed: z.number(),
19
35
  }),
20
- cli: { positional: ["logical_path"], aliases: { change_note: "-m" } },
21
- console_formatter: (result) =>
22
- `${colors.green("✓")} tombstoned ${colors.cyan(result.logical_path)} ${colors.dim(`@ ${result.tombstone_version_id}`)}`,
36
+ cli: { positional: ["paths"], aliases: { change_note: "-m" } },
37
+ console_formatter: (result) => {
38
+ const lines = result.removed.map((e) =>
39
+ e.status === "ok"
40
+ ? `${colors.green("✓")} tombstoned ${colors.cyan(e.logical_path)} ${colors.dim(`@ ${e.version_id}`)}`
41
+ : `${colors.red("✗")} ${e.logical_path} ${colors.dim(e.error ?? "")}`,
42
+ );
43
+ const summary = result.failed
44
+ ? `${colors.green(`removed ${result.ok}`)}, ${colors.red(`failed ${result.failed}`)}`
45
+ : colors.green(`removed ${result.ok}`);
46
+ return `${lines.join("\n")}\n${summary}`;
47
+ },
23
48
  handler: async (input, ctx) => {
24
- const cur = await getCurrent(ctx.db, input.logical_path);
25
- if (!cur) {
26
- throw new HelpfulError({
27
- kind: "not_found",
28
- message: `${input.logical_path} doesn't exist (or is already tombstoned)`,
29
- hint: `Run \`membot ls\` to see active paths, or \`membot versions ${input.logical_path}\` to see history.`,
30
- });
49
+ const currentPaths = await listAllCurrentPaths(ctx.db);
50
+ const currentSet = new Set(currentPaths);
51
+ const targets = new Set<string>();
52
+
53
+ for (const arg of input.paths) {
54
+ const matches: string[] = [];
55
+ if (isGlob(arg)) {
56
+ const isMatch = picomatch(arg, { dot: true });
57
+ for (const p of currentPaths) {
58
+ if (isMatch(p)) matches.push(p);
59
+ }
60
+ } else if (currentSet.has(arg)) {
61
+ matches.push(arg);
62
+ }
63
+ if (matches.length === 0) {
64
+ throw new HelpfulError({
65
+ kind: "not_found",
66
+ message: `no current files match \`${arg}\``,
67
+ hint: "Run `membot ls` to see active paths, or pass a different glob.",
68
+ });
69
+ }
70
+ for (const m of matches) targets.add(m);
31
71
  }
32
- const v = await tombstone(ctx.db, input.logical_path, input.change_note ?? "deleted");
33
- return { logical_path: input.logical_path, tombstone_version_id: v };
72
+
73
+ const note = input.change_note ?? "deleted";
74
+ const removed: { logical_path: string; version_id: string | null; status: "ok" | "failed"; error?: string }[] = [];
75
+ for (const path of targets) {
76
+ try {
77
+ const versionId = await tombstone(ctx.db, path, note);
78
+ removed.push({ logical_path: path, version_id: versionId, status: "ok" });
79
+ } catch (err) {
80
+ const helpful = asHelpful(err, `while tombstoning ${path}`, "Re-run with --verbose to see the cause.");
81
+ removed.push({
82
+ logical_path: path,
83
+ version_id: null,
84
+ status: "failed",
85
+ error: helpful.message,
86
+ });
87
+ }
88
+ }
89
+
90
+ const ok = removed.filter((r) => r.status === "ok").length;
91
+ const failed = removed.length - ok;
92
+ return { removed, total: removed.length, ok, failed };
34
93
  },
35
94
  });
@@ -1,4 +1,5 @@
1
1
  import { z } from "zod";
2
+ import { HelpfulError } from "../errors.ts";
2
3
  import { embedSingle } from "../ingest/embedder.ts";
3
4
  import { colors } from "../output/formatter.ts";
4
5
  import { fuseRRF } from "../search/hybrid.ts";
@@ -52,6 +53,14 @@ export const searchOperation = defineOperation({
52
53
  const query = input.query ?? input.pattern ?? "";
53
54
  const pattern = input.pattern ?? input.query ?? "";
54
55
 
56
+ if (!query.trim() && !pattern.trim()) {
57
+ throw new HelpfulError({
58
+ kind: "input_error",
59
+ message: "search requires a query or pattern",
60
+ hint: 'Pass a natural-language query (e.g. `membot search "oauth flow"`) or a keyword pattern (e.g. `membot search --pattern OAuth`).',
61
+ });
62
+ }
63
+
55
64
  const semanticHits =
56
65
  input.mode === "keyword" || !query.trim()
57
66
  ? []