membot 0.0.1 → 0.1.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (78) hide show
  1. package/.claude/skills/membot.md +137 -0
  2. package/.cursor/rules/membot.mdc +137 -0
  3. package/README.md +131 -0
  4. package/package.json +83 -24
  5. package/patches/@huggingface%2Ftransformers@4.2.0.patch +137 -0
  6. package/scripts/apply-transformers-patch.sh +35 -0
  7. package/src/cli.ts +72 -0
  8. package/src/commands/check-update.ts +69 -0
  9. package/src/commands/mcpx.ts +112 -0
  10. package/src/commands/reindex.ts +53 -0
  11. package/src/commands/serve.ts +58 -0
  12. package/src/commands/skill.ts +131 -0
  13. package/src/commands/upgrade.ts +220 -0
  14. package/src/config/loader.ts +100 -0
  15. package/src/config/schemas.ts +39 -0
  16. package/src/constants.ts +42 -0
  17. package/src/context.ts +80 -0
  18. package/src/db/blobs.ts +53 -0
  19. package/src/db/chunks.ts +176 -0
  20. package/src/db/connection.ts +173 -0
  21. package/src/db/files.ts +325 -0
  22. package/src/db/migrations/001-init.ts +63 -0
  23. package/src/db/migrations/002-fts.ts +12 -0
  24. package/src/db/migrations.ts +45 -0
  25. package/src/errors.ts +87 -0
  26. package/src/ingest/chunker.ts +117 -0
  27. package/src/ingest/converter/docx.ts +15 -0
  28. package/src/ingest/converter/html.ts +20 -0
  29. package/src/ingest/converter/image.ts +71 -0
  30. package/src/ingest/converter/index.ts +119 -0
  31. package/src/ingest/converter/llm.ts +66 -0
  32. package/src/ingest/converter/ocr.ts +51 -0
  33. package/src/ingest/converter/pdf.ts +38 -0
  34. package/src/ingest/converter/text.ts +8 -0
  35. package/src/ingest/describer.ts +72 -0
  36. package/src/ingest/embedder.ts +98 -0
  37. package/src/ingest/fetcher.ts +280 -0
  38. package/src/ingest/ingest.ts +444 -0
  39. package/src/ingest/local-reader.ts +64 -0
  40. package/src/ingest/search-text.ts +18 -0
  41. package/src/ingest/source-resolver.ts +186 -0
  42. package/src/mcp/instructions.ts +34 -0
  43. package/src/mcp/server.ts +101 -0
  44. package/src/mount/commander.ts +174 -0
  45. package/src/mount/mcp.ts +111 -0
  46. package/src/mount/zod-to-cli.ts +158 -0
  47. package/src/operations/add.ts +69 -0
  48. package/src/operations/diff.ts +105 -0
  49. package/src/operations/index.ts +38 -0
  50. package/src/operations/info.ts +95 -0
  51. package/src/operations/list.ts +87 -0
  52. package/src/operations/move.ts +83 -0
  53. package/src/operations/prune.ts +80 -0
  54. package/src/operations/read.ts +102 -0
  55. package/src/operations/refresh.ts +72 -0
  56. package/src/operations/remove.ts +35 -0
  57. package/src/operations/search.ts +72 -0
  58. package/src/operations/tree.ts +103 -0
  59. package/src/operations/types.ts +81 -0
  60. package/src/operations/versions.ts +78 -0
  61. package/src/operations/write.ts +77 -0
  62. package/src/output/formatter.ts +68 -0
  63. package/src/output/logger.ts +114 -0
  64. package/src/output/progress.ts +78 -0
  65. package/src/output/tty.ts +91 -0
  66. package/src/refresh/runner.ts +296 -0
  67. package/src/refresh/scheduler.ts +54 -0
  68. package/src/sdk.ts +27 -0
  69. package/src/search/hybrid.ts +100 -0
  70. package/src/search/keyword.ts +62 -0
  71. package/src/search/semantic.ts +56 -0
  72. package/src/types/text-modules.d.ts +9 -0
  73. package/src/update/background.ts +73 -0
  74. package/src/update/cache.ts +40 -0
  75. package/src/update/checker.ts +117 -0
  76. package/.claude/settings.local.json +0 -7
  77. package/CLAUDE.md +0 -139
  78. package/docs/plan.md +0 -905
@@ -0,0 +1,137 @@
1
+ ---
2
+ name: membot
3
+ description: Persistent, versioned context store for AI agents — ingest, search, read, and write knowledge via the membot CLI or MCP server
4
+ trigger: when the user wants to remember, recall, or search project knowledge, ingest documents into a long-lived store, or surface relevant context for a task
5
+ ---
6
+
7
+ # membot — Persistent Context for Agents
8
+
9
+ You have access to a long-lived context store via `membot`. Files (markdown, PDFs, DOCX, HTML, URLs, agent notes) are ingested, converted to markdown, chunked, embedded locally, and indexed in DuckDB with hybrid search (semantic + BM25). Every artifact is addressed by a virtual `logical_path`. Every change creates a new immutable version — nothing is overwritten in place.
10
+
11
+ Use this workflow:
12
+
13
+ ## 1. Discover what's already there
14
+
15
+ Before ingesting, check whether the knowledge already exists.
16
+
17
+ ```bash
18
+ membot tree # synthesised directory tree of logical_paths
19
+ membot ls # one row per current file (size, mime, refresh status)
20
+ membot ls docs/ # filter by prefix
21
+ membot search "<question>" # hybrid search (semantic + keyword)
22
+ ```
23
+
24
+ `search` is the primary discovery tool — prefer it over scanning files.
25
+
26
+ ## 2. Ingest
27
+
28
+ ```bash
29
+ membot add ./README.md # single file
30
+ membot add ./docs # recursive directory walk
31
+ membot add "docs/**/*.md" # glob
32
+ membot add https://example.com/spec.pdf # URL (auto-converted to markdown)
33
+ membot add "inline:Decision: use X because Y" # literal text
34
+ membot add ./docs --refresh-frequency 24h # auto-refresh every day
35
+ ```
36
+
37
+ Each entry becomes a new version under its own `logical_path`. PDFs/DOCX/HTML are converted to markdown; images get vision captions; original bytes are kept and reachable via `membot read --bytes`.
38
+
39
+ ## 3. Read
40
+
41
+ ```bash
42
+ membot read <logical_path> # current markdown surrogate
43
+ membot read <logical_path> --bytes # original bytes (base64) — PDF/DOCX/image as ingested
44
+ membot read <logical_path> --version <ts> # historical snapshot
45
+ membot info <logical_path> # metadata only (no content)
46
+ membot versions <logical_path> # every version, newest first
47
+ membot diff <logical_path> --a <ts> [--b <ts>] # unified diff between versions
48
+ ```
49
+
50
+ Defaults to the current (non-tombstoned) version. Pass `--version` only when you need history.
51
+
52
+ ## 4. Write your own notes
53
+
54
+ Persist agent-authored summaries, decisions, or synthesised context so they survive across conversations:
55
+
56
+ ```bash
57
+ membot write notes/decision-2026-05.md --content "Decided to ..."
58
+ ```
59
+
60
+ Inline writes create a new `(logical_path, version_id)` row just like file ingests — `membot versions` lists them, `membot diff` compares them. To mirror an external doc that should re-fetch over time, use `membot add <url> --refresh-frequency` instead.
61
+
62
+ ## 5. Refresh, rename, delete, prune
63
+
64
+ ```bash
65
+ membot refresh <logical_path> # re-read source; new version only if bytes changed
66
+ membot refresh # refresh all rows whose schedule has elapsed
67
+ membot mv old/path new/path # rename (history preserved under both)
68
+ membot rm <logical_path> # tombstone (history still queryable)
69
+ membot prune --before <iso-ts> # drop non-current versions older than cutoff (irreversible)
70
+ ```
71
+
72
+ Tombstones hide a path from `ls` / `tree` / `search` but `versions` and `read --version <ts>` still work. Pruning is the only way to actually remove data.
73
+
74
+ ## Versioning rules
75
+
76
+ - Defaults always operate on the current, non-tombstoned version.
77
+ - Pass an explicit `--version <timestamp>` (from `membot versions`) to read history, or `--a <ts>` / `--b <ts>` to diff between versions.
78
+ - `add`, refresh-with-changes, `write`, and `mv` each create a new version. The previous version is preserved.
79
+ - Mutating an existing version is not possible — corrections are new versions.
80
+
81
+ ## When to use this skill
82
+
83
+ - The user asks to remember, recall, save, or look up something across conversations.
84
+ - You need project-specific context (specs, decisions, transcripts, rendered docs) that's too large to fit in the prompt.
85
+ - You need to ingest a document (PDF, DOCX, HTML, URL) and reason over it.
86
+ - You're producing a summary or decision that should survive past this conversation.
87
+
88
+ ## When NOT to use this skill
89
+
90
+ - Reading a file the user just pointed at — use the regular file-read tool unless they want it persisted.
91
+ - Storing secrets, credentials, or anything that shouldn't sit in `~/.membot/index.duckdb`.
92
+ - Quick scratch state for the current turn — keep that in the conversation.
93
+
94
+ ## MCP server
95
+
96
+ `membot serve` exposes the same operations as MCP tools (`membot_add`, `membot_search`, etc.) over stdio (default) or HTTP (`--http <port>`). When connected, prefer the MCP tools over shelling out — they return structured `outputSchema` data with `version_id` echoed on every read.
97
+
98
+ ## Available commands
99
+
100
+ | Command | Purpose |
101
+ | ------------------------------------- | ------------------------------------------------------------------------------ |
102
+ | `membot add <source>` | Ingest file, directory, glob, URL, or `inline:<text>` (one new version each) |
103
+ | `membot ls [prefix]` | List current files (size, mime, refresh status) |
104
+ | `membot tree [prefix]` | Render the synthesised logical-path tree |
105
+ | `membot read <path>` | Read current markdown surrogate (or `--bytes` for original) |
106
+ | `membot write <path> --content <txt>` | Write inline agent-authored markdown as a new version |
107
+ | `membot search <query>` | Hybrid search (semantic + BM25); add `--include-history` to search older versions |
108
+ | `membot info <path>` | Inspect metadata (source, fetcher, refresh schedule, digests) without content |
109
+ | `membot versions <path>` | List every version newest-first with version_id and change notes |
110
+ | `membot diff <path> --a <ts>` | Unified diff between two versions |
111
+ | `membot mv <old> <new>` | Rename a logical_path (history preserved) |
112
+ | `membot rm <path>` | Tombstone a logical_path (history still queryable) |
113
+ | `membot refresh [path]` | Re-read source; create new version only if bytes changed |
114
+ | `membot prune --before <ts>` | Permanently drop non-current versions older than cutoff (irreversible) |
115
+ | `membot serve` | Start MCP server (stdio default, `--http <port>` for HTTP) |
116
+ | `membot reindex` | Rebuild the FTS keyword index over current chunks |
117
+
118
+ ## Output formats
119
+
120
+ - TTY → spinners, colors, tables. `--no-color` disables ANSI.
121
+ - Piped, `--json`, `CI=true`, or `NO_COLOR` → JSON to stdout, structured logs to stderr, no ANSI bytes.
122
+ - Use `--json` when parsing output programmatically (it's automatic when piped, but explicit is safer).
123
+ - Use `--verbose` if a command fails unexpectedly.
124
+
125
+ ## Troubleshooting
126
+
127
+ - **"ingest failed: unsupported mime"** → Add a converter or pass `--bytes` to keep the original; LLM-fallback only runs when `ANTHROPIC_API_KEY` is set.
128
+ - **"refresh failed: auth"** → The original fetch used an authenticated mcpx tool; re-auth via `mcpx auth <server>`.
129
+ - **Search returns nothing** → Confirm the file ingested with `membot info <path>`; if needed, run `membot reindex` to rebuild the FTS keyword index.
130
+ - **Stale results after manual DB edits** → `membot reindex`.
131
+ - **Two paths point at the same content** → `membot mv` doesn't merge; tombstone one with `membot rm`.
132
+
133
+ ## Configuration
134
+
135
+ - Data lives in `~/.membot/index.duckdb` (override via `MEMBOT_HOME`).
136
+ - Optional `ANTHROPIC_API_KEY` enables LLM fallback for messy/binary input. Without it, conversion degrades to deterministic native output.
137
+ - Config file: `~/.membot/config.json` (see `membot --help` for the global flags).
@@ -0,0 +1,137 @@
1
+ ---
2
+ description: Persistent, versioned context store for AI agents — ingest, search, read, and write knowledge via the membot CLI or MCP server
3
+ globs:
4
+ alwaysApply: true
5
+ ---
6
+
7
+ # membot — Persistent Context for Agents
8
+
9
+ You have access to a long-lived context store via `membot`. Files (markdown, PDFs, DOCX, HTML, URLs, agent notes) are ingested, converted to markdown, chunked, embedded locally, and indexed in DuckDB with hybrid search (semantic + BM25). Every artifact is addressed by a virtual `logical_path`. Every change creates a new immutable version — nothing is overwritten in place.
10
+
11
+ Use this workflow:
12
+
13
+ ## 1. Discover what's already there
14
+
15
+ Before ingesting, check whether the knowledge already exists.
16
+
17
+ ```bash
18
+ membot tree # synthesised directory tree of logical_paths
19
+ membot ls # one row per current file (size, mime, refresh status)
20
+ membot ls docs/ # filter by prefix
21
+ membot search "<question>" # hybrid search (semantic + keyword)
22
+ ```
23
+
24
+ `search` is the primary discovery tool — prefer it over scanning files.
25
+
26
+ ## 2. Ingest
27
+
28
+ ```bash
29
+ membot add ./README.md # single file
30
+ membot add ./docs # recursive directory walk
31
+ membot add "docs/**/*.md" # glob
32
+ membot add https://example.com/spec.pdf # URL (auto-converted to markdown)
33
+ membot add "inline:Decision: use X because Y" # literal text
34
+ membot add ./docs --refresh-frequency 24h # auto-refresh every day
35
+ ```
36
+
37
+ Each entry becomes a new version under its own `logical_path`. PDFs/DOCX/HTML are converted to markdown; images get vision captions; original bytes are kept and reachable via `membot read --bytes`.
38
+
39
+ ## 3. Read
40
+
41
+ ```bash
42
+ membot read <logical_path> # current markdown surrogate
43
+ membot read <logical_path> --bytes # original bytes (base64) — PDF/DOCX/image as ingested
44
+ membot read <logical_path> --version <ts> # historical snapshot
45
+ membot info <logical_path> # metadata only (no content)
46
+ membot versions <logical_path> # every version, newest first
47
+ membot diff <logical_path> --a <ts> [--b <ts>] # unified diff between versions
48
+ ```
49
+
50
+ Defaults to the current (non-tombstoned) version. Pass `--version` only when you need history.
51
+
52
+ ## 4. Write your own notes
53
+
54
+ Persist agent-authored summaries, decisions, or synthesised context so they survive across conversations:
55
+
56
+ ```bash
57
+ membot write notes/decision-2026-05.md --content "Decided to ..."
58
+ ```
59
+
60
+ Inline writes create a new `(logical_path, version_id)` row just like file ingests — `membot versions` lists them, `membot diff` compares them. To mirror an external doc that should re-fetch over time, use `membot add <url> --refresh-frequency` instead.
61
+
62
+ ## 5. Refresh, rename, delete, prune
63
+
64
+ ```bash
65
+ membot refresh <logical_path> # re-read source; new version only if bytes changed
66
+ membot refresh # refresh all rows whose schedule has elapsed
67
+ membot mv old/path new/path # rename (history preserved under both)
68
+ membot rm <logical_path> # tombstone (history still queryable)
69
+ membot prune --before <iso-ts> # drop non-current versions older than cutoff (irreversible)
70
+ ```
71
+
72
+ Tombstones hide a path from `ls` / `tree` / `search` but `versions` and `read --version <ts>` still work. Pruning is the only way to actually remove data.
73
+
74
+ ## Versioning rules
75
+
76
+ - Defaults always operate on the current, non-tombstoned version.
77
+ - Pass an explicit `--version <timestamp>` (from `membot versions`) to read history, or `--a <ts>` / `--b <ts>` to diff between versions.
78
+ - `add`, refresh-with-changes, `write`, and `mv` each create a new version. The previous version is preserved.
79
+ - Mutating an existing version is not possible — corrections are new versions.
80
+
81
+ ## When to use this rule
82
+
83
+ - The user asks to remember, recall, save, or look up something across conversations.
84
+ - You need project-specific context (specs, decisions, transcripts, rendered docs) that's too large to fit in the prompt.
85
+ - You need to ingest a document (PDF, DOCX, HTML, URL) and reason over it.
86
+ - You're producing a summary or decision that should survive past this conversation.
87
+
88
+ ## When NOT to use this rule
89
+
90
+ - Reading a file the user just pointed at — use the regular file-read tool unless they want it persisted.
91
+ - Storing secrets, credentials, or anything that shouldn't sit in `~/.membot/index.duckdb`.
92
+ - Quick scratch state for the current turn — keep that in the conversation.
93
+
94
+ ## MCP server
95
+
96
+ `membot serve` exposes the same operations as MCP tools (`membot_add`, `membot_search`, etc.) over stdio (default) or HTTP (`--http <port>`). When connected, prefer the MCP tools over shelling out — they return structured `outputSchema` data with `version_id` echoed on every read.
97
+
98
+ ## Available commands
99
+
100
+ | Command | Purpose |
101
+ | ------------------------------------- | ------------------------------------------------------------------------------ |
102
+ | `membot add <source>` | Ingest file, directory, glob, URL, or `inline:<text>` (one new version each) |
103
+ | `membot ls [prefix]` | List current files (size, mime, refresh status) |
104
+ | `membot tree [prefix]` | Render the synthesised logical-path tree |
105
+ | `membot read <path>` | Read current markdown surrogate (or `--bytes` for original) |
106
+ | `membot write <path> --content <txt>` | Write inline agent-authored markdown as a new version |
107
+ | `membot search <query>` | Hybrid search (semantic + BM25); add `--include-history` to search older versions |
108
+ | `membot info <path>` | Inspect metadata (source, fetcher, refresh schedule, digests) without content |
109
+ | `membot versions <path>` | List every version newest-first with version_id and change notes |
110
+ | `membot diff <path> --a <ts>` | Unified diff between two versions |
111
+ | `membot mv <old> <new>` | Rename a logical_path (history preserved) |
112
+ | `membot rm <path>` | Tombstone a logical_path (history still queryable) |
113
+ | `membot refresh [path]` | Re-read source; create new version only if bytes changed |
114
+ | `membot prune --before <ts>` | Permanently drop non-current versions older than cutoff (irreversible) |
115
+ | `membot serve` | Start MCP server (stdio default, `--http <port>` for HTTP) |
116
+ | `membot reindex` | Rebuild the FTS keyword index over current chunks |
117
+
118
+ ## Output formats
119
+
120
+ - TTY → spinners, colors, tables. `--no-color` disables ANSI.
121
+ - Piped, `--json`, `CI=true`, or `NO_COLOR` → JSON to stdout, structured logs to stderr, no ANSI bytes.
122
+ - Use `--json` when parsing output programmatically (it's automatic when piped, but explicit is safer).
123
+ - Use `--verbose` if a command fails unexpectedly.
124
+
125
+ ## Troubleshooting
126
+
127
+ - **"ingest failed: unsupported mime"** → Add a converter or pass `--bytes` to keep the original; LLM-fallback only runs when `ANTHROPIC_API_KEY` is set.
128
+ - **"refresh failed: auth"** → The original fetch used an authenticated mcpx tool; re-auth via `mcpx auth <server>`.
129
+ - **Search returns nothing** → Confirm the file ingested with `membot info <path>`; if needed, run `membot reindex` to rebuild the FTS keyword index.
130
+ - **Stale results after manual DB edits** → `membot reindex`.
131
+ - **Two paths point at the same content** → `membot mv` doesn't merge; tombstone one with `membot rm`.
132
+
133
+ ## Configuration
134
+
135
+ - Data lives in `~/.membot/index.duckdb` (override via `MEMBOT_HOME`).
136
+ - Optional `ANTHROPIC_API_KEY` enables LLM fallback for messy/binary input. Without it, conversion degrades to deterministic native output.
137
+ - Config file: `~/.membot/config.json` (see `membot --help` for the global flags).
package/README.md ADDED
@@ -0,0 +1,131 @@
1
+ # membot
2
+
3
+ > Versioned context store with hybrid search for AI agents. Stdio + HTTP MCP server and CLI.
4
+
5
+ [![npm](https://img.shields.io/npm/v/membot.svg)](https://www.npmjs.com/package/membot)
6
+ [![license](https://img.shields.io/npm/l/membot.svg)](./LICENSE)
7
+
8
+ `membot` is a single-binary CLI and MCP server that gives AI agents a persistent, versioned, searchable context store. Files (markdown, PDFs, DOCX, HTML, URLs, agent-authored notes) are ingested, converted to markdown, chunked, embedded **locally** with `@huggingface/transformers` (WASM, no cloud calls), and indexed in DuckDB with hybrid search (semantic vector + BM25). Every change creates a new version — nothing is overwritten in place.
9
+
10
+ - **Local everything** — embeddings run on your machine; data lives in `~/.membot/index.duckdb`.
11
+ - **One mental model** — every artifact (markdown, PDF, image, audio) becomes a markdown surrogate that flows through the same chunk → embed → search pipeline.
12
+ - **Append-only versioning** — every ingest, refresh, or write creates a new `(logical_path, version_id)` row. History is queryable; nothing is mutated.
13
+ - **Two surfaces, one source of truth** — every operation is exposed identically as a CLI subcommand and an MCP tool. The agent sees `membot_search`; you see `membot search`.
14
+
15
+ ## Install
16
+
17
+ ```bash
18
+ # macOS / Linux — pre-built binary
19
+ curl -fsSL https://raw.githubusercontent.com/evantahler/membot/main/install.sh | bash
20
+
21
+ # Windows — PowerShell
22
+ iwr -useb https://raw.githubusercontent.com/evantahler/membot/main/install.ps1 | iex
23
+
24
+ # From npm (requires Bun or Node)
25
+ bun add -g membot
26
+ # or
27
+ npm install -g membot
28
+ ```
29
+
30
+ ## Quick start
31
+
32
+ ```bash
33
+ membot add ./docs # ingest a directory recursively
34
+ membot add https://example.com/spec.pdf # ingest a URL (auto-converted to markdown)
35
+ membot ls # list current files
36
+ membot search "how does refresh work?" # hybrid search
37
+ membot read docs/refresh.md # read the markdown surrogate
38
+ membot serve # expose the same operations as MCP tools (stdio)
39
+ ```
40
+
41
+ ## Use with Claude Code or Cursor
42
+
43
+ `membot skill install` drops the agent skill into the right place so Claude Code or Cursor knows **when** to call `membot`.
44
+
45
+ ```bash
46
+ membot skill install --claude # writes ./.claude/skills/membot.md (project)
47
+ membot skill install --cursor # writes ./.cursor/rules/membot.mdc (project)
48
+ membot skill install --claude --global # writes ~/.claude/skills/membot.md
49
+ membot skill install --claude --cursor -f # both, overwrite if present
50
+ ```
51
+
52
+ The skill files describe the discover → ingest → search → read → write workflow and the versioning rules. You can re-run with `--force` to refresh after upgrading membot.
53
+
54
+ ## Commands
55
+
56
+ | Command | Description |
57
+ | ------------------------------- | --------------------------------------------------------------------------------- |
58
+ | `membot add <source>` | Ingest a file, directory, glob, URL, or `inline:<text>`. Each match → new version |
59
+ | `membot ls [prefix]` | List current files (size, mime, refresh status) |
60
+ | `membot tree [prefix]` | Render the synthesised logical-path tree |
61
+ | `membot read <path>` | Read the markdown surrogate (or `--bytes` for original bytes, base64) |
62
+ | `membot search <query>` | Hybrid search (semantic + BM25); `--include-history` searches older versions |
63
+ | `membot info <path>` | Inspect metadata (source, fetcher, schedule, digests) without content |
64
+ | `membot versions <path>` | List every version newest-first |
65
+ | `membot diff <path> <a> [b]` | Unified diff between two versions |
66
+ | `membot write <path>` | Write inline agent-authored markdown as a new version |
67
+ | `membot mv <from> <to>` | Rename a logical_path (history preserved under both) |
68
+ | `membot rm <path>` | Tombstone a logical_path (history still queryable) |
69
+ | `membot refresh [path]` | Re-read source; new version only if bytes changed |
70
+ | `membot prune --before <ts>` | Permanently drop non-current versions older than cutoff (irreversible) |
71
+ | `membot serve` | Run the MCP server (stdio default; `--http <port>` for HTTP) |
72
+ | `membot reindex` | Rebuild the FTS keyword index over current chunks |
73
+ | `membot mcpx <subcommand>` | Forward to the bundled `mcpx` CLI for managing remote MCP servers |
74
+ | `membot skill install` | Install the Claude Code / Cursor agent skill |
75
+
76
+ Run `membot <command> --help` for full flags and arguments. Every command produces JSON when piped, when `--json` is set, or when `CI=true`.
77
+
78
+ ## MCP server
79
+
80
+ `membot serve` exposes every operation as an MCP tool. Stdio is the default; pass `--http <port>` for streamable HTTP.
81
+
82
+ **Claude Desktop** (`~/Library/Application Support/Claude/claude_desktop_config.json` on macOS):
83
+
84
+ ```json
85
+ {
86
+ "mcpServers": {
87
+ "membot": {
88
+ "command": "membot",
89
+ "args": ["serve"]
90
+ }
91
+ }
92
+ }
93
+ ```
94
+
95
+ **Streamable HTTP** (any MCP client that speaks HTTP):
96
+
97
+ ```bash
98
+ membot serve --http 3000
99
+ # tool endpoint: http://localhost:3000/mcp
100
+ ```
101
+
102
+ Add `--watch` (and optional `--tick <sec>`) to also run the refresh daemon, which re-reads any file whose `refresh_frequency` has elapsed.
103
+
104
+ ## Configuration
105
+
106
+ - **Data directory:** `~/.membot/` (override with `MEMBOT_HOME=/path` or `--config <path>`).
107
+ - `~/.membot/index.duckdb` — all content, blobs, chunks, embeddings, and metadata.
108
+ - `~/.membot/models/` — cached embedding model weights (`Xenova/bge-small-en-v1.5`, 384-dim).
109
+ - `~/.membot/logs/` — daemon logs when running `serve --watch`.
110
+ - **Config file:** `~/.membot/config.json` (optional; defaults are sane).
111
+ - **Environment variables:**
112
+ - `ANTHROPIC_API_KEY` — optional. Enables LLM fallback for messy / scanned input (vision captions for images, last-resort markdown conversion). Without it, the pipeline degrades to deterministic native conversion.
113
+ - `MEMBOT_HOME` — override the data directory.
114
+ - `NO_COLOR`, `CI`, `FORCE_COLOR` — standard output controls.
115
+
116
+ ## Development
117
+
118
+ ```bash
119
+ bun install
120
+ bun run dev <args> # run from source
121
+ bun test # full test suite (real ephemeral DuckDB per test)
122
+ bun run lint # biome + tsc
123
+ bun run format # biome --write
124
+ bun run build # compile a standalone binary into dist/membot
125
+ ```
126
+
127
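+ Architecture, design constraints, and reference projects are documented in [`docs/plan.md`](https://github.com/evantahler/membot/blob/main/docs/plan.md) and [`CLAUDE.md`](https://github.com/evantahler/membot/blob/main/CLAUDE.md) in the GitHub repository.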
128
+
129
+ ## License
130
+
131
+ MIT © Evan Tahler
package/package.json CHANGED
@@ -1,26 +1,85 @@
1
1
  {
2
- "name": "membot",
3
- "version": "0.0.1",
4
- "description": "Versioned context store with hybrid search for AI agents. Stdio + HTTP MCP server and CLI.",
5
- "keywords": [
6
- "mcp",
7
- "model-context-protocol",
8
- "context",
9
- "memory",
10
- "agent",
11
- "rag",
12
- "embeddings",
13
- "duckdb",
14
- "bun"
15
- ],
16
- "license": "MIT",
17
- "author": "Evan Tahler <evan@arcade.dev>",
18
- "repository": {
19
- "type": "git",
20
- "url": "https://github.com/evantahler/membot.git"
21
- },
22
- "homepage": "https://github.com/evantahler/membot",
23
- "bugs": {
24
- "url": "https://github.com/evantahler/membot/issues"
25
- }
2
+ "name": "membot",
3
+ "version": "0.1.1",
4
+ "description": "Versioned context store with hybrid search for AI agents. Stdio + HTTP MCP server and CLI.",
5
+ "type": "module",
6
+ "exports": {
7
+ ".": "./src/sdk.ts",
8
+ "./cli": "./src/cli.ts"
9
+ },
10
+ "main": "./src/sdk.ts",
11
+ "types": "./src/sdk.ts",
12
+ "bin": {
13
+ "membot": "./src/cli.ts"
14
+ },
15
+ "files": [
16
+ "src",
17
+ "patches",
18
+ "scripts",
19
+ ".claude",
20
+ ".cursor",
21
+ "README.md",
22
+ "LICENSE"
23
+ ],
24
+ "scripts": {
25
+ "dev": "bun run src/cli.ts",
26
+ "test": "bun test",
27
+ "lint": "biome ci . && tsc --noEmit",
28
+ "format": "biome check --write .",
29
+ "prebuild": "bash scripts/apply-transformers-patch.sh",
30
+ "build": "bun build --compile --minify --sourcemap --external '@duckdb/*' ./src/cli.ts --outfile dist/membot"
31
+ },
32
+ "keywords": [
33
+ "mcp",
34
+ "model-context-protocol",
35
+ "context",
36
+ "memory",
37
+ "agent",
38
+ "rag",
39
+ "embeddings",
40
+ "duckdb",
41
+ "bun"
42
+ ],
43
+ "license": "MIT",
44
+ "author": "Evan Tahler <evan@evantahler.com>",
45
+ "repository": {
46
+ "type": "git",
47
+ "url": "https://github.com/evantahler/membot.git"
48
+ },
49
+ "homepage": "https://github.com/evantahler/membot",
50
+ "bugs": {
51
+ "url": "https://github.com/evantahler/membot/issues"
52
+ },
53
+ "publishConfig": {
54
+ "access": "public"
55
+ },
56
+ "dependencies": {
57
+ "@anthropic-ai/sdk": "^0.32.0",
58
+ "@duckdb/node-api": "1.5.2-r.1",
59
+ "@evantahler/mcpx": "^0.21.4",
60
+ "@huggingface/transformers": "^4.2.0",
61
+ "@modelcontextprotocol/sdk": "^1.29.0",
62
+ "ansis": "^4.2.0",
63
+ "commander": "^14.0.3",
64
+ "gray-matter": "^4.0.3",
65
+ "mammoth": "^1.8.0",
66
+ "nanospinner": "^1.2.2",
67
+ "onnxruntime-web": "1.26.0-dev.20260416-b7804b056c",
68
+ "picomatch": "^4.0.4",
69
+ "@types/picomatch": "^4.0.3",
70
+ "tesseract.js": "^5.1.0",
71
+ "turndown": "^7.2.0",
72
+ "@types/turndown": "^5.0.5",
73
+ "unpdf": "^0.12.0",
74
+ "zod": "^4.0.0",
75
+ "zod-to-json-schema": "^3.23.0"
76
+ },
77
+ "devDependencies": {
78
+ "@biomejs/biome": "^2.4.14",
79
+ "@types/bun": "latest",
80
+ "typescript": "^6"
81
+ },
82
+ "peerDependencies": {
83
+ "typescript": "^6"
84
+ }
26
85
  }
@@ -0,0 +1,137 @@
1
+ diff --git a/dist/transformers.node.mjs b/dist/transformers.node.mjs
2
+ index bacb354fe1b898d4c535a39f5ef1ba5c6a463d75..0ab58f60460236259f9eba88447313f8661cc0ca 100644
3
+ --- a/dist/transformers.node.mjs
4
+ +++ b/dist/transformers.node.mjs
5
+ @@ -7542,7 +7542,10 @@ var uint16_to_float32 = /* @__PURE__ */ (function() {
6
+ })();
7
+
8
+ // src/backends/onnx.js
9
+ -import * as ONNX_NODE from "onnxruntime-node";
10
+ +// PATCHED (mcpx): static `import 'onnxruntime-node'` makes Bun --compile bundle
11
+ +// the native binding which then fails to dlopen libonnxruntime at runtime.
12
+ +// We never want the native bindings — onnxruntime-web (WASM) runs fine.
13
+ +var ONNX_NODE = void 0;
14
+
15
+ // ../../node_modules/.pnpm/onnxruntime-web@1.26.0-dev.20260416-b7804b056c/node_modules/onnxruntime-web/dist/ort.webgpu.bundle.min.mjs
16
+ var ort_webgpu_bundle_min_exports = {};
17
+ @@ -11551,23 +11554,12 @@ var ORT_SYMBOL = /* @__PURE__ */ Symbol.for("onnxruntime");
18
+ if (ORT_SYMBOL in globalThis) {
19
+ ONNX = globalThis[ORT_SYMBOL];
20
+ } else if (apis.IS_NODE_ENV) {
21
+ - ONNX = ONNX_NODE;
22
+ - switch (process.platform) {
23
+ - case "win32":
24
+ - supportedDevices.push("dml");
25
+ - break;
26
+ - case "linux":
27
+ - if (process.arch === "x64") {
28
+ - supportedDevices.push("cuda");
29
+ - }
30
+ - break;
31
+ - case "darwin":
32
+ - supportedDevices.push("coreml");
33
+ - break;
34
+ - }
35
+ - supportedDevices.push("webgpu");
36
+ - supportedDevices.push("cpu");
37
+ - defaultDevices = ["cpu"];
38
+ + // PATCHED (mcpx): force the WASM backend in node-like envs so onnxruntime-node
39
+ + // native bindings are never loaded (they can't be bundled into the Bun
40
+ + // --compile single binary).
41
+ + ONNX = ort_webgpu_bundle_min_exports;
42
+ + supportedDevices.push("wasm");
43
+ + defaultDevices = ["wasm"];
44
+ } else {
45
+ ONNX = ort_webgpu_bundle_min_exports;
46
+ if (apis.IS_WEBNN_AVAILABLE) {
47
+ @@ -17738,7 +17730,14 @@ var CohereAsrProcessor = class extends Processor {
48
+ };
49
+
50
+ // src/utils/image.js
51
+ -import sharp from "sharp";
52
+ +// PATCHED (mcpx): sharp has native bindings that can't be bundled into a
53
+ +// Bun --compile binary. We don't need image processing for text embeddings;
54
+ +// stub sharp with a function that throws lazily if image processing is ever
55
+ +// actually invoked. Truthy so the `else if (sharp)` branch below initializes
56
+ +// loadImageFunction normally.
57
+ +var sharp = function sharpStub() {
58
+ + throw new Error("Image processing (sharp) is not available in this build.");
59
+ +};
60
+ var createCanvasFunction;
61
+ var ImageDataClass;
62
+ var loadImageFunction;
63
+ @@ -22328,11 +22327,16 @@ function getExternalDataChunkNames(fullName, numChunks) {
64
+ async function getCoreModelFile(pretrained_model_name_or_path, fileName, options, suffix) {
65
+ const baseName = `${fileName}${suffix}.onnx`;
66
+ const fullPath = `${options.subfolder ?? ""}/${baseName}`;
67
+ - return await getModelFile(pretrained_model_name_or_path, fullPath, true, options, apis.IS_NODE_ENV);
68
+ + // PATCHED (mcpx): always return the model bytes (buffer) instead of a path.
69
+ + // Our patched onnxruntime backend is the bundled webgpu/web build, which
70
+ + // can't read paths via node:fs and would try to `fetch()` a bare path,
71
+ + // failing with `ERR_INVALID_URL`. Returning the buffer skips the fetch.
72
+ + return await getModelFile(pretrained_model_name_or_path, fullPath, true, options, false);
73
+ }
74
+ async function getModelDataFiles(pretrained_model_name_or_path, fileName, suffix, options, use_external_data_format, session_options = {}) {
75
+ const baseName = `${fileName}${suffix}.onnx`;
76
+ - const return_path = apis.IS_NODE_ENV;
77
+ + // PATCHED (mcpx): see getCoreModelFile patch above — return buffers, not paths.
78
+ + const return_path = false;
79
+ let externalDataPromises = [];
80
+ const num_chunks = resolveExternalDataFormat(use_external_data_format, baseName, fileName);
81
+ if (num_chunks > 0) {
82
+ diff --git a/src/backends/onnx.js b/src/backends/onnx.js
83
+ index 13b1a748272d03aa062950ff585c1a2277ba96c9..9863c46653618091593b6428a8c16622186318d6 100644
84
+ --- a/src/backends/onnx.js
85
+ +++ b/src/backends/onnx.js
86
+ @@ -20,7 +20,10 @@ import { env, apis, LogLevel } from '../env.js';
87
+
88
+ // NOTE: Import order matters here. We need to import `onnxruntime-node` before `onnxruntime-web`.
89
+ // In either case, we select the default export if it exists, otherwise we use the named export.
90
+ -import * as ONNX_NODE from 'onnxruntime-node';
91
+ +// PATCHED (mcpx): the static `import 'onnxruntime-node'` segfaults under Bun
92
+ +// (oven-sh/bun#26081). We never want the native bindings — `onnxruntime-web` (WASM) runs
93
+ +// fine on Bun + Node + browsers. The IS_NODE_ENV branch below is rerouted to ONNX_WEB.
94
+ +const ONNX_NODE = undefined;
95
+ import * as ONNX_WEB from 'onnxruntime-web/webgpu';
96
+ import { loadWasmBinary, loadWasmFactory } from './utils/cacheWasm.js';
97
+ import { isBlobURL, toAbsoluteURL } from '../utils/hub/utils.js';
98
+ @@ -106,34 +109,11 @@ if (ORT_SYMBOL in globalThis) {
99
+ // If the JS runtime exposes their own ONNX runtime, use it
100
+ ONNX = globalThis[ORT_SYMBOL];
101
+ } else if (apis.IS_NODE_ENV) {
102
+ - ONNX = ONNX_NODE;
103
+ -
104
+ - // Updated as of ONNX Runtime 1.23.0-dev.20250612-70f14d7670
105
+ - // The following table lists the supported versions of ONNX Runtime Node.js binding provided with pre-built binaries.
106
+ - // | EPs/Platforms | Windows x64 | Windows arm64 | Linux x64 | Linux arm64 | MacOS x64 | MacOS arm64 |
107
+ - // | --------------------- | ------------------ | ------------------ | ------------------ | ------------------ | ------------------ | ------------------ |
108
+ - // | CPU | ✔️ | ✔️ | ✔️ | ✔️ | ✔️ | ✔️ |
109
+ - // | WebGPU (experimental) | ✔️ | ✔️ | ✔️ | ❌ | ✔️ | ✔️ |
110
+ - // | DirectML | ✔️ | ✔️ | ❌ | ❌ | ❌ | ❌ |
111
+ - // | CUDA | ❌ | ❌ | ✔️ (CUDA v12) | ❌ | ❌ | ❌ |
112
+ - // | CoreML | ❌ | ❌ | ❌ | ❌ | ✔️ | ✔️ |
113
+ - switch (process.platform) {
114
+ - case 'win32': // Windows x64 and Windows arm64
115
+ - supportedDevices.push('dml');
116
+ - break;
117
+ - case 'linux': // Linux x64 and Linux arm64
118
+ - if (process.arch === 'x64') {
119
+ - supportedDevices.push('cuda');
120
+ - }
121
+ - break;
122
+ - case 'darwin': // MacOS x64 and MacOS arm64
123
+ - supportedDevices.push('coreml');
124
+ - break;
125
+ - }
126
+ -
127
+ - supportedDevices.push('webgpu');
128
+ - supportedDevices.push('cpu');
129
+ - defaultDevices = ['cpu'];
130
+ + // PATCHED (mcpx): force the WASM backend in node-like envs to avoid
131
+ + // loading onnxruntime-node native bindings (segfaults under Bun).
132
+ + ONNX = ONNX_WEB;
133
+ + supportedDevices.push('wasm');
134
+ + defaultDevices = ['wasm'];
135
+ } else {
136
+ ONNX = ONNX_WEB;
137
+