membot 0.1.0 → 0.1.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.claude/skills/membot.md +137 -0
- package/.cursor/rules/membot.mdc +137 -0
- package/README.md +131 -0
- package/package.json +4 -2
- package/src/cli.ts +2 -0
- package/src/commands/skill.ts +131 -0
- package/src/ingest/embedder.ts +18 -3
- package/src/types/text-modules.d.ts +9 -0
package/.claude/skills/membot.md
ADDED
|
@@ -0,0 +1,137 @@
|
|
|
1
|
+
---
|
|
2
|
+
name: membot
|
|
3
|
+
description: Persistent, versioned context store for AI agents — ingest, search, read, and write knowledge via the membot CLI or MCP server
|
|
4
|
+
trigger: when the user wants to remember, recall, or search project knowledge, ingest documents into a long-lived store, or surface relevant context for a task
|
|
5
|
+
---
|
|
6
|
+
|
|
7
|
+
# membot — Persistent Context for Agents
|
|
8
|
+
|
|
9
|
+
You have access to a long-lived context store via `membot`. Files (markdown, PDFs, DOCX, HTML, URLs, agent notes) are ingested, converted to markdown, chunked, embedded locally, and indexed in DuckDB with hybrid search (semantic + BM25). Every artifact is addressed by a virtual `logical_path`. Every change creates a new immutable version — nothing is overwritten in place.
|
|
10
|
+
|
|
11
|
+
Use this workflow:
|
|
12
|
+
|
|
13
|
+
## 1. Discover what's already there
|
|
14
|
+
|
|
15
|
+
Before ingesting, check whether the knowledge already exists.
|
|
16
|
+
|
|
17
|
+
```bash
|
|
18
|
+
membot tree # synthesised directory tree of logical_paths
|
|
19
|
+
membot ls # one row per current file (size, mime, refresh status)
|
|
20
|
+
membot ls docs/ # filter by prefix
|
|
21
|
+
membot search "<question>" # hybrid search (semantic + keyword)
|
|
22
|
+
```
|
|
23
|
+
|
|
24
|
+
`search` is the primary discovery tool — prefer it over scanning files.
|
|
25
|
+
|
|
26
|
+
## 2. Ingest
|
|
27
|
+
|
|
28
|
+
```bash
|
|
29
|
+
membot add ./README.md # single file
|
|
30
|
+
membot add ./docs # recursive directory walk
|
|
31
|
+
membot add "docs/**/*.md" # glob
|
|
32
|
+
membot add https://example.com/spec.pdf # URL (auto-converted to markdown)
|
|
33
|
+
membot add "inline:Decision: use X because Y" # literal text
|
|
34
|
+
membot add ./docs --refresh-frequency 24h # auto-refresh every day
|
|
35
|
+
```
|
|
36
|
+
|
|
37
|
+
Each entry becomes a new version under its own `logical_path`. PDFs/DOCX/HTML are converted to markdown; images get vision captions; original bytes are kept and reachable via `membot read --bytes`.
|
|
38
|
+
|
|
39
|
+
## 3. Read
|
|
40
|
+
|
|
41
|
+
```bash
|
|
42
|
+
membot read <logical_path> # current markdown surrogate
|
|
43
|
+
membot read <logical_path> --bytes # original bytes (base64) — PDF/DOCX/image as ingested
|
|
44
|
+
membot read <logical_path> --version <ts> # historical snapshot
|
|
45
|
+
membot info <logical_path> # metadata only (no content)
|
|
46
|
+
membot versions <logical_path> # every version, newest first
|
|
47
|
+
membot diff <logical_path> --a <ts> [--b <ts>] # unified diff between versions
|
|
48
|
+
```
|
|
49
|
+
|
|
50
|
+
Defaults to the current (non-tombstoned) version. Pass `--version` only when you need history.
|
|
51
|
+
|
|
52
|
+
## 4. Write your own notes
|
|
53
|
+
|
|
54
|
+
Persist agent-authored summaries, decisions, or synthesised context so they survive across conversations:
|
|
55
|
+
|
|
56
|
+
```bash
|
|
57
|
+
membot write notes/decision-2026-05.md --content "Decided to ..."
|
|
58
|
+
```
|
|
59
|
+
|
|
60
|
+
Inline writes create a new `(logical_path, version_id)` row just like file ingests — `membot versions` lists them, `membot diff` compares them. To mirror an external doc that should re-fetch over time, use `membot add <url> --refresh-frequency` instead.
|
|
61
|
+
|
|
62
|
+
## 5. Refresh, rename, delete, prune
|
|
63
|
+
|
|
64
|
+
```bash
|
|
65
|
+
membot refresh <logical_path> # re-read source; new version only if bytes changed
|
|
66
|
+
membot refresh # refresh all rows whose schedule has elapsed
|
|
67
|
+
membot mv old/path new/path # rename (history preserved under both)
|
|
68
|
+
membot rm <logical_path> # tombstone (history still queryable)
|
|
69
|
+
membot prune --before <iso-ts> # drop non-current versions older than cutoff (irreversible)
|
|
70
|
+
```
|
|
71
|
+
|
|
72
|
+
Tombstones hide a path from `ls` / `tree` / `search` but `versions` and `read --version <ts>` still work. Pruning is the only way to actually remove data.
|
|
73
|
+
|
|
74
|
+
## Versioning rules
|
|
75
|
+
|
|
76
|
+
- Defaults always operate on the current, non-tombstoned version.
|
|
77
|
+
- Pass an explicit `--version <timestamp>` (from `membot versions`) to read or diff history.
|
|
78
|
+
- `add`, refresh-with-changes, `write`, and `mv` each create a new version. The previous version is preserved.
|
|
79
|
+
- Mutating an existing version is not possible — corrections are new versions.
|
|
80
|
+
|
|
81
|
+
## When to use this skill
|
|
82
|
+
|
|
83
|
+
- The user asks to remember, recall, save, or look up something across conversations.
|
|
84
|
+
- You need project-specific context (specs, decisions, transcripts, rendered docs) that's larger than fits in the prompt.
|
|
85
|
+
- You need to ingest a document (PDF, DOCX, HTML, URL) and reason over it.
|
|
86
|
+
- You're producing a summary or decision that should survive past this conversation.
|
|
87
|
+
|
|
88
|
+
## When NOT to use this skill
|
|
89
|
+
|
|
90
|
+
- Reading a file the user just pointed at — use the regular file-read tool unless they want it persisted.
|
|
91
|
+
- Storing secrets, credentials, or anything that shouldn't sit in `~/.membot/index.duckdb`.
|
|
92
|
+
- Quick scratch state for the current turn — keep that in the conversation.
|
|
93
|
+
|
|
94
|
+
## MCP server
|
|
95
|
+
|
|
96
|
+
`membot serve` exposes the same operations as MCP tools (`membot_add`, `membot_search`, etc.) over stdio (default) or HTTP (`--http <port>`). When connected, prefer the MCP tools over shelling out — they return structured `outputSchema` data with `version_id` echoed on every read.
|
|
97
|
+
|
|
98
|
+
## Available commands
|
|
99
|
+
|
|
100
|
+
| Command | Purpose |
|
|
101
|
+
| ------------------------------------- | ------------------------------------------------------------------------------ |
|
|
102
|
+
| `membot add <source>` | Ingest file, directory, glob, URL, or `inline:<text>` (one new version each) |
|
|
103
|
+
| `membot ls [prefix]` | List current files (size, mime, refresh status) |
|
|
104
|
+
| `membot tree [prefix]` | Render the synthesised logical-path tree |
|
|
105
|
+
| `membot read <path>` | Read current markdown surrogate (or `--bytes` for original) |
|
|
106
|
+
| `membot write <path> --content <txt>` | Write inline agent-authored markdown as a new version |
|
|
107
|
+
| `membot search <query>` | Hybrid search (semantic + BM25); add `--include-history` to search older versions |
|
|
108
|
+
| `membot info <path>` | Inspect metadata (source, fetcher, refresh schedule, digests) without content |
|
|
109
|
+
| `membot versions <path>` | List every version newest-first with version_id and change notes |
|
|
110
|
+
| `membot diff <path> --a <ts>` | Unified diff between two versions |
|
|
111
|
+
| `membot mv <old> <new>` | Rename a logical_path (history preserved) |
|
|
112
|
+
| `membot rm <path>` | Tombstone a logical_path (history still queryable) |
|
|
113
|
+
| `membot refresh [path]` | Re-read source; create new version only if bytes changed |
|
|
114
|
+
| `membot prune --before <ts>` | Permanently drop non-current versions older than cutoff (irreversible) |
|
|
115
|
+
| `membot serve` | Start MCP server (stdio default, `--http <port>` for HTTP) |
|
|
116
|
+
| `membot reindex` | Rebuild the FTS keyword index over current chunks |
|
|
117
|
+
|
|
118
|
+
## Output formats
|
|
119
|
+
|
|
120
|
+
- TTY → spinners, colors, tables. `--no-color` disables ANSI.
|
|
121
|
+
- Piped, `--json`, `CI=true`, or `NO_COLOR` → JSON to stdout, structured logs to stderr, no ANSI bytes.
|
|
122
|
+
- Use `--json` when parsing output programmatically (it's automatic when piped, but explicit is safer).
|
|
123
|
+
- Use `--verbose` if a command fails unexpectedly.
|
|
124
|
+
|
|
125
|
+
## Troubleshooting
|
|
126
|
+
|
|
127
|
+
- **"ingest failed: unsupported mime"** → Add a converter or pass `--bytes` to keep the original; LLM-fallback only runs when `ANTHROPIC_API_KEY` is set.
|
|
128
|
+
- **"refresh failed: auth"** → The original fetch used an authenticated mcpx tool; re-auth via `mcpx auth <server>`.
|
|
129
|
+
- **Search returns nothing** → Confirm the file ingested with `membot info <path>`; if needed, run `membot reindex` to rebuild the FTS keyword index.
|
|
130
|
+
- **Stale results after manual DB edits** → `membot reindex`.
|
|
131
|
+
- **Two paths point at the same content** → `membot mv` doesn't merge; tombstone one with `membot rm`.
|
|
132
|
+
|
|
133
|
+
## Configuration
|
|
134
|
+
|
|
135
|
+
- Data lives in `~/.membot/index.duckdb` (override via `MEMBOT_HOME`).
|
|
136
|
+
- Optional `ANTHROPIC_API_KEY` enables LLM fallback for messy/binary input. Without it, conversion degrades to deterministic native output.
|
|
137
|
+
- Config file: `~/.membot/config.json` (see `membot --help` for the global flags).
|
package/.cursor/rules/membot.mdc
ADDED
|
@@ -0,0 +1,137 @@
|
|
|
1
|
+
---
|
|
2
|
+
description: Persistent, versioned context store for AI agents — ingest, search, read, and write knowledge via the membot CLI or MCP server
|
|
3
|
+
globs:
|
|
4
|
+
alwaysApply: true
|
|
5
|
+
---
|
|
6
|
+
|
|
7
|
+
# membot — Persistent Context for Agents
|
|
8
|
+
|
|
9
|
+
You have access to a long-lived context store via `membot`. Files (markdown, PDFs, DOCX, HTML, URLs, agent notes) are ingested, converted to markdown, chunked, embedded locally, and indexed in DuckDB with hybrid search (semantic + BM25). Every artifact is addressed by a virtual `logical_path`. Every change creates a new immutable version — nothing is overwritten in place.
|
|
10
|
+
|
|
11
|
+
Use this workflow:
|
|
12
|
+
|
|
13
|
+
## 1. Discover what's already there
|
|
14
|
+
|
|
15
|
+
Before ingesting, check whether the knowledge already exists.
|
|
16
|
+
|
|
17
|
+
```bash
|
|
18
|
+
membot tree # synthesised directory tree of logical_paths
|
|
19
|
+
membot ls # one row per current file (size, mime, refresh status)
|
|
20
|
+
membot ls docs/ # filter by prefix
|
|
21
|
+
membot search "<question>" # hybrid search (semantic + keyword)
|
|
22
|
+
```
|
|
23
|
+
|
|
24
|
+
`search` is the primary discovery tool — prefer it over scanning files.
|
|
25
|
+
|
|
26
|
+
## 2. Ingest
|
|
27
|
+
|
|
28
|
+
```bash
|
|
29
|
+
membot add ./README.md # single file
|
|
30
|
+
membot add ./docs # recursive directory walk
|
|
31
|
+
membot add "docs/**/*.md" # glob
|
|
32
|
+
membot add https://example.com/spec.pdf # URL (auto-converted to markdown)
|
|
33
|
+
membot add "inline:Decision: use X because Y" # literal text
|
|
34
|
+
membot add ./docs --refresh-frequency 24h # auto-refresh every day
|
|
35
|
+
```
|
|
36
|
+
|
|
37
|
+
Each entry becomes a new version under its own `logical_path`. PDFs/DOCX/HTML are converted to markdown; images get vision captions; original bytes are kept and reachable via `membot read --bytes`.
|
|
38
|
+
|
|
39
|
+
## 3. Read
|
|
40
|
+
|
|
41
|
+
```bash
|
|
42
|
+
membot read <logical_path> # current markdown surrogate
|
|
43
|
+
membot read <logical_path> --bytes # original bytes (base64) — PDF/DOCX/image as ingested
|
|
44
|
+
membot read <logical_path> --version <ts> # historical snapshot
|
|
45
|
+
membot info <logical_path> # metadata only (no content)
|
|
46
|
+
membot versions <logical_path> # every version, newest first
|
|
47
|
+
membot diff <logical_path> --a <ts> [--b <ts>] # unified diff between versions
|
|
48
|
+
```
|
|
49
|
+
|
|
50
|
+
Defaults to the current (non-tombstoned) version. Pass `--version` only when you need history.
|
|
51
|
+
|
|
52
|
+
## 4. Write your own notes
|
|
53
|
+
|
|
54
|
+
Persist agent-authored summaries, decisions, or synthesised context so they survive across conversations:
|
|
55
|
+
|
|
56
|
+
```bash
|
|
57
|
+
membot write notes/decision-2026-05.md --content "Decided to ..."
|
|
58
|
+
```
|
|
59
|
+
|
|
60
|
+
Inline writes create a new `(logical_path, version_id)` row just like file ingests — `membot versions` lists them, `membot diff` compares them. To mirror an external doc that should re-fetch over time, use `membot add <url> --refresh-frequency` instead.
|
|
61
|
+
|
|
62
|
+
## 5. Refresh, rename, delete, prune
|
|
63
|
+
|
|
64
|
+
```bash
|
|
65
|
+
membot refresh <logical_path> # re-read source; new version only if bytes changed
|
|
66
|
+
membot refresh # refresh all rows whose schedule has elapsed
|
|
67
|
+
membot mv old/path new/path # rename (history preserved under both)
|
|
68
|
+
membot rm <logical_path> # tombstone (history still queryable)
|
|
69
|
+
membot prune --before <iso-ts> # drop non-current versions older than cutoff (irreversible)
|
|
70
|
+
```
|
|
71
|
+
|
|
72
|
+
Tombstones hide a path from `ls` / `tree` / `search` but `versions` and `read --version <ts>` still work. Pruning is the only way to actually remove data.
|
|
73
|
+
|
|
74
|
+
## Versioning rules
|
|
75
|
+
|
|
76
|
+
- Defaults always operate on the current, non-tombstoned version.
|
|
77
|
+
- Pass an explicit `--version <timestamp>` (from `membot versions`) to read or diff history.
|
|
78
|
+
- `add`, refresh-with-changes, `write`, and `mv` each create a new version. The previous version is preserved.
|
|
79
|
+
- Mutating an existing version is not possible — corrections are new versions.
|
|
80
|
+
|
|
81
|
+
## When to use this rule
|
|
82
|
+
|
|
83
|
+
- The user asks to remember, recall, save, or look up something across conversations.
|
|
84
|
+
- You need project-specific context (specs, decisions, transcripts, rendered docs) that's larger than fits in the prompt.
|
|
85
|
+
- You need to ingest a document (PDF, DOCX, HTML, URL) and reason over it.
|
|
86
|
+
- You're producing a summary or decision that should survive past this conversation.
|
|
87
|
+
|
|
88
|
+
## When NOT to use this rule
|
|
89
|
+
|
|
90
|
+
- Reading a file the user just pointed at — use the regular file-read tool unless they want it persisted.
|
|
91
|
+
- Storing secrets, credentials, or anything that shouldn't sit in `~/.membot/index.duckdb`.
|
|
92
|
+
- Quick scratch state for the current turn — keep that in the conversation.
|
|
93
|
+
|
|
94
|
+
## MCP server
|
|
95
|
+
|
|
96
|
+
`membot serve` exposes the same operations as MCP tools (`membot_add`, `membot_search`, etc.) over stdio (default) or HTTP (`--http <port>`). When connected, prefer the MCP tools over shelling out — they return structured `outputSchema` data with `version_id` echoed on every read.
|
|
97
|
+
|
|
98
|
+
## Available commands
|
|
99
|
+
|
|
100
|
+
| Command | Purpose |
|
|
101
|
+
| ------------------------------------- | ------------------------------------------------------------------------------ |
|
|
102
|
+
| `membot add <source>` | Ingest file, directory, glob, URL, or `inline:<text>` (one new version each) |
|
|
103
|
+
| `membot ls [prefix]` | List current files (size, mime, refresh status) |
|
|
104
|
+
| `membot tree [prefix]` | Render the synthesised logical-path tree |
|
|
105
|
+
| `membot read <path>` | Read current markdown surrogate (or `--bytes` for original) |
|
|
106
|
+
| `membot write <path> --content <txt>` | Write inline agent-authored markdown as a new version |
|
|
107
|
+
| `membot search <query>` | Hybrid search (semantic + BM25); add `--include-history` to search older versions |
|
|
108
|
+
| `membot info <path>` | Inspect metadata (source, fetcher, refresh schedule, digests) without content |
|
|
109
|
+
| `membot versions <path>` | List every version newest-first with version_id and change notes |
|
|
110
|
+
| `membot diff <path> --a <ts>` | Unified diff between two versions |
|
|
111
|
+
| `membot mv <old> <new>` | Rename a logical_path (history preserved) |
|
|
112
|
+
| `membot rm <path>` | Tombstone a logical_path (history still queryable) |
|
|
113
|
+
| `membot refresh [path]` | Re-read source; create new version only if bytes changed |
|
|
114
|
+
| `membot prune --before <ts>` | Permanently drop non-current versions older than cutoff (irreversible) |
|
|
115
|
+
| `membot serve` | Start MCP server (stdio default, `--http <port>` for HTTP) |
|
|
116
|
+
| `membot reindex` | Rebuild the FTS keyword index over current chunks |
|
|
117
|
+
|
|
118
|
+
## Output formats
|
|
119
|
+
|
|
120
|
+
- TTY → spinners, colors, tables. `--no-color` disables ANSI.
|
|
121
|
+
- Piped, `--json`, `CI=true`, or `NO_COLOR` → JSON to stdout, structured logs to stderr, no ANSI bytes.
|
|
122
|
+
- Use `--json` when parsing output programmatically (it's automatic when piped, but explicit is safer).
|
|
123
|
+
- Use `--verbose` if a command fails unexpectedly.
|
|
124
|
+
|
|
125
|
+
## Troubleshooting
|
|
126
|
+
|
|
127
|
+
- **"ingest failed: unsupported mime"** → Add a converter or pass `--bytes` to keep the original; LLM-fallback only runs when `ANTHROPIC_API_KEY` is set.
|
|
128
|
+
- **"refresh failed: auth"** → The original fetch used an authenticated mcpx tool; re-auth via `mcpx auth <server>`.
|
|
129
|
+
- **Search returns nothing** → Confirm the file ingested with `membot info <path>`; if needed, run `membot reindex` to rebuild the FTS keyword index.
|
|
130
|
+
- **Stale results after manual DB edits** → `membot reindex`.
|
|
131
|
+
- **Two paths point at the same content** → `membot mv` doesn't merge; tombstone one with `membot rm`.
|
|
132
|
+
|
|
133
|
+
## Configuration
|
|
134
|
+
|
|
135
|
+
- Data lives in `~/.membot/index.duckdb` (override via `MEMBOT_HOME`).
|
|
136
|
+
- Optional `ANTHROPIC_API_KEY` enables LLM fallback for messy/binary input. Without it, conversion degrades to deterministic native output.
|
|
137
|
+
- Config file: `~/.membot/config.json` (see `membot --help` for the global flags).
|
package/README.md
ADDED
|
@@ -0,0 +1,131 @@
|
|
|
1
|
+
# membot
|
|
2
|
+
|
|
3
|
+
> Versioned context store with hybrid search for AI agents. Stdio + HTTP MCP server and CLI.
|
|
4
|
+
|
|
5
|
+
[![npm version](https://img.shields.io/npm/v/membot.svg)](https://www.npmjs.com/package/membot)
|
|
6
|
+
[![License: MIT](https://img.shields.io/badge/license-MIT-blue.svg)](./LICENSE)
|
|
7
|
+
|
|
8
|
+
`membot` is a single-binary CLI and MCP server that gives AI agents a persistent, versioned, searchable context store. Files (markdown, PDFs, DOCX, HTML, URLs, agent-authored notes) are ingested, converted to markdown, chunked, embedded **locally** with `@huggingface/transformers` (WASM, no cloud calls), and indexed in DuckDB with hybrid search (semantic vector + BM25). Every change creates a new version — nothing is overwritten in place.
|
|
9
|
+
|
|
10
|
+
- **Local everything** — embeddings run on your machine; data lives in `~/.membot/index.duckdb`.
|
|
11
|
+
- **One mental model** — every artifact (markdown, PDF, image, audio) becomes a markdown surrogate that flows through the same chunk → embed → search pipeline.
|
|
12
|
+
- **Append-only versioning** — every ingest, refresh, or write creates a new `(logical_path, version_id)` row. History is queryable; nothing is mutated.
|
|
13
|
+
- **Two surfaces, one source of truth** — every operation is exposed identically as a CLI subcommand and an MCP tool. The agent sees `membot_search`; you see `membot search`.
|
|
14
|
+
|
|
15
|
+
## Install
|
|
16
|
+
|
|
17
|
+
```bash
|
|
18
|
+
# macOS / Linux — pre-built binary
|
|
19
|
+
curl -fsSL https://raw.githubusercontent.com/evantahler/membot/main/install.sh | bash
|
|
20
|
+
|
|
21
|
+
# Windows — PowerShell
|
|
22
|
+
iwr -useb https://raw.githubusercontent.com/evantahler/membot/main/install.ps1 | iex
|
|
23
|
+
|
|
24
|
+
# From npm (requires Bun or Node)
|
|
25
|
+
bun add -g membot
|
|
26
|
+
# or
|
|
27
|
+
npm install -g membot
|
|
28
|
+
```
|
|
29
|
+
|
|
30
|
+
## Quick start
|
|
31
|
+
|
|
32
|
+
```bash
|
|
33
|
+
membot add ./docs # ingest a directory recursively
|
|
34
|
+
membot add https://example.com/spec.pdf # ingest a URL (auto-converted to markdown)
|
|
35
|
+
membot ls # list current files
|
|
36
|
+
membot search "how does refresh work?" # hybrid search
|
|
37
|
+
membot read docs/refresh.md # read the markdown surrogate
|
|
38
|
+
membot serve # expose the same operations as MCP tools (stdio)
|
|
39
|
+
```
|
|
40
|
+
|
|
41
|
+
## Use with Claude Code or Cursor
|
|
42
|
+
|
|
43
|
+
`membot skill install` drops the agent skill into the right place so Claude Code or Cursor knows **when** to call `membot`.
|
|
44
|
+
|
|
45
|
+
```bash
|
|
46
|
+
membot skill install --claude # writes ./.claude/skills/membot.md (project)
|
|
47
|
+
membot skill install --cursor # writes ./.cursor/rules/membot.mdc (project)
|
|
48
|
+
membot skill install --claude --global # writes ~/.claude/skills/membot.md
|
|
49
|
+
membot skill install --claude --cursor -f # both, overwrite if present
|
|
50
|
+
```
|
|
51
|
+
|
|
52
|
+
The skill files describe the discover → ingest → search → read → write workflow and the versioning rules. You can re-run with `--force` to refresh after upgrading membot.
|
|
53
|
+
|
|
54
|
+
## Commands
|
|
55
|
+
|
|
56
|
+
| Command | Description |
|
|
57
|
+
| ------------------------------- | --------------------------------------------------------------------------------- |
|
|
58
|
+
| `membot add <source>` | Ingest a file, directory, glob, URL, or `inline:<text>`. Each match → new version |
|
|
59
|
+
| `membot ls [prefix]` | List current files (size, mime, refresh status) |
|
|
60
|
+
| `membot tree [prefix]` | Render the synthesised logical-path tree |
|
|
61
|
+
| `membot read <path>` | Read the markdown surrogate (or `--bytes` for original bytes, base64) |
|
|
62
|
+
| `membot search <query>` | Hybrid search (semantic + BM25); `--include-history` searches older versions |
|
|
63
|
+
| `membot info <path>` | Inspect metadata (source, fetcher, schedule, digests) without content |
|
|
64
|
+
| `membot versions <path>` | List every version newest-first |
|
|
65
|
+
| `membot diff <path> <a> [b]` | Unified diff between two versions |
|
|
66
|
+
| `membot write <path>` | Write inline agent-authored markdown as a new version |
|
|
67
|
+
| `membot mv <from> <to>` | Rename a logical_path (history preserved under both) |
|
|
68
|
+
| `membot rm <path>` | Tombstone a logical_path (history still queryable) |
|
|
69
|
+
| `membot refresh [path]` | Re-read source; new version only if bytes changed |
|
|
70
|
+
| `membot prune --before <ts>` | Permanently drop non-current versions older than cutoff (irreversible) |
|
|
71
|
+
| `membot serve` | Run the MCP server (stdio default; `--http <port>` for HTTP) |
|
|
72
|
+
| `membot reindex` | Rebuild the FTS keyword index over current chunks |
|
|
73
|
+
| `membot mcpx <subcommand>` | Forward to the bundled `mcpx` CLI for managing remote MCP servers |
|
|
74
|
+
| `membot skill install` | Install the Claude Code / Cursor agent skill |
|
|
75
|
+
|
|
76
|
+
Run `membot <command> --help` for full flags and arguments. Every command produces JSON when piped, when `--json` is set, or when `CI=true`.
|
|
77
|
+
|
|
78
|
+
## MCP server
|
|
79
|
+
|
|
80
|
+
`membot serve` exposes every operation as an MCP tool. Stdio is the default; pass `--http <port>` for streamable HTTP.
|
|
81
|
+
|
|
82
|
+
**Claude Desktop** (`~/Library/Application Support/Claude/claude_desktop_config.json` on macOS):
|
|
83
|
+
|
|
84
|
+
```json
|
|
85
|
+
{
|
|
86
|
+
"mcpServers": {
|
|
87
|
+
"membot": {
|
|
88
|
+
"command": "membot",
|
|
89
|
+
"args": ["serve"]
|
|
90
|
+
}
|
|
91
|
+
}
|
|
92
|
+
}
|
|
93
|
+
```
|
|
94
|
+
|
|
95
|
+
**Streamable HTTP** (any MCP client that speaks HTTP):
|
|
96
|
+
|
|
97
|
+
```bash
|
|
98
|
+
membot serve --http 3000
|
|
99
|
+
# tool endpoint: http://localhost:3000/mcp
|
|
100
|
+
```
|
|
101
|
+
|
|
102
|
+
Add `--watch` (and optional `--tick <sec>`) to also run the refresh daemon, which re-reads any file whose `refresh_frequency` has elapsed.
|
|
103
|
+
|
|
104
|
+
## Configuration
|
|
105
|
+
|
|
106
|
+
- **Data directory:** `~/.membot/` (override with `MEMBOT_HOME=/path` or `--config <path>`).
|
|
107
|
+
- `~/.membot/index.duckdb` — all content, blobs, chunks, embeddings, and metadata.
|
|
108
|
+
- `~/.membot/models/` — cached embedding model weights (`Xenova/bge-small-en-v1.5`, 384-dim).
|
|
109
|
+
- `~/.membot/logs/` — daemon logs when running `serve --watch`.
|
|
110
|
+
- **Config file:** `~/.membot/config.json` (optional; defaults are sane).
|
|
111
|
+
- **Environment variables:**
|
|
112
|
+
- `ANTHROPIC_API_KEY` — optional. Enables LLM fallback for messy / scanned input (vision captions for images, last-resort markdown conversion). Without it, the pipeline degrades to deterministic native conversion.
|
|
113
|
+
- `MEMBOT_HOME` — override the data directory.
|
|
114
|
+
- `NO_COLOR`, `CI`, `FORCE_COLOR` — standard output controls.
|
|
115
|
+
|
|
116
|
+
## Development
|
|
117
|
+
|
|
118
|
+
```bash
|
|
119
|
+
bun install
|
|
120
|
+
bun run dev <args> # run from source
|
|
121
|
+
bun test # full test suite (real ephemeral DuckDB per test)
|
|
122
|
+
bun run lint # biome + tsc
|
|
123
|
+
bun run format # biome --write
|
|
124
|
+
bun run build # compile a standalone binary into dist/membot
|
|
125
|
+
```
|
|
126
|
+
|
|
127
|
+
Architecture, design constraints, and reference projects are documented in [`docs/plan.md`](./docs/plan.md) and [`CLAUDE.md`](./CLAUDE.md).
|
|
128
|
+
|
|
129
|
+
## License
|
|
130
|
+
|
|
131
|
+
MIT © Evan Tahler
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "membot",
|
|
3
|
-
"version": "0.1.0",
|
|
3
|
+
"version": "0.1.1",
|
|
4
4
|
"description": "Versioned context store with hybrid search for AI agents. Stdio + HTTP MCP server and CLI.",
|
|
5
5
|
"type": "module",
|
|
6
6
|
"exports": {
|
|
@@ -16,6 +16,8 @@
|
|
|
16
16
|
"src",
|
|
17
17
|
"patches",
|
|
18
18
|
"scripts",
|
|
19
|
+
".claude",
|
|
20
|
+
".cursor",
|
|
19
21
|
"README.md",
|
|
20
22
|
"LICENSE"
|
|
21
23
|
],
|
|
@@ -39,7 +41,7 @@
|
|
|
39
41
|
"bun"
|
|
40
42
|
],
|
|
41
43
|
"license": "MIT",
|
|
42
|
-
"author": "Evan Tahler <evan@
|
|
44
|
+
"author": "Evan Tahler <evan@evantahler.com>",
|
|
43
45
|
"repository": {
|
|
44
46
|
"type": "git",
|
|
45
47
|
"url": "https://github.com/evantahler/membot.git"
|
package/src/cli.ts
CHANGED
|
@@ -7,6 +7,7 @@ import { registerCheckUpdateCommand } from "./commands/check-update.ts";
|
|
|
7
7
|
import { registerMcpxCommand } from "./commands/mcpx.ts";
|
|
8
8
|
import { registerReindexCommand } from "./commands/reindex.ts";
|
|
9
9
|
import { registerServeCommand } from "./commands/serve.ts";
|
|
10
|
+
import { registerSkillCommand } from "./commands/skill.ts";
|
|
10
11
|
import { registerUpgradeCommand } from "./commands/upgrade.ts";
|
|
11
12
|
import type { BuildContextOptions } from "./context.ts";
|
|
12
13
|
import { mountAsCommanderCommand } from "./mount/commander.ts";
|
|
@@ -57,6 +58,7 @@ for (const op of OPERATIONS) {
|
|
|
57
58
|
registerServeCommand(program);
|
|
58
59
|
registerReindexCommand(program);
|
|
59
60
|
registerMcpxCommand(program);
|
|
61
|
+
registerSkillCommand(program);
|
|
60
62
|
registerCheckUpdateCommand(program);
|
|
61
63
|
registerUpgradeCommand(program);
|
|
62
64
|
|
package/src/commands/skill.ts
ADDED
|
@@ -0,0 +1,131 @@
|
|
|
1
|
+
import { existsSync, mkdirSync, writeFileSync } from "node:fs";
|
|
2
|
+
import { homedir } from "node:os";
|
|
3
|
+
import { join, resolve } from "node:path";
|
|
4
|
+
import type { Command } from "commander";
|
|
5
|
+
import claudeSkill from "../../.claude/skills/membot.md" with { type: "text" };
|
|
6
|
+
import cursorRule from "../../.cursor/rules/membot.mdc" with { type: "text" };
|
|
7
|
+
import { HelpfulError, isHelpfulError, mapKindToExit } from "../errors.ts";
|
|
8
|
+
import { renderCliError } from "../mount/commander.ts";
|
|
9
|
+
import { logger } from "../output/logger.ts";
|
|
10
|
+
import { detectMode, setMode } from "../output/tty.ts";
|
|
11
|
+
|
|
12
|
+
interface SkillTarget {
|
|
13
|
+
agentLabel: string;
|
|
14
|
+
scopeLabel: string;
|
|
15
|
+
dir: string;
|
|
16
|
+
filename: string;
|
|
17
|
+
content: string;
|
|
18
|
+
}
|
|
19
|
+
|
|
20
|
+
interface SkillInstallOptions {
|
|
21
|
+
claude?: boolean;
|
|
22
|
+
cursor?: boolean;
|
|
23
|
+
global?: boolean;
|
|
24
|
+
project?: boolean;
|
|
25
|
+
force?: boolean;
|
|
26
|
+
}
|
|
27
|
+
|
|
28
|
+
/**
|
|
29
|
+
* `membot skill install [--claude] [--cursor] [--global|--project] [-f]`
|
|
30
|
+
*
|
|
31
|
+
* Drop the membot agent skill into the right location for Claude Code
|
|
32
|
+
* (`.claude/skills/membot.md`) or Cursor (`.cursor/rules/membot.mdc`),
|
|
33
|
+
* either in the current project (default) or in the user's home directory
|
|
34
|
+
* (`--global`). Both flags can be combined to install for both targets at
|
|
35
|
+
* once. The skill files are bundled into the binary via Bun text imports
|
|
36
|
+
* so this works in the compiled distribution as well as in `bun run`.
|
|
37
|
+
*/
|
|
38
|
+
export function registerSkillCommand(program: Command): void {
|
|
39
|
+
const skill = program.command("skill").description("Install agent skills (Claude Code, Cursor)");
|
|
40
|
+
|
|
41
|
+
skill
|
|
42
|
+
.command("install")
|
|
43
|
+
.description(
|
|
44
|
+
"Install the membot skill into Claude Code (.claude/skills/membot.md) and/or Cursor (.cursor/rules/membot.mdc)",
|
|
45
|
+
)
|
|
46
|
+
.option("--claude", "install for Claude Code")
|
|
47
|
+
.option("--cursor", "install for Cursor")
|
|
48
|
+
.option("--global", "install to the user's home directory (default: project)")
|
|
49
|
+
.option("--project", "install to the current working directory (default)")
|
|
50
|
+
.option("-f, --force", "overwrite if the skill file already exists")
|
|
51
|
+
.action((opts: SkillInstallOptions) => {
|
|
52
|
+
const globalOpts = program.optsWithGlobals<{ json?: boolean; verbose?: boolean; color?: boolean }>();
|
|
53
|
+
setMode(
|
|
54
|
+
detectMode({
|
|
55
|
+
json: globalOpts.json,
|
|
56
|
+
verbose: globalOpts.verbose,
|
|
57
|
+
noColor: globalOpts.color === false,
|
|
58
|
+
}),
|
|
59
|
+
);
|
|
60
|
+
try {
|
|
61
|
+
install(opts);
|
|
62
|
+
} catch (err) {
|
|
63
|
+
renderCliError(err);
|
|
64
|
+
process.exit(isHelpfulError(err) ? mapKindToExit(err.kind) : 1);
|
|
65
|
+
}
|
|
66
|
+
});
|
|
67
|
+
}
|
|
68
|
+
|
|
69
|
+
/**
|
|
70
|
+
* Resolve and write every requested skill file. Throws `HelpfulError` on
|
|
71
|
+
* any input or conflict failure so the mount-style error renderer can
|
|
72
|
+
* surface a uniform JSON / colorized message.
|
|
73
|
+
*/
|
|
74
|
+
function install(opts: SkillInstallOptions): void {
|
|
75
|
+
if (!opts.claude && !opts.cursor) {
|
|
76
|
+
throw new HelpfulError({
|
|
77
|
+
kind: "input_error",
|
|
78
|
+
message: "no agent target specified",
|
|
79
|
+
hint: "Pass --claude, --cursor, or both — e.g. `membot skill install --claude`",
|
|
80
|
+
});
|
|
81
|
+
}
|
|
82
|
+
|
|
83
|
+
const targets = computeTargets(opts);
|
|
84
|
+
for (const target of targets) {
|
|
85
|
+
const dest = join(target.dir, target.filename);
|
|
86
|
+
if (existsSync(dest) && !opts.force) {
|
|
87
|
+
throw new HelpfulError({
|
|
88
|
+
kind: "conflict",
|
|
89
|
+
message: `${dest} already exists`,
|
|
90
|
+
hint: "Re-run with --force to overwrite",
|
|
91
|
+
});
|
|
92
|
+
}
|
|
93
|
+
mkdirSync(target.dir, { recursive: true });
|
|
94
|
+
writeFileSync(dest, target.content, "utf-8");
|
|
95
|
+
logger.info(`installed ${target.agentLabel} skill (${target.scopeLabel}): ${dest}`);
|
|
96
|
+
}
|
|
97
|
+
}
|
|
98
|
+
|
|
99
|
+
/**
|
|
100
|
+
* Materialise the (agent × scope) cartesian product of install targets the
|
|
101
|
+
* user asked for. Default scope is project when neither --global nor
|
|
102
|
+
* --project is passed; passing both installs to both locations.
|
|
103
|
+
*/
|
|
104
|
+
function computeTargets(opts: SkillInstallOptions): SkillTarget[] {
|
|
105
|
+
const scopes: { label: string; resolveDir: (rel: string) => string }[] = [];
|
|
106
|
+
if (opts.global) scopes.push({ label: "global", resolveDir: (rel) => join(homedir(), rel) });
|
|
107
|
+
if (opts.project || !opts.global) scopes.push({ label: "project", resolveDir: (rel) => resolve(rel) });
|
|
108
|
+
|
|
109
|
+
const targets: SkillTarget[] = [];
|
|
110
|
+
for (const scope of scopes) {
|
|
111
|
+
if (opts.claude) {
|
|
112
|
+
targets.push({
|
|
113
|
+
agentLabel: "Claude Code",
|
|
114
|
+
scopeLabel: scope.label,
|
|
115
|
+
dir: scope.resolveDir(".claude/skills"),
|
|
116
|
+
filename: "membot.md",
|
|
117
|
+
content: claudeSkill,
|
|
118
|
+
});
|
|
119
|
+
}
|
|
120
|
+
if (opts.cursor) {
|
|
121
|
+
targets.push({
|
|
122
|
+
agentLabel: "Cursor",
|
|
123
|
+
scopeLabel: scope.label,
|
|
124
|
+
dir: scope.resolveDir(".cursor/rules"),
|
|
125
|
+
filename: "membot.mdc",
|
|
126
|
+
content: cursorRule,
|
|
127
|
+
});
|
|
128
|
+
}
|
|
129
|
+
}
|
|
130
|
+
return targets;
|
|
131
|
+
}
|
package/src/ingest/embedder.ts
CHANGED
|
@@ -31,6 +31,15 @@ function isModelCached(model: string): boolean {
|
|
|
31
31
|
* Lazily load (and cache) the feature-extraction pipeline for a model. Loading
|
|
32
32
|
* is expensive (downloads weights on first run, ~100s of ms to instantiate
|
|
33
33
|
* ONNX), so we hold one promise per model name for the life of the process.
|
|
34
|
+
*
|
|
35
|
+
* Try `wasm` first, fall back to `cpu` on "Unsupported device". The transformers
|
|
36
|
+
* patch (applied for `bun build --compile` and via `bun run prebuild` for local
|
|
37
|
+
* dev) registers `wasm` as a supported device backed by onnxruntime-web — that's
|
|
38
|
+
* mandatory for the single-binary build because native bindings can't be
|
|
39
|
+
* bundled. When the package is unpatched (npm-installed membot, or `bun dev`
|
|
40
|
+
* before `prebuild`), `wasm` is rejected and we fall back to the default `cpu`
|
|
41
|
+
* device, which uses the onnxruntime-node native bindings that ship with the
|
|
42
|
+
* unpatched package.
|
|
34
43
|
*/
|
|
35
44
|
async function getPipeline(model: string): Promise<FeatureExtractionPipeline> {
|
|
36
45
|
let p = pipelinePromises.get(model);
|
|
@@ -40,9 +49,15 @@ async function getPipeline(model: string): Promise<FeatureExtractionPipeline> {
|
|
|
40
49
|
} else {
|
|
41
50
|
logger.info(`embedder: loading model ${model} (first run, downloading weights)`);
|
|
42
51
|
}
|
|
43
|
-
|
|
44
|
-
|
|
45
|
-
|
|
52
|
+
p = (async () => {
|
|
53
|
+
try {
|
|
54
|
+
return (await pipeline("feature-extraction", model, { device: "wasm" })) as FeatureExtractionPipeline;
|
|
55
|
+
} catch (err) {
|
|
56
|
+
if (!String((err as Error)?.message ?? "").includes("Unsupported device")) throw err;
|
|
57
|
+
logger.debug("embedder: wasm backend unavailable, falling back to cpu (onnxruntime-node)");
|
|
58
|
+
return (await pipeline("feature-extraction", model, { device: "cpu" })) as FeatureExtractionPipeline;
|
|
59
|
+
}
|
|
60
|
+
})();
|
|
46
61
|
pipelinePromises.set(model, p);
|
|
47
62
|
}
|
|
48
63
|
return p;
|