membot 0.5.2 → 0.6.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.claude/skills/membot.md +25 -10
- package/.cursor/rules/membot.mdc +25 -10
- package/README.md +35 -4
- package/package.json +8 -5
- package/scripts/apply-patches.sh +0 -11
- package/src/cli.ts +2 -2
- package/src/commands/login-page.mustache +50 -0
- package/src/commands/login.ts +83 -0
- package/src/config/schemas.ts +17 -5
- package/src/constants.ts +13 -1
- package/src/context.ts +1 -24
- package/src/db/files.ts +21 -25
- package/src/db/migrations/003-downloader-columns.ts +58 -0
- package/src/db/migrations.ts +2 -1
- package/src/ingest/converter/index.ts +9 -0
- package/src/ingest/converter/xlsx.ts +111 -0
- package/src/ingest/downloaders/browser.ts +180 -0
- package/src/ingest/downloaders/generic-web.ts +81 -0
- package/src/ingest/downloaders/github.ts +178 -0
- package/src/ingest/downloaders/google-docs.ts +56 -0
- package/src/ingest/downloaders/google-shared.ts +86 -0
- package/src/ingest/downloaders/google-sheets.ts +58 -0
- package/src/ingest/downloaders/google-slides.ts +53 -0
- package/src/ingest/downloaders/index.ts +182 -0
- package/src/ingest/downloaders/linear.ts +291 -0
- package/src/ingest/fetcher.ts +104 -129
- package/src/ingest/ingest.ts +43 -70
- package/src/mcp/instructions.ts +4 -2
- package/src/operations/add.ts +6 -4
- package/src/operations/info.ts +4 -6
- package/src/operations/move.ts +2 -3
- package/src/operations/refresh.ts +2 -4
- package/src/operations/remove.ts +23 -2
- package/src/operations/tree.ts +1 -1
- package/src/operations/types.ts +1 -1
- package/src/refresh/runner.ts +59 -114
- package/src/types/text-modules.d.ts +5 -0
- package/patches/@evantahler%2Fmcpx@0.21.4.patch +0 -51
- package/src/commands/mcpx.ts +0 -112
- package/src/ingest/agent-fetcher.ts +0 -639
package/.claude/skills/membot.md
CHANGED
|
@@ -26,15 +26,26 @@ membot search "<question>" # hybrid search (semantic + keyword)
|
|
|
26
26
|
## 2. Ingest
|
|
27
27
|
|
|
28
28
|
```bash
|
|
29
|
-
membot add ./README.md
|
|
30
|
-
membot add ./docs
|
|
31
|
-
membot add "docs/**/*.md"
|
|
32
|
-
membot add a.md b.md "docs/**/*.md"
|
|
33
|
-
membot add https://
|
|
34
|
-
membot add
|
|
35
|
-
membot add
|
|
29
|
+
membot add ./README.md # single file
|
|
30
|
+
membot add ./docs # recursive directory walk
|
|
31
|
+
membot add "docs/**/*.md" # glob
|
|
32
|
+
membot add a.md b.md "docs/**/*.md" # any number of args; each resolved independently
|
|
33
|
+
membot add https://docs.google.com/document/d/<ID>/edit # Google Docs/Sheets/Slides via export endpoints
|
|
34
|
+
membot add https://github.com/<owner>/<repo>/issues/<n> # GitHub issues + PRs (with comments)
|
|
35
|
+
membot add https://linear.app/<workspace>/issue/<KEY> # Linear issues + projects
|
|
36
|
+
membot add https://example.com/spec.pdf # any other URL (browser print-to-PDF fallback)
|
|
37
|
+
membot add "inline:Decision: use X because Y" # literal text
|
|
38
|
+
membot add ./docs --refresh-frequency 24h # auto-refresh every day
|
|
36
39
|
```
|
|
37
40
|
|
|
41
|
+
Remote URLs go through per-service downloaders. Google needs cookies
|
|
42
|
+
captured by `membot login` (one-time browser sign-in); GitHub and
|
|
43
|
+
Linear need API keys set via
|
|
44
|
+
`membot config set downloaders.<svc>.api_key`. If a fetch fails with
|
|
45
|
+
an auth error, the `HelpfulError` will tell you exactly which command
|
|
46
|
+
to run. Fetches are non-interactive — they never open a browser
|
|
47
|
+
during ingest or refresh.
|
|
48
|
+
|
|
38
49
|
Each entry becomes a new version under its own `logical_path`. PDFs/DOCX/HTML are converted to markdown; images get vision captions; original bytes are kept and reachable via `membot read --bytes`.
|
|
39
50
|
|
|
40
51
|
The default `logical_path` mirrors the source path so files with the same basename in different projects don't collide:
|
|
@@ -77,6 +88,7 @@ membot refresh # refresh all rows whose schedule has ela
|
|
|
77
88
|
membot mv old/path new/path # rename (history preserved under both)
|
|
78
89
|
membot rm <paths...> # tombstone one or more paths/globs (history still queryable)
|
|
79
90
|
membot rm "docs/**/*.md" notes/old.md # globs match logical_paths in the DB; literals + globs can mix
|
|
91
|
+
membot rm -r remotes/docs.google.com # --recursive removes every path under a directory prefix
|
|
80
92
|
membot prune --before <iso-ts> # drop non-current versions older than cutoff (irreversible)
|
|
81
93
|
```
|
|
82
94
|
|
|
@@ -116,16 +128,17 @@ Tombstones hide a path from `ls` / `tree` / `search` but `versions` and `read --
|
|
|
116
128
|
| `membot read <path>` | Read current markdown surrogate (or `--bytes` for original) |
|
|
117
129
|
| `membot write <path> --content <txt>` | Write inline agent-authored markdown as a new version |
|
|
118
130
|
| `membot search <query>` | Hybrid search (semantic + BM25); add `--include-history` to search older versions |
|
|
119
|
-
| `membot info <path>` | Inspect metadata (source,
|
|
131
|
+
| `membot info <path>` | Inspect metadata (source, downloader, refresh schedule, digests) without content |
|
|
120
132
|
| `membot versions <path>` | List every version newest-first with version_id and change notes |
|
|
121
133
|
| `membot diff <path> --a <ts>` | Unified diff between two versions |
|
|
122
134
|
| `membot mv <old> <new>` | Rename a logical_path (history preserved) |
|
|
123
|
-
| `membot rm <paths...>` | Tombstone one or more logical_paths or globs (e.g. `"docs/**/*.md"`); history kept |
|
|
135
|
+
| `membot rm <paths...>` | Tombstone one or more logical_paths or globs (e.g. `"docs/**/*.md"`); pass `-r` / `--recursive` to remove a directory prefix; history kept |
|
|
124
136
|
| `membot refresh [path]` | Re-read source; create new version only if bytes changed |
|
|
125
137
|
| `membot prune --before <ts>` | Permanently drop non-current versions older than cutoff (irreversible) |
|
|
126
138
|
| `membot serve` | Start MCP server (stdio default, `--http <port>` for HTTP) |
|
|
127
139
|
| `membot reindex` | Rebuild the FTS keyword index over current chunks |
|
|
128
140
|
| `membot config <subcommand>` | Host-side config management (`get` / `set` / `unset` / `list` / `path`). **Don't run** — this is for the human operator, not for agents |
|
|
141
|
+
| `membot login` | Open a browser to sign into Google / GitHub / Linear / etc. (one-time host-side setup). **Don't run** — this is for the human operator |
|
|
129
142
|
|
|
130
143
|
## Output formats
|
|
131
144
|
|
|
@@ -137,7 +150,9 @@ Tombstones hide a path from `ls` / `tree` / `search` but `versions` and `read --
|
|
|
137
150
|
## Troubleshooting
|
|
138
151
|
|
|
139
152
|
- **"ingest failed: unsupported mime"** → Add a converter or pass `--bytes` to keep the original; LLM-fallback only runs when `ANTHROPIC_API_KEY` is set.
|
|
140
|
-
- **"refresh failed: auth"**
|
|
153
|
+
- **"refresh failed: auth"** for a Google URL → cookies expired. Run `membot login` to refresh the browser session.
|
|
154
|
+
- **"refresh failed: auth"** for a GitHub URL → set the PAT via `membot config set downloaders.github.api_key <PAT>` (or export `GITHUB_TOKEN`).
|
|
155
|
+
- **"refresh failed: auth"** for a Linear URL → set the personal API key via `membot config set downloaders.linear.api_key <KEY>` (create one at `linear.app/settings/api`).
|
|
141
156
|
- **Search returns nothing** → Confirm the file ingested with `membot info <path>`; if needed, run `membot reindex` to rebuild the FTS keyword index.
|
|
142
157
|
- **Stale results after manual DB edits** → `membot reindex`.
|
|
143
158
|
- **Two paths point at the same content** → `membot mv` doesn't merge; tombstone one with `membot rm`.
|
package/.cursor/rules/membot.mdc
CHANGED
|
@@ -26,15 +26,26 @@ membot search "<question>" # hybrid search (semantic + keyword)
|
|
|
26
26
|
## 2. Ingest
|
|
27
27
|
|
|
28
28
|
```bash
|
|
29
|
-
membot add ./README.md
|
|
30
|
-
membot add ./docs
|
|
31
|
-
membot add "docs/**/*.md"
|
|
32
|
-
membot add a.md b.md "docs/**/*.md"
|
|
33
|
-
membot add https://
|
|
34
|
-
membot add
|
|
35
|
-
membot add
|
|
29
|
+
membot add ./README.md # single file
|
|
30
|
+
membot add ./docs # recursive directory walk
|
|
31
|
+
membot add "docs/**/*.md" # glob
|
|
32
|
+
membot add a.md b.md "docs/**/*.md" # any number of args; each resolved independently
|
|
33
|
+
membot add https://docs.google.com/document/d/<ID>/edit # Google Docs/Sheets/Slides via export endpoints
|
|
34
|
+
membot add https://github.com/<owner>/<repo>/issues/<n> # GitHub issues + PRs (with comments)
|
|
35
|
+
membot add https://linear.app/<workspace>/issue/<KEY> # Linear issues + projects
|
|
36
|
+
membot add https://example.com/spec.pdf # any other URL (browser print-to-PDF fallback)
|
|
37
|
+
membot add "inline:Decision: use X because Y" # literal text
|
|
38
|
+
membot add ./docs --refresh-frequency 24h # auto-refresh every day
|
|
36
39
|
```
|
|
37
40
|
|
|
41
|
+
Remote URLs go through per-service downloaders. Google needs cookies
|
|
42
|
+
captured by `membot login` (one-time browser sign-in); GitHub and
|
|
43
|
+
Linear need API keys set via
|
|
44
|
+
`membot config set downloaders.<svc>.api_key`. If a fetch fails with
|
|
45
|
+
an auth error, the `HelpfulError` will tell you exactly which command
|
|
46
|
+
to run. Fetches are non-interactive — they never open a browser
|
|
47
|
+
during ingest or refresh.
|
|
48
|
+
|
|
38
49
|
Each entry becomes a new version under its own `logical_path`. PDFs/DOCX/HTML are converted to markdown; images get vision captions; original bytes are kept and reachable via `membot read --bytes`.
|
|
39
50
|
|
|
40
51
|
The default `logical_path` mirrors the source path so files with the same basename in different projects don't collide:
|
|
@@ -77,6 +88,7 @@ membot refresh # refresh all rows whose schedule has ela
|
|
|
77
88
|
membot mv old/path new/path # rename (history preserved under both)
|
|
78
89
|
membot rm <paths...> # tombstone one or more paths/globs (history still queryable)
|
|
79
90
|
membot rm "docs/**/*.md" notes/old.md # globs match logical_paths in the DB; literals + globs can mix
|
|
91
|
+
membot rm -r remotes/docs.google.com # --recursive removes every path under a directory prefix
|
|
80
92
|
membot prune --before <iso-ts> # drop non-current versions older than cutoff (irreversible)
|
|
81
93
|
```
|
|
82
94
|
|
|
@@ -116,16 +128,17 @@ Tombstones hide a path from `ls` / `tree` / `search` but `versions` and `read --
|
|
|
116
128
|
| `membot read <path>` | Read current markdown surrogate (or `--bytes` for original) |
|
|
117
129
|
| `membot write <path> --content <txt>` | Write inline agent-authored markdown as a new version |
|
|
118
130
|
| `membot search <query>` | Hybrid search (semantic + BM25); add `--include-history` to search older versions |
|
|
119
|
-
| `membot info <path>` | Inspect metadata (source,
|
|
131
|
+
| `membot info <path>` | Inspect metadata (source, downloader, refresh schedule, digests) without content |
|
|
120
132
|
| `membot versions <path>` | List every version newest-first with version_id and change notes |
|
|
121
133
|
| `membot diff <path> --a <ts>` | Unified diff between two versions |
|
|
122
134
|
| `membot mv <old> <new>` | Rename a logical_path (history preserved) |
|
|
123
|
-
| `membot rm <paths...>` | Tombstone one or more logical_paths or globs (e.g. `"docs/**/*.md"`); history kept |
|
|
135
|
+
| `membot rm <paths...>` | Tombstone one or more logical_paths or globs (e.g. `"docs/**/*.md"`); pass `-r` / `--recursive` to remove a directory prefix; history kept |
|
|
124
136
|
| `membot refresh [path]` | Re-read source; create new version only if bytes changed |
|
|
125
137
|
| `membot prune --before <ts>` | Permanently drop non-current versions older than cutoff (irreversible) |
|
|
126
138
|
| `membot serve` | Start MCP server (stdio default, `--http <port>` for HTTP) |
|
|
127
139
|
| `membot reindex` | Rebuild the FTS keyword index over current chunks |
|
|
128
140
|
| `membot config <subcommand>` | Host-side config management (`get` / `set` / `unset` / `list` / `path`). **Don't run** — this is for the human operator, not for agents |
|
|
141
|
+
| `membot login` | Open a browser to sign into Google / GitHub / Linear / etc. (one-time host-side setup). **Don't run** — this is for the human operator |
|
|
129
142
|
|
|
130
143
|
## Output formats
|
|
131
144
|
|
|
@@ -137,7 +150,9 @@ Tombstones hide a path from `ls` / `tree` / `search` but `versions` and `read --
|
|
|
137
150
|
## Troubleshooting
|
|
138
151
|
|
|
139
152
|
- **"ingest failed: unsupported mime"** → Add a converter or pass `--bytes` to keep the original; LLM-fallback only runs when `ANTHROPIC_API_KEY` is set.
|
|
140
|
-
- **"refresh failed: auth"**
|
|
153
|
+
- **"refresh failed: auth"** for a Google URL → cookies expired. Run `membot login` to refresh the browser session.
|
|
154
|
+
- **"refresh failed: auth"** for a GitHub URL → set the PAT via `membot config set downloaders.github.api_key <PAT>` (or export `GITHUB_TOKEN`).
|
|
155
|
+
- **"refresh failed: auth"** for a Linear URL → set the personal API key via `membot config set downloaders.linear.api_key <KEY>` (create one at `linear.app/settings/api`).
|
|
141
156
|
- **Search returns nothing** → Confirm the file ingested with `membot info <path>`; if needed, run `membot reindex` to rebuild the FTS keyword index.
|
|
142
157
|
- **Stale results after manual DB edits** → `membot reindex`.
|
|
143
158
|
- **Two paths point at the same content** → `membot mv` doesn't merge; tombstone one with `membot rm`.
|
package/README.md
CHANGED
|
@@ -16,15 +16,43 @@
|
|
|
16
16
|
|
|
17
17
|
```bash
|
|
18
18
|
bun install -g membot
|
|
19
|
+
bunx playwright install chromium # one-time browser binary download (~150 MB)
|
|
19
20
|
```
|
|
20
21
|
|
|
21
|
-
This pulls in DuckDB's per-platform native bindings alongside membot. The build externalizes `@duckdb/*` (those `.node` bindings can't be embedded by `bun build --compile`), so a global Bun install is the supported path.
|
|
22
|
+
This pulls in DuckDB's per-platform native bindings and Playwright's Chromium binary alongside membot. The build externalizes `@duckdb/*` (those `.node` bindings can't be embedded by `bun build --compile`) and `playwright*` (the browser binary lives in `~/.cache/ms-playwright`), so a global Bun install is the supported path.
|
|
23
|
+
|
|
24
|
+
After installing, set up the services you want to ingest from:
|
|
25
|
+
|
|
26
|
+
```bash
|
|
27
|
+
membot login
|
|
28
|
+
```
|
|
29
|
+
|
|
30
|
+
A real Chromium window opens with two sections:
|
|
31
|
+
|
|
32
|
+
- **Browser sign-in** — Google Docs / Sheets / Slides. Click the Google link in the window, sign in, close the window. Cookies + IndexedDB persist to `~/.membot/auth/browser-profile/` and are reused by every browser-based downloader.
|
|
33
|
+
- **API-key services** — GitHub and Linear. The window shows the settings URL where you create a token and the `membot config set …` command to run in your terminal:
|
|
34
|
+
|
|
35
|
+
```bash
|
|
36
|
+
# GitHub: settings/tokens → fine-grained, repo:read
|
|
37
|
+
membot config set downloaders.github.api_key <PAT>
|
|
38
|
+
# or read from environment
|
|
39
|
+
export GITHUB_TOKEN=<PAT>
|
|
40
|
+
|
|
41
|
+
# Linear: linear.app/settings/api → personal API key
|
|
42
|
+
membot config set downloaders.linear.api_key <KEY>
|
|
43
|
+
```
|
|
44
|
+
|
|
45
|
+
Public GitHub repos work without a token (rate-limited at 60 req/hr). Linear always needs a key.
|
|
22
46
|
|
|
23
47
|
## Quick start
|
|
24
48
|
|
|
25
49
|
```bash
|
|
50
|
+
membot login # one-time: sign into Google / GitHub / Linear in a browser
|
|
26
51
|
membot add ./docs # ingest a directory recursively
|
|
27
|
-
membot add https://
|
|
52
|
+
membot add https://docs.google.com/document/d/.. # Google Docs / Sheets / Slides via export endpoints
|
|
53
|
+
membot add https://github.com/o/r/issues/123 # GitHub issues + PRs (with comments)
|
|
54
|
+
membot add https://linear.app/w/issue/ABC-12 # Linear issues + projects
|
|
55
|
+
membot add https://example.com/spec.pdf # any other URL (browser print-to-PDF fallback)
|
|
28
56
|
membot add a.md b.md "docs/**/*.md" # any number of files / globs in one call
|
|
29
57
|
membot ls # list current files
|
|
30
58
|
membot search "how does refresh work?" # hybrid search
|
|
@@ -59,13 +87,13 @@ The skill files describe the discover → ingest → search → read → write w
|
|
|
59
87
|
| `membot diff <path> <a> [b]` | Unified diff between two versions |
|
|
60
88
|
| `membot write <path>` | Write inline agent-authored markdown as a new version |
|
|
61
89
|
| `membot mv <from> <to>` | Rename a logical_path (history preserved under both) |
|
|
62
|
-
| `membot rm <paths...>` | Tombstone one or more logical_paths or globs (e.g. `"docs/**/*.md"`); history kept |
|
|
90
|
+
| `membot rm <paths...>` | Tombstone one or more logical_paths or globs (e.g. `"docs/**/*.md"`); pass `-r` / `--recursive` to remove a directory prefix; history kept |
|
|
63
91
|
| `membot refresh [path]` | Re-read source; new version only if bytes changed |
|
|
64
92
|
| `membot prune --before <ts>` | Permanently drop non-current versions older than cutoff (irreversible) |
|
|
65
93
|
| `membot serve` | Run the MCP server (stdio default; `--http <port>` for HTTP) |
|
|
66
94
|
| `membot reindex` | Rebuild the FTS keyword index over current chunks |
|
|
67
95
|
| `membot config <subcommand>` | Get / set values in `~/.membot/config.json` (`get`, `set`, `unset`, `list`, `path`) |
|
|
68
|
-
| `membot
|
|
96
|
+
| `membot login` | Open a Chromium window to sign into Google / GitHub / Linear / etc. — closing the window saves the session |
|
|
69
97
|
| `membot skill install` | Install the Claude Code / Cursor agent skill |
|
|
70
98
|
|
|
71
99
|
Run `membot <command> --help` for full flags and arguments. Every command produces JSON when piped, when `--json` is set, or when `CI=true`.
|
|
@@ -114,9 +142,12 @@ Add `--watch` (and optional `--tick <sec>`) to also run the refresh daemon, whic
|
|
|
114
142
|
```
|
|
115
143
|
|
|
116
144
|
Values are written with file mode `0600`. `ANTHROPIC_API_KEY` set in the environment still wins on read, so existing env-var setups keep working.
|
|
145
|
+
- **Browser session:** `~/.membot/auth/browser-profile/` (Playwright persistent profile — cookies, localStorage, IndexedDB). Captured by `membot login`; cookie-based downloaders (Google) reuse it on every fetch. Delete the directory to force a fresh login.
|
|
146
|
+
- **API keys:** stored under `downloaders.<service>.api_key` in `~/.membot/config.json`. Read by API-based downloaders (GitHub, Linear).
|
|
117
147
|
- **Environment variables:**
|
|
118
148
|
- `ANTHROPIC_API_KEY` — optional. Enables LLM fallback for messy / scanned input (vision captions for images, last-resort markdown conversion). Without it, the pipeline degrades to deterministic native conversion. Equivalent to `membot config set llm.anthropic_api_key ...`; the env var takes precedence on read.
|
|
119
149
|
- `MEMBOT_HOME` — override the data directory.
|
|
150
|
+
- `MEMBOT_SKIP_E2E` — skip live-network E2E downloader tests in `bun test`.
|
|
120
151
|
- `NO_COLOR`, `CI`, `FORCE_COLOR` — standard output controls.
|
|
121
152
|
|
|
122
153
|
## Development
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "membot",
|
|
3
|
-
"version": "0.
|
|
3
|
+
"version": "0.6.0",
|
|
4
4
|
"description": "Versioned context store with hybrid search for AI agents. Stdio + HTTP MCP server and CLI.",
|
|
5
5
|
"type": "module",
|
|
6
6
|
"exports": {
|
|
@@ -27,7 +27,7 @@
|
|
|
27
27
|
"lint": "biome ci . && tsc --noEmit",
|
|
28
28
|
"format": "biome check --write .",
|
|
29
29
|
"prebuild": "bash scripts/apply-patches.sh",
|
|
30
|
-
"build": "bun build --compile --minify --sourcemap --external '@duckdb/*' ./src/cli.ts --outfile dist/membot"
|
|
30
|
+
"build": "bun build --compile --minify --sourcemap --external '@duckdb/*' --external 'playwright*' ./src/cli.ts --outfile dist/membot"
|
|
31
31
|
},
|
|
32
32
|
"keywords": [
|
|
33
33
|
"mcp",
|
|
@@ -56,27 +56,30 @@
|
|
|
56
56
|
"dependencies": {
|
|
57
57
|
"@anthropic-ai/sdk": "^0.32.0",
|
|
58
58
|
"@duckdb/node-api": "1.5.2-r.1",
|
|
59
|
-
"@evantahler/mcpx": "^0.21.4",
|
|
60
59
|
"@huggingface/transformers": "^4.2.0",
|
|
61
60
|
"@modelcontextprotocol/sdk": "^1.29.0",
|
|
61
|
+
"@types/picomatch": "^4.0.3",
|
|
62
|
+
"@types/turndown": "^5.0.5",
|
|
62
63
|
"ansis": "^4.2.0",
|
|
63
64
|
"commander": "^14.0.3",
|
|
64
65
|
"gray-matter": "^4.0.3",
|
|
65
66
|
"mammoth": "^1.8.0",
|
|
67
|
+
"mustache": "^4.2.0",
|
|
66
68
|
"nanospinner": "^1.2.2",
|
|
67
69
|
"onnxruntime-web": "1.26.0-dev.20260416-b7804b056c",
|
|
68
70
|
"picomatch": "^4.0.4",
|
|
69
|
-
"
|
|
71
|
+
"playwright": "^1.59.1",
|
|
70
72
|
"tesseract.js": "^5.1.0",
|
|
71
73
|
"turndown": "^7.2.0",
|
|
72
|
-
"@types/turndown": "^5.0.5",
|
|
73
74
|
"unpdf": "^0.12.0",
|
|
75
|
+
"xlsx": "^0.18.5",
|
|
74
76
|
"zod": "^4.0.0",
|
|
75
77
|
"zod-to-json-schema": "^3.23.0"
|
|
76
78
|
},
|
|
77
79
|
"devDependencies": {
|
|
78
80
|
"@biomejs/biome": "^2.4.14",
|
|
79
81
|
"@types/bun": "latest",
|
|
82
|
+
"@types/mustache": "^4.2.6",
|
|
80
83
|
"typescript": "^6"
|
|
81
84
|
},
|
|
82
85
|
"peerDependencies": {
|
package/scripts/apply-patches.sh
CHANGED
|
@@ -38,14 +38,3 @@ apply_patch \
|
|
|
38
38
|
"node_modules/@huggingface/transformers" \
|
|
39
39
|
".membot-transformers-patch-applied"
|
|
40
40
|
|
|
41
|
-
# @evantahler/mcpx — rewrite `src/search/onnx-wasm-paths.ts` so its static
|
|
42
|
-
# `with { type: "file" }` imports of onnxruntime-web's WASM resolve from the
|
|
43
|
-
# consumer's hoisted node_modules layout (../../../../onnxruntime-web/...)
|
|
44
|
-
# instead of mcpx's own repo layout (../../node_modules/...). With this
|
|
45
|
-
# patch in place, mcpx's semantic search runs end-to-end inside membot
|
|
46
|
-
# (the agent fetcher's `mcp_search` exercises it) and `bun build --compile`
|
|
47
|
-
# can bundle the WASM assets into the standalone binary.
|
|
48
|
-
apply_patch \
|
|
49
|
-
"patches/@evantahler%2Fmcpx@0.21.4.patch" \
|
|
50
|
-
"node_modules/@evantahler/mcpx" \
|
|
51
|
-
".membot-mcpx-patch-applied"
|
package/src/cli.ts
CHANGED
|
@@ -5,7 +5,7 @@ import { program } from "commander";
|
|
|
5
5
|
import pkg from "../package.json" with { type: "json" };
|
|
6
6
|
import { registerCheckUpdateCommand } from "./commands/check-update.ts";
|
|
7
7
|
import { registerConfigCommand } from "./commands/config.ts";
|
|
8
|
-
import {
|
|
8
|
+
import { registerLoginCommand } from "./commands/login.ts";
|
|
9
9
|
import { registerReindexCommand } from "./commands/reindex.ts";
|
|
10
10
|
import { registerServeCommand } from "./commands/serve.ts";
|
|
11
11
|
import { registerSkillCommand } from "./commands/skill.ts";
|
|
@@ -59,7 +59,7 @@ for (const op of OPERATIONS) {
|
|
|
59
59
|
registerServeCommand(program);
|
|
60
60
|
registerReindexCommand(program);
|
|
61
61
|
registerConfigCommand(program);
|
|
62
|
-
|
|
62
|
+
registerLoginCommand(program);
|
|
63
63
|
registerSkillCommand(program);
|
|
64
64
|
registerCheckUpdateCommand(program);
|
|
65
65
|
registerUpgradeCommand(program);
|
|
@@ -0,0 +1,50 @@
|
|
|
1
|
+
<!DOCTYPE html>
|
|
2
|
+
<html lang="en">
|
|
3
|
+
<head>
|
|
4
|
+
<meta charset="utf-8" />
|
|
5
|
+
<title>membot login</title>
|
|
6
|
+
<style>
|
|
7
|
+
body { font-family: -apple-system, BlinkMacSystemFont, system-ui, sans-serif; padding: 2.5rem; max-width: 720px; margin: auto; color: #222; line-height: 1.5; }
|
|
8
|
+
h1 { font-size: 1.5rem; margin-bottom: 0.5rem; }
|
|
9
|
+
h2 { font-size: 1.05rem; margin-top: 2rem; margin-bottom: 0.5rem; color: #444; }
|
|
10
|
+
.hint { color: #666; margin-bottom: 1.5rem; }
|
|
11
|
+
ul { padding: 0; list-style: none; }
|
|
12
|
+
li { padding: 0.7rem 0.9rem; border: 1px solid #ddd; border-radius: 6px; margin-bottom: 0.5rem; }
|
|
13
|
+
a { color: #2855ff; text-decoration: none; font-weight: 500; }
|
|
14
|
+
a:hover { text-decoration: underline; }
|
|
15
|
+
.desc { color: #666; }
|
|
16
|
+
.footer { color: #888; font-size: 0.9rem; margin-top: 2rem; }
|
|
17
|
+
code { background: #f5f5f5; padding: 0.1rem 0.35rem; border-radius: 3px; font-size: 0.85rem; }
|
|
18
|
+
pre { background: #f5f5f5; padding: 0.6rem 0.8rem; border-radius: 4px; font-size: 0.85rem; overflow-x: auto; margin: 0.4rem 0 0 0; }
|
|
19
|
+
</style>
|
|
20
|
+
</head>
|
|
21
|
+
<body>
|
|
22
|
+
<h1>Set up the services membot will fetch from</h1>
|
|
23
|
+
<p class="hint">This is a separate browser session from your daily Chrome — even if you're signed in there, you have to set things up here.</p>
|
|
24
|
+
|
|
25
|
+
{{#hasBrowser}}
|
|
26
|
+
<h2>Sign in via this browser</h2>
|
|
27
|
+
<p class="hint">Click any link, complete the login, then close this window when you're done. Cookies + IndexedDB land in your membot profile.</p>
|
|
28
|
+
<ul>
|
|
29
|
+
{{#browser}}
|
|
30
|
+
<li><a href="{{url}}">{{name}}</a>{{#description}} <span class="desc">— {{description}}</span>{{/description}}</li>
|
|
31
|
+
{{/browser}}
|
|
32
|
+
</ul>
|
|
33
|
+
{{/hasBrowser}}
|
|
34
|
+
|
|
35
|
+
{{#hasApiKey}}
|
|
36
|
+
<h2>API-key services</h2>
|
|
37
|
+
<p class="hint">Open the settings page, create a token, then run the command shown in your terminal (not in this browser).</p>
|
|
38
|
+
<ul>
|
|
39
|
+
{{#apiKey}}
|
|
40
|
+
<li>
|
|
41
|
+
<a href="{{url}}">{{name}}</a>{{#description}} <span class="desc">— {{description}}</span>{{/description}}
|
|
42
|
+
<pre>{{setupCommand}}</pre>
|
|
43
|
+
</li>
|
|
44
|
+
{{/apiKey}}
|
|
45
|
+
</ul>
|
|
46
|
+
{{/hasApiKey}}
|
|
47
|
+
|
|
48
|
+
<p class="footer">Closing this window saves the browser-session profile. Run <code>membot login</code> again to refresh expired sessions.</p>
|
|
49
|
+
</body>
|
|
50
|
+
</html>
|
|
@@ -0,0 +1,83 @@
|
|
|
1
|
+
import { join } from "node:path";
|
|
2
|
+
import type { Command } from "commander";
|
|
3
|
+
import Mustache from "mustache";
|
|
4
|
+
import { FILES } from "../constants.ts";
|
|
5
|
+
import { buildContext, closeContext } from "../context.ts";
|
|
6
|
+
import { HelpfulError } from "../errors.ts";
|
|
7
|
+
import { BrowserPool } from "../ingest/downloaders/browser.ts";
|
|
8
|
+
import { collectLoginEntries } from "../ingest/downloaders/index.ts";
|
|
9
|
+
import { logger } from "../output/logger.ts";
|
|
10
|
+
import LOGIN_PAGE_TEMPLATE from "./login-page.mustache" with { type: "text" };
|
|
11
|
+
|
|
12
|
+
/**
|
|
13
|
+
* `membot login`
|
|
14
|
+
*
|
|
15
|
+
* Open a real Chromium window backed by membot's persistent profile
|
|
16
|
+
* (cookies + localStorage + IndexedDB + service workers all stored
|
|
17
|
+
* under `~/.membot/auth/browser-profile/`) and pre-navigate to a
|
|
18
|
+
* small intro page that lists every login button declared by the
|
|
19
|
+
* registered downloaders. Adding a new downloader with `logins: […]`
|
|
20
|
+
* automatically gets a button on this page — login.ts knows nothing
|
|
21
|
+
* service-specific itself.
|
|
22
|
+
*
|
|
23
|
+
* Why a persistent profile instead of `storageState` JSON: SPA-heavy
|
|
24
|
+
* services like Linear stash session/sync state in IndexedDB, which
|
|
25
|
+
* `storageState` doesn't capture. A fresh headless context with
|
|
26
|
+
* cookies but no IndexedDB hangs on Linear's "Loading…" placeholder
|
|
27
|
+
* forever. The persistent profile carries IDB along with cookies, so
|
|
28
|
+
* the next headless run finds Linear's app fully bootstrapped.
|
|
29
|
+
*
|
|
30
|
+
* Window-close detection uses page-close events because on macOS
|
|
31
|
+
* closing the last chromium window doesn't quit the process —
|
|
32
|
+
* `browser.on('disconnected')` never fires. See `BrowserPool.waitForUserDone`.
|
|
33
|
+
*/
|
|
34
|
+
export function registerLoginCommand(program: Command): void {
|
|
35
|
+
program
|
|
36
|
+
.command("login")
|
|
37
|
+
.description(
|
|
38
|
+
"Open a browser to sign into the services membot fetches from — closing the window saves your session.",
|
|
39
|
+
)
|
|
40
|
+
.action(async () => {
|
|
41
|
+
const ctx = await buildContext({});
|
|
42
|
+
const userDataDir = join(ctx.dataDir, FILES.BROWSER_PROFILE);
|
|
43
|
+
const pool = new BrowserPool({ userDataDir, headless: false });
|
|
44
|
+
const entries = collectLoginEntries();
|
|
45
|
+
const html = Mustache.render(LOGIN_PAGE_TEMPLATE, {
|
|
46
|
+
browser: entries.browser,
|
|
47
|
+
apiKey: entries.apiKey,
|
|
48
|
+
hasBrowser: entries.browser.length > 0,
|
|
49
|
+
hasApiKey: entries.apiKey.length > 0,
|
|
50
|
+
});
|
|
51
|
+
|
|
52
|
+
let cookieCount = 0;
|
|
53
|
+
try {
|
|
54
|
+
const page = await pool.newPage();
|
|
55
|
+
await page.goto(`data:text/html;charset=utf-8,${encodeURIComponent(html)}`).catch(() => {});
|
|
56
|
+
|
|
57
|
+
logger.info("Sign into the services you want membot to fetch from, then close the browser window.");
|
|
58
|
+
logger.info(`Session profile will be stored at ${userDataDir}.`);
|
|
59
|
+
|
|
60
|
+
await pool.waitForUserDone(page);
|
|
61
|
+
cookieCount = await pool.cookieCount();
|
|
62
|
+
} catch (err) {
|
|
63
|
+
if (err instanceof HelpfulError) throw err;
|
|
64
|
+
throw new HelpfulError({
|
|
65
|
+
kind: "internal_error",
|
|
66
|
+
message: `login failed: ${err instanceof Error ? err.message : String(err)}`,
|
|
67
|
+
hint: "Run `bunx playwright install chromium` to ensure the browser binary is installed, then retry.",
|
|
68
|
+
});
|
|
69
|
+
} finally {
|
|
70
|
+
await pool.dispose();
|
|
71
|
+
await closeContext(ctx);
|
|
72
|
+
}
|
|
73
|
+
|
|
74
|
+
if (cookieCount === 0) {
|
|
75
|
+
throw new HelpfulError({
|
|
76
|
+
kind: "auth_error",
|
|
77
|
+
message: `Browser profile at ${userDataDir} has no cookies — no service was signed in.`,
|
|
78
|
+
hint: "Run `membot login` again and sign in (Google / GitHub / Linear / …) before closing the window.",
|
|
79
|
+
});
|
|
80
|
+
}
|
|
81
|
+
logger.info(`Saved session profile (${cookieCount} cookie${cookieCount === 1 ? "" : "s"}).`);
|
|
82
|
+
});
|
|
83
|
+
}
|
package/src/config/schemas.ts
CHANGED
|
@@ -15,14 +15,23 @@ export const LlmConfigSchema = z.object({
|
|
|
15
15
|
vision_model: z.string().default(DEFAULTS.VISION_MODEL),
|
|
16
16
|
});
|
|
17
17
|
|
|
18
|
-
export const McpxConfigSchema = z.object({
|
|
19
|
-
config_path: z.string().default(""),
|
|
20
|
-
});
|
|
21
|
-
|
|
22
18
|
export const DaemonConfigSchema = z.object({
|
|
23
19
|
tick_interval_sec: z.number().int().positive().default(DEFAULTS.DAEMON_TICK_SEC),
|
|
24
20
|
});
|
|
25
21
|
|
|
22
|
+
export const LinearDownloaderConfigSchema = z.object({
|
|
23
|
+
api_key: z.string().meta({ secret: true }).default(""),
|
|
24
|
+
});
|
|
25
|
+
|
|
26
|
+
export const GithubDownloaderConfigSchema = z.object({
|
|
27
|
+
api_key: z.string().meta({ secret: true }).default(""),
|
|
28
|
+
});
|
|
29
|
+
|
|
30
|
+
export const DownloadersConfigSchema = z.object({
|
|
31
|
+
linear: LinearDownloaderConfigSchema.default(() => LinearDownloaderConfigSchema.parse({})),
|
|
32
|
+
github: GithubDownloaderConfigSchema.default(() => GithubDownloaderConfigSchema.parse({})),
|
|
33
|
+
});
|
|
34
|
+
|
|
26
35
|
export const DbLockRetryConfigSchema = z.object({
|
|
27
36
|
max_attempts: z.number().int().positive().default(30),
|
|
28
37
|
base_delay_ms: z.number().int().positive().default(100),
|
|
@@ -35,7 +44,7 @@ export const MembotConfigSchema = z.object({
|
|
|
35
44
|
embedding_dimension: z.number().int().positive().default(EMBEDDING_DIMENSION),
|
|
36
45
|
chunker: ChunkerConfigSchema.default(() => ChunkerConfigSchema.parse({})),
|
|
37
46
|
llm: LlmConfigSchema.default(() => LlmConfigSchema.parse({})),
|
|
38
|
-
|
|
47
|
+
downloaders: DownloadersConfigSchema.default(() => DownloadersConfigSchema.parse({})),
|
|
39
48
|
daemon: DaemonConfigSchema.default(() => DaemonConfigSchema.parse({})),
|
|
40
49
|
db_lock_retry: DbLockRetryConfigSchema.default(() => DbLockRetryConfigSchema.parse({})),
|
|
41
50
|
default_refresh_frequency_sec: z.number().int().positive().nullable().default(null),
|
|
@@ -44,3 +53,6 @@ export const MembotConfigSchema = z.object({
|
|
|
44
53
|
export type MembotConfig = z.infer<typeof MembotConfigSchema>;
|
|
45
54
|
export type ChunkerConfig = z.infer<typeof ChunkerConfigSchema>;
|
|
46
55
|
export type LlmConfig = z.infer<typeof LlmConfigSchema>;
|
|
56
|
+
export type DownloadersConfig = z.infer<typeof DownloadersConfigSchema>;
|
|
57
|
+
export type LinearDownloaderConfig = z.infer<typeof LinearDownloaderConfigSchema>;
|
|
58
|
+
export type GithubDownloaderConfig = z.infer<typeof GithubDownloaderConfigSchema>;
|
package/src/constants.ts
CHANGED
|
@@ -13,7 +13,6 @@ export const ENV = {
|
|
|
13
13
|
CONFIG: "MEMBOT_CONFIG",
|
|
14
14
|
DEBUG: "MEMBOT_DEBUG",
|
|
15
15
|
ANTHROPIC_API_KEY: "ANTHROPIC_API_KEY",
|
|
16
|
-
MCPX_CONFIG_PATH: "MCP_CONFIG_PATH",
|
|
17
16
|
NO_UPDATE_CHECK: "MEMBOT_NO_UPDATE_CHECK",
|
|
18
17
|
} as const;
|
|
19
18
|
|
|
@@ -48,4 +47,17 @@ export const FILES = {
|
|
|
48
47
|
INDEX_DUCKDB: "index.duckdb",
|
|
49
48
|
MODELS_DIR: "models",
|
|
50
49
|
LOGS_DIR: "logs",
|
|
50
|
+
AUTH_DIR: "auth",
|
|
51
|
+
/**
|
|
52
|
+
* Persistent Chromium profile directory. We use
|
|
53
|
+
* `chromium.launchPersistentContext(userDataDir)` rather than the
|
|
54
|
+
* lighter `storageState` JSON snapshot because Linear (and other
|
|
55
|
+
* SPA-heavy services) stash critical session state in IndexedDB —
|
|
56
|
+
* which `storageState` doesn't capture. A persistent profile
|
|
57
|
+
* survives the full set: cookies, localStorage, IndexedDB, service
|
|
58
|
+
* workers, etc. Trade-off: directory-sized state instead of a tiny
|
|
59
|
+
* JSON file, and only one process can have the profile open at a
|
|
60
|
+
* time (chromium's single-instance lock).
|
|
61
|
+
*/
|
|
62
|
+
BROWSER_PROFILE: "auth/browser-profile",
|
|
51
63
|
} as const;
|
package/src/context.ts
CHANGED
|
@@ -1,8 +1,7 @@
|
|
|
1
1
|
import { join } from "node:path";
|
|
2
|
-
import { McpxClient } from "@evantahler/mcpx";
|
|
3
2
|
import { loadConfig } from "./config/loader.ts";
|
|
4
3
|
import type { MembotConfig } from "./config/schemas.ts";
|
|
5
|
-
import {
|
|
4
|
+
import { FILES } from "./constants.ts";
|
|
6
5
|
import { type DbConnection, openDb } from "./db/connection.ts";
|
|
7
6
|
import { logger } from "./output/logger.ts";
|
|
8
7
|
import type { Progress } from "./output/progress.ts";
|
|
@@ -16,7 +15,6 @@ export interface AppContext {
|
|
|
16
15
|
db: DbConnection;
|
|
17
16
|
logger: typeof logger;
|
|
18
17
|
progress: Progress;
|
|
19
|
-
mcpx: McpxClient | null;
|
|
20
18
|
}
|
|
21
19
|
|
|
22
20
|
export interface BuildContextOptions {
|
|
@@ -32,7 +30,6 @@ export interface BuildContextOptions {
|
|
|
32
30
|
* - output mode (TTY/JSON/color detection — frozen for the rest of the run)
|
|
33
31
|
* - config (~/.membot/config.json with env overrides)
|
|
34
32
|
* - DuckDB connection (~/.membot/index.duckdb), running migrations on first open
|
|
35
|
-
* - mcpx client (lazy — opened on first remote fetch; null when no servers)
|
|
36
33
|
*/
|
|
37
34
|
export async function buildContext(options: BuildContextOptions = {}): Promise<AppContext> {
|
|
38
35
|
setMode(detectMode({ json: options.json, verbose: options.verbose, noColor: options.noColor }));
|
|
@@ -45,8 +42,6 @@ export async function buildContext(options: BuildContextOptions = {}): Promise<A
|
|
|
45
42
|
maxDelayMs: config.db_lock_retry.max_delay_ms,
|
|
46
43
|
});
|
|
47
44
|
|
|
48
|
-
const mcpx = await maybeMcpx(config);
|
|
49
|
-
|
|
50
45
|
return {
|
|
51
46
|
config,
|
|
52
47
|
dataDir,
|
|
@@ -54,31 +49,13 @@ export async function buildContext(options: BuildContextOptions = {}): Promise<A
|
|
|
54
49
|
db,
|
|
55
50
|
logger,
|
|
56
51
|
progress: createProgress(),
|
|
57
|
-
mcpx,
|
|
58
52
|
};
|
|
59
53
|
}
|
|
60
54
|
|
|
61
|
-
async function maybeMcpx(config: MembotConfig): Promise<McpxClient | null> {
|
|
62
|
-
const configDir = config.mcpx.config_path || process.env[ENV.MCPX_CONFIG_PATH];
|
|
63
|
-
try {
|
|
64
|
-
const client = new McpxClient(configDir ? { configDir } : {});
|
|
65
|
-
return client;
|
|
66
|
-
} catch {
|
|
67
|
-
return null;
|
|
68
|
-
}
|
|
69
|
-
}
|
|
70
|
-
|
|
71
55
|
export async function closeContext(ctx: AppContext): Promise<void> {
|
|
72
56
|
try {
|
|
73
57
|
await ctx.db.close();
|
|
74
58
|
} catch {
|
|
75
59
|
// best effort
|
|
76
60
|
}
|
|
77
|
-
if (ctx.mcpx) {
|
|
78
|
-
try {
|
|
79
|
-
await ctx.mcpx.close();
|
|
80
|
-
} catch {
|
|
81
|
-
// best effort
|
|
82
|
-
}
|
|
83
|
-
}
|
|
84
61
|
}
|