membot 0.5.1 → 0.6.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (40) hide show
  1. package/.claude/skills/membot.md +25 -10
  2. package/.cursor/rules/membot.mdc +25 -10
  3. package/README.md +35 -4
  4. package/package.json +8 -5
  5. package/scripts/apply-patches.sh +0 -11
  6. package/src/cli.ts +2 -2
  7. package/src/commands/login-page.mustache +50 -0
  8. package/src/commands/login.ts +83 -0
  9. package/src/config/schemas.ts +17 -5
  10. package/src/constants.ts +13 -1
  11. package/src/context.ts +1 -24
  12. package/src/db/files.ts +21 -25
  13. package/src/db/migrations/003-downloader-columns.ts +58 -0
  14. package/src/db/migrations.ts +2 -1
  15. package/src/ingest/converter/index.ts +9 -0
  16. package/src/ingest/converter/xlsx.ts +111 -0
  17. package/src/ingest/downloaders/browser.ts +180 -0
  18. package/src/ingest/downloaders/generic-web.ts +81 -0
  19. package/src/ingest/downloaders/github.ts +178 -0
  20. package/src/ingest/downloaders/google-docs.ts +56 -0
  21. package/src/ingest/downloaders/google-shared.ts +86 -0
  22. package/src/ingest/downloaders/google-sheets.ts +58 -0
  23. package/src/ingest/downloaders/google-slides.ts +53 -0
  24. package/src/ingest/downloaders/index.ts +182 -0
  25. package/src/ingest/downloaders/linear.ts +291 -0
  26. package/src/ingest/fetcher.ts +107 -127
  27. package/src/ingest/ingest.ts +43 -69
  28. package/src/mcp/instructions.ts +4 -2
  29. package/src/operations/add.ts +6 -4
  30. package/src/operations/info.ts +4 -6
  31. package/src/operations/move.ts +2 -3
  32. package/src/operations/refresh.ts +2 -4
  33. package/src/operations/remove.ts +23 -2
  34. package/src/operations/tree.ts +1 -1
  35. package/src/operations/types.ts +1 -1
  36. package/src/refresh/runner.ts +59 -114
  37. package/src/types/text-modules.d.ts +5 -0
  38. package/patches/@evantahler%2Fmcpx@0.21.4.patch +0 -51
  39. package/src/commands/mcpx.ts +0 -112
  40. package/src/ingest/agent-fetcher.ts +0 -564
@@ -26,15 +26,26 @@ membot search "<question>" # hybrid search (semantic + keyword)
26
26
  ## 2. Ingest
27
27
 
28
28
  ```bash
29
- membot add ./README.md # single file
30
- membot add ./docs # recursive directory walk
31
- membot add "docs/**/*.md" # glob
32
- membot add a.md b.md "docs/**/*.md" # any number of args; each resolved independently
33
- membot add https://example.com/spec.pdf # URL (auto-converted to markdown)
34
- membot add "inline:Decision: use X because Y" # literal text
35
- membot add ./docs --refresh-frequency 24h # auto-refresh every day
29
+ membot add ./README.md # single file
30
+ membot add ./docs # recursive directory walk
31
+ membot add "docs/**/*.md" # glob
32
+ membot add a.md b.md "docs/**/*.md" # any number of args; each resolved independently
33
+ membot add https://docs.google.com/document/d/<ID>/edit # Google Docs/Sheets/Slides via export endpoints
34
+ membot add https://github.com/<owner>/<repo>/issues/<n> # GitHub issues + PRs (with comments)
35
+ membot add https://linear.app/<workspace>/issue/<KEY> # Linear issues + projects
36
+ membot add https://example.com/spec.pdf # any other URL (browser print-to-PDF fallback)
37
+ membot add "inline:Decision: use X because Y" # literal text
38
+ membot add ./docs --refresh-frequency 24h # auto-refresh every day
36
39
  ```
37
40
 
41
+ Remote URLs go through per-service downloaders. Google needs cookies
42
+ captured by `membot login` (one-time browser sign-in); GitHub and
43
+ Linear need API keys set via
44
+ `membot config set downloaders.<svc>.api_key`. If a fetch fails with
45
+ an auth error, the `HelpfulError` will tell you exactly which command
46
+ to run. Fetches are non-interactive — they never open a browser
47
+ during ingest or refresh.
48
+
38
49
  Each entry becomes a new version under its own `logical_path`. PDFs/DOCX/HTML are converted to markdown; images get vision captions; original bytes are kept and reachable via `membot read --bytes`.
39
50
 
40
51
  The default `logical_path` mirrors the source path so files with the same basename in different projects don't collide:
@@ -77,6 +88,7 @@ membot refresh # refresh all rows whose schedule has ela
77
88
  membot mv old/path new/path # rename (history preserved under both)
78
89
  membot rm <paths...> # tombstone one or more paths/globs (history still queryable)
79
90
  membot rm "docs/**/*.md" notes/old.md # globs match logical_paths in the DB; literals + globs can mix
91
+ membot rm -r remotes/docs.google.com # --recursive removes every path under a directory prefix
80
92
  membot prune --before <iso-ts> # drop non-current versions older than cutoff (irreversible)
81
93
  ```
82
94
 
@@ -116,16 +128,17 @@ Tombstones hide a path from `ls` / `tree` / `search` but `versions` and `read --
116
128
  | `membot read <path>` | Read current markdown surrogate (or `--bytes` for original) |
117
129
  | `membot write <path> --content <txt>` | Write inline agent-authored markdown as a new version |
118
130
  | `membot search <query>` | Hybrid search (semantic + BM25); add `--include-history` to search older versions |
119
- | `membot info <path>` | Inspect metadata (source, fetcher, refresh schedule, digests) without content |
131
+ | `membot info <path>` | Inspect metadata (source, downloader, refresh schedule, digests) without content |
120
132
  | `membot versions <path>` | List every version newest-first with version_id and change notes |
121
133
  | `membot diff <path> --a <ts>` | Unified diff between two versions |
122
134
  | `membot mv <old> <new>` | Rename a logical_path (history preserved) |
123
- | `membot rm <paths...>` | Tombstone one or more logical_paths or globs (e.g. `"docs/**/*.md"`); history kept |
135
+ | `membot rm <paths...>` | Tombstone one or more logical_paths or globs (e.g. `"docs/**/*.md"`); pass `-r` / `--recursive` to remove a directory prefix; history kept |
124
136
  | `membot refresh [path]` | Re-read source; create new version only if bytes changed |
125
137
  | `membot prune --before <ts>` | Permanently drop non-current versions older than cutoff (irreversible) |
126
138
  | `membot serve` | Start MCP server (stdio default, `--http <port>` for HTTP) |
127
139
  | `membot reindex` | Rebuild the FTS keyword index over current chunks |
128
140
  | `membot config <subcommand>` | Host-side config management (`get` / `set` / `unset` / `list` / `path`). **Don't run** — this is for the human operator, not for agents |
141
+ | `membot login` | Open a browser to sign into Google / GitHub / Linear / etc. (one-time host-side setup). **Don't run** — this is for the human operator |
129
142
 
130
143
  ## Output formats
131
144
 
@@ -137,7 +150,9 @@ Tombstones hide a path from `ls` / `tree` / `search` but `versions` and `read --
137
150
  ## Troubleshooting
138
151
 
139
152
  - **"ingest failed: unsupported mime"** → Add a converter or pass `--bytes` to keep the original; LLM-fallback only runs when `ANTHROPIC_API_KEY` is set.
140
- - **"refresh failed: auth"** The original fetch used an authenticated mcpx tool; re-auth via `mcpx auth <server>`.
153
+ - **"refresh failed: auth"** for a Google URL → cookies expired. Run `membot login` to refresh the browser session.
154
+ - **"refresh failed: auth"** for a GitHub URL → set the PAT via `membot config set downloaders.github.api_key <PAT>` (or export `GITHUB_TOKEN`).
155
+ - **"refresh failed: auth"** for a Linear URL → set the personal API key via `membot config set downloaders.linear.api_key <KEY>` (create one at `linear.app/settings/api`).
141
156
  - **Search returns nothing** → Confirm the file ingested with `membot info <path>`; if needed, run `membot reindex` to rebuild the FTS keyword index.
142
157
  - **Stale results after manual DB edits** → `membot reindex`.
143
158
  - **Two paths point at the same content** → `membot mv` doesn't merge; tombstone one with `membot rm`.
@@ -26,15 +26,26 @@ membot search "<question>" # hybrid search (semantic + keyword)
26
26
  ## 2. Ingest
27
27
 
28
28
  ```bash
29
- membot add ./README.md # single file
30
- membot add ./docs # recursive directory walk
31
- membot add "docs/**/*.md" # glob
32
- membot add a.md b.md "docs/**/*.md" # any number of args; each resolved independently
33
- membot add https://example.com/spec.pdf # URL (auto-converted to markdown)
34
- membot add "inline:Decision: use X because Y" # literal text
35
- membot add ./docs --refresh-frequency 24h # auto-refresh every day
29
+ membot add ./README.md # single file
30
+ membot add ./docs # recursive directory walk
31
+ membot add "docs/**/*.md" # glob
32
+ membot add a.md b.md "docs/**/*.md" # any number of args; each resolved independently
33
+ membot add https://docs.google.com/document/d/<ID>/edit # Google Docs/Sheets/Slides via export endpoints
34
+ membot add https://github.com/<owner>/<repo>/issues/<n> # GitHub issues + PRs (with comments)
35
+ membot add https://linear.app/<workspace>/issue/<KEY> # Linear issues + projects
36
+ membot add https://example.com/spec.pdf # any other URL (browser print-to-PDF fallback)
37
+ membot add "inline:Decision: use X because Y" # literal text
38
+ membot add ./docs --refresh-frequency 24h # auto-refresh every day
36
39
  ```
37
40
 
41
+ Remote URLs go through per-service downloaders. Google needs cookies
42
+ captured by `membot login` (one-time browser sign-in); GitHub and
43
+ Linear need API keys set via
44
+ `membot config set downloaders.<svc>.api_key`. If a fetch fails with
45
+ an auth error, the `HelpfulError` will tell you exactly which command
46
+ to run. Fetches are non-interactive — they never open a browser
47
+ during ingest or refresh.
48
+
38
49
  Each entry becomes a new version under its own `logical_path`. PDFs/DOCX/HTML are converted to markdown; images get vision captions; original bytes are kept and reachable via `membot read --bytes`.
39
50
 
40
51
  The default `logical_path` mirrors the source path so files with the same basename in different projects don't collide:
@@ -77,6 +88,7 @@ membot refresh # refresh all rows whose schedule has ela
77
88
  membot mv old/path new/path # rename (history preserved under both)
78
89
  membot rm <paths...> # tombstone one or more paths/globs (history still queryable)
79
90
  membot rm "docs/**/*.md" notes/old.md # globs match logical_paths in the DB; literals + globs can mix
91
+ membot rm -r remotes/docs.google.com # --recursive removes every path under a directory prefix
80
92
  membot prune --before <iso-ts> # drop non-current versions older than cutoff (irreversible)
81
93
  ```
82
94
 
@@ -116,16 +128,17 @@ Tombstones hide a path from `ls` / `tree` / `search` but `versions` and `read --
116
128
  | `membot read <path>` | Read current markdown surrogate (or `--bytes` for original) |
117
129
  | `membot write <path> --content <txt>` | Write inline agent-authored markdown as a new version |
118
130
  | `membot search <query>` | Hybrid search (semantic + BM25); add `--include-history` to search older versions |
119
- | `membot info <path>` | Inspect metadata (source, fetcher, refresh schedule, digests) without content |
131
+ | `membot info <path>` | Inspect metadata (source, downloader, refresh schedule, digests) without content |
120
132
  | `membot versions <path>` | List every version newest-first with version_id and change notes |
121
133
  | `membot diff <path> --a <ts>` | Unified diff between two versions |
122
134
  | `membot mv <old> <new>` | Rename a logical_path (history preserved) |
123
- | `membot rm <paths...>` | Tombstone one or more logical_paths or globs (e.g. `"docs/**/*.md"`); history kept |
135
+ | `membot rm <paths...>` | Tombstone one or more logical_paths or globs (e.g. `"docs/**/*.md"`); pass `-r` / `--recursive` to remove a directory prefix; history kept |
124
136
  | `membot refresh [path]` | Re-read source; create new version only if bytes changed |
125
137
  | `membot prune --before <ts>` | Permanently drop non-current versions older than cutoff (irreversible) |
126
138
  | `membot serve` | Start MCP server (stdio default, `--http <port>` for HTTP) |
127
139
  | `membot reindex` | Rebuild the FTS keyword index over current chunks |
128
140
  | `membot config <subcommand>` | Host-side config management (`get` / `set` / `unset` / `list` / `path`). **Don't run** — this is for the human operator, not for agents |
141
+ | `membot login` | Open a browser to sign into Google / GitHub / Linear / etc. (one-time host-side setup). **Don't run** — this is for the human operator |
129
142
 
130
143
  ## Output formats
131
144
 
@@ -137,7 +150,9 @@ Tombstones hide a path from `ls` / `tree` / `search` but `versions` and `read --
137
150
  ## Troubleshooting
138
151
 
139
152
  - **"ingest failed: unsupported mime"** → Add a converter or pass `--bytes` to keep the original; LLM-fallback only runs when `ANTHROPIC_API_KEY` is set.
140
- - **"refresh failed: auth"** The original fetch used an authenticated mcpx tool; re-auth via `mcpx auth <server>`.
153
+ - **"refresh failed: auth"** for a Google URL → cookies expired. Run `membot login` to refresh the browser session.
154
+ - **"refresh failed: auth"** for a GitHub URL → set the PAT via `membot config set downloaders.github.api_key <PAT>` (or export `GITHUB_TOKEN`).
155
+ - **"refresh failed: auth"** for a Linear URL → set the personal API key via `membot config set downloaders.linear.api_key <KEY>` (create one at `linear.app/settings/api`).
141
156
  - **Search returns nothing** → Confirm the file ingested with `membot info <path>`; if needed, run `membot reindex` to rebuild the FTS keyword index.
142
157
  - **Stale results after manual DB edits** → `membot reindex`.
143
158
  - **Two paths point at the same content** → `membot mv` doesn't merge; tombstone one with `membot rm`.
package/README.md CHANGED
@@ -16,15 +16,43 @@
16
16
 
17
17
  ```bash
18
18
  bun install -g membot
19
+ bunx playwright install chromium # one-time browser binary download (~150 MB)
19
20
  ```
20
21
 
21
- This pulls in DuckDB's per-platform native bindings alongside membot. The build externalizes `@duckdb/*` (those `.node` bindings can't be embedded by `bun build --compile`), so a global Bun install is the supported path.
22
+ This pulls in DuckDB's per-platform native bindings and Playwright's Chromium binary alongside membot. The build externalizes `@duckdb/*` (those `.node` bindings can't be embedded by `bun build --compile`) and `playwright*` (the browser binary lives in `~/.cache/ms-playwright`), so a global Bun install is the supported path.
23
+
24
+ After installing, set up the services you want to ingest from:
25
+
26
+ ```bash
27
+ membot login
28
+ ```
29
+
30
+ A real Chromium window opens with two sections:
31
+
32
+ - **Browser sign-in** — Google Docs / Sheets / Slides. Click the Google link in the window, sign in, close the window. Cookies + IndexedDB persist to `~/.membot/auth/browser-profile/` and are reused by every browser-based downloader.
33
+ - **API-key services** — GitHub and Linear. The window shows the settings URL where you create a token and the `membot config set …` command to run in your terminal:
34
+
35
+ ```bash
36
+ # GitHub: settings/tokens → fine-grained, repo:read
37
+ membot config set downloaders.github.api_key <PAT>
38
+ # or read from environment
39
+ export GITHUB_TOKEN=<PAT>
40
+
41
+ # Linear: linear.app/settings/api → personal API key
42
+ membot config set downloaders.linear.api_key <KEY>
43
+ ```
44
+
45
+ Public GitHub repos work without a token (rate-limited at 60 req/hr). Linear always needs a key.
22
46
 
23
47
  ## Quick start
24
48
 
25
49
  ```bash
50
+ membot login # one-time: sign into Google / GitHub / Linear in a browser
26
51
  membot add ./docs # ingest a directory recursively
27
- membot add https://example.com/spec.pdf # ingest a URL (auto-converted to markdown)
52
+ membot add https://docs.google.com/document/d/.. # Google Docs / Sheets / Slides via export endpoints
53
+ membot add https://github.com/o/r/issues/123 # GitHub issues + PRs (with comments)
54
+ membot add https://linear.app/w/issue/ABC-12 # Linear issues + projects
55
+ membot add https://example.com/spec.pdf # any other URL (browser print-to-PDF fallback)
28
56
  membot add a.md b.md "docs/**/*.md" # any number of files / globs in one call
29
57
  membot ls # list current files
30
58
  membot search "how does refresh work?" # hybrid search
@@ -59,13 +87,13 @@ The skill files describe the discover → ingest → search → read → write w
59
87
  | `membot diff <path> <a> [b]` | Unified diff between two versions |
60
88
  | `membot write <path>` | Write inline agent-authored markdown as a new version |
61
89
  | `membot mv <from> <to>` | Rename a logical_path (history preserved under both) |
62
- | `membot rm <paths...>` | Tombstone one or more logical_paths or globs (e.g. `"docs/**/*.md"`); history kept |
90
+ | `membot rm <paths...>` | Tombstone one or more logical_paths or globs (e.g. `"docs/**/*.md"`); pass `-r` / `--recursive` to remove a directory prefix; history kept |
63
91
  | `membot refresh [path]` | Re-read source; new version only if bytes changed |
64
92
  | `membot prune --before <ts>` | Permanently drop non-current versions older than cutoff (irreversible) |
65
93
  | `membot serve` | Run the MCP server (stdio default; `--http <port>` for HTTP) |
66
94
  | `membot reindex` | Rebuild the FTS keyword index over current chunks |
67
95
  | `membot config <subcommand>` | Get / set values in `~/.membot/config.json` (`get`, `set`, `unset`, `list`, `path`) |
68
- | `membot mcpx <subcommand>` | Forward to the bundled `mcpx` CLI for managing remote MCP servers |
96
+ | `membot login` | Open a Chromium window to sign into Google / GitHub / Linear / etc. — closing the window saves the session |
69
97
  | `membot skill install` | Install the Claude Code / Cursor agent skill |
70
98
 
71
99
  Run `membot <command> --help` for full flags and arguments. Every command produces JSON when piped, when `--json` is set, or when `CI=true`.
@@ -114,9 +142,12 @@ Add `--watch` (and optional `--tick <sec>`) to also run the refresh daemon, whic
114
142
  ```
115
143
 
116
144
  Values are written with file mode `0600`. `ANTHROPIC_API_KEY` set in the environment still wins on read, so existing env-var setups keep working.
145
+ - **Browser session:** `~/.membot/auth/browser-profile/` (Playwright persistent profile — cookies, localStorage, IndexedDB). Captured by `membot login`; cookie-based downloaders (Google) reuse it on every fetch. Delete the directory to force a fresh login.
146
+ - **API keys:** stored under `downloaders.<service>.api_key` in `~/.membot/config.json`. Read by API-based downloaders (GitHub, Linear).
117
147
  - **Environment variables:**
118
148
  - `ANTHROPIC_API_KEY` — optional. Enables LLM fallback for messy / scanned input (vision captions for images, last-resort markdown conversion). Without it, the pipeline degrades to deterministic native conversion. Equivalent to `membot config set llm.anthropic_api_key ...`; the env var takes precedence on read.
119
149
  - `MEMBOT_HOME` — override the data directory.
150
+ - `MEMBOT_SKIP_E2E` — skip live-network E2E downloader tests in `bun test`.
120
151
  - `NO_COLOR`, `CI`, `FORCE_COLOR` — standard output controls.
121
152
 
122
153
  ## Development
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "membot",
3
- "version": "0.5.1",
3
+ "version": "0.6.0",
4
4
  "description": "Versioned context store with hybrid search for AI agents. Stdio + HTTP MCP server and CLI.",
5
5
  "type": "module",
6
6
  "exports": {
@@ -27,7 +27,7 @@
27
27
  "lint": "biome ci . && tsc --noEmit",
28
28
  "format": "biome check --write .",
29
29
  "prebuild": "bash scripts/apply-patches.sh",
30
- "build": "bun build --compile --minify --sourcemap --external '@duckdb/*' ./src/cli.ts --outfile dist/membot"
30
+ "build": "bun build --compile --minify --sourcemap --external '@duckdb/*' --external 'playwright*' ./src/cli.ts --outfile dist/membot"
31
31
  },
32
32
  "keywords": [
33
33
  "mcp",
@@ -56,27 +56,30 @@
56
56
  "dependencies": {
57
57
  "@anthropic-ai/sdk": "^0.32.0",
58
58
  "@duckdb/node-api": "1.5.2-r.1",
59
- "@evantahler/mcpx": "^0.21.4",
60
59
  "@huggingface/transformers": "^4.2.0",
61
60
  "@modelcontextprotocol/sdk": "^1.29.0",
61
+ "@types/picomatch": "^4.0.3",
62
+ "@types/turndown": "^5.0.5",
62
63
  "ansis": "^4.2.0",
63
64
  "commander": "^14.0.3",
64
65
  "gray-matter": "^4.0.3",
65
66
  "mammoth": "^1.8.0",
67
+ "mustache": "^4.2.0",
66
68
  "nanospinner": "^1.2.2",
67
69
  "onnxruntime-web": "1.26.0-dev.20260416-b7804b056c",
68
70
  "picomatch": "^4.0.4",
69
- "@types/picomatch": "^4.0.3",
71
+ "playwright": "^1.59.1",
70
72
  "tesseract.js": "^5.1.0",
71
73
  "turndown": "^7.2.0",
72
- "@types/turndown": "^5.0.5",
73
74
  "unpdf": "^0.12.0",
75
+ "xlsx": "^0.18.5",
74
76
  "zod": "^4.0.0",
75
77
  "zod-to-json-schema": "^3.23.0"
76
78
  },
77
79
  "devDependencies": {
78
80
  "@biomejs/biome": "^2.4.14",
79
81
  "@types/bun": "latest",
82
+ "@types/mustache": "^4.2.6",
80
83
  "typescript": "^6"
81
84
  },
82
85
  "peerDependencies": {
@@ -38,14 +38,3 @@ apply_patch \
38
38
  "node_modules/@huggingface/transformers" \
39
39
  ".membot-transformers-patch-applied"
40
40
 
41
- # @evantahler/mcpx — rewrite `src/search/onnx-wasm-paths.ts` so its static
42
- # `with { type: "file" }` imports of onnxruntime-web's WASM resolve from the
43
- # consumer's hoisted node_modules layout (../../../../onnxruntime-web/...)
44
- # instead of mcpx's own repo layout (../../node_modules/...). With this
45
- # patch in place, mcpx's semantic search runs end-to-end inside membot
46
- # (the agent fetcher's `mcp_search` exercises it) and `bun build --compile`
47
- # can bundle the WASM assets into the standalone binary.
48
- apply_patch \
49
- "patches/@evantahler%2Fmcpx@0.21.4.patch" \
50
- "node_modules/@evantahler/mcpx" \
51
- ".membot-mcpx-patch-applied"
package/src/cli.ts CHANGED
@@ -5,7 +5,7 @@ import { program } from "commander";
5
5
  import pkg from "../package.json" with { type: "json" };
6
6
  import { registerCheckUpdateCommand } from "./commands/check-update.ts";
7
7
  import { registerConfigCommand } from "./commands/config.ts";
8
- import { registerMcpxCommand } from "./commands/mcpx.ts";
8
+ import { registerLoginCommand } from "./commands/login.ts";
9
9
  import { registerReindexCommand } from "./commands/reindex.ts";
10
10
  import { registerServeCommand } from "./commands/serve.ts";
11
11
  import { registerSkillCommand } from "./commands/skill.ts";
@@ -59,7 +59,7 @@ for (const op of OPERATIONS) {
59
59
  registerServeCommand(program);
60
60
  registerReindexCommand(program);
61
61
  registerConfigCommand(program);
62
- registerMcpxCommand(program);
62
+ registerLoginCommand(program);
63
63
  registerSkillCommand(program);
64
64
  registerCheckUpdateCommand(program);
65
65
  registerUpgradeCommand(program);
@@ -0,0 +1,50 @@
1
+ <!DOCTYPE html>
2
+ <html lang="en">
3
+ <head>
4
+ <meta charset="utf-8" />
5
+ <title>membot login</title>
6
+ <style>
7
+ body { font-family: -apple-system, BlinkMacSystemFont, system-ui, sans-serif; padding: 2.5rem; max-width: 720px; margin: auto; color: #222; line-height: 1.5; }
8
+ h1 { font-size: 1.5rem; margin-bottom: 0.5rem; }
9
+ h2 { font-size: 1.05rem; margin-top: 2rem; margin-bottom: 0.5rem; color: #444; }
10
+ .hint { color: #666; margin-bottom: 1.5rem; }
11
+ ul { padding: 0; list-style: none; }
12
+ li { padding: 0.7rem 0.9rem; border: 1px solid #ddd; border-radius: 6px; margin-bottom: 0.5rem; }
13
+ a { color: #2855ff; text-decoration: none; font-weight: 500; }
14
+ a:hover { text-decoration: underline; }
15
+ .desc { color: #666; }
16
+ .footer { color: #888; font-size: 0.9rem; margin-top: 2rem; }
17
+ code { background: #f5f5f5; padding: 0.1rem 0.35rem; border-radius: 3px; font-size: 0.85rem; }
18
+ pre { background: #f5f5f5; padding: 0.6rem 0.8rem; border-radius: 4px; font-size: 0.85rem; overflow-x: auto; margin: 0.4rem 0 0 0; }
19
+ </style>
20
+ </head>
21
+ <body>
22
+ <h1>Set up the services membot will fetch from</h1>
23
+ <p class="hint">This is a separate browser session from your daily Chrome — even if you're signed in there, you have to set things up here.</p>
24
+
25
+ {{#hasBrowser}}
26
+ <h2>Sign in via this browser</h2>
27
+ <p class="hint">Click any link, complete the login, then close this window when you're done. Cookies + IndexedDB land in your membot profile.</p>
28
+ <ul>
29
+ {{#browser}}
30
+ <li><a href="{{url}}">{{name}}</a>{{#description}} <span class="desc">— {{description}}</span>{{/description}}</li>
31
+ {{/browser}}
32
+ </ul>
33
+ {{/hasBrowser}}
34
+
35
+ {{#hasApiKey}}
36
+ <h2>API-key services</h2>
37
+ <p class="hint">Open the settings page, create a token, then run the command shown in your terminal (not in this browser).</p>
38
+ <ul>
39
+ {{#apiKey}}
40
+ <li>
41
+ <a href="{{url}}">{{name}}</a>{{#description}} <span class="desc">— {{description}}</span>{{/description}}
42
+ <pre>{{setupCommand}}</pre>
43
+ </li>
44
+ {{/apiKey}}
45
+ </ul>
46
+ {{/hasApiKey}}
47
+
48
+ <p class="footer">Closing this window saves the browser-session profile. Run <code>membot login</code> again to refresh expired sessions.</p>
49
+ </body>
50
+ </html>
@@ -0,0 +1,83 @@
1
+ import { join } from "node:path";
2
+ import type { Command } from "commander";
3
+ import Mustache from "mustache";
4
+ import { FILES } from "../constants.ts";
5
+ import { buildContext, closeContext } from "../context.ts";
6
+ import { HelpfulError } from "../errors.ts";
7
+ import { BrowserPool } from "../ingest/downloaders/browser.ts";
8
+ import { collectLoginEntries } from "../ingest/downloaders/index.ts";
9
+ import { logger } from "../output/logger.ts";
10
+ import LOGIN_PAGE_TEMPLATE from "./login-page.mustache" with { type: "text" };
11
+
12
+ /**
13
+ * `membot login`
14
+ *
15
+ * Open a real Chromium window backed by membot's persistent profile
16
+ * (cookies + localStorage + IndexedDB + service workers all stored
17
+ * under `~/.membot/auth/browser-profile/`) and pre-navigate to a
18
+ * small intro page that lists every login button declared by the
19
+ * registered downloaders. Adding a new downloader with `logins: […]`
20
+ * automatically gets a button on this page — login.ts knows nothing
21
+ * service-specific itself.
22
+ *
23
+ * Why a persistent profile instead of `storageState` JSON: SPA-heavy
24
+ * services like Linear stash session/sync state in IndexedDB, which
25
+ * `storageState` doesn't capture. A fresh headless context with
26
+ * cookies but no IndexedDB hangs on Linear's "Loading…" placeholder
27
+ * forever. The persistent profile carries IDB along with cookies, so
28
+ * the next headless run finds Linear's app fully bootstrapped.
29
+ *
30
+ * Window-close detection uses page-close events because on macOS
31
+ * closing the last chromium window doesn't quit the process —
32
+ * `browser.on('disconnected')` never fires. See `BrowserPool.waitForUserDone`.
33
+ */
34
+ export function registerLoginCommand(program: Command): void {
35
+ program
36
+ .command("login")
37
+ .description(
38
+ "Open a browser to sign into the services membot fetches from — closing the window saves your session.",
39
+ )
40
+ .action(async () => {
41
+ const ctx = await buildContext({});
42
+ const userDataDir = join(ctx.dataDir, FILES.BROWSER_PROFILE);
43
+ const pool = new BrowserPool({ userDataDir, headless: false });
44
+ const entries = collectLoginEntries();
45
+ const html = Mustache.render(LOGIN_PAGE_TEMPLATE, {
46
+ browser: entries.browser,
47
+ apiKey: entries.apiKey,
48
+ hasBrowser: entries.browser.length > 0,
49
+ hasApiKey: entries.apiKey.length > 0,
50
+ });
51
+
52
+ let cookieCount = 0;
53
+ try {
54
+ const page = await pool.newPage();
55
+ await page.goto(`data:text/html;charset=utf-8,${encodeURIComponent(html)}`).catch(() => {});
56
+
57
+ logger.info("Sign into the services you want membot to fetch from, then close the browser window.");
58
+ logger.info(`Session profile will be stored at ${userDataDir}.`);
59
+
60
+ await pool.waitForUserDone(page);
61
+ cookieCount = await pool.cookieCount();
62
+ } catch (err) {
63
+ if (err instanceof HelpfulError) throw err;
64
+ throw new HelpfulError({
65
+ kind: "internal_error",
66
+ message: `login failed: ${err instanceof Error ? err.message : String(err)}`,
67
+ hint: "Run `bunx playwright install chromium` to ensure the browser binary is installed, then retry.",
68
+ });
69
+ } finally {
70
+ await pool.dispose();
71
+ await closeContext(ctx);
72
+ }
73
+
74
+ if (cookieCount === 0) {
75
+ throw new HelpfulError({
76
+ kind: "auth_error",
77
+ message: `Browser profile at ${userDataDir} has no cookies — no service was signed in.`,
78
+ hint: "Run `membot login` again and sign in (Google / GitHub / Linear / …) before closing the window.",
79
+ });
80
+ }
81
+ logger.info(`Saved session profile (${cookieCount} cookie${cookieCount === 1 ? "" : "s"}).`);
82
+ });
83
+ }
@@ -15,14 +15,23 @@ export const LlmConfigSchema = z.object({
15
15
  vision_model: z.string().default(DEFAULTS.VISION_MODEL),
16
16
  });
17
17
 
18
- export const McpxConfigSchema = z.object({
19
- config_path: z.string().default(""),
20
- });
21
-
22
18
  export const DaemonConfigSchema = z.object({
23
19
  tick_interval_sec: z.number().int().positive().default(DEFAULTS.DAEMON_TICK_SEC),
24
20
  });
25
21
 
22
+ export const LinearDownloaderConfigSchema = z.object({
23
+ api_key: z.string().meta({ secret: true }).default(""),
24
+ });
25
+
26
+ export const GithubDownloaderConfigSchema = z.object({
27
+ api_key: z.string().meta({ secret: true }).default(""),
28
+ });
29
+
30
+ export const DownloadersConfigSchema = z.object({
31
+ linear: LinearDownloaderConfigSchema.default(() => LinearDownloaderConfigSchema.parse({})),
32
+ github: GithubDownloaderConfigSchema.default(() => GithubDownloaderConfigSchema.parse({})),
33
+ });
34
+
26
35
  export const DbLockRetryConfigSchema = z.object({
27
36
  max_attempts: z.number().int().positive().default(30),
28
37
  base_delay_ms: z.number().int().positive().default(100),
@@ -35,7 +44,7 @@ export const MembotConfigSchema = z.object({
35
44
  embedding_dimension: z.number().int().positive().default(EMBEDDING_DIMENSION),
36
45
  chunker: ChunkerConfigSchema.default(() => ChunkerConfigSchema.parse({})),
37
46
  llm: LlmConfigSchema.default(() => LlmConfigSchema.parse({})),
38
- mcpx: McpxConfigSchema.default(() => McpxConfigSchema.parse({})),
47
+ downloaders: DownloadersConfigSchema.default(() => DownloadersConfigSchema.parse({})),
39
48
  daemon: DaemonConfigSchema.default(() => DaemonConfigSchema.parse({})),
40
49
  db_lock_retry: DbLockRetryConfigSchema.default(() => DbLockRetryConfigSchema.parse({})),
41
50
  default_refresh_frequency_sec: z.number().int().positive().nullable().default(null),
@@ -44,3 +53,6 @@ export const MembotConfigSchema = z.object({
44
53
  export type MembotConfig = z.infer<typeof MembotConfigSchema>;
45
54
  export type ChunkerConfig = z.infer<typeof ChunkerConfigSchema>;
46
55
  export type LlmConfig = z.infer<typeof LlmConfigSchema>;
56
+ export type DownloadersConfig = z.infer<typeof DownloadersConfigSchema>;
57
+ export type LinearDownloaderConfig = z.infer<typeof LinearDownloaderConfigSchema>;
58
+ export type GithubDownloaderConfig = z.infer<typeof GithubDownloaderConfigSchema>;
package/src/constants.ts CHANGED
@@ -13,7 +13,6 @@ export const ENV = {
13
13
  CONFIG: "MEMBOT_CONFIG",
14
14
  DEBUG: "MEMBOT_DEBUG",
15
15
  ANTHROPIC_API_KEY: "ANTHROPIC_API_KEY",
16
- MCPX_CONFIG_PATH: "MCP_CONFIG_PATH",
17
16
  NO_UPDATE_CHECK: "MEMBOT_NO_UPDATE_CHECK",
18
17
  } as const;
19
18
 
@@ -48,4 +47,17 @@ export const FILES = {
48
47
  INDEX_DUCKDB: "index.duckdb",
49
48
  MODELS_DIR: "models",
50
49
  LOGS_DIR: "logs",
50
+ AUTH_DIR: "auth",
51
+ /**
52
+ * Persistent Chromium profile directory. We use
53
+ * `chromium.launchPersistentContext(userDataDir)` rather than the
54
+ * lighter `storageState` JSON snapshot because Linear (and other
55
+ * SPA-heavy services) stash critical session state in IndexedDB —
56
+ * which `storageState` doesn't capture. A persistent profile
57
+ * survives the full set: cookies, localStorage, IndexedDB, service
58
+ * workers, etc. Trade-off: directory-sized state instead of a tiny
59
+ * JSON file, and only one process can have the profile open at a
60
+ * time (chromium's single-instance lock).
61
+ */
62
+ BROWSER_PROFILE: "auth/browser-profile",
51
63
  } as const;
package/src/context.ts CHANGED
@@ -1,8 +1,7 @@
1
1
  import { join } from "node:path";
2
- import { McpxClient } from "@evantahler/mcpx";
3
2
  import { loadConfig } from "./config/loader.ts";
4
3
  import type { MembotConfig } from "./config/schemas.ts";
5
- import { ENV, FILES } from "./constants.ts";
4
+ import { FILES } from "./constants.ts";
6
5
  import { type DbConnection, openDb } from "./db/connection.ts";
7
6
  import { logger } from "./output/logger.ts";
8
7
  import type { Progress } from "./output/progress.ts";
@@ -16,7 +15,6 @@ export interface AppContext {
16
15
  db: DbConnection;
17
16
  logger: typeof logger;
18
17
  progress: Progress;
19
- mcpx: McpxClient | null;
20
18
  }
21
19
 
22
20
  export interface BuildContextOptions {
@@ -32,7 +30,6 @@ export interface BuildContextOptions {
32
30
  * - output mode (TTY/JSON/color detection — frozen for the rest of the run)
33
31
  * - config (~/.membot/config.json with env overrides)
34
32
  * - DuckDB connection (~/.membot/index.duckdb), running migrations on first open
35
- * - mcpx client (lazy — opened on first remote fetch; null when no servers)
36
33
  */
37
34
  export async function buildContext(options: BuildContextOptions = {}): Promise<AppContext> {
38
35
  setMode(detectMode({ json: options.json, verbose: options.verbose, noColor: options.noColor }));
@@ -45,8 +42,6 @@ export async function buildContext(options: BuildContextOptions = {}): Promise<A
45
42
  maxDelayMs: config.db_lock_retry.max_delay_ms,
46
43
  });
47
44
 
48
- const mcpx = await maybeMcpx(config);
49
-
50
45
  return {
51
46
  config,
52
47
  dataDir,
@@ -54,31 +49,13 @@ export async function buildContext(options: BuildContextOptions = {}): Promise<A
54
49
  db,
55
50
  logger,
56
51
  progress: createProgress(),
57
- mcpx,
58
52
  };
59
53
  }
60
54
 
61
- async function maybeMcpx(config: MembotConfig): Promise<McpxClient | null> {
62
- const configDir = config.mcpx.config_path || process.env[ENV.MCPX_CONFIG_PATH];
63
- try {
64
- const client = new McpxClient(configDir ? { configDir } : {});
65
- return client;
66
- } catch {
67
- return null;
68
- }
69
- }
70
-
71
55
  export async function closeContext(ctx: AppContext): Promise<void> {
72
56
  try {
73
57
  await ctx.db.close();
74
58
  } catch {
75
59
  // best effort
76
60
  }
77
- if (ctx.mcpx) {
78
- try {
79
- await ctx.mcpx.close();
80
- } catch {
81
- // best effort
82
- }
83
- }
84
61
  }