membot 0.5.2 → 0.7.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.claude/skills/membot.md +25 -10
- package/.cursor/rules/membot.mdc +25 -10
- package/README.md +36 -4
- package/package.json +8 -5
- package/scripts/apply-patches.sh +0 -11
- package/scripts/build-test-docx.ts +84 -0
- package/src/cli.ts +2 -2
- package/src/commands/login-page.mustache +50 -0
- package/src/commands/login.ts +83 -0
- package/src/config/schemas.ts +23 -5
- package/src/constants.ts +20 -1
- package/src/context.ts +1 -24
- package/src/db/files.ts +21 -25
- package/src/db/migrations/003-downloader-columns.ts +58 -0
- package/src/db/migrations.ts +2 -1
- package/src/ingest/converter/docx.ts +47 -5
- package/src/ingest/converter/html.ts +10 -3
- package/src/ingest/converter/image.ts +40 -3
- package/src/ingest/converter/images-inline.ts +132 -0
- package/src/ingest/converter/index.ts +13 -3
- package/src/ingest/converter/xlsx.ts +111 -0
- package/src/ingest/downloaders/browser.ts +180 -0
- package/src/ingest/downloaders/generic-web.ts +81 -0
- package/src/ingest/downloaders/github.ts +178 -0
- package/src/ingest/downloaders/google-docs.ts +56 -0
- package/src/ingest/downloaders/google-shared.ts +86 -0
- package/src/ingest/downloaders/google-sheets.ts +58 -0
- package/src/ingest/downloaders/google-slides.ts +53 -0
- package/src/ingest/downloaders/index.ts +182 -0
- package/src/ingest/downloaders/linear.ts +291 -0
- package/src/ingest/fetcher.ts +104 -129
- package/src/ingest/ingest.ts +44 -71
- package/src/mcp/instructions.ts +4 -2
- package/src/operations/add.ts +6 -4
- package/src/operations/info.ts +4 -6
- package/src/operations/move.ts +2 -3
- package/src/operations/refresh.ts +2 -4
- package/src/operations/remove.ts +23 -2
- package/src/operations/tree.ts +1 -1
- package/src/operations/types.ts +1 -1
- package/src/refresh/runner.ts +60 -115
- package/src/types/text-modules.d.ts +5 -0
- package/patches/@evantahler%2Fmcpx@0.21.4.patch +0 -51
- package/src/commands/mcpx.ts +0 -112
- package/src/ingest/agent-fetcher.ts +0 -639
package/.claude/skills/membot.md
CHANGED
|
@@ -26,15 +26,26 @@ membot search "<question>" # hybrid search (semantic + keyword)
|
|
|
26
26
|
## 2. Ingest
|
|
27
27
|
|
|
28
28
|
```bash
|
|
29
|
-
membot add ./README.md
|
|
30
|
-
membot add ./docs
|
|
31
|
-
membot add "docs/**/*.md"
|
|
32
|
-
membot add a.md b.md "docs/**/*.md"
|
|
33
|
-
membot add https://
|
|
34
|
-
membot add
|
|
35
|
-
membot add
|
|
29
|
+
membot add ./README.md # single file
|
|
30
|
+
membot add ./docs # recursive directory walk
|
|
31
|
+
membot add "docs/**/*.md" # glob
|
|
32
|
+
membot add a.md b.md "docs/**/*.md" # any number of args; each resolved independently
|
|
33
|
+
membot add https://docs.google.com/document/d/<ID>/edit # Google Docs/Sheets/Slides via export endpoints
|
|
34
|
+
membot add https://github.com/<owner>/<repo>/issues/<n> # GitHub issues + PRs (with comments)
|
|
35
|
+
membot add https://linear.app/<workspace>/issue/<KEY> # Linear issues + projects
|
|
36
|
+
membot add https://example.com/spec.pdf # any other URL (browser print-to-PDF fallback)
|
|
37
|
+
membot add "inline:Decision: use X because Y" # literal text
|
|
38
|
+
membot add ./docs --refresh-frequency 24h # auto-refresh every day
|
|
36
39
|
```
|
|
37
40
|
|
|
41
|
+
Remote URLs go through per-service downloaders. Google needs cookies
|
|
42
|
+
captured by `membot login` (one-time browser sign-in); GitHub and
|
|
43
|
+
Linear need API keys set via
|
|
44
|
+
`membot config set downloaders.<svc>.api_key`. If a fetch fails with
|
|
45
|
+
an auth error, the `HelpfulError` will tell you exactly which command
|
|
46
|
+
to run. Fetches are non-interactive — they never open a browser
|
|
47
|
+
during ingest or refresh.
|
|
48
|
+
|
|
38
49
|
Each entry becomes a new version under its own `logical_path`. PDFs/DOCX/HTML are converted to markdown; images get vision captions; original bytes are kept and reachable via `membot read --bytes`.
|
|
39
50
|
|
|
40
51
|
The default `logical_path` mirrors the source path so files with the same basename in different projects don't collide:
|
|
@@ -77,6 +88,7 @@ membot refresh # refresh all rows whose schedule has ela
|
|
|
77
88
|
membot mv old/path new/path # rename (history preserved under both)
|
|
78
89
|
membot rm <paths...> # tombstone one or more paths/globs (history still queryable)
|
|
79
90
|
membot rm "docs/**/*.md" notes/old.md # globs match logical_paths in the DB; literals + globs can mix
|
|
91
|
+
membot rm -r remotes/docs.google.com # --recursive removes every path under a directory prefix
|
|
80
92
|
membot prune --before <iso-ts> # drop non-current versions older than cutoff (irreversible)
|
|
81
93
|
```
|
|
82
94
|
|
|
@@ -116,16 +128,17 @@ Tombstones hide a path from `ls` / `tree` / `search` but `versions` and `read --
|
|
|
116
128
|
| `membot read <path>` | Read current markdown surrogate (or `--bytes` for original) |
|
|
117
129
|
| `membot write <path> --content <txt>` | Write inline agent-authored markdown as a new version |
|
|
118
130
|
| `membot search <query>` | Hybrid search (semantic + BM25); add `--include-history` to search older versions |
|
|
119
|
-
| `membot info <path>` | Inspect metadata (source,
|
|
131
|
+
| `membot info <path>` | Inspect metadata (source, downloader, refresh schedule, digests) without content |
|
|
120
132
|
| `membot versions <path>` | List every version newest-first with version_id and change notes |
|
|
121
133
|
| `membot diff <path> --a <ts>` | Unified diff between two versions |
|
|
122
134
|
| `membot mv <old> <new>` | Rename a logical_path (history preserved) |
|
|
123
|
-
| `membot rm <paths...>` | Tombstone one or more logical_paths or globs (e.g. `"docs/**/*.md"`); history kept |
|
|
135
|
+
| `membot rm <paths...>` | Tombstone one or more logical_paths or globs (e.g. `"docs/**/*.md"`); pass `-r` / `--recursive` to remove a directory prefix; history kept |
|
|
124
136
|
| `membot refresh [path]` | Re-read source; create new version only if bytes changed |
|
|
125
137
|
| `membot prune --before <ts>` | Permanently drop non-current versions older than cutoff (irreversible) |
|
|
126
138
|
| `membot serve` | Start MCP server (stdio default, `--http <port>` for HTTP) |
|
|
127
139
|
| `membot reindex` | Rebuild the FTS keyword index over current chunks |
|
|
128
140
|
| `membot config <subcommand>` | Host-side config management (`get` / `set` / `unset` / `list` / `path`). **Don't run** — this is for the human operator, not for agents |
|
|
141
|
+
| `membot login` | Open a browser to sign into Google / GitHub / Linear / etc. (one-time host-side setup). **Don't run** — this is for the human operator |
|
|
129
142
|
|
|
130
143
|
## Output formats
|
|
131
144
|
|
|
@@ -137,7 +150,9 @@ Tombstones hide a path from `ls` / `tree` / `search` but `versions` and `read --
|
|
|
137
150
|
## Troubleshooting
|
|
138
151
|
|
|
139
152
|
- **"ingest failed: unsupported mime"** → Add a converter or pass `--bytes` to keep the original; LLM-fallback only runs when `ANTHROPIC_API_KEY` is set.
|
|
140
|
-
- **"refresh failed: auth"**
|
|
153
|
+
- **"refresh failed: auth"** for a Google URL → cookies expired. Run `membot login` to refresh the browser session.
|
|
154
|
+
- **"refresh failed: auth"** for a GitHub URL → set the PAT via `membot config set downloaders.github.api_key <PAT>` (or export `GITHUB_TOKEN`).
|
|
155
|
+
- **"refresh failed: auth"** for a Linear URL → set the personal API key via `membot config set downloaders.linear.api_key <KEY>` (create one at `linear.app/settings/api`).
|
|
141
156
|
- **Search returns nothing** → Confirm the file ingested with `membot info <path>`; if needed, run `membot reindex` to rebuild the FTS keyword index.
|
|
142
157
|
- **Stale results after manual DB edits** → `membot reindex`.
|
|
143
158
|
- **Two paths point at the same content** → `membot mv` doesn't merge; tombstone one with `membot rm`.
|
package/.cursor/rules/membot.mdc
CHANGED
|
@@ -26,15 +26,26 @@ membot search "<question>" # hybrid search (semantic + keyword)
|
|
|
26
26
|
## 2. Ingest
|
|
27
27
|
|
|
28
28
|
```bash
|
|
29
|
-
membot add ./README.md
|
|
30
|
-
membot add ./docs
|
|
31
|
-
membot add "docs/**/*.md"
|
|
32
|
-
membot add a.md b.md "docs/**/*.md"
|
|
33
|
-
membot add https://
|
|
34
|
-
membot add
|
|
35
|
-
membot add
|
|
29
|
+
membot add ./README.md # single file
|
|
30
|
+
membot add ./docs # recursive directory walk
|
|
31
|
+
membot add "docs/**/*.md" # glob
|
|
32
|
+
membot add a.md b.md "docs/**/*.md" # any number of args; each resolved independently
|
|
33
|
+
membot add https://docs.google.com/document/d/<ID>/edit # Google Docs/Sheets/Slides via export endpoints
|
|
34
|
+
membot add https://github.com/<owner>/<repo>/issues/<n> # GitHub issues + PRs (with comments)
|
|
35
|
+
membot add https://linear.app/<workspace>/issue/<KEY> # Linear issues + projects
|
|
36
|
+
membot add https://example.com/spec.pdf # any other URL (browser print-to-PDF fallback)
|
|
37
|
+
membot add "inline:Decision: use X because Y" # literal text
|
|
38
|
+
membot add ./docs --refresh-frequency 24h # auto-refresh every day
|
|
36
39
|
```
|
|
37
40
|
|
|
41
|
+
Remote URLs go through per-service downloaders. Google needs cookies
|
|
42
|
+
captured by `membot login` (one-time browser sign-in); GitHub and
|
|
43
|
+
Linear need API keys set via
|
|
44
|
+
`membot config set downloaders.<svc>.api_key`. If a fetch fails with
|
|
45
|
+
an auth error, the `HelpfulError` will tell you exactly which command
|
|
46
|
+
to run. Fetches are non-interactive — they never open a browser
|
|
47
|
+
during ingest or refresh.
|
|
48
|
+
|
|
38
49
|
Each entry becomes a new version under its own `logical_path`. PDFs/DOCX/HTML are converted to markdown; images get vision captions; original bytes are kept and reachable via `membot read --bytes`.
|
|
39
50
|
|
|
40
51
|
The default `logical_path` mirrors the source path so files with the same basename in different projects don't collide:
|
|
@@ -77,6 +88,7 @@ membot refresh # refresh all rows whose schedule has ela
|
|
|
77
88
|
membot mv old/path new/path # rename (history preserved under both)
|
|
78
89
|
membot rm <paths...> # tombstone one or more paths/globs (history still queryable)
|
|
79
90
|
membot rm "docs/**/*.md" notes/old.md # globs match logical_paths in the DB; literals + globs can mix
|
|
91
|
+
membot rm -r remotes/docs.google.com # --recursive removes every path under a directory prefix
|
|
80
92
|
membot prune --before <iso-ts> # drop non-current versions older than cutoff (irreversible)
|
|
81
93
|
```
|
|
82
94
|
|
|
@@ -116,16 +128,17 @@ Tombstones hide a path from `ls` / `tree` / `search` but `versions` and `read --
|
|
|
116
128
|
| `membot read <path>` | Read current markdown surrogate (or `--bytes` for original) |
|
|
117
129
|
| `membot write <path> --content <txt>` | Write inline agent-authored markdown as a new version |
|
|
118
130
|
| `membot search <query>` | Hybrid search (semantic + BM25); add `--include-history` to search older versions |
|
|
119
|
-
| `membot info <path>` | Inspect metadata (source,
|
|
131
|
+
| `membot info <path>` | Inspect metadata (source, downloader, refresh schedule, digests) without content |
|
|
120
132
|
| `membot versions <path>` | List every version newest-first with version_id and change notes |
|
|
121
133
|
| `membot diff <path> --a <ts>` | Unified diff between two versions |
|
|
122
134
|
| `membot mv <old> <new>` | Rename a logical_path (history preserved) |
|
|
123
|
-
| `membot rm <paths...>` | Tombstone one or more logical_paths or globs (e.g. `"docs/**/*.md"`); history kept |
|
|
135
|
+
| `membot rm <paths...>` | Tombstone one or more logical_paths or globs (e.g. `"docs/**/*.md"`); pass `-r` / `--recursive` to remove a directory prefix; history kept |
|
|
124
136
|
| `membot refresh [path]` | Re-read source; create new version only if bytes changed |
|
|
125
137
|
| `membot prune --before <ts>` | Permanently drop non-current versions older than cutoff (irreversible) |
|
|
126
138
|
| `membot serve` | Start MCP server (stdio default, `--http <port>` for HTTP) |
|
|
127
139
|
| `membot reindex` | Rebuild the FTS keyword index over current chunks |
|
|
128
140
|
| `membot config <subcommand>` | Host-side config management (`get` / `set` / `unset` / `list` / `path`). **Don't run** — this is for the human operator, not for agents |
|
|
141
|
+
| `membot login` | Open a browser to sign into Google / GitHub / Linear / etc. (one-time host-side setup). **Don't run** — this is for the human operator |
|
|
129
142
|
|
|
130
143
|
## Output formats
|
|
131
144
|
|
|
@@ -137,7 +150,9 @@ Tombstones hide a path from `ls` / `tree` / `search` but `versions` and `read --
|
|
|
137
150
|
## Troubleshooting
|
|
138
151
|
|
|
139
152
|
- **"ingest failed: unsupported mime"** → Add a converter or pass `--bytes` to keep the original; LLM-fallback only runs when `ANTHROPIC_API_KEY` is set.
|
|
140
|
-
- **"refresh failed: auth"**
|
|
153
|
+
- **"refresh failed: auth"** for a Google URL → cookies expired. Run `membot login` to refresh the browser session.
|
|
154
|
+
- **"refresh failed: auth"** for a GitHub URL → set the PAT via `membot config set downloaders.github.api_key <PAT>` (or export `GITHUB_TOKEN`).
|
|
155
|
+
- **"refresh failed: auth"** for a Linear URL → set the personal API key via `membot config set downloaders.linear.api_key <KEY>` (create one at `linear.app/settings/api`).
|
|
141
156
|
- **Search returns nothing** → Confirm the file ingested with `membot info <path>`; if needed, run `membot reindex` to rebuild the FTS keyword index.
|
|
142
157
|
- **Stale results after manual DB edits** → `membot reindex`.
|
|
143
158
|
- **Two paths point at the same content** → `membot mv` doesn't merge; tombstone one with `membot rm`.
|
package/README.md
CHANGED
|
@@ -16,15 +16,43 @@
|
|
|
16
16
|
|
|
17
17
|
```bash
|
|
18
18
|
bun install -g membot
|
|
19
|
+
bunx playwright install chromium # one-time browser binary download (~150 MB)
|
|
19
20
|
```
|
|
20
21
|
|
|
21
|
-
This pulls in DuckDB's per-platform native bindings alongside membot. The build externalizes `@duckdb/*` (those `.node` bindings can't be embedded by `bun build --compile`), so a global Bun install is the supported path.
|
|
22
|
+
This pulls in DuckDB's per-platform native bindings and Playwright's Chromium binary alongside membot. The build externalizes `@duckdb/*` (those `.node` bindings can't be embedded by `bun build --compile`) and `playwright*` (the browser binary lives in `~/.cache/ms-playwright`), so a global Bun install is the supported path.
|
|
23
|
+
|
|
24
|
+
After installing, set up the services you want to ingest from:
|
|
25
|
+
|
|
26
|
+
```bash
|
|
27
|
+
membot login
|
|
28
|
+
```
|
|
29
|
+
|
|
30
|
+
A real Chromium window opens with two sections:
|
|
31
|
+
|
|
32
|
+
- **Browser sign-in** — Google Docs / Sheets / Slides. Click the Google link in the window, sign in, close the window. Cookies + IndexedDB persist to `~/.membot/auth/browser-profile/` and are reused by every browser-based downloader.
|
|
33
|
+
- **API-key services** — GitHub and Linear. The window shows the settings URL where you create a token and the `membot config set …` command to run in your terminal:
|
|
34
|
+
|
|
35
|
+
```bash
|
|
36
|
+
# GitHub: settings/tokens → fine-grained, repo:read
|
|
37
|
+
membot config set downloaders.github.api_key <PAT>
|
|
38
|
+
# or read from environment
|
|
39
|
+
export GITHUB_TOKEN=<PAT>
|
|
40
|
+
|
|
41
|
+
# Linear: linear.app/settings/api → personal API key
|
|
42
|
+
membot config set downloaders.linear.api_key <KEY>
|
|
43
|
+
```
|
|
44
|
+
|
|
45
|
+
Public GitHub repos work without a token (rate-limited at 60 req/hr). Linear always needs a key.
|
|
22
46
|
|
|
23
47
|
## Quick start
|
|
24
48
|
|
|
25
49
|
```bash
|
|
50
|
+
membot login # one-time: sign into Google / GitHub / Linear in a browser
|
|
26
51
|
membot add ./docs # ingest a directory recursively
|
|
27
|
-
membot add https://
|
|
52
|
+
membot add https://docs.google.com/document/d/.. # Google Docs / Sheets / Slides via export endpoints
|
|
53
|
+
membot add https://github.com/o/r/issues/123 # GitHub issues + PRs (with comments)
|
|
54
|
+
membot add https://linear.app/w/issue/ABC-12 # Linear issues + projects
|
|
55
|
+
membot add https://example.com/spec.pdf # any other URL (browser print-to-PDF fallback)
|
|
28
56
|
membot add a.md b.md "docs/**/*.md" # any number of files / globs in one call
|
|
29
57
|
membot ls # list current files
|
|
30
58
|
membot search "how does refresh work?" # hybrid search
|
|
@@ -59,13 +87,13 @@ The skill files describe the discover → ingest → search → read → write w
|
|
|
59
87
|
| `membot diff <path> <a> [b]` | Unified diff between two versions |
|
|
60
88
|
| `membot write <path>` | Write inline agent-authored markdown as a new version |
|
|
61
89
|
| `membot mv <from> <to>` | Rename a logical_path (history preserved under both) |
|
|
62
|
-
| `membot rm <paths...>` | Tombstone one or more logical_paths or globs (e.g. `"docs/**/*.md"`); history kept |
|
|
90
|
+
| `membot rm <paths...>` | Tombstone one or more logical_paths or globs (e.g. `"docs/**/*.md"`); pass `-r` / `--recursive` to remove a directory prefix; history kept |
|
|
63
91
|
| `membot refresh [path]` | Re-read source; new version only if bytes changed |
|
|
64
92
|
| `membot prune --before <ts>` | Permanently drop non-current versions older than cutoff (irreversible) |
|
|
65
93
|
| `membot serve` | Run the MCP server (stdio default; `--http <port>` for HTTP) |
|
|
66
94
|
| `membot reindex` | Rebuild the FTS keyword index over current chunks |
|
|
67
95
|
| `membot config <subcommand>` | Get / set values in `~/.membot/config.json` (`get`, `set`, `unset`, `list`, `path`) |
|
|
68
|
-
| `membot
|
|
96
|
+
| `membot login` | Open a Chromium window to sign into Google / GitHub / Linear / etc. — closing the window saves the session |
|
|
69
97
|
| `membot skill install` | Install the Claude Code / Cursor agent skill |
|
|
70
98
|
|
|
71
99
|
Run `membot <command> --help` for full flags and arguments. Every command produces JSON when piped, when `--json` is set, or when `CI=true`.
|
|
@@ -108,15 +136,19 @@ Add `--watch` (and optional `--tick <sec>`) to also run the refresh daemon, whic
|
|
|
108
136
|
membot config list # show every value (secrets masked)
|
|
109
137
|
membot config set llm.anthropic_api_key sk-ant-... # enable LLM-fallback paths
|
|
110
138
|
membot config set chunker.target_chars 800 # tweak any nested value
|
|
139
|
+
membot config set converters.max_inline_image_captions 50 # raise per-doc cap on vision captions for embedded images
|
|
111
140
|
membot config get llm.anthropic_api_key --show-secrets # reveal the masked key
|
|
112
141
|
membot config unset chunker.target_chars # back to schema default
|
|
113
142
|
membot config path # print the absolute config path
|
|
114
143
|
```
|
|
115
144
|
|
|
116
145
|
Values are written with file mode `0600`. `ANTHROPIC_API_KEY` set in the environment still wins on read, so existing env-var setups keep working.
|
|
146
|
+
- **Browser session:** `~/.membot/auth/browser-profile/` (Playwright persistent profile — cookies, localStorage, IndexedDB). Captured by `membot login`; cookie-based downloaders (Google) reuse it on every fetch. Delete the directory to force a fresh login.
|
|
147
|
+
- **API keys:** stored under `downloaders.<service>.api_key` in `~/.membot/config.json`. Read by API-based downloaders (GitHub, Linear).
|
|
117
148
|
- **Environment variables:**
|
|
118
149
|
- `ANTHROPIC_API_KEY` — optional. Enables LLM fallback for messy / scanned input (vision captions for images, last-resort markdown conversion). Without it, the pipeline degrades to deterministic native conversion. Equivalent to `membot config set llm.anthropic_api_key ...`; the env var takes precedence on read.
|
|
119
150
|
- `MEMBOT_HOME` — override the data directory.
|
|
151
|
+
- `MEMBOT_SKIP_E2E` — skip live-network E2E downloader tests in `bun test`.
|
|
120
152
|
- `NO_COLOR`, `CI`, `FORCE_COLOR` — standard output controls.
|
|
121
153
|
|
|
122
154
|
## Development
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "membot",
|
|
3
|
-
"version": "0.
|
|
3
|
+
"version": "0.7.0",
|
|
4
4
|
"description": "Versioned context store with hybrid search for AI agents. Stdio + HTTP MCP server and CLI.",
|
|
5
5
|
"type": "module",
|
|
6
6
|
"exports": {
|
|
@@ -27,7 +27,7 @@
|
|
|
27
27
|
"lint": "biome ci . && tsc --noEmit",
|
|
28
28
|
"format": "biome check --write .",
|
|
29
29
|
"prebuild": "bash scripts/apply-patches.sh",
|
|
30
|
-
"build": "bun build --compile --minify --sourcemap --external '@duckdb/*' ./src/cli.ts --outfile dist/membot"
|
|
30
|
+
"build": "bun build --compile --minify --sourcemap --external '@duckdb/*' --external 'playwright*' ./src/cli.ts --outfile dist/membot"
|
|
31
31
|
},
|
|
32
32
|
"keywords": [
|
|
33
33
|
"mcp",
|
|
@@ -56,27 +56,30 @@
|
|
|
56
56
|
"dependencies": {
|
|
57
57
|
"@anthropic-ai/sdk": "^0.32.0",
|
|
58
58
|
"@duckdb/node-api": "1.5.2-r.1",
|
|
59
|
-
"@evantahler/mcpx": "^0.21.4",
|
|
60
59
|
"@huggingface/transformers": "^4.2.0",
|
|
61
60
|
"@modelcontextprotocol/sdk": "^1.29.0",
|
|
61
|
+
"@types/picomatch": "^4.0.3",
|
|
62
|
+
"@types/turndown": "^5.0.5",
|
|
62
63
|
"ansis": "^4.2.0",
|
|
63
64
|
"commander": "^14.0.3",
|
|
64
65
|
"gray-matter": "^4.0.3",
|
|
65
66
|
"mammoth": "^1.8.0",
|
|
67
|
+
"mustache": "^4.2.0",
|
|
66
68
|
"nanospinner": "^1.2.2",
|
|
67
69
|
"onnxruntime-web": "1.26.0-dev.20260416-b7804b056c",
|
|
68
70
|
"picomatch": "^4.0.4",
|
|
69
|
-
"
|
|
71
|
+
"playwright": "^1.59.1",
|
|
70
72
|
"tesseract.js": "^5.1.0",
|
|
71
73
|
"turndown": "^7.2.0",
|
|
72
|
-
"@types/turndown": "^5.0.5",
|
|
73
74
|
"unpdf": "^0.12.0",
|
|
75
|
+
"xlsx": "^0.18.5",
|
|
74
76
|
"zod": "^4.0.0",
|
|
75
77
|
"zod-to-json-schema": "^3.23.0"
|
|
76
78
|
},
|
|
77
79
|
"devDependencies": {
|
|
78
80
|
"@biomejs/biome": "^2.4.14",
|
|
79
81
|
"@types/bun": "latest",
|
|
82
|
+
"@types/mustache": "^4.2.6",
|
|
80
83
|
"typescript": "^6"
|
|
81
84
|
},
|
|
82
85
|
"peerDependencies": {
|
package/scripts/apply-patches.sh
CHANGED
|
@@ -38,14 +38,3 @@ apply_patch \
|
|
|
38
38
|
"node_modules/@huggingface/transformers" \
|
|
39
39
|
".membot-transformers-patch-applied"
|
|
40
40
|
|
|
41
|
-
# @evantahler/mcpx — rewrite `src/search/onnx-wasm-paths.ts` so its static
|
|
42
|
-
# `with { type: "file" }` imports of onnxruntime-web's WASM resolve from the
|
|
43
|
-
# consumer's hoisted node_modules layout (../../../../onnxruntime-web/...)
|
|
44
|
-
# instead of mcpx's own repo layout (../../node_modules/...). With this
|
|
45
|
-
# patch in place, mcpx's semantic search runs end-to-end inside membot
|
|
46
|
-
# (the agent fetcher's `mcp_search` exercises it) and `bun build --compile`
|
|
47
|
-
# can bundle the WASM assets into the standalone binary.
|
|
48
|
-
apply_patch \
|
|
49
|
-
"patches/@evantahler%2Fmcpx@0.21.4.patch" \
|
|
50
|
-
"node_modules/@evantahler/mcpx" \
|
|
51
|
-
".membot-mcpx-patch-applied"
|
|
@@ -0,0 +1,84 @@
|
|
|
1
|
+
#!/usr/bin/env bun
|
|
2
|
+
/**
|
|
3
|
+
* One-shot generator for `test/fixtures/sample-with-image.docx`. Run this
|
|
4
|
+
* (`bun scripts/build-test-docx.ts`) when the fixture is missing or when
|
|
5
|
+
* the embedded test image needs to change. The DOCX itself is committed
|
|
6
|
+
* to the repo so test runs don't depend on jszip-as-transitive-dep.
|
|
7
|
+
*/
|
|
8
|
+
import { mkdirSync, writeFileSync } from "node:fs";
|
|
9
|
+
import { dirname } from "node:path";
|
|
10
|
+
// jszip ships transitively via mammoth; this script is run by hand, not in tests.
|
|
11
|
+
import JSZip from "../node_modules/jszip/lib/index.js";
|
|
12
|
+
|
|
13
|
+
const TINY_PNG_BASE64 = "iVBORw0KGgoAAAANSUhEUgAAAAEAAAABCAQAAAC1HAwCAAAAC0lEQVR42mNkYAAAAAYAAjCB0C8AAAAASUVORK5CYII=";
|
|
14
|
+
|
|
15
|
+
const documentXml = `<?xml version="1.0" encoding="UTF-8" standalone="yes"?>
|
|
16
|
+
<w:document xmlns:w="http://schemas.openxmlformats.org/wordprocessingml/2006/main"
|
|
17
|
+
xmlns:r="http://schemas.openxmlformats.org/officeDocument/2006/relationships"
|
|
18
|
+
xmlns:wp="http://schemas.openxmlformats.org/drawingml/2006/wordprocessingDrawing"
|
|
19
|
+
xmlns:a="http://schemas.openxmlformats.org/drawingml/2006/main"
|
|
20
|
+
xmlns:pic="http://schemas.openxmlformats.org/drawingml/2006/picture">
|
|
21
|
+
<w:body>
|
|
22
|
+
<w:p><w:r><w:t>Lead paragraph before the diagram.</w:t></w:r></w:p>
|
|
23
|
+
<w:p><w:r><w:drawing>
|
|
24
|
+
<wp:inline>
|
|
25
|
+
<wp:extent cx="635" cy="635"/>
|
|
26
|
+
<wp:docPr id="1" name="Picture 1" descr="architecture diagram"/>
|
|
27
|
+
<a:graphic>
|
|
28
|
+
<a:graphicData uri="http://schemas.openxmlformats.org/drawingml/2006/picture">
|
|
29
|
+
<pic:pic>
|
|
30
|
+
<pic:nvPicPr>
|
|
31
|
+
<pic:cNvPr id="1" name="img.png" descr="architecture diagram"/>
|
|
32
|
+
<pic:cNvPicPr/>
|
|
33
|
+
</pic:nvPicPr>
|
|
34
|
+
<pic:blipFill>
|
|
35
|
+
<a:blip r:embed="rId1"/>
|
|
36
|
+
<a:stretch><a:fillRect/></a:stretch>
|
|
37
|
+
</pic:blipFill>
|
|
38
|
+
<pic:spPr>
|
|
39
|
+
<a:xfrm><a:off x="0" y="0"/><a:ext cx="635" cy="635"/></a:xfrm>
|
|
40
|
+
<a:prstGeom prst="rect"><a:avLst/></a:prstGeom>
|
|
41
|
+
</pic:spPr>
|
|
42
|
+
</pic:pic>
|
|
43
|
+
</a:graphicData>
|
|
44
|
+
</a:graphic>
|
|
45
|
+
</wp:inline>
|
|
46
|
+
</w:drawing></w:r></w:p>
|
|
47
|
+
<w:p><w:r><w:t>Trailing paragraph after the diagram.</w:t></w:r></w:p>
|
|
48
|
+
</w:body>
|
|
49
|
+
</w:document>`;
|
|
50
|
+
|
|
51
|
+
const documentRels = `<?xml version="1.0" encoding="UTF-8" standalone="yes"?>
|
|
52
|
+
<Relationships xmlns="http://schemas.openxmlformats.org/package/2006/relationships">
|
|
53
|
+
<Relationship Id="rId1" Type="http://schemas.openxmlformats.org/officeDocument/2006/relationships/image" Target="media/image1.png"/>
|
|
54
|
+
</Relationships>`;
|
|
55
|
+
|
|
56
|
+
const rootRels = `<?xml version="1.0" encoding="UTF-8" standalone="yes"?>
|
|
57
|
+
<Relationships xmlns="http://schemas.openxmlformats.org/package/2006/relationships">
|
|
58
|
+
<Relationship Id="rId1" Type="http://schemas.openxmlformats.org/officeDocument/2006/relationships/officeDocument" Target="word/document.xml"/>
|
|
59
|
+
</Relationships>`;
|
|
60
|
+
|
|
61
|
+
const contentTypes = `<?xml version="1.0" encoding="UTF-8" standalone="yes"?>
|
|
62
|
+
<Types xmlns="http://schemas.openxmlformats.org/package/2006/content-types">
|
|
63
|
+
<Default Extension="rels" ContentType="application/vnd.openxmlformats-package.relationships+xml"/>
|
|
64
|
+
<Default Extension="xml" ContentType="application/xml"/>
|
|
65
|
+
<Default Extension="png" ContentType="image/png"/>
|
|
66
|
+
<Override PartName="/word/document.xml" ContentType="application/vnd.openxmlformats-officedocument.wordprocessingml.document.main+xml"/>
|
|
67
|
+
</Types>`;
|
|
68
|
+
|
|
69
|
+
async function main(): Promise<void> {
|
|
70
|
+
const zip = new JSZip();
|
|
71
|
+
zip.file("[Content_Types].xml", contentTypes);
|
|
72
|
+
zip.file("_rels/.rels", rootRels);
|
|
73
|
+
zip.file("word/document.xml", documentXml);
|
|
74
|
+
zip.file("word/_rels/document.xml.rels", documentRels);
|
|
75
|
+
zip.file("word/media/image1.png", Buffer.from(TINY_PNG_BASE64, "base64"));
|
|
76
|
+
|
|
77
|
+
const buffer = await zip.generateAsync({ type: "nodebuffer" });
|
|
78
|
+
const out = "test/fixtures/sample-with-image.docx";
|
|
79
|
+
mkdirSync(dirname(out), { recursive: true });
|
|
80
|
+
writeFileSync(out, buffer);
|
|
81
|
+
console.log(`wrote ${out} (${buffer.byteLength} bytes)`);
|
|
82
|
+
}
|
|
83
|
+
|
|
84
|
+
await main();
|
package/src/cli.ts
CHANGED
|
@@ -5,7 +5,7 @@ import { program } from "commander";
|
|
|
5
5
|
import pkg from "../package.json" with { type: "json" };
|
|
6
6
|
import { registerCheckUpdateCommand } from "./commands/check-update.ts";
|
|
7
7
|
import { registerConfigCommand } from "./commands/config.ts";
|
|
8
|
-
import {
|
|
8
|
+
import { registerLoginCommand } from "./commands/login.ts";
|
|
9
9
|
import { registerReindexCommand } from "./commands/reindex.ts";
|
|
10
10
|
import { registerServeCommand } from "./commands/serve.ts";
|
|
11
11
|
import { registerSkillCommand } from "./commands/skill.ts";
|
|
@@ -59,7 +59,7 @@ for (const op of OPERATIONS) {
|
|
|
59
59
|
registerServeCommand(program);
|
|
60
60
|
registerReindexCommand(program);
|
|
61
61
|
registerConfigCommand(program);
|
|
62
|
-
|
|
62
|
+
registerLoginCommand(program);
|
|
63
63
|
registerSkillCommand(program);
|
|
64
64
|
registerCheckUpdateCommand(program);
|
|
65
65
|
registerUpgradeCommand(program);
|
|
@@ -0,0 +1,50 @@
|
|
|
1
|
+
<!DOCTYPE html>
|
|
2
|
+
<html lang="en">
|
|
3
|
+
<head>
|
|
4
|
+
<meta charset="utf-8" />
|
|
5
|
+
<title>membot login</title>
|
|
6
|
+
<style>
|
|
7
|
+
body { font-family: -apple-system, BlinkMacSystemFont, system-ui, sans-serif; padding: 2.5rem; max-width: 720px; margin: auto; color: #222; line-height: 1.5; }
|
|
8
|
+
h1 { font-size: 1.5rem; margin-bottom: 0.5rem; }
|
|
9
|
+
h2 { font-size: 1.05rem; margin-top: 2rem; margin-bottom: 0.5rem; color: #444; }
|
|
10
|
+
.hint { color: #666; margin-bottom: 1.5rem; }
|
|
11
|
+
ul { padding: 0; list-style: none; }
|
|
12
|
+
li { padding: 0.7rem 0.9rem; border: 1px solid #ddd; border-radius: 6px; margin-bottom: 0.5rem; }
|
|
13
|
+
a { color: #2855ff; text-decoration: none; font-weight: 500; }
|
|
14
|
+
a:hover { text-decoration: underline; }
|
|
15
|
+
.desc { color: #666; }
|
|
16
|
+
.footer { color: #888; font-size: 0.9rem; margin-top: 2rem; }
|
|
17
|
+
code { background: #f5f5f5; padding: 0.1rem 0.35rem; border-radius: 3px; font-size: 0.85rem; }
|
|
18
|
+
pre { background: #f5f5f5; padding: 0.6rem 0.8rem; border-radius: 4px; font-size: 0.85rem; overflow-x: auto; margin: 0.4rem 0 0 0; }
|
|
19
|
+
</style>
|
|
20
|
+
</head>
|
|
21
|
+
<body>
|
|
22
|
+
<h1>Set up the services membot will fetch from</h1>
|
|
23
|
+
<p class="hint">This is a separate browser session from your daily Chrome — even if you're signed in there, you have to set things up here.</p>
|
|
24
|
+
|
|
25
|
+
{{#hasBrowser}}
|
|
26
|
+
<h2>Sign in via this browser</h2>
|
|
27
|
+
<p class="hint">Click any link, complete the login, then close this window when you're done. Cookies + IndexedDB land in your membot profile.</p>
|
|
28
|
+
<ul>
|
|
29
|
+
{{#browser}}
|
|
30
|
+
<li><a href="{{url}}">{{name}}</a>{{#description}} <span class="desc">— {{description}}</span>{{/description}}</li>
|
|
31
|
+
{{/browser}}
|
|
32
|
+
</ul>
|
|
33
|
+
{{/hasBrowser}}
|
|
34
|
+
|
|
35
|
+
{{#hasApiKey}}
|
|
36
|
+
<h2>API-key services</h2>
|
|
37
|
+
<p class="hint">Open the settings page, create a token, then run the command shown in your terminal (not in this browser).</p>
|
|
38
|
+
<ul>
|
|
39
|
+
{{#apiKey}}
|
|
40
|
+
<li>
|
|
41
|
+
<a href="{{url}}">{{name}}</a>{{#description}} <span class="desc">— {{description}}</span>{{/description}}
|
|
42
|
+
<pre>{{setupCommand}}</pre>
|
|
43
|
+
</li>
|
|
44
|
+
{{/apiKey}}
|
|
45
|
+
</ul>
|
|
46
|
+
{{/hasApiKey}}
|
|
47
|
+
|
|
48
|
+
<p class="footer">Closing this window saves the browser-session profile. Run <code>membot login</code> again to refresh expired sessions.</p>
|
|
49
|
+
</body>
|
|
50
|
+
</html>
|
|
@@ -0,0 +1,83 @@
|
|
|
1
|
+
import { join } from "node:path";
|
|
2
|
+
import type { Command } from "commander";
|
|
3
|
+
import Mustache from "mustache";
|
|
4
|
+
import { FILES } from "../constants.ts";
|
|
5
|
+
import { buildContext, closeContext } from "../context.ts";
|
|
6
|
+
import { HelpfulError } from "../errors.ts";
|
|
7
|
+
import { BrowserPool } from "../ingest/downloaders/browser.ts";
|
|
8
|
+
import { collectLoginEntries } from "../ingest/downloaders/index.ts";
|
|
9
|
+
import { logger } from "../output/logger.ts";
|
|
10
|
+
import LOGIN_PAGE_TEMPLATE from "./login-page.mustache" with { type: "text" };
|
|
11
|
+
|
|
12
|
+
/**
|
|
13
|
+
* `membot login`
|
|
14
|
+
*
|
|
15
|
+
* Open a real Chromium window backed by membot's persistent profile
|
|
16
|
+
* (cookies + localStorage + IndexedDB + service workers all stored
|
|
17
|
+
* under `~/.membot/auth/browser-profile/`) and pre-navigate to a
|
|
18
|
+
* small intro page that lists every login button declared by the
|
|
19
|
+
* registered downloaders. Adding a new downloader with `logins: […]`
|
|
20
|
+
* automatically gets a button on this page — login.ts knows nothing
|
|
21
|
+
* service-specific itself.
|
|
22
|
+
*
|
|
23
|
+
* Why a persistent profile instead of `storageState` JSON: SPA-heavy
|
|
24
|
+
* services like Linear stash session/sync state in IndexedDB, which
|
|
25
|
+
* `storageState` doesn't capture. A fresh headless context with
|
|
26
|
+
* cookies but no IndexedDB hangs on Linear's "Loading…" placeholder
|
|
27
|
+
* forever. The persistent profile carries IDB along with cookies, so
|
|
28
|
+
* the next headless run finds Linear's app fully bootstrapped.
|
|
29
|
+
*
|
|
30
|
+
* Window-close detection uses page-close events because on macOS
|
|
31
|
+
* closing the last Chromium window doesn't quit the process —
|
|
32
|
+
* `browser.on('disconnected')` never fires. See `BrowserPool.waitForUserDone`.
|
|
33
|
+
*/
|
|
34
|
+
export function registerLoginCommand(program: Command): void {
|
|
35
|
+
program
|
|
36
|
+
.command("login")
|
|
37
|
+
.description(
|
|
38
|
+
"Open a browser to sign into the services membot fetches from — closing the window saves your session.",
|
|
39
|
+
)
|
|
40
|
+
.action(async () => {
|
|
41
|
+
const ctx = await buildContext({});
|
|
42
|
+
const userDataDir = join(ctx.dataDir, FILES.BROWSER_PROFILE);
|
|
43
|
+
const pool = new BrowserPool({ userDataDir, headless: false });
|
|
44
|
+
const entries = collectLoginEntries();
|
|
45
|
+
const html = Mustache.render(LOGIN_PAGE_TEMPLATE, {
|
|
46
|
+
browser: entries.browser,
|
|
47
|
+
apiKey: entries.apiKey,
|
|
48
|
+
hasBrowser: entries.browser.length > 0,
|
|
49
|
+
hasApiKey: entries.apiKey.length > 0,
|
|
50
|
+
});
|
|
51
|
+
|
|
52
|
+
let cookieCount = 0;
|
|
53
|
+
try {
|
|
54
|
+
const page = await pool.newPage();
|
|
55
|
+
await page.goto(`data:text/html;charset=utf-8,${encodeURIComponent(html)}`).catch(() => {});
|
|
56
|
+
|
|
57
|
+
logger.info("Sign into the services you want membot to fetch from, then close the browser window.");
|
|
58
|
+
logger.info(`Session profile will be stored at ${userDataDir}.`);
|
|
59
|
+
|
|
60
|
+
await pool.waitForUserDone(page);
|
|
61
|
+
cookieCount = await pool.cookieCount();
|
|
62
|
+
} catch (err) {
|
|
63
|
+
if (err instanceof HelpfulError) throw err;
|
|
64
|
+
throw new HelpfulError({
|
|
65
|
+
kind: "internal_error",
|
|
66
|
+
message: `login failed: ${err instanceof Error ? err.message : String(err)}`,
|
|
67
|
+
hint: "Run `bunx playwright install chromium` to ensure the browser binary is installed, then retry.",
|
|
68
|
+
});
|
|
69
|
+
} finally {
|
|
70
|
+
await pool.dispose();
|
|
71
|
+
await closeContext(ctx);
|
|
72
|
+
}
|
|
73
|
+
|
|
74
|
+
if (cookieCount === 0) {
|
|
75
|
+
throw new HelpfulError({
|
|
76
|
+
kind: "auth_error",
|
|
77
|
+
message: `Browser profile at ${userDataDir} has no cookies — no service was signed in.`,
|
|
78
|
+
hint: "Run `membot login` again and sign in (Google / GitHub / Linear / …) before closing the window.",
|
|
79
|
+
});
|
|
80
|
+
}
|
|
81
|
+
logger.info(`Saved session profile (${cookieCount} cookie${cookieCount === 1 ? "" : "s"}).`);
|
|
82
|
+
});
|
|
83
|
+
}
|
package/src/config/schemas.ts
CHANGED
|
@@ -7,6 +7,10 @@ export const ChunkerConfigSchema = z.object({
|
|
|
7
7
|
max_chars: z.number().int().positive().default(DEFAULTS.CHUNKER_MAX_CHARS),
|
|
8
8
|
});
|
|
9
9
|
|
|
10
|
+
export const ConvertersConfigSchema = z.object({
|
|
11
|
+
max_inline_image_captions: z.number().int().nonnegative().default(DEFAULTS.MAX_INLINE_IMAGE_CAPTIONS),
|
|
12
|
+
});
|
|
13
|
+
|
|
10
14
|
export const LlmConfigSchema = z.object({
|
|
11
15
|
anthropic_api_key: z.string().meta({ secret: true }).default(""),
|
|
12
16
|
converter_model: z.string().default(DEFAULTS.CONVERTER_MODEL),
|
|
@@ -15,14 +19,23 @@ export const LlmConfigSchema = z.object({
|
|
|
15
19
|
vision_model: z.string().default(DEFAULTS.VISION_MODEL),
|
|
16
20
|
});
|
|
17
21
|
|
|
18
|
-
export const McpxConfigSchema = z.object({
|
|
19
|
-
config_path: z.string().default(""),
|
|
20
|
-
});
|
|
21
|
-
|
|
22
22
|
export const DaemonConfigSchema = z.object({
|
|
23
23
|
tick_interval_sec: z.number().int().positive().default(DEFAULTS.DAEMON_TICK_SEC),
|
|
24
24
|
});
|
|
25
25
|
|
|
26
|
+
export const LinearDownloaderConfigSchema = z.object({
|
|
27
|
+
api_key: z.string().meta({ secret: true }).default(""),
|
|
28
|
+
});
|
|
29
|
+
|
|
30
|
+
export const GithubDownloaderConfigSchema = z.object({
|
|
31
|
+
api_key: z.string().meta({ secret: true }).default(""),
|
|
32
|
+
});
|
|
33
|
+
|
|
34
|
+
export const DownloadersConfigSchema = z.object({
|
|
35
|
+
linear: LinearDownloaderConfigSchema.default(() => LinearDownloaderConfigSchema.parse({})),
|
|
36
|
+
github: GithubDownloaderConfigSchema.default(() => GithubDownloaderConfigSchema.parse({})),
|
|
37
|
+
});
|
|
38
|
+
|
|
26
39
|
export const DbLockRetryConfigSchema = z.object({
|
|
27
40
|
max_attempts: z.number().int().positive().default(30),
|
|
28
41
|
base_delay_ms: z.number().int().positive().default(100),
|
|
@@ -34,8 +47,9 @@ export const MembotConfigSchema = z.object({
|
|
|
34
47
|
embedding_model: z.string().default(EMBEDDING_MODEL),
|
|
35
48
|
embedding_dimension: z.number().int().positive().default(EMBEDDING_DIMENSION),
|
|
36
49
|
chunker: ChunkerConfigSchema.default(() => ChunkerConfigSchema.parse({})),
|
|
50
|
+
converters: ConvertersConfigSchema.default(() => ConvertersConfigSchema.parse({})),
|
|
37
51
|
llm: LlmConfigSchema.default(() => LlmConfigSchema.parse({})),
|
|
38
|
-
|
|
52
|
+
downloaders: DownloadersConfigSchema.default(() => DownloadersConfigSchema.parse({})),
|
|
39
53
|
daemon: DaemonConfigSchema.default(() => DaemonConfigSchema.parse({})),
|
|
40
54
|
db_lock_retry: DbLockRetryConfigSchema.default(() => DbLockRetryConfigSchema.parse({})),
|
|
41
55
|
default_refresh_frequency_sec: z.number().int().positive().nullable().default(null),
|
|
@@ -43,4 +57,8 @@ export const MembotConfigSchema = z.object({
|
|
|
43
57
|
|
|
44
58
|
export type MembotConfig = z.infer<typeof MembotConfigSchema>;
|
|
45
59
|
export type ChunkerConfig = z.infer<typeof ChunkerConfigSchema>;
|
|
60
|
+
export type ConvertersConfig = z.infer<typeof ConvertersConfigSchema>;
|
|
46
61
|
export type LlmConfig = z.infer<typeof LlmConfigSchema>;
|
|
62
|
+
export type DownloadersConfig = z.infer<typeof DownloadersConfigSchema>;
|
|
63
|
+
export type LinearDownloaderConfig = z.infer<typeof LinearDownloaderConfigSchema>;
|
|
64
|
+
export type GithubDownloaderConfig = z.infer<typeof GithubDownloaderConfigSchema>;
|
package/src/constants.ts
CHANGED
|
@@ -13,7 +13,6 @@ export const ENV = {
|
|
|
13
13
|
CONFIG: "MEMBOT_CONFIG",
|
|
14
14
|
DEBUG: "MEMBOT_DEBUG",
|
|
15
15
|
ANTHROPIC_API_KEY: "ANTHROPIC_API_KEY",
|
|
16
|
-
MCPX_CONFIG_PATH: "MCP_CONFIG_PATH",
|
|
17
16
|
NO_UPDATE_CHECK: "MEMBOT_NO_UPDATE_CHECK",
|
|
18
17
|
} as const;
|
|
19
18
|
|
|
@@ -41,6 +40,13 @@ export const DEFAULTS = {
|
|
|
41
40
|
VISION_MODEL: "claude-haiku-4-5-20251001",
|
|
42
41
|
UPDATE_CHECK_INTERVAL_MS: 24 * 60 * 60 * 1000,
|
|
43
42
|
UPDATE_CHECK_TIMEOUT_MS: 5_000,
|
|
43
|
+
/**
|
|
44
|
+
* Per-document cap on Claude vision caption calls when expanding inline
|
|
45
|
+
* images during DOCX/HTML conversion. Beyond this, images get a small
|
|
46
|
+
* deterministic placeholder so a slide-deck-shaped doc with hundreds of
|
|
47
|
+
* embedded images doesn't fan out into hundreds of vision requests.
|
|
48
|
+
*/
|
|
49
|
+
MAX_INLINE_IMAGE_CAPTIONS: 20,
|
|
44
50
|
} as const;
|
|
45
51
|
|
|
46
52
|
export const FILES = {
|
|
@@ -48,4 +54,17 @@ export const FILES = {
|
|
|
48
54
|
INDEX_DUCKDB: "index.duckdb",
|
|
49
55
|
MODELS_DIR: "models",
|
|
50
56
|
LOGS_DIR: "logs",
|
|
57
|
+
AUTH_DIR: "auth",
|
|
58
|
+
/**
|
|
59
|
+
* Persistent Chromium profile directory. We use
|
|
60
|
+
* `chromium.launchPersistentContext(userDataDir)` rather than the
|
|
61
|
+
* lighter `storageState` JSON snapshot because Linear (and other
|
|
62
|
+
* SPA-heavy services) stash critical session state in IndexedDB —
|
|
63
|
+
* which `storageState` doesn't capture. A persistent profile
|
|
64
|
+
* preserves the full set: cookies, localStorage, IndexedDB, service
|
|
65
|
+
* workers, etc. Trade-off: directory-sized state instead of a tiny
|
|
66
|
+
* JSON file, and only one process can have the profile open at a
|
|
67
|
+
* time (Chromium's single-instance lock).
|
|
68
|
+
*/
|
|
69
|
+
BROWSER_PROFILE: "auth/browser-profile",
|
|
51
70
|
} as const;
|