docdex 0.2.21 → 0.2.23
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +10 -4
- package/README.md +221 -272
- package/assets/agents.md +14 -0
- package/bin/docdex.js +6 -11
- package/lib/install.js +38 -136
- package/lib/postinstall_setup.js +289 -98
- package/lib/uninstall.js +263 -34
- package/package.json +2 -4
- package/bin/docdex-mcp-server.js +0 -66
- package/lib/playwright_fetch.js +0 -174
- package/lib/playwright_install.js +0 -302
package/CHANGELOG.md
CHANGED
|
@@ -1,12 +1,18 @@
|
|
|
1
1
|
# Changelog
|
|
2
2
|
|
|
3
|
+
## Unreleased
|
|
4
|
+
- Remove legacy stdio MCP (`docdexd mcp` / `docdex-mcp-server`); MCP is served only over HTTP/SSE.
|
|
5
|
+
|
|
6
|
+
## 0.2.23
|
|
7
|
+
- Add Smithery session config schema metadata (titles/descriptions, defaults, example config) for local MCP sessions.
|
|
8
|
+
- Enrich MCP tools with titles, descriptions, parameter descriptions, and annotations to improve Smithery scoring.
|
|
9
|
+
- Expose MCP prompts and resources (with titles/mime types/annotations) for onboarding, incident triage, and refactor planning.
|
|
10
|
+
- Switch web scraping to Chromium-only installs under `~/.docdex/state/bin/chromium/` and remove legacy browser tooling.
|
|
11
|
+
|
|
3
12
|
## 0.2.21
|
|
4
13
|
- Prompt for npm updates at CLI start (TTY-only, opt-out via `DOCDEX_UPDATE_CHECK=0`).
|
|
5
|
-
- Export bundled Playwright fetcher for daemon startup (launchd/systemd/schtasks + immediate spawn).
|
|
6
|
-
- Pass `DOCDEX_PLAYWRIGHT_FETCHER` in the npm wrapper when launching the daemon.
|
|
7
14
|
|
|
8
15
|
## 0.2.19
|
|
9
|
-
- Playwright issue fix
|
|
10
16
|
- Agents md adding command manually
|
|
11
17
|
- Agents md append repeat fix
|
|
12
18
|
|
|
@@ -33,7 +39,7 @@
|
|
|
33
39
|
- Publish npm wrapper with the latest MCP-compliant binary.
|
|
34
40
|
|
|
35
41
|
## 0.1.5
|
|
36
|
-
- Publish the MCP-enabled CLI wrapper
|
|
42
|
+
- Publish the MCP-enabled CLI wrapper and align docs with MCP mode.
|
|
37
43
|
- Keep npm version in sync with the MCP release for binary downloads.
|
|
38
44
|
|
|
39
45
|
## 0.1.4
|
package/README.md
CHANGED
|
@@ -1,286 +1,235 @@
|
|
|
1
|
+
[docdex.org](https://docdex.org)
|
|
2
|
+

|
|
3
|
+

|
|
4
|
+

|
|
5
|
+

|
|
6
|
+
[Docdex on LobeHub](https://lobehub.com/mcp/bekirdag-docdex)
|
|
7
|
+
|
|
8
|
+
<a href="https://glama.ai/mcp/servers/@bekirdag/docdex">
|
|
9
|
+
<img width="380" height="200" src="https://glama.ai/mcp/servers/@bekirdag/docdex/badge" alt="Docdex MCP server badge" />
|
|
10
|
+
</a>
|
|
11
|
+
|
|
1
12
|
# Docdex
|
|
2
13
|
|
|
3
|
-
|
|
4
|
-
|
|
5
|
-
|
|
6
|
-
|
|
7
|
-
|
|
8
|
-
|
|
9
|
-
|
|
10
|
-
|
|
11
|
-
|
|
12
|
-
|
|
13
|
-
|
|
14
|
-
|
|
15
|
-
|
|
16
|
-
|
|
17
|
-
|
|
18
|
-
|
|
19
|
-
|
|
20
|
-
## Features
|
|
21
|
-
|
|
22
|
-
|
|
23
|
-
|
|
24
|
-
|
|
25
|
-
|
|
26
|
-
|
|
27
|
-
|
|
28
|
-
|
|
29
|
-
|
|
30
|
-
|
|
31
|
-
|
|
32
|
-
|
|
33
|
-
|
|
34
|
-
|
|
35
|
-
|
|
36
|
-
|
|
37
|
-
|
|
38
|
-
|
|
39
|
-
4) Optional TLS/auth/rate-limit settings secure remote access; audit logging can record access actions.
|
|
40
|
-
|
|
41
|
-
## Quick start
|
|
14
|
+
> **Turn your repository into fast, private context that humans and AI can trust.**
|
|
15
|
+
|
|
16
|
+
Docdex is a **local-first indexer and search daemon** for documentation and source code. It sits between your raw files and your AI assistant, providing deterministic search, code intelligence, and persistent memory without ever uploading your code to a cloud vector store.
|
|
17
|
+
|
|
18
|
+
## ⚡ Why Docdex?
|
|
19
|
+
|
|
20
|
+
Most AI tools rely on "grep" (fast but dumb) or hosted RAG (slow and requires uploads). Docdex runs locally, understands code structure, and gives your AI agents a persistent memory.
|
|
21
|
+
|
|
22
|
+
| Problem | Typical Approach | The Docdex Solution |
|
|
23
|
+
| --- | --- | --- |
|
|
24
|
+
| **Finding Context** | `grep`/`rg` (Noisy, literal matches) | **Ranked, structured results** based on intent. |
|
|
25
|
+
| **Code Privacy** | Hosted RAG (Requires uploading code) | **Local-only indexing.** Your code stays on your machine. |
|
|
26
|
+
| **Siloed Search** | IDE-only search bars | **Shared Daemon** serving CLI, HTTP, and MCP clients simultaneously. |
|
|
27
|
+
| **Code Awareness** | String matching | **AST & Impact Graph** to understand dependencies and definitions. |
|
|
28
|
+
|
|
29
|
+
---
|
|
30
|
+
|
|
31
|
+
## 🚀 Features
|
|
32
|
+
|
|
33
|
+
* **📚 Document Indexing:** Rank and summarize repo documentation instantly.
|
|
34
|
+
* **🧠 AST & Impact Graph:** Search by function intent and track downstream dependencies (supports Rust, Python, JS/TS, Go, Java, C++, and more).
|
|
35
|
+
* **💾 Repo Memory:** Stores project facts, decisions, and notes locally.
|
|
36
|
+
* **👤 Agent Memory:** Remembers user preferences (e.g., "Use concise bullet points") across different repositories.
|
|
37
|
+
* **🔌 MCP Native:** Auto-configures for tools like Claude Desktop, Cursor, and Windsurf.
|
|
38
|
+
* **🌐 Web Enrichment:** Optional web search with local LLM filtering (via Ollama).
|
|
39
|
+
|
|
40
|
+
---
|
|
41
|
+
|
|
42
|
+
## 📦 Set-and-Forget Install
|
|
43
|
+
|
|
44
|
+
Install once, point your agent at Docdex, and it keeps working in the background.
|
|
45
|
+
|
|
46
|
+
### 1. Install via npm (Recommended)
|
|
47
|
+
|
|
48
|
+
Requires Node.js >= 18. The install step downloads the correct binary for your OS (macOS, Linux, or Windows).
|
|
49
|
+
|
|
42
50
|
```bash
|
|
43
|
-
# install (npm)
|
|
44
51
|
npm i -g docdex
|
|
45
|
-
# or use once
|
|
46
|
-
npx docdex --version
|
|
47
52
|
|
|
48
|
-
|
|
49
|
-
|
|
53
|
+
```
|
|
54
|
+
|
|
55
|
+
### 2. Auto-Configuration
|
|
56
|
+
|
|
57
|
+
If you have any of the following clients installed, Docdex automatically configures them to use the local MCP endpoint (daemon HTTP/SSE):
|
|
58
|
+
|
|
59
|
+
> **Claude Desktop, Cursor, Windsurf, Cline, Roo Code, Continue, VS Code, PearAI, Void, Zed, Codex.**
|
|
60
|
+
|
|
61
|
+
*Note: Restart your AI client after installation.*
|
|
62
|
+
|
|
63
|
+
---
|
|
64
|
+
|
|
65
|
+
## 🛠️ Usage Workflow
|
|
66
|
+
|
|
67
|
+
### 1. Index a Repository
|
|
68
|
+
|
|
69
|
+
Run this once to build the index and graph data.
|
|
70
|
+
|
|
71
|
+
```bash
|
|
72
|
+
docdexd index --repo /path/to/my-project
|
|
73
|
+
|
|
74
|
+
```
|
|
75
|
+
|
|
76
|
+
### 2. Start the Daemon
|
|
50
77
|
|
|
51
|
-
|
|
52
|
-
|
|
53
|
-
|
|
54
|
-
|
|
78
|
+
Start the shared server. This handles HTTP requests and MCP connections.
|
|
79
|
+
|
|
80
|
+
```bash
|
|
81
|
+
docdexd daemon --host 127.0.0.1 --port 28491
|
|
55
82
|
|
|
56
|
-
# ad-hoc chat via CLI (JSON)
|
|
57
|
-
docdexd chat --repo /path/to/repo --query "otp flow" --limit 5
|
|
58
83
|
```
|
|
59
84
|
|
|
60
|
-
|
|
61
|
-
|
|
62
|
-
|
|
63
|
-
|
|
64
|
-
|
|
65
|
-
|
|
66
|
-
|
|
67
|
-
## Usage cheat sheet
|
|
68
|
-
- Build index: `docdexd index --repo <path>` (add `--exclude-*` to skip paths).
|
|
69
|
-
- Serve with watcher: `docdexd serve --repo <path> --host 127.0.0.1 --port 3210 --log warn --auth-token <token>` (secure mode also allowlists loopback and rate-limits by default; add `--allow-ip`/`--secure-mode=false`/`--rate-limit-per-min` as needed for remote use).
|
|
70
|
-
- Secure serving: add `--auth-token <token>` (required by default); use TLS with `--tls-cert/--tls-key` or `--certbot-domain <domain>`.
|
|
71
|
-
- Single-file ingest: `docdexd ingest --repo <path> --file docs/new.md` (honors excludes).
|
|
72
|
-
- Query via CLI: `docdexd chat --repo <path> --query "term" --limit 4` (add `--repo-only` to ignore libs index hits).
|
|
73
|
-
- Memory: `docdexd memory-store --repo <path> --text "..."` and `docdexd memory-recall --repo <path> --query "..."`.
|
|
74
|
-
- Web fallback (disabled by default): `DOCDEX_WEB_ENABLED=1 docdexd web-search --query "..."`.
|
|
75
|
-
- Git hygiene: add `.docdex/` to `.gitignore` only if you opt into an in-repo `--state-dir`.
|
|
76
|
-
- Health check: `curl http://127.0.0.1:3210/healthz`.
|
|
77
|
-
- Summary-only search responses: `curl "http://127.0.0.1:3210/search?q=foo&snippets=false"`; fetch snippets only for top hits.
|
|
78
|
-
- Repo-only HTTP search (ignore libs index hits): `curl "http://127.0.0.1:3210/search?q=foo&include_libs=false"`.
|
|
79
|
-
- Token budgets: `curl "http://127.0.0.1:3210/search?q=foo&max_tokens=800"` to drop hits that would exceed your prompt budget; pair with `snippets=false` then fetch 1–2 snippets you keep.
|
|
80
|
-
- Text-only snippets: append `text_only=true` to `/snippet/:doc_id` or start `serve` with `--strip-snippet-html` (or `--disable-snippet-text` to return metadata only).
|
|
81
|
-
- Keep requests compact: defaults enforce `max_query_bytes=4096` and `max_request_bytes=16384`; keep queries short and leave `--max-limit` low (default 8) to avoid oversized responses.
|
|
82
|
-
- Prompt hygiene: in agent prompts, normalize whitespace and include only `rel_path`, `summary`, and trimmed `snippet` (omit `score`/`token_estimate`/`doc_id`).
|
|
83
|
-
- Trim noise early: use `--exclude-dir` and `--exclude-prefix` to keep vendor/build/cache/secrets out of the index so snippets stay relevant and short.
|
|
84
|
-
- Quiet logging for agents: run `docdexd serve --log warn --access-log=false` if you marshal responses elsewhere to cut log overhead.
|
|
85
|
-
- Cache hits client-side: store `doc_id` ↔ `rel_path` ↔ `summary` to avoid repeat snippet calls; fetch snippets only for new doc_ids.
|
|
86
|
-
- Agent help: `curl http://127.0.0.1:3210/ai-help` (requires auth if configured; include `Authorization: Bearer <token>` when you’ve set `--auth-token`). The response includes a short MCP registration recipe.
|
|
87
|
-
|
|
88
|
-
## Versioning
|
|
89
|
-
- Semantic versioning with tagged releases (`vX.Y.Z`). The Rust crate and npm package share the same version.
|
|
90
|
-
- Conventional Commits drive release notes via Release Please; it opens release PRs that bump `Cargo.toml` and `npm/package.json`, update changelogs, and creates the tag/release on merge.
|
|
91
|
-
- Pin to a released version when integrating (e.g., in scripts or Dockerfiles) so upgrades are explicit and reversible.
|
|
92
|
-
- If you build from source, the version comes from `Cargo.toml` in this repo; the npm wrapper uses the matching version to fetch binaries.
|
|
93
|
-
|
|
94
|
-
## Paths and defaults
|
|
95
|
-
- State/index directory: `~/.docdex/state/repos/<fingerprint>/index` (legacy `.gpt-creator/docdex/index` is reused with a warning). The directory is created with `0700` permissions by default.
|
|
96
|
-
- HTTP API: defaults to `127.0.0.1:3210` when serving.
|
|
97
|
-
- State and logs stay local; no external services are required.
|
|
98
|
-
|
|
99
|
-
## Configuration knobs
|
|
100
|
-
- `--repo <path>`: workspace root to index (defaults to `.`).
|
|
101
|
-
- `--state-dir <path>` / `DOCDEX_STATE_DIR`: override state storage path (relative paths resolve under the repo root; absolute paths outside the repo are treated as shared base dirs and scoped to `<state-dir>/repos/<repo_id>/index`).
|
|
102
|
-
- `--exclude-prefix a,b,c` / `DOCDEX_EXCLUDE_PREFIXES`: extra relative prefixes to skip.
|
|
103
|
-
- `--exclude-dir a,b,c` / `DOCDEX_EXCLUDE_DIRS`: extra directory names to skip anywhere in the tree.
|
|
104
|
-
- `--auth-token <token>` / `DOCDEX_AUTH_TOKEN`: bearer token required in secure mode (default); omit only when starting with `--secure-mode=false`.
|
|
105
|
-
- `--secure-mode <true|false>` / `DOCDEX_SECURE_MODE`: default `true`; when enabled, requires an auth token, loopback allowlist by default, and default rate limiting (60 req/min).
|
|
106
|
-
- `--allow-ip a,b,c` / `DOCDEX_ALLOW_IPS`: optional comma-separated IPs/CIDRs allowed to reach the HTTP API (default: loopback-only in secure mode; allow all when secure mode is disabled).
|
|
107
|
-
- `--tls-cert` / `DOCDEX_TLS_CERT` and `--tls-key` / `DOCDEX_TLS_KEY`: serve HTTPS with the provided cert/key. With TLS enforcement on, non-loopback binds must use HTTPS unless you explicitly opt out.
|
|
108
|
-
- `--certbot-domain <domain>` / `DOCDEX_CERTBOT_DOMAIN`: point TLS at `/etc/letsencrypt/live/<domain>/{fullchain.pem,privkey.pem}` (Certbot). Conflicts with manual `--tls-*`.
|
|
109
|
-
- `--certbot-live-dir <path>` / `DOCDEX_CERTBOT_LIVE_DIR`: use a specific Certbot live dir containing `fullchain.pem` and `privkey.pem`.
|
|
110
|
-
- `--require-tls <true|false>` / `DOCDEX_REQUIRE_TLS`: default `true`. Enforce TLS for non-loopback binds; set to `false` when TLS is already terminated by a trusted proxy.
|
|
111
|
-
- `--insecure` / `DOCDEX_INSECURE_HTTP=true`: allow plain HTTP on non-loopback binds even when TLS is enforced (only use behind a trusted proxy).
|
|
112
|
-
- `--max-limit <n>` / `DOCDEX_MAX_LIMIT`: clamp HTTP `limit` to at most `n` (default: 8).
|
|
113
|
-
- `--max-query-bytes <n>` / `DOCDEX_MAX_QUERY_BYTES`: reject requests whose query string exceeds `n` bytes (default: 4096).
|
|
114
|
-
- `--max-request-bytes <n>` / `DOCDEX_MAX_REQUEST_BYTES`: reject requests whose Content-Length or size hint exceeds `n` bytes (default: 16384).
|
|
115
|
-
- `--rate-limit-per-min <n>` / `DOCDEX_RATE_LIMIT_PER_MIN`: per-IP request budget per minute (default 60 in secure mode when unset/0; 0 disables when secure mode is off).
|
|
116
|
-
- `--rate-limit-burst <n>` / `DOCDEX_RATE_LIMIT_BURST`: optional burst capacity for the rate limiter (defaults to per-minute limit when 0).
|
|
117
|
-
- `--audit-log-path <path>` / `DOCDEX_AUDIT_LOG_PATH`: write audit log JSONL to this path (default: `<state-dir>/audit.log`).
|
|
118
|
-
- `--audit-max-bytes <n>` / `DOCDEX_AUDIT_MAX_BYTES`: rotate audit log after this many bytes (default: 5_000_000).
|
|
119
|
-
- `--audit-max-files <n>` / `DOCDEX_AUDIT_MAX_FILES`: keep at most this many rotated audit files (default: 5).
|
|
120
|
-
- `--audit-disable` / `DOCDEX_AUDIT_DISABLE=true`: disable audit logging entirely.
|
|
121
|
-
- `--strip-snippet-html` / `DOCDEX_STRIP_SNIPPET_HTML=true`: omit `snippet.html` in responses to force text-only snippets (HTML is sanitized by default when present).
|
|
122
|
-
- `--disable-snippet-text` / `DOCDEX_DISABLE_SNIPPET_TEXT=true`: omit snippet text/html in responses entirely (only doc metadata is returned).
|
|
123
|
-
- `--enable-memory <true|false>` / `DOCDEX_ENABLE_MEMORY`: control memory endpoints (enabled by default via config; set `[memory].enabled=false` or `DOCDEX_ENABLE_MEMORY=0` to disable).
|
|
124
|
-
- `DOCDEX_WEB_ENABLED=1` / `DOCDEX_OFFLINE=1`: enable web fallback or force offline mode.
|
|
125
|
-
- `--access-log <true|false>` / `DOCDEX_ACCESS_LOG`: emit minimal structured access logs with query values redacted (default: true).
|
|
126
|
-
- `--run-as-uid` / `DOCDEX_RUN_AS_UID`, `--run-as-gid` / `DOCDEX_RUN_AS_GID`: (Unix) drop privileges to the provided UID/GID after startup prep.
|
|
127
|
-
- `--chroot <path>` / `DOCDEX_CHROOT`: (Unix) chroot into `path` before serving; repo/state paths must exist inside that jail.
|
|
128
|
-
- `--unshare-net` / `DOCDEX_UNSHARE_NET=true`: (Linux only) unshare the network namespace before serving (requires CAP_SYS_ADMIN/root); no-op on other platforms.
|
|
129
|
-
- Logging: `--log <level>` on `serve` (defaults to `info`), or `RUST_LOG=docdexd=debug` style filters.
|
|
130
|
-
- Secure mode defaults: when `--secure-mode=true` (default), docdex requires an auth token, allows only loopback IPs unless overridden, and applies a 60 req/min rate limit. Set `--secure-mode=false` to opt out for local dev and adjust `--allow-ip`/rate limits as needed.
|
|
131
|
-
|
|
132
|
-
## Indexing rules (see `index/mod.rs`)
|
|
133
|
-
- File types: `.md`, `.markdown`, `.mdx`, `.txt`, `.rs`, `.py`, `.js`, `.jsx`, `.ts`, `.tsx`, `.go` (extend `DEFAULT_EXTENSIONS` to add more).
|
|
134
|
-
- Skipped directories: broad VCS/build/cache/vendor folders across ecosystems (e.g., `.git`, `.hg`, `.svn`, `node_modules`, `.pnpm-store`, `.yarn*`, `.nx`, `.rollup-cache`, `.webpack-cache`, `.tsbuildinfo`, `.next`, `.nuxt`, `.svelte-kit`, `.mypy_cache`, `.ruff_cache`, `.venv`, `target`, `go-build`, `.gradle`, `.mvn`, `pods`, `.dart_tool`, `.android`, `.serverless`, `.vercel`, `.netlify`, `_build`, `_opam`, `.stack-work`, `elm-stuff`, `library`, `intermediate`, `.godot`, etc.; see `DEFAULT_EXCLUDED_DIR_NAMES` for the full list).
|
|
135
|
-
- Skipped relative prefixes: `logs/`, `.docdex/`, `.docdex/logs/`, `.docdex/tmp/`, `.gpt-creator/logs/`, `.gpt-creator/tmp/`, `.mastercoda/logs/`, `.mastercoda/tmp/`, `docker/.data/`, `docker-data/`, `.docker/`.
|
|
136
|
-
- Snippet sizing: summaries ~360 chars (up to 4 segments); snippets ~420 chars.
|
|
137
|
-
|
|
138
|
-
## HTTP API
|
|
139
|
-
- `GET /healthz` — returns `ok`; this endpoint is unauthenticated and not rate-limited (IP allowlist still applies).
|
|
140
|
-
- `GET /search?q=<text>&limit=<n>&snippets=<bool>&max_tokens=<u64>&include_libs=<bool>` — returns `{ hits: [...] }` with doc id, `rel_path`/`path`, `kind` (`doc`|`code`), summary, snippet, score, token estimate. Optional: `force_web`, `skip_local_search`, `no_cache`, `max_web_results`, `llm_filter_local_results`, `diff_mode`, `diff_base`, `diff_head`, `diff_path`, `repo_id`.
|
|
141
|
-
- `GET /snippet/:doc_id?window=<lines>&q=<query>&text_only=<bool>&max_tokens=<u64>` — returns `{ doc, snippet }` with optional highlighted snippet; falls back to preview when query highlighting is empty (default window: 40 lines).
|
|
142
|
-
- `POST /v1/index/rebuild` — rebuild the repo index.
|
|
143
|
-
- `POST /v1/index/ingest` — ingest a single file.
|
|
144
|
-
- `POST /v1/chat/completions` — OpenAI-compatible chat completion with docdex context.
|
|
145
|
-
- `GET /v1/graph/impact` / `GET /v1/graph/impact/diagnostics` — impact graph edges + unresolved imports.
|
|
146
|
-
- `GET /v1/symbols`, `GET /v1/symbols/status` — symbols per file + parser drift status.
|
|
147
|
-
- `GET /v1/ast`, `GET /v1/ast/search`, `POST /v1/ast/query` — AST queries.
|
|
148
|
-
- `POST /v1/memory/store`, `POST /v1/memory/recall` — memory endpoints (enabled by default).
|
|
149
|
-
- `POST /v1/web/search`, `POST /v1/web/fetch`, `POST /v1/web/cache/flush` — web discovery/fetch (requires `DOCDEX_WEB_ENABLED=1`).
|
|
150
|
-
- `GET /ai-help` — JSON quickstart for agents.
|
|
151
|
-
- `GET /metrics` — Prometheus-style counters/gauges (see `docs/ops/browser_guard.md` in the repo).
|
|
152
|
-
- Repo scoping: include `repo_id` in query/body or the `x-docdex-repo-id` header; mismatches are rejected.
|
|
153
|
-
- If `--auth-token` is set, include `Authorization: Bearer <token>` on HTTP calls (including `/ai-help`).
|
|
154
|
-
|
|
155
|
-
## CLI commands
|
|
156
|
-
- `serve --repo <path> [--host 127.0.0.1] [--port 3210] [--log info]` — start HTTP API with file watching for incremental updates.
|
|
157
|
-
- `index --repo <path>` — rebuild the entire index.
|
|
158
|
-
- `ingest --repo <path> --file <file>` — reindex a single file.
|
|
159
|
-
- `chat --repo <path> --query "<text>" [--limit 8] [--repo-only|--web-only] [--max-web-results N]` — run a chat/search query (omit `--query` to enter REPL mode).
|
|
160
|
-
- `web-search --query "<text>"`, `web-fetch --url <url>`, `web-rag --query "<text>"` — web discovery/fetch and web-assisted queries.
|
|
161
|
-
- `memory-store --text "<text>"` / `memory-recall --query "<text>" --top-k 5` — memory store/recall (enabled by default).
|
|
162
|
-
- `symbols-status --repo <path>` — report Tree-sitter parser drift.
|
|
163
|
-
- `impact-diagnostics --repo <path>` — list unresolved dynamic imports.
|
|
164
|
-
- `self-check --repo <path> --terms "foo,bar" [--limit 5]` — scan the index for sensitive terms before enabling access.
|
|
165
|
-
|
|
166
|
-
## Perf checks
|
|
167
|
-
- Repo-only search latency (p95 < 50ms; see `docs/sds/sds.md`): `cargo test --release repo_only_search_p95_under_50ms_with_libs_index_present -- --ignored --nocapture`.
|
|
168
|
-
|
|
169
|
-
## Help and command discovery
|
|
170
|
-
- List all commands/flags: `docdexd --help`.
|
|
171
|
-
- Dump help for every subcommand: `docdexd help-all`.
|
|
172
|
-
- See `serve` options (TLS, auth, rate limits, watcher): `docdexd serve --help`.
|
|
173
|
-
- Indexing options: `docdexd index --help` (exclude paths, custom state dir).
|
|
174
|
-
- Ad-hoc queries: `docdexd chat --help`.
|
|
175
|
-
- Self-check scanner options: `docdexd self-check --help`.
|
|
176
|
-
- Agent help endpoint: `curl http://127.0.0.1:3210/ai-help` (include `Authorization: Bearer <token>` if `--auth-token` is set) for a JSON listing of endpoints, limits, and best practices.
|
|
177
|
-
- MCP help/registration: `docdexd mcp --help` lists MCP flags; register with your client using `docdexd mcp --repo <repo> --log warn --max-results 8` (Codex CLI shortcut: `codex mcp add docdex -- docdexd mcp --repo <repo> --log warn --max-results 8`).
|
|
178
|
-
- Environment variables mirror the flags (e.g., `DOCDEX_AUTH_TOKEN`, `DOCDEX_TLS_CERT`, `DOCDEX_MAX_LIMIT`).
|
|
179
|
-
- Command overview (same as `docdexd --help`):
|
|
180
|
-
- `serve` — run HTTP API with watcher and security knobs.
|
|
181
|
-
- `index` — build or rebuild the whole index.
|
|
182
|
-
- `ingest` — reindex a single file.
|
|
183
|
-
- `chat` — run an ad-hoc search, JSON to stdout (omit `--query` for REPL).
|
|
184
|
-
- `web-search` / `web-fetch` / `web-rag` — web discovery and web-assisted queries (requires `DOCDEX_WEB_ENABLED=1`).
|
|
185
|
-
- `memory-store` / `memory-recall` — memory store/recall.
|
|
186
|
-
- `symbols-status` / `impact-diagnostics` — code intelligence status and unresolved imports.
|
|
187
|
-
- `repo` — inspect or reassociate repo identity for shared state dirs.
|
|
188
|
-
- `mcp` / `mcp-add` — MCP server + helper for agent CLIs.
|
|
189
|
-
- `self-check` — scan index for sensitive terms with report.
|
|
190
|
-
- `help-all` — print help for every command/flag in one output.
|
|
191
|
-
|
|
192
|
-
## Troubleshooting
|
|
193
|
-
- Stale index: re-run `docdexd index --repo <path>`.
|
|
194
|
-
- Port conflicts: change `--host/--port`.
|
|
195
|
-
- Installer failures (`npm i -g docdex`): use the printed `DOCDEX_*` error code; see `docs/ops/installer_error_codes.md`.
|
|
196
|
-
|
|
197
|
-
## Security considerations
|
|
198
|
-
- Default bind is `127.0.0.1`; keep it unless you are behind a trusted reverse proxy/firewall. Avoid `--host 0.0.0.0` on untrusted networks.
|
|
199
|
-
- By default, non-loopback binds require TLS; opt out only with `--require-tls=false` or `--insecure` when traffic is already terminating at a trusted proxy.
|
|
200
|
-
- If exposing externally, place a reverse proxy in front, terminate TLS, and require auth (basic/OAuth/mTLS) plus IP/VPN allowlisting. Example (nginx):
|
|
201
|
-
```
|
|
202
|
-
server {
|
|
203
|
-
listen 443 ssl;
|
|
204
|
-
server_name docdex.example.com;
|
|
205
|
-
ssl_certificate /path/fullchain.pem;
|
|
206
|
-
ssl_certificate_key /path/privkey.pem;
|
|
207
|
-
auth_basic "Protected";
|
|
208
|
-
auth_basic_user_file /etc/nginx/.htpasswd; # or hook OAuth/mTLS instead
|
|
209
|
-
allow 10.0.0.0/8;
|
|
210
|
-
allow 192.168.0.0/16;
|
|
211
|
-
deny all;
|
|
212
|
-
location / {
|
|
213
|
-
proxy_pass http://127.0.0.1:3210;
|
|
214
|
-
proxy_set_header Host $host;
|
|
215
|
-
}
|
|
216
|
-
}
|
|
217
|
-
```
|
|
218
|
-
- Trim the corpus: prefer a curated staging directory, or use `--exclude-dir` / `--exclude-prefix` to keep secrets/private paths out before indexing; the watcher will ingest any in-scope file change under `repo`.
|
|
219
|
-
- Mind logs: avoid verbose logging in production if snippets/paths are sensitive; reverse-proxy access logs can also capture query terms and paths.
|
|
220
|
-
- Least privilege: run docdex under a low-privilege user/container and keep the state dir on a path with restricted permissions.
|
|
221
|
-
- Validate before publish: run `docdexd chat` for sensitive keywords to confirm no hits; store indexes on encrypted disks if required.
|
|
222
|
-
- Optional hardening: require an auth token on the HTTP API (or proxy); enforce TLS when not on localhost (default) or explicitly opt out with `--require-tls=false`/`--insecure` only behind a trusted proxy; enable rate limiting (`--rate-limit-per-min`) and clamp `limit`/request sizes (`--max-limit`, `--max-query-bytes`, `--max-request-bytes`); escape/sanitize snippet HTML if embedding or disable snippets entirely with `--disable-snippet-text`; state dir is created `0700` by default—keep it under an unprivileged user, optionally `--run-as-uid/--run-as-gid`, `--chroot`, or containerize; keep access logging minimal/redacted (`--access-log`), and run `self-check` for sensitive terms before exposing the service; for at-rest confidentiality, place the state dir on encrypted storage or use host-level disk encryption.
|
|
223
|
-
|
|
224
|
-
## Integrating with LLM tools
|
|
225
|
-
Docdex is tool-agnostic. Drop-in recipe for agents/codegen tools:
|
|
226
|
-
- Start once per repo: `docdexd index --repo <repo>` then `docdexd serve --repo <repo> --host 127.0.0.1 --port 3210 --log warn` (or use the CLI directly without serving).
|
|
227
|
-
- Configure via env: `DOCDEX_STATE_DIR` (state location), `DOCDEX_EXCLUDE_PREFIXES`, `DOCDEX_EXCLUDE_DIRS`, `RUST_LOG=docdexd=debug` (optional verbose logs).
|
|
228
|
-
- Query over HTTP: `GET /search?q=<text>&limit=<n>` returns `{hits:[...], top_score, meta}`; `GET /snippet/:doc_id` fetches a focused snippet plus doc metadata.
|
|
229
|
-
- Or chat over HTTP: `POST /v1/chat/completions` (OpenAI-compatible) with a `docdex` object to control gating and repo context.
|
|
230
|
-
- Or query via CLI: `docdexd chat --repo <repo> --query "<text>" --limit 8` (JSON to stdout).
|
|
231
|
-
- Health check: `GET /healthz` should return `ok` before issuing search requests.
|
|
232
|
-
- Inject snippets into prompts:
|
|
85
|
+
### 3. Ask Questions (CLI)
|
|
86
|
+
|
|
87
|
+
You can chat directly from the terminal.
|
|
88
|
+
|
|
89
|
+
```bash
|
|
90
|
+
docdexd chat --repo /path/to/my-project --query "how does auth work?"
|
|
91
|
+
|
|
233
92
|
```
|
|
234
|
-
|
|
93
|
+
|
|
94
|
+
---
|
|
95
|
+
|
|
96
|
+
## 🔌 Model Context Protocol (MCP)
|
|
97
|
+
|
|
98
|
+
Docdex is designed to be the "brain" for your AI agents. It exposes an MCP endpoint that agents connect to.
|
|
99
|
+
|
|
100
|
+
### Architecture
|
|
101
|
+
|
|
102
|
+
```mermaid
|
|
103
|
+
flowchart LR
|
|
104
|
+
Repo[Repo on disk] --> Indexer[Docdex Indexer]
|
|
105
|
+
Indexer --> Daemon[Docdex Daemon]
|
|
106
|
+
Daemon -->|HTTP + SSE| MCPClient[MCP Client]
|
|
107
|
+
MCPClient --> Host[AI Agent / Editor]
|
|
108
|
+
|
|
235
109
|
```
|
|
236
110
|
|
|
237
|
-
|
|
238
|
-
|
|
239
|
-
|
|
240
|
-
|
|
241
|
-
|
|
242
|
-
|
|
243
|
-
|
|
244
|
-
|
|
245
|
-
|
|
246
|
-
|
|
247
|
-
|
|
248
|
-
|
|
249
|
-
|
|
111
|
+
MCP clients connect to the daemon over its HTTP/SSE endpoint.
|
|
112
|
+
|
|
113
|
+
### Manual Configuration
|
|
114
|
+
|
|
115
|
+
If you need to configure your client manually:
|
|
116
|
+
|
|
117
|
+
**JSON (Claude/Cursor/Continue):**
|
|
118
|
+
|
|
119
|
+
```json
|
|
120
|
+
{
|
|
121
|
+
"mcpServers": {
|
|
122
|
+
"docdex": {
|
|
123
|
+
"url": "http://localhost:28491/v1/mcp/sse"
|
|
250
124
|
}
|
|
251
125
|
}
|
|
252
|
-
|
|
253
|
-
|
|
254
|
-
|
|
255
|
-
|
|
256
|
-
|
|
257
|
-
|
|
258
|
-
|
|
259
|
-
|
|
260
|
-
|
|
261
|
-
|
|
262
|
-
|
|
263
|
-
|
|
264
|
-
|
|
265
|
-
|
|
266
|
-
|
|
267
|
-
|
|
268
|
-
|
|
269
|
-
|
|
270
|
-
|
|
271
|
-
|
|
272
|
-
|
|
273
|
-
|
|
274
|
-
|
|
275
|
-
|
|
276
|
-
|
|
277
|
-
|
|
278
|
-
|
|
279
|
-
|
|
280
|
-
|
|
281
|
-
|
|
282
|
-
|
|
283
|
-
|
|
284
|
-
|
|
285
|
-
|
|
286
|
-
|
|
126
|
+
}
|
|
127
|
+
|
|
128
|
+
```
|
|
129
|
+
|
|
130
|
+
**TOML (Codex):**
|
|
131
|
+
|
|
132
|
+
```toml
|
|
133
|
+
[mcp_servers]
|
|
134
|
+
docdex = { url = "http://localhost:28491/v1/mcp" }
|
|
135
|
+
|
|
136
|
+
```
|
|
137
|
+
|
|
138
|
+
---
|
|
139
|
+
|
|
140
|
+
## 🤖 Capabilities & Examples
|
|
141
|
+
|
|
142
|
+
### 1. AST & Impact Analysis
|
|
143
|
+
|
|
144
|
+
Don't just find the string "addressGenerator"; find the **definition** and what it impacts.
|
|
145
|
+
|
|
146
|
+
```bash
|
|
147
|
+
# Find definition
|
|
148
|
+
curl "http://127.0.0.1:28491/v1/ast?name=addressGenerator&pathPrefix=src"
|
|
149
|
+
|
|
150
|
+
# Track downstream impact (what breaks if I change this?)
|
|
151
|
+
curl "http://127.0.0.1:28491/v1/graph/impact?file=src/app.ts&maxDepth=3"
|
|
152
|
+
|
|
153
|
+
```
|
|
154
|
+
|
|
155
|
+
### 2. Memory System
|
|
156
|
+
|
|
157
|
+
Docdex lets you store "facts" that can be recalled later through retrieval.
|
|
158
|
+
|
|
159
|
+
**Repo Memory (Project specific):**
|
|
160
|
+
|
|
161
|
+
```bash
|
|
162
|
+
# Teach the repo a fact
|
|
163
|
+
docdexd memory-store --repo . --text "Payments retry up to 3 times with backoff."
|
|
164
|
+
|
|
165
|
+
# Recall it later
|
|
166
|
+
docdexd memory-recall --repo . --query "payments retry policy"
|
|
167
|
+
|
|
168
|
+
```
|
|
169
|
+
|
|
170
|
+
**Agent Memory (User preference):**
|
|
171
|
+
|
|
172
|
+
```bash
|
|
173
|
+
# Set a style preference
|
|
174
|
+
docdexd profile add --agent-id "default" --category style --content "Use concise bullet points."
|
|
175
|
+
|
|
176
|
+
```
|
|
177
|
+
|
|
178
|
+
### 3. Local LLM (Ollama)
|
|
179
|
+
|
|
180
|
+
Docdex uses Ollama for embeddings and optional local chat.
|
|
181
|
+
|
|
182
|
+
* **Setup:** Run `docdex setup` for an interactive wizard.
|
|
183
|
+
* **Manual:** Ensure `nomic-embed-text` is pulled in Ollama (`ollama pull nomic-embed-text`).
|
|
184
|
+
* **Custom URL:**
|
|
185
|
+
```bash
|
|
186
|
+
DOCDEX_OLLAMA_BASE_URL=http://127.0.0.1:11434 docdexd daemon --host 127.0.0.1 --port 28491
|
|
187
|
+
|
|
188
|
+
```
|
|
189
|
+
|
|
190
|
+
|
|
191
|
+
|
|
192
|
+
---
|
|
193
|
+
|
|
194
|
+
## ⚙️ Configuration & HTTP API
|
|
195
|
+
|
|
196
|
+
Docdex runs as a local daemon serving:
|
|
197
|
+
|
|
198
|
+
* **CLI Commands:** `docdexd chat`
|
|
199
|
+
* **HTTP API:** `/search`, `/v1/ast`, `/v1/graph/impact`
|
|
200
|
+
* **MCP Endpoints:** `/v1/mcp` and `/v1/mcp/sse`
|
|
201
|
+
|
|
202
|
+
### Multi-Repo Setup
|
|
203
|
+
|
|
204
|
+
Run a single daemon and mount additional repos on demand.
|
|
205
|
+
|
|
206
|
+
```bash
|
|
207
|
+
docdexd daemon --port 28491
|
|
208
|
+
|
|
209
|
+
# Mount repos and capture repo_id values
|
|
210
|
+
curl -X POST "http://127.0.0.1:28491/v1/initialize" \
|
|
211
|
+
-H "Content-Type: application/json" \
|
|
212
|
+
-d '{"rootUri":"file:///path/to/repo-a"}'
|
|
213
|
+
|
|
214
|
+
curl -X POST "http://127.0.0.1:28491/v1/initialize" \
|
|
215
|
+
-H "Content-Type: application/json" \
|
|
216
|
+
-d '{"rootUri":"file:///path/to/repo-b"}'
|
|
217
|
+
```
|
|
218
|
+
|
|
219
|
+
Notes:
|
|
220
|
+
- When more than one repo is mounted (or the daemon starts without a default repo), include `x-docdex-repo-id: <sha256>` on HTTP requests.
|
|
221
|
+
- MCP sessions bind to the repo provided in `initialize.rootUri` and reuse that repo automatically.
|
|
222
|
+
|
|
223
|
+
### Security
|
|
224
|
+
|
|
225
|
+
* **Secure Mode:** By default, Docdex enforces TLS on non-loopback binds.
|
|
226
|
+
* **Loopback:** `127.0.0.1` is accessible without TLS for local agents.
|
|
227
|
+
* **Network exposure:** To expose the daemon beyond loopback (use with caution), pass `--expose` together with `--auth-token`.
|
|
228
|
+
|
|
229
|
+
---
|
|
230
|
+
|
|
231
|
+
## 📚 Learn More
|
|
232
|
+
|
|
233
|
+
* **Detailed Usage:** `docs/usage.md`
|
|
234
|
+
* **API Reference:** `docs/http_api.md`
|
|
235
|
+
* **MCP Specs:** `docs/mcp/errors.md`
|
package/assets/agents.md
CHANGED
|
@@ -46,6 +46,20 @@ Standard retrieval. The daemon automatically handles the waterfall (Local -> Web
|
|
|
46
46
|
| docdex_search | Search code, docs, and ingested libraries. Returns ranked snippets. |
|
|
47
47
|
| docdex_web_research | Explicitly trigger Tier 2 web discovery (DDG + Headless Chrome). Use when you need external docs not present locally. |
|
|
48
48
|
|
|
49
|
+
Tier 2 discovery providers (in fallback order when DDG HTML fails or is blocked):
|
|
50
|
+
- DuckDuckGo Lite
|
|
51
|
+
- SearXNG JSON (public instance or self-hosted)
|
|
52
|
+
- Google Mobile
|
|
53
|
+
- Brave Search API (requires DOCDEX_BRAVE_API_KEY)
|
|
54
|
+
- Google Custom Search JSON API (requires DOCDEX_GOOGLE_CSE_API_KEY + DOCDEX_GOOGLE_CSE_CX)
|
|
55
|
+
- Bing Web Search API (requires DOCDEX_BING_API_KEY)
|
|
56
|
+
- Tavily API (requires DOCDEX_TAVILY_API_KEY)
|
|
57
|
+
- Exa API (requires DOCDEX_EXA_API_KEY)
|
|
58
|
+
|
|
59
|
+
Defaults and overrides:
|
|
60
|
+
- Default endpoints: `https://html.duckduckgo.com/html/`, `https://lite.duckduckgo.com/lite/`, `https://searx.be/search` (built-in fallback list), `https://www.google.com/m`, `https://api.search.brave.com/res/v1/web/search`, `https://www.googleapis.com/customsearch/v1`, `https://api.bing.microsoft.com/v7.0/search`, `https://api.tavily.com/search`, `https://api.exa.ai/search`
|
|
61
|
+
- Override envs: `DOCDEX_DDG_BASE_URL`, `DOCDEX_WEB_SEARXNG_URLS` (comma list, or `DOCDEX_SEARXNG_URLS`), `DOCDEX_WEB_GOOGLE_MOBILE_URL`, `DOCDEX_BRAVE_API_KEY` + `DOCDEX_BRAVE_API_URL`, `DOCDEX_GOOGLE_CSE_API_KEY` + `DOCDEX_GOOGLE_CSE_CX` + `DOCDEX_GOOGLE_CSE_API_URL`, `DOCDEX_BING_API_KEY` + `DOCDEX_BING_API_URL`, `DOCDEX_TAVILY_API_KEY` + `DOCDEX_TAVILY_API_URL`, `DOCDEX_EXA_API_KEY` + `DOCDEX_EXA_API_URL`
|
|
62
|
+
|
|
49
63
|
### B. Code Intelligence (AST & Graph)
|
|
50
64
|
|
|
51
65
|
Precision tools for structural analysis. Do not rely on text search for definitions or dependencies.
|
package/bin/docdex.js
CHANGED
|
@@ -29,6 +29,12 @@ function printLines(lines, { stderr } = {}) {
|
|
|
29
29
|
}
|
|
30
30
|
}
|
|
31
31
|
|
|
32
|
+
function envBool(value) { // Interpret a flag-like value (e.g. an environment variable string) as a boolean.
|
|
33
|
+
if (!value) return false; // any falsy input (undefined, null, "", 0) counts as "off"
|
|
34
|
+
const normalized = String(value).trim().toLowerCase(); // compare case-insensitively, ignoring surrounding whitespace
|
|
35
|
+
return ["1", "true", "t", "yes", "y", "on"].includes(normalized); // only these spellings are truthy; everything else is false
|
|
36
|
+
}
|
|
37
|
+
|
|
32
38
|
function readInstallMetadata({ fsModule, pathModule, basePath }) {
|
|
33
39
|
if (!fsModule || typeof fsModule.readFileSync !== "function") return null;
|
|
34
40
|
const metadataPath = pathModule.join(basePath, "docdexd-install.json");
|
|
@@ -178,10 +184,6 @@ async function run() {
|
|
|
178
184
|
basePath,
|
|
179
185
|
process.platform === "win32" ? "docdexd.exe" : "docdexd"
|
|
180
186
|
);
|
|
181
|
-
const mcpBinaryPath = path.join(
|
|
182
|
-
basePath,
|
|
183
|
-
process.platform === "win32" ? "docdex-mcp-server.exe" : "docdex-mcp-server"
|
|
184
|
-
);
|
|
185
187
|
|
|
186
188
|
if (!fs.existsSync(binaryPath)) {
|
|
187
189
|
console.error(`[docdex] Missing binary for ${platformKey}. Try reinstalling or set DOCDEX_DOWNLOAD_REPO to a repo with release assets.`);
|
|
@@ -202,13 +204,6 @@ async function run() {
|
|
|
202
204
|
});
|
|
203
205
|
|
|
204
206
|
const env = { ...process.env };
|
|
205
|
-
if (!env.DOCDEX_MCP_SERVER_BIN && fs.existsSync(mcpBinaryPath)) {
|
|
206
|
-
env.DOCDEX_MCP_SERVER_BIN = mcpBinaryPath;
|
|
207
|
-
}
|
|
208
|
-
const fetcherPath = path.join(__dirname, "..", "lib", "playwright_fetch.js");
|
|
209
|
-
if (!env.DOCDEX_PLAYWRIGHT_FETCHER && fs.existsSync(fetcherPath)) {
|
|
210
|
-
env.DOCDEX_PLAYWRIGHT_FETCHER = fetcherPath;
|
|
211
|
-
}
|
|
212
207
|
const child = spawn(binaryPath, process.argv.slice(2), { stdio: "inherit", env });
|
|
213
208
|
child.on("exit", (code) => process.exit(code ?? 1));
|
|
214
209
|
child.on("error", (err) => {
|