codebase-index 1.6.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- codebase_index/__init__.py +7 -0
- codebase_index/__main__.py +3 -0
- codebase_index/cli.py +916 -0
- codebase_index/config.py +110 -0
- codebase_index/discovery/__init__.py +10 -0
- codebase_index/discovery/classify.py +151 -0
- codebase_index/discovery/ignore.py +58 -0
- codebase_index/discovery/walker.py +75 -0
- codebase_index/doctor.py +138 -0
- codebase_index/embeddings/__init__.py +2 -0
- codebase_index/embeddings/backend.py +67 -0
- codebase_index/embeddings/external.py +56 -0
- codebase_index/embeddings/local.py +41 -0
- codebase_index/embeddings/noop.py +15 -0
- codebase_index/graph/__init__.py +8 -0
- codebase_index/graph/analysis.py +468 -0
- codebase_index/graph/builder.py +160 -0
- codebase_index/graph/expand.py +136 -0
- codebase_index/graph/export.py +381 -0
- codebase_index/graph/navigate.py +201 -0
- codebase_index/indexer/__init__.py +8 -0
- codebase_index/indexer/doc_chunks.py +202 -0
- codebase_index/indexer/freshness.py +109 -0
- codebase_index/indexer/pipeline.py +423 -0
- codebase_index/mcp/__init__.py +2 -0
- codebase_index/mcp/server.py +354 -0
- codebase_index/models.py +145 -0
- codebase_index/output/__init__.py +6 -0
- codebase_index/output/json.py +13 -0
- codebase_index/output/markdown.py +316 -0
- codebase_index/output/redact.py +31 -0
- codebase_index/parsers/__init__.py +9 -0
- codebase_index/parsers/base.py +47 -0
- codebase_index/parsers/languages.py +290 -0
- codebase_index/parsers/line_chunker.py +39 -0
- codebase_index/parsers/symbol_chunks.py +62 -0
- codebase_index/parsers/treesitter.py +439 -0
- codebase_index/retrieval/__init__.py +9 -0
- codebase_index/retrieval/budget.py +82 -0
- codebase_index/retrieval/fusion.py +62 -0
- codebase_index/retrieval/intent.py +56 -0
- codebase_index/retrieval/pipeline.py +207 -0
- codebase_index/retrieval/rerank.py +69 -0
- codebase_index/retrieval/searchers.py +291 -0
- codebase_index/retrieval/skeleton.py +251 -0
- codebase_index/retrieval/types.py +79 -0
- codebase_index/scaffold.py +399 -0
- codebase_index/service.py +158 -0
- codebase_index/skill_template/SKILL.md +198 -0
- codebase_index/skill_template/examples/hooks/settings.json +16 -0
- codebase_index/skill_template/scripts/cbx +25 -0
- codebase_index/skill_template/scripts/cbx.ps1 +25 -0
- codebase_index/skill_update.py +150 -0
- codebase_index/storage/__init__.py +8 -0
- codebase_index/storage/db.py +116 -0
- codebase_index/storage/repo.py +701 -0
- codebase_index/storage/schema.sql +125 -0
- codebase_index/watch/__init__.py +5 -0
- codebase_index/watch/watcher.py +93 -0
- codebase_index-1.6.0.dist-info/METADATA +748 -0
- codebase_index-1.6.0.dist-info/RECORD +64 -0
- codebase_index-1.6.0.dist-info/WHEEL +4 -0
- codebase_index-1.6.0.dist-info/entry_points.txt +4 -0
- codebase_index-1.6.0.dist-info/licenses/LICENSE +21 -0
|
@@ -0,0 +1,748 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: codebase-index
|
|
3
|
+
Version: 1.6.0
|
|
4
|
+
Summary: Local-first hybrid codebase index for AI coding agents, exposed as CLI, Skill, and MCP tools.
|
|
5
|
+
Project-URL: Homepage, https://github.com/denfry/codebase-index
|
|
6
|
+
Project-URL: Documentation, https://github.com/denfry/codebase-index/tree/main/docs
|
|
7
|
+
Project-URL: Changelog, https://github.com/denfry/codebase-index/blob/main/CHANGELOG.md
|
|
8
|
+
Project-URL: Issues, https://github.com/denfry/codebase-index/issues
|
|
9
|
+
Author: codebase-index contributors
|
|
10
|
+
License: MIT
|
|
11
|
+
License-File: LICENSE
|
|
12
|
+
Keywords: ai-agents,claude-code,cli,code-search,codebase-indexing,codebase-rag,codex-cli,fts5,local-first,mcp,opencode,rag,semantic-code-search,sqlite,tree-sitter
|
|
13
|
+
Classifier: Development Status :: 4 - Beta
|
|
14
|
+
Classifier: Environment :: Console
|
|
15
|
+
Classifier: Intended Audience :: Developers
|
|
16
|
+
Classifier: License :: OSI Approved :: MIT License
|
|
17
|
+
Classifier: Programming Language :: Python :: 3
|
|
18
|
+
Classifier: Programming Language :: Python :: 3.11
|
|
19
|
+
Classifier: Programming Language :: Python :: 3.12
|
|
20
|
+
Classifier: Programming Language :: Python :: 3.13
|
|
21
|
+
Classifier: Topic :: Software Development :: Libraries
|
|
22
|
+
Classifier: Topic :: Text Processing :: Indexing
|
|
23
|
+
Requires-Python: >=3.11
|
|
24
|
+
Requires-Dist: pathspec>=0.12
|
|
25
|
+
Requires-Dist: pydantic>=2.6
|
|
26
|
+
Requires-Dist: rich>=13.0
|
|
27
|
+
Requires-Dist: tree-sitter-language-pack==1.8.1
|
|
28
|
+
Requires-Dist: tree-sitter==0.25.2
|
|
29
|
+
Requires-Dist: typer>=0.12
|
|
30
|
+
Provides-Extra: build
|
|
31
|
+
Requires-Dist: build>=1.2; extra == 'build'
|
|
32
|
+
Requires-Dist: twine>=5.0; extra == 'build'
|
|
33
|
+
Provides-Extra: dev
|
|
34
|
+
Requires-Dist: mcp>=1.0; extra == 'dev'
|
|
35
|
+
Requires-Dist: mypy>=1.10; extra == 'dev'
|
|
36
|
+
Requires-Dist: pytest-cov>=5.0; extra == 'dev'
|
|
37
|
+
Requires-Dist: pytest>=8.0; extra == 'dev'
|
|
38
|
+
Requires-Dist: pyyaml>=6.0; extra == 'dev'
|
|
39
|
+
Requires-Dist: ruff>=0.5; extra == 'dev'
|
|
40
|
+
Provides-Extra: embeddings
|
|
41
|
+
Requires-Dist: numpy>=1.26; extra == 'embeddings'
|
|
42
|
+
Requires-Dist: sqlite-vec>=0.1; extra == 'embeddings'
|
|
43
|
+
Provides-Extra: embeddings-local
|
|
44
|
+
Requires-Dist: sentence-transformers>=3.0; extra == 'embeddings-local'
|
|
45
|
+
Provides-Extra: mcp
|
|
46
|
+
Requires-Dist: mcp>=1.0; extra == 'mcp'
|
|
47
|
+
Provides-Extra: watch
|
|
48
|
+
Requires-Dist: watchdog>=4.0; extra == 'watch'
|
|
49
|
+
Description-Content-Type: text/markdown
|
|
50
|
+
|
|
51
|
+
# codebase-index: Local Codebase Indexing for AI Coding Agents
|
|
52
|
+
|
|
53
|
+
`codebase-index` is a local-first codebase indexing tool that helps Claude Code,
|
|
54
|
+
Codex CLI, OpenCode, and other AI coding agents find relevant files, symbols, and
|
|
55
|
+
references without scanning an entire repository.
|
|
56
|
+
|
|
57
|
+
[License: MIT](LICENSE)
|
|
58
|
+
[Python 3.11+](https://www.python.org/)
|
|
59
|
+
[CI](https://github.com/denfry/codebase-index/actions)
|
|
60
|
+
[](skill/SKILL.md)
|
|
61
|
+
[](#which-ai-clis-does-codebase-index-support)
|
|
62
|
+
[](#which-ai-clis-does-codebase-index-support)
|
|
63
|
+
[MCP server](docs/MCP.md)
|
|
64
|
+
[](#safety-and-privacy)
|
|
65
|
+
[](#safety-and-privacy)
|
|
66
|
+
[](#safety-and-privacy)
|
|
67
|
+
[](docs/DATABASE_SCHEMA.md)
|
|
68
|
+
[](docs/ARCHITECTURE.md)
|
|
69
|
+
|
|
70
|
+
<p align="center">
|
|
71
|
+
<img src="assets/demo.png" width="820"
|
|
72
|
+
alt="codebase-index ranking a local search for 'where is user authentication implemented?' into scored files with recommended file:line ranges to read">
|
|
73
|
+
</p>
|
|
74
|
+
|
|
75
|
+
## What Is codebase-index?
|
|
76
|
+
|
|
77
|
+
**codebase-index is a private, offline retrieval layer for AI code search.** It
|
|
78
|
+
builds a SQLite index of your repository, extracts symbols with Tree-sitter,
|
|
79
|
+
ranks matches with hybrid retrieval, and returns compact file:line ranges that
|
|
80
|
+
an AI coding agent can read instead of opening broad file sets.
|
|
81
|
+
|
|
82
|
+
Use it when you want Cursor-like codebase awareness in terminal-based AI tools
|
|
83
|
+
while keeping source code, snippets, and search metadata on your machine.
|
|
84
|
+
|
|
85
|
+
> **codebase-index is not an IDE and not a coding agent.** It is the local
|
|
86
|
+
> retrieval/index layer that gives terminal and MCP-based AI agents precise
|
|
87
|
+
> codebase context. The agent stays your interface; this gives it better aim.
|
|
88
|
+
|
|
89
|
+
## Who Is It For?
|
|
90
|
+
|
|
91
|
+
- **Claude Code / Codex CLI / OpenCode users** on medium-to-large repos who want
|
|
92
|
+
the agent to read 3 ranked files instead of grepping and scanning 60.
|
|
93
|
+
- **Privacy-constrained teams** (proprietary or regulated code) who cannot send
|
|
94
|
+
source to a cloud code-intelligence service.
|
|
95
|
+
- **MCP power users** who want a stable, queryable code index as a tool, not a
|
|
96
|
+
black box baked into one agent's prompt.
|
|
97
|
+
- **Tooling authors** who need scriptable retrieval (`--json`, SQLite, MCP) that
|
|
98
|
+
other tools can build on.
|
|
99
|
+
|
|
100
|
+
Not for you if you want a full IDE, org-scale multi-repo search, or a hosted
|
|
101
|
+
platform — use Cursor or Sourcegraph for those.
|
|
102
|
+
|
|
103
|
+
## Start Here
|
|
104
|
+
|
|
105
|
+
If you are opening this repository for the first time, follow this order:
|
|
106
|
+
|
|
107
|
+
1. [Quick Start (5 minutes)](docs/QUICKSTART.md)
|
|
108
|
+
2. [Installation Guide](docs/INSTALLATION.md)
|
|
109
|
+
3. [Benchmarks](docs/BENCHMARKS.md)
|
|
110
|
+
4. [How the skill works](skill/SKILL.md)
|
|
111
|
+
5. [MCP server](docs/MCP.md)
|
|
112
|
+
6. [FAQ](docs/FAQ.md)
|
|
113
|
+
|
|
114
|
+
If you only need the shortest path, run:
|
|
115
|
+
|
|
116
|
+
```bash
|
|
117
|
+
pip install "codebase-index @ git+https://github.com/denfry/codebase-index.git@v1.6.0"
|
|
118
|
+
cd your-project
|
|
119
|
+
codebase-index init # prompts for Claude Code / Codex CLI / OpenCode
|
|
120
|
+
codebase-index index
|
|
121
|
+
codebase-index search "where is authentication implemented?"
|
|
122
|
+
```
|
|
123
|
+
|
|
124
|
+
## Project Status
|
|
125
|
+
|
|
126
|
+
**`1.6.0` is released.** The current release includes repository discovery,
|
|
127
|
+
SQLite FTS5 storage, Tree-sitter symbols and references, hybrid ranking, graph
|
|
128
|
+
impact analysis, token-budgeted retrieval packets, optional local embeddings,
|
|
129
|
+
hooks/watch support, multi-CLI installation, MCP server support, and a tested
|
|
130
|
+
GitHub-only `pipx` install path.
|
|
131
|
+
|
|
132
|
+
The `1.6.0` release turns the dependency graph into a navigable map: every edge
|
|
133
|
+
carries a `confidence` audit trail (`extracted`/`inferred`/`ambiguous`, surfaced
|
|
134
|
+
in `refs`/`impact`); a new zero-dependency analytics pass computes modules
|
|
135
|
+
(communities), god nodes, and surprising cross-module links, exposed via the
|
|
136
|
+
`architecture` command/MCP tool; `path` traces the shortest dependency chain
|
|
137
|
+
between two symbols and `describe` prints a symbol's node card; and the HTML
|
|
138
|
+
graph is coloured by module and sized by connectivity, with `--format
|
|
139
|
+
graphml|dot|neo4j` exports for external tools. Requires a one-time reindex
|
|
140
|
+
(schema 2 → 3).
|
|
141
|
+
|
|
142
|
+
The earlier `1.4.0` release hardened the MCP contract (a `schema_version` +
|
|
143
|
+
`tool` envelope on every payload, golden-locked per tool, plus a fix so the
|
|
144
|
+
server loads on current `mcp`/`pydantic`), dampened the god-class `in_degree`
|
|
145
|
+
rerank tiebreak (logarithmic, validated no-regression on the public benchmark),
|
|
146
|
+
and labelled config/IaC files (Dockerfile, Terraform, HCL, INI, Makefiles) so
|
|
147
|
+
infra surfaces in `stats` and search.
|
|
148
|
+
|
|
149
|
+
The earlier `1.3.0` release added a content-addressed embedding cache (rebuilds reuse
|
|
150
|
+
vectors for unchanged content), a batched graph build (7–28× faster edge
|
|
151
|
+
resolution plus a new `edges(file_id)` index), a shared CLI/MCP service layer
|
|
152
|
+
(MCP hybrid search now uses the vector channel; `index_stats` reports the
|
|
153
|
+
per-language graph tier), graph-coverage signals in `stats`/`refs`/`impact`,
|
|
154
|
+
CLI pagination via `search --offset`, and single-source versioning with a CI
|
|
155
|
+
gate that keeps every committed skill copy in sync.
|
|
156
|
+
The `1.2.1` release added skill auto-update/rollback commands and version
|
|
157
|
+
stamps so installed skills stay in sync with the package automatically.
|
|
158
|
+
See [CHANGELOG.md](CHANGELOG.md) and
|
|
159
|
+
[docs/ROADMAP.md](docs/ROADMAP.md).
|
|
160
|
+
|
|
161
|
+
MCP is now available as a stdio server via `codebase-index mcp --root <repo>`.
|
|
162
|
+
It exposes `healthcheck`, `search_code`, `find_symbol`, `find_refs`,
|
|
163
|
+
`impact_of`, `explain_code`, `architecture_overview`, `path_between`,
|
|
164
|
+
`describe_symbol`, and `index_stats`; see [docs/MCP.md](docs/MCP.md).
|
|
165
|
+
|
|
166
|
+
```
|
|
167
|
+
You: "Where is user authentication implemented?"
|
|
168
|
+
Agent: searches local index (symbols + FTS5 + graph)
|
|
169
|
+
reads only 3 ranked files instead of scanning 60
|
|
170
|
+
answers with citations: src/auth/AuthService.ts:12-148
|
|
171
|
+
```
|
|
172
|
+
|
|
173
|
+
---
|
|
174
|
+
|
|
175
|
+
## How Do I Install codebase-index?
|
|
176
|
+
|
|
177
|
+
For most users, install the package from the tagged GitHub release and run
|
|
178
|
+
`init` inside the repository you want to index:
|
|
179
|
+
|
|
180
|
+
```bash
|
|
181
|
+
pip install "codebase-index @ git+https://github.com/denfry/codebase-index.git@v1.6.0"
|
|
182
|
+
cd your-project
|
|
183
|
+
codebase-index init # choose Claude Code, Codex CLI, OpenCode, or all
|
|
184
|
+
codebase-index index
|
|
185
|
+
```
|
|
186
|
+
|
|
187
|
+
In a non-interactive script, pass a target explicitly:
|
|
188
|
+
|
|
189
|
+
```bash
|
|
190
|
+
codebase-index init --target auto # install into detected AI CLIs
|
|
191
|
+
codebase-index init --target codex # write AGENTS.md + Codex resources
|
|
192
|
+
codebase-index init --target claude # write .claude/skills/codebase-index
|
|
193
|
+
codebase-index init --target opencode # write OpenCode command + agent files
|
|
194
|
+
```
|
|
195
|
+
|
|
196
|
+
### Install as a Claude Code plugin
|
|
197
|
+
|
|
198
|
+
One command in Claude Code:
|
|
199
|
+
|
|
200
|
+
```
|
|
201
|
+
/plugin marketplace add denfry/codebase-index
|
|
202
|
+
/plugin install codebase-index@codebase-index
|
|
203
|
+
```
|
|
204
|
+
|
|
205
|
+
Or just ask: "install the codebase-index plugin".
|
|
206
|
+
|
|
207
|
+
**What happens on first run:** when a session starts, a `SessionStart` hook
|
|
208
|
+
(`scripts/bootstrap.sh` / `.ps1`) creates a private Python virtual environment under
|
|
209
|
+
`~/.claude/plugins/data/codebase-index-*/venv` and installs the pinned
|
|
210
|
+
`codebase-index` package (from `requirements.lock`) into it — using `uv` if present,
|
|
211
|
+
otherwise `python -m venv` + `pip`. It reinstalls only when the lock file changes.
|
|
212
|
+
Nothing is installed globally; uninstalling the plugin removes the data directory.
|
|
213
|
+
|
|
214
|
+
**Prerequisite:** Python 3.11+ on your PATH. The first install needs network access to
|
|
215
|
+
fetch the package; later sessions are offline. The skill builds its index on
|
|
216
|
+
your first codebase question, so there is no manual `index` step.
|
|
217
|
+
|
|
218
|
+
**Distribution note:** the plugin bootstrap installs the pinned requirement from
|
|
219
|
+
`requirements.lock`. In `1.6.0`, that lock points at the tagged GitHub release
|
|
220
|
+
instead of PyPI. You can override it with `CBX_INSTALL_SPEC` when testing a local
|
|
221
|
+
checkout or a different Git ref.
|
|
222
|
+
|
|
223
|
+
## What Problem Does codebase-index Solve?
|
|
224
|
+
|
|
225
|
+
AI coding agents struggle with large repositories when they rely on broad file
|
|
226
|
+
reads, grep output, or user-provided context. `codebase-index` gives those agents
|
|
227
|
+
a ranked local retrieval packet before they read source files.
|
|
228
|
+
|
|
229
|
+
- **Token waste** — Scanning entire files or running broad grep/glob queries burns through the context window on irrelevant content.
|
|
230
|
+
- **No symbol awareness** — Standard search can't distinguish a function definition from a call, or a class from a variable.
|
|
231
|
+
- **No ranking** — Grep returns all matches with no relevance ordering. The agent must read everything.
|
|
232
|
+
- **No context** — Grep doesn't know which files are related or what to read next.
|
|
233
|
+
- **Cloud dependency** — External code indexing services send your proprietary code to remote servers.
|
|
234
|
+
|
|
235
|
+
Developers get Cursor-like codebase awareness in Claude Code, Codex CLI, and
|
|
236
|
+
OpenCode without leaving the terminal or sending code to a remote indexing
|
|
237
|
+
service.
|
|
238
|
+
|
|
239
|
+
## How Is This Different?
|
|
240
|
+
|
|
241
|
+
Short answers to the questions people actually ask. The full, honest matrix —
|
|
242
|
+
including when you should pick the other tool — is in
|
|
243
|
+
[docs/COMPARISON.md](docs/COMPARISON.md).
|
|
244
|
+
|
|
245
|
+
- **Why not just `grep`/`rg`?** Grep returns every match with no ranking, no
|
|
246
|
+
symbol awareness, and no idea which files relate. codebase-index ranks results,
|
|
247
|
+
knows a definition from a call, expands along the dependency graph, and returns
|
|
248
|
+
specific line ranges under a token budget — so the agent reads less and answers
|
|
249
|
+
with citations.
|
|
250
|
+
- **Why not Cursor?** Cursor is a great AI IDE with strong codebase awareness, but
|
|
251
|
+
it is proprietary and IDE-centric. codebase-index is a local, open retrieval
|
|
252
|
+
layer for **terminal and MCP** agents, offline by default, with no IDE lock-in.
|
|
253
|
+
If you live inside Cursor, keep using Cursor.
|
|
254
|
+
- **Why not Aider repo-map?** Aider's repo-map is a good graph-ranked,
|
|
255
|
+
token-budgeted context map — but it is optimized to feed Aider's own chat.
|
|
256
|
+
codebase-index is a **reusable, queryable index**: CLI/JSON/MCP commands return
|
|
257
|
+
ranked `file:line` ranges, symbols, references, and impact that *any*
|
|
258
|
+
shell-capable agent can consume, with freshness and security gates.
|
|
259
|
+
- **Why not Sourcegraph / Cody / Amp?** They are excellent enterprise-grade,
|
|
260
|
+
cross-repo code intelligence platforms. They are also heavier and
|
|
261
|
+
account/platform-oriented. codebase-index is single-repo, local, and
|
|
262
|
+
lightweight — no server, no account, no code leaving the machine by default.
|
|
263
|
+
- **Why not Codebase-Memory MCP?** It is the closest direct alternative — a
|
|
264
|
+
broader graph engine with a static binary and wide language/agent coverage. We
|
|
265
|
+
do **not** claim to beat it globally. We differentiate on simplicity, a strict
|
|
266
|
+
privacy model, token-budgeted retrieval packets, a transparent Python
|
|
267
|
+
implementation, the Claude/Codex/OpenCode workflow, and honest benchmarks. If
|
|
268
|
+
you need its broader graph and language reach today, choose it.
|
|
269
|
+
|
|
270
|
+
**What makes it trustworthy?** No telemetry, no network by default, a multi-gate
|
|
271
|
+
exclusion pipeline (secrets/binaries/generated/dependencies never indexed),
|
|
272
|
+
output-time secret redaction, a `doctor --strict` safety self-check, and a
|
|
273
|
+
public benchmark suite wired as a CI regression gate. Claims that aren't proven
|
|
274
|
+
in this repo are marked as roadmap, not done.
|
|
275
|
+
|
|
276
|
+
### Proven today vs. roadmap
|
|
277
|
+
|
|
278
|
+
| Capability | Status |
|
|
279
|
+
|---|---|
|
|
280
|
+
| Hybrid retrieval (path + symbol + FTS5 + graph), token-budgeted packets | ✅ Shipped |
|
|
281
|
+
| Tree-sitter symbols for 12 Tier-A languages + Tier-B generic path | ✅ Shipped |
|
|
282
|
+
| Import/call/reference/inheritance graph, `refs`/`impact` | ✅ Shipped |
|
|
283
|
+
| Optional local embeddings; external embeddings gated 3 ways | ✅ Shipped |
|
|
284
|
+
| stdio MCP server; CLI/skill/MCP share one service layer | ✅ Shipped |
|
|
285
|
+
| Honest 55k LOC Java benchmark (recall@3 70% vs 40% `rg`, ~13× fewer tokens) | ✅ Shipped |
|
|
286
|
+
| 10k/100k/1M LOC public-repo benchmarks | 🚧 Roadmap |
|
|
287
|
+
| Framework-aware typed edges (route→handler→service→model) | 🚧 Roadmap |
|
|
288
|
+
| PyPI / `uvx` / Homebrew, signed checksums, SBOM | 🚧 Roadmap |
|
|
289
|
+
| Verified per-client MCP docs, paged/progressive results | 🚧 Roadmap |
|
|
290
|
+
|
|
291
|
+
See [docs/PRODUCT_UPGRADE_PLAN.md](docs/PRODUCT_UPGRADE_PLAN.md) for the full
|
|
292
|
+
upgrade plan and ranked roadmap.
|
|
293
|
+
|
|
294
|
+
## How Does codebase-index Work?
|
|
295
|
+
|
|
296
|
+
`codebase-index` builds a local hybrid index that combines:
|
|
297
|
+
|
|
298
|
+
- **Symbol search** — Tree-sitter AST parsing extracts classes, functions, methods, and variables across the supported code-language set.
|
|
299
|
+
- **Full-text search** — SQLite FTS5 for fast lexical search across code chunks.
|
|
300
|
+
- **Path search** — File path matching for location-aware queries.
|
|
301
|
+
- **Optional semantic search** — Vector embeddings for similarity-based retrieval (opt-in, local by default).
|
|
302
|
+
- **Dependency graph** — Import, call, and reference edges for impact analysis and graph expansion.
|
|
303
|
+
- **Token-budgeted output** — Ranked retrieval packets with specific line ranges, not whole files.
|
|
304
|
+
|
|
305
|
+
The AI agent reads only the recommended files and line ranges, not the entire
|
|
306
|
+
repository.
|
|
307
|
+
|
|
308
|
+
## Quick Demo
|
|
309
|
+
|
|
310
|
+
```bash
|
|
311
|
+
/codebase-index "where is user authentication implemented?"
|
|
312
|
+
```
|
|
313
|
+
|
|
314
|
+
Expected output:
|
|
315
|
+
|
|
316
|
+
```
|
|
317
|
+
Top matches:
|
|
318
|
+
┌──────┬──────────────────────────┬──────────────────────────┬───────┬──────────────────────────────┐
|
|
319
|
+
│ Rank │ Path │ Symbols │ Score │ Reason │
|
|
320
|
+
├──────┼──────────────────────────┼──────────────────────────┼───────┼──────────────────────────────┤
|
|
321
|
+
│ 1 │ src/auth/AuthService.ts │ AuthService, login │ 0.92 │ exact symbol match │
|
|
322
|
+
│ 2 │ src/routes/auth.ts │ loginHandler, logout │ 0.78 │ FTS match · 4 callers │
|
|
323
|
+
│ 3 │ src/middleware/auth.ts │ requireAuth │ 0.65 │ path match · FTS match │
|
|
324
|
+
└──────┴──────────────────────────┴──────────────────────────┴───────┴──────────────────────────────┘
|
|
325
|
+
|
|
326
|
+
Recommended reads:
|
|
327
|
+
1. src/auth/AuthService.ts:12-148
|
|
328
|
+
reason: matched AuthService, login(), validatePassword()
|
|
329
|
+
2. src/routes/auth.ts:20-91
|
|
330
|
+
reason: /login route calls AuthService.login()
|
|
331
|
+
3. src/middleware/auth.ts:5-42
|
|
332
|
+
reason: auth middleware validates sessions
|
|
333
|
+
```
|
|
334
|
+
|
|
335
|
+
## Installation Options
|
|
336
|
+
|
|
337
|
+
If you are new to this repo, start with [docs/QUICKSTART.md](docs/QUICKSTART.md).
|
|
338
|
+
If you want all install options and troubleshooting, use [docs/INSTALLATION.md](docs/INSTALLATION.md).
|
|
339
|
+
|
|
340
|
+
**Multi-CLI installer (Claude Code + Codex CLI + OpenCode):** one command via
|
|
341
|
+
`install.sh` / `install.ps1` — see [docs/installer.md](docs/installer.md).
|
|
342
|
+
|
|
343
|
+
```bash
|
|
344
|
+
# macOS / Linux
|
|
345
|
+
curl -fsSL https://raw.githubusercontent.com/denfry/codebase-index/main/install.sh | sh
|
|
346
|
+
```
|
|
347
|
+
```powershell
|
|
348
|
+
# Windows PowerShell
|
|
349
|
+
irm https://raw.githubusercontent.com/denfry/codebase-index/main/install.ps1 | iex
|
|
350
|
+
```
|
|
351
|
+
|
|
352
|
+
### Option 1: Install from a tagged GitHub release
|
|
353
|
+
|
|
354
|
+
```bash
|
|
355
|
+
cd your-project
|
|
356
|
+
pip install "codebase-index @ git+https://github.com/denfry/codebase-index.git@v1.6.0"
|
|
357
|
+
codebase-index init
|
|
358
|
+
codebase-index index
|
|
359
|
+
```
|
|
360
|
+
|
|
361
|
+
### Python version compatibility
|
|
362
|
+
|
|
363
|
+
`codebase-index` requires Python 3.11 or newer.
|
|
364
|
+
|
|
365
|
+
If `codebase-index init --target opencode` fails with:
|
|
366
|
+
|
|
367
|
+
```text
|
|
368
|
+
ModuleNotFoundError: No module named 'importlib.resources.abc'; 'importlib.resources' is not a package
|
|
369
|
+
```
|
|
370
|
+
|
|
371
|
+
the `pipx` environment was likely created with an older Python version. Reinstall `codebase-index` using Python 3.11+ explicitly:
|
|
372
|
+
|
|
373
|
+
```powershell
|
|
374
|
+
pipx uninstall codebase-index
|
|
375
|
+
py -0p
|
|
376
|
+
pipx install --python "<path-to-python-3.11-or-newer>\python.exe" "git+https://github.com/denfry/codebase-index.git@v1.6.0"
|
|
377
|
+
```
|
|
378
|
+
|
|
379
|
+
For example:
|
|
380
|
+
|
|
381
|
+
```powershell
|
|
382
|
+
pipx install --python "C:\Users\you\AppData\Local\Programs\Python\Python312\python.exe" "git+https://github.com/denfry/codebase-index.git@v1.6.0"
|
|
383
|
+
```
|
|
384
|
+
|
|
385
|
+
Then run initialization again:
|
|
386
|
+
|
|
387
|
+
```powershell
|
|
388
|
+
codebase-index init --target opencode
|
|
389
|
+
codebase-index index
|
|
390
|
+
```
|
|
391
|
+
|
|
392
|
+
|
|
393
|
+
### Option 2: Install with pipx from GitHub
|
|
394
|
+
|
|
395
|
+
```bash
|
|
396
|
+
pipx install "git+https://github.com/denfry/codebase-index.git@v1.6.0"
|
|
397
|
+
cd your-project
|
|
398
|
+
codebase-index init --target auto
|
|
399
|
+
codebase-index index
|
|
400
|
+
```
|
|
401
|
+
|
|
402
|
+
### Option 3: Install from source
|
|
403
|
+
|
|
404
|
+
```bash
|
|
405
|
+
git clone https://github.com/denfry/codebase-index.git
|
|
406
|
+
cd codebase-index
|
|
407
|
+
pip install -e ".[dev]"
|
|
408
|
+
```
|
|
409
|
+
|
|
410
|
+
### Distribution roadmap
|
|
411
|
+
|
|
412
|
+
PyPI, `uvx`, Homebrew, signed release checksums, and SBOMs are important for a
|
|
413
|
+
tool that reads entire repositories, but they are not all verified as shipped in
|
|
414
|
+
`1.6.0`. Target install story (planned, not yet verified):
|
|
415
|
+
|
|
416
|
+
```bash
|
|
417
|
+
uvx codebase-index init
|
|
418
|
+
pipx install codebase-index
|
|
419
|
+
brew install denfry/tap/codebase-index
|
|
420
|
+
```
|
|
421
|
+
|
|
422
|
+
### Verify the install
|
|
423
|
+
|
|
424
|
+
```bash
|
|
425
|
+
codebase-index doctor
|
|
426
|
+
```
|
|
427
|
+
|
|
428
|
+
See [docs/INSTALLATION.md](docs/INSTALLATION.md) for the full guide, including optional extras (embeddings, watch mode) and troubleshooting.
|
|
429
|
+
|
|
430
|
+
## Usage
|
|
431
|
+
|
|
432
|
+
```bash
|
|
433
|
+
# Initialize the index for your project
|
|
434
|
+
codebase-index init
|
|
435
|
+
|
|
436
|
+
# Build the index
|
|
437
|
+
codebase-index index
|
|
438
|
+
|
|
439
|
+
# Search for something
|
|
440
|
+
codebase-index search "where is authentication implemented?"
|
|
441
|
+
|
|
442
|
+
# Look up a specific symbol
|
|
443
|
+
codebase-index symbol "AuthService"
|
|
444
|
+
|
|
445
|
+
# Find callers and references
|
|
446
|
+
codebase-index refs "AuthService.login"
|
|
447
|
+
|
|
448
|
+
# Analyze impact of a change
|
|
449
|
+
codebase-index impact "src/auth/AuthService.ts"
|
|
450
|
+
|
|
451
|
+
# Map the codebase: modules, god nodes, surprising links, suggested questions
|
|
452
|
+
codebase-index architecture
|
|
453
|
+
|
|
454
|
+
# How are two symbols/files connected? Shortest dependency/call path
|
|
455
|
+
codebase-index path "renew" "refresh_access_token"
|
|
456
|
+
|
|
457
|
+
# Node card: definition, callers, callees, centrality, module
|
|
458
|
+
codebase-index describe "Database"
|
|
459
|
+
|
|
460
|
+
# Visualize the graph (modules coloured, size = connectivity, edge style = confidence)
|
|
461
|
+
codebase-index graph --open
|
|
462
|
+
# …or export for external tools: graphml (Gephi/yEd), dot (Graphviz), neo4j (Cypher)
|
|
463
|
+
codebase-index graph --format graphml -o graph.graphml
|
|
464
|
+
|
|
465
|
+
# View index statistics
|
|
466
|
+
codebase-index stats
|
|
467
|
+
|
|
468
|
+
# Run diagnostics
|
|
469
|
+
codebase-index doctor
|
|
470
|
+
```
|
|
471
|
+
|
|
472
|
+
Add `--json` to any command for machine-readable output.
|
|
473
|
+
|
|
474
|
+
## How Does Retrieval Flow Through codebase-index?
|
|
475
|
+
|
|
476
|
+
```
|
|
477
|
+
User question
|
|
478
|
+
↓
|
|
479
|
+
CLI instructions or skill
|
|
480
|
+
↓
|
|
481
|
+
Hybrid retrieval
|
|
482
|
+
├─ Path search
|
|
483
|
+
├─ Symbol search (Tree-sitter AST)
|
|
484
|
+
├─ SQLite FTS5 full-text search
|
|
485
|
+
├─ Optional embeddings (vector search)
|
|
486
|
+
└─ Graph expansion (callers, imports, references)
|
|
487
|
+
↓
|
|
488
|
+
Ranked retrieval packet
|
|
489
|
+
↓
|
|
490
|
+
Agent reads only the recommended line ranges
|
|
491
|
+
↓
|
|
492
|
+
Answer with precise file:line citations
|
|
493
|
+
```
|
|
494
|
+
|
|
495
|
+
## Features
|
|
496
|
+
|
|
497
|
+
- [x] **Local-first indexing** — All data stays on your machine
|
|
498
|
+
- [x] **No network by default** — Zero external API calls out of the box
|
|
499
|
+
- [x] **Respects ignore files** — `.gitignore`, `.claudeignore`, `.codeindexignore`, `.cursorignore`
|
|
500
|
+
- [x] **SQLite storage** — Fast, reliable, single-file database
|
|
501
|
+
- [x] **FTS5 lexical search** — Full-text search with code-aware tokenization
|
|
502
|
+
- [x] **Tree-sitter AST parsing** — Tier-A symbol extraction for Python, JavaScript, TypeScript, Java, Go, Rust, C, C++, C#, Ruby, PHP, and Kotlin; Tier-B generic extraction for code languages with a loadable grammar such as Lua
|
|
503
|
+
- [x] **Symbol extraction** — Classes, functions, methods, variables with line ranges
|
|
504
|
+
- [x] **Incremental indexing** — Only changed files are re-indexed
|
|
505
|
+
- [x] **Token-budgeted output** — Configurable max output size
|
|
506
|
+
- [x] **Secret redaction** — Masks keys, tokens, and credentials in snippets
|
|
507
|
+
- [x] **Optional embeddings** — Local or remote vector search (opt-in)
|
|
508
|
+
- [x] **Optional hooks/watch** — Auto-update index after file edits
|
|
509
|
+
- [x] **Multi-CLI setup** — Claude Code, Codex CLI, and OpenCode instructions
|
|
510
|
+
- [x] **MCP server** — stdio MCP tools for search, symbols, refs, impact, explain, architecture, path, describe, health, and stats
|
|
511
|
+
|
|
512
|
+
## Safety and Privacy
|
|
513
|
+
|
|
514
|
+
> **Trust model in 60 seconds**
|
|
515
|
+
> 1. **Offline by default** — the base install has zero network dependencies; nothing leaves your machine.
|
|
516
|
+
> 2. **One opt-in exit, triple-gated** — external embeddings require `allow_external` **and** an env API key **and** a printed endpoint warning, or they are refused.
|
|
517
|
+
> 3. **Secrets never get in** — `.env`, keys, certs, and credential files are excluded before parsing (multi-gate ignore pipeline).
|
|
518
|
+
> 4. **Secrets never get out** — every snippet is redacted (AWS keys, private keys, JWTs, bearer tokens, connection strings) before it reaches the agent.
|
|
519
|
+
> 5. **No telemetry, ever** — no analytics, no phone-home, no usage data.
|
|
520
|
+
> 6. **Verify it yourself** — `codebase-index doctor --strict` audits all of the above and exits non-zero in CI on any high-severity finding.
|
|
521
|
+
|
|
522
|
+
`codebase-index` is designed with privacy as a first principle:
|
|
523
|
+
|
|
524
|
+
- **No telemetry** — No usage data, analytics, or crash reports are collected or transmitted.
|
|
525
|
+
- **No external API calls by default** — All indexing, storage, and search happen locally.
|
|
526
|
+
- **Does not index sensitive files** — `.env`, private keys, certificates, tokens, and credential files are excluded before parsing.
|
|
527
|
+
- **Respects ignore files** — `.gitignore`, `.claudeignore`, `.codeindexignore`, and `.cursorignore` are all honored.
|
|
528
|
+
- **Index stored locally** — SQLite database in `.claude/cache/codebase-index/` (gitignored by default).
|
|
529
|
+
- **Optional embeddings are local by default** — External embedding APIs require explicit opt-in with warnings.
|
|
530
|
+
- **Secret redaction** — Snippets are scrubbed for AWS keys, private keys, JWTs, bearer tokens, and connection strings before output.
|
|
531
|
+
|
|
532
|
+
See [docs/SECURITY_MODEL.md](docs/SECURITY_MODEL.md) for the full security model and threat analysis.
|
|
533
|
+
|
|
534
|
+
|
|
535
|
+
## Benchmark Results
|
|
536
|
+
|
|
537
|
+
There are three benchmark surfaces today:
|
|
538
|
+
|
|
539
|
+
1. **Public benchmark suite** in `tests/benchmark_public.py`: reproducible
|
|
540
|
+
multi-language fixture with Recall@1/3/5, MRR, nDCG, answer-correctness proxy,
|
|
541
|
+
token economy, language breakdown, freshness latency, graph tasks, and scale counters.
|
|
542
|
+
2. **Smoke benchmark** on `sample_repo`: validates the CLI is fast and stable on
|
|
543
|
+
a tiny fixture, but it is not evidence of production retrieval quality.
|
|
544
|
+
3. **Honest benchmark** on a real Java repository: `tests/benchmark_honest.py`
|
|
545
|
+
compares codebase-index against a disciplined `rg` + read-window baseline on
|
|
546
|
+
10 realistic questions. Results are documented in
|
|
547
|
+
[tests/benchmark_honest_RESULTS.md](tests/benchmark_honest_RESULTS.md).
|
|
548
|
+
|
|
549
|
+
Run the public suite:
|
|
550
|
+
|
|
551
|
+
```bash
|
|
552
|
+
python tests/benchmark_public.py --workdir .tmp-public-benchmark
|
|
553
|
+
```
|
|
554
|
+
|
|
555
|
+
Current honest benchmark headline:
|
|
556
|
+
|
|
557
|
+
| Metric | Result |
|
|
558
|
+
|---|---|
|
|
559
|
+
| Repo | 303 Java files, ~55k LOC |
|
|
560
|
+
| Retrieval quality | recall@3: 70% index vs 40% `rg` baseline |
|
|
561
|
+
| Token economy | ~13x fewer answer tokens than `rg` + 80-line windows |
|
|
562
|
+
| Verified language impact | Java symbol extraction fixed: 0 → 3,543 symbols |
|
|
563
|
+
|
|
564
|
+
The public suite now has the metric framework. It still needs larger public or
|
|
565
|
+
documented external repos for 10k/100k/1M LOC scale claims and deeper framework
|
|
566
|
+
graph tasks. See [docs/BENCHMARKS.md](docs/BENCHMARKS.md).
|
|
567
|
+
|
|
568
|
+
## Repository Layout
|
|
569
|
+
|
|
570
|
+
```
|
|
571
|
+
├── skill/ # Source instruction package (SKILL.md, scripts, examples)
|
|
572
|
+
├── skills/ # Plugin skill copy
|
|
573
|
+
├── src/codebase_index/ # Python package (CLI, indexer, retrieval, storage)
|
|
574
|
+
├── docs/ # Documentation (architecture, schema, security, FAQ)
|
|
575
|
+
├── examples/ # Sample queries, retrieval output, demo project
|
|
576
|
+
├── tests/ # Test suite with fixture repositories
|
|
577
|
+
├── bin/ # Plugin CLI wrappers (cbx, codebase-index)
|
|
578
|
+
├── scripts/ # Bootstrap scripts (bootstrap.sh, bootstrap.ps1)
|
|
579
|
+
├── hooks/ # Plugin hooks (hooks.json)
|
|
580
|
+
├── .claude-plugin/ # Plugin manifest + marketplace catalog
|
|
581
|
+
├── .github/ # Issue templates, CI workflows, PR template
|
|
582
|
+
├── README.md # This file
|
|
583
|
+
├── LICENSE # MIT License
|
|
584
|
+
├── CHANGELOG.md # Release history
|
|
585
|
+
├── CONTRIBUTING.md # Contributor guide
|
|
586
|
+
├── SECURITY.md # Security policy
|
|
587
|
+
├── ROADMAP.md # Development milestones
|
|
588
|
+
├── requirements.lock # Pinned install spec for bootstrap
|
|
589
|
+
└── pyproject.toml # Package configuration
|
|
590
|
+
```
|
|
591
|
+
|
|
592
|
+
## Configuration
|
|
593
|
+
|
|
594
|
+
Create `.codeindex.json` in your project root:
|
|
595
|
+
|
|
596
|
+
```json
|
|
597
|
+
{
|
|
598
|
+
"index": {
|
|
599
|
+
"max_file_bytes": 1048576,
|
|
600
|
+
"chunk_size": 500,
|
|
601
|
+
"chunk_overlap": 50
|
|
602
|
+
},
|
|
603
|
+
"embeddings": {
|
|
604
|
+
"backend": "noop",
|
|
605
|
+
"allow_external": false
|
|
606
|
+
}
|
|
607
|
+
}
|
|
608
|
+
```
|
|
609
|
+
|
|
610
|
+
### Ignore Files
|
|
611
|
+
|
|
612
|
+
- `.codeindexignore` — Tool-specific ignore patterns (highest priority)
|
|
613
|
+
- `.gitignore` — Standard git ignore patterns
|
|
614
|
+
- `.claudeignore` — Claude-specific ignore patterns
|
|
615
|
+
|
|
616
|
+
### Cache Location
|
|
617
|
+
|
|
618
|
+
```
|
|
619
|
+
.claude/cache/codebase-index/
|
|
620
|
+
├── index.sqlite # SQLite database with FTS5
|
|
621
|
+
└── config.json # Resolved configuration
|
|
622
|
+
```
|
|
623
|
+
|
|
624
|
+
## Which AI CLIs Does codebase-index Support?
|
|
625
|
+
|
|
626
|
+
`codebase-index init` can install instructions for three AI coding CLIs:
|
|
627
|
+
|
|
628
|
+
| CLI | Files written by `init` | Best command |
|
|
629
|
+
|---|---|---|
|
|
630
|
+
| Claude Code | `.claude/skills/codebase-index/` | `codebase-index init --target claude` |
|
|
631
|
+
| Codex CLI | `AGENTS.md` + `.codex/skills/codebase-index/` | `codebase-index init --target codex` |
|
|
632
|
+
| OpenCode | `.opencode/commands/` + `.opencode/agents/` + resources | `codebase-index init --target opencode` |
|
|
633
|
+
|
|
634
|
+
Use `codebase-index init --target auto` to install into detected CLIs, or
|
|
635
|
+
`codebase-index init --target all` to write every supported integration.
|
|
636
|
+
|
|
637
|
+
### Claude Code Integration
|
|
638
|
+
|
|
639
|
+
The Claude Code skill is defined in [`skill/SKILL.md`](skill/SKILL.md) with
|
|
640
|
+
YAML frontmatter for automatic selection.
|
|
641
|
+
|
|
642
|
+
Example `.claude/CLAUDE.md`:
|
|
643
|
+
|
|
644
|
+
```markdown
|
|
645
|
+
## Codebase Questions
|
|
646
|
+
|
|
647
|
+
Before answering any question about this project's code:
|
|
648
|
+
1. Use the codebase-index skill to search the local index first.
|
|
649
|
+
2. Read only the recommended line ranges — do not scan entire files.
|
|
650
|
+
3. Answer with file:line citations.
|
|
651
|
+
```
|
|
652
|
+
|
|
653
|
+
### Optional Hooks
|
|
654
|
+
|
|
655
|
+
Configure automatic index updates in `.codeindex.json`:
|
|
656
|
+
|
|
657
|
+
```json
|
|
658
|
+
{
|
|
659
|
+
"hooks": {
|
|
660
|
+
"post_tool_use": {
|
|
661
|
+
"enabled": true,
|
|
662
|
+
"events": ["Write", "Edit"],
|
|
663
|
+
"command": "codebase-index update --quiet"
|
|
664
|
+
}
|
|
665
|
+
}
|
|
666
|
+
}
|
|
667
|
+
```
|
|
668
|
+
|
|
669
|
+
See [skill/examples/](skill/examples/) for full examples.
|
|
670
|
+
|
|
671
|
+
## FAQ
|
|
672
|
+
|
|
673
|
+
### Is this a Cursor replacement?
|
|
674
|
+
|
|
675
|
+
No. `codebase-index` is not a replacement for Cursor or any IDE. It is a
|
|
676
|
+
local retrieval layer for terminal AI coding agents. You still use Claude Code,
|
|
677
|
+
Codex CLI, OpenCode, or another agent as your primary interface.
|
|
678
|
+
|
|
679
|
+
### Does it send my code anywhere?
|
|
680
|
+
|
|
681
|
+
No. By default, `codebase-index` is completely local-first and offline. All indexing, storage, and search happen on your machine. External embeddings are opt-in only and require explicit configuration.
|
|
682
|
+
|
|
683
|
+
### Does it work without embeddings?
|
|
684
|
+
|
|
685
|
+
Yes. The default configuration disables embeddings entirely (`"backend": "noop"`). Search uses SQLite FTS5, Tree-sitter symbol extraction, path matching, and graph expansion. Embeddings are an optional enhancement.
|
|
686
|
+
|
|
687
|
+
### Does it support large repositories?
|
|
688
|
+
|
|
689
|
+
Yes. The index is incremental — only changed files are re-indexed. SQLite with FTS5 handles large datasets efficiently. Generated files, dependencies, and binaries are excluded automatically.
|
|
690
|
+
|
|
691
|
+
### Why not just use Grep?
|
|
692
|
+
|
|
693
|
+
Grep returns all matches with no ranking, no symbol awareness, and no context about related files. `codebase-index` combines lexical search with symbol extraction and graph expansion to return **ranked, contextual results** with specific line ranges to read.
|
|
694
|
+
|
|
695
|
+
### Does it support MCP?
|
|
696
|
+
|
|
697
|
+
Yes. Run `codebase-index mcp --root <repo>` to expose the local index over stdio
|
|
698
|
+
MCP. See [docs/MCP.md](docs/MCP.md) for tools and client config templates.
|
|
699
|
+
|
|
700
|
+
### Can I use it with other agents?
|
|
701
|
+
|
|
702
|
+
Yes. The CLI is agent-agnostic. Any agent that can run shell commands can use
|
|
703
|
+
`codebase-index`, and JSON output (`--json`) is parseable by other tools.
|
|
704
|
+
|
|
705
|
+
### How do I reset the index?
|
|
706
|
+
|
|
707
|
+
```bash
|
|
708
|
+
codebase-index clean # reset the index DB (keeps the skill)
|
|
709
|
+
codebase-index clean --all # wipe the whole .claude/cache/codebase-index/ dir
|
|
710
|
+
# Or manually: rm -rf .claude/cache/codebase-index/
|
|
711
|
+
codebase-index index
|
|
712
|
+
```
|
|
713
|
+
|
|
714
|
+
## Contributing
|
|
715
|
+
|
|
716
|
+
We welcome contributions! See [CONTRIBUTING.md](CONTRIBUTING.md) for the full guide.
|
|
717
|
+
|
|
718
|
+
Quick start:
|
|
719
|
+
|
|
720
|
+
```bash
|
|
721
|
+
git clone https://github.com/denfry/codebase-index.git
|
|
722
|
+
cd codebase-index
|
|
723
|
+
pip install -e ".[dev]"
|
|
724
|
+
pytest
|
|
725
|
+
ruff check src/ tests/
|
|
726
|
+
```
|
|
727
|
+
|
|
728
|
+
## Roadmap
|
|
729
|
+
|
|
730
|
+
See [ROADMAP.md](ROADMAP.md) for the full milestone plan.
|
|
731
|
+
|
|
732
|
+
| Milestone | Status | Description |
|
|
733
|
+
|---|---|---|
|
|
734
|
+
| M0 | ✅ Done | Repository packaging |
|
|
735
|
+
| M1 | ✅ Done | SQLite + FTS5 index |
|
|
736
|
+
| M2 | ✅ Done | Tree-sitter symbol extraction |
|
|
737
|
+
| M3 | ✅ Done | Hybrid retrieval |
|
|
738
|
+
| M4 | ✅ Done | Graph expansion |
|
|
739
|
+
| M5 | ✅ Done | Token-budgeted retrieval packets |
|
|
740
|
+
| M6 | ✅ Done | Optional local embeddings |
|
|
741
|
+
| M7 | ✅ Done | Claude Code Skill packaging |
|
|
742
|
+
| M7.5 | ✅ Done | One-command plugin install |
|
|
743
|
+
| M8 | ✅ Done | Hooks + watch mode |
|
|
744
|
+
| M9 | ✅ Done | Public release |
|
|
745
|
+
|
|
746
|
+
## License
|
|
747
|
+
|
|
748
|
+
[MIT](LICENSE)
|