code-context-mcp 1.0.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- code_context_mcp-1.0.0/LICENSE +21 -0
- code_context_mcp-1.0.0/PKG-INFO +181 -0
- code_context_mcp-1.0.0/README.md +134 -0
- code_context_mcp-1.0.0/pyproject.toml +69 -0
- code_context_mcp-1.0.0/setup.cfg +4 -0
- code_context_mcp-1.0.0/src/code_context/__init__.py +3 -0
- code_context_mcp-1.0.0/src/code_context/_background.py +93 -0
- code_context_mcp-1.0.0/src/code_context/_composition.py +425 -0
- code_context_mcp-1.0.0/src/code_context/_watcher.py +89 -0
- code_context_mcp-1.0.0/src/code_context/adapters/__init__.py +0 -0
- code_context_mcp-1.0.0/src/code_context/adapters/driven/__init__.py +0 -0
- code_context_mcp-1.0.0/src/code_context/adapters/driven/chunker_dispatcher.py +43 -0
- code_context_mcp-1.0.0/src/code_context/adapters/driven/chunker_line.py +54 -0
- code_context_mcp-1.0.0/src/code_context/adapters/driven/chunker_treesitter.py +215 -0
- code_context_mcp-1.0.0/src/code_context/adapters/driven/chunker_treesitter_queries.py +111 -0
- code_context_mcp-1.0.0/src/code_context/adapters/driven/code_source_fs.py +122 -0
- code_context_mcp-1.0.0/src/code_context/adapters/driven/embeddings_local.py +111 -0
- code_context_mcp-1.0.0/src/code_context/adapters/driven/embeddings_openai.py +58 -0
- code_context_mcp-1.0.0/src/code_context/adapters/driven/git_source_cli.py +211 -0
- code_context_mcp-1.0.0/src/code_context/adapters/driven/introspector_fs.py +224 -0
- code_context_mcp-1.0.0/src/code_context/adapters/driven/keyword_index_sqlite.py +206 -0
- code_context_mcp-1.0.0/src/code_context/adapters/driven/reranker_crossencoder.py +61 -0
- code_context_mcp-1.0.0/src/code_context/adapters/driven/symbol_index_sqlite.py +264 -0
- code_context_mcp-1.0.0/src/code_context/adapters/driven/vector_store_numpy.py +119 -0
- code_context_mcp-1.0.0/src/code_context/adapters/driving/__init__.py +0 -0
- code_context_mcp-1.0.0/src/code_context/adapters/driving/mcp_server.py +365 -0
- code_context_mcp-1.0.0/src/code_context/cli.py +161 -0
- code_context_mcp-1.0.0/src/code_context/config.py +114 -0
- code_context_mcp-1.0.0/src/code_context/domain/__init__.py +0 -0
- code_context_mcp-1.0.0/src/code_context/domain/index_bus.py +52 -0
- code_context_mcp-1.0.0/src/code_context/domain/models.py +140 -0
- code_context_mcp-1.0.0/src/code_context/domain/ports.py +205 -0
- code_context_mcp-1.0.0/src/code_context/domain/use_cases/__init__.py +0 -0
- code_context_mcp-1.0.0/src/code_context/domain/use_cases/explain_diff.py +98 -0
- code_context_mcp-1.0.0/src/code_context/domain/use_cases/find_definition.py +30 -0
- code_context_mcp-1.0.0/src/code_context/domain/use_cases/find_references.py +22 -0
- code_context_mcp-1.0.0/src/code_context/domain/use_cases/get_file_tree.py +36 -0
- code_context_mcp-1.0.0/src/code_context/domain/use_cases/get_summary.py +24 -0
- code_context_mcp-1.0.0/src/code_context/domain/use_cases/indexer.py +336 -0
- code_context_mcp-1.0.0/src/code_context/domain/use_cases/recent_changes.py +36 -0
- code_context_mcp-1.0.0/src/code_context/domain/use_cases/search_repo.py +131 -0
- code_context_mcp-1.0.0/src/code_context/server.py +151 -0
- code_context_mcp-1.0.0/src/code_context_mcp.egg-info/PKG-INFO +181 -0
- code_context_mcp-1.0.0/src/code_context_mcp.egg-info/SOURCES.txt +46 -0
- code_context_mcp-1.0.0/src/code_context_mcp.egg-info/dependency_links.txt +1 -0
- code_context_mcp-1.0.0/src/code_context_mcp.egg-info/entry_points.txt +3 -0
- code_context_mcp-1.0.0/src/code_context_mcp.egg-info/requires.txt +21 -0
- code_context_mcp-1.0.0/src/code_context_mcp.egg-info/top_level.txt +1 -0
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2026 code-context contributors
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|
|
@@ -0,0 +1,181 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: code-context-mcp
|
|
3
|
+
Version: 1.0.0
|
|
4
|
+
Summary: MCP server with local RAG for Claude Code repo context
|
|
5
|
+
Author: code-context contributors
|
|
6
|
+
License: MIT
|
|
7
|
+
Project-URL: Homepage, https://github.com/nachogeinfor-ops/code-context
|
|
8
|
+
Project-URL: Documentation, https://github.com/nachogeinfor-ops/code-context#readme
|
|
9
|
+
Project-URL: Issues, https://github.com/nachogeinfor-ops/code-context/issues
|
|
10
|
+
Project-URL: Changelog, https://github.com/nachogeinfor-ops/code-context/blob/main/CHANGELOG.md
|
|
11
|
+
Project-URL: Tool Protocol, https://github.com/nachogeinfor-ops/context-template/blob/main/docs/tool-protocol.md
|
|
12
|
+
Keywords: claude-code,mcp,rag,developer-tools
|
|
13
|
+
Classifier: Development Status :: 5 - Production/Stable
|
|
14
|
+
Classifier: Intended Audience :: Developers
|
|
15
|
+
Classifier: License :: OSI Approved :: MIT License
|
|
16
|
+
Classifier: Operating System :: POSIX :: Linux
|
|
17
|
+
Classifier: Operating System :: MacOS
|
|
18
|
+
Classifier: Operating System :: Microsoft :: Windows
|
|
19
|
+
Classifier: Programming Language :: Python :: 3.11
|
|
20
|
+
Classifier: Programming Language :: Python :: 3.12
|
|
21
|
+
Classifier: Programming Language :: Python :: 3.13
|
|
22
|
+
Classifier: Topic :: Software Development
|
|
23
|
+
Classifier: Topic :: Software Development :: Libraries :: Python Modules
|
|
24
|
+
Classifier: Topic :: Text Processing :: Indexing
|
|
25
|
+
Requires-Python: >=3.11
|
|
26
|
+
Description-Content-Type: text/markdown
|
|
27
|
+
License-File: LICENSE
|
|
28
|
+
Requires-Dist: mcp>=1.0
|
|
29
|
+
Requires-Dist: numpy>=1.26
|
|
30
|
+
Requires-Dist: pyarrow>=14
|
|
31
|
+
Requires-Dist: platformdirs>=4
|
|
32
|
+
Requires-Dist: pathspec>=0.12
|
|
33
|
+
Requires-Dist: filelock>=3.13
|
|
34
|
+
Requires-Dist: sentence-transformers>=2.7
|
|
35
|
+
Requires-Dist: tree-sitter>=0.22
|
|
36
|
+
Requires-Dist: tree-sitter-language-pack>=0.7
|
|
37
|
+
Provides-Extra: openai
|
|
38
|
+
Requires-Dist: openai>=1.30; extra == "openai"
|
|
39
|
+
Provides-Extra: watch
|
|
40
|
+
Requires-Dist: watchdog>=4; extra == "watch"
|
|
41
|
+
Provides-Extra: dev
|
|
42
|
+
Requires-Dist: pytest>=7.4; extra == "dev"
|
|
43
|
+
Requires-Dist: pytest-asyncio>=0.23; extra == "dev"
|
|
44
|
+
Requires-Dist: ruff>=0.5; extra == "dev"
|
|
45
|
+
Requires-Dist: watchdog>=4; extra == "dev"
|
|
46
|
+
Dynamic: license-file
|
|
47
|
+
|
|
48
|
+
# code-context
|
|
49
|
+
|
|
50
|
+
[PyPI version](https://pypi.org/project/code-context-mcp/)
|
|
51
|
+
[CI](https://github.com/nachogeinfor-ops/code-context/actions/workflows/ci.yml)
|
|
52
|
+
[Python versions](https://pypi.org/project/code-context-mcp/)
|
|
53
|
+
[License: MIT](LICENSE)
|
|
54
|
+
|
|
55
|
+
> **Status: stable (v1.0.0).** A Python MCP server with local RAG
|
|
56
|
+
> for [Claude Code](https://docs.claude.com/claude-code).
|
|
57
|
+
> Implements the [`code-context` Tool Protocol](https://github.com/nachogeinfor-ops/context-template/blob/main/docs/tool-protocol.md)
|
|
58
|
+
> v1.2 defined by [`context-template`](https://github.com/nachogeinfor-ops/context-template).
|
|
59
|
+
|
|
60
|
+
## What it does
|
|
61
|
+
|
|
62
|
+
When you point Claude Code at a repo, you give it `CLAUDE.md` for static context. `code-context` adds **dynamic context** via 7 MCP tools:
|
|
63
|
+
|
|
64
|
+
- **`search_repo(query, top_k?, scope?)`** — **hybrid retrieval** across the codebase: vector embeddings (semantic) fused with BM25 keyword search (exact identifiers) via Reciprocal Rank Fusion. Optional cross-encoder reranking (off by default — enable with `CC_RERANK=on`).
|
|
65
|
+
- **`recent_changes(since?, paths?, max?)`** — recent git commits, optionally filtered.
|
|
66
|
+
- **`get_summary(scope?, path?)`** — structured project summary (name, stack, key modules, stats).
|
|
67
|
+
- **`find_definition(name, language?, max?)`** — locate where a symbol (function, class, method, type) is defined. Use INSTEAD of `Grep` for `def X` / `class X` / `function X` patterns. Returns repo-relative paths with line ranges and the symbol's kind (function, class, method, interface, struct, enum, record).
|
|
68
|
+
- **`find_references(name, max?)`** — list every line mentioning a named symbol. Use INSTEAD of `grep -n "X"` when the user asks "who calls X?" or "where is X used?". Word-boundary matched, so `log` doesn't return `logger`.
|
|
69
|
+
- **`get_file_tree(path?, max_depth?, include_hidden?)`** — repo-relative directory tree, gitignore-aware. Use INSTEAD of `Bash: ls -R` or `Bash: tree` for orientation prompts ("show me the project structure", "what's in this module?"). Returns hierarchical FileTreeNode with file sizes; honors `.gitignore`; defaults to depth 4.
|
|
70
|
+
- **`explain_diff(ref, max_chunks?)`** — AST-aligned chunks affected by the diff at `ref` (full SHA, `HEAD`, `HEAD~N`, branch). Use INSTEAD of `Bash: git show <sha>` for "what does this commit do" questions. The chunker resolves which whole functions/classes were touched, not raw line additions.
|
|
71
|
+
|
|
72
|
+
Architecture: hexagonal (ports & adapters). 9 driven ports with default implementations (sentence-transformers embeddings, NumPy+Parquet vector store, tree-sitter / line chunker, filesystem code source, git CLI, filesystem introspector, SQLite FTS5 keyword index, cross-encoder reranker, SQLite-backed symbol index). All swappable.
|
|
73
|
+
|
|
74
|
+
## Install
|
|
75
|
+
|
|
76
|
+
```bash
|
|
77
|
+
pip install code-context-mcp
|
|
78
|
+
# or, to embed via the OpenAI API instead of the local model, add the extra:
|
|
79
|
+
pip install "code-context-mcp[openai]"
|
|
80
|
+
```
|
|
81
|
+
|
|
82
|
+
> The PyPI distribution is **`code-context-mcp`** (the unsuffixed `code-context` name was already taken by an unrelated, abandoned project from 2023; see CHANGELOG for context). The Python module is still `code_context` and the CLI binaries are still `code-context` and `code-context-server`, so quickstart commands and `from code_context import ...` are unchanged.
|
|
83
|
+
|
|
84
|
+
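> Note: the default install pulls `sentence-transformers` and, on first run, downloads the `all-MiniLM-L6-v2` model. Plan for ~2 GB of disk after first reindex (torch ≈ 2 GB, model ≈ 90 MB). The `[openai]` extra (with `CC_EMBEDDINGS=openai`) skips the local model download, but `sentence-transformers` is a core dependency, so torch is still installed.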
|
|
85
|
+
|
|
86
|
+
## Quickstart
|
|
87
|
+
|
|
88
|
+
```bash
|
|
89
|
+
cd /path/to/your/repo
|
|
90
|
+
claude mcp add code-context --command code-context-server
|
|
91
|
+
# Open Claude Code. From v0.9.0 the server starts in <1 s on a previously-indexed
|
|
92
|
+
# repo; the first reindex (and any subsequent ones) run on a background thread,
|
|
93
|
+
# so queries are never blocked. Cold start: queries return [] until the first
|
|
94
|
+
# bg reindex completes (~30-60 s on a typical repo with all-MiniLM on CPU).
|
|
95
|
+
# Edit-cycle reindex is sub-10 s thanks to v0.8.0's dirty_set tracking.
|
|
96
|
+
```
|
|
97
|
+
|
|
98
|
+
### Live mode (optional)
|
|
99
|
+
|
|
100
|
+
If you want every save in the repo to flow into the index without
|
|
101
|
+
manual `code-context reindex`:
|
|
102
|
+
|
|
103
|
+
```bash
|
|
104
|
+
pip install "code-context-mcp[watch]"   # adds watchdog
|
|
105
|
+
export CC_WATCH=on
|
|
106
|
+
claude mcp add code-context --command code-context-server
|
|
107
|
+
```
|
|
108
|
+
|
|
109
|
+
Edits are debounced for ~1 s (configurable via
|
|
110
|
+
`CC_WATCH_DEBOUNCE_MS`) and then trigger a background reindex.
|
|
111
|
+
Default off — opt-in.
|
|
112
|
+
|
|
113
|
+
For OpenAI embeddings:
|
|
114
|
+
```bash
|
|
115
|
+
export CC_EMBEDDINGS=openai
|
|
116
|
+
export OPENAI_API_KEY=sk-...
|
|
117
|
+
claude mcp add code-context --command code-context-server
|
|
118
|
+
```
|
|
119
|
+
|
|
120
|
+
## Making Claude actually use these tools
|
|
121
|
+
|
|
122
|
+
Claude Code defaults to its built-in tools (`Bash`, `Grep`, `Glob`, `Read`) over MCP servers because it knows them best. To get the value of `code-context`, give Claude an explicit hint by adding a section like this to your project's `CLAUDE.md`:
|
|
123
|
+
|
|
124
|
+
```markdown
|
|
125
|
+
## Context tools
|
|
126
|
+
|
|
127
|
+
This repo has the [code-context](https://github.com/nachogeinfor-ops/code-context) MCP server installed. Prefer it over built-in tools:
|
|
128
|
+
|
|
129
|
+
- **`search_repo(query, top_k?, scope?)`** — for conceptual questions like "where do we handle authentication" or "how is caching implemented". Use this instead of `Grep` whenever the query isn't an exact string match.
|
|
130
|
+
- **`recent_changes(since?, paths?, max?)`** — for "what changed recently" / commit-history questions. Use this instead of shelling out to `git log`.
|
|
131
|
+
- **`get_summary(scope?, path?)`** — for project orientation at session start, or to inspect a specific module.
|
|
132
|
+
- **`find_definition(name, language?, max?)`** — for "where is X defined?". Use this instead of `Grep` for `def X` / `class X` patterns; tree-sitter-indexed at reindex time, so it's faster and more accurate than scanning text.
|
|
133
|
+
- **`find_references(name, max?)`** — for "who calls X?" / "where is X used?". Use this instead of `grep -n`; word-boundary matched so `log` won't match `logger`.
|
|
134
|
+
- **`get_file_tree(path?, max_depth?, include_hidden?)`** — for "show me the project structure" / "what's in this module?". Use this instead of `Bash: ls -R` / `Bash: tree`; gitignore-aware and structured (file sizes included).
|
|
135
|
+
- **`explain_diff(ref, max_chunks?)`** — for "what does this commit do?" / "what changed in HEAD~3?". Use this instead of `Bash: git show <sha>`; the chunker resolves whole functions/classes that were touched, not raw line additions.
|
|
136
|
+
```
|
|
137
|
+
|
|
138
|
+
Without this hint, Claude will work fine — it just won't reach for the MCP tools, which means the index goes unused. The hint is one paragraph; copy-paste it.
|
|
139
|
+
|
|
140
|
+
## CLI
|
|
141
|
+
|
|
142
|
+
`code-context-server` is the MCP binary; you don't run it directly. The companion `code-context` CLI helps administer the index:
|
|
143
|
+
|
|
144
|
+
```bash
|
|
145
|
+
code-context status # print index health + dirty/deleted counts
|
|
146
|
+
code-context reindex # incremental by default (only changed files)
|
|
147
|
+
code-context reindex --force # full reindex (post-model-upgrade or cache reset)
|
|
148
|
+
code-context query "where do we validate user emails" # debug, no MCP
|
|
149
|
+
code-context clear --yes # delete the cache for this repo
|
|
150
|
+
```
|
|
151
|
+
|
|
152
|
+
## Configuration
|
|
153
|
+
|
|
154
|
+
Configured via env vars. See [`docs/configuration.md`](docs/configuration.md) for the full list. Most-used:
|
|
155
|
+
|
|
156
|
+
| Var | Default |
|
|
157
|
+
|---|---|
|
|
158
|
+
| `CC_EMBEDDINGS` | `local` (or `openai`) |
|
|
159
|
+
| `CC_EMBEDDINGS_MODEL` | `all-MiniLM-L6-v2` |
|
|
160
|
+
| `CC_INCLUDE_EXTENSIONS` | `.py,.js,.ts,.jsx,.tsx,.go,.rs,.java,.c,.cpp,.h,.hpp,.md,.yaml,.yml,.json` |
|
|
161
|
+
| `CC_CHUNKER` | `treesitter` (AST-aware for Py/JS/TS/Go/Rust/C#, line fallback) — set `line` for v0.1.x behavior |
|
|
162
|
+
| `CC_CACHE_DIR` | platformdirs user cache |
|
|
163
|
+
|
|
164
|
+
## Documentation
|
|
165
|
+
|
|
166
|
+
- **[Public API (v1)](docs/v1-api.md)** — what's stable; what's not. Read this before depending on `code-context` from another project.
|
|
167
|
+
- **[Configuration](docs/configuration.md)** — every env var with examples (chunker strategies, hybrid search, symbol index, background reindex, watch mode, …).
|
|
168
|
+
- **[Architecture](docs/architecture.md)** — hexagonal diagram, port contracts, indexing lifecycle, Sprint 7 background-thread + bus.
|
|
169
|
+
- **[Eval suite](benchmarks/eval/README.md)** — NDCG@10 / MRR / latency baselines per retrieval mode.
|
|
170
|
+
- **[Releasing](docs/release.md)** — Trusted Publisher setup, per-release checklist.
|
|
171
|
+
- **[Extending](docs/extending.md)** — write your own embeddings provider, vector store, or chunker.
|
|
172
|
+
|
|
173
|
+
## Status
|
|
174
|
+
|
|
175
|
+
**v1.0.0 — stable.** Public surface frozen; v1.x will only add. See
|
|
176
|
+
[`docs/v1-api.md`](docs/v1-api.md) for the commitment scope and
|
|
177
|
+
[`CHANGELOG.md`](CHANGELOG.md) for what shipped in each version.
|
|
178
|
+
|
|
179
|
+
## License
|
|
180
|
+
|
|
181
|
+
[MIT](LICENSE).
|
|
@@ -0,0 +1,134 @@
|
|
|
1
|
+
# code-context
|
|
2
|
+
|
|
3
|
+
[PyPI version](https://pypi.org/project/code-context-mcp/)
|
|
4
|
+
[CI](https://github.com/nachogeinfor-ops/code-context/actions/workflows/ci.yml)
|
|
5
|
+
[Python versions](https://pypi.org/project/code-context-mcp/)
|
|
6
|
+
[License: MIT](LICENSE)
|
|
7
|
+
|
|
8
|
+
> **Status: stable (v1.0.0).** A Python MCP server with local RAG
|
|
9
|
+
> for [Claude Code](https://docs.claude.com/claude-code).
|
|
10
|
+
> Implements the [`code-context` Tool Protocol](https://github.com/nachogeinfor-ops/context-template/blob/main/docs/tool-protocol.md)
|
|
11
|
+
> v1.2 defined by [`context-template`](https://github.com/nachogeinfor-ops/context-template).
|
|
12
|
+
|
|
13
|
+
## What it does
|
|
14
|
+
|
|
15
|
+
When you point Claude Code at a repo, you give it `CLAUDE.md` for static context. `code-context` adds **dynamic context** via 7 MCP tools:
|
|
16
|
+
|
|
17
|
+
- **`search_repo(query, top_k?, scope?)`** — **hybrid retrieval** across the codebase: vector embeddings (semantic) fused with BM25 keyword search (exact identifiers) via Reciprocal Rank Fusion. Optional cross-encoder reranking (off by default — enable with `CC_RERANK=on`).
|
|
18
|
+
- **`recent_changes(since?, paths?, max?)`** — recent git commits, optionally filtered.
|
|
19
|
+
- **`get_summary(scope?, path?)`** — structured project summary (name, stack, key modules, stats).
|
|
20
|
+
- **`find_definition(name, language?, max?)`** — locate where a symbol (function, class, method, type) is defined. Use INSTEAD of `Grep` for `def X` / `class X` / `function X` patterns. Returns repo-relative paths with line ranges and the symbol's kind (function, class, method, interface, struct, enum, record).
|
|
21
|
+
- **`find_references(name, max?)`** — list every line mentioning a named symbol. Use INSTEAD of `grep -n "X"` when the user asks "who calls X?" or "where is X used?". Word-boundary matched, so `log` doesn't return `logger`.
|
|
22
|
+
- **`get_file_tree(path?, max_depth?, include_hidden?)`** — repo-relative directory tree, gitignore-aware. Use INSTEAD of `Bash: ls -R` or `Bash: tree` for orientation prompts ("show me the project structure", "what's in this module?"). Returns hierarchical FileTreeNode with file sizes; honors `.gitignore`; defaults to depth 4.
|
|
23
|
+
- **`explain_diff(ref, max_chunks?)`** — AST-aligned chunks affected by the diff at `ref` (full SHA, `HEAD`, `HEAD~N`, branch). Use INSTEAD of `Bash: git show <sha>` for "what does this commit do" questions. The chunker resolves which whole functions/classes were touched, not raw line additions.
|
|
24
|
+
|
|
25
|
+
Architecture: hexagonal (ports & adapters). 9 driven ports with default implementations (sentence-transformers embeddings, NumPy+Parquet vector store, tree-sitter / line chunker, filesystem code source, git CLI, filesystem introspector, SQLite FTS5 keyword index, cross-encoder reranker, SQLite-backed symbol index). All swappable.
|
|
26
|
+
|
|
27
|
+
## Install
|
|
28
|
+
|
|
29
|
+
```bash
|
|
30
|
+
pip install code-context-mcp
|
|
31
|
+
# or, to embed via the OpenAI API instead of the local model, add the extra:
|
|
32
|
+
pip install "code-context-mcp[openai]"
|
|
33
|
+
```
|
|
34
|
+
|
|
35
|
+
> The PyPI distribution is **`code-context-mcp`** (the unsuffixed `code-context` name was already taken by an unrelated, abandoned project from 2023; see CHANGELOG for context). The Python module is still `code_context` and the CLI binaries are still `code-context` and `code-context-server`, so quickstart commands and `from code_context import ...` are unchanged.
|
|
36
|
+
|
|
37
|
+
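> Note: the default install pulls `sentence-transformers` and, on first run, downloads the `all-MiniLM-L6-v2` model. Plan for ~2 GB of disk after first reindex (torch ≈ 2 GB, model ≈ 90 MB). The `[openai]` extra (with `CC_EMBEDDINGS=openai`) skips the local model download, but `sentence-transformers` is a core dependency, so torch is still installed.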
|
|
38
|
+
|
|
39
|
+
## Quickstart
|
|
40
|
+
|
|
41
|
+
```bash
|
|
42
|
+
cd /path/to/your/repo
|
|
43
|
+
claude mcp add code-context --command code-context-server
|
|
44
|
+
# Open Claude Code. From v0.9.0 the server starts in <1 s on a previously-indexed
|
|
45
|
+
# repo; the first reindex (and any subsequent ones) run on a background thread,
|
|
46
|
+
# so queries are never blocked. Cold start: queries return [] until the first
|
|
47
|
+
# bg reindex completes (~30-60 s on a typical repo with all-MiniLM on CPU).
|
|
48
|
+
# Edit-cycle reindex is sub-10 s thanks to v0.8.0's dirty_set tracking.
|
|
49
|
+
```
|
|
50
|
+
|
|
51
|
+
### Live mode (optional)
|
|
52
|
+
|
|
53
|
+
If you want every save in the repo to flow into the index without
|
|
54
|
+
manual `code-context reindex`:
|
|
55
|
+
|
|
56
|
+
```bash
|
|
57
|
+
pip install "code-context-mcp[watch]"   # adds watchdog
|
|
58
|
+
export CC_WATCH=on
|
|
59
|
+
claude mcp add code-context --command code-context-server
|
|
60
|
+
```
|
|
61
|
+
|
|
62
|
+
Edits are debounced for ~1 s (configurable via
|
|
63
|
+
`CC_WATCH_DEBOUNCE_MS`) and then trigger a background reindex.
|
|
64
|
+
Default off — opt-in.
|
|
65
|
+
|
|
66
|
+
For OpenAI embeddings:
|
|
67
|
+
```bash
|
|
68
|
+
export CC_EMBEDDINGS=openai
|
|
69
|
+
export OPENAI_API_KEY=sk-...
|
|
70
|
+
claude mcp add code-context --command code-context-server
|
|
71
|
+
```
|
|
72
|
+
|
|
73
|
+
## Making Claude actually use these tools
|
|
74
|
+
|
|
75
|
+
Claude Code defaults to its built-in tools (`Bash`, `Grep`, `Glob`, `Read`) over MCP servers because it knows them best. To get the value of `code-context`, give Claude an explicit hint by adding a section like this to your project's `CLAUDE.md`:
|
|
76
|
+
|
|
77
|
+
```markdown
|
|
78
|
+
## Context tools
|
|
79
|
+
|
|
80
|
+
This repo has the [code-context](https://github.com/nachogeinfor-ops/code-context) MCP server installed. Prefer it over built-in tools:
|
|
81
|
+
|
|
82
|
+
- **`search_repo(query, top_k?, scope?)`** — for conceptual questions like "where do we handle authentication" or "how is caching implemented". Use this instead of `Grep` whenever the query isn't an exact string match.
|
|
83
|
+
- **`recent_changes(since?, paths?, max?)`** — for "what changed recently" / commit-history questions. Use this instead of shelling out to `git log`.
|
|
84
|
+
- **`get_summary(scope?, path?)`** — for project orientation at session start, or to inspect a specific module.
|
|
85
|
+
- **`find_definition(name, language?, max?)`** — for "where is X defined?". Use this instead of `Grep` for `def X` / `class X` patterns; tree-sitter-indexed at reindex time, so it's faster and more accurate than scanning text.
|
|
86
|
+
- **`find_references(name, max?)`** — for "who calls X?" / "where is X used?". Use this instead of `grep -n`; word-boundary matched so `log` won't match `logger`.
|
|
87
|
+
- **`get_file_tree(path?, max_depth?, include_hidden?)`** — for "show me the project structure" / "what's in this module?". Use this instead of `Bash: ls -R` / `Bash: tree`; gitignore-aware and structured (file sizes included).
|
|
88
|
+
- **`explain_diff(ref, max_chunks?)`** — for "what does this commit do?" / "what changed in HEAD~3?". Use this instead of `Bash: git show <sha>`; the chunker resolves whole functions/classes that were touched, not raw line additions.
|
|
89
|
+
```
|
|
90
|
+
|
|
91
|
+
Without this hint, Claude will work fine — it just won't reach for the MCP tools, which means the index goes unused. The hint is one paragraph; copy-paste it.
|
|
92
|
+
|
|
93
|
+
## CLI
|
|
94
|
+
|
|
95
|
+
`code-context-server` is the MCP binary; you don't run it directly. The companion `code-context` CLI helps administer the index:
|
|
96
|
+
|
|
97
|
+
```bash
|
|
98
|
+
code-context status # print index health + dirty/deleted counts
|
|
99
|
+
code-context reindex # incremental by default (only changed files)
|
|
100
|
+
code-context reindex --force # full reindex (post-model-upgrade or cache reset)
|
|
101
|
+
code-context query "where do we validate user emails" # debug, no MCP
|
|
102
|
+
code-context clear --yes # delete the cache for this repo
|
|
103
|
+
```
|
|
104
|
+
|
|
105
|
+
## Configuration
|
|
106
|
+
|
|
107
|
+
Configured via env vars. See [`docs/configuration.md`](docs/configuration.md) for the full list. Most-used:
|
|
108
|
+
|
|
109
|
+
| Var | Default |
|
|
110
|
+
|---|---|
|
|
111
|
+
| `CC_EMBEDDINGS` | `local` (or `openai`) |
|
|
112
|
+
| `CC_EMBEDDINGS_MODEL` | `all-MiniLM-L6-v2` |
|
|
113
|
+
| `CC_INCLUDE_EXTENSIONS` | `.py,.js,.ts,.jsx,.tsx,.go,.rs,.java,.c,.cpp,.h,.hpp,.md,.yaml,.yml,.json` |
|
|
114
|
+
| `CC_CHUNKER` | `treesitter` (AST-aware for Py/JS/TS/Go/Rust/C#, line fallback) — set `line` for v0.1.x behavior |
|
|
115
|
+
| `CC_CACHE_DIR` | platformdirs user cache |
|
|
116
|
+
|
|
117
|
+
## Documentation
|
|
118
|
+
|
|
119
|
+
- **[Public API (v1)](docs/v1-api.md)** — what's stable; what's not. Read this before depending on `code-context` from another project.
|
|
120
|
+
- **[Configuration](docs/configuration.md)** — every env var with examples (chunker strategies, hybrid search, symbol index, background reindex, watch mode, …).
|
|
121
|
+
- **[Architecture](docs/architecture.md)** — hexagonal diagram, port contracts, indexing lifecycle, Sprint 7 background-thread + bus.
|
|
122
|
+
- **[Eval suite](benchmarks/eval/README.md)** — NDCG@10 / MRR / latency baselines per retrieval mode.
|
|
123
|
+
- **[Releasing](docs/release.md)** — Trusted Publisher setup, per-release checklist.
|
|
124
|
+
- **[Extending](docs/extending.md)** — write your own embeddings provider, vector store, or chunker.
|
|
125
|
+
|
|
126
|
+
## Status
|
|
127
|
+
|
|
128
|
+
**v1.0.0 — stable.** Public surface frozen; v1.x will only add. See
|
|
129
|
+
[`docs/v1-api.md`](docs/v1-api.md) for the commitment scope and
|
|
130
|
+
[`CHANGELOG.md`](CHANGELOG.md) for what shipped in each version.
|
|
131
|
+
|
|
132
|
+
## License
|
|
133
|
+
|
|
134
|
+
[MIT](LICENSE).
|
|
@@ -0,0 +1,69 @@
|
|
|
1
|
+
[build-system]
|
|
2
|
+
requires = ["setuptools>=68", "wheel"]
|
|
3
|
+
build-backend = "setuptools.build_meta"
|
|
4
|
+
|
|
5
|
+
[project]
|
|
6
|
+
name = "code-context-mcp"
|
|
7
|
+
version = "1.0.0"
|
|
8
|
+
description = "MCP server with local RAG for Claude Code repo context"
|
|
9
|
+
readme = "README.md"
|
|
10
|
+
license = { text = "MIT" }
|
|
11
|
+
requires-python = ">=3.11"
|
|
12
|
+
authors = [{ name = "code-context contributors" }]
|
|
13
|
+
keywords = ["claude-code", "mcp", "rag", "developer-tools"]
|
|
14
|
+
classifiers = [
|
|
15
|
+
"Development Status :: 5 - Production/Stable",
|
|
16
|
+
"Intended Audience :: Developers",
|
|
17
|
+
"License :: OSI Approved :: MIT License",
|
|
18
|
+
"Operating System :: POSIX :: Linux",
|
|
19
|
+
"Operating System :: MacOS",
|
|
20
|
+
"Operating System :: Microsoft :: Windows",
|
|
21
|
+
"Programming Language :: Python :: 3.11",
|
|
22
|
+
"Programming Language :: Python :: 3.12",
|
|
23
|
+
"Programming Language :: Python :: 3.13",
|
|
24
|
+
"Topic :: Software Development",
|
|
25
|
+
"Topic :: Software Development :: Libraries :: Python Modules",
|
|
26
|
+
"Topic :: Text Processing :: Indexing",
|
|
27
|
+
]
|
|
28
|
+
dependencies = [
|
|
29
|
+
"mcp>=1.0",
|
|
30
|
+
"numpy>=1.26",
|
|
31
|
+
"pyarrow>=14",
|
|
32
|
+
"platformdirs>=4",
|
|
33
|
+
"pathspec>=0.12",
|
|
34
|
+
"filelock>=3.13",
|
|
35
|
+
"sentence-transformers>=2.7",
|
|
36
|
+
"tree-sitter>=0.22",
|
|
37
|
+
"tree-sitter-language-pack>=0.7",
|
|
38
|
+
]
|
|
39
|
+
|
|
40
|
+
[project.optional-dependencies]
|
|
41
|
+
openai = ["openai>=1.30"]
|
|
42
|
+
watch = ["watchdog>=4"]
|
|
43
|
+
dev = [
|
|
44
|
+
"pytest>=7.4",
|
|
45
|
+
"pytest-asyncio>=0.23",
|
|
46
|
+
"ruff>=0.5",
|
|
47
|
+
"watchdog>=4",
|
|
48
|
+
]
|
|
49
|
+
|
|
50
|
+
[project.scripts]
|
|
51
|
+
code-context = "code_context.cli:main"
|
|
52
|
+
code-context-server = "code_context.server:main"
|
|
53
|
+
|
|
54
|
+
[project.urls]
|
|
55
|
+
Homepage = "https://github.com/nachogeinfor-ops/code-context"
|
|
56
|
+
Documentation = "https://github.com/nachogeinfor-ops/code-context#readme"
|
|
57
|
+
Issues = "https://github.com/nachogeinfor-ops/code-context/issues"
|
|
58
|
+
Changelog = "https://github.com/nachogeinfor-ops/code-context/blob/main/CHANGELOG.md"
|
|
59
|
+
"Tool Protocol" = "https://github.com/nachogeinfor-ops/context-template/blob/main/docs/tool-protocol.md"
|
|
60
|
+
|
|
61
|
+
[tool.setuptools.packages.find]
|
|
62
|
+
where = ["src"]
|
|
63
|
+
|
|
64
|
+
[tool.ruff]
|
|
65
|
+
line-length = 100
|
|
66
|
+
target-version = "py311"
|
|
67
|
+
|
|
68
|
+
[tool.ruff.lint]
|
|
69
|
+
select = ["E", "F", "I", "B", "UP", "SIM"]
|
|
@@ -0,0 +1,93 @@
|
|
|
1
|
+
"""BackgroundIndexer — runs reindex on a worker thread, posts to the bus.
|
|
2
|
+
|
|
3
|
+
Single-threaded coordinator. External code calls `.trigger()` to ask
|
|
4
|
+
for a reindex; the thread coalesces multiple triggers into one job
|
|
5
|
+
(an `Event` is set/cleared, not a queue), so a 5-event burst from a
|
|
6
|
+
file watcher saving in rapid succession produces ONE reindex, not
|
|
7
|
+
five. On completion, the configured `swap` callback runs first
|
|
8
|
+
(typically `_atomic_swap_current` from the composition root) and
|
|
9
|
+
then `bus.publish_swap(new_dir)` notifies any subscriber.
|
|
10
|
+
|
|
11
|
+
Errors in the indexer are caught and logged at ERROR level; the
|
|
12
|
+
worker keeps running so the next trigger has a chance. This matches
|
|
13
|
+
the philosophy of "background reindex must never crash the MCP
|
|
14
|
+
server."
|
|
15
|
+
|
|
16
|
+
The thread is daemonic so it doesn't block process exit if `.stop()`
|
|
17
|
+
is missed (e.g., a hard SIGINT before the main loop's finally
|
|
18
|
+
block). `.stop()` itself sets a flag and joins with a 5 s timeout
|
|
19
|
+
by default, which is longer than the ~1 s default `idle_seconds` window.
|
|
20
|
+
"""
|
|
21
|
+
|
|
22
|
+
from __future__ import annotations
|
|
23
|
+
|
|
24
|
+
import logging
|
|
25
|
+
import threading
|
|
26
|
+
from collections.abc import Callable
|
|
27
|
+
from pathlib import Path
|
|
28
|
+
from typing import Any
|
|
29
|
+
|
|
30
|
+
from code_context.domain.index_bus import IndexUpdateBus
|
|
31
|
+
|
|
32
|
+
log = logging.getLogger(__name__)
|
|
33
|
+
|
|
34
|
+
|
|
35
|
+
class BackgroundIndexer(threading.Thread):
    """Daemon worker thread that runs reindex jobs when triggered.

    Callers request work with `trigger()`; requests are coalesced through
    a single `threading.Event`, so several triggers that arrive before the
    worker consumes the event produce one reindex. After a successful
    reindex the `swap` callback is invoked with the new index directory,
    then `bus.publish_swap()` is called with that directory as a string.
    """

    def __init__(
        self,
        *,
        indexer: Any,  # IndexerUseCase, untyped to avoid circular import
        swap: Callable[[Path], None],
        bus: IndexUpdateBus,
        idle_seconds: float = 1.0,
    ) -> None:
        """Store collaborators and create the wake/stop events.

        Args:
            indexer: Object providing `dirty_set()`, `run()` and
                `run_incremental(stale)`.
            swap: Callback invoked with the directory returned by the
                indexer, before the bus is notified.
            bus: Receives `publish_swap(str(new_dir))` after each swap.
            idle_seconds: Pause after each job during which further
                triggers accumulate into a single pending request.
        """
        super().__init__(name="code-context-bg-indexer", daemon=True)
        self._indexer = indexer
        self._swap = swap
        self._bus = bus
        self._idle = idle_seconds
        self._wake = threading.Event()
        self._stop_event = threading.Event()

    def trigger(self) -> None:
        """Ask the worker thread to run a reindex.

        Idempotent within an idle window: 5 rapid triggers coalesce
        into one job because the Event is sticky until consumed.
        """
        self._wake.set()

    def stop(self, timeout: float = 5.0) -> None:
        """Signal the worker to exit and join up to `timeout` seconds.

        Safe to call when the thread was never started (or has already
        finished) and from the worker thread itself; in those cases the
        stop flag is still set but no join is attempted.
        """
        self._stop_event.set()
        self._wake.set()  # break out of `wait()`
        # `Thread.join()` raises RuntimeError on a thread that has not been
        # started and on the current thread, so only join a live worker
        # from a different thread.
        if self.is_alive() and threading.current_thread() is not self:
            self.join(timeout=timeout)

    def run(self) -> None:
        """Worker loop: wait for a trigger, reindex once, idle, repeat."""
        while not self._stop_event.is_set():
            self._wake.wait()
            # Clear before working so a trigger that arrives during the
            # reindex leaves the event set and schedules another pass.
            self._wake.clear()
            if self._stop_event.is_set():
                return
            try:
                self._reindex_once()
            except Exception:  # noqa: BLE001 - bg failure must not kill the thread
                log.exception("background reindex failed; will retry on next trigger")
            # Idle so rapid triggers coalesce; stop_event lets `.stop()`
            # break out without waiting the full window.
            self._stop_event.wait(self._idle)

    def _reindex_once(self) -> None:
        """Run one reindex if the indexer reports stale state.

        Returns without side effects when nothing is dirty or deleted and
        no full reindex is required. Otherwise runs a full or incremental
        reindex, then calls the swap callback followed by the bus.
        """
        stale = self._indexer.dirty_set()
        no_work = (
            not stale.full_reindex_required and not stale.dirty_files and not stale.deleted_files
        )
        if no_work:
            return
        if stale.full_reindex_required:
            new_dir = self._indexer.run()
        else:
            new_dir = self._indexer.run_incremental(stale)
        # Swap first so subscribers notified by the bus see the new index.
        self._swap(new_dir)
        self._bus.publish_swap(str(new_dir))
        log.info("background reindex complete (%s) -> %s", stale.reason, new_dir)
|