codetex-mcp 0.1.3__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- codetex_mcp-0.1.3/.github/workflows/ci.yml +37 -0
- codetex_mcp-0.1.3/.github/workflows/release.yml +42 -0
- codetex_mcp-0.1.3/.gitignore +12 -0
- codetex_mcp-0.1.3/.python-version +1 -0
- codetex_mcp-0.1.3/CHANGELOG.md +41 -0
- codetex_mcp-0.1.3/CLAUDE.md +96 -0
- codetex_mcp-0.1.3/LICENSE +21 -0
- codetex_mcp-0.1.3/PKG-INFO +434 -0
- codetex_mcp-0.1.3/README.md +381 -0
- codetex_mcp-0.1.3/main.py +6 -0
- codetex_mcp-0.1.3/prd.json +386 -0
- codetex_mcp-0.1.3/progress.md +794 -0
- codetex_mcp-0.1.3/pyproject.toml +93 -0
- codetex_mcp-0.1.3/ralph.sh +40 -0
- codetex_mcp-0.1.3/src/codetex_mcp/__init__.py +0 -0
- codetex_mcp-0.1.3/src/codetex_mcp/__main__.py +4 -0
- codetex_mcp-0.1.3/src/codetex_mcp/analysis/__init__.py +0 -0
- codetex_mcp-0.1.3/src/codetex_mcp/analysis/fallback_parser.py +395 -0
- codetex_mcp-0.1.3/src/codetex_mcp/analysis/models.py +50 -0
- codetex_mcp-0.1.3/src/codetex_mcp/analysis/parser.py +85 -0
- codetex_mcp-0.1.3/src/codetex_mcp/analysis/tree_sitter.py +518 -0
- codetex_mcp-0.1.3/src/codetex_mcp/cli/__init__.py +0 -0
- codetex_mcp-0.1.3/src/codetex_mcp/cli/app.py +504 -0
- codetex_mcp-0.1.3/src/codetex_mcp/config/__init__.py +0 -0
- codetex_mcp-0.1.3/src/codetex_mcp/config/ignore.py +121 -0
- codetex_mcp-0.1.3/src/codetex_mcp/config/settings.py +153 -0
- codetex_mcp-0.1.3/src/codetex_mcp/core/__init__.py +79 -0
- codetex_mcp-0.1.3/src/codetex_mcp/core/context_store.py +185 -0
- codetex_mcp-0.1.3/src/codetex_mcp/core/indexer.py +508 -0
- codetex_mcp-0.1.3/src/codetex_mcp/core/repo_manager.py +120 -0
- codetex_mcp-0.1.3/src/codetex_mcp/core/search_engine.py +111 -0
- codetex_mcp-0.1.3/src/codetex_mcp/core/syncer.py +499 -0
- codetex_mcp-0.1.3/src/codetex_mcp/embeddings/__init__.py +0 -0
- codetex_mcp-0.1.3/src/codetex_mcp/embeddings/embedder.py +43 -0
- codetex_mcp-0.1.3/src/codetex_mcp/exceptions.py +53 -0
- codetex_mcp-0.1.3/src/codetex_mcp/git/__init__.py +0 -0
- codetex_mcp-0.1.3/src/codetex_mcp/git/operations.py +154 -0
- codetex_mcp-0.1.3/src/codetex_mcp/llm/__init__.py +0 -0
- codetex_mcp-0.1.3/src/codetex_mcp/llm/prompts.py +179 -0
- codetex_mcp-0.1.3/src/codetex_mcp/llm/provider.py +96 -0
- codetex_mcp-0.1.3/src/codetex_mcp/llm/rate_limiter.py +41 -0
- codetex_mcp-0.1.3/src/codetex_mcp/server/__init__.py +0 -0
- codetex_mcp-0.1.3/src/codetex_mcp/server/mcp_server.py +243 -0
- codetex_mcp-0.1.3/src/codetex_mcp/storage/__init__.py +0 -0
- codetex_mcp-0.1.3/src/codetex_mcp/storage/database.py +94 -0
- codetex_mcp-0.1.3/src/codetex_mcp/storage/files.py +140 -0
- codetex_mcp-0.1.3/src/codetex_mcp/storage/migrations/001_initial.sql +84 -0
- codetex_mcp-0.1.3/src/codetex_mcp/storage/migrations/__init__.py +0 -0
- codetex_mcp-0.1.3/src/codetex_mcp/storage/repositories.py +108 -0
- codetex_mcp-0.1.3/src/codetex_mcp/storage/symbols.py +120 -0
- codetex_mcp-0.1.3/src/codetex_mcp/storage/vectors.py +90 -0
- codetex_mcp-0.1.3/tasks/architecture.md +1434 -0
- codetex_mcp-0.1.3/tasks/prd-code-context-manager.md +225 -0
- codetex_mcp-0.1.3/tests/__init__.py +0 -0
- codetex_mcp-0.1.3/tests/test_analysis/__init__.py +0 -0
- codetex_mcp-0.1.3/tests/test_analysis/test_fallback_parser.py +344 -0
- codetex_mcp-0.1.3/tests/test_analysis/test_models.py +93 -0
- codetex_mcp-0.1.3/tests/test_analysis/test_parser.py +163 -0
- codetex_mcp-0.1.3/tests/test_analysis/test_tree_sitter.py +324 -0
- codetex_mcp-0.1.3/tests/test_cli/__init__.py +0 -0
- codetex_mcp-0.1.3/tests/test_cli/test_app.py +896 -0
- codetex_mcp-0.1.3/tests/test_config/__init__.py +0 -0
- codetex_mcp-0.1.3/tests/test_config/test_ignore.py +210 -0
- codetex_mcp-0.1.3/tests/test_config/test_settings.py +255 -0
- codetex_mcp-0.1.3/tests/test_core/__init__.py +0 -0
- codetex_mcp-0.1.3/tests/test_core/test_app_context.py +138 -0
- codetex_mcp-0.1.3/tests/test_core/test_context_store.py +315 -0
- codetex_mcp-0.1.3/tests/test_core/test_indexer.py +753 -0
- codetex_mcp-0.1.3/tests/test_core/test_repo_manager.py +260 -0
- codetex_mcp-0.1.3/tests/test_core/test_search_engine.py +235 -0
- codetex_mcp-0.1.3/tests/test_core/test_syncer.py +878 -0
- codetex_mcp-0.1.3/tests/test_embeddings/__init__.py +0 -0
- codetex_mcp-0.1.3/tests/test_embeddings/test_embedder.py +228 -0
- codetex_mcp-0.1.3/tests/test_git/__init__.py +0 -0
- codetex_mcp-0.1.3/tests/test_git/test_operations.py +339 -0
- codetex_mcp-0.1.3/tests/test_llm/__init__.py +0 -0
- codetex_mcp-0.1.3/tests/test_llm/test_prompts.py +296 -0
- codetex_mcp-0.1.3/tests/test_llm/test_provider.py +260 -0
- codetex_mcp-0.1.3/tests/test_llm/test_rate_limiter.py +119 -0
- codetex_mcp-0.1.3/tests/test_server/__init__.py +0 -0
- codetex_mcp-0.1.3/tests/test_server/test_mcp_server.py +503 -0
- codetex_mcp-0.1.3/tests/test_storage/__init__.py +0 -0
- codetex_mcp-0.1.3/tests/test_storage/test_database.py +280 -0
- codetex_mcp-0.1.3/tests/test_storage/test_files.py +234 -0
- codetex_mcp-0.1.3/tests/test_storage/test_repositories.py +139 -0
- codetex_mcp-0.1.3/tests/test_storage/test_symbols.py +274 -0
- codetex_mcp-0.1.3/tests/test_storage/test_vectors.py +260 -0
- codetex_mcp-0.1.3/uv.lock +2372 -0
|
@@ -0,0 +1,37 @@
|
|
|
1
|
+
name: CI
|
|
2
|
+
|
|
3
|
+
on:
|
|
4
|
+
push:
|
|
5
|
+
branches: [main]
|
|
6
|
+
pull_request:
|
|
7
|
+
branches: [main]
|
|
8
|
+
|
|
9
|
+
jobs:
|
|
10
|
+
ci:
|
|
11
|
+
runs-on: ubuntu-latest
|
|
12
|
+
|
|
13
|
+
steps:
|
|
14
|
+
- uses: actions/checkout@v4
|
|
15
|
+
|
|
16
|
+
- name: Set up Python
|
|
17
|
+
uses: actions/setup-python@v5
|
|
18
|
+
with:
|
|
19
|
+
python-version: "3.12"
|
|
20
|
+
|
|
21
|
+
- name: Install uv
|
|
22
|
+
uses: astral-sh/setup-uv@v4
|
|
23
|
+
|
|
24
|
+
- name: Install dependencies
|
|
25
|
+
run: uv sync
|
|
26
|
+
|
|
27
|
+
- name: Lint
|
|
28
|
+
run: uv run ruff check src/ tests/
|
|
29
|
+
|
|
30
|
+
- name: Format check
|
|
31
|
+
run: uv run ruff format --check src/ tests/
|
|
32
|
+
|
|
33
|
+
- name: Type check
|
|
34
|
+
run: uv run mypy src/
|
|
35
|
+
|
|
36
|
+
- name: Test
|
|
37
|
+
run: uv run pytest --cov=codetex_mcp
|
|
@@ -0,0 +1,42 @@
|
|
|
1
|
+
name: Release
|
|
2
|
+
|
|
3
|
+
on:
|
|
4
|
+
push:
|
|
5
|
+
branches: [main]
|
|
6
|
+
|
|
7
|
+
jobs:
|
|
8
|
+
release:
|
|
9
|
+
runs-on: ubuntu-latest
|
|
10
|
+
concurrency: release
|
|
11
|
+
permissions:
|
|
12
|
+
id-token: write
|
|
13
|
+
contents: write
|
|
14
|
+
|
|
15
|
+
steps:
|
|
16
|
+
- uses: actions/checkout@v4
|
|
17
|
+
with:
|
|
18
|
+
fetch-depth: 0
|
|
19
|
+
|
|
20
|
+
- name: Set up Python
|
|
21
|
+
uses: actions/setup-python@v5
|
|
22
|
+
with:
|
|
23
|
+
python-version: "3.12"
|
|
24
|
+
|
|
25
|
+
- name: Install uv
|
|
26
|
+
uses: astral-sh/setup-uv@v4
|
|
27
|
+
|
|
28
|
+
- name: Python Semantic Release
|
|
29
|
+
id: release
|
|
30
|
+
uses: python-semantic-release/python-semantic-release@v9
|
|
31
|
+
with:
|
|
32
|
+
github_token: ${{ secrets.GITHUB_TOKEN }}
|
|
33
|
+
|
|
34
|
+
- name: Build package
|
|
35
|
+
if: steps.release.outputs.released == 'true'
|
|
36
|
+
run: uv build
|
|
37
|
+
|
|
38
|
+
- name: Publish to PyPI
|
|
39
|
+
if: steps.release.outputs.released == 'true'
|
|
40
|
+
uses: pypa/gh-action-pypi-publish@release/v1
|
|
41
|
+
with:
|
|
42
|
+
password: ${{ secrets.PYPI_API_TOKEN }}
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
3.12
|
|
@@ -0,0 +1,41 @@
|
|
|
1
|
+
# CHANGELOG
|
|
2
|
+
|
|
3
|
+
|
|
4
|
+
## v0.1.3 (2026-03-30)
|
|
5
|
+
|
|
6
|
+
### Bug Fixes
|
|
7
|
+
|
|
8
|
+
- Add .coverage to .gitignore
|
|
9
|
+
([`178b3e9`](https://github.com/mrosata/codetex-mcp/commit/178b3e99a9a202accd0932fd8e2105ef4649d939))
|
|
10
|
+
|
|
11
|
+
Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
## v0.1.2 (2026-03-30)
|
|
15
|
+
|
|
16
|
+
### Bug Fixes
|
|
17
|
+
|
|
18
|
+
- **ci**: Move dev deps to dependency-groups and fix lint/format
|
|
19
|
+
([`818cb98`](https://github.com/mrosata/codetex-mcp/commit/818cb982a09b251b1834b594c2187f958c703402))
|
|
20
|
+
|
|
21
|
+
Move dev dependencies (ruff, mypy, pytest) from [project.optional-dependencies] to
|
|
22
|
+
[dependency-groups] so uv sync installs them automatically in CI. Fix ruff lint errors (unused
|
|
23
|
+
imports) and apply ruff format across all source files.
|
|
24
|
+
|
|
25
|
+
Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
|
|
26
|
+
|
|
27
|
+
|
|
28
|
+
## v0.1.1 (2026-03-29)
|
|
29
|
+
|
|
30
|
+
### Bug Fixes
|
|
31
|
+
|
|
32
|
+
- **ci**: Remove build_command from semantic-release config
|
|
33
|
+
([`a3dd857`](https://github.com/mrosata/codetex-mcp/commit/a3dd857ae2e6af13b9c5e45d1acec5726f80ecf9))
|
|
34
|
+
|
|
35
|
+
The PSR GitHub Action runs in a Docker container without uv, causing `uv build` to fail with exit
|
|
36
|
+
code 127. The workflow already has a separate build step that runs on the runner.
|
|
37
|
+
|
|
38
|
+
Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
|
|
39
|
+
|
|
40
|
+
|
|
41
|
+
## v0.1.0 (2026-03-29)
|
|
@@ -0,0 +1,96 @@
|
|
|
1
|
+
# CLAUDE.md
|
|
2
|
+
|
|
3
|
+
This file provides guidance to Claude Code (claude.ai/code) when working with code in this repository.
|
|
4
|
+
|
|
5
|
+
## Project Overview
|
|
6
|
+
|
|
7
|
+
**codetex-mcp** is a commit-aware code context manager for LLMs, providing both an MCP server (stdio transport) and a CLI. It indexes Git repositories into a multi-tier context hierarchy (repo overview → file summaries → symbol details), stores results in SQLite with sqlite-vec for vector search, and serves them to LLM clients over MCP.
|
|
8
|
+
|
|
9
|
+
**Status:** Early stage — project scaffolding and detailed architecture/PRD exist, implementation is in progress.
|
|
10
|
+
|
|
11
|
+
## Development Environment
|
|
12
|
+
|
|
13
|
+
- **Python:** >=3.12 (see `.python-version`)
|
|
14
|
+
- **Package Manager:** uv
|
|
15
|
+
- **Linting/Formatting:** ruff
|
|
16
|
+
- **Type Checking:** mypy
|
|
17
|
+
- **Testing:** pytest with pytest-asyncio
|
|
18
|
+
|
|
19
|
+
## Commands
|
|
20
|
+
|
|
21
|
+
```bash
|
|
22
|
+
# Install dependencies
|
|
23
|
+
uv sync
|
|
24
|
+
|
|
25
|
+
# Run tests
|
|
26
|
+
uv run pytest
|
|
27
|
+
|
|
28
|
+
# Run a single test file
|
|
29
|
+
uv run pytest tests/test_storage/test_database.py
|
|
30
|
+
|
|
31
|
+
# Run tests with coverage
|
|
32
|
+
uv run pytest --cov=codetex_mcp
|
|
33
|
+
|
|
34
|
+
# Lint and format
|
|
35
|
+
uv run ruff check src/ tests/
|
|
36
|
+
uv run ruff format src/ tests/
|
|
37
|
+
|
|
38
|
+
# Type check
|
|
39
|
+
uv run mypy src/
|
|
40
|
+
|
|
41
|
+
# Run CLI
|
|
42
|
+
uv run codetex <command>
|
|
43
|
+
|
|
44
|
+
# Run as module
|
|
45
|
+
uv run python -m codetex_mcp
|
|
46
|
+
|
|
47
|
+
# Run MCP server (stdio transport)
|
|
48
|
+
uv run codetex serve
|
|
49
|
+
```
|
|
50
|
+
|
|
51
|
+
## Architecture
|
|
52
|
+
|
|
53
|
+
Two entry points (CLI via Typer, MCP server via FastMCP) share the same core service layer. No DI framework — services are wired manually via a `create_app()` factory.
|
|
54
|
+
|
|
55
|
+
```
|
|
56
|
+
CLI (typer) ──┐
|
|
57
|
+
├──▶ core/ (RepoManager, Indexer, Syncer, ContextStore, SearchEngine)
|
|
58
|
+
MCP (FastMCP)─┘ │ │ │
|
|
59
|
+
analysis/ llm/ embeddings/
|
|
60
|
+
(tree-sitter + fallback) (sentence-transformers)
|
|
61
|
+
└──────────┼────────────┘
|
|
62
|
+
storage/ (SQLite + sqlite-vec)
|
|
63
|
+
│
|
|
64
|
+
git/ ←─┴──▶ config/
|
|
65
|
+
```
|
|
66
|
+
|
|
67
|
+
**Key modules under `src/codetex_mcp/`:**
|
|
68
|
+
- `cli/app.py` — Typer app with 8 commands (add, index, sync, context, status, list, serve, config)
|
|
69
|
+
- `server/mcp_server.py` — FastMCP server with 7 tools
|
|
70
|
+
- `core/` — Domain logic (no direct I/O, dependencies injected)
|
|
71
|
+
- `analysis/` — Tree-sitter AST parsing with regex fallback; `parser.py` is the unified dispatcher
|
|
72
|
+
- `llm/provider.py` — Abstract base + Anthropic implementation for tier summarization
|
|
73
|
+
- `embeddings/embedder.py` — sentence-transformers wrapper (lazy model loading)
|
|
74
|
+
- `storage/` — SQLite via aiosqlite; DAO pattern with separate modules per entity (repositories, files, symbols, vectors)
|
|
75
|
+
- `storage/migrations/` — SQL migration files applied by `database.py`
|
|
76
|
+
- `git/operations.py` — Subprocess git wrapper (no GitPython)
|
|
77
|
+
- `config/settings.py` — TOML config loader with env var overrides
|
|
78
|
+
- `exceptions.py` — Error hierarchy (11 exception classes)
|
|
79
|
+
|
|
80
|
+
**Data model:** Single SQLite database for all repos. 6 main tables (repositories, files, symbols, dependencies, repo_overviews, schema_version) + 2 vector tables (384-dim embeddings for files and symbols).
|
|
81
|
+
|
|
82
|
+
**Pipelines:** Full index is a 9-step pipeline (discover files → parse AST → generate summaries → embed → store). Incremental sync is a 7-step pipeline using git diff to process only changed files.
|
|
83
|
+
|
|
84
|
+
## Reference Documents
|
|
85
|
+
|
|
86
|
+
- `tasks/architecture.md` — Complete technical architecture (module interfaces, data model, pipeline specs, config schema, wiring)
|
|
87
|
+
- `tasks/prd-code-context-manager.md` — Product requirements document
|
|
88
|
+
- `prd.json` — PRD in structured JSON with 20 user stories and acceptance criteria
|
|
89
|
+
|
|
90
|
+
## Conventions
|
|
91
|
+
|
|
92
|
+
- All core services are async
|
|
93
|
+
- Tree-sitter grammars are optional extras (installed per-language)
|
|
94
|
+
- Config lives at `~/.codetex/config.toml` at runtime; SQLite database at `~/.codetex/codetex.db`
|
|
95
|
+
- MCP tool responses are structured markdown strings optimized for LLM consumption
|
|
96
|
+
- CLI output uses `rich` for progress bars, tables, and markdown rendering
|
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2026 Michael Rosata
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|
|
@@ -0,0 +1,434 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: codetex-mcp
|
|
3
|
+
Version: 0.1.3
|
|
4
|
+
Summary: Commit-aware code context manager for LLMs - MCP server and CLI
|
|
5
|
+
Project-URL: Homepage, https://github.com/mrosata/codetex-mcp
|
|
6
|
+
Project-URL: Repository, https://github.com/mrosata/codetex-mcp
|
|
7
|
+
Project-URL: Issues, https://github.com/mrosata/codetex-mcp/issues
|
|
8
|
+
Author-email: Michael Rosata <michael.rosata@gmail.com>
|
|
9
|
+
License: MIT
|
|
10
|
+
License-File: LICENSE
|
|
11
|
+
Keywords: code-context,llm,mcp,sqlite,tree-sitter
|
|
12
|
+
Classifier: Development Status :: 3 - Alpha
|
|
13
|
+
Classifier: License :: OSI Approved :: MIT License
|
|
14
|
+
Classifier: Programming Language :: Python :: 3.12
|
|
15
|
+
Classifier: Topic :: Software Development :: Libraries
|
|
16
|
+
Requires-Python: >=3.12
|
|
17
|
+
Requires-Dist: aiosqlite>=0.20
|
|
18
|
+
Requires-Dist: anthropic>=0.40
|
|
19
|
+
Requires-Dist: mcp>=1.0
|
|
20
|
+
Requires-Dist: pathspec>=0.12
|
|
21
|
+
Requires-Dist: rich>=13.0
|
|
22
|
+
Requires-Dist: sentence-transformers>=3.0
|
|
23
|
+
Requires-Dist: sqlite-vec>=0.1
|
|
24
|
+
Requires-Dist: tiktoken>=0.7
|
|
25
|
+
Requires-Dist: tree-sitter>=0.23
|
|
26
|
+
Requires-Dist: typer>=0.9
|
|
27
|
+
Provides-Extra: all-grammars
|
|
28
|
+
Requires-Dist: tree-sitter-cpp>=0.23; extra == 'all-grammars'
|
|
29
|
+
Requires-Dist: tree-sitter-go>=0.23; extra == 'all-grammars'
|
|
30
|
+
Requires-Dist: tree-sitter-java>=0.23; extra == 'all-grammars'
|
|
31
|
+
Requires-Dist: tree-sitter-javascript>=0.23; extra == 'all-grammars'
|
|
32
|
+
Requires-Dist: tree-sitter-python>=0.23; extra == 'all-grammars'
|
|
33
|
+
Requires-Dist: tree-sitter-ruby>=0.23; extra == 'all-grammars'
|
|
34
|
+
Requires-Dist: tree-sitter-rust>=0.23; extra == 'all-grammars'
|
|
35
|
+
Requires-Dist: tree-sitter-typescript>=0.23; extra == 'all-grammars'
|
|
36
|
+
Provides-Extra: tree-sitter-cpp
|
|
37
|
+
Requires-Dist: tree-sitter-cpp>=0.23; extra == 'tree-sitter-cpp'
|
|
38
|
+
Provides-Extra: tree-sitter-go
|
|
39
|
+
Requires-Dist: tree-sitter-go>=0.23; extra == 'tree-sitter-go'
|
|
40
|
+
Provides-Extra: tree-sitter-java
|
|
41
|
+
Requires-Dist: tree-sitter-java>=0.23; extra == 'tree-sitter-java'
|
|
42
|
+
Provides-Extra: tree-sitter-javascript
|
|
43
|
+
Requires-Dist: tree-sitter-javascript>=0.23; extra == 'tree-sitter-javascript'
|
|
44
|
+
Provides-Extra: tree-sitter-python
|
|
45
|
+
Requires-Dist: tree-sitter-python>=0.23; extra == 'tree-sitter-python'
|
|
46
|
+
Provides-Extra: tree-sitter-ruby
|
|
47
|
+
Requires-Dist: tree-sitter-ruby>=0.23; extra == 'tree-sitter-ruby'
|
|
48
|
+
Provides-Extra: tree-sitter-rust
|
|
49
|
+
Requires-Dist: tree-sitter-rust>=0.23; extra == 'tree-sitter-rust'
|
|
50
|
+
Provides-Extra: tree-sitter-typescript
|
|
51
|
+
Requires-Dist: tree-sitter-typescript>=0.23; extra == 'tree-sitter-typescript'
|
|
52
|
+
Description-Content-Type: text/markdown
|
|
53
|
+
|
|
54
|
+
# codetex-mcp
|
|
55
|
+
|
|
56
|
+
A commit-aware code context manager for LLMs. Indexes Git repositories into a multi-tier knowledge hierarchy — repo overviews, file summaries, and symbol details — stored in SQLite with vector search. Serves context to LLM clients via the [Model Context Protocol](https://modelcontextprotocol.io/) (MCP) or a local CLI.
|
|
57
|
+
|
|
58
|
+
## What It Does
|
|
59
|
+
|
|
60
|
+
codetex builds a structured, searchable index of your codebase that LLMs can query on demand:
|
|
61
|
+
|
|
62
|
+
- **Tier 1 — Repo Overview:** Purpose, architecture, directory structure, key technologies, entry points
|
|
63
|
+
- **Tier 2 — File Summaries:** Per-file purpose, public interfaces, dependencies, roles
|
|
64
|
+
- **Tier 3 — Symbol Details:** Function/class signatures, parameters, return types, call relationships
|
|
65
|
+
|
|
66
|
+
Summaries are generated by an LLM (Anthropic Claude). Embeddings are computed locally with [sentence-transformers](https://www.sbert.net/) for semantic search. Everything is stored in a single SQLite database with [sqlite-vec](https://github.com/asg017/sqlite-vec) for vector queries.
|
|
67
|
+
|
|
68
|
+
Incremental sync means only changed files are re-analyzed when you update your code.
|
|
69
|
+
|
|
70
|
+
## Requirements
|
|
71
|
+
|
|
72
|
+
- Python 3.12+
|
|
73
|
+
- Git
|
|
74
|
+
- An [Anthropic API key](https://console.anthropic.com/) (for indexing)
|
|
75
|
+
|
|
76
|
+
## Installation
|
|
77
|
+
|
|
78
|
+
```bash
|
|
79
|
+
# With pip
|
|
80
|
+
pip install codetex-mcp
|
|
81
|
+
|
|
82
|
+
# With uv (recommended)
|
|
83
|
+
uv tool install codetex-mcp
|
|
84
|
+
```
|
|
85
|
+
|
|
86
|
+
### Tree-sitter grammars (optional)
|
|
87
|
+
|
|
88
|
+
codetex uses tree-sitter for accurate AST parsing. Without grammars installed, it falls back to regex-based extraction that works for any language but is less precise.
|
|
89
|
+
|
|
90
|
+
Install grammars for the languages you work with:
|
|
91
|
+
|
|
92
|
+
```bash
|
|
93
|
+
# Individual languages
|
|
94
|
+
pip install "codetex-mcp[tree-sitter-python]"
|
|
95
|
+
pip install "codetex-mcp[tree-sitter-typescript]"
|
|
96
|
+
|
|
97
|
+
# All 8 supported languages (Python, JS, TS, Go, Rust, Java, Ruby, C/C++)
|
|
98
|
+
pip install "codetex-mcp[all-grammars]"
|
|
99
|
+
```
|
|
100
|
+
|
|
101
|
+
## Quick Start
|
|
102
|
+
|
|
103
|
+
### 1. Set your Anthropic API key
|
|
104
|
+
|
|
105
|
+
```bash
|
|
106
|
+
# Via environment variable
|
|
107
|
+
export ANTHROPIC_API_KEY=sk-ant-...
|
|
108
|
+
|
|
109
|
+
# Or via config
|
|
110
|
+
codetex config set llm.api_key sk-ant-...
|
|
111
|
+
```
|
|
112
|
+
|
|
113
|
+
### 2. Add a repository
|
|
114
|
+
|
|
115
|
+
```bash
|
|
116
|
+
# Local repo
|
|
117
|
+
codetex add /path/to/your/project
|
|
118
|
+
|
|
119
|
+
# Remote repo (clones to ~/.codetex/repos/)
|
|
120
|
+
codetex add https://github.com/user/repo.git
|
|
121
|
+
```
|
|
122
|
+
|
|
123
|
+
### 3. Index it
|
|
124
|
+
|
|
125
|
+
```bash
|
|
126
|
+
# Preview what indexing will cost (no API calls)
|
|
127
|
+
codetex index my-project --dry-run
|
|
128
|
+
|
|
129
|
+
# Build the full index
|
|
130
|
+
codetex index my-project
|
|
131
|
+
```
|
|
132
|
+
|
|
133
|
+
### 4. Query your codebase
|
|
134
|
+
|
|
135
|
+
```bash
|
|
136
|
+
# Repo overview (Tier 1)
|
|
137
|
+
codetex context my-project
|
|
138
|
+
|
|
139
|
+
# File summary (Tier 2)
|
|
140
|
+
codetex context my-project --file src/auth/login.py
|
|
141
|
+
|
|
142
|
+
# Symbol detail (Tier 3)
|
|
143
|
+
codetex context my-project --symbol authenticate_user
|
|
144
|
+
|
|
145
|
+
# Semantic search
|
|
146
|
+
codetex context my-project --query "how is authentication implemented?"
|
|
147
|
+
```
|
|
148
|
+
|
|
149
|
+
### 5. Keep it up to date
|
|
150
|
+
|
|
151
|
+
```bash
|
|
152
|
+
# Incremental sync — only re-analyzes changed files
|
|
153
|
+
codetex sync my-project
|
|
154
|
+
```
|
|
155
|
+
|
|
156
|
+
## MCP Server Setup
|
|
157
|
+
|
|
158
|
+
The MCP server lets LLM clients (such as Claude Code, Cursor, and Windsurf) query your indexed codebases directly.
|
|
159
|
+
|
|
160
|
+
### Claude Code
|
|
161
|
+
|
|
162
|
+
Add to your Claude Code MCP configuration (for example, a project-level `.mcp.json`; Claude Desktop uses the same block in its `claude_desktop_config.json`):
|
|
163
|
+
|
|
164
|
+
```json
|
|
165
|
+
{
|
|
166
|
+
"mcpServers": {
|
|
167
|
+
"codetex": {
|
|
168
|
+
"command": "codetex",
|
|
169
|
+
"args": ["serve"],
|
|
170
|
+
"env": {
|
|
171
|
+
"ANTHROPIC_API_KEY": "sk-ant-..."
|
|
172
|
+
}
|
|
173
|
+
}
|
|
174
|
+
}
|
|
175
|
+
}
|
|
176
|
+
```
|
|
177
|
+
|
|
178
|
+
If you installed with `uv tool`, use the full path:
|
|
179
|
+
|
|
180
|
+
```json
|
|
181
|
+
{
|
|
182
|
+
"mcpServers": {
|
|
183
|
+
"codetex": {
|
|
184
|
+
"command": "/path/to/codetex",
|
|
185
|
+
"args": ["serve"],
|
|
186
|
+
"env": {
|
|
187
|
+
"ANTHROPIC_API_KEY": "sk-ant-..."
|
|
188
|
+
}
|
|
189
|
+
}
|
|
190
|
+
}
|
|
191
|
+
}
|
|
192
|
+
```
|
|
193
|
+
|
|
194
|
+
Find the path with `which codetex`, or list the directory that holds uv-installed executables with `uv tool dir --bin`.
|
|
195
|
+
|
|
196
|
+
### Other MCP Clients
|
|
197
|
+
|
|
198
|
+
Any client that supports MCP stdio transport can use codetex. The server command is:
|
|
199
|
+
|
|
200
|
+
```bash
|
|
201
|
+
codetex serve
|
|
202
|
+
```
|
|
203
|
+
|
|
204
|
+
### Available MCP Tools
|
|
205
|
+
|
|
206
|
+
Once connected, the LLM has access to 7 tools:
|
|
207
|
+
|
|
208
|
+
| Tool | Description |
|
|
209
|
+
|------|-------------|
|
|
210
|
+
| `get_repo_overview` | Tier 1 repo overview (architecture, technologies, entry points) |
|
|
211
|
+
| `get_file_context` | Tier 2 file summary with symbol list |
|
|
212
|
+
| `get_symbol_detail` | Tier 3 full symbol detail (signature, params, relationships) |
|
|
213
|
+
| `search_context` | Semantic search across all indexed context |
|
|
214
|
+
| `get_repo_status` | Index status (staleness, file/symbol counts, last indexed) |
|
|
215
|
+
| `sync_repo` | Trigger incremental sync from within the LLM session |
|
|
216
|
+
| `list_repos` | List all registered repositories |
|
|
217
|
+
|
|
218
|
+
## CLI Reference
|
|
219
|
+
|
|
220
|
+
### `codetex add <target>`
|
|
221
|
+
|
|
222
|
+
Register a git repository. Accepts a local path or remote URL.
|
|
223
|
+
|
|
224
|
+
```bash
|
|
225
|
+
codetex add . # Current directory
|
|
226
|
+
codetex add /path/to/repo # Local path
|
|
227
|
+
codetex add https://github.com/user/repo.git # Remote (clones locally)
|
|
228
|
+
codetex add git@github.com:user/repo.git # SSH remote
|
|
229
|
+
```
|
|
230
|
+
|
|
231
|
+
### `codetex index <repo-name>`
|
|
232
|
+
|
|
233
|
+
Build a full index for a registered repository.
|
|
234
|
+
|
|
235
|
+
```bash
|
|
236
|
+
codetex index my-project # Full index
|
|
237
|
+
codetex index my-project --dry-run # Preview (files, symbols, estimated LLM calls/tokens)
|
|
238
|
+
codetex index my-project --path src/ # Index only files under src/
|
|
239
|
+
```
|
|
240
|
+
|
|
241
|
+
### `codetex sync <repo-name>`
|
|
242
|
+
|
|
243
|
+
Incremental sync to the current HEAD. Only files changed since the last indexed commit are re-analyzed.
|
|
244
|
+
|
|
245
|
+
```bash
|
|
246
|
+
codetex sync my-project # Sync changes
|
|
247
|
+
codetex sync my-project --dry-run # Preview what would change
|
|
248
|
+
codetex sync my-project --path src/ # Sync only changes under src/
|
|
249
|
+
```
|
|
250
|
+
|
|
251
|
+
### `codetex context <repo-name>`
|
|
252
|
+
|
|
253
|
+
Query indexed context at any tier.
|
|
254
|
+
|
|
255
|
+
```bash
|
|
256
|
+
codetex context my-project # Tier 1: repo overview
|
|
257
|
+
codetex context my-project --file src/main.py # Tier 2: file summary
|
|
258
|
+
codetex context my-project --symbol MyClass # Tier 3: symbol detail
|
|
259
|
+
codetex context my-project --query "error handling" # Semantic search
|
|
260
|
+
```
|
|
261
|
+
|
|
262
|
+
### `codetex status <repo-name>`
|
|
263
|
+
|
|
264
|
+
Show index status: indexed commit, current HEAD, staleness, file/symbol counts, token usage.
|
|
265
|
+
|
|
266
|
+
### `codetex list`
|
|
267
|
+
|
|
268
|
+
List all registered repositories with their index status.
|
|
269
|
+
|
|
270
|
+
### `codetex config show`
|
|
271
|
+
|
|
272
|
+
Display the current configuration.
|
|
273
|
+
|
|
274
|
+
### `codetex config set <key> <value>`
|
|
275
|
+
|
|
276
|
+
Update a configuration value.
|
|
277
|
+
|
|
278
|
+
```bash
|
|
279
|
+
codetex config set llm.api_key sk-ant-...
|
|
280
|
+
codetex config set llm.model claude-sonnet-4-5-20250929
|
|
281
|
+
codetex config set indexing.max_file_size_kb 1024
|
|
282
|
+
codetex config set indexing.max_concurrent_llm_calls 10
|
|
283
|
+
```
|
|
284
|
+
|
|
285
|
+
## Configuration
|
|
286
|
+
|
|
287
|
+
Configuration is loaded in layers (last wins):
|
|
288
|
+
|
|
289
|
+
1. **Defaults** — sensible out-of-the-box values
|
|
290
|
+
2. **TOML file** — `~/.codetex/config.toml`
|
|
291
|
+
3. **Environment variables** — override everything
|
|
292
|
+
|
|
293
|
+
### Config file
|
|
294
|
+
|
|
295
|
+
```toml
|
|
296
|
+
# ~/.codetex/config.toml
|
|
297
|
+
|
|
298
|
+
[storage]
|
|
299
|
+
data_dir = "~/.codetex" # Base directory for DB and cloned repos
|
|
300
|
+
|
|
301
|
+
[llm]
|
|
302
|
+
provider = "anthropic" # LLM provider (currently: anthropic)
|
|
303
|
+
model = "claude-sonnet-4-5-20250929" # Model used for summarization
|
|
304
|
+
api_key = "sk-ant-..." # Anthropic API key
|
|
305
|
+
|
|
306
|
+
[indexing]
|
|
307
|
+
max_file_size_kb = 512 # Skip files larger than this
|
|
308
|
+
max_concurrent_llm_calls = 5 # Parallel LLM requests during indexing
|
|
309
|
+
tier1_rebuild_threshold = 0.10 # Rebuild repo overview if >=10% of files changed on sync
|
|
310
|
+
|
|
311
|
+
[embedding]
|
|
312
|
+
model = "all-MiniLM-L6-v2" # Sentence-transformers model for embeddings
|
|
313
|
+
```
|
|
314
|
+
|
|
315
|
+
### Environment variables
|
|
316
|
+
|
|
317
|
+
| Variable | Maps to | Example |
|
|
318
|
+
|----------|---------|---------|
|
|
319
|
+
| `ANTHROPIC_API_KEY` | `llm.api_key` | `sk-ant-...` |
|
|
320
|
+
| `CODETEX_DATA_DIR` | `storage.data_dir` | `/custom/path` |
|
|
321
|
+
| `CODETEX_LLM_PROVIDER` | `llm.provider` | `anthropic` |
|
|
322
|
+
| `CODETEX_LLM_MODEL` | `llm.model` | `claude-sonnet-4-5-20250929` |
|
|
323
|
+
| `CODETEX_MAX_FILE_SIZE_KB` | `indexing.max_file_size_kb` | `1024` |
|
|
324
|
+
| `CODETEX_MAX_CONCURRENT_LLM` | `indexing.max_concurrent_llm_calls` | `10` |
|
|
325
|
+
| `CODETEX_TIER1_THRESHOLD` | `indexing.tier1_rebuild_threshold` | `0.15` |
|
|
326
|
+
| `CODETEX_EMBEDDING_MODEL` | `embedding.model` | `all-MiniLM-L6-v2` |
|
|
327
|
+
|
|
328
|
+
## File Exclusion
|
|
329
|
+
|
|
330
|
+
Files are filtered through multiple stages:
|
|
331
|
+
|
|
332
|
+
1. **Default excludes** — `node_modules/`, `__pycache__/`, `.git/`, `dist/`, `build/`, `.venv/`, `*.lock`, `*.min.js`, `*.pyc`, `*.so`, etc.
|
|
333
|
+
2. **`.gitignore`** — standard gitignore rules from your repo
|
|
334
|
+
3. **`.codetexignore`** — same syntax as `.gitignore`, placed in your repo root. Use `!pattern` to un-ignore files
|
|
335
|
+
4. **File size** — files exceeding `max_file_size_kb` are skipped
|
|
336
|
+
5. **Binary detection** — files with null bytes in the first 8 KB are skipped
|
|
337
|
+
|
|
338
|
+
## Language Support
|
|
339
|
+
|
|
340
|
+
| Language | Tree-sitter (full AST) | Fallback (regex) |
|
|
341
|
+
|----------|:----------------------:|:-----------------:|
|
|
342
|
+
| Python | Yes | Yes |
|
|
343
|
+
| JavaScript | Yes | Yes |
|
|
344
|
+
| TypeScript | Yes | Yes |
|
|
345
|
+
| Go | Yes | Yes |
|
|
346
|
+
| Rust | Yes | Yes |
|
|
347
|
+
| Java | Yes | Yes |
|
|
348
|
+
| Ruby | Yes | Yes |
|
|
349
|
+
| C/C++ | Yes | Yes |
|
|
350
|
+
| All others | — | Yes |
|
|
351
|
+
|
|
352
|
+
Tree-sitter grammars are optional. The fallback parser uses regex patterns to extract functions, classes, and imports from any language.
|
|
353
|
+
|
|
354
|
+
## Architecture
|
|
355
|
+
|
|
356
|
+
```
|
|
357
|
+
CLI (Typer) ──┐
|
|
358
|
+
├──▶ Core Services (Indexer, Syncer, ContextStore, SearchEngine)
|
|
359
|
+
MCP (FastMCP)─┘ │ │ │
|
|
360
|
+
Analysis LLM Provider Embeddings
|
|
361
|
+
(tree-sitter + (Anthropic) (sentence-transformers)
|
|
362
|
+
regex fallback) │ │
|
|
363
|
+
└──────────────┴──────────────┘
|
|
364
|
+
│
|
|
365
|
+
SQLite + sqlite-vec
|
|
366
|
+
```
|
|
367
|
+
|
|
368
|
+
- **Two entry points** (CLI and MCP server) share the same core service layer
|
|
369
|
+
- **No DI framework** — services are wired via a `create_app()` factory
|
|
370
|
+
- **All core services are async** — CLI bridges with `asyncio.run()`
|
|
371
|
+
- **Embeddings are local** — no external API calls for vector search (model auto-downloads on first run, ~90 MB)
|
|
372
|
+
- **Single SQLite database** — 6 main tables + 2 vector tables (384-dimensional embeddings)
|
|
373
|
+
|
|
374
|
+
## Development
|
|
375
|
+
|
|
376
|
+
```bash
|
|
377
|
+
git clone https://github.com/mrosata/codetex-mcp.git
|
|
378
|
+
cd codetex-mcp
|
|
379
|
+
|
|
380
|
+
# Install dependencies (including dev)
|
|
381
|
+
uv sync
|
|
382
|
+
|
|
383
|
+
# Run tests
|
|
384
|
+
uv run pytest
|
|
385
|
+
|
|
386
|
+
# Run tests with coverage
|
|
387
|
+
uv run pytest --cov=codetex_mcp
|
|
388
|
+
|
|
389
|
+
# Lint and format
|
|
390
|
+
uv run ruff check src/ tests/
|
|
391
|
+
uv run ruff format src/ tests/
|
|
392
|
+
|
|
393
|
+
# Type check
|
|
394
|
+
uv run mypy src/
|
|
395
|
+
```
|
|
396
|
+
|
|
397
|
+
## Releasing
|
|
398
|
+
|
|
399
|
+
Releases are automated via GitHub Actions and [python-semantic-release](https://python-semantic-release.readthedocs.io/). Version bumps are driven by **conventional commit messages** on `main`.
|
|
400
|
+
|
|
401
|
+
### Commit message format
|
|
402
|
+
|
|
403
|
+
| Prefix | Effect | Example |
|
|
404
|
+
|--------|--------|---------|
|
|
405
|
+
| `fix: ...` | Patch bump (0.1.0 → 0.1.1) | `fix: handle missing gitignore` |
|
|
406
|
+
| `feat: ...` | Minor bump (0.1.0 → 0.2.0) | `feat: add Ruby tree-sitter support` |
|
|
407
|
+
| `feat!: ...` | Major bump (0.1.0 → 1.0.0) | `feat!: redesign context API` |
|
|
408
|
+
| `docs:`, `chore:`, `ci:`, `test:`, `refactor:` | No release | `docs: update README` |
|
|
409
|
+
|
|
410
|
+
A `BREAKING CHANGE:` line in the commit body also triggers a major bump.
|
|
411
|
+
|
|
412
|
+
### How it works
|
|
413
|
+
|
|
414
|
+
1. Push or merge a PR to `main`
|
|
415
|
+
2. CI runs lint, format check, type check, and tests
|
|
416
|
+
3. The release workflow analyzes commits since the last tag
|
|
417
|
+
4. If a version bump is needed, it:
|
|
418
|
+
- Updates the version in `pyproject.toml`
|
|
419
|
+
- Creates a git tag (e.g., `v0.2.0`)
|
|
420
|
+
- Publishes a GitHub Release with a changelog
|
|
421
|
+
- Builds and publishes the package to PyPI
|
|
422
|
+
|
|
423
|
+
### Manual release (not recommended)
|
|
424
|
+
|
|
425
|
+
If you need to release without the automation:
|
|
426
|
+
|
|
427
|
+
```bash
|
|
428
|
+
uv build
|
|
429
|
+
uv publish
|
|
430
|
+
```
|
|
431
|
+
|
|
432
|
+
## License
|
|
433
|
+
|
|
434
|
+
MIT
|