codetex-mcp 0.1.3__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (88) hide show
  1. codetex_mcp-0.1.3/.github/workflows/ci.yml +37 -0
  2. codetex_mcp-0.1.3/.github/workflows/release.yml +42 -0
  3. codetex_mcp-0.1.3/.gitignore +12 -0
  4. codetex_mcp-0.1.3/.python-version +1 -0
  5. codetex_mcp-0.1.3/CHANGELOG.md +41 -0
  6. codetex_mcp-0.1.3/CLAUDE.md +96 -0
  7. codetex_mcp-0.1.3/LICENSE +21 -0
  8. codetex_mcp-0.1.3/PKG-INFO +434 -0
  9. codetex_mcp-0.1.3/README.md +381 -0
  10. codetex_mcp-0.1.3/main.py +6 -0
  11. codetex_mcp-0.1.3/prd.json +386 -0
  12. codetex_mcp-0.1.3/progress.md +794 -0
  13. codetex_mcp-0.1.3/pyproject.toml +93 -0
  14. codetex_mcp-0.1.3/ralph.sh +40 -0
  15. codetex_mcp-0.1.3/src/codetex_mcp/__init__.py +0 -0
  16. codetex_mcp-0.1.3/src/codetex_mcp/__main__.py +4 -0
  17. codetex_mcp-0.1.3/src/codetex_mcp/analysis/__init__.py +0 -0
  18. codetex_mcp-0.1.3/src/codetex_mcp/analysis/fallback_parser.py +395 -0
  19. codetex_mcp-0.1.3/src/codetex_mcp/analysis/models.py +50 -0
  20. codetex_mcp-0.1.3/src/codetex_mcp/analysis/parser.py +85 -0
  21. codetex_mcp-0.1.3/src/codetex_mcp/analysis/tree_sitter.py +518 -0
  22. codetex_mcp-0.1.3/src/codetex_mcp/cli/__init__.py +0 -0
  23. codetex_mcp-0.1.3/src/codetex_mcp/cli/app.py +504 -0
  24. codetex_mcp-0.1.3/src/codetex_mcp/config/__init__.py +0 -0
  25. codetex_mcp-0.1.3/src/codetex_mcp/config/ignore.py +121 -0
  26. codetex_mcp-0.1.3/src/codetex_mcp/config/settings.py +153 -0
  27. codetex_mcp-0.1.3/src/codetex_mcp/core/__init__.py +79 -0
  28. codetex_mcp-0.1.3/src/codetex_mcp/core/context_store.py +185 -0
  29. codetex_mcp-0.1.3/src/codetex_mcp/core/indexer.py +508 -0
  30. codetex_mcp-0.1.3/src/codetex_mcp/core/repo_manager.py +120 -0
  31. codetex_mcp-0.1.3/src/codetex_mcp/core/search_engine.py +111 -0
  32. codetex_mcp-0.1.3/src/codetex_mcp/core/syncer.py +499 -0
  33. codetex_mcp-0.1.3/src/codetex_mcp/embeddings/__init__.py +0 -0
  34. codetex_mcp-0.1.3/src/codetex_mcp/embeddings/embedder.py +43 -0
  35. codetex_mcp-0.1.3/src/codetex_mcp/exceptions.py +53 -0
  36. codetex_mcp-0.1.3/src/codetex_mcp/git/__init__.py +0 -0
  37. codetex_mcp-0.1.3/src/codetex_mcp/git/operations.py +154 -0
  38. codetex_mcp-0.1.3/src/codetex_mcp/llm/__init__.py +0 -0
  39. codetex_mcp-0.1.3/src/codetex_mcp/llm/prompts.py +179 -0
  40. codetex_mcp-0.1.3/src/codetex_mcp/llm/provider.py +96 -0
  41. codetex_mcp-0.1.3/src/codetex_mcp/llm/rate_limiter.py +41 -0
  42. codetex_mcp-0.1.3/src/codetex_mcp/server/__init__.py +0 -0
  43. codetex_mcp-0.1.3/src/codetex_mcp/server/mcp_server.py +243 -0
  44. codetex_mcp-0.1.3/src/codetex_mcp/storage/__init__.py +0 -0
  45. codetex_mcp-0.1.3/src/codetex_mcp/storage/database.py +94 -0
  46. codetex_mcp-0.1.3/src/codetex_mcp/storage/files.py +140 -0
  47. codetex_mcp-0.1.3/src/codetex_mcp/storage/migrations/001_initial.sql +84 -0
  48. codetex_mcp-0.1.3/src/codetex_mcp/storage/migrations/__init__.py +0 -0
  49. codetex_mcp-0.1.3/src/codetex_mcp/storage/repositories.py +108 -0
  50. codetex_mcp-0.1.3/src/codetex_mcp/storage/symbols.py +120 -0
  51. codetex_mcp-0.1.3/src/codetex_mcp/storage/vectors.py +90 -0
  52. codetex_mcp-0.1.3/tasks/architecture.md +1434 -0
  53. codetex_mcp-0.1.3/tasks/prd-code-context-manager.md +225 -0
  54. codetex_mcp-0.1.3/tests/__init__.py +0 -0
  55. codetex_mcp-0.1.3/tests/test_analysis/__init__.py +0 -0
  56. codetex_mcp-0.1.3/tests/test_analysis/test_fallback_parser.py +344 -0
  57. codetex_mcp-0.1.3/tests/test_analysis/test_models.py +93 -0
  58. codetex_mcp-0.1.3/tests/test_analysis/test_parser.py +163 -0
  59. codetex_mcp-0.1.3/tests/test_analysis/test_tree_sitter.py +324 -0
  60. codetex_mcp-0.1.3/tests/test_cli/__init__.py +0 -0
  61. codetex_mcp-0.1.3/tests/test_cli/test_app.py +896 -0
  62. codetex_mcp-0.1.3/tests/test_config/__init__.py +0 -0
  63. codetex_mcp-0.1.3/tests/test_config/test_ignore.py +210 -0
  64. codetex_mcp-0.1.3/tests/test_config/test_settings.py +255 -0
  65. codetex_mcp-0.1.3/tests/test_core/__init__.py +0 -0
  66. codetex_mcp-0.1.3/tests/test_core/test_app_context.py +138 -0
  67. codetex_mcp-0.1.3/tests/test_core/test_context_store.py +315 -0
  68. codetex_mcp-0.1.3/tests/test_core/test_indexer.py +753 -0
  69. codetex_mcp-0.1.3/tests/test_core/test_repo_manager.py +260 -0
  70. codetex_mcp-0.1.3/tests/test_core/test_search_engine.py +235 -0
  71. codetex_mcp-0.1.3/tests/test_core/test_syncer.py +878 -0
  72. codetex_mcp-0.1.3/tests/test_embeddings/__init__.py +0 -0
  73. codetex_mcp-0.1.3/tests/test_embeddings/test_embedder.py +228 -0
  74. codetex_mcp-0.1.3/tests/test_git/__init__.py +0 -0
  75. codetex_mcp-0.1.3/tests/test_git/test_operations.py +339 -0
  76. codetex_mcp-0.1.3/tests/test_llm/__init__.py +0 -0
  77. codetex_mcp-0.1.3/tests/test_llm/test_prompts.py +296 -0
  78. codetex_mcp-0.1.3/tests/test_llm/test_provider.py +260 -0
  79. codetex_mcp-0.1.3/tests/test_llm/test_rate_limiter.py +119 -0
  80. codetex_mcp-0.1.3/tests/test_server/__init__.py +0 -0
  81. codetex_mcp-0.1.3/tests/test_server/test_mcp_server.py +503 -0
  82. codetex_mcp-0.1.3/tests/test_storage/__init__.py +0 -0
  83. codetex_mcp-0.1.3/tests/test_storage/test_database.py +280 -0
  84. codetex_mcp-0.1.3/tests/test_storage/test_files.py +234 -0
  85. codetex_mcp-0.1.3/tests/test_storage/test_repositories.py +139 -0
  86. codetex_mcp-0.1.3/tests/test_storage/test_symbols.py +274 -0
  87. codetex_mcp-0.1.3/tests/test_storage/test_vectors.py +260 -0
  88. codetex_mcp-0.1.3/uv.lock +2372 -0
@@ -0,0 +1,37 @@
1
+ name: CI
2
+
3
+ on:
4
+ push:
5
+ branches: [main]
6
+ pull_request:
7
+ branches: [main]
8
+
9
+ jobs:
10
+ ci:
11
+ runs-on: ubuntu-latest
12
+
13
+ steps:
14
+ - uses: actions/checkout@v4
15
+
16
+ - name: Set up Python
17
+ uses: actions/setup-python@v5
18
+ with:
19
+ python-version: "3.12"
20
+
21
+ - name: Install uv
22
+ uses: astral-sh/setup-uv@v4
23
+
24
+ - name: Install dependencies
25
+ run: uv sync
26
+
27
+ - name: Lint
28
+ run: uv run ruff check src/ tests/
29
+
30
+ - name: Format check
31
+ run: uv run ruff format --check src/ tests/
32
+
33
+ - name: Type check
34
+ run: uv run mypy src/
35
+
36
+ - name: Test
37
+ run: uv run pytest --cov=codetex_mcp
@@ -0,0 +1,42 @@
1
+ name: Release
2
+
3
+ on:
4
+ push:
5
+ branches: [main]
6
+
7
+ jobs:
8
+ release:
9
+ runs-on: ubuntu-latest
10
+ concurrency: release
11
+ permissions:
12
+ id-token: write
13
+ contents: write
14
+
15
+ steps:
16
+ - uses: actions/checkout@v4
17
+ with:
18
+ fetch-depth: 0
19
+
20
+ - name: Set up Python
21
+ uses: actions/setup-python@v5
22
+ with:
23
+ python-version: "3.12"
24
+
25
+ - name: Install uv
26
+ uses: astral-sh/setup-uv@v4
27
+
28
+ - name: Python Semantic Release
29
+ id: release
30
+ uses: python-semantic-release/python-semantic-release@v9
31
+ with:
32
+ github_token: ${{ secrets.GITHUB_TOKEN }}
33
+
34
+ - name: Build package
35
+ if: steps.release.outputs.released == 'true'
36
+ run: uv build
37
+
38
+ - name: Publish to PyPI
39
+ if: steps.release.outputs.released == 'true'
40
+ uses: pypa/gh-action-pypi-publish@release/v1
41
+ with:
42
+ password: ${{ secrets.PYPI_API_TOKEN }}
@@ -0,0 +1,12 @@
1
+ __pycache__/
2
+ *.py[cod]
3
+ *$py.class
4
+ *.egg-info/
5
+ dist/
6
+ build/
7
+ .venv/
8
+ .env
9
+ *.db
10
+ *.sqlite3
11
+ .coverage
12
+ .DS_Store
@@ -0,0 +1 @@
1
+ 3.12
@@ -0,0 +1,41 @@
1
+ # CHANGELOG
2
+
3
+
4
+ ## v0.1.3 (2026-03-30)
5
+
6
+ ### Bug Fixes
7
+
8
+ - Add .coverage to .gitignore
9
+ ([`178b3e9`](https://github.com/mrosata/codetex-mcp/commit/178b3e99a9a202accd0932fd8e2105ef4649d939))
10
+
11
+ Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
12
+
13
+
14
+ ## v0.1.2 (2026-03-30)
15
+
16
+ ### Bug Fixes
17
+
18
+ - **ci**: Move dev deps to dependency-groups and fix lint/format
19
+ ([`818cb98`](https://github.com/mrosata/codetex-mcp/commit/818cb982a09b251b1834b594c2187f958c703402))
20
+
21
+ Move dev dependencies (ruff, mypy, pytest) from [project.optional-dependencies] to
22
+ [dependency-groups] so uv sync installs them automatically in CI. Fix ruff lint errors (unused
23
+ imports) and apply ruff format across all source files.
24
+
25
+ Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
26
+
27
+
28
+ ## v0.1.1 (2026-03-29)
29
+
30
+ ### Bug Fixes
31
+
32
+ - **ci**: Remove build_command from semantic-release config
33
+ ([`a3dd857`](https://github.com/mrosata/codetex-mcp/commit/a3dd857ae2e6af13b9c5e45d1acec5726f80ecf9))
34
+
35
+ The PSR GitHub Action runs in a Docker container without uv, causing `uv build` to fail with exit
36
+ code 127. The workflow already has a separate build step that runs on the runner.
37
+
38
+ Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
39
+
40
+
41
+ ## v0.1.0 (2026-03-29)
@@ -0,0 +1,96 @@
1
+ # CLAUDE.md
2
+
3
+ This file provides guidance to Claude Code (claude.ai/code) when working with code in this repository.
4
+
5
+ ## Project Overview
6
+
7
+ **codetex-mcp** is a commit-aware code context manager for LLMs, providing both an MCP server (stdio transport) and a CLI. It indexes Git repositories into a multi-tier context hierarchy (repo overview → file summaries → symbol details), stores results in SQLite with sqlite-vec for vector search, and serves them to LLM clients via the MCP protocol.
8
+
9
+ **Status:** Early stage — project scaffolding and detailed architecture/PRD exist, implementation is in progress.
10
+
11
+ ## Development Environment
12
+
13
+ - **Python:** >=3.12 (see `.python-version`)
14
+ - **Package Manager:** uv
15
+ - **Linting/Formatting:** ruff
16
+ - **Type Checking:** mypy
17
+ - **Testing:** pytest with pytest-asyncio
18
+
19
+ ## Commands
20
+
21
+ ```bash
22
+ # Install dependencies
23
+ uv sync
24
+
25
+ # Run tests
26
+ uv run pytest
27
+
28
+ # Run a single test file
29
+ uv run pytest tests/test_storage/test_database.py
30
+
31
+ # Run tests with coverage
32
+ uv run pytest --cov=codetex_mcp
33
+
34
+ # Lint and format
35
+ uv run ruff check src/ tests/
36
+ uv run ruff format src/ tests/
37
+
38
+ # Type check
39
+ uv run mypy src/
40
+
41
+ # Run CLI
42
+ uv run codetex <command>
43
+
44
+ # Run as module
45
+ uv run python -m codetex_mcp
46
+
47
+ # Run MCP server (stdio transport)
48
+ uv run codetex serve
49
+ ```
50
+
51
+ ## Architecture
52
+
53
+ Two entry points (CLI via Typer, MCP server via FastMCP) share the same core service layer. No DI framework — services are wired manually via a `create_app()` factory.
54
+
55
+ ```
56
+ CLI (typer) ──┐
57
+ ├──▶ core/ (RepoManager, Indexer, Syncer, ContextStore, SearchEngine)
58
+ MCP (FastMCP)─┘ │ │ │
59
+ analysis/ llm/ embeddings/
60
+ (tree-sitter + fallback) (sentence-transformers)
61
+ └──────────┼────────────┘
62
+ storage/ (SQLite + sqlite-vec)
63
+
64
+ git/ ←─┴──▶ config/
65
+ ```
66
+
67
+ **Key modules under `src/codetex_mcp/`:**
68
+ - `cli/app.py` — Typer app with 8 commands (add, index, sync, context, status, list, serve, config)
69
+ - `server/mcp_server.py` — FastMCP server with 7 tools
70
+ - `core/` — Domain logic (no direct I/O, dependencies injected)
71
+ - `analysis/` — Tree-sitter AST parsing with regex fallback; `parser.py` is the unified dispatcher
72
+ - `llm/provider.py` — Abstract base + Anthropic implementation for tier summarization
73
+ - `embeddings/embedder.py` — sentence-transformers wrapper (lazy model loading)
74
+ - `storage/` — SQLite via aiosqlite; DAO pattern with separate modules per entity (repositories, files, symbols, vectors)
75
+ - `storage/migrations/` — SQL migration files applied by `database.py`
76
+ - `git/operations.py` — Subprocess git wrapper (no GitPython)
77
+ - `config/settings.py` — TOML config loader with env var overrides
78
+ - `exceptions.py` — Error hierarchy (11 exception classes)
79
+
80
+ **Data model:** Single SQLite database for all repos. 6 main tables (repositories, files, symbols, dependencies, repo_overviews, schema_version) + 2 vector tables (384-dim embeddings for files and symbols).
81
+
82
+ **Pipelines:** Full index is a 9-step pipeline (discover files → parse AST → generate summaries → embed → store). Incremental sync is a 7-step pipeline using git diff to process only changed files.
83
+
84
+ ## Reference Documents
85
+
86
+ - `tasks/architecture.md` — Complete technical architecture (module interfaces, data model, pipeline specs, config schema, wiring)
87
+ - `tasks/prd-code-context-manager.md` — Product requirements document
88
+ - `prd.json` — PRD in structured JSON with 20 user stories and acceptance criteria
89
+
90
+ ## Conventions
91
+
92
+ - All core services are async
93
+ - Tree-sitter grammars are optional extras (installed per-language)
94
+ - Config lives at `~/.codetex/config.toml` at runtime; SQLite database at `~/.codetex/codetex.db`
95
+ - MCP tool responses are structured markdown strings optimized for LLM consumption
96
+ - CLI output uses `rich` for progress bars, tables, and markdown rendering
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2026 Michael Rosata
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
@@ -0,0 +1,434 @@
1
+ Metadata-Version: 2.4
2
+ Name: codetex-mcp
3
+ Version: 0.1.3
4
+ Summary: Commit-aware code context manager for LLMs - MCP server and CLI
5
+ Project-URL: Homepage, https://github.com/mrosata/codetex-mcp
6
+ Project-URL: Repository, https://github.com/mrosata/codetex-mcp
7
+ Project-URL: Issues, https://github.com/mrosata/codetex-mcp/issues
8
+ Author-email: Michael Rosata <michael.rosata@gmail.com>
9
+ License: MIT
10
+ License-File: LICENSE
11
+ Keywords: code-context,llm,mcp,sqlite,tree-sitter
12
+ Classifier: Development Status :: 3 - Alpha
13
+ Classifier: License :: OSI Approved :: MIT License
14
+ Classifier: Programming Language :: Python :: 3.12
15
+ Classifier: Topic :: Software Development :: Libraries
16
+ Requires-Python: >=3.12
17
+ Requires-Dist: aiosqlite>=0.20
18
+ Requires-Dist: anthropic>=0.40
19
+ Requires-Dist: mcp>=1.0
20
+ Requires-Dist: pathspec>=0.12
21
+ Requires-Dist: rich>=13.0
22
+ Requires-Dist: sentence-transformers>=3.0
23
+ Requires-Dist: sqlite-vec>=0.1
24
+ Requires-Dist: tiktoken>=0.7
25
+ Requires-Dist: tree-sitter>=0.23
26
+ Requires-Dist: typer>=0.9
27
+ Provides-Extra: all-grammars
28
+ Requires-Dist: tree-sitter-cpp>=0.23; extra == 'all-grammars'
29
+ Requires-Dist: tree-sitter-go>=0.23; extra == 'all-grammars'
30
+ Requires-Dist: tree-sitter-java>=0.23; extra == 'all-grammars'
31
+ Requires-Dist: tree-sitter-javascript>=0.23; extra == 'all-grammars'
32
+ Requires-Dist: tree-sitter-python>=0.23; extra == 'all-grammars'
33
+ Requires-Dist: tree-sitter-ruby>=0.23; extra == 'all-grammars'
34
+ Requires-Dist: tree-sitter-rust>=0.23; extra == 'all-grammars'
35
+ Requires-Dist: tree-sitter-typescript>=0.23; extra == 'all-grammars'
36
+ Provides-Extra: tree-sitter-cpp
37
+ Requires-Dist: tree-sitter-cpp>=0.23; extra == 'tree-sitter-cpp'
38
+ Provides-Extra: tree-sitter-go
39
+ Requires-Dist: tree-sitter-go>=0.23; extra == 'tree-sitter-go'
40
+ Provides-Extra: tree-sitter-java
41
+ Requires-Dist: tree-sitter-java>=0.23; extra == 'tree-sitter-java'
42
+ Provides-Extra: tree-sitter-javascript
43
+ Requires-Dist: tree-sitter-javascript>=0.23; extra == 'tree-sitter-javascript'
44
+ Provides-Extra: tree-sitter-python
45
+ Requires-Dist: tree-sitter-python>=0.23; extra == 'tree-sitter-python'
46
+ Provides-Extra: tree-sitter-ruby
47
+ Requires-Dist: tree-sitter-ruby>=0.23; extra == 'tree-sitter-ruby'
48
+ Provides-Extra: tree-sitter-rust
49
+ Requires-Dist: tree-sitter-rust>=0.23; extra == 'tree-sitter-rust'
50
+ Provides-Extra: tree-sitter-typescript
51
+ Requires-Dist: tree-sitter-typescript>=0.23; extra == 'tree-sitter-typescript'
52
+ Description-Content-Type: text/markdown
53
+
54
+ # codetex-mcp
55
+
56
+ A commit-aware code context manager for LLMs. Indexes Git repositories into a multi-tier knowledge hierarchy — repo overviews, file summaries, and symbol details — stored in SQLite with vector search. Serves context to LLM clients via the [Model Context Protocol](https://modelcontextprotocol.io/) (MCP) or a local CLI.
57
+
58
+ ## What It Does
59
+
60
+ codetex builds a structured, searchable index of your codebase that LLMs can query on demand:
61
+
62
+ - **Tier 1 — Repo Overview:** Purpose, architecture, directory structure, key technologies, entry points
63
+ - **Tier 2 — File Summaries:** Per-file purpose, public interfaces, dependencies, roles
64
+ - **Tier 3 — Symbol Details:** Function/class signatures, parameters, return types, call relationships
65
+
66
+ Summaries are generated by an LLM (Anthropic Claude). Embeddings are computed locally with [sentence-transformers](https://www.sbert.net/) for semantic search. Everything is stored in a single SQLite database with [sqlite-vec](https://github.com/asg017/sqlite-vec) for vector queries.
67
+
68
+ Incremental sync means only changed files are re-analyzed when you update your code.
69
+
70
+ ## Requirements
71
+
72
+ - Python 3.12+
73
+ - Git
74
+ - An [Anthropic API key](https://console.anthropic.com/) (for indexing)
75
+
76
+ ## Installation
77
+
78
+ ```bash
79
+ # With pip
80
+ pip install codetex-mcp
81
+
82
+ # With uv (recommended)
83
+ uv tool install codetex-mcp
84
+ ```
85
+
86
+ ### Tree-sitter grammars (optional)
87
+
88
+ codetex uses tree-sitter for accurate AST parsing. Without grammars installed, it falls back to regex-based extraction that works for any language but is less precise.
89
+
90
+ Install grammars for the languages you work with:
91
+
92
+ ```bash
93
+ # Individual languages
94
+ pip install "codetex-mcp[tree-sitter-python]"
95
+ pip install "codetex-mcp[tree-sitter-typescript]"
96
+
97
+ # All 8 supported languages (Python, JS, TS, Go, Rust, Java, Ruby, C/C++)
98
+ pip install "codetex-mcp[all-grammars]"
99
+ ```
100
+
101
+ ## Quick Start
102
+
103
+ ### 1. Set your Anthropic API key
104
+
105
+ ```bash
106
+ # Via environment variable
107
+ export ANTHROPIC_API_KEY=sk-ant-...
108
+
109
+ # Or via config
110
+ codetex config set llm.api_key sk-ant-...
111
+ ```
112
+
113
+ ### 2. Add a repository
114
+
115
+ ```bash
116
+ # Local repo
117
+ codetex add /path/to/your/project
118
+
119
+ # Remote repo (clones to ~/.codetex/repos/)
120
+ codetex add https://github.com/user/repo.git
121
+ ```
122
+
123
+ ### 3. Index it
124
+
125
+ ```bash
126
+ # Preview what indexing will cost (no API calls)
127
+ codetex index my-project --dry-run
128
+
129
+ # Build the full index
130
+ codetex index my-project
131
+ ```
132
+
133
+ ### 4. Query your codebase
134
+
135
+ ```bash
136
+ # Repo overview (Tier 1)
137
+ codetex context my-project
138
+
139
+ # File summary (Tier 2)
140
+ codetex context my-project --file src/auth/login.py
141
+
142
+ # Symbol detail (Tier 3)
143
+ codetex context my-project --symbol authenticate_user
144
+
145
+ # Semantic search
146
+ codetex context my-project --query "how is authentication implemented?"
147
+ ```
148
+
149
+ ### 5. Keep it up to date
150
+
151
+ ```bash
152
+ # Incremental sync — only re-analyzes changed files
153
+ codetex sync my-project
154
+ ```
155
+
156
+ ## MCP Server Setup
157
+
158
+ The MCP server lets LLM clients (such as Claude Code, Cursor, and Windsurf) query your indexed codebases directly.
159
+
160
+ ### Claude Code
161
+
162
+ Add to your Claude Code MCP configuration (a `.mcp.json` file in your project root, or register the server with `claude mcp add`):
163
+
164
+ ```json
165
+ {
166
+ "mcpServers": {
167
+ "codetex": {
168
+ "command": "codetex",
169
+ "args": ["serve"],
170
+ "env": {
171
+ "ANTHROPIC_API_KEY": "sk-ant-..."
172
+ }
173
+ }
174
+ }
175
+ }
176
+ ```
177
+
178
+ If you installed with `uv tool`, use the full path:
179
+
180
+ ```json
181
+ {
182
+ "mcpServers": {
183
+ "codetex": {
184
+ "command": "/path/to/codetex",
185
+ "args": ["serve"],
186
+ "env": {
187
+ "ANTHROPIC_API_KEY": "sk-ant-..."
188
+ }
189
+ }
190
+ }
191
+ }
192
+ ```
193
+
194
+ Find the path with `which codetex`, or print the directory where uv installs tool executables with `uv tool dir --bin`.
195
+
196
+ ### Other MCP Clients
197
+
198
+ Any client that supports MCP stdio transport can use codetex. The server command is:
199
+
200
+ ```bash
201
+ codetex serve
202
+ ```
203
+
204
+ ### Available MCP Tools
205
+
206
+ Once connected, the LLM has access to 7 tools:
207
+
208
+ | Tool | Description |
209
+ |------|-------------|
210
+ | `get_repo_overview` | Tier 1 repo overview (architecture, technologies, entry points) |
211
+ | `get_file_context` | Tier 2 file summary with symbol list |
212
+ | `get_symbol_detail` | Tier 3 full symbol detail (signature, params, relationships) |
213
+ | `search_context` | Semantic search across all indexed context |
214
+ | `get_repo_status` | Index status (staleness, file/symbol counts, last indexed) |
215
+ | `sync_repo` | Trigger incremental sync from within the LLM session |
216
+ | `list_repos` | List all registered repositories |
217
+
218
+ ## CLI Reference
219
+
220
+ ### `codetex add <target>`
221
+
222
+ Register a git repository. Accepts a local path or remote URL.
223
+
224
+ ```bash
225
+ codetex add . # Current directory
226
+ codetex add /path/to/repo # Local path
227
+ codetex add https://github.com/user/repo.git # Remote (clones locally)
228
+ codetex add git@github.com:user/repo.git # SSH remote
229
+ ```
230
+
231
+ ### `codetex index <repo-name>`
232
+
233
+ Build a full index for a registered repository.
234
+
235
+ ```bash
236
+ codetex index my-project # Full index
237
+ codetex index my-project --dry-run # Preview (files, symbols, estimated LLM calls/tokens)
238
+ codetex index my-project --path src/ # Index only files under src/
239
+ ```
240
+
241
+ ### `codetex sync <repo-name>`
242
+
243
+ Incrementally sync the index to the current HEAD. Only files changed since the last indexed commit are re-analyzed.
244
+
245
+ ```bash
246
+ codetex sync my-project # Sync changes
247
+ codetex sync my-project --dry-run # Preview what would change
248
+ codetex sync my-project --path src/ # Sync only changes under src/
249
+ ```
250
+
251
+ ### `codetex context <repo-name>`
252
+
253
+ Query indexed context at any tier.
254
+
255
+ ```bash
256
+ codetex context my-project # Tier 1: repo overview
257
+ codetex context my-project --file src/main.py # Tier 2: file summary
258
+ codetex context my-project --symbol MyClass # Tier 3: symbol detail
259
+ codetex context my-project --query "error handling" # Semantic search
260
+ ```
261
+
262
+ ### `codetex status <repo-name>`
263
+
264
+ Show index status: indexed commit, current HEAD, staleness, file/symbol counts, token usage.
265
+
266
+ ### `codetex list`
267
+
268
+ List all registered repositories with their index status.
269
+
270
+ ### `codetex config show`
271
+
272
+ Display the current configuration.
273
+
274
+ ### `codetex config set <key> <value>`
275
+
276
+ Update a configuration value.
277
+
278
+ ```bash
279
+ codetex config set llm.api_key sk-ant-...
280
+ codetex config set llm.model claude-sonnet-4-5-20250929
281
+ codetex config set indexing.max_file_size_kb 1024
282
+ codetex config set indexing.max_concurrent_llm_calls 10
283
+ ```
284
+
285
+ ## Configuration
286
+
287
+ Configuration is loaded in layers; each later layer overrides the ones before it:
288
+
289
+ 1. **Defaults** — sensible out-of-the-box values
290
+ 2. **TOML file** — `~/.codetex/config.toml`
291
+ 3. **Environment variables** — override everything
292
+
293
+ ### Config file
294
+
295
+ ```toml
296
+ # ~/.codetex/config.toml
297
+
298
+ [storage]
299
+ data_dir = "~/.codetex" # Base directory for DB and cloned repos
300
+
301
+ [llm]
302
+ provider = "anthropic" # LLM provider (currently: anthropic)
303
+ model = "claude-sonnet-4-5-20250929" # Model used for summarization
304
+ api_key = "sk-ant-..." # Anthropic API key
305
+
306
+ [indexing]
307
+ max_file_size_kb = 512 # Skip files larger than this
308
+ max_concurrent_llm_calls = 5 # Parallel LLM requests during indexing
309
+ tier1_rebuild_threshold = 0.10 # Rebuild repo overview if >=10% of files changed on sync
310
+
311
+ [embedding]
312
+ model = "all-MiniLM-L6-v2" # Sentence-transformers model for embeddings
313
+ ```
314
+
315
+ ### Environment variables
316
+
317
+ | Variable | Maps to | Example |
318
+ |----------|---------|---------|
319
+ | `ANTHROPIC_API_KEY` | `llm.api_key` | `sk-ant-...` |
320
+ | `CODETEX_DATA_DIR` | `storage.data_dir` | `/custom/path` |
321
+ | `CODETEX_LLM_PROVIDER` | `llm.provider` | `anthropic` |
322
+ | `CODETEX_LLM_MODEL` | `llm.model` | `claude-sonnet-4-5-20250929` |
323
+ | `CODETEX_MAX_FILE_SIZE_KB` | `indexing.max_file_size_kb` | `1024` |
324
+ | `CODETEX_MAX_CONCURRENT_LLM` | `indexing.max_concurrent_llm_calls` | `10` |
325
+ | `CODETEX_TIER1_THRESHOLD` | `indexing.tier1_rebuild_threshold` | `0.15` |
326
+ | `CODETEX_EMBEDDING_MODEL` | `embedding.model` | `all-MiniLM-L6-v2` |
327
+
328
+ ## File Exclusion
329
+
330
+ Files are filtered through multiple stages:
331
+
332
+ 1. **Default excludes** — `node_modules/`, `__pycache__/`, `.git/`, `dist/`, `build/`, `.venv/`, `*.lock`, `*.min.js`, `*.pyc`, `*.so`, etc.
333
+ 2. **`.gitignore`** — standard gitignore rules from your repo
334
+ 3. **`.codetexignore`** — same syntax as `.gitignore`, placed in your repo root. Use `!pattern` to un-ignore files
335
+ 4. **File size** — files exceeding `max_file_size_kb` are skipped
336
+ 5. **Binary detection** — files with null bytes in the first 8 KB are skipped
337
+
338
+ ## Language Support
339
+
340
+ | Language | Tree-sitter (full AST) | Fallback (regex) |
341
+ |----------|:----------------------:|:-----------------:|
342
+ | Python | Yes | Yes |
343
+ | JavaScript | Yes | Yes |
344
+ | TypeScript | Yes | Yes |
345
+ | Go | Yes | Yes |
346
+ | Rust | Yes | Yes |
347
+ | Java | Yes | Yes |
348
+ | Ruby | Yes | Yes |
349
+ | C/C++ | Yes | Yes |
350
+ | All others | — | Yes |
351
+
352
+ Tree-sitter grammars are optional. The fallback parser uses regex patterns to extract functions, classes, and imports from any language.
353
+
354
+ ## Architecture
355
+
356
+ ```
357
+ CLI (Typer) ──┐
358
+ ├──▶ Core Services (Indexer, Syncer, ContextStore, SearchEngine)
359
+ MCP (FastMCP)─┘ │ │ │
360
+ Analysis LLM Provider Embeddings
361
+ (tree-sitter + (Anthropic) (sentence-transformers)
362
+ regex fallback) │ │
363
+ └──────────────┴──────────────┘
364
+
365
+ SQLite + sqlite-vec
366
+ ```
367
+
368
+ - **Two entry points** (CLI and MCP server) share the same core service layer
369
+ - **No DI framework** — services are wired via a `create_app()` factory
370
+ - **All core services are async** — CLI bridges with `asyncio.run()`
371
+ - **Embeddings are local** — vector search makes no external API calls; the embedding model (~90 MB) is downloaded once, on first run
372
+ - **Single SQLite database** — 6 main tables + 2 vector tables (384-dimensional embeddings)
373
+
374
+ ## Development
375
+
376
+ ```bash
377
+ git clone https://github.com/mrosata/codetex-mcp.git
378
+ cd codetex-mcp
379
+
380
+ # Install dependencies (including dev)
381
+ uv sync
382
+
383
+ # Run tests
384
+ uv run pytest
385
+
386
+ # Run tests with coverage
387
+ uv run pytest --cov=codetex_mcp
388
+
389
+ # Lint and format
390
+ uv run ruff check src/ tests/
391
+ uv run ruff format src/ tests/
392
+
393
+ # Type check
394
+ uv run mypy src/
395
+ ```
396
+
397
+ ## Releasing
398
+
399
+ Releases are automated via GitHub Actions and [python-semantic-release](https://python-semantic-release.readthedocs.io/). Version bumps are driven by **conventional commit messages** on `main`.
400
+
401
+ ### Commit message format
402
+
403
+ | Prefix | Effect | Example |
404
+ |--------|--------|---------|
405
+ | `fix: ...` | Patch bump (0.1.0 → 0.1.1) | `fix: handle missing gitignore` |
406
+ | `feat: ...` | Minor bump (0.1.0 → 0.2.0) | `feat: add Ruby tree-sitter support` |
407
+ | `feat!: ...` | Major bump (0.1.0 → 1.0.0) | `feat!: redesign context API` |
408
+ | `docs:`, `chore:`, `ci:`, `test:`, `refactor:` | No release | `docs: update README` |
409
+
410
+ A `BREAKING CHANGE:` line in the commit body also triggers a major bump.
411
+
412
+ ### How it works
413
+
414
+ 1. Push or merge a PR to `main`
415
+ 2. CI runs lint, format check, type check, and tests
416
+ 3. The release workflow analyzes commits since the last tag
417
+ 4. If a version bump is needed, it:
418
+ - Updates the version in `pyproject.toml`
419
+ - Creates a git tag (e.g., `v0.2.0`)
420
+ - Publishes a GitHub Release with a changelog
421
+ - Builds and publishes the package to PyPI
422
+
423
+ ### Manual release (not recommended)
424
+
425
+ If you need to release without the automation:
426
+
427
+ ```bash
428
+ uv build
429
+ uv publish
430
+ ```
431
+
432
+ ## License
433
+
434
+ MIT