codebase-mcp 0.1.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (32) hide show
  1. codebase_mcp-0.1.0/.gitignore +38 -0
  2. codebase_mcp-0.1.0/PKG-INFO +424 -0
  3. codebase_mcp-0.1.0/README.md +385 -0
  4. codebase_mcp-0.1.0/mcp-config-example.json +10 -0
  5. codebase_mcp-0.1.0/pyproject.toml +67 -0
  6. codebase_mcp-0.1.0/src/codebase_mcp/__init__.py +3 -0
  7. codebase_mcp-0.1.0/src/codebase_mcp/__main__.py +524 -0
  8. codebase_mcp-0.1.0/src/codebase_mcp/config.py +211 -0
  9. codebase_mcp-0.1.0/src/codebase_mcp/db.py +541 -0
  10. codebase_mcp-0.1.0/src/codebase_mcp/exporter.py +243 -0
  11. codebase_mcp-0.1.0/src/codebase_mcp/handoff.py +317 -0
  12. codebase_mcp-0.1.0/src/codebase_mcp/indexer.py +415 -0
  13. codebase_mcp-0.1.0/src/codebase_mcp/models.py +46 -0
  14. codebase_mcp-0.1.0/src/codebase_mcp/parsers/__init__.py +15 -0
  15. codebase_mcp-0.1.0/src/codebase_mcp/parsers/base.py +157 -0
  16. codebase_mcp-0.1.0/src/codebase_mcp/parsers/config_parsers.py +462 -0
  17. codebase_mcp-0.1.0/src/codebase_mcp/parsers/generic.py +95 -0
  18. codebase_mcp-0.1.0/src/codebase_mcp/parsers/go.py +222 -0
  19. codebase_mcp-0.1.0/src/codebase_mcp/parsers/python.py +231 -0
  20. codebase_mcp-0.1.0/src/codebase_mcp/parsers/rust.py +205 -0
  21. codebase_mcp-0.1.0/src/codebase_mcp/parsers/typescript.py +303 -0
  22. codebase_mcp-0.1.0/src/codebase_mcp/parsers/universal.py +625 -0
  23. codebase_mcp-0.1.0/src/codebase_mcp/server.py +1291 -0
  24. codebase_mcp-0.1.0/src/codebase_mcp/watcher.py +169 -0
  25. codebase_mcp-0.1.0/src/codebase_mcp/webui.py +611 -0
  26. codebase_mcp-0.1.0/tests/conftest.py +46 -0
  27. codebase_mcp-0.1.0/tests/test_call_graph.py +313 -0
  28. codebase_mcp-0.1.0/tests/test_db.py +386 -0
  29. codebase_mcp-0.1.0/tests/test_exclude.py +124 -0
  30. codebase_mcp-0.1.0/tests/test_indexer.py +266 -0
  31. codebase_mcp-0.1.0/tests/test_parsers.py +346 -0
  32. codebase_mcp-0.1.0/tests/test_scan.py +171 -0
@@ -0,0 +1,38 @@
1
+ # Python
2
+ __pycache__/
3
+ *.py[cod]
4
+ *.pyo
5
+ *.pyd
6
+ .Python
7
+ *.egg-info/
8
+ dist/
9
+ build/
10
+ .eggs/
11
+ *.egg
12
+ *.whl
13
+ .venv/
14
+ venv/
15
+ env/
16
+ pip-wheel-metadata/
17
+
18
+ # Testing
19
+ .pytest_cache/
20
+ .coverage
21
+ htmlcov/
22
+ .tox/
23
+
24
+ # IDE
25
+ .vscode/
26
+ .idea/
27
+ *.swp
28
+ *.swo
29
+
30
+ # Project index (each user builds their own)
31
+ .codebase-mcp/
32
+
33
+ # Exports / handoffs
34
+ exports/
35
+
36
+ # OS
37
+ .DS_Store
38
+ Thumbs.db
@@ -0,0 +1,424 @@
1
+ Metadata-Version: 2.4
2
+ Name: codebase-mcp
3
+ Version: 0.1.0
4
+ Summary: Persistent, portable codebase intelligence MCP server with incremental indexing and decision memory
5
+ License: MIT
6
+ Keywords: agent,ai,codebase,indexer,mcp,tree-sitter
7
+ Classifier: Development Status :: 4 - Beta
8
+ Classifier: Intended Audience :: Developers
9
+ Classifier: License :: OSI Approved :: MIT License
10
+ Classifier: Programming Language :: Python :: 3
11
+ Classifier: Programming Language :: Python :: 3.11
12
+ Classifier: Programming Language :: Python :: 3.12
13
+ Classifier: Programming Language :: Python :: 3.13
14
+ Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
15
+ Classifier: Topic :: Software Development :: Libraries
16
+ Requires-Python: >=3.11
17
+ Requires-Dist: click>=8.0
18
+ Requires-Dist: mcp[cli]>=1.0.0
19
+ Requires-Dist: rich>=13.0
20
+ Requires-Dist: tree-sitter-language-pack>=0.1.0
21
+ Requires-Dist: tree-sitter>=0.23.0
22
+ Provides-Extra: all
23
+ Requires-Dist: pyyaml>=6.0; extra == 'all'
24
+ Requires-Dist: watchdog>=4.0; extra == 'all'
25
+ Provides-Extra: dev
26
+ Requires-Dist: pytest-asyncio>=0.23; extra == 'dev'
27
+ Requires-Dist: pytest>=7.0; extra == 'dev'
28
+ Provides-Extra: langs
29
+ Requires-Dist: tree-sitter-go; extra == 'langs'
30
+ Requires-Dist: tree-sitter-javascript; extra == 'langs'
31
+ Requires-Dist: tree-sitter-python; extra == 'langs'
32
+ Requires-Dist: tree-sitter-rust; extra == 'langs'
33
+ Requires-Dist: tree-sitter-typescript; extra == 'langs'
34
+ Provides-Extra: watch
35
+ Requires-Dist: watchdog>=4.0; extra == 'watch'
36
+ Provides-Extra: yaml
37
+ Requires-Dist: pyyaml>=6.0; extra == 'yaml'
38
+ Description-Content-Type: text/markdown
39
+
40
+ # codebase-mcp
41
+
42
+ A local MCP server that gives any AI agent or IDE a persistent, structured understanding of your codebase. It indexes your project once, stores everything in a local SQLite database, and answers structural questions instantly without the agent having to read any files.
43
+
44
+ ---
45
+
46
+ ## The problem it solves
47
+
48
+ Every time you start a new session in Claude Code, Cursor, Cline, or any other AI tool, the agent has to re-read your files to understand the codebase. On large projects this burns thousands of tokens just on orientation, and the agent still only sees a shallow surface. It cannot answer questions like "what calls this function", "what changed since yesterday", or "what decisions were made and why" without reading everything again.
49
+
50
+ codebase-mcp solves this by:
51
+
52
+ - Parsing your code once with tree-sitter (Python, TypeScript, JavaScript, Go, Rust, and 50+ more languages)
53
+ - Storing every function, class, method, import, and call site in a local database
54
+ - Keeping that database up to date incrementally (only changed files are re-parsed)
55
+ - Exposing a set of MCP tools so any agent can query the structure without reading files
56
+ - Persisting architectural decisions, notes, and session history across every agent and IDE you use
57
+
58
+ ---
59
+
60
+ ## How it is different from standard approaches
61
+
62
+ | | Standard approach | codebase-mcp |
63
+ |---|---|---|
64
+ | Codebase understanding | Agent reads files in context window | Pre-indexed, queried via tool calls |
65
+ | Cost per session | Hundreds to thousands of tokens on orientation | Near zero — index is already built |
66
+ | Call graph | Not available | Full caller/callee resolution with 3 strategies |
67
+ | Decisions and notes | Lost when context resets | Stored in database, searchable forever |
68
+ | Switching agents/IDEs | Start over from scratch | Export once, import anywhere |
69
+ | Multi-language | Depends on the agent | 50+ languages via tree-sitter |
70
+ | Incremental updates | Full re-read every time | SHA256-based, only changed files re-parsed |
71
+
72
+ The core idea is that the agent should never read source files to understand structure. It should call tools and get structured answers back. Reading files is for when you actually need to see the code, not for orientation.
73
+
74
+ ---
75
+
76
+ ## Installation
77
+
78
+ Requires Python 3.11 or later.
79
+
80
+ ```bash
81
+ pip install git+https://github.com/vatsal2025/CodeBase.git
82
+ ```
83
+
84
+ Or clone and install in editable mode for development:
85
+
86
+ ```bash
87
+ git clone https://github.com/vatsal2025/CodeBase.git
88
+ cd CodeBase
89
+ pip install -e .
90
+ ```
91
+
92
+ ---
93
+
94
+ ## Registering with your IDE or agent
95
+
96
+ Run this once after installation. It writes the MCP server configuration into the config files for every supported tool automatically.
97
+
98
+ ```bash
99
+ codebase-mcp setup
100
+ ```
101
+
102
+ To target a specific tool:
103
+
104
+ ```bash
105
+ codebase-mcp setup --ide claude-code
106
+ codebase-mcp setup --ide cursor
107
+ codebase-mcp setup --ide windsurf
108
+ codebase-mcp setup --ide cline
109
+ codebase-mcp setup --ide zed
110
+ ```
111
+
112
+ For Claude Code global registration (available across all projects):
113
+
114
+ ```bash
115
+ codebase-mcp setup --ide claude-code --global
116
+ ```
117
+
118
+ After setup, restart your IDE or agent. The MCP server named `codebase-intel` will appear in the tool list.
119
+
120
+ ---
121
+
122
+ ## Indexing your project
123
+
124
+ Before the agent can use the tools, you need to build the index. You can do this from the terminal or let the agent do it on first run.
125
+
126
+ From the terminal:
127
+
128
+ ```bash
129
+ cd /path/to/your/project
130
+ codebase-mcp index .
131
+ ```
132
+
133
+ Force a full re-index (ignores hash cache):
134
+
135
+ ```bash
136
+ codebase-mcp index . --full
137
+ ```
138
+
139
+ The index is stored at `.codebase-mcp/index.db` inside your project directory. On typical projects it builds in under 30 seconds.
140
+
141
+ ---
142
+
143
+ ## Tools reference
144
+
145
+ These are the tools the agent has access to. A well-configured agent should call these instead of reading files.
146
+
147
+ ### Session start
148
+
149
+ **session_bootstrap(project_root)**
150
+ Call this at the start of every session. Returns project stats, most-referenced files, active decisions, recent notes, and whether the index needs updating. One call gives the agent a complete orientation with minimal tokens.
151
+
152
+ **what_changed(project_root)**
153
+ Returns what files changed since the last index run, with a diff of added and removed symbols. Use this when returning to a project after a gap.
154
+
155
+ **index_project(project_root, full_reindex)**
156
+ Builds or updates the index. Only changed files are re-parsed. Call this when session_bootstrap reports stale files.
157
+
158
+ **get_index_status(project_root)**
159
+ Check staleness without triggering a re-index.
160
+
161
+ ### Structural queries
162
+
163
+ **search_symbols(query, kind, language, limit)**
164
+ Full-text search across all symbols. Finds functions, classes, methods, structs, interfaces, and traits by name or docstring content. Works across all languages.
165
+
166
+ ```
167
+ search_symbols("authenticate")
168
+ search_symbols("User", kind="class")
169
+ search_symbols("validate", language="typescript")
170
+ ```
171
+
172
+ **get_symbol(qualified_name)**
173
+ Get complete details for one symbol: its signature, docstring, file location, and its full list of callers and callees.
174
+
175
+ ```
176
+ get_symbol("src.auth.jwt.verify_token")
177
+ ```
178
+
179
+ **get_file_outline(path)**
180
+ Get the complete structure of a file — all symbols organized hierarchically (methods grouped under their class), with signatures and line ranges. Does not require reading the file.
181
+
182
+ **get_file_context(path)**
183
+ Everything about a file in one call: its outline, who imports it, decisions linked to it, and notes attached to it.
184
+
185
+ **get_call_graph(qualified_name, depth)**
186
+ Trace callers and callees recursively. Shows exactly what calls a function and what that function calls, across files and languages.
187
+
188
+ **find_references(name)**
189
+ Find every place a symbol is used across the codebase.
190
+
191
+ **search_code(pattern, file_pattern, language)**
192
+ Grep-style search across source files. Returns matching lines with context. Use this when you need to see actual code, not just structure.
193
+
194
+ **find_todos()**
195
+ Returns all TODO, FIXME, HACK, BUG, and NOTE comments in the entire codebase in a single call.
196
+
197
+ **query_symbols_sql(sql)**
198
+ Run a raw SQL query against the symbol database for advanced filtering. Use this for anything the other search tools cannot express.
199
+
200
+ ### Knowledge persistence
201
+
202
+ **add_decision(title, body, category, session_id)**
203
+ Record an architectural decision. Categories: architecture, security, performance, api, database, general. These persist across every session, agent, and IDE.
204
+
205
+ ```
206
+ add_decision(
207
+ title="Use JWT for stateless auth",
208
+ body="Chosen over sessions to support horizontal scaling. HS256 with 1h expiry.",
209
+ category="security"
210
+ )
211
+ ```
212
+
213
+ **search_decisions(query, category, status)**
214
+ Search recorded decisions by keyword, category, or status (active/superseded/deprecated). Always check this at session start to recover context from previous sessions.
215
+
216
+ **update_decision(decision_id, status, body)**
217
+ Mark a decision as superseded or deprecated when the approach changes.
218
+
219
+ **add_note(body, scope, scope_ref)**
220
+ Attach a note to the whole project, a specific file, or a specific symbol. Notes persist and are returned by get_file_context.
221
+
222
+ ```
223
+ add_note("Token refresh logic is intentionally synchronous — see issue #42", scope="file", scope_ref="src/auth/jwt.py")
224
+ ```
225
+
226
+ **get_notes(scope, scope_ref)**
227
+ Retrieve notes for the project, a file, or a symbol.
228
+
229
+ ### Knowledge transfer
230
+
231
+ **export_context(project_root, output)**
232
+ Export decisions, notes, and optionally the full symbol index to a JSON file. Use this before switching agents or onboarding a new team member.
233
+
234
+ **import_context(import_file, project_root)**
235
+ Import an exported context file. Merges decisions and notes into the local database.
236
+
237
+ **create_handoff(project_root, output)**
238
+ Create a complete handoff package: context export plus a human-readable summary of the project state, top files, and active decisions. Use this when switching from one agent to another.
239
+
240
+ **index_github_repo(url)**
241
+ Clone a GitHub repository, index it, and return a bootstrap summary. Use this to explore any open source project without manually cloning.
242
+
243
+ ---
244
+
245
+ ## How to use it to full potential
246
+
247
+ ### At the start of every session
248
+
249
+ The agent should always call `session_bootstrap` first, before reading any files. If the index is stale, it should call `index_project` immediately after. Then it should call `search_decisions` to recover context from previous sessions.
250
+
251
+ A good agent prompt to enforce this:
252
+
253
+ ```
254
+ Before doing anything else in this project:
255
+ 1. Call session_bootstrap to orient yourself
256
+ 2. If index_stale is true, call index_project
257
+ 3. Call search_decisions to review past decisions
258
+ 4. Never read a source file to understand structure — use search_symbols, get_file_outline, or get_call_graph instead
259
+ ```
260
+
261
+ ### Recording decisions as you work
262
+
263
+ Every significant decision made during a session should be recorded immediately with `add_decision`. This is the most important habit. When you or a future agent returns to the project, `search_decisions` recovers this context in one call instead of re-deriving it from reading code.
264
+
265
+ What is worth recording:
266
+ - Why a library or framework was chosen
267
+ - Why a design pattern was picked over alternatives
268
+ - Security constraints or compliance requirements
269
+ - Non-obvious performance decisions
270
+ - Anything that would take more than 5 minutes to figure out from reading the code
271
+
272
+ ### Using the call graph
273
+
274
+ Before modifying a function, call `get_symbol` with its qualified name to see its callers. This tells you the blast radius of any change without reading files. `get_call_graph` with depth > 1 traces multi-level dependencies.
275
+
276
+ ### Watching for changes
277
+
278
+ If you run the watcher, the index stays current automatically (install the optional `watch` extra, which pulls in `watchdog`, if it is not already available):
279
+
280
+ ```bash
281
+ codebase-mcp serve --watch
282
+ ```
283
+
284
+ From within a session, you can also start it via the tool:
285
+
286
+ ```
287
+ start_file_watcher(project_root="...")
288
+ ```
289
+
290
+ ---
291
+
292
+ ## Transferring knowledge when switching platforms
293
+
294
+ The index and all knowledge (decisions, notes, session history) live in `.codebase-mcp/index.db` inside your project directory. There are three ways to transfer this to another platform or agent.
295
+
296
+ ### Option 1: Commit the database to git
297
+
298
+ Add the `.codebase-mcp/` directory to git instead of ignoring it. Anyone who clones the repository gets the full index, all decisions, and all notes immediately. No re-indexing required.
299
+
300
+ Remove the exclusion from your `.gitignore`:
301
+
302
+ ```
303
+ # Remove or comment out this line:
304
+ # .codebase-mcp/
305
+ ```
306
+
307
+ Then commit:
308
+
309
+ ```bash
310
+ git add .codebase-mcp/index.db
311
+ git commit -m "Add codebase index and decision log"
312
+ ```
313
+
314
+ This is the recommended approach for teams. New developers get the full context on clone.
315
+
316
+ ### Option 2: Export and import
317
+
318
+ Export from the source machine:
319
+
320
+ ```bash
321
+ codebase-mcp export . --output context.json
322
+ ```
323
+
324
+ Import on the destination:
325
+
326
+ ```bash
327
+ codebase-mcp import context.json /path/to/project
328
+ ```
329
+
330
+ The export includes decisions, notes, and optionally the full symbol index. You can share it as a file attachment, a gist, or through any file transfer mechanism.
331
+
332
+ The agent can also do this directly:
333
+
334
+ ```
335
+ export_context(project_root="/path/to/project", output="context.json")
336
+ import_context(import_file="context.json", project_root="/path/to/project")
337
+ ```
338
+
339
+ ### Option 3: Create a handoff package
340
+
341
+ When switching from one agent or IDE to another mid-session:
342
+
343
+ ```bash
344
+ codebase-mcp handoff . --output handoff/
345
+ ```
346
+
347
+ Or via tool:
348
+
349
+ ```
350
+ create_handoff(project_root="...")
351
+ ```
352
+
353
+ The handoff includes the export JSON plus a written summary of current project state, active decisions, and recent changes. Give this to the new agent at session start.
354
+
355
+ ### What transfers and what does not
356
+
357
+ | Data | Transfers | Notes |
358
+ |---|---|---|
359
+ | Decisions | Yes | All statuses |
360
+ | Notes | Yes | All scopes |
361
+ | Symbol index | Optional | Rebuilt automatically by index_project |
362
+ | Call graph | Rebuilt from index | Run index_project after import |
363
+ | Session history | No | Not included in exports or handoffs; it stays in the local `index.db` unless you commit the database (Option 1) |
364
+
365
+ ---
366
+
367
+ ## Configuration
368
+
369
+ The config file lives at `.codebase-mcp/config.json` inside your project. It is created automatically on first index with sensible defaults.
370
+
371
+ Key settings:
372
+
373
+ ```json
374
+ {
375
+ "project_root": "/path/to/project",
376
+ "exclude_patterns": [
377
+ "**/.git/**",
378
+ "**/node_modules/**",
379
+ "**/__pycache__/**",
380
+ "**/dist/**",
381
+ "**/build/**",
382
+ "**/*.min.js",
383
+ "**/*.map"
384
+ ],
385
+ "max_file_size_kb": 500,
386
+ "include_extensions": []
387
+ }
388
+ ```
389
+
390
+ `exclude_patterns` accepts standard glob patterns. `include_extensions` restricts indexing to specific file types if set.
391
+
392
+ ---
393
+
394
+ ## Supported languages
395
+
396
+ Full tree-sitter parsing (functions, classes, methods, imports, call graph):
397
+
398
+ - Python
399
+ - TypeScript and JavaScript (including JSX/TSX)
400
+ - Go
401
+ - Rust
402
+
403
+ Universal parser (symbols and structure, no call graph):
404
+
405
+ - Java, Kotlin, Swift, C, C++, C#, Ruby, PHP, Scala, Dart, Lua, Bash, SQL, HTML, CSS, YAML, TOML, JSON, Dockerfile, Makefile, and 30+ more via tree-sitter-language-pack
406
+
407
+ ---
408
+
409
+ ## Development
410
+
411
+ ```bash
412
+ git clone https://github.com/vatsal2025/CodeBase.git
413
+ cd CodeBase
414
+ pip install -e ".[dev]"
415
+ pytest tests/
416
+ ```
417
+
418
+ All 147 tests must pass before submitting changes.
419
+
420
+ ---
421
+
422
+ ## License
423
+
424
+ MIT