projectmind-mcp 0.9.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (55) hide show
  1. projectmind_mcp-0.9.0/PKG-INFO +399 -0
  2. projectmind_mcp-0.9.0/README.md +351 -0
  3. projectmind_mcp-0.9.0/annotations.py +315 -0
  4. projectmind_mcp-0.9.0/ast_splitter.py +382 -0
  5. projectmind_mcp-0.9.0/background_indexer.py +523 -0
  6. projectmind_mcp-0.9.0/bm25_index.py +221 -0
  7. projectmind_mcp-0.9.0/cache_manager.py +286 -0
  8. projectmind_mcp-0.9.0/code_intelligence.py +1551 -0
  9. projectmind_mcp-0.9.0/codebase_indexer.py +440 -0
  10. projectmind_mcp-0.9.0/config.py +421 -0
  11. projectmind_mcp-0.9.0/context.py +147 -0
  12. projectmind_mcp-0.9.0/exceptions.py +49 -0
  13. projectmind_mcp-0.9.0/git_utils.py +161 -0
  14. projectmind_mcp-0.9.0/incremental_indexing.py +158 -0
  15. projectmind_mcp-0.9.0/logger.py +119 -0
  16. projectmind_mcp-0.9.0/maintenance.py +560 -0
  17. projectmind_mcp-0.9.0/manifest.py +495 -0
  18. projectmind_mcp-0.9.0/mcp_server.py +3477 -0
  19. projectmind_mcp-0.9.0/memory_limited_indexer.py +112 -0
  20. projectmind_mcp-0.9.0/memory_manager.py +413 -0
  21. projectmind_mcp-0.9.0/projectmind_mcp.egg-info/PKG-INFO +399 -0
  22. projectmind_mcp-0.9.0/projectmind_mcp.egg-info/SOURCES.txt +53 -0
  23. projectmind_mcp-0.9.0/projectmind_mcp.egg-info/dependency_links.txt +1 -0
  24. projectmind_mcp-0.9.0/projectmind_mcp.egg-info/entry_points.txt +3 -0
  25. projectmind_mcp-0.9.0/projectmind_mcp.egg-info/requires.txt +28 -0
  26. projectmind_mcp-0.9.0/projectmind_mcp.egg-info/top_level.txt +21 -0
  27. projectmind_mcp-0.9.0/pyproject.toml +195 -0
  28. projectmind_mcp-0.9.0/query_router.py +417 -0
  29. projectmind_mcp-0.9.0/setup.cfg +4 -0
  30. projectmind_mcp-0.9.0/symbol_graph.py +1345 -0
  31. projectmind_mcp-0.9.0/tests/test_annotations_and_bm25_only.py +190 -0
  32. projectmind_mcp-0.9.0/tests/test_cache_manager.py +242 -0
  33. projectmind_mcp-0.9.0/tests/test_config.py +137 -0
  34. projectmind_mcp-0.9.0/tests/test_context.py +111 -0
  35. projectmind_mcp-0.9.0/tests/test_git_utils.py +173 -0
  36. projectmind_mcp-0.9.0/tests/test_import_graph.py +148 -0
  37. projectmind_mcp-0.9.0/tests/test_import_graph_cap.py +63 -0
  38. projectmind_mcp-0.9.0/tests/test_incremental_indexing.py +198 -0
  39. projectmind_mcp-0.9.0/tests/test_indexing_limit.py +37 -0
  40. projectmind_mcp-0.9.0/tests/test_logging.py +110 -0
  41. projectmind_mcp-0.9.0/tests/test_manifest_symbols.py +49 -0
  42. projectmind_mcp-0.9.0/tests/test_mcp_tools.py +303 -0
  43. projectmind_mcp-0.9.0/tests/test_memory_limited_indexer.py +201 -0
  44. projectmind_mcp-0.9.0/tests/test_memory_limits.py +238 -0
  45. projectmind_mcp-0.9.0/tests/test_memory_search.py +94 -0
  46. projectmind_mcp-0.9.0/tests/test_path_validation.py +93 -0
  47. projectmind_mcp-0.9.0/tests/test_python_resolution.py +93 -0
  48. projectmind_mcp-0.9.0/tests/test_query_router_fusion.py +74 -0
  49. projectmind_mcp-0.9.0/tests/test_regression_fixes.py +286 -0
  50. projectmind_mcp-0.9.0/tests/test_search.py +243 -0
  51. projectmind_mcp-0.9.0/tests/test_stdio.py +85 -0
  52. projectmind_mcp-0.9.0/tests/test_tool_budget.py +74 -0
  53. projectmind_mcp-0.9.0/tests/test_transactional_save.py +208 -0
  54. projectmind_mcp-0.9.0/tests/test_unicode_handling.py +119 -0
  55. projectmind_mcp-0.9.0/vector_store_manager.py +528 -0
@@ -0,0 +1,399 @@
1
+ Metadata-Version: 2.4
2
+ Name: projectmind-mcp
3
+ Version: 0.9.0
4
+ Summary: Give your AI coding assistant a brain: persistent memory, hybrid code search, AI-authored annotations, and an AST symbol graph — 100% local MCP server, no API keys.
5
+ Author: Mykola Riabokon
6
+ License: MIT
7
+ Project-URL: Homepage, https://github.com/Nik0lay1/project-mind-mcp
8
+ Project-URL: Repository, https://github.com/Nik0lay1/project-mind-mcp
9
+ Project-URL: Changelog, https://github.com/Nik0lay1/project-mind-mcp/blob/main/CHANGELOG.md
10
+ Project-URL: Issues, https://github.com/Nik0lay1/project-mind-mcp/issues
11
+ Keywords: mcp,model-context-protocol,code-search,claude,ai-memory,bm25,symbol-graph
12
+ Classifier: Development Status :: 4 - Beta
13
+ Classifier: Intended Audience :: Developers
14
+ Classifier: License :: OSI Approved :: MIT License
15
+ Classifier: Operating System :: OS Independent
16
+ Classifier: Programming Language :: Python :: 3.10
17
+ Classifier: Programming Language :: Python :: 3.11
18
+ Classifier: Programming Language :: Python :: 3.12
19
+ Classifier: Topic :: Software Development
20
+ Requires-Python: <3.13,>=3.10
21
+ Description-Content-Type: text/markdown
22
+ Requires-Dist: mcp>=0.1.0
23
+ Requires-Dist: langchain-text-splitters>=0.0.1
24
+ Requires-Dist: GitPython>=3.1.0
25
+ Requires-Dist: radon>=6.0.0
26
+ Requires-Dist: pylint>=3.0.0
27
+ Requires-Dist: tree-sitter>=0.25.0
28
+ Requires-Dist: tree-sitter-python>=0.25.0
29
+ Requires-Dist: tree-sitter-javascript>=0.25.0
30
+ Requires-Dist: tree-sitter-typescript>=0.23.0
31
+ Requires-Dist: tree-sitter-java>=0.23.0
32
+ Requires-Dist: tree-sitter-go>=0.25.0
33
+ Requires-Dist: tree-sitter-rust>=0.24.0
34
+ Requires-Dist: tree-sitter-ruby>=0.23.0
35
+ Requires-Dist: rank-bm25>=0.2.2
36
+ Provides-Extra: vector
37
+ Requires-Dist: chromadb>=1.0.0; extra == "vector"
38
+ Requires-Dist: sentence-transformers>=2.2.0; extra == "vector"
39
+ Requires-Dist: numpy<2.0,>=1.26; extra == "vector"
40
+ Provides-Extra: dev
41
+ Requires-Dist: pytest>=7.0.0; extra == "dev"
42
+ Requires-Dist: pytest-cov>=4.0.0; extra == "dev"
43
+ Requires-Dist: pytest-timeout>=2.0.0; extra == "dev"
44
+ Requires-Dist: black>=23.0.0; extra == "dev"
45
+ Requires-Dist: ruff>=0.1.0; extra == "dev"
46
+ Requires-Dist: mypy>=1.0.0; extra == "dev"
47
+ Requires-Dist: pre-commit>=3.0.0; extra == "dev"
48
+
49
+ # ProjectMind MCP
50
+
51
+ > Give your AI coding assistant a brain. Persistent memory, semantic code search, a symbol-level call graph, and project intelligence — all running locally with no API keys required.
52
+
53
+ [![CI](https://github.com/Nik0lay1/project-mind-mcp/actions/workflows/ci.yml/badge.svg)](https://github.com/Nik0lay1/project-mind-mcp/actions/workflows/ci.yml)
54
+ ![Python](https://img.shields.io/badge/python-3.10%20%7C%203.11%20%7C%203.12-blue)
55
+ ![License](https://img.shields.io/badge/license-MIT-green)
56
+ ![Version](https://img.shields.io/badge/version-0.9.0-orange)
57
+ [![PyPI](https://img.shields.io/pypi/v/projectmind-mcp)](https://pypi.org/project/projectmind-mcp/)
58
+
59
+ **ProjectMind** is an open-source [MCP (Model Context Protocol)](https://modelcontextprotocol.io) server that supercharges AI assistants like Claude, Zencoder, and Cursor with long-term project memory and intelligent codebase search.
60
+
61
+ > 🤖 **This project was built with AI** — designed, coded, debugged, and documented using AI-assisted development from day one.
62
+
63
+ ---
64
+
65
+ ## Why ProjectMind?
66
+
67
+ Every time you start a new AI session, your assistant forgets everything about your project. ProjectMind solves this:
68
+
69
+ - **No more re-explaining** your architecture every session — memory persists in `.ai/memory.md`
70
+ - **Semantic code search** that understands *what* code does, not just *what* it's named
71
+ - **Symbol graph** — ask "who calls this function?" and get exact file:line answers
72
+ - **Dependency graph analysis** to understand how modules connect
73
+ - **Works 100% locally** — your code never leaves your machine
74
+
75
+ **New in v0.9.0**: ✍️ AI-authored annotations (semantic search without embeddings), 📦 lightweight core with the vector stack as an optional `[vector]` extra, 🚀 one-line install from [PyPI](https://pypi.org/project/projectmind-mcp/).
76
+
77
+ 📝 See [CHANGELOG.md](CHANGELOG.md) for the full release history.
78
+
79
+ ---
80
+
81
+ ## Features
82
+
83
+ ### 🧠 Persistent Project Memory
84
+ Save architectural decisions, tech stack notes, and context that survives across sessions. The AI reads this at the start of every conversation.
85
+
86
+ Hierarchical memory access avoids dumping everything at once:
87
+ - `read_memory_index()` — section headings only (cheap)
88
+ - `read_memory_section(name)` — expand only the section you need
89
+ - `search_memory(query)` — **relevance-ranked** retrieval of the memory blocks most relevant to a task (keyword-scored), not just the head of the file
90
+
91
+ All memory I/O is UTF-8 — notes in any language survive round-trips on any OS.
92
+
93
+ ### 🪜 Tiered Hierarchical Search
94
+ Queries escalate through tiers only when the previous one is insufficient — large repositories never trigger a cold load just to answer a path lookup.
95
+
96
+ | Tier | Engine | When used | Typical latency |
97
+ |---|---|---|---|
98
+ | **L0 Manifest** | `.ai/manifest.json` (paths + whole-file symbols) | always | < 50 ms |
99
+ | **L0 Annotations** | `.ai/annotations.json` (AI-written summaries) | always | < 5 ms |
100
+ | **L1 Symbol** | `.ai/symbol_graph.json` (AST symbol names) | when a prebuilt graph exists | < 10 ms |
101
+ | **L1 BM25** | `rank-bm25` lexical index (chunks + annotations) | when L0 is weak | ~ 100 ms |
102
+ | **L2 Vector** | ChromaDB + sentence-transformers (**optional** `[vector]` extra) | only on `intent='semantic'/'deep'` | first call ~ 30 s, then cached |
103
+
104
+ Use the unified `query(text, intent, n_results)` tool with one of:
105
+ - `overview` — L0 only (paths + top-level symbols)
106
+ - `lookup` — L0 + symbols + BM25 (keyword/lexical)
107
+ - `semantic` — escalates to embeddings if the lexical signal is weak
108
+ - `deep` — all tiers with relaxed thresholds
109
+
110
+ Hits from every tier are fused with **scale-invariant Reciprocal Rank Fusion** (normalized to 0..1), so no single tier can dominate the ranking by raw score magnitude — results corroborated across tiers float to the top.
111
+
112
+ ### ✍️ AI-Authored Annotations — semantic search without embeddings
113
+ The killer pattern: your AI assistant *already understands your code* — so let it write the search index. As the assistant works on files, it saves 1-2 sentence summaries + keywords via `save_annotation()`. Annotations are indexed into BM25 and a dedicated query tier, so natural-language queries land on the right files with plain keyword search:
114
+
115
+ ```
116
+ save_annotation("src/auth/session.py",
117
+ "Session lifecycle: creates, refreshes and revokes OAuth sessions",
118
+ keywords="login, oauth, token refresh")
119
+
120
+ query("where are sessions revoked?") → src/auth/session.py (L0_annot tier)
121
+ ```
122
+
123
+ - `list_unannotated_files()` — coverage report: files missing or with stale annotations (file changed since)
124
+ - `get_annotations(path)` — review what the index "knows" about a file
125
+ - Stored in human-readable `.ai/annotations.json`, UTF-8, atomic writes
126
+ - This is why the vector stack is now optional: a smart model + cheap precise tools beats a small embedding model guessing
127
+
128
+ ### 🧬 Symbol Graph (AST-level call/inherit/implement graph)
129
+ tree-sitter parses every source file into a graph of *symbols*, not files. Symbols are keyed by qualified id (`path::Class.method`), so same-named symbols across files stay distinct.
130
+
131
+ - `find_symbol("UserService")` — where is it defined? (no embedding model needed, answers in milliseconds)
132
+ - `get_symbol_relations("save", relation="callers")` — who calls it, with file:line
133
+ - Relations: `callers`, `callees`, `implementors`, `subclasses`, `bases`, `usages`, `info`
134
+ - Inheritance extraction works across **Python, JS, TS, Java, Ruby**; builtin-call noise (`print`, `len`, `push`…) is filtered out
135
+ - The graph is rebuilt automatically by the background indexer and persisted to `.ai/symbol_graph.json`
136
+
137
+ ### 🔍 Semantic Code Search
138
+ Search your codebase by *meaning*, not just text. Powered by a local `sentence-transformers` model (installed with the optional `[vector]` extra) — no OpenAI key needed.
139
+
140
+ ```
141
+ "find authentication middleware" → finds auth code even if it's named differently
142
+ ```
143
+
144
+ ### 🌳 AST-Aware Chunking
145
+ Unlike naive text splitters that cut code in the middle of a function, ProjectMind uses [tree-sitter](https://tree-sitter.github.io) to parse source files into **exact syntax units**:
146
+
147
+ - Functions and methods are indexed as individual, self-contained chunks
148
+ - Class methods get a `# Class: ClassName` context prefix for better search relevance
149
+ - Rich metadata per chunk: `symbol_type`, `symbol_name`, `class_name`, `line_start`, `line_end`
150
+ - Supports: **Python, JavaScript, TypeScript, TSX, Java, Go, Rust, Ruby**
151
+ - Graceful fallback to text splitting for unsupported file types
152
+
153
+ ### 🕸 Dependency Graph Intelligence
154
+ - Traverse import relationships up to 5 levels deep
155
+ - Find related files via shared dependency clustering
156
+ - Discover the shortest path between any two modules
157
+ - Identify entry points and orphaned modules
158
+ - **Monorepo-aware JS/TS resolution** — follows `tsconfig`/`jsconfig` path aliases (`@/...`) and workspace/package imports; multi-line, dynamic `import()` and side-effect imports are all detected
159
+ - **Python `src/`-layout & relative-import resolution** — absolute imports resolve through `src/`/`lib/` roots, dotted (`utils.helpers`) and relative (`.`/`..`) imports resolve to the right files
160
+ - **Cached import graph** (120 s TTL) with a precomputed reverse graph — impact/cluster analysis is O(E), not O(N²·E)
161
+
162
+ ### ⚡ Instant Project Exploration (no indexing needed)
163
+ - `get_project_overview()` — manifest-first; tech stack, git info, file stats in < 1 second
164
+ - `explore_directory(path)` — browse project tree level by level
165
+ - `get_file_summary(path)` — imports, classes, functions, git history
166
+
167
+ ### ⚡ Hybrid Search (BM25 + Vector)
168
+ Two search engines combined via **Reciprocal Rank Fusion (RRF)**:
169
+ - **BM25** catches exact keyword matches — finds `getUserById` when you type exactly that
170
+ - **Vector search** catches semantic matches — finds auth code even if named differently
171
+ - RRF merges both ranked lists for best-of-both-worlds results
172
+ - Automatic fallback to pure vector search when the BM25 index is not ready
173
+ - The BM25 index is persisted as **JSON** (`.ai/bm25_index.json`) — never pickle, so indexing a third-party repo can't execute untrusted input
174
+
175
+ ### 🔄 True Incremental Indexing
176
+ `index_changed_files` re-indexes only what changed — and cleans up after itself:
177
+ - A changed file's **old chunks are deleted** before the new ones land (renamed/removed symbols don't haunt search results)
178
+ - Chunks of **deleted files** are removed from ChromaDB, BM25 and the metadata
179
+ - BM25 is patched **in memory** per file and rebuilt once — no full-database fetch per small change
180
+ - Failed writes are never silent: metadata isn't saved, so affected files retry on the next run
181
+
182
+ ### 🏃 Background Indexing
183
+ `index_codebase()` returns instantly and indexes in a daemon thread; `session_init` auto-starts it when the index is missing. Poll `get_index_progress()` for a live progress bar with ETA. Switching projects mid-run cancels the old job safely — no cross-project contamination.
184
+
185
+ ### 🩺 Self-Healing Maintenance Daemon
186
+ A background thread keeps the index lean without user intervention. State persists in `.ai/maintenance_state.json`.
187
+
188
+ | Task | Trigger | Action |
189
+ |---|---|---|
190
+ | `manifest_refresh` | every 5 min | rebuild L0 manifest if files changed |
191
+ | `stale_gc` | hourly | delete embeddings for files removed from disk |
192
+ | `db_compaction` | daily | `VACUUM` ChromaDB SQLite when > 200 MB |
193
+ | `log_truncate` | every 6 h | truncate `projectmind.log` when > 8 MB |
194
+ | `model_unload` | every 5 min | release `sentence-transformers` after 1 h idle (env-tunable) |
195
+ | `cache_pressure` | every minute | drop file/query caches when RSS > 500 MB |
196
+
197
+ Inspect with `maintenance_status()`; force a sync run with `maintenance_run()`; aggressively clean the index with `prune_index(force=True)`.
198
+
199
+ ### 📊 Code Quality Metrics
200
+ Cyclomatic complexity, pylint scores, test coverage tracking — all queryable via MCP tools.
201
+ Both `analyze_code_complexity` and `analyze_code_quality` accept `mode='quick'` (default, fast) or `mode='deep'` (wider scan). pylint runs in a subprocess with a wall-clock budget, so these tools return partial results instead of timing out.
202
+
203
+ ### ⚡ Lazy `session_init` (no 30 s timeouts)
204
+ `session_init` never loads the embedding model; it returns the project root + manifest + memory index in well under a second even on multi-GB repositories. The vector store is loaded only when an `intent='semantic'` or `'deep'` query actually needs it.
205
+
206
+ ---
207
+
208
+ ## Quick Start
209
+
210
+ ### One-liner (recommended)
211
+
212
+ ```bash
213
+ # Claude Code
214
+ claude mcp add --scope user Memory -- uvx projectmind-mcp
215
+ ```
216
+
217
+ The default install is **lightweight** (a few MB — BM25 keyword search, AI annotations, symbol graph, memory). To add the optional embedding/vector tier (~500 MB, fully local):
218
+
219
+ ```bash
220
+ claude mcp add --scope user Memory -- uvx --from "projectmind-mcp[vector]" projectmind-mcp
221
+ ```
222
+
223
+ **Other MCP clients (Claude Desktop / Zencoder / Cursor)** — `mcp.json`:
224
+
225
+ ```json
226
+ {
227
+ "mcpServers": {
228
+ "Memory": {
229
+ "command": "uvx",
230
+ "args": ["projectmind-mcp"]
231
+ }
232
+ }
233
+ }
234
+ ```
235
+
236
+ ### From source (development)
237
+
238
+ ```bash
239
+ git clone https://github.com/Nik0lay1/project-mind-mcp.git
240
+ cd project-mind-mcp
241
+ python -m venv .venv
242
+
243
+ # Windows # macOS/Linux
244
+ .venv\Scripts\pip install -e ".[vector,dev]" # .venv/bin/pip install -e ".[vector,dev]"
245
+ ```
246
+
247
+ Then point your MCP client at `.venv/Scripts/python.exe mcp_server.py` on Windows or `.venv/bin/python mcp_server.py` on macOS/Linux (or use the `projectmind-mcp` console script from the venv).
248
+
249
+ ### Bootstrap a session
250
+
251
+ Ask the AI (once per session):
252
+ ```
253
+ Memory__session_init(project_path="<absolute path to your project>")
254
+ ```
255
+
256
+ If the index doesn't exist yet, background indexing starts automatically — check `Memory__get_index_progress()`. For manual full re-indexing of large projects:
257
+
258
+ ```bash
259
+ # Windows
260
+ .venv\Scripts\python.exe run_index.py
261
+
262
+ # macOS/Linux
263
+ .venv/bin/python run_index.py
264
+ ```
265
+
266
+ ---
267
+
268
+ ## Available Tools (45+)
269
+
270
+ | Category | Tools |
271
+ |---|---|
272
+ | **Session** | `session_init`, `health`, `set_project_root` |
273
+ | **Memory** | `read_memory`, `read_memory_index`, `read_memory_section`, `search_memory`, `update_memory`, `clear_memory`, `save_memory_version` |
274
+ | **Search** | `query` (tier-aware), `search_codebase`, `search_for_feature`, `search_architecture`, `search_for_errors`, `search_with_dependencies` |
275
+ | **Annotations** | `save_annotation`, `get_annotations`, `list_unannotated_files` |
276
+ | **Symbols** | `find_symbol`, `get_symbol_relations` (callers / callees / implementors / subclasses / bases / usages) |
277
+ | **Exploration** | `get_project_overview`, `explore_directory`, `get_file_summary` |
278
+ | **Dependencies** | `get_file_relations`, `get_dependencies_with_depth`, `get_module_cluster`, `find_dependency_path`, `analyze_change_impact` |
279
+ | **Indexing** | `index_codebase` (background by default), `index_changed_files`, `get_index_progress`, `get_index_stats`, `prune_index` |
280
+ | **Git** | `ingest_git_history`, `get_recent_changes_summary`, `auto_update_memory_from_commits` |
281
+ | **Quality** | `analyze_code_complexity`, `analyze_code_quality`, `get_test_coverage_info` |
282
+ | **Maintenance** | `maintenance_status`, `maintenance_run` |
283
+ | **Project** | `detect_project_conventions`, `generate_project_summary` |
284
+
285
+ Full reference: [docs/api/tools-reference.md](docs/api/tools-reference.md)
286
+
287
+ ---
288
+
289
+ ## How It Works
290
+
291
+ ```
292
+ Your Project
293
+
294
+
295
+ ProjectMind MCP Server
296
+
297
+ ├── .ai/memory.md ← persistent notes & decisions (UTF-8)
298
+ ├── .ai/annotations.json ← AI-written file summaries (search tier)
299
+ ├── .ai/manifest.json ← L0: paths, symbols, modules (≤200 KB)
300
+ ├── .ai/symbol_graph.json ← L1: AST call/inherit/implement graph
301
+ ├── .ai/bm25_index.json ← L1: lexical index (JSON, not pickle)
302
+ ├── .ai/vector_store/ ← L2: ChromaDB embeddings (local)
303
+ ├── .ai/index_metadata.json ← tracks changed files (mtime)
304
+ ├── .ai/index_progress.json ← live background-indexing progress
305
+ ├── .ai/maintenance_state.json ← self-healing daemon schedule
306
+ └── .ai/.indexignore ← per-project ignore patterns
307
+
308
+
309
+ AI Assistant (Claude / Zencoder / Cursor)
310
+ ```
311
+
312
+ **Embedding model** (only with the optional `[vector]` extra): `flax-sentence-embeddings/st-codesearch-distilroberta-base`
313
+ - Trained specifically on code (CodeSearchNet dataset)
314
+ - ~130 MB, runs fully locally on CPU
315
+ - No API keys, no data sent anywhere
316
+
317
+ **Search pipeline**: manifest + annotations + symbol graph + BM25 (keyword) + ChromaDB (semantic, optional) → Reciprocal Rank Fusion → top-N results
318
+
319
+ ---
320
+
321
+ ## Requirements
322
+
323
+ - Python 3.10 – 3.12
324
+ - A few MB of disk for the default install; ~500 MB (model + dependencies) with the optional `[vector]` extra
325
+ - Works on Windows, macOS, Linux
326
+
327
+ ---
328
+
329
+ ## Configuration
330
+
331
+ All settings in `config.py`:
332
+
333
+ | Setting | Default | Description |
334
+ |---|---|---|
335
+ | `MODEL_NAME` | `flax-sentence-embeddings/st-codesearch-distilroberta-base` | Embedding model |
336
+ | `CHUNK_SIZE` | `1500` | Characters per chunk |
337
+ | `MAX_FILE_SIZE_MB` | `10` | Skip files larger than this |
338
+ | `MAX_MEMORY_MB` | `100` | Memory limit for indexing batch |
339
+ | `IMPORT_GRAPH_MAX_FILES` | `8000` | Max files scanned when building the import graph |
340
+ | `TOOL_SOFT_BUDGET_SECONDS` | `20` | Wall-clock budget for analysis tools — they return partial results instead of timing out |
341
+
342
+ Override via environment variables:
343
+ ```bash
344
+ PROJECTMIND_MAX_FILE_SIZE_MB=5
345
+ PROJECTMIND_MAX_MEMORY_MB=200
346
+ PROJECTMIND_IMPORT_GRAPH_MAX_FILES=20000
347
+ PROJECTMIND_SYMBOL_GRAPH_MAX_FILES=8000
348
+ PROJECTMIND_TOOL_BUDGET_SECONDS=45
349
+ PROJECTMIND_MODEL_IDLE_UNLOAD_SECONDS=3600
350
+ ```
351
+
352
+ Custom ignore patterns: create `.indexignore` in the project root (fallback: `.ai/.indexignore`), same substring syntax as shown in the generated default.
353
+
354
+ ---
355
+
356
+ ## Project Structure
357
+
358
+ ```
359
+ mcp_server.py ← all MCP tool definitions
360
+ config.py ← configuration + path validation
361
+ manifest.py ← L0 lightweight project manifest
362
+ symbol_graph.py ← AST symbol graph (call/inherit/implement), format v3
363
+ query_router.py ← tier-aware query() router (L0 → L1_symbol → L1 → L2)
364
+ maintenance.py ← self-healing background daemon
365
+ background_indexer.py ← non-blocking indexing with live progress
366
+ vector_store_manager.py ← ChromaDB wrapper + hybrid search (L2)
367
+ bm25_index.py ← BM25 keyword index + RRF fusion (L1, JSON persistence)
368
+ codebase_indexer.py ← file scanning & AST-aware chunking
369
+ ast_splitter.py ← tree-sitter parser (9 languages, thread-safe)
370
+ code_intelligence.py ← import graph, complexity analysis, cached graphs
371
+ memory_manager.py ← persistent memory read/write (UTF-8, atomic)
372
+ incremental_indexing.py ← change tracking (mtime) + atomic writes
373
+ context.py ← dependency injection (thread-safe lazy init)
374
+ run_index.py ← helper script for manual re-indexing
375
+ ```
376
+
377
+ ---
378
+
379
+ ## Contributing
380
+
381
+ Issues and PRs are welcome. This is an open project — built in the open, improved in the open.
382
+
383
+ ```bash
384
+ pip install -e ".[dev]"
385
+ pytest tests/
386
+ ruff check .
387
+ ```
388
+
389
+ CI runs ruff, black, mypy and the unit suite on Python 3.10–3.12 (see [.github/workflows/ci.yml](.github/workflows/ci.yml)).
390
+
391
+ ---
392
+
393
+ ## License
394
+
395
+ MIT
396
+
397
+ ---
398
+
399
+ *Built with AI assistance — used throughout development for coding, debugging, refactoring, and documentation.*