codespine 0.5.2__tar.gz → 0.5.4__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- codespine-0.5.4/PKG-INFO +270 -0
- codespine-0.5.4/README.md +206 -0
- {codespine-0.5.2 → codespine-0.5.4}/codespine/__init__.py +1 -1
- codespine-0.5.4/codespine/analysis/community.py +182 -0
- {codespine-0.5.2 → codespine-0.5.4}/codespine/analysis/coupling.py +8 -0
- {codespine-0.5.2 → codespine-0.5.4}/codespine/analysis/crossmodule.py +10 -1
- {codespine-0.5.2 → codespine-0.5.4}/codespine/analysis/deadcode.py +66 -0
- {codespine-0.5.2 → codespine-0.5.4}/codespine/analysis/flow.py +11 -2
- {codespine-0.5.2 → codespine-0.5.4}/codespine/cli.py +46 -16
- {codespine-0.5.2 → codespine-0.5.4}/codespine/db/store.py +54 -33
- codespine-0.5.4/codespine.egg-info/PKG-INFO +270 -0
- {codespine-0.5.2 → codespine-0.5.4}/codespine.egg-info/SOURCES.txt +2 -0
- {codespine-0.5.2 → codespine-0.5.4}/pyproject.toml +1 -1
- codespine-0.5.4/tests/test_community_detection.py +44 -0
- codespine-0.5.4/tests/test_deadcode.py +52 -0
- codespine-0.5.2/PKG-INFO +0 -333
- codespine-0.5.2/README.md +0 -269
- codespine-0.5.2/codespine/analysis/community.py +0 -75
- codespine-0.5.2/codespine.egg-info/PKG-INFO +0 -333
- {codespine-0.5.2 → codespine-0.5.4}/LICENSE +0 -0
- {codespine-0.5.2 → codespine-0.5.4}/codespine/analysis/__init__.py +0 -0
- {codespine-0.5.2 → codespine-0.5.4}/codespine/analysis/context.py +0 -0
- {codespine-0.5.2 → codespine-0.5.4}/codespine/analysis/impact.py +0 -0
- {codespine-0.5.2 → codespine-0.5.4}/codespine/config.py +0 -0
- {codespine-0.5.2 → codespine-0.5.4}/codespine/db/__init__.py +0 -0
- {codespine-0.5.2 → codespine-0.5.4}/codespine/db/schema.py +0 -0
- {codespine-0.5.2 → codespine-0.5.4}/codespine/diff/__init__.py +0 -0
- {codespine-0.5.2 → codespine-0.5.4}/codespine/diff/branch_diff.py +0 -0
- {codespine-0.5.2 → codespine-0.5.4}/codespine/indexer/__init__.py +0 -0
- {codespine-0.5.2 → codespine-0.5.4}/codespine/indexer/call_resolver.py +0 -0
- {codespine-0.5.2 → codespine-0.5.4}/codespine/indexer/engine.py +0 -0
- {codespine-0.5.2 → codespine-0.5.4}/codespine/indexer/java_parser.py +0 -0
- {codespine-0.5.2 → codespine-0.5.4}/codespine/indexer/symbol_builder.py +0 -0
- {codespine-0.5.2 → codespine-0.5.4}/codespine/mcp/__init__.py +0 -0
- {codespine-0.5.2 → codespine-0.5.4}/codespine/mcp/server.py +0 -0
- {codespine-0.5.2 → codespine-0.5.4}/codespine/noise/__init__.py +0 -0
- {codespine-0.5.2 → codespine-0.5.4}/codespine/noise/blocklist.py +0 -0
- {codespine-0.5.2 → codespine-0.5.4}/codespine/search/__init__.py +0 -0
- {codespine-0.5.2 → codespine-0.5.4}/codespine/search/bm25.py +0 -0
- {codespine-0.5.2 → codespine-0.5.4}/codespine/search/fuzzy.py +0 -0
- {codespine-0.5.2 → codespine-0.5.4}/codespine/search/hybrid.py +0 -0
- {codespine-0.5.2 → codespine-0.5.4}/codespine/search/rrf.py +0 -0
- {codespine-0.5.2 → codespine-0.5.4}/codespine/search/vector.py +0 -0
- {codespine-0.5.2 → codespine-0.5.4}/codespine/watch/__init__.py +0 -0
- {codespine-0.5.2 → codespine-0.5.4}/codespine/watch/watcher.py +0 -0
- {codespine-0.5.2 → codespine-0.5.4}/codespine.egg-info/dependency_links.txt +0 -0
- {codespine-0.5.2 → codespine-0.5.4}/codespine.egg-info/entry_points.txt +0 -0
- {codespine-0.5.2 → codespine-0.5.4}/codespine.egg-info/requires.txt +0 -0
- {codespine-0.5.2 → codespine-0.5.4}/codespine.egg-info/top_level.txt +0 -0
- {codespine-0.5.2 → codespine-0.5.4}/gindex.py +0 -0
- {codespine-0.5.2 → codespine-0.5.4}/setup.cfg +0 -0
- {codespine-0.5.2 → codespine-0.5.4}/tests/test_branch_diff_normalize.py +0 -0
- {codespine-0.5.2 → codespine-0.5.4}/tests/test_call_resolver.py +0 -0
- {codespine-0.5.2 → codespine-0.5.4}/tests/test_index_and_hybrid.py +0 -0
- {codespine-0.5.2 → codespine-0.5.4}/tests/test_java_parser.py +0 -0
- {codespine-0.5.2 → codespine-0.5.4}/tests/test_multimodule_index.py +0 -0
- {codespine-0.5.2 → codespine-0.5.4}/tests/test_search_ranking.py +0 -0
codespine-0.5.4/PKG-INFO
ADDED
|
@@ -0,0 +1,270 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: codespine
|
|
3
|
+
Version: 0.5.4
|
|
4
|
+
Summary: Local Java code intelligence indexer backed by a graph database
|
|
5
|
+
Author: CodeSpine contributors
|
|
6
|
+
License: MIT License
|
|
7
|
+
|
|
8
|
+
Copyright (c) 2026 CodeSpine contributors
|
|
9
|
+
|
|
10
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
11
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
12
|
+
in the Software without restriction, including without limitation the rights
|
|
13
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
14
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
15
|
+
furnished to do so, subject to the following conditions:
|
|
16
|
+
|
|
17
|
+
The above copyright notice and this permission notice shall be included in all
|
|
18
|
+
copies or substantial portions of the Software.
|
|
19
|
+
|
|
20
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
21
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
22
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
23
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
24
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
25
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
26
|
+
SOFTWARE.
|
|
27
|
+
|
|
28
|
+
Project-URL: Homepage, https://github.com/vinayak3022/codeSpine
|
|
29
|
+
Project-URL: Repository, https://github.com/vinayak3022/codeSpine
|
|
30
|
+
Project-URL: Issues, https://github.com/vinayak3022/codeSpine/issues
|
|
31
|
+
Keywords: java,code-indexing,graph,kuzu,mcp
|
|
32
|
+
Classifier: Development Status :: 3 - Alpha
|
|
33
|
+
Classifier: Intended Audience :: Developers
|
|
34
|
+
Classifier: License :: OSI Approved :: MIT License
|
|
35
|
+
Classifier: Programming Language :: Python :: 3
|
|
36
|
+
Classifier: Programming Language :: Python :: 3.10
|
|
37
|
+
Classifier: Programming Language :: Python :: 3.11
|
|
38
|
+
Classifier: Programming Language :: Python :: 3.12
|
|
39
|
+
Classifier: Programming Language :: Python :: 3.13
|
|
40
|
+
Classifier: Topic :: Software Development :: Libraries
|
|
41
|
+
Classifier: Topic :: Software Development :: Quality Assurance
|
|
42
|
+
Requires-Python: >=3.10
|
|
43
|
+
Description-Content-Type: text/markdown
|
|
44
|
+
License-File: LICENSE
|
|
45
|
+
Requires-Dist: click
|
|
46
|
+
Requires-Dist: kuzu
|
|
47
|
+
Requires-Dist: tree-sitter
|
|
48
|
+
Requires-Dist: tree-sitter-java
|
|
49
|
+
Requires-Dist: fastmcp>=2.3.0
|
|
50
|
+
Requires-Dist: psutil
|
|
51
|
+
Requires-Dist: watchfiles
|
|
52
|
+
Provides-Extra: ml
|
|
53
|
+
Requires-Dist: sentence-transformers; extra == "ml"
|
|
54
|
+
Requires-Dist: numpy; extra == "ml"
|
|
55
|
+
Provides-Extra: community
|
|
56
|
+
Requires-Dist: igraph; extra == "community"
|
|
57
|
+
Requires-Dist: leidenalg; extra == "community"
|
|
58
|
+
Provides-Extra: full
|
|
59
|
+
Requires-Dist: sentence-transformers; extra == "full"
|
|
60
|
+
Requires-Dist: numpy; extra == "full"
|
|
61
|
+
Requires-Dist: igraph; extra == "full"
|
|
62
|
+
Requires-Dist: leidenalg; extra == "full"
|
|
63
|
+
Dynamic: license-file
|
|
64
|
+
|
|
65
|
+
# CodeSpine
|
|
66
|
+
|
|
67
|
+
CodeSpine cuts token burn for coding agents working on Java codebases.
|
|
68
|
+
|
|
69
|
+
Instead of having an agent open dozens of `.java` files to answer one question, CodeSpine indexes the codebase once and serves the structure over MCP. The agent asks for symbols, callers, impact, flows, dead code, and module boundaries directly, which means fewer file reads, fewer wasted context windows, and fewer hallucinated code paths.
|
|
70
|
+
|
|
71
|
+
It indexes classes, methods, calls, type relationships, cross-module links, git coupling, dead-code candidates, and execution flows so agents can work from graph answers first and source files second.
|
|
72
|
+
|
|
73
|
+
## Why It Saves Tokens
|
|
74
|
+
|
|
75
|
+
- One MCP call can replace many file opens. `get_symbol_context("PaymentService")` returns a resolved neighborhood instead of forcing the agent to read every caller and callee file manually.
|
|
76
|
+
- Search is structure-aware. Agents can ask for a symbol, concept, impact radius, or dead-code candidate without scanning entire packages.
|
|
77
|
+
- Multi-module repos stay scoped. Project-aware IDs and `project=` parameters reduce noise from unrelated modules and workspaces.
|
|
78
|
+
- Repeat sessions get cheaper. Once indexed, the agent reuses the graph instead of re-discovering the same relationships every turn.
|
|
79
|
+
|
|
80
|
+
## Install
|
|
81
|
+
|
|
82
|
+
```bash
|
|
83
|
+
pip install codespine
|
|
84
|
+
```
|
|
85
|
+
|
|
86
|
+
Optional semantic search:
|
|
87
|
+
|
|
88
|
+
```bash
|
|
89
|
+
pip install "codespine[ml]"
|
|
90
|
+
```
|
|
91
|
+
|
|
92
|
+
## What It Does
|
|
93
|
+
|
|
94
|
+
- Hybrid search: BM25 + fuzzy by default, semantic vector search with `--embed`
|
|
95
|
+
- Impact analysis: callers, dependencies, and confidence-scored edges
|
|
96
|
+
- Dead code detection: Java-aware exemptions for tests, framework hooks, contracts, and common DI patterns
|
|
97
|
+
- Execution flows: traces from entry points through the call graph
|
|
98
|
+
- Community detection: structural clusters for architectural context
|
|
99
|
+
- Change coupling: git-history-based file relationships
|
|
100
|
+
- Multi-project and multi-module indexing: workspaces, Maven modules, Gradle subprojects
|
|
101
|
+
- MCP server: structured tools for Claude, Cursor, Cline, Copilot, and similar clients
|
|
102
|
+
|
|
103
|
+
## Quick Start
|
|
104
|
+
|
|
105
|
+
Index a repo:
|
|
106
|
+
|
|
107
|
+
```bash
|
|
108
|
+
codespine analyse /path/to/project
|
|
109
|
+
```
|
|
110
|
+
|
|
111
|
+
Run a deeper pass:
|
|
112
|
+
|
|
113
|
+
```bash
|
|
114
|
+
codespine analyse /path/to/project --deep
|
|
115
|
+
```
|
|
116
|
+
|
|
117
|
+
Add embeddings for semantic search:
|
|
118
|
+
|
|
119
|
+
```bash
|
|
120
|
+
codespine analyse /path/to/project --embed
|
|
121
|
+
```
|
|
122
|
+
|
|
123
|
+
Typical output:
|
|
124
|
+
|
|
125
|
+
```text
|
|
126
|
+
$ codespine analyse .
|
|
127
|
+
Walking files... 142 files found
|
|
128
|
+
Index mode... incremental (8 files to index, 0 deleted)
|
|
129
|
+
Parsing code... 8/8
|
|
130
|
+
Tracing calls... 847 calls resolved
|
|
131
|
+
Analyzing types... 234 type relationships
|
|
132
|
+
Cross-module linking... skipped (single module)
|
|
133
|
+
Detecting communities... 8 clusters found
|
|
134
|
+
Detecting execution flows... 34 processes found
|
|
135
|
+
Finding dead code... 12 unreachable symbols
|
|
136
|
+
Analyzing git history... 18 coupled file pairs
|
|
137
|
+
Generating embeddings... 0 vectors stored
|
|
138
|
+
|
|
139
|
+
Done in 4.2s - 623 symbols, 1847 edges, 8 clusters, 34 flows (no embeddings; rerun with --embed for semantic search)
|
|
140
|
+
```
|
|
141
|
+
|
|
142
|
+
Search the index:
|
|
143
|
+
|
|
144
|
+
```bash
|
|
145
|
+
codespine search "retry payment"
|
|
146
|
+
codespine context "PaymentService"
|
|
147
|
+
codespine impact "com.example.PaymentService#charge(java.lang.String)"
|
|
148
|
+
codespine stats
|
|
149
|
+
```
|
|
150
|
+
|
|
151
|
+
## MCP
|
|
152
|
+
|
|
153
|
+
Foreground MCP server:
|
|
154
|
+
|
|
155
|
+
```bash
|
|
156
|
+
codespine mcp
|
|
157
|
+
```
|
|
158
|
+
|
|
159
|
+
Minimal MCP config:
|
|
160
|
+
|
|
161
|
+
```json
|
|
162
|
+
{
|
|
163
|
+
"mcpServers": {
|
|
164
|
+
"codespine": {
|
|
165
|
+
"command": "codespine",
|
|
166
|
+
"args": ["mcp"]
|
|
167
|
+
}
|
|
168
|
+
}
|
|
169
|
+
}
|
|
170
|
+
```
|
|
171
|
+
|
|
172
|
+
If the client launches the wrong Python environment, use the absolute binary path instead:
|
|
173
|
+
|
|
174
|
+
```json
|
|
175
|
+
{
|
|
176
|
+
"mcpServers": {
|
|
177
|
+
"codespine": {
|
|
178
|
+
"command": "/absolute/path/to/codespine",
|
|
179
|
+
"args": ["mcp"]
|
|
180
|
+
}
|
|
181
|
+
}
|
|
182
|
+
}
|
|
183
|
+
```
|
|
184
|
+
|
|
185
|
+
Common MCP tools:
|
|
186
|
+
|
|
187
|
+
- `search_hybrid(query, k, project)`
|
|
188
|
+
- `find_symbol(name, kind, project, limit)`
|
|
189
|
+
- `get_symbol_context(query, max_depth, project)`
|
|
190
|
+
- `get_impact(symbol, max_depth, project)`
|
|
191
|
+
- `detect_dead_code(limit, project, strict)`
|
|
192
|
+
- `trace_execution_flows(entry_symbol, max_depth, project)`
|
|
193
|
+
- `get_symbol_community(symbol)`
|
|
194
|
+
- `get_change_coupling(months, min_strength, min_cochanges, project)`
|
|
195
|
+
- `compare_branches(base_ref, head_ref)`
|
|
196
|
+
- `get_codebase_stats()`
|
|
197
|
+
|
|
198
|
+
## CLI
|
|
199
|
+
|
|
200
|
+
Core commands:
|
|
201
|
+
|
|
202
|
+
```bash
|
|
203
|
+
codespine analyse <path>
|
|
204
|
+
codespine analyse <path> --full
|
|
205
|
+
codespine analyse <path> --deep
|
|
206
|
+
codespine analyse <path> --embed
|
|
207
|
+
codespine watch --path .
|
|
208
|
+
codespine search "query"
|
|
209
|
+
codespine context "symbol"
|
|
210
|
+
codespine impact "symbol"
|
|
211
|
+
codespine deadcode
|
|
212
|
+
codespine flow
|
|
213
|
+
codespine community
|
|
214
|
+
codespine coupling
|
|
215
|
+
codespine diff main..feature
|
|
216
|
+
codespine stats
|
|
217
|
+
codespine list
|
|
218
|
+
codespine clear-project <project_id>
|
|
219
|
+
codespine clear-index
|
|
220
|
+
```
|
|
221
|
+
|
|
222
|
+
`analyse` defaults to incremental mode. Repeat runs are designed to be fast when files have not changed.
|
|
223
|
+
|
|
224
|
+
## Workspace And Module Detection
|
|
225
|
+
|
|
226
|
+
CodeSpine can index:
|
|
227
|
+
|
|
228
|
+
- a single Java repo
|
|
229
|
+
- a multi-module Maven or Gradle repo
|
|
230
|
+
- a workspace directory containing multiple repos
|
|
231
|
+
|
|
232
|
+
Project IDs are:
|
|
233
|
+
|
|
234
|
+
- single-module repo: `payments-service`
|
|
235
|
+
- multi-module repo: `payments-service::core`, `payments-service::api`
|
|
236
|
+
|
|
237
|
+
That same project ID can be passed into MCP tools and CLI analysis calls that support project scoping.
|
|
238
|
+
|
|
239
|
+
## Deep Analysis Trade-Offs
|
|
240
|
+
|
|
241
|
+
`--deep` enables the expensive graph-wide passes:
|
|
242
|
+
|
|
243
|
+
- communities
|
|
244
|
+
- execution flows
|
|
245
|
+
- dead code
|
|
246
|
+
- git coupling
|
|
247
|
+
|
|
248
|
+
Use it when you want architecture-level context. Skip it when you just need the graph refreshed for search, context, and impact.
|
|
249
|
+
|
|
250
|
+
`--embed` is also optional. Without it, CodeSpine still supports exact, keyword, and fuzzy search. Add embeddings when you need concept-level retrieval.
|
|
251
|
+
|
|
252
|
+
## Runtime Files
|
|
253
|
+
|
|
254
|
+
- `~/.codespine_db` - graph database
|
|
255
|
+
- `~/.codespine.pid` - MCP background server PID
|
|
256
|
+
- `~/.codespine.log` - server log
|
|
257
|
+
- `~/.codespine_embedding_cache.json` - embedding cache
|
|
258
|
+
- `~/.codespine_index_meta/` - incremental file metadata cache
|
|
259
|
+
|
|
260
|
+
## Notes
|
|
261
|
+
|
|
262
|
+
- `codespine start` launches a background MCP server. Most IDE MCP clients should use `codespine mcp` instead and manage the process themselves.
|
|
263
|
+
- `codespine clear-index` rebuilds the local index database from scratch.
|
|
264
|
+
- For large Spring or JPA-heavy repos, dead-code results should still be reviewed before deletion. The tool is conservative, not authoritative.
|
|
265
|
+
|
|
266
|
+
## Project Docs
|
|
267
|
+
|
|
268
|
+
- [Contributing](.github/CONTRIBUTING.md)
|
|
269
|
+
- [Security](.github/SECURITY.md)
|
|
270
|
+
- [Code of Conduct](.github/CODE_OF_CONDUCT.md)
|
|
@@ -0,0 +1,206 @@
|
|
|
1
|
+
# CodeSpine
|
|
2
|
+
|
|
3
|
+
CodeSpine cuts token burn for coding agents working on Java codebases.
|
|
4
|
+
|
|
5
|
+
Instead of having an agent open dozens of `.java` files to answer one question, CodeSpine indexes the codebase once and serves the structure over MCP. The agent asks for symbols, callers, impact, flows, dead code, and module boundaries directly, which means fewer file reads, fewer wasted context windows, and fewer hallucinated code paths.
|
|
6
|
+
|
|
7
|
+
It indexes classes, methods, calls, type relationships, cross-module links, git coupling, dead-code candidates, and execution flows so agents can work from graph answers first and source files second.
|
|
8
|
+
|
|
9
|
+
## Why It Saves Tokens
|
|
10
|
+
|
|
11
|
+
- One MCP call can replace many file opens. `get_symbol_context("PaymentService")` returns a resolved neighborhood instead of forcing the agent to read every caller and callee file manually.
|
|
12
|
+
- Search is structure-aware. Agents can ask for a symbol, concept, impact radius, or dead-code candidate without scanning entire packages.
|
|
13
|
+
- Multi-module repos stay scoped. Project-aware IDs and `project=` parameters reduce noise from unrelated modules and workspaces.
|
|
14
|
+
- Repeat sessions get cheaper. Once indexed, the agent reuses the graph instead of re-discovering the same relationships every turn.
|
|
15
|
+
|
|
16
|
+
## Install
|
|
17
|
+
|
|
18
|
+
```bash
|
|
19
|
+
pip install codespine
|
|
20
|
+
```
|
|
21
|
+
|
|
22
|
+
Optional semantic search:
|
|
23
|
+
|
|
24
|
+
```bash
|
|
25
|
+
pip install "codespine[ml]"
|
|
26
|
+
```
|
|
27
|
+
|
|
28
|
+
## What It Does
|
|
29
|
+
|
|
30
|
+
- Hybrid search: BM25 + fuzzy by default, semantic vector search with `--embed`
|
|
31
|
+
- Impact analysis: callers, dependencies, and confidence-scored edges
|
|
32
|
+
- Dead code detection: Java-aware exemptions for tests, framework hooks, contracts, and common DI patterns
|
|
33
|
+
- Execution flows: traces from entry points through the call graph
|
|
34
|
+
- Community detection: structural clusters for architectural context
|
|
35
|
+
- Change coupling: git-history-based file relationships
|
|
36
|
+
- Multi-project and multi-module indexing: workspaces, Maven modules, Gradle subprojects
|
|
37
|
+
- MCP server: structured tools for Claude, Cursor, Cline, Copilot, and similar clients
|
|
38
|
+
|
|
39
|
+
## Quick Start
|
|
40
|
+
|
|
41
|
+
Index a repo:
|
|
42
|
+
|
|
43
|
+
```bash
|
|
44
|
+
codespine analyse /path/to/project
|
|
45
|
+
```
|
|
46
|
+
|
|
47
|
+
Run a deeper pass:
|
|
48
|
+
|
|
49
|
+
```bash
|
|
50
|
+
codespine analyse /path/to/project --deep
|
|
51
|
+
```
|
|
52
|
+
|
|
53
|
+
Add embeddings for semantic search:
|
|
54
|
+
|
|
55
|
+
```bash
|
|
56
|
+
codespine analyse /path/to/project --embed
|
|
57
|
+
```
|
|
58
|
+
|
|
59
|
+
Typical output:
|
|
60
|
+
|
|
61
|
+
```text
|
|
62
|
+
$ codespine analyse .
|
|
63
|
+
Walking files... 142 files found
|
|
64
|
+
Index mode... incremental (8 files to index, 0 deleted)
|
|
65
|
+
Parsing code... 8/8
|
|
66
|
+
Tracing calls... 847 calls resolved
|
|
67
|
+
Analyzing types... 234 type relationships
|
|
68
|
+
Cross-module linking... skipped (single module)
|
|
69
|
+
Detecting communities... 8 clusters found
|
|
70
|
+
Detecting execution flows... 34 processes found
|
|
71
|
+
Finding dead code... 12 unreachable symbols
|
|
72
|
+
Analyzing git history... 18 coupled file pairs
|
|
73
|
+
Generating embeddings... 0 vectors stored
|
|
74
|
+
|
|
75
|
+
Done in 4.2s - 623 symbols, 1847 edges, 8 clusters, 34 flows (no embeddings; rerun with --embed for semantic search)
|
|
76
|
+
```
|
|
77
|
+
|
|
78
|
+
Search the index:
|
|
79
|
+
|
|
80
|
+
```bash
|
|
81
|
+
codespine search "retry payment"
|
|
82
|
+
codespine context "PaymentService"
|
|
83
|
+
codespine impact "com.example.PaymentService#charge(java.lang.String)"
|
|
84
|
+
codespine stats
|
|
85
|
+
```
|
|
86
|
+
|
|
87
|
+
## MCP
|
|
88
|
+
|
|
89
|
+
Foreground MCP server:
|
|
90
|
+
|
|
91
|
+
```bash
|
|
92
|
+
codespine mcp
|
|
93
|
+
```
|
|
94
|
+
|
|
95
|
+
Minimal MCP config:
|
|
96
|
+
|
|
97
|
+
```json
|
|
98
|
+
{
|
|
99
|
+
"mcpServers": {
|
|
100
|
+
"codespine": {
|
|
101
|
+
"command": "codespine",
|
|
102
|
+
"args": ["mcp"]
|
|
103
|
+
}
|
|
104
|
+
}
|
|
105
|
+
}
|
|
106
|
+
```
|
|
107
|
+
|
|
108
|
+
If the client launches the wrong Python environment, use the absolute binary path instead:
|
|
109
|
+
|
|
110
|
+
```json
|
|
111
|
+
{
|
|
112
|
+
"mcpServers": {
|
|
113
|
+
"codespine": {
|
|
114
|
+
"command": "/absolute/path/to/codespine",
|
|
115
|
+
"args": ["mcp"]
|
|
116
|
+
}
|
|
117
|
+
}
|
|
118
|
+
}
|
|
119
|
+
```
|
|
120
|
+
|
|
121
|
+
Common MCP tools:
|
|
122
|
+
|
|
123
|
+
- `search_hybrid(query, k, project)`
|
|
124
|
+
- `find_symbol(name, kind, project, limit)`
|
|
125
|
+
- `get_symbol_context(query, max_depth, project)`
|
|
126
|
+
- `get_impact(symbol, max_depth, project)`
|
|
127
|
+
- `detect_dead_code(limit, project, strict)`
|
|
128
|
+
- `trace_execution_flows(entry_symbol, max_depth, project)`
|
|
129
|
+
- `get_symbol_community(symbol)`
|
|
130
|
+
- `get_change_coupling(months, min_strength, min_cochanges, project)`
|
|
131
|
+
- `compare_branches(base_ref, head_ref)`
|
|
132
|
+
- `get_codebase_stats()`
|
|
133
|
+
|
|
134
|
+
## CLI
|
|
135
|
+
|
|
136
|
+
Core commands:
|
|
137
|
+
|
|
138
|
+
```bash
|
|
139
|
+
codespine analyse <path>
|
|
140
|
+
codespine analyse <path> --full
|
|
141
|
+
codespine analyse <path> --deep
|
|
142
|
+
codespine analyse <path> --embed
|
|
143
|
+
codespine watch --path .
|
|
144
|
+
codespine search "query"
|
|
145
|
+
codespine context "symbol"
|
|
146
|
+
codespine impact "symbol"
|
|
147
|
+
codespine deadcode
|
|
148
|
+
codespine flow
|
|
149
|
+
codespine community
|
|
150
|
+
codespine coupling
|
|
151
|
+
codespine diff main..feature
|
|
152
|
+
codespine stats
|
|
153
|
+
codespine list
|
|
154
|
+
codespine clear-project <project_id>
|
|
155
|
+
codespine clear-index
|
|
156
|
+
```
|
|
157
|
+
|
|
158
|
+
`analyse` defaults to incremental mode. Repeat runs are designed to be fast when files have not changed.
|
|
159
|
+
|
|
160
|
+
## Workspace And Module Detection
|
|
161
|
+
|
|
162
|
+
CodeSpine can index:
|
|
163
|
+
|
|
164
|
+
- a single Java repo
|
|
165
|
+
- a multi-module Maven or Gradle repo
|
|
166
|
+
- a workspace directory containing multiple repos
|
|
167
|
+
|
|
168
|
+
Project IDs are:
|
|
169
|
+
|
|
170
|
+
- single-module repo: `payments-service`
|
|
171
|
+
- multi-module repo: `payments-service::core`, `payments-service::api`
|
|
172
|
+
|
|
173
|
+
That same project ID can be passed into MCP tools and CLI analysis calls that support project scoping.
|
|
174
|
+
|
|
175
|
+
## Deep Analysis Trade-Offs
|
|
176
|
+
|
|
177
|
+
`--deep` enables the expensive graph-wide passes:
|
|
178
|
+
|
|
179
|
+
- communities
|
|
180
|
+
- execution flows
|
|
181
|
+
- dead code
|
|
182
|
+
- git coupling
|
|
183
|
+
|
|
184
|
+
Use it when you want architecture-level context. Skip it when you just need the graph refreshed for search, context, and impact.
|
|
185
|
+
|
|
186
|
+
`--embed` is also optional. Without it, CodeSpine still supports exact, keyword, and fuzzy search. Add embeddings when you need concept-level retrieval.
|
|
187
|
+
|
|
188
|
+
## Runtime Files
|
|
189
|
+
|
|
190
|
+
- `~/.codespine_db` - graph database
|
|
191
|
+
- `~/.codespine.pid` - MCP background server PID
|
|
192
|
+
- `~/.codespine.log` - server log
|
|
193
|
+
- `~/.codespine_embedding_cache.json` - embedding cache
|
|
194
|
+
- `~/.codespine_index_meta/` - incremental file metadata cache
|
|
195
|
+
|
|
196
|
+
## Notes
|
|
197
|
+
|
|
198
|
+
- `codespine start` launches a background MCP server. Most IDE MCP clients should use `codespine mcp` instead and manage the process themselves.
|
|
199
|
+
- `codespine clear-index` rebuilds the local index database from scratch.
|
|
200
|
+
- For large Spring or JPA-heavy repos, dead-code results should still be reviewed before deletion. The tool is conservative, not authoritative.
|
|
201
|
+
|
|
202
|
+
## Project Docs
|
|
203
|
+
|
|
204
|
+
- [Contributing](.github/CONTRIBUTING.md)
|
|
205
|
+
- [Security](.github/SECURITY.md)
|
|
206
|
+
- [Code of Conduct](.github/CODE_OF_CONDUCT.md)
|
|
@@ -0,0 +1,182 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
from collections import Counter, defaultdict
|
|
4
|
+
|
|
5
|
+
MAX_LEIDEN_SYMBOLS = 12000  # graphs with more connected symbols than this skip Leiden and use package buckets
|
|
6
|
+
MIN_COMMUNITY_SIZE = 2  # clusters with fewer members than this are re-bucketed by package or dropped
|
|
7
|
+
PACKAGE_BUCKET_DEPTH = 5  # maximum number of leading package segments kept in a bucket name
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
def _package_bucket(fqname: str) -> str:
|
|
11
|
+
base = (fqname or "").split("#", 1)[0]
|
|
12
|
+
parts = [p for p in base.split(".") if p]
|
|
13
|
+
if len(parts) <= 2:
|
|
14
|
+
return base or "default"
|
|
15
|
+
package_parts = parts[:-1] if len(parts) > 1 else parts
|
|
16
|
+
depth = min(PACKAGE_BUCKET_DEPTH, len(package_parts))
|
|
17
|
+
return ".".join(package_parts[:depth]) or base or "default"
|
|
18
|
+
|
|
19
|
+
|
|
20
|
+
def _community_label(symbol_ids: list[str], symbol_meta: dict[str, dict]) -> str:
|
|
21
|
+
bucket_counts = Counter(_package_bucket(symbol_meta[sid].get("fqname", "")) for sid in symbol_ids if sid in symbol_meta)
|
|
22
|
+
if bucket_counts:
|
|
23
|
+
return bucket_counts.most_common(1)[0][0]
|
|
24
|
+
return "community"
|
|
25
|
+
|
|
26
|
+
|
|
27
|
+
def _call_graph_communities(symbol_meta: dict[str, dict], method_edges: list[tuple[str, str]], progress=None) -> dict[str, int]:
|
|
28
|
+
def _ping(msg: str) -> None:
|
|
29
|
+
if progress:
|
|
30
|
+
progress(msg)
|
|
31
|
+
|
|
32
|
+
graph_nodes = sorted({sid for edge in method_edges for sid in edge})
|
|
33
|
+
if not graph_nodes:
|
|
34
|
+
return {}
|
|
35
|
+
|
|
36
|
+
if len(graph_nodes) > MAX_LEIDEN_SYMBOLS:
|
|
37
|
+
_ping(f"graph too large for leiden ({len(graph_nodes)} symbols), using package fallback")
|
|
38
|
+
return {}
|
|
39
|
+
|
|
40
|
+
index_of = {sid: i for i, sid in enumerate(graph_nodes)}
|
|
41
|
+
membership: dict[str, int] = {}
|
|
42
|
+
try:
|
|
43
|
+
import igraph as ig
|
|
44
|
+
import leidenalg
|
|
45
|
+
|
|
46
|
+
_ping(f"{len(graph_nodes)} connected symbols, running leiden")
|
|
47
|
+
g = ig.Graph(directed=False)
|
|
48
|
+
g.add_vertices(len(graph_nodes))
|
|
49
|
+
g.add_edges([(index_of[src], index_of[dst]) for src, dst in method_edges if src in index_of and dst in index_of])
|
|
50
|
+
part = leidenalg.find_partition(g, leidenalg.ModularityVertexPartition)
|
|
51
|
+
for idx, cid in enumerate(part.membership):
|
|
52
|
+
membership[graph_nodes[idx]] = int(cid)
|
|
53
|
+
except Exception:
|
|
54
|
+
_ping("leiden unavailable, using package fallback")
|
|
55
|
+
return {}
|
|
56
|
+
return membership
|
|
57
|
+
|
|
58
|
+
|
|
59
|
+
def detect_communities(store, progress=None) -> list[dict]:
    """Recompute symbol communities and persist them in *store*.

    Steps:
      1. Load every ``Symbol`` record.
      2. Build undirected structural edges: method <-> owning class, and
         caller <-> callee for ``CALLS`` relationships.
      3. Cluster those edges with ``_call_graph_communities``.
      4. Keep graph clusters with at least ``MIN_COMMUNITY_SIZE`` members;
         re-bucket everything else by package prefix and drop buckets that are
         still too small. If nothing survives, all symbols go into one
         fallback cluster.
      5. Clear the previously stored communities and write the new ones.

    Args:
        store: Graph store exposing ``query_records``, ``clear_communities``
            and ``set_community``.
        progress: Optional callable that receives status messages.

    Returns:
        One dict per community with ``community_id``, ``label``, ``cohesion``
        and ``size``, ordered by size (largest first) and then by label.
        ``cohesion`` is the community's share of all symbols, capped at 1.0.
        An empty list is returned, and nothing is written, when the store
        holds no symbols.
    """

    def _ping(msg: str) -> None:
        if progress:
            progress(msg)

    _ping("loading symbols")
    symbols = store.query_records(
        """
        MATCH (s:Symbol)
        RETURN s.id as id, s.kind as kind, s.fqname as fqname, s.file_id as file_id
        """
    )
    if not symbols:
        return []

    symbol_meta = {s["id"]: s for s in symbols}

    # Index method and class symbols by (file, fully qualified name) so that
    # Method/Class rows can be mapped back to their Symbol ids.
    method_symbols_by_key: dict[tuple[str, str], str] = {}
    class_symbols_by_key: dict[tuple[str, str], str] = {}
    for symbol in symbols:
        key = (symbol.get("file_id", ""), symbol.get("fqname", ""))
        kind = symbol.get("kind")
        if kind == "method":
            method_symbols_by_key[key] = symbol["id"]
        elif kind == "class":
            class_symbols_by_key[key] = symbol["id"]

    _ping("loading methods")
    method_rows = store.query_records(
        """
        MATCH (m:Method), (c:Class)
        WHERE m.class_id = c.id
        RETURN m.id as method_id, c.file_id as file_id, c.fqcn as class_fqcn, m.signature as signature
        """
    )
    method_symbol_ids: dict[str, str] = {}
    graph_edges: set[tuple[str, str]] = set()
    for row in method_rows:
        file_id = row.get("file_id", "")
        fqname = f"{row.get('class_fqcn', '')}#{row.get('signature', '')}"
        method_symbol_id = method_symbols_by_key.get((file_id, fqname))
        if not method_symbol_id:
            continue
        method_symbol_ids[row["method_id"]] = method_symbol_id
        class_symbol_id = class_symbols_by_key.get((file_id, row.get("class_fqcn", "")))
        if class_symbol_id and class_symbol_id != method_symbol_id:
            # Endpoints are sorted so each undirected edge is stored once.
            graph_edges.add(tuple(sorted((method_symbol_id, class_symbol_id))))

    _ping("loading call edges")
    call_rows = store.query_records(
        """
        MATCH (a:Method)-[:CALLS]->(b:Method)
        RETURN a.id as src, b.id as dst
        """
    )
    for row in call_rows:
        src = method_symbol_ids.get(row.get("src", ""))
        dst = method_symbol_ids.get(row.get("dst", ""))
        if src and dst and src != dst:
            graph_edges.add(tuple(sorted((src, dst))))

    _ping(f"{len(symbols)} symbols, {len(graph_edges)} structural edges")
    membership = _call_graph_communities(symbol_meta, sorted(graph_edges), progress=progress)

    grouped: dict[str, list[str]] = defaultdict(list)

    # Keep only meaningful graph communities; tiny ones get merged by package bucket below.
    temp_grouped: dict[int, list[str]] = defaultdict(list)
    for sid, cid in membership.items():
        temp_grouped[cid].append(sid)

    for cid, members in temp_grouped.items():
        if len(members) >= MIN_COMMUNITY_SIZE:
            grouped[f"graph:{cid}"].extend(members)
        else:
            for sid in members:
                grouped[f"pkg:{_package_bucket(symbol_meta[sid].get('fqname', ''))}"].append(sid)

    # Symbols that never appeared in the clustered graph are bucketed by package.
    for sid, meta in symbol_meta.items():
        if sid in membership:
            continue
        grouped[f"pkg:{_package_bucket(meta.get('fqname', ''))}"].append(sid)

    # Filter out residual singletons. They are not useful architectural communities.
    filtered = {cid: members for cid, members in grouped.items() if len(members) >= MIN_COMMUNITY_SIZE}
    if not filtered:
        # Last resort: put everything into one broad bucket so callers still get context.
        filtered["fallback:1000000"] = list(symbol_meta.keys())

    _ping(f"{len(filtered)} clusters, replacing previous communities")
    store.clear_communities()

    communities: list[dict] = []
    total_clusters = len(filtered)
    total_symbols = max(len(symbol_meta), 1)
    for idx, (cid, symbol_ids) in enumerate(sorted(filtered.items()), start=1):
        label = _community_label(symbol_ids, symbol_meta)
        cohesion = min(1.0, len(symbol_ids) / total_symbols)
        store.set_community(cid, label, cohesion, symbol_ids)
        if idx % 100 == 0 or idx == total_clusters:
            _ping(f"persisting {idx}/{total_clusters} clusters")
        communities.append(
            {
                "community_id": cid,
                "label": label,
                "cohesion": cohesion,
                "size": len(symbol_ids),
            }
        )

    # Largest first; equal sizes in alphabetical label order, then by id.
    communities.sort(key=lambda c: (-c["size"], c["label"], c["community_id"]))
    return communities
|
|
170
|
+
|
|
171
|
+
|
|
172
|
+
def symbol_community(store, symbol_query: str) -> dict:
    """Look up the communities of the symbols matching *symbol_query*.

    A symbol matches when its id equals the query, or when its fully qualified
    name or simple name equals the query ignoring case. At most 20 matches are
    returned.

    Args:
        store: Graph store exposing ``query_records``.
        symbol_query: Symbol id or name. Surrounding whitespace is ignored for
            matching.

    Returns:
        ``{"query": <the query as given>, "matches": [...]}`` where each match
        carries ``symbol_id``, ``fqname``, ``community_id``, ``label`` and
        ``cohesion``.
    """
    needle = (symbol_query or "").strip()
    if not needle:
        # Nothing to match on; skip the database round-trip.
        return {"query": symbol_query, "matches": []}

    recs = store.query_records(
        """
        MATCH (s:Symbol)-[:IN_COMMUNITY]->(c:Community)
        WHERE s.id = $q OR lower(s.fqname) = lower($q) OR lower(s.name) = lower($q)
        RETURN s.id as symbol_id, s.fqname as fqname, c.id as community_id, c.label as label, c.cohesion as cohesion
        LIMIT 20
        """,
        {"q": needle},
    )
    return {"query": symbol_query, "matches": recs}
|
|
@@ -46,11 +46,18 @@ def compute_coupling(
|
|
|
46
46
|
months: int = SETTINGS.default_coupling_months,
|
|
47
47
|
min_strength: float = SETTINGS.default_min_coupling_strength,
|
|
48
48
|
min_cochanges: int = SETTINGS.default_min_cochanges,
|
|
49
|
+
progress=None,
|
|
49
50
|
) -> list[dict]:
|
|
51
|
+
def _ping(msg: str) -> None:
|
|
52
|
+
if progress:
|
|
53
|
+
progress(msg)
|
|
54
|
+
|
|
55
|
+
_ping("reading git history")
|
|
50
56
|
changesets = _git_changed_file_sets(repo_path, months)
|
|
51
57
|
if not changesets:
|
|
52
58
|
return []
|
|
53
59
|
|
|
60
|
+
_ping(f"{len(changesets)} commits, computing co-changes")
|
|
54
61
|
file_changes = Counter()
|
|
55
62
|
co_changes: Counter[tuple[str, str]] = Counter()
|
|
56
63
|
|
|
@@ -60,6 +67,7 @@ def compute_coupling(
|
|
|
60
67
|
for a, b in itertools.combinations(sorted(cs), 2):
|
|
61
68
|
co_changes[(a, b)] += 1
|
|
62
69
|
|
|
70
|
+
_ping(f"{len(co_changes)} pairs, filtering and persisting")
|
|
63
71
|
results = []
|
|
64
72
|
for (a, b), pair_count in co_changes.items():
|
|
65
73
|
denom = max(file_changes[a], file_changes[b])
|