codespine 0.1.1__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- codespine-0.1.1/LICENSE +21 -0
- codespine-0.1.1/PKG-INFO +336 -0
- codespine-0.1.1/README.md +272 -0
- codespine-0.1.1/codespine/__init__.py +4 -0
- codespine-0.1.1/codespine/analysis/__init__.py +1 -0
- codespine-0.1.1/codespine/analysis/community.py +75 -0
- codespine-0.1.1/codespine/analysis/context.py +24 -0
- codespine-0.1.1/codespine/analysis/coupling.py +119 -0
- codespine-0.1.1/codespine/analysis/deadcode.py +107 -0
- codespine-0.1.1/codespine/analysis/flow.py +77 -0
- codespine-0.1.1/codespine/analysis/impact.py +90 -0
- codespine-0.1.1/codespine/cli.py +424 -0
- codespine-0.1.1/codespine/config.py +22 -0
- codespine-0.1.1/codespine/db/__init__.py +1 -0
- codespine-0.1.1/codespine/db/schema.py +82 -0
- codespine-0.1.1/codespine/db/store.py +313 -0
- codespine-0.1.1/codespine/diff/__init__.py +1 -0
- codespine-0.1.1/codespine/diff/branch_diff.py +163 -0
- codespine-0.1.1/codespine/indexer/__init__.py +1 -0
- codespine-0.1.1/codespine/indexer/call_resolver.py +137 -0
- codespine-0.1.1/codespine/indexer/engine.py +305 -0
- codespine-0.1.1/codespine/indexer/java_parser.py +350 -0
- codespine-0.1.1/codespine/indexer/symbol_builder.py +32 -0
- codespine-0.1.1/codespine/mcp/__init__.py +1 -0
- codespine-0.1.1/codespine/mcp/server.py +67 -0
- codespine-0.1.1/codespine/noise/__init__.py +1 -0
- codespine-0.1.1/codespine/noise/blocklist.py +37 -0
- codespine-0.1.1/codespine/search/__init__.py +1 -0
- codespine-0.1.1/codespine/search/bm25.py +52 -0
- codespine-0.1.1/codespine/search/fuzzy.py +36 -0
- codespine-0.1.1/codespine/search/hybrid.py +80 -0
- codespine-0.1.1/codespine/search/rrf.py +9 -0
- codespine-0.1.1/codespine/search/vector.py +113 -0
- codespine-0.1.1/codespine/watch/__init__.py +1 -0
- codespine-0.1.1/codespine/watch/watcher.py +38 -0
- codespine-0.1.1/codespine.egg-info/PKG-INFO +336 -0
- codespine-0.1.1/codespine.egg-info/SOURCES.txt +48 -0
- codespine-0.1.1/codespine.egg-info/dependency_links.txt +1 -0
- codespine-0.1.1/codespine.egg-info/entry_points.txt +3 -0
- codespine-0.1.1/codespine.egg-info/requires.txt +21 -0
- codespine-0.1.1/codespine.egg-info/top_level.txt +2 -0
- codespine-0.1.1/gindex.py +10 -0
- codespine-0.1.1/pyproject.toml +67 -0
- codespine-0.1.1/setup.cfg +4 -0
- codespine-0.1.1/setup.py +3 -0
- codespine-0.1.1/tests/test_branch_diff_normalize.py +14 -0
- codespine-0.1.1/tests/test_call_resolver.py +30 -0
- codespine-0.1.1/tests/test_index_and_hybrid.py +21 -0
- codespine-0.1.1/tests/test_java_parser.py +28 -0
- codespine-0.1.1/tests/test_search_ranking.py +17 -0
codespine-0.1.1/LICENSE
ADDED
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2026 CodeSpine contributors
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|
codespine-0.1.1/PKG-INFO
ADDED
|
@@ -0,0 +1,336 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: codespine
|
|
3
|
+
Version: 0.1.1
|
|
4
|
+
Summary: Local Java code intelligence indexer backed by a graph database
|
|
5
|
+
Author: CodeSpine contributors
|
|
6
|
+
License: MIT License
|
|
7
|
+
|
|
8
|
+
Copyright (c) 2026 CodeSpine contributors
|
|
9
|
+
|
|
10
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
11
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
12
|
+
in the Software without restriction, including without limitation the rights
|
|
13
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
14
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
15
|
+
furnished to do so, subject to the following conditions:
|
|
16
|
+
|
|
17
|
+
The above copyright notice and this permission notice shall be included in all
|
|
18
|
+
copies or substantial portions of the Software.
|
|
19
|
+
|
|
20
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
21
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
22
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
23
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
24
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
25
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
26
|
+
SOFTWARE.
|
|
27
|
+
|
|
28
|
+
Project-URL: Homepage, https://github.com/vinayak3022/codeSpine
|
|
29
|
+
Project-URL: Repository, https://github.com/vinayak3022/codeSpine
|
|
30
|
+
Project-URL: Issues, https://github.com/vinayak3022/codeSpine/issues
|
|
31
|
+
Keywords: java,code-indexing,graph,kuzu,mcp
|
|
32
|
+
Classifier: Development Status :: 3 - Alpha
|
|
33
|
+
Classifier: Intended Audience :: Developers
|
|
34
|
+
Classifier: License :: OSI Approved :: MIT License
|
|
35
|
+
Classifier: Programming Language :: Python :: 3
|
|
36
|
+
Classifier: Programming Language :: Python :: 3.10
|
|
37
|
+
Classifier: Programming Language :: Python :: 3.11
|
|
38
|
+
Classifier: Programming Language :: Python :: 3.12
|
|
39
|
+
Classifier: Programming Language :: Python :: 3.13
|
|
40
|
+
Classifier: Topic :: Software Development :: Libraries
|
|
41
|
+
Classifier: Topic :: Software Development :: Quality Assurance
|
|
42
|
+
Requires-Python: >=3.10
|
|
43
|
+
Description-Content-Type: text/markdown
|
|
44
|
+
License-File: LICENSE
|
|
45
|
+
Requires-Dist: click
|
|
46
|
+
Requires-Dist: kuzu
|
|
47
|
+
Requires-Dist: tree-sitter
|
|
48
|
+
Requires-Dist: tree-sitter-java
|
|
49
|
+
Requires-Dist: fastmcp
|
|
50
|
+
Requires-Dist: psutil
|
|
51
|
+
Requires-Dist: watchfiles
|
|
52
|
+
Provides-Extra: ml
|
|
53
|
+
Requires-Dist: sentence-transformers; extra == "ml"
|
|
54
|
+
Requires-Dist: numpy; extra == "ml"
|
|
55
|
+
Provides-Extra: community
|
|
56
|
+
Requires-Dist: igraph; extra == "community"
|
|
57
|
+
Requires-Dist: leidenalg; extra == "community"
|
|
58
|
+
Provides-Extra: full
|
|
59
|
+
Requires-Dist: sentence-transformers; extra == "full"
|
|
60
|
+
Requires-Dist: numpy; extra == "full"
|
|
61
|
+
Requires-Dist: igraph; extra == "full"
|
|
62
|
+
Requires-Dist: leidenalg; extra == "full"
|
|
63
|
+
Dynamic: license-file
|
|
64
|
+
|
|
65
|
+
# CodeSpine
|
|
66
|
+
|
|
67
|
+
CodeSpine is a Java-native code intelligence graph for coding agents.
|
|
68
|
+
|
|
69
|
+
It indexes your Java codebase into a graph, then serves high-signal retrieval and
|
|
70
|
+
analysis APIs over CLI + MCP for refactoring, impact analysis, architecture
|
|
71
|
+
navigation, and safe change planning.
|
|
72
|
+
|
|
73
|
+
## Why CodeSpine
|
|
74
|
+
|
|
75
|
+
Most tools answer "where is this symbol?".
|
|
76
|
+
CodeSpine answers:
|
|
77
|
+
|
|
78
|
+
- What depends on this?
|
|
79
|
+
- What else changed with this historically?
|
|
80
|
+
- Is this dead or framework-exempt?
|
|
81
|
+
- Which architectural cluster/flow is this in?
|
|
82
|
+
- What changed between branches at symbol granularity?
|
|
83
|
+
|
|
84
|
+
## Core Capabilities
|
|
85
|
+
|
|
86
|
+
### 1) Hybrid Search (BM25 + Vector + Fuzzy + RRF)
|
|
87
|
+
- Lexical ranking (BM25-based)
|
|
88
|
+
- Semantic matching (local embeddings)
|
|
89
|
+
- Typo-tolerant fuzzy matching
|
|
90
|
+
- Reciprocal Rank Fusion with ranking multipliers
|
|
91
|
+
|
|
92
|
+
### 2) Impact Analysis
|
|
93
|
+
- Traverses call graph + type/inheritance edges + coupling edges
|
|
94
|
+
- Groups results by depth (`1`, `2`, `3+`)
|
|
95
|
+
- Carries confidence (`1.0`, `0.8`, `0.5`) per edge
|
|
96
|
+
|
|
97
|
+
### 3) Java-Aware Dead Code Detection
|
|
98
|
+
- Goes beyond zero-caller checks, with exemption passes for:
|
|
99
|
+
- constructors, tests, `main(String[] args)`
|
|
100
|
+
- override/interface contracts
|
|
101
|
+
- common lifecycle/framework annotations
|
|
102
|
+
- reflection/bean-friendly method patterns
|
|
103
|
+
|
|
104
|
+
### 4) Execution Flow Tracing
|
|
105
|
+
- Detects framework-agnostic entry points (`main`, tests, public roots)
|
|
106
|
+
- BFS flow traces with depth
|
|
107
|
+
- Flow classification (`intra_community`, `cross_community`)
|
|
108
|
+
|
|
109
|
+
### 5) Community Detection
|
|
110
|
+
- Leiden-based clustering when dependencies are present
|
|
111
|
+
- Heuristic fallback when Leiden stack is unavailable
|
|
112
|
+
- Queryable symbol-to-community mapping
|
|
113
|
+
|
|
114
|
+
### 6) Git Change Coupling
|
|
115
|
+
- Mines recent git history (default 6 months)
|
|
116
|
+
- Links co-changing files with coupling strength
|
|
117
|
+
- Surfaces hidden dependencies in impact workflows
|
|
118
|
+
|
|
119
|
+
### 7) Watch Mode
|
|
120
|
+
- Live file watching for changed Java files
|
|
121
|
+
- Incremental reindexing
|
|
122
|
+
- Periodic global refresh phases (community/flow/deadcode/coupling)
|
|
123
|
+
|
|
124
|
+
### 8) Branch Diff (Symbol-Level)
|
|
125
|
+
- Uses git worktrees
|
|
126
|
+
- Diffs class/method symbols (`added`, `removed`, `modified`)
|
|
127
|
+
- Uses normalized structural hashes to reduce formatting-only noise
|
|
128
|
+
|
|
129
|
+
## Performance Model
|
|
130
|
+
|
|
131
|
+
CodeSpine includes:
|
|
132
|
+
- Hash-based incremental invalidation (only changed files reindexed)
|
|
133
|
+
- Persistent embedding cache (`sqlite`) for repeat semantic queries
|
|
134
|
+
- Transactional write path during indexing to reduce commit overhead
|
|
135
|
+
|
|
136
|
+
## Install
|
|
137
|
+
|
|
138
|
+
### Local editable install
|
|
139
|
+
|
|
140
|
+
```bash
|
|
141
|
+
python3 -m venv .venv
|
|
142
|
+
source .venv/bin/activate
|
|
143
|
+
pip install -e .
|
|
144
|
+
```
|
|
145
|
+
|
|
146
|
+
If your environment is externally managed (PEP 668), use a virtualenv as above.
|
|
147
|
+
|
|
148
|
+
You can also use `pip3`:
|
|
149
|
+
|
|
150
|
+
```bash
|
|
151
|
+
pip3 install -e .
|
|
152
|
+
```
|
|
153
|
+
|
|
154
|
+
### Install from GitHub
|
|
155
|
+
|
|
156
|
+
```bash
|
|
157
|
+
pip install "git+https://github.com/vinayak3022/codeSpine.git"
|
|
158
|
+
```
|
|
159
|
+
|
|
160
|
+
or
|
|
161
|
+
|
|
162
|
+
```bash
|
|
163
|
+
pip3 install "git+https://github.com/vinayak3022/codeSpine.git"
|
|
164
|
+
```
|
|
165
|
+
|
|
166
|
+
### Install from PyPI (after first release is published)
|
|
167
|
+
|
|
168
|
+
```bash
|
|
169
|
+
pip install codespine
|
|
170
|
+
```
|
|
171
|
+
|
|
172
|
+
or
|
|
173
|
+
|
|
174
|
+
```bash
|
|
175
|
+
pip3 install codespine
|
|
176
|
+
```
|
|
177
|
+
|
|
178
|
+
### Optional extras
|
|
179
|
+
|
|
180
|
+
- `pip install -e ".[ml]"` for local embedding model dependencies
|
|
181
|
+
- `pip install -e ".[community]"` for Leiden community detection stack
|
|
182
|
+
- `pip install -e ".[full]"` for all optional features
|
|
183
|
+
|
|
184
|
+
## Quick Start
|
|
185
|
+
|
|
186
|
+
```bash
|
|
187
|
+
# 1) index a repo
|
|
188
|
+
codespine analyse /path/to/java-project --full
|
|
189
|
+
|
|
190
|
+
# 2) search by concept/typo/name
|
|
191
|
+
codespine search "payment validation typo procss" --k 20 --json
|
|
192
|
+
|
|
193
|
+
# 3) get actionable context in one call
|
|
194
|
+
codespine context "processPayment" --max-depth 3 --json
|
|
195
|
+
|
|
196
|
+
# 4) estimate blast radius before refactor
|
|
197
|
+
codespine impact "com.example.Service#processPayment(java.lang.String)" --max-depth 4 --json
|
|
198
|
+
```
|
|
199
|
+
|
|
200
|
+
Example output:
|
|
201
|
+
|
|
202
|
+
```text
|
|
203
|
+
$ codespine analyse .
|
|
204
|
+
Walking files... 142 files found
|
|
205
|
+
Parsing code... 142/142
|
|
206
|
+
Tracing calls... 847 calls resolved
|
|
207
|
+
Analyzing types... 234 type relationships
|
|
208
|
+
Detecting communities... 8 clusters found
|
|
209
|
+
Detecting execution flows... 34 processes found
|
|
210
|
+
Finding dead code... 12 unreachable symbols
|
|
211
|
+
Analyzing git history... 18 coupled file pairs
|
|
212
|
+
Generating embeddings... 623 vectors stored
|
|
213
|
+
|
|
214
|
+
Done in 4.2s - 623 symbols, 1847 edges, 8 clusters, 34 flows
|
|
215
|
+
```
|
|
216
|
+
|
|
217
|
+
## CLI Commands
|
|
218
|
+
|
|
219
|
+
### Indexing and Retrieval
|
|
220
|
+
- `codespine analyse <path> [--full|--incremental]`
|
|
221
|
+
- `codespine search <query> [--k 20] [--json]`
|
|
222
|
+
- `codespine context <query> [--max-depth 3] [--json]`
|
|
223
|
+
|
|
224
|
+
### Analysis
|
|
225
|
+
- `codespine impact <symbol> [--max-depth 4] [--json]`
|
|
226
|
+
- `codespine deadcode [--limit 200] [--json]`
|
|
227
|
+
- `codespine flow [--entry <symbol>] [--max-depth 6] [--json]`
|
|
228
|
+
- `codespine community [--symbol <symbol>] [--json]`
|
|
229
|
+
- `codespine coupling [--months 6] [--min-strength 0.3] [--min-cochanges 3] [--json]`
|
|
230
|
+
|
|
231
|
+
### Operations
|
|
232
|
+
- `codespine watch [--path .] [--global-interval 30]`
|
|
233
|
+
- `codespine diff <base>..<head> [--json]`
|
|
234
|
+
- `codespine cypher <query> [--json]`
|
|
235
|
+
- `codespine list [--json]`
|
|
236
|
+
- `codespine stats`
|
|
237
|
+
- `codespine status [--json]`
|
|
238
|
+
- `codespine setup`
|
|
239
|
+
- `codespine clean [--force]`
|
|
240
|
+
|
|
241
|
+
### MCP Service
|
|
242
|
+
- `codespine start`
|
|
243
|
+
- `codespine stop`
|
|
244
|
+
- `codespine serve` (alias of `start`)
|
|
245
|
+
- `codespine mcp` (foreground stdio MCP)
|
|
246
|
+
|
|
247
|
+
## MCP JSON (Paste Into `mcp.json`)
|
|
248
|
+
|
|
249
|
+
Use this if your MCP client supports stdio servers:
|
|
250
|
+
|
|
251
|
+
```json
|
|
252
|
+
{
|
|
253
|
+
"mcpServers": {
|
|
254
|
+
"codespine": {
|
|
255
|
+
"command": "codespine",
|
|
256
|
+
"args": ["mcp"]
|
|
257
|
+
}
|
|
258
|
+
}
|
|
259
|
+
}
|
|
260
|
+
```
|
|
261
|
+
|
|
262
|
+
If `codespine` is not on your PATH, use an absolute path for `command`, for example:
|
|
263
|
+
- macOS/Linux: `"/Users/<you>/path/to/venv/bin/codespine"`
|
|
264
|
+
- Windows: `"C:\\Users\\<you>\\path\\to\\venv\\Scripts\\codespine.exe"`
|
|
265
|
+
|
|
266
|
+
Optional working directory (recommended for repo-scoped usage):
|
|
267
|
+
|
|
268
|
+
```json
|
|
269
|
+
{
|
|
270
|
+
"mcpServers": {
|
|
271
|
+
"codespine": {
|
|
272
|
+
"command": "codespine",
|
|
273
|
+
"args": ["mcp"],
|
|
274
|
+
"cwd": "/absolute/path/to/your/repo"
|
|
275
|
+
}
|
|
276
|
+
}
|
|
277
|
+
}
|
|
278
|
+
```
|
|
279
|
+
|
|
280
|
+
## MCP Tool Surface
|
|
281
|
+
|
|
282
|
+
- `search_hybrid(query, k=20)`
|
|
283
|
+
- `get_symbol_context(query, max_depth=3)`
|
|
284
|
+
- `get_impact(symbol, max_depth=4)`
|
|
285
|
+
- `detect_dead_code(limit=200)`
|
|
286
|
+
- `trace_execution_flows(entry_symbol=None, max_depth=6)`
|
|
287
|
+
- `get_symbol_community(symbol)`
|
|
288
|
+
- `get_change_coupling(symbol=None, months=6, min_strength=0.3, min_cochanges=3)`
|
|
289
|
+
- `compare_branches(base_ref, head_ref)`
|
|
290
|
+
- `get_codebase_stats()`
|
|
291
|
+
- `run_cypher(query)`
|
|
292
|
+
|
|
293
|
+
## Runtime Artifacts
|
|
294
|
+
|
|
295
|
+
- Graph DB: `~/.codespine_db`
|
|
296
|
+
- MCP PID: `~/.codespine.pid`
|
|
297
|
+
- Log file: `~/.codespine.log`
|
|
298
|
+
- Embedding cache: `~/.codespine_embedding_cache.sqlite3`
|
|
299
|
+
|
|
300
|
+
## Architecture
|
|
301
|
+
|
|
302
|
+
- `codespine/indexer`: Java parsing, symbols, call/type resolution
|
|
303
|
+
- `codespine/db`: Kuzu schema and persistence
|
|
304
|
+
- `codespine/search`: BM25/fuzzy/vector/RRF ranking
|
|
305
|
+
- `codespine/analysis`: impact/deadcode/flow/community/coupling/context
|
|
306
|
+
- `codespine/diff`: branch comparison at symbol level
|
|
307
|
+
- `codespine/watch`: incremental watch pipeline
|
|
308
|
+
- `codespine/mcp`: MCP tool server
|
|
309
|
+
- `codespine/noise`: noise blocklists for cleaner call graphs
|
|
310
|
+
|
|
311
|
+
## Security and Governance
|
|
312
|
+
|
|
313
|
+
- Security policy: [`SECURITY.md`](SECURITY.md)
|
|
314
|
+
- Contributions: [`CONTRIBUTING.md`](CONTRIBUTING.md)
|
|
315
|
+
- Code of conduct: [`CODE_OF_CONDUCT.md`](CODE_OF_CONDUCT.md)
|
|
316
|
+
- Branch protection runbook: [`docs/GITHUB_HARDENING.md`](docs/GITHUB_HARDENING.md)
|
|
317
|
+
|
|
318
|
+
## Publish to PyPI
|
|
319
|
+
|
|
320
|
+
This repo includes a release workflow:
|
|
321
|
+
- [`.github/workflows/publish-pypi.yml`](.github/workflows/publish-pypi.yml)
|
|
322
|
+
|
|
323
|
+
Recommended setup (one-time):
|
|
324
|
+
1. Create project on PyPI with the same name (`codespine`) or update `project.name` if unavailable.
|
|
325
|
+
2. In PyPI, configure Trusted Publisher for this GitHub repo/workflow.
|
|
326
|
+
3. In GitHub, keep the `pypi` environment enabled for publishing.
|
|
327
|
+
|
|
328
|
+
Release flow:
|
|
329
|
+
1. Bump version in [`pyproject.toml`](pyproject.toml).
|
|
330
|
+
2. Push commit + tag (for example `v0.1.1`).
|
|
331
|
+
3. Create a GitHub Release for that tag.
|
|
332
|
+
4. Workflow builds and publishes to PyPI.
|
|
333
|
+
|
|
334
|
+
## Compatibility
|
|
335
|
+
|
|
336
|
+
`gindex.py` is retained as a compatibility shim for one release cycle.
|
|
@@ -0,0 +1,272 @@
|
|
|
1
|
+
# CodeSpine
|
|
2
|
+
|
|
3
|
+
CodeSpine is a Java-native code intelligence graph for coding agents.
|
|
4
|
+
|
|
5
|
+
It indexes your Java codebase into a graph, then serves high-signal retrieval and
|
|
6
|
+
analysis APIs over CLI + MCP for refactoring, impact analysis, architecture
|
|
7
|
+
navigation, and safe change planning.
|
|
8
|
+
|
|
9
|
+
## Why CodeSpine
|
|
10
|
+
|
|
11
|
+
Most tools answer "where is this symbol?".
|
|
12
|
+
CodeSpine answers:
|
|
13
|
+
|
|
14
|
+
- What depends on this?
|
|
15
|
+
- What else changed with this historically?
|
|
16
|
+
- Is this dead or framework-exempt?
|
|
17
|
+
- Which architectural cluster/flow is this in?
|
|
18
|
+
- What changed between branches at symbol granularity?
|
|
19
|
+
|
|
20
|
+
## Core Capabilities
|
|
21
|
+
|
|
22
|
+
### 1) Hybrid Search (BM25 + Vector + Fuzzy + RRF)
|
|
23
|
+
- Lexical ranking (BM25-based)
|
|
24
|
+
- Semantic matching (local embeddings)
|
|
25
|
+
- Typo-tolerant fuzzy matching
|
|
26
|
+
- Reciprocal Rank Fusion with ranking multipliers
|
|
27
|
+
|
|
28
|
+
### 2) Impact Analysis
|
|
29
|
+
- Traverses call graph + type/inheritance edges + coupling edges
|
|
30
|
+
- Groups results by depth (`1`, `2`, `3+`)
|
|
31
|
+
- Carries confidence (`1.0`, `0.8`, `0.5`) per edge
|
|
32
|
+
|
|
33
|
+
### 3) Java-Aware Dead Code Detection
|
|
34
|
+
- Goes beyond zero-caller checks, with exemption passes for:
|
|
35
|
+
- constructors, tests, `main(String[] args)`
|
|
36
|
+
- override/interface contracts
|
|
37
|
+
- common lifecycle/framework annotations
|
|
38
|
+
- reflection/bean-friendly method patterns
|
|
39
|
+
|
|
40
|
+
### 4) Execution Flow Tracing
|
|
41
|
+
- Detects framework-agnostic entry points (`main`, tests, public roots)
|
|
42
|
+
- BFS flow traces with depth
|
|
43
|
+
- Flow classification (`intra_community`, `cross_community`)
|
|
44
|
+
|
|
45
|
+
### 5) Community Detection
|
|
46
|
+
- Leiden-based clustering when dependencies are present
|
|
47
|
+
- Heuristic fallback when Leiden stack is unavailable
|
|
48
|
+
- Queryable symbol-to-community mapping
|
|
49
|
+
|
|
50
|
+
### 6) Git Change Coupling
|
|
51
|
+
- Mines recent git history (default 6 months)
|
|
52
|
+
- Links co-changing files with coupling strength
|
|
53
|
+
- Surfaces hidden dependencies in impact workflows
|
|
54
|
+
|
|
55
|
+
### 7) Watch Mode
|
|
56
|
+
- Live file watching for changed Java files
|
|
57
|
+
- Incremental reindexing
|
|
58
|
+
- Periodic global refresh phases (community/flow/deadcode/coupling)
|
|
59
|
+
|
|
60
|
+
### 8) Branch Diff (Symbol-Level)
|
|
61
|
+
- Uses git worktrees
|
|
62
|
+
- Diffs class/method symbols (`added`, `removed`, `modified`)
|
|
63
|
+
- Uses normalized structural hashes to reduce formatting-only noise
|
|
64
|
+
|
|
65
|
+
## Performance Model
|
|
66
|
+
|
|
67
|
+
CodeSpine includes:
|
|
68
|
+
- Hash-based incremental invalidation (only changed files reindexed)
|
|
69
|
+
- Persistent embedding cache (`sqlite`) for repeat semantic queries
|
|
70
|
+
- Transactional write path during indexing to reduce commit overhead
|
|
71
|
+
|
|
72
|
+
## Install
|
|
73
|
+
|
|
74
|
+
### Local editable install
|
|
75
|
+
|
|
76
|
+
```bash
|
|
77
|
+
python3 -m venv .venv
|
|
78
|
+
source .venv/bin/activate
|
|
79
|
+
pip install -e .
|
|
80
|
+
```
|
|
81
|
+
|
|
82
|
+
If your environment is externally managed (PEP 668), use a virtualenv as above.
|
|
83
|
+
|
|
84
|
+
You can also use `pip3`:
|
|
85
|
+
|
|
86
|
+
```bash
|
|
87
|
+
pip3 install -e .
|
|
88
|
+
```
|
|
89
|
+
|
|
90
|
+
### Install from GitHub
|
|
91
|
+
|
|
92
|
+
```bash
|
|
93
|
+
pip install "git+https://github.com/vinayak3022/codeSpine.git"
|
|
94
|
+
```
|
|
95
|
+
|
|
96
|
+
or
|
|
97
|
+
|
|
98
|
+
```bash
|
|
99
|
+
pip3 install "git+https://github.com/vinayak3022/codeSpine.git"
|
|
100
|
+
```
|
|
101
|
+
|
|
102
|
+
### Install from PyPI (after first release is published)
|
|
103
|
+
|
|
104
|
+
```bash
|
|
105
|
+
pip install codespine
|
|
106
|
+
```
|
|
107
|
+
|
|
108
|
+
or
|
|
109
|
+
|
|
110
|
+
```bash
|
|
111
|
+
pip3 install codespine
|
|
112
|
+
```
|
|
113
|
+
|
|
114
|
+
### Optional extras
|
|
115
|
+
|
|
116
|
+
- `pip install -e ".[ml]"` for local embedding model dependencies
|
|
117
|
+
- `pip install -e ".[community]"` for Leiden community detection stack
|
|
118
|
+
- `pip install -e ".[full]"` for all optional features
|
|
119
|
+
|
|
120
|
+
## Quick Start
|
|
121
|
+
|
|
122
|
+
```bash
|
|
123
|
+
# 1) index a repo
|
|
124
|
+
codespine analyse /path/to/java-project --full
|
|
125
|
+
|
|
126
|
+
# 2) search by concept/typo/name
|
|
127
|
+
codespine search "payment validation typo procss" --k 20 --json
|
|
128
|
+
|
|
129
|
+
# 3) get actionable context in one call
|
|
130
|
+
codespine context "processPayment" --max-depth 3 --json
|
|
131
|
+
|
|
132
|
+
# 4) estimate blast radius before refactor
|
|
133
|
+
codespine impact "com.example.Service#processPayment(java.lang.String)" --max-depth 4 --json
|
|
134
|
+
```
|
|
135
|
+
|
|
136
|
+
Example output:
|
|
137
|
+
|
|
138
|
+
```text
|
|
139
|
+
$ codespine analyse .
|
|
140
|
+
Walking files... 142 files found
|
|
141
|
+
Parsing code... 142/142
|
|
142
|
+
Tracing calls... 847 calls resolved
|
|
143
|
+
Analyzing types... 234 type relationships
|
|
144
|
+
Detecting communities... 8 clusters found
|
|
145
|
+
Detecting execution flows... 34 processes found
|
|
146
|
+
Finding dead code... 12 unreachable symbols
|
|
147
|
+
Analyzing git history... 18 coupled file pairs
|
|
148
|
+
Generating embeddings... 623 vectors stored
|
|
149
|
+
|
|
150
|
+
Done in 4.2s - 623 symbols, 1847 edges, 8 clusters, 34 flows
|
|
151
|
+
```
|
|
152
|
+
|
|
153
|
+
## CLI Commands
|
|
154
|
+
|
|
155
|
+
### Indexing and Retrieval
|
|
156
|
+
- `codespine analyse <path> [--full|--incremental]`
|
|
157
|
+
- `codespine search <query> [--k 20] [--json]`
|
|
158
|
+
- `codespine context <query> [--max-depth 3] [--json]`
|
|
159
|
+
|
|
160
|
+
### Analysis
|
|
161
|
+
- `codespine impact <symbol> [--max-depth 4] [--json]`
|
|
162
|
+
- `codespine deadcode [--limit 200] [--json]`
|
|
163
|
+
- `codespine flow [--entry <symbol>] [--max-depth 6] [--json]`
|
|
164
|
+
- `codespine community [--symbol <symbol>] [--json]`
|
|
165
|
+
- `codespine coupling [--months 6] [--min-strength 0.3] [--min-cochanges 3] [--json]`
|
|
166
|
+
|
|
167
|
+
### Operations
|
|
168
|
+
- `codespine watch [--path .] [--global-interval 30]`
|
|
169
|
+
- `codespine diff <base>..<head> [--json]`
|
|
170
|
+
- `codespine cypher <query> [--json]`
|
|
171
|
+
- `codespine list [--json]`
|
|
172
|
+
- `codespine stats`
|
|
173
|
+
- `codespine status [--json]`
|
|
174
|
+
- `codespine setup`
|
|
175
|
+
- `codespine clean [--force]`
|
|
176
|
+
|
|
177
|
+
### MCP Service
|
|
178
|
+
- `codespine start`
|
|
179
|
+
- `codespine stop`
|
|
180
|
+
- `codespine serve` (alias of `start`)
|
|
181
|
+
- `codespine mcp` (foreground stdio MCP)
|
|
182
|
+
|
|
183
|
+
## MCP JSON (Paste Into `mcp.json`)
|
|
184
|
+
|
|
185
|
+
Use this if your MCP client supports stdio servers:
|
|
186
|
+
|
|
187
|
+
```json
|
|
188
|
+
{
|
|
189
|
+
"mcpServers": {
|
|
190
|
+
"codespine": {
|
|
191
|
+
"command": "codespine",
|
|
192
|
+
"args": ["mcp"]
|
|
193
|
+
}
|
|
194
|
+
}
|
|
195
|
+
}
|
|
196
|
+
```
|
|
197
|
+
|
|
198
|
+
If `codespine` is not on your PATH, use an absolute path for `command`, for example:
|
|
199
|
+
- macOS/Linux: `"/Users/<you>/path/to/venv/bin/codespine"`
|
|
200
|
+
- Windows: `"C:\\Users\\<you>\\path\\to\\venv\\Scripts\\codespine.exe"`
|
|
201
|
+
|
|
202
|
+
Optional working directory (recommended for repo-scoped usage):
|
|
203
|
+
|
|
204
|
+
```json
|
|
205
|
+
{
|
|
206
|
+
"mcpServers": {
|
|
207
|
+
"codespine": {
|
|
208
|
+
"command": "codespine",
|
|
209
|
+
"args": ["mcp"],
|
|
210
|
+
"cwd": "/absolute/path/to/your/repo"
|
|
211
|
+
}
|
|
212
|
+
}
|
|
213
|
+
}
|
|
214
|
+
```
|
|
215
|
+
|
|
216
|
+
## MCP Tool Surface
|
|
217
|
+
|
|
218
|
+
- `search_hybrid(query, k=20)`
|
|
219
|
+
- `get_symbol_context(query, max_depth=3)`
|
|
220
|
+
- `get_impact(symbol, max_depth=4)`
|
|
221
|
+
- `detect_dead_code(limit=200)`
|
|
222
|
+
- `trace_execution_flows(entry_symbol=None, max_depth=6)`
|
|
223
|
+
- `get_symbol_community(symbol)`
|
|
224
|
+
- `get_change_coupling(symbol=None, months=6, min_strength=0.3, min_cochanges=3)`
|
|
225
|
+
- `compare_branches(base_ref, head_ref)`
|
|
226
|
+
- `get_codebase_stats()`
|
|
227
|
+
- `run_cypher(query)`
|
|
228
|
+
|
|
229
|
+
## Runtime Artifacts
|
|
230
|
+
|
|
231
|
+
- Graph DB: `~/.codespine_db`
|
|
232
|
+
- MCP PID: `~/.codespine.pid`
|
|
233
|
+
- Log file: `~/.codespine.log`
|
|
234
|
+
- Embedding cache: `~/.codespine_embedding_cache.sqlite3`
|
|
235
|
+
|
|
236
|
+
## Architecture
|
|
237
|
+
|
|
238
|
+
- `codespine/indexer`: Java parsing, symbols, call/type resolution
|
|
239
|
+
- `codespine/db`: Kuzu schema and persistence
|
|
240
|
+
- `codespine/search`: BM25/fuzzy/vector/RRF ranking
|
|
241
|
+
- `codespine/analysis`: impact/deadcode/flow/community/coupling/context
|
|
242
|
+
- `codespine/diff`: branch comparison at symbol level
|
|
243
|
+
- `codespine/watch`: incremental watch pipeline
|
|
244
|
+
- `codespine/mcp`: MCP tool server
|
|
245
|
+
- `codespine/noise`: noise blocklists for cleaner call graphs
|
|
246
|
+
|
|
247
|
+
## Security and Governance
|
|
248
|
+
|
|
249
|
+
- Security policy: [`SECURITY.md`](SECURITY.md)
|
|
250
|
+
- Contributions: [`CONTRIBUTING.md`](CONTRIBUTING.md)
|
|
251
|
+
- Code of conduct: [`CODE_OF_CONDUCT.md`](CODE_OF_CONDUCT.md)
|
|
252
|
+
- Branch protection runbook: [`docs/GITHUB_HARDENING.md`](docs/GITHUB_HARDENING.md)
|
|
253
|
+
|
|
254
|
+
## Publish to PyPI
|
|
255
|
+
|
|
256
|
+
This repo includes a release workflow:
|
|
257
|
+
- [`.github/workflows/publish-pypi.yml`](.github/workflows/publish-pypi.yml)
|
|
258
|
+
|
|
259
|
+
Recommended setup (one-time):
|
|
260
|
+
1. Create project on PyPI with the same name (`codespine`) or update `project.name` if unavailable.
|
|
261
|
+
2. In PyPI, configure Trusted Publisher for this GitHub repo/workflow.
|
|
262
|
+
3. In GitHub, keep the `pypi` environment enabled for publishing.
|
|
263
|
+
|
|
264
|
+
Release flow:
|
|
265
|
+
1. Bump version in [`pyproject.toml`](pyproject.toml).
|
|
266
|
+
2. Push commit + tag (for example `v0.1.1`).
|
|
267
|
+
3. Create a GitHub Release for that tag.
|
|
268
|
+
4. Workflow builds and publishes to PyPI.
|
|
269
|
+
|
|
270
|
+
## Compatibility
|
|
271
|
+
|
|
272
|
+
`gindex.py` is retained as a compatibility shim for one release cycle.
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
"""Analysis layer."""
|