codespine 0.9.0__tar.gz → 0.9.2__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {codespine-0.9.0 → codespine-0.9.2}/PKG-INFO +92 -36
- {codespine-0.9.0 → codespine-0.9.2}/README.md +91 -35
- {codespine-0.9.0 → codespine-0.9.2}/codespine/__init__.py +1 -1
- {codespine-0.9.0 → codespine-0.9.2}/codespine/analysis/crossmodule.py +27 -9
- {codespine-0.9.0 → codespine-0.9.2}/codespine/db/store.py +155 -63
- {codespine-0.9.0 → codespine-0.9.2}/codespine/indexer/engine.py +72 -4
- {codespine-0.9.0 → codespine-0.9.2}/codespine/indexer/java_parser.py +46 -41
- {codespine-0.9.0 → codespine-0.9.2}/codespine/watch/watcher.py +5 -3
- {codespine-0.9.0 → codespine-0.9.2}/codespine.egg-info/PKG-INFO +92 -36
- {codespine-0.9.0 → codespine-0.9.2}/pyproject.toml +1 -1
- {codespine-0.9.0 → codespine-0.9.2}/LICENSE +0 -0
- {codespine-0.9.0 → codespine-0.9.2}/codespine/analysis/__init__.py +0 -0
- {codespine-0.9.0 → codespine-0.9.2}/codespine/analysis/community.py +0 -0
- {codespine-0.9.0 → codespine-0.9.2}/codespine/analysis/context.py +0 -0
- {codespine-0.9.0 → codespine-0.9.2}/codespine/analysis/coupling.py +0 -0
- {codespine-0.9.0 → codespine-0.9.2}/codespine/analysis/deadcode.py +0 -0
- {codespine-0.9.0 → codespine-0.9.2}/codespine/analysis/flow.py +0 -0
- {codespine-0.9.0 → codespine-0.9.2}/codespine/analysis/impact.py +0 -0
- {codespine-0.9.0 → codespine-0.9.2}/codespine/cli.py +0 -0
- {codespine-0.9.0 → codespine-0.9.2}/codespine/config.py +0 -0
- {codespine-0.9.0 → codespine-0.9.2}/codespine/db/__init__.py +0 -0
- {codespine-0.9.0 → codespine-0.9.2}/codespine/db/schema.py +0 -0
- {codespine-0.9.0 → codespine-0.9.2}/codespine/diff/__init__.py +0 -0
- {codespine-0.9.0 → codespine-0.9.2}/codespine/diff/branch_diff.py +0 -0
- {codespine-0.9.0 → codespine-0.9.2}/codespine/guide.py +0 -0
- {codespine-0.9.0 → codespine-0.9.2}/codespine/indexer/__init__.py +0 -0
- {codespine-0.9.0 → codespine-0.9.2}/codespine/indexer/call_resolver.py +0 -0
- {codespine-0.9.0 → codespine-0.9.2}/codespine/indexer/di_resolver.py +0 -0
- {codespine-0.9.0 → codespine-0.9.2}/codespine/indexer/symbol_builder.py +0 -0
- {codespine-0.9.0 → codespine-0.9.2}/codespine/mcp/__init__.py +0 -0
- {codespine-0.9.0 → codespine-0.9.2}/codespine/mcp/server.py +0 -0
- {codespine-0.9.0 → codespine-0.9.2}/codespine/noise/__init__.py +0 -0
- {codespine-0.9.0 → codespine-0.9.2}/codespine/noise/blocklist.py +0 -0
- {codespine-0.9.0 → codespine-0.9.2}/codespine/overlay/__init__.py +0 -0
- {codespine-0.9.0 → codespine-0.9.2}/codespine/overlay/git_state.py +0 -0
- {codespine-0.9.0 → codespine-0.9.2}/codespine/overlay/merge.py +0 -0
- {codespine-0.9.0 → codespine-0.9.2}/codespine/overlay/store.py +0 -0
- {codespine-0.9.0 → codespine-0.9.2}/codespine/search/__init__.py +0 -0
- {codespine-0.9.0 → codespine-0.9.2}/codespine/search/bm25.py +0 -0
- {codespine-0.9.0 → codespine-0.9.2}/codespine/search/fuzzy.py +0 -0
- {codespine-0.9.0 → codespine-0.9.2}/codespine/search/hybrid.py +0 -0
- {codespine-0.9.0 → codespine-0.9.2}/codespine/search/rrf.py +0 -0
- {codespine-0.9.0 → codespine-0.9.2}/codespine/search/vector.py +0 -0
- {codespine-0.9.0 → codespine-0.9.2}/codespine/watch/__init__.py +0 -0
- {codespine-0.9.0 → codespine-0.9.2}/codespine/watch/git_hook.py +0 -0
- {codespine-0.9.0 → codespine-0.9.2}/codespine.egg-info/SOURCES.txt +0 -0
- {codespine-0.9.0 → codespine-0.9.2}/codespine.egg-info/dependency_links.txt +0 -0
- {codespine-0.9.0 → codespine-0.9.2}/codespine.egg-info/entry_points.txt +0 -0
- {codespine-0.9.0 → codespine-0.9.2}/codespine.egg-info/requires.txt +0 -0
- {codespine-0.9.0 → codespine-0.9.2}/codespine.egg-info/top_level.txt +0 -0
- {codespine-0.9.0 → codespine-0.9.2}/gindex.py +0 -0
- {codespine-0.9.0 → codespine-0.9.2}/setup.cfg +0 -0
- {codespine-0.9.0 → codespine-0.9.2}/tests/test_branch_diff_normalize.py +0 -0
- {codespine-0.9.0 → codespine-0.9.2}/tests/test_call_resolver.py +0 -0
- {codespine-0.9.0 → codespine-0.9.2}/tests/test_community_detection.py +0 -0
- {codespine-0.9.0 → codespine-0.9.2}/tests/test_deadcode.py +0 -0
- {codespine-0.9.0 → codespine-0.9.2}/tests/test_index_and_hybrid.py +0 -0
- {codespine-0.9.0 → codespine-0.9.2}/tests/test_java_parser.py +0 -0
- {codespine-0.9.0 → codespine-0.9.2}/tests/test_multimodule_index.py +0 -0
- {codespine-0.9.0 → codespine-0.9.2}/tests/test_overlay.py +0 -0
- {codespine-0.9.0 → codespine-0.9.2}/tests/test_search_ranking.py +0 -0
- {codespine-0.9.0 → codespine-0.9.2}/tests/test_store_recovery.py +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: codespine
|
|
3
|
-
Version: 0.9.0
|
|
3
|
+
Version: 0.9.2
|
|
4
4
|
Summary: Local Java code intelligence indexer backed by a graph database
|
|
5
5
|
Author: CodeSpine contributors
|
|
6
6
|
License: MIT License
|
|
@@ -68,9 +68,9 @@ CodeSpine cuts token burn for coding agents working on Java codebases.
|
|
|
68
68
|
|
|
69
69
|
Instead of having an agent open dozens of `.java` files to answer one question, CodeSpine indexes the codebase once and serves the structure over MCP. The agent asks for symbols, callers, impact, flows, dead code, and module boundaries directly, which means fewer file reads, fewer wasted context windows, and fewer hallucinated code paths.
|
|
70
70
|
|
|
71
|
-
It indexes classes, methods, calls, type relationships, cross-module links, git coupling, dead-code candidates, and execution flows so agents can work from graph answers first and source files second.
|
|
71
|
+
It indexes classes, methods, calls, type relationships, DI bindings, cross-module links, git coupling, dead-code candidates, and execution flows so agents can work from graph answers first and source files second.
|
|
72
72
|
|
|
73
|
-
|
|
73
|
+
File changes are written directly to the graph and are immediately queryable — no stale overlay merging, no OOM accumulation. The MCP daemon reloads from an atomic read replica the moment indexing or watch mode completes a batch.
|
|
74
74
|
|
|
75
75
|
The MCP daemon and the indexer run independently. Querying while a full re-index is running no longer causes crashes or memory contention — reads go to an isolated snapshot that is atomically updated when indexing completes.
|
|
76
76
|
|
|
@@ -78,9 +78,10 @@ The MCP daemon and the indexer run independently. Querying while a full re-index
|
|
|
78
78
|
|
|
79
79
|
- One MCP call can replace many file opens. `get_symbol_context("PaymentService")` returns a resolved neighborhood instead of forcing the agent to read every caller and callee file manually.
|
|
80
80
|
- Search is structure-aware. Agents can ask for a symbol, concept, impact radius, or dead-code candidate without scanning entire packages.
|
|
81
|
+
- DI bindings are first-class. `@Inject`, `@Autowired`, `@Bean`, and `@Provides` edges are resolved and included in impact analysis — Spring and Guice consumers are never missed.
|
|
81
82
|
- Multi-module repos stay scoped. Project-aware IDs and `project=` parameters reduce noise from unrelated modules and workspaces.
|
|
82
83
|
- Repeat sessions get cheaper. Once indexed, the agent reuses the graph instead of re-discovering the same relationships every turn.
|
|
83
|
-
- Active edits
|
|
84
|
+
- Active edits are visible immediately. Watch mode writes changes directly to the graph (not a slow overlay), so every MCP query reflects the latest file save.
|
|
84
85
|
|
|
85
86
|
## Install
|
|
86
87
|
|
|
@@ -96,8 +97,9 @@ pip install "codespine[ml]"
|
|
|
96
97
|
|
|
97
98
|
## What It Does
|
|
98
99
|
|
|
99
|
-
- Hybrid search: BM25 + fuzzy by default, semantic vector search with `--embed`
|
|
100
|
-
- Impact analysis: callers,
|
|
100
|
+
- Hybrid search: BM25 + fuzzy by default, semantic vector search with `--embed`; results carry `high/medium/low` confidence scores
|
|
101
|
+
- Impact analysis: callers, DI consumers, and confidence-scored edges; same-class callers separated from cross-class ones
|
|
102
|
+
- DI analysis: `@Inject`/`@Autowired`/`@Bean`/`@Provides` edges resolved into `INJECTS` + `BINDS_INTERFACE` graph relationships
|
|
101
103
|
- Dead code detection: Java-aware exemptions for tests, framework hooks, contracts, and common DI patterns
|
|
102
104
|
- Execution flows: traces from entry points through the call graph
|
|
103
105
|
- Community detection: structural clusters for architectural context
|
|
@@ -105,33 +107,42 @@ pip install "codespine[ml]"
|
|
|
105
107
|
- Multi-project and multi-module indexing: workspaces, Maven modules, Gradle subprojects
|
|
106
108
|
- Cross-module call linking: signature-based detection of calls between Maven/Gradle modules
|
|
107
109
|
- Concurrent read/write isolation: MCP queries run against a read replica; the indexer writes separately, with no memory contention
|
|
108
|
-
-
|
|
110
|
+
- Git commit hook: optional post-commit hook re-indexes only the changed files within seconds
|
|
111
|
+
- MCP server: 44 structured tools for Claude, Cursor, Cline, Copilot, and similar clients
|
|
109
112
|
|
|
110
|
-
##
|
|
113
|
+
## Instant Change Visibility
|
|
111
114
|
|
|
112
|
-
CodeSpine
|
|
115
|
+
CodeSpine writes file changes directly to the graph — no O(N) overlay merging on every query.
|
|
113
116
|
|
|
114
|
-
|
|
115
|
-
|
|
117
|
+
When `codespine watch` detects a file save:
|
|
118
|
+
1. Parses the changed file with tree-sitter
|
|
119
|
+
2. Atomically clears then re-writes that file's methods, calls, and type relationships
|
|
120
|
+
3. Snapshots the write DB to the read replica
|
|
121
|
+
4. The MCP server picks up the new snapshot on its next tool call
|
|
116
122
|
|
|
117
|
-
|
|
123
|
+
The result is that every tool — `search_hybrid`, `get_impact`, `get_symbol_context`, `find_injections`, and all others — reflects the latest saved edits (even if uncommitted) within the debounce window (default 1–2 s).
|
|
118
124
|
|
|
119
|
-
-
|
|
120
|
-
- `context`
|
|
121
|
-
- `impact`
|
|
122
|
-
- MCP `search_hybrid`
|
|
123
|
-
- MCP `find_symbol`
|
|
124
|
-
- MCP `get_symbol_context`
|
|
125
|
-
- MCP `get_impact`
|
|
125
|
+
### Git Commit Auto Re-index
|
|
126
126
|
|
|
127
|
-
|
|
127
|
+
Watch mode polls `git HEAD` every 5 seconds. When HEAD changes it uses `git diff --name-only` to find only the modified Java files and re-indexes those — not the entire project.
|
|
128
128
|
|
|
129
|
-
-
|
|
130
|
-
- `flow`
|
|
131
|
-
- `community`
|
|
132
|
-
- `coupling`
|
|
129
|
+
You can also install an optional post-commit hook so re-indexing fires immediately on every commit:
|
|
133
130
|
|
|
134
|
-
|
|
131
|
+
```bash
|
|
132
|
+
codespine watch --path . --install-hook
|
|
133
|
+
```
|
|
134
|
+
|
|
135
|
+
Or via MCP:
|
|
136
|
+
|
|
137
|
+
```python
|
|
138
|
+
start_watch(path=".", install_hook=True)
|
|
139
|
+
```
|
|
140
|
+
|
|
141
|
+
The hook is idempotent and can be removed with:
|
|
142
|
+
|
|
143
|
+
```bash
|
|
144
|
+
codespine watch --uninstall-hook --path .
|
|
145
|
+
```
|
|
135
146
|
|
|
136
147
|
## Quick Start
|
|
137
148
|
|
|
@@ -161,6 +172,7 @@ Walking files... 142 files found
|
|
|
161
172
|
Index mode... incremental (8 files to index, 0 deleted)
|
|
162
173
|
Parsing code... 8/8
|
|
163
174
|
Tracing calls... 847 calls resolved
|
|
175
|
+
Analyzing DI bindings... 63 INJECTS edges, 14 BINDS_INTERFACE edges
|
|
164
176
|
Analyzing types... 234 type relationships
|
|
165
177
|
Cross-module linking... skipped (single module)
|
|
166
178
|
Detecting communities... loading symbols
|
|
@@ -254,8 +266,8 @@ codespine guide --json # structured JSON for tooling
|
|
|
254
266
|
|
|
255
267
|
| Tool | Description |
|
|
256
268
|
|------|-------------|
|
|
257
|
-
| `search_hybrid(query, k, project)` | Ranked symbol search (BM25 + vector + fuzzy via RRF). |
|
|
258
|
-
| `find_symbol(name, kind, project, limit)` | Exact/prefix name lookup
|
|
269
|
+
| `search_hybrid(query, k, project)` | Ranked symbol search (BM25 + vector + fuzzy via RRF) with confidence scores. |
|
|
270
|
+
| `find_symbol(name, kind, project, limit)` | Exact/prefix name lookup; returns `primary_match` flag and disambiguated results. |
|
|
259
271
|
| `get_symbol_context(query, max_depth, project)` | One-shot deep context: search + impact + community + flows. |
|
|
260
272
|
| `get_neighborhood(symbol, project)` | Callers, callees, siblings, and override/implements. |
|
|
261
273
|
|
|
@@ -263,12 +275,32 @@ codespine guide --json # structured JSON for tooling
|
|
|
263
275
|
|
|
264
276
|
| Tool | Description |
|
|
265
277
|
|------|-------------|
|
|
266
|
-
| `get_impact(symbol, max_depth, project)` | Caller-tree impact analysis
|
|
278
|
+
| `get_impact(symbol, max_depth, project)` | Caller-tree impact analysis including DI consumers. Same-class callers in `self_callers`; cross-class in `impacted_callers`. |
|
|
279
|
+
| `find_injections(symbol, project)` | All classes that `@Inject`/`@Autowired` a given type, and all `@Bean`/`@Provides` providers. |
|
|
267
280
|
| `detect_dead_code(limit, project, strict)` | Methods with no callers (Java-aware exemptions). |
|
|
268
281
|
| `trace_execution_flows(entry_symbol, max_depth, project)` | Execution paths from entry points. |
|
|
269
282
|
| `get_symbol_community(symbol)` | Architectural community cluster for a symbol. |
|
|
270
283
|
| `get_change_coupling(days, min_strength, min_cochanges)` | Files that changed together in the last N days (default 5). |
|
|
271
284
|
|
|
285
|
+
**LLM-Native Tools**
|
|
286
|
+
|
|
287
|
+
Higher-level tools designed to answer full agent questions in a single call:
|
|
288
|
+
|
|
289
|
+
| Tool | Description |
|
|
290
|
+
|------|-------------|
|
|
291
|
+
| `ask(question, project)` | Answer a free-form question about the codebase using indexed structure. |
|
|
292
|
+
| `what_breaks(symbol, project)` | Plain-English summary of what could break if this symbol changes. |
|
|
293
|
+
| `explain(symbol, project)` | Explain what a class or method does and how it fits in the architecture. |
|
|
294
|
+
| `read_symbols(symbols, project)` | Bulk-resolve a list of symbol names to context in one call. |
|
|
295
|
+
| `semantic_summary(query, project)` | Narrative summary of modules or concepts matching a query. |
|
|
296
|
+
| `get_api_surface(project)` | All public entry points: REST controllers, gRPC services, CLI commands. |
|
|
297
|
+
| `file_context(file_path, project)` | Everything known about a file: classes, methods, callers, type deps. |
|
|
298
|
+
| `pre_flight_check(project)` | Readiness report: index freshness, coverage, missing embeddings, DI gaps. |
|
|
299
|
+
| `related(symbol, project)` | Symbols structurally or semantically related to the given one. |
|
|
300
|
+
| `test_coverage(symbol, project)` | Test classes and methods that exercise the given symbol. |
|
|
301
|
+
| `diff_impact(base_ref, head_ref, project)` | Impact analysis scoped to the symbols changed between two git refs. |
|
|
302
|
+
| `find_pattern(pattern, project)` | Find code matching a structural or naming pattern across the codebase. |
|
|
303
|
+
|
|
272
304
|
**Git**
|
|
273
305
|
|
|
274
306
|
| Tool | Description |
|
|
@@ -283,8 +315,8 @@ codespine guide --json # structured JSON for tooling
|
|
|
283
315
|
|------|-------------|
|
|
284
316
|
| `analyse_project(path, full, deep, embed)` | Index a Java project (background job). |
|
|
285
317
|
| `get_analyse_status()` | Poll analysis progress. |
|
|
286
|
-
| `reindex_file(file_path, project)` | Re-index a single `.java` file (<1 s). |
|
|
287
|
-
| `start_watch(path)` | Watch for `.java` changes and
|
|
318
|
+
| `reindex_file(file_path, project)` | Re-index a single `.java` file (<1 s). Changes are immediately queryable. |
|
|
319
|
+
| `start_watch(path, install_hook)` | Watch for `.java` changes and write directly to graph in real time. Pass `install_hook=True` to also install a post-commit git hook. |
|
|
288
320
|
| `stop_watch()` | Stop the background watch process. |
|
|
289
321
|
| `get_watch_status()` | Watch mode status: running, path, uptime. |
|
|
290
322
|
|
|
@@ -318,12 +350,13 @@ codespine analyse <path> # incremental index
|
|
|
318
350
|
codespine analyse <path> --full # full re-index
|
|
319
351
|
codespine analyse <path> --deep # + communities, flows, dead code, coupling
|
|
320
352
|
codespine analyse <path> --embed # + vector embeddings
|
|
321
|
-
codespine watch --path . # live re-index on file changes
|
|
353
|
+
codespine watch --path . # live re-index on file changes (direct-to-graph)
|
|
354
|
+
codespine watch --path . --install-hook # also install post-commit git hook
|
|
322
355
|
|
|
323
356
|
# Search & Analysis
|
|
324
357
|
codespine search "query" # hybrid search
|
|
325
358
|
codespine context "symbol" # one-shot deep context
|
|
326
|
-
codespine impact "symbol" # caller-tree impact
|
|
359
|
+
codespine impact "symbol" # caller-tree impact (includes DI consumers)
|
|
327
360
|
codespine deadcode # dead code candidates
|
|
328
361
|
codespine flow # execution flows
|
|
329
362
|
codespine community # architectural clusters
|
|
@@ -370,6 +403,30 @@ Project IDs are:
|
|
|
370
403
|
|
|
371
404
|
That same project ID can be passed into MCP tools and CLI analysis calls that support project scoping.
|
|
372
405
|
|
|
406
|
+
## DI / Injection Analysis
|
|
407
|
+
|
|
408
|
+
CodeSpine resolves dependency injection bindings at index time and stores them as first-class graph edges.
|
|
409
|
+
|
|
410
|
+
**What is indexed:**
|
|
411
|
+
|
|
412
|
+
- `@Inject` / `@Autowired` fields → `INJECTS(consumer → provider, confidence=0.85)`
|
|
413
|
+
- `@Provides` / `@Bean` methods → `INJECTS(config → return_type, confidence=0.90)`
|
|
414
|
+
- `@Component` / `@Service` implementing an interface → `BINDS_INTERFACE(impl → interface, confidence=0.95)`
|
|
415
|
+
|
|
416
|
+
**How it affects existing tools:**
|
|
417
|
+
|
|
418
|
+
- `get_impact("PaymentService")` now includes all classes that inject `PaymentService`, not just direct callers.
|
|
419
|
+
- `detect_dead_code` skips classes referenced only via DI edges.
|
|
420
|
+
|
|
421
|
+
**New tool:**
|
|
422
|
+
|
|
423
|
+
```python
|
|
424
|
+
find_injections("PaymentProcessor")
|
|
425
|
+
# → all @Inject/@Autowired consumers
|
|
426
|
+
# → all @Bean/@Provides providers
|
|
427
|
+
# → all @Component/@Service implementations of the interface
|
|
428
|
+
```
|
|
429
|
+
|
|
373
430
|
## Deep Analysis Trade-Offs
|
|
374
431
|
|
|
375
432
|
`--deep` enables the expensive graph-wide passes:
|
|
@@ -381,8 +438,6 @@ That same project ID can be passed into MCP tools and CLI analysis calls that su
|
|
|
381
438
|
|
|
382
439
|
Use it when you want architecture-level context. Skip it when you just need the graph refreshed for search, context, and impact.
|
|
383
440
|
|
|
384
|
-
When a dirty overlay exists, deep-analysis results intentionally exclude those uncommitted edits until promotion.
|
|
385
|
-
|
|
386
441
|
`--embed` is also optional. Without it, CodeSpine still supports exact, keyword, and fuzzy search. Add embeddings when you need concept-level retrieval.
|
|
387
442
|
|
|
388
443
|
## Concurrent Indexing and Querying
|
|
@@ -405,12 +460,13 @@ Running `codespine analyse --deep --embed` on one project while querying a diffe
|
|
|
405
460
|
- `~/.codespine.log` - server log
|
|
406
461
|
- `~/.codespine_embedding_cache.json` - embedding cache
|
|
407
462
|
- `~/.codespine_index_meta/` - incremental file metadata cache
|
|
408
|
-
- `~/.codespine_overlay/` - uncommitted dirty overlay state
|
|
463
|
+
- `~/.codespine_overlay/` - uncommitted dirty overlay state (legacy; direct-to-graph is now the primary path)
|
|
409
464
|
|
|
410
465
|
## Notes
|
|
411
466
|
|
|
412
467
|
- `codespine start` launches a background MCP server. Most IDE MCP clients should use `codespine mcp` instead and manage the process themselves.
|
|
413
|
-
- `codespine watch`
|
|
468
|
+
- `codespine watch` writes changes directly to the graph and snapshots the read replica after each batch. MCP queries reflect file saves within the debounce window.
|
|
469
|
+
- `git HEAD` is polled every 5 seconds. On a new commit, only the changed Java files are re-indexed using `git diff --name-only`, not the full project.
|
|
414
470
|
- `codespine clear-index` rebuilds the local index database from scratch. This also removes the read replica; run `analyse` again to republish it.
|
|
415
471
|
- `codespine force-reset` is the nuclear option — it deletes all data files without going through the DB engine. Use it when `clear-index` fails due to DB corruption.
|
|
416
472
|
- For large Spring or JPA-heavy repos, dead-code results should still be reviewed before deletion. The tool is conservative, not authoritative.
|
|
@@ -4,9 +4,9 @@ CodeSpine cuts token burn for coding agents working on Java codebases.
|
|
|
4
4
|
|
|
5
5
|
Instead of having an agent open dozens of `.java` files to answer one question, CodeSpine indexes the codebase once and serves the structure over MCP. The agent asks for symbols, callers, impact, flows, dead code, and module boundaries directly, which means fewer file reads, fewer wasted context windows, and fewer hallucinated code paths.
|
|
6
6
|
|
|
7
|
-
It indexes classes, methods, calls, type relationships, cross-module links, git coupling, dead-code candidates, and execution flows so agents can work from graph answers first and source files second.
|
|
7
|
+
It indexes classes, methods, calls, type relationships, DI bindings, cross-module links, git coupling, dead-code candidates, and execution flows so agents can work from graph answers first and source files second.
|
|
8
8
|
|
|
9
|
-
|
|
9
|
+
File changes are written directly to the graph and are immediately queryable — no stale overlay merging, no OOM accumulation. The MCP daemon reloads from an atomic read replica the moment indexing or watch mode completes a batch.
|
|
10
10
|
|
|
11
11
|
The MCP daemon and the indexer run independently. Querying while a full re-index is running no longer causes crashes or memory contention — reads go to an isolated snapshot that is atomically updated when indexing completes.
|
|
12
12
|
|
|
@@ -14,9 +14,10 @@ The MCP daemon and the indexer run independently. Querying while a full re-index
|
|
|
14
14
|
|
|
15
15
|
- One MCP call can replace many file opens. `get_symbol_context("PaymentService")` returns a resolved neighborhood instead of forcing the agent to read every caller and callee file manually.
|
|
16
16
|
- Search is structure-aware. Agents can ask for a symbol, concept, impact radius, or dead-code candidate without scanning entire packages.
|
|
17
|
+
- DI bindings are first-class. `@Inject`, `@Autowired`, `@Bean`, and `@Provides` edges are resolved and included in impact analysis — Spring and Guice consumers are never missed.
|
|
17
18
|
- Multi-module repos stay scoped. Project-aware IDs and `project=` parameters reduce noise from unrelated modules and workspaces.
|
|
18
19
|
- Repeat sessions get cheaper. Once indexed, the agent reuses the graph instead of re-discovering the same relationships every turn.
|
|
19
|
-
- Active edits
|
|
20
|
+
- Active edits are visible immediately. Watch mode writes changes directly to the graph (not a slow overlay), so every MCP query reflects the latest file save.
|
|
20
21
|
|
|
21
22
|
## Install
|
|
22
23
|
|
|
@@ -32,8 +33,9 @@ pip install "codespine[ml]"
|
|
|
32
33
|
|
|
33
34
|
## What It Does
|
|
34
35
|
|
|
35
|
-
- Hybrid search: BM25 + fuzzy by default, semantic vector search with `--embed`
|
|
36
|
-
- Impact analysis: callers,
|
|
36
|
+
- Hybrid search: BM25 + fuzzy by default, semantic vector search with `--embed`; results carry `high/medium/low` confidence scores
|
|
37
|
+
- Impact analysis: callers, DI consumers, and confidence-scored edges; same-class callers separated from cross-class ones
|
|
38
|
+
- DI analysis: `@Inject`/`@Autowired`/`@Bean`/`@Provides` edges resolved into `INJECTS` + `BINDS_INTERFACE` graph relationships
|
|
37
39
|
- Dead code detection: Java-aware exemptions for tests, framework hooks, contracts, and common DI patterns
|
|
38
40
|
- Execution flows: traces from entry points through the call graph
|
|
39
41
|
- Community detection: structural clusters for architectural context
|
|
@@ -41,33 +43,42 @@ pip install "codespine[ml]"
|
|
|
41
43
|
- Multi-project and multi-module indexing: workspaces, Maven modules, Gradle subprojects
|
|
42
44
|
- Cross-module call linking: signature-based detection of calls between Maven/Gradle modules
|
|
43
45
|
- Concurrent read/write isolation: MCP queries run against a read replica; the indexer writes separately, with no memory contention
|
|
44
|
-
-
|
|
46
|
+
- Git commit hook: optional post-commit hook re-indexes only the changed files within seconds
|
|
47
|
+
- MCP server: 44 structured tools for Claude, Cursor, Cline, Copilot, and similar clients
|
|
45
48
|
|
|
46
|
-
##
|
|
49
|
+
## Instant Change Visibility
|
|
47
50
|
|
|
48
|
-
CodeSpine
|
|
51
|
+
CodeSpine writes file changes directly to the graph — no O(N) overlay merging on every query.
|
|
49
52
|
|
|
50
|
-
|
|
51
|
-
|
|
53
|
+
When `codespine watch` detects a file save:
|
|
54
|
+
1. Parses the changed file with tree-sitter
|
|
55
|
+
2. Atomically clears then re-writes that file's methods, calls, and type relationships
|
|
56
|
+
3. Snapshots the write DB to the read replica
|
|
57
|
+
4. The MCP server picks up the new snapshot on its next tool call
|
|
52
58
|
|
|
53
|
-
|
|
59
|
+
The result is that every tool — `search_hybrid`, `get_impact`, `get_symbol_context`, `find_injections`, and all others — reflects the latest saved edits (even if uncommitted) within the debounce window (default 1–2 s).
|
|
54
60
|
|
|
55
|
-
-
|
|
56
|
-
- `context`
|
|
57
|
-
- `impact`
|
|
58
|
-
- MCP `search_hybrid`
|
|
59
|
-
- MCP `find_symbol`
|
|
60
|
-
- MCP `get_symbol_context`
|
|
61
|
-
- MCP `get_impact`
|
|
61
|
+
### Git Commit Auto Re-index
|
|
62
62
|
|
|
63
|
-
|
|
63
|
+
Watch mode polls `git HEAD` every 5 seconds. When HEAD changes it uses `git diff --name-only` to find only the modified Java files and re-indexes those — not the entire project.
|
|
64
64
|
|
|
65
|
-
-
|
|
66
|
-
- `flow`
|
|
67
|
-
- `community`
|
|
68
|
-
- `coupling`
|
|
65
|
+
You can also install an optional post-commit hook so re-indexing fires immediately on every commit:
|
|
69
66
|
|
|
70
|
-
|
|
67
|
+
```bash
|
|
68
|
+
codespine watch --path . --install-hook
|
|
69
|
+
```
|
|
70
|
+
|
|
71
|
+
Or via MCP:
|
|
72
|
+
|
|
73
|
+
```python
|
|
74
|
+
start_watch(path=".", install_hook=True)
|
|
75
|
+
```
|
|
76
|
+
|
|
77
|
+
The hook is idempotent and can be removed with:
|
|
78
|
+
|
|
79
|
+
```bash
|
|
80
|
+
codespine watch --uninstall-hook --path .
|
|
81
|
+
```
|
|
71
82
|
|
|
72
83
|
## Quick Start
|
|
73
84
|
|
|
@@ -97,6 +108,7 @@ Walking files... 142 files found
|
|
|
97
108
|
Index mode... incremental (8 files to index, 0 deleted)
|
|
98
109
|
Parsing code... 8/8
|
|
99
110
|
Tracing calls... 847 calls resolved
|
|
111
|
+
Analyzing DI bindings... 63 INJECTS edges, 14 BINDS_INTERFACE edges
|
|
100
112
|
Analyzing types... 234 type relationships
|
|
101
113
|
Cross-module linking... skipped (single module)
|
|
102
114
|
Detecting communities... loading symbols
|
|
@@ -190,8 +202,8 @@ codespine guide --json # structured JSON for tooling
|
|
|
190
202
|
|
|
191
203
|
| Tool | Description |
|
|
192
204
|
|------|-------------|
|
|
193
|
-
| `search_hybrid(query, k, project)` | Ranked symbol search (BM25 + vector + fuzzy via RRF). |
|
|
194
|
-
| `find_symbol(name, kind, project, limit)` | Exact/prefix name lookup
|
|
205
|
+
| `search_hybrid(query, k, project)` | Ranked symbol search (BM25 + vector + fuzzy via RRF) with confidence scores. |
|
|
206
|
+
| `find_symbol(name, kind, project, limit)` | Exact/prefix name lookup; returns `primary_match` flag and disambiguated results. |
|
|
195
207
|
| `get_symbol_context(query, max_depth, project)` | One-shot deep context: search + impact + community + flows. |
|
|
196
208
|
| `get_neighborhood(symbol, project)` | Callers, callees, siblings, and override/implements. |
|
|
197
209
|
|
|
@@ -199,12 +211,32 @@ codespine guide --json # structured JSON for tooling
|
|
|
199
211
|
|
|
200
212
|
| Tool | Description |
|
|
201
213
|
|------|-------------|
|
|
202
|
-
| `get_impact(symbol, max_depth, project)` | Caller-tree impact analysis
|
|
214
|
+
| `get_impact(symbol, max_depth, project)` | Caller-tree impact analysis including DI consumers. Same-class callers in `self_callers`; cross-class in `impacted_callers`. |
|
|
215
|
+
| `find_injections(symbol, project)` | All classes that `@Inject`/`@Autowired` a given type, and all `@Bean`/`@Provides` providers. |
|
|
203
216
|
| `detect_dead_code(limit, project, strict)` | Methods with no callers (Java-aware exemptions). |
|
|
204
217
|
| `trace_execution_flows(entry_symbol, max_depth, project)` | Execution paths from entry points. |
|
|
205
218
|
| `get_symbol_community(symbol)` | Architectural community cluster for a symbol. |
|
|
206
219
|
| `get_change_coupling(days, min_strength, min_cochanges)` | Files that changed together in the last N days (default 5). |
|
|
207
220
|
|
|
221
|
+
**LLM-Native Tools**
|
|
222
|
+
|
|
223
|
+
Higher-level tools designed to answer full agent questions in a single call:
|
|
224
|
+
|
|
225
|
+
| Tool | Description |
|
|
226
|
+
|------|-------------|
|
|
227
|
+
| `ask(question, project)` | Answer a free-form question about the codebase using indexed structure. |
|
|
228
|
+
| `what_breaks(symbol, project)` | Plain-English summary of what could break if this symbol changes. |
|
|
229
|
+
| `explain(symbol, project)` | Explain what a class or method does and how it fits in the architecture. |
|
|
230
|
+
| `read_symbols(symbols, project)` | Bulk-resolve a list of symbol names to context in one call. |
|
|
231
|
+
| `semantic_summary(query, project)` | Narrative summary of modules or concepts matching a query. |
|
|
232
|
+
| `get_api_surface(project)` | All public entry points: REST controllers, gRPC services, CLI commands. |
|
|
233
|
+
| `file_context(file_path, project)` | Everything known about a file: classes, methods, callers, type deps. |
|
|
234
|
+
| `pre_flight_check(project)` | Readiness report: index freshness, coverage, missing embeddings, DI gaps. |
|
|
235
|
+
| `related(symbol, project)` | Symbols structurally or semantically related to the given one. |
|
|
236
|
+
| `test_coverage(symbol, project)` | Test classes and methods that exercise the given symbol. |
|
|
237
|
+
| `diff_impact(base_ref, head_ref, project)` | Impact analysis scoped to the symbols changed between two git refs. |
|
|
238
|
+
| `find_pattern(pattern, project)` | Find code matching a structural or naming pattern across the codebase. |
|
|
239
|
+
|
|
208
240
|
**Git**
|
|
209
241
|
|
|
210
242
|
| Tool | Description |
|
|
@@ -219,8 +251,8 @@ codespine guide --json # structured JSON for tooling
|
|
|
219
251
|
|------|-------------|
|
|
220
252
|
| `analyse_project(path, full, deep, embed)` | Index a Java project (background job). |
|
|
221
253
|
| `get_analyse_status()` | Poll analysis progress. |
|
|
222
|
-
| `reindex_file(file_path, project)` | Re-index a single `.java` file (<1 s). |
|
|
223
|
-
| `start_watch(path)` | Watch for `.java` changes and
|
|
254
|
+
| `reindex_file(file_path, project)` | Re-index a single `.java` file (<1 s). Changes are immediately queryable. |
|
|
255
|
+
| `start_watch(path, install_hook)` | Watch for `.java` changes and write directly to graph in real time. Pass `install_hook=True` to also install a post-commit git hook. |
|
|
224
256
|
| `stop_watch()` | Stop the background watch process. |
|
|
225
257
|
| `get_watch_status()` | Watch mode status: running, path, uptime. |
|
|
226
258
|
|
|
@@ -254,12 +286,13 @@ codespine analyse <path> # incremental index
|
|
|
254
286
|
codespine analyse <path> --full # full re-index
|
|
255
287
|
codespine analyse <path> --deep # + communities, flows, dead code, coupling
|
|
256
288
|
codespine analyse <path> --embed # + vector embeddings
|
|
257
|
-
codespine watch --path . # live re-index on file changes
|
|
289
|
+
codespine watch --path . # live re-index on file changes (direct-to-graph)
|
|
290
|
+
codespine watch --path . --install-hook # also install post-commit git hook
|
|
258
291
|
|
|
259
292
|
# Search & Analysis
|
|
260
293
|
codespine search "query" # hybrid search
|
|
261
294
|
codespine context "symbol" # one-shot deep context
|
|
262
|
-
codespine impact "symbol" # caller-tree impact
|
|
295
|
+
codespine impact "symbol" # caller-tree impact (includes DI consumers)
|
|
263
296
|
codespine deadcode # dead code candidates
|
|
264
297
|
codespine flow # execution flows
|
|
265
298
|
codespine community # architectural clusters
|
|
@@ -306,6 +339,30 @@ Project IDs are:
|
|
|
306
339
|
|
|
307
340
|
That same project ID can be passed into MCP tools and CLI analysis calls that support project scoping.
|
|
308
341
|
|
|
342
|
+
## DI / Injection Analysis
|
|
343
|
+
|
|
344
|
+
CodeSpine resolves dependency injection bindings at index time and stores them as first-class graph edges.
|
|
345
|
+
|
|
346
|
+
**What is indexed:**
|
|
347
|
+
|
|
348
|
+
- `@Inject` / `@Autowired` fields → `INJECTS(consumer → provider, confidence=0.85)`
|
|
349
|
+
- `@Provides` / `@Bean` methods → `INJECTS(config → return_type, confidence=0.90)`
|
|
350
|
+
- `@Component` / `@Service` implementing an interface → `BINDS_INTERFACE(impl → interface, confidence=0.95)`
|
|
351
|
+
|
|
352
|
+
**How it affects existing tools:**
|
|
353
|
+
|
|
354
|
+
- `get_impact("PaymentService")` now includes all classes that inject `PaymentService`, not just direct callers.
|
|
355
|
+
- `detect_dead_code` does not report classes as dead when they are referenced only via DI edges.
|
|
356
|
+
|
|
357
|
+
**New tool:**
|
|
358
|
+
|
|
359
|
+
```python
|
|
360
|
+
find_injections("PaymentProcessor")
|
|
361
|
+
# → all @Inject/@Autowired consumers
|
|
362
|
+
# → all @Bean/@Provides providers
|
|
363
|
+
# → all @Component/@Service implementations of the interface
|
|
364
|
+
```
|
|
365
|
+
|
|
309
366
|
## Deep Analysis Trade-Offs
|
|
310
367
|
|
|
311
368
|
`--deep` enables the expensive graph-wide passes:
|
|
@@ -317,8 +374,6 @@ That same project ID can be passed into MCP tools and CLI analysis calls that su
|
|
|
317
374
|
|
|
318
375
|
Use it when you want architecture-level context. Skip it when you just need the graph refreshed for search, context, and impact.
|
|
319
376
|
|
|
320
|
-
When a dirty overlay exists, deep-analysis results intentionally exclude those uncommitted edits until promotion.
|
|
321
|
-
|
|
322
377
|
`--embed` is also optional. Without it, CodeSpine still supports exact, keyword, and fuzzy search. Add embeddings when you need concept-level retrieval.
|
|
323
378
|
|
|
324
379
|
## Concurrent Indexing and Querying
|
|
@@ -341,12 +396,13 @@ Running `codespine analyse --deep --embed` on one project while querying a diffe
|
|
|
341
396
|
- `~/.codespine.log` - server log
|
|
342
397
|
- `~/.codespine_embedding_cache.json` - embedding cache
|
|
343
398
|
- `~/.codespine_index_meta/` - incremental file metadata cache
|
|
344
|
-
- `~/.codespine_overlay/` - uncommitted dirty overlay state
|
|
399
|
+
- `~/.codespine_overlay/` - uncommitted dirty overlay state (legacy; direct-to-graph is now the primary path)
|
|
345
400
|
|
|
346
401
|
## Notes
|
|
347
402
|
|
|
348
403
|
- `codespine start` launches a background MCP server. Most IDE MCP clients should use `codespine mcp` instead and manage the process themselves.
|
|
349
|
-
- `codespine watch`
|
|
404
|
+
- `codespine watch` writes changes directly to the graph and snapshots the read replica after each batch. MCP queries reflect file saves within the debounce window.
|
|
405
|
+
- `git HEAD` is polled every 5 seconds. On a new commit, only the changed Java files are re-indexed using `git diff --name-only`, not the full project.
|
|
350
406
|
- `codespine clear-index` rebuilds the local index database from scratch. This also removes the read replica; run `analyse` again to republish it.
|
|
351
407
|
- `codespine force-reset` is the nuclear option — it deletes all data files without going through the DB engine. Use it when `clear-index` fails due to DB corruption.
|
|
352
408
|
- For large Spring or JPA-heavy repos, dead-code results should still be reviewed before deletion. The tool is conservative, not authoritative.
|
|
@@ -90,7 +90,31 @@ def link_cross_module_calls(store, project_ids: list[str] | None = None, progres
|
|
|
90
90
|
if len(c["name"]) > _MIN_CLASS_NAME_LEN:
|
|
91
91
|
classes_per_project[c["pid"]].add(c["name"])
|
|
92
92
|
|
|
93
|
-
# ── 3.
|
|
93
|
+
# ── 3. Pre-load all destination-class methods in ONE bulk query ───────
|
|
94
|
+
# Collect the ID of every class whose name passes the minimum-length filter so
|
|
95
|
+
# we can load their methods in one round-trip instead of one per class.
|
|
96
|
+
all_cross_cids: set[str] = set()
|
|
97
|
+
for c in all_classes:
|
|
98
|
+
if len(c["name"]) > _MIN_CLASS_NAME_LEN:
|
|
99
|
+
all_cross_cids.add(c["cid"])
|
|
100
|
+
|
|
101
|
+
_ping(f"loading methods for {len(all_cross_cids)} cross-module classes")
|
|
102
|
+
dst_methods_by_cid: dict[str, list[dict]] = defaultdict(list)
|
|
103
|
+
if all_cross_cids:
|
|
104
|
+
bulk = store.query_records(
|
|
105
|
+
"""
|
|
106
|
+
MATCH (m:Method)
|
|
107
|
+
WHERE m.class_id IN $cids
|
|
108
|
+
RETURN m.id as mid, m.name as name, m.signature as sig,
|
|
109
|
+
m.modifiers as modifiers, m.is_constructor as is_ctor,
|
|
110
|
+
m.class_id as cid
|
|
111
|
+
""",
|
|
112
|
+
{"cids": list(all_cross_cids)},
|
|
113
|
+
)
|
|
114
|
+
for dm in bulk:
|
|
115
|
+
dst_methods_by_cid[dm["cid"]].append(dm)
|
|
116
|
+
|
|
117
|
+
# ── 4. Scan methods for cross-project type references ─────────────
|
|
94
118
|
new_edges = 0
|
|
95
119
|
seen: set[tuple[str, str]] = set()
|
|
96
120
|
|
|
@@ -127,19 +151,13 @@ def link_cross_module_calls(store, project_ids: list[str] | None = None, progres
|
|
|
127
151
|
if not matched_class_names:
|
|
128
152
|
continue
|
|
129
153
|
|
|
130
|
-
# For each matched class, create CALLS edges
|
|
154
|
+
# For each matched class, create CALLS edges using pre-loaded methods.
|
|
131
155
|
for class_name in matched_class_names:
|
|
132
156
|
for dst_cid, dst_pid in name_to_classes.get(class_name, []):
|
|
133
157
|
if dst_pid == src_pid:
|
|
134
158
|
continue # same project — not cross-module
|
|
135
159
|
|
|
136
|
-
|
|
137
|
-
dst_methods = store.query_records(
|
|
138
|
-
"""MATCH (m:Method) WHERE m.class_id = $cid
|
|
139
|
-
RETURN m.id as mid, m.name as name, m.signature as sig,
|
|
140
|
-
m.modifiers as modifiers, m.is_constructor as is_ctor""",
|
|
141
|
-
{"cid": dst_cid},
|
|
142
|
-
)
|
|
160
|
+
dst_methods = dst_methods_by_cid.get(dst_cid)
|
|
143
161
|
if not dst_methods:
|
|
144
162
|
continue
|
|
145
163
|
|