codespine 0.9.0__tar.gz → 0.9.1__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (62) hide show
  1. {codespine-0.9.0 → codespine-0.9.1}/PKG-INFO +92 -36
  2. {codespine-0.9.0 → codespine-0.9.1}/README.md +91 -35
  3. {codespine-0.9.0 → codespine-0.9.1}/codespine/__init__.py +1 -1
  4. {codespine-0.9.0 → codespine-0.9.1}/codespine/analysis/crossmodule.py +27 -9
  5. {codespine-0.9.0 → codespine-0.9.1}/codespine/db/store.py +44 -2
  6. {codespine-0.9.0 → codespine-0.9.1}/codespine/indexer/engine.py +72 -4
  7. {codespine-0.9.0 → codespine-0.9.1}/codespine/indexer/java_parser.py +46 -41
  8. {codespine-0.9.0 → codespine-0.9.1}/codespine/watch/watcher.py +5 -3
  9. {codespine-0.9.0 → codespine-0.9.1}/codespine.egg-info/PKG-INFO +92 -36
  10. {codespine-0.9.0 → codespine-0.9.1}/pyproject.toml +1 -1
  11. {codespine-0.9.0 → codespine-0.9.1}/LICENSE +0 -0
  12. {codespine-0.9.0 → codespine-0.9.1}/codespine/analysis/__init__.py +0 -0
  13. {codespine-0.9.0 → codespine-0.9.1}/codespine/analysis/community.py +0 -0
  14. {codespine-0.9.0 → codespine-0.9.1}/codespine/analysis/context.py +0 -0
  15. {codespine-0.9.0 → codespine-0.9.1}/codespine/analysis/coupling.py +0 -0
  16. {codespine-0.9.0 → codespine-0.9.1}/codespine/analysis/deadcode.py +0 -0
  17. {codespine-0.9.0 → codespine-0.9.1}/codespine/analysis/flow.py +0 -0
  18. {codespine-0.9.0 → codespine-0.9.1}/codespine/analysis/impact.py +0 -0
  19. {codespine-0.9.0 → codespine-0.9.1}/codespine/cli.py +0 -0
  20. {codespine-0.9.0 → codespine-0.9.1}/codespine/config.py +0 -0
  21. {codespine-0.9.0 → codespine-0.9.1}/codespine/db/__init__.py +0 -0
  22. {codespine-0.9.0 → codespine-0.9.1}/codespine/db/schema.py +0 -0
  23. {codespine-0.9.0 → codespine-0.9.1}/codespine/diff/__init__.py +0 -0
  24. {codespine-0.9.0 → codespine-0.9.1}/codespine/diff/branch_diff.py +0 -0
  25. {codespine-0.9.0 → codespine-0.9.1}/codespine/guide.py +0 -0
  26. {codespine-0.9.0 → codespine-0.9.1}/codespine/indexer/__init__.py +0 -0
  27. {codespine-0.9.0 → codespine-0.9.1}/codespine/indexer/call_resolver.py +0 -0
  28. {codespine-0.9.0 → codespine-0.9.1}/codespine/indexer/di_resolver.py +0 -0
  29. {codespine-0.9.0 → codespine-0.9.1}/codespine/indexer/symbol_builder.py +0 -0
  30. {codespine-0.9.0 → codespine-0.9.1}/codespine/mcp/__init__.py +0 -0
  31. {codespine-0.9.0 → codespine-0.9.1}/codespine/mcp/server.py +0 -0
  32. {codespine-0.9.0 → codespine-0.9.1}/codespine/noise/__init__.py +0 -0
  33. {codespine-0.9.0 → codespine-0.9.1}/codespine/noise/blocklist.py +0 -0
  34. {codespine-0.9.0 → codespine-0.9.1}/codespine/overlay/__init__.py +0 -0
  35. {codespine-0.9.0 → codespine-0.9.1}/codespine/overlay/git_state.py +0 -0
  36. {codespine-0.9.0 → codespine-0.9.1}/codespine/overlay/merge.py +0 -0
  37. {codespine-0.9.0 → codespine-0.9.1}/codespine/overlay/store.py +0 -0
  38. {codespine-0.9.0 → codespine-0.9.1}/codespine/search/__init__.py +0 -0
  39. {codespine-0.9.0 → codespine-0.9.1}/codespine/search/bm25.py +0 -0
  40. {codespine-0.9.0 → codespine-0.9.1}/codespine/search/fuzzy.py +0 -0
  41. {codespine-0.9.0 → codespine-0.9.1}/codespine/search/hybrid.py +0 -0
  42. {codespine-0.9.0 → codespine-0.9.1}/codespine/search/rrf.py +0 -0
  43. {codespine-0.9.0 → codespine-0.9.1}/codespine/search/vector.py +0 -0
  44. {codespine-0.9.0 → codespine-0.9.1}/codespine/watch/__init__.py +0 -0
  45. {codespine-0.9.0 → codespine-0.9.1}/codespine/watch/git_hook.py +0 -0
  46. {codespine-0.9.0 → codespine-0.9.1}/codespine.egg-info/SOURCES.txt +0 -0
  47. {codespine-0.9.0 → codespine-0.9.1}/codespine.egg-info/dependency_links.txt +0 -0
  48. {codespine-0.9.0 → codespine-0.9.1}/codespine.egg-info/entry_points.txt +0 -0
  49. {codespine-0.9.0 → codespine-0.9.1}/codespine.egg-info/requires.txt +0 -0
  50. {codespine-0.9.0 → codespine-0.9.1}/codespine.egg-info/top_level.txt +0 -0
  51. {codespine-0.9.0 → codespine-0.9.1}/gindex.py +0 -0
  52. {codespine-0.9.0 → codespine-0.9.1}/setup.cfg +0 -0
  53. {codespine-0.9.0 → codespine-0.9.1}/tests/test_branch_diff_normalize.py +0 -0
  54. {codespine-0.9.0 → codespine-0.9.1}/tests/test_call_resolver.py +0 -0
  55. {codespine-0.9.0 → codespine-0.9.1}/tests/test_community_detection.py +0 -0
  56. {codespine-0.9.0 → codespine-0.9.1}/tests/test_deadcode.py +0 -0
  57. {codespine-0.9.0 → codespine-0.9.1}/tests/test_index_and_hybrid.py +0 -0
  58. {codespine-0.9.0 → codespine-0.9.1}/tests/test_java_parser.py +0 -0
  59. {codespine-0.9.0 → codespine-0.9.1}/tests/test_multimodule_index.py +0 -0
  60. {codespine-0.9.0 → codespine-0.9.1}/tests/test_overlay.py +0 -0
  61. {codespine-0.9.0 → codespine-0.9.1}/tests/test_search_ranking.py +0 -0
  62. {codespine-0.9.0 → codespine-0.9.1}/tests/test_store_recovery.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: codespine
3
- Version: 0.9.0
3
+ Version: 0.9.1
4
4
  Summary: Local Java code intelligence indexer backed by a graph database
5
5
  Author: CodeSpine contributors
6
6
  License: MIT License
@@ -68,9 +68,9 @@ CodeSpine cuts token burn for coding agents working on Java codebases.
68
68
 
69
69
  Instead of having an agent open dozens of `.java` files to answer one question, CodeSpine indexes the codebase once and serves the structure over MCP. The agent asks for symbols, callers, impact, flows, dead code, and module boundaries directly, which means fewer file reads, fewer wasted context windows, and fewer hallucinated code paths.
70
70
 
71
- It indexes classes, methods, calls, type relationships, cross-module links, git coupling, dead-code candidates, and execution flows so agents can work from graph answers first and source files second.
71
+ It indexes classes, methods, calls, type relationships, DI bindings, cross-module links, git coupling, dead-code candidates, and execution flows so agents can work from graph answers first and source files second.
72
72
 
73
- It also keeps a separate dirty overlay for uncommitted Java edits, so agents can query current work-in-progress without forcing the committed base index to churn on every save.
73
+ File changes are written directly to the graph and are immediately queryable — no stale overlay merging, no OOM accumulation. The MCP daemon reloads from an atomic read replica the moment indexing or watch mode completes a batch.
74
74
 
75
75
  The MCP daemon and the indexer run independently. Querying while a full re-index is running no longer causes crashes or memory contention — reads go to an isolated snapshot that is atomically updated when indexing completes.
76
76
 
@@ -78,9 +78,10 @@ The MCP daemon and the indexer run independently. Querying while a full re-index
78
78
 
79
79
  - One MCP call can replace many file opens. `get_symbol_context("PaymentService")` returns a resolved neighborhood instead of forcing the agent to read every caller and callee file manually.
80
80
  - Search is structure-aware. Agents can ask for a symbol, concept, impact radius, or dead-code candidate without scanning entire packages.
81
+ - DI bindings are first-class. `@Inject`, `@Autowired`, `@Bean`, and `@Provides` edges are resolved and included in impact analysis — Spring and Guice consumers are never missed.
81
82
  - Multi-module repos stay scoped. Project-aware IDs and `project=` parameters reduce noise from unrelated modules and workspaces.
82
83
  - Repeat sessions get cheaper. Once indexed, the agent reuses the graph instead of re-discovering the same relationships every turn.
83
- - Active edits stay smooth. Dirty files are kept in an overlay and merged into fast queries until you commit, instead of hammering the main graph DB on each change.
84
+ - Active edits are visible immediately. Watch mode writes changes directly to the graph (not a slow overlay), so every MCP query reflects the latest file save.
84
85
 
85
86
  ## Install
86
87
 
@@ -96,8 +97,9 @@ pip install "codespine[ml]"
96
97
 
97
98
  ## What It Does
98
99
 
99
- - Hybrid search: BM25 + fuzzy by default, semantic vector search with `--embed`
100
- - Impact analysis: callers, dependencies, and confidence-scored edges
100
+ - Hybrid search: BM25 + fuzzy by default, semantic vector search with `--embed`; results carry `high/medium/low` confidence scores
101
+ - Impact analysis: callers, DI consumers, and confidence-scored edges; same-class callers separated from cross-class ones
102
+ - DI analysis: `@Inject`/`@Autowired`/`@Bean`/`@Provides` edges resolved into `INJECTS` + `BINDS_INTERFACE` graph relationships
101
103
  - Dead code detection: Java-aware exemptions for tests, framework hooks, contracts, and common DI patterns
102
104
  - Execution flows: traces from entry points through the call graph
103
105
  - Community detection: structural clusters for architectural context
@@ -105,33 +107,42 @@ pip install "codespine[ml]"
105
107
  - Multi-project and multi-module indexing: workspaces, Maven modules, Gradle subprojects
106
108
  - Cross-module call linking: signature-based detection of calls between Maven/Gradle modules
107
109
  - Concurrent read/write isolation: MCP queries run against a read replica; the indexer writes separately, with no memory contention
108
- - MCP server: structured tools for Claude, Cursor, Cline, Copilot, and similar clients
110
+ - Git commit hook: optional post-commit hook re-indexes only the changed files within seconds
111
+ - MCP server: 44 structured tools for Claude, Cursor, Cline, Copilot, and similar clients
109
112
 
110
- ## Editing Without Stale Indexes
113
+ ## Instant Change Visibility
111
114
 
112
- CodeSpine uses a two-layer model:
115
+ CodeSpine writes file changes directly to the graph — no O(N) overlay merging on every query.
113
116
 
114
- - Base index: last committed state
115
- - Dirty overlay: uncommitted Java changes
117
+ When `codespine watch` detects a file save:
118
+ 1. Parses the changed file with tree-sitter
119
+ 2. Atomically clears then re-writes that file's methods, calls, and type relationships
120
+ 3. Snapshots the write DB to the read replica
121
+ 4. The MCP server picks up the new snapshot on its next tool call
116
122
 
117
- Fast tools read merged `base + overlay` state by default:
123
+ The result is that every tool — `search_hybrid`, `get_impact`, `get_symbol_context`, `find_injections`, and all others — reflects saved but uncommitted work within the debounce window (default 1–2 s).
118
124
 
119
- - `search`
120
- - `context`
121
- - `impact`
122
- - MCP `search_hybrid`
123
- - MCP `find_symbol`
124
- - MCP `get_symbol_context`
125
- - MCP `get_impact`
125
+ ### Git Commit Auto Re-index
126
126
 
127
- Deep analyses stay committed-only until promotion:
127
+ Watch mode polls `git HEAD` every 5 seconds. When HEAD changes, it uses `git diff --name-only` to find only the modified Java files and re-indexes those — not the entire project.
128
128
 
129
- - `deadcode`
130
- - `flow`
131
- - `community`
132
- - `coupling`
129
+ You can also install an optional post-commit hook so re-indexing fires immediately on every commit:
133
130
 
134
- `codespine watch` updates the dirty overlay after a debounce window, then promotes it into the base index when local `HEAD` changes.
131
+ ```bash
132
+ codespine watch --path . --install-hook
133
+ ```
134
+
135
+ Or via MCP:
136
+
137
+ ```python
138
+ start_watch(path=".", install_hook=True)
139
+ ```
140
+
141
+ The hook is idempotent and can be removed with:
142
+
143
+ ```bash
144
+ codespine watch --uninstall-hook --path .
145
+ ```
135
146
 
136
147
  ## Quick Start
137
148
 
@@ -161,6 +172,7 @@ Walking files... 142 files found
161
172
  Index mode... incremental (8 files to index, 0 deleted)
162
173
  Parsing code... 8/8
163
174
  Tracing calls... 847 calls resolved
175
+ Analyzing DI bindings... 63 INJECTS edges, 14 BINDS_INTERFACE edges
164
176
  Analyzing types... 234 type relationships
165
177
  Cross-module linking... skipped (single module)
166
178
  Detecting communities... loading symbols
@@ -254,8 +266,8 @@ codespine guide --json # structured JSON for tooling
254
266
 
255
267
  | Tool | Description |
256
268
  |------|-------------|
257
- | `search_hybrid(query, k, project)` | Ranked symbol search (BM25 + vector + fuzzy via RRF). |
258
- | `find_symbol(name, kind, project, limit)` | Exact/prefix name lookup across all projects. |
269
+ | `search_hybrid(query, k, project)` | Ranked symbol search (BM25 + vector + fuzzy via RRF) with confidence scores. |
270
+ | `find_symbol(name, kind, project, limit)` | Exact/prefix name lookup; returns `primary_match` flag and disambiguated results. |
259
271
  | `get_symbol_context(query, max_depth, project)` | One-shot deep context: search + impact + community + flows. |
260
272
  | `get_neighborhood(symbol, project)` | Callers, callees, siblings, and override/implements. |
261
273
 
@@ -263,12 +275,32 @@ codespine guide --json # structured JSON for tooling
263
275
 
264
276
  | Tool | Description |
265
277
  |------|-------------|
266
- | `get_impact(symbol, max_depth, project)` | Caller-tree impact analysis with confidence scores. |
278
+ | `get_impact(symbol, max_depth, project)` | Caller-tree impact analysis including DI consumers. Same-class callers in `self_callers`; cross-class in `impacted_callers`. |
279
+ | `find_injections(symbol, project)` | All classes that `@Inject`/`@Autowired` a given type, and all `@Bean`/`@Provides` providers. |
267
280
  | `detect_dead_code(limit, project, strict)` | Methods with no callers (Java-aware exemptions). |
268
281
  | `trace_execution_flows(entry_symbol, max_depth, project)` | Execution paths from entry points. |
269
282
  | `get_symbol_community(symbol)` | Architectural community cluster for a symbol. |
270
283
  | `get_change_coupling(days, min_strength, min_cochanges)` | Files that changed together in the last N days (default 5). |
271
284
 
285
+ **LLM-Native Tools**
286
+
287
+ Higher-level tools designed to answer full agent questions in a single call:
288
+
289
+ | Tool | Description |
290
+ |------|-------------|
291
+ | `ask(question, project)` | Answer a free-form question about the codebase using indexed structure. |
292
+ | `what_breaks(symbol, project)` | Plain-English summary of what could break if this symbol changes. |
293
+ | `explain(symbol, project)` | Explain what a class or method does and how it fits in the architecture. |
294
+ | `read_symbols(symbols, project)` | Bulk-resolve a list of symbol names to context in one call. |
295
+ | `semantic_summary(query, project)` | Narrative summary of modules or concepts matching a query. |
296
+ | `get_api_surface(project)` | All public entry points: REST controllers, gRPC services, CLI commands. |
297
+ | `file_context(file_path, project)` | Everything known about a file: classes, methods, callers, type deps. |
298
+ | `pre_flight_check(project)` | Readiness report: index freshness, coverage, missing embeddings, DI gaps. |
299
+ | `related(symbol, project)` | Symbols structurally or semantically related to the given one. |
300
+ | `test_coverage(symbol, project)` | Test classes and methods that exercise the given symbol. |
301
+ | `diff_impact(base_ref, head_ref, project)` | Impact analysis scoped to the symbols changed between two git refs. |
302
+ | `find_pattern(pattern, project)` | Find code matching a structural or naming pattern across the codebase. |
303
+
272
304
  **Git**
273
305
 
274
306
  | Tool | Description |
@@ -283,8 +315,8 @@ codespine guide --json # structured JSON for tooling
283
315
  |------|-------------|
284
316
  | `analyse_project(path, full, deep, embed)` | Index a Java project (background job). |
285
317
  | `get_analyse_status()` | Poll analysis progress. |
286
- | `reindex_file(file_path, project)` | Re-index a single `.java` file (<1 s). |
287
- | `start_watch(path)` | Watch for `.java` changes and update overlay in real time. |
318
+ | `reindex_file(file_path, project)` | Re-index a single `.java` file (<1 s). Changes are immediately queryable. |
319
+ | `start_watch(path, install_hook)` | Watch for `.java` changes and write them directly to the graph in real time. Pass `install_hook=True` to also install a post-commit git hook. |
288
320
  | `stop_watch()` | Stop the background watch process. |
289
321
  | `get_watch_status()` | Watch mode status: running, path, uptime. |
290
322
 
@@ -318,12 +350,13 @@ codespine analyse <path> # incremental index
318
350
  codespine analyse <path> --full # full re-index
319
351
  codespine analyse <path> --deep # + communities, flows, dead code, coupling
320
352
  codespine analyse <path> --embed # + vector embeddings
321
- codespine watch --path . # live re-index on file changes
353
+ codespine watch --path . # live re-index on file changes (direct-to-graph)
354
+ codespine watch --path . --install-hook # also install post-commit git hook
322
355
 
323
356
  # Search & Analysis
324
357
  codespine search "query" # hybrid search
325
358
  codespine context "symbol" # one-shot deep context
326
- codespine impact "symbol" # caller-tree impact
359
+ codespine impact "symbol" # caller-tree impact (includes DI consumers)
327
360
  codespine deadcode # dead code candidates
328
361
  codespine flow # execution flows
329
362
  codespine community # architectural clusters
@@ -370,6 +403,30 @@ Project IDs are:
370
403
 
371
404
  That same project ID can be passed into MCP tools and CLI analysis calls that support project scoping.
372
405
 
406
+ ## DI / Injection Analysis
407
+
408
+ CodeSpine resolves dependency injection bindings at index time and stores them as first-class graph edges.
409
+
410
+ **What is indexed:**
411
+
412
+ - `@Inject` / `@Autowired` fields → `INJECTS(consumer → provider, confidence=0.85)`
413
+ - `@Provides` / `@Bean` methods → `INJECTS(config → return_type, confidence=0.90)`
414
+ - `@Component` / `@Service` implementing an interface → `BINDS_INTERFACE(impl → interface, confidence=0.95)`
415
+
416
+ **How it affects existing tools:**
417
+
418
+ - `get_impact("PaymentService")` now includes all classes that inject `PaymentService`, not just direct callers.
419
+ - `detect_dead_code` skips classes referenced only via DI edges.
420
+
421
+ **New tool:**
422
+
423
+ ```python
424
+ find_injections("PaymentProcessor")
425
+ # → all @Inject/@Autowired consumers
426
+ # → all @Bean/@Provides providers
427
+ # → all @Component/@Service implementations of the interface
428
+ ```
429
+
373
430
  ## Deep Analysis Trade-Offs
374
431
 
375
432
  `--deep` enables the expensive graph-wide passes:
@@ -381,8 +438,6 @@ That same project ID can be passed into MCP tools and CLI analysis calls that su
381
438
 
382
439
  Use it when you want architecture-level context. Skip it when you just need the graph refreshed for search, context, and impact.
383
440
 
384
- When a dirty overlay exists, deep-analysis results intentionally exclude those uncommitted edits until promotion.
385
-
386
441
  `--embed` is also optional. Without it, CodeSpine still supports exact, keyword, and fuzzy search. Add embeddings when you need concept-level retrieval.
387
442
 
388
443
  ## Concurrent Indexing and Querying
@@ -405,12 +460,13 @@ Running `codespine analyse --deep --embed` on one project while querying a diffe
405
460
  - `~/.codespine.log` - server log
406
461
  - `~/.codespine_embedding_cache.json` - embedding cache
407
462
  - `~/.codespine_index_meta/` - incremental file metadata cache
408
- - `~/.codespine_overlay/` - uncommitted dirty overlay state
463
+ - `~/.codespine_overlay/` - uncommitted dirty overlay state (legacy; direct-to-graph is now the primary path)
409
464
 
410
465
  ## Notes
411
466
 
412
467
  - `codespine start` launches a background MCP server. Most IDE MCP clients should use `codespine mcp` instead and manage the process themselves.
413
- - `codespine watch` updates the dirty overlay first; it does not rewrite the committed base index on every save.
468
+ - `codespine watch` writes changes directly to the graph and snapshots the read replica after each batch. MCP queries reflect file saves within the debounce window.
469
+ - `git HEAD` is polled every 5 seconds. On a new commit, only the changed Java files are re-indexed using `git diff --name-only`, not the full project.
414
470
  - `codespine clear-index` rebuilds the local index database from scratch. This also removes the read replica; run `analyse` again to republish it.
415
471
  - `codespine force-reset` is the nuclear option — it deletes all data files without going through the DB engine. Use it when `clear-index` fails due to DB corruption.
416
472
  - For large Spring or JPA-heavy repos, dead-code results should still be reviewed before deletion. The tool is conservative, not authoritative.
@@ -4,9 +4,9 @@ CodeSpine cuts token burn for coding agents working on Java codebases.
4
4
 
5
5
  Instead of having an agent open dozens of `.java` files to answer one question, CodeSpine indexes the codebase once and serves the structure over MCP. The agent asks for symbols, callers, impact, flows, dead code, and module boundaries directly, which means fewer file reads, fewer wasted context windows, and fewer hallucinated code paths.
6
6
 
7
- It indexes classes, methods, calls, type relationships, cross-module links, git coupling, dead-code candidates, and execution flows so agents can work from graph answers first and source files second.
7
+ It indexes classes, methods, calls, type relationships, DI bindings, cross-module links, git coupling, dead-code candidates, and execution flows so agents can work from graph answers first and source files second.
8
8
 
9
- It also keeps a separate dirty overlay for uncommitted Java edits, so agents can query current work-in-progress without forcing the committed base index to churn on every save.
9
+ File changes are written directly to the graph and are immediately queryable — no stale overlay merging, no OOM accumulation. The MCP daemon reloads from an atomic read replica the moment indexing or watch mode completes a batch.
10
10
 
11
11
  The MCP daemon and the indexer run independently. Querying while a full re-index is running no longer causes crashes or memory contention — reads go to an isolated snapshot that is atomically updated when indexing completes.
12
12
 
@@ -14,9 +14,10 @@ The MCP daemon and the indexer run independently. Querying while a full re-index
14
14
 
15
15
  - One MCP call can replace many file opens. `get_symbol_context("PaymentService")` returns a resolved neighborhood instead of forcing the agent to read every caller and callee file manually.
16
16
  - Search is structure-aware. Agents can ask for a symbol, concept, impact radius, or dead-code candidate without scanning entire packages.
17
+ - DI bindings are first-class. `@Inject`, `@Autowired`, `@Bean`, and `@Provides` edges are resolved and included in impact analysis — Spring and Guice consumers are never missed.
17
18
  - Multi-module repos stay scoped. Project-aware IDs and `project=` parameters reduce noise from unrelated modules and workspaces.
18
19
  - Repeat sessions get cheaper. Once indexed, the agent reuses the graph instead of re-discovering the same relationships every turn.
19
- - Active edits stay smooth. Dirty files are kept in an overlay and merged into fast queries until you commit, instead of hammering the main graph DB on each change.
20
+ - Active edits are visible immediately. Watch mode writes changes directly to the graph (not a slow overlay), so every MCP query reflects the latest file save.
20
21
 
21
22
  ## Install
22
23
 
@@ -32,8 +33,9 @@ pip install "codespine[ml]"
32
33
 
33
34
  ## What It Does
34
35
 
35
- - Hybrid search: BM25 + fuzzy by default, semantic vector search with `--embed`
36
- - Impact analysis: callers, dependencies, and confidence-scored edges
36
+ - Hybrid search: BM25 + fuzzy by default, semantic vector search with `--embed`; results carry `high/medium/low` confidence scores
37
+ - Impact analysis: callers, DI consumers, and confidence-scored edges; same-class callers separated from cross-class ones
38
+ - DI analysis: `@Inject`/`@Autowired`/`@Bean`/`@Provides` edges resolved into `INJECTS` + `BINDS_INTERFACE` graph relationships
37
39
  - Dead code detection: Java-aware exemptions for tests, framework hooks, contracts, and common DI patterns
38
40
  - Execution flows: traces from entry points through the call graph
39
41
  - Community detection: structural clusters for architectural context
@@ -41,33 +43,42 @@ pip install "codespine[ml]"
41
43
  - Multi-project and multi-module indexing: workspaces, Maven modules, Gradle subprojects
42
44
  - Cross-module call linking: signature-based detection of calls between Maven/Gradle modules
43
45
  - Concurrent read/write isolation: MCP queries run against a read replica; the indexer writes separately, with no memory contention
44
- - MCP server: structured tools for Claude, Cursor, Cline, Copilot, and similar clients
46
+ - Git commit hook: optional post-commit hook re-indexes only the changed files within seconds
47
+ - MCP server: 44 structured tools for Claude, Cursor, Cline, Copilot, and similar clients
45
48
 
46
- ## Editing Without Stale Indexes
49
+ ## Instant Change Visibility
47
50
 
48
- CodeSpine uses a two-layer model:
51
+ CodeSpine writes file changes directly to the graph — no O(N) overlay merging on every query.
49
52
 
50
- - Base index: last committed state
51
- - Dirty overlay: uncommitted Java changes
53
+ When `codespine watch` detects a file save:
54
+ 1. Parses the changed file with tree-sitter
55
+ 2. Atomically clears then re-writes that file's methods, calls, and type relationships
56
+ 3. Snapshots the write DB to the read replica
57
+ 4. The MCP server picks up the new snapshot on its next tool call
52
58
 
53
- Fast tools read merged `base + overlay` state by default:
59
+ The result is that every tool — `search_hybrid`, `get_impact`, `get_symbol_context`, `find_injections`, and all others — reflects saved but uncommitted work within the debounce window (default 1–2 s).
54
60
 
55
- - `search`
56
- - `context`
57
- - `impact`
58
- - MCP `search_hybrid`
59
- - MCP `find_symbol`
60
- - MCP `get_symbol_context`
61
- - MCP `get_impact`
61
+ ### Git Commit Auto Re-index
62
62
 
63
- Deep analyses stay committed-only until promotion:
63
+ Watch mode polls `git HEAD` every 5 seconds. When HEAD changes, it uses `git diff --name-only` to find only the modified Java files and re-indexes those — not the entire project.
64
64
 
65
- - `deadcode`
66
- - `flow`
67
- - `community`
68
- - `coupling`
65
+ You can also install an optional post-commit hook so re-indexing fires immediately on every commit:
69
66
 
70
- `codespine watch` updates the dirty overlay after a debounce window, then promotes it into the base index when local `HEAD` changes.
67
+ ```bash
68
+ codespine watch --path . --install-hook
69
+ ```
70
+
71
+ Or via MCP:
72
+
73
+ ```python
74
+ start_watch(path=".", install_hook=True)
75
+ ```
76
+
77
+ The hook is idempotent and can be removed with:
78
+
79
+ ```bash
80
+ codespine watch --uninstall-hook --path .
81
+ ```
71
82
 
72
83
  ## Quick Start
73
84
 
@@ -97,6 +108,7 @@ Walking files... 142 files found
97
108
  Index mode... incremental (8 files to index, 0 deleted)
98
109
  Parsing code... 8/8
99
110
  Tracing calls... 847 calls resolved
111
+ Analyzing DI bindings... 63 INJECTS edges, 14 BINDS_INTERFACE edges
100
112
  Analyzing types... 234 type relationships
101
113
  Cross-module linking... skipped (single module)
102
114
  Detecting communities... loading symbols
@@ -190,8 +202,8 @@ codespine guide --json # structured JSON for tooling
190
202
 
191
203
  | Tool | Description |
192
204
  |------|-------------|
193
- | `search_hybrid(query, k, project)` | Ranked symbol search (BM25 + vector + fuzzy via RRF). |
194
- | `find_symbol(name, kind, project, limit)` | Exact/prefix name lookup across all projects. |
205
+ | `search_hybrid(query, k, project)` | Ranked symbol search (BM25 + vector + fuzzy via RRF) with confidence scores. |
206
+ | `find_symbol(name, kind, project, limit)` | Exact/prefix name lookup; returns `primary_match` flag and disambiguated results. |
195
207
  | `get_symbol_context(query, max_depth, project)` | One-shot deep context: search + impact + community + flows. |
196
208
  | `get_neighborhood(symbol, project)` | Callers, callees, siblings, and override/implements. |
197
209
 
@@ -199,12 +211,32 @@ codespine guide --json # structured JSON for tooling
199
211
 
200
212
  | Tool | Description |
201
213
  |------|-------------|
202
- | `get_impact(symbol, max_depth, project)` | Caller-tree impact analysis with confidence scores. |
214
+ | `get_impact(symbol, max_depth, project)` | Caller-tree impact analysis including DI consumers. Same-class callers in `self_callers`; cross-class in `impacted_callers`. |
215
+ | `find_injections(symbol, project)` | All classes that `@Inject`/`@Autowired` a given type, and all `@Bean`/`@Provides` providers. |
203
216
  | `detect_dead_code(limit, project, strict)` | Methods with no callers (Java-aware exemptions). |
204
217
  | `trace_execution_flows(entry_symbol, max_depth, project)` | Execution paths from entry points. |
205
218
  | `get_symbol_community(symbol)` | Architectural community cluster for a symbol. |
206
219
  | `get_change_coupling(days, min_strength, min_cochanges)` | Files that changed together in the last N days (default 5). |
207
220
 
221
+ **LLM-Native Tools**
222
+
223
+ Higher-level tools designed to answer full agent questions in a single call:
224
+
225
+ | Tool | Description |
226
+ |------|-------------|
227
+ | `ask(question, project)` | Answer a free-form question about the codebase using indexed structure. |
228
+ | `what_breaks(symbol, project)` | Plain-English summary of what could break if this symbol changes. |
229
+ | `explain(symbol, project)` | Explain what a class or method does and how it fits in the architecture. |
230
+ | `read_symbols(symbols, project)` | Bulk-resolve a list of symbol names to context in one call. |
231
+ | `semantic_summary(query, project)` | Narrative summary of modules or concepts matching a query. |
232
+ | `get_api_surface(project)` | All public entry points: REST controllers, gRPC services, CLI commands. |
233
+ | `file_context(file_path, project)` | Everything known about a file: classes, methods, callers, type deps. |
234
+ | `pre_flight_check(project)` | Readiness report: index freshness, coverage, missing embeddings, DI gaps. |
235
+ | `related(symbol, project)` | Symbols structurally or semantically related to the given one. |
236
+ | `test_coverage(symbol, project)` | Test classes and methods that exercise the given symbol. |
237
+ | `diff_impact(base_ref, head_ref, project)` | Impact analysis scoped to the symbols changed between two git refs. |
238
+ | `find_pattern(pattern, project)` | Find code matching a structural or naming pattern across the codebase. |
239
+
208
240
  **Git**
209
241
 
210
242
  | Tool | Description |
@@ -219,8 +251,8 @@ codespine guide --json # structured JSON for tooling
219
251
  |------|-------------|
220
252
  | `analyse_project(path, full, deep, embed)` | Index a Java project (background job). |
221
253
  | `get_analyse_status()` | Poll analysis progress. |
222
- | `reindex_file(file_path, project)` | Re-index a single `.java` file (<1 s). |
223
- | `start_watch(path)` | Watch for `.java` changes and update overlay in real time. |
254
+ | `reindex_file(file_path, project)` | Re-index a single `.java` file (<1 s). Changes are immediately queryable. |
255
+ | `start_watch(path, install_hook)` | Watch for `.java` changes and write them directly to the graph in real time. Pass `install_hook=True` to also install a post-commit git hook. |
224
256
  | `stop_watch()` | Stop the background watch process. |
225
257
  | `get_watch_status()` | Watch mode status: running, path, uptime. |
226
258
 
@@ -254,12 +286,13 @@ codespine analyse <path> # incremental index
254
286
  codespine analyse <path> --full # full re-index
255
287
  codespine analyse <path> --deep # + communities, flows, dead code, coupling
256
288
  codespine analyse <path> --embed # + vector embeddings
257
- codespine watch --path . # live re-index on file changes
289
+ codespine watch --path . # live re-index on file changes (direct-to-graph)
290
+ codespine watch --path . --install-hook # also install post-commit git hook
258
291
 
259
292
  # Search & Analysis
260
293
  codespine search "query" # hybrid search
261
294
  codespine context "symbol" # one-shot deep context
262
- codespine impact "symbol" # caller-tree impact
295
+ codespine impact "symbol" # caller-tree impact (includes DI consumers)
263
296
  codespine deadcode # dead code candidates
264
297
  codespine flow # execution flows
265
298
  codespine community # architectural clusters
@@ -306,6 +339,30 @@ Project IDs are:
306
339
 
307
340
  That same project ID can be passed into MCP tools and CLI analysis calls that support project scoping.
308
341
 
342
+ ## DI / Injection Analysis
343
+
344
+ CodeSpine resolves dependency injection bindings at index time and stores them as first-class graph edges.
345
+
346
+ **What is indexed:**
347
+
348
+ - `@Inject` / `@Autowired` fields → `INJECTS(consumer → provider, confidence=0.85)`
349
+ - `@Provides` / `@Bean` methods → `INJECTS(config → return_type, confidence=0.90)`
350
+ - `@Component` / `@Service` implementing an interface → `BINDS_INTERFACE(impl → interface, confidence=0.95)`
351
+
352
+ **How it affects existing tools:**
353
+
354
+ - `get_impact("PaymentService")` now includes all classes that inject `PaymentService`, not just direct callers.
355
+ - `detect_dead_code` skips classes referenced only via DI edges.
356
+
357
+ **New tool:**
358
+
359
+ ```python
360
+ find_injections("PaymentProcessor")
361
+ # → all @Inject/@Autowired consumers
362
+ # → all @Bean/@Provides providers
363
+ # → all @Component/@Service implementations of the interface
364
+ ```
365
+
309
366
  ## Deep Analysis Trade-Offs
310
367
 
311
368
  `--deep` enables the expensive graph-wide passes:
@@ -317,8 +374,6 @@ That same project ID can be passed into MCP tools and CLI analysis calls that su
317
374
 
318
375
  Use it when you want architecture-level context. Skip it when you just need the graph refreshed for search, context, and impact.
319
376
 
320
- When a dirty overlay exists, deep-analysis results intentionally exclude those uncommitted edits until promotion.
321
-
322
377
  `--embed` is also optional. Without it, CodeSpine still supports exact, keyword, and fuzzy search. Add embeddings when you need concept-level retrieval.
323
378
 
324
379
  ## Concurrent Indexing and Querying
@@ -341,12 +396,13 @@ Running `codespine analyse --deep --embed` on one project while querying a diffe
341
396
  - `~/.codespine.log` - server log
342
397
  - `~/.codespine_embedding_cache.json` - embedding cache
343
398
  - `~/.codespine_index_meta/` - incremental file metadata cache
344
- - `~/.codespine_overlay/` - uncommitted dirty overlay state
399
+ - `~/.codespine_overlay/` - uncommitted dirty overlay state (legacy; direct-to-graph is now the primary path)
345
400
 
346
401
  ## Notes
347
402
 
348
403
  - `codespine start` launches a background MCP server. Most IDE MCP clients should use `codespine mcp` instead and manage the process themselves.
349
- - `codespine watch` updates the dirty overlay first; it does not rewrite the committed base index on every save.
404
+ - `codespine watch` writes changes directly to the graph and snapshots the read replica after each batch. MCP queries reflect file saves within the debounce window.
405
+ - `git HEAD` is polled every 5 seconds. On a new commit, `git diff --name-only` identifies the changed Java files and only those are re-indexed, not the full project.
350
406
  - `codespine clear-index` rebuilds the local index database from scratch. This also removes the read replica; run `analyse` again to republish it.
351
407
  - `codespine force-reset` is the nuclear option — it deletes all data files without going through the DB engine. Use it when `clear-index` fails due to DB corruption.
352
408
  - For large Spring or JPA-heavy repos, dead-code results should still be reviewed before deletion. The tool is conservative, not authoritative.
@@ -1,4 +1,4 @@
1
1
  """CodeSpine package."""
2
2
 
3
3
  __all__ = ["__version__"]
4
- __version__ = "0.9.0"
4
+ __version__ = "0.9.1"
@@ -90,7 +90,31 @@ def link_cross_module_calls(store, project_ids: list[str] | None = None, progres
90
90
  if len(c["name"]) > _MIN_CLASS_NAME_LEN:
91
91
  classes_per_project[c["pid"]].add(c["name"])
92
92
 
93
- # ── 3. Scan methods for cross-project type references ─────────────
93
+ # ── 3. Pre-load all destination-class methods in ONE bulk query ───────
94
+ # Collect the ID of every class whose name passes the minimum-length
95
+ # filter so we can load their methods in one round-trip instead of one per class.
96
+ all_cross_cids: set[str] = set()
97
+ for c in all_classes:
98
+ if len(c["name"]) > _MIN_CLASS_NAME_LEN:
99
+ all_cross_cids.add(c["cid"])
100
+
101
+ _ping(f"loading methods for {len(all_cross_cids)} cross-module classes")
102
+ dst_methods_by_cid: dict[str, list[dict]] = defaultdict(list)
103
+ if all_cross_cids:
104
+ bulk = store.query_records(
105
+ """
106
+ MATCH (m:Method)
107
+ WHERE m.class_id IN $cids
108
+ RETURN m.id as mid, m.name as name, m.signature as sig,
109
+ m.modifiers as modifiers, m.is_constructor as is_ctor,
110
+ m.class_id as cid
111
+ """,
112
+ {"cids": list(all_cross_cids)},
113
+ )
114
+ for dm in bulk:
115
+ dst_methods_by_cid[dm["cid"]].append(dm)
116
+
117
+ # ── 4. Scan methods for cross-project type references ─────────────
94
118
  new_edges = 0
95
119
  seen: set[tuple[str, str]] = set()
96
120
 
@@ -127,19 +151,13 @@ def link_cross_module_calls(store, project_ids: list[str] | None = None, progres
127
151
  if not matched_class_names:
128
152
  continue
129
153
 
130
- # For each matched class, create CALLS edges
154
+ # For each matched class, create CALLS edges using pre-loaded methods.
131
155
  for class_name in matched_class_names:
132
156
  for dst_cid, dst_pid in name_to_classes.get(class_name, []):
133
157
  if dst_pid == src_pid:
134
158
  continue # same project — not cross-module
135
159
 
136
- # Get methods of the destination class
137
- dst_methods = store.query_records(
138
- """MATCH (m:Method) WHERE m.class_id = $cid
139
- RETURN m.id as mid, m.name as name, m.signature as sig,
140
- m.modifiers as modifiers, m.is_constructor as is_ctor""",
141
- {"cid": dst_cid},
142
- )
160
+ dst_methods = dst_methods_by_cid.get(dst_cid)
143
161
  if not dst_methods:
144
162
  continue
145
163
 
@@ -820,15 +820,57 @@ class GraphStore:
820
820
  },
821
821
  )
822
822
 
823
+ # Lock and flag for background snapshot coalescing.
824
+ # Only one snapshot runs at a time; a pending request supersedes queued ones.
825
+ _snapshot_lock: threading.Lock = threading.Lock()
826
+ _snapshot_pending: threading.Event = threading.Event()
827
+
823
828
  @staticmethod
824
- def snapshot_to_read_replica() -> bool:
829
+ def snapshot_to_read_replica(background: bool = False) -> bool:
825
830
  """Atomically copy the write DB to the read-replica path.
826
831
 
827
832
  The read replica is used by the MCP daemon and all read-only CLI
828
833
  commands so they never contend with the write process's buffer pool.
829
- Returns True on success, False if the source DB does not exist.
834
+
835
+ Parameters
836
+ ----------
837
+ background:
838
+ When True the copy runs in a daemon thread and this call returns
839
+ immediately (returns True once dispatched). Only one copy runs at a time;
840
+ rapid successive background calls are coalesced — the next copy
841
+ starts only after the current one finishes, so the sentinel is
842
+ always written with the *latest* data.
843
+
844
+ Returns True on success (or when dispatched to background), False if
845
+ the source DB does not exist.
830
846
  """
831
847
  src = SETTINGS.db_path
848
+ if not os.path.exists(src):
849
+ return False
850
+
851
+ if background:
852
+ # Signal that a snapshot is wanted, then ensure a worker is running.
853
+ GraphStore._snapshot_pending.set()
854
+
855
+ def _worker() -> None:
856
+ while GraphStore._snapshot_pending.is_set():
857
+ GraphStore._snapshot_pending.clear()
858
+ with GraphStore._snapshot_lock:
859
+ GraphStore._do_snapshot()
860
+
861
+ if not GraphStore._snapshot_lock.locked():
862
+ t = threading.Thread(target=_worker, daemon=True, name="codespine-snapshot")
863
+ t.start()
864
+ return True
865
+
866
+ # Foreground (blocking) path — used by CLI analyse and tests.
867
+ with GraphStore._snapshot_lock:
868
+ return GraphStore._do_snapshot()
869
+
870
+ @staticmethod
871
+ def _do_snapshot() -> bool:
872
+ """Perform the actual copy. Must be called with _snapshot_lock held."""
873
+ src = SETTINGS.db_path
832
874
  dst = SETTINGS.db_snapshot_path
833
875
  if not os.path.exists(src):
834
876
  return False