codespine 0.1.8__tar.gz → 0.3.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (57) hide show
  1. codespine-0.3.0/PKG-INFO +333 -0
  2. codespine-0.3.0/README.md +269 -0
  3. {codespine-0.1.8 → codespine-0.3.0}/codespine/__init__.py +1 -1
  4. {codespine-0.1.8 → codespine-0.3.0}/codespine/analysis/context.py +4 -4
  5. {codespine-0.1.8 → codespine-0.3.0}/codespine/analysis/deadcode.py +35 -17
  6. codespine-0.3.0/codespine/analysis/flow.py +111 -0
  7. {codespine-0.1.8 → codespine-0.3.0}/codespine/analysis/impact.py +14 -7
  8. {codespine-0.1.8 → codespine-0.3.0}/codespine/cli.py +210 -34
  9. {codespine-0.1.8 → codespine-0.3.0}/codespine/config.py +1 -0
  10. {codespine-0.1.8 → codespine-0.3.0}/codespine/db/store.py +29 -10
  11. {codespine-0.1.8 → codespine-0.3.0}/codespine/indexer/engine.py +288 -41
  12. codespine-0.3.0/codespine/mcp/server.py +917 -0
  13. {codespine-0.1.8 → codespine-0.3.0}/codespine/search/hybrid.py +10 -4
  14. {codespine-0.1.8 → codespine-0.3.0}/codespine/search/vector.py +2 -2
  15. codespine-0.3.0/codespine/watch/watcher.py +75 -0
  16. codespine-0.3.0/codespine.egg-info/PKG-INFO +333 -0
  17. {codespine-0.1.8 → codespine-0.3.0}/pyproject.toml +1 -1
  18. codespine-0.3.0/tests/test_index_and_hybrid.py +36 -0
  19. codespine-0.1.8/PKG-INFO +0 -150
  20. codespine-0.1.8/README.md +0 -86
  21. codespine-0.1.8/codespine/analysis/flow.py +0 -77
  22. codespine-0.1.8/codespine/mcp/server.py +0 -67
  23. codespine-0.1.8/codespine/watch/watcher.py +0 -38
  24. codespine-0.1.8/codespine.egg-info/PKG-INFO +0 -150
  25. codespine-0.1.8/tests/test_index_and_hybrid.py +0 -21
  26. {codespine-0.1.8 → codespine-0.3.0}/LICENSE +0 -0
  27. {codespine-0.1.8 → codespine-0.3.0}/codespine/analysis/__init__.py +0 -0
  28. {codespine-0.1.8 → codespine-0.3.0}/codespine/analysis/community.py +0 -0
  29. {codespine-0.1.8 → codespine-0.3.0}/codespine/analysis/coupling.py +0 -0
  30. {codespine-0.1.8 → codespine-0.3.0}/codespine/db/__init__.py +0 -0
  31. {codespine-0.1.8 → codespine-0.3.0}/codespine/db/schema.py +0 -0
  32. {codespine-0.1.8 → codespine-0.3.0}/codespine/diff/__init__.py +0 -0
  33. {codespine-0.1.8 → codespine-0.3.0}/codespine/diff/branch_diff.py +0 -0
  34. {codespine-0.1.8 → codespine-0.3.0}/codespine/indexer/__init__.py +0 -0
  35. {codespine-0.1.8 → codespine-0.3.0}/codespine/indexer/call_resolver.py +0 -0
  36. {codespine-0.1.8 → codespine-0.3.0}/codespine/indexer/java_parser.py +0 -0
  37. {codespine-0.1.8 → codespine-0.3.0}/codespine/indexer/symbol_builder.py +0 -0
  38. {codespine-0.1.8 → codespine-0.3.0}/codespine/mcp/__init__.py +0 -0
  39. {codespine-0.1.8 → codespine-0.3.0}/codespine/noise/__init__.py +0 -0
  40. {codespine-0.1.8 → codespine-0.3.0}/codespine/noise/blocklist.py +0 -0
  41. {codespine-0.1.8 → codespine-0.3.0}/codespine/search/__init__.py +0 -0
  42. {codespine-0.1.8 → codespine-0.3.0}/codespine/search/bm25.py +0 -0
  43. {codespine-0.1.8 → codespine-0.3.0}/codespine/search/fuzzy.py +0 -0
  44. {codespine-0.1.8 → codespine-0.3.0}/codespine/search/rrf.py +0 -0
  45. {codespine-0.1.8 → codespine-0.3.0}/codespine/watch/__init__.py +0 -0
  46. {codespine-0.1.8 → codespine-0.3.0}/codespine.egg-info/SOURCES.txt +0 -0
  47. {codespine-0.1.8 → codespine-0.3.0}/codespine.egg-info/dependency_links.txt +0 -0
  48. {codespine-0.1.8 → codespine-0.3.0}/codespine.egg-info/entry_points.txt +0 -0
  49. {codespine-0.1.8 → codespine-0.3.0}/codespine.egg-info/requires.txt +0 -0
  50. {codespine-0.1.8 → codespine-0.3.0}/codespine.egg-info/top_level.txt +0 -0
  51. {codespine-0.1.8 → codespine-0.3.0}/gindex.py +0 -0
  52. {codespine-0.1.8 → codespine-0.3.0}/setup.cfg +0 -0
  53. {codespine-0.1.8 → codespine-0.3.0}/tests/test_branch_diff_normalize.py +0 -0
  54. {codespine-0.1.8 → codespine-0.3.0}/tests/test_call_resolver.py +0 -0
  55. {codespine-0.1.8 → codespine-0.3.0}/tests/test_java_parser.py +0 -0
  56. {codespine-0.1.8 → codespine-0.3.0}/tests/test_multimodule_index.py +0 -0
  57. {codespine-0.1.8 → codespine-0.3.0}/tests/test_search_ranking.py +0 -0
@@ -0,0 +1,333 @@
1
+ Metadata-Version: 2.4
2
+ Name: codespine
3
+ Version: 0.3.0
4
+ Summary: Local Java code intelligence indexer backed by a graph database
5
+ Author: CodeSpine contributors
6
+ License: MIT License
7
+
8
+ Copyright (c) 2026 CodeSpine contributors
9
+
10
+ Permission is hereby granted, free of charge, to any person obtaining a copy
11
+ of this software and associated documentation files (the "Software"), to deal
12
+ in the Software without restriction, including without limitation the rights
13
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
14
+ copies of the Software, and to permit persons to whom the Software is
15
+ furnished to do so, subject to the following conditions:
16
+
17
+ The above copyright notice and this permission notice shall be included in all
18
+ copies or substantial portions of the Software.
19
+
20
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
21
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
22
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
23
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
24
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
25
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
26
+ SOFTWARE.
27
+
28
+ Project-URL: Homepage, https://github.com/vinayak3022/codeSpine
29
+ Project-URL: Repository, https://github.com/vinayak3022/codeSpine
30
+ Project-URL: Issues, https://github.com/vinayak3022/codeSpine/issues
31
+ Keywords: java,code-indexing,graph,kuzu,mcp
32
+ Classifier: Development Status :: 3 - Alpha
33
+ Classifier: Intended Audience :: Developers
34
+ Classifier: License :: OSI Approved :: MIT License
35
+ Classifier: Programming Language :: Python :: 3
36
+ Classifier: Programming Language :: Python :: 3.10
37
+ Classifier: Programming Language :: Python :: 3.11
38
+ Classifier: Programming Language :: Python :: 3.12
39
+ Classifier: Programming Language :: Python :: 3.13
40
+ Classifier: Topic :: Software Development :: Libraries
41
+ Classifier: Topic :: Software Development :: Quality Assurance
42
+ Requires-Python: >=3.10
43
+ Description-Content-Type: text/markdown
44
+ License-File: LICENSE
45
+ Requires-Dist: click
46
+ Requires-Dist: kuzu
47
+ Requires-Dist: tree-sitter
48
+ Requires-Dist: tree-sitter-java
49
+ Requires-Dist: fastmcp
50
+ Requires-Dist: psutil
51
+ Requires-Dist: watchfiles
52
+ Provides-Extra: ml
53
+ Requires-Dist: sentence-transformers; extra == "ml"
54
+ Requires-Dist: numpy; extra == "ml"
55
+ Provides-Extra: community
56
+ Requires-Dist: igraph; extra == "community"
57
+ Requires-Dist: leidenalg; extra == "community"
58
+ Provides-Extra: full
59
+ Requires-Dist: sentence-transformers; extra == "full"
60
+ Requires-Dist: numpy; extra == "full"
61
+ Requires-Dist: igraph; extra == "full"
62
+ Requires-Dist: leidenalg; extra == "full"
63
+ Dynamic: license-file
64
+
65
+ # CodeSpine
66
+
67
+ **A code-intelligence layer for Java codebases — purpose-built for AI agents.**
68
+
69
+ Instead of making your agent read hundreds of raw source files, CodeSpine maps your entire codebase into a live graph and exposes it through 24 structured MCP tools.
70
+ Your agent asks a question, gets a precise answer — no file trawling, no wasted tokens, no hallucinated call chains.
71
+
72
+ > **Token efficiency in practice**: a `get_symbol_context` call returns a fully-resolved call graph for a symbol in one round-trip.
73
+ > The equivalent "read every relevant file" approach typically costs 10-50× more tokens and still misses transitive edges.
74
+
75
+ ---
76
+
77
+ ## How it works
78
+
79
+ ```
80
+ Your Java codebase
81
+
82
+ codespine analyse ← one-time (or on-demand) indexing
83
+
84
+ ~/.codespine_db ← Kuzu graph DB (symbols, calls, communities, flows …)
85
+
86
+ codespine mcp ← FastMCP server — 24 tools
87
+
88
+ Your AI agent (Claude, GPT, Cursor, Cline …)
89
+ ```
90
+
91
+ Agents talk to the MCP server. They never need to open a `.java` file unless they are actually editing it.
92
+
93
+ ---
94
+
95
+ ## Installation
96
+
97
+ ```bash
98
+ pip install codespine
99
+ ```
100
+
101
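+ Optional: install `sentence-transformers` to enable semantic vector search (adds ~500 MB of model weights). The package also declares an `ml` extra that pulls it in together with `numpy` (`pip install "codespine[ml]"`).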
102
+
103
+ ```bash
104
+ pip install sentence-transformers
105
+ ```
106
+
107
+ ---
108
+
109
+ ## Quick Start
110
+
111
+ ### 1 — Index your codebase
112
+
113
+ ```bash
114
+ # Fast (BM25 + fuzzy search, no embeddings — recommended first run)
115
+ codespine analyse /path/to/your/project
116
+
117
+ # With embeddings (adds semantic vector search, takes longer; not the same as --full)
118
+ codespine analyse /path/to/your/project --embed
119
+
120
+ # Deep (+ dead code, execution flows, communities, git coupling)
121
+ codespine analyse /path/to/your/project --deep
122
+ ```
123
+
124
+ Example output:
125
+
126
+ ```
127
+ $ codespine analyse .
128
+ Walking files... 142 files found
129
+ Parsing code... 142/142 (parallel, 4 workers)
130
+ Tracing calls... 847 calls resolved
131
+ Analyzing types... 234 type relationships
132
+ Detecting communities... 8 clusters found
133
+ Detecting execution flows... 34 processes found
134
+ Finding dead code... 12 unreachable symbols
135
+ Analyzing git history... 18 coupled file pairs
136
+
137
+ Done in 18s — 623 symbols, 1 847 edges, 8 clusters, 34 flows
138
+ ```
139
+
140
+ ### 2 — Wire up MCP
141
+
142
+ Add to your MCP config (`~/.claude/mcp.json` or equivalent):
143
+
144
+ ```json
145
+ {
146
+ "mcpServers": {
147
+ "codespine": {
148
+ "command": "codespine",
149
+ "args": ["mcp"]
150
+ }
151
+ }
152
+ }
153
+ ```
154
+
155
+ ### 3 — Let the agent explore
156
+
157
+ The agent can now call tools like:
158
+
159
+ ```
160
+ search_hybrid("payment retry logic")
161
+ get_symbol_context("processPayment")
162
+ get_impact("com.example.PaymentService#charge")
163
+ detect_dead_code()
164
+ get_codebase_stats()
165
+ ```
166
+
167
+ ---
168
+
169
+ ## MCP Tools (24)
170
+
171
+ ### Connectivity & Discovery
172
+
173
+ | Tool | What it does |
174
+ |------|-------------|
175
+ | `ping()` | Confirm the server is alive. Call this first. |
176
+ | `get_capabilities()` | Returns what is indexed right now — projects, symbol counts, which features are available, and whether watch mode is running. Call before other tools to avoid trial-and-error. |
177
+ | `list_projects()` | List every indexed project with path, symbol count, and file count. |
178
+ | `get_codebase_stats()` | Per-project breakdown: files, classes, methods, calls, embeddings, totals. |
179
+
180
+ ### Search
181
+
182
+ | Tool | What it does |
183
+ |------|-------------|
184
+ | `search_hybrid(query, k, project)` | BM25 + semantic vector + fuzzy, fused with RRF. Scope to a project with `project=`. |
185
+ | `find_symbol(name, kind, project, limit)` | Exact / prefix name lookup returning **all** matches grouped by project. Use this when the same class name exists in multiple projects to pick the right one. |
186
+ | `list_packages(project, limit)` | All Java packages with class count, grouped by project. Good for structural orientation before searching. |
187
+
188
+ ### Analysis
189
+
190
+ | Tool | What it does |
191
+ |------|-------------|
192
+ | `get_symbol_context(query, max_depth, project)` | Full call graph context for a symbol — callers, callees, types, up to `max_depth` hops. |
193
+ | `get_impact(symbol, max_depth, project)` | Depth-grouped impact analysis with confidence scores. Shows what breaks if this symbol changes. |
194
+ | `detect_dead_code(limit, project)` | Unreachable symbols after applying framework exemptions (Spring, JPA, …). |
195
+ | `trace_execution_flows(entry_symbol, max_depth, project)` | Execution paths from entry points (or a specific symbol). |
196
+ | `get_symbol_community(symbol)` | Which community cluster a symbol belongs to, with co-members. |
197
+ | `get_change_coupling(months, min_strength, min_cochanges, project)` | Git-derived file pairs that change together — useful for predicting collateral changes. |
198
+
199
+ ### Git
200
+
201
+ | Tool | What it does |
202
+ |------|-------------|
203
+ | `git_log(file_path, limit, project)` | Commit history for a file or the whole repo. |
204
+ | `git_diff(ref, file_path, project)` | Diff against a ref (default `HEAD`). |
205
+ | `compare_branches(base_ref, head_ref)` | Symbol-level diff between two branches — which classes/methods changed. |
206
+
207
+ ### Watch Mode (live incremental reindex)
208
+
209
+ | Tool | What it does |
210
+ |------|-------------|
211
+ | `start_watch(path, global_interval)` | Start incremental reindexing in the background. Watches for file changes and updates the graph within `global_interval` seconds. **Recommended**: keep this running during active development sessions so the graph stays fresh. |
212
+ | `stop_watch()` | Gracefully stop the background watcher. |
213
+ | `get_watch_status()` | Check if watch is running — uptime, path, interval. |
214
+
215
+ ### On-demand Analysis (non-blocking)
216
+
217
+ | Tool | What it does |
218
+ |------|-------------|
219
+ | `analyse_project(path, full, deep, embed)` | Trigger a full re-analysis as a background job. Returns immediately. Poll `get_analyse_status()` for progress. |
220
+ | `get_analyse_status()` | Check background analysis — running / done / failed, last log lines. |
221
+
222
+ ### Index Management
223
+
224
+ | Tool | What it does |
225
+ |------|-------------|
226
+ | `reset_project(project_id)` | Delete all graph data for one project (clean-slate re-index). |
227
+ | `reset_index()` | Wipe the entire index — all projects, communities, flows. |
228
+
229
+ ### Power / Debug
230
+
231
+ | Tool | What it does |
232
+ |------|-------------|
233
+ | `run_cypher(query)` | Execute a raw Cypher read query against the graph (Kuzu dialect). For advanced exploration. |
234
+
235
+ ---
236
+
237
+ ## CLI Reference
238
+
239
+ ### Indexing
240
+
241
+ ```bash
242
+ codespine analyse <path> # fast index (no embeddings)
243
+ codespine analyse <path> --embed # + semantic vectors
244
+ codespine analyse <path> --full # force full re-index (skip incremental)
245
+ codespine analyse <path> --deep # + dead code, flows, communities, git coupling
246
+ codespine analyse <path> --deep --embed # everything
247
+ ```
248
+
249
+ ### Search & Analysis
250
+
251
+ ```bash
252
+ codespine search "payment retry bug" [--k 20] [--json]
253
+ codespine context "processPayment" [--max-depth 3] [--json]
254
+ codespine impact "com.example.Service#processPayment(java.lang.String)" [--max-depth 4] [--json]
255
+ codespine deadcode [--limit 200] [--json]
256
+ codespine flow [--entry <symbol>] [--max-depth 6] [--json]
257
+ codespine community [--symbol <symbol>] [--json]
258
+ codespine coupling [--months 6] [--min-strength 0.3] [--min-cochanges 3] [--json]
259
+ codespine diff <base>..<head> [--json]
260
+ ```
261
+
262
+ ### Stats
263
+
264
+ ```bash
265
+ codespine stats # per-project table: files, classes, methods, calls, embeddings
266
+ codespine stats --json # machine-readable output
267
+ ```
268
+
269
+ ### Watch
270
+
271
+ ```bash
272
+ codespine watch [--path .] [--global-interval 30]
273
+ ```
274
+
275
+ ### Index Management
276
+
277
+ ```bash
278
+ codespine clear-project <project_id> # remove one project from the graph
279
+ codespine clear-index # wipe the entire index
280
+ ```
281
+
282
+ ---
283
+
284
+ ## Workspace / Multi-Project Support
285
+
286
+ CodeSpine understands three levels of hierarchy:
287
+
288
+ ```
289
+ ~/IdeaProjects/ ← workspace (a folder of independent projects)
290
+ ├── payments-service/ ← project (has its own .git / pom.xml)
291
+ │ ├── core/ ← module (Maven <module> or Gradle subproject)
292
+ │ └── api/ ← module
293
+ └── inventory-service/ ← project
294
+ └── (single-module)
295
+ ```
296
+
297
+ - **Workspace detection**: if the path you give to `analyse` has no `.git` or build file at its root, CodeSpine scans one level down for sub-projects and indexes them all.
298
+ - **Project IDs**: single-module → `payments-service`; multi-module → `payments-service::core`, `payments-service::api`.
299
+ - **Scoped queries**: most analysis and search tools accept an optional `project=` parameter (`get_symbol_community` and `compare_branches` do not) so agents can work within one project without noise from others.
300
+ - **Cross-project search**: omit `project=` to search across everything.
301
+
302
+ ---
303
+
304
+ ## Embedding / Speed Trade-off
305
+
306
+ | Flag | Index time | Search modes available |
307
+ |------|-----------|----------------------|
308
+ | *(no flag)* | Fast (~seconds–minutes) | BM25, fuzzy, exact |
309
+ | `--embed` | Slower (minutes, depends on model) | BM25, fuzzy, exact + **semantic vector** |
310
+
311
+ `sentence-transformers` must be installed for `--embed` to have any effect.
312
+ If it is not installed, indexing always skips embeddings silently.
313
+
314
+ Most agent workflows work great without embeddings — BM25 + fuzzy covers keyword, partial-name, and typo-tolerant search. Add `--embed` when you need concept-level similarity ("find all classes related to retry logic").
315
+
316
+ ---
317
+
318
+ ## Runtime Paths
319
+
320
+ | Path | Purpose |
321
+ |------|---------|
322
+ | `~/.codespine_db` | Kuzu graph database |
323
+ | `~/.codespine.pid` | Watch-mode PID file |
324
+ | `~/.codespine.log` | Watch-mode log |
325
+ | `~/.codespine_embedding_cache.sqlite3` | Embedding vector cache |
326
+
327
+ ---
328
+
329
+ ## Project Docs
330
+
331
+ - [Contributing](.github/CONTRIBUTING.md)
332
+ - [Security](.github/SECURITY.md)
333
+ - [Code of Conduct](.github/CODE_OF_CONDUCT.md)
@@ -0,0 +1,269 @@
1
+ # CodeSpine
2
+
3
+ **A code-intelligence layer for Java codebases — purpose-built for AI agents.**
4
+
5
+ Instead of making your agent read hundreds of raw source files, CodeSpine maps your entire codebase into a live graph and exposes it through 24 structured MCP tools.
6
+ Your agent asks a question, gets a precise answer — no file trawling, no wasted tokens, no hallucinated call chains.
7
+
8
+ > **Token efficiency in practice**: a `get_symbol_context` call returns a fully-resolved call graph for a symbol in one round-trip.
9
+ > The equivalent "read every relevant file" approach typically costs 10-50× more tokens and still misses transitive edges.
10
+
11
+ ---
12
+
13
+ ## How it works
14
+
15
+ ```
16
+ Your Java codebase
17
+
18
+ codespine analyse ← one-time (or on-demand) indexing
19
+
20
+ ~/.codespine_db ← Kuzu graph DB (symbols, calls, communities, flows …)
21
+
22
+ codespine mcp ← FastMCP server — 24 tools
23
+
24
+ Your AI agent (Claude, GPT, Cursor, Cline …)
25
+ ```
26
+
27
+ Agents talk to the MCP server. They never need to open a `.java` file unless they are actually editing it.
28
+
29
+ ---
30
+
31
+ ## Installation
32
+
33
+ ```bash
34
+ pip install codespine
35
+ ```
36
+
37
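+ Optional: install `sentence-transformers` to enable semantic vector search (adds ~500 MB of model weights). The package also declares an `ml` extra that pulls it in together with `numpy` (`pip install "codespine[ml]"`).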
38
+
39
+ ```bash
40
+ pip install sentence-transformers
41
+ ```
42
+
43
+ ---
44
+
45
+ ## Quick Start
46
+
47
+ ### 1 — Index your codebase
48
+
49
+ ```bash
50
+ # Fast (BM25 + fuzzy search, no embeddings — recommended first run)
51
+ codespine analyse /path/to/your/project
52
+
53
+ # With embeddings (adds semantic vector search, takes longer; not the same as --full)
54
+ codespine analyse /path/to/your/project --embed
55
+
56
+ # Deep (+ dead code, execution flows, communities, git coupling)
57
+ codespine analyse /path/to/your/project --deep
58
+ ```
59
+
60
+ Example output:
61
+
62
+ ```
63
+ $ codespine analyse .
64
+ Walking files... 142 files found
65
+ Parsing code... 142/142 (parallel, 4 workers)
66
+ Tracing calls... 847 calls resolved
67
+ Analyzing types... 234 type relationships
68
+ Detecting communities... 8 clusters found
69
+ Detecting execution flows... 34 processes found
70
+ Finding dead code... 12 unreachable symbols
71
+ Analyzing git history... 18 coupled file pairs
72
+
73
+ Done in 18s — 623 symbols, 1 847 edges, 8 clusters, 34 flows
74
+ ```
75
+
76
+ ### 2 — Wire up MCP
77
+
78
+ Add to your MCP config (`~/.claude/mcp.json` or equivalent):
79
+
80
+ ```json
81
+ {
82
+ "mcpServers": {
83
+ "codespine": {
84
+ "command": "codespine",
85
+ "args": ["mcp"]
86
+ }
87
+ }
88
+ }
89
+ ```
90
+
91
+ ### 3 — Let the agent explore
92
+
93
+ The agent can now call tools like:
94
+
95
+ ```
96
+ search_hybrid("payment retry logic")
97
+ get_symbol_context("processPayment")
98
+ get_impact("com.example.PaymentService#charge")
99
+ detect_dead_code()
100
+ get_codebase_stats()
101
+ ```
102
+
103
+ ---
104
+
105
+ ## MCP Tools (24)
106
+
107
+ ### Connectivity & Discovery
108
+
109
+ | Tool | What it does |
110
+ |------|-------------|
111
+ | `ping()` | Confirm the server is alive. Call this first. |
112
+ | `get_capabilities()` | Returns what is indexed right now — projects, symbol counts, which features are available, and whether watch mode is running. Call before other tools to avoid trial-and-error. |
113
+ | `list_projects()` | List every indexed project with path, symbol count, and file count. |
114
+ | `get_codebase_stats()` | Per-project breakdown: files, classes, methods, calls, embeddings, totals. |
115
+
116
+ ### Search
117
+
118
+ | Tool | What it does |
119
+ |------|-------------|
120
+ | `search_hybrid(query, k, project)` | BM25 + semantic vector + fuzzy, fused with RRF. Scope to a project with `project=`. |
121
+ | `find_symbol(name, kind, project, limit)` | Exact / prefix name lookup returning **all** matches grouped by project. Use this when the same class name exists in multiple projects to pick the right one. |
122
+ | `list_packages(project, limit)` | All Java packages with class count, grouped by project. Good for structural orientation before searching. |
123
+
124
+ ### Analysis
125
+
126
+ | Tool | What it does |
127
+ |------|-------------|
128
+ | `get_symbol_context(query, max_depth, project)` | Full call graph context for a symbol — callers, callees, types, up to `max_depth` hops. |
129
+ | `get_impact(symbol, max_depth, project)` | Depth-grouped impact analysis with confidence scores. Shows what breaks if this symbol changes. |
130
+ | `detect_dead_code(limit, project)` | Unreachable symbols after applying framework exemptions (Spring, JPA, …). |
131
+ | `trace_execution_flows(entry_symbol, max_depth, project)` | Execution paths from entry points (or a specific symbol). |
132
+ | `get_symbol_community(symbol)` | Which community cluster a symbol belongs to, with co-members. |
133
+ | `get_change_coupling(months, min_strength, min_cochanges, project)` | Git-derived file pairs that change together — useful for predicting collateral changes. |
134
+
135
+ ### Git
136
+
137
+ | Tool | What it does |
138
+ |------|-------------|
139
+ | `git_log(file_path, limit, project)` | Commit history for a file or the whole repo. |
140
+ | `git_diff(ref, file_path, project)` | Diff against a ref (default `HEAD`). |
141
+ | `compare_branches(base_ref, head_ref)` | Symbol-level diff between two branches — which classes/methods changed. |
142
+
143
+ ### Watch Mode (live incremental reindex)
144
+
145
+ | Tool | What it does |
146
+ |------|-------------|
147
+ | `start_watch(path, global_interval)` | Start incremental reindexing in the background. Watches for file changes and updates the graph within `global_interval` seconds. **Recommended**: keep this running during active development sessions so the graph stays fresh. |
148
+ | `stop_watch()` | Gracefully stop the background watcher. |
149
+ | `get_watch_status()` | Check if watch is running — uptime, path, interval. |
150
+
151
+ ### On-demand Analysis (non-blocking)
152
+
153
+ | Tool | What it does |
154
+ |------|-------------|
155
+ | `analyse_project(path, full, deep, embed)` | Trigger a full re-analysis as a background job. Returns immediately. Poll `get_analyse_status()` for progress. |
156
+ | `get_analyse_status()` | Check background analysis — running / done / failed, last log lines. |
157
+
158
+ ### Index Management
159
+
160
+ | Tool | What it does |
161
+ |------|-------------|
162
+ | `reset_project(project_id)` | Delete all graph data for one project (clean-slate re-index). |
163
+ | `reset_index()` | Wipe the entire index — all projects, communities, flows. |
164
+
165
+ ### Power / Debug
166
+
167
+ | Tool | What it does |
168
+ |------|-------------|
169
+ | `run_cypher(query)` | Execute a raw Cypher read query against the graph (Kuzu dialect). For advanced exploration. |
170
+
171
+ ---
172
+
173
+ ## CLI Reference
174
+
175
+ ### Indexing
176
+
177
+ ```bash
178
+ codespine analyse <path> # fast index (no embeddings)
179
+ codespine analyse <path> --embed # + semantic vectors
180
+ codespine analyse <path> --full # force full re-index (skip incremental)
181
+ codespine analyse <path> --deep # + dead code, flows, communities, git coupling
182
+ codespine analyse <path> --deep --embed # everything
183
+ ```
184
+
185
+ ### Search & Analysis
186
+
187
+ ```bash
188
+ codespine search "payment retry bug" [--k 20] [--json]
189
+ codespine context "processPayment" [--max-depth 3] [--json]
190
+ codespine impact "com.example.Service#processPayment(java.lang.String)" [--max-depth 4] [--json]
191
+ codespine deadcode [--limit 200] [--json]
192
+ codespine flow [--entry <symbol>] [--max-depth 6] [--json]
193
+ codespine community [--symbol <symbol>] [--json]
194
+ codespine coupling [--months 6] [--min-strength 0.3] [--min-cochanges 3] [--json]
195
+ codespine diff <base>..<head> [--json]
196
+ ```
197
+
198
+ ### Stats
199
+
200
+ ```bash
201
+ codespine stats # per-project table: files, classes, methods, calls, embeddings
202
+ codespine stats --json # machine-readable output
203
+ ```
204
+
205
+ ### Watch
206
+
207
+ ```bash
208
+ codespine watch [--path .] [--global-interval 30]
209
+ ```
210
+
211
+ ### Index Management
212
+
213
+ ```bash
214
+ codespine clear-project <project_id> # remove one project from the graph
215
+ codespine clear-index # wipe the entire index
216
+ ```
217
+
218
+ ---
219
+
220
+ ## Workspace / Multi-Project Support
221
+
222
+ CodeSpine understands three levels of hierarchy:
223
+
224
+ ```
225
+ ~/IdeaProjects/ ← workspace (a folder of independent projects)
226
+ ├── payments-service/ ← project (has its own .git / pom.xml)
227
+ │ ├── core/ ← module (Maven <module> or Gradle subproject)
228
+ │ └── api/ ← module
229
+ └── inventory-service/ ← project
230
+ └── (single-module)
231
+ ```
232
+
233
+ - **Workspace detection**: if the path you give to `analyse` has no `.git` or build file at its root, CodeSpine scans one level down for sub-projects and indexes them all.
234
+ - **Project IDs**: single-module → `payments-service`; multi-module → `payments-service::core`, `payments-service::api`.
235
+ - **Scoped queries**: most analysis and search tools accept an optional `project=` parameter (`get_symbol_community` and `compare_branches` do not) so agents can work within one project without noise from others.
236
+ - **Cross-project search**: omit `project=` to search across everything.
237
+
238
+ ---
239
+
240
+ ## Embedding / Speed Trade-off
241
+
242
+ | Flag | Index time | Search modes available |
243
+ |------|-----------|----------------------|
244
+ | *(no flag)* | Fast (~seconds–minutes) | BM25, fuzzy, exact |
245
+ | `--embed` | Slower (minutes, depends on model) | BM25, fuzzy, exact + **semantic vector** |
246
+
247
+ `sentence-transformers` must be installed for `--embed` to have any effect.
248
+ If it is not installed, indexing always skips embeddings silently.
249
+
250
+ Most agent workflows work great without embeddings — BM25 + fuzzy covers keyword, partial-name, and typo-tolerant search. Add `--embed` when you need concept-level similarity ("find all classes related to retry logic").
251
+
252
+ ---
253
+
254
+ ## Runtime Paths
255
+
256
+ | Path | Purpose |
257
+ |------|---------|
258
+ | `~/.codespine_db` | Kuzu graph database |
259
+ | `~/.codespine.pid` | Watch-mode PID file |
260
+ | `~/.codespine.log` | Watch-mode log |
261
+ | `~/.codespine_embedding_cache.sqlite3` | Embedding vector cache |
262
+
263
+ ---
264
+
265
+ ## Project Docs
266
+
267
+ - [Contributing](.github/CONTRIBUTING.md)
268
+ - [Security](.github/SECURITY.md)
269
+ - [Code of Conduct](.github/CODE_OF_CONDUCT.md)
@@ -1,4 +1,4 @@
1
1
  """CodeSpine package."""
2
2
 
3
3
  __all__ = ["__version__"]
4
- __version__ = "0.1.8"
4
+ __version__ = "0.3.0"
@@ -6,13 +6,13 @@ from codespine.analysis.impact import analyze_impact
6
6
  from codespine.search.hybrid import hybrid_search
7
7
 
8
8
 
9
- def build_symbol_context(store, query: str, max_depth: int = 3) -> dict:
10
- search_results = hybrid_search(store, query, k=10)
9
+ def build_symbol_context(store, query: str, max_depth: int = 3, project: str | None = None) -> dict:
10
+ search_results = hybrid_search(store, query, k=10, project=project)
11
11
  focus = search_results[0] if search_results else None
12
12
 
13
- impact = analyze_impact(store, query, max_depth=max_depth)
13
+ impact = analyze_impact(store, query, max_depth=max_depth, project=project)
14
14
  community = symbol_community(store, query)
15
- flows = trace_execution_flows(store, entry_symbol=query, max_depth=max_depth + 2)
15
+ flows = trace_execution_flows(store, entry_symbol=query, max_depth=max_depth + 2, project=project)
16
16
 
17
17
  return {
18
18
  "query": query,
@@ -28,24 +28,42 @@ def _modifier_tokens(modifiers) -> set[str]:
28
28
  return {str(m).strip() for m in modifiers}
29
29
 
30
30
 
31
- def detect_dead_code(store, limit: int = 200) -> list[dict]:
31
+ def detect_dead_code(store, limit: int = 200, project: str | None = None) -> list[dict]:
32
32
  """Java-aware dead code detection with exemption passes."""
33
- candidates = store.query_records(
34
- """
35
- MATCH (m:Method), (c:Class)
36
- WHERE m.class_id = c.id
37
- AND NOT EXISTS { MATCH (:Method)-[:CALLS]->(m) }
38
- RETURN m.id as method_id,
39
- m.name as name,
40
- m.signature as signature,
41
- m.modifiers as modifiers,
42
- c.fqcn as class_fqcn,
43
- m.is_constructor as is_constructor,
44
- m.is_test as is_test
45
- LIMIT $limit
46
- """,
47
- {"limit": int(limit * 3)},
48
- )
33
+ if project:
34
+ candidates = store.query_records(
35
+ """
36
+ MATCH (m:Method), (c:Class), (f:File)
37
+ WHERE m.class_id = c.id AND c.file_id = f.id AND f.project_id = $proj
38
+ AND NOT EXISTS { MATCH (:Method)-[:CALLS]->(m) }
39
+ RETURN m.id as method_id,
40
+ m.name as name,
41
+ m.signature as signature,
42
+ m.modifiers as modifiers,
43
+ c.fqcn as class_fqcn,
44
+ m.is_constructor as is_constructor,
45
+ m.is_test as is_test
46
+ LIMIT $limit
47
+ """,
48
+ {"limit": int(limit * 3), "proj": project},
49
+ )
50
+ else:
51
+ candidates = store.query_records(
52
+ """
53
+ MATCH (m:Method), (c:Class)
54
+ WHERE m.class_id = c.id
55
+ AND NOT EXISTS { MATCH (:Method)-[:CALLS]->(m) }
56
+ RETURN m.id as method_id,
57
+ m.name as name,
58
+ m.signature as signature,
59
+ m.modifiers as modifiers,
60
+ c.fqcn as class_fqcn,
61
+ m.is_constructor as is_constructor,
62
+ m.is_test as is_test
63
+ LIMIT $limit
64
+ """,
65
+ {"limit": int(limit * 3)},
66
+ )
49
67
 
50
68
  if not candidates:
51
69
  return []