codespine 0.1.1__tar.gz → 0.1.3__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- codespine-0.1.3/PKG-INFO +150 -0
- codespine-0.1.3/README.md +86 -0
- {codespine-0.1.1 → codespine-0.1.3}/codespine/db/store.py +8 -1
- {codespine-0.1.1 → codespine-0.1.3}/codespine/diff/branch_diff.py +56 -2
- {codespine-0.1.1 → codespine-0.1.3}/codespine/indexer/java_parser.py +80 -20
- {codespine-0.1.1 → codespine-0.1.3}/codespine/search/vector.py +10 -1
- codespine-0.1.3/codespine.egg-info/PKG-INFO +150 -0
- {codespine-0.1.1 → codespine-0.1.3}/codespine.egg-info/SOURCES.txt +0 -1
- {codespine-0.1.1 → codespine-0.1.3}/pyproject.toml +1 -1
- {codespine-0.1.1 → codespine-0.1.3}/tests/test_java_parser.py +1 -1
- codespine-0.1.1/PKG-INFO +0 -336
- codespine-0.1.1/README.md +0 -272
- codespine-0.1.1/codespine.egg-info/PKG-INFO +0 -336
- codespine-0.1.1/setup.py +0 -3
- {codespine-0.1.1 → codespine-0.1.3}/LICENSE +0 -0
- {codespine-0.1.1 → codespine-0.1.3}/codespine/__init__.py +0 -0
- {codespine-0.1.1 → codespine-0.1.3}/codespine/analysis/__init__.py +0 -0
- {codespine-0.1.1 → codespine-0.1.3}/codespine/analysis/community.py +0 -0
- {codespine-0.1.1 → codespine-0.1.3}/codespine/analysis/context.py +0 -0
- {codespine-0.1.1 → codespine-0.1.3}/codespine/analysis/coupling.py +0 -0
- {codespine-0.1.1 → codespine-0.1.3}/codespine/analysis/deadcode.py +0 -0
- {codespine-0.1.1 → codespine-0.1.3}/codespine/analysis/flow.py +0 -0
- {codespine-0.1.1 → codespine-0.1.3}/codespine/analysis/impact.py +0 -0
- {codespine-0.1.1 → codespine-0.1.3}/codespine/cli.py +0 -0
- {codespine-0.1.1 → codespine-0.1.3}/codespine/config.py +0 -0
- {codespine-0.1.1 → codespine-0.1.3}/codespine/db/__init__.py +0 -0
- {codespine-0.1.1 → codespine-0.1.3}/codespine/db/schema.py +0 -0
- {codespine-0.1.1 → codespine-0.1.3}/codespine/diff/__init__.py +0 -0
- {codespine-0.1.1 → codespine-0.1.3}/codespine/indexer/__init__.py +0 -0
- {codespine-0.1.1 → codespine-0.1.3}/codespine/indexer/call_resolver.py +0 -0
- {codespine-0.1.1 → codespine-0.1.3}/codespine/indexer/engine.py +0 -0
- {codespine-0.1.1 → codespine-0.1.3}/codespine/indexer/symbol_builder.py +0 -0
- {codespine-0.1.1 → codespine-0.1.3}/codespine/mcp/__init__.py +0 -0
- {codespine-0.1.1 → codespine-0.1.3}/codespine/mcp/server.py +0 -0
- {codespine-0.1.1 → codespine-0.1.3}/codespine/noise/__init__.py +0 -0
- {codespine-0.1.1 → codespine-0.1.3}/codespine/noise/blocklist.py +0 -0
- {codespine-0.1.1 → codespine-0.1.3}/codespine/search/__init__.py +0 -0
- {codespine-0.1.1 → codespine-0.1.3}/codespine/search/bm25.py +0 -0
- {codespine-0.1.1 → codespine-0.1.3}/codespine/search/fuzzy.py +0 -0
- {codespine-0.1.1 → codespine-0.1.3}/codespine/search/hybrid.py +0 -0
- {codespine-0.1.1 → codespine-0.1.3}/codespine/search/rrf.py +0 -0
- {codespine-0.1.1 → codespine-0.1.3}/codespine/watch/__init__.py +0 -0
- {codespine-0.1.1 → codespine-0.1.3}/codespine/watch/watcher.py +0 -0
- {codespine-0.1.1 → codespine-0.1.3}/codespine.egg-info/dependency_links.txt +0 -0
- {codespine-0.1.1 → codespine-0.1.3}/codespine.egg-info/entry_points.txt +0 -0
- {codespine-0.1.1 → codespine-0.1.3}/codespine.egg-info/requires.txt +0 -0
- {codespine-0.1.1 → codespine-0.1.3}/codespine.egg-info/top_level.txt +0 -0
- {codespine-0.1.1 → codespine-0.1.3}/gindex.py +0 -0
- {codespine-0.1.1 → codespine-0.1.3}/setup.cfg +0 -0
- {codespine-0.1.1 → codespine-0.1.3}/tests/test_branch_diff_normalize.py +0 -0
- {codespine-0.1.1 → codespine-0.1.3}/tests/test_call_resolver.py +0 -0
- {codespine-0.1.1 → codespine-0.1.3}/tests/test_index_and_hybrid.py +0 -0
- {codespine-0.1.1 → codespine-0.1.3}/tests/test_search_ranking.py +0 -0
codespine-0.1.3/PKG-INFO
ADDED
|
@@ -0,0 +1,150 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: codespine
|
|
3
|
+
Version: 0.1.3
|
|
4
|
+
Summary: Local Java code intelligence indexer backed by a graph database
|
|
5
|
+
Author: CodeSpine contributors
|
|
6
|
+
License: MIT License
|
|
7
|
+
|
|
8
|
+
Copyright (c) 2026 CodeSpine contributors
|
|
9
|
+
|
|
10
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
11
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
12
|
+
in the Software without restriction, including without limitation the rights
|
|
13
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
14
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
15
|
+
furnished to do so, subject to the following conditions:
|
|
16
|
+
|
|
17
|
+
The above copyright notice and this permission notice shall be included in all
|
|
18
|
+
copies or substantial portions of the Software.
|
|
19
|
+
|
|
20
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
21
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
22
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
23
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
24
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
25
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
26
|
+
SOFTWARE.
|
|
27
|
+
|
|
28
|
+
Project-URL: Homepage, https://github.com/vinayak3022/codeSpine
|
|
29
|
+
Project-URL: Repository, https://github.com/vinayak3022/codeSpine
|
|
30
|
+
Project-URL: Issues, https://github.com/vinayak3022/codeSpine/issues
|
|
31
|
+
Keywords: java,code-indexing,graph,kuzu,mcp
|
|
32
|
+
Classifier: Development Status :: 3 - Alpha
|
|
33
|
+
Classifier: Intended Audience :: Developers
|
|
34
|
+
Classifier: License :: OSI Approved :: MIT License
|
|
35
|
+
Classifier: Programming Language :: Python :: 3
|
|
36
|
+
Classifier: Programming Language :: Python :: 3.10
|
|
37
|
+
Classifier: Programming Language :: Python :: 3.11
|
|
38
|
+
Classifier: Programming Language :: Python :: 3.12
|
|
39
|
+
Classifier: Programming Language :: Python :: 3.13
|
|
40
|
+
Classifier: Topic :: Software Development :: Libraries
|
|
41
|
+
Classifier: Topic :: Software Development :: Quality Assurance
|
|
42
|
+
Requires-Python: >=3.10
|
|
43
|
+
Description-Content-Type: text/markdown
|
|
44
|
+
License-File: LICENSE
|
|
45
|
+
Requires-Dist: click
|
|
46
|
+
Requires-Dist: kuzu
|
|
47
|
+
Requires-Dist: tree-sitter
|
|
48
|
+
Requires-Dist: tree-sitter-java
|
|
49
|
+
Requires-Dist: fastmcp
|
|
50
|
+
Requires-Dist: psutil
|
|
51
|
+
Requires-Dist: watchfiles
|
|
52
|
+
Provides-Extra: ml
|
|
53
|
+
Requires-Dist: sentence-transformers; extra == "ml"
|
|
54
|
+
Requires-Dist: numpy; extra == "ml"
|
|
55
|
+
Provides-Extra: community
|
|
56
|
+
Requires-Dist: igraph; extra == "community"
|
|
57
|
+
Requires-Dist: leidenalg; extra == "community"
|
|
58
|
+
Provides-Extra: full
|
|
59
|
+
Requires-Dist: sentence-transformers; extra == "full"
|
|
60
|
+
Requires-Dist: numpy; extra == "full"
|
|
61
|
+
Requires-Dist: igraph; extra == "full"
|
|
62
|
+
Requires-Dist: leidenalg; extra == "full"
|
|
63
|
+
Dynamic: license-file
|
|
64
|
+
|
|
65
|
+
# CodeSpine
|
|
66
|
+
|
|
67
|
+
CodeSpine is an intelligence layer for Java teams and AI coding agents.
|
|
68
|
+
It maps your codebase into a live graph so you can find anything fast, predict change impact, and ship safer refactors.
|
|
69
|
+
|
|
70
|
+
## Installation
|
|
71
|
+
|
|
72
|
+
```bash
|
|
73
|
+
pip install codespine
|
|
74
|
+
```
|
|
75
|
+
|
|
76
|
+
## Quick Start
|
|
77
|
+
|
|
78
|
+
```bash
|
|
79
|
+
codespine analyse .
|
|
80
|
+
codespine search "payment retry bug" --json
|
|
81
|
+
codespine context "processPayment" --json
|
|
82
|
+
codespine impact "com.example.Service#processPayment(java.lang.String)" --json
|
|
83
|
+
```
|
|
84
|
+
|
|
85
|
+
Example `codespine analyse` output:
|
|
86
|
+
|
|
87
|
+
```text
|
|
88
|
+
$ codespine analyse .
|
|
89
|
+
Walking files... 142 files found
|
|
90
|
+
Parsing code... 142/142
|
|
91
|
+
Tracing calls... 847 calls resolved
|
|
92
|
+
Analyzing types... 234 type relationships
|
|
93
|
+
Detecting communities... 8 clusters found
|
|
94
|
+
Detecting execution flows... 34 processes found
|
|
95
|
+
Finding dead code... 12 unreachable symbols
|
|
96
|
+
Analyzing git history... 18 coupled file pairs
|
|
97
|
+
Generating embeddings... 623 vectors stored
|
|
98
|
+
|
|
99
|
+
Done in 4.2s - 623 symbols, 1847 edges, 8 clusters, 34 flows
|
|
100
|
+
```
|
|
101
|
+
|
|
102
|
+
## What You Get
|
|
103
|
+
|
|
104
|
+
- Hybrid search: BM25 + semantic vectors + fuzzy + RRF
|
|
105
|
+
- Impact analysis: depth groups with confidence scoring
|
|
106
|
+
- Java-aware dead code detection with exemption passes
|
|
107
|
+
- Execution flow tracing from entry points
|
|
108
|
+
- Community detection (Leiden + fallback)
|
|
109
|
+
- Git change coupling analysis
|
|
110
|
+
- Watch mode incremental reindexing
|
|
111
|
+
- Symbol-level branch diff
|
|
112
|
+
|
|
113
|
+
## Key Commands
|
|
114
|
+
|
|
115
|
+
- `codespine analyse <path> [--full|--incremental]`
|
|
116
|
+
- `codespine search <query> [--k 20] [--json]`
|
|
117
|
+
- `codespine context <query> [--max-depth 3] [--json]`
|
|
118
|
+
- `codespine impact <symbol> [--max-depth 4] [--json]`
|
|
119
|
+
- `codespine deadcode [--limit 200] [--json]`
|
|
120
|
+
- `codespine flow [--entry <symbol>] [--max-depth 6] [--json]`
|
|
121
|
+
- `codespine community [--symbol <symbol>] [--json]`
|
|
122
|
+
- `codespine coupling [--months 6] [--min-strength 0.3] [--min-cochanges 3] [--json]`
|
|
123
|
+
- `codespine diff <base>..<head> [--json]`
|
|
124
|
+
- `codespine watch [--path .] [--global-interval 30]`
|
|
125
|
+
|
|
126
|
+
## MCP Setup (`mcp.json`)
|
|
127
|
+
|
|
128
|
+
```json
|
|
129
|
+
{
|
|
130
|
+
"mcpServers": {
|
|
131
|
+
"codespine": {
|
|
132
|
+
"command": "codespine",
|
|
133
|
+
"args": ["mcp"]
|
|
134
|
+
}
|
|
135
|
+
}
|
|
136
|
+
}
|
|
137
|
+
```
|
|
138
|
+
|
|
139
|
+
## Runtime Paths
|
|
140
|
+
|
|
141
|
+
- `~/.codespine_db`
|
|
142
|
+
- `~/.codespine.pid`
|
|
143
|
+
- `~/.codespine.log`
|
|
144
|
+
- `~/.codespine_embedding_cache.sqlite3`
|
|
145
|
+
|
|
146
|
+
## Project Docs
|
|
147
|
+
|
|
148
|
+
- [`.github/CONTRIBUTING.md`](.github/CONTRIBUTING.md)
|
|
149
|
+
- [`.github/SECURITY.md`](.github/SECURITY.md)
|
|
150
|
+
- [`.github/CODE_OF_CONDUCT.md`](.github/CODE_OF_CONDUCT.md)
|
|
@@ -0,0 +1,86 @@
|
|
|
1
|
+
# CodeSpine
|
|
2
|
+
|
|
3
|
+
CodeSpine is an intelligence layer for Java teams and AI coding agents.
|
|
4
|
+
It maps your codebase into a live graph so you can find anything fast, predict change impact, and ship safer refactors.
|
|
5
|
+
|
|
6
|
+
## Installation
|
|
7
|
+
|
|
8
|
+
```bash
|
|
9
|
+
pip install codespine
|
|
10
|
+
```
|
|
11
|
+
|
|
12
|
+
## Quick Start
|
|
13
|
+
|
|
14
|
+
```bash
|
|
15
|
+
codespine analyse .
|
|
16
|
+
codespine search "payment retry bug" --json
|
|
17
|
+
codespine context "processPayment" --json
|
|
18
|
+
codespine impact "com.example.Service#processPayment(java.lang.String)" --json
|
|
19
|
+
```
|
|
20
|
+
|
|
21
|
+
Example `codespine analyse` output:
|
|
22
|
+
|
|
23
|
+
```text
|
|
24
|
+
$ codespine analyse .
|
|
25
|
+
Walking files... 142 files found
|
|
26
|
+
Parsing code... 142/142
|
|
27
|
+
Tracing calls... 847 calls resolved
|
|
28
|
+
Analyzing types... 234 type relationships
|
|
29
|
+
Detecting communities... 8 clusters found
|
|
30
|
+
Detecting execution flows... 34 processes found
|
|
31
|
+
Finding dead code... 12 unreachable symbols
|
|
32
|
+
Analyzing git history... 18 coupled file pairs
|
|
33
|
+
Generating embeddings... 623 vectors stored
|
|
34
|
+
|
|
35
|
+
Done in 4.2s - 623 symbols, 1847 edges, 8 clusters, 34 flows
|
|
36
|
+
```
|
|
37
|
+
|
|
38
|
+
## What You Get
|
|
39
|
+
|
|
40
|
+
- Hybrid search: BM25 + semantic vectors + fuzzy + RRF
|
|
41
|
+
- Impact analysis: depth groups with confidence scoring
|
|
42
|
+
- Java-aware dead code detection with exemption passes
|
|
43
|
+
- Execution flow tracing from entry points
|
|
44
|
+
- Community detection (Leiden + fallback)
|
|
45
|
+
- Git change coupling analysis
|
|
46
|
+
- Watch mode incremental reindexing
|
|
47
|
+
- Symbol-level branch diff
|
|
48
|
+
|
|
49
|
+
## Key Commands
|
|
50
|
+
|
|
51
|
+
- `codespine analyse <path> [--full|--incremental]`
|
|
52
|
+
- `codespine search <query> [--k 20] [--json]`
|
|
53
|
+
- `codespine context <query> [--max-depth 3] [--json]`
|
|
54
|
+
- `codespine impact <symbol> [--max-depth 4] [--json]`
|
|
55
|
+
- `codespine deadcode [--limit 200] [--json]`
|
|
56
|
+
- `codespine flow [--entry <symbol>] [--max-depth 6] [--json]`
|
|
57
|
+
- `codespine community [--symbol <symbol>] [--json]`
|
|
58
|
+
- `codespine coupling [--months 6] [--min-strength 0.3] [--min-cochanges 3] [--json]`
|
|
59
|
+
- `codespine diff <base>..<head> [--json]`
|
|
60
|
+
- `codespine watch [--path .] [--global-interval 30]`
|
|
61
|
+
|
|
62
|
+
## MCP Setup (`mcp.json`)
|
|
63
|
+
|
|
64
|
+
```json
|
|
65
|
+
{
|
|
66
|
+
"mcpServers": {
|
|
67
|
+
"codespine": {
|
|
68
|
+
"command": "codespine",
|
|
69
|
+
"args": ["mcp"]
|
|
70
|
+
}
|
|
71
|
+
}
|
|
72
|
+
}
|
|
73
|
+
```
|
|
74
|
+
|
|
75
|
+
## Runtime Paths
|
|
76
|
+
|
|
77
|
+
- `~/.codespine_db`
|
|
78
|
+
- `~/.codespine.pid`
|
|
79
|
+
- `~/.codespine.log`
|
|
80
|
+
- `~/.codespine_embedding_cache.sqlite3`
|
|
81
|
+
|
|
82
|
+
## Project Docs
|
|
83
|
+
|
|
84
|
+
- [`.github/CONTRIBUTING.md`](.github/CONTRIBUTING.md)
|
|
85
|
+
- [`.github/SECURITY.md`](.github/SECURITY.md)
|
|
86
|
+
- [`.github/CODE_OF_CONDUCT.md`](.github/CODE_OF_CONDUCT.md)
|
|
@@ -3,6 +3,7 @@ from __future__ import annotations
|
|
|
3
3
|
import hashlib
|
|
4
4
|
import json
|
|
5
5
|
import logging
|
|
6
|
+
import os
|
|
6
7
|
from contextlib import contextmanager
|
|
7
8
|
from dataclasses import dataclass
|
|
8
9
|
from typing import Any
|
|
@@ -20,7 +21,13 @@ class GraphStore:
|
|
|
20
21
|
read_only: bool = False
|
|
21
22
|
|
|
22
23
|
def __post_init__(self) -> None:
|
|
23
|
-
|
|
24
|
+
db_path = SETTINGS.db_path
|
|
25
|
+
try:
|
|
26
|
+
self.db = kuzu.Database(db_path, buffer_pool_size=1024**3)
|
|
27
|
+
except Exception as exc:
|
|
28
|
+
fallback = os.path.join("/tmp", ".codespine_db")
|
|
29
|
+
LOGGER.warning("Primary DB path failed (%s). Falling back to %s", exc, fallback)
|
|
30
|
+
self.db = kuzu.Database(fallback, buffer_pool_size=1024**3)
|
|
24
31
|
self.conn = kuzu.Connection(self.db)
|
|
25
32
|
if not self.read_only:
|
|
26
33
|
ensure_schema(self.conn)
|
|
@@ -21,6 +21,59 @@ def _text(node) -> str:
|
|
|
21
21
|
return node.text.decode("utf-8")
|
|
22
22
|
|
|
23
23
|
|
|
24
|
+
def _captures(query: Query, node) -> list[tuple]:
|
|
25
|
+
if hasattr(query, "captures"):
|
|
26
|
+
return query.captures(node)
|
|
27
|
+
|
|
28
|
+
from tree_sitter import QueryCursor
|
|
29
|
+
|
|
30
|
+
raw = None
|
|
31
|
+
try:
|
|
32
|
+
cursor = QueryCursor(query)
|
|
33
|
+
if hasattr(cursor, "captures"):
|
|
34
|
+
raw = cursor.captures(node)
|
|
35
|
+
except TypeError:
|
|
36
|
+
raw = None
|
|
37
|
+
|
|
38
|
+
if raw is None:
|
|
39
|
+
cursor = QueryCursor()
|
|
40
|
+
for call in (
|
|
41
|
+
lambda: cursor.captures(query, node),
|
|
42
|
+
lambda: cursor.captures(node, query),
|
|
43
|
+
):
|
|
44
|
+
try:
|
|
45
|
+
raw = call()
|
|
46
|
+
break
|
|
47
|
+
except TypeError:
|
|
48
|
+
continue
|
|
49
|
+
if raw is None:
|
|
50
|
+
return []
|
|
51
|
+
if isinstance(raw, dict):
|
|
52
|
+
out: list[tuple] = []
|
|
53
|
+
for tag, nodes in raw.items():
|
|
54
|
+
for n in nodes:
|
|
55
|
+
out.append((n, tag))
|
|
56
|
+
return out
|
|
57
|
+
out: list[tuple] = []
|
|
58
|
+
for item in raw:
|
|
59
|
+
if not isinstance(item, (tuple, list)) or len(item) < 2:
|
|
60
|
+
continue
|
|
61
|
+
n, t = item[0], item[1]
|
|
62
|
+
if isinstance(t, int):
|
|
63
|
+
tag = None
|
|
64
|
+
for attr in ("capture_name_for_id", "capture_name"):
|
|
65
|
+
if hasattr(query, attr):
|
|
66
|
+
try:
|
|
67
|
+
tag = getattr(query, attr)(t)
|
|
68
|
+
break
|
|
69
|
+
except Exception:
|
|
70
|
+
pass
|
|
71
|
+
out.append((n, tag if tag else str(t)))
|
|
72
|
+
else:
|
|
73
|
+
out.append((n, t))
|
|
74
|
+
return out
|
|
75
|
+
|
|
76
|
+
|
|
24
77
|
def _hash_text(text: str) -> str:
|
|
25
78
|
return hashlib.sha1(_normalize_java_snippet(text).encode("utf-8")).hexdigest()
|
|
26
79
|
|
|
@@ -30,6 +83,7 @@ def _normalize_java_snippet(text: str) -> str:
|
|
|
30
83
|
text = re.sub(r"/\*.*?\*/", "", text, flags=re.DOTALL)
|
|
31
84
|
text = re.sub(r"//.*?$", "", text, flags=re.MULTILINE)
|
|
32
85
|
text = re.sub(r"\s+", " ", text).strip()
|
|
86
|
+
text = re.sub(r"\s*([{}();,])\s*", r"\1", text)
|
|
33
87
|
return text
|
|
34
88
|
|
|
35
89
|
|
|
@@ -51,7 +105,7 @@ def _method_hashes(source: bytes) -> dict[str, dict]:
|
|
|
51
105
|
)
|
|
52
106
|
methods: dict[str, dict] = {}
|
|
53
107
|
grouped: dict[object, dict[str, str]] = {}
|
|
54
|
-
for node, tag in method_query
|
|
108
|
+
for node, tag in _captures(method_query, root):
|
|
55
109
|
key_node = node if tag == "decl" else node.parent
|
|
56
110
|
grouped.setdefault(key_node, {})[tag] = _text(node)
|
|
57
111
|
|
|
@@ -80,7 +134,7 @@ def _class_hashes(source: bytes) -> dict[str, str]:
|
|
|
80
134
|
""",
|
|
81
135
|
)
|
|
82
136
|
grouped: dict[object, dict[str, str]] = {}
|
|
83
|
-
for node, tag in class_query
|
|
137
|
+
for node, tag in _captures(class_query, root):
|
|
84
138
|
key_node = node if tag == "decl" else node.parent
|
|
85
139
|
grouped.setdefault(key_node, {})[tag] = _text(node)
|
|
86
140
|
out: dict[str, str] = {}
|
|
@@ -62,6 +62,66 @@ def _text(node) -> str:
|
|
|
62
62
|
return node.text.decode("utf-8")
|
|
63
63
|
|
|
64
64
|
|
|
65
|
+
def _captures(query: Query, node) -> list[tuple]:
|
|
66
|
+
"""Compatibility wrapper for tree-sitter Python bindings."""
|
|
67
|
+
if hasattr(query, "captures"):
|
|
68
|
+
return query.captures(node)
|
|
69
|
+
|
|
70
|
+
from tree_sitter import QueryCursor
|
|
71
|
+
|
|
72
|
+
raw = None
|
|
73
|
+
# API shape A: QueryCursor(query).captures(node)
|
|
74
|
+
try:
|
|
75
|
+
cursor = QueryCursor(query)
|
|
76
|
+
if hasattr(cursor, "captures"):
|
|
77
|
+
raw = cursor.captures(node)
|
|
78
|
+
except TypeError:
|
|
79
|
+
raw = None
|
|
80
|
+
|
|
81
|
+
# API shape B/C: QueryCursor().captures(...)
|
|
82
|
+
if raw is None:
|
|
83
|
+
cursor = QueryCursor()
|
|
84
|
+
for call in (
|
|
85
|
+
lambda: cursor.captures(query, node),
|
|
86
|
+
lambda: cursor.captures(node, query),
|
|
87
|
+
):
|
|
88
|
+
try:
|
|
89
|
+
raw = call()
|
|
90
|
+
break
|
|
91
|
+
except TypeError:
|
|
92
|
+
continue
|
|
93
|
+
|
|
94
|
+
if raw is None:
|
|
95
|
+
return []
|
|
96
|
+
|
|
97
|
+
# Newer bindings may return {capture_name: [nodes...]}
|
|
98
|
+
if isinstance(raw, dict):
|
|
99
|
+
out: list[tuple] = []
|
|
100
|
+
for tag, nodes in raw.items():
|
|
101
|
+
for n in nodes:
|
|
102
|
+
out.append((n, tag))
|
|
103
|
+
return out
|
|
104
|
+
|
|
105
|
+
out: list[tuple] = []
|
|
106
|
+
for item in raw:
|
|
107
|
+
if not isinstance(item, (tuple, list)) or len(item) < 2:
|
|
108
|
+
continue
|
|
109
|
+
n, t = item[0], item[1]
|
|
110
|
+
if isinstance(t, int):
|
|
111
|
+
tag = None
|
|
112
|
+
for attr in ("capture_name_for_id", "capture_name"):
|
|
113
|
+
if hasattr(query, attr):
|
|
114
|
+
try:
|
|
115
|
+
tag = getattr(query, attr)(t)
|
|
116
|
+
break
|
|
117
|
+
except Exception:
|
|
118
|
+
pass
|
|
119
|
+
out.append((n, tag if tag else str(t)))
|
|
120
|
+
else:
|
|
121
|
+
out.append((n, t))
|
|
122
|
+
return out
|
|
123
|
+
|
|
124
|
+
|
|
65
125
|
def _hash_node(node) -> str:
|
|
66
126
|
return hashlib.sha1(_normalize_java_bytes(node.text).encode("utf-8")).hexdigest()
|
|
67
127
|
|
|
@@ -122,7 +182,7 @@ def _extract_local_types(method_node) -> dict[str, str]:
|
|
|
122
182
|
declarator: (variable_declarator name: (identifier) @name))
|
|
123
183
|
""",
|
|
124
184
|
)
|
|
125
|
-
captures = q
|
|
185
|
+
captures = _captures(q, method_node)
|
|
126
186
|
locals_map: dict[str, str] = {}
|
|
127
187
|
current_type = None
|
|
128
188
|
for node, tag in captures:
|
|
@@ -142,7 +202,7 @@ def _extract_field_types(class_node) -> dict[str, str]:
|
|
|
142
202
|
declarator: (variable_declarator name: (identifier) @name))
|
|
143
203
|
""",
|
|
144
204
|
)
|
|
145
|
-
captures = q
|
|
205
|
+
captures = _captures(q, class_node)
|
|
146
206
|
field_map: dict[str, str] = {}
|
|
147
207
|
current_type = None
|
|
148
208
|
for node, tag in captures:
|
|
@@ -156,16 +216,16 @@ def _extract_field_types(class_node) -> dict[str, str]:
|
|
|
156
216
|
def _extract_parameter_types(params_node) -> list[str]:
|
|
157
217
|
if params_node is None:
|
|
158
218
|
return []
|
|
159
|
-
|
|
160
|
-
|
|
161
|
-
"""
|
|
162
|
-
|
|
163
|
-
|
|
164
|
-
|
|
165
|
-
|
|
166
|
-
|
|
167
|
-
|
|
168
|
-
return [
|
|
219
|
+
types: list[str] = []
|
|
220
|
+
for child in params_node.named_children:
|
|
221
|
+
if child.type in {"formal_parameter", "spread_parameter"}:
|
|
222
|
+
tnode = child.child_by_field_name("type")
|
|
223
|
+
types.append(_node_type_name(tnode))
|
|
224
|
+
elif child.type == "receiver_parameter":
|
|
225
|
+
# Keep receiver as pseudo-type to stabilize signature arity
|
|
226
|
+
tnode = child.child_by_field_name("type")
|
|
227
|
+
types.append(_node_type_name(tnode))
|
|
228
|
+
return [t for t in types if t]
|
|
169
229
|
|
|
170
230
|
|
|
171
231
|
def _extract_inheritance(class_node) -> tuple[str | None, list[str]]:
|
|
@@ -189,7 +249,7 @@ def _extract_inheritance(class_node) -> tuple[str | None, list[str]]:
|
|
|
189
249
|
]
|
|
190
250
|
""",
|
|
191
251
|
)
|
|
192
|
-
interfaces = [_node_type_name(n) for n, tag in type_query
|
|
252
|
+
interfaces = [_node_type_name(n) for n, tag in _captures(type_query, iface_node) if tag == "t"]
|
|
193
253
|
|
|
194
254
|
# Fallback for grammar variants where interfaces are not exposed as a field.
|
|
195
255
|
if not interfaces:
|
|
@@ -206,7 +266,7 @@ def _extract_inheritance(class_node) -> tuple[str | None, list[str]]:
|
|
|
206
266
|
]
|
|
207
267
|
""",
|
|
208
268
|
)
|
|
209
|
-
interfaces.extend([_node_type_name(n) for n, tag in type_query
|
|
269
|
+
interfaces.extend([_node_type_name(n) for n, tag in _captures(type_query, child) if tag == "t"])
|
|
210
270
|
|
|
211
271
|
return extends_name, interfaces
|
|
212
272
|
|
|
@@ -229,12 +289,12 @@ def parse_java_source(source: bytes) -> ParsedFile:
|
|
|
229
289
|
package_name = ""
|
|
230
290
|
imports: list[str] = []
|
|
231
291
|
|
|
232
|
-
for node, tag in pkg_query
|
|
292
|
+
for node, tag in _captures(pkg_query, root):
|
|
233
293
|
if tag == "pkg":
|
|
234
294
|
package_name = _text(node)
|
|
235
295
|
break
|
|
236
296
|
|
|
237
|
-
for node, tag in import_query
|
|
297
|
+
for node, tag in _captures(import_query, root):
|
|
238
298
|
if tag == "imp":
|
|
239
299
|
imports.append(_text(node))
|
|
240
300
|
|
|
@@ -267,7 +327,7 @@ def parse_java_source(source: bytes) -> ParsedFile:
|
|
|
267
327
|
""",
|
|
268
328
|
)
|
|
269
329
|
|
|
270
|
-
for node, tag in cls_query
|
|
330
|
+
for node, tag in _captures(cls_query, root):
|
|
271
331
|
if tag != "class_decl":
|
|
272
332
|
continue
|
|
273
333
|
|
|
@@ -292,8 +352,8 @@ def parse_java_source(source: bytes) -> ParsedFile:
|
|
|
292
352
|
body_hash=_hash_node(node),
|
|
293
353
|
)
|
|
294
354
|
|
|
295
|
-
method_nodes = [n for n, t in method_query
|
|
296
|
-
method_nodes.extend([n for n, t in ctor_query
|
|
355
|
+
method_nodes = [n for n, t in _captures(method_query, node) if t == "method_decl"]
|
|
356
|
+
method_nodes.extend([n for n, t in _captures(ctor_query, node) if t == "method_decl"])
|
|
297
357
|
|
|
298
358
|
for m_node in method_nodes:
|
|
299
359
|
m_name_node = m_node.child_by_field_name("name")
|
|
@@ -323,7 +383,7 @@ def parse_java_source(source: bytes) -> ParsedFile:
|
|
|
323
383
|
body_node = m_node.child_by_field_name("body")
|
|
324
384
|
if body_node is not None:
|
|
325
385
|
grouped: dict[object, dict[str, str]] = {}
|
|
326
|
-
for c_node, c_tag in call_query
|
|
386
|
+
for c_node, c_tag in _captures(call_query, body_node):
|
|
327
387
|
inv_node = c_node if c_tag == "call_inv" else c_node.parent
|
|
328
388
|
grouped.setdefault(inv_node, {})[c_tag] = _text(c_node)
|
|
329
389
|
for inv_node, capture_map in grouped.items():
|
|
@@ -35,7 +35,16 @@ def _load_model():
|
|
|
35
35
|
|
|
36
36
|
@lru_cache(maxsize=1)
|
|
37
37
|
def _embedding_cache_conn():
|
|
38
|
-
|
|
38
|
+
path = SETTINGS.embedding_cache_db
|
|
39
|
+
try:
|
|
40
|
+
os_dir = path.rsplit("/", 1)[0] if "/" in path else ""
|
|
41
|
+
if os_dir:
|
|
42
|
+
import os
|
|
43
|
+
|
|
44
|
+
os.makedirs(os_dir, exist_ok=True)
|
|
45
|
+
conn = sqlite3.connect(path)
|
|
46
|
+
except Exception:
|
|
47
|
+
conn = sqlite3.connect("/tmp/.codespine_embedding_cache.sqlite3")
|
|
39
48
|
conn.execute(
|
|
40
49
|
"""
|
|
41
50
|
CREATE TABLE IF NOT EXISTS embedding_cache (
|
|
@@ -0,0 +1,150 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: codespine
|
|
3
|
+
Version: 0.1.3
|
|
4
|
+
Summary: Local Java code intelligence indexer backed by a graph database
|
|
5
|
+
Author: CodeSpine contributors
|
|
6
|
+
License: MIT License
|
|
7
|
+
|
|
8
|
+
Copyright (c) 2026 CodeSpine contributors
|
|
9
|
+
|
|
10
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
11
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
12
|
+
in the Software without restriction, including without limitation the rights
|
|
13
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
14
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
15
|
+
furnished to do so, subject to the following conditions:
|
|
16
|
+
|
|
17
|
+
The above copyright notice and this permission notice shall be included in all
|
|
18
|
+
copies or substantial portions of the Software.
|
|
19
|
+
|
|
20
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
21
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
22
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
23
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
24
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
25
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
26
|
+
SOFTWARE.
|
|
27
|
+
|
|
28
|
+
Project-URL: Homepage, https://github.com/vinayak3022/codeSpine
|
|
29
|
+
Project-URL: Repository, https://github.com/vinayak3022/codeSpine
|
|
30
|
+
Project-URL: Issues, https://github.com/vinayak3022/codeSpine/issues
|
|
31
|
+
Keywords: java,code-indexing,graph,kuzu,mcp
|
|
32
|
+
Classifier: Development Status :: 3 - Alpha
|
|
33
|
+
Classifier: Intended Audience :: Developers
|
|
34
|
+
Classifier: License :: OSI Approved :: MIT License
|
|
35
|
+
Classifier: Programming Language :: Python :: 3
|
|
36
|
+
Classifier: Programming Language :: Python :: 3.10
|
|
37
|
+
Classifier: Programming Language :: Python :: 3.11
|
|
38
|
+
Classifier: Programming Language :: Python :: 3.12
|
|
39
|
+
Classifier: Programming Language :: Python :: 3.13
|
|
40
|
+
Classifier: Topic :: Software Development :: Libraries
|
|
41
|
+
Classifier: Topic :: Software Development :: Quality Assurance
|
|
42
|
+
Requires-Python: >=3.10
|
|
43
|
+
Description-Content-Type: text/markdown
|
|
44
|
+
License-File: LICENSE
|
|
45
|
+
Requires-Dist: click
|
|
46
|
+
Requires-Dist: kuzu
|
|
47
|
+
Requires-Dist: tree-sitter
|
|
48
|
+
Requires-Dist: tree-sitter-java
|
|
49
|
+
Requires-Dist: fastmcp
|
|
50
|
+
Requires-Dist: psutil
|
|
51
|
+
Requires-Dist: watchfiles
|
|
52
|
+
Provides-Extra: ml
|
|
53
|
+
Requires-Dist: sentence-transformers; extra == "ml"
|
|
54
|
+
Requires-Dist: numpy; extra == "ml"
|
|
55
|
+
Provides-Extra: community
|
|
56
|
+
Requires-Dist: igraph; extra == "community"
|
|
57
|
+
Requires-Dist: leidenalg; extra == "community"
|
|
58
|
+
Provides-Extra: full
|
|
59
|
+
Requires-Dist: sentence-transformers; extra == "full"
|
|
60
|
+
Requires-Dist: numpy; extra == "full"
|
|
61
|
+
Requires-Dist: igraph; extra == "full"
|
|
62
|
+
Requires-Dist: leidenalg; extra == "full"
|
|
63
|
+
Dynamic: license-file
|
|
64
|
+
|
|
65
|
+
# CodeSpine
|
|
66
|
+
|
|
67
|
+
CodeSpine is an intelligence layer for Java teams and AI coding agents.
|
|
68
|
+
It maps your codebase into a live graph so you can find anything fast, predict change impact, and ship safer refactors.
|
|
69
|
+
|
|
70
|
+
## Installation
|
|
71
|
+
|
|
72
|
+
```bash
|
|
73
|
+
pip install codespine
|
|
74
|
+
```
|
|
75
|
+
|
|
76
|
+
## Quick Start
|
|
77
|
+
|
|
78
|
+
```bash
|
|
79
|
+
codespine analyse .
|
|
80
|
+
codespine search "payment retry bug" --json
|
|
81
|
+
codespine context "processPayment" --json
|
|
82
|
+
codespine impact "com.example.Service#processPayment(java.lang.String)" --json
|
|
83
|
+
```
|
|
84
|
+
|
|
85
|
+
Example `codespine analyse` output:
|
|
86
|
+
|
|
87
|
+
```text
|
|
88
|
+
$ codespine analyse .
|
|
89
|
+
Walking files... 142 files found
|
|
90
|
+
Parsing code... 142/142
|
|
91
|
+
Tracing calls... 847 calls resolved
|
|
92
|
+
Analyzing types... 234 type relationships
|
|
93
|
+
Detecting communities... 8 clusters found
|
|
94
|
+
Detecting execution flows... 34 processes found
|
|
95
|
+
Finding dead code... 12 unreachable symbols
|
|
96
|
+
Analyzing git history... 18 coupled file pairs
|
|
97
|
+
Generating embeddings... 623 vectors stored
|
|
98
|
+
|
|
99
|
+
Done in 4.2s - 623 symbols, 1847 edges, 8 clusters, 34 flows
|
|
100
|
+
```
|
|
101
|
+
|
|
102
|
+
## What You Get
|
|
103
|
+
|
|
104
|
+
- Hybrid search: BM25 + semantic vectors + fuzzy + RRF
|
|
105
|
+
- Impact analysis: depth groups with confidence scoring
|
|
106
|
+
- Java-aware dead code detection with exemption passes
|
|
107
|
+
- Execution flow tracing from entry points
|
|
108
|
+
- Community detection (Leiden + fallback)
|
|
109
|
+
- Git change coupling analysis
|
|
110
|
+
- Watch mode incremental reindexing
|
|
111
|
+
- Symbol-level branch diff
|
|
112
|
+
|
|
113
|
+
## Key Commands
|
|
114
|
+
|
|
115
|
+
- `codespine analyse <path> [--full|--incremental]`
|
|
116
|
+
- `codespine search <query> [--k 20] [--json]`
|
|
117
|
+
- `codespine context <query> [--max-depth 3] [--json]`
|
|
118
|
+
- `codespine impact <symbol> [--max-depth 4] [--json]`
|
|
119
|
+
- `codespine deadcode [--limit 200] [--json]`
|
|
120
|
+
- `codespine flow [--entry <symbol>] [--max-depth 6] [--json]`
|
|
121
|
+
- `codespine community [--symbol <symbol>] [--json]`
|
|
122
|
+
- `codespine coupling [--months 6] [--min-strength 0.3] [--min-cochanges 3] [--json]`
|
|
123
|
+
- `codespine diff <base>..<head> [--json]`
|
|
124
|
+
- `codespine watch [--path .] [--global-interval 30]`
|
|
125
|
+
|
|
126
|
+
## MCP Setup (`mcp.json`)
|
|
127
|
+
|
|
128
|
+
```json
|
|
129
|
+
{
|
|
130
|
+
"mcpServers": {
|
|
131
|
+
"codespine": {
|
|
132
|
+
"command": "codespine",
|
|
133
|
+
"args": ["mcp"]
|
|
134
|
+
}
|
|
135
|
+
}
|
|
136
|
+
}
|
|
137
|
+
```
|
|
138
|
+
|
|
139
|
+
## Runtime Paths
|
|
140
|
+
|
|
141
|
+
- `~/.codespine_db`
|
|
142
|
+
- `~/.codespine.pid`
|
|
143
|
+
- `~/.codespine.log`
|
|
144
|
+
- `~/.codespine_embedding_cache.sqlite3`
|
|
145
|
+
|
|
146
|
+
## Project Docs
|
|
147
|
+
|
|
148
|
+
- [`.github/CONTRIBUTING.md`](.github/CONTRIBUTING.md)
|
|
149
|
+
- [`.github/SECURITY.md`](.github/SECURITY.md)
|
|
150
|
+
- [`.github/CODE_OF_CONDUCT.md`](.github/CODE_OF_CONDUCT.md)
|