codespine 0.1.4__tar.gz → 0.1.7__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {codespine-0.1.4 → codespine-0.1.7}/PKG-INFO +1 -1
- {codespine-0.1.4 → codespine-0.1.7}/codespine/__init__.py +1 -1
- {codespine-0.1.4 → codespine-0.1.7}/codespine/cli.py +97 -26
- {codespine-0.1.4 → codespine-0.1.7}/codespine/indexer/call_resolver.py +10 -7
- {codespine-0.1.4 → codespine-0.1.7}/codespine/indexer/engine.py +40 -6
- {codespine-0.1.4 → codespine-0.1.7}/codespine.egg-info/PKG-INFO +1 -1
- {codespine-0.1.4 → codespine-0.1.7}/pyproject.toml +1 -1
- {codespine-0.1.4 → codespine-0.1.7}/LICENSE +0 -0
- {codespine-0.1.4 → codespine-0.1.7}/README.md +0 -0
- {codespine-0.1.4 → codespine-0.1.7}/codespine/analysis/__init__.py +0 -0
- {codespine-0.1.4 → codespine-0.1.7}/codespine/analysis/community.py +0 -0
- {codespine-0.1.4 → codespine-0.1.7}/codespine/analysis/context.py +0 -0
- {codespine-0.1.4 → codespine-0.1.7}/codespine/analysis/coupling.py +0 -0
- {codespine-0.1.4 → codespine-0.1.7}/codespine/analysis/deadcode.py +0 -0
- {codespine-0.1.4 → codespine-0.1.7}/codespine/analysis/flow.py +0 -0
- {codespine-0.1.4 → codespine-0.1.7}/codespine/analysis/impact.py +0 -0
- {codespine-0.1.4 → codespine-0.1.7}/codespine/config.py +0 -0
- {codespine-0.1.4 → codespine-0.1.7}/codespine/db/__init__.py +0 -0
- {codespine-0.1.4 → codespine-0.1.7}/codespine/db/schema.py +0 -0
- {codespine-0.1.4 → codespine-0.1.7}/codespine/db/store.py +0 -0
- {codespine-0.1.4 → codespine-0.1.7}/codespine/diff/__init__.py +0 -0
- {codespine-0.1.4 → codespine-0.1.7}/codespine/diff/branch_diff.py +0 -0
- {codespine-0.1.4 → codespine-0.1.7}/codespine/indexer/__init__.py +0 -0
- {codespine-0.1.4 → codespine-0.1.7}/codespine/indexer/java_parser.py +0 -0
- {codespine-0.1.4 → codespine-0.1.7}/codespine/indexer/symbol_builder.py +0 -0
- {codespine-0.1.4 → codespine-0.1.7}/codespine/mcp/__init__.py +0 -0
- {codespine-0.1.4 → codespine-0.1.7}/codespine/mcp/server.py +0 -0
- {codespine-0.1.4 → codespine-0.1.7}/codespine/noise/__init__.py +0 -0
- {codespine-0.1.4 → codespine-0.1.7}/codespine/noise/blocklist.py +0 -0
- {codespine-0.1.4 → codespine-0.1.7}/codespine/search/__init__.py +0 -0
- {codespine-0.1.4 → codespine-0.1.7}/codespine/search/bm25.py +0 -0
- {codespine-0.1.4 → codespine-0.1.7}/codespine/search/fuzzy.py +0 -0
- {codespine-0.1.4 → codespine-0.1.7}/codespine/search/hybrid.py +0 -0
- {codespine-0.1.4 → codespine-0.1.7}/codespine/search/rrf.py +0 -0
- {codespine-0.1.4 → codespine-0.1.7}/codespine/search/vector.py +0 -0
- {codespine-0.1.4 → codespine-0.1.7}/codespine/watch/__init__.py +0 -0
- {codespine-0.1.4 → codespine-0.1.7}/codespine/watch/watcher.py +0 -0
- {codespine-0.1.4 → codespine-0.1.7}/codespine.egg-info/SOURCES.txt +0 -0
- {codespine-0.1.4 → codespine-0.1.7}/codespine.egg-info/dependency_links.txt +0 -0
- {codespine-0.1.4 → codespine-0.1.7}/codespine.egg-info/entry_points.txt +0 -0
- {codespine-0.1.4 → codespine-0.1.7}/codespine.egg-info/requires.txt +0 -0
- {codespine-0.1.4 → codespine-0.1.7}/codespine.egg-info/top_level.txt +0 -0
- {codespine-0.1.4 → codespine-0.1.7}/gindex.py +0 -0
- {codespine-0.1.4 → codespine-0.1.7}/setup.cfg +0 -0
- {codespine-0.1.4 → codespine-0.1.7}/tests/test_branch_diff_normalize.py +0 -0
- {codespine-0.1.4 → codespine-0.1.7}/tests/test_call_resolver.py +0 -0
- {codespine-0.1.4 → codespine-0.1.7}/tests/test_index_and_hybrid.py +0 -0
- {codespine-0.1.4 → codespine-0.1.7}/tests/test_java_parser.py +0 -0
- {codespine-0.1.4 → codespine-0.1.7}/tests/test_search_ranking.py +0 -0
|
@@ -78,7 +78,8 @@ def main() -> None:
|
|
|
78
78
|
@main.command()
|
|
79
79
|
@click.argument("path", type=click.Path(exists=True))
|
|
80
80
|
@click.option("--full/--incremental", default=True, show_default=True)
|
|
81
|
-
def analyse(path: str, full: bool) -> None:
|
|
81
|
+
@click.option("--deep/--no-deep", default=False, show_default=True, help="Run expensive global analyses.")
|
|
82
|
+
def analyse(path: str, full: bool, deep: bool) -> None:
|
|
82
83
|
"""Index a local Java project."""
|
|
83
84
|
if _is_running():
|
|
84
85
|
click.secho("Stop MCP first ('codespine stop') to index.", fg="yellow")
|
|
@@ -88,31 +89,101 @@ def analyse(path: str, full: bool) -> None:
|
|
|
88
89
|
abs_path = os.path.abspath(path)
|
|
89
90
|
store = GraphStore(read_only=False)
|
|
90
91
|
indexer = JavaIndexer(store)
|
|
91
|
-
|
|
92
|
-
|
|
93
|
-
|
|
94
|
-
|
|
95
|
-
|
|
96
|
-
|
|
97
|
-
|
|
98
|
-
|
|
99
|
-
|
|
100
|
-
|
|
101
|
-
|
|
102
|
-
|
|
103
|
-
|
|
104
|
-
|
|
105
|
-
|
|
106
|
-
|
|
107
|
-
|
|
108
|
-
|
|
109
|
-
|
|
110
|
-
|
|
111
|
-
|
|
112
|
-
|
|
113
|
-
|
|
114
|
-
|
|
115
|
-
|
|
92
|
+
parse_state = {"shown": False, "indexed": 0, "total": 0, "last_ts": 0.0}
|
|
93
|
+
call_state = {"shown": False, "count": 0, "last_ts": 0.0}
|
|
94
|
+
|
|
95
|
+
def _progress(event: str, payload: dict) -> None:
|
|
96
|
+
now = time.perf_counter()
|
|
97
|
+
if event == "scan_done":
|
|
98
|
+
_phase("Walking files...", f"{int(payload.get('files_found', 0))} files found")
|
|
99
|
+
return
|
|
100
|
+
if event == "plan_done":
|
|
101
|
+
to_index = int(payload.get("files_to_index", 0))
|
|
102
|
+
deleted = int(payload.get("deleted_files", 0))
|
|
103
|
+
mode = str(payload.get("mode", "incremental"))
|
|
104
|
+
parse_state["total"] = to_index
|
|
105
|
+
_phase("Index mode...", f"{mode} ({to_index} files to index, {deleted} deleted)")
|
|
106
|
+
if to_index == 0:
|
|
107
|
+
_phase("Parsing code...", "0/0")
|
|
108
|
+
return
|
|
109
|
+
if event == "parse_progress":
|
|
110
|
+
indexed = int(payload.get("indexed", 0))
|
|
111
|
+
total = int(payload.get("total", 0))
|
|
112
|
+
parse_state["indexed"] = indexed
|
|
113
|
+
parse_state["total"] = total
|
|
114
|
+
if total == 0:
|
|
115
|
+
return
|
|
116
|
+
if indexed == total or (now - parse_state["last_ts"]) >= 0.2:
|
|
117
|
+
click.echo(f"\rParsing code... {indexed}/{total}", nl=False)
|
|
118
|
+
parse_state["shown"] = True
|
|
119
|
+
parse_state["last_ts"] = now
|
|
120
|
+
return
|
|
121
|
+
if event == "resolve_calls_start" and parse_state["shown"]:
|
|
122
|
+
click.echo()
|
|
123
|
+
parse_state["shown"] = False
|
|
124
|
+
_phase("Tracing calls...", "running")
|
|
125
|
+
return
|
|
126
|
+
if event == "resolve_calls_start":
|
|
127
|
+
_phase("Tracing calls...", "running")
|
|
128
|
+
return
|
|
129
|
+
if event == "resolve_calls_progress":
|
|
130
|
+
call_state["count"] = int(payload.get("calls_resolved", 0))
|
|
131
|
+
if (now - call_state["last_ts"]) >= 0.25:
|
|
132
|
+
click.echo(f"\rTracing calls... {call_state['count']} resolved", nl=False)
|
|
133
|
+
call_state["shown"] = True
|
|
134
|
+
call_state["last_ts"] = now
|
|
135
|
+
return
|
|
136
|
+
if event == "resolve_calls_done":
|
|
137
|
+
if call_state["shown"]:
|
|
138
|
+
click.echo()
|
|
139
|
+
call_state["shown"] = False
|
|
140
|
+
_phase("Tracing calls...", f"{int(payload.get('calls_resolved', 0))} calls resolved")
|
|
141
|
+
return
|
|
142
|
+
if event == "resolve_types_start":
|
|
143
|
+
_phase("Analyzing types...", "running")
|
|
144
|
+
return
|
|
145
|
+
if event == "resolve_types_done":
|
|
146
|
+
_phase("Analyzing types...", f"{int(payload.get('type_relationships', 0))} type relationships")
|
|
147
|
+
return
|
|
148
|
+
|
|
149
|
+
result = indexer.index_project(abs_path, full=full, progress=_progress)
|
|
150
|
+
if parse_state["shown"]:
|
|
151
|
+
click.echo()
|
|
152
|
+
if parse_state["total"] == 0:
|
|
153
|
+
_phase("Parsing code...", "0/0")
|
|
154
|
+
elif parse_state["indexed"] < parse_state["total"]:
|
|
155
|
+
_phase("Parsing code...", f"{parse_state['indexed']}/{parse_state['total']}")
|
|
156
|
+
|
|
157
|
+
communities: list[dict] = []
|
|
158
|
+
flows: list[dict] = []
|
|
159
|
+
dead: list[dict] = []
|
|
160
|
+
coupling_pairs: list[dict] = []
|
|
161
|
+
|
|
162
|
+
should_run_deep = deep or result.files_found <= 1200
|
|
163
|
+
if should_run_deep:
|
|
164
|
+
communities = detect_communities(store)
|
|
165
|
+
_phase("Detecting communities...", f"{len(communities)} clusters found")
|
|
166
|
+
|
|
167
|
+
flows = trace_execution_flows(store)
|
|
168
|
+
_phase("Detecting execution flows...", f"{len(flows)} processes found")
|
|
169
|
+
|
|
170
|
+
dead = detect_dead_code(store, limit=500)
|
|
171
|
+
_phase("Finding dead code...", f"{len(dead)} unreachable symbols")
|
|
172
|
+
|
|
173
|
+
coupling_pairs = compute_coupling(
|
|
174
|
+
store,
|
|
175
|
+
abs_path,
|
|
176
|
+
result.project_id,
|
|
177
|
+
months=SETTINGS.default_coupling_months,
|
|
178
|
+
min_strength=SETTINGS.default_min_coupling_strength,
|
|
179
|
+
min_cochanges=SETTINGS.default_min_cochanges,
|
|
180
|
+
)
|
|
181
|
+
_phase("Analyzing git history...", f"{len(coupling_pairs)} coupled file pairs")
|
|
182
|
+
else:
|
|
183
|
+
_phase("Detecting communities...", "skipped (large repo; rerun with --deep)")
|
|
184
|
+
_phase("Detecting execution flows...", "skipped (large repo; rerun with --deep)")
|
|
185
|
+
_phase("Finding dead code...", "skipped (large repo; rerun with --deep)")
|
|
186
|
+
_phase("Analyzing git history...", "skipped (large repo; rerun with --deep)")
|
|
116
187
|
|
|
117
188
|
vector_count = store.query_records(
|
|
118
189
|
"""
|
|
@@ -1,9 +1,12 @@
|
|
|
1
1
|
from __future__ import annotations
|
|
2
2
|
|
|
3
3
|
from collections import defaultdict
|
|
4
|
+
from typing import Iterator
|
|
4
5
|
|
|
5
6
|
from codespine.noise.blocklist import NOISE_METHOD_NAMES
|
|
6
7
|
|
|
8
|
+
MAX_FUZZY_TARGETS = 12
|
|
9
|
+
|
|
7
10
|
|
|
8
11
|
def _simple_type_name(type_name: str | None) -> str:
|
|
9
12
|
if not type_name:
|
|
@@ -53,10 +56,10 @@ def resolve_calls(
|
|
|
53
56
|
calls: dict[str, list],
|
|
54
57
|
method_context: dict[str, dict],
|
|
55
58
|
class_catalog: dict[str, list[str]],
|
|
56
|
-
) ->
|
|
59
|
+
) -> Iterator[tuple[str, str, float, str]]:
|
|
57
60
|
"""Resolve call names to known method ids.
|
|
58
61
|
|
|
59
|
-
|
|
62
|
+
Yields tuples: (source_method_id, target_method_id, confidence, reason)
|
|
60
63
|
"""
|
|
61
64
|
name_arity_to_method_ids: dict[tuple[str, int], list[str]] = defaultdict(list)
|
|
62
65
|
class_method_index: dict[str, dict[tuple[str, int], list[str]]] = defaultdict(lambda: defaultdict(list))
|
|
@@ -65,7 +68,6 @@ def resolve_calls(
|
|
|
65
68
|
name_arity_to_method_ids[key].append(method_id)
|
|
66
69
|
class_method_index[meta["class_fqcn"]][key].append(method_id)
|
|
67
70
|
|
|
68
|
-
edges: list[tuple[str, str, float, str]] = []
|
|
69
71
|
for source_id, call_sites in calls.items():
|
|
70
72
|
src_meta = method_catalog.get(source_id, {})
|
|
71
73
|
src_ctx = method_context.get(source_id, {})
|
|
@@ -126,12 +128,13 @@ def resolve_calls(
|
|
|
126
128
|
confidence = 1.0
|
|
127
129
|
reason = "exact_name_arity_unique"
|
|
128
130
|
elif len(targets) > 1:
|
|
131
|
+
if len(targets) > MAX_FUZZY_TARGETS:
|
|
132
|
+
# Avoid exploding low-confidence edges in large repos.
|
|
133
|
+
continue
|
|
129
134
|
confidence = 0.5
|
|
130
135
|
reason = "fuzzy_name_arity_ambiguous"
|
|
131
136
|
|
|
132
137
|
if not targets:
|
|
133
138
|
continue
|
|
134
|
-
for target_id in targets:
|
|
135
|
-
|
|
136
|
-
|
|
137
|
-
return edges
|
|
139
|
+
for target_id in set(targets):
|
|
140
|
+
yield source_id, target_id, confidence, reason
|
|
@@ -2,6 +2,7 @@ from __future__ import annotations
|
|
|
2
2
|
|
|
3
3
|
import os
|
|
4
4
|
from dataclasses import dataclass
|
|
5
|
+
from typing import Callable
|
|
5
6
|
|
|
6
7
|
from codespine.indexer.call_resolver import resolve_calls
|
|
7
8
|
from codespine.indexer.java_parser import parse_java_source
|
|
@@ -25,12 +26,18 @@ class JavaIndexer:
|
|
|
25
26
|
def __init__(self, store):
|
|
26
27
|
self.store = store
|
|
27
28
|
|
|
28
|
-
def index_project(
|
|
29
|
+
def index_project(
|
|
30
|
+
self,
|
|
31
|
+
root_path: str,
|
|
32
|
+
full: bool = True,
|
|
33
|
+
progress: Callable[[str, dict], None] | None = None,
|
|
34
|
+
) -> IndexResult:
|
|
29
35
|
root_path = os.path.abspath(root_path)
|
|
30
36
|
project_id = os.path.basename(root_path)
|
|
31
37
|
current_files = self._collect_java_files(root_path)
|
|
32
|
-
|
|
38
|
+
self._emit(progress, "scan_done", files_found=len(current_files))
|
|
33
39
|
db_files = self.store.project_file_hashes(project_id) if not full else {}
|
|
40
|
+
current_hashes = self._hash_files(project_id, root_path, current_files) if not full else {}
|
|
34
41
|
|
|
35
42
|
if full:
|
|
36
43
|
to_reindex = current_files
|
|
@@ -45,6 +52,13 @@ class JavaIndexer:
|
|
|
45
52
|
old = db_files.get(fid, {}).get("hash")
|
|
46
53
|
if old != digest:
|
|
47
54
|
to_reindex.append(file_path)
|
|
55
|
+
self._emit(
|
|
56
|
+
progress,
|
|
57
|
+
"plan_done",
|
|
58
|
+
files_to_index=len(to_reindex),
|
|
59
|
+
deleted_files=len(deleted_file_ids),
|
|
60
|
+
mode="full" if full else "incremental",
|
|
61
|
+
)
|
|
48
62
|
|
|
49
63
|
files_indexed = 0
|
|
50
64
|
classes_indexed = 0
|
|
@@ -151,12 +165,25 @@ class JavaIndexer:
|
|
|
151
165
|
}
|
|
152
166
|
class_methods[cls.fqcn][method.signature] = m_id
|
|
153
167
|
files_indexed += 1
|
|
168
|
+
self._emit(
|
|
169
|
+
progress,
|
|
170
|
+
"parse_progress",
|
|
171
|
+
indexed=files_indexed,
|
|
172
|
+
total=len(to_reindex),
|
|
173
|
+
file_path=file_path,
|
|
174
|
+
)
|
|
154
175
|
|
|
176
|
+
self._emit(progress, "resolve_calls_start")
|
|
155
177
|
for src, dst, confidence, reason in resolve_calls(method_catalog, method_calls, method_context, class_catalog):
|
|
156
178
|
self.store.add_call(src, dst, confidence, reason)
|
|
157
179
|
calls_resolved += 1
|
|
180
|
+
if calls_resolved % 2000 == 0:
|
|
181
|
+
self._emit(progress, "resolve_calls_progress", calls_resolved=calls_resolved)
|
|
182
|
+
self._emit(progress, "resolve_calls_done", calls_resolved=calls_resolved)
|
|
158
183
|
|
|
184
|
+
self._emit(progress, "resolve_types_start")
|
|
159
185
|
type_relationships += self._build_inheritance_edges(class_meta, class_catalog, class_methods)
|
|
186
|
+
self._emit(progress, "resolve_types_done", type_relationships=type_relationships)
|
|
160
187
|
|
|
161
188
|
return IndexResult(
|
|
162
189
|
project_id=project_id,
|
|
@@ -172,10 +199,11 @@ class JavaIndexer:
|
|
|
172
199
|
@staticmethod
|
|
173
200
|
def _collect_java_files(root_path: str) -> list[str]:
|
|
174
201
|
out: list[str] = []
|
|
175
|
-
|
|
176
|
-
|
|
177
|
-
|
|
178
|
-
|
|
202
|
+
skip_dirs = {".git", "target", "build", "out", ".idea", ".gradle", ".mvn", "node_modules"}
|
|
203
|
+
for root, dirs, files in os.walk(root_path, topdown=True):
|
|
204
|
+
dirs[:] = [d for d in dirs if d not in skip_dirs]
|
|
205
|
+
normalized = root.replace("\\", "/")
|
|
206
|
+
if "/src/" not in normalized and not normalized.endswith("/src"):
|
|
179
207
|
continue
|
|
180
208
|
for filename in files:
|
|
181
209
|
if filename.endswith(".java"):
|
|
@@ -303,3 +331,9 @@ class JavaIndexer:
|
|
|
303
331
|
self.store.add_reference("OVERRIDES", "Method", method_id, "Method", iface_method, 1.0)
|
|
304
332
|
rel_count += 1
|
|
305
333
|
return rel_count
|
|
334
|
+
|
|
335
|
+
@staticmethod
|
|
336
|
+
def _emit(progress: Callable[[str, dict], None] | None, event: str, **payload: object) -> None:
|
|
337
|
+
if progress is None:
|
|
338
|
+
return
|
|
339
|
+
progress(event, payload)
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|