codespine 0.5.2__tar.gz → 0.5.3__tar.gz
This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in their respective public registries.
- {codespine-0.5.2 → codespine-0.5.3}/PKG-INFO +1 -1
- {codespine-0.5.2 → codespine-0.5.3}/codespine/__init__.py +1 -1
- {codespine-0.5.2 → codespine-0.5.3}/codespine/analysis/community.py +14 -1
- {codespine-0.5.2 → codespine-0.5.3}/codespine/analysis/coupling.py +8 -0
- {codespine-0.5.2 → codespine-0.5.3}/codespine/analysis/crossmodule.py +10 -1
- {codespine-0.5.2 → codespine-0.5.3}/codespine/analysis/flow.py +11 -2
- {codespine-0.5.2 → codespine-0.5.3}/codespine/cli.py +37 -11
- {codespine-0.5.2 → codespine-0.5.3}/codespine.egg-info/PKG-INFO +1 -1
- {codespine-0.5.2 → codespine-0.5.3}/pyproject.toml +1 -1
- {codespine-0.5.2 → codespine-0.5.3}/LICENSE +0 -0
- {codespine-0.5.2 → codespine-0.5.3}/README.md +0 -0
- {codespine-0.5.2 → codespine-0.5.3}/codespine/analysis/__init__.py +0 -0
- {codespine-0.5.2 → codespine-0.5.3}/codespine/analysis/context.py +0 -0
- {codespine-0.5.2 → codespine-0.5.3}/codespine/analysis/deadcode.py +0 -0
- {codespine-0.5.2 → codespine-0.5.3}/codespine/analysis/impact.py +0 -0
- {codespine-0.5.2 → codespine-0.5.3}/codespine/config.py +0 -0
- {codespine-0.5.2 → codespine-0.5.3}/codespine/db/__init__.py +0 -0
- {codespine-0.5.2 → codespine-0.5.3}/codespine/db/schema.py +0 -0
- {codespine-0.5.2 → codespine-0.5.3}/codespine/db/store.py +0 -0
- {codespine-0.5.2 → codespine-0.5.3}/codespine/diff/__init__.py +0 -0
- {codespine-0.5.2 → codespine-0.5.3}/codespine/diff/branch_diff.py +0 -0
- {codespine-0.5.2 → codespine-0.5.3}/codespine/indexer/__init__.py +0 -0
- {codespine-0.5.2 → codespine-0.5.3}/codespine/indexer/call_resolver.py +0 -0
- {codespine-0.5.2 → codespine-0.5.3}/codespine/indexer/engine.py +0 -0
- {codespine-0.5.2 → codespine-0.5.3}/codespine/indexer/java_parser.py +0 -0
- {codespine-0.5.2 → codespine-0.5.3}/codespine/indexer/symbol_builder.py +0 -0
- {codespine-0.5.2 → codespine-0.5.3}/codespine/mcp/__init__.py +0 -0
- {codespine-0.5.2 → codespine-0.5.3}/codespine/mcp/server.py +0 -0
- {codespine-0.5.2 → codespine-0.5.3}/codespine/noise/__init__.py +0 -0
- {codespine-0.5.2 → codespine-0.5.3}/codespine/noise/blocklist.py +0 -0
- {codespine-0.5.2 → codespine-0.5.3}/codespine/search/__init__.py +0 -0
- {codespine-0.5.2 → codespine-0.5.3}/codespine/search/bm25.py +0 -0
- {codespine-0.5.2 → codespine-0.5.3}/codespine/search/fuzzy.py +0 -0
- {codespine-0.5.2 → codespine-0.5.3}/codespine/search/hybrid.py +0 -0
- {codespine-0.5.2 → codespine-0.5.3}/codespine/search/rrf.py +0 -0
- {codespine-0.5.2 → codespine-0.5.3}/codespine/search/vector.py +0 -0
- {codespine-0.5.2 → codespine-0.5.3}/codespine/watch/__init__.py +0 -0
- {codespine-0.5.2 → codespine-0.5.3}/codespine/watch/watcher.py +0 -0
- {codespine-0.5.2 → codespine-0.5.3}/codespine.egg-info/SOURCES.txt +0 -0
- {codespine-0.5.2 → codespine-0.5.3}/codespine.egg-info/dependency_links.txt +0 -0
- {codespine-0.5.2 → codespine-0.5.3}/codespine.egg-info/entry_points.txt +0 -0
- {codespine-0.5.2 → codespine-0.5.3}/codespine.egg-info/requires.txt +0 -0
- {codespine-0.5.2 → codespine-0.5.3}/codespine.egg-info/top_level.txt +0 -0
- {codespine-0.5.2 → codespine-0.5.3}/gindex.py +0 -0
- {codespine-0.5.2 → codespine-0.5.3}/setup.cfg +0 -0
- {codespine-0.5.2 → codespine-0.5.3}/tests/test_branch_diff_normalize.py +0 -0
- {codespine-0.5.2 → codespine-0.5.3}/tests/test_call_resolver.py +0 -0
- {codespine-0.5.2 → codespine-0.5.3}/tests/test_index_and_hybrid.py +0 -0
- {codespine-0.5.2 → codespine-0.5.3}/tests/test_java_parser.py +0 -0
- {codespine-0.5.2 → codespine-0.5.3}/tests/test_multimodule_index.py +0 -0
- {codespine-0.5.2 → codespine-0.5.3}/tests/test_search_ranking.py +0 -0
|
@@ -3,8 +3,14 @@ from __future__ import annotations
|
|
|
3
3
|
from collections import defaultdict
|
|
4
4
|
|
|
5
5
|
|
|
6
|
-
def detect_communities(store) -> list[dict]:
|
|
6
|
+
def detect_communities(store, progress=None) -> list[dict]:
|
|
7
|
+
def _ping(msg: str) -> None:
|
|
8
|
+
if progress:
|
|
9
|
+
progress(msg)
|
|
10
|
+
|
|
11
|
+
_ping("loading symbols")
|
|
7
12
|
symbols = store.query_records("MATCH (s:Symbol) RETURN s.id as id, s.fqname as fqname")
|
|
13
|
+
_ping(f"{len(symbols)} symbols, loading edges")
|
|
8
14
|
edges = store.query_records(
|
|
9
15
|
"""
|
|
10
16
|
MATCH (a:Method)-[:CALLS]->(b:Method)
|
|
@@ -17,6 +23,7 @@ def detect_communities(store) -> list[dict]:
|
|
|
17
23
|
ids = [s["id"] for s in symbols]
|
|
18
24
|
index_of = {sid: i for i, sid in enumerate(ids)}
|
|
19
25
|
|
|
26
|
+
_ping(f"{len(edges)} edges, clustering")
|
|
20
27
|
membership: dict[str, int] = {}
|
|
21
28
|
try:
|
|
22
29
|
import igraph as ig
|
|
@@ -44,11 +51,17 @@ def detect_communities(store) -> list[dict]:
|
|
|
44
51
|
for sid, cid in membership.items():
|
|
45
52
|
grouped[cid].append(sid)
|
|
46
53
|
|
|
54
|
+
_ping(f"{len(grouped)} clusters, persisting")
|
|
47
55
|
communities: list[dict] = []
|
|
56
|
+
done_clusters = 0
|
|
57
|
+
total_clusters = len(grouped)
|
|
48
58
|
for cid, symbol_ids in grouped.items():
|
|
49
59
|
cohesion = 1.0 / max(len(symbol_ids), 1)
|
|
50
60
|
label = f"community_{cid}"
|
|
51
61
|
store.set_community(str(cid), label, cohesion, symbol_ids)
|
|
62
|
+
done_clusters += 1
|
|
63
|
+
if done_clusters % 200 == 0 or done_clusters == total_clusters:
|
|
64
|
+
_ping(f"persisting {done_clusters}/{total_clusters} clusters")
|
|
52
65
|
communities.append(
|
|
53
66
|
{
|
|
54
67
|
"community_id": str(cid),
|
|
@@ -46,11 +46,18 @@ def compute_coupling(
|
|
|
46
46
|
months: int = SETTINGS.default_coupling_months,
|
|
47
47
|
min_strength: float = SETTINGS.default_min_coupling_strength,
|
|
48
48
|
min_cochanges: int = SETTINGS.default_min_cochanges,
|
|
49
|
+
progress=None,
|
|
49
50
|
) -> list[dict]:
|
|
51
|
+
def _ping(msg: str) -> None:
|
|
52
|
+
if progress:
|
|
53
|
+
progress(msg)
|
|
54
|
+
|
|
55
|
+
_ping("reading git history")
|
|
50
56
|
changesets = _git_changed_file_sets(repo_path, months)
|
|
51
57
|
if not changesets:
|
|
52
58
|
return []
|
|
53
59
|
|
|
60
|
+
_ping(f"{len(changesets)} commits, computing co-changes")
|
|
54
61
|
file_changes = Counter()
|
|
55
62
|
co_changes: Counter[tuple[str, str]] = Counter()
|
|
56
63
|
|
|
@@ -60,6 +67,7 @@ def compute_coupling(
|
|
|
60
67
|
for a, b in itertools.combinations(sorted(cs), 2):
|
|
61
68
|
co_changes[(a, b)] += 1
|
|
62
69
|
|
|
70
|
+
_ping(f"{len(co_changes)} pairs, filtering and persisting")
|
|
63
71
|
results = []
|
|
64
72
|
for (a, b), pair_count in co_changes.items():
|
|
65
73
|
denom = max(file_changes[a], file_changes[b])
|
|
@@ -47,11 +47,15 @@ def _param_count(sig: str) -> int:
|
|
|
47
47
|
return 0 if not arg_str.strip() else arg_str.count(",") + 1
|
|
48
48
|
|
|
49
49
|
|
|
50
|
-
def link_cross_module_calls(store, project_ids: list[str] | None = None) -> int:
|
|
50
|
+
def link_cross_module_calls(store, project_ids: list[str] | None = None, progress=None) -> int:
|
|
51
51
|
"""Create CALLS edges between methods in different projects.
|
|
52
52
|
|
|
53
53
|
Returns the number of new cross-module call edges created.
|
|
54
|
+
*progress* is an optional ``(status_str) -> None`` callback for live updates.
|
|
54
55
|
"""
|
|
56
|
+
def _ping(msg: str) -> None:
|
|
57
|
+
if progress:
|
|
58
|
+
progress(msg)
|
|
55
59
|
if project_ids is None:
|
|
56
60
|
proj_recs = store.query_records("MATCH (p:Project) RETURN p.id as id")
|
|
57
61
|
project_ids = [r["id"] for r in proj_recs]
|
|
@@ -72,6 +76,8 @@ def link_cross_module_calls(store, project_ids: list[str] | None = None) -> int:
|
|
|
72
76
|
"""
|
|
73
77
|
)
|
|
74
78
|
|
|
79
|
+
_ping(f"building class index ({len(all_classes)} classes)")
|
|
80
|
+
|
|
75
81
|
# class_name → [(class_id, project_id)]
|
|
76
82
|
name_to_classes: dict[str, list[tuple[str, str]]] = defaultdict(list)
|
|
77
83
|
for c in all_classes:
|
|
@@ -99,6 +105,8 @@ def link_cross_module_calls(store, project_ids: list[str] | None = None) -> int:
|
|
|
99
105
|
if not other_class_names:
|
|
100
106
|
continue
|
|
101
107
|
|
|
108
|
+
_ping(f"scanning {src_pid} methods")
|
|
109
|
+
|
|
102
110
|
# Fetch all methods in this project
|
|
103
111
|
src_methods = store.query_records(
|
|
104
112
|
"""
|
|
@@ -181,5 +189,6 @@ def link_cross_module_calls(store, project_ids: list[str] | None = None) -> int:
|
|
|
181
189
|
except Exception as exc:
|
|
182
190
|
LOGGER.debug("Fallback edge failed: %s", exc)
|
|
183
191
|
|
|
192
|
+
_ping(f"{new_edges} edges created")
|
|
184
193
|
LOGGER.info("Cross-module linking: created %d new call edges.", new_edges)
|
|
185
194
|
return new_edges
|
|
@@ -48,7 +48,12 @@ def _entry_methods(store, project: str | None = None) -> list[str]:
|
|
|
48
48
|
return [r["id"] for r in fallback]
|
|
49
49
|
|
|
50
50
|
|
|
51
|
-
def trace_execution_flows(store, entry_symbol: str | None = None, max_depth: int = 6, project: str | None = None) -> list[dict]:
|
|
51
|
+
def trace_execution_flows(store, entry_symbol: str | None = None, max_depth: int = 6, project: str | None = None, progress=None) -> list[dict]:
|
|
52
|
+
def _ping(msg: str) -> None:
|
|
53
|
+
if progress:
|
|
54
|
+
progress(msg)
|
|
55
|
+
|
|
56
|
+
_ping("loading call graph")
|
|
52
57
|
edges = store.query_records(
|
|
53
58
|
"""
|
|
54
59
|
MATCH (a:Method)-[:CALLS]->(b:Method)
|
|
@@ -85,8 +90,11 @@ def trace_execution_flows(store, entry_symbol: str | None = None, max_depth: int
|
|
|
85
90
|
else:
|
|
86
91
|
entries = _entry_methods(store, project=project)
|
|
87
92
|
|
|
93
|
+
_ping(f"{len(entries)} entry points, tracing")
|
|
88
94
|
flows = []
|
|
89
|
-
for e in entries:
|
|
95
|
+
for idx, e in enumerate(entries):
|
|
96
|
+
if idx % 50 == 0 and idx > 0:
|
|
97
|
+
_ping(f"traced {idx}/{len(entries)} entry points")
|
|
90
98
|
visited = {e}
|
|
91
99
|
q = deque([(e, 0)])
|
|
92
100
|
nodes_with_depth = [(e, 0)]
|
|
@@ -115,6 +123,7 @@ def trace_execution_flows(store, entry_symbol: str | None = None, max_depth: int
|
|
|
115
123
|
# need a second round-trip to resolve raw method ID hashes.
|
|
116
124
|
# Collect all unique IDs across all flows, resolve in one bulk query.
|
|
117
125
|
# ------------------------------------------------------------------ #
|
|
126
|
+
_ping(f"{len(flows)} flows, enriching metadata")
|
|
118
127
|
all_ids = list({node["symbol"] for flow in flows for node in flow["nodes"]})
|
|
119
128
|
meta = _resolve_method_metadata(store, all_ids)
|
|
120
129
|
|
|
@@ -217,13 +217,25 @@ def analyse(path: str, full: bool, deep: bool, embed: bool, allow_running: bool)
|
|
|
217
217
|
elif parse_state["indexed"] < parse_state["total"]:
|
|
218
218
|
_phase("Parsing code...", f"{parse_state['indexed']}/{parse_state['total']}")
|
|
219
219
|
|
|
220
|
+
# ── Helper for in-place progress updates ────────────────────────────
|
|
221
|
+
def _live_phase(label: str, status: str) -> None:
|
|
222
|
+
"""Overwrite the current line with a status update."""
|
|
223
|
+
click.echo(f"\r{label:<30} {status:<50}", nl=False)
|
|
224
|
+
|
|
225
|
+
def _finish_phase(label: str, result: str) -> None:
|
|
226
|
+
"""Finalise an in-place phase line and move to the next line."""
|
|
227
|
+
click.echo(f"\r{label:<30} {result:<50}")
|
|
228
|
+
|
|
220
229
|
# ── Cross-module call linking ──────────────────────────────────────
|
|
221
|
-
# When multiple modules/projects are indexed, attempt to resolve call
|
|
222
|
-
# edges that span module boundaries using import + REFERENCES_TYPE info.
|
|
223
230
|
if is_multi and len(modules_with_ids) > 1:
|
|
231
|
+
xmod_label = "Cross-module linking..."
|
|
232
|
+
_live_phase(xmod_label, "running")
|
|
224
233
|
xmod_pids = [pid for _, pid in modules_with_ids]
|
|
225
|
-
xmod_edges = link_cross_module_calls(
|
|
226
|
-
|
|
234
|
+
xmod_edges = link_cross_module_calls(
|
|
235
|
+
store, project_ids=xmod_pids,
|
|
236
|
+
progress=lambda s: _live_phase(xmod_label, s),
|
|
237
|
+
)
|
|
238
|
+
_finish_phase(xmod_label, f"{xmod_edges} cross-module call edges")
|
|
227
239
|
else:
|
|
228
240
|
_phase("Cross-module linking...", "skipped (single module)")
|
|
229
241
|
|
|
@@ -234,16 +246,29 @@ def analyse(path: str, full: bool, deep: bool, embed: bool, allow_running: bool)
|
|
|
234
246
|
|
|
235
247
|
should_run_deep = deep or total_files_found <= 1200
|
|
236
248
|
if should_run_deep:
|
|
237
|
-
|
|
238
|
-
|
|
249
|
+
comm_label = "Detecting communities..."
|
|
250
|
+
_live_phase(comm_label, "running")
|
|
251
|
+
communities = detect_communities(
|
|
252
|
+
store,
|
|
253
|
+
progress=lambda s: _live_phase(comm_label, s),
|
|
254
|
+
)
|
|
255
|
+
_finish_phase(comm_label, f"{len(communities)} clusters found")
|
|
239
256
|
|
|
240
|
-
|
|
241
|
-
|
|
257
|
+
flow_label = "Detecting execution flows..."
|
|
258
|
+
_live_phase(flow_label, "running")
|
|
259
|
+
flows = trace_execution_flows(
|
|
260
|
+
store,
|
|
261
|
+
progress=lambda s: _live_phase(flow_label, s),
|
|
262
|
+
)
|
|
263
|
+
_finish_phase(flow_label, f"{len(flows)} processes found")
|
|
242
264
|
|
|
265
|
+
dead_label = "Finding dead code..."
|
|
266
|
+
_live_phase(dead_label, "running")
|
|
243
267
|
dead = detect_dead_code(store, limit=500)
|
|
244
|
-
|
|
268
|
+
_finish_phase(dead_label, f"{len(dead)} unreachable symbols")
|
|
245
269
|
|
|
246
|
-
|
|
270
|
+
coup_label = "Analyzing git history..."
|
|
271
|
+
_live_phase(coup_label, "running")
|
|
247
272
|
coupling_root = abs_path
|
|
248
273
|
coupling_project = root_basename if is_multi else (last_result.project_id if last_result else root_basename)
|
|
249
274
|
coupling_pairs = compute_coupling(
|
|
@@ -253,8 +278,9 @@ def analyse(path: str, full: bool, deep: bool, embed: bool, allow_running: bool)
|
|
|
253
278
|
months=SETTINGS.default_coupling_months,
|
|
254
279
|
min_strength=SETTINGS.default_min_coupling_strength,
|
|
255
280
|
min_cochanges=SETTINGS.default_min_cochanges,
|
|
281
|
+
progress=lambda s: _live_phase(coup_label, s),
|
|
256
282
|
)
|
|
257
|
-
|
|
283
|
+
_finish_phase(coup_label, f"{len(coupling_pairs)} coupled file pairs")
|
|
258
284
|
else:
|
|
259
285
|
_phase("Detecting communities...", "skipped (large repo; rerun with --deep)")
|
|
260
286
|
_phase("Detecting execution flows...", "skipped (large repo; rerun with --deep)")
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|