codespine 0.1.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
codespine/__init__.py ADDED
@@ -0,0 +1,4 @@
1
+ """CodeSpine package."""
2
+
3
# Only the version string is exported by ``from codespine import *``.
__all__ = ["__version__"]
# NOTE(review): this file is listed under a wheel labelled 0.1.1 while the
# constant still reads "0.1.0" - confirm whether the version bump was missed.
__version__ = "0.1.0"
@@ -0,0 +1 @@
1
+ """Analysis layer."""
@@ -0,0 +1,75 @@
1
+ from __future__ import annotations
2
+
3
+ from collections import defaultdict
4
+
5
+
6
def _package_key(fqname: str) -> str:
    """Return ``fqname`` without its last two dotted components (the fallback grouping key)."""
    return fqname.rsplit(".", 2)[0] if "." in fqname else fqname


def detect_communities(store) -> list[dict]:
    """Group symbols into communities, persist them, and return a summary.

    Symbols are read from ``store``. When ``igraph`` and ``leidenalg`` can be
    imported, the undirected CALLS graph is partitioned with Leiden
    (modularity). If that path fails for any reason, symbols are grouped by
    the prefix of their ``fqname`` instead.

    Every community is written back through ``store.set_community`` and
    described in the returned list, which is sorted by size (largest first).

    Returns:
        A list of dicts with the keys ``community_id``, ``label``,
        ``cohesion`` and ``size``; empty when the store has no symbols.
    """
    symbols = store.query_records("MATCH (s:Symbol) RETURN s.id as id, s.fqname as fqname")
    if not symbols:
        # Nothing to cluster, so the edge query is not needed either.
        return []

    edges = store.query_records(
        """
        MATCH (a:Method)-[:CALLS]->(b:Method)
        RETURN a.id as src, b.id as dst
        """
    )

    ids = [s["id"] for s in symbols]
    index_of = {sid: i for i, sid in enumerate(ids)}

    membership: dict[str, int] = {}
    try:
        import igraph as ig
        import leidenalg

        g = ig.Graph(directed=False)
        g.add_vertices(len(ids))
        # Only edges whose endpoints are both known symbols can be placed in the graph.
        graph_edges = [
            (index_of[e["src"]], index_of[e["dst"]])
            for e in edges
            if e["src"] in index_of and e["dst"] in index_of
        ]
        if graph_edges:
            g.add_edges(graph_edges)
        part = leidenalg.find_partition(g, leidenalg.ModularityVertexPartition)
        for idx, cid in enumerate(part.membership):
            membership[ids[idx]] = int(cid)
    except Exception:
        # Deliberate best-effort fallback: group by package prefix from fqname.
        # Drop anything the failed graph path may have written so the two
        # id schemes are never mixed.
        membership.clear()
        # Ids are handed out sequentially per distinct prefix. The built-in
        # hash() of a str is salted per process, so it cannot be used for ids
        # that must be stable between runs, and a modulo on it could merge
        # unrelated prefixes.
        key_to_cid: dict[str, int] = {}
        for s in symbols:
            key = _package_key(s.get("fqname") or "")
            membership[s["id"]] = key_to_cid.setdefault(key, len(key_to_cid))

    grouped: dict[int, list[str]] = defaultdict(list)
    for sid, cid in membership.items():
        grouped[cid].append(sid)

    communities: list[dict] = []
    for cid, symbol_ids in grouped.items():
        # Cohesion is stored as the reciprocal of the community size.
        cohesion = 1.0 / max(len(symbol_ids), 1)
        label = f"community_{cid}"
        store.set_community(str(cid), label, cohesion, symbol_ids)
        communities.append(
            {
                "community_id": str(cid),
                "label": label,
                "cohesion": cohesion,
                "size": len(symbol_ids),
            }
        )

    communities.sort(key=lambda c: c["size"], reverse=True)
    return communities
63
+
64
+
65
def symbol_community(store, symbol_query: str) -> dict:
    """Find the community membership of symbols matching ``symbol_query``.

    The query text is compared against a symbol's id (exact) and against its
    fqname and name (case-insensitive). At most 20 rows are requested.

    Returns:
        ``{"query": symbol_query, "matches": <rows returned by the store>}``.
    """
    cypher = """
        MATCH (s:Symbol)-[:IN_COMMUNITY]->(c:Community)
        WHERE s.id = $q OR lower(s.fqname) = lower($q) OR lower(s.name) = lower($q)
        RETURN s.id as symbol_id, s.fqname as fqname, c.id as community_id, c.label as label, c.cohesion as cohesion
        LIMIT 20
        """
    bindings = {"q": symbol_query}
    matches = store.query_records(cypher, bindings)
    return {"query": symbol_query, "matches": matches}
@@ -0,0 +1,24 @@
1
+ from __future__ import annotations
2
+
3
+ from codespine.analysis.community import symbol_community
4
+ from codespine.analysis.flow import trace_execution_flows
5
+ from codespine.analysis.impact import analyze_impact
6
+ from codespine.search.hybrid import hybrid_search
7
+
8
+
9
def build_symbol_context(store, query: str, max_depth: int = 3) -> dict:
    """Collect search, impact, community and flow information for ``query``.

    Args:
        store: Graph store handed through to every analysis call.
        query: Symbol text to look up.
        max_depth: Depth passed to the impact analysis; flow tracing is given
            ``max_depth + 2``.

    Returns:
        A dict with the keys ``query``, ``focus`` (first search hit or
        ``None``), ``search_candidates``, ``impact``, ``community`` and
        ``flows``.
    """
    candidates = hybrid_search(store, query, k=10)
    context = {
        "query": query,
        "focus": candidates[0] if candidates else None,
        "search_candidates": candidates,
    }
    context["impact"] = analyze_impact(store, query, max_depth=max_depth)
    context["community"] = symbol_community(store, query)
    context["flows"] = trace_execution_flows(store, entry_symbol=query, max_depth=max_depth + 2)
    return context
@@ -0,0 +1,119 @@
1
+ from __future__ import annotations
2
+
3
+ import itertools
4
+ import os
5
+ import subprocess
6
+ from collections import Counter, defaultdict
7
+
8
+ from codespine.config import SETTINGS
9
+ from codespine.indexer.symbol_builder import file_id
10
+
11
+
12
+ def _git_changed_file_sets(repo_path: str, months: int) -> list[set[str]]:
13
+ cmd = [
14
+ "git",
15
+ "-C",
16
+ repo_path,
17
+ "log",
18
+ "--name-only",
19
+ "--pretty=format:__COMMIT__",
20
+ f"--since={months}.months",
21
+ ]
22
+ proc = subprocess.run(cmd, capture_output=True, text=True, check=False)
23
+ if proc.returncode != 0:
24
+ return []
25
+
26
+ changesets: list[set[str]] = []
27
+ current: set[str] = set()
28
+ for line in proc.stdout.splitlines():
29
+ line = line.strip()
30
+ if line == "__COMMIT__":
31
+ if current:
32
+ changesets.append(current)
33
+ current = set()
34
+ continue
35
+ if line:
36
+ current.add(line)
37
+ if current:
38
+ changesets.append(current)
39
+ return changesets
40
+
41
+
42
def compute_coupling(
    store,
    repo_path: str,
    project_id: str,
    months: int = SETTINGS.default_coupling_months,
    min_strength: float = SETTINGS.default_min_coupling_strength,
    min_cochanges: int = SETTINGS.default_min_cochanges,
) -> list[dict]:
    """Derive file-to-file change coupling from git history and store it.

    For every pair of files that changed in the same commit, the strength is
    the number of shared commits divided by the larger of the two files'
    individual change counts. Pairs that reach both ``min_strength`` and
    ``min_cochanges`` are written with ``store.upsert_coupling`` and returned.

    Returns:
        Dicts with ``file_a``, ``file_b``, ``strength`` and ``cochanges``,
        sorted by strength and then co-change count, highest first. Empty
        when no changesets were found.
    """
    commits = _git_changed_file_sets(repo_path, months)
    if not commits:
        return []

    touches = Counter()
    pair_hits: Counter[tuple[str, str]] = Counter()
    for commit_files in commits:
        touches.update(commit_files)
        # Sorting first gives every pair a single canonical (a, b) ordering.
        pair_hits.update(itertools.combinations(sorted(commit_files), 2))

    couplings = []
    for (left, right), together in pair_hits.items():
        busiest = max(touches[left], touches[right])
        strength = together / max(busiest, 1)
        if strength >= min_strength and together >= min_cochanges:
            left_id = file_id(project_id, left)
            right_id = file_id(project_id, right)
            store.upsert_coupling(left_id, right_id, strength, together, months)
            couplings.append(
                {
                    "file_a": left,
                    "file_b": right,
                    "strength": strength,
                    "cochanges": together,
                }
            )

    couplings.sort(key=lambda row: (row["strength"], row["cochanges"]), reverse=True)
    return couplings
84
+
85
+
86
def get_coupling(store, symbol: str | None = None, months: int = 6, min_strength: float = 0.3, min_cochanges: int = 3) -> dict:
    """Read stored change-coupling records, optionally scoped to one symbol.

    Args:
        store: Graph store exposing ``query_records(query, params)``.
        symbol: When given, only couplings of the file(s) declaring a symbol
            whose id, fqname or name matches are returned (up to 200 rows).
            Otherwise all couplings are returned (up to 500 rows).
        months: Only relationships recorded for this window are considered.
        min_strength: Lower bound on the coupling strength.
        min_cochanges: Lower bound on the number of shared commits.

    Returns:
        ``{"symbol": symbol or None, "couplings": <rows from the store>}``.
    """
    thresholds = {
        "months": months,
        "min_strength": min_strength,
        "min_cochanges": min_cochanges,
    }

    if symbol:
        # The symbol alternatives are parenthesised: AND binds tighter than
        # OR, so without the parentheses the thresholds would only constrain
        # the last alternative. The months filter mirrors the global query
        # below so both branches honour the same arguments.
        recs = store.query_records(
            """
            MATCH (s:Symbol)-[:DECLARES]-(f:File)-[r:CO_CHANGED_WITH]-(f2:File)
            WHERE (s.id = $q OR lower(s.fqname) = lower($q) OR lower(s.name) = lower($q))
              AND r.months = $months AND r.strength >= $min_strength AND r.cochanges >= $min_cochanges
            RETURN f.path as file, f2.path as coupled_file, r.strength as strength, r.cochanges as cochanges
            ORDER BY strength DESC, cochanges DESC
            LIMIT 200
            """,
            {"q": symbol, **thresholds},
        )
        return {"symbol": symbol, "couplings": recs}

    recs = store.query_records(
        """
        MATCH (f:File)-[r:CO_CHANGED_WITH]-(f2:File)
        WHERE r.months = $months AND r.strength >= $min_strength AND r.cochanges >= $min_cochanges
        RETURN f.path as file, f2.path as coupled_file, r.strength as strength, r.cochanges as cochanges
        ORDER BY strength DESC, cochanges DESC
        LIMIT 500
        """,
        thresholds,
    )
    return {"symbol": None, "couplings": recs}
@@ -0,0 +1,107 @@
1
+ from __future__ import annotations
2
+
3
# Simple names of Java annotations; a method carrying any of them is never
# reported as dead by detect_dead_code.
EXEMPT_ANNOTATIONS = {
    "Override",
    "Test",
    "ParameterizedTest",
    "Bean",
    "PostConstruct",
    "PreDestroy",
    "Scheduled",
    "KafkaListener",
    "EventListener",
    "JsonCreator",
    "Inject",
}

# Method names that are exempt regardless of their modifiers.
EXEMPT_CONTRACT_METHODS = {
    "toString",
    "hashCode",
    "equals",
    "compareTo",
}

# Name prefixes of bean-style accessors (getX / setX / isX).
_ACCESSOR_PREFIXES = ("get", "set", "is")

# Factory/conversion method names treated as reflection-style hooks.
_REFLECTION_HOOKS = {"valueOf", "fromString", "builder"}


def _modifier_tokens(modifiers) -> set[str]:
    """Normalise a ``modifiers`` value into a set of stripped, non-empty tokens.

    Accepts ``None``/empty (gives an empty set), an iterable of tokens, or a
    single string. A string is split on whitespace instead of being iterated
    character by character.
    """
    if not modifiers:
        return set()
    if isinstance(modifiers, str):
        # NOTE(review): assumes a string value is whitespace separated - confirm against the indexer.
        modifiers = modifiers.split()
    tokens = {str(m).strip() for m in modifiers}
    tokens.discard("")
    return tokens


def _annotation_simple_name(token: str) -> str:
    """Reduce ``@pkg.Name(args)`` to ``Name``; other tokens pass through unchanged."""
    name = token.lstrip("@").split("(", 1)[0].strip()
    return name.rsplit(".", 1)[-1]


def _is_bean_accessor(name: str) -> bool:
    """Return True for ``getX``/``setX``/``isX`` where the prefix is followed by an uppercase letter."""
    for prefix in _ACCESSOR_PREFIXES:
        if name.startswith(prefix) and name[len(prefix):len(prefix) + 1].isupper():
            return True
    return False


def _is_exempt_candidate(candidate: dict) -> bool:
    """Apply the exemptions that can be decided from the candidate record alone."""
    if candidate.get("is_constructor") or candidate.get("is_test"):
        return True

    name = candidate.get("name") or ""
    signature = (candidate.get("signature") or "").lower()
    # Java main entrypoint.
    if name == "main" and "string[]" in signature:
        return True
    if name in EXEMPT_CONTRACT_METHODS or name in _REFLECTION_HOOKS:
        return True

    mods = _modifier_tokens(candidate.get("modifiers"))
    if any(_annotation_simple_name(m) in EXEMPT_ANNOTATIONS for m in mods):
        return True
    # Java bean-ish APIs often rely on reflection/serialization.
    return "public" in mods and _is_bean_accessor(name)


def detect_dead_code(store, limit: int = 200) -> list[dict]:
    """Java-aware dead code detection with exemption passes.

    Candidates are methods without any incoming CALLS edge (``limit * 3`` of
    them are requested so exemptions do not starve the result). A candidate
    is dropped when it is a constructor, a test, a ``main(String[])``
    entrypoint, a contract or reflection-hook method, carries an exempt
    annotation, is a public bean accessor, overrides another method, or
    belongs to a class that implements an interface.

    Args:
        store: Graph store exposing ``query_records(query, params)``.
        limit: Maximum number of results; non-positive values give ``[]``.

    Returns:
        Up to ``limit`` dicts with ``method_id``, ``name``, ``signature`` and
        ``reason``.
    """
    if limit <= 0:
        # A negative slice bound would otherwise trim from the wrong end.
        return []

    candidates = store.query_records(
        """
        MATCH (m:Method), (c:Class)
        WHERE m.class_id = c.id
          AND NOT EXISTS { MATCH (:Method)-[:CALLS]->(m) }
        RETURN m.id as method_id,
               m.name as name,
               m.signature as signature,
               m.modifiers as modifiers,
               c.fqcn as class_fqcn,
               m.is_constructor as is_constructor,
               m.is_test as is_test
        LIMIT $limit
        """,
        {"limit": int(limit * 3)},
    )

    if not candidates:
        return []

    exempt: set[str] = {c["method_id"] for c in candidates if _is_exempt_candidate(c)}

    # Exempt override/interface contract methods if relation exists.
    override_methods = store.query_records(
        """
        MATCH (m:Method)-[:OVERRIDES]->(:Method)
        RETURN DISTINCT m.id as method_id
        """
    )
    interface_methods = store.query_records(
        """
        MATCH (c:Class)-[:IMPLEMENTS]->(:Class), (m:Method)
        WHERE m.class_id = c.id
        RETURN DISTINCT m.id as method_id
        """
    )
    exempt.update(r["method_id"] for r in override_methods)
    exempt.update(r["method_id"] for r in interface_methods)

    dead = [
        {
            "method_id": c["method_id"],
            "name": c.get("name"),
            "signature": c.get("signature"),
            "reason": "no_incoming_calls_after_exemptions",
        }
        for c in candidates
        if c["method_id"] not in exempt
    ]
    return dead[:limit]
@@ -0,0 +1,77 @@
1
+ from __future__ import annotations
2
+
3
+ from collections import defaultdict, deque
4
+
5
+
6
+ def _entry_methods(store) -> list[str]:
7
+ recs = store.query_records(
8
+ """
9
+ MATCH (m:Method)
10
+ WHERE m.name = 'main' OR m.is_test = true
11
+ RETURN m.id as id
12
+ """
13
+ )
14
+ ids = [r["id"] for r in recs]
15
+ if ids:
16
+ return ids
17
+ fallback = store.query_records(
18
+ """
19
+ MATCH (m:Method)
20
+ WITH m ORDER BY m.name LIMIT 10
21
+ RETURN m.id as id
22
+ """
23
+ )
24
+ return [r["id"] for r in fallback]
25
+
26
+
27
def trace_execution_flows(store, entry_symbol: str | None = None, max_depth: int = 6) -> list[dict]:
    """Walk the CALLS graph breadth-first from each entry method.

    Args:
        store: Graph store exposing ``query_records(query, params)``.
        entry_symbol: When given, up to ten methods whose id or name equals
            it, or whose signature contains it, are used as entries.
            Otherwise the entries come from ``_entry_methods``.
        max_depth: Nodes at this depth are recorded but not expanded further.

    Returns:
        One dict per entry with ``entry``, ``kind`` (``"cross_community"``
        when more than 12 nodes were reached, else ``"intra_community"``) and
        ``nodes`` (``{"symbol", "depth"}`` in visiting order).
    """
    call_edges = store.query_records(
        """
        MATCH (a:Method)-[:CALLS]->(b:Method)
        RETURN a.id as src, b.id as dst
        """
    )
    callees_of: dict[str, list[str]] = defaultdict(list)
    for rec in call_edges:
        callees_of[rec["src"]].append(rec["dst"])

    if not entry_symbol:
        roots = _entry_methods(store)
    else:
        matched = store.query_records(
            """
            MATCH (m:Method)
            WHERE m.id = $q OR lower(m.name) = lower($q) OR lower(m.signature) CONTAINS lower($q)
            RETURN m.id as id
            LIMIT 10
            """,
            {"q": entry_symbol},
        )
        roots = [row["id"] for row in matched]

    flows = []
    for root in roots:
        seen = {root}
        reached = [(root, 0)]
        frontier = deque(reached)

        while frontier:
            current, level = frontier.popleft()
            if level < max_depth:
                for callee in callees_of.get(current, ()):
                    if callee not in seen:
                        seen.add(callee)
                        hop = (callee, level + 1)
                        frontier.append(hop)
                        reached.append(hop)

        kind = "cross_community" if len(reached) > 12 else "intra_community"
        flows.append(
            {
                "entry": root,
                "kind": kind,
                "nodes": [{"symbol": sym, "depth": lvl} for sym, lvl in reached],
            }
        )

    return flows
@@ -0,0 +1,90 @@
1
+ from __future__ import annotations
2
+
3
+ from collections import defaultdict, deque
4
+
5
+
6
def _resolve_symbol_ids(store, symbol_query: str) -> list[str]:
    """Return up to 50 symbol ids whose id, name or fqname matches ``symbol_query``.

    The id must match exactly; name and fqname are compared
    case-insensitively, and fqname also matches on a substring.
    """
    recs = store.query_records(
        """
        MATCH (s:Symbol)
        WHERE s.id = $q OR lower(s.name) = lower($q) OR lower(s.fqname) = lower($q) OR lower(s.fqname) CONTAINS lower($q)
        RETURN s.id as id
        LIMIT 50
        """,
        {"q": symbol_query},
    )
    return [r["id"] for r in recs]


def _impact_result(symbol_query: str, target_method_ids: list[str], depth_groups: dict[str, list[dict]]) -> dict:
    """Assemble the result dict so every return path has the same shape."""
    return {
        "target": symbol_query,
        "targets_resolved": target_method_ids,
        "depth_groups": depth_groups,
        "summary": {
            "direct": len(depth_groups["1"]),
            "indirect": len(depth_groups["2"]),
            "transitive": len(depth_groups["3+"]),
        },
    }


def analyze_impact(store, symbol_query: str, max_depth: int = 4) -> dict:
    """Find the callers affected by a change to the methods matching ``symbol_query``.

    Matching symbols are mapped to method ids, then the CALLS graph is walked
    backwards (callee to caller) breadth-first up to ``max_depth`` hops. Each
    caller is reported once, at the depth where it was first reached.

    Args:
        store: Graph store exposing ``query_records(query, params)``.
        symbol_query: Symbol id, name or fqname fragment.
        max_depth: Maximum number of caller hops to follow.

    Returns:
        A dict with ``target``, ``targets_resolved`` (method ids used as
        starting points), ``depth_groups`` (callers bucketed under ``"1"``,
        ``"2"`` and ``"3+"``) and ``summary`` (bucket sizes). The same keys
        are present when nothing could be resolved.
    """
    depth_groups: dict[str, list[dict]] = {"1": [], "2": [], "3+": []}

    target_symbol_ids = _resolve_symbol_ids(store, symbol_query)
    if not target_symbol_ids:
        return _impact_result(symbol_query, [], depth_groups)

    symbol_to_method = {
        r["sid"]: r["mid"]
        for r in store.query_records(
            """
            MATCH (s:Symbol),(m:Method)
            WHERE s.kind = 'method' AND s.fqname CONTAINS m.signature
            RETURN s.id as sid, m.id as mid
            """
        )
    }

    # Several symbols can map to the same method; keep first-seen order
    # without repeating a starting point.
    target_method_ids = list(
        dict.fromkeys(symbol_to_method[sid] for sid in target_symbol_ids if sid in symbol_to_method)
    )
    if not target_method_ids:
        return _impact_result(symbol_query, [], depth_groups)

    edges = store.query_records(
        """
        MATCH (a:Method)-[r:CALLS]->(b:Method)
        RETURN a.id as src, b.id as dst, 'CALLS' as edge_type,
               coalesce(r.confidence, 0.5) as confidence,
               coalesce(r.reason, 'unknown') as reason
        """
    )

    # Index edges by callee so callers can be looked up directly.
    reverse_adj: dict[str, list[dict]] = defaultdict(list)
    for edge in edges:
        reverse_adj[edge["dst"]].append(edge)

    visited: set[str] = set(target_method_ids)
    queue = deque((mid, 0, [mid]) for mid in target_method_ids)

    while queue:
        node, depth, path = queue.popleft()
        if depth >= max_depth:
            continue
        next_depth = depth + 1
        bucket = "1" if next_depth == 1 else "2" if next_depth == 2 else "3+"
        for edge in reverse_adj.get(node, []):
            src = edge["src"]
            if src in visited:
                continue
            visited.add(src)
            next_path = path + [src]
            depth_groups[bucket].append(
                {
                    "symbol": src,
                    "depth": next_depth,
                    "edge_type": edge["edge_type"],
                    "confidence": float(edge["confidence"]),
                    "path": next_path,
                }
            )
            queue.append((src, next_depth, next_path))

    return _impact_result(symbol_query, target_method_ids, depth_groups)