codespine 0.1.5__tar.gz → 0.1.7__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {codespine-0.1.5 → codespine-0.1.7}/PKG-INFO +1 -1
- {codespine-0.1.5 → codespine-0.1.7}/codespine/__init__.py +1 -1
- {codespine-0.1.5 → codespine-0.1.7}/codespine/cli.py +56 -21
- {codespine-0.1.5 → codespine-0.1.7}/codespine/indexer/call_resolver.py +10 -7
- {codespine-0.1.5 → codespine-0.1.7}/codespine/indexer/engine.py +2 -0
- {codespine-0.1.5 → codespine-0.1.7}/codespine.egg-info/PKG-INFO +1 -1
- {codespine-0.1.5 → codespine-0.1.7}/pyproject.toml +1 -1
- {codespine-0.1.5 → codespine-0.1.7}/LICENSE +0 -0
- {codespine-0.1.5 → codespine-0.1.7}/README.md +0 -0
- {codespine-0.1.5 → codespine-0.1.7}/codespine/analysis/__init__.py +0 -0
- {codespine-0.1.5 → codespine-0.1.7}/codespine/analysis/community.py +0 -0
- {codespine-0.1.5 → codespine-0.1.7}/codespine/analysis/context.py +0 -0
- {codespine-0.1.5 → codespine-0.1.7}/codespine/analysis/coupling.py +0 -0
- {codespine-0.1.5 → codespine-0.1.7}/codespine/analysis/deadcode.py +0 -0
- {codespine-0.1.5 → codespine-0.1.7}/codespine/analysis/flow.py +0 -0
- {codespine-0.1.5 → codespine-0.1.7}/codespine/analysis/impact.py +0 -0
- {codespine-0.1.5 → codespine-0.1.7}/codespine/config.py +0 -0
- {codespine-0.1.5 → codespine-0.1.7}/codespine/db/__init__.py +0 -0
- {codespine-0.1.5 → codespine-0.1.7}/codespine/db/schema.py +0 -0
- {codespine-0.1.5 → codespine-0.1.7}/codespine/db/store.py +0 -0
- {codespine-0.1.5 → codespine-0.1.7}/codespine/diff/__init__.py +0 -0
- {codespine-0.1.5 → codespine-0.1.7}/codespine/diff/branch_diff.py +0 -0
- {codespine-0.1.5 → codespine-0.1.7}/codespine/indexer/__init__.py +0 -0
- {codespine-0.1.5 → codespine-0.1.7}/codespine/indexer/java_parser.py +0 -0
- {codespine-0.1.5 → codespine-0.1.7}/codespine/indexer/symbol_builder.py +0 -0
- {codespine-0.1.5 → codespine-0.1.7}/codespine/mcp/__init__.py +0 -0
- {codespine-0.1.5 → codespine-0.1.7}/codespine/mcp/server.py +0 -0
- {codespine-0.1.5 → codespine-0.1.7}/codespine/noise/__init__.py +0 -0
- {codespine-0.1.5 → codespine-0.1.7}/codespine/noise/blocklist.py +0 -0
- {codespine-0.1.5 → codespine-0.1.7}/codespine/search/__init__.py +0 -0
- {codespine-0.1.5 → codespine-0.1.7}/codespine/search/bm25.py +0 -0
- {codespine-0.1.5 → codespine-0.1.7}/codespine/search/fuzzy.py +0 -0
- {codespine-0.1.5 → codespine-0.1.7}/codespine/search/hybrid.py +0 -0
- {codespine-0.1.5 → codespine-0.1.7}/codespine/search/rrf.py +0 -0
- {codespine-0.1.5 → codespine-0.1.7}/codespine/search/vector.py +0 -0
- {codespine-0.1.5 → codespine-0.1.7}/codespine/watch/__init__.py +0 -0
- {codespine-0.1.5 → codespine-0.1.7}/codespine/watch/watcher.py +0 -0
- {codespine-0.1.5 → codespine-0.1.7}/codespine.egg-info/SOURCES.txt +0 -0
- {codespine-0.1.5 → codespine-0.1.7}/codespine.egg-info/dependency_links.txt +0 -0
- {codespine-0.1.5 → codespine-0.1.7}/codespine.egg-info/entry_points.txt +0 -0
- {codespine-0.1.5 → codespine-0.1.7}/codespine.egg-info/requires.txt +0 -0
- {codespine-0.1.5 → codespine-0.1.7}/codespine.egg-info/top_level.txt +0 -0
- {codespine-0.1.5 → codespine-0.1.7}/gindex.py +0 -0
- {codespine-0.1.5 → codespine-0.1.7}/setup.cfg +0 -0
- {codespine-0.1.5 → codespine-0.1.7}/tests/test_branch_diff_normalize.py +0 -0
- {codespine-0.1.5 → codespine-0.1.7}/tests/test_call_resolver.py +0 -0
- {codespine-0.1.5 → codespine-0.1.7}/tests/test_index_and_hybrid.py +0 -0
- {codespine-0.1.5 → codespine-0.1.7}/tests/test_java_parser.py +0 -0
- {codespine-0.1.5 → codespine-0.1.7}/tests/test_search_ranking.py +0 -0
|
@@ -78,7 +78,8 @@ def main() -> None:
|
|
|
78
78
|
@main.command()
|
|
79
79
|
@click.argument("path", type=click.Path(exists=True))
|
|
80
80
|
@click.option("--full/--incremental", default=True, show_default=True)
|
|
81
|
-
|
|
81
|
+
@click.option("--deep/--no-deep", default=False, show_default=True, help="Run expensive global analyses.")
|
|
82
|
+
def analyse(path: str, full: bool, deep: bool) -> None:
|
|
82
83
|
"""Index a local Java project."""
|
|
83
84
|
if _is_running():
|
|
84
85
|
click.secho("Stop MCP first ('codespine stop') to index.", fg="yellow")
|
|
@@ -89,6 +90,7 @@ def analyse(path: str, full: bool) -> None:
|
|
|
89
90
|
store = GraphStore(read_only=False)
|
|
90
91
|
indexer = JavaIndexer(store)
|
|
91
92
|
parse_state = {"shown": False, "indexed": 0, "total": 0, "last_ts": 0.0}
|
|
93
|
+
call_state = {"shown": False, "count": 0, "last_ts": 0.0}
|
|
92
94
|
|
|
93
95
|
def _progress(event: str, payload: dict) -> None:
|
|
94
96
|
now = time.perf_counter()
|
|
@@ -119,6 +121,29 @@ def analyse(path: str, full: bool) -> None:
|
|
|
119
121
|
if event == "resolve_calls_start" and parse_state["shown"]:
|
|
120
122
|
click.echo()
|
|
121
123
|
parse_state["shown"] = False
|
|
124
|
+
_phase("Tracing calls...", "running")
|
|
125
|
+
return
|
|
126
|
+
if event == "resolve_calls_start":
|
|
127
|
+
_phase("Tracing calls...", "running")
|
|
128
|
+
return
|
|
129
|
+
if event == "resolve_calls_progress":
|
|
130
|
+
call_state["count"] = int(payload.get("calls_resolved", 0))
|
|
131
|
+
if (now - call_state["last_ts"]) >= 0.25:
|
|
132
|
+
click.echo(f"\rTracing calls... {call_state['count']} resolved", nl=False)
|
|
133
|
+
call_state["shown"] = True
|
|
134
|
+
call_state["last_ts"] = now
|
|
135
|
+
return
|
|
136
|
+
if event == "resolve_calls_done":
|
|
137
|
+
if call_state["shown"]:
|
|
138
|
+
click.echo()
|
|
139
|
+
call_state["shown"] = False
|
|
140
|
+
_phase("Tracing calls...", f"{int(payload.get('calls_resolved', 0))} calls resolved")
|
|
141
|
+
return
|
|
142
|
+
if event == "resolve_types_start":
|
|
143
|
+
_phase("Analyzing types...", "running")
|
|
144
|
+
return
|
|
145
|
+
if event == "resolve_types_done":
|
|
146
|
+
_phase("Analyzing types...", f"{int(payload.get('type_relationships', 0))} type relationships")
|
|
122
147
|
return
|
|
123
148
|
|
|
124
149
|
result = indexer.index_project(abs_path, full=full, progress=_progress)
|
|
@@ -128,27 +153,37 @@ def analyse(path: str, full: bool) -> None:
|
|
|
128
153
|
_phase("Parsing code...", "0/0")
|
|
129
154
|
elif parse_state["indexed"] < parse_state["total"]:
|
|
130
155
|
_phase("Parsing code...", f"{parse_state['indexed']}/{parse_state['total']}")
|
|
131
|
-
_phase("Tracing calls...", f"{result.calls_resolved} calls resolved")
|
|
132
|
-
_phase("Analyzing types...", f"{result.type_relationships} type relationships")
|
|
133
|
-
|
|
134
|
-
communities = detect_communities(store)
|
|
135
|
-
_phase("Detecting communities...", f"{len(communities)} clusters found")
|
|
136
156
|
|
|
137
|
-
|
|
138
|
-
|
|
139
|
-
|
|
140
|
-
|
|
141
|
-
|
|
142
|
-
|
|
143
|
-
|
|
144
|
-
store
|
|
145
|
-
|
|
146
|
-
|
|
147
|
-
|
|
148
|
-
|
|
149
|
-
|
|
150
|
-
|
|
151
|
-
|
|
157
|
+
communities: list[dict] = []
|
|
158
|
+
flows: list[dict] = []
|
|
159
|
+
dead: list[dict] = []
|
|
160
|
+
coupling_pairs: list[dict] = []
|
|
161
|
+
|
|
162
|
+
should_run_deep = deep or result.files_found <= 1200
|
|
163
|
+
if should_run_deep:
|
|
164
|
+
communities = detect_communities(store)
|
|
165
|
+
_phase("Detecting communities...", f"{len(communities)} clusters found")
|
|
166
|
+
|
|
167
|
+
flows = trace_execution_flows(store)
|
|
168
|
+
_phase("Detecting execution flows...", f"{len(flows)} processes found")
|
|
169
|
+
|
|
170
|
+
dead = detect_dead_code(store, limit=500)
|
|
171
|
+
_phase("Finding dead code...", f"{len(dead)} unreachable symbols")
|
|
172
|
+
|
|
173
|
+
coupling_pairs = compute_coupling(
|
|
174
|
+
store,
|
|
175
|
+
abs_path,
|
|
176
|
+
result.project_id,
|
|
177
|
+
months=SETTINGS.default_coupling_months,
|
|
178
|
+
min_strength=SETTINGS.default_min_coupling_strength,
|
|
179
|
+
min_cochanges=SETTINGS.default_min_cochanges,
|
|
180
|
+
)
|
|
181
|
+
_phase("Analyzing git history...", f"{len(coupling_pairs)} coupled file pairs")
|
|
182
|
+
else:
|
|
183
|
+
_phase("Detecting communities...", "skipped (large repo; rerun with --deep)")
|
|
184
|
+
_phase("Detecting execution flows...", "skipped (large repo; rerun with --deep)")
|
|
185
|
+
_phase("Finding dead code...", "skipped (large repo; rerun with --deep)")
|
|
186
|
+
_phase("Analyzing git history...", "skipped (large repo; rerun with --deep)")
|
|
152
187
|
|
|
153
188
|
vector_count = store.query_records(
|
|
154
189
|
"""
|
|
@@ -1,9 +1,12 @@
|
|
|
1
1
|
from __future__ import annotations
|
|
2
2
|
|
|
3
3
|
from collections import defaultdict
|
|
4
|
+
from typing import Iterator
|
|
4
5
|
|
|
5
6
|
from codespine.noise.blocklist import NOISE_METHOD_NAMES
|
|
6
7
|
|
|
8
|
+
MAX_FUZZY_TARGETS = 12
|
|
9
|
+
|
|
7
10
|
|
|
8
11
|
def _simple_type_name(type_name: str | None) -> str:
|
|
9
12
|
if not type_name:
|
|
@@ -53,10 +56,10 @@ def resolve_calls(
|
|
|
53
56
|
calls: dict[str, list],
|
|
54
57
|
method_context: dict[str, dict],
|
|
55
58
|
class_catalog: dict[str, list[str]],
|
|
56
|
-
) ->
|
|
59
|
+
) -> Iterator[tuple[str, str, float, str]]:
|
|
57
60
|
"""Resolve call names to known method ids.
|
|
58
61
|
|
|
59
|
-
|
|
62
|
+
Yields tuples: (source_method_id, target_method_id, confidence, reason)
|
|
60
63
|
"""
|
|
61
64
|
name_arity_to_method_ids: dict[tuple[str, int], list[str]] = defaultdict(list)
|
|
62
65
|
class_method_index: dict[str, dict[tuple[str, int], list[str]]] = defaultdict(lambda: defaultdict(list))
|
|
@@ -65,7 +68,6 @@ def resolve_calls(
|
|
|
65
68
|
name_arity_to_method_ids[key].append(method_id)
|
|
66
69
|
class_method_index[meta["class_fqcn"]][key].append(method_id)
|
|
67
70
|
|
|
68
|
-
edges: list[tuple[str, str, float, str]] = []
|
|
69
71
|
for source_id, call_sites in calls.items():
|
|
70
72
|
src_meta = method_catalog.get(source_id, {})
|
|
71
73
|
src_ctx = method_context.get(source_id, {})
|
|
@@ -126,12 +128,13 @@ def resolve_calls(
|
|
|
126
128
|
confidence = 1.0
|
|
127
129
|
reason = "exact_name_arity_unique"
|
|
128
130
|
elif len(targets) > 1:
|
|
131
|
+
if len(targets) > MAX_FUZZY_TARGETS:
|
|
132
|
+
# Avoid exploding low-confidence edges in large repos.
|
|
133
|
+
continue
|
|
129
134
|
confidence = 0.5
|
|
130
135
|
reason = "fuzzy_name_arity_ambiguous"
|
|
131
136
|
|
|
132
137
|
if not targets:
|
|
133
138
|
continue
|
|
134
|
-
for target_id in targets:
|
|
135
|
-
|
|
136
|
-
|
|
137
|
-
return edges
|
|
139
|
+
for target_id in set(targets):
|
|
140
|
+
yield source_id, target_id, confidence, reason
|
|
@@ -177,6 +177,8 @@ class JavaIndexer:
|
|
|
177
177
|
for src, dst, confidence, reason in resolve_calls(method_catalog, method_calls, method_context, class_catalog):
|
|
178
178
|
self.store.add_call(src, dst, confidence, reason)
|
|
179
179
|
calls_resolved += 1
|
|
180
|
+
if calls_resolved % 2000 == 0:
|
|
181
|
+
self._emit(progress, "resolve_calls_progress", calls_resolved=calls_resolved)
|
|
180
182
|
self._emit(progress, "resolve_calls_done", calls_resolved=calls_resolved)
|
|
181
183
|
|
|
182
184
|
self._emit(progress, "resolve_types_start")
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|