codespine 0.1.4__tar.gz → 0.1.7__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (49)
  1. {codespine-0.1.4 → codespine-0.1.7}/PKG-INFO +1 -1
  2. {codespine-0.1.4 → codespine-0.1.7}/codespine/__init__.py +1 -1
  3. {codespine-0.1.4 → codespine-0.1.7}/codespine/cli.py +97 -26
  4. {codespine-0.1.4 → codespine-0.1.7}/codespine/indexer/call_resolver.py +10 -7
  5. {codespine-0.1.4 → codespine-0.1.7}/codespine/indexer/engine.py +40 -6
  6. {codespine-0.1.4 → codespine-0.1.7}/codespine.egg-info/PKG-INFO +1 -1
  7. {codespine-0.1.4 → codespine-0.1.7}/pyproject.toml +1 -1
  8. {codespine-0.1.4 → codespine-0.1.7}/LICENSE +0 -0
  9. {codespine-0.1.4 → codespine-0.1.7}/README.md +0 -0
  10. {codespine-0.1.4 → codespine-0.1.7}/codespine/analysis/__init__.py +0 -0
  11. {codespine-0.1.4 → codespine-0.1.7}/codespine/analysis/community.py +0 -0
  12. {codespine-0.1.4 → codespine-0.1.7}/codespine/analysis/context.py +0 -0
  13. {codespine-0.1.4 → codespine-0.1.7}/codespine/analysis/coupling.py +0 -0
  14. {codespine-0.1.4 → codespine-0.1.7}/codespine/analysis/deadcode.py +0 -0
  15. {codespine-0.1.4 → codespine-0.1.7}/codespine/analysis/flow.py +0 -0
  16. {codespine-0.1.4 → codespine-0.1.7}/codespine/analysis/impact.py +0 -0
  17. {codespine-0.1.4 → codespine-0.1.7}/codespine/config.py +0 -0
  18. {codespine-0.1.4 → codespine-0.1.7}/codespine/db/__init__.py +0 -0
  19. {codespine-0.1.4 → codespine-0.1.7}/codespine/db/schema.py +0 -0
  20. {codespine-0.1.4 → codespine-0.1.7}/codespine/db/store.py +0 -0
  21. {codespine-0.1.4 → codespine-0.1.7}/codespine/diff/__init__.py +0 -0
  22. {codespine-0.1.4 → codespine-0.1.7}/codespine/diff/branch_diff.py +0 -0
  23. {codespine-0.1.4 → codespine-0.1.7}/codespine/indexer/__init__.py +0 -0
  24. {codespine-0.1.4 → codespine-0.1.7}/codespine/indexer/java_parser.py +0 -0
  25. {codespine-0.1.4 → codespine-0.1.7}/codespine/indexer/symbol_builder.py +0 -0
  26. {codespine-0.1.4 → codespine-0.1.7}/codespine/mcp/__init__.py +0 -0
  27. {codespine-0.1.4 → codespine-0.1.7}/codespine/mcp/server.py +0 -0
  28. {codespine-0.1.4 → codespine-0.1.7}/codespine/noise/__init__.py +0 -0
  29. {codespine-0.1.4 → codespine-0.1.7}/codespine/noise/blocklist.py +0 -0
  30. {codespine-0.1.4 → codespine-0.1.7}/codespine/search/__init__.py +0 -0
  31. {codespine-0.1.4 → codespine-0.1.7}/codespine/search/bm25.py +0 -0
  32. {codespine-0.1.4 → codespine-0.1.7}/codespine/search/fuzzy.py +0 -0
  33. {codespine-0.1.4 → codespine-0.1.7}/codespine/search/hybrid.py +0 -0
  34. {codespine-0.1.4 → codespine-0.1.7}/codespine/search/rrf.py +0 -0
  35. {codespine-0.1.4 → codespine-0.1.7}/codespine/search/vector.py +0 -0
  36. {codespine-0.1.4 → codespine-0.1.7}/codespine/watch/__init__.py +0 -0
  37. {codespine-0.1.4 → codespine-0.1.7}/codespine/watch/watcher.py +0 -0
  38. {codespine-0.1.4 → codespine-0.1.7}/codespine.egg-info/SOURCES.txt +0 -0
  39. {codespine-0.1.4 → codespine-0.1.7}/codespine.egg-info/dependency_links.txt +0 -0
  40. {codespine-0.1.4 → codespine-0.1.7}/codespine.egg-info/entry_points.txt +0 -0
  41. {codespine-0.1.4 → codespine-0.1.7}/codespine.egg-info/requires.txt +0 -0
  42. {codespine-0.1.4 → codespine-0.1.7}/codespine.egg-info/top_level.txt +0 -0
  43. {codespine-0.1.4 → codespine-0.1.7}/gindex.py +0 -0
  44. {codespine-0.1.4 → codespine-0.1.7}/setup.cfg +0 -0
  45. {codespine-0.1.4 → codespine-0.1.7}/tests/test_branch_diff_normalize.py +0 -0
  46. {codespine-0.1.4 → codespine-0.1.7}/tests/test_call_resolver.py +0 -0
  47. {codespine-0.1.4 → codespine-0.1.7}/tests/test_index_and_hybrid.py +0 -0
  48. {codespine-0.1.4 → codespine-0.1.7}/tests/test_java_parser.py +0 -0
  49. {codespine-0.1.4 → codespine-0.1.7}/tests/test_search_ranking.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: codespine
3
- Version: 0.1.4
3
+ Version: 0.1.7
4
4
  Summary: Local Java code intelligence indexer backed by a graph database
5
5
  Author: CodeSpine contributors
6
6
  License: MIT License
@@ -1,4 +1,4 @@
1
1
  """CodeSpine package."""
2
2
 
3
3
  __all__ = ["__version__"]
4
- __version__ = "0.1.0"
4
+ __version__ = "0.1.7"
@@ -78,7 +78,8 @@ def main() -> None:
78
78
  @main.command()
79
79
  @click.argument("path", type=click.Path(exists=True))
80
80
  @click.option("--full/--incremental", default=True, show_default=True)
81
- def analyse(path: str, full: bool) -> None:
81
+ @click.option("--deep/--no-deep", default=False, show_default=True, help="Run expensive global analyses.")
82
+ def analyse(path: str, full: bool, deep: bool) -> None:
82
83
  """Index a local Java project."""
83
84
  if _is_running():
84
85
  click.secho("Stop MCP first ('codespine stop') to index.", fg="yellow")
@@ -88,31 +89,101 @@ def analyse(path: str, full: bool) -> None:
88
89
  abs_path = os.path.abspath(path)
89
90
  store = GraphStore(read_only=False)
90
91
  indexer = JavaIndexer(store)
91
-
92
- result = indexer.index_project(abs_path, full=full)
93
- _phase("Walking files...", f"{result.files_found} files found")
94
- _phase("Parsing code...", f"{result.files_indexed}/{result.files_found}")
95
- _phase("Tracing calls...", f"{result.calls_resolved} calls resolved")
96
- _phase("Analyzing types...", f"{result.type_relationships} type relationships")
97
-
98
- communities = detect_communities(store)
99
- _phase("Detecting communities...", f"{len(communities)} clusters found")
100
-
101
- flows = trace_execution_flows(store)
102
- _phase("Detecting execution flows...", f"{len(flows)} processes found")
103
-
104
- dead = detect_dead_code(store, limit=500)
105
- _phase("Finding dead code...", f"{len(dead)} unreachable symbols")
106
-
107
- coupling_pairs = compute_coupling(
108
- store,
109
- abs_path,
110
- result.project_id,
111
- months=SETTINGS.default_coupling_months,
112
- min_strength=SETTINGS.default_min_coupling_strength,
113
- min_cochanges=SETTINGS.default_min_cochanges,
114
- )
115
- _phase("Analyzing git history...", f"{len(coupling_pairs)} coupled file pairs")
92
+ parse_state = {"shown": False, "indexed": 0, "total": 0, "last_ts": 0.0}
93
+ call_state = {"shown": False, "count": 0, "last_ts": 0.0}
94
+
95
+ def _progress(event: str, payload: dict) -> None:
96
+ now = time.perf_counter()
97
+ if event == "scan_done":
98
+ _phase("Walking files...", f"{int(payload.get('files_found', 0))} files found")
99
+ return
100
+ if event == "plan_done":
101
+ to_index = int(payload.get("files_to_index", 0))
102
+ deleted = int(payload.get("deleted_files", 0))
103
+ mode = str(payload.get("mode", "incremental"))
104
+ parse_state["total"] = to_index
105
+ _phase("Index mode...", f"{mode} ({to_index} files to index, {deleted} deleted)")
106
+ if to_index == 0:
107
+ _phase("Parsing code...", "0/0")
108
+ return
109
+ if event == "parse_progress":
110
+ indexed = int(payload.get("indexed", 0))
111
+ total = int(payload.get("total", 0))
112
+ parse_state["indexed"] = indexed
113
+ parse_state["total"] = total
114
+ if total == 0:
115
+ return
116
+ if indexed == total or (now - parse_state["last_ts"]) >= 0.2:
117
+ click.echo(f"\rParsing code... {indexed}/{total}", nl=False)
118
+ parse_state["shown"] = True
119
+ parse_state["last_ts"] = now
120
+ return
121
+ if event == "resolve_calls_start" and parse_state["shown"]:
122
+ click.echo()
123
+ parse_state["shown"] = False
124
+ _phase("Tracing calls...", "running")
125
+ return
126
+ if event == "resolve_calls_start":
127
+ _phase("Tracing calls...", "running")
128
+ return
129
+ if event == "resolve_calls_progress":
130
+ call_state["count"] = int(payload.get("calls_resolved", 0))
131
+ if (now - call_state["last_ts"]) >= 0.25:
132
+ click.echo(f"\rTracing calls... {call_state['count']} resolved", nl=False)
133
+ call_state["shown"] = True
134
+ call_state["last_ts"] = now
135
+ return
136
+ if event == "resolve_calls_done":
137
+ if call_state["shown"]:
138
+ click.echo()
139
+ call_state["shown"] = False
140
+ _phase("Tracing calls...", f"{int(payload.get('calls_resolved', 0))} calls resolved")
141
+ return
142
+ if event == "resolve_types_start":
143
+ _phase("Analyzing types...", "running")
144
+ return
145
+ if event == "resolve_types_done":
146
+ _phase("Analyzing types...", f"{int(payload.get('type_relationships', 0))} type relationships")
147
+ return
148
+
149
+ result = indexer.index_project(abs_path, full=full, progress=_progress)
150
+ if parse_state["shown"]:
151
+ click.echo()
152
+ if parse_state["total"] == 0:
153
+ _phase("Parsing code...", "0/0")
154
+ elif parse_state["indexed"] < parse_state["total"]:
155
+ _phase("Parsing code...", f"{parse_state['indexed']}/{parse_state['total']}")
156
+
157
+ communities: list[dict] = []
158
+ flows: list[dict] = []
159
+ dead: list[dict] = []
160
+ coupling_pairs: list[dict] = []
161
+
162
+ should_run_deep = deep or result.files_found <= 1200
163
+ if should_run_deep:
164
+ communities = detect_communities(store)
165
+ _phase("Detecting communities...", f"{len(communities)} clusters found")
166
+
167
+ flows = trace_execution_flows(store)
168
+ _phase("Detecting execution flows...", f"{len(flows)} processes found")
169
+
170
+ dead = detect_dead_code(store, limit=500)
171
+ _phase("Finding dead code...", f"{len(dead)} unreachable symbols")
172
+
173
+ coupling_pairs = compute_coupling(
174
+ store,
175
+ abs_path,
176
+ result.project_id,
177
+ months=SETTINGS.default_coupling_months,
178
+ min_strength=SETTINGS.default_min_coupling_strength,
179
+ min_cochanges=SETTINGS.default_min_cochanges,
180
+ )
181
+ _phase("Analyzing git history...", f"{len(coupling_pairs)} coupled file pairs")
182
+ else:
183
+ _phase("Detecting communities...", "skipped (large repo; rerun with --deep)")
184
+ _phase("Detecting execution flows...", "skipped (large repo; rerun with --deep)")
185
+ _phase("Finding dead code...", "skipped (large repo; rerun with --deep)")
186
+ _phase("Analyzing git history...", "skipped (large repo; rerun with --deep)")
116
187
 
117
188
  vector_count = store.query_records(
118
189
  """
@@ -1,9 +1,12 @@
1
1
  from __future__ import annotations
2
2
 
3
3
  from collections import defaultdict
4
+ from typing import Iterator
4
5
 
5
6
  from codespine.noise.blocklist import NOISE_METHOD_NAMES
6
7
 
8
+ MAX_FUZZY_TARGETS = 12
9
+
7
10
 
8
11
  def _simple_type_name(type_name: str | None) -> str:
9
12
  if not type_name:
@@ -53,10 +56,10 @@ def resolve_calls(
53
56
  calls: dict[str, list],
54
57
  method_context: dict[str, dict],
55
58
  class_catalog: dict[str, list[str]],
56
- ) -> list[tuple[str, str, float, str]]:
59
+ ) -> Iterator[tuple[str, str, float, str]]:
57
60
  """Resolve call names to known method ids.
58
61
 
59
- Returns tuples: (source_method_id, target_method_id, confidence, reason)
62
+ Yields tuples: (source_method_id, target_method_id, confidence, reason)
60
63
  """
61
64
  name_arity_to_method_ids: dict[tuple[str, int], list[str]] = defaultdict(list)
62
65
  class_method_index: dict[str, dict[tuple[str, int], list[str]]] = defaultdict(lambda: defaultdict(list))
@@ -65,7 +68,6 @@ def resolve_calls(
65
68
  name_arity_to_method_ids[key].append(method_id)
66
69
  class_method_index[meta["class_fqcn"]][key].append(method_id)
67
70
 
68
- edges: list[tuple[str, str, float, str]] = []
69
71
  for source_id, call_sites in calls.items():
70
72
  src_meta = method_catalog.get(source_id, {})
71
73
  src_ctx = method_context.get(source_id, {})
@@ -126,12 +128,13 @@ def resolve_calls(
126
128
  confidence = 1.0
127
129
  reason = "exact_name_arity_unique"
128
130
  elif len(targets) > 1:
131
+ if len(targets) > MAX_FUZZY_TARGETS:
132
+ # Avoid exploding low-confidence edges in large repos.
133
+ continue
129
134
  confidence = 0.5
130
135
  reason = "fuzzy_name_arity_ambiguous"
131
136
 
132
137
  if not targets:
133
138
  continue
134
- for target_id in targets:
135
- edges.append((source_id, target_id, confidence, reason))
136
-
137
- return edges
139
+ for target_id in set(targets):
140
+ yield source_id, target_id, confidence, reason
@@ -2,6 +2,7 @@ from __future__ import annotations
2
2
 
3
3
  import os
4
4
  from dataclasses import dataclass
5
+ from typing import Callable
5
6
 
6
7
  from codespine.indexer.call_resolver import resolve_calls
7
8
  from codespine.indexer.java_parser import parse_java_source
@@ -25,12 +26,18 @@ class JavaIndexer:
25
26
  def __init__(self, store):
26
27
  self.store = store
27
28
 
28
- def index_project(self, root_path: str, full: bool = True) -> IndexResult:
29
+ def index_project(
30
+ self,
31
+ root_path: str,
32
+ full: bool = True,
33
+ progress: Callable[[str, dict], None] | None = None,
34
+ ) -> IndexResult:
29
35
  root_path = os.path.abspath(root_path)
30
36
  project_id = os.path.basename(root_path)
31
37
  current_files = self._collect_java_files(root_path)
32
- current_hashes = self._hash_files(project_id, root_path, current_files)
38
+ self._emit(progress, "scan_done", files_found=len(current_files))
33
39
  db_files = self.store.project_file_hashes(project_id) if not full else {}
40
+ current_hashes = self._hash_files(project_id, root_path, current_files) if not full else {}
34
41
 
35
42
  if full:
36
43
  to_reindex = current_files
@@ -45,6 +52,13 @@ class JavaIndexer:
45
52
  old = db_files.get(fid, {}).get("hash")
46
53
  if old != digest:
47
54
  to_reindex.append(file_path)
55
+ self._emit(
56
+ progress,
57
+ "plan_done",
58
+ files_to_index=len(to_reindex),
59
+ deleted_files=len(deleted_file_ids),
60
+ mode="full" if full else "incremental",
61
+ )
48
62
 
49
63
  files_indexed = 0
50
64
  classes_indexed = 0
@@ -151,12 +165,25 @@ class JavaIndexer:
151
165
  }
152
166
  class_methods[cls.fqcn][method.signature] = m_id
153
167
  files_indexed += 1
168
+ self._emit(
169
+ progress,
170
+ "parse_progress",
171
+ indexed=files_indexed,
172
+ total=len(to_reindex),
173
+ file_path=file_path,
174
+ )
154
175
 
176
+ self._emit(progress, "resolve_calls_start")
155
177
  for src, dst, confidence, reason in resolve_calls(method_catalog, method_calls, method_context, class_catalog):
156
178
  self.store.add_call(src, dst, confidence, reason)
157
179
  calls_resolved += 1
180
+ if calls_resolved % 2000 == 0:
181
+ self._emit(progress, "resolve_calls_progress", calls_resolved=calls_resolved)
182
+ self._emit(progress, "resolve_calls_done", calls_resolved=calls_resolved)
158
183
 
184
+ self._emit(progress, "resolve_types_start")
159
185
  type_relationships += self._build_inheritance_edges(class_meta, class_catalog, class_methods)
186
+ self._emit(progress, "resolve_types_done", type_relationships=type_relationships)
160
187
 
161
188
  return IndexResult(
162
189
  project_id=project_id,
@@ -172,10 +199,11 @@ class JavaIndexer:
172
199
  @staticmethod
173
200
  def _collect_java_files(root_path: str) -> list[str]:
174
201
  out: list[str] = []
175
- for root, _, files in os.walk(root_path):
176
- if "src" not in root:
177
- continue
178
- if any(skip in root for skip in ["target", "build", "out", ".git"]):
202
+ skip_dirs = {".git", "target", "build", "out", ".idea", ".gradle", ".mvn", "node_modules"}
203
+ for root, dirs, files in os.walk(root_path, topdown=True):
204
+ dirs[:] = [d for d in dirs if d not in skip_dirs]
205
+ normalized = root.replace("\\", "/")
206
+ if "/src/" not in normalized and not normalized.endswith("/src"):
179
207
  continue
180
208
  for filename in files:
181
209
  if filename.endswith(".java"):
@@ -303,3 +331,9 @@ class JavaIndexer:
303
331
  self.store.add_reference("OVERRIDES", "Method", method_id, "Method", iface_method, 1.0)
304
332
  rel_count += 1
305
333
  return rel_count
334
+
335
+ @staticmethod
336
+ def _emit(progress: Callable[[str, dict], None] | None, event: str, **payload: object) -> None:
337
+ if progress is None:
338
+ return
339
+ progress(event, payload)
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: codespine
3
- Version: 0.1.4
3
+ Version: 0.1.7
4
4
  Summary: Local Java code intelligence indexer backed by a graph database
5
5
  Author: CodeSpine contributors
6
6
  License: MIT License
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
4
4
 
5
5
  [project]
6
6
  name = "codespine"
7
- version = "0.1.4"
7
+ version = "0.1.7"
8
8
  description = "Local Java code intelligence indexer backed by a graph database"
9
9
  readme = "README.md"
10
10
  requires-python = ">=3.10"
File without changes
File without changes
File without changes
File without changes
File without changes