codespine 0.4.1__tar.gz → 0.4.2__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (50)
  1. {codespine-0.4.1 → codespine-0.4.2}/PKG-INFO +1 -1
  2. {codespine-0.4.1 → codespine-0.4.2}/codespine/__init__.py +1 -1
  3. {codespine-0.4.1 → codespine-0.4.2}/codespine/analysis/flow.py +22 -0
  4. {codespine-0.4.1 → codespine-0.4.2}/codespine/analysis/impact.py +58 -1
  5. {codespine-0.4.1 → codespine-0.4.2}/codespine/mcp/server.py +5 -2
  6. {codespine-0.4.1 → codespine-0.4.2}/codespine/search/hybrid.py +17 -4
  7. {codespine-0.4.1 → codespine-0.4.2}/codespine.egg-info/PKG-INFO +1 -1
  8. {codespine-0.4.1 → codespine-0.4.2}/pyproject.toml +1 -1
  9. {codespine-0.4.1 → codespine-0.4.2}/LICENSE +0 -0
  10. {codespine-0.4.1 → codespine-0.4.2}/README.md +0 -0
  11. {codespine-0.4.1 → codespine-0.4.2}/codespine/analysis/__init__.py +0 -0
  12. {codespine-0.4.1 → codespine-0.4.2}/codespine/analysis/community.py +0 -0
  13. {codespine-0.4.1 → codespine-0.4.2}/codespine/analysis/context.py +0 -0
  14. {codespine-0.4.1 → codespine-0.4.2}/codespine/analysis/coupling.py +0 -0
  15. {codespine-0.4.1 → codespine-0.4.2}/codespine/analysis/deadcode.py +0 -0
  16. {codespine-0.4.1 → codespine-0.4.2}/codespine/cli.py +0 -0
  17. {codespine-0.4.1 → codespine-0.4.2}/codespine/config.py +0 -0
  18. {codespine-0.4.1 → codespine-0.4.2}/codespine/db/__init__.py +0 -0
  19. {codespine-0.4.1 → codespine-0.4.2}/codespine/db/schema.py +0 -0
  20. {codespine-0.4.1 → codespine-0.4.2}/codespine/db/store.py +0 -0
  21. {codespine-0.4.1 → codespine-0.4.2}/codespine/diff/__init__.py +0 -0
  22. {codespine-0.4.1 → codespine-0.4.2}/codespine/diff/branch_diff.py +0 -0
  23. {codespine-0.4.1 → codespine-0.4.2}/codespine/indexer/__init__.py +0 -0
  24. {codespine-0.4.1 → codespine-0.4.2}/codespine/indexer/call_resolver.py +0 -0
  25. {codespine-0.4.1 → codespine-0.4.2}/codespine/indexer/engine.py +0 -0
  26. {codespine-0.4.1 → codespine-0.4.2}/codespine/indexer/java_parser.py +0 -0
  27. {codespine-0.4.1 → codespine-0.4.2}/codespine/indexer/symbol_builder.py +0 -0
  28. {codespine-0.4.1 → codespine-0.4.2}/codespine/mcp/__init__.py +0 -0
  29. {codespine-0.4.1 → codespine-0.4.2}/codespine/noise/__init__.py +0 -0
  30. {codespine-0.4.1 → codespine-0.4.2}/codespine/noise/blocklist.py +0 -0
  31. {codespine-0.4.1 → codespine-0.4.2}/codespine/search/__init__.py +0 -0
  32. {codespine-0.4.1 → codespine-0.4.2}/codespine/search/bm25.py +0 -0
  33. {codespine-0.4.1 → codespine-0.4.2}/codespine/search/fuzzy.py +0 -0
  34. {codespine-0.4.1 → codespine-0.4.2}/codespine/search/rrf.py +0 -0
  35. {codespine-0.4.1 → codespine-0.4.2}/codespine/search/vector.py +0 -0
  36. {codespine-0.4.1 → codespine-0.4.2}/codespine/watch/__init__.py +0 -0
  37. {codespine-0.4.1 → codespine-0.4.2}/codespine/watch/watcher.py +0 -0
  38. {codespine-0.4.1 → codespine-0.4.2}/codespine.egg-info/SOURCES.txt +0 -0
  39. {codespine-0.4.1 → codespine-0.4.2}/codespine.egg-info/dependency_links.txt +0 -0
  40. {codespine-0.4.1 → codespine-0.4.2}/codespine.egg-info/entry_points.txt +0 -0
  41. {codespine-0.4.1 → codespine-0.4.2}/codespine.egg-info/requires.txt +0 -0
  42. {codespine-0.4.1 → codespine-0.4.2}/codespine.egg-info/top_level.txt +0 -0
  43. {codespine-0.4.1 → codespine-0.4.2}/gindex.py +0 -0
  44. {codespine-0.4.1 → codespine-0.4.2}/setup.cfg +0 -0
  45. {codespine-0.4.1 → codespine-0.4.2}/tests/test_branch_diff_normalize.py +0 -0
  46. {codespine-0.4.1 → codespine-0.4.2}/tests/test_call_resolver.py +0 -0
  47. {codespine-0.4.1 → codespine-0.4.2}/tests/test_index_and_hybrid.py +0 -0
  48. {codespine-0.4.1 → codespine-0.4.2}/tests/test_java_parser.py +0 -0
  49. {codespine-0.4.1 → codespine-0.4.2}/tests/test_multimodule_index.py +0 -0
  50. {codespine-0.4.1 → codespine-0.4.2}/tests/test_search_ranking.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: codespine
3
- Version: 0.4.1
3
+ Version: 0.4.2
4
4
  Summary: Local Java code intelligence indexer backed by a graph database
5
5
  Author: CodeSpine contributors
6
6
  License: MIT License
@@ -1,4 +1,4 @@
1
1
  """CodeSpine package."""
2
2
 
3
3
  __all__ = ["__version__"]
4
- __version__ = "0.4.1"
4
+ __version__ = "0.4.2"
@@ -2,6 +2,8 @@ from __future__ import annotations
2
2
 
3
3
  from collections import defaultdict, deque
4
4
 
5
+ from codespine.analysis.impact import _resolve_method_metadata
6
+
5
7
 
6
8
  def _entry_methods(store, project: str | None = None) -> list[str]:
7
9
  if project:
@@ -108,4 +110,24 @@ def trace_execution_flows(store, entry_symbol: str | None = None, max_depth: int
108
110
  }
109
111
  )
110
112
 
113
+ # ------------------------------------------------------------------ #
114
+ # Enrich every node with human-readable metadata so AI agents don't
115
+ # need a second round-trip to resolve raw method ID hashes.
116
+ # Collect all unique IDs across all flows, resolve in one bulk query.
117
+ # ------------------------------------------------------------------ #
118
+ all_ids = list({node["symbol"] for flow in flows for node in flow["nodes"]})
119
+ meta = _resolve_method_metadata(store, all_ids)
120
+
121
+ for flow in flows:
122
+ entry_m = meta.get(flow["entry"], {})
123
+ flow["entry_name"] = entry_m.get("name")
124
+ flow["entry_fqname"] = entry_m.get("fqname")
125
+ flow["entry_file_path"] = entry_m.get("file_path")
126
+ for node in flow["nodes"]:
127
+ m = meta.get(node["symbol"], {})
128
+ node["name"] = m.get("name")
129
+ node["fqname"] = m.get("fqname")
130
+ node["file_path"] = m.get("file_path")
131
+ node["project_id"] = m.get("project_id")
132
+
111
133
  return flows
@@ -21,6 +21,27 @@ def _resolve_symbol_ids(store, symbol_query: str, project: str | None = None) ->
21
21
  return [r["id"] for r in recs]
22
22
 
23
23
 
24
+ def _resolve_method_metadata(store, method_ids: list[str]) -> dict[str, dict]:
25
+ """Bulk-resolve method IDs to human-readable metadata in a single query.
26
+
27
+ Returns a dict keyed by method ID with fields:
28
+ name, fqname (= m.signature), class_fqcn, file_path, project_id.
29
+ Any ID not found in the graph is silently omitted.
30
+ """
31
+ if not method_ids:
32
+ return {}
33
+ recs = store.query_records(
34
+ """
35
+ MATCH (m:Method), (c:Class), (f:File)
36
+ WHERE m.id IN $ids AND m.class_id = c.id AND c.file_id = f.id
37
+ RETURN m.id as id, m.name as name, m.signature as fqname,
38
+ c.fqcn as class_fqcn, f.path as file_path, f.project_id as project_id
39
+ """,
40
+ {"ids": method_ids},
41
+ )
42
+ return {r["id"]: r for r in recs}
43
+
44
+
24
45
  def analyze_impact(store, symbol_query: str, max_depth: int = 4, project: str | None = None) -> dict:
25
46
  target_symbol_ids = _resolve_symbol_ids(store, symbol_query, project=project)
26
47
  if not target_symbol_ids:
@@ -85,9 +106,45 @@ def analyze_impact(store, symbol_query: str, max_depth: int = 4, project: str |
85
106
  depth_groups["3+"].append(item)
86
107
  queue.append((src, next_depth, path + [src]))
87
108
 
109
+ # ------------------------------------------------------------------ #
110
+ # Enrich every caller entry with human-readable metadata so AI agents
111
+ # don't need a second round-trip to resolve raw ID hashes.
112
+ # A single bulk query resolves all collected method IDs at once.
113
+ # ------------------------------------------------------------------ #
114
+ all_caller_ids = [item["symbol"] for items in depth_groups.values() for item in items]
115
+ meta = _resolve_method_metadata(store, all_caller_ids)
116
+
117
+ for items in depth_groups.values():
118
+ for item in items:
119
+ m = meta.get(item["symbol"], {})
120
+ item["name"] = m.get("name")
121
+ item["fqname"] = m.get("fqname")
122
+ item["file_path"] = m.get("file_path")
123
+ item["project_id"] = m.get("project_id")
124
+ item["class_fqcn"] = m.get("class_fqcn")
125
+ # Convert the call-path from a list of raw IDs to human-readable names
126
+ # so an agent can read the chain without additional lookups.
127
+ item["path"] = [
128
+ meta.get(pid, {}).get("name") or pid
129
+ for pid in item["path"]
130
+ ]
131
+
132
+ # Also enrich the targets_resolved list for context
133
+ target_meta = _resolve_method_metadata(store, target_method_ids)
134
+ resolved_targets = [
135
+ {
136
+ "id": mid,
137
+ "name": target_meta.get(mid, {}).get("name"),
138
+ "fqname": target_meta.get(mid, {}).get("fqname"),
139
+ "file_path": target_meta.get(mid, {}).get("file_path"),
140
+ "class_fqcn": target_meta.get(mid, {}).get("class_fqcn"),
141
+ }
142
+ for mid in target_method_ids
143
+ ]
144
+
88
145
  return {
89
146
  "target": symbol_query,
90
- "targets_resolved": target_method_ids,
147
+ "targets_resolved": resolved_targets,
91
148
  "depth_groups": depth_groups,
92
149
  "summary": {
93
150
  "direct": len(depth_groups["1"]),
@@ -273,7 +273,10 @@ def build_mcp_server(store, repo_path_provider):
273
273
  @mcp.tool()
274
274
  def get_symbol_community(symbol: str):
275
275
  """Return the architectural community cluster a symbol belongs to."""
276
- detect_communities(store)
276
+ # NOTE: do NOT call detect_communities() here — the MCP server opens the
277
+ # graph DB read-only, so any write attempt raises "Cannot execute write
278
+ # operations in a read-only database!". Communities are computed once
279
+ # during 'codespine analyse --deep' and persisted; we just read them.
277
280
  result = symbol_community(store, symbol)
278
281
  if not result.get("matches"):
279
282
  return {"available": False, "note": "No community data yet. Run 'codespine analyse --deep'."}
@@ -507,7 +510,7 @@ def build_mcp_server(store, repo_path_provider):
507
510
  MATCH (c:Class), (f:File)
508
511
  WHERE c.file_id = f.id {project_clause}
509
512
  RETURN c.package as package, f.project_id as project_id, count(c) as class_count
510
- ORDER BY f.project_id, c.package
513
+ ORDER BY project_id, package
511
514
  LIMIT $lim
512
515
  """,
513
516
  params,
@@ -3,7 +3,7 @@ from __future__ import annotations
3
3
  from codespine.search.bm25 import rank_bm25
4
4
  from codespine.search.fuzzy import rank_fuzzy
5
5
  from codespine.search.rrf import reciprocal_rank_fusion
6
- from codespine.search.vector import rank_semantic
6
+ from codespine.search.vector import _load_model, rank_semantic
7
7
 
8
8
  _LOW_CONFIDENCE_THRESHOLD = 0.05
9
9
 
@@ -95,14 +95,27 @@ def hybrid_search(store, query: str, k: int = 20, project: str | None = None) ->
95
95
  item["context"] = ctx
96
96
 
97
97
  # Warn when all scores are near zero — the results are likely noise.
98
+ # The threshold 0.05 is calibrated for embedding mode. Without sentence-
99
+ # transformers the hash-fallback vector and BM25/fuzzy signals produce lower
100
+ # RRF scores, so the warning fires on nearly every query. Make the note
101
+ # context-aware so the agent understands whether this is a calibration issue
102
+ # or a genuine low-relevance result.
98
103
  if top_k and top_k[0]["score"] < _LOW_CONFIDENCE_THRESHOLD:
104
+ has_model = _load_model() is not None
99
105
  for item in top_k:
100
106
  item["low_confidence"] = True
101
- top_k.append({
102
- "note": (
107
+ if has_model:
108
+ note = (
103
109
  "Low confidence results — all scores below threshold. "
104
110
  "If searching for an exact class or method name, use find_symbol instead."
105
111
  )
106
- })
112
+ else:
113
+ note = (
114
+ "Low confidence results — scores are lower in BM25/fuzzy-only mode "
115
+ "(no embedding model detected). "
116
+ "This is expected without 'codespine[ml]' installed; results may still be correct. "
117
+ "For exact name matches, use find_symbol instead."
118
+ )
119
+ top_k.append({"note": note})
107
120
 
108
121
  return top_k
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: codespine
3
- Version: 0.4.1
3
+ Version: 0.4.2
4
4
  Summary: Local Java code intelligence indexer backed by a graph database
5
5
  Author: CodeSpine contributors
6
6
  License: MIT License
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
4
4
 
5
5
  [project]
6
6
  name = "codespine"
7
- version = "0.4.1"
7
+ version = "0.4.2"
8
8
  description = "Local Java code intelligence indexer backed by a graph database"
9
9
  readme = "README.md"
10
10
  requires-python = ">=3.10"
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes