coderay 1.0.2__tar.gz → 1.0.3__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (52) hide show
  1. {coderay-1.0.2/src/coderay.egg-info → coderay-1.0.3}/PKG-INFO +18 -18
  2. {coderay-1.0.2 → coderay-1.0.3}/pyproject.toml +18 -18
  3. {coderay-1.0.2 → coderay-1.0.3}/src/coderay/chunking/chunker.py +1 -3
  4. {coderay-1.0.2 → coderay-1.0.3}/src/coderay/cli/commands.py +1 -19
  5. {coderay-1.0.2 → coderay-1.0.3}/src/coderay/core/models.py +0 -1
  6. {coderay-1.0.2 → coderay-1.0.3}/src/coderay/graph/code_graph.py +4 -1
  7. coderay-1.0.3/src/coderay/mcp_server/errors.py +11 -0
  8. coderay-1.0.3/src/coderay/mcp_server/server.py +225 -0
  9. coderay-1.0.3/src/coderay/retrieval/models.py +63 -0
  10. {coderay-1.0.2 → coderay-1.0.3}/src/coderay/retrieval/search.py +0 -2
  11. {coderay-1.0.2 → coderay-1.0.3}/src/coderay/skeleton/extractor.py +4 -12
  12. {coderay-1.0.2 → coderay-1.0.3}/src/coderay/storage/lancedb.py +1 -7
  13. {coderay-1.0.2 → coderay-1.0.3/src/coderay.egg-info}/PKG-INFO +18 -18
  14. {coderay-1.0.2 → coderay-1.0.3}/src/coderay.egg-info/SOURCES.txt +2 -0
  15. coderay-1.0.3/src/coderay.egg-info/requires.txt +34 -0
  16. coderay-1.0.2/src/coderay/mcp_server/server.py +0 -178
  17. coderay-1.0.2/src/coderay.egg-info/requires.txt +0 -34
  18. {coderay-1.0.2 → coderay-1.0.3}/LICENSE +0 -0
  19. {coderay-1.0.2 → coderay-1.0.3}/README.md +0 -0
  20. {coderay-1.0.2 → coderay-1.0.3}/setup.cfg +0 -0
  21. {coderay-1.0.2 → coderay-1.0.3}/src/coderay/__init__.py +0 -0
  22. {coderay-1.0.2 → coderay-1.0.3}/src/coderay/chunking/__init__.py +0 -0
  23. {coderay-1.0.2 → coderay-1.0.3}/src/coderay/chunking/registry.py +0 -0
  24. {coderay-1.0.2 → coderay-1.0.3}/src/coderay/cli/__init__.py +0 -0
  25. {coderay-1.0.2 → coderay-1.0.3}/src/coderay/core/__init__.py +0 -0
  26. {coderay-1.0.2 → coderay-1.0.3}/src/coderay/core/config.py +0 -0
  27. {coderay-1.0.2 → coderay-1.0.3}/src/coderay/core/lock.py +0 -0
  28. {coderay-1.0.2 → coderay-1.0.3}/src/coderay/core/timing.py +0 -0
  29. {coderay-1.0.2 → coderay-1.0.3}/src/coderay/core/utils.py +0 -0
  30. {coderay-1.0.2 → coderay-1.0.3}/src/coderay/embedding/__init__.py +0 -0
  31. {coderay-1.0.2 → coderay-1.0.3}/src/coderay/embedding/base.py +0 -0
  32. {coderay-1.0.2 → coderay-1.0.3}/src/coderay/embedding/local.py +0 -0
  33. {coderay-1.0.2 → coderay-1.0.3}/src/coderay/embedding/openai.py +0 -0
  34. {coderay-1.0.2 → coderay-1.0.3}/src/coderay/graph/__init__.py +0 -0
  35. {coderay-1.0.2 → coderay-1.0.3}/src/coderay/graph/builder.py +0 -0
  36. {coderay-1.0.2 → coderay-1.0.3}/src/coderay/graph/extractor.py +0 -0
  37. {coderay-1.0.2 → coderay-1.0.3}/src/coderay/mcp_server/__init__.py +0 -0
  38. {coderay-1.0.2 → coderay-1.0.3}/src/coderay/pipeline/__init__.py +0 -0
  39. {coderay-1.0.2 → coderay-1.0.3}/src/coderay/pipeline/indexer.py +0 -0
  40. {coderay-1.0.2 → coderay-1.0.3}/src/coderay/pipeline/watcher.py +0 -0
  41. {coderay-1.0.2 → coderay-1.0.3}/src/coderay/retrieval/__init__.py +0 -0
  42. {coderay-1.0.2 → coderay-1.0.3}/src/coderay/retrieval/boosting.py +0 -0
  43. {coderay-1.0.2 → coderay-1.0.3}/src/coderay/skeleton/__init__.py +0 -0
  44. {coderay-1.0.2 → coderay-1.0.3}/src/coderay/state/__init__.py +0 -0
  45. {coderay-1.0.2 → coderay-1.0.3}/src/coderay/state/machine.py +0 -0
  46. {coderay-1.0.2 → coderay-1.0.3}/src/coderay/state/version.py +0 -0
  47. {coderay-1.0.2 → coderay-1.0.3}/src/coderay/storage/__init__.py +0 -0
  48. {coderay-1.0.2 → coderay-1.0.3}/src/coderay/vcs/__init__.py +0 -0
  49. {coderay-1.0.2 → coderay-1.0.3}/src/coderay/vcs/git.py +0 -0
  50. {coderay-1.0.2 → coderay-1.0.3}/src/coderay.egg-info/dependency_links.txt +0 -0
  51. {coderay-1.0.2 → coderay-1.0.3}/src/coderay.egg-info/entry_points.txt +0 -0
  52. {coderay-1.0.2 → coderay-1.0.3}/src/coderay.egg-info/top_level.txt +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: coderay
3
- Version: 1.0.2
3
+ Version: 1.0.3
4
4
  Summary: X-ray your codebase — semantic search, code graphs, file skeletons, and MCP server
5
5
  Author-email: Bogdan Copocean <bogdancopocean@gmail.com>
6
6
  License-Expression: MIT
@@ -20,31 +20,31 @@ Classifier: Topic :: Text Processing :: Indexing
20
20
  Requires-Python: >=3.10
21
21
  Description-Content-Type: text/markdown
22
22
  License-File: LICENSE
23
- Requires-Dist: python-dotenv>=1.0.0
24
- Requires-Dist: networkx>=3.0
25
- Requires-Dist: tree-sitter>=0.24.0
23
+ Requires-Dist: python-dotenv>=1.2.0
24
+ Requires-Dist: networkx>=3.4
25
+ Requires-Dist: tree-sitter>=0.25.0
26
26
  Requires-Dist: tree-sitter-python>=0.25.0
27
- Requires-Dist: lancedb>=0.5.0
28
- Requires-Dist: pyyaml>=6.0
29
- Requires-Dist: click>=8.0
30
- Requires-Dist: filelock>=3.0
31
- Requires-Dist: fastembed>=0.4.0
32
- Requires-Dist: mcp>=1.0.0
33
- Requires-Dist: watchdog>=4.0.0
34
- Requires-Dist: pathspec>=0.12.0
27
+ Requires-Dist: lancedb>=0.29.0
28
+ Requires-Dist: pyyaml>=6.0.3
29
+ Requires-Dist: click>=8.3.0
30
+ Requires-Dist: filelock>=3.25.0
31
+ Requires-Dist: fastembed>=0.7.0
32
+ Requires-Dist: fastmcp==3.1.0
33
+ Requires-Dist: watchdog>=6.0.0
34
+ Requires-Dist: pathspec>=1.0.0
35
35
  Provides-Extra: openai
36
- Requires-Dist: openai>=1.0.0; extra == "openai"
36
+ Requires-Dist: openai>=2.0.0; extra == "openai"
37
37
  Provides-Extra: languages
38
- Requires-Dist: tree-sitter-javascript>=0.23.0; extra == "languages"
39
- Requires-Dist: tree-sitter-typescript>=0.23.0; extra == "languages"
40
- Requires-Dist: tree-sitter-go>=0.23.0; extra == "languages"
38
+ Requires-Dist: tree-sitter-javascript>=0.25.0; extra == "languages"
39
+ Requires-Dist: tree-sitter-typescript>=0.23.2; extra == "languages"
40
+ Requires-Dist: tree-sitter-go>=0.25.0; extra == "languages"
41
41
  Provides-Extra: dev
42
42
  Requires-Dist: pytest>=7.0; extra == "dev"
43
43
  Requires-Dist: pytest-cov>=4.0; extra == "dev"
44
44
  Requires-Dist: ruff>=0.8.0; extra == "dev"
45
45
  Requires-Dist: mypy>=1.0.0; extra == "dev"
46
- Requires-Dist: openai>=1.0.0; extra == "dev"
47
- Requires-Dist: httpx>=0.27.0; extra == "dev"
46
+ Requires-Dist: openai>=2.0.0; extra == "dev"
47
+ Requires-Dist: httpx>=0.28.0; extra == "dev"
48
48
  Provides-Extra: maintain
49
49
  Requires-Dist: pylance>=0.15.0; extra == "maintain"
50
50
  Provides-Extra: all
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
4
4
 
5
5
  [project]
6
6
  name = "coderay"
7
- version = "1.0.2"
7
+ version = "1.0.3"
8
8
  description = "X-ray your codebase — semantic search, code graphs, file skeletons, and MCP server"
9
9
  readme = "README.md"
10
10
  license = "MIT"
@@ -25,18 +25,18 @@ classifiers = [
25
25
  "Topic :: Text Processing :: Indexing",
26
26
  ]
27
27
  dependencies = [
28
- "python-dotenv>=1.0.0",
29
- "networkx>=3.0",
30
- "tree-sitter>=0.24.0",
28
+ "python-dotenv>=1.2.0",
29
+ "networkx>=3.4",
30
+ "tree-sitter>=0.25.0",
31
31
  "tree-sitter-python>=0.25.0",
32
- "lancedb>=0.5.0",
33
- "pyyaml>=6.0",
34
- "click>=8.0",
35
- "filelock>=3.0",
36
- "fastembed>=0.4.0",
37
- "mcp>=1.0.0",
38
- "watchdog>=4.0.0",
39
- "pathspec>=0.12.0",
32
+ "lancedb>=0.29.0",
33
+ "pyyaml>=6.0.3",
34
+ "click>=8.3.0",
35
+ "filelock>=3.25.0",
36
+ "fastembed>=0.7.0",
37
+ "fastmcp==3.1.0",
38
+ "watchdog>=6.0.0",
39
+ "pathspec>=1.0.0",
40
40
  ]
41
41
 
42
42
  [project.urls]
@@ -45,19 +45,19 @@ Repository = "https://github.com/bogdan-copocean/coderay"
45
45
  Issues = "https://github.com/bogdan-copocean/coderay/issues"
46
46
 
47
47
  [project.optional-dependencies]
48
- openai = ["openai>=1.0.0"]
48
+ openai = ["openai>=2.0.0"]
49
49
  languages = [
50
- "tree-sitter-javascript>=0.23.0",
51
- "tree-sitter-typescript>=0.23.0",
52
- "tree-sitter-go>=0.23.0",
50
+ "tree-sitter-javascript>=0.25.0",
51
+ "tree-sitter-typescript>=0.23.2",
52
+ "tree-sitter-go>=0.25.0",
53
53
  ]
54
54
  dev = [
55
55
  "pytest>=7.0",
56
56
  "pytest-cov>=4.0",
57
57
  "ruff>=0.8.0",
58
58
  "mypy>=1.0.0",
59
- "openai>=1.0.0",
60
- "httpx>=0.27.0",
59
+ "openai>=2.0.0",
60
+ "httpx>=0.28.0",
61
61
  ]
62
62
  maintain = [
63
63
  "pylance>=0.15.0",
@@ -91,7 +91,6 @@ def _chunk_file_with_config(
91
91
  start_line=start_line,
92
92
  end_line=end_line,
93
93
  symbol=symbol,
94
- language=lang_cfg.name,
95
94
  content=text,
96
95
  )
97
96
  )
@@ -107,7 +106,6 @@ def _chunk_file_with_config(
107
106
  start_line=1,
108
107
  end_line=root.end_point[0] + 1,
109
108
  symbol="<module>",
110
- language=lang_cfg.name,
111
109
  content="\n".join(preamble_lines),
112
110
  ),
113
111
  )
@@ -118,7 +116,7 @@ def _chunk_file_with_config(
118
116
  return chunks
119
117
 
120
118
 
121
- def chunk_file(path: str | Path, content: str, language: str = "python") -> list[Chunk]:
119
+ def chunk_file(path: str | Path, content: str) -> list[Chunk]:
122
120
  """Chunk a source file into semantic units (functions, classes, preamble)."""
123
121
  path_str = str(path) if isinstance(path, Path) else path
124
122
  if not (lang_cfg := get_language_for_file(path_str)):
@@ -132,14 +132,12 @@ def update(ctx: click.Context, repo: Path) -> None:
132
132
  @click.argument("query_text", required=True)
133
133
  @click.option("--top-k", "top_k", default=10, help="Number of results")
134
134
  @click.option("--path-prefix", help="Filter by path prefix")
135
- @click.option("--language", help="Filter by language (e.g. python)")
136
135
  @click.pass_context
137
136
  def search_cmd(
138
137
  ctx: click.Context,
139
138
  query_text: str,
140
139
  top_k: int,
141
140
  path_prefix: str | None,
142
- language: str | None,
143
141
  ) -> None:
144
142
  """Semantic search the index."""
145
143
  index_dir = ctx.obj["index_dir"]
@@ -162,7 +160,6 @@ def search_cmd(
162
160
  current_state=current_state,
163
161
  top_k=top_k,
164
162
  path_prefix=path_prefix,
165
- language=language,
166
163
  )
167
164
  elapsed = time.perf_counter() - t0
168
165
  click.echo(_color(f"Query took {elapsed:.2f}s", BOLD))
@@ -171,31 +168,16 @@ def search_cmd(
171
168
  click.echo(_color("No results.", YELLOW))
172
169
  return
173
170
 
174
- score_type = results[0].get("score_type", "cosine")
175
- if score_type == "rrf":
176
- click.echo(
177
- _color("Scoring: hybrid (RRF) — relative ranking, not a percentage", CYAN)
178
- )
179
- else:
180
- click.echo(_color("Scoring: cosine similarity (0-1)", CYAN))
181
-
182
171
  for i, r in enumerate(results, 1):
183
172
  path = r.get("path", "?")
184
173
  start = r.get("start_line", 0)
185
174
  end = r.get("end_line", 0)
186
175
  symbol = r.get("symbol", "?")
187
- score = r.get("score", 0)
188
- if score_type == "cosine":
189
- score_str = f"score={score:.4f} ({score:.0%})"
190
- else:
191
- score_str = f"score={score:.4f} (rrf)"
192
176
  preview = (r.get("content") or "")[:200].replace("\n", " ")
193
177
  if len(r.get("content") or "") > 200:
194
178
  preview += "..."
195
179
  click.echo("")
196
- click.echo(
197
- _color(f" {i}. {path}:{start}-{end} ({symbol}) {score_str}", GREEN)
198
- )
180
+ click.echo(_color(f" {i}. {path}:{start}-{end} ({symbol})", GREEN))
199
181
  click.echo(f" {preview}")
200
182
 
201
183
 
@@ -12,7 +12,6 @@ class Chunk:
12
12
  start_line: int
13
13
  end_line: int
14
14
  symbol: str
15
- language: str
16
15
  content: str
17
16
 
18
17
  def line_range(self) -> tuple[int, int]:
@@ -228,12 +228,15 @@ class CodeGraph:
228
228
  very large sets.
229
229
  """
230
230
  resolved = self.resolve_symbol(symbol) or symbol
231
+ if resolved not in self._g:
232
+ return []
231
233
  visited: set[str] = set()
232
234
  frontier = {resolved}
233
235
  for _ in range(depth):
234
236
  next_frontier: set[str] = set()
235
237
  for nid in frontier:
236
- # predecessors = nodes that have an edge pointing TO nid
238
+ if nid not in self._g:
239
+ continue
237
240
  for pred in self._g.predecessors(nid):
238
241
  if pred not in visited:
239
242
  visited.add(pred)
@@ -0,0 +1,11 @@
1
+ _DEFAULT_MSG = (
2
+ "Index not built. Ask the user to run 'coderay build' in their terminal, "
3
+ "then retry."
4
+ )
5
+
6
+
7
+ class IndexNotBuiltError(Exception):
8
+ """Raised when a tool requires a coderay index that has not been built."""
9
+
10
+ def __init__(self, message: str = _DEFAULT_MSG) -> None:
11
+ super().__init__(message)
@@ -0,0 +1,225 @@
1
+ from __future__ import annotations
2
+
3
+ import logging
4
+ from pathlib import Path
5
+ from typing import Annotated, Any
6
+
7
+ from fastmcp import FastMCP
8
+ from mcp.types import ToolAnnotations
9
+ from pydantic import Field
10
+
11
+ from coderay.mcp_server.errors import IndexNotBuiltError
12
+ from coderay.retrieval.models import SearchResult
13
+
14
+ logger = logging.getLogger(__name__)
15
+
16
+ mcp = FastMCP(
17
+ name="coderay",
18
+ instructions=(
19
+ "CodeRay provides semantic code search, file skeletons, and "
20
+ "dependency impact analysis over a pre-built index.\n"
21
+ "\n"
22
+ "- semantic_search: search code by meaning. Best for "
23
+ "'how/where' questions. Use grep for exact symbol lookup.\n"
24
+ "- get_file_skeleton: signatures and docstrings only, no bodies. "
25
+ "Useful to check a file's API before reading full source. "
26
+ "Works without the index.\n"
27
+ "- get_impact_radius: reverse dependency traversal from the code "
28
+ "graph. Shows callers/dependents of a function or class.\n"
29
+ "\n"
30
+ "All tools except get_file_skeleton require a built index. "
31
+ "On index errors, ask the user to run 'coderay build'."
32
+ ),
33
+ )
34
+
35
+ DEFAULT_INDEX_DIR = ".index"
36
+
37
+ _retrieval_cache: dict[Path, Any] = {}
38
+ _state_machine_cache: dict[Path, Any] = {}
39
+
40
+
41
+ def _resolve_index_dir() -> Path:
42
+ """Resolve the index directory to an absolute path."""
43
+ return Path(DEFAULT_INDEX_DIR).resolve()
44
+
45
+
46
+ def _get_retrieval():
47
+ """Return a cached Retrieval instance for the given index directory."""
48
+ idx = _resolve_index_dir()
49
+ if idx not in _retrieval_cache:
50
+ from coderay.retrieval.search import Retrieval
51
+
52
+ _retrieval_cache[idx] = Retrieval(idx)
53
+ return _retrieval_cache[idx]
54
+
55
+
56
+ def _load_graph():
57
+ """Load the code graph from disk, or return None if absent."""
58
+ from coderay.graph.builder import load_graph
59
+
60
+ return load_graph(_resolve_index_dir())
61
+
62
+
63
+ def _get_state_machine():
64
+ """Return a cached StateMachine instance."""
65
+ idx = _resolve_index_dir()
66
+ if idx not in _state_machine_cache:
67
+ from coderay.state.machine import StateMachine
68
+
69
+ _state_machine_cache[idx] = StateMachine(idx)
70
+ return _state_machine_cache[idx]
71
+
72
+
73
+ def _load_state():
74
+ """Load the current IndexMeta state, or None if no run has completed."""
75
+ return _get_state_machine().current_state
76
+
77
+
78
+ READ_ONLY_ANNOTATIONS = ToolAnnotations(readOnlyHint=True, destructiveHint=False)
79
+
80
+
81
+ @mcp.tool(
82
+ description=(
83
+ "Search code by meaning. Returns chunks ranked by relevance, "
84
+ "each with path, line range, symbol, and content. "
85
+ "Best for 'how/where' questions; use grep for exact symbols."
86
+ ),
87
+ annotations=READ_ONLY_ANNOTATIONS,
88
+ tags={"search"},
89
+ )
90
+ def semantic_search(
91
+ query: Annotated[
92
+ str,
93
+ Field(description="Natural language question about the code"),
94
+ ],
95
+ top_k: int = 5,
96
+ path_prefix: Annotated[
97
+ str | None,
98
+ Field(
99
+ description=(
100
+ "Filter to files under this directory, e.g. 'src/coderay/graph/'"
101
+ ),
102
+ ),
103
+ ] = None,
104
+ ) -> dict:
105
+ """Search the semantic index."""
106
+ retrieval = _get_retrieval()
107
+ state = _load_state()
108
+ if state is None:
109
+ raise IndexNotBuiltError()
110
+
111
+ raw_results = retrieval.search(
112
+ query=query,
113
+ current_state=state,
114
+ top_k=top_k,
115
+ path_prefix=path_prefix,
116
+ )
117
+ results = [SearchResult.from_raw(r) for r in raw_results]
118
+ return {"results": [r.to_dict() for r in results]}
119
+
120
+
121
+ @mcp.tool(
122
+ description=(
123
+ "Extracts class/function signatures and docstrings from a "
124
+ "file — no bodies. Output is significantly shorter than "
125
+ "full source. Does not require the index."
126
+ ),
127
+ annotations=READ_ONLY_ANNOTATIONS,
128
+ tags={"analysis"},
129
+ )
130
+ def get_file_skeleton(
131
+ file_path: Annotated[
132
+ str,
133
+ Field(description="Absolute or relative path to the file"),
134
+ ],
135
+ ) -> str:
136
+ """Get the API surface of a file (signatures, no bodies)."""
137
+ from coderay.skeleton.extractor import extract_skeleton
138
+
139
+ p = Path(file_path)
140
+ if not p.is_file():
141
+ raise FileNotFoundError(f"File not found: {file_path}")
142
+ content = p.read_text(encoding="utf-8", errors="replace")
143
+ return extract_skeleton(p, content)
144
+
145
+
146
+ @mcp.tool(
147
+ description=(
148
+ "Reverse dependency traversal: lists callers and dependents "
149
+ "of a function or class from the code graph. Returns empty "
150
+ "results when node_id has no dependents. "
151
+ "Static analysis only; dynamic dispatch may be missed."
152
+ ),
153
+ annotations=READ_ONLY_ANNOTATIONS,
154
+ tags={"analysis"},
155
+ )
156
+ def get_impact_radius(
157
+ node_id: Annotated[
158
+ str,
159
+ Field(
160
+ description=(
161
+ "Fully qualified node ID, e.g. "
162
+ "'src/utils.py::parse_config' or "
163
+ "'src/models.py::User.save'"
164
+ ),
165
+ ),
166
+ ],
167
+ max_depth: Annotated[
168
+ int,
169
+ Field(description="How many caller/dependent levels to traverse"),
170
+ ] = 2,
171
+ ) -> dict:
172
+ """Analyze the blast radius of changing a function or module."""
173
+ graph = _load_graph()
174
+ if graph is None:
175
+ raise IndexNotBuiltError(
176
+ "No graph found. Ask the user to run 'coderay build' "
177
+ "in their terminal, then retry."
178
+ )
179
+ impact = graph.get_impact_radius(node_id, depth=max_depth)
180
+ return {
181
+ "results": [n.to_dict() for n in impact],
182
+ }
183
+
184
+
185
+ @mcp.resource(
186
+ "coderay://index/status",
187
+ description=("Index status: build state, branch, commit, and chunk count."),
188
+ tags={"status"},
189
+ )
190
+ def index_status() -> dict:
191
+ """Check health and status of the semantic index."""
192
+ state = _load_state()
193
+ if state is None:
194
+ raise IndexNotBuiltError()
195
+
196
+ from coderay.core.config import get_embedding_dimensions, load_config
197
+ from coderay.state.version import read_index_version
198
+ from coderay.storage.lancedb import index_exists as idx_exists
199
+
200
+ idx_dir = _resolve_index_dir()
201
+ has_store = idx_exists(idx_dir)
202
+ chunk_count = 0
203
+ if has_store:
204
+ from coderay.storage.lancedb import Store
205
+
206
+ config = load_config(idx_dir)
207
+ store = Store(idx_dir, dimensions=get_embedding_dimensions(config))
208
+ chunk_count = store.chunk_count()
209
+
210
+ return {
211
+ "status": state.state.value,
212
+ "branch": state.branch,
213
+ "last_commit": state.last_commit,
214
+ "chunk_count": chunk_count,
215
+ "schema_version": read_index_version(idx_dir),
216
+ "has_store": has_store,
217
+ }
218
+
219
+
220
+ def main():
221
+ mcp.run(transport="stdio")
222
+
223
+
224
+ if __name__ == "__main__":
225
+ main()
@@ -0,0 +1,63 @@
1
+ from __future__ import annotations
2
+
3
+ from dataclasses import dataclass
4
+
5
+ DEFAULT_MAX_CONTENT_LINES: int = 60
6
+
7
+
8
+ @dataclass(frozen=True)
9
+ class SearchResult:
10
+ """A single search hit with optional content truncation."""
11
+
12
+ path: str
13
+ start_line: int
14
+ end_line: int
15
+ symbol: str
16
+ content: str
17
+ truncated: bool = False
18
+
19
+ @classmethod
20
+ def from_raw(
21
+ cls,
22
+ row: dict,
23
+ *,
24
+ max_lines: int | None = DEFAULT_MAX_CONTENT_LINES,
25
+ ) -> SearchResult:
26
+ """Build from a raw dict returned by the storage layer.
27
+
28
+ Args:
29
+ row: Dict with keys path, start_line, end_line, symbol,
30
+ and content.
31
+ max_lines: Truncate content beyond this many lines.
32
+ None disables truncation.
33
+ """
34
+ content: str = row.get("content", "")
35
+ truncated = False
36
+
37
+ if max_lines is not None:
38
+ lines = content.split("\n")
39
+ if len(lines) > max_lines:
40
+ content = "\n".join(lines[:max_lines])
41
+ truncated = True
42
+
43
+ return cls(
44
+ path=row["path"],
45
+ start_line=row["start_line"],
46
+ end_line=row["end_line"],
47
+ symbol=row["symbol"],
48
+ content=content,
49
+ truncated=truncated,
50
+ )
51
+
52
+ def to_dict(self) -> dict:
53
+ """Serialize to a JSON-compatible dict for the MCP response."""
54
+ d: dict = {
55
+ "path": self.path,
56
+ "start_line": self.start_line,
57
+ "end_line": self.end_line,
58
+ "symbol": self.symbol,
59
+ "content": self.content,
60
+ }
61
+ if self.truncated:
62
+ d["truncated"] = True
63
+ return d
@@ -59,7 +59,6 @@ class Retrieval:
59
59
  *,
60
60
  top_k: int = 10,
61
61
  path_prefix: str | None = None,
62
- language: str | None = None,
63
62
  ) -> list[dict[str, Any]]:
64
63
  """Semantic search over the index."""
65
64
  if not index_exists(self.index_dir):
@@ -82,7 +81,6 @@ class Retrieval:
82
81
  query_embedding=query_vectors[0],
83
82
  top_k=top_k,
84
83
  path_prefix=path_prefix,
85
- language=language,
86
84
  query_text=query,
87
85
  )
88
86
  logger.info("Vector search took %.3fs", time.perf_counter() - t1)
@@ -11,17 +11,10 @@ logger = logging.getLogger(__name__)
11
11
  def extract_skeleton(
12
12
  path: str | Path,
13
13
  content: str,
14
- language: str | None = None,
15
14
  ) -> str:
16
15
  """Extract the skeleton of a source file (signatures, no bodies)."""
17
16
  path_str = str(path)
18
- lang_cfg = get_language_for_file(path_str) if language is None else None
19
- if lang_cfg is None and language:
20
- from coderay.chunking.registry import LANGUAGE_REGISTRY
21
-
22
- lang_cfg = LANGUAGE_REGISTRY.get(language)
23
- if lang_cfg is None:
24
- lang_cfg = get_language_for_file(path_str)
17
+ lang_cfg = get_language_for_file(path_str)
25
18
  if lang_cfg is None:
26
19
  return content
27
20
 
@@ -33,7 +26,7 @@ def extract_skeleton(
33
26
  source_bytes = content.encode("utf-8")
34
27
  tree = parser.parse(source_bytes)
35
28
  lines: list[str] = []
36
- _visit_skeleton(tree.root_node, source_bytes, lang_cfg.name, lines, depth=0)
29
+ _visit_skeleton(tree.root_node, source_bytes, lines, depth=0)
37
30
  return "\n".join(lines)
38
31
 
39
32
 
@@ -77,7 +70,6 @@ def _get_signature_line(node, source_bytes: bytes) -> str:
77
70
  def _visit_skeleton(
78
71
  node,
79
72
  source_bytes: bytes,
80
- language: str,
81
73
  lines: list[str],
82
74
  depth: int,
83
75
  ) -> None:
@@ -118,7 +110,7 @@ def _visit_skeleton(
118
110
  for child in node.children:
119
111
  if child.type in ("block", "class_body", "statement_block"):
120
112
  for member in child.children:
121
- _visit_skeleton(member, source_bytes, language, lines, depth + 1)
113
+ _visit_skeleton(member, source_bytes, lines, depth + 1)
122
114
  lines.append("")
123
115
  return
124
116
 
@@ -137,4 +129,4 @@ def _visit_skeleton(
137
129
  return
138
130
 
139
131
  for child in node.children:
140
- _visit_skeleton(child, source_bytes, language, lines, depth)
132
+ _visit_skeleton(child, source_bytes, lines, depth)
@@ -64,7 +64,6 @@ class Store:
64
64
  "start_line": chunk.start_line,
65
65
  "end_line": chunk.end_line,
66
66
  "symbol": chunk.symbol,
67
- "language": chunk.language,
68
67
  "content": chunk.content,
69
68
  "vector": emb,
70
69
  }
@@ -115,7 +114,6 @@ class Store:
115
114
  query_embedding: list[float],
116
115
  top_k: int = 10,
117
116
  path_prefix: str | None = None,
118
- language: str | None = None,
119
117
  query_text: str | None = None,
120
118
  ) -> list[dict[str, Any]]:
121
119
  """Nearest-neighbor search with optional hybrid scoring."""
@@ -146,9 +144,6 @@ class Store:
146
144
  if path_prefix:
147
145
  prefix = (path_prefix.rstrip("/") + "/").replace("'", "''")
148
146
  query = query.where(f"path LIKE '{prefix}%'")
149
- if language:
150
- lang = (language or "").replace("'", "''")
151
- query = query.where(f"language = '{lang}'")
152
147
 
153
148
  query = query.limit(top_k)
154
149
  rows = query.to_list()
@@ -164,7 +159,6 @@ class Store:
164
159
  else:
165
160
  score = row.pop("distance", 0.0)
166
161
  row["score"] = round(float(score), 4)
167
- row["score_type"] = "rrf" if use_hybrid else "cosine"
168
162
  row.pop("vector", None)
169
163
  results.append(row)
170
164
 
@@ -189,7 +183,7 @@ class Store:
189
183
  if n == 0:
190
184
  return []
191
185
 
192
- col_names = ["path", "start_line", "end_line", "symbol", "language"]
186
+ col_names = ["path", "start_line", "end_line", "symbol"]
193
187
 
194
188
  if path_prefix:
195
189
  prefix = (path_prefix.rstrip("/") + "/").replace("'", "''")
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: coderay
3
- Version: 1.0.2
3
+ Version: 1.0.3
4
4
  Summary: X-ray your codebase — semantic search, code graphs, file skeletons, and MCP server
5
5
  Author-email: Bogdan Copocean <bogdancopocean@gmail.com>
6
6
  License-Expression: MIT
@@ -20,31 +20,31 @@ Classifier: Topic :: Text Processing :: Indexing
20
20
  Requires-Python: >=3.10
21
21
  Description-Content-Type: text/markdown
22
22
  License-File: LICENSE
23
- Requires-Dist: python-dotenv>=1.0.0
24
- Requires-Dist: networkx>=3.0
25
- Requires-Dist: tree-sitter>=0.24.0
23
+ Requires-Dist: python-dotenv>=1.2.0
24
+ Requires-Dist: networkx>=3.4
25
+ Requires-Dist: tree-sitter>=0.25.0
26
26
  Requires-Dist: tree-sitter-python>=0.25.0
27
- Requires-Dist: lancedb>=0.5.0
28
- Requires-Dist: pyyaml>=6.0
29
- Requires-Dist: click>=8.0
30
- Requires-Dist: filelock>=3.0
31
- Requires-Dist: fastembed>=0.4.0
32
- Requires-Dist: mcp>=1.0.0
33
- Requires-Dist: watchdog>=4.0.0
34
- Requires-Dist: pathspec>=0.12.0
27
+ Requires-Dist: lancedb>=0.29.0
28
+ Requires-Dist: pyyaml>=6.0.3
29
+ Requires-Dist: click>=8.3.0
30
+ Requires-Dist: filelock>=3.25.0
31
+ Requires-Dist: fastembed>=0.7.0
32
+ Requires-Dist: fastmcp==3.1.0
33
+ Requires-Dist: watchdog>=6.0.0
34
+ Requires-Dist: pathspec>=1.0.0
35
35
  Provides-Extra: openai
36
- Requires-Dist: openai>=1.0.0; extra == "openai"
36
+ Requires-Dist: openai>=2.0.0; extra == "openai"
37
37
  Provides-Extra: languages
38
- Requires-Dist: tree-sitter-javascript>=0.23.0; extra == "languages"
39
- Requires-Dist: tree-sitter-typescript>=0.23.0; extra == "languages"
40
- Requires-Dist: tree-sitter-go>=0.23.0; extra == "languages"
38
+ Requires-Dist: tree-sitter-javascript>=0.25.0; extra == "languages"
39
+ Requires-Dist: tree-sitter-typescript>=0.23.2; extra == "languages"
40
+ Requires-Dist: tree-sitter-go>=0.25.0; extra == "languages"
41
41
  Provides-Extra: dev
42
42
  Requires-Dist: pytest>=7.0; extra == "dev"
43
43
  Requires-Dist: pytest-cov>=4.0; extra == "dev"
44
44
  Requires-Dist: ruff>=0.8.0; extra == "dev"
45
45
  Requires-Dist: mypy>=1.0.0; extra == "dev"
46
- Requires-Dist: openai>=1.0.0; extra == "dev"
47
- Requires-Dist: httpx>=0.27.0; extra == "dev"
46
+ Requires-Dist: openai>=2.0.0; extra == "dev"
47
+ Requires-Dist: httpx>=0.28.0; extra == "dev"
48
48
  Provides-Extra: maintain
49
49
  Requires-Dist: pylance>=0.15.0; extra == "maintain"
50
50
  Provides-Extra: all
@@ -28,12 +28,14 @@ src/coderay/graph/builder.py
28
28
  src/coderay/graph/code_graph.py
29
29
  src/coderay/graph/extractor.py
30
30
  src/coderay/mcp_server/__init__.py
31
+ src/coderay/mcp_server/errors.py
31
32
  src/coderay/mcp_server/server.py
32
33
  src/coderay/pipeline/__init__.py
33
34
  src/coderay/pipeline/indexer.py
34
35
  src/coderay/pipeline/watcher.py
35
36
  src/coderay/retrieval/__init__.py
36
37
  src/coderay/retrieval/boosting.py
38
+ src/coderay/retrieval/models.py
37
39
  src/coderay/retrieval/search.py
38
40
  src/coderay/skeleton/__init__.py
39
41
  src/coderay/skeleton/extractor.py
@@ -0,0 +1,34 @@
1
+ python-dotenv>=1.2.0
2
+ networkx>=3.4
3
+ tree-sitter>=0.25.0
4
+ tree-sitter-python>=0.25.0
5
+ lancedb>=0.29.0
6
+ pyyaml>=6.0.3
7
+ click>=8.3.0
8
+ filelock>=3.25.0
9
+ fastembed>=0.7.0
10
+ fastmcp==3.1.0
11
+ watchdog>=6.0.0
12
+ pathspec>=1.0.0
13
+
14
+ [all]
15
+ coderay[dev,languages,maintain,openai]
16
+
17
+ [dev]
18
+ pytest>=7.0
19
+ pytest-cov>=4.0
20
+ ruff>=0.8.0
21
+ mypy>=1.0.0
22
+ openai>=2.0.0
23
+ httpx>=0.28.0
24
+
25
+ [languages]
26
+ tree-sitter-javascript>=0.25.0
27
+ tree-sitter-typescript>=0.23.2
28
+ tree-sitter-go>=0.25.0
29
+
30
+ [maintain]
31
+ pylance>=0.15.0
32
+
33
+ [openai]
34
+ openai>=2.0.0
@@ -1,178 +0,0 @@
1
- from __future__ import annotations
2
-
3
- import json
4
- import logging
5
- from pathlib import Path
6
- from typing import Any
7
-
8
- from mcp.server.fastmcp import FastMCP
9
-
10
- logger = logging.getLogger(__name__)
11
-
12
- mcp = FastMCP("coderay")
13
-
14
- DEFAULT_INDEX_DIR = ".index"
15
-
16
- _retrieval_cache: dict[Path, Any] = {}
17
- _state_machine_cache: dict[Path, Any] = {}
18
-
19
-
20
- def _resolve_index_dir(index_dir: str | None = None) -> Path:
21
- """Resolve the index directory to an absolute path."""
22
- return Path(index_dir or DEFAULT_INDEX_DIR).resolve()
23
-
24
-
25
- def _get_retrieval(index_dir: str | None = None):
26
- """Return a cached Retrieval instance for the given index directory."""
27
- idx = _resolve_index_dir(index_dir)
28
- if idx not in _retrieval_cache:
29
- from coderay.retrieval.search import Retrieval
30
-
31
- _retrieval_cache[idx] = Retrieval(idx)
32
- return _retrieval_cache[idx]
33
-
34
-
35
- def _load_graph(index_dir: str | None = None):
36
- """Load the code graph from disk, or return None if absent."""
37
- from coderay.graph.builder import load_graph
38
-
39
- return load_graph(_resolve_index_dir(index_dir))
40
-
41
-
42
- def _get_state_machine(index_dir: str | None = None):
43
- """Return a cached StateMachine instance for the given index directory."""
44
- idx = _resolve_index_dir(index_dir)
45
- if idx not in _state_machine_cache:
46
- from coderay.state.machine import StateMachine
47
-
48
- _state_machine_cache[idx] = StateMachine(idx)
49
- return _state_machine_cache[idx]
50
-
51
-
52
- def _load_state(index_dir: str | None = None):
53
- """Load the current IndexMeta state, or None if no run has completed."""
54
- return _get_state_machine(index_dir).current_state
55
-
56
-
57
- @mcp.tool()
58
- def semantic_search(
59
- query: str,
60
- top_k: int = 10,
61
- path_prefix: str | None = None,
62
- language: str | None = None,
63
- index_dir: str | None = None,
64
- ) -> str:
65
- """Search code by meaning."""
66
- retrieval = _get_retrieval(index_dir)
67
- state = _load_state(index_dir)
68
- if state is None:
69
- return json.dumps({"error": "No index state found. Run 'coderay build' first."})
70
- try:
71
- results = retrieval.search(
72
- query,
73
- state,
74
- top_k=top_k,
75
- path_prefix=path_prefix,
76
- language=language,
77
- )
78
- except RuntimeError as e:
79
- return json.dumps({"error": str(e)})
80
- score_type = results[0].get("score_type", "cosine") if results else "cosine"
81
- return json.dumps(
82
- {
83
- "score_type": score_type,
84
- "score_description": (
85
- "cosine similarity (0-1, higher = more similar)"
86
- if score_type == "cosine"
87
- else "RRF rank fusion (higher = more relevant, scale differs from cosine)"
88
- ),
89
- "results": results,
90
- },
91
- default=str,
92
- )
93
-
94
-
95
- @mcp.tool()
96
- def get_file_skeleton(file_path: str) -> str:
97
- """Get the API surface of a file (signatures, no bodies)."""
98
- from coderay.skeleton.extractor import extract_skeleton
99
-
100
- p = Path(file_path)
101
- if not p.is_file():
102
- return json.dumps({"error": f"File not found: {file_path}"})
103
- content = p.read_text(encoding="utf-8", errors="replace")
104
- return extract_skeleton(p, content)
105
-
106
-
107
- _STATIC_ANALYSIS_NOTE = (
108
- "Based on static analysis of source code. Calls through dependency "
109
- "injection, interfaces, dynamic dispatch (getattr), decorators, or "
110
- "framework routing may not be detected."
111
- )
112
-
113
-
114
- @mcp.tool()
115
- def get_impact_radius(
116
- node_id: str,
117
- max_depth: int = 3,
118
- index_dir: str | None = None,
119
- ) -> str:
120
- """Analyze the blast radius of changing a function or module."""
121
- graph = _load_graph(index_dir)
122
- if graph is None:
123
- return json.dumps({"error": "No graph found. Run 'coderay build' first."})
124
- impact = graph.get_impact_radius(node_id, depth=max_depth)
125
- return json.dumps(
126
- {
127
- "results": [n.to_dict() for n in impact],
128
- "note": _STATIC_ANALYSIS_NOTE,
129
- }
130
- )
131
-
132
-
133
- @mcp.tool()
134
- def index_status(index_dir: str | None = None) -> str:
135
- """Check health and status of the semantic index."""
136
- state = _load_state(index_dir)
137
- if state is None:
138
- return json.dumps({"status": "no_index", "message": "No index found."})
139
-
140
- from coderay.core.config import get_embedding_dimensions, load_config
141
- from coderay.state.version import read_index_version
142
- from coderay.storage.lancedb import index_exists as idx_exists
143
-
144
- idx_dir = _resolve_index_dir(index_dir)
145
- has_store = idx_exists(idx_dir)
146
- chunk_count = 0
147
- if has_store:
148
- from coderay.storage.lancedb import Store
149
-
150
- config = load_config(idx_dir)
151
- store = Store(idx_dir, dimensions=get_embedding_dimensions(config))
152
- chunk_count = store.chunk_count()
153
-
154
- return json.dumps(
155
- {
156
- "status": state.state.value,
157
- "branch": state.branch,
158
- "last_commit": state.last_commit,
159
- "chunk_count": chunk_count,
160
- "schema_version": read_index_version(idx_dir),
161
- "has_store": has_store,
162
- },
163
- default=str,
164
- )
165
-
166
-
167
- def main():
168
- """Entry point for the coderay-mcp command."""
169
- import sys
170
-
171
- transport = "stdio"
172
- if "--sse" in sys.argv:
173
- transport = "sse"
174
- mcp.run(transport=transport)
175
-
176
-
177
- if __name__ == "__main__":
178
- main()
@@ -1,34 +0,0 @@
1
- python-dotenv>=1.0.0
2
- networkx>=3.0
3
- tree-sitter>=0.24.0
4
- tree-sitter-python>=0.25.0
5
- lancedb>=0.5.0
6
- pyyaml>=6.0
7
- click>=8.0
8
- filelock>=3.0
9
- fastembed>=0.4.0
10
- mcp>=1.0.0
11
- watchdog>=4.0.0
12
- pathspec>=0.12.0
13
-
14
- [all]
15
- coderay[dev,languages,maintain,openai]
16
-
17
- [dev]
18
- pytest>=7.0
19
- pytest-cov>=4.0
20
- ruff>=0.8.0
21
- mypy>=1.0.0
22
- openai>=1.0.0
23
- httpx>=0.27.0
24
-
25
- [languages]
26
- tree-sitter-javascript>=0.23.0
27
- tree-sitter-typescript>=0.23.0
28
- tree-sitter-go>=0.23.0
29
-
30
- [maintain]
31
- pylance>=0.15.0
32
-
33
- [openai]
34
- openai>=1.0.0
File without changes
File without changes
File without changes
File without changes
File without changes