coderay 1.0.4__tar.gz → 1.0.6__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (50) hide show
  1. {coderay-1.0.4/src/coderay.egg-info → coderay-1.0.6}/PKG-INFO +49 -33
  2. {coderay-1.0.4 → coderay-1.0.6}/README.md +48 -32
  3. {coderay-1.0.4 → coderay-1.0.6}/pyproject.toml +1 -1
  4. {coderay-1.0.4 → coderay-1.0.6}/src/coderay/cli/commands.py +38 -75
  5. {coderay-1.0.4 → coderay-1.0.6}/src/coderay/core/config.py +15 -1
  6. {coderay-1.0.4 → coderay-1.0.6}/src/coderay/core/timing.py +12 -6
  7. {coderay-1.0.4 → coderay-1.0.6}/src/coderay/graph/builder.py +4 -1
  8. {coderay-1.0.4 → coderay-1.0.6}/src/coderay/graph/code_graph.py +62 -2
  9. {coderay-1.0.4 → coderay-1.0.6}/src/coderay/graph/extractor.py +7 -9
  10. {coderay-1.0.4 → coderay-1.0.6}/src/coderay/pipeline/indexer.py +7 -67
  11. {coderay-1.0.4 → coderay-1.0.6}/src/coderay/pipeline/watcher.py +6 -25
  12. {coderay-1.0.4 → coderay-1.0.6}/src/coderay/retrieval/search.py +10 -12
  13. {coderay-1.0.4 → coderay-1.0.6/src/coderay.egg-info}/PKG-INFO +49 -33
  14. {coderay-1.0.4 → coderay-1.0.6}/LICENSE +0 -0
  15. {coderay-1.0.4 → coderay-1.0.6}/setup.cfg +0 -0
  16. {coderay-1.0.4 → coderay-1.0.6}/src/coderay/__init__.py +0 -0
  17. {coderay-1.0.4 → coderay-1.0.6}/src/coderay/chunking/__init__.py +0 -0
  18. {coderay-1.0.4 → coderay-1.0.6}/src/coderay/chunking/chunker.py +0 -0
  19. {coderay-1.0.4 → coderay-1.0.6}/src/coderay/cli/__init__.py +0 -0
  20. {coderay-1.0.4 → coderay-1.0.6}/src/coderay/core/__init__.py +0 -0
  21. {coderay-1.0.4 → coderay-1.0.6}/src/coderay/core/lock.py +0 -0
  22. {coderay-1.0.4 → coderay-1.0.6}/src/coderay/core/models.py +0 -0
  23. {coderay-1.0.4 → coderay-1.0.6}/src/coderay/core/utils.py +0 -0
  24. {coderay-1.0.4 → coderay-1.0.6}/src/coderay/embedding/__init__.py +0 -0
  25. {coderay-1.0.4 → coderay-1.0.6}/src/coderay/embedding/base.py +0 -0
  26. {coderay-1.0.4 → coderay-1.0.6}/src/coderay/embedding/local.py +0 -0
  27. {coderay-1.0.4 → coderay-1.0.6}/src/coderay/graph/__init__.py +0 -0
  28. {coderay-1.0.4 → coderay-1.0.6}/src/coderay/mcp_server/__init__.py +0 -0
  29. {coderay-1.0.4 → coderay-1.0.6}/src/coderay/mcp_server/errors.py +0 -0
  30. {coderay-1.0.4 → coderay-1.0.6}/src/coderay/mcp_server/server.py +0 -0
  31. {coderay-1.0.4 → coderay-1.0.6}/src/coderay/parsing/base.py +0 -0
  32. {coderay-1.0.4 → coderay-1.0.6}/src/coderay/parsing/languages.py +0 -0
  33. {coderay-1.0.4 → coderay-1.0.6}/src/coderay/pipeline/__init__.py +0 -0
  34. {coderay-1.0.4 → coderay-1.0.6}/src/coderay/retrieval/__init__.py +0 -0
  35. {coderay-1.0.4 → coderay-1.0.6}/src/coderay/retrieval/boosting.py +0 -0
  36. {coderay-1.0.4 → coderay-1.0.6}/src/coderay/retrieval/models.py +0 -0
  37. {coderay-1.0.4 → coderay-1.0.6}/src/coderay/skeleton/__init__.py +0 -0
  38. {coderay-1.0.4 → coderay-1.0.6}/src/coderay/skeleton/extractor.py +0 -0
  39. {coderay-1.0.4 → coderay-1.0.6}/src/coderay/state/__init__.py +0 -0
  40. {coderay-1.0.4 → coderay-1.0.6}/src/coderay/state/machine.py +0 -0
  41. {coderay-1.0.4 → coderay-1.0.6}/src/coderay/state/version.py +0 -0
  42. {coderay-1.0.4 → coderay-1.0.6}/src/coderay/storage/__init__.py +0 -0
  43. {coderay-1.0.4 → coderay-1.0.6}/src/coderay/storage/lancedb.py +0 -0
  44. {coderay-1.0.4 → coderay-1.0.6}/src/coderay/vcs/__init__.py +0 -0
  45. {coderay-1.0.4 → coderay-1.0.6}/src/coderay/vcs/git.py +0 -0
  46. {coderay-1.0.4 → coderay-1.0.6}/src/coderay.egg-info/SOURCES.txt +0 -0
  47. {coderay-1.0.4 → coderay-1.0.6}/src/coderay.egg-info/dependency_links.txt +0 -0
  48. {coderay-1.0.4 → coderay-1.0.6}/src/coderay.egg-info/entry_points.txt +0 -0
  49. {coderay-1.0.4 → coderay-1.0.6}/src/coderay.egg-info/requires.txt +0 -0
  50. {coderay-1.0.4 → coderay-1.0.6}/src/coderay.egg-info/top_level.txt +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: coderay
3
- Version: 1.0.4
3
+ Version: 1.0.6
4
4
  Summary: X-ray your codebase — semantic search, code graphs, file skeletons, and MCP server
5
5
  Author-email: Bogdan Copocean <bogdancopocean@gmail.com>
6
6
  License-Expression: MIT
@@ -64,6 +64,15 @@ AI coding assistants and a standalone CLI.
64
64
 
65
65
  ## Install
66
66
 
67
+ Create a virtual environment (recommended):
68
+
69
+ ```bash
70
+ python -m venv .venv
71
+ source .venv/bin/activate
72
+ ```
73
+
74
+ Then install:
75
+
67
76
  ```bash
68
77
  pip install coderay
69
78
  ```
@@ -79,6 +88,7 @@ For development:
79
88
  ```bash
80
89
  git clone https://github.com/bogdan-copocean/coderay.git
81
90
  cd coderay
91
+ python -m venv .venv && source .venv/bin/activate
82
92
  pip install -e ".[all]"
83
93
  ```
84
94
 
@@ -86,15 +96,22 @@ pip install -e ".[all]"
86
96
 
87
97
  ```bash
88
98
  cd /path/to/your/project
89
- coderay build --repo .
99
+ coderay watch --repo . # keeps index fresh while you work (recommended)
90
100
  coderay search "how does authentication work"
91
- coderay watch --repo .
92
101
  coderay graph --kind calls
93
102
  coderay skeleton src/app/main.py
94
103
  ```
95
104
 
105
+ > **Use `watch`, not `build`.** `coderay build` is a one-off; while you work, the index will get stale. `coderay watch` re-indexes on file changes and is the go-to for active development.
106
+
96
107
  ## MCP server (Claude Code / Cursor)
97
108
 
109
+ Find the MCP executable path:
110
+
111
+ ```bash
112
+ which coderay-mcp
113
+ ```
114
+
98
115
  Add to `~/.claude/claude_code_config.json` or Cursor MCP settings:
99
116
 
100
117
  ```json
@@ -108,13 +125,14 @@ Add to `~/.claude/claude_code_config.json` or Cursor MCP settings:
108
125
  }
109
126
  ```
110
127
 
128
+ Replace `/path/to/your/.venv/bin/coderay-mcp` with the output of `which coderay-mcp`.
129
+
111
130
  ## CLI reference
112
131
 
113
132
  | Command | Description |
114
133
  |---|---|
115
- | `coderay build [--full] --repo .` | Build index (incremental or full rebuild) |
116
- | `coderay update --repo .` | Incremental update (changed files only) |
117
- | `coderay watch --repo . [--debounce N]` | Watch for file changes, re-index automatically |
134
+ | `coderay watch --repo . [--debounce N]` | **Recommended.** Watch for file changes, re-index automatically |
135
+ | `coderay build [--full] --repo .` | Build or incremental update. Use `--full` for full rebuild |
118
136
  | `coderay search "query" [--top-k N]` | Semantic search |
119
137
  | `coderay list [--by-file]` | List indexed chunks |
120
138
  | `coderay status` | Index state, branch, commit, chunk count |
@@ -124,39 +142,37 @@ Add to `~/.claude/claude_code_config.json` or Cursor MCP settings:
124
142
 
125
143
  ## Configuration
126
144
 
127
- Optional `config.yaml` in the index directory:
145
+ File discovery and ignoring are based on `.git` and `.gitignore`. The `.git` directory is excluded; files matching `.gitignore` are not indexed. Config `exclude_patterns` add extra exclusions on top of that.
146
+
147
+ Optional `config.yaml` in the index directory (default: `.index/config.yaml`):
128
148
 
129
149
  ```yaml
130
150
  embedder:
131
- provider: local
132
- model: all-MiniLM-L6-v2
151
+ model: sentence-transformers/all-MiniLM-L6-v2
133
152
  dimensions: 384
134
153
 
135
- search:
136
- boost_rules:
137
- "tests/": 0.5
138
- "src/core/": 1.2
139
-
140
- graph:
141
- exclude_callees:
142
- - "our_sdk_helper"
143
- include_callees:
144
- - "isinstance"
145
-
146
- watch:
147
- debounce_seconds: 2
148
- branch_switch_threshold: 50
149
- exclude_patterns:
154
+ index:
155
+ exclude_patterns: # besides .gitignore
150
156
  - "*.log"
151
- ```
152
157
 
153
- ## Development
158
+ semantic_search:
159
+ boosting:
160
+ penalties:
161
+ - pattern: "(^|/)tests?/"
162
+ factor: 0.5
163
+ - pattern: "(^|/)test_[^/]+\\.py$"
164
+ factor: 0.5
165
+ bonuses:
166
+ - pattern: "(^|/)src/"
167
+ factor: 1.1
168
+ metric: cosine
169
+
170
+ watcher:
171
+ debounce: 2
172
+ exclude_patterns: # besides .gitignore
173
+ - "*.log"
154
174
 
155
- ```bash
156
- pip install -e ".[dev]"
157
- make test
158
- make lint
159
- make format
175
+ graph:
176
+ exclude_modules: [] # module names to exclude from CALLS/IMPORTS edges
177
+ include_modules: [] # force-include (override excludes)
160
178
  ```
161
-
162
- Requires Python >= 3.10 and Git.
@@ -15,6 +15,15 @@ AI coding assistants and a standalone CLI.
15
15
 
16
16
  ## Install
17
17
 
18
+ Create a virtual environment (recommended):
19
+
20
+ ```bash
21
+ python -m venv .venv
22
+ source .venv/bin/activate
23
+ ```
24
+
25
+ Then install:
26
+
18
27
  ```bash
19
28
  pip install coderay
20
29
  ```
@@ -30,6 +39,7 @@ For development:
30
39
  ```bash
31
40
  git clone https://github.com/bogdan-copocean/coderay.git
32
41
  cd coderay
42
+ python -m venv .venv && source .venv/bin/activate
33
43
  pip install -e ".[all]"
34
44
  ```
35
45
 
@@ -37,15 +47,22 @@ pip install -e ".[all]"
37
47
 
38
48
  ```bash
39
49
  cd /path/to/your/project
40
- coderay build --repo .
50
+ coderay watch --repo . # keeps index fresh while you work (recommended)
41
51
  coderay search "how does authentication work"
42
- coderay watch --repo .
43
52
  coderay graph --kind calls
44
53
  coderay skeleton src/app/main.py
45
54
  ```
46
55
 
56
+ > **Use `watch`, not `build`.** `coderay build` is a one-off; while you work, the index will get stale. `coderay watch` re-indexes on file changes and is the go-to for active development.
57
+
47
58
  ## MCP server (Claude Code / Cursor)
48
59
 
60
+ Find the MCP executable path:
61
+
62
+ ```bash
63
+ which coderay-mcp
64
+ ```
65
+
49
66
  Add to `~/.claude/claude_code_config.json` or Cursor MCP settings:
50
67
 
51
68
  ```json
@@ -59,13 +76,14 @@ Add to `~/.claude/claude_code_config.json` or Cursor MCP settings:
59
76
  }
60
77
  ```
61
78
 
79
+ Replace `/path/to/your/.venv/bin/coderay-mcp` with the output of `which coderay-mcp`.
80
+
62
81
  ## CLI reference
63
82
 
64
83
  | Command | Description |
65
84
  |---|---|
66
- | `coderay build [--full] --repo .` | Build index (incremental or full rebuild) |
67
- | `coderay update --repo .` | Incremental update (changed files only) |
68
- | `coderay watch --repo . [--debounce N]` | Watch for file changes, re-index automatically |
85
+ | `coderay watch --repo . [--debounce N]` | **Recommended.** Watch for file changes, re-index automatically |
86
+ | `coderay build [--full] --repo .` | Build or incremental update. Use `--full` for full rebuild |
69
87
  | `coderay search "query" [--top-k N]` | Semantic search |
70
88
  | `coderay list [--by-file]` | List indexed chunks |
71
89
  | `coderay status` | Index state, branch, commit, chunk count |
@@ -75,39 +93,37 @@ Add to `~/.claude/claude_code_config.json` or Cursor MCP settings:
75
93
 
76
94
  ## Configuration
77
95
 
78
- Optional `config.yaml` in the index directory:
96
+ File discovery and ignoring are based on `.git` and `.gitignore`. The `.git` directory is excluded; files matching `.gitignore` are not indexed. Config `exclude_patterns` add extra exclusions on top of that.
97
+
98
+ Optional `config.yaml` in the index directory (default: `.index/config.yaml`):
79
99
 
80
100
  ```yaml
81
101
  embedder:
82
- provider: local
83
- model: all-MiniLM-L6-v2
102
+ model: sentence-transformers/all-MiniLM-L6-v2
84
103
  dimensions: 384
85
104
 
86
- search:
87
- boost_rules:
88
- "tests/": 0.5
89
- "src/core/": 1.2
90
-
91
- graph:
92
- exclude_callees:
93
- - "our_sdk_helper"
94
- include_callees:
95
- - "isinstance"
96
-
97
- watch:
98
- debounce_seconds: 2
99
- branch_switch_threshold: 50
100
- exclude_patterns:
105
+ index:
106
+ exclude_patterns: # besides .gitignore
101
107
  - "*.log"
102
- ```
103
108
 
104
- ## Development
109
+ semantic_search:
110
+ boosting:
111
+ penalties:
112
+ - pattern: "(^|/)tests?/"
113
+ factor: 0.5
114
+ - pattern: "(^|/)test_[^/]+\\.py$"
115
+ factor: 0.5
116
+ bonuses:
117
+ - pattern: "(^|/)src/"
118
+ factor: 1.1
119
+ metric: cosine
120
+
121
+ watcher:
122
+ debounce: 2
123
+ exclude_patterns: # besides .gitignore
124
+ - "*.log"
105
125
 
106
- ```bash
107
- pip install -e ".[dev]"
108
- make test
109
- make lint
110
- make format
126
+ graph:
127
+ exclude_modules: [] # module names to exclude from CALLS/IMPORTS edges
128
+ include_modules: [] # force-include (override excludes)
111
129
  ```
112
-
113
- Requires Python >= 3.10 and Git.
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
4
4
 
5
5
  [project]
6
6
  name = "coderay"
7
- version = "1.0.4"
7
+ version = "1.0.6"
8
8
  description = "X-ray your codebase — semantic search, code graphs, file skeletons, and MCP server"
9
9
  readme = "README.md"
10
10
  license = "MIT"
@@ -9,6 +9,7 @@ import click
9
9
  from dotenv import load_dotenv
10
10
 
11
11
  from coderay.core.lock import acquire_indexer_lock
12
+ from coderay.core.timing import timed_phase
12
13
  from coderay.pipeline.indexer import Indexer
13
14
  from coderay.retrieval.search import Retrieval
14
15
  from coderay.state.machine import StateMachine
@@ -45,6 +46,7 @@ def _setup_logging(verbose: bool = False) -> None:
45
46
 
46
47
 
47
48
  @click.group()
49
+ @click.version_option(package_name="coderay", prog_name="coderay")
48
50
  @click.option("-v", "--verbose", is_flag=True, default=False, help="Verbose logging")
49
51
  @click.pass_context
50
52
  def cli(ctx: click.Context, verbose: bool) -> None:
@@ -74,61 +76,20 @@ def build(ctx: click.Context, full: bool, repo: Path) -> None:
74
76
  index_dir = Path(config.index.path)
75
77
  index_dir.mkdir(parents=True, exist_ok=True)
76
78
  indexer = Indexer(repo)
77
- t0 = time.time()
78
79
  try:
79
- with acquire_indexer_lock(index_dir):
80
- if full or not indexer.index_exists():
81
- click.echo(_color("Building full index...", CYAN))
82
- result = indexer.build_full()
83
- click.echo(
84
- _color(
85
- f"{result} in {time.time() - t0:.2f}s",
86
- GREEN,
87
- )
88
- )
89
- else:
90
- click.echo(_color("Updating index (incremental)...", CYAN))
91
- result = indexer.update_incremental()
92
- click.echo(
93
- _color(
94
- f"{result} in {time.time() - t0:.2f}s",
95
- GREEN,
96
- )
97
- )
98
- indexer.maintain()
99
- except Exception as e:
100
- indexer.error(str(e))
101
- click.echo(_color(f"Error: {e}", RED))
102
- raise
103
-
104
-
105
- @cli.command()
106
- @click.option(
107
- "--repo",
108
- default=".",
109
- type=click.Path(exists=True, path_type=Path),
110
- help="Repo root",
111
- )
112
- @click.pass_context
113
- def update(ctx: click.Context, repo: Path) -> None:
114
- """Incremental update (only changed files). Uses file lock."""
115
- from coderay.core.config import get_config
116
-
117
- config = get_config()
118
- index_dir = Path(config.index.path)
119
- indexer = Indexer(repo)
120
- t0 = time.time()
121
-
122
- if not indexer.index_exists():
123
- click.echo(_color("No index found. Run 'coderay build' first.", YELLOW))
124
- ctx.exit(1)
125
-
126
- try:
127
- with acquire_indexer_lock(index_dir):
128
- click.echo(_color("Updating index...", CYAN))
129
- result = indexer.update_incremental()
130
- click.echo(_color(f"{result} in {time.time() - t0:.2f}s", GREEN))
80
+ with timed_phase("build", log=False) as tp:
81
+ with acquire_indexer_lock(index_dir):
82
+ if full:
83
+ click.echo(_color("Building full index...", CYAN))
84
+ result = indexer.build_full()
85
+ else:
86
+ if not indexer.index_exists():
87
+ click.echo(_color("Building full index...", CYAN))
88
+ else:
89
+ click.echo(_color("Updating index (incremental)...", CYAN))
90
+ result = indexer.ensure_index()
131
91
  indexer.maintain()
92
+ click.echo(_color(f"{result} in {tp.elapsed:.2f}s", GREEN))
132
93
  except Exception as e:
133
94
  indexer.error(str(e))
134
95
  click.echo(_color(f"Error: {e}", RED))
@@ -163,16 +124,15 @@ def search_cmd(
163
124
 
164
125
  retrieval = Retrieval()
165
126
  click.echo(_color(f"Searching: {query_text!r}", CYAN))
166
- t0 = time.perf_counter()
167
127
 
168
- results = retrieval.search(
169
- query=query_text,
170
- current_state=current_state,
171
- top_k=top_k,
172
- path_prefix=path_prefix,
173
- )
174
- elapsed = time.perf_counter() - t0
175
- click.echo(_color(f"Query took {elapsed:.2f}s", BOLD))
128
+ with timed_phase("search", log=False) as tp:
129
+ results = retrieval.search(
130
+ query=query_text,
131
+ current_state=current_state,
132
+ top_k=top_k,
133
+ path_prefix=path_prefix,
134
+ )
135
+ click.echo(_color(f"Query took {tp.elapsed:.2f}s", BOLD))
176
136
 
177
137
  if not results:
178
138
  click.echo(_color("No results.", YELLOW))
@@ -367,7 +327,7 @@ def graph_cmd(
367
327
  if not edges:
368
328
  click.echo(
369
329
  _color(
370
- "No graph data. Run 'coderay build' or 'coderay update' to build it.",
330
+ "No graph data. Run 'coderay build' to build it.",
371
331
  YELLOW,
372
332
  )
373
333
  )
@@ -412,20 +372,26 @@ def watch(
412
372
 
413
373
  config = get_config()
414
374
  index_dir = Path(config.index.path)
415
- if not index_exists(index_dir):
416
- click.echo(
417
- _color(
418
- "No index found. Run 'coderay build' first.",
419
- YELLOW,
420
- )
421
- )
422
- ctx.exit(1)
375
+ index_dir.mkdir(parents=True, exist_ok=True)
423
376
 
424
377
  if quiet:
425
378
  logging.getLogger("coderay.pipeline.watcher").setLevel(logging.WARNING)
426
379
 
427
- watcher = FileWatcher(repo, index_dir)
380
+ indexer = Indexer(repo)
381
+ try:
382
+ with timed_phase("watch_startup", log=False) as tp:
383
+ with acquire_indexer_lock(index_dir):
384
+ if not indexer.index_exists():
385
+ click.echo(_color("No index found. Building full index...", CYAN))
386
+ result = indexer.ensure_index()
387
+ indexer.maintain()
388
+ click.echo(_color(f"{result} in {tp.elapsed:.2f}s", GREEN))
389
+ except Exception as e:
390
+ indexer.error(str(e))
391
+ click.echo(_color(f"Error: {e}", RED))
392
+ raise
428
393
 
394
+ watcher = FileWatcher(repo, index_dir)
429
395
  click.echo(
430
396
  _color(
431
397
  f"Watching {repo.resolve()} "
@@ -433,9 +399,6 @@ def watch(
433
399
  CYAN,
434
400
  )
435
401
  )
436
- index_dir.mkdir(parents=True, exist_ok=True)
437
- indexer = Indexer(repo)
438
- indexer.update_incremental()
439
402
 
440
403
  watcher.start()
441
404
  try:
@@ -85,7 +85,18 @@ class SemanticSearchConfig:
85
85
  class WatcherConfig:
86
86
  debounce: Annotated[int, "in seconds"] = 2
87
87
  exclude_patterns: Annotated[str, "besides .gitignore"] | None = None
88
- branch_switch_threshold: int = 50
88
+
89
+
90
+ @dataclass(frozen=True)
91
+ class GraphConfig:
92
+ """Module filtering for the code graph (CALLS, IMPORTS edges)."""
93
+
94
+ exclude_modules: Annotated[
95
+ list[str], "module names/prefixes to exclude from graph edges"
96
+ ] = field(default_factory=list)
97
+ include_modules: Annotated[
98
+ list[str], "module names to force-include (override excludes)"
99
+ ] = field(default_factory=list)
89
100
 
90
101
 
91
102
  @dataclass(frozen=True)
@@ -94,6 +105,7 @@ class Config:
94
105
  index: IndexConfig = field(default_factory=IndexConfig)
95
106
  semantic_search: SemanticSearchConfig = field(default_factory=SemanticSearchConfig)
96
107
  watcher: WatcherConfig = field(default_factory=WatcherConfig)
108
+ graph: GraphConfig = field(default_factory=GraphConfig)
97
109
 
98
110
 
99
111
  def _parse_boosting(data: dict[str, Any]) -> BoostingConfig:
@@ -204,6 +216,7 @@ def _load_config_impl() -> Config:
204
216
  default_data.get("semantic_search", {}) or {}
205
217
  ),
206
218
  watcher=WatcherConfig(**default_data.get("watcher", {})),
219
+ graph=GraphConfig(**default_data.get("graph", {})),
207
220
  )
208
221
 
209
222
 
@@ -288,4 +301,5 @@ def _deep_merge(overrides: dict, *, index_dir: Path) -> Config:
288
301
  index=IndexConfig(**merged.get("index", {})),
289
302
  semantic_search=_parse_semantic_search(merged.get("semantic_search", {}) or {}),
290
303
  watcher=WatcherConfig(**merged.get("watcher", {})),
304
+ graph=GraphConfig(**merged.get("graph", {})),
291
305
  )
@@ -29,17 +29,23 @@ def timed(phase: str) -> Callable[[F], F]:
29
29
  return decorator
30
30
 
31
31
 
32
- class timed_phase:
33
- """Context manager that logs block execution time at INFO level."""
32
+ class TimedPhase:
33
+ """Context manager that measures block execution time and optionally logs it."""
34
34
 
35
- def __init__(self, phase: str) -> None:
35
+ def __init__(self, phase: str, *, log: bool = True) -> None:
36
36
  self.phase = phase
37
+ self.log = log
37
38
  self.t0: float = 0.0
39
+ self.elapsed: float = 0.0
38
40
 
39
- def __enter__(self) -> timed_phase:
41
+ def __enter__(self) -> TimedPhase:
40
42
  self.t0 = time.perf_counter()
41
43
  return self
42
44
 
43
45
  def __exit__(self, *args: object) -> None:
44
- elapsed = time.perf_counter() - self.t0
45
- logger.info("%s: %.3fs", self.phase, elapsed)
46
+ self.elapsed = time.perf_counter() - self.t0
47
+ if self.log:
48
+ logger.info("%s: %.3fs", self.phase, self.elapsed)
49
+
50
+
51
+ timed_phase = TimedPhase # Convenience alias for context manager usage
@@ -33,11 +33,13 @@ def build_graph(
33
33
  except Exception as exc:
34
34
  logger.warning("Graph extraction failed for %s: %s", file_path, exc)
35
35
  resolved = graph.resolve_edges()
36
+ pruned = graph.prune_phantom_edges()
36
37
  logger.info(
37
- "Graph built: %d nodes, %d edges (%d call edges resolved)",
38
+ "Graph built: %d nodes, %d edges (%d resolved, %d phantoms pruned)",
38
39
  graph.node_count,
39
40
  graph.edge_count,
40
41
  resolved,
42
+ pruned,
41
43
  )
42
44
  return graph
43
45
 
@@ -140,6 +142,7 @@ def build_and_save_graph(
140
142
  except Exception as exc:
141
143
  logger.warning("Graph extraction failed for %s: %s", fp, exc)
142
144
  existing_graph.resolve_edges()
145
+ existing_graph.prune_phantom_edges()
143
146
  graph = existing_graph
144
147
  logger.info(
145
148
  "Graph incremental update: re-parsed %d files",
@@ -74,6 +74,10 @@ class CodeGraph:
74
74
  # (e.g. two files both define a function called "helper").
75
75
  self._symbol_index: dict[str, set[str]] = defaultdict(set)
76
76
 
77
+ # qualified_name -> {full node IDs} — enables resolving dotted names
78
+ # like "ClassName.method" to "src/a.py::ClassName.method".
79
+ self._qualified_index: dict[str, set[str]] = defaultdict(set)
80
+
77
81
  # dotted module name -> node ID — maps Python-style import paths
78
82
  # (e.g. "core.models") to the MODULE node they refer to.
79
83
  self._module_index: dict[str, str] = {}
@@ -85,6 +89,8 @@ class CodeGraph:
85
89
  def _index_node(self, node: GraphNode) -> None:
86
90
  """Register a node in all secondary indexes."""
87
91
  self._symbol_index[node.name].add(node.id)
92
+ if node.qualified_name != node.name:
93
+ self._qualified_index[node.qualified_name].add(node.id)
88
94
  self._file_index[node.file_path].add(node.id)
89
95
  if node.kind == NodeKind.MODULE:
90
96
  # Register all suffix variants so that "import models" and
@@ -98,6 +104,10 @@ class CodeGraph:
98
104
  sym_entries = self._symbol_index.get(node.name)
99
105
  if sym_entries is not None:
100
106
  sym_entries.discard(node.id)
107
+ if node.qualified_name != node.name:
108
+ qual_entries = self._qualified_index.get(node.qualified_name)
109
+ if qual_entries is not None:
110
+ qual_entries.discard(node.id)
101
111
  file_entries = self._file_index.get(node.file_path)
102
112
  if file_entries is not None:
103
113
  file_entries.discard(node.id)
@@ -140,17 +150,27 @@ class CodeGraph:
140
150
  return len(to_remove)
141
151
 
142
152
  def resolve_symbol(self, name: str, caller_file: str | None = None) -> str | None:
143
- """Resolve a short/bare name to a fully-qualified node ID.
153
+ """Resolve a short, qualified, or dotted name to a fully-qualified node ID.
154
+
155
+ Lookup order:
156
+ 1. Exact node ID match (fast path).
157
+ 2. Bare name via ``_symbol_index`` (unique match only).
158
+ 3. Qualified name via ``_qualified_index`` (e.g. "ClassName.method").
144
159
 
145
160
  Returns:
146
161
  Full node ID, or None if the name cannot be uniquely resolved.
147
162
  """
148
- # Already a full node ID (e.g. "src/a.py::foo") — fast path
149
163
  if name in self._g and self._g.nodes[name].get("data") is not None:
150
164
  return name
165
+
151
166
  candidates = self._symbol_index.get(name, set())
152
167
  if len(candidates) == 1:
153
168
  return next(iter(candidates))
169
+
170
+ qual_candidates = self._qualified_index.get(name, set())
171
+ if len(qual_candidates) == 1:
172
+ return next(iter(qual_candidates))
173
+
154
174
  return None
155
175
 
156
176
  def resolve_edges(self) -> int:
@@ -194,6 +214,46 @@ class CodeGraph:
194
214
  logger.info("Resolved %d edges via symbol/module index", len(to_add))
195
215
  return len(to_add)
196
216
 
217
+ def prune_phantom_edges(self) -> int:
218
+ """Remove CALLS edges whose target is a phantom with no resolution candidates.
219
+
220
+ These are typically stdlib/third-party methods (``append``, ``get``,
221
+ ``join``, etc.) that will never resolve to a project node. Removing
222
+ them reduces noise and improves ``get_impact_radius`` traversal.
223
+
224
+ Returns:
225
+ Number of edges pruned.
226
+ """
227
+ to_remove: list[tuple[str, str]] = []
228
+ for u, v, data in self._g.edges(data=True):
229
+ if data.get("kind") != EdgeKind.CALLS:
230
+ continue
231
+ node_data = self._g.nodes.get(v, {})
232
+ if node_data and node_data.get("data") is not None:
233
+ continue
234
+ if (
235
+ not self._symbol_index.get(v)
236
+ and not self._qualified_index.get(v)
237
+ ):
238
+ to_remove.append((u, v))
239
+
240
+ for u, v in to_remove:
241
+ if self._g.has_edge(u, v):
242
+ self._g.remove_edge(u, v)
243
+
244
+ # Clean up orphan phantom nodes (no remaining edges)
245
+ phantom_nodes = [
246
+ n for n in list(self._g.nodes)
247
+ if self._g.nodes[n].get("data") is None
248
+ and self._g.degree(n) == 0
249
+ ]
250
+ for n in phantom_nodes:
251
+ self._g.remove_node(n)
252
+
253
+ if to_remove:
254
+ logger.info("Pruned %d phantom CALLS edges", len(to_remove))
255
+ return len(to_remove)
256
+
197
257
  def _resolve_path_target(self, target: str) -> str | None:
198
258
  """Try to match a path-style target to an existing MODULE node."""
199
259
  if "/" not in target:
@@ -86,11 +86,8 @@ def build_module_filter() -> frozenset[str]:
86
86
  from CALLS and IMPORTS edges.
87
87
  """
88
88
  config = get_config()
89
- graph_cfg = getattr(config, "graph", None) or {}
90
- if not isinstance(graph_cfg, dict):
91
- graph_cfg = {}
92
- extra_excludes = set(graph_cfg.get("exclude_modules") or [])
93
- force_includes = set(graph_cfg.get("include_modules") or [])
89
+ extra_excludes = set(config.graph.exclude_modules or [])
90
+ force_includes = set(config.graph.include_modules or [])
94
91
  return frozenset((_DEFAULT_EXCLUDED_MODULES | extra_excludes) - force_includes)
95
92
 
96
93
 
@@ -391,12 +388,13 @@ class GraphTreeSitterParser(BaseTreeSitterParser):
391
388
  )
392
389
  )
393
390
 
394
- # Top-level functions are registered in FileContext so calls
395
- # like ``my_func()`` resolve to the fully qualified node_id.
396
- # TODO: register class methods too (e.g. ClassName.method) so that
397
- # static/classmethod calls at module level can resolve
391
+ # Register in FileContext so calls resolve to the full node_id.
392
+ # Top-level: ``my_func()`` node_id
393
+ # Class methods: ``ClassName.method()`` node_id (via qualified name)
398
394
  if not scope_stack:
399
395
  self._file_ctx.register_definition(name, node_id)
396
+ else:
397
+ self._file_ctx.register_definition(qualified, node_id)
400
398
 
401
399
  # Recurse into the function body under a new scope
402
400
  new_scope = [*scope_stack, name]
@@ -80,18 +80,12 @@ class Indexer:
80
80
  """Full rebuild: discover, chunk, embed, and store all source files."""
81
81
 
82
82
  current = self._state.current_state
83
- last_branch = current.branch if current is not None else None
84
- branch_switched = self._git.is_branch_switched(last_branch=last_branch)
85
- if branch_switched:
86
- return self.update_incremental()
87
-
88
83
  current_run = current.current_run if current else None
89
84
  saved_paths = current_run.paths_to_process if current_run else []
90
85
  processed_count = current_run.processed_count if current_run else 0
91
86
 
92
87
  can_resume = (
93
- not branch_switched
94
- and self._state.is_in_progress
88
+ self._state.is_in_progress
95
89
  and self._state.has_partial_progress
96
90
  )
97
91
 
@@ -150,17 +144,6 @@ class Indexer:
150
144
  self._state.set_incomplete()
151
145
 
152
146
  current = self._state.current_state
153
- state_branch = current.branch if current else None
154
- active_branch = self._git.get_current_branch()
155
-
156
- if self._git.is_branch_switched(last_branch=state_branch):
157
- logger.info(
158
- "Branch switched %s -> %s; syncing index",
159
- state_branch,
160
- active_branch,
161
- )
162
- return self._sync_after_branch_switch()
163
-
164
147
  to_add, to_remove = self._git.get_files_to_index(
165
148
  last_commit=current.last_commit if current else None
166
149
  )
@@ -193,55 +176,6 @@ class Indexer:
193
176
  file_hashes=file_hashes,
194
177
  )
195
178
 
196
- def _sync_after_branch_switch(self) -> IndexResult:
197
- """Sync index to current branch after a switch. Returns IndexResult."""
198
- file_hashes = self._state.file_hashes.copy()
199
- py_files = self._git.discover_files()
200
-
201
- # All .py files were deleted from git
202
- to_remove: list[str] = []
203
- if not py_files:
204
- to_remove = list(file_hashes)
205
-
206
- if to_remove:
207
- self._store.delete_by_paths(to_remove)
208
- index_result = IndexResult(removed=len(file_hashes))
209
- file_hashes.clear()
210
-
211
- self._state.file_hashes = file_hashes
212
- self._state.finish(
213
- last_commit=self._git.get_head_commit(),
214
- branch=self._git.get_current_branch(),
215
- )
216
- self._refresh_graph()
217
- return index_result
218
-
219
- rel_paths_current = {str(p.relative_to(self._repo_root)) for p in py_files}
220
- # Deleted files on current branch
221
- to_remove = [p for p in file_hashes if p not in rel_paths_current]
222
- if to_remove:
223
- self._store.delete_by_paths(to_remove)
224
- for p in to_remove:
225
- file_hashes.pop(p, None)
226
-
227
- changed_files = files_with_changed_content(
228
- repo=self._repo_root, paths=py_files, file_hashes=file_hashes
229
- )
230
-
231
- if not changed_files and not to_remove:
232
- self._state.finish(
233
- last_commit=self._git.get_head_commit(),
234
- branch=self._git.get_current_branch(),
235
- )
236
- self._refresh_graph()
237
- logger.info("Branch switch: index already in sync (no changes)")
238
- return IndexResult(cached=len(self._state.file_hashes))
239
-
240
- return self._update(
241
- paths_to_add=changed_files,
242
- file_hashes=file_hashes,
243
- )
244
-
245
179
  def _run_batch_loop(
246
180
  self,
247
181
  rel_paths: list[str],
@@ -423,6 +357,12 @@ class Indexer:
423
357
  """Return True if the index exists at index_dir."""
424
358
  return index_exists(self._index_dir)
425
359
 
360
+ def ensure_index(self) -> IndexResult:
361
+ """Build full index if missing, else incremental update."""
362
+ if not self.index_exists():
363
+ return self.build_full()
364
+ return self.update_incremental()
365
+
426
366
  def error(self, exc: str) -> None:
427
367
  """Mark the current run as errored with the given exception message."""
428
368
  self._state.set_errored(exc=exc)
@@ -19,6 +19,7 @@ from watchdog.events import (
19
19
  from watchdog.observers import Observer
20
20
  from watchdog.observers.polling import PollingObserver
21
21
 
22
+ from coderay.core.timing import timed_phase
22
23
  from coderay.parsing.languages import get_supported_extensions
23
24
  from coderay.vcs.git import load_gitignore
24
25
 
@@ -35,7 +36,6 @@ class _DebouncedHandler:
35
36
  gitignore_spec: pathspec.PathSpec,
36
37
  supported_extensions: set[str],
37
38
  debounce_seconds: float,
38
- branch_switch_threshold: int,
39
39
  extra_exclude: list[str],
40
40
  on_batch: Callable[[set[str], set[str]], None],
41
41
  ) -> None:
@@ -45,7 +45,6 @@ class _DebouncedHandler:
45
45
  self._gitignore = gitignore_spec
46
46
  self._extensions = supported_extensions
47
47
  self._debounce = debounce_seconds
48
- self._threshold = branch_switch_threshold
49
48
  self._on_batch = on_batch
50
49
 
51
50
  extra_spec = pathspec.PathSpec.from_lines("gitignore", extra_exclude)
@@ -160,13 +159,6 @@ class _DebouncedHandler:
160
159
  if not changed and not removed:
161
160
  return
162
161
 
163
- total = len(changed) + len(removed)
164
- if total >= self._threshold:
165
- logger.info(
166
- "Branch switch detected (%d files); delegating to full sync",
167
- total,
168
- )
169
-
170
162
  try:
171
163
  self._on_batch(changed, removed)
172
164
  except Exception:
@@ -195,7 +187,6 @@ class FileWatcher:
195
187
 
196
188
  watch_cfg = self._config.watcher
197
189
  self._debounce = float(watch_cfg.debounce)
198
- self._threshold = int(watch_cfg.branch_switch_threshold)
199
190
  self._extra_exclude = list(watch_cfg.exclude_patterns or [])
200
191
 
201
192
  self._observer: Observer | PollingObserver | None = None
@@ -219,7 +210,6 @@ class FileWatcher:
219
210
  gitignore_spec=gitignore_spec,
220
211
  supported_extensions=extensions,
221
212
  debounce_seconds=self._debounce,
222
- branch_switch_threshold=self._threshold,
223
213
  extra_exclude=self._extra_exclude,
224
214
  on_batch=batch_fn,
225
215
  )
@@ -265,30 +255,21 @@ class FileWatcher:
265
255
  self._observer.join(timeout=timeout)
266
256
 
267
257
  def _default_batch(self, changed: set[str], removed: set[str]) -> None:
268
- """Default callback: acquire lock and run Indexer.update_paths."""
258
+ """Default callback: acquire lock and run Indexer.update_incremental."""
269
259
  from coderay.core.lock import acquire_indexer_lock
270
260
  from coderay.pipeline.indexer import Indexer
271
261
 
272
- total = len(changed) + len(removed)
273
- t0 = time.time()
274
-
275
262
  try:
276
- with acquire_indexer_lock(self._index_dir, timeout=30):
277
- indexer = Indexer(self._repo_root)
278
- if total >= self._threshold:
263
+ with timed_phase("update", log=False) as tp:
264
+ with acquire_indexer_lock(self._index_dir, timeout=30):
265
+ indexer = Indexer(self._repo_root)
279
266
  result = indexer.update_incremental()
280
- else:
281
- result = indexer.update_paths(
282
- changed=sorted(changed),
283
- removed=sorted(removed),
284
- )
285
- elapsed = time.time() - t0
286
267
  self._update_count += 1
287
268
  logger.info(
288
269
  "Update #%d: %s (%.2fs) [%d changed, %d removed]",
289
270
  self._update_count,
290
271
  result,
291
- elapsed,
272
+ tp.elapsed,
292
273
  len(changed),
293
274
  len(removed),
294
275
  )
@@ -1,11 +1,11 @@
1
1
  from __future__ import annotations
2
2
 
3
3
  import logging
4
- import time
5
4
  from pathlib import Path
6
5
  from typing import Any
7
6
 
8
7
  from coderay.core.config import Config, get_config
8
+ from coderay.core.timing import timed_phase
9
9
  from coderay.embedding.base import Embedder, load_embedder_from_config
10
10
  from coderay.graph.builder import load_graph
11
11
  from coderay.retrieval.boosting import StructuralBooster
@@ -65,20 +65,18 @@ class Retrieval:
65
65
 
66
66
  store = self._get_store()
67
67
 
68
- t0 = time.perf_counter()
69
- query_vectors = self._embedder.embed([query])
70
- logger.info("Query embed took %.3fs", time.perf_counter() - t0)
68
+ with timed_phase("embed"):
69
+ query_vectors = self._embedder.embed([query])
71
70
 
72
71
  if not query_vectors:
73
72
  return []
74
- t1 = time.perf_counter()
75
- results = store.search(
76
- query_embedding=query_vectors[0],
77
- top_k=top_k,
78
- path_prefix=path_prefix,
79
- query_text=query,
80
- )
81
- logger.info("Vector search took %.3fs", time.perf_counter() - t1)
73
+ with timed_phase("vector_search"):
74
+ results = store.search(
75
+ query_embedding=query_vectors[0],
76
+ top_k=top_k,
77
+ path_prefix=path_prefix,
78
+ query_text=query,
79
+ )
82
80
 
83
81
  return self._booster.boost(results)
84
82
 
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: coderay
3
- Version: 1.0.4
3
+ Version: 1.0.6
4
4
  Summary: X-ray your codebase — semantic search, code graphs, file skeletons, and MCP server
5
5
  Author-email: Bogdan Copocean <bogdancopocean@gmail.com>
6
6
  License-Expression: MIT
@@ -64,6 +64,15 @@ AI coding assistants and a standalone CLI.
64
64
 
65
65
  ## Install
66
66
 
67
+ Create a virtual environment (recommended):
68
+
69
+ ```bash
70
+ python -m venv .venv
71
+ source .venv/bin/activate
72
+ ```
73
+
74
+ Then install:
75
+
67
76
  ```bash
68
77
  pip install coderay
69
78
  ```
@@ -79,6 +88,7 @@ For development:
79
88
  ```bash
80
89
  git clone https://github.com/bogdan-copocean/coderay.git
81
90
  cd coderay
91
+ python -m venv .venv && source .venv/bin/activate
82
92
  pip install -e ".[all]"
83
93
  ```
84
94
 
@@ -86,15 +96,22 @@ pip install -e ".[all]"
86
96
 
87
97
  ```bash
88
98
  cd /path/to/your/project
89
- coderay build --repo .
99
+ coderay watch --repo . # keeps index fresh while you work (recommended)
90
100
  coderay search "how does authentication work"
91
- coderay watch --repo .
92
101
  coderay graph --kind calls
93
102
  coderay skeleton src/app/main.py
94
103
  ```
95
104
 
105
+ > **Use `watch`, not `build`.** `coderay build` is a one-off; while you work, the index will get stale. `coderay watch` re-indexes on file changes and is the go-to for active development.
106
+
96
107
  ## MCP server (Claude Code / Cursor)
97
108
 
109
+ Find the MCP executable path:
110
+
111
+ ```bash
112
+ which coderay-mcp
113
+ ```
114
+
98
115
  Add to `~/.claude/claude_code_config.json` or Cursor MCP settings:
99
116
 
100
117
  ```json
@@ -108,13 +125,14 @@ Add to `~/.claude/claude_code_config.json` or Cursor MCP settings:
108
125
  }
109
126
  ```
110
127
 
128
+ Replace `/path/to/your/.venv/bin/coderay-mcp` with the output of `which coderay-mcp`.
129
+
111
130
  ## CLI reference
112
131
 
113
132
  | Command | Description |
114
133
  |---|---|
115
- | `coderay build [--full] --repo .` | Build index (incremental or full rebuild) |
116
- | `coderay update --repo .` | Incremental update (changed files only) |
117
- | `coderay watch --repo . [--debounce N]` | Watch for file changes, re-index automatically |
134
+ | `coderay watch --repo . [--debounce N]` | **Recommended.** Watch for file changes, re-index automatically |
135
+ | `coderay build [--full] --repo .` | Build or incremental update. Use `--full` for full rebuild |
118
136
  | `coderay search "query" [--top-k N]` | Semantic search |
119
137
  | `coderay list [--by-file]` | List indexed chunks |
120
138
  | `coderay status` | Index state, branch, commit, chunk count |
@@ -124,39 +142,37 @@ Add to `~/.claude/claude_code_config.json` or Cursor MCP settings:
124
142
 
125
143
  ## Configuration
126
144
 
127
- Optional `config.yaml` in the index directory:
145
+ File discovery and ignoring are based on `.git` and `.gitignore`. The `.git` directory is excluded; files matching `.gitignore` are not indexed. Config `exclude_patterns` add extra exclusions on top of that.
146
+
147
+ Optional `config.yaml` in the index directory (default: `.index/config.yaml`):
128
148
 
129
149
  ```yaml
130
150
  embedder:
131
- provider: local
132
- model: all-MiniLM-L6-v2
151
+ model: sentence-transformers/all-MiniLM-L6-v2
133
152
  dimensions: 384
134
153
 
135
- search:
136
- boost_rules:
137
- "tests/": 0.5
138
- "src/core/": 1.2
139
-
140
- graph:
141
- exclude_callees:
142
- - "our_sdk_helper"
143
- include_callees:
144
- - "isinstance"
145
-
146
- watch:
147
- debounce_seconds: 2
148
- branch_switch_threshold: 50
149
- exclude_patterns:
154
+ index:
155
+ exclude_patterns: # besides .gitignore
150
156
  - "*.log"
151
- ```
152
157
 
153
- ## Development
158
+ semantic_search:
159
+ boosting:
160
+ penalties:
161
+ - pattern: "(^|/)tests?/"
162
+ factor: 0.5
163
+ - pattern: "(^|/)test_[^/]+\\.py$"
164
+ factor: 0.5
165
+ bonuses:
166
+ - pattern: "(^|/)src/"
167
+ factor: 1.1
168
+ metric: cosine
169
+
170
+ watcher:
171
+ debounce: 2
172
+ exclude_patterns: # besides .gitignore
173
+ - "*.log"
154
174
 
155
- ```bash
156
- pip install -e ".[dev]"
157
- make test
158
- make lint
159
- make format
175
+ graph:
176
+ exclude_modules: [] # module names to exclude from CALLS/IMPORTS edges
177
+ include_modules: [] # force-include (override excludes)
160
178
  ```
161
-
162
- Requires Python >= 3.10 and Git.
File without changes
File without changes
File without changes
File without changes