coderay 1.0.4__tar.gz → 1.0.6__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {coderay-1.0.4/src/coderay.egg-info → coderay-1.0.6}/PKG-INFO +49 -33
- {coderay-1.0.4 → coderay-1.0.6}/README.md +48 -32
- {coderay-1.0.4 → coderay-1.0.6}/pyproject.toml +1 -1
- {coderay-1.0.4 → coderay-1.0.6}/src/coderay/cli/commands.py +38 -75
- {coderay-1.0.4 → coderay-1.0.6}/src/coderay/core/config.py +15 -1
- {coderay-1.0.4 → coderay-1.0.6}/src/coderay/core/timing.py +12 -6
- {coderay-1.0.4 → coderay-1.0.6}/src/coderay/graph/builder.py +4 -1
- {coderay-1.0.4 → coderay-1.0.6}/src/coderay/graph/code_graph.py +62 -2
- {coderay-1.0.4 → coderay-1.0.6}/src/coderay/graph/extractor.py +7 -9
- {coderay-1.0.4 → coderay-1.0.6}/src/coderay/pipeline/indexer.py +7 -67
- {coderay-1.0.4 → coderay-1.0.6}/src/coderay/pipeline/watcher.py +6 -25
- {coderay-1.0.4 → coderay-1.0.6}/src/coderay/retrieval/search.py +10 -12
- {coderay-1.0.4 → coderay-1.0.6/src/coderay.egg-info}/PKG-INFO +49 -33
- {coderay-1.0.4 → coderay-1.0.6}/LICENSE +0 -0
- {coderay-1.0.4 → coderay-1.0.6}/setup.cfg +0 -0
- {coderay-1.0.4 → coderay-1.0.6}/src/coderay/__init__.py +0 -0
- {coderay-1.0.4 → coderay-1.0.6}/src/coderay/chunking/__init__.py +0 -0
- {coderay-1.0.4 → coderay-1.0.6}/src/coderay/chunking/chunker.py +0 -0
- {coderay-1.0.4 → coderay-1.0.6}/src/coderay/cli/__init__.py +0 -0
- {coderay-1.0.4 → coderay-1.0.6}/src/coderay/core/__init__.py +0 -0
- {coderay-1.0.4 → coderay-1.0.6}/src/coderay/core/lock.py +0 -0
- {coderay-1.0.4 → coderay-1.0.6}/src/coderay/core/models.py +0 -0
- {coderay-1.0.4 → coderay-1.0.6}/src/coderay/core/utils.py +0 -0
- {coderay-1.0.4 → coderay-1.0.6}/src/coderay/embedding/__init__.py +0 -0
- {coderay-1.0.4 → coderay-1.0.6}/src/coderay/embedding/base.py +0 -0
- {coderay-1.0.4 → coderay-1.0.6}/src/coderay/embedding/local.py +0 -0
- {coderay-1.0.4 → coderay-1.0.6}/src/coderay/graph/__init__.py +0 -0
- {coderay-1.0.4 → coderay-1.0.6}/src/coderay/mcp_server/__init__.py +0 -0
- {coderay-1.0.4 → coderay-1.0.6}/src/coderay/mcp_server/errors.py +0 -0
- {coderay-1.0.4 → coderay-1.0.6}/src/coderay/mcp_server/server.py +0 -0
- {coderay-1.0.4 → coderay-1.0.6}/src/coderay/parsing/base.py +0 -0
- {coderay-1.0.4 → coderay-1.0.6}/src/coderay/parsing/languages.py +0 -0
- {coderay-1.0.4 → coderay-1.0.6}/src/coderay/pipeline/__init__.py +0 -0
- {coderay-1.0.4 → coderay-1.0.6}/src/coderay/retrieval/__init__.py +0 -0
- {coderay-1.0.4 → coderay-1.0.6}/src/coderay/retrieval/boosting.py +0 -0
- {coderay-1.0.4 → coderay-1.0.6}/src/coderay/retrieval/models.py +0 -0
- {coderay-1.0.4 → coderay-1.0.6}/src/coderay/skeleton/__init__.py +0 -0
- {coderay-1.0.4 → coderay-1.0.6}/src/coderay/skeleton/extractor.py +0 -0
- {coderay-1.0.4 → coderay-1.0.6}/src/coderay/state/__init__.py +0 -0
- {coderay-1.0.4 → coderay-1.0.6}/src/coderay/state/machine.py +0 -0
- {coderay-1.0.4 → coderay-1.0.6}/src/coderay/state/version.py +0 -0
- {coderay-1.0.4 → coderay-1.0.6}/src/coderay/storage/__init__.py +0 -0
- {coderay-1.0.4 → coderay-1.0.6}/src/coderay/storage/lancedb.py +0 -0
- {coderay-1.0.4 → coderay-1.0.6}/src/coderay/vcs/__init__.py +0 -0
- {coderay-1.0.4 → coderay-1.0.6}/src/coderay/vcs/git.py +0 -0
- {coderay-1.0.4 → coderay-1.0.6}/src/coderay.egg-info/SOURCES.txt +0 -0
- {coderay-1.0.4 → coderay-1.0.6}/src/coderay.egg-info/dependency_links.txt +0 -0
- {coderay-1.0.4 → coderay-1.0.6}/src/coderay.egg-info/entry_points.txt +0 -0
- {coderay-1.0.4 → coderay-1.0.6}/src/coderay.egg-info/requires.txt +0 -0
- {coderay-1.0.4 → coderay-1.0.6}/src/coderay.egg-info/top_level.txt +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: coderay
|
|
3
|
-
Version: 1.0.4
|
|
3
|
+
Version: 1.0.6
|
|
4
4
|
Summary: X-ray your codebase — semantic search, code graphs, file skeletons, and MCP server
|
|
5
5
|
Author-email: Bogdan Copocean <bogdancopocean@gmail.com>
|
|
6
6
|
License-Expression: MIT
|
|
@@ -64,6 +64,15 @@ AI coding assistants and a standalone CLI.
|
|
|
64
64
|
|
|
65
65
|
## Install
|
|
66
66
|
|
|
67
|
+
Create a virtual environment (recommended):
|
|
68
|
+
|
|
69
|
+
```bash
|
|
70
|
+
python -m venv .venv
|
|
71
|
+
source .venv/bin/activate
|
|
72
|
+
```
|
|
73
|
+
|
|
74
|
+
Then install:
|
|
75
|
+
|
|
67
76
|
```bash
|
|
68
77
|
pip install coderay
|
|
69
78
|
```
|
|
@@ -79,6 +88,7 @@ For development:
|
|
|
79
88
|
```bash
|
|
80
89
|
git clone https://github.com/bogdan-copocean/coderay.git
|
|
81
90
|
cd coderay
|
|
91
|
+
python -m venv .venv && source .venv/bin/activate
|
|
82
92
|
pip install -e ".[all]"
|
|
83
93
|
```
|
|
84
94
|
|
|
@@ -86,15 +96,22 @@ pip install -e ".[all]"
|
|
|
86
96
|
|
|
87
97
|
```bash
|
|
88
98
|
cd /path/to/your/project
|
|
89
|
-
coderay build
|
|
99
|
+
coderay watch --repo . # keeps index fresh while you work (recommended)
|
|
90
100
|
coderay search "how does authentication work"
|
|
91
|
-
coderay watch --repo .
|
|
92
101
|
coderay graph --kind calls
|
|
93
102
|
coderay skeleton src/app/main.py
|
|
94
103
|
```
|
|
95
104
|
|
|
105
|
+
> **Use `watch`, not `build`.** `coderay build` is a one-off; while you work, the index will get stale. `coderay watch` re-indexes on file changes and is the go-to for active development.
|
|
106
|
+
|
|
96
107
|
## MCP server (Claude Code / Cursor)
|
|
97
108
|
|
|
109
|
+
Find the MCP executable path:
|
|
110
|
+
|
|
111
|
+
```bash
|
|
112
|
+
which coderay-mcp
|
|
113
|
+
```
|
|
114
|
+
|
|
98
115
|
Add to `~/.claude/claude_code_config.json` or Cursor MCP settings:
|
|
99
116
|
|
|
100
117
|
```json
|
|
@@ -108,13 +125,14 @@ Add to `~/.claude/claude_code_config.json` or Cursor MCP settings:
|
|
|
108
125
|
}
|
|
109
126
|
```
|
|
110
127
|
|
|
128
|
+
Replace `/path/to/your/.venv/bin/coderay-mcp` with the output of `which coderay-mcp`.
|
|
129
|
+
|
|
111
130
|
## CLI reference
|
|
112
131
|
|
|
113
132
|
| Command | Description |
|
|
114
133
|
|---|---|
|
|
115
|
-
| `coderay
|
|
116
|
-
| `coderay
|
|
117
|
-
| `coderay watch --repo . [--debounce N]` | Watch for file changes, re-index automatically |
|
|
134
|
+
| `coderay watch --repo . [--debounce N]` | **Recommended.** Watch for file changes, re-index automatically |
|
|
135
|
+
| `coderay build [--full] --repo .` | Build or incremental update. Use `--full` for full rebuild |
|
|
118
136
|
| `coderay search "query" [--top-k N]` | Semantic search |
|
|
119
137
|
| `coderay list [--by-file]` | List indexed chunks |
|
|
120
138
|
| `coderay status` | Index state, branch, commit, chunk count |
|
|
@@ -124,39 +142,37 @@ Add to `~/.claude/claude_code_config.json` or Cursor MCP settings:
|
|
|
124
142
|
|
|
125
143
|
## Configuration
|
|
126
144
|
|
|
127
|
-
|
|
145
|
+
File discovery and ignoring are based on `.git` and `.gitignore`. The `.git` directory is excluded; files matching `.gitignore` are not indexed. Config `exclude_patterns` add extra exclusions on top of that.
|
|
146
|
+
|
|
147
|
+
Optional `config.yaml` in the index directory (default: `.index/config.yaml`):
|
|
128
148
|
|
|
129
149
|
```yaml
|
|
130
150
|
embedder:
|
|
131
|
-
|
|
132
|
-
model: all-MiniLM-L6-v2
|
|
151
|
+
model: sentence-transformers/all-MiniLM-L6-v2
|
|
133
152
|
dimensions: 384
|
|
134
153
|
|
|
135
|
-
|
|
136
|
-
|
|
137
|
-
"tests/": 0.5
|
|
138
|
-
"src/core/": 1.2
|
|
139
|
-
|
|
140
|
-
graph:
|
|
141
|
-
exclude_callees:
|
|
142
|
-
- "our_sdk_helper"
|
|
143
|
-
include_callees:
|
|
144
|
-
- "isinstance"
|
|
145
|
-
|
|
146
|
-
watch:
|
|
147
|
-
debounce_seconds: 2
|
|
148
|
-
branch_switch_threshold: 50
|
|
149
|
-
exclude_patterns:
|
|
154
|
+
index:
|
|
155
|
+
exclude_patterns: # besides .gitignore
|
|
150
156
|
- "*.log"
|
|
151
|
-
```
|
|
152
157
|
|
|
153
|
-
|
|
158
|
+
semantic_search:
|
|
159
|
+
boosting:
|
|
160
|
+
penalties:
|
|
161
|
+
- pattern: "(^|/)tests?/"
|
|
162
|
+
factor: 0.5
|
|
163
|
+
- pattern: "(^|/)test_[^/]+\\.py$"
|
|
164
|
+
factor: 0.5
|
|
165
|
+
bonuses:
|
|
166
|
+
- pattern: "(^|/)src/"
|
|
167
|
+
factor: 1.1
|
|
168
|
+
metric: cosine
|
|
169
|
+
|
|
170
|
+
watcher:
|
|
171
|
+
debounce: 2
|
|
172
|
+
exclude_patterns: # besides .gitignore
|
|
173
|
+
- "*.log"
|
|
154
174
|
|
|
155
|
-
|
|
156
|
-
|
|
157
|
-
|
|
158
|
-
make lint
|
|
159
|
-
make format
|
|
175
|
+
graph:
|
|
176
|
+
exclude_modules: [] # module names to exclude from CALLS/IMPORTS edges
|
|
177
|
+
include_modules: [] # force-include (override excludes)
|
|
160
178
|
```
|
|
161
|
-
|
|
162
|
-
Requires Python >= 3.10 and Git.
|
|
@@ -15,6 +15,15 @@ AI coding assistants and a standalone CLI.
|
|
|
15
15
|
|
|
16
16
|
## Install
|
|
17
17
|
|
|
18
|
+
Create a virtual environment (recommended):
|
|
19
|
+
|
|
20
|
+
```bash
|
|
21
|
+
python -m venv .venv
|
|
22
|
+
source .venv/bin/activate
|
|
23
|
+
```
|
|
24
|
+
|
|
25
|
+
Then install:
|
|
26
|
+
|
|
18
27
|
```bash
|
|
19
28
|
pip install coderay
|
|
20
29
|
```
|
|
@@ -30,6 +39,7 @@ For development:
|
|
|
30
39
|
```bash
|
|
31
40
|
git clone https://github.com/bogdan-copocean/coderay.git
|
|
32
41
|
cd coderay
|
|
42
|
+
python -m venv .venv && source .venv/bin/activate
|
|
33
43
|
pip install -e ".[all]"
|
|
34
44
|
```
|
|
35
45
|
|
|
@@ -37,15 +47,22 @@ pip install -e ".[all]"
|
|
|
37
47
|
|
|
38
48
|
```bash
|
|
39
49
|
cd /path/to/your/project
|
|
40
|
-
coderay build
|
|
50
|
+
coderay watch --repo . # keeps index fresh while you work (recommended)
|
|
41
51
|
coderay search "how does authentication work"
|
|
42
|
-
coderay watch --repo .
|
|
43
52
|
coderay graph --kind calls
|
|
44
53
|
coderay skeleton src/app/main.py
|
|
45
54
|
```
|
|
46
55
|
|
|
56
|
+
> **Use `watch`, not `build`.** `coderay build` is a one-off; while you work, the index will get stale. `coderay watch` re-indexes on file changes and is the go-to for active development.
|
|
57
|
+
|
|
47
58
|
## MCP server (Claude Code / Cursor)
|
|
48
59
|
|
|
60
|
+
Find the MCP executable path:
|
|
61
|
+
|
|
62
|
+
```bash
|
|
63
|
+
which coderay-mcp
|
|
64
|
+
```
|
|
65
|
+
|
|
49
66
|
Add to `~/.claude/claude_code_config.json` or Cursor MCP settings:
|
|
50
67
|
|
|
51
68
|
```json
|
|
@@ -59,13 +76,14 @@ Add to `~/.claude/claude_code_config.json` or Cursor MCP settings:
|
|
|
59
76
|
}
|
|
60
77
|
```
|
|
61
78
|
|
|
79
|
+
Replace `/path/to/your/.venv/bin/coderay-mcp` with the output of `which coderay-mcp`.
|
|
80
|
+
|
|
62
81
|
## CLI reference
|
|
63
82
|
|
|
64
83
|
| Command | Description |
|
|
65
84
|
|---|---|
|
|
66
|
-
| `coderay
|
|
67
|
-
| `coderay
|
|
68
|
-
| `coderay watch --repo . [--debounce N]` | Watch for file changes, re-index automatically |
|
|
85
|
+
| `coderay watch --repo . [--debounce N]` | **Recommended.** Watch for file changes, re-index automatically |
|
|
86
|
+
| `coderay build [--full] --repo .` | Build or incremental update. Use `--full` for full rebuild |
|
|
69
87
|
| `coderay search "query" [--top-k N]` | Semantic search |
|
|
70
88
|
| `coderay list [--by-file]` | List indexed chunks |
|
|
71
89
|
| `coderay status` | Index state, branch, commit, chunk count |
|
|
@@ -75,39 +93,37 @@ Add to `~/.claude/claude_code_config.json` or Cursor MCP settings:
|
|
|
75
93
|
|
|
76
94
|
## Configuration
|
|
77
95
|
|
|
78
|
-
|
|
96
|
+
File discovery and ignoring are based on `.git` and `.gitignore`. The `.git` directory is excluded; files matching `.gitignore` are not indexed. Config `exclude_patterns` add extra exclusions on top of that.
|
|
97
|
+
|
|
98
|
+
Optional `config.yaml` in the index directory (default: `.index/config.yaml`):
|
|
79
99
|
|
|
80
100
|
```yaml
|
|
81
101
|
embedder:
|
|
82
|
-
|
|
83
|
-
model: all-MiniLM-L6-v2
|
|
102
|
+
model: sentence-transformers/all-MiniLM-L6-v2
|
|
84
103
|
dimensions: 384
|
|
85
104
|
|
|
86
|
-
|
|
87
|
-
|
|
88
|
-
"tests/": 0.5
|
|
89
|
-
"src/core/": 1.2
|
|
90
|
-
|
|
91
|
-
graph:
|
|
92
|
-
exclude_callees:
|
|
93
|
-
- "our_sdk_helper"
|
|
94
|
-
include_callees:
|
|
95
|
-
- "isinstance"
|
|
96
|
-
|
|
97
|
-
watch:
|
|
98
|
-
debounce_seconds: 2
|
|
99
|
-
branch_switch_threshold: 50
|
|
100
|
-
exclude_patterns:
|
|
105
|
+
index:
|
|
106
|
+
exclude_patterns: # besides .gitignore
|
|
101
107
|
- "*.log"
|
|
102
|
-
```
|
|
103
108
|
|
|
104
|
-
|
|
109
|
+
semantic_search:
|
|
110
|
+
boosting:
|
|
111
|
+
penalties:
|
|
112
|
+
- pattern: "(^|/)tests?/"
|
|
113
|
+
factor: 0.5
|
|
114
|
+
- pattern: "(^|/)test_[^/]+\\.py$"
|
|
115
|
+
factor: 0.5
|
|
116
|
+
bonuses:
|
|
117
|
+
- pattern: "(^|/)src/"
|
|
118
|
+
factor: 1.1
|
|
119
|
+
metric: cosine
|
|
120
|
+
|
|
121
|
+
watcher:
|
|
122
|
+
debounce: 2
|
|
123
|
+
exclude_patterns: # besides .gitignore
|
|
124
|
+
- "*.log"
|
|
105
125
|
|
|
106
|
-
|
|
107
|
-
|
|
108
|
-
|
|
109
|
-
make lint
|
|
110
|
-
make format
|
|
126
|
+
graph:
|
|
127
|
+
exclude_modules: [] # module names to exclude from CALLS/IMPORTS edges
|
|
128
|
+
include_modules: [] # force-include (override excludes)
|
|
111
129
|
```
|
|
112
|
-
|
|
113
|
-
Requires Python >= 3.10 and Git.
|
|
@@ -9,6 +9,7 @@ import click
|
|
|
9
9
|
from dotenv import load_dotenv
|
|
10
10
|
|
|
11
11
|
from coderay.core.lock import acquire_indexer_lock
|
|
12
|
+
from coderay.core.timing import timed_phase
|
|
12
13
|
from coderay.pipeline.indexer import Indexer
|
|
13
14
|
from coderay.retrieval.search import Retrieval
|
|
14
15
|
from coderay.state.machine import StateMachine
|
|
@@ -45,6 +46,7 @@ def _setup_logging(verbose: bool = False) -> None:
|
|
|
45
46
|
|
|
46
47
|
|
|
47
48
|
@click.group()
|
|
49
|
+
@click.version_option(package_name="coderay", prog_name="coderay")
|
|
48
50
|
@click.option("-v", "--verbose", is_flag=True, default=False, help="Verbose logging")
|
|
49
51
|
@click.pass_context
|
|
50
52
|
def cli(ctx: click.Context, verbose: bool) -> None:
|
|
@@ -74,61 +76,20 @@ def build(ctx: click.Context, full: bool, repo: Path) -> None:
|
|
|
74
76
|
index_dir = Path(config.index.path)
|
|
75
77
|
index_dir.mkdir(parents=True, exist_ok=True)
|
|
76
78
|
indexer = Indexer(repo)
|
|
77
|
-
t0 = time.time()
|
|
78
79
|
try:
|
|
79
|
-
with
|
|
80
|
-
|
|
81
|
-
|
|
82
|
-
|
|
83
|
-
|
|
84
|
-
|
|
85
|
-
|
|
86
|
-
|
|
87
|
-
|
|
88
|
-
|
|
89
|
-
|
|
90
|
-
click.echo(_color("Updating index (incremental)...", CYAN))
|
|
91
|
-
result = indexer.update_incremental()
|
|
92
|
-
click.echo(
|
|
93
|
-
_color(
|
|
94
|
-
f"{result} in {time.time() - t0:.2f}s",
|
|
95
|
-
GREEN,
|
|
96
|
-
)
|
|
97
|
-
)
|
|
98
|
-
indexer.maintain()
|
|
99
|
-
except Exception as e:
|
|
100
|
-
indexer.error(str(e))
|
|
101
|
-
click.echo(_color(f"Error: {e}", RED))
|
|
102
|
-
raise
|
|
103
|
-
|
|
104
|
-
|
|
105
|
-
@cli.command()
|
|
106
|
-
@click.option(
|
|
107
|
-
"--repo",
|
|
108
|
-
default=".",
|
|
109
|
-
type=click.Path(exists=True, path_type=Path),
|
|
110
|
-
help="Repo root",
|
|
111
|
-
)
|
|
112
|
-
@click.pass_context
|
|
113
|
-
def update(ctx: click.Context, repo: Path) -> None:
|
|
114
|
-
"""Incremental update (only changed files). Uses file lock."""
|
|
115
|
-
from coderay.core.config import get_config
|
|
116
|
-
|
|
117
|
-
config = get_config()
|
|
118
|
-
index_dir = Path(config.index.path)
|
|
119
|
-
indexer = Indexer(repo)
|
|
120
|
-
t0 = time.time()
|
|
121
|
-
|
|
122
|
-
if not indexer.index_exists():
|
|
123
|
-
click.echo(_color("No index found. Run 'coderay build' first.", YELLOW))
|
|
124
|
-
ctx.exit(1)
|
|
125
|
-
|
|
126
|
-
try:
|
|
127
|
-
with acquire_indexer_lock(index_dir):
|
|
128
|
-
click.echo(_color("Updating index...", CYAN))
|
|
129
|
-
result = indexer.update_incremental()
|
|
130
|
-
click.echo(_color(f"{result} in {time.time() - t0:.2f}s", GREEN))
|
|
80
|
+
with timed_phase("build", log=False) as tp:
|
|
81
|
+
with acquire_indexer_lock(index_dir):
|
|
82
|
+
if full:
|
|
83
|
+
click.echo(_color("Building full index...", CYAN))
|
|
84
|
+
result = indexer.build_full()
|
|
85
|
+
else:
|
|
86
|
+
if not indexer.index_exists():
|
|
87
|
+
click.echo(_color("Building full index...", CYAN))
|
|
88
|
+
else:
|
|
89
|
+
click.echo(_color("Updating index (incremental)...", CYAN))
|
|
90
|
+
result = indexer.ensure_index()
|
|
131
91
|
indexer.maintain()
|
|
92
|
+
click.echo(_color(f"{result} in {tp.elapsed:.2f}s", GREEN))
|
|
132
93
|
except Exception as e:
|
|
133
94
|
indexer.error(str(e))
|
|
134
95
|
click.echo(_color(f"Error: {e}", RED))
|
|
@@ -163,16 +124,15 @@ def search_cmd(
|
|
|
163
124
|
|
|
164
125
|
retrieval = Retrieval()
|
|
165
126
|
click.echo(_color(f"Searching: {query_text!r}", CYAN))
|
|
166
|
-
t0 = time.perf_counter()
|
|
167
127
|
|
|
168
|
-
|
|
169
|
-
|
|
170
|
-
|
|
171
|
-
|
|
172
|
-
|
|
173
|
-
|
|
174
|
-
|
|
175
|
-
click.echo(_color(f"Query took {elapsed:.2f}s", BOLD))
|
|
128
|
+
with timed_phase("search", log=False) as tp:
|
|
129
|
+
results = retrieval.search(
|
|
130
|
+
query=query_text,
|
|
131
|
+
current_state=current_state,
|
|
132
|
+
top_k=top_k,
|
|
133
|
+
path_prefix=path_prefix,
|
|
134
|
+
)
|
|
135
|
+
click.echo(_color(f"Query took {tp.elapsed:.2f}s", BOLD))
|
|
176
136
|
|
|
177
137
|
if not results:
|
|
178
138
|
click.echo(_color("No results.", YELLOW))
|
|
@@ -367,7 +327,7 @@ def graph_cmd(
|
|
|
367
327
|
if not edges:
|
|
368
328
|
click.echo(
|
|
369
329
|
_color(
|
|
370
|
-
"No graph data. Run 'coderay build'
|
|
330
|
+
"No graph data. Run 'coderay build' to build it.",
|
|
371
331
|
YELLOW,
|
|
372
332
|
)
|
|
373
333
|
)
|
|
@@ -412,20 +372,26 @@ def watch(
|
|
|
412
372
|
|
|
413
373
|
config = get_config()
|
|
414
374
|
index_dir = Path(config.index.path)
|
|
415
|
-
|
|
416
|
-
click.echo(
|
|
417
|
-
_color(
|
|
418
|
-
"No index found. Run 'coderay build' first.",
|
|
419
|
-
YELLOW,
|
|
420
|
-
)
|
|
421
|
-
)
|
|
422
|
-
ctx.exit(1)
|
|
375
|
+
index_dir.mkdir(parents=True, exist_ok=True)
|
|
423
376
|
|
|
424
377
|
if quiet:
|
|
425
378
|
logging.getLogger("coderay.pipeline.watcher").setLevel(logging.WARNING)
|
|
426
379
|
|
|
427
|
-
|
|
380
|
+
indexer = Indexer(repo)
|
|
381
|
+
try:
|
|
382
|
+
with timed_phase("watch_startup", log=False) as tp:
|
|
383
|
+
with acquire_indexer_lock(index_dir):
|
|
384
|
+
if not indexer.index_exists():
|
|
385
|
+
click.echo(_color("No index found. Building full index...", CYAN))
|
|
386
|
+
result = indexer.ensure_index()
|
|
387
|
+
indexer.maintain()
|
|
388
|
+
click.echo(_color(f"{result} in {tp.elapsed:.2f}s", GREEN))
|
|
389
|
+
except Exception as e:
|
|
390
|
+
indexer.error(str(e))
|
|
391
|
+
click.echo(_color(f"Error: {e}", RED))
|
|
392
|
+
raise
|
|
428
393
|
|
|
394
|
+
watcher = FileWatcher(repo, index_dir)
|
|
429
395
|
click.echo(
|
|
430
396
|
_color(
|
|
431
397
|
f"Watching {repo.resolve()} "
|
|
@@ -433,9 +399,6 @@ def watch(
|
|
|
433
399
|
CYAN,
|
|
434
400
|
)
|
|
435
401
|
)
|
|
436
|
-
index_dir.mkdir(parents=True, exist_ok=True)
|
|
437
|
-
indexer = Indexer(repo)
|
|
438
|
-
indexer.update_incremental()
|
|
439
402
|
|
|
440
403
|
watcher.start()
|
|
441
404
|
try:
|
|
@@ -85,7 +85,18 @@ class SemanticSearchConfig:
|
|
|
85
85
|
class WatcherConfig:
|
|
86
86
|
debounce: Annotated[int, "in seconds"] = 2
|
|
87
87
|
exclude_patterns: Annotated[str, "besides .gitignore"] | None = None
|
|
88
|
-
|
|
88
|
+
|
|
89
|
+
|
|
90
|
+
@dataclass(frozen=True)
|
|
91
|
+
class GraphConfig:
|
|
92
|
+
"""Module filtering for the code graph (CALLS, IMPORTS edges)."""
|
|
93
|
+
|
|
94
|
+
exclude_modules: Annotated[
|
|
95
|
+
list[str], "module names/prefixes to exclude from graph edges"
|
|
96
|
+
] = field(default_factory=list)
|
|
97
|
+
include_modules: Annotated[
|
|
98
|
+
list[str], "module names to force-include (override excludes)"
|
|
99
|
+
] = field(default_factory=list)
|
|
89
100
|
|
|
90
101
|
|
|
91
102
|
@dataclass(frozen=True)
|
|
@@ -94,6 +105,7 @@ class Config:
|
|
|
94
105
|
index: IndexConfig = field(default_factory=IndexConfig)
|
|
95
106
|
semantic_search: SemanticSearchConfig = field(default_factory=SemanticSearchConfig)
|
|
96
107
|
watcher: WatcherConfig = field(default_factory=WatcherConfig)
|
|
108
|
+
graph: GraphConfig = field(default_factory=GraphConfig)
|
|
97
109
|
|
|
98
110
|
|
|
99
111
|
def _parse_boosting(data: dict[str, Any]) -> BoostingConfig:
|
|
@@ -204,6 +216,7 @@ def _load_config_impl() -> Config:
|
|
|
204
216
|
default_data.get("semantic_search", {}) or {}
|
|
205
217
|
),
|
|
206
218
|
watcher=WatcherConfig(**default_data.get("watcher", {})),
|
|
219
|
+
graph=GraphConfig(**default_data.get("graph", {})),
|
|
207
220
|
)
|
|
208
221
|
|
|
209
222
|
|
|
@@ -288,4 +301,5 @@ def _deep_merge(overrides: dict, *, index_dir: Path) -> Config:
|
|
|
288
301
|
index=IndexConfig(**merged.get("index", {})),
|
|
289
302
|
semantic_search=_parse_semantic_search(merged.get("semantic_search", {}) or {}),
|
|
290
303
|
watcher=WatcherConfig(**merged.get("watcher", {})),
|
|
304
|
+
graph=GraphConfig(**merged.get("graph", {})),
|
|
291
305
|
)
|
|
@@ -29,17 +29,23 @@ def timed(phase: str) -> Callable[[F], F]:
|
|
|
29
29
|
return decorator
|
|
30
30
|
|
|
31
31
|
|
|
32
|
-
class
|
|
33
|
-
"""Context manager that
|
|
32
|
+
class TimedPhase:
|
|
33
|
+
"""Context manager that measures block execution time and optionally logs it."""
|
|
34
34
|
|
|
35
|
-
def __init__(self, phase: str) -> None:
|
|
35
|
+
def __init__(self, phase: str, *, log: bool = True) -> None:
|
|
36
36
|
self.phase = phase
|
|
37
|
+
self.log = log
|
|
37
38
|
self.t0: float = 0.0
|
|
39
|
+
self.elapsed: float = 0.0
|
|
38
40
|
|
|
39
|
-
def __enter__(self) ->
|
|
41
|
+
def __enter__(self) -> TimedPhase:
|
|
40
42
|
self.t0 = time.perf_counter()
|
|
41
43
|
return self
|
|
42
44
|
|
|
43
45
|
def __exit__(self, *args: object) -> None:
|
|
44
|
-
elapsed = time.perf_counter() - self.t0
|
|
45
|
-
|
|
46
|
+
self.elapsed = time.perf_counter() - self.t0
|
|
47
|
+
if self.log:
|
|
48
|
+
logger.info("%s: %.3fs", self.phase, self.elapsed)
|
|
49
|
+
|
|
50
|
+
|
|
51
|
+
timed_phase = TimedPhase # Convenience alias for context manager usage
|
|
@@ -33,11 +33,13 @@ def build_graph(
|
|
|
33
33
|
except Exception as exc:
|
|
34
34
|
logger.warning("Graph extraction failed for %s: %s", file_path, exc)
|
|
35
35
|
resolved = graph.resolve_edges()
|
|
36
|
+
pruned = graph.prune_phantom_edges()
|
|
36
37
|
logger.info(
|
|
37
|
-
"Graph built: %d nodes, %d edges (%d
|
|
38
|
+
"Graph built: %d nodes, %d edges (%d resolved, %d phantoms pruned)",
|
|
38
39
|
graph.node_count,
|
|
39
40
|
graph.edge_count,
|
|
40
41
|
resolved,
|
|
42
|
+
pruned,
|
|
41
43
|
)
|
|
42
44
|
return graph
|
|
43
45
|
|
|
@@ -140,6 +142,7 @@ def build_and_save_graph(
|
|
|
140
142
|
except Exception as exc:
|
|
141
143
|
logger.warning("Graph extraction failed for %s: %s", fp, exc)
|
|
142
144
|
existing_graph.resolve_edges()
|
|
145
|
+
existing_graph.prune_phantom_edges()
|
|
143
146
|
graph = existing_graph
|
|
144
147
|
logger.info(
|
|
145
148
|
"Graph incremental update: re-parsed %d files",
|
|
@@ -74,6 +74,10 @@ class CodeGraph:
|
|
|
74
74
|
# (e.g. two files both define a function called "helper").
|
|
75
75
|
self._symbol_index: dict[str, set[str]] = defaultdict(set)
|
|
76
76
|
|
|
77
|
+
# qualified_name -> {full node IDs} — enables resolving dotted names
|
|
78
|
+
# like "ClassName.method" to "src/a.py::ClassName.method".
|
|
79
|
+
self._qualified_index: dict[str, set[str]] = defaultdict(set)
|
|
80
|
+
|
|
77
81
|
# dotted module name -> node ID — maps Python-style import paths
|
|
78
82
|
# (e.g. "core.models") to the MODULE node they refer to.
|
|
79
83
|
self._module_index: dict[str, str] = {}
|
|
@@ -85,6 +89,8 @@ class CodeGraph:
|
|
|
85
89
|
def _index_node(self, node: GraphNode) -> None:
|
|
86
90
|
"""Register a node in all secondary indexes."""
|
|
87
91
|
self._symbol_index[node.name].add(node.id)
|
|
92
|
+
if node.qualified_name != node.name:
|
|
93
|
+
self._qualified_index[node.qualified_name].add(node.id)
|
|
88
94
|
self._file_index[node.file_path].add(node.id)
|
|
89
95
|
if node.kind == NodeKind.MODULE:
|
|
90
96
|
# Register all suffix variants so that "import models" and
|
|
@@ -98,6 +104,10 @@ class CodeGraph:
|
|
|
98
104
|
sym_entries = self._symbol_index.get(node.name)
|
|
99
105
|
if sym_entries is not None:
|
|
100
106
|
sym_entries.discard(node.id)
|
|
107
|
+
if node.qualified_name != node.name:
|
|
108
|
+
qual_entries = self._qualified_index.get(node.qualified_name)
|
|
109
|
+
if qual_entries is not None:
|
|
110
|
+
qual_entries.discard(node.id)
|
|
101
111
|
file_entries = self._file_index.get(node.file_path)
|
|
102
112
|
if file_entries is not None:
|
|
103
113
|
file_entries.discard(node.id)
|
|
@@ -140,17 +150,27 @@ class CodeGraph:
|
|
|
140
150
|
return len(to_remove)
|
|
141
151
|
|
|
142
152
|
def resolve_symbol(self, name: str, caller_file: str | None = None) -> str | None:
|
|
143
|
-
"""Resolve a short
|
|
153
|
+
"""Resolve a short, qualified, or dotted name to a fully-qualified node ID.
|
|
154
|
+
|
|
155
|
+
Lookup order:
|
|
156
|
+
1. Exact node ID match (fast path).
|
|
157
|
+
2. Bare name via ``_symbol_index`` (unique match only).
|
|
158
|
+
3. Qualified name via ``_qualified_index`` (e.g. "ClassName.method").
|
|
144
159
|
|
|
145
160
|
Returns:
|
|
146
161
|
Full node ID, or None if the name cannot be uniquely resolved.
|
|
147
162
|
"""
|
|
148
|
-
# Already a full node ID (e.g. "src/a.py::foo") — fast path
|
|
149
163
|
if name in self._g and self._g.nodes[name].get("data") is not None:
|
|
150
164
|
return name
|
|
165
|
+
|
|
151
166
|
candidates = self._symbol_index.get(name, set())
|
|
152
167
|
if len(candidates) == 1:
|
|
153
168
|
return next(iter(candidates))
|
|
169
|
+
|
|
170
|
+
qual_candidates = self._qualified_index.get(name, set())
|
|
171
|
+
if len(qual_candidates) == 1:
|
|
172
|
+
return next(iter(qual_candidates))
|
|
173
|
+
|
|
154
174
|
return None
|
|
155
175
|
|
|
156
176
|
def resolve_edges(self) -> int:
|
|
@@ -194,6 +214,46 @@ class CodeGraph:
|
|
|
194
214
|
logger.info("Resolved %d edges via symbol/module index", len(to_add))
|
|
195
215
|
return len(to_add)
|
|
196
216
|
|
|
217
|
+
def prune_phantom_edges(self) -> int:
|
|
218
|
+
"""Remove CALLS edges whose target is a phantom with no resolution candidates.
|
|
219
|
+
|
|
220
|
+
These are typically stdlib/third-party methods (``append``, ``get``,
|
|
221
|
+
``join``, etc.) that will never resolve to a project node. Removing
|
|
222
|
+
them reduces noise and improves ``get_impact_radius`` traversal.
|
|
223
|
+
|
|
224
|
+
Returns:
|
|
225
|
+
Number of edges pruned.
|
|
226
|
+
"""
|
|
227
|
+
to_remove: list[tuple[str, str]] = []
|
|
228
|
+
for u, v, data in self._g.edges(data=True):
|
|
229
|
+
if data.get("kind") != EdgeKind.CALLS:
|
|
230
|
+
continue
|
|
231
|
+
node_data = self._g.nodes.get(v, {})
|
|
232
|
+
if node_data and node_data.get("data") is not None:
|
|
233
|
+
continue
|
|
234
|
+
if (
|
|
235
|
+
not self._symbol_index.get(v)
|
|
236
|
+
and not self._qualified_index.get(v)
|
|
237
|
+
):
|
|
238
|
+
to_remove.append((u, v))
|
|
239
|
+
|
|
240
|
+
for u, v in to_remove:
|
|
241
|
+
if self._g.has_edge(u, v):
|
|
242
|
+
self._g.remove_edge(u, v)
|
|
243
|
+
|
|
244
|
+
# Clean up orphan phantom nodes (no remaining edges)
|
|
245
|
+
phantom_nodes = [
|
|
246
|
+
n for n in list(self._g.nodes)
|
|
247
|
+
if self._g.nodes[n].get("data") is None
|
|
248
|
+
and self._g.degree(n) == 0
|
|
249
|
+
]
|
|
250
|
+
for n in phantom_nodes:
|
|
251
|
+
self._g.remove_node(n)
|
|
252
|
+
|
|
253
|
+
if to_remove:
|
|
254
|
+
logger.info("Pruned %d phantom CALLS edges", len(to_remove))
|
|
255
|
+
return len(to_remove)
|
|
256
|
+
|
|
197
257
|
def _resolve_path_target(self, target: str) -> str | None:
|
|
198
258
|
"""Try to match a path-style target to an existing MODULE node."""
|
|
199
259
|
if "/" not in target:
|
|
@@ -86,11 +86,8 @@ def build_module_filter() -> frozenset[str]:
|
|
|
86
86
|
from CALLS and IMPORTS edges.
|
|
87
87
|
"""
|
|
88
88
|
config = get_config()
|
|
89
|
-
|
|
90
|
-
|
|
91
|
-
graph_cfg = {}
|
|
92
|
-
extra_excludes = set(graph_cfg.get("exclude_modules") or [])
|
|
93
|
-
force_includes = set(graph_cfg.get("include_modules") or [])
|
|
89
|
+
extra_excludes = set(config.graph.exclude_modules or [])
|
|
90
|
+
force_includes = set(config.graph.include_modules or [])
|
|
94
91
|
return frozenset((_DEFAULT_EXCLUDED_MODULES | extra_excludes) - force_includes)
|
|
95
92
|
|
|
96
93
|
|
|
@@ -391,12 +388,13 @@ class GraphTreeSitterParser(BaseTreeSitterParser):
|
|
|
391
388
|
)
|
|
392
389
|
)
|
|
393
390
|
|
|
394
|
-
#
|
|
395
|
-
#
|
|
396
|
-
#
|
|
397
|
-
# static/classmethod calls at module level can resolve
|
|
391
|
+
# Register in FileContext so calls resolve to the full node_id.
|
|
392
|
+
# Top-level: ``my_func()`` → node_id
|
|
393
|
+
# Class methods: ``ClassName.method()`` → node_id (via qualified name)
|
|
398
394
|
if not scope_stack:
|
|
399
395
|
self._file_ctx.register_definition(name, node_id)
|
|
396
|
+
else:
|
|
397
|
+
self._file_ctx.register_definition(qualified, node_id)
|
|
400
398
|
|
|
401
399
|
# Recurse into the function body under a new scope
|
|
402
400
|
new_scope = [*scope_stack, name]
|
|
@@ -80,18 +80,12 @@ class Indexer:
|
|
|
80
80
|
"""Full rebuild: discover, chunk, embed, and store all source files."""
|
|
81
81
|
|
|
82
82
|
current = self._state.current_state
|
|
83
|
-
last_branch = current.branch if current is not None else None
|
|
84
|
-
branch_switched = self._git.is_branch_switched(last_branch=last_branch)
|
|
85
|
-
if branch_switched:
|
|
86
|
-
return self.update_incremental()
|
|
87
|
-
|
|
88
83
|
current_run = current.current_run if current else None
|
|
89
84
|
saved_paths = current_run.paths_to_process if current_run else []
|
|
90
85
|
processed_count = current_run.processed_count if current_run else 0
|
|
91
86
|
|
|
92
87
|
can_resume = (
|
|
93
|
-
|
|
94
|
-
and self._state.is_in_progress
|
|
88
|
+
self._state.is_in_progress
|
|
95
89
|
and self._state.has_partial_progress
|
|
96
90
|
)
|
|
97
91
|
|
|
@@ -150,17 +144,6 @@ class Indexer:
|
|
|
150
144
|
self._state.set_incomplete()
|
|
151
145
|
|
|
152
146
|
current = self._state.current_state
|
|
153
|
-
state_branch = current.branch if current else None
|
|
154
|
-
active_branch = self._git.get_current_branch()
|
|
155
|
-
|
|
156
|
-
if self._git.is_branch_switched(last_branch=state_branch):
|
|
157
|
-
logger.info(
|
|
158
|
-
"Branch switched %s -> %s; syncing index",
|
|
159
|
-
state_branch,
|
|
160
|
-
active_branch,
|
|
161
|
-
)
|
|
162
|
-
return self._sync_after_branch_switch()
|
|
163
|
-
|
|
164
147
|
to_add, to_remove = self._git.get_files_to_index(
|
|
165
148
|
last_commit=current.last_commit if current else None
|
|
166
149
|
)
|
|
@@ -193,55 +176,6 @@ class Indexer:
|
|
|
193
176
|
file_hashes=file_hashes,
|
|
194
177
|
)
|
|
195
178
|
|
|
196
|
-
def _sync_after_branch_switch(self) -> IndexResult:
|
|
197
|
-
"""Sync index to current branch after a switch. Returns IndexResult."""
|
|
198
|
-
file_hashes = self._state.file_hashes.copy()
|
|
199
|
-
py_files = self._git.discover_files()
|
|
200
|
-
|
|
201
|
-
# All .py files were deleted from git
|
|
202
|
-
to_remove: list[str] = []
|
|
203
|
-
if not py_files:
|
|
204
|
-
to_remove = list(file_hashes)
|
|
205
|
-
|
|
206
|
-
if to_remove:
|
|
207
|
-
self._store.delete_by_paths(to_remove)
|
|
208
|
-
index_result = IndexResult(removed=len(file_hashes))
|
|
209
|
-
file_hashes.clear()
|
|
210
|
-
|
|
211
|
-
self._state.file_hashes = file_hashes
|
|
212
|
-
self._state.finish(
|
|
213
|
-
last_commit=self._git.get_head_commit(),
|
|
214
|
-
branch=self._git.get_current_branch(),
|
|
215
|
-
)
|
|
216
|
-
self._refresh_graph()
|
|
217
|
-
return index_result
|
|
218
|
-
|
|
219
|
-
rel_paths_current = {str(p.relative_to(self._repo_root)) for p in py_files}
|
|
220
|
-
# Deleted files on current branch
|
|
221
|
-
to_remove = [p for p in file_hashes if p not in rel_paths_current]
|
|
222
|
-
if to_remove:
|
|
223
|
-
self._store.delete_by_paths(to_remove)
|
|
224
|
-
for p in to_remove:
|
|
225
|
-
file_hashes.pop(p, None)
|
|
226
|
-
|
|
227
|
-
changed_files = files_with_changed_content(
|
|
228
|
-
repo=self._repo_root, paths=py_files, file_hashes=file_hashes
|
|
229
|
-
)
|
|
230
|
-
|
|
231
|
-
if not changed_files and not to_remove:
|
|
232
|
-
self._state.finish(
|
|
233
|
-
last_commit=self._git.get_head_commit(),
|
|
234
|
-
branch=self._git.get_current_branch(),
|
|
235
|
-
)
|
|
236
|
-
self._refresh_graph()
|
|
237
|
-
logger.info("Branch switch: index already in sync (no changes)")
|
|
238
|
-
return IndexResult(cached=len(self._state.file_hashes))
|
|
239
|
-
|
|
240
|
-
return self._update(
|
|
241
|
-
paths_to_add=changed_files,
|
|
242
|
-
file_hashes=file_hashes,
|
|
243
|
-
)
|
|
244
|
-
|
|
245
179
|
def _run_batch_loop(
|
|
246
180
|
self,
|
|
247
181
|
rel_paths: list[str],
|
|
@@ -423,6 +357,12 @@ class Indexer:
|
|
|
423
357
|
"""Return True if the index exists at index_dir."""
|
|
424
358
|
return index_exists(self._index_dir)
|
|
425
359
|
|
|
360
|
+
def ensure_index(self) -> IndexResult:
|
|
361
|
+
"""Build full index if missing, else incremental update."""
|
|
362
|
+
if not self.index_exists():
|
|
363
|
+
return self.build_full()
|
|
364
|
+
return self.update_incremental()
|
|
365
|
+
|
|
426
366
|
def error(self, exc: str) -> None:
|
|
427
367
|
"""Mark the current run as errored with the given exception message."""
|
|
428
368
|
self._state.set_errored(exc=exc)
|
|
@@ -19,6 +19,7 @@ from watchdog.events import (
|
|
|
19
19
|
from watchdog.observers import Observer
|
|
20
20
|
from watchdog.observers.polling import PollingObserver
|
|
21
21
|
|
|
22
|
+
from coderay.core.timing import timed_phase
|
|
22
23
|
from coderay.parsing.languages import get_supported_extensions
|
|
23
24
|
from coderay.vcs.git import load_gitignore
|
|
24
25
|
|
|
@@ -35,7 +36,6 @@ class _DebouncedHandler:
|
|
|
35
36
|
gitignore_spec: pathspec.PathSpec,
|
|
36
37
|
supported_extensions: set[str],
|
|
37
38
|
debounce_seconds: float,
|
|
38
|
-
branch_switch_threshold: int,
|
|
39
39
|
extra_exclude: list[str],
|
|
40
40
|
on_batch: Callable[[set[str], set[str]], None],
|
|
41
41
|
) -> None:
|
|
@@ -45,7 +45,6 @@ class _DebouncedHandler:
|
|
|
45
45
|
self._gitignore = gitignore_spec
|
|
46
46
|
self._extensions = supported_extensions
|
|
47
47
|
self._debounce = debounce_seconds
|
|
48
|
-
self._threshold = branch_switch_threshold
|
|
49
48
|
self._on_batch = on_batch
|
|
50
49
|
|
|
51
50
|
extra_spec = pathspec.PathSpec.from_lines("gitignore", extra_exclude)
|
|
@@ -160,13 +159,6 @@ class _DebouncedHandler:
|
|
|
160
159
|
if not changed and not removed:
|
|
161
160
|
return
|
|
162
161
|
|
|
163
|
-
total = len(changed) + len(removed)
|
|
164
|
-
if total >= self._threshold:
|
|
165
|
-
logger.info(
|
|
166
|
-
"Branch switch detected (%d files); delegating to full sync",
|
|
167
|
-
total,
|
|
168
|
-
)
|
|
169
|
-
|
|
170
162
|
try:
|
|
171
163
|
self._on_batch(changed, removed)
|
|
172
164
|
except Exception:
|
|
@@ -195,7 +187,6 @@ class FileWatcher:
|
|
|
195
187
|
|
|
196
188
|
watch_cfg = self._config.watcher
|
|
197
189
|
self._debounce = float(watch_cfg.debounce)
|
|
198
|
-
self._threshold = int(watch_cfg.branch_switch_threshold)
|
|
199
190
|
self._extra_exclude = list(watch_cfg.exclude_patterns or [])
|
|
200
191
|
|
|
201
192
|
self._observer: Observer | PollingObserver | None = None
|
|
@@ -219,7 +210,6 @@ class FileWatcher:
|
|
|
219
210
|
gitignore_spec=gitignore_spec,
|
|
220
211
|
supported_extensions=extensions,
|
|
221
212
|
debounce_seconds=self._debounce,
|
|
222
|
-
branch_switch_threshold=self._threshold,
|
|
223
213
|
extra_exclude=self._extra_exclude,
|
|
224
214
|
on_batch=batch_fn,
|
|
225
215
|
)
|
|
@@ -265,30 +255,21 @@ class FileWatcher:
|
|
|
265
255
|
self._observer.join(timeout=timeout)
|
|
266
256
|
|
|
267
257
|
def _default_batch(self, changed: set[str], removed: set[str]) -> None:
|
|
268
|
-
"""Default callback: acquire lock and run Indexer.
|
|
258
|
+
"""Default callback: acquire lock and run Indexer.update_incremental."""
|
|
269
259
|
from coderay.core.lock import acquire_indexer_lock
|
|
270
260
|
from coderay.pipeline.indexer import Indexer
|
|
271
261
|
|
|
272
|
-
total = len(changed) + len(removed)
|
|
273
|
-
t0 = time.time()
|
|
274
|
-
|
|
275
262
|
try:
|
|
276
|
-
with
|
|
277
|
-
|
|
278
|
-
|
|
263
|
+
with timed_phase("update", log=False) as tp:
|
|
264
|
+
with acquire_indexer_lock(self._index_dir, timeout=30):
|
|
265
|
+
indexer = Indexer(self._repo_root)
|
|
279
266
|
result = indexer.update_incremental()
|
|
280
|
-
else:
|
|
281
|
-
result = indexer.update_paths(
|
|
282
|
-
changed=sorted(changed),
|
|
283
|
-
removed=sorted(removed),
|
|
284
|
-
)
|
|
285
|
-
elapsed = time.time() - t0
|
|
286
267
|
self._update_count += 1
|
|
287
268
|
logger.info(
|
|
288
269
|
"Update #%d: %s (%.2fs) [%d changed, %d removed]",
|
|
289
270
|
self._update_count,
|
|
290
271
|
result,
|
|
291
|
-
elapsed,
|
|
272
|
+
tp.elapsed,
|
|
292
273
|
len(changed),
|
|
293
274
|
len(removed),
|
|
294
275
|
)
|
|
@@ -1,11 +1,11 @@
|
|
|
1
1
|
from __future__ import annotations
|
|
2
2
|
|
|
3
3
|
import logging
|
|
4
|
-
import time
|
|
5
4
|
from pathlib import Path
|
|
6
5
|
from typing import Any
|
|
7
6
|
|
|
8
7
|
from coderay.core.config import Config, get_config
|
|
8
|
+
from coderay.core.timing import timed_phase
|
|
9
9
|
from coderay.embedding.base import Embedder, load_embedder_from_config
|
|
10
10
|
from coderay.graph.builder import load_graph
|
|
11
11
|
from coderay.retrieval.boosting import StructuralBooster
|
|
@@ -65,20 +65,18 @@ class Retrieval:
|
|
|
65
65
|
|
|
66
66
|
store = self._get_store()
|
|
67
67
|
|
|
68
|
-
|
|
69
|
-
|
|
70
|
-
logger.info("Query embed took %.3fs", time.perf_counter() - t0)
|
|
68
|
+
with timed_phase("embed"):
|
|
69
|
+
query_vectors = self._embedder.embed([query])
|
|
71
70
|
|
|
72
71
|
if not query_vectors:
|
|
73
72
|
return []
|
|
74
|
-
|
|
75
|
-
|
|
76
|
-
|
|
77
|
-
|
|
78
|
-
|
|
79
|
-
|
|
80
|
-
|
|
81
|
-
logger.info("Vector search took %.3fs", time.perf_counter() - t1)
|
|
73
|
+
with timed_phase("vector_search"):
|
|
74
|
+
results = store.search(
|
|
75
|
+
query_embedding=query_vectors[0],
|
|
76
|
+
top_k=top_k,
|
|
77
|
+
path_prefix=path_prefix,
|
|
78
|
+
query_text=query,
|
|
79
|
+
)
|
|
82
80
|
|
|
83
81
|
return self._booster.boost(results)
|
|
84
82
|
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: coderay
|
|
3
|
-
Version: 1.0.4
|
|
3
|
+
Version: 1.0.6
|
|
4
4
|
Summary: X-ray your codebase — semantic search, code graphs, file skeletons, and MCP server
|
|
5
5
|
Author-email: Bogdan Copocean <bogdancopocean@gmail.com>
|
|
6
6
|
License-Expression: MIT
|
|
@@ -64,6 +64,15 @@ AI coding assistants and a standalone CLI.
|
|
|
64
64
|
|
|
65
65
|
## Install
|
|
66
66
|
|
|
67
|
+
Create a virtual environment (recommended):
|
|
68
|
+
|
|
69
|
+
```bash
|
|
70
|
+
python -m venv .venv
|
|
71
|
+
source .venv/bin/activate
|
|
72
|
+
```
|
|
73
|
+
|
|
74
|
+
Then install:
|
|
75
|
+
|
|
67
76
|
```bash
|
|
68
77
|
pip install coderay
|
|
69
78
|
```
|
|
@@ -79,6 +88,7 @@ For development:
|
|
|
79
88
|
```bash
|
|
80
89
|
git clone https://github.com/bogdan-copocean/coderay.git
|
|
81
90
|
cd coderay
|
|
91
|
+
python -m venv .venv && source .venv/bin/activate
|
|
82
92
|
pip install -e ".[all]"
|
|
83
93
|
```
|
|
84
94
|
|
|
@@ -86,15 +96,22 @@ pip install -e ".[all]"
|
|
|
86
96
|
|
|
87
97
|
```bash
|
|
88
98
|
cd /path/to/your/project
|
|
89
|
-
coderay
|
|
99
|
+
coderay watch --repo . # keeps index fresh while you work (recommended)
|
|
90
100
|
coderay search "how does authentication work"
|
|
91
|
-
coderay watch --repo .
|
|
92
101
|
coderay graph --kind calls
|
|
93
102
|
coderay skeleton src/app/main.py
|
|
94
103
|
```
|
|
95
104
|
|
|
105
|
+
> **Use `watch`, not `build`.** `coderay build` is a one-off; while you work, the index will get stale. `coderay watch` re-indexes on file changes and is the go-to for active development.
|
|
106
|
+
|
|
96
107
|
## MCP server (Claude Code / Cursor)
|
|
97
108
|
|
|
109
|
+
Find the MCP executable path:
|
|
110
|
+
|
|
111
|
+
```bash
|
|
112
|
+
which coderay-mcp
|
|
113
|
+
```
|
|
114
|
+
|
|
98
115
|
Add to `~/.claude/claude_code_config.json` or Cursor MCP settings:
|
|
99
116
|
|
|
100
117
|
```json
|
|
@@ -108,13 +125,14 @@ Add to `~/.claude/claude_code_config.json` or Cursor MCP settings:
|
|
|
108
125
|
}
|
|
109
126
|
```
|
|
110
127
|
|
|
128
|
+
Replace `/path/to/your/.venv/bin/coderay-mcp` with the output of `which coderay-mcp`.
|
|
129
|
+
|
|
111
130
|
## CLI reference
|
|
112
131
|
|
|
113
132
|
| Command | Description |
|
|
114
133
|
|---|---|
|
|
115
|
-
| `coderay
|
|
116
|
-
| `coderay
|
|
117
|
-
| `coderay watch --repo . [--debounce N]` | Watch for file changes, re-index automatically |
|
|
134
|
+
| `coderay watch --repo . [--debounce N]` | **Recommended.** Watch for file changes, re-index automatically |
|
|
135
|
+
| `coderay build [--full] --repo .` | Build or incremental update. Use `--full` for full rebuild |
|
|
118
136
|
| `coderay search "query" [--top-k N]` | Semantic search |
|
|
119
137
|
| `coderay list [--by-file]` | List indexed chunks |
|
|
120
138
|
| `coderay status` | Index state, branch, commit, chunk count |
|
|
@@ -124,39 +142,37 @@ Add to `~/.claude/claude_code_config.json` or Cursor MCP settings:
|
|
|
124
142
|
|
|
125
143
|
## Configuration
|
|
126
144
|
|
|
127
|
-
|
|
145
|
+
File discovery and ignoring are based on `.git` and `.gitignore`. The `.git` directory is excluded; files matching `.gitignore` are not indexed. Config `exclude_patterns` add extra exclusions on top of that.
|
|
146
|
+
|
|
147
|
+
Optional `config.yaml` in the index directory (default: `.index/config.yaml`):
|
|
128
148
|
|
|
129
149
|
```yaml
|
|
130
150
|
embedder:
|
|
131
|
-
|
|
132
|
-
model: all-MiniLM-L6-v2
|
|
151
|
+
model: sentence-transformers/all-MiniLM-L6-v2
|
|
133
152
|
dimensions: 384
|
|
134
153
|
|
|
135
|
-
|
|
136
|
-
|
|
137
|
-
"tests/": 0.5
|
|
138
|
-
"src/core/": 1.2
|
|
139
|
-
|
|
140
|
-
graph:
|
|
141
|
-
exclude_callees:
|
|
142
|
-
- "our_sdk_helper"
|
|
143
|
-
include_callees:
|
|
144
|
-
- "isinstance"
|
|
145
|
-
|
|
146
|
-
watch:
|
|
147
|
-
debounce_seconds: 2
|
|
148
|
-
branch_switch_threshold: 50
|
|
149
|
-
exclude_patterns:
|
|
154
|
+
index:
|
|
155
|
+
exclude_patterns: # besides .gitignore
|
|
150
156
|
- "*.log"
|
|
151
|
-
```
|
|
152
157
|
|
|
153
|
-
|
|
158
|
+
semantic_search:
|
|
159
|
+
boosting:
|
|
160
|
+
penalties:
|
|
161
|
+
- pattern: "(^|/)tests?/"
|
|
162
|
+
factor: 0.5
|
|
163
|
+
- pattern: "(^|/)test_[^/]+\\.py$"
|
|
164
|
+
factor: 0.5
|
|
165
|
+
bonuses:
|
|
166
|
+
- pattern: "(^|/)src/"
|
|
167
|
+
factor: 1.1
|
|
168
|
+
metric: cosine
|
|
169
|
+
|
|
170
|
+
watcher:
|
|
171
|
+
debounce: 2
|
|
172
|
+
exclude_patterns: # besides .gitignore
|
|
173
|
+
- "*.log"
|
|
154
174
|
|
|
155
|
-
|
|
156
|
-
|
|
157
|
-
|
|
158
|
-
make lint
|
|
159
|
-
make format
|
|
175
|
+
graph:
|
|
176
|
+
exclude_modules: [] # module names to exclude from CALLS/IMPORTS edges
|
|
177
|
+
include_modules: [] # force-include (override excludes)
|
|
160
178
|
```
|
|
161
|
-
|
|
162
|
-
Requires Python >= 3.10 and Git.
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|