coderay 1.0.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- coderay/__init__.py +1 -0
- coderay/chunking/__init__.py +0 -0
- coderay/chunking/chunker.py +127 -0
- coderay/chunking/registry.py +190 -0
- coderay/cli/__init__.py +3 -0
- coderay/cli/commands.py +475 -0
- coderay/core/__init__.py +0 -0
- coderay/core/config.py +73 -0
- coderay/core/lock.py +36 -0
- coderay/core/models.py +71 -0
- coderay/core/timing.py +45 -0
- coderay/core/utils.py +35 -0
- coderay/embedding/__init__.py +0 -0
- coderay/embedding/base.py +60 -0
- coderay/embedding/local.py +68 -0
- coderay/embedding/openai.py +87 -0
- coderay/graph/__init__.py +19 -0
- coderay/graph/builder.py +128 -0
- coderay/graph/code_graph.py +311 -0
- coderay/graph/extractor.py +315 -0
- coderay/mcp_server/__init__.py +0 -0
- coderay/mcp_server/server.py +178 -0
- coderay/pipeline/__init__.py +0 -0
- coderay/pipeline/indexer.py +417 -0
- coderay/pipeline/watcher.py +318 -0
- coderay/retrieval/__init__.py +3 -0
- coderay/retrieval/boosting.py +80 -0
- coderay/retrieval/search.py +121 -0
- coderay/skeleton/__init__.py +0 -0
- coderay/skeleton/extractor.py +140 -0
- coderay/state/__init__.py +8 -0
- coderay/state/machine.py +242 -0
- coderay/state/version.py +47 -0
- coderay/storage/__init__.py +0 -0
- coderay/storage/lancedb.py +268 -0
- coderay/vcs/__init__.py +0 -0
- coderay/vcs/git.py +193 -0
- coderay-1.0.0.dist-info/METADATA +145 -0
- coderay-1.0.0.dist-info/RECORD +42 -0
- coderay-1.0.0.dist-info/WHEEL +5 -0
- coderay-1.0.0.dist-info/entry_points.txt +3 -0
- coderay-1.0.0.dist-info/top_level.txt +1 -0
coderay/cli/commands.py
ADDED
|
@@ -0,0 +1,475 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
import logging
|
|
4
|
+
import sys
|
|
5
|
+
import time
|
|
6
|
+
from pathlib import Path
|
|
7
|
+
|
|
8
|
+
import click
|
|
9
|
+
from dotenv import load_dotenv
|
|
10
|
+
|
|
11
|
+
from coderay.core.lock import acquire_indexer_lock
|
|
12
|
+
from coderay.pipeline.indexer import Indexer
|
|
13
|
+
from coderay.retrieval.search import Retrieval
|
|
14
|
+
from coderay.state.machine import StateMachine
|
|
15
|
+
from coderay.storage.lancedb import index_exists
|
|
16
|
+
|
|
17
|
+
# Load .env so OPENAI_API_KEY etc. are available (e.g. for embedder).
|
|
18
|
+
load_dotenv()
|
|
19
|
+
|
|
20
|
+
# ANSI color escape codes; _color() leaves them out when stdout is not a TTY.
|
|
21
|
+
CYAN = "\033[36m"
|
|
22
|
+
GREEN = "\033[32m"
|
|
23
|
+
YELLOW = "\033[33m"
|
|
24
|
+
RED = "\033[31m"
|
|
25
|
+
BOLD = "\033[1m"
|
|
26
|
+
RESET = "\033[0m"
|
|
27
|
+
|
|
28
|
+
|
|
29
|
+
def _color(text: str, color: str) -> str:
    """Wrap *text* in the ANSI *color* code when stdout is a terminal.

    When stdout is not a TTY the text is returned unchanged, so no escape
    sequences end up in piped or redirected output.
    """
    if sys.stdout.isatty():
        return f"{color}{text}{RESET}"
    return text
|
|
33
|
+
|
|
34
|
+
|
|
35
|
+
def _setup_logging(verbose: bool = False) -> None:
    """Configure root logging for the CLI.

    Args:
        verbose: When true the root level is DEBUG, otherwise INFO.
    """
    logging.basicConfig(
        level=logging.DEBUG if verbose else logging.INFO,
        format="%(asctime)s %(levelname)s %(name)s %(message)s",
        datefmt="%H:%M:%S",
    )
    # Cap the OpenAI client and its HTTP stack at WARNING so only warnings
    # and errors from those libraries are shown.
    noisy_loggers = ("openai", "httpx", "httpcore")
    for logger_name in noisy_loggers:
        logging.getLogger(logger_name).setLevel(logging.WARNING)
|
|
45
|
+
|
|
46
|
+
|
|
47
|
+
@click.group()
@click.option("--index-dir", default=".index", help="Index directory (default .index)")
@click.option("-v", "--verbose", is_flag=True, default=False, help="Verbose logging")
@click.pass_context
def cli(ctx: click.Context, index_dir: str, verbose: bool) -> None:
    """CodeRay — build, update, search, and inspect the index."""
    # Configure logging before any sub-command runs.
    _setup_logging(verbose)
    # Publish the global options on the context object so sub-commands can
    # read them from ctx.obj.
    ctx.ensure_object(dict)
    ctx.obj.update({"index_dir": Path(index_dir), "verbose": verbose})
|
|
57
|
+
|
|
58
|
+
|
|
59
|
+
@cli.command()
@click.option("--full", is_flag=True, help="Full rebuild (clear and re-index)")
@click.option(
    "--repo",
    default=".",
    type=click.Path(exists=True, path_type=Path),
    help="Repo root",
)
@click.pass_context
def build(ctx: click.Context, full: bool, repo: Path) -> None:
    """Build or rebuild the index."""
    # A full build runs when --full is given or no index exists yet; otherwise
    # the existing index is updated incrementally. Either way the work happens
    # under the indexer lock and is followed by a maintenance pass.
    index_dir = ctx.obj["index_dir"]
    index_dir.mkdir(parents=True, exist_ok=True)
    indexer = Indexer(repo, index_dir)
    # perf_counter is monotonic, so the reported duration cannot be skewed by
    # wall-clock adjustments while the build is running.
    t0 = time.perf_counter()
    try:
        with acquire_indexer_lock(index_dir):
            if full or not indexer.index_exists():
                click.echo(_color("Building full index...", CYAN))
                result = indexer.build_full()
            else:
                click.echo(_color("Updating index (incremental)...", CYAN))
                result = indexer.update_incremental()
            elapsed = time.perf_counter() - t0
            click.echo(_color(f"{result} in {elapsed:.2f}s", GREEN))
            indexer.maintain()
    except Exception as e:
        # Record the failure on the indexer, show it, then re-raise so the
        # command still terminates with the original exception.
        indexer.error(str(e))
        click.echo(_color(f"Error: {e}", RED))
        raise
|
|
99
|
+
|
|
100
|
+
|
|
101
|
+
@cli.command()
@click.option(
    "--repo",
    default=".",
    type=click.Path(exists=True, path_type=Path),
    help="Repo root",
)
@click.pass_context
def update(ctx: click.Context, repo: Path) -> None:
    """Incremental update (only changed files). Uses file lock."""
    index_dir = ctx.obj["index_dir"]
    indexer = Indexer(repo, index_dir)
    # perf_counter is monotonic, so the reported duration cannot be skewed by
    # wall-clock adjustments while the update is running.
    t0 = time.perf_counter()

    # An incremental update needs an existing index to update.
    if not indexer.index_exists():
        click.echo(_color("No index found. Run 'coderay build' first.", YELLOW))
        ctx.exit(1)

    try:
        with acquire_indexer_lock(index_dir):
            click.echo(_color("Updating index...", CYAN))
            result = indexer.update_incremental()
            elapsed = time.perf_counter() - t0
            click.echo(_color(f"{result} in {elapsed:.2f}s", GREEN))
            indexer.maintain()
    except Exception as e:
        # Record the failure on the indexer, show it, then re-raise so the
        # command still terminates with the original exception.
        indexer.error(str(e))
        click.echo(_color(f"Error: {e}", RED))
        raise
|
|
129
|
+
|
|
130
|
+
|
|
131
|
+
@cli.command()
@click.argument("query_text", required=True)
@click.option("--top-k", "top_k", default=10, help="Number of results")
@click.option("--path-prefix", help="Filter by path prefix")
@click.option("--language", help="Filter by language (e.g. python)")
@click.pass_context
def search_cmd(
    ctx: click.Context,
    query_text: str,
    top_k: int,
    path_prefix: str | None,
    language: str | None,
) -> None:
    """Semantic search the index."""
    index_dir = ctx.obj["index_dir"]
    if not index_exists(index_dir):
        click.echo(_color("No index found. Run 'coderay build' first.", YELLOW))
        ctx.exit(1)

    # The search needs the recorded index state; without one there is nothing
    # to query against.
    current_state = StateMachine(index_dir).current_state
    if current_state is None:
        click.echo(_color("No index state. Run 'coderay build' first.", YELLOW))
        ctx.exit(1)

    retrieval = Retrieval(index_dir)
    click.echo(_color(f"Searching: {query_text!r}", CYAN))

    started = time.perf_counter()
    results = retrieval.search(
        query=query_text,
        current_state=current_state,
        top_k=top_k,
        path_prefix=path_prefix,
        language=language,
    )
    elapsed = time.perf_counter() - started
    click.echo(_color(f"Query took {elapsed:.2f}s", BOLD))

    if not results:
        click.echo(_color("No results.", YELLOW))
        return

    # The first hit decides how scores are labelled for the whole result set.
    score_type = results[0].get("score_type", "cosine")
    scoring_note = (
        "Scoring: hybrid (RRF) — relative ranking, not a percentage"
        if score_type == "rrf"
        else "Scoring: cosine similarity (0-1)"
    )
    click.echo(_color(scoring_note, CYAN))

    show_percentage = score_type == "cosine"
    for rank, hit in enumerate(results, 1):
        score = hit.get("score", 0)
        suffix = f"({score:.0%})" if show_percentage else "(rrf)"
        score_str = f"score={score:.4f} {suffix}"

        # One-line preview: first 200 characters with newlines flattened.
        body = hit.get("content") or ""
        preview = body[:200].replace("\n", " ")
        if len(body) > 200:
            preview += "..."

        path = hit.get("path", "?")
        start = hit.get("start_line", 0)
        end = hit.get("end_line", 0)
        symbol = hit.get("symbol", "?")
        click.echo("")
        click.echo(
            _color(f" {rank}. {path}:{start}-{end} ({symbol}) {score_str}", GREEN)
        )
        click.echo(f" {preview}")
|
|
200
|
+
|
|
201
|
+
|
|
202
|
+
@cli.command("list")
@click.option(
    "--by-file",
    is_flag=True,
    help="Show only file path and chunk count (summary view).",
)
@click.option(
    "--limit",
    "chunk_limit",
    default=50,
    help="Max chunks to show when listing (default 50). Ignored if --by-file.",
)
@click.option("--path-prefix", help="Filter by path prefix (e.g. src/).")
@click.option(
    "--show-content",
    is_flag=True,
    help="Include a short content preview per chunk.",
)
@click.pass_context
def list_cmd(
    ctx: click.Context,
    by_file: bool,
    chunk_limit: int,
    path_prefix: str | None,
    show_content: bool,
) -> None:
    """Show what is in the index: chunk counts and/or chunk list."""
    index_dir = ctx.obj["index_dir"]
    if not index_exists(index_dir):
        click.echo(_color("No index found. Run 'coderay build' first.", YELLOW))
        ctx.exit(1)

    # Open the index only after confirming it exists, as the search command
    # does, so a missing index always yields the friendly message above.
    retrieval = Retrieval(index_dir)

    total = retrieval.chunk_count()
    click.echo(_color(f"Total chunks: {total}", CYAN))

    if by_file:
        by_path = retrieval.chunks_by_path()
        for path in sorted(by_path):
            # Honor --path-prefix in the summary view too; previously the
            # option was silently ignored when combined with --by-file.
            if path_prefix and not str(path).startswith(path_prefix):
                continue
            click.echo(f" {path}: {by_path[path]}")
        return

    chunks = retrieval.list_chunks(limit=chunk_limit, path_prefix=path_prefix)
    for i, row in enumerate(chunks, 1):
        path = row.get("path", "?")
        start = row.get("start_line", 0)
        end = row.get("end_line", 0)
        symbol = row.get("symbol", "?")
        click.echo(_color(f" {i}. {path}:{start}-{end} ({symbol})", GREEN))
        if show_content:
            # One-line preview: first 120 characters with newlines flattened.
            body = row.get("content") or ""
            content = body[:120].replace("\n", " ")
            if len(body) > 120:
                content += "..."
            click.echo(f" {content}")
|
|
258
|
+
|
|
259
|
+
|
|
260
|
+
@cli.command()
@click.pass_context
def status(ctx: click.Context) -> None:
    """Show index status: state, branch, commit, chunk count."""
    index_dir = ctx.obj["index_dir"]
    if not index_exists(index_dir):
        click.echo(_color("No index found. Run 'coderay build' first.", YELLOW))
        ctx.exit(1)

    sm = StateMachine(index_dir)
    state = sm.current_state
    if state is None:
        click.echo(_color("No index state found.", YELLOW))
        ctx.exit(1)

    from coderay.core.config import get_embedding_dimensions, load_config
    from coderay.state.version import read_index_version
    from coderay.storage.lancedb import Store

    # The store is opened with the embedding dimension taken from the config.
    dimensions = get_embedding_dimensions(load_config(index_dir))
    chunks = Store(index_dir, dimensions=dimensions).chunk_count()
    version = read_index_version(index_dir)

    click.echo(_color("Index Status", BOLD))
    report_lines = (
        f" State: {state.state.value}",
        f" Branch: {state.branch or '?'}",
        # Only the first 12 characters of the commit id are shown.
        f" Last commit: {state.last_commit[:12] if state.last_commit else '?'}",
        f" Chunks: {chunks}",
        f" Files tracked: {len(sm.file_hashes)}",
        f" Schema version: {version or '?'}",
    )
    for report_line in report_lines:
        click.echo(report_line)
|
|
293
|
+
|
|
294
|
+
|
|
295
|
+
@cli.command()
@click.option(
    "--repo",
    default=".",
    type=click.Path(exists=True, path_type=Path),
    help="Repo root",
)
@click.pass_context
def maintain(ctx: click.Context, repo: Path) -> None:
    """Reclaim space and compact the index."""
    index_dir = ctx.obj["index_dir"]
    if not index_exists(index_dir):
        click.echo(_color("No index found. Run 'coderay build' first.", YELLOW))
        ctx.exit(1)
    click.echo(_color("Maintaining index...", CYAN))
    indexer = Indexer(repo, index_dir)
    with acquire_indexer_lock(index_dir):
        # Treat a None return like an empty report instead of failing on
        # the .get() calls below.
        result = indexer.maintain() or {}

    # Reporting needs no lock, so it happens after the lock is released.
    cleanup_done = bool(result.get("cleanup_done"))
    compact_done = bool(result.get("compact_done"))
    if cleanup_done:
        click.echo(_color("Cleaned up old versions", GREEN))
    if compact_done:
        click.echo(_color("Compacted fragments", GREEN))
    if not (cleanup_done or compact_done):
        # Also covers a non-empty report in which neither step ran, which
        # previously produced no output at all.
        click.echo(_color("Nothing to maintain", CYAN))
|
|
319
|
+
|
|
320
|
+
|
|
321
|
+
@cli.command()
@click.argument(
    "file_path",
    # dir_okay=False: the command reads the path as a text file, so a directory
    # is rejected up front with a usage error instead of failing in read_text().
    type=click.Path(exists=True, dir_okay=False, path_type=Path),
)
def skeleton(file_path: Path) -> None:
    """Print the API skeleton (signatures, no bodies) for a source file."""
    from coderay.skeleton.extractor import extract_skeleton

    # Undecodable bytes are replaced rather than raising, so files that are
    # not valid UTF-8 can still be processed.
    content = file_path.read_text(encoding="utf-8", errors="replace")
    click.echo(extract_skeleton(file_path, content))
|
|
330
|
+
|
|
331
|
+
|
|
332
|
+
@cli.command("graph")
@click.option(
    "--kind",
    type=click.Choice(["calls", "imports"]),
    default=None,
    help="Show only this edge kind (default: all).",
)
@click.option(
    "--from",
    "from_node",
    default=None,
    help="Filter: source node contains this string.",
)
@click.option(
    "--to", "to_node", default=None, help="Filter: target node contains this string."
)
@click.option(
    "--limit", type=int, default=200, help="Max edges to print (default 200)."
)
@click.pass_context
def graph_cmd(
    ctx: click.Context,
    kind: str | None,
    from_node: str | None,
    to_node: str | None,
    limit: int,
) -> None:
    """List call and import graph edges (who calls who, who imports what)."""
    index_dir = ctx.obj["index_dir"]
    if not index_exists(index_dir):
        click.echo(_color("No index found. Run 'coderay build' first.", YELLOW))
        ctx.exit(1)

    # Open the index only after confirming it exists, as the search command
    # does, so a missing index always yields the friendly message above.
    retrieval = Retrieval(index_dir)
    edges = retrieval.load_graph()
    if not edges:
        click.echo(
            _color(
                "No graph data. Run 'coderay build' or 'coderay update' to build it.",
                YELLOW,
            )
        )
        ctx.exit(0)

    # Apply the optional filters one after another; the substring filters
    # compare against the string form of the edge endpoints.
    if kind:
        edges = [e for e in edges if e.get("kind") == kind]
    if from_node:
        edges = [e for e in edges if from_node in str(e.get("source", ""))]
    if to_node:
        edges = [e for e in edges if to_node in str(e.get("target", ""))]

    # A negative limit would slice from the end and inflate the "more" count
    # below, so treat it as "print nothing".
    limit = max(limit, 0)
    for e in edges[:limit]:
        click.echo(
            f" {e.get('source', '')} --{e.get('kind', '')}--> {e.get('target', '')}"
        )
    if len(edges) > limit:
        click.echo(_color(f" ... and {len(edges) - limit} more (use --limit)", CYAN))
    click.echo(_color(f"Total: {len(edges)} edges", CYAN))
|
|
388
|
+
|
|
389
|
+
|
|
390
|
+
@cli.command()
@click.option(
    "--repo",
    default=".",
    type=click.Path(exists=True, path_type=Path),
    help="Repo root",
)
@click.option(
    "--debounce",
    type=float,
    default=None,
    help="Debounce seconds (default from config, typically 2s)",
)
@click.option(
    "--quiet",
    is_flag=True,
    help="Suppress per-file output; show only update summaries.",
)
@click.pass_context
def watch(
    ctx: click.Context,
    repo: Path,
    debounce: float | None,
    quiet: bool,
) -> None:
    """Watch for file changes and re-index automatically."""
    from coderay.core.config import load_config
    from coderay.pipeline.watcher import FileWatcher

    index_dir = ctx.obj["index_dir"]
    if not index_exists(index_dir):
        click.echo(
            _color(
                "No index found. Run 'coderay build' first.",
                YELLOW,
            )
        )
        ctx.exit(1)

    config = load_config(index_dir)
    if debounce is not None:
        # Replace the section with a copy instead of mutating it in place: the
        # nested dict may be shared with the module-level defaults.
        config["watch"] = {
            **(config.get("watch") or {}),
            "debounce_seconds": debounce,
        }

    if quiet:
        logging.getLogger("coderay.pipeline.watcher").setLevel(logging.WARNING)

    watcher = FileWatcher(repo, index_dir, config=config)

    # `or {}` tolerates a config whose "watch" section is present but null.
    debounce_seconds = (config.get("watch") or {}).get("debounce_seconds", 2)
    click.echo(
        _color(
            f"Watching {repo.resolve()} "
            f"(debounce={debounce_seconds}s, "
            f"Ctrl+C to stop)",
            CYAN,
        )
    )

    # Do an incremental update at start-up. It runs under the indexer lock,
    # like the build and update commands, so it cannot overlap with another
    # process writing to the same index.
    indexer = Indexer(repo, index_dir)
    with acquire_indexer_lock(index_dir):
        indexer.update_incremental()

    watcher.start()
    try:
        # Keep the main thread idle until the user presses Ctrl+C.
        while True:
            time.sleep(1)
    except KeyboardInterrupt:
        click.echo("")
        click.echo(_color("Stopping watcher...", CYAN))
    finally:
        watcher.stop()
        click.echo(
            _color(
                f"Done. {watcher.update_count} update(s) performed.",
                GREEN,
            )
        )
|
|
467
|
+
|
|
468
|
+
|
|
469
|
+
def main() -> None:
    """Run the ``coderay`` command-line interface.

    Invokes the click group with a fresh, empty dict as the context object.
    """
    context_object: dict = {}
    cli(obj=context_object)


# Allow running this module directly as a script.
if __name__ == "__main__":
    main()
|
coderay/core/__init__.py
ADDED
|
File without changes
|
coderay/core/config.py
ADDED
|
@@ -0,0 +1,73 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
import logging
|
|
4
|
+
from pathlib import Path
|
|
5
|
+
from typing import Any
|
|
6
|
+
|
|
7
|
+
import yaml
|
|
8
|
+
|
|
9
|
+
logger = logging.getLogger(__name__)
|
|
10
|
+
|
|
11
|
+
# Embedding vector size used when the config does not provide one.
DEFAULT_EMBEDDING_DIMENSIONS = 384

# Built-in settings; load_config() overlays an optional config.yaml on these.
DEFAULT_CONFIG: dict[str, Any] = {
    # Embedding backend and model.
    "embedder": {
        "provider": "local",
        "model": "sentence-transformers/all-MiniLM-L6-v2",
        "dimensions": DEFAULT_EMBEDDING_DIMENSIONS,
    },
    # Index location and indexing defaults.
    "index": {
        "path": ".index",
        "default_top_k": 10,
        "exclude_patterns": [],  # besides .gitignore
    },
    # Search-time settings.
    "search": {
        "boost_rules": {},
    },
    # Code-graph settings.
    "graph": {
        "exclude_callees": [],
        "include_callees": [],
    },
    # File-watcher settings.
    "watch": {
        "debounce_seconds": 2,
        "branch_switch_threshold": 50,
        "exclude_patterns": [],
    },
}
|
|
37
|
+
|
|
38
|
+
|
|
39
|
+
def get_embedding_dimensions(config: dict[str, Any]) -> int:
    """Look up the embedding vector size in *config*.

    Falls back to ``DEFAULT_EMBEDDING_DIMENSIONS`` when the ``embedder``
    section or its ``dimensions`` entry is missing or falsy. The value is
    converted with ``int()`` before being returned.
    """
    embedder_section = config.get("embedder") or {}
    configured = embedder_section.get("dimensions")
    if not configured:
        configured = DEFAULT_EMBEDDING_DIMENSIONS
    return int(configured)
|
|
44
|
+
|
|
45
|
+
|
|
46
|
+
def find_config(index_dir: Path) -> Path | None:
    """Locate ``config.yaml`` directly inside *index_dir*.

    Returns the path when it is an existing regular file, otherwise ``None``.
    """
    candidate = index_dir / "config.yaml"
    if candidate.is_file():
        return candidate
    return None
|
|
50
|
+
|
|
51
|
+
|
|
52
|
+
def load_config(index_dir: str | Path | None = None) -> dict[str, Any]:
    """Load config by merging defaults with optional config.yaml.

    Args:
        index_dir: Directory searched for ``config.yaml``. When omitted (or
            otherwise falsy), ``.index`` under the current working directory
            is used.

    Returns:
        A new config dict. It is fully independent of ``DEFAULT_CONFIG``, so
        callers may mutate it freely. If the file cannot be read or parsed, a
        warning is logged and the defaults are returned.
    """
    import copy

    base = Path(index_dir or Path.cwd() / ".index")
    # Deep copy: a shallow dict() copy would share the nested section dicts
    # and lists with DEFAULT_CONFIG, so a caller mutating the result would
    # silently change the defaults for every later load.
    config = copy.deepcopy(DEFAULT_CONFIG)
    cfg_path = find_config(base)
    if cfg_path:
        try:
            # Explicit encoding so parsing does not depend on the platform's
            # default locale encoding.
            with open(cfg_path, encoding="utf-8") as f:
                overrides = yaml.safe_load(f) or {}
            if isinstance(overrides, dict):
                _deep_merge(config, overrides)
            else:
                logger.warning(
                    "Ignoring config at %s: expected a mapping at top level, got %s",
                    cfg_path,
                    type(overrides).__name__,
                )
        except Exception as e:
            # Best effort: a broken config file must not prevent start-up.
            logger.warning("Failed to load config from %s: %s", cfg_path, e)
    return config
|
|
65
|
+
|
|
66
|
+
|
|
67
|
+
def _deep_merge(base: dict, overrides: dict) -> None:
    """Overlay *overrides* onto *base*, modifying *base* in place.

    When both sides hold a dict under the same key, the two dicts are combined
    one level deep into a new dict (override entries win). Any other value
    replaces the base value wholesale.
    """
    for key, incoming in overrides.items():
        existing = base.get(key)
        both_dicts = isinstance(existing, dict) and isinstance(incoming, dict)
        base[key] = {**existing, **incoming} if both_dicts else incoming
|
coderay/core/lock.py
ADDED
|
@@ -0,0 +1,36 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
import logging
|
|
4
|
+
from collections.abc import Generator
|
|
5
|
+
from contextlib import contextmanager
|
|
6
|
+
from pathlib import Path
|
|
7
|
+
|
|
8
|
+
from filelock import FileLock
|
|
9
|
+
|
|
10
|
+
logger = logging.getLogger(__name__)
|
|
11
|
+
|
|
12
|
+
LOCK_FILENAME = ".indexer.lock"
|
|
13
|
+
DEFAULT_TIMEOUT = 300 # seconds to wait for lock before giving up
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
def lock_path(index_dir: str | Path) -> Path:
    """Return the location of the lock file within *index_dir*."""
    directory = Path(index_dir)
    return directory / LOCK_FILENAME
|
|
19
|
+
|
|
20
|
+
|
|
21
|
+
@contextmanager
def acquire_indexer_lock(
    index_dir: str | Path,
    timeout: float = DEFAULT_TIMEOUT,
) -> Generator[FileLock, None, None]:
    """Acquire exclusive file lock for index writes.

    Args:
        index_dir: Index directory; the lock file lives inside it and the
            directory is created if it does not exist.
        timeout: Passed to ``FileLock`` as its acquisition timeout.

    Yields:
        The held ``FileLock``. It is released when the ``with`` block exits,
        whether normally or through an exception.

    Any exception raised by ``FileLock.acquire`` propagates to the caller.
    """
    path = lock_path(index_dir)
    path.parent.mkdir(parents=True, exist_ok=True)
    lock = FileLock(str(path), timeout=timeout)
    # Acquire outside the try block: if acquisition fails there is nothing to
    # release, and no "Released" message should be logged.
    lock.acquire()
    try:
        logger.debug("Acquired indexer lock at %s", path)
        yield lock
    finally:
        lock.release()
        logger.debug("Released indexer lock at %s", path)
|
coderay/core/models.py
ADDED
|
@@ -0,0 +1,71 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
from dataclasses import dataclass
|
|
4
|
+
from enum import Enum
|
|
5
|
+
|
|
6
|
+
|
|
7
|
+
@dataclass
class Chunk:
    """One piece of source code, with its location, prepared for embedding."""

    path: str
    start_line: int
    end_line: int
    symbol: str
    language: str
    content: str

    def line_range(self) -> tuple[int, int]:
        """Give the chunk's span as a ``(start_line, end_line)`` pair."""
        span = (self.start_line, self.end_line)
        return span
|
|
21
|
+
|
|
22
|
+
|
|
23
|
+
class NodeKind(str, Enum):
    """Categories of code-graph nodes: module, class, or function.

    Members are also ``str`` instances, so they compare equal to their values.
    """

    MODULE = "module"
    CLASS = "class"
    FUNCTION = "function"
|
|
29
|
+
|
|
30
|
+
|
|
31
|
+
class EdgeKind(str, Enum):
    """Categories of directed code-graph edges: imports, defines, calls, inherits.

    Members are also ``str`` instances, so they compare equal to their values.
    """

    IMPORTS = "imports"
    DEFINES = "defines"
    CALLS = "calls"
    INHERITS = "inherits"
|
|
38
|
+
|
|
39
|
+
|
|
40
|
+
@dataclass(frozen=True)
class GraphNode:
    """An immutable code-graph node describing a module, class, or function."""

    id: str
    kind: NodeKind
    file_path: str
    start_line: int
    end_line: int
    name: str
    qualified_name: str

    def to_dict(self) -> dict:
        """Return the node as a plain dict, with ``kind`` given as its enum value."""
        data = dict(
            id=self.id,
            kind=self.kind.value,
            file_path=self.file_path,
            start_line=self.start_line,
            end_line=self.end_line,
            name=self.name,
            qualified_name=self.qualified_name,
        )
        return data
|
|
63
|
+
|
|
64
|
+
|
|
65
|
+
@dataclass(frozen=True)
class GraphEdge:
    """An immutable directed edge from ``source`` to ``target`` in the code graph."""

    source: str
    target: str
    kind: EdgeKind
|
coderay/core/timing.py
ADDED
|
@@ -0,0 +1,45 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
import functools
|
|
4
|
+
import logging
|
|
5
|
+
import time
|
|
6
|
+
from collections.abc import Callable
|
|
7
|
+
from typing import TypeVar
|
|
8
|
+
|
|
9
|
+
logger = logging.getLogger(__name__)
|
|
10
|
+
|
|
11
|
+
F = TypeVar("F", bound=Callable[..., object])
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
def timed(phase: str) -> Callable[[F], F]:
    """Build a decorator that logs each call's duration at DEBUG level.

    Args:
        phase: Label placed in front of the elapsed time in the log line.

    The duration is logged even when the wrapped function raises.
    """

    def decorator(fn: F) -> F:
        @functools.wraps(fn)
        def wrapper(*args: object, **kwargs: object) -> object:
            started = time.perf_counter()
            try:
                return fn(*args, **kwargs)
            finally:
                logger.debug("%s: %.3fs", phase, time.perf_counter() - started)

        return wrapper  # type: ignore[return-value]

    return decorator
|
|
30
|
+
|
|
31
|
+
|
|
32
|
+
class timed_phase:
    """Context manager that logs, at INFO level, how long its block took."""

    def __init__(self, phase: str) -> None:
        # Label placed in front of the elapsed time in the log line.
        self.phase = phase
        # Start timestamp; set for real when the block is entered.
        self.t0: float = 0.0

    def __enter__(self) -> timed_phase:
        self.t0 = time.perf_counter()
        return self

    def __exit__(self, *args: object) -> None:
        # Returns None, so an exception raised inside the block propagates.
        logger.info("%s: %.3fs", self.phase, time.perf_counter() - self.t0)
|