codemap-core 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- codemap/__init__.py +7 -0
- codemap/cli/__init__.py +3 -0
- codemap/cli/_common.py +90 -0
- codemap/cli/commands/__init__.py +3 -0
- codemap/cli/commands/callees.py +102 -0
- codemap/cli/commands/callers.py +107 -0
- codemap/cli/commands/config.py +78 -0
- codemap/cli/commands/diagnostics.py +142 -0
- codemap/cli/commands/doctor.py +158 -0
- codemap/cli/commands/get.py +93 -0
- codemap/cli/commands/index.py +725 -0
- codemap/cli/commands/routes.py +104 -0
- codemap/cli/commands/search.py +78 -0
- codemap/cli/commands/trace.py +179 -0
- codemap/cli/main.py +140 -0
- codemap/cli/renderers/__init__.py +3 -0
- codemap/cli/renderers/json.py +32 -0
- codemap/cli/renderers/text.py +24 -0
- codemap/config/__init__.py +31 -0
- codemap/config/loader.py +96 -0
- codemap/config/schema.py +122 -0
- codemap/core/__init__.py +7 -0
- codemap/core/bridge/__init__.py +8 -0
- codemap/core/bridge/base.py +38 -0
- codemap/core/bridge/http_route.py +374 -0
- codemap/core/bridge/python_cross_module.py +120 -0
- codemap/core/bridge/registry.py +117 -0
- codemap/core/graph.py +183 -0
- codemap/core/models.py +299 -0
- codemap/core/store.py +78 -0
- codemap/core/symbol.py +314 -0
- codemap/diagnostics/__init__.py +3 -0
- codemap/diagnostics/exit_codes.py +30 -0
- codemap/diagnostics/logging.py +65 -0
- codemap/diagnostics/progress.py +68 -0
- codemap/indexers/__init__.py +9 -0
- codemap/indexers/_example_lang.py +135 -0
- codemap/indexers/base.py +77 -0
- codemap/indexers/python.py +577 -0
- codemap/indexers/registry.py +104 -0
- codemap/io/__init__.py +8 -0
- codemap/io/atomic.py +97 -0
- codemap/io/base.py +12 -0
- codemap/io/json_store.py +433 -0
- codemap/io/lock.py +87 -0
- codemap/io/manifest.py +90 -0
- codemap/mcp/__init__.py +3 -0
- codemap_core-0.1.0.dist-info/METADATA +480 -0
- codemap_core-0.1.0.dist-info/RECORD +52 -0
- codemap_core-0.1.0.dist-info/WHEEL +4 -0
- codemap_core-0.1.0.dist-info/entry_points.txt +10 -0
- codemap_core-0.1.0.dist-info/licenses/LICENSE +21 -0
|
@@ -0,0 +1,725 @@
|
|
|
1
|
+
"""``codemap index`` — scan a project and write `.codemap/`."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
import fnmatch
|
|
6
|
+
import hashlib
|
|
7
|
+
import logging
|
|
8
|
+
import os
|
|
9
|
+
from collections.abc import Iterable, Iterator
|
|
10
|
+
from datetime import UTC, datetime
|
|
11
|
+
from pathlib import Path, PurePosixPath
|
|
12
|
+
from typing import TYPE_CHECKING, Annotated, Any
|
|
13
|
+
|
|
14
|
+
import typer
|
|
15
|
+
|
|
16
|
+
if TYPE_CHECKING:
|
|
17
|
+
from codemap.indexers.registry import IndexerRegistry
|
|
18
|
+
|
|
19
|
+
from codemap import __version__
|
|
20
|
+
from codemap.cli.renderers import json as json_renderer
|
|
21
|
+
from codemap.cli.renderers import text
|
|
22
|
+
from codemap.config import Config, ConfigError, load_config
|
|
23
|
+
from codemap.config.schema import DEFAULT_PRUNE_DIRS
|
|
24
|
+
from codemap.core.bridge.registry import get_registry as get_bridges
|
|
25
|
+
from codemap.core.models import (
|
|
26
|
+
BridgeEntry,
|
|
27
|
+
Diagnostic,
|
|
28
|
+
FileEntry,
|
|
29
|
+
IndexerEntry,
|
|
30
|
+
Manifest,
|
|
31
|
+
)
|
|
32
|
+
from codemap.diagnostics.exit_codes import ExitCode
|
|
33
|
+
from codemap.diagnostics.progress import progress_bar
|
|
34
|
+
from codemap.indexers.base import IndexContext, Indexer
|
|
35
|
+
from codemap.indexers.registry import get_registry as get_indexers
|
|
36
|
+
from codemap.io.json_store import JsonStore
|
|
37
|
+
|
|
38
|
+
logger = logging.getLogger(__name__)
|
|
39
|
+
|
|
40
|
+
CODEMAP_DIR = ".codemap"
|
|
41
|
+
_PRUNE_DIRS = frozenset(DEFAULT_PRUNE_DIRS)
|
|
42
|
+
|
|
43
|
+
|
|
44
|
+
def register(app: typer.Typer) -> None:
    """Attach the ``index`` sub-command to *app*.

    The command is defined as a closure and registered through
    ``@app.command``; this function itself returns nothing.
    """

    @app.command("index")
    def index(
        ctx: typer.Context,
        path: Annotated[
            Path,
            typer.Argument(
                exists=True,
                file_okay=False,
                dir_okay=True,
                resolve_path=True,
                help="Project root to index.",
            ),
        ] = Path("."),
        rebuild: Annotated[
            bool,
            typer.Option("--rebuild", help="Discard any existing `.codemap/` and rebuild."),
        ] = False,
        incremental: Annotated[
            bool,
            typer.Option(
                "--incremental",
                help="Only re-parse files whose sha256 changed since the last run.",
            ),
        ] = False,
        watch: Annotated[
            bool,
            typer.Option(
                "--watch",
                help="Stay running and re-index files as they change (requires watchdog).",
            ),
        ] = False,
        dry_run: Annotated[
            bool,
            typer.Option(
                "--dry-run",
                help="Scan and report what would be indexed, but do not write to disk.",
            ),
        ] = False,
        no_progress: Annotated[
            bool,
            typer.Option("--no-progress", help="Disable the progress bar."),
        ] = False,
    ) -> None:
        """Walk the project and produce / refresh `.codemap/`."""
        as_json: bool = ctx.obj["json_output"]
        codemap_dir = path / CODEMAP_DIR

        # --rebuild throws away the whole index directory, including any
        # config stored in it.  A dry run must not touch the disk, so it only
        # *pretends* the directory is gone: nothing is deleted and the config
        # is loaded as if no index existed, exactly like the real rebuild.
        reuse_existing = codemap_dir.exists()
        if rebuild and reuse_existing:
            if not dry_run:
                _remove_index(codemap_dir)
            reuse_existing = False

        try:
            config = load_config(codemap_dir if reuse_existing else None)
        except ConfigError as exc:
            text.console(stderr=True).print(f"[red]Config error:[/red] {exc}")
            raise typer.Exit(code=int(ExitCode.CONFIG_ERROR)) from exc

        registry = get_indexers()
        indexer_list = _select_indexers(registry, config)
        if not indexer_list:
            _die_no_indexers(as_json)

        if dry_run:
            files = _collect_files(path, indexer_list, config)
            _emit_dry_run(as_json, path, files, indexer_list, config)
            return

        if watch:
            # Watch mode collects files itself on every pass.
            _run_watch_mode(
                path,
                codemap_dir,
                indexer_list,
                registry,
                config,
                no_progress=no_progress,
            )
            return

        # An incremental run needs the previous manifest to diff against.
        use_incremental = incremental and (codemap_dir / "manifest.json").exists()
        if incremental and not use_incremental:
            logger.warning("--incremental requested but no existing index; running full build")

        # The incremental path walks the tree on its own, so only the full
        # build needs the file list.  It is gathered before the store is
        # opened, as before.
        files = [] if use_incremental else _collect_files(path, indexer_list, config)

        stats = _IndexStats()
        with JsonStore.open(codemap_dir) as store:
            if use_incremental:
                _do_incremental(
                    path,
                    indexer_list,
                    registry,
                    config,
                    store,
                    stats,
                    no_progress=no_progress,
                )
            else:
                with progress_bar("Indexing", total=len(files), enabled=not no_progress) as bar:
                    for file_path in files:
                        _index_one(file_path, path, store, registry, stats, bar, config)
                _run_bridges(store, stats, config)
            m = _build_manifest(path, indexer_list, stats, config)
            store.set_manifest(m)
            store.commit()

        # One ordered mapping feeds both output formats so they cannot drift.
        counts = {
            "files_scanned": stats.files_scanned,
            "files_indexed": stats.files_indexed,
            "symbols": stats.symbols,
            "edges": stats.edges,
            "routes": stats.routes,
            "diagnostics": stats.diagnostics,
            "bridges_run": stats.bridges_run,
        }
        if as_json:
            json_renderer.emit("index", {"project_root": str(path), **counts})
        else:
            cons = text.console()
            cons.print(f"[green]Indexed {stats.files_indexed} files[/green]")
            cons.print(
                text.table(
                    "Result",
                    ["metric", "count"],
                    [[metric, count] for metric, count in counts.items()],
                )
            )
|
|
181
|
+
|
|
182
|
+
|
|
183
|
+
# ---------------------------------------------------------------------------
|
|
184
|
+
# Helpers
|
|
185
|
+
# ---------------------------------------------------------------------------
|
|
186
|
+
|
|
187
|
+
|
|
188
|
+
class _IndexStats:
    """Mutable tallies collected while one indexing pass runs."""

    def __init__(self) -> None:
        # Per-file manifest entries, keyed by project-relative POSIX path.
        self.file_entries: dict[str, FileEntry] = {}
        # Number of files each indexer (by name) processed successfully.
        self.per_indexer: dict[str, int] = {}

        # File-level counters.
        self.files_scanned: int = 0
        self.files_indexed: int = 0

        # Totals of records produced by indexers and bridges.
        self.symbols: int = 0
        self.edges: int = 0
        self.routes: int = 0
        self.diagnostics: int = 0

        # Bridges whose ``resolve`` call completed without raising.
        self.bridges_run: int = 0
|
|
199
|
+
|
|
200
|
+
|
|
201
|
+
def _collect_files(
    root: Path,
    indexers: Iterable[Indexer],
    config: Config,
) -> list[Path]:
    """Return the sorted list of files under *root* that should be indexed.

    A file qualifies when its name matches a ``file_patterns`` glob of any
    given indexer and its project-relative POSIX path matches none of
    ``config.index.ignore``.  Directories named in ``_PRUNE_DIRS`` or matching
    an ignore pattern (by bare name or by relative path) are not descended
    into.
    """
    wanted_globs = [glob for indexer in indexers for glob in indexer.file_patterns]
    ignored = config.index.ignore
    found: list[Path] = []

    for here, subdirs, filenames in _walk(root, follow_symlinks=config.index.follow_symlinks):
        here_path = Path(here)

        for filename in filenames:
            if not _matches_any(filename, wanted_globs):
                continue
            candidate = here_path / filename
            if _matches_any(candidate.relative_to(root).as_posix(), ignored):
                continue
            found.append(candidate)

        # Prune in place so the walker never enters the rejected directories.
        kept: list[str] = []
        for subdir in subdirs:
            if subdir in _PRUNE_DIRS:
                continue
            if _matches_any(subdir, ignored):
                continue
            if _matches_any((here_path / subdir).relative_to(root).as_posix(), ignored):
                continue
            kept.append(subdir)
        subdirs[:] = kept

    found.sort()
    return found
|
|
229
|
+
|
|
230
|
+
|
|
231
|
+
def _matches_any(candidate: str, patterns: list[str]) -> bool:
    """Return True when *candidate* matches at least one ``fnmatch`` pattern."""
    for pattern in patterns:
        if fnmatch.fnmatch(candidate, pattern):
            return True
    return False
|
|
233
|
+
|
|
234
|
+
|
|
235
|
+
def _walk(
    root: Path,
    *,
    follow_symlinks: bool,
) -> Iterator[tuple[str, list[str], list[str]]]:
    """Yield ``(dirpath, dirnames, filenames)`` like a top-down ``os.walk``.

    The yielded ``dirnames`` list is the one ``os.walk`` itself uses, so
    callers may prune it in place to stop descent into a directory.

    When *follow_symlinks* is true, a directory whose real path equals the
    real path of one of its own ancestors in the walk is skipped.
    ``os.walk`` has no such protection and would keep re-entering a symlink
    that points back up the tree.  Directories that are merely reachable
    through two different paths are still visited under each path.
    """
    if not follow_symlinks:
        yield from os.walk(root, followlinks=False)
        return

    # Real path of every directory yielded so far, keyed by its walk path.
    real_by_dir: dict[str, str] = {}
    for current, dirs, names in os.walk(root, followlinks=True):
        real = os.path.realpath(current)

        # Climb the chain of walk ancestors looking for the same real path.
        is_cycle = False
        ancestor = os.path.dirname(current)
        while ancestor in real_by_dir:
            if real_by_dir[ancestor] == real:
                is_cycle = True
                break
            parent = os.path.dirname(ancestor)
            if parent == ancestor:  # reached the filesystem root
                break
            ancestor = parent

        if is_cycle:
            dirs[:] = []  # do not descend any further along the loop
            continue

        real_by_dir[current] = real
        yield current, dirs, names
|
|
242
|
+
|
|
243
|
+
|
|
244
|
+
def _select_indexers(registry, config: Config) -> list[Indexer]:  # type: ignore[no-untyped-def]
    """Return the registered indexers allowed by ``config.indexers``.

    ``enabled`` is either the string ``"all"`` or an iterable of names, and
    ``disabled`` always wins.  Names not present in the registry are ignored.
    The result follows registry order for ``"all"`` and the order of
    ``enabled`` otherwise.
    """
    available = registry.all()
    requested = config.indexers.enabled
    blocked = set(config.indexers.disabled)

    if requested == "all":
        candidates = list(available)
    else:
        candidates = [name for name in requested if name in available]

    return [available[name] for name in candidates if name not in blocked]
|
|
254
|
+
|
|
255
|
+
|
|
256
|
+
def _select_bridges(config: Config) -> list[Any]:
    """Return the bridges allowed by ``config.bridges`` in topological order.

    ``enabled`` is either ``"all"`` or an iterable of names; anything listed
    in ``disabled`` is dropped, as are names the registry does not know.
    """
    bridge_registry = get_bridges()
    known = bridge_registry.all()
    requested = config.bridges.enabled
    blocked = set(config.bridges.disabled)

    if requested == "all":
        selected = set(known) - blocked
    else:
        selected = {name for name in requested if name in known} - blocked

    ordered = bridge_registry.topological_order()
    return [bridge for bridge in ordered if bridge.name in selected]
|
|
267
|
+
|
|
268
|
+
|
|
269
|
+
def _index_one(
    file_path: Path,
    project_root: Path,
    store: JsonStore,
    registry: IndexerRegistry,
    stats: _IndexStats,
    bar: Any,
    config: Config,
) -> None:
    """Index one file with every enabled indexer that claims it.

    The file is skipped when it cannot be stat'ed, exceeds
    ``config.index.max_file_bytes``, or has no enabled matching indexer.  A
    read failure is stored as an ``IO001`` diagnostic and an indexer
    exception as an ``INDEXER_CRASH`` diagnostic; neither aborts the run.
    Results are upserted into *store* and tallied in *stats*.  The manifest
    entry for the file is taken from the first indexer that succeeds.
    """
    stats.files_scanned += 1
    try:
        # A single stat supplies both the size and the mtime, so the file
        # entry is built without a second call that could raise.
        file_stat = file_path.stat()
    except OSError as exc:
        logger.warning("cannot stat %s: %s", file_path, exc)
        return
    size = file_stat.st_size
    max_bytes = config.index.max_file_bytes
    if size > max_bytes:
        logger.warning("skipping %s (size %d > %d)", file_path, size, max_bytes)
        return
    rel = PurePosixPath(file_path.relative_to(project_root).as_posix())
    rel_key = str(rel)

    enabled_names = {ix.name for ix in _select_indexers(registry, config)}
    matches = [ix for ix in registry.for_path(file_path) if ix.name in enabled_names]
    if not matches:
        return
    try:
        source = file_path.read_bytes()
    except OSError as exc:
        logger.warning("cannot read %s: %s", file_path, exc)
        store.upsert_diagnostics(
            [
                Diagnostic(
                    severity="error",
                    file=rel,
                    code="IO001",
                    message=f"cannot read file: {exc}",
                    producer="codemap.index",
                )
            ]
        )
        stats.diagnostics += 1
        return

    digest = hashlib.sha256(source).hexdigest()
    indexed_any = False
    for ix in matches:
        ctx = IndexContext(
            project_root=project_root,
            relative_path=rel,
            language=(ix.languages[0] if ix.languages else "unknown"),
        )
        try:
            result = ix.index_file(file_path, source, ctx)
        except Exception as exc:
            # One misbehaving indexer must not stop the others or the run.
            logger.exception("indexer %s failed on %s", ix.name, file_path)
            store.upsert_diagnostics(
                [
                    Diagnostic(
                        severity="error",
                        file=rel,
                        code="INDEXER_CRASH",
                        message=_short_exception_message(ix.name, exc),
                        producer=ix.name,
                    )
                ]
            )
            stats.diagnostics += 1
            continue
        store.upsert_symbols(result.symbols)
        store.upsert_edges(result.edges)
        store.upsert_routes(result.routes)
        store.upsert_diagnostics(result.diagnostics)
        stats.symbols += len(result.symbols)
        stats.edges += len(result.edges)
        stats.routes += len(result.routes)
        stats.diagnostics += len(result.diagnostics)
        stats.per_indexer[ix.name] = stats.per_indexer.get(ix.name, 0) + 1
        indexed_any = True
        # Use the first matching indexer's language for the file entry.
        if rel_key not in stats.file_entries:
            stats.file_entries[rel_key] = FileEntry(
                sha256=digest,
                mtime_ns=file_stat.st_mtime_ns,
                size=size,
                language=ctx.language,
                indexer_version=ix.version,
                symbol_count=len(result.symbols),
                indexed_at=datetime.now(UTC),
            )
    if indexed_any:
        stats.files_indexed += 1
    bar.advance(0)  # update spinner
|
|
360
|
+
|
|
361
|
+
|
|
362
|
+
def _run_bridges(store: JsonStore, stats: _IndexStats, config: Config) -> None:
    """Run every selected bridge against *store* and record its output.

    A bridge that raises is logged and stored as a ``BRIDGE_CRASH``
    diagnostic; the remaining bridges still run.
    """
    for bridge in _select_bridges(config):
        try:
            outcome = bridge.resolve(store)
        except Exception as exc:
            logger.exception("bridge %s failed", bridge.name)
            crash = Diagnostic(
                severity="error",
                file=PurePosixPath("."),
                code="BRIDGE_CRASH",
                message=_short_exception_message(bridge.name, exc),
                producer=bridge.name,
            )
            store.upsert_diagnostics([crash])
            stats.diagnostics += 1
        else:
            # Persist what the bridge produced, then fold it into the totals.
            store.upsert_edges(outcome.edges)
            store.upsert_aliases(outcome.aliases)
            store.upsert_routes(outcome.routes)
            store.upsert_diagnostics(outcome.diagnostics)
            stats.bridges_run += 1
            stats.edges += len(outcome.edges)
            stats.routes += len(outcome.routes)
            stats.diagnostics += len(outcome.diagnostics)
|
|
390
|
+
|
|
391
|
+
|
|
392
|
+
def _build_manifest(
    root: Path,
    indexers: list[Indexer],
    stats: _IndexStats,
    config: Config,
) -> Manifest:
    """Assemble the ``Manifest`` describing the pass summarised in *stats*.

    Each indexer entry carries the per-indexer file count from *stats*
    (zero when the indexer handled nothing).  Bridge entries are always
    written with ``edge_count=0``.
    """
    resolved_root = str(root.resolve())
    backend = config.storage.backend

    indexer_entries = []
    for indexer in indexers:
        indexer_entries.append(
            IndexerEntry(
                name=indexer.name,
                version=indexer.version,
                file_count=stats.per_indexer.get(indexer.name, 0),
            )
        )

    bridge_entries = []
    for bridge in _select_bridges(config):
        bridge_entries.append(
            BridgeEntry(name=bridge.name, version=bridge.version, edge_count=0)
        )

    return Manifest(
        codemap_version=__version__,
        project_root=resolved_root,
        storage_backend=backend,
        indexers=indexer_entries,
        bridges=bridge_entries,
        files=stats.file_entries,
    )
|
|
416
|
+
|
|
417
|
+
|
|
418
|
+
def _remove_index(codemap_dir: Path) -> None:
    """Delete *codemap_dir* and everything below it; errors propagate."""
    from shutil import rmtree

    rmtree(codemap_dir, ignore_errors=False)
|
|
422
|
+
|
|
423
|
+
|
|
424
|
+
def _do_incremental(
    project_root: Path,
    indexer_list: list[Indexer],
    registry: IndexerRegistry,
    config: Config,
    store: JsonStore,
    stats: _IndexStats,
    *,
    no_progress: bool,
) -> None:
    """Re-index only files whose sha256 changed since the last manifest.

    Files recorded in the previous manifest but no longer collected are
    removed from *store*.  Unchanged files keep their previous manifest
    entry.  Files larger than ``config.index.max_file_bytes`` are skipped,
    as in a full build, and any records a previous run stored for them are
    removed.  A file that cannot be read keeps its previous entry so the
    manifest keeps describing the records still held in *store*.  Bridges
    are re-run whenever anything was added, changed or removed.
    """
    prev_files = dict(store.manifest().files)
    current_paths = _collect_files(project_root, indexer_list, config)
    max_bytes = config.index.max_file_bytes

    keyed_paths = [(p, p.relative_to(project_root).as_posix()) for p in current_paths]

    # Everything whose old records have to go: vanished files first,
    # oversized ones are added while scanning below.
    removed = set(prev_files) - {rel for _, rel in keyed_paths}
    for rel in removed:
        store.delete_by_file(rel)
        stats.files_scanned += 1

    changed: list[tuple[Path, str, bytes, str, int]] = []
    for f, rel in keyed_paths:
        stats.files_scanned += 1
        prev = prev_files.get(rel)
        try:
            on_disk_size = f.stat().st_size
            # Do not pull an oversized file into memory just to reject it.
            source = None if on_disk_size > max_bytes else f.read_bytes()
        except OSError as exc:
            logger.warning("cannot read %s: %s", f, exc)
            if prev is not None:
                # The old records are still in the store; keep the manifest
                # entry so they are not orphaned.
                stats.file_entries[rel] = prev
            continue
        if source is None:
            logger.warning("skipping %s (size %d > %d)", f, on_disk_size, max_bytes)
            if prev is not None:
                store.delete_by_file(rel)
                removed.add(rel)
            continue
        digest = hashlib.sha256(source).hexdigest()
        if prev is not None and prev.sha256 == digest:
            stats.file_entries[rel] = prev
            continue
        changed.append((f, rel, source, digest, len(source)))

    if changed:
        # Drop every stale record first so re-indexing starts clean.
        for _, rel, _, _, _ in changed:
            store.delete_by_file(rel)
        with progress_bar("Re-indexing", total=len(changed), enabled=not no_progress) as bar:
            for f, rel, source, digest, size in changed:
                _index_one_prefetched(
                    f,
                    project_root,
                    rel,
                    source,
                    digest,
                    size,
                    store,
                    registry,
                    stats,
                    bar,
                    config,
                )

    if changed or removed:
        store.clear_bridge_outputs()
        _run_bridges(store, stats, config)
|
|
484
|
+
|
|
485
|
+
|
|
486
|
+
def _index_one_prefetched(
    file_path: Path,
    project_root: Path,
    rel: str,
    source: bytes,
    digest: str,
    size: int,
    store: JsonStore,
    registry: IndexerRegistry,
    stats: _IndexStats,
    bar: Any,
    config: Config,
) -> None:
    """Like ``_index_one`` but reuses the pre-fetched source / digest / size.

    *rel* is the project-relative POSIX path used as the manifest key.  If
    the file can no longer be stat'ed (for example it was removed after its
    bytes were read), it is skipped with a warning instead of aborting the
    whole pass.
    """
    rel_posix = PurePosixPath(rel)
    enabled_names = {ix.name for ix in _select_indexers(registry, config)}
    matches = [ix for ix in registry.for_path(file_path) if ix.name in enabled_names]
    if not matches:
        return

    # Fetch the mtime before storing anything, so a vanished file cannot
    # leave records in the store without a manifest entry.
    try:
        mtime_ns = file_path.stat().st_mtime_ns
    except OSError as exc:
        logger.warning("cannot stat %s: %s", file_path, exc)
        return

    indexed_any = False
    for ix in matches:
        ctx = IndexContext(
            project_root=project_root,
            relative_path=rel_posix,
            language=(ix.languages[0] if ix.languages else "unknown"),
        )
        try:
            result = ix.index_file(file_path, source, ctx)
        except Exception as exc:
            # One misbehaving indexer must not stop the others or the run.
            logger.exception("indexer %s failed on %s", ix.name, file_path)
            store.upsert_diagnostics(
                [
                    Diagnostic(
                        severity="error",
                        file=rel_posix,
                        code="INDEXER_CRASH",
                        message=_short_exception_message(ix.name, exc),
                        producer=ix.name,
                    )
                ]
            )
            stats.diagnostics += 1
            continue
        store.upsert_symbols(result.symbols)
        store.upsert_edges(result.edges)
        store.upsert_routes(result.routes)
        store.upsert_diagnostics(result.diagnostics)
        stats.symbols += len(result.symbols)
        stats.edges += len(result.edges)
        stats.routes += len(result.routes)
        stats.diagnostics += len(result.diagnostics)
        stats.per_indexer[ix.name] = stats.per_indexer.get(ix.name, 0) + 1
        indexed_any = True
        # The first indexer that succeeds defines the manifest entry.
        if rel not in stats.file_entries:
            stats.file_entries[rel] = FileEntry(
                sha256=digest,
                mtime_ns=mtime_ns,
                size=size,
                language=ctx.language,
                indexer_version=ix.version,
                symbol_count=len(result.symbols),
                indexed_at=datetime.now(UTC),
            )
    if indexed_any:
        stats.files_indexed += 1
    bar.advance(0)
|
|
552
|
+
|
|
553
|
+
|
|
554
|
+
def _run_watch_mode(
    project_root: Path,
    codemap_dir: Path,
    indexer_list: list[Indexer],
    registry: IndexerRegistry,
    config: Config,
    *,
    no_progress: bool,
) -> None:
    """Run an initial index pass, then re-index whenever the tree changes.

    The first pass is incremental when ``manifest.json`` already exists and
    a full build otherwise.  Afterwards a watchdog observer flags file
    creations, modifications, deletions and moves outside *codemap_dir*;
    each burst of events triggers one further pass after a short debounce.
    Progress bars are always disabled in watch mode, so *no_progress* is
    currently not consulted.

    Raises:
        typer.Exit: with ``ExitCode.UNAVAILABLE`` when watchdog is missing.
    """
    try:
        from watchdog.events import FileSystemEvent, FileSystemEventHandler
        from watchdog.observers import Observer
    except ImportError as exc:
        text.console(stderr=True).print(
            "[red]--watch requires watchdog.[/red] Install it with `pip install codemap[watch]`."
        )
        raise typer.Exit(code=int(ExitCode.UNAVAILABLE)) from exc

    import threading
    import time

    cons = text.console()

    # Initial index (incremental if .codemap/ exists, else full).
    def run_pass() -> tuple[int, int, int]:
        stats = _IndexStats()
        with JsonStore.open(codemap_dir) as store:
            if (codemap_dir / "manifest.json").exists():
                _do_incremental(
                    project_root,
                    indexer_list,
                    registry,
                    config,
                    store,
                    stats,
                    no_progress=True,
                )
            else:
                files = _collect_files(project_root, indexer_list, config)
                with progress_bar("Indexing", total=len(files), enabled=False) as bar:
                    for f in files:
                        _index_one(f, project_root, store, registry, stats, bar, config)
                _run_bridges(store, stats, config)
            m = _build_manifest(project_root, indexer_list, stats, config)
            store.set_manifest(m)
            store.commit()
        return stats.files_indexed, stats.symbols, stats.edges

    files_indexed, symbols, edges = run_pass()
    cons.print(
        f"[green]Initial index:[/green] {files_indexed} files, {symbols} symbols, {edges} edges"
    )

    # threading.Event is thread-safe on its own; no extra lock is needed.
    pending = threading.Event()
    # Resolved once: events under this directory are our own writes.
    index_dir = codemap_dir.resolve()

    class Handler(FileSystemEventHandler):
        def _record(self, raw_path: bytes | str) -> None:
            """Flag a pending re-index unless *raw_path* lies in the index dir."""
            if not raw_path:
                return
            try:
                # watchdog may report paths as bytes; str() would mangle them.
                changed = Path(os.fsdecode(raw_path)).resolve()
            except (OSError, RuntimeError):
                return
            if changed.is_relative_to(index_dir):
                return  # our own write to .codemap/
            pending.set()

        def on_modified(self, event: FileSystemEvent) -> None:
            if not event.is_directory:
                self._record(event.src_path)

        def on_created(self, event: FileSystemEvent) -> None:
            if not event.is_directory:
                self._record(event.src_path)

        def on_deleted(self, event: FileSystemEvent) -> None:
            if not event.is_directory:
                self._record(event.src_path)

        def on_moved(self, event: FileSystemEvent) -> None:
            # A rename changes the relative path of a file, or of every file
            # below a directory, so both ends of the move matter.
            self._record(event.src_path)
            self._record(getattr(event, "dest_path", ""))

    observer = Observer()
    observer.schedule(Handler(), str(project_root), recursive=True)
    observer.start()
    cons.print(f"[bold]Watching[/bold] {project_root} for changes (Ctrl-C to stop)...")
    try:
        while True:
            triggered = pending.wait(timeout=1.0)
            if not triggered:
                continue
            # Debounce: collect events for an extra 500 ms
            time.sleep(0.5)
            # Clear before the pass so events arriving during it trigger
            # another pass afterwards.
            pending.clear()
            try:
                files_indexed, symbols, edges = run_pass()
                cons.print(
                    f"[dim]{datetime.now(UTC).strftime('%H:%M:%S')}[/dim] "
                    f"updated: {files_indexed} files, "
                    f"{symbols} symbols, {edges} edges"
                )
            except Exception as exc:  # pragma: no cover - keep watcher alive
                logger.exception("watch-mode pass failed: %s", exc)
    except KeyboardInterrupt:
        cons.print("\n[bold]Stopped.[/bold]")
    finally:
        observer.stop()
        observer.join(timeout=2)
|
|
664
|
+
|
|
665
|
+
|
|
666
|
+
def _short_exception_message(producer: str, exc: BaseException) -> str:
    """One-line summary of an exception suitable for a Diagnostic.

    Line breaks and other whitespace runs in the exception text are
    collapsed to single spaces, and the result is capped at 512 characters.
    """
    detail = " ".join(str(exc).split())
    return f"{producer} crashed: {type(exc).__name__}: {detail}"[:512]
|
|
669
|
+
|
|
670
|
+
|
|
671
|
+
def _emit_dry_run(
    as_json: bool,
    project_path: Path,
    files: list[Path],
    indexers: list[Indexer],
    config: Config,
) -> None:
    """Report what `codemap index` would do without writing anything.

    Counts, per enabled indexer, how many of *files* it would handle, then
    prints either a JSON payload or a short text summary.  Indexer names in
    the JSON payload are sorted so the output is stable between runs.
    """
    per_indexer: dict[str, int] = {ix.name: 0 for ix in indexers}
    enabled_names = {ix.name for ix in indexers}
    # The registry lookup does not depend on the file, so do it once.
    ix_registry = get_indexers()
    for f in files:
        for ix in ix_registry.for_path(f):
            if ix.name in enabled_names:
                per_indexer[ix.name] += 1
    if as_json:
        json_renderer.emit(
            "index",
            {
                "dry_run": True,
                "project_root": str(project_path),
                "files_matched": len(files),
                "per_indexer": per_indexer,
                "indexers_enabled": sorted(enabled_names),
                "bridges_enabled": [b.name for b in _select_bridges(config)],
            },
        )
        return
    cons = text.console()
    cons.print(
        f"[bold]Dry-run[/bold]: would index "
        f"[green]{len(files)}[/green] files into {project_path / CODEMAP_DIR}"
    )
    if files:
        cons.print(
            text.table(
                "Per indexer",
                ["indexer", "files"],
                [[name, str(count)] for name, count in sorted(per_indexer.items())],
            )
        )
|
|
712
|
+
|
|
713
|
+
|
|
714
|
+
def _die_no_indexers(as_json: bool) -> None:
    """Report that no indexer is available, then exit with ``UNAVAILABLE``.

    The report is a JSON error payload when *as_json* is set and a message
    on stderr otherwise.  This function always raises ``typer.Exit``.
    """
    if not as_json:
        text.console(stderr=True).print(
            "[red]No indexers registered.[/red] Install an indexer plugin or "
            "ensure `pip install -e .` ran for this package."
        )
    else:
        json_renderer.emit("index", {"error": "no indexers registered"})
    raise typer.Exit(code=int(ExitCode.UNAVAILABLE))
|
|
723
|
+
|
|
724
|
+
|
|
725
|
+
__all__ = ["register"]
|