codespine 1.0.7__tar.gz → 1.0.9__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {codespine-1.0.7 → codespine-1.0.9}/PKG-INFO +9 -9
- {codespine-1.0.7 → codespine-1.0.9}/README.md +8 -8
- {codespine-1.0.7 → codespine-1.0.9}/codespine/__init__.py +1 -1
- {codespine-1.0.7 → codespine-1.0.9}/codespine/cli.py +282 -30
- {codespine-1.0.7 → codespine-1.0.9}/codespine/config.py +4 -2
- {codespine-1.0.7 → codespine-1.0.9}/codespine/indexer/call_resolver.py +6 -0
- {codespine-1.0.7 → codespine-1.0.9}/codespine/indexer/engine.py +335 -235
- {codespine-1.0.7 → codespine-1.0.9}/codespine/mcp/server.py +3 -2
- {codespine-1.0.7 → codespine-1.0.9}/codespine.egg-info/PKG-INFO +9 -9
- {codespine-1.0.7 → codespine-1.0.9}/pyproject.toml +1 -1
- {codespine-1.0.7 → codespine-1.0.9}/tests/test_call_resolver.py +34 -0
- {codespine-1.0.7 → codespine-1.0.9}/tests/test_parse_resilience.py +96 -0
- {codespine-1.0.7 → codespine-1.0.9}/LICENSE +0 -0
- {codespine-1.0.7 → codespine-1.0.9}/codespine/analysis/__init__.py +0 -0
- {codespine-1.0.7 → codespine-1.0.9}/codespine/analysis/community.py +0 -0
- {codespine-1.0.7 → codespine-1.0.9}/codespine/analysis/context.py +0 -0
- {codespine-1.0.7 → codespine-1.0.9}/codespine/analysis/coupling.py +0 -0
- {codespine-1.0.7 → codespine-1.0.9}/codespine/analysis/crossmodule.py +0 -0
- {codespine-1.0.7 → codespine-1.0.9}/codespine/analysis/deadcode.py +0 -0
- {codespine-1.0.7 → codespine-1.0.9}/codespine/analysis/flow.py +0 -0
- {codespine-1.0.7 → codespine-1.0.9}/codespine/analysis/impact.py +0 -0
- {codespine-1.0.7 → codespine-1.0.9}/codespine/cache/__init__.py +0 -0
- {codespine-1.0.7 → codespine-1.0.9}/codespine/cache/result_cache.py +0 -0
- {codespine-1.0.7 → codespine-1.0.9}/codespine/db/__init__.py +0 -0
- {codespine-1.0.7 → codespine-1.0.9}/codespine/db/_cypher_compat.py +0 -0
- {codespine-1.0.7 → codespine-1.0.9}/codespine/db/duckdb_store.py +0 -0
- {codespine-1.0.7 → codespine-1.0.9}/codespine/db/schema.py +0 -0
- {codespine-1.0.7 → codespine-1.0.9}/codespine/db/store.py +0 -0
- {codespine-1.0.7 → codespine-1.0.9}/codespine/diff/__init__.py +0 -0
- {codespine-1.0.7 → codespine-1.0.9}/codespine/diff/branch_diff.py +0 -0
- {codespine-1.0.7 → codespine-1.0.9}/codespine/guide.py +0 -0
- {codespine-1.0.7 → codespine-1.0.9}/codespine/indexer/__init__.py +0 -0
- {codespine-1.0.7 → codespine-1.0.9}/codespine/indexer/di_resolver.py +0 -0
- {codespine-1.0.7 → codespine-1.0.9}/codespine/indexer/java_parser.py +0 -0
- {codespine-1.0.7 → codespine-1.0.9}/codespine/indexer/symbol_builder.py +0 -0
- {codespine-1.0.7 → codespine-1.0.9}/codespine/mcp/__init__.py +0 -0
- {codespine-1.0.7 → codespine-1.0.9}/codespine/noise/__init__.py +0 -0
- {codespine-1.0.7 → codespine-1.0.9}/codespine/noise/blocklist.py +0 -0
- {codespine-1.0.7 → codespine-1.0.9}/codespine/overlay/__init__.py +0 -0
- {codespine-1.0.7 → codespine-1.0.9}/codespine/overlay/git_state.py +0 -0
- {codespine-1.0.7 → codespine-1.0.9}/codespine/overlay/merge.py +0 -0
- {codespine-1.0.7 → codespine-1.0.9}/codespine/overlay/store.py +0 -0
- {codespine-1.0.7 → codespine-1.0.9}/codespine/search/__init__.py +0 -0
- {codespine-1.0.7 → codespine-1.0.9}/codespine/search/bm25.py +0 -0
- {codespine-1.0.7 → codespine-1.0.9}/codespine/search/fuzzy.py +0 -0
- {codespine-1.0.7 → codespine-1.0.9}/codespine/search/hybrid.py +0 -0
- {codespine-1.0.7 → codespine-1.0.9}/codespine/search/rrf.py +0 -0
- {codespine-1.0.7 → codespine-1.0.9}/codespine/search/vector.py +0 -0
- {codespine-1.0.7 → codespine-1.0.9}/codespine/sharding/__init__.py +0 -0
- {codespine-1.0.7 → codespine-1.0.9}/codespine/sharding/router.py +0 -0
- {codespine-1.0.7 → codespine-1.0.9}/codespine/sharding/store.py +0 -0
- {codespine-1.0.7 → codespine-1.0.9}/codespine/watch/__init__.py +0 -0
- {codespine-1.0.7 → codespine-1.0.9}/codespine/watch/git_hook.py +0 -0
- {codespine-1.0.7 → codespine-1.0.9}/codespine/watch/watcher.py +0 -0
- {codespine-1.0.7 → codespine-1.0.9}/codespine.egg-info/SOURCES.txt +0 -0
- {codespine-1.0.7 → codespine-1.0.9}/codespine.egg-info/dependency_links.txt +0 -0
- {codespine-1.0.7 → codespine-1.0.9}/codespine.egg-info/entry_points.txt +0 -0
- {codespine-1.0.7 → codespine-1.0.9}/codespine.egg-info/requires.txt +0 -0
- {codespine-1.0.7 → codespine-1.0.9}/codespine.egg-info/top_level.txt +0 -0
- {codespine-1.0.7 → codespine-1.0.9}/gindex.py +0 -0
- {codespine-1.0.7 → codespine-1.0.9}/setup.cfg +0 -0
- {codespine-1.0.7 → codespine-1.0.9}/tests/test_branch_diff_normalize.py +0 -0
- {codespine-1.0.7 → codespine-1.0.9}/tests/test_community_detection.py +0 -0
- {codespine-1.0.7 → codespine-1.0.9}/tests/test_cypher_compat.py +0 -0
- {codespine-1.0.7 → codespine-1.0.9}/tests/test_deadcode.py +0 -0
- {codespine-1.0.7 → codespine-1.0.9}/tests/test_duckdb_store.py +0 -0
- {codespine-1.0.7 → codespine-1.0.9}/tests/test_index_and_hybrid.py +0 -0
- {codespine-1.0.7 → codespine-1.0.9}/tests/test_java_parser.py +0 -0
- {codespine-1.0.7 → codespine-1.0.9}/tests/test_multimodule_index.py +0 -0
- {codespine-1.0.7 → codespine-1.0.9}/tests/test_overlay.py +0 -0
- {codespine-1.0.7 → codespine-1.0.9}/tests/test_result_cache.py +0 -0
- {codespine-1.0.7 → codespine-1.0.9}/tests/test_search_ranking.py +0 -0
- {codespine-1.0.7 → codespine-1.0.9}/tests/test_sharding.py +0 -0
- {codespine-1.0.7 → codespine-1.0.9}/tests/test_store_recovery.py +0 -0
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: codespine
-Version: 1.0.7
+Version: 1.0.9
 Summary: Local Java code intelligence indexer backed by a graph database
 Author: CodeSpine contributors
 License: MIT License
@@ -124,8 +124,7 @@ Downloads and caches the embedding model. Only needed once. After this, `--embed
 codespine analyse /path/to/java-project
 
 # 2. (Optional) Run the expensive deep passes: communities, flows, dead code, coupling
-
-codespine analyse /path/to/java-project --deep
+codespine analyse /path/to/java-project --complete --deep
 
 # 3. (Optional) Add semantic embeddings for concept-level search
 codespine analyse /path/to/java-project --embed
@@ -313,8 +312,9 @@ Higher-level tools designed to answer full agent questions in a single call, wit
 # Indexing
 codespine analyse <path>                    # incremental index (default)
 codespine analyse <path> --full             # full re-index from scratch
-codespine analyse <path> --
-codespine analyse <path> --
+codespine analyse <path> --budget 90        # fast index with a resolver deadline
+codespine analyse <path> --complete --deep  # + communities, flows, dead code, coupling
+codespine analyse <path> --complete --incremental-deep
 codespine analyse <path> --embed            # + vector embeddings
 
 # Live watch
@@ -360,7 +360,7 @@ codespine force-reset # emergency: delete all data files
 
 `analyse` defaults to incremental mode. Repeat runs only process changed files and are fast.
 
-
+`analyse` runs in fast mode by default: it indexes the core graph, publishes that read replica from a detached process, then continues communities, flows, dead code, coupling, and cross-module enrichment in the background. Use `--complete --deep` when you want those passes refreshed before the command returns.
 
 ---
 
@@ -546,12 +546,12 @@ The deep analysis phase covers four passes that are expensive but optional:
 | Dead code | Finds methods with no callers (Java-aware exemptions) | Cleanup audits |
 | Change coupling | Analyses git history for co-changed file pairs | `get_change_coupling`, `related` |
 
-**
+**Fast default:** `codespine analyse` prioritizes a queryable core index. Communities, flows, dead-code, git coupling, and cross-module links are queued in a detached background enrichment job unless you use `--complete`.
 
-**
+**Complete deep:** `--complete --deep` runs the expensive enrichment passes before returning. `--complete --incremental-deep` combines incremental file indexing with a forced full deep pass.
 
 ```bash
-codespine analyse . --incremental-deep
+codespine analyse . --complete --incremental-deep
 ```
 
 **Embeddings** (`--embed`) are independent of deep analysis. Without them, BM25 + fuzzy search still works. Add embeddings when you need concept-level retrieval ("find retry logic", "find payment processing").
@@ -59,8 +59,7 @@ Downloads and caches the embedding model. Only needed once. After this, `--embed
 codespine analyse /path/to/java-project
 
 # 2. (Optional) Run the expensive deep passes: communities, flows, dead code, coupling
-
-codespine analyse /path/to/java-project --deep
+codespine analyse /path/to/java-project --complete --deep
 
 # 3. (Optional) Add semantic embeddings for concept-level search
 codespine analyse /path/to/java-project --embed
@@ -248,8 +247,9 @@ Higher-level tools designed to answer full agent questions in a single call, wit
 # Indexing
 codespine analyse <path>                    # incremental index (default)
 codespine analyse <path> --full             # full re-index from scratch
-codespine analyse <path> --
-codespine analyse <path> --
+codespine analyse <path> --budget 90        # fast index with a resolver deadline
+codespine analyse <path> --complete --deep  # + communities, flows, dead code, coupling
+codespine analyse <path> --complete --incremental-deep
 codespine analyse <path> --embed            # + vector embeddings
 
 # Live watch
@@ -295,7 +295,7 @@ codespine force-reset # emergency: delete all data files
 
 `analyse` defaults to incremental mode. Repeat runs only process changed files and are fast.
 
-
+`analyse` runs in fast mode by default: it indexes the core graph, publishes that read replica from a detached process, then continues communities, flows, dead code, coupling, and cross-module enrichment in the background. Use `--complete --deep` when you want those passes refreshed before the command returns.
 
 ---
 
@@ -481,12 +481,12 @@ The deep analysis phase covers four passes that are expensive but optional:
 | Dead code | Finds methods with no callers (Java-aware exemptions) | Cleanup audits |
 | Change coupling | Analyses git history for co-changed file pairs | `get_change_coupling`, `related` |
 
-**
+**Fast default:** `codespine analyse` prioritizes a queryable core index. Communities, flows, dead-code, git coupling, and cross-module links are queued in a detached background enrichment job unless you use `--complete`.
 
-**
+**Complete deep:** `--complete --deep` runs the expensive enrichment passes before returning. `--complete --incremental-deep` combines incremental file indexing with a forced full deep pass.
 
 ```bash
-codespine analyse . --incremental-deep
+codespine analyse . --complete --incremental-deep
 ```
 
 **Embeddings** (`--embed`) are independent of deep analysis. Without them, BM25 + fuzzy search still works. Add embeddings when you need concept-level retrieval ("find retry logic", "find payment processing").
@@ -66,6 +66,24 @@ def _open_store(read_only: bool = True) -> ShardedGraphStore:
     return ShardedGraphStore(read_only=read_only)
 
 
+def _spawn_background_enrichment(path: str) -> bool:
+    """Publish the fast index, then enrich it in a detached process."""
+    try:
+        subprocess.Popen(
+            [sys.executable, "-m", "codespine.cli", "enrich-background", path],
+            stdin=subprocess.DEVNULL,
+            stdout=subprocess.DEVNULL,
+            stderr=subprocess.DEVNULL,
+            start_new_session=True,
+            cwd=os.getcwd(),
+            env=os.environ.copy(),
+        )
+        return True
+    except Exception as exc:  # noqa: BLE001
+        LOGGER.warning("Unable to spawn background enrichment: %s", exc)
+        return False
+
+
 def _db_size_bytes(path: str) -> int:
     if os.path.isfile(path):
         return os.path.getsize(path)
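The helper above hands enrichment to a child process that must outlive the `analyse` command. As an illustrative, self-contained sketch (the command below is a stand-in, not the real codespine invocation), the same detachment pattern works like this: `start_new_session=True` puts the child in its own session so it is not torn down with the parent's process group, and the `DEVNULL` redirections keep it from ever blocking on an unread pipe.

```python
# Illustrative sketch of the detached-process pattern used above; the spawned
# command here is a stand-in, not the actual codespine CLI entry point.
import os
import subprocess
import sys


def spawn_detached(argv: list[str]) -> bool:
    """Start argv so it survives the parent exiting and never blocks on stdio."""
    try:
        subprocess.Popen(
            argv,
            stdin=subprocess.DEVNULL,   # child never reads our terminal
            stdout=subprocess.DEVNULL,  # child cannot fill a pipe nobody drains
            stderr=subprocess.DEVNULL,
            start_new_session=True,     # own session, detached from our process group (POSIX)
            env=os.environ.copy(),
        )
        return True
    except OSError:
        return False


if __name__ == "__main__":
    # Example: a throwaway child that keeps running after this script exits.
    spawn_detached([sys.executable, "-c", "import time; time.sleep(5)"])
```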
@@ -110,6 +128,7 @@ def _index_shard_group(
     sg,
     full: bool,
     embed: bool,
+    deadline: float | None,
     output_lock: threading.Lock,
     parallel: bool,
 ) -> tuple[int, list, int]:
@@ -145,6 +164,10 @@ def _index_shard_group(
     }
     call_state: dict = {"shown": False, "count": 0, "last_ts": 0.0,
                         "started_at": 0.0}
+    db_state: dict = {
+        "shown": False, "done": 0, "total": 0, "last_ts": 0.0,
+        "started_at": 0.0,
+    }
 
     def _progress(event: str, payload: dict) -> None:
         now = time.perf_counter()
@@ -237,11 +260,101 @@ def _index_shard_group(
             parse_state["shown"] = True
             parse_state["last_ts"] = now
             return
-        if event
+        if event == "db_write_start":
             if parse_state["shown"]:
                 with output_lock:
                     click.echo()
                 parse_state["shown"] = False
+            total = int(payload.get("total", 0))
+            deleted = int(payload.get("deleted_files", 0))
+            db_state["done"] = 0
+            db_state["total"] = total
+            db_state["started_at"] = now
+            status = f"starting ({total} files"
+            if deleted:
+                status += f", {deleted} deleted"
+            status += ")"
+            with output_lock:
+                _phase(f"{prefix}Writing index...", status)
+            return
+        if event == "db_write_heartbeat":
+            done = int(payload.get("done", 0))
+            total = int(payload.get("total", 0))
+            classes = int(payload.get("classes", 0))
+            methods = int(payload.get("methods", 0))
+            phase = str(payload.get("phase", "writing"))
+            elapsed_s = float(payload.get("elapsed", 0.0))
+            db_state["done"] = done
+            db_state["total"] = total
+            if not parallel:
+                click.echo(
+                    f"\r{_spinner_char()} {prefix}Writing index... "
+                    f"{_bar(done, total)} {done}/{total} "
+                    f"{classes} classes / {methods} methods "
+                    f"{phase[:18]:<18} {elapsed_s:.0f}s ",
+                    nl=False,
+                )
+            else:
+                with output_lock:
+                    click.echo(
+                        f"\r{prefix}Writing {done}/{total} "
+                        f"({classes} classes, {methods} methods, {elapsed_s:.0f}s) ",
+                        nl=False,
+                    )
+            db_state["shown"] = True
+            db_state["last_ts"] = now
+            return
+        if event == "db_write_progress":
+            done = int(payload.get("done", 0))
+            total = int(payload.get("total", 0))
+            classes = int(payload.get("classes", 0))
+            methods = int(payload.get("methods", 0))
+            phase = str(payload.get("phase", "writing"))
+            db_state["done"] = done
+            db_state["total"] = total
+            if total == 0 and done == 0:
+                return
+            if done == total or (now - db_state["last_ts"]) >= 0.25:
+                elapsed_s = now - db_state["started_at"]
+                if not parallel:
+                    click.echo(
+                        f"\r{_spinner_char()} {prefix}Writing index... "
+                        f"{_bar(done, total)} {done}/{total} "
+                        f"{classes} classes / {methods} methods "
+                        f"{phase[:18]:<18} {elapsed_s:.0f}s ",
+                        nl=False,
+                    )
+                else:
+                    with output_lock:
+                        click.echo(
+                            f"\r{prefix}Writing {done}/{total} "
+                            f"({classes} classes, {methods} methods, {elapsed_s:.0f}s) ",
+                            nl=False,
+                        )
+                db_state["shown"] = True
+                db_state["last_ts"] = now
+            return
+        if event == "db_write_done":
+            if db_state["shown"]:
+                with output_lock:
+                    click.echo()
+                db_state["shown"] = False
+            files = int(payload.get("files_indexed", db_state["done"]))
+            classes = int(payload.get("classes", 0))
+            methods = int(payload.get("methods", 0))
+            elapsed_s = float(payload.get("elapsed", 0.0))
+            with output_lock:
+                _phase(
+                    f"{prefix}Writing index...",
+                    f"{files} files, {classes} classes, {methods} methods ({elapsed_s:.1f}s)",
+                )
+            return
+        if event in ("resolve_calls_start",):
+            if parse_state["shown"] or db_state["shown"]:
+                with output_lock:
+                    click.echo()
+                parse_state["shown"] = False
+                db_state["shown"] = False
             call_state["started_at"] = now
             with output_lock:
                 _phase(f"{prefix}Tracing calls...", "starting...")
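These new branches consume a small event protocol: the indexer calls the progress callback with an event name plus a payload dict, and the keys visible above are `done`, `total`, `classes`, `methods`, `phase`, `elapsed`, `deleted_files`, and `files_indexed`. A minimal consumer, assuming only those keys (the emitting side lives in the indexer engine and is not part of this hunk), could be as simple as:

```python
# Minimal progress consumer compatible with the (event, payload) shape above.
# It assumes only the payload keys that appear in this diff; anything else is
# ignored.
def simple_progress(event: str, payload: dict) -> None:
    if event == "db_write_start":
        print(f"writing {int(payload.get('total', 0))} files...")
    elif event in ("db_write_heartbeat", "db_write_progress"):
        done = int(payload.get("done", 0))
        total = int(payload.get("total", 0))
        print(f"{done}/{total} ({payload.get('phase', 'writing')})")
    elif event == "db_write_done":
        print(
            f"done: {int(payload.get('files_indexed', 0))} files, "
            f"{int(payload.get('classes', 0))} classes, "
            f"{int(payload.get('methods', 0))} methods "
            f"in {float(payload.get('elapsed', 0.0)):.1f}s"
        )


# Passed the same way the CLI passes _progress, e.g.
# indexer.index_project(path, progress=simple_progress, ...)
```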
@@ -287,8 +400,9 @@ def _index_shard_group(
             call_state["shown"] = False
             elapsed_s = (now - call_state["started_at"]) if call_state["started_at"] else 0.0
             n = int(payload.get("calls_resolved", 0))
+            suffix = " partial" if payload.get("partial") else ""
             with output_lock:
-                _phase(f"{prefix}Tracing calls...", f"{n} calls resolved ({elapsed_s:.1f}s)")
+                _phase(f"{prefix}Tracing calls...", f"{n} calls resolved{suffix} ({elapsed_s:.1f}s)")
             return
         if event == "resolve_types_start":
             with output_lock:
@@ -296,20 +410,26 @@ def _index_shard_group(
             return
         if event == "resolve_types_done":
             n = int(payload.get("type_relationships", 0))
+            suffix = " partial" if payload.get("partial") else ""
             with output_lock:
-                _phase(f"{prefix}Analyzing types...", f"{n} type relationships")
+                _phase(f"{prefix}Analyzing types...", f"{n} type relationships{suffix}")
             return
 
     shard_store = sg.shard(project_id)
     indexer = JavaIndexer(shard_store)
     result = indexer.index_project(
-        mod_path,
+        mod_path,
+        full=full,
+        progress=_progress,
+        project_id=project_id,
+        embed=embed,
+        deadline=deadline,
     )
     results.append(result)
     total_files += result.files_found
 
     # Flush any dangling progress line.
-    if parse_state["shown"]:
+    if parse_state["shown"] or db_state["shown"]:
         with output_lock:
             click.echo()
 
@@ -372,7 +492,21 @@ def main() -> None:
 @main.command()
 @click.argument("path", type=click.Path(exists=True))
 @click.option("--full/--incremental", default=False, show_default=True)
-@click.option("--deep/--no-deep", default=False, show_default=True, help="Run expensive global analyses
+@click.option("--deep/--no-deep", default=False, show_default=True, help="Run expensive global analyses when used with --complete.")
+@click.option(
+    "--fast/--complete",
+    default=True,
+    show_default=True,
+    help="Fast mode returns after the core index is queryable; complete mode runs enrichment in the foreground.",
+)
+@click.option(
+    "--budget",
+    "budget_seconds",
+    default=90.0,
+    show_default=True,
+    type=float,
+    help="Foreground time budget in seconds for fast mode; use 0 to disable the resolver deadline.",
+)
 @click.option(
     "--incremental-deep",
     is_flag=True,
@@ -381,17 +515,25 @@ def main() -> None:
 )
 @click.option(
     "--embed/--no-embed",
-    default=
+    default=False,
     show_default=True,
-    help="Generate vector embeddings.
+    help="Generate vector embeddings. Off by default so analyse stays fast; rerun with --embed when semantic vectors are needed.",
 )
 @click.option("--allow-running", is_flag=True, hidden=True, help="Skip MCP running check (used by MCP analyse_project tool).")
-def analyse(
+def analyse(
+    path: str,
+    full: bool,
+    deep: bool,
+    fast: bool,
+    budget_seconds: float,
+    incremental_deep: bool,
+    embed: bool,
+    allow_running: bool,
+) -> None:
     """Index a local Java project (auto-detects workspace / Maven / Gradle layout).
 
-
-
-    a fast hash-based fallback provides basic vector search.
+    Fast mode indexes the core Java graph and returns quickly. Use --complete
+    for foreground communities, flows, dead-code, and git-coupling enrichment.
     """
     if not allow_running and _is_running():
         click.secho("Stop MCP first ('codespine stop') to index.", fg="yellow")
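Two click idioms carry this hunk: a paired boolean flag (`--fast/--complete`, where `--fast` is the default and `--complete` flips the same parameter to False) and an option whose CLI name differs from its Python parameter name (`--budget` arrives as `budget_seconds`). A hedged toy command showing just those two idioms, unrelated to the real codespine CLI:

```python
# Toy click command demonstrating the two option idioms used by `analyse`:
# a --x/--y boolean pair and a renamed parameter ("--budget" -> budget_seconds).
import click


@click.command()
@click.option("--fast/--complete", default=True, show_default=True)
@click.option("--budget", "budget_seconds", default=90.0, type=float, show_default=True)
def demo(fast: bool, budget_seconds: float) -> None:
    click.echo(f"fast={fast} budget_seconds={budget_seconds}")


if __name__ == "__main__":
    demo()  # try: python demo.py --complete --budget 0
```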
@@ -400,6 +542,18 @@ def analyse(path: str, full: bool, deep: bool, incremental_deep: bool, embed: bo
     started = time.perf_counter()
     abs_path = os.path.abspath(path)
 
+    if fast and (deep or incremental_deep):
+        click.secho(
+            "Fast mode runs deep analysis in the background. Use --complete --deep to wait for it.",
+            fg="yellow",
+        )
+
+    budget_deadline = (
+        started + budget_seconds
+        if fast and budget_seconds and budget_seconds > 0
+        else None
+    )
+
     # Warn about hash fallback early so users know to install [ml]
     if embed:
         from codespine.search.vector import _load_model
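The budget is turned into a single absolute deadline (`started + budget_seconds` on the `time.perf_counter()` clock), applies only in fast mode, and a value of 0 disables it. A self-contained sketch of the same pattern, with a stand-in worker in place of the real resolver:

```python
# Sketch of the --budget deadline pattern: compute an absolute perf_counter()
# deadline once, pass it down, and let the worker stop when the clock passes it.
# resolve_with_deadline() is a stand-in for the real call resolver.
import time
from typing import Iterable, Iterator


def resolve_with_deadline(items: Iterable[int], deadline: float | None) -> Iterator[int]:
    for item in items:
        if deadline is not None and time.perf_counter() >= deadline:
            return  # partial result; the CLI reports this as "partial"
        yield item * 2  # stand-in for real resolution work


started = time.perf_counter()
budget_seconds = 90.0  # the --budget default in this diff; 0 would mean "no deadline"
deadline = started + budget_seconds if budget_seconds > 0 else None
resolved = list(resolve_with_deadline(range(1_000), deadline))
```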
@@ -516,7 +670,7 @@ def analyse(path: str, full: bool, deep: bool, incremental_deep: bool, embed: bo
         for s_idx, group in shard_groups.items():
             f = ex.submit(
                 _index_shard_group,
-                s_idx, group, sg, full, embed, output_lock, True,
+                s_idx, group, sg, full, embed, budget_deadline, output_lock, True,
             )
             futures_map[f] = s_idx
 
@@ -538,7 +692,7 @@ def analyse(path: str, full: bool, deep: bool, incremental_deep: bool, embed: bo
         only_shard_idx = next(iter(shard_groups))
         only_group = shard_groups[only_shard_idx]
         _, all_results, total_files_found = _index_shard_group(
-            only_shard_idx, only_group, sg, full, embed, output_lock, False,
+            only_shard_idx, only_group, sg, full, embed, budget_deadline, output_lock, False,
         )
         if all_results:
             last_result = all_results[-1]
@@ -558,7 +712,9 @@ def analyse(path: str, full: bool, deep: bool, incremental_deep: bool, embed: bo
     root_shard_store = sg.shard(root_project_id)
 
     # ── Cross-module call linking ──────────────────────────────────────
-    if is_multi and len(modules_with_ids) > 1:
+    if fast and is_multi and len(modules_with_ids) > 1:
+        _phase("Cross-module linking...", "skipped (fast mode; use --complete)")
+    elif is_multi and len(modules_with_ids) > 1:
         xmod_label = "Cross-module linking..."
         _live_phase(xmod_label, "running")
         xmod_pids = [pid for _, pid in modules_with_ids]
@@ -575,7 +731,7 @@ def analyse(path: str, full: bool, deep: bool, incremental_deep: bool, embed: bo
     dead: list[dict] = []
     coupling_pairs: list[dict] = []
 
-    should_run_deep = deep or incremental_deep or total_files_found <= 3000
+    should_run_deep = (not fast) and (deep or incremental_deep or total_files_found <= 3000)
     if should_run_deep:
         comm_label = "Detecting communities..."
         _live_phase(comm_label, "running")
@@ -613,6 +769,11 @@ def analyse(path: str, full: bool, deep: bool, incremental_deep: bool, embed: bo
             progress=lambda s: _live_phase(coup_label, s),
         )
         _finish_phase(coup_label, f"{len(coupling_pairs)} coupled file pairs")
+    elif fast:
+        _phase("Detecting communities...", "queued in background")
+        _phase("Detecting execution flows...", "queued in background")
+        _phase("Finding dead code...", "queued in background")
+        _phase("Analyzing git history...", "queued in background")
     else:
         # Run lightweight versions of flow tracing and dead code from the call
         # graph already built — no community detection or coupling (those are
@@ -674,30 +835,121 @@ def analyse(path: str, full: bool, deep: bool, incremental_deep: bool, embed: bo
         fg="green",
     )
 
-    # Detect unresolved imports → hint about unindexed sibling projects
-
-
-
-
-
-
-            click.echo(
-
-
+    # Detect unresolved imports → hint about unindexed sibling projects.
+    # This is useful, but it is still another global query, so fast mode leaves
+    # it out of the foreground path.
+    if not fast:
+        try:
+            unresolved = JavaIndexer.detect_unresolved_imports(root_shard_store)
+            if unresolved:
+                click.echo()
+                click.secho("⚠ Unresolved imports — consider indexing these projects:", fg="yellow")
+                for pkg, samples in sorted(unresolved.items())[:8]:
+                    click.echo(f"  {pkg} (e.g. {samples[0]})")
+        except Exception:
+            pass  # best-effort
 
     # Publish a read replica so MCP and read-only CLI commands (search, stats…)
     # run against an isolated snapshot rather than competing with the write
     # process's buffer pool. Snapshot all open shards concurrently.
     snap_label = "Publishing read replica..."
-
-
-
-
+    for store in sg.open_shards():
+        recycle = getattr(store, "_recycle_conn", None)
+        if callable(recycle):
+            recycle()
+    if fast and _spawn_background_enrichment(abs_path):
+        _phase(snap_label, "core snapshot now; enrichment continues in background")
+    else:
+        _live_phase(snap_label, "copying")
+        sg.snapshot_all(background=False)
+        _finish_phase(snap_label, "MCP will reload automatically")
 
     # Restore original SIGINT handler now that we've finished cleanly.
     signal.signal(signal.SIGINT, _old_sigint_handler)
 
 
+@main.command("publish-snapshot", hidden=True)
+def publish_snapshot() -> None:
+    """Publish sharded read replicas for a recently completed analyse run."""
+    sg = ShardedGraphStore(read_only=False)
+    sg.snapshot_all(background=False)
+
+
+@main.command("enrich-background", hidden=True)
+@click.argument("path", type=click.Path(exists=True))
+def enrich_background(path: str) -> None:
+    """Run expensive post-index graph enrichment outside the analyse foreground."""
+    abs_path = os.path.abspath(path)
+    LOGGER.info("Background enrichment starting for %s", abs_path)
+
+    project_roots = JavaIndexer.detect_projects_in_workspace(abs_path)
+    modules_with_ids: list[tuple[str, str]] = []
+    for proj_root in project_roots:
+        proj_name = os.path.basename(proj_root)
+        module_dirs = JavaIndexer.detect_modules(proj_root)
+        is_multi_module = not (len(module_dirs) == 1 and module_dirs[0] == proj_root)
+        if is_multi_module:
+            for m in module_dirs:
+                modules_with_ids.append((m, f"{proj_name}::{os.path.basename(m)}"))
+        else:
+            modules_with_ids.append((proj_root, proj_name))
+
+    root_basename = os.path.basename(abs_path)
+    root_project_id = modules_with_ids[-1][1] if modules_with_ids else root_basename
+    is_multi = len(modules_with_ids) > 1
+    xmod_pids = [pid for _, pid in modules_with_ids]
+
+    sg = ShardedGraphStore(read_only=False)
+    root_shard_store = sg.shard(root_project_id)
+
+    try:
+        # Publish the fast core graph first so MCP/search can use it while the
+        # more expensive enrichment keeps working.
+        sg.snapshot_all(background=False)
+
+        if is_multi and len(xmod_pids) > 1:
+            xmod_edges = link_cross_module_calls(
+                root_shard_store,
+                project_ids=xmod_pids,
+                progress=lambda s: LOGGER.info("Cross-module linking: %s", s),
+            )
+            LOGGER.info("Background cross-module linking wrote %d edges", xmod_edges)
+
+        communities = detect_communities(
+            root_shard_store,
+            progress=lambda s: LOGGER.info("Community detection: %s", s),
+        )
+        LOGGER.info("Background community detection found %d clusters", len(communities))
+
+        flows = trace_execution_flows(
+            root_shard_store,
+            progress=lambda s: LOGGER.info("Execution flow tracing: %s", s),
+        )
+        LOGGER.info("Background flow tracing found %d flows", len(flows))
+
+        dead = detect_dead_code(root_shard_store, limit=500)
+        LOGGER.info("Background dead-code scan found %d candidates", _dead_result_count(dead))
+
+        root_shard_store.clear_coupling()
+        coupling_project = root_basename if is_multi else root_project_id
+        coupling_pairs = compute_coupling(
+            root_shard_store,
+            abs_path,
+            coupling_project,
+            days=SETTINGS.default_coupling_days,
+            min_strength=SETTINGS.default_min_coupling_strength,
+            min_cochanges=SETTINGS.default_min_cochanges,
+            progress=lambda s: LOGGER.info("Git coupling: %s", s),
+        )
+        LOGGER.info("Background coupling analysis found %d pairs", len(coupling_pairs))
+
+        sg.snapshot_all(background=False)
+        LOGGER.info("Background enrichment finished for %s", abs_path)
+    except Exception as exc:  # noqa: BLE001
+        LOGGER.exception("Background enrichment failed for %s: %s", abs_path, exc)
+        raise
+
+
 @main.command()
 @click.argument("query")
 @click.option("--k", default=20, show_default=True, type=int)
@@ -29,8 +29,10 @@ class Settings:
     rrf_k: int = 60
     semantic_candidate_pool: int = 2000
     write_batch_size: int = 500
-    index_file_batch_size: int =
-
+    index_file_batch_size: int = 200
+    index_method_batch_size: int = 2000
+    index_symbol_batch_size: int = 2000
+    edge_write_batch_size: int = 5000
     default_coupling_days: int = 5
     default_min_coupling_strength: float = 0.3
    default_min_cochanges: int = 3
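The single file batch size is joined by per-entity sizes for methods, symbols, and edge writes. Applying such sizes is just chunked writing; the sketch below assumes a hypothetical `store.write_rows()` call and is not the actual duckdb_store API.

```python
# Illustrative chunked-write loop driven by a per-entity batch size.
# chunked() is a local helper; store.write_rows() is a hypothetical stand-in.
from itertools import islice
from typing import Iterable, Iterator, TypeVar

T = TypeVar("T")


def chunked(rows: Iterable[T], size: int) -> Iterator[list[T]]:
    it = iter(rows)
    while batch := list(islice(it, size)):
        yield batch


INDEX_METHOD_BATCH_SIZE = 2000  # mirrors index_method_batch_size above


def write_methods(store, method_rows: list[dict]) -> None:
    for batch in chunked(method_rows, INDEX_METHOD_BATCH_SIZE):
        store.write_rows("methods", batch)  # hypothetical store call
```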
@@ -1,5 +1,6 @@
 from __future__ import annotations
 
+import time
 from collections import defaultdict
 from typing import Iterator
 
@@ -58,6 +59,7 @@ def resolve_calls(
     class_catalog: dict[str, list[str]],
     *,
     scan_counter: list[int] | None = None,
+    deadline: float | None = None,
 ) -> Iterator[tuple[str, str, float, str]]:
     """Resolve call names to known method ids.
 
@@ -84,6 +86,8 @@ def resolve_calls(
         class_method_index_by_fqcn[class_fqcn][key].append(method_id)
 
     for source_id, call_sites in calls.items():
+        if deadline is not None and time.perf_counter() >= deadline:
+            return
         if scan_counter is not None:
             scan_counter[0] += 1
         src_meta = method_catalog.get(source_id, {})
@@ -94,6 +98,8 @@ def resolve_calls(
         field_types = src_ctx.get("field_types", {}) or {}
 
         for call in call_sites:
+            if deadline is not None and time.perf_counter() >= deadline:
+                return
             call_name = call.name
 
             key = (call_name, int(call.arg_count))