codespine 1.0.7__tar.gz → 1.0.8__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {codespine-1.0.7 → codespine-1.0.8}/PKG-INFO +1 -1
- {codespine-1.0.7 → codespine-1.0.8}/codespine/__init__.py +1 -1
- {codespine-1.0.7 → codespine-1.0.8}/codespine/cli.py +96 -2
- {codespine-1.0.7 → codespine-1.0.8}/codespine/indexer/engine.py +271 -199
- {codespine-1.0.7 → codespine-1.0.8}/codespine.egg-info/PKG-INFO +1 -1
- {codespine-1.0.7 → codespine-1.0.8}/pyproject.toml +1 -1
- {codespine-1.0.7 → codespine-1.0.8}/tests/test_parse_resilience.py +96 -0
- {codespine-1.0.7 → codespine-1.0.8}/LICENSE +0 -0
- {codespine-1.0.7 → codespine-1.0.8}/README.md +0 -0
- {codespine-1.0.7 → codespine-1.0.8}/codespine/analysis/__init__.py +0 -0
- {codespine-1.0.7 → codespine-1.0.8}/codespine/analysis/community.py +0 -0
- {codespine-1.0.7 → codespine-1.0.8}/codespine/analysis/context.py +0 -0
- {codespine-1.0.7 → codespine-1.0.8}/codespine/analysis/coupling.py +0 -0
- {codespine-1.0.7 → codespine-1.0.8}/codespine/analysis/crossmodule.py +0 -0
- {codespine-1.0.7 → codespine-1.0.8}/codespine/analysis/deadcode.py +0 -0
- {codespine-1.0.7 → codespine-1.0.8}/codespine/analysis/flow.py +0 -0
- {codespine-1.0.7 → codespine-1.0.8}/codespine/analysis/impact.py +0 -0
- {codespine-1.0.7 → codespine-1.0.8}/codespine/cache/__init__.py +0 -0
- {codespine-1.0.7 → codespine-1.0.8}/codespine/cache/result_cache.py +0 -0
- {codespine-1.0.7 → codespine-1.0.8}/codespine/config.py +0 -0
- {codespine-1.0.7 → codespine-1.0.8}/codespine/db/__init__.py +0 -0
- {codespine-1.0.7 → codespine-1.0.8}/codespine/db/_cypher_compat.py +0 -0
- {codespine-1.0.7 → codespine-1.0.8}/codespine/db/duckdb_store.py +0 -0
- {codespine-1.0.7 → codespine-1.0.8}/codespine/db/schema.py +0 -0
- {codespine-1.0.7 → codespine-1.0.8}/codespine/db/store.py +0 -0
- {codespine-1.0.7 → codespine-1.0.8}/codespine/diff/__init__.py +0 -0
- {codespine-1.0.7 → codespine-1.0.8}/codespine/diff/branch_diff.py +0 -0
- {codespine-1.0.7 → codespine-1.0.8}/codespine/guide.py +0 -0
- {codespine-1.0.7 → codespine-1.0.8}/codespine/indexer/__init__.py +0 -0
- {codespine-1.0.7 → codespine-1.0.8}/codespine/indexer/call_resolver.py +0 -0
- {codespine-1.0.7 → codespine-1.0.8}/codespine/indexer/di_resolver.py +0 -0
- {codespine-1.0.7 → codespine-1.0.8}/codespine/indexer/java_parser.py +0 -0
- {codespine-1.0.7 → codespine-1.0.8}/codespine/indexer/symbol_builder.py +0 -0
- {codespine-1.0.7 → codespine-1.0.8}/codespine/mcp/__init__.py +0 -0
- {codespine-1.0.7 → codespine-1.0.8}/codespine/mcp/server.py +0 -0
- {codespine-1.0.7 → codespine-1.0.8}/codespine/noise/__init__.py +0 -0
- {codespine-1.0.7 → codespine-1.0.8}/codespine/noise/blocklist.py +0 -0
- {codespine-1.0.7 → codespine-1.0.8}/codespine/overlay/__init__.py +0 -0
- {codespine-1.0.7 → codespine-1.0.8}/codespine/overlay/git_state.py +0 -0
- {codespine-1.0.7 → codespine-1.0.8}/codespine/overlay/merge.py +0 -0
- {codespine-1.0.7 → codespine-1.0.8}/codespine/overlay/store.py +0 -0
- {codespine-1.0.7 → codespine-1.0.8}/codespine/search/__init__.py +0 -0
- {codespine-1.0.7 → codespine-1.0.8}/codespine/search/bm25.py +0 -0
- {codespine-1.0.7 → codespine-1.0.8}/codespine/search/fuzzy.py +0 -0
- {codespine-1.0.7 → codespine-1.0.8}/codespine/search/hybrid.py +0 -0
- {codespine-1.0.7 → codespine-1.0.8}/codespine/search/rrf.py +0 -0
- {codespine-1.0.7 → codespine-1.0.8}/codespine/search/vector.py +0 -0
- {codespine-1.0.7 → codespine-1.0.8}/codespine/sharding/__init__.py +0 -0
- {codespine-1.0.7 → codespine-1.0.8}/codespine/sharding/router.py +0 -0
- {codespine-1.0.7 → codespine-1.0.8}/codespine/sharding/store.py +0 -0
- {codespine-1.0.7 → codespine-1.0.8}/codespine/watch/__init__.py +0 -0
- {codespine-1.0.7 → codespine-1.0.8}/codespine/watch/git_hook.py +0 -0
- {codespine-1.0.7 → codespine-1.0.8}/codespine/watch/watcher.py +0 -0
- {codespine-1.0.7 → codespine-1.0.8}/codespine.egg-info/SOURCES.txt +0 -0
- {codespine-1.0.7 → codespine-1.0.8}/codespine.egg-info/dependency_links.txt +0 -0
- {codespine-1.0.7 → codespine-1.0.8}/codespine.egg-info/entry_points.txt +0 -0
- {codespine-1.0.7 → codespine-1.0.8}/codespine.egg-info/requires.txt +0 -0
- {codespine-1.0.7 → codespine-1.0.8}/codespine.egg-info/top_level.txt +0 -0
- {codespine-1.0.7 → codespine-1.0.8}/gindex.py +0 -0
- {codespine-1.0.7 → codespine-1.0.8}/setup.cfg +0 -0
- {codespine-1.0.7 → codespine-1.0.8}/tests/test_branch_diff_normalize.py +0 -0
- {codespine-1.0.7 → codespine-1.0.8}/tests/test_call_resolver.py +0 -0
- {codespine-1.0.7 → codespine-1.0.8}/tests/test_community_detection.py +0 -0
- {codespine-1.0.7 → codespine-1.0.8}/tests/test_cypher_compat.py +0 -0
- {codespine-1.0.7 → codespine-1.0.8}/tests/test_deadcode.py +0 -0
- {codespine-1.0.7 → codespine-1.0.8}/tests/test_duckdb_store.py +0 -0
- {codespine-1.0.7 → codespine-1.0.8}/tests/test_index_and_hybrid.py +0 -0
- {codespine-1.0.7 → codespine-1.0.8}/tests/test_java_parser.py +0 -0
- {codespine-1.0.7 → codespine-1.0.8}/tests/test_multimodule_index.py +0 -0
- {codespine-1.0.7 → codespine-1.0.8}/tests/test_overlay.py +0 -0
- {codespine-1.0.7 → codespine-1.0.8}/tests/test_result_cache.py +0 -0
- {codespine-1.0.7 → codespine-1.0.8}/tests/test_search_ranking.py +0 -0
- {codespine-1.0.7 → codespine-1.0.8}/tests/test_sharding.py +0 -0
- {codespine-1.0.7 → codespine-1.0.8}/tests/test_store_recovery.py +0 -0
|
@@ -145,6 +145,10 @@ def _index_shard_group(
|
|
|
145
145
|
}
|
|
146
146
|
call_state: dict = {"shown": False, "count": 0, "last_ts": 0.0,
|
|
147
147
|
"started_at": 0.0}
|
|
148
|
+
db_state: dict = {
|
|
149
|
+
"shown": False, "done": 0, "total": 0, "last_ts": 0.0,
|
|
150
|
+
"started_at": 0.0,
|
|
151
|
+
}
|
|
148
152
|
|
|
149
153
|
def _progress(event: str, payload: dict) -> None:
|
|
150
154
|
now = time.perf_counter()
|
|
@@ -237,11 +241,101 @@ def _index_shard_group(
|
|
|
237
241
|
parse_state["shown"] = True
|
|
238
242
|
parse_state["last_ts"] = now
|
|
239
243
|
return
|
|
240
|
-
if event
|
|
244
|
+
if event == "db_write_start":
|
|
241
245
|
if parse_state["shown"]:
|
|
242
246
|
with output_lock:
|
|
243
247
|
click.echo()
|
|
244
248
|
parse_state["shown"] = False
|
|
249
|
+
total = int(payload.get("total", 0))
|
|
250
|
+
deleted = int(payload.get("deleted_files", 0))
|
|
251
|
+
db_state["done"] = 0
|
|
252
|
+
db_state["total"] = total
|
|
253
|
+
db_state["started_at"] = now
|
|
254
|
+
status = f"starting ({total} files"
|
|
255
|
+
if deleted:
|
|
256
|
+
status += f", {deleted} deleted"
|
|
257
|
+
status += ")"
|
|
258
|
+
with output_lock:
|
|
259
|
+
_phase(f"{prefix}Writing index...", status)
|
|
260
|
+
return
|
|
261
|
+
if event == "db_write_heartbeat":
|
|
262
|
+
done = int(payload.get("done", 0))
|
|
263
|
+
total = int(payload.get("total", 0))
|
|
264
|
+
classes = int(payload.get("classes", 0))
|
|
265
|
+
methods = int(payload.get("methods", 0))
|
|
266
|
+
phase = str(payload.get("phase", "writing"))
|
|
267
|
+
elapsed_s = float(payload.get("elapsed", 0.0))
|
|
268
|
+
db_state["done"] = done
|
|
269
|
+
db_state["total"] = total
|
|
270
|
+
if not parallel:
|
|
271
|
+
click.echo(
|
|
272
|
+
f"\r{_spinner_char()} {prefix}Writing index... "
|
|
273
|
+
f"{_bar(done, total)} {done}/{total} "
|
|
274
|
+
f"{classes} classes / {methods} methods "
|
|
275
|
+
f"{phase[:18]:<18} {elapsed_s:.0f}s ",
|
|
276
|
+
nl=False,
|
|
277
|
+
)
|
|
278
|
+
else:
|
|
279
|
+
with output_lock:
|
|
280
|
+
click.echo(
|
|
281
|
+
f"\r{prefix}Writing {done}/{total} "
|
|
282
|
+
f"({classes} classes, {methods} methods, {elapsed_s:.0f}s) ",
|
|
283
|
+
nl=False,
|
|
284
|
+
)
|
|
285
|
+
db_state["shown"] = True
|
|
286
|
+
db_state["last_ts"] = now
|
|
287
|
+
return
|
|
288
|
+
if event == "db_write_progress":
|
|
289
|
+
done = int(payload.get("done", 0))
|
|
290
|
+
total = int(payload.get("total", 0))
|
|
291
|
+
classes = int(payload.get("classes", 0))
|
|
292
|
+
methods = int(payload.get("methods", 0))
|
|
293
|
+
phase = str(payload.get("phase", "writing"))
|
|
294
|
+
db_state["done"] = done
|
|
295
|
+
db_state["total"] = total
|
|
296
|
+
if total == 0 and done == 0:
|
|
297
|
+
return
|
|
298
|
+
if done == total or (now - db_state["last_ts"]) >= 0.25:
|
|
299
|
+
elapsed_s = now - db_state["started_at"]
|
|
300
|
+
if not parallel:
|
|
301
|
+
click.echo(
|
|
302
|
+
f"\r{_spinner_char()} {prefix}Writing index... "
|
|
303
|
+
f"{_bar(done, total)} {done}/{total} "
|
|
304
|
+
f"{classes} classes / {methods} methods "
|
|
305
|
+
f"{phase[:18]:<18} {elapsed_s:.0f}s ",
|
|
306
|
+
nl=False,
|
|
307
|
+
)
|
|
308
|
+
else:
|
|
309
|
+
with output_lock:
|
|
310
|
+
click.echo(
|
|
311
|
+
f"\r{prefix}Writing {done}/{total} "
|
|
312
|
+
f"({classes} classes, {methods} methods, {elapsed_s:.0f}s) ",
|
|
313
|
+
nl=False,
|
|
314
|
+
)
|
|
315
|
+
db_state["shown"] = True
|
|
316
|
+
db_state["last_ts"] = now
|
|
317
|
+
return
|
|
318
|
+
if event == "db_write_done":
|
|
319
|
+
if db_state["shown"]:
|
|
320
|
+
with output_lock:
|
|
321
|
+
click.echo()
|
|
322
|
+
db_state["shown"] = False
|
|
323
|
+
files = int(payload.get("files_indexed", db_state["done"]))
|
|
324
|
+
classes = int(payload.get("classes", 0))
|
|
325
|
+
methods = int(payload.get("methods", 0))
|
|
326
|
+
elapsed_s = float(payload.get("elapsed", 0.0))
|
|
327
|
+
with output_lock:
|
|
328
|
+
_phase(
|
|
329
|
+
f"{prefix}Writing index...",
|
|
330
|
+
f"{files} files, {classes} classes, {methods} methods ({elapsed_s:.1f}s)",
|
|
331
|
+
)
|
|
332
|
+
return
|
|
333
|
+
if event in ("resolve_calls_start",):
|
|
334
|
+
if parse_state["shown"] or db_state["shown"]:
|
|
335
|
+
with output_lock:
|
|
336
|
+
click.echo()
|
|
337
|
+
parse_state["shown"] = False
|
|
338
|
+
db_state["shown"] = False
|
|
245
339
|
call_state["started_at"] = now
|
|
246
340
|
with output_lock:
|
|
247
341
|
_phase(f"{prefix}Tracing calls...", "starting...")
|
|
@@ -309,7 +403,7 @@ def _index_shard_group(
|
|
|
309
403
|
total_files += result.files_found
|
|
310
404
|
|
|
311
405
|
# Flush any dangling progress line.
|
|
312
|
-
if parse_state["shown"]:
|
|
406
|
+
if parse_state["shown"] or db_state["shown"]:
|
|
313
407
|
with output_lock:
|
|
314
408
|
click.echo()
|
|
315
409
|
|
|
@@ -392,227 +392,299 @@ class JavaIndexer:
|
|
|
392
392
|
_parse_hb_stop.set()
|
|
393
393
|
_parse_hb_thread.join(timeout=3.0)
|
|
394
394
|
|
|
395
|
-
# ──
|
|
396
|
-
|
|
397
|
-
|
|
398
|
-
|
|
399
|
-
|
|
400
|
-
|
|
401
|
-
|
|
402
|
-
|
|
403
|
-
|
|
404
|
-
|
|
405
|
-
|
|
406
|
-
|
|
407
|
-
|
|
408
|
-
|
|
409
|
-
|
|
410
|
-
|
|
411
|
-
|
|
412
|
-
|
|
413
|
-
|
|
414
|
-
|
|
415
|
-
|
|
416
|
-
|
|
395
|
+
# ── DB-write heartbeat thread ─────────────────────────────────────────
|
|
396
|
+
# The DB write phase can be silent for many seconds on large repos:
|
|
397
|
+
# clearing stale rows, committing files/classes/methods/symbols, and
|
|
398
|
+
# recycling DuckDB connections all happen before call tracing starts.
|
|
399
|
+
# Keep emitting progress so the CLI never appears frozen after parsing.
|
|
400
|
+
_db_done_holder: list[int] = [0]
|
|
401
|
+
_db_classes_holder: list[int] = [0]
|
|
402
|
+
_db_methods_holder: list[int] = [0]
|
|
403
|
+
_db_phase_holder: list[str] = ["preparing"]
|
|
404
|
+
_db_hb_stop = threading.Event()
|
|
405
|
+
_db_start = time.perf_counter()
|
|
406
|
+
_db_total = len(parse_results)
|
|
407
|
+
|
|
408
|
+
def _db_heartbeat_worker() -> None:
|
|
409
|
+
while not _db_hb_stop.wait(_PARSE_HEARTBEAT_PERIOD):
|
|
410
|
+
self._emit(
|
|
411
|
+
progress,
|
|
412
|
+
"db_write_heartbeat",
|
|
413
|
+
done=_db_done_holder[0],
|
|
414
|
+
total=_db_total,
|
|
415
|
+
classes=_db_classes_holder[0],
|
|
416
|
+
methods=_db_methods_holder[0],
|
|
417
|
+
phase=_db_phase_holder[0],
|
|
418
|
+
elapsed=time.perf_counter() - _db_start,
|
|
419
|
+
)
|
|
417
420
|
|
|
418
|
-
|
|
421
|
+
_db_hb_thread = threading.Thread(
|
|
422
|
+
target=_db_heartbeat_worker, daemon=True, name="codespine-db-heartbeat"
|
|
423
|
+
)
|
|
424
|
+
_db_hb_thread.start()
|
|
425
|
+
self._emit(
|
|
426
|
+
progress,
|
|
427
|
+
"db_write_start",
|
|
428
|
+
total=_db_total,
|
|
429
|
+
deleted_files=len(deleted_file_ids),
|
|
430
|
+
)
|
|
419
431
|
|
|
420
|
-
|
|
421
|
-
|
|
422
|
-
|
|
423
|
-
|
|
424
|
-
|
|
432
|
+
try:
|
|
433
|
+
# ── Chunked DB writes ─────────────────────────────────────────────
|
|
434
|
+
_db_phase_holder[0] = "clearing"
|
|
435
|
+
if full:
|
|
436
|
+
self.store.clear_project(project_id)
|
|
437
|
+
elif deleted_file_ids:
|
|
438
|
+
for delete_chunk in self._chunked(deleted_file_ids, file_batch_size):
|
|
439
|
+
with self.store.transaction():
|
|
440
|
+
for fid in delete_chunk:
|
|
441
|
+
self.store.clear_file(fid)
|
|
442
|
+
self.store._recycle_conn()
|
|
425
443
|
|
|
426
|
-
|
|
427
|
-
|
|
428
|
-
|
|
429
|
-
|
|
430
|
-
|
|
431
|
-
|
|
432
|
-
|
|
433
|
-
|
|
434
|
-
parsed = pr["parsed"]
|
|
435
|
-
f_id = pr["f_id"]
|
|
436
|
-
file_digest = pr["digest"]
|
|
437
|
-
is_test = pr["is_test"]
|
|
438
|
-
scope = pr["scope"]
|
|
439
|
-
source = pr["source"]
|
|
440
|
-
|
|
441
|
-
file_rows.append(
|
|
442
|
-
{
|
|
443
|
-
"id": f_id,
|
|
444
|
-
"path": file_path,
|
|
445
|
-
"project_id": project_id,
|
|
446
|
-
"is_test": is_test,
|
|
447
|
-
"hash": file_digest,
|
|
448
|
-
}
|
|
444
|
+
# Clean up stale project entries that point to the same path under a
|
|
445
|
+
# different ID (e.g. re-indexing "vision-server" directly after it was
|
|
446
|
+
# previously indexed as "vision::vision-server" from a workspace root).
|
|
447
|
+
_db_phase_holder[0] = "preparing"
|
|
448
|
+
try:
|
|
449
|
+
stale = self.store.query_records(
|
|
450
|
+
"MATCH (p:Project) WHERE p.path = $path AND p.id <> $pid RETURN p.id as id",
|
|
451
|
+
{"path": root_path, "pid": project_id},
|
|
449
452
|
)
|
|
450
|
-
|
|
451
|
-
|
|
452
|
-
|
|
453
|
-
|
|
454
|
-
|
|
455
|
-
|
|
456
|
-
|
|
457
|
-
|
|
458
|
-
|
|
459
|
-
|
|
460
|
-
|
|
461
|
-
|
|
462
|
-
|
|
463
|
-
|
|
464
|
-
|
|
465
|
-
|
|
466
|
-
|
|
467
|
-
|
|
468
|
-
|
|
469
|
-
|
|
470
|
-
|
|
471
|
-
|
|
472
|
-
|
|
473
|
-
|
|
474
|
-
|
|
475
|
-
|
|
476
|
-
|
|
477
|
-
|
|
478
|
-
|
|
479
|
-
|
|
480
|
-
|
|
481
|
-
cls_symbol_id = symbol_id("class", cls.fqcn, scope)
|
|
482
|
-
symbol_rows.append(
|
|
453
|
+
for old in stale:
|
|
454
|
+
self.store.clear_project(old["id"])
|
|
455
|
+
except Exception:
|
|
456
|
+
pass # best-effort cleanup
|
|
457
|
+
|
|
458
|
+
self.store.upsert_project(project_id, root_path)
|
|
459
|
+
|
|
460
|
+
_db_phase_holder[0] = "building rows"
|
|
461
|
+
for parse_chunk in self._chunked(parse_results, file_batch_size):
|
|
462
|
+
file_rows: list[dict] = []
|
|
463
|
+
class_rows: list[dict] = []
|
|
464
|
+
method_rows: list[dict] = []
|
|
465
|
+
symbol_rows: list[dict] = []
|
|
466
|
+
|
|
467
|
+
for pr in parse_chunk:
|
|
468
|
+
# Skipped files (oversized, timeout) carry parsed=None.
|
|
469
|
+
# Still count as indexed for accurate reporting, but skip
|
|
470
|
+
# class/method/symbol extraction.
|
|
471
|
+
if pr.get("parsed") is None:
|
|
472
|
+
files_indexed += 1
|
|
473
|
+
_db_done_holder[0] = files_indexed
|
|
474
|
+
continue
|
|
475
|
+
file_path = pr["file_path"]
|
|
476
|
+
parsed = pr["parsed"]
|
|
477
|
+
f_id = pr["f_id"]
|
|
478
|
+
file_digest = pr["digest"]
|
|
479
|
+
is_test = pr["is_test"]
|
|
480
|
+
scope = pr["scope"]
|
|
481
|
+
source = pr["source"]
|
|
482
|
+
|
|
483
|
+
file_rows.append(
|
|
483
484
|
{
|
|
484
|
-
"id":
|
|
485
|
-
"
|
|
486
|
-
"
|
|
487
|
-
"
|
|
488
|
-
"
|
|
489
|
-
"line": cls.line,
|
|
490
|
-
"col": cls.col,
|
|
491
|
-
"embedding": embed_text(f"class {cls.fqcn}") if embed else None,
|
|
485
|
+
"id": f_id,
|
|
486
|
+
"path": file_path,
|
|
487
|
+
"project_id": project_id,
|
|
488
|
+
"is_test": is_test,
|
|
489
|
+
"hash": file_digest,
|
|
492
490
|
}
|
|
493
491
|
)
|
|
494
|
-
|
|
495
|
-
|
|
496
|
-
# Collect DI metadata for the resolver pass.
|
|
497
|
-
di_cls_entry: dict = {
|
|
498
|
-
"id": c_id,
|
|
499
|
-
"fqcn": cls.fqcn,
|
|
500
|
-
"name": cls.name,
|
|
501
|
-
"package": cls.package,
|
|
502
|
-
"annotations": cls.annotations,
|
|
503
|
-
"injected_fields": [
|
|
504
|
-
{
|
|
505
|
-
"name": f.name,
|
|
506
|
-
"type_name": f.type_name,
|
|
507
|
-
"injection_annotation": f.injection_annotation,
|
|
508
|
-
"qualifier": f.qualifier,
|
|
509
|
-
}
|
|
510
|
-
for f in cls.fields
|
|
511
|
-
if f.injection_annotation
|
|
512
|
-
],
|
|
513
|
-
"methods_with_provides": [
|
|
514
|
-
{
|
|
515
|
-
"name": m.name,
|
|
516
|
-
"provides_type": m.provides_type,
|
|
517
|
-
"provides_annotation": next(
|
|
518
|
-
(a for a in m.annotations if a.split(".")[-1] in {"Provides", "Bean"}),
|
|
519
|
-
"Provides",
|
|
520
|
-
),
|
|
521
|
-
}
|
|
522
|
-
for m in cls.methods
|
|
523
|
-
if m.provides_type
|
|
524
|
-
],
|
|
525
|
-
}
|
|
526
|
-
di_classes.append(di_cls_entry)
|
|
527
|
-
|
|
528
|
-
for fld in cls.fields:
|
|
529
|
-
fqfield = f"{cls.fqcn}#{fld.name}"
|
|
530
|
-
symbol_rows.append(
|
|
531
|
-
{
|
|
532
|
-
"id": symbol_id("field", fqfield, scope),
|
|
533
|
-
"kind": "field",
|
|
534
|
-
"name": fld.name,
|
|
535
|
-
"fqname": fqfield,
|
|
536
|
-
"file_id": f_id,
|
|
537
|
-
"line": fld.line,
|
|
538
|
-
"col": fld.col,
|
|
539
|
-
"embedding": embed_text(f"field {fqfield} {fld.type_name}") if embed else None,
|
|
540
|
-
}
|
|
541
|
-
)
|
|
492
|
+
self._update_meta_cache_entry(meta_cache, f_id, file_path, file_digest, len(source), imports=parsed.imports)
|
|
542
493
|
|
|
543
|
-
for
|
|
544
|
-
|
|
545
|
-
|
|
494
|
+
for cls in parsed.classes:
|
|
495
|
+
c_id = class_id(cls.fqcn, scope)
|
|
496
|
+
class_rows.append(
|
|
546
497
|
{
|
|
547
|
-
"id":
|
|
548
|
-
"
|
|
549
|
-
"name":
|
|
550
|
-
"
|
|
551
|
-
"
|
|
552
|
-
"modifiers": method.modifiers + [f"@{a}" for a in method.annotations],
|
|
553
|
-
"is_constructor": method.name == cls.name,
|
|
554
|
-
"is_test": is_test,
|
|
498
|
+
"id": c_id,
|
|
499
|
+
"fqcn": cls.fqcn,
|
|
500
|
+
"name": cls.name,
|
|
501
|
+
"package": cls.package,
|
|
502
|
+
"file_id": f_id,
|
|
555
503
|
}
|
|
556
504
|
)
|
|
505
|
+
class_catalog.setdefault(cls.name, [])
|
|
506
|
+
if cls.fqcn not in class_catalog[cls.name]:
|
|
507
|
+
class_catalog[cls.name].append(cls.fqcn)
|
|
508
|
+
fqcn_to_class_ids.setdefault(cls.fqcn, [])
|
|
509
|
+
if c_id not in fqcn_to_class_ids[cls.fqcn]:
|
|
510
|
+
fqcn_to_class_ids[cls.fqcn].append(c_id)
|
|
511
|
+
class_meta[c_id] = {
|
|
512
|
+
"id": c_id,
|
|
513
|
+
"fqcn": cls.fqcn,
|
|
514
|
+
"package": parsed.package,
|
|
515
|
+
"imports": parsed.imports,
|
|
516
|
+
"extends": cls.extends,
|
|
517
|
+
"interfaces": cls.interfaces,
|
|
518
|
+
"annotations": cls.annotations,
|
|
519
|
+
"scope": scope,
|
|
520
|
+
}
|
|
521
|
+
class_methods.setdefault(c_id, {})
|
|
557
522
|
|
|
558
|
-
|
|
523
|
+
cls_symbol_id = symbol_id("class", cls.fqcn, scope)
|
|
559
524
|
symbol_rows.append(
|
|
560
525
|
{
|
|
561
|
-
"id":
|
|
562
|
-
"kind": "
|
|
563
|
-
"name":
|
|
564
|
-
"fqname":
|
|
526
|
+
"id": cls_symbol_id,
|
|
527
|
+
"kind": "class",
|
|
528
|
+
"name": cls.name,
|
|
529
|
+
"fqname": cls.fqcn,
|
|
565
530
|
"file_id": f_id,
|
|
566
|
-
"line":
|
|
567
|
-
"col":
|
|
568
|
-
"embedding": embed_text(f"
|
|
531
|
+
"line": cls.line,
|
|
532
|
+
"col": cls.col,
|
|
533
|
+
"embedding": embed_text(f"class {cls.fqcn}") if embed else None,
|
|
569
534
|
}
|
|
570
535
|
)
|
|
571
|
-
|
|
572
|
-
|
|
573
|
-
|
|
574
|
-
|
|
575
|
-
|
|
576
|
-
"
|
|
577
|
-
"
|
|
578
|
-
"
|
|
579
|
-
|
|
580
|
-
|
|
581
|
-
|
|
582
|
-
|
|
583
|
-
|
|
584
|
-
|
|
585
|
-
|
|
586
|
-
|
|
587
|
-
|
|
536
|
+
classes_indexed += 1
|
|
537
|
+
_db_classes_holder[0] = classes_indexed
|
|
538
|
+
|
|
539
|
+
# Collect DI metadata for the resolver pass.
|
|
540
|
+
di_cls_entry: dict = {
|
|
541
|
+
"id": c_id,
|
|
542
|
+
"fqcn": cls.fqcn,
|
|
543
|
+
"name": cls.name,
|
|
544
|
+
"package": cls.package,
|
|
545
|
+
"annotations": cls.annotations,
|
|
546
|
+
"injected_fields": [
|
|
547
|
+
{
|
|
548
|
+
"name": f.name,
|
|
549
|
+
"type_name": f.type_name,
|
|
550
|
+
"injection_annotation": f.injection_annotation,
|
|
551
|
+
"qualifier": f.qualifier,
|
|
552
|
+
}
|
|
553
|
+
for f in cls.fields
|
|
554
|
+
if f.injection_annotation
|
|
555
|
+
],
|
|
556
|
+
"methods_with_provides": [
|
|
557
|
+
{
|
|
558
|
+
"name": m.name,
|
|
559
|
+
"provides_type": m.provides_type,
|
|
560
|
+
"provides_annotation": next(
|
|
561
|
+
(a for a in m.annotations if a.split(".")[-1] in {"Provides", "Bean"}),
|
|
562
|
+
"Provides",
|
|
563
|
+
),
|
|
564
|
+
}
|
|
565
|
+
for m in cls.methods
|
|
566
|
+
if m.provides_type
|
|
567
|
+
],
|
|
588
568
|
}
|
|
589
|
-
|
|
590
|
-
|
|
591
|
-
|
|
592
|
-
|
|
593
|
-
|
|
594
|
-
|
|
595
|
-
|
|
596
|
-
|
|
597
|
-
|
|
598
|
-
|
|
599
|
-
|
|
600
|
-
|
|
601
|
-
|
|
602
|
-
|
|
603
|
-
|
|
604
|
-
|
|
605
|
-
|
|
606
|
-
|
|
607
|
-
|
|
569
|
+
di_classes.append(di_cls_entry)
|
|
570
|
+
|
|
571
|
+
for fld in cls.fields:
|
|
572
|
+
fqfield = f"{cls.fqcn}#{fld.name}"
|
|
573
|
+
symbol_rows.append(
|
|
574
|
+
{
|
|
575
|
+
"id": symbol_id("field", fqfield, scope),
|
|
576
|
+
"kind": "field",
|
|
577
|
+
"name": fld.name,
|
|
578
|
+
"fqname": fqfield,
|
|
579
|
+
"file_id": f_id,
|
|
580
|
+
"line": fld.line,
|
|
581
|
+
"col": fld.col,
|
|
582
|
+
"embedding": embed_text(f"field {fqfield} {fld.type_name}") if embed else None,
|
|
583
|
+
}
|
|
584
|
+
)
|
|
585
|
+
|
|
586
|
+
for method in cls.methods:
|
|
587
|
+
m_id = method_id(cls.fqcn, method.signature, scope)
|
|
588
|
+
method_rows.append(
|
|
589
|
+
{
|
|
590
|
+
"id": m_id,
|
|
591
|
+
"class_id": c_id,
|
|
592
|
+
"name": method.name,
|
|
593
|
+
"signature": method.signature,
|
|
594
|
+
"return_type": method.return_type,
|
|
595
|
+
"modifiers": method.modifiers + [f"@{a}" for a in method.annotations],
|
|
596
|
+
"is_constructor": method.name == cls.name,
|
|
597
|
+
"is_test": is_test,
|
|
598
|
+
}
|
|
599
|
+
)
|
|
600
|
+
|
|
601
|
+
fqname = f"{cls.fqcn}#{method.signature}"
|
|
602
|
+
symbol_rows.append(
|
|
603
|
+
{
|
|
604
|
+
"id": symbol_id("method", fqname, scope),
|
|
605
|
+
"kind": "method",
|
|
606
|
+
"name": method.name,
|
|
607
|
+
"fqname": fqname,
|
|
608
|
+
"file_id": f_id,
|
|
609
|
+
"line": method.line,
|
|
610
|
+
"col": method.col,
|
|
611
|
+
"embedding": embed_text(f"method {fqname} returns {method.return_type}") if embed else None,
|
|
612
|
+
}
|
|
613
|
+
)
|
|
614
|
+
methods_indexed += 1
|
|
615
|
+
_db_methods_holder[0] = methods_indexed
|
|
616
|
+
|
|
617
|
+
method_catalog[m_id] = {
|
|
618
|
+
"signature": method.signature,
|
|
619
|
+
"name": method.name,
|
|
620
|
+
"param_count": len(method.parameter_types),
|
|
621
|
+
"class_fqcn": cls.fqcn,
|
|
622
|
+
"class_id": c_id,
|
|
623
|
+
}
|
|
624
|
+
method_calls[m_id] = method.calls
|
|
625
|
+
method_context[m_id] = {
|
|
626
|
+
"class_id": c_id,
|
|
627
|
+
"class_fqcn": cls.fqcn,
|
|
628
|
+
"local_types": method.local_types,
|
|
629
|
+
"field_types": cls.field_types,
|
|
630
|
+
"imports": parsed.imports,
|
|
631
|
+
"package": parsed.package,
|
|
632
|
+
}
|
|
633
|
+
class_methods[c_id][method.signature] = m_id
|
|
634
|
+
files_indexed += 1
|
|
635
|
+
_db_done_holder[0] = files_indexed
|
|
636
|
+
|
|
637
|
+
# For incremental re-indexes clear files in bulk first, then use
|
|
638
|
+
# CREATE (not MERGE) for all writes — after clear the nodes are
|
|
639
|
+
# guaranteed absent so we skip the costly existence-check MERGE pays.
|
|
640
|
+
_db_phase_holder[0] = "clearing files"
|
|
641
|
+
if not full:
|
|
642
|
+
for clear_sub in self._chunked([r["id"] for r in file_rows], 100):
|
|
643
|
+
with self.store.transaction():
|
|
644
|
+
self.store.clear_files_batch(clear_sub)
|
|
645
|
+
self.store._recycle_conn()
|
|
646
|
+
_db_phase_holder[0] = "writing files"
|
|
608
647
|
with self.store.transaction():
|
|
609
|
-
self.store.
|
|
648
|
+
self.store.upsert_files_batch(file_rows)
|
|
610
649
|
self.store._recycle_conn()
|
|
611
|
-
|
|
612
|
-
for symbol_sub in self._chunked(symbol_rows, _SYMBOL_SUB_BATCH):
|
|
650
|
+
_db_phase_holder[0] = "writing classes"
|
|
613
651
|
with self.store.transaction():
|
|
614
|
-
self.store.
|
|
652
|
+
self.store.upsert_classes_batch(class_rows)
|
|
615
653
|
self.store._recycle_conn()
|
|
654
|
+
_METHOD_SUB_BATCH = 200
|
|
655
|
+
_db_phase_holder[0] = "writing methods"
|
|
656
|
+
for method_sub in self._chunked(method_rows, _METHOD_SUB_BATCH):
|
|
657
|
+
with self.store.transaction():
|
|
658
|
+
self.store.upsert_methods_batch(method_sub)
|
|
659
|
+
self.store._recycle_conn()
|
|
660
|
+
_SYMBOL_SUB_BATCH = 200
|
|
661
|
+
_db_phase_holder[0] = "writing symbols"
|
|
662
|
+
for symbol_sub in self._chunked(symbol_rows, _SYMBOL_SUB_BATCH):
|
|
663
|
+
with self.store.transaction():
|
|
664
|
+
self.store.upsert_symbols_batch(symbol_sub)
|
|
665
|
+
self.store._recycle_conn()
|
|
666
|
+
self._emit(
|
|
667
|
+
progress,
|
|
668
|
+
"db_write_progress",
|
|
669
|
+
done=files_indexed,
|
|
670
|
+
total=_db_total,
|
|
671
|
+
classes=classes_indexed,
|
|
672
|
+
methods=methods_indexed,
|
|
673
|
+
phase=_db_phase_holder[0],
|
|
674
|
+
)
|
|
675
|
+
_db_phase_holder[0] = "building rows"
|
|
676
|
+
finally:
|
|
677
|
+
_db_hb_stop.set()
|
|
678
|
+
_db_hb_thread.join(timeout=3.0)
|
|
679
|
+
|
|
680
|
+
self._emit(
|
|
681
|
+
progress,
|
|
682
|
+
"db_write_done",
|
|
683
|
+
files_indexed=files_indexed,
|
|
684
|
+
classes=classes_indexed,
|
|
685
|
+
methods=methods_indexed,
|
|
686
|
+
elapsed=time.perf_counter() - _db_start,
|
|
687
|
+
)
|
|
616
688
|
|
|
617
689
|
self._emit(progress, "resolve_calls_start")
|
|
618
690
|
|
|
@@ -192,3 +192,99 @@ def test_parse_loop_skips_none_parsed_in_db_write(tmp_path):
|
|
|
192
192
|
_ = pr["parsed"].classes
|
|
193
193
|
|
|
194
194
|
assert files_indexed == 1, "Skipped sentinel should increment files_indexed"
|
|
195
|
+
|
|
196
|
+
|
|
197
|
+
# ---------------------------------------------------------------------------
|
|
198
|
+
# Test D: DB-write progress covers the silent post-parse phase
|
|
199
|
+
# ---------------------------------------------------------------------------
|
|
200
|
+
|
|
201
|
+
|
|
202
|
+
def test_db_write_heartbeat_emits_until_write_done(tmp_path, monkeypatch):
|
|
203
|
+
"""The post-parse DB write phase must emit start/heartbeat/progress/done
|
|
204
|
+
events so the CLI does not look frozen before call tracing begins."""
|
|
205
|
+
import codespine.indexer.engine as eng
|
|
206
|
+
|
|
207
|
+
class _Txn:
|
|
208
|
+
def __enter__(self):
|
|
209
|
+
return self
|
|
210
|
+
|
|
211
|
+
def __exit__(self, exc_type, exc, tb):
|
|
212
|
+
return False
|
|
213
|
+
|
|
214
|
+
class _SlowStore:
|
|
215
|
+
def query_records(self, *_args, **_kwargs):
|
|
216
|
+
return []
|
|
217
|
+
|
|
218
|
+
def clear_project(self, *_args, **_kwargs):
|
|
219
|
+
pass
|
|
220
|
+
|
|
221
|
+
def clear_file(self, *_args, **_kwargs):
|
|
222
|
+
pass
|
|
223
|
+
|
|
224
|
+
def upsert_project(self, *_args, **_kwargs):
|
|
225
|
+
pass
|
|
226
|
+
|
|
227
|
+
def transaction(self):
|
|
228
|
+
return _Txn()
|
|
229
|
+
|
|
230
|
+
def clear_files_batch(self, *_args, **_kwargs):
|
|
231
|
+
pass
|
|
232
|
+
|
|
233
|
+
def upsert_files_batch(self, *_args, **_kwargs):
|
|
234
|
+
pass
|
|
235
|
+
|
|
236
|
+
def upsert_classes_batch(self, *_args, **_kwargs):
|
|
237
|
+
pass
|
|
238
|
+
|
|
239
|
+
def upsert_methods_batch(self, *_args, **_kwargs):
|
|
240
|
+
time.sleep(0.03)
|
|
241
|
+
|
|
242
|
+
def upsert_symbols_batch(self, *_args, **_kwargs):
|
|
243
|
+
time.sleep(0.03)
|
|
244
|
+
|
|
245
|
+
def add_calls_batch(self, *_args, **_kwargs):
|
|
246
|
+
pass
|
|
247
|
+
|
|
248
|
+
def add_references_batch(self, *_args, **_kwargs):
|
|
249
|
+
pass
|
|
250
|
+
|
|
251
|
+
def add_injections_batch(self, *_args, **_kwargs):
|
|
252
|
+
pass
|
|
253
|
+
|
|
254
|
+
def add_interface_bindings_batch(self, *_args, **_kwargs):
|
|
255
|
+
pass
|
|
256
|
+
|
|
257
|
+
def _recycle_conn(self):
|
|
258
|
+
pass
|
|
259
|
+
|
|
260
|
+
monkeypatch.setattr(eng, "_PARSE_HEARTBEAT_PERIOD", 0.005)
|
|
261
|
+
monkeypatch.setattr(eng, "resolve_calls", lambda *_args, **_kwargs: iter(()))
|
|
262
|
+
|
|
263
|
+
java_file = tmp_path / "src/main/java/com/example/Foo.java"
|
|
264
|
+
java_file.parent.mkdir(parents=True)
|
|
265
|
+
java_file.write_text(
|
|
266
|
+
"package com.example; public class Foo { void run() {} }\n",
|
|
267
|
+
encoding="utf-8",
|
|
268
|
+
)
|
|
269
|
+
|
|
270
|
+
events: list[tuple[str, dict]] = []
|
|
271
|
+
indexer = eng.JavaIndexer(_SlowStore())
|
|
272
|
+
indexer.index_project(
|
|
273
|
+
str(tmp_path),
|
|
274
|
+
full=True,
|
|
275
|
+
progress=lambda event, payload: events.append((event, dict(payload))),
|
|
276
|
+
project_id="proj",
|
|
277
|
+
embed=False,
|
|
278
|
+
)
|
|
279
|
+
|
|
280
|
+
names = [event for event, _payload in events]
|
|
281
|
+
assert "db_write_start" in names
|
|
282
|
+
assert "db_write_heartbeat" in names
|
|
283
|
+
assert "db_write_progress" in names
|
|
284
|
+
assert "db_write_done" in names
|
|
285
|
+
assert names.index("db_write_done") < names.index("resolve_calls_start")
|
|
286
|
+
|
|
287
|
+
done_payload = next(payload for event, payload in events if event == "db_write_done")
|
|
288
|
+
assert done_payload["files_indexed"] == 1
|
|
289
|
+
assert done_payload["classes"] == 1
|
|
290
|
+
assert done_payload["methods"] == 1
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|