java-codebase-rag 0.3.1__py3-none-any.whl → 0.4.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- ast_java.py +1 -1
- build_ast_graph.py +833 -64
- graph_enrich.py +32 -0
- java_codebase_rag/cli.py +74 -5
- java_codebase_rag/config.py +70 -2
- java_codebase_rag/pipeline.py +55 -0
- {java_codebase_rag-0.3.1.dist-info → java_codebase_rag-0.4.0.dist-info}/METADATA +10 -4
- {java_codebase_rag-0.3.1.dist-info → java_codebase_rag-0.4.0.dist-info}/RECORD +13 -13
- server.py +58 -6
- {java_codebase_rag-0.3.1.dist-info → java_codebase_rag-0.4.0.dist-info}/WHEEL +0 -0
- {java_codebase_rag-0.3.1.dist-info → java_codebase_rag-0.4.0.dist-info}/entry_points.txt +0 -0
- {java_codebase_rag-0.3.1.dist-info → java_codebase_rag-0.4.0.dist-info}/licenses/LICENSE +0 -0
- {java_codebase_rag-0.3.1.dist-info → java_codebase_rag-0.4.0.dist-info}/top_level.txt +0 -0
build_ast_graph.py
CHANGED
|
@@ -401,6 +401,330 @@ class GraphTables:
|
|
|
401
401
|
type_role_by_node_id: dict[str, str] = field(default_factory=dict)
|
|
402
402
|
|
|
403
403
|
|
|
404
|
+
@dataclass
class IncrementalResult:
    """Summary record describing one incremental graph rebuild.

    ``mode`` is ``"incremental"`` or ``"full_fallback"``. The remaining fields
    are plain counters (files changed/added/removed, dependents reprocessed)
    plus the elapsed wall time in seconds; they are filled in by whoever
    constructs the record.
    """

    mode: str  # "incremental" | "full_fallback"
    files_changed: int
    files_added: int
    files_removed: int
    dependents_reprocessed: int
    elapsed_sec: float
|
|
413
|
+
|
|
414
|
+
|
|
415
|
+
class FileHashTracker:
    """Track content hashes for incremental graph rebuild.

    Hashes are held in memory as ``rel_path -> sha256 hex digest`` and
    persisted as JSON in ``<index_dir>/.graph_hashes.json``.
    """

    def __init__(self, index_dir: Path):
        self._path = index_dir / ".graph_hashes.json"
        self._hashes: dict[str, str] = {}  # rel_path -> sha256_hex

    def load(self) -> None:
        """Load hashes from disk. No-op if file missing (first run).

        A hash file that cannot be read, is not valid UTF-8 or JSON, or whose
        top-level value is not a JSON object is treated as corrupt: the
        tracker starts from an empty mapping instead of raising.
        """
        if not self._path.exists():
            return
        try:
            with open(self._path, "r", encoding="utf-8") as f:
                loaded = json.load(f)
        except (ValueError, OSError):
            # ValueError covers both json.JSONDecodeError and
            # UnicodeDecodeError (file is not valid UTF-8); start fresh.
            self._hashes = {}
            return
        # Valid JSON of the wrong shape (list, number, ...) would break the
        # later ``.get`` lookups, so it is also treated as corrupt.
        self._hashes = loaded if isinstance(loaded, dict) else {}

    def save(self) -> None:
        """Persist hashes to disk atomically (write .tmp, rename)."""
        tmp_path = self._path.with_suffix(".json.tmp")
        try:
            with open(tmp_path, "w", encoding="utf-8") as f:
                json.dump(self._hashes, f, sort_keys=True)
            os.replace(tmp_path, self._path)
        except OSError as e:
            # Fail gracefully; the caller keeps running without a fresh hash file.
            log.warning("Failed to save hash file %s: %s; next run will rebuild from scratch", self._path, e)

    def detect_changes(self, source_root: Path, ignore: LayeredIgnore) -> tuple[set[str], set[str], set[str]]:
        """Return (added, changed, removed) sets of relative POSIX paths.

        Paths are made relative to the resolved ``source_root`` so symlinked
        roots compare consistently with the stored keys. A file that
        disappears between the directory walk and hashing is skipped.
        """
        root_resolved = source_root.resolve()
        current_files: set[str] = set()
        for abs_path in iter_java_source_files(source_root, ignore=ignore):
            try:
                rel_path = abs_path.resolve().relative_to(root_resolved).as_posix()
            except ValueError:
                # Not under source_root after resolution; keep the path as-is.
                rel_path = abs_path.as_posix()
            current_files.add(rel_path)

        added: set[str] = set()
        changed: set[str] = set()
        for rel_path in current_files:
            try:
                file_hash = _hash_file(source_root / rel_path)
            except FileNotFoundError:
                continue
            stored_hash = self._hashes.get(rel_path)
            if stored_hash is None:
                added.add(rel_path)
            elif stored_hash != file_hash:
                changed.add(rel_path)

        # Anything we have a hash for but did not see in the walk was removed.
        removed = {rel_path for rel_path in self._hashes if rel_path not in current_files}
        return added, changed, removed

    def update(self, rel_paths: set[str], source_root: Path) -> None:
        """Refresh stored hashes for the given paths.

        Existing paths get their current hash recorded. Paths that no longer
        exist have their stored entry dropped, so they are not reported as
        "removed" again on every later run.
        """
        for rel_path in rel_paths:
            abs_path = source_root / rel_path
            if abs_path.exists():
                self._hashes[rel_path] = _hash_file(abs_path)
            else:
                self._hashes.pop(rel_path, None)
|
|
488
|
+
|
|
489
|
+
|
|
490
|
+
def _hash_file(abs_path: Path) -> str:
    """Return the hex SHA-256 digest of the file's raw bytes.

    The file is opened in binary mode and consumed in 64 KiB reads so the
    whole content never has to sit in memory at once.
    """
    digest = hashlib.sha256()
    with open(abs_path, "rb") as stream:
        while True:
            block = stream.read(65536)
            if not block:
                break
            digest.update(block)
    return digest.hexdigest()
|
|
497
|
+
|
|
498
|
+
|
|
499
|
+
# ---------- incremental rebuild helpers ----------
|
|
500
|
+
|
|
501
|
+
|
|
502
|
+
def _load_existing_types(conn: kuzu.Connection, tables: GraphTables, exclude_files: set[str] | None = None) -> None:
    """Load type entries from existing Kuzu graph into tables for cross-file resolution.

    Every Symbol of kind class/interface/enum/annotation/record is turned into
    a ``TypeIndexEntry`` and registered in ``tables.types``,
    ``tables.by_simple_name`` and ``tables.by_package``.

    Args:
        conn: Open Kuzu connection to the existing graph.
        tables: GraphTables whose type indexes are populated in place.
        exclude_files: When a non-empty set, only types from files NOT in the
            set are loaded. ``None`` loads every type. An explicitly empty set
            returns immediately without loading anything.
    """
    if exclude_files is not None and not exclude_files:
        return

    where = "WHERE s.kind IN ['class', 'interface', 'enum', 'annotation', 'record']"
    params: dict = {}
    if exclude_files:
        where += "\n          AND NOT (s.filename IN $exclude_files)"
        params["exclude_files"] = list(exclude_files)

    query = f"""
        MATCH (s:Symbol)
        {where}
        RETURN s.kind, s.fqn, s.name, s.filename, s.module, s.microservice, s.id, s.package
    """
    result = conn.execute(query, params)
    while result.has_next():
        kind, fqn, name, filename, module, microservice, node_id, stored_package = result.get_next()[:8]

        # Symbol nodes carry a `package` property (set by the symbol
        # CREATE/MERGE statements). Prefer it: trimming the simple name off
        # the FQN yields the enclosing type's FQN for a dot-joined nested type
        # such as "a.Outer.Inner". Fall back to the derivation when the stored
        # value is missing or empty.
        derived_package = fqn[: -(len(name) + 1)] if fqn.endswith("." + name) else ""
        package = stored_package or derived_package

        entry = TypeIndexEntry(
            decl=TypeDecl(name, kind, fqn),
            file_path=filename,
            module=module,
            microservice=microservice,
            package=package,
            outer_fqn=None,  # nesting information is not reconstructed here
            node_id=node_id,
        )
        tables.types[fqn] = entry
        tables.by_simple_name.setdefault(name, []).append(entry)
        tables.by_package.setdefault(package, []).append(entry)
|
|
544
|
+
|
|
545
|
+
|
|
546
|
+
def _load_existing_members(conn: kuzu.Connection, tables: GraphTables, exclude_files: set[str] | None = None) -> None:
    """Load member entries from existing Kuzu graph into tables.members.

    Every Symbol of kind method/constructor becomes a ``MemberEntry`` appended
    to ``tables.members``. The ``MethodDecl`` is rebuilt from the stored name
    and signature only.

    Args:
        conn: Open Kuzu connection to the existing graph.
        tables: GraphTables whose ``members`` list is extended in place.
        exclude_files: When a non-empty set, only members from files NOT in
            the set are loaded. ``None`` loads every member. An explicitly
            empty set returns immediately without loading anything.
    """
    if exclude_files is not None and not exclude_files:
        return

    where = "WHERE s.kind IN ['method', 'constructor']"
    params: dict = {}
    if exclude_files:
        where += "\n          AND NOT (s.filename IN $exclude_files)"
        params["exclude_files"] = list(exclude_files)

    query = f"""
        MATCH (s:Symbol)
        {where}
        RETURN s.kind, s.name, s.filename, s.signature, s.parent_id, s.fqn, s.id,
               s.module, s.microservice
    """
    result = conn.execute(query, params)
    while result.has_next():
        (
            kind, name, filename, signature, parent_id, fqn, node_id,
            module, microservice,
        ) = result.get_next()[:9]

        # Member FQNs are written as "<parent_fqn>#<signature>".
        parent_fqn = fqn.split("#")[0] if "#" in fqn else ""

        decl = MethodDecl(name, "", kind == "constructor")
        decl.signature = signature

        tables.members.append(MemberEntry(
            kind=kind,
            decl=decl,
            parent_id=parent_id,
            parent_fqn=parent_fqn,
            file_path=filename,
            # Carry over the stored module/microservice, as the type loader
            # does, instead of blanking them; NULL values become "".
            module=module or "",
            microservice=microservice or "",
            node_id=node_id,
        ))
|
|
589
|
+
|
|
590
|
+
|
|
591
|
+
def _find_dependents(conn: kuzu.Connection, changed_node_ids: set[str]) -> set[str]:
    """Find files whose nodes have edges pointing into changed nodes.

    Args:
        conn: Open Kuzu connection to the existing graph.
        changed_node_ids: Symbol ids considered changed.

    Returns:
        The set of non-empty ``filename`` values of source nodes that have an
        EXTENDS/IMPLEMENTS/INJECTS/CALLS/DECLARES/OVERRIDES edge into any of
        the changed nodes. Empty when ``changed_node_ids`` is empty.
    """
    dependent_files: set[str] = set()
    if not changed_node_ids:
        # Nothing changed, so nothing can depend on it; also avoids sending
        # an empty list parameter through six queries.
        return dependent_files

    # Query each Symbol-to-Symbol edge table for incoming edges.
    edge_types = ["EXTENDS", "IMPLEMENTS", "INJECTS", "CALLS", "DECLARES", "OVERRIDES"]
    params = {"changed_ids": list(changed_node_ids)}

    for edge_type in edge_types:
        query = f"""
            MATCH (src:Symbol)-[e:{edge_type}]->(dst:Symbol)
            WHERE dst.id IN $changed_ids
            RETURN DISTINCT src.filename
        """
        result = conn.execute(query, params)
        while result.has_next():
            filename = result.get_next()[0]
            if filename:  # Skip phantom nodes (filename = "")
                dependent_files.add(filename)

    return dependent_files
|
|
613
|
+
|
|
614
|
+
|
|
615
|
+
def _delete_file_scope(conn: kuzu.Connection, filenames: set[str]) -> None:
    """Delete all nodes and edges originating from the given files.

    Skip phantom nodes (filename=""): an empty filename in ``filenames`` is
    ignored, and the call is a no-op when no real filename remains.

    Deletes ALL edge types in Phase 1, then nodes in subsequent phases.
    Route/Client/Producer nodes use DETACH DELETE as a safety net for any
    edges missed in Phase 1.

    Edges are deleted in batch across all filenames first to avoid Kuzu
    "has connected edges" errors when edges from one file point to nodes
    in another file within the same scope.
    """
    # "" is the filename of phantom nodes; never let it select anything.
    filename_list = [name for name in filenames if name]
    if not filename_list:
        return
    scope_params = {"filenames": filename_list}

    # Phase 1: Delete ALL edges from ALL scope files at once.
    # This avoids ordering issues where file A has an edge from file B
    # pointing into it; if we delete A's nodes before B's edges, Kuzu
    # raises "has connected edges" errors.
    edge_tables = [
        "EXTENDS", "IMPLEMENTS", "INJECTS", "CALLS", "DECLARES", "OVERRIDES",
        "UNRESOLVED_AT", "EXPOSES", "DECLARES_CLIENT", "DECLARES_PRODUCER",
        "HTTP_CALLS", "ASYNC_CALLS",
    ]
    for edge_type in edge_tables:
        query = f"""
            MATCH (src)-[e:{edge_type}]->(dst)
            WHERE e.source_file IN $filenames
            DELETE e
        """
        conn.execute(query, scope_params)

    # Phase 2: Collect all Symbol node IDs for UnresolvedCallSite cleanup.
    symbol_ids: list[str] = []
    symbol_ids_query = """
        MATCH (s:Symbol)
        WHERE s.filename IN $filenames
        RETURN s.id
    """
    result = conn.execute(symbol_ids_query, scope_params)
    while result.has_next():
        symbol_ids.append(result.get_next()[0])

    # Delete UnresolvedCallSite nodes whose caller_id is in the collected set.
    if symbol_ids:
        unresolved_query = """
            MATCH (u:UnresolvedCallSite)
            WHERE u.caller_id IN $symbol_ids
            DELETE u
        """
        conn.execute(unresolved_query, {"symbol_ids": symbol_ids})

    # Phase 3: Delete Symbol nodes.
    delete_symbols_query = """
        MATCH (s:Symbol)
        WHERE s.filename IN $filenames
        DELETE s
    """
    conn.execute(delete_symbols_query, scope_params)

    # Phase 4: Delete Route, Client, Producer nodes.
    # Use DETACH DELETE as a safety net in case any edges were missed in Phase 1.
    for label in ["Route", "Client", "Producer"]:
        conn.execute(
            f"MATCH (n:{label}) WHERE n.filename IN $filenames DETACH DELETE n",
            scope_params,
        )
|
|
681
|
+
|
|
682
|
+
|
|
683
|
+
def _scoped_write(conn: kuzu.Connection, tables: GraphTables, *, project_root: Path, meta_chain: dict[str, frozenset[str]] | None) -> None:
    """Write the contents of ``tables`` into an already-initialised Kuzu database.

    Counterpart of write_kuzu() that leaves the schema alone: there is no
    _drop_all()/_create_schema() step. Callers must have run
    _populate_declares_rows() and _populate_overrides_rows() beforehand.

    Symbol nodes go through the MERGE-based writer so nodes that already exist
    do not cause failures. Each stage (nodes, edges, routes/exposes) is timed
    and reported on the verbose channel only when it took more than 0.1s.
    """

    def _report(stage: str, started: float) -> None:
        took = time.time() - started
        if took > 0.1:  # Only log if significant
            _verbose_stderr_line(f"[graph] scoped write · {stage} written in {took:.2f}s")

    nodes_started = time.time()
    _write_nodes_merge(conn, tables, project_root=project_root, meta_chain=meta_chain)
    _report("nodes", nodes_started)

    edges_started = time.time()
    file_by_node_id = _build_file_by_node_id(tables)
    _write_edges(conn, tables, file_by_node_id)
    _report("edges", edges_started)

    routes_started = time.time()
    _write_routes_and_exposes(conn, tables, file_by_node_id)
    _report("routes/exposes", routes_started)
|
|
715
|
+
|
|
716
|
+
|
|
717
|
+
def _write_nodes_merge(
    conn: kuzu.Connection,
    tables: GraphTables,
    *,
    project_root: Path,
    meta_chain: dict[str, frozenset[str]] | None,
) -> None:
    """Upsert Symbol nodes into an existing database.

    Delegates to the shared node writer with the MERGE statement, so nodes
    that are already present are updated instead of created a second time.
    """
    _write_nodes_impl(
        conn,
        tables,
        project_root=project_root,
        meta_chain=meta_chain,
        symbol_query=_MERGE_SYMBOL,
    )
|
|
726
|
+
|
|
727
|
+
|
|
404
728
|
# ---------- file walk (see `path_filtering.iter_java_source_files`) ----------
|
|
405
729
|
|
|
406
730
|
|
|
@@ -461,8 +785,15 @@ def _register_type(
|
|
|
461
785
|
return entry
|
|
462
786
|
|
|
463
787
|
|
|
464
|
-
def pass1_parse(root: Path, tables: GraphTables, *, verbose: bool) -> dict[str, JavaFileAst]:
|
|
465
|
-
"""Walk files, parse them, populate node indexes. Returns path -> AST.
|
|
788
|
+
def pass1_parse(root: Path, tables: GraphTables, *, verbose: bool, scope_files: set[str] | None = None) -> dict[str, JavaFileAst]:
|
|
789
|
+
"""Walk files, parse them, populate node indexes. Returns path -> AST.
|
|
790
|
+
|
|
791
|
+
Args:
|
|
792
|
+
root: Source root directory.
|
|
793
|
+
tables: GraphTables to populate.
|
|
794
|
+
verbose: Whether to emit progress output.
|
|
795
|
+
scope_files: Optional set of relative POSIX paths to parse. If None, parse all files.
|
|
796
|
+
"""
|
|
466
797
|
asts: dict[str, JavaFileAst] = {}
|
|
467
798
|
ignore = LayeredIgnore(root)
|
|
468
799
|
t0 = time.time()
|
|
@@ -480,6 +811,13 @@ def pass1_parse(root: Path, tables: GraphTables, *, verbose: bool) -> dict[str,
|
|
|
480
811
|
if verbose and slow_sec > 0:
|
|
481
812
|
time.sleep(slow_sec)
|
|
482
813
|
for p in iter_java_source_files(root, ignore=ignore):
|
|
814
|
+
# Skip files not in scope (if scope is provided)
|
|
815
|
+
try:
|
|
816
|
+
rel = p.resolve().relative_to(root.resolve()).as_posix()
|
|
817
|
+
except ValueError:
|
|
818
|
+
rel = p.as_posix()
|
|
819
|
+
if scope_files is not None and rel not in scope_files:
|
|
820
|
+
continue
|
|
483
821
|
n_files += 1
|
|
484
822
|
try:
|
|
485
823
|
content = p.read_bytes()
|
|
@@ -488,10 +826,6 @@ def pass1_parse(root: Path, tables: GraphTables, *, verbose: bool) -> dict[str,
|
|
|
488
826
|
continue
|
|
489
827
|
if not content.strip():
|
|
490
828
|
continue
|
|
491
|
-
try:
|
|
492
|
-
rel = p.resolve().relative_to(root.resolve()).as_posix()
|
|
493
|
-
except ValueError:
|
|
494
|
-
rel = p.as_posix()
|
|
495
829
|
try:
|
|
496
830
|
ast = parse_java(content, filename=rel, verbose=verbose)
|
|
497
831
|
except Exception:
|
|
@@ -2414,22 +2748,22 @@ _SCHEMA_PRODUCER = (
|
|
|
2414
2748
|
|
|
2415
2749
|
_SCHEMA_EXTENDS = (
|
|
2416
2750
|
"CREATE REL TABLE EXTENDS(FROM Symbol TO Symbol, "
|
|
2417
|
-
"dst_name STRING, dst_fqn STRING, resolved BOOLEAN)"
|
|
2751
|
+
"source_file STRING, dst_name STRING, dst_fqn STRING, resolved BOOLEAN)"
|
|
2418
2752
|
)
|
|
2419
2753
|
_SCHEMA_IMPLEMENTS = (
|
|
2420
2754
|
"CREATE REL TABLE IMPLEMENTS(FROM Symbol TO Symbol, "
|
|
2421
|
-
"dst_name STRING, dst_fqn STRING, resolved BOOLEAN)"
|
|
2755
|
+
"source_file STRING, dst_name STRING, dst_fqn STRING, resolved BOOLEAN)"
|
|
2422
2756
|
)
|
|
2423
2757
|
_SCHEMA_INJECTS = (
|
|
2424
2758
|
"CREATE REL TABLE INJECTS(FROM Symbol TO Symbol, "
|
|
2425
|
-
"dst_name STRING, dst_fqn STRING, resolved BOOLEAN, "
|
|
2759
|
+
"source_file STRING, dst_name STRING, dst_fqn STRING, resolved BOOLEAN, "
|
|
2426
2760
|
"mechanism STRING, annotation STRING, field_or_param STRING)"
|
|
2427
2761
|
)
|
|
2428
|
-
_SCHEMA_DECLARES = "CREATE REL TABLE DECLARES(FROM Symbol TO Symbol)"
|
|
2429
|
-
_SCHEMA_OVERRIDES = "CREATE REL TABLE OVERRIDES(FROM Symbol TO Symbol)"
|
|
2762
|
+
_SCHEMA_DECLARES = "CREATE REL TABLE DECLARES(FROM Symbol TO Symbol, source_file STRING)"
|
|
2763
|
+
_SCHEMA_OVERRIDES = "CREATE REL TABLE OVERRIDES(FROM Symbol TO Symbol, source_file STRING)"
|
|
2430
2764
|
_SCHEMA_CALLS = (
|
|
2431
2765
|
"CREATE REL TABLE CALLS(FROM Symbol TO Symbol, "
|
|
2432
|
-
"call_site_line INT64, call_site_byte INT64, arg_count INT64, "
|
|
2766
|
+
"source_file STRING, call_site_line INT64, call_site_byte INT64, arg_count INT64, "
|
|
2433
2767
|
"confidence DOUBLE, strategy STRING, source STRING, resolved BOOLEAN, "
|
|
2434
2768
|
"callee_declaring_role STRING)"
|
|
2435
2769
|
)
|
|
@@ -2439,27 +2773,27 @@ _SCHEMA_UNRESOLVED_CALL_SITE = (
|
|
|
2439
2773
|
"arg_count INT64, callee_simple STRING, receiver_expr STRING, reason STRING, "
|
|
2440
2774
|
"PRIMARY KEY(id))"
|
|
2441
2775
|
)
|
|
2442
|
-
_SCHEMA_UNRESOLVED_AT = "CREATE REL TABLE UNRESOLVED_AT(FROM Symbol TO UnresolvedCallSite)"
|
|
2776
|
+
_SCHEMA_UNRESOLVED_AT = "CREATE REL TABLE UNRESOLVED_AT(FROM Symbol TO UnresolvedCallSite, source_file STRING)"
|
|
2443
2777
|
_SCHEMA_EXPOSES = (
|
|
2444
2778
|
"CREATE REL TABLE EXPOSES(FROM Symbol TO Route, "
|
|
2445
|
-
"confidence DOUBLE, strategy STRING)"
|
|
2779
|
+
"source_file STRING, confidence DOUBLE, strategy STRING)"
|
|
2446
2780
|
)
|
|
2447
2781
|
_SCHEMA_DECLARES_CLIENT = (
|
|
2448
2782
|
"CREATE REL TABLE DECLARES_CLIENT(FROM Symbol TO Client, "
|
|
2449
|
-
"confidence DOUBLE, strategy STRING)"
|
|
2783
|
+
"source_file STRING, confidence DOUBLE, strategy STRING)"
|
|
2450
2784
|
)
|
|
2451
2785
|
_SCHEMA_DECLARES_PRODUCER = (
|
|
2452
2786
|
"CREATE REL TABLE DECLARES_PRODUCER(FROM Symbol TO Producer, "
|
|
2453
|
-
"confidence DOUBLE, strategy STRING)"
|
|
2787
|
+
"source_file STRING, confidence DOUBLE, strategy STRING)"
|
|
2454
2788
|
)
|
|
2455
2789
|
_SCHEMA_HTTP_CALLS = (
|
|
2456
2790
|
"CREATE REL TABLE HTTP_CALLS(FROM Client TO Route, "
|
|
2457
|
-
"confidence DOUBLE, strategy STRING, "
|
|
2791
|
+
"source_file STRING, confidence DOUBLE, strategy STRING, "
|
|
2458
2792
|
"method_call STRING, raw_uri STRING, match STRING)"
|
|
2459
2793
|
)
|
|
2460
2794
|
_SCHEMA_ASYNC_CALLS = (
|
|
2461
2795
|
"CREATE REL TABLE ASYNC_CALLS(FROM Producer TO Route, "
|
|
2462
|
-
"confidence DOUBLE, strategy STRING, "
|
|
2796
|
+
"source_file STRING, confidence DOUBLE, strategy STRING, "
|
|
2463
2797
|
"direction STRING, raw_topic STRING, match STRING)"
|
|
2464
2798
|
)
|
|
2465
2799
|
|
|
@@ -2538,13 +2872,25 @@ _CREATE_SYMBOL = (
|
|
|
2538
2872
|
"role: $role, signature: $signature, parent_id: $parent_id, resolved: $resolved})"
|
|
2539
2873
|
)
|
|
2540
2874
|
|
|
2875
|
+
# Upsert statement for Symbol nodes: match (or create) by id, then overwrite
# every other property from the parameters of the same name.
_MERGE_SYMBOL_COLUMNS = (
    "kind", "name", "fqn",
    "package", "module", "microservice",
    "filename",
    "start_line", "end_line",
    "start_byte", "end_byte",
    "modifiers", "annotations", "capabilities",
    "role", "signature", "parent_id", "resolved",
)
_MERGE_SYMBOL = "MERGE (n:Symbol {id: $id}) SET " + ", ".join(
    f"n.{col} = ${col}" for col in _MERGE_SYMBOL_COLUMNS
)
|
|
2541
2885
|
|
|
2542
|
-
|
|
2886
|
+
|
|
2887
|
+
def _write_nodes_impl(
|
|
2543
2888
|
conn: kuzu.Connection,
|
|
2544
2889
|
tables: GraphTables,
|
|
2545
2890
|
*,
|
|
2546
2891
|
project_root: Path,
|
|
2547
2892
|
meta_chain: dict[str, frozenset[str]] | None,
|
|
2893
|
+
symbol_query: str,
|
|
2548
2894
|
) -> None:
|
|
2549
2895
|
overrides = load_brownfield_overrides(project_root)
|
|
2550
2896
|
try:
|
|
@@ -2555,12 +2901,12 @@ def _write_nodes(
|
|
|
2555
2901
|
mch = meta_chain
|
|
2556
2902
|
# packages
|
|
2557
2903
|
for pkg, pid in tables.packages.items():
|
|
2558
|
-
conn.execute(
|
|
2904
|
+
conn.execute(symbol_query, _node_row(
|
|
2559
2905
|
id=pid, kind="package", name=pkg.rsplit(".", 1)[-1], fqn=pkg, package=pkg,
|
|
2560
2906
|
))
|
|
2561
2907
|
# files
|
|
2562
2908
|
for path, fid in tables.files.items():
|
|
2563
|
-
conn.execute(
|
|
2909
|
+
conn.execute(symbol_query, _node_row(
|
|
2564
2910
|
id=fid, kind="file", name=Path(path).name, fqn=path, filename=path,
|
|
2565
2911
|
))
|
|
2566
2912
|
# types
|
|
@@ -2572,7 +2918,7 @@ def _write_nodes(
|
|
|
2572
2918
|
meta_chain=mch,
|
|
2573
2919
|
)
|
|
2574
2920
|
tables.type_role_by_node_id[entry.node_id] = role
|
|
2575
|
-
conn.execute(
|
|
2921
|
+
conn.execute(symbol_query, _node_row(
|
|
2576
2922
|
id=entry.node_id, kind=d.kind, name=d.name, fqn=d.fqn,
|
|
2577
2923
|
package=entry.package,
|
|
2578
2924
|
module=entry.module, microservice=entry.microservice,
|
|
@@ -2588,7 +2934,7 @@ def _write_nodes(
|
|
|
2588
2934
|
))
|
|
2589
2935
|
# members (methods / constructors)
|
|
2590
2936
|
for m in tables.members:
|
|
2591
|
-
conn.execute(
|
|
2937
|
+
conn.execute(symbol_query, _node_row(
|
|
2592
2938
|
id=m.node_id, kind=m.kind, name=m.decl.name,
|
|
2593
2939
|
fqn=f"{m.parent_fqn}#{m.decl.signature}",
|
|
2594
2940
|
package=tables.types[m.parent_fqn].package if m.parent_fqn in tables.types else "",
|
|
@@ -2602,33 +2948,44 @@ def _write_nodes(
|
|
|
2602
2948
|
))
|
|
2603
2949
|
# phantoms
|
|
2604
2950
|
for pid, row in tables.phantoms.items():
|
|
2605
|
-
conn.execute(
|
|
2951
|
+
conn.execute(symbol_query, row)
|
|
2952
|
+
|
|
2953
|
+
|
|
2954
|
+
def _write_nodes(
    conn: kuzu.Connection,
    tables: GraphTables,
    *,
    project_root: Path,
    meta_chain: dict[str, frozenset[str]] | None,
) -> None:
    """Create Symbol nodes via the shared node writer using the CREATE statement."""
    _write_nodes_impl(
        conn,
        tables,
        project_root=project_root,
        meta_chain=meta_chain,
        symbol_query=_CREATE_SYMBOL,
    )
|
|
2606
2962
|
|
|
2607
2963
|
|
|
2608
2964
|
_CREATE_EXT = (
|
|
2609
2965
|
"MATCH (a:Symbol {id: $src}), (b:Symbol {id: $dst}) "
|
|
2610
|
-
"CREATE (a)-[:EXTENDS {dst_name: $dst_name, dst_fqn: $dst_fqn, resolved: $resolved}]->(b)"
|
|
2966
|
+
"CREATE (a)-[:EXTENDS {source_file: $source_file, dst_name: $dst_name, dst_fqn: $dst_fqn, resolved: $resolved}]->(b)"
|
|
2611
2967
|
)
|
|
2612
2968
|
_CREATE_IMPL = (
|
|
2613
2969
|
"MATCH (a:Symbol {id: $src}), (b:Symbol {id: $dst}) "
|
|
2614
|
-
"CREATE (a)-[:IMPLEMENTS {dst_name: $dst_name, dst_fqn: $dst_fqn, resolved: $resolved}]->(b)"
|
|
2970
|
+
"CREATE (a)-[:IMPLEMENTS {source_file: $source_file, dst_name: $dst_name, dst_fqn: $dst_fqn, resolved: $resolved}]->(b)"
|
|
2615
2971
|
)
|
|
2616
2972
|
_CREATE_INJ = (
|
|
2617
2973
|
"MATCH (a:Symbol {id: $src}), (b:Symbol {id: $dst}) "
|
|
2618
|
-
"CREATE (a)-[:INJECTS {dst_name: $dst_name, dst_fqn: $dst_fqn, resolved: $resolved, "
|
|
2974
|
+
"CREATE (a)-[:INJECTS {source_file: $source_file, dst_name: $dst_name, dst_fqn: $dst_fqn, resolved: $resolved, "
|
|
2619
2975
|
"mechanism: $mechanism, annotation: $annotation, field_or_param: $field_or_param}]->(b)"
|
|
2620
2976
|
)
|
|
2621
2977
|
_CREATE_DECL = (
|
|
2622
2978
|
"MATCH (a:Symbol {id: $src}), (b:Symbol {id: $dst}) "
|
|
2623
|
-
"CREATE (a)-[:DECLARES]->(b)"
|
|
2979
|
+
"CREATE (a)-[:DECLARES {source_file: $source_file}]->(b)"
|
|
2624
2980
|
)
|
|
2625
2981
|
_CREATE_OVERRIDES = (
|
|
2626
2982
|
"MATCH (a:Symbol {id: $src}), (b:Symbol {id: $dst}) "
|
|
2627
|
-
"CREATE (a)-[:OVERRIDES]->(b)"
|
|
2983
|
+
"CREATE (a)-[:OVERRIDES {source_file: $source_file}]->(b)"
|
|
2628
2984
|
)
|
|
2629
2985
|
_CREATE_CALL = (
|
|
2630
2986
|
"MATCH (a:Symbol {id: $src}), (b:Symbol {id: $dst}) "
|
|
2631
2987
|
"CREATE (a)-[:CALLS {"
|
|
2988
|
+
"source_file: $source_file, "
|
|
2632
2989
|
"call_site_line: $line, call_site_byte: $byte, arg_count: $argc, "
|
|
2633
2990
|
"confidence: $conf, strategy: $strat, source: $src_kind, resolved: $resolved, "
|
|
2634
2991
|
"callee_declaring_role: $callee_declaring_role"
|
|
@@ -2656,11 +3013,11 @@ _CREATE_CLIENT = (
|
|
|
2656
3013
|
|
|
2657
3014
|
_CREATE_EXPOSES = (
|
|
2658
3015
|
"MATCH (s:Symbol {id: $sid}), (r:Route {id: $rid}) "
|
|
2659
|
-
"CREATE (s)-[:EXPOSES {confidence: $confidence, strategy: $strategy}]->(r)"
|
|
3016
|
+
"CREATE (s)-[:EXPOSES {source_file: $source_file, confidence: $confidence, strategy: $strategy}]->(r)"
|
|
2660
3017
|
)
|
|
2661
3018
|
_CREATE_DECLARES_CLIENT = (
|
|
2662
3019
|
"MATCH (s:Symbol {id: $sid}), (c:Client {id: $cid}) "
|
|
2663
|
-
"CREATE (s)-[:DECLARES_CLIENT {confidence: $confidence, strategy: $strategy}]->(c)"
|
|
3020
|
+
"CREATE (s)-[:DECLARES_CLIENT {source_file: $source_file, confidence: $confidence, strategy: $strategy}]->(c)"
|
|
2664
3021
|
)
|
|
2665
3022
|
_CREATE_PRODUCER = (
|
|
2666
3023
|
"CREATE (:Producer {"
|
|
@@ -2673,16 +3030,16 @@ _CREATE_PRODUCER = (
|
|
|
2673
3030
|
)
|
|
2674
3031
|
_CREATE_DECLARES_PRODUCER = (
|
|
2675
3032
|
"MATCH (s:Symbol {id: $sid}), (p:Producer {id: $pid}) "
|
|
2676
|
-
"CREATE (s)-[:DECLARES_PRODUCER {confidence: $confidence, strategy: $strategy}]->(p)"
|
|
3033
|
+
"CREATE (s)-[:DECLARES_PRODUCER {source_file: $source_file, confidence: $confidence, strategy: $strategy}]->(p)"
|
|
2677
3034
|
)
|
|
2678
3035
|
_CREATE_HTTP_CALL = (
|
|
2679
3036
|
"MATCH (c:Client {id: $cid}), (r:Route {id: $rid}) "
|
|
2680
|
-
"CREATE (c)-[:HTTP_CALLS {confidence: $confidence, strategy: $strategy, "
|
|
3037
|
+
"CREATE (c)-[:HTTP_CALLS {source_file: $source_file, confidence: $confidence, strategy: $strategy, "
|
|
2681
3038
|
"method_call: $method_call, raw_uri: $raw_uri, match: $match}]->(r)"
|
|
2682
3039
|
)
|
|
2683
3040
|
_CREATE_ASYNC_CALL = (
|
|
2684
3041
|
"MATCH (p:Producer {id: $pid}), (r:Route {id: $rid}) "
|
|
2685
|
-
"CREATE (p)-[:ASYNC_CALLS {confidence: $confidence, strategy: $strategy, "
|
|
3042
|
+
"CREATE (p)-[:ASYNC_CALLS {source_file: $source_file, confidence: $confidence, strategy: $strategy, "
|
|
2686
3043
|
"direction: $direction, raw_topic: $raw_topic, match: $match}]->(r)"
|
|
2687
3044
|
)
|
|
2688
3045
|
|
|
@@ -2732,30 +3089,53 @@ def _populate_overrides_rows(tables: GraphTables) -> None:
|
|
|
2732
3089
|
]
|
|
2733
3090
|
|
|
2734
3091
|
|
|
2735
|
-
def
|
|
3092
|
+
def _build_file_by_node_id(tables: GraphTables) -> dict[str, str]:
    """Map each type and member node id to the file path recorded on its entry.

    Types are added first and members second, so a member entry wins if both
    share a node id. The mapping is used to fill ``source_file`` on edges.
    """
    by_id: dict[str, str] = {
        type_entry.node_id: type_entry.file_path
        for type_entry in tables.types.values()
    }
    by_id.update((member.node_id, member.file_path) for member in tables.members)
    return by_id
|
|
3100
|
+
|
|
3101
|
+
|
|
3102
|
+
def _write_edges(conn: kuzu.Connection, tables: GraphTables, _file_by_node_id: dict[str, str] | None = None) -> None:
|
|
3103
|
+
# Build node_id -> file_path lookup for source_file resolution.
|
|
3104
|
+
if _file_by_node_id is None:
|
|
3105
|
+
_file_by_node_id = _build_file_by_node_id(tables)
|
|
3106
|
+
|
|
2736
3107
|
for r in tables.extends_rows:
|
|
2737
3108
|
conn.execute(_CREATE_EXT, {
|
|
2738
3109
|
"src": r.src_id, "dst": r.dst_id,
|
|
3110
|
+
"source_file": _file_by_node_id.get(r.src_id, ""),
|
|
2739
3111
|
"dst_name": r.dst_name, "dst_fqn": r.dst_fqn, "resolved": r.resolved,
|
|
2740
3112
|
})
|
|
2741
3113
|
for r in tables.implements_rows:
|
|
2742
3114
|
conn.execute(_CREATE_IMPL, {
|
|
2743
3115
|
"src": r.src_id, "dst": r.dst_id,
|
|
3116
|
+
"source_file": _file_by_node_id.get(r.src_id, ""),
|
|
2744
3117
|
"dst_name": r.dst_name, "dst_fqn": r.dst_fqn, "resolved": r.resolved,
|
|
2745
3118
|
})
|
|
2746
3119
|
for r in tables.injects_rows:
|
|
2747
3120
|
conn.execute(_CREATE_INJ, {
|
|
2748
3121
|
"src": r.src_id, "dst": r.dst_id,
|
|
3122
|
+
"source_file": _file_by_node_id.get(r.src_id, ""),
|
|
2749
3123
|
"dst_name": r.dst_name, "dst_fqn": r.dst_fqn, "resolved": r.resolved,
|
|
2750
3124
|
"mechanism": r.mechanism, "annotation": r.annotation,
|
|
2751
3125
|
"field_or_param": r.field_or_param,
|
|
2752
3126
|
})
|
|
2753
3127
|
|
|
2754
3128
|
for row in tables.declares_rows:
|
|
2755
|
-
conn.execute(_CREATE_DECL, {
|
|
3129
|
+
conn.execute(_CREATE_DECL, {
|
|
3130
|
+
"src": row.src_id, "dst": row.dst_id,
|
|
3131
|
+
"source_file": _file_by_node_id.get(row.src_id, ""),
|
|
3132
|
+
})
|
|
2756
3133
|
|
|
2757
3134
|
for row in tables.overrides_rows:
|
|
2758
|
-
conn.execute(_CREATE_OVERRIDES, {
|
|
3135
|
+
conn.execute(_CREATE_OVERRIDES, {
|
|
3136
|
+
"src": row.src_id, "dst": row.dst_id,
|
|
3137
|
+
"source_file": _file_by_node_id.get(row.src_id, ""),
|
|
3138
|
+
})
|
|
2759
3139
|
|
|
2760
3140
|
seen_calls: set[tuple[str, str, int, int]] = set()
|
|
2761
3141
|
unique_calls: list[CallsRow] = []
|
|
@@ -2769,6 +3149,7 @@ def _write_edges(conn: kuzu.Connection, tables: GraphTables) -> None:
|
|
|
2769
3149
|
for row in unique_calls:
|
|
2770
3150
|
conn.execute(_CREATE_CALL, {
|
|
2771
3151
|
"src": row.src_id, "dst": row.dst_id,
|
|
3152
|
+
"source_file": _file_by_node_id.get(row.src_id, ""),
|
|
2772
3153
|
"line": row.call_site_line,
|
|
2773
3154
|
"byte": row.call_site_byte,
|
|
2774
3155
|
"argc": row.arg_count,
|
|
@@ -2789,7 +3170,7 @@ def _write_edges(conn: kuzu.Connection, tables: GraphTables) -> None:
|
|
|
2789
3170
|
)
|
|
2790
3171
|
_CREATE_UNRESOLVED_AT = (
|
|
2791
3172
|
"MATCH (a:Symbol {id: $caller}), (u:UnresolvedCallSite {id: $ucs}) "
|
|
2792
|
-
"CREATE (a)-[:UNRESOLVED_AT]->(u)"
|
|
3173
|
+
"CREATE (a)-[:UNRESOLVED_AT {source_file: $source_file}]->(u)"
|
|
2793
3174
|
)
|
|
2794
3175
|
seen_ucs: set[str] = set()
|
|
2795
3176
|
for row in tables.unresolved_call_site_rows:
|
|
@@ -2806,10 +3187,23 @@ def _write_edges(conn: kuzu.Connection, tables: GraphTables) -> None:
|
|
|
2806
3187
|
"recv": row.receiver_expr,
|
|
2807
3188
|
"reason": row.reason,
|
|
2808
3189
|
})
|
|
2809
|
-
conn.execute(_CREATE_UNRESOLVED_AT, {
|
|
3190
|
+
conn.execute(_CREATE_UNRESOLVED_AT, {
|
|
3191
|
+
"caller": row.caller_id, "ucs": row.id,
|
|
3192
|
+
"source_file": _file_by_node_id.get(row.caller_id, ""),
|
|
3193
|
+
})
|
|
2810
3194
|
|
|
2811
3195
|
|
|
2812
|
-
def _write_routes_and_exposes(conn: kuzu.Connection, tables: GraphTables) -> None:
|
|
3196
|
+
def _write_routes_and_exposes(conn: kuzu.Connection, tables: GraphTables, _file_by_node_id: dict[str, str] | None = None) -> None:
|
|
3197
|
+
# Build node_id -> file_path lookup for source_file resolution (for Symbol sources).
|
|
3198
|
+
if _file_by_node_id is None:
|
|
3199
|
+
_file_by_node_id = _build_file_by_node_id(tables)
|
|
3200
|
+
|
|
3201
|
+
# Build client_id -> filename lookup for HTTP_CALLS source_file.
|
|
3202
|
+
_file_by_client_id: dict[str, str] = {row.id: row.filename for row in tables.client_rows}
|
|
3203
|
+
|
|
3204
|
+
# Build producer_id -> filename lookup for ASYNC_CALLS source_file.
|
|
3205
|
+
_file_by_producer_id: dict[str, str] = {row.id: row.filename for row in tables.producer_rows}
|
|
3206
|
+
|
|
2813
3207
|
for row in tables.routes_rows:
|
|
2814
3208
|
conn.execute(_CREATE_ROUTE, {
|
|
2815
3209
|
"id": row.id,
|
|
@@ -2834,6 +3228,7 @@ def _write_routes_and_exposes(conn: kuzu.Connection, tables: GraphTables) -> Non
|
|
|
2834
3228
|
conn.execute(_CREATE_EXPOSES, {
|
|
2835
3229
|
"sid": row.symbol_id,
|
|
2836
3230
|
"rid": row.route_id,
|
|
3231
|
+
"source_file": _file_by_node_id.get(row.symbol_id, ""),
|
|
2837
3232
|
"confidence": row.confidence,
|
|
2838
3233
|
"strategy": row.strategy,
|
|
2839
3234
|
})
|
|
@@ -2843,6 +3238,7 @@ def _write_routes_and_exposes(conn: kuzu.Connection, tables: GraphTables) -> Non
|
|
|
2843
3238
|
conn.execute(_CREATE_DECLARES_CLIENT, {
|
|
2844
3239
|
"sid": row.symbol_id,
|
|
2845
3240
|
"cid": row.client_id,
|
|
3241
|
+
"source_file": _file_by_node_id.get(row.symbol_id, ""),
|
|
2846
3242
|
"confidence": row.confidence,
|
|
2847
3243
|
"strategy": row.strategy,
|
|
2848
3244
|
})
|
|
@@ -2852,6 +3248,7 @@ def _write_routes_and_exposes(conn: kuzu.Connection, tables: GraphTables) -> Non
|
|
|
2852
3248
|
conn.execute(_CREATE_DECLARES_PRODUCER, {
|
|
2853
3249
|
"sid": row.symbol_id,
|
|
2854
3250
|
"pid": row.producer_id,
|
|
3251
|
+
"source_file": _file_by_node_id.get(row.symbol_id, ""),
|
|
2855
3252
|
"confidence": row.confidence,
|
|
2856
3253
|
"strategy": row.strategy,
|
|
2857
3254
|
})
|
|
@@ -2859,6 +3256,7 @@ def _write_routes_and_exposes(conn: kuzu.Connection, tables: GraphTables) -> Non
|
|
|
2859
3256
|
conn.execute(_CREATE_HTTP_CALL, {
|
|
2860
3257
|
"cid": row.client_id,
|
|
2861
3258
|
"rid": row.route_id,
|
|
3259
|
+
"source_file": _file_by_client_id.get(row.client_id, ""),
|
|
2862
3260
|
"confidence": row.confidence,
|
|
2863
3261
|
"strategy": row.strategy,
|
|
2864
3262
|
"method_call": row.method_call,
|
|
@@ -2869,6 +3267,7 @@ def _write_routes_and_exposes(conn: kuzu.Connection, tables: GraphTables) -> Non
|
|
|
2869
3267
|
conn.execute(_CREATE_ASYNC_CALL, {
|
|
2870
3268
|
"pid": row.producer_id,
|
|
2871
3269
|
"rid": row.route_id,
|
|
3270
|
+
"source_file": _file_by_producer_id.get(row.producer_id, ""),
|
|
2872
3271
|
"confidence": row.confidence,
|
|
2873
3272
|
"strategy": row.strategy,
|
|
2874
3273
|
"direction": row.direction,
|
|
@@ -2929,28 +3328,29 @@ def _write_meta(conn: kuzu.Connection, tables: GraphTables, source_root: Path) -
|
|
|
2929
3328
|
clients_by_kind = dict(sorted(client_stats.clients_by_kind.items()))
|
|
2930
3329
|
producers_by_kind = dict(sorted(producer_stats.producers_by_kind.items()))
|
|
2931
3330
|
conn.execute(
|
|
2932
|
-
"
|
|
2933
|
-
"
|
|
2934
|
-
"
|
|
2935
|
-
"
|
|
2936
|
-
"
|
|
2937
|
-
"
|
|
2938
|
-
"
|
|
2939
|
-
"
|
|
2940
|
-
"
|
|
2941
|
-
"
|
|
2942
|
-
"
|
|
2943
|
-
"
|
|
2944
|
-
"
|
|
2945
|
-
"
|
|
2946
|
-
"
|
|
2947
|
-
"
|
|
2948
|
-
"
|
|
2949
|
-
"
|
|
2950
|
-
"
|
|
2951
|
-
"
|
|
2952
|
-
"
|
|
2953
|
-
"
|
|
3331
|
+
"MERGE (m:GraphMeta {key: $k}) "
|
|
3332
|
+
"SET m.ontology_version = $ov, m.built_at = $t, "
|
|
3333
|
+
"m.source_root = $sr, m.counts_json = $cj, m.parse_errors = $pe, "
|
|
3334
|
+
"m.routes_total = $routes_total, m.exposes_total = $exposes_total, "
|
|
3335
|
+
"m.routes_by_framework = $routes_by_framework, m.routes_resolved_pct = $routes_resolved_pct, "
|
|
3336
|
+
"m.routes_from_brownfield_pct = $routes_from_brownfield_pct, m.routes_by_layer = $routes_by_layer, "
|
|
3337
|
+
"m.clients_total = $clients_total, m.declares_client_total = $declares_client_total, "
|
|
3338
|
+
"m.clients_by_kind = $clients_by_kind, "
|
|
3339
|
+
"m.producers_total = $producers_total, m.declares_producer_total = $declares_producer_total, "
|
|
3340
|
+
"m.producers_by_kind = $producers_by_kind, "
|
|
3341
|
+
"m.http_calls_total = $http_calls_total, m.async_calls_total = $async_calls_total, "
|
|
3342
|
+
"m.http_calls_by_strategy = $http_calls_by_strategy, m.async_calls_by_strategy = $async_calls_by_strategy, "
|
|
3343
|
+
"m.http_calls_resolved_pct = $http_calls_resolved_pct, m.async_calls_resolved_pct = $async_calls_resolved_pct, "
|
|
3344
|
+
"m.http_clients_from_brownfield_pct = $http_clients_from_brownfield_pct, "
|
|
3345
|
+
"m.async_producers_from_brownfield_pct = $async_producers_from_brownfield_pct, "
|
|
3346
|
+
"m.http_calls_match_breakdown = $http_calls_match_breakdown, "
|
|
3347
|
+
"m.async_calls_match_breakdown = $async_calls_match_breakdown, "
|
|
3348
|
+
"m.cross_service_calls_total = $cross_service_calls_total, "
|
|
3349
|
+
"m.pass3_skipped_cross_service = $pass3_skipped_cross_service, "
|
|
3350
|
+
"m.pass3_unresolved_phantom_receiver = $pass3_unresolved_phantom_receiver, "
|
|
3351
|
+
"m.pass3_unresolved_chained = $pass3_unresolved_chained, "
|
|
3352
|
+
"m.pass4_exposes_suppressed_feign = $pass4_exposes_suppressed_feign, "
|
|
3353
|
+
"m.cross_service_resolution = $cross_service_resolution",
|
|
2954
3354
|
{
|
|
2955
3355
|
"k": "graph",
|
|
2956
3356
|
"ov": ONTOLOGY_VERSION,
|
|
@@ -2990,6 +3390,359 @@ def _write_meta(conn: kuzu.Connection, tables: GraphTables, source_root: Path) -
|
|
|
2990
3390
|
)
|
|
2991
3391
|
|
|
2992
3392
|
|
|
3393
|
+
def _incremental_apply_scope(
    conn: kuzu.Connection,
    tracker: FileHashTracker,
    source_root: Path,
    changed_files: set[str],
    removed: set[str],
    *,
    verbose: bool,
    expansion_cap: int,
) -> int | None:
    """Apply one incremental update through an already-open connection.

    Runs dependent expansion, scoped deletion, scoped passes 1-4, the global
    passes 5-6, and finally refreshes the hash store and GraphMeta.

    Returns the number of dependent files found, or ``None`` when the changed
    files plus their dependents exceed ``expansion_cap``. The cap is checked
    before ``_delete_file_scope`` is called, so the caller can switch to a
    full rebuild. Exceptions propagate to the caller; this helper neither
    closes ``conn`` nor touches the crash marker.
    """
    # Step 3: Dependent expansion.
    # Collect node IDs for changed files (single query instead of N+1).
    changed_node_ids: set[str] = set()
    result = conn.execute(
        "MATCH (s:Symbol) WHERE s.filename IN $filenames RETURN s.id",
        {"filenames": list(changed_files)},
    )
    while result.has_next():
        changed_node_ids.add(result.get_next()[0])

    dependent_files = _find_dependents(conn, changed_node_ids)
    scope_files = changed_files | dependent_files

    if len(scope_files) > expansion_cap:
        if verbose:
            _verbose_stderr_line(f"[increment] dependent expansion cap ({expansion_cap}) exceeded ({len(scope_files)} files); falling back to full rebuild")
        return None

    if verbose:
        _verbose_stderr_line(f"[increment] processing {len(scope_files)} files ({len(changed_files)} changed + {len(dependent_files)} dependents)")

    # Step 4: Scoped deletion.
    if verbose:
        _verbose_stderr_line("[increment] deleting outdated nodes and edges")
    _delete_file_scope(conn, scope_files)

    # Force deletion to be applied by running a dummy query.
    conn.execute("MATCH (s:Symbol) RETURN count(*)")

    # Step 5: Scoped pass 1-4.
    if verbose:
        _verbose_stderr_line("[increment] rebuilding scoped files (passes 1-4)")

    tables = GraphTables()
    asts = pass1_parse(source_root, tables, verbose=verbose, scope_files=scope_files)

    # Load existing types and members for cross-file resolution (only from unchanged files).
    _load_existing_types(conn, tables, exclude_files=scope_files)
    _load_existing_members(conn, tables, exclude_files=scope_files)

    pass2_edges(tables, asts, verbose=verbose)
    pass3_calls(tables, asts, verbose=verbose)
    pass4_routes(tables, asts, source_root=source_root, verbose=verbose)

    _populate_declares_rows(tables)
    _populate_overrides_rows(tables)

    meta_chain = collect_annotation_meta_chain(str(source_root.resolve()))
    _scoped_write(conn, tables, project_root=source_root, meta_chain=meta_chain)

    # Step 6: Global pass 5-6.
    if verbose:
        _verbose_stderr_line("[increment] running global passes 5-6")

    # Rebuild full tables for global pass 5-6 (pass1 populates members from scratch).
    tables_for_global = GraphTables()
    global_asts = pass1_parse(source_root, tables_for_global, verbose=verbose)

    pass5_imperative_edges(tables_for_global, global_asts, source_root=source_root, verbose=verbose)

    # Clients and producers are rewritten wholesale, so drop the old ones
    # (and, via DETACH, their edges) first.
    conn.execute("MATCH (c:Client) DETACH DELETE c")
    conn.execute("MATCH (p:Producer) DETACH DELETE p")

    pass6_match_edges(tables_for_global, verbose=verbose)
    _write_clients_producers_and_calls(conn, tables_for_global)

    # Step 7: Update hash store and metadata.
    if verbose:
        _verbose_stderr_line("[increment] updating hash store and metadata")

    tracker.update(scope_files, source_root)
    # Deleted files must not linger in the store, otherwise they would be
    # reported as removed again on the next run.
    for filename in removed:
        tracker._hashes.pop(filename, None)
    tracker.save()

    _write_meta(conn, tables_for_global, source_root)
    return len(dependent_files)


def incremental_rebuild(
    source_root: Path,
    kuzu_path: Path,
    *,
    verbose: bool,
    expansion_cap: int = 50,
) -> IncrementalResult:
    """Incrementally rebuild the Kuzu graph, processing only changed files and their dependents.

    Returns IncrementalResult with statistics about the rebuild.
    Falls back to full rebuild if:
    - No previous graph exists
    - Crash marker exists (previous incremental run failed)
    - Ontology version < 17 or unreadable (missing source_file on edges)
    - Dependent expansion exceeds expansion_cap
    - Any error occurs while applying the incremental update

    The crash marker is checked before change detection so that a graph left
    half-updated by an interrupted run is never reported as "no changes".
    The marker is removed only once a rebuild has completed, and the database
    handle is always released before a full rebuild starts.
    """
    t_start = time.time()
    index_dir = kuzu_path.parent
    crash_marker_path = index_dir / ".graph_increment_in_progress"

    def _say(message: str) -> None:
        if verbose:
            _verbose_stderr_line(message)

    def _close_quietly(connection) -> None:
        # Best effort: a failing close must not mask the real outcome.
        try:
            connection.close()
        except Exception:
            pass

    def _full_rebuild() -> IncrementalResult:
        outcome = _fallback_to_full(source_root, kuzu_path, verbose, t_start)
        # Clear the marker only after the full rebuild succeeded; if it
        # raised, the marker stays and forces another full rebuild next time.
        crash_marker_path.unlink(missing_ok=True)
        return outcome

    # Step 1: Decide whether an incremental run is possible at all.
    if not kuzu_path.exists():
        _say("[increment] no existing graph; falling back to full rebuild")
        return _full_rebuild()

    # Step 2: Crash marker check (done before opening the database and before
    # change detection; see docstring).
    if crash_marker_path.exists():
        _say("[increment] crash marker exists; falling back to full rebuild")
        return _full_rebuild()

    db = kuzu.Database(str(kuzu_path))
    conn = kuzu.Connection(db)

    # Check ontology version. A missing GraphMeta row or a NULL version is
    # treated as version 0, i.e. too old for incremental updates.
    fallback_reason = None
    try:
        meta_result = conn.execute("MATCH (m:GraphMeta) RETURN m.ontology_version AS version")
        row = meta_result.get_next() if meta_result.has_next() else None
        version = (row[0] if row else 0) or 0
        if version < 17:
            fallback_reason = f"[increment] ontology version {version} < 17; falling back to full rebuild"
    except Exception as e:
        fallback_reason = f"[increment] failed to read ontology version: {e}; falling back to full rebuild"

    if fallback_reason is not None:
        _say(fallback_reason)
        _close_quietly(conn)
        # Drop the handles so the full rebuild can reopen/replace the database.
        del conn, db
        return _full_rebuild()

    try:
        tracker = FileHashTracker(index_dir)
        tracker.load()
        ignore = LayeredIgnore(source_root)
        added, changed, removed = tracker.detect_changes(source_root, ignore=ignore)
        changed_files = added | changed | removed

        if not changed_files:
            _say("[increment] no changes detected; no-op")
            _close_quietly(conn)
            return IncrementalResult(
                mode="incremental",
                files_changed=0,
                files_added=0,
                files_removed=0,
                dependents_reprocessed=0,
                elapsed_sec=time.time() - t_start,
            )

        _say(f"[increment] detected {len(added)} added, {len(changed)} changed, {len(removed)} removed files")

        # Write crash marker: from here on the graph may be mid-update.
        crash_marker_path.write_text("", encoding="utf-8")
    except Exception:
        # Errors here still propagate (as before); just do not leak the connection.
        _close_quietly(conn)
        raise

    needs_full = False
    dependents = 0
    try:
        outcome = _incremental_apply_scope(
            conn,
            tracker,
            source_root,
            changed_files,
            removed,
            verbose=verbose,
            expansion_cap=expansion_cap,
        )
        if outcome is None:
            needs_full = True
        else:
            dependents = outcome
    except Exception as e:
        _say(f"[increment] error during incremental rebuild: {e}; falling back to full rebuild")
        needs_full = True
    finally:
        _close_quietly(conn)
    # Release the database handle on every path, not just the ontology one.
    del conn, db

    if needs_full:
        # Runs outside the try block so a failing full rebuild is reported
        # to the caller instead of being retried.
        return _full_rebuild()

    crash_marker_path.unlink(missing_ok=True)

    elapsed = time.time() - t_start
    _say(f"[increment] completed in {elapsed:.2f}s")

    return IncrementalResult(
        mode="incremental",
        files_changed=len(changed),
        files_added=len(added),
        files_removed=len(removed),
        dependents_reprocessed=dependents,
        elapsed_sec=elapsed,
    )
|
|
3618
|
+
|
|
3619
|
+
|
|
3620
|
+
def _init_hash_tracker(source_root: Path, kuzu_path: Path) -> int:
    """Re-seed the hash store so it describes exactly the current Java files.

    The store lives next to the Kuzu database (``kuzu_path.parent``). Every
    file yielded by ``iter_java_source_files`` is hashed under its POSIX path
    relative to ``source_root`` (or its own POSIX path when it lies outside
    the root). Entries loaded from a previous store that no longer correspond
    to a discovered file are dropped before saving.

    Returns the number of files hashed.
    """
    tracker = FileHashTracker(kuzu_path.parent)
    tracker.load()
    ignore = LayeredIgnore(source_root)
    root_resolved = source_root.resolve()

    all_files: set[str] = set()
    for p in iter_java_source_files(source_root, ignore=ignore):
        try:
            rel_path = p.resolve().relative_to(root_resolved).as_posix()
        except ValueError:
            # File is not located under source_root; keep its path as given.
            rel_path = p.as_posix()
        all_files.add(rel_path)

    tracker.update(all_files, source_root)

    # This runs right after a full rebuild, so the graph only knows the files
    # found above. Stale entries would otherwise show up as "removed" on the
    # next incremental run and trigger needless work.
    # NOTE(review): assumes update() only adds/overwrites entries - confirm.
    for stale in set(tracker._hashes) - all_files:
        del tracker._hashes[stale]

    tracker.save()
    return len(all_files)
|
|
3638
|
+
|
|
3639
|
+
|
|
3640
|
+
def _fallback_to_full(source_root: Path, kuzu_path: Path, verbose: bool, t_start: float) -> IncrementalResult:
    """Rebuild the whole graph from scratch and re-seed the hash store.

    Runs passes 1-6 over ``source_root``, writes the result to ``kuzu_path``
    and re-initializes the file hash tracker. The returned result has mode
    ``"full_fallback"``, counts every hashed file as added, and measures
    elapsed time from ``t_start``.
    """
    full_tables = GraphTables()
    parsed = pass1_parse(source_root, full_tables, verbose=verbose)

    # Passes 2 and 3 share a call shape, as do passes 4 and 5.
    for edge_pass in (pass2_edges, pass3_calls):
        edge_pass(full_tables, parsed, verbose=verbose)
    for rooted_pass in (pass4_routes, pass5_imperative_edges):
        rooted_pass(full_tables, parsed, source_root=source_root, verbose=verbose)
    pass6_match_edges(full_tables, verbose=verbose)

    write_kuzu(kuzu_path, full_tables, source_root=source_root, verbose=verbose)

    hashed_count = _init_hash_tracker(source_root, kuzu_path)
    duration = time.time() - t_start

    return IncrementalResult(
        mode="full_fallback",
        files_changed=0,
        files_added=hashed_count,
        files_removed=0,
        dependents_reprocessed=0,
        elapsed_sec=duration,
    )
|
|
3661
|
+
|
|
3662
|
+
|
|
3663
|
+
def _write_clients_producers_and_calls(conn: kuzu.Connection, tables: GraphTables) -> None:
    """Write Route, Client, Producer, and cross-service edges to Kuzu.

    Used by the incremental rebuild's global pass 5-6 step. Writes phantom
    Route nodes (created by pass5 for cross-service calls) that wouldn't
    otherwise exist in Kuzu.

    Every edge carries a ``source_file`` property: the declaring member's
    file for DECLARES_* edges, and the client's / producer's filename for
    HTTP_CALLS / ASYNC_CALLS. An empty string is used when the id is unknown.
    """
    # MERGE (not CREATE) so routes already written during the scoped step are
    # updated in place instead of duplicated.
    for row in tables.routes_rows:
        conn.execute(
            "MERGE (r:Route {id: $id}) "
            "SET r.kind = $kind, r.framework = $framework, r.method = $method, "
            "r.path = $path, r.path_template = $path_template, r.path_regex = $path_regex, "
            "r.topic = $topic, r.broker = $broker, r.feign_name = $feign_name, r.feign_url = $feign_url, "
            "r.microservice = $microservice, r.module = $module, r.filename = $filename, "
            "r.start_line = $start_line, r.end_line = $end_line, r.resolved = $resolved",
            asdict(row),
        )

    # Write clients and producers using asdict (same pattern as _write_routes_and_exposes).
    for row in tables.client_rows:
        conn.execute(_CREATE_CLIENT, asdict(row))
    for row in tables.producer_rows:
        conn.execute(_CREATE_PRODUCER, asdict(row))

    # id -> filename lookups, built once. A plain .get(..., "") replaces the
    # placeholder MemberEntry that used to be constructed for every row.
    file_by_member_id: dict[str, str] = {m.node_id: m.file_path for m in tables.members}
    file_by_client_id: dict[str, str] = {c.id: c.filename for c in tables.client_rows}
    file_by_producer_id: dict[str, str] = {p.id: p.filename for p in tables.producer_rows}

    # Write declares_client edges.
    for row in tables.declares_client_rows:
        conn.execute(_CREATE_DECLARES_CLIENT, {
            "sid": row.symbol_id,
            "cid": row.client_id,
            "source_file": file_by_member_id.get(row.symbol_id, ""),
            "confidence": row.confidence,
            "strategy": row.strategy,
        })

    # Write declares_producer edges.
    for row in tables.declares_producer_rows:
        conn.execute(_CREATE_DECLARES_PRODUCER, {
            "sid": row.symbol_id,
            "pid": row.producer_id,
            "source_file": file_by_member_id.get(row.symbol_id, ""),
            "confidence": row.confidence,
            "strategy": row.strategy,
        })

    # Write HTTP_CALLS edges.
    for row in tables.http_call_rows:
        conn.execute(_CREATE_HTTP_CALL, {
            "cid": row.client_id,
            "rid": row.route_id,
            "source_file": file_by_client_id.get(row.client_id, ""),
            "confidence": row.confidence,
            "strategy": row.strategy,
            "method_call": row.method_call,
            "raw_uri": row.raw_uri,
            "match": row.match,
        })

    # Write ASYNC_CALLS edges.
    for row in tables.async_call_rows:
        conn.execute(_CREATE_ASYNC_CALL, {
            "pid": row.producer_id,
            "rid": row.route_id,
            "source_file": file_by_producer_id.get(row.producer_id, ""),
            "confidence": row.confidence,
            "strategy": row.strategy,
            "direction": row.direction,
            "raw_topic": row.raw_topic,
            "match": row.match,
        })
|
|
3744
|
+
|
|
3745
|
+
|
|
2993
3746
|
def write_kuzu(
|
|
2994
3747
|
db_path: Path,
|
|
2995
3748
|
tables: GraphTables,
|
|
@@ -3022,11 +3775,12 @@ def write_kuzu(
|
|
|
3022
3775
|
_populate_declares_rows(tables)
|
|
3023
3776
|
_populate_overrides_rows(tables)
|
|
3024
3777
|
t1 = time.time()
|
|
3025
|
-
|
|
3778
|
+
_fbyid = _build_file_by_node_id(tables)
|
|
3779
|
+
_write_edges(conn, tables, _fbyid)
|
|
3026
3780
|
if verbose:
|
|
3027
3781
|
_verbose_stderr_line(f"[graph] writing · edges written in {time.time() - t1:.2f}s")
|
|
3028
3782
|
t2 = time.time()
|
|
3029
|
-
_write_routes_and_exposes(conn, tables)
|
|
3783
|
+
_write_routes_and_exposes(conn, tables, _fbyid)
|
|
3030
3784
|
if verbose:
|
|
3031
3785
|
_verbose_stderr_line(f"[graph] writing · routes/exposes written in {time.time() - t2:.2f}s")
|
|
3032
3786
|
_write_meta(conn, tables, source_root)
|
|
@@ -3055,6 +3809,7 @@ def main() -> int:
|
|
|
3055
3809
|
),
|
|
3056
3810
|
)
|
|
3057
3811
|
parser.add_argument("--verbose", action="store_true")
|
|
3812
|
+
parser.add_argument("--incremental", action="store_true", help="Run incremental rebuild instead of full rebuild")
|
|
3058
3813
|
args = parser.parse_args()
|
|
3059
3814
|
|
|
3060
3815
|
root = Path(args.source_root).expanduser().resolve() if args.source_root else Path.cwd().resolve()
|
|
@@ -3064,6 +3819,20 @@ def main() -> int:
|
|
|
3064
3819
|
|
|
3065
3820
|
kuzu_path = Path(args.kuzu_path).expanduser() if args.kuzu_path else _default_kuzu_path()
|
|
3066
3821
|
|
|
3822
|
+
if args.incremental:
|
|
3823
|
+
result = incremental_rebuild(root, kuzu_path, verbose=args.verbose)
|
|
3824
|
+
print(json.dumps({
|
|
3825
|
+
"mode": result.mode,
|
|
3826
|
+
"files_changed": result.files_changed,
|
|
3827
|
+
"files_added": result.files_added,
|
|
3828
|
+
"files_removed": result.files_removed,
|
|
3829
|
+
"dependents_reprocessed": result.dependents_reprocessed,
|
|
3830
|
+
"elapsed_sec": result.elapsed_sec,
|
|
3831
|
+
}))
|
|
3832
|
+
if args.verbose:
|
|
3833
|
+
_verbose_stderr_line(f"[graph] done · mode={result.mode} files_changed={result.files_changed} files_added={result.files_added} files_removed={result.files_removed} dependents={result.dependents_reprocessed} elapsed={result.elapsed_sec:.2f}s")
|
|
3834
|
+
return 0
|
|
3835
|
+
|
|
3067
3836
|
tables = GraphTables()
|
|
3068
3837
|
asts = pass1_parse(root, tables, verbose=args.verbose)
|
|
3069
3838
|
pass2_edges(tables, asts, verbose=args.verbose)
|