@pmaddire/gcie 0.1.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/AGENT.md +256 -0
- package/AGENT_USAGE.md +231 -0
- package/ARCHITECTURE.md +151 -0
- package/CLAUDE.md +69 -0
- package/DEBUGGING_PLAYBOOK.md +160 -0
- package/KNOWLEDGE_INDEX.md +154 -0
- package/POTENTIAL_UPDATES +130 -0
- package/PROJECT.md +141 -0
- package/README.md +371 -0
- package/REPO_DIGITAL_TWIN.md +98 -0
- package/ROADMAP.md +301 -0
- package/SETUP_ANY_REPO.md +85 -0
- package/bin/gcie-init.js +20 -0
- package/bin/gcie.js +45 -0
- package/cli/__init__.py +1 -0
- package/cli/app.py +163 -0
- package/cli/commands/__init__.py +1 -0
- package/cli/commands/cache.py +35 -0
- package/cli/commands/context.py +2426 -0
- package/cli/commands/context_slices.py +617 -0
- package/cli/commands/debug.py +24 -0
- package/cli/commands/index.py +17 -0
- package/cli/commands/query.py +20 -0
- package/cli/commands/setup.py +73 -0
- package/config/__init__.py +1 -0
- package/config/scanner_config.py +82 -0
- package/context/__init__.py +1 -0
- package/context/architecture_bootstrap.py +170 -0
- package/context/architecture_index.py +185 -0
- package/context/architecture_parser.py +170 -0
- package/context/architecture_slicer.py +308 -0
- package/context/context_router.py +70 -0
- package/context/fallback_evaluator.py +21 -0
- package/coverage_integration/__init__.py +1 -0
- package/coverage_integration/coverage_loader.py +55 -0
- package/debugging/__init__.py +12 -0
- package/debugging/bug_localizer.py +81 -0
- package/debugging/execution_path_analyzer.py +42 -0
- package/embeddings/__init__.py +6 -0
- package/embeddings/encoder.py +45 -0
- package/embeddings/faiss_index.py +72 -0
- package/git_integration/__init__.py +1 -0
- package/git_integration/git_miner.py +78 -0
- package/graphs/__init__.py +17 -0
- package/graphs/call_graph.py +70 -0
- package/graphs/code_graph.py +81 -0
- package/graphs/execution_graph.py +35 -0
- package/graphs/git_graph.py +43 -0
- package/graphs/graph_store.py +25 -0
- package/graphs/node_factory.py +21 -0
- package/graphs/test_graph.py +65 -0
- package/graphs/validators.py +28 -0
- package/graphs/variable_graph.py +51 -0
- package/knowledge_index/__init__.py +1 -0
- package/knowledge_index/index_builder.py +60 -0
- package/knowledge_index/models.py +35 -0
- package/knowledge_index/query_api.py +38 -0
- package/knowledge_index/store.py +23 -0
- package/llm_context/__init__.py +6 -0
- package/llm_context/context_builder.py +67 -0
- package/llm_context/snippet_selector.py +57 -0
- package/package.json +14 -0
- package/parser/__init__.py +18 -0
- package/parser/ast_parser.py +216 -0
- package/parser/call_resolver.py +52 -0
- package/parser/models.py +75 -0
- package/parser/tree_sitter_adapter.py +56 -0
- package/parser/variable_extractor.py +31 -0
- package/retrieval/__init__.py +17 -0
- package/retrieval/cache.py +22 -0
- package/retrieval/hybrid_retriever.py +249 -0
- package/retrieval/query_parser.py +38 -0
- package/retrieval/ranking.py +43 -0
- package/retrieval/semantic_retriever.py +39 -0
- package/retrieval/symbolic_retriever.py +80 -0
- package/scanner/__init__.py +5 -0
- package/scanner/file_filters.py +37 -0
- package/scanner/models.py +44 -0
- package/scanner/repository_scanner.py +55 -0
- package/scripts/bootstrap_from_github.ps1 +41 -0
- package/tracing/__init__.py +1 -0
- package/tracing/runtime_tracer.py +60 -0
|
@@ -0,0 +1,72 @@
|
|
|
1
|
+
"""FAISS-compatible vector index with in-memory fallback."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
import math
|
|
6
|
+
from dataclasses import dataclass
|
|
7
|
+
|
|
8
|
+
|
|
9
|
+
@dataclass(frozen=True, slots=True)
class SearchHit:
    """Single nearest-neighbour result returned by ``VectorIndex.search``."""

    # Position of the matched vector in insertion (add) order.
    idx: int
    # Similarity score: raw inner product (FAISS IndexFlatIP or the
    # brute-force fallback) — higher means more similar.
    score: float
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
def _dot(a: list[float], b: list[float]) -> float:
|
|
16
|
+
return sum(x * y for x, y in zip(a, b))
|
|
17
|
+
|
|
18
|
+
|
|
19
|
+
class VectorIndex:
    """Vector index that prefers FAISS and falls back to brute-force scoring.

    When the optional ``faiss`` package is importable, vectors live in an
    ``IndexFlatIP`` (inner-product) index; otherwise they are kept in a plain
    Python list and scored with :func:`_dot`.
    """

    def __init__(self) -> None:
        # Detected faiss module; None selects the pure-Python fallback path.
        self._faiss = None
        # FAISS index, created lazily on the first add() once the dimension
        # of the incoming vectors is known.
        self._index = None
        # Storage for the brute-force fallback.
        self._vectors: list[list[float]] = []

        try:
            import faiss  # type: ignore
        except Exception:
            self._faiss = None
        else:
            self._faiss = faiss

    def add(self, vectors: list[list[float]]) -> None:
        """Append *vectors* to the index; an empty batch is a no-op."""
        if not vectors:
            return

        if self._faiss is None:
            self._vectors.extend(vectors)
            return

        import numpy as np

        batch = np.array(vectors, dtype="float32")
        if self._index is None:
            # The first batch fixes the dimensionality of the FAISS index.
            self._index = self._faiss.IndexFlatIP(batch.shape[1])
        self._index.add(batch)

    def search(self, query: list[float], top_k: int = 5) -> tuple[SearchHit, ...]:
        """Return up to *top_k* hits for *query*, best (highest score) first."""
        if top_k <= 0:
            return ()

        if self._faiss is None:
            ranked = sorted(
                (SearchHit(idx=i, score=_dot(query, vec)) for i, vec in enumerate(self._vectors)),
                key=lambda hit: hit.score,
                reverse=True,
            )
            return tuple(ranked[:top_k])

        import numpy as np

        if self._index is None or self._index.ntotal == 0:
            return ()

        scores, indices = self._index.search(np.array([query], dtype="float32"), top_k)

        results: list[SearchHit] = []
        for raw_score, raw_idx in zip(scores[0], indices[0]):
            if raw_idx < 0:
                # FAISS pads with -1 when fewer than top_k vectors exist.
                continue
            results.append(SearchHit(idx=int(raw_idx), score=float(raw_score)))
        return tuple(results)
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
"""Git integration package."""
|
|
@@ -0,0 +1,78 @@
|
|
|
1
|
+
"""Git history miner using GitPython."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
from dataclasses import dataclass
|
|
6
|
+
from pathlib import Path
|
|
7
|
+
|
|
8
|
+
from git import Repo
|
|
9
|
+
from git.exc import GitCommandError
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
@dataclass(frozen=True, slots=True)
class FileChange:
    """Single file-level change in a commit."""

    # Path after the change (GitPython b_path, falling back to a_path).
    path: str
    # Upper-cased GitPython change code, e.g. "A", "M", "D", "R".
    change_type: str
|
|
18
|
+
|
|
19
|
+
|
|
20
|
+
@dataclass(frozen=True, slots=True)
class CommitRecord:
    """Normalized commit metadata."""

    # Full commit SHA.
    hexsha: str
    # Author as rendered by str(commit.author).
    author: str
    # Commit timestamp as Unix epoch seconds.
    committed_date: int
    # First line of the commit message.
    summary: str
    # File-level changes vs the first parent (all files for a root commit).
    files: tuple[FileChange, ...]
|
|
29
|
+
|
|
30
|
+
|
|
31
|
+
def mine_commit_history(repo_path: str | Path, *, max_count: int | None = None) -> tuple[CommitRecord, ...]:
    """Return normalized commit history from a git repository.

    Commits are returned in GitPython's ``iter_commits`` order (newest
    first by default). File changes are diffed against the first parent;
    root commits fall back to ``commit.stats`` and report every file as
    an addition ("A").

    Args:
        repo_path: Path to a git repository with a working tree.
        max_count: Optional cap on the number of commits mined.

    Returns:
        Tuple of CommitRecord; empty for bare repositories or when the
        repository has no commits/HEAD yet.
    """
    repo = Repo(Path(repo_path))
    if repo.bare:
        repo.close()
        return ()

    try:
        try:
            commits = list(repo.iter_commits("HEAD", max_count=max_count))
        except (ValueError, GitCommandError):
            # No commits/HEAD yet.
            return ()

        out: list[CommitRecord] = []

        for commit in commits:
            file_changes: list[FileChange] = []

            if commit.parents:
                # Diff against the first parent only, so merge commits are
                # reported relative to their first parent.
                parent = commit.parents[0]
                diffs = parent.diff(commit, create_patch=False)
                for diff in diffs:
                    # Prefer the post-change path; deletions only carry a_path.
                    new_path = diff.b_path or diff.a_path or ""
                    file_changes.append(
                        FileChange(
                            path=new_path,
                            change_type=(diff.change_type or "M").upper(),
                        )
                    )
            else:
                # Root commit: nothing to diff against, so every touched
                # file is treated as an addition.
                for path in commit.stats.files.keys():
                    file_changes.append(FileChange(path=path, change_type="A"))

            out.append(
                CommitRecord(
                    hexsha=commit.hexsha,
                    author=str(commit.author),
                    committed_date=int(commit.committed_date),
                    summary=commit.summary,
                    files=tuple(file_changes),
                )
            )

        return tuple(out)
    finally:
        # Release cached git.cmd processes even on the early return above.
        repo.git.clear_cache()
        repo.close()
|
|
@@ -0,0 +1,17 @@
|
|
|
1
|
+
"""Graph package for GCIE."""
|
|
2
|
+
|
|
3
|
+
from .call_graph import build_call_graph
|
|
4
|
+
from .code_graph import build_code_structure_graph
|
|
5
|
+
from .execution_graph import build_execution_graph
|
|
6
|
+
from .git_graph import build_git_graph
|
|
7
|
+
from .test_graph import build_test_coverage_graph
|
|
8
|
+
from .variable_graph import build_variable_graph
|
|
9
|
+
|
|
10
|
+
__all__ = [
|
|
11
|
+
"build_call_graph",
|
|
12
|
+
"build_code_structure_graph",
|
|
13
|
+
"build_execution_graph",
|
|
14
|
+
"build_git_graph",
|
|
15
|
+
"build_test_coverage_graph",
|
|
16
|
+
"build_variable_graph",
|
|
17
|
+
]
|
|
@@ -0,0 +1,70 @@
|
|
|
1
|
+
"""Call graph builder."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
from pathlib import Path
|
|
6
|
+
from typing import Iterable
|
|
7
|
+
|
|
8
|
+
import networkx as nx
|
|
9
|
+
|
|
10
|
+
from parser.call_resolver import resolve_calls
|
|
11
|
+
from parser.models import ModuleParseResult
|
|
12
|
+
|
|
13
|
+
from .node_factory import function_node_id
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
def _normalize_path(file_path: Path, root: Path | None) -> Path:
|
|
17
|
+
if root is None:
|
|
18
|
+
return file_path
|
|
19
|
+
try:
|
|
20
|
+
return file_path.resolve().relative_to(root.resolve())
|
|
21
|
+
except ValueError:
|
|
22
|
+
return file_path
|
|
23
|
+
|
|
24
|
+
|
|
25
|
+
def build_call_graph(modules: Iterable[ModuleParseResult], *, root: Path | None = None) -> nx.DiGraph:
    """Build caller-callee graph with unresolved external call nodes preserved.

    Nodes: one "function" node per locally defined function, plus
    "external:<name>" nodes (type "external_function") for callees that
    could not be resolved locally. Edges: "CALLS", carrying a boolean
    "resolved" attribute.

    Args:
        modules: Parsed module results to link.
        root: Optional repository root used to relativize file paths.
    """
    graph = nx.DiGraph()

    # Pass 1: one node per locally defined function.
    module_list = list(modules)
    for module in module_list:
        rel_file = _normalize_path(module.file, root)
        for fn in module.functions:
            node_id = function_node_id(rel_file, fn.name)
            graph.add_node(
                node_id,
                type="function",
                label=fn.name,
                file=rel_file.as_posix(),
                qualified_name=f"{rel_file.as_posix()}::{fn.name}",
            )

    # Pass 2: (file, function-name) -> node-id lookup used for resolution.
    local_name_to_node: dict[tuple[str, str], str] = {}
    for module in module_list:
        rel_file = _normalize_path(module.file, root)
        for fn in module.functions:
            local_name_to_node[(rel_file.as_posix(), fn.name)] = function_node_id(rel_file, fn.name)

    # Pass 3: add CALLS edges from resolve_calls() output.
    for module in module_list:
        rel_file = _normalize_path(module.file, root)
        for resolved in resolve_calls(module):
            # NOTE(review): direct indexing raises KeyError if resolve_calls
            # ever reports a caller that is not in module.functions — confirm
            # that invariant holds for module-level calls.
            caller_id = local_name_to_node[(rel_file.as_posix(), resolved.caller)]

            if resolved.resolved:
                callee_id = local_name_to_node.get((rel_file.as_posix(), resolved.callee))
                if callee_id is None:
                    # Should not happen for local resolution, but keep graph robust.
                    callee_id = f"external:{resolved.callee}"
                    graph.add_node(callee_id, type="external_function", label=resolved.callee)
            else:
                callee_id = f"external:{resolved.callee}"
                graph.add_node(callee_id, type="external_function", label=resolved.callee)

            graph.add_edge(
                caller_id,
                callee_id,
                type="CALLS",
                resolved=resolved.resolved,
            )

    return graph
|
|
@@ -0,0 +1,81 @@
|
|
|
1
|
+
"""Code structure graph builder."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
from pathlib import Path
|
|
6
|
+
from typing import Iterable
|
|
7
|
+
|
|
8
|
+
import networkx as nx
|
|
9
|
+
|
|
10
|
+
from parser.models import ModuleParseResult
|
|
11
|
+
|
|
12
|
+
from .node_factory import (
|
|
13
|
+
class_node_id,
|
|
14
|
+
file_node_id,
|
|
15
|
+
function_node_id,
|
|
16
|
+
module_node_id,
|
|
17
|
+
)
|
|
18
|
+
|
|
19
|
+
|
|
20
|
+
def _normalize_path(file_path: Path, root: Path | None) -> Path:
|
|
21
|
+
if root is None:
|
|
22
|
+
return file_path
|
|
23
|
+
try:
|
|
24
|
+
return file_path.resolve().relative_to(root.resolve())
|
|
25
|
+
except ValueError:
|
|
26
|
+
return file_path
|
|
27
|
+
|
|
28
|
+
|
|
29
|
+
def build_code_structure_graph(
    modules: Iterable[ModuleParseResult],
    *,
    root: Path | None = None,
) -> nx.DiGraph:
    """Build file/class/function/import structural graph from parsed modules.

    Nodes: "file", "class", "function" and imported "module" entries.
    Edges: "CONTAINS" (file -> class/function) and "IMPORTS" (file -> module).

    Args:
        modules: Parsed module results to convert.
        root: Optional repository root used to relativize file paths.
    """
    graph = nx.DiGraph()

    for module in modules:
        rel_file = _normalize_path(module.file, root)
        file_id = file_node_id(rel_file)
        graph.add_node(
            file_id,
            type="file",
            label=rel_file.as_posix(),
            path=rel_file.as_posix(),
        )

        for cls in module.classes:
            class_id = class_node_id(rel_file, cls.name)
            graph.add_node(
                class_id,
                type="class",
                label=cls.name,
                file=rel_file.as_posix(),
                start_line=cls.start_line,
                end_line=cls.end_line,
            )
            # FIX: the previous code added the same edge twice, first with
            # type="DEFINES" and then type="CONTAINS"; nx.DiGraph stores one
            # edge per (u, v) pair, so the second call silently overwrote the
            # first. Keep the surviving CONTAINS edge only; use MultiDiGraph
            # if both relation types are ever required.
            graph.add_edge(file_id, class_id, type="CONTAINS")

        for fn in module.functions:
            function_id = function_node_id(rel_file, fn.name)
            graph.add_node(
                function_id,
                type="function",
                label=fn.name,
                file=rel_file.as_posix(),
                start_line=fn.start_line,
                end_line=fn.end_line,
            )
            # Same dead-write fix as above: single CONTAINS edge.
            graph.add_edge(file_id, function_id, type="CONTAINS")

        for imp in module.imports:
            # "from X import a, b" -> "X.a", "X.b"; bare "import X" -> "X".
            import_targets = imp.names if imp.names else ((imp.module,) if imp.module else ())
            for symbol in import_targets:
                module_name = f"{imp.module}.{symbol}" if imp.module else symbol
                module_id = module_node_id(module_name)
                graph.add_node(module_id, type="module", label=module_name)
                graph.add_edge(file_id, module_id, type="IMPORTS")

    return graph
|
|
@@ -0,0 +1,35 @@
|
|
|
1
|
+
"""Execution trace graph builder."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
from typing import Iterable
|
|
6
|
+
|
|
7
|
+
import networkx as nx
|
|
8
|
+
|
|
9
|
+
from tracing.runtime_tracer import TraceEvent
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
def build_execution_graph(events: Iterable[TraceEvent]) -> nx.DiGraph:
    """Build ordered runtime execution graph from trace events.

    Each event becomes one "execution_event" node; consecutive events are
    linked with an "EXECUTES" edge in observation order.
    """
    graph = nx.DiGraph()

    previous_id = None
    for order, trace in enumerate(events):
        current_id = f"event:{order}:{trace.function_name}:{trace.event}"
        graph.add_node(
            current_id,
            type="execution_event",
            label=f"{trace.function_name}:{trace.event}",
            function=trace.function_name,
            event=trace.event,
            file=trace.file_path,
            line=trace.line_no,
            timestamp=trace.timestamp,
            order=order,
        )

        if previous_id is not None:
            graph.add_edge(previous_id, current_id, type="EXECUTES")
        previous_id = current_id

    return graph
|
|
@@ -0,0 +1,43 @@
|
|
|
1
|
+
"""Git history graph builder."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
from pathlib import Path
|
|
6
|
+
from typing import Iterable
|
|
7
|
+
|
|
8
|
+
import networkx as nx
|
|
9
|
+
|
|
10
|
+
from git_integration.git_miner import CommitRecord
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
def _commit_node_id(hexsha: str) -> str:
|
|
14
|
+
return f"commit:{hexsha}"
|
|
15
|
+
|
|
16
|
+
|
|
17
|
+
def _file_node_id(path: str) -> str:
|
|
18
|
+
return f"file:{Path(path).as_posix()}"
|
|
19
|
+
|
|
20
|
+
|
|
21
|
+
def build_git_graph(records: Iterable[CommitRecord]) -> nx.DiGraph:
    """Build commit-file change graph using CHANGED_IN edges.

    Each commit becomes a "commit" node; each touched file becomes a "file"
    node linked to the commit via a "CHANGED_IN" edge annotated with the
    change type.
    """
    graph = nx.DiGraph()

    for record in records:
        commit_id = _commit_node_id(record.hexsha)
        graph.add_node(
            commit_id,
            type="commit",
            label=record.hexsha[:10],
            author=record.author,
            committed_date=record.committed_date,
            summary=record.summary,
        )

        for change in record.files:
            if not change.path:
                # Skip records with an empty path defensively.
                continue
            posix = Path(change.path).as_posix()
            file_id = _file_node_id(change.path)
            graph.add_node(file_id, type="file", label=posix, path=posix)
            graph.add_edge(file_id, commit_id, type="CHANGED_IN", change_type=change.change_type)

    return graph
|
|
@@ -0,0 +1,25 @@
|
|
|
1
|
+
"""Graph storage utilities for incremental workflows."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
from dataclasses import dataclass
|
|
6
|
+
|
|
7
|
+
import networkx as nx
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
@dataclass(slots=True)
|
|
11
|
+
class GraphStore:
|
|
12
|
+
_snapshots: dict[str, nx.DiGraph]
|
|
13
|
+
|
|
14
|
+
def __init__(self) -> None:
|
|
15
|
+
self._snapshots = {}
|
|
16
|
+
|
|
17
|
+
def put(self, key: str, graph: nx.DiGraph) -> None:
|
|
18
|
+
self._snapshots[key] = graph.copy()
|
|
19
|
+
|
|
20
|
+
def get(self, key: str) -> nx.DiGraph | None:
|
|
21
|
+
graph = self._snapshots.get(key)
|
|
22
|
+
return None if graph is None else graph.copy()
|
|
23
|
+
|
|
24
|
+
def keys(self) -> tuple[str, ...]:
|
|
25
|
+
return tuple(sorted(self._snapshots.keys()))
|
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
"""Node identity and attribute helpers for graph construction."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
from pathlib import Path
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
def file_node_id(path: Path) -> str:
    """Node id for a source file."""
    return "file:" + path.as_posix()
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
def class_node_id(file_path: Path, class_name: str) -> str:
    """Node id for a class defined in *file_path*."""
    posix = file_path.as_posix()
    return f"class:{posix}::{class_name}"
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
def function_node_id(file_path: Path, function_name: str) -> str:
    """Node id for a function defined in *file_path*."""
    return "function:" + file_path.as_posix() + "::" + function_name
|
|
18
|
+
|
|
19
|
+
|
|
20
|
+
def module_node_id(module_name: str) -> str:
    """Node id for an imported module name."""
    return "module:" + module_name
|
|
@@ -0,0 +1,65 @@
|
|
|
1
|
+
"""Test coverage graph builder."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
from pathlib import Path
|
|
6
|
+
from typing import Iterable
|
|
7
|
+
|
|
8
|
+
import networkx as nx
|
|
9
|
+
|
|
10
|
+
from coverage_integration.coverage_loader import CoverageReport
|
|
11
|
+
from parser.models import ModuleParseResult
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
def _test_node_id(test_name: str) -> str:
|
|
15
|
+
return f"test:{test_name}"
|
|
16
|
+
|
|
17
|
+
|
|
18
|
+
def _file_node_id(path: str) -> str:
|
|
19
|
+
return f"file:{Path(path).as_posix()}"
|
|
20
|
+
|
|
21
|
+
|
|
22
|
+
def _function_node_id(path: str, fn_name: str) -> str:
|
|
23
|
+
return f"function:{Path(path).as_posix()}::{fn_name}"
|
|
24
|
+
|
|
25
|
+
|
|
26
|
+
def build_test_coverage_graph(
    report: CoverageReport,
    *,
    test_name: str,
    parsed_modules: Iterable[ModuleParseResult] = (),
) -> nx.DiGraph:
    """Build coverage graph linking tests to covered files/functions.

    Adds one "test" node for *test_name*, one "file" node per file record in
    *report* (annotated with coverage statistics), and — when a parsed module
    is available for a covered file — "function" nodes for every function
    whose line span intersects the executed lines.

    Args:
        report: Coverage report with per-file coverage records.
        test_name: Identifier of the test the report belongs to.
        parsed_modules: Parse results used to map executed lines to functions.
    """
    graph = nx.DiGraph()

    test_id = _test_node_id(test_name)
    graph.add_node(test_id, type="test", label=test_name)

    # NOTE(review): keys are POSIX-normalized module paths; rec.path must use
    # the exact same form (relative vs absolute) or no functions are linked —
    # confirm against the CoverageReport producer.
    module_map = {Path(m.file).as_posix(): m for m in parsed_modules}

    for rec in report.files:
        file_id = _file_node_id(rec.path)
        graph.add_node(
            file_id,
            type="file",
            label=rec.path,
            coverage_percent=rec.percent_covered,
            num_statements=rec.num_statements,
            num_branches=rec.num_branches,
            num_partial_branches=rec.num_partial_branches,
        )
        graph.add_edge(test_id, file_id, type="COVERED_BY")

        mod = module_map.get(rec.path)
        if mod is None:
            continue

        # Link any function whose line range overlaps the executed lines.
        executed = set(rec.executed_lines)
        for fn in mod.functions:
            line_span = set(range(fn.start_line, fn.end_line + 1))
            if executed.intersection(line_span):
                fn_id = _function_node_id(rec.path, fn.name)
                graph.add_node(fn_id, type="function", label=fn.name, file=rec.path)
                graph.add_edge(test_id, fn_id, type="COVERED_BY")

    return graph
|
|
@@ -0,0 +1,28 @@
|
|
|
1
|
+
"""Graph validation helpers."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
import networkx as nx
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
REQUIRED_NODE_ATTRS = {"type", "label"}


def validate_graph_integrity(graph: nx.DiGraph) -> list[str]:
    """Return a list of graph integrity errors, empty when valid.

    Verifies that every node carries the REQUIRED_NODE_ATTRS and that every
    edge references existing nodes and carries a "type" attribute.
    """
    errors: list[str] = []

    for node, attrs in graph.nodes(data=True):
        missing = REQUIRED_NODE_ATTRS - attrs.keys()
        if missing:
            missing_names = ", ".join(sorted(missing))
            errors.append(f"node {node} missing attrs: {missing_names}")

    for source, target, attrs in graph.edges(data=True):
        if source not in graph.nodes:
            errors.append(f"edge source missing: {source}")
        if target not in graph.nodes:
            errors.append(f"edge target missing: {target}")
        if "type" not in attrs:
            errors.append(f"edge {source}->{target} missing type")

    return errors
|
|
@@ -0,0 +1,51 @@
|
|
|
1
|
+
"""Variable dependency graph builder."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
from pathlib import Path
|
|
6
|
+
from typing import Iterable
|
|
7
|
+
|
|
8
|
+
import networkx as nx
|
|
9
|
+
|
|
10
|
+
from parser.models import ModuleParseResult
|
|
11
|
+
from parser.variable_extractor import extract_variable_dependencies
|
|
12
|
+
|
|
13
|
+
from .node_factory import function_node_id
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
def _normalize_path(file_path: Path, root: Path | None) -> Path:
|
|
17
|
+
if root is None:
|
|
18
|
+
return file_path
|
|
19
|
+
try:
|
|
20
|
+
return file_path.resolve().relative_to(root.resolve())
|
|
21
|
+
except ValueError:
|
|
22
|
+
return file_path
|
|
23
|
+
|
|
24
|
+
|
|
25
|
+
def _variable_node_id(name: str) -> str:
|
|
26
|
+
return f"variable:{name}"
|
|
27
|
+
|
|
28
|
+
|
|
29
|
+
def build_variable_graph(modules: Iterable[ModuleParseResult], *, root: Path | None = None) -> nx.DiGraph:
    """Build function-variable dependency graph with READS/WRITES/MODIFIES edges.

    One "function" node per parsed function, one "variable" node per
    variable, and an edge per dependency typed by its access kind.
    """
    graph = nx.DiGraph()

    for module in modules:
        rel_file = _normalize_path(module.file, root)
        rel_posix = rel_file.as_posix()

        # Register every function defined in the module.
        for fn in module.functions:
            graph.add_node(
                function_node_id(rel_file, fn.name),
                type="function",
                label=fn.name,
                file=rel_posix,
            )

        # Link functions to the variables they touch.
        for dep in extract_variable_dependencies(module):
            var_id = _variable_node_id(dep.variable_name)
            graph.add_node(var_id, type="variable", label=dep.variable_name)
            graph.add_edge(
                function_node_id(rel_file, dep.function_name),
                var_id,
                type=dep.access_type,
            )

    return graph
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
"""Knowledge index package."""
|
|
@@ -0,0 +1,60 @@
|
|
|
1
|
+
"""Build knowledge index from parsed modules."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
from pathlib import Path
|
|
6
|
+
from typing import Iterable
|
|
7
|
+
|
|
8
|
+
from parser.models import ModuleParseResult
|
|
9
|
+
|
|
10
|
+
from .models import ClassIndexEntry, FileIndexEntry, FunctionIndexEntry
|
|
11
|
+
from .store import InMemoryKnowledgeStore
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
def build_knowledge_index(modules: Iterable[ModuleParseResult]) -> InMemoryKnowledgeStore:
    """Build in-memory knowledge index from parse outputs.

    For each module this records one FileIndexEntry (with de-duplicated,
    sorted "module.name" import strings), one ClassIndexEntry per class and
    one FunctionIndexEntry per function.

    Args:
        modules: Parsed module results to index.

    Returns:
        Populated InMemoryKnowledgeStore.
    """
    store = InMemoryKnowledgeStore()

    for module in modules:
        file_path = Path(module.file).as_posix()
        # Qualify imported names as "module.name"; bare names keep their form.
        # NOTE(review): plain "import X" entries (empty imp.names) are dropped
        # here, unlike build_code_structure_graph which falls back to the
        # module name — confirm whether that asymmetry is intended.
        imports = tuple(sorted({
            f"{imp.module}.{name}" if imp.module else name
            for imp in module.imports
            for name in (imp.names or ())
        }))

        file_entry = FileIndexEntry(
            path=file_path,
            imports=imports,
            classes_defined=tuple(sorted(cls.name for cls in module.classes)),
            functions_defined=tuple(sorted(fn.name for fn in module.functions)),
        )
        store.add_file(file_entry)

        for cls in module.classes:
            store.add_class(
                ClassIndexEntry(
                    name=cls.name,
                    file=file_path,
                    methods=cls.methods,
                    attributes=cls.attributes,
                    base_classes=cls.base_classes,
                )
            )

        for fn in module.functions:
            store.add_function(
                FunctionIndexEntry(
                    name=fn.name,
                    file=file_path,
                    start_line=fn.start_line,
                    end_line=fn.end_line,
                    parameters=fn.parameters,
                    variables_read=fn.variables_read,
                    variables_written=fn.variables_written,
                    functions_called=fn.functions_called,
                    docstring=fn.docstring,
                )
            )

    return store
|