@pmaddire/gcie 0.1.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (82) hide show
  1. package/AGENT.md +256 -0
  2. package/AGENT_USAGE.md +231 -0
  3. package/ARCHITECTURE.md +151 -0
  4. package/CLAUDE.md +69 -0
  5. package/DEBUGGING_PLAYBOOK.md +160 -0
  6. package/KNOWLEDGE_INDEX.md +154 -0
  7. package/POTENTIAL_UPDATES +130 -0
  8. package/PROJECT.md +141 -0
  9. package/README.md +371 -0
  10. package/REPO_DIGITAL_TWIN.md +98 -0
  11. package/ROADMAP.md +301 -0
  12. package/SETUP_ANY_REPO.md +85 -0
  13. package/bin/gcie-init.js +20 -0
  14. package/bin/gcie.js +45 -0
  15. package/cli/__init__.py +1 -0
  16. package/cli/app.py +163 -0
  17. package/cli/commands/__init__.py +1 -0
  18. package/cli/commands/cache.py +35 -0
  19. package/cli/commands/context.py +2426 -0
  20. package/cli/commands/context_slices.py +617 -0
  21. package/cli/commands/debug.py +24 -0
  22. package/cli/commands/index.py +17 -0
  23. package/cli/commands/query.py +20 -0
  24. package/cli/commands/setup.py +73 -0
  25. package/config/__init__.py +1 -0
  26. package/config/scanner_config.py +82 -0
  27. package/context/__init__.py +1 -0
  28. package/context/architecture_bootstrap.py +170 -0
  29. package/context/architecture_index.py +185 -0
  30. package/context/architecture_parser.py +170 -0
  31. package/context/architecture_slicer.py +308 -0
  32. package/context/context_router.py +70 -0
  33. package/context/fallback_evaluator.py +21 -0
  34. package/coverage_integration/__init__.py +1 -0
  35. package/coverage_integration/coverage_loader.py +55 -0
  36. package/debugging/__init__.py +12 -0
  37. package/debugging/bug_localizer.py +81 -0
  38. package/debugging/execution_path_analyzer.py +42 -0
  39. package/embeddings/__init__.py +6 -0
  40. package/embeddings/encoder.py +45 -0
  41. package/embeddings/faiss_index.py +72 -0
  42. package/git_integration/__init__.py +1 -0
  43. package/git_integration/git_miner.py +78 -0
  44. package/graphs/__init__.py +17 -0
  45. package/graphs/call_graph.py +70 -0
  46. package/graphs/code_graph.py +81 -0
  47. package/graphs/execution_graph.py +35 -0
  48. package/graphs/git_graph.py +43 -0
  49. package/graphs/graph_store.py +25 -0
  50. package/graphs/node_factory.py +21 -0
  51. package/graphs/test_graph.py +65 -0
  52. package/graphs/validators.py +28 -0
  53. package/graphs/variable_graph.py +51 -0
  54. package/knowledge_index/__init__.py +1 -0
  55. package/knowledge_index/index_builder.py +60 -0
  56. package/knowledge_index/models.py +35 -0
  57. package/knowledge_index/query_api.py +38 -0
  58. package/knowledge_index/store.py +23 -0
  59. package/llm_context/__init__.py +6 -0
  60. package/llm_context/context_builder.py +67 -0
  61. package/llm_context/snippet_selector.py +57 -0
  62. package/package.json +14 -0
  63. package/parser/__init__.py +18 -0
  64. package/parser/ast_parser.py +216 -0
  65. package/parser/call_resolver.py +52 -0
  66. package/parser/models.py +75 -0
  67. package/parser/tree_sitter_adapter.py +56 -0
  68. package/parser/variable_extractor.py +31 -0
  69. package/retrieval/__init__.py +17 -0
  70. package/retrieval/cache.py +22 -0
  71. package/retrieval/hybrid_retriever.py +249 -0
  72. package/retrieval/query_parser.py +38 -0
  73. package/retrieval/ranking.py +43 -0
  74. package/retrieval/semantic_retriever.py +39 -0
  75. package/retrieval/symbolic_retriever.py +80 -0
  76. package/scanner/__init__.py +5 -0
  77. package/scanner/file_filters.py +37 -0
  78. package/scanner/models.py +44 -0
  79. package/scanner/repository_scanner.py +55 -0
  80. package/scripts/bootstrap_from_github.ps1 +41 -0
  81. package/tracing/__init__.py +1 -0
  82. package/tracing/runtime_tracer.py +60 -0
@@ -0,0 +1,72 @@
1
+ """FAISS-compatible vector index with in-memory fallback."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import math
6
+ from dataclasses import dataclass
7
+
8
+
9
+ @dataclass(frozen=True, slots=True)
10
+ class SearchHit:
11
+ idx: int
12
+ score: float
13
+
14
+
15
+ def _dot(a: list[float], b: list[float]) -> float:
16
+ return sum(x * y for x, y in zip(a, b))
17
+
18
+
19
+ class VectorIndex:
20
+ """Vector index that prefers FAISS and falls back to brute-force cosine."""
21
+
22
+ def __init__(self) -> None:
23
+ self._faiss = None
24
+ self._index = None
25
+ self._vectors: list[list[float]] = []
26
+
27
+ try:
28
+ import faiss # type: ignore
29
+
30
+ self._faiss = faiss
31
+ except Exception:
32
+ self._faiss = None
33
+
34
+ def add(self, vectors: list[list[float]]) -> None:
35
+ if not vectors:
36
+ return
37
+
38
+ if self._faiss is None:
39
+ self._vectors.extend(vectors)
40
+ return
41
+
42
+ import numpy as np
43
+
44
+ arr = np.array(vectors, dtype="float32")
45
+ if self._index is None:
46
+ dim = arr.shape[1]
47
+ self._index = self._faiss.IndexFlatIP(dim)
48
+ self._index.add(arr)
49
+
50
+ def search(self, query: list[float], top_k: int = 5) -> tuple[SearchHit, ...]:
51
+ if top_k <= 0:
52
+ return ()
53
+
54
+ if self._faiss is None:
55
+ scored = [SearchHit(idx=i, score=_dot(query, vec)) for i, vec in enumerate(self._vectors)]
56
+ scored.sort(key=lambda h: h.score, reverse=True)
57
+ return tuple(scored[:top_k])
58
+
59
+ import numpy as np
60
+
61
+ if self._index is None or self._index.ntotal == 0:
62
+ return ()
63
+
64
+ q = np.array([query], dtype="float32")
65
+ scores, indices = self._index.search(q, top_k)
66
+
67
+ hits: list[SearchHit] = []
68
+ for score, idx in zip(scores[0], indices[0]):
69
+ if idx < 0:
70
+ continue
71
+ hits.append(SearchHit(idx=int(idx), score=float(score)))
72
+ return tuple(hits)
@@ -0,0 +1 @@
1
+ """Git integration package."""
@@ -0,0 +1,78 @@
1
+ """Git history miner using GitPython."""
2
+
3
+ from __future__ import annotations
4
+
5
+ from dataclasses import dataclass
6
+ from pathlib import Path
7
+
8
+ from git import Repo
9
+ from git.exc import GitCommandError
10
+
11
+
12
@dataclass(frozen=True, slots=True)
class FileChange:
    """Single file-level change in a commit."""

    # Repository path recorded by the miner (new path preferred over old one).
    path: str
    # Upper-cased git change code (e.g. "A", "M", "D"); defaults to "M" upstream.
    change_type: str
18
+
19
+
20
@dataclass(frozen=True, slots=True)
class CommitRecord:
    """Normalized commit metadata."""

    # Full hex commit SHA.
    hexsha: str
    # Author rendered via str() — presumably "Name <email>"; confirm with GitPython.
    author: str
    # Commit timestamp as a Unix epoch integer.
    committed_date: int
    # Commit summary line.
    summary: str
    # File-level changes relative to the first parent ("A" for root commits).
    files: tuple[FileChange, ...]
29
+
30
+
31
def mine_commit_history(repo_path: str | Path, *, max_count: int | None = None) -> tuple[CommitRecord, ...]:
    """Return normalized commit history from a git repository.

    Walks ``HEAD`` (GitPython's default order) and records per-commit file
    changes. Diffs are taken against the first parent only, so merge commits
    report just the changes relative to that parent.

    Args:
        repo_path: Filesystem path to the repository.
        max_count: Optional cap on the number of commits walked.

    Returns:
        Tuple of ``CommitRecord``; empty for bare or history-less repositories.
    """
    repo = Repo(Path(repo_path))
    if repo.bare:
        # A bare repo is not mined; close eagerly since the try/finally below
        # is not reached on this path.
        repo.close()
        return ()

    try:
        try:
            commits = list(repo.iter_commits("HEAD", max_count=max_count))
        except (ValueError, GitCommandError):
            # No commits/HEAD yet.
            return ()

        out: list[CommitRecord] = []

        for commit in commits:
            file_changes: list[FileChange] = []

            if commit.parents:
                # Diff against the first parent; metadata only (no patch text).
                parent = commit.parents[0]
                diffs = parent.diff(commit, create_patch=False)
                for diff in diffs:
                    # Prefer the post-change path; fall back to the pre-change one.
                    new_path = diff.b_path or diff.a_path or ""
                    file_changes.append(
                        FileChange(
                            path=new_path,
                            change_type=(diff.change_type or "M").upper(),
                        )
                    )
            else:
                # Root commit: every touched file counts as an addition.
                for path in commit.stats.files.keys():
                    file_changes.append(FileChange(path=path, change_type="A"))

            out.append(
                CommitRecord(
                    hexsha=commit.hexsha,
                    author=str(commit.author),
                    committed_date=int(commit.committed_date),
                    summary=commit.summary,
                    files=tuple(file_changes),
                )
            )

        return tuple(out)
    finally:
        # Release cached git subprocesses/handles even on error paths.
        repo.git.clear_cache()
        repo.close()
@@ -0,0 +1,17 @@
1
+ """Graph package for GCIE."""
2
+
3
+ from .call_graph import build_call_graph
4
+ from .code_graph import build_code_structure_graph
5
+ from .execution_graph import build_execution_graph
6
+ from .git_graph import build_git_graph
7
+ from .test_graph import build_test_coverage_graph
8
+ from .variable_graph import build_variable_graph
9
+
10
+ __all__ = [
11
+ "build_call_graph",
12
+ "build_code_structure_graph",
13
+ "build_execution_graph",
14
+ "build_git_graph",
15
+ "build_test_coverage_graph",
16
+ "build_variable_graph",
17
+ ]
@@ -0,0 +1,70 @@
1
+ """Call graph builder."""
2
+
3
+ from __future__ import annotations
4
+
5
+ from pathlib import Path
6
+ from typing import Iterable
7
+
8
+ import networkx as nx
9
+
10
+ from parser.call_resolver import resolve_calls
11
+ from parser.models import ModuleParseResult
12
+
13
+ from .node_factory import function_node_id
14
+
15
+
16
+ def _normalize_path(file_path: Path, root: Path | None) -> Path:
17
+ if root is None:
18
+ return file_path
19
+ try:
20
+ return file_path.resolve().relative_to(root.resolve())
21
+ except ValueError:
22
+ return file_path
23
+
24
+
25
def build_call_graph(modules: Iterable[ModuleParseResult], *, root: Path | None = None) -> nx.DiGraph:
    """Build caller-callee graph with unresolved external call nodes preserved.

    Nodes: one ``function`` node per parsed function plus ``external_function``
    nodes for callees not resolvable to a local function.
    Edges: ``CALLS`` with a boolean ``resolved`` attribute.
    """
    graph = nx.DiGraph()

    module_list = list(modules)

    # Pass 1: register every known function node and build the
    # (file, name) -> node-id lookup in the same sweep (the original walked
    # the modules twice to produce the same data).
    local_name_to_node: dict[tuple[str, str], str] = {}
    for module in module_list:
        rel_file = _normalize_path(module.file, root)
        for fn in module.functions:
            node_id = function_node_id(rel_file, fn.name)
            local_name_to_node[(rel_file.as_posix(), fn.name)] = node_id
            graph.add_node(
                node_id,
                type="function",
                label=fn.name,
                file=rel_file.as_posix(),
                qualified_name=f"{rel_file.as_posix()}::{fn.name}",
            )

    # Pass 2: add CALLS edges; unknown callees become external nodes.
    for module in module_list:
        rel_file = _normalize_path(module.file, root)
        for resolved in resolve_calls(module):
            caller_id = local_name_to_node[(rel_file.as_posix(), resolved.caller)]

            callee_id = None
            if resolved.resolved:
                callee_id = local_name_to_node.get((rel_file.as_posix(), resolved.callee))
            if callee_id is None:
                # Unresolved call, or a "resolved" callee missing from the
                # local map (should not happen, but keep the graph robust).
                # The original duplicated this external-node creation in two
                # branches; consolidated here with identical results.
                callee_id = f"external:{resolved.callee}"
                graph.add_node(callee_id, type="external_function", label=resolved.callee)

            graph.add_edge(
                caller_id,
                callee_id,
                type="CALLS",
                resolved=resolved.resolved,
            )

    return graph
@@ -0,0 +1,81 @@
1
+ """Code structure graph builder."""
2
+
3
+ from __future__ import annotations
4
+
5
+ from pathlib import Path
6
+ from typing import Iterable
7
+
8
+ import networkx as nx
9
+
10
+ from parser.models import ModuleParseResult
11
+
12
+ from .node_factory import (
13
+ class_node_id,
14
+ file_node_id,
15
+ function_node_id,
16
+ module_node_id,
17
+ )
18
+
19
+
20
+ def _normalize_path(file_path: Path, root: Path | None) -> Path:
21
+ if root is None:
22
+ return file_path
23
+ try:
24
+ return file_path.resolve().relative_to(root.resolve())
25
+ except ValueError:
26
+ return file_path
27
+
28
+
29
def build_code_structure_graph(
    modules: Iterable[ModuleParseResult],
    *,
    root: Path | None = None,
) -> nx.DiGraph:
    """Build file/class/function/import structural graph from parsed modules.

    Nodes: ``file``, ``class``, ``function`` and imported ``module`` targets.
    Edges: ``CONTAINS`` (file -> class/function) and ``IMPORTS`` (file -> module).
    """
    graph = nx.DiGraph()

    for module in modules:
        rel_file = _normalize_path(module.file, root)
        file_id = file_node_id(rel_file)
        graph.add_node(
            file_id,
            type="file",
            label=rel_file.as_posix(),
            path=rel_file.as_posix(),
        )

        for cls in module.classes:
            class_id = class_node_id(rel_file, cls.name)
            graph.add_node(
                class_id,
                type="class",
                label=cls.name,
                file=rel_file.as_posix(),
                start_line=cls.start_line,
                end_line=cls.end_line,
            )
            # NOTE(review): the original added this edge twice — first with
            # type="DEFINES", then with type="CONTAINS". On a DiGraph the
            # second call overwrites the first's attributes, so only CONTAINS
            # ever survived; the dead DEFINES write is dropped. If both
            # relations are needed, use a MultiDiGraph or a tuple attribute.
            graph.add_edge(file_id, class_id, type="CONTAINS")

        for fn in module.functions:
            function_id = function_node_id(rel_file, fn.name)
            graph.add_node(
                function_id,
                type="function",
                label=fn.name,
                file=rel_file.as_posix(),
                start_line=fn.start_line,
                end_line=fn.end_line,
            )
            # Same DEFINES-then-CONTAINS overwrite removed here; see note above.
            graph.add_edge(file_id, function_id, type="CONTAINS")

        for imp in module.imports:
            # ``from m import a, b`` -> "m.a", "m.b"; bare ``import m`` -> "m".
            # The original's fallback fed imp.module back through the
            # f"{module}.{symbol}" branch, producing the bogus target "m.m"
            # for bare imports.
            if imp.names:
                targets = [f"{imp.module}.{name}" if imp.module else name for name in imp.names]
            elif imp.module:
                targets = [imp.module]
            else:
                targets = []
            for module_name in targets:
                module_id = module_node_id(module_name)
                graph.add_node(module_id, type="module", label=module_name)
                graph.add_edge(file_id, module_id, type="IMPORTS")

    return graph
@@ -0,0 +1,35 @@
1
+ """Execution trace graph builder."""
2
+
3
+ from __future__ import annotations
4
+
5
+ from typing import Iterable
6
+
7
+ import networkx as nx
8
+
9
+ from tracing.runtime_tracer import TraceEvent
10
+
11
+
12
def build_execution_graph(events: Iterable[TraceEvent]) -> nx.DiGraph:
    """Build ordered runtime execution graph from trace events.

    Each event becomes an ``execution_event`` node carrying its trace position
    in ``order``; consecutive events are linked with ``EXECUTES`` edges.
    """
    graph = nx.DiGraph()
    # Track the previous node id directly instead of materializing the whole
    # event list just to re-derive the prior node's id string each iteration.
    prev_id: str | None = None

    for idx, event in enumerate(events):
        node_id = f"event:{idx}:{event.function_name}:{event.event}"
        graph.add_node(
            node_id,
            type="execution_event",
            label=f"{event.function_name}:{event.event}",
            function=event.function_name,
            event=event.event,
            file=event.file_path,
            line=event.line_no,
            timestamp=event.timestamp,
            order=idx,
        )

        if prev_id is not None:
            graph.add_edge(prev_id, node_id, type="EXECUTES")
        prev_id = node_id

    return graph
@@ -0,0 +1,43 @@
1
+ """Git history graph builder."""
2
+
3
+ from __future__ import annotations
4
+
5
+ from pathlib import Path
6
+ from typing import Iterable
7
+
8
+ import networkx as nx
9
+
10
+ from git_integration.git_miner import CommitRecord
11
+
12
+
13
+ def _commit_node_id(hexsha: str) -> str:
14
+ return f"commit:{hexsha}"
15
+
16
+
17
+ def _file_node_id(path: str) -> str:
18
+ return f"file:{Path(path).as_posix()}"
19
+
20
+
21
def build_git_graph(records: Iterable[CommitRecord]) -> nx.DiGraph:
    """Build commit-file change graph using CHANGED_IN edges.

    Nodes: ``commit`` (keyed by SHA) and ``file`` (keyed by POSIX path).
    Edges: file -> commit with ``type="CHANGED_IN"`` plus the git change code.
    """
    graph = nx.DiGraph()

    for record in records:
        commit_id = _commit_node_id(record.hexsha)
        graph.add_node(
            commit_id,
            type="commit",
            label=record.hexsha[:10],
            author=record.author,
            committed_date=record.committed_date,
            summary=record.summary,
        )

        for change in record.files:
            if not change.path:
                # Defensive: skip changes the miner recorded without a path.
                continue
            # Normalize once instead of three separate Path() conversions.
            posix_path = Path(change.path).as_posix()
            file_id = _file_node_id(change.path)
            graph.add_node(file_id, type="file", label=posix_path, path=posix_path)
            graph.add_edge(file_id, commit_id, type="CHANGED_IN", change_type=change.change_type)

    return graph
@@ -0,0 +1,25 @@
1
+ """Graph storage utilities for incremental workflows."""
2
+
3
+ from __future__ import annotations
4
+
5
+ from dataclasses import dataclass
6
+
7
+ import networkx as nx
8
+
9
+
10
+ @dataclass(slots=True)
11
+ class GraphStore:
12
+ _snapshots: dict[str, nx.DiGraph]
13
+
14
+ def __init__(self) -> None:
15
+ self._snapshots = {}
16
+
17
+ def put(self, key: str, graph: nx.DiGraph) -> None:
18
+ self._snapshots[key] = graph.copy()
19
+
20
+ def get(self, key: str) -> nx.DiGraph | None:
21
+ graph = self._snapshots.get(key)
22
+ return None if graph is None else graph.copy()
23
+
24
+ def keys(self) -> tuple[str, ...]:
25
+ return tuple(sorted(self._snapshots.keys()))
@@ -0,0 +1,21 @@
1
+ """Node identity and attribute helpers for graph construction."""
2
+
3
+ from __future__ import annotations
4
+
5
+ from pathlib import Path
6
+
7
+
8
def file_node_id(path: Path) -> str:
    """Canonical graph node id for a source file."""
    return "file:" + path.as_posix()
10
+
11
+
12
def class_node_id(file_path: Path, class_name: str) -> str:
    """Canonical graph node id for a class, qualified by its file."""
    return "class:" + file_path.as_posix() + "::" + class_name
14
+
15
+
16
def function_node_id(file_path: Path, function_name: str) -> str:
    """Canonical graph node id for a function, qualified by its file."""
    return "function:" + file_path.as_posix() + "::" + function_name
18
+
19
+
20
def module_node_id(module_name: str) -> str:
    """Canonical graph node id for an imported module target."""
    return "module:" + module_name
@@ -0,0 +1,65 @@
1
+ """Test coverage graph builder."""
2
+
3
+ from __future__ import annotations
4
+
5
+ from pathlib import Path
6
+ from typing import Iterable
7
+
8
+ import networkx as nx
9
+
10
+ from coverage_integration.coverage_loader import CoverageReport
11
+ from parser.models import ModuleParseResult
12
+
13
+
14
+ def _test_node_id(test_name: str) -> str:
15
+ return f"test:{test_name}"
16
+
17
+
18
+ def _file_node_id(path: str) -> str:
19
+ return f"file:{Path(path).as_posix()}"
20
+
21
+
22
+ def _function_node_id(path: str, fn_name: str) -> str:
23
+ return f"function:{Path(path).as_posix()}::{fn_name}"
24
+
25
+
26
def build_test_coverage_graph(
    report: CoverageReport,
    *,
    test_name: str,
    parsed_modules: Iterable[ModuleParseResult] = (),
) -> nx.DiGraph:
    """Build coverage graph linking tests to covered files/functions.

    A ``test`` node is linked to every file in *report* and, when parse data
    is available, to each function whose line span intersects executed lines.
    """
    graph = nx.DiGraph()

    test_id = _test_node_id(test_name)
    graph.add_node(test_id, type="test", label=test_name)

    # Coverage record paths must match parsed module paths (POSIX-normalized).
    module_map = {Path(m.file).as_posix(): m for m in parsed_modules}

    for rec in report.files:
        file_id = _file_node_id(rec.path)
        graph.add_node(
            file_id,
            type="file",
            label=rec.path,
            coverage_percent=rec.percent_covered,
            num_statements=rec.num_statements,
            num_branches=rec.num_branches,
            num_partial_branches=rec.num_partial_branches,
        )
        # NOTE(review): the edge points test -> file but is labelled
        # "COVERED_BY" (file covered by test) — confirm consumers expect
        # this direction before renaming.
        graph.add_edge(test_id, file_id, type="COVERED_BY")

        mod = module_map.get(rec.path)
        if mod is None:
            continue

        executed = set(rec.executed_lines)
        for fn in mod.functions:
            # isdisjoint accepts the lazy range directly — avoids building a
            # throwaway set per function as the original did.
            if not executed.isdisjoint(range(fn.start_line, fn.end_line + 1)):
                fn_id = _function_node_id(rec.path, fn.name)
                graph.add_node(fn_id, type="function", label=fn.name, file=rec.path)
                graph.add_edge(test_id, fn_id, type="COVERED_BY")

    return graph
@@ -0,0 +1,28 @@
1
+ """Graph validation helpers."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import networkx as nx
6
+
7
+
8
+ REQUIRED_NODE_ATTRS = {"type", "label"}
9
+
10
+
11
+ def validate_graph_integrity(graph: nx.DiGraph) -> list[str]:
12
+ """Return a list of graph integrity errors, empty when valid."""
13
+ errors: list[str] = []
14
+
15
+ for node, attrs in graph.nodes(data=True):
16
+ missing = REQUIRED_NODE_ATTRS.difference(attrs.keys())
17
+ if missing:
18
+ errors.append(f"node {node} missing attrs: {', '.join(sorted(missing))}")
19
+
20
+ for source, target, attrs in graph.edges(data=True):
21
+ if source not in graph.nodes:
22
+ errors.append(f"edge source missing: {source}")
23
+ if target not in graph.nodes:
24
+ errors.append(f"edge target missing: {target}")
25
+ if "type" not in attrs:
26
+ errors.append(f"edge {source}->{target} missing type")
27
+
28
+ return errors
@@ -0,0 +1,51 @@
1
+ """Variable dependency graph builder."""
2
+
3
+ from __future__ import annotations
4
+
5
+ from pathlib import Path
6
+ from typing import Iterable
7
+
8
+ import networkx as nx
9
+
10
+ from parser.models import ModuleParseResult
11
+ from parser.variable_extractor import extract_variable_dependencies
12
+
13
+ from .node_factory import function_node_id
14
+
15
+
16
+ def _normalize_path(file_path: Path, root: Path | None) -> Path:
17
+ if root is None:
18
+ return file_path
19
+ try:
20
+ return file_path.resolve().relative_to(root.resolve())
21
+ except ValueError:
22
+ return file_path
23
+
24
+
25
+ def _variable_node_id(name: str) -> str:
26
+ return f"variable:{name}"
27
+
28
+
29
def build_variable_graph(modules: Iterable[ModuleParseResult], *, root: Path | None = None) -> nx.DiGraph:
    """Build function-variable dependency graph with READS/WRITES/MODIFIES edges."""
    graph = nx.DiGraph()

    for module in modules:
        rel_file = _normalize_path(module.file, root)
        rel_posix = rel_file.as_posix()

        # Register every function first so isolated functions still appear.
        for fn in module.functions:
            graph.add_node(
                function_node_id(rel_file, fn.name),
                type="function",
                label=fn.name,
                file=rel_posix,
            )

        # Then attach one edge per variable access, typed by access kind.
        for dep in extract_variable_dependencies(module):
            variable_id = _variable_node_id(dep.variable_name)
            graph.add_node(variable_id, type="variable", label=dep.variable_name)
            graph.add_edge(
                function_node_id(rel_file, dep.function_name),
                variable_id,
                type=dep.access_type,
            )

    return graph
@@ -0,0 +1 @@
1
+ """Knowledge index package."""
@@ -0,0 +1,60 @@
1
+ """Build knowledge index from parsed modules."""
2
+
3
+ from __future__ import annotations
4
+
5
+ from pathlib import Path
6
+ from typing import Iterable
7
+
8
+ from parser.models import ModuleParseResult
9
+
10
+ from .models import ClassIndexEntry, FileIndexEntry, FunctionIndexEntry
11
+ from .store import InMemoryKnowledgeStore
12
+
13
+
14
def build_knowledge_index(modules: Iterable[ModuleParseResult]) -> InMemoryKnowledgeStore:
    """Build in-memory knowledge index from parse outputs.

    Records one file entry per module plus per-class and per-function entries.

    Args:
        modules: Parsed module results to index.

    Returns:
        Populated ``InMemoryKnowledgeStore``.
    """
    store = InMemoryKnowledgeStore()

    for module in modules:
        file_path = Path(module.file).as_posix()

        # Qualified import targets: "pkg.name" for from-imports, the bare
        # module name otherwise. The original silently dropped imports with
        # empty ``names`` (e.g. ``import os``); keep them, consistent with
        # code_graph's fallback handling.
        qualified: set[str] = set()
        for imp in module.imports:
            if imp.names:
                qualified.update(
                    f"{imp.module}.{name}" if imp.module else name
                    for name in imp.names
                )
            elif imp.module:
                qualified.add(imp.module)
        imports = tuple(sorted(qualified))

        store.add_file(
            FileIndexEntry(
                path=file_path,
                imports=imports,
                classes_defined=tuple(sorted(cls.name for cls in module.classes)),
                functions_defined=tuple(sorted(fn.name for fn in module.functions)),
            )
        )

        for cls in module.classes:
            store.add_class(
                ClassIndexEntry(
                    name=cls.name,
                    file=file_path,
                    methods=cls.methods,
                    attributes=cls.attributes,
                    base_classes=cls.base_classes,
                )
            )

        for fn in module.functions:
            store.add_function(
                FunctionIndexEntry(
                    name=fn.name,
                    file=file_path,
                    start_line=fn.start_line,
                    end_line=fn.end_line,
                    parameters=fn.parameters,
                    variables_read=fn.variables_read,
                    variables_written=fn.variables_written,
                    functions_called=fn.functions_called,
                    docstring=fn.docstring,
                )
            )

    return store