@pmaddire/gcie 0.1.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/AGENT.md +256 -0
- package/AGENT_USAGE.md +231 -0
- package/ARCHITECTURE.md +151 -0
- package/CLAUDE.md +69 -0
- package/DEBUGGING_PLAYBOOK.md +160 -0
- package/KNOWLEDGE_INDEX.md +154 -0
- package/POTENTIAL_UPDATES +130 -0
- package/PROJECT.md +141 -0
- package/README.md +371 -0
- package/REPO_DIGITAL_TWIN.md +98 -0
- package/ROADMAP.md +301 -0
- package/SETUP_ANY_REPO.md +85 -0
- package/bin/gcie-init.js +20 -0
- package/bin/gcie.js +45 -0
- package/cli/__init__.py +1 -0
- package/cli/app.py +163 -0
- package/cli/commands/__init__.py +1 -0
- package/cli/commands/cache.py +35 -0
- package/cli/commands/context.py +2426 -0
- package/cli/commands/context_slices.py +617 -0
- package/cli/commands/debug.py +24 -0
- package/cli/commands/index.py +17 -0
- package/cli/commands/query.py +20 -0
- package/cli/commands/setup.py +73 -0
- package/config/__init__.py +1 -0
- package/config/scanner_config.py +82 -0
- package/context/__init__.py +1 -0
- package/context/architecture_bootstrap.py +170 -0
- package/context/architecture_index.py +185 -0
- package/context/architecture_parser.py +170 -0
- package/context/architecture_slicer.py +308 -0
- package/context/context_router.py +70 -0
- package/context/fallback_evaluator.py +21 -0
- package/coverage_integration/__init__.py +1 -0
- package/coverage_integration/coverage_loader.py +55 -0
- package/debugging/__init__.py +12 -0
- package/debugging/bug_localizer.py +81 -0
- package/debugging/execution_path_analyzer.py +42 -0
- package/embeddings/__init__.py +6 -0
- package/embeddings/encoder.py +45 -0
- package/embeddings/faiss_index.py +72 -0
- package/git_integration/__init__.py +1 -0
- package/git_integration/git_miner.py +78 -0
- package/graphs/__init__.py +17 -0
- package/graphs/call_graph.py +70 -0
- package/graphs/code_graph.py +81 -0
- package/graphs/execution_graph.py +35 -0
- package/graphs/git_graph.py +43 -0
- package/graphs/graph_store.py +25 -0
- package/graphs/node_factory.py +21 -0
- package/graphs/test_graph.py +65 -0
- package/graphs/validators.py +28 -0
- package/graphs/variable_graph.py +51 -0
- package/knowledge_index/__init__.py +1 -0
- package/knowledge_index/index_builder.py +60 -0
- package/knowledge_index/models.py +35 -0
- package/knowledge_index/query_api.py +38 -0
- package/knowledge_index/store.py +23 -0
- package/llm_context/__init__.py +6 -0
- package/llm_context/context_builder.py +67 -0
- package/llm_context/snippet_selector.py +57 -0
- package/package.json +14 -0
- package/parser/__init__.py +18 -0
- package/parser/ast_parser.py +216 -0
- package/parser/call_resolver.py +52 -0
- package/parser/models.py +75 -0
- package/parser/tree_sitter_adapter.py +56 -0
- package/parser/variable_extractor.py +31 -0
- package/retrieval/__init__.py +17 -0
- package/retrieval/cache.py +22 -0
- package/retrieval/hybrid_retriever.py +249 -0
- package/retrieval/query_parser.py +38 -0
- package/retrieval/ranking.py +43 -0
- package/retrieval/semantic_retriever.py +39 -0
- package/retrieval/symbolic_retriever.py +80 -0
- package/scanner/__init__.py +5 -0
- package/scanner/file_filters.py +37 -0
- package/scanner/models.py +44 -0
- package/scanner/repository_scanner.py +55 -0
- package/scripts/bootstrap_from_github.ps1 +41 -0
- package/tracing/__init__.py +1 -0
- package/tracing/runtime_tracer.py +60 -0
|
@@ -0,0 +1,73 @@
|
|
|
1
|
+
"""One-command repository setup for GCIE."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
from pathlib import Path
|
|
6
|
+
|
|
7
|
+
from context.architecture_bootstrap import ensure_initialized
|
|
8
|
+
|
|
9
|
+
from .index import run_index
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
def _repo_root() -> Path:
|
|
13
|
+
return Path(__file__).resolve().parents[2]
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
def _copy_if_needed(source: Path, target: Path, *, force: bool) -> str:
|
|
17
|
+
if not source.exists():
|
|
18
|
+
return "source_missing"
|
|
19
|
+
if target.exists() and not force:
|
|
20
|
+
return "skipped_existing"
|
|
21
|
+
target.parent.mkdir(parents=True, exist_ok=True)
|
|
22
|
+
target.write_text(source.read_text(encoding="utf-8"), encoding="utf-8")
|
|
23
|
+
return "written"
|
|
24
|
+
|
|
25
|
+
|
|
26
|
+
def run_setup(
    path: str,
    *,
    force: bool = False,
    include_agent_usage: bool = True,
    include_setup_doc: bool = True,
    run_index_pass: bool = True,
) -> dict:
    """Initialize a repository so GCIE can be used immediately.

    Creates the target directory if needed, bootstraps the .gcie
    architecture artifacts, optionally copies helper docs from the GCIE
    package root, and (unless disabled) runs an index pass. Returns a
    status dict describing what was done.
    """
    target = Path(path).resolve()
    target.mkdir(parents=True, exist_ok=True)

    config = ensure_initialized(target)
    gcie_dir = target / ".gcie"

    # Optional docs copied from the GCIE package root into the target repo.
    doc_names: list[str] = []
    if include_agent_usage:
        doc_names.append("AGENT_USAGE.md")
    if include_setup_doc:
        doc_names.append("SETUP_ANY_REPO.md")

    source_root = _repo_root()
    copied = {
        name: _copy_if_needed(source_root / name, target / name, force=force)
        for name in doc_names
    }

    status: dict[str, object] = {
        "repo": target.as_posix(),
        "gcie_dir": gcie_dir.as_posix(),
        "architecture_initialized": True,
        "files": copied,
        "context_config": config,
    }

    if run_index_pass:
        status["index"] = run_index(target.as_posix())
    else:
        status["index"] = {"skipped": True}

    return status
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
"""Configuration package for GCIE."""
|
|
@@ -0,0 +1,82 @@
|
|
|
1
|
+
"""Scanner configuration models."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
from dataclasses import dataclass, field
|
|
6
|
+
from pathlib import Path
|
|
7
|
+
from typing import Iterable
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
@dataclass(slots=True)
|
|
11
|
+
class ScannerConfig:
|
|
12
|
+
"""Configuration for repository scanning."""
|
|
13
|
+
|
|
14
|
+
include_extensions: set[str] = field(
|
|
15
|
+
default_factory=lambda: {
|
|
16
|
+
".py",
|
|
17
|
+
".pyi",
|
|
18
|
+
".md",
|
|
19
|
+
".txt",
|
|
20
|
+
".toml",
|
|
21
|
+
".yaml",
|
|
22
|
+
".yml",
|
|
23
|
+
".json",
|
|
24
|
+
".ini",
|
|
25
|
+
".cfg",
|
|
26
|
+
".rst",
|
|
27
|
+
".sh",
|
|
28
|
+
}
|
|
29
|
+
)
|
|
30
|
+
exclude_dirs: set[str] = field(
|
|
31
|
+
default_factory=lambda: {
|
|
32
|
+
".git",
|
|
33
|
+
".hg",
|
|
34
|
+
".svn",
|
|
35
|
+
".venv",
|
|
36
|
+
"venv",
|
|
37
|
+
"__pycache__",
|
|
38
|
+
"node_modules",
|
|
39
|
+
"build",
|
|
40
|
+
"dist",
|
|
41
|
+
".pytest_cache",
|
|
42
|
+
".mypy_cache",
|
|
43
|
+
".idea",
|
|
44
|
+
".vscode",
|
|
45
|
+
}
|
|
46
|
+
)
|
|
47
|
+
exclude_globs: tuple[str, ...] = ()
|
|
48
|
+
max_file_size_bytes: int = 1_000_000
|
|
49
|
+
include_hidden: bool = False
|
|
50
|
+
|
|
51
|
+
def is_excluded_dir(self, directory_name: str) -> bool:
|
|
52
|
+
"""Return True if a directory should be skipped during scanning."""
|
|
53
|
+
if not self.include_hidden and directory_name.startswith("."):
|
|
54
|
+
return True
|
|
55
|
+
return directory_name in self.exclude_dirs
|
|
56
|
+
|
|
57
|
+
def allows_extension(self, path: Path) -> bool:
|
|
58
|
+
"""Return True when the file extension is in the allow-list."""
|
|
59
|
+
return path.suffix.lower() in self.include_extensions
|
|
60
|
+
|
|
61
|
+
def matches_exclude_glob(self, relative_path: Path) -> bool:
|
|
62
|
+
"""Return True when the path matches any configured exclusion glob."""
|
|
63
|
+
return any(relative_path.match(pattern) for pattern in self.exclude_globs)
|
|
64
|
+
|
|
65
|
+
@classmethod
|
|
66
|
+
def from_extensions(
|
|
67
|
+
cls,
|
|
68
|
+
include_extensions: Iterable[str],
|
|
69
|
+
*,
|
|
70
|
+
max_file_size_bytes: int = 1_000_000,
|
|
71
|
+
include_hidden: bool = False,
|
|
72
|
+
) -> "ScannerConfig":
|
|
73
|
+
"""Build config from extension iterable."""
|
|
74
|
+
normalized = {
|
|
75
|
+
ext.lower() if ext.startswith(".") else f".{ext.lower()}"
|
|
76
|
+
for ext in include_extensions
|
|
77
|
+
}
|
|
78
|
+
return cls(
|
|
79
|
+
include_extensions=normalized,
|
|
80
|
+
max_file_size_bytes=max_file_size_bytes,
|
|
81
|
+
include_hidden=include_hidden,
|
|
82
|
+
)
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
|
|
@@ -0,0 +1,170 @@
|
|
|
1
|
+
"""Bootstrap GCIE-managed architecture artifacts."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
import json
|
|
6
|
+
from datetime import datetime, timezone
|
|
7
|
+
from pathlib import Path
|
|
8
|
+
|
|
9
|
+
from .architecture_index import build_architecture_index, refresh_architecture_if_needed, write_architecture_index
|
|
10
|
+
from .architecture_parser import parse_architecture
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
# Conventional locations for user-maintained architecture docs,
# checked in priority order.
_DEFAULT_DOC_CANDIDATES = [
    "ARCHITECTURE.md",
    "README.md",
    "PROJECT.md",
    "docs/architecture.md",
    "docs/system_design.md",
    "docs/design.md",
]

# Directories never scanned for architecture information.
_EXCLUDED_DIRS = {".git", ".gcie", ".venv", "node_modules", "__pycache__"}


def find_user_architecture_docs(repo_path: Path) -> list[Path]:
    """Find likely user-managed architecture documents in the repo."""
    candidates = (repo_path / name for name in _DEFAULT_DOC_CANDIDATES)
    return [path for path in candidates if path.is_file()]
|
33
|
+
|
|
34
|
+
|
|
35
|
+
def _summarize_docs(docs: list[tuple[Path, str]]) -> str:
|
|
36
|
+
if not docs:
|
|
37
|
+
return "No user-managed architecture docs were found."
|
|
38
|
+
|
|
39
|
+
lines = []
|
|
40
|
+
for path, content in docs:
|
|
41
|
+
excerpt = ""
|
|
42
|
+
for line in content.splitlines():
|
|
43
|
+
if line.strip():
|
|
44
|
+
excerpt = line.strip()
|
|
45
|
+
break
|
|
46
|
+
lines.append(f"- {path.as_posix()}: {excerpt[:120]}")
|
|
47
|
+
return "\n".join(lines)
|
|
48
|
+
|
|
49
|
+
|
|
50
|
+
def _discover_subsystems(repo_path: Path) -> list[tuple[str, list[str]]]:
    """Treat each non-excluded top-level directory as a subsystem,
    sampling up to five code files from it."""
    subsystems: list[tuple[str, list[str]]] = []
    code_suffixes = {".py", ".js", ".jsx", ".ts", ".tsx"}
    for child in repo_path.iterdir():
        if not child.is_dir() or child.name in _EXCLUDED_DIRS:
            continue
        sample: list[str] = []
        for candidate in child.rglob("*"):
            if not candidate.is_file():
                continue
            if candidate.suffix.lower() not in code_suffixes:
                continue
            sample.append(candidate.relative_to(repo_path).as_posix())
            # Five files is enough context for a subsystem stub.
            if len(sample) >= 5:
                break
        subsystems.append((child.name, sample))
    return subsystems
|
|
63
|
+
|
|
64
|
+
|
|
65
|
+
def _render_architecture(repo_path: Path, docs: list[tuple[Path, str]]) -> str:
    """Render a skeleton GCIE architecture.md document for *repo_path*.

    The output follows the section layout that parse_architecture expects:
    a "# GCIE Architecture" header followed by the required "## " sections,
    with one "### Subsystem:" block per discovered top-level directory.
    Many fields are intentionally left blank for the user to fill in.
    """
    summary = _summarize_docs(docs)
    subsystems = _discover_subsystems(repo_path)
    # Fall back to a single "core" entry when no subsystems were discovered.
    active_work = "\n".join(f"- {name}" for name, _ in subsystems) or "- core"

    subsystem_blocks = []
    for name, key_files in subsystems:
        # "- " placeholder keeps the list field present even when empty.
        key_lines = "\n".join(f"- {path}" for path in key_files) or "- "
        subsystem_blocks.append(
            "\n".join(
                [
                    f"### Subsystem: {name}",
                    "Purpose: ",
                    "Status: active",
                    "Key Files:",
                    key_lines,
                    "Interfaces:",
                    "- ",
                    "Depends On:",
                    "- ",
                    "Used By:",
                    "- ",
                    "Failure Modes:",
                    "- ",
                    "Notes:",
                    "- ",
                ]
            )
        )

    return "\n".join(
        [
            "# GCIE Architecture",
            "",
            "## Project Summary",
            summary,
            "",
            "## System Stage",
            "unknown",
            "",
            "## Global Constraints",
            "",
            "## Subsystems",
            "",
            "\n\n".join(subsystem_blocks) if subsystem_blocks else "### Subsystem: core\nPurpose: ",
            "",
            "## Data Flow",
            "",
            "## Entry Points",
            "",
            "## Active Work Areas",
            active_work,
            "",
            "## Known Risks",
            "",
        ]
    )
|
|
122
|
+
|
|
123
|
+
|
|
124
|
+
def _write_context_config(config_path: Path, *, architecture_source: str) -> dict:
|
|
125
|
+
config = {
|
|
126
|
+
"architecture_slicer_enabled": True,
|
|
127
|
+
"fallback_to_normal_on_low_confidence": True,
|
|
128
|
+
"confidence_threshold": 0.2,
|
|
129
|
+
"architecture_source": architecture_source,
|
|
130
|
+
"last_bootstrap_time": datetime.now(timezone.utc).isoformat(),
|
|
131
|
+
"last_architecture_update": None,
|
|
132
|
+
}
|
|
133
|
+
config_path.parent.mkdir(parents=True, exist_ok=True)
|
|
134
|
+
config_path.write_text(json.dumps(config, indent=2), encoding="utf-8")
|
|
135
|
+
return config
|
|
136
|
+
|
|
137
|
+
|
|
138
|
+
def ensure_initialized(repo_path: Path) -> dict:
    """Ensure GCIE-managed architecture artifacts exist for the repo.

    Creates (only when missing) the .gcie/architecture.md document, the
    .gcie/architecture_index.json index, and .gcie/context_config.json,
    then refreshes the artifacts if structural changes are detected.
    Returns the context config dict (empty dict as a last resort).
    """
    gcie_dir = repo_path / ".gcie"
    architecture_path = gcie_dir / "architecture.md"
    index_path = gcie_dir / "architecture_index.json"
    config_path = gcie_dir / "context_config.json"

    # Load any existing config; a corrupt file is treated as absent.
    config = None
    if config_path.exists():
        try:
            config = json.loads(config_path.read_text(encoding="utf-8"))
        except Exception:
            config = None

    # Bootstrap the architecture doc from user-managed docs when missing.
    if not architecture_path.exists():
        docs = [(path, path.read_text(encoding="utf-8")) for path in find_user_architecture_docs(repo_path)]
        architecture_text = _render_architecture(repo_path, docs)
        gcie_dir.mkdir(parents=True, exist_ok=True)
        architecture_path.write_text(architecture_text, encoding="utf-8")

    # Derive the index from the architecture doc when the index is missing.
    if not index_path.exists() and architecture_path.exists():
        parsed = parse_architecture(architecture_path.read_text(encoding="utf-8"))
        index_data = build_architecture_index(parsed, repo_path)
        write_architecture_index(index_path, index_data)

    if config is None:
        config = _write_context_config(config_path, architecture_source=architecture_path.as_posix())

    # Record the refresh timestamp when artifacts were actually updated.
    if refresh_architecture_if_needed(repo_path, architecture_path, index_path):
        config["last_architecture_update"] = datetime.now(timezone.utc).isoformat()
        config_path.write_text(json.dumps(config, indent=2), encoding="utf-8")

    return config or {}
|
|
@@ -0,0 +1,185 @@
|
|
|
1
|
+
"""Build and maintain architecture index data."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
import json
|
|
6
|
+
from datetime import datetime, timezone
|
|
7
|
+
from pathlib import Path
|
|
8
|
+
|
|
9
|
+
from .architecture_parser import ArchitectureDoc
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
# Directories excluded from fingerprinting and discovery.
_EXCLUDED_DIRS = {".git", ".gcie", ".venv", "node_modules", "__pycache__"}
# Extensions that count as "code" for fingerprinting purposes.
_CODE_EXTENSIONS = {".py", ".pyi", ".js", ".jsx", ".ts", ".tsx"}
# Path fragments that suggest core routing/context infrastructure.
_CORE_HINTS = {
    "router",
    "routing",
    "fallback",
    "context",
    "slicer",
    "architecture",
    "validation",
    "mode",
    "confidence",
}
# Directory names that mark a file as core infrastructure outright.
_CORE_DIRS = {"context", "router", "routing"}
# Directories whose contents are never considered core (test code).
_CORE_EXCLUDED_DIRS = {"tests", "test"}


def compute_repo_fingerprint(repo_path: Path) -> dict:
    """Compute a lightweight fingerprint for detecting structural changes.

    Returns a dict with the sorted top-level directory names and the
    number of code files (by extension), both ignoring _EXCLUDED_DIRS.
    """
    top_level_dirs = [
        child.name
        for child in repo_path.iterdir()
        if child.is_dir() and child.name not in _EXCLUDED_DIRS
    ]

    file_count = 0
    for path in repo_path.rglob("*"):
        # NOTE: rglob still descends into excluded directories; the
        # `parts` check below is what actually filters their contents.
        # (The previous per-directory `continue` was dead code.)
        if not path.is_file() or path.suffix.lower() not in _CODE_EXTENSIONS:
            continue
        if any(part in _EXCLUDED_DIRS for part in path.parts):
            continue
        file_count += 1

    return {
        "top_level_dirs": sorted(top_level_dirs),
        "code_file_count": file_count,
    }
|
|
51
|
+
|
|
52
|
+
|
|
53
|
+
def _is_core_infrastructure(path: Path) -> bool:
    """Heuristically classify a file as core routing/context infrastructure.

    Test directories are excluded outright; core directory names match
    immediately; otherwise the lowercased path is scanned for hint words.
    """
    parts = {part.lower() for part in path.parts}
    if parts & _CORE_EXCLUDED_DIRS:
        return False
    if parts & _CORE_DIRS:
        return True
    lowered = path.as_posix().lower()
    return any(hint in lowered for hint in _CORE_HINTS)
|
|
61
|
+
|
|
62
|
+
|
|
63
|
+
def _discover_core_infrastructure(repo_path: Path) -> list[str]:
    """Collect repo-relative paths of files that look like core infrastructure.

    Walks the repo, skipping excluded directories' contents, and returns
    a sorted, de-duplicated list of posix-style relative paths.
    """
    # Collect into a set directly instead of sorted(set(list)) afterwards;
    # the previous per-directory `continue` was dead code (directories fail
    # the is_file check below anyway).
    core_files: set[str] = set()
    for path in repo_path.rglob("*"):
        if not path.is_file() or path.suffix.lower() not in _CODE_EXTENSIONS:
            continue
        # Skip anything living under an excluded directory.
        if any(part in _EXCLUDED_DIRS for part in path.parts):
            continue
        if _is_core_infrastructure(path):
            core_files.add(path.relative_to(repo_path).as_posix())
    return sorted(core_files)
|
|
76
|
+
|
|
77
|
+
|
|
78
|
+
def build_architecture_index(doc: ArchitectureDoc, repo_path: Path) -> dict:
    """Build an index structure from parsed architecture data.

    Returns a dict containing a generation timestamp, the repo
    fingerprint, serialized subsystem entries, a file -> subsystems
    reverse map, and discovered core-infrastructure paths.
    """
    subsystems = []
    file_map: dict[str, list[str]] = {}

    for subsystem in doc.subsystems or []:
        entry = {
            "name": subsystem.name,
            "purpose": subsystem.purpose,
            "status": subsystem.status,
            "key_files": subsystem.key_files or [],
            "interfaces": subsystem.interfaces or [],
            "depends_on": subsystem.depends_on or [],
            "used_by": subsystem.used_by or [],
            "failure_modes": subsystem.failure_modes or [],
            "notes": subsystem.notes or [],
        }
        subsystems.append(entry)
        # Reverse map: file path -> names of subsystems that claim it.
        for key_file in subsystem.key_files or []:
            file_map.setdefault(key_file, []).append(subsystem.name)

    return {
        "generated_at": datetime.now(timezone.utc).isoformat(),
        "repo_fingerprint": compute_repo_fingerprint(repo_path),
        "subsystems": subsystems,
        "file_map": file_map,
        "core_infrastructure": _discover_core_infrastructure(repo_path),
    }
|
|
107
|
+
|
|
108
|
+
|
|
109
|
+
def write_architecture_index(path: Path, index_data: dict) -> None:
    """Persist the architecture index as pretty-printed JSON."""
    path.parent.mkdir(parents=True, exist_ok=True)
    serialized = json.dumps(index_data, indent=2)
    path.write_text(serialized, encoding="utf-8")
|
|
112
|
+
|
|
113
|
+
|
|
114
|
+
def load_architecture_index(path: Path) -> dict | None:
|
|
115
|
+
if not path.exists():
|
|
116
|
+
return None
|
|
117
|
+
try:
|
|
118
|
+
return json.loads(path.read_text(encoding="utf-8"))
|
|
119
|
+
except Exception:
|
|
120
|
+
return None
|
|
121
|
+
|
|
122
|
+
|
|
123
|
+
def has_structural_change(repo_path: Path, index_data: dict) -> bool:
    """Detect whether the repo has structural changes since last index.

    A change is any difference in the set of top-level directories, or a
    swing of 15% or more in the code-file count.
    """
    current = compute_repo_fingerprint(repo_path)
    previous = index_data.get("repo_fingerprint", {})

    current_dirs = set(current.get("top_level_dirs", []))
    previous_dirs = set(previous.get("top_level_dirs", []))
    if current_dirs != previous_dirs:
        return True

    prev_count = previous.get("code_file_count", 0)
    curr_count = current.get("code_file_count", 0)
    if prev_count == 0:
        # No baseline to compare against: assume unchanged.
        return False
    return abs(curr_count - prev_count) / max(prev_count, 1) >= 0.15
|
|
137
|
+
|
|
138
|
+
|
|
139
|
+
def _replace_section(text: str, section_title: str, new_body: str) -> str:
|
|
140
|
+
heading = f"## {section_title}"
|
|
141
|
+
if heading not in text:
|
|
142
|
+
return text.rstrip() + f"\n\n{heading}\n{new_body}\n"
|
|
143
|
+
|
|
144
|
+
parts = text.split(heading)
|
|
145
|
+
before = parts[0].rstrip()
|
|
146
|
+
after = heading.join(parts[1:])
|
|
147
|
+
remainder = after.split("\n## ", 1)
|
|
148
|
+
tail = ""
|
|
149
|
+
if len(remainder) == 2:
|
|
150
|
+
tail = "\n## " + remainder[1]
|
|
151
|
+
return f"{before}\n\n{heading}\n{new_body}\n{tail}".strip() + "\n"
|
|
152
|
+
|
|
153
|
+
|
|
154
|
+
def refresh_architecture_if_needed(
    repo_path: Path,
    architecture_path: Path,
    index_path: Path,
) -> bool:
    """Refresh architecture artifacts when structural changes are detected.

    Returns True when any artifact was rewritten, False otherwise.
    With no existing index there is nothing to compare against, so the
    function is a no-op returning False.
    """
    index_data = load_architecture_index(index_path)
    if index_data is None:
        return False

    if not has_structural_change(repo_path, index_data):
        # No structural change, but older indexes may lack the
        # core_infrastructure field — backfill it once and report a write.
        if not index_data.get("core_infrastructure"):
            index_data["core_infrastructure"] = _discover_core_infrastructure(repo_path)
            index_data["generated_at"] = datetime.now(timezone.utc).isoformat()
            write_architecture_index(index_path, index_data)
            return True
        return False

    # Structural change detected: rewrite the Active Work Areas section
    # of the architecture doc from the current top-level directories.
    if architecture_path.exists():
        text = architecture_path.read_text(encoding="utf-8")
        fingerprint = compute_repo_fingerprint(repo_path)
        active = "\n".join(f"- {name}" for name in fingerprint.get("top_level_dirs", []))
        updated = _replace_section(text, "Active Work Areas", active)
        architecture_path.write_text(updated, encoding="utf-8")

    # Re-stamp and persist the index with the fresh fingerprint.
    index_data["repo_fingerprint"] = compute_repo_fingerprint(repo_path)
    index_data["generated_at"] = datetime.now(timezone.utc).isoformat()
    index_data["core_infrastructure"] = _discover_core_infrastructure(repo_path)
    write_architecture_index(index_path, index_data)
    return True
|
|
184
|
+
|
|
185
|
+
|
|
@@ -0,0 +1,170 @@
|
|
|
1
|
+
"""Parse GCIE-managed architecture files."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
from dataclasses import dataclass
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
@dataclass
class Subsystem:
    """One subsystem entry parsed from a "### Subsystem:" block."""

    name: str
    purpose: str = ""
    status: str = ""
    key_files: list[str] | None = None
    interfaces: list[str] | None = None
    depends_on: list[str] | None = None
    used_by: list[str] | None = None
    failure_modes: list[str] | None = None
    notes: list[str] | None = None


@dataclass
class ArchitectureDoc:
    """Structured representation of a GCIE architecture.md document."""

    project_summary: str = ""
    system_stage: str = ""
    global_constraints: str = ""
    subsystems: list[Subsystem] | None = None
    data_flow: str = ""
    entry_points: str = ""
    active_work_areas: str = ""
    known_risks: str = ""


# Maps a subsystem list-field label (as written in the doc) to the
# corresponding Subsystem attribute name.
_LIST_FIELDS = {
    "Key Files": "key_files",
    "Interfaces": "interfaces",
    "Depends On": "depends_on",
    "Used By": "used_by",
    "Failure Modes": "failure_modes",
    "Notes": "notes",
}

# Top-level "## " sections every architecture document must declare.
_REQUIRED_SECTIONS = {
    "Project Summary",
    "System Stage",
    "Global Constraints",
    "Subsystems",
    "Data Flow",
    "Entry Points",
    "Active Work Areas",
    "Known Risks",
}

# Section title -> ArchitectureDoc attribute for plain-text sections.
_SECTION_ATTRS = {
    "Project Summary": "project_summary",
    "System Stage": "system_stage",
    "Global Constraints": "global_constraints",
    "Data Flow": "data_flow",
    "Entry Points": "entry_points",
    "Active Work Areas": "active_work_areas",
    "Known Risks": "known_risks",
}


class ArchitectureParseError(ValueError):
    """Raised when a GCIE architecture document is malformed."""


def parse_architecture(text: str) -> ArchitectureDoc:
    """Parse a GCIE architecture.md file into a structured object.

    Raises ArchitectureParseError when the "# GCIE Architecture" header
    or any required "## " section is missing, or when a subsystem
    heading has no name.
    """
    lines = text.splitlines()

    if not lines or not lines[0].strip().startswith("# GCIE Architecture"):
        raise ArchitectureParseError("missing_header")

    subsystems: list[Subsystem] = []
    sections: dict[str, str] = {}
    seen_sections: set[str] = set()
    current_section = ""
    current_subsystem: Subsystem | None = None
    current_list_field: str | None = None
    buffer: list[str] = []

    def flush_section() -> None:
        # Store the accumulated body for the section just finished.
        if current_section:
            sections[current_section] = "\n".join(
                line.strip() for line in buffer if line.strip()
            )
        buffer.clear()

    def commit_subsystem() -> None:
        nonlocal current_subsystem
        if current_subsystem is not None:
            subsystems.append(current_subsystem)
            current_subsystem = None

    for line in lines[1:]:
        stripped = line.strip()

        if stripped.startswith("## "):
            # BUGFIX: close any open subsystem here; previously sections
            # that followed "## Subsystems" (Data Flow, Entry Points, ...)
            # were swallowed by the subsystem handler and came back empty.
            commit_subsystem()
            flush_section()
            current_section = stripped[len("## ") :]
            seen_sections.add(current_section)
            current_list_field = None
            continue

        if stripped.startswith("### Subsystem:"):
            commit_subsystem()
            name = stripped.split(":", 1)[1].strip()
            if not name:
                raise ArchitectureParseError("subsystem_missing_name")
            current_subsystem = Subsystem(name=name)
            current_list_field = None
            continue

        if current_subsystem is not None:
            # A bare "<Label>:" line starts a list field on the subsystem.
            if stripped.endswith(":") and stripped[:-1] in _LIST_FIELDS:
                current_list_field = _LIST_FIELDS[stripped[:-1]]
                setattr(current_subsystem, current_list_field, [])
                continue

            if stripped.startswith("Purpose:"):
                current_subsystem.purpose = stripped.split(":", 1)[1].strip()
                current_list_field = None
                continue

            if stripped.startswith("Status:"):
                current_subsystem.status = stripped.split(":", 1)[1].strip()
                current_list_field = None
                continue

            # "- item" lines append to the active list field.
            if stripped.startswith("-") and current_list_field:
                value = stripped.lstrip("- ").strip()
                if value:
                    target = getattr(current_subsystem, current_list_field)
                    if target is not None:
                        target.append(value)
                continue

        if current_section and current_subsystem is None:
            buffer.append(line)

    commit_subsystem()
    # BUGFIX: flush the final section; its content was previously dropped.
    flush_section()

    missing_sections = _REQUIRED_SECTIONS - seen_sections
    if missing_sections:
        raise ArchitectureParseError("missing_sections")

    kwargs = {attr: sections.get(title, "") for title, attr in _SECTION_ATTRS.items()}
    return ArchitectureDoc(subsystems=subsystems, **kwargs)
|