@pmaddire/gcie 0.1.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (82) hide show
  1. package/AGENT.md +256 -0
  2. package/AGENT_USAGE.md +231 -0
  3. package/ARCHITECTURE.md +151 -0
  4. package/CLAUDE.md +69 -0
  5. package/DEBUGGING_PLAYBOOK.md +160 -0
  6. package/KNOWLEDGE_INDEX.md +154 -0
  7. package/POTENTIAL_UPDATES +130 -0
  8. package/PROJECT.md +141 -0
  9. package/README.md +371 -0
  10. package/REPO_DIGITAL_TWIN.md +98 -0
  11. package/ROADMAP.md +301 -0
  12. package/SETUP_ANY_REPO.md +85 -0
  13. package/bin/gcie-init.js +20 -0
  14. package/bin/gcie.js +45 -0
  15. package/cli/__init__.py +1 -0
  16. package/cli/app.py +163 -0
  17. package/cli/commands/__init__.py +1 -0
  18. package/cli/commands/cache.py +35 -0
  19. package/cli/commands/context.py +2426 -0
  20. package/cli/commands/context_slices.py +617 -0
  21. package/cli/commands/debug.py +24 -0
  22. package/cli/commands/index.py +17 -0
  23. package/cli/commands/query.py +20 -0
  24. package/cli/commands/setup.py +73 -0
  25. package/config/__init__.py +1 -0
  26. package/config/scanner_config.py +82 -0
  27. package/context/__init__.py +1 -0
  28. package/context/architecture_bootstrap.py +170 -0
  29. package/context/architecture_index.py +185 -0
  30. package/context/architecture_parser.py +170 -0
  31. package/context/architecture_slicer.py +308 -0
  32. package/context/context_router.py +70 -0
  33. package/context/fallback_evaluator.py +21 -0
  34. package/coverage_integration/__init__.py +1 -0
  35. package/coverage_integration/coverage_loader.py +55 -0
  36. package/debugging/__init__.py +12 -0
  37. package/debugging/bug_localizer.py +81 -0
  38. package/debugging/execution_path_analyzer.py +42 -0
  39. package/embeddings/__init__.py +6 -0
  40. package/embeddings/encoder.py +45 -0
  41. package/embeddings/faiss_index.py +72 -0
  42. package/git_integration/__init__.py +1 -0
  43. package/git_integration/git_miner.py +78 -0
  44. package/graphs/__init__.py +17 -0
  45. package/graphs/call_graph.py +70 -0
  46. package/graphs/code_graph.py +81 -0
  47. package/graphs/execution_graph.py +35 -0
  48. package/graphs/git_graph.py +43 -0
  49. package/graphs/graph_store.py +25 -0
  50. package/graphs/node_factory.py +21 -0
  51. package/graphs/test_graph.py +65 -0
  52. package/graphs/validators.py +28 -0
  53. package/graphs/variable_graph.py +51 -0
  54. package/knowledge_index/__init__.py +1 -0
  55. package/knowledge_index/index_builder.py +60 -0
  56. package/knowledge_index/models.py +35 -0
  57. package/knowledge_index/query_api.py +38 -0
  58. package/knowledge_index/store.py +23 -0
  59. package/llm_context/__init__.py +6 -0
  60. package/llm_context/context_builder.py +67 -0
  61. package/llm_context/snippet_selector.py +57 -0
  62. package/package.json +14 -0
  63. package/parser/__init__.py +18 -0
  64. package/parser/ast_parser.py +216 -0
  65. package/parser/call_resolver.py +52 -0
  66. package/parser/models.py +75 -0
  67. package/parser/tree_sitter_adapter.py +56 -0
  68. package/parser/variable_extractor.py +31 -0
  69. package/retrieval/__init__.py +17 -0
  70. package/retrieval/cache.py +22 -0
  71. package/retrieval/hybrid_retriever.py +249 -0
  72. package/retrieval/query_parser.py +38 -0
  73. package/retrieval/ranking.py +43 -0
  74. package/retrieval/semantic_retriever.py +39 -0
  75. package/retrieval/symbolic_retriever.py +80 -0
  76. package/scanner/__init__.py +5 -0
  77. package/scanner/file_filters.py +37 -0
  78. package/scanner/models.py +44 -0
  79. package/scanner/repository_scanner.py +55 -0
  80. package/scripts/bootstrap_from_github.ps1 +41 -0
  81. package/tracing/__init__.py +1 -0
  82. package/tracing/runtime_tracer.py +60 -0
@@ -0,0 +1,73 @@
1
+ """One-command repository setup for GCIE."""
2
+
3
+ from __future__ import annotations
4
+
5
+ from pathlib import Path
6
+
7
+ from context.architecture_bootstrap import ensure_initialized
8
+
9
+ from .index import run_index
10
+
11
+
12
+ def _repo_root() -> Path:
13
+ return Path(__file__).resolve().parents[2]
14
+
15
+
16
+ def _copy_if_needed(source: Path, target: Path, *, force: bool) -> str:
17
+ if not source.exists():
18
+ return "source_missing"
19
+ if target.exists() and not force:
20
+ return "skipped_existing"
21
+ target.parent.mkdir(parents=True, exist_ok=True)
22
+ target.write_text(source.read_text(encoding="utf-8"), encoding="utf-8")
23
+ return "written"
24
+
25
+
26
def run_setup(
    path: str,
    *,
    force: bool = False,
    include_agent_usage: bool = True,
    include_setup_doc: bool = True,
    run_index_pass: bool = True,
) -> dict:
    """Initialize a repository so GCIE can be used immediately.

    Creates the target directory if needed, bootstraps the ``.gcie``
    architecture artifacts, optionally copies the agent-usage and setup
    documents from the GCIE package root, and optionally runs one index
    pass. Returns a status dictionary describing what happened.
    """
    target = Path(path).resolve()
    target.mkdir(parents=True, exist_ok=True)

    config = ensure_initialized(target)
    gcie_dir = target / ".gcie"
    source_root = _repo_root()

    # Doc name -> copy outcome ("written" / "skipped_existing" / "source_missing").
    copied: dict[str, str] = {}
    doc_names: list[str] = []
    if include_agent_usage:
        doc_names.append("AGENT_USAGE.md")
    if include_setup_doc:
        doc_names.append("SETUP_ANY_REPO.md")
    for doc_name in doc_names:
        copied[doc_name] = _copy_if_needed(
            source_root / doc_name,
            target / doc_name,
            force=force,
        )

    status: dict[str, object] = {
        "repo": target.as_posix(),
        "gcie_dir": gcie_dir.as_posix(),
        "architecture_initialized": True,
        "files": copied,
        "context_config": config,
    }

    if run_index_pass:
        status["index"] = run_index(target.as_posix())
    else:
        status["index"] = {"skipped": True}
    return status
@@ -0,0 +1 @@
1
+ """Configuration package for GCIE."""
@@ -0,0 +1,82 @@
1
+ """Scanner configuration models."""
2
+
3
+ from __future__ import annotations
4
+
5
+ from dataclasses import dataclass, field
6
+ from pathlib import Path
7
+ from typing import Iterable
8
+
9
+
10
+ @dataclass(slots=True)
11
+ class ScannerConfig:
12
+ """Configuration for repository scanning."""
13
+
14
+ include_extensions: set[str] = field(
15
+ default_factory=lambda: {
16
+ ".py",
17
+ ".pyi",
18
+ ".md",
19
+ ".txt",
20
+ ".toml",
21
+ ".yaml",
22
+ ".yml",
23
+ ".json",
24
+ ".ini",
25
+ ".cfg",
26
+ ".rst",
27
+ ".sh",
28
+ }
29
+ )
30
+ exclude_dirs: set[str] = field(
31
+ default_factory=lambda: {
32
+ ".git",
33
+ ".hg",
34
+ ".svn",
35
+ ".venv",
36
+ "venv",
37
+ "__pycache__",
38
+ "node_modules",
39
+ "build",
40
+ "dist",
41
+ ".pytest_cache",
42
+ ".mypy_cache",
43
+ ".idea",
44
+ ".vscode",
45
+ }
46
+ )
47
+ exclude_globs: tuple[str, ...] = ()
48
+ max_file_size_bytes: int = 1_000_000
49
+ include_hidden: bool = False
50
+
51
+ def is_excluded_dir(self, directory_name: str) -> bool:
52
+ """Return True if a directory should be skipped during scanning."""
53
+ if not self.include_hidden and directory_name.startswith("."):
54
+ return True
55
+ return directory_name in self.exclude_dirs
56
+
57
+ def allows_extension(self, path: Path) -> bool:
58
+ """Return True when the file extension is in the allow-list."""
59
+ return path.suffix.lower() in self.include_extensions
60
+
61
+ def matches_exclude_glob(self, relative_path: Path) -> bool:
62
+ """Return True when the path matches any configured exclusion glob."""
63
+ return any(relative_path.match(pattern) for pattern in self.exclude_globs)
64
+
65
+ @classmethod
66
+ def from_extensions(
67
+ cls,
68
+ include_extensions: Iterable[str],
69
+ *,
70
+ max_file_size_bytes: int = 1_000_000,
71
+ include_hidden: bool = False,
72
+ ) -> "ScannerConfig":
73
+ """Build config from extension iterable."""
74
+ normalized = {
75
+ ext.lower() if ext.startswith(".") else f".{ext.lower()}"
76
+ for ext in include_extensions
77
+ }
78
+ return cls(
79
+ include_extensions=normalized,
80
+ max_file_size_bytes=max_file_size_bytes,
81
+ include_hidden=include_hidden,
82
+ )
@@ -0,0 +1 @@
1
+
@@ -0,0 +1,170 @@
1
+ """Bootstrap GCIE-managed architecture artifacts."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import json
6
+ from datetime import datetime, timezone
7
+ from pathlib import Path
8
+
9
+ from .architecture_index import build_architecture_index, refresh_architecture_if_needed, write_architecture_index
10
+ from .architecture_parser import parse_architecture
11
+
12
+
13
# User-managed documents, checked in this order, that may describe the
# repository's architecture and seed the generated architecture.md summary.
_DEFAULT_DOC_CANDIDATES = [
    "ARCHITECTURE.md",
    "README.md",
    "PROJECT.md",
    "docs/architecture.md",
    "docs/system_design.md",
    "docs/design.md",
]

# Directory names never treated as subsystems during discovery.
_EXCLUDED_DIRS = {".git", ".gcie", ".venv", "node_modules", "__pycache__"}
23
+
24
+
25
def find_user_architecture_docs(repo_path: Path) -> list[Path]:
    """Find likely user-managed architecture documents in the repo.

    Checks the fixed candidate list in priority order and returns the
    paths that exist as regular files.
    """
    candidates = (repo_path / name for name in _DEFAULT_DOC_CANDIDATES)
    return [doc for doc in candidates if doc.exists() and doc.is_file()]
33
+
34
+
35
+ def _summarize_docs(docs: list[tuple[Path, str]]) -> str:
36
+ if not docs:
37
+ return "No user-managed architecture docs were found."
38
+
39
+ lines = []
40
+ for path, content in docs:
41
+ excerpt = ""
42
+ for line in content.splitlines():
43
+ if line.strip():
44
+ excerpt = line.strip()
45
+ break
46
+ lines.append(f"- {path.as_posix()}: {excerpt[:120]}")
47
+ return "\n".join(lines)
48
+
49
+
50
def _discover_subsystems(repo_path: Path) -> list[tuple[str, list[str]]]:
    """List top-level directories as candidate subsystems.

    For each non-excluded top-level directory, collect up to five source
    files (repo-relative POSIX paths) as representative key files.
    """
    source_suffixes = {".py", ".js", ".jsx", ".ts", ".tsx"}
    subsystems: list[tuple[str, list[str]]] = []
    for entry in repo_path.iterdir():
        if entry.name in _EXCLUDED_DIRS or not entry.is_dir():
            continue
        key_files: list[str] = []
        for candidate in entry.rglob("*"):
            if not candidate.is_file():
                continue
            if candidate.suffix.lower() not in source_suffixes:
                continue
            key_files.append(candidate.relative_to(repo_path).as_posix())
            if len(key_files) >= 5:
                # Cap the sample at five files per subsystem.
                break
        subsystems.append((entry.name, key_files))
    return subsystems
63
+
64
+
65
def _render_architecture(repo_path: Path, docs: list[tuple[Path, str]]) -> str:
    """Render a skeleton GCIE architecture.md for *repo_path*.

    Builds the document from the doc summary, one templated block per
    discovered top-level subsystem, and a list of active work areas.
    Most template fields are left blank for the user to fill in.
    """
    summary = _summarize_docs(docs)
    subsystems = _discover_subsystems(repo_path)
    # Fall back to a single "- core" entry when no subsystems were found.
    active_work = "\n".join(f"- {name}" for name, _ in subsystems) or "- core"

    subsystem_blocks = []
    for name, key_files in subsystems:
        # "- " placeholder keeps the list field parseable when empty.
        key_lines = "\n".join(f"- {path}" for path in key_files) or "- "
        subsystem_blocks.append(
            "\n".join(
                [
                    f"### Subsystem: {name}",
                    "Purpose: ",
                    "Status: active",
                    "Key Files:",
                    key_lines,
                    "Interfaces:",
                    "- ",
                    "Depends On:",
                    "- ",
                    "Used By:",
                    "- ",
                    "Failure Modes:",
                    "- ",
                    "Notes:",
                    "- ",
                ]
            )
        )

    # NOTE(review): the section headings below must stay in sync with the
    # required sections checked by the architecture parser.
    return "\n".join(
        [
            "# GCIE Architecture",
            "",
            "## Project Summary",
            summary,
            "",
            "## System Stage",
            "unknown",
            "",
            "## Global Constraints",
            "",
            "## Subsystems",
            "",
            "\n\n".join(subsystem_blocks) if subsystem_blocks else "### Subsystem: core\nPurpose: ",
            "",
            "## Data Flow",
            "",
            "## Entry Points",
            "",
            "## Active Work Areas",
            active_work,
            "",
            "## Known Risks",
            "",
        ]
    )
122
+
123
+
124
+ def _write_context_config(config_path: Path, *, architecture_source: str) -> dict:
125
+ config = {
126
+ "architecture_slicer_enabled": True,
127
+ "fallback_to_normal_on_low_confidence": True,
128
+ "confidence_threshold": 0.2,
129
+ "architecture_source": architecture_source,
130
+ "last_bootstrap_time": datetime.now(timezone.utc).isoformat(),
131
+ "last_architecture_update": None,
132
+ }
133
+ config_path.parent.mkdir(parents=True, exist_ok=True)
134
+ config_path.write_text(json.dumps(config, indent=2), encoding="utf-8")
135
+ return config
136
+
137
+
138
def ensure_initialized(repo_path: Path) -> dict:
    """Ensure GCIE-managed architecture artifacts exist for the repo.

    Idempotently creates ``.gcie/architecture.md``,
    ``.gcie/architecture_index.json`` and ``.gcie/context_config.json``
    when missing, then runs a conditional refresh. Returns the context
    config dict (empty dict if nothing could be produced).
    """
    gcie_dir = repo_path / ".gcie"
    architecture_path = gcie_dir / "architecture.md"
    index_path = gcie_dir / "architecture_index.json"
    config_path = gcie_dir / "context_config.json"

    # Best-effort load of an existing config; any failure means "rebuild".
    config = None
    if config_path.exists():
        try:
            config = json.loads(config_path.read_text(encoding="utf-8"))
        except Exception:
            config = None

    # Generate a skeleton architecture.md seeded from user docs.
    if not architecture_path.exists():
        docs = [(path, path.read_text(encoding="utf-8")) for path in find_user_architecture_docs(repo_path)]
        architecture_text = _render_architecture(repo_path, docs)
        gcie_dir.mkdir(parents=True, exist_ok=True)
        architecture_path.write_text(architecture_text, encoding="utf-8")

    # Build the index from the (possibly freshly generated) architecture doc.
    if not index_path.exists() and architecture_path.exists():
        parsed = parse_architecture(architecture_path.read_text(encoding="utf-8"))
        index_data = build_architecture_index(parsed, repo_path)
        write_architecture_index(index_path, index_data)

    if config is None:
        config = _write_context_config(config_path, architecture_source=architecture_path.as_posix())

    # A structural refresh also bumps the update timestamp in the config.
    if refresh_architecture_if_needed(repo_path, architecture_path, index_path):
        config["last_architecture_update"] = datetime.now(timezone.utc).isoformat()
        config_path.write_text(json.dumps(config, indent=2), encoding="utf-8")

    # NOTE(review): config could be a non-dict JSON value if the stored file
    # held e.g. a list; callers appear to expect a dict — verify upstream.
    return config or {}
@@ -0,0 +1,185 @@
1
+ """Build and maintain architecture index data."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import json
6
+ from datetime import datetime, timezone
7
+ from pathlib import Path
8
+
9
+ from .architecture_parser import ArchitectureDoc
10
+
11
+
12
# Directories never scanned for fingerprints or core-file discovery.
_EXCLUDED_DIRS = {".git", ".gcie", ".venv", "node_modules", "__pycache__"}
# File extensions counted as "code" for fingerprinting purposes.
_CODE_EXTENSIONS = {".py", ".pyi", ".js", ".jsx", ".ts", ".tsx"}
# Substrings that flag a path as likely core routing/context infrastructure.
_CORE_HINTS = {
    "router",
    "routing",
    "fallback",
    "context",
    "slicer",
    "architecture",
    "validation",
    "mode",
    "confidence",
}
# Directory names that immediately mark contained files as core.
_CORE_DIRS = {"context", "router", "routing"}
# Directory names whose contents are never considered core (test code).
_CORE_EXCLUDED_DIRS = {"tests", "test"}
27
+
28
+
29
def compute_repo_fingerprint(repo_path: Path) -> dict:
    """Compute a lightweight fingerprint for detecting structural changes.

    Returns a dict with the sorted non-excluded top-level directory names
    and a count of code files (by extension) outside excluded directories.
    """
    top_level_dirs = [
        child.name
        for child in repo_path.iterdir()
        if child.is_dir() and child.name not in _EXCLUDED_DIRS
    ]

    # The original walk had a dead `if path.is_dir(): ... continue` branch:
    # it never pruned rglob's traversal and directories fell through the
    # file check anyway, so it is removed here.
    file_count = 0
    for path in repo_path.rglob("*"):
        if not path.is_file() or path.suffix.lower() not in _CODE_EXTENSIONS:
            continue
        # Exclusion is decided on components RELATIVE to the repo: the
        # original checked absolute parts, so a repo that itself lived
        # under a directory named e.g. "node_modules" counted zero files.
        relative_parts = path.relative_to(repo_path).parts
        if any(part in _EXCLUDED_DIRS for part in relative_parts):
            continue
        file_count += 1

    return {
        "top_level_dirs": sorted(top_level_dirs),
        "code_file_count": file_count,
    }
51
+
52
+
53
def _is_core_infrastructure(path: Path) -> bool:
    """Heuristically decide whether *path* looks like core routing/context code.

    Test directories are never core; files under a core directory always
    are; otherwise the lowercased path is searched for hint substrings.
    """
    part_names = {part.lower() for part in path.parts}
    if part_names & _CORE_EXCLUDED_DIRS:
        return False
    if part_names & _CORE_DIRS:
        return True
    haystack = path.as_posix().lower()
    for hint in _CORE_HINTS:
        if hint in haystack:
            return True
    return False
61
+
62
+
63
def _discover_core_infrastructure(repo_path: Path) -> list[str]:
    """Find repo files that look like core infrastructure.

    Returns sorted, de-duplicated POSIX paths relative to *repo_path*.
    """
    core_files: set[str] = set()
    for path in repo_path.rglob("*"):
        # Directories never reach the heuristic; the original's dir branch
        # was dead code (it never pruned rglob) and is removed.
        if not path.is_file() or path.suffix.lower() not in _CODE_EXTENSIONS:
            continue
        relative = path.relative_to(repo_path)
        if any(part in _EXCLUDED_DIRS for part in relative.parts):
            continue
        # Apply the heuristic to the RELATIVE path: the original matched
        # hint words against the absolute path, so an ancestor directory
        # outside the repo (e.g. ".../context/myrepo") marked every file
        # as core infrastructure.
        if _is_core_infrastructure(relative):
            core_files.add(relative.as_posix())
    return sorted(core_files)
76
+
77
+
78
def build_architecture_index(doc: ArchitectureDoc, repo_path: Path) -> dict:
    """Build an index structure from parsed architecture data.

    Produces serializable subsystem records, a key-file -> subsystem-names
    map, a repo fingerprint, and the discovered core-infrastructure list.
    """
    subsystem_records: list[dict] = []
    file_map: dict[str, list[str]] = {}

    for subsystem in doc.subsystems or []:
        record = {
            "name": subsystem.name,
            "purpose": subsystem.purpose,
            "status": subsystem.status,
            "key_files": subsystem.key_files or [],
            "interfaces": subsystem.interfaces or [],
            "depends_on": subsystem.depends_on or [],
            "used_by": subsystem.used_by or [],
            "failure_modes": subsystem.failure_modes or [],
            "notes": subsystem.notes or [],
        }
        subsystem_records.append(record)
        # Invert key files into a file -> owning-subsystems map.
        for key_file in record["key_files"]:
            file_map.setdefault(key_file, []).append(subsystem.name)

    return {
        "generated_at": datetime.now(timezone.utc).isoformat(),
        "repo_fingerprint": compute_repo_fingerprint(repo_path),
        "subsystems": subsystem_records,
        "file_map": file_map,
        "core_infrastructure": _discover_core_infrastructure(repo_path),
    }
107
+
108
+
109
def write_architecture_index(path: Path, index_data: dict) -> None:
    """Serialize *index_data* as pretty-printed JSON to *path*.

    Parent directories are created as needed.
    """
    serialized = json.dumps(index_data, indent=2)
    path.parent.mkdir(parents=True, exist_ok=True)
    path.write_text(serialized, encoding="utf-8")
112
+
113
+
114
+ def load_architecture_index(path: Path) -> dict | None:
115
+ if not path.exists():
116
+ return None
117
+ try:
118
+ return json.loads(path.read_text(encoding="utf-8"))
119
+ except Exception:
120
+ return None
121
+
122
+
123
def has_structural_change(repo_path: Path, index_data: dict) -> bool:
    """Detect whether the repo has structural changes since last index.

    A change is either a different set of top-level directories or a
    swing of 15% or more in the code-file count.
    """
    current = compute_repo_fingerprint(repo_path)
    previous = index_data.get("repo_fingerprint", {})

    current_dirs = set(current.get("top_level_dirs", []))
    previous_dirs = set(previous.get("top_level_dirs", []))
    if current_dirs != previous_dirs:
        return True

    prev_count = previous.get("code_file_count", 0)
    curr_count = current.get("code_file_count", 0)
    # Without a previous baseline a count delta is meaningless.
    if prev_count == 0:
        return False
    relative_delta = abs(curr_count - prev_count) / max(prev_count, 1)
    return relative_delta >= 0.15
137
+
138
+
139
+ def _replace_section(text: str, section_title: str, new_body: str) -> str:
140
+ heading = f"## {section_title}"
141
+ if heading not in text:
142
+ return text.rstrip() + f"\n\n{heading}\n{new_body}\n"
143
+
144
+ parts = text.split(heading)
145
+ before = parts[0].rstrip()
146
+ after = heading.join(parts[1:])
147
+ remainder = after.split("\n## ", 1)
148
+ tail = ""
149
+ if len(remainder) == 2:
150
+ tail = "\n## " + remainder[1]
151
+ return f"{before}\n\n{heading}\n{new_body}\n{tail}".strip() + "\n"
152
+
153
+
154
def refresh_architecture_if_needed(
    repo_path: Path,
    architecture_path: Path,
    index_path: Path,
) -> bool:
    """Refresh architecture artifacts when structural changes are detected.

    Returns True when any artifact was rewritten. With no structural
    change, the index is still rewritten once to backfill a missing
    ``core_infrastructure`` list; otherwise the architecture doc's
    "Active Work Areas" section and the whole index are regenerated.
    """
    index_data = load_architecture_index(index_path)
    if index_data is None:
        # No readable index — nothing to compare against.
        return False

    if not has_structural_change(repo_path, index_data):
        # Backfill core_infrastructure for indexes written before that
        # field existed; this counts as an update (returns True).
        if not index_data.get("core_infrastructure"):
            index_data["core_infrastructure"] = _discover_core_infrastructure(repo_path)
            index_data["generated_at"] = datetime.now(timezone.utc).isoformat()
            write_architecture_index(index_path, index_data)
            return True
        return False

    # Structural change detected: regenerate the "Active Work Areas"
    # section of the architecture doc from the current top-level dirs.
    if architecture_path.exists():
        text = architecture_path.read_text(encoding="utf-8")
        fingerprint = compute_repo_fingerprint(repo_path)
        active = "\n".join(f"- {name}" for name in fingerprint.get("top_level_dirs", []))
        updated = _replace_section(text, "Active Work Areas", active)
        architecture_path.write_text(updated, encoding="utf-8")

    # Rebuild the index metadata (fingerprint is recomputed here; the one
    # above is only used for the doc section).
    index_data["repo_fingerprint"] = compute_repo_fingerprint(repo_path)
    index_data["generated_at"] = datetime.now(timezone.utc).isoformat()
    index_data["core_infrastructure"] = _discover_core_infrastructure(repo_path)
    write_architecture_index(index_path, index_data)
    return True
184
+
185
+
@@ -0,0 +1,170 @@
1
+ """Parse GCIE-managed architecture files."""
2
+
3
+ from __future__ import annotations
4
+
5
+ from dataclasses import dataclass
6
+
7
+
8
@dataclass
class Subsystem:
    """One subsystem entry parsed from a ``### Subsystem: <name>`` block."""

    # Subsystem name taken from the "### Subsystem:" heading.
    name: str
    # Free-text "Purpose:" line.
    purpose: str = ""
    # Free-text "Status:" line (e.g. "active").
    status: str = ""
    # List fields below are None until their "<Label>:" heading is seen,
    # which distinguishes "absent" from "present but empty".
    key_files: list[str] | None = None
    interfaces: list[str] | None = None
    depends_on: list[str] | None = None
    used_by: list[str] | None = None
    failure_modes: list[str] | None = None
    notes: list[str] | None = None
19
+
20
+
21
@dataclass
class ArchitectureDoc:
    """Structured contents of a GCIE architecture.md document.

    Each string field holds the stripped prose of the matching ``## ``
    section; ``subsystems`` collects the parsed subsystem blocks.
    """

    project_summary: str = ""
    system_stage: str = ""
    global_constraints: str = ""
    subsystems: list[Subsystem] | None = None
    data_flow: str = ""
    entry_points: str = ""
    active_work_areas: str = ""
    known_risks: str = ""
31
+
32
+
33
# Maps "<Label>:" headings inside a subsystem block to Subsystem attributes.
_LIST_FIELDS = {
    "Key Files": "key_files",
    "Interfaces": "interfaces",
    "Depends On": "depends_on",
    "Used By": "used_by",
    "Failure Modes": "failure_modes",
    "Notes": "notes",
}

# Top-level "## " sections every GCIE architecture.md must contain.
_REQUIRED_SECTIONS = {
    "Project Summary",
    "System Stage",
    "Global Constraints",
    "Subsystems",
    "Data Flow",
    "Entry Points",
    "Active Work Areas",
    "Known Risks",
}


class ArchitectureParseError(ValueError):
    """Raised when a GCIE architecture document is malformed."""
56
+
57
+
58
def parse_architecture(text: str) -> ArchitectureDoc:
    """Parse a GCIE architecture.md file into a structured object.

    Raises ArchitectureParseError when the header or any required section
    is missing, or when a subsystem block has no name.

    Fixes two parsing bugs in the original:
    1. An open subsystem was only committed at the next ``### Subsystem:``
       line or at end of file — never at a following ``## `` heading — so
       the sections after "Subsystems" (Data Flow, Entry Points, Active
       Work Areas, Known Risks) were misrouted into the subsystem branch
       and their content silently lost.
    2. The final section's buffered content was never flushed, so the last
       section of the document always parsed as empty.
    """
    lines = text.splitlines()

    if not lines or not lines[0].strip().startswith("# GCIE Architecture"):
        raise ArchitectureParseError("missing_header")

    subsystems: list[Subsystem] = []
    # Section title -> accumulated prose; unknown titles are collected but
    # simply not used when constructing the result (same as the original).
    section_values: dict[str, str] = {}

    seen_sections: set[str] = set()
    current_section = ""
    current_subsystem: Subsystem | None = None
    current_list_field: str | None = None
    buffer: list[str] = []

    def flush_section() -> None:
        # Store the prose accumulated for the section that just ended.
        if current_section and current_subsystem is None:
            content = "\n".join(line.strip() for line in buffer if line.strip())
            section_values[current_section] = content
        buffer.clear()

    def commit_subsystem() -> None:
        nonlocal current_subsystem
        if current_subsystem is not None:
            subsystems.append(current_subsystem)
        current_subsystem = None

    for line in lines[1:]:
        stripped = line.strip()

        if stripped.startswith("## "):
            # A top-level heading ends any open subsystem block (bug fix 1)
            # and flushes the previous section's prose.
            commit_subsystem()
            flush_section()
            current_section = stripped[len("## ") :]
            seen_sections.add(current_section)
            current_list_field = None
            continue

        if stripped.startswith("### Subsystem:"):
            commit_subsystem()
            name = stripped.split(":", 1)[1].strip()
            if not name:
                raise ArchitectureParseError("subsystem_missing_name")
            current_subsystem = Subsystem(name=name)
            current_list_field = None
            continue

        if current_subsystem is not None:
            # "<Label>:" opens a list field (initialized empty).
            if stripped.endswith(":") and stripped[:-1] in _LIST_FIELDS:
                current_list_field = _LIST_FIELDS[stripped[:-1]]
                setattr(current_subsystem, current_list_field, [])
                continue

            if stripped.startswith("Purpose:"):
                current_subsystem.purpose = stripped.split(":", 1)[1].strip()
                current_list_field = None
                continue

            if stripped.startswith("Status:"):
                current_subsystem.status = stripped.split(":", 1)[1].strip()
                current_list_field = None
                continue

            # "- item" lines append to the currently open list field;
            # blank placeholders ("- ") are skipped.
            if stripped.startswith("-") and current_list_field:
                value = stripped.lstrip("- ").strip()
                if value:
                    target = getattr(current_subsystem, current_list_field)
                    if target is not None:
                        target.append(value)
                continue

        if current_section and current_subsystem is None:
            buffer.append(line)

    commit_subsystem()
    # Flush the last section of the document (bug fix 2).
    flush_section()

    missing_sections = _REQUIRED_SECTIONS - seen_sections
    if missing_sections:
        raise ArchitectureParseError("missing_sections")

    return ArchitectureDoc(
        project_summary=section_values.get("Project Summary", ""),
        system_stage=section_values.get("System Stage", ""),
        global_constraints=section_values.get("Global Constraints", ""),
        subsystems=subsystems,
        data_flow=section_values.get("Data Flow", ""),
        entry_points=section_values.get("Entry Points", ""),
        active_work_areas=section_values.get("Active Work Areas", ""),
        known_risks=section_values.get("Known Risks", ""),
    )