atomadic-forge 0.3.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (131) hide show
  1. atomadic_forge/__init__.py +12 -0
  2. atomadic_forge/__main__.py +5 -0
  3. atomadic_forge/a0_qk_constants/__init__.py +1 -0
  4. atomadic_forge/a0_qk_constants/agent_plan_schema.py +120 -0
  5. atomadic_forge/a0_qk_constants/commandsmith_types.py +49 -0
  6. atomadic_forge/a0_qk_constants/config_defaults.py +38 -0
  7. atomadic_forge/a0_qk_constants/emergent_types.py +77 -0
  8. atomadic_forge/a0_qk_constants/error_codes.py +296 -0
  9. atomadic_forge/a0_qk_constants/forge_types.py +89 -0
  10. atomadic_forge/a0_qk_constants/gen_language.py +116 -0
  11. atomadic_forge/a0_qk_constants/lang_extensions.py +150 -0
  12. atomadic_forge/a0_qk_constants/policy_schema.py +48 -0
  13. atomadic_forge/a0_qk_constants/receipt_schema.py +311 -0
  14. atomadic_forge/a0_qk_constants/roi_constants.py +96 -0
  15. atomadic_forge/a0_qk_constants/semantic_types.py +61 -0
  16. atomadic_forge/a0_qk_constants/sidecar_schema.py +81 -0
  17. atomadic_forge/a0_qk_constants/synergy_types.py +62 -0
  18. atomadic_forge/a0_qk_constants/tier_names.py +47 -0
  19. atomadic_forge/a1_at_functions/__init__.py +1 -0
  20. atomadic_forge/a1_at_functions/agent_context_pack.py +193 -0
  21. atomadic_forge/a1_at_functions/agent_memory.py +139 -0
  22. atomadic_forge/a1_at_functions/agent_plan_emitter.py +324 -0
  23. atomadic_forge/a1_at_functions/agent_summary.py +277 -0
  24. atomadic_forge/a1_at_functions/body_extractor.py +306 -0
  25. atomadic_forge/a1_at_functions/card_renderer.py +210 -0
  26. atomadic_forge/a1_at_functions/certify_checks.py +445 -0
  27. atomadic_forge/a1_at_functions/chat_context.py +170 -0
  28. atomadic_forge/a1_at_functions/cherry_pick.py +71 -0
  29. atomadic_forge/a1_at_functions/classify_tier.py +115 -0
  30. atomadic_forge/a1_at_functions/commandsmith_discover.py +167 -0
  31. atomadic_forge/a1_at_functions/commandsmith_render.py +267 -0
  32. atomadic_forge/a1_at_functions/compiler_feedback.py +94 -0
  33. atomadic_forge/a1_at_functions/compliance_checker.py +228 -0
  34. atomadic_forge/a1_at_functions/config_io.py +68 -0
  35. atomadic_forge/a1_at_functions/cs1_renderer.py +588 -0
  36. atomadic_forge/a1_at_functions/doc_synthesizer.py +205 -0
  37. atomadic_forge/a1_at_functions/emergent_compose.py +192 -0
  38. atomadic_forge/a1_at_functions/emergent_rank.py +116 -0
  39. atomadic_forge/a1_at_functions/emergent_signature_extract.py +242 -0
  40. atomadic_forge/a1_at_functions/emergent_synthesize.py +88 -0
  41. atomadic_forge/a1_at_functions/enforce_planner.py +208 -0
  42. atomadic_forge/a1_at_functions/error_hints.py +105 -0
  43. atomadic_forge/a1_at_functions/evolution_log.py +94 -0
  44. atomadic_forge/a1_at_functions/forge_feedback.py +433 -0
  45. atomadic_forge/a1_at_functions/generation_quality.py +322 -0
  46. atomadic_forge/a1_at_functions/import_repair.py +211 -0
  47. atomadic_forge/a1_at_functions/import_smoke.py +102 -0
  48. atomadic_forge/a1_at_functions/js_parser.py +539 -0
  49. atomadic_forge/a1_at_functions/lineage_chain.py +144 -0
  50. atomadic_forge/a1_at_functions/lineage_reader.py +107 -0
  51. atomadic_forge/a1_at_functions/llm_client.py +554 -0
  52. atomadic_forge/a1_at_functions/local_signer.py +134 -0
  53. atomadic_forge/a1_at_functions/lsp_protocol.py +379 -0
  54. atomadic_forge/a1_at_functions/manifest_diff.py +314 -0
  55. atomadic_forge/a1_at_functions/mcp_protocol.py +1066 -0
  56. atomadic_forge/a1_at_functions/patch_scorer.py +267 -0
  57. atomadic_forge/a1_at_functions/plan_adapter.py +75 -0
  58. atomadic_forge/a1_at_functions/policy_loader.py +107 -0
  59. atomadic_forge/a1_at_functions/preflight_change.py +227 -0
  60. atomadic_forge/a1_at_functions/progress_reporter.py +81 -0
  61. atomadic_forge/a1_at_functions/provider_detect.py +157 -0
  62. atomadic_forge/a1_at_functions/provider_resolver.py +48 -0
  63. atomadic_forge/a1_at_functions/receipt_emitter.py +291 -0
  64. atomadic_forge/a1_at_functions/recipes.py +186 -0
  65. atomadic_forge/a1_at_functions/repo_explainer.py +124 -0
  66. atomadic_forge/a1_at_functions/roi_calculator.py +265 -0
  67. atomadic_forge/a1_at_functions/rollback_planner.py +147 -0
  68. atomadic_forge/a1_at_functions/sbom_emitter.py +155 -0
  69. atomadic_forge/a1_at_functions/scaffold_js.py +55 -0
  70. atomadic_forge/a1_at_functions/scaffold_pyproject.py +62 -0
  71. atomadic_forge/a1_at_functions/scaffold_starter.py +94 -0
  72. atomadic_forge/a1_at_functions/scout_walk.py +309 -0
  73. atomadic_forge/a1_at_functions/sidecar_parser.py +161 -0
  74. atomadic_forge/a1_at_functions/sidecar_validator.py +202 -0
  75. atomadic_forge/a1_at_functions/stub_detector.py +158 -0
  76. atomadic_forge/a1_at_functions/synergy_detect.py +166 -0
  77. atomadic_forge/a1_at_functions/synergy_render.py +252 -0
  78. atomadic_forge/a1_at_functions/synergy_surface_extract.py +163 -0
  79. atomadic_forge/a1_at_functions/test_runner.py +196 -0
  80. atomadic_forge/a1_at_functions/test_selector.py +122 -0
  81. atomadic_forge/a1_at_functions/tier_init_rebuild.py +122 -0
  82. atomadic_forge/a1_at_functions/tool_composer.py +130 -0
  83. atomadic_forge/a1_at_functions/transcript_log.py +70 -0
  84. atomadic_forge/a1_at_functions/wire_check.py +260 -0
  85. atomadic_forge/a2_mo_composites/__init__.py +1 -0
  86. atomadic_forge/a2_mo_composites/lineage_chain_store.py +122 -0
  87. atomadic_forge/a2_mo_composites/manifest_store.py +46 -0
  88. atomadic_forge/a2_mo_composites/plan_store.py +164 -0
  89. atomadic_forge/a2_mo_composites/receipt_signer.py +231 -0
  90. atomadic_forge/a3_og_features/__init__.py +1 -0
  91. atomadic_forge/a3_og_features/commandsmith_feature.py +267 -0
  92. atomadic_forge/a3_og_features/demo_packages/mixed_py_js/src/mixed_pkg/__init__.py +3 -0
  93. atomadic_forge/a3_og_features/demo_packages/mixed_py_js/src/mixed_pkg/a0_qk_constants/__init__.py +4 -0
  94. atomadic_forge/a3_og_features/demo_packages/mixed_py_js/src/mixed_pkg/a1_at_functions/__init__.py +14 -0
  95. atomadic_forge/a3_og_features/demo_packages/mixed_py_js/tests/conftest.py +10 -0
  96. atomadic_forge/a3_og_features/demo_packages/mixed_py_js/tests/test_mixed.py +18 -0
  97. atomadic_forge/a3_og_features/demo_runner.py +502 -0
  98. atomadic_forge/a3_og_features/emergent_feature.py +95 -0
  99. atomadic_forge/a3_og_features/emergent_pipeline_integration.py +154 -0
  100. atomadic_forge/a3_og_features/forge_enforce.py +107 -0
  101. atomadic_forge/a3_og_features/forge_evolve.py +176 -0
  102. atomadic_forge/a3_og_features/forge_loop.py +528 -0
  103. atomadic_forge/a3_og_features/forge_pipeline.py +295 -0
  104. atomadic_forge/a3_og_features/forge_plan_apply.py +222 -0
  105. atomadic_forge/a3_og_features/lsp_server.py +98 -0
  106. atomadic_forge/a3_og_features/mcp_server.py +160 -0
  107. atomadic_forge/a3_og_features/setup_wizard.py +337 -0
  108. atomadic_forge/a3_og_features/synergy_feature.py +65 -0
  109. atomadic_forge/a4_sy_orchestration/__init__.py +1 -0
  110. atomadic_forge/a4_sy_orchestration/cli.py +1284 -0
  111. atomadic_forge/commands/__init__.py +1 -0
  112. atomadic_forge/commands/_registry.py +36 -0
  113. atomadic_forge/commands/audit.py +142 -0
  114. atomadic_forge/commands/chat.py +133 -0
  115. atomadic_forge/commands/commandsmith.py +178 -0
  116. atomadic_forge/commands/config_cmd.py +145 -0
  117. atomadic_forge/commands/demo.py +142 -0
  118. atomadic_forge/commands/emergent.py +124 -0
  119. atomadic_forge/commands/emergent_then_synergy.py +70 -0
  120. atomadic_forge/commands/evolve.py +122 -0
  121. atomadic_forge/commands/evolve_then_iterate.py +70 -0
  122. atomadic_forge/commands/feature_then_emergent.py +111 -0
  123. atomadic_forge/commands/iterate.py +140 -0
  124. atomadic_forge/commands/synergy.py +96 -0
  125. atomadic_forge/commands/synergy_then_emergent.py +70 -0
  126. atomadic_forge-0.3.2.dist-info/METADATA +471 -0
  127. atomadic_forge-0.3.2.dist-info/RECORD +131 -0
  128. atomadic_forge-0.3.2.dist-info/WHEEL +5 -0
  129. atomadic_forge-0.3.2.dist-info/entry_points.txt +3 -0
  130. atomadic_forge-0.3.2.dist-info/licenses/LICENSE +15 -0
  131. atomadic_forge-0.3.2.dist-info/top_level.txt +1 -0
@@ -0,0 +1,309 @@
1
+ """Tier a1 — pure repo walker + symbol harvester for the scout phase.
2
+
3
+ Walks Python AND JavaScript / TypeScript. Each file is classified into a
4
+ monadic tier and reduced to a list of ``symbols`` with the same shape across
5
+ languages so downstream stages (cherry, finalize, certify) work polyglot.
6
+ """
7
+
8
+ from __future__ import annotations
9
+
10
+ import ast
11
+ from collections.abc import Callable, Iterable
12
+ from pathlib import Path
13
+
14
+ from ..a0_qk_constants.lang_extensions import (
15
+ ALL_SOURCE_EXTS,
16
+ IGNORED_DIRS,
17
+ JAVASCRIPT_EXTS,
18
+ PYTHON_EXTS,
19
+ TYPESCRIPT_EXTS,
20
+ file_class_for_path,
21
+ path_parts_contain_ignored_dir,
22
+ )
23
+ from .body_extractor import _detect_state_markers
24
+ from .classify_tier import classify_tier, detect_effects
25
+ from .js_parser import classify_js_tier, detect_js_effects, parse_surface
26
+
27
# Legacy alias: external code may still import ``_SKIP_DIRS`` from this
# module. The authoritative list is lang_extensions.IGNORED_DIRS.
_SKIP_DIRS = IGNORED_DIRS
30
+
31
+
32
def _under_skip_dir(rel_parts: tuple[str, ...]) -> bool:
    """Report whether a relative path passes through an ignored directory.

    Thin wrapper around ``path_parts_contain_ignored_dir``; the decision is
    made entirely by that helper.  The intended contract (per this module's
    notes — confirm against the helper) is that a dot-prefixed segment is
    skipped only when it is explicitly listed in IGNORED_DIRS, not merely
    because it starts with a dot.
    """
    is_ignored = path_parts_contain_ignored_dir(rel_parts)
    return is_ignored
42
+
43
+
44
def iter_python_files(root: Path) -> Iterable[Path]:
    """Yield the ``*.py`` files beneath ``root`` that should be scanned.

    Skipped:
      * anything that is not a regular file (e.g. a directory that happens
        to be named ``something.py``),
      * paths for which ``_under_skip_dir`` returns True,
      * files whose name starts with ``_`` (this includes ``__init__.py``).
    """
    root = root.resolve()
    for candidate in root.rglob("*.py"):
        # rglob matches on name only, so directories can match the pattern.
        if not candidate.is_file():
            continue
        if _under_skip_dir(candidate.relative_to(root).parts):
            continue
        if candidate.name.startswith("_"):
            continue
        yield candidate
52
+
53
+
54
def iter_source_files(root: Path) -> Iterable[Path]:
    """Yield each Python / JS / TS file under ``root`` that gets classified.

    A path is yielded when it is a regular file, its lower-cased suffix is
    in ALL_SOURCE_EXTS, and it is not under a skipped directory.  Python
    files whose name starts with ``_`` are dropped; files of the other
    languages are kept regardless of a leading underscore.
    """
    base = root.resolve()
    for entry in base.rglob("*"):
        if not entry.is_file():
            continue
        ext = entry.suffix.lower()
        wanted = (
            ext in ALL_SOURCE_EXTS
            and not _under_skip_dir(entry.relative_to(base).parts)
        )
        if not wanted:
            continue
        private_python = ext in PYTHON_EXTS and entry.name.startswith("_")
        if not private_python:
            yield entry
74
+
75
+
76
def _harvest_python_file(f: Path, rel: str, *, symbols: list[dict],
                         tier_dist: dict[str, int],
                         effect_dist: dict[str, int]) -> None:
    """Parse one Python file and record its top-level symbols.

    Args:
        f: File to read (decoded as UTF-8 with replacement characters).
        rel: Path recorded in each symbol's ``file`` field.
        symbols: Output list; one record is appended per harvested symbol.
        tier_dist: Per-tier counter, updated in place via ``_collect_symbol``.
        effect_dist: Per-effect counter, updated in place likewise.

    Harvested: every top-level function, every top-level class whose name
    does not start with ``_``, and each method of such a class that is
    either ``__init__`` or not underscore-prefixed.

    Files that cannot be read or parsed are skipped silently so that a
    single bad file never aborts the walk.
    """
    try:
        text = f.read_text(encoding="utf-8", errors="replace")
        tree = ast.parse(text, filename=str(f))
    except (SyntaxError, ValueError, OSError):
        # ValueError: compile()/ast.parse reject source containing NUL
        # bytes with ValueError on some interpreter versions.
        return

    func_types = (ast.FunctionDef, ast.AsyncFunctionDef)
    for node in tree.body:
        if isinstance(node, func_types):
            _collect_symbol(symbols, node, rel, kind="function",
                            qualname=node.name, tier_dist=tier_dist,
                            effect_dist=effect_dist)
            continue
        if not isinstance(node, ast.ClassDef) or node.name.startswith("_"):
            continue

        self_assign, class_collect = _detect_state_markers(node)
        _collect_symbol(symbols, node, rel, kind="class",
                        qualname=node.name, tier_dist=tier_dist,
                        effect_dist=effect_dist,
                        body_signals={
                            "has_self_assign": self_assign,
                            "has_class_attr_collections": class_collect,
                        })
        for sub in node.body:
            if not isinstance(sub, func_types):
                continue
            # Private methods are skipped, but the constructor is kept.
            if sub.name.startswith("_") and sub.name != "__init__":
                continue
            _collect_symbol(symbols, sub, rel, kind="method",
                            qualname=f"{node.name}.{sub.name}",
                            tier_dist=tier_dist,
                            effect_dist=effect_dist)
106
+
107
+
108
def _harvest_js_file(f: Path, rel: str, language: str, *, symbols: list[dict],
                     tier_dist: dict[str, int],
                     effect_dist: dict[str, int]) -> None:
    """Record one JS/TS file and its named exports as scout symbols.

    The file is read as UTF-8 (with replacement); an unreadable file is
    skipped.  A ``module`` record for the file itself is always appended,
    so a file with no exports still appears in the output.  After that,
    one record is appended per exported function, class and const.

    ``tier_dist`` and ``effect_dist`` are updated in place for the module
    record and for every export record.
    """
    try:
        source = f.read_text(encoding="utf-8", errors="replace")
    except OSError:
        return

    surface = parse_surface(source)
    tier = classify_js_tier(path=rel, surface=surface)
    effects = detect_js_effects(source)

    # Module-level record representing the file as a whole.
    symbols.append({
        "name": f.name,
        "qualname": f.stem,
        "kind": "module",
        "file": rel,
        "lineno": 1,
        "tier_guess": tier,
        "suggested_tier": tier,
        "effects": effects,
        "complexity": surface.statement_count,
        "has_self_assign": False,
        "language": language,
        "exports": surface.all_exports,
        "imports": surface.imports,
    })
    tier_dist[tier] = tier_dist.get(tier, 0) + 1
    for effect in effects:
        if effect in effect_dist:
            effect_dist[effect] += 1

    # One record per named export, addressed by qualname like Python
    # symbols.  Each export kind is tagged with a fixed effect label.
    export_groups = (
        ("exported_functions", "function", "pure"),
        ("exported_classes", "class", "state"),
        ("exported_consts", "const", "pure"),
    )
    for attr, kind, effect in export_groups:
        for export_name in getattr(surface, attr):
            _push_js_symbol(symbols, export_name, kind, rel, tier,
                            language, [effect], tier_dist, effect_dist)
153
+
154
+
155
def _push_js_symbol(symbols: list[dict], name: str, kind: str, rel: str,
                    tier: str, language: str, effects: list[str],
                    tier_dist: dict[str, int],
                    effect_dist: dict[str, int]) -> None:
    """Append a record for one JS/TS export and bump the distributions.

    The record uses ``name`` for both ``name`` and ``qualname``, ``tier``
    for both ``tier_guess`` and ``suggested_tier``, and fixed placeholder
    values for ``lineno`` (0), ``complexity`` (0) and ``has_self_assign``
    (False).  ``tier_dist[tier]`` is incremented; ``effect_dist`` is
    incremented only for effects that are already keys in it.
    """
    symbols.append(dict(
        name=name,
        qualname=name,
        kind=kind,
        file=rel,
        lineno=0,
        tier_guess=tier,
        suggested_tier=tier,
        effects=effects,
        complexity=0,
        has_self_assign=False,
        language=language,
    ))
    tier_dist[tier] = 1 + tier_dist.get(tier, 0)
    for tag in effects:
        if tag not in effect_dist:
            continue  # unknown effect labels are not counted
        effect_dist[tag] += 1
177
+
178
+
179
def _file_class_counts(root: Path) -> dict[str, int]:
    """Tally the regular files under ``root`` by file class.

    Files under a skipped directory are left out.  Each remaining file is
    labelled by ``file_class_for_path`` and counted; the result always
    contains the keys ``source``, ``documentation``, ``config``, ``asset``
    and ``other`` (zero when nothing matched), plus any other label the
    classifier returns.
    """
    tally = dict.fromkeys(
        ("source", "documentation", "config", "asset", "other"), 0
    )
    regular_files = (entry for entry in root.rglob("*") if entry.is_file())
    for entry in regular_files:
        if _under_skip_dir(entry.relative_to(root).parts):
            continue
        label = file_class_for_path(entry.as_posix())
        tally[label] = tally.get(label, 0) + 1
    return tally
193
+
194
+
195
def harvest_repo(
    root: Path,
    *,
    progress: Callable[[int, int, str], None] | None = None,
) -> dict:
    """Walk a repo, classify its symbols, and return a scout-shaped dict.

    Args:
        root: Repository root; resolved to an absolute path before walking.
        progress: Optional callback invoked once per source file as
            ``progress(processed_count, total_count, relative_path)``.
            This function does no reporting of its own; callers decide
            what the callback does.

    Returns:
        A dict with schema ``atomadic-forge.scout/v1`` holding per-language
        file counts, the tier and effect distributions, the harvested
        ``symbols`` list and a list of textual ``recommendations``.

    Note: this reads every source file under ``root`` and walks the tree
    several times (source listing, file-class tally, raw entry count).
    """
    root = Path(root).resolve()
    src_files = list(iter_source_files(root))
    file_total = len(src_files)
    file_class_counts = _file_class_counts(root)
    symbols: list[dict] = []
    tier_dist: dict[str, int] = {}
    effect_dist: dict[str, int] = {"pure": 0, "state": 0, "io": 0}
    languages = {"python": 0, "javascript": 0, "typescript": 0}

    for idx, f in enumerate(src_files, start=1):
        rel = f.relative_to(root).as_posix()
        suffix = f.suffix.lower()
        if suffix in PYTHON_EXTS:
            languages["python"] += 1
            _harvest_python_file(f, rel, symbols=symbols,
                                 tier_dist=tier_dist,
                                 effect_dist=effect_dist)
        elif suffix in JAVASCRIPT_EXTS:
            languages["javascript"] += 1
            _harvest_js_file(f, rel, "javascript", symbols=symbols,
                             tier_dist=tier_dist,
                             effect_dist=effect_dist)
        elif suffix in TYPESCRIPT_EXTS:
            languages["typescript"] += 1
            _harvest_js_file(f, rel, "typescript", symbols=symbols,
                             tier_dist=tier_dist,
                             effect_dist=effect_dist)
        if progress is not None:
            progress(idx, file_total, rel)

    py_count = languages["python"]
    js_count = languages["javascript"]
    ts_count = languages["typescript"]
    # On a tie max() keeps the first key, so Python wins ties; an empty
    # repo also reports "python".
    primary = max(languages, key=languages.get) if any(languages.values()) else "python"

    recommendations: list[str] = []
    if tier_dist.get("a4_sy_orchestration", 0) > tier_dist.get("a1_at_functions", 0):
        recommendations.append("Top-heavy at a4 — extract pure helpers into a1.")
    # Kept separate from the file total above; ``or 1`` avoids dividing by
    # zero when no effects were counted.
    effect_total = sum(effect_dist.values()) or 1
    if effect_dist["io"] / effect_total > 0.3:
        recommendations.append("High I/O ratio — consider pushing I/O to a4 boundaries.")
    if tier_dist.get("a1_at_functions", 0) == 0 and symbols:
        recommendations.append("No pure functions detected — extract validators/parsers.")

    tier_dirs = ("a0_qk_constants", "a1_at_functions", "a2_mo_composites",
                 "a3_og_features", "a4_sy_orchestration")

    def _in_tier_dir(path: str) -> bool:
        # True when any path segment (leading or interior) is a tier dir.
        return any(f"/{t}/" in path or path.startswith(f"{t}/") for t in tier_dirs)

    if js_count + ts_count > 0 and not any(
        _in_tier_dir(s["file"])
        for s in symbols
        if s.get("language") in ("javascript", "typescript")
    ):
        recommendations.append(
            "JS/TS files are not yet split into aN_* tier directories — "
            "see suggested_tier per file in symbols[]."
        )

    return {
        "schema_version": "atomadic-forge.scout/v1",
        "repo": str(root),
        # `file_count` is the raw rglob walk (legacy) — it counts every
        # entry, directories and ignored paths included.
        "file_count": sum(1 for _ in root.rglob("*")),
        # `file_class_counts` excludes IGNORED_DIRS and breaks files into
        # source / documentation / config / asset / other. Tier-layout
        # scoring should use this, not the raw walk.
        "file_class_counts": file_class_counts,
        "python_file_count": py_count,
        "javascript_file_count": js_count,
        "typescript_file_count": ts_count,
        "language_distribution": languages,
        "primary_language": primary,
        "symbol_count": len(symbols),
        "tier_distribution": tier_dist,
        "effect_distribution": effect_dist,
        "symbols": symbols,
        "recommendations": recommendations,
    }
286
+
287
+
288
def _collect_symbol(symbols: list, node, rel_path: str, *, kind: str,
                    qualname: str, tier_dist: dict, effect_dist: dict,
                    body_signals: dict | None = None) -> None:
    """Append a record for one Python AST node and update the counters.

    Classes are always tagged with the single effect ``"pure"``; for any
    other node the effects come from ``detect_effects``.  The tier comes
    from ``classify_tier``.  ``complexity`` is the length of the node's
    ``ast.dump`` text.  ``effect_dist`` is incremented only for effects
    that are already keys in it.
    """
    if isinstance(node, ast.ClassDef):
        effects = ["pure"]
    else:
        effects = detect_effects(node)
    tier = classify_tier(name=qualname, kind=kind, path=rel_path,
                         body_signals=body_signals)

    has_self_assign = bool(body_signals and body_signals.get("has_self_assign"))
    symbols.append(dict(
        name=getattr(node, "name", qualname),
        qualname=qualname,
        kind=kind,
        file=rel_path,
        lineno=getattr(node, "lineno", 0),
        tier_guess=tier,
        effects=effects,
        complexity=len(ast.dump(node)),
        has_self_assign=has_self_assign,
    ))

    tier_dist[tier] = 1 + tier_dist.get(tier, 0)
    for tag in effects:
        if tag not in effect_dist:
            continue
        effect_dist[tag] += 1
@@ -0,0 +1,161 @@
1
+ """Tier a1 — pure .forge sidecar parser (Lane D W8).
2
+
3
+ Reads a YAML sidecar file and returns a structured ``SidecarFile``
4
+ dict. Validates required fields + effect-kind enum membership.
5
+ Pure: one bounded read; never raises on unknown fields (preserved
6
+ in ``extra``).
7
+
8
+ Lane D W11 will add the cross-validator (compares the sidecar's
9
+ declared effects against the source AST). Lane D W20 will dispatch
10
+ the ``proves:`` clauses through the Lean4 obligation discharger.
11
+ """
12
+ from __future__ import annotations
13
+
14
+ from pathlib import Path
15
+ from typing import TypedDict
16
+
17
+ import yaml
18
+
19
+ from ..a0_qk_constants.sidecar_schema import (
20
+ REQUIRED_SIDECAR_FIELDS,
21
+ REQUIRED_SYMBOL_FIELDS,
22
+ SCHEMA_VERSION_SIDECAR_V1,
23
+ VALID_EFFECTS,
24
+ SidecarFile,
25
+ SidecarSymbol,
26
+ )
27
+
28
+
29
class SidecarParseError(ValueError):
    """Signals a sidecar document that is too malformed to recover from.

    Subclasses ``ValueError`` so generic value-error handlers also catch it.
    """
31
+
32
+
33
class ParseResult(TypedDict, total=False):
    """Outcome of parsing a sidecar document; every key is optional."""

    # Schema identifier stamped on the result.
    schema_version: str
    # The parsed sidecar, or None when parsing failed.
    sidecar: SidecarFile | None
    # Problems that prevented a sidecar from being produced.
    errors: list[str]
    # Non-fatal issues noticed while parsing.
    warnings: list[str]
38
+
39
+
40
def parse_sidecar_text(text: str, *, source: str = "<inline>") -> ParseResult:
    """Parse a YAML sidecar string into a structured result.

    Args:
        text: YAML document text.
        source: Label used as a prefix in error / warning messages.

    Returns:
        A ParseResult.  When the document is unusable, ``sidecar`` is None
        and ``errors`` is populated.  Otherwise ``sidecar`` holds the typed
        dict and ``warnings`` lists soft issues (an unexpected
        ``schema_version`` or an effect outside VALID_EFFECTS are warnings,
        not errors).  Malformed input is reported through ``errors`` rather
        than by raising.
    """
    out: ParseResult = {
        "schema_version": SCHEMA_VERSION_SIDECAR_V1,
        "sidecar": None,
        "errors": [],
        "warnings": [],
    }
    errors = out["errors"]
    warnings = out["warnings"]

    try:
        data = yaml.safe_load(text)
    except yaml.YAMLError as exc:
        errors.append(f"YAML parse error in {source}: {exc}")
        return out
    # Only an empty document (None) is treated as an empty mapping; other
    # falsy values such as ``[]`` or ``0`` must reach the type check below
    # so they are reported as the wrong top-level type.
    if data is None:
        data = {}
    if not isinstance(data, dict):
        errors.append(
            f"{source}: top-level must be a mapping, got "
            f"{type(data).__name__}"
        )
        return out

    for f in REQUIRED_SIDECAR_FIELDS:
        if f not in data:
            errors.append(f"{source}: missing required field {f!r}")
    declared_schema = data.get("schema_version", "")
    if declared_schema and declared_schema != SCHEMA_VERSION_SIDECAR_V1:
        warnings.append(
            f"{source}: declares schema_version={declared_schema!r}; "
            f"expected {SCHEMA_VERSION_SIDECAR_V1!r}"
        )
    if errors:
        return out

    raw_symbols = data.get("symbols") or []
    if not isinstance(raw_symbols, list):
        errors.append(
            f"{source}: 'symbols' must be a list, got "
            f"{type(raw_symbols).__name__}"
        )
        return out

    parsed_symbols: list[SidecarSymbol] = []
    for i, raw in enumerate(raw_symbols):
        if not isinstance(raw, dict):
            errors.append(f"{source}: symbols[{i}] must be a mapping")
            continue

        missing = [f for f in REQUIRED_SYMBOL_FIELDS if f not in raw]
        # ``name`` is needed to build the symbol, so report it as missing
        # even if the schema constant does not list it.
        if "name" not in raw and "name" not in missing:
            missing.append("name")
        for f in missing:
            errors.append(f"{source}: symbols[{i}] missing required {f!r}")
        if "name" not in raw:
            # Without a name no symbol can be built; the error recorded
            # above already makes the overall parse fail.
            continue
        if not isinstance(raw["name"], str):
            errors.append(f"{source}: symbols[{i}].name must be a string")
            continue

        effect = str(raw.get("effect", ""))
        if effect and effect not in VALID_EFFECTS:
            warnings.append(
                f"{source}: symbols[{i}] effect={effect!r} not in "
                f"VALID_EFFECTS — preserved as-is for forward-compat"
            )
        sym = SidecarSymbol(name=raw["name"],
                            effect=effect)  # type: ignore[typeddict-item]
        # Optional fields are copied only when they have the expected type;
        # anything else is dropped without comment.
        if isinstance(raw.get("compose_with"), list):
            sym["compose_with"] = [str(s) for s in raw["compose_with"]]
        if isinstance(raw.get("proves"), list):
            sym["proves"] = [str(s) for s in raw["proves"]]
        if isinstance(raw.get("tier"), str):
            sym["tier"] = raw["tier"]
        if isinstance(raw.get("notes"), list):
            sym["notes"] = [str(s) for s in raw["notes"]]
        parsed_symbols.append(sym)

    if errors:
        return out

    sidecar: SidecarFile = SidecarFile(
        schema_version=SCHEMA_VERSION_SIDECAR_V1,
        target=str(data["target"]),
        symbols=parsed_symbols,
    )
    # Forward-compat: stash every top-level key that is not a required field.
    known = set(REQUIRED_SIDECAR_FIELDS)
    extra = {k: v for k, v in data.items() if k not in known}
    if extra:
        sidecar["extra"] = extra
    out["sidecar"] = sidecar
    return out
134
+
135
+
136
def parse_sidecar_file(path: Path) -> ParseResult:
    """Read a sidecar file from disk and parse it.

    Args:
        path: Location of the sidecar file (anything ``Path`` accepts).

    Returns:
        The result of ``parse_sidecar_text`` on the file's contents.  If the
        file is missing, unreadable, or not valid UTF-8, a ParseResult with
        ``sidecar=None`` and a single error message is returned instead of
        raising.
    """
    path = Path(path)

    def _failure(message: str) -> ParseResult:
        # Uniform shape for every "could not even load the text" outcome.
        return {
            "schema_version": SCHEMA_VERSION_SIDECAR_V1,
            "sidecar": None,
            "errors": [message],
            "warnings": [],
        }

    if not path.exists():
        return _failure(f"sidecar file not found: {path}")
    try:
        text = path.read_text(encoding="utf-8")
    except (OSError, UnicodeDecodeError) as exc:
        # UnicodeDecodeError is a ValueError, not an OSError, so it has to
        # be listed explicitly to keep undecodable files from crashing.
        return _failure(f"could not read {path}: {exc}")
    return parse_sidecar_text(text, source=str(path))
156
+
157
+
158
def find_sidecar_for(source_file: Path) -> Path:
    """Return the sidecar path for ``source_file``.

    The sidecar sits next to the source and carries an extra ``.forge``
    suffix, e.g. ``users/auth.py`` maps to ``users/auth.py.forge``.
    """
    source = Path(source_file)
    sidecar_suffix = f"{source.suffix}.forge"
    return source.with_suffix(sidecar_suffix)
@@ -0,0 +1,202 @@
1
+ """Tier a1 — pure .forge sidecar cross-validator (Lane D W11).
2
+
3
+ Compares a parsed SidecarFile against the source file's AST and
4
+ returns a structured report of mismatches. Catches the seven
5
+ classes of drift the Golden Path names:
6
+
7
+ 1. sidecar declares a symbol the source doesn't have
8
+ 2. source has a public symbol the sidecar didn't declare
9
+ 3. effect=Pure declared but source uses obvious I/O / network
10
+ 4. effect=Pure declared but source has Mutation patterns
11
+ 5. compose_with names a symbol that doesn't exist in any imported
12
+ module (best-effort lexical check; not a full resolver)
13
+ 6. tier declared but source path lives in a different tier
14
+ 7. proves clauses naming lemmas with no entry in the local Lean4
15
+ manifest (W20 — soft-skipped today)
16
+
17
+ Pure: walks AST + the sidecar dict; no execution, no LLM, no
18
+ network. Soft on parse failures (returns 'failed_to_parse_source').
19
+ """
20
+ from __future__ import annotations
21
+
22
+ import ast
23
+ from pathlib import Path
24
+ from typing import TypedDict
25
+
26
+ from ..a0_qk_constants.error_codes import SIDECAR_S_TO_F
27
+ from ..a0_qk_constants.sidecar_schema import SidecarFile
28
+
29
# Schema identifier stamped on every validation report.
SCHEMA_VERSION_VALIDATE_V1 = "atomadic-forge.sidecar.validate/v1"
30
+
31
+
32
class ValidationFinding(TypedDict, total=False):
    """One mismatch between a sidecar and its source; all keys optional."""

    # Local drift-class label (S0001..S0007).
    code: str
    # Global, registered F-code (F0100..F0109).
    f_code: str
    # One of 'error', 'warn' or 'info'.
    severity: str
    # Name of the symbol the finding is about.
    symbol: str
    # Human-readable description of the mismatch.
    message: str
38
+
39
+
40
class ValidationReport(TypedDict, total=False):
    """Result of cross-checking a sidecar; all keys optional."""

    # Schema identifier of the report format.
    schema_version: str
    # The sidecar's declared target.
    target: str
    # Number of entries in ``findings``.
    finding_count: int
    # Individual mismatches, in the order they were detected.
    findings: list[ValidationFinding]
    # One of 'PASS', 'FAIL' or 'unparseable'.
    verdict: str
46
+
47
+
48
# Substring heuristics for spotting impure behaviour in a symbol's text.
# They are plain substrings, not parsed calls, so false positives are
# expected (for example ``.get(`` also matches a dict lookup).
_IO_HINTS = (
    "open(",
    "read(",
    "write(",
    "Path(",
    ".write_text",
    ".read_text",
    "subprocess.",
    "os.system",
)
_NET_HINTS = (
    "requests.",
    "urllib.",
    "http.client",
    "socket.",
    "urlopen(",
    ".post(",
    ".get(",
)
_RANDOM_HINTS = (
    "random.",
    "secrets.",
    "uuid.",
    "datetime.now(",
    "time.time(",
)
57
+
58
+
59
def _collect_top_level_symbols(tree: ast.AST) -> dict[str, ast.AST]:
    """Index the public top-level functions and classes of ``tree`` by name.

    Only direct children in ``tree.body`` are considered (a tree without a
    ``body`` attribute yields an empty dict).  Names starting with ``_``
    are left out; when a name is defined more than once the last
    definition wins.
    """
    definition_types = (ast.FunctionDef, ast.AsyncFunctionDef, ast.ClassDef)
    top_level = getattr(tree, "body", [])
    return {
        stmt.name: stmt
        for stmt in top_level
        if isinstance(stmt, definition_types) and not stmt.name.startswith("_")
    }
67
+
68
+
69
def _node_source_text(source: str, node: ast.AST) -> str:
    """Return text for ``node``, preferring ``ast.unparse`` output.

    If unparsing fails for any reason, fall back to the raw lines of
    ``source`` spanned by the node's ``lineno`` / ``end_lineno`` (a missing
    or falsy ``lineno`` is treated as line 1, a missing or falsy
    ``end_lineno`` as a single-line span).
    """
    try:
        rendered = ast.unparse(node)
    except Exception:  # noqa: BLE001
        rendered = None
    if rendered is not None:
        return rendered

    # Slice bounds: 0-based start, exclusive end.
    first = (getattr(node, "lineno", 1) or 1) - 1
    last = getattr(node, "end_lineno", first + 1) or first + 1
    return "\n".join(source.splitlines()[first:last])
77
+
78
+
79
def _check_pure_against_source(node_text: str) -> list[str]:
    """List the purity concerns suggested by the text of a symbol.

    Each hint group is tested by plain substring search; a group adds its
    message at most once.  Messages come back in a fixed order: network,
    filesystem / IO, then non-determinism.
    """
    hint_groups = (
        (_NET_HINTS, "network call detected"),
        (_IO_HINTS, "filesystem / IO call detected"),
        (_RANDOM_HINTS, "non-deterministic input detected"),
    )
    return [
        message
        for needles, message in hint_groups
        if any(needle in node_text for needle in needles)
    ]
90
+
91
+
92
def _detect_tier(path: str) -> str | None:
    """Return the first path segment that is a tier directory name.

    Segments must match a tier name exactly; ``None`` is returned when no
    segment does.
    """
    tier_names = frozenset({
        "a0_qk_constants",
        "a1_at_functions",
        "a2_mo_composites",
        "a3_og_features",
        "a4_sy_orchestration",
    })
    segments = Path(path).parts
    return next((seg for seg in segments if seg in tier_names), None)
100
+
101
+
102
def validate_sidecar(
    sidecar: SidecarFile,
    *,
    source_text: str,
    source_path: Path | str | None = None,
) -> ValidationReport:
    """Cross-check ``sidecar`` against the actual source.

    Args:
        sidecar: Parsed sidecar mapping (``target`` and ``symbols`` are read).
        source_text: Text of the Python source the sidecar describes.
        source_path: Optional path of that source; when given, it is used
            to compare each declared ``tier`` with the tier directory found
            in the path.

    Returns:
        A ValidationReport.  ``verdict`` is ``'unparseable'`` when the
        source cannot be parsed, ``'FAIL'`` when at least one finding has
        severity ``'error'``, and ``'PASS'`` otherwise.

    Checks performed: S0001 (declared but absent from source), S0002
    (public in source but undeclared, advisory), S0003 (declared Pure but
    the text matches an impurity hint) and S0006 (declared tier differs
    from the path tier).  Source that cannot be parsed is reported as a
    finding, not raised.
    """
    findings: list[ValidationFinding] = []
    target = sidecar.get("target", "<unknown>")

    try:
        tree = ast.parse(source_text)
    except (SyntaxError, ValueError) as exc:
        # ValueError: source containing NUL bytes is rejected with
        # ValueError on some interpreter versions.
        unparseable = ValidationFinding(
            code="S0000",
            severity="error",
            symbol="(file)",
            message=f"source did not parse: {exc}",
        )
        # Same S-code -> F-code promotion as for ordinary findings below.
        if "S0000" in SIDECAR_S_TO_F:
            unparseable["f_code"] = SIDECAR_S_TO_F["S0000"]
        return ValidationReport(
            schema_version=SCHEMA_VERSION_VALIDATE_V1,
            target=target,
            finding_count=1,
            findings=[unparseable],
            verdict="unparseable",
        )

    symbols = _collect_top_level_symbols(tree)
    declared = {s.get("name", ""): s for s in sidecar.get("symbols") or []}

    # S0001 — sidecar declares a symbol the source doesn't have.
    for name in declared:
        if name and name not in symbols:
            findings.append(ValidationFinding(
                code="S0001", severity="error", symbol=name,
                message=f"sidecar declares {name!r} but source has no "
                        "top-level public symbol with that name",
            ))

    # S0002 — source has a public symbol the sidecar didn't declare.
    for name in symbols:
        if name not in declared:
            findings.append(ValidationFinding(
                code="S0002", severity="warn", symbol=name,
                message=f"source has public symbol {name!r} not declared "
                        "in sidecar (gradual coverage is OK; this is "
                        "advisory)",
            ))

    # S0003 — Pure declared but the symbol's text matches an impurity
    # hint.  Every kind of hit is reported under S0003; no S0004 finding
    # is emitted here.
    for name, decl in declared.items():
        if name not in symbols or decl.get("effect") != "Pure":
            continue
        node_text = _node_source_text(source_text, symbols[name])
        findings.extend(
            ValidationFinding(
                code="S0003", severity="error", symbol=name,
                message=f"Pure-declared symbol {name!r} appears to "
                        f"violate purity: {hit}",
            )
            for hit in _check_pure_against_source(node_text)
        )

    # S0006 — declared tier vs detected path tier.
    if source_path is not None:
        path_tier = _detect_tier(str(source_path))
        for name, decl in declared.items():
            declared_tier = decl.get("tier")
            if declared_tier and path_tier and declared_tier != path_tier:
                findings.append(ValidationFinding(
                    code="S0006", severity="warn", symbol=name,
                    message=f"sidecar declares tier={declared_tier!r} "
                            f"but source lives in tier {path_tier!r}",
                ))

    # S0005 / S0007 (compose_with resolution, proves discharge) are not
    # checked here.

    # Promote each S-code to its registered F-code so downstream tools can
    # address sidecar drift in the shared F-code namespace.
    for finding in findings:
        s_code = finding.get("code", "")
        if s_code in SIDECAR_S_TO_F:
            finding["f_code"] = SIDECAR_S_TO_F[s_code]

    has_error = any(f.get("severity") == "error" for f in findings)
    verdict = "FAIL" if has_error else "PASS"

    return ValidationReport(
        schema_version=SCHEMA_VERSION_VALIDATE_V1,
        target=target,
        finding_count=len(findings),
        findings=findings,
        verdict=verdict,
    )