atomadic-forge 0.3.2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- atomadic_forge/__init__.py +12 -0
- atomadic_forge/__main__.py +5 -0
- atomadic_forge/a0_qk_constants/__init__.py +1 -0
- atomadic_forge/a0_qk_constants/agent_plan_schema.py +120 -0
- atomadic_forge/a0_qk_constants/commandsmith_types.py +49 -0
- atomadic_forge/a0_qk_constants/config_defaults.py +38 -0
- atomadic_forge/a0_qk_constants/emergent_types.py +77 -0
- atomadic_forge/a0_qk_constants/error_codes.py +296 -0
- atomadic_forge/a0_qk_constants/forge_types.py +89 -0
- atomadic_forge/a0_qk_constants/gen_language.py +116 -0
- atomadic_forge/a0_qk_constants/lang_extensions.py +150 -0
- atomadic_forge/a0_qk_constants/policy_schema.py +48 -0
- atomadic_forge/a0_qk_constants/receipt_schema.py +311 -0
- atomadic_forge/a0_qk_constants/roi_constants.py +96 -0
- atomadic_forge/a0_qk_constants/semantic_types.py +61 -0
- atomadic_forge/a0_qk_constants/sidecar_schema.py +81 -0
- atomadic_forge/a0_qk_constants/synergy_types.py +62 -0
- atomadic_forge/a0_qk_constants/tier_names.py +47 -0
- atomadic_forge/a1_at_functions/__init__.py +1 -0
- atomadic_forge/a1_at_functions/agent_context_pack.py +193 -0
- atomadic_forge/a1_at_functions/agent_memory.py +139 -0
- atomadic_forge/a1_at_functions/agent_plan_emitter.py +324 -0
- atomadic_forge/a1_at_functions/agent_summary.py +277 -0
- atomadic_forge/a1_at_functions/body_extractor.py +306 -0
- atomadic_forge/a1_at_functions/card_renderer.py +210 -0
- atomadic_forge/a1_at_functions/certify_checks.py +445 -0
- atomadic_forge/a1_at_functions/chat_context.py +170 -0
- atomadic_forge/a1_at_functions/cherry_pick.py +71 -0
- atomadic_forge/a1_at_functions/classify_tier.py +115 -0
- atomadic_forge/a1_at_functions/commandsmith_discover.py +167 -0
- atomadic_forge/a1_at_functions/commandsmith_render.py +267 -0
- atomadic_forge/a1_at_functions/compiler_feedback.py +94 -0
- atomadic_forge/a1_at_functions/compliance_checker.py +228 -0
- atomadic_forge/a1_at_functions/config_io.py +68 -0
- atomadic_forge/a1_at_functions/cs1_renderer.py +588 -0
- atomadic_forge/a1_at_functions/doc_synthesizer.py +205 -0
- atomadic_forge/a1_at_functions/emergent_compose.py +192 -0
- atomadic_forge/a1_at_functions/emergent_rank.py +116 -0
- atomadic_forge/a1_at_functions/emergent_signature_extract.py +242 -0
- atomadic_forge/a1_at_functions/emergent_synthesize.py +88 -0
- atomadic_forge/a1_at_functions/enforce_planner.py +208 -0
- atomadic_forge/a1_at_functions/error_hints.py +105 -0
- atomadic_forge/a1_at_functions/evolution_log.py +94 -0
- atomadic_forge/a1_at_functions/forge_feedback.py +433 -0
- atomadic_forge/a1_at_functions/generation_quality.py +322 -0
- atomadic_forge/a1_at_functions/import_repair.py +211 -0
- atomadic_forge/a1_at_functions/import_smoke.py +102 -0
- atomadic_forge/a1_at_functions/js_parser.py +539 -0
- atomadic_forge/a1_at_functions/lineage_chain.py +144 -0
- atomadic_forge/a1_at_functions/lineage_reader.py +107 -0
- atomadic_forge/a1_at_functions/llm_client.py +554 -0
- atomadic_forge/a1_at_functions/local_signer.py +134 -0
- atomadic_forge/a1_at_functions/lsp_protocol.py +379 -0
- atomadic_forge/a1_at_functions/manifest_diff.py +314 -0
- atomadic_forge/a1_at_functions/mcp_protocol.py +1066 -0
- atomadic_forge/a1_at_functions/patch_scorer.py +267 -0
- atomadic_forge/a1_at_functions/plan_adapter.py +75 -0
- atomadic_forge/a1_at_functions/policy_loader.py +107 -0
- atomadic_forge/a1_at_functions/preflight_change.py +227 -0
- atomadic_forge/a1_at_functions/progress_reporter.py +81 -0
- atomadic_forge/a1_at_functions/provider_detect.py +157 -0
- atomadic_forge/a1_at_functions/provider_resolver.py +48 -0
- atomadic_forge/a1_at_functions/receipt_emitter.py +291 -0
- atomadic_forge/a1_at_functions/recipes.py +186 -0
- atomadic_forge/a1_at_functions/repo_explainer.py +124 -0
- atomadic_forge/a1_at_functions/roi_calculator.py +265 -0
- atomadic_forge/a1_at_functions/rollback_planner.py +147 -0
- atomadic_forge/a1_at_functions/sbom_emitter.py +155 -0
- atomadic_forge/a1_at_functions/scaffold_js.py +55 -0
- atomadic_forge/a1_at_functions/scaffold_pyproject.py +62 -0
- atomadic_forge/a1_at_functions/scaffold_starter.py +94 -0
- atomadic_forge/a1_at_functions/scout_walk.py +309 -0
- atomadic_forge/a1_at_functions/sidecar_parser.py +161 -0
- atomadic_forge/a1_at_functions/sidecar_validator.py +202 -0
- atomadic_forge/a1_at_functions/stub_detector.py +158 -0
- atomadic_forge/a1_at_functions/synergy_detect.py +166 -0
- atomadic_forge/a1_at_functions/synergy_render.py +252 -0
- atomadic_forge/a1_at_functions/synergy_surface_extract.py +163 -0
- atomadic_forge/a1_at_functions/test_runner.py +196 -0
- atomadic_forge/a1_at_functions/test_selector.py +122 -0
- atomadic_forge/a1_at_functions/tier_init_rebuild.py +122 -0
- atomadic_forge/a1_at_functions/tool_composer.py +130 -0
- atomadic_forge/a1_at_functions/transcript_log.py +70 -0
- atomadic_forge/a1_at_functions/wire_check.py +260 -0
- atomadic_forge/a2_mo_composites/__init__.py +1 -0
- atomadic_forge/a2_mo_composites/lineage_chain_store.py +122 -0
- atomadic_forge/a2_mo_composites/manifest_store.py +46 -0
- atomadic_forge/a2_mo_composites/plan_store.py +164 -0
- atomadic_forge/a2_mo_composites/receipt_signer.py +231 -0
- atomadic_forge/a3_og_features/__init__.py +1 -0
- atomadic_forge/a3_og_features/commandsmith_feature.py +267 -0
- atomadic_forge/a3_og_features/demo_packages/mixed_py_js/src/mixed_pkg/__init__.py +3 -0
- atomadic_forge/a3_og_features/demo_packages/mixed_py_js/src/mixed_pkg/a0_qk_constants/__init__.py +4 -0
- atomadic_forge/a3_og_features/demo_packages/mixed_py_js/src/mixed_pkg/a1_at_functions/__init__.py +14 -0
- atomadic_forge/a3_og_features/demo_packages/mixed_py_js/tests/conftest.py +10 -0
- atomadic_forge/a3_og_features/demo_packages/mixed_py_js/tests/test_mixed.py +18 -0
- atomadic_forge/a3_og_features/demo_runner.py +502 -0
- atomadic_forge/a3_og_features/emergent_feature.py +95 -0
- atomadic_forge/a3_og_features/emergent_pipeline_integration.py +154 -0
- atomadic_forge/a3_og_features/forge_enforce.py +107 -0
- atomadic_forge/a3_og_features/forge_evolve.py +176 -0
- atomadic_forge/a3_og_features/forge_loop.py +528 -0
- atomadic_forge/a3_og_features/forge_pipeline.py +295 -0
- atomadic_forge/a3_og_features/forge_plan_apply.py +222 -0
- atomadic_forge/a3_og_features/lsp_server.py +98 -0
- atomadic_forge/a3_og_features/mcp_server.py +160 -0
- atomadic_forge/a3_og_features/setup_wizard.py +337 -0
- atomadic_forge/a3_og_features/synergy_feature.py +65 -0
- atomadic_forge/a4_sy_orchestration/__init__.py +1 -0
- atomadic_forge/a4_sy_orchestration/cli.py +1284 -0
- atomadic_forge/commands/__init__.py +1 -0
- atomadic_forge/commands/_registry.py +36 -0
- atomadic_forge/commands/audit.py +142 -0
- atomadic_forge/commands/chat.py +133 -0
- atomadic_forge/commands/commandsmith.py +178 -0
- atomadic_forge/commands/config_cmd.py +145 -0
- atomadic_forge/commands/demo.py +142 -0
- atomadic_forge/commands/emergent.py +124 -0
- atomadic_forge/commands/emergent_then_synergy.py +70 -0
- atomadic_forge/commands/evolve.py +122 -0
- atomadic_forge/commands/evolve_then_iterate.py +70 -0
- atomadic_forge/commands/feature_then_emergent.py +111 -0
- atomadic_forge/commands/iterate.py +140 -0
- atomadic_forge/commands/synergy.py +96 -0
- atomadic_forge/commands/synergy_then_emergent.py +70 -0
- atomadic_forge-0.3.2.dist-info/METADATA +471 -0
- atomadic_forge-0.3.2.dist-info/RECORD +131 -0
- atomadic_forge-0.3.2.dist-info/WHEEL +5 -0
- atomadic_forge-0.3.2.dist-info/entry_points.txt +3 -0
- atomadic_forge-0.3.2.dist-info/licenses/LICENSE +15 -0
- atomadic_forge-0.3.2.dist-info/top_level.txt +1 -0
|
@@ -0,0 +1,309 @@
|
|
|
1
|
+
"""Tier a1 — pure repo walker + symbol harvester for the scout phase.
|
|
2
|
+
|
|
3
|
+
Walks Python AND JavaScript / TypeScript. Each file is classified into a
|
|
4
|
+
monadic tier and reduced to a list of ``symbols`` with the same shape across
|
|
5
|
+
languages so downstream stages (cherry, finalize, certify) work polyglot.
|
|
6
|
+
"""
|
|
7
|
+
|
|
8
|
+
from __future__ import annotations
|
|
9
|
+
|
|
10
|
+
import ast
|
|
11
|
+
from collections.abc import Callable, Iterable
|
|
12
|
+
from pathlib import Path
|
|
13
|
+
|
|
14
|
+
from ..a0_qk_constants.lang_extensions import (
|
|
15
|
+
ALL_SOURCE_EXTS,
|
|
16
|
+
IGNORED_DIRS,
|
|
17
|
+
JAVASCRIPT_EXTS,
|
|
18
|
+
PYTHON_EXTS,
|
|
19
|
+
TYPESCRIPT_EXTS,
|
|
20
|
+
file_class_for_path,
|
|
21
|
+
path_parts_contain_ignored_dir,
|
|
22
|
+
)
|
|
23
|
+
from .body_extractor import _detect_state_markers
|
|
24
|
+
from .classify_tier import classify_tier, detect_effects
|
|
25
|
+
from .js_parser import classify_js_tier, detect_js_effects, parse_surface
|
|
26
|
+
|
|
27
|
+
# Legacy alias kept so that third-party code importing ``_SKIP_DIRS`` from
# this module keeps working. The authoritative list is
# ``lang_extensions.IGNORED_DIRS``; do not edit it here.
_SKIP_DIRS = IGNORED_DIRS
|
|
30
|
+
|
|
31
|
+
|
|
32
|
+
def _under_skip_dir(rel_parts: tuple[str, ...]) -> bool:
    """Tell whether a relative path passes through an ignored directory.

    The decision is delegated to ``path_parts_contain_ignored_dir``.
    Per the original contract, a dot-prefixed segment (``.github``,
    ``.venv``) counts as ignored only because it appears in
    ``IGNORED_DIRS``, never merely because of the leading dot.

    Args:
        rel_parts: Path segments relative to the walk root.

    Returns:
        True when any segment is an ignored directory.
    """
    is_ignored = path_parts_contain_ignored_dir(rel_parts)
    return is_ignored
|
|
42
|
+
|
|
43
|
+
|
|
44
|
+
def iter_python_files(root: Path) -> Iterable[Path]:
    """Yield the ``*.py`` paths under ``root`` that should be scanned.

    A path is dropped when it sits below an ignored directory or when
    its file name starts with an underscore (this includes
    ``__init__.py``).

    Args:
        root: Directory to walk; resolved to an absolute path first.

    Yields:
        Absolute paths matching ``*.py`` that survive both filters.
    """
    base = root.resolve()
    for candidate in base.rglob("*.py"):
        in_ignored_dir = _under_skip_dir(candidate.relative_to(base).parts)
        if not in_ignored_dir and not candidate.name.startswith("_"):
            yield candidate
|
|
52
|
+
|
|
53
|
+
|
|
54
|
+
def iter_source_files(root: Path) -> Iterable[Path]:
    """Yield every source file under ``root`` that should be classified.

    Only regular files whose lower-cased suffix is in ``ALL_SOURCE_EXTS``
    are considered. Files below an ignored directory are dropped.
    Python files whose name starts with ``_`` are dropped as well;
    files in other languages keep a leading underscore because it
    carries no meaning there.

    Args:
        root: Directory to walk; resolved to an absolute path first.

    Yields:
        Absolute paths of the files to harvest.
    """
    base = root.resolve()
    for entry in base.rglob("*"):
        if not entry.is_file():
            continue
        ext = entry.suffix.lower()
        if ext not in ALL_SOURCE_EXTS:
            continue
        if _under_skip_dir(entry.relative_to(base).parts):
            continue
        is_private_python = ext in PYTHON_EXTS and entry.name.startswith("_")
        if not is_private_python:
            yield entry
|
|
74
|
+
|
|
75
|
+
|
|
76
|
+
def _harvest_python_file(f: Path, rel: str, *, symbols: list[dict],
|
|
77
|
+
tier_dist: dict[str, int],
|
|
78
|
+
effect_dist: dict[str, int]) -> None:
|
|
79
|
+
try:
|
|
80
|
+
text = f.read_text(encoding="utf-8", errors="replace")
|
|
81
|
+
tree = ast.parse(text, filename=str(f))
|
|
82
|
+
except (SyntaxError, OSError):
|
|
83
|
+
return
|
|
84
|
+
for node in tree.body:
|
|
85
|
+
if isinstance(node, ast.FunctionDef | ast.AsyncFunctionDef):
|
|
86
|
+
_collect_symbol(symbols, node, rel, kind="function",
|
|
87
|
+
qualname=node.name, tier_dist=tier_dist,
|
|
88
|
+
effect_dist=effect_dist)
|
|
89
|
+
elif isinstance(node, ast.ClassDef) and not node.name.startswith("_"):
|
|
90
|
+
self_assign, class_collect = _detect_state_markers(node)
|
|
91
|
+
_collect_symbol(symbols, node, rel, kind="class",
|
|
92
|
+
qualname=node.name, tier_dist=tier_dist,
|
|
93
|
+
effect_dist=effect_dist,
|
|
94
|
+
body_signals={
|
|
95
|
+
"has_self_assign": self_assign,
|
|
96
|
+
"has_class_attr_collections": class_collect,
|
|
97
|
+
})
|
|
98
|
+
for sub in node.body:
|
|
99
|
+
if isinstance(sub, ast.FunctionDef | ast.AsyncFunctionDef):
|
|
100
|
+
if sub.name.startswith("_") and sub.name != "__init__":
|
|
101
|
+
continue
|
|
102
|
+
_collect_symbol(symbols, sub, rel, kind="method",
|
|
103
|
+
qualname=f"{node.name}.{sub.name}",
|
|
104
|
+
tier_dist=tier_dist,
|
|
105
|
+
effect_dist=effect_dist)
|
|
106
|
+
|
|
107
|
+
|
|
108
|
+
def _harvest_js_file(f: Path, rel: str, language: str, *, symbols: list[dict],
                     tier_dist: dict[str, int],
                     effect_dist: dict[str, int]) -> None:
    """Harvest one JavaScript / TypeScript file into ``symbols``.

    The file itself is recorded as a ``module`` symbol, so a module
    with no exports still appears in the scout output. Each exported
    function, class and const is then recorded as its own symbol,
    inheriting the file-level tier. An unreadable file is skipped.

    Args:
        f: Path of the file to read.
        rel: Repo-relative POSIX path stored on each record.
        language: Language label stored on each record.
        symbols: Output list that receives the records.
        tier_dist: Running per-tier symbol counts.
        effect_dist: Running per-effect symbol counts.
    """
    try:
        source = f.read_text(encoding="utf-8", errors="replace")
    except OSError:
        return

    surface = parse_surface(source)
    module_tier = classify_js_tier(path=rel, surface=surface)
    module_effects = detect_js_effects(source)

    # File-level record first, so empty-export modules are still visible.
    symbols.append(dict(
        name=f.name,
        qualname=f.stem,
        kind="module",
        file=rel,
        lineno=1,
        tier_guess=module_tier,
        suggested_tier=module_tier,
        effects=module_effects,
        complexity=surface.statement_count,
        has_self_assign=False,
        language=language,
        exports=surface.all_exports,
        imports=surface.imports,
    ))
    tier_dist[module_tier] = tier_dist.get(module_tier, 0) + 1
    for effect in module_effects:
        if effect in effect_dist:
            effect_dist[effect] += 1

    # One record per named export so downstream stages can pick JS
    # symbols by qualname exactly like Python ones.
    export_groups = (
        ("exported_functions", "function", "pure"),
        ("exported_classes", "class", "state"),
        ("exported_consts", "const", "pure"),
    )
    for attr, kind, effect in export_groups:
        for export_name in getattr(surface, attr):
            _push_js_symbol(symbols, export_name, kind, rel, module_tier,
                            language, [effect], tier_dist, effect_dist)
|
|
153
|
+
|
|
154
|
+
|
|
155
|
+
def _push_js_symbol(symbols: list[dict], name: str, kind: str, rel: str,
|
|
156
|
+
tier: str, language: str, effects: list[str],
|
|
157
|
+
tier_dist: dict[str, int],
|
|
158
|
+
effect_dist: dict[str, int]) -> None:
|
|
159
|
+
rec = {
|
|
160
|
+
"name": name,
|
|
161
|
+
"qualname": name,
|
|
162
|
+
"kind": kind,
|
|
163
|
+
"file": rel,
|
|
164
|
+
"lineno": 0,
|
|
165
|
+
"tier_guess": tier,
|
|
166
|
+
"suggested_tier": tier,
|
|
167
|
+
"effects": effects,
|
|
168
|
+
"complexity": 0,
|
|
169
|
+
"has_self_assign": False,
|
|
170
|
+
"language": language,
|
|
171
|
+
}
|
|
172
|
+
symbols.append(rec)
|
|
173
|
+
tier_dist[tier] = tier_dist.get(tier, 0) + 1
|
|
174
|
+
for e in effects:
|
|
175
|
+
if e in effect_dist:
|
|
176
|
+
effect_dist[e] += 1
|
|
177
|
+
|
|
178
|
+
|
|
179
|
+
def _file_class_counts(root: Path) -> dict[str, int]:
    """Tally the files under ``root`` by file class.

    Every regular file outside the ignored directories is classified
    with ``file_class_for_path``. The result always contains the keys
    ``source``, ``documentation``, ``config``, ``asset`` and ``other``;
    any other label returned by the classifier is added on demand.
    ``harvest_repo`` reports this so that non-source files do not
    distort layout scoring.

    Args:
        root: Directory to walk.

    Returns:
        Mapping of file-class label to file count.
    """
    tally = dict.fromkeys(
        ("source", "documentation", "config", "asset", "other"), 0
    )
    for entry in root.rglob("*"):
        if entry.is_file() and not _under_skip_dir(entry.relative_to(root).parts):
            label = file_class_for_path(entry.as_posix())
            tally[label] = tally.get(label, 0) + 1
    return tally
|
|
193
|
+
|
|
194
|
+
|
|
195
|
+
def harvest_repo(
    root: Path,
    *,
    progress: Callable[[int, int, str], None] | None = None,
) -> dict:
    """Walk a repo, classify its symbols and return a scout-shaped dict.

    Args:
        root: Repository root; resolved to an absolute path.
        progress: Optional callback invoked once per source file as
            ``progress(processed_count, total_count, relative_path)``.
            This function reads files but emits no output of its own;
            reporting is left entirely to the callback.

    Returns:
        A dict tagged ``atomadic-forge.scout/v1`` holding file counts,
        the language / tier / effect distributions, the harvested
        ``symbols`` list and a list of textual ``recommendations``.
    """
    root = Path(root).resolve()
    src_files = list(iter_source_files(root))
    file_total = len(src_files)
    file_class_counts = _file_class_counts(root)
    symbols: list[dict] = []
    tier_dist: dict[str, int] = {}
    effect_dist: dict[str, int] = {"pure": 0, "state": 0, "io": 0}
    languages = {"python": 0, "javascript": 0, "typescript": 0}

    for position, src in enumerate(src_files, start=1):
        rel = src.relative_to(root).as_posix()
        ext = src.suffix.lower()
        if ext in PYTHON_EXTS:
            languages["python"] += 1
            _harvest_python_file(src, rel, symbols=symbols,
                                 tier_dist=tier_dist,
                                 effect_dist=effect_dist)
        elif ext in JAVASCRIPT_EXTS or ext in TYPESCRIPT_EXTS:
            # JavaScript wins if an extension is listed in both sets.
            js_language = "javascript" if ext in JAVASCRIPT_EXTS else "typescript"
            languages[js_language] += 1
            _harvest_js_file(src, rel, js_language, symbols=symbols,
                             tier_dist=tier_dist,
                             effect_dist=effect_dist)
        if progress is not None:
            progress(position, file_total, rel)

    # Ties resolve to the first key in insertion order; an empty repo
    # defaults to Python.
    if any(languages.values()):
        primary = max(languages, key=languages.get)
    else:
        primary = "python"

    tier_dirs = ("a0_qk_constants", "a1_at_functions", "a2_mo_composites",
                 "a3_og_features", "a4_sy_orchestration")

    def _in_tier_dir(path: str) -> bool:
        return any(f"/{t}/" in path or path.startswith(f"{t}/") for t in tier_dirs)

    recommendations: list[str] = []
    a1_total = tier_dist.get("a1_at_functions", 0)
    if tier_dist.get("a4_sy_orchestration", 0) > a1_total:
        recommendations.append("Top-heavy at a4 — extract pure helpers into a1.")
    effect_total = sum(effect_dist.values()) or 1  # avoid dividing by zero
    if effect_dist["io"] / effect_total > 0.3:
        recommendations.append("High I/O ratio — consider pushing I/O to a4 boundaries.")
    if a1_total == 0 and symbols:
        recommendations.append("No pure functions detected — extract validators/parsers.")
    js_ts_files = languages["javascript"] + languages["typescript"]
    if js_ts_files > 0 and not any(
        _in_tier_dir(s["file"])
        for s in symbols
        if s.get("language") in ("javascript", "typescript")
    ):
        recommendations.append(
            "JS/TS files are not yet split into aN_* tier directories — "
            "see suggested_tier per file in symbols[]."
        )

    return {
        "schema_version": "atomadic-forge.scout/v1",
        "repo": str(root),
        # `file_count` is the raw rglob walk (legacy): every entry,
        # directories included, ignored dirs included.
        "file_count": sum(1 for _ in root.rglob("*")),
        # `file_class_counts` excludes IGNORED_DIRS and splits files into
        # source / documentation / config / asset / other. Tier-layout
        # scoring should use this, not the raw walk.
        "file_class_counts": file_class_counts,
        "python_file_count": languages["python"],
        "javascript_file_count": languages["javascript"],
        "typescript_file_count": languages["typescript"],
        "language_distribution": languages,
        "primary_language": primary,
        "symbol_count": len(symbols),
        "tier_distribution": tier_dist,
        "effect_distribution": effect_dist,
        "symbols": symbols,
        "recommendations": recommendations,
    }
|
|
286
|
+
|
|
287
|
+
|
|
288
|
+
def _collect_symbol(symbols: list, node, rel_path: str, *, kind: str,
                    qualname: str, tier_dist: dict, effect_dist: dict,
                    body_signals: dict | None = None) -> None:
    """Append one Python symbol record and update the running tallies.

    Classes are always recorded with the single effect ``"pure"``;
    other nodes get their effects from ``detect_effects``. The tier
    comes from ``classify_tier``. ``complexity`` is the length of the
    node's ``ast.dump`` text.

    Args:
        symbols: Output list that receives the record.
        node: AST node of the symbol.
        rel_path: Repo-relative path of the defining file.
        kind: Symbol kind label passed to the classifier.
        qualname: Qualified name stored on the record.
        tier_dist: Running per-tier counts, updated in place.
        effect_dist: Running per-effect counts; only labels already
            present as keys are incremented.
        body_signals: Optional state markers forwarded to the
            classifier; its ``has_self_assign`` entry is copied to the
            record as a bool.
    """
    if isinstance(node, ast.ClassDef):
        effects = ["pure"]
    else:
        effects = detect_effects(node)
    tier = classify_tier(name=qualname, kind=kind, path=rel_path,
                         body_signals=body_signals)
    symbols.append(dict(
        name=getattr(node, "name", qualname),
        qualname=qualname,
        kind=kind,
        file=rel_path,
        lineno=getattr(node, "lineno", 0),
        tier_guess=tier,
        effects=effects,
        complexity=len(ast.dump(node)),
        has_self_assign=bool(body_signals and body_signals.get("has_self_assign")),
    ))
    tier_dist[tier] = 1 + tier_dist.get(tier, 0)
    for effect in effects:
        if effect not in effect_dist:
            continue
        effect_dist[effect] += 1
|
|
@@ -0,0 +1,161 @@
|
|
|
1
|
+
"""Tier a1 — pure .forge sidecar parser (Lane D W8).
|
|
2
|
+
|
|
3
|
+
Reads a YAML sidecar file and returns a structured ``SidecarFile``
|
|
4
|
+
dict. Validates required fields + effect-kind enum membership.
|
|
5
|
+
Pure: one bounded read; never raises on unknown fields (preserved
|
|
6
|
+
in ``extra``).
|
|
7
|
+
|
|
8
|
+
Lane D W11 will add the cross-validator (compares the sidecar's
|
|
9
|
+
declared effects against the source AST). Lane D W20 will dispatch
|
|
10
|
+
the ``proves:`` clauses through the Lean4 obligation discharger.
|
|
11
|
+
"""
|
|
12
|
+
from __future__ import annotations
|
|
13
|
+
|
|
14
|
+
from pathlib import Path
|
|
15
|
+
from typing import TypedDict
|
|
16
|
+
|
|
17
|
+
import yaml
|
|
18
|
+
|
|
19
|
+
from ..a0_qk_constants.sidecar_schema import (
|
|
20
|
+
REQUIRED_SIDECAR_FIELDS,
|
|
21
|
+
REQUIRED_SYMBOL_FIELDS,
|
|
22
|
+
SCHEMA_VERSION_SIDECAR_V1,
|
|
23
|
+
VALID_EFFECTS,
|
|
24
|
+
SidecarFile,
|
|
25
|
+
SidecarSymbol,
|
|
26
|
+
)
|
|
27
|
+
|
|
28
|
+
|
|
29
|
+
class SidecarParseError(ValueError):
    """Signals a sidecar file too malformed for soft recovery."""
|
|
31
|
+
|
|
32
|
+
|
|
33
|
+
class ParseResult(TypedDict, total=False):
    """Structured outcome of parsing a sidecar document."""

    # Schema identifier stamped on the result.
    schema_version: str
    # Parsed sidecar, or None when the document could not be accepted.
    sidecar: SidecarFile | None
    # Hard problems that caused the sidecar to be rejected.
    errors: list[str]
    # Soft issues that did not prevent parsing.
    warnings: list[str]
|
|
38
|
+
|
|
39
|
+
|
|
40
|
+
def parse_sidecar_text(text: str, *, source: str = "<inline>") -> ParseResult:
    """Parse a YAML sidecar string and return a structured result.

    When the document is unrecoverable the result has ``sidecar=None``
    and a populated ``errors`` list. Otherwise ``sidecar`` holds the
    typed dict and ``warnings`` lists soft issues: an unknown effect
    kind or an unexpected ``schema_version`` is a warning, not an error.

    Malformed symbol entries are reported through ``errors``. In
    particular, a symbol mapping without a ``name`` key is reported
    instead of raising ``KeyError``.

    Args:
        text: YAML document text.
        source: Label used as a prefix in error and warning messages.

    Returns:
        The ``ParseResult`` described above.
    """
    out: ParseResult = {
        "schema_version": SCHEMA_VERSION_SIDECAR_V1,
        "sidecar": None,
        "errors": [],
        "warnings": [],
    }
    try:
        # An empty document loads as None; treat it as an empty mapping
        # so the required-field checks below report what is missing.
        data = yaml.safe_load(text) or {}
    except yaml.YAMLError as exc:
        out["errors"].append(f"YAML parse error in {source}: {exc}")
        return out
    if not isinstance(data, dict):
        out["errors"].append(
            f"{source}: top-level must be a mapping, got "
            f"{type(data).__name__}"
        )
        return out
    for f in REQUIRED_SIDECAR_FIELDS:
        if f not in data:
            out["errors"].append(f"{source}: missing required field {f!r}")
    declared_schema = data.get("schema_version", "")
    if declared_schema and declared_schema != SCHEMA_VERSION_SIDECAR_V1:
        out["warnings"].append(
            f"{source}: declares schema_version={declared_schema!r}; "
            f"expected {SCHEMA_VERSION_SIDECAR_V1!r}"
        )
    if out["errors"]:
        return out

    raw_symbols = data.get("symbols") or []
    if not isinstance(raw_symbols, list):
        out["errors"].append(
            f"{source}: 'symbols' must be a list, got "
            f"{type(raw_symbols).__name__}"
        )
        return out
    parsed_symbols: list[SidecarSymbol] = []
    for i, raw in enumerate(raw_symbols):
        if not isinstance(raw, dict):
            out["errors"].append(
                f"{source}: symbols[{i}] must be a mapping"
            )
            continue
        missing = [f for f in REQUIRED_SYMBOL_FIELDS if f not in raw]
        for f in missing:
            out["errors"].append(
                f"{source}: symbols[{i}] missing required {f!r}"
            )
        if "name" not in raw:
            # Without a name no record can be built. Report it (unless
            # the required-field pass already did) and move on rather
            # than crashing on raw["name"] below.
            if "name" not in missing:
                out["errors"].append(
                    f"{source}: symbols[{i}] missing required {'name'!r}"
                )
            continue
        if not isinstance(raw["name"], str):
            out["errors"].append(
                f"{source}: symbols[{i}].name must be a string"
            )
            continue
        effect = str(raw.get("effect", ""))
        if effect and effect not in VALID_EFFECTS:
            out["warnings"].append(
                f"{source}: symbols[{i}] effect={effect!r} not in "
                f"VALID_EFFECTS — preserved as-is for forward-compat"
            )
        sym = SidecarSymbol(name=str(raw["name"]),
                            effect=effect)  # type: ignore[typeddict-item]
        # Optional fields are copied only when they have the expected
        # container type; anything else is silently left out.
        if isinstance(raw.get("compose_with"), list):
            sym["compose_with"] = [str(s) for s in raw["compose_with"]]
        if isinstance(raw.get("proves"), list):
            sym["proves"] = [str(s) for s in raw["proves"]]
        if isinstance(raw.get("tier"), str):
            sym["tier"] = raw["tier"]
        if isinstance(raw.get("notes"), list):
            sym["notes"] = [str(s) for s in raw["notes"]]
        parsed_symbols.append(sym)

    if out["errors"]:
        return out

    sidecar: SidecarFile = SidecarFile(
        schema_version=SCHEMA_VERSION_SIDECAR_V1,
        target=str(data["target"]),
        symbols=parsed_symbols,
    )
    # Forward-compat: stash any top-level key that is not a required
    # field, so nothing the author wrote is lost.
    known = set(REQUIRED_SIDECAR_FIELDS)
    extra = {k: v for k, v in data.items() if k not in known}
    if extra:
        sidecar["extra"] = extra
    out["sidecar"] = sidecar
    return out
|
|
134
|
+
|
|
135
|
+
|
|
136
|
+
def parse_sidecar_file(path: Path) -> ParseResult:
    """Read a sidecar file from disk and parse it.

    A missing file, an unreadable file, or a file that is not valid
    UTF-8 is reported through the ``errors`` list of the result
    (``sidecar=None``) rather than raised.

    Args:
        path: Location of the sidecar file; anything ``Path`` accepts.

    Returns:
        The ``ParseResult`` from ``parse_sidecar_text``, or a failure
        result when the file could not be read.
    """
    path = Path(path)

    def _failure(message: str) -> ParseResult:
        # Same shape that parse_sidecar_text uses for a rejected document.
        return {
            "schema_version": SCHEMA_VERSION_SIDECAR_V1,
            "sidecar": None,
            "errors": [message],
            "warnings": [],
        }

    if not path.exists():
        return _failure(f"sidecar file not found: {path}")
    try:
        text = path.read_text(encoding="utf-8")
    except (OSError, UnicodeDecodeError) as exc:
        # UnicodeDecodeError is a ValueError, not an OSError, so it has
        # to be named explicitly to keep bad encodings a soft failure.
        return _failure(f"could not read {path}: {exc}")
    return parse_sidecar_text(text, source=str(path))
|
|
156
|
+
|
|
157
|
+
|
|
158
|
+
def find_sidecar_for(source_file: Path) -> Path:
    """Return the conventional sidecar path for ``source_file``.

    The sidecar sits next to the source and appends ``.forge`` to its
    full name, e.g. ``users/auth.py`` -> ``users/auth.py.forge``.
    """
    source = Path(source_file)
    forge_suffix = f"{source.suffix}.forge"
    return source.with_suffix(forge_suffix)
|
|
@@ -0,0 +1,202 @@
|
|
|
1
|
+
"""Tier a1 — pure .forge sidecar cross-validator (Lane D W11).
|
|
2
|
+
|
|
3
|
+
Compares a parsed SidecarFile against the source file's AST and
|
|
4
|
+
returns a structured report of mismatches. Catches the seven
|
|
5
|
+
classes of drift the Golden Path names:
|
|
6
|
+
|
|
7
|
+
1. sidecar declares a symbol the source doesn't have
|
|
8
|
+
2. source has a public symbol the sidecar didn't declare
|
|
9
|
+
3. effect=Pure declared but source uses obvious I/O / network
|
|
10
|
+
4. effect=Pure declared but source has Mutation patterns
|
|
11
|
+
5. compose_with names a symbol that doesn't exist in any imported
|
|
12
|
+
module (best-effort lexical check; not a full resolver)
|
|
13
|
+
6. tier declared but source path lives in a different tier
|
|
14
|
+
7. proves clauses naming lemmas with no entry in the local Lean4
|
|
15
|
+
manifest (W20 — soft-skipped today)
|
|
16
|
+
|
|
17
|
+
Pure: walks AST + the sidecar dict; no execution, no LLM, no
|
|
18
|
+
network. Soft on parse failures (returns 'failed_to_parse_source').
|
|
19
|
+
"""
|
|
20
|
+
from __future__ import annotations
|
|
21
|
+
|
|
22
|
+
import ast
|
|
23
|
+
from pathlib import Path
|
|
24
|
+
from typing import TypedDict
|
|
25
|
+
|
|
26
|
+
from ..a0_qk_constants.error_codes import SIDECAR_S_TO_F
|
|
27
|
+
from ..a0_qk_constants.sidecar_schema import SidecarFile
|
|
28
|
+
|
|
29
|
+
SCHEMA_VERSION_VALIDATE_V1 = "atomadic-forge.sidecar.validate/v1"
|
|
30
|
+
|
|
31
|
+
|
|
32
|
+
class ValidationFinding(TypedDict, total=False):
    """One mismatch found between a sidecar and its source file."""

    # Local drift-class label (S0001..S0007).
    code: str
    # Registered global F-code (F0100..F0109).
    f_code: str
    # One of 'error' | 'warn' | 'info'.
    severity: str
    # Name of the symbol the finding refers to.
    symbol: str
    # Human-readable description of the finding.
    message: str
|
|
38
|
+
|
|
39
|
+
|
|
40
|
+
class ValidationReport(TypedDict, total=False):
    """Report produced by cross-checking a sidecar against its source."""

    # Schema identifier of the report.
    schema_version: str
    # The sidecar's declared target.
    target: str
    # Number of findings reported.
    finding_count: int
    # The individual mismatches.
    findings: list[ValidationFinding]
    # One of 'PASS' | 'FAIL' | 'unparseable'.
    verdict: str
|
|
46
|
+
|
|
47
|
+
|
|
48
|
+
# Heuristic patterns. Conservative — false positives downgraded to
|
|
49
|
+
# 'warn' rather than 'error' so the validator never blocks merges
|
|
50
|
+
# on its own; F-codes do that.
|
|
51
|
+
_IO_HINTS = ("open(", "read(", "write(", "Path(", ".write_text",
|
|
52
|
+
".read_text", "subprocess.", "os.system")
|
|
53
|
+
_NET_HINTS = ("requests.", "urllib.", "http.client", "socket.",
|
|
54
|
+
"urlopen(", ".post(", ".get(")
|
|
55
|
+
_RANDOM_HINTS = ("random.", "secrets.", "uuid.", "datetime.now(",
|
|
56
|
+
"time.time(")
|
|
57
|
+
|
|
58
|
+
|
|
59
|
+
def _collect_top_level_symbols(tree: ast.AST) -> dict[str, ast.AST]:
|
|
60
|
+
"""Map name -> AST node for every top-level def / class."""
|
|
61
|
+
out: dict[str, ast.AST] = {}
|
|
62
|
+
for node in tree.body if hasattr(tree, "body") else []: # type: ignore[attr-defined]
|
|
63
|
+
if isinstance(node, ast.FunctionDef | ast.AsyncFunctionDef | ast.ClassDef):
|
|
64
|
+
if not node.name.startswith("_"):
|
|
65
|
+
out[node.name] = node
|
|
66
|
+
return out
|
|
67
|
+
|
|
68
|
+
|
|
69
|
+
def _node_source_text(source: str, node: ast.AST) -> str:
|
|
70
|
+
try:
|
|
71
|
+
return ast.unparse(node)
|
|
72
|
+
except Exception: # noqa: BLE001
|
|
73
|
+
# Fall back to raw line span when ast.unparse fails.
|
|
74
|
+
start = (getattr(node, "lineno", 1) or 1) - 1
|
|
75
|
+
end = (getattr(node, "end_lineno", start + 1) or start + 1)
|
|
76
|
+
return "\n".join(source.splitlines()[start:end])
|
|
77
|
+
|
|
78
|
+
|
|
79
|
+
def _check_pure_against_source(node_text: str) -> list[str]:
    """List the ways a Pure-declared symbol looks impure.

    ``node_text`` is scanned for the substring hints in ``_NET_HINTS``,
    ``_IO_HINTS`` and ``_RANDOM_HINTS``. Each category that matches
    contributes one message, always in that order.

    Returns:
        Drift-hint messages; empty when nothing suspicious was found.
    """
    checks = (
        (_NET_HINTS, "network call detected"),
        (_IO_HINTS, "filesystem / IO call detected"),
        (_RANDOM_HINTS, "non-deterministic input detected"),
    )
    return [
        label
        for hints, label in checks
        if any(hint in node_text for hint in hints)
    ]
|
|
90
|
+
|
|
91
|
+
|
|
92
|
+
def _detect_tier(path: str) -> str | None:
|
|
93
|
+
parts = Path(path).parts
|
|
94
|
+
for p in parts:
|
|
95
|
+
if p in ("a0_qk_constants", "a1_at_functions",
|
|
96
|
+
"a2_mo_composites", "a3_og_features",
|
|
97
|
+
"a4_sy_orchestration"):
|
|
98
|
+
return p
|
|
99
|
+
return None
|
|
100
|
+
|
|
101
|
+
|
|
102
|
+
def validate_sidecar(
    sidecar: SidecarFile,
    *,
    source_text: str,
    source_path: Path | str | None = None,
) -> ValidationReport:
    """Cross-check ``sidecar`` against the actual source.

    Pure: parses ``source_text`` once + walks both inputs.  A source
    that cannot be parsed is reported (verdict ``"unparseable"``)
    rather than raised.

    Args:
        sidecar: Parsed sidecar mapping; its ``"target"`` and
            ``"symbols"`` entries are read.
        source_text: Python source the sidecar describes.
        source_path: Optional location of the source; when given, the
            tier directory found in the path is compared with each
            symbol's declared ``"tier"``.

    Returns:
        A ``ValidationReport`` whose verdict is ``"PASS"`` when no
        finding has severity ``"error"``, ``"FAIL"`` otherwise, or
        ``"unparseable"`` when ``source_text`` could not be parsed.
    """
    findings: list[ValidationFinding] = []
    target = sidecar.get("target", "<unknown>")

    try:
        tree = ast.parse(source_text)
    except (SyntaxError, ValueError, RecursionError) as exc:
        # ValueError: NUL bytes in the source on Python < 3.12.
        # RecursionError: pathologically nested input.  Both are
        # "cannot parse" from the caller's point of view, so they get
        # the same report as a SyntaxError instead of escaping.
        return ValidationReport(
            schema_version=SCHEMA_VERSION_VALIDATE_V1,
            target=target,
            finding_count=1,
            findings=[ValidationFinding(
                code="S0000",
                severity="error",
                symbol="(file)",
                message=f"source did not parse: {exc}",
            )],
            verdict="unparseable",
        )

    symbols = _collect_top_level_symbols(tree)
    # Entries without a "name" collapse onto the "" key; later
    # duplicates of a name overwrite earlier ones.
    declared = {s.get("name", ""): s for s in sidecar.get("symbols") or []}

    # S0001 — sidecar declares a symbol the source doesn't have.
    for name in declared:
        if name and name not in symbols:
            findings.append(ValidationFinding(
                code="S0001", severity="error", symbol=name,
                message=f"sidecar declares {name!r} but source has no "
                        "top-level public symbol with that name",
            ))

    # S0002 — source has a public symbol the sidecar didn't declare.
    for name in symbols:
        if name not in declared:
            findings.append(ValidationFinding(
                code="S0002", severity="warn", symbol=name,
                message=f"source has public symbol {name!r} not declared "
                        "in sidecar (gradual coverage is OK; this is "
                        "advisory)",
            ))

    # S0003 — Pure declared but source does I/O / network / non-determinism.
    # NOTE(review): every purity hit is reported as S0003; no S0004
    # finding is emitted here — confirm whether one was intended.
    for name, decl in declared.items():
        if name not in symbols:
            continue
        if decl.get("effect") != "Pure":
            continue
        node_text = _node_source_text(source_text, symbols[name])
        for hit in _check_pure_against_source(node_text):
            findings.append(ValidationFinding(
                code="S0003", severity="error", symbol=name,
                message=f"Pure-declared symbol {name!r} appears to "
                        f"violate purity: {hit}",
            ))

    # S0006 — declared tier vs detected path tier.
    if source_path is not None:
        path_tier = _detect_tier(str(source_path))
        # Without a recognisable tier directory there is nothing to
        # compare against, so the per-symbol loop is skipped entirely.
        if path_tier:
            for name, decl in declared.items():
                declared_tier = decl.get("tier")
                if declared_tier and declared_tier != path_tier:
                    findings.append(ValidationFinding(
                        code="S0006", severity="warn", symbol=name,
                        message=f"sidecar declares tier={declared_tier!r} "
                                f"but source lives in tier {path_tier!r}",
                    ))

    # S0005 / S0007 — compose_with name resolution + Lean4 proves
    # discharge are reserved for Lane D W20 (Bao-Rompf checker).
    # Today we record but don't enforce.

    # Promote each S-code to its registered F-code so downstream
    # tools (forge audit / agent_summary / score_patch) can address
    # sidecar drift in the same namespace as wire violations.
    for finding in findings:
        s_code = finding.get("code", "")
        if s_code in SIDECAR_S_TO_F:
            finding["f_code"] = SIDECAR_S_TO_F[s_code]

    # Only "error" findings affect the verdict; "warn" is advisory.
    error_count = sum(1 for f in findings if f.get("severity") == "error")
    verdict = "PASS" if error_count == 0 else "FAIL"

    return ValidationReport(
        schema_version=SCHEMA_VERSION_VALIDATE_V1,
        target=target,
        finding_count=len(findings),
        findings=findings,
        verdict=verdict,
    )
|