agentbundle 0.2.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- agentbundle/__init__.py +14 -0
- agentbundle/__main__.py +5 -0
- agentbundle/_data/adapter.schema.json +270 -0
- agentbundle/_data/adapter.toml +584 -0
- agentbundle/_data/install-marker.py +1099 -0
- agentbundle/_data/pack.schema.json +152 -0
- agentbundle/_data/plugin-manifest.derived.schema.json +33 -0
- agentbundle/_data/plugin-manifest.schema.json +18 -0
- agentbundle/build/__init__.py +206 -0
- agentbundle/build/__main__.py +8 -0
- agentbundle/build/adapter_root_bins.py +336 -0
- agentbundle/build/adapters/__init__.py +46 -0
- agentbundle/build/adapters/claude_code.py +142 -0
- agentbundle/build/adapters/codex.py +227 -0
- agentbundle/build/adapters/copilot.py +149 -0
- agentbundle/build/adapters/kiro.py +608 -0
- agentbundle/build/adapters/kiro_cli.py +53 -0
- agentbundle/build/adapters/kiro_ide.py +275 -0
- agentbundle/build/contract.py +20 -0
- agentbundle/build/lint_packs.py +555 -0
- agentbundle/build/main.py +596 -0
- agentbundle/build/phase_order.py +40 -0
- agentbundle/build/projections/__init__.py +13 -0
- agentbundle/build/projections/codex_agent_toml.py +232 -0
- agentbundle/build/projections/copilot_agent_md.py +206 -0
- agentbundle/build/projections/copilot_hooks_json.py +142 -0
- agentbundle/build/projections/direct_directory.py +41 -0
- agentbundle/build/projections/hook_id.py +27 -0
- agentbundle/build/projections/kiro_ide_hook.py +256 -0
- agentbundle/build/projections/merge_into_agent_json.py +264 -0
- agentbundle/build/projections/merge_json.py +58 -0
- agentbundle/build/projections/user_merge_json.py +324 -0
- agentbundle/build/scope_rails.py +728 -0
- agentbundle/build/self_host.py +1486 -0
- agentbundle/build/shared_libs.py +309 -0
- agentbundle/build/target_resolver.py +85 -0
- agentbundle/build/tests/__init__.py +0 -0
- agentbundle/build/tests/test_adapter_claude_code.py +275 -0
- agentbundle/build/tests/test_adapter_codex.py +699 -0
- agentbundle/build/tests/test_adapter_copilot.py +91 -0
- agentbundle/build/tests/test_adapter_kiro.py +449 -0
- agentbundle/build/tests/test_adapter_kiro_alias.py +105 -0
- agentbundle/build/tests/test_adapter_kiro_cli.py +102 -0
- agentbundle/build/tests/test_adapter_kiro_ide.py +173 -0
- agentbundle/build/tests/test_adapter_root_bins_projection.py +429 -0
- agentbundle/build/tests/test_build_ships_seeds.py +78 -0
- agentbundle/build/tests/test_contract.py +582 -0
- agentbundle/build/tests/test_contract_scope.py +224 -0
- agentbundle/build/tests/test_contract_v07.py +191 -0
- agentbundle/build/tests/test_contract_v08.py +230 -0
- agentbundle/build/tests/test_direct_directory_cleanup.py +65 -0
- agentbundle/build/tests/test_end_to_end_build.py +227 -0
- agentbundle/build/tests/test_lint_agents_md_legacy_block.py +135 -0
- agentbundle/build/tests/test_lint_agents_md_risk_block.py +116 -0
- agentbundle/build/tests/test_lint_packs.py +703 -0
- agentbundle/build/tests/test_load_pack_hook_wiring_safely.py +176 -0
- agentbundle/build/tests/test_pack_schema.py +265 -0
- agentbundle/build/tests/test_pack_schema_allowed_adapters.py +258 -0
- agentbundle/build/tests/test_pack_schema_install.py +305 -0
- agentbundle/build/tests/test_pipeline.py +272 -0
- agentbundle/build/tests/test_plugin_manifest_schema.py +327 -0
- agentbundle/build/tests/test_projections_merge_json.py +148 -0
- agentbundle/build/tests/test_scope_rails.py +398 -0
- agentbundle/build/tests/test_security.py +97 -0
- agentbundle/build/tests/test_self_host_check.py +2100 -0
- agentbundle/build/tests/test_shared_libs_projection.py +415 -0
- agentbundle/build/tests/test_shipped_packs_v07_declarations.py +100 -0
- agentbundle/build/tests/test_shipped_packs_v08_declarations.py +80 -0
- agentbundle/build/tests/test_validate.py +250 -0
- agentbundle/build/validate.py +141 -0
- agentbundle/catalogue.py +164 -0
- agentbundle/cli.py +486 -0
- agentbundle/commands/__init__.py +5 -0
- agentbundle/commands/_common.py +174 -0
- agentbundle/commands/_drop_warning.py +329 -0
- agentbundle/commands/adapt.py +343 -0
- agentbundle/commands/config.py +125 -0
- agentbundle/commands/diff.py +211 -0
- agentbundle/commands/init_state.py +279 -0
- agentbundle/commands/install.py +3026 -0
- agentbundle/commands/list_packs.py +170 -0
- agentbundle/commands/list_targets.py +23 -0
- agentbundle/commands/reconcile.py +161 -0
- agentbundle/commands/render.py +165 -0
- agentbundle/commands/scaffold.py +69 -0
- agentbundle/commands/uninstall.py +294 -0
- agentbundle/commands/upgrade.py +699 -0
- agentbundle/commands/validate.py +688 -0
- agentbundle/config.py +747 -0
- agentbundle/render.py +123 -0
- agentbundle/safety.py +633 -0
- agentbundle/scope.py +319 -0
- agentbundle/user_config.py +284 -0
- agentbundle/version.py +49 -0
- agentbundle-0.2.0.dist-info/METADATA +37 -0
- agentbundle-0.2.0.dist-info/RECORD +99 -0
- agentbundle-0.2.0.dist-info/WHEEL +5 -0
- agentbundle-0.2.0.dist-info/entry_points.txt +2 -0
- agentbundle-0.2.0.dist-info/top_level.txt +1 -0
|
@@ -0,0 +1,1486 @@
|
|
|
1
|
+
"""Self-host build mode — `make build-self` and `make build-check`.
|
|
2
|
+
|
|
3
|
+
Real-write (`make build-self`, no `DRY_RUN=1`) projects adapters
|
|
4
|
+
**directly into the working tree**, so the adapters' merge / splice
|
|
5
|
+
logic operates against the working tree's existing content — that's
|
|
6
|
+
what makes `merge-managed-key-only` (Claude Code) and
|
|
7
|
+
`preserve-outside-block` (Codex) correct against the adopter's actual
|
|
8
|
+
files.
|
|
9
|
+
|
|
10
|
+
Dry-run (`make build-self DRY_RUN=1`, and `make build-check`) clones
|
|
11
|
+
the adapter target subtree (`.claude/`, `tools/hooks/`, `.github/instructions/`,
|
|
12
|
+
`AGENTS.md`) into a fresh temp dir first, projects into the clone,
|
|
13
|
+
then diffs the clone against the working tree. The clone-then-project
|
|
14
|
+
pattern keeps the existing-content merge semantics intact under
|
|
15
|
+
dry-run too.
|
|
16
|
+
|
|
17
|
+
Marker resolution (`<adapt:NAME>` → discovery value) is the ONE place
|
|
18
|
+
install-time substitution happens — every other build mode copies
|
|
19
|
+
markers through unchanged (spec § Boundaries — Never do). The
|
|
20
|
+
`.adapt-discovery.toml` *materialisation* lives in the
|
|
21
|
+
`adapt-to-project` skill, out of scope here. T7 ships only the
|
|
22
|
+
consumer.
|
|
23
|
+
|
|
24
|
+
Self-host scope (see docs/specs/self-hosting/spec.md § Phased rollout):
|
|
25
|
+
the `SELF_HOST_ADAPTERS` allow-list runs `claude-code` only.
|
|
26
|
+
Kiro and Copilot stay distribution-only so self-host does not project
|
|
27
|
+
`.kiro/` or `.github/instructions/`.
|
|
28
|
+
"""
|
|
29
|
+
|
|
30
|
+
from __future__ import annotations
|
|
31
|
+
|
|
32
|
+
import fnmatch
|
|
33
|
+
import hashlib
|
|
34
|
+
import importlib.util
|
|
35
|
+
import json
|
|
36
|
+
import os
|
|
37
|
+
import re
|
|
38
|
+
import shutil
|
|
39
|
+
import stat
|
|
40
|
+
import subprocess
|
|
41
|
+
import sys
|
|
42
|
+
import tempfile
|
|
43
|
+
from pathlib import Path
|
|
44
|
+
|
|
45
|
+
from agentbundle.build.adapters import ADAPTERS, registry
|
|
46
|
+
from agentbundle.build.contract import load as load_contract
|
|
47
|
+
from agentbundle.build.main import (
|
|
48
|
+
CONTRACT_PATH,
|
|
49
|
+
REPO_ROOT,
|
|
50
|
+
discover_packs,
|
|
51
|
+
validate_pack_uniqueness,
|
|
52
|
+
)
|
|
53
|
+
from agentbundle.build.shared_libs import (
|
|
54
|
+
apply_projection as _shared_libs_apply,
|
|
55
|
+
check_drift as _shared_libs_check_drift,
|
|
56
|
+
)
|
|
57
|
+
from agentbundle.build.adapter_root_bins import (
|
|
58
|
+
apply_projection as _adapter_root_bins_apply,
|
|
59
|
+
check_drift as _adapter_root_bins_check_drift,
|
|
60
|
+
)
|
|
61
|
+
|
|
62
|
+
# AC14: canonical lowercase-hyphen marker grammar. The self-host
|
|
63
|
+
# regex narrows from the prior wide `[A-Za-z0-9_-]+` form to match
|
|
64
|
+
# what the adapt-to-project skill writes. Legacy UPPER_SNAKE markers
|
|
65
|
+
# are tolerated with a one-shot warning per file (see `resolve_markers`).
|
|
66
|
+
ADAPT_MARKER_RE = re.compile(r"<adapt:([a-z][a-z0-9-]*)>")
|
|
67
|
+
_LEGACY_UPPER_RE = re.compile(r"<adapt:([A-Z_][A-Z0-9_]*)>")
|
|
68
|
+
|
|
69
|
+
# The adapter-target subtree — paths every adapter could touch. Used
|
|
70
|
+
# to clone working-tree state into a dry-run shadow.
|
|
71
|
+
TARGET_PATHS = (
|
|
72
|
+
Path(".claude"),
|
|
73
|
+
Path("tools") / "hooks",
|
|
74
|
+
Path(".github") / "instructions",
|
|
75
|
+
Path("AGENTS.md"),
|
|
76
|
+
)
|
|
77
|
+
|
|
78
|
+
# Self-host allow-list (see self-hosting spec § Phased rollout).
|
|
79
|
+
# Kiro and Copilot remain in the contract for distribution builds but
|
|
80
|
+
# are excluded from the self-host runner.
|
|
81
|
+
SELF_HOST_ADAPTERS: tuple[str, ...] = ("claude-code",)
|
|
82
|
+
|
|
83
|
+
# Self-host *pack* allow-list. This repo is the catalogue's home, not
|
|
84
|
+
# an adopter — `make build-self` should only project the in-house
|
|
85
|
+
# packs into the working tree. User-scope-default packs (architect,
|
|
86
|
+
# atlassian, contracts, converters, credential-brokers, figma) are
|
|
87
|
+
# advertised via `marketplace.json` but their primitives don't belong
|
|
88
|
+
# in this repo's `.claude/skills/` (or future `.kiro/skills/`) tree.
|
|
89
|
+
# `monorepo-extras` is repo-only by metadata but the bundle is not a
|
|
90
|
+
# canonical monorepo consumer of `new-package`, so it's left out too.
|
|
91
|
+
# `_aggregate_marketplace` intentionally ignores this filter — the
|
|
92
|
+
# catalogue advertises every pack.
|
|
93
|
+
SELF_HOST_PACKS: tuple[str, ...] = (
|
|
94
|
+
"core",
|
|
95
|
+
"governance-extras",
|
|
96
|
+
"user-guide-diataxis",
|
|
97
|
+
)
|
|
98
|
+
|
|
99
|
+
|
|
100
|
+
def _filter_self_host_packs(pack_paths: list[Path]) -> list[Path]:
    """Keep only the packs that self-host is allowed to project.

    A path survives when its final component (the pack directory name) is
    listed in `SELF_HOST_PACKS`. Survivors are returned in input order.
    """
    selected: list[Path] = []
    for candidate in pack_paths:
        if candidate.name in SELF_HOST_PACKS:
            selected.append(candidate)
    return selected
|
|
106
|
+
|
|
107
|
+
|
|
108
|
+
def is_dirty_tree(working_tree: Path) -> bool:
    """Return True if `git status --porcelain` against working_tree is non-empty.

    Fail-closed semantics — if git is missing, the directory isn't a
    git repo, or the call fails for any reason, return True so the
    destructive `--self` write still requires `--force`. The operator
    who knows the directory is safe can always pass `--force`; the
    operator who doesn't know what's there is protected.

    Args:
        working_tree: Directory in which `git status` is run.

    Returns:
        True when the tree has uncommitted or untracked changes, or when
        its state could not be determined; False only when git ran
        successfully and reported nothing.
    """
    try:
        result = subprocess.run(
            ["git", "status", "--porcelain"],
            cwd=working_tree,
            capture_output=True,
            text=True,
            check=False,
        )
    except FileNotFoundError:
        # Raised when the `git` executable cannot be found (and, on POSIX,
        # also when `working_tree` itself does not exist).
        print(
            f"self-host: warning — `git` binary not on PATH; treating "
            f"{working_tree} as dirty.",
            file=sys.stderr,
        )
        return True
    except OSError as exc:
        # Any other launch failure (working_tree is a regular file, is not
        # traversable, ...) must also fail closed rather than propagate —
        # that is the "for any reason" half of the contract above.
        print(
            f"self-host: warning — could not run `git status` in "
            f"{working_tree} ({exc}); treating as dirty.",
            file=sys.stderr,
        )
        return True
    if result.returncode != 0:
        print(
            f"self-host: warning — `git status` failed in {working_tree} "
            f"(exit {result.returncode}); treating as dirty.",
            file=sys.stderr,
        )
        return True
    # Porcelain output is empty exactly when the tree is clean.
    return bool(result.stdout.strip())
|
|
140
|
+
|
|
141
|
+
|
|
142
|
+
def resolve_markers(
|
|
143
|
+
root: Path,
|
|
144
|
+
discovery: dict[str, str],
|
|
145
|
+
extra_paths: list[Path] | None = None,
|
|
146
|
+
) -> int:
|
|
147
|
+
"""Walk the bundle-owned subtree under `root` and substitute
|
|
148
|
+
`<adapt:NAME>` markers.
|
|
149
|
+
|
|
150
|
+
Scope is `TARGET_PATHS` (the adapter-target subtree) plus any
|
|
151
|
+
`extra_paths` the caller passes — typically the seed-projected
|
|
152
|
+
paths and the aggregated marketplace path. This avoids silently
|
|
153
|
+
rewriting adopter-private files outside the bundle's owned region
|
|
154
|
+
while still covering Phase-1's widened projection.
|
|
155
|
+
"""
|
|
156
|
+
modified = 0
|
|
157
|
+
candidates: list[Path] = []
|
|
158
|
+
scope = list(TARGET_PATHS)
|
|
159
|
+
if extra_paths:
|
|
160
|
+
scope.extend(extra_paths)
|
|
161
|
+
for relative in scope:
|
|
162
|
+
target = root / relative
|
|
163
|
+
if target.is_file():
|
|
164
|
+
candidates.append(target)
|
|
165
|
+
elif target.is_dir():
|
|
166
|
+
candidates.extend(p for p in target.rglob("*") if p.is_file())
|
|
167
|
+
for path in candidates:
|
|
168
|
+
if not path.is_file():
|
|
169
|
+
continue
|
|
170
|
+
try:
|
|
171
|
+
text = path.read_text(encoding="utf-8")
|
|
172
|
+
except UnicodeDecodeError:
|
|
173
|
+
continue
|
|
174
|
+
if "<adapt:" not in text:
|
|
175
|
+
continue
|
|
176
|
+
# AC14: legacy UPPER_SNAKE markers emit a single per-file warning
|
|
177
|
+
# and are left in place (the narrowed regex below won't match
|
|
178
|
+
# them; the warning surfaces them for the adopter to migrate).
|
|
179
|
+
if _LEGACY_UPPER_RE.search(text):
|
|
180
|
+
try:
|
|
181
|
+
rel_label = path.relative_to(root)
|
|
182
|
+
except ValueError:
|
|
183
|
+
rel_label = path
|
|
184
|
+
print(
|
|
185
|
+
f"self-host: warning: legacy UPPER_SNAKE marker(s) in {rel_label}; "
|
|
186
|
+
f"left in place (canonical form is <adapt:[a-z][a-z0-9-]*>)",
|
|
187
|
+
file=sys.stderr,
|
|
188
|
+
)
|
|
189
|
+
replaced = ADAPT_MARKER_RE.sub(
|
|
190
|
+
lambda match: discovery.get(match.group(1), match.group(0)),
|
|
191
|
+
text,
|
|
192
|
+
)
|
|
193
|
+
if replaced != text:
|
|
194
|
+
path.write_text(replaced, encoding="utf-8")
|
|
195
|
+
modified += 1
|
|
196
|
+
return modified
|
|
197
|
+
|
|
198
|
+
|
|
199
|
+
def _clone_target_subtree(working_tree: Path, destination: Path) -> None:
    """Mirror each existing `TARGET_PATHS` entry into `destination`.

    Entries missing from `working_tree` are skipped. Directories are copied
    recursively and single files are copied with their metadata; parent
    directories under `destination` are created as needed.
    """
    for relative in TARGET_PATHS:
        origin = working_tree / relative
        if origin.exists():
            mirror = destination / relative
            mirror.parent.mkdir(parents=True, exist_ok=True)
            copier = shutil.copytree if origin.is_dir() else shutil.copy2
            copier(origin, mirror)
|
|
211
|
+
|
|
212
|
+
|
|
213
|
+
def _project_all_adapters(
    output_root: Path,
    packs_dir: Path,
    contract: dict,
) -> None:
    """Project the self-host packs through every self-host adapter.

    Uniqueness validation runs over *all* discovered packs, so a naming
    collision inside a user-scope-default pack is still reported even
    though that pack is filtered out of the projection itself. Only
    adapters that are both declared in `contract["adapter"]` and listed in
    `SELF_HOST_ADAPTERS` are invoked, each receiving the
    `SELF_HOST_PACKS`-filtered pack paths.
    """
    discovered = discover_packs(packs_dir)
    for pack in discovered:
        validate_pack_uniqueness(pack)

    selected_paths = _filter_self_host_packs([pack.path for pack in discovered])

    for adapter_name in ADAPTERS:
        declared = adapter_name in contract["adapter"]
        if declared and adapter_name in SELF_HOST_ADAPTERS:
            # Registry keys use underscores where adapter names use hyphens.
            module_key = adapter_name.replace("-", "_")
            registry[module_key].project_packs(selected_paths, contract, output_root)
|
|
234
|
+
|
|
235
|
+
|
|
236
|
+
def _compose_agents_md(
|
|
237
|
+
packs_dir: Path,
|
|
238
|
+
output_root: Path,
|
|
239
|
+
contract: dict,
|
|
240
|
+
) -> Path | None:
|
|
241
|
+
"""Compose root AGENTS.md from the core body seed and optional
|
|
242
|
+
core footer fragment.
|
|
243
|
+
|
|
244
|
+
Post-RFC-0009: Codex projects full skill bodies to `.agents/skills/`
|
|
245
|
+
rather than splicing a managed block into AGENTS.md. The Codex
|
|
246
|
+
adapter is not invoked from self-host — Codex correctness is gated
|
|
247
|
+
by unit tests + the AC29 tempdir projection test, not by
|
|
248
|
+
self-host's working-tree drift gate. Keeping Codex out of
|
|
249
|
+
`SELF_HOST_ADAPTERS` avoids carrying a duplicate
|
|
250
|
+
`.agents/skills/` tree in the working tree.
|
|
251
|
+
"""
|
|
252
|
+
body_path = packs_dir / "core" / "seeds" / "AGENTS.md"
|
|
253
|
+
if not body_path.exists():
|
|
254
|
+
return None
|
|
255
|
+
footer_path = packs_dir / "core" / "seeds" / "_agents-footer.md"
|
|
256
|
+
target_path = output_root / "AGENTS.md"
|
|
257
|
+
|
|
258
|
+
# Mirror the preserve-on-disk gate `_project_seeds` applies to
|
|
259
|
+
# Manual paths. `AGENTS.md` is in `EXCLUDED_PATTERNS` since the
|
|
260
|
+
# 2026-05-25 amendment (Manual file, adopter-owned); composition
|
|
261
|
+
# must not clobber the adopter's living instance. Returning `None`
|
|
262
|
+
# signals "didn't compose" so the caller skips marker resolution
|
|
263
|
+
# against this path (the live file has no unresolved markers by
|
|
264
|
+
# contract — they were resolved at install).
|
|
265
|
+
if target_path.exists() and _is_excluded(Path("AGENTS.md")):
|
|
266
|
+
return None
|
|
267
|
+
|
|
268
|
+
body = body_path.read_text(encoding="utf-8").replace("\r\n", "\n")
|
|
269
|
+
if body and not body.endswith("\n"):
|
|
270
|
+
body += "\n"
|
|
271
|
+
target_path.write_text(body, encoding="utf-8")
|
|
272
|
+
|
|
273
|
+
if footer_path.exists():
|
|
274
|
+
text = target_path.read_text(encoding="utf-8")
|
|
275
|
+
footer = footer_path.read_text(encoding="utf-8").replace("\r\n", "\n")
|
|
276
|
+
if text and not text.endswith("\n"):
|
|
277
|
+
text += "\n"
|
|
278
|
+
if footer and not footer.endswith("\n"):
|
|
279
|
+
footer += "\n"
|
|
280
|
+
target_path.write_text(text + footer, encoding="utf-8")
|
|
281
|
+
return target_path
|
|
282
|
+
|
|
283
|
+
|
|
284
|
+
# ---------------------------------------------------------------------------
|
|
285
|
+
# Self-host follow-up additions (per docs/specs/self-hosting/spec.md):
|
|
286
|
+
# seed projection, marketplace aggregation, CLAUDE.md symlink recreation,
|
|
287
|
+
# missing-discovery fail-fast, drift source-naming, info-line emission.
|
|
288
|
+
# Comparison-rule strengthening (LF norm / mode bits / lstat) remains open.
|
|
289
|
+
# ---------------------------------------------------------------------------
|
|
290
|
+
|
|
291
|
+
# Paths that stay out of projection (RFC-0002 § What stays out).
#
# Each entry is a glob matched against the POSIX-style path relative to the
# repo root:
#   * `*`  matches within a single path segment (never across `/`);
#   * `**` matches zero or more segments, including none.
# Matching is anchored at both ends, so an entry *without* `/` (such as
# `README.md`) only matches at the repo root and does NOT match a file of
# the same name nested in a subdirectory. Reviewers: extend this constant
# when an RFC authorises a new excluded class.
EXCLUDED_PATTERNS: tuple[str, ...] = (
    ".context/**",
    ".claude/settings.local.json",
    "docs/rfc/[0-9][0-9][0-9][0-9]-*.md",
    "docs/adr/[0-9][0-9][0-9][0-9]-*.md",
    "docs/specs/*/spec.md",
    "docs/specs/*/plan.md",
    "docs/specs/*/state.json",
    "docs/specs/*/notes/**",
    "docs/contracts/**",
    "docs/architecture/*.md",
    "docs/product/*.md",
    "docs/knowledge/*.md",
    "docs/guides/**/*.md",
    # Manual seed-projected paths (RFC-0002 amendment 2026-05-25). The
    # `docs/<area>/*.md` globs above already cover part of the reclassified
    # set; the entries below are not matched by any glob and therefore need
    # explicit listing. See `docs/specs/self-hosting/spec.md` AC20.
    "docs/CHARTER.md",
    # Per-instance work/deferral register: the seed is a placeholder and
    # the on-disk file is the curated instance (RFC-0016 mechanism 5).
    # Preserved like docs/product/*.
    "docs/backlog.md",
    "docs/knowledge/patterns.jsonl",
    "docs/rfc/README.md",
    "docs/adr/README.md",
    "docs/specs/README.md",
    "packages/README.md",
    "packages/_example/README.md",
    "packages/_example/AGENTS.md",
    "README.md",  # root-level only; a nested README.md is not excluded
    "LICENSE-*",
    ".gitignore",
    ".github/**",
    "AGENTS.local.md",
    "AGENTS.md",  # root-level only; a nested AGENTS.md is not excluded
    ".kiro/**",
    "packages/agentbundle/**",
    "packs/**",
    "tools/**",
    ".adapt-discovery.toml",
    "Makefile",
    "dist/**",
    ".worktrees/**",
    "*.upstream.*",  # adopter-local upstream stash sidecars
)
|
|
343
|
+
|
|
344
|
+
|
|
345
|
+
def _glob_to_regex(pattern: str) -> re.Pattern[str]:
|
|
346
|
+
"""Translate an excluded-pattern glob into an anchored regex.
|
|
347
|
+
|
|
348
|
+
`*` → one path segment (no slash); `**` → zero or more segments
|
|
349
|
+
(including empty); `**/` → zero or more leading segments.
|
|
350
|
+
Anchored to start and end so root-only patterns like `README.md`
|
|
351
|
+
don't match nested files of the same name.
|
|
352
|
+
"""
|
|
353
|
+
# Tokenise on `**` first so `*` doesn't grab `**` greedily.
|
|
354
|
+
out: list[str] = []
|
|
355
|
+
i = 0
|
|
356
|
+
while i < len(pattern):
|
|
357
|
+
if pattern.startswith("**/", i):
|
|
358
|
+
out.append("(?:.*/)?")
|
|
359
|
+
i += 3
|
|
360
|
+
elif pattern.startswith("**", i):
|
|
361
|
+
out.append(".*")
|
|
362
|
+
i += 2
|
|
363
|
+
elif pattern[i] == "*":
|
|
364
|
+
out.append("[^/]*")
|
|
365
|
+
i += 1
|
|
366
|
+
elif pattern[i] == "?":
|
|
367
|
+
out.append("[^/]")
|
|
368
|
+
i += 1
|
|
369
|
+
elif pattern[i] == "[":
|
|
370
|
+
# Pass character class through as-is, find closing ']'
|
|
371
|
+
end = pattern.find("]", i + 1)
|
|
372
|
+
if end == -1:
|
|
373
|
+
out.append(re.escape(pattern[i]))
|
|
374
|
+
i += 1
|
|
375
|
+
else:
|
|
376
|
+
out.append(pattern[i : end + 1])
|
|
377
|
+
i = end + 1
|
|
378
|
+
else:
|
|
379
|
+
out.append(re.escape(pattern[i]))
|
|
380
|
+
i += 1
|
|
381
|
+
return re.compile(r"\A" + "".join(out) + r"\Z")
|
|
382
|
+
|
|
383
|
+
|
|
384
|
+
# `EXCLUDED_PATTERNS` compiled once at import time, in declaration order.
_EXCLUDED_REGEXES: tuple[re.Pattern[str], ...] = tuple(
    map(_glob_to_regex, EXCLUDED_PATTERNS)
)

# "Projected README" allow-list: paths treated as *Projected* even when an
# EXCLUDED_PATTERNS entry would otherwise catch them.
#
# The 2026-05-25 amendment to RFC-0002 reclassified 19 paths from Projected
# to Manual, which shrank this allow-list to a single entry
# (`docs/CONVENTIONS.md`). The reclassified paths now fall through to
# EXCLUDED_PATTERNS coverage (`docs/architecture/*.md`,
# `docs/product/*.md`, `docs/knowledge/*.md`, `docs/guides/**/*.md`, and
# the explicit additions listed in that tuple). See RFC-0002 §
# Amendments § 2026-05-25.
PROJECTED_README_OVERRIDES: tuple[str, ...] = ("docs/CONVENTIONS.md",)
|
|
401
|
+
|
|
402
|
+
|
|
403
|
+
def _is_excluded(relative: Path) -> bool:
    """Tell whether `relative` is an excluded (Manual) path.

    The path is compared in POSIX form. Membership in
    `PROJECTED_README_OVERRIDES` wins — such a path is Projected and
    therefore never excluded; otherwise the path is excluded as soon as any
    compiled `EXCLUDED_PATTERNS` regex matches it.
    """
    candidate = relative.as_posix()
    if candidate in PROJECTED_README_OVERRIDES:
        return False
    return any(compiled.match(candidate) for compiled in _EXCLUDED_REGEXES)
|
|
414
|
+
|
|
415
|
+
|
|
416
|
+
def _project_seeds(packs_dir: Path, output_root: Path) -> dict[Path, Path]:
    """Copy `packs/<pack>/seeds/**` into `output_root` at seed-relative paths.

    Only packs named in `SELF_HOST_PACKS` contribute. Several packs may
    feed the same directory (the historical case was `docs/_templates/`,
    retired 2026-05-24 when each template moved into its owning skill's
    `assets/` folder; the merge rule is kept for any future shared seed
    directory). Two seeds landing on the same target path are tolerated
    only when their bytes are identical; otherwise a `ValueError` naming
    both sources is raised — per spec § *Ask first* and AC7.

    Returns:
        The `{relative_target → source}` map, used by the drift
        source-naming logic. It also lists targets that were preserved on
        disk rather than overwritten.

    Raises:
        ValueError: two seeds with different content map to one target.
    """
    # Pass 1 (spec § Always do): plan every copy and surface collisions
    # before touching the disk, so a collision can never leave a partial
    # projection behind.
    planned: dict[Path, Path] = {}
    for pack_dir in sorted(packs_dir.iterdir()):
        is_pack = pack_dir.is_dir() and (pack_dir / "pack.toml").exists()
        if not is_pack or pack_dir.name not in SELF_HOST_PACKS:
            continue
        seed_root = pack_dir / "seeds"
        if not seed_root.is_dir():
            continue
        for seed in sorted(seed_root.rglob("*")):
            if not seed.is_file():
                continue
            # Underscore-prefixed files are composition fragments (e.g.
            # `_agents-footer.md`), not standalone projection targets.
            # They live in seeds so adopters can edit them; the Phase-2
            # composite-agents-md recipe reads
            # `packs/core/seeds/_agents-footer.md` directly. Convention
            # documented in docs/CONVENTIONS.md § Pack source-of-truth
            # split.
            if seed.name.startswith("_"):
                continue
            relative = seed.relative_to(seed_root)
            if relative not in planned:
                planned[relative] = seed
                continue
            earlier = planned[relative]
            if seed.read_bytes() != earlier.read_bytes():
                raise ValueError(
                    f"seed collision at {relative.as_posix()}: "
                    f"{earlier} and {seed} differ — rename or "
                    f"consolidate one of them."
                )

    # Pass 2: the plan is collision-free, so write it out.
    #
    # Per the RFC-0002 2026-05-25 amendment, Manual paths (matched by
    # EXCLUDED_PATTERNS and not rescued by PROJECTED_README_OVERRIDES)
    # carry placeholder seeds while the on-disk file is the adopter's
    # living instance, so it MUST NOT be overwritten. (A blind write used
    # to clobber living docs when `make build-self` ran after the
    # override-shrink.) A seed is therefore written when its target is
    # absent — first install, the seed lands as scaffold — or when the
    # path is genuinely Projected; an existing Manual target is preserved.
    for relative, seed in planned.items():
        destination = output_root / relative
        if _is_excluded(relative) and destination.exists():
            continue
        destination.parent.mkdir(parents=True, exist_ok=True)
        shutil.copy2(seed, destination, follow_symlinks=False)
    return planned
|
|
492
|
+
|
|
493
|
+
|
|
494
|
+
def _aggregate_marketplace(
|
|
495
|
+
packs_dir: Path,
|
|
496
|
+
output_root: Path,
|
|
497
|
+
owner: str = "eugenelim",
|
|
498
|
+
) -> Path:
|
|
499
|
+
"""Aggregate `packs/*/.claude-plugin/plugin.json` into
|
|
500
|
+
`output_root/.claude-plugin/marketplace.json` so this repo is itself
|
|
501
|
+
a usable marketplace at HEAD. `owner` defaults to this repo's
|
|
502
|
+
concrete value but `run_self_host` overrides it from
|
|
503
|
+
`.adapt-discovery.toml[adapt].owner` so adopters get their own."""
|
|
504
|
+
entries: list[dict] = []
|
|
505
|
+
for pack_path in sorted(packs_dir.iterdir()):
|
|
506
|
+
if not pack_path.is_dir() or not (pack_path / "pack.toml").exists():
|
|
507
|
+
continue
|
|
508
|
+
manifest = pack_path / ".claude-plugin" / "plugin.json"
|
|
509
|
+
if manifest.exists():
|
|
510
|
+
entries.append(json.loads(manifest.read_text(encoding="utf-8")))
|
|
511
|
+
target = output_root / ".claude-plugin" / "marketplace.json"
|
|
512
|
+
target.parent.mkdir(parents=True, exist_ok=True)
|
|
513
|
+
payload = {
|
|
514
|
+
"owner": {"name": owner},
|
|
515
|
+
"plugins": entries,
|
|
516
|
+
}
|
|
517
|
+
target.write_text(
|
|
518
|
+
json.dumps(payload, indent=2, sort_keys=True) + "\n",
|
|
519
|
+
encoding="utf-8",
|
|
520
|
+
)
|
|
521
|
+
return target
|
|
522
|
+
|
|
523
|
+
|
|
524
|
+
def _is_windows() -> bool:
|
|
525
|
+
"""Detect native Windows. Both checks point at the same OS; using
|
|
526
|
+
either alone is enough but checking both is robust against a future
|
|
527
|
+
embedded build that fakes one without the other."""
|
|
528
|
+
return sys.platform == "win32" or os.name == "nt"
|
|
529
|
+
|
|
530
|
+
|
|
531
|
+
def _recreate_claude_symlink(output_root: Path, *, force_copy: bool = False) -> Path:
|
|
532
|
+
"""Ensure `output_root/CLAUDE.md` mirrors `AGENTS.md`.
|
|
533
|
+
|
|
534
|
+
On macOS / Linux (default) the mirror is a relative symlink to
|
|
535
|
+
`AGENTS.md`, idempotent — leaves a correctly-pointing symlink alone.
|
|
536
|
+
|
|
537
|
+
On native Windows, or when `force_copy=True` (set by the CLI's
|
|
538
|
+
`--no-symlink` flag), the mirror is a regular file copy of
|
|
539
|
+
`AGENTS.md`. Symlink creation on Windows requires Developer Mode or
|
|
540
|
+
admin privileges; the copy path is the portable fallback. Emits a
|
|
541
|
+
one-line stderr warning when the fallback fires so the operator
|
|
542
|
+
knows the resulting CLAUDE.md is a copy, not a link, and must be
|
|
543
|
+
regenerated when AGENTS.md changes.
|
|
544
|
+
"""
|
|
545
|
+
claude = output_root / "CLAUDE.md"
|
|
546
|
+
desired_target = "AGENTS.md"
|
|
547
|
+
source = output_root / desired_target
|
|
548
|
+
use_copy = force_copy or _is_windows()
|
|
549
|
+
|
|
550
|
+
if use_copy:
|
|
551
|
+
if not source.exists():
|
|
552
|
+
# The POSIX symlink branch would create a dangling link
|
|
553
|
+
# here (and some test fixtures rely on that); on Windows
|
|
554
|
+
# the closest semantic equivalent is "no CLAUDE.md at
|
|
555
|
+
# all" because we can't fabricate a copy of a missing
|
|
556
|
+
# source. Log the divergence and return without writing.
|
|
557
|
+
print(
|
|
558
|
+
f"self-host: skipping CLAUDE.md copy — source {source} "
|
|
559
|
+
f"missing; on POSIX a dangling symlink would have been "
|
|
560
|
+
f"created instead.",
|
|
561
|
+
file=sys.stderr,
|
|
562
|
+
)
|
|
563
|
+
return claude
|
|
564
|
+
source_bytes = source.read_bytes()
|
|
565
|
+
if (
|
|
566
|
+
claude.is_file()
|
|
567
|
+
and not claude.is_symlink()
|
|
568
|
+
and claude.read_bytes() == source_bytes
|
|
569
|
+
):
|
|
570
|
+
return claude
|
|
571
|
+
if claude.is_symlink() or claude.exists():
|
|
572
|
+
claude.unlink()
|
|
573
|
+
claude.write_bytes(source_bytes)
|
|
574
|
+
reason = "--no-symlink" if force_copy else "Windows host"
|
|
575
|
+
print(
|
|
576
|
+
f"self-host: CLAUDE.md written as a copy of AGENTS.md ({reason}); "
|
|
577
|
+
f"regenerate after AGENTS.md changes.",
|
|
578
|
+
file=sys.stderr,
|
|
579
|
+
)
|
|
580
|
+
return claude
|
|
581
|
+
|
|
582
|
+
if claude.is_symlink():
|
|
583
|
+
try:
|
|
584
|
+
if os.readlink(claude) == desired_target:
|
|
585
|
+
return claude
|
|
586
|
+
except OSError:
|
|
587
|
+
pass
|
|
588
|
+
claude.unlink()
|
|
589
|
+
elif claude.exists():
|
|
590
|
+
# Regular file at CLAUDE.md — replace with symlink per spec.
|
|
591
|
+
claude.unlink()
|
|
592
|
+
claude.symlink_to(desired_target)
|
|
593
|
+
return claude
|
|
594
|
+
|
|
595
|
+
|
|
596
|
+
def _build_projected_to_source_map(
|
|
597
|
+
packs_dir: Path,
|
|
598
|
+
contract: dict,
|
|
599
|
+
) -> dict[Path, Path]:
|
|
600
|
+
"""Build `{projected_relative_path → source_path}` for Phase-1
|
|
601
|
+
self-host output. Used by `diff_against_working_tree` to name the
|
|
602
|
+
source path + regeneration command in drift messages."""
|
|
603
|
+
mapping: dict[Path, Path] = {}
|
|
604
|
+
if "primitive" not in contract or "adapter" not in contract:
|
|
605
|
+
return mapping
|
|
606
|
+
allow = set(SELF_HOST_PACKS)
|
|
607
|
+
for pack_path in sorted(packs_dir.iterdir()):
|
|
608
|
+
if not pack_path.is_dir() or not (pack_path / "pack.toml").exists():
|
|
609
|
+
continue
|
|
610
|
+
if pack_path.name not in allow:
|
|
611
|
+
continue
|
|
612
|
+
for adapter_name in SELF_HOST_ADAPTERS:
|
|
613
|
+
if adapter_name not in contract["adapter"]:
|
|
614
|
+
continue
|
|
615
|
+
for rule in contract["adapter"][adapter_name].get("projection", []):
|
|
616
|
+
primitive_name = rule["primitive"]
|
|
617
|
+
mode = rule["mode"]
|
|
618
|
+
if mode in ("dropped", "degraded-info-log"):
|
|
619
|
+
continue
|
|
620
|
+
primitive = contract["primitive"].get(primitive_name, {})
|
|
621
|
+
source_path = primitive.get("source-path", "").rstrip("/")
|
|
622
|
+
if not source_path:
|
|
623
|
+
continue
|
|
624
|
+
source_dir = pack_path / source_path
|
|
625
|
+
if not source_dir.exists():
|
|
626
|
+
continue
|
|
627
|
+
target_prefix = Path(rule["target-path"].rstrip("/"))
|
|
628
|
+
if mode == "direct-directory":
|
|
629
|
+
for entry in source_dir.iterdir():
|
|
630
|
+
if entry.is_dir():
|
|
631
|
+
mapping[target_prefix / entry.name] = entry
|
|
632
|
+
elif mode == "direct-file":
|
|
633
|
+
for entry in source_dir.iterdir():
|
|
634
|
+
if entry.is_file():
|
|
635
|
+
mapping[target_prefix / entry.name] = entry
|
|
636
|
+
elif mode in ("merge-json", "managed-block-inline"):
|
|
637
|
+
mapping.setdefault(
|
|
638
|
+
Path(rule["target-path"].lstrip("/")),
|
|
639
|
+
source_dir,
|
|
640
|
+
)
|
|
641
|
+
# Seeds
|
|
642
|
+
seeds_dir = pack_path / "seeds"
|
|
643
|
+
if seeds_dir.is_dir():
|
|
644
|
+
for entry in seeds_dir.rglob("*"):
|
|
645
|
+
if entry.is_file():
|
|
646
|
+
mapping.setdefault(entry.relative_to(seeds_dir), entry)
|
|
647
|
+
return mapping
|
|
648
|
+
|
|
649
|
+
|
|
650
|
+
def _lookup_source(
|
|
651
|
+
projected_rel: Path,
|
|
652
|
+
mapping: dict[Path, Path],
|
|
653
|
+
) -> Path | None:
|
|
654
|
+
"""Find the source path for a projected relative path. Walks up the
|
|
655
|
+
projected path looking for a directory-level match and appends the
|
|
656
|
+
remainder (e.g. `.claude/skills/work-loop/SKILL.md` →
|
|
657
|
+
`packs/core/.apm/skills/work-loop/SKILL.md`)."""
|
|
658
|
+
if projected_rel in mapping:
|
|
659
|
+
return mapping[projected_rel]
|
|
660
|
+
for ancestor in projected_rel.parents:
|
|
661
|
+
if ancestor == Path("."):
|
|
662
|
+
continue
|
|
663
|
+
if ancestor in mapping:
|
|
664
|
+
anchor = mapping[ancestor]
|
|
665
|
+
if anchor.is_dir():
|
|
666
|
+
try:
|
|
667
|
+
remainder = projected_rel.relative_to(ancestor)
|
|
668
|
+
except ValueError:
|
|
669
|
+
continue
|
|
670
|
+
return anchor / remainder
|
|
671
|
+
return None
|
|
672
|
+
|
|
673
|
+
|
|
674
|
+
def _emit_info_for_unclassified(
|
|
675
|
+
working_tree: Path,
|
|
676
|
+
projected_paths: set[Path],
|
|
677
|
+
) -> None:
|
|
678
|
+
"""Walk `git ls-files --cached --others --exclude-standard` and emit
|
|
679
|
+
`[info]` lines for paths that are neither Projected nor Excluded.
|
|
680
|
+
|
|
681
|
+
Info-level — does not fail the build. Surfaces omissions so the
|
|
682
|
+
next PR can classify them (per spec § *Always do*). If the working
|
|
683
|
+
tree is not a git repo or git is unavailable, emits a single
|
|
684
|
+
warning rather than silently skipping classification — an operator
|
|
685
|
+
seeing "zero info lines" should not mis-attribute it to "fully
|
|
686
|
+
classified."
|
|
687
|
+
"""
|
|
688
|
+
try:
|
|
689
|
+
result = subprocess.run(
|
|
690
|
+
["git", "ls-files", "--cached", "--others", "--exclude-standard"],
|
|
691
|
+
cwd=working_tree,
|
|
692
|
+
capture_output=True,
|
|
693
|
+
text=True,
|
|
694
|
+
check=False,
|
|
695
|
+
)
|
|
696
|
+
except FileNotFoundError:
|
|
697
|
+
print(
|
|
698
|
+
"self-host: warning — `git` binary not on PATH; "
|
|
699
|
+
"skipping unclassified-path enumeration.",
|
|
700
|
+
file=sys.stderr,
|
|
701
|
+
)
|
|
702
|
+
return
|
|
703
|
+
if result.returncode != 0:
|
|
704
|
+
print(
|
|
705
|
+
f"self-host: warning — `git ls-files` failed in "
|
|
706
|
+
f"{working_tree} (exit {result.returncode}); skipping "
|
|
707
|
+
"unclassified-path enumeration.",
|
|
708
|
+
file=sys.stderr,
|
|
709
|
+
)
|
|
710
|
+
return
|
|
711
|
+
for line in result.stdout.splitlines():
|
|
712
|
+
path_str = line.strip()
|
|
713
|
+
if not path_str:
|
|
714
|
+
continue
|
|
715
|
+
relative = Path(path_str)
|
|
716
|
+
if relative in projected_paths:
|
|
717
|
+
continue
|
|
718
|
+
if _is_excluded(relative):
|
|
719
|
+
continue
|
|
720
|
+
on_disk = working_tree / relative
|
|
721
|
+
if on_disk.is_symlink():
|
|
722
|
+
# CLAUDE.md (projected as symlink) and any other symlinks are
|
|
723
|
+
# implicitly classified — symlink target comparison is Phase 2.
|
|
724
|
+
continue
|
|
725
|
+
print(f"[info] unclassified: {relative.as_posix()}", file=sys.stderr)
|
|
726
|
+
|
|
727
|
+
|
|
728
|
+
def _is_text_like(data: bytes) -> bool:
|
|
729
|
+
"""A file that decodes as UTF-8 is text-like for LF normalisation.
|
|
730
|
+
|
|
731
|
+
Empty files are text. Anything that fails UTF-8 decode is binary —
|
|
732
|
+
binaries that happen to contain a 0x0D 0x0A byte pair must not be
|
|
733
|
+
normalised, since the bytes carry value beyond line termination.
|
|
734
|
+
"""
|
|
735
|
+
if not data:
|
|
736
|
+
return True
|
|
737
|
+
try:
|
|
738
|
+
data.decode("utf-8")
|
|
739
|
+
except UnicodeDecodeError:
|
|
740
|
+
return False
|
|
741
|
+
return True
|
|
742
|
+
|
|
743
|
+
|
|
744
|
+
def _normalise_lf(data: bytes) -> bytes:
|
|
745
|
+
"""Replace CRLF with LF for text-like equality comparison.
|
|
746
|
+
|
|
747
|
+
Bare CR is left in place — it's neither a portable line terminator
|
|
748
|
+
nor a `core.autocrlf` artefact, and rewriting it would hide real
|
|
749
|
+
content drift in test fixtures that exercise mac-classic endings.
|
|
750
|
+
"""
|
|
751
|
+
return data.replace(b"\r\n", b"\n")
|
|
752
|
+
|
|
753
|
+
|
|
754
|
+
def _is_equivalent_claude_md_shape(on_disk: Path, agents_md: Path) -> bool:
|
|
755
|
+
"""Three on-disk shapes are equivalent for the repo-root `CLAUDE.md`
|
|
756
|
+
alias and must not count as drift against the shadow self-host emits:
|
|
757
|
+
|
|
758
|
+
1. A real symlink whose target is ``"AGENTS.md"`` — the POSIX shape
|
|
759
|
+
``_recreate_claude_symlink`` writes on macOS/Linux.
|
|
760
|
+
2. A regular file whose content is byte-equal (after LF
|
|
761
|
+
normalisation) to the disk-side ``AGENTS.md`` — the shape
|
|
762
|
+
``--no-symlink`` and the Windows fallback write.
|
|
763
|
+
3. A regular file whose stripped content is ``"AGENTS.md"`` —
|
|
764
|
+
the shape Git for Windows materialises when ``core.symlinks
|
|
765
|
+
= false``. Trailing-whitespace tolerance (CRLF, LF, none,
|
|
766
|
+
trailing blank lines) mirrors ``lint-agents-md.py`` check
|
|
767
|
+
#2's ``.strip() == "AGENTS.md"`` semantics so an adopter
|
|
768
|
+
that passes the lint also passes the drift gate.
|
|
769
|
+
|
|
770
|
+
The three shapes resolve to the same user-visible content (the
|
|
771
|
+
Claude Code CLI reads either path identically), so cross-shape
|
|
772
|
+
drift is presentational, not substantive. Tampering — a regular
|
|
773
|
+
file with arbitrary unrelated content — still drifts: the helper
|
|
774
|
+
returns ``False`` and the caller falls through to the strict
|
|
775
|
+
comparison path.
|
|
776
|
+
|
|
777
|
+
The shadow side is trusted by construction: every shadow CLAUDE.md
|
|
778
|
+
is produced by ``_recreate_claude_symlink``, which only emits
|
|
779
|
+
shapes 1 or 2. The helper therefore only inspects the on-disk
|
|
780
|
+
side; cross-shape pairings (any shadow × any disk) collapse to
|
|
781
|
+
"is the disk shape a valid CLAUDE.md?".
|
|
782
|
+
"""
|
|
783
|
+
try:
|
|
784
|
+
st = os.lstat(on_disk)
|
|
785
|
+
except OSError:
|
|
786
|
+
return False
|
|
787
|
+
if stat.S_ISLNK(st.st_mode):
|
|
788
|
+
try:
|
|
789
|
+
return os.readlink(on_disk) == "AGENTS.md"
|
|
790
|
+
except OSError:
|
|
791
|
+
return False
|
|
792
|
+
if not stat.S_ISREG(st.st_mode):
|
|
793
|
+
return False
|
|
794
|
+
try:
|
|
795
|
+
disk_bytes = on_disk.read_bytes()
|
|
796
|
+
except OSError:
|
|
797
|
+
return False
|
|
798
|
+
# Decode-then-strip so the helper handles Unicode whitespace
|
|
799
|
+
# (NBSP, IDEOGRAPHIC SPACE, …) the same way the lint's
|
|
800
|
+
# `read_text(errors="replace").strip()` does. Bytewise
|
|
801
|
+
# `disk_bytes.strip()` would only strip ASCII whitespace, leaving
|
|
802
|
+
# a narrow lint-passes / gate-fails asymmetry the docstring's
|
|
803
|
+
# parity promise wouldn't hold. `errors="replace"` matches the
|
|
804
|
+
# lint exactly — invalid UTF-8 maps to U+FFFD and falls out as
|
|
805
|
+
# "not the literal `AGENTS.md` string", correctly routing to the
|
|
806
|
+
# byte-equality fallback below.
|
|
807
|
+
if disk_bytes.decode("utf-8", errors="replace").strip() == "AGENTS.md":
|
|
808
|
+
return True
|
|
809
|
+
try:
|
|
810
|
+
agents_bytes = agents_md.read_bytes()
|
|
811
|
+
except OSError:
|
|
812
|
+
return False
|
|
813
|
+
return _normalise_lf(disk_bytes) == _normalise_lf(agents_bytes)
|
|
814
|
+
|
|
815
|
+
|
|
816
|
+
def diff_against_working_tree(
    shadow: Path,
    working_tree: Path,
    source_map: dict[Path, Path] | None = None,
) -> list[str]:
    """Compare every file in `shadow` against the corresponding path in
    `working_tree`. When `source_map` is provided, drift messages name
    the source path and regeneration command per spec § *Always do*
    (`[drift] <projected>: edit <source>; run: make build-self`).

    Phase-2 strengthening per the self-hosting spec:

    - **CRLF→LF normalisation** for text-like files (those that decode
      as UTF-8). Binary content is compared byte-for-byte. A CRLF-on-
      disk text file no longer drifts against an LF-in-source file
      — the same content shape ``git status`` already accommodates via
      ``core.autocrlf``.
    - **File-mode permission bits** for regular files. A projected
      ``0o644`` against an on-disk ``0o755`` drifts. Only the low 9
      permission bits are compared; setuid/setgid/sticky are not part
      of the projection contract.
    - **Symlink targets via ``lstat``** — the gate never follows a
      symlink. A symlink/regular type mismatch drifts; matching
      symlinks with different targets drift.

    Args:
        shadow: Root of the freshly projected tree; every regular file
            and symlink below it is checked.
        working_tree: Root of the tree the shadow is compared against.
        source_map: Optional projected-path → source-path map used to
            build the "edit <source>" hint.

    Returns:
        One ``[drift] …`` message per differing path; empty when the
        working tree matches the shadow.
    """
    # rwx for user/group/other only. `stat.S_IMODE` would also keep the
    # setuid/setgid/sticky bits, which the contract above excludes.
    permission_mask = stat.S_IRWXU | stat.S_IRWXG | stat.S_IRWXO

    drifts: list[str] = []
    for rendered in shadow.rglob("*"):
        try:
            shadow_st = os.lstat(rendered)
        except OSError:
            continue
        if not (stat.S_ISREG(shadow_st.st_mode) or stat.S_ISLNK(shadow_st.st_mode)):
            continue
        relative = rendered.relative_to(shadow)
        # Honour EXCLUDED_PATTERNS: paths self-host projects advisorily
        # (e.g. gitignored adopter-overrides like `.claude/settings.local.json`)
        # must not count as drift when absent from disk. Same `_is_excluded`
        # check the unclassified-path enumeration in
        # `_emit_info_for_unclassified` applies.
        # `PROJECTED_README_OVERRIDES` still re-includes named paths.
        if _is_excluded(relative):
            continue
        on_disk = working_tree / relative
        is_claude_md_row = relative == Path("CLAUDE.md")

        if is_claude_md_row and _is_equivalent_claude_md_shape(
            on_disk, working_tree / "AGENTS.md"
        ):
            continue

        hint = ""
        if source_map is not None:
            source = _lookup_source(relative, source_map)
            if source is not None:
                hint = (
                    f": edit {source.as_posix()}; run: make build-self"
                )
        if is_claude_md_row:
            # Operator-facing hint mirroring lint-agents-md.py check #2.
            # The equivalence helper rejected this on-disk shape; name
            # the three accepted shapes so the operator can fix without
            # reading the spec.
            hint = (
                f"{hint} [expected one of: symlink → AGENTS.md, "
                f"content-copy of AGENTS.md, or one-line file containing "
                f"'AGENTS.md']"
            )

        try:
            disk_st = os.lstat(on_disk)
        except FileNotFoundError:
            drifts.append(
                f"[drift] {relative.as_posix()} (missing on disk){hint}"
            )
            continue
        except OSError as exc:
            drifts.append(
                f"[drift] {relative.as_posix()} (unreadable: {exc}){hint}"
            )
            continue

        shadow_is_link = stat.S_ISLNK(shadow_st.st_mode)
        disk_is_link = stat.S_ISLNK(disk_st.st_mode)

        if shadow_is_link != disk_is_link:
            expected = "symlink" if shadow_is_link else "regular file"
            found = "regular file" if shadow_is_link else "symlink"
            drifts.append(
                f"[drift] {relative.as_posix()} "
                f"(expected {expected}, found {found} on disk){hint}"
            )
            continue

        if shadow_is_link:
            # Both sides are symlinks: compare targets, never contents.
            try:
                shadow_target = os.readlink(rendered)
                disk_target = os.readlink(on_disk)
            except OSError as exc:
                drifts.append(
                    f"[drift] {relative.as_posix()} "
                    f"(unreadable symlink: {exc}){hint}"
                )
                continue
            if shadow_target != disk_target:
                drifts.append(
                    f"[drift] {relative.as_posix()} "
                    f"(symlink target differs: {disk_target!r} vs {shadow_target!r})"
                    f"{hint}"
                )
            continue

        # Regular files: collect every reason so one message reports both
        # a mode and a content difference.
        reasons: list[str] = []

        shadow_mode = shadow_st.st_mode & permission_mask
        disk_mode = disk_st.st_mode & permission_mask
        if shadow_mode != disk_mode:
            reasons.append(f"mode {oct(disk_mode)} vs {oct(shadow_mode)}")

        try:
            shadow_bytes = rendered.read_bytes()
            disk_bytes = on_disk.read_bytes()
        except OSError as exc:
            drifts.append(
                f"[drift] {relative.as_posix()} (unreadable: {exc}){hint}"
            )
            continue

        if shadow_bytes != disk_bytes:
            if _is_text_like(shadow_bytes) and _is_text_like(disk_bytes):
                # Text on both sides: only a difference that survives
                # CRLF→LF normalisation is drift.
                if _normalise_lf(shadow_bytes) != _normalise_lf(disk_bytes):
                    reasons.append("content differs")
            else:
                reasons.append("content differs")

        if reasons:
            tag = " (" + "; ".join(reasons) + ")"
            drifts.append(f"[drift] {relative.as_posix()}{tag}{hint}")
    return drifts
|
|
953
|
+
|
|
954
|
+
|
|
955
|
+
def run_self_host(
|
|
956
|
+
working_tree: Path,
|
|
957
|
+
packs_dir: Path,
|
|
958
|
+
dry_run: bool,
|
|
959
|
+
force: bool,
|
|
960
|
+
contract: dict | None = None,
|
|
961
|
+
no_symlink: bool = False,
|
|
962
|
+
) -> int:
|
|
963
|
+
"""Execute `make build-self` (or `make build-self DRY_RUN=1`).
|
|
964
|
+
|
|
965
|
+
Phase-1 orchestration: dirty-tree refusal → fail-fast on missing
|
|
966
|
+
`.adapt-discovery.toml` → adapter projection (allow-listed) → seed
|
|
967
|
+
projection → marketplace aggregation → CLAUDE.md symlink → marker
|
|
968
|
+
resolution. Under `dry_run`, all writes happen in a shadow temp
|
|
969
|
+
dir and the result is diffed against the working tree.
|
|
970
|
+
"""
|
|
971
|
+
if contract is None:
|
|
972
|
+
contract = load_contract(CONTRACT_PATH)
|
|
973
|
+
|
|
974
|
+
if not dry_run and is_dirty_tree(working_tree) and not force:
|
|
975
|
+
print(
|
|
976
|
+
"self-host: working tree is dirty — refusing to write. "
|
|
977
|
+
"Pass --force to override (the dirty-tree check only).",
|
|
978
|
+
file=sys.stderr,
|
|
979
|
+
)
|
|
980
|
+
return 2
|
|
981
|
+
|
|
982
|
+
# AC14: fail-fast when .adapt-discovery.toml is missing. The file is
|
|
983
|
+
# required by `make build-self` even when no source carries
|
|
984
|
+
# `<adapt:NAME>` markers today — the contract is "if you run
|
|
985
|
+
# build-self, you affirm the discovery values exist."
|
|
986
|
+
discovery_path = working_tree / ".adapt-discovery.toml"
|
|
987
|
+
if not discovery_path.exists():
|
|
988
|
+
print(
|
|
989
|
+
"missing .adapt-discovery.toml required by --self",
|
|
990
|
+
file=sys.stderr,
|
|
991
|
+
)
|
|
992
|
+
return 3
|
|
993
|
+
|
|
994
|
+
# AC9: read `.adapt-discovery.toml` via the typed loader. Legacy
|
|
995
|
+
# `[adapt]` table, unknown `discovery-schema-version`, and any
|
|
996
|
+
# other invalid shape surface as `ConfigError` and refuse with
|
|
997
|
+
# the `self-host: ` prefix per spec.
|
|
998
|
+
from agentbundle.config import ConfigError, load_adapt_discovery_typed
|
|
999
|
+
|
|
1000
|
+
try:
|
|
1001
|
+
discovery = load_adapt_discovery_typed(discovery_path, scope="repo")
|
|
1002
|
+
except ConfigError as exc:
|
|
1003
|
+
print(f"self-host: {exc}", file=sys.stderr)
|
|
1004
|
+
return 3
|
|
1005
|
+
discovery_flat = dict(discovery.markers)
|
|
1006
|
+
owner = discovery_flat.get("owner", "eugenelim")
|
|
1007
|
+
|
|
1008
|
+
if dry_run:
|
|
1009
|
+
with tempfile.TemporaryDirectory(prefix="agentbundle-shadow-") as shadow_str:
|
|
1010
|
+
shadow = Path(shadow_str)
|
|
1011
|
+
_clone_target_subtree(working_tree, shadow)
|
|
1012
|
+
_project_all_adapters(shadow, packs_dir, contract)
|
|
1013
|
+
# Compose AGENTS.md BEFORE seed projection: on a fresh tree the
|
|
1014
|
+
# composed output (body + footer) must win over the body-only
|
|
1015
|
+
# seed at `packs/core/seeds/AGENTS.md`; on an existing tree
|
|
1016
|
+
# both layers honour the preserve-on-disk gate and leave the
|
|
1017
|
+
# live file alone.
|
|
1018
|
+
agents_path = _compose_agents_md(packs_dir, shadow, contract)
|
|
1019
|
+
try:
|
|
1020
|
+
seed_map = _project_seeds(packs_dir, shadow)
|
|
1021
|
+
except ValueError as exc:
|
|
1022
|
+
print(f"self-host: {exc}", file=sys.stderr)
|
|
1023
|
+
return 4
|
|
1024
|
+
_aggregate_marketplace(packs_dir, shadow, owner=owner)
|
|
1025
|
+
_recreate_claude_symlink(shadow, force_copy=no_symlink)
|
|
1026
|
+
extra_marker_paths = list(seed_map.keys()) + [
|
|
1027
|
+
Path(".claude-plugin") / "marketplace.json",
|
|
1028
|
+
]
|
|
1029
|
+
if agents_path is not None:
|
|
1030
|
+
extra_marker_paths.append(Path("AGENTS.md"))
|
|
1031
|
+
resolve_markers(shadow, discovery_flat, extra_paths=extra_marker_paths)
|
|
1032
|
+
source_map = _build_projected_to_source_map(packs_dir, contract)
|
|
1033
|
+
projected_paths = {
|
|
1034
|
+
rendered.relative_to(shadow)
|
|
1035
|
+
for rendered in shadow.rglob("*")
|
|
1036
|
+
if rendered.is_file() or rendered.is_symlink()
|
|
1037
|
+
}
|
|
1038
|
+
drifts = diff_against_working_tree(shadow, working_tree, source_map)
|
|
1039
|
+
# AC6: info-level lines for unclassified paths.
|
|
1040
|
+
_emit_info_for_unclassified(working_tree, projected_paths)
|
|
1041
|
+
if drifts:
|
|
1042
|
+
print(
|
|
1043
|
+
f"self-host: dry-run found {len(drifts)} drift(s):",
|
|
1044
|
+
file=sys.stderr,
|
|
1045
|
+
)
|
|
1046
|
+
for drift in drifts:
|
|
1047
|
+
print(f" {drift}", file=sys.stderr)
|
|
1048
|
+
return 1
|
|
1049
|
+
return 0
|
|
1050
|
+
|
|
1051
|
+
# Real write: project directly into the working tree so adapter
|
|
1052
|
+
# merge/splice logic sees existing content.
|
|
1053
|
+
# T4: project shared-libs/ into consumer-skill scripts/ FIRST so the
|
|
1054
|
+
# adapter projection that follows picks up the freshest shim files.
|
|
1055
|
+
# Inter-pack basename collision raises ValueError; surface as
|
|
1056
|
+
# self-host: <msg> and exit 5.
|
|
1057
|
+
try:
|
|
1058
|
+
_shared_libs_apply(packs_dir)
|
|
1059
|
+
except ValueError as exc:
|
|
1060
|
+
print(f"self-host: {exc}", file=sys.stderr)
|
|
1061
|
+
return 5
|
|
1062
|
+
# T6: project adapter-root-bins/ into <working_tree>/.agentbundle/bin/
|
|
1063
|
+
# with 0o755 on POSIX. Inter-pack basename collision raises
|
|
1064
|
+
# ValueError; surface as self-host: <msg> and exit 5.
|
|
1065
|
+
try:
|
|
1066
|
+
_adapter_root_bins_apply(working_tree, packs_dir)
|
|
1067
|
+
except ValueError as exc:
|
|
1068
|
+
print(f"self-host: {exc}", file=sys.stderr)
|
|
1069
|
+
return 5
|
|
1070
|
+
_project_all_adapters(working_tree, packs_dir, contract)
|
|
1071
|
+
# Compose AGENTS.md BEFORE seed projection — see dry-run branch for
|
|
1072
|
+
# rationale (the body-only seed at packs/core/seeds/AGENTS.md must
|
|
1073
|
+
# not race the body+footer composition on fresh trees).
|
|
1074
|
+
agents_path = _compose_agents_md(packs_dir, working_tree, contract)
|
|
1075
|
+
try:
|
|
1076
|
+
seed_map = _project_seeds(packs_dir, working_tree)
|
|
1077
|
+
except ValueError as exc:
|
|
1078
|
+
print(f"self-host: {exc}", file=sys.stderr)
|
|
1079
|
+
return 4
|
|
1080
|
+
_aggregate_marketplace(packs_dir, working_tree, owner=owner)
|
|
1081
|
+
_recreate_claude_symlink(working_tree, force_copy=no_symlink)
|
|
1082
|
+
extra_marker_paths = list(seed_map.keys()) + [
|
|
1083
|
+
Path(".claude-plugin") / "marketplace.json",
|
|
1084
|
+
]
|
|
1085
|
+
if agents_path is not None:
|
|
1086
|
+
extra_marker_paths.append(Path("AGENTS.md"))
|
|
1087
|
+
resolve_markers(working_tree, discovery_flat, extra_paths=extra_marker_paths)
|
|
1088
|
+
return 0
|
|
1089
|
+
|
|
1090
|
+
|
|
1091
|
+
# ---------------------------------------------------------------------------
|
|
1092
|
+
# Build-check drift gates (AC10 gate 2 + AC20a + AC20b)
|
|
1093
|
+
# ---------------------------------------------------------------------------
|
|
1094
|
+
|
|
1095
|
+
# Fixed corpus for the _emit_basic_string parity check (AC20b).
|
|
1096
|
+
# Covers: control chars (including each short-escape table entry),
|
|
1097
|
+
# byte-boundary cases at \x20 and \x7e, embedded quote + backslash,
|
|
1098
|
+
# empty string, multi-byte BMP unicode, non-BMP (4-byte UTF-8), and
|
|
1099
|
+
# a lone surrogate codepoint — the "attack-shaped inputs" the spec
|
|
1100
|
+
# names plus the branch surface the source primitive exposes.
|
|
1101
|
+
#
|
|
1102
|
+
# Maintenance contract: extend this corpus whenever
|
|
1103
|
+
# `agentbundle.config._emit_basic_string` learns a new input class
|
|
1104
|
+
# (new short-escape, new refusal rail). The parity check is only as
|
|
1105
|
+
# wide as the corpus.
|
|
1106
|
+
_EMIT_BASIC_STRING_CORPUS: tuple[str, ...] = (
|
|
1107
|
+
"\x00", # NUL — must be \\u0000
|
|
1108
|
+
"\x01", # SOH
|
|
1109
|
+
"\x08", # backspace — short-escape \\b
|
|
1110
|
+
"\t", # tab — short-escape \\t
|
|
1111
|
+
"\n", # newline — short-escape \\n
|
|
1112
|
+
"\x0c", # form feed — short-escape \\f
|
|
1113
|
+
"\r", # carriage return — short-escape \\r
|
|
1114
|
+
"\x1f", # US — last forbidden control char before space
|
|
1115
|
+
"\x20", # SPACE — first verbatim byte (boundary)
|
|
1116
|
+
"\x7e", # ~ — last verbatim byte before DEL (boundary)
|
|
1117
|
+
"\x7f", # DEL — must be \\u007F
|
|
1118
|
+
'"', # embedded double-quote
|
|
1119
|
+
"\\", # embedded backslash
|
|
1120
|
+
"", # empty string
|
|
1121
|
+
"café", # multi-byte BMP (U+00E9, 2-byte UTF-8)
|
|
1122
|
+
"\U0001F4A9", # non-BMP (U+1F4A9, 4-byte UTF-8 / surrogate-pair territory)
|
|
1123
|
+
"\ud800", # lone high surrogate — invalid as UTF-8, but Python str accepts it
|
|
1124
|
+
)
|
|
1125
|
+
|
|
1126
|
+
|
|
1127
|
+
def _resolve_install_marker_template_path() -> Path:
    """Locate install-marker.py as a real filesystem path.

    Resolution order (mirrors _read_install_marker_template in main.py):

    1. ``<package>/_data/install-marker.py`` via importlib.resources —
       used when the resource exists and its string form names a file
       that exists on disk (filesystem installs whose ``_data/``
       directory carries the synced copy).
    2. ``<repo>/packages/agentbundle/templates/install-marker.py`` — dev
       fallback for source trees whose ``_data/`` hasn't been synced.

    The result is a bare ``Path``. Zipapp-internal resources (no on-disk
    path) are not supported yet; the build-check gate runs against repo
    checkouts and pip-installed packages, not zipapps. If zipapp coverage
    is needed later, extend this function to materialise the resource to
    a tempfile.

    Returns:
        The packaged copy when it is reachable on disk, otherwise the
        dev-checkout path (which is returned without checking that it
        exists).
    """
    packaged: Path | None = None
    try:
        from importlib.resources import files

        resource = files("agentbundle").joinpath("_data/install-marker.py")
        if resource.is_file():
            # For an on-disk install the traversable's string form is the
            # file's path; confirm that before trusting it.
            on_disk = Path(str(resource))
            if on_disk.exists():
                packaged = on_disk
    except (FileNotFoundError, ModuleNotFoundError):
        # Package data not reachable: fall through to the dev checkout.
        pass

    if packaged is not None:
        return packaged
    # Dev-checkout fallback.
    return REPO_ROOT / "packages" / "agentbundle" / "templates" / "install-marker.py"
|
|
1158
|
+
|
|
1159
|
+
|
|
1160
|
+
def _load_emit_basic_string_from_template(
|
|
1161
|
+
template_path: Path,
|
|
1162
|
+
) -> object:
|
|
1163
|
+
"""Load the ``_emit_basic_string`` function from the writer template.
|
|
1164
|
+
|
|
1165
|
+
Uses ``importlib.util.spec_from_file_location`` to import the template
|
|
1166
|
+
as a module without running its ``__main__`` block (the script guards
|
|
1167
|
+
with ``if __name__ == "__main__":``).
|
|
1168
|
+
|
|
1169
|
+
Returns the function object, or raises ``ImportError`` / ``AttributeError``
|
|
1170
|
+
if the template does not expose the expected symbol.
|
|
1171
|
+
"""
|
|
1172
|
+
spec = importlib.util.spec_from_file_location(
|
|
1173
|
+
"_install_marker_template_for_check", template_path
|
|
1174
|
+
)
|
|
1175
|
+
if spec is None or spec.loader is None:
|
|
1176
|
+
raise ImportError(
|
|
1177
|
+
f"build-check: cannot load template module from {template_path}"
|
|
1178
|
+
)
|
|
1179
|
+
mod = importlib.util.module_from_spec(spec)
|
|
1180
|
+
spec.loader.exec_module(mod) # type: ignore[union-attr]
|
|
1181
|
+
return getattr(mod, "_emit_basic_string")
|
|
1182
|
+
|
|
1183
|
+
|
|
1184
|
+
def run_build_check_drift_gates(
|
|
1185
|
+
output_dir: Path,
|
|
1186
|
+
packs_dir: Path,
|
|
1187
|
+
) -> int:
|
|
1188
|
+
"""Run the three mechanical drift-gate assertions wired into ``make build-check``.
|
|
1189
|
+
|
|
1190
|
+
1. **Writer-template drift (AC20a):** every derived
|
|
1191
|
+
``dist/claude-plugins/<pack>/.claude-plugin/scripts/install-marker.py``
|
|
1192
|
+
must be byte-identical to the canonical template.
|
|
1193
|
+
2. **Source-shape plugin.json (AC10 gate 2):** every
|
|
1194
|
+
``packs/<pack>/.claude-plugin/plugin.json`` must NOT carry a ``hooks``
|
|
1195
|
+
block (defence-in-depth, in-Python rail).
|
|
1196
|
+
3. **Vendored ``_emit_basic_string`` parity (AC20b):** the template's
|
|
1197
|
+
vendored copy must produce byte-identical output to the source primitive
|
|
1198
|
+
``agentbundle.config._emit_basic_string`` across the fixed corpus.
|
|
1199
|
+
|
|
1200
|
+
Returns 0 on success, 1 on any failure (all failures reported to stderr
|
|
1201
|
+
before exit so the operator sees all drift in one run).
|
|
1202
|
+
"""
|
|
1203
|
+
failures: list[str] = []
|
|
1204
|
+
|
|
1205
|
+
# ------------------------------------------------------------------
|
|
1206
|
+
# Gate 1: Writer-template drift (AC20a)
|
|
1207
|
+
#
|
|
1208
|
+
# Cross-validate `packs/` (source of truth) against
|
|
1209
|
+
# `<output_dir>/dist/claude-plugins/` (build output). For every source
|
|
1210
|
+
# pack carrying `.claude-plugin/plugin.json`, the derived projection
|
|
1211
|
+
# MUST exist and MUST be byte-identical to the canonical template.
|
|
1212
|
+
# `make build-check` depends on `build` so the `dist/` tree is always
|
|
1213
|
+
# populated when this gate runs; a missing `dist/` is a hard failure,
|
|
1214
|
+
# not a silent skip.
|
|
1215
|
+
# ------------------------------------------------------------------
|
|
1216
|
+
template_path = _resolve_install_marker_template_path()
|
|
1217
|
+
if not template_path.exists():
|
|
1218
|
+
failures.append(
|
|
1219
|
+
f"build-check: canonical install-marker template not found at "
|
|
1220
|
+
f"{template_path}; cannot run writer-template drift check"
|
|
1221
|
+
)
|
|
1222
|
+
elif not packs_dir.is_dir():
|
|
1223
|
+
failures.append(
|
|
1224
|
+
f"build-check: packs_dir {packs_dir} not a directory; cannot "
|
|
1225
|
+
f"enumerate Claude-plugins-route packs for drift check"
|
|
1226
|
+
)
|
|
1227
|
+
else:
|
|
1228
|
+
template_hash = hashlib.sha256(template_path.read_bytes()).hexdigest()
|
|
1229
|
+
dist_plugins = output_dir / "dist" / "claude-plugins"
|
|
1230
|
+
expected_packs = [
|
|
1231
|
+
pack_dir
|
|
1232
|
+
for pack_dir in sorted(packs_dir.iterdir())
|
|
1233
|
+
if pack_dir.is_dir()
|
|
1234
|
+
and (pack_dir / "pack.toml").exists()
|
|
1235
|
+
and (pack_dir / ".claude-plugin" / "plugin.json").exists()
|
|
1236
|
+
]
|
|
1237
|
+
if expected_packs and not dist_plugins.is_dir():
|
|
1238
|
+
failures.append(
|
|
1239
|
+
f"build-check: writer-template drift — dist/claude-plugins/ "
|
|
1240
|
+
f"not present at {dist_plugins} (run `make build` before "
|
|
1241
|
+
f"`make build-check`, or use the `build-check` target which "
|
|
1242
|
+
f"depends on `build`)"
|
|
1243
|
+
)
|
|
1244
|
+
else:
|
|
1245
|
+
for pack_dir in expected_packs:
|
|
1246
|
+
derived_marker = (
|
|
1247
|
+
dist_plugins
|
|
1248
|
+
/ pack_dir.name
|
|
1249
|
+
/ ".claude-plugin"
|
|
1250
|
+
/ "scripts"
|
|
1251
|
+
/ "install-marker.py"
|
|
1252
|
+
)
|
|
1253
|
+
if not derived_marker.exists():
|
|
1254
|
+
failures.append(
|
|
1255
|
+
f"build-check: writer-template drift — "
|
|
1256
|
+
f"pack {pack_dir.name} has a source plugin.json but "
|
|
1257
|
+
f"no projected install-marker.py at {derived_marker} "
|
|
1258
|
+
f"(derivation rail broken or partial build)"
|
|
1259
|
+
)
|
|
1260
|
+
continue
|
|
1261
|
+
derived_hash = hashlib.sha256(
|
|
1262
|
+
derived_marker.read_bytes()
|
|
1263
|
+
).hexdigest()
|
|
1264
|
+
if derived_hash != template_hash:
|
|
1265
|
+
failures.append(
|
|
1266
|
+
f"build-check: writer-template drift — "
|
|
1267
|
+
f"{pack_dir.name}/.claude-plugin/scripts/install-marker.py "
|
|
1268
|
+
f"diverges from canonical template at {template_path}"
|
|
1269
|
+
)
|
|
1270
|
+
|
|
1271
|
+
# ------------------------------------------------------------------
|
|
1272
|
+
# Gate 1b: _data/ ↔ templates/ parity (Concern 6)
|
|
1273
|
+
#
|
|
1274
|
+
# `packages/agentbundle/agentbundle/_data/install-marker.py` is the
|
|
1275
|
+
# zipapp-reachable copy of the canonical template at
|
|
1276
|
+
# `packages/agentbundle/templates/install-marker.py`. They must be
|
|
1277
|
+
# byte-identical; drift means a security fix was applied to one but
|
|
1278
|
+
# not the other. Resync with:
|
|
1279
|
+
# cp packages/agentbundle/templates/install-marker.py \
|
|
1280
|
+
# packages/agentbundle/agentbundle/_data/install-marker.py
|
|
1281
|
+
# ------------------------------------------------------------------
|
|
1282
|
+
_data_path = REPO_ROOT / "packages" / "agentbundle" / "agentbundle" / "_data" / "install-marker.py"
|
|
1283
|
+
_tmpl_path = REPO_ROOT / "packages" / "agentbundle" / "templates" / "install-marker.py"
|
|
1284
|
+
if _data_path.exists() and _tmpl_path.exists():
|
|
1285
|
+
if _data_path.read_bytes() != _tmpl_path.read_bytes():
|
|
1286
|
+
failures.append(
|
|
1287
|
+
"build-check: _data/install-marker.py diverges from "
|
|
1288
|
+
"templates/install-marker.py — run "
|
|
1289
|
+
"`cp packages/agentbundle/templates/install-marker.py "
|
|
1290
|
+
"packages/agentbundle/agentbundle/_data/install-marker.py` "
|
|
1291
|
+
"to re-sync"
|
|
1292
|
+
)
|
|
1293
|
+
|
|
1294
|
+
# ------------------------------------------------------------------
|
|
1295
|
+
# Gate 1c: APM writer-template drift (apm-install-route-parity AC16 a)
|
|
1296
|
+
#
|
|
1297
|
+
# Every dist/apm/<pack>/.apm/hooks/install-marker.py must be byte-
|
|
1298
|
+
# identical to the canonical template. Same rail as Gate 1 (claude-
|
|
1299
|
+
# plugins side); extends the surface to the APM projection so a future
|
|
1300
|
+
# implementer who accidentally diverges the APM-projected writer (or
|
|
1301
|
+
# forgets to refresh dist/apm/ after editing the template) is caught
|
|
1302
|
+
# at make build-check. APM packs are every pack — the apm derivation
|
|
1303
|
+
# runs on the full packs_dir, not just packs declaring claude-plugin.
|
|
1304
|
+
# ------------------------------------------------------------------
|
|
1305
|
+
if template_path.exists() and packs_dir.is_dir():
|
|
1306
|
+
template_hash_apm = hashlib.sha256(template_path.read_bytes()).hexdigest()
|
|
1307
|
+
dist_apm = output_dir / "dist" / "apm"
|
|
1308
|
+
apm_packs = [
|
|
1309
|
+
pack_dir
|
|
1310
|
+
for pack_dir in sorted(packs_dir.iterdir())
|
|
1311
|
+
if pack_dir.is_dir() and (pack_dir / "pack.toml").exists()
|
|
1312
|
+
]
|
|
1313
|
+
if apm_packs and not dist_apm.is_dir():
|
|
1314
|
+
failures.append(
|
|
1315
|
+
f"build-check: APM writer-template drift — dist/apm/ not "
|
|
1316
|
+
f"present at {dist_apm} (run `make build` before "
|
|
1317
|
+
f"`make build-check`)"
|
|
1318
|
+
)
|
|
1319
|
+
else:
|
|
1320
|
+
for pack_dir in apm_packs:
|
|
1321
|
+
apm_marker = (
|
|
1322
|
+
dist_apm / pack_dir.name / ".apm" / "hooks" / "install-marker.py"
|
|
1323
|
+
)
|
|
1324
|
+
if not apm_marker.exists():
|
|
1325
|
+
failures.append(
|
|
1326
|
+
f"build-check: APM writer-template drift — "
|
|
1327
|
+
f"pack {pack_dir.name} has no projected APM "
|
|
1328
|
+
f"install-marker.py at {apm_marker} "
|
|
1329
|
+
f"(APM derivation rail broken or partial build)"
|
|
1330
|
+
)
|
|
1331
|
+
continue
|
|
1332
|
+
if hashlib.sha256(apm_marker.read_bytes()).hexdigest() != template_hash_apm:
|
|
1333
|
+
failures.append(
|
|
1334
|
+
f"build-check: APM writer-template drift — "
|
|
1335
|
+
f"dist/apm/{pack_dir.name}/.apm/hooks/install-marker.py "
|
|
1336
|
+
f"diverges from canonical template at {template_path}"
|
|
1337
|
+
)
|
|
1338
|
+
|
|
1339
|
+
# ------------------------------------------------------------------
|
|
1340
|
+
# Gate 2: Source-shape plugin.json (AC10 gate 2)
|
|
1341
|
+
# ------------------------------------------------------------------
|
|
1342
|
+
if packs_dir.is_dir():
|
|
1343
|
+
for pack_dir in sorted(packs_dir.iterdir()):
|
|
1344
|
+
if not pack_dir.is_dir() or not (pack_dir / "pack.toml").exists():
|
|
1345
|
+
continue
|
|
1346
|
+
plugin_json_path = pack_dir / ".claude-plugin" / "plugin.json"
|
|
1347
|
+
if not plugin_json_path.exists():
|
|
1348
|
+
continue
|
|
1349
|
+
try:
|
|
1350
|
+
manifest = json.loads(plugin_json_path.read_text(encoding="utf-8"))
|
|
1351
|
+
except (json.JSONDecodeError, OSError) as exc:
|
|
1352
|
+
failures.append(
|
|
1353
|
+
f"build-check: source-shape drift — "
|
|
1354
|
+
f"packs/{pack_dir.name}/.claude-plugin/plugin.json "
|
|
1355
|
+
f"could not be parsed: {exc}"
|
|
1356
|
+
)
|
|
1357
|
+
continue
|
|
1358
|
+
if "hooks" in manifest:
|
|
1359
|
+
failures.append(
|
|
1360
|
+
f"build-check: source-shape drift — "
|
|
1361
|
+
f"packs/{pack_dir.name}/.claude-plugin/plugin.json "
|
|
1362
|
+
f"carries a hooks block (forbidden at source per AC10)"
|
|
1363
|
+
)
|
|
1364
|
+
|
|
1365
|
+
# ------------------------------------------------------------------
|
|
1366
|
+
# Gate 3: Vendored _emit_basic_string parity (AC20b)
|
|
1367
|
+
# ------------------------------------------------------------------
|
|
1368
|
+
if template_path.exists():
|
|
1369
|
+
try:
|
|
1370
|
+
template_emit = _load_emit_basic_string_from_template(template_path)
|
|
1371
|
+
except (ImportError, AttributeError) as exc:
|
|
1372
|
+
failures.append(
|
|
1373
|
+
f"build-check: _emit_basic_string parity — "
|
|
1374
|
+
f"failed to load from template: {exc}"
|
|
1375
|
+
)
|
|
1376
|
+
template_emit = None
|
|
1377
|
+
|
|
1378
|
+
if template_emit is not None:
|
|
1379
|
+
try:
|
|
1380
|
+
from agentbundle.config import _emit_basic_string as source_emit
|
|
1381
|
+
except ImportError as exc:
|
|
1382
|
+
failures.append(
|
|
1383
|
+
f"build-check: _emit_basic_string parity — "
|
|
1384
|
+
f"failed to import source: {exc}"
|
|
1385
|
+
)
|
|
1386
|
+
source_emit = None
|
|
1387
|
+
|
|
1388
|
+
if source_emit is not None:
|
|
1389
|
+
# The corpus is all-str; neither function should raise for
|
|
1390
|
+
# any input. Catch the narrow set of exceptions either side
|
|
1391
|
+
# is documented to raise, and include the exception class in
|
|
1392
|
+
# the sentinel so a type-only divergence (same message,
|
|
1393
|
+
# different exception class) is detected. Any other
|
|
1394
|
+
# exception class indicates an internal bug — let it
|
|
1395
|
+
# propagate so build-check reports "internal error" rather
|
|
1396
|
+
# than mislabelled drift.
|
|
1397
|
+
_PARITY_EXPECTED_EXC = (ValueError, TypeError)
|
|
1398
|
+
for test_input in _EMIT_BASIC_STRING_CORPUS:
|
|
1399
|
+
try:
|
|
1400
|
+
source_out: object = source_emit(test_input)
|
|
1401
|
+
except _PARITY_EXPECTED_EXC as exc:
|
|
1402
|
+
source_out = f"<{type(exc).__name__}: {exc}>"
|
|
1403
|
+
try:
|
|
1404
|
+
template_out: object = template_emit(test_input)
|
|
1405
|
+
except _PARITY_EXPECTED_EXC as exc:
|
|
1406
|
+
template_out = f"<{type(exc).__name__}: {exc}>"
|
|
1407
|
+
if source_out != template_out:
|
|
1408
|
+
failures.append(
|
|
1409
|
+
f"build-check: emit_basic_string drift — "
|
|
1410
|
+
f"vendored copy diverges from source on input "
|
|
1411
|
+
f"{test_input!r}: source={source_out!r}, "
|
|
1412
|
+
f"vendored={template_out!r}"
|
|
1413
|
+
)
|
|
1414
|
+
|
|
1415
|
+
# ------------------------------------------------------------------
|
|
1416
|
+
# Gate: shared-libs projection drift (RFC-0013 § 4c).
|
|
1417
|
+
#
|
|
1418
|
+
# Three outcomes — modified / missing / orphaned — surfaced as one
|
|
1419
|
+
# line per drift. Inter-pack basename collision short-circuits to
|
|
1420
|
+
# a single description (the projection cannot proceed).
|
|
1421
|
+
# ------------------------------------------------------------------
|
|
1422
|
+
for msg in _shared_libs_check_drift(packs_dir):
|
|
1423
|
+
failures.append(msg)
|
|
1424
|
+
|
|
1425
|
+
# ------------------------------------------------------------------
|
|
1426
|
+
# Gate: adapter-root-bins projection drift (RFC-0013 § 4d).
|
|
1427
|
+
#
|
|
1428
|
+
# Same three outcomes — modified / missing / orphaned. Single-target
|
|
1429
|
+
# projection (not many-to-many like shared-libs) so the diagnostic
|
|
1430
|
+
# shape is simpler.
|
|
1431
|
+
# ------------------------------------------------------------------
|
|
1432
|
+
for msg in _adapter_root_bins_check_drift(output_dir, packs_dir):
|
|
1433
|
+
failures.append(msg)
|
|
1434
|
+
|
|
1435
|
+
if failures:
|
|
1436
|
+
for msg in failures:
|
|
1437
|
+
print(msg, file=sys.stderr)
|
|
1438
|
+
return 1
|
|
1439
|
+
return 0
|
|
1440
|
+
|
|
1441
|
+
|
|
1442
|
+
def cmd_self(args) -> int:
    """Run the self-host projection with options taken from parsed CLI args.

    ``args.output_dir`` and ``args.packs_dir`` are resolved to absolute
    paths and handed to ``run_self_host`` together with the ``dry_run``
    and ``force`` flags. The exit code of ``run_self_host`` is returned
    unchanged.
    """
    tree_root = Path(args.output_dir).resolve()
    packs_root = Path(args.packs_dir).resolve()
    is_dry_run = args.dry_run
    is_forced = args.force
    # ``no_symlink`` is read with a default so that an ``args`` object
    # lacking the attribute still works.
    skip_symlinks = getattr(args, "no_symlink", False)
    return run_self_host(
        working_tree=tree_root,
        packs_dir=packs_root,
        dry_run=is_dry_run,
        force=is_forced,
        no_symlink=skip_symlinks,
    )
|
|
1450
|
+
|
|
1451
|
+
|
|
1452
|
+
def cmd_check(args) -> int:
    """`make build-check` — strict dry-run against the working tree.

    Two phases run in order, and both always run:

    1. ``run_self_host`` in dry-run mode (``dry_run=True``,
       ``force=False``) against the resolved ``args.output_dir``.
    2. ``run_build_check_drift_gates`` against the same resolved output
       and packs directories.

    Returns the larger of the two exit codes, so the overall check
    passes (exit 0) only when both phases return 0.
    """
    tree_root = Path(args.output_dir).resolve()
    packs_root = Path(args.packs_dir).resolve()

    # Tuple elements are evaluated left to right, so the self-host
    # dry-run completes before the drift gates start.
    phase_codes = (
        run_self_host(
            working_tree=tree_root,
            packs_dir=packs_root,
            dry_run=True,
            force=False,
            no_symlink=getattr(args, "no_symlink", False),
        ),
        run_build_check_drift_gates(tree_root, packs_root),
    )
    return max(phase_codes)
|
|
1476
|
+
|
|
1477
|
+
|
|
1478
|
+
# Re-export project_to_temp for any external caller that still relies
|
|
1479
|
+
# on the older API (tests previously imported this helper). The new
|
|
1480
|
+
# self-host implementation uses _project_all_adapters internally
|
|
1481
|
+
# against the working tree (or a shadow clone of it).
|
|
1482
|
+
def project_to_temp(working_tree: Path, packs_dir: Path, contract: dict) -> Path:
    """Project all adapters into a fresh temporary clone of the working tree.

    Creates a new temporary directory (prefix ``agentbundle-self-``),
    clones the target subtree of ``working_tree`` into it, and runs the
    adapter projection against that clone.

    Args:
        working_tree: Tree passed to ``_clone_target_subtree`` as the
            clone source.
        packs_dir: Packs directory passed to ``_project_all_adapters``.
        contract: Contract mapping passed to ``_project_all_adapters``.

    Returns:
        Path of the temporary directory. On success the caller owns it
        and is responsible for deleting it.

    Raises:
        Whatever ``_clone_target_subtree`` or ``_project_all_adapters``
        raise. In that case the temporary directory is removed before
        the exception propagates, because the caller never receives the
        path and could not clean it up.
    """
    # Local import: the file's top-level imports are not visible from
    # this block, so do not rely on ``shutil`` being imported there.
    import shutil

    temp_dir = Path(tempfile.mkdtemp(prefix="agentbundle-self-"))
    try:
        _clone_target_subtree(working_tree, temp_dir)
        _project_all_adapters(temp_dir, packs_dir, contract)
    except BaseException:
        # Best-effort cleanup; never mask the original error.
        shutil.rmtree(temp_dir, ignore_errors=True)
        raise
    return temp_dir
|