@event4u/agent-config 5.4.1 → 5.6.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.agent-src/commands/image/analyse.md +51 -0
- package/.agent-src/commands/image/create.md +53 -0
- package/.agent-src/commands/image/verify.md +48 -0
- package/.agent-src/commands/image.md +69 -0
- package/.agent-src/commands/knowledge/cross-repo.md +71 -0
- package/.agent-src/commands/knowledge.md +2 -0
- package/.agent-src/commands/skill/preview.md +67 -0
- package/.agent-src/commands/skill.md +48 -0
- package/.agent-src/commands/skills/discover.md +76 -0
- package/.agent-src/commands/skills.md +56 -0
- package/.agent-src/commands/video/from-song.md +351 -0
- package/.agent-src/commands/video.md +19 -9
- package/.agent-src/contexts/authority/commit-mechanics.md +8 -0
- package/.agent-src/rules/commit-policy.md +3 -8
- package/.agent-src/rules/linked-projects-onboarding-gate.md +1 -1
- package/.agent-src/rules/media-sync-ground-truth.md +58 -0
- package/.agent-src/skills/image-analyser/SKILL.md +121 -0
- package/.agent-src/skills/image-analyser/canon-spec.md +109 -0
- package/.agent-src/skills/image-analyser/evals/triggers.json +16 -0
- package/.agent-src/skills/image-creator/SKILL.md +117 -0
- package/.agent-src/skills/image-creator/evals/triggers.json +16 -0
- package/.agent-src/skills/song-to-script/SKILL.md +216 -0
- package/.claude-plugin/marketplace.json +15 -2
- package/CHANGELOG.md +84 -0
- package/CONTRIBUTING.md +6 -0
- package/README.md +3 -3
- package/config/agent-settings.template.yml +18 -0
- package/dist/cli/registry.js +1 -0
- package/dist/cli/registry.js.map +1 -1
- package/dist/discovery/deprecation-report.md +1 -1
- package/dist/discovery/discovery-manifest.json +327 -20
- package/dist/discovery/discovery-manifest.json.sha256 +1 -1
- package/dist/discovery/discovery-manifest.summary.md +4 -4
- package/dist/discovery/orphan-report.md +1 -1
- package/dist/discovery/packs.json +24 -10
- package/dist/discovery/trust-report.md +3 -3
- package/dist/discovery/workspaces.json +20 -6
- package/dist/mcp/registry-manifest.json +3 -3
- package/dist/router.json +1 -1
- package/dist/server/schemas/settings.js +4 -0
- package/dist/server/schemas/settings.js.map +1 -1
- package/docs/architecture.md +3 -3
- package/docs/catalog.md +20 -6
- package/docs/contracts/benchmark-report-schema.md +12 -10
- package/docs/contracts/command-clusters.md +5 -1
- package/docs/contracts/cross-repo-retrieval.md +64 -0
- package/docs/contracts/rule-router.md +39 -0
- package/docs/contracts/skill-discovery.md +80 -0
- package/docs/contracts/skill-dry-run.md +47 -0
- package/docs/contracts/value-dashboard-spec.md +7 -3
- package/docs/contracts/value-report-schema.md +6 -1
- package/docs/decisions/ADR-032-linked-projects-scope.md +7 -3
- package/docs/getting-started.md +2 -2
- package/docs/guides/cross-repo-linked-projects.md +7 -0
- package/docs/guides/cross-repo-retrieval.md +61 -0
- package/docs/guides/skill-discovery.md +71 -0
- package/docs/guides/skill-preview.md +71 -0
- package/docs/value.md +17 -17
- package/package.json +1 -1
- package/scripts/__pycache__/validate_frontmatter.cpython-312.pyc +0 -0
- package/scripts/_dispatch.bash +10 -0
- package/scripts/_lib/__pycache__/__init__.cpython-312.pyc +0 -0
- package/scripts/_lib/__pycache__/agent_src.cpython-312.pyc +0 -0
- package/scripts/_lib/bench_report.py +13 -14
- package/scripts/_lib/bench_telegraph_report.py +1 -2
- package/scripts/_lib/token_count.py +95 -0
- package/scripts/_lib/value_report.py +3 -3
- package/scripts/ai-video/adapters/higgsfield.sh +163 -6
- package/scripts/ai-video/adapters/openai-images.sh +92 -6
- package/scripts/ai-video/lib/probe-audio.sh +181 -0
- package/scripts/audit_auto_rules.py +22 -6
- package/scripts/audit_command_surface.py +6 -1
- package/scripts/audit_initial_context.py +210 -0
- package/scripts/bench_ab_diff.py +4 -11
- package/scripts/bench_run.py +2 -3
- package/scripts/bench_runner.py +2 -2
- package/scripts/condense.py +44 -3
- package/scripts/cross_repo_retrieve.py +172 -0
- package/scripts/inventory_meta_layers.py +288 -0
- package/scripts/iron_law_sha.py +14 -5
- package/scripts/linked_projects_list.py +91 -0
- package/scripts/measure_rule_budget.py +15 -0
- package/scripts/memory_lookup.py +53 -2
- package/scripts/project_thin_rules.py +168 -0
- package/scripts/render_value_md.py +14 -23
- package/scripts/schemas/command.schema.json +1 -1
- package/scripts/schemas/rule.schema.json +1 -1
- package/scripts/schemas/skill.schema.json +2 -2
- package/scripts/skill_discovery.py +254 -0
- package/scripts/skill_linter.py +8 -4
- package/scripts/skill_preview.py +179 -0
- package/scripts/trigger_coverage.py +129 -0
|
@@ -0,0 +1,172 @@
|
|
|
1
|
+
"""Cross-repo retrieval — read-only, targeted, opt-in (ADR-032 Option A).
|
|
2
|
+
|
|
3
|
+
Phase 4 of `road-to-leaner-core-and-discovery`. Given a query and the opted-in
|
|
4
|
+
`linked_projects` siblings, runs a bounded *targeted* search (path-glob +
|
|
5
|
+
content grep — never a full walk) and returns the retrieval envelope defined in
|
|
6
|
+
`docs/contracts/cross-repo-retrieval.md`. Reuses the redaction + chunking floor
|
|
7
|
+
from `knowledge_ingest.py` so no secret crosses a repo boundary.
|
|
8
|
+
|
|
9
|
+
Scope guards (Option A):
|
|
10
|
+
- read-only, no writes, no network;
|
|
11
|
+
- only siblings with `include: true` in agents/settings/.agent-settings.local.yml;
|
|
12
|
+
- `large`-flagged siblings REQUIRE a `--path-scope` (reject an unscoped query);
|
|
13
|
+
- ≤ --max-chunks results, one concept per query.
|
|
14
|
+
|
|
15
|
+
Usage:
|
|
16
|
+
python3 scripts/cross_repo_retrieve.py "<query>" [--path-scope GLOB]
|
|
17
|
+
[--max-chunks N] [--format text|json] [--root PATH]
|
|
18
|
+
"""
|
|
19
|
+
from __future__ import annotations
|
|
20
|
+
|
|
21
|
+
import argparse
|
|
22
|
+
import fnmatch
|
|
23
|
+
import json
|
|
24
|
+
import subprocess
|
|
25
|
+
import sys
|
|
26
|
+
from pathlib import Path
|
|
27
|
+
|
|
28
|
+
REPO_ROOT = Path(__file__).resolve().parent.parent
|
|
29
|
+
sys.path.insert(0, str(REPO_ROOT / "scripts"))
|
|
30
|
+
sys.path.insert(0, str(REPO_ROOT / "packages" / "core" / "installer" / "python"))
|
|
31
|
+
from linked_projects_list import collect as collect_siblings # type: ignore # noqa: E402
|
|
32
|
+
|
|
33
|
+
try:
    from knowledge_ingest import redact, chunk_text  # type: ignore
except Exception:  # pragma: no cover - keep retrieval usable if ingest moves
    # NOTE(review): the stand-ins below keep the script importable, but the
    # replacement `redact` returns its input untouched — confirm that running
    # without the real redaction floor is acceptable.
    def redact(text, counters):  # type: ignore
        """Pass-through stand-in: return *text* unchanged with a count of 0."""
        untouched = text
        return untouched, 0

    def chunk_text(text, target_bytes=2048):  # type: ignore
        """Stand-in chunker: at most one chunk, the first *target_bytes* characters."""
        if not text:
            return []
        return [text[:target_bytes]]
|
|
41
|
+
|
|
42
|
+
# Default for the --max-chunks CLI option.
DEFAULT_MAX_CHUNKS = 8
MAX_FILES_SCANNED = 2000  # hard ceiling on the targeted walk, defence-in-depth
# File extensions (lower-case, with leading dot) that are eligible for search.
TEXT_SUFFIXES = {
    ".md", ".txt",
    ".py", ".ts", ".tsx", ".js", ".jsx", ".php", ".go", ".rs", ".rb", ".java",
    ".json", ".yml", ".yaml", ".toml",
    ".sql", ".sh",
}
# Path components that exclude a file from the search.
SKIP_DIRS = {
    ".git", "node_modules", "dist", "vendor", ".venv", "__pycache__",
    ".idea", ".vscode", "build", "target", ".next", "coverage",
}
|
|
48
|
+
|
|
49
|
+
|
|
50
|
+
def _freshness(repo: Path, rel: str) -> str:
|
|
51
|
+
try:
|
|
52
|
+
out = subprocess.run(
|
|
53
|
+
["git", "log", "-1", "--format=%ad", "--date=short", "--", rel],
|
|
54
|
+
cwd=repo, capture_output=True, text=True, timeout=8, check=False,
|
|
55
|
+
)
|
|
56
|
+
if out.returncode == 0 and out.stdout.strip():
|
|
57
|
+
return out.stdout.strip()
|
|
58
|
+
except (OSError, subprocess.SubprocessError):
|
|
59
|
+
pass
|
|
60
|
+
try:
|
|
61
|
+
from datetime import datetime, timezone
|
|
62
|
+
ts = (repo / rel).stat().st_mtime
|
|
63
|
+
return datetime.fromtimestamp(ts, timezone.utc).date().isoformat()
|
|
64
|
+
except OSError:
|
|
65
|
+
return "unknown"
|
|
66
|
+
|
|
67
|
+
|
|
68
|
+
def _iter_files(repo: Path, path_scope: str | None):
    """Yield ``(path, relative_path)`` for searchable text files under *repo*.

    A file qualifies when its suffix is in ``TEXT_SUFFIXES``, no component of
    its repo-relative path is in ``SKIP_DIRS``, and — when *path_scope* is
    given — the relative path matches that glob via ``fnmatch``. Files are
    yielded in sorted path order and at most ``MAX_FILES_SCANNED`` are yielded.

    Skipped directories are pruned during traversal, so trees such as
    ``.git`` or ``node_modules`` are never descended into; the previous
    ``repo.rglob("*")`` listed them in full before filtering.
    """
    import os  # function-scope import: this block is the only user

    candidates: list[Path] = []
    for dirpath, dirnames, filenames in os.walk(repo):
        # In-place pruning is what stops os.walk from entering these dirs.
        dirnames[:] = [name for name in dirnames if name not in SKIP_DIRS]
        base = Path(dirpath)
        # No SKIP_DIRS entry carries a text suffix, so the suffix filter also
        # covers the "file name itself is a skipped component" case.
        candidates.extend(
            base / name
            for name in filenames
            if Path(name).suffix.lower() in TEXT_SUFFIXES
        )

    count = 0
    for p in sorted(candidates):
        if count >= MAX_FILES_SCANNED:
            break
        if not p.is_file():  # drops broken symlinks and special files
            continue
        rel = str(p.relative_to(repo))
        if path_scope and not fnmatch.fnmatch(rel, path_scope):
            continue
        count += 1
        yield p, rel
|
|
82
|
+
|
|
83
|
+
|
|
84
|
+
def _terms(query: str) -> list[str]:
|
|
85
|
+
return [t for t in query.lower().replace(",", " ").split() if len(t) > 2]
|
|
86
|
+
|
|
87
|
+
|
|
88
|
+
def search_sibling(repo: Path, query: str, terms: list[str], path_scope: str | None,
                   budget: int) -> list[dict]:
    """Search one sibling repo and return up to *budget* match records.

    A file matches when any term occurs in its lower-cased relative path or in
    its lower-cased content. Each record carries ``source_repo``, ``path``,
    ``chunk`` (redacted, capped at 2048 characters), ``freshness`` and
    ``match_reason``. *query* is accepted for signature parity but is not read
    in this body — matching is driven by *terms*.
    """
    results: list[dict] = []
    source = repo.name
    for file_path, rel in _iter_files(repo, path_scope):
        if len(results) >= budget:
            break

        lowered_rel = rel.lower()
        matched_by_path = False
        for term in terms:
            if term in lowered_rel:
                matched_by_path = True
                break

        try:
            body = file_path.read_text(encoding="utf-8", errors="replace")
        except OSError:
            # Unreadable file: skip it rather than abort the whole search.
            continue

        lowered_body = body.lower()
        hit_terms = [term for term in terms if term in lowered_body]
        if not (matched_by_path or hit_terms):
            continue

        # Prefer the first chunk that mentions a term; otherwise use the head.
        pieces = chunk_text(body)
        selected = pieces[0] if pieces else ""
        for piece in pieces:
            lowered_piece = piece.lower()
            if any(term in lowered_piece for term in terms):
                selected = piece
                break

        clean, _count = redact(selected, {})
        if matched_by_path:
            why = f"path matches: {rel}"
        else:
            why = f"content term(s): {', '.join(hit_terms[:3])}"

        results.append({
            "source_repo": source,
            "path": rel,
            "chunk": clean[:2048],
            "freshness": _freshness(repo, rel),
            "match_reason": why,
        })
    return results
|
|
119
|
+
|
|
120
|
+
|
|
121
|
+
def retrieve(root: Path, query: str, path_scope: str | None, max_chunks: int) -> dict:
    """Run the query across opted-in siblings and build the result envelope.

    Returns a dict with ``query`` and ``matches`` (at most *max_chunks*
    entries) and, where applicable, a ``note`` explaining an empty result or
    listing siblings flagged ``large`` that were skipped because no
    *path_scope* was supplied.
    """
    siblings = collect_siblings(root, show_all=False)  # opted-in only
    if not siblings:
        return {"query": query, "matches": [], "note": "no opted-in linked-project siblings — nothing to search"}

    terms = _terms(query)
    if not terms:
        return {"query": query, "matches": [], "note": "query too short — give at least one term > 2 chars"}

    found: list[dict] = []
    skipped_large: list[str] = []
    for sibling in siblings:
        remaining = max_chunks - len(found)
        if remaining <= 0:
            break
        sibling_path = sibling["path"]
        repo = Path(sibling_path)
        # Large siblings are only searched when the caller narrows the scope.
        if sibling.get("large") and not path_scope:
            skipped_large.append(sibling_path)
            continue
        found.extend(search_sibling(repo, query, terms, path_scope, remaining))

    envelope: dict = {"query": query, "matches": found[:max_chunks]}
    if skipped_large:
        prefix = "large sibling(s) skipped — supply --path-scope to search them: "
        envelope["note"] = prefix + "; ".join(skipped_large)
    return envelope
|
|
143
|
+
|
|
144
|
+
|
|
145
|
+
def render_text(result: dict) -> str:
    """Render a retrieval envelope as a markdown table.

    With no matches, returns the envelope's ``note`` (or ``"no matches"``).
    Otherwise emits one table row per match and, when a non-empty ``note`` is
    present, appends it as a blockquote after a blank line.
    """
    matches = result["matches"]
    if not matches:
        return result.get("note", "no matches")

    table = ["| source_repo | path | freshness | why |", "|---|---|---|---|"]
    table.extend(
        f"| {hit['source_repo']} | {hit['path']} | {hit['freshness']} | {hit['match_reason']} |"
        for hit in matches
    )
    if result.get("note"):
        table.append("")
        table.append(f"> {result['note']}")
    return "\n".join(table)
|
|
155
|
+
|
|
156
|
+
|
|
157
|
+
def main(argv: list[str] | None = None) -> int:
    """CLI entry point: parse arguments, run the retrieval, print the result.

    Prints JSON when ``--format json`` is given, otherwise the text table.
    Always returns 0.
    """
    parser = argparse.ArgumentParser(description="Targeted, read-only cross-repo retrieval (ADR-032 Option A).")
    parser.add_argument("query", help="What to retrieve (one concept; ≥ 1 term > 2 chars).")
    parser.add_argument("--path-scope", default=None, help="Glob to scope the search (required for large siblings).")
    parser.add_argument("--max-chunks", type=int, default=DEFAULT_MAX_CHUNKS)
    parser.add_argument("--format", choices=("text", "json"), default="text")
    parser.add_argument("--root", default=".")
    opts = parser.parse_args(argv)

    root = Path(opts.root).resolve()
    result = retrieve(root, opts.query, opts.path_scope, opts.max_chunks)
    if opts.format == "json":
        rendered = json.dumps(result, indent=2)
    else:
        rendered = render_text(result)
    print(rendered)
    return 0


if __name__ == "__main__":
    raise SystemExit(main())
|
|
@@ -0,0 +1,288 @@
|
|
|
1
|
+
"""Meta-layer / concept-surface inventory — read-only discovery pass.
|
|
2
|
+
|
|
3
|
+
Drives Phase 1 of `agents/roadmaps/road-to-leaner-core-and-discovery.md`.
|
|
4
|
+
Sibling to `scripts/inventory_abstraction_budget.py`: that tool counts
|
|
5
|
+
per-artefact references + frontmatter bloat; this one inventories the
|
|
6
|
+
*concept surface* the post-5.x feedback names as meta-complexity.
|
|
7
|
+
|
|
8
|
+
For each concept it emits one row:
|
|
9
|
+
concept · surfaces it lives in · line cost · last-touched · overlap candidates
|
|
10
|
+
|
|
11
|
+
Concept = a normalized token shared by ≥ 2 stable artefacts (a rule, a
|
|
12
|
+
contract, a guideline, or a context) — i.e. a single idea defined in
|
|
13
|
+
more than one surface. Plus the curated meta-layer families the
|
|
14
|
+
feedback names explicitly (iron-laws, value, roadmap, linked-projects,
|
|
15
|
+
marketplace, governance). Also tabulates always-loaded rule families
|
|
16
|
+
(kernel) + Iron-Law count per rule.
|
|
17
|
+
|
|
18
|
+
Output: agents/evidence/analysis/meta-layer-inventory.md (+ .csv)
|
|
19
|
+
Read-only. Touches no abstraction file.
|
|
20
|
+
|
|
21
|
+
Usage:
|
|
22
|
+
python3 scripts/inventory_meta_layers.py [--quiet]
|
|
23
|
+
"""
|
|
24
|
+
from __future__ import annotations
|
|
25
|
+
|
|
26
|
+
import argparse
|
|
27
|
+
import csv
|
|
28
|
+
import json
|
|
29
|
+
import re
|
|
30
|
+
import subprocess
|
|
31
|
+
from collections import defaultdict
|
|
32
|
+
from dataclasses import dataclass, field
|
|
33
|
+
from pathlib import Path
|
|
34
|
+
|
|
35
|
+
# Repository root: two directory levels above this script file.
REPO_ROOT = Path(__file__).resolve().parent.parent
# Directories scanned for markdown surfaces, one per surface kind.
RULES_DIR = REPO_ROOT.joinpath(".agent-src", "rules")
CONTRACTS_DIR = REPO_ROOT.joinpath("docs", "contracts")
GUIDELINES_DIR = REPO_ROOT.joinpath("docs", "guidelines")
CONTEXTS_DIR = REPO_ROOT.joinpath(".agent-src", "contexts")
# Router manifest read for the kernel list and tier counts.
ROUTER = REPO_ROOT.joinpath("dist", "router.json")
# Where the markdown and CSV inventory are written.
EVIDENCE_DIR = REPO_ROOT.joinpath("agents", "evidence", "analysis")

# Matches a level 1-3 markdown heading starting with "Iron Law(s)",
# optionally preceded by "The"; case-insensitive, anchored per line.
IRON_LAW_RE = re.compile(
    r"^#{1,3}\s+(?:The\s+)?Iron\s+Laws?\b",
    re.MULTILINE | re.IGNORECASE,
)

# Generic filename tokens that carry no concept identity — dropped before grouping.
STOPWORDS = {
    "rule", "rules", "contract", "contracts", "mechanics", "policy", "schema",
    "config", "v1", "v2",
    "and", "the", "of", "for", "to", "in", "on", "a",
    "adr", "model", "spec", "format", "default", "defaults", "system",
    "examples", "demos", "writing", "patterns", "auto", "core", "base",
}

# Curated meta-layer families the feedback names (always emitted as rows).
SEED_FAMILIES = [
    "iron",
    "value",
    "roadmap",
    "linked",
    "marketplace",
    "governance",
    "council",
]
|
|
55
|
+
|
|
56
|
+
|
|
57
|
+
@dataclass
class Surface:
    """One markdown artefact scanned by the inventory."""

    # Location of the markdown file.
    path: Path
    kind: str  # rule | contract | guideline | context
    # Line count recorded by the collector.
    lines: int
    # Significant filename tokens; every instance gets its own fresh set.
    tokens: set[str] = field(default_factory=set)

    @property
    def rel(self) -> str:
        """Return the file's path relative to ``REPO_ROOT`` as a string."""
        relative = self.path.relative_to(REPO_ROOT)
        return str(relative)
|
|
67
|
+
|
|
68
|
+
|
|
69
|
+
def _last_touched(path: Path) -> str:
    """Return the short date of the last git commit touching *path*.

    Runs ``git log -1`` from ``REPO_ROOT``. Yields ``"untracked"`` when git
    prints nothing for the path, and ``"unknown"`` when the lookup itself
    raises (git missing, timeout, ...).
    """
    command = ["git", "log", "-1", "--format=%ad", "--date=short", "--", str(path)]
    try:
        completed = subprocess.run(
            command,
            cwd=REPO_ROOT, capture_output=True, text=True, check=False, timeout=10,
        )
        stamp = completed.stdout.strip()
    except Exception:
        # Best-effort metadata: any failure degrades to a placeholder.
        return "unknown"
    if stamp:
        return stamp
    return "untracked"
|
|
78
|
+
|
|
79
|
+
|
|
80
|
+
def _tokens(stem: str) -> set[str]:
    """Return the significant tokens of a filename stem.

    The lower-cased stem is split on ``-`` and ``_``; pieces of two characters
    or fewer and pieces listed in ``STOPWORDS`` are discarded.
    """
    significant: set[str] = set()
    for piece in re.split(r"[-_]", stem.lower()):
        if len(piece) <= 2:  # also drops empty pieces from doubled separators
            continue
        if piece in STOPWORDS:
            continue
        significant.add(piece)
    return significant
|
|
82
|
+
|
|
83
|
+
|
|
84
|
+
def _collect(directory: Path, kind: str) -> list[Surface]:
    """Collect every markdown file under *directory* as a ``Surface`` of *kind*.

    Files are visited recursively in sorted order; ``README.md`` files are
    skipped. Returns an empty list when *directory* does not exist.

    The line count is the number of text lines: a trailing newline terminates
    the last line rather than starting a new one. The earlier
    ``count("\\n") + 1`` formula over-counted every newline-terminated file
    by one.
    """
    out: list[Surface] = []
    if not directory.exists():
        return out
    for p in sorted(directory.rglob("*.md")):
        # A directory whose name ends in ".md" would make read_text() raise.
        if p.name == "README.md" or not p.is_file():
            continue
        text = p.read_text(encoding="utf-8", errors="replace")
        unterminated_tail = 1 if text and not text.endswith("\n") else 0
        line_count = text.count("\n") + unterminated_tail
        out.append(Surface(p, kind, line_count, _tokens(p.stem)))
    return out
|
|
94
|
+
|
|
95
|
+
|
|
96
|
+
def _kernel_and_tiers() -> dict[str, int | list[str]]:
    """Read the kernel list and tier sizes from the router manifest.

    Returns ``kernel`` (the manifest's list, default empty) plus ``tier_1``
    and ``tier_2`` counts. A tier stored as an integer is used as-is;
    otherwise its length is taken. A missing manifest yields empty/zero values.
    """
    if not ROUTER.exists():
        return {"kernel": [], "tier_1": 0, "tier_2": 0}

    manifest = json.loads(ROUTER.read_text(encoding="utf-8"))

    def _tier_size(key: str) -> int:
        raw = manifest.get(key, [])
        if isinstance(raw, int):
            return raw
        return len(raw)

    return {
        "kernel": manifest.get("kernel", []),
        "tier_1": _tier_size("tier_1"),
        "tier_2": _tier_size("tier_2"),
    }
|
|
105
|
+
|
|
106
|
+
|
|
107
|
+
def _iron_law_counts() -> dict[str, int]:
    """Count Iron-Law headings per top-level rule file in ``RULES_DIR``.

    Keys are rule file stems; rules without any matching heading are omitted.
    """
    per_rule: dict[str, int] = {}
    for rule_file in sorted(RULES_DIR.glob("*.md")):
        content = rule_file.read_text(encoding="utf-8", errors="replace")
        headings = IRON_LAW_RE.findall(content)
        if not headings:
            continue
        per_rule[rule_file.stem] = len(headings)
    return per_rule
|
|
115
|
+
|
|
116
|
+
|
|
117
|
+
def _stack(s: Surface) -> str:
|
|
118
|
+
"""Concern stack a surface belongs to. PHP coding guidelines are a
|
|
119
|
+
different domain from agent-behaviour rules/contracts — a shared topic
|
|
120
|
+
word (`git`, `security`) between them is coincidence, not duplication.
|
|
121
|
+
"""
|
|
122
|
+
return "php" if "/guidelines/php/" in s.rel.replace("\\", "/") else "agent"
|
|
123
|
+
|
|
124
|
+
|
|
125
|
+
def _same_concept(a: Surface, b: Surface) -> bool:
    """Decide whether two surfaces define the same concept.

    True when the shorter stem, followed by ``-``, is a prefix of the longer
    one and is itself a multi-token concept (`language-and-tone` ⊂
    `language-and-tone-examples`). A single generic word never qualifies
    (`git` ⊂ `git-history-discipline` is coincidence, not duplication).

    Otherwise the surfaces must sit in the same stack and share at least two
    significant tokens; surfaces from different stacks are never grouped on
    shared tokens alone.
    """
    stem_a = a.path.stem.lower()
    stem_b = b.path.stem.lower()
    if len(stem_a) <= len(stem_b):
        shorter, longer = stem_a, stem_b
    else:
        shorter, longer = stem_b, stem_a

    is_strict_prefix = shorter != longer and longer.startswith(shorter + "-")
    if is_strict_prefix and len(_tokens(shorter)) >= 2:
        return True

    if _stack(a) != _stack(b):
        return False

    common = a.tokens & b.tokens
    return len(common) >= 2
|
|
148
|
+
|
|
149
|
+
|
|
150
|
+
def _concept_rows(surfaces: list[Surface]) -> list[dict]:
    """Group surfaces into concepts and build one ledger row per group.

    Surfaces are connected whenever ``_same_concept`` holds for a pair; each
    connected component with two or more members becomes a row. Rows are
    sorted by descending surface count, then descending line cost.
    """
    total = len(surfaces)
    # Union-find over the genuine-duplication adjacency.
    parent = list(range(total))

    def find(node: int) -> int:
        # Path halving: point each visited node at its grandparent.
        while parent[node] != node:
            parent[node] = parent[parent[node]]
            node = parent[node]
        return node

    def union(left: int, right: int) -> None:
        parent[find(left)] = find(right)

    for i, first in enumerate(surfaces):
        for j in range(i + 1, total):
            if _same_concept(first, surfaces[j]):
                union(i, j)

    components: dict[int, list[Surface]] = defaultdict(list)
    for idx, surface in enumerate(surfaces):
        components[find(idx)].append(surface)

    rows: list[dict] = []
    for members in components.values():
        size = len(members)
        if size < 2:
            continue

        kinds = {member.kind for member in members}

        # Concept label = the significant token(s) shared by every member,
        # falling back to the single most frequent token.
        freq: dict[str, int] = defaultdict(int)
        for member in members:
            for token in member.tokens:
                freq[token] += 1
        shared = [token for token, hits in freq.items() if hits == size]
        if not shared:
            shared = [max(freq, key=freq.get)]
        label = "-".join(sorted(shared)[:2])

        seeded = any(
            token.startswith(family) or family.startswith(token)
            for token in shared
            for family in SEED_FAMILIES
        )
        cross_kind = len(kinds) >= 2
        contract_dup = [member.kind for member in members].count("contract") >= 2
        line_cost = sum(member.lines for member in members)
        touched = max((_last_touched(member.path) for member in members), default="unknown")
        is_overlap = cross_kind or contract_dup

        rows.append({
            "concept": label,
            "surfaces": "; ".join(sorted(member.rel for member in members)),
            "surface_count": size,
            "kinds": ",".join(sorted(kinds)),
            "line_cost": line_cost,
            "last_touched": touched,
            "overlap": "Y" if is_overlap else "family",
            "seeded": "Y" if seeded else "",
        })

    rows.sort(key=lambda row: (-row["surface_count"], -row["line_cost"]))
    return rows
|
|
201
|
+
|
|
202
|
+
|
|
203
|
+
def main(argv: list[str] | None = None) -> int:
    """CLI entry point: scan the surfaces and write the inventory reports.

    Writes ``meta-layer-inventory.md`` and ``meta-layer-inventory.csv`` into
    ``EVIDENCE_DIR`` (created if missing) and, unless ``--quiet`` is given,
    prints a short summary. Always returns 0.
    """
    parser = argparse.ArgumentParser(description="Meta-layer / concept-surface inventory (read-only).")
    parser.add_argument("--quiet", action="store_true")
    opts = parser.parse_args(argv)

    surfaces = []
    for directory, kind in (
        (RULES_DIR, "rule"),
        (CONTRACTS_DIR, "contract"),
        (GUIDELINES_DIR, "guideline"),
        (CONTEXTS_DIR, "context"),
    ):
        surfaces = surfaces + _collect(directory, kind)
    tiers = _kernel_and_tiers()
    iron = _iron_law_counts()
    rows = _concept_rows(surfaces)

    EVIDENCE_DIR.mkdir(parents=True, exist_ok=True)
    md = EVIDENCE_DIR / "meta-layer-inventory.md"
    csv_path = EVIDENCE_DIR / "meta-layer-inventory.csv"

    kernel = tiers["kernel"]
    overlap_count = len([row for row in rows if row["overlap"] == "Y"])
    iron_total = sum(iron.values())

    # --- markdown report: header and summary table -------------------------
    report = [
        "# Meta-Layer / Concept-Surface Inventory",
        "",
        "> Read-only discovery output for `agents/roadmaps/road-to-leaner-core-and-discovery.md` Phase 1.",
        "> Counts are grep/git-backed via `scripts/inventory_meta_layers.py`. A row is an *overlap candidate*",
        "> when one concept (a shared filename token) is defined across ≥ 2 stable surfaces.",
        "",
        "## Summary",
        "",
        "| Metric | Value |",
        "|---|---:|",
        f"| Always-loaded kernel rule families | {len(kernel)} |",
        f"| tier_1 (balanced) rules | {tiers['tier_1']} |",
        f"| tier_2 (full) rules | {tiers['tier_2']} |",
        f"| Rules carrying Iron-Law headings | {len(iron)} |",
        f"| Total Iron-Law headings across rules | {iron_total} |",
        f"| Concept surfaces scanned (rule/contract/guideline/context) | {len(surfaces)} |",
        f"| Concept overlap candidates (≥ 2 surfaces, cross-kind/contract-dup) | {overlap_count} |",
        "",
        f"Kernel: {', '.join(kernel)}",
        "",
        "## Iron-Law density per rule (top 15)",
        "",
        "| Rule | Iron Laws |",
        "|---|---:|",
    ]

    # --- Iron-Law density: the 15 rules with the most headings --------------
    densest = sorted(iron.items(), key=lambda item: -item[1])[:15]
    report.extend(f"| `{stem}` | {count} |" for stem, count in densest)

    # --- concept-overlap ledger ---------------------------------------------
    report.extend([
        "",
        "## Concept-overlap ledger",
        "",
        "> One row per concept defined in ≥ 2 surfaces. `overlap=Y` = cross-kind or duplicate-contract",
        "> (genuine merge/delete candidate). `seeded` = a feedback-named meta-layer family.",
        "> Classification (merge / delete / keep-with-reason) is filled in Step 2 — left blank here.",
        "",
        "| Concept | Surfaces | # | Kinds | Lines | Last touched | Overlap | Class |",
        "|---|---|---:|---|---:|---|---|---|",
    ])
    for row in rows:
        report.append(
            f"| `{row['concept']}` | {row['surfaces']} | {row['surface_count']} | {row['kinds']} | "
            f"{row['line_cost']} | {row['last_touched']} | {row['overlap']} | _unclassified_ |"
        )
    report.append("")
    md.write_text("\n".join(report), encoding="utf-8")

    # --- CSV twin of the ledger ---------------------------------------------
    columns = ["concept", "surface_count", "kinds", "line_cost", "last_touched", "overlap", "seeded", "surfaces"]
    with csv_path.open("w", newline="", encoding="utf-8") as handle:
        writer = csv.DictWriter(handle, fieldnames=columns)
        writer.writeheader()
        writer.writerows(rows)

    if not opts.quiet:
        print(f"meta-layer inventory: {len(surfaces)} surfaces, {len(rows)} concept rows, {overlap_count} overlap candidates")
        print(f" → {md.relative_to(REPO_ROOT)}")
        print(f" → {csv_path.relative_to(REPO_ROOT)}")
    return 0


if __name__ == "__main__":
    raise SystemExit(main())
|
package/scripts/iron_law_sha.py
CHANGED
|
@@ -26,7 +26,15 @@ import sys
|
|
|
26
26
|
from pathlib import Path
|
|
27
27
|
|
|
28
28
|
REPO_ROOT = Path(__file__).resolve().parent.parent
|
|
29
|
-
|
|
29
|
+
sys.path.insert(0, str(REPO_ROOT / "scripts"))
|
|
30
|
+
from _lib.agent_src import artefact_roots # noqa: E402
|
|
31
|
+
|
|
32
|
+
# Pre-monorepo this was REPO_ROOT/.agent-src.uncondensed/rules. Post-move
|
|
33
|
+
# (ADR-017) the source rules live under packages/*/.agent-src.uncondensed/rules.
|
|
34
|
+
# Resolve the same way measure_rule_budget does (multi-root aware) so the
|
|
35
|
+
# Iron-Law SHA gate keeps working against the current layout.
|
|
36
|
+
def _rules_dirs() -> list[Path]:
    """Return the existing ``rules`` directory of every artefact root, in root order."""
    found: list[Path] = []
    for root in artefact_roots():
        candidate = root / "rules"
        if candidate.is_dir():
            found.append(candidate)
    return found
|
|
30
38
|
|
|
31
39
|
# Locked kernel set — kept in sync with measure_rule_budget.KERNEL_RULES.
|
|
32
40
|
KERNEL_RULES = (
|
|
@@ -58,10 +66,11 @@ def iron_law_sha(text: str) -> str:
|
|
|
58
66
|
|
|
59
67
|
|
|
60
68
|
def rule_sha(rule_id: str) -> str:
    """Return the Iron-Law SHA of the rule file named ``<rule_id>.md``.

    The first rules directory containing the file wins.

    Raises:
        FileNotFoundError: no rules directory contains the file.
    """
    filename = f"{rule_id}.md"
    candidates = (rules_dir / filename for rules_dir in _rules_dirs())
    found = next((candidate for candidate in candidates if candidate.exists()), None)
    if found is None:
        raise FileNotFoundError(f"{rule_id}.md not found under any artefact root's rules/")
    return iron_law_sha(found.read_text(encoding="utf-8"))
|
|
65
74
|
|
|
66
75
|
|
|
67
76
|
def main(argv: list[str] | None = None) -> int:
|
|
@@ -0,0 +1,91 @@
|
|
|
1
|
+
"""`agent-config linked-projects:list` — list opted-in IDE-attached siblings.
|
|
2
|
+
|
|
3
|
+
Phase 4 of `road-to-leaner-core-and-discovery`; closes the ADR-032 follow-up
|
|
4
|
+
"expose the detector as a CLI subcommand for consumer reach". Pure wrapper over
|
|
5
|
+
`scripts/_lib/linked_projects.detect_linked_projects` + the
|
|
6
|
+
`agents/settings/.agent-settings.local.yml` → `linked_projects[]` opt-in
|
|
7
|
+
cascade. No detection logic is duplicated here.
|
|
8
|
+
|
|
9
|
+
Prints opted-in siblings (`path · detected_via · large`). `--all` shows every
|
|
10
|
+
detected sibling with its opt-in status; `--format json` is machine-readable.
|
|
11
|
+
Read-only, no network.
|
|
12
|
+
|
|
13
|
+
Usage:
|
|
14
|
+
python3 scripts/linked_projects_list.py [--all] [--format text|json] [--root PATH]
|
|
15
|
+
"""
|
|
16
|
+
from __future__ import annotations
|
|
17
|
+
|
|
18
|
+
import argparse
|
|
19
|
+
import json
|
|
20
|
+
import sys
|
|
21
|
+
from pathlib import Path
|
|
22
|
+
|
|
23
|
+
import yaml
|
|
24
|
+
|
|
25
|
+
REPO_ROOT = Path(__file__).resolve().parent.parent
|
|
26
|
+
sys.path.insert(0, str(REPO_ROOT / "scripts"))
|
|
27
|
+
from _lib.linked_projects import detect_linked_projects # type: ignore # noqa: E402
|
|
28
|
+
|
|
29
|
+
# Location of the local settings file, relative to the project root; it is
# joined onto the `root` argument when the opt-in map is read.
LOCAL_SETTINGS = Path("agents") / "settings" / ".agent-settings.local.yml"
|
|
30
|
+
|
|
31
|
+
|
|
32
|
+
def _opt_in_map(root: Path) -> dict[str, bool]:
    """Map resolved sibling path → include flag from the local settings cascade.

    Reads ``root / LOCAL_SETTINGS`` and walks its ``linked_projects`` list.
    Each mapping entry with a truthy ``path`` contributes one item keyed by
    the expanded, resolved path (or the raw path when resolution fails).

    Returns an empty dict when the file is missing, unreadable, not valid
    YAML, or not shaped as a mapping holding a ``linked_projects`` list —
    hand-edited settings must not crash the listing.
    """
    settings_file = root / LOCAL_SETTINGS
    if not settings_file.is_file():
        return {}
    try:
        data = yaml.safe_load(settings_file.read_text(encoding="utf-8", errors="replace")) or {}
    except (OSError, yaml.YAMLError):
        return {}
    # A top-level YAML list or scalar has no `.get`; treat it as "no opt-ins".
    if not isinstance(data, dict):
        return {}
    entries = data.get("linked_projects") or []
    if not isinstance(entries, list):
        return {}

    out: dict[str, bool] = {}
    for entry in entries:
        if not (isinstance(entry, dict) and entry.get("path")):
            continue
        include = bool(entry.get("include"))
        raw_path = str(entry["path"])  # tolerate non-string YAML scalars
        # NOTE(review): a relative path resolves against the process cwd, not
        # against `root` — confirm that is intended when --root differs.
        try:
            key = str(Path(raw_path).expanduser().resolve())
        except (OSError, RuntimeError):
            key = raw_path
        out[key] = include
    return out
|
|
49
|
+
|
|
50
|
+
|
|
51
|
+
def collect(root: Path, show_all: bool) -> list[dict]:
    """Return detected siblings annotated with their opt-in decision.

    Each row is the detector's dict plus an ``include`` key: ``True``,
    ``False``, or ``None`` when the settings hold no entry for that path.
    Unless *show_all* is set, only rows whose ``include`` is exactly ``True``
    are returned.
    """
    detected = detect_linked_projects(root)
    decisions = _opt_in_map(root)
    annotated = (
        {**sibling, "include": decisions.get(sibling["path"])}  # None = undecided
        for sibling in detected
    )
    return [row for row in annotated if show_all or row["include"] is True]
|
|
61
|
+
|
|
62
|
+
|
|
63
|
+
def render_text(rows: list[dict], show_all: bool) -> str:
    """Render sibling rows as a markdown table.

    With no rows, returns a one-line hint whose wording depends on
    *show_all*. The ``include`` value of each row is shown as
    yes / no / undecided.
    """
    if not rows:
        scope = "opted-in"
        if show_all:
            scope = "detected"
        return f"No {scope} linked-project siblings. (Attach a sibling repo in your IDE and opt in.)"

    decision_label = {True: "yes", False: "no", None: "undecided"}
    table = ["| path | detected via | large | opted in |", "|---|---|---|---|"]
    for row in rows:
        opted = decision_label[row.get("include")]
        large = "yes" if row["large"] else "no"
        table.append(f"| {row['path']} | {row['detected_via']} | {large} | {opted} |")
    return "\n".join(table)
|
|
72
|
+
|
|
73
|
+
|
|
74
|
+
def main(argv: list[str] | None = None) -> int:
    """CLI entry point: list linked-project siblings as text or JSON.

    Always returns 0.
    """
    parser = argparse.ArgumentParser(description="List opted-in IDE-attached sibling projects (read-only).")
    parser.add_argument("--all", action="store_true", help="Show every detected sibling, not only opted-in.")
    parser.add_argument("--format", choices=("text", "json"), default="text")
    parser.add_argument("--root", default=".", help="Project root (default: cwd).")
    opts = parser.parse_args(argv)

    root = Path(opts.root).resolve()
    rows = collect(root, opts.all)
    if opts.format != "json":
        print(render_text(rows, opts.all))
        return 0
    payload = {"root": str(root), "siblings": rows}
    print(json.dumps(payload, indent=2))
    return 0


if __name__ == "__main__":
    raise SystemExit(main())
|
|
@@ -28,6 +28,7 @@ from pathlib import Path
|
|
|
28
28
|
REPO_ROOT = Path(__file__).resolve().parent.parent
|
|
29
29
|
sys.path.insert(0, str(REPO_ROOT / "scripts"))
|
|
30
30
|
from _lib.agent_src import artefact_roots # noqa: E402
|
|
31
|
+
from _lib import token_count # noqa: E402
|
|
31
32
|
|
|
32
33
|
OVERRIDES_FILE = REPO_ROOT / "docs" / "contracts" / "iron-law-overrides.txt"
|
|
33
34
|
TREND_FILE = REPO_ROOT / "agents" / "runtime" / ".rule-budget-history.jsonl"
|
|
@@ -91,6 +92,9 @@ def measure_rule(path: Path) -> dict[str, object]:
|
|
|
91
92
|
"tier": fields.get("tier", ""),
|
|
92
93
|
"chars": len(body),
|
|
93
94
|
"lines": body.count("\n"),
|
|
95
|
+
# Real-tokenizer truth alongside the char proxy (roadmap 0B.1).
|
|
96
|
+
"tokens_gpt": token_count.gpt_tokens(body).tokens,
|
|
97
|
+
"tokens_claude": token_count.claude_tokens(body).tokens,
|
|
94
98
|
}
|
|
95
99
|
|
|
96
100
|
|
|
@@ -141,6 +145,11 @@ def aggregate(rules: list[dict[str, object]]) -> dict[str, object]:
|
|
|
141
145
|
"auto_chars": sum(int(r["chars"]) for r in auto),
|
|
142
146
|
"kernel_chars": sum(int(r["chars"]) for r in kernel),
|
|
143
147
|
"total_chars": total_chars,
|
|
148
|
+
"kernel_tokens_gpt": sum(int(r.get("tokens_gpt", 0)) for r in kernel),
|
|
149
|
+
"kernel_tokens_claude": sum(int(r.get("tokens_claude", 0)) for r in kernel),
|
|
150
|
+
"total_tokens_gpt": sum(int(r.get("tokens_gpt", 0)) for r in rules),
|
|
151
|
+
"total_tokens_claude": sum(int(r.get("tokens_claude", 0)) for r in rules),
|
|
152
|
+
"token_method": token_count.method_note(),
|
|
144
153
|
"kernel_hard": KERNEL_HARD,
|
|
145
154
|
"kernel_target": KERNEL_TARGET,
|
|
146
155
|
"per_rule_hard": PER_RULE_HARD,
|
|
@@ -181,6 +190,12 @@ def render_table(rules: list[dict[str, object]], agg: dict[str, object]) -> str:
|
|
|
181
190
|
)
|
|
182
191
|
lines.append(f" total: {agg['total_chars']:>6} chars across {agg['rule_count']} rules")
|
|
183
192
|
lines.append("")
|
|
193
|
+
lines.append(
|
|
194
|
+
f"kernel-tokens: {agg['kernel_tokens_gpt']:>6} GPT · {agg['kernel_tokens_claude']:>6} Claude "
|
|
195
|
+
f"(total {agg['total_tokens_gpt']} GPT · {agg['total_tokens_claude']} Claude)"
|
|
196
|
+
)
|
|
197
|
+
lines.append(f" token method: {agg['token_method']}")
|
|
198
|
+
lines.append("")
|
|
184
199
|
lines.append(f"top-5 largest:")
|
|
185
200
|
for r in agg["top5_largest"]: # type: ignore[index]
|
|
186
201
|
lines.append(f" {r['chars']:>5} {r['id']} ({r['type']})")
|