@event4u/agent-config 5.4.1 → 5.5.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.agent-src/commands/knowledge/cross-repo.md +71 -0
- package/.agent-src/commands/knowledge.md +2 -0
- package/.agent-src/commands/skill/preview.md +67 -0
- package/.agent-src/commands/skill.md +48 -0
- package/.agent-src/commands/skills/discover.md +76 -0
- package/.agent-src/commands/skills.md +56 -0
- package/.agent-src/commands/video/from-song.md +317 -0
- package/.agent-src/commands/video.md +19 -9
- package/.agent-src/rules/linked-projects-onboarding-gate.md +1 -1
- package/.agent-src/skills/song-to-script/SKILL.md +193 -0
- package/.claude-plugin/marketplace.json +9 -2
- package/CHANGELOG.md +37 -0
- package/CONTRIBUTING.md +6 -0
- package/README.md +3 -3
- package/dist/cli/registry.js +1 -0
- package/dist/cli/registry.js.map +1 -1
- package/dist/discovery/deprecation-report.md +1 -1
- package/dist/discovery/discovery-manifest.json +171 -17
- package/dist/discovery/discovery-manifest.json.sha256 +1 -1
- package/dist/discovery/discovery-manifest.summary.md +4 -4
- package/dist/discovery/orphan-report.md +1 -1
- package/dist/discovery/packs.json +17 -10
- package/dist/discovery/trust-report.md +3 -3
- package/dist/discovery/workspaces.json +13 -6
- package/dist/mcp/registry-manifest.json +2 -2
- package/docs/architecture.md +2 -2
- package/docs/contracts/command-clusters.md +4 -1
- package/docs/contracts/cross-repo-retrieval.md +64 -0
- package/docs/contracts/skill-discovery.md +80 -0
- package/docs/contracts/skill-dry-run.md +47 -0
- package/docs/decisions/ADR-032-linked-projects-scope.md +7 -3
- package/docs/getting-started.md +1 -1
- package/docs/guides/cross-repo-linked-projects.md +7 -0
- package/docs/guides/cross-repo-retrieval.md +61 -0
- package/docs/guides/skill-discovery.md +71 -0
- package/docs/guides/skill-preview.md +71 -0
- package/package.json +1 -1
- package/scripts/__pycache__/validate_frontmatter.cpython-312.pyc +0 -0
- package/scripts/_dispatch.bash +10 -0
- package/scripts/_lib/__pycache__/__init__.cpython-312.pyc +0 -0
- package/scripts/_lib/__pycache__/agent_src.cpython-312.pyc +0 -0
- package/scripts/ai-video/lib/probe-audio.sh +181 -0
- package/scripts/cross_repo_retrieve.py +172 -0
- package/scripts/inventory_meta_layers.py +288 -0
- package/scripts/linked_projects_list.py +91 -0
- package/scripts/memory_lookup.py +53 -2
- package/scripts/skill_discovery.py +254 -0
- package/scripts/skill_linter.py +8 -4
- package/scripts/skill_preview.py +179 -0
|
@@ -0,0 +1,172 @@
|
|
|
1
|
+
"""Cross-repo retrieval — read-only, targeted, opt-in (ADR-032 Option A).
|
|
2
|
+
|
|
3
|
+
Phase 4 of `road-to-leaner-core-and-discovery`. Given a query and the opted-in
|
|
4
|
+
`linked_projects` siblings, runs a bounded *targeted* search (path-glob +
|
|
5
|
+
content grep — never a full walk) and returns the retrieval envelope defined in
|
|
6
|
+
`docs/contracts/cross-repo-retrieval.md`. Reuses the redaction + chunking floor
|
|
7
|
+
from `knowledge_ingest.py` so no secret crosses a repo boundary.
|
|
8
|
+
|
|
9
|
+
Scope guards (Option A):
|
|
10
|
+
- read-only, no writes, no network;
|
|
11
|
+
- only siblings with `include: true` in agents/settings/.agent-settings.local.yml;
|
|
12
|
+
- `large`-flagged siblings REQUIRE a `--path-scope` (reject an unscoped query);
|
|
13
|
+
- ≤ --max-chunks results, one concept per query.
|
|
14
|
+
|
|
15
|
+
Usage:
|
|
16
|
+
python3 scripts/cross_repo_retrieve.py "<query>" [--path-scope GLOB]
|
|
17
|
+
[--max-chunks N] [--format text|json] [--root PATH]
|
|
18
|
+
"""
|
|
19
|
+
from __future__ import annotations
|
|
20
|
+
|
|
21
|
+
import argparse
|
|
22
|
+
import fnmatch
|
|
23
|
+
import json
|
|
24
|
+
import subprocess
|
|
25
|
+
import sys
|
|
26
|
+
from pathlib import Path
|
|
27
|
+
|
|
28
|
+
REPO_ROOT = Path(__file__).resolve().parent.parent
|
|
29
|
+
sys.path.insert(0, str(REPO_ROOT / "scripts"))
|
|
30
|
+
sys.path.insert(0, str(REPO_ROOT / "packages" / "core" / "installer" / "python"))
|
|
31
|
+
from linked_projects_list import collect as collect_siblings # type: ignore # noqa: E402
|
|
32
|
+
|
|
33
|
+
# Prefer the ingest pipeline's redaction and chunking helpers when they can be
# imported from the paths added to sys.path above.
try:
    from knowledge_ingest import redact, chunk_text  # type: ignore
except Exception:  # pragma: no cover - keep retrieval usable if ingest moves
    # NOTE(review): this fallback is fail-open. `redact` below returns the text
    # unchanged and reports 0 redactions, so when `knowledge_ingest` cannot be
    # imported NO redaction is applied to chunks returned from sibling repos.
    # The module docstring promises that no secret crosses a repo boundary —
    # confirm whether failing closed would be the safer default here.
    def redact(text, counters):  # type: ignore
        # Same call shape as used by search_sibling: returns (text, count).
        return text, 0

    def chunk_text(text, target_bytes=2048):  # type: ignore
        # Degenerate chunker: at most one chunk holding the first
        # `target_bytes` characters (a str slice, not a byte slice); an empty
        # text yields no chunks.
        return [text[:target_bytes]] if text else []
|
|
41
|
+
|
|
42
|
+
# Default for the --max-chunks CLI option (upper bound on returned matches).
DEFAULT_MAX_CHUNKS = 8
MAX_FILES_SCANNED = 2000  # hard ceiling on the targeted walk, defence-in-depth
# Lower-cased file suffixes that _iter_files treats as searchable text.
TEXT_SUFFIXES = {".md", ".txt", ".py", ".ts", ".tsx", ".js", ".jsx", ".php", ".go",
                 ".rs", ".rb", ".java", ".json", ".yml", ".yaml", ".toml", ".sql", ".sh"}
# Path components that exclude a file from the search when they appear anywhere
# in its path relative to the sibling root.
SKIP_DIRS = {".git", "node_modules", "dist", "vendor", ".venv", "__pycache__",
             ".idea", ".vscode", "build", "target", ".next", "coverage"}
|
|
48
|
+
|
|
49
|
+
|
|
50
|
+
def _freshness(repo: Path, rel: str) -> str:
|
|
51
|
+
try:
|
|
52
|
+
out = subprocess.run(
|
|
53
|
+
["git", "log", "-1", "--format=%ad", "--date=short", "--", rel],
|
|
54
|
+
cwd=repo, capture_output=True, text=True, timeout=8, check=False,
|
|
55
|
+
)
|
|
56
|
+
if out.returncode == 0 and out.stdout.strip():
|
|
57
|
+
return out.stdout.strip()
|
|
58
|
+
except (OSError, subprocess.SubprocessError):
|
|
59
|
+
pass
|
|
60
|
+
try:
|
|
61
|
+
from datetime import datetime, timezone
|
|
62
|
+
ts = (repo / rel).stat().st_mtime
|
|
63
|
+
return datetime.fromtimestamp(ts, timezone.utc).date().isoformat()
|
|
64
|
+
except OSError:
|
|
65
|
+
return "unknown"
|
|
66
|
+
|
|
67
|
+
|
|
68
|
+
def _iter_files(repo: Path, path_scope: str | None):
    """Yield ``(path, rel)`` for searchable text files under ``repo``.

    ``rel`` is the path relative to ``repo`` as a string. A file is yielded
    when its lower-cased suffix is in ``TEXT_SUFFIXES``, none of its parent
    directories is named in ``SKIP_DIRS`` and, if ``path_scope`` is given,
    ``rel`` matches that ``fnmatch`` pattern. At most ``MAX_FILES_SCANNED``
    files are yielded.

    The tree is walked lazily: directories listed in ``SKIP_DIRS`` are pruned
    instead of being enumerated and filtered afterwards, and the walk stops as
    soon as the consumer stops iterating or the cap is reached. Entries are
    visited depth-first with names sorted per directory, which is the same
    order as sorting the full paths component-wise.
    """
    import os  # function-scope import: `os` is not imported at file level

    def _walk(directory):
        try:
            with os.scandir(directory) as scan:
                entries = sorted(scan, key=lambda e: os.path.normcase(e.name))
        except OSError:
            # Missing or unreadable directory: contributes no files.
            return
        for entry in entries:
            try:
                # Symlinked directories are not descended into (avoids cycles).
                descend = entry.is_dir(follow_symlinks=False)
            except OSError:
                continue
            if descend:
                if entry.name not in SKIP_DIRS:
                    yield from _walk(entry.path)
            else:
                yield Path(entry.path)

    count = 0
    for p in _walk(repo):
        if count >= MAX_FILES_SCANNED:
            break
        # Cheap suffix test first; is_file() needs a stat call.
        if p.suffix.lower() not in TEXT_SUFFIXES or not p.is_file():
            continue
        rel = str(p.relative_to(repo))
        if path_scope and not fnmatch.fnmatch(rel, path_scope):
            continue
        count += 1
        yield p, rel
|
|
82
|
+
|
|
83
|
+
|
|
84
|
+
def _terms(query: str) -> list[str]:
|
|
85
|
+
return [t for t in query.lower().replace(",", " ").split() if len(t) > 2]
|
|
86
|
+
|
|
87
|
+
|
|
88
|
+
def search_sibling(repo: Path, query: str, terms: list[str], path_scope: str | None,
                   budget: int) -> list[dict]:
    """Search one sibling repository and return up to ``budget`` match dicts.

    A file matches when any term occurs in its lower-cased relative path or in
    its lower-cased content. Each match carries ``source_repo``, ``path``,
    ``chunk`` (redacted, at most 2048 characters), ``freshness`` and
    ``match_reason``. ``query`` is accepted for signature symmetry; only
    ``terms`` drive the matching.
    """
    found: list[dict] = []
    for file_path, rel in _iter_files(repo, path_scope):
        if len(found) >= budget:
            break

        lowered_rel = rel.lower()
        path_match = any(term in lowered_rel for term in terms)

        try:
            body = file_path.read_text(encoding="utf-8", errors="replace")
        except OSError:
            continue

        lowered_body = body.lower()
        content_terms = [term for term in terms if term in lowered_body]
        if not (path_match or content_terms):
            continue

        # Prefer the first chunk that mentions a term; otherwise use the head
        # of the file (or an empty string when there are no chunks at all).
        pieces = chunk_text(body)
        chosen = pieces[0] if pieces else ""
        for piece in pieces:
            lowered_piece = piece.lower()
            if any(term in lowered_piece for term in terms):
                chosen = piece
                break

        redacted, _ = redact(chosen, {})
        if path_match:
            reason = f"path matches: {rel}"
        else:
            reason = f"content term(s): {', '.join(content_terms[:3])}"

        found.append({
            "source_repo": repo.name,
            "path": rel,
            "chunk": redacted[:2048],
            "freshness": _freshness(repo, rel),
            "match_reason": reason,
        })
    return found
|
|
119
|
+
|
|
120
|
+
|
|
121
|
+
def retrieve(root: Path, query: str, path_scope: str | None, max_chunks: int) -> dict:
    """Build the retrieval envelope for ``query`` across opted-in siblings.

    Returns ``{"query", "matches"}`` plus an optional ``"note"``. The note
    explains an empty result (no opted-in siblings, or no term longer than two
    characters) or lists siblings flagged ``large`` that were skipped because
    no ``path_scope`` was supplied. At most ``max_chunks`` matches are returned.
    """
    siblings = collect_siblings(root, show_all=False)  # opted-in only
    if not siblings:
        return {
            "query": query,
            "matches": [],
            "note": "no opted-in linked-project siblings — nothing to search",
        }

    terms = _terms(query)
    if not terms:
        return {
            "query": query,
            "matches": [],
            "note": "query too short — give at least one term > 2 chars",
        }

    matches: list[dict] = []
    skipped: list[str] = []
    for sibling in siblings:
        remaining = max_chunks - len(matches)
        if remaining <= 0:
            break
        sibling_root = Path(sibling["path"])
        # Large siblings are only searched when the caller narrowed the scope.
        if sibling.get("large") and not path_scope:
            skipped.append(sibling["path"])
            continue
        matches.extend(search_sibling(sibling_root, query, terms, path_scope, remaining))

    envelope: dict = {"query": query, "matches": matches[:max_chunks]}
    if skipped:
        envelope["note"] = (
            "large sibling(s) skipped — supply --path-scope to search them: "
            + "; ".join(skipped)
        )
    return envelope
|
|
143
|
+
|
|
144
|
+
|
|
145
|
+
def render_text(result: dict) -> str:
    """Render a retrieval envelope as a Markdown table.

    With no matches, the envelope's note (or ``"no matches"``) is returned on
    its own. Otherwise one table row is emitted per match and a present note
    is appended as a trailing blockquote.
    """
    matches = result["matches"]
    if not matches:
        return result.get("note", "no matches")

    header = ["| source_repo | path | freshness | why |", "|---|---|---|---|"]
    body = [
        f"| {m['source_repo']} | {m['path']} | {m['freshness']} | {m['match_reason']} |"
        for m in matches
    ]
    footer = ["", f"> {result['note']}"] if result.get("note") else []
    return "\n".join(header + body + footer)
|
|
155
|
+
|
|
156
|
+
|
|
157
|
+
def main(argv: list[str] | None = None) -> int:
    """CLI entry point: parse arguments, run the retrieval, print the result.

    Prints JSON (indent 2) for ``--format json`` and the Markdown rendering
    otherwise. Always returns 0.
    """
    parser = argparse.ArgumentParser(
        description="Targeted, read-only cross-repo retrieval (ADR-032 Option A).",
    )
    parser.add_argument("query", help="What to retrieve (one concept; ≥ 1 term > 2 chars).")
    parser.add_argument(
        "--path-scope",
        default=None,
        help="Glob to scope the search (required for large siblings).",
    )
    parser.add_argument("--max-chunks", type=int, default=DEFAULT_MAX_CHUNKS)
    parser.add_argument("--format", choices=("text", "json"), default="text")
    parser.add_argument("--root", default=".")
    opts = parser.parse_args(argv)

    root = Path(opts.root).resolve()
    result = retrieve(root, opts.query, opts.path_scope, opts.max_chunks)
    if opts.format == "json":
        rendered = json.dumps(result, indent=2)
    else:
        rendered = render_text(result)
    print(rendered)
    return 0


if __name__ == "__main__":
    raise SystemExit(main())
|
|
@@ -0,0 +1,288 @@
|
|
|
1
|
+
"""Meta-layer / concept-surface inventory — read-only discovery pass.
|
|
2
|
+
|
|
3
|
+
Drives Phase 1 of `agents/roadmaps/road-to-leaner-core-and-discovery.md`.
|
|
4
|
+
Sibling to `scripts/inventory_abstraction_budget.py`: that tool counts
|
|
5
|
+
per-artefact references + frontmatter bloat; this one inventories the
|
|
6
|
+
*concept surface* the post-5.x feedback names as meta-complexity.
|
|
7
|
+
|
|
8
|
+
For each concept it emits one row:
|
|
9
|
+
concept · surfaces it lives in · line cost · last-touched · overlap candidates
|
|
10
|
+
|
|
11
|
+
Concept = a normalized token shared by ≥ 2 stable artefacts (a rule, a
|
|
12
|
+
contract, a guideline, or a context) — i.e. a single idea defined in
|
|
13
|
+
more than one surface. Plus the curated meta-layer families the
|
|
14
|
+
feedback names explicitly (iron-laws, value, roadmap, linked-projects,
|
|
15
|
+
marketplace, governance). Also tabulates always-loaded rule families
|
|
16
|
+
(kernel) + Iron-Law count per rule.
|
|
17
|
+
|
|
18
|
+
Output: agents/evidence/analysis/meta-layer-inventory.md (+ .csv)
|
|
19
|
+
Read-only. Touches no abstraction file.
|
|
20
|
+
|
|
21
|
+
Usage:
|
|
22
|
+
python3 scripts/inventory_meta_layers.py [--quiet]
|
|
23
|
+
"""
|
|
24
|
+
from __future__ import annotations
|
|
25
|
+
|
|
26
|
+
import argparse
|
|
27
|
+
import csv
|
|
28
|
+
import json
|
|
29
|
+
import re
|
|
30
|
+
import subprocess
|
|
31
|
+
from collections import defaultdict
|
|
32
|
+
from dataclasses import dataclass, field
|
|
33
|
+
from pathlib import Path
|
|
34
|
+
|
|
35
|
+
# Repository root: two directory levels above this script's resolved location.
REPO_ROOT = Path(__file__).resolve().parent.parent
# Directories whose *.md files are scanned as concept surfaces (see main()).
RULES_DIR = REPO_ROOT / ".agent-src" / "rules"
CONTRACTS_DIR = REPO_ROOT / "docs" / "contracts"
GUIDELINES_DIR = REPO_ROOT / "docs" / "guidelines"
CONTEXTS_DIR = REPO_ROOT / ".agent-src" / "contexts"
# JSON file read by _kernel_and_tiers() for the kernel / tier_1 / tier_2 keys.
ROUTER = REPO_ROOT / "dist" / "router.json"
# Output directory for the generated inventory (.md and .csv).
EVIDENCE_DIR = REPO_ROOT / "agents" / "evidence" / "analysis"

# Matches a level 1-3 Markdown heading that starts with "Iron Law(s)",
# optionally preceded by "The"; case-insensitive, anchored per line.
IRON_LAW_RE = re.compile(r"^#{1,3}\s+(?:The\s+)?Iron\s+Laws?\b", re.IGNORECASE | re.MULTILINE)

# Generic filename tokens that carry no concept identity — dropped before grouping.
STOPWORDS = {
    "rule", "rules", "contract", "contracts", "mechanics", "policy", "schema",
    "config", "v1", "v2", "and", "the", "of", "for", "to", "in", "on", "a",
    "adr", "model", "spec", "format", "default", "defaults", "system", "examples",
    "demos", "writing", "patterns", "auto", "core", "base",
}

# Curated meta-layer families the feedback names (always emitted as rows).
# NOTE(review): the module docstring lists six families; "council" is seeded
# here in addition. Within this file the list is only used to compute the
# `seeded` flag of rows that already exist — confirm "always emitted" is intended.
SEED_FAMILIES = ["iron", "value", "roadmap", "linked", "marketplace", "governance", "council"]
|
|
55
|
+
|
|
56
|
+
|
|
57
|
+
@dataclass
class Surface:
    """A single Markdown artefact that takes part in the concept inventory."""

    path: Path  # location of the artefact on disk
    kind: str  # rule | contract | guideline | context
    lines: int  # line count used as the artefact's "line cost"
    tokens: set[str] = field(default_factory=set)  # significant filename tokens

    @property
    def rel(self) -> str:
        """The artefact's path relative to ``REPO_ROOT``, as a string."""
        relative = self.path.relative_to(REPO_ROOT)
        return str(relative)
|
|
67
|
+
|
|
68
|
+
|
|
69
|
+
def _last_touched(path: Path) -> str:
    """Return the short date of the last git commit touching ``path``.

    git is run from ``REPO_ROOT``. Empty git output yields ``"untracked"``;
    any exception while running git yields ``"unknown"``.
    """
    try:
        git_cmd = ["git", "log", "-1", "--format=%ad", "--date=short", "--", str(path)]
        proc = subprocess.run(
            git_cmd, cwd=REPO_ROOT, capture_output=True, text=True, check=False, timeout=10,
        )
        stamp = proc.stdout.strip()
    except Exception:
        return "unknown"
    return stamp if stamp else "untracked"
|
|
78
|
+
|
|
79
|
+
|
|
80
|
+
def _tokens(stem: str) -> set[str]:
    """Split a filename stem into its significant tokens.

    The stem is lower-cased and split on ``-`` and ``_``; tokens of two
    characters or fewer and tokens listed in ``STOPWORDS`` are discarded.
    """
    pieces = re.split(r"[-_]", stem.lower())
    return {piece for piece in pieces if len(piece) > 2 and piece not in STOPWORDS}
|
|
82
|
+
|
|
83
|
+
|
|
84
|
+
def _collect(directory: Path, kind: str) -> list[Surface]:
    """Collect every ``*.md`` file below ``directory`` as a ``Surface``.

    Files are visited recursively in sorted order; ``README.md`` files are
    skipped. A missing directory yields an empty list.

    The line count is the number of text lines in the file: a trailing newline
    does not add a phantom extra line and an empty file counts as 0 lines
    (``text.count("\\n") + 1`` overcounted both cases by one).
    """
    out: list[Surface] = []
    if not directory.exists():
        return out
    for p in sorted(directory.rglob("*.md")):
        if p.name == "README.md":
            continue
        text = p.read_text(encoding="utf-8", errors="replace")
        # Count newline characters, plus one for a final line that lacks one.
        line_count = text.count("\n") + (1 if text and not text.endswith("\n") else 0)
        out.append(Surface(p, kind, line_count, _tokens(p.stem)))
    return out
|
|
94
|
+
|
|
95
|
+
|
|
96
|
+
def _kernel_and_tiers() -> dict[str, int | list[str]]:
    """Read the kernel list and tier sizes from the ``ROUTER`` JSON file.

    Returns a dict with ``kernel`` (the value stored under that key, or an
    empty list when absent or null) and ``tier_1`` / ``tier_2`` as integers.
    A tier stored as an integer is used as-is; a sized value (e.g. a list)
    contributes its length; a missing, null or otherwise unsized value counts
    as 0 instead of raising. A missing router file yields all-empty defaults.
    """
    if not ROUTER.exists():
        return {"kernel": [], "tier_1": 0, "tier_2": 0}

    def _size(value) -> int:
        # Integers are already counts; anything else is measured with len().
        if isinstance(value, int):
            return value
        try:
            return len(value)
        except TypeError:
            return 0

    d = json.loads(ROUTER.read_text(encoding="utf-8"))
    return {
        # `or []` also covers an explicit JSON null, which .get() would pass through.
        "kernel": d.get("kernel") or [],
        "tier_1": _size(d.get("tier_1", 0)),
        "tier_2": _size(d.get("tier_2", 0)),
    }
|
|
105
|
+
|
|
106
|
+
|
|
107
|
+
def _iron_law_counts() -> dict[str, int]:
    """Count Iron-Law headings per rule file directly inside ``RULES_DIR``.

    Maps each rule's filename stem to its number of ``IRON_LAW_RE`` matches;
    rules without any match are left out.
    """
    per_rule: dict[str, int] = {}
    for rule_file in sorted(RULES_DIR.glob("*.md")):
        content = rule_file.read_text(encoding="utf-8", errors="replace")
        headings = IRON_LAW_RE.findall(content)
        if headings:
            per_rule[rule_file.stem] = len(headings)
    return per_rule
|
|
115
|
+
|
|
116
|
+
|
|
117
|
+
def _stack(s: Surface) -> str:
|
|
118
|
+
"""Concern stack a surface belongs to. PHP coding guidelines are a
|
|
119
|
+
different domain from agent-behaviour rules/contracts — a shared topic
|
|
120
|
+
word (`git`, `security`) between them is coincidence, not duplication.
|
|
121
|
+
"""
|
|
122
|
+
return "php" if "/guidelines/php/" in s.rel.replace("\\", "/") else "agent"
|
|
123
|
+
|
|
124
|
+
|
|
125
|
+
def _same_concept(a: Surface, b: Surface) -> bool:
    """Decide whether two surfaces duplicate one concept (not mere topic adjacency).

    Two rules apply, in this order:

    1. Prefix containment — the shorter stem followed by ``-`` starts the
       longer stem (`language-and-tone` vs `language-and-tone-examples`), and
       the shorter stem itself carries at least two significant tokens. A
       generic single word (`git` vs `git-history-discipline`) does not count.
       This rule applies across stacks.
    2. Shared tokens — both surfaces are in the same stack and share at least
       two significant tokens (a tight family such as `domain-safety-*`).

    A single shared topic token (`skill`, `command`, `agent`) is never enough,
    and surfaces from different stacks (PHP guideline vs agent rule/contract)
    are only grouped through rule 1.
    """
    stem_a = a.path.stem.lower()
    stem_b = b.path.stem.lower()
    if len(stem_a) <= len(stem_b):
        shorter, longer = stem_a, stem_b
    else:
        shorter, longer = stem_b, stem_a

    is_prefix_family = (
        shorter != longer
        and longer.startswith(shorter + "-")
        and len(_tokens(shorter)) >= 2
    )
    if is_prefix_family:
        return True

    # Cross-stack pairs never group on shared tokens alone.
    if _stack(a) != _stack(b):
        return False

    common = a.tokens & b.tokens
    return len(common) >= 2
|
|
148
|
+
|
|
149
|
+
|
|
150
|
+
def _concept_rows(surfaces: list[Surface]) -> list[dict]:
    """Group surfaces into concepts and return one ledger row per group.

    Surfaces are linked whenever ``_same_concept`` holds for a pair; connected
    components with at least two members become rows. Each row holds the
    concept label, the member paths, member count, kinds, summed line cost,
    last-touched value, an overlap marker (``"Y"`` for cross-kind groups or
    groups with two or more contracts, else ``"family"``) and a seeded flag.
    Rows are sorted by member count, then line cost, both descending.

    The label is built from the tokens shared by every member. When no token
    is shared by all, the most frequent token is used and ties are broken
    alphabetically, so the label does not depend on set iteration order (which
    varies between runs under string hash randomisation).
    """
    # Union-find over the genuine-duplication adjacency.
    parent = list(range(len(surfaces)))

    def find(i: int) -> int:
        while parent[i] != i:
            parent[i] = parent[parent[i]]  # path halving
            i = parent[i]
        return i

    def union(i: int, j: int) -> None:
        parent[find(i)] = find(j)

    for i, left in enumerate(surfaces):
        for j in range(i + 1, len(surfaces)):
            if _same_concept(left, surfaces[j]):
                union(i, j)

    comps: dict[int, list[Surface]] = defaultdict(list)
    for idx, s in enumerate(surfaces):
        comps[find(idx)].append(s)

    rows: list[dict] = []
    for group in comps.values():
        if len(group) < 2:
            continue
        kinds = {s.kind for s in group}
        # Concept label = the most-common significant token shared across the group.
        tok_freq: dict[str, int] = defaultdict(int)
        for s in group:
            for t in s.tokens:
                tok_freq[t] += 1
        shared = [t for t, n in tok_freq.items() if n == len(group)]
        if not shared:
            # Highest frequency first, then alphabetical: a deterministic tie-break.
            shared = [min(tok_freq, key=lambda t: (-tok_freq[t], t))]
        label = "-".join(sorted(shared)[:2])
        seeded = any(any(t.startswith(f) or f.startswith(t) for f in SEED_FAMILIES) for t in shared)
        cross_kind = len(kinds) >= 2
        contract_dup = sum(1 for s in group if s.kind == "contract") >= 2
        line_cost = sum(s.lines for s in group)
        # NOTE(review): this is a string max, so the "untracked"/"unknown"
        # markers from _last_touched sort above any ISO date — confirm that a
        # single untracked member should label the whole group.
        touched = max((_last_touched(s.path) for s in group), default="unknown")
        rows.append({
            "concept": label,
            "surfaces": "; ".join(s.rel for s in sorted(group, key=lambda x: x.rel)),
            "surface_count": len(group),
            "kinds": ",".join(sorted(kinds)),
            "line_cost": line_cost,
            "last_touched": touched,
            "overlap": "Y" if (cross_kind or contract_dup) else "family",
            "seeded": "Y" if seeded else "",
        })
    rows.sort(key=lambda r: (-r["surface_count"], -r["line_cost"]))
    return rows
|
|
201
|
+
|
|
202
|
+
|
|
203
|
+
def main(argv: list[str] | None = None) -> int:
    """CLI entry point: build the inventory and write it as Markdown and CSV.

    Collects surfaces from the rules, contracts, guidelines and contexts
    directories, reads the router tiers and Iron-Law counts, derives the
    concept rows, then writes ``meta-layer-inventory.md`` and
    ``meta-layer-inventory.csv`` into ``EVIDENCE_DIR`` (created if missing).
    Prints a short summary unless ``--quiet`` is given. Always returns 0.
    """
    ap = argparse.ArgumentParser(description="Meta-layer / concept-surface inventory (read-only).")
    ap.add_argument("--quiet", action="store_true")
    args = ap.parse_args(argv)

    # Gather every input before anything is written.
    surfaces = (
        _collect(RULES_DIR, "rule")
        + _collect(CONTRACTS_DIR, "contract")
        + _collect(GUIDELINES_DIR, "guideline")
        + _collect(CONTEXTS_DIR, "context")
    )
    tiers = _kernel_and_tiers()
    iron = _iron_law_counts()
    rows = _concept_rows(surfaces)

    # The scan itself only reads; these two report files are the only outputs.
    EVIDENCE_DIR.mkdir(parents=True, exist_ok=True)
    md = EVIDENCE_DIR / "meta-layer-inventory.md"
    csv_path = EVIDENCE_DIR / "meta-layer-inventory.csv"

    kernel = tiers["kernel"]
    overlap_count = sum(1 for r in rows if r["overlap"] == "Y")
    iron_total = sum(iron.values())

    # Markdown report, part 1: title, summary metrics, Iron-Law table header.
    lines = [
        "# Meta-Layer / Concept-Surface Inventory",
        "",
        "> Read-only discovery output for `agents/roadmaps/road-to-leaner-core-and-discovery.md` Phase 1.",
        "> Counts are grep/git-backed via `scripts/inventory_meta_layers.py`. A row is an *overlap candidate*",
        "> when one concept (a shared filename token) is defined across ≥ 2 stable surfaces.",
        "",
        "## Summary",
        "",
        "| Metric | Value |",
        "|---|---:|",
        f"| Always-loaded kernel rule families | {len(kernel)} |",
        f"| tier_1 (balanced) rules | {tiers['tier_1']} |",
        f"| tier_2 (full) rules | {tiers['tier_2']} |",
        f"| Rules carrying Iron-Law headings | {len(iron)} |",
        f"| Total Iron-Law headings across rules | {iron_total} |",
        f"| Concept surfaces scanned (rule/contract/guideline/context) | {len(surfaces)} |",
        f"| Concept overlap candidates (≥ 2 surfaces, cross-kind/contract-dup) | {overlap_count} |",
        "",
        f"Kernel: {', '.join(kernel)}",
        "",
        "## Iron-Law density per rule (top 15)",
        "",
        "| Rule | Iron Laws |",
        "|---|---:|",
    ]
    # Highest counts first; the sort is stable, so ties keep the order of `iron`.
    for stem, n in sorted(iron.items(), key=lambda kv: -kv[1])[:15]:
        lines.append(f"| `{stem}` | {n} |")

    # Markdown report, part 2: the concept-overlap ledger.
    lines += [
        "",
        "## Concept-overlap ledger",
        "",
        "> One row per concept defined in ≥ 2 surfaces. `overlap=Y` = cross-kind or duplicate-contract",
        "> (genuine merge/delete candidate). `seeded` = a feedback-named meta-layer family.",
        "> Classification (merge / delete / keep-with-reason) is filled in Step 2 — left blank here.",
        "",
        "| Concept | Surfaces | # | Kinds | Lines | Last touched | Overlap | Class |",
        "|---|---|---:|---|---:|---|---|---|",
    ]
    # The `seeded` flag is not a column of this table; it is only written to the CSV.
    for r in rows:
        lines.append(
            f"| `{r['concept']}` | {r['surfaces']} | {r['surface_count']} | {r['kinds']} | "
            f"{r['line_cost']} | {r['last_touched']} | {r['overlap']} | _unclassified_ |"
        )
    lines.append("")
    md.write_text("\n".join(lines), encoding="utf-8")

    # CSV mirror of the ledger; fieldnames cover every key of a row dict.
    with csv_path.open("w", newline="", encoding="utf-8") as fh:
        w = csv.DictWriter(fh, fieldnames=["concept", "surface_count", "kinds", "line_cost", "last_touched", "overlap", "seeded", "surfaces"])
        w.writeheader()
        for r in rows:
            w.writerow(r)

    if not args.quiet:
        print(f"meta-layer inventory: {len(surfaces)} surfaces, {len(rows)} concept rows, {overlap_count} overlap candidates")
        print(f" → {md.relative_to(REPO_ROOT)}")
        print(f" → {csv_path.relative_to(REPO_ROOT)}")
    return 0


if __name__ == "__main__":
    raise SystemExit(main())
|
|
@@ -0,0 +1,91 @@
|
|
|
1
|
+
"""`agent-config linked-projects:list` — list opted-in IDE-attached siblings.
|
|
2
|
+
|
|
3
|
+
Phase 4 of `road-to-leaner-core-and-discovery`; closes the ADR-032 follow-up
|
|
4
|
+
"expose the detector as a CLI subcommand for consumer reach". Pure wrapper over
|
|
5
|
+
`scripts/_lib/linked_projects.detect_linked_projects` + the
|
|
6
|
+
`agents/settings/.agent-settings.local.yml` → `linked_projects[]` opt-in
|
|
7
|
+
cascade. No detection logic is duplicated here.
|
|
8
|
+
|
|
9
|
+
Prints opted-in siblings (`path · detected_via · large`). `--all` shows every
|
|
10
|
+
detected sibling with its opt-in status; `--format json` is machine-readable.
|
|
11
|
+
Read-only, no network.
|
|
12
|
+
|
|
13
|
+
Usage:
|
|
14
|
+
python3 scripts/linked_projects_list.py [--all] [--format text|json] [--root PATH]
|
|
15
|
+
"""
|
|
16
|
+
from __future__ import annotations
|
|
17
|
+
|
|
18
|
+
import argparse
|
|
19
|
+
import json
|
|
20
|
+
import sys
|
|
21
|
+
from pathlib import Path
|
|
22
|
+
|
|
23
|
+
import yaml
|
|
24
|
+
|
|
25
|
+
REPO_ROOT = Path(__file__).resolve().parent.parent
|
|
26
|
+
sys.path.insert(0, str(REPO_ROOT / "scripts"))
|
|
27
|
+
from _lib.linked_projects import detect_linked_projects # type: ignore # noqa: E402
|
|
28
|
+
|
|
29
|
+
# Location of the local settings file, relative to the project root passed to
# _opt_in_map(); its `linked_projects` list carries the per-sibling opt-in flags.
LOCAL_SETTINGS = Path("agents") / "settings" / ".agent-settings.local.yml"
|
|
30
|
+
|
|
31
|
+
|
|
32
|
+
def _opt_in_map(root: Path) -> dict[str, bool]:
    """Map resolved sibling path → include flag from the local settings cascade.

    Reads ``root / LOCAL_SETTINGS`` and returns one entry per item of its
    ``linked_projects`` list that is a mapping with a non-empty ``path``. The
    key is the user-expanded, resolved path (or the raw path text when it
    cannot be resolved); the value is ``bool(include)``.

    The settings file is user-edited, so every malformed shape degrades to
    "no decisions" (``{}``) or a skipped entry rather than an exception:
    a missing or unreadable file, invalid YAML, a top level that is not a
    mapping, and a ``linked_projects`` value that is not a list.
    """
    f = root / LOCAL_SETTINGS
    if not f.is_file():
        return {}
    try:
        data = yaml.safe_load(f.read_text(encoding="utf-8", errors="replace")) or {}
    except (OSError, yaml.YAMLError):
        return {}
    if not isinstance(data, dict):
        return {}
    entries = data.get("linked_projects") or []
    if not isinstance(entries, list):
        return {}

    out: dict[str, bool] = {}
    for entry in entries:
        if not (isinstance(entry, dict) and entry.get("path")):
            continue
        # YAML may parse an unquoted path as a non-string scalar; coerce first.
        raw = str(entry["path"])
        try:
            key = str(Path(raw).expanduser().resolve())
        except (OSError, RuntimeError):
            # expanduser()/resolve() can fail (no home directory, symlink
            # loop); keep the entry addressable by its literal text.
            key = raw
        out[key] = bool(entry.get("include"))
    return out
|
|
49
|
+
|
|
50
|
+
|
|
51
|
+
def collect(root: Path, show_all: bool) -> list[dict]:
    """Return detected sibling projects annotated with their opt-in decision.

    Each row is the detector's dict plus an ``include`` key holding ``True``,
    ``False`` or ``None`` (no decision recorded). Unless ``show_all`` is set,
    only siblings whose decision is exactly ``True`` are returned.
    """
    detected = detect_linked_projects(root)
    decisions = _opt_in_map(root)
    rows: list[dict] = []
    for sibling in detected:
        decision = decisions.get(sibling["path"])  # None = undecided
        if show_all or decision is True:
            rows.append({**sibling, "include": decision})
    return rows
|
|
61
|
+
|
|
62
|
+
|
|
63
|
+
def render_text(rows: list[dict], show_all: bool) -> str:
    """Render sibling rows as a Markdown table, or a hint when there are none.

    ``show_all`` only affects the wording of the empty-result hint.
    """
    if not rows:
        scope = "detected" if show_all else "opted-in"
        return f"No {scope} linked-project siblings. (Attach a sibling repo in your IDE and opt in.)"

    decision_labels = {True: "yes", False: "no", None: "undecided"}
    table = ["| path | detected via | large | opted in |", "|---|---|---|---|"]
    for row in rows:
        opted = decision_labels[row.get("include")]
        large = "yes" if row["large"] else "no"
        table.append(f"| {row['path']} | {row['detected_via']} | {large} | {opted} |")
    return "\n".join(table)
|
|
72
|
+
|
|
73
|
+
|
|
74
|
+
def main(argv: list[str] | None = None) -> int:
    """CLI entry point: list linked-project siblings as text or JSON.

    The JSON form wraps the rows as ``{"root": ..., "siblings": [...]}``.
    Always returns 0.
    """
    parser = argparse.ArgumentParser(
        description="List opted-in IDE-attached sibling projects (read-only).",
    )
    parser.add_argument(
        "--all",
        action="store_true",
        help="Show every detected sibling, not only opted-in.",
    )
    parser.add_argument("--format", choices=("text", "json"), default="text")
    parser.add_argument("--root", default=".", help="Project root (default: cwd).")
    opts = parser.parse_args(argv)

    root = Path(opts.root).resolve()
    rows = collect(root, opts.all)
    if opts.format == "json":
        payload = {"root": str(root), "siblings": rows}
        print(json.dumps(payload, indent=2))
    else:
        print(render_text(rows, opts.all))
    return 0


if __name__ == "__main__":
    raise SystemExit(main())
|
package/scripts/memory_lookup.py
CHANGED
|
@@ -56,12 +56,21 @@ CURATED_TYPES = {
|
|
|
56
56
|
# conflict rule still treats them as repo entries against operational.
|
|
57
57
|
KNOWLEDGE_TYPE = "knowledge"
|
|
58
58
|
|
|
59
|
+
# Cross-repo retrieval (road-to-leaner-core-and-discovery Phase 4). When this
|
|
60
|
+
# type is requested AND opted-in linked-project siblings exist, matches from
|
|
61
|
+
# scripts/cross_repo_retrieve.py are projected as `source="cross-repo"` Hits,
|
|
62
|
+
# scored below curated/knowledge so cross-repo context never outranks the
|
|
63
|
+
# project's own truth (mirrors the 0.85× knowledge discount, then floored
|
|
64
|
+
# further). Opt-in by caller (type must be requested) + lazy import → existing
|
|
65
|
+
# call sites and consumers without the script are unaffected.
|
|
66
|
+
CROSS_REPO_TYPE = "cross-repo"
|
|
67
|
+
|
|
59
68
|
|
|
60
69
|
@dataclass
|
|
61
70
|
class Hit:
|
|
62
71
|
id: str
|
|
63
72
|
type: str
|
|
64
|
-
source: str # "curated" | "intake" | "operational"
|
|
73
|
+
source: str # "curated" | "intake" | "operational" | "knowledge" | "cross-repo"
|
|
65
74
|
path: str # file (or logical locator) that produced the hit
|
|
66
75
|
score: float # naive, content-match based [0..1]
|
|
67
76
|
entry: dict = field(default_factory=dict)
|
|
@@ -416,6 +425,45 @@ def package_operational_provider() -> Optional[OperationalProvider]:
|
|
|
416
425
|
return _cli_operational_provider
|
|
417
426
|
|
|
418
427
|
|
|
428
|
+
def _cross_repo_hits(keys: list[str], limit: int) -> list[Hit]:
|
|
429
|
+
"""Project cross-repo matches into discounted, tagged Hits.
|
|
430
|
+
|
|
431
|
+
Lazy + guarded: imports `cross_repo_retrieve` on demand and swallows any
|
|
432
|
+
failure (script absent in a consumer install, no opted-in siblings) so the
|
|
433
|
+
cross-repo type degrades to zero hits rather than breaking retrieval. Scores
|
|
434
|
+
sit below curated/knowledge (0.85× floor, then a small per-rank decrement)
|
|
435
|
+
so cross-repo context never outranks the project's own truth.
|
|
436
|
+
"""
|
|
437
|
+
query = " ".join(k for k in keys if k).strip()
|
|
438
|
+
if not query:
|
|
439
|
+
return []
|
|
440
|
+
try:
|
|
441
|
+
import os
|
|
442
|
+
import sys as _sys
|
|
443
|
+
from pathlib import Path as _Path
|
|
444
|
+
|
|
445
|
+
here = _Path(__file__).resolve().parent
|
|
446
|
+
if str(here) not in _sys.path:
|
|
447
|
+
_sys.path.insert(0, str(here))
|
|
448
|
+
import cross_repo_retrieve # type: ignore
|
|
449
|
+
|
|
450
|
+
result = cross_repo_retrieve.retrieve(_Path(os.getcwd()), query, None, limit)
|
|
451
|
+
except Exception: # noqa: BLE001 — optional surface; never break retrieval
|
|
452
|
+
return []
|
|
453
|
+
|
|
454
|
+
hits: list[Hit] = []
|
|
455
|
+
for i, m in enumerate(result.get("matches", [])):
|
|
456
|
+
hits.append(Hit(
|
|
457
|
+
id=f"cross-repo:{m.get('source_repo', '')}:{m.get('path', '')}",
|
|
458
|
+
type=CROSS_REPO_TYPE,
|
|
459
|
+
source="cross-repo",
|
|
460
|
+
path=f"{m.get('source_repo', '')}/{m.get('path', '')}",
|
|
461
|
+
score=round(0.7 * 0.85 - i * 0.01, 4),
|
|
462
|
+
entry=m,
|
|
463
|
+
))
|
|
464
|
+
return hits
|
|
465
|
+
|
|
466
|
+
|
|
419
467
|
def retrieve(
|
|
420
468
|
types: list[str],
|
|
421
469
|
keys: list[str],
|
|
@@ -455,6 +503,9 @@ def retrieve(
|
|
|
455
503
|
entry=entry,
|
|
456
504
|
))
|
|
457
505
|
continue
|
|
506
|
+
if mtype == CROSS_REPO_TYPE:
|
|
507
|
+
repo_hits.extend(_cross_repo_hits(keys, limit))
|
|
508
|
+
continue
|
|
458
509
|
if mtype not in CURATED_TYPES:
|
|
459
510
|
continue
|
|
460
511
|
for path, entry in _iter_curated_entries(mtype):
|
|
@@ -503,7 +554,7 @@ CONTRACT_VERSION = 1
|
|
|
503
554
|
|
|
504
555
|
# Memory types this file-backed backend can answer. Types outside this
|
|
505
556
|
# set map to `unknown_type` per the retrieval contract.
|
|
506
|
-
_KNOWN_TYPES = CURATED_TYPES | {KNOWLEDGE_TYPE}
|
|
557
|
+
_KNOWN_TYPES = CURATED_TYPES | {KNOWLEDGE_TYPE, CROSS_REPO_TYPE}
|
|
507
558
|
|
|
508
559
|
|
|
509
560
|
def retrieve_v1(
|