@event4u/agent-config 2.11.0 → 2.13.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.agent-src/commands/council/analysis.md +142 -0
- package/.agent-src/commands/council/debate.md +129 -0
- package/.agent-src/commands/council/default.md +8 -0
- package/.agent-src/commands/council/design.md +16 -12
- package/.agent-src/commands/council/optimize.md +16 -15
- package/.agent-src/commands/council/pr.md +12 -12
- package/.agent-src/commands/council.md +48 -2
- package/.agent-src/personas/advisors/contrarian.md +95 -0
- package/.agent-src/personas/advisors/executor.md +99 -0
- package/.agent-src/personas/advisors/expansionist.md +98 -0
- package/.agent-src/personas/advisors/first-principles.md +98 -0
- package/.agent-src/personas/advisors/outsider.md +102 -0
- package/.agent-src/rules/copilot-routing.md +19 -0
- package/.agent-src/rules/devcontainer-routing.md +20 -0
- package/.agent-src/rules/laravel-routing.md +20 -0
- package/.agent-src/rules/symfony-routing.md +20 -0
- package/.agent-src/skills/ai-council/SKILL.md +180 -2
- package/.agent-src/skills/canvas-design/SKILL.md +132 -0
- package/.agent-src/skills/canvas-design/evals/triggers.json +16 -0
- package/.agent-src/skills/copilot-config/SKILL.md +1 -1
- package/.agent-src/skills/devcontainer/SKILL.md +1 -1
- package/.agent-src/skills/doc-coauthoring/SKILL.md +129 -0
- package/.agent-src/skills/doc-coauthoring/evals/triggers.json +16 -0
- package/.agent-src/skills/laravel/SKILL.md +1 -1
- package/.agent-src/skills/project-analysis-core/SKILL.md +1 -1
- package/.agent-src/skills/project-analyzer/SKILL.md +1 -1
- package/.agent-src/skills/skill-writing/SKILL.md +101 -16
- package/.agent-src/skills/sql-writing/SKILL.md +1 -1
- package/.agent-src/skills/symfony-workflow/SKILL.md +1 -1
- package/.agent-src/skills/universal-project-analysis/SKILL.md +1 -1
- package/.agent-src/templates/agents/agent-project-settings.example.yml +1 -1
- package/.claude-plugin/marketplace.json +5 -1
- package/AGENTS.md +1 -1
- package/CHANGELOG.md +78 -0
- package/CONTRIBUTING.md +5 -0
- package/README.md +3 -3
- package/config/agent-settings.template.yml +5 -84
- package/docs/architecture/multi-tool-projection.md +53 -0
- package/docs/architecture/{compression.md → source-projection.md} +21 -3
- package/docs/architecture.md +6 -6
- package/docs/catalog.md +21 -11
- package/docs/contracts/adr-architectural-consensus-mechanism.md +67 -0
- package/docs/contracts/adr-level-6-productization.md +2 -2
- package/docs/contracts/ai-council-config.md +186 -0
- package/docs/contracts/command-clusters.md +57 -1
- package/docs/contracts/multi-tool-projection-fidelity.md +109 -0
- package/docs/getting-started.md +2 -2
- package/package.json +1 -1
- package/scripts/_archive/README.md +59 -0
- package/scripts/ai_council/_default_prices.py +10 -1
- package/scripts/ai_council/advisors.py +148 -0
- package/scripts/ai_council/clients.py +189 -4
- package/scripts/ai_council/config.py +368 -0
- package/scripts/ai_council/consensus.py +290 -0
- package/scripts/ai_council/orchestrator.py +634 -16
- package/scripts/ai_council/prompts.py +335 -0
- package/scripts/check_compressed_paths.py +6 -1
- package/scripts/check_references.py +25 -0
- package/scripts/ci_time_ratio.py +168 -0
- package/scripts/council_cli.py +1007 -32
- package/scripts/measure_projection_bytes.py +159 -0
- package/scripts/measure_roadmap_trajectory.py +112 -0
- package/scripts/probe_projection_fidelity.py +202 -0
- package/scripts/run_skill_evals.py +185 -0
- package/scripts/schemas/skill.schema.json +4 -0
- package/scripts/score_skill_selection.py +198 -0
- package/scripts/skill_collision_clusters.py +162 -0
- package/scripts/skill_linter.py +71 -1
- /package/scripts/{_backfill_skill_domains.py → _archive/_backfill_skill_domains.py} +0 -0
- /package/scripts/{_bootstrap_tier_frontmatter.py → _archive/_bootstrap_tier_frontmatter.py} +0 -0
- /package/scripts/{_p43_bodies.py → _archive/_p43_bodies.py} +0 -0
- /package/scripts/{_p43_compress.py → _archive/_p43_compress.py} +0 -0
- /package/scripts/{_p4_migrate.py → _archive/_p4_migrate.py} +0 -0
- /package/scripts/{_phase2_shim_helper.py → _archive/_phase2_shim_helper.py} +0 -0
- /package/scripts/{_pilot_council_question.py → _archive/_pilot_council_question.py} +0 -0
|
@@ -0,0 +1,198 @@
|
|
|
1
|
+
#!/usr/bin/env python3
|
|
2
|
+
"""Selection-accuracy scorer (council file 05, Phase 2.2).
|
|
3
|
+
|
|
4
|
+
Reads `tests/fixtures/skill_selection/fixtures.yml` and a predictions
|
|
5
|
+
JSON (`{fixture_id: selected_skill_name}`), then computes:
|
|
6
|
+
|
|
7
|
+
- (a) intended-skill hit rate — exact `intended` match
|
|
8
|
+
- (b) correct-cluster hit rate — any member of the same cluster
|
|
9
|
+
|
|
10
|
+
Per-cluster pass/fail uses the Round-3 protocol:
|
|
11
|
+
pass = (a) >= 0.90 OR (b) >= 0.95
|
|
12
|
+
fail = (a) < 0.80 AND (b) < 0.80 → cluster needs `routes_to`
|
|
13
|
+
|
|
14
|
+
Predictions source:
|
|
15
|
+
- `--predictions <path>`: external JSON file (LLM run, eval harness, manual).
|
|
16
|
+
- `--baseline`: built-in TF-IDF-style description-similarity baseline. The
|
|
17
|
+
baseline does NOT speak for any specific host tool; it estimates what
|
|
18
|
+
pure description-matching would do and provides a numeric floor.
|
|
19
|
+
|
|
20
|
+
Output: human-readable summary on stdout + machine JSON to
|
|
21
|
+
`agents/reports/skill-selection-accuracy.json` (or `--out`).
|
|
22
|
+
"""
|
|
23
|
+
|
|
24
|
+
from __future__ import annotations
|
|
25
|
+
|
|
26
|
+
import argparse
|
|
27
|
+
import json
|
|
28
|
+
import math
|
|
29
|
+
import re
|
|
30
|
+
import sys
|
|
31
|
+
from collections import Counter, defaultdict
|
|
32
|
+
from pathlib import Path
|
|
33
|
+
|
|
34
|
+
import yaml
|
|
35
|
+
|
|
36
|
+
REPO_ROOT = Path(__file__).resolve().parent.parent
|
|
37
|
+
FIXTURES = REPO_ROOT / "tests" / "fixtures" / "skill_selection" / "fixtures.yml"
|
|
38
|
+
CLUSTERS = REPO_ROOT / "agents" / "reports" / "skill-collision-clusters.json"
|
|
39
|
+
SKILLS_DIR = REPO_ROOT / ".agent-src.uncompressed" / "skills"
|
|
40
|
+
DEFAULT_OUT = REPO_ROOT / "agents" / "reports" / "skill-selection-accuracy.json"
|
|
41
|
+
|
|
42
|
+
PASS_A = 0.90
|
|
43
|
+
PASS_B = 0.95
|
|
44
|
+
FAIL_THRESHOLD = 0.80
|
|
45
|
+
|
|
46
|
+
STOPWORDS = {
|
|
47
|
+
"the", "and", "for", "with", "when", "use", "or", "of", "to", "a", "an",
|
|
48
|
+
"is", "in", "on", "by", "be", "at", "as", "it", "if", "are", "this",
|
|
49
|
+
"that", "from", "but", "not", "can", "any", "all", "no", "after",
|
|
50
|
+
"before", "during", "user", "agent", "code", "project", "via", "into",
|
|
51
|
+
"onto", "even", "without", "naming", "uses", "used", "using", "also",
|
|
52
|
+
"etc", "across", "between",
|
|
53
|
+
}
|
|
54
|
+
|
|
55
|
+
|
|
56
|
+
def tokenize(text: str) -> list[str]:
    """Split *text* into lower-cased word tokens with stopwords removed.

    A token starts with a letter and continues with at least two more
    letters, digits, underscores or hyphens, so every token is three or
    more characters long. Repeated tokens are kept, in order of
    appearance, so callers can count term frequencies.
    """
    kept = []
    for word in re.findall(r"[A-Za-z][A-Za-z0-9_-]{2,}", text.lower()):
        # The pattern requires a leading letter, so the digit check is
        # purely defensive.
        if word in STOPWORDS or word.isdigit():
            continue
        kept.append(word)
    return kept
|
|
59
|
+
|
|
60
|
+
|
|
61
|
+
def load_skills() -> dict[str, str]:
    """Return ``{skill_name: description}`` for each skill under ``SKILLS_DIR``.

    Every ``*/SKILL.md`` file is read as UTF-8 and its leading
    ``---``-delimited YAML frontmatter is parsed. A file is skipped when it
    has no frontmatter, the frontmatter is not valid YAML or not a mapping,
    or the description is missing, empty, or not a string. The skill name
    falls back to the directory name when the frontmatter has no ``name``.
    """
    out: dict[str, str] = {}
    for skill_md in sorted(SKILLS_DIR.glob("*/SKILL.md")):
        # Explicit encoding: descriptions may contain non-ASCII characters
        # and the locale default is platform dependent.
        text = skill_md.read_text(encoding="utf-8")
        if not text.startswith("---"):
            continue
        parts = text.split("---", 2)
        if len(parts) < 3:
            continue
        try:
            fm = yaml.safe_load(parts[1]) or {}
        except yaml.YAMLError:
            continue
        # Frontmatter that parses to a scalar or list has no fields to read.
        if not isinstance(fm, dict):
            continue
        name = fm.get("name") or skill_md.parent.name
        desc = fm.get("description")
        if not isinstance(desc, str):
            continue
        desc = desc.strip()
        if desc:
            out[name] = desc
    return out
|
|
79
|
+
|
|
80
|
+
|
|
81
|
+
def tfidf_vectors(docs: dict[str, str]) -> tuple[dict[str, dict[str, float]], dict[str, float]]:
    """Build sparse TF-IDF vectors for *docs*.

    Returns ``(vectors, idf)``. ``vectors`` maps each document key to a
    ``{term: weight}`` dict where weight is raw term count times IDF.
    ``idf`` maps each term to ``log((N + 1) / (df + 1)) + 1``, with ``N``
    the number of documents and ``df`` the number of documents containing
    the term.
    """
    token_lists = {doc_id: tokenize(body) for doc_id, body in docs.items()}
    total_docs = len(docs)

    # Document frequency: count each term once per document.
    doc_freq: Counter[str] = Counter()
    for tokens in token_lists.values():
        doc_freq.update(set(tokens))

    idf = {}
    for term, count in doc_freq.items():
        idf[term] = math.log((total_docs + 1) / (count + 1)) + 1

    vectors: dict[str, dict[str, float]] = {}
    for doc_id, tokens in token_lists.items():
        counts = Counter(tokens)
        vectors[doc_id] = {term: freq * idf.get(term, 0.0) for term, freq in counts.items()}
    return vectors, idf
|
|
94
|
+
|
|
95
|
+
|
|
96
|
+
def cosine(a: dict[str, float], b: dict[str, float]) -> float:
    """Return the cosine similarity of two sparse ``{term: weight}`` vectors.

    The result is 0.0 when either vector is empty or has zero magnitude.
    """
    if a and b:
        shared_terms = set(a) & set(b)
        dot = sum(a[term] * b[term] for term in shared_terms)
        norm_a = math.sqrt(sum(weight * weight for weight in a.values()))
        norm_b = math.sqrt(sum(weight * weight for weight in b.values()))
        # An all-zero vector has no direction; avoid dividing by zero.
        if norm_a != 0 and norm_b != 0:
            return dot / (norm_a * norm_b)
    return 0.0
|
|
106
|
+
|
|
107
|
+
|
|
108
|
+
def baseline_predict(fixtures: list[dict], skills: dict[str, str]) -> dict[str, str]:
    """Predict one skill per fixture by TF-IDF cosine similarity.

    Args:
        fixtures: Fixture dicts; each must provide ``"id"`` and ``"prompt"``.
        skills: ``{skill_name: description}`` used as the document corpus.

    Returns:
        ``{fixture_id: skill_name}``. The name is the skill whose description
        vector is most similar to the prompt, or ``""`` when no skill has a
        positive similarity.
    """
    vectors, idf = tfidf_vectors(skills)
    preds: dict[str, str] = {}
    for fx in fixtures:
        term_counts = Counter(tokenize(fx["prompt"]))
        # Terms unseen in the corpus get weight 0 and cannot contribute.
        prompt_vec = {term: count * idf.get(term, 0.0) for term, count in term_counts.items()}
        # Start at 0.0 so a prompt sharing nothing with any description is
        # left unpredicted rather than assigned to the first skill.
        best_name, best_score = "", 0.0
        for name, vec in vectors.items():
            similarity = cosine(prompt_vec, vec)
            if similarity > best_score:
                best_name, best_score = name, similarity
        preds[fx["id"]] = best_name
    return preds
|
|
122
|
+
|
|
123
|
+
|
|
124
|
+
def score(fixtures: list[dict], clusters: list[dict], preds: dict[str, str]) -> dict:
    """Score predictions per cluster and overall.

    Args:
        fixtures: Fixture dicts with ``"id"`` and ``"intended"``, and an
            optional ``"cluster"`` label.
        clusters: Cluster dicts, each with a ``"members"`` list of skill names.
        preds: ``{fixture_id: predicted_skill_name}``; a missing id counts
            as an empty prediction.

    Returns:
        ``{"clusters": [...], "overall": {...}}``. Each cluster entry holds
        ``cluster``, ``n``, ``hit_a`` (exact intended-skill rate), ``hit_b``
        (same-cluster rate), ``verdict`` and ``misses``. Rates are rounded
        to three decimals.
    """
    # Look up cluster membership by intended skill (robust to cluster_id
    # renumbering).
    by_member: dict[str, set[str]] = {}
    for c in clusters:
        members = set(c["members"])
        for m in members:
            by_member[m] = members

    per_cluster = defaultdict(lambda: {"total": 0, "hits_a": 0, "hits_b": 0, "misses": []})
    for fx in fixtures:
        intended = fx["intended"]
        # A skill outside every cluster forms a singleton cluster of itself.
        members = by_member.get(intended, {intended})
        # Fall back to a label built from member names when the fixture
        # does not name its cluster.
        cid = fx.get("cluster") or "+".join(sorted(members)[:2])
        pred = preds.get(fx["id"], "")
        rec = per_cluster[cid]
        rec["total"] += 1
        if pred == intended:
            rec["hits_a"] += 1
        if pred in members:
            rec["hits_b"] += 1
        else:
            rec["misses"].append({"id": fx["id"], "intended": intended, "predicted": pred})

    results = []
    total = total_hits_a = total_hits_b = 0
    for cid, rec in sorted(per_cluster.items(), key=lambda kv: kv[0]):
        a = rec["hits_a"] / rec["total"]
        b = rec["hits_b"] / rec["total"]
        if a >= PASS_A or b >= PASS_B:
            verdict = "pass"
        elif a < FAIL_THRESHOLD and b < FAIL_THRESHOLD:
            verdict = "fail-needs-routes_to"
        else:
            verdict = "mixed"
        results.append({"cluster": cid, "n": rec["total"], "hit_a": round(a, 3),
                        "hit_b": round(b, 3), "verdict": verdict, "misses": rec["misses"]})
        # Accumulate raw counts so the overall rates are not rebuilt from
        # the rounded per-cluster values.
        total += rec["total"]
        total_hits_a += rec["hits_a"]
        total_hits_b += rec["hits_b"]

    overall_a = total_hits_a / total if total else 0.0
    overall_b = total_hits_b / total if total else 0.0
    return {"clusters": results,
            "overall": {"n": total, "hit_a": round(overall_a, 3), "hit_b": round(overall_b, 3)}}
|
|
164
|
+
|
|
165
|
+
|
|
166
|
+
def main() -> int:
    """Command-line entry point: score predictions and write the report.

    Requires ``--predictions <file>`` or ``--baseline``; when both are
    given the baseline is used. Writes the JSON report to ``--out`` and
    prints a summary to stdout.

    Returns:
        0 on success, 2 when neither prediction source was specified.
    """
    p = argparse.ArgumentParser()
    p.add_argument("--predictions", type=Path, help="JSON file: {fixture_id: skill_name}")
    p.add_argument("--baseline", action="store_true", help="Use built-in TF-IDF baseline")
    p.add_argument("--source", default="external", help="Label recorded in output")
    p.add_argument("--out", type=Path, default=DEFAULT_OUT)
    args = p.parse_args()

    if not args.predictions and not args.baseline:
        print("❌ Specify --predictions <file> or --baseline", file=sys.stderr)
        return 2

    fixtures = yaml.safe_load(FIXTURES.read_text(encoding="utf-8"))["fixtures"]
    clusters = json.loads(CLUSTERS.read_text(encoding="utf-8"))["clusters"]
    if args.baseline:
        # Skill descriptions are only needed by the built-in baseline.
        preds = baseline_predict(fixtures, load_skills())
        source = "tfidf-baseline"
    else:
        preds = json.loads(args.predictions.read_text(encoding="utf-8"))
        source = args.source

    report = score(fixtures, clusters, preds)
    report["source"] = source
    args.out.parent.mkdir(parents=True, exist_ok=True)
    args.out.write_text(json.dumps(report, indent=2) + "\n", encoding="utf-8")

    # --out may be relative or outside the repository; relative_to() raises
    # ValueError then, so show the path as given.
    try:
        shown = args.out.relative_to(REPO_ROOT)
    except ValueError:
        shown = args.out
    print(f"✅ Wrote {shown} (source={source})")
    print(f" overall: hit_a={report['overall']['hit_a']:.3f} hit_b={report['overall']['hit_b']:.3f} n={report['overall']['n']}")
    for c in report["clusters"]:
        print(f" {c['cluster']:6} n={c['n']:2} hit_a={c['hit_a']:.2f} hit_b={c['hit_b']:.2f} {c['verdict']}")
    return 0
|
|
195
|
+
|
|
196
|
+
|
|
197
|
+
if __name__ == "__main__":
|
|
198
|
+
sys.exit(main())
|
|
@@ -0,0 +1,162 @@
|
|
|
1
|
+
#!/usr/bin/env python3
|
|
2
|
+
"""Skill-collision cluster analysis (Phase 2.2 of step-1-v2-feedback-followup).
|
|
3
|
+
|
|
4
|
+
Walks `.agent-src.uncompressed/skills/<id>/SKILL.md`, extracts the
|
|
5
|
+
`description` frontmatter, computes pairwise keyword overlap, and groups
|
|
6
|
+
high-overlap skill pairs into clusters. The output drives the
|
|
7
|
+
selection-accuracy fixture set defined by council file 05 (Round-3
|
|
8
|
+
protocol — ≥ 3 shared significant terms → collision cluster).
|
|
9
|
+
|
|
10
|
+
Output: `agents/reports/skill-collision-clusters.json`
|
|
11
|
+
|
|
12
|
+
Schema:
|
|
13
|
+
{
|
|
14
|
+
"skill_count": int,
|
|
15
|
+
"cluster_count": int,
|
|
16
|
+
"clusters": [
|
|
17
|
+
{
|
|
18
|
+
"cluster_id": "C01",
|
|
19
|
+
"members": ["skill-a", "skill-b", ...],
|
|
20
|
+
"shared_keywords": [...],
|
|
21
|
+
"max_overlap": float,
|
|
22
|
+
"descriptions": {"skill-a": "...", ...}
|
|
23
|
+
},
|
|
24
|
+
...
|
|
25
|
+
]
|
|
26
|
+
}
|
|
27
|
+
"""
|
|
28
|
+
|
|
29
|
+
from __future__ import annotations
|
|
30
|
+
|
|
31
|
+
import json
|
|
32
|
+
import re
|
|
33
|
+
import sys
|
|
34
|
+
from itertools import combinations
|
|
35
|
+
from pathlib import Path
|
|
36
|
+
|
|
37
|
+
import yaml
|
|
38
|
+
|
|
39
|
+
REPO_ROOT = Path(__file__).resolve().parent.parent
|
|
40
|
+
SKILLS_DIR = REPO_ROOT / ".agent-src.uncompressed" / "skills"
|
|
41
|
+
OUT_JSON = REPO_ROOT / "agents" / "reports" / "skill-collision-clusters.json"
|
|
42
|
+
|
|
43
|
+
KEYWORD_OVERLAP_THRESHOLD = 0.40
|
|
44
|
+
MIN_SHARED_KEYWORDS = 3
|
|
45
|
+
TOP_N_CLUSTERS = 10
|
|
46
|
+
|
|
47
|
+
STOPWORDS = {
|
|
48
|
+
"the", "and", "for", "with", "when", "use", "or", "of", "to", "a",
|
|
49
|
+
"an", "is", "in", "on", "by", "be", "at", "as", "it", "if", "are",
|
|
50
|
+
"this", "that", "from", "but", "not", "can", "any", "all", "no",
|
|
51
|
+
"after", "before", "during", "user", "agent", "code", "project",
|
|
52
|
+
"via", "into", "onto", "even", "without", "naming", "uses", "used",
|
|
53
|
+
"using", "also", "etc", "across", "between", "review", "design",
|
|
54
|
+
"writing", "create", "creating", "edit", "editing", "make", "making",
|
|
55
|
+
"set", "setting", "based", "well", "right", "left", "new",
|
|
56
|
+
}
|
|
57
|
+
|
|
58
|
+
|
|
59
|
+
def keyword_set(text: str) -> set[str]:
    """Return the distinct significant keywords found in *text*.

    Keywords are lower-cased tokens that start with a letter, are at least
    three characters long (letters, digits, ``_`` or ``-``), and are not in
    ``STOPWORDS``.
    """
    keywords = set()
    for candidate in re.findall(r"[A-Za-z][A-Za-z0-9_-]{2,}", text.lower()):
        # The pattern requires a leading letter, so the digit check is
        # purely defensive.
        if candidate in STOPWORDS or candidate.isdigit():
            continue
        keywords.add(candidate)
    return keywords
|
|
62
|
+
|
|
63
|
+
|
|
64
|
+
def overlap_fraction(a: set[str], b: set[str]) -> float:
    """Return the shared-element count divided by the size of the smaller set.

    The result is 0.0 when either set is empty.
    """
    smaller = min(len(a), len(b))
    # min() is zero exactly when at least one of the sets is empty.
    if smaller == 0:
        return 0.0
    shared = a & b
    return len(shared) / smaller
|
|
68
|
+
|
|
69
|
+
|
|
70
|
+
def load_skills() -> list[dict]:
    """Load every skill under ``SKILLS_DIR`` that has a usable description.

    Every ``*/SKILL.md`` file is read as UTF-8 and its leading
    ``---``-delimited YAML frontmatter is parsed. A file is skipped when it
    has no frontmatter, the frontmatter is not valid YAML or not a mapping,
    or the description is missing, empty, or not a string.

    Returns:
        One dict per skill with ``name`` (frontmatter ``name`` or the
        directory name), ``description`` (stripped) and ``_keywords`` (the
        keyword set of the description), in sorted path order.
    """
    skills = []
    for skill_md in sorted(SKILLS_DIR.glob("*/SKILL.md")):
        # Explicit encoding: descriptions may contain non-ASCII characters
        # and the locale default is platform dependent.
        text = skill_md.read_text(encoding="utf-8")
        if not text.startswith("---"):
            continue
        parts = text.split("---", 2)
        if len(parts) < 3:
            continue
        try:
            fm = yaml.safe_load(parts[1]) or {}
        except yaml.YAMLError:
            continue
        # Frontmatter that parses to a scalar or list has no fields to read.
        if not isinstance(fm, dict):
            continue
        description = fm.get("description")
        if not isinstance(description, str):
            continue
        description = description.strip()
        if not description:
            continue
        skills.append(
            {
                "name": fm.get("name") or skill_md.parent.name,
                "description": description,
                "_keywords": keyword_set(description),
            }
        )
    return skills
|
|
95
|
+
|
|
96
|
+
|
|
97
|
+
def build_clusters(skills: list[dict]) -> list[dict]:
    """Group skills whose descriptions share many keywords into clusters.

    Two skills are linked when they share at least ``MIN_SHARED_KEYWORDS``
    keywords and their overlap fraction is at least
    ``KEYWORD_OVERLAP_THRESHOLD``. Clusters are the connected components of
    that link graph.

    Args:
        skills: Skill dicts with ``name``, ``description`` and ``_keywords``.

    Returns:
        At most ``TOP_N_CLUSTERS`` cluster dicts, largest first, with ties
        broken by member names so ``cluster_id`` numbering is reproducible
        between runs. Each dict holds ``cluster_id``, ``members`` (sorted),
        ``shared_keywords`` (common to every member), ``max_overlap`` and
        ``descriptions``.
    """
    by_name = {s["name"]: s for s in skills}

    # Pairwise edges where both the shared-keyword count and the overlap
    # fraction meet their thresholds.
    edges: list[tuple[str, str, float]] = []
    for a, b in combinations(skills, 2):
        shared = a["_keywords"] & b["_keywords"]
        ov = overlap_fraction(a["_keywords"], b["_keywords"])
        if len(shared) >= MIN_SHARED_KEYWORDS and ov >= KEYWORD_OVERLAP_THRESHOLD:
            edges.append((a["name"], b["name"], ov))

    # Union-find over the edge set -> connected-component clusters.
    parent: dict[str, str] = {}

    def find(x: str) -> str:
        parent.setdefault(x, x)
        while parent[x] != x:
            # Path halving keeps the trees shallow.
            parent[x] = parent[parent[x]]
            x = parent[x]
        return x

    def union(x: str, y: str) -> None:
        rx, ry = find(x), find(y)
        if rx != ry:
            parent[rx] = ry

    for a, b, _ in edges:
        union(a, b)

    # Walk the names in sorted order: iterating a set of strings directly
    # varies with hash randomization, which made the tie order below (and
    # so the cluster ids and the top-N cut) differ between runs.
    components: dict[str, list[str]] = {}
    for name in sorted({n for edge in edges for n in edge[:2]}):
        components.setdefault(find(name), []).append(name)

    ordered = sorted(components.values(), key=lambda members: (-len(members), members))

    clusters: list[dict] = []
    for idx, members in enumerate(ordered, start=1):
        member_set = set(members)
        shared_all = set.intersection(*(by_name[m]["_keywords"] for m in members))
        max_ov = max(
            (ov for a, b, ov in edges if a in member_set and b in member_set),
            default=0.0,
        )
        clusters.append({
            "cluster_id": f"C{idx:02d}",
            "members": sorted(members),
            "shared_keywords": sorted(shared_all),
            "max_overlap": round(max_ov, 3),
            "descriptions": {m: by_name[m]["description"] for m in sorted(members)},
        })
    return clusters[:TOP_N_CLUSTERS]
|
|
143
|
+
|
|
144
|
+
|
|
145
|
+
def main() -> int:
    """Command-line entry point: compute collision clusters and write them.

    Loads the skills, builds the clusters, writes the JSON report to
    ``OUT_JSON`` and prints a one-line summary.

    Returns:
        0 on success, 2 when ``SKILLS_DIR`` does not exist.
    """
    if SKILLS_DIR.exists():
        skills = load_skills()
        clusters = build_clusters(skills)
        payload = {
            "skill_count": len(skills),
            "cluster_count": len(clusters),
            "clusters": clusters,
        }
        OUT_JSON.parent.mkdir(parents=True, exist_ok=True)
        OUT_JSON.write_text(json.dumps(payload, indent=2) + "\n")
        print(f"✅ Wrote {OUT_JSON.relative_to(REPO_ROOT)} — {len(clusters)} clusters from {len(skills)} skills")
        return 0

    print(f"❌ Skills dir not found: {SKILLS_DIR}", file=sys.stderr)
    return 2
|
|
159
|
+
|
|
160
|
+
|
|
161
|
+
if __name__ == "__main__":
|
|
162
|
+
sys.exit(main())
|
package/scripts/skill_linter.py
CHANGED
|
@@ -775,8 +775,14 @@ def lint_skill(path: Path, text: str) -> LintResult:
|
|
|
775
775
|
# is *both* large AND prose-dominant OR ships ≥ 2 independently invocable
|
|
776
776
|
# procedures. Reference catalogues (quality-tools 411 L / density 0.83)
|
|
777
777
|
# pass; multi-procedure skills are flagged for split.
|
|
778
|
+
#
|
|
779
|
+
# Frontmatter opt-out: `meta_skill: true` exempts a skill from the size
|
|
780
|
+
# warn when the skill's purpose *is* breadth (skill-writing, agent-docs-
|
|
781
|
+
# writing, skill-reviewer, etc.). Meta-skills inherently bundle multiple
|
|
782
|
+
# procedures and inline examples.
|
|
778
783
|
total_lines = len(text.splitlines())
|
|
779
|
-
|
|
784
|
+
is_meta_skill = bool(fm) and re.search(r"^meta_skill:\s*true\s*$", fm, re.MULTILINE)
|
|
785
|
+
if total_lines > 400 and not is_meta_skill:
|
|
780
786
|
density = _density_score(text)
|
|
781
787
|
procedures = _count_procedure_sections(text)
|
|
782
788
|
if density < 0.6 or procedures >= 2:
|
|
@@ -832,6 +838,12 @@ def lint_skill(path: Path, text: str) -> LintResult:
|
|
|
832
838
|
f"{meaningful_steps} steps) — may lack its own executable workflow"))
|
|
833
839
|
suggestions.append("Expand the skill so it remains executable without opening a guideline")
|
|
834
840
|
|
|
841
|
+
# --- evals.json schema validator ---
|
|
842
|
+
# When a skill ships sibling `evals/evals.json` (quantitative behavior
|
|
843
|
+
# eval per skill-writing § 7), validate its shape. Triggers.json is a
|
|
844
|
+
# separate concern handled elsewhere. All issues here are WARN.
|
|
845
|
+
issues.extend(validate_evals_json(path))
|
|
846
|
+
|
|
835
847
|
return LintResult(
|
|
836
848
|
file=str(path),
|
|
837
849
|
artifact_type="skill",
|
|
@@ -841,6 +853,64 @@ def lint_skill(path: Path, text: str) -> LintResult:
|
|
|
841
853
|
)
|
|
842
854
|
|
|
843
855
|
|
|
856
|
+
def validate_evals_json(skill_path: Path) -> "list[Issue]":
    """Validate the ``evals/evals.json`` that sits next to a skill file.

    The file is checked against the shape declared in `skill-writing` § 7:
    a root object with a string ``skill`` and a non-empty ``scenarios``
    array; each scenario has non-blank string ``id`` and ``prompt`` and a
    non-empty ``assertions`` array; each assertion has a ``kind`` of
    ``contains``, ``file_exists`` or ``rubric`` plus the string field that
    kind requires.

    Args:
        skill_path: Path of the skill file; the evals file is looked up at
            ``skill_path.parent / "evals" / "evals.json"``.

    Returns:
        WARN-level issues only; never blocks. An empty list when the file
        is absent or valid.
    """
    evals_path = skill_path.parent / "evals" / "evals.json"
    if not evals_path.is_file():
        return []
    issues: list[Issue] = []
    try:
        data = json.loads(evals_path.read_text(encoding="utf-8"))
    except (OSError, ValueError) as exc:
        # ValueError covers json.JSONDecodeError and the UnicodeDecodeError
        # raised by read_text() for a file that is not valid UTF-8.
        return [Issue("warning", "evals_json_unreadable",
                      f"evals/evals.json could not be parsed: {exc}")]
    if not isinstance(data, dict):
        return [Issue("warning", "evals_json_shape",
                      "evals/evals.json root must be an object")]
    if "skill" not in data or not isinstance(data["skill"], str):
        issues.append(Issue("warning", "evals_json_missing_skill",
                            "evals/evals.json must declare top-level 'skill' (string)"))
    scenarios = data.get("scenarios")
    if not isinstance(scenarios, list) or len(scenarios) < 1:
        issues.append(Issue("warning", "evals_json_no_scenarios",
                            "evals/evals.json must declare 'scenarios' (non-empty array)"))
        return issues
    # Assertion kind -> the string field that kind requires.
    required_by_kind = {"contains": "value", "file_exists": "path", "rubric": "criterion"}
    valid_kinds = set(required_by_kind)
    for idx, scenario in enumerate(scenarios):
        loc = f"scenarios[{idx}]"
        if not isinstance(scenario, dict):
            issues.append(Issue("warning", "evals_json_scenario_shape",
                                f"{loc} must be an object"))
            continue
        for key in ("id", "prompt"):
            if key not in scenario or not isinstance(scenario[key], str) or not scenario[key].strip():
                issues.append(Issue("warning", "evals_json_scenario_missing_field",
                                    f"{loc} missing required string field '{key}'"))
        assertions = scenario.get("assertions")
        if not isinstance(assertions, list) or len(assertions) < 1:
            issues.append(Issue("warning", "evals_json_scenario_no_assertions",
                                f"{loc}.assertions must be a non-empty array"))
            continue
        for a_idx, assertion in enumerate(assertions):
            a_loc = f"{loc}.assertions[{a_idx}]"
            if not isinstance(assertion, dict):
                issues.append(Issue("warning", "evals_json_assertion_shape",
                                    f"{a_loc} must be an object"))
                continue
            kind = assertion.get("kind")
            # Check the type first: a JSON array or object here is
            # unhashable and would make the set lookup raise TypeError.
            if not isinstance(kind, str) or kind not in valid_kinds:
                issues.append(Issue("warning", "evals_json_assertion_kind",
                                    f"{a_loc}.kind must be one of {sorted(valid_kinds)}, got {kind!r}"))
                continue
            required_field = required_by_kind[kind]
            if required_field not in assertion or not isinstance(assertion[required_field], str):
                issues.append(Issue("warning", "evals_json_assertion_missing_field",
                                    f"{a_loc} (kind={kind}) missing required string field '{required_field}'"))
    return issues
|
|
912
|
+
|
|
913
|
+
|
|
844
914
|
def extract_frontmatter(text: str) -> Optional[str]:
|
|
845
915
|
match = FRONTMATTER_PATTERN.search(text)
|
|
846
916
|
return match.group(1) if match else None
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|