kushi-agents 5.0.2 → 5.0.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +35 -0
- package/bin/cli.mjs +103 -0
- package/package.json +6 -2
- package/plugin/agents/kushi.agent.md +3 -1
- package/plugin/instructions/skill-authoring.instructions.md +147 -0
- package/plugin/instructions/skill-evals.instructions.md +130 -0
- package/plugin/skills/aggregate-project/evals/evals.json +33 -0
- package/plugin/skills/apply-ado-update/evals/evals.json +33 -0
- package/plugin/skills/ask-project/SKILL.md +10 -0
- package/plugin/skills/ask-project/evals/evals.json +34 -0
- package/plugin/skills/bootstrap-project/evals/evals.json +34 -0
- package/plugin/skills/build-state/evals/evals.json +31 -0
- package/plugin/skills/consolidate-evidence/evals/evals.json +33 -0
- package/plugin/skills/dashboard/evals/evals.json +33 -0
- package/plugin/skills/emit-vertex/evals/evals.json +33 -0
- package/plugin/skills/eval/SKILL.md +90 -0
- package/plugin/skills/eval/evals.schema.json +73 -0
- package/plugin/skills/eval/run-evals.ps1 +372 -0
- package/plugin/skills/fde-intake/evals/evals.json +33 -0
- package/plugin/skills/fde-report/evals/evals.json +33 -0
- package/plugin/skills/fde-triage/evals/evals.json +33 -0
- package/plugin/skills/intro/SKILL.md +160 -451
- package/plugin/skills/intro/evals/evals.json +33 -0
- package/plugin/skills/intro/references/walkthrough.md +310 -0
- package/plugin/skills/link-entities/evals/evals.json +31 -0
- package/plugin/skills/project-status/SKILL.md +10 -1
- package/plugin/skills/project-status/evals/evals.json +33 -0
- package/plugin/skills/propose-ado-update/evals/evals.json +33 -0
- package/plugin/skills/pull-ado/evals/evals.json +35 -0
- package/plugin/skills/pull-crm/evals/evals.json +35 -0
- package/plugin/skills/pull-email/evals/evals.json +35 -0
- package/plugin/skills/pull-loop/evals/evals.json +35 -0
- package/plugin/skills/pull-meetings/evals/evals.json +35 -0
- package/plugin/skills/pull-misc/evals/evals.json +35 -0
- package/plugin/skills/pull-onenote/evals/evals.json +35 -0
- package/plugin/skills/pull-sharepoint/evals/evals.json +35 -0
- package/plugin/skills/pull-teams/evals/evals.json +35 -0
- package/plugin/skills/refresh-project/evals/evals.json +31 -0
- package/plugin/skills/self-check/SKILL.md +2 -0
- package/plugin/skills/self-check/evals/evals.json +28 -0
- package/plugin/skills/self-check/run.ps1 +144 -0
- package/plugin/skills/setup/SKILL.md +10 -0
- package/plugin/skills/setup/evals/evals.json +33 -0
- package/plugin/skills/skill-checker/SKILL.md +136 -0
- package/plugin/skills/skill-checker/check-skill.ps1 +416 -0
- package/plugin/skills/skill-checker/evals/evals.json +41 -0
- package/plugin/skills/skill-creator/SKILL.md +134 -0
- package/plugin/skills/skill-creator/evals/evals.json +40 -0
- package/plugin/skills/skill-creator/generate-eval-review.ps1 +101 -0
- package/plugin/skills/skill-creator/optimize-description.ps1 +87 -0
- package/plugin/skills/skill-creator/scaffold.ps1 +180 -0
- package/plugin/skills/skill-creator/templates/evals-starter.template.json +27 -0
- package/plugin/skills/skill-creator/templates/gotchas-stub.template.md +9 -0
- package/plugin/skills/skill-creator/templates/skill-skeleton.template.md +28 -0
- package/plugin/skills/tour/evals/evals.json +33 -0
- package/plugin/skills/vertex-link/SKILL.md +10 -0
- package/plugin/skills/vertex-link/evals/evals.json +33 -0
- package/src/eval-aggregator.mjs +209 -0
- package/src/eval-aggregator.test.mjs +64 -0
- package/src/eval-runner.test.mjs +69 -0
- package/src/skill-checker.test.mjs +118 -0
- package/src/skill-creator.test.mjs +92 -0
|
@@ -0,0 +1,35 @@
|
|
|
1
|
+
{
|
|
2
|
+
"skill": "pull-sharepoint",
|
|
3
|
+
"version": "1.0.0",
|
|
4
|
+
"description": "Auto-seeded evals for pull-sharepoint. Replace with real cases as the skill matures.",
|
|
5
|
+
"cases": [
|
|
6
|
+
{
|
|
7
|
+
"id": "pull-sharepoint-cached-1",
|
|
8
|
+
"name": "pull-sharepoint cached/dry-run produces output",
|
|
9
|
+
"input": "--cached --dry-run fixture-acme",
|
|
10
|
+
"canary": false,
|
|
11
|
+
"grader_type": "script",
|
|
12
|
+
"expected_assertions": [
|
|
13
|
+
{
|
|
14
|
+
"type": "regex-match",
|
|
15
|
+
"pattern": "fixture-acme",
|
|
16
|
+
"flags": "i"
|
|
17
|
+
}
|
|
18
|
+
]
|
|
19
|
+
},
|
|
20
|
+
{
|
|
21
|
+
"id": "pull-sharepoint-rubric-1",
|
|
22
|
+
"name": "pull-sharepoint output quality (LLM-rubric, skipped in canary)",
|
|
23
|
+
"input": "summarize fixture-acme pull-sharepoint pulls",
|
|
24
|
+
"canary": false,
|
|
25
|
+
"grader_type": "llm",
|
|
26
|
+
"expected_assertions": [
|
|
27
|
+
{
|
|
28
|
+
"type": "llm-rubric",
|
|
29
|
+
"rubric": "Does the pull-sharepoint response cite a source file path and an ISO timestamp?",
|
|
30
|
+
"min_score": 4
|
|
31
|
+
}
|
|
32
|
+
]
|
|
33
|
+
}
|
|
34
|
+
]
|
|
35
|
+
}
|
|
@@ -0,0 +1,35 @@
|
|
|
1
|
+
{
|
|
2
|
+
"skill": "pull-teams",
|
|
3
|
+
"version": "1.0.0",
|
|
4
|
+
"description": "Auto-seeded evals for pull-teams. Replace with real cases as the skill matures.",
|
|
5
|
+
"cases": [
|
|
6
|
+
{
|
|
7
|
+
"id": "pull-teams-cached-1",
|
|
8
|
+
"name": "pull-teams cached/dry-run produces output",
|
|
9
|
+
"input": "--cached --dry-run fixture-acme",
|
|
10
|
+
"canary": false,
|
|
11
|
+
"grader_type": "script",
|
|
12
|
+
"expected_assertions": [
|
|
13
|
+
{
|
|
14
|
+
"type": "regex-match",
|
|
15
|
+
"pattern": "fixture-acme",
|
|
16
|
+
"flags": "i"
|
|
17
|
+
}
|
|
18
|
+
]
|
|
19
|
+
},
|
|
20
|
+
{
|
|
21
|
+
"id": "pull-teams-rubric-1",
|
|
22
|
+
"name": "pull-teams output quality (LLM-rubric, skipped in canary)",
|
|
23
|
+
"input": "summarize fixture-acme pull-teams pulls",
|
|
24
|
+
"canary": false,
|
|
25
|
+
"grader_type": "llm",
|
|
26
|
+
"expected_assertions": [
|
|
27
|
+
{
|
|
28
|
+
"type": "llm-rubric",
|
|
29
|
+
"rubric": "Does the pull-teams response cite a source file path and an ISO timestamp?",
|
|
30
|
+
"min_score": 4
|
|
31
|
+
}
|
|
32
|
+
]
|
|
33
|
+
}
|
|
34
|
+
]
|
|
35
|
+
}
|
|
@@ -0,0 +1,31 @@
|
|
|
1
|
+
{
|
|
2
|
+
"skill": "refresh-project",
|
|
3
|
+
"version": "1.0.0",
|
|
4
|
+
"description": "Verifies refresh chains link-entities -> dashboard -> tour on a fixture.",
|
|
5
|
+
"cases": [
|
|
6
|
+
{
|
|
7
|
+
"id": "rp-chain-mention",
|
|
8
|
+
"name": "refresh plan mentions link-entities + dashboard + tour",
|
|
9
|
+
"input": "refresh fixture-acme --dry-run\nWould chain: link-entities -> dashboard -> tour\nGraph: Evidence/_graph/project-graph.json\nDashboard: dashboard.html\nTour: State/tour.md",
|
|
10
|
+
"fixture": "evals/fixtures/fixture-acme",
|
|
11
|
+
"canary": true,
|
|
12
|
+
"grader_type": "script",
|
|
13
|
+
"expected_assertions": [
|
|
14
|
+
{ "type": "regex-match", "pattern": "link-entities" },
|
|
15
|
+
{ "type": "regex-match", "pattern": "dashboard" },
|
|
16
|
+
{ "type": "regex-match", "pattern": "tour" }
|
|
17
|
+
]
|
|
18
|
+
},
|
|
19
|
+
{
|
|
20
|
+
"id": "rp-graph-fixture-present",
|
|
21
|
+
"name": "refresh has a graph fixture to update",
|
|
22
|
+
"input": "verify graph fixture",
|
|
23
|
+
"fixture": "evals/fixtures/fixture-acme",
|
|
24
|
+
"canary": false,
|
|
25
|
+
"grader_type": "script",
|
|
26
|
+
"expected_assertions": [
|
|
27
|
+
{ "type": "file-exists", "path": "Evidence/_graph/project-graph.json" }
|
|
28
|
+
]
|
|
29
|
+
}
|
|
30
|
+
]
|
|
31
|
+
}
|
|
@@ -69,6 +69,8 @@ Checks split into **core** (always run) and **deep** (opt-in).
|
|
|
69
69
|
| D30.description-optimized | Trigger-based description | every SKILL.md `description:` front-matter leads with `USE WHEN` or `WHEN ` per <https://agentskills.io/skill-creation/optimizing-descriptions>. |
|
|
70
70
|
| D31.genealogy | Release genealogy entry exists | every `git tag` matching `v<x.y.z>` MUST appear in `docs/genealogy.md` as a `## v<x.y.z>` heading or be named under a parent's "Patch lineage" line. See `release-genealogy.instructions.md`. |
|
|
71
71
|
| D32.multi-host | Multi-host install integrity | validates `src/multi-host.mjs` exports + `bin/cli.mjs` flag handling, then performs a temp-dir dry-run install for BOTH supported hosts (Clawpilot + VS Code Chat) under a fake `$HOME` in `$env:TEMP`. Asserts SKILL.md + agent file + skills/ + prompts/ + skills-metadata.json with a kushi entry are present, then asserts a clean uninstall. NEVER touches the real `~/.copilot/` or `~/.vscode/`. See `multi-host-install.instructions.md`. |
|
|
72
|
+
| D33.evals | Skill evals framework integrity | every `plugin/skills/<name>/` (except `eval` and `self-check`) ships `evals/evals.json` with ≥ 2 cases and ≥ 1 assertion per case; the runner (`plugin/skills/eval/run-evals.ps1`) and schema (`plugin/skills/eval/evals.schema.json`) are present; `evals/baseline.json` exists (warn-only). Six sub-checks: `D33.evals-exist`, `D33.evals-schema`, `D33.evals-min-cases`, `D33.evals-have-assertions`, `D33.eval-runner-exists`, `D33.baseline-exists`. See `skill-evals.instructions.md`. |
|
|
73
|
+
| D34.creator-conformance | skill-creator + skill-checker harness integrity (v5.0.4+) | validates `scaffold.ps1` + `check-skill.ps1` ship and are parseable; every skill carrying the `.created-by-skill-creator` marker passes `check-skill --lint` clean; `check-skill --all --retrofit --dry-run` shows no non-additive gaps; the dogfood report at `docs/audits/v5.0.4-skill-creator-dogfood.md` is fresh (≤14 days). Five sub-checks: `D34.skill-creator-exists`, `D34.skill-checker-exists`, `D34.creator-output-conforms`, `D34.retrofit-clean`, `D34.dogfood-report-fresh`. See `skill-authoring.instructions.md`. |
|
|
72
74
|
| **CSC weekly-layout checks (kushi v4.9.0)** | | gated on `Resolve-EngagementRoots` — no-ops on the kushi repo itself. |
|
|
73
75
|
| D11.csc | CSC entity coverage + depth | every `Evidence/<alias>/<source>/weekly/*-csc.md` has ≥ 1 entity heading; per-source minimum bullet count + populated-section count (meetings 25/6, email 8/4, teams 6/3, onenote 10/4, sharepoint 8/3, crm 12/5, ado 8/4). Coverage-Notes-only blocks (low-signal escape) are exempt. |
|
|
74
76
|
| D12.csc | CSC section order | every entity block's `###` section headings appear in the canonical order: Participants → Topics → Q&A → Who Said What → Decisions → Dates & Numbers → Action Items → Next Steps → Open Questions → Risks → Customer Asks → Artifacts → Coverage Notes. |
|
|
@@ -0,0 +1,28 @@
|
|
|
1
|
+
{
|
|
2
|
+
"skill": "self-check",
|
|
3
|
+
"version": "1.0.0",
|
|
4
|
+
"description": "Meta — verifies that self-check's run.ps1, run.sh, and SKILL.md ship in the skill folder.",
|
|
5
|
+
"cases": [
|
|
6
|
+
{
|
|
7
|
+
"id": "sc-runps1-exists",
|
|
8
|
+
"name": "run.ps1 ships in the skill folder",
|
|
9
|
+
"input": "verify self-check artifacts",
|
|
10
|
+
"canary": true,
|
|
11
|
+
"grader_type": "script",
|
|
12
|
+
"expected_assertions": [
|
|
13
|
+
{ "type": "file-exists", "path": "plugin/skills/self-check/run.ps1" },
|
|
14
|
+
{ "type": "file-exists", "path": "plugin/skills/self-check/SKILL.md" }
|
|
15
|
+
]
|
|
16
|
+
},
|
|
17
|
+
{
|
|
18
|
+
"id": "sc-runsh-exists",
|
|
19
|
+
"name": "cross-platform run.sh ships too",
|
|
20
|
+
"input": "verify run.sh",
|
|
21
|
+
"canary": false,
|
|
22
|
+
"grader_type": "script",
|
|
23
|
+
"expected_assertions": [
|
|
24
|
+
{ "type": "file-exists", "path": "plugin/skills/self-check/run.sh" }
|
|
25
|
+
]
|
|
26
|
+
}
|
|
27
|
+
]
|
|
28
|
+
}
|
|
@@ -1632,6 +1632,150 @@ process.stdout.write(JSON.stringify(out));
|
|
|
1632
1632
|
if (Test-Path $fakeHome) { Remove-Item -LiteralPath $fakeHome -Recurse -Force -ErrorAction SilentlyContinue }
|
|
1633
1633
|
}
|
|
1634
1634
|
}
|
|
1635
|
+
|
|
1636
|
+
# === D33.evals — per-skill evals framework (v5.0.3+) ===
|
|
1637
|
+
# Per skill-evals.instructions.md, every plugin/skills/<name>/ (except eval and
|
|
1638
|
+
# self-check) MUST ship evals/evals.json with >=2 cases and every case MUST have
|
|
1639
|
+
# >=1 assertion. Schema lives at plugin/skills/eval/evals.schema.json. Runner
|
|
1640
|
+
# lives at plugin/skills/eval/run-evals.ps1. Baseline file evals/baseline.json
|
|
1641
|
+
# is warn-only — maintainers seed it with `npm run eval:baseline`.
|
|
1642
|
+
$evalSkillDir = Join-Path $Root 'plugin/skills/eval'
|
|
1643
|
+
$evalRunner = Join-Path $evalSkillDir 'run-evals.ps1'
|
|
1644
|
+
$evalSchema = Join-Path $evalSkillDir 'evals.schema.json'
|
|
1645
|
+
$baselineFile = Join-Path $Root 'evals/baseline.json'
|
|
1646
|
+
|
|
1647
|
+
if (-not (Test-Path $evalRunner)) {
|
|
1648
|
+
Add-Finding 'D33.eval-runner-exists' 'Evals' 'warning' 'plugin/skills/eval/run-evals.ps1 is missing' 'Restore the runner from git — it ships in v5.0.3+.' $evalRunner 0
|
|
1649
|
+
} else {
|
|
1650
|
+
# Cheap sanity check (not a full parse): the runner must contain at least one param( block.
|
|
1651
|
+
try {
|
|
1652
|
+
$rt = Get-Content -Raw $evalRunner
|
|
1653
|
+
if ($rt -notmatch '(?ms)^\s*param\s*\(') {
|
|
1654
|
+
Add-Finding 'D33.eval-runner-exists' 'Evals' 'warning' 'plugin/skills/eval/run-evals.ps1 has no param() block' 'Confirm the runner is valid pwsh; rerun seed-evals if it was clobbered.' $evalRunner 0
|
|
1655
|
+
}
|
|
1656
|
+
} catch {
|
|
1657
|
+
Add-Finding 'D33.eval-runner-exists' 'Evals' 'warning' "Could not read run-evals.ps1: $($_.Exception.Message)" 'Check file permissions.' $evalRunner 0
|
|
1658
|
+
}
|
|
1659
|
+
}
|
|
1660
|
+
|
|
1661
|
+
if (-not (Test-Path $evalSchema)) {
|
|
1662
|
+
Add-Finding 'D33.evals-schema' 'Evals' 'warning' 'plugin/skills/eval/evals.schema.json is missing' 'Restore the schema from git.' $evalSchema 0
|
|
1663
|
+
}
|
|
1664
|
+
|
|
1665
|
+
$skillsRoot = Join-Path $Root 'plugin/skills'
|
|
1666
|
+
$skillDirs = Get-ChildItem -Path $skillsRoot -Directory | Where-Object { $_.Name -notin @('eval', 'self-check') }
|
|
1667
|
+
foreach ($sd in $skillDirs) {
|
|
1668
|
+
$evalsFile = Join-Path $sd.FullName 'evals/evals.json'
|
|
1669
|
+
if (-not (Test-Path $evalsFile)) {
|
|
1670
|
+
Add-Finding 'D33.evals-exist' 'Evals' 'warning' "Skill '$($sd.Name)' has no evals/evals.json" "Create $evalsFile with >=2 cases per skill-evals.instructions.md. Quick start: run 'node scripts/seed-evals.mjs'." $evalsFile 0
|
|
1671
|
+
continue
|
|
1672
|
+
}
|
|
1673
|
+
try {
|
|
1674
|
+
$obj = Get-Content -Raw $evalsFile | ConvertFrom-Json
|
|
1675
|
+
} catch {
|
|
1676
|
+
Add-Finding 'D33.evals-schema' 'Evals' 'warning' "Skill '$($sd.Name)' evals.json is not valid JSON: $($_.Exception.Message)" 'Fix the JSON and rerun self-check.' $evalsFile 0
|
|
1677
|
+
continue
|
|
1678
|
+
}
|
|
1679
|
+
if (-not $obj.skill -or $obj.skill -ne $sd.Name) {
|
|
1680
|
+
Add-Finding 'D33.evals-schema' 'Evals' 'warning' "Skill '$($sd.Name)' evals.json declares skill='$($obj.skill)' (mismatch)" 'Set the skill field to match the directory name.' $evalsFile 0
|
|
1681
|
+
}
|
|
1682
|
+
if (-not $obj.cases -or $obj.cases.Count -lt 2) {
|
|
1683
|
+
Add-Finding 'D33.evals-min-cases' 'Evals' 'warning' "Skill '$($sd.Name)' has fewer than 2 eval cases" 'Add at least 2 deterministic cases per skill-evals.instructions.md.' $evalsFile 0
|
|
1684
|
+
}
|
|
1685
|
+
foreach ($c in $obj.cases) {
|
|
1686
|
+
if (-not $c.expected_assertions -or $c.expected_assertions.Count -lt 1) {
|
|
1687
|
+
Add-Finding 'D33.evals-have-assertions' 'Evals' 'warning' "Skill '$($sd.Name)' case '$($c.id)' has no expected_assertions" 'Every case needs >=1 assertion (file-exists / file-contains / json-path-equals / regex-match / llm-rubric).' $evalsFile 0
|
|
1688
|
+
}
|
|
1689
|
+
if ($c.grader_type -and ($c.grader_type -notin 'script', 'llm')) {
|
|
1690
|
+
Add-Finding 'D33.evals-schema' 'Evals' 'warning' "Skill '$($sd.Name)' case '$($c.id)' has invalid grader_type '$($c.grader_type)'" "Use 'script' or 'llm'." $evalsFile 0
|
|
1691
|
+
}
|
|
1692
|
+
}
|
|
1693
|
+
}
|
|
1694
|
+
|
|
1695
|
+
if (-not (Test-Path $baselineFile)) {
|
|
1696
|
+
Add-Finding 'D33.baseline-exists' 'Evals' 'warning' 'evals/baseline.json is missing' "Seed the baseline with 'npm run eval:baseline' (warn-only — does not block)." $baselineFile 0
|
|
1697
|
+
}
|
|
1698
|
+
|
|
1699
|
+
# === D34.creator-conformance — skill-creator + skill-checker harness (v5.0.4+) ===
|
|
1700
|
+
# Per skill-authoring.instructions.md, every skill carrying a
|
|
1701
|
+
# `.created-by-skill-creator` marker MUST pass `check-skill --lint` clean.
|
|
1702
|
+
# The retrofit gate (additive fixes only) MUST be clean across the whole
|
|
1703
|
+
# repo. The dogfood report tracks the last full run.
|
|
1704
|
+
$creatorDir = Join-Path $Root 'plugin/skills/skill-creator'
|
|
1705
|
+
$checkerDir = Join-Path $Root 'plugin/skills/skill-checker'
|
|
1706
|
+
$scaffold = Join-Path $creatorDir 'scaffold.ps1'
|
|
1707
|
+
$checkSkill = Join-Path $checkerDir 'check-skill.ps1'
|
|
1708
|
+
$dogfoodReport = Join-Path $Root 'docs/audits/v5.0.4-skill-creator-dogfood.md'
|
|
1709
|
+
|
|
1710
|
+
if (-not (Test-Path $scaffold)) {
|
|
1711
|
+
Add-Finding 'D34.skill-creator-exists' 'Creator conformance' 'warning' 'plugin/skills/skill-creator/scaffold.ps1 is missing' 'Restore the scaffolder from git — ships in v5.0.4+.' $scaffold 0
|
|
1712
|
+
} else {
|
|
1713
|
+
try {
|
|
1714
|
+
$st = Get-Content -Raw $scaffold
|
|
1715
|
+
if ($st -notmatch '(?ms)^\s*param\s*\(') {
|
|
1716
|
+
Add-Finding 'D34.skill-creator-exists' 'Creator conformance' 'warning' 'scaffold.ps1 has no param() block' 'Confirm the scaffolder is valid pwsh.' $scaffold 0
|
|
1717
|
+
}
|
|
1718
|
+
} catch {
|
|
1719
|
+
Add-Finding 'D34.skill-creator-exists' 'Creator conformance' 'warning' "Could not read scaffold.ps1: $($_.Exception.Message)" 'Check file permissions.' $scaffold 0
|
|
1720
|
+
}
|
|
1721
|
+
}
|
|
1722
|
+
if (-not (Test-Path $checkSkill)) {
|
|
1723
|
+
Add-Finding 'D34.skill-checker-exists' 'Creator conformance' 'warning' 'plugin/skills/skill-checker/check-skill.ps1 is missing' 'Restore the checker from git — ships in v5.0.4+.' $checkSkill 0
|
|
1724
|
+
} else {
|
|
1725
|
+
try {
|
|
1726
|
+
$ct = Get-Content -Raw $checkSkill
|
|
1727
|
+
if ($ct -notmatch '(?ms)^\s*param\s*\(') {
|
|
1728
|
+
Add-Finding 'D34.skill-checker-exists' 'Creator conformance' 'warning' 'check-skill.ps1 has no param() block' 'Confirm the checker is valid pwsh.' $checkSkill 0
|
|
1729
|
+
}
|
|
1730
|
+
} catch {
|
|
1731
|
+
Add-Finding 'D34.skill-checker-exists' 'Creator conformance' 'warning' "Could not read check-skill.ps1: $($_.Exception.Message)" 'Check file permissions.' $checkSkill 0
|
|
1732
|
+
}
|
|
1733
|
+
}
|
|
1734
|
+
|
|
1735
|
+
# D34.creator-output-conforms: every skill carrying the marker must lint clean.
|
|
1736
|
+
if (Test-Path $checkSkill) {
|
|
1737
|
+
$skillsRoot = Join-Path $Root 'plugin/skills'
|
|
1738
|
+
$markedSkills = Get-ChildItem -Path $skillsRoot -Directory -ErrorAction SilentlyContinue |
|
|
1739
|
+
Where-Object { Test-Path (Join-Path $_.FullName '.created-by-skill-creator') }
|
|
1740
|
+
foreach ($ms in $markedSkills) {
|
|
1741
|
+
try {
|
|
1742
|
+
$jsonText = (& pwsh -NoProfile -File $checkSkill -Skill $ms.Name -Json 2>$null) -join "`n"
|
|
1743
|
+
$parsed = $null
|
|
1744
|
+
try { $parsed = $jsonText | ConvertFrom-Json } catch {}
|
|
1745
|
+
if ($parsed -and $parsed.summary -and $parsed.summary.total_findings -gt 0) {
|
|
1746
|
+
Add-Finding 'D34.creator-output-conforms' 'Creator conformance' 'warning' "Skill '$($ms.Name)' carries .created-by-skill-creator marker but check-skill --lint has $($parsed.summary.total_findings) finding(s)" 'Fix the lint findings, or remove the marker if the skill is intentionally non-conformant.' (Join-Path $ms.FullName 'SKILL.md') 0
|
|
1747
|
+
}
|
|
1748
|
+
} catch {
|
|
1749
|
+
Add-Finding 'D34.creator-output-conforms' 'Creator conformance' 'warning' "Could not lint marked skill '$($ms.Name)': $($_.Exception.Message)" 'Run check-skill manually to inspect.' $checkSkill 0
|
|
1750
|
+
}
|
|
1751
|
+
}
|
|
1752
|
+
|
|
1753
|
+
# D34.retrofit-clean: --all --retrofit --dry-run must show no non-additive gaps.
|
|
1754
|
+
try {
|
|
1755
|
+
$allJson = (& pwsh -NoProfile -File $checkSkill -All -Retrofit -DryRun -Json 2>$null) -join "`n"
|
|
1756
|
+
$allParsed = $null
|
|
1757
|
+
try { $allParsed = $allJson | ConvertFrom-Json } catch {}
|
|
1758
|
+
if ($allParsed -and $allParsed.skills) {
|
|
1759
|
+
foreach ($s in $allParsed.skills) {
|
|
1760
|
+
if ($s.non_additive_count -gt 0) {
|
|
1761
|
+
Add-Finding 'D34.retrofit-clean' 'Creator conformance' 'warning' "Skill '$($s.name)' has $($s.non_additive_count) non-additive gap(s) that retrofit cannot auto-fix" "Inspect Evidence/_skill-checker/$($s.name)/fix-plan.json and document/fix manually. See docs/audits/v5.0.4-skill-creator-dogfood.md for known exceptions." (Join-Path $Root "plugin/skills/$($s.name)/SKILL.md") 0
|
|
1762
|
+
}
|
|
1763
|
+
}
|
|
1764
|
+
}
|
|
1765
|
+
} catch {
|
|
1766
|
+
Add-Finding 'D34.retrofit-clean' 'Creator conformance' 'warning' "Could not run check-skill --all --retrofit --dry-run: $($_.Exception.Message)" 'Run the command manually to inspect.' $checkSkill 0
|
|
1767
|
+
}
|
|
1768
|
+
}
|
|
1769
|
+
|
|
1770
|
+
# D34.dogfood-report-fresh: warn if the audit doc is missing or stale (>14 days).
|
|
1771
|
+
if (-not (Test-Path $dogfoodReport)) {
|
|
1772
|
+
Add-Finding 'D34.dogfood-report-fresh' 'Creator conformance' 'warning' 'docs/audits/v5.0.4-skill-creator-dogfood.md is missing' 'Re-run check-skill --all --retrofit and write the baseline audit per skill-authoring.instructions.md.' $dogfoodReport 0
|
|
1773
|
+
} else {
|
|
1774
|
+
$age = (Get-Date) - (Get-Item $dogfoodReport).LastWriteTime
|
|
1775
|
+
if ($age.TotalDays -gt 14) {
|
|
1776
|
+
Add-Finding 'D34.dogfood-report-fresh' 'Creator conformance' 'warning' "docs/audits/v5.0.4-skill-creator-dogfood.md is $([math]::Round($age.TotalDays)) days old (cap 14)" 'Re-run check-skill --all --retrofit and refresh the audit.' $dogfoodReport 0
|
|
1777
|
+
}
|
|
1778
|
+
}
|
|
1635
1779
|
}
|
|
1636
1780
|
|
|
1637
1781
|
# === Output ===
|
|
@@ -270,3 +270,13 @@ Skip path:
|
|
|
270
270
|
sync sub-flow + OneDrive pin policy → `references/onedrive-pin-sync.md`. SKILL.md trimmed from
|
|
271
271
|
378 to ~270 lines. Behaviour unchanged; load-on-trigger pointers added.
|
|
272
272
|
|
|
273
|
+
|
|
274
|
+
|
|
275
|
+
## Validation loop
|
|
276
|
+
|
|
277
|
+
<!-- TODO(retrofit): fill in — describe how to verify this skill ran correctly. Auto-added by skill-checker --retrofit --apply per skill-authoring.instructions.md. -->
|
|
278
|
+
|
|
279
|
+
1. Run `pwsh plugin/skills/self-check/run.ps1 -Targeted <area>`.
|
|
280
|
+
2. Fix any findings, then re-run the affected step.
|
|
281
|
+
3. Repeat until self-check exits 0.
|
|
282
|
+
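4. Only then update this skill's outputs. <!-- TODO(retrofit): this step was truncated in the generated stub — name the specific files or state to update. -->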
|
|
@@ -0,0 +1,33 @@
|
|
|
1
|
+
{
|
|
2
|
+
"skill": "setup",
|
|
3
|
+
"version": "1.0.0",
|
|
4
|
+
"description": "Auto-seeded evals for setup. Replace with real cases as the skill matures.",
|
|
5
|
+
"cases": [
|
|
6
|
+
{
|
|
7
|
+
"id": "setup-smoke-1",
|
|
8
|
+
"name": "setup produces a non-empty response",
|
|
9
|
+
"input": "synthetic setup probe — canary smoke",
|
|
10
|
+
"canary": false,
|
|
11
|
+
"grader_type": "script",
|
|
12
|
+
"expected_assertions": [
|
|
13
|
+
{
|
|
14
|
+
"type": "regex-match",
|
|
15
|
+
"pattern": ".+"
|
|
16
|
+
}
|
|
17
|
+
]
|
|
18
|
+
},
|
|
19
|
+
{
|
|
20
|
+
"id": "setup-smoke-2",
|
|
21
|
+
"name": "setup echoes case id",
|
|
22
|
+
"input": "case-id setup-smoke-2",
|
|
23
|
+
"canary": false,
|
|
24
|
+
"grader_type": "script",
|
|
25
|
+
"expected_assertions": [
|
|
26
|
+
{
|
|
27
|
+
"type": "regex-match",
|
|
28
|
+
"pattern": "setup-smoke-2"
|
|
29
|
+
}
|
|
30
|
+
]
|
|
31
|
+
}
|
|
32
|
+
]
|
|
33
|
+
}
|
|
@@ -0,0 +1,136 @@
|
|
|
1
|
+
---
|
|
2
|
+
name: "skill-checker"
|
|
3
|
+
version: "1.0.0"
|
|
4
|
+
description: "USE WHEN the user says \"check skill\", \"lint skill\", \"retrofit skill\", \"audit skills\", \"npx kushi-agents check-skill ...\", or before merging a PR that touches plugin/skills/. DO NOT USE for runtime evidence validation (use ask-project) or for running evals (use the eval skill). Capability: lints any plugin/skills/<name>/ against the agentskills.io blueprint (frontmatter, sections, size caps, evals presence); offers a non-destructive --retrofit + --apply that adds missing section stubs without overwriting existing content; can invoke the skill-creator eval-review viewer."
|
|
5
|
+
---
|
|
6
|
+
|
|
7
|
+
# Skill: skill-checker
|
|
8
|
+
|
|
9
|
+
The lint + retrofit harness for `plugin/skills/`. Companion to `skill-creator`. Together they close the authoring loop: `create-skill` scaffolds a conformant skill; `check-skill` keeps it conformant and brings legacy skills up to spec.
|
|
10
|
+
|
|
11
|
+
User triggers: "check skill", "lint skill", "audit skills", "retrofit skill", "is this skill conformant?", "kushi check-skill --all".
|
|
12
|
+
|
|
13
|
+
Doctrine: [`plugin/instructions/skill-authoring.instructions.md`](../../instructions/skill-authoring.instructions.md).
|
|
14
|
+
|
|
15
|
+
## USE WHEN
|
|
16
|
+
|
|
17
|
+
- Before merging a PR that touches `plugin/skills/`.
|
|
18
|
+
- After running `skill-creator/scaffold.ps1`, to verify the scaffold output passes lint.
|
|
19
|
+
- Auditing the entire `plugin/skills/` tree against the blueprint (`-All`).
|
|
20
|
+
- Migrating legacy skills written before v5.0.4 (`-Retrofit`, then `-Apply`).
|
|
21
|
+
- Refreshing a skill's description per the optimization rules (`-OptimizeDescription`).
|
|
22
|
+
|
|
23
|
+
## DO NOT USE FOR
|
|
24
|
+
|
|
25
|
+
- Validating real customer evidence (use `ask-project` / `project-status`).
|
|
26
|
+
- Running per-case evals (use the `eval` skill + `npm run eval`).
|
|
27
|
+
- Authoring new skills (use `skill-creator`).
|
|
28
|
+
|
|
29
|
+
## Gotchas
|
|
30
|
+
|
|
31
|
+
- **Lint reuses self-check D30 + D33 logic** — it does not duplicate the checks. If you change blueprint rules, change them in `self-check/run.ps1` and re-run; this skill picks them up by delegating.
|
|
32
|
+
- **Retrofit is additive only.** It never deletes or rewrites existing content. Missing sections are appended with `<!-- TODO(retrofit): fill in -->` markers. If a section exists but is wrong, retrofit flags it but won't touch it.
|
|
33
|
+
- **`-Apply` requires `-Retrofit`.** Lint mode is read-only by design.
|
|
34
|
+
- **No auto-commit.** Even after `-Apply`, the human reviews the diff and commits manually.
|
|
35
|
+
- **`Evidence/_skill-checker/` is gitignored** — fix plans + scratch HTML stay local.
|
|
36
|
+
|
|
37
|
+
## Step checklist
|
|
38
|
+
|
|
39
|
+
- [ ] **Pick scope**: `-Skill <name>` (one) OR `-All`.
|
|
40
|
+
- [ ] **Pick mode**: default (`-Lint`), `-Retrofit`, `-Retrofit -Apply`, `-OptimizeDescription`, `-Review`.
|
|
41
|
+
- [ ] **Run**:
|
|
42
|
+
|
|
43
|
+
```powershell
|
|
44
|
+
pwsh plugin/skills/skill-checker/check-skill.ps1 -Skill my-skill
|
|
45
|
+
pwsh plugin/skills/skill-checker/check-skill.ps1 -All -Retrofit -DryRun
|
|
46
|
+
pwsh plugin/skills/skill-checker/check-skill.ps1 -Skill my-skill -Retrofit -Apply
|
|
47
|
+
```
|
|
48
|
+
|
|
49
|
+
- [ ] **Inspect output** under `Evidence/_skill-checker/<skill>/` (fix plans) + stdout (findings).
|
|
50
|
+
- [ ] **Re-run lint** after `-Apply` to confirm green.
|
|
51
|
+
|
|
52
|
+
## Validation loop
|
|
53
|
+
|
|
54
|
+
After running:
|
|
55
|
+
|
|
56
|
+
1. Lint mode → if exit ≠ 0, fix the findings in the SKILL.md / evals.json directly and re-run.
|
|
57
|
+
2. Retrofit mode → review the stdout diff; if it looks safe, re-run with `-Apply`.
|
|
58
|
+
3. Apply mode → run `pwsh plugin/skills/self-check/run.ps1 -Deep -Targeted <skill>` to confirm no regressions.
|
|
59
|
+
4. Optimize-description → review the diff; if good, copy the rewritten string into SKILL.md frontmatter manually.
|
|
60
|
+
5. Commit.
|
|
61
|
+
|
|
62
|
+
## Modes
|
|
63
|
+
|
|
64
|
+
### `-Lint` (default)
|
|
65
|
+
|
|
66
|
+
Runs the agentskills.io blueprint check. Calls `self-check/run.ps1 -Deep -Targeted <skill>` and filters its findings to the D30/D33-class codes that map to the blueprint. Exits non-zero if any blocking finding remains.
|
|
67
|
+
|
|
68
|
+
### `-Retrofit`
|
|
69
|
+
|
|
70
|
+
Compares actual structure to the blueprint and emits a fix plan:
|
|
71
|
+
|
|
72
|
+
- Missing `## Gotchas` (pull-* / discovery skills) → append.
|
|
73
|
+
- Missing `## Validation loop` (writer skills) → append.
|
|
74
|
+
- Missing `## Step checklist` (orchestrators) → append.
|
|
75
|
+
- Missing `evals/evals.json` → create starter.
|
|
76
|
+
- Missing `USE WHEN` in description → flag (not auto-fixed — needs human review).
|
|
77
|
+
- Description has marketing fluff → flag.
|
|
78
|
+
|
|
79
|
+
Writes fix-plan JSON to `Evidence/_skill-checker/<skill>/fix-plan.json`. Stdout: human diff.
|
|
80
|
+
|
|
81
|
+
### `-Apply` (requires `-Retrofit`)
|
|
82
|
+
|
|
83
|
+
Executes the additive parts of the fix plan. Never overwrites; only appends sections + creates missing evals files. Stops short of any change marked `requires_human` in the plan.
|
|
84
|
+
|
|
85
|
+
### `-OptimizeDescription`
|
|
86
|
+
|
|
87
|
+
Reads the skill's current SKILL.md description, runs `skill-creator/optimize-description.ps1`, prints the diff. Never auto-applies.
|
|
88
|
+
|
|
89
|
+
### `-Review`
|
|
90
|
+
|
|
91
|
+
Invokes `skill-creator/generate-eval-review.ps1 -Skill <name>` — renders the HTML side-by-side viewer.
|
|
92
|
+
|
|
93
|
+
## Arguments
|
|
94
|
+
|
|
95
|
+
| Flag | Purpose |
|
|
96
|
+
|---|---|
|
|
97
|
+
| `-Skill <name>` | Target one skill. |
|
|
98
|
+
| `-All` | Target every `plugin/skills/<name>/` except `eval`, `self-check`, `skill-creator`, `skill-checker`. |
|
|
99
|
+
| `-Retrofit` | Emit the fix plan. |
|
|
100
|
+
| `-Apply` | With `-Retrofit`, execute additive fixes. |
|
|
101
|
+
| `-DryRun` | Print actions without writing. |
|
|
102
|
+
| `-OptimizeDescription` | Run the description optimizer + diff. |
|
|
103
|
+
| `-Review` | Render the eval-review HTML viewer. |
|
|
104
|
+
| `-Output <path>` | Override the report / plan output path. |
|
|
105
|
+
| `-Root <path>` | Override repo root. |
|
|
106
|
+
| `-Json` | Emit a JSON report on stdout. |
|
|
107
|
+
| `-StrictExit` | Exit 1 if any finding (else 0). |
|
|
108
|
+
|
|
109
|
+
## How retrofit categorises gaps
|
|
110
|
+
|
|
111
|
+
| Gap | Kind | Apply behaviour |
|
|
112
|
+
|---|---|---|
|
|
113
|
+
| No `evals/evals.json` | additive | creates starter file with 2 cases |
|
|
114
|
+
| No `## Gotchas` (pull-*) | additive | appends stub block |
|
|
115
|
+
| No `## Validation loop` (writer) | additive | appends stub block |
|
|
116
|
+
| No `## Step checklist` (orchestrator) | additive | appends stub block |
|
|
117
|
+
| Description missing `USE WHEN` | non-additive | flagged; needs human |
|
|
118
|
+
| Description >1024 chars | non-additive | flagged; needs human |
|
|
119
|
+
| SKILL.md >500 lines | non-additive | flagged; split into references/ |
|
|
120
|
+
| Marketing words in description | non-additive | flagged; needs human |
|
|
121
|
+
|
|
122
|
+
## Output paths
|
|
123
|
+
|
|
124
|
+
- Fix plan JSON: `Evidence/_skill-checker/<skill>/fix-plan.json`
|
|
125
|
+
- Dogfood report (manual rollup): `docs/audits/v5.0.4-skill-creator-dogfood.md`
|
|
126
|
+
- Per-run summary (with `-Output`): wherever you direct it.
|
|
127
|
+
|
|
128
|
+
## References
|
|
129
|
+
|
|
130
|
+
- `plugin/instructions/skill-authoring.instructions.md` (doctrine)
|
|
131
|
+
- `plugin/instructions/agentskills-compliance.instructions.md` (D30 rules this delegates to)
|
|
132
|
+
- `plugin/instructions/skill-evals.instructions.md` (D33 rules)
|
|
133
|
+
- `plugin/skills/self-check/run.ps1` (the underlying linter)
|
|
134
|
+
- `plugin/skills/skill-creator/SKILL.md` (the partner authoring skill)
|
|
135
|
+
- `docs/audits/v5.0.4-skill-creator-dogfood.md` (baseline retrofit run)
|
|
136
|
+
- <https://github.com/anthropics/skills/blob/main/skills/skill-creator/SKILL.md>
|