@skill-graph/cli 0.5.6
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +247 -0
- package/LICENSE +200 -0
- package/NOTICE +62 -0
- package/README.md +398 -0
- package/SKILL_GRAPH.md +443 -0
- package/bin/skill-graph.js +374 -0
- package/docs/ADOPTION.md +117 -0
- package/docs/CONFORMANCE.md +66 -0
- package/docs/PRIMER.md +384 -0
- package/docs/QUICKSTART-30MIN.md +333 -0
- package/docs/ROUTING-METRICS.md +120 -0
- package/docs/SKILL-MD-FORMAT-COMPATIBILITY.md +127 -0
- package/docs/SKILL_AUDIT_CHECKLIST.md +199 -0
- package/docs/SKILL_AUDIT_LOOP.md +195 -0
- package/docs/SKILL_METADATA_PROTOCOL.md +609 -0
- package/docs/_archived/marketplace-publication-priority-2026-05-18.md +239 -0
- package/docs/adr/0001-predicate-set.md +69 -0
- package/docs/adr/0002-json-ld-context.md +82 -0
- package/docs/adr/0003-ontoclean-rigidity-tags.md +65 -0
- package/docs/adr/0004-persistent-identifiers.md +74 -0
- package/docs/adr/0005-freshness-consolidation.md +70 -0
- package/docs/adr/0006-revise-predicate-rename.md +105 -0
- package/docs/adr/0007-audit-loop-cadence.md +99 -0
- package/docs/adr/0008-skill-surface-split-and-curation-policy.md +93 -0
- package/docs/category-consumers.md +168 -0
- package/docs/concept-map.md +194 -0
- package/docs/diagrams/drift-states.mmd +21 -0
- package/docs/diagrams/manifest-pipeline.mmd +25 -0
- package/docs/diagrams/routing-harness.mmd +41 -0
- package/docs/diagrams/starter-graph.mmd +53 -0
- package/docs/field-decision-guide.md +315 -0
- package/docs/field-rationale.md +211 -0
- package/docs/field-reference.generated.md +624 -0
- package/docs/field-reference.md +1426 -0
- package/docs/glossary.md +190 -0
- package/docs/head-noun-glossary.md +63 -0
- package/docs/images/audit-phases.png +0 -0
- package/docs/images/drift-states.png +0 -0
- package/docs/images/graded-mode.png +0 -0
- package/docs/images/manifest-pipeline.png +0 -0
- package/docs/images/routing-harness.png +0 -0
- package/docs/images/skill-anatomy.png +0 -0
- package/docs/images/starter-graph.png +0 -0
- package/docs/images/system-model.png +0 -0
- package/docs/integrations/github-actions.md +155 -0
- package/docs/manifest-field-mapping.md +443 -0
- package/docs/marketplace-publication-queue.generated.md +240 -0
- package/docs/marketplace-release-agent-prompt.md +82 -0
- package/docs/marketplace-skill-candidate-list.md +272 -0
- package/docs/marketplace-syndication.md +222 -0
- package/docs/migration-sample-review.md +155 -0
- package/docs/migrations/v4-to-v5.md +168 -0
- package/docs/migrations/v5-to-v6.md +221 -0
- package/docs/name-exceptions.yaml +37 -0
- package/docs/plans/marketplace-p1-public-migration-plan.md +41 -0
- package/docs/plans/multi-root-workspace.md +148 -0
- package/docs/plans/scripts-roadmap.md +107 -0
- package/docs/plans/v4-schema-bump.md +160 -0
- package/docs/plans/wave-2-extraction.md +122 -0
- package/docs/positioning-vs-marketplaces.md +175 -0
- package/docs/proposals/skill-audit-loop-positioning.md +160 -0
- package/docs/quality-doctrine.md +138 -0
- package/docs/recommended-skills.md +150 -0
- package/docs/research/skill-comprehension-eval-research.md +1830 -0
- package/docs/research/skill-retrieval-evidence.md +66 -0
- package/docs/skill-metadata-protocol.md +471 -0
- package/docs/skills-sh-maintainer-cleanup-request.md +80 -0
- package/examples/audits/a11y/findings.md +52 -0
- package/examples/audits/a11y/scorecard.md +21 -0
- package/examples/audits/a11y/verdict.md +44 -0
- package/examples/audits/debugging/findings.md +59 -0
- package/examples/audits/debugging/scorecard.md +22 -0
- package/examples/audits/debugging/verdict.md +33 -0
- package/examples/audits/documentation/findings.md +59 -0
- package/examples/audits/documentation/scorecard.md +22 -0
- package/examples/audits/documentation/verdict.md +33 -0
- package/examples/evals/a11y.json +140 -0
- package/examples/evals/api-design.json +52 -0
- package/examples/evals/code-review.json +52 -0
- package/examples/evals/data-modeling.json +52 -0
- package/examples/evals/database-migration.json +52 -0
- package/examples/evals/debugging.json +118 -0
- package/examples/evals/dependency-architecture.json +52 -0
- package/examples/evals/design-system-architecture.json +52 -0
- package/examples/evals/error-tracking.json +52 -0
- package/examples/evals/event-contract-design.json +52 -0
- package/examples/evals/form-ux-architecture.json +52 -0
- package/examples/evals/framework-fit-analysis.json +52 -0
- package/examples/evals/graph-audit.json +139 -0
- package/examples/evals/information-architecture.json +52 -0
- package/examples/evals/interaction-feedback.json +52 -0
- package/examples/evals/interaction-patterns.json +52 -0
- package/examples/evals/layout-composition.json +52 -0
- package/examples/evals/lint-overlay.json +117 -0
- package/examples/evals/microcopy.json +52 -0
- package/examples/evals/observability-modeling.json +52 -0
- package/examples/evals/pattern-recognition.json +96 -0
- package/examples/evals/performance-engineering.json +52 -0
- package/examples/evals/refactor.json +128 -0
- package/examples/evals/semiotics.json +52 -0
- package/examples/evals/skill-infrastructure.json +96 -0
- package/examples/evals/skill-router.json +140 -0
- package/examples/evals/skill-router.routing.json +113 -0
- package/examples/evals/system-interface-contracts.json +52 -0
- package/examples/evals/task-analysis.json +52 -0
- package/examples/evals/testing-strategy.json +118 -0
- package/examples/evals/type-safety.json +249 -0
- package/examples/evals/visual-design-foundations.json +52 -0
- package/examples/evals/webhook-integration.json +52 -0
- package/examples/exports/a11y.skill-md.md +80 -0
- package/examples/exports/debugging.skill-md.md +80 -0
- package/examples/exports/refactor.skill-md.md +78 -0
- package/examples/exports/testing-strategy.skill-md.md +81 -0
- package/examples/projects/markdown-static-site/README.md +115 -0
- package/examples/projects/markdown-static-site/skills/content-source-router/SKILL.md +131 -0
- package/examples/projects/markdown-static-site/skills/image-optimization-pipeline-config/SKILL.md +132 -0
- package/examples/projects/markdown-static-site/skills/link-rot-detection/SKILL.md +103 -0
- package/examples/projects/markdown-static-site/skills/markdown-post-frontmatter-validation/SKILL.md +133 -0
- package/examples/projects/markdown-static-site/skills/migrate-posts-to-v2-frontmatter/SKILL.md +140 -0
- package/examples/projects/saas-stripe-postgres/README.md +208 -0
- package/examples/projects/saas-stripe-postgres/db/migrations/0004_canonicalize_orders.sql +37 -0
- package/examples/projects/saas-stripe-postgres/db/schema.sql +112 -0
- package/examples/projects/saas-stripe-postgres/skills/migrate-orders-to-canonical-schema/SKILL.md +149 -0
- package/examples/projects/saas-stripe-postgres/skills/nextjs-server-action-validation/SKILL.md +154 -0
- package/examples/projects/saas-stripe-postgres/skills/payment-provider-router/SKILL.md +153 -0
- package/examples/projects/saas-stripe-postgres/skills/postgres-rls-pattern/SKILL.md +163 -0
- package/examples/projects/saas-stripe-postgres/skills/stripe-webhook-signature-verification/SKILL.md +137 -0
- package/examples/protocol/skill-metadata-template.md +301 -0
- package/examples/protocol/skills.manifest.sample.json +13245 -0
- package/examples/skill-metadata-template.md +317 -0
- package/examples/skills.manifest.sample.json +13519 -0
- package/examples/tests/v3-1-skos-fixture/SKILL.md +93 -0
- package/marketplace/README.md +17 -0
- package/marketplace/skills/a11y/SKILL.md +66 -0
- package/marketplace/skills/acid-fundamentals/SKILL.md +106 -0
- package/marketplace/skills/agent-engineering/SKILL.md +386 -0
- package/marketplace/skills/agent-eval-design/SKILL.md +55 -0
- package/marketplace/skills/ai-native-development/SKILL.md +294 -0
- package/marketplace/skills/api-design/SKILL.md +60 -0
- package/marketplace/skills/architecture-decision-records/SKILL.md +55 -0
- package/marketplace/skills/background-jobs/SKILL.md +265 -0
- package/marketplace/skills/bounded-context-mapping/SKILL.md +55 -0
- package/marketplace/skills/cap-theorem-tradeoffs/SKILL.md +127 -0
- package/marketplace/skills/client-server-boundary/SKILL.md +187 -0
- package/marketplace/skills/code-review/SKILL.md +120 -0
- package/marketplace/skills/color-system-design/SKILL.md +43 -0
- package/marketplace/skills/component-architecture/SKILL.md +126 -0
- package/marketplace/skills/compression/SKILL.md +112 -0
- package/marketplace/skills/conceptual-modeling/SKILL.md +181 -0
- package/marketplace/skills/connection-pooling/SKILL.md +105 -0
- package/marketplace/skills/constraint-awareness/SKILL.md +287 -0
- package/marketplace/skills/content-monitor/SKILL.md +209 -0
- package/marketplace/skills/context-engineering/SKILL.md +320 -0
- package/marketplace/skills/context-graph/SKILL.md +174 -0
- package/marketplace/skills/context-management/SKILL.md +174 -0
- package/marketplace/skills/context-window/SKILL.md +239 -0
- package/marketplace/skills/contract-testing/SKILL.md +120 -0
- package/marketplace/skills/cron-scheduling/SKILL.md +223 -0
- package/marketplace/skills/dark-mode-implementation/SKILL.md +47 -0
- package/marketplace/skills/data-modeling/SKILL.md +59 -0
- package/marketplace/skills/data-modeling-fundamentals/SKILL.md +117 -0
- package/marketplace/skills/database-migration/SKILL.md +429 -0
- package/marketplace/skills/debugging/SKILL.md +67 -0
- package/marketplace/skills/dependency-architecture/SKILL.md +58 -0
- package/marketplace/skills/design-module-composition/SKILL.md +43 -0
- package/marketplace/skills/design-system-architecture/SKILL.md +61 -0
- package/marketplace/skills/design-thinking/SKILL.md +44 -0
- package/marketplace/skills/diagnosis/SKILL.md +296 -0
- package/marketplace/skills/diff-analysis/SKILL.md +188 -0
- package/marketplace/skills/e2e-test-design/SKILL.md +113 -0
- package/marketplace/skills/entity-relationship-modeling/SKILL.md +218 -0
- package/marketplace/skills/epistemic-grounding/SKILL.md +112 -0
- package/marketplace/skills/error-boundary/SKILL.md +235 -0
- package/marketplace/skills/error-tracking/SKILL.md +261 -0
- package/marketplace/skills/eval-driven-development/SKILL.md +147 -0
- package/marketplace/skills/evaluation/SKILL.md +113 -0
- package/marketplace/skills/event-contract-design/SKILL.md +60 -0
- package/marketplace/skills/event-storming/SKILL.md +56 -0
- package/marketplace/skills/form-ux-architecture/SKILL.md +60 -0
- package/marketplace/skills/framework-fit-analysis/SKILL.md +59 -0
- package/marketplace/skills/frontend-architecture/SKILL.md +43 -0
- package/marketplace/skills/generative-ui/SKILL.md +118 -0
- package/marketplace/skills/graph-audit/SKILL.md +81 -0
- package/marketplace/skills/guardrails/SKILL.md +118 -0
- package/marketplace/skills/hooks-patterns/SKILL.md +185 -0
- package/marketplace/skills/http-semantics/SKILL.md +136 -0
- package/marketplace/skills/ideation/SKILL.md +41 -0
- package/marketplace/skills/indexing-strategy/SKILL.md +108 -0
- package/marketplace/skills/information-architecture/SKILL.md +59 -0
- package/marketplace/skills/integration-test-design/SKILL.md +111 -0
- package/marketplace/skills/intent-recognition/SKILL.md +136 -0
- package/marketplace/skills/interaction-feedback/SKILL.md +59 -0
- package/marketplace/skills/interaction-patterns/SKILL.md +59 -0
- package/marketplace/skills/journey-mapping/SKILL.md +41 -0
- package/marketplace/skills/keywords/SKILL.md +213 -0
- package/marketplace/skills/knowledge-modeling/SKILL.md +232 -0
- package/marketplace/skills/layout-composition/SKILL.md +59 -0
- package/marketplace/skills/linguistics/SKILL.md +429 -0
- package/marketplace/skills/lint-overlay/SKILL.md +76 -0
- package/marketplace/skills/mental-models/SKILL.md +126 -0
- package/marketplace/skills/merge-queue/SKILL.md +94 -0
- package/marketplace/skills/methodology/SKILL.md +317 -0
- package/marketplace/skills/microcopy/SKILL.md +232 -0
- package/marketplace/skills/middleware-patterns/SKILL.md +363 -0
- package/marketplace/skills/mobile-responsive-ux/SKILL.md +287 -0
- package/marketplace/skills/mutation-testing/SKILL.md +112 -0
- package/marketplace/skills/naming-conventions/SKILL.md +112 -0
- package/marketplace/skills/observability-modeling/SKILL.md +59 -0
- package/marketplace/skills/ontology-modeling/SKILL.md +67 -0
- package/marketplace/skills/owasp-security/SKILL.md +153 -0
- package/marketplace/skills/pattern-recognition/SKILL.md +472 -0
- package/marketplace/skills/performance-budgets/SKILL.md +185 -0
- package/marketplace/skills/performance-engineering/SKILL.md +58 -0
- package/marketplace/skills/performance-testing/SKILL.md +125 -0
- package/marketplace/skills/printify/SKILL.md +42 -0
- package/marketplace/skills/prioritization/SKILL.md +118 -0
- package/marketplace/skills/problem-framing/SKILL.md +41 -0
- package/marketplace/skills/problem-locating-solving/SKILL.md +203 -0
- package/marketplace/skills/project-knowledge-extraction/SKILL.md +54 -0
- package/marketplace/skills/prompt-craft/SKILL.md +134 -0
- package/marketplace/skills/prompt-injection-defense/SKILL.md +132 -0
- package/marketplace/skills/property-based-testing/SKILL.md +100 -0
- package/marketplace/skills/prototyping/SKILL.md +43 -0
- package/marketplace/skills/query-optimization/SKILL.md +144 -0
- package/marketplace/skills/real-time-updates/SKILL.md +324 -0
- package/marketplace/skills/ref-patterns/SKILL.md +284 -0
- package/marketplace/skills/refactor/SKILL.md +65 -0
- package/marketplace/skills/rendering-models/SKILL.md +142 -0
- package/marketplace/skills/replication-patterns/SKILL.md +110 -0
- package/marketplace/skills/research-synthesis/SKILL.md +41 -0
- package/marketplace/skills/route-handler-design/SKILL.md +347 -0
- package/marketplace/skills/schema-evolution/SKILL.md +140 -0
- package/marketplace/skills/security-fundamentals/SKILL.md +139 -0
- package/marketplace/skills/semantic-center/SKILL.md +194 -0
- package/marketplace/skills/semantic-relations/SKILL.md +250 -0
- package/marketplace/skills/semantics/SKILL.md +366 -0
- package/marketplace/skills/semiotics/SKILL.md +230 -0
- package/marketplace/skills/seo-strategy/SKILL.md +260 -0
- package/marketplace/skills/server-actions-design/SKILL.md +243 -0
- package/marketplace/skills/server-components-design/SKILL.md +190 -0
- package/marketplace/skills/sharding-strategy/SKILL.md +123 -0
- package/marketplace/skills/shopify/SKILL.md +42 -0
- package/marketplace/skills/skill-infrastructure/SKILL.md +320 -0
- package/marketplace/skills/skill-router/SKILL.md +71 -0
- package/marketplace/skills/skill-scaffold/SKILL.md +105 -0
- package/marketplace/skills/snapshot-testing/SKILL.md +120 -0
- package/marketplace/skills/spec-driven-development/SKILL.md +148 -0
- package/marketplace/skills/state-machine-modeling/SKILL.md +56 -0
- package/marketplace/skills/state-management/SKILL.md +134 -0
- package/marketplace/skills/streaming-architecture/SKILL.md +194 -0
- package/marketplace/skills/summarization/SKILL.md +156 -0
- package/marketplace/skills/suspense-patterns/SKILL.md +265 -0
- package/marketplace/skills/system-interface-contracts/SKILL.md +59 -0
- package/marketplace/skills/task-analysis/SKILL.md +201 -0
- package/marketplace/skills/taxonomy-design/SKILL.md +66 -0
- package/marketplace/skills/test-coverage-strategy/SKILL.md +108 -0
- package/marketplace/skills/test-doubles-design/SKILL.md +98 -0
- package/marketplace/skills/test-driven-development/SKILL.md +96 -0
- package/marketplace/skills/testing-strategy/SKILL.md +67 -0
- package/marketplace/skills/theme-system-design/SKILL.md +43 -0
- package/marketplace/skills/tool-call-flow/SKILL.md +229 -0
- package/marketplace/skills/tool-call-strategy/SKILL.md +292 -0
- package/marketplace/skills/transaction-isolation/SKILL.md +98 -0
- package/marketplace/skills/type-safety/SKILL.md +177 -0
- package/marketplace/skills/typography-system/SKILL.md +43 -0
- package/marketplace/skills/usability-testing/SKILL.md +43 -0
- package/marketplace/skills/user-research/SKILL.md +43 -0
- package/marketplace/skills/vercel-composition-patterns/SKILL.md +157 -0
- package/marketplace/skills/version-control/SKILL.md +233 -0
- package/marketplace/skills/visual-design-foundations/SKILL.md +59 -0
- package/marketplace/skills/visual-hierarchy/SKILL.md +43 -0
- package/marketplace/skills/webhook-integration/SKILL.md +331 -0
- package/marketplace/skills/writing-humanizer/SKILL.md +380 -0
- package/package.json +67 -0
- package/schemas/manifest.schema.json +811 -0
- package/schemas/manifest.v2.schema.json +164 -0
- package/schemas/manifest.v3.schema.json +758 -0
- package/schemas/manifest.v4.schema.json +755 -0
- package/schemas/manifest.v5.schema.json +755 -0
- package/schemas/manifest.v6.schema.json +811 -0
- package/schemas/skill.context.jsonld +279 -0
- package/schemas/skill.schema.json +919 -0
- package/schemas/skill.v2.schema.json +201 -0
- package/schemas/skill.v3.schema.json +827 -0
- package/schemas/skill.v4.schema.json +822 -0
- package/schemas/skill.v5.schema.json +830 -0
- package/schemas/skill.v6.schema.json +946 -0
- package/schemas/vocabulary/keywords.json +180 -0
- package/schemas/vocabulary/workspace_tags.json +23 -0
- package/scripts/__tests__/migrate-skill-v2-to-v3.test.js +161 -0
- package/scripts/__tests__/migrate-skill-v3-to-v4.test.js +158 -0
- package/scripts/__tests__/test-export-parser-drift.js +149 -0
- package/scripts/__tests__/test-marketplace-export.js +114 -0
- package/scripts/__tests__/test-router-paths.js +82 -0
- package/scripts/__tests__/test-stability-promotion.js +244 -0
- package/scripts/__tests__/test-v3-1-alias-contract.js +109 -0
- package/scripts/__tests__/test-v3-1-skos-runtime.js +116 -0
- package/scripts/backfill-schema-version.js +198 -0
- package/scripts/build-field-reference.js +160 -0
- package/scripts/build-retrieval-baseline.js +511 -0
- package/scripts/check-markdown-links.js +211 -0
- package/scripts/check-protocol-consistency.js +979 -0
- package/scripts/export-marketplace-skills.js +610 -0
- package/scripts/export-skill.js +374 -0
- package/scripts/generate-manifest.js +787 -0
- package/scripts/lib/alias-contract.js +83 -0
- package/scripts/lib/audit-prompt-builder.js +771 -0
- package/scripts/lib/mock-grader.js +134 -0
- package/scripts/lib/parse-frontmatter.js +429 -0
- package/scripts/lib/roots.js +119 -0
- package/scripts/lint/check-archetype-sections.js +185 -0
- package/scripts/lint/check-category-enum.js +83 -0
- package/scripts/lint/check-routing-eval.js +146 -0
- package/scripts/lint/check-routing-quality.js +211 -0
- package/scripts/lint/check-stability-promotion.js +220 -0
- package/scripts/lint/format-code-frame.js +206 -0
- package/scripts/marketplace-install.js +125 -0
- package/scripts/migrate-category-to-enum.js +169 -0
- package/scripts/migrate-skill-v2-to-v3.js +424 -0
- package/scripts/migrate-skill-v3-to-v4.js +200 -0
- package/scripts/migrate-skill-v5-to-v6.js +304 -0
- package/scripts/restructure-by-category.js +85 -0
- package/scripts/seed-publication-classification.js +282 -0
- package/scripts/skill-audit.js +893 -0
- package/scripts/skill-graph-drift.js +483 -0
- package/scripts/skill-graph-route.js +766 -0
- package/scripts/skill-graph-routing-eval.js +393 -0
- package/scripts/skill-lint.js +1317 -0
- package/scripts/skill-overlap.js +213 -0
- package/scripts/verify-skill-md-export.js +201 -0
|
@@ -0,0 +1,199 @@
|
|
|
1
|
+
# Skill Audit Checklist
|
|
2
|
+
|
|
3
|
+
This is the canonical checklist for auditing a single skill in Skill Graph.
|
|
4
|
+
|
|
5
|
+
## Purpose
|
|
6
|
+
|
|
7
|
+
Use this checklist to answer three questions:
|
|
8
|
+
|
|
9
|
+
1. Is the skill structurally valid?
|
|
10
|
+
2. Is the skill semantically correct?
|
|
11
|
+
3. Is the skill useful enough to keep loading?
|
|
12
|
+
|
|
13
|
+
## Audit Outputs
|
|
14
|
+
|
|
15
|
+
A complete audit should produce:
|
|
16
|
+
|
|
17
|
+
1. verdict (`PASS`, `PASS WITH FIXES`, `PARTIAL`, or `FAIL`)
|
|
18
|
+
2. findings list
|
|
19
|
+
3. required fixes
|
|
20
|
+
4. updated metadata or content when drift is confirmed
|
|
21
|
+
|
|
22
|
+
## Standard Artifact Names
|
|
23
|
+
|
|
24
|
+
Use this exact artifact shape when writing audit output:
|
|
25
|
+
|
|
26
|
+
```text
|
|
27
|
+
audits/<skill-name>/
|
|
28
|
+
findings.md
|
|
29
|
+
verdict.md
|
|
30
|
+
scorecard.md
|
|
31
|
+
```
|
|
32
|
+
|
|
33
|
+
`scorecard.md` is optional for lightweight audits, but `findings.md` and `verdict.md` are always required.
|
|
34
|
+
|
|
35
|
+
## Standard Artifact Structure
|
|
36
|
+
|
|
37
|
+
### `findings.md`
|
|
38
|
+
|
|
39
|
+
Required sections:
|
|
40
|
+
|
|
41
|
+
1. `# Findings`
|
|
42
|
+
2. `## Skill`
|
|
43
|
+
3. `## Verdict Summary`
|
|
44
|
+
4. `## Findings`
|
|
45
|
+
5. `## Required Fixes`
|
|
46
|
+
|
|
47
|
+
Each finding must use:
|
|
48
|
+
|
|
49
|
+
- `ID:`
|
|
50
|
+
- `Severity:`
|
|
51
|
+
- `Surface:`
|
|
52
|
+
- `Problem:`
|
|
53
|
+
- `Evidence:`
|
|
54
|
+
- `Required action:`
|
|
55
|
+
|
|
56
|
+
### `verdict.md`
|
|
57
|
+
|
|
58
|
+
Required sections:
|
|
59
|
+
|
|
60
|
+
1. `# Verdict`
|
|
61
|
+
2. `## Skill`
|
|
62
|
+
3. `## Final Verdict`
|
|
63
|
+
4. `## Rationale`
|
|
64
|
+
5. `## Follow-up State`
|
|
65
|
+
|
|
66
|
+
`Final Verdict` must be exactly one of: `PASS`, `PASS WITH FIXES`, `PARTIAL`, `FAIL`.
|
|
67
|
+
|
|
68
|
+
### `scorecard.md`
|
|
69
|
+
|
|
70
|
+
Required sections when present:
|
|
71
|
+
|
|
72
|
+
1. `# Scorecard`
|
|
73
|
+
2. `## Skill`
|
|
74
|
+
3. `## Dimensions`
|
|
75
|
+
|
|
76
|
+
Required dimension rows:
|
|
77
|
+
|
|
78
|
+
- Metadata validity
|
|
79
|
+
- Activation quality
|
|
80
|
+
- Relation quality
|
|
81
|
+
- Grounding fidelity
|
|
82
|
+
- Content quality
|
|
83
|
+
- Eval quality
|
|
84
|
+
- Portability quality
|
|
85
|
+
|
|
86
|
+
## Canonical Checklist
|
|
87
|
+
|
|
88
|
+
### 1. Frontmatter validity
|
|
89
|
+
|
|
90
|
+
- [ ] `schema_version` exists and equals `5` (integer; the string `"5"` is tolerated for hand-rolled YAML for back-compat — see `schemas/skill.v5.schema.json`)
|
|
91
|
+
- [ ] `name` exists and matches the intended skill identifier
|
|
92
|
+
- [ ] `description` exists and is specific enough to route from
|
|
93
|
+
- [ ] `version` exists
|
|
94
|
+
- [ ] `type` is one of `capability`, `workflow`, `router`, `overlay`
|
|
95
|
+
- [ ] `category` is one of the closed v5 enum: `foundations` / `engineering` / `design` / `quality` / `agent` / `product` (v5 — closed enum framed as browse facet; previously open-string in v3/v4; `family` in v2)
|
|
96
|
+
- [ ] `scope` is one of `codebase`, `reference`, `portable`
|
|
97
|
+
- [ ] `owner` exists
|
|
98
|
+
- [ ] `freshness` exists
|
|
99
|
+
- [ ] `drift_check` exists as an object with `last_verified` (v3+ — was scalar date in v2)
|
|
100
|
+
- [ ] `eval_artifacts`, `eval_state`, `routing_eval` all exist (orthogonal triple — shipped in schema_version 2 under SH-5784, retained through v5)
|
|
101
|
+
- [ ] `extends` exists when `type: overlay`
|
|
102
|
+
- [ ] `extends` is absent when `type` is not `overlay`
|
|
103
|
+
|
|
104
|
+
### 2. Activation quality
|
|
105
|
+
|
|
106
|
+
- [ ] `description` names real trigger scenarios
|
|
107
|
+
- [ ] `keywords` are not empty for routable skills
|
|
108
|
+
- [ ] `triggers` are present when label-based routing is intended
|
|
109
|
+
- [ ] `paths` are present when file-based activation is useful
|
|
110
|
+
- [ ] activation terms are specific, not generic filler
|
|
111
|
+
- [ ] description does not under-trigger obvious user language
|
|
112
|
+
|
|
113
|
+
### 3. Relation quality
|
|
114
|
+
|
|
115
|
+
- [ ] `relations.adjacent` points to real neighboring skills
|
|
116
|
+
- [ ] `relations.boundary` clearly prevents misuse
|
|
117
|
+
- [ ] `relations.verify_with` names valid verification partners
|
|
118
|
+
- [ ] `relations.depends_on` is only used where a real dependency exists
|
|
119
|
+
- [ ] relation semantics are not vague or ornamental
|
|
120
|
+
|
|
121
|
+
### 4. Grounding quality
|
|
122
|
+
|
|
123
|
+
Run this section when the skill is repo-grounded or implementation-aware.
|
|
124
|
+
|
|
125
|
+
- [ ] `grounding` exists when the skill makes concrete implementation claims
|
|
126
|
+
- [ ] `domain_object` clearly states what the skill governs
|
|
127
|
+
- [ ] `truth_sources` point to real files or docs
|
|
128
|
+
- [ ] `failure_modes` are concrete and testable
|
|
129
|
+
- [ ] `evidence_priority` is explicit
|
|
130
|
+
- [ ] claims in the body match the truth sources
|
|
131
|
+
|
|
132
|
+
### 5. Content quality
|
|
133
|
+
|
|
134
|
+
- [ ] the skill has a clear `Coverage` section
|
|
135
|
+
- [ ] the skill has a clear `Philosophy` section
|
|
136
|
+
- [ ] the skill has a clear `Verification` section (required for `capability` and `workflow` archetypes per `docs/skill-metadata-protocol.md § Archetype Section Map`; enforced by `scripts/lint/check-archetype-sections.js`)
|
|
137
|
+
- [ ] the skill has at least one concrete decision table, checklist, or routing rule
|
|
138
|
+
- [ ] the skill contains negative bounds (`Do NOT Use When` or equivalent)
|
|
139
|
+
- [ ] the skill does not contain generic model-native filler
|
|
140
|
+
- [ ] the skill does not claim behavior it cannot verify
|
|
141
|
+
|
|
142
|
+
### 6. Eval quality
|
|
143
|
+
|
|
144
|
+
- [ ] eval files exist if the skill is expected to be graded
|
|
145
|
+
- [ ] eval coverage is adequate for the skill's complexity
|
|
146
|
+
- [ ] evals test realistic prompts, not trivia
|
|
147
|
+
- [ ] evals cover boundaries and failure cases, not just happy path
|
|
148
|
+
- [ ] repo-grounded skills include repo-grounded eval evidence
|
|
149
|
+
|
|
150
|
+
### 7. Portability quality
|
|
151
|
+
|
|
152
|
+
- [ ] `portability.readiness` is declared when relevant
|
|
153
|
+
- [ ] export targets are realistic
|
|
154
|
+
- [ ] no private or local-only assumptions leak into the public skill
|
|
155
|
+
- [ ] the skill can survive export without losing its main meaning
|
|
156
|
+
|
|
157
|
+
### 8. Drift and safety
|
|
158
|
+
|
|
159
|
+
- [ ] no personal names or private identifiers remain
|
|
160
|
+
- [ ] no local filesystem paths remain unless explicitly part of an example
|
|
161
|
+
- [ ] no project-secret doctrine is embedded in public text
|
|
162
|
+
- [ ] no stale references to non-existent files or tools remain
|
|
163
|
+
- [ ] no contradictory instructions exist relative to neighboring skills
|
|
164
|
+
|
|
165
|
+
## Severity Model
|
|
166
|
+
|
|
167
|
+
Use this when reporting findings.
|
|
168
|
+
|
|
169
|
+
| Severity | Meaning |
|
|
170
|
+
|---|---|
|
|
171
|
+
| P0 | dangerous, misleading, or security-sensitive |
|
|
172
|
+
| P1 | materially wrong or broken |
|
|
173
|
+
| P2 | incomplete, ambiguous, or structurally weak |
|
|
174
|
+
| P3 | polish or clarity issue |
|
|
175
|
+
| P4 | informational only |
|
|
176
|
+
|
|
177
|
+
## Minimal Finding Format
|
|
178
|
+
|
|
179
|
+
```text
|
|
180
|
+
ID: F1
|
|
181
|
+
Severity: P1
|
|
182
|
+
Surface: frontmatter
|
|
183
|
+
Problem: `type` uses a deprecated value
|
|
184
|
+
Evidence: `type: doctrine`
|
|
185
|
+
Required action: replace with one of the canonical archetype values
|
|
186
|
+
```
|
|
187
|
+
|
|
188
|
+
## Completion Rule
|
|
189
|
+
|
|
190
|
+
A skill audit is complete when:
|
|
191
|
+
|
|
192
|
+
1. every checklist section was reviewed
|
|
193
|
+
2. every finding has severity and evidence
|
|
194
|
+
3. confirmed drift was fixed or explicitly deferred
|
|
195
|
+
4. the final verdict is one of:
|
|
196
|
+
- PASS
|
|
197
|
+
- PASS WITH FIXES
|
|
198
|
+
- PARTIAL
|
|
199
|
+
- FAIL
|
|
@@ -0,0 +1,195 @@
|
|
|
1
|
+
# Skill Audit Loop
|
|
2
|
+
|
|
3
|
+
A skill is a contract about a subject. The contract is only true while the things it was written against still hold — the codebase drifts, the subject drifts, and the audit fingerprint in the skill's own frontmatter drifts with them. The Skill Audit Loop re-grounds a skill against current truth and records the result on the skill itself.
|
|
4
|
+
|
|
5
|
+
This loop has one shape:
|
|
6
|
+
|
|
7
|
+
```
|
|
8
|
+
read → fix → test → next
|
|
9
|
+
```
|
|
10
|
+
|
|
11
|
+
That's it. One field at a time, keep or revert based on a single measurable signal, then move on. The discipline comes from Karpathy's auto-improvement loop: one editable asset, one scalar metric, one time box. The "read before changing" framing comes from Design Thinking. The structure here is the cheapest expression of both.
|
|
12
|
+
|
|
13
|
+
## The Four Operations
|
|
14
|
+
|
|
15
|
+
Every action in this loop falls into one of four operations. Each writes to a specific set of flat fields in the Skill Metadata Protocol v6 (see `skill-metadata-protocol/schemas/skill.v6.schema.json`).
|
|
16
|
+
|
|
17
|
+
| Operation | What it does | Mutates skill? | Writes which fields |
|
|
18
|
+
|---|---|---|---|
|
|
19
|
+
| **audit** | Read every field, check freshness and validity against repo truth, and score the seven graded dimensions when `--graded` is passed. | No | `last_audited`, `audit_verdict`, `lint_verdict`, `drift_status` |
|
|
20
|
+
| **improve** | Edit one field. One commit. Time-boxed. | Yes | the chosen field + `last_changed` |
|
|
21
|
+
| **evaluate** | Run the eval suite (deterministic + comprehension grader) against the skill. | No | `eval_score`, `eval_failed_ids`, `freshness` |
|
|
22
|
+
| **evolve** | Loop over the corpus: `audit → improve → evaluate`, prioritised by skill-graph centrality + staleness. | Yes (per skill) | all of the above, per skill |
|
|
23
|
+
|
|
24
|
+
This replaces the previous 13-command surface. The mapping:
|
|
25
|
+
|
|
26
|
+
| Old command | New |
|
|
27
|
+
|---|---|
|
|
28
|
+
| `audit:audit-skill` | `audit` |
|
|
29
|
+
| `audit:domain-audit` | `audit --source-first` |
|
|
30
|
+
| `audit:bidirectional-audit` | `audit --fix-code-too` |
|
|
31
|
+
| `audit:deep-repo-audit` | `audit --scope repo` |
|
|
32
|
+
| `audit:workspace-audit` | `audit --scope workspace` |
|
|
33
|
+
| `audit:improve-skill` | `improve` |
|
|
34
|
+
| `audit:auto-improve` | `improve --mode <adapter>` |
|
|
35
|
+
| `audit:skill-fix` | `improve --lens <skill>` |
|
|
36
|
+
| `audit:evaluate-skill` | `evaluate` |
|
|
37
|
+
| `audit:improve-eval` | DELETED (was a byte-equivalent duplicate of `evaluate-skill`) |
|
|
38
|
+
| `audit:skill-evolution` | `evolve` |
|
|
39
|
+
| `audit:skill-discovery` | retained — creates new skills, separate concern |
|
|
40
|
+
| `audit:feedback` | moved to `design:feedback` — visual loop, not skill audit |
|
|
41
|
+
|
|
42
|
+
## The Health Block — state lives on the skill
|
|
43
|
+
|
|
44
|
+
Schema v6 adds seven flat Health fields to every SKILL.md frontmatter:
|
|
45
|
+
|
|
46
|
+
```yaml
|
|
47
|
+
last_audited: 2026-05-17 # date `audit` last ran
|
|
48
|
+
last_changed: 2026-05-15 # date the skill body or frontmatter was last edited
|
|
49
|
+
audit_verdict: PASS_WITH_FIXES # aggregate of lint + drift + graded dimensions
|
|
50
|
+
eval_score: 4.2 # 0.0–5.0 from the eval runner
|
|
51
|
+
eval_failed_ids: [] # empty when clean
|
|
52
|
+
lint_verdict: PASS # deterministic-lint result
|
|
53
|
+
drift_status: OK # OK | DRIFT | BROKEN | STALE | NO_BASELINE | EXTERNAL_UNHASHED | UNKNOWN
|
|
54
|
+
```
|
|
55
|
+
|
|
56
|
+
Before v6, this state was scattered across `eval-history.jsonl`, `routing-misses.jsonl`, `.opencode/progress/skill-audit-*`, `health-ledger.jsonl`, and `findings/*.md`. To know one skill's audit status you grepped five places. v6 collapses that to one frontmatter block. The loop reads it; the operations write it back.
|
|
57
|
+
|
|
58
|
+
A skill still gets `audits/<skill-name>/findings.md` and `verdict.md` when an audit produces longer-form output, but those files are evidence, not state. The source of truth is the Health Block.
|
|
59
|
+
|
|
60
|
+
## The Inner Pipeline of `audit`
|
|
61
|
+
|
|
62
|
+
The previous five-phase shape (Deterministic → Graded → Aggregate → Fix-or-defer → Re-verify) survives, but it lives entirely inside the `audit` operation as its internal pipeline. Users no longer see five phases — they see one `audit` command. Internally:
|
|
63
|
+
|
|
64
|
+
1. **Deterministic** (always) — `skill-lint.js` runs schema validation, relation-target existence, eval coherence, archetype section presence, routing quality. Writes `lint_verdict`.
|
|
65
|
+
2. **Graded** (only under `--graded`) — fans out seven per-dimension prompts (metadata, activation, relation, grounding, content, eval, portability) to the grader CLI.
|
|
66
|
+
3. **Aggregate** — combines the dimension verdicts. Any `FAIL` dominates; otherwise any `PASS_WITH_FIXES` dominates; otherwise `PASS`. Writes `audit_verdict`.
|
|
67
|
+
4. **Drift check** — `skill-graph-drift.js` against declared `grounding.truth_sources`. Writes `drift_status`.
|
|
68
|
+
5. **Stamp** — writes `last_audited` to today's ISO date.
|
|
69
|
+
|
|
70
|
+
This is deterministic plumbing. The user runs `audit <skill>`; the internal pipeline does its work; the frontmatter records the result.
|
|
71
|
+
|
|
72
|
+
## The Inner Pipeline of `evaluate`
|
|
73
|
+
|
|
74
|
+
`evaluate` runs the eval suite the skill declares (typically `evals/<skill>.json` plus the optional `evals/comprehension.json`). It writes:
|
|
75
|
+
|
|
76
|
+
- `eval_score` — aggregate 0.0–5.0 across all evals run
|
|
77
|
+
- `eval_failed_ids` — list of failed case IDs, empty when clean
|
|
78
|
+
- `freshness` — today's ISO date
|
|
79
|
+
|
|
80
|
+
When `evals/comprehension.json` exists, the comprehension grader runs against the five flat Understanding fields (`mental_model`, `purpose`, `boundary`, `analogy`, `misconception`) — or against the legacy `concept.*` block for v5 skills not yet migrated.
|
|
81
|
+
|
|
82
|
+
## The Inner Pipeline of `improve`
|
|
83
|
+
|
|
84
|
+
`improve` is the only operation that edits the skill's content (`audit` and `evaluate` only record their results in the frontmatter). Karpathy discipline applies absolutely:
|
|
85
|
+
|
|
86
|
+
1. **One field, one commit.** The operator (or the loop) chooses one stale or failing field. `improve --field mental_model <skill>` edits exactly that field.
|
|
87
|
+
2. **Time-boxed.** Default 20 minutes per field. Beyond that, abort and re-queue.
|
|
88
|
+
3. **Auto-test after.** `improve` immediately calls `evaluate` and checks the metric for the targeted field.
|
|
89
|
+
4. **Keep or revert.** If `eval_score` did not improve (or regressed below an allowed threshold), the commit is reverted automatically. The loop records the failed attempt and moves to the next field.
|
|
90
|
+
5. **Stamp** — writes `last_changed` to today's ISO date.
|
|
91
|
+
|
|
92
|
+
`improve` has three modes:
|
|
93
|
+
|
|
94
|
+
| Mode | What it does | When to use |
|
|
95
|
+
|---|---|---|
|
|
96
|
+
| (default) | Edit a field of this skill's own SKILL.md | The most common case |
|
|
97
|
+
| `--mode <adapter>` | Run an auto-improve adapter (prompt-evolution, design-candidate-discovery, perf, docs) against this skill | When the change pattern is well-known |
|
|
98
|
+
| `--lens <other-skill>` | Apply another skill as an audit lens against this skill and fix the violations | Cross-skill consistency work — formerly `audit:skill-fix` |
|
|
99
|
+
|
|
100
|
+
## The Inner Pipeline of `evolve`
|
|
101
|
+
|
|
102
|
+
`evolve` is a thin for-loop over the other three operations:
|
|
103
|
+
|
|
104
|
+
```
|
|
105
|
+
for skill in priority_order(skill-graph centrality + staleness):
|
|
106
|
+
audit(skill)
|
|
107
|
+
if audit_verdict in {FAIL, PASS_WITH_FIXES} and understanding_field_targetable:
|
|
108
|
+
improve(skill, field=understanding_field) # one v6 Understanding field
|
|
109
|
+
evaluate(skill)
|
|
110
|
+
write Health Block fields back
|
|
111
|
+
```
|
|
112
|
+
|
|
113
|
+
`understanding_field` is selected by `understandingField()` in
|
|
114
|
+
`scripts/skill/skill-evolution-loop.js` — empty/missing field wins
|
|
115
|
+
outright, otherwise shortest populated value among `description`,
|
|
116
|
+
`mental_model`, `purpose`, `boundary`, `analogy`, `misconception`. The
|
|
117
|
+
stalest Health date field stays in the trace as a staleness signal
|
|
118
|
+
but is not what gets passed to the improver's HARD SCOPE.
|
|
119
|
+
|
|
120
|
+
The staleness half of that priority is read directly from the Health Block — `last_audited` ascending tells the loop which skill is most overdue. No telemetry crawl, no log aggregation.
|
|
121
|
+
|
|
122
|
+
## Loop Principles
|
|
123
|
+
|
|
124
|
+
1. **One skill, one field, one metric at a time.** Karpathy keep-or-revert pressure makes the loop tractable.
|
|
125
|
+
2. **State lives in the artifact.** The Health Block is the source of truth; logs are append-only evidence.
|
|
126
|
+
3. **Read before changing.** `audit` must run before `improve` is allowed to write.
|
|
127
|
+
4. **Deterministic checks first; graded checks second.** Lint and drift are mechanical and trustworthy; graded scores are subject to model variance.
|
|
128
|
+
5. **Fixes are tiny by default.** A field-sized change is the unit of work. Larger changes are decomposed into a sequence of field-sized improves.
|
|
129
|
+
|
|
130
|
+
## Loop Inputs
|
|
131
|
+
|
|
132
|
+
1. `SKILL.md` (frontmatter is read; Health Block is read first)
|
|
133
|
+
2. `evals/<skill>.json` and optional `evals/comprehension.json`
|
|
134
|
+
3. The truth sources declared in `grounding.truth_sources`
|
|
135
|
+
4. `skills.manifest.json` (generated by `skill-graph`)
|
|
136
|
+
5. Skill-graph priority order from `skill-graph-builder.js`
|
|
137
|
+
|
|
138
|
+
## Loop Outputs
|
|
139
|
+
|
|
140
|
+
Two kinds. The Health Block (state) and the audit artifacts (evidence):
|
|
141
|
+
|
|
142
|
+
**Health Block** — written back into the skill's own frontmatter. This is the state.
|
|
143
|
+
|
|
144
|
+
**Audit artifacts** — under `audits/<skill-name>/`:
|
|
145
|
+
|
|
146
|
+
```text
|
|
147
|
+
audits/<skill-name>/
|
|
148
|
+
findings.md ← human-readable narrative of issues found
|
|
149
|
+
verdict.md ← short rationale and fix/defer record
|
|
150
|
+
scorecard.md ← per-dimension scores when --graded ran
|
|
151
|
+
```
|
|
152
|
+
|
|
153
|
+
These remain append-only evidence files for any audit run that needs long-form output. The skill's Health Block lets a reader skip them entirely if all they need is the verdict.
|
|
154
|
+
|
|
155
|
+
## Quick start
|
|
156
|
+
|
|
157
|
+
```bash
|
|
158
|
+
# Audit a single skill
|
|
159
|
+
node src/skill-audit.js <skill-name>
|
|
160
|
+
|
|
161
|
+
# Audit with graded dimensions
|
|
162
|
+
node src/skill-audit.js <skill-name> --graded
|
|
163
|
+
|
|
164
|
+
# Improve one field (auto-tests + keeps or reverts)
|
|
165
|
+
node src/skill-improve.js <skill-name> --field mental_model
|
|
166
|
+
|
|
167
|
+
# Evaluate a skill (writes eval_score, eval_failed_ids, and freshness)
|
|
168
|
+
node src/evaluate-skill.js <skill-name>
|
|
169
|
+
|
|
170
|
+
# Evolve the corpus — audit, improve, evaluate each in priority order
|
|
171
|
+
node src/skill-evolve.js --top 10
|
|
172
|
+
|
|
173
|
+
# Show the Health Block for a skill at a glance
|
|
174
|
+
node src/skill-status.js <skill-name>
|
|
175
|
+
```
|
|
176
|
+
|
|
177
|
+
## Cadence
|
|
178
|
+
|
|
179
|
+
| Cadence | Action |
|
|
180
|
+
|---|---|
|
|
181
|
+
| Every change | The deterministic part of `audit` runs in the lint step of CI |
|
|
182
|
+
| Daily | `evolve --top 5` walks the five stalest skills |
|
|
183
|
+
| Weekly | `audit --graded` for skills with `last_audited` older than 7 days and `category` in the high-centrality set |
|
|
184
|
+
| Before release | `evolve --scope all` |
|
|
185
|
+
|
|
186
|
+
## Non-Goals
|
|
187
|
+
|
|
188
|
+
The loop does not require a separate issue tracker, dashboard, control plane, or proprietary quality rubric. A Markdown reader, a JSON Schema validator, and the four operations are the full stack. Adopters can layer monitoring or queue management on top, but the loop itself stays minimal.
|
|
189
|
+
|
|
190
|
+
## Related Specs
|
|
191
|
+
|
|
192
|
+
- `skill-metadata-protocol/docs/skill-metadata-protocol.md` — the canonical field list including the v6 Health Block and flat Understanding fields
|
|
193
|
+
- `skill-metadata-protocol/schemas/skill.v6.schema.json` — the machine-validated contract
|
|
194
|
+
- `skill-metadata-protocol/docs/migrations/v5-to-v6.md` — concept block flattening + Health Block introduction
|
|
195
|
+
- `SKILL_AUDIT_CHECKLIST.md` — the per-skill checklist used during `audit`
|