@skill-graph/cli 0.5.6
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +247 -0
- package/LICENSE +200 -0
- package/NOTICE +62 -0
- package/README.md +398 -0
- package/SKILL_GRAPH.md +443 -0
- package/bin/skill-graph.js +374 -0
- package/docs/ADOPTION.md +117 -0
- package/docs/CONFORMANCE.md +66 -0
- package/docs/PRIMER.md +384 -0
- package/docs/QUICKSTART-30MIN.md +333 -0
- package/docs/ROUTING-METRICS.md +120 -0
- package/docs/SKILL-MD-FORMAT-COMPATIBILITY.md +127 -0
- package/docs/SKILL_AUDIT_CHECKLIST.md +199 -0
- package/docs/SKILL_AUDIT_LOOP.md +195 -0
- package/docs/SKILL_METADATA_PROTOCOL.md +609 -0
- package/docs/_archived/marketplace-publication-priority-2026-05-18.md +239 -0
- package/docs/adr/0001-predicate-set.md +69 -0
- package/docs/adr/0002-json-ld-context.md +82 -0
- package/docs/adr/0003-ontoclean-rigidity-tags.md +65 -0
- package/docs/adr/0004-persistent-identifiers.md +74 -0
- package/docs/adr/0005-freshness-consolidation.md +70 -0
- package/docs/adr/0006-revise-predicate-rename.md +105 -0
- package/docs/adr/0007-audit-loop-cadence.md +99 -0
- package/docs/adr/0008-skill-surface-split-and-curation-policy.md +93 -0
- package/docs/category-consumers.md +168 -0
- package/docs/concept-map.md +194 -0
- package/docs/diagrams/drift-states.mmd +21 -0
- package/docs/diagrams/manifest-pipeline.mmd +25 -0
- package/docs/diagrams/routing-harness.mmd +41 -0
- package/docs/diagrams/starter-graph.mmd +53 -0
- package/docs/field-decision-guide.md +315 -0
- package/docs/field-rationale.md +211 -0
- package/docs/field-reference.generated.md +624 -0
- package/docs/field-reference.md +1426 -0
- package/docs/glossary.md +190 -0
- package/docs/head-noun-glossary.md +63 -0
- package/docs/images/audit-phases.png +0 -0
- package/docs/images/drift-states.png +0 -0
- package/docs/images/graded-mode.png +0 -0
- package/docs/images/manifest-pipeline.png +0 -0
- package/docs/images/routing-harness.png +0 -0
- package/docs/images/skill-anatomy.png +0 -0
- package/docs/images/starter-graph.png +0 -0
- package/docs/images/system-model.png +0 -0
- package/docs/integrations/github-actions.md +155 -0
- package/docs/manifest-field-mapping.md +443 -0
- package/docs/marketplace-publication-queue.generated.md +240 -0
- package/docs/marketplace-release-agent-prompt.md +82 -0
- package/docs/marketplace-skill-candidate-list.md +272 -0
- package/docs/marketplace-syndication.md +222 -0
- package/docs/migration-sample-review.md +155 -0
- package/docs/migrations/v4-to-v5.md +168 -0
- package/docs/migrations/v5-to-v6.md +221 -0
- package/docs/name-exceptions.yaml +37 -0
- package/docs/plans/marketplace-p1-public-migration-plan.md +41 -0
- package/docs/plans/multi-root-workspace.md +148 -0
- package/docs/plans/scripts-roadmap.md +107 -0
- package/docs/plans/v4-schema-bump.md +160 -0
- package/docs/plans/wave-2-extraction.md +122 -0
- package/docs/positioning-vs-marketplaces.md +175 -0
- package/docs/proposals/skill-audit-loop-positioning.md +160 -0
- package/docs/quality-doctrine.md +138 -0
- package/docs/recommended-skills.md +150 -0
- package/docs/research/skill-comprehension-eval-research.md +1830 -0
- package/docs/research/skill-retrieval-evidence.md +66 -0
- package/docs/skill-metadata-protocol.md +471 -0
- package/docs/skills-sh-maintainer-cleanup-request.md +80 -0
- package/examples/audits/a11y/findings.md +52 -0
- package/examples/audits/a11y/scorecard.md +21 -0
- package/examples/audits/a11y/verdict.md +44 -0
- package/examples/audits/debugging/findings.md +59 -0
- package/examples/audits/debugging/scorecard.md +22 -0
- package/examples/audits/debugging/verdict.md +33 -0
- package/examples/audits/documentation/findings.md +59 -0
- package/examples/audits/documentation/scorecard.md +22 -0
- package/examples/audits/documentation/verdict.md +33 -0
- package/examples/evals/a11y.json +140 -0
- package/examples/evals/api-design.json +52 -0
- package/examples/evals/code-review.json +52 -0
- package/examples/evals/data-modeling.json +52 -0
- package/examples/evals/database-migration.json +52 -0
- package/examples/evals/debugging.json +118 -0
- package/examples/evals/dependency-architecture.json +52 -0
- package/examples/evals/design-system-architecture.json +52 -0
- package/examples/evals/error-tracking.json +52 -0
- package/examples/evals/event-contract-design.json +52 -0
- package/examples/evals/form-ux-architecture.json +52 -0
- package/examples/evals/framework-fit-analysis.json +52 -0
- package/examples/evals/graph-audit.json +139 -0
- package/examples/evals/information-architecture.json +52 -0
- package/examples/evals/interaction-feedback.json +52 -0
- package/examples/evals/interaction-patterns.json +52 -0
- package/examples/evals/layout-composition.json +52 -0
- package/examples/evals/lint-overlay.json +117 -0
- package/examples/evals/microcopy.json +52 -0
- package/examples/evals/observability-modeling.json +52 -0
- package/examples/evals/pattern-recognition.json +96 -0
- package/examples/evals/performance-engineering.json +52 -0
- package/examples/evals/refactor.json +128 -0
- package/examples/evals/semiotics.json +52 -0
- package/examples/evals/skill-infrastructure.json +96 -0
- package/examples/evals/skill-router.json +140 -0
- package/examples/evals/skill-router.routing.json +113 -0
- package/examples/evals/system-interface-contracts.json +52 -0
- package/examples/evals/task-analysis.json +52 -0
- package/examples/evals/testing-strategy.json +118 -0
- package/examples/evals/type-safety.json +249 -0
- package/examples/evals/visual-design-foundations.json +52 -0
- package/examples/evals/webhook-integration.json +52 -0
- package/examples/exports/a11y.skill-md.md +80 -0
- package/examples/exports/debugging.skill-md.md +80 -0
- package/examples/exports/refactor.skill-md.md +78 -0
- package/examples/exports/testing-strategy.skill-md.md +81 -0
- package/examples/projects/markdown-static-site/README.md +115 -0
- package/examples/projects/markdown-static-site/skills/content-source-router/SKILL.md +131 -0
- package/examples/projects/markdown-static-site/skills/image-optimization-pipeline-config/SKILL.md +132 -0
- package/examples/projects/markdown-static-site/skills/link-rot-detection/SKILL.md +103 -0
- package/examples/projects/markdown-static-site/skills/markdown-post-frontmatter-validation/SKILL.md +133 -0
- package/examples/projects/markdown-static-site/skills/migrate-posts-to-v2-frontmatter/SKILL.md +140 -0
- package/examples/projects/saas-stripe-postgres/README.md +208 -0
- package/examples/projects/saas-stripe-postgres/db/migrations/0004_canonicalize_orders.sql +37 -0
- package/examples/projects/saas-stripe-postgres/db/schema.sql +112 -0
- package/examples/projects/saas-stripe-postgres/skills/migrate-orders-to-canonical-schema/SKILL.md +149 -0
- package/examples/projects/saas-stripe-postgres/skills/nextjs-server-action-validation/SKILL.md +154 -0
- package/examples/projects/saas-stripe-postgres/skills/payment-provider-router/SKILL.md +153 -0
- package/examples/projects/saas-stripe-postgres/skills/postgres-rls-pattern/SKILL.md +163 -0
- package/examples/projects/saas-stripe-postgres/skills/stripe-webhook-signature-verification/SKILL.md +137 -0
- package/examples/protocol/skill-metadata-template.md +301 -0
- package/examples/protocol/skills.manifest.sample.json +13245 -0
- package/examples/skill-metadata-template.md +317 -0
- package/examples/skills.manifest.sample.json +13519 -0
- package/examples/tests/v3-1-skos-fixture/SKILL.md +93 -0
- package/marketplace/README.md +17 -0
- package/marketplace/skills/a11y/SKILL.md +66 -0
- package/marketplace/skills/acid-fundamentals/SKILL.md +106 -0
- package/marketplace/skills/agent-engineering/SKILL.md +386 -0
- package/marketplace/skills/agent-eval-design/SKILL.md +55 -0
- package/marketplace/skills/ai-native-development/SKILL.md +294 -0
- package/marketplace/skills/api-design/SKILL.md +60 -0
- package/marketplace/skills/architecture-decision-records/SKILL.md +55 -0
- package/marketplace/skills/background-jobs/SKILL.md +265 -0
- package/marketplace/skills/bounded-context-mapping/SKILL.md +55 -0
- package/marketplace/skills/cap-theorem-tradeoffs/SKILL.md +127 -0
- package/marketplace/skills/client-server-boundary/SKILL.md +187 -0
- package/marketplace/skills/code-review/SKILL.md +120 -0
- package/marketplace/skills/color-system-design/SKILL.md +43 -0
- package/marketplace/skills/component-architecture/SKILL.md +126 -0
- package/marketplace/skills/compression/SKILL.md +112 -0
- package/marketplace/skills/conceptual-modeling/SKILL.md +181 -0
- package/marketplace/skills/connection-pooling/SKILL.md +105 -0
- package/marketplace/skills/constraint-awareness/SKILL.md +287 -0
- package/marketplace/skills/content-monitor/SKILL.md +209 -0
- package/marketplace/skills/context-engineering/SKILL.md +320 -0
- package/marketplace/skills/context-graph/SKILL.md +174 -0
- package/marketplace/skills/context-management/SKILL.md +174 -0
- package/marketplace/skills/context-window/SKILL.md +239 -0
- package/marketplace/skills/contract-testing/SKILL.md +120 -0
- package/marketplace/skills/cron-scheduling/SKILL.md +223 -0
- package/marketplace/skills/dark-mode-implementation/SKILL.md +47 -0
- package/marketplace/skills/data-modeling/SKILL.md +59 -0
- package/marketplace/skills/data-modeling-fundamentals/SKILL.md +117 -0
- package/marketplace/skills/database-migration/SKILL.md +429 -0
- package/marketplace/skills/debugging/SKILL.md +67 -0
- package/marketplace/skills/dependency-architecture/SKILL.md +58 -0
- package/marketplace/skills/design-module-composition/SKILL.md +43 -0
- package/marketplace/skills/design-system-architecture/SKILL.md +61 -0
- package/marketplace/skills/design-thinking/SKILL.md +44 -0
- package/marketplace/skills/diagnosis/SKILL.md +296 -0
- package/marketplace/skills/diff-analysis/SKILL.md +188 -0
- package/marketplace/skills/e2e-test-design/SKILL.md +113 -0
- package/marketplace/skills/entity-relationship-modeling/SKILL.md +218 -0
- package/marketplace/skills/epistemic-grounding/SKILL.md +112 -0
- package/marketplace/skills/error-boundary/SKILL.md +235 -0
- package/marketplace/skills/error-tracking/SKILL.md +261 -0
- package/marketplace/skills/eval-driven-development/SKILL.md +147 -0
- package/marketplace/skills/evaluation/SKILL.md +113 -0
- package/marketplace/skills/event-contract-design/SKILL.md +60 -0
- package/marketplace/skills/event-storming/SKILL.md +56 -0
- package/marketplace/skills/form-ux-architecture/SKILL.md +60 -0
- package/marketplace/skills/framework-fit-analysis/SKILL.md +59 -0
- package/marketplace/skills/frontend-architecture/SKILL.md +43 -0
- package/marketplace/skills/generative-ui/SKILL.md +118 -0
- package/marketplace/skills/graph-audit/SKILL.md +81 -0
- package/marketplace/skills/guardrails/SKILL.md +118 -0
- package/marketplace/skills/hooks-patterns/SKILL.md +185 -0
- package/marketplace/skills/http-semantics/SKILL.md +136 -0
- package/marketplace/skills/ideation/SKILL.md +41 -0
- package/marketplace/skills/indexing-strategy/SKILL.md +108 -0
- package/marketplace/skills/information-architecture/SKILL.md +59 -0
- package/marketplace/skills/integration-test-design/SKILL.md +111 -0
- package/marketplace/skills/intent-recognition/SKILL.md +136 -0
- package/marketplace/skills/interaction-feedback/SKILL.md +59 -0
- package/marketplace/skills/interaction-patterns/SKILL.md +59 -0
- package/marketplace/skills/journey-mapping/SKILL.md +41 -0
- package/marketplace/skills/keywords/SKILL.md +213 -0
- package/marketplace/skills/knowledge-modeling/SKILL.md +232 -0
- package/marketplace/skills/layout-composition/SKILL.md +59 -0
- package/marketplace/skills/linguistics/SKILL.md +429 -0
- package/marketplace/skills/lint-overlay/SKILL.md +76 -0
- package/marketplace/skills/mental-models/SKILL.md +126 -0
- package/marketplace/skills/merge-queue/SKILL.md +94 -0
- package/marketplace/skills/methodology/SKILL.md +317 -0
- package/marketplace/skills/microcopy/SKILL.md +232 -0
- package/marketplace/skills/middleware-patterns/SKILL.md +363 -0
- package/marketplace/skills/mobile-responsive-ux/SKILL.md +287 -0
- package/marketplace/skills/mutation-testing/SKILL.md +112 -0
- package/marketplace/skills/naming-conventions/SKILL.md +112 -0
- package/marketplace/skills/observability-modeling/SKILL.md +59 -0
- package/marketplace/skills/ontology-modeling/SKILL.md +67 -0
- package/marketplace/skills/owasp-security/SKILL.md +153 -0
- package/marketplace/skills/pattern-recognition/SKILL.md +472 -0
- package/marketplace/skills/performance-budgets/SKILL.md +185 -0
- package/marketplace/skills/performance-engineering/SKILL.md +58 -0
- package/marketplace/skills/performance-testing/SKILL.md +125 -0
- package/marketplace/skills/printify/SKILL.md +42 -0
- package/marketplace/skills/prioritization/SKILL.md +118 -0
- package/marketplace/skills/problem-framing/SKILL.md +41 -0
- package/marketplace/skills/problem-locating-solving/SKILL.md +203 -0
- package/marketplace/skills/project-knowledge-extraction/SKILL.md +54 -0
- package/marketplace/skills/prompt-craft/SKILL.md +134 -0
- package/marketplace/skills/prompt-injection-defense/SKILL.md +132 -0
- package/marketplace/skills/property-based-testing/SKILL.md +100 -0
- package/marketplace/skills/prototyping/SKILL.md +43 -0
- package/marketplace/skills/query-optimization/SKILL.md +144 -0
- package/marketplace/skills/real-time-updates/SKILL.md +324 -0
- package/marketplace/skills/ref-patterns/SKILL.md +284 -0
- package/marketplace/skills/refactor/SKILL.md +65 -0
- package/marketplace/skills/rendering-models/SKILL.md +142 -0
- package/marketplace/skills/replication-patterns/SKILL.md +110 -0
- package/marketplace/skills/research-synthesis/SKILL.md +41 -0
- package/marketplace/skills/route-handler-design/SKILL.md +347 -0
- package/marketplace/skills/schema-evolution/SKILL.md +140 -0
- package/marketplace/skills/security-fundamentals/SKILL.md +139 -0
- package/marketplace/skills/semantic-center/SKILL.md +194 -0
- package/marketplace/skills/semantic-relations/SKILL.md +250 -0
- package/marketplace/skills/semantics/SKILL.md +366 -0
- package/marketplace/skills/semiotics/SKILL.md +230 -0
- package/marketplace/skills/seo-strategy/SKILL.md +260 -0
- package/marketplace/skills/server-actions-design/SKILL.md +243 -0
- package/marketplace/skills/server-components-design/SKILL.md +190 -0
- package/marketplace/skills/sharding-strategy/SKILL.md +123 -0
- package/marketplace/skills/shopify/SKILL.md +42 -0
- package/marketplace/skills/skill-infrastructure/SKILL.md +320 -0
- package/marketplace/skills/skill-router/SKILL.md +71 -0
- package/marketplace/skills/skill-scaffold/SKILL.md +105 -0
- package/marketplace/skills/snapshot-testing/SKILL.md +120 -0
- package/marketplace/skills/spec-driven-development/SKILL.md +148 -0
- package/marketplace/skills/state-machine-modeling/SKILL.md +56 -0
- package/marketplace/skills/state-management/SKILL.md +134 -0
- package/marketplace/skills/streaming-architecture/SKILL.md +194 -0
- package/marketplace/skills/summarization/SKILL.md +156 -0
- package/marketplace/skills/suspense-patterns/SKILL.md +265 -0
- package/marketplace/skills/system-interface-contracts/SKILL.md +59 -0
- package/marketplace/skills/task-analysis/SKILL.md +201 -0
- package/marketplace/skills/taxonomy-design/SKILL.md +66 -0
- package/marketplace/skills/test-coverage-strategy/SKILL.md +108 -0
- package/marketplace/skills/test-doubles-design/SKILL.md +98 -0
- package/marketplace/skills/test-driven-development/SKILL.md +96 -0
- package/marketplace/skills/testing-strategy/SKILL.md +67 -0
- package/marketplace/skills/theme-system-design/SKILL.md +43 -0
- package/marketplace/skills/tool-call-flow/SKILL.md +229 -0
- package/marketplace/skills/tool-call-strategy/SKILL.md +292 -0
- package/marketplace/skills/transaction-isolation/SKILL.md +98 -0
- package/marketplace/skills/type-safety/SKILL.md +177 -0
- package/marketplace/skills/typography-system/SKILL.md +43 -0
- package/marketplace/skills/usability-testing/SKILL.md +43 -0
- package/marketplace/skills/user-research/SKILL.md +43 -0
- package/marketplace/skills/vercel-composition-patterns/SKILL.md +157 -0
- package/marketplace/skills/version-control/SKILL.md +233 -0
- package/marketplace/skills/visual-design-foundations/SKILL.md +59 -0
- package/marketplace/skills/visual-hierarchy/SKILL.md +43 -0
- package/marketplace/skills/webhook-integration/SKILL.md +331 -0
- package/marketplace/skills/writing-humanizer/SKILL.md +380 -0
- package/package.json +67 -0
- package/schemas/manifest.schema.json +811 -0
- package/schemas/manifest.v2.schema.json +164 -0
- package/schemas/manifest.v3.schema.json +758 -0
- package/schemas/manifest.v4.schema.json +755 -0
- package/schemas/manifest.v5.schema.json +755 -0
- package/schemas/manifest.v6.schema.json +811 -0
- package/schemas/skill.context.jsonld +279 -0
- package/schemas/skill.schema.json +919 -0
- package/schemas/skill.v2.schema.json +201 -0
- package/schemas/skill.v3.schema.json +827 -0
- package/schemas/skill.v4.schema.json +822 -0
- package/schemas/skill.v5.schema.json +830 -0
- package/schemas/skill.v6.schema.json +946 -0
- package/schemas/vocabulary/keywords.json +180 -0
- package/schemas/vocabulary/workspace_tags.json +23 -0
- package/scripts/__tests__/migrate-skill-v2-to-v3.test.js +161 -0
- package/scripts/__tests__/migrate-skill-v3-to-v4.test.js +158 -0
- package/scripts/__tests__/test-export-parser-drift.js +149 -0
- package/scripts/__tests__/test-marketplace-export.js +114 -0
- package/scripts/__tests__/test-router-paths.js +82 -0
- package/scripts/__tests__/test-stability-promotion.js +244 -0
- package/scripts/__tests__/test-v3-1-alias-contract.js +109 -0
- package/scripts/__tests__/test-v3-1-skos-runtime.js +116 -0
- package/scripts/backfill-schema-version.js +198 -0
- package/scripts/build-field-reference.js +160 -0
- package/scripts/build-retrieval-baseline.js +511 -0
- package/scripts/check-markdown-links.js +211 -0
- package/scripts/check-protocol-consistency.js +979 -0
- package/scripts/export-marketplace-skills.js +610 -0
- package/scripts/export-skill.js +374 -0
- package/scripts/generate-manifest.js +787 -0
- package/scripts/lib/alias-contract.js +83 -0
- package/scripts/lib/audit-prompt-builder.js +771 -0
- package/scripts/lib/mock-grader.js +134 -0
- package/scripts/lib/parse-frontmatter.js +429 -0
- package/scripts/lib/roots.js +119 -0
- package/scripts/lint/check-archetype-sections.js +185 -0
- package/scripts/lint/check-category-enum.js +83 -0
- package/scripts/lint/check-routing-eval.js +146 -0
- package/scripts/lint/check-routing-quality.js +211 -0
- package/scripts/lint/check-stability-promotion.js +220 -0
- package/scripts/lint/format-code-frame.js +206 -0
- package/scripts/marketplace-install.js +125 -0
- package/scripts/migrate-category-to-enum.js +169 -0
- package/scripts/migrate-skill-v2-to-v3.js +424 -0
- package/scripts/migrate-skill-v3-to-v4.js +200 -0
- package/scripts/migrate-skill-v5-to-v6.js +304 -0
- package/scripts/restructure-by-category.js +85 -0
- package/scripts/seed-publication-classification.js +282 -0
- package/scripts/skill-audit.js +893 -0
- package/scripts/skill-graph-drift.js +483 -0
- package/scripts/skill-graph-route.js +766 -0
- package/scripts/skill-graph-routing-eval.js +393 -0
- package/scripts/skill-lint.js +1317 -0
- package/scripts/skill-overlap.js +213 -0
- package/scripts/verify-skill-md-export.js +201 -0
|
@@ -0,0 +1,52 @@
|
|
|
1
|
+
# Findings
|
|
2
|
+
|
|
3
|
+
## Skill
|
|
4
|
+
|
|
5
|
+
`a11y`
|
|
6
|
+
|
|
7
|
+
## Audit Date
|
|
8
|
+
|
|
9
|
+
2026-04-17
|
|
10
|
+
|
|
11
|
+
## Audit Mode
|
|
12
|
+
|
|
13
|
+
`--graded` (grader: `node scripts/lib/mock-grader.js`)
|
|
14
|
+
|
|
15
|
+
## Verdict Summary
|
|
16
|
+
|
|
17
|
+
PASS WITH FIXES
|
|
18
|
+
|
|
19
|
+
## Findings
|
|
20
|
+
|
|
21
|
+
ID: F1
|
|
22
|
+
Severity: P3
|
|
23
|
+
Surface: frontmatter: triggers
|
|
24
|
+
Category: Activation quality
|
|
25
|
+
Source: grader (node scripts/lib/mock-grader.js)
|
|
26
|
+
Problem: No triggers array is declared; the skill is only discoverable via keyword matching.
|
|
27
|
+
Evidence: triggers: (absent from frontmatter)
|
|
28
|
+
Required action: Add a `triggers: [a11y-skill]` entry so label-based routers can activate the skill deterministically.
|
|
29
|
+
|
|
30
|
+
ID: F2
|
|
31
|
+
Severity: P2
|
|
32
|
+
Surface: skill body
|
|
33
|
+
Category: Content quality
|
|
34
|
+
Source: grader (node scripts/lib/mock-grader.js)
|
|
35
|
+
Problem: No explicit `## Do NOT Use When` section; negative routing is only implied.
|
|
36
|
+
Evidence: Section headings observed: `# Documentation`, `## Coverage`, `## Philosophy`, `## Verification` — no explicit negative-bounds section.
|
|
37
|
+
Required action: Add a `## Do NOT Use When` section listing at least two cases where the skill must not activate (e.g. prose documentation authoring, runtime debugging).
|
|
38
|
+
|
|
39
|
+
ID: F3
|
|
40
|
+
Severity: P3
|
|
41
|
+
Surface: examples/evals/comprehension.json
|
|
42
|
+
Category: Eval quality
|
|
43
|
+
Source: grader (node scripts/lib/mock-grader.js)
|
|
44
|
+
Problem: Eval covers happy-path and boundary prompts but has no explicit failure-mode eval.
|
|
45
|
+
Evidence: Seven prompts, all affirmative; no prompt tests what the skill should refuse.
|
|
46
|
+
Required action: Add one failure-mode prompt per skills/evaluation SKILL.md guidance (≥ 1 negative expectation per skill).
|
|
47
|
+
|
|
48
|
+
## Required Fixes
|
|
49
|
+
|
|
50
|
+
- Activation quality: PASS WITH FIXES — 1 finding(s) from grader
|
|
51
|
+
- Content quality: PASS WITH FIXES — 1 finding(s) from grader
|
|
52
|
+
- Eval quality: PASS WITH FIXES — 1 finding(s) from grader
|
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
# Scorecard
|
|
2
|
+
|
|
3
|
+
## Skill
|
|
4
|
+
|
|
5
|
+
`a11y`
|
|
6
|
+
|
|
7
|
+
## Dimensions
|
|
8
|
+
|
|
9
|
+
| Dimension | Score | Note |
|
|
10
|
+
|---|---|---|
|
|
11
|
+
| Metadata validity | 5 | auto: lint passes |
|
|
12
|
+
| Activation quality | 4 | PASS WITH FIXES — Description names real trigger scenarios and keywords are specific, but the skill has no explicit `triggers` array for label-based routing. |
|
|
13
|
+
| Relation quality | 5 | PASS — adjacent and boundary relations are concise and point at real sibling skills; no dangling targets. |
|
|
14
|
+
| Grounding fidelity | N/A | N/A — Dimension does not apply to this skill (scope: portable). |
|
|
15
|
+
| Content quality | 4 | PASS WITH FIXES — Coverage, Philosophy, and Verification sections are present and concrete, but "Do NOT Use When" boundaries are implicit rather than an explicit named section. |
|
|
16
|
+
| Eval quality | 4 | PASS WITH FIXES — Eval artifact ships with seven grounded prompts; boundary coverage is good, but failure-mode prompts are missing. |
|
|
17
|
+
| Portability quality | 5 | PASS — Skill is generic, portable, and the skill-md export via scripts/export-skill.js round-trips cleanly. |
|
|
18
|
+
|
|
19
|
+
> **Note:** Metadata validity is auto-scored from `skill-lint.js`. All other
|
|
20
|
+
> dimensions come from the `--graded` grader pass. See `verdict.md` for the
|
|
21
|
+
> per-dimension rationale and `findings.md` for the specific finding evidence.
|
|
@@ -0,0 +1,44 @@
|
|
|
1
|
+
# Verdict
|
|
2
|
+
|
|
3
|
+
## Skill
|
|
4
|
+
|
|
5
|
+
`a11y`
|
|
6
|
+
|
|
7
|
+
## Audit Date
|
|
8
|
+
|
|
9
|
+
2026-04-17
|
|
10
|
+
|
|
11
|
+
## Audit Mode
|
|
12
|
+
|
|
13
|
+
`--graded` (grader: `node scripts/lib/mock-grader.js`)
|
|
14
|
+
|
|
15
|
+
## Final Verdict
|
|
16
|
+
|
|
17
|
+
PASS WITH FIXES
|
|
18
|
+
|
|
19
|
+
## Dimension Summary
|
|
20
|
+
|
|
21
|
+
| Dimension | Verdict | Score |
|
|
22
|
+
|---|---|---|
|
|
23
|
+
| Metadata validity | PASS | 5 |
|
|
24
|
+
| Activation quality | PASS WITH FIXES | 4 |
|
|
25
|
+
| Relation quality | PASS | 5 |
|
|
26
|
+
| Grounding fidelity | N/A | N/A |
|
|
27
|
+
| Content quality | PASS WITH FIXES | 4 |
|
|
28
|
+
| Eval quality | PASS WITH FIXES | 4 |
|
|
29
|
+
| Portability quality | PASS | 5 |
|
|
30
|
+
|
|
31
|
+
## Rationale
|
|
32
|
+
|
|
33
|
+
- **Metadata validity** (PASS, score 5): All thirteen required v2 frontmatter fields are present and well-typed; schema_version is 2.
|
|
34
|
+
- **Activation quality** (PASS WITH FIXES, score 4): Description names real trigger scenarios and keywords are specific, but the skill has no explicit `triggers` array for label-based routing.
|
|
35
|
+
- **Relation quality** (PASS, score 5): adjacent and boundary relations are concise and point at real sibling skills; no dangling targets.
|
|
36
|
+
- **Grounding fidelity** (N/A, score N/A): Dimension does not apply to this skill (scope: portable).
|
|
37
|
+
- **Content quality** (PASS WITH FIXES, score 4): Coverage, Philosophy, and Verification sections are present and concrete, but "Do NOT Use When" boundaries are implicit rather than an explicit named section.
|
|
38
|
+
- **Eval quality** (PASS WITH FIXES, score 4): Eval artifact ships with seven grounded prompts; boundary coverage is good, but failure-mode prompts are missing.
|
|
39
|
+
- **Portability quality** (PASS, score 5): Skill is generic, portable, and the skill-md export via scripts/export-skill.js round-trips cleanly.
|
|
40
|
+
|
|
41
|
+
|
|
42
|
+
## Follow-up State
|
|
43
|
+
|
|
44
|
+
TODO — set to one of: `No fixes required`, `Fixes applied`, `Fixes deferred — <reason>`, or `Pending human review`.
|
|
@@ -0,0 +1,59 @@
|
|
|
1
|
+
# Findings
|
|
2
|
+
|
|
3
|
+
## Skill
|
|
4
|
+
|
|
5
|
+
`debugging`
|
|
6
|
+
|
|
7
|
+
## Audit Date
|
|
8
|
+
|
|
9
|
+
2026-04-17
|
|
10
|
+
|
|
11
|
+
## Verdict Summary
|
|
12
|
+
|
|
13
|
+
PASS WITH FIXES
|
|
14
|
+
|
|
15
|
+
## Findings
|
|
16
|
+
|
|
17
|
+
ID: F1
|
|
18
|
+
Severity: TODO
|
|
19
|
+
Surface: activation
|
|
20
|
+
Category: Activation quality — routing coverage
|
|
21
|
+
Problem: TODO — human judgment required
|
|
22
|
+
Evidence: TODO — reviewer must inspect the skill body
|
|
23
|
+
Required action: Does the description name real trigger scenarios? Are keywords specific and not generic filler? Does the skill under-trigger or over-trigger for its intended use case?
|
|
24
|
+
|
|
25
|
+
ID: F2
|
|
26
|
+
Severity: TODO
|
|
27
|
+
Surface: relations
|
|
28
|
+
Category: Relation quality — graph correctness
|
|
29
|
+
Problem: TODO — human judgment required
|
|
30
|
+
Evidence: TODO — reviewer must inspect the skill body
|
|
31
|
+
Required action: Do relations point at semantically correct neighbors? Are boundary handoffs crisp enough to prevent misuse? Are broader/narrower claims taxonomic rather than associative? Are dependencies real?
|
|
32
|
+
|
|
33
|
+
ID: F3
|
|
34
|
+
Severity: TODO
|
|
35
|
+
Surface: grounding
|
|
36
|
+
Category: Grounding fidelity — claims vs truth sources
|
|
37
|
+
Problem: TODO — human judgment required
|
|
38
|
+
Evidence: TODO — reviewer must inspect the skill body
|
|
39
|
+
Required action: If scope: codebase, do all truth_sources exist? Do claims in the body match the referenced files? Classify any mismatch as skill drift, code drift, or doc drift.
|
|
40
|
+
|
|
41
|
+
ID: F4
|
|
42
|
+
Severity: TODO
|
|
43
|
+
Surface: content
|
|
44
|
+
Category: Content quality — completeness and density
|
|
45
|
+
Problem: TODO — human judgment required
|
|
46
|
+
Evidence: TODO — reviewer must inspect the skill body
|
|
47
|
+
Required action: Does the skill have a clear Coverage section, a Philosophy section, at least one decision table or checklist, and explicit negative bounds (Do NOT Use When)? Does it contain generic filler that adds no routing signal?
|
|
48
|
+
|
|
49
|
+
ID: F5
|
|
50
|
+
Severity: TODO
|
|
51
|
+
Surface: evals
|
|
52
|
+
Category: Eval quality — coverage and realism
|
|
53
|
+
Problem: TODO — human judgment required
|
|
54
|
+
Evidence: TODO — reviewer must inspect the skill body
|
|
55
|
+
Required action: Do eval files exist if the skill is expected to be graded? Do they test realistic prompts — not trivia — and cover boundaries and failure cases as well as the happy path?
|
|
56
|
+
|
|
57
|
+
## Required Fixes
|
|
58
|
+
|
|
59
|
+
None identified by lint. See human-judgment finding blocks above for remaining review areas.
|
|
@@ -0,0 +1,22 @@
|
|
|
1
|
+
# Scorecard
|
|
2
|
+
|
|
3
|
+
## Skill
|
|
4
|
+
|
|
5
|
+
`debugging`
|
|
6
|
+
|
|
7
|
+
## Dimensions
|
|
8
|
+
|
|
9
|
+
| Dimension | Score | Note |
|
|
10
|
+
|---|---|---|
|
|
11
|
+
| Metadata validity | 5 | auto: lint passes |
|
|
12
|
+
| Activation quality | TODO | Human review required — see findings.md |
|
|
13
|
+
| Relation quality | TODO | Human review required — see findings.md |
|
|
14
|
+
| Grounding fidelity | N/A | `scope: portable` — grounding dimension does not apply |
|
|
15
|
+
| Content quality | TODO | Human review required — see findings.md |
|
|
16
|
+
| Eval quality | TODO | Human review required — see findings.md |
|
|
17
|
+
| Portability quality | TODO | Human review required — see findings.md |
|
|
18
|
+
|
|
19
|
+
> **Note:** This scorecard was generated by `node scripts/skill-audit.js`.
|
|
20
|
+
> Metadata validity is auto-scored from `skill-lint.js` output.
|
|
21
|
+
> All other dimensions require human judgment. Replace each TODO with a
|
|
22
|
+
> 1–5 score and a short justification once the review is complete.
|
|
@@ -0,0 +1,33 @@
|
|
|
1
|
+
# Verdict
|
|
2
|
+
|
|
3
|
+
## Skill
|
|
4
|
+
|
|
5
|
+
`debugging`
|
|
6
|
+
|
|
7
|
+
## Audit Date
|
|
8
|
+
|
|
9
|
+
2026-04-17
|
|
10
|
+
|
|
11
|
+
## Final Verdict
|
|
12
|
+
|
|
13
|
+
PASS WITH FIXES
|
|
14
|
+
|
|
15
|
+
## Rationale
|
|
16
|
+
|
|
17
|
+
The skill passes all deterministic lint checks (schema validity, naming convention, relation target existence, eval coherence). Human judgment is required to assess activation quality, relation semantics, grounding fidelity, content quality, eval realism, and portability. Update this verdict after completing the qualitative review sections in findings.md.
|
|
18
|
+
|
|
19
|
+
## Human Judgment Required
|
|
20
|
+
|
|
21
|
+
This is a stub verdict generated by `node scripts/skill-audit.js`. It reflects only the deterministic lint result.
|
|
22
|
+
A human auditor must review the following before this verdict is final:
|
|
23
|
+
|
|
24
|
+
- Activation quality (routing coverage, keyword specificity)
|
|
25
|
+
- Relation semantics (adjacency correctness, boundary crispness)
|
|
26
|
+
- Grounding fidelity (claims vs truth sources, when scope: codebase)
|
|
27
|
+
- Content quality (decision tables, Philosophy section, negative bounds)
|
|
28
|
+
- Eval quality (coverage, realism, boundary cases)
|
|
29
|
+
- Portability (no private assumptions leak through, export targets are real)
|
|
30
|
+
|
|
31
|
+
## Follow-up State
|
|
32
|
+
|
|
33
|
+
TODO — set to one of: `No fixes required`, `Fixes applied`, `Fixes deferred — <reason>`, or `Pending human review`.
|
|
@@ -0,0 +1,59 @@
|
|
|
1
|
+
# Findings
|
|
2
|
+
|
|
3
|
+
## Skill
|
|
4
|
+
|
|
5
|
+
`documentation`
|
|
6
|
+
|
|
7
|
+
## Audit Date
|
|
8
|
+
|
|
9
|
+
2026-04-18
|
|
10
|
+
|
|
11
|
+
## Verdict Summary
|
|
12
|
+
|
|
13
|
+
PARTIAL
|
|
14
|
+
|
|
15
|
+
## Findings
|
|
16
|
+
|
|
17
|
+
ID: F1
|
|
18
|
+
Severity: TODO
|
|
19
|
+
Surface: activation
|
|
20
|
+
Category: Activation quality — routing coverage
|
|
21
|
+
Problem: TODO — human judgment required
|
|
22
|
+
Evidence: TODO — reviewer must inspect the skill body
|
|
23
|
+
Required action: Does the description name real trigger scenarios? Are keywords specific and not generic filler? Does the skill under-trigger or over-trigger for its intended use case?
|
|
24
|
+
|
|
25
|
+
ID: F2
|
|
26
|
+
Severity: TODO
|
|
27
|
+
Surface: relations
|
|
28
|
+
Category: Relation quality — graph correctness
|
|
29
|
+
Problem: TODO — human judgment required
|
|
30
|
+
Evidence: TODO — reviewer must inspect the skill body
|
|
31
|
+
Required action: Do relations point at semantically correct neighbors? Are boundary handoffs crisp enough to prevent misuse? Are broader/narrower claims taxonomic rather than associative? Are dependencies real?
|
|
32
|
+
|
|
33
|
+
ID: F3
|
|
34
|
+
Severity: TODO
|
|
35
|
+
Surface: grounding
|
|
36
|
+
Category: Grounding quality — claims vs truth sources
|
|
37
|
+
Problem: TODO — human judgment required
|
|
38
|
+
Evidence: TODO — reviewer must inspect the skill body
|
|
39
|
+
Required action: If scope: codebase, do all truth_sources exist? Do claims in the body match the referenced files? Classify any mismatch as skill drift, code drift, or doc drift.
|
|
40
|
+
|
|
41
|
+
ID: F4
|
|
42
|
+
Severity: TODO
|
|
43
|
+
Surface: content
|
|
44
|
+
Category: Content quality — completeness and density
|
|
45
|
+
Problem: TODO — human judgment required
|
|
46
|
+
Evidence: TODO — reviewer must inspect the skill body
|
|
47
|
+
Required action: Does the skill have a clear Coverage section, a Philosophy section, at least one decision table or checklist, and explicit negative bounds (Do NOT Use When)? Does it contain generic filler that adds no routing signal?
|
|
48
|
+
|
|
49
|
+
ID: F5
|
|
50
|
+
Severity: TODO
|
|
51
|
+
Surface: evals
|
|
52
|
+
Category: Eval quality — coverage and realism
|
|
53
|
+
Problem: TODO — human judgment required
|
|
54
|
+
Evidence: TODO — reviewer must inspect the skill body
|
|
55
|
+
Required action: Do eval files exist if the skill is expected to be graded? Do they test realistic prompts — not trivia — and cover boundaries and failure cases as well as the happy path?
|
|
56
|
+
|
|
57
|
+
## Required Fixes
|
|
58
|
+
|
|
59
|
+
None identified by lint. See human-judgment finding blocks above for remaining review areas.
|
|
@@ -0,0 +1,22 @@
|
|
|
1
|
+
# Scorecard
|
|
2
|
+
|
|
3
|
+
## Skill
|
|
4
|
+
|
|
5
|
+
`documentation`
|
|
6
|
+
|
|
7
|
+
## Dimensions
|
|
8
|
+
|
|
9
|
+
| Dimension | Score | Note |
|
|
10
|
+
|---|---|---|
|
|
11
|
+
| Metadata validity | 5 | auto: lint passes clean |
|
|
12
|
+
| Activation quality | TODO | Human review required — see findings.md |
|
|
13
|
+
| Relation quality | TODO | Human review required — see findings.md |
|
|
14
|
+
| Grounding fidelity | N/A | `scope: portable` — grounding dimension does not apply |
|
|
15
|
+
| Content quality | TODO | Human review required — see findings.md |
|
|
16
|
+
| Eval quality | TODO | Human review required — see findings.md |
|
|
17
|
+
| Portability quality | TODO | Human review required — see findings.md |
|
|
18
|
+
|
|
19
|
+
> **Note:** This scorecard was generated by `node scripts/skill-audit.js`.
|
|
20
|
+
> Metadata validity is auto-scored from `skill-lint.js` output.
|
|
21
|
+
> All other dimensions require human judgment. Replace each TODO with a
|
|
22
|
+
> 1–5 score and a short justification once the review is complete.
|
|
@@ -0,0 +1,33 @@
|
|
|
1
|
+
# Verdict
|
|
2
|
+
|
|
3
|
+
## Skill
|
|
4
|
+
|
|
5
|
+
`documentation`
|
|
6
|
+
|
|
7
|
+
## Audit Date
|
|
8
|
+
|
|
9
|
+
2026-04-18
|
|
10
|
+
|
|
11
|
+
## Final Verdict
|
|
12
|
+
|
|
13
|
+
PARTIAL
|
|
14
|
+
|
|
15
|
+
## Rationale
|
|
16
|
+
|
|
17
|
+
The skill passes all deterministic lint checks (schema validity, naming convention, relation target existence, eval coherence). Human judgment is required to assess activation quality, relation semantics, grounding fidelity (when `scope: codebase`), content quality, eval quality, and portability. Update this verdict after completing the qualitative review sections in findings.md.
|
|
18
|
+
|
|
19
|
+
## Human Judgment Required
|
|
20
|
+
|
|
21
|
+
This is a stub verdict generated by `node scripts/skill-audit.js`. It reflects only the deterministic lint result.
|
|
22
|
+
A human auditor must review the following before this verdict is final:
|
|
23
|
+
|
|
24
|
+
- Activation quality (routing coverage, keyword specificity)
|
|
25
|
+
- Relation semantics (adjacency correctness, boundary crispness)
|
|
26
|
+
- Grounding fidelity (claims vs truth sources, when scope: codebase)
|
|
27
|
+
- Content quality (decision tables, Philosophy section, negative bounds)
|
|
28
|
+
- Eval quality (coverage, realism, boundary cases)
|
|
29
|
+
- Portability (no private assumptions leak through, export targets are real)
|
|
30
|
+
|
|
31
|
+
## Follow-up State
|
|
32
|
+
|
|
33
|
+
TODO — set to one of: `No fixes required`, `Fixes applied`, `Fixes deferred — <reason>`, or `Pending human review`.
|
|
@@ -0,0 +1,140 @@
|
|
|
1
|
+
{
|
|
2
|
+
"skill_name": "a11y",
|
|
3
|
+
"subject": "Interaction accessibility for web UI: semantic primitive selection, keyboard access, focus management, labeling, change announcement, and user-preference respect",
|
|
4
|
+
"adjacent_concepts": ["documentation", "testing-strategy", "refactor"],
|
|
5
|
+
"grounding_note": "Truth sources cite line ranges in SKILL.md. The stability mechanism is the `checkEvalTruthSourceRanges` check in scripts/skill-lint.js (D2) — any edit that pushes a cited line range out of file bounds, or leaves a citation pointing at a missing file, fails lint before commit. Section-anchor citations are not used because Markdown renderers slugify headings inconsistently across consumers; line ranges give the grader an exact span of text to ground against.",
|
|
6
|
+
"evals": [
|
|
7
|
+
{
|
|
8
|
+
"id": 1,
|
|
9
|
+
"prompt": "A designer wants a clickable element that triggers an action on the same page without navigating. The current code uses `<a href=\"#\">` with a click handler. According to the a11y skill's Primitive Selection table, what is the correct primitive and why is the current one wrong?",
|
|
10
|
+
"dimension": "definition",
|
|
11
|
+
"substance": "domain",
|
|
12
|
+
"calibration": "semantic",
|
|
13
|
+
"truth_mode": "code_verification",
|
|
14
|
+
"skill_type": "concept",
|
|
15
|
+
"criticality": "high",
|
|
16
|
+
"truth_sources": ["skills/a11y/SKILL.md:83-96"]
|
|
17
|
+
},
|
|
18
|
+
{
|
|
19
|
+
"id": 2,
|
|
20
|
+
"prompt": "The a11y skill says accessible interaction is \"structural, not cosmetic.\" Explain what that claim commits the author to before writing any ARIA, and why treating accessibility as a finishing pass costs teams twice.",
|
|
21
|
+
"dimension": "mental_model",
|
|
22
|
+
"substance": "domain",
|
|
23
|
+
"calibration": "semantic",
|
|
24
|
+
"truth_mode": "conceptual_correctness_plus_repo_application",
|
|
25
|
+
"skill_type": "concept",
|
|
26
|
+
"criticality": "normal",
|
|
27
|
+
"truth_sources": ["skills/a11y/SKILL.md:79-81"]
|
|
28
|
+
},
|
|
29
|
+
{
|
|
30
|
+
"id": 3,
|
|
31
|
+
"prompt": "A reviewer is asked to evaluate whether a tutorial page uses clear section headings and reading-level-appropriate prose. Should they load the a11y skill or a different one? Cite the negative-routing rule that decides this.",
|
|
32
|
+
"dimension": "boundary",
|
|
33
|
+
"substance": "contradiction-check",
|
|
34
|
+
"calibration": "semantic",
|
|
35
|
+
"truth_mode": "code_verification",
|
|
36
|
+
"skill_type": "concept",
|
|
37
|
+
"criticality": "normal",
|
|
38
|
+
"truth_sources": ["skills/a11y/SKILL.md:114-119"]
|
|
39
|
+
},
|
|
40
|
+
{
|
|
41
|
+
"id": 4,
|
|
42
|
+
"prompt": "A custom tab strip is built with `<div role=\"tab\">` elements. According to the a11y skill, what additional work must ship with the `role` attribute for the ARIA to be appropriate rather than harmful, and when is ARIA the wrong answer entirely?",
|
|
43
|
+
"dimension": "application",
|
|
44
|
+
"substance": "domain",
|
|
45
|
+
"calibration": "process",
|
|
46
|
+
"truth_mode": "process_correctness",
|
|
47
|
+
"skill_type": "workflow",
|
|
48
|
+
"criticality": "high",
|
|
49
|
+
"truth_sources": ["skills/a11y/SKILL.md:83-100"]
|
|
50
|
+
},
|
|
51
|
+
{
|
|
52
|
+
"id": 5,
|
|
53
|
+
"prompt": "The a11y Verification checklist lists \"User preferences (reduced motion, high contrast) are respected\" as a required item, alongside keyboard and label checks. Why are user-preference media queries treated as a verification gate rather than a progressive enhancement?",
|
|
54
|
+
"dimension": "purpose",
|
|
55
|
+
"substance": "domain",
|
|
56
|
+
"calibration": "semantic",
|
|
57
|
+
"truth_mode": "conceptual_correctness_plus_repo_application",
|
|
58
|
+
"skill_type": "concept",
|
|
59
|
+
"criticality": "normal",
|
|
60
|
+
"truth_sources": ["skills/a11y/SKILL.md:106-112", "skills/a11y/SKILL.md:70-77"]
|
|
61
|
+
},
|
|
62
|
+
{
|
|
63
|
+
"id": 6,
|
|
64
|
+
"prompt": "A contributor claims that adding `role=\"button\"` to a `<div>` makes it accessible because screen readers will announce it as a button. What does the a11y skill say about this claim, and why is the ad-hoc approach worse than leaving the `<div>` untyped?",
|
|
65
|
+
"dimension": "application",
|
|
66
|
+
"substance": "contradiction-check",
|
|
67
|
+
"calibration": "semantic",
|
|
68
|
+
"truth_mode": "conceptual_correctness_plus_repo_application",
|
|
69
|
+
"skill_type": "concept",
|
|
70
|
+
"criticality": "critical",
|
|
71
|
+
"truth_sources": ["skills/a11y/SKILL.md:98-100", "skills/a11y/SKILL.md:83-96"]
|
|
72
|
+
},
|
|
73
|
+
{
|
|
74
|
+
"id": 7,
|
|
75
|
+
"prompt": "An engineer asks the a11y skill to review a pull request that renames variables and extracts a function inside an already-accessible component, with no change to rendered output, focus order, or ARIA. Should the skill accept the task or refuse it? Cite the rule.",
|
|
76
|
+
"dimension": "boundary",
|
|
77
|
+
"substance": "contradiction-check",
|
|
78
|
+
"calibration": "semantic",
|
|
79
|
+
"truth_mode": "code_verification",
|
|
80
|
+
"skill_type": "concept",
|
|
81
|
+
"criticality": "normal",
|
|
82
|
+
"truth_sources": ["skills/a11y/SKILL.md:114-119"]
|
|
83
|
+
},
|
|
84
|
+
{
|
|
85
|
+
"id": 8,
|
|
86
|
+
"prompt": "A modal dialog closes when the user presses Escape. The current implementation removes the modal from the DOM but leaves focus wherever it happened to be when the modal closed, so focus is stranded on an element that no longer exists. According to the a11y skill's Coverage statement on focus management, where should focus land after the modal closes, and why does \"correctly placed after navigation and state changes\" specifically cover this case?",
|
|
87
|
+
"dimension": "application",
|
|
88
|
+
"substance": "domain",
|
|
89
|
+
"calibration": "process",
|
|
90
|
+
"truth_mode": "process_correctness",
|
|
91
|
+
"skill_type": "workflow",
|
|
92
|
+
"criticality": "high",
|
|
93
|
+
"truth_sources": ["skills/a11y/SKILL.md:70-77"]
|
|
94
|
+
},
|
|
95
|
+
{
|
|
96
|
+
"id": 9,
|
|
97
|
+
"prompt": "An async form submission shows a spinner while the request is in flight, then silently swaps the spinner for a success toast in a visual-only layer. A screen-reader user receives no signal that the form submitted successfully. Which Primitive Selection row covers this case, and what is the correct implementation?",
|
|
98
|
+
"dimension": "application",
|
|
99
|
+
"substance": "domain",
|
|
100
|
+
"calibration": "process",
|
|
101
|
+
"truth_mode": "process_correctness",
|
|
102
|
+
"skill_type": "workflow",
|
|
103
|
+
"criticality": "high",
|
|
104
|
+
"truth_sources": ["skills/a11y/SKILL.md:83-96"]
|
|
105
|
+
},
|
|
106
|
+
{
|
|
107
|
+
"id": 10,
|
|
108
|
+
"prompt": "A form input has no visible label; the placeholder text `\"Enter your email\"` is used as the label substitute. The a11y skill's Primitive Selection table explicitly flags `placeholder` as a wrong primitive for labeling. Explain why placeholder-as-label fails the skill's labeling criterion and what the correct pattern is.",
|
|
109
|
+
"dimension": "definition",
|
|
110
|
+
"substance": "domain",
|
|
111
|
+
"calibration": "semantic",
|
|
112
|
+
"truth_mode": "code_verification",
|
|
113
|
+
"skill_type": "concept",
|
|
114
|
+
"criticality": "normal",
|
|
115
|
+
"truth_sources": ["skills/a11y/SKILL.md:83-96", "skills/a11y/SKILL.md:70-77"]
|
|
116
|
+
},
|
|
117
|
+
{
|
|
118
|
+
"id": 11,
|
|
119
|
+
"prompt": "A custom dropdown is implemented as a styled `<div>` that opens on click. When it is open, the items inside are focusable but pressing the Down arrow key does nothing, Enter does not select the focused item, and Escape does not close the dropdown. No screen reader is involved in this bug report — the complaint is purely about keyboard operability. According to the a11y skill's Coverage statement on keyboard access, what is wrong with this component and what specific keyboard behaviors must ship before it is considered accessible?",
|
|
120
|
+
"dimension": "application",
|
|
121
|
+
"substance": "domain",
|
|
122
|
+
"calibration": "process",
|
|
123
|
+
"truth_mode": "process_correctness",
|
|
124
|
+
"skill_type": "workflow",
|
|
125
|
+
"criticality": "high",
|
|
126
|
+
"truth_sources": ["skills/a11y/SKILL.md:70-77", "skills/a11y/SKILL.md:98-100"]
|
|
127
|
+
},
|
|
128
|
+
{
|
|
129
|
+
"id": 12,
|
|
130
|
+
"prompt": "A product designer asks the a11y skill to recommend brand palette colors that meet WCAG AA contrast while looking premium. According to the a11y skill's `description` field and `anti_examples`, should the skill take this task? Cite the specific anti_example entry that decides the boundary and name the skill family that actually owns color-palette creation.",
|
|
131
|
+
"dimension": "boundary",
|
|
132
|
+
"substance": "contradiction-check",
|
|
133
|
+
"calibration": "semantic",
|
|
134
|
+
"truth_mode": "code_verification",
|
|
135
|
+
"skill_type": "concept",
|
|
136
|
+
"criticality": "normal",
|
|
137
|
+
"truth_sources": ["skills/a11y/SKILL.md:52-55"]
|
|
138
|
+
}
|
|
139
|
+
]
|
|
140
|
+
}
|
|
@@ -0,0 +1,52 @@
|
|
|
1
|
+
{
|
|
2
|
+
"skill_name": "api-design",
|
|
3
|
+
"subject": "API surface design for resources, actions, schemas, status codes, pagination, idempotency, versioning, auth, and error envelopes",
|
|
4
|
+
"adjacent_concepts": ["system-interface-contracts", "event-contract-design", "data-modeling", "webhook-integration"],
|
|
5
|
+
"grounding_note": "Truth sources cite the whole SKILL.md file to keep the initial eval surface stable while routing boundaries are tightened.",
|
|
6
|
+
"evals": [
|
|
7
|
+
{
|
|
8
|
+
"id": 1,
|
|
9
|
+
"prompt": "A team needs an endpoint for listing orders with filters, pagination, stable ordering, auth boundaries, and predictable errors. Which skill owns the API contract shape?",
|
|
10
|
+
"dimension": "application",
|
|
11
|
+
"substance": "domain",
|
|
12
|
+
"calibration": "process",
|
|
13
|
+
"truth_mode": "process_correctness",
|
|
14
|
+
"skill_type": "concept",
|
|
15
|
+
"criticality": "high",
|
|
16
|
+
"truth_sources": ["skills/api-design/SKILL.md"]
|
|
17
|
+
},
|
|
18
|
+
{
|
|
19
|
+
"id": 2,
|
|
20
|
+
"prompt": "A create endpoint can be retried by clients after timeouts. According to api-design, what must the contract decide before implementation?",
|
|
21
|
+
"dimension": "application",
|
|
22
|
+
"substance": "domain",
|
|
23
|
+
"calibration": "semantic",
|
|
24
|
+
"truth_mode": "conceptual_correctness_plus_repo_application",
|
|
25
|
+
"skill_type": "concept",
|
|
26
|
+
"criticality": "high",
|
|
27
|
+
"truth_sources": ["skills/api-design/SKILL.md"]
|
|
28
|
+
},
|
|
29
|
+
{
|
|
30
|
+
"id": 3,
|
|
31
|
+
"prompt": "The task is to define topic names, event envelopes, replay behavior, and compatibility rules for asynchronous messages. Should api-design accept or hand off?",
|
|
32
|
+
"dimension": "boundary",
|
|
33
|
+
"substance": "contradiction-check",
|
|
34
|
+
"calibration": "semantic",
|
|
35
|
+
"truth_mode": "code_verification",
|
|
36
|
+
"skill_type": "concept",
|
|
37
|
+
"criticality": "normal",
|
|
38
|
+
"truth_sources": ["skills/api-design/SKILL.md"]
|
|
39
|
+
},
|
|
40
|
+
{
|
|
41
|
+
"id": 4,
|
|
42
|
+
"prompt": "The task is to design database tables, foreign keys, and uniqueness constraints before any endpoint exists. Which boundary should api-design respect?",
|
|
43
|
+
"dimension": "boundary",
|
|
44
|
+
"substance": "contradiction-check",
|
|
45
|
+
"calibration": "semantic",
|
|
46
|
+
"truth_mode": "code_verification",
|
|
47
|
+
"skill_type": "concept",
|
|
48
|
+
"criticality": "normal",
|
|
49
|
+
"truth_sources": ["skills/api-design/SKILL.md"]
|
|
50
|
+
}
|
|
51
|
+
]
|
|
52
|
+
}
|
|
@@ -0,0 +1,52 @@
|
|
|
1
|
+
{
|
|
2
|
+
"skill_name": "code-review",
|
|
3
|
+
"subject": "Reviewing diffs and pull requests for correctness, clarity, security, performance, tests, and project conventions",
|
|
4
|
+
"adjacent_concepts": ["testing-strategy", "refactor", "debugging", "owasp-security"],
|
|
5
|
+
"grounding_note": "Truth sources cite the whole SKILL.md file to keep the initial eval surface stable while routing boundaries are tightened.",
|
|
6
|
+
"evals": [
|
|
7
|
+
{
|
|
8
|
+
"id": 1,
|
|
9
|
+
"prompt": "A teammate opens a PR that changes an endpoint, tests, and call sites. Which skill owns the review pass and what read order should it use?",
|
|
10
|
+
"dimension": "application",
|
|
11
|
+
"substance": "domain",
|
|
12
|
+
"calibration": "process",
|
|
13
|
+
"truth_mode": "process_correctness",
|
|
14
|
+
"skill_type": "workflow",
|
|
15
|
+
"criticality": "high",
|
|
16
|
+
"truth_sources": ["skills/code-review/SKILL.md"]
|
|
17
|
+
},
|
|
18
|
+
{
|
|
19
|
+
"id": 2,
|
|
20
|
+
"prompt": "An AI-generated diff looks plausible but changes auth behavior. According to code-review, what should the reviewer refuse to do?",
|
|
21
|
+
"dimension": "application",
|
|
22
|
+
"substance": "domain",
|
|
23
|
+
"calibration": "semantic",
|
|
24
|
+
"truth_mode": "conceptual_correctness_plus_repo_application",
|
|
25
|
+
"skill_type": "workflow",
|
|
26
|
+
"criticality": "high",
|
|
27
|
+
"truth_sources": ["skills/code-review/SKILL.md"]
|
|
28
|
+
},
|
|
29
|
+
{
|
|
30
|
+
"id": 3,
|
|
31
|
+
"prompt": "Production users are already seeing a known failure and there is no proposed diff yet. Should code-review accept the task?",
|
|
32
|
+
"dimension": "boundary",
|
|
33
|
+
"substance": "contradiction-check",
|
|
34
|
+
"calibration": "semantic",
|
|
35
|
+
"truth_mode": "code_verification",
|
|
36
|
+
"skill_type": "concept",
|
|
37
|
+
"criticality": "normal",
|
|
38
|
+
"truth_sources": ["skills/code-review/SKILL.md"]
|
|
39
|
+
},
|
|
40
|
+
{
|
|
41
|
+
"id": 4,
|
|
42
|
+
"prompt": "The user asks for a security-only audit focused on SQL injection and XSS. Which boundary should code-review respect?",
|
|
43
|
+
"dimension": "boundary",
|
|
44
|
+
"substance": "contradiction-check",
|
|
45
|
+
"calibration": "semantic",
|
|
46
|
+
"truth_mode": "code_verification",
|
|
47
|
+
"skill_type": "concept",
|
|
48
|
+
"criticality": "normal",
|
|
49
|
+
"truth_sources": ["skills/code-review/SKILL.md"]
|
|
50
|
+
}
|
|
51
|
+
]
|
|
52
|
+
}
|
|
@@ -0,0 +1,52 @@
|
|
|
1
|
+
{
|
|
2
|
+
"skill_name": "data-modeling",
|
|
3
|
+
"subject": "Persistence model design for entities, relationships, keys, constraints, normalization, denormalization, and views",
|
|
4
|
+
"adjacent_concepts": ["conceptual-modeling", "database-migration", "api-design", "system-interface-contracts"],
|
|
5
|
+
"grounding_note": "Truth sources cite the whole SKILL.md file to keep the initial eval surface stable while routing boundaries are tightened.",
|
|
6
|
+
"evals": [
|
|
7
|
+
{
|
|
8
|
+
"id": 1,
|
|
9
|
+
"prompt": "A conceptual domain model is settled and the team now needs tables, identifiers, uniqueness constraints, and relationship shape. Which skill owns the next step?",
|
|
10
|
+
"dimension": "application",
|
|
11
|
+
"substance": "domain",
|
|
12
|
+
"calibration": "process",
|
|
13
|
+
"truth_mode": "process_correctness",
|
|
14
|
+
"skill_type": "concept",
|
|
15
|
+
"criticality": "high",
|
|
16
|
+
"truth_sources": ["skills/data-modeling/SKILL.md"]
|
|
17
|
+
},
|
|
18
|
+
{
|
|
19
|
+
"id": 2,
|
|
20
|
+
"prompt": "A reporting workflow might need normalized tables, denormalized projections, or materialized views. What should data-modeling evaluate?",
|
|
21
|
+
"dimension": "application",
|
|
22
|
+
"substance": "domain",
|
|
23
|
+
"calibration": "semantic",
|
|
24
|
+
"truth_mode": "conceptual_correctness_plus_repo_application",
|
|
25
|
+
"skill_type": "concept",
|
|
26
|
+
"criticality": "high",
|
|
27
|
+
"truth_sources": ["skills/data-modeling/SKILL.md"]
|
|
28
|
+
},
|
|
29
|
+
{
|
|
30
|
+
"id": 3,
|
|
31
|
+
"prompt": "The task is to write the migration rollout sequence and avoid table locks in production. Should data-modeling continue or hand off?",
|
|
32
|
+
"dimension": "boundary",
|
|
33
|
+
"substance": "contradiction-check",
|
|
34
|
+
"calibration": "semantic",
|
|
35
|
+
"truth_mode": "code_verification",
|
|
36
|
+
"skill_type": "concept",
|
|
37
|
+
"criticality": "normal",
|
|
38
|
+
"truth_sources": ["skills/data-modeling/SKILL.md"]
|
|
39
|
+
},
|
|
40
|
+
{
|
|
41
|
+
"id": 4,
|
|
42
|
+
"prompt": "The task is to design request and response JSON for a route handler. Which boundary should data-modeling respect?",
|
|
43
|
+
"dimension": "boundary",
|
|
44
|
+
"substance": "contradiction-check",
|
|
45
|
+
"calibration": "semantic",
|
|
46
|
+
"truth_mode": "code_verification",
|
|
47
|
+
"skill_type": "concept",
|
|
48
|
+
"criticality": "normal",
|
|
49
|
+
"truth_sources": ["skills/data-modeling/SKILL.md"]
|
|
50
|
+
}
|
|
51
|
+
]
|
|
52
|
+
}
|