@skill-graph/cli 0.5.6
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +247 -0
- package/LICENSE +200 -0
- package/NOTICE +62 -0
- package/README.md +398 -0
- package/SKILL_GRAPH.md +443 -0
- package/bin/skill-graph.js +374 -0
- package/docs/ADOPTION.md +117 -0
- package/docs/CONFORMANCE.md +66 -0
- package/docs/PRIMER.md +384 -0
- package/docs/QUICKSTART-30MIN.md +333 -0
- package/docs/ROUTING-METRICS.md +120 -0
- package/docs/SKILL-MD-FORMAT-COMPATIBILITY.md +127 -0
- package/docs/SKILL_AUDIT_CHECKLIST.md +199 -0
- package/docs/SKILL_AUDIT_LOOP.md +195 -0
- package/docs/SKILL_METADATA_PROTOCOL.md +609 -0
- package/docs/_archived/marketplace-publication-priority-2026-05-18.md +239 -0
- package/docs/adr/0001-predicate-set.md +69 -0
- package/docs/adr/0002-json-ld-context.md +82 -0
- package/docs/adr/0003-ontoclean-rigidity-tags.md +65 -0
- package/docs/adr/0004-persistent-identifiers.md +74 -0
- package/docs/adr/0005-freshness-consolidation.md +70 -0
- package/docs/adr/0006-revise-predicate-rename.md +105 -0
- package/docs/adr/0007-audit-loop-cadence.md +99 -0
- package/docs/adr/0008-skill-surface-split-and-curation-policy.md +93 -0
- package/docs/category-consumers.md +168 -0
- package/docs/concept-map.md +194 -0
- package/docs/diagrams/drift-states.mmd +21 -0
- package/docs/diagrams/manifest-pipeline.mmd +25 -0
- package/docs/diagrams/routing-harness.mmd +41 -0
- package/docs/diagrams/starter-graph.mmd +53 -0
- package/docs/field-decision-guide.md +315 -0
- package/docs/field-rationale.md +211 -0
- package/docs/field-reference.generated.md +624 -0
- package/docs/field-reference.md +1426 -0
- package/docs/glossary.md +190 -0
- package/docs/head-noun-glossary.md +63 -0
- package/docs/images/audit-phases.png +0 -0
- package/docs/images/drift-states.png +0 -0
- package/docs/images/graded-mode.png +0 -0
- package/docs/images/manifest-pipeline.png +0 -0
- package/docs/images/routing-harness.png +0 -0
- package/docs/images/skill-anatomy.png +0 -0
- package/docs/images/starter-graph.png +0 -0
- package/docs/images/system-model.png +0 -0
- package/docs/integrations/github-actions.md +155 -0
- package/docs/manifest-field-mapping.md +443 -0
- package/docs/marketplace-publication-queue.generated.md +240 -0
- package/docs/marketplace-release-agent-prompt.md +82 -0
- package/docs/marketplace-skill-candidate-list.md +272 -0
- package/docs/marketplace-syndication.md +222 -0
- package/docs/migration-sample-review.md +155 -0
- package/docs/migrations/v4-to-v5.md +168 -0
- package/docs/migrations/v5-to-v6.md +221 -0
- package/docs/name-exceptions.yaml +37 -0
- package/docs/plans/marketplace-p1-public-migration-plan.md +41 -0
- package/docs/plans/multi-root-workspace.md +148 -0
- package/docs/plans/scripts-roadmap.md +107 -0
- package/docs/plans/v4-schema-bump.md +160 -0
- package/docs/plans/wave-2-extraction.md +122 -0
- package/docs/positioning-vs-marketplaces.md +175 -0
- package/docs/proposals/skill-audit-loop-positioning.md +160 -0
- package/docs/quality-doctrine.md +138 -0
- package/docs/recommended-skills.md +150 -0
- package/docs/research/skill-comprehension-eval-research.md +1830 -0
- package/docs/research/skill-retrieval-evidence.md +66 -0
- package/docs/skill-metadata-protocol.md +471 -0
- package/docs/skills-sh-maintainer-cleanup-request.md +80 -0
- package/examples/audits/a11y/findings.md +52 -0
- package/examples/audits/a11y/scorecard.md +21 -0
- package/examples/audits/a11y/verdict.md +44 -0
- package/examples/audits/debugging/findings.md +59 -0
- package/examples/audits/debugging/scorecard.md +22 -0
- package/examples/audits/debugging/verdict.md +33 -0
- package/examples/audits/documentation/findings.md +59 -0
- package/examples/audits/documentation/scorecard.md +22 -0
- package/examples/audits/documentation/verdict.md +33 -0
- package/examples/evals/a11y.json +140 -0
- package/examples/evals/api-design.json +52 -0
- package/examples/evals/code-review.json +52 -0
- package/examples/evals/data-modeling.json +52 -0
- package/examples/evals/database-migration.json +52 -0
- package/examples/evals/debugging.json +118 -0
- package/examples/evals/dependency-architecture.json +52 -0
- package/examples/evals/design-system-architecture.json +52 -0
- package/examples/evals/error-tracking.json +52 -0
- package/examples/evals/event-contract-design.json +52 -0
- package/examples/evals/form-ux-architecture.json +52 -0
- package/examples/evals/framework-fit-analysis.json +52 -0
- package/examples/evals/graph-audit.json +139 -0
- package/examples/evals/information-architecture.json +52 -0
- package/examples/evals/interaction-feedback.json +52 -0
- package/examples/evals/interaction-patterns.json +52 -0
- package/examples/evals/layout-composition.json +52 -0
- package/examples/evals/lint-overlay.json +117 -0
- package/examples/evals/microcopy.json +52 -0
- package/examples/evals/observability-modeling.json +52 -0
- package/examples/evals/pattern-recognition.json +96 -0
- package/examples/evals/performance-engineering.json +52 -0
- package/examples/evals/refactor.json +128 -0
- package/examples/evals/semiotics.json +52 -0
- package/examples/evals/skill-infrastructure.json +96 -0
- package/examples/evals/skill-router.json +140 -0
- package/examples/evals/skill-router.routing.json +113 -0
- package/examples/evals/system-interface-contracts.json +52 -0
- package/examples/evals/task-analysis.json +52 -0
- package/examples/evals/testing-strategy.json +118 -0
- package/examples/evals/type-safety.json +249 -0
- package/examples/evals/visual-design-foundations.json +52 -0
- package/examples/evals/webhook-integration.json +52 -0
- package/examples/exports/a11y.skill-md.md +80 -0
- package/examples/exports/debugging.skill-md.md +80 -0
- package/examples/exports/refactor.skill-md.md +78 -0
- package/examples/exports/testing-strategy.skill-md.md +81 -0
- package/examples/projects/markdown-static-site/README.md +115 -0
- package/examples/projects/markdown-static-site/skills/content-source-router/SKILL.md +131 -0
- package/examples/projects/markdown-static-site/skills/image-optimization-pipeline-config/SKILL.md +132 -0
- package/examples/projects/markdown-static-site/skills/link-rot-detection/SKILL.md +103 -0
- package/examples/projects/markdown-static-site/skills/markdown-post-frontmatter-validation/SKILL.md +133 -0
- package/examples/projects/markdown-static-site/skills/migrate-posts-to-v2-frontmatter/SKILL.md +140 -0
- package/examples/projects/saas-stripe-postgres/README.md +208 -0
- package/examples/projects/saas-stripe-postgres/db/migrations/0004_canonicalize_orders.sql +37 -0
- package/examples/projects/saas-stripe-postgres/db/schema.sql +112 -0
- package/examples/projects/saas-stripe-postgres/skills/migrate-orders-to-canonical-schema/SKILL.md +149 -0
- package/examples/projects/saas-stripe-postgres/skills/nextjs-server-action-validation/SKILL.md +154 -0
- package/examples/projects/saas-stripe-postgres/skills/payment-provider-router/SKILL.md +153 -0
- package/examples/projects/saas-stripe-postgres/skills/postgres-rls-pattern/SKILL.md +163 -0
- package/examples/projects/saas-stripe-postgres/skills/stripe-webhook-signature-verification/SKILL.md +137 -0
- package/examples/protocol/skill-metadata-template.md +301 -0
- package/examples/protocol/skills.manifest.sample.json +13245 -0
- package/examples/skill-metadata-template.md +317 -0
- package/examples/skills.manifest.sample.json +13519 -0
- package/examples/tests/v3-1-skos-fixture/SKILL.md +93 -0
- package/marketplace/README.md +17 -0
- package/marketplace/skills/a11y/SKILL.md +66 -0
- package/marketplace/skills/acid-fundamentals/SKILL.md +106 -0
- package/marketplace/skills/agent-engineering/SKILL.md +386 -0
- package/marketplace/skills/agent-eval-design/SKILL.md +55 -0
- package/marketplace/skills/ai-native-development/SKILL.md +294 -0
- package/marketplace/skills/api-design/SKILL.md +60 -0
- package/marketplace/skills/architecture-decision-records/SKILL.md +55 -0
- package/marketplace/skills/background-jobs/SKILL.md +265 -0
- package/marketplace/skills/bounded-context-mapping/SKILL.md +55 -0
- package/marketplace/skills/cap-theorem-tradeoffs/SKILL.md +127 -0
- package/marketplace/skills/client-server-boundary/SKILL.md +187 -0
- package/marketplace/skills/code-review/SKILL.md +120 -0
- package/marketplace/skills/color-system-design/SKILL.md +43 -0
- package/marketplace/skills/component-architecture/SKILL.md +126 -0
- package/marketplace/skills/compression/SKILL.md +112 -0
- package/marketplace/skills/conceptual-modeling/SKILL.md +181 -0
- package/marketplace/skills/connection-pooling/SKILL.md +105 -0
- package/marketplace/skills/constraint-awareness/SKILL.md +287 -0
- package/marketplace/skills/content-monitor/SKILL.md +209 -0
- package/marketplace/skills/context-engineering/SKILL.md +320 -0
- package/marketplace/skills/context-graph/SKILL.md +174 -0
- package/marketplace/skills/context-management/SKILL.md +174 -0
- package/marketplace/skills/context-window/SKILL.md +239 -0
- package/marketplace/skills/contract-testing/SKILL.md +120 -0
- package/marketplace/skills/cron-scheduling/SKILL.md +223 -0
- package/marketplace/skills/dark-mode-implementation/SKILL.md +47 -0
- package/marketplace/skills/data-modeling/SKILL.md +59 -0
- package/marketplace/skills/data-modeling-fundamentals/SKILL.md +117 -0
- package/marketplace/skills/database-migration/SKILL.md +429 -0
- package/marketplace/skills/debugging/SKILL.md +67 -0
- package/marketplace/skills/dependency-architecture/SKILL.md +58 -0
- package/marketplace/skills/design-module-composition/SKILL.md +43 -0
- package/marketplace/skills/design-system-architecture/SKILL.md +61 -0
- package/marketplace/skills/design-thinking/SKILL.md +44 -0
- package/marketplace/skills/diagnosis/SKILL.md +296 -0
- package/marketplace/skills/diff-analysis/SKILL.md +188 -0
- package/marketplace/skills/e2e-test-design/SKILL.md +113 -0
- package/marketplace/skills/entity-relationship-modeling/SKILL.md +218 -0
- package/marketplace/skills/epistemic-grounding/SKILL.md +112 -0
- package/marketplace/skills/error-boundary/SKILL.md +235 -0
- package/marketplace/skills/error-tracking/SKILL.md +261 -0
- package/marketplace/skills/eval-driven-development/SKILL.md +147 -0
- package/marketplace/skills/evaluation/SKILL.md +113 -0
- package/marketplace/skills/event-contract-design/SKILL.md +60 -0
- package/marketplace/skills/event-storming/SKILL.md +56 -0
- package/marketplace/skills/form-ux-architecture/SKILL.md +60 -0
- package/marketplace/skills/framework-fit-analysis/SKILL.md +59 -0
- package/marketplace/skills/frontend-architecture/SKILL.md +43 -0
- package/marketplace/skills/generative-ui/SKILL.md +118 -0
- package/marketplace/skills/graph-audit/SKILL.md +81 -0
- package/marketplace/skills/guardrails/SKILL.md +118 -0
- package/marketplace/skills/hooks-patterns/SKILL.md +185 -0
- package/marketplace/skills/http-semantics/SKILL.md +136 -0
- package/marketplace/skills/ideation/SKILL.md +41 -0
- package/marketplace/skills/indexing-strategy/SKILL.md +108 -0
- package/marketplace/skills/information-architecture/SKILL.md +59 -0
- package/marketplace/skills/integration-test-design/SKILL.md +111 -0
- package/marketplace/skills/intent-recognition/SKILL.md +136 -0
- package/marketplace/skills/interaction-feedback/SKILL.md +59 -0
- package/marketplace/skills/interaction-patterns/SKILL.md +59 -0
- package/marketplace/skills/journey-mapping/SKILL.md +41 -0
- package/marketplace/skills/keywords/SKILL.md +213 -0
- package/marketplace/skills/knowledge-modeling/SKILL.md +232 -0
- package/marketplace/skills/layout-composition/SKILL.md +59 -0
- package/marketplace/skills/linguistics/SKILL.md +429 -0
- package/marketplace/skills/lint-overlay/SKILL.md +76 -0
- package/marketplace/skills/mental-models/SKILL.md +126 -0
- package/marketplace/skills/merge-queue/SKILL.md +94 -0
- package/marketplace/skills/methodology/SKILL.md +317 -0
- package/marketplace/skills/microcopy/SKILL.md +232 -0
- package/marketplace/skills/middleware-patterns/SKILL.md +363 -0
- package/marketplace/skills/mobile-responsive-ux/SKILL.md +287 -0
- package/marketplace/skills/mutation-testing/SKILL.md +112 -0
- package/marketplace/skills/naming-conventions/SKILL.md +112 -0
- package/marketplace/skills/observability-modeling/SKILL.md +59 -0
- package/marketplace/skills/ontology-modeling/SKILL.md +67 -0
- package/marketplace/skills/owasp-security/SKILL.md +153 -0
- package/marketplace/skills/pattern-recognition/SKILL.md +472 -0
- package/marketplace/skills/performance-budgets/SKILL.md +185 -0
- package/marketplace/skills/performance-engineering/SKILL.md +58 -0
- package/marketplace/skills/performance-testing/SKILL.md +125 -0
- package/marketplace/skills/printify/SKILL.md +42 -0
- package/marketplace/skills/prioritization/SKILL.md +118 -0
- package/marketplace/skills/problem-framing/SKILL.md +41 -0
- package/marketplace/skills/problem-locating-solving/SKILL.md +203 -0
- package/marketplace/skills/project-knowledge-extraction/SKILL.md +54 -0
- package/marketplace/skills/prompt-craft/SKILL.md +134 -0
- package/marketplace/skills/prompt-injection-defense/SKILL.md +132 -0
- package/marketplace/skills/property-based-testing/SKILL.md +100 -0
- package/marketplace/skills/prototyping/SKILL.md +43 -0
- package/marketplace/skills/query-optimization/SKILL.md +144 -0
- package/marketplace/skills/real-time-updates/SKILL.md +324 -0
- package/marketplace/skills/ref-patterns/SKILL.md +284 -0
- package/marketplace/skills/refactor/SKILL.md +65 -0
- package/marketplace/skills/rendering-models/SKILL.md +142 -0
- package/marketplace/skills/replication-patterns/SKILL.md +110 -0
- package/marketplace/skills/research-synthesis/SKILL.md +41 -0
- package/marketplace/skills/route-handler-design/SKILL.md +347 -0
- package/marketplace/skills/schema-evolution/SKILL.md +140 -0
- package/marketplace/skills/security-fundamentals/SKILL.md +139 -0
- package/marketplace/skills/semantic-center/SKILL.md +194 -0
- package/marketplace/skills/semantic-relations/SKILL.md +250 -0
- package/marketplace/skills/semantics/SKILL.md +366 -0
- package/marketplace/skills/semiotics/SKILL.md +230 -0
- package/marketplace/skills/seo-strategy/SKILL.md +260 -0
- package/marketplace/skills/server-actions-design/SKILL.md +243 -0
- package/marketplace/skills/server-components-design/SKILL.md +190 -0
- package/marketplace/skills/sharding-strategy/SKILL.md +123 -0
- package/marketplace/skills/shopify/SKILL.md +42 -0
- package/marketplace/skills/skill-infrastructure/SKILL.md +320 -0
- package/marketplace/skills/skill-router/SKILL.md +71 -0
- package/marketplace/skills/skill-scaffold/SKILL.md +105 -0
- package/marketplace/skills/snapshot-testing/SKILL.md +120 -0
- package/marketplace/skills/spec-driven-development/SKILL.md +148 -0
- package/marketplace/skills/state-machine-modeling/SKILL.md +56 -0
- package/marketplace/skills/state-management/SKILL.md +134 -0
- package/marketplace/skills/streaming-architecture/SKILL.md +194 -0
- package/marketplace/skills/summarization/SKILL.md +156 -0
- package/marketplace/skills/suspense-patterns/SKILL.md +265 -0
- package/marketplace/skills/system-interface-contracts/SKILL.md +59 -0
- package/marketplace/skills/task-analysis/SKILL.md +201 -0
- package/marketplace/skills/taxonomy-design/SKILL.md +66 -0
- package/marketplace/skills/test-coverage-strategy/SKILL.md +108 -0
- package/marketplace/skills/test-doubles-design/SKILL.md +98 -0
- package/marketplace/skills/test-driven-development/SKILL.md +96 -0
- package/marketplace/skills/testing-strategy/SKILL.md +67 -0
- package/marketplace/skills/theme-system-design/SKILL.md +43 -0
- package/marketplace/skills/tool-call-flow/SKILL.md +229 -0
- package/marketplace/skills/tool-call-strategy/SKILL.md +292 -0
- package/marketplace/skills/transaction-isolation/SKILL.md +98 -0
- package/marketplace/skills/type-safety/SKILL.md +177 -0
- package/marketplace/skills/typography-system/SKILL.md +43 -0
- package/marketplace/skills/usability-testing/SKILL.md +43 -0
- package/marketplace/skills/user-research/SKILL.md +43 -0
- package/marketplace/skills/vercel-composition-patterns/SKILL.md +157 -0
- package/marketplace/skills/version-control/SKILL.md +233 -0
- package/marketplace/skills/visual-design-foundations/SKILL.md +59 -0
- package/marketplace/skills/visual-hierarchy/SKILL.md +43 -0
- package/marketplace/skills/webhook-integration/SKILL.md +331 -0
- package/marketplace/skills/writing-humanizer/SKILL.md +380 -0
- package/package.json +67 -0
- package/schemas/manifest.schema.json +811 -0
- package/schemas/manifest.v2.schema.json +164 -0
- package/schemas/manifest.v3.schema.json +758 -0
- package/schemas/manifest.v4.schema.json +755 -0
- package/schemas/manifest.v5.schema.json +755 -0
- package/schemas/manifest.v6.schema.json +811 -0
- package/schemas/skill.context.jsonld +279 -0
- package/schemas/skill.schema.json +919 -0
- package/schemas/skill.v2.schema.json +201 -0
- package/schemas/skill.v3.schema.json +827 -0
- package/schemas/skill.v4.schema.json +822 -0
- package/schemas/skill.v5.schema.json +830 -0
- package/schemas/skill.v6.schema.json +946 -0
- package/schemas/vocabulary/keywords.json +180 -0
- package/schemas/vocabulary/workspace_tags.json +23 -0
- package/scripts/__tests__/migrate-skill-v2-to-v3.test.js +161 -0
- package/scripts/__tests__/migrate-skill-v3-to-v4.test.js +158 -0
- package/scripts/__tests__/test-export-parser-drift.js +149 -0
- package/scripts/__tests__/test-marketplace-export.js +114 -0
- package/scripts/__tests__/test-router-paths.js +82 -0
- package/scripts/__tests__/test-stability-promotion.js +244 -0
- package/scripts/__tests__/test-v3-1-alias-contract.js +109 -0
- package/scripts/__tests__/test-v3-1-skos-runtime.js +116 -0
- package/scripts/backfill-schema-version.js +198 -0
- package/scripts/build-field-reference.js +160 -0
- package/scripts/build-retrieval-baseline.js +511 -0
- package/scripts/check-markdown-links.js +211 -0
- package/scripts/check-protocol-consistency.js +979 -0
- package/scripts/export-marketplace-skills.js +610 -0
- package/scripts/export-skill.js +374 -0
- package/scripts/generate-manifest.js +787 -0
- package/scripts/lib/alias-contract.js +83 -0
- package/scripts/lib/audit-prompt-builder.js +771 -0
- package/scripts/lib/mock-grader.js +134 -0
- package/scripts/lib/parse-frontmatter.js +429 -0
- package/scripts/lib/roots.js +119 -0
- package/scripts/lint/check-archetype-sections.js +185 -0
- package/scripts/lint/check-category-enum.js +83 -0
- package/scripts/lint/check-routing-eval.js +146 -0
- package/scripts/lint/check-routing-quality.js +211 -0
- package/scripts/lint/check-stability-promotion.js +220 -0
- package/scripts/lint/format-code-frame.js +206 -0
- package/scripts/marketplace-install.js +125 -0
- package/scripts/migrate-category-to-enum.js +169 -0
- package/scripts/migrate-skill-v2-to-v3.js +424 -0
- package/scripts/migrate-skill-v3-to-v4.js +200 -0
- package/scripts/migrate-skill-v5-to-v6.js +304 -0
- package/scripts/restructure-by-category.js +85 -0
- package/scripts/seed-publication-classification.js +282 -0
- package/scripts/skill-audit.js +893 -0
- package/scripts/skill-graph-drift.js +483 -0
- package/scripts/skill-graph-route.js +766 -0
- package/scripts/skill-graph-routing-eval.js +393 -0
- package/scripts/skill-lint.js +1317 -0
- package/scripts/skill-overlap.js +213 -0
- package/scripts/verify-skill-md-export.js +201 -0
|
@@ -0,0 +1,771 @@
|
|
|
1
|
+
'use strict';
|
|
2
|
+
|
|
3
|
+
/**
|
|
4
|
+
* audit-prompt-builder.js — per-dimension prompt composition for skill-audit.js --graded.
|
|
5
|
+
*
|
|
6
|
+
* Builds the context and prompts for the seven scorecard dimensions defined in
|
|
7
|
+
* SKILL_AUDIT_CHECKLIST.md. The audit runner calls an external
|
|
8
|
+
* model CLI (e.g. `claude -p`) for each dimension, collects the structured
|
|
9
|
+
* verdicts, and merges them into findings.md / verdict.md / scorecard.md.
|
|
10
|
+
*
|
|
11
|
+
* This file is self-contained. It only uses Node built-ins and does not depend
|
|
12
|
+
* on any specific model provider — the grader CLI is resolved by the caller.
|
|
13
|
+
*/
|
|
14
|
+
|
|
15
|
+
const fs = require('fs');
|
|
16
|
+
const path = require('path');
|
|
17
|
+
const { parseFrontmatter } = require('./parse-frontmatter');
|
|
18
|
+
|
|
19
|
+
// ---------------------------------------------------------------------------
|
|
20
|
+
// Dimension registry
|
|
21
|
+
// ---------------------------------------------------------------------------
|
|
22
|
+
|
|
23
|
+
/**
 * Registry of the seven scorecard dimensions, in scorecard order.
 *
 * The entries mirror the scorecard rows in
 * SKILL_AUDIT_CHECKLIST.md § Standard Artifact Structure, so the
 * grader output slots directly into the existing scorecard.md shape.
 *
 * Fields of each entry:
 *   - `id`              Short machine identifier for the dimension.
 *   - `label`           Human-readable dimension name.
 *   - `checklistAnchor` Section heading (without the leading "### ") in the
 *                       single-skill audit checklist that defines the pass
 *                       criteria for the dimension. The prompt builder extracts
 *                       the bullet list under that heading and injects it as
 *                       the "pass criteria" block.
 *   - `appliesWhen`     Predicate over the parsed frontmatter (which may be
 *                       null). Always returns a strict boolean. When it returns
 *                       false the dimension is graded as N/A and no model call
 *                       is made (e.g. grounding for scope: portable).
 */
const DIMENSIONS = [
  {
    id: 'metadata',
    label: 'Metadata validity',
    checklistAnchor: '1. Frontmatter validity',
    appliesWhen: () => true,
  },
  {
    id: 'activation',
    label: 'Activation quality',
    checklistAnchor: '2. Activation quality',
    appliesWhen: () => true,
  },
  {
    id: 'relation',
    label: 'Relation quality',
    checklistAnchor: '3. Relation quality',
    appliesWhen: () => true,
  },
  {
    id: 'grounding',
    label: 'Grounding fidelity',
    checklistAnchor: '4. Grounding quality',
    // Only codebase-scoped skills are graded on grounding. `Boolean(fm)` keeps
    // the result a strict `false` (never `null`/`undefined`) when the skill has
    // no parseable frontmatter, matching the documented true/false contract.
    appliesWhen: (fm) => Boolean(fm) && fm.scope === 'codebase',
  },
  {
    id: 'content',
    label: 'Content quality',
    checklistAnchor: '5. Content quality',
    appliesWhen: () => true,
  },
  {
    id: 'eval',
    label: 'Eval quality',
    checklistAnchor: '6. Eval quality',
    appliesWhen: () => true,
  },
  {
    id: 'portability',
    label: 'Portability quality',
    checklistAnchor: '7. Portability quality',
    appliesWhen: () => true,
  },
];
|
|
81
|
+
|
|
82
|
+
// ---------------------------------------------------------------------------
|
|
83
|
+
// Context collection
|
|
84
|
+
// ---------------------------------------------------------------------------
|
|
85
|
+
|
|
86
|
+
// Repo-relative locations of the inputs that context collection reads.
const SKILLS_DIR_REL = 'skills';
const SCHEMA_REL = path.join('schemas', 'skill.schema.json');
const EVAL_ARTIFACTS_DIR_REL = path.join('examples', 'evals');
const EXPORT_SCRIPT_REL = path.join('scripts', 'export-skill.js');

// Character caps applied to each piece of context before it is embedded in a
// prompt, so one oversized file cannot blow the prompt budget.
const DEFAULT_NEIGHBOR_CHAR_LIMIT = 800; // per neighbor-skill description
const DEFAULT_TRUTH_SOURCE_CHAR_LIMIT = 6000; // per truth-source file
const DEFAULT_EVAL_ARTIFACT_CHAR_LIMIT = 12000; // per eval artifact file
const DEFAULT_SCHEMA_CHAR_LIMIT = 20000; // active skill schema
|
|
94
|
+
|
|
95
|
+
/**
 * Read the skill, its truth sources, its eval artifacts, the active schema,
 * its neighbor skills, and the audit checklist, and return the payload the
 * prompt builder needs. Reads are bounded so a single massive file does not
 * explode the prompt budget.
 *
 * Truth sources come from `frontmatter.grounding.truth_sources` and are
 * resolved against `repoRoot`. A declared source that is missing, or that
 * exists but cannot be read as a file (for example a directory), does not
 * abort the run: it is reported as an entry whose `content` is a bracketed
 * marker so the grader can see the problem.
 *
 * Eval artifact discovery mirrors the lint contract in `scripts/skill-lint.js`
 * (checkEvalCoherence): scan `<repoRoot>/examples/evals/*.json` and collect
 * every file whose parsed JSON has `skill_name === frontmatter.name`. Only
 * runs when `frontmatter.eval_artifacts === 'present'` — `planned` / `none` /
 * missing frontmatter all produce an empty `evalArtifacts` array.
 *
 * @param {object} opts
 * @param {string} opts.skillDir Absolute path to the skill directory.
 * @param {string} opts.repoRoot Absolute path to the repo root.
 * @param {number} [opts.truthSourceCharLimit] Per-file character cap for truth sources.
 * @param {number} [opts.evalArtifactCharLimit] Per-file character cap for eval artifacts.
 * @returns {{
 *   skillName: string,
 *   skillBody: string,
 *   frontmatter: object|null,
 *   truthSources: Array<{ path: string, content: string, truncated: boolean }>,
 *   evalArtifacts: Array<{ path: string, content: string, truncated: boolean }>,
 *   schemaContent: string|null,
 *   neighborSummaries: Array<{ name: string, type: (string|null), scope: (string|null), description: string, relatedVia: string[] }>,
 *   exportTransformAvailable: boolean,
 *   checklist: string,
 * }}
 * @throws {Error} If `<skillDir>/SKILL.md` or the audit checklist cannot be read.
 */
function collectContext(opts) {
  const {
    skillDir,
    repoRoot,
    truthSourceCharLimit = DEFAULT_TRUTH_SOURCE_CHAR_LIMIT,
    evalArtifactCharLimit = DEFAULT_EVAL_ARTIFACT_CHAR_LIMIT,
  } = opts;

  const skillFile = path.join(skillDir, 'SKILL.md');
  const skillBody = fs.readFileSync(skillFile, 'utf8');
  const frontmatter = parseFrontmatter(skillBody);

  const truthSources = [];
  const declared = (frontmatter && frontmatter.grounding && Array.isArray(frontmatter.grounding.truth_sources))
    ? frontmatter.grounding.truth_sources
    : [];

  for (const relPath of declared) {
    const abs = path.resolve(repoRoot, String(relPath));
    if (!fs.existsSync(abs)) {
      truthSources.push({ path: relPath, content: '[file not found — grounding drift]', truncated: false });
      continue;
    }

    let raw;
    try {
      raw = fs.readFileSync(abs, 'utf8');
    } catch (err) {
      // The path exists but is not readable as a file (e.g. a directory →
      // EISDIR, or a permission error). Surface it to the grader the same way
      // a missing file is surfaced instead of aborting the whole audit.
      const reason = (err && (err.code || err.message)) || 'unknown error';
      truthSources.push({ path: relPath, content: `[unreadable truth source — ${reason}]`, truncated: false });
      continue;
    }

    const truncated = raw.length > truthSourceCharLimit;
    const content = truncated ? raw.slice(0, truthSourceCharLimit) + '\n\n[…truncated]' : raw;
    truthSources.push({ path: relPath, content, truncated });
  }

  const evalArtifacts = collectEvalArtifacts({
    frontmatter,
    repoRoot,
    charLimit: evalArtifactCharLimit,
  });

  // E1: active schema. Embedded on the `metadata` dimension so the grader can
  // cross-check every required field, enum, and conditional rule without
  // relying on recall. Capped at DEFAULT_SCHEMA_CHAR_LIMIT characters.
  // NOTE(review): the packaged schemas/skill.schema.json is listed at 919
  // lines — confirm it still fits under the cap, otherwise the grader is
  // handed a cut-off schema.
  const schemaContent = readFileBounded(
    path.join(repoRoot, SCHEMA_REL),
    DEFAULT_SCHEMA_CHAR_LIMIT
  );

  // E2: neighbor skill summaries. Every skill referenced in `relations.*`
  // becomes a short summary block {name, type, scope, description} so the
  // `relation` dimension grader can judge whether the linkage targets a
  // semantically correct peer (not merely whether the name exists — that's
  // already a lint check).
  const neighborSummaries = collectNeighborSummaries({
    frontmatter,
    repoRoot,
    charLimit: DEFAULT_NEIGHBOR_CHAR_LIMIT,
  });

  // E4: portability export transform. The sole supported target is
  // `skill-md` and the transform lives at scripts/export-skill.js.
  // We pass a boolean so the `portability` dimension can note whether the
  // transform ships — the grader uses this to judge "export targets are
  // realistic" concretely rather than speculatively.
  const exportTransformAvailable = fs.existsSync(path.join(repoRoot, EXPORT_SCRIPT_REL));

  // The checklist is looked up at the repo root first (the original location).
  // The published package ships it under docs/, so fall back there when the
  // root copy is absent. If neither exists, the root path is read anyway so
  // the caller gets the same ENOENT error as before.
  const checklistCandidates = [
    path.join(repoRoot, 'SKILL_AUDIT_CHECKLIST.md'),
    path.join(repoRoot, 'docs', 'SKILL_AUDIT_CHECKLIST.md'),
  ];
  const checklistPath = checklistCandidates.find((candidate) => fs.existsSync(candidate))
    || checklistCandidates[0];
  const checklist = fs.readFileSync(checklistPath, 'utf8');

  // The skill is identified by its directory name, not by `frontmatter.name`.
  const skillName = path.basename(skillDir);
  return {
    skillName,
    skillBody,
    frontmatter,
    truthSources,
    evalArtifacts,
    schemaContent,
    neighborSummaries,
    exportTransformAvailable,
    checklist,
  };
}
|
|
197
|
+
|
|
198
|
+
/**
 * Read a UTF-8 text file and cap it at `charLimit` characters (UTF-16 code
 * units, as counted by `String.prototype.length`).
 *
 * Used for optional context blocks that should fail quietly — the grader
 * prompt then emits an explicit "absent" marker instead.
 *
 * @param {string} absPath   Absolute path of the file to read.
 * @param {number} charLimit Maximum number of characters to keep.
 * @returns {string|null} The file content, with `\n\n[…truncated]` appended
 *   when it was cut; `null` if the file does not exist or cannot be read.
 */
function readFileBounded(absPath, charLimit) {
  let raw;
  try {
    raw = fs.readFileSync(absPath, 'utf8');
  } catch (_) {
    // Deliberate best-effort: a missing or unreadable file (including a
    // directory) just means this optional context block is absent.
    return null;
  }

  if (!(raw.length > charLimit)) return raw;

  // Do not cut between the two halves of a surrogate pair: if the last kept
  // code unit is a high surrogate, drop it too so the prompt never carries a
  // lone surrogate (which would be encoded as U+FFFD on the way out).
  let end = charLimit;
  const lastUnit = raw.charCodeAt(end - 1);
  if (lastUnit >= 0xd800 && lastUnit <= 0xdbff) end -= 1;

  return `${raw.slice(0, end)}\n\n[…truncated]`;
}
|
|
212
|
+
|
|
213
|
+
/**
 * Build a short summary for each sibling skill that this skill's `relations`
 * point at, so a grader can judge the semantic neighborhood without the full
 * sibling SKILL.md bodies.
 *
 * Relation items may be bare strings or objects carrying a string `skill`
 * property (e.g. `{skill, reason}` / `{skill, min_version}`); the target name
 * is taken from either shape. References to the skill itself are ignored.
 *
 * Neighbors are looked up at `<repoRoot>/skills/<name>/SKILL.md`. A neighbor
 * that is missing, unreadable, or has no frontmatter `name` is skipped
 * silently — dangling relation targets are reported by the lint check, so the
 * grader does not duplicate that error class.
 *
 * @param {object} args
 * @param {object|null} args.frontmatter Parsed frontmatter of the skill under audit.
 * @param {string} args.repoRoot Absolute repo root.
 * @param {number} args.charLimit Character cap for each neighbor description.
 * @returns {Array<{ name: string, type: (string|null), scope: (string|null), description: string, relatedVia: string[] }>}
 *   One entry per resolvable neighbor, ordered by target name. `relatedVia`
 *   lists the relation kinds that reference that neighbor.
 */
function collectNeighborSummaries({ frontmatter, repoRoot, charLimit }) {
  const relations = frontmatter ? frontmatter.relations : null;
  if (!relations) return [];

  const relationKinds = [
    'adjacent',
    'related',
    'broader',
    'narrower',
    'boundary',
    'disjoint_with',
    'verify_with',
    'depends_on',
  ];

  // target skill name → relation kinds that reference it, in `relationKinds` order.
  const kindsByTarget = new Map();
  for (const kind of relationKinds) {
    const entries = Array.isArray(relations[kind]) ? relations[kind] : [];
    for (const entry of entries) {
      let target = null;
      if (typeof entry === 'string') {
        target = entry;
      } else if (entry && typeof entry === 'object' && typeof entry.skill === 'string') {
        target = entry.skill;
      }
      if (!target || target === frontmatter.name) continue;

      const via = kindsByTarget.get(target);
      if (!via) {
        kindsByTarget.set(target, [kind]);
      } else if (!via.includes(kind)) {
        // A target listed twice under the same kind is recorded once.
        via.push(kind);
      }
    }
  }

  if (kindsByTarget.size === 0) return [];

  const skillsRoot = path.join(repoRoot, SKILLS_DIR_REL);
  if (!fs.existsSync(skillsRoot)) return [];

  const summaries = [];
  for (const target of Array.from(kindsByTarget.keys()).sort()) {
    const neighborFile = path.join(skillsRoot, target, 'SKILL.md');
    if (!fs.existsSync(neighborFile)) continue;

    let text;
    try {
      text = fs.readFileSync(neighborFile, 'utf8');
    } catch (_) {
      continue;
    }

    const neighbor = parseFrontmatter(text);
    if (!neighbor || !neighbor.name) continue;

    let description = '';
    if (typeof neighbor.description === 'string') {
      description = neighbor.description.length > charLimit
        ? neighbor.description.slice(0, charLimit) + '…'
        : neighbor.description;
    }

    summaries.push({
      name: neighbor.name,
      type: neighbor.type || null,
      scope: neighbor.scope || null,
      description,
      relatedVia: Array.from(kindsByTarget.get(target)), // ["related"], ["boundary", "verify_with"], etc.
    });
  }

  return summaries;
}
|
|
297
|
+
|
|
298
|
+
/**
 * Discover and read the eval artifacts associated with a skill.
 *
 * Contract: a skill is associated with every `*.json` file in the eval
 * artifacts directory whose parsed JSON has `skill_name === frontmatter.name`.
 * This is intended to match the lint check in
 * `scripts/skill-lint.js#checkEvalCoherence`, so authoring / linting / grading
 * all agree on what "the eval artifact for this skill" means.
 *
 * Only runs when `frontmatter.eval_artifacts === 'present'` and the skill has
 * a name. Every other case (including absent frontmatter) returns an empty
 * array so prompts stay lean for skills that have not shipped an eval.
 *
 * Unreadable or malformed JSON files are skipped silently — they surface as a
 * lint error elsewhere and should not break the grader run.
 *
 * @param {object} args
 * @param {object|null} args.frontmatter Parsed frontmatter from the skill.
 * @param {string} args.repoRoot Absolute repo root.
 * @param {number} args.charLimit Per-file character cap.
 * @returns {Array<{ path: string, content: string, truncated: boolean }>}
 *   One entry per matching file, in sorted file-name order. `path` is
 *   repo-relative with forward slashes.
 */
function collectEvalArtifacts({ frontmatter, repoRoot, charLimit }) {
  if (!frontmatter || frontmatter.eval_artifacts !== 'present' || !frontmatter.name) return [];

  const evalsDir = path.join(repoRoot, EVAL_ARTIFACTS_DIR_REL);
  if (!fs.existsSync(evalsDir)) return [];

  let files;
  try {
    files = fs.readdirSync(evalsDir).filter((f) => f.endsWith('.json')).sort();
  } catch (_) {
    return [];
  }

  // Reported paths always use forward slashes, whatever the host separator is.
  const evalsDirPosix = EVAL_ARTIFACTS_DIR_REL.split(path.sep).join('/');

  const artifacts = [];
  for (const fileName of files) {
    let raw;
    try {
      raw = fs.readFileSync(path.join(evalsDir, fileName), 'utf8');
    } catch (_) {
      continue;
    }

    let parsed;
    try {
      parsed = JSON.parse(raw);
    } catch (_) {
      continue; // malformed eval files surface as lint errors, not grader breakage
    }
    if (!parsed || parsed.skill_name !== frontmatter.name) continue;

    // The raw file text (not a re-serialization) is what gets embedded.
    const truncated = raw.length > charLimit;
    const content = truncated ? `${raw.slice(0, charLimit)}\n\n[…truncated]` : raw;
    artifacts.push({ path: path.posix.join(evalsDirPosix, fileName), content, truncated });
  }

  return artifacts;
}
|
|
357
|
+
|
|
358
|
+
// ---------------------------------------------------------------------------
|
|
359
|
+
// Checklist slicing
|
|
360
|
+
// ---------------------------------------------------------------------------
|
|
361
|
+
|
|
362
|
+
/**
 * Extract the checklist bullet list under a given H3 anchor.
 *
 * The checklist file formats each dimension as:
 *
 *   ### 2. Activation quality
 *
 *   - [ ] description names real trigger scenarios
 *   - [ ] keywords are not empty for routable skills
 *   ...
 *
 *   ### 3. Relation quality
 *
 * This function returns the text under the first matching anchor, stopping at
 * the next H2 or H3 heading (or the end of the document). Both LF and CRLF
 * line endings are accepted; the returned block always uses LF.
 *
 * @param {string} checklist Full checklist markdown.
 * @param {string} anchor Section title without the leading "### ".
 * @returns {string} The trimmed bullet block, or an empty string if the
 *   anchor is not found or `checklist` is not a string.
 */
function sliceChecklist(checklist, anchor) {
  if (typeof checklist !== 'string') return '';

  // Split on LF or CRLF so a Windows checkout does not leak "\r" into the slice.
  const lines = checklist.split(/\r?\n/);
  const headingPattern = new RegExp(`^###\\s+${escapeRegex(anchor)}\\s*$`);

  const headingIndex = lines.findIndex((line) => headingPattern.test(line));
  if (headingIndex === -1) return '';

  const section = [];
  for (let i = headingIndex + 1; i < lines.length; i++) {
    // The next H2 or H3 heading closes the section.
    if (/^#{2,3}\s+/.test(lines[i])) break;
    section.push(lines[i]);
  }
  return section.join('\n').trim();
}
|
|
399
|
+
|
|
400
|
+
/**
 * Escape every regular-expression metacharacter in `s` so the result can be
 * embedded in a `RegExp` source and match the input literally.
 *
 * @param {string} s Text to escape. Non-string values are coerced with `String()`.
 * @returns {string} The escaped text.
 */
function escapeRegex(s) {
  return String(s).replace(/[.*+?^${}()|[\]\\]/g, '\\$&');
}
|
|
403
|
+
|
|
404
|
+
// ---------------------------------------------------------------------------
|
|
405
|
+
// Prompt composition
|
|
406
|
+
// ---------------------------------------------------------------------------
|
|
407
|
+
|
|
408
|
+
// Allowed verdict labels. Frozen because the array is exported and used as the
// validation contract for grader output.
const VERDICT_ENUM = Object.freeze(['PASS', 'PASS WITH FIXES', 'PARTIAL', 'FAIL', 'N/A']);

// v0.5.0: align with the evaluation-doctrine principle that a task is not
// "Done" until per-dimension scores are >= 4. This constant is the min-pass
// threshold on the 1–5 dimension score. Exposed for overrides via
// `skill-audit.js --min-pass-score <n>`.
const MIN_PASS_SCORE = 4;

// Allowed finding severities. Frozen for the same reason as VERDICT_ENUM.
const SEVERITY_ENUM = Object.freeze(['P0', 'P1', 'P2', 'P3', 'P4']);
|
|
416
|
+
|
|
417
|
+
/**
 * Compose the grader prompt for a single dimension. The prompt is laid out as
 * IDENTITY → STEPS → RULES → INPUT → OUTPUT.
 *
 * The prompt is evidence-first and forces a single <verdict>...</verdict>
 * JSON block with a fixed schema so the parser is deterministic.
 *
 * Optional context blocks are embedded only for the dimension that needs them:
 * the schema for `metadata`, neighbor summaries for `relation`, eval artifacts
 * for `eval`, and the export-transform note for `portability`. Missing or
 * malformed context fields (`skillBody`, `truthSources`, `checklist`,
 * `evalArtifacts`, `neighborSummaries`) degrade to their "empty" rendering
 * instead of throwing.
 *
 * @param {object} opts
 * @param {object} opts.dimension One element of DIMENSIONS (`id`, `label`,
 *   `checklistAnchor` are read).
 * @param {object} opts.context Output of collectContext().
 * @returns {string} The full prompt text.
 */
function buildDimensionPrompt(opts) {
  const { dimension, context } = opts;
  const {
    skillName,
    skillBody,
    truthSources,
    evalArtifacts,
    schemaContent,
    neighborSummaries,
    exportTransformAvailable,
    checklist,
  } = context;

  // Guard the always-embedded inputs the same way the optional ones are guarded.
  const bodyText = typeof skillBody === 'string' ? skillBody : '';
  const truthList = Array.isArray(truthSources) ? truthSources : [];
  const checklistText = typeof checklist === 'string' ? checklist : '';

  const criteria = sliceChecklist(checklistText, dimension.checklistAnchor) || '[checklist anchor not found]';

  const truthBlock = truthList.length === 0
    ? '(no truth_sources declared in frontmatter — grounding block is absent or empty)'
    : truthList.map((ts) => [
      `<truth-source path="${ts.path}"${ts.truncated ? ' truncated="true"' : ''}>`,
      ts.content.trim(),
      '</truth-source>',
    ].join('\n')).join('\n\n');

  // Eval artifacts are embedded only for the `eval` dimension. Other dimensions
  // do not need them and including them everywhere would inflate every prompt.
  // When no artifact was collected we still emit the section with an explicit
  // missing marker so the grader can flag the drift rather than silently
  // assume absence.
  const includeEvalBlock = dimension.id === 'eval';
  const evalArtifactsArr = Array.isArray(evalArtifacts) ? evalArtifacts : [];
  const evalBlock = !includeEvalBlock
    ? null
    : (evalArtifactsArr.length === 0
      ? '(no eval artifact shipped for this skill — frontmatter.eval_artifacts is not `present` or no file in examples/evals/ matches skill_name)'
      : evalArtifactsArr.map((ea) => [
        `<eval-artifact path="${ea.path}"${ea.truncated ? ' truncated="true"' : ''}>`,
        ea.content.trim(),
        '</eval-artifact>',
      ].join('\n')).join('\n\n'));

  // E1: active schema. Embedded only on the `metadata` dimension — other
  // dimensions don't need to re-verify field definitions and embedding the
  // schema on every call would waste tokens.
  const includeSchemaBlock = dimension.id === 'metadata';
  const schemaBlock = !includeSchemaBlock
    ? null
    : (schemaContent
      ? `<schema path="${SCHEMA_REL}">\n${schemaContent.trim()}\n</schema>`
      : `<schema path="${SCHEMA_REL}">(schema file not found at this path — grader should flag this as infrastructure drift)</schema>`);

  // E2: neighbor summaries. Embedded only on the `relation` dimension so the
  // grader can judge semantic adjacency against actual peer metadata rather
  // than recall. Rendered as a placeholder when no summaries were collected.
  const includeNeighborBlock = dimension.id === 'relation';
  const neighbors = Array.isArray(neighborSummaries) ? neighborSummaries : [];
  const neighborBlock = !includeNeighborBlock
    ? null
    : (neighbors.length === 0
      ? '(this skill declares no relations — nothing to cross-check)'
      : neighbors.map((nb) => [
        `<neighbor name="${nb.name}" type="${nb.type || 'unknown'}" scope="${nb.scope || 'unknown'}" related-via="${(nb.relatedVia || []).join(',')}">`,
        nb.description,
        '</neighbor>',
      ].join('\n')).join('\n\n'));

  // E4: export transform reference. Embedded only on the `portability`
  // dimension. Stating whether the script exists converts the
  // `readiness: scripted` claim from self-report to verifiable.
  const includePortabilityBlock = dimension.id === 'portability';
  const portabilityBlock = !includePortabilityBlock
    ? null
    : (exportTransformAvailable
      ? `<export-transform path="${EXPORT_SCRIPT_REL}" available="true">\nThe export transform exists on disk. Run \`node ${EXPORT_SCRIPT_REL} skills/${skillName}\` to produce a SKILL.skill-md.md with only SKILL.md base fields at the top level. Only \`skill-md\` is a valid portability.targets value today; other runtimes (cursor, windsurf, copilot, agents-md) are deferred per v0.3.0 CHANGELOG.\n</export-transform>`
      : `<export-transform path="${EXPORT_SCRIPT_REL}" available="false">\nThe export transform script is missing from the repo. A skill declaring \`portability.readiness: scripted\` while the transform is absent is over-claiming — flag this as a contract violation.\n</export-transform>`);

  // STEPS are composed dynamically per dimension so only the context sources
  // the grader actually has are referenced. This keeps the step count honest
  // (no "read the schema" when no schema block is present) and the numbering
  // contiguous — important for LLMs that interpret step numbers literally.
  const steps = [
    `1. Read the SKILL.md body for the skill named \`${skillName}\`.`,
    '2. Read the truth_source files listed in the skill\'s frontmatter (if any).',
  ];
  let stepNo = 3;
  if (includeSchemaBlock) {
    steps.push(`${stepNo++}. Read the embedded <schema> — this is the active Skill Graph JSON Schema that every field must conform to.`);
  }
  if (includeNeighborBlock) {
    steps.push(`${stepNo++}. Read the <neighbor-skills> summaries — each is a sibling skill this one links to via relations. Judge whether the linkage is semantically correct.`);
  }
  if (includeEvalBlock) {
    steps.push(`${stepNo++}. Read the eval artifacts embedded in <eval-artifacts> — these are the authored evaluation cases for this skill.`);
  }
  if (includePortabilityBlock) {
    steps.push(`${stepNo++}. Read the <export-transform> note — it states whether the SKILL.md export script actually ships and how to invoke it.`);
  }
  steps.push(`${stepNo++}. Read the pass criteria for dimension "${dimension.label}".`);
  steps.push(`${stepNo++}. For each checklist bullet, mark PASS, PASS WITH FIXES, or FAIL with a quoted evidence snippet.`);
  steps.push(`${stepNo++}. Aggregate into one dimension verdict and a 1–5 score (5 = state of the art, 1 = broken).`);
  steps.push(`${stepNo++}. Produce a finding row for every checklist bullet that is not a full PASS.`);

  // Evidence-sources clause reflects whichever blocks are embedded — the
  // grader is constrained to cite only what it can see.
  const evidenceParts = ['the skill', 'a truth source'];
  if (includeSchemaBlock) evidenceParts.push('the schema');
  if (includeNeighborBlock) evidenceParts.push('a neighbor summary');
  if (includeEvalBlock) evidenceParts.push('an eval artifact');
  if (includePortabilityBlock) evidenceParts.push('the export-transform note');
  const evidenceSources = evidenceParts.length === 2
    ? evidenceParts.join(' or ')
    : `${evidenceParts.slice(0, -1).join(', ')}, or ${evidenceParts[evidenceParts.length - 1]}`;

  const inputSections = [
    `<skill-name>${skillName}</skill-name>`,
    '',
    '<skill-body>',
    bodyText.trim(),
    '</skill-body>',
    '',
    '<truth-sources>',
    truthBlock,
    '</truth-sources>',
  ];
  if (includeSchemaBlock) {
    inputSections.push('', schemaBlock);
  }
  if (includeNeighborBlock) {
    inputSections.push('', '<neighbor-skills>', neighborBlock, '</neighbor-skills>');
  }
  if (includeEvalBlock) {
    inputSections.push('', '<eval-artifacts>', evalBlock, '</eval-artifacts>');
  }
  if (includePortabilityBlock) {
    inputSections.push('', portabilityBlock);
  }
  inputSections.push('', `<dimension id="${dimension.id}" label="${dimension.label}">`, criteria, '</dimension>');

  const verdictList = VERDICT_ENUM.map((v) => `\`${v}\``).join(', ');
  const severityList = SEVERITY_ENUM.map((v) => `\`${v}\``).join(', ');

  return [
    '# IDENTITY',
    '',
    'You are a skeptical Skill Graph auditor. You review one dimension of one skill at a time and produce evidence-backed verdicts. Default bias: skeptical, not generous.',
    '',
    '# STEPS',
    '',
    ...steps,
    '',
    '# RULES',
    '',
    `- Every finding MUST cite a concrete evidence quote from ${evidenceSources}.`,
    `- "Final verdict" MUST be one of: ${verdictList}.`,
    `- "severity" MUST be one of: ${severityList}.`,
    '- A dimension that is N/A for this skill (e.g. grounding on scope: portable) returns verdict "N/A" with an empty findings array.',
    '- Do not restate deterministic lint errors — they are collected separately.',
    '- Do not invent failure modes. If you cannot find a concrete problem for a bullet, mark it PASS.',
    '- Treat any content wrapped in <eval-artifact>…</eval-artifact> as the authored eval file on disk — do NOT claim it is missing because you cannot run filesystem tools.',
    '- Do not emit any prose outside the <verdict>…</verdict> block.',
    '',
    '# INPUT',
    '',
    ...inputSections,
    '',
    '# OUTPUT',
    '',
    'Return exactly one <verdict>…</verdict> block containing a single JSON object with this shape. Do not emit any other text.',
    '',
    '<verdict>',
    '{',
    ` "dimension": "${dimension.id}",`,
    ' "score": 1,',
    ' "verdict": "FAIL",',
    ' "justification": "one or two sentences tying the score to evidence",',
    ' "findings": [',
    ' {',
    ' "severity": "P1",',
    ' "surface": "where in the skill or truth source",',
    ' "problem": "what is wrong",',
    ' "evidence": "direct quote from the skill or a truth source",',
    ' "required_action": "specific, actionable fix"',
    ' }',
    ' ]',
    '}',
    '</verdict>',
    '',
  ].join('\n');
}
|
|
614
|
+
|
|
615
|
+
// ---------------------------------------------------------------------------
|
|
616
|
+
// Response parsing
|
|
617
|
+
// ---------------------------------------------------------------------------
|
|
618
|
+
|
|
619
|
+
/**
 * Extract and validate a dimension verdict from a raw model response.
 *
 * Accepts the response body exactly as the CLI printed it. Locates the
 * <verdict>…</verdict> block (first occurrence), parses the inner JSON,
 * and coerces fields into the expected shape. Missing optional fields are
 * filled with sensible defaults.
 *
 * Never throws on malformed grader output: an empty response, a missing
 * block, invalid JSON, or JSON that is not a plain object (e.g. `null`, an
 * array, a number) all produce `{ ok: false, error, verdict: null }`.
 *
 * @param {string} response Raw stdout from the grader CLI.
 * @param {object} dimension The dimension record this response corresponds to;
 *   its `id` is used when the verdict JSON omits `dimension`.
 * @returns {{
 *   ok: boolean,
 *   error: string|null,
 *   verdict: {
 *     dimension: string,
 *     score: number|string,
 *     verdict: string,
 *     justification: string,
 *     findings: Array<{ severity: string, surface: string, problem: string, evidence: string, required_action: string }>,
 *     raw: string,
 *   }|null,
 * }}
 */
function parseDimensionResponse(response, dimension) {
  if (!response || typeof response !== 'string') {
    return { ok: false, error: 'empty response from grader', verdict: null };
  }

  const blockMatch = response.match(/<verdict>([\s\S]*?)<\/verdict>/);
  if (!blockMatch) {
    return { ok: false, error: 'no <verdict>…</verdict> block found in response', verdict: null };
  }
  const raw = blockMatch[1].trim();

  let parsed;
  try {
    parsed = JSON.parse(raw);
  } catch (e) {
    return { ok: false, error: `verdict block is not valid JSON: ${e.message}`, verdict: null };
  }

  // JSON.parse also accepts `null`, arrays and primitives. None of those is a
  // verdict record, and `null` would otherwise crash on property access below.
  if (parsed === null || typeof parsed !== 'object' || Array.isArray(parsed)) {
    return { ok: false, error: 'verdict block JSON is not an object', verdict: null };
  }

  const verdict = {
    dimension: String(parsed.dimension || (dimension ? dimension.id : '')),
    score: normalizeScore(parsed.score),
    verdict: normalizeVerdict(parsed.verdict),
    justification: String(parsed.justification || '').trim() || '(no justification provided)',
    findings: Array.isArray(parsed.findings) ? parsed.findings.map(normalizeFinding).filter(Boolean) : [],
    raw,
  };

  // Contract: N/A verdict implies empty findings.
  if (verdict.verdict === 'N/A') verdict.findings = [];

  return { ok: true, error: null, verdict };
}
|
|
673
|
+
|
|
674
|
+
/**
 * Coerce a grader-supplied score into an integer in 1..5, or the string 'N/A'.
 *
 * Only numbers and non-blank numeric strings count as scores. Anything else
 * (`undefined`, `null`, booleans, blank strings, objects, 'N/A') yields 'N/A'
 * instead of being coerced to 0 and clamped to the failing score 1.
 *
 * @param {*} s Raw `score` value from the verdict JSON.
 * @returns {number|string} Rounded score clamped to [1, 5], or 'N/A'.
 */
function normalizeScore(s) {
  let n;
  if (typeof s === 'number') {
    n = s;
  } else if (typeof s === 'string' && s.trim() !== '') {
    n = Number(s.trim()); // 'N/A' and other non-numeric text become NaN
  } else {
    return 'N/A';
  }

  if (!Number.isFinite(n)) return 'N/A';
  if (n < 1) return 1;
  if (n > 5) return 5;
  return Math.round(n);
}
|
|
682
|
+
|
|
683
|
+
/**
 * Map a grader-supplied verdict label onto one of VERDICT_ENUM.
 *
 * Matching is case-insensitive and tolerant of minor formatting drift: runs of
 * whitespace, underscores and hyphens are treated as a single space, and a few
 * unambiguous synonyms are accepted. Any other value, including a missing
 * one, falls back to 'PASS WITH FIXES'.
 *
 * @param {*} v Raw `verdict` value from the verdict JSON.
 * @returns {string} A member of VERDICT_ENUM.
 */
function normalizeVerdict(v) {
  const label = String(v || '').trim().toUpperCase().replace(/[\s_-]+/g, ' ');
  if (VERDICT_ENUM.includes(label)) return label;

  // tolerate minor formatting drift
  switch (label) {
    case 'PASS WITH FIX':
      return 'PASS WITH FIXES';
    case 'FAILED':
      return 'FAIL';
    case 'PASSED':
      return 'PASS';
    case 'NA':
    case 'NOT APPLICABLE':
      return 'N/A';
    default:
      return 'PASS WITH FIXES';
  }
}
|
|
691
|
+
|
|
692
|
+
/**
 * Coerce one raw finding from the verdict JSON into the canonical shape.
 *
 * Every text field is stringified and trimmed, with a placeholder when the
 * grader left it out. `severity` is upper-cased and falls back to 'P2' when it
 * is missing or not in SEVERITY_ENUM. The camelCase `requiredAction` is
 * accepted as an alias for `required_action`.
 *
 * @param {*} f Raw finding entry.
 * @returns {{ severity: string, surface: string, problem: string, evidence: string, required_action: string }|null}
 *   The normalized finding, or `null` when `f` is not a plain object (callers
 *   filter those out).
 */
function normalizeFinding(f) {
  if (!f || typeof f !== 'object' || Array.isArray(f)) return null;

  const requested = String(f.severity || 'P2').trim().toUpperCase();
  const severity = SEVERITY_ENUM.includes(requested) ? requested : 'P2';

  return {
    severity,
    surface: String(f.surface || '(unknown)').trim(),
    problem: String(f.problem || '(unspecified)').trim(),
    evidence: String(f.evidence || '(no evidence cited)').trim(),
    required_action: String(f.required_action || f.requiredAction || '(no action proposed)').trim(),
  };
}
|
|
704
|
+
|
|
705
|
+
// ---------------------------------------------------------------------------
|
|
706
|
+
// Verdict aggregation
|
|
707
|
+
// ---------------------------------------------------------------------------
|
|
708
|
+
|
|
709
|
+
/**
 * Derive a single overall verdict from the per-dimension verdicts AND scores.
 *
 * v0.5.0: rewritten to honor the `evaluation` doctrine's `min_pass_score: 4`
 * threshold. The prior implementation used labels only and ignored the 1–5
 * numeric scores the grader emits, producing PASS WITH FIXES defaults that
 * masked sub-threshold scores.
 *
 * Rule set (evaluated in order — first match wins):
 *   1. Any non-N/A dimension with an explicit verdict of FAIL        → FAIL
 *   2. Any non-N/A dimension with a numeric score ≤ 2                → FAIL
 *   3. Any non-N/A dimension with a numeric score < `minPassScore`   → PARTIAL
 *   4. Any dimension with verdict `PARTIAL`                          → PARTIAL
 *   5. Any dimension with verdict `PASS WITH FIXES`                  → PASS WITH FIXES
 *   6. Otherwise                                                     → PASS
 *
 * Rules 1–2 return as soon as a failing dimension is seen; rules 3–6 are
 * decided after every dimension has been inspected.
 *
 * N/A dimensions neither raise nor lower the result: they are skipped
 * entirely, even when the grader attached a numeric score to them.
 * Non-numeric scores (e.g. 'N/A') are ignored by the score rules.
 *
 * @param {Array<{ verdict: string, score: number|string }>} dimensionVerdicts
 * @param {object} [opts]
 * @param {number} [opts.minPassScore=MIN_PASS_SCORE] Override the 1–5 pass threshold.
 * @returns {'PASS' | 'PASS WITH FIXES' | 'PARTIAL' | 'FAIL'}
 */
function aggregateVerdict(dimensionVerdicts, opts) {
  const minPass = (opts && Number.isFinite(opts.minPassScore)) ? opts.minPassScore : MIN_PASS_SCORE;

  let sawPartial = false;
  let sawWithFixes = false;
  let sawSubThreshold = false;

  for (const d of dimensionVerdicts) {
    const verdict = d.verdict;
    // A dimension that does not apply must not influence the outcome, whatever
    // score the grader happened to attach to it.
    if (verdict === 'N/A') continue;

    const score = (typeof d.score === 'number') ? d.score : null;

    if (verdict === 'FAIL') return 'FAIL';
    if (score !== null && score <= 2) return 'FAIL';

    if (score !== null && score < minPass) sawSubThreshold = true;
    if (verdict === 'PARTIAL') sawPartial = true;
    if (verdict === 'PASS WITH FIXES') sawWithFixes = true;
  }

  if (sawSubThreshold || sawPartial) return 'PARTIAL';
  if (sawWithFixes) return 'PASS WITH FIXES';
  return 'PASS';
}
|
|
756
|
+
|
|
757
|
+
// ---------------------------------------------------------------------------
|
|
758
|
+
// Exports
|
|
759
|
+
// ---------------------------------------------------------------------------
|
|
760
|
+
|
|
761
|
+
module.exports = {
|
|
762
|
+
DIMENSIONS,
|
|
763
|
+
VERDICT_ENUM,
|
|
764
|
+
MIN_PASS_SCORE,
|
|
765
|
+
SEVERITY_ENUM,
|
|
766
|
+
collectContext,
|
|
767
|
+
sliceChecklist,
|
|
768
|
+
buildDimensionPrompt,
|
|
769
|
+
parseDimensionResponse,
|
|
770
|
+
aggregateVerdict,
|
|
771
|
+
};
|