@skill-graph/cli 0.5.6
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +247 -0
- package/LICENSE +200 -0
- package/NOTICE +62 -0
- package/README.md +398 -0
- package/SKILL_GRAPH.md +443 -0
- package/bin/skill-graph.js +374 -0
- package/docs/ADOPTION.md +117 -0
- package/docs/CONFORMANCE.md +66 -0
- package/docs/PRIMER.md +384 -0
- package/docs/QUICKSTART-30MIN.md +333 -0
- package/docs/ROUTING-METRICS.md +120 -0
- package/docs/SKILL-MD-FORMAT-COMPATIBILITY.md +127 -0
- package/docs/SKILL_AUDIT_CHECKLIST.md +199 -0
- package/docs/SKILL_AUDIT_LOOP.md +195 -0
- package/docs/SKILL_METADATA_PROTOCOL.md +609 -0
- package/docs/_archived/marketplace-publication-priority-2026-05-18.md +239 -0
- package/docs/adr/0001-predicate-set.md +69 -0
- package/docs/adr/0002-json-ld-context.md +82 -0
- package/docs/adr/0003-ontoclean-rigidity-tags.md +65 -0
- package/docs/adr/0004-persistent-identifiers.md +74 -0
- package/docs/adr/0005-freshness-consolidation.md +70 -0
- package/docs/adr/0006-revise-predicate-rename.md +105 -0
- package/docs/adr/0007-audit-loop-cadence.md +99 -0
- package/docs/adr/0008-skill-surface-split-and-curation-policy.md +93 -0
- package/docs/category-consumers.md +168 -0
- package/docs/concept-map.md +194 -0
- package/docs/diagrams/drift-states.mmd +21 -0
- package/docs/diagrams/manifest-pipeline.mmd +25 -0
- package/docs/diagrams/routing-harness.mmd +41 -0
- package/docs/diagrams/starter-graph.mmd +53 -0
- package/docs/field-decision-guide.md +315 -0
- package/docs/field-rationale.md +211 -0
- package/docs/field-reference.generated.md +624 -0
- package/docs/field-reference.md +1426 -0
- package/docs/glossary.md +190 -0
- package/docs/head-noun-glossary.md +63 -0
- package/docs/images/audit-phases.png +0 -0
- package/docs/images/drift-states.png +0 -0
- package/docs/images/graded-mode.png +0 -0
- package/docs/images/manifest-pipeline.png +0 -0
- package/docs/images/routing-harness.png +0 -0
- package/docs/images/skill-anatomy.png +0 -0
- package/docs/images/starter-graph.png +0 -0
- package/docs/images/system-model.png +0 -0
- package/docs/integrations/github-actions.md +155 -0
- package/docs/manifest-field-mapping.md +443 -0
- package/docs/marketplace-publication-queue.generated.md +240 -0
- package/docs/marketplace-release-agent-prompt.md +82 -0
- package/docs/marketplace-skill-candidate-list.md +272 -0
- package/docs/marketplace-syndication.md +222 -0
- package/docs/migration-sample-review.md +155 -0
- package/docs/migrations/v4-to-v5.md +168 -0
- package/docs/migrations/v5-to-v6.md +221 -0
- package/docs/name-exceptions.yaml +37 -0
- package/docs/plans/marketplace-p1-public-migration-plan.md +41 -0
- package/docs/plans/multi-root-workspace.md +148 -0
- package/docs/plans/scripts-roadmap.md +107 -0
- package/docs/plans/v4-schema-bump.md +160 -0
- package/docs/plans/wave-2-extraction.md +122 -0
- package/docs/positioning-vs-marketplaces.md +175 -0
- package/docs/proposals/skill-audit-loop-positioning.md +160 -0
- package/docs/quality-doctrine.md +138 -0
- package/docs/recommended-skills.md +150 -0
- package/docs/research/skill-comprehension-eval-research.md +1830 -0
- package/docs/research/skill-retrieval-evidence.md +66 -0
- package/docs/skill-metadata-protocol.md +471 -0
- package/docs/skills-sh-maintainer-cleanup-request.md +80 -0
- package/examples/audits/a11y/findings.md +52 -0
- package/examples/audits/a11y/scorecard.md +21 -0
- package/examples/audits/a11y/verdict.md +44 -0
- package/examples/audits/debugging/findings.md +59 -0
- package/examples/audits/debugging/scorecard.md +22 -0
- package/examples/audits/debugging/verdict.md +33 -0
- package/examples/audits/documentation/findings.md +59 -0
- package/examples/audits/documentation/scorecard.md +22 -0
- package/examples/audits/documentation/verdict.md +33 -0
- package/examples/evals/a11y.json +140 -0
- package/examples/evals/api-design.json +52 -0
- package/examples/evals/code-review.json +52 -0
- package/examples/evals/data-modeling.json +52 -0
- package/examples/evals/database-migration.json +52 -0
- package/examples/evals/debugging.json +118 -0
- package/examples/evals/dependency-architecture.json +52 -0
- package/examples/evals/design-system-architecture.json +52 -0
- package/examples/evals/error-tracking.json +52 -0
- package/examples/evals/event-contract-design.json +52 -0
- package/examples/evals/form-ux-architecture.json +52 -0
- package/examples/evals/framework-fit-analysis.json +52 -0
- package/examples/evals/graph-audit.json +139 -0
- package/examples/evals/information-architecture.json +52 -0
- package/examples/evals/interaction-feedback.json +52 -0
- package/examples/evals/interaction-patterns.json +52 -0
- package/examples/evals/layout-composition.json +52 -0
- package/examples/evals/lint-overlay.json +117 -0
- package/examples/evals/microcopy.json +52 -0
- package/examples/evals/observability-modeling.json +52 -0
- package/examples/evals/pattern-recognition.json +96 -0
- package/examples/evals/performance-engineering.json +52 -0
- package/examples/evals/refactor.json +128 -0
- package/examples/evals/semiotics.json +52 -0
- package/examples/evals/skill-infrastructure.json +96 -0
- package/examples/evals/skill-router.json +140 -0
- package/examples/evals/skill-router.routing.json +113 -0
- package/examples/evals/system-interface-contracts.json +52 -0
- package/examples/evals/task-analysis.json +52 -0
- package/examples/evals/testing-strategy.json +118 -0
- package/examples/evals/type-safety.json +249 -0
- package/examples/evals/visual-design-foundations.json +52 -0
- package/examples/evals/webhook-integration.json +52 -0
- package/examples/exports/a11y.skill-md.md +80 -0
- package/examples/exports/debugging.skill-md.md +80 -0
- package/examples/exports/refactor.skill-md.md +78 -0
- package/examples/exports/testing-strategy.skill-md.md +81 -0
- package/examples/projects/markdown-static-site/README.md +115 -0
- package/examples/projects/markdown-static-site/skills/content-source-router/SKILL.md +131 -0
- package/examples/projects/markdown-static-site/skills/image-optimization-pipeline-config/SKILL.md +132 -0
- package/examples/projects/markdown-static-site/skills/link-rot-detection/SKILL.md +103 -0
- package/examples/projects/markdown-static-site/skills/markdown-post-frontmatter-validation/SKILL.md +133 -0
- package/examples/projects/markdown-static-site/skills/migrate-posts-to-v2-frontmatter/SKILL.md +140 -0
- package/examples/projects/saas-stripe-postgres/README.md +208 -0
- package/examples/projects/saas-stripe-postgres/db/migrations/0004_canonicalize_orders.sql +37 -0
- package/examples/projects/saas-stripe-postgres/db/schema.sql +112 -0
- package/examples/projects/saas-stripe-postgres/skills/migrate-orders-to-canonical-schema/SKILL.md +149 -0
- package/examples/projects/saas-stripe-postgres/skills/nextjs-server-action-validation/SKILL.md +154 -0
- package/examples/projects/saas-stripe-postgres/skills/payment-provider-router/SKILL.md +153 -0
- package/examples/projects/saas-stripe-postgres/skills/postgres-rls-pattern/SKILL.md +163 -0
- package/examples/projects/saas-stripe-postgres/skills/stripe-webhook-signature-verification/SKILL.md +137 -0
- package/examples/protocol/skill-metadata-template.md +301 -0
- package/examples/protocol/skills.manifest.sample.json +13245 -0
- package/examples/skill-metadata-template.md +317 -0
- package/examples/skills.manifest.sample.json +13519 -0
- package/examples/tests/v3-1-skos-fixture/SKILL.md +93 -0
- package/marketplace/README.md +17 -0
- package/marketplace/skills/a11y/SKILL.md +66 -0
- package/marketplace/skills/acid-fundamentals/SKILL.md +106 -0
- package/marketplace/skills/agent-engineering/SKILL.md +386 -0
- package/marketplace/skills/agent-eval-design/SKILL.md +55 -0
- package/marketplace/skills/ai-native-development/SKILL.md +294 -0
- package/marketplace/skills/api-design/SKILL.md +60 -0
- package/marketplace/skills/architecture-decision-records/SKILL.md +55 -0
- package/marketplace/skills/background-jobs/SKILL.md +265 -0
- package/marketplace/skills/bounded-context-mapping/SKILL.md +55 -0
- package/marketplace/skills/cap-theorem-tradeoffs/SKILL.md +127 -0
- package/marketplace/skills/client-server-boundary/SKILL.md +187 -0
- package/marketplace/skills/code-review/SKILL.md +120 -0
- package/marketplace/skills/color-system-design/SKILL.md +43 -0
- package/marketplace/skills/component-architecture/SKILL.md +126 -0
- package/marketplace/skills/compression/SKILL.md +112 -0
- package/marketplace/skills/conceptual-modeling/SKILL.md +181 -0
- package/marketplace/skills/connection-pooling/SKILL.md +105 -0
- package/marketplace/skills/constraint-awareness/SKILL.md +287 -0
- package/marketplace/skills/content-monitor/SKILL.md +209 -0
- package/marketplace/skills/context-engineering/SKILL.md +320 -0
- package/marketplace/skills/context-graph/SKILL.md +174 -0
- package/marketplace/skills/context-management/SKILL.md +174 -0
- package/marketplace/skills/context-window/SKILL.md +239 -0
- package/marketplace/skills/contract-testing/SKILL.md +120 -0
- package/marketplace/skills/cron-scheduling/SKILL.md +223 -0
- package/marketplace/skills/dark-mode-implementation/SKILL.md +47 -0
- package/marketplace/skills/data-modeling/SKILL.md +59 -0
- package/marketplace/skills/data-modeling-fundamentals/SKILL.md +117 -0
- package/marketplace/skills/database-migration/SKILL.md +429 -0
- package/marketplace/skills/debugging/SKILL.md +67 -0
- package/marketplace/skills/dependency-architecture/SKILL.md +58 -0
- package/marketplace/skills/design-module-composition/SKILL.md +43 -0
- package/marketplace/skills/design-system-architecture/SKILL.md +61 -0
- package/marketplace/skills/design-thinking/SKILL.md +44 -0
- package/marketplace/skills/diagnosis/SKILL.md +296 -0
- package/marketplace/skills/diff-analysis/SKILL.md +188 -0
- package/marketplace/skills/e2e-test-design/SKILL.md +113 -0
- package/marketplace/skills/entity-relationship-modeling/SKILL.md +218 -0
- package/marketplace/skills/epistemic-grounding/SKILL.md +112 -0
- package/marketplace/skills/error-boundary/SKILL.md +235 -0
- package/marketplace/skills/error-tracking/SKILL.md +261 -0
- package/marketplace/skills/eval-driven-development/SKILL.md +147 -0
- package/marketplace/skills/evaluation/SKILL.md +113 -0
- package/marketplace/skills/event-contract-design/SKILL.md +60 -0
- package/marketplace/skills/event-storming/SKILL.md +56 -0
- package/marketplace/skills/form-ux-architecture/SKILL.md +60 -0
- package/marketplace/skills/framework-fit-analysis/SKILL.md +59 -0
- package/marketplace/skills/frontend-architecture/SKILL.md +43 -0
- package/marketplace/skills/generative-ui/SKILL.md +118 -0
- package/marketplace/skills/graph-audit/SKILL.md +81 -0
- package/marketplace/skills/guardrails/SKILL.md +118 -0
- package/marketplace/skills/hooks-patterns/SKILL.md +185 -0
- package/marketplace/skills/http-semantics/SKILL.md +136 -0
- package/marketplace/skills/ideation/SKILL.md +41 -0
- package/marketplace/skills/indexing-strategy/SKILL.md +108 -0
- package/marketplace/skills/information-architecture/SKILL.md +59 -0
- package/marketplace/skills/integration-test-design/SKILL.md +111 -0
- package/marketplace/skills/intent-recognition/SKILL.md +136 -0
- package/marketplace/skills/interaction-feedback/SKILL.md +59 -0
- package/marketplace/skills/interaction-patterns/SKILL.md +59 -0
- package/marketplace/skills/journey-mapping/SKILL.md +41 -0
- package/marketplace/skills/keywords/SKILL.md +213 -0
- package/marketplace/skills/knowledge-modeling/SKILL.md +232 -0
- package/marketplace/skills/layout-composition/SKILL.md +59 -0
- package/marketplace/skills/linguistics/SKILL.md +429 -0
- package/marketplace/skills/lint-overlay/SKILL.md +76 -0
- package/marketplace/skills/mental-models/SKILL.md +126 -0
- package/marketplace/skills/merge-queue/SKILL.md +94 -0
- package/marketplace/skills/methodology/SKILL.md +317 -0
- package/marketplace/skills/microcopy/SKILL.md +232 -0
- package/marketplace/skills/middleware-patterns/SKILL.md +363 -0
- package/marketplace/skills/mobile-responsive-ux/SKILL.md +287 -0
- package/marketplace/skills/mutation-testing/SKILL.md +112 -0
- package/marketplace/skills/naming-conventions/SKILL.md +112 -0
- package/marketplace/skills/observability-modeling/SKILL.md +59 -0
- package/marketplace/skills/ontology-modeling/SKILL.md +67 -0
- package/marketplace/skills/owasp-security/SKILL.md +153 -0
- package/marketplace/skills/pattern-recognition/SKILL.md +472 -0
- package/marketplace/skills/performance-budgets/SKILL.md +185 -0
- package/marketplace/skills/performance-engineering/SKILL.md +58 -0
- package/marketplace/skills/performance-testing/SKILL.md +125 -0
- package/marketplace/skills/printify/SKILL.md +42 -0
- package/marketplace/skills/prioritization/SKILL.md +118 -0
- package/marketplace/skills/problem-framing/SKILL.md +41 -0
- package/marketplace/skills/problem-locating-solving/SKILL.md +203 -0
- package/marketplace/skills/project-knowledge-extraction/SKILL.md +54 -0
- package/marketplace/skills/prompt-craft/SKILL.md +134 -0
- package/marketplace/skills/prompt-injection-defense/SKILL.md +132 -0
- package/marketplace/skills/property-based-testing/SKILL.md +100 -0
- package/marketplace/skills/prototyping/SKILL.md +43 -0
- package/marketplace/skills/query-optimization/SKILL.md +144 -0
- package/marketplace/skills/real-time-updates/SKILL.md +324 -0
- package/marketplace/skills/ref-patterns/SKILL.md +284 -0
- package/marketplace/skills/refactor/SKILL.md +65 -0
- package/marketplace/skills/rendering-models/SKILL.md +142 -0
- package/marketplace/skills/replication-patterns/SKILL.md +110 -0
- package/marketplace/skills/research-synthesis/SKILL.md +41 -0
- package/marketplace/skills/route-handler-design/SKILL.md +347 -0
- package/marketplace/skills/schema-evolution/SKILL.md +140 -0
- package/marketplace/skills/security-fundamentals/SKILL.md +139 -0
- package/marketplace/skills/semantic-center/SKILL.md +194 -0
- package/marketplace/skills/semantic-relations/SKILL.md +250 -0
- package/marketplace/skills/semantics/SKILL.md +366 -0
- package/marketplace/skills/semiotics/SKILL.md +230 -0
- package/marketplace/skills/seo-strategy/SKILL.md +260 -0
- package/marketplace/skills/server-actions-design/SKILL.md +243 -0
- package/marketplace/skills/server-components-design/SKILL.md +190 -0
- package/marketplace/skills/sharding-strategy/SKILL.md +123 -0
- package/marketplace/skills/shopify/SKILL.md +42 -0
- package/marketplace/skills/skill-infrastructure/SKILL.md +320 -0
- package/marketplace/skills/skill-router/SKILL.md +71 -0
- package/marketplace/skills/skill-scaffold/SKILL.md +105 -0
- package/marketplace/skills/snapshot-testing/SKILL.md +120 -0
- package/marketplace/skills/spec-driven-development/SKILL.md +148 -0
- package/marketplace/skills/state-machine-modeling/SKILL.md +56 -0
- package/marketplace/skills/state-management/SKILL.md +134 -0
- package/marketplace/skills/streaming-architecture/SKILL.md +194 -0
- package/marketplace/skills/summarization/SKILL.md +156 -0
- package/marketplace/skills/suspense-patterns/SKILL.md +265 -0
- package/marketplace/skills/system-interface-contracts/SKILL.md +59 -0
- package/marketplace/skills/task-analysis/SKILL.md +201 -0
- package/marketplace/skills/taxonomy-design/SKILL.md +66 -0
- package/marketplace/skills/test-coverage-strategy/SKILL.md +108 -0
- package/marketplace/skills/test-doubles-design/SKILL.md +98 -0
- package/marketplace/skills/test-driven-development/SKILL.md +96 -0
- package/marketplace/skills/testing-strategy/SKILL.md +67 -0
- package/marketplace/skills/theme-system-design/SKILL.md +43 -0
- package/marketplace/skills/tool-call-flow/SKILL.md +229 -0
- package/marketplace/skills/tool-call-strategy/SKILL.md +292 -0
- package/marketplace/skills/transaction-isolation/SKILL.md +98 -0
- package/marketplace/skills/type-safety/SKILL.md +177 -0
- package/marketplace/skills/typography-system/SKILL.md +43 -0
- package/marketplace/skills/usability-testing/SKILL.md +43 -0
- package/marketplace/skills/user-research/SKILL.md +43 -0
- package/marketplace/skills/vercel-composition-patterns/SKILL.md +157 -0
- package/marketplace/skills/version-control/SKILL.md +233 -0
- package/marketplace/skills/visual-design-foundations/SKILL.md +59 -0
- package/marketplace/skills/visual-hierarchy/SKILL.md +43 -0
- package/marketplace/skills/webhook-integration/SKILL.md +331 -0
- package/marketplace/skills/writing-humanizer/SKILL.md +380 -0
- package/package.json +67 -0
- package/schemas/manifest.schema.json +811 -0
- package/schemas/manifest.v2.schema.json +164 -0
- package/schemas/manifest.v3.schema.json +758 -0
- package/schemas/manifest.v4.schema.json +755 -0
- package/schemas/manifest.v5.schema.json +755 -0
- package/schemas/manifest.v6.schema.json +811 -0
- package/schemas/skill.context.jsonld +279 -0
- package/schemas/skill.schema.json +919 -0
- package/schemas/skill.v2.schema.json +201 -0
- package/schemas/skill.v3.schema.json +827 -0
- package/schemas/skill.v4.schema.json +822 -0
- package/schemas/skill.v5.schema.json +830 -0
- package/schemas/skill.v6.schema.json +946 -0
- package/schemas/vocabulary/keywords.json +180 -0
- package/schemas/vocabulary/workspace_tags.json +23 -0
- package/scripts/__tests__/migrate-skill-v2-to-v3.test.js +161 -0
- package/scripts/__tests__/migrate-skill-v3-to-v4.test.js +158 -0
- package/scripts/__tests__/test-export-parser-drift.js +149 -0
- package/scripts/__tests__/test-marketplace-export.js +114 -0
- package/scripts/__tests__/test-router-paths.js +82 -0
- package/scripts/__tests__/test-stability-promotion.js +244 -0
- package/scripts/__tests__/test-v3-1-alias-contract.js +109 -0
- package/scripts/__tests__/test-v3-1-skos-runtime.js +116 -0
- package/scripts/backfill-schema-version.js +198 -0
- package/scripts/build-field-reference.js +160 -0
- package/scripts/build-retrieval-baseline.js +511 -0
- package/scripts/check-markdown-links.js +211 -0
- package/scripts/check-protocol-consistency.js +979 -0
- package/scripts/export-marketplace-skills.js +610 -0
- package/scripts/export-skill.js +374 -0
- package/scripts/generate-manifest.js +787 -0
- package/scripts/lib/alias-contract.js +83 -0
- package/scripts/lib/audit-prompt-builder.js +771 -0
- package/scripts/lib/mock-grader.js +134 -0
- package/scripts/lib/parse-frontmatter.js +429 -0
- package/scripts/lib/roots.js +119 -0
- package/scripts/lint/check-archetype-sections.js +185 -0
- package/scripts/lint/check-category-enum.js +83 -0
- package/scripts/lint/check-routing-eval.js +146 -0
- package/scripts/lint/check-routing-quality.js +211 -0
- package/scripts/lint/check-stability-promotion.js +220 -0
- package/scripts/lint/format-code-frame.js +206 -0
- package/scripts/marketplace-install.js +125 -0
- package/scripts/migrate-category-to-enum.js +169 -0
- package/scripts/migrate-skill-v2-to-v3.js +424 -0
- package/scripts/migrate-skill-v3-to-v4.js +200 -0
- package/scripts/migrate-skill-v5-to-v6.js +304 -0
- package/scripts/restructure-by-category.js +85 -0
- package/scripts/seed-publication-classification.js +282 -0
- package/scripts/skill-audit.js +893 -0
- package/scripts/skill-graph-drift.js +483 -0
- package/scripts/skill-graph-route.js +766 -0
- package/scripts/skill-graph-routing-eval.js +393 -0
- package/scripts/skill-lint.js +1317 -0
- package/scripts/skill-overlap.js +213 -0
- package/scripts/verify-skill-md-export.js +201 -0
|
@@ -0,0 +1,393 @@
|
|
|
1
|
+
#!/usr/bin/env node
|
|
2
|
+
/**
|
|
3
|
+
* skill-graph routing-eval — the harness that makes `routing_eval: present` honest.
|
|
4
|
+
*
|
|
5
|
+
* For each skill in the compiled manifest:
|
|
6
|
+
* 1. Run every `activation.examples[]` entry through skill-graph-route →
|
|
7
|
+
* the top-1 winner MUST be this skill. Else: positive-class FAIL.
|
|
8
|
+
* 2. Run every `activation.anti_examples[]` entry through skill-graph-route →
|
|
9
|
+
* the top-1 winner MUST NOT be this skill, AND (if non-null) MUST be
|
|
10
|
+
* named in this skill's `relations.boundary[]`. Else: negative-class FAIL.
|
|
11
|
+
* A null winner is COVERAGE_GAP (informational, not a FAIL — the anti-
|
|
12
|
+
* example correctly avoids this skill but nothing else absorbs it).
|
|
13
|
+
* 3. Emit a per-skill verdict + per-case evidence block.
|
|
14
|
+
*
|
|
15
|
+
* This script is the rent-proof for L1's `examples`, `anti_examples`, and
|
|
16
|
+
* `relations.boundary.{skill, reason}` fields. Until this script runs,
|
|
17
|
+
* `routing_eval: present` is a self-assertion a human reviewer cannot check.
|
|
18
|
+
*
|
|
19
|
+
* Usage:
|
|
20
|
+
* node scripts/skill-graph-routing-eval.js # all skills, text summary
|
|
21
|
+
* node scripts/skill-graph-routing-eval.js --json # structured JSON
|
|
22
|
+
* node scripts/skill-graph-routing-eval.js --skill debugging # one skill
|
|
23
|
+
* node scripts/skill-graph-routing-eval.js --quiet # exit-code only (CI)
|
|
24
|
+
* node scripts/skill-graph-routing-eval.js --manifest PATH # custom manifest
|
|
25
|
+
* node scripts/skill-graph-routing-eval.js --only-asserted # only skills with
|
|
26
|
+
* routing_eval: present
|
|
27
|
+
* node scripts/skill-graph-routing-eval.js --confusion-matrix # expected vs actual
|
|
28
|
+
*
|
|
29
|
+
* Self-contained. Only uses Node built-ins — no external dependencies.
|
|
30
|
+
* Exit 0 when every evaluated skill passes (or has no cases); exit 1 on
|
|
31
|
+
* any per-skill FAIL.
|
|
32
|
+
*/
|
|
33
|
+
|
|
34
|
+
'use strict';
|
|
35
|
+
|
|
36
|
+
const fs = require('fs');
|
|
37
|
+
const path = require('path');
|
|
38
|
+
const { routeSkills } = require('./skill-graph-route');
|
|
39
|
+
const { packageRoot, workspaceRoot } = require('./lib/roots');
|
|
40
|
+
|
|
41
|
+
// Roots are resolved once at load time through the helpers imported from
// ./lib/roots; every manifest path below is derived from them.
const REPO_ROOT = workspaceRoot();
const PACKAGE_ROOT = packageRoot();

// Manifest at the top of the workspace root.
const DEFAULT_MANIFEST = path.join(REPO_ROOT, 'skills.manifest.json');

// Sample manifest under the workspace's examples/ directory.
const SAMPLE_MANIFEST = path.join(
  REPO_ROOT,
  'examples',
  'skills.manifest.sample.json'
);

// Sample manifest under the package root's examples/ directory.
const PACKAGE_SAMPLE_MANIFEST = path.join(
  PACKAGE_ROOT,
  'examples',
  'skills.manifest.sample.json'
);
|
|
46
|
+
|
|
47
|
+
// ---------------------------------------------------------------------------
|
|
48
|
+
// Case evaluators
|
|
49
|
+
// ---------------------------------------------------------------------------
|
|
50
|
+
|
|
51
|
+
/**
 * Positive-class check for a single prompt.
 *
 * Routes `prompt` through `routeSkills` asking for exactly one result
 * (no project, no path argument, minimum eval state 'unverified') and
 * compares the top-1 winner against `expectedSkill`.
 *
 * @param {object} manifest       Compiled manifest handed straight to routeSkills.
 * @param {string} expectedSkill  Skill name that must win.
 * @param {string} prompt         The `activation.examples[]` entry under test.
 * @param {string} todayISO       Forwarded to routeSkills unchanged.
 * @returns {{kind: 'positive', prompt: string, verdict: 'PASS'|'FAIL',
 *            actual: (string|null), reason: string}}
 */
function evaluatePositive(manifest, expectedSkill, prompt, todayISO) {
  const routing = routeSkills(manifest, {
    query: prompt,
    project: null,
    maxResults: 1,
    minEvalState: 'unverified',
    pathArg: null,
    todayISO,
  });

  // A missing top entry or a falsy name both collapse to "no winner".
  const top = routing.selected[0];
  const winner = top ? top.skill.name || null : null;

  const report = (verdict, reason) => ({
    kind: 'positive',
    prompt,
    verdict,
    actual: winner,
    reason,
  });

  if (winner === expectedSkill) {
    return report('PASS', `top-1 winner resolved to ${expectedSkill}`);
  }

  // Explain the miss: was the expected skill dropped by a boundary exclusion?
  const boundaryReasons = [];
  for (const entry of routing.excluded) {
    if (entry.role === 'boundary_excluded' && entry.skill.name === expectedSkill) {
      boundaryReasons.push(entry.reason);
    }
  }

  if (boundaryReasons.length > 0) {
    return report(
      'FAIL',
      `expected ${expectedSkill} but was boundary-excluded (${boundaryReasons[0]})`
    );
  }

  const got = winner === null ? 'no winner' : winner;
  return report('FAIL', `expected ${expectedSkill}, got ${got}`);
}
|
|
92
|
+
|
|
93
|
+
/**
 * Negative-class check for a single anti-example prompt.
 *
 * Routes `prompt` through `routeSkills` (one result, no project, no path
 * argument, minimum eval state 'unverified') and classifies the winner:
 *
 *   - winner is `excludedSkill` itself        -> FAIL (hard-negative regression)
 *   - no winner at all                        -> COVERAGE_GAP (informational)
 *   - winner is listed in `boundaryTargets`   -> PASS
 *   - any other winner                        -> FAIL (off-boundary hit)
 *
 * COVERAGE_GAP is not a failure: the anti-example stayed away from the skill
 * under test, which is the primary contract; it only signals that no other
 * skill picked the prompt up.
 *
 * @param {object}   manifest         Compiled manifest handed to routeSkills.
 * @param {string}   excludedSkill    Skill that must NOT win.
 * @param {string}   prompt           The `activation.anti_examples[]` entry.
 * @param {string[]} boundaryTargets  Skill names from the skill's relations.boundary.
 * @param {string}   todayISO         Forwarded to routeSkills unchanged.
 * @returns {{kind: 'negative', prompt: string,
 *            verdict: 'PASS'|'FAIL'|'COVERAGE_GAP',
 *            actual: (string|null), reason: string}}
 */
function evaluateNegative(manifest, excludedSkill, prompt, boundaryTargets, todayISO) {
  const routing = routeSkills(manifest, {
    query: prompt,
    project: null,
    maxResults: 1,
    minEvalState: 'unverified',
    pathArg: null,
    todayISO,
  });

  const top = routing.selected[0];
  const winner = top ? top.skill.name || null : null;

  const report = (verdict, reason) => ({
    kind: 'negative',
    prompt,
    verdict,
    actual: winner,
    reason,
  });

  // Only rendered for the branches that mention the boundary list.
  const boundaryList = () => boundaryTargets.join(', ') || 'empty';

  if (winner === excludedSkill) {
    return report(
      'FAIL',
      `anti_example routed back to ${excludedSkill} — hard-negative regression`
    );
  }

  if (winner === null) {
    return report(
      'COVERAGE_GAP',
      `no skill absorbed this anti_example — consider a boundary target the router can resolve (${excludedSkill}.relations.boundary: [${boundaryList()}])`
    );
  }

  if (!boundaryTargets.includes(winner)) {
    return report(
      'FAIL',
      `routed to ${winner}, which is not in ${excludedSkill}.relations.boundary (${boundaryList()}) — either the anti_example should be removed or boundary should name ${winner}`
    );
  }

  return report('PASS', `routed to ${winner}, named in ${excludedSkill}.relations.boundary`);
}
|
|
154
|
+
|
|
155
|
+
// ---------------------------------------------------------------------------
|
|
156
|
+
// Per-skill run
|
|
157
|
+
// ---------------------------------------------------------------------------
|
|
158
|
+
|
|
159
|
+
/**
 * Evaluate every routing case declared by one manifest skill entry.
 *
 * Positive cases come from `activation.examples[]`, negative cases from
 * `activation.anti_examples[]`; either list is treated as empty when it is
 * not an array. Positives are evaluated first, then negatives.
 *
 * Skill verdict:
 *   - NO_CASES when there is nothing to evaluate,
 *   - FAIL     when at least one case FAILed,
 *   - PASS     otherwise (COVERAGE_GAP cases do not fail a skill).
 *
 * @param {object} manifest    Compiled manifest passed through to the case evaluators.
 * @param {object} skillEntry  One skill entry from the manifest.
 * @param {string} todayISO    Passed through to the case evaluators.
 * @returns {{skill: string, routing_eval_declared: string, verdict: string,
 *            counts: {PASS: number, FAIL: number, COVERAGE_GAP: number},
 *            case_count: number, cases: object[]}}
 */
function evaluateSkill(manifest, skillEntry, todayISO) {
  const skillName = skillEntry.name;
  const activation = skillEntry.activation || {};
  const listOf = (value) => (Array.isArray(value) ? value : []);
  const positives = listOf(activation.examples);
  const negatives = listOf(activation.anti_examples);

  const boundaryTargets = extractBoundaryTargets(skillEntry);

  const cases = [
    ...Array.from(positives, (prompt) =>
      evaluatePositive(manifest, skillName, prompt, todayISO)),
    ...Array.from(negatives, (prompt) =>
      evaluateNegative(manifest, skillName, prompt, boundaryTargets, todayISO)),
  ];

  const counts = { PASS: 0, FAIL: 0, COVERAGE_GAP: 0 };
  cases.forEach((c) => {
    counts[c.verdict] += 1;
  });

  // What the skill claims about itself; reported next to the measured verdict.
  const declared = (skillEntry.health && skillEntry.health.routing_eval) || 'absent';

  const verdict = cases.length === 0
    ? 'NO_CASES'
    : counts.FAIL > 0
      ? 'FAIL'
      : 'PASS';

  return {
    skill: skillName,
    routing_eval_declared: declared,
    verdict,
    counts,
    case_count: cases.length,
    cases,
  };
}
|
|
204
|
+
|
|
205
|
+
/**
 * Extract the skill names listed in `skillEntry.relations.boundary`.
 *
 * Each entry may be a bare string (v2 form) or an object carrying a string
 * `skill` property (v3 `{skill, reason}` form); anything else is skipped.
 *
 * A missing or non-array `boundary` yields an empty list. Without that guard
 * a bare string would be iterated character by character and a plain object
 * would throw for not being iterable.
 *
 * @param {object} skillEntry  One skill entry from the manifest.
 * @returns {string[]} Boundary skill names in declaration order.
 */
function extractBoundaryTargets(skillEntry) {
  const boundary = skillEntry.relations && skillEntry.relations.boundary;
  if (!Array.isArray(boundary)) return [];

  const targets = [];
  for (const item of boundary) {
    if (typeof item === 'string') {
      targets.push(item);
    } else if (item && typeof item.skill === 'string') {
      targets.push(item.skill);
    }
  }
  return targets;
}
|
|
215
|
+
|
|
216
|
+
// ---------------------------------------------------------------------------
|
|
217
|
+
// Rendering
|
|
218
|
+
// ---------------------------------------------------------------------------
|
|
219
|
+
|
|
220
|
+
/**
 * Render per-skill reports as a plain-text summary.
 *
 * One headline per skill (SKIP for NO_CASES, otherwise PASS/FAIL with the
 * pass ratio, an optional coverage-gap note, and the declared routing_eval
 * value), followed by two lines for every non-passing case: the truncated
 * prompt (marked `x` for FAIL, `!` for anything else) and the reason.
 * A blank line and a totals line close the output.
 *
 * @param {object[]} reports  Report objects as returned by evaluateSkill.
 * @returns {string} Newline-joined text.
 */
function renderText(reports) {
  const out = [];
  let passCount = 0;
  let failCount = 0;
  let skipCount = 0;

  for (const report of reports) {
    const verdict = report.verdict;
    if (verdict === 'PASS') passCount += 1;
    else if (verdict === 'FAIL') failCount += 1;
    else if (verdict === 'NO_CASES') skipCount += 1;

    if (verdict === 'NO_CASES') {
      out.push(`SKIP ${report.skill} — no examples / anti_examples declared`);
      continue;
    }

    const label = verdict === 'PASS' ? 'PASS' : 'FAIL';
    const gaps = report.counts.COVERAGE_GAP;
    const gapNote = gaps > 0 ? ` (${gaps} coverage-gap)` : '';
    out.push([
      `${label.padEnd(6)} ${report.skill.padEnd(22)} `,
      `${report.counts.PASS}/${report.case_count} cases pass${gapNote} `,
      `[declared routing_eval: ${report.routing_eval_declared}]`,
    ].join(''));

    // Only non-passing cases get detail lines.
    for (const entry of report.cases) {
      if (entry.verdict !== 'PASS') {
        const marker = entry.verdict === 'FAIL' ? ' x' : ' !';
        out.push(`${marker} [${entry.kind}] "${truncate(entry.prompt, 72)}"`);
        out.push(` ${entry.reason}`);
      }
    }
  }

  out.push('');
  out.push(`${reports.length} skill(s): ${passCount} PASS, ${failCount} FAIL, ${skipCount} SKIP.`);
  return out.join('\n');
}
|
|
250
|
+
|
|
251
|
+
/**
 * Aggregate per-case results into a confusion summary.
 *
 * `positive` maps expected skill -> actual winner -> case count, with the
 * literal key 'NO_WINNER' standing in for a case whose `actual` is falsy.
 * `negative` buckets every non-positive case into exactly one counter:
 *   coverage_gap          actual is null
 *   self_hit              actual is the skill under test
 *   pass_boundary_target  any other winner on a PASS case
 *   off_boundary_hit      everything else
 *
 * The `positive` dictionaries are created without a prototype because their
 * keys are skill names taken from the manifest. With ordinary `{}` objects a
 * skill called `constructor` or `toString` would collide with inherited
 * members, and `__proto__` would write counters onto Object.prototype.
 *
 * @param {object[]} reports  Reports exposing `skill` and `cases[]`
 *                            (each case: `kind`, `actual`, `verdict`).
 * @returns {{positive: Object<string, Object<string, number>>,
 *            negative: {total: number, pass_boundary_target: number,
 *                       coverage_gap: number, self_hit: number,
 *                       off_boundary_hit: number}}}
 */
function buildConfusionMatrix(reports) {
  const positive = Object.create(null);
  const negative = {
    total: 0,
    pass_boundary_target: 0,
    coverage_gap: 0,
    self_hit: 0,
    off_boundary_hit: 0,
  };

  for (const report of reports) {
    for (const c of report.cases) {
      if (c.kind === 'positive') {
        const expected = report.skill;
        const actual = c.actual || 'NO_WINNER';
        if (!positive[expected]) positive[expected] = Object.create(null);
        const row = positive[expected];
        row[actual] = (row[actual] || 0) + 1;
        continue;
      }

      negative.total++;
      if (c.actual === null) negative.coverage_gap++;
      else if (c.actual === report.skill) negative.self_hit++;
      else if (c.verdict === 'PASS') negative.pass_boundary_target++;
      else negative.off_boundary_hit++;
    }
  }

  return { positive, negative };
}
|
|
281
|
+
|
|
282
|
+
/**
 * Render a confusion matrix (as produced by buildConfusionMatrix) as plain text.
 *
 * The output starts with an empty line, then a three-column table of positive
 * cases (expected skill, actual winner, case count) sorted by expected skill
 * and then by actual winner, followed by the negative-case counters.
 *
 * Columns are at least 28 characters wide and widen when a name would
 * otherwise fill the whole column, so adjacent values never run together.
 *
 * @param {{positive: Object, negative: Object}} matrix
 * @returns {string} Lines joined with '\n' (no trailing newline).
 */
function renderConfusionMatrix(matrix) {
  const MIN_COLUMN_WIDTH = 28;
  const COUNT_HEADER = 'Cases';

  const rows = [];
  for (const expected of Object.keys(matrix.positive).sort()) {
    const winners = matrix.positive[expected];
    for (const actual of Object.keys(winners).sort()) {
      rows.push({ expected, actual, count: winners[actual] });
    }
  }

  // padEnd() adds nothing once a value reaches the column width, so reserve
  // one character of padding beyond the longest value in each column.
  const columnWidth = (key) =>
    rows.reduce((width, row) => Math.max(width, row[key].length + 1), MIN_COLUMN_WIDTH);
  const expectedWidth = columnWidth('expected');
  const actualWidth = columnWidth('actual');

  const lines = [];
  lines.push('');
  lines.push('POSITIVE-CASE CONFUSION MATRIX');
  lines.push('Expected skill'.padEnd(expectedWidth) + 'Actual winner'.padEnd(actualWidth) + COUNT_HEADER);
  lines.push('-'.repeat(expectedWidth + actualWidth + COUNT_HEADER.length));

  if (rows.length === 0) {
    lines.push('(no positive examples evaluated)');
  } else {
    for (const row of rows) {
      lines.push(row.expected.padEnd(expectedWidth) + row.actual.padEnd(actualWidth) + String(row.count));
    }
  }

  lines.push('');
  lines.push('NEGATIVE-CASE SUMMARY');
  lines.push(` total: ${matrix.negative.total}`);
  lines.push(` pass_boundary_target: ${matrix.negative.pass_boundary_target}`);
  lines.push(` coverage_gap: ${matrix.negative.coverage_gap}`);
  lines.push(` self_hit: ${matrix.negative.self_hit}`);
  lines.push(` off_boundary_hit: ${matrix.negative.off_boundary_hit}`);
  return lines.join('\n');
}
|
|
313
|
+
|
|
314
|
+
/**
 * Shorten a string to at most `n` UTF-16 code units, marking the cut with a
 * trailing ellipsis character (U+2026).
 *
 * @param {?string} s - Text to shorten; null/undefined is treated as ''.
 * @param {number} n - Maximum length of the result, ellipsis included.
 * @returns {string} `s` unchanged when it already fits, otherwise a prefix of
 *   `s` followed by the ellipsis. A non-positive `n` yields ''.
 */
function truncate(s, n) {
  const text = s == null ? '' : String(s);
  if (text.length <= n) return text;
  // Without this guard slice(0, n - 1) would take a negative end index and
  // return nearly the whole string, exceeding the requested limit.
  if (n <= 0) return '';

  let head = text.slice(0, n - 1);
  // Do not leave a dangling high surrogate when the cut falls inside a
  // surrogate pair (e.g. an emoji); drop the orphaned half instead.
  const last = head.charCodeAt(head.length - 1);
  if (last >= 0xd800 && last <= 0xdbff) head = head.slice(0, -1);
  return head + '\u2026';
}
|
|
317
|
+
|
|
318
|
+
// ---------------------------------------------------------------------------
|
|
319
|
+
// CLI
|
|
320
|
+
// ---------------------------------------------------------------------------
|
|
321
|
+
|
|
322
|
+
/**
 * Look up the value that follows a command-line flag.
 *
 * @param {string[]} args - Argument list to search.
 * @param {string} flag - Flag to look for, e.g. '--skill'.
 * @returns {?string} The element right after the first occurrence of `flag`,
 *   or null when the flag is absent or the following element is missing/empty.
 */
function argValue(args, flag) {
  const position = args.indexOf(flag);
  if (position === -1) {
    return null;
  }
  const candidate = args[position + 1];
  if (!candidate) {
    return null;
  }
  return candidate;
}
|
|
326
|
+
|
|
327
|
+
/**
 * CLI entry point: load a manifest, evaluate the selected skills and report.
 *
 * Recognised arguments:
 *   --json              print `{ reports }` (plus `confusion_matrix` when
 *                       requested) as JSON instead of text
 *   --quiet             suppress the text report (ignored with --json)
 *   --only-asserted     keep only skills whose `health.routing_eval` is 'present'
 *   --confusion-matrix  also build and emit the confusion matrix
 *   --skill <name>      evaluate only the skill with this name
 *   --manifest <path>   manifest to read; otherwise the first existing of
 *                       DEFAULT_MANIFEST, SAMPLE_MANIFEST, then
 *                       PACKAGE_SAMPLE_MANIFEST as the last resort
 *
 * Sets the process exit code to 1 when the manifest is missing or unparsable,
 * when --skill selects nothing, or when any report's verdict is 'FAIL';
 * otherwise 0.
 *
 * The exit status is set through `process.exitCode` rather than by calling
 * `process.exit()`: exiting immediately after `process.stdout.write()` can
 * drop output that has not been flushed yet on platforms where stdout is
 * asynchronous (for example pipes on Windows).
 */
function main() {
  const args = process.argv.slice(2);
  const outputJson = args.includes('--json');
  const quiet = args.includes('--quiet');
  const onlyAsserted = args.includes('--only-asserted');
  const confusionMatrix = args.includes('--confusion-matrix');
  const skillFilter = argValue(args, '--skill');
  const manifestArg = argValue(args, '--manifest');

  const manifestPath = manifestArg
    ? path.resolve(manifestArg)
    : (fs.existsSync(DEFAULT_MANIFEST)
      ? DEFAULT_MANIFEST
      : (fs.existsSync(SAMPLE_MANIFEST) ? SAMPLE_MANIFEST : PACKAGE_SAMPLE_MANIFEST));

  if (!fs.existsSync(manifestPath)) {
    console.error(`ERROR manifest not found: ${manifestPath}`);
    console.error('Run `node scripts/generate-manifest.js --output skills.manifest.json` first, or pass --manifest <path>.');
    process.exitCode = 1;
    return;
  }

  let manifest;
  try {
    manifest = JSON.parse(fs.readFileSync(manifestPath, 'utf8'));
  } catch (e) {
    console.error(`ERROR cannot parse manifest: ${e.message}`);
    process.exitCode = 1;
    return;
  }

  const todayISO = new Date().toISOString().slice(0, 10);
  // A manifest that parses to null or a non-object is treated as having no
  // skills instead of crashing on the property access.
  const skills = manifest && Array.isArray(manifest.skills) ? manifest.skills : [];
  let target = skills;
  if (skillFilter) target = target.filter(s => s.name === skillFilter);
  if (onlyAsserted) target = target.filter(s => s.health && s.health.routing_eval === 'present');

  if (skillFilter && target.length === 0) {
    console.error(`ERROR skill "${skillFilter}" not found in manifest`);
    process.exitCode = 1;
    return;
  }

  const reports = target.map(s => evaluateSkill(manifest, s, todayISO));
  const matrix = confusionMatrix ? buildConfusionMatrix(reports) : null;

  if (outputJson) {
    process.stdout.write(JSON.stringify(matrix ? { reports, confusion_matrix: matrix } : { reports }, null, 2) + '\n');
  } else if (!quiet) {
    const text = confusionMatrix
      ? renderText(reports) + renderConfusionMatrix(matrix)
      : renderText(reports);
    process.stdout.write(text + '\n');
  }

  const anyFail = reports.some(r => r.verdict === 'FAIL');
  process.exitCode = anyFail ? 1 : 0;
}
|
|
382
|
+
|
|
383
|
+
// Allow require() for programmatic use by scripts/lint/check-routing-eval.js.
|
|
384
|
+
module.exports = {
|
|
385
|
+
buildConfusionMatrix,
|
|
386
|
+
evaluateSkill,
|
|
387
|
+
evaluatePositive,
|
|
388
|
+
evaluateNegative,
|
|
389
|
+
extractBoundaryTargets,
|
|
390
|
+
renderConfusionMatrix,
|
|
391
|
+
};
|
|
392
|
+
|
|
393
|
+
if (require.main === module) main();
|