@skill-graph/cli 0.5.6

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (330) hide show
  1. package/CHANGELOG.md +247 -0
  2. package/LICENSE +200 -0
  3. package/NOTICE +62 -0
  4. package/README.md +398 -0
  5. package/SKILL_GRAPH.md +443 -0
  6. package/bin/skill-graph.js +374 -0
  7. package/docs/ADOPTION.md +117 -0
  8. package/docs/CONFORMANCE.md +66 -0
  9. package/docs/PRIMER.md +384 -0
  10. package/docs/QUICKSTART-30MIN.md +333 -0
  11. package/docs/ROUTING-METRICS.md +120 -0
  12. package/docs/SKILL-MD-FORMAT-COMPATIBILITY.md +127 -0
  13. package/docs/SKILL_AUDIT_CHECKLIST.md +199 -0
  14. package/docs/SKILL_AUDIT_LOOP.md +195 -0
  15. package/docs/SKILL_METADATA_PROTOCOL.md +609 -0
  16. package/docs/_archived/marketplace-publication-priority-2026-05-18.md +239 -0
  17. package/docs/adr/0001-predicate-set.md +69 -0
  18. package/docs/adr/0002-json-ld-context.md +82 -0
  19. package/docs/adr/0003-ontoclean-rigidity-tags.md +65 -0
  20. package/docs/adr/0004-persistent-identifiers.md +74 -0
  21. package/docs/adr/0005-freshness-consolidation.md +70 -0
  22. package/docs/adr/0006-revise-predicate-rename.md +105 -0
  23. package/docs/adr/0007-audit-loop-cadence.md +99 -0
  24. package/docs/adr/0008-skill-surface-split-and-curation-policy.md +93 -0
  25. package/docs/category-consumers.md +168 -0
  26. package/docs/concept-map.md +194 -0
  27. package/docs/diagrams/drift-states.mmd +21 -0
  28. package/docs/diagrams/manifest-pipeline.mmd +25 -0
  29. package/docs/diagrams/routing-harness.mmd +41 -0
  30. package/docs/diagrams/starter-graph.mmd +53 -0
  31. package/docs/field-decision-guide.md +315 -0
  32. package/docs/field-rationale.md +211 -0
  33. package/docs/field-reference.generated.md +624 -0
  34. package/docs/field-reference.md +1426 -0
  35. package/docs/glossary.md +190 -0
  36. package/docs/head-noun-glossary.md +63 -0
  37. package/docs/images/audit-phases.png +0 -0
  38. package/docs/images/drift-states.png +0 -0
  39. package/docs/images/graded-mode.png +0 -0
  40. package/docs/images/manifest-pipeline.png +0 -0
  41. package/docs/images/routing-harness.png +0 -0
  42. package/docs/images/skill-anatomy.png +0 -0
  43. package/docs/images/starter-graph.png +0 -0
  44. package/docs/images/system-model.png +0 -0
  45. package/docs/integrations/github-actions.md +155 -0
  46. package/docs/manifest-field-mapping.md +443 -0
  47. package/docs/marketplace-publication-queue.generated.md +240 -0
  48. package/docs/marketplace-release-agent-prompt.md +82 -0
  49. package/docs/marketplace-skill-candidate-list.md +272 -0
  50. package/docs/marketplace-syndication.md +222 -0
  51. package/docs/migration-sample-review.md +155 -0
  52. package/docs/migrations/v4-to-v5.md +168 -0
  53. package/docs/migrations/v5-to-v6.md +221 -0
  54. package/docs/name-exceptions.yaml +37 -0
  55. package/docs/plans/marketplace-p1-public-migration-plan.md +41 -0
  56. package/docs/plans/multi-root-workspace.md +148 -0
  57. package/docs/plans/scripts-roadmap.md +107 -0
  58. package/docs/plans/v4-schema-bump.md +160 -0
  59. package/docs/plans/wave-2-extraction.md +122 -0
  60. package/docs/positioning-vs-marketplaces.md +175 -0
  61. package/docs/proposals/skill-audit-loop-positioning.md +160 -0
  62. package/docs/quality-doctrine.md +138 -0
  63. package/docs/recommended-skills.md +150 -0
  64. package/docs/research/skill-comprehension-eval-research.md +1830 -0
  65. package/docs/research/skill-retrieval-evidence.md +66 -0
  66. package/docs/skill-metadata-protocol.md +471 -0
  67. package/docs/skills-sh-maintainer-cleanup-request.md +80 -0
  68. package/examples/audits/a11y/findings.md +52 -0
  69. package/examples/audits/a11y/scorecard.md +21 -0
  70. package/examples/audits/a11y/verdict.md +44 -0
  71. package/examples/audits/debugging/findings.md +59 -0
  72. package/examples/audits/debugging/scorecard.md +22 -0
  73. package/examples/audits/debugging/verdict.md +33 -0
  74. package/examples/audits/documentation/findings.md +59 -0
  75. package/examples/audits/documentation/scorecard.md +22 -0
  76. package/examples/audits/documentation/verdict.md +33 -0
  77. package/examples/evals/a11y.json +140 -0
  78. package/examples/evals/api-design.json +52 -0
  79. package/examples/evals/code-review.json +52 -0
  80. package/examples/evals/data-modeling.json +52 -0
  81. package/examples/evals/database-migration.json +52 -0
  82. package/examples/evals/debugging.json +118 -0
  83. package/examples/evals/dependency-architecture.json +52 -0
  84. package/examples/evals/design-system-architecture.json +52 -0
  85. package/examples/evals/error-tracking.json +52 -0
  86. package/examples/evals/event-contract-design.json +52 -0
  87. package/examples/evals/form-ux-architecture.json +52 -0
  88. package/examples/evals/framework-fit-analysis.json +52 -0
  89. package/examples/evals/graph-audit.json +139 -0
  90. package/examples/evals/information-architecture.json +52 -0
  91. package/examples/evals/interaction-feedback.json +52 -0
  92. package/examples/evals/interaction-patterns.json +52 -0
  93. package/examples/evals/layout-composition.json +52 -0
  94. package/examples/evals/lint-overlay.json +117 -0
  95. package/examples/evals/microcopy.json +52 -0
  96. package/examples/evals/observability-modeling.json +52 -0
  97. package/examples/evals/pattern-recognition.json +96 -0
  98. package/examples/evals/performance-engineering.json +52 -0
  99. package/examples/evals/refactor.json +128 -0
  100. package/examples/evals/semiotics.json +52 -0
  101. package/examples/evals/skill-infrastructure.json +96 -0
  102. package/examples/evals/skill-router.json +140 -0
  103. package/examples/evals/skill-router.routing.json +113 -0
  104. package/examples/evals/system-interface-contracts.json +52 -0
  105. package/examples/evals/task-analysis.json +52 -0
  106. package/examples/evals/testing-strategy.json +118 -0
  107. package/examples/evals/type-safety.json +249 -0
  108. package/examples/evals/visual-design-foundations.json +52 -0
  109. package/examples/evals/webhook-integration.json +52 -0
  110. package/examples/exports/a11y.skill-md.md +80 -0
  111. package/examples/exports/debugging.skill-md.md +80 -0
  112. package/examples/exports/refactor.skill-md.md +78 -0
  113. package/examples/exports/testing-strategy.skill-md.md +81 -0
  114. package/examples/projects/markdown-static-site/README.md +115 -0
  115. package/examples/projects/markdown-static-site/skills/content-source-router/SKILL.md +131 -0
  116. package/examples/projects/markdown-static-site/skills/image-optimization-pipeline-config/SKILL.md +132 -0
  117. package/examples/projects/markdown-static-site/skills/link-rot-detection/SKILL.md +103 -0
  118. package/examples/projects/markdown-static-site/skills/markdown-post-frontmatter-validation/SKILL.md +133 -0
  119. package/examples/projects/markdown-static-site/skills/migrate-posts-to-v2-frontmatter/SKILL.md +140 -0
  120. package/examples/projects/saas-stripe-postgres/README.md +208 -0
  121. package/examples/projects/saas-stripe-postgres/db/migrations/0004_canonicalize_orders.sql +37 -0
  122. package/examples/projects/saas-stripe-postgres/db/schema.sql +112 -0
  123. package/examples/projects/saas-stripe-postgres/skills/migrate-orders-to-canonical-schema/SKILL.md +149 -0
  124. package/examples/projects/saas-stripe-postgres/skills/nextjs-server-action-validation/SKILL.md +154 -0
  125. package/examples/projects/saas-stripe-postgres/skills/payment-provider-router/SKILL.md +153 -0
  126. package/examples/projects/saas-stripe-postgres/skills/postgres-rls-pattern/SKILL.md +163 -0
  127. package/examples/projects/saas-stripe-postgres/skills/stripe-webhook-signature-verification/SKILL.md +137 -0
  128. package/examples/protocol/skill-metadata-template.md +301 -0
  129. package/examples/protocol/skills.manifest.sample.json +13245 -0
  130. package/examples/skill-metadata-template.md +317 -0
  131. package/examples/skills.manifest.sample.json +13519 -0
  132. package/examples/tests/v3-1-skos-fixture/SKILL.md +93 -0
  133. package/marketplace/README.md +17 -0
  134. package/marketplace/skills/a11y/SKILL.md +66 -0
  135. package/marketplace/skills/acid-fundamentals/SKILL.md +106 -0
  136. package/marketplace/skills/agent-engineering/SKILL.md +386 -0
  137. package/marketplace/skills/agent-eval-design/SKILL.md +55 -0
  138. package/marketplace/skills/ai-native-development/SKILL.md +294 -0
  139. package/marketplace/skills/api-design/SKILL.md +60 -0
  140. package/marketplace/skills/architecture-decision-records/SKILL.md +55 -0
  141. package/marketplace/skills/background-jobs/SKILL.md +265 -0
  142. package/marketplace/skills/bounded-context-mapping/SKILL.md +55 -0
  143. package/marketplace/skills/cap-theorem-tradeoffs/SKILL.md +127 -0
  144. package/marketplace/skills/client-server-boundary/SKILL.md +187 -0
  145. package/marketplace/skills/code-review/SKILL.md +120 -0
  146. package/marketplace/skills/color-system-design/SKILL.md +43 -0
  147. package/marketplace/skills/component-architecture/SKILL.md +126 -0
  148. package/marketplace/skills/compression/SKILL.md +112 -0
  149. package/marketplace/skills/conceptual-modeling/SKILL.md +181 -0
  150. package/marketplace/skills/connection-pooling/SKILL.md +105 -0
  151. package/marketplace/skills/constraint-awareness/SKILL.md +287 -0
  152. package/marketplace/skills/content-monitor/SKILL.md +209 -0
  153. package/marketplace/skills/context-engineering/SKILL.md +320 -0
  154. package/marketplace/skills/context-graph/SKILL.md +174 -0
  155. package/marketplace/skills/context-management/SKILL.md +174 -0
  156. package/marketplace/skills/context-window/SKILL.md +239 -0
  157. package/marketplace/skills/contract-testing/SKILL.md +120 -0
  158. package/marketplace/skills/cron-scheduling/SKILL.md +223 -0
  159. package/marketplace/skills/dark-mode-implementation/SKILL.md +47 -0
  160. package/marketplace/skills/data-modeling/SKILL.md +59 -0
  161. package/marketplace/skills/data-modeling-fundamentals/SKILL.md +117 -0
  162. package/marketplace/skills/database-migration/SKILL.md +429 -0
  163. package/marketplace/skills/debugging/SKILL.md +67 -0
  164. package/marketplace/skills/dependency-architecture/SKILL.md +58 -0
  165. package/marketplace/skills/design-module-composition/SKILL.md +43 -0
  166. package/marketplace/skills/design-system-architecture/SKILL.md +61 -0
  167. package/marketplace/skills/design-thinking/SKILL.md +44 -0
  168. package/marketplace/skills/diagnosis/SKILL.md +296 -0
  169. package/marketplace/skills/diff-analysis/SKILL.md +188 -0
  170. package/marketplace/skills/e2e-test-design/SKILL.md +113 -0
  171. package/marketplace/skills/entity-relationship-modeling/SKILL.md +218 -0
  172. package/marketplace/skills/epistemic-grounding/SKILL.md +112 -0
  173. package/marketplace/skills/error-boundary/SKILL.md +235 -0
  174. package/marketplace/skills/error-tracking/SKILL.md +261 -0
  175. package/marketplace/skills/eval-driven-development/SKILL.md +147 -0
  176. package/marketplace/skills/evaluation/SKILL.md +113 -0
  177. package/marketplace/skills/event-contract-design/SKILL.md +60 -0
  178. package/marketplace/skills/event-storming/SKILL.md +56 -0
  179. package/marketplace/skills/form-ux-architecture/SKILL.md +60 -0
  180. package/marketplace/skills/framework-fit-analysis/SKILL.md +59 -0
  181. package/marketplace/skills/frontend-architecture/SKILL.md +43 -0
  182. package/marketplace/skills/generative-ui/SKILL.md +118 -0
  183. package/marketplace/skills/graph-audit/SKILL.md +81 -0
  184. package/marketplace/skills/guardrails/SKILL.md +118 -0
  185. package/marketplace/skills/hooks-patterns/SKILL.md +185 -0
  186. package/marketplace/skills/http-semantics/SKILL.md +136 -0
  187. package/marketplace/skills/ideation/SKILL.md +41 -0
  188. package/marketplace/skills/indexing-strategy/SKILL.md +108 -0
  189. package/marketplace/skills/information-architecture/SKILL.md +59 -0
  190. package/marketplace/skills/integration-test-design/SKILL.md +111 -0
  191. package/marketplace/skills/intent-recognition/SKILL.md +136 -0
  192. package/marketplace/skills/interaction-feedback/SKILL.md +59 -0
  193. package/marketplace/skills/interaction-patterns/SKILL.md +59 -0
  194. package/marketplace/skills/journey-mapping/SKILL.md +41 -0
  195. package/marketplace/skills/keywords/SKILL.md +213 -0
  196. package/marketplace/skills/knowledge-modeling/SKILL.md +232 -0
  197. package/marketplace/skills/layout-composition/SKILL.md +59 -0
  198. package/marketplace/skills/linguistics/SKILL.md +429 -0
  199. package/marketplace/skills/lint-overlay/SKILL.md +76 -0
  200. package/marketplace/skills/mental-models/SKILL.md +126 -0
  201. package/marketplace/skills/merge-queue/SKILL.md +94 -0
  202. package/marketplace/skills/methodology/SKILL.md +317 -0
  203. package/marketplace/skills/microcopy/SKILL.md +232 -0
  204. package/marketplace/skills/middleware-patterns/SKILL.md +363 -0
  205. package/marketplace/skills/mobile-responsive-ux/SKILL.md +287 -0
  206. package/marketplace/skills/mutation-testing/SKILL.md +112 -0
  207. package/marketplace/skills/naming-conventions/SKILL.md +112 -0
  208. package/marketplace/skills/observability-modeling/SKILL.md +59 -0
  209. package/marketplace/skills/ontology-modeling/SKILL.md +67 -0
  210. package/marketplace/skills/owasp-security/SKILL.md +153 -0
  211. package/marketplace/skills/pattern-recognition/SKILL.md +472 -0
  212. package/marketplace/skills/performance-budgets/SKILL.md +185 -0
  213. package/marketplace/skills/performance-engineering/SKILL.md +58 -0
  214. package/marketplace/skills/performance-testing/SKILL.md +125 -0
  215. package/marketplace/skills/printify/SKILL.md +42 -0
  216. package/marketplace/skills/prioritization/SKILL.md +118 -0
  217. package/marketplace/skills/problem-framing/SKILL.md +41 -0
  218. package/marketplace/skills/problem-locating-solving/SKILL.md +203 -0
  219. package/marketplace/skills/project-knowledge-extraction/SKILL.md +54 -0
  220. package/marketplace/skills/prompt-craft/SKILL.md +134 -0
  221. package/marketplace/skills/prompt-injection-defense/SKILL.md +132 -0
  222. package/marketplace/skills/property-based-testing/SKILL.md +100 -0
  223. package/marketplace/skills/prototyping/SKILL.md +43 -0
  224. package/marketplace/skills/query-optimization/SKILL.md +144 -0
  225. package/marketplace/skills/real-time-updates/SKILL.md +324 -0
  226. package/marketplace/skills/ref-patterns/SKILL.md +284 -0
  227. package/marketplace/skills/refactor/SKILL.md +65 -0
  228. package/marketplace/skills/rendering-models/SKILL.md +142 -0
  229. package/marketplace/skills/replication-patterns/SKILL.md +110 -0
  230. package/marketplace/skills/research-synthesis/SKILL.md +41 -0
  231. package/marketplace/skills/route-handler-design/SKILL.md +347 -0
  232. package/marketplace/skills/schema-evolution/SKILL.md +140 -0
  233. package/marketplace/skills/security-fundamentals/SKILL.md +139 -0
  234. package/marketplace/skills/semantic-center/SKILL.md +194 -0
  235. package/marketplace/skills/semantic-relations/SKILL.md +250 -0
  236. package/marketplace/skills/semantics/SKILL.md +366 -0
  237. package/marketplace/skills/semiotics/SKILL.md +230 -0
  238. package/marketplace/skills/seo-strategy/SKILL.md +260 -0
  239. package/marketplace/skills/server-actions-design/SKILL.md +243 -0
  240. package/marketplace/skills/server-components-design/SKILL.md +190 -0
  241. package/marketplace/skills/sharding-strategy/SKILL.md +123 -0
  242. package/marketplace/skills/shopify/SKILL.md +42 -0
  243. package/marketplace/skills/skill-infrastructure/SKILL.md +320 -0
  244. package/marketplace/skills/skill-router/SKILL.md +71 -0
  245. package/marketplace/skills/skill-scaffold/SKILL.md +105 -0
  246. package/marketplace/skills/snapshot-testing/SKILL.md +120 -0
  247. package/marketplace/skills/spec-driven-development/SKILL.md +148 -0
  248. package/marketplace/skills/state-machine-modeling/SKILL.md +56 -0
  249. package/marketplace/skills/state-management/SKILL.md +134 -0
  250. package/marketplace/skills/streaming-architecture/SKILL.md +194 -0
  251. package/marketplace/skills/summarization/SKILL.md +156 -0
  252. package/marketplace/skills/suspense-patterns/SKILL.md +265 -0
  253. package/marketplace/skills/system-interface-contracts/SKILL.md +59 -0
  254. package/marketplace/skills/task-analysis/SKILL.md +201 -0
  255. package/marketplace/skills/taxonomy-design/SKILL.md +66 -0
  256. package/marketplace/skills/test-coverage-strategy/SKILL.md +108 -0
  257. package/marketplace/skills/test-doubles-design/SKILL.md +98 -0
  258. package/marketplace/skills/test-driven-development/SKILL.md +96 -0
  259. package/marketplace/skills/testing-strategy/SKILL.md +67 -0
  260. package/marketplace/skills/theme-system-design/SKILL.md +43 -0
  261. package/marketplace/skills/tool-call-flow/SKILL.md +229 -0
  262. package/marketplace/skills/tool-call-strategy/SKILL.md +292 -0
  263. package/marketplace/skills/transaction-isolation/SKILL.md +98 -0
  264. package/marketplace/skills/type-safety/SKILL.md +177 -0
  265. package/marketplace/skills/typography-system/SKILL.md +43 -0
  266. package/marketplace/skills/usability-testing/SKILL.md +43 -0
  267. package/marketplace/skills/user-research/SKILL.md +43 -0
  268. package/marketplace/skills/vercel-composition-patterns/SKILL.md +157 -0
  269. package/marketplace/skills/version-control/SKILL.md +233 -0
  270. package/marketplace/skills/visual-design-foundations/SKILL.md +59 -0
  271. package/marketplace/skills/visual-hierarchy/SKILL.md +43 -0
  272. package/marketplace/skills/webhook-integration/SKILL.md +331 -0
  273. package/marketplace/skills/writing-humanizer/SKILL.md +380 -0
  274. package/package.json +67 -0
  275. package/schemas/manifest.schema.json +811 -0
  276. package/schemas/manifest.v2.schema.json +164 -0
  277. package/schemas/manifest.v3.schema.json +758 -0
  278. package/schemas/manifest.v4.schema.json +755 -0
  279. package/schemas/manifest.v5.schema.json +755 -0
  280. package/schemas/manifest.v6.schema.json +811 -0
  281. package/schemas/skill.context.jsonld +279 -0
  282. package/schemas/skill.schema.json +919 -0
  283. package/schemas/skill.v2.schema.json +201 -0
  284. package/schemas/skill.v3.schema.json +827 -0
  285. package/schemas/skill.v4.schema.json +822 -0
  286. package/schemas/skill.v5.schema.json +830 -0
  287. package/schemas/skill.v6.schema.json +946 -0
  288. package/schemas/vocabulary/keywords.json +180 -0
  289. package/schemas/vocabulary/workspace_tags.json +23 -0
  290. package/scripts/__tests__/migrate-skill-v2-to-v3.test.js +161 -0
  291. package/scripts/__tests__/migrate-skill-v3-to-v4.test.js +158 -0
  292. package/scripts/__tests__/test-export-parser-drift.js +149 -0
  293. package/scripts/__tests__/test-marketplace-export.js +114 -0
  294. package/scripts/__tests__/test-router-paths.js +82 -0
  295. package/scripts/__tests__/test-stability-promotion.js +244 -0
  296. package/scripts/__tests__/test-v3-1-alias-contract.js +109 -0
  297. package/scripts/__tests__/test-v3-1-skos-runtime.js +116 -0
  298. package/scripts/backfill-schema-version.js +198 -0
  299. package/scripts/build-field-reference.js +160 -0
  300. package/scripts/build-retrieval-baseline.js +511 -0
  301. package/scripts/check-markdown-links.js +211 -0
  302. package/scripts/check-protocol-consistency.js +979 -0
  303. package/scripts/export-marketplace-skills.js +610 -0
  304. package/scripts/export-skill.js +374 -0
  305. package/scripts/generate-manifest.js +787 -0
  306. package/scripts/lib/alias-contract.js +83 -0
  307. package/scripts/lib/audit-prompt-builder.js +771 -0
  308. package/scripts/lib/mock-grader.js +134 -0
  309. package/scripts/lib/parse-frontmatter.js +429 -0
  310. package/scripts/lib/roots.js +119 -0
  311. package/scripts/lint/check-archetype-sections.js +185 -0
  312. package/scripts/lint/check-category-enum.js +83 -0
  313. package/scripts/lint/check-routing-eval.js +146 -0
  314. package/scripts/lint/check-routing-quality.js +211 -0
  315. package/scripts/lint/check-stability-promotion.js +220 -0
  316. package/scripts/lint/format-code-frame.js +206 -0
  317. package/scripts/marketplace-install.js +125 -0
  318. package/scripts/migrate-category-to-enum.js +169 -0
  319. package/scripts/migrate-skill-v2-to-v3.js +424 -0
  320. package/scripts/migrate-skill-v3-to-v4.js +200 -0
  321. package/scripts/migrate-skill-v5-to-v6.js +304 -0
  322. package/scripts/restructure-by-category.js +85 -0
  323. package/scripts/seed-publication-classification.js +282 -0
  324. package/scripts/skill-audit.js +893 -0
  325. package/scripts/skill-graph-drift.js +483 -0
  326. package/scripts/skill-graph-route.js +766 -0
  327. package/scripts/skill-graph-routing-eval.js +393 -0
  328. package/scripts/skill-lint.js +1317 -0
  329. package/scripts/skill-overlap.js +213 -0
  330. package/scripts/verify-skill-md-export.js +201 -0
@@ -0,0 +1,511 @@
1
+ #!/usr/bin/env node
2
+ /**
3
+ * build-retrieval-baseline — Phase 0a evidence for the Skill Graph v5 plan.
4
+ *
5
+ * Produces `evals/retrieval-baseline-v0.json`: a 30-query corpus spanning
6
+ * UX, UI, Visual Design, Digital Design, Development, Code Quality, and
7
+ * AI Agent Web Dev, with each query's live router top-5 captured against
8
+ * the current manifest. The artifact is the before-state every later
9
+ * migration/authoring phase will be compared against.
10
+ *
11
+ * Per `docs/plans/skill-taxonomy-v5-and-gap-fill.md` § Phase 0 (plan document; not among the docs/plans files shipped in this package):
12
+ * For each query record:
13
+ * (i) which existing skill(s) the router activates today
14
+ * (ii) whether a reviewer agrees
15
+ * (iii) which skill should have activated if it existed
16
+ *
17
+ * This builder records (i) live from the router, captures the agent's
18
+ * single-rater (ii)/(iii) assessment, and leaves a `human_reviewer_override`
19
+ * slot null for the user to fill on review. It also tags each query whose
20
+ * intent probes one of the four deferred-pending-eval skills:
21
+ * state-management, component-architecture, security-fundamentals,
22
+ * mental-models.
23
+ *
24
+ * Usage:
25
+ * node scripts/build-retrieval-baseline.js # writes file
26
+ * node scripts/build-retrieval-baseline.js --dry-run # stdout only
27
+ * node scripts/build-retrieval-baseline.js --manifest PATH # custom manifest
28
+ *
29
+ * Re-runnable: rerun after manifest changes to refresh router_top_5.
30
+ * Self-contained. Only Node built-ins + the existing routeSkills and lib/roots modules.
31
+ */
32
+
33
+ 'use strict';
34
+
35
+ const fs = require('fs');
36
+ const path = require('path');
37
+ const { routeSkills } = require('./skill-graph-route');
38
+ const { workspaceRoot } = require('./lib/roots');
39
+
40
+ const REPO_ROOT = workspaceRoot(); // value returned by workspaceRoot() from ./lib/roots; presumably the workspace root directory (TODO confirm)
41
+ const DEFAULT_MANIFEST = path.join(REPO_ROOT, 'examples', 'skills.manifest.sample.json'); // sample manifest under REPO_ROOT; the header usage notes list --manifest PATH for a custom one
42
+ const OUTPUT_PATH = path.join(REPO_ROOT, 'evals', 'retrieval-baseline-v0.json'); // location of the baseline artifact named in the header comment
43
+ const TOP_K = 5; // presumably the number of router results kept per query (header mentions top-5); usage is outside this view
44
+
45
+ // ---------------------------------------------------------------------------
46
+ // 30-query corpus
47
+ // ---------------------------------------------------------------------------
48
+ //
49
+ // Authoring rules:
50
+ // - Realistic ask language a developer/designer would actually type.
51
+ // - Topic assignment is the primary axis; one topic per query.
52
+ // - `deferred_skill_probe` is set when the query's intent specifically tests
53
+ // whether one of the four deferred skills (state-management,
54
+ // component-architecture, security-fundamentals, mental-models) is needed.
55
+ // At least 2 distinct probes per deferred skill so single-query noise
56
+ // cannot falsify the gap claim.
57
+ // - `agent_expectation_if_missing` records what skill (existing or
58
+ // hypothetical) the agent reviewer believes should top the result.
59
+ // This is a single-rater opinion; the user's `human_reviewer_override`
60
+ // supersedes it.
61
+
62
+ const QUERIES = [
63
+ // ----- UX (4) -----
64
+ {
65
+ id: 1,
66
+ topic: 'UX',
67
+ prompt: 'Whats the right empty state for a search results page with no matches',
68
+ agent_expectation_if_missing: 'form-ux-architecture',
69
+ deferred_skill_probe: null,
70
+ note: 'Empty-state design is a recurring UX micro-pattern; tests whether the library has a coherent home for it.',
71
+ },
72
+ {
73
+ id: 2,
74
+ topic: 'UX',
75
+ prompt: 'User keeps getting confused about how to undo their last action — what UX patterns help',
76
+ agent_expectation_if_missing: 'interaction-patterns',
77
+ deferred_skill_probe: null,
78
+ note: 'Undo affordance is a Norman-style mental-model query; partial probe for mental-models gap.',
79
+ },
80
+ {
81
+ id: 3,
82
+ topic: 'UX',
83
+ prompt: 'Should I use a modal or a side panel for editing this item',
84
+ agent_expectation_if_missing: 'interaction-patterns',
85
+ deferred_skill_probe: null,
86
+ note: 'Modal-vs-panel is a classic UX decision; tests whether interaction-patterns owns it cleanly.',
87
+ },
88
+ {
89
+ id: 4,
90
+ topic: 'UX',
91
+ prompt: 'How do I think about progressive disclosure when designing a complex form',
92
+ agent_expectation_if_missing: 'mental-models',
93
+ deferred_skill_probe: 'mental-models',
94
+ note: 'Progressive disclosure is a named UX mental model (Nielsen); probes mental-models gap.',
95
+ },
96
+
97
+ // ----- UI (4) -----
98
+ {
99
+ id: 5,
100
+ topic: 'UI',
101
+ prompt: 'I need to design a data table with sortable columns and selectable rows',
102
+ agent_expectation_if_missing: 'design-module-composition',
103
+ deferred_skill_probe: null,
104
+ note: 'Data-table is a high-frequency UI pattern; tests composition coverage.',
105
+ },
106
+ {
107
+ id: 6,
108
+ topic: 'UI',
109
+ prompt: 'Whats the right button hierarchy when I have three actions of similar importance',
110
+ agent_expectation_if_missing: 'visual-hierarchy',
111
+ deferred_skill_probe: null,
112
+ note: 'Button-hierarchy is a visual-design + UI decision; tests visual-hierarchy fit.',
113
+ },
114
+ {
115
+ id: 7,
116
+ topic: 'UI',
117
+ prompt: 'How should I show 50 plus items in a list — pagination, infinite scroll, or virtualization',
118
+ agent_expectation_if_missing: 'interaction-patterns',
119
+ deferred_skill_probe: null,
120
+ note: 'List rendering strategy spans UX + frontend perf; boundary query.',
121
+ },
122
+ {
123
+ id: 8,
124
+ topic: 'UI',
125
+ prompt: 'Tooltip vs popover — which one for an inline help icon',
126
+ agent_expectation_if_missing: 'interaction-patterns',
127
+ deferred_skill_probe: null,
128
+ note: 'Component-vocab decision; tests fine-grained UI coverage.',
129
+ },
130
+
131
+ // ----- Visual Design (4) -----
132
+ {
133
+ id: 9,
134
+ topic: 'Visual Design',
135
+ prompt: 'How do I pick a color palette that works in both light and dark mode',
136
+ agent_expectation_if_missing: 'color-system-design',
137
+ deferred_skill_probe: null,
138
+ note: 'Direct color-system query; tests router on existing color-system-design.',
139
+ },
140
+ {
141
+ id: 10,
142
+ topic: 'Visual Design',
143
+ prompt: 'Whats the right type scale for a dense data dashboard',
144
+ agent_expectation_if_missing: 'typography-system',
145
+ deferred_skill_probe: null,
146
+ note: 'Type-scale query; tests typography-system fit.',
147
+ },
148
+ {
149
+ id: 11,
150
+ topic: 'Visual Design',
151
+ prompt: 'My icons look inconsistent across the app — how do I systematize them',
152
+ agent_expectation_if_missing: 'design-system-architecture',
153
+ deferred_skill_probe: null,
154
+ note: 'Icon-system is a sub-discipline of design-system-architecture; coverage probe.',
155
+ },
156
+ {
157
+ id: 12,
158
+ topic: 'Visual Design',
159
+ prompt: 'How do I balance whitespace without making the page look empty',
160
+ agent_expectation_if_missing: 'layout-composition',
161
+ deferred_skill_probe: null,
162
+ note: 'Whitespace is a Tufte-style visual-design concept; tests layout-composition.',
163
+ },
164
+
165
+ // ----- Digital Design (3) -----
166
+ {
167
+ id: 13,
168
+ topic: 'Digital Design',
169
+ prompt: 'Im building a design system from scratch — what tokens do I need first',
170
+ agent_expectation_if_missing: 'design-system-architecture',
171
+ deferred_skill_probe: null,
172
+ note: 'Design-token entry-point query; tests whether existing skill answers it.',
173
+ },
174
+ {
175
+ id: 14,
176
+ topic: 'Digital Design',
177
+ prompt: 'How do I structure components so they compose well across products',
178
+ agent_expectation_if_missing: 'component-architecture',
179
+ deferred_skill_probe: 'component-architecture',
180
+ note: 'Direct probe for component-architecture deferred skill.',
181
+ },
182
+ {
183
+ id: 15,
184
+ topic: 'Digital Design',
185
+ prompt: 'Whats the difference between primitives, patterns, and templates in a design system',
186
+ agent_expectation_if_missing: 'design-module-composition',
187
+ deferred_skill_probe: 'component-architecture',
188
+ note: 'Brad Frost atomic-design vocabulary; secondary probe for component-architecture.',
189
+ },
190
+
191
+ // ----- Development (5) -----
192
+ {
193
+ id: 16,
194
+ topic: 'Development',
195
+ prompt: 'Where should I do form validation — client, server, or both',
196
+ agent_expectation_if_missing: 'form-ux-architecture',
197
+ deferred_skill_probe: null,
198
+ note: 'Cross-boundary dev question; tests whether form-ux-architecture or client-server-boundary surfaces.',
199
+ },
200
+ {
201
+ id: 17,
202
+ topic: 'Development',
203
+ prompt: 'I have a list of items that needs filtering, sorting, and pagination — whats the architecture',
204
+ agent_expectation_if_missing: 'state-management',
205
+ deferred_skill_probe: 'state-management',
206
+ note: 'Probe for state-management deferred skill; list-state is the canonical use case.',
207
+ },
208
+ {
209
+ id: 18,
210
+ topic: 'Development',
211
+ prompt: 'How do I structure a Next.js app for the App Router',
212
+ agent_expectation_if_missing: 'frontend-architecture',
213
+ deferred_skill_probe: null,
214
+ note: 'Next App Router query; tests rendering-models / frontend-architecture coverage.',
215
+ },
216
+ {
217
+ id: 19,
218
+ topic: 'Development',
219
+ prompt: 'Should this be a Server Component or a Client Component',
220
+ agent_expectation_if_missing: 'client-server-boundary',
221
+ deferred_skill_probe: null,
222
+ note: 'Wave 2 skill (client-server-boundary) was authored explicitly for this query.',
223
+ },
224
+ {
225
+ id: 20,
226
+ topic: 'Development',
227
+ prompt: 'How do I handle state that needs to live across multiple routes',
228
+ agent_expectation_if_missing: 'state-management',
229
+ deferred_skill_probe: 'state-management',
230
+ note: 'Second probe for state-management; cross-route state is the harder case.',
231
+ },
232
+
233
+ // ----- Code Quality (5) -----
234
+ {
235
+ id: 21,
236
+ topic: 'Code Quality',
237
+ prompt: 'How do I write tests for code that calls an external API',
238
+ agent_expectation_if_missing: 'test-doubles-design',
239
+ deferred_skill_probe: null,
240
+ note: 'Wave 4 skill (test-doubles-design) was authored for exactly this question.',
241
+ },
242
+ {
243
+ id: 22,
244
+ topic: 'Code Quality',
245
+ prompt: 'What should I do with this 800 line component thats getting hard to maintain',
246
+ agent_expectation_if_missing: 'refactor',
247
+ deferred_skill_probe: 'component-architecture',
248
+ note: 'Refactor query that also probes whether component-architecture would help here.',
249
+ },
250
+ {
251
+ id: 23,
252
+ topic: 'Code Quality',
253
+ prompt: 'How do I make sure my refactor doesnt break anything',
254
+ agent_expectation_if_missing: 'refactor',
255
+ deferred_skill_probe: null,
256
+ note: 'Tests refactor + testing-strategy coverage.',
257
+ },
258
+ {
259
+ id: 24,
260
+ topic: 'Code Quality',
261
+ prompt: 'How do I detect security vulnerabilities in my code before shipping',
262
+ agent_expectation_if_missing: 'security-fundamentals',
263
+ deferred_skill_probe: 'security-fundamentals',
264
+ note: 'Direct probe for security-fundamentals deferred skill.',
265
+ },
266
+ {
267
+ id: 25,
268
+ topic: 'Code Quality',
269
+ prompt: 'OWASP says I should validate input — where do I actually do that in a typed system',
270
+ agent_expectation_if_missing: 'security-fundamentals',
271
+ deferred_skill_probe: 'security-fundamentals',
272
+ note: 'Second probe for security-fundamentals; intersects with type-safety boundary.',
273
+ },
274
+
275
+ // ----- AI Agent Web Dev (5) -----
276
+ {
277
+ id: 26,
278
+ topic: 'AI Agent Web Dev',
279
+ prompt: 'Im building an agent that calls tools — how should I structure the tool definitions',
280
+ agent_expectation_if_missing: 'tool-call-flow',
281
+ deferred_skill_probe: null,
282
+ note: 'Wave 3 skill (tool-call-flow) was authored for this question.',
283
+ },
284
+ {
285
+ id: 27,
286
+ topic: 'AI Agent Web Dev',
287
+ prompt: 'The model passes my evals but users complain about quality — whats wrong',
288
+ agent_expectation_if_missing: 'eval-driven-development',
289
+ deferred_skill_probe: null,
290
+ note: 'Goodhart-style query; eval-driven-development addresses it.',
291
+ },
292
+ {
293
+ id: 28,
294
+ topic: 'AI Agent Web Dev',
295
+ prompt: 'How do I stream model output to the browser without blocking',
296
+ agent_expectation_if_missing: 'streaming-architecture',
297
+ deferred_skill_probe: null,
298
+ note: 'Wave 3 skill (streaming-architecture).',
299
+ },
300
+ {
301
+ id: 29,
302
+ topic: 'AI Agent Web Dev',
303
+ prompt: 'Someone could inject instructions into our agents context — how do I defend',
304
+ agent_expectation_if_missing: 'prompt-injection-defense',
305
+ deferred_skill_probe: null,
306
+ note: 'Wave 3 skill (prompt-injection-defense).',
307
+ },
308
+ {
309
+ id: 30,
310
+ topic: 'AI Agent Web Dev',
311
+ prompt: 'How do I think about race conditions when multiple tool calls run in parallel',
312
+ agent_expectation_if_missing: 'mental-models',
313
+ deferred_skill_probe: 'mental-models',
314
+ note: 'Concurrency mental-model query; second probe for mental-models gap.',
315
+ },
316
+ ];
317
+
318
+ // ---------------------------------------------------------------------------
319
+ // Builder
320
+ // ---------------------------------------------------------------------------
321
+
322
/**
 * Read a manifest file from disk and parse it as JSON.
 *
 * @param {string} manifestPath - Path of the manifest file to read (UTF-8).
 * @returns {*} The value produced by parsing the file contents as JSON.
 * @throws {Error} Whatever `fs.readFileSync` throws (e.g. missing file), or
 *   the `SyntaxError` raised by `JSON.parse` on malformed content.
 */
function loadManifest(manifestPath) {
  return JSON.parse(fs.readFileSync(manifestPath, 'utf8'));
}
326
+
327
/**
 * Route a single prompt through `routeSkills` and flatten the outcome into
 * plain, JSON-friendly records.
 *
 * The router is always called with `project: null`, `pathArg: null`,
 * `minEvalState: 'unverified'` and `maxResults: TOP_K`.
 *
 * @param {object} manifest - Parsed manifest handed to the router unchanged.
 * @param {string} prompt - Query text to route.
 * @param {string} todayISO - Date string forwarded to the router as `todayISO`.
 * @returns {{top_k: object[], co_loaded: object[], no_match: boolean, notes: Array}}
 *   `top_k` holds the router's `selected` entries with a 1-based `rank`;
 *   `co_loaded` and `notes` fall back to `[]` when the router omits them;
 *   `no_match` is true when nothing was selected.
 */
function runQuery(manifest, prompt, todayISO) {
  const routed = routeSkills(manifest, {
    query: prompt,
    project: null,
    maxResults: TOP_K,
    minEvalState: 'unverified',
    pathArg: null,
    todayISO,
  });

  // Projectors: keep only the fields the baseline artifact records.
  const toRanked = (entry, position) => ({
    rank: position + 1,
    skill: entry.skill.name,
    score: entry.score,
    role: entry.role,
    reasons: entry.reasons,
  });
  const toCompanion = (entry) => ({
    skill: entry.skill.name,
    role: entry.role,
    reason: entry.reason,
  });

  const ranked = routed.selected.map(toRanked);
  const companions = routed.coLoaded ? routed.coLoaded.map(toCompanion) : [];

  return {
    top_k: ranked,
    co_loaded: companions,
    no_match: ranked.length === 0,
    notes: routed.notes || [],
  };
}
358
+
359
/**
 * Compare the router's ranking for one query with the skill the agent
 * expected to win. This is the automated single-rater judgement only; it
 * does not record any human review.
 *
 * @param {object} query - Query record; `agent_expectation_if_missing` names
 *   the expected skill.
 * @param {{top_k: Array<{skill: string}>, no_match: boolean}} routerResult -
 *   Flattened router output for the query.
 * @returns {{agrees_with_top_router_pick: boolean, routing_kind: string,
 *   expected_skill: *, actual_top_skill: (string|null)}}
 *   `routing_kind` is one of `no_match`, `top1_matches_expectation`,
 *   `expected_in_top_k_but_not_top1`, `expected_absent_from_top_k`.
 */
function assessAgreement(query, routerResult) {
  const ranked = routerResult.top_k;
  const topPick = ranked[0]?.skill || null;
  const expected = query.agent_expectation_if_missing;

  // Checks run from most to least decisive; the first hit names the outcome.
  const classify = () => {
    if (routerResult.no_match) {
      return 'no_match';
    }
    if (topPick === expected) {
      return 'top1_matches_expectation';
    }
    // Top-1 differs: distinguish "ranked lower" from "not ranked at all".
    const rankedLower = ranked.some((candidate) => candidate.skill === expected);
    return rankedLower ? 'expected_in_top_k_but_not_top1' : 'expected_absent_from_top_k';
  };

  const routing_kind = classify();

  return {
    // Only an exact top-1 hit counts as agreement.
    agrees_with_top_router_pick: routing_kind === 'top1_matches_expectation',
    routing_kind,
    expected_skill: expected,
    actual_top_skill: topPick,
  };
}
387
+
388
/**
 * Build the retrieval-baseline artifact: route every entry of `QUERIES`
 * against the manifest, attach the agent's agreement assessment, and add
 * summary counts plus the reviewer protocol.
 *
 * @param {object} [options]
 * @param {string} [options.manifestPath=DEFAULT_MANIFEST] - Manifest to load.
 * @param {boolean} [options.dryRun=false] - When true, print the artifact as
 *   JSON to stdout instead of writing it to `OUTPUT_PATH`.
 * @returns {object} The artifact object that was printed or written.
 */
function build({ manifestPath = DEFAULT_MANIFEST, dryRun = false } = {}) {
  const manifest = loadManifest(manifestPath);
  const todayISO = new Date().toISOString().slice(0, 10);

  // One record per query: router output + agent assessment + empty human slot.
  const queries = [];
  for (const q of QUERIES) {
    const routerResult = runQuery(manifest, q.prompt, todayISO);
    const agentAssessment = assessAgreement(q, routerResult);
    queries.push({
      id: q.id,
      topic: q.topic,
      prompt: q.prompt,
      deferred_skill_probe: q.deferred_skill_probe,
      authoring_note: q.note,
      router_top_5: routerResult.top_k,
      router_co_loaded: routerResult.co_loaded,
      router_notes: routerResult.notes,
      agent_assessment: {
        ...agentAssessment,
        note: q.note,
      },
      // Left null here; the reviewer protocol below describes how it is filled in.
      human_reviewer_override: null,
    });
  }

  // Histograms: queries per topic, and queries per probed deferred skill.
  const topicDistribution = {};
  const deferredSkillProbes = {};
  for (const record of queries) {
    topicDistribution[record.topic] = (topicDistribution[record.topic] || 0) + 1;
    const probe = record.deferred_skill_probe;
    if (probe) {
      deferredSkillProbes[probe] = (deferredSkillProbes[probe] || 0) + 1;
    }
  }

  const countWhere = (predicate) => queries.filter(predicate).length;
  const countKind = (kind) => countWhere((record) => record.agent_assessment.routing_kind === kind);

  const agreementRate = {
    total: queries.length,
    agent_agrees_with_top_pick: countWhere((record) => record.agent_assessment.agrees_with_top_router_pick),
    no_match: countWhere((record) => record.router_top_5.length === 0),
    expected_in_top_k_but_not_top1: countKind('expected_in_top_k_but_not_top1'),
    expected_absent_from_top_k: countKind('expected_absent_from_top_k'),
  };

  const method = {
    top_k: TOP_K,
    min_eval_state: 'unverified',
    project: null,
    path: null,
    stopword_handling: 'router default (see scripts/skill-graph-route.js § STOPWORDS)',
  };

  const reviewerProtocol = {
    step_1: 'Read each query.prompt; decide what skill SHOULD have top-1.',
    step_2: 'Compare to router_top_5[0].skill.',
    step_3: 'Set human_reviewer_override.agrees_with_router (bool).',
    step_4: 'Set human_reviewer_override.expected_skill (existing-skill name or hypothetical-skill name with leading `~`).',
    step_5: 'For queries tagged deferred_skill_probe, decide whether the deferred skill would actually fill the gap or whether an existing skill suffices.',
    step_6: 'Record per-query notes.',
    step_7: 'Aggregate: compute agreement-rate, deferred-skill-needed-rate, and the migration-justification metrics in docs/plans/skill-taxonomy-v5-and-gap-fill.md § Verification gate item 9.',
  };

  const artifact = {
    version: 0,
    generated_at: new Date().toISOString(),
    generator: 'scripts/build-retrieval-baseline.js',
    manifest_used: path.relative(REPO_ROOT, manifestPath),
    manifest_skill_count: Array.isArray(manifest.skills) ? manifest.skills.length : null,
    total_queries: queries.length,
    topic_distribution: topicDistribution,
    deferred_skill_probes: deferredSkillProbes,
    agent_agreement_summary: agreementRate,
    method,
    reviewer_protocol: reviewerProtocol,
    queries,
  };

  // Same pretty-printed payload for both sinks, terminated by a newline.
  const serialized = `${JSON.stringify(artifact, null, 2)}\n`;

  if (dryRun) {
    process.stdout.write(serialized);
  } else {
    fs.mkdirSync(path.dirname(OUTPUT_PATH), { recursive: true });
    fs.writeFileSync(OUTPUT_PATH, serialized, 'utf8');
  }
  return artifact;
}
470
+
471
+ // ---------------------------------------------------------------------------
472
+ // CLI
473
+ // ---------------------------------------------------------------------------
474
+
475
/**
 * Parse the script's command-line arguments.
 *
 * Recognised flags:
 *   --manifest PATH  manifest file, resolved against the current working directory
 *   --dry-run        print the artifact instead of writing it
 *   --help, -h       print usage and exit with status 0
 *
 * An unknown argument, or `--manifest` given without a following value,
 * reports the problem on stderr and exits with status 2.
 *
 * @param {string[]} argv - Arguments after the script name.
 * @returns {{manifestPath: string, dryRun: boolean}} Parsed options;
 *   `manifestPath` defaults to `DEFAULT_MANIFEST`.
 */
function parseArgs(argv) {
  const args = { manifestPath: DEFAULT_MANIFEST, dryRun: false };
  for (let i = 0; i < argv.length; i += 1) {
    const a = argv[i];
    switch (a) {
      case '--manifest': {
        const value = argv[i + 1];
        // Without this guard, `path.resolve` receives `undefined` and throws
        // a raw TypeError instead of a usage error.
        if (value === undefined) {
          process.stderr.write('Missing value for --manifest\n');
          process.exit(2);
        }
        args.manifestPath = path.resolve(process.cwd(), value);
        i += 1; // skip the value that was just consumed
        break;
      }
      case '--dry-run':
        args.dryRun = true;
        break;
      case '--help':
      case '-h':
        process.stdout.write('Usage: node scripts/build-retrieval-baseline.js [--manifest PATH] [--dry-run]\n');
        process.exit(0);
        break;
      default:
        process.stderr.write(`Unknown arg: ${a}\n`);
        process.exit(2);
    }
  }
  return args;
}
494
+
495
/**
 * CLI entry point: parse the process arguments, build the artifact, and,
 * unless this is a dry run, print a short summary of what was written.
 * (On a dry run `build` has already printed the artifact itself.)
 */
function main() {
  const args = parseArgs(process.argv.slice(2));
  const artifact = build(args);
  if (args.dryRun) {
    return;
  }

  const summaryLines = [
    `Wrote ${path.relative(REPO_ROOT, OUTPUT_PATH)}\n`,
    ` total_queries: ${artifact.total_queries}\n`,
    ` topics: ${JSON.stringify(artifact.topic_distribution)}\n`,
    ` deferred_skill_probes: ${JSON.stringify(artifact.deferred_skill_probes)}\n`,
    ` agent_agreement_summary: ${JSON.stringify(artifact.agent_agreement_summary)}\n`,
  ];
  for (const line of summaryLines) {
    process.stdout.write(line);
  }
}
506
+
507
// Run the CLI only when this file is executed directly; requiring it as a
// module must not trigger a build.
const invokedDirectly = require.main === module;
if (invokedDirectly) {
  main();
}

// Programmatic surface: the builder and the query fixture it routes.
module.exports = { build, QUERIES };