agentic-qe 3.4.1 → 3.4.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (496)
  1. package/.claude/skills/.validation/README.md +111 -0
  2. package/.claude/skills/.validation/examples/chaos-engineering-output.example.json +530 -0
  3. package/.claude/skills/.validation/examples/performance-testing-output.example.json +252 -0
  4. package/.claude/skills/.validation/examples/security-testing-output.example.json +413 -0
  5. package/.claude/skills/.validation/examples/testability-scoring-output.example.json +350 -0
  6. package/.claude/skills/.validation/schemas/skill-eval.schema.json +452 -0
  7. package/.claude/skills/.validation/schemas/skill-frontmatter.schema.json +341 -0
  8. package/.claude/skills/.validation/schemas/skill-output-meta.schema.json +199 -0
  9. package/.claude/skills/.validation/schemas/skill-output.template.json +610 -0
  10. package/.claude/skills/.validation/skill-validation-mcp-integration.md +250 -0
  11. package/.claude/skills/.validation/templates/eval.template.yaml +366 -0
  12. package/.claude/skills/.validation/templates/schemas/output.json +145 -0
  13. package/.claude/skills/.validation/templates/security-testing-eval.template.yaml +725 -0
  14. package/.claude/skills/.validation/templates/skill-frontmatter.example.yaml +225 -0
  15. package/.claude/skills/.validation/templates/validate.template.sh +1060 -0
  16. package/.claude/skills/.validation/templates/validator-lib.sh +1031 -0
  17. package/.claude/skills/.validation/test-data/invalid-output.json +5 -0
  18. package/.claude/skills/.validation/test-data/minimal-output.json +9 -0
  19. package/.claude/skills/.validation/test-data/sample-output.json +73 -0
  20. package/.claude/skills/TRUST-TIERS.md +158 -0
  21. package/.claude/skills/a11y-ally/SKILL.md +1664 -1658
  22. package/.claude/skills/a11y-ally/evals/a11y-ally.yaml +376 -0
  23. package/.claude/skills/a11y-ally/schemas/output.json +549 -0
  24. package/.claude/skills/a11y-ally/scripts/validate.sh +690 -0
  25. package/.claude/skills/accessibility-testing/SKILL.md +8 -1
  26. package/.claude/skills/accessibility-testing/evals/accessibility-testing.yaml +719 -0
  27. package/.claude/skills/accessibility-testing/schemas/output.json +776 -0
  28. package/.claude/skills/accessibility-testing/scripts/validate.sh +943 -0
  29. package/.claude/skills/accessibility-testing/test-data/sample-output.json +191 -0
  30. package/.claude/skills/agentic-quality-engineering/SKILL.md +4 -0
  31. package/.claude/skills/agentic-quality-engineering/schemas/output.json +577 -0
  32. package/.claude/skills/api-testing-patterns/SKILL.md +5 -0
  33. package/.claude/skills/api-testing-patterns/evals/api-testing-patterns.yaml +696 -0
  34. package/.claude/skills/api-testing-patterns/schemas/output.json +845 -0
  35. package/.claude/skills/api-testing-patterns/scripts/validate.sh +931 -0
  36. package/.claude/skills/aqe-v2-v3-migration/schemas/output.json +513 -0
  37. package/.claude/skills/aqe-v2-v3-migration/skill.md +4 -0
  38. package/.claude/skills/brutal-honesty-review/SKILL.md +5 -0
  39. package/.claude/skills/brutal-honesty-review/schemas/output.json +291 -0
  40. package/.claude/skills/brutal-honesty-review/scripts/validate.sh +130 -0
  41. package/.claude/skills/bug-reporting-excellence/SKILL.md +5 -0
  42. package/.claude/skills/bug-reporting-excellence/schemas/output.json +288 -0
  43. package/.claude/skills/bug-reporting-excellence/scripts/validate.sh +108 -0
  44. package/.claude/skills/chaos-engineering-resilience/SKILL.md +5 -0
  45. package/.claude/skills/chaos-engineering-resilience/evals/chaos-engineering-resilience.yaml +761 -0
  46. package/.claude/skills/chaos-engineering-resilience/schemas/output.json +1205 -0
  47. package/.claude/skills/chaos-engineering-resilience/scripts/validate.sh +1107 -0
  48. package/.claude/skills/cicd-pipeline-qe-orchestrator/SKILL.md +6 -0
  49. package/.claude/skills/cicd-pipeline-qe-orchestrator/evals/cicd-pipeline-qe-orchestrator.yaml +157 -0
  50. package/.claude/skills/cicd-pipeline-qe-orchestrator/schemas/output.json +542 -0
  51. package/.claude/skills/cicd-pipeline-qe-orchestrator/scripts/validate.sh +310 -0
  52. package/.claude/skills/code-review-quality/SKILL.md +5 -0
  53. package/.claude/skills/code-review-quality/schemas/output.json +264 -0
  54. package/.claude/skills/code-review-quality/scripts/validate.sh +113 -0
  55. package/.claude/skills/compatibility-testing/evals/compatibility-testing.yaml +410 -0
  56. package/.claude/skills/compatibility-testing/schemas/output.json +551 -0
  57. package/.claude/skills/compatibility-testing/scripts/validate.sh +605 -0
  58. package/.claude/skills/compliance-testing/SKILL.md +7 -0
  59. package/.claude/skills/compliance-testing/evals/compliance-testing.yaml +1107 -0
  60. package/.claude/skills/compliance-testing/schemas/output.json +845 -0
  61. package/.claude/skills/compliance-testing/scripts/validate.sh +888 -0
  62. package/.claude/skills/consultancy-practices/SKILL.md +4 -0
  63. package/.claude/skills/consultancy-practices/schemas/output.json +282 -0
  64. package/.claude/skills/context-driven-testing/SKILL.md +3 -0
  65. package/.claude/skills/contract-testing/SKILL.md +5 -0
  66. package/.claude/skills/contract-testing/evals/contract-testing.yaml +748 -0
  67. package/.claude/skills/contract-testing/schemas/output.json +638 -0
  68. package/.claude/skills/contract-testing/scripts/validate.sh +1033 -0
  69. package/.claude/skills/database-testing/SKILL.md +5 -0
  70. package/.claude/skills/database-testing/evals/database-testing.yaml +968 -0
  71. package/.claude/skills/database-testing/schemas/output.json +1446 -0
  72. package/.claude/skills/database-testing/scripts/validate.sh +1099 -0
  73. package/.claude/skills/exploratory-testing-advanced/SKILL.md +3 -0
  74. package/.claude/skills/holistic-testing-pact/SKILL.md +3 -0
  75. package/.claude/skills/localization-testing/SKILL.md +6 -0
  76. package/.claude/skills/localization-testing/evals/localization-testing.yaml +544 -0
  77. package/.claude/skills/localization-testing/schemas/output.json +325 -0
  78. package/.claude/skills/localization-testing/scripts/validate.sh +131 -0
  79. package/.claude/skills/mobile-testing/SKILL.md +6 -0
  80. package/.claude/skills/mobile-testing/evals/mobile-testing.yaml +537 -0
  81. package/.claude/skills/mobile-testing/schemas/output.json +318 -0
  82. package/.claude/skills/mobile-testing/scripts/validate.sh +127 -0
  83. package/.claude/skills/mutation-testing/SKILL.md +8 -1
  84. package/.claude/skills/mutation-testing/evals/mutation-testing.yaml +652 -0
  85. package/.claude/skills/mutation-testing/schemas/output.json +707 -0
  86. package/.claude/skills/mutation-testing/scripts/validate.sh +992 -0
  87. package/.claude/skills/mutation-testing/test-data/sample-output.json +295 -0
  88. package/.claude/skills/n8n-expression-testing/SKILL.md +6 -0
  89. package/.claude/skills/n8n-expression-testing/evals/n8n-expression-testing.yaml +450 -0
  90. package/.claude/skills/n8n-expression-testing/schemas/output.json +369 -0
  91. package/.claude/skills/n8n-expression-testing/scripts/validate.sh +162 -0
  92. package/.claude/skills/n8n-integration-testing-patterns/SKILL.md +6 -0
  93. package/.claude/skills/n8n-integration-testing-patterns/evals/n8n-integration-testing-patterns.yaml +522 -0
  94. package/.claude/skills/n8n-integration-testing-patterns/schemas/output.json +291 -0
  95. package/.claude/skills/n8n-integration-testing-patterns/scripts/validate.sh +104 -0
  96. package/.claude/skills/n8n-security-testing/SKILL.md +6 -0
  97. package/.claude/skills/n8n-security-testing/evals/n8n-security-testing.yaml +493 -0
  98. package/.claude/skills/n8n-security-testing/schemas/output.json +293 -0
  99. package/.claude/skills/n8n-security-testing/scripts/validate.sh +120 -0
  100. package/.claude/skills/n8n-trigger-testing-strategies/SKILL.md +6 -0
  101. package/.claude/skills/n8n-trigger-testing-strategies/evals/n8n-trigger-testing-strategies.yaml +500 -0
  102. package/.claude/skills/n8n-trigger-testing-strategies/schemas/output.json +295 -0
  103. package/.claude/skills/n8n-trigger-testing-strategies/scripts/validate.sh +121 -0
  104. package/.claude/skills/n8n-workflow-testing-fundamentals/SKILL.md +6 -0
  105. package/.claude/skills/n8n-workflow-testing-fundamentals/evals/n8n-workflow-testing-fundamentals.yaml +497 -0
  106. package/.claude/skills/n8n-workflow-testing-fundamentals/schemas/output.json +254 -0
  107. package/.claude/skills/n8n-workflow-testing-fundamentals/scripts/validate.sh +126 -0
  108. package/.claude/skills/performance-analysis/SKILL.md +6 -0
  109. package/.claude/skills/performance-analysis/evals/performance-analysis.yaml +144 -0
  110. package/.claude/skills/performance-analysis/schemas/output.json +588 -0
  111. package/.claude/skills/performance-analysis/scripts/validate.sh +63 -0
  112. package/.claude/skills/performance-testing/SKILL.md +5 -0
  113. package/.claude/skills/performance-testing/evals/performance-testing.yaml +772 -0
  114. package/.claude/skills/performance-testing/schemas/output.json +1184 -0
  115. package/.claude/skills/performance-testing/scripts/validate.sh +1090 -0
  116. package/.claude/skills/qcsd-ideation-swarm/SKILL.md +1759 -1753
  117. package/.claude/skills/qcsd-ideation-swarm/evals/qcsd-ideation-swarm.yaml +138 -0
  118. package/.claude/skills/qcsd-ideation-swarm/schemas/output.json +568 -0
  119. package/.claude/skills/qcsd-ideation-swarm/scripts/validate.sh +56 -0
  120. package/.claude/skills/qe-chaos-resilience/SKILL.md +6 -0
  121. package/.claude/skills/qe-chaos-resilience/evals/qe-chaos-resilience.yaml +443 -0
  122. package/.claude/skills/qe-chaos-resilience/schemas/output.json +314 -0
  123. package/.claude/skills/qe-chaos-resilience/scripts/validate.sh +401 -0
  124. package/.claude/skills/qe-code-intelligence/SKILL.md +6 -0
  125. package/.claude/skills/qe-code-intelligence/evals/qe-code-intelligence.yaml +459 -0
  126. package/.claude/skills/qe-code-intelligence/schemas/output.json +315 -0
  127. package/.claude/skills/qe-code-intelligence/scripts/validate.sh +408 -0
  128. package/.claude/skills/qe-contract-testing/SKILL.md +6 -0
  129. package/.claude/skills/qe-contract-testing/evals/qe-contract-testing.yaml +513 -0
  130. package/.claude/skills/qe-contract-testing/schemas/output.json +295 -0
  131. package/.claude/skills/qe-contract-testing/scripts/validate.sh +421 -0
  132. package/.claude/skills/qe-coverage-analysis/SKILL.md +6 -0
  133. package/.claude/skills/qe-coverage-analysis/evals/qe-coverage-analysis.yaml +494 -0
  134. package/.claude/skills/qe-coverage-analysis/schemas/output.json +286 -0
  135. package/.claude/skills/qe-coverage-analysis/scripts/validate.sh +453 -0
  136. package/.claude/skills/qe-defect-intelligence/SKILL.md +6 -0
  137. package/.claude/skills/qe-defect-intelligence/evals/qe-defect-intelligence.yaml +511 -0
  138. package/.claude/skills/qe-defect-intelligence/schemas/output.json +283 -0
  139. package/.claude/skills/qe-defect-intelligence/scripts/validate.sh +466 -0
  140. package/.claude/skills/qe-iterative-loop/SKILL.md +445 -440
  141. package/.claude/skills/qe-iterative-loop/schemas/output.json +264 -0
  142. package/.claude/skills/qe-iterative-loop/scripts/validate.sh +474 -0
  143. package/.claude/skills/qe-learning-optimization/SKILL.md +6 -0
  144. package/.claude/skills/qe-learning-optimization/evals/qe-learning-optimization.yaml +144 -0
  145. package/.claude/skills/qe-learning-optimization/schemas/output.json +288 -0
  146. package/.claude/skills/qe-learning-optimization/scripts/validate.sh +56 -0
  147. package/.claude/skills/qe-quality-assessment/SKILL.md +6 -0
  148. package/.claude/skills/qe-quality-assessment/evals/qe-quality-assessment.yaml +506 -0
  149. package/.claude/skills/qe-quality-assessment/schemas/output.json +550 -0
  150. package/.claude/skills/qe-quality-assessment/scripts/validate.sh +352 -0
  151. package/.claude/skills/qe-requirements-validation/SKILL.md +6 -0
  152. package/.claude/skills/qe-requirements-validation/evals/qe-requirements-validation.yaml +598 -0
  153. package/.claude/skills/qe-requirements-validation/schemas/output.json +587 -0
  154. package/.claude/skills/qe-requirements-validation/scripts/validate.sh +275 -0
  155. package/.claude/skills/qe-security-compliance/SKILL.md +6 -0
  156. package/.claude/skills/qe-security-compliance/evals/qe-security-compliance.yaml +595 -0
  157. package/.claude/skills/qe-security-compliance/schemas/output.json +498 -0
  158. package/.claude/skills/qe-security-compliance/scripts/validate.sh +305 -0
  159. package/.claude/skills/qe-test-execution/SKILL.md +6 -0
  160. package/.claude/skills/qe-test-execution/evals/qe-test-execution.yaml +607 -0
  161. package/.claude/skills/qe-test-execution/schemas/output.json +529 -0
  162. package/.claude/skills/qe-test-execution/scripts/validate.sh +323 -0
  163. package/.claude/skills/qe-test-generation/SKILL.md +6 -0
  164. package/.claude/skills/qe-test-generation/evals/qe-test-generation.yaml +148 -0
  165. package/.claude/skills/qe-test-generation/schemas/output.json +439 -0
  166. package/.claude/skills/qe-test-generation/scripts/validate.sh +56 -0
  167. package/.claude/skills/qe-visual-accessibility/SKILL.md +6 -0
  168. package/.claude/skills/qe-visual-accessibility/evals/qe-visual-accessibility.yaml +142 -0
  169. package/.claude/skills/qe-visual-accessibility/schemas/output.json +491 -0
  170. package/.claude/skills/qe-visual-accessibility/scripts/validate.sh +56 -0
  171. package/.claude/skills/quality-metrics/SKILL.md +6 -0
  172. package/.claude/skills/quality-metrics/evals/quality-metrics.yaml +494 -0
  173. package/.claude/skills/quality-metrics/schemas/output.json +403 -0
  174. package/.claude/skills/quality-metrics/scripts/validate.sh +434 -0
  175. package/.claude/skills/refactoring-patterns/SKILL.md +5 -0
  176. package/.claude/skills/refactoring-patterns/schemas/output.json +475 -0
  177. package/.claude/skills/refactoring-patterns/scripts/validate.sh +241 -0
  178. package/.claude/skills/regression-testing/SKILL.md +6 -0
  179. package/.claude/skills/regression-testing/evals/regression-testing.yaml +504 -0
  180. package/.claude/skills/regression-testing/schemas/output.json +311 -0
  181. package/.claude/skills/regression-testing/scripts/validate.sh +130 -0
  182. package/.claude/skills/risk-based-testing/SKILL.md +6 -0
  183. package/.claude/skills/risk-based-testing/evals/risk-based-testing.yaml +141 -0
  184. package/.claude/skills/risk-based-testing/schemas/output.json +480 -0
  185. package/.claude/skills/risk-based-testing/scripts/validate.sh +56 -0
  186. package/.claude/skills/security-testing/SKILL.md +5 -0
  187. package/.claude/skills/security-testing/evals/security-testing.yaml +789 -0
  188. package/.claude/skills/security-testing/schemas/output.json +879 -0
  189. package/.claude/skills/security-testing/scripts/validate.sh +967 -0
  190. package/.claude/skills/security-visual-testing/SKILL.md +223 -217
  191. package/.claude/skills/security-visual-testing/evals/security-visual-testing.yaml +163 -0
  192. package/.claude/skills/security-visual-testing/schemas/output.json +486 -0
  193. package/.claude/skills/security-visual-testing/scripts/validate.sh +748 -0
  194. package/.claude/skills/sherlock-review/SKILL.md +5 -0
  195. package/.claude/skills/sherlock-review/schemas/output.json +297 -0
  196. package/.claude/skills/sherlock-review/scripts/validate.sh +115 -0
  197. package/.claude/skills/shift-left-testing/SKILL.md +6 -0
  198. package/.claude/skills/shift-left-testing/evals/shift-left-testing.yaml +145 -0
  199. package/.claude/skills/shift-left-testing/schemas/output.json +459 -0
  200. package/.claude/skills/shift-left-testing/scripts/validate.sh +56 -0
  201. package/.claude/skills/shift-right-testing/SKILL.md +6 -0
  202. package/.claude/skills/shift-right-testing/evals/shift-right-testing.yaml +147 -0
  203. package/.claude/skills/shift-right-testing/schemas/output.json +418 -0
  204. package/.claude/skills/shift-right-testing/scripts/validate.sh +56 -0
  205. package/.claude/skills/six-thinking-hats/SKILL.md +3 -0
  206. package/.claude/skills/tdd-london-chicago/SKILL.md +5 -0
  207. package/.claude/skills/tdd-london-chicago/schemas/output.json +444 -0
  208. package/.claude/skills/tdd-london-chicago/scripts/validate.sh +214 -0
  209. package/.claude/skills/technical-writing/SKILL.md +4 -0
  210. package/.claude/skills/technical-writing/schemas/output.json +268 -0
  211. package/.claude/skills/test-automation-strategy/SKILL.md +6 -0
  212. package/.claude/skills/test-automation-strategy/evals/test-automation-strategy.yaml +148 -0
  213. package/.claude/skills/test-automation-strategy/schemas/output.json +444 -0
  214. package/.claude/skills/test-automation-strategy/scripts/validate.sh +195 -0
  215. package/.claude/skills/test-data-management/SKILL.md +6 -0
  216. package/.claude/skills/test-data-management/evals/test-data-management.yaml +504 -0
  217. package/.claude/skills/test-data-management/schemas/output.json +284 -0
  218. package/.claude/skills/test-data-management/scripts/validate.sh +137 -0
  219. package/.claude/skills/test-design-techniques/SKILL.md +6 -0
  220. package/.claude/skills/test-design-techniques/evals/test-design-techniques.yaml +142 -0
  221. package/.claude/skills/test-design-techniques/schemas/output.json +295 -0
  222. package/.claude/skills/test-design-techniques/scripts/validate.sh +91 -0
  223. package/.claude/skills/test-environment-management/SKILL.md +4 -0
  224. package/.claude/skills/test-environment-management/schemas/output.json +310 -0
  225. package/.claude/skills/test-reporting-analytics/SKILL.md +6 -0
  226. package/.claude/skills/test-reporting-analytics/evals/test-reporting-analytics.yaml +155 -0
  227. package/.claude/skills/test-reporting-analytics/schemas/output.json +329 -0
  228. package/.claude/skills/test-reporting-analytics/scripts/validate.sh +77 -0
  229. package/.claude/skills/testability-scoring/SKILL.md +5 -0
  230. package/.claude/skills/testability-scoring/evals/testability-scoring.yaml +814 -0
  231. package/.claude/skills/testability-scoring/schemas/output.json +606 -0
  232. package/.claude/skills/testability-scoring/scripts/validate.sh +891 -0
  233. package/.claude/skills/trust-tier-manifest.json +2333 -0
  234. package/.claude/skills/verification-quality/SKILL.md +6 -0
  235. package/.claude/skills/verification-quality/evals/verification-quality.yaml +150 -0
  236. package/.claude/skills/verification-quality/schemas/output.json +432 -0
  237. package/.claude/skills/verification-quality/scripts/validate.sh +77 -0
  238. package/.claude/skills/visual-testing-advanced/SKILL.md +6 -0
  239. package/.claude/skills/visual-testing-advanced/evals/visual-testing-advanced.yaml +154 -0
  240. package/.claude/skills/visual-testing-advanced/schemas/output.json +294 -0
  241. package/.claude/skills/visual-testing-advanced/scripts/validate.sh +77 -0
  242. package/.claude/skills/xp-practices/SKILL.md +3 -0
  243. package/README.md +39 -2
  244. package/package.json +4 -1
  245. package/scripts/run-skill-eval.ts +1097 -0
  246. package/scripts/test-schema-validation.js +301 -0
  247. package/scripts/update-skill-manifest.ts +751 -0
  248. package/scripts/validate-skill-frontmatter.js +141 -0
  249. package/v3/CHANGELOG.md +21 -0
  250. package/v3/README.md +43 -3
  251. package/v3/assets/agents/v3/qe-test-idea-rewriter.md +375 -375
  252. package/v3/assets/skills/.validation/README.md +111 -0
  253. package/v3/assets/skills/.validation/examples/chaos-engineering-output.example.json +530 -0
  254. package/v3/assets/skills/.validation/examples/performance-testing-output.example.json +252 -0
  255. package/v3/assets/skills/.validation/examples/security-testing-output.example.json +413 -0
  256. package/v3/assets/skills/.validation/examples/testability-scoring-output.example.json +350 -0
  257. package/v3/assets/skills/.validation/schemas/skill-eval.schema.json +452 -0
  258. package/v3/assets/skills/.validation/schemas/skill-frontmatter.schema.json +341 -0
  259. package/v3/assets/skills/.validation/schemas/skill-output-meta.schema.json +199 -0
  260. package/v3/assets/skills/.validation/schemas/skill-output.template.json +610 -0
  261. package/v3/assets/skills/.validation/skill-validation-mcp-integration.md +250 -0
  262. package/v3/assets/skills/.validation/templates/eval.template.yaml +366 -0
  263. package/v3/assets/skills/.validation/templates/schemas/output.json +145 -0
  264. package/v3/assets/skills/.validation/templates/security-testing-eval.template.yaml +725 -0
  265. package/v3/assets/skills/.validation/templates/skill-frontmatter.example.yaml +225 -0
  266. package/v3/assets/skills/.validation/templates/validate.template.sh +1060 -0
  267. package/v3/assets/skills/.validation/templates/validator-lib.sh +1031 -0
  268. package/v3/assets/skills/.validation/test-data/invalid-output.json +5 -0
  269. package/v3/assets/skills/.validation/test-data/minimal-output.json +9 -0
  270. package/v3/assets/skills/.validation/test-data/sample-output.json +73 -0
  271. package/v3/assets/skills/accessibility-testing/SKILL.md +8 -1
  272. package/v3/assets/skills/accessibility-testing/evals/accessibility-testing.yaml +719 -0
  273. package/v3/assets/skills/accessibility-testing/schemas/output.json +776 -0
  274. package/v3/assets/skills/accessibility-testing/scripts/validate.sh +943 -0
  275. package/v3/assets/skills/accessibility-testing/test-data/sample-output.json +191 -0
  276. package/v3/assets/skills/agentic-quality-engineering/SKILL.md +4 -0
  277. package/v3/assets/skills/agentic-quality-engineering/schemas/output.json +577 -0
  278. package/v3/assets/skills/api-testing-patterns/SKILL.md +5 -0
  279. package/v3/assets/skills/api-testing-patterns/evals/api-testing-patterns.yaml +696 -0
  280. package/v3/assets/skills/api-testing-patterns/schemas/output.json +845 -0
  281. package/v3/assets/skills/api-testing-patterns/scripts/validate.sh +931 -0
  282. package/v3/assets/skills/aqe-v2-v3-migration/schemas/output.json +513 -0
  283. package/v3/assets/skills/aqe-v2-v3-migration/skill.md +20 -16
  284. package/v3/assets/skills/bug-reporting-excellence/SKILL.md +5 -0
  285. package/v3/assets/skills/bug-reporting-excellence/schemas/output.json +288 -0
  286. package/v3/assets/skills/bug-reporting-excellence/scripts/validate.sh +108 -0
  287. package/v3/assets/skills/chaos-engineering-resilience/SKILL.md +5 -0
  288. package/v3/assets/skills/chaos-engineering-resilience/evals/chaos-engineering-resilience.yaml +761 -0
  289. package/v3/assets/skills/chaos-engineering-resilience/schemas/output.json +1205 -0
  290. package/v3/assets/skills/chaos-engineering-resilience/scripts/validate.sh +1107 -0
  291. package/v3/assets/skills/code-review-quality/SKILL.md +5 -0
  292. package/v3/assets/skills/code-review-quality/schemas/output.json +264 -0
  293. package/v3/assets/skills/code-review-quality/scripts/validate.sh +113 -0
  294. package/v3/assets/skills/compatibility-testing/evals/compatibility-testing.yaml +410 -0
  295. package/v3/assets/skills/compatibility-testing/schemas/output.json +551 -0
  296. package/v3/assets/skills/compatibility-testing/scripts/validate.sh +605 -0
  297. package/v3/assets/skills/compliance-testing/SKILL.md +7 -0
  298. package/v3/assets/skills/compliance-testing/evals/compliance-testing.yaml +1107 -0
  299. package/v3/assets/skills/compliance-testing/schemas/output.json +845 -0
  300. package/v3/assets/skills/compliance-testing/scripts/validate.sh +888 -0
  301. package/v3/assets/skills/consultancy-practices/SKILL.md +4 -0
  302. package/v3/assets/skills/consultancy-practices/schemas/output.json +282 -0
  303. package/v3/assets/skills/context-driven-testing/SKILL.md +3 -0
  304. package/v3/assets/skills/contract-testing/SKILL.md +5 -0
  305. package/v3/assets/skills/contract-testing/evals/contract-testing.yaml +748 -0
  306. package/v3/assets/skills/contract-testing/schemas/output.json +638 -0
  307. package/v3/assets/skills/contract-testing/scripts/validate.sh +1033 -0
  308. package/v3/assets/skills/database-testing/SKILL.md +5 -0
  309. package/v3/assets/skills/database-testing/evals/database-testing.yaml +968 -0
  310. package/v3/assets/skills/database-testing/schemas/output.json +1446 -0
  311. package/v3/assets/skills/database-testing/scripts/validate.sh +1099 -0
  312. package/v3/assets/skills/exploratory-testing-advanced/SKILL.md +3 -0
  313. package/v3/assets/skills/holistic-testing-pact/SKILL.md +3 -0
  314. package/v3/assets/skills/localization-testing/SKILL.md +6 -0
  315. package/v3/assets/skills/localization-testing/evals/localization-testing.yaml +544 -0
  316. package/v3/assets/skills/localization-testing/schemas/output.json +325 -0
  317. package/v3/assets/skills/localization-testing/scripts/validate.sh +131 -0
  318. package/v3/assets/skills/mobile-testing/SKILL.md +6 -0
  319. package/v3/assets/skills/mobile-testing/evals/mobile-testing.yaml +537 -0
  320. package/v3/assets/skills/mobile-testing/schemas/output.json +318 -0
  321. package/v3/assets/skills/mobile-testing/scripts/validate.sh +127 -0
  322. package/v3/assets/skills/mutation-testing/SKILL.md +8 -1
  323. package/v3/assets/skills/mutation-testing/evals/mutation-testing.yaml +652 -0
  324. package/v3/assets/skills/mutation-testing/schemas/output.json +707 -0
  325. package/v3/assets/skills/mutation-testing/scripts/validate.sh +992 -0
  326. package/v3/assets/skills/mutation-testing/test-data/sample-output.json +295 -0
  327. package/v3/assets/skills/n8n-expression-testing/scripts/validate.sh +162 -0
  328. package/v3/assets/skills/n8n-integration-testing-patterns/scripts/validate.sh +104 -0
  329. package/v3/assets/skills/n8n-security-testing/scripts/validate.sh +120 -0
  330. package/v3/assets/skills/n8n-trigger-testing-strategies/scripts/validate.sh +121 -0
  331. package/v3/assets/skills/n8n-workflow-testing-fundamentals/scripts/validate.sh +126 -0
  332. package/v3/assets/skills/performance-testing/SKILL.md +5 -0
  333. package/v3/assets/skills/performance-testing/evals/performance-testing.yaml +772 -0
  334. package/v3/assets/skills/performance-testing/schemas/output.json +1184 -0
  335. package/v3/assets/skills/performance-testing/scripts/validate.sh +1090 -0
  336. package/v3/assets/skills/qe-chaos-resilience/SKILL.md +6 -0
  337. package/v3/assets/skills/qe-chaos-resilience/evals/qe-chaos-resilience.yaml +443 -0
  338. package/v3/assets/skills/qe-chaos-resilience/schemas/output.json +314 -0
  339. package/v3/assets/skills/qe-chaos-resilience/scripts/validate.sh +401 -0
  340. package/v3/assets/skills/qe-code-intelligence/SKILL.md +6 -0
  341. package/v3/assets/skills/qe-code-intelligence/evals/qe-code-intelligence.yaml +459 -0
  342. package/v3/assets/skills/qe-code-intelligence/schemas/output.json +315 -0
  343. package/v3/assets/skills/qe-code-intelligence/scripts/validate.sh +408 -0
  344. package/v3/assets/skills/qe-contract-testing/SKILL.md +6 -0
  345. package/v3/assets/skills/qe-contract-testing/evals/qe-contract-testing.yaml +513 -0
  346. package/v3/assets/skills/qe-contract-testing/schemas/output.json +295 -0
  347. package/v3/assets/skills/qe-contract-testing/scripts/validate.sh +421 -0
  348. package/v3/assets/skills/qe-coverage-analysis/SKILL.md +6 -0
  349. package/v3/assets/skills/qe-coverage-analysis/evals/qe-coverage-analysis.yaml +494 -0
  350. package/v3/assets/skills/qe-coverage-analysis/schemas/output.json +286 -0
  351. package/v3/assets/skills/qe-coverage-analysis/scripts/validate.sh +453 -0
  352. package/v3/assets/skills/qe-defect-intelligence/SKILL.md +6 -0
  353. package/v3/assets/skills/qe-defect-intelligence/evals/qe-defect-intelligence.yaml +511 -0
  354. package/v3/assets/skills/qe-defect-intelligence/schemas/output.json +283 -0
  355. package/v3/assets/skills/qe-defect-intelligence/scripts/validate.sh +466 -0
  356. package/v3/assets/skills/qe-iterative-loop/SKILL.md +445 -443
  357. package/v3/assets/skills/qe-iterative-loop/schemas/output.json +264 -0
  358. package/v3/assets/skills/qe-iterative-loop/scripts/validate.sh +474 -0
  359. package/v3/assets/skills/qe-learning-optimization/SKILL.md +6 -0
  360. package/v3/assets/skills/qe-learning-optimization/evals/qe-learning-optimization.yaml +144 -0
  361. package/v3/assets/skills/qe-learning-optimization/schemas/output.json +288 -0
  362. package/v3/assets/skills/qe-learning-optimization/scripts/validate.sh +56 -0
  363. package/v3/assets/skills/qe-quality-assessment/SKILL.md +6 -0
  364. package/v3/assets/skills/qe-quality-assessment/evals/qe-quality-assessment.yaml +506 -0
  365. package/v3/assets/skills/qe-quality-assessment/schemas/output.json +550 -0
  366. package/v3/assets/skills/qe-quality-assessment/scripts/validate.sh +352 -0
  367. package/v3/assets/skills/qe-requirements-validation/SKILL.md +6 -0
  368. package/v3/assets/skills/qe-requirements-validation/evals/qe-requirements-validation.yaml +598 -0
  369. package/v3/assets/skills/qe-requirements-validation/schemas/output.json +587 -0
  370. package/v3/assets/skills/qe-requirements-validation/scripts/validate.sh +275 -0
  371. package/v3/assets/skills/qe-security-compliance/SKILL.md +6 -0
  372. package/v3/assets/skills/qe-security-compliance/evals/qe-security-compliance.yaml +595 -0
  373. package/v3/assets/skills/qe-security-compliance/schemas/output.json +498 -0
  374. package/v3/assets/skills/qe-security-compliance/scripts/validate.sh +305 -0
  375. package/v3/assets/skills/qe-test-execution/SKILL.md +6 -0
  376. package/v3/assets/skills/qe-test-execution/evals/qe-test-execution.yaml +607 -0
  377. package/v3/assets/skills/qe-test-execution/schemas/output.json +529 -0
  378. package/v3/assets/skills/qe-test-execution/scripts/validate.sh +323 -0
  379. package/v3/assets/skills/qe-test-generation/SKILL.md +6 -0
  380. package/v3/assets/skills/qe-test-generation/evals/qe-test-generation.yaml +148 -0
  381. package/v3/assets/skills/qe-test-generation/schemas/output.json +439 -0
  382. package/v3/assets/skills/qe-test-generation/scripts/validate.sh +56 -0
  383. package/v3/assets/skills/qe-visual-accessibility/SKILL.md +6 -0
  384. package/v3/assets/skills/qe-visual-accessibility/evals/qe-visual-accessibility.yaml +142 -0
  385. package/v3/assets/skills/qe-visual-accessibility/schemas/output.json +491 -0
  386. package/v3/assets/skills/qe-visual-accessibility/scripts/validate.sh +56 -0
  387. package/v3/assets/skills/quality-metrics/SKILL.md +6 -0
  388. package/v3/assets/skills/quality-metrics/evals/quality-metrics.yaml +494 -0
  389. package/v3/assets/skills/quality-metrics/schemas/output.json +403 -0
  390. package/v3/assets/skills/quality-metrics/scripts/validate.sh +434 -0
  391. package/v3/assets/skills/refactoring-patterns/SKILL.md +5 -0
  392. package/v3/assets/skills/refactoring-patterns/schemas/output.json +475 -0
  393. package/v3/assets/skills/refactoring-patterns/scripts/validate.sh +241 -0
  394. package/v3/assets/skills/regression-testing/SKILL.md +6 -0
  395. package/v3/assets/skills/regression-testing/evals/regression-testing.yaml +504 -0
  396. package/v3/assets/skills/regression-testing/schemas/output.json +311 -0
  397. package/v3/assets/skills/regression-testing/scripts/validate.sh +130 -0
  398. package/v3/assets/skills/risk-based-testing/SKILL.md +6 -0
  399. package/v3/assets/skills/risk-based-testing/evals/risk-based-testing.yaml +141 -0
  400. package/v3/assets/skills/risk-based-testing/schemas/output.json +480 -0
  401. package/v3/assets/skills/risk-based-testing/scripts/validate.sh +56 -0
  402. package/v3/assets/skills/security-testing/SKILL.md +5 -0
  403. package/v3/assets/skills/security-testing/evals/security-testing.yaml +789 -0
  404. package/v3/assets/skills/security-testing/schemas/output.json +879 -0
  405. package/v3/assets/skills/security-testing/scripts/validate.sh +967 -0
  406. package/v3/assets/skills/shift-left-testing/SKILL.md +6 -0
  407. package/v3/assets/skills/shift-left-testing/evals/shift-left-testing.yaml +145 -0
  408. package/v3/assets/skills/shift-left-testing/schemas/output.json +459 -0
  409. package/v3/assets/skills/shift-left-testing/scripts/validate.sh +56 -0
  410. package/v3/assets/skills/shift-right-testing/SKILL.md +6 -0
  411. package/v3/assets/skills/shift-right-testing/evals/shift-right-testing.yaml +147 -0
  412. package/v3/assets/skills/shift-right-testing/schemas/output.json +418 -0
  413. package/v3/assets/skills/shift-right-testing/scripts/validate.sh +56 -0
  414. package/v3/assets/skills/six-thinking-hats/SKILL.md +3 -0
  415. package/v3/assets/skills/tdd-london-chicago/SKILL.md +5 -0
  416. package/v3/assets/skills/tdd-london-chicago/schemas/output.json +444 -0
  417. package/v3/assets/skills/tdd-london-chicago/scripts/validate.sh +214 -0
  418. package/v3/assets/skills/technical-writing/SKILL.md +4 -0
  419. package/v3/assets/skills/technical-writing/schemas/output.json +268 -0
  420. package/v3/assets/skills/test-automation-strategy/SKILL.md +6 -0
  421. package/v3/assets/skills/test-automation-strategy/evals/test-automation-strategy.yaml +148 -0
  422. package/v3/assets/skills/test-automation-strategy/schemas/output.json +444 -0
  423. package/v3/assets/skills/test-automation-strategy/scripts/validate.sh +195 -0
  424. package/v3/assets/skills/test-data-management/SKILL.md +6 -0
  425. package/v3/assets/skills/test-data-management/evals/test-data-management.yaml +504 -0
  426. package/v3/assets/skills/test-data-management/schemas/output.json +284 -0
  427. package/v3/assets/skills/test-data-management/scripts/validate.sh +137 -0
  428. package/v3/assets/skills/test-design-techniques/SKILL.md +6 -0
  429. package/v3/assets/skills/test-design-techniques/evals/test-design-techniques.yaml +142 -0
  430. package/v3/assets/skills/test-design-techniques/schemas/output.json +295 -0
  431. package/v3/assets/skills/test-design-techniques/scripts/validate.sh +91 -0
  432. package/v3/assets/skills/test-environment-management/SKILL.md +4 -0
  433. package/v3/assets/skills/test-environment-management/schemas/output.json +310 -0
  434. package/v3/assets/skills/test-reporting-analytics/SKILL.md +6 -0
  435. package/v3/assets/skills/test-reporting-analytics/evals/test-reporting-analytics.yaml +155 -0
  436. package/v3/assets/skills/test-reporting-analytics/schemas/output.json +329 -0
  437. package/v3/assets/skills/test-reporting-analytics/scripts/validate.sh +77 -0
  438. package/v3/assets/skills/verification-quality/SKILL.md +6 -0
  439. package/v3/assets/skills/verification-quality/evals/verification-quality.yaml +150 -0
  440. package/v3/assets/skills/verification-quality/schemas/output.json +432 -0
  441. package/v3/assets/skills/verification-quality/scripts/validate.sh +77 -0
  442. package/v3/assets/skills/visual-testing-advanced/SKILL.md +6 -0
  443. package/v3/assets/skills/visual-testing-advanced/evals/visual-testing-advanced.yaml +154 -0
  444. package/v3/assets/skills/visual-testing-advanced/schemas/output.json +294 -0
  445. package/v3/assets/skills/visual-testing-advanced/scripts/validate.sh +77 -0
  446. package/v3/assets/skills/xp-practices/SKILL.md +3 -0
  447. package/v3/assets/templates/validation-report.md.hbs +139 -0
  448. package/v3/assets/templates/validation-summary.json +56 -0
  449. package/v3/dist/cli/bundle.js +7368 -3418
  450. package/v3/dist/cli/commands/eval.d.ts +18 -0
  451. package/v3/dist/cli/commands/eval.d.ts.map +1 -0
  452. package/v3/dist/cli/commands/eval.js +505 -0
  453. package/v3/dist/cli/commands/eval.js.map +1 -0
  454. package/v3/dist/cli/commands/validate-swarm.d.ts +16 -0
  455. package/v3/dist/cli/commands/validate-swarm.d.ts.map +1 -0
  456. package/v3/dist/cli/commands/validate-swarm.js +251 -0
  457. package/v3/dist/cli/commands/validate-swarm.js.map +1 -0
  458. package/v3/dist/cli/commands/validate.d.ts +14 -0
  459. package/v3/dist/cli/commands/validate.d.ts.map +1 -0
  460. package/v3/dist/cli/commands/validate.js +408 -0
  461. package/v3/dist/cli/commands/validate.js.map +1 -0
  462. package/v3/dist/cli/index.js +6 -0
  463. package/v3/dist/cli/index.js.map +1 -1
  464. package/v3/dist/index.d.ts +2 -0
  465. package/v3/dist/index.d.ts.map +1 -1
  466. package/v3/dist/index.js +10 -0
  467. package/v3/dist/index.js.map +1 -1
  468. package/v3/dist/init/skills-installer.d.ts +6 -0
  469. package/v3/dist/init/skills-installer.d.ts.map +1 -1
  470. package/v3/dist/init/skills-installer.js +48 -0
  471. package/v3/dist/init/skills-installer.js.map +1 -1
  472. package/v3/dist/learning/index.d.ts +2 -0
  473. package/v3/dist/learning/index.d.ts.map +1 -1
  474. package/v3/dist/learning/index.js +4 -0
  475. package/v3/dist/learning/index.js.map +1 -1
  476. package/v3/dist/learning/skill-validation-learner.d.ts +264 -0
  477. package/v3/dist/learning/skill-validation-learner.d.ts.map +1 -0
  478. package/v3/dist/learning/skill-validation-learner.js +515 -0
  479. package/v3/dist/learning/skill-validation-learner.js.map +1 -0
  480. package/v3/dist/validation/index.d.ts +19 -0
  481. package/v3/dist/validation/index.d.ts.map +1 -0
  482. package/v3/dist/validation/index.js +31 -0
  483. package/v3/dist/validation/index.js.map +1 -0
  484. package/v3/dist/validation/parallel-eval-runner.d.ts +307 -0
  485. package/v3/dist/validation/parallel-eval-runner.d.ts.map +1 -0
  486. package/v3/dist/validation/parallel-eval-runner.js +566 -0
  487. package/v3/dist/validation/parallel-eval-runner.js.map +1 -0
  488. package/v3/dist/validation/swarm-skill-validator.d.ts +282 -0
  489. package/v3/dist/validation/swarm-skill-validator.d.ts.map +1 -0
  490. package/v3/dist/validation/swarm-skill-validator.js +460 -0
  491. package/v3/dist/validation/swarm-skill-validator.js.map +1 -0
  492. package/v3/dist/validation/validation-result-aggregator.d.ts +232 -0
  493. package/v3/dist/validation/validation-result-aggregator.d.ts.map +1 -0
  494. package/v3/dist/validation/validation-result-aggregator.js +630 -0
  495. package/v3/dist/validation/validation-result-aggregator.js.map +1 -0
  496. package/v3/package.json +1 -1
@@ -0,0 +1,250 @@
1
+ # Skill Validation MCP Integration Specification
2
+
3
+ **Version**: 1.0.0
4
+ **Created**: 2026-02-02
5
+ **Status**: Active
6
+
7
+ ## Overview
8
+
9
+ All skill validation components MUST use AQE MCP tool calls for shared learning. This ensures validation patterns, outcomes, and insights are stored in the ReasoningBank and shared across the QE agent fleet.
10
+
11
+ ## Required MCP Tool Calls
12
+
13
+ ### 1. Pattern Storage
14
+
15
+ Skills MUST store successful validation patterns for future reference:
16
+
17
+ ```typescript
18
+ // After successful validation
19
+ mcp__agentic-qe__memory_store({
20
+ key: `skill-validation-${skillName}-${timestamp}`,
21
+ value: {
22
+ skillName: string,
23
+ trustTier: number,
24
+ validationResult: ValidationResult,
25
+ model: string,
26
+ passRate: number,
27
+ patterns: LearnedPattern[]
28
+ },
29
+ namespace: "skill-validation"
30
+ })
31
+ ```
32
+
33
+ ### 2. Pattern Query
34
+
35
+ Before validation, query for existing patterns:
36
+
37
+ ```typescript
38
+ // Query learned patterns
39
+ const existingPatterns = await mcp__agentic-qe__memory_query({
40
+ pattern: `skill-validation-${skillName}-*`,
41
+ namespace: "skill-validation",
42
+ limit: 10
43
+ })
44
+
45
+ // Use patterns to inform validation expectations
46
+ ```
47
+
48
+ ### 3. Outcome Tracking
49
+
50
+ Track all validation outcomes for the learning feedback loop:
51
+
52
+ ```typescript
53
+ // Record validation outcome
54
+ mcp__agentic-qe__test_outcome_track({
55
+ testId: `skill-${skillName}-${evalId}`,
56
+ generatedBy: agentId,
57
+ patternId: usedPatternId,
58
+ passed: boolean,
59
+ coverage: {
60
+ lines: number,
61
+ branches: number,
62
+ functions: number
63
+ },
64
+ executionTime: number,
65
+ flaky: false
66
+ })
67
+ ```
68
+
69
+ ### 4. Cross-Agent Learning Share
70
+
71
+ Share validation insights with the learning coordinator:
72
+
73
+ ```typescript
74
+ // Share learning with fleet
75
+ mcp__agentic-qe__memory_share({
76
+ sourceAgentId: currentAgentId,
77
+ targetAgentIds: ["qe-learning-coordinator", "qe-queen-coordinator"],
78
+ knowledgeDomain: "skill-validation",
79
+ data: {
80
+ skillName: string,
81
+ insights: ValidationInsight[],
82
+ recommendations: string[]
83
+ }
84
+ })
85
+ ```
86
+
87
+ ### 5. Quality Gate Integration
88
+
89
+ Update skill quality scores via quality assessment:
90
+
91
+ ```typescript
92
+ // After validation completes
93
+ mcp__agentic-qe__quality_assess({
94
+ target: `skill:${skillName}`,
95
+ metrics: {
96
+ passRate: number,
97
+ schemaCompliance: boolean,
98
+ validatorPassed: boolean,
99
+ evalSuiteScore: number
100
+ },
101
+ updateQualityScore: true
102
+ })
103
+ ```
104
+
105
+ ## Memory Namespace Structure
106
+
107
+ ```
108
+ aqe/skill-validation/
109
+ ├── patterns/
109
+ │ ├── security-testing/* - Security validation patterns
110
+ │ ├── accessibility-testing/* - A11y validation patterns
111
+ │ └── {skill-name}/* - Per-skill patterns
112
+ ├── outcomes/
113
+ │ ├── by-skill/ - Outcomes grouped by skill
114
+ │ ├── by-model/ - Outcomes grouped by model
115
+ │ └── by-date/ - Outcomes grouped by date
116
+ ├── insights/
117
+ │ ├── cross-model/ - Cross-model behavior differences
118
+ │ ├── regressions/ - Detected regressions
119
+ │ └── improvements/ - Improvement recommendations
120
+ └── confidence/
121
+ └── {skill-name}/ - Confidence scores per skill
123
+ ```
124
+
125
+ ## Eval Runner Integration
126
+
127
+ The `scripts/run-skill-eval.ts` evaluation runner MUST:
128
+
129
+ 1. **Before running evals**: Query ReasoningBank for learned patterns
130
+ 2. **During evals**: Track each test case outcome
131
+ 3. **After evals**: Store patterns and share learning
132
+ 4. **On regression**: Alert via quality gate
133
+
134
+ ```typescript
135
+ // Evaluation runner pseudocode
136
+ async function runSkillEval(skill: string, model: string) {
137
+ // 1. Query existing patterns
138
+ const patterns = await mcp__agentic-qe__memory_query({
139
+ pattern: `skill-validation-${skill}-*`,
140
+ namespace: "skill-validation"
141
+ });
142
+
143
+ // 2. Run evaluation test cases
144
+ const results = await runTestCases(skill, model, patterns);
145
+
146
+ // 3. Track outcomes
147
+ for (const result of results) {
148
+ await mcp__agentic-qe__test_outcome_track({
149
+ testId: result.id,
150
+ passed: result.passed,
151
+ // ...
152
+ });
153
+ }
154
+
155
+ // 4. Store new patterns
156
+ await mcp__agentic-qe__memory_store({
157
+ key: `skill-validation-${skill}-${Date.now()}`,
158
+ value: { results, patterns: extractPatterns(results) },
159
+ namespace: "skill-validation"
160
+ });
161
+
162
+ // 5. Share learning
163
+ await mcp__agentic-qe__memory_share({
164
+ sourceAgentId: "eval-runner",
165
+ targetAgentIds: ["qe-learning-coordinator"],
166
+ knowledgeDomain: "skill-validation",
167
+ data: summarizeResults(results)
168
+ });
169
+
170
+ // 6. Update quality gate
171
+ await mcp__agentic-qe__quality_assess({
172
+ target: `skill:${skill}`,
173
+ metrics: calculateMetrics(results),
174
+ updateQualityScore: true
175
+ });
176
+
177
+ return results;
178
+ }
179
+ ```
180
+
181
+ ## Validator Script Integration
182
+
183
+ Bash validators should call the MCP tools via the CLI wrapper:
184
+
185
+ ```bash
186
+ # In validate.sh after validation
187
+ store_validation_result() {
188
+ local skill="$1"
189
+ local result="$2"
190
+
191
+ npx aqe memory store \
192
+ --key "skill-validation-${skill}-$(date +%s)" \
193
+ --value "$result" \
194
+ --namespace skill-validation
195
+ }
196
+
197
+ track_outcome() {
198
+ local test_id="$1"
199
+ local passed="$2"
200
+
201
+ npx aqe feedback track \
202
+ --test-id "$test_id" \
203
+ --passed "$passed"
204
+ }
205
+ ```
206
+
207
+ ## CI Pipeline Integration
208
+
209
+ The GitHub Actions workflow MUST use the MCP tools, invoked here through the `npx aqe` CLI:
210
+
211
+ ```yaml
212
+ - name: Query Baseline Patterns
213
+ run: |
214
+ npx aqe memory query \
215
+ --pattern "skill-validation-${{ matrix.skill }}-*" \
216
+ --namespace skill-validation \
217
+ --limit 5 \
218
+ --output baseline-patterns.json
219
+
220
+ - name: Run Validation
221
+ run: |
222
+ npx ts-node scripts/run-skill-eval.ts \
223
+ --skill "${{ matrix.skill }}" \
224
+ --model "${{ matrix.model }}" \
225
+ --use-mcp-learning
226
+
227
+ - name: Share Results with Fleet
228
+ run: |
229
+ npx aqe memory share \
230
+ --source "ci-validator" \
231
+ --targets "qe-learning-coordinator" \
232
+ --domain "skill-validation" \
233
+ --data-file validation-results.json
234
+ ```
235
+
236
+ ## Success Criteria
237
+
238
+ - [ ] All validators call `memory_store` after validation
239
+ - [ ] Eval runner queries patterns before running
240
+ - [ ] Outcomes tracked via `test_outcome_track`
241
+ - [ ] Learning shared with coordinator
242
+ - [ ] Quality gate updated with validation metrics
243
+ - [ ] CI pipeline uses MCP tools for learning
244
+
245
+ ## References
246
+
247
+ - [ADR-056: Deterministic Skill Validation System](../v3/implementation/adrs/ADR-056-skill-validation-system.md)
248
+ - [ADR-021: QE ReasoningBank](../v3/implementation/adrs/v3-adrs.md#adr-021)
249
+ - [ADR-023: Quality Feedback Loop](../v3/implementation/adrs/v3-adrs.md#adr-023)
250
+ - [AQE MCP Tools Reference](../reference/aqe-fleet.md)
@@ -0,0 +1,366 @@
1
+ # =============================================================================
2
+ # AQE Skill Evaluation Test Suite Template
3
+ # Copy this template to: .claude/skills/{skill-name}/evals/{skill-name}.yaml
4
+ # =============================================================================
5
+ #
6
+ # This evaluation suite validates skill behavior through:
7
+ # 1. Input/expected-output test cases
8
+ # 2. Multi-model consistency testing
9
+ # 3. Semantic validation of reasoning quality
10
+ # 4. AQE MCP integration for shared learning (NEW)
11
+ # 5. ReasoningBank pattern storage (NEW)
12
+ #
13
+ # Schema: docs/schemas/skill-eval.schema.json
14
+ # MCP Spec: docs/specs/skill-validation-mcp-integration.md
15
+ # Runner: scripts/run-skill-eval.ts
16
+ #
17
+ # For a comprehensive example, see: docs/templates/security-testing-eval.template.yaml
18
+ # =============================================================================
19
+
20
+ skill: REPLACE_WITH_SKILL_NAME
21
+ version: 1.0.0
22
+ description: >
23
+ Evaluation test suite for REPLACE_WITH_SKILL_NAME skill.
24
+ Tests core functionality across multiple models to ensure consistent,
25
+ high-quality output.
26
+
27
+ # =============================================================================
28
+ # Multi-Model Configuration
29
+ # =============================================================================
30
+ # Test across multiple models to ensure consistent behavior and identify
31
+ # model-specific quirks. Results are compared to detect variance.
32
+
33
+ models_to_test:
34
+ - claude-3.5-sonnet # Primary model (high accuracy expected)
35
+ - claude-3-haiku # Fast model (ensure it meets minimum quality)
36
+ # - gpt-4o # Optional: Cross-vendor validation
37
+
38
+ # =============================================================================
39
+ # MCP Integration Configuration (NEW in v1.4)
40
+ # =============================================================================
41
+ # Per docs/specs/skill-validation-mcp-integration.md
42
+ # Enable to integrate with AQE ReasoningBank for shared learning.
43
+
44
+ mcp_integration:
45
+ enabled: true
46
+ namespace: skill-validation
47
+
48
+ # Query existing patterns before running evals
49
+ query_patterns: true
50
+
51
+ # Track each test outcome for the learning feedback loop
52
+ track_outcomes: true
53
+
54
+ # Store successful patterns after evals complete
55
+ store_patterns: true
56
+
57
+ # Share learning with fleet coordinator agents
58
+ share_learning: true
59
+
60
+ # Update quality gate with validation metrics
61
+ update_quality_gate: true
62
+
63
+ # Agents to share learning with
64
+ target_agents:
65
+ - qe-learning-coordinator
66
+ - qe-queen-coordinator
67
+
68
+ # =============================================================================
69
+ # ReasoningBank Learning Configuration (NEW in v1.4)
70
+ # =============================================================================
71
+
72
+ learning:
73
+ store_success_patterns: true
74
+ store_failure_patterns: true
75
+ pattern_ttl_days: 90
76
+ min_confidence_to_store: 0.7
77
+ cross_model_comparison: true
78
+
79
+ # =============================================================================
80
+ # Result Format Configuration (NEW in v1.4)
81
+ # =============================================================================
82
+
83
+ result_format:
84
+ json_output: true
85
+ markdown_report: false
86
+ include_raw_output: false
87
+ include_timing: true
88
+ include_token_usage: true
89
+
90
+ # =============================================================================
91
+ # Environment Setup
92
+ # =============================================================================
93
+
94
+ setup:
95
+ required_tools: []
96
+ # - tool_name
97
+ environment_variables: {}
98
+ # ENV_VAR: value
99
+ fixtures: []
100
+ # - name: fixture_name
101
+ # path: fixtures/fixture_name.json
102
+ # content: |
103
+ # { "key": "value" }
104
+
105
+ # =============================================================================
106
+ # Test Cases
107
+ # =============================================================================
108
+ # Each test case validates a specific behavior or scenario.
109
+ # IDs follow the pattern: tc{NNN}_{short_description}
110
+ # =============================================================================
111
+
112
+ test_cases:
113
+ # -------------------------------------------------------------------------
114
+ # Basic Functionality Tests
115
+ # -------------------------------------------------------------------------
116
+
117
+ - id: tc001_basic_invocation
118
+ description: "Skill responds to basic invocation with valid output"
119
+ category: basic
120
+ priority: critical
121
+
122
+ input:
123
+ prompt: |
124
+ Analyze the following code for issues:
125
+ ```javascript
126
+ function hello() {
127
+ console.log("Hello, World!");
128
+ }
129
+ ```
130
+ context:
131
+ language: javascript
132
+
133
+ expected_output:
134
+ must_contain:
135
+ - "function"
136
+ - "hello"
137
+ must_not_contain:
138
+ - "error"
139
+ - "unable to analyze"
140
+ # finding_count:
141
+ # min: 0
142
+ # max: 5
143
+
144
+ validation:
145
+ schema_check: true
146
+ keyword_match_threshold: 0.8
147
+ reasoning_quality_min: 0.6
148
+
149
+ - id: tc002_handles_empty_input
150
+ description: "Skill handles empty or minimal input gracefully"
151
+ category: edge_cases
152
+ priority: high
153
+
154
+ input:
155
+ prompt: "Analyze this code:"
156
+ context:
157
+ language: unknown
158
+
159
+ expected_output:
160
+ must_contain:
161
+ - "no code"
162
+ - "provide"
163
+ must_not_contain:
164
+ - "exception"
165
+ - "crash"
166
+
167
+ validation:
168
+ schema_check: true
169
+ allow_partial: true
170
+
171
+ # -------------------------------------------------------------------------
172
+ # Core Functionality Tests - CUSTOMIZE THESE
173
+ # -------------------------------------------------------------------------
174
+
175
+ - id: tc003_core_feature_1
176
+ description: "DESCRIBE WHAT THIS TEST VALIDATES"
177
+ category: core
178
+ priority: high
179
+
180
+ input:
181
+ code: |
182
+ // Replace with relevant code sample
183
+ const example = "test";
184
+ context:
185
+ language: javascript
186
+ framework: nodejs
187
+
188
+ expected_output:
189
+ must_contain:
190
+ - "EXPECTED_KEYWORD_1"
191
+ - "EXPECTED_KEYWORD_2"
192
+ severity_classification: medium
193
+
194
+ validation:
195
+ schema_check: true
196
+ keyword_match_threshold: 0.8
197
+
198
+ # -------------------------------------------------------------------------
199
+ # Negative Tests (Should NOT find issues)
200
+ # -------------------------------------------------------------------------
201
+
202
+ - id: tc010_no_false_positives
203
+ description: "Skill does not flag clean code as problematic"
204
+ category: negative
205
+ priority: high
206
+
207
+ input:
208
+ code: |
209
+ // Well-written, clean code
210
+ function add(a, b) {
211
+ return a + b;
212
+ }
213
+ context:
214
+ language: javascript
215
+
216
+ expected_output:
217
+ must_contain:
218
+ - "no issues"
219
+ # OR
220
+ # - "clean"
221
+ # - "good"
222
+ must_not_contain:
223
+ - "critical"
224
+ - "vulnerability"
225
+ - "error"
226
+
227
+ validation:
228
+ schema_check: true
229
+ finding_count:
230
+ max: 1 # Allow at most 1 minor finding
231
+
232
+ # -------------------------------------------------------------------------
233
+ # Edge Cases
234
+ # -------------------------------------------------------------------------
235
+
236
+ - id: tc020_large_input
237
+ description: "Skill handles large input without truncation issues"
238
+ category: edge_cases
239
+ priority: medium
240
+ skip: false
241
+
242
+ input:
243
+ file_path: fixtures/large_sample.js
244
+ context:
245
+ language: javascript
246
+
247
+ expected_output:
248
+ must_contain:
249
+ - "analyzed"
250
+
251
+ validation:
252
+ schema_check: true
253
+
254
+ timeout_ms: 60000 # Longer timeout for large files
255
+
256
+ - id: tc021_special_characters
257
+ description: "Skill handles special characters in input"
258
+ category: edge_cases
259
+ priority: medium
260
+
261
+ input:
262
+ code: |
263
+ const emoji = "Hello 🌍!";
264
+ const unicode = "日本語テスト";
265
+ const escape = "Line1\nLine2\tTabbed";
266
+ context:
267
+ language: javascript
268
+
269
+ expected_output:
270
+ must_not_contain:
271
+ - "encoding error"
272
+ - "parse error"
273
+
274
+ validation:
275
+ schema_check: true
276
+
277
+ # -------------------------------------------------------------------------
278
+ # Multi-Language Support (if applicable)
279
+ # -------------------------------------------------------------------------
280
+
281
+ - id: tc030_python_support
282
+ description: "Skill correctly analyzes Python code"
283
+ category: language_support
284
+ priority: medium
285
+ skip: true # Enable if skill supports Python
286
+
287
+ input:
288
+ code: |
289
+ def hello():
290
+ print("Hello, World!")
291
+ context:
292
+ language: python
293
+
294
+ expected_output:
295
+ must_contain:
296
+ - "def"
297
+ - "function"
298
+
299
+ validation:
300
+ schema_check: true
301
+
302
+ # -------------------------------------------------------------------------
303
+ # Integration Scenarios
304
+ # -------------------------------------------------------------------------
305
+
306
+ - id: tc040_with_context
307
+ description: "Skill uses provided context appropriately"
308
+ category: integration
309
+ priority: medium
310
+
311
+ input:
312
+ code: |
313
+ app.get('/api/users', (req, res) => {
314
+ const users = db.query('SELECT * FROM users');
315
+ res.json(users);
316
+ });
317
+ context:
318
+ language: javascript
319
+ framework: express
320
+ environment: production
321
+ options:
322
+ detailed: true
323
+
324
+ expected_output:
325
+ must_contain:
326
+ - "express"
327
+ - "api"
328
+
329
+ validation:
330
+ schema_check: true
331
+ grading_rubric:
332
+ completeness: 0.4
333
+ accuracy: 0.4
334
+ actionability: 0.2
335
+
336
+ # =============================================================================
337
+ # Success Criteria
338
+ # =============================================================================
339
+ # Define what constitutes a passing evaluation suite run.
340
+ # =============================================================================
341
+
342
+ success_criteria:
343
+ # Minimum percentage of tests that must pass
344
+ pass_rate: 0.9
345
+
346
+ # Critical tests must have 100% pass rate
347
+ critical_pass_rate: 1.0
348
+
349
+ # Average reasoning quality across all tests
350
+ avg_reasoning_quality: 0.7
351
+
352
+ # Maximum time for entire suite (5 minutes)
353
+ max_execution_time_ms: 300000
354
+
355
+ # Maximum allowed variance between different models (0.15 = 15%)
356
+ cross_model_variance: 0.15
357
+
358
+ # =============================================================================
359
+ # Metadata
360
+ # =============================================================================
361
+
362
+ metadata:
363
+ author: "@your-github-handle"
364
+ created: "2026-02-02"
365
+ last_updated: "2026-02-02"
366
+ coverage_target: "Core functionality and common edge cases"