agentic-qe 3.4.1 → 3.4.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (496) hide show
  1. package/.claude/skills/.validation/README.md +111 -0
  2. package/.claude/skills/.validation/examples/chaos-engineering-output.example.json +530 -0
  3. package/.claude/skills/.validation/examples/performance-testing-output.example.json +252 -0
  4. package/.claude/skills/.validation/examples/security-testing-output.example.json +413 -0
  5. package/.claude/skills/.validation/examples/testability-scoring-output.example.json +350 -0
  6. package/.claude/skills/.validation/schemas/skill-eval.schema.json +452 -0
  7. package/.claude/skills/.validation/schemas/skill-frontmatter.schema.json +341 -0
  8. package/.claude/skills/.validation/schemas/skill-output-meta.schema.json +199 -0
  9. package/.claude/skills/.validation/schemas/skill-output.template.json +610 -0
  10. package/.claude/skills/.validation/skill-validation-mcp-integration.md +250 -0
  11. package/.claude/skills/.validation/templates/eval.template.yaml +366 -0
  12. package/.claude/skills/.validation/templates/schemas/output.json +145 -0
  13. package/.claude/skills/.validation/templates/security-testing-eval.template.yaml +725 -0
  14. package/.claude/skills/.validation/templates/skill-frontmatter.example.yaml +225 -0
  15. package/.claude/skills/.validation/templates/validate.template.sh +1060 -0
  16. package/.claude/skills/.validation/templates/validator-lib.sh +1031 -0
  17. package/.claude/skills/.validation/test-data/invalid-output.json +5 -0
  18. package/.claude/skills/.validation/test-data/minimal-output.json +9 -0
  19. package/.claude/skills/.validation/test-data/sample-output.json +73 -0
  20. package/.claude/skills/TRUST-TIERS.md +158 -0
  21. package/.claude/skills/a11y-ally/SKILL.md +1664 -1658
  22. package/.claude/skills/a11y-ally/evals/a11y-ally.yaml +376 -0
  23. package/.claude/skills/a11y-ally/schemas/output.json +549 -0
  24. package/.claude/skills/a11y-ally/scripts/validate.sh +690 -0
  25. package/.claude/skills/accessibility-testing/SKILL.md +8 -1
  26. package/.claude/skills/accessibility-testing/evals/accessibility-testing.yaml +719 -0
  27. package/.claude/skills/accessibility-testing/schemas/output.json +776 -0
  28. package/.claude/skills/accessibility-testing/scripts/validate.sh +943 -0
  29. package/.claude/skills/accessibility-testing/test-data/sample-output.json +191 -0
  30. package/.claude/skills/agentic-quality-engineering/SKILL.md +4 -0
  31. package/.claude/skills/agentic-quality-engineering/schemas/output.json +577 -0
  32. package/.claude/skills/api-testing-patterns/SKILL.md +5 -0
  33. package/.claude/skills/api-testing-patterns/evals/api-testing-patterns.yaml +696 -0
  34. package/.claude/skills/api-testing-patterns/schemas/output.json +845 -0
  35. package/.claude/skills/api-testing-patterns/scripts/validate.sh +931 -0
  36. package/.claude/skills/aqe-v2-v3-migration/schemas/output.json +513 -0
  37. package/.claude/skills/aqe-v2-v3-migration/skill.md +4 -0
  38. package/.claude/skills/brutal-honesty-review/SKILL.md +5 -0
  39. package/.claude/skills/brutal-honesty-review/schemas/output.json +291 -0
  40. package/.claude/skills/brutal-honesty-review/scripts/validate.sh +130 -0
  41. package/.claude/skills/bug-reporting-excellence/SKILL.md +5 -0
  42. package/.claude/skills/bug-reporting-excellence/schemas/output.json +288 -0
  43. package/.claude/skills/bug-reporting-excellence/scripts/validate.sh +108 -0
  44. package/.claude/skills/chaos-engineering-resilience/SKILL.md +5 -0
  45. package/.claude/skills/chaos-engineering-resilience/evals/chaos-engineering-resilience.yaml +761 -0
  46. package/.claude/skills/chaos-engineering-resilience/schemas/output.json +1205 -0
  47. package/.claude/skills/chaos-engineering-resilience/scripts/validate.sh +1107 -0
  48. package/.claude/skills/cicd-pipeline-qe-orchestrator/SKILL.md +6 -0
  49. package/.claude/skills/cicd-pipeline-qe-orchestrator/evals/cicd-pipeline-qe-orchestrator.yaml +157 -0
  50. package/.claude/skills/cicd-pipeline-qe-orchestrator/schemas/output.json +542 -0
  51. package/.claude/skills/cicd-pipeline-qe-orchestrator/scripts/validate.sh +310 -0
  52. package/.claude/skills/code-review-quality/SKILL.md +5 -0
  53. package/.claude/skills/code-review-quality/schemas/output.json +264 -0
  54. package/.claude/skills/code-review-quality/scripts/validate.sh +113 -0
  55. package/.claude/skills/compatibility-testing/evals/compatibility-testing.yaml +410 -0
  56. package/.claude/skills/compatibility-testing/schemas/output.json +551 -0
  57. package/.claude/skills/compatibility-testing/scripts/validate.sh +605 -0
  58. package/.claude/skills/compliance-testing/SKILL.md +7 -0
  59. package/.claude/skills/compliance-testing/evals/compliance-testing.yaml +1107 -0
  60. package/.claude/skills/compliance-testing/schemas/output.json +845 -0
  61. package/.claude/skills/compliance-testing/scripts/validate.sh +888 -0
  62. package/.claude/skills/consultancy-practices/SKILL.md +4 -0
  63. package/.claude/skills/consultancy-practices/schemas/output.json +282 -0
  64. package/.claude/skills/context-driven-testing/SKILL.md +3 -0
  65. package/.claude/skills/contract-testing/SKILL.md +5 -0
  66. package/.claude/skills/contract-testing/evals/contract-testing.yaml +748 -0
  67. package/.claude/skills/contract-testing/schemas/output.json +638 -0
  68. package/.claude/skills/contract-testing/scripts/validate.sh +1033 -0
  69. package/.claude/skills/database-testing/SKILL.md +5 -0
  70. package/.claude/skills/database-testing/evals/database-testing.yaml +968 -0
  71. package/.claude/skills/database-testing/schemas/output.json +1446 -0
  72. package/.claude/skills/database-testing/scripts/validate.sh +1099 -0
  73. package/.claude/skills/exploratory-testing-advanced/SKILL.md +3 -0
  74. package/.claude/skills/holistic-testing-pact/SKILL.md +3 -0
  75. package/.claude/skills/localization-testing/SKILL.md +6 -0
  76. package/.claude/skills/localization-testing/evals/localization-testing.yaml +544 -0
  77. package/.claude/skills/localization-testing/schemas/output.json +325 -0
  78. package/.claude/skills/localization-testing/scripts/validate.sh +131 -0
  79. package/.claude/skills/mobile-testing/SKILL.md +6 -0
  80. package/.claude/skills/mobile-testing/evals/mobile-testing.yaml +537 -0
  81. package/.claude/skills/mobile-testing/schemas/output.json +318 -0
  82. package/.claude/skills/mobile-testing/scripts/validate.sh +127 -0
  83. package/.claude/skills/mutation-testing/SKILL.md +8 -1
  84. package/.claude/skills/mutation-testing/evals/mutation-testing.yaml +652 -0
  85. package/.claude/skills/mutation-testing/schemas/output.json +707 -0
  86. package/.claude/skills/mutation-testing/scripts/validate.sh +992 -0
  87. package/.claude/skills/mutation-testing/test-data/sample-output.json +295 -0
  88. package/.claude/skills/n8n-expression-testing/SKILL.md +6 -0
  89. package/.claude/skills/n8n-expression-testing/evals/n8n-expression-testing.yaml +450 -0
  90. package/.claude/skills/n8n-expression-testing/schemas/output.json +369 -0
  91. package/.claude/skills/n8n-expression-testing/scripts/validate.sh +162 -0
  92. package/.claude/skills/n8n-integration-testing-patterns/SKILL.md +6 -0
  93. package/.claude/skills/n8n-integration-testing-patterns/evals/n8n-integration-testing-patterns.yaml +522 -0
  94. package/.claude/skills/n8n-integration-testing-patterns/schemas/output.json +291 -0
  95. package/.claude/skills/n8n-integration-testing-patterns/scripts/validate.sh +104 -0
  96. package/.claude/skills/n8n-security-testing/SKILL.md +6 -0
  97. package/.claude/skills/n8n-security-testing/evals/n8n-security-testing.yaml +493 -0
  98. package/.claude/skills/n8n-security-testing/schemas/output.json +293 -0
  99. package/.claude/skills/n8n-security-testing/scripts/validate.sh +120 -0
  100. package/.claude/skills/n8n-trigger-testing-strategies/SKILL.md +6 -0
  101. package/.claude/skills/n8n-trigger-testing-strategies/evals/n8n-trigger-testing-strategies.yaml +500 -0
  102. package/.claude/skills/n8n-trigger-testing-strategies/schemas/output.json +295 -0
  103. package/.claude/skills/n8n-trigger-testing-strategies/scripts/validate.sh +121 -0
  104. package/.claude/skills/n8n-workflow-testing-fundamentals/SKILL.md +6 -0
  105. package/.claude/skills/n8n-workflow-testing-fundamentals/evals/n8n-workflow-testing-fundamentals.yaml +497 -0
  106. package/.claude/skills/n8n-workflow-testing-fundamentals/schemas/output.json +254 -0
  107. package/.claude/skills/n8n-workflow-testing-fundamentals/scripts/validate.sh +126 -0
  108. package/.claude/skills/performance-analysis/SKILL.md +6 -0
  109. package/.claude/skills/performance-analysis/evals/performance-analysis.yaml +144 -0
  110. package/.claude/skills/performance-analysis/schemas/output.json +588 -0
  111. package/.claude/skills/performance-analysis/scripts/validate.sh +63 -0
  112. package/.claude/skills/performance-testing/SKILL.md +5 -0
  113. package/.claude/skills/performance-testing/evals/performance-testing.yaml +772 -0
  114. package/.claude/skills/performance-testing/schemas/output.json +1184 -0
  115. package/.claude/skills/performance-testing/scripts/validate.sh +1090 -0
  116. package/.claude/skills/qcsd-ideation-swarm/SKILL.md +1759 -1753
  117. package/.claude/skills/qcsd-ideation-swarm/evals/qcsd-ideation-swarm.yaml +138 -0
  118. package/.claude/skills/qcsd-ideation-swarm/schemas/output.json +568 -0
  119. package/.claude/skills/qcsd-ideation-swarm/scripts/validate.sh +56 -0
  120. package/.claude/skills/qe-chaos-resilience/SKILL.md +6 -0
  121. package/.claude/skills/qe-chaos-resilience/evals/qe-chaos-resilience.yaml +443 -0
  122. package/.claude/skills/qe-chaos-resilience/schemas/output.json +314 -0
  123. package/.claude/skills/qe-chaos-resilience/scripts/validate.sh +401 -0
  124. package/.claude/skills/qe-code-intelligence/SKILL.md +6 -0
  125. package/.claude/skills/qe-code-intelligence/evals/qe-code-intelligence.yaml +459 -0
  126. package/.claude/skills/qe-code-intelligence/schemas/output.json +315 -0
  127. package/.claude/skills/qe-code-intelligence/scripts/validate.sh +408 -0
  128. package/.claude/skills/qe-contract-testing/SKILL.md +6 -0
  129. package/.claude/skills/qe-contract-testing/evals/qe-contract-testing.yaml +513 -0
  130. package/.claude/skills/qe-contract-testing/schemas/output.json +295 -0
  131. package/.claude/skills/qe-contract-testing/scripts/validate.sh +421 -0
  132. package/.claude/skills/qe-coverage-analysis/SKILL.md +6 -0
  133. package/.claude/skills/qe-coverage-analysis/evals/qe-coverage-analysis.yaml +494 -0
  134. package/.claude/skills/qe-coverage-analysis/schemas/output.json +286 -0
  135. package/.claude/skills/qe-coverage-analysis/scripts/validate.sh +453 -0
  136. package/.claude/skills/qe-defect-intelligence/SKILL.md +6 -0
  137. package/.claude/skills/qe-defect-intelligence/evals/qe-defect-intelligence.yaml +511 -0
  138. package/.claude/skills/qe-defect-intelligence/schemas/output.json +283 -0
  139. package/.claude/skills/qe-defect-intelligence/scripts/validate.sh +466 -0
  140. package/.claude/skills/qe-iterative-loop/SKILL.md +445 -440
  141. package/.claude/skills/qe-iterative-loop/schemas/output.json +264 -0
  142. package/.claude/skills/qe-iterative-loop/scripts/validate.sh +474 -0
  143. package/.claude/skills/qe-learning-optimization/SKILL.md +6 -0
  144. package/.claude/skills/qe-learning-optimization/evals/qe-learning-optimization.yaml +144 -0
  145. package/.claude/skills/qe-learning-optimization/schemas/output.json +288 -0
  146. package/.claude/skills/qe-learning-optimization/scripts/validate.sh +56 -0
  147. package/.claude/skills/qe-quality-assessment/SKILL.md +6 -0
  148. package/.claude/skills/qe-quality-assessment/evals/qe-quality-assessment.yaml +506 -0
  149. package/.claude/skills/qe-quality-assessment/schemas/output.json +550 -0
  150. package/.claude/skills/qe-quality-assessment/scripts/validate.sh +352 -0
  151. package/.claude/skills/qe-requirements-validation/SKILL.md +6 -0
  152. package/.claude/skills/qe-requirements-validation/evals/qe-requirements-validation.yaml +598 -0
  153. package/.claude/skills/qe-requirements-validation/schemas/output.json +587 -0
  154. package/.claude/skills/qe-requirements-validation/scripts/validate.sh +275 -0
  155. package/.claude/skills/qe-security-compliance/SKILL.md +6 -0
  156. package/.claude/skills/qe-security-compliance/evals/qe-security-compliance.yaml +595 -0
  157. package/.claude/skills/qe-security-compliance/schemas/output.json +498 -0
  158. package/.claude/skills/qe-security-compliance/scripts/validate.sh +305 -0
  159. package/.claude/skills/qe-test-execution/SKILL.md +6 -0
  160. package/.claude/skills/qe-test-execution/evals/qe-test-execution.yaml +607 -0
  161. package/.claude/skills/qe-test-execution/schemas/output.json +529 -0
  162. package/.claude/skills/qe-test-execution/scripts/validate.sh +323 -0
  163. package/.claude/skills/qe-test-generation/SKILL.md +6 -0
  164. package/.claude/skills/qe-test-generation/evals/qe-test-generation.yaml +148 -0
  165. package/.claude/skills/qe-test-generation/schemas/output.json +439 -0
  166. package/.claude/skills/qe-test-generation/scripts/validate.sh +56 -0
  167. package/.claude/skills/qe-visual-accessibility/SKILL.md +6 -0
  168. package/.claude/skills/qe-visual-accessibility/evals/qe-visual-accessibility.yaml +142 -0
  169. package/.claude/skills/qe-visual-accessibility/schemas/output.json +491 -0
  170. package/.claude/skills/qe-visual-accessibility/scripts/validate.sh +56 -0
  171. package/.claude/skills/quality-metrics/SKILL.md +6 -0
  172. package/.claude/skills/quality-metrics/evals/quality-metrics.yaml +494 -0
  173. package/.claude/skills/quality-metrics/schemas/output.json +403 -0
  174. package/.claude/skills/quality-metrics/scripts/validate.sh +434 -0
  175. package/.claude/skills/refactoring-patterns/SKILL.md +5 -0
  176. package/.claude/skills/refactoring-patterns/schemas/output.json +475 -0
  177. package/.claude/skills/refactoring-patterns/scripts/validate.sh +241 -0
  178. package/.claude/skills/regression-testing/SKILL.md +6 -0
  179. package/.claude/skills/regression-testing/evals/regression-testing.yaml +504 -0
  180. package/.claude/skills/regression-testing/schemas/output.json +311 -0
  181. package/.claude/skills/regression-testing/scripts/validate.sh +130 -0
  182. package/.claude/skills/risk-based-testing/SKILL.md +6 -0
  183. package/.claude/skills/risk-based-testing/evals/risk-based-testing.yaml +141 -0
  184. package/.claude/skills/risk-based-testing/schemas/output.json +480 -0
  185. package/.claude/skills/risk-based-testing/scripts/validate.sh +56 -0
  186. package/.claude/skills/security-testing/SKILL.md +5 -0
  187. package/.claude/skills/security-testing/evals/security-testing.yaml +789 -0
  188. package/.claude/skills/security-testing/schemas/output.json +879 -0
  189. package/.claude/skills/security-testing/scripts/validate.sh +967 -0
  190. package/.claude/skills/security-visual-testing/SKILL.md +223 -217
  191. package/.claude/skills/security-visual-testing/evals/security-visual-testing.yaml +163 -0
  192. package/.claude/skills/security-visual-testing/schemas/output.json +486 -0
  193. package/.claude/skills/security-visual-testing/scripts/validate.sh +748 -0
  194. package/.claude/skills/sherlock-review/SKILL.md +5 -0
  195. package/.claude/skills/sherlock-review/schemas/output.json +297 -0
  196. package/.claude/skills/sherlock-review/scripts/validate.sh +115 -0
  197. package/.claude/skills/shift-left-testing/SKILL.md +6 -0
  198. package/.claude/skills/shift-left-testing/evals/shift-left-testing.yaml +145 -0
  199. package/.claude/skills/shift-left-testing/schemas/output.json +459 -0
  200. package/.claude/skills/shift-left-testing/scripts/validate.sh +56 -0
  201. package/.claude/skills/shift-right-testing/SKILL.md +6 -0
  202. package/.claude/skills/shift-right-testing/evals/shift-right-testing.yaml +147 -0
  203. package/.claude/skills/shift-right-testing/schemas/output.json +418 -0
  204. package/.claude/skills/shift-right-testing/scripts/validate.sh +56 -0
  205. package/.claude/skills/six-thinking-hats/SKILL.md +3 -0
  206. package/.claude/skills/tdd-london-chicago/SKILL.md +5 -0
  207. package/.claude/skills/tdd-london-chicago/schemas/output.json +444 -0
  208. package/.claude/skills/tdd-london-chicago/scripts/validate.sh +214 -0
  209. package/.claude/skills/technical-writing/SKILL.md +4 -0
  210. package/.claude/skills/technical-writing/schemas/output.json +268 -0
  211. package/.claude/skills/test-automation-strategy/SKILL.md +6 -0
  212. package/.claude/skills/test-automation-strategy/evals/test-automation-strategy.yaml +148 -0
  213. package/.claude/skills/test-automation-strategy/schemas/output.json +444 -0
  214. package/.claude/skills/test-automation-strategy/scripts/validate.sh +195 -0
  215. package/.claude/skills/test-data-management/SKILL.md +6 -0
  216. package/.claude/skills/test-data-management/evals/test-data-management.yaml +504 -0
  217. package/.claude/skills/test-data-management/schemas/output.json +284 -0
  218. package/.claude/skills/test-data-management/scripts/validate.sh +137 -0
  219. package/.claude/skills/test-design-techniques/SKILL.md +6 -0
  220. package/.claude/skills/test-design-techniques/evals/test-design-techniques.yaml +142 -0
  221. package/.claude/skills/test-design-techniques/schemas/output.json +295 -0
  222. package/.claude/skills/test-design-techniques/scripts/validate.sh +91 -0
  223. package/.claude/skills/test-environment-management/SKILL.md +4 -0
  224. package/.claude/skills/test-environment-management/schemas/output.json +310 -0
  225. package/.claude/skills/test-reporting-analytics/SKILL.md +6 -0
  226. package/.claude/skills/test-reporting-analytics/evals/test-reporting-analytics.yaml +155 -0
  227. package/.claude/skills/test-reporting-analytics/schemas/output.json +329 -0
  228. package/.claude/skills/test-reporting-analytics/scripts/validate.sh +77 -0
  229. package/.claude/skills/testability-scoring/SKILL.md +5 -0
  230. package/.claude/skills/testability-scoring/evals/testability-scoring.yaml +814 -0
  231. package/.claude/skills/testability-scoring/schemas/output.json +606 -0
  232. package/.claude/skills/testability-scoring/scripts/validate.sh +891 -0
  233. package/.claude/skills/trust-tier-manifest.json +2333 -0
  234. package/.claude/skills/verification-quality/SKILL.md +6 -0
  235. package/.claude/skills/verification-quality/evals/verification-quality.yaml +150 -0
  236. package/.claude/skills/verification-quality/schemas/output.json +432 -0
  237. package/.claude/skills/verification-quality/scripts/validate.sh +77 -0
  238. package/.claude/skills/visual-testing-advanced/SKILL.md +6 -0
  239. package/.claude/skills/visual-testing-advanced/evals/visual-testing-advanced.yaml +154 -0
  240. package/.claude/skills/visual-testing-advanced/schemas/output.json +294 -0
  241. package/.claude/skills/visual-testing-advanced/scripts/validate.sh +77 -0
  242. package/.claude/skills/xp-practices/SKILL.md +3 -0
  243. package/README.md +39 -2
  244. package/package.json +4 -1
  245. package/scripts/run-skill-eval.ts +1097 -0
  246. package/scripts/test-schema-validation.js +301 -0
  247. package/scripts/update-skill-manifest.ts +751 -0
  248. package/scripts/validate-skill-frontmatter.js +141 -0
  249. package/v3/CHANGELOG.md +21 -0
  250. package/v3/README.md +43 -3
  251. package/v3/assets/agents/v3/qe-test-idea-rewriter.md +375 -375
  252. package/v3/assets/skills/.validation/README.md +111 -0
  253. package/v3/assets/skills/.validation/examples/chaos-engineering-output.example.json +530 -0
  254. package/v3/assets/skills/.validation/examples/performance-testing-output.example.json +252 -0
  255. package/v3/assets/skills/.validation/examples/security-testing-output.example.json +413 -0
  256. package/v3/assets/skills/.validation/examples/testability-scoring-output.example.json +350 -0
  257. package/v3/assets/skills/.validation/schemas/skill-eval.schema.json +452 -0
  258. package/v3/assets/skills/.validation/schemas/skill-frontmatter.schema.json +341 -0
  259. package/v3/assets/skills/.validation/schemas/skill-output-meta.schema.json +199 -0
  260. package/v3/assets/skills/.validation/schemas/skill-output.template.json +610 -0
  261. package/v3/assets/skills/.validation/skill-validation-mcp-integration.md +250 -0
  262. package/v3/assets/skills/.validation/templates/eval.template.yaml +366 -0
  263. package/v3/assets/skills/.validation/templates/schemas/output.json +145 -0
  264. package/v3/assets/skills/.validation/templates/security-testing-eval.template.yaml +725 -0
  265. package/v3/assets/skills/.validation/templates/skill-frontmatter.example.yaml +225 -0
  266. package/v3/assets/skills/.validation/templates/validate.template.sh +1060 -0
  267. package/v3/assets/skills/.validation/templates/validator-lib.sh +1031 -0
  268. package/v3/assets/skills/.validation/test-data/invalid-output.json +5 -0
  269. package/v3/assets/skills/.validation/test-data/minimal-output.json +9 -0
  270. package/v3/assets/skills/.validation/test-data/sample-output.json +73 -0
  271. package/v3/assets/skills/accessibility-testing/SKILL.md +8 -1
  272. package/v3/assets/skills/accessibility-testing/evals/accessibility-testing.yaml +719 -0
  273. package/v3/assets/skills/accessibility-testing/schemas/output.json +776 -0
  274. package/v3/assets/skills/accessibility-testing/scripts/validate.sh +943 -0
  275. package/v3/assets/skills/accessibility-testing/test-data/sample-output.json +191 -0
  276. package/v3/assets/skills/agentic-quality-engineering/SKILL.md +4 -0
  277. package/v3/assets/skills/agentic-quality-engineering/schemas/output.json +577 -0
  278. package/v3/assets/skills/api-testing-patterns/SKILL.md +5 -0
  279. package/v3/assets/skills/api-testing-patterns/evals/api-testing-patterns.yaml +696 -0
  280. package/v3/assets/skills/api-testing-patterns/schemas/output.json +845 -0
  281. package/v3/assets/skills/api-testing-patterns/scripts/validate.sh +931 -0
  282. package/v3/assets/skills/aqe-v2-v3-migration/schemas/output.json +513 -0
  283. package/v3/assets/skills/aqe-v2-v3-migration/skill.md +20 -16
  284. package/v3/assets/skills/bug-reporting-excellence/SKILL.md +5 -0
  285. package/v3/assets/skills/bug-reporting-excellence/schemas/output.json +288 -0
  286. package/v3/assets/skills/bug-reporting-excellence/scripts/validate.sh +108 -0
  287. package/v3/assets/skills/chaos-engineering-resilience/SKILL.md +5 -0
  288. package/v3/assets/skills/chaos-engineering-resilience/evals/chaos-engineering-resilience.yaml +761 -0
  289. package/v3/assets/skills/chaos-engineering-resilience/schemas/output.json +1205 -0
  290. package/v3/assets/skills/chaos-engineering-resilience/scripts/validate.sh +1107 -0
  291. package/v3/assets/skills/code-review-quality/SKILL.md +5 -0
  292. package/v3/assets/skills/code-review-quality/schemas/output.json +264 -0
  293. package/v3/assets/skills/code-review-quality/scripts/validate.sh +113 -0
  294. package/v3/assets/skills/compatibility-testing/evals/compatibility-testing.yaml +410 -0
  295. package/v3/assets/skills/compatibility-testing/schemas/output.json +551 -0
  296. package/v3/assets/skills/compatibility-testing/scripts/validate.sh +605 -0
  297. package/v3/assets/skills/compliance-testing/SKILL.md +7 -0
  298. package/v3/assets/skills/compliance-testing/evals/compliance-testing.yaml +1107 -0
  299. package/v3/assets/skills/compliance-testing/schemas/output.json +845 -0
  300. package/v3/assets/skills/compliance-testing/scripts/validate.sh +888 -0
  301. package/v3/assets/skills/consultancy-practices/SKILL.md +4 -0
  302. package/v3/assets/skills/consultancy-practices/schemas/output.json +282 -0
  303. package/v3/assets/skills/context-driven-testing/SKILL.md +3 -0
  304. package/v3/assets/skills/contract-testing/SKILL.md +5 -0
  305. package/v3/assets/skills/contract-testing/evals/contract-testing.yaml +748 -0
  306. package/v3/assets/skills/contract-testing/schemas/output.json +638 -0
  307. package/v3/assets/skills/contract-testing/scripts/validate.sh +1033 -0
  308. package/v3/assets/skills/database-testing/SKILL.md +5 -0
  309. package/v3/assets/skills/database-testing/evals/database-testing.yaml +968 -0
  310. package/v3/assets/skills/database-testing/schemas/output.json +1446 -0
  311. package/v3/assets/skills/database-testing/scripts/validate.sh +1099 -0
  312. package/v3/assets/skills/exploratory-testing-advanced/SKILL.md +3 -0
  313. package/v3/assets/skills/holistic-testing-pact/SKILL.md +3 -0
  314. package/v3/assets/skills/localization-testing/SKILL.md +6 -0
  315. package/v3/assets/skills/localization-testing/evals/localization-testing.yaml +544 -0
  316. package/v3/assets/skills/localization-testing/schemas/output.json +325 -0
  317. package/v3/assets/skills/localization-testing/scripts/validate.sh +131 -0
  318. package/v3/assets/skills/mobile-testing/SKILL.md +6 -0
  319. package/v3/assets/skills/mobile-testing/evals/mobile-testing.yaml +537 -0
  320. package/v3/assets/skills/mobile-testing/schemas/output.json +318 -0
  321. package/v3/assets/skills/mobile-testing/scripts/validate.sh +127 -0
  322. package/v3/assets/skills/mutation-testing/SKILL.md +8 -1
  323. package/v3/assets/skills/mutation-testing/evals/mutation-testing.yaml +652 -0
  324. package/v3/assets/skills/mutation-testing/schemas/output.json +707 -0
  325. package/v3/assets/skills/mutation-testing/scripts/validate.sh +992 -0
  326. package/v3/assets/skills/mutation-testing/test-data/sample-output.json +295 -0
  327. package/v3/assets/skills/n8n-expression-testing/scripts/validate.sh +162 -0
  328. package/v3/assets/skills/n8n-integration-testing-patterns/scripts/validate.sh +104 -0
  329. package/v3/assets/skills/n8n-security-testing/scripts/validate.sh +120 -0
  330. package/v3/assets/skills/n8n-trigger-testing-strategies/scripts/validate.sh +121 -0
  331. package/v3/assets/skills/n8n-workflow-testing-fundamentals/scripts/validate.sh +126 -0
  332. package/v3/assets/skills/performance-testing/SKILL.md +5 -0
  333. package/v3/assets/skills/performance-testing/evals/performance-testing.yaml +772 -0
  334. package/v3/assets/skills/performance-testing/schemas/output.json +1184 -0
  335. package/v3/assets/skills/performance-testing/scripts/validate.sh +1090 -0
  336. package/v3/assets/skills/qe-chaos-resilience/SKILL.md +6 -0
  337. package/v3/assets/skills/qe-chaos-resilience/evals/qe-chaos-resilience.yaml +443 -0
  338. package/v3/assets/skills/qe-chaos-resilience/schemas/output.json +314 -0
  339. package/v3/assets/skills/qe-chaos-resilience/scripts/validate.sh +401 -0
  340. package/v3/assets/skills/qe-code-intelligence/SKILL.md +6 -0
  341. package/v3/assets/skills/qe-code-intelligence/evals/qe-code-intelligence.yaml +459 -0
  342. package/v3/assets/skills/qe-code-intelligence/schemas/output.json +315 -0
  343. package/v3/assets/skills/qe-code-intelligence/scripts/validate.sh +408 -0
  344. package/v3/assets/skills/qe-contract-testing/SKILL.md +6 -0
  345. package/v3/assets/skills/qe-contract-testing/evals/qe-contract-testing.yaml +513 -0
  346. package/v3/assets/skills/qe-contract-testing/schemas/output.json +295 -0
  347. package/v3/assets/skills/qe-contract-testing/scripts/validate.sh +421 -0
  348. package/v3/assets/skills/qe-coverage-analysis/SKILL.md +6 -0
  349. package/v3/assets/skills/qe-coverage-analysis/evals/qe-coverage-analysis.yaml +494 -0
  350. package/v3/assets/skills/qe-coverage-analysis/schemas/output.json +286 -0
  351. package/v3/assets/skills/qe-coverage-analysis/scripts/validate.sh +453 -0
  352. package/v3/assets/skills/qe-defect-intelligence/SKILL.md +6 -0
  353. package/v3/assets/skills/qe-defect-intelligence/evals/qe-defect-intelligence.yaml +511 -0
  354. package/v3/assets/skills/qe-defect-intelligence/schemas/output.json +283 -0
  355. package/v3/assets/skills/qe-defect-intelligence/scripts/validate.sh +466 -0
  356. package/v3/assets/skills/qe-iterative-loop/SKILL.md +445 -443
  357. package/v3/assets/skills/qe-iterative-loop/schemas/output.json +264 -0
  358. package/v3/assets/skills/qe-iterative-loop/scripts/validate.sh +474 -0
  359. package/v3/assets/skills/qe-learning-optimization/SKILL.md +6 -0
  360. package/v3/assets/skills/qe-learning-optimization/evals/qe-learning-optimization.yaml +144 -0
  361. package/v3/assets/skills/qe-learning-optimization/schemas/output.json +288 -0
  362. package/v3/assets/skills/qe-learning-optimization/scripts/validate.sh +56 -0
  363. package/v3/assets/skills/qe-quality-assessment/SKILL.md +6 -0
  364. package/v3/assets/skills/qe-quality-assessment/evals/qe-quality-assessment.yaml +506 -0
  365. package/v3/assets/skills/qe-quality-assessment/schemas/output.json +550 -0
  366. package/v3/assets/skills/qe-quality-assessment/scripts/validate.sh +352 -0
  367. package/v3/assets/skills/qe-requirements-validation/SKILL.md +6 -0
  368. package/v3/assets/skills/qe-requirements-validation/evals/qe-requirements-validation.yaml +598 -0
  369. package/v3/assets/skills/qe-requirements-validation/schemas/output.json +587 -0
  370. package/v3/assets/skills/qe-requirements-validation/scripts/validate.sh +275 -0
  371. package/v3/assets/skills/qe-security-compliance/SKILL.md +6 -0
  372. package/v3/assets/skills/qe-security-compliance/evals/qe-security-compliance.yaml +595 -0
  373. package/v3/assets/skills/qe-security-compliance/schemas/output.json +498 -0
  374. package/v3/assets/skills/qe-security-compliance/scripts/validate.sh +305 -0
  375. package/v3/assets/skills/qe-test-execution/SKILL.md +6 -0
  376. package/v3/assets/skills/qe-test-execution/evals/qe-test-execution.yaml +607 -0
  377. package/v3/assets/skills/qe-test-execution/schemas/output.json +529 -0
  378. package/v3/assets/skills/qe-test-execution/scripts/validate.sh +323 -0
  379. package/v3/assets/skills/qe-test-generation/SKILL.md +6 -0
  380. package/v3/assets/skills/qe-test-generation/evals/qe-test-generation.yaml +148 -0
  381. package/v3/assets/skills/qe-test-generation/schemas/output.json +439 -0
  382. package/v3/assets/skills/qe-test-generation/scripts/validate.sh +56 -0
  383. package/v3/assets/skills/qe-visual-accessibility/SKILL.md +6 -0
  384. package/v3/assets/skills/qe-visual-accessibility/evals/qe-visual-accessibility.yaml +142 -0
  385. package/v3/assets/skills/qe-visual-accessibility/schemas/output.json +491 -0
  386. package/v3/assets/skills/qe-visual-accessibility/scripts/validate.sh +56 -0
  387. package/v3/assets/skills/quality-metrics/SKILL.md +6 -0
  388. package/v3/assets/skills/quality-metrics/evals/quality-metrics.yaml +494 -0
  389. package/v3/assets/skills/quality-metrics/schemas/output.json +403 -0
  390. package/v3/assets/skills/quality-metrics/scripts/validate.sh +434 -0
  391. package/v3/assets/skills/refactoring-patterns/SKILL.md +5 -0
  392. package/v3/assets/skills/refactoring-patterns/schemas/output.json +475 -0
  393. package/v3/assets/skills/refactoring-patterns/scripts/validate.sh +241 -0
  394. package/v3/assets/skills/regression-testing/SKILL.md +6 -0
  395. package/v3/assets/skills/regression-testing/evals/regression-testing.yaml +504 -0
  396. package/v3/assets/skills/regression-testing/schemas/output.json +311 -0
  397. package/v3/assets/skills/regression-testing/scripts/validate.sh +130 -0
  398. package/v3/assets/skills/risk-based-testing/SKILL.md +6 -0
  399. package/v3/assets/skills/risk-based-testing/evals/risk-based-testing.yaml +141 -0
  400. package/v3/assets/skills/risk-based-testing/schemas/output.json +480 -0
  401. package/v3/assets/skills/risk-based-testing/scripts/validate.sh +56 -0
  402. package/v3/assets/skills/security-testing/SKILL.md +5 -0
  403. package/v3/assets/skills/security-testing/evals/security-testing.yaml +789 -0
  404. package/v3/assets/skills/security-testing/schemas/output.json +879 -0
  405. package/v3/assets/skills/security-testing/scripts/validate.sh +967 -0
  406. package/v3/assets/skills/shift-left-testing/SKILL.md +6 -0
  407. package/v3/assets/skills/shift-left-testing/evals/shift-left-testing.yaml +145 -0
  408. package/v3/assets/skills/shift-left-testing/schemas/output.json +459 -0
  409. package/v3/assets/skills/shift-left-testing/scripts/validate.sh +56 -0
  410. package/v3/assets/skills/shift-right-testing/SKILL.md +6 -0
  411. package/v3/assets/skills/shift-right-testing/evals/shift-right-testing.yaml +147 -0
  412. package/v3/assets/skills/shift-right-testing/schemas/output.json +418 -0
  413. package/v3/assets/skills/shift-right-testing/scripts/validate.sh +56 -0
  414. package/v3/assets/skills/six-thinking-hats/SKILL.md +3 -0
  415. package/v3/assets/skills/tdd-london-chicago/SKILL.md +5 -0
  416. package/v3/assets/skills/tdd-london-chicago/schemas/output.json +444 -0
  417. package/v3/assets/skills/tdd-london-chicago/scripts/validate.sh +214 -0
  418. package/v3/assets/skills/technical-writing/SKILL.md +4 -0
  419. package/v3/assets/skills/technical-writing/schemas/output.json +268 -0
  420. package/v3/assets/skills/test-automation-strategy/SKILL.md +6 -0
  421. package/v3/assets/skills/test-automation-strategy/evals/test-automation-strategy.yaml +148 -0
  422. package/v3/assets/skills/test-automation-strategy/schemas/output.json +444 -0
  423. package/v3/assets/skills/test-automation-strategy/scripts/validate.sh +195 -0
  424. package/v3/assets/skills/test-data-management/SKILL.md +6 -0
  425. package/v3/assets/skills/test-data-management/evals/test-data-management.yaml +504 -0
  426. package/v3/assets/skills/test-data-management/schemas/output.json +284 -0
  427. package/v3/assets/skills/test-data-management/scripts/validate.sh +137 -0
  428. package/v3/assets/skills/test-design-techniques/SKILL.md +6 -0
  429. package/v3/assets/skills/test-design-techniques/evals/test-design-techniques.yaml +142 -0
  430. package/v3/assets/skills/test-design-techniques/schemas/output.json +295 -0
  431. package/v3/assets/skills/test-design-techniques/scripts/validate.sh +91 -0
  432. package/v3/assets/skills/test-environment-management/SKILL.md +4 -0
  433. package/v3/assets/skills/test-environment-management/schemas/output.json +310 -0
  434. package/v3/assets/skills/test-reporting-analytics/SKILL.md +6 -0
  435. package/v3/assets/skills/test-reporting-analytics/evals/test-reporting-analytics.yaml +155 -0
  436. package/v3/assets/skills/test-reporting-analytics/schemas/output.json +329 -0
  437. package/v3/assets/skills/test-reporting-analytics/scripts/validate.sh +77 -0
  438. package/v3/assets/skills/verification-quality/SKILL.md +6 -0
  439. package/v3/assets/skills/verification-quality/evals/verification-quality.yaml +150 -0
  440. package/v3/assets/skills/verification-quality/schemas/output.json +432 -0
  441. package/v3/assets/skills/verification-quality/scripts/validate.sh +77 -0
  442. package/v3/assets/skills/visual-testing-advanced/SKILL.md +6 -0
  443. package/v3/assets/skills/visual-testing-advanced/evals/visual-testing-advanced.yaml +154 -0
  444. package/v3/assets/skills/visual-testing-advanced/schemas/output.json +294 -0
  445. package/v3/assets/skills/visual-testing-advanced/scripts/validate.sh +77 -0
  446. package/v3/assets/skills/xp-practices/SKILL.md +3 -0
  447. package/v3/assets/templates/validation-report.md.hbs +139 -0
  448. package/v3/assets/templates/validation-summary.json +56 -0
  449. package/v3/dist/cli/bundle.js +7368 -3418
  450. package/v3/dist/cli/commands/eval.d.ts +18 -0
  451. package/v3/dist/cli/commands/eval.d.ts.map +1 -0
  452. package/v3/dist/cli/commands/eval.js +505 -0
  453. package/v3/dist/cli/commands/eval.js.map +1 -0
  454. package/v3/dist/cli/commands/validate-swarm.d.ts +16 -0
  455. package/v3/dist/cli/commands/validate-swarm.d.ts.map +1 -0
  456. package/v3/dist/cli/commands/validate-swarm.js +251 -0
  457. package/v3/dist/cli/commands/validate-swarm.js.map +1 -0
  458. package/v3/dist/cli/commands/validate.d.ts +14 -0
  459. package/v3/dist/cli/commands/validate.d.ts.map +1 -0
  460. package/v3/dist/cli/commands/validate.js +408 -0
  461. package/v3/dist/cli/commands/validate.js.map +1 -0
  462. package/v3/dist/cli/index.js +6 -0
  463. package/v3/dist/cli/index.js.map +1 -1
  464. package/v3/dist/index.d.ts +2 -0
  465. package/v3/dist/index.d.ts.map +1 -1
  466. package/v3/dist/index.js +10 -0
  467. package/v3/dist/index.js.map +1 -1
  468. package/v3/dist/init/skills-installer.d.ts +6 -0
  469. package/v3/dist/init/skills-installer.d.ts.map +1 -1
  470. package/v3/dist/init/skills-installer.js +48 -0
  471. package/v3/dist/init/skills-installer.js.map +1 -1
  472. package/v3/dist/learning/index.d.ts +2 -0
  473. package/v3/dist/learning/index.d.ts.map +1 -1
  474. package/v3/dist/learning/index.js +4 -0
  475. package/v3/dist/learning/index.js.map +1 -1
  476. package/v3/dist/learning/skill-validation-learner.d.ts +264 -0
  477. package/v3/dist/learning/skill-validation-learner.d.ts.map +1 -0
  478. package/v3/dist/learning/skill-validation-learner.js +515 -0
  479. package/v3/dist/learning/skill-validation-learner.js.map +1 -0
  480. package/v3/dist/validation/index.d.ts +19 -0
  481. package/v3/dist/validation/index.d.ts.map +1 -0
  482. package/v3/dist/validation/index.js +31 -0
  483. package/v3/dist/validation/index.js.map +1 -0
  484. package/v3/dist/validation/parallel-eval-runner.d.ts +307 -0
  485. package/v3/dist/validation/parallel-eval-runner.d.ts.map +1 -0
  486. package/v3/dist/validation/parallel-eval-runner.js +566 -0
  487. package/v3/dist/validation/parallel-eval-runner.js.map +1 -0
  488. package/v3/dist/validation/swarm-skill-validator.d.ts +282 -0
  489. package/v3/dist/validation/swarm-skill-validator.d.ts.map +1 -0
  490. package/v3/dist/validation/swarm-skill-validator.js +460 -0
  491. package/v3/dist/validation/swarm-skill-validator.js.map +1 -0
  492. package/v3/dist/validation/validation-result-aggregator.d.ts +232 -0
  493. package/v3/dist/validation/validation-result-aggregator.d.ts.map +1 -0
  494. package/v3/dist/validation/validation-result-aggregator.js +630 -0
  495. package/v3/dist/validation/validation-result-aggregator.js.map +1 -0
  496. package/v3/package.json +1 -1
@@ -0,0 +1,307 @@
1
+ /**
2
+ * Parallel Evaluation Runner for Skill Validation
3
+ * ADR-056 Phase 5: Distributed test execution across worker pool
4
+ *
5
+ * This module provides parallel execution of skill evaluation test cases
6
+ * using a worker pool pattern for faster skill validation.
7
+ *
8
+ * Features:
9
+ * - Worker pool with configurable concurrency
10
+ * - Batch-based test case distribution
11
+ * - Automatic retry of failed tests
12
+ * - Progress tracking and heartbeat monitoring
13
+ * - Integration with SkillValidationLearner for outcome recording
14
+ * - Parallel speedup measurement vs sequential execution
15
+ *
16
+ * @module validation/parallel-eval-runner
17
+ * @see .claude/skills/.validation/schemas/skill-eval.schema.json
18
+ */
19
+ import { SkillValidationLearner, TestCaseResult } from '../learning/skill-validation-learner.js';
20
+ /**
21
+ * Configuration for the parallel evaluation runner
22
+ */
23
+ export interface ParallelEvalConfig {
24
+ /** Maximum number of concurrent workers (default: 5) */
25
+ maxWorkers: number;
26
+ /** Number of test cases per worker batch */
27
+ batchSize: number;
28
+ /** Whether to retry failed tests once */
29
+ retryFailedTests: boolean;
30
+ /** Timeout per test case in milliseconds */
31
+ timeout: number;
32
+ /** Directory containing skill eval suites */
33
+ skillsDir: string;
34
+ /** Progress reporting interval in milliseconds */
35
+ progressIntervalMs: number;
36
+ }
37
+ /**
38
+ * Default configuration values
39
+ */
40
+ export declare const DEFAULT_PARALLEL_EVAL_CONFIG: ParallelEvalConfig;
41
+ /**
42
+ * Test case input structure from eval YAML
43
+ */
44
+ export interface EvalTestCaseInput {
45
+ code: string;
46
+ context: {
47
+ language?: string;
48
+ wcagLevel?: string;
49
+ description?: string;
50
+ environment?: string;
51
+ options?: Record<string, unknown>;
52
+ };
53
+ }
54
+ /**
55
+ * Expected output structure from eval YAML
56
+ */
57
+ export interface EvalTestCaseExpectedOutput {
58
+ must_contain?: string[];
59
+ must_not_contain?: string[];
60
+ must_match_regex?: string[];
61
+ finding_count?: {
62
+ min?: number;
63
+ max?: number;
64
+ };
65
+ severity_classification?: string;
66
+ }
67
+ /**
68
+ * Validation criteria from eval YAML
69
+ */
70
+ export interface EvalTestCaseValidation {
71
+ schema_check?: boolean;
72
+ keyword_match_threshold?: number;
73
+ reasoning_quality_min?: number;
74
+ grading_rubric?: {
75
+ completeness?: number;
76
+ accuracy?: number;
77
+ actionability?: number;
78
+ };
79
+ }
80
+ /**
81
+ * Single test case from eval suite YAML
82
+ */
83
+ export interface EvalTestCase {
84
+ id: string;
85
+ description: string;
86
+ category: string;
87
+ priority: 'critical' | 'high' | 'medium' | 'low';
88
+ input: EvalTestCaseInput;
89
+ expected_output: EvalTestCaseExpectedOutput;
90
+ validation?: EvalTestCaseValidation;
91
+ }
92
+ /**
93
+ * Complete eval suite loaded from YAML
94
+ */
95
+ export interface EvalSuite {
96
+ skill: string;
97
+ version: string;
98
+ description: string;
99
+ models_to_test: string[];
100
+ mcp_integration?: {
101
+ enabled: boolean;
102
+ namespace?: string;
103
+ query_patterns?: boolean;
104
+ track_outcomes?: boolean;
105
+ store_patterns?: boolean;
106
+ share_learning?: boolean;
107
+ update_quality_gate?: boolean;
108
+ target_agents?: string[];
109
+ };
110
+ learning?: {
111
+ store_success_patterns?: boolean;
112
+ store_failure_patterns?: boolean;
113
+ pattern_ttl_days?: number;
114
+ min_confidence_to_store?: number;
115
+ cross_model_comparison?: boolean;
116
+ };
117
+ setup?: {
118
+ required_tools?: string[];
119
+ environment_variables?: Record<string, string>;
120
+ fixtures?: Array<{
121
+ name: string;
122
+ content: string;
123
+ }>;
124
+ };
125
+ test_cases: EvalTestCase[];
126
+ success_criteria: {
127
+ pass_rate: number;
128
+ critical_pass_rate: number;
129
+ avg_reasoning_quality?: number;
130
+ max_execution_time_ms?: number;
131
+ cross_model_variance?: number;
132
+ };
133
+ metadata?: Record<string, unknown>;
134
+ }
135
+ /**
136
+ * Task unit sent to a worker
137
+ */
138
+ export interface TestCaseTask {
139
+ /** Name of the skill being validated */
140
+ skillName: string;
141
+ /** Unique test case identifier */
142
+ testCaseId: string;
143
+ /** Full test case definition */
144
+ testCase: EvalTestCase;
145
+ /** Model to use for this test */
146
+ model: string;
147
+ /** Worker batch ID */
148
+ batchId: number;
149
+ /** Index within the batch */
150
+ indexInBatch: number;
151
+ }
152
+ /**
153
+ * Result of parallel evaluation for a skill
154
+ */
155
+ export interface ParallelEvalResult {
156
+ /** Name of the skill evaluated */
157
+ skill: string;
158
+ /** Model used for evaluation */
159
+ model: string;
160
+ /** Total number of test cases */
161
+ totalTests: number;
162
+ /** Number of tests that passed */
163
+ passedTests: number;
164
+ /** Number of tests that failed */
165
+ failedTests: number;
166
+ /** Number of tests that were skipped */
167
+ skippedTests: number;
168
+ /** Pass rate as a decimal (0-1) */
169
+ passRate: number;
170
+ /** Individual test case results */
171
+ testResults: TestCaseResult[];
172
+ /** Total duration in milliseconds */
173
+ totalDurationMs: number;
174
+ /** Speedup factor vs sequential (e.g., 3.5x faster) */
175
+ parallelSpeedup: number;
176
+ /** Average reasoning quality score */
177
+ avgReasoningQuality: number;
178
+ /** Whether the eval suite passed overall */
179
+ passed: boolean;
180
+ /** Workers used in this run */
181
+ workersUsed: number;
182
+ /** Timestamp of evaluation */
183
+ timestamp: Date;
184
+ }
185
+ /**
186
+ * Worker communication message types
187
+ */
188
+ export interface WorkerMessage {
189
+ type: 'task' | 'result' | 'error' | 'heartbeat' | 'progress';
190
+ workerId: number;
191
+ payload: unknown;
192
+ }
193
+ /**
194
+ * Progress report from a worker
195
+ */
196
+ export interface WorkerProgress {
197
+ workerId: number;
198
+ tasksCompleted: number;
199
+ tasksTotal: number;
200
+ currentTask?: string;
201
+ elapsedMs: number;
202
+ }
203
+ /**
204
+ * Aggregated progress across all workers
205
+ */
206
+ export interface EvalProgress {
207
+ skill: string;
208
+ model: string;
209
+ totalTasks: number;
210
+ completedTasks: number;
211
+ failedTasks: number;
212
+ activeWorkers: number;
213
+ elapsedMs: number;
214
+ estimatedRemainingMs: number;
215
+ workerProgress: WorkerProgress[];
216
+ }
217
+ /**
218
+ * LLM executor interface for test case execution
219
+ */
220
+ export interface LLMExecutor {
221
+ execute(prompt: string, model: string, options?: {
222
+ timeout?: number;
223
+ }): Promise<{
224
+ output: string;
225
+ tokensUsed: number;
226
+ durationMs: number;
227
+ }>;
228
+ }
229
+ /**
230
+ * Mock LLM executor for testing
231
+ */
232
+ export declare class MockLLMExecutor implements LLMExecutor {
233
+ execute(prompt: string, model: string, options?: {
234
+ timeout?: number;
235
+ }): Promise<{
236
+ output: string;
237
+ tokensUsed: number;
238
+ durationMs: number;
239
+ }>;
240
+ private generateMockResponse;
241
+ }
242
+ /**
243
+ * Parallel evaluation runner using worker pool pattern
244
+ * Distributes eval test cases across agents for faster execution
245
+ */
246
+ export declare class ParallelEvalRunner {
247
+ private readonly config;
248
+ private readonly skillValidationLearner;
249
+ private readonly executor;
250
+ private readonly workers;
251
+ private progressCallback?;
252
+ constructor(config: ParallelEvalConfig | undefined, skillValidationLearner: SkillValidationLearner, executor?: LLMExecutor);
253
+ /**
254
+ * Set progress callback for reporting
255
+ */
256
+ onProgress(callback: (progress: EvalProgress) => void): void;
257
+ /**
258
+ * Run eval suite for a skill in parallel
259
+ */
260
+ runEvalParallel(skill: string, model: string): Promise<ParallelEvalResult>;
261
+ /**
262
+ * Run multiple skill evals in parallel
263
+ */
264
+ runMultipleEvalsParallel(skills: string[], models: string[]): Promise<Map<string, ParallelEvalResult[]>>;
265
+ /**
266
+ * Load eval suite from skill's evals/ directory
267
+ */
268
+ loadEvalSuite(skill: string): EvalSuite | null;
269
+ /**
270
+ * Create test case tasks from suite
271
+ */
272
+ private createTasks;
273
+ /**
274
+ * Partition test cases into batches for workers
275
+ */
276
+ private partitionTestCases;
277
+ /**
278
+ * Determine trust tier based on suite configuration
279
+ */
280
+ private determineTrustTier;
281
+ /**
282
+ * Start progress reporting interval
283
+ */
284
+ private startProgressReporting;
285
+ /**
286
+ * Record validation outcome to learner
287
+ */
288
+ private recordOutcome;
289
+ /**
290
+ * Get worker pool status
291
+ */
292
+ getWorkerStatus(): Array<{
293
+ id: number;
294
+ status: string;
295
+ tasksCompleted: number;
296
+ tasksFailed: number;
297
+ }>;
298
+ /**
299
+ * Reset worker statistics
300
+ */
301
+ resetWorkers(): void;
302
+ }
303
+ /**
304
+ * Create a ParallelEvalRunner instance
305
+ */
306
+ export declare function createParallelEvalRunner(skillValidationLearner: SkillValidationLearner, config?: Partial<ParallelEvalConfig>, executor?: LLMExecutor): ParallelEvalRunner;
307
+ //# sourceMappingURL=parallel-eval-runner.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"parallel-eval-runner.d.ts","sourceRoot":"","sources":["../../src/validation/parallel-eval-runner.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;;GAiBG;AAKH,OAAO,EACL,sBAAsB,EACtB,cAAc,EAIf,MAAM,yCAAyC,CAAC;AAMjD;;GAEG;AACH,MAAM,WAAW,kBAAkB;IACjC,wDAAwD;IACxD,UAAU,EAAE,MAAM,CAAC;IAEnB,4CAA4C;IAC5C,SAAS,EAAE,MAAM,CAAC;IAElB,yCAAyC;IACzC,gBAAgB,EAAE,OAAO,CAAC;IAE1B,4CAA4C;IAC5C,OAAO,EAAE,MAAM,CAAC;IAEhB,6CAA6C;IAC7C,SAAS,EAAE,MAAM,CAAC;IAElB,kDAAkD;IAClD,kBAAkB,EAAE,MAAM,CAAC;CAC5B;AAED;;GAEG;AACH,eAAO,MAAM,4BAA4B,EAAE,kBAO1C,CAAC;AAEF;;GAEG;AACH,MAAM,WAAW,iBAAiB;IAChC,IAAI,EAAE,MAAM,CAAC;IACb,OAAO,EAAE;QACP,QAAQ,CAAC,EAAE,MAAM,CAAC;QAClB,SAAS,CAAC,EAAE,MAAM,CAAC;QACnB,WAAW,CAAC,EAAE,MAAM,CAAC;QACrB,WAAW,CAAC,EAAE,MAAM,CAAC;QACrB,OAAO,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC,CAAC;KACnC,CAAC;CACH;AAED;;GAEG;AACH,MAAM,WAAW,0BAA0B;IACzC,YAAY,CAAC,EAAE,MAAM,EAAE,CAAC;IACxB,gBAAgB,CAAC,EAAE,MAAM,EAAE,CAAC;IAC5B,gBAAgB,CAAC,EAAE,MAAM,EAAE,CAAC;IAC5B,aAAa,CAAC,EAAE;QACd,GAAG,CAAC,EAAE,MAAM,CAAC;QACb,GAAG,CAAC,EAAE,MAAM,CAAC;KACd,CAAC;IACF,uBAAuB,CAAC,EAAE,MAAM,CAAC;CAClC;AAED;;GAEG;AACH,MAAM,WAAW,sBAAsB;IACrC,YAAY,CAAC,EAAE,OAAO,CAAC;IACvB,uBAAuB,CAAC,EAAE,MAAM,CAAC;IACjC,qBAAqB,CAAC,EAAE,MAAM,CAAC;IAC/B,cAAc,CAAC,EAAE;QACf,YAAY,CAAC,EAAE,MAAM,CAAC;QACtB,QAAQ,CAAC,EAAE,MAAM,CAAC;QAClB,aAAa,CAAC,EAAE,MAAM,CAAC;KACxB,CAAC;CACH;AAED;;GAEG;AACH,MAAM,WAAW,YAAY;IAC3B,EAAE,EAAE,MAAM,CAAC;IACX,WAAW,EAAE,MAAM,CAAC;IACpB,QAAQ,EAAE,MAAM,CAAC;IACjB,QAAQ,EAAE,UAAU,GAAG,MAAM,GAAG,QAAQ,GAAG,KAAK,CAAC;IACjD,KAAK,EAAE,iBAAiB,CAAC;IACzB,eAAe,EAAE,0BAA0B,CAAC;IAC5C,UAAU,CAAC,EAAE,sBAAsB,CAAC;CACrC;AAED;;GAEG;AACH,MAAM,WAAW,SAAS;IACxB,KAAK,EAAE,MAAM,CAAC;IACd,OAAO,EAAE,MAAM,CAAC;IAChB,WAAW,EAAE,MAAM,CAAC;IACpB,cAAc,EAAE,MAAM,EAAE,CAAC;IACzB,eAAe,CAAC,EAAE;QAChB,OAAO,EAAE,OAAO,CAAC;QACjB,SAAS,CAAC,EAAE,MAAM,CAAC;QACnB,cAAc,CAAC,EAAE,OAAO,CAAC;QACzB,cAAc,CAAC,EAAE,OAAO,CAAC;QACzB,cAAc,CAAC,EAAE,OAAO,CAAC;QACzB,cAAc,CAAC,EAAE,OAAO,CAAC;QACzB,mBAAmB,CAAC,EAAE,OAAO,CAAC;QAC9B,aAAa,CAAC,EAAE,MAAM,EAAE,CAAC;KAC1B,CAAC;IACF,QAAQ,CAAC,EAAE;QACT,sBAAsB,CAAC,EAAE,OAAO,CAAC;QACjC,sBAAsB,CAAC,EAAE,OAAO,CAAC;QACjC,gBAAgB,CAAC,EAAE,MAAM,CAAC;QAC1B,uBAAuB,CAAC,EAAE,MAAM,CAAC;QACjC,sBAAsB,CAAC,EAAE,OAAO,CAAC;KAClC,CAAC;IACF,KAAK,CAAC,EAAE;QACN,cAAc,CAAC,EAAE,MAAM,EAAE,CAAC;QAC1B,qBAAqB,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,MAAM,CAAC,CAAC;QAC/C,QAAQ,CAAC,EAAE,KAAK,CAAC;YACf,IAAI,EAAE,MAAM,CAAC;YACb,OAAO,EAAE,MAAM,CAAC;SACjB,CAAC,CAAC;KACJ,CAAC;IACF,UAAU,EAAE,YAAY,EAAE,CAAC;IAC3B,gBAAgB,EAAE;QAChB,SAAS,EAAE,MAAM,CAAC;QAClB,kBAAkB,EAAE,MAAM,CAAC;QAC3B,qBAAqB,CAAC,EAAE,MAAM,CAAC;QAC/B,qBAAqB,CAAC,EAAE,MAAM,CAAC;QAC/B,oBAAoB,CAAC,EAAE,MAAM,CAAC;KAC/B,CAAC;IACF,QAAQ,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC,CAAC;CACpC;AAED;;GAEG;AACH,MAAM,WAAW,YAAY;IAC3B,wCAAwC;IACxC,SAAS,EAAE,MAAM,CAAC;IAElB,kCAAkC;IAClC,UAAU,EAAE,MAAM,CAAC;IAEnB,gCAAgC;IAChC,QAAQ,EAAE,YAAY,CAAC;IAEvB,iCAAiC;IACjC,KAAK,EAAE,MAAM,CAAC;IAEd,sBAAsB;IACtB,OAAO,EAAE,MAAM,CAAC;IAEhB,6BAA6B;IAC7B,YAAY,EAAE,MAAM,CAAC;CACtB;AAED;;GAEG;AACH,MAAM,WAAW,kBAAkB;IACjC,kCAAkC;IAClC,KAAK,EAAE,MAAM,CAAC;IAEd,gCAAgC;IAChC,KAAK,EAAE,MAAM,CAAC;IAEd,iCAAiC;IACjC,UAAU,EAAE,MAAM,CAAC;IAEnB,kCAAkC;IAClC,WAAW,EAAE,MAAM,CAAC;IAEpB,kCAAkC;IAClC,WAAW,EAAE,MAAM,CAAC;IAEpB,wCAAwC;IACxC,YAAY,EAAE,MAAM,CAAC;IAErB,mCAAmC;IACnC,QAAQ,EAAE,MAAM,CAAC;IAEjB,mCAAmC;IACnC,WAAW,EAAE,cAAc,EAAE,CAAC;IAE9B,qCAAqC;IACrC,eAAe,EAAE,MAAM,CAAC;IAExB,uDAAuD;IACvD,eAAe,EAAE,MAAM,CAAC;IAExB,sCAAsC;IACtC,mBAAmB,EAAE,MAAM,CAAC;IAE5B,4CAA4C;IAC5C,MAAM,EAAE,OAAO,CAAC;IAEhB,+BAA+B;IAC/B,WAAW,EAAE,MAAM,CAAC;IAEpB,8BAA8B;IAC9B,SAAS,EAAE,IAAI,CAAC;CACjB;AAED;;GAEG;AACH,MAAM,WAAW,aAAa;IAC5B,IAAI,EAAE,MAAM,GAAG,QAAQ,GAAG,OAAO,GAAG,WAAW,GAAG,UAAU,CAAC;IAC7D,QAAQ,EAAE,MAAM,CAAC;IACjB,OAAO,EAAE,OAAO,CAAC;CAClB;AAED;;GAEG;AACH,MAAM,WAAW,cAAc;IAC7B,QAAQ,EAAE,MAAM,CAAC;IACjB,cAAc,EAAE,MAAM,CAAC;IACvB,UAAU,EAAE,MAAM,CAAC;IACnB,WAAW,CAAC,EAAE,MAAM,CAAC;IACrB,SAAS,EAAE,MAAM,CAAC;CACnB;AAED;;GAEG;AACH,MAAM,WAAW,YAAY;IAC3B,KAAK,EAAE,MAAM,CAAC;IACd,KAAK,EAAE,MAAM,CAAC;IACd,UAAU,EAAE,MAAM,CAAC;IACnB,cAAc,EAAE,MAAM,CAAC;IACvB,WAAW,EAAE,MAAM,CAAC;IACpB,aAAa,EAAE,MAAM,CAAC;IACtB,SAAS,EAAE,MAAM,CAAC;IAClB,oBAAoB,EAAE,MAAM,CAAC;IAC7B,cAAc,EAAE,cAAc,EAAE,CAAC;CAClC;AAED;;GAEG;AACH,MAAM,WAAW,WAAW;IAC1B,OAAO,CACL,MAAM,EAAE,MAAM,EACd,KAAK,EAAE,MAAM,EACb,OAAO,CAAC,EAAE;QAAE,OAAO,CAAC,EAAE,MAAM,CAAA;KAAE,GAC7B,OAAO,CAAC;QACT,MAAM,EAAE,MAAM,CAAC;QACf,UAAU,EAAE,MAAM,CAAC;QACnB,UAAU,EAAE,MAAM,CAAC;KACpB,CAAC,CAAC;CACJ;AAED;;GAEG;AACH,qBAAa,eAAgB,YAAW,WAAW;IAC3C,OAAO,CACX,MAAM,EAAE,MAAM,EACd,KAAK,EAAE,MAAM,EACb,OAAO,CAAC,EAAE;QAAE,OAAO,CAAC,EAAE,MAAM,CAAA;KAAE,GAC7B,OAAO,CAAC;QAAE,MAAM,EAAE,MAAM,CAAC;QAAC,UAAU,EAAE,MAAM,CAAC;QAAC,UAAU,EAAE,MAAM,CAAA;KAAE,CAAC;IAetE,OAAO,CAAC,oBAAoB;CA4B7B;AA6QD;;;GAGG;AACH,qBAAa,kBAAkB;IAK3B,OAAO,CAAC,QAAQ,CAAC,MAAM;IACvB,OAAO,CAAC,QAAQ,CAAC,sBAAsB;IACvC,OAAO,CAAC,QAAQ,CAAC,QAAQ;IAN3B,OAAO,CAAC,QAAQ,CAAC,OAAO,CAAoB;IAC5C,OAAO,CAAC,gBAAgB,CAAC,CAAmC;gBAGzC,MAAM,EAAE,kBAAkB,YAA+B,EACzD,sBAAsB,EAAE,sBAAsB,EAC9C,QAAQ,GAAE,WAAmC;IAQhE;;OAEG;IACH,UAAU,CAAC,QAAQ,EAAE,CAAC,QAAQ,EAAE,YAAY,KAAK,IAAI,GAAG,IAAI;IAI5D;;OAEG;IACG,eAAe,CACnB,KAAK,EAAE,MAAM,EACb,KAAK,EAAE,MAAM,GACZ,OAAO,CAAC,kBAAkB,CAAC;IA+G9B;;OAEG;IACG,wBAAwB,CAC5B,MAAM,EAAE,MAAM,EAAE,EAChB,MAAM,EAAE,MAAM,EAAE,GACf,OAAO,CAAC,GAAG,CAAC,MAAM,EAAE,kBAAkB,EAAE,CAAC,CAAC;IA4B7C;;OAEG;IACH,aAAa,CAAC,KAAK,EAAE,MAAM,GAAG,SAAS,GAAG,IAAI;IAwB9C;;OAEG;IACH,OAAO,CAAC,WAAW;IAWnB;;OAEG;IACH,OAAO,CAAC,kBAAkB;IAa1B;;OAEG;IACH,OAAO,CAAC,kBAAkB;IAc1B;;OAEG;IACH,OAAO,CAAC,sBAAsB;IA4C9B;;OAEG;YACW,aAAa;IAM3B;;OAEG;IACH,eAAe,IAAI,KAAK,CAAC;QACvB,EAAE,EAAE,MAAM,CAAC;QACX,MAAM,EAAE,MAAM,CAAC;QACf,cAAc,EAAE,MAAM,CAAC;QACvB,WAAW,EAAE,MAAM,CAAC;KACrB,CAAC;IASF;;OAEG;IACH,YAAY,IAAI,IAAI;CAOrB;AAMD;;GAEG;AACH,wBAAgB,wBAAwB,CACtC,sBAAsB,EAAE,sBAAsB,EAC9C,MAAM,GAAE,OAAO,CAAC,kBAAkB,CAAM,EACxC,QAAQ,CAAC,EAAE,WAAW,GACrB,kBAAkB,CAOpB"}