agentic-qe 3.9.14 → 3.9.15

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (298)
  1. package/.claude/skills/accessibility-testing/SKILL.md +18 -0
  2. package/.claude/skills/enterprise-integration-testing/SKILL.md +1 -1
  3. package/.claude/skills/pentest-validation/SKILL.md +1 -1
  4. package/.claude/skills/qe-browser/evals/qe-browser.yaml +46 -63
  5. package/.claude/skills/qe-browser/scripts/smoke-test.sh +16 -4
  6. package/.claude/skills/skills-manifest.json +1 -1
  7. package/CHANGELOG.md +27 -0
  8. package/assets/skills/accessibility-testing/SKILL.md +18 -0
  9. package/assets/skills/qe-browser/evals/qe-browser.yaml +46 -63
  10. package/assets/skills/qe-browser/scripts/smoke-test.sh +16 -4
  11. package/assets/skills/skills-manifest.json +1 -1
  12. package/dist/cli/bundle.js +5 -5
  13. package/dist/cli/chunks/adapter-P2EPBJLF.js +2 -0
  14. package/dist/cli/chunks/{agent-booster-wasm-O6EXMPMF.js → agent-booster-wasm-NA2VN5U2.js} +2 -2
  15. package/dist/cli/chunks/{agent-handler-4LPJVTJU.js → agent-handler-NRIF5IOC.js} +2 -2
  16. package/dist/cli/chunks/{agent-memory-branch-PRR323JZ.js → agent-memory-branch-NW3UB2UW.js} +2 -2
  17. package/dist/cli/chunks/aqe-learning-engine-6UITO3NJ.js +2 -0
  18. package/dist/cli/chunks/{audit-YCKXQOXD.js → audit-5CSEDVJR.js} +2 -2
  19. package/dist/cli/chunks/base-JHBJXVGP.js +2 -0
  20. package/dist/cli/chunks/{better-sqlite3-44RIH62G.js → better-sqlite3-FZ32SHZ6.js} +2 -2
  21. package/dist/cli/chunks/{brain-handler-IAYJBAFL.js → brain-handler-ISVST4MR.js} +3 -3
  22. package/dist/cli/chunks/{branch-enumerator-RVVS2UYR.js → branch-enumerator-IUHWHIMV.js} +2 -2
  23. package/dist/cli/chunks/{browser-L6IKC4PR.js → browser-D4F3327X.js} +2 -2
  24. package/dist/cli/chunks/browser-workflow-U4OON5DZ.js +2 -0
  25. package/dist/cli/chunks/{chunk-WE7KIXR6.js → chunk-2BBKAX7X.js} +2 -2
  26. package/dist/cli/chunks/{chunk-DNFDSDKE.js → chunk-2CN2NPJQ.js} +2 -2
  27. package/dist/cli/chunks/{chunk-QLWP7ZIT.js → chunk-2L5ZFBHP.js} +2 -2
  28. package/dist/cli/chunks/{chunk-DMZ2XOEW.js → chunk-2XW36KDQ.js} +2 -2
  29. package/dist/cli/chunks/{chunk-HEQ23HBE.js → chunk-37ZSWRRP.js} +2 -2
  30. package/dist/cli/chunks/{chunk-SY74WSBN.js → chunk-3BA2FGSA.js} +2 -2
  31. package/dist/cli/chunks/{chunk-MXIFVOYA.js → chunk-3FUKJT4S.js} +2 -2
  32. package/dist/cli/chunks/{chunk-YPMBD44A.js → chunk-3NIHJIWP.js} +2 -2
  33. package/dist/cli/chunks/{chunk-3GIZRGX6.js → chunk-3RSPEFU3.js} +2 -2
  34. package/dist/cli/chunks/{chunk-M2RO34V7.js → chunk-3Y4YZDHJ.js} +4 -4
  35. package/dist/cli/chunks/{chunk-LKGLKX2A.js → chunk-4M7RBSW6.js} +2 -2
  36. package/dist/cli/chunks/{chunk-5U6M4XCP.js → chunk-4T36OQUK.js} +3 -3
  37. package/dist/cli/chunks/{chunk-ME4LVBTT.js → chunk-56YHSI6R.js} +2 -2
  38. package/dist/cli/chunks/{chunk-S2KGZJHS.js → chunk-62MVVEGH.js} +1 -1
  39. package/dist/cli/chunks/{chunk-MFGYMNNY.js → chunk-65QA7MYW.js} +2 -2
  40. package/dist/cli/chunks/{chunk-4EQWK6P6.js → chunk-6AMD4PZB.js} +2 -2
  41. package/dist/cli/chunks/{chunk-J5YOXLBD.js → chunk-6BHAGCZD.js} +1 -1
  42. package/dist/cli/chunks/{chunk-6J6GXRHX.js → chunk-6F3H2C5H.js} +3 -3
  43. package/dist/cli/chunks/{chunk-J3XLOHWB.js → chunk-6OEGZSRK.js} +2 -2
  44. package/dist/cli/chunks/{chunk-5MWNXDQW.js → chunk-6TOUMCSE.js} +2 -2
  45. package/dist/cli/chunks/{chunk-KFAN5FZM.js → chunk-6Z7LYE2B.js} +2 -2
  46. package/dist/cli/chunks/{chunk-KJB6JJXT.js → chunk-7J5KWUC2.js} +1 -1
  47. package/dist/cli/chunks/{chunk-IH4OOAEP.js → chunk-7ZPNQ3T6.js} +1 -1
  48. package/dist/cli/chunks/{chunk-KHUFPJE4.js → chunk-AE65B2ZE.js} +2 -2
  49. package/dist/cli/chunks/{chunk-BNPJMPM3.js → chunk-AVKUFN3C.js} +2 -2
  50. package/dist/cli/chunks/{chunk-3ZKTPPIC.js → chunk-B4BOOOYM.js} +1 -1
  51. package/dist/cli/chunks/{chunk-A2TK52B2.js → chunk-BGUCXJEJ.js} +2 -2
  52. package/dist/cli/chunks/{chunk-TOSFK4VG.js → chunk-C5BRTU4V.js} +1 -1
  53. package/dist/cli/chunks/{chunk-BFCMOMOF.js → chunk-CF3W34BA.js} +1 -1
  54. package/dist/cli/chunks/{chunk-RQFC7Q33.js → chunk-CSB2M7IX.js} +2 -2
  55. package/dist/cli/chunks/{chunk-DGUAAG3T.js → chunk-CUQBOVRP.js} +1 -1
  56. package/dist/cli/chunks/{chunk-KVN4FTPT.js → chunk-CWWUBZNX.js} +2 -2
  57. package/dist/cli/chunks/{chunk-VEAVDXXT.js → chunk-D32YCVCA.js} +2 -2
  58. package/dist/cli/chunks/{chunk-ORHCO2I7.js → chunk-D3FV5NNA.js} +2 -2
  59. package/dist/cli/chunks/{chunk-JNLKLC7G.js → chunk-D3ZUSXFY.js} +2 -2
  60. package/dist/cli/chunks/{chunk-IOHDE27P.js → chunk-DATFN5DG.js} +2 -2
  61. package/dist/cli/chunks/{chunk-ZCRLZINE.js → chunk-DPEG44BS.js} +2 -2
  62. package/dist/cli/chunks/{chunk-GIUAY4KY.js → chunk-DYQ7HTEU.js} +3 -3
  63. package/dist/cli/chunks/{chunk-WUYJYRBY.js → chunk-E3EDVRB5.js} +1 -1
  64. package/dist/cli/chunks/{chunk-JOGKQTCU.js → chunk-ELLNKP3I.js} +3 -3
  65. package/dist/cli/chunks/{chunk-Q4DAYLH4.js → chunk-EORSYD66.js} +2 -2
  66. package/dist/cli/chunks/{chunk-GALNZTYE.js → chunk-ESQSX37W.js} +2 -2
  67. package/dist/cli/chunks/{chunk-ERMIIRYN.js → chunk-EY2DGYQX.js} +1 -1
  68. package/dist/cli/chunks/chunk-FEVTMHO6.js +2 -0
  69. package/dist/cli/chunks/{chunk-3PZ2QLLR.js → chunk-FSOY2XB4.js} +3 -3
  70. package/dist/cli/chunks/{chunk-PZF5ISGS.js → chunk-FW6QBTPE.js} +1 -1
  71. package/dist/cli/chunks/{chunk-ZU5VHW6P.js → chunk-G3FUOFXA.js} +2 -2
  72. package/dist/cli/chunks/{chunk-JWR3NNFA.js → chunk-GHEO5YVA.js} +1 -1
  73. package/dist/cli/chunks/{chunk-7732TLNA.js → chunk-GLJ6CJNY.js} +2 -2
  74. package/dist/cli/chunks/{chunk-L4IWZXSC.js → chunk-GWUO3RY5.js} +1 -1
  75. package/dist/cli/chunks/{chunk-BOMJHE4I.js → chunk-H6DAP4KS.js} +2 -2
  76. package/dist/cli/chunks/{chunk-6EZY2QVM.js → chunk-HJNFBJT5.js} +8 -8
  77. package/dist/cli/chunks/{chunk-I3P5RZX7.js → chunk-HNAQNAGI.js} +3 -3
  78. package/dist/cli/chunks/{chunk-Z3PSKGBO.js → chunk-HPY7HGVQ.js} +2 -2
  79. package/dist/cli/chunks/{chunk-OGLS6HB4.js → chunk-HWK27KJK.js} +1 -1
  80. package/dist/cli/chunks/{chunk-2L3BCZ2F.js → chunk-ICXWXO5P.js} +1 -1
  81. package/dist/cli/chunks/{chunk-6BK2S2WE.js → chunk-IHDW4HW7.js} +2 -2
  82. package/dist/cli/chunks/{chunk-NWZYULZE.js → chunk-II5KTTIS.js} +4 -4
  83. package/dist/cli/chunks/{chunk-VUS4VRPQ.js → chunk-IMLH32AG.js} +2 -2
  84. package/dist/cli/chunks/{chunk-S6SM7LJK.js → chunk-IPTGPCEL.js} +2 -2
  85. package/dist/cli/chunks/{chunk-JLBUS4PR.js → chunk-IPVKVPAF.js} +2 -2
  86. package/dist/cli/chunks/{chunk-FNKKENLS.js → chunk-ISNXE6TP.js} +2 -2
  87. package/dist/cli/chunks/{chunk-LWHM5BWD.js → chunk-JAIIPEE6.js} +2 -2
  88. package/dist/cli/chunks/{chunk-UIYK4Y7K.js → chunk-JJDHD7SC.js} +2 -2
  89. package/dist/cli/chunks/{chunk-VDGWDDBC.js → chunk-JQRR37YY.js} +2 -2
  90. package/dist/cli/chunks/{chunk-DD4T7ZUM.js → chunk-KJTXY3DW.js} +1 -1
  91. package/dist/cli/chunks/{chunk-UUVTLYUK.js → chunk-KKBSX43Q.js} +2 -2
  92. package/dist/cli/chunks/{chunk-ZFZX2NA3.js → chunk-LLVS3UYZ.js} +2 -2
  93. package/dist/cli/chunks/{chunk-3NUMJB3U.js → chunk-LNUWNRRJ.js} +9 -9
  94. package/dist/cli/chunks/{chunk-G7L5Y47C.js → chunk-MBDX4OHD.js} +1 -1
  95. package/dist/cli/chunks/{chunk-P5YSBYZV.js → chunk-MHL6CPGY.js} +1 -1
  96. package/dist/cli/chunks/{chunk-VQB5SLXZ.js → chunk-MO4Q5ZGE.js} +2 -2
  97. package/dist/cli/chunks/{chunk-XNXOW7BQ.js → chunk-MS6T4VCU.js} +2 -2
  98. package/dist/cli/chunks/chunk-N2RPLPVG.js +18 -0
  99. package/dist/cli/chunks/{chunk-AEQMQVDJ.js → chunk-NL2PQLRD.js} +2 -2
  100. package/dist/cli/chunks/{chunk-PFSSI3YR.js → chunk-NWMYV7KG.js} +2 -2
  101. package/dist/cli/chunks/{chunk-G2KQUEGY.js → chunk-O6CHLZYT.js} +2 -2
  102. package/dist/cli/chunks/{chunk-VU4QET3C.js → chunk-O6J7MKRV.js} +2 -2
  103. package/dist/cli/chunks/{chunk-IL3ESNHA.js → chunk-OBZMQFWG.js} +3 -3
  104. package/dist/cli/chunks/{chunk-CGC63MGF.js → chunk-OHESV6I3.js} +4 -4
  105. package/dist/cli/chunks/{chunk-OZBTC26R.js → chunk-OMTYOIOS.js} +2 -2
  106. package/dist/cli/chunks/{chunk-5DWTU2PN.js → chunk-ORKGVVXK.js} +1 -1
  107. package/dist/cli/chunks/{chunk-UG2J5MTY.js → chunk-OUP2X3LT.js} +1 -1
  108. package/dist/cli/chunks/{chunk-GOTVYSX5.js → chunk-PSOIVDD2.js} +3 -3
  109. package/dist/cli/chunks/{chunk-FWI7PK4X.js → chunk-Q3E6Z2RQ.js} +1 -1
  110. package/dist/cli/chunks/{chunk-FFACJODG.js → chunk-Q3GDSFGA.js} +2 -2
  111. package/dist/cli/chunks/{chunk-TJOZPPZR.js → chunk-QNLZKD7E.js} +2 -2
  112. package/dist/cli/chunks/{chunk-WVN4YJSF.js → chunk-R4G64UH3.js} +1 -1
  113. package/dist/cli/chunks/{chunk-WELNFHDN.js → chunk-REUGPZIA.js} +1 -1
  114. package/dist/cli/chunks/{chunk-D6APBW66.js → chunk-RODJK6G6.js} +1 -1
  115. package/dist/cli/chunks/{chunk-AOYN6XYJ.js → chunk-RPOW5LWO.js} +1 -1
  116. package/dist/cli/chunks/{chunk-YZBY4ZRF.js → chunk-RSOOENGE.js} +2 -2
  117. package/dist/cli/chunks/{chunk-4JYLXYAH.js → chunk-RTVD7SZQ.js} +1 -1
  118. package/dist/cli/chunks/{chunk-QUK34CSU.js → chunk-RWVFOXYM.js} +1 -1
  119. package/dist/cli/chunks/{chunk-SUXUCW5I.js → chunk-SSQ42GI7.js} +2 -2
  120. package/dist/cli/chunks/{chunk-7IJBZGZY.js → chunk-SXUX6PTE.js} +1 -1
  121. package/dist/cli/chunks/{chunk-HJJTLOWZ.js → chunk-TJDAXI6E.js} +2 -2
  122. package/dist/cli/chunks/{chunk-AHVUILGE.js → chunk-TZWCVGG2.js} +1 -1
  123. package/dist/cli/chunks/{chunk-WSZVSDD5.js → chunk-U4WDJPRL.js} +2 -2
  124. package/dist/cli/chunks/{chunk-GM5LIYLQ.js → chunk-UZOFXYNC.js} +2 -2
  125. package/dist/cli/chunks/{chunk-NWGIQCCO.js → chunk-V65DK6ZZ.js} +1 -1
  126. package/dist/cli/chunks/{chunk-Y3X5OS5F.js → chunk-V6QARSVQ.js} +2 -2
  127. package/dist/cli/chunks/{chunk-LJ3WQIBN.js → chunk-V775NJOM.js} +1 -1
  128. package/dist/cli/chunks/{chunk-FYBTGEOV.js → chunk-VBEVZUQW.js} +1 -1
  129. package/dist/cli/chunks/{chunk-6GKXC6DE.js → chunk-VDDQSW4L.js} +2 -2
  130. package/dist/cli/chunks/{chunk-VVKENR4I.js → chunk-W4PUKTBF.js} +1 -1
  131. package/dist/cli/chunks/{chunk-XH774Q6Z.js → chunk-WAQ3U4FC.js} +1 -1
  132. package/dist/cli/chunks/{chunk-I5U64MHU.js → chunk-WCOJGDGO.js} +2 -2
  133. package/dist/cli/chunks/{chunk-JN7MSUGK.js → chunk-WI6UA5HV.js} +2 -2
  134. package/dist/cli/chunks/{chunk-GUEK5DOK.js → chunk-XALB3KRR.js} +2 -2
  135. package/dist/cli/chunks/{chunk-5HUAD4UU.js → chunk-XXRDI53H.js} +2 -2
  136. package/dist/cli/chunks/{chunk-DJLRRCR6.js → chunk-YCYFUYCE.js} +2 -2
  137. package/dist/cli/chunks/{chunk-4RDTR2OF.js → chunk-ZHTCZGML.js} +2 -2
  138. package/dist/cli/chunks/{chunk-36YZ2UF7.js → chunk-ZOAPOVYW.js} +2 -2
  139. package/dist/cli/chunks/{chunk-UAAJ3BSW.js → chunk-ZSC7NHK3.js} +2 -2
  140. package/dist/cli/chunks/{ci-QQ4F5TYO.js → ci-BGUUHDUS.js} +2 -2
  141. package/dist/cli/chunks/{ci-output-QC4WRENA.js → ci-output-CDFRGBNU.js} +2 -2
  142. package/dist/cli/chunks/{circuit-breaker-HFQUNYTK.js → circuit-breaker-S3NFX6RQ.js} +2 -2
  143. package/dist/cli/chunks/{claude-flow-setup-XNSCWIUN.js → claude-flow-setup-AJIPJFMC.js} +2 -2
  144. package/dist/cli/chunks/client-UHHMKUPF.js +2 -0
  145. package/dist/cli/chunks/{cline-installer-S3CECBYL.js → cline-installer-6O4KZ5UH.js} +2 -2
  146. package/dist/cli/chunks/{code-QHUELDUH.js → code-IZALXUL5.js} +2 -2
  147. package/dist/cli/chunks/{code-index-extractor-QLA5EQTE.js → code-index-extractor-N7VDH4XF.js} +2 -2
  148. package/dist/cli/chunks/{codex-installer-CQ7JNNOA.js → codex-installer-35GTWCFJ.js} +2 -2
  149. package/dist/cli/chunks/{completions-S33U3KIT.js → completions-XPJFHGG2.js} +2 -2
  150. package/dist/cli/chunks/{complexity-analyzer-J7IEW3EM.js → complexity-analyzer-DNLCJGUJ.js} +2 -2
  151. package/dist/cli/chunks/{continuedev-installer-DVU24D37.js → continuedev-installer-PXAIKNYO.js} +2 -2
  152. package/dist/cli/chunks/{copilot-installer-DYISOPBC.js → copilot-installer-2TQ5IZNY.js} +2 -2
  153. package/dist/cli/chunks/{cost-tracker-URJADKKS.js → cost-tracker-A5XH4W6S.js} +2 -2
  154. package/dist/cli/chunks/{coverage-NGMKJNW6.js → coverage-EPIMXTLW.js} +3 -3
  155. package/dist/cli/chunks/cross-domain-router-GEBXTPZW.js +2 -0
  156. package/dist/cli/chunks/{cursor-installer-DFBJRDSY.js → cursor-installer-WMMRPW44.js} +2 -2
  157. package/dist/cli/chunks/{daemon-JSKKVVCV.js → daemon-55DSODOH.js} +3 -3
  158. package/dist/cli/chunks/{dag-attention-scheduler-YITFS6MR.js → dag-attention-scheduler-5DSWXMMK.js} +2 -2
  159. package/dist/cli/chunks/{detect-5APZP2D3.js → detect-NOS46AWN.js} +2 -2
  160. package/dist/cli/chunks/{domain-handler-WHH6XPSZ.js → domain-handler-BFRNU6S3.js} +2 -2
  161. package/dist/cli/chunks/{domain-transfer-SERX53SS.js → domain-transfer-BPSA4HFR.js} +2 -2
  162. package/dist/cli/chunks/dream-XBRGYPBC.js +2 -0
  163. package/dist/cli/chunks/esm-node-EV4HOHTZ.js +2 -0
  164. package/dist/cli/chunks/eval-YDLM23FB.js +15 -0
  165. package/dist/cli/chunks/{fast-paths-4MSOOJVK.js → fast-paths-GISZ7HBO.js} +2 -2
  166. package/dist/cli/chunks/{feature-flags-WJT57IEJ.js → feature-flags-3FLJF6CQ.js} +2 -2
  167. package/dist/cli/chunks/{feature-flags-564YFR65.js → feature-flags-5WGZLUPQ.js} +2 -2
  168. package/dist/cli/chunks/{file-discovery-5QVG65WF.js → file-discovery-PPHUMFSI.js} +2 -2
  169. package/dist/cli/chunks/{fleet-CF5JRK5B.js → fleet-LHFZ53FS.js} +3 -3
  170. package/dist/cli/chunks/{gnn-wrapper-NYQ77563.js → gnn-wrapper-Z45DRWGI.js} +2 -2
  171. package/dist/cli/chunks/{heartbeat-handler-42QJPKHE.js → heartbeat-handler-VMLJ5PDH.js} +4 -4
  172. package/dist/cli/chunks/{heartbeat-scheduler-7XEAV74Y.js → heartbeat-scheduler-GUWYDHEY.js} +2 -2
  173. package/dist/cli/chunks/hnsw-adapter-BG73YVXI.js +2 -0
  174. package/dist/cli/chunks/hnsw-index-K73ZTLJN.js +2 -0
  175. package/dist/cli/chunks/{hnsw-legacy-bridge-ZRR6PGWN.js → hnsw-legacy-bridge-S7UZWDUP.js} +2 -2
  176. package/dist/cli/chunks/{hnswlib-node-DY7NOW76.js → hnswlib-node-MIAAS7OI.js} +2 -2
  177. package/dist/cli/chunks/{hooks-IRRHEJLR.js → hooks-ZN4FV5XS.js} +6 -6
  178. package/dist/cli/chunks/{hybrid-router-S4JXJLUY.js → hybrid-router-Y6LBFPL7.js} +2 -2
  179. package/dist/cli/chunks/{hypergraph-engine-WFHZJMP2.js → hypergraph-engine-KK73LZYZ.js} +2 -2
  180. package/dist/cli/chunks/{hypergraph-handler-LHUKR7PT.js → hypergraph-handler-OBGDPIWG.js} +3 -3
  181. package/dist/cli/chunks/impact-analyzer-GWIUYYCQ.js +2 -0
  182. package/dist/cli/chunks/{init-handler-ZBIW7PSB.js → init-handler-JJJ7VHM4.js} +6 -6
  183. package/dist/cli/chunks/init-wizard-RIJJAKE3.js +2 -0
  184. package/dist/cli/chunks/kernel-SIPBCRGL.js +2 -0
  185. package/dist/cli/chunks/{kilocode-installer-JYHA4NW7.js → kilocode-installer-EF6DPX2D.js} +2 -2
  186. package/dist/cli/chunks/{kiro-installer-OBXTRK77.js → kiro-installer-SG74CEVO.js} +2 -2
  187. package/dist/cli/chunks/knowledge-graph-NOYZXHIG.js +2 -0
  188. package/dist/cli/chunks/{learning-OAEJHEJD.js → learning-LVWYMHF6.js} +3 -3
  189. package/dist/cli/chunks/{llm-router-NNEOZAVB.js → llm-router-QFK7MNPY.js} +4 -4
  190. package/dist/cli/chunks/{load-WIKLBESW.js → load-6XPV4WA2.js} +2 -2
  191. package/dist/cli/chunks/load-test-DIMUH75F.js +2 -0
  192. package/dist/cli/chunks/{mcp-KR43GFGP.js → mcp-GZXOPYMH.js} +2 -2
  193. package/dist/cli/chunks/{memory-KQIYKAYU.js → memory-QTE2Z5HU.js} +5 -5
  194. package/dist/cli/chunks/memory-backend-IWOGO4BV.js +2 -0
  195. package/dist/cli/chunks/{memory-handlers-LAI4AO4O.js → memory-handlers-PC4P4YEF.js} +2 -2
  196. package/dist/cli/chunks/{multi-model-executor-NPS3VOAR.js → multi-model-executor-FZOPSUOT.js} +2 -2
  197. package/dist/cli/chunks/{opencode-installer-FE475WME.js → opencode-installer-CITDTCUQ.js} +2 -2
  198. package/dist/cli/chunks/{orchestrator-MW4MZ3RW.js → orchestrator-ZTG7MFHQ.js} +20 -20
  199. package/dist/cli/chunks/{pipeline-R2TGZSUA.js → pipeline-QXO4EJP4.js} +2 -2
  200. package/dist/cli/chunks/{platform-N2GZBCBP.js → platform-JJEDYCAK.js} +2 -2
  201. package/dist/cli/chunks/{plugin-V2AQF23V.js → plugin-ZEKRM6F7.js} +2 -2
  202. package/dist/cli/chunks/{prime-radiant-advanced-wasm-ZQSYRLRS.js → prime-radiant-advanced-wasm-GRS4T6LR.js} +2 -2
  203. package/dist/cli/chunks/protocol-executor-NT4TILJW.js +2 -0
  204. package/dist/cli/chunks/{protocol-handler-V3U724F2.js → protocol-handler-KWI2T6OR.js} +2 -2
  205. package/dist/cli/chunks/{prove-NCH56HZB.js → prove-7FJN2HEH.js} +2 -2
  206. package/dist/cli/chunks/{provider-manager-7NUHS3T5.js → provider-manager-QYYZZLLO.js} +2 -2
  207. package/dist/cli/chunks/qe-reasoning-bank-VDROHXFS.js +2 -0
  208. package/dist/cli/chunks/{quality-J3IVPPPW.js → quality-4NHO2NY5.js} +2 -2
  209. package/dist/cli/chunks/queen-coordinator-7LO73IV3.js +2 -0
  210. package/dist/cli/chunks/{real-embeddings-VURZXAEY.js → real-embeddings-SIELAOWX.js} +2 -2
  211. package/dist/cli/chunks/{roocode-installer-STWFJGMR.js → roocode-installer-ANYXH3NR.js} +2 -2
  212. package/dist/cli/chunks/router-SFVOLN2W.js +2 -0
  213. package/dist/cli/chunks/routing-feedback-MRFV7WUZ.js +2 -0
  214. package/dist/cli/chunks/{routing-handler-6GZT7EXM.js → routing-handler-6QQHK4KV.js} +2 -2
  215. package/dist/cli/chunks/{ruvector-commands-LCPAH6WH.js → ruvector-commands-YEUA3MZB.js} +2 -2
  216. package/dist/cli/chunks/{rvf-dual-writer-76LWFT5X.js → rvf-dual-writer-UJFRHPVE.js} +2 -2
  217. package/dist/cli/chunks/{rvf-migration-adapter-G7KFWJKY.js → rvf-migration-adapter-CSDFG7UA.js} +2 -2
  218. package/dist/cli/chunks/{rvf-migration-coordinator-3DXOUPWF.js → rvf-migration-coordinator-QO7OENQF.js} +2 -2
  219. package/dist/cli/chunks/rvf-native-adapter-6L7FZZC7.js +2 -0
  220. package/dist/cli/chunks/safe-db-PYWBVGOV.js +2 -0
  221. package/dist/cli/chunks/schedule-BUQU75HY.js +2 -0
  222. package/dist/cli/chunks/scheduler-XOSKWZO2.js +2 -0
  223. package/dist/cli/chunks/{security-XDBPYA5T.js → security-QCIUC5FX.js} +3 -3
  224. package/dist/cli/chunks/shared-rvf-adapter-OZ7S4ZQL.js +2 -0
  225. package/dist/cli/chunks/{shared-rvf-dual-writer-UGXTS2XI.js → shared-rvf-dual-writer-LVJJS3PD.js} +2 -2
  226. package/dist/cli/chunks/sqlite-persistence-7UXQZ5Y6.js +2 -0
  227. package/dist/cli/chunks/{status-handler-ZLXES3QL.js → status-handler-O77A4JWM.js} +2 -2
  228. package/dist/cli/chunks/{structural-health-4PFCOQ7T.js → structural-health-WIIBIIQI.js} +2 -2
  229. package/dist/cli/chunks/{sync-3DROIEZN.js → sync-UN6D6OF2.js} +2 -2
  230. package/dist/cli/chunks/{task-handler-4UGZ5VIE.js → task-handler-RBOSMJCK.js} +2 -2
  231. package/dist/cli/chunks/{task-handlers-I5VVBTWI.js → task-handlers-VWMXQKAT.js} +2 -2
  232. package/dist/cli/chunks/{test-Z2MM7O5A.js → test-XK5M3PGT.js} +4 -4
  233. package/dist/cli/chunks/{test-scheduling-AP76GKN5.js → test-scheduling-CTQ3JJ7G.js} +3 -3
  234. package/dist/cli/chunks/{token-bootstrap-P54N6PZV.js → token-bootstrap-D5CQ3I5M.js} +2 -2
  235. package/dist/cli/chunks/{token-usage-AXAZTPA6.js → token-usage-L4QSLWQ5.js} +2 -2
  236. package/dist/cli/chunks/{transformers-VOZ2Z7FS.js → transformers-SRPJDBKA.js} +2 -2
  237. package/dist/cli/chunks/{tree-sitter-wasm-parser-IYKLBTXB.js → tree-sitter-wasm-parser-DOU5ITWB.js} +2 -2
  238. package/dist/cli/chunks/{types-TKDH5EBW.js → types-LECYZUBN.js} +2 -2
  239. package/dist/cli/chunks/unified-memory-L6735TFM.js +2 -0
  240. package/dist/cli/chunks/unified-memory-hnsw-7TZNEDGY.js +2 -0
  241. package/dist/cli/chunks/unified-persistence-FNYZ3C2B.js +2 -0
  242. package/dist/cli/chunks/{validate-ZGF5FVZE.js → validate-SIKNIS6B.js} +2 -2
  243. package/dist/cli/chunks/{validate-swarm-OPWWUWYA.js → validate-swarm-EHAMMILJ.js} +2 -2
  244. package/dist/cli/chunks/{vibium-HEQUGRKJ.js → vibium-LJ4NYXMZ.js} +2 -2
  245. package/dist/cli/chunks/visual-security-SMPZPDY4.js +2 -0
  246. package/dist/cli/chunks/{web-tree-sitter-JZ7JCJJR.js → web-tree-sitter-DZQ3T4JD.js} +2 -2
  247. package/dist/cli/chunks/{windsurf-installer-2SGRCS6C.js → windsurf-installer-U6Z73O6J.js} +2 -2
  248. package/dist/cli/chunks/witness-chain-FNCOTWFA.js +2 -0
  249. package/dist/cli/chunks/{witness-chain-LJUYOCB5.js → witness-chain-JE3QU4YP.js} +2 -2
  250. package/dist/cli/chunks/{workflow-QORI3KO7.js → workflow-62QXGZQO.js} +4 -4
  251. package/dist/cli/chunks/workflow-orchestrator-RCR4N7OA.js +2 -0
  252. package/dist/cli/chunks/{wrappers-3NR7LNB5.js → wrappers-CAX5EZSH.js} +2 -2
  253. package/dist/cli/commands/eval.js +86 -0
  254. package/dist/init/browser-engine-installer.d.ts +60 -1
  255. package/dist/init/browser-engine-installer.js +73 -3
  256. package/dist/init/phases/09-assets.js +9 -2
  257. package/dist/mcp/bundle.js +1 -1
  258. package/dist/validation/command-eval-runner.d.ts +154 -0
  259. package/dist/validation/command-eval-runner.js +293 -0
  260. package/dist/validation/index.d.ts +2 -0
  261. package/dist/validation/index.js +4 -0
  262. package/package.json +1 -1
  263. package/dist/cli/chunks/adapter-W2EM7ZQ7.js +0 -2
  264. package/dist/cli/chunks/aqe-learning-engine-JQMPNHQU.js +0 -2
  265. package/dist/cli/chunks/base-EOPYE6JR.js +0 -2
  266. package/dist/cli/chunks/browser-workflow-CSARLAJD.js +0 -2
  267. package/dist/cli/chunks/chunk-53KLVMAT.js +0 -18
  268. package/dist/cli/chunks/chunk-STYNOKOM.js +0 -2
  269. package/dist/cli/chunks/client-3GECKJWB.js +0 -2
  270. package/dist/cli/chunks/cross-domain-router-UYONX76U.js +0 -2
  271. package/dist/cli/chunks/dream-Z5QDQJSM.js +0 -2
  272. package/dist/cli/chunks/esm-node-FOI5Y3O2.js +0 -2
  273. package/dist/cli/chunks/eval-FM3BLVFO.js +0 -15
  274. package/dist/cli/chunks/hnsw-adapter-NOAQ76QF.js +0 -2
  275. package/dist/cli/chunks/hnsw-index-KDIZQRSB.js +0 -2
  276. package/dist/cli/chunks/impact-analyzer-VDXJBWFG.js +0 -2
  277. package/dist/cli/chunks/init-wizard-Q6DEACKX.js +0 -2
  278. package/dist/cli/chunks/kernel-YNQOGTM5.js +0 -2
  279. package/dist/cli/chunks/knowledge-graph-GOZG3YOM.js +0 -2
  280. package/dist/cli/chunks/load-test-UJLTMPXW.js +0 -2
  281. package/dist/cli/chunks/memory-backend-WJH6XER3.js +0 -2
  282. package/dist/cli/chunks/protocol-executor-LYYYHXAX.js +0 -2
  283. package/dist/cli/chunks/qe-reasoning-bank-JK4UKLYY.js +0 -2
  284. package/dist/cli/chunks/queen-coordinator-6OKLSCHV.js +0 -2
  285. package/dist/cli/chunks/router-DDZHCGUD.js +0 -2
  286. package/dist/cli/chunks/routing-feedback-BA66F37P.js +0 -2
  287. package/dist/cli/chunks/rvf-native-adapter-TXTRJNPW.js +0 -2
  288. package/dist/cli/chunks/safe-db-IFBVWIGF.js +0 -2
  289. package/dist/cli/chunks/schedule-WMZLOPCW.js +0 -2
  290. package/dist/cli/chunks/scheduler-WBORWJD5.js +0 -2
  291. package/dist/cli/chunks/shared-rvf-adapter-SQQ27EEI.js +0 -2
  292. package/dist/cli/chunks/sqlite-persistence-XH47B3BG.js +0 -2
  293. package/dist/cli/chunks/unified-memory-NHJHPLIC.js +0 -2
  294. package/dist/cli/chunks/unified-memory-hnsw-IMMCI2XK.js +0 -2
  295. package/dist/cli/chunks/unified-persistence-JS6CEV35.js +0 -2
  296. package/dist/cli/chunks/visual-security-XLO5JBVP.js +0 -2
  297. package/dist/cli/chunks/witness-chain-PPWPHYTY.js +0 -2
  298. package/dist/cli/chunks/workflow-orchestrator-H4DKRHVP.js +0 -2
@@ -0,0 +1,154 @@
1
+ /**
2
+ * Command Eval Runner — shell-command-based eval suite execution.
3
+ *
4
+ * Sibling to {@link ParallelEvalRunner} (which evaluates LLM prompts against
5
+ * keyword-match patterns). This runner is for skills whose eval suite is a
6
+ * set of shell commands that produce JSON envelopes — e.g. qe-browser, whose
7
+ * five primitives (assert, batch, visual-diff, check-injection, intent-score)
8
+ * are Node scripts that exit 0/1 with structured stdout.
9
+ *
10
+ * Detection is by suite shape, not by an explicit type tag: if the first test
11
+ * case has `input.command`, we treat the suite as command-mode.
12
+ *
13
+ * Supported assertions per test case:
14
+ * - `exit_code`: strict equality against the process exit code
15
+ * - `json_fields`: a map of dotted JSONPath -> expected value, evaluated
16
+ * against parsed stdout JSON
17
+ * - `severity_at_least`: ordered comparison against
18
+ * `.output.checkInjection.severity` on the ordered scale
19
+ * `none < low < medium < high < critical`
20
+ * - `candidate_count_at_least`: numeric lower bound on
21
+ * `.output.intentScore.candidateCount`
22
+ *
23
+ * Setup steps in `input.setup[]` run sequentially before the main command;
24
+ * any non-zero setup exit short-circuits the test case as failed.
25
+ */
26
+ declare const SEVERITY_ORDER: readonly ["none", "low", "medium", "high", "critical"];
27
+ export type Severity = (typeof SEVERITY_ORDER)[number];
28
+ /**
29
+ * Return true iff `actual` is at or above `threshold` on the severity scale.
30
+ * Unknown severities return false (conservative — we never silently pass).
31
+ */
32
+ export declare function severityAtLeast(actual: unknown, threshold: Severity): boolean;
33
+ /**
34
+ * Evaluate a simple dotted JSONPath like `.output.assert.passed` against an
35
+ * already-parsed JSON value. Returns `undefined` if any segment is missing.
36
+ *
37
+ * We intentionally support only dotted paths (no `[idx]`, no filters) because
38
+ * that's the full surface area used by qe-browser's output schemas.
39
+ */
40
+ export declare function evalJsonPath(obj: unknown, jsonPath: string): unknown;
41
+ export type TestCasePriority = 'critical' | 'high' | 'medium' | 'low';
42
+ export interface CommandEvalExpected {
43
+ exit_code?: number;
44
+ /** Dotted JSONPath → expected JSON value (deep equality). */
45
+ json_fields?: Record<string, unknown>;
46
+ severity_at_least?: Severity;
47
+ candidate_count_at_least?: number;
48
+ }
49
+ export interface CommandEvalTestCase {
50
+ id: string;
51
+ description?: string;
52
+ category?: string;
53
+ priority?: TestCasePriority;
54
+ input: {
55
+ setup?: string[];
56
+ command: string;
57
+ };
58
+ expected: CommandEvalExpected;
59
+ }
60
+ export interface CommandEvalSuite {
61
+ skill: string;
62
+ version?: string;
63
+ status?: 'design-spec' | 'active' | 'deprecated';
64
+ description?: string;
65
+ test_cases: CommandEvalTestCase[];
66
+ validation?: {
67
+ required_pass_rate?: number;
68
+ critical_must_pass?: boolean;
69
+ };
70
+ }
71
+ export interface CommandEvalTestResult {
72
+ testId: string;
73
+ passed: boolean;
74
+ category?: string;
75
+ priority?: TestCasePriority;
76
+ exitCode: number | null;
77
+ stdoutSnippet: string;
78
+ stderrSnippet: string;
79
+ /** Assertion failures — empty when passed. */
80
+ failures: string[];
81
+ durationMs: number;
82
+ /** Set when a setup step failed before the main command ran. */
83
+ setupFailure?: string;
84
+ }
85
+ export interface CommandEvalResult {
86
+ skill: string;
87
+ passed: boolean;
88
+ totalTests: number;
89
+ passedTests: number;
90
+ failedTests: number;
91
+ passRate: number;
92
+ /** True iff every test with priority=critical passed. */
93
+ criticalPassed: boolean;
94
+ testResults: CommandEvalTestResult[];
95
+ totalDurationMs: number;
96
+ timestamp: Date;
97
+ }
98
+ /** Injectable runner so tests don't spawn real processes. */
99
+ export type CommandRunner = (cmd: string, options: {
100
+ timeoutMs: number;
101
+ cwd: string;
102
+ }) => {
103
+ status: number | null;
104
+ stdout: string;
105
+ stderr: string;
106
+ error?: Error;
107
+ };
108
+ /**
109
+ * A suite is command-eval mode when the first test case has `input.command`.
110
+ * We use this to dispatch from the shared `aqe eval` CLI without requiring a
111
+ * new explicit type tag in every existing LLM-style yaml.
112
+ */
113
+ export declare function isCommandEvalSuite(raw: unknown): raw is CommandEvalSuite;
114
+ /**
115
+ * Validate a single test-case result against its expected block.
116
+ * Returns the list of failures (empty == passed).
117
+ *
118
+ * Parses stdout as JSON only if at least one assertion needs it, so commands
119
+ * that only care about `exit_code` don't require JSON output.
120
+ */
121
+ export declare function validateCommandResult(tc: CommandEvalTestCase, exitCode: number | null, stdout: string): {
122
+ passed: boolean;
123
+ failures: string[];
124
+ };
125
+ export interface CommandEvalRunnerOptions {
126
+ /** Directory containing skill directories (default: `.claude/skills`). */
127
+ skillsDir?: string;
128
+ /** Per-command timeout in milliseconds (default: 60s). */
129
+ timeoutMs?: number;
130
+ /** Working directory for commands (default: `process.cwd()`). */
131
+ cwd?: string;
132
+ /** Injected runner for tests. */
133
+ runner?: CommandRunner;
134
+ }
135
+ export declare class CommandEvalRunner {
136
+ private readonly skillsDir;
137
+ private readonly timeoutMs;
138
+ private readonly cwd;
139
+ private readonly runner;
140
+ constructor(options?: CommandEvalRunnerOptions);
141
+ /**
142
+ * Load `${skill}/evals/${skill}.yaml` and return it iff it's a command-mode
143
+ * suite. Returns null if missing, unparseable, or not command-mode.
144
+ */
145
+ loadSuite(skill: string): CommandEvalSuite | null;
146
+ run(skill: string): Promise<CommandEvalResult>;
147
+ private executeCase;
148
+ }
149
+ /**
150
+ * Convenience factory.
151
+ */
152
+ export declare function createCommandEvalRunner(options?: CommandEvalRunnerOptions): CommandEvalRunner;
153
+ export {};
154
+ //# sourceMappingURL=command-eval-runner.d.ts.map
@@ -0,0 +1,293 @@
1
+ /**
2
+ * Command Eval Runner — shell-command-based eval suite execution.
3
+ *
4
+ * Sibling to {@link ParallelEvalRunner} (which evaluates LLM prompts against
5
+ * keyword-match patterns). This runner is for skills whose eval suite is a
6
+ * set of shell commands that produce JSON envelopes — e.g. qe-browser, whose
7
+ * five primitives (assert, batch, visual-diff, check-injection, intent-score)
8
+ * are Node scripts that exit 0/1 with structured stdout.
9
+ *
10
+ * Detection is by suite shape, not by an explicit type tag: if the first test
11
+ * case has `input.command`, we treat the suite as command-mode.
12
+ *
13
+ * Supported assertions per test case:
14
+ * - `exit_code`: strict equality against the process exit code
15
+ * - `json_fields`: a map of dotted JSONPath -> expected value, evaluated
16
+ * against parsed stdout JSON
17
+ * - `severity_at_least`: ordered comparison against
18
+ * `.output.checkInjection.severity` in the range
19
+ * `none < low < medium < high < critical`
20
+ * - `candidate_count_at_least`: numeric lower bound on
21
+ * `.output.intentScore.candidateCount`
22
+ *
23
+ * Setup steps in `input.setup[]` run sequentially before the main command;
24
+ * any non-zero setup exit short-circuits the test case as failed.
25
+ */
26
+ import * as fs from 'node:fs';
27
+ import * as path from 'node:path';
28
+ import * as yaml from 'yaml';
29
+ import { spawnSync as realSpawnSync } from 'node:child_process';
30
+ import { toErrorMessage } from '../shared/error-utils.js';
31
+ // ============================================================================
32
+ // Severity ordering
33
+ // ============================================================================
34
/** Severity labels from least to most severe; the array index is the rank. */
const SEVERITY_ORDER = ['none', 'low', 'medium', 'high', 'critical'];
/**
 * Check whether a reported severity meets a minimum threshold.
 *
 * Both values are ranked by their position in SEVERITY_ORDER. A non-string
 * `actual`, or a label on either side that is not in SEVERITY_ORDER, yields
 * false so an unrecognised severity can never satisfy the check.
 *
 * @param {unknown} actual - severity reported by the command
 * @param {string} threshold - minimum acceptable severity
 * @returns {boolean} true iff both labels are known and actual ranks >= threshold
 */
export function severityAtLeast(actual, threshold) {
    if (typeof actual !== 'string') {
        return false;
    }
    const actualRank = SEVERITY_ORDER.indexOf(actual);
    const thresholdRank = SEVERITY_ORDER.indexOf(threshold);
    if (actualRank < 0 || thresholdRank < 0) {
        return false;
    }
    return actualRank >= thresholdRank;
}
46
+ // ============================================================================
47
+ // JSON path evaluation
48
+ // ============================================================================
49
/**
 * Evaluate a simple dotted path such as `.output.assert.passed` against an
 * already-parsed JSON value.
 *
 * Only plain dot-separated segments are interpreted (no `[idx]` syntax, no
 * filters). The path `.` alone returns `obj` itself.
 *
 * Each segment must be an OWN property of the current value. Inherited
 * members (`toString`, `constructor`, `__proto__`, ...) are treated as
 * missing, so a path can only ever resolve to data that was actually present
 * in the parsed JSON.
 *
 * @param {unknown} obj - parsed JSON value to walk
 * @param {string} jsonPath - dotted path starting with `.`
 * @returns {unknown} the value at the path, or `undefined` when the path is
 *   not a string starting with `.`, or any segment is missing
 */
export function evalJsonPath(obj, jsonPath) {
    if (typeof jsonPath !== 'string' || !jsonPath.startsWith('.')) {
        return undefined;
    }
    const parts = jsonPath === '.' ? [] : jsonPath.slice(1).split('.');
    let cur = obj;
    for (const part of parts) {
        if (cur == null || typeof cur !== 'object') {
            return undefined;
        }
        // Own-property check: without it `cur[part]` would follow the
        // prototype chain and hand back built-in methods for missing keys.
        if (!Object.prototype.hasOwnProperty.call(cur, part)) {
            return undefined;
        }
        cur = cur[part];
    }
    return cur;
}
68
/**
 * Default command runner: executes `cmd` synchronously through `bash -c`.
 *
 * Output is captured as UTF-8 text with a 16 MiB buffer limit. `opts.timeoutMs`
 * and `opts.cwd` are forwarded to the spawn call as `timeout` and `cwd`.
 *
 * @returns {{status: (number|null), stdout: string, stderr: string, error: (Error|undefined)}}
 *   the spawn result, with non-string stdout/stderr normalised to ''
 */
const defaultCommandRunner = (cmd, opts) => {
    const spawnOptions = {
        timeout: opts.timeoutMs,
        cwd: opts.cwd,
        encoding: 'utf8',
        maxBuffer: 16 * 1024 * 1024,
    };
    const { status, stdout, stderr, error } = realSpawnSync('bash', ['-c', cmd], spawnOptions);
    const asText = (value) => (typeof value === 'string' ? value : '');
    return {
        status,
        stdout: asText(stdout),
        stderr: asText(stderr),
        error,
    };
};
82
+ // ============================================================================
83
+ // Suite detection
84
+ // ============================================================================
85
/**
 * Decide whether a parsed eval suite is a command-mode suite.
 *
 * Detection is purely structural: the value must be an object with a
 * non-empty `test_cases` array whose FIRST entry has a string
 * `input.command`. Later entries are not inspected.
 *
 * @param {unknown} raw - parsed suite document
 * @returns {boolean} true iff the suite has the command-mode shape
 */
export function isCommandEvalSuite(raw) {
    const cases = raw && typeof raw === 'object' ? raw.test_cases : undefined;
    if (!Array.isArray(cases) || cases.length === 0) {
        return false;
    }
    const firstCase = cases[0];
    return typeof firstCase?.input?.command === 'string';
}
99
+ // ============================================================================
100
+ // Result validation
101
+ // ============================================================================
102
/**
 * Check one command result against the test case's `expected` block.
 *
 * Assertions applied, in order:
 *  - `exit_code`: strict equality with `exitCode`
 *  - `json_fields`: each dotted path must deep-equal its expected value
 *  - `severity_at_least`: `.output.checkInjection.severity` must rank at or
 *    above the threshold
 *  - `candidate_count_at_least`: `.output.intentScore.candidateCount` must be
 *    a number >= the bound
 *
 * stdout is parsed as JSON only when one of the last three assertions is
 * present; if parsing fails, validation stops right there and the remaining
 * assertions are not evaluated.
 *
 * @param {object} tc - test case; only `tc.expected` is read
 * @param {number|null} exitCode - exit status of the command
 * @param {string} stdout - captured standard output
 * @returns {{passed: boolean, failures: string[]}} `passed` is true iff no
 *   failure message was recorded
 */
export function validateCommandResult(tc, exitCode, stdout) {
    const expected = tc.expected || {};
    const failures = [];
    const wantExit = expected.exit_code;
    if (wantExit !== undefined && wantExit !== exitCode) {
        failures.push(`exit_code: expected ${wantExit}, got ${exitCode}`);
    }
    const fieldChecks = expected.json_fields;
    const minSeverity = expected.severity_at_least;
    const minCandidates = expected.candidate_count_at_least;
    const checksSeverity = minSeverity !== undefined;
    const checksCandidates = minCandidates !== undefined;
    let parsed = null;
    if (fieldChecks || checksSeverity || checksCandidates) {
        try {
            parsed = JSON.parse(stdout);
        }
        catch (e) {
            // Without parseable output none of the JSON assertions can run.
            failures.push(`stdout is not valid JSON: ${toErrorMessage(e)}`);
            return { passed: false, failures };
        }
    }
    if (fieldChecks) {
        for (const jsonPath of Object.keys(fieldChecks)) {
            const want = fieldChecks[jsonPath];
            const got = evalJsonPath(parsed, jsonPath);
            if (!deepEqual(got, want)) {
                failures.push(`json_fields ${jsonPath}: expected ${JSON.stringify(want)}, got ${JSON.stringify(got)}`);
            }
        }
    }
    if (checksSeverity) {
        const sev = evalJsonPath(parsed, '.output.checkInjection.severity');
        if (!severityAtLeast(sev, minSeverity)) {
            failures.push(`severity_at_least: expected >= ${minSeverity}, got ${JSON.stringify(sev)}`);
        }
    }
    if (checksCandidates) {
        const count = evalJsonPath(parsed, '.output.intentScore.candidateCount');
        // A non-numeric count never satisfies the lower bound.
        const meetsBound = typeof count === 'number' && count >= minCandidates;
        if (!meetsBound) {
            failures.push(`candidate_count_at_least: expected >= ${minCandidates}, got ${JSON.stringify(count)}`);
        }
    }
    return { passed: failures.length === 0, failures };
}
151
/**
 * Strict structural equality for JSON-like values, used to compare
 * `json_fields` expectations against values extracted from stdout.
 *
 * Walks both sides instead of comparing `JSON.stringify` output, so object
 * key order does not matter. Arrays must match element-by-element in order.
 * Two objects are equal only when they have the same number of own keys,
 * every key of `a` is also an own key of `b`, and the values under each key
 * are deep-equal. `null` and `undefined` are equal only to themselves.
 *
 * @param {unknown} a
 * @param {unknown} b
 * @returns {boolean} true iff the two values are structurally identical
 */
function deepEqual(a, b) {
    if (a === b) {
        return true;
    }
    if (a == null || b == null) {
        return a === b;
    }
    if (typeof a !== typeof b) {
        return false;
    }
    if (Array.isArray(a)) {
        if (!Array.isArray(b) || a.length !== b.length) {
            return false;
        }
        return a.every((v, i) => deepEqual(v, b[i]));
    }
    if (typeof a === 'object') {
        if (typeof b !== 'object' || Array.isArray(b)) {
            return false;
        }
        const ak = Object.keys(a);
        const bk = Object.keys(b);
        if (ak.length !== bk.length) {
            return false;
        }
        // Key membership must be checked explicitly: comparing values alone
        // would treat {x: undefined} and {y: undefined} as equal, because a
        // missing key also reads back as undefined.
        return ak.every((k) => Object.prototype.hasOwnProperty.call(b, k) && deepEqual(a[k], b[k]));
    }
    return false;
}
182
/**
 * Runs a skill's command-mode eval suite: each test case is a shell command
 * (optionally preceded by setup commands) whose exit code and JSON stdout are
 * validated against the case's `expected` block.
 */
export class CommandEvalRunner {
    skillsDir;
    timeoutMs;
    cwd;
    runner;
    /**
     * @param {object} [options]
     * @param {string} [options.skillsDir] - directory containing skill
     *   directories (default `.claude/skills`)
     * @param {number} [options.timeoutMs] - per-command timeout in
     *   milliseconds (default 60000)
     * @param {string} [options.cwd] - working directory for commands
     *   (default `process.cwd()`)
     * @param {Function} [options.runner] - command runner to use instead of
     *   the default bash-based one (e.g. a stub in tests)
     */
    constructor(options = {}) {
        this.skillsDir = options.skillsDir ?? '.claude/skills';
        this.timeoutMs = options.timeoutMs ?? 60_000;
        this.cwd = options.cwd ?? process.cwd();
        this.runner = options.runner ?? defaultCommandRunner;
    }
    /**
     * Load `${skill}/evals/${skill}.yaml` and return it iff it's a command-mode
     * suite. Returns null if missing, unparseable, or not command-mode.
     *
     * A relative `skillsDir` is resolved against `cwd`.
     */
    loadSuite(skill) {
        const baseDir = path.isAbsolute(this.skillsDir)
            ? this.skillsDir
            : path.join(this.cwd, this.skillsDir);
        const evalPath = path.join(baseDir, skill, 'evals', `${skill}.yaml`);
        if (!fs.existsSync(evalPath)) {
            return null;
        }
        try {
            const raw = yaml.parse(fs.readFileSync(evalPath, 'utf-8'));
            if (!isCommandEvalSuite(raw)) {
                return null;
            }
            return raw;
        }
        catch {
            // Read and parse errors are deliberately folded into "no suite".
            return null;
        }
    }
    /**
     * Execute every test case of the skill's suite and aggregate the outcome.
     *
     * The suite passes when the pass rate reaches
     * `validation.required_pass_rate` (default 0.9) and, unless
     * `validation.critical_must_pass` is false (default true), every case
     * with priority `critical` passed.
     *
     * @param {string} skill - skill name
     * @returns {Promise<object>} aggregated result for the suite
     * @throws {Error} when the suite cannot be loaded or is not command-mode
     */
    async run(skill) {
        const start = Date.now();
        const suite = this.loadSuite(skill);
        if (!suite) {
            throw new Error(`Command-eval suite not found or not command-mode at ${this.skillsDir}/${skill}/evals/${skill}.yaml`);
        }
        const results = [];
        // Sequential by design: setup steps (e.g. `vibium go <url>`) mutate
        // shared browser state, so interleaving test cases would be racy.
        for (const tc of suite.test_cases) {
            results.push(this.executeCase(tc));
        }
        const passedTests = results.filter((r) => r.passed).length;
        const failedTests = results.length - passedTests;
        const passRate = results.length > 0 ? passedTests / results.length : 0;
        const requiredPassRate = suite.validation?.required_pass_rate ?? 0.9;
        const criticalMustPass = suite.validation?.critical_must_pass ?? true;
        const criticalResults = results.filter((r) => r.priority === 'critical');
        const criticalPassed = criticalResults.every((r) => r.passed);
        const passed = passRate >= requiredPassRate && (!criticalMustPass || criticalPassed);
        return {
            skill,
            passed,
            totalTests: results.length,
            passedTests,
            failedTests,
            passRate,
            criticalPassed,
            testResults: results,
            totalDurationMs: Date.now() - start,
            timestamp: new Date(),
        };
    }
    /**
     * Run one test case: setup steps first, then the main command, then
     * validation of the result.
     *
     * Failure modes reported on the returned result (never thrown):
     *  - `input.command` missing or not a string: the case fails without
     *    running anything. Suite detection only inspects the first case, so
     *    later cases can be malformed.
     *  - a setup step exits non-zero: the case fails with `setupFailure` set
     *    and the main command is not run.
     *  - the runner reports an `error` for the main command: the case fails
     *    even if the declared assertions would otherwise be satisfied.
     */
    executeCase(tc) {
        const start = Date.now();
        const command = tc.input?.command;
        if (typeof command !== 'string') {
            return {
                testId: tc.id,
                passed: false,
                category: tc.category,
                priority: tc.priority,
                exitCode: null,
                stdoutSnippet: '',
                stderrSnippet: '',
                failures: ['input.command is missing or not a string'],
                durationMs: Date.now() - start,
            };
        }
        const runOptions = { timeoutMs: this.timeoutMs, cwd: this.cwd };
        const setup = tc.input?.setup ?? [];
        for (const step of setup) {
            const r = this.runner(step, runOptions);
            if (r.status !== 0) {
                // Append the runner's own error (when it reported one) so a
                // null exit status is explained in the message.
                const detail = r.error ? ` - ${toErrorMessage(r.error)}` : '';
                return {
                    testId: tc.id,
                    passed: false,
                    category: tc.category,
                    priority: tc.priority,
                    exitCode: r.status,
                    stdoutSnippet: truncate(r.stdout),
                    stderrSnippet: truncate(r.stderr),
                    failures: [],
                    setupFailure: `setup step failed (exit ${r.status}): ${step}${detail}`,
                    durationMs: Date.now() - start,
                };
            }
        }
        const r = this.runner(command, runOptions);
        const v = validateCommandResult(tc, r.status, r.stdout);
        // A runner-level error means the command did not complete normally;
        // never let such a case pass silently.
        const failures = r.error
            ? [`command did not complete: ${toErrorMessage(r.error)}`, ...v.failures]
            : v.failures;
        return {
            testId: tc.id,
            passed: failures.length === 0,
            category: tc.category,
            priority: tc.priority,
            exitCode: r.status,
            stdoutSnippet: truncate(r.stdout),
            stderrSnippet: truncate(r.stderr),
            failures,
            durationMs: Date.now() - start,
        };
    }
}
282
/**
 * Shorten `s` to at most `max` characters for inclusion in a result snippet.
 * When text is cut, a marker stating how many characters were dropped is
 * appended; strings that already fit are returned unchanged.
 */
function truncate(s, max = 500) {
    const fits = s.length <= max;
    return fits ? s : `${s.slice(0, max)}…[truncated ${s.length - max}b]`;
}
287
/**
 * Factory wrapper around the CommandEvalRunner constructor.
 *
 * @param {object} [options] - forwarded unchanged to the constructor
 * @returns {CommandEvalRunner} a new runner instance
 */
export function createCommandEvalRunner(options = {}) {
    const evalRunner = new CommandEvalRunner(options);
    return evalRunner;
}
293
+ //# sourceMappingURL=command-eval-runner.js.map
@@ -14,6 +14,8 @@ export { SwarmSkillValidator, createSwarmSkillValidator, DEFAULT_SWARM_VALIDATIO
14
14
  export type { SwarmValidationConfig, SwarmTopology, SwarmValidationResult, SwarmValidationSummary, SkillValidatorFn, } from './swarm-skill-validator.js';
15
15
  export { ParallelEvalRunner, createParallelEvalRunner, DEFAULT_PARALLEL_EVAL_CONFIG, MockLLMExecutor, } from './parallel-eval-runner.js';
16
16
  export type { ParallelEvalConfig, EvalTestCase, EvalTestCaseInput, EvalTestCaseExpectedOutput, EvalTestCaseValidation, EvalSuite, TestCaseTask, ParallelEvalResult, WorkerMessage, WorkerProgress, EvalProgress, LLMExecutor, } from './parallel-eval-runner.js';
17
+ export { CommandEvalRunner, createCommandEvalRunner, evalJsonPath, severityAtLeast, isCommandEvalSuite, validateCommandResult, } from './command-eval-runner.js';
18
+ export type { CommandEvalSuite, CommandEvalTestCase, CommandEvalExpected, CommandEvalResult, CommandEvalTestResult, CommandEvalRunnerOptions, CommandRunner, Severity, TestCasePriority, } from './command-eval-runner.js';
17
19
  export { ValidationResultAggregator, createValidationResultAggregator, } from './validation-result-aggregator.js';
18
20
  export type { AggregatedValidationReport, AggregatorConfig, CrossModelReport, IssueSeverity, ModelAnomaly, ParallelValidationRunResult, RegressionReport, SkillValidationSummary, ValidationIssue, } from './validation-result-aggregator.js';
19
21
  export { TriggerOptimizer, createTriggerOptimizer, parseSkillFrontmatter, } from './trigger-optimizer.js';
@@ -25,6 +25,10 @@ P0_SKILLS, DEFAULT_VALIDATION_MODELS, } from './swarm-skill-validator.js';
25
25
  // ============================================================================
26
26
  export { ParallelEvalRunner, createParallelEvalRunner, DEFAULT_PARALLEL_EVAL_CONFIG, MockLLMExecutor, } from './parallel-eval-runner.js';
27
27
  // ============================================================================
28
+ // Command Eval Runner (ADR-091: qe-browser and other shell-based skills)
29
+ // ============================================================================
30
+ export { CommandEvalRunner, createCommandEvalRunner, evalJsonPath, severityAtLeast, isCommandEvalSuite, validateCommandResult, } from './command-eval-runner.js';
31
+ // ============================================================================
28
32
  // Validation Result Aggregator (ADR-056 Phase 5)
29
33
  // ============================================================================
30
34
  export { ValidationResultAggregator, createValidationResultAggregator, } from './validation-result-aggregator.js';
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "agentic-qe",
3
- "version": "3.9.14",
3
+ "version": "3.9.15",
4
4
  "description": "Agentic Quality Engineering V3 - Domain-Driven Design Architecture with 13 Bounded Contexts, O(log n) coverage analysis, ReasoningBank learning, 60 specialized QE agents, mathematical Coherence verification, deep Claude Flow integration",
5
5
  "type": "module",
6
6
  "main": "./dist/index.js",
@@ -1,2 +0,0 @@
1
- import{createRequire as __cr}from"module";const require=__cr(import.meta.url);if(process.argv.includes('--version')||process.argv.includes('-v')){console.log("3.9.14");process.exit(0)}
2
- import{a,b,c}from"./chunk-5U6M4XCP.js";import"./chunk-AEQMQVDJ.js";import"./chunk-QUK34CSU.js";import"./chunk-DMZ2XOEW.js";import"./chunk-UIYK4Y7K.js";import"./chunk-DGUAAG3T.js";export{a as AgentBoosterAdapter,b as createAgentBoosterAdapter,c as createAgentBoosterAdapterSync};
@@ -1,2 +0,0 @@
1
- import{createRequire as __cr}from"module";const require=__cr(import.meta.url);if(process.argv.includes('--version')||process.argv.includes('-v')){console.log("3.9.14");process.exit(0)}
2
- import{a,b,c,d,e,f}from"./chunk-JOGKQTCU.js";import"./chunk-7732TLNA.js";import"./chunk-3PZ2QLLR.js";import"./chunk-NWZYULZE.js";import"./chunk-KJB6JJXT.js";import"./chunk-S2KGZJHS.js";import"./chunk-NWGIQCCO.js";import"./chunk-UAAJ3BSW.js";import"./chunk-ZCRLZINE.js";import"./chunk-TJOZPPZR.js";import"./chunk-4EQWK6P6.js";import"./chunk-Y3X5OS5F.js";import"./chunk-WSZVSDD5.js";import"./chunk-ZFZX2NA3.js";import"./chunk-UUVTLYUK.js";import"./chunk-STYNOKOM.js";import"./chunk-GUEK5DOK.js";import"./chunk-DMZ2XOEW.js";import"./chunk-PZF5ISGS.js";import"./chunk-UIYK4Y7K.js";import"./chunk-G7L5Y47C.js";import"./chunk-LWHM5BWD.js";import"./chunk-J3XLOHWB.js";import"./chunk-JN7MSUGK.js";import"./chunk-JNLKLC7G.js";import"./chunk-WE7KIXR6.js";import"./chunk-XNXOW7BQ.js";import"./chunk-JLBUS4PR.js";import"./chunk-DJLRRCR6.js";import"./chunk-5HUAD4UU.js";import"./chunk-DGUAAG3T.js";export{b as AQELearningEngine,a as DEFAULT_ENGINE_CONFIG,c as DEFAULT_META_LEARNING_CONFIG,d as MetaLearningEngine,e as createAQELearningEngine,f as createDefaultLearningEngine};
@@ -1,2 +0,0 @@
1
- import{createRequire as __cr}from"module";const require=__cr(import.meta.url);if(process.argv.includes('--version')||process.argv.includes('-v')){console.log("3.9.14");process.exit(0)}
2
- import{a,b,c,d,e,f}from"./chunk-BNPJMPM3.js";import"./chunk-VQB5SLXZ.js";import"./chunk-ZFZX2NA3.js";import"./chunk-UUVTLYUK.js";import"./chunk-STYNOKOM.js";import"./chunk-GUEK5DOK.js";import"./chunk-DMZ2XOEW.js";import"./chunk-UIYK4Y7K.js";import"./chunk-G7L5Y47C.js";import"./chunk-LWHM5BWD.js";import"./chunk-J3XLOHWB.js";import"./chunk-JN7MSUGK.js";import"./chunk-JNLKLC7G.js";import"./chunk-WE7KIXR6.js";import"./chunk-XNXOW7BQ.js";import"./chunk-JLBUS4PR.js";import"./chunk-DJLRRCR6.js";import"./chunk-5HUAD4UU.js";import"./chunk-DGUAAG3T.js";export{f as MCPToolBase,e as defaultToolLogger,d as getMemoryBackend,b as getSharedMemoryBackend,a as registerRvfResetFn,c as resetSharedMemoryBackend};
@@ -1,2 +0,0 @@
1
- import{createRequire as __cr}from"module";const require=__cr(import.meta.url);if(process.argv.includes('--version')||process.argv.includes('-v')){console.log("3.9.14");process.exit(0)}
2
- import{a}from"./chunk-FFACJODG.js";import"./chunk-BNPJMPM3.js";import"./chunk-VQB5SLXZ.js";import"./chunk-ZFZX2NA3.js";import"./chunk-UUVTLYUK.js";import"./chunk-STYNOKOM.js";import"./chunk-GUEK5DOK.js";import"./chunk-DMZ2XOEW.js";import"./chunk-UIYK4Y7K.js";import"./chunk-G7L5Y47C.js";import"./chunk-LWHM5BWD.js";import"./chunk-J3XLOHWB.js";import"./chunk-JN7MSUGK.js";import"./chunk-JNLKLC7G.js";import"./chunk-WE7KIXR6.js";import"./chunk-XNXOW7BQ.js";import"./chunk-JLBUS4PR.js";import"./chunk-DJLRRCR6.js";import"./chunk-5HUAD4UU.js";import"./chunk-DGUAAG3T.js";export{a as BrowserWorkflowTool};
@@ -1,18 +0,0 @@
1
- import{createRequire as __cr}from"module";const require=__cr(import.meta.url);if(process.argv.includes('--version')||process.argv.includes('-v')){console.log("3.9.14");process.exit(0)}
2
- import{a as x,c as N}from"./chunk-UIYK4Y7K.js";import{b as w,c as P,d as O}from"./chunk-JNLKLC7G.js";N();O();import*as k from"fs";import*as T from"path";import*as L from"yaml";var D=5,F=4,U=3e4,W=".claude/skills",G=5e3,$=500,A=100,V=1e3,I=500,H=.8,Q=.2,K=.1,v=.1,q=.1,B=.1,z=.1,X=200,j=500,Y=300,J=1,Z=.9,M={maxWorkers:D,batchSize:F,retryFailedTests:!0,timeout:U,skillsDir:W,progressIntervalMs:G},R=class{async execute(e,s,t){let r=P(A,$+A);return await new Promise(i=>setTimeout(i,r)),{output:this.generateMockResponse(e),tokensUsed:w(I,V+I),durationMs:r}}generateMockResponse(e){let s=[];return e.includes("alt")&&s.push("alt","1.1.1","perceivable"),e.includes("contrast")&&s.push("contrast","1.4.3","4.5:1"),e.includes("keyboard")&&s.push("keyboard","2.1.1","operable","button"),e.includes("label")&&s.push("label","3.3.2","understandable"),e.includes("ARIA")&&s.push("ARIA","4.1.2","robust"),e.includes("focus")&&s.push("focus","2.4.7","outline"),e.includes("lang")&&s.push("lang","3.1.1","language"),e.includes("caption")&&s.push("caption","1.2.2","track"),e.includes("accessible")&&s.push("accessible","compliant"),s.length===0&&s.push("accessibility","WCAG","finding"),`Analysis complete. Found issues related to: ${s.join(", ")}.
3
- Recommendations: Implement proper ${s[0]} attributes for better accessibility.
4
- Severity: ${e.includes("critical")?"critical":"serious"}`}},y=class{constructor(e,s,t){this.executor=s;this.config=t;this.id=e}executor;config;id;status="idle";tasksCompleted=0;tasksFailed=0;lastHeartbeat=Date.now();async executeBatch(e){this.status="running";let s=[];for(let t of e){this.lastHeartbeat=Date.now();try{let r=await this.executeTestCase(t);if(s.push(r),r.passed)this.tasksCompleted++;else if(this.tasksFailed++,this.config.retryFailedTests&&!r.passed){let a=await this.executeTestCase(t,!0);a.passed&&(s[s.length-1]=a,this.tasksCompleted++,this.tasksFailed--)}}catch(r){this.tasksFailed++,s.push({testId:t.testCaseId,passed:!1,expectedPatterns:t.testCase.expected_output.must_contain||[],actualPatterns:[],reasoningQuality:0,category:t.testCase.category,priority:t.testCase.priority,error:x(r)})}}return this.status="idle",s}async executeTestCase(e,s=!1){let t=Date.now(),r=this.buildPrompt(e),a=await this.executor.execute(r,e.model,{timeout:this.config.timeout}),i=this.validateResponse(a.output,e.testCase.expected_output,e.testCase.validation);return{testId:e.testCaseId,passed:i.passed,expectedPatterns:e.testCase.expected_output.must_contain||[],actualPatterns:i.foundPatterns,reasoningQuality:i.reasoningQuality,executionTimeMs:Date.now()-t,category:e.testCase.category,priority:e.testCase.priority,error:i.error}}buildPrompt(e){let{testCase:s}=e,t=s.input.context;return`Analyze the following ${t.language||"HTML"} code for accessibility issues.
5
- Context: ${t.description||s.description}
6
- WCAG Level: ${t.wcagLevel||"AA"}
7
-
8
- Code:
9
- \`\`\`${t.language||"html"}
10
- ${s.input.code}
11
- \`\`\`
12
-
13
- Provide a detailed analysis including:
14
- 1. All accessibility issues found
15
- 2. WCAG success criteria violated
16
- 3. Severity classification (critical/serious/moderate/minor)
17
- 4. Remediation recommendations with code examples`}validateResponse(e,s,t){let r=[],a=[],i=s.must_contain||[],o=0;for(let u of i)new RegExp(u,"i").test(e)&&(r.push(u),o++);let n=s.must_not_contain||[];for(let u of n)new RegExp(u,"i").test(e)&&a.push(`Output should not contain: ${u}`);let d=s.must_match_regex||[];for(let u of d)new RegExp(u,"i").test(e)||a.push(`Output should match regex: ${u}`);let p=i.length>0?o/i.length:1,g=t?.keyword_match_threshold??H,h=p>=g,l=this.calculateReasoningQuality(e,t),m=t?.reasoning_quality_min??0,_=l>=m;return{passed:h&&_&&a.length===0,foundPatterns:r,reasoningQuality:l,error:a.length>0?a.join("; "):void 0}}calculateReasoningQuality(e,s){let t=0;if(e.length>X&&(t+=Q),e.length>j&&(t+=K),e.includes(`
18
- `)&&(t+=v),(e.includes("1.")||e.includes("-"))&&(t+=v),e.includes("```")&&(t+=v),e.includes("WCAG")&&(t+=q),(e.includes("remediation")||e.includes("recommendation"))&&(t+=B),(e.includes("severity")||e.includes("critical"))&&(t+=z),s?.grading_rubric){let r=s.grading_rubric,a=0;r.completeness&&e.length>Y&&(a+=r.completeness),r.accuracy&&e.includes("WCAG")&&(a+=r.accuracy),r.actionability&&(e.includes("fix")||e.includes("change"))&&(a+=r.actionability),t=Math.max(t,a)}return Math.min(t,1)}getProgress(e){return{workerId:this.id,tasksCompleted:this.tasksCompleted,tasksTotal:e,elapsedMs:Date.now()-this.lastHeartbeat}}},S=class{constructor(e=M,s,t=new R){this.config=e;this.skillValidationLearner=s;this.executor=t;for(let r=0;r<e.maxWorkers;r++)this.workers.push(new y(r,t,e))}config;skillValidationLearner;executor;workers=[];progressCallback;onProgress(e){this.progressCallback=e}async runEvalParallel(e,s){let t=Date.now(),r=this.loadEvalSuite(e);if(!r)throw new Error(`Failed to load eval suite for skill: ${e}`);let a=this.createTasks(r,s),i=this.partitionTestCases(a,this.config.batchSize),o=a.length*(this.config.timeout/2),n=[],d=[];for(let c=0;c<i.length;c++){let b=this.workers[c%this.workers.length];d.push(b.executeBatch(i[c]))}let p=this.startProgressReporting(e,s,a.length,t);try{let c=await Promise.all(d);for(let b of c)n.push(...b)}finally{p&&clearInterval(p)}let g=Date.now()-t,h=n.filter(c=>c.passed).length,l=n.filter(c=>!c.passed).length,m=a.length-n.length,_=n.length>0?h/n.length:0,C=n.length>0?n.reduce((c,b)=>c+b.reasoningQuality,0)/n.length:0,u=g>0?o/g:1,f=_>=r.success_criteria.pass_rate&&C>=(r.success_criteria.avg_reasoning_quality??0);return await this.recordOutcome({skillName:e,trustTier:this.determineTrustTier(r),validationLevel:"eval",model:s,passed:f,score:_,testCaseResults:n,timestamp:new 
Date,runId:`${e}-${s}-${Date.now()}`,metadata:{duration:g,parallelSpeedup:u,workersUsed:this.config.maxWorkers,version:r.version}}),{skill:e,model:s,totalTests:a.length,passedTests:h,failedTests:l,skippedTests:m,passRate:_,testResults:n,totalDurationMs:g,parallelSpeedup:u,avgReasoningQuality:C,passed:f,workersUsed:this.config.maxWorkers,timestamp:new Date}}async runMultipleEvalsParallel(e,s){let t=new Map,r=[];for(let o of e){t.set(o,[]);for(let n of s)r.push({skill:o,model:n})}let a=r.map(async({skill:o,model:n})=>{let d=await this.runEvalParallel(o,n);return{skill:o,result:d}}),i=await Promise.all(a);for(let{skill:o,result:n}of i)t.get(o).push(n);return t}loadEvalSuite(e){let s=T.isAbsolute(this.config.skillsDir)?this.config.skillsDir:T.join(process.cwd(),this.config.skillsDir),t=T.join(s,e,"evals",`${e}.yaml`);if(!k.existsSync(t))return console.error(`Eval suite not found: ${t}`),null;try{let r=k.readFileSync(t,"utf-8");return L.parse(r)}catch(r){return console.error(`Failed to parse eval suite: ${r instanceof Error?r.message:r}`),null}}createTasks(e,s){return e.test_cases.map((t,r)=>({skillName:e.skill,testCaseId:t.id,testCase:t,model:s,batchId:Math.floor(r/this.config.batchSize),indexInBatch:r%this.config.batchSize}))}partitionTestCases(e,s){let t=[];for(let r=0;r<e.length;r+=s)t.push(e.slice(r,r+s));return t}determineTrustTier(e){let s=e.mcp_integration?.enabled??!1,t=e.learning?.cross_model_comparison??!1,r=e.success_criteria.critical_pass_rate??0;return s&&t&&r>=J?3:s||r>=Z?2:1}startProgressReporting(e,s,t,r){return this.progressCallback?setInterval(()=>{let 
a=Date.now()-r,i=this.workers.reduce((l,m)=>l+m.tasksCompleted,0),o=this.workers.reduce((l,m)=>l+m.tasksFailed,0),n=this.workers.filter(l=>l.status==="running").length,d=a>0?i/a:0,p=t-i,g=d>0?p/d:0,h={skill:e,model:s,totalTasks:t,completedTasks:i,failedTasks:o,activeWorkers:n,elapsedMs:a,estimatedRemainingMs:g,workerProgress:this.workers.map(l=>l.getProgress(t))};this.progressCallback(h)},this.config.progressIntervalMs):null}async recordOutcome(e){await this.skillValidationLearner.recordValidationOutcome(e)}getWorkerStatus(){return this.workers.map(e=>({id:e.id,status:e.status,tasksCompleted:e.tasksCompleted,tasksFailed:e.tasksFailed}))}resetWorkers(){for(let e of this.workers)e.tasksCompleted=0,e.tasksFailed=0,e.status="idle"}};function re(E,e={},s){let t={...M,...e};return new S(t,E,s)}export{M as a,re as b};
@@ -1,2 +0,0 @@
1
- import{createRequire as __cr}from"module";const require=__cr(import.meta.url);if(process.argv.includes('--version')||process.argv.includes('-v')){console.log("3.9.14");process.exit(0)}
2
- import{a as e,b as g,e as r}from"./chunk-GUEK5DOK.js";import{c as o}from"./chunk-DGUAAG3T.js";var L=o(()=>{"use strict";e();g();r()});export{L as a};
@@ -1,2 +0,0 @@
1
- import{createRequire as __cr}from"module";const require=__cr(import.meta.url);if(process.argv.includes('--version')||process.argv.includes('-v')){console.log("3.9.14");process.exit(0)}
2
- import{c as a,d as b,e as c}from"./chunk-GIUAY4KY.js";import"./chunk-ME4LVBTT.js";import"./chunk-UIYK4Y7K.js";import"./chunk-DGUAAG3T.js";c();export{a as VibiumClientImpl,b as VibiumClientProvider};
@@ -1,2 +0,0 @@
1
- import{createRequire as __cr}from"module";const require=__cr(import.meta.url);if(process.argv.includes('--version')||process.argv.includes('-v')){console.log("3.9.14");process.exit(0)}
2
- import{a,b}from"./chunk-KFAN5FZM.js";import"./chunk-2L3BCZ2F.js";import"./chunk-PZF5ISGS.js";import"./chunk-G7L5Y47C.js";import"./chunk-DGUAAG3T.js";export{a as CrossDomainEventRouter,b as createCrossDomainRouter};
@@ -1,2 +0,0 @@
1
- import{createRequire as __cr}from"module";const require=__cr(import.meta.url);if(process.argv.includes('--version')||process.argv.includes('-v')){console.log("3.9.14");process.exit(0)}
2
- import{a,b,c,d,e,f,g,h,i,j,k,l,m,n,o,p,q,r,s,t,u}from"./chunk-M2RO34V7.js";import"./chunk-HJJTLOWZ.js";import"./chunk-JOGKQTCU.js";import"./chunk-7732TLNA.js";import"./chunk-3PZ2QLLR.js";import"./chunk-NWZYULZE.js";import"./chunk-KJB6JJXT.js";import"./chunk-S2KGZJHS.js";import"./chunk-NWGIQCCO.js";import"./chunk-UAAJ3BSW.js";import"./chunk-ZCRLZINE.js";import"./chunk-TJOZPPZR.js";import"./chunk-4EQWK6P6.js";import"./chunk-Y3X5OS5F.js";import"./chunk-WSZVSDD5.js";import"./chunk-KVN4FTPT.js";import"./chunk-ZFZX2NA3.js";import"./chunk-UUVTLYUK.js";import"./chunk-STYNOKOM.js";import"./chunk-GUEK5DOK.js";import"./chunk-DMZ2XOEW.js";import"./chunk-PZF5ISGS.js";import"./chunk-UIYK4Y7K.js";import"./chunk-G7L5Y47C.js";import"./chunk-LWHM5BWD.js";import"./chunk-J3XLOHWB.js";import"./chunk-JN7MSUGK.js";import"./chunk-JNLKLC7G.js";import"./chunk-WE7KIXR6.js";import"./chunk-XNXOW7BQ.js";import"./chunk-JLBUS4PR.js";import"./chunk-DJLRRCR6.js";import"./chunk-5HUAD4UU.js";import"./chunk-DGUAAG3T.js";export{t as BUILT_IN_STRATEGIES,b as ConceptGraph,g as DEFAULT_ACTIVATION_CONFIG,a as DEFAULT_CONCEPT_GRAPH_CONFIG,m as DEFAULT_DREAM_CONFIG,q as DEFAULT_DREAM_SCHEDULER_CONFIG,i as DEFAULT_INSIGHT_CONFIG,k as DEFAULT_VALIDATION_THRESHOLDS,n as DreamEngine,r as DreamScheduler,f as HISTORY_TRIM_TARGET_RATIO,j as InsightGenerator,d as MAX_ACTIVATION_HISTORY_ENTRIES,e as MAX_COACTIVATION_ENTRIES,l as RVCOWBranchManager,u as SpeculativeDreamer,h as SpreadingActivation,c as createConceptGraph,o as createDreamEngine,s as createDreamScheduler,p as default};
@@ -1,2 +0,0 @@
1
- import{createRequire as __cr}from"module";const require=__cr(import.meta.url);if(process.argv.includes('--version')||process.argv.includes('-v')){console.log("3.9.14");process.exit(0)}
2
- import{a,b,c,d,e,f,g,h,i}from"./chunk-G7L5Y47C.js";import"./chunk-DGUAAG3T.js";export{h as NIL,d as parse,b as stringify,c as v1,e as v3,f as v4,g as v5,a as validate,i as version};
@@ -1,15 +0,0 @@
1
- import{createRequire as __cr}from"module";const require=__cr(import.meta.url);if(process.argv.includes('--version')||process.argv.includes('-v')){console.log("3.9.14");process.exit(0)}
2
// Bundled chunk: `eval` CLI command (parallel skill-evaluation runner).
// The local names given to the minified chunk imports below are inferred from
// how they are used in this module — TODO confirm against the original sources.
import { a as createReasoningBank } from "./chunk-DNFDSDKE.js";
import { a as DEFAULT_RUNNER_CONFIG, b as createEvalRunner } from "./chunk-53KLVMAT.js";
import { a as createLearner } from "./chunk-MFGYMNNY.js";
import "./chunk-WUYJYRBY.js";
import "./chunk-FNKKENLS.js";
import "./chunk-2L3BCZ2F.js";
import "./chunk-KJB6JJXT.js";
import "./chunk-NWGIQCCO.js";
import "./chunk-UAAJ3BSW.js";
import "./chunk-WSZVSDD5.js";
import "./chunk-ZFZX2NA3.js";
import "./chunk-UUVTLYUK.js";
import "./chunk-4RDTR2OF.js";
import "./chunk-STYNOKOM.js";
import "./chunk-GUEK5DOK.js";
import "./chunk-DMZ2XOEW.js";
import "./chunk-PZF5ISGS.js";
import "./chunk-UIYK4Y7K.js";
import "./chunk-G7L5Y47C.js";
import "./chunk-LWHM5BWD.js";
import "./chunk-J3XLOHWB.js";
import "./chunk-JN7MSUGK.js";
import "./chunk-JNLKLC7G.js";
import "./chunk-WE7KIXR6.js";
import "./chunk-XNXOW7BQ.js";
import "./chunk-JLBUS4PR.js";
import "./chunk-DJLRRCR6.js";
import "./chunk-5HUAD4UU.js";
import "./chunk-DGUAAG3T.js";
import { Command } from "commander";
import chalk from "chalk";
import * as fs from "fs";
import * as path from "path";

// Skill groups selected by `--skills-tier`. The option help text reads
// "1=all, 2=P0+P1, 3=P0 only", so the first two lists are P0 and P1; the third
// is whatever "all" adds on top (presumably P2 — confirm).
const P0_SKILLS = [
  "accessibility-testing",
  "security-testing",
  "performance-testing",
  "chaos-engineering-resilience",
  "contract-testing",
];
const P1_SKILLS = [
  "risk-based-testing",
  "test-design-techniques",
  "regression-testing",
  "test-data-management",
  "localization-testing",
];
const P2_SKILLS = [
  "quality-metrics",
  "refactoring-patterns",
  "iterative-loop",
  "stream-chain",
];

/**
 * Resolve a tier number to the list of skill names to evaluate.
 *
 * @param {number} tier - 1, 2 or 3; any other value falls back to the P0 list.
 * @returns {string[]} Skill names for the tier.
 */
function getSkillsForTier(tier) {
  switch (tier) {
    case 3:
      return P0_SKILLS;
    case 2:
      return [...P0_SKILLS, ...P1_SKILLS];
    case 1:
      return [...P0_SKILLS, ...P1_SKILLS, ...P2_SKILLS];
    default:
      return P0_SKILLS;
  }
}

/**
 * Commander option parser for integer flags.
 *
 * Commander invokes custom parsers as `parser(value, previousValue)`. Passing
 * the global `parseInt` directly therefore uses the option's default as the
 * radix (e.g. `parseInt("8", 5)` is NaN), so the radix is pinned to 10 here.
 *
 * @param {string} value - Raw option value from the command line.
 * @returns {number} The parsed base-10 integer.
 * @throws {Error} If `value` does not start with a base-10 integer.
 */
function parseIntegerOption(value) {
  const parsed = Number.parseInt(value, 10);
  if (Number.isNaN(parsed)) {
    throw new Error(`Expected an integer but received "${value}"`);
  }
  return parsed;
}

// Last progress line written to stdout; used to blank it out before redrawing.
let lastProgressLine = "";

/**
 * Redraw the single-line progress indicator on stdout.
 *
 * @param {object} progress - Progress snapshot; reads `completedTasks`,
 *   `totalTasks`, `failedTasks`, `activeWorkers`, `elapsedMs` and
 *   `estimatedRemainingMs`.
 */
function renderProgress(progress) {
  // Guard the division: a snapshot with zero total tasks would otherwise print "NaN%".
  const percent =
    progress.totalTasks > 0
      ? Math.round((progress.completedTasks / progress.totalTasks) * 100)
      : 0;
  const elapsed = formatDuration(progress.elapsedMs);
  const remaining = formatDuration(progress.estimatedRemainingMs);
  const bar = renderProgressBar(percent, 30);
  const line = `${chalk.cyan("Progress:")} ${bar} ${percent}% | ${chalk.green(progress.completedTasks)}/${progress.totalTasks} tasks | ${chalk.yellow(progress.failedTasks)} failed | ${chalk.blue(progress.activeWorkers)} workers | ${elapsed} elapsed, ~${remaining} remaining`;

  if (lastProgressLine) {
    // Overwrite the previous line with spaces before printing the new one.
    process.stdout.write("\r" + " ".repeat(lastProgressLine.length) + "\r");
  }
  process.stdout.write(line);
  lastProgressLine = line;
}

/**
 * Build a coloured text bar of `width` characters.
 *
 * @param {number} percent - Completion percentage.
 * @param {number} width - Total bar width in characters.
 * @returns {string} `=` for the completed part followed by `-` for the rest.
 */
function renderProgressBar(percent, width) {
  // Clamp so an out-of-range percentage cannot hand String#repeat a negative
  // count, which would throw a RangeError.
  const raw = Math.round((percent / 100) * width);
  const filled = Number.isFinite(raw) ? Math.min(width, Math.max(0, raw)) : 0;
  return chalk.green("=".repeat(filled)) + chalk.gray("-".repeat(width - filled));
}

/**
 * Format a millisecond duration as `Nms`, `N.Ns` or `Nm Ns`.
 *
 * @param {number} ms - Duration in milliseconds.
 * @returns {string} Human-readable duration.
 */
function formatDuration(ms) {
  if (ms < 1e3) {
    return `${ms}ms`;
  }
  if (ms < 6e4) {
    return `${(ms / 1e3).toFixed(1)}s`;
  }
  // Round to whole seconds before splitting into minutes so that e.g.
  // 119600 ms renders as "2m 0s" rather than "1m 60s".
  const totalSeconds = Math.round(ms / 1e3);
  const minutes = Math.floor(totalSeconds / 60);
  const seconds = totalSeconds % 60;
  return `${minutes}m ${seconds}s`;
}

/**
 * Render one eval result as a coloured, multi-line console summary.
 *
 * @param {object} result - Eval result; reads `skill`, `model`, `passed`, the
 *   test counters, `passRate`, `avgReasoningQuality`, `totalDurationMs`,
 *   `workersUsed`, `parallelSpeedup` and `testResults`.
 * @returns {string} Newline-joined summary text.
 */
function formatEvalResult(result) {
  const status = result.passed ? chalk.green("PASSED") : chalk.red("FAILED");
  const lines = [
    "",
    chalk.bold(`Eval Results: ${result.skill}`) + chalk.dim(` (${result.model})`),
    "=".repeat(60),
    `Status: ${status}`,
    "",
    chalk.bold("Metrics:"),
    ` Total Tests: ${result.totalTests}`,
    ` Passed: ${chalk.green(result.passedTests)}`,
    ` Failed: ${chalk.red(result.failedTests)}`,
    ` Skipped: ${chalk.yellow(result.skippedTests)}`,
    ` Pass Rate: ${(result.passRate * 100).toFixed(1)}%`,
    ` Reasoning Avg: ${(result.avgReasoningQuality * 100).toFixed(1)}%`,
    "",
    chalk.bold("Performance:"),
    ` Duration: ${formatDuration(result.totalDurationMs)}`,
    ` Workers Used: ${result.workersUsed}`,
    ` Parallel Speedup: ${chalk.cyan(result.parallelSpeedup.toFixed(2) + "x")}`,
    "",
  ];

  const failures = result.testResults.filter((test) => !test.passed);
  if (failures.length > 0) {
    lines.push(chalk.bold.red("Failed Tests:"));
    // Only the first five failures are listed; the rest are summarised.
    for (const failure of failures.slice(0, 5)) {
      lines.push(` ${chalk.red("x")} ${failure.testId}`);
      if (failure.error) {
        lines.push(` ${chalk.dim(failure.error)}`);
      }
    }
    if (failures.length > 5) {
      lines.push(` ... and ${failures.length - 5} more`);
    }
    lines.push("");
  }
  return lines.join("\n");
}

/**
 * Render a list of eval results as a Markdown report.
 *
 * @param {object[]} results - Eval results (same fields as the console summary).
 * @returns {string} Markdown document with a summary table and per-result details.
 */
function formatMarkdownReport(results) {
  const lines = [
    "# Skill Evaluation Report",
    "",
    `Generated: ${new Date().toISOString()}`,
    "",
    "## Summary",
    "",
    "| Skill | Model | Pass Rate | Duration | Speedup | Status |",
    "|-------|-------|-----------|----------|---------|--------|",
  ];

  for (const result of results) {
    const status = result.passed ? "PASSED" : "FAILED";
    lines.push(
      `| ${result.skill} | ${result.model} | ${(result.passRate * 100).toFixed(1)}% | ${formatDuration(result.totalDurationMs)} | ${result.parallelSpeedup.toFixed(2)}x | ${status} |`,
    );
  }

  lines.push("", "## Detailed Results", "");

  for (const result of results) {
    lines.push(
      `### ${result.skill} (${result.model})`,
      "",
      `- **Total Tests:** ${result.totalTests}`,
      `- **Passed:** ${result.passedTests}`,
      `- **Failed:** ${result.failedTests}`,
      `- **Pass Rate:** ${(result.passRate * 100).toFixed(1)}%`,
      `- **Avg Reasoning Quality:** ${(result.avgReasoningQuality * 100).toFixed(1)}%`,
      "",
    );
    const failures = result.testResults.filter((test) => !test.passed);
    if (failures.length > 0) {
      lines.push("**Failed Tests:**", "");
      for (const failure of failures) {
        lines.push(`- \`${failure.testId}\`: ${failure.error || "No error message"}`);
      }
      lines.push("");
    }
  }
  return lines.join("\n");
}

/**
 * Create and initialise the reasoning bank backing the eval commands, plus a
 * learner and a default-configured runner built on top of it.
 *
 * Callers are responsible for calling `reasoningBank.dispose()` when done.
 *
 * @returns {Promise<{runner: object, learner: object, reasoningBank: object}>}
 */
async function createEvalContext() {
  const reasoningBank = await createReasoningBank({
    sqlite: { dbPath: ".agentic-qe/eval-runner.db" },
    hnsw: { M: 16, efConstruction: 100, efSearch: 50 },
  });
  await reasoningBank.initialize();
  const learner = createLearner(reasoningBank);
  return { runner: createEvalRunner(learner), learner, reasoningBank };
}

/**
 * `eval run`: evaluate one skill against one model, print the summary,
 * optionally write the JSON result to disk, then exit with 0 (passed) or 1.
 *
 * @param {object} options - Reads `skill`, `model`, `parallel`, `workers`,
 *   `batchSize`, `timeout`, `retry`, `output` and `verbose`.
 */
async function runSingleEval(options) {
  console.log(chalk.bold("\nAQE Parallel Eval Runner"));
  console.log(chalk.dim("ADR-056 Phase 5: Worker Pool Pattern\n"));

  const { learner, reasoningBank } = await createEvalContext();
  let exitCode;
  try {
    const config = {
      ...DEFAULT_RUNNER_CONFIG,
      maxWorkers: options.parallel ? options.workers : 1,
      batchSize: options.batchSize,
      timeout: options.timeout,
      retryFailedTests: options.retry,
    };
    // Build a runner with the CLI-derived config on the shared learner, the
    // same way the multi-skill command does.
    const runner = createEvalRunner(learner, config);
    if (options.verbose) {
      runner.onProgress(renderProgress);
    }

    console.log(chalk.cyan(`Running eval suite: ${options.skill}`));
    console.log(chalk.cyan(`Model: ${options.model}`));
    console.log(chalk.cyan(`Mode: ${options.parallel ? `Parallel (${options.workers} workers)` : "Sequential"}`));
    console.log("");

    const result = await runner.runEvalParallel(options.skill, options.model);

    if (lastProgressLine) {
      // Terminate the in-place progress line before printing the summary.
      console.log("");
    }
    console.log(formatEvalResult(result));

    if (options.output) {
      const outputPath = path.resolve(options.output);
      fs.writeFileSync(outputPath, JSON.stringify(result, null, 2));
      console.log(chalk.green(`Results saved to: ${outputPath}`));
    }
    exitCode = result.passed ? 0 : 1;
  } finally {
    await reasoningBank.dispose();
  }
  // Exit only after the bank has been disposed: process.exit() terminates
  // immediately, so calling it inside the try block skipped the finally clause.
  process.exit(exitCode);
}

/**
 * `eval run-all`: evaluate every skill in a tier against each listed model,
 * print a one-line verdict per result, optionally write a report (Markdown
 * when the path ends in `.md`, JSON otherwise), then exit with 0 if nothing
 * failed or 1 otherwise.
 *
 * NOTE(review): `options.verbose` is accepted by the command but not read here,
 * so no progress callback is registered for multi-skill runs.
 *
 * @param {object} options - Reads `skillsTier`, `models` (comma-separated),
 *   `parallel`, `workers` and `output`.
 */
async function runAllEvals(options) {
  console.log(chalk.bold("\nAQE Parallel Eval Runner - Multi-Skill"));
  console.log(chalk.dim("ADR-056 Phase 5: Worker Pool Pattern\n"));

  const { learner, reasoningBank } = await createEvalContext();
  let exitCode;
  try {
    const skills = getSkillsForTier(options.skillsTier);
    const models = options.models.split(",").map((model) => model.trim());

    console.log(chalk.cyan(`Running evals for ${skills.length} skills`));
    console.log(chalk.cyan(`Models: ${models.join(", ")}`));
    console.log(chalk.cyan(`Mode: ${options.parallel ? `Parallel (${options.workers} workers)` : "Sequential"}`));
    console.log("");

    const config = {
      ...DEFAULT_RUNNER_CONFIG,
      maxWorkers: options.parallel ? options.workers : 1,
    };
    const resultsByKey = await createEvalRunner(learner, config).runMultipleEvalsParallel(skills, models);

    // Flatten the per-key result collections into one list.
    const results = [];
    for (const group of resultsByKey.values()) {
      results.push(...group);
    }

    console.log(chalk.bold("\nEval Summary"));
    console.log("=".repeat(60));

    let passedCount = 0;
    let failedCount = 0;
    for (const result of results) {
      const marker = result.passed ? chalk.green("v") : chalk.red("x");
      console.log(`${marker} ${result.skill} (${result.model}): ${(result.passRate * 100).toFixed(1)}% (${result.parallelSpeedup.toFixed(2)}x speedup)`);
      if (result.passed) {
        passedCount++;
      } else {
        failedCount++;
      }
    }
    console.log("");
    console.log(`Total: ${chalk.green(passedCount)} passed, ${chalk.red(failedCount)} failed`);

    if (options.output) {
      const outputPath = path.resolve(options.output);
      if (outputPath.endsWith(".md")) {
        fs.writeFileSync(outputPath, formatMarkdownReport(results));
      } else {
        fs.writeFileSync(outputPath, JSON.stringify(results, null, 2));
      }
      console.log(chalk.green(`\nReport saved to: ${outputPath}`));
    }
    exitCode = failedCount === 0 ? 0 : 1;
  } finally {
    await reasoningBank.dispose();
  }
  // See runSingleEval: exit after disposal so the cleanup actually runs.
  process.exit(exitCode);
}

/**
 * `eval status`: print the stored validation confidence for a skill and, when
 * available, its per-level scores and cross-model analysis.
 *
 * @param {string} skill - Skill name to look up.
 */
async function showSkillStatus(skill) {
  const { learner, reasoningBank } = await createEvalContext();
  try {
    const confidence = await learner.getSkillConfidence(skill);
    if (!confidence) {
      console.log(chalk.yellow(`No validation history found for skill: ${skill}`));
      return;
    }

    console.log(chalk.bold(`\nSkill Validation Status: ${skill}`));
    console.log("=".repeat(50));
    console.log(`Average Score: ${(confidence.avgScore * 100).toFixed(1)}%`);
    console.log(`Trend: ${confidence.trend}`);
    console.log(`Validation Count: ${confidence.outcomes.length}`);
    console.log(`Last Updated: ${confidence.lastUpdated}`);

    if (confidence.byLevel) {
      console.log("\nBy Validation Level:");
      for (const [level, score] of Object.entries(confidence.byLevel)) {
        if (score !== undefined) {
          console.log(` ${level}: ${(score * 100).toFixed(1)}%`);
        }
      }
    }

    const analysis = await learner.getCrossModelAnalysis(skill);
    if (analysis) {
      console.log("\nCross-Model Analysis:");
      console.log(` Variance: ${(analysis.variance * 100).toFixed(2)}%`);
      console.log(` Has Anomalies: ${analysis.hasAnomalies}`);
      if (analysis.models) {
        console.log("\nModel Performance:");
        for (const [model, stats] of Object.entries(analysis.models)) {
          console.log(` ${model}: ${(stats.passRate * 100).toFixed(1)}% pass rate (${stats.sampleCount} samples)`);
        }
      }
    }
  } finally {
    await reasoningBank.dispose();
  }
}

/**
 * `eval report`: build a validation report for a skill from its stored
 * patterns and trends, as JSON or Markdown, and write it to `output` or stdout.
 *
 * @param {object} options - Reads `skill`, `format` ("json" selects JSON, any
 *   other value produces Markdown) and `output`.
 */
async function generateValidationReport(options) {
  const { learner, reasoningBank } = await createEvalContext();
  try {
    const patterns = await learner.queryValidationPatterns(options.skill, 50);
    if (patterns.length === 0) {
      console.log(chalk.yellow(`No validation patterns found for skill: ${options.skill}`));
      return;
    }

    const learnedPatterns = await learner.extractLearnedPatterns(options.skill);
    const trends = await learner.getValidationTrends(options.skill);
    const report = {
      skill: options.skill,
      generated: new Date().toISOString(),
      patternCount: patterns.length,
      learnedPatterns,
      trends,
    };

    let content;
    if (options.format === "json") {
      content = JSON.stringify(report, null, 2);
    } else {
      const lines = [
        `# Validation Report: ${options.skill}`,
        "",
        `Generated: ${report.generated}`,
        "",
        "## Overview",
        `- Patterns Collected: ${report.patternCount}`,
        `- Learned Patterns: ${report.learnedPatterns.length}`,
        "",
      ];
      if (report.trends) {
        lines.push(
          "## Trends",
          `- Overall: ${report.trends.overall}`,
          `- Recent Pass Rate: ${(report.trends.recentPassRate * 100).toFixed(1)}%`,
          "",
        );
      }
      if (report.learnedPatterns.length > 0) {
        lines.push("## Learned Patterns", "");
        for (const pattern of report.learnedPatterns) {
          lines.push(
            `### ${pattern.category}`,
            `- Confidence: ${(pattern.confidence * 100).toFixed(1)}%`,
            `- Observations: ${pattern.observationCount}`,
            `- Models: ${pattern.models.join(", ")}`,
            "",
          );
        }
      }
      content = lines.join("\n");
    }

    if (options.output) {
      fs.writeFileSync(path.resolve(options.output), content);
      console.log(chalk.green(`Report saved to: ${options.output}`));
    } else {
      console.log(content);
    }
  } finally {
    await reasoningBank.dispose();
  }
}

/**
 * Build the `eval` commander command with its `run`, `run-all`, `status` and
 * `report` subcommands.
 *
 * NOTE(review): the chunk banner above this module prints the package version
 * and exits whenever `-v` appears in process.argv, so the `-v` short flag for
 * `--verbose` is probably unreachable in practice — confirm and consider
 * dropping one of the two meanings.
 *
 * @returns {Command} The configured `eval` command.
 */
function createEvalCommand() {
  const evalCommand = new Command("eval").description("Run skill evaluation suites in parallel");

  evalCommand
    .command("run")
    .description("Run eval suite for a single skill")
    .requiredOption("-s, --skill <skill>", "Skill name to evaluate")
    .requiredOption("-m, --model <model>", "Model to use (e.g., claude-3.5-sonnet)")
    .option("-p, --parallel", "Enable parallel execution", false)
    .option("-w, --workers <n>", "Number of parallel workers", parseIntegerOption, 5)
    .option("-b, --batch-size <n>", "Test cases per batch", parseIntegerOption, 4)
    .option("-t, --timeout <ms>", "Timeout per test case (ms)", parseIntegerOption, 3e4)
    .option("--no-retry", "Disable retry of failed tests")
    .option("-o, --output <path>", "Output file path for results")
    .option("-v, --verbose", "Show progress during execution", false)
    .action(async (opts) => {
      await runSingleEval({
        skill: opts.skill,
        model: opts.model,
        parallel: opts.parallel,
        workers: opts.workers,
        batchSize: opts.batchSize,
        timeout: opts.timeout,
        retry: opts.retry !== false,
        output: opts.output,
        verbose: opts.verbose,
      });
    });

  evalCommand
    .command("run-all")
    .description("Run eval suites for multiple skills")
    .option("--skills-tier <tier>", "Skill tier (1=all, 2=P0+P1, 3=P0 only)", parseIntegerOption, 3)
    .option("--models <models>", "Comma-separated models to test", "claude-3.5-sonnet")
    .option("-p, --parallel", "Enable parallel execution", true)
    .option("-w, --workers <n>", "Number of parallel workers", parseIntegerOption, 5)
    .option("-o, --output <path>", "Output file path for report")
    .option("-v, --verbose", "Show progress during execution", false)
    .action(async (opts) => {
      await runAllEvals({
        skillsTier: opts.skillsTier,
        models: opts.models,
        parallel: opts.parallel,
        workers: opts.workers,
        output: opts.output,
        verbose: opts.verbose,
      });
    });

  evalCommand
    .command("status")
    .description("Show validation status for a skill")
    .requiredOption("-s, --skill <skill>", "Skill name")
    .action(async (opts) => {
      await showSkillStatus(opts.skill);
    });

  evalCommand
    .command("report")
    .description("Generate validation report for a skill")
    .requiredOption("-s, --skill <skill>", "Skill name")
    .option("-f, --format <format>", "Output format (json|markdown)", "markdown")
    .option("-o, --output <path>", "Output file path")
    .action(async (opts) => {
      await generateValidationReport({
        skill: opts.skill,
        format: opts.format,
        output: opts.output,
      });
    });

  return evalCommand;
}

export { createEvalCommand };
@@ -1,2 +0,0 @@
1
- import{createRequire as __cr}from"module";const require=__cr(import.meta.url);if(process.argv.includes('--version')||process.argv.includes('-v')){console.log("3.9.14");process.exit(0)}
2
- import{a,c as b}from"./chunk-JLBUS4PR.js";import"./chunk-DJLRRCR6.js";import"./chunk-5HUAD4UU.js";import"./chunk-DGUAAG3T.js";b();export{a as HnswAdapter};
@@ -1,2 +0,0 @@
1
- import{createRequire as __cr}from"module";const require=__cr(import.meta.url);if(process.argv.includes('--version')||process.argv.includes('-v')){console.log("3.9.14");process.exit(0)}
2
- import{a,b,c,d}from"./chunk-RQFC7Q33.js";import"./chunk-STYNOKOM.js";import"./chunk-GUEK5DOK.js";import"./chunk-JNLKLC7G.js";import"./chunk-JLBUS4PR.js";import"./chunk-DJLRRCR6.js";import"./chunk-5HUAD4UU.js";import"./chunk-DGUAAG3T.js";export{a as DEFAULT_HNSW_CONFIG,b as HNSWIndex,d as benchmarkHNSW,c as createHNSWIndex};
@@ -1,2 +0,0 @@
1
- import{createRequire as __cr}from"module";const require=__cr(import.meta.url);if(process.argv.includes('--version')||process.argv.includes('-v')){console.log("3.9.14");process.exit(0)}
2
- import{a}from"./chunk-GM5LIYLQ.js";import"./chunk-GOTVYSX5.js";import"./chunk-STYNOKOM.js";import"./chunk-GUEK5DOK.js";import"./chunk-DMZ2XOEW.js";import"./chunk-PZF5ISGS.js";import"./chunk-UIYK4Y7K.js";import"./chunk-G7L5Y47C.js";import"./chunk-DGUAAG3T.js";export{a as ImpactAnalyzerService};
@@ -1,2 +0,0 @@
1
- import{createRequire as __cr}from"module";const require=__cr(import.meta.url);if(process.argv.includes('--version')||process.argv.includes('-v')){console.log("3.9.14");process.exit(0)}
2
- import{a,b,c,d}from"./chunk-CGC63MGF.js";import"./chunk-HEQ23HBE.js";import"./chunk-BFCMOMOF.js";import"./chunk-XH774Q6Z.js";import"./chunk-L4IWZXSC.js";import"./chunk-DMZ2XOEW.js";import"./chunk-UIYK4Y7K.js";import"./chunk-LWHM5BWD.js";import"./chunk-J3XLOHWB.js";import"./chunk-DGUAAG3T.js";export{a as InitOrchestrator,b as createInitOrchestrator,d as formatInitResult,c as quickInit};
@@ -1,2 +0,0 @@
1
- import{createRequire as __cr}from"module";const require=__cr(import.meta.url);if(process.argv.includes('--version')||process.argv.includes('-v')){console.log("3.9.14");process.exit(0)}
2
- import{v as a,w as b}from"./chunk-6EZY2QVM.js";import"./chunk-JWR3NNFA.js";import"./chunk-J5YOXLBD.js";import"./chunk-I3P5RZX7.js";import"./chunk-UG2J5MTY.js";import"./chunk-G2KQUEGY.js";import"./chunk-KHUFPJE4.js";import"./chunk-5DWTU2PN.js";import"./chunk-36YZ2UF7.js";import"./chunk-VDGWDDBC.js";import"./chunk-D6APBW66.js";import"./chunk-FNKKENLS.js";import"./chunk-2L3BCZ2F.js";import"./chunk-GIUAY4KY.js";import"./chunk-M2RO34V7.js";import"./chunk-HJJTLOWZ.js";import"./chunk-JOGKQTCU.js";import"./chunk-7732TLNA.js";import"./chunk-3PZ2QLLR.js";import"./chunk-NWZYULZE.js";import"./chunk-KJB6JJXT.js";import"./chunk-S2KGZJHS.js";import"./chunk-NWGIQCCO.js";import"./chunk-UAAJ3BSW.js";import"./chunk-ZCRLZINE.js";import"./chunk-SUXUCW5I.js";import"./chunk-TJOZPPZR.js";import"./chunk-VQB5SLXZ.js";import"./chunk-6BK2S2WE.js";import"./chunk-ME4LVBTT.js";import"./chunk-5MWNXDQW.js";import"./chunk-RQFC7Q33.js";import"./chunk-4EQWK6P6.js";import"./chunk-GM5LIYLQ.js";import"./chunk-GOTVYSX5.js";import"./chunk-ZU5VHW6P.js";import"./chunk-SY74WSBN.js";import"./chunk-Y3X5OS5F.js";import"./chunk-WSZVSDD5.js";import"./chunk-KVN4FTPT.js";import"./chunk-ZFZX2NA3.js";import"./chunk-UUVTLYUK.js";import"./chunk-4RDTR2OF.js";import"./chunk-STYNOKOM.js";import"./chunk-GUEK5DOK.js";import"./chunk-VU4QET3C.js";import"./chunk-DMZ2XOEW.js";import"./chunk-PZF5ISGS.js";import"./chunk-UIYK4Y7K.js";import"./chunk-G7L5Y47C.js";import"./chunk-A2TK52B2.js";import"./chunk-LWHM5BWD.js";import"./chunk-J3XLOHWB.js";import"./chunk-JN7MSUGK.js";import"./chunk-JNLKLC7G.js";import"./chunk-WE7KIXR6.js";import"./chunk-XNXOW7BQ.js";import"./chunk-JLBUS4PR.js";import"./chunk-DJLRRCR6.js";import"./chunk-5HUAD4UU.js";import"./chunk-DGUAAG3T.js";export{a as QEKernelImpl,b as createKernel};
@@ -1,2 +0,0 @@
1
- import{createRequire as __cr}from"module";const require=__cr(import.meta.url);if(process.argv.includes('--version')||process.argv.includes('-v')){console.log("3.9.14");process.exit(0)}
2
- import{g as a}from"./chunk-GOTVYSX5.js";import"./chunk-STYNOKOM.js";import"./chunk-GUEK5DOK.js";import"./chunk-DMZ2XOEW.js";import"./chunk-PZF5ISGS.js";import"./chunk-UIYK4Y7K.js";import"./chunk-DGUAAG3T.js";export{a as KnowledgeGraphService};