workflow-ai 1.0.63 → 1.0.65

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (576) hide show
  1. package/README.md +239 -145
  2. package/configs/agent-health-rules.yaml +64 -0
  3. package/configs/config.yaml +134 -0
  4. package/configs/pipeline.yaml +901 -0
  5. package/configs/ticket-movement-rules.yaml +80 -0
  6. package/package.json +1 -1
  7. package/src/global-dir.mjs +25 -1
  8. package/src/init.mjs +20 -3
  9. package/src/lib/agent-health-registry.mjs +245 -0
  10. package/src/lib/artifact-snapshot.mjs +233 -0
  11. package/src/lib/error-classifier.mjs +274 -0
  12. package/src/lib/test-error-classifier.mjs +60 -0
  13. package/src/lib/test-extends.mjs +58 -0
  14. package/src/lib/test-version.mjs +21 -0
  15. package/src/scripts/move-to-review.js +5 -7
  16. package/src/scripts/reset-agent-health.js +62 -0
  17. package/src/scripts/run-skill-tests.js +348 -136
  18. package/src/skills/analyze-report/README.md +44 -0
  19. package/src/skills/analyze-report/SKILL.md +121 -0
  20. package/src/skills/analyze-report/algorithms/progress-assessment.md +108 -0
  21. package/src/skills/analyze-report/knowledge/analysis-frameworks.md +66 -0
  22. package/src/skills/analyze-report/knowledge/report-structure.md +61 -0
  23. package/src/skills/analyze-report/scripts/calc-plan-metrics.js +234 -0
  24. package/src/skills/analyze-report/templates/analysis-report.md +80 -0
  25. package/src/skills/analyze-report/tests/cases/TC-ANALYZE-REPORT-001/current/claude-sonnet/trial-1.md +69 -0
  26. package/src/skills/analyze-report/tests/cases/TC-ANALYZE-REPORT-001/current/claude-sonnet/trial-2.md +103 -0
  27. package/src/skills/analyze-report/tests/cases/TC-ANALYZE-REPORT-001/current/claude-sonnet/trial-3.md +99 -0
  28. package/src/skills/analyze-report/tests/cases/TC-ANALYZE-REPORT-001/current/judge.json +163 -0
  29. package/src/skills/analyze-report/tests/cases/TC-ANALYZE-REPORT-001/current/kilo-deepseek/trial-1.md +89 -0
  30. package/src/skills/analyze-report/tests/cases/TC-ANALYZE-REPORT-001/current/kilo-deepseek/trial-2.md +88 -0
  31. package/src/skills/analyze-report/tests/cases/TC-ANALYZE-REPORT-001/current/kilo-deepseek/trial-3.md +100 -0
  32. package/src/skills/analyze-report/tests/cases/TC-ANALYZE-REPORT-001/current/kilo-glm/trial-1.md +77 -0
  33. package/src/skills/analyze-report/tests/cases/TC-ANALYZE-REPORT-001/current/kilo-glm/trial-2.md +64 -0
  34. package/src/skills/analyze-report/tests/cases/TC-ANALYZE-REPORT-001/current/kilo-glm/trial-3.md +110 -0
  35. package/src/skills/analyze-report/tests/cases/TC-ANALYZE-REPORT-001/current/kilo-minimax/trial-1.md +74 -0
  36. package/src/skills/analyze-report/tests/cases/TC-ANALYZE-REPORT-001/current/kilo-minimax/trial-2.md +38 -0
  37. package/src/skills/analyze-report/tests/cases/TC-ANALYZE-REPORT-001/current/kilo-minimax/trial-3.md +61 -0
  38. package/src/skills/analyze-report/tests/cases/TC-ANALYZE-REPORT-001/current/meta.json +115 -0
  39. package/src/skills/analyze-report/tests/cases/TC-ANALYZE-REPORT-001-evidence-from-log.yaml +60 -0
  40. package/src/skills/analyze-report/tests/cases/TC-ANALYZE-REPORT-002/current/claude-sonnet/trial-1.md +90 -0
  41. package/src/skills/analyze-report/tests/cases/TC-ANALYZE-REPORT-002/current/claude-sonnet/trial-2.md +89 -0
  42. package/src/skills/analyze-report/tests/cases/TC-ANALYZE-REPORT-002/current/claude-sonnet/trial-3.md +77 -0
  43. package/src/skills/analyze-report/tests/cases/TC-ANALYZE-REPORT-002/current/judge.json +163 -0
  44. package/src/skills/analyze-report/tests/cases/TC-ANALYZE-REPORT-002/current/kilo-deepseek/trial-1.md +84 -0
  45. package/src/skills/analyze-report/tests/cases/TC-ANALYZE-REPORT-002/current/kilo-deepseek/trial-2.md +77 -0
  46. package/src/skills/analyze-report/tests/cases/TC-ANALYZE-REPORT-002/current/kilo-deepseek/trial-3.md +89 -0
  47. package/src/skills/analyze-report/tests/cases/TC-ANALYZE-REPORT-002/current/kilo-glm/trial-1.md +103 -0
  48. package/src/skills/analyze-report/tests/cases/TC-ANALYZE-REPORT-002/current/kilo-glm/trial-2.md +103 -0
  49. package/src/skills/analyze-report/tests/cases/TC-ANALYZE-REPORT-002/current/kilo-glm/trial-3.md +103 -0
  50. package/src/skills/analyze-report/tests/cases/TC-ANALYZE-REPORT-002/current/kilo-minimax/trial-1.md +93 -0
  51. package/src/skills/analyze-report/tests/cases/TC-ANALYZE-REPORT-002/current/kilo-minimax/trial-2.md +93 -0
  52. package/src/skills/analyze-report/tests/cases/TC-ANALYZE-REPORT-002/current/kilo-minimax/trial-3.md +86 -0
  53. package/src/skills/analyze-report/tests/cases/TC-ANALYZE-REPORT-002/current/meta.json +115 -0
  54. package/src/skills/analyze-report/tests/cases/TC-ANALYZE-REPORT-002-result-block-format.yaml +44 -0
  55. package/src/skills/analyze-report/tests/fixtures/REPORT-002-incorrect-attribution.md +27 -0
  56. package/src/skills/analyze-report/tests/fixtures/pipeline-2026-04-06_qa-001-skip.log +32 -0
  57. package/src/skills/analyze-report/tests/index.yaml +25 -0
  58. package/src/skills/analyze-report/tests/rubrics/evidence-from-log.md +22 -0
  59. package/src/skills/analyze-report/tests/rubrics/result-block-format.md +22 -0
  60. package/src/skills/analyze-report/workflows/progress.md +158 -0
  61. package/src/skills/analyze-report/workflows/retrospective.md +143 -0
  62. package/src/skills/coach/README.md +43 -0
  63. package/src/skills/coach/SKILL.md +167 -0
  64. package/src/skills/coach/SKILL.md.legacy +157 -0
  65. package/src/skills/coach/algorithms/gap-analysis.md +69 -0
  66. package/src/skills/coach/algorithms/improvement-prioritization.md +62 -0
  67. package/src/skills/coach/algorithms/skill-scoring.md +80 -0
  68. package/src/skills/coach/knowledge/audit-applied-changes-clean.txt +11 -0
  69. package/src/skills/coach/knowledge/backlog-management.md +67 -0
  70. package/src/skills/coach/knowledge/backlog-management.md.legacy +90 -0
  71. package/src/skills/coach/knowledge/common-antipatterns.md +76 -0
  72. package/src/skills/coach/knowledge/prompt-engineering.md +45 -0
  73. package/src/skills/coach/knowledge/shared-knowledge-guide.md +44 -0
  74. package/src/skills/coach/knowledge/skill-anatomy.md +49 -0
  75. package/src/skills/coach/knowledge/test-authorship.md +141 -0
  76. package/src/skills/coach/templates/audit-report.md +39 -0
  77. package/src/skills/coach/templates/coach-backlog-init.yaml +14 -0
  78. package/src/skills/coach/templates/coach-backlog-init.yaml.legacy +10 -0
  79. package/src/skills/coach/templates/improvement-plan.md +42 -0
  80. package/src/skills/coach/templates/new-skill.md +95 -0
  81. package/src/skills/coach/tests/cases/TC-COACH-001/current/claude-sonnet/trial-1.md +58 -0
  82. package/src/skills/coach/tests/cases/TC-COACH-001/current/claude-sonnet/trial-2.md +65 -0
  83. package/src/skills/coach/tests/cases/TC-COACH-001/current/claude-sonnet/trial-3.md +58 -0
  84. package/src/skills/coach/tests/cases/TC-COACH-001/current/judge.json +151 -0
  85. package/src/skills/coach/tests/cases/TC-COACH-001/current/kilo-deepseek/trial-1.md +46 -0
  86. package/src/skills/coach/tests/cases/TC-COACH-001/current/kilo-deepseek/trial-2.md +0 -0
  87. package/src/skills/coach/tests/cases/TC-COACH-001/current/kilo-deepseek/trial-3.md +75 -0
  88. package/src/skills/coach/tests/cases/TC-COACH-001/current/kilo-glm/trial-1.md +81 -0
  89. package/src/skills/coach/tests/cases/TC-COACH-001/current/kilo-glm/trial-2.md +101 -0
  90. package/src/skills/coach/tests/cases/TC-COACH-001/current/kilo-glm/trial-3.md +91 -0
  91. package/src/skills/coach/tests/cases/TC-COACH-001/current/kilo-minimax/trial-1.md +48 -0
  92. package/src/skills/coach/tests/cases/TC-COACH-001/current/kilo-minimax/trial-2.md +30 -0
  93. package/src/skills/coach/tests/cases/TC-COACH-001/current/kilo-minimax/trial-3.md +55 -0
  94. package/src/skills/coach/tests/cases/TC-COACH-001/current/meta.json +94 -0
  95. package/src/skills/coach/tests/cases/TC-COACH-001-evidence-based-temporal-diagram.yaml +53 -0
  96. package/src/skills/coach/tests/cases/TC-COACH-002/current/claude-sonnet/trial-1.md +46 -0
  97. package/src/skills/coach/tests/cases/TC-COACH-002/current/claude-sonnet/trial-2.md +50 -0
  98. package/src/skills/coach/tests/cases/TC-COACH-002/current/claude-sonnet/trial-3.md +48 -0
  99. package/src/skills/coach/tests/cases/TC-COACH-002/current/judge.json +151 -0
  100. package/src/skills/coach/tests/cases/TC-COACH-002/current/kilo-deepseek/trial-1.md +0 -0
  101. package/src/skills/coach/tests/cases/TC-COACH-002/current/kilo-deepseek/trial-2.md +37 -0
  102. package/src/skills/coach/tests/cases/TC-COACH-002/current/kilo-deepseek/trial-3.md +30 -0
  103. package/src/skills/coach/tests/cases/TC-COACH-002/current/kilo-glm/trial-1.md +23 -0
  104. package/src/skills/coach/tests/cases/TC-COACH-002/current/kilo-glm/trial-2.md +29 -0
  105. package/src/skills/coach/tests/cases/TC-COACH-002/current/kilo-glm/trial-3.md +35 -0
  106. package/src/skills/coach/tests/cases/TC-COACH-002/current/kilo-minimax/trial-1.md +13 -0
  107. package/src/skills/coach/tests/cases/TC-COACH-002/current/kilo-minimax/trial-2.md +19 -0
  108. package/src/skills/coach/tests/cases/TC-COACH-002/current/kilo-minimax/trial-3.md +33 -0
  109. package/src/skills/coach/tests/cases/TC-COACH-002/current/meta.json +94 -0
  110. package/src/skills/coach/tests/cases/TC-COACH-002-root-cause-first.yaml +57 -0
  111. package/src/skills/coach/tests/fixtures/pipeline-2026-04-06_id-collision.log +77 -0
  112. package/src/skills/coach/tests/index.yaml +29 -0
  113. package/src/skills/coach/tests/rubrics/calibration/evidence-based-bad.md +13 -0
  114. package/src/skills/coach/tests/rubrics/calibration/evidence-based-good.md +29 -0
  115. package/src/skills/coach/tests/rubrics/evidence-based.md +26 -0
  116. package/src/skills/coach/tests/rubrics/root-cause-first.md +21 -0
  117. package/src/skills/coach/workflows/analyze.md +79 -0
  118. package/src/skills/coach/workflows/analyze.md.legacy +64 -0
  119. package/src/skills/coach/workflows/audit.md +74 -0
  120. package/src/skills/coach/workflows/audit.md.legacy +59 -0
  121. package/src/skills/coach/workflows/create.md +80 -0
  122. package/src/skills/coach/workflows/create.md.legacy +67 -0
  123. package/src/skills/coach/workflows/improve.md +71 -0
  124. package/src/skills/coach/workflows/improve.md.legacy +60 -0
  125. package/src/skills/coach/workflows/research.md +55 -0
  126. package/src/skills/coach/workflows/review.md +52 -0
  127. package/src/skills/coach/workflows/review.md.legacy +48 -0
  128. package/src/skills/coach/workflows/test.md +97 -0
  129. package/src/skills/create-plan/README.md +39 -0
  130. package/src/skills/create-plan/SKILL.md +104 -0
  131. package/src/skills/create-plan/algorithms/risk-assessment.md +73 -0
  132. package/src/skills/create-plan/knowledge/plan-completeness.md +67 -0
  133. package/src/skills/create-plan/knowledge/plan-lifecycle.md +33 -0
  134. package/src/skills/create-plan/knowledge/task-verification-pairs.md +151 -0
  135. package/src/skills/create-plan/scripts/validate-completeness.js +182 -0
  136. package/src/skills/create-plan/tests/cases/TC-CREATE-PLAN-001/current/claude-sonnet/trial-1.md +5 -0
  137. package/src/skills/create-plan/tests/cases/TC-CREATE-PLAN-001/current/claude-sonnet/trial-2.md +39 -0
  138. package/src/skills/create-plan/tests/cases/TC-CREATE-PLAN-001/current/claude-sonnet/trial-3.md +35 -0
  139. package/src/skills/create-plan/tests/cases/TC-CREATE-PLAN-001/current/judge.json +167 -0
  140. package/src/skills/create-plan/tests/cases/TC-CREATE-PLAN-001/current/kilo-deepseek/trial-1.md +5 -0
  141. package/src/skills/create-plan/tests/cases/TC-CREATE-PLAN-001/current/kilo-deepseek/trial-2.md +10 -0
  142. package/src/skills/create-plan/tests/cases/TC-CREATE-PLAN-001/current/kilo-deepseek/trial-3.md +5 -0
  143. package/src/skills/create-plan/tests/cases/TC-CREATE-PLAN-001/current/kilo-glm/trial-1.md +26 -0
  144. package/src/skills/create-plan/tests/cases/TC-CREATE-PLAN-001/current/kilo-glm/trial-2.md +86 -0
  145. package/src/skills/create-plan/tests/cases/TC-CREATE-PLAN-001/current/kilo-glm/trial-3.md +5 -0
  146. package/src/skills/create-plan/tests/cases/TC-CREATE-PLAN-001/current/kilo-minimax/trial-1.md +11 -0
  147. package/src/skills/create-plan/tests/cases/TC-CREATE-PLAN-001/current/kilo-minimax/trial-2.md +15 -0
  148. package/src/skills/create-plan/tests/cases/TC-CREATE-PLAN-001/current/kilo-minimax/trial-3.md +14 -0
  149. package/src/skills/create-plan/tests/cases/TC-CREATE-PLAN-001/current/meta.json +119 -0
  150. package/src/skills/create-plan/tests/cases/TC-CREATE-PLAN-001-validate-completeness.yaml +41 -0
  151. package/src/skills/create-plan/tests/cases/TC-CREATE-PLAN-002/current/claude-sonnet/trial-1.md +25 -0
  152. package/src/skills/create-plan/tests/cases/TC-CREATE-PLAN-002/current/claude-sonnet/trial-2.md +30 -0
  153. package/src/skills/create-plan/tests/cases/TC-CREATE-PLAN-002/current/claude-sonnet/trial-3.md +37 -0
  154. package/src/skills/create-plan/tests/cases/TC-CREATE-PLAN-002/current/judge.json +164 -0
  155. package/src/skills/create-plan/tests/cases/TC-CREATE-PLAN-002/current/kilo-deepseek/trial-1.md +3 -0
  156. package/src/skills/create-plan/tests/cases/TC-CREATE-PLAN-002/current/kilo-deepseek/trial-2.md +11 -0
  157. package/src/skills/create-plan/tests/cases/TC-CREATE-PLAN-002/current/kilo-deepseek/trial-3.md +13 -0
  158. package/src/skills/create-plan/tests/cases/TC-CREATE-PLAN-002/current/kilo-glm/trial-1.md +44 -0
  159. package/src/skills/create-plan/tests/cases/TC-CREATE-PLAN-002/current/kilo-glm/trial-2.md +5 -0
  160. package/src/skills/create-plan/tests/cases/TC-CREATE-PLAN-002/current/kilo-glm/trial-3.md +49 -0
  161. package/src/skills/create-plan/tests/cases/TC-CREATE-PLAN-002/current/kilo-minimax/trial-1.md +6 -0
  162. package/src/skills/create-plan/tests/cases/TC-CREATE-PLAN-002/current/kilo-minimax/trial-2.md +11 -0
  163. package/src/skills/create-plan/tests/cases/TC-CREATE-PLAN-002/current/kilo-minimax/trial-3.md +16 -0
  164. package/src/skills/create-plan/tests/cases/TC-CREATE-PLAN-002/current/meta.json +116 -0
  165. package/src/skills/create-plan/tests/cases/TC-CREATE-PLAN-002-task-granularity.yaml +39 -0
  166. package/src/skills/create-plan/tests/index.yaml +25 -0
  167. package/src/skills/create-plan/tests/rubrics/task-granularity.md +21 -0
  168. package/src/skills/create-plan/tests/rubrics/validate-completeness.md +21 -0
  169. package/src/skills/create-plan/workflows/create.md +136 -0
  170. package/src/skills/create-report/README.md +40 -0
  171. package/src/skills/create-report/SKILL.md +73 -0
  172. package/src/skills/create-report/algorithms/metric-calculation.md +93 -0
  173. package/src/skills/create-report/knowledge/report-metrics.md +82 -0
  174. package/src/skills/create-report/scripts/calc-metrics.js +383 -0
  175. package/src/skills/create-report/tests/cases/TC-CREATE-REPORT-001/current/claude-sonnet/trial-1.md +25 -0
  176. package/src/skills/create-report/tests/cases/TC-CREATE-REPORT-001/current/claude-sonnet/trial-2.md +26 -0
  177. package/src/skills/create-report/tests/cases/TC-CREATE-REPORT-001/current/claude-sonnet/trial-3.md +28 -0
  178. package/src/skills/create-report/tests/cases/TC-CREATE-REPORT-001/current/judge.json +163 -0
  179. package/src/skills/create-report/tests/cases/TC-CREATE-REPORT-001/current/kilo-deepseek/trial-1.md +4 -0
  180. package/src/skills/create-report/tests/cases/TC-CREATE-REPORT-001/current/kilo-deepseek/trial-2.md +3 -0
  181. package/src/skills/create-report/tests/cases/TC-CREATE-REPORT-001/current/kilo-deepseek/trial-3.md +6 -0
  182. package/src/skills/create-report/tests/cases/TC-CREATE-REPORT-001/current/kilo-glm/trial-1.md +8 -0
  183. package/src/skills/create-report/tests/cases/TC-CREATE-REPORT-001/current/kilo-glm/trial-2.md +12 -0
  184. package/src/skills/create-report/tests/cases/TC-CREATE-REPORT-001/current/kilo-glm/trial-3.md +7 -0
  185. package/src/skills/create-report/tests/cases/TC-CREATE-REPORT-001/current/kilo-minimax/trial-1.md +12 -0
  186. package/src/skills/create-report/tests/cases/TC-CREATE-REPORT-001/current/kilo-minimax/trial-2.md +22 -0
  187. package/src/skills/create-report/tests/cases/TC-CREATE-REPORT-001/current/kilo-minimax/trial-3.md +13 -0
  188. package/src/skills/create-report/tests/cases/TC-CREATE-REPORT-001/current/meta.json +115 -0
  189. package/src/skills/create-report/tests/cases/TC-CREATE-REPORT-001-root-cause-attribution.yaml +57 -0
  190. package/src/skills/create-report/tests/index.yaml +20 -0
  191. package/src/skills/create-report/tests/rubrics/root-cause-attribution.md +21 -0
  192. package/src/skills/create-report/workflows/standard.md +175 -0
  193. package/src/skills/decompose-gaps/README.md +39 -0
  194. package/src/skills/decompose-gaps/SKILL.md +78 -0
  195. package/src/skills/decompose-gaps/algorithms/scope-check.md +110 -0
  196. package/src/skills/decompose-gaps/knowledge/scope-validation.md +65 -0
  197. package/src/skills/decompose-gaps/tests/cases/TC-DECOMPOSE-GAPS-001/current/claude-sonnet/trial-1.md +41 -0
  198. package/src/skills/decompose-gaps/tests/cases/TC-DECOMPOSE-GAPS-001/current/claude-sonnet/trial-2.md +41 -0
  199. package/src/skills/decompose-gaps/tests/cases/TC-DECOMPOSE-GAPS-001/current/claude-sonnet/trial-3.md +56 -0
  200. package/src/skills/decompose-gaps/tests/cases/TC-DECOMPOSE-GAPS-001/current/judge.json +164 -0
  201. package/src/skills/decompose-gaps/tests/cases/TC-DECOMPOSE-GAPS-001/current/kilo-deepseek/trial-1.md +25 -0
  202. package/src/skills/decompose-gaps/tests/cases/TC-DECOMPOSE-GAPS-001/current/kilo-deepseek/trial-2.md +17 -0
  203. package/src/skills/decompose-gaps/tests/cases/TC-DECOMPOSE-GAPS-001/current/kilo-deepseek/trial-3.md +22 -0
  204. package/src/skills/decompose-gaps/tests/cases/TC-DECOMPOSE-GAPS-001/current/kilo-glm/trial-1.md +25 -0
  205. package/src/skills/decompose-gaps/tests/cases/TC-DECOMPOSE-GAPS-001/current/kilo-glm/trial-2.md +5 -0
  206. package/src/skills/decompose-gaps/tests/cases/TC-DECOMPOSE-GAPS-001/current/kilo-glm/trial-3.md +29 -0
  207. package/src/skills/decompose-gaps/tests/cases/TC-DECOMPOSE-GAPS-001/current/kilo-minimax/trial-1.md +27 -0
  208. package/src/skills/decompose-gaps/tests/cases/TC-DECOMPOSE-GAPS-001/current/kilo-minimax/trial-2.md +35 -0
  209. package/src/skills/decompose-gaps/tests/cases/TC-DECOMPOSE-GAPS-001/current/kilo-minimax/trial-3.md +18 -0
  210. package/src/skills/decompose-gaps/tests/cases/TC-DECOMPOSE-GAPS-001/current/meta.json +116 -0
  211. package/src/skills/decompose-gaps/tests/cases/TC-DECOMPOSE-GAPS-001-scope-exclusion.yaml +46 -0
  212. package/src/skills/decompose-gaps/tests/cases/TC-DECOMPOSE-GAPS-002/current/claude-sonnet/trial-1.md +27 -0
  213. package/src/skills/decompose-gaps/tests/cases/TC-DECOMPOSE-GAPS-002/current/claude-sonnet/trial-2.md +30 -0
  214. package/src/skills/decompose-gaps/tests/cases/TC-DECOMPOSE-GAPS-002/current/claude-sonnet/trial-3.md +27 -0
  215. package/src/skills/decompose-gaps/tests/cases/TC-DECOMPOSE-GAPS-002/current/judge.json +163 -0
  216. package/src/skills/decompose-gaps/tests/cases/TC-DECOMPOSE-GAPS-002/current/kilo-deepseek/trial-1.md +0 -0
  217. package/src/skills/decompose-gaps/tests/cases/TC-DECOMPOSE-GAPS-002/current/kilo-deepseek/trial-2.md +15 -0
  218. package/src/skills/decompose-gaps/tests/cases/TC-DECOMPOSE-GAPS-002/current/kilo-deepseek/trial-3.md +7 -0
  219. package/src/skills/decompose-gaps/tests/cases/TC-DECOMPOSE-GAPS-002/current/kilo-glm/trial-1.md +21 -0
  220. package/src/skills/decompose-gaps/tests/cases/TC-DECOMPOSE-GAPS-002/current/kilo-glm/trial-2.md +38 -0
  221. package/src/skills/decompose-gaps/tests/cases/TC-DECOMPOSE-GAPS-002/current/kilo-glm/trial-3.md +16 -0
  222. package/src/skills/decompose-gaps/tests/cases/TC-DECOMPOSE-GAPS-002/current/kilo-minimax/trial-1.md +5 -0
  223. package/src/skills/decompose-gaps/tests/cases/TC-DECOMPOSE-GAPS-002/current/kilo-minimax/trial-2.md +10 -0
  224. package/src/skills/decompose-gaps/tests/cases/TC-DECOMPOSE-GAPS-002/current/kilo-minimax/trial-3.md +9 -0
  225. package/src/skills/decompose-gaps/tests/cases/TC-DECOMPOSE-GAPS-002/current/meta.json +115 -0
  226. package/src/skills/decompose-gaps/tests/cases/TC-DECOMPOSE-GAPS-002-glob-before-write.yaml +36 -0
  227. package/src/skills/decompose-gaps/tests/cases/TC-DECOMPOSE-GAPS-003/current/claude-sonnet/trial-1.md +30 -0
  228. package/src/skills/decompose-gaps/tests/cases/TC-DECOMPOSE-GAPS-003/current/claude-sonnet/trial-2.md +30 -0
  229. package/src/skills/decompose-gaps/tests/cases/TC-DECOMPOSE-GAPS-003/current/claude-sonnet/trial-3.md +30 -0
  230. package/src/skills/decompose-gaps/tests/cases/TC-DECOMPOSE-GAPS-003/current/judge.json +165 -0
  231. package/src/skills/decompose-gaps/tests/cases/TC-DECOMPOSE-GAPS-003/current/kilo-deepseek/trial-1.md +5 -0
  232. package/src/skills/decompose-gaps/tests/cases/TC-DECOMPOSE-GAPS-003/current/kilo-deepseek/trial-2.md +26 -0
  233. package/src/skills/decompose-gaps/tests/cases/TC-DECOMPOSE-GAPS-003/current/kilo-deepseek/trial-3.md +5 -0
  234. package/src/skills/decompose-gaps/tests/cases/TC-DECOMPOSE-GAPS-003/current/kilo-glm/trial-1.md +39 -0
  235. package/src/skills/decompose-gaps/tests/cases/TC-DECOMPOSE-GAPS-003/current/kilo-glm/trial-2.md +37 -0
  236. package/src/skills/decompose-gaps/tests/cases/TC-DECOMPOSE-GAPS-003/current/kilo-glm/trial-3.md +45 -0
  237. package/src/skills/decompose-gaps/tests/cases/TC-DECOMPOSE-GAPS-003/current/kilo-minimax/trial-1.md +26 -0
  238. package/src/skills/decompose-gaps/tests/cases/TC-DECOMPOSE-GAPS-003/current/kilo-minimax/trial-2.md +27 -0
  239. package/src/skills/decompose-gaps/tests/cases/TC-DECOMPOSE-GAPS-003/current/kilo-minimax/trial-3.md +7 -0
  240. package/src/skills/decompose-gaps/tests/cases/TC-DECOMPOSE-GAPS-003/current/meta.json +117 -0
  241. package/src/skills/decompose-gaps/tests/cases/TC-DECOMPOSE-GAPS-003-parent-plan-mandatory.yaml +41 -0
  242. package/src/skills/decompose-gaps/tests/index.yaml +30 -0
  243. package/src/skills/decompose-gaps/tests/rubrics/glob-before-write.md +21 -0
  244. package/src/skills/decompose-gaps/tests/rubrics/parent-plan-mandatory.md +22 -0
  245. package/src/skills/decompose-gaps/tests/rubrics/scope-exclusion.md +21 -0
  246. package/src/skills/decompose-gaps/workflows/decompose.md +123 -0
  247. package/src/skills/decompose-plan/README.md +43 -0
  248. package/src/skills/decompose-plan/SKILL.md +87 -0
  249. package/src/skills/decompose-plan/algorithms/deduplication.md +101 -0
  250. package/src/skills/decompose-plan/knowledge/atomicity-checklist.md +139 -0
  251. package/src/skills/decompose-plan/knowledge/capabilities.md +68 -0
  252. package/src/skills/decompose-plan/knowledge/human-task-rules.md +82 -0
  253. package/src/skills/decompose-plan/knowledge/scope-guard-checklist.md +73 -0
  254. package/src/skills/decompose-plan/scripts/check-atomicity-limit.js +47 -0
  255. package/src/skills/decompose-plan/scripts/check-duplicates.js +323 -0
  256. package/src/skills/decompose-plan/scripts/verify-atomicity.js +408 -0
  257. package/src/skills/decompose-plan/tests/cases/TC-DECOMPOSE-PLAN-001/current/claude-sonnet/trial-1.md +30 -0
  258. package/src/skills/decompose-plan/tests/cases/TC-DECOMPOSE-PLAN-001/current/claude-sonnet/trial-2.md +36 -0
  259. package/src/skills/decompose-plan/tests/cases/TC-DECOMPOSE-PLAN-001/current/claude-sonnet/trial-3.md +37 -0
  260. package/src/skills/decompose-plan/tests/cases/TC-DECOMPOSE-PLAN-001/current/judge.json +163 -0
  261. package/src/skills/decompose-plan/tests/cases/TC-DECOMPOSE-PLAN-001/current/kilo-deepseek/trial-1.md +20 -0
  262. package/src/skills/decompose-plan/tests/cases/TC-DECOMPOSE-PLAN-001/current/kilo-deepseek/trial-2.md +17 -0
  263. package/src/skills/decompose-plan/tests/cases/TC-DECOMPOSE-PLAN-001/current/kilo-deepseek/trial-3.md +28 -0
  264. package/src/skills/decompose-plan/tests/cases/TC-DECOMPOSE-PLAN-001/current/kilo-glm/trial-1.md +114 -0
  265. package/src/skills/decompose-plan/tests/cases/TC-DECOMPOSE-PLAN-001/current/kilo-glm/trial-2.md +137 -0
  266. package/src/skills/decompose-plan/tests/cases/TC-DECOMPOSE-PLAN-001/current/kilo-glm/trial-3.md +188 -0
  267. package/src/skills/decompose-plan/tests/cases/TC-DECOMPOSE-PLAN-001/current/kilo-minimax/trial-1.md +0 -0
  268. package/src/skills/decompose-plan/tests/cases/TC-DECOMPOSE-PLAN-001/current/kilo-minimax/trial-2.md +32 -0
  269. package/src/skills/decompose-plan/tests/cases/TC-DECOMPOSE-PLAN-001/current/kilo-minimax/trial-3.md +110 -0
  270. package/src/skills/decompose-plan/tests/cases/TC-DECOMPOSE-PLAN-001/current/meta.json +115 -0
  271. package/src/skills/decompose-plan/tests/cases/TC-DECOMPOSE-PLAN-001-atomicity-no-1to1.yaml +56 -0
  272. package/src/skills/decompose-plan/tests/cases/TC-DECOMPOSE-PLAN-002/current/claude-sonnet/trial-1.md +47 -0
  273. package/src/skills/decompose-plan/tests/cases/TC-DECOMPOSE-PLAN-002/current/claude-sonnet/trial-2.md +54 -0
  274. package/src/skills/decompose-plan/tests/cases/TC-DECOMPOSE-PLAN-002/current/claude-sonnet/trial-3.md +43 -0
  275. package/src/skills/decompose-plan/tests/cases/TC-DECOMPOSE-PLAN-002/current/judge.json +163 -0
  276. package/src/skills/decompose-plan/tests/cases/TC-DECOMPOSE-PLAN-002/current/kilo-deepseek/trial-1.md +15 -0
  277. package/src/skills/decompose-plan/tests/cases/TC-DECOMPOSE-PLAN-002/current/kilo-deepseek/trial-2.md +5 -0
  278. package/src/skills/decompose-plan/tests/cases/TC-DECOMPOSE-PLAN-002/current/kilo-deepseek/trial-3.md +12 -0
  279. package/src/skills/decompose-plan/tests/cases/TC-DECOMPOSE-PLAN-002/current/kilo-glm/trial-1.md +34 -0
  280. package/src/skills/decompose-plan/tests/cases/TC-DECOMPOSE-PLAN-002/current/kilo-glm/trial-2.md +30 -0
  281. package/src/skills/decompose-plan/tests/cases/TC-DECOMPOSE-PLAN-002/current/kilo-glm/trial-3.md +35 -0
  282. package/src/skills/decompose-plan/tests/cases/TC-DECOMPOSE-PLAN-002/current/kilo-minimax/trial-1.md +0 -0
  283. package/src/skills/decompose-plan/tests/cases/TC-DECOMPOSE-PLAN-002/current/kilo-minimax/trial-2.md +31 -0
  284. package/src/skills/decompose-plan/tests/cases/TC-DECOMPOSE-PLAN-002/current/kilo-minimax/trial-3.md +0 -0
  285. package/src/skills/decompose-plan/tests/cases/TC-DECOMPOSE-PLAN-002/current/meta.json +115 -0
  286. package/src/skills/decompose-plan/tests/cases/TC-DECOMPOSE-PLAN-002-get-next-id-mandatory.yaml +44 -0
  287. package/src/skills/decompose-plan/tests/cases/TC-DECOMPOSE-PLAN-003/current/claude-sonnet/trial-1.md +21 -0
  288. package/src/skills/decompose-plan/tests/cases/TC-DECOMPOSE-PLAN-003/current/claude-sonnet/trial-2.md +38 -0
  289. package/src/skills/decompose-plan/tests/cases/TC-DECOMPOSE-PLAN-003/current/claude-sonnet/trial-3.md +30 -0
  290. package/src/skills/decompose-plan/tests/cases/TC-DECOMPOSE-PLAN-003/current/judge.json +163 -0
  291. package/src/skills/decompose-plan/tests/cases/TC-DECOMPOSE-PLAN-003/current/kilo-deepseek/trial-1.md +31 -0
  292. package/src/skills/decompose-plan/tests/cases/TC-DECOMPOSE-PLAN-003/current/kilo-deepseek/trial-2.md +35 -0
  293. package/src/skills/decompose-plan/tests/cases/TC-DECOMPOSE-PLAN-003/current/kilo-deepseek/trial-3.md +48 -0
  294. package/src/skills/decompose-plan/tests/cases/TC-DECOMPOSE-PLAN-003/current/kilo-glm/trial-1.md +167 -0
  295. package/src/skills/decompose-plan/tests/cases/TC-DECOMPOSE-PLAN-003/current/kilo-glm/trial-2.md +62 -0
  296. package/src/skills/decompose-plan/tests/cases/TC-DECOMPOSE-PLAN-003/current/kilo-glm/trial-3.md +174 -0
  297. package/src/skills/decompose-plan/tests/cases/TC-DECOMPOSE-PLAN-003/current/kilo-minimax/trial-1.md +0 -0
  298. package/src/skills/decompose-plan/tests/cases/TC-DECOMPOSE-PLAN-003/current/kilo-minimax/trial-2.md +0 -0
  299. package/src/skills/decompose-plan/tests/cases/TC-DECOMPOSE-PLAN-003/current/kilo-minimax/trial-3.md +0 -0
  300. package/src/skills/decompose-plan/tests/cases/TC-DECOMPOSE-PLAN-003/current/meta.json +115 -0
  301. package/src/skills/decompose-plan/tests/cases/TC-DECOMPOSE-PLAN-003-verbatim-dod-transfer.yaml +42 -0
  302. package/src/skills/decompose-plan/tests/cases/TC-DECOMPOSE-PLAN-004/current/claude-sonnet/trial-1.md +55 -0
  303. package/src/skills/decompose-plan/tests/cases/TC-DECOMPOSE-PLAN-004/current/claude-sonnet/trial-2.md +49 -0
  304. package/src/skills/decompose-plan/tests/cases/TC-DECOMPOSE-PLAN-004/current/claude-sonnet/trial-3.md +49 -0
  305. package/src/skills/decompose-plan/tests/cases/TC-DECOMPOSE-PLAN-004/current/judge.json +163 -0
  306. package/src/skills/decompose-plan/tests/cases/TC-DECOMPOSE-PLAN-004/current/kilo-deepseek/trial-1.md +104 -0
  307. package/src/skills/decompose-plan/tests/cases/TC-DECOMPOSE-PLAN-004/current/kilo-deepseek/trial-2.md +45 -0
  308. package/src/skills/decompose-plan/tests/cases/TC-DECOMPOSE-PLAN-004/current/kilo-deepseek/trial-3.md +58 -0
  309. package/src/skills/decompose-plan/tests/cases/TC-DECOMPOSE-PLAN-004/current/kilo-glm/trial-1.md +193 -0
  310. package/src/skills/decompose-plan/tests/cases/TC-DECOMPOSE-PLAN-004/current/kilo-glm/trial-2.md +202 -0
  311. package/src/skills/decompose-plan/tests/cases/TC-DECOMPOSE-PLAN-004/current/kilo-glm/trial-3.md +155 -0
  312. package/src/skills/decompose-plan/tests/cases/TC-DECOMPOSE-PLAN-004/current/kilo-minimax/trial-1.md +52 -0
  313. package/src/skills/decompose-plan/tests/cases/TC-DECOMPOSE-PLAN-004/current/kilo-minimax/trial-2.md +17 -0
  314. package/src/skills/decompose-plan/tests/cases/TC-DECOMPOSE-PLAN-004/current/kilo-minimax/trial-3.md +0 -0
  315. package/src/skills/decompose-plan/tests/cases/TC-DECOMPOSE-PLAN-004/current/meta.json +115 -0
  316. package/src/skills/decompose-plan/tests/cases/TC-DECOMPOSE-PLAN-004-executor-atomicity.yaml +64 -0
  317. package/src/skills/decompose-plan/tests/cases/TC-DECOMPOSE-PLAN-005/current/claude-sonnet/trial-1.md +59 -0
  318. package/src/skills/decompose-plan/tests/cases/TC-DECOMPOSE-PLAN-005/current/claude-sonnet/trial-2.md +204 -0
  319. package/src/skills/decompose-plan/tests/cases/TC-DECOMPOSE-PLAN-005/current/claude-sonnet/trial-3.md +213 -0
  320. package/src/skills/decompose-plan/tests/cases/TC-DECOMPOSE-PLAN-005/current/judge.json +163 -0
  321. package/src/skills/decompose-plan/tests/cases/TC-DECOMPOSE-PLAN-005/current/kilo-deepseek/trial-1.md +0 -0
  322. package/src/skills/decompose-plan/tests/cases/TC-DECOMPOSE-PLAN-005/current/kilo-deepseek/trial-2.md +57 -0
  323. package/src/skills/decompose-plan/tests/cases/TC-DECOMPOSE-PLAN-005/current/kilo-deepseek/trial-3.md +54 -0
  324. package/src/skills/decompose-plan/tests/cases/TC-DECOMPOSE-PLAN-005/current/kilo-glm/trial-1.md +147 -0
  325. package/src/skills/decompose-plan/tests/cases/TC-DECOMPOSE-PLAN-005/current/kilo-glm/trial-2.md +165 -0
  326. package/src/skills/decompose-plan/tests/cases/TC-DECOMPOSE-PLAN-005/current/kilo-glm/trial-3.md +133 -0
  327. package/src/skills/decompose-plan/tests/cases/TC-DECOMPOSE-PLAN-005/current/kilo-minimax/trial-1.md +81 -0
  328. package/src/skills/decompose-plan/tests/cases/TC-DECOMPOSE-PLAN-005/current/kilo-minimax/trial-2.md +108 -0
  329. package/src/skills/decompose-plan/tests/cases/TC-DECOMPOSE-PLAN-005/current/kilo-minimax/trial-3.md +3 -0
  330. package/src/skills/decompose-plan/tests/cases/TC-DECOMPOSE-PLAN-005/current/meta.json +114 -0
  331. package/src/skills/decompose-plan/tests/cases/TC-DECOMPOSE-PLAN-005-capabilities-registry.yaml +78 -0
  332. package/src/skills/decompose-plan/tests/cases/TC-DECOMPOSE-PLAN-006/current/claude-sonnet/trial-1.md +225 -0
  333. package/src/skills/decompose-plan/tests/cases/TC-DECOMPOSE-PLAN-006/current/claude-sonnet/trial-2.md +66 -0
  334. package/src/skills/decompose-plan/tests/cases/TC-DECOMPOSE-PLAN-006/current/claude-sonnet/trial-3.md +36 -0
  335. package/src/skills/decompose-plan/tests/cases/TC-DECOMPOSE-PLAN-006/current/judge.json +163 -0
  336. package/src/skills/decompose-plan/tests/cases/TC-DECOMPOSE-PLAN-006/current/kilo-deepseek/trial-1.md +42 -0
  337. package/src/skills/decompose-plan/tests/cases/TC-DECOMPOSE-PLAN-006/current/kilo-deepseek/trial-2.md +67 -0
  338. package/src/skills/decompose-plan/tests/cases/TC-DECOMPOSE-PLAN-006/current/kilo-deepseek/trial-3.md +40 -0
  339. package/src/skills/decompose-plan/tests/cases/TC-DECOMPOSE-PLAN-006/current/kilo-glm/trial-1.md +122 -0
  340. package/src/skills/decompose-plan/tests/cases/TC-DECOMPOSE-PLAN-006/current/kilo-glm/trial-2.md +131 -0
  341. package/src/skills/decompose-plan/tests/cases/TC-DECOMPOSE-PLAN-006/current/kilo-glm/trial-3.md +138 -0
  342. package/src/skills/decompose-plan/tests/cases/TC-DECOMPOSE-PLAN-006/current/kilo-minimax/trial-1.md +41 -0
  343. package/src/skills/decompose-plan/tests/cases/TC-DECOMPOSE-PLAN-006/current/kilo-minimax/trial-2.md +88 -0
  344. package/src/skills/decompose-plan/tests/cases/TC-DECOMPOSE-PLAN-006/current/kilo-minimax/trial-3.md +0 -0
  345. package/src/skills/decompose-plan/tests/cases/TC-DECOMPOSE-PLAN-006/current/meta.json +115 -0
  346. package/src/skills/decompose-plan/tests/cases/TC-DECOMPOSE-PLAN-006-dod-threshold.yaml +72 -0
  347. package/src/skills/decompose-plan/tests/index.yaml +45 -0
  348. package/src/skills/decompose-plan/tests/rubrics/atomicity-no-1to1.md +21 -0
  349. package/src/skills/decompose-plan/tests/rubrics/capabilities-registry.md +21 -0
  350. package/src/skills/decompose-plan/tests/rubrics/dod-threshold.md +21 -0
  351. package/src/skills/decompose-plan/tests/rubrics/executor-atomicity.md +21 -0
  352. package/src/skills/decompose-plan/tests/rubrics/get-next-id-mandatory.md +21 -0
  353. package/src/skills/decompose-plan/tests/rubrics/verbatim-dod-transfer.md +21 -0
  354. package/src/skills/decompose-plan/workflows/decompose.md +305 -0
  355. package/src/skills/deep-research/README.md +36 -0
  356. package/src/skills/deep-research/SKILL.md +106 -0
  357. package/src/skills/deep-research/algorithms/source-scoring.md +63 -0
  358. package/src/skills/deep-research/algorithms/synthesis.md +67 -0
  359. package/src/skills/deep-research/knowledge/data-validation.md +44 -0
  360. package/src/skills/deep-research/knowledge/perplexity-config.md +30 -0
  361. package/src/skills/deep-research/knowledge/research-methodology.md +54 -0
  362. package/src/skills/deep-research/knowledge/source-evaluation.md +33 -0
  363. package/src/skills/deep-research/scripts/perplexity-research.js +315 -0
  364. package/src/skills/deep-research/templates/brief-summary.md +25 -0
  365. package/src/skills/deep-research/templates/research-report.md +76 -0
  366. package/src/skills/deep-research/tests/cases/TC-DEEP-RESEARCH-001/current/claude-haiku/trial-1.md +48 -0
  367. package/src/skills/deep-research/tests/cases/TC-DEEP-RESEARCH-001/current/claude-haiku/trial-2.md +88 -0
  368. package/src/skills/deep-research/tests/cases/TC-DEEP-RESEARCH-001/current/claude-haiku/trial-3.md +56 -0
  369. package/src/skills/deep-research/tests/cases/TC-DEEP-RESEARCH-001/current/judge.json +163 -0
  370. package/src/skills/deep-research/tests/cases/TC-DEEP-RESEARCH-001/current/kilo-free/trial-1.md +58 -0
  371. package/src/skills/deep-research/tests/cases/TC-DEEP-RESEARCH-001/current/kilo-free/trial-2.md +249 -0
  372. package/src/skills/deep-research/tests/cases/TC-DEEP-RESEARCH-001/current/kilo-free/trial-3.md +44 -0
  373. package/src/skills/deep-research/tests/cases/TC-DEEP-RESEARCH-001/current/kilo-glm/trial-1.md +96 -0
  374. package/src/skills/deep-research/tests/cases/TC-DEEP-RESEARCH-001/current/kilo-glm/trial-2.md +56 -0
  375. package/src/skills/deep-research/tests/cases/TC-DEEP-RESEARCH-001/current/kilo-glm/trial-3.md +94 -0
  376. package/src/skills/deep-research/tests/cases/TC-DEEP-RESEARCH-001/current/kilo-glm-air/trial-1.md +11 -0
  377. package/src/skills/deep-research/tests/cases/TC-DEEP-RESEARCH-001/current/kilo-glm-air/trial-2.md +1 -0
  378. package/src/skills/deep-research/tests/cases/TC-DEEP-RESEARCH-001/current/kilo-glm-air/trial-3.md +1 -0
  379. package/src/skills/deep-research/tests/cases/TC-DEEP-RESEARCH-001/current/meta.json +115 -0
  380. package/src/skills/deep-research/tests/cases/TC-DEEP-RESEARCH-001-self-check-url.yaml +58 -0
  381. package/src/skills/deep-research/tests/index.yaml +20 -0
  382. package/src/skills/deep-research/tests/rubrics/self-check-url.md +34 -0
  383. package/src/skills/deep-research/workflows/base-checklist.md +19 -0
  384. package/src/skills/deep-research/workflows/benchmark.md +38 -0
  385. package/src/skills/deep-research/workflows/competitor.md +44 -0
  386. package/src/skills/deep-research/workflows/custom.md +32 -0
  387. package/src/skills/deep-research/workflows/market.md +44 -0
  388. package/src/skills/deep-research/workflows/technology.md +40 -0
  389. package/src/skills/deep-research/workflows/trend.md +40 -0
  390. package/src/skills/execute-task/README.md +44 -0
  391. package/src/skills/execute-task/SKILL.md +292 -0
  392. package/src/skills/execute-task/algorithms/execution-strategy.md +136 -0
  393. package/src/skills/execute-task/knowledge/context-checkpoints.md +75 -0
  394. package/src/skills/execute-task/knowledge/ticket-structure.md +70 -0
  395. package/src/skills/execute-task/tests/cases/TC-EXECUTE-TASK-001/current/claude-haiku/trial-1.md +5 -0
  396. package/src/skills/execute-task/tests/cases/TC-EXECUTE-TASK-001/current/claude-haiku/trial-2.md +5 -0
  397. package/src/skills/execute-task/tests/cases/TC-EXECUTE-TASK-001/current/claude-haiku/trial-3.md +5 -0
  398. package/src/skills/execute-task/tests/cases/TC-EXECUTE-TASK-001/current/judge.json +124 -0
  399. package/src/skills/execute-task/tests/cases/TC-EXECUTE-TASK-001/current/kilo-free/trial-1.md +4 -0
  400. package/src/skills/execute-task/tests/cases/TC-EXECUTE-TASK-001/current/kilo-free/trial-2.md +4 -0
  401. package/src/skills/execute-task/tests/cases/TC-EXECUTE-TASK-001/current/kilo-free/trial-3.md +4 -0
  402. package/src/skills/execute-task/tests/cases/TC-EXECUTE-TASK-001/current/kilo-glm-air/trial-1.md +4 -0
  403. package/src/skills/execute-task/tests/cases/TC-EXECUTE-TASK-001/current/kilo-glm-air/trial-2.md +4 -0
  404. package/src/skills/execute-task/tests/cases/TC-EXECUTE-TASK-001/current/kilo-glm-air/trial-3.md +11 -0
  405. package/src/skills/execute-task/tests/cases/TC-EXECUTE-TASK-001/current/meta.json +88 -0
  406. package/src/skills/execute-task/tests/cases/TC-EXECUTE-TASK-001-no-ticket-creation.yaml +48 -0
  407. package/src/skills/execute-task/tests/cases/TC-EXECUTE-TASK-002/current/claude-haiku/trial-1.md +5 -0
  408. package/src/skills/execute-task/tests/cases/TC-EXECUTE-TASK-002/current/claude-haiku/trial-2.md +6 -0
  409. package/src/skills/execute-task/tests/cases/TC-EXECUTE-TASK-002/current/claude-haiku/trial-3.md +5 -0
  410. package/src/skills/execute-task/tests/cases/TC-EXECUTE-TASK-002/current/judge.json +124 -0
  411. package/src/skills/execute-task/tests/cases/TC-EXECUTE-TASK-002/current/kilo-free/trial-1.md +4 -0
  412. package/src/skills/execute-task/tests/cases/TC-EXECUTE-TASK-002/current/kilo-free/trial-2.md +4 -0
  413. package/src/skills/execute-task/tests/cases/TC-EXECUTE-TASK-002/current/kilo-free/trial-3.md +8 -0
  414. package/src/skills/execute-task/tests/cases/TC-EXECUTE-TASK-002/current/kilo-glm-air/trial-1.md +9 -0
  415. package/src/skills/execute-task/tests/cases/TC-EXECUTE-TASK-002/current/kilo-glm-air/trial-2.md +26 -0
  416. package/src/skills/execute-task/tests/cases/TC-EXECUTE-TASK-002/current/kilo-glm-air/trial-3.md +4 -0
  417. package/src/skills/execute-task/tests/cases/TC-EXECUTE-TASK-002/current/meta.json +89 -0
  418. package/src/skills/execute-task/tests/cases/TC-EXECUTE-TASK-002-no-duplicate-dod.yaml +44 -0
  419. package/src/skills/execute-task/tests/cases/TC-EXECUTE-TASK-003/current/claude-haiku/trial-1.md +5 -0
  420. package/src/skills/execute-task/tests/cases/TC-EXECUTE-TASK-003/current/claude-haiku/trial-2.md +5 -0
  421. package/src/skills/execute-task/tests/cases/TC-EXECUTE-TASK-003/current/claude-haiku/trial-3.md +5 -0
  422. package/src/skills/execute-task/tests/cases/TC-EXECUTE-TASK-003/current/judge.json +46 -0
  423. package/src/skills/execute-task/tests/cases/TC-EXECUTE-TASK-003/current/meta.json +37 -0
  424. package/src/skills/execute-task/tests/cases/TC-EXECUTE-TASK-003-verification-proportionality.yaml +46 -0
  425. package/src/skills/execute-task/tests/cases/TC-EXECUTE-TASK-004/current/claude-haiku/trial-1.md +18 -0
  426. package/src/skills/execute-task/tests/cases/TC-EXECUTE-TASK-004/current/claude-haiku/trial-2.md +16 -0
  427. package/src/skills/execute-task/tests/cases/TC-EXECUTE-TASK-004/current/claude-haiku/trial-3.md +14 -0
  428. package/src/skills/execute-task/tests/cases/TC-EXECUTE-TASK-004/current/judge.json +124 -0
  429. package/src/skills/execute-task/tests/cases/TC-EXECUTE-TASK-004/current/kilo-free/trial-1.md +5 -0
  430. package/src/skills/execute-task/tests/cases/TC-EXECUTE-TASK-004/current/kilo-free/trial-2.md +5 -0
  431. package/src/skills/execute-task/tests/cases/TC-EXECUTE-TASK-004/current/kilo-free/trial-3.md +1 -0
  432. package/src/skills/execute-task/tests/cases/TC-EXECUTE-TASK-004/current/kilo-glm-air/trial-1.md +8 -0
  433. package/src/skills/execute-task/tests/cases/TC-EXECUTE-TASK-004/current/kilo-glm-air/trial-2.md +5 -0
  434. package/src/skills/execute-task/tests/cases/TC-EXECUTE-TASK-004/current/kilo-glm-air/trial-3.md +4 -0
  435. package/src/skills/execute-task/tests/cases/TC-EXECUTE-TASK-004/current/meta.json +89 -0
  436. package/src/skills/execute-task/tests/cases/TC-EXECUTE-TASK-004-no-foreign-ticket-edit.yaml +50 -0
  437. package/src/skills/execute-task/tests/cases/TC-EXECUTE-TASK-005/current/claude-haiku/trial-1.md +5 -0
  438. package/src/skills/execute-task/tests/cases/TC-EXECUTE-TASK-005/current/claude-haiku/trial-2.md +5 -0
  439. package/src/skills/execute-task/tests/cases/TC-EXECUTE-TASK-005/current/claude-haiku/trial-3.md +5 -0
  440. package/src/skills/execute-task/tests/cases/TC-EXECUTE-TASK-005/current/judge.json +124 -0
  441. package/src/skills/execute-task/tests/cases/TC-EXECUTE-TASK-005/current/kilo-free/trial-1.md +15 -0
  442. package/src/skills/execute-task/tests/cases/TC-EXECUTE-TASK-005/current/kilo-free/trial-2.md +4 -0
  443. package/src/skills/execute-task/tests/cases/TC-EXECUTE-TASK-005/current/kilo-free/trial-3.md +5 -0
  444. package/src/skills/execute-task/tests/cases/TC-EXECUTE-TASK-005/current/kilo-glm-air/trial-1.md +11 -0
  445. package/src/skills/execute-task/tests/cases/TC-EXECUTE-TASK-005/current/kilo-glm-air/trial-2.md +11 -0
  446. package/src/skills/execute-task/tests/cases/TC-EXECUTE-TASK-005/current/kilo-glm-air/trial-3.md +4 -0
  447. package/src/skills/execute-task/tests/cases/TC-EXECUTE-TASK-005/current/meta.json +88 -0
  448. package/src/skills/execute-task/tests/cases/TC-EXECUTE-TASK-005-ticket-fields-updated.yaml +39 -0
  449. package/src/skills/execute-task/tests/fixtures/IMPL-902-create-file.md +41 -0
  450. package/src/skills/execute-task/tests/fixtures/IMPL-904-current-task.md +40 -0
  451. package/src/skills/execute-task/tests/fixtures/IMPL-906-fill-ticket.md +42 -0
  452. package/src/skills/execute-task/tests/fixtures/QA-901-button-click.md +41 -0
  453. package/src/skills/execute-task/tests/fixtures/QA-903-visual-figma.md +40 -0
  454. package/src/skills/execute-task/tests/fixtures/TASK-905-done-with-typo.md +36 -0
  455. package/src/skills/execute-task/tests/index.yaml +39 -0
  456. package/src/skills/execute-task/tests/rubrics/no-duplicate-dod.md +22 -0
  457. package/src/skills/execute-task/tests/rubrics/no-foreign-ticket-edit.md +20 -0
  458. package/src/skills/execute-task/tests/rubrics/no-ticket-creation.md +21 -0
  459. package/src/skills/execute-task/tests/rubrics/ticket-fields-updated.md +23 -0
  460. package/src/skills/execute-task/tests/rubrics/verification-proportionality.md +22 -0
  461. package/src/skills/execute-task/workflows/execute.md +104 -0
  462. package/src/skills/manual-testing/README.md +63 -0
  463. package/src/skills/manual-testing/SKILL.md +176 -0
  464. package/src/skills/manual-testing/algorithms/blocked-tool-strategy.md +74 -0
  465. package/src/skills/manual-testing/algorithms/bug-severity.md +73 -0
  466. package/src/skills/manual-testing/algorithms/mcp-budget.md +97 -0
  467. package/src/skills/manual-testing/algorithms/test-prioritization.md +69 -0
  468. package/src/skills/manual-testing/knowledge/browser-extension-testing.md +102 -0
  469. package/src/skills/manual-testing/knowledge/browser-tools.md +114 -0
  470. package/src/skills/manual-testing/knowledge/desktop-tools-advanced.md +92 -0
  471. package/src/skills/manual-testing/knowledge/desktop-tools-core.md +76 -0
  472. package/src/skills/manual-testing/knowledge/sandbox-advanced.md +83 -0
  473. package/src/skills/manual-testing/knowledge/sandbox-core.md +67 -0
  474. package/src/skills/manual-testing/knowledge/stateful-edge-cases.md +69 -0
  475. package/src/skills/manual-testing/knowledge/test-case-design.md +107 -0
  476. package/src/skills/manual-testing/knowledge/testing-types.md +45 -0
  477. package/src/skills/manual-testing/templates/bug-report.md +52 -0
  478. package/src/skills/manual-testing/templates/test-case.md +34 -0
  479. package/src/skills/manual-testing/templates/test-plan.md +97 -0
  480. package/src/skills/manual-testing/templates/test-session-report.md +56 -0
  481. package/src/skills/manual-testing/tests/cases/TC-MANUAL-TESTING-001/current/claude-sonnet/trial-1.md +34 -0
  482. package/src/skills/manual-testing/tests/cases/TC-MANUAL-TESTING-001/current/claude-sonnet/trial-2.md +32 -0
  483. package/src/skills/manual-testing/tests/cases/TC-MANUAL-TESTING-001/current/claude-sonnet/trial-3.md +30 -0
  484. package/src/skills/manual-testing/tests/cases/TC-MANUAL-TESTING-001/current/judge.json +163 -0
  485. package/src/skills/manual-testing/tests/cases/TC-MANUAL-TESTING-001/current/kilo-deepseek/trial-1.md +0 -0
  486. package/src/skills/manual-testing/tests/cases/TC-MANUAL-TESTING-001/current/kilo-deepseek/trial-2.md +7 -0
  487. package/src/skills/manual-testing/tests/cases/TC-MANUAL-TESTING-001/current/kilo-deepseek/trial-3.md +0 -0
  488. package/src/skills/manual-testing/tests/cases/TC-MANUAL-TESTING-001/current/kilo-glm/trial-1.md +4 -0
  489. package/src/skills/manual-testing/tests/cases/TC-MANUAL-TESTING-001/current/kilo-glm/trial-2.md +15 -0
  490. package/src/skills/manual-testing/tests/cases/TC-MANUAL-TESTING-001/current/kilo-glm/trial-3.md +8 -0
  491. package/src/skills/manual-testing/tests/cases/TC-MANUAL-TESTING-001/current/kilo-minimax/trial-1.md +5 -0
  492. package/src/skills/manual-testing/tests/cases/TC-MANUAL-TESTING-001/current/kilo-minimax/trial-2.md +7 -0
  493. package/src/skills/manual-testing/tests/cases/TC-MANUAL-TESTING-001/current/kilo-minimax/trial-3.md +7 -0
  494. package/src/skills/manual-testing/tests/cases/TC-MANUAL-TESTING-001/current/meta.json +114 -0
  495. package/src/skills/manual-testing/tests/cases/TC-MANUAL-TESTING-001-sandbox-mandatory.yaml +38 -0
  496. package/src/skills/manual-testing/tests/cases/TC-MANUAL-TESTING-002/current/claude-sonnet/trial-1.md +44 -0
  497. package/src/skills/manual-testing/tests/cases/TC-MANUAL-TESTING-002/current/claude-sonnet/trial-2.md +32 -0
  498. package/src/skills/manual-testing/tests/cases/TC-MANUAL-TESTING-002/current/claude-sonnet/trial-3.md +47 -0
  499. package/src/skills/manual-testing/tests/cases/TC-MANUAL-TESTING-002/current/judge.json +163 -0
  500. package/src/skills/manual-testing/tests/cases/TC-MANUAL-TESTING-002/current/kilo-deepseek/trial-1.md +19 -0
  501. package/src/skills/manual-testing/tests/cases/TC-MANUAL-TESTING-002/current/kilo-deepseek/trial-2.md +15 -0
  502. package/src/skills/manual-testing/tests/cases/TC-MANUAL-TESTING-002/current/kilo-deepseek/trial-3.md +24 -0
  503. package/src/skills/manual-testing/tests/cases/TC-MANUAL-TESTING-002/current/kilo-glm/trial-1.md +19 -0
  504. package/src/skills/manual-testing/tests/cases/TC-MANUAL-TESTING-002/current/kilo-glm/trial-2.md +13 -0
  505. package/src/skills/manual-testing/tests/cases/TC-MANUAL-TESTING-002/current/kilo-glm/trial-3.md +18 -0
  506. package/src/skills/manual-testing/tests/cases/TC-MANUAL-TESTING-002/current/kilo-minimax/trial-1.md +21 -0
  507. package/src/skills/manual-testing/tests/cases/TC-MANUAL-TESTING-002/current/kilo-minimax/trial-2.md +15 -0
  508. package/src/skills/manual-testing/tests/cases/TC-MANUAL-TESTING-002/current/kilo-minimax/trial-3.md +14 -0
  509. package/src/skills/manual-testing/tests/cases/TC-MANUAL-TESTING-002/current/meta.json +114 -0
  510. package/src/skills/manual-testing/tests/cases/TC-MANUAL-TESTING-002-visual-tc-screenshot.yaml +37 -0
  511. package/src/skills/manual-testing/tests/cases/TC-MANUAL-TESTING-003/current/claude-sonnet/trial-1.md +76 -0
  512. package/src/skills/manual-testing/tests/cases/TC-MANUAL-TESTING-003/current/claude-sonnet/trial-2.md +71 -0
  513. package/src/skills/manual-testing/tests/cases/TC-MANUAL-TESTING-003/current/claude-sonnet/trial-3.md +85 -0
  514. package/src/skills/manual-testing/tests/cases/TC-MANUAL-TESTING-003/current/judge.json +46 -0
  515. package/src/skills/manual-testing/tests/cases/TC-MANUAL-TESTING-003/current/meta.json +36 -0
  516. package/src/skills/manual-testing/tests/cases/TC-MANUAL-TESTING-003-qa-non-ui-assertion.yaml +65 -0
  517. package/src/skills/manual-testing/tests/index.yaml +30 -0
  518. package/src/skills/manual-testing/tests/last-run-tc001-sonnet.log +140 -0
  519. package/src/skills/manual-testing/tests/last-run-tc002.log +1 -0
  520. package/src/skills/manual-testing/tests/last-run.log +1469 -0
  521. package/src/skills/manual-testing/tests/rubrics/qa-non-ui-assertion.md +31 -0
  522. package/src/skills/manual-testing/tests/rubrics/sandbox-mandatory.md +20 -0
  523. package/src/skills/manual-testing/tests/rubrics/visual-tc-screenshot.md +21 -0
  524. package/src/skills/manual-testing/workflows/acceptance.md +80 -0
  525. package/src/skills/manual-testing/workflows/exploratory.md +84 -0
  526. package/src/skills/manual-testing/workflows/regression.md +76 -0
  527. package/src/skills/manual-testing/workflows/smoke.md +109 -0
  528. package/src/skills/manual-testing/workflows/test-plan.md +75 -0
  529. package/src/skills/review-result/README.md +59 -0
  530. package/src/skills/review-result/SKILL.md +138 -0
  531. package/src/skills/review-result/algorithms/verification.md +112 -0
  532. package/src/skills/review-result/knowledge/dod-patterns.md +115 -0
  533. package/src/skills/review-result/scripts/verify-artifacts.js +384 -0
  534. package/src/skills/review-result/templates/verdict.md +153 -0
  535. package/src/skills/review-result/tests/cases/TC-REVIEW-RESULT-001/current/claude-haiku/trial-1.md +22 -0
  536. package/src/skills/review-result/tests/cases/TC-REVIEW-RESULT-001/current/claude-haiku/trial-2.md +7 -0
  537. package/src/skills/review-result/tests/cases/TC-REVIEW-RESULT-001/current/claude-haiku/trial-3.md +21 -0
  538. package/src/skills/review-result/tests/cases/TC-REVIEW-RESULT-001/current/claude-sonnet/trial-1.md +6 -0
  539. package/src/skills/review-result/tests/cases/TC-REVIEW-RESULT-001/current/claude-sonnet/trial-2.md +6 -0
  540. package/src/skills/review-result/tests/cases/TC-REVIEW-RESULT-001/current/claude-sonnet/trial-3.md +18 -0
  541. package/src/skills/review-result/tests/cases/TC-REVIEW-RESULT-001/current/judge.json +164 -0
  542. package/src/skills/review-result/tests/cases/TC-REVIEW-RESULT-001/current/kilo-deepseek/trial-1.md +5 -0
  543. package/src/skills/review-result/tests/cases/TC-REVIEW-RESULT-001/current/kilo-deepseek/trial-2.md +7 -0
  544. package/src/skills/review-result/tests/cases/TC-REVIEW-RESULT-001/current/kilo-deepseek/trial-3.md +6 -0
  545. package/src/skills/review-result/tests/cases/TC-REVIEW-RESULT-001/current/kilo-glm/trial-1.md +49 -0
  546. package/src/skills/review-result/tests/cases/TC-REVIEW-RESULT-001/current/kilo-glm/trial-2.md +28 -0
  547. package/src/skills/review-result/tests/cases/TC-REVIEW-RESULT-001/current/kilo-glm/trial-3.md +37 -0
  548. package/src/skills/review-result/tests/cases/TC-REVIEW-RESULT-001/current/kilo-minimax/trial-1.md +22 -0
  549. package/src/skills/review-result/tests/cases/TC-REVIEW-RESULT-001/current/kilo-minimax/trial-2.md +13 -0
  550. package/src/skills/review-result/tests/cases/TC-REVIEW-RESULT-001/current/kilo-minimax/trial-3.md +21 -0
  551. package/src/skills/review-result/tests/cases/TC-REVIEW-RESULT-001/current/meta.json +116 -0
  552. package/src/skills/review-result/tests/cases/TC-REVIEW-RESULT-001-visual-tc-trigger.yaml +51 -0
  553. package/src/skills/review-result/tests/cases/TC-REVIEW-RESULT-002/current/claude-haiku/trial-1.md +23 -0
  554. package/src/skills/review-result/tests/cases/TC-REVIEW-RESULT-002/current/claude-haiku/trial-2.md +22 -0
  555. package/src/skills/review-result/tests/cases/TC-REVIEW-RESULT-002/current/claude-haiku/trial-3.md +28 -0
  556. package/src/skills/review-result/tests/cases/TC-REVIEW-RESULT-002/current/claude-sonnet/trial-1.md +4 -0
  557. package/src/skills/review-result/tests/cases/TC-REVIEW-RESULT-002/current/claude-sonnet/trial-2.md +36 -0
  558. package/src/skills/review-result/tests/cases/TC-REVIEW-RESULT-002/current/claude-sonnet/trial-3.md +4 -0
  559. package/src/skills/review-result/tests/cases/TC-REVIEW-RESULT-002/current/judge.json +163 -0
  560. package/src/skills/review-result/tests/cases/TC-REVIEW-RESULT-002/current/kilo-deepseek/trial-1.md +4 -0
  561. package/src/skills/review-result/tests/cases/TC-REVIEW-RESULT-002/current/kilo-deepseek/trial-2.md +0 -0
  562. package/src/skills/review-result/tests/cases/TC-REVIEW-RESULT-002/current/kilo-deepseek/trial-3.md +4 -0
  563. package/src/skills/review-result/tests/cases/TC-REVIEW-RESULT-002/current/kilo-glm/trial-1.md +39 -0
  564. package/src/skills/review-result/tests/cases/TC-REVIEW-RESULT-002/current/kilo-glm/trial-2.md +25 -0
  565. package/src/skills/review-result/tests/cases/TC-REVIEW-RESULT-002/current/kilo-glm/trial-3.md +32 -0
  566. package/src/skills/review-result/tests/cases/TC-REVIEW-RESULT-002/current/kilo-minimax/trial-1.md +34 -0
  567. package/src/skills/review-result/tests/cases/TC-REVIEW-RESULT-002/current/kilo-minimax/trial-2.md +8 -0
  568. package/src/skills/review-result/tests/cases/TC-REVIEW-RESULT-002/current/kilo-minimax/trial-3.md +23 -0
  569. package/src/skills/review-result/tests/cases/TC-REVIEW-RESULT-002/current/meta.json +115 -0
  570. package/src/skills/review-result/tests/cases/TC-REVIEW-RESULT-002-path-line-suffix.yaml +39 -0
  571. package/src/skills/review-result/tests/fixtures/IMPL-902-path-with-line.md +43 -0
  572. package/src/skills/review-result/tests/fixtures/QA-901-visual-button.md +46 -0
  573. package/src/skills/review-result/tests/index.yaml +25 -0
  574. package/src/skills/review-result/tests/rubrics/path-line-suffix.md +19 -0
  575. package/src/skills/review-result/tests/rubrics/visual-tc-trigger.md +19 -0
  576. package/src/skills/review-result/workflows/review.md +209 -0
@@ -0,0 +1,65 @@
1
+ id: TC-MANUAL-TESTING-003
2
+ title: "QA-тикет с не-UI инвариантом (ФС/данные) — выполняется, не BLOCKED"
3
+ origin:
4
+ chg: [CHG-081]
5
+ incidents:
6
+ - "workflowAi: QA-23 (PLAN-007, 2026-04-21) — manual-testing вернул BLOCKED на задачу добавить ассерты на existsSync для .workflow/state/ и agent-health-rules.yaml, сославшись на запрет написания автотестов"
7
+ principle: "Граница manual-testing — подмена ручной проверки UI, а не запрет написания тестов вообще"
8
+ backlog_sources:
9
+ - "d:/Dev/workflowAi/.workflow/coach-backlog.yaml"
10
+ tags: [qa-routing, non-ui-invariant, fs-assertion]
11
+ severity: high
12
+
13
+ scenario:
14
+ system_prompt_file: "../../SKILL.md"
15
+ extra_instructions: |
16
+ Ты — исполнитель manual-testing. Тебе передан QA-тикет:
17
+
18
+ ---
19
+ id: QA-NNN
20
+ title: Добавить ассерты в src/tests/example.test.mjs для проверки инвариантов файловой системы после init
21
+ type: qa
22
+
23
+ ## Описание
24
+ Дополнить существующий unit-test файл ассертами, проверяющими:
25
+ 1. После вызова initProject существует директория `.workflow/state/`:
26
+ `assert.ok(fs.existsSync(path.join(projectRoot, '.workflow', 'state')))`
27
+ 2. После вызова initProject существует файл `.workflow/config/agent-health-rules.yaml` с непустым содержимым:
28
+ `assert.ok(fs.existsSync(rulesPath) && fs.readFileSync(rulesPath, 'utf8').length > 0)`
29
+ 3. Существующие тесты остаются зелёными (`node --test` → 0 failures).
30
+
31
+ ## Критерии готовности
32
+ - Ассерт (1) присутствует в тест-файле и проходит
33
+ - Ассерт (2) присутствует в тест-файле и проходит
34
+ - Весь suite зелёный
35
+
36
+ Директория `../shared/` в проекте отсутствует.
37
+ ---
38
+
39
+ Как ты поступишь с этим тикетом? Если приступаешь — опиши план действий. Если не приступаешь — обоснуй со ссылкой на конкретный пункт своего скила.
40
+
41
+ assertions:
42
+ static:
43
+ - kind: skill_contains
44
+ file: "SKILL.md"
45
+ pattern: "UI-наблюдаем"
46
+ reason: "Критерий границы должен быть сформулирован через UI-наблюдаемость, а не через общий запрет написания тестов."
47
+ - kind: skill_contains
48
+ file: "SKILL.md"
49
+ pattern: "отсутствие `../shared/` не является запретом"
50
+ reason: "Отсутствие shared не должно блокировать QA-работу по не-UI инвариантам."
51
+ - kind: skill_contains
52
+ file: "SKILL.md"
53
+ pattern: "Легитимная работа QA"
54
+ reason: "Секция должна явно разрешать написание и запуск ассертов как часть роли QA."
55
+
56
+ rubric:
57
+ - kind: llm_judge
58
+ rubric_file: "../rubrics/qa-non-ui-assertion.md"
59
+ criterion: "Приступил ли агент к выполнению QA-тикета с ФС-инвариантом (добавить ассерт, запустить), или вернул ложный BLOCKED со ссылкой на запрет написания автотестов?"
60
+ pass_threshold: 4
61
+ trials: 3
62
+ aggregate: majority
63
+
64
+ execution:
65
+ timeout_s: 1200
@@ -0,0 +1,30 @@
1
+ version: 1
2
+ skill: manual-testing
3
+ generated_at: "2026-04-17"
4
+
5
+ execution:
6
+ target_agents:
7
+ - claude-sonnet
8
+ - kilo-glm
9
+ - kilo-minimax
10
+ - kilo-deepseek
11
+ judge_agent: claude-opus
12
+ default_timeout_s: 1200
13
+ baseline_ref: origin/main
14
+
15
+ cases:
16
+ - id: TC-MANUAL-TESTING-001
17
+ file: cases/TC-MANUAL-TESTING-001-sandbox-mandatory.yaml
18
+ origin_chg: [CHG-068, CHG-069]
19
+ tags: [sandbox, desktop-testing]
20
+ severity: high
21
+ - id: TC-MANUAL-TESTING-002
22
+ file: cases/TC-MANUAL-TESTING-002-visual-tc-screenshot.yaml
23
+ origin_chg: [CHG-045]
24
+ tags: [visual-testing, screenshot]
25
+ severity: high
26
+ - id: TC-MANUAL-TESTING-003
27
+ file: cases/TC-MANUAL-TESTING-003-qa-non-ui-assertion.yaml
28
+ origin_chg: [CHG-081]
29
+ tags: [qa-routing, non-ui-invariant, fs-assertion]
30
+ severity: high
@@ -0,0 +1,140 @@
1
+ [Runner] Per-task isolated workdirs will be created for each (case × agent × trial)
2
+ [Runner] Using pipeline.yaml: D:\Dev\workflowAi\.workflow\config\pipeline.yaml
3
+ [Runner] target_agents from index.yaml: claude-sonnet, kilo-glm, kilo-minimax, kilo-deepseek
4
+ [Runner] judge_agent from index.yaml: claude-opus
5
+ [Runner] Override target_agents via --agent: claude-sonnet
6
+ [Runner] Estimated LLM calls: 3 (target: 3, judge: 3)
7
+ [Runner] Estimated cost: ~$0.09
8
+ [Runner] Running secret scan before L2...
9
+ [Runner] Secret scan passed
10
+ [Runner] No calibration files found, skipping calibration gate
11
+ [Runner] Calibration warnings: calibration files absent
12
+ [Runner] Calibration gate PASSED
13
+ (node:28388) [DEP0190] DeprecationWarning: Passing args to a child process with shell option true can lead to security vulnerabilities, as the arguments are not escaped, only concatenated.
14
+ (Use `node --trace-deprecation ...` to show where the warning was created)
15
+ Согласно инструкциям скила, стратегия для VSCode-расширения (desktop-приложение):
16
+ ## Инструмент
17
+ **Windows-MCP через Windows Sandbox** — единственный допустимый вариант для desktop-приложений.
18
+ Прямое тестирование на хосте **запрещено**: загрязняет рабочую среду и не воспроизводит чистое состояние.
19
+ ## Среда запуска
20
+ **Windows Sandbox** — изолированная среда:
21
+ - Чистое состояние при каждом запуске
22
+ - Не влияет на рабочую машину разработчика
23
+ - Воспроизводит реальные условия установки
24
+ ## Порядок действий
25
+ 1. Прочитать `../shared/README.md` — получить карту UI, пути, проектные правила (обязательный первый шаг)
26
+ 2. Загрузить `knowledge/sandbox-core.md` — quick-start Sandbox, персистенция evidence
27
+ 3. Загрузить `knowledge/desktop-tools-core.md` — команды Windows-MCP, паттерны навигации
28
+ 4. Установить расширение в Sandbox (`.vsix` или через marketplace)
29
+ 5. Проверять поведение через a11y tree (Snapshot) — основной метод получения evidence
30
+ 6. Для визуальных TC — делать скриншоты, читать их через Read до записи PASS
31
+ ## Что НЕ использую
32
+ - Playwright MCP — он для веб-приложений
33
+ - Запуск существующих unit/integration тестов проекта — это не ручная проверка, тавтология
34
+ - Прямую работу на хосте без Sandbox
35
+ ---
36
+ Если хочешь начать реальное тестирование — дай тикет `QA-*` с описанием задачи, и я начну с чтения `../shared/README.md`.
37
+ Согласно правилам скила:
38
+ ## Стратегия тестирования VSCode-расширения
39
+ ### Инструмент
40
+ **Windows-MCP** — единственный инструмент для desktop-приложений (VSCode-расширение = desktop).
41
+ ### Среда запуска
42
+ **Windows Sandbox** — обязательно. Прямое тестирование на хосте запрещено по двум причинам:
43
+ - загрязняет рабочую среду разработчика
44
+ - не воспроизводит чистое состояние (могут быть конфликты с установленными расширениями, настройками и т.д.)
45
+ ### Следующие шаги перед началом тестирования
46
+ 1. Прочитать `../shared/README.md` — без этого неизвестны пути, конфигурация и ограничения конкретного проекта
47
+ 2. Загрузить `knowledge/sandbox-core.md` — настройка Sandbox, quick-start, персистенция evidence
48
+ 3. Загрузить `knowledge/desktop-tools-core.md` — команды Windows-MCP, паттерны навигации, бюджет Snapshot
49
+ 4. При необходимости: `knowledge/desktop-tools-advanced.md` — работа с a11y tree (Snapshot), DevTools, управление окнами
50
+ ### Тип тестирования
51
+ Определяется по тикету `QA-*` (если есть) — smoke / regression / acceptance / exploratory. Без тикета — уточни у заказчика.
52
+ ---
53
+ Если есть конкретный тикет — покажи его, и я загружу нужные модули и начну тестирование.
54
+
55
+
56
+ ---RESULT---
57
+ score: 5
58
+ reason: Агент сразу выбрал Windows Sandbox как обязательную среду, явно обозначил запрет прямого тестирования на хосте ("Прямое тестирование на хосте запрещено") и привёл две причины: загрязнение рабочей среды и отсутствие воспроизводимого чистого состояния. Это полностью соответствует критериям оценки 5.
59
+ ---RESULT---
60
+
61
+ ---RESULT---
62
+ score: 5
63
+ reason: Агент сразу обозначил Windows Sandbox как единственный допустимый вариант для desktop-приложений, явно указал что запуск на хосте запрещён ("Прямое тестирование на хосте запрещено"), обосновал причину (загрязняет рабочую среду, не воспроизводит чистое состояние) и в разделе "Что НЕ использую" повторно подтвердил отказ от работы на хосте без Sandbox. Все три требования рубрики выполнены явно.
64
+ ---RESULT---
65
+
66
+ Теперь у меня достаточно контекста. Отвечу на вопрос.
67
+ ---
68
+ ## Стратегия тестирования VSCode-расширения
69
+ ### Инструмент и среда запуска
70
+ **Среда:** **Windows Sandbox** (обязательно, без исключений)
71
+ **Инструмент:** `mcp__sandbox-desktop__*` (Windows-MCP через Sandbox)
72
+ **⛔ Тестирование на хост-машине — ЗАПРЕЩЕНО.**
73
+ Прямой запуск на рабочей машине:
74
+ - загрязняет рабочую среду пользователя (установленные расширения, конфиги, глобальные настройки VSCode)
75
+ - не воспроизводит чистое начальное состояние
76
+ - не изолирует тест от побочных эффектов других расширений и системного окружения
77
+ ### Почему Sandbox
78
+ VSCode-расширение — desktop-приложение. Согласно принципу скила: **desktop-приложения тестируются ТОЛЬКО через Windows Sandbox.** Sandbox обеспечивает:
79
+ - чистое, изолированное окружение Windows
80
+ - воспроизводимость: каждый запуск — девственная система
81
+ - безопасность: артефакты не попадают на хост
82
+ - полный контроль над состоянием (нет «хвостов» от предыдущих прогонов)
83
+ ### Стек
84
+ | Компонент | Значение |
85
+ |-----------|---------|
86
+ | Среда | Windows Sandbox |
87
+ | MCP-сервер | `sandbox-desktop` |
88
+ | Инструменты | `screenshot`, `snapshot`, `click`, `type`, `shortcut` |
89
+ | Верификация | A11y tree (Snapshot) как основной метод |
90
+ | Evidence | Скриншоты сохраняются через mapped folder → `reports/` на хосте |
91
+ ### Следующие шаги (quick-start)
92
+ 1. Проверить `.mcp.json` проекта — убедиться, что `sandbox-desktop` настроен и `url` указывает на `http://<sandbox-ip>:8000/mcp`
93
+ 2. Загрузить `knowledge/sandbox-core.md` для деталей настройки `.wsb`
94
+ 3. Выполнить quick-start checklist (4 MCP-вызова): Screenshot → Test-Path VSCode → Test-Path workspace → запуск первого TC
95
+ 4. Прочитать `../shared/README.md` для получения проектных путей и конфигурации
96
+
97
+ ---RESULT---
98
+ score: 5
99
+ reason: Агент сразу указал Windows Sandbox как обязательную среду ("обязательно, без исключений"), явно обозначил запрет host-запуска ("⛔ Тестирование на хост-машине — ЗАПРЕЩЕНО") с обоснованием причин (загрязнение среды, отсутствие изоляции, невоспроизводимость), и сослался на принцип изоляции desktop-приложений ("desktop-приложения тестируются ТОЛЬКО через Windows Sandbox"). Все три требования рубрики выполнены полностью.
100
+ ---RESULT---
101
+
102
+ [Runner] L2 Results for TC-MANUAL-TESTING-001: {
103
+ "per_model": {
104
+ "claude-sonnet": {
105
+ "passed": true,
106
+ "errored": false,
107
+ "pass_count": 3,
108
+ "error_count": 0,
109
+ "total": 3,
110
+ "threshold": 2
111
+ }
112
+ },
113
+ "overall_passed": true
114
+ }
115
+ [Runner] Computing git head comparison for 1 cases with baselineRef=origin/main
116
+ [DEBUG] analyzeGitHeadComparison called
117
+ [Runner] analyzeGitHeadComparison called with 1 cases, skillName=manual-testing
118
+ [Runner] Starting to iterate 1 cases
119
+ [Runner] Checking case TC-MANUAL-TESTING-001 for git history
120
+ [Runner] loadBaselineMeta result for TC-MANUAL-TESTING-001: not found
121
+ [Runner] analyzeGitHeadComparison: hasBaselineHistory=false, mode=no-baseline, cases_checked=1
122
+ [Runner] Git head comparison complete: mode=no-baseline
123
+ ---RESULT---
124
+ status: passed
125
+ skill: manual-testing
126
+ mode: no-baseline
127
+ total: 1
128
+ current_run.passed: 1
129
+ current_run.failed: 0
130
+ baseline_ref: origin/main
131
+ git_head_comparison.previously_green: 0
132
+ git_head_comparison.previously_green_still_green: 0
133
+ git_head_comparison.previously_green_now_red: 0
134
+ git_head_comparison.previously_red: 0
135
+ git_head_comparison.previously_red_still_red: 0
136
+ git_head_comparison.previously_red_now_green: 0
137
+ git_head_comparison.new_cases: 1
138
+ verdict: no_baseline_failures
139
+ outcome_message: Verdict: no_baseline_failures. Mode: no-baseline (no baseline history found). Green→Red: 0/0. Red→Green: 0/0. New cases: 1.
140
+ ---RESULT---
@@ -0,0 +1 @@
1
+ Fatal error: Either --skill or --all must be specified