workflow-ai 1.0.63 → 1.0.65

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (576) hide show
  1. package/README.md +239 -145
  2. package/configs/agent-health-rules.yaml +64 -0
  3. package/configs/config.yaml +134 -0
  4. package/configs/pipeline.yaml +901 -0
  5. package/configs/ticket-movement-rules.yaml +80 -0
  6. package/package.json +1 -1
  7. package/src/global-dir.mjs +25 -1
  8. package/src/init.mjs +20 -3
  9. package/src/lib/agent-health-registry.mjs +245 -0
  10. package/src/lib/artifact-snapshot.mjs +233 -0
  11. package/src/lib/error-classifier.mjs +274 -0
  12. package/src/lib/test-error-classifier.mjs +60 -0
  13. package/src/lib/test-extends.mjs +58 -0
  14. package/src/lib/test-version.mjs +21 -0
  15. package/src/scripts/move-to-review.js +5 -7
  16. package/src/scripts/reset-agent-health.js +62 -0
  17. package/src/scripts/run-skill-tests.js +348 -136
  18. package/src/skills/analyze-report/README.md +44 -0
  19. package/src/skills/analyze-report/SKILL.md +121 -0
  20. package/src/skills/analyze-report/algorithms/progress-assessment.md +108 -0
  21. package/src/skills/analyze-report/knowledge/analysis-frameworks.md +66 -0
  22. package/src/skills/analyze-report/knowledge/report-structure.md +61 -0
  23. package/src/skills/analyze-report/scripts/calc-plan-metrics.js +234 -0
  24. package/src/skills/analyze-report/templates/analysis-report.md +80 -0
  25. package/src/skills/analyze-report/tests/cases/TC-ANALYZE-REPORT-001/current/claude-sonnet/trial-1.md +69 -0
  26. package/src/skills/analyze-report/tests/cases/TC-ANALYZE-REPORT-001/current/claude-sonnet/trial-2.md +103 -0
  27. package/src/skills/analyze-report/tests/cases/TC-ANALYZE-REPORT-001/current/claude-sonnet/trial-3.md +99 -0
  28. package/src/skills/analyze-report/tests/cases/TC-ANALYZE-REPORT-001/current/judge.json +163 -0
  29. package/src/skills/analyze-report/tests/cases/TC-ANALYZE-REPORT-001/current/kilo-deepseek/trial-1.md +89 -0
  30. package/src/skills/analyze-report/tests/cases/TC-ANALYZE-REPORT-001/current/kilo-deepseek/trial-2.md +88 -0
  31. package/src/skills/analyze-report/tests/cases/TC-ANALYZE-REPORT-001/current/kilo-deepseek/trial-3.md +100 -0
  32. package/src/skills/analyze-report/tests/cases/TC-ANALYZE-REPORT-001/current/kilo-glm/trial-1.md +77 -0
  33. package/src/skills/analyze-report/tests/cases/TC-ANALYZE-REPORT-001/current/kilo-glm/trial-2.md +64 -0
  34. package/src/skills/analyze-report/tests/cases/TC-ANALYZE-REPORT-001/current/kilo-glm/trial-3.md +110 -0
  35. package/src/skills/analyze-report/tests/cases/TC-ANALYZE-REPORT-001/current/kilo-minimax/trial-1.md +74 -0
  36. package/src/skills/analyze-report/tests/cases/TC-ANALYZE-REPORT-001/current/kilo-minimax/trial-2.md +38 -0
  37. package/src/skills/analyze-report/tests/cases/TC-ANALYZE-REPORT-001/current/kilo-minimax/trial-3.md +61 -0
  38. package/src/skills/analyze-report/tests/cases/TC-ANALYZE-REPORT-001/current/meta.json +115 -0
  39. package/src/skills/analyze-report/tests/cases/TC-ANALYZE-REPORT-001-evidence-from-log.yaml +60 -0
  40. package/src/skills/analyze-report/tests/cases/TC-ANALYZE-REPORT-002/current/claude-sonnet/trial-1.md +90 -0
  41. package/src/skills/analyze-report/tests/cases/TC-ANALYZE-REPORT-002/current/claude-sonnet/trial-2.md +89 -0
  42. package/src/skills/analyze-report/tests/cases/TC-ANALYZE-REPORT-002/current/claude-sonnet/trial-3.md +77 -0
  43. package/src/skills/analyze-report/tests/cases/TC-ANALYZE-REPORT-002/current/judge.json +163 -0
  44. package/src/skills/analyze-report/tests/cases/TC-ANALYZE-REPORT-002/current/kilo-deepseek/trial-1.md +84 -0
  45. package/src/skills/analyze-report/tests/cases/TC-ANALYZE-REPORT-002/current/kilo-deepseek/trial-2.md +77 -0
  46. package/src/skills/analyze-report/tests/cases/TC-ANALYZE-REPORT-002/current/kilo-deepseek/trial-3.md +89 -0
  47. package/src/skills/analyze-report/tests/cases/TC-ANALYZE-REPORT-002/current/kilo-glm/trial-1.md +103 -0
  48. package/src/skills/analyze-report/tests/cases/TC-ANALYZE-REPORT-002/current/kilo-glm/trial-2.md +103 -0
  49. package/src/skills/analyze-report/tests/cases/TC-ANALYZE-REPORT-002/current/kilo-glm/trial-3.md +103 -0
  50. package/src/skills/analyze-report/tests/cases/TC-ANALYZE-REPORT-002/current/kilo-minimax/trial-1.md +93 -0
  51. package/src/skills/analyze-report/tests/cases/TC-ANALYZE-REPORT-002/current/kilo-minimax/trial-2.md +93 -0
  52. package/src/skills/analyze-report/tests/cases/TC-ANALYZE-REPORT-002/current/kilo-minimax/trial-3.md +86 -0
  53. package/src/skills/analyze-report/tests/cases/TC-ANALYZE-REPORT-002/current/meta.json +115 -0
  54. package/src/skills/analyze-report/tests/cases/TC-ANALYZE-REPORT-002-result-block-format.yaml +44 -0
  55. package/src/skills/analyze-report/tests/fixtures/REPORT-002-incorrect-attribution.md +27 -0
  56. package/src/skills/analyze-report/tests/fixtures/pipeline-2026-04-06_qa-001-skip.log +32 -0
  57. package/src/skills/analyze-report/tests/index.yaml +25 -0
  58. package/src/skills/analyze-report/tests/rubrics/evidence-from-log.md +22 -0
  59. package/src/skills/analyze-report/tests/rubrics/result-block-format.md +22 -0
  60. package/src/skills/analyze-report/workflows/progress.md +158 -0
  61. package/src/skills/analyze-report/workflows/retrospective.md +143 -0
  62. package/src/skills/coach/README.md +43 -0
  63. package/src/skills/coach/SKILL.md +167 -0
  64. package/src/skills/coach/SKILL.md.legacy +157 -0
  65. package/src/skills/coach/algorithms/gap-analysis.md +69 -0
  66. package/src/skills/coach/algorithms/improvement-prioritization.md +62 -0
  67. package/src/skills/coach/algorithms/skill-scoring.md +80 -0
  68. package/src/skills/coach/knowledge/audit-applied-changes-clean.txt +11 -0
  69. package/src/skills/coach/knowledge/backlog-management.md +67 -0
  70. package/src/skills/coach/knowledge/backlog-management.md.legacy +90 -0
  71. package/src/skills/coach/knowledge/common-antipatterns.md +76 -0
  72. package/src/skills/coach/knowledge/prompt-engineering.md +45 -0
  73. package/src/skills/coach/knowledge/shared-knowledge-guide.md +44 -0
  74. package/src/skills/coach/knowledge/skill-anatomy.md +49 -0
  75. package/src/skills/coach/knowledge/test-authorship.md +141 -0
  76. package/src/skills/coach/templates/audit-report.md +39 -0
  77. package/src/skills/coach/templates/coach-backlog-init.yaml +14 -0
  78. package/src/skills/coach/templates/coach-backlog-init.yaml.legacy +10 -0
  79. package/src/skills/coach/templates/improvement-plan.md +42 -0
  80. package/src/skills/coach/templates/new-skill.md +95 -0
  81. package/src/skills/coach/tests/cases/TC-COACH-001/current/claude-sonnet/trial-1.md +58 -0
  82. package/src/skills/coach/tests/cases/TC-COACH-001/current/claude-sonnet/trial-2.md +65 -0
  83. package/src/skills/coach/tests/cases/TC-COACH-001/current/claude-sonnet/trial-3.md +58 -0
  84. package/src/skills/coach/tests/cases/TC-COACH-001/current/judge.json +151 -0
  85. package/src/skills/coach/tests/cases/TC-COACH-001/current/kilo-deepseek/trial-1.md +46 -0
  86. package/src/skills/coach/tests/cases/TC-COACH-001/current/kilo-deepseek/trial-2.md +0 -0
  87. package/src/skills/coach/tests/cases/TC-COACH-001/current/kilo-deepseek/trial-3.md +75 -0
  88. package/src/skills/coach/tests/cases/TC-COACH-001/current/kilo-glm/trial-1.md +81 -0
  89. package/src/skills/coach/tests/cases/TC-COACH-001/current/kilo-glm/trial-2.md +101 -0
  90. package/src/skills/coach/tests/cases/TC-COACH-001/current/kilo-glm/trial-3.md +91 -0
  91. package/src/skills/coach/tests/cases/TC-COACH-001/current/kilo-minimax/trial-1.md +48 -0
  92. package/src/skills/coach/tests/cases/TC-COACH-001/current/kilo-minimax/trial-2.md +30 -0
  93. package/src/skills/coach/tests/cases/TC-COACH-001/current/kilo-minimax/trial-3.md +55 -0
  94. package/src/skills/coach/tests/cases/TC-COACH-001/current/meta.json +94 -0
  95. package/src/skills/coach/tests/cases/TC-COACH-001-evidence-based-temporal-diagram.yaml +53 -0
  96. package/src/skills/coach/tests/cases/TC-COACH-002/current/claude-sonnet/trial-1.md +46 -0
  97. package/src/skills/coach/tests/cases/TC-COACH-002/current/claude-sonnet/trial-2.md +50 -0
  98. package/src/skills/coach/tests/cases/TC-COACH-002/current/claude-sonnet/trial-3.md +48 -0
  99. package/src/skills/coach/tests/cases/TC-COACH-002/current/judge.json +151 -0
  100. package/src/skills/coach/tests/cases/TC-COACH-002/current/kilo-deepseek/trial-1.md +0 -0
  101. package/src/skills/coach/tests/cases/TC-COACH-002/current/kilo-deepseek/trial-2.md +37 -0
  102. package/src/skills/coach/tests/cases/TC-COACH-002/current/kilo-deepseek/trial-3.md +30 -0
  103. package/src/skills/coach/tests/cases/TC-COACH-002/current/kilo-glm/trial-1.md +23 -0
  104. package/src/skills/coach/tests/cases/TC-COACH-002/current/kilo-glm/trial-2.md +29 -0
  105. package/src/skills/coach/tests/cases/TC-COACH-002/current/kilo-glm/trial-3.md +35 -0
  106. package/src/skills/coach/tests/cases/TC-COACH-002/current/kilo-minimax/trial-1.md +13 -0
  107. package/src/skills/coach/tests/cases/TC-COACH-002/current/kilo-minimax/trial-2.md +19 -0
  108. package/src/skills/coach/tests/cases/TC-COACH-002/current/kilo-minimax/trial-3.md +33 -0
  109. package/src/skills/coach/tests/cases/TC-COACH-002/current/meta.json +94 -0
  110. package/src/skills/coach/tests/cases/TC-COACH-002-root-cause-first.yaml +57 -0
  111. package/src/skills/coach/tests/fixtures/pipeline-2026-04-06_id-collision.log +77 -0
  112. package/src/skills/coach/tests/index.yaml +29 -0
  113. package/src/skills/coach/tests/rubrics/calibration/evidence-based-bad.md +13 -0
  114. package/src/skills/coach/tests/rubrics/calibration/evidence-based-good.md +29 -0
  115. package/src/skills/coach/tests/rubrics/evidence-based.md +26 -0
  116. package/src/skills/coach/tests/rubrics/root-cause-first.md +21 -0
  117. package/src/skills/coach/workflows/analyze.md +79 -0
  118. package/src/skills/coach/workflows/analyze.md.legacy +64 -0
  119. package/src/skills/coach/workflows/audit.md +74 -0
  120. package/src/skills/coach/workflows/audit.md.legacy +59 -0
  121. package/src/skills/coach/workflows/create.md +80 -0
  122. package/src/skills/coach/workflows/create.md.legacy +67 -0
  123. package/src/skills/coach/workflows/improve.md +71 -0
  124. package/src/skills/coach/workflows/improve.md.legacy +60 -0
  125. package/src/skills/coach/workflows/research.md +55 -0
  126. package/src/skills/coach/workflows/review.md +52 -0
  127. package/src/skills/coach/workflows/review.md.legacy +48 -0
  128. package/src/skills/coach/workflows/test.md +97 -0
  129. package/src/skills/create-plan/README.md +39 -0
  130. package/src/skills/create-plan/SKILL.md +104 -0
  131. package/src/skills/create-plan/algorithms/risk-assessment.md +73 -0
  132. package/src/skills/create-plan/knowledge/plan-completeness.md +67 -0
  133. package/src/skills/create-plan/knowledge/plan-lifecycle.md +33 -0
  134. package/src/skills/create-plan/knowledge/task-verification-pairs.md +151 -0
  135. package/src/skills/create-plan/scripts/validate-completeness.js +182 -0
  136. package/src/skills/create-plan/tests/cases/TC-CREATE-PLAN-001/current/claude-sonnet/trial-1.md +5 -0
  137. package/src/skills/create-plan/tests/cases/TC-CREATE-PLAN-001/current/claude-sonnet/trial-2.md +39 -0
  138. package/src/skills/create-plan/tests/cases/TC-CREATE-PLAN-001/current/claude-sonnet/trial-3.md +35 -0
  139. package/src/skills/create-plan/tests/cases/TC-CREATE-PLAN-001/current/judge.json +167 -0
  140. package/src/skills/create-plan/tests/cases/TC-CREATE-PLAN-001/current/kilo-deepseek/trial-1.md +5 -0
  141. package/src/skills/create-plan/tests/cases/TC-CREATE-PLAN-001/current/kilo-deepseek/trial-2.md +10 -0
  142. package/src/skills/create-plan/tests/cases/TC-CREATE-PLAN-001/current/kilo-deepseek/trial-3.md +5 -0
  143. package/src/skills/create-plan/tests/cases/TC-CREATE-PLAN-001/current/kilo-glm/trial-1.md +26 -0
  144. package/src/skills/create-plan/tests/cases/TC-CREATE-PLAN-001/current/kilo-glm/trial-2.md +86 -0
  145. package/src/skills/create-plan/tests/cases/TC-CREATE-PLAN-001/current/kilo-glm/trial-3.md +5 -0
  146. package/src/skills/create-plan/tests/cases/TC-CREATE-PLAN-001/current/kilo-minimax/trial-1.md +11 -0
  147. package/src/skills/create-plan/tests/cases/TC-CREATE-PLAN-001/current/kilo-minimax/trial-2.md +15 -0
  148. package/src/skills/create-plan/tests/cases/TC-CREATE-PLAN-001/current/kilo-minimax/trial-3.md +14 -0
  149. package/src/skills/create-plan/tests/cases/TC-CREATE-PLAN-001/current/meta.json +119 -0
  150. package/src/skills/create-plan/tests/cases/TC-CREATE-PLAN-001-validate-completeness.yaml +41 -0
  151. package/src/skills/create-plan/tests/cases/TC-CREATE-PLAN-002/current/claude-sonnet/trial-1.md +25 -0
  152. package/src/skills/create-plan/tests/cases/TC-CREATE-PLAN-002/current/claude-sonnet/trial-2.md +30 -0
  153. package/src/skills/create-plan/tests/cases/TC-CREATE-PLAN-002/current/claude-sonnet/trial-3.md +37 -0
  154. package/src/skills/create-plan/tests/cases/TC-CREATE-PLAN-002/current/judge.json +164 -0
  155. package/src/skills/create-plan/tests/cases/TC-CREATE-PLAN-002/current/kilo-deepseek/trial-1.md +3 -0
  156. package/src/skills/create-plan/tests/cases/TC-CREATE-PLAN-002/current/kilo-deepseek/trial-2.md +11 -0
  157. package/src/skills/create-plan/tests/cases/TC-CREATE-PLAN-002/current/kilo-deepseek/trial-3.md +13 -0
  158. package/src/skills/create-plan/tests/cases/TC-CREATE-PLAN-002/current/kilo-glm/trial-1.md +44 -0
  159. package/src/skills/create-plan/tests/cases/TC-CREATE-PLAN-002/current/kilo-glm/trial-2.md +5 -0
  160. package/src/skills/create-plan/tests/cases/TC-CREATE-PLAN-002/current/kilo-glm/trial-3.md +49 -0
  161. package/src/skills/create-plan/tests/cases/TC-CREATE-PLAN-002/current/kilo-minimax/trial-1.md +6 -0
  162. package/src/skills/create-plan/tests/cases/TC-CREATE-PLAN-002/current/kilo-minimax/trial-2.md +11 -0
  163. package/src/skills/create-plan/tests/cases/TC-CREATE-PLAN-002/current/kilo-minimax/trial-3.md +16 -0
  164. package/src/skills/create-plan/tests/cases/TC-CREATE-PLAN-002/current/meta.json +116 -0
  165. package/src/skills/create-plan/tests/cases/TC-CREATE-PLAN-002-task-granularity.yaml +39 -0
  166. package/src/skills/create-plan/tests/index.yaml +25 -0
  167. package/src/skills/create-plan/tests/rubrics/task-granularity.md +21 -0
  168. package/src/skills/create-plan/tests/rubrics/validate-completeness.md +21 -0
  169. package/src/skills/create-plan/workflows/create.md +136 -0
  170. package/src/skills/create-report/README.md +40 -0
  171. package/src/skills/create-report/SKILL.md +73 -0
  172. package/src/skills/create-report/algorithms/metric-calculation.md +93 -0
  173. package/src/skills/create-report/knowledge/report-metrics.md +82 -0
  174. package/src/skills/create-report/scripts/calc-metrics.js +383 -0
  175. package/src/skills/create-report/tests/cases/TC-CREATE-REPORT-001/current/claude-sonnet/trial-1.md +25 -0
  176. package/src/skills/create-report/tests/cases/TC-CREATE-REPORT-001/current/claude-sonnet/trial-2.md +26 -0
  177. package/src/skills/create-report/tests/cases/TC-CREATE-REPORT-001/current/claude-sonnet/trial-3.md +28 -0
  178. package/src/skills/create-report/tests/cases/TC-CREATE-REPORT-001/current/judge.json +163 -0
  179. package/src/skills/create-report/tests/cases/TC-CREATE-REPORT-001/current/kilo-deepseek/trial-1.md +4 -0
  180. package/src/skills/create-report/tests/cases/TC-CREATE-REPORT-001/current/kilo-deepseek/trial-2.md +3 -0
  181. package/src/skills/create-report/tests/cases/TC-CREATE-REPORT-001/current/kilo-deepseek/trial-3.md +6 -0
  182. package/src/skills/create-report/tests/cases/TC-CREATE-REPORT-001/current/kilo-glm/trial-1.md +8 -0
  183. package/src/skills/create-report/tests/cases/TC-CREATE-REPORT-001/current/kilo-glm/trial-2.md +12 -0
  184. package/src/skills/create-report/tests/cases/TC-CREATE-REPORT-001/current/kilo-glm/trial-3.md +7 -0
  185. package/src/skills/create-report/tests/cases/TC-CREATE-REPORT-001/current/kilo-minimax/trial-1.md +12 -0
  186. package/src/skills/create-report/tests/cases/TC-CREATE-REPORT-001/current/kilo-minimax/trial-2.md +22 -0
  187. package/src/skills/create-report/tests/cases/TC-CREATE-REPORT-001/current/kilo-minimax/trial-3.md +13 -0
  188. package/src/skills/create-report/tests/cases/TC-CREATE-REPORT-001/current/meta.json +115 -0
  189. package/src/skills/create-report/tests/cases/TC-CREATE-REPORT-001-root-cause-attribution.yaml +57 -0
  190. package/src/skills/create-report/tests/index.yaml +20 -0
  191. package/src/skills/create-report/tests/rubrics/root-cause-attribution.md +21 -0
  192. package/src/skills/create-report/workflows/standard.md +175 -0
  193. package/src/skills/decompose-gaps/README.md +39 -0
  194. package/src/skills/decompose-gaps/SKILL.md +78 -0
  195. package/src/skills/decompose-gaps/algorithms/scope-check.md +110 -0
  196. package/src/skills/decompose-gaps/knowledge/scope-validation.md +65 -0
  197. package/src/skills/decompose-gaps/tests/cases/TC-DECOMPOSE-GAPS-001/current/claude-sonnet/trial-1.md +41 -0
  198. package/src/skills/decompose-gaps/tests/cases/TC-DECOMPOSE-GAPS-001/current/claude-sonnet/trial-2.md +41 -0
  199. package/src/skills/decompose-gaps/tests/cases/TC-DECOMPOSE-GAPS-001/current/claude-sonnet/trial-3.md +56 -0
  200. package/src/skills/decompose-gaps/tests/cases/TC-DECOMPOSE-GAPS-001/current/judge.json +164 -0
  201. package/src/skills/decompose-gaps/tests/cases/TC-DECOMPOSE-GAPS-001/current/kilo-deepseek/trial-1.md +25 -0
  202. package/src/skills/decompose-gaps/tests/cases/TC-DECOMPOSE-GAPS-001/current/kilo-deepseek/trial-2.md +17 -0
  203. package/src/skills/decompose-gaps/tests/cases/TC-DECOMPOSE-GAPS-001/current/kilo-deepseek/trial-3.md +22 -0
  204. package/src/skills/decompose-gaps/tests/cases/TC-DECOMPOSE-GAPS-001/current/kilo-glm/trial-1.md +25 -0
  205. package/src/skills/decompose-gaps/tests/cases/TC-DECOMPOSE-GAPS-001/current/kilo-glm/trial-2.md +5 -0
  206. package/src/skills/decompose-gaps/tests/cases/TC-DECOMPOSE-GAPS-001/current/kilo-glm/trial-3.md +29 -0
  207. package/src/skills/decompose-gaps/tests/cases/TC-DECOMPOSE-GAPS-001/current/kilo-minimax/trial-1.md +27 -0
  208. package/src/skills/decompose-gaps/tests/cases/TC-DECOMPOSE-GAPS-001/current/kilo-minimax/trial-2.md +35 -0
  209. package/src/skills/decompose-gaps/tests/cases/TC-DECOMPOSE-GAPS-001/current/kilo-minimax/trial-3.md +18 -0
  210. package/src/skills/decompose-gaps/tests/cases/TC-DECOMPOSE-GAPS-001/current/meta.json +116 -0
  211. package/src/skills/decompose-gaps/tests/cases/TC-DECOMPOSE-GAPS-001-scope-exclusion.yaml +46 -0
  212. package/src/skills/decompose-gaps/tests/cases/TC-DECOMPOSE-GAPS-002/current/claude-sonnet/trial-1.md +27 -0
  213. package/src/skills/decompose-gaps/tests/cases/TC-DECOMPOSE-GAPS-002/current/claude-sonnet/trial-2.md +30 -0
  214. package/src/skills/decompose-gaps/tests/cases/TC-DECOMPOSE-GAPS-002/current/claude-sonnet/trial-3.md +27 -0
  215. package/src/skills/decompose-gaps/tests/cases/TC-DECOMPOSE-GAPS-002/current/judge.json +163 -0
  216. package/src/skills/decompose-gaps/tests/cases/TC-DECOMPOSE-GAPS-002/current/kilo-deepseek/trial-1.md +0 -0
  217. package/src/skills/decompose-gaps/tests/cases/TC-DECOMPOSE-GAPS-002/current/kilo-deepseek/trial-2.md +15 -0
  218. package/src/skills/decompose-gaps/tests/cases/TC-DECOMPOSE-GAPS-002/current/kilo-deepseek/trial-3.md +7 -0
  219. package/src/skills/decompose-gaps/tests/cases/TC-DECOMPOSE-GAPS-002/current/kilo-glm/trial-1.md +21 -0
  220. package/src/skills/decompose-gaps/tests/cases/TC-DECOMPOSE-GAPS-002/current/kilo-glm/trial-2.md +38 -0
  221. package/src/skills/decompose-gaps/tests/cases/TC-DECOMPOSE-GAPS-002/current/kilo-glm/trial-3.md +16 -0
  222. package/src/skills/decompose-gaps/tests/cases/TC-DECOMPOSE-GAPS-002/current/kilo-minimax/trial-1.md +5 -0
  223. package/src/skills/decompose-gaps/tests/cases/TC-DECOMPOSE-GAPS-002/current/kilo-minimax/trial-2.md +10 -0
  224. package/src/skills/decompose-gaps/tests/cases/TC-DECOMPOSE-GAPS-002/current/kilo-minimax/trial-3.md +9 -0
  225. package/src/skills/decompose-gaps/tests/cases/TC-DECOMPOSE-GAPS-002/current/meta.json +115 -0
  226. package/src/skills/decompose-gaps/tests/cases/TC-DECOMPOSE-GAPS-002-glob-before-write.yaml +36 -0
  227. package/src/skills/decompose-gaps/tests/cases/TC-DECOMPOSE-GAPS-003/current/claude-sonnet/trial-1.md +30 -0
  228. package/src/skills/decompose-gaps/tests/cases/TC-DECOMPOSE-GAPS-003/current/claude-sonnet/trial-2.md +30 -0
  229. package/src/skills/decompose-gaps/tests/cases/TC-DECOMPOSE-GAPS-003/current/claude-sonnet/trial-3.md +30 -0
  230. package/src/skills/decompose-gaps/tests/cases/TC-DECOMPOSE-GAPS-003/current/judge.json +165 -0
  231. package/src/skills/decompose-gaps/tests/cases/TC-DECOMPOSE-GAPS-003/current/kilo-deepseek/trial-1.md +5 -0
  232. package/src/skills/decompose-gaps/tests/cases/TC-DECOMPOSE-GAPS-003/current/kilo-deepseek/trial-2.md +26 -0
  233. package/src/skills/decompose-gaps/tests/cases/TC-DECOMPOSE-GAPS-003/current/kilo-deepseek/trial-3.md +5 -0
  234. package/src/skills/decompose-gaps/tests/cases/TC-DECOMPOSE-GAPS-003/current/kilo-glm/trial-1.md +39 -0
  235. package/src/skills/decompose-gaps/tests/cases/TC-DECOMPOSE-GAPS-003/current/kilo-glm/trial-2.md +37 -0
  236. package/src/skills/decompose-gaps/tests/cases/TC-DECOMPOSE-GAPS-003/current/kilo-glm/trial-3.md +45 -0
  237. package/src/skills/decompose-gaps/tests/cases/TC-DECOMPOSE-GAPS-003/current/kilo-minimax/trial-1.md +26 -0
  238. package/src/skills/decompose-gaps/tests/cases/TC-DECOMPOSE-GAPS-003/current/kilo-minimax/trial-2.md +27 -0
  239. package/src/skills/decompose-gaps/tests/cases/TC-DECOMPOSE-GAPS-003/current/kilo-minimax/trial-3.md +7 -0
  240. package/src/skills/decompose-gaps/tests/cases/TC-DECOMPOSE-GAPS-003/current/meta.json +117 -0
  241. package/src/skills/decompose-gaps/tests/cases/TC-DECOMPOSE-GAPS-003-parent-plan-mandatory.yaml +41 -0
  242. package/src/skills/decompose-gaps/tests/index.yaml +30 -0
  243. package/src/skills/decompose-gaps/tests/rubrics/glob-before-write.md +21 -0
  244. package/src/skills/decompose-gaps/tests/rubrics/parent-plan-mandatory.md +22 -0
  245. package/src/skills/decompose-gaps/tests/rubrics/scope-exclusion.md +21 -0
  246. package/src/skills/decompose-gaps/workflows/decompose.md +123 -0
  247. package/src/skills/decompose-plan/README.md +43 -0
  248. package/src/skills/decompose-plan/SKILL.md +87 -0
  249. package/src/skills/decompose-plan/algorithms/deduplication.md +101 -0
  250. package/src/skills/decompose-plan/knowledge/atomicity-checklist.md +139 -0
  251. package/src/skills/decompose-plan/knowledge/capabilities.md +68 -0
  252. package/src/skills/decompose-plan/knowledge/human-task-rules.md +82 -0
  253. package/src/skills/decompose-plan/knowledge/scope-guard-checklist.md +73 -0
  254. package/src/skills/decompose-plan/scripts/check-atomicity-limit.js +47 -0
  255. package/src/skills/decompose-plan/scripts/check-duplicates.js +323 -0
  256. package/src/skills/decompose-plan/scripts/verify-atomicity.js +408 -0
  257. package/src/skills/decompose-plan/tests/cases/TC-DECOMPOSE-PLAN-001/current/claude-sonnet/trial-1.md +30 -0
  258. package/src/skills/decompose-plan/tests/cases/TC-DECOMPOSE-PLAN-001/current/claude-sonnet/trial-2.md +36 -0
  259. package/src/skills/decompose-plan/tests/cases/TC-DECOMPOSE-PLAN-001/current/claude-sonnet/trial-3.md +37 -0
  260. package/src/skills/decompose-plan/tests/cases/TC-DECOMPOSE-PLAN-001/current/judge.json +163 -0
  261. package/src/skills/decompose-plan/tests/cases/TC-DECOMPOSE-PLAN-001/current/kilo-deepseek/trial-1.md +20 -0
  262. package/src/skills/decompose-plan/tests/cases/TC-DECOMPOSE-PLAN-001/current/kilo-deepseek/trial-2.md +17 -0
  263. package/src/skills/decompose-plan/tests/cases/TC-DECOMPOSE-PLAN-001/current/kilo-deepseek/trial-3.md +28 -0
  264. package/src/skills/decompose-plan/tests/cases/TC-DECOMPOSE-PLAN-001/current/kilo-glm/trial-1.md +114 -0
  265. package/src/skills/decompose-plan/tests/cases/TC-DECOMPOSE-PLAN-001/current/kilo-glm/trial-2.md +137 -0
  266. package/src/skills/decompose-plan/tests/cases/TC-DECOMPOSE-PLAN-001/current/kilo-glm/trial-3.md +188 -0
  267. package/src/skills/decompose-plan/tests/cases/TC-DECOMPOSE-PLAN-001/current/kilo-minimax/trial-1.md +0 -0
  268. package/src/skills/decompose-plan/tests/cases/TC-DECOMPOSE-PLAN-001/current/kilo-minimax/trial-2.md +32 -0
  269. package/src/skills/decompose-plan/tests/cases/TC-DECOMPOSE-PLAN-001/current/kilo-minimax/trial-3.md +110 -0
  270. package/src/skills/decompose-plan/tests/cases/TC-DECOMPOSE-PLAN-001/current/meta.json +115 -0
  271. package/src/skills/decompose-plan/tests/cases/TC-DECOMPOSE-PLAN-001-atomicity-no-1to1.yaml +56 -0
  272. package/src/skills/decompose-plan/tests/cases/TC-DECOMPOSE-PLAN-002/current/claude-sonnet/trial-1.md +47 -0
  273. package/src/skills/decompose-plan/tests/cases/TC-DECOMPOSE-PLAN-002/current/claude-sonnet/trial-2.md +54 -0
  274. package/src/skills/decompose-plan/tests/cases/TC-DECOMPOSE-PLAN-002/current/claude-sonnet/trial-3.md +43 -0
  275. package/src/skills/decompose-plan/tests/cases/TC-DECOMPOSE-PLAN-002/current/judge.json +163 -0
  276. package/src/skills/decompose-plan/tests/cases/TC-DECOMPOSE-PLAN-002/current/kilo-deepseek/trial-1.md +15 -0
  277. package/src/skills/decompose-plan/tests/cases/TC-DECOMPOSE-PLAN-002/current/kilo-deepseek/trial-2.md +5 -0
  278. package/src/skills/decompose-plan/tests/cases/TC-DECOMPOSE-PLAN-002/current/kilo-deepseek/trial-3.md +12 -0
  279. package/src/skills/decompose-plan/tests/cases/TC-DECOMPOSE-PLAN-002/current/kilo-glm/trial-1.md +34 -0
  280. package/src/skills/decompose-plan/tests/cases/TC-DECOMPOSE-PLAN-002/current/kilo-glm/trial-2.md +30 -0
  281. package/src/skills/decompose-plan/tests/cases/TC-DECOMPOSE-PLAN-002/current/kilo-glm/trial-3.md +35 -0
  282. package/src/skills/decompose-plan/tests/cases/TC-DECOMPOSE-PLAN-002/current/kilo-minimax/trial-1.md +0 -0
  283. package/src/skills/decompose-plan/tests/cases/TC-DECOMPOSE-PLAN-002/current/kilo-minimax/trial-2.md +31 -0
  284. package/src/skills/decompose-plan/tests/cases/TC-DECOMPOSE-PLAN-002/current/kilo-minimax/trial-3.md +0 -0
  285. package/src/skills/decompose-plan/tests/cases/TC-DECOMPOSE-PLAN-002/current/meta.json +115 -0
  286. package/src/skills/decompose-plan/tests/cases/TC-DECOMPOSE-PLAN-002-get-next-id-mandatory.yaml +44 -0
  287. package/src/skills/decompose-plan/tests/cases/TC-DECOMPOSE-PLAN-003/current/claude-sonnet/trial-1.md +21 -0
  288. package/src/skills/decompose-plan/tests/cases/TC-DECOMPOSE-PLAN-003/current/claude-sonnet/trial-2.md +38 -0
  289. package/src/skills/decompose-plan/tests/cases/TC-DECOMPOSE-PLAN-003/current/claude-sonnet/trial-3.md +30 -0
  290. package/src/skills/decompose-plan/tests/cases/TC-DECOMPOSE-PLAN-003/current/judge.json +163 -0
  291. package/src/skills/decompose-plan/tests/cases/TC-DECOMPOSE-PLAN-003/current/kilo-deepseek/trial-1.md +31 -0
  292. package/src/skills/decompose-plan/tests/cases/TC-DECOMPOSE-PLAN-003/current/kilo-deepseek/trial-2.md +35 -0
  293. package/src/skills/decompose-plan/tests/cases/TC-DECOMPOSE-PLAN-003/current/kilo-deepseek/trial-3.md +48 -0
  294. package/src/skills/decompose-plan/tests/cases/TC-DECOMPOSE-PLAN-003/current/kilo-glm/trial-1.md +167 -0
  295. package/src/skills/decompose-plan/tests/cases/TC-DECOMPOSE-PLAN-003/current/kilo-glm/trial-2.md +62 -0
  296. package/src/skills/decompose-plan/tests/cases/TC-DECOMPOSE-PLAN-003/current/kilo-glm/trial-3.md +174 -0
  297. package/src/skills/decompose-plan/tests/cases/TC-DECOMPOSE-PLAN-003/current/kilo-minimax/trial-1.md +0 -0
  298. package/src/skills/decompose-plan/tests/cases/TC-DECOMPOSE-PLAN-003/current/kilo-minimax/trial-2.md +0 -0
  299. package/src/skills/decompose-plan/tests/cases/TC-DECOMPOSE-PLAN-003/current/kilo-minimax/trial-3.md +0 -0
  300. package/src/skills/decompose-plan/tests/cases/TC-DECOMPOSE-PLAN-003/current/meta.json +115 -0
  301. package/src/skills/decompose-plan/tests/cases/TC-DECOMPOSE-PLAN-003-verbatim-dod-transfer.yaml +42 -0
  302. package/src/skills/decompose-plan/tests/cases/TC-DECOMPOSE-PLAN-004/current/claude-sonnet/trial-1.md +55 -0
  303. package/src/skills/decompose-plan/tests/cases/TC-DECOMPOSE-PLAN-004/current/claude-sonnet/trial-2.md +49 -0
  304. package/src/skills/decompose-plan/tests/cases/TC-DECOMPOSE-PLAN-004/current/claude-sonnet/trial-3.md +49 -0
  305. package/src/skills/decompose-plan/tests/cases/TC-DECOMPOSE-PLAN-004/current/judge.json +163 -0
  306. package/src/skills/decompose-plan/tests/cases/TC-DECOMPOSE-PLAN-004/current/kilo-deepseek/trial-1.md +104 -0
  307. package/src/skills/decompose-plan/tests/cases/TC-DECOMPOSE-PLAN-004/current/kilo-deepseek/trial-2.md +45 -0
  308. package/src/skills/decompose-plan/tests/cases/TC-DECOMPOSE-PLAN-004/current/kilo-deepseek/trial-3.md +58 -0
  309. package/src/skills/decompose-plan/tests/cases/TC-DECOMPOSE-PLAN-004/current/kilo-glm/trial-1.md +193 -0
  310. package/src/skills/decompose-plan/tests/cases/TC-DECOMPOSE-PLAN-004/current/kilo-glm/trial-2.md +202 -0
  311. package/src/skills/decompose-plan/tests/cases/TC-DECOMPOSE-PLAN-004/current/kilo-glm/trial-3.md +155 -0
  312. package/src/skills/decompose-plan/tests/cases/TC-DECOMPOSE-PLAN-004/current/kilo-minimax/trial-1.md +52 -0
  313. package/src/skills/decompose-plan/tests/cases/TC-DECOMPOSE-PLAN-004/current/kilo-minimax/trial-2.md +17 -0
  314. package/src/skills/decompose-plan/tests/cases/TC-DECOMPOSE-PLAN-004/current/kilo-minimax/trial-3.md +0 -0
  315. package/src/skills/decompose-plan/tests/cases/TC-DECOMPOSE-PLAN-004/current/meta.json +115 -0
  316. package/src/skills/decompose-plan/tests/cases/TC-DECOMPOSE-PLAN-004-executor-atomicity.yaml +64 -0
  317. package/src/skills/decompose-plan/tests/cases/TC-DECOMPOSE-PLAN-005/current/claude-sonnet/trial-1.md +59 -0
  318. package/src/skills/decompose-plan/tests/cases/TC-DECOMPOSE-PLAN-005/current/claude-sonnet/trial-2.md +204 -0
  319. package/src/skills/decompose-plan/tests/cases/TC-DECOMPOSE-PLAN-005/current/claude-sonnet/trial-3.md +213 -0
  320. package/src/skills/decompose-plan/tests/cases/TC-DECOMPOSE-PLAN-005/current/judge.json +163 -0
  321. package/src/skills/decompose-plan/tests/cases/TC-DECOMPOSE-PLAN-005/current/kilo-deepseek/trial-1.md +0 -0
  322. package/src/skills/decompose-plan/tests/cases/TC-DECOMPOSE-PLAN-005/current/kilo-deepseek/trial-2.md +57 -0
  323. package/src/skills/decompose-plan/tests/cases/TC-DECOMPOSE-PLAN-005/current/kilo-deepseek/trial-3.md +54 -0
  324. package/src/skills/decompose-plan/tests/cases/TC-DECOMPOSE-PLAN-005/current/kilo-glm/trial-1.md +147 -0
  325. package/src/skills/decompose-plan/tests/cases/TC-DECOMPOSE-PLAN-005/current/kilo-glm/trial-2.md +165 -0
  326. package/src/skills/decompose-plan/tests/cases/TC-DECOMPOSE-PLAN-005/current/kilo-glm/trial-3.md +133 -0
  327. package/src/skills/decompose-plan/tests/cases/TC-DECOMPOSE-PLAN-005/current/kilo-minimax/trial-1.md +81 -0
  328. package/src/skills/decompose-plan/tests/cases/TC-DECOMPOSE-PLAN-005/current/kilo-minimax/trial-2.md +108 -0
  329. package/src/skills/decompose-plan/tests/cases/TC-DECOMPOSE-PLAN-005/current/kilo-minimax/trial-3.md +3 -0
  330. package/src/skills/decompose-plan/tests/cases/TC-DECOMPOSE-PLAN-005/current/meta.json +114 -0
  331. package/src/skills/decompose-plan/tests/cases/TC-DECOMPOSE-PLAN-005-capabilities-registry.yaml +78 -0
  332. package/src/skills/decompose-plan/tests/cases/TC-DECOMPOSE-PLAN-006/current/claude-sonnet/trial-1.md +225 -0
  333. package/src/skills/decompose-plan/tests/cases/TC-DECOMPOSE-PLAN-006/current/claude-sonnet/trial-2.md +66 -0
  334. package/src/skills/decompose-plan/tests/cases/TC-DECOMPOSE-PLAN-006/current/claude-sonnet/trial-3.md +36 -0
  335. package/src/skills/decompose-plan/tests/cases/TC-DECOMPOSE-PLAN-006/current/judge.json +163 -0
  336. package/src/skills/decompose-plan/tests/cases/TC-DECOMPOSE-PLAN-006/current/kilo-deepseek/trial-1.md +42 -0
  337. package/src/skills/decompose-plan/tests/cases/TC-DECOMPOSE-PLAN-006/current/kilo-deepseek/trial-2.md +67 -0
  338. package/src/skills/decompose-plan/tests/cases/TC-DECOMPOSE-PLAN-006/current/kilo-deepseek/trial-3.md +40 -0
  339. package/src/skills/decompose-plan/tests/cases/TC-DECOMPOSE-PLAN-006/current/kilo-glm/trial-1.md +122 -0
  340. package/src/skills/decompose-plan/tests/cases/TC-DECOMPOSE-PLAN-006/current/kilo-glm/trial-2.md +131 -0
  341. package/src/skills/decompose-plan/tests/cases/TC-DECOMPOSE-PLAN-006/current/kilo-glm/trial-3.md +138 -0
  342. package/src/skills/decompose-plan/tests/cases/TC-DECOMPOSE-PLAN-006/current/kilo-minimax/trial-1.md +41 -0
  343. package/src/skills/decompose-plan/tests/cases/TC-DECOMPOSE-PLAN-006/current/kilo-minimax/trial-2.md +88 -0
  344. package/src/skills/decompose-plan/tests/cases/TC-DECOMPOSE-PLAN-006/current/kilo-minimax/trial-3.md +0 -0
  345. package/src/skills/decompose-plan/tests/cases/TC-DECOMPOSE-PLAN-006/current/meta.json +115 -0
  346. package/src/skills/decompose-plan/tests/cases/TC-DECOMPOSE-PLAN-006-dod-threshold.yaml +72 -0
  347. package/src/skills/decompose-plan/tests/index.yaml +45 -0
  348. package/src/skills/decompose-plan/tests/rubrics/atomicity-no-1to1.md +21 -0
  349. package/src/skills/decompose-plan/tests/rubrics/capabilities-registry.md +21 -0
  350. package/src/skills/decompose-plan/tests/rubrics/dod-threshold.md +21 -0
  351. package/src/skills/decompose-plan/tests/rubrics/executor-atomicity.md +21 -0
  352. package/src/skills/decompose-plan/tests/rubrics/get-next-id-mandatory.md +21 -0
  353. package/src/skills/decompose-plan/tests/rubrics/verbatim-dod-transfer.md +21 -0
  354. package/src/skills/decompose-plan/workflows/decompose.md +305 -0
  355. package/src/skills/deep-research/README.md +36 -0
  356. package/src/skills/deep-research/SKILL.md +106 -0
  357. package/src/skills/deep-research/algorithms/source-scoring.md +63 -0
  358. package/src/skills/deep-research/algorithms/synthesis.md +67 -0
  359. package/src/skills/deep-research/knowledge/data-validation.md +44 -0
  360. package/src/skills/deep-research/knowledge/perplexity-config.md +30 -0
  361. package/src/skills/deep-research/knowledge/research-methodology.md +54 -0
  362. package/src/skills/deep-research/knowledge/source-evaluation.md +33 -0
  363. package/src/skills/deep-research/scripts/perplexity-research.js +315 -0
  364. package/src/skills/deep-research/templates/brief-summary.md +25 -0
  365. package/src/skills/deep-research/templates/research-report.md +76 -0
  366. package/src/skills/deep-research/tests/cases/TC-DEEP-RESEARCH-001/current/claude-haiku/trial-1.md +48 -0
  367. package/src/skills/deep-research/tests/cases/TC-DEEP-RESEARCH-001/current/claude-haiku/trial-2.md +88 -0
  368. package/src/skills/deep-research/tests/cases/TC-DEEP-RESEARCH-001/current/claude-haiku/trial-3.md +56 -0
  369. package/src/skills/deep-research/tests/cases/TC-DEEP-RESEARCH-001/current/judge.json +163 -0
  370. package/src/skills/deep-research/tests/cases/TC-DEEP-RESEARCH-001/current/kilo-free/trial-1.md +58 -0
  371. package/src/skills/deep-research/tests/cases/TC-DEEP-RESEARCH-001/current/kilo-free/trial-2.md +249 -0
  372. package/src/skills/deep-research/tests/cases/TC-DEEP-RESEARCH-001/current/kilo-free/trial-3.md +44 -0
  373. package/src/skills/deep-research/tests/cases/TC-DEEP-RESEARCH-001/current/kilo-glm/trial-1.md +96 -0
  374. package/src/skills/deep-research/tests/cases/TC-DEEP-RESEARCH-001/current/kilo-glm/trial-2.md +56 -0
  375. package/src/skills/deep-research/tests/cases/TC-DEEP-RESEARCH-001/current/kilo-glm/trial-3.md +94 -0
  376. package/src/skills/deep-research/tests/cases/TC-DEEP-RESEARCH-001/current/kilo-glm-air/trial-1.md +11 -0
  377. package/src/skills/deep-research/tests/cases/TC-DEEP-RESEARCH-001/current/kilo-glm-air/trial-2.md +1 -0
  378. package/src/skills/deep-research/tests/cases/TC-DEEP-RESEARCH-001/current/kilo-glm-air/trial-3.md +1 -0
  379. package/src/skills/deep-research/tests/cases/TC-DEEP-RESEARCH-001/current/meta.json +115 -0
  380. package/src/skills/deep-research/tests/cases/TC-DEEP-RESEARCH-001-self-check-url.yaml +58 -0
  381. package/src/skills/deep-research/tests/index.yaml +20 -0
  382. package/src/skills/deep-research/tests/rubrics/self-check-url.md +34 -0
  383. package/src/skills/deep-research/workflows/base-checklist.md +19 -0
  384. package/src/skills/deep-research/workflows/benchmark.md +38 -0
  385. package/src/skills/deep-research/workflows/competitor.md +44 -0
  386. package/src/skills/deep-research/workflows/custom.md +32 -0
  387. package/src/skills/deep-research/workflows/market.md +44 -0
  388. package/src/skills/deep-research/workflows/technology.md +40 -0
  389. package/src/skills/deep-research/workflows/trend.md +40 -0
  390. package/src/skills/execute-task/README.md +44 -0
  391. package/src/skills/execute-task/SKILL.md +292 -0
  392. package/src/skills/execute-task/algorithms/execution-strategy.md +136 -0
  393. package/src/skills/execute-task/knowledge/context-checkpoints.md +75 -0
  394. package/src/skills/execute-task/knowledge/ticket-structure.md +70 -0
  395. package/src/skills/execute-task/tests/cases/TC-EXECUTE-TASK-001/current/claude-haiku/trial-1.md +5 -0
  396. package/src/skills/execute-task/tests/cases/TC-EXECUTE-TASK-001/current/claude-haiku/trial-2.md +5 -0
  397. package/src/skills/execute-task/tests/cases/TC-EXECUTE-TASK-001/current/claude-haiku/trial-3.md +5 -0
  398. package/src/skills/execute-task/tests/cases/TC-EXECUTE-TASK-001/current/judge.json +124 -0
  399. package/src/skills/execute-task/tests/cases/TC-EXECUTE-TASK-001/current/kilo-free/trial-1.md +4 -0
  400. package/src/skills/execute-task/tests/cases/TC-EXECUTE-TASK-001/current/kilo-free/trial-2.md +4 -0
  401. package/src/skills/execute-task/tests/cases/TC-EXECUTE-TASK-001/current/kilo-free/trial-3.md +4 -0
  402. package/src/skills/execute-task/tests/cases/TC-EXECUTE-TASK-001/current/kilo-glm-air/trial-1.md +4 -0
  403. package/src/skills/execute-task/tests/cases/TC-EXECUTE-TASK-001/current/kilo-glm-air/trial-2.md +4 -0
  404. package/src/skills/execute-task/tests/cases/TC-EXECUTE-TASK-001/current/kilo-glm-air/trial-3.md +11 -0
  405. package/src/skills/execute-task/tests/cases/TC-EXECUTE-TASK-001/current/meta.json +88 -0
  406. package/src/skills/execute-task/tests/cases/TC-EXECUTE-TASK-001-no-ticket-creation.yaml +48 -0
  407. package/src/skills/execute-task/tests/cases/TC-EXECUTE-TASK-002/current/claude-haiku/trial-1.md +5 -0
  408. package/src/skills/execute-task/tests/cases/TC-EXECUTE-TASK-002/current/claude-haiku/trial-2.md +6 -0
  409. package/src/skills/execute-task/tests/cases/TC-EXECUTE-TASK-002/current/claude-haiku/trial-3.md +5 -0
  410. package/src/skills/execute-task/tests/cases/TC-EXECUTE-TASK-002/current/judge.json +124 -0
  411. package/src/skills/execute-task/tests/cases/TC-EXECUTE-TASK-002/current/kilo-free/trial-1.md +4 -0
  412. package/src/skills/execute-task/tests/cases/TC-EXECUTE-TASK-002/current/kilo-free/trial-2.md +4 -0
  413. package/src/skills/execute-task/tests/cases/TC-EXECUTE-TASK-002/current/kilo-free/trial-3.md +8 -0
  414. package/src/skills/execute-task/tests/cases/TC-EXECUTE-TASK-002/current/kilo-glm-air/trial-1.md +9 -0
  415. package/src/skills/execute-task/tests/cases/TC-EXECUTE-TASK-002/current/kilo-glm-air/trial-2.md +26 -0
  416. package/src/skills/execute-task/tests/cases/TC-EXECUTE-TASK-002/current/kilo-glm-air/trial-3.md +4 -0
  417. package/src/skills/execute-task/tests/cases/TC-EXECUTE-TASK-002/current/meta.json +89 -0
  418. package/src/skills/execute-task/tests/cases/TC-EXECUTE-TASK-002-no-duplicate-dod.yaml +44 -0
  419. package/src/skills/execute-task/tests/cases/TC-EXECUTE-TASK-003/current/claude-haiku/trial-1.md +5 -0
  420. package/src/skills/execute-task/tests/cases/TC-EXECUTE-TASK-003/current/claude-haiku/trial-2.md +5 -0
  421. package/src/skills/execute-task/tests/cases/TC-EXECUTE-TASK-003/current/claude-haiku/trial-3.md +5 -0
  422. package/src/skills/execute-task/tests/cases/TC-EXECUTE-TASK-003/current/judge.json +46 -0
  423. package/src/skills/execute-task/tests/cases/TC-EXECUTE-TASK-003/current/meta.json +37 -0
  424. package/src/skills/execute-task/tests/cases/TC-EXECUTE-TASK-003-verification-proportionality.yaml +46 -0
  425. package/src/skills/execute-task/tests/cases/TC-EXECUTE-TASK-004/current/claude-haiku/trial-1.md +18 -0
  426. package/src/skills/execute-task/tests/cases/TC-EXECUTE-TASK-004/current/claude-haiku/trial-2.md +16 -0
  427. package/src/skills/execute-task/tests/cases/TC-EXECUTE-TASK-004/current/claude-haiku/trial-3.md +14 -0
  428. package/src/skills/execute-task/tests/cases/TC-EXECUTE-TASK-004/current/judge.json +124 -0
  429. package/src/skills/execute-task/tests/cases/TC-EXECUTE-TASK-004/current/kilo-free/trial-1.md +5 -0
  430. package/src/skills/execute-task/tests/cases/TC-EXECUTE-TASK-004/current/kilo-free/trial-2.md +5 -0
  431. package/src/skills/execute-task/tests/cases/TC-EXECUTE-TASK-004/current/kilo-free/trial-3.md +1 -0
  432. package/src/skills/execute-task/tests/cases/TC-EXECUTE-TASK-004/current/kilo-glm-air/trial-1.md +8 -0
  433. package/src/skills/execute-task/tests/cases/TC-EXECUTE-TASK-004/current/kilo-glm-air/trial-2.md +5 -0
  434. package/src/skills/execute-task/tests/cases/TC-EXECUTE-TASK-004/current/kilo-glm-air/trial-3.md +4 -0
  435. package/src/skills/execute-task/tests/cases/TC-EXECUTE-TASK-004/current/meta.json +89 -0
  436. package/src/skills/execute-task/tests/cases/TC-EXECUTE-TASK-004-no-foreign-ticket-edit.yaml +50 -0
  437. package/src/skills/execute-task/tests/cases/TC-EXECUTE-TASK-005/current/claude-haiku/trial-1.md +5 -0
  438. package/src/skills/execute-task/tests/cases/TC-EXECUTE-TASK-005/current/claude-haiku/trial-2.md +5 -0
  439. package/src/skills/execute-task/tests/cases/TC-EXECUTE-TASK-005/current/claude-haiku/trial-3.md +5 -0
  440. package/src/skills/execute-task/tests/cases/TC-EXECUTE-TASK-005/current/judge.json +124 -0
  441. package/src/skills/execute-task/tests/cases/TC-EXECUTE-TASK-005/current/kilo-free/trial-1.md +15 -0
  442. package/src/skills/execute-task/tests/cases/TC-EXECUTE-TASK-005/current/kilo-free/trial-2.md +4 -0
  443. package/src/skills/execute-task/tests/cases/TC-EXECUTE-TASK-005/current/kilo-free/trial-3.md +5 -0
  444. package/src/skills/execute-task/tests/cases/TC-EXECUTE-TASK-005/current/kilo-glm-air/trial-1.md +11 -0
  445. package/src/skills/execute-task/tests/cases/TC-EXECUTE-TASK-005/current/kilo-glm-air/trial-2.md +11 -0
  446. package/src/skills/execute-task/tests/cases/TC-EXECUTE-TASK-005/current/kilo-glm-air/trial-3.md +4 -0
  447. package/src/skills/execute-task/tests/cases/TC-EXECUTE-TASK-005/current/meta.json +88 -0
  448. package/src/skills/execute-task/tests/cases/TC-EXECUTE-TASK-005-ticket-fields-updated.yaml +39 -0
  449. package/src/skills/execute-task/tests/fixtures/IMPL-902-create-file.md +41 -0
  450. package/src/skills/execute-task/tests/fixtures/IMPL-904-current-task.md +40 -0
  451. package/src/skills/execute-task/tests/fixtures/IMPL-906-fill-ticket.md +42 -0
  452. package/src/skills/execute-task/tests/fixtures/QA-901-button-click.md +41 -0
  453. package/src/skills/execute-task/tests/fixtures/QA-903-visual-figma.md +40 -0
  454. package/src/skills/execute-task/tests/fixtures/TASK-905-done-with-typo.md +36 -0
  455. package/src/skills/execute-task/tests/index.yaml +39 -0
  456. package/src/skills/execute-task/tests/rubrics/no-duplicate-dod.md +22 -0
  457. package/src/skills/execute-task/tests/rubrics/no-foreign-ticket-edit.md +20 -0
  458. package/src/skills/execute-task/tests/rubrics/no-ticket-creation.md +21 -0
  459. package/src/skills/execute-task/tests/rubrics/ticket-fields-updated.md +23 -0
  460. package/src/skills/execute-task/tests/rubrics/verification-proportionality.md +22 -0
  461. package/src/skills/execute-task/workflows/execute.md +104 -0
  462. package/src/skills/manual-testing/README.md +63 -0
  463. package/src/skills/manual-testing/SKILL.md +176 -0
  464. package/src/skills/manual-testing/algorithms/blocked-tool-strategy.md +74 -0
  465. package/src/skills/manual-testing/algorithms/bug-severity.md +73 -0
  466. package/src/skills/manual-testing/algorithms/mcp-budget.md +97 -0
  467. package/src/skills/manual-testing/algorithms/test-prioritization.md +69 -0
  468. package/src/skills/manual-testing/knowledge/browser-extension-testing.md +102 -0
  469. package/src/skills/manual-testing/knowledge/browser-tools.md +114 -0
  470. package/src/skills/manual-testing/knowledge/desktop-tools-advanced.md +92 -0
  471. package/src/skills/manual-testing/knowledge/desktop-tools-core.md +76 -0
  472. package/src/skills/manual-testing/knowledge/sandbox-advanced.md +83 -0
  473. package/src/skills/manual-testing/knowledge/sandbox-core.md +67 -0
  474. package/src/skills/manual-testing/knowledge/stateful-edge-cases.md +69 -0
  475. package/src/skills/manual-testing/knowledge/test-case-design.md +107 -0
  476. package/src/skills/manual-testing/knowledge/testing-types.md +45 -0
  477. package/src/skills/manual-testing/templates/bug-report.md +52 -0
  478. package/src/skills/manual-testing/templates/test-case.md +34 -0
  479. package/src/skills/manual-testing/templates/test-plan.md +97 -0
  480. package/src/skills/manual-testing/templates/test-session-report.md +56 -0
  481. package/src/skills/manual-testing/tests/cases/TC-MANUAL-TESTING-001/current/claude-sonnet/trial-1.md +34 -0
  482. package/src/skills/manual-testing/tests/cases/TC-MANUAL-TESTING-001/current/claude-sonnet/trial-2.md +32 -0
  483. package/src/skills/manual-testing/tests/cases/TC-MANUAL-TESTING-001/current/claude-sonnet/trial-3.md +30 -0
  484. package/src/skills/manual-testing/tests/cases/TC-MANUAL-TESTING-001/current/judge.json +163 -0
  485. package/src/skills/manual-testing/tests/cases/TC-MANUAL-TESTING-001/current/kilo-deepseek/trial-1.md +0 -0
  486. package/src/skills/manual-testing/tests/cases/TC-MANUAL-TESTING-001/current/kilo-deepseek/trial-2.md +7 -0
  487. package/src/skills/manual-testing/tests/cases/TC-MANUAL-TESTING-001/current/kilo-deepseek/trial-3.md +0 -0
  488. package/src/skills/manual-testing/tests/cases/TC-MANUAL-TESTING-001/current/kilo-glm/trial-1.md +4 -0
  489. package/src/skills/manual-testing/tests/cases/TC-MANUAL-TESTING-001/current/kilo-glm/trial-2.md +15 -0
  490. package/src/skills/manual-testing/tests/cases/TC-MANUAL-TESTING-001/current/kilo-glm/trial-3.md +8 -0
  491. package/src/skills/manual-testing/tests/cases/TC-MANUAL-TESTING-001/current/kilo-minimax/trial-1.md +5 -0
  492. package/src/skills/manual-testing/tests/cases/TC-MANUAL-TESTING-001/current/kilo-minimax/trial-2.md +7 -0
  493. package/src/skills/manual-testing/tests/cases/TC-MANUAL-TESTING-001/current/kilo-minimax/trial-3.md +7 -0
  494. package/src/skills/manual-testing/tests/cases/TC-MANUAL-TESTING-001/current/meta.json +114 -0
  495. package/src/skills/manual-testing/tests/cases/TC-MANUAL-TESTING-001-sandbox-mandatory.yaml +38 -0
  496. package/src/skills/manual-testing/tests/cases/TC-MANUAL-TESTING-002/current/claude-sonnet/trial-1.md +44 -0
  497. package/src/skills/manual-testing/tests/cases/TC-MANUAL-TESTING-002/current/claude-sonnet/trial-2.md +32 -0
  498. package/src/skills/manual-testing/tests/cases/TC-MANUAL-TESTING-002/current/claude-sonnet/trial-3.md +47 -0
  499. package/src/skills/manual-testing/tests/cases/TC-MANUAL-TESTING-002/current/judge.json +163 -0
  500. package/src/skills/manual-testing/tests/cases/TC-MANUAL-TESTING-002/current/kilo-deepseek/trial-1.md +19 -0
  501. package/src/skills/manual-testing/tests/cases/TC-MANUAL-TESTING-002/current/kilo-deepseek/trial-2.md +15 -0
  502. package/src/skills/manual-testing/tests/cases/TC-MANUAL-TESTING-002/current/kilo-deepseek/trial-3.md +24 -0
  503. package/src/skills/manual-testing/tests/cases/TC-MANUAL-TESTING-002/current/kilo-glm/trial-1.md +19 -0
  504. package/src/skills/manual-testing/tests/cases/TC-MANUAL-TESTING-002/current/kilo-glm/trial-2.md +13 -0
  505. package/src/skills/manual-testing/tests/cases/TC-MANUAL-TESTING-002/current/kilo-glm/trial-3.md +18 -0
  506. package/src/skills/manual-testing/tests/cases/TC-MANUAL-TESTING-002/current/kilo-minimax/trial-1.md +21 -0
  507. package/src/skills/manual-testing/tests/cases/TC-MANUAL-TESTING-002/current/kilo-minimax/trial-2.md +15 -0
  508. package/src/skills/manual-testing/tests/cases/TC-MANUAL-TESTING-002/current/kilo-minimax/trial-3.md +14 -0
  509. package/src/skills/manual-testing/tests/cases/TC-MANUAL-TESTING-002/current/meta.json +114 -0
  510. package/src/skills/manual-testing/tests/cases/TC-MANUAL-TESTING-002-visual-tc-screenshot.yaml +37 -0
  511. package/src/skills/manual-testing/tests/cases/TC-MANUAL-TESTING-003/current/claude-sonnet/trial-1.md +76 -0
  512. package/src/skills/manual-testing/tests/cases/TC-MANUAL-TESTING-003/current/claude-sonnet/trial-2.md +71 -0
  513. package/src/skills/manual-testing/tests/cases/TC-MANUAL-TESTING-003/current/claude-sonnet/trial-3.md +85 -0
  514. package/src/skills/manual-testing/tests/cases/TC-MANUAL-TESTING-003/current/judge.json +46 -0
  515. package/src/skills/manual-testing/tests/cases/TC-MANUAL-TESTING-003/current/meta.json +36 -0
  516. package/src/skills/manual-testing/tests/cases/TC-MANUAL-TESTING-003-qa-non-ui-assertion.yaml +65 -0
  517. package/src/skills/manual-testing/tests/index.yaml +30 -0
  518. package/src/skills/manual-testing/tests/last-run-tc001-sonnet.log +140 -0
  519. package/src/skills/manual-testing/tests/last-run-tc002.log +1 -0
  520. package/src/skills/manual-testing/tests/last-run.log +1469 -0
  521. package/src/skills/manual-testing/tests/rubrics/qa-non-ui-assertion.md +31 -0
  522. package/src/skills/manual-testing/tests/rubrics/sandbox-mandatory.md +20 -0
  523. package/src/skills/manual-testing/tests/rubrics/visual-tc-screenshot.md +21 -0
  524. package/src/skills/manual-testing/workflows/acceptance.md +80 -0
  525. package/src/skills/manual-testing/workflows/exploratory.md +84 -0
  526. package/src/skills/manual-testing/workflows/regression.md +76 -0
  527. package/src/skills/manual-testing/workflows/smoke.md +109 -0
  528. package/src/skills/manual-testing/workflows/test-plan.md +75 -0
  529. package/src/skills/review-result/README.md +59 -0
  530. package/src/skills/review-result/SKILL.md +138 -0
  531. package/src/skills/review-result/algorithms/verification.md +112 -0
  532. package/src/skills/review-result/knowledge/dod-patterns.md +115 -0
  533. package/src/skills/review-result/scripts/verify-artifacts.js +384 -0
  534. package/src/skills/review-result/templates/verdict.md +153 -0
  535. package/src/skills/review-result/tests/cases/TC-REVIEW-RESULT-001/current/claude-haiku/trial-1.md +22 -0
  536. package/src/skills/review-result/tests/cases/TC-REVIEW-RESULT-001/current/claude-haiku/trial-2.md +7 -0
  537. package/src/skills/review-result/tests/cases/TC-REVIEW-RESULT-001/current/claude-haiku/trial-3.md +21 -0
  538. package/src/skills/review-result/tests/cases/TC-REVIEW-RESULT-001/current/claude-sonnet/trial-1.md +6 -0
  539. package/src/skills/review-result/tests/cases/TC-REVIEW-RESULT-001/current/claude-sonnet/trial-2.md +6 -0
  540. package/src/skills/review-result/tests/cases/TC-REVIEW-RESULT-001/current/claude-sonnet/trial-3.md +18 -0
  541. package/src/skills/review-result/tests/cases/TC-REVIEW-RESULT-001/current/judge.json +164 -0
  542. package/src/skills/review-result/tests/cases/TC-REVIEW-RESULT-001/current/kilo-deepseek/trial-1.md +5 -0
  543. package/src/skills/review-result/tests/cases/TC-REVIEW-RESULT-001/current/kilo-deepseek/trial-2.md +7 -0
  544. package/src/skills/review-result/tests/cases/TC-REVIEW-RESULT-001/current/kilo-deepseek/trial-3.md +6 -0
  545. package/src/skills/review-result/tests/cases/TC-REVIEW-RESULT-001/current/kilo-glm/trial-1.md +49 -0
  546. package/src/skills/review-result/tests/cases/TC-REVIEW-RESULT-001/current/kilo-glm/trial-2.md +28 -0
  547. package/src/skills/review-result/tests/cases/TC-REVIEW-RESULT-001/current/kilo-glm/trial-3.md +37 -0
  548. package/src/skills/review-result/tests/cases/TC-REVIEW-RESULT-001/current/kilo-minimax/trial-1.md +22 -0
  549. package/src/skills/review-result/tests/cases/TC-REVIEW-RESULT-001/current/kilo-minimax/trial-2.md +13 -0
  550. package/src/skills/review-result/tests/cases/TC-REVIEW-RESULT-001/current/kilo-minimax/trial-3.md +21 -0
  551. package/src/skills/review-result/tests/cases/TC-REVIEW-RESULT-001/current/meta.json +116 -0
  552. package/src/skills/review-result/tests/cases/TC-REVIEW-RESULT-001-visual-tc-trigger.yaml +51 -0
  553. package/src/skills/review-result/tests/cases/TC-REVIEW-RESULT-002/current/claude-haiku/trial-1.md +23 -0
  554. package/src/skills/review-result/tests/cases/TC-REVIEW-RESULT-002/current/claude-haiku/trial-2.md +22 -0
  555. package/src/skills/review-result/tests/cases/TC-REVIEW-RESULT-002/current/claude-haiku/trial-3.md +28 -0
  556. package/src/skills/review-result/tests/cases/TC-REVIEW-RESULT-002/current/claude-sonnet/trial-1.md +4 -0
  557. package/src/skills/review-result/tests/cases/TC-REVIEW-RESULT-002/current/claude-sonnet/trial-2.md +36 -0
  558. package/src/skills/review-result/tests/cases/TC-REVIEW-RESULT-002/current/claude-sonnet/trial-3.md +4 -0
  559. package/src/skills/review-result/tests/cases/TC-REVIEW-RESULT-002/current/judge.json +163 -0
  560. package/src/skills/review-result/tests/cases/TC-REVIEW-RESULT-002/current/kilo-deepseek/trial-1.md +4 -0
  561. package/src/skills/review-result/tests/cases/TC-REVIEW-RESULT-002/current/kilo-deepseek/trial-2.md +0 -0
  562. package/src/skills/review-result/tests/cases/TC-REVIEW-RESULT-002/current/kilo-deepseek/trial-3.md +4 -0
  563. package/src/skills/review-result/tests/cases/TC-REVIEW-RESULT-002/current/kilo-glm/trial-1.md +39 -0
  564. package/src/skills/review-result/tests/cases/TC-REVIEW-RESULT-002/current/kilo-glm/trial-2.md +25 -0
  565. package/src/skills/review-result/tests/cases/TC-REVIEW-RESULT-002/current/kilo-glm/trial-3.md +32 -0
  566. package/src/skills/review-result/tests/cases/TC-REVIEW-RESULT-002/current/kilo-minimax/trial-1.md +34 -0
  567. package/src/skills/review-result/tests/cases/TC-REVIEW-RESULT-002/current/kilo-minimax/trial-2.md +8 -0
  568. package/src/skills/review-result/tests/cases/TC-REVIEW-RESULT-002/current/kilo-minimax/trial-3.md +23 -0
  569. package/src/skills/review-result/tests/cases/TC-REVIEW-RESULT-002/current/meta.json +115 -0
  570. package/src/skills/review-result/tests/cases/TC-REVIEW-RESULT-002-path-line-suffix.yaml +39 -0
  571. package/src/skills/review-result/tests/fixtures/IMPL-902-path-with-line.md +43 -0
  572. package/src/skills/review-result/tests/fixtures/QA-901-visual-button.md +46 -0
  573. package/src/skills/review-result/tests/index.yaml +25 -0
  574. package/src/skills/review-result/tests/rubrics/path-line-suffix.md +19 -0
  575. package/src/skills/review-result/tests/rubrics/visual-tc-trigger.md +19 -0
  576. package/src/skills/review-result/workflows/review.md +209 -0
@@ -0,0 +1,163 @@
1
+ {
2
+ "per_model": {
3
+ "claude-sonnet": {
4
+ "pass_count": 3,
5
+ "total": 3,
6
+ "trials": [
7
+ {
8
+ "trial": 1,
9
+ "score": 5,
10
+ "passed": true
11
+ },
12
+ {
13
+ "trial": 2,
14
+ "score": 5,
15
+ "passed": true
16
+ },
17
+ {
18
+ "trial": 3,
19
+ "score": 5,
20
+ "passed": true
21
+ }
22
+ ]
23
+ },
24
+ "kilo-glm": {
25
+ "pass_count": 3,
26
+ "total": 3,
27
+ "trials": [
28
+ {
29
+ "trial": 1,
30
+ "score": 5,
31
+ "passed": true
32
+ },
33
+ {
34
+ "trial": 2,
35
+ "score": 5,
36
+ "passed": true
37
+ },
38
+ {
39
+ "trial": 3,
40
+ "score": 5,
41
+ "passed": true
42
+ }
43
+ ]
44
+ },
45
+ "kilo-minimax": {
46
+ "pass_count": 3,
47
+ "total": 3,
48
+ "trials": [
49
+ {
50
+ "trial": 1,
51
+ "score": 5,
52
+ "passed": true
53
+ },
54
+ {
55
+ "trial": 2,
56
+ "score": 5,
57
+ "passed": true
58
+ },
59
+ {
60
+ "trial": 3,
61
+ "score": 5,
62
+ "passed": true
63
+ }
64
+ ]
65
+ },
66
+ "kilo-deepseek": {
67
+ "pass_count": 1,
68
+ "total": 3,
69
+ "trials": [
70
+ {
71
+ "trial": 1,
72
+ "score": 1,
73
+ "passed": false
74
+ },
75
+ {
76
+ "trial": 2,
77
+ "score": 5,
78
+ "passed": true
79
+ },
80
+ {
81
+ "trial": 3,
82
+ "score": 1,
83
+ "passed": false
84
+ }
85
+ ]
86
+ }
87
+ },
88
+ "rubric_scores": [
89
+ {
90
+ "agentId": "kilo-deepseek",
91
+ "trial": 1,
92
+ "score": 1,
93
+ "errored": false
94
+ },
95
+ {
96
+ "agentId": "kilo-deepseek",
97
+ "trial": 2,
98
+ "score": 5,
99
+ "errored": false
100
+ },
101
+ {
102
+ "agentId": "kilo-deepseek",
103
+ "trial": 3,
104
+ "score": 1,
105
+ "errored": false
106
+ },
107
+ {
108
+ "agentId": "kilo-glm",
109
+ "trial": 1,
110
+ "score": 5,
111
+ "errored": false
112
+ },
113
+ {
114
+ "agentId": "kilo-glm",
115
+ "trial": 2,
116
+ "score": 5,
117
+ "errored": false
118
+ },
119
+ {
120
+ "agentId": "kilo-glm",
121
+ "trial": 3,
122
+ "score": 5,
123
+ "errored": false
124
+ },
125
+ {
126
+ "agentId": "kilo-minimax",
127
+ "trial": 1,
128
+ "score": 5,
129
+ "errored": false
130
+ },
131
+ {
132
+ "agentId": "kilo-minimax",
133
+ "trial": 2,
134
+ "score": 5,
135
+ "errored": false
136
+ },
137
+ {
138
+ "agentId": "kilo-minimax",
139
+ "trial": 3,
140
+ "score": 5,
141
+ "errored": false
142
+ },
143
+ {
144
+ "agentId": "claude-sonnet",
145
+ "trial": 1,
146
+ "score": 5,
147
+ "errored": false
148
+ },
149
+ {
150
+ "agentId": "claude-sonnet",
151
+ "trial": 2,
152
+ "score": 5,
153
+ "errored": false
154
+ },
155
+ {
156
+ "agentId": "claude-sonnet",
157
+ "trial": 3,
158
+ "score": 5,
159
+ "errored": false
160
+ }
161
+ ],
162
+ "timestamp": "2026-04-21T08:55:00.948Z"
163
+ }
@@ -0,0 +1,7 @@
1
+ **Инструмент:** Windows‑MCP через **Sandbox** (изолированная среда).
2
+ **Среда запуска:** Windows Sandbox для чистого состояния, без загрязнения хоста.
3
+ **Знания:** `desktop‑tools‑core.md`, `sandbox‑core.md`.
4
+ **Первый шаг:** загрузка `../shared/README.md` для проектных правил.
5
+ **Артефакты:** скриншоты и evidence сохраняются в `reports/`.
6
+
7
+ Тестирование VSCode‑расширения как desktop‑приложения выполняется **только через Sandbox**.
@@ -0,0 +1,4 @@
1
+ **Инструмент:** Windows-MCP
2
+ **Среда:** Windows Sandbox (изолированная среда)
3
+
4
+ Desktop-приложения (включая VSCode-расширения) тестируются **только через Sandbox** — прямое тестирование на хосте запрещено, так как загрязняет рабочую среду и не воспроизводит чистое состояние установки. Потребуется загрузить `knowledge/desktop-tools-core.md` + `knowledge/sandbox-core.md`.
@@ -0,0 +1,15 @@
1
+ **Инструмент:** Windows-MCP (команды навигации, кликов, Snapshot/a11y tree) **через Windows Sandbox**.
2
+
3
+ **Обоснование:**
4
+
5
+ 1. **VSCode-расширение = desktop-приложение** → согласно таблице выбора инструмента, основной инструмент — Windows-MCP через Sandbox, knowledge-модули: `knowledge/desktop-tools-core.md` + `knowledge/sandbox-core.md`.
6
+
7
+ 2. **Только Sandbox** — прямое тестирование на хосте запрещено: загрязняет рабочую среду и не воспроизводит чистое состояние установки расширения.
8
+
9
+ 3. **Порядок действий:**
10
+ - Загрузить `../shared/README.md` → проектные правила, тестовые workspace'ы
11
+ - Загрузить `knowledge/sandbox-core.md` → настройка Sandbox, evidence persistence
12
+ - Загрузить `knowledge/desktop-tools-core.md` → команды Windows-MCP, паттерны навигации по VSCode
13
+ - Настроить Sandbox с предустановленным VSCode + расширением
14
+ - Выполнять тест-кейсы через Windows-MCP (запуск VSCode, навигация по UI, клики, ввод)
15
+ - Evidence: a11y tree assertions через Snapshot, скриншоты для визуальных TC
@@ -0,0 +1,8 @@
1
+ **Инструмент:** Windows-MCP через Sandbox (изолированная среда).
2
+
3
+ **Обоснование:** VSCode-расширение — desktop-приложение. Согласно правилам скила, desktop-приложения тестируются **только** через Sandbox, так как прямое тестирование на хосте загрязняет рабочую среду и не воспроизводит чистое состояние.
4
+
5
+ **Knowledge-модули для загрузки:**
6
+ 1. `knowledge/desktop-tools-core.md` — команды Windows-MCP, паттерны навигации
7
+ 2. `knowledge/sandbox-core.md` — настройка Sandbox, quick-start, персистенция evidence
8
+ 3. `../shared/README.md` — проектные правила и конфигурация (обязательный первый шаг)
@@ -0,0 +1,5 @@
1
+ **Инструмент:** Windows-MCP
2
+ **Среда:** Windows Sandbox (обязательно — изолированная среда, не хост)
3
+ **Knowledge-модули к загрузке:** `knowledge/desktop-tools-core.md`, `knowledge/desktop-tools-advanced.md`, `knowledge/sandbox-core.md`
4
+
5
+ VSCode-расширение — desktop-приложение, поэтому Playwright MCP не подходит. Тестирование напрямую на хосте запрещено (загрязняет среду, не воспроизводит чистое состояние). Обязательный первый шаг — прочитать `../shared/README.md` для получения путей, конфигурации и ограничений проекта.
@@ -0,0 +1,7 @@
1
+ Согласно скилу:
2
+
3
+ - **Инструмент:** Windows-MCP
4
+ - **Среда:** Windows Sandbox (обязательно — прямое тестирование на хосте запрещено)
5
+ - **Knowledge-модули к загрузке:** `knowledge/desktop-tools-core.md`, `knowledge/sandbox-core.md`
6
+
7
+ Следующий шаг — прочитать `../shared/README.md` и загрузить `knowledge/sandbox-core.md` для настройки изолированной среды перед запуском расширения.
@@ -0,0 +1,7 @@
1
+ Согласно скилу:
2
+
3
+ - **Инструмент:** Windows-MCP
4
+ - **Среда:** Windows Sandbox (изолированная, обязательно — прямое тестирование на хосте запрещено)
5
+ - **Knowledge-модули к загрузке:** `knowledge/desktop-tools-core.md` + `knowledge/sandbox-core.md`
6
+
7
+ Первый шаг перед тестированием — прочитать `../shared/README.md` и настроить Sandbox по `knowledge/sandbox-core.md`.
@@ -0,0 +1,114 @@
1
+ {
2
+ "date": "2026-04-21T08:55:00.949Z",
3
+ "skill_sha": "bdb379d",
4
+ "status": "failed",
5
+ "duration_ms": 38158,
6
+ "per_model": {
7
+ "claude-sonnet": {
8
+ "passed": true,
9
+ "errored": false,
10
+ "pass_count": 3,
11
+ "error_count": 0,
12
+ "total": 3,
13
+ "threshold": 2
14
+ },
15
+ "kilo-glm": {
16
+ "passed": true,
17
+ "errored": false,
18
+ "pass_count": 3,
19
+ "error_count": 0,
20
+ "total": 3,
21
+ "threshold": 2
22
+ },
23
+ "kilo-minimax": {
24
+ "passed": true,
25
+ "errored": false,
26
+ "pass_count": 3,
27
+ "error_count": 0,
28
+ "total": 3,
29
+ "threshold": 2
30
+ },
31
+ "kilo-deepseek": {
32
+ "passed": false,
33
+ "errored": false,
34
+ "pass_count": 1,
35
+ "error_count": 0,
36
+ "total": 3,
37
+ "threshold": 2
38
+ }
39
+ },
40
+ "rubric_scores": [
41
+ {
42
+ "agentId": "kilo-deepseek",
43
+ "trial": 1,
44
+ "score": 1,
45
+ "errored": false
46
+ },
47
+ {
48
+ "agentId": "kilo-deepseek",
49
+ "trial": 2,
50
+ "score": 5,
51
+ "errored": false
52
+ },
53
+ {
54
+ "agentId": "kilo-deepseek",
55
+ "trial": 3,
56
+ "score": 1,
57
+ "errored": false
58
+ },
59
+ {
60
+ "agentId": "kilo-glm",
61
+ "trial": 1,
62
+ "score": 5,
63
+ "errored": false
64
+ },
65
+ {
66
+ "agentId": "kilo-glm",
67
+ "trial": 2,
68
+ "score": 5,
69
+ "errored": false
70
+ },
71
+ {
72
+ "agentId": "kilo-glm",
73
+ "trial": 3,
74
+ "score": 5,
75
+ "errored": false
76
+ },
77
+ {
78
+ "agentId": "kilo-minimax",
79
+ "trial": 1,
80
+ "score": 5,
81
+ "errored": false
82
+ },
83
+ {
84
+ "agentId": "kilo-minimax",
85
+ "trial": 2,
86
+ "score": 5,
87
+ "errored": false
88
+ },
89
+ {
90
+ "agentId": "kilo-minimax",
91
+ "trial": 3,
92
+ "score": 5,
93
+ "errored": false
94
+ },
95
+ {
96
+ "agentId": "claude-sonnet",
97
+ "trial": 1,
98
+ "score": 5,
99
+ "errored": false
100
+ },
101
+ {
102
+ "agentId": "claude-sonnet",
103
+ "trial": 2,
104
+ "score": 5,
105
+ "errored": false
106
+ },
107
+ {
108
+ "agentId": "claude-sonnet",
109
+ "trial": 3,
110
+ "score": 5,
111
+ "errored": false
112
+ }
113
+ ]
114
+ }
@@ -0,0 +1,38 @@
1
+ id: TC-MANUAL-TESTING-001
2
+ title: "Desktop-приложения тестируются ТОЛЬКО через Sandbox"
3
+ origin:
4
+ chg: [CHG-068, CHG-069]
5
+ incidents:
6
+ - "workflowAiVsCode: CHG-068/069 — стейкхолдер: sandbox обязателен для desktop"
7
+ principle: "Sandbox-only для desktop"
8
+ backlog_sources:
9
+ - "d:/Dev/workflowAiVsCode/.workflow/coach-backlog.yaml"
10
+ tags: [sandbox, desktop-testing]
11
+ severity: high
12
+
13
+ scenario:
14
+ system_prompt_file: "../../SKILL.md"
15
+ extra_instructions: |
16
+ Ты — исполнитель manual-testing. Тебе нужно протестировать VSCode-расширение (desktop-приложение). Какую стратегию тестирования (инструмент, среда запуска) ты выберешь?
17
+
18
+ assertions:
19
+ static:
20
+ - kind: skill_contains
21
+ file: "SKILL.md"
22
+ pattern: "[Ss]andbox"
23
+ reason: "Sandbox должен быть упомянут как обязательный для desktop."
24
+ - kind: skill_contains
25
+ file: "SKILL.md"
26
+ pattern: "⛔"
27
+ reason: "Запрет тестирования на хосте должен быть явным."
28
+
29
+ rubric:
30
+ - kind: llm_judge
31
+ rubric_file: "../rubrics/sandbox-mandatory.md"
32
+ criterion: "Выбрал ли агент стратегию тестирования desktop-приложения через изолированный sandbox, явно отказавшись от прогона на хост-машине?"
33
+ pass_threshold: 4
34
+ trials: 3
35
+ aggregate: majority
36
+
37
+ execution:
38
+ timeout_s: 1200
@@ -0,0 +1,44 @@
1
+ ## Оценка TC-001
2
+
3
+ **TC-001:** «Кнопка отображается корректно с иконкой и текстом»
4
+ **Предоставленный evidence:** DOM-элемент найден, `querySelector` вернул элемент
5
+
6
+ ---
7
+
8
+ ### Статус: BLOCKED
9
+
10
+ **Причина:** Evidence недостаточен для данного TC.
11
+
12
+ TC-001 содержит **визуальный критерий** — «отображается корректно с иконкой и текстом». Это формулировка о том, что видит пользователь: наличие иконки, текста, их корректный рендеринг.
13
+
14
+ По Принципу 8 скила:
15
+
16
+ > Если TC описывает **как элемент выглядит пользователю** — a11y tree assertion **недостаточен** как единственный evidence. Обязательно: скриншот + просмотр через Read + описание того, что видно.
17
+
18
+ `querySelector вернул элемент` — это DOM-assertion. Он подтверждает только то, что элемент **присутствует в дереве DOM**. Между DOM и визуальным рендерингом лежит CSS, JavaScript, браузерный движок. Элемент может существовать в DOM, но при этом:
19
+ - быть скрыт (`visibility: hidden`, `opacity: 0`)
20
+ - иконка — не загружена или не отрендерена
21
+ - текст — обрезан, перекрыт, нечитаем
22
+ - кнопка — вне viewport
23
+
24
+ `querySelector !== «пользователь видит иконку и текст»`.
25
+
26
+ ---
27
+
28
+ ### Что требуется для закрытия TC
29
+
30
+ 1. Скриншот проверяемой кнопки → сохранить в `reports/`
31
+ 2. Открыть скриншот через Read и описать одним предложением, что видно
32
+ 3. Если видимое соответствует критерию → **PASS** + ссылка на PNG + описание
33
+ 4. Если нет → **FAIL** + дефект по шаблону `templates/bug-report.md`
34
+
35
+ ---
36
+
37
+ ### Итог
38
+
39
+ | Поле | Значение |
40
+ |------|----------|
41
+ | Статус | **BLOCKED** |
42
+ | Причина блокировки | Предоставленный evidence (DOM querySelector) не верифицирует визуальный критерий TC |
43
+ | Требуемый следующий шаг | Скриншот кнопки + просмотр + описание видимого |
44
+ | Текущий evidence | Недостаточен — не подтверждает и не опровергает визуальный критерий |
@@ -0,0 +1,32 @@
1
+ ## TC-001 — Статус: BLOCKED
2
+
3
+ **Тест-кейс:** Кнопка отображается корректно с иконкой и текстом
4
+ **Предоставленный evidence:** `querySelector` вернул элемент
5
+
6
+ ---
7
+
8
+ ### Причина блокировки
9
+
10
+ TC-001 содержит **визуальный критерий** (Принцип 8 скила): «отображается корректно», «с иконкой и текстом» — это описание того, **как элемент выглядит пользователю**.
11
+
12
+ Для визуальных TC предоставленный evidence **недостаточен**:
13
+
14
+ | Что проверено | Что требуется |
15
+ |---|---|
16
+ | DOM-элемент существует в дереве | Визуальный рендеринг: иконка видна, текст читаем, кнопка не обрезана |
17
+ | `querySelector` вернул объект | Скриншот + самопроверка: открыть файл и описать увиденное |
18
+
19
+ `querySelector` подтверждает присутствие элемента в DOM-модели, но **не фиксирует то, что видит пользователь**. Между DOM и рендерингом лежит CSS — иконка может не загрузиться, текст может быть скрыт через `overflow: hidden`, кнопка может оказаться невидимой.
20
+
21
+ ---
22
+
23
+ ### Что требуется для закрытия TC-001
24
+
25
+ 1. Сделать **скриншот** кнопки в реальном интерфейсе
26
+ 2. Открыть скриншот через Read и **описать одним предложением** что видно (иконка, текст, внешний вид)
27
+ 3. Если видимое соответствует критерию → **PASS** + ссылка на PNG + описание
28
+ 4. Если не соответствует → **FAIL** + дефект по шаблону `templates/bug-report.md`
29
+
30
+ ---
31
+
32
+ **Итог:** `BLOCKED` — evidence не соответствует типу тест-кейса. DOM-assertion не является допустимым доказательством для визуального критерия. Тест-кейс необходимо перевыполнить с реальной проверкой UI и скриншотом.
@@ -0,0 +1,47 @@
1
+ ## TC-001: Результат тестирования
2
+
3
+ **Статус: BLOCKED**
4
+
5
+ ---
6
+
7
+ ### Анализ предоставленного evidence
8
+
9
+ Предоставленный результат:
10
+ > DOM-элемент найден, querySelector вернул элемент.
11
+
12
+ **Этого недостаточно для данного TC.**
13
+
14
+ ---
15
+
16
+ ### Почему BLOCKED, а не PASS
17
+
18
+ TC-001 — **визуальный тест-кейс**. Формулировка «отображается корректно с иконкой и текстом» явно описывает то, **как элемент выглядит пользователю**: наличие иконки, наличие и читаемость текста, общий визуальный вид кнопки.
19
+
20
+ Согласно **Принципу 8 (Visual TC = Screenshot + Self-Review)**:
21
+
22
+ | Что сделано | Что требуется |
23
+ |-------------|---------------|
24
+ | `querySelector` нашёл элемент | Подтверждает **наличие элемента в DOM**, не более |
25
+ | — | Скриншот кнопки (обязателен) |
26
+ | — | Открытие скриншота и визуальная проверка: видна ли иконка, виден ли текст, не обрезан ли элемент |
27
+ | — | Описание того, что видно на скриншоте |
28
+
29
+ `querySelector вернул элемент` не подтверждает:
30
+ - что иконка **рендерится** (может быть сломан CSS, путь к SVG, font-icon не загружен)
31
+ - что текст **отображается** (может быть `color: transparent`, `visibility: hidden`, overflow)
32
+ - что кнопка **выглядит как кнопка** (стили могут быть применены некорректно)
33
+
34
+ DOM-присутствие и визуальный рендеринг — разные вещи. Между ними лежит CSS, который `querySelector` не видит.
35
+
36
+ ---
37
+
38
+ ### Что необходимо для корректного выполнения TC-001
39
+
40
+ 1. Открыть приложение в браузере/desktop
41
+ 2. Сделать скриншот кнопки → сохранить в `reports/TC-001-screenshot-button.png`
42
+ 3. Открыть скриншот через Read и описать одним предложением, что видно
43
+ 4. Только после визуального подтверждения — записать PASS или FAIL
44
+
45
+ ---
46
+
47
+ **Итог:** TC-001 не может быть закрыт с текущим evidence. Требуется повторное выполнение с визуальной проверкой и скриншотом.
@@ -0,0 +1,163 @@
1
+ {
2
+ "per_model": {
3
+ "claude-sonnet": {
4
+ "pass_count": 3,
5
+ "total": 3,
6
+ "trials": [
7
+ {
8
+ "trial": 1,
9
+ "score": 5,
10
+ "passed": true
11
+ },
12
+ {
13
+ "trial": 2,
14
+ "score": 5,
15
+ "passed": true
16
+ },
17
+ {
18
+ "trial": 3,
19
+ "score": 5,
20
+ "passed": true
21
+ }
22
+ ]
23
+ },
24
+ "kilo-glm": {
25
+ "pass_count": 3,
26
+ "total": 3,
27
+ "trials": [
28
+ {
29
+ "trial": 1,
30
+ "score": 5,
31
+ "passed": true
32
+ },
33
+ {
34
+ "trial": 2,
35
+ "score": 5,
36
+ "passed": true
37
+ },
38
+ {
39
+ "trial": 3,
40
+ "score": 5,
41
+ "passed": true
42
+ }
43
+ ]
44
+ },
45
+ "kilo-minimax": {
46
+ "pass_count": 3,
47
+ "total": 3,
48
+ "trials": [
49
+ {
50
+ "trial": 1,
51
+ "score": 5,
52
+ "passed": true
53
+ },
54
+ {
55
+ "trial": 2,
56
+ "score": 5,
57
+ "passed": true
58
+ },
59
+ {
60
+ "trial": 3,
61
+ "score": 5,
62
+ "passed": true
63
+ }
64
+ ]
65
+ },
66
+ "kilo-deepseek": {
67
+ "pass_count": 3,
68
+ "total": 3,
69
+ "trials": [
70
+ {
71
+ "trial": 1,
72
+ "score": 5,
73
+ "passed": true
74
+ },
75
+ {
76
+ "trial": 2,
77
+ "score": 5,
78
+ "passed": true
79
+ },
80
+ {
81
+ "trial": 3,
82
+ "score": 5,
83
+ "passed": true
84
+ }
85
+ ]
86
+ }
87
+ },
88
+ "rubric_scores": [
89
+ {
90
+ "agentId": "kilo-deepseek",
91
+ "trial": 1,
92
+ "score": 5,
93
+ "errored": false
94
+ },
95
+ {
96
+ "agentId": "kilo-deepseek",
97
+ "trial": 2,
98
+ "score": 5,
99
+ "errored": false
100
+ },
101
+ {
102
+ "agentId": "kilo-deepseek",
103
+ "trial": 3,
104
+ "score": 5,
105
+ "errored": false
106
+ },
107
+ {
108
+ "agentId": "kilo-glm",
109
+ "trial": 1,
110
+ "score": 5,
111
+ "errored": false
112
+ },
113
+ {
114
+ "agentId": "kilo-glm",
115
+ "trial": 2,
116
+ "score": 5,
117
+ "errored": false
118
+ },
119
+ {
120
+ "agentId": "kilo-glm",
121
+ "trial": 3,
122
+ "score": 5,
123
+ "errored": false
124
+ },
125
+ {
126
+ "agentId": "kilo-minimax",
127
+ "trial": 1,
128
+ "score": 5,
129
+ "errored": false
130
+ },
131
+ {
132
+ "agentId": "kilo-minimax",
133
+ "trial": 2,
134
+ "score": 5,
135
+ "errored": false
136
+ },
137
+ {
138
+ "agentId": "kilo-minimax",
139
+ "trial": 3,
140
+ "score": 5,
141
+ "errored": false
142
+ },
143
+ {
144
+ "agentId": "claude-sonnet",
145
+ "trial": 1,
146
+ "score": 5,
147
+ "errored": false
148
+ },
149
+ {
150
+ "agentId": "claude-sonnet",
151
+ "trial": 2,
152
+ "score": 5,
153
+ "errored": false
154
+ },
155
+ {
156
+ "agentId": "claude-sonnet",
157
+ "trial": 3,
158
+ "score": 5,
159
+ "errored": false
160
+ }
161
+ ],
162
+ "timestamp": "2026-04-21T08:55:16.511Z"
163
+ }