workflow-ai 1.1.0 → 1.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (599) hide show
  1. package/CHANGELOG.md +61 -0
  2. package/README.md +37 -377
  3. package/configs/pipeline.yaml +113 -4
  4. package/package.json +54 -44
  5. package/src/lib/operations/tickets.mjs +305 -207
  6. package/src/lib/utils.mjs +286 -286
  7. package/src/runner.mjs +316 -35
  8. package/src/scripts/check-conditions.js +2 -2
  9. package/src/scripts/check-relevance.js +3 -1
  10. package/src/scripts/get-next-id.js +144 -41
  11. package/src/scripts/mark-blocked.js +160 -0
  12. package/src/scripts/move-ticket.js +305 -83
  13. package/src/scripts/pick-next-task.js +782 -93
  14. package/src/skills/__test-cal-001-1777553217513/SKILL.md +2 -0
  15. package/src/skills/__test-runner-1777553217483/SKILL.md +5 -0
  16. package/src/skills/coach/SKILL.md +2 -2
  17. package/src/skills/execute-task/SKILL.md +1 -1
  18. package/src/skills/manual-testing/SKILL.md +2 -0
  19. package/src/skills/review-result/SKILL.md +23 -1
  20. package/src/scripts/tests/timeout-cascade.test.js +0 -28
  21. package/src/skills/analyze-report/README.md +0 -44
  22. package/src/skills/analyze-report/algorithms/progress-assessment.md +0 -108
  23. package/src/skills/analyze-report/knowledge/analysis-frameworks.md +0 -66
  24. package/src/skills/analyze-report/knowledge/report-structure.md +0 -61
  25. package/src/skills/analyze-report/scripts/calc-plan-metrics.js +0 -234
  26. package/src/skills/analyze-report/templates/analysis-report.md +0 -80
  27. package/src/skills/analyze-report/tests/cases/TC-ANALYZE-REPORT-001/current/claude-sonnet/trial-1.md +0 -5
  28. package/src/skills/analyze-report/tests/cases/TC-ANALYZE-REPORT-001/current/claude-sonnet/trial-2.md +0 -98
  29. package/src/skills/analyze-report/tests/cases/TC-ANALYZE-REPORT-001/current/claude-sonnet/trial-3.md +0 -99
  30. package/src/skills/analyze-report/tests/cases/TC-ANALYZE-REPORT-001/current/judge.json +0 -163
  31. package/src/skills/analyze-report/tests/cases/TC-ANALYZE-REPORT-001/current/kilo-deepseek/trial-1.md +0 -89
  32. package/src/skills/analyze-report/tests/cases/TC-ANALYZE-REPORT-001/current/kilo-deepseek/trial-2.md +0 -88
  33. package/src/skills/analyze-report/tests/cases/TC-ANALYZE-REPORT-001/current/kilo-deepseek/trial-3.md +0 -100
  34. package/src/skills/analyze-report/tests/cases/TC-ANALYZE-REPORT-001/current/kilo-glm/trial-1.md +0 -77
  35. package/src/skills/analyze-report/tests/cases/TC-ANALYZE-REPORT-001/current/kilo-glm/trial-2.md +0 -64
  36. package/src/skills/analyze-report/tests/cases/TC-ANALYZE-REPORT-001/current/kilo-glm/trial-3.md +0 -110
  37. package/src/skills/analyze-report/tests/cases/TC-ANALYZE-REPORT-001/current/kilo-minimax/trial-1.md +0 -74
  38. package/src/skills/analyze-report/tests/cases/TC-ANALYZE-REPORT-001/current/kilo-minimax/trial-2.md +0 -38
  39. package/src/skills/analyze-report/tests/cases/TC-ANALYZE-REPORT-001/current/kilo-minimax/trial-3.md +0 -61
  40. package/src/skills/analyze-report/tests/cases/TC-ANALYZE-REPORT-001/current/meta.json +0 -115
  41. package/src/skills/analyze-report/tests/cases/TC-ANALYZE-REPORT-001-evidence-from-log.yaml +0 -60
  42. package/src/skills/analyze-report/tests/cases/TC-ANALYZE-REPORT-002/current/claude-sonnet/trial-1.md +0 -90
  43. package/src/skills/analyze-report/tests/cases/TC-ANALYZE-REPORT-002/current/claude-sonnet/trial-2.md +0 -89
  44. package/src/skills/analyze-report/tests/cases/TC-ANALYZE-REPORT-002/current/claude-sonnet/trial-3.md +0 -5
  45. package/src/skills/analyze-report/tests/cases/TC-ANALYZE-REPORT-002/current/judge.json +0 -163
  46. package/src/skills/analyze-report/tests/cases/TC-ANALYZE-REPORT-002/current/kilo-deepseek/trial-1.md +0 -84
  47. package/src/skills/analyze-report/tests/cases/TC-ANALYZE-REPORT-002/current/kilo-deepseek/trial-2.md +0 -77
  48. package/src/skills/analyze-report/tests/cases/TC-ANALYZE-REPORT-002/current/kilo-deepseek/trial-3.md +0 -89
  49. package/src/skills/analyze-report/tests/cases/TC-ANALYZE-REPORT-002/current/kilo-glm/trial-1.md +0 -103
  50. package/src/skills/analyze-report/tests/cases/TC-ANALYZE-REPORT-002/current/kilo-glm/trial-2.md +0 -103
  51. package/src/skills/analyze-report/tests/cases/TC-ANALYZE-REPORT-002/current/kilo-glm/trial-3.md +0 -103
  52. package/src/skills/analyze-report/tests/cases/TC-ANALYZE-REPORT-002/current/kilo-minimax/trial-1.md +0 -93
  53. package/src/skills/analyze-report/tests/cases/TC-ANALYZE-REPORT-002/current/kilo-minimax/trial-2.md +0 -93
  54. package/src/skills/analyze-report/tests/cases/TC-ANALYZE-REPORT-002/current/kilo-minimax/trial-3.md +0 -86
  55. package/src/skills/analyze-report/tests/cases/TC-ANALYZE-REPORT-002/current/meta.json +0 -115
  56. package/src/skills/analyze-report/tests/cases/TC-ANALYZE-REPORT-002-result-block-format.yaml +0 -44
  57. package/src/skills/analyze-report/tests/fixtures/REPORT-002-incorrect-attribution.md +0 -27
  58. package/src/skills/analyze-report/tests/fixtures/pipeline-2026-04-06_qa-001-skip.log +0 -32
  59. package/src/skills/analyze-report/tests/index.yaml +0 -25
  60. package/src/skills/analyze-report/tests/rubrics/evidence-from-log.md +0 -22
  61. package/src/skills/analyze-report/tests/rubrics/result-block-format.md +0 -22
  62. package/src/skills/analyze-report/workflows/progress.md +0 -158
  63. package/src/skills/analyze-report/workflows/retrospective.md +0 -143
  64. package/src/skills/coach/README.md +0 -43
  65. package/src/skills/coach/SKILL.md.legacy +0 -157
  66. package/src/skills/coach/algorithms/gap-analysis.md +0 -69
  67. package/src/skills/coach/algorithms/improvement-prioritization.md +0 -62
  68. package/src/skills/coach/algorithms/skill-scoring.md +0 -80
  69. package/src/skills/coach/knowledge/audit-applied-changes-clean.txt +0 -11
  70. package/src/skills/coach/knowledge/backlog-management.md +0 -67
  71. package/src/skills/coach/knowledge/backlog-management.md.legacy +0 -90
  72. package/src/skills/coach/knowledge/common-antipatterns.md +0 -76
  73. package/src/skills/coach/knowledge/prompt-engineering.md +0 -45
  74. package/src/skills/coach/knowledge/shared-knowledge-guide.md +0 -44
  75. package/src/skills/coach/knowledge/skill-anatomy.md +0 -49
  76. package/src/skills/coach/knowledge/test-authorship.md +0 -141
  77. package/src/skills/coach/templates/audit-report.md +0 -39
  78. package/src/skills/coach/templates/coach-backlog-init.yaml +0 -14
  79. package/src/skills/coach/templates/coach-backlog-init.yaml.legacy +0 -10
  80. package/src/skills/coach/templates/improvement-plan.md +0 -42
  81. package/src/skills/coach/templates/new-skill.md +0 -95
  82. package/src/skills/coach/tests/cases/TC-COACH-001/current/claude-sonnet/trial-1.md +0 -58
  83. package/src/skills/coach/tests/cases/TC-COACH-001/current/claude-sonnet/trial-2.md +0 -65
  84. package/src/skills/coach/tests/cases/TC-COACH-001/current/claude-sonnet/trial-3.md +0 -58
  85. package/src/skills/coach/tests/cases/TC-COACH-001/current/judge.json +0 -151
  86. package/src/skills/coach/tests/cases/TC-COACH-001/current/kilo-deepseek/trial-1.md +0 -46
  87. package/src/skills/coach/tests/cases/TC-COACH-001/current/kilo-deepseek/trial-2.md +0 -0
  88. package/src/skills/coach/tests/cases/TC-COACH-001/current/kilo-deepseek/trial-3.md +0 -75
  89. package/src/skills/coach/tests/cases/TC-COACH-001/current/kilo-glm/trial-1.md +0 -81
  90. package/src/skills/coach/tests/cases/TC-COACH-001/current/kilo-glm/trial-2.md +0 -101
  91. package/src/skills/coach/tests/cases/TC-COACH-001/current/kilo-glm/trial-3.md +0 -91
  92. package/src/skills/coach/tests/cases/TC-COACH-001/current/kilo-minimax/trial-1.md +0 -48
  93. package/src/skills/coach/tests/cases/TC-COACH-001/current/kilo-minimax/trial-2.md +0 -30
  94. package/src/skills/coach/tests/cases/TC-COACH-001/current/kilo-minimax/trial-3.md +0 -55
  95. package/src/skills/coach/tests/cases/TC-COACH-001/current/meta.json +0 -94
  96. package/src/skills/coach/tests/cases/TC-COACH-001-evidence-based-temporal-diagram.yaml +0 -53
  97. package/src/skills/coach/tests/cases/TC-COACH-002/current/claude-sonnet/trial-1.md +0 -46
  98. package/src/skills/coach/tests/cases/TC-COACH-002/current/claude-sonnet/trial-2.md +0 -50
  99. package/src/skills/coach/tests/cases/TC-COACH-002/current/claude-sonnet/trial-3.md +0 -48
  100. package/src/skills/coach/tests/cases/TC-COACH-002/current/judge.json +0 -151
  101. package/src/skills/coach/tests/cases/TC-COACH-002/current/kilo-deepseek/trial-1.md +0 -0
  102. package/src/skills/coach/tests/cases/TC-COACH-002/current/kilo-deepseek/trial-2.md +0 -37
  103. package/src/skills/coach/tests/cases/TC-COACH-002/current/kilo-deepseek/trial-3.md +0 -30
  104. package/src/skills/coach/tests/cases/TC-COACH-002/current/kilo-glm/trial-1.md +0 -23
  105. package/src/skills/coach/tests/cases/TC-COACH-002/current/kilo-glm/trial-2.md +0 -29
  106. package/src/skills/coach/tests/cases/TC-COACH-002/current/kilo-glm/trial-3.md +0 -35
  107. package/src/skills/coach/tests/cases/TC-COACH-002/current/kilo-minimax/trial-1.md +0 -13
  108. package/src/skills/coach/tests/cases/TC-COACH-002/current/kilo-minimax/trial-2.md +0 -19
  109. package/src/skills/coach/tests/cases/TC-COACH-002/current/kilo-minimax/trial-3.md +0 -33
  110. package/src/skills/coach/tests/cases/TC-COACH-002/current/meta.json +0 -94
  111. package/src/skills/coach/tests/cases/TC-COACH-002-root-cause-first.yaml +0 -57
  112. package/src/skills/coach/tests/fixtures/pipeline-2026-04-06_id-collision.log +0 -77
  113. package/src/skills/coach/tests/index.yaml +0 -29
  114. package/src/skills/coach/tests/rubrics/calibration/evidence-based-bad.md +0 -13
  115. package/src/skills/coach/tests/rubrics/calibration/evidence-based-good.md +0 -29
  116. package/src/skills/coach/tests/rubrics/evidence-based.md +0 -26
  117. package/src/skills/coach/tests/rubrics/root-cause-first.md +0 -21
  118. package/src/skills/coach/workflows/analyze.md +0 -79
  119. package/src/skills/coach/workflows/analyze.md.legacy +0 -64
  120. package/src/skills/coach/workflows/audit.md +0 -74
  121. package/src/skills/coach/workflows/audit.md.legacy +0 -59
  122. package/src/skills/coach/workflows/create.md +0 -80
  123. package/src/skills/coach/workflows/create.md.legacy +0 -67
  124. package/src/skills/coach/workflows/improve.md +0 -71
  125. package/src/skills/coach/workflows/improve.md.legacy +0 -60
  126. package/src/skills/coach/workflows/research.md +0 -55
  127. package/src/skills/coach/workflows/review.md +0 -52
  128. package/src/skills/coach/workflows/review.md.legacy +0 -48
  129. package/src/skills/coach/workflows/test.md +0 -97
  130. package/src/skills/create-plan/README.md +0 -39
  131. package/src/skills/create-plan/algorithms/risk-assessment.md +0 -73
  132. package/src/skills/create-plan/knowledge/plan-completeness.md +0 -67
  133. package/src/skills/create-plan/knowledge/plan-lifecycle.md +0 -33
  134. package/src/skills/create-plan/knowledge/task-verification-pairs.md +0 -151
  135. package/src/skills/create-plan/knowledge/test-hygiene.md +0 -47
  136. package/src/skills/create-plan/scripts/validate-completeness.js +0 -182
  137. package/src/skills/create-plan/tests/cases/TC-CREATE-PLAN-001/current/claude-sonnet/trial-1.md +0 -5
  138. package/src/skills/create-plan/tests/cases/TC-CREATE-PLAN-001/current/claude-sonnet/trial-2.md +0 -39
  139. package/src/skills/create-plan/tests/cases/TC-CREATE-PLAN-001/current/claude-sonnet/trial-3.md +0 -35
  140. package/src/skills/create-plan/tests/cases/TC-CREATE-PLAN-001/current/judge.json +0 -167
  141. package/src/skills/create-plan/tests/cases/TC-CREATE-PLAN-001/current/kilo-deepseek/trial-1.md +0 -5
  142. package/src/skills/create-plan/tests/cases/TC-CREATE-PLAN-001/current/kilo-deepseek/trial-2.md +0 -10
  143. package/src/skills/create-plan/tests/cases/TC-CREATE-PLAN-001/current/kilo-deepseek/trial-3.md +0 -5
  144. package/src/skills/create-plan/tests/cases/TC-CREATE-PLAN-001/current/kilo-glm/trial-1.md +0 -26
  145. package/src/skills/create-plan/tests/cases/TC-CREATE-PLAN-001/current/kilo-glm/trial-2.md +0 -86
  146. package/src/skills/create-plan/tests/cases/TC-CREATE-PLAN-001/current/kilo-glm/trial-3.md +0 -5
  147. package/src/skills/create-plan/tests/cases/TC-CREATE-PLAN-001/current/kilo-minimax/trial-1.md +0 -11
  148. package/src/skills/create-plan/tests/cases/TC-CREATE-PLAN-001/current/kilo-minimax/trial-2.md +0 -15
  149. package/src/skills/create-plan/tests/cases/TC-CREATE-PLAN-001/current/kilo-minimax/trial-3.md +0 -14
  150. package/src/skills/create-plan/tests/cases/TC-CREATE-PLAN-001/current/meta.json +0 -119
  151. package/src/skills/create-plan/tests/cases/TC-CREATE-PLAN-001-validate-completeness.yaml +0 -41
  152. package/src/skills/create-plan/tests/cases/TC-CREATE-PLAN-002/current/claude-sonnet/trial-1.md +0 -25
  153. package/src/skills/create-plan/tests/cases/TC-CREATE-PLAN-002/current/claude-sonnet/trial-2.md +0 -30
  154. package/src/skills/create-plan/tests/cases/TC-CREATE-PLAN-002/current/claude-sonnet/trial-3.md +0 -37
  155. package/src/skills/create-plan/tests/cases/TC-CREATE-PLAN-002/current/judge.json +0 -164
  156. package/src/skills/create-plan/tests/cases/TC-CREATE-PLAN-002/current/kilo-deepseek/trial-1.md +0 -3
  157. package/src/skills/create-plan/tests/cases/TC-CREATE-PLAN-002/current/kilo-deepseek/trial-2.md +0 -11
  158. package/src/skills/create-plan/tests/cases/TC-CREATE-PLAN-002/current/kilo-deepseek/trial-3.md +0 -13
  159. package/src/skills/create-plan/tests/cases/TC-CREATE-PLAN-002/current/kilo-glm/trial-1.md +0 -44
  160. package/src/skills/create-plan/tests/cases/TC-CREATE-PLAN-002/current/kilo-glm/trial-2.md +0 -5
  161. package/src/skills/create-plan/tests/cases/TC-CREATE-PLAN-002/current/kilo-glm/trial-3.md +0 -49
  162. package/src/skills/create-plan/tests/cases/TC-CREATE-PLAN-002/current/kilo-minimax/trial-1.md +0 -6
  163. package/src/skills/create-plan/tests/cases/TC-CREATE-PLAN-002/current/kilo-minimax/trial-2.md +0 -11
  164. package/src/skills/create-plan/tests/cases/TC-CREATE-PLAN-002/current/kilo-minimax/trial-3.md +0 -16
  165. package/src/skills/create-plan/tests/cases/TC-CREATE-PLAN-002/current/meta.json +0 -116
  166. package/src/skills/create-plan/tests/cases/TC-CREATE-PLAN-002-task-granularity.yaml +0 -39
  167. package/src/skills/create-plan/tests/index.yaml +0 -25
  168. package/src/skills/create-plan/tests/rubrics/task-granularity.md +0 -21
  169. package/src/skills/create-plan/tests/rubrics/validate-completeness.md +0 -21
  170. package/src/skills/create-plan/workflows/create.md +0 -136
  171. package/src/skills/create-report/README.md +0 -40
  172. package/src/skills/create-report/algorithms/metric-calculation.md +0 -93
  173. package/src/skills/create-report/knowledge/report-metrics.md +0 -82
  174. package/src/skills/create-report/scripts/calc-metrics.js +0 -383
  175. package/src/skills/create-report/tests/cases/TC-CREATE-REPORT-001/current/claude-sonnet/trial-1.md +0 -25
  176. package/src/skills/create-report/tests/cases/TC-CREATE-REPORT-001/current/claude-sonnet/trial-2.md +0 -26
  177. package/src/skills/create-report/tests/cases/TC-CREATE-REPORT-001/current/claude-sonnet/trial-3.md +0 -28
  178. package/src/skills/create-report/tests/cases/TC-CREATE-REPORT-001/current/judge.json +0 -163
  179. package/src/skills/create-report/tests/cases/TC-CREATE-REPORT-001/current/kilo-deepseek/trial-1.md +0 -4
  180. package/src/skills/create-report/tests/cases/TC-CREATE-REPORT-001/current/kilo-deepseek/trial-2.md +0 -3
  181. package/src/skills/create-report/tests/cases/TC-CREATE-REPORT-001/current/kilo-deepseek/trial-3.md +0 -6
  182. package/src/skills/create-report/tests/cases/TC-CREATE-REPORT-001/current/kilo-glm/trial-1.md +0 -8
  183. package/src/skills/create-report/tests/cases/TC-CREATE-REPORT-001/current/kilo-glm/trial-2.md +0 -12
  184. package/src/skills/create-report/tests/cases/TC-CREATE-REPORT-001/current/kilo-glm/trial-3.md +0 -7
  185. package/src/skills/create-report/tests/cases/TC-CREATE-REPORT-001/current/kilo-minimax/trial-1.md +0 -12
  186. package/src/skills/create-report/tests/cases/TC-CREATE-REPORT-001/current/kilo-minimax/trial-2.md +0 -22
  187. package/src/skills/create-report/tests/cases/TC-CREATE-REPORT-001/current/kilo-minimax/trial-3.md +0 -13
  188. package/src/skills/create-report/tests/cases/TC-CREATE-REPORT-001/current/meta.json +0 -115
  189. package/src/skills/create-report/tests/cases/TC-CREATE-REPORT-001-root-cause-attribution.yaml +0 -57
  190. package/src/skills/create-report/tests/index.yaml +0 -20
  191. package/src/skills/create-report/tests/rubrics/root-cause-attribution.md +0 -21
  192. package/src/skills/create-report/workflows/standard.md +0 -175
  193. package/src/skills/decompose-gaps/README.md +0 -39
  194. package/src/skills/decompose-gaps/algorithms/scope-check.md +0 -110
  195. package/src/skills/decompose-gaps/knowledge/scope-validation.md +0 -65
  196. package/src/skills/decompose-gaps/tests/cases/TC-DECOMPOSE-GAPS-001/current/claude-sonnet/trial-1.md +0 -41
  197. package/src/skills/decompose-gaps/tests/cases/TC-DECOMPOSE-GAPS-001/current/claude-sonnet/trial-2.md +0 -41
  198. package/src/skills/decompose-gaps/tests/cases/TC-DECOMPOSE-GAPS-001/current/claude-sonnet/trial-3.md +0 -56
  199. package/src/skills/decompose-gaps/tests/cases/TC-DECOMPOSE-GAPS-001/current/judge.json +0 -164
  200. package/src/skills/decompose-gaps/tests/cases/TC-DECOMPOSE-GAPS-001/current/kilo-deepseek/trial-1.md +0 -25
  201. package/src/skills/decompose-gaps/tests/cases/TC-DECOMPOSE-GAPS-001/current/kilo-deepseek/trial-2.md +0 -17
  202. package/src/skills/decompose-gaps/tests/cases/TC-DECOMPOSE-GAPS-001/current/kilo-deepseek/trial-3.md +0 -22
  203. package/src/skills/decompose-gaps/tests/cases/TC-DECOMPOSE-GAPS-001/current/kilo-glm/trial-1.md +0 -25
  204. package/src/skills/decompose-gaps/tests/cases/TC-DECOMPOSE-GAPS-001/current/kilo-glm/trial-2.md +0 -5
  205. package/src/skills/decompose-gaps/tests/cases/TC-DECOMPOSE-GAPS-001/current/kilo-glm/trial-3.md +0 -29
  206. package/src/skills/decompose-gaps/tests/cases/TC-DECOMPOSE-GAPS-001/current/kilo-minimax/trial-1.md +0 -27
  207. package/src/skills/decompose-gaps/tests/cases/TC-DECOMPOSE-GAPS-001/current/kilo-minimax/trial-2.md +0 -35
  208. package/src/skills/decompose-gaps/tests/cases/TC-DECOMPOSE-GAPS-001/current/kilo-minimax/trial-3.md +0 -18
  209. package/src/skills/decompose-gaps/tests/cases/TC-DECOMPOSE-GAPS-001/current/meta.json +0 -116
  210. package/src/skills/decompose-gaps/tests/cases/TC-DECOMPOSE-GAPS-001-scope-exclusion.yaml +0 -46
  211. package/src/skills/decompose-gaps/tests/cases/TC-DECOMPOSE-GAPS-002/current/claude-sonnet/trial-1.md +0 -27
  212. package/src/skills/decompose-gaps/tests/cases/TC-DECOMPOSE-GAPS-002/current/claude-sonnet/trial-2.md +0 -30
  213. package/src/skills/decompose-gaps/tests/cases/TC-DECOMPOSE-GAPS-002/current/claude-sonnet/trial-3.md +0 -27
  214. package/src/skills/decompose-gaps/tests/cases/TC-DECOMPOSE-GAPS-002/current/judge.json +0 -163
  215. package/src/skills/decompose-gaps/tests/cases/TC-DECOMPOSE-GAPS-002/current/kilo-deepseek/trial-1.md +0 -0
  216. package/src/skills/decompose-gaps/tests/cases/TC-DECOMPOSE-GAPS-002/current/kilo-deepseek/trial-2.md +0 -15
  217. package/src/skills/decompose-gaps/tests/cases/TC-DECOMPOSE-GAPS-002/current/kilo-deepseek/trial-3.md +0 -7
  218. package/src/skills/decompose-gaps/tests/cases/TC-DECOMPOSE-GAPS-002/current/kilo-glm/trial-1.md +0 -21
  219. package/src/skills/decompose-gaps/tests/cases/TC-DECOMPOSE-GAPS-002/current/kilo-glm/trial-2.md +0 -38
  220. package/src/skills/decompose-gaps/tests/cases/TC-DECOMPOSE-GAPS-002/current/kilo-glm/trial-3.md +0 -16
  221. package/src/skills/decompose-gaps/tests/cases/TC-DECOMPOSE-GAPS-002/current/kilo-minimax/trial-1.md +0 -5
  222. package/src/skills/decompose-gaps/tests/cases/TC-DECOMPOSE-GAPS-002/current/kilo-minimax/trial-2.md +0 -10
  223. package/src/skills/decompose-gaps/tests/cases/TC-DECOMPOSE-GAPS-002/current/kilo-minimax/trial-3.md +0 -9
  224. package/src/skills/decompose-gaps/tests/cases/TC-DECOMPOSE-GAPS-002/current/meta.json +0 -115
  225. package/src/skills/decompose-gaps/tests/cases/TC-DECOMPOSE-GAPS-002-glob-before-write.yaml +0 -36
  226. package/src/skills/decompose-gaps/tests/cases/TC-DECOMPOSE-GAPS-003/current/claude-sonnet/trial-1.md +0 -30
  227. package/src/skills/decompose-gaps/tests/cases/TC-DECOMPOSE-GAPS-003/current/claude-sonnet/trial-2.md +0 -30
  228. package/src/skills/decompose-gaps/tests/cases/TC-DECOMPOSE-GAPS-003/current/claude-sonnet/trial-3.md +0 -30
  229. package/src/skills/decompose-gaps/tests/cases/TC-DECOMPOSE-GAPS-003/current/judge.json +0 -165
  230. package/src/skills/decompose-gaps/tests/cases/TC-DECOMPOSE-GAPS-003/current/kilo-deepseek/trial-1.md +0 -5
  231. package/src/skills/decompose-gaps/tests/cases/TC-DECOMPOSE-GAPS-003/current/kilo-deepseek/trial-2.md +0 -26
  232. package/src/skills/decompose-gaps/tests/cases/TC-DECOMPOSE-GAPS-003/current/kilo-deepseek/trial-3.md +0 -5
  233. package/src/skills/decompose-gaps/tests/cases/TC-DECOMPOSE-GAPS-003/current/kilo-glm/trial-1.md +0 -39
  234. package/src/skills/decompose-gaps/tests/cases/TC-DECOMPOSE-GAPS-003/current/kilo-glm/trial-2.md +0 -37
  235. package/src/skills/decompose-gaps/tests/cases/TC-DECOMPOSE-GAPS-003/current/kilo-glm/trial-3.md +0 -45
  236. package/src/skills/decompose-gaps/tests/cases/TC-DECOMPOSE-GAPS-003/current/kilo-minimax/trial-1.md +0 -26
  237. package/src/skills/decompose-gaps/tests/cases/TC-DECOMPOSE-GAPS-003/current/kilo-minimax/trial-2.md +0 -27
  238. package/src/skills/decompose-gaps/tests/cases/TC-DECOMPOSE-GAPS-003/current/kilo-minimax/trial-3.md +0 -7
  239. package/src/skills/decompose-gaps/tests/cases/TC-DECOMPOSE-GAPS-003/current/meta.json +0 -117
  240. package/src/skills/decompose-gaps/tests/cases/TC-DECOMPOSE-GAPS-003-parent-plan-mandatory.yaml +0 -41
  241. package/src/skills/decompose-gaps/tests/index.yaml +0 -30
  242. package/src/skills/decompose-gaps/tests/rubrics/glob-before-write.md +0 -21
  243. package/src/skills/decompose-gaps/tests/rubrics/parent-plan-mandatory.md +0 -22
  244. package/src/skills/decompose-gaps/tests/rubrics/scope-exclusion.md +0 -21
  245. package/src/skills/decompose-gaps/workflows/decompose.md +0 -123
  246. package/src/skills/decompose-plan/README.md +0 -43
  247. package/src/skills/decompose-plan/algorithms/deduplication.md +0 -101
  248. package/src/skills/decompose-plan/knowledge/atomicity-checklist.md +0 -139
  249. package/src/skills/decompose-plan/knowledge/capabilities.md +0 -68
  250. package/src/skills/decompose-plan/knowledge/human-task-rules.md +0 -82
  251. package/src/skills/decompose-plan/knowledge/scope-guard-checklist.md +0 -73
  252. package/src/skills/decompose-plan/scripts/check-atomicity-limit.js +0 -47
  253. package/src/skills/decompose-plan/scripts/check-duplicates.js +0 -323
  254. package/src/skills/decompose-plan/scripts/verify-atomicity.js +0 -408
  255. package/src/skills/decompose-plan/tests/cases/TC-DECOMPOSE-PLAN-001/current/claude-sonnet/trial-1.md +0 -30
  256. package/src/skills/decompose-plan/tests/cases/TC-DECOMPOSE-PLAN-001/current/claude-sonnet/trial-2.md +0 -36
  257. package/src/skills/decompose-plan/tests/cases/TC-DECOMPOSE-PLAN-001/current/claude-sonnet/trial-3.md +0 -37
  258. package/src/skills/decompose-plan/tests/cases/TC-DECOMPOSE-PLAN-001/current/judge.json +0 -163
  259. package/src/skills/decompose-plan/tests/cases/TC-DECOMPOSE-PLAN-001/current/kilo-deepseek/trial-1.md +0 -20
  260. package/src/skills/decompose-plan/tests/cases/TC-DECOMPOSE-PLAN-001/current/kilo-deepseek/trial-2.md +0 -17
  261. package/src/skills/decompose-plan/tests/cases/TC-DECOMPOSE-PLAN-001/current/kilo-deepseek/trial-3.md +0 -28
  262. package/src/skills/decompose-plan/tests/cases/TC-DECOMPOSE-PLAN-001/current/kilo-glm/trial-1.md +0 -114
  263. package/src/skills/decompose-plan/tests/cases/TC-DECOMPOSE-PLAN-001/current/kilo-glm/trial-2.md +0 -137
  264. package/src/skills/decompose-plan/tests/cases/TC-DECOMPOSE-PLAN-001/current/kilo-glm/trial-3.md +0 -188
  265. package/src/skills/decompose-plan/tests/cases/TC-DECOMPOSE-PLAN-001/current/kilo-minimax/trial-1.md +0 -0
  266. package/src/skills/decompose-plan/tests/cases/TC-DECOMPOSE-PLAN-001/current/kilo-minimax/trial-2.md +0 -32
  267. package/src/skills/decompose-plan/tests/cases/TC-DECOMPOSE-PLAN-001/current/kilo-minimax/trial-3.md +0 -110
  268. package/src/skills/decompose-plan/tests/cases/TC-DECOMPOSE-PLAN-001/current/meta.json +0 -115
  269. package/src/skills/decompose-plan/tests/cases/TC-DECOMPOSE-PLAN-001-atomicity-no-1to1.yaml +0 -56
  270. package/src/skills/decompose-plan/tests/cases/TC-DECOMPOSE-PLAN-002/current/claude-sonnet/trial-1.md +0 -47
  271. package/src/skills/decompose-plan/tests/cases/TC-DECOMPOSE-PLAN-002/current/claude-sonnet/trial-2.md +0 -54
  272. package/src/skills/decompose-plan/tests/cases/TC-DECOMPOSE-PLAN-002/current/claude-sonnet/trial-3.md +0 -43
  273. package/src/skills/decompose-plan/tests/cases/TC-DECOMPOSE-PLAN-002/current/judge.json +0 -163
  274. package/src/skills/decompose-plan/tests/cases/TC-DECOMPOSE-PLAN-002/current/kilo-deepseek/trial-1.md +0 -15
  275. package/src/skills/decompose-plan/tests/cases/TC-DECOMPOSE-PLAN-002/current/kilo-deepseek/trial-2.md +0 -5
  276. package/src/skills/decompose-plan/tests/cases/TC-DECOMPOSE-PLAN-002/current/kilo-deepseek/trial-3.md +0 -12
  277. package/src/skills/decompose-plan/tests/cases/TC-DECOMPOSE-PLAN-002/current/kilo-glm/trial-1.md +0 -34
  278. package/src/skills/decompose-plan/tests/cases/TC-DECOMPOSE-PLAN-002/current/kilo-glm/trial-2.md +0 -30
  279. package/src/skills/decompose-plan/tests/cases/TC-DECOMPOSE-PLAN-002/current/kilo-glm/trial-3.md +0 -35
  280. package/src/skills/decompose-plan/tests/cases/TC-DECOMPOSE-PLAN-002/current/kilo-minimax/trial-1.md +0 -0
  281. package/src/skills/decompose-plan/tests/cases/TC-DECOMPOSE-PLAN-002/current/kilo-minimax/trial-2.md +0 -31
  282. package/src/skills/decompose-plan/tests/cases/TC-DECOMPOSE-PLAN-002/current/kilo-minimax/trial-3.md +0 -0
  283. package/src/skills/decompose-plan/tests/cases/TC-DECOMPOSE-PLAN-002/current/meta.json +0 -115
  284. package/src/skills/decompose-plan/tests/cases/TC-DECOMPOSE-PLAN-002-get-next-id-mandatory.yaml +0 -44
  285. package/src/skills/decompose-plan/tests/cases/TC-DECOMPOSE-PLAN-003/current/claude-sonnet/trial-1.md +0 -21
  286. package/src/skills/decompose-plan/tests/cases/TC-DECOMPOSE-PLAN-003/current/claude-sonnet/trial-2.md +0 -38
  287. package/src/skills/decompose-plan/tests/cases/TC-DECOMPOSE-PLAN-003/current/claude-sonnet/trial-3.md +0 -30
  288. package/src/skills/decompose-plan/tests/cases/TC-DECOMPOSE-PLAN-003/current/judge.json +0 -163
  289. package/src/skills/decompose-plan/tests/cases/TC-DECOMPOSE-PLAN-003/current/kilo-deepseek/trial-1.md +0 -31
  290. package/src/skills/decompose-plan/tests/cases/TC-DECOMPOSE-PLAN-003/current/kilo-deepseek/trial-2.md +0 -35
  291. package/src/skills/decompose-plan/tests/cases/TC-DECOMPOSE-PLAN-003/current/kilo-deepseek/trial-3.md +0 -48
  292. package/src/skills/decompose-plan/tests/cases/TC-DECOMPOSE-PLAN-003/current/kilo-glm/trial-1.md +0 -167
  293. package/src/skills/decompose-plan/tests/cases/TC-DECOMPOSE-PLAN-003/current/kilo-glm/trial-2.md +0 -62
  294. package/src/skills/decompose-plan/tests/cases/TC-DECOMPOSE-PLAN-003/current/kilo-glm/trial-3.md +0 -174
  295. package/src/skills/decompose-plan/tests/cases/TC-DECOMPOSE-PLAN-003/current/kilo-minimax/trial-1.md +0 -0
  296. package/src/skills/decompose-plan/tests/cases/TC-DECOMPOSE-PLAN-003/current/kilo-minimax/trial-2.md +0 -0
  297. package/src/skills/decompose-plan/tests/cases/TC-DECOMPOSE-PLAN-003/current/kilo-minimax/trial-3.md +0 -0
  298. package/src/skills/decompose-plan/tests/cases/TC-DECOMPOSE-PLAN-003/current/meta.json +0 -115
  299. package/src/skills/decompose-plan/tests/cases/TC-DECOMPOSE-PLAN-003-verbatim-dod-transfer.yaml +0 -42
  300. package/src/skills/decompose-plan/tests/cases/TC-DECOMPOSE-PLAN-004/current/claude-sonnet/trial-1.md +0 -55
  301. package/src/skills/decompose-plan/tests/cases/TC-DECOMPOSE-PLAN-004/current/claude-sonnet/trial-2.md +0 -49
  302. package/src/skills/decompose-plan/tests/cases/TC-DECOMPOSE-PLAN-004/current/claude-sonnet/trial-3.md +0 -49
  303. package/src/skills/decompose-plan/tests/cases/TC-DECOMPOSE-PLAN-004/current/judge.json +0 -163
  304. package/src/skills/decompose-plan/tests/cases/TC-DECOMPOSE-PLAN-004/current/kilo-deepseek/trial-1.md +0 -104
  305. package/src/skills/decompose-plan/tests/cases/TC-DECOMPOSE-PLAN-004/current/kilo-deepseek/trial-2.md +0 -45
  306. package/src/skills/decompose-plan/tests/cases/TC-DECOMPOSE-PLAN-004/current/kilo-deepseek/trial-3.md +0 -58
  307. package/src/skills/decompose-plan/tests/cases/TC-DECOMPOSE-PLAN-004/current/kilo-glm/trial-1.md +0 -193
  308. package/src/skills/decompose-plan/tests/cases/TC-DECOMPOSE-PLAN-004/current/kilo-glm/trial-2.md +0 -202
  309. package/src/skills/decompose-plan/tests/cases/TC-DECOMPOSE-PLAN-004/current/kilo-glm/trial-3.md +0 -155
  310. package/src/skills/decompose-plan/tests/cases/TC-DECOMPOSE-PLAN-004/current/kilo-minimax/trial-1.md +0 -52
  311. package/src/skills/decompose-plan/tests/cases/TC-DECOMPOSE-PLAN-004/current/kilo-minimax/trial-2.md +0 -17
  312. package/src/skills/decompose-plan/tests/cases/TC-DECOMPOSE-PLAN-004/current/kilo-minimax/trial-3.md +0 -0
  313. package/src/skills/decompose-plan/tests/cases/TC-DECOMPOSE-PLAN-004/current/meta.json +0 -115
  314. package/src/skills/decompose-plan/tests/cases/TC-DECOMPOSE-PLAN-004-executor-atomicity.yaml +0 -64
  315. package/src/skills/decompose-plan/tests/cases/TC-DECOMPOSE-PLAN-005/current/claude-sonnet/trial-1.md +0 -59
  316. package/src/skills/decompose-plan/tests/cases/TC-DECOMPOSE-PLAN-005/current/claude-sonnet/trial-2.md +0 -204
  317. package/src/skills/decompose-plan/tests/cases/TC-DECOMPOSE-PLAN-005/current/claude-sonnet/trial-3.md +0 -213
  318. package/src/skills/decompose-plan/tests/cases/TC-DECOMPOSE-PLAN-005/current/judge.json +0 -163
  319. package/src/skills/decompose-plan/tests/cases/TC-DECOMPOSE-PLAN-005/current/kilo-deepseek/trial-1.md +0 -0
  320. package/src/skills/decompose-plan/tests/cases/TC-DECOMPOSE-PLAN-005/current/kilo-deepseek/trial-2.md +0 -57
  321. package/src/skills/decompose-plan/tests/cases/TC-DECOMPOSE-PLAN-005/current/kilo-deepseek/trial-3.md +0 -54
  322. package/src/skills/decompose-plan/tests/cases/TC-DECOMPOSE-PLAN-005/current/kilo-glm/trial-1.md +0 -147
  323. package/src/skills/decompose-plan/tests/cases/TC-DECOMPOSE-PLAN-005/current/kilo-glm/trial-2.md +0 -165
  324. package/src/skills/decompose-plan/tests/cases/TC-DECOMPOSE-PLAN-005/current/kilo-glm/trial-3.md +0 -133
  325. package/src/skills/decompose-plan/tests/cases/TC-DECOMPOSE-PLAN-005/current/kilo-minimax/trial-1.md +0 -81
  326. package/src/skills/decompose-plan/tests/cases/TC-DECOMPOSE-PLAN-005/current/kilo-minimax/trial-2.md +0 -108
  327. package/src/skills/decompose-plan/tests/cases/TC-DECOMPOSE-PLAN-005/current/kilo-minimax/trial-3.md +0 -3
  328. package/src/skills/decompose-plan/tests/cases/TC-DECOMPOSE-PLAN-005/current/meta.json +0 -114
  329. package/src/skills/decompose-plan/tests/cases/TC-DECOMPOSE-PLAN-005-capabilities-registry.yaml +0 -78
  330. package/src/skills/decompose-plan/tests/cases/TC-DECOMPOSE-PLAN-006/current/claude-sonnet/trial-1.md +0 -225
  331. package/src/skills/decompose-plan/tests/cases/TC-DECOMPOSE-PLAN-006/current/claude-sonnet/trial-2.md +0 -66
  332. package/src/skills/decompose-plan/tests/cases/TC-DECOMPOSE-PLAN-006/current/claude-sonnet/trial-3.md +0 -36
  333. package/src/skills/decompose-plan/tests/cases/TC-DECOMPOSE-PLAN-006/current/judge.json +0 -163
  334. package/src/skills/decompose-plan/tests/cases/TC-DECOMPOSE-PLAN-006/current/kilo-deepseek/trial-1.md +0 -42
  335. package/src/skills/decompose-plan/tests/cases/TC-DECOMPOSE-PLAN-006/current/kilo-deepseek/trial-2.md +0 -67
  336. package/src/skills/decompose-plan/tests/cases/TC-DECOMPOSE-PLAN-006/current/kilo-deepseek/trial-3.md +0 -40
  337. package/src/skills/decompose-plan/tests/cases/TC-DECOMPOSE-PLAN-006/current/kilo-glm/trial-1.md +0 -122
  338. package/src/skills/decompose-plan/tests/cases/TC-DECOMPOSE-PLAN-006/current/kilo-glm/trial-2.md +0 -131
  339. package/src/skills/decompose-plan/tests/cases/TC-DECOMPOSE-PLAN-006/current/kilo-glm/trial-3.md +0 -138
  340. package/src/skills/decompose-plan/tests/cases/TC-DECOMPOSE-PLAN-006/current/kilo-minimax/trial-1.md +0 -41
  341. package/src/skills/decompose-plan/tests/cases/TC-DECOMPOSE-PLAN-006/current/kilo-minimax/trial-2.md +0 -88
  342. package/src/skills/decompose-plan/tests/cases/TC-DECOMPOSE-PLAN-006/current/kilo-minimax/trial-3.md +0 -0
  343. package/src/skills/decompose-plan/tests/cases/TC-DECOMPOSE-PLAN-006/current/meta.json +0 -115
  344. package/src/skills/decompose-plan/tests/cases/TC-DECOMPOSE-PLAN-006-dod-threshold.yaml +0 -72
  345. package/src/skills/decompose-plan/tests/index.yaml +0 -45
  346. package/src/skills/decompose-plan/tests/rubrics/atomicity-no-1to1.md +0 -21
  347. package/src/skills/decompose-plan/tests/rubrics/capabilities-registry.md +0 -21
  348. package/src/skills/decompose-plan/tests/rubrics/dod-threshold.md +0 -21
  349. package/src/skills/decompose-plan/tests/rubrics/executor-atomicity.md +0 -21
  350. package/src/skills/decompose-plan/tests/rubrics/get-next-id-mandatory.md +0 -21
  351. package/src/skills/decompose-plan/tests/rubrics/verbatim-dod-transfer.md +0 -21
  352. package/src/skills/decompose-plan/workflows/decompose.md +0 -305
  353. package/src/skills/deep-research/README.md +0 -36
  354. package/src/skills/deep-research/algorithms/source-scoring.md +0 -63
  355. package/src/skills/deep-research/algorithms/synthesis.md +0 -67
  356. package/src/skills/deep-research/knowledge/data-validation.md +0 -44
  357. package/src/skills/deep-research/knowledge/perplexity-config.md +0 -30
  358. package/src/skills/deep-research/knowledge/research-methodology.md +0 -54
  359. package/src/skills/deep-research/knowledge/source-evaluation.md +0 -33
  360. package/src/skills/deep-research/scripts/perplexity-research.js +0 -315
  361. package/src/skills/deep-research/templates/brief-summary.md +0 -25
  362. package/src/skills/deep-research/templates/research-report.md +0 -76
  363. package/src/skills/deep-research/tests/cases/TC-DEEP-RESEARCH-001/current/claude-haiku/trial-1.md +0 -48
  364. package/src/skills/deep-research/tests/cases/TC-DEEP-RESEARCH-001/current/claude-haiku/trial-2.md +0 -88
  365. package/src/skills/deep-research/tests/cases/TC-DEEP-RESEARCH-001/current/claude-haiku/trial-3.md +0 -56
  366. package/src/skills/deep-research/tests/cases/TC-DEEP-RESEARCH-001/current/judge.json +0 -163
  367. package/src/skills/deep-research/tests/cases/TC-DEEP-RESEARCH-001/current/kilo-free/trial-1.md +0 -58
  368. package/src/skills/deep-research/tests/cases/TC-DEEP-RESEARCH-001/current/kilo-free/trial-2.md +0 -249
  369. package/src/skills/deep-research/tests/cases/TC-DEEP-RESEARCH-001/current/kilo-free/trial-3.md +0 -44
  370. package/src/skills/deep-research/tests/cases/TC-DEEP-RESEARCH-001/current/kilo-glm/trial-1.md +0 -96
  371. package/src/skills/deep-research/tests/cases/TC-DEEP-RESEARCH-001/current/kilo-glm/trial-2.md +0 -56
  372. package/src/skills/deep-research/tests/cases/TC-DEEP-RESEARCH-001/current/kilo-glm/trial-3.md +0 -94
  373. package/src/skills/deep-research/tests/cases/TC-DEEP-RESEARCH-001/current/kilo-glm-air/trial-1.md +0 -11
  374. package/src/skills/deep-research/tests/cases/TC-DEEP-RESEARCH-001/current/kilo-glm-air/trial-2.md +0 -1
  375. package/src/skills/deep-research/tests/cases/TC-DEEP-RESEARCH-001/current/kilo-glm-air/trial-3.md +0 -1
  376. package/src/skills/deep-research/tests/cases/TC-DEEP-RESEARCH-001/current/meta.json +0 -115
  377. package/src/skills/deep-research/tests/cases/TC-DEEP-RESEARCH-001-self-check-url.yaml +0 -58
  378. package/src/skills/deep-research/tests/index.yaml +0 -20
  379. package/src/skills/deep-research/tests/rubrics/self-check-url.md +0 -34
  380. package/src/skills/deep-research/workflows/base-checklist.md +0 -19
  381. package/src/skills/deep-research/workflows/benchmark.md +0 -38
  382. package/src/skills/deep-research/workflows/competitor.md +0 -44
  383. package/src/skills/deep-research/workflows/custom.md +0 -32
  384. package/src/skills/deep-research/workflows/market.md +0 -44
  385. package/src/skills/deep-research/workflows/technology.md +0 -40
  386. package/src/skills/deep-research/workflows/trend.md +0 -40
  387. package/src/skills/execute-task/README.md +0 -44
  388. package/src/skills/execute-task/algorithms/execution-strategy.md +0 -136
  389. package/src/skills/execute-task/knowledge/context-checkpoints.md +0 -75
  390. package/src/skills/execute-task/knowledge/ticket-structure.md +0 -70
  391. package/src/skills/execute-task/tests/cases/TC-EXECUTE-TASK-001/current/claude-haiku/trial-1.md +0 -5
  392. package/src/skills/execute-task/tests/cases/TC-EXECUTE-TASK-001/current/claude-haiku/trial-2.md +0 -5
  393. package/src/skills/execute-task/tests/cases/TC-EXECUTE-TASK-001/current/claude-haiku/trial-3.md +0 -5
  394. package/src/skills/execute-task/tests/cases/TC-EXECUTE-TASK-001/current/judge.json +0 -124
  395. package/src/skills/execute-task/tests/cases/TC-EXECUTE-TASK-001/current/kilo-free/trial-1.md +0 -4
  396. package/src/skills/execute-task/tests/cases/TC-EXECUTE-TASK-001/current/kilo-free/trial-2.md +0 -4
  397. package/src/skills/execute-task/tests/cases/TC-EXECUTE-TASK-001/current/kilo-free/trial-3.md +0 -4
  398. package/src/skills/execute-task/tests/cases/TC-EXECUTE-TASK-001/current/kilo-glm-air/trial-1.md +0 -4
  399. package/src/skills/execute-task/tests/cases/TC-EXECUTE-TASK-001/current/kilo-glm-air/trial-2.md +0 -4
  400. package/src/skills/execute-task/tests/cases/TC-EXECUTE-TASK-001/current/kilo-glm-air/trial-3.md +0 -11
  401. package/src/skills/execute-task/tests/cases/TC-EXECUTE-TASK-001/current/meta.json +0 -88
  402. package/src/skills/execute-task/tests/cases/TC-EXECUTE-TASK-001-no-ticket-creation.yaml +0 -48
  403. package/src/skills/execute-task/tests/cases/TC-EXECUTE-TASK-002/current/claude-haiku/trial-1.md +0 -5
  404. package/src/skills/execute-task/tests/cases/TC-EXECUTE-TASK-002/current/claude-haiku/trial-2.md +0 -6
  405. package/src/skills/execute-task/tests/cases/TC-EXECUTE-TASK-002/current/claude-haiku/trial-3.md +0 -5
  406. package/src/skills/execute-task/tests/cases/TC-EXECUTE-TASK-002/current/judge.json +0 -124
  407. package/src/skills/execute-task/tests/cases/TC-EXECUTE-TASK-002/current/kilo-free/trial-1.md +0 -4
  408. package/src/skills/execute-task/tests/cases/TC-EXECUTE-TASK-002/current/kilo-free/trial-2.md +0 -4
  409. package/src/skills/execute-task/tests/cases/TC-EXECUTE-TASK-002/current/kilo-free/trial-3.md +0 -8
  410. package/src/skills/execute-task/tests/cases/TC-EXECUTE-TASK-002/current/kilo-glm-air/trial-1.md +0 -9
  411. package/src/skills/execute-task/tests/cases/TC-EXECUTE-TASK-002/current/kilo-glm-air/trial-2.md +0 -26
  412. package/src/skills/execute-task/tests/cases/TC-EXECUTE-TASK-002/current/kilo-glm-air/trial-3.md +0 -4
  413. package/src/skills/execute-task/tests/cases/TC-EXECUTE-TASK-002/current/meta.json +0 -89
  414. package/src/skills/execute-task/tests/cases/TC-EXECUTE-TASK-002-no-duplicate-dod.yaml +0 -44
  415. package/src/skills/execute-task/tests/cases/TC-EXECUTE-TASK-003/current/claude-haiku/trial-1.md +0 -5
  416. package/src/skills/execute-task/tests/cases/TC-EXECUTE-TASK-003/current/claude-haiku/trial-2.md +0 -5
  417. package/src/skills/execute-task/tests/cases/TC-EXECUTE-TASK-003/current/claude-haiku/trial-3.md +0 -5
  418. package/src/skills/execute-task/tests/cases/TC-EXECUTE-TASK-003/current/judge.json +0 -46
  419. package/src/skills/execute-task/tests/cases/TC-EXECUTE-TASK-003/current/meta.json +0 -37
  420. package/src/skills/execute-task/tests/cases/TC-EXECUTE-TASK-003-verification-proportionality.yaml +0 -46
  421. package/src/skills/execute-task/tests/cases/TC-EXECUTE-TASK-004/current/claude-haiku/trial-1.md +0 -18
  422. package/src/skills/execute-task/tests/cases/TC-EXECUTE-TASK-004/current/claude-haiku/trial-2.md +0 -16
  423. package/src/skills/execute-task/tests/cases/TC-EXECUTE-TASK-004/current/claude-haiku/trial-3.md +0 -14
  424. package/src/skills/execute-task/tests/cases/TC-EXECUTE-TASK-004/current/judge.json +0 -124
  425. package/src/skills/execute-task/tests/cases/TC-EXECUTE-TASK-004/current/kilo-free/trial-1.md +0 -5
  426. package/src/skills/execute-task/tests/cases/TC-EXECUTE-TASK-004/current/kilo-free/trial-2.md +0 -5
  427. package/src/skills/execute-task/tests/cases/TC-EXECUTE-TASK-004/current/kilo-free/trial-3.md +0 -1
  428. package/src/skills/execute-task/tests/cases/TC-EXECUTE-TASK-004/current/kilo-glm-air/trial-1.md +0 -8
  429. package/src/skills/execute-task/tests/cases/TC-EXECUTE-TASK-004/current/kilo-glm-air/trial-2.md +0 -5
  430. package/src/skills/execute-task/tests/cases/TC-EXECUTE-TASK-004/current/kilo-glm-air/trial-3.md +0 -4
  431. package/src/skills/execute-task/tests/cases/TC-EXECUTE-TASK-004/current/meta.json +0 -89
  432. package/src/skills/execute-task/tests/cases/TC-EXECUTE-TASK-004-no-foreign-ticket-edit.yaml +0 -50
  433. package/src/skills/execute-task/tests/cases/TC-EXECUTE-TASK-005/current/claude-haiku/trial-1.md +0 -5
  434. package/src/skills/execute-task/tests/cases/TC-EXECUTE-TASK-005/current/claude-haiku/trial-2.md +0 -5
  435. package/src/skills/execute-task/tests/cases/TC-EXECUTE-TASK-005/current/claude-haiku/trial-3.md +0 -5
  436. package/src/skills/execute-task/tests/cases/TC-EXECUTE-TASK-005/current/judge.json +0 -124
  437. package/src/skills/execute-task/tests/cases/TC-EXECUTE-TASK-005/current/kilo-free/trial-1.md +0 -15
  438. package/src/skills/execute-task/tests/cases/TC-EXECUTE-TASK-005/current/kilo-free/trial-2.md +0 -4
  439. package/src/skills/execute-task/tests/cases/TC-EXECUTE-TASK-005/current/kilo-free/trial-3.md +0 -5
  440. package/src/skills/execute-task/tests/cases/TC-EXECUTE-TASK-005/current/kilo-glm-air/trial-1.md +0 -11
  441. package/src/skills/execute-task/tests/cases/TC-EXECUTE-TASK-005/current/kilo-glm-air/trial-2.md +0 -11
  442. package/src/skills/execute-task/tests/cases/TC-EXECUTE-TASK-005/current/kilo-glm-air/trial-3.md +0 -4
  443. package/src/skills/execute-task/tests/cases/TC-EXECUTE-TASK-005/current/meta.json +0 -88
  444. package/src/skills/execute-task/tests/cases/TC-EXECUTE-TASK-005-ticket-fields-updated.yaml +0 -39
  445. package/src/skills/execute-task/tests/fixtures/IMPL-902-create-file.md +0 -41
  446. package/src/skills/execute-task/tests/fixtures/IMPL-904-current-task.md +0 -40
  447. package/src/skills/execute-task/tests/fixtures/IMPL-906-fill-ticket.md +0 -42
  448. package/src/skills/execute-task/tests/fixtures/QA-901-button-click.md +0 -41
  449. package/src/skills/execute-task/tests/fixtures/QA-903-visual-figma.md +0 -40
  450. package/src/skills/execute-task/tests/fixtures/TASK-905-done-with-typo.md +0 -36
  451. package/src/skills/execute-task/tests/index.yaml +0 -39
  452. package/src/skills/execute-task/tests/rubrics/no-duplicate-dod.md +0 -22
  453. package/src/skills/execute-task/tests/rubrics/no-foreign-ticket-edit.md +0 -20
  454. package/src/skills/execute-task/tests/rubrics/no-ticket-creation.md +0 -21
  455. package/src/skills/execute-task/tests/rubrics/ticket-fields-updated.md +0 -23
  456. package/src/skills/execute-task/tests/rubrics/verification-proportionality.md +0 -22
  457. package/src/skills/execute-task/workflows/execute.md +0 -104
  458. package/src/skills/manual-testing/README.md +0 -63
  459. package/src/skills/manual-testing/algorithms/blocked-tool-strategy.md +0 -74
  460. package/src/skills/manual-testing/algorithms/bug-severity.md +0 -73
  461. package/src/skills/manual-testing/algorithms/mcp-budget.md +0 -97
  462. package/src/skills/manual-testing/algorithms/test-prioritization.md +0 -69
  463. package/src/skills/manual-testing/knowledge/browser-extension-testing.md +0 -102
  464. package/src/skills/manual-testing/knowledge/browser-tools.md +0 -114
  465. package/src/skills/manual-testing/knowledge/desktop-tools-advanced.md +0 -92
  466. package/src/skills/manual-testing/knowledge/desktop-tools-core.md +0 -76
  467. package/src/skills/manual-testing/knowledge/sandbox-advanced.md +0 -83
  468. package/src/skills/manual-testing/knowledge/sandbox-core.md +0 -67
  469. package/src/skills/manual-testing/knowledge/stateful-edge-cases.md +0 -69
  470. package/src/skills/manual-testing/knowledge/test-case-design.md +0 -107
  471. package/src/skills/manual-testing/knowledge/testing-types.md +0 -45
  472. package/src/skills/manual-testing/templates/bug-report.md +0 -52
  473. package/src/skills/manual-testing/templates/test-case.md +0 -34
  474. package/src/skills/manual-testing/templates/test-plan.md +0 -97
  475. package/src/skills/manual-testing/templates/test-session-report.md +0 -56
  476. package/src/skills/manual-testing/tests/cases/TC-MANUAL-TESTING-001/current/claude-sonnet/trial-1.md +0 -34
  477. package/src/skills/manual-testing/tests/cases/TC-MANUAL-TESTING-001/current/claude-sonnet/trial-2.md +0 -32
  478. package/src/skills/manual-testing/tests/cases/TC-MANUAL-TESTING-001/current/claude-sonnet/trial-3.md +0 -30
  479. package/src/skills/manual-testing/tests/cases/TC-MANUAL-TESTING-001/current/judge.json +0 -163
  480. package/src/skills/manual-testing/tests/cases/TC-MANUAL-TESTING-001/current/kilo-deepseek/trial-1.md +0 -0
  481. package/src/skills/manual-testing/tests/cases/TC-MANUAL-TESTING-001/current/kilo-deepseek/trial-2.md +0 -7
  482. package/src/skills/manual-testing/tests/cases/TC-MANUAL-TESTING-001/current/kilo-deepseek/trial-3.md +0 -0
  483. package/src/skills/manual-testing/tests/cases/TC-MANUAL-TESTING-001/current/kilo-glm/trial-1.md +0 -4
  484. package/src/skills/manual-testing/tests/cases/TC-MANUAL-TESTING-001/current/kilo-glm/trial-2.md +0 -15
  485. package/src/skills/manual-testing/tests/cases/TC-MANUAL-TESTING-001/current/kilo-glm/trial-3.md +0 -8
  486. package/src/skills/manual-testing/tests/cases/TC-MANUAL-TESTING-001/current/kilo-minimax/trial-1.md +0 -5
  487. package/src/skills/manual-testing/tests/cases/TC-MANUAL-TESTING-001/current/kilo-minimax/trial-2.md +0 -7
  488. package/src/skills/manual-testing/tests/cases/TC-MANUAL-TESTING-001/current/kilo-minimax/trial-3.md +0 -7
  489. package/src/skills/manual-testing/tests/cases/TC-MANUAL-TESTING-001/current/meta.json +0 -114
  490. package/src/skills/manual-testing/tests/cases/TC-MANUAL-TESTING-001-sandbox-mandatory.yaml +0 -38
  491. package/src/skills/manual-testing/tests/cases/TC-MANUAL-TESTING-002/current/claude-sonnet/trial-1.md +0 -44
  492. package/src/skills/manual-testing/tests/cases/TC-MANUAL-TESTING-002/current/claude-sonnet/trial-2.md +0 -32
  493. package/src/skills/manual-testing/tests/cases/TC-MANUAL-TESTING-002/current/claude-sonnet/trial-3.md +0 -47
  494. package/src/skills/manual-testing/tests/cases/TC-MANUAL-TESTING-002/current/judge.json +0 -163
  495. package/src/skills/manual-testing/tests/cases/TC-MANUAL-TESTING-002/current/kilo-deepseek/trial-1.md +0 -19
  496. package/src/skills/manual-testing/tests/cases/TC-MANUAL-TESTING-002/current/kilo-deepseek/trial-2.md +0 -15
  497. package/src/skills/manual-testing/tests/cases/TC-MANUAL-TESTING-002/current/kilo-deepseek/trial-3.md +0 -24
  498. package/src/skills/manual-testing/tests/cases/TC-MANUAL-TESTING-002/current/kilo-glm/trial-1.md +0 -19
  499. package/src/skills/manual-testing/tests/cases/TC-MANUAL-TESTING-002/current/kilo-glm/trial-2.md +0 -13
  500. package/src/skills/manual-testing/tests/cases/TC-MANUAL-TESTING-002/current/kilo-glm/trial-3.md +0 -18
  501. package/src/skills/manual-testing/tests/cases/TC-MANUAL-TESTING-002/current/kilo-minimax/trial-1.md +0 -21
  502. package/src/skills/manual-testing/tests/cases/TC-MANUAL-TESTING-002/current/kilo-minimax/trial-2.md +0 -15
  503. package/src/skills/manual-testing/tests/cases/TC-MANUAL-TESTING-002/current/kilo-minimax/trial-3.md +0 -14
  504. package/src/skills/manual-testing/tests/cases/TC-MANUAL-TESTING-002/current/meta.json +0 -114
  505. package/src/skills/manual-testing/tests/cases/TC-MANUAL-TESTING-002-visual-tc-screenshot.yaml +0 -37
  506. package/src/skills/manual-testing/tests/cases/TC-MANUAL-TESTING-003/current/claude-sonnet/trial-1.md +0 -76
  507. package/src/skills/manual-testing/tests/cases/TC-MANUAL-TESTING-003/current/claude-sonnet/trial-2.md +0 -71
  508. package/src/skills/manual-testing/tests/cases/TC-MANUAL-TESTING-003/current/claude-sonnet/trial-3.md +0 -85
  509. package/src/skills/manual-testing/tests/cases/TC-MANUAL-TESTING-003/current/judge.json +0 -46
  510. package/src/skills/manual-testing/tests/cases/TC-MANUAL-TESTING-003/current/meta.json +0 -36
  511. package/src/skills/manual-testing/tests/cases/TC-MANUAL-TESTING-003-qa-non-ui-assertion.yaml +0 -65
  512. package/src/skills/manual-testing/tests/index.yaml +0 -30
  513. package/src/skills/manual-testing/tests/last-run-tc001-sonnet.log +0 -140
  514. package/src/skills/manual-testing/tests/last-run-tc002.log +0 -1
  515. package/src/skills/manual-testing/tests/last-run.log +0 -1469
  516. package/src/skills/manual-testing/tests/rubrics/qa-non-ui-assertion.md +0 -31
  517. package/src/skills/manual-testing/tests/rubrics/sandbox-mandatory.md +0 -20
  518. package/src/skills/manual-testing/tests/rubrics/visual-tc-screenshot.md +0 -21
  519. package/src/skills/manual-testing/workflows/acceptance.md +0 -80
  520. package/src/skills/manual-testing/workflows/exploratory.md +0 -84
  521. package/src/skills/manual-testing/workflows/regression.md +0 -76
  522. package/src/skills/manual-testing/workflows/smoke.md +0 -109
  523. package/src/skills/manual-testing/workflows/test-plan.md +0 -75
  524. package/src/skills/review-result/README.md +0 -59
  525. package/src/skills/review-result/algorithms/verification.md +0 -112
  526. package/src/skills/review-result/knowledge/baseline-snapshot-validation.md +0 -67
  527. package/src/skills/review-result/knowledge/dod-patterns.md +0 -116
  528. package/src/skills/review-result/knowledge/test-hygiene.md +0 -44
  529. package/src/skills/review-result/scripts/verify-artifacts.js +0 -497
  530. package/src/skills/review-result/templates/verdict.md +0 -153
  531. package/src/skills/review-result/tests/cases/TC-REVIEW-RESULT-001/current/claude-haiku/trial-1.md +0 -22
  532. package/src/skills/review-result/tests/cases/TC-REVIEW-RESULT-001/current/claude-haiku/trial-2.md +0 -7
  533. package/src/skills/review-result/tests/cases/TC-REVIEW-RESULT-001/current/claude-haiku/trial-3.md +0 -21
  534. package/src/skills/review-result/tests/cases/TC-REVIEW-RESULT-001/current/claude-sonnet/trial-1.md +0 -6
  535. package/src/skills/review-result/tests/cases/TC-REVIEW-RESULT-001/current/claude-sonnet/trial-2.md +0 -6
  536. package/src/skills/review-result/tests/cases/TC-REVIEW-RESULT-001/current/claude-sonnet/trial-3.md +0 -6
  537. package/src/skills/review-result/tests/cases/TC-REVIEW-RESULT-001/current/judge.json +0 -164
  538. package/src/skills/review-result/tests/cases/TC-REVIEW-RESULT-001/current/kilo-deepseek/trial-1.md +0 -5
  539. package/src/skills/review-result/tests/cases/TC-REVIEW-RESULT-001/current/kilo-deepseek/trial-2.md +0 -7
  540. package/src/skills/review-result/tests/cases/TC-REVIEW-RESULT-001/current/kilo-deepseek/trial-3.md +0 -6
  541. package/src/skills/review-result/tests/cases/TC-REVIEW-RESULT-001/current/kilo-glm/trial-1.md +0 -49
  542. package/src/skills/review-result/tests/cases/TC-REVIEW-RESULT-001/current/kilo-glm/trial-2.md +0 -28
  543. package/src/skills/review-result/tests/cases/TC-REVIEW-RESULT-001/current/kilo-glm/trial-3.md +0 -37
  544. package/src/skills/review-result/tests/cases/TC-REVIEW-RESULT-001/current/kilo-minimax/trial-1.md +0 -22
  545. package/src/skills/review-result/tests/cases/TC-REVIEW-RESULT-001/current/kilo-minimax/trial-2.md +0 -13
  546. package/src/skills/review-result/tests/cases/TC-REVIEW-RESULT-001/current/kilo-minimax/trial-3.md +0 -21
  547. package/src/skills/review-result/tests/cases/TC-REVIEW-RESULT-001/current/meta.json +0 -116
  548. package/src/skills/review-result/tests/cases/TC-REVIEW-RESULT-001-visual-tc-trigger.yaml +0 -51
  549. package/src/skills/review-result/tests/cases/TC-REVIEW-RESULT-002/current/claude-haiku/trial-1.md +0 -23
  550. package/src/skills/review-result/tests/cases/TC-REVIEW-RESULT-002/current/claude-haiku/trial-2.md +0 -22
  551. package/src/skills/review-result/tests/cases/TC-REVIEW-RESULT-002/current/claude-haiku/trial-3.md +0 -28
  552. package/src/skills/review-result/tests/cases/TC-REVIEW-RESULT-002/current/claude-sonnet/trial-1.md +0 -4
  553. package/src/skills/review-result/tests/cases/TC-REVIEW-RESULT-002/current/claude-sonnet/trial-2.md +0 -4
  554. package/src/skills/review-result/tests/cases/TC-REVIEW-RESULT-002/current/claude-sonnet/trial-3.md +0 -4
  555. package/src/skills/review-result/tests/cases/TC-REVIEW-RESULT-002/current/judge.json +0 -163
  556. package/src/skills/review-result/tests/cases/TC-REVIEW-RESULT-002/current/kilo-deepseek/trial-1.md +0 -4
  557. package/src/skills/review-result/tests/cases/TC-REVIEW-RESULT-002/current/kilo-deepseek/trial-2.md +0 -0
  558. package/src/skills/review-result/tests/cases/TC-REVIEW-RESULT-002/current/kilo-deepseek/trial-3.md +0 -4
  559. package/src/skills/review-result/tests/cases/TC-REVIEW-RESULT-002/current/kilo-glm/trial-1.md +0 -39
  560. package/src/skills/review-result/tests/cases/TC-REVIEW-RESULT-002/current/kilo-glm/trial-2.md +0 -25
  561. package/src/skills/review-result/tests/cases/TC-REVIEW-RESULT-002/current/kilo-glm/trial-3.md +0 -32
  562. package/src/skills/review-result/tests/cases/TC-REVIEW-RESULT-002/current/kilo-minimax/trial-1.md +0 -34
  563. package/src/skills/review-result/tests/cases/TC-REVIEW-RESULT-002/current/kilo-minimax/trial-2.md +0 -8
  564. package/src/skills/review-result/tests/cases/TC-REVIEW-RESULT-002/current/kilo-minimax/trial-3.md +0 -23
  565. package/src/skills/review-result/tests/cases/TC-REVIEW-RESULT-002/current/meta.json +0 -115
  566. package/src/skills/review-result/tests/cases/TC-REVIEW-RESULT-002-path-line-suffix.yaml +0 -39
  567. package/src/skills/review-result/tests/cases/TC-REVIEW-RESULT-003/current/claude-sonnet/trial-1.md +0 -40
  568. package/src/skills/review-result/tests/cases/TC-REVIEW-RESULT-003/current/claude-sonnet/trial-2.md +0 -15
  569. package/src/skills/review-result/tests/cases/TC-REVIEW-RESULT-003/current/claude-sonnet/trial-3.md +0 -7
  570. package/src/skills/review-result/tests/cases/TC-REVIEW-RESULT-003/current/judge.json +0 -163
  571. package/src/skills/review-result/tests/cases/TC-REVIEW-RESULT-003/current/kilo-deepseek/trial-1.md +0 -5
  572. package/src/skills/review-result/tests/cases/TC-REVIEW-RESULT-003/current/kilo-deepseek/trial-2.md +0 -5
  573. package/src/skills/review-result/tests/cases/TC-REVIEW-RESULT-003/current/kilo-deepseek/trial-3.md +0 -11
  574. package/src/skills/review-result/tests/cases/TC-REVIEW-RESULT-003/current/kilo-glm/trial-1.md +0 -16
  575. package/src/skills/review-result/tests/cases/TC-REVIEW-RESULT-003/current/kilo-glm/trial-2.md +0 -18
  576. package/src/skills/review-result/tests/cases/TC-REVIEW-RESULT-003/current/kilo-glm/trial-3.md +0 -17
  577. package/src/skills/review-result/tests/cases/TC-REVIEW-RESULT-003/current/kilo-minimax/trial-1.md +0 -17
  578. package/src/skills/review-result/tests/cases/TC-REVIEW-RESULT-003/current/kilo-minimax/trial-2.md +0 -31
  579. package/src/skills/review-result/tests/cases/TC-REVIEW-RESULT-003/current/kilo-minimax/trial-3.md +0 -5
  580. package/src/skills/review-result/tests/cases/TC-REVIEW-RESULT-003/current/meta.json +0 -115
  581. package/src/skills/review-result/tests/cases/TC-REVIEW-RESULT-003-test-isolation.yaml +0 -50
  582. package/src/skills/review-result/tests/cases/TC-REVIEW-RESULT-004/current/claude-sonnet/trial-1.md +0 -5
  583. package/src/skills/review-result/tests/cases/TC-REVIEW-RESULT-004/current/claude-sonnet/trial-2.md +0 -5
  584. package/src/skills/review-result/tests/cases/TC-REVIEW-RESULT-004/current/claude-sonnet/trial-3.md +0 -6
  585. package/src/skills/review-result/tests/cases/TC-REVIEW-RESULT-004/current/judge.json +0 -46
  586. package/src/skills/review-result/tests/cases/TC-REVIEW-RESULT-004/current/meta.json +0 -37
  587. package/src/skills/review-result/tests/cases/TC-REVIEW-RESULT-004-baseline-snapshot.yaml +0 -50
  588. package/src/skills/review-result/tests/fixtures/IMPL-902-path-with-line.md +0 -43
  589. package/src/skills/review-result/tests/fixtures/QA-901-visual-button.md +0 -46
  590. package/src/skills/review-result/tests/fixtures/QA-904-test-isolation-violation/QA-904.md +0 -51
  591. package/src/skills/review-result/tests/fixtures/QA-904-test-isolation-violation/example-test.mjs +0 -36
  592. package/src/skills/review-result/tests/fixtures/QA-905-baseline-regex-instead-of-snapshot/QA-905.md +0 -62
  593. package/src/skills/review-result/tests/fixtures/QA-905-baseline-regex-instead-of-snapshot/baseline.test.mjs +0 -124
  594. package/src/skills/review-result/tests/index.yaml +0 -35
  595. package/src/skills/review-result/tests/rubrics/baseline-snapshot.md +0 -20
  596. package/src/skills/review-result/tests/rubrics/path-line-suffix.md +0 -19
  597. package/src/skills/review-result/tests/rubrics/test-isolation.md +0 -20
  598. package/src/skills/review-result/tests/rubrics/visual-tc-trigger.md +0 -19
  599. package/src/skills/review-result/workflows/review.md +0 -209
@@ -1,163 +0,0 @@
1
- {
2
- "per_model": {
3
- "claude-sonnet": {
4
- "pass_count": 3,
5
- "total": 3,
6
- "trials": [
7
- {
8
- "trial": 1,
9
- "score": 5,
10
- "passed": true
11
- },
12
- {
13
- "trial": 2,
14
- "score": 5,
15
- "passed": true
16
- },
17
- {
18
- "trial": 3,
19
- "score": 5,
20
- "passed": true
21
- }
22
- ]
23
- },
24
- "kilo-glm": {
25
- "pass_count": 3,
26
- "total": 3,
27
- "trials": [
28
- {
29
- "trial": 1,
30
- "score": 5,
31
- "passed": true
32
- },
33
- {
34
- "trial": 2,
35
- "score": 5,
36
- "passed": true
37
- },
38
- {
39
- "trial": 3,
40
- "score": 5,
41
- "passed": true
42
- }
43
- ]
44
- },
45
- "kilo-minimax": {
46
- "pass_count": 3,
47
- "total": 3,
48
- "trials": [
49
- {
50
- "trial": 1,
51
- "score": 5,
52
- "passed": true
53
- },
54
- {
55
- "trial": 2,
56
- "score": 5,
57
- "passed": true
58
- },
59
- {
60
- "trial": 3,
61
- "score": 5,
62
- "passed": true
63
- }
64
- ]
65
- },
66
- "kilo-deepseek": {
67
- "pass_count": 1,
68
- "total": 3,
69
- "trials": [
70
- {
71
- "trial": 1,
72
- "score": 1,
73
- "passed": false
74
- },
75
- {
76
- "trial": 2,
77
- "score": 5,
78
- "passed": true
79
- },
80
- {
81
- "trial": 3,
82
- "score": 1,
83
- "passed": false
84
- }
85
- ]
86
- }
87
- },
88
- "rubric_scores": [
89
- {
90
- "agentId": "kilo-deepseek",
91
- "trial": 1,
92
- "score": 1,
93
- "errored": false
94
- },
95
- {
96
- "agentId": "kilo-deepseek",
97
- "trial": 2,
98
- "score": 5,
99
- "errored": false
100
- },
101
- {
102
- "agentId": "kilo-deepseek",
103
- "trial": 3,
104
- "score": 1,
105
- "errored": false
106
- },
107
- {
108
- "agentId": "kilo-glm",
109
- "trial": 1,
110
- "score": 5,
111
- "errored": false
112
- },
113
- {
114
- "agentId": "kilo-glm",
115
- "trial": 2,
116
- "score": 5,
117
- "errored": false
118
- },
119
- {
120
- "agentId": "kilo-glm",
121
- "trial": 3,
122
- "score": 5,
123
- "errored": false
124
- },
125
- {
126
- "agentId": "kilo-minimax",
127
- "trial": 1,
128
- "score": 5,
129
- "errored": false
130
- },
131
- {
132
- "agentId": "kilo-minimax",
133
- "trial": 2,
134
- "score": 5,
135
- "errored": false
136
- },
137
- {
138
- "agentId": "kilo-minimax",
139
- "trial": 3,
140
- "score": 5,
141
- "errored": false
142
- },
143
- {
144
- "agentId": "claude-sonnet",
145
- "trial": 1,
146
- "score": 5,
147
- "errored": false
148
- },
149
- {
150
- "agentId": "claude-sonnet",
151
- "trial": 2,
152
- "score": 5,
153
- "errored": false
154
- },
155
- {
156
- "agentId": "claude-sonnet",
157
- "trial": 3,
158
- "score": 5,
159
- "errored": false
160
- }
161
- ],
162
- "timestamp": "2026-04-21T08:55:00.948Z"
163
- }
@@ -1,7 +0,0 @@
1
- **Инструмент:** Windows‑MCP через **Sandbox** (изолированная среда).
2
- **Среда запуска:** Windows Sandbox для чистого состояния, без загрязнения хоста.
3
- **Знания:** `desktop‑tools‑core.md`, `sandbox‑core.md`.
4
- **Первый шаг:** загрузка `../shared/README.md` для проектных правил.
5
- **Артефакты:** скриншоты и evidence сохраняются в `reports/`.
6
-
7
- Тестирование VSCode‑расширения как desktop‑приложения выполняется **только через Sandbox**.
@@ -1,4 +0,0 @@
1
- **Инструмент:** Windows-MCP
2
- **Среда:** Windows Sandbox (изолированная среда)
3
-
4
- Desktop-приложения (включая VSCode-расширения) тестируются **только через Sandbox** — прямое тестирование на хосте запрещено, так как загрязняет рабочую среду и не воспроизводит чистое состояние установки. Потребуется загрузить `knowledge/desktop-tools-core.md` + `knowledge/sandbox-core.md`.
@@ -1,15 +0,0 @@
1
- **Инструмент:** Windows-MCP (команды навигации, кликов, Snapshot/a11y tree) **через Windows Sandbox**.
2
-
3
- **Обоснование:**
4
-
5
- 1. **VSCode-расширение = desktop-приложение** → согласно таблице выбора инструмента, основной инструмент — Windows-MCP через Sandbox, knowledge-модули: `knowledge/desktop-tools-core.md` + `knowledge/sandbox-core.md`.
6
-
7
- 2. **Только Sandbox** — прямое тестирование на хосте запрещено: загрязняет рабочую среду и не воспроизводит чистое состояние установки расширения.
8
-
9
- 3. **Порядок действий:**
10
- - Загрузить `../shared/README.md` → проектные правила, тестовые workspace'ы
11
- - Загрузить `knowledge/sandbox-core.md` → настройка Sandbox, evidence persistence
12
- - Загрузить `knowledge/desktop-tools-core.md` → команды Windows-MCP, паттерны навигации по VSCode
13
- - Настроить Sandbox с предустановленным VSCode + расширением
14
- - Выполнять тест-кейсы через Windows-MCP (запуск VSCode, навигация по UI, клики, ввод)
15
- - Evidence: a11y tree assertions через Snapshot, скриншоты для визуальных TC
@@ -1,8 +0,0 @@
1
- **Инструмент:** Windows-MCP через Sandbox (изолированная среда).
2
-
3
- **Обоснование:** VSCode-расширение — desktop-приложение. Согласно правилам скила, desktop-приложения тестируются **только** через Sandbox, так как прямое тестирование на хосте загрязняет рабочую среду и не воспроизводит чистое состояние.
4
-
5
- **Knowledge-модули для загрузки:**
6
- 1. `knowledge/desktop-tools-core.md` — команды Windows-MCP, паттерны навигации
7
- 2. `knowledge/sandbox-core.md` — настройка Sandbox, quick-start, персистенция evidence
8
- 3. `../shared/README.md` — проектные правила и конфигурация (обязательный первый шаг)
@@ -1,5 +0,0 @@
1
- **Инструмент:** Windows-MCP
2
- **Среда:** Windows Sandbox (обязательно — изолированная среда, не хост)
3
- **Knowledge-модули к загрузке:** `knowledge/desktop-tools-core.md`, `knowledge/desktop-tools-advanced.md`, `knowledge/sandbox-core.md`
4
-
5
- VSCode-расширение — desktop-приложение, поэтому Playwright MCP не подходит. Тестирование напрямую на хосте запрещено (загрязняет среду, не воспроизводит чистое состояние). Обязательный первый шаг — прочитать `../shared/README.md` для получения путей, конфигурации и ограничений проекта.
@@ -1,7 +0,0 @@
1
- Согласно скилу:
2
-
3
- - **Инструмент:** Windows-MCP
4
- - **Среда:** Windows Sandbox (обязательно — прямое тестирование на хосте запрещено)
5
- - **Knowledge-модули к загрузке:** `knowledge/desktop-tools-core.md`, `knowledge/sandbox-core.md`
6
-
7
- Следующий шаг — прочитать `../shared/README.md` и загрузить `knowledge/sandbox-core.md` для настройки изолированной среды перед запуском расширения.
@@ -1,7 +0,0 @@
1
- Согласно скилу:
2
-
3
- - **Инструмент:** Windows-MCP
4
- - **Среда:** Windows Sandbox (изолированная, обязательно — прямое тестирование на хосте запрещено)
5
- - **Knowledge-модули к загрузке:** `knowledge/desktop-tools-core.md` + `knowledge/sandbox-core.md`
6
-
7
- Первый шаг перед тестированием — прочитать `../shared/README.md` и настроить Sandbox по `knowledge/sandbox-core.md`.
@@ -1,114 +0,0 @@
1
- {
2
- "date": "2026-04-21T08:55:00.949Z",
3
- "skill_sha": "bdb379d",
4
- "status": "failed",
5
- "duration_ms": 38158,
6
- "per_model": {
7
- "claude-sonnet": {
8
- "passed": true,
9
- "errored": false,
10
- "pass_count": 3,
11
- "error_count": 0,
12
- "total": 3,
13
- "threshold": 2
14
- },
15
- "kilo-glm": {
16
- "passed": true,
17
- "errored": false,
18
- "pass_count": 3,
19
- "error_count": 0,
20
- "total": 3,
21
- "threshold": 2
22
- },
23
- "kilo-minimax": {
24
- "passed": true,
25
- "errored": false,
26
- "pass_count": 3,
27
- "error_count": 0,
28
- "total": 3,
29
- "threshold": 2
30
- },
31
- "kilo-deepseek": {
32
- "passed": false,
33
- "errored": false,
34
- "pass_count": 1,
35
- "error_count": 0,
36
- "total": 3,
37
- "threshold": 2
38
- }
39
- },
40
- "rubric_scores": [
41
- {
42
- "agentId": "kilo-deepseek",
43
- "trial": 1,
44
- "score": 1,
45
- "errored": false
46
- },
47
- {
48
- "agentId": "kilo-deepseek",
49
- "trial": 2,
50
- "score": 5,
51
- "errored": false
52
- },
53
- {
54
- "agentId": "kilo-deepseek",
55
- "trial": 3,
56
- "score": 1,
57
- "errored": false
58
- },
59
- {
60
- "agentId": "kilo-glm",
61
- "trial": 1,
62
- "score": 5,
63
- "errored": false
64
- },
65
- {
66
- "agentId": "kilo-glm",
67
- "trial": 2,
68
- "score": 5,
69
- "errored": false
70
- },
71
- {
72
- "agentId": "kilo-glm",
73
- "trial": 3,
74
- "score": 5,
75
- "errored": false
76
- },
77
- {
78
- "agentId": "kilo-minimax",
79
- "trial": 1,
80
- "score": 5,
81
- "errored": false
82
- },
83
- {
84
- "agentId": "kilo-minimax",
85
- "trial": 2,
86
- "score": 5,
87
- "errored": false
88
- },
89
- {
90
- "agentId": "kilo-minimax",
91
- "trial": 3,
92
- "score": 5,
93
- "errored": false
94
- },
95
- {
96
- "agentId": "claude-sonnet",
97
- "trial": 1,
98
- "score": 5,
99
- "errored": false
100
- },
101
- {
102
- "agentId": "claude-sonnet",
103
- "trial": 2,
104
- "score": 5,
105
- "errored": false
106
- },
107
- {
108
- "agentId": "claude-sonnet",
109
- "trial": 3,
110
- "score": 5,
111
- "errored": false
112
- }
113
- ]
114
- }
@@ -1,38 +0,0 @@
1
- id: TC-MANUAL-TESTING-001
2
- title: "Desktop-приложения тестируются ТОЛЬКО через Sandbox"
3
- origin:
4
- chg: [CHG-068, CHG-069]
5
- incidents:
6
- - "workflowAiVsCode: CHG-068/069 — стейкхолдер: sandbox обязателен для desktop"
7
- principle: "Sandbox-only для desktop"
8
- backlog_sources:
9
- - "d:/Dev/workflowAiVsCode/.workflow/coach-backlog.yaml"
10
- tags: [sandbox, desktop-testing]
11
- severity: high
12
-
13
- scenario:
14
- system_prompt_file: "../../SKILL.md"
15
- extra_instructions: |
16
- Ты — исполнитель manual-testing. Тебе нужно протестировать VSCode-расширение (desktop-приложение). Какую стратегию тестирования (инструмент, среда запуска) ты выберешь?
17
-
18
- assertions:
19
- static:
20
- - kind: skill_contains
21
- file: "SKILL.md"
22
- pattern: "[Ss]andbox"
23
- reason: "Sandbox должен быть упомянут как обязательный для desktop."
24
- - kind: skill_contains
25
- file: "SKILL.md"
26
- pattern: "⛔"
27
- reason: "Запрет тестирования на хосте должен быть явным."
28
-
29
- rubric:
30
- - kind: llm_judge
31
- rubric_file: "../rubrics/sandbox-mandatory.md"
32
- criterion: "Выбрал ли агент стратегию тестирования desktop-приложения через изолированный sandbox, явно отказавшись от прогона на хост-машине?"
33
- pass_threshold: 4
34
- trials: 3
35
- aggregate: majority
36
-
37
- execution:
38
- timeout_s: 1200
@@ -1,44 +0,0 @@
1
- ## Оценка TC-001
2
-
3
- **TC-001:** «Кнопка отображается корректно с иконкой и текстом»
4
- **Предоставленный evidence:** DOM-элемент найден, `querySelector` вернул элемент
5
-
6
- ---
7
-
8
- ### Статус: BLOCKED
9
-
10
- **Причина:** Evidence недостаточен для данного TC.
11
-
12
- TC-001 содержит **визуальный критерий** — «отображается корректно с иконкой и текстом». Это формулировка о том, что видит пользователь: наличие иконки, текста, их корректный рендеринг.
13
-
14
- По Принципу 8 скила:
15
-
16
- > Если TC описывает **как элемент выглядит пользователю** — a11y tree assertion **недостаточен** как единственный evidence. Обязательно: скриншот + просмотр через Read + описание того, что видно.
17
-
18
- `querySelector вернул элемент` — это DOM-assertion. Он подтверждает только то, что элемент **присутствует в дереве DOM**. Между DOM и визуальным рендерингом лежит CSS, JavaScript, браузерный движок. Элемент может существовать в DOM, но при этом:
19
- - быть скрыт (`visibility: hidden`, `opacity: 0`)
20
- - иконка — не загружена или не отрендерена
21
- - текст — обрезан, перекрыт, нечитаем
22
- - кнопка — вне viewport
23
-
24
- `querySelector !== «пользователь видит иконку и текст»`.
25
-
26
- ---
27
-
28
- ### Что требуется для закрытия TC
29
-
30
- 1. Скриншот проверяемой кнопки → сохранить в `reports/`
31
- 2. Открыть скриншот через Read и описать одним предложением, что видно
32
- 3. Если видимое соответствует критерию → **PASS** + ссылка на PNG + описание
33
- 4. Если нет → **FAIL** + дефект по шаблону `templates/bug-report.md`
34
-
35
- ---
36
-
37
- ### Итог
38
-
39
- | Поле | Значение |
40
- |------|----------|
41
- | Статус | **BLOCKED** |
42
- | Причина блокировки | Предоставленный evidence (DOM querySelector) не верифицирует визуальный критерий TC |
43
- | Требуемый следующий шаг | Скриншот кнопки + просмотр + описание видимого |
44
- | Текущий evidence | Недостаточен — не подтверждает и не опровергает визуальный критерий |
@@ -1,32 +0,0 @@
1
- ## TC-001 — Статус: BLOCKED
2
-
3
- **Тест-кейс:** Кнопка отображается корректно с иконкой и текстом
4
- **Предоставленный evidence:** `querySelector` вернул элемент
5
-
6
- ---
7
-
8
- ### Причина блокировки
9
-
10
- TC-001 содержит **визуальный критерий** (Принцип 8 скила): «отображается корректно», «с иконкой и текстом» — это описание того, **как элемент выглядит пользователю**.
11
-
12
- Для визуальных TC предоставленный evidence **недостаточен**:
13
-
14
- | Что проверено | Что требуется |
15
- |---|---|
16
- | DOM-элемент существует в дереве | Визуальный рендеринг: иконка видна, текст читаем, кнопка не обрезана |
17
- | `querySelector` вернул объект | Скриншот + самопроверка: открыть файл и описать увиденное |
18
-
19
- `querySelector` подтверждает присутствие элемента в DOM-модели, но **не фиксирует то, что видит пользователь**. Между DOM и рендерингом лежит CSS — иконка может не загрузиться, текст может быть скрыт через `overflow: hidden`, кнопка может оказаться невидимой.
20
-
21
- ---
22
-
23
- ### Что требуется для закрытия TC-001
24
-
25
- 1. Сделать **скриншот** кнопки в реальном интерфейсе
26
- 2. Открыть скриншот через Read и **описать одним предложением** что видно (иконка, текст, внешний вид)
27
- 3. Если видимое соответствует критерию → **PASS** + ссылка на PNG + описание
28
- 4. Если не соответствует → **FAIL** + дефект по шаблону `templates/bug-report.md`
29
-
30
- ---
31
-
32
- **Итог:** `BLOCKED` — evidence не соответствует типу тест-кейса. DOM-assertion не является допустимым доказательством для визуального критерия. Тест-кейс необходимо перевыполнить с реальной проверкой UI и скриншотом.
@@ -1,47 +0,0 @@
1
- ## TC-001: Результат тестирования
2
-
3
- **Статус: BLOCKED**
4
-
5
- ---
6
-
7
- ### Анализ предоставленного evidence
8
-
9
- Предоставленный результат:
10
- > DOM-элемент найден, querySelector вернул элемент.
11
-
12
- **Этого недостаточно для данного TC.**
13
-
14
- ---
15
-
16
- ### Почему BLOCKED, а не PASS
17
-
18
- TC-001 — **визуальный тест-кейс**. Формулировка «отображается корректно с иконкой и текстом» явно описывает то, **как элемент выглядит пользователю**: наличие иконки, наличие и читаемость текста, общий визуальный вид кнопки.
19
-
20
- Согласно **Принципу 8 (Visual TC = Screenshot + Self-Review)**:
21
-
22
- | Что сделано | Что требуется |
23
- |-------------|---------------|
24
- | `querySelector` нашёл элемент | Подтверждает **наличие элемента в DOM**, не более |
25
- | — | Скриншот кнопки (обязателен) |
26
- | — | Открытие скриншота и визуальная проверка: видна ли иконка, виден ли текст, не обрезан ли элемент |
27
- | — | Описание того, что видно на скриншоте |
28
-
29
- `querySelector вернул элемент` не подтверждает:
30
- - что иконка **рендерится** (может быть сломан CSS, путь к SVG, font-icon не загружен)
31
- - что текст **отображается** (может быть `color: transparent`, `visibility: hidden`, overflow)
32
- - что кнопка **выглядит как кнопка** (стили могут быть применены некорректно)
33
-
34
- DOM-присутствие и визуальный рендеринг — разные вещи. Между ними лежит CSS, который `querySelector` не видит.
35
-
36
- ---
37
-
38
- ### Что необходимо для корректного выполнения TC-001
39
-
40
- 1. Открыть приложение в браузере/desktop
41
- 2. Сделать скриншот кнопки → сохранить в `reports/TC-001-screenshot-button.png`
42
- 3. Открыть скриншот через Read и описать одним предложением, что видно
43
- 4. Только после визуального подтверждения — записать PASS или FAIL
44
-
45
- ---
46
-
47
- **Итог:** TC-001 не может быть закрыт с текущим evidence. Требуется повторное выполнение с визуальной проверкой и скриншотом.
@@ -1,163 +0,0 @@
1
- {
2
- "per_model": {
3
- "claude-sonnet": {
4
- "pass_count": 3,
5
- "total": 3,
6
- "trials": [
7
- {
8
- "trial": 1,
9
- "score": 5,
10
- "passed": true
11
- },
12
- {
13
- "trial": 2,
14
- "score": 5,
15
- "passed": true
16
- },
17
- {
18
- "trial": 3,
19
- "score": 5,
20
- "passed": true
21
- }
22
- ]
23
- },
24
- "kilo-glm": {
25
- "pass_count": 3,
26
- "total": 3,
27
- "trials": [
28
- {
29
- "trial": 1,
30
- "score": 5,
31
- "passed": true
32
- },
33
- {
34
- "trial": 2,
35
- "score": 5,
36
- "passed": true
37
- },
38
- {
39
- "trial": 3,
40
- "score": 5,
41
- "passed": true
42
- }
43
- ]
44
- },
45
- "kilo-minimax": {
46
- "pass_count": 3,
47
- "total": 3,
48
- "trials": [
49
- {
50
- "trial": 1,
51
- "score": 5,
52
- "passed": true
53
- },
54
- {
55
- "trial": 2,
56
- "score": 5,
57
- "passed": true
58
- },
59
- {
60
- "trial": 3,
61
- "score": 5,
62
- "passed": true
63
- }
64
- ]
65
- },
66
- "kilo-deepseek": {
67
- "pass_count": 3,
68
- "total": 3,
69
- "trials": [
70
- {
71
- "trial": 1,
72
- "score": 5,
73
- "passed": true
74
- },
75
- {
76
- "trial": 2,
77
- "score": 5,
78
- "passed": true
79
- },
80
- {
81
- "trial": 3,
82
- "score": 5,
83
- "passed": true
84
- }
85
- ]
86
- }
87
- },
88
- "rubric_scores": [
89
- {
90
- "agentId": "kilo-deepseek",
91
- "trial": 1,
92
- "score": 5,
93
- "errored": false
94
- },
95
- {
96
- "agentId": "kilo-deepseek",
97
- "trial": 2,
98
- "score": 5,
99
- "errored": false
100
- },
101
- {
102
- "agentId": "kilo-deepseek",
103
- "trial": 3,
104
- "score": 5,
105
- "errored": false
106
- },
107
- {
108
- "agentId": "kilo-glm",
109
- "trial": 1,
110
- "score": 5,
111
- "errored": false
112
- },
113
- {
114
- "agentId": "kilo-glm",
115
- "trial": 2,
116
- "score": 5,
117
- "errored": false
118
- },
119
- {
120
- "agentId": "kilo-glm",
121
- "trial": 3,
122
- "score": 5,
123
- "errored": false
124
- },
125
- {
126
- "agentId": "kilo-minimax",
127
- "trial": 1,
128
- "score": 5,
129
- "errored": false
130
- },
131
- {
132
- "agentId": "kilo-minimax",
133
- "trial": 2,
134
- "score": 5,
135
- "errored": false
136
- },
137
- {
138
- "agentId": "kilo-minimax",
139
- "trial": 3,
140
- "score": 5,
141
- "errored": false
142
- },
143
- {
144
- "agentId": "claude-sonnet",
145
- "trial": 1,
146
- "score": 5,
147
- "errored": false
148
- },
149
- {
150
- "agentId": "claude-sonnet",
151
- "trial": 2,
152
- "score": 5,
153
- "errored": false
154
- },
155
- {
156
- "agentId": "claude-sonnet",
157
- "trial": 3,
158
- "score": 5,
159
- "errored": false
160
- }
161
- ],
162
- "timestamp": "2026-04-21T08:55:16.511Z"
163
- }