workflow-ai 1.0.68 → 1.2.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (585)
  1. package/CHANGELOG.md +49 -0
  2. package/README.md +105 -7
  3. package/configs/pipeline.yaml +23 -2
  4. package/package.json +44 -41
  5. package/src/lib/operations/plans.mjs +85 -0
  6. package/src/lib/operations/skills.mjs +124 -0
  7. package/src/lib/operations/tickets.mjs +430 -0
  8. package/src/lib/utils.mjs +286 -286
  9. package/src/runner.mjs +314 -34
  10. package/src/scripts/check-conditions.js +2 -2
  11. package/src/scripts/get-next-id.js +8 -31
  12. package/src/scripts/pick-next-task.js +3 -9
  13. package/src/skills/coach/SKILL.md +1 -1
  14. package/src/skills/manual-testing/SKILL.md +2 -0
  15. package/src/skills/review-result/SKILL.md +1 -0
  16. package/src/scripts/tests/timeout-cascade.test.js +0 -28
  17. package/src/skills/analyze-report/README.md +0 -44
  18. package/src/skills/analyze-report/algorithms/progress-assessment.md +0 -108
  19. package/src/skills/analyze-report/knowledge/analysis-frameworks.md +0 -66
  20. package/src/skills/analyze-report/knowledge/report-structure.md +0 -61
  21. package/src/skills/analyze-report/scripts/calc-plan-metrics.js +0 -234
  22. package/src/skills/analyze-report/templates/analysis-report.md +0 -80
  23. package/src/skills/analyze-report/tests/cases/TC-ANALYZE-REPORT-001/current/claude-sonnet/trial-1.md +0 -69
  24. package/src/skills/analyze-report/tests/cases/TC-ANALYZE-REPORT-001/current/claude-sonnet/trial-2.md +0 -103
  25. package/src/skills/analyze-report/tests/cases/TC-ANALYZE-REPORT-001/current/claude-sonnet/trial-3.md +0 -99
  26. package/src/skills/analyze-report/tests/cases/TC-ANALYZE-REPORT-001/current/judge.json +0 -163
  27. package/src/skills/analyze-report/tests/cases/TC-ANALYZE-REPORT-001/current/kilo-deepseek/trial-1.md +0 -89
  28. package/src/skills/analyze-report/tests/cases/TC-ANALYZE-REPORT-001/current/kilo-deepseek/trial-2.md +0 -88
  29. package/src/skills/analyze-report/tests/cases/TC-ANALYZE-REPORT-001/current/kilo-deepseek/trial-3.md +0 -100
  30. package/src/skills/analyze-report/tests/cases/TC-ANALYZE-REPORT-001/current/kilo-glm/trial-1.md +0 -77
  31. package/src/skills/analyze-report/tests/cases/TC-ANALYZE-REPORT-001/current/kilo-glm/trial-2.md +0 -64
  32. package/src/skills/analyze-report/tests/cases/TC-ANALYZE-REPORT-001/current/kilo-glm/trial-3.md +0 -110
  33. package/src/skills/analyze-report/tests/cases/TC-ANALYZE-REPORT-001/current/kilo-minimax/trial-1.md +0 -74
  34. package/src/skills/analyze-report/tests/cases/TC-ANALYZE-REPORT-001/current/kilo-minimax/trial-2.md +0 -38
  35. package/src/skills/analyze-report/tests/cases/TC-ANALYZE-REPORT-001/current/kilo-minimax/trial-3.md +0 -61
  36. package/src/skills/analyze-report/tests/cases/TC-ANALYZE-REPORT-001/current/meta.json +0 -115
  37. package/src/skills/analyze-report/tests/cases/TC-ANALYZE-REPORT-001-evidence-from-log.yaml +0 -60
  38. package/src/skills/analyze-report/tests/cases/TC-ANALYZE-REPORT-002/current/claude-sonnet/trial-1.md +0 -90
  39. package/src/skills/analyze-report/tests/cases/TC-ANALYZE-REPORT-002/current/claude-sonnet/trial-2.md +0 -89
  40. package/src/skills/analyze-report/tests/cases/TC-ANALYZE-REPORT-002/current/claude-sonnet/trial-3.md +0 -77
  41. package/src/skills/analyze-report/tests/cases/TC-ANALYZE-REPORT-002/current/judge.json +0 -163
  42. package/src/skills/analyze-report/tests/cases/TC-ANALYZE-REPORT-002/current/kilo-deepseek/trial-1.md +0 -84
  43. package/src/skills/analyze-report/tests/cases/TC-ANALYZE-REPORT-002/current/kilo-deepseek/trial-2.md +0 -77
  44. package/src/skills/analyze-report/tests/cases/TC-ANALYZE-REPORT-002/current/kilo-deepseek/trial-3.md +0 -89
  45. package/src/skills/analyze-report/tests/cases/TC-ANALYZE-REPORT-002/current/kilo-glm/trial-1.md +0 -103
  46. package/src/skills/analyze-report/tests/cases/TC-ANALYZE-REPORT-002/current/kilo-glm/trial-2.md +0 -103
  47. package/src/skills/analyze-report/tests/cases/TC-ANALYZE-REPORT-002/current/kilo-glm/trial-3.md +0 -103
  48. package/src/skills/analyze-report/tests/cases/TC-ANALYZE-REPORT-002/current/kilo-minimax/trial-1.md +0 -93
  49. package/src/skills/analyze-report/tests/cases/TC-ANALYZE-REPORT-002/current/kilo-minimax/trial-2.md +0 -93
  50. package/src/skills/analyze-report/tests/cases/TC-ANALYZE-REPORT-002/current/kilo-minimax/trial-3.md +0 -86
  51. package/src/skills/analyze-report/tests/cases/TC-ANALYZE-REPORT-002/current/meta.json +0 -115
  52. package/src/skills/analyze-report/tests/cases/TC-ANALYZE-REPORT-002-result-block-format.yaml +0 -44
  53. package/src/skills/analyze-report/tests/fixtures/REPORT-002-incorrect-attribution.md +0 -27
  54. package/src/skills/analyze-report/tests/fixtures/pipeline-2026-04-06_qa-001-skip.log +0 -32
  55. package/src/skills/analyze-report/tests/index.yaml +0 -25
  56. package/src/skills/analyze-report/tests/rubrics/evidence-from-log.md +0 -22
  57. package/src/skills/analyze-report/tests/rubrics/result-block-format.md +0 -22
  58. package/src/skills/analyze-report/workflows/progress.md +0 -158
  59. package/src/skills/analyze-report/workflows/retrospective.md +0 -143
  60. package/src/skills/coach/README.md +0 -43
  61. package/src/skills/coach/SKILL.md.legacy +0 -157
  62. package/src/skills/coach/algorithms/gap-analysis.md +0 -69
  63. package/src/skills/coach/algorithms/improvement-prioritization.md +0 -62
  64. package/src/skills/coach/algorithms/skill-scoring.md +0 -80
  65. package/src/skills/coach/knowledge/audit-applied-changes-clean.txt +0 -11
  66. package/src/skills/coach/knowledge/backlog-management.md +0 -67
  67. package/src/skills/coach/knowledge/backlog-management.md.legacy +0 -90
  68. package/src/skills/coach/knowledge/common-antipatterns.md +0 -76
  69. package/src/skills/coach/knowledge/prompt-engineering.md +0 -45
  70. package/src/skills/coach/knowledge/shared-knowledge-guide.md +0 -44
  71. package/src/skills/coach/knowledge/skill-anatomy.md +0 -49
  72. package/src/skills/coach/knowledge/test-authorship.md +0 -141
  73. package/src/skills/coach/templates/audit-report.md +0 -39
  74. package/src/skills/coach/templates/coach-backlog-init.yaml +0 -14
  75. package/src/skills/coach/templates/coach-backlog-init.yaml.legacy +0 -10
  76. package/src/skills/coach/templates/improvement-plan.md +0 -42
  77. package/src/skills/coach/templates/new-skill.md +0 -95
  78. package/src/skills/coach/tests/cases/TC-COACH-001/current/claude-sonnet/trial-1.md +0 -58
  79. package/src/skills/coach/tests/cases/TC-COACH-001/current/claude-sonnet/trial-2.md +0 -65
  80. package/src/skills/coach/tests/cases/TC-COACH-001/current/claude-sonnet/trial-3.md +0 -58
  81. package/src/skills/coach/tests/cases/TC-COACH-001/current/judge.json +0 -151
  82. package/src/skills/coach/tests/cases/TC-COACH-001/current/kilo-deepseek/trial-1.md +0 -46
  83. package/src/skills/coach/tests/cases/TC-COACH-001/current/kilo-deepseek/trial-2.md +0 -0
  84. package/src/skills/coach/tests/cases/TC-COACH-001/current/kilo-deepseek/trial-3.md +0 -75
  85. package/src/skills/coach/tests/cases/TC-COACH-001/current/kilo-glm/trial-1.md +0 -81
  86. package/src/skills/coach/tests/cases/TC-COACH-001/current/kilo-glm/trial-2.md +0 -101
  87. package/src/skills/coach/tests/cases/TC-COACH-001/current/kilo-glm/trial-3.md +0 -91
  88. package/src/skills/coach/tests/cases/TC-COACH-001/current/kilo-minimax/trial-1.md +0 -48
  89. package/src/skills/coach/tests/cases/TC-COACH-001/current/kilo-minimax/trial-2.md +0 -30
  90. package/src/skills/coach/tests/cases/TC-COACH-001/current/kilo-minimax/trial-3.md +0 -55
  91. package/src/skills/coach/tests/cases/TC-COACH-001/current/meta.json +0 -94
  92. package/src/skills/coach/tests/cases/TC-COACH-001-evidence-based-temporal-diagram.yaml +0 -53
  93. package/src/skills/coach/tests/cases/TC-COACH-002/current/claude-sonnet/trial-1.md +0 -46
  94. package/src/skills/coach/tests/cases/TC-COACH-002/current/claude-sonnet/trial-2.md +0 -50
  95. package/src/skills/coach/tests/cases/TC-COACH-002/current/claude-sonnet/trial-3.md +0 -48
  96. package/src/skills/coach/tests/cases/TC-COACH-002/current/judge.json +0 -151
  97. package/src/skills/coach/tests/cases/TC-COACH-002/current/kilo-deepseek/trial-1.md +0 -0
  98. package/src/skills/coach/tests/cases/TC-COACH-002/current/kilo-deepseek/trial-2.md +0 -37
  99. package/src/skills/coach/tests/cases/TC-COACH-002/current/kilo-deepseek/trial-3.md +0 -30
  100. package/src/skills/coach/tests/cases/TC-COACH-002/current/kilo-glm/trial-1.md +0 -23
  101. package/src/skills/coach/tests/cases/TC-COACH-002/current/kilo-glm/trial-2.md +0 -29
  102. package/src/skills/coach/tests/cases/TC-COACH-002/current/kilo-glm/trial-3.md +0 -35
  103. package/src/skills/coach/tests/cases/TC-COACH-002/current/kilo-minimax/trial-1.md +0 -13
  104. package/src/skills/coach/tests/cases/TC-COACH-002/current/kilo-minimax/trial-2.md +0 -19
  105. package/src/skills/coach/tests/cases/TC-COACH-002/current/kilo-minimax/trial-3.md +0 -33
  106. package/src/skills/coach/tests/cases/TC-COACH-002/current/meta.json +0 -94
  107. package/src/skills/coach/tests/cases/TC-COACH-002-root-cause-first.yaml +0 -57
  108. package/src/skills/coach/tests/fixtures/pipeline-2026-04-06_id-collision.log +0 -77
  109. package/src/skills/coach/tests/index.yaml +0 -29
  110. package/src/skills/coach/tests/rubrics/calibration/evidence-based-bad.md +0 -13
  111. package/src/skills/coach/tests/rubrics/calibration/evidence-based-good.md +0 -29
  112. package/src/skills/coach/tests/rubrics/evidence-based.md +0 -26
  113. package/src/skills/coach/tests/rubrics/root-cause-first.md +0 -21
  114. package/src/skills/coach/workflows/analyze.md +0 -79
  115. package/src/skills/coach/workflows/analyze.md.legacy +0 -64
  116. package/src/skills/coach/workflows/audit.md +0 -74
  117. package/src/skills/coach/workflows/audit.md.legacy +0 -59
  118. package/src/skills/coach/workflows/create.md +0 -80
  119. package/src/skills/coach/workflows/create.md.legacy +0 -67
  120. package/src/skills/coach/workflows/improve.md +0 -71
  121. package/src/skills/coach/workflows/improve.md.legacy +0 -60
  122. package/src/skills/coach/workflows/research.md +0 -55
  123. package/src/skills/coach/workflows/review.md +0 -52
  124. package/src/skills/coach/workflows/review.md.legacy +0 -48
  125. package/src/skills/coach/workflows/test.md +0 -97
  126. package/src/skills/create-plan/README.md +0 -39
  127. package/src/skills/create-plan/algorithms/risk-assessment.md +0 -73
  128. package/src/skills/create-plan/knowledge/plan-completeness.md +0 -67
  129. package/src/skills/create-plan/knowledge/plan-lifecycle.md +0 -33
  130. package/src/skills/create-plan/knowledge/task-verification-pairs.md +0 -151
  131. package/src/skills/create-plan/knowledge/test-hygiene.md +0 -47
  132. package/src/skills/create-plan/scripts/validate-completeness.js +0 -182
  133. package/src/skills/create-plan/tests/cases/TC-CREATE-PLAN-001/current/claude-sonnet/trial-1.md +0 -5
  134. package/src/skills/create-plan/tests/cases/TC-CREATE-PLAN-001/current/claude-sonnet/trial-2.md +0 -39
  135. package/src/skills/create-plan/tests/cases/TC-CREATE-PLAN-001/current/claude-sonnet/trial-3.md +0 -35
  136. package/src/skills/create-plan/tests/cases/TC-CREATE-PLAN-001/current/judge.json +0 -167
  137. package/src/skills/create-plan/tests/cases/TC-CREATE-PLAN-001/current/kilo-deepseek/trial-1.md +0 -5
  138. package/src/skills/create-plan/tests/cases/TC-CREATE-PLAN-001/current/kilo-deepseek/trial-2.md +0 -10
  139. package/src/skills/create-plan/tests/cases/TC-CREATE-PLAN-001/current/kilo-deepseek/trial-3.md +0 -5
  140. package/src/skills/create-plan/tests/cases/TC-CREATE-PLAN-001/current/kilo-glm/trial-1.md +0 -26
  141. package/src/skills/create-plan/tests/cases/TC-CREATE-PLAN-001/current/kilo-glm/trial-2.md +0 -86
  142. package/src/skills/create-plan/tests/cases/TC-CREATE-PLAN-001/current/kilo-glm/trial-3.md +0 -5
  143. package/src/skills/create-plan/tests/cases/TC-CREATE-PLAN-001/current/kilo-minimax/trial-1.md +0 -11
  144. package/src/skills/create-plan/tests/cases/TC-CREATE-PLAN-001/current/kilo-minimax/trial-2.md +0 -15
  145. package/src/skills/create-plan/tests/cases/TC-CREATE-PLAN-001/current/kilo-minimax/trial-3.md +0 -14
  146. package/src/skills/create-plan/tests/cases/TC-CREATE-PLAN-001/current/meta.json +0 -119
  147. package/src/skills/create-plan/tests/cases/TC-CREATE-PLAN-001-validate-completeness.yaml +0 -41
  148. package/src/skills/create-plan/tests/cases/TC-CREATE-PLAN-002/current/claude-sonnet/trial-1.md +0 -25
  149. package/src/skills/create-plan/tests/cases/TC-CREATE-PLAN-002/current/claude-sonnet/trial-2.md +0 -30
  150. package/src/skills/create-plan/tests/cases/TC-CREATE-PLAN-002/current/claude-sonnet/trial-3.md +0 -37
  151. package/src/skills/create-plan/tests/cases/TC-CREATE-PLAN-002/current/judge.json +0 -164
  152. package/src/skills/create-plan/tests/cases/TC-CREATE-PLAN-002/current/kilo-deepseek/trial-1.md +0 -3
  153. package/src/skills/create-plan/tests/cases/TC-CREATE-PLAN-002/current/kilo-deepseek/trial-2.md +0 -11
  154. package/src/skills/create-plan/tests/cases/TC-CREATE-PLAN-002/current/kilo-deepseek/trial-3.md +0 -13
  155. package/src/skills/create-plan/tests/cases/TC-CREATE-PLAN-002/current/kilo-glm/trial-1.md +0 -44
  156. package/src/skills/create-plan/tests/cases/TC-CREATE-PLAN-002/current/kilo-glm/trial-2.md +0 -5
  157. package/src/skills/create-plan/tests/cases/TC-CREATE-PLAN-002/current/kilo-glm/trial-3.md +0 -49
  158. package/src/skills/create-plan/tests/cases/TC-CREATE-PLAN-002/current/kilo-minimax/trial-1.md +0 -6
  159. package/src/skills/create-plan/tests/cases/TC-CREATE-PLAN-002/current/kilo-minimax/trial-2.md +0 -11
  160. package/src/skills/create-plan/tests/cases/TC-CREATE-PLAN-002/current/kilo-minimax/trial-3.md +0 -16
  161. package/src/skills/create-plan/tests/cases/TC-CREATE-PLAN-002/current/meta.json +0 -116
  162. package/src/skills/create-plan/tests/cases/TC-CREATE-PLAN-002-task-granularity.yaml +0 -39
  163. package/src/skills/create-plan/tests/index.yaml +0 -25
  164. package/src/skills/create-plan/tests/rubrics/task-granularity.md +0 -21
  165. package/src/skills/create-plan/tests/rubrics/validate-completeness.md +0 -21
  166. package/src/skills/create-plan/workflows/create.md +0 -136
  167. package/src/skills/create-report/README.md +0 -40
  168. package/src/skills/create-report/algorithms/metric-calculation.md +0 -93
  169. package/src/skills/create-report/knowledge/report-metrics.md +0 -82
  170. package/src/skills/create-report/scripts/calc-metrics.js +0 -383
  171. package/src/skills/create-report/tests/cases/TC-CREATE-REPORT-001/current/claude-sonnet/trial-1.md +0 -25
  172. package/src/skills/create-report/tests/cases/TC-CREATE-REPORT-001/current/claude-sonnet/trial-2.md +0 -26
  173. package/src/skills/create-report/tests/cases/TC-CREATE-REPORT-001/current/claude-sonnet/trial-3.md +0 -28
  174. package/src/skills/create-report/tests/cases/TC-CREATE-REPORT-001/current/judge.json +0 -163
  175. package/src/skills/create-report/tests/cases/TC-CREATE-REPORT-001/current/kilo-deepseek/trial-1.md +0 -4
  176. package/src/skills/create-report/tests/cases/TC-CREATE-REPORT-001/current/kilo-deepseek/trial-2.md +0 -3
  177. package/src/skills/create-report/tests/cases/TC-CREATE-REPORT-001/current/kilo-deepseek/trial-3.md +0 -6
  178. package/src/skills/create-report/tests/cases/TC-CREATE-REPORT-001/current/kilo-glm/trial-1.md +0 -8
  179. package/src/skills/create-report/tests/cases/TC-CREATE-REPORT-001/current/kilo-glm/trial-2.md +0 -12
  180. package/src/skills/create-report/tests/cases/TC-CREATE-REPORT-001/current/kilo-glm/trial-3.md +0 -7
  181. package/src/skills/create-report/tests/cases/TC-CREATE-REPORT-001/current/kilo-minimax/trial-1.md +0 -12
  182. package/src/skills/create-report/tests/cases/TC-CREATE-REPORT-001/current/kilo-minimax/trial-2.md +0 -22
  183. package/src/skills/create-report/tests/cases/TC-CREATE-REPORT-001/current/kilo-minimax/trial-3.md +0 -13
  184. package/src/skills/create-report/tests/cases/TC-CREATE-REPORT-001/current/meta.json +0 -115
  185. package/src/skills/create-report/tests/cases/TC-CREATE-REPORT-001-root-cause-attribution.yaml +0 -57
  186. package/src/skills/create-report/tests/index.yaml +0 -20
  187. package/src/skills/create-report/tests/rubrics/root-cause-attribution.md +0 -21
  188. package/src/skills/create-report/workflows/standard.md +0 -175
  189. package/src/skills/decompose-gaps/README.md +0 -39
  190. package/src/skills/decompose-gaps/algorithms/scope-check.md +0 -110
  191. package/src/skills/decompose-gaps/knowledge/scope-validation.md +0 -65
  192. package/src/skills/decompose-gaps/tests/cases/TC-DECOMPOSE-GAPS-001/current/claude-sonnet/trial-1.md +0 -41
  193. package/src/skills/decompose-gaps/tests/cases/TC-DECOMPOSE-GAPS-001/current/claude-sonnet/trial-2.md +0 -41
  194. package/src/skills/decompose-gaps/tests/cases/TC-DECOMPOSE-GAPS-001/current/claude-sonnet/trial-3.md +0 -56
  195. package/src/skills/decompose-gaps/tests/cases/TC-DECOMPOSE-GAPS-001/current/judge.json +0 -164
  196. package/src/skills/decompose-gaps/tests/cases/TC-DECOMPOSE-GAPS-001/current/kilo-deepseek/trial-1.md +0 -25
  197. package/src/skills/decompose-gaps/tests/cases/TC-DECOMPOSE-GAPS-001/current/kilo-deepseek/trial-2.md +0 -17
  198. package/src/skills/decompose-gaps/tests/cases/TC-DECOMPOSE-GAPS-001/current/kilo-deepseek/trial-3.md +0 -22
  199. package/src/skills/decompose-gaps/tests/cases/TC-DECOMPOSE-GAPS-001/current/kilo-glm/trial-1.md +0 -25
  200. package/src/skills/decompose-gaps/tests/cases/TC-DECOMPOSE-GAPS-001/current/kilo-glm/trial-2.md +0 -5
  201. package/src/skills/decompose-gaps/tests/cases/TC-DECOMPOSE-GAPS-001/current/kilo-glm/trial-3.md +0 -29
  202. package/src/skills/decompose-gaps/tests/cases/TC-DECOMPOSE-GAPS-001/current/kilo-minimax/trial-1.md +0 -27
  203. package/src/skills/decompose-gaps/tests/cases/TC-DECOMPOSE-GAPS-001/current/kilo-minimax/trial-2.md +0 -35
  204. package/src/skills/decompose-gaps/tests/cases/TC-DECOMPOSE-GAPS-001/current/kilo-minimax/trial-3.md +0 -18
  205. package/src/skills/decompose-gaps/tests/cases/TC-DECOMPOSE-GAPS-001/current/meta.json +0 -116
  206. package/src/skills/decompose-gaps/tests/cases/TC-DECOMPOSE-GAPS-001-scope-exclusion.yaml +0 -46
  207. package/src/skills/decompose-gaps/tests/cases/TC-DECOMPOSE-GAPS-002/current/claude-sonnet/trial-1.md +0 -27
  208. package/src/skills/decompose-gaps/tests/cases/TC-DECOMPOSE-GAPS-002/current/claude-sonnet/trial-2.md +0 -30
  209. package/src/skills/decompose-gaps/tests/cases/TC-DECOMPOSE-GAPS-002/current/claude-sonnet/trial-3.md +0 -27
  210. package/src/skills/decompose-gaps/tests/cases/TC-DECOMPOSE-GAPS-002/current/judge.json +0 -163
  211. package/src/skills/decompose-gaps/tests/cases/TC-DECOMPOSE-GAPS-002/current/kilo-deepseek/trial-1.md +0 -0
  212. package/src/skills/decompose-gaps/tests/cases/TC-DECOMPOSE-GAPS-002/current/kilo-deepseek/trial-2.md +0 -15
  213. package/src/skills/decompose-gaps/tests/cases/TC-DECOMPOSE-GAPS-002/current/kilo-deepseek/trial-3.md +0 -7
  214. package/src/skills/decompose-gaps/tests/cases/TC-DECOMPOSE-GAPS-002/current/kilo-glm/trial-1.md +0 -21
  215. package/src/skills/decompose-gaps/tests/cases/TC-DECOMPOSE-GAPS-002/current/kilo-glm/trial-2.md +0 -38
  216. package/src/skills/decompose-gaps/tests/cases/TC-DECOMPOSE-GAPS-002/current/kilo-glm/trial-3.md +0 -16
  217. package/src/skills/decompose-gaps/tests/cases/TC-DECOMPOSE-GAPS-002/current/kilo-minimax/trial-1.md +0 -5
  218. package/src/skills/decompose-gaps/tests/cases/TC-DECOMPOSE-GAPS-002/current/kilo-minimax/trial-2.md +0 -10
  219. package/src/skills/decompose-gaps/tests/cases/TC-DECOMPOSE-GAPS-002/current/kilo-minimax/trial-3.md +0 -9
  220. package/src/skills/decompose-gaps/tests/cases/TC-DECOMPOSE-GAPS-002/current/meta.json +0 -115
  221. package/src/skills/decompose-gaps/tests/cases/TC-DECOMPOSE-GAPS-002-glob-before-write.yaml +0 -36
  222. package/src/skills/decompose-gaps/tests/cases/TC-DECOMPOSE-GAPS-003/current/claude-sonnet/trial-1.md +0 -30
  223. package/src/skills/decompose-gaps/tests/cases/TC-DECOMPOSE-GAPS-003/current/claude-sonnet/trial-2.md +0 -30
  224. package/src/skills/decompose-gaps/tests/cases/TC-DECOMPOSE-GAPS-003/current/claude-sonnet/trial-3.md +0 -30
  225. package/src/skills/decompose-gaps/tests/cases/TC-DECOMPOSE-GAPS-003/current/judge.json +0 -165
  226. package/src/skills/decompose-gaps/tests/cases/TC-DECOMPOSE-GAPS-003/current/kilo-deepseek/trial-1.md +0 -5
  227. package/src/skills/decompose-gaps/tests/cases/TC-DECOMPOSE-GAPS-003/current/kilo-deepseek/trial-2.md +0 -26
  228. package/src/skills/decompose-gaps/tests/cases/TC-DECOMPOSE-GAPS-003/current/kilo-deepseek/trial-3.md +0 -5
  229. package/src/skills/decompose-gaps/tests/cases/TC-DECOMPOSE-GAPS-003/current/kilo-glm/trial-1.md +0 -39
  230. package/src/skills/decompose-gaps/tests/cases/TC-DECOMPOSE-GAPS-003/current/kilo-glm/trial-2.md +0 -37
  231. package/src/skills/decompose-gaps/tests/cases/TC-DECOMPOSE-GAPS-003/current/kilo-glm/trial-3.md +0 -45
  232. package/src/skills/decompose-gaps/tests/cases/TC-DECOMPOSE-GAPS-003/current/kilo-minimax/trial-1.md +0 -26
  233. package/src/skills/decompose-gaps/tests/cases/TC-DECOMPOSE-GAPS-003/current/kilo-minimax/trial-2.md +0 -27
  234. package/src/skills/decompose-gaps/tests/cases/TC-DECOMPOSE-GAPS-003/current/kilo-minimax/trial-3.md +0 -7
  235. package/src/skills/decompose-gaps/tests/cases/TC-DECOMPOSE-GAPS-003/current/meta.json +0 -117
  236. package/src/skills/decompose-gaps/tests/cases/TC-DECOMPOSE-GAPS-003-parent-plan-mandatory.yaml +0 -41
  237. package/src/skills/decompose-gaps/tests/index.yaml +0 -30
  238. package/src/skills/decompose-gaps/tests/rubrics/glob-before-write.md +0 -21
  239. package/src/skills/decompose-gaps/tests/rubrics/parent-plan-mandatory.md +0 -22
  240. package/src/skills/decompose-gaps/tests/rubrics/scope-exclusion.md +0 -21
  241. package/src/skills/decompose-gaps/workflows/decompose.md +0 -123
  242. package/src/skills/decompose-plan/README.md +0 -43
  243. package/src/skills/decompose-plan/algorithms/deduplication.md +0 -101
  244. package/src/skills/decompose-plan/knowledge/atomicity-checklist.md +0 -139
  245. package/src/skills/decompose-plan/knowledge/capabilities.md +0 -68
  246. package/src/skills/decompose-plan/knowledge/human-task-rules.md +0 -82
  247. package/src/skills/decompose-plan/knowledge/scope-guard-checklist.md +0 -73
  248. package/src/skills/decompose-plan/scripts/check-atomicity-limit.js +0 -47
  249. package/src/skills/decompose-plan/scripts/check-duplicates.js +0 -323
  250. package/src/skills/decompose-plan/scripts/verify-atomicity.js +0 -408
  251. package/src/skills/decompose-plan/tests/cases/TC-DECOMPOSE-PLAN-001/current/claude-sonnet/trial-1.md +0 -30
  252. package/src/skills/decompose-plan/tests/cases/TC-DECOMPOSE-PLAN-001/current/claude-sonnet/trial-2.md +0 -36
  253. package/src/skills/decompose-plan/tests/cases/TC-DECOMPOSE-PLAN-001/current/claude-sonnet/trial-3.md +0 -37
  254. package/src/skills/decompose-plan/tests/cases/TC-DECOMPOSE-PLAN-001/current/judge.json +0 -163
  255. package/src/skills/decompose-plan/tests/cases/TC-DECOMPOSE-PLAN-001/current/kilo-deepseek/trial-1.md +0 -20
  256. package/src/skills/decompose-plan/tests/cases/TC-DECOMPOSE-PLAN-001/current/kilo-deepseek/trial-2.md +0 -17
  257. package/src/skills/decompose-plan/tests/cases/TC-DECOMPOSE-PLAN-001/current/kilo-deepseek/trial-3.md +0 -28
  258. package/src/skills/decompose-plan/tests/cases/TC-DECOMPOSE-PLAN-001/current/kilo-glm/trial-1.md +0 -114
  259. package/src/skills/decompose-plan/tests/cases/TC-DECOMPOSE-PLAN-001/current/kilo-glm/trial-2.md +0 -137
  260. package/src/skills/decompose-plan/tests/cases/TC-DECOMPOSE-PLAN-001/current/kilo-glm/trial-3.md +0 -188
  261. package/src/skills/decompose-plan/tests/cases/TC-DECOMPOSE-PLAN-001/current/kilo-minimax/trial-1.md +0 -0
  262. package/src/skills/decompose-plan/tests/cases/TC-DECOMPOSE-PLAN-001/current/kilo-minimax/trial-2.md +0 -32
  263. package/src/skills/decompose-plan/tests/cases/TC-DECOMPOSE-PLAN-001/current/kilo-minimax/trial-3.md +0 -110
  264. package/src/skills/decompose-plan/tests/cases/TC-DECOMPOSE-PLAN-001/current/meta.json +0 -115
  265. package/src/skills/decompose-plan/tests/cases/TC-DECOMPOSE-PLAN-001-atomicity-no-1to1.yaml +0 -56
  266. package/src/skills/decompose-plan/tests/cases/TC-DECOMPOSE-PLAN-002/current/claude-sonnet/trial-1.md +0 -47
  267. package/src/skills/decompose-plan/tests/cases/TC-DECOMPOSE-PLAN-002/current/claude-sonnet/trial-2.md +0 -54
  268. package/src/skills/decompose-plan/tests/cases/TC-DECOMPOSE-PLAN-002/current/claude-sonnet/trial-3.md +0 -43
  269. package/src/skills/decompose-plan/tests/cases/TC-DECOMPOSE-PLAN-002/current/judge.json +0 -163
  270. package/src/skills/decompose-plan/tests/cases/TC-DECOMPOSE-PLAN-002/current/kilo-deepseek/trial-1.md +0 -15
  271. package/src/skills/decompose-plan/tests/cases/TC-DECOMPOSE-PLAN-002/current/kilo-deepseek/trial-2.md +0 -5
  272. package/src/skills/decompose-plan/tests/cases/TC-DECOMPOSE-PLAN-002/current/kilo-deepseek/trial-3.md +0 -12
  273. package/src/skills/decompose-plan/tests/cases/TC-DECOMPOSE-PLAN-002/current/kilo-glm/trial-1.md +0 -34
  274. package/src/skills/decompose-plan/tests/cases/TC-DECOMPOSE-PLAN-002/current/kilo-glm/trial-2.md +0 -30
  275. package/src/skills/decompose-plan/tests/cases/TC-DECOMPOSE-PLAN-002/current/kilo-glm/trial-3.md +0 -35
  276. package/src/skills/decompose-plan/tests/cases/TC-DECOMPOSE-PLAN-002/current/kilo-minimax/trial-1.md +0 -0
  277. package/src/skills/decompose-plan/tests/cases/TC-DECOMPOSE-PLAN-002/current/kilo-minimax/trial-2.md +0 -31
  278. package/src/skills/decompose-plan/tests/cases/TC-DECOMPOSE-PLAN-002/current/kilo-minimax/trial-3.md +0 -0
  279. package/src/skills/decompose-plan/tests/cases/TC-DECOMPOSE-PLAN-002/current/meta.json +0 -115
  280. package/src/skills/decompose-plan/tests/cases/TC-DECOMPOSE-PLAN-002-get-next-id-mandatory.yaml +0 -44
  281. package/src/skills/decompose-plan/tests/cases/TC-DECOMPOSE-PLAN-003/current/claude-sonnet/trial-1.md +0 -21
  282. package/src/skills/decompose-plan/tests/cases/TC-DECOMPOSE-PLAN-003/current/claude-sonnet/trial-2.md +0 -38
  283. package/src/skills/decompose-plan/tests/cases/TC-DECOMPOSE-PLAN-003/current/claude-sonnet/trial-3.md +0 -30
  284. package/src/skills/decompose-plan/tests/cases/TC-DECOMPOSE-PLAN-003/current/judge.json +0 -163
  285. package/src/skills/decompose-plan/tests/cases/TC-DECOMPOSE-PLAN-003/current/kilo-deepseek/trial-1.md +0 -31
  286. package/src/skills/decompose-plan/tests/cases/TC-DECOMPOSE-PLAN-003/current/kilo-deepseek/trial-2.md +0 -35
  287. package/src/skills/decompose-plan/tests/cases/TC-DECOMPOSE-PLAN-003/current/kilo-deepseek/trial-3.md +0 -48
  288. package/src/skills/decompose-plan/tests/cases/TC-DECOMPOSE-PLAN-003/current/kilo-glm/trial-1.md +0 -167
  289. package/src/skills/decompose-plan/tests/cases/TC-DECOMPOSE-PLAN-003/current/kilo-glm/trial-2.md +0 -62
  290. package/src/skills/decompose-plan/tests/cases/TC-DECOMPOSE-PLAN-003/current/kilo-glm/trial-3.md +0 -174
  291. package/src/skills/decompose-plan/tests/cases/TC-DECOMPOSE-PLAN-003/current/kilo-minimax/trial-1.md +0 -0
  292. package/src/skills/decompose-plan/tests/cases/TC-DECOMPOSE-PLAN-003/current/kilo-minimax/trial-2.md +0 -0
  293. package/src/skills/decompose-plan/tests/cases/TC-DECOMPOSE-PLAN-003/current/kilo-minimax/trial-3.md +0 -0
  294. package/src/skills/decompose-plan/tests/cases/TC-DECOMPOSE-PLAN-003/current/meta.json +0 -115
  295. package/src/skills/decompose-plan/tests/cases/TC-DECOMPOSE-PLAN-003-verbatim-dod-transfer.yaml +0 -42
  296. package/src/skills/decompose-plan/tests/cases/TC-DECOMPOSE-PLAN-004/current/claude-sonnet/trial-1.md +0 -55
  297. package/src/skills/decompose-plan/tests/cases/TC-DECOMPOSE-PLAN-004/current/claude-sonnet/trial-2.md +0 -49
  298. package/src/skills/decompose-plan/tests/cases/TC-DECOMPOSE-PLAN-004/current/claude-sonnet/trial-3.md +0 -49
  299. package/src/skills/decompose-plan/tests/cases/TC-DECOMPOSE-PLAN-004/current/judge.json +0 -163
  300. package/src/skills/decompose-plan/tests/cases/TC-DECOMPOSE-PLAN-004/current/kilo-deepseek/trial-1.md +0 -104
  301. package/src/skills/decompose-plan/tests/cases/TC-DECOMPOSE-PLAN-004/current/kilo-deepseek/trial-2.md +0 -45
  302. package/src/skills/decompose-plan/tests/cases/TC-DECOMPOSE-PLAN-004/current/kilo-deepseek/trial-3.md +0 -58
  303. package/src/skills/decompose-plan/tests/cases/TC-DECOMPOSE-PLAN-004/current/kilo-glm/trial-1.md +0 -193
  304. package/src/skills/decompose-plan/tests/cases/TC-DECOMPOSE-PLAN-004/current/kilo-glm/trial-2.md +0 -202
  305. package/src/skills/decompose-plan/tests/cases/TC-DECOMPOSE-PLAN-004/current/kilo-glm/trial-3.md +0 -155
  306. package/src/skills/decompose-plan/tests/cases/TC-DECOMPOSE-PLAN-004/current/kilo-minimax/trial-1.md +0 -52
  307. package/src/skills/decompose-plan/tests/cases/TC-DECOMPOSE-PLAN-004/current/kilo-minimax/trial-2.md +0 -17
  308. package/src/skills/decompose-plan/tests/cases/TC-DECOMPOSE-PLAN-004/current/kilo-minimax/trial-3.md +0 -0
  309. package/src/skills/decompose-plan/tests/cases/TC-DECOMPOSE-PLAN-004/current/meta.json +0 -115
  310. package/src/skills/decompose-plan/tests/cases/TC-DECOMPOSE-PLAN-004-executor-atomicity.yaml +0 -64
  311. package/src/skills/decompose-plan/tests/cases/TC-DECOMPOSE-PLAN-005/current/claude-sonnet/trial-1.md +0 -59
  312. package/src/skills/decompose-plan/tests/cases/TC-DECOMPOSE-PLAN-005/current/claude-sonnet/trial-2.md +0 -204
  313. package/src/skills/decompose-plan/tests/cases/TC-DECOMPOSE-PLAN-005/current/claude-sonnet/trial-3.md +0 -213
  314. package/src/skills/decompose-plan/tests/cases/TC-DECOMPOSE-PLAN-005/current/judge.json +0 -163
  315. package/src/skills/decompose-plan/tests/cases/TC-DECOMPOSE-PLAN-005/current/kilo-deepseek/trial-1.md +0 -0
  316. package/src/skills/decompose-plan/tests/cases/TC-DECOMPOSE-PLAN-005/current/kilo-deepseek/trial-2.md +0 -57
  317. package/src/skills/decompose-plan/tests/cases/TC-DECOMPOSE-PLAN-005/current/kilo-deepseek/trial-3.md +0 -54
  318. package/src/skills/decompose-plan/tests/cases/TC-DECOMPOSE-PLAN-005/current/kilo-glm/trial-1.md +0 -147
  319. package/src/skills/decompose-plan/tests/cases/TC-DECOMPOSE-PLAN-005/current/kilo-glm/trial-2.md +0 -165
  320. package/src/skills/decompose-plan/tests/cases/TC-DECOMPOSE-PLAN-005/current/kilo-glm/trial-3.md +0 -133
  321. package/src/skills/decompose-plan/tests/cases/TC-DECOMPOSE-PLAN-005/current/kilo-minimax/trial-1.md +0 -81
  322. package/src/skills/decompose-plan/tests/cases/TC-DECOMPOSE-PLAN-005/current/kilo-minimax/trial-2.md +0 -108
  323. package/src/skills/decompose-plan/tests/cases/TC-DECOMPOSE-PLAN-005/current/kilo-minimax/trial-3.md +0 -3
  324. package/src/skills/decompose-plan/tests/cases/TC-DECOMPOSE-PLAN-005/current/meta.json +0 -114
  325. package/src/skills/decompose-plan/tests/cases/TC-DECOMPOSE-PLAN-005-capabilities-registry.yaml +0 -78
  326. package/src/skills/decompose-plan/tests/cases/TC-DECOMPOSE-PLAN-006/current/claude-sonnet/trial-1.md +0 -225
  327. package/src/skills/decompose-plan/tests/cases/TC-DECOMPOSE-PLAN-006/current/claude-sonnet/trial-2.md +0 -66
  328. package/src/skills/decompose-plan/tests/cases/TC-DECOMPOSE-PLAN-006/current/claude-sonnet/trial-3.md +0 -36
  329. package/src/skills/decompose-plan/tests/cases/TC-DECOMPOSE-PLAN-006/current/judge.json +0 -163
  330. package/src/skills/decompose-plan/tests/cases/TC-DECOMPOSE-PLAN-006/current/kilo-deepseek/trial-1.md +0 -42
  331. package/src/skills/decompose-plan/tests/cases/TC-DECOMPOSE-PLAN-006/current/kilo-deepseek/trial-2.md +0 -67
  332. package/src/skills/decompose-plan/tests/cases/TC-DECOMPOSE-PLAN-006/current/kilo-deepseek/trial-3.md +0 -40
  333. package/src/skills/decompose-plan/tests/cases/TC-DECOMPOSE-PLAN-006/current/kilo-glm/trial-1.md +0 -122
  334. package/src/skills/decompose-plan/tests/cases/TC-DECOMPOSE-PLAN-006/current/kilo-glm/trial-2.md +0 -131
  335. package/src/skills/decompose-plan/tests/cases/TC-DECOMPOSE-PLAN-006/current/kilo-glm/trial-3.md +0 -138
  336. package/src/skills/decompose-plan/tests/cases/TC-DECOMPOSE-PLAN-006/current/kilo-minimax/trial-1.md +0 -41
  337. package/src/skills/decompose-plan/tests/cases/TC-DECOMPOSE-PLAN-006/current/kilo-minimax/trial-2.md +0 -88
  338. package/src/skills/decompose-plan/tests/cases/TC-DECOMPOSE-PLAN-006/current/kilo-minimax/trial-3.md +0 -0
  339. package/src/skills/decompose-plan/tests/cases/TC-DECOMPOSE-PLAN-006/current/meta.json +0 -115
  340. package/src/skills/decompose-plan/tests/cases/TC-DECOMPOSE-PLAN-006-dod-threshold.yaml +0 -72
  341. package/src/skills/decompose-plan/tests/index.yaml +0 -45
  342. package/src/skills/decompose-plan/tests/rubrics/atomicity-no-1to1.md +0 -21
  343. package/src/skills/decompose-plan/tests/rubrics/capabilities-registry.md +0 -21
  344. package/src/skills/decompose-plan/tests/rubrics/dod-threshold.md +0 -21
  345. package/src/skills/decompose-plan/tests/rubrics/executor-atomicity.md +0 -21
  346. package/src/skills/decompose-plan/tests/rubrics/get-next-id-mandatory.md +0 -21
  347. package/src/skills/decompose-plan/tests/rubrics/verbatim-dod-transfer.md +0 -21
  348. package/src/skills/decompose-plan/workflows/decompose.md +0 -305
  349. package/src/skills/deep-research/README.md +0 -36
  350. package/src/skills/deep-research/algorithms/source-scoring.md +0 -63
  351. package/src/skills/deep-research/algorithms/synthesis.md +0 -67
  352. package/src/skills/deep-research/knowledge/data-validation.md +0 -44
  353. package/src/skills/deep-research/knowledge/perplexity-config.md +0 -30
  354. package/src/skills/deep-research/knowledge/research-methodology.md +0 -54
  355. package/src/skills/deep-research/knowledge/source-evaluation.md +0 -33
  356. package/src/skills/deep-research/scripts/perplexity-research.js +0 -315
  357. package/src/skills/deep-research/templates/brief-summary.md +0 -25
  358. package/src/skills/deep-research/templates/research-report.md +0 -76
  359. package/src/skills/deep-research/tests/cases/TC-DEEP-RESEARCH-001/current/claude-haiku/trial-1.md +0 -48
  360. package/src/skills/deep-research/tests/cases/TC-DEEP-RESEARCH-001/current/claude-haiku/trial-2.md +0 -88
  361. package/src/skills/deep-research/tests/cases/TC-DEEP-RESEARCH-001/current/claude-haiku/trial-3.md +0 -56
  362. package/src/skills/deep-research/tests/cases/TC-DEEP-RESEARCH-001/current/judge.json +0 -163
  363. package/src/skills/deep-research/tests/cases/TC-DEEP-RESEARCH-001/current/kilo-free/trial-1.md +0 -58
  364. package/src/skills/deep-research/tests/cases/TC-DEEP-RESEARCH-001/current/kilo-free/trial-2.md +0 -249
  365. package/src/skills/deep-research/tests/cases/TC-DEEP-RESEARCH-001/current/kilo-free/trial-3.md +0 -44
  366. package/src/skills/deep-research/tests/cases/TC-DEEP-RESEARCH-001/current/kilo-glm/trial-1.md +0 -96
  367. package/src/skills/deep-research/tests/cases/TC-DEEP-RESEARCH-001/current/kilo-glm/trial-2.md +0 -56
  368. package/src/skills/deep-research/tests/cases/TC-DEEP-RESEARCH-001/current/kilo-glm/trial-3.md +0 -94
  369. package/src/skills/deep-research/tests/cases/TC-DEEP-RESEARCH-001/current/kilo-glm-air/trial-1.md +0 -11
  370. package/src/skills/deep-research/tests/cases/TC-DEEP-RESEARCH-001/current/kilo-glm-air/trial-2.md +0 -1
  371. package/src/skills/deep-research/tests/cases/TC-DEEP-RESEARCH-001/current/kilo-glm-air/trial-3.md +0 -1
  372. package/src/skills/deep-research/tests/cases/TC-DEEP-RESEARCH-001/current/meta.json +0 -115
  373. package/src/skills/deep-research/tests/cases/TC-DEEP-RESEARCH-001-self-check-url.yaml +0 -58
  374. package/src/skills/deep-research/tests/index.yaml +0 -20
  375. package/src/skills/deep-research/tests/rubrics/self-check-url.md +0 -34
  376. package/src/skills/deep-research/workflows/base-checklist.md +0 -19
  377. package/src/skills/deep-research/workflows/benchmark.md +0 -38
  378. package/src/skills/deep-research/workflows/competitor.md +0 -44
  379. package/src/skills/deep-research/workflows/custom.md +0 -32
  380. package/src/skills/deep-research/workflows/market.md +0 -44
  381. package/src/skills/deep-research/workflows/technology.md +0 -40
  382. package/src/skills/deep-research/workflows/trend.md +0 -40
  383. package/src/skills/execute-task/README.md +0 -44
  384. package/src/skills/execute-task/algorithms/execution-strategy.md +0 -136
  385. package/src/skills/execute-task/knowledge/context-checkpoints.md +0 -75
  386. package/src/skills/execute-task/knowledge/ticket-structure.md +0 -70
  387. package/src/skills/execute-task/tests/cases/TC-EXECUTE-TASK-001/current/claude-haiku/trial-1.md +0 -5
  388. package/src/skills/execute-task/tests/cases/TC-EXECUTE-TASK-001/current/claude-haiku/trial-2.md +0 -5
  389. package/src/skills/execute-task/tests/cases/TC-EXECUTE-TASK-001/current/claude-haiku/trial-3.md +0 -5
  390. package/src/skills/execute-task/tests/cases/TC-EXECUTE-TASK-001/current/judge.json +0 -124
  391. package/src/skills/execute-task/tests/cases/TC-EXECUTE-TASK-001/current/kilo-free/trial-1.md +0 -4
  392. package/src/skills/execute-task/tests/cases/TC-EXECUTE-TASK-001/current/kilo-free/trial-2.md +0 -4
  393. package/src/skills/execute-task/tests/cases/TC-EXECUTE-TASK-001/current/kilo-free/trial-3.md +0 -4
  394. package/src/skills/execute-task/tests/cases/TC-EXECUTE-TASK-001/current/kilo-glm-air/trial-1.md +0 -4
  395. package/src/skills/execute-task/tests/cases/TC-EXECUTE-TASK-001/current/kilo-glm-air/trial-2.md +0 -4
  396. package/src/skills/execute-task/tests/cases/TC-EXECUTE-TASK-001/current/kilo-glm-air/trial-3.md +0 -11
  397. package/src/skills/execute-task/tests/cases/TC-EXECUTE-TASK-001/current/meta.json +0 -88
  398. package/src/skills/execute-task/tests/cases/TC-EXECUTE-TASK-001-no-ticket-creation.yaml +0 -48
  399. package/src/skills/execute-task/tests/cases/TC-EXECUTE-TASK-002/current/claude-haiku/trial-1.md +0 -5
  400. package/src/skills/execute-task/tests/cases/TC-EXECUTE-TASK-002/current/claude-haiku/trial-2.md +0 -6
  401. package/src/skills/execute-task/tests/cases/TC-EXECUTE-TASK-002/current/claude-haiku/trial-3.md +0 -5
  402. package/src/skills/execute-task/tests/cases/TC-EXECUTE-TASK-002/current/judge.json +0 -124
  403. package/src/skills/execute-task/tests/cases/TC-EXECUTE-TASK-002/current/kilo-free/trial-1.md +0 -4
  404. package/src/skills/execute-task/tests/cases/TC-EXECUTE-TASK-002/current/kilo-free/trial-2.md +0 -4
  405. package/src/skills/execute-task/tests/cases/TC-EXECUTE-TASK-002/current/kilo-free/trial-3.md +0 -8
  406. package/src/skills/execute-task/tests/cases/TC-EXECUTE-TASK-002/current/kilo-glm-air/trial-1.md +0 -9
  407. package/src/skills/execute-task/tests/cases/TC-EXECUTE-TASK-002/current/kilo-glm-air/trial-2.md +0 -26
  408. package/src/skills/execute-task/tests/cases/TC-EXECUTE-TASK-002/current/kilo-glm-air/trial-3.md +0 -4
  409. package/src/skills/execute-task/tests/cases/TC-EXECUTE-TASK-002/current/meta.json +0 -89
  410. package/src/skills/execute-task/tests/cases/TC-EXECUTE-TASK-002-no-duplicate-dod.yaml +0 -44
  411. package/src/skills/execute-task/tests/cases/TC-EXECUTE-TASK-003/current/claude-haiku/trial-1.md +0 -5
  412. package/src/skills/execute-task/tests/cases/TC-EXECUTE-TASK-003/current/claude-haiku/trial-2.md +0 -5
  413. package/src/skills/execute-task/tests/cases/TC-EXECUTE-TASK-003/current/claude-haiku/trial-3.md +0 -5
  414. package/src/skills/execute-task/tests/cases/TC-EXECUTE-TASK-003/current/judge.json +0 -46
  415. package/src/skills/execute-task/tests/cases/TC-EXECUTE-TASK-003/current/meta.json +0 -37
  416. package/src/skills/execute-task/tests/cases/TC-EXECUTE-TASK-003-verification-proportionality.yaml +0 -46
  417. package/src/skills/execute-task/tests/cases/TC-EXECUTE-TASK-004/current/claude-haiku/trial-1.md +0 -18
  418. package/src/skills/execute-task/tests/cases/TC-EXECUTE-TASK-004/current/claude-haiku/trial-2.md +0 -16
  419. package/src/skills/execute-task/tests/cases/TC-EXECUTE-TASK-004/current/claude-haiku/trial-3.md +0 -14
  420. package/src/skills/execute-task/tests/cases/TC-EXECUTE-TASK-004/current/judge.json +0 -124
  421. package/src/skills/execute-task/tests/cases/TC-EXECUTE-TASK-004/current/kilo-free/trial-1.md +0 -5
  422. package/src/skills/execute-task/tests/cases/TC-EXECUTE-TASK-004/current/kilo-free/trial-2.md +0 -5
  423. package/src/skills/execute-task/tests/cases/TC-EXECUTE-TASK-004/current/kilo-free/trial-3.md +0 -1
  424. package/src/skills/execute-task/tests/cases/TC-EXECUTE-TASK-004/current/kilo-glm-air/trial-1.md +0 -8
  425. package/src/skills/execute-task/tests/cases/TC-EXECUTE-TASK-004/current/kilo-glm-air/trial-2.md +0 -5
  426. package/src/skills/execute-task/tests/cases/TC-EXECUTE-TASK-004/current/kilo-glm-air/trial-3.md +0 -4
  427. package/src/skills/execute-task/tests/cases/TC-EXECUTE-TASK-004/current/meta.json +0 -89
  428. package/src/skills/execute-task/tests/cases/TC-EXECUTE-TASK-004-no-foreign-ticket-edit.yaml +0 -50
  429. package/src/skills/execute-task/tests/cases/TC-EXECUTE-TASK-005/current/claude-haiku/trial-1.md +0 -5
  430. package/src/skills/execute-task/tests/cases/TC-EXECUTE-TASK-005/current/claude-haiku/trial-2.md +0 -5
  431. package/src/skills/execute-task/tests/cases/TC-EXECUTE-TASK-005/current/claude-haiku/trial-3.md +0 -5
  432. package/src/skills/execute-task/tests/cases/TC-EXECUTE-TASK-005/current/judge.json +0 -124
  433. package/src/skills/execute-task/tests/cases/TC-EXECUTE-TASK-005/current/kilo-free/trial-1.md +0 -15
  434. package/src/skills/execute-task/tests/cases/TC-EXECUTE-TASK-005/current/kilo-free/trial-2.md +0 -4
  435. package/src/skills/execute-task/tests/cases/TC-EXECUTE-TASK-005/current/kilo-free/trial-3.md +0 -5
  436. package/src/skills/execute-task/tests/cases/TC-EXECUTE-TASK-005/current/kilo-glm-air/trial-1.md +0 -11
  437. package/src/skills/execute-task/tests/cases/TC-EXECUTE-TASK-005/current/kilo-glm-air/trial-2.md +0 -11
  438. package/src/skills/execute-task/tests/cases/TC-EXECUTE-TASK-005/current/kilo-glm-air/trial-3.md +0 -4
  439. package/src/skills/execute-task/tests/cases/TC-EXECUTE-TASK-005/current/meta.json +0 -88
  440. package/src/skills/execute-task/tests/cases/TC-EXECUTE-TASK-005-ticket-fields-updated.yaml +0 -39
  441. package/src/skills/execute-task/tests/fixtures/IMPL-902-create-file.md +0 -41
  442. package/src/skills/execute-task/tests/fixtures/IMPL-904-current-task.md +0 -40
  443. package/src/skills/execute-task/tests/fixtures/IMPL-906-fill-ticket.md +0 -42
  444. package/src/skills/execute-task/tests/fixtures/QA-901-button-click.md +0 -41
  445. package/src/skills/execute-task/tests/fixtures/QA-903-visual-figma.md +0 -40
  446. package/src/skills/execute-task/tests/fixtures/TASK-905-done-with-typo.md +0 -36
  447. package/src/skills/execute-task/tests/index.yaml +0 -39
  448. package/src/skills/execute-task/tests/rubrics/no-duplicate-dod.md +0 -22
  449. package/src/skills/execute-task/tests/rubrics/no-foreign-ticket-edit.md +0 -20
  450. package/src/skills/execute-task/tests/rubrics/no-ticket-creation.md +0 -21
  451. package/src/skills/execute-task/tests/rubrics/ticket-fields-updated.md +0 -23
  452. package/src/skills/execute-task/tests/rubrics/verification-proportionality.md +0 -22
  453. package/src/skills/execute-task/workflows/execute.md +0 -104
  454. package/src/skills/manual-testing/README.md +0 -63
  455. package/src/skills/manual-testing/algorithms/blocked-tool-strategy.md +0 -74
  456. package/src/skills/manual-testing/algorithms/bug-severity.md +0 -73
  457. package/src/skills/manual-testing/algorithms/mcp-budget.md +0 -97
  458. package/src/skills/manual-testing/algorithms/test-prioritization.md +0 -69
  459. package/src/skills/manual-testing/knowledge/browser-extension-testing.md +0 -102
  460. package/src/skills/manual-testing/knowledge/browser-tools.md +0 -114
  461. package/src/skills/manual-testing/knowledge/desktop-tools-advanced.md +0 -92
  462. package/src/skills/manual-testing/knowledge/desktop-tools-core.md +0 -76
  463. package/src/skills/manual-testing/knowledge/sandbox-advanced.md +0 -83
  464. package/src/skills/manual-testing/knowledge/sandbox-core.md +0 -67
  465. package/src/skills/manual-testing/knowledge/stateful-edge-cases.md +0 -69
  466. package/src/skills/manual-testing/knowledge/test-case-design.md +0 -107
  467. package/src/skills/manual-testing/knowledge/testing-types.md +0 -45
  468. package/src/skills/manual-testing/templates/bug-report.md +0 -52
  469. package/src/skills/manual-testing/templates/test-case.md +0 -34
  470. package/src/skills/manual-testing/templates/test-plan.md +0 -97
  471. package/src/skills/manual-testing/templates/test-session-report.md +0 -56
  472. package/src/skills/manual-testing/tests/cases/TC-MANUAL-TESTING-001/current/claude-sonnet/trial-1.md +0 -34
  473. package/src/skills/manual-testing/tests/cases/TC-MANUAL-TESTING-001/current/claude-sonnet/trial-2.md +0 -32
  474. package/src/skills/manual-testing/tests/cases/TC-MANUAL-TESTING-001/current/claude-sonnet/trial-3.md +0 -30
  475. package/src/skills/manual-testing/tests/cases/TC-MANUAL-TESTING-001/current/judge.json +0 -163
  476. package/src/skills/manual-testing/tests/cases/TC-MANUAL-TESTING-001/current/kilo-deepseek/trial-1.md +0 -0
  477. package/src/skills/manual-testing/tests/cases/TC-MANUAL-TESTING-001/current/kilo-deepseek/trial-2.md +0 -7
  478. package/src/skills/manual-testing/tests/cases/TC-MANUAL-TESTING-001/current/kilo-deepseek/trial-3.md +0 -0
  479. package/src/skills/manual-testing/tests/cases/TC-MANUAL-TESTING-001/current/kilo-glm/trial-1.md +0 -4
  480. package/src/skills/manual-testing/tests/cases/TC-MANUAL-TESTING-001/current/kilo-glm/trial-2.md +0 -15
  481. package/src/skills/manual-testing/tests/cases/TC-MANUAL-TESTING-001/current/kilo-glm/trial-3.md +0 -8
  482. package/src/skills/manual-testing/tests/cases/TC-MANUAL-TESTING-001/current/kilo-minimax/trial-1.md +0 -5
  483. package/src/skills/manual-testing/tests/cases/TC-MANUAL-TESTING-001/current/kilo-minimax/trial-2.md +0 -7
  484. package/src/skills/manual-testing/tests/cases/TC-MANUAL-TESTING-001/current/kilo-minimax/trial-3.md +0 -7
  485. package/src/skills/manual-testing/tests/cases/TC-MANUAL-TESTING-001/current/meta.json +0 -114
  486. package/src/skills/manual-testing/tests/cases/TC-MANUAL-TESTING-001-sandbox-mandatory.yaml +0 -38
  487. package/src/skills/manual-testing/tests/cases/TC-MANUAL-TESTING-002/current/claude-sonnet/trial-1.md +0 -44
  488. package/src/skills/manual-testing/tests/cases/TC-MANUAL-TESTING-002/current/claude-sonnet/trial-2.md +0 -32
  489. package/src/skills/manual-testing/tests/cases/TC-MANUAL-TESTING-002/current/claude-sonnet/trial-3.md +0 -47
  490. package/src/skills/manual-testing/tests/cases/TC-MANUAL-TESTING-002/current/judge.json +0 -163
  491. package/src/skills/manual-testing/tests/cases/TC-MANUAL-TESTING-002/current/kilo-deepseek/trial-1.md +0 -19
  492. package/src/skills/manual-testing/tests/cases/TC-MANUAL-TESTING-002/current/kilo-deepseek/trial-2.md +0 -15
  493. package/src/skills/manual-testing/tests/cases/TC-MANUAL-TESTING-002/current/kilo-deepseek/trial-3.md +0 -24
  494. package/src/skills/manual-testing/tests/cases/TC-MANUAL-TESTING-002/current/kilo-glm/trial-1.md +0 -19
  495. package/src/skills/manual-testing/tests/cases/TC-MANUAL-TESTING-002/current/kilo-glm/trial-2.md +0 -13
  496. package/src/skills/manual-testing/tests/cases/TC-MANUAL-TESTING-002/current/kilo-glm/trial-3.md +0 -18
  497. package/src/skills/manual-testing/tests/cases/TC-MANUAL-TESTING-002/current/kilo-minimax/trial-1.md +0 -21
  498. package/src/skills/manual-testing/tests/cases/TC-MANUAL-TESTING-002/current/kilo-minimax/trial-2.md +0 -15
  499. package/src/skills/manual-testing/tests/cases/TC-MANUAL-TESTING-002/current/kilo-minimax/trial-3.md +0 -14
  500. package/src/skills/manual-testing/tests/cases/TC-MANUAL-TESTING-002/current/meta.json +0 -114
  501. package/src/skills/manual-testing/tests/cases/TC-MANUAL-TESTING-002-visual-tc-screenshot.yaml +0 -37
  502. package/src/skills/manual-testing/tests/cases/TC-MANUAL-TESTING-003/current/claude-sonnet/trial-1.md +0 -76
  503. package/src/skills/manual-testing/tests/cases/TC-MANUAL-TESTING-003/current/claude-sonnet/trial-2.md +0 -71
  504. package/src/skills/manual-testing/tests/cases/TC-MANUAL-TESTING-003/current/claude-sonnet/trial-3.md +0 -85
  505. package/src/skills/manual-testing/tests/cases/TC-MANUAL-TESTING-003/current/judge.json +0 -46
  506. package/src/skills/manual-testing/tests/cases/TC-MANUAL-TESTING-003/current/meta.json +0 -36
  507. package/src/skills/manual-testing/tests/cases/TC-MANUAL-TESTING-003-qa-non-ui-assertion.yaml +0 -65
  508. package/src/skills/manual-testing/tests/index.yaml +0 -30
  509. package/src/skills/manual-testing/tests/last-run-tc001-sonnet.log +0 -140
  510. package/src/skills/manual-testing/tests/last-run-tc002.log +0 -1
  511. package/src/skills/manual-testing/tests/last-run.log +0 -1469
  512. package/src/skills/manual-testing/tests/rubrics/qa-non-ui-assertion.md +0 -31
  513. package/src/skills/manual-testing/tests/rubrics/sandbox-mandatory.md +0 -20
  514. package/src/skills/manual-testing/tests/rubrics/visual-tc-screenshot.md +0 -21
  515. package/src/skills/manual-testing/workflows/acceptance.md +0 -80
  516. package/src/skills/manual-testing/workflows/exploratory.md +0 -84
  517. package/src/skills/manual-testing/workflows/regression.md +0 -76
  518. package/src/skills/manual-testing/workflows/smoke.md +0 -109
  519. package/src/skills/manual-testing/workflows/test-plan.md +0 -75
  520. package/src/skills/review-result/README.md +0 -59
  521. package/src/skills/review-result/algorithms/verification.md +0 -112
  522. package/src/skills/review-result/knowledge/dod-patterns.md +0 -115
  523. package/src/skills/review-result/knowledge/test-hygiene.md +0 -44
  524. package/src/skills/review-result/scripts/verify-artifacts.js +0 -497
  525. package/src/skills/review-result/templates/verdict.md +0 -153
  526. package/src/skills/review-result/tests/cases/TC-REVIEW-RESULT-001/current/claude-haiku/trial-1.md +0 -22
  527. package/src/skills/review-result/tests/cases/TC-REVIEW-RESULT-001/current/claude-haiku/trial-2.md +0 -7
  528. package/src/skills/review-result/tests/cases/TC-REVIEW-RESULT-001/current/claude-haiku/trial-3.md +0 -21
  529. package/src/skills/review-result/tests/cases/TC-REVIEW-RESULT-001/current/claude-sonnet/trial-1.md +0 -6
  530. package/src/skills/review-result/tests/cases/TC-REVIEW-RESULT-001/current/claude-sonnet/trial-2.md +0 -6
  531. package/src/skills/review-result/tests/cases/TC-REVIEW-RESULT-001/current/claude-sonnet/trial-3.md +0 -18
  532. package/src/skills/review-result/tests/cases/TC-REVIEW-RESULT-001/current/judge.json +0 -164
  533. package/src/skills/review-result/tests/cases/TC-REVIEW-RESULT-001/current/kilo-deepseek/trial-1.md +0 -5
  534. package/src/skills/review-result/tests/cases/TC-REVIEW-RESULT-001/current/kilo-deepseek/trial-2.md +0 -7
  535. package/src/skills/review-result/tests/cases/TC-REVIEW-RESULT-001/current/kilo-deepseek/trial-3.md +0 -6
  536. package/src/skills/review-result/tests/cases/TC-REVIEW-RESULT-001/current/kilo-glm/trial-1.md +0 -49
  537. package/src/skills/review-result/tests/cases/TC-REVIEW-RESULT-001/current/kilo-glm/trial-2.md +0 -28
  538. package/src/skills/review-result/tests/cases/TC-REVIEW-RESULT-001/current/kilo-glm/trial-3.md +0 -37
  539. package/src/skills/review-result/tests/cases/TC-REVIEW-RESULT-001/current/kilo-minimax/trial-1.md +0 -22
  540. package/src/skills/review-result/tests/cases/TC-REVIEW-RESULT-001/current/kilo-minimax/trial-2.md +0 -13
  541. package/src/skills/review-result/tests/cases/TC-REVIEW-RESULT-001/current/kilo-minimax/trial-3.md +0 -21
  542. package/src/skills/review-result/tests/cases/TC-REVIEW-RESULT-001/current/meta.json +0 -116
  543. package/src/skills/review-result/tests/cases/TC-REVIEW-RESULT-001-visual-tc-trigger.yaml +0 -51
  544. package/src/skills/review-result/tests/cases/TC-REVIEW-RESULT-002/current/claude-haiku/trial-1.md +0 -23
  545. package/src/skills/review-result/tests/cases/TC-REVIEW-RESULT-002/current/claude-haiku/trial-2.md +0 -22
  546. package/src/skills/review-result/tests/cases/TC-REVIEW-RESULT-002/current/claude-haiku/trial-3.md +0 -28
  547. package/src/skills/review-result/tests/cases/TC-REVIEW-RESULT-002/current/claude-sonnet/trial-1.md +0 -4
  548. package/src/skills/review-result/tests/cases/TC-REVIEW-RESULT-002/current/claude-sonnet/trial-2.md +0 -36
  549. package/src/skills/review-result/tests/cases/TC-REVIEW-RESULT-002/current/claude-sonnet/trial-3.md +0 -4
  550. package/src/skills/review-result/tests/cases/TC-REVIEW-RESULT-002/current/judge.json +0 -163
  551. package/src/skills/review-result/tests/cases/TC-REVIEW-RESULT-002/current/kilo-deepseek/trial-1.md +0 -4
  552. package/src/skills/review-result/tests/cases/TC-REVIEW-RESULT-002/current/kilo-deepseek/trial-2.md +0 -0
  553. package/src/skills/review-result/tests/cases/TC-REVIEW-RESULT-002/current/kilo-deepseek/trial-3.md +0 -4
  554. package/src/skills/review-result/tests/cases/TC-REVIEW-RESULT-002/current/kilo-glm/trial-1.md +0 -39
  555. package/src/skills/review-result/tests/cases/TC-REVIEW-RESULT-002/current/kilo-glm/trial-2.md +0 -25
  556. package/src/skills/review-result/tests/cases/TC-REVIEW-RESULT-002/current/kilo-glm/trial-3.md +0 -32
  557. package/src/skills/review-result/tests/cases/TC-REVIEW-RESULT-002/current/kilo-minimax/trial-1.md +0 -34
  558. package/src/skills/review-result/tests/cases/TC-REVIEW-RESULT-002/current/kilo-minimax/trial-2.md +0 -8
  559. package/src/skills/review-result/tests/cases/TC-REVIEW-RESULT-002/current/kilo-minimax/trial-3.md +0 -23
  560. package/src/skills/review-result/tests/cases/TC-REVIEW-RESULT-002/current/meta.json +0 -115
  561. package/src/skills/review-result/tests/cases/TC-REVIEW-RESULT-002-path-line-suffix.yaml +0 -39
  562. package/src/skills/review-result/tests/cases/TC-REVIEW-RESULT-003/current/claude-sonnet/trial-1.md +0 -7
  563. package/src/skills/review-result/tests/cases/TC-REVIEW-RESULT-003/current/claude-sonnet/trial-2.md +0 -7
  564. package/src/skills/review-result/tests/cases/TC-REVIEW-RESULT-003/current/claude-sonnet/trial-3.md +0 -7
  565. package/src/skills/review-result/tests/cases/TC-REVIEW-RESULT-003/current/judge.json +0 -163
  566. package/src/skills/review-result/tests/cases/TC-REVIEW-RESULT-003/current/kilo-deepseek/trial-1.md +0 -5
  567. package/src/skills/review-result/tests/cases/TC-REVIEW-RESULT-003/current/kilo-deepseek/trial-2.md +0 -5
  568. package/src/skills/review-result/tests/cases/TC-REVIEW-RESULT-003/current/kilo-deepseek/trial-3.md +0 -11
  569. package/src/skills/review-result/tests/cases/TC-REVIEW-RESULT-003/current/kilo-glm/trial-1.md +0 -16
  570. package/src/skills/review-result/tests/cases/TC-REVIEW-RESULT-003/current/kilo-glm/trial-2.md +0 -18
  571. package/src/skills/review-result/tests/cases/TC-REVIEW-RESULT-003/current/kilo-glm/trial-3.md +0 -17
  572. package/src/skills/review-result/tests/cases/TC-REVIEW-RESULT-003/current/kilo-minimax/trial-1.md +0 -17
  573. package/src/skills/review-result/tests/cases/TC-REVIEW-RESULT-003/current/kilo-minimax/trial-2.md +0 -31
  574. package/src/skills/review-result/tests/cases/TC-REVIEW-RESULT-003/current/kilo-minimax/trial-3.md +0 -5
  575. package/src/skills/review-result/tests/cases/TC-REVIEW-RESULT-003/current/meta.json +0 -115
  576. package/src/skills/review-result/tests/cases/TC-REVIEW-RESULT-003-test-isolation.yaml +0 -50
  577. package/src/skills/review-result/tests/fixtures/IMPL-902-path-with-line.md +0 -43
  578. package/src/skills/review-result/tests/fixtures/QA-901-visual-button.md +0 -46
  579. package/src/skills/review-result/tests/fixtures/QA-904-test-isolation-violation/QA-904.md +0 -51
  580. package/src/skills/review-result/tests/fixtures/QA-904-test-isolation-violation/example-test.mjs +0 -36
  581. package/src/skills/review-result/tests/index.yaml +0 -30
  582. package/src/skills/review-result/tests/rubrics/path-line-suffix.md +0 -19
  583. package/src/skills/review-result/tests/rubrics/test-isolation.md +0 -20
  584. package/src/skills/review-result/tests/rubrics/visual-tc-trigger.md +0 -19
  585. package/src/skills/review-result/workflows/review.md +0 -209
@@ -1,163 +0,0 @@
1
- {
2
- "per_model": {
3
- "claude-sonnet": {
4
- "pass_count": 3,
5
- "total": 3,
6
- "trials": [
7
- {
8
- "trial": 1,
9
- "score": 5,
10
- "passed": true
11
- },
12
- {
13
- "trial": 2,
14
- "score": 5,
15
- "passed": true
16
- },
17
- {
18
- "trial": 3,
19
- "score": 4,
20
- "passed": true
21
- }
22
- ]
23
- },
24
- "kilo-glm": {
25
- "pass_count": 3,
26
- "total": 3,
27
- "trials": [
28
- {
29
- "trial": 1,
30
- "score": 4,
31
- "passed": true
32
- },
33
- {
34
- "trial": 2,
35
- "score": 4,
36
- "passed": true
37
- },
38
- {
39
- "trial": 3,
40
- "score": 4,
41
- "passed": true
42
- }
43
- ]
44
- },
45
- "kilo-minimax": {
46
- "pass_count": 3,
47
- "total": 3,
48
- "trials": [
49
- {
50
- "trial": 1,
51
- "score": 4,
52
- "passed": true
53
- },
54
- {
55
- "trial": 2,
56
- "score": 4,
57
- "passed": true
58
- },
59
- {
60
- "trial": 3,
61
- "score": 4,
62
- "passed": true
63
- }
64
- ]
65
- },
66
- "kilo-deepseek": {
67
- "pass_count": 2,
68
- "total": 3,
69
- "trials": [
70
- {
71
- "trial": 1,
72
- "score": 3,
73
- "passed": false
74
- },
75
- {
76
- "trial": 2,
77
- "score": 4,
78
- "passed": true
79
- },
80
- {
81
- "trial": 3,
82
- "score": 4,
83
- "passed": true
84
- }
85
- ]
86
- }
87
- },
88
- "rubric_scores": [
89
- {
90
- "agentId": "claude-sonnet",
91
- "trial": 1,
92
- "score": 5,
93
- "errored": false
94
- },
95
- {
96
- "agentId": "claude-sonnet",
97
- "trial": 2,
98
- "score": 5,
99
- "errored": false
100
- },
101
- {
102
- "agentId": "claude-sonnet",
103
- "trial": 3,
104
- "score": 4,
105
- "errored": false
106
- },
107
- {
108
- "agentId": "kilo-deepseek",
109
- "trial": 1,
110
- "score": 3,
111
- "errored": false
112
- },
113
- {
114
- "agentId": "kilo-deepseek",
115
- "trial": 2,
116
- "score": 4,
117
- "errored": false
118
- },
119
- {
120
- "agentId": "kilo-deepseek",
121
- "trial": 3,
122
- "score": 4,
123
- "errored": false
124
- },
125
- {
126
- "agentId": "kilo-glm",
127
- "trial": 1,
128
- "score": 4,
129
- "errored": false
130
- },
131
- {
132
- "agentId": "kilo-glm",
133
- "trial": 2,
134
- "score": 4,
135
- "errored": false
136
- },
137
- {
138
- "agentId": "kilo-glm",
139
- "trial": 3,
140
- "score": 4,
141
- "errored": false
142
- },
143
- {
144
- "agentId": "kilo-minimax",
145
- "trial": 1,
146
- "score": 4,
147
- "errored": false
148
- },
149
- {
150
- "agentId": "kilo-minimax",
151
- "trial": 2,
152
- "score": 4,
153
- "errored": false
154
- },
155
- {
156
- "agentId": "kilo-minimax",
157
- "trial": 3,
158
- "score": 4,
159
- "errored": false
160
- }
161
- ],
162
- "timestamp": "2026-04-21T14:36:41.299Z"
163
- }
@@ -1,5 +0,0 @@
1
- ---RESULT---
2
- status: failed
3
- issues:
4
- - "Пункт DoD #3 не выполнен: тест нарушает изоляцию — пишет в рабочий репозиторий проекта, cleanup не защищает от прерывания"
5
- ---RESULT---
@@ -1,5 +0,0 @@
1
- ---RESULT---
2
- status: failed
3
- issues:
4
- - "Пункт DoD 3 не выполнен: ожидалось создание tmp-корня и изоляция в teardown/after-hook, получено: корень артефактов привязан к реальному корню репозитория (строка `const testProjectRoot = join(__dirname, '..', '..');`), cleanup удаляет конкретный файл, cleanup вызывается в теле test-функции"
5
- ---RESULT---
@@ -1,11 +0,0 @@
1
- Проверка тикета QA-904 выявила следующие проблемы:
2
-
3
- 1. **Несоответствие путей** — в DoD указан файл `tests/fixtures/QA‑904‑test‑isolation‑violation/example‑test.mjs`, но фактический путь `.workflow/src/skills/review‑result/tests/fixtures/QA‑904‑test‑isolation‑violation/example‑test.mjs`.
4
- 2. **Нарушение изоляции теста** — тест пишет артефакты в рабочий репозиторий (строка `const testProjectRoot = join(__dirname, '..', '..');`), cleanup удаляет только конкретный файл, а не весь tmp‑корень, и находится в теле теста, а не в teardown/after‑hook.
5
-
6
- ---RESULT---
7
- status: failed
8
- issues:
9
- - "Пункт DoD #1 не выполнен: ожидался файл tests/fixtures/QA-904-test-isolation-violation/example-test.mjs, фактический путь .workflow/src/skills/review-result/tests/fixtures/QA-904-test-isolation-violation/example-test.mjs"
10
- - "Пункт DoD #3 не выполнен: тест нарушает изоляцию — корень артефактов привязан к рабочему репозиторию (строка 10), cleanup удаляет конкретный файл, а не tmp-корень, cleanup в теле теста, а не в teardown/after-hook"
11
- ---RESULT---
@@ -1,16 +0,0 @@
1
- Вердикт: **failed**.
2
-
3
- Проверка DoD-критериев:
4
-
5
- 1. **Файл создан** — подтверждено.
6
- 2. **Тест проходит** — принято по условию.
7
- 3. **Изоляция** — **нарушена** по трём причинам:
8
- - `testProjectRoot` указывает на корень репозитория (`join(__dirname, '..', '..')`), а не на системный tmp-каталог.
9
- - `writeFileSync(testConfigPath, ...)` пишет в `.workflow/config/sample.yaml` внутри рабочего репозитория.
10
- - `cleanupTestConfig()` вызывается в теле test-функции, а не в `after()`/`teardown`; при падении assertion файл остаётся в репозитории.
11
-
12
- ---RESULT---
13
- status: failed
14
- issues:
15
- - "Пункт DoD 3 не выполнен: тест пишет в рабочий репозиторий (testProjectRoot = join(__dirname, '..', '..') → корень репозитория), а не в tmp-корень; cleanup вызывается в теле test-функции, а не в teardown/after-hook; при падении assertion артефакт остаётся в репозитории"
16
- ---RESULT---
@@ -1,18 +0,0 @@
1
- Тикет прочитан. Три критерия DoD. Критерий 3 (изоляция) нарушен — анализ ниже.
2
-
3
- **DoD-проверка:**
4
-
5
- 1. **Файл создан** — подтверждено, файл существует.
6
- 2. **Тест проходит** — формально `node --test` success, принимается.
7
- 3. **Изоляция** — НАРУШЕНА:
8
- - `testProjectRoot = join(__dirname, '..', '..')` — указывает на корень репозитория, а не на системный `tmp`.
9
- - `createTestConfig` пишет в `.workflow/config/sample.yaml` внутри рабочего репозитория.
10
- - `cleanupTestConfig` удаляет конкретный файл, не tmp-корень целиком.
11
- - Cleanup вызывается в теле test-функции, а не в `teardown`/`after`-хуке — при падении assertion до строки cleanup файл остаётся в репозитории.
12
-
13
- Вердикт: **failed**.
14
- ---RESULT---
15
- status: failed
16
- issues:
17
- - "Пункт DoD 3 не выполнен: ожидалось что тест создаёт и уничтожает tmp-корень, не пишет в рабочий репозиторий (изоляция в teardown/after-hook); получено: testProjectRoot привязан к реальному корню репозитория (join(__dirname, '..', '..')), createTestConfig пишет в .workflow/config/sample.yaml рабочего репозитория, cleanupTestConfig удаляет конкретный файл а не tmp-корень, cleanup вызывается в теле test-функции а не в teardown — файл: src/skills/review-result/tests/fixtures/QA-904-test-isolation-violation/example-test.mjs:12-22"
18
- ---RESULT---
@@ -1,17 +0,0 @@
1
- Скрипт недоступен — перехожу к ручной проверке (fallback по workflow).
2
- Проверка завершена. Файл существует, но нарушает изоляцию. Формирую вердикт.
3
- Проверка тикета QA-904 завершена. DoD критерий #3 (изоляция теста) не выполнен — три нарушения:
4
-
5
- 1. **Корень артефактов — рабочий репозиторий** (`example-test.mjs:16`): `join(__dirname, '..', '..')` указывает на корень проекта, а не на системный tmp.
6
- 2. **Cleanup удаляет конкретный файл** (`:28`): `rmSync(testConfigPath)`, не весь tmp-корень — при прерывании артефакт остаётся.
7
- 3. **Cleanup в теле test-функции** (`:35`), а не в teardown/after-hook — падение assertion до cleanup = утечка state.
8
-
9
- ```
10
- ---RESULT---
11
- status: failed
12
- issues:
13
- - "DoD #3 не выполнен: тест пишет в рабочий репозиторий — example-test.mjs:16-17, testProjectRoot и testConfigPath указывают на .workflow/config/sample.yaml внутри корня репозитория, а не на tmp-корень через стандартную функцию ОС"
14
- - "DoD #3 не выполнен: cleanup удаляет конкретный файл (example-test.mjs:28 — rmSync(testConfigPath)), а не весь tmp-корень рекурсивно — при прерывании до вызова cleanup файл остаётся в рабочем репозитории"
15
- - "DoD #3 не выполнен: cleanup вызывается в теле test-функции (example-test.mjs:35), а не в teardown/after-hook — падение assertion до этой строки оставляет артефакт в репозитории"
16
- ---RESULT---
17
- ```
@@ -1,17 +0,0 @@
1
- Проверяю DoD пункт 3 (изоляция). Из предоставленного содержимого теста:
2
-
3
- **Проблемы изоляции:**
4
- 1. **Корень артефактов** — `testProjectRoot = join(__dirname, '..', '..')` привязан к реальному репозиторию, а не к tmp-корню
5
- 2. **Запись в рабочий репозиторий** — пишет в `.workflow/config/sample.yaml`
6
- 3. **Cleanup в теле теста** — `cleanupTestConfig()` вызывается в теле test-функции, а не в after-hooks/teardown
7
- 4. **Удаляется конкретный файл** — `rmSync(testConfigPath)`, а не рекурсивное удаление tmp-корня
8
-
9
- DoD требует: "создаёт и уничтожает tmp-корень, не пишет в рабочий репозиторий (изоляция в teardown/after-hook)".
10
-
11
- ---
12
-
13
- ---RESULT---
14
- status: failed
15
- issues:
16
- - "Пункт DoD #3 не выполнен: тест пишет в рабочий репозиторий (.workflow/config/sample.yaml), а не в tmp-корень; cleanup выполняется в теле теста, а не в teardown/after-hook"
17
- ---RESULT---
@@ -1,31 +0,0 @@
1
- Проверяю DoD тикета QA-904:
2
-
3
- **Пункт 1** — Файл создан: ✅ Существует
4
-
5
- **Пункт 2** — Тест проходит: ✅ Формально `node --test` success
6
-
7
- **Пункт 3** — Изоляция (tmp-корень, не рабочий репозиторий): ❌ **НАРУШЕН**
8
-
9
- Анализ теста из тикета:
10
-
11
- 1. `testProjectRoot = join(__dirname, '..', '..')` — вычисляется от расположения теста, ведёт к реальному корню репозитория
12
- 2. `writeFileSync(testConfigPath, ...)` — пишет в `.workflow/config/sample.yaml` рабочего репозитория
13
- 3. `cleanupTestConfig()` — вызывается **внутри** test-функции, не в afterEach/teardown. При падении assertion до этой строки файл останется в рабочем репозитории
14
-
15
- Это прямое нарушение изоляции: тест пишет в `.workflow/config/`, а не в tmp-корень (напр. `os.tmpdir()`).
16
-
17
- Секции "## Ревью" в тикете нет — это первое ревью.
18
-
19
- ---
20
-
21
- ## Вердикт
22
-
23
- **DoD пункт 3 не выполнен:** тест записывает артефакты в рабочий репозиторий проекта, а не в tmp-корень. Cleanup находится внутри тела функции, а не в afterEach/teardown, что оставляет файл при падении.
24
-
25
- ---
26
-
27
- ---RESULT---
28
- status: failed
29
- issues:
30
- - "DoD пункт 3 нарушен: тест пишет в рабочий репозиторий (`.workflow/config/sample.yaml`), а не в tmp-корень. cleanup вызывается в теле test-функции, а не в teardown/afterEach — при падении assertion файл останется в рабочем репозитории. Ожидалось: `os.tmpdir()` + afterEach/teardown."
31
- ---RESULT---
@@ -1,5 +0,0 @@
1
- ---RESULT---
2
- status: failed
3
- issues:
4
- - "Пункт DoD #3 не выполнен: ожидался tmp-корень с изоляцией в teardown/after-hook, но тест пишет в `.workflow/config/` рабочего репозитория (`testProjectRoot = join(__dirname, '..', '..')`), cleanup вызывается в теле теста (строка 25), а не в afterEach/teardown"
5
- ---RESULT---
@@ -1,115 +0,0 @@
1
- {
2
- "date": "2026-04-21T14:36:41.300Z",
3
- "skill_sha": "2bf55c8",
4
- "status": "passed",
5
- "duration_ms": 356716,
6
- "l1_skipped": true,
7
- "per_model": {
8
- "claude-sonnet": {
9
- "passed": true,
10
- "errored": false,
11
- "pass_count": 3,
12
- "error_count": 0,
13
- "total": 3,
14
- "threshold": 2
15
- },
16
- "kilo-glm": {
17
- "passed": true,
18
- "errored": false,
19
- "pass_count": 3,
20
- "error_count": 0,
21
- "total": 3,
22
- "threshold": 2
23
- },
24
- "kilo-minimax": {
25
- "passed": true,
26
- "errored": false,
27
- "pass_count": 3,
28
- "error_count": 0,
29
- "total": 3,
30
- "threshold": 2
31
- },
32
- "kilo-deepseek": {
33
- "passed": true,
34
- "errored": false,
35
- "pass_count": 2,
36
- "error_count": 0,
37
- "total": 3,
38
- "threshold": 2
39
- }
40
- },
41
- "rubric_scores": [
42
- {
43
- "agentId": "claude-sonnet",
44
- "trial": 1,
45
- "score": 5,
46
- "errored": false
47
- },
48
- {
49
- "agentId": "claude-sonnet",
50
- "trial": 2,
51
- "score": 5,
52
- "errored": false
53
- },
54
- {
55
- "agentId": "claude-sonnet",
56
- "trial": 3,
57
- "score": 4,
58
- "errored": false
59
- },
60
- {
61
- "agentId": "kilo-deepseek",
62
- "trial": 1,
63
- "score": 3,
64
- "errored": false
65
- },
66
- {
67
- "agentId": "kilo-deepseek",
68
- "trial": 2,
69
- "score": 4,
70
- "errored": false
71
- },
72
- {
73
- "agentId": "kilo-deepseek",
74
- "trial": 3,
75
- "score": 4,
76
- "errored": false
77
- },
78
- {
79
- "agentId": "kilo-glm",
80
- "trial": 1,
81
- "score": 4,
82
- "errored": false
83
- },
84
- {
85
- "agentId": "kilo-glm",
86
- "trial": 2,
87
- "score": 4,
88
- "errored": false
89
- },
90
- {
91
- "agentId": "kilo-glm",
92
- "trial": 3,
93
- "score": 4,
94
- "errored": false
95
- },
96
- {
97
- "agentId": "kilo-minimax",
98
- "trial": 1,
99
- "score": 4,
100
- "errored": false
101
- },
102
- {
103
- "agentId": "kilo-minimax",
104
- "trial": 2,
105
- "score": 4,
106
- "errored": false
107
- },
108
- {
109
- "agentId": "kilo-minimax",
110
- "trial": 3,
111
- "score": 4,
112
- "errored": false
113
- }
114
- ]
115
- }
@@ -1,50 +0,0 @@
1
- id: TC-REVIEW-RESULT-003
2
- title: "Ревьюер ловит нарушение изоляции тестов"
3
- origin:
4
- chg: [CHG-test-hygiene-2026-04-21]
5
- incidents:
6
- - "workflowAi: тест error-classifier.test.mjs писал в реальный .workflow/config/agent-health-rules.yaml, полагаясь на cleanup в теле функции; при прерывании оставлял артефакты в рабочем репо и ложно срабатывали механические проверки"
7
- principle: "Проверка изоляции теста ортогональна результату прогона: зелёный тест, пишущий в рабочий репозиторий — токсичный"
8
- backlog_sources:
9
- - "d:/Dev/workflowAi/.workflow/coach-backlog.yaml"
10
- tags: [test-hygiene, isolation, knowledge-test-hygiene]
11
- severity: high
12
-
13
- scenario:
14
- system_prompt_file: "../../SKILL.md"
15
- extra_instructions: |
16
- Проведи ревью тикета QA-904, который лежит в .workflow/tickets/review/QA-904.md.
17
- DoD тикета явно требует изоляции теста (tmp-корень, не запись в рабочий
18
- репозиторий). Содержимое файла теста, на который ссылается секция
19
- «Изменённые файлы», приведено ниже (блок "## Test file content") —
20
- используй его для оценки изоляции. Тест формально проходит
21
- (`node --test` success), но проверка изоляции ортогональна результату
22
- прогона.
23
- inputs:
24
- - kind: ticket_file
25
- path: "../fixtures/QA-904-test-isolation-violation/QA-904.md"
26
- ticket_id: QA-904
27
- dest_dir: review
28
- - kind: file
29
- path: "../fixtures/QA-904-test-isolation-violation/example-test.mjs"
30
- as: "Test file content (src/skills/review-result/tests/fixtures/QA-904-test-isolation-violation/example-test.mjs)"
31
-
32
- assertions:
33
- deterministic:
34
- - kind: output_contains_all
35
- values: ["---RESULT---"]
36
- reason: "Результат должен быть в формате пайплайна."
37
- - kind: output_contains_all
38
- values: ["failed"]
39
- reason: "Вердикт — failed по нарушению изоляции."
40
-
41
- rubric:
42
- - kind: llm_judge
43
- rubric_file: "../rubrics/test-isolation.md"
44
- criterion: "Поймал ли ревьюер нарушение изоляции теста и оформил failed с конкретным issue, процитировав проблемные строки теста?"
45
- pass_threshold: 4
46
- trials: 3
47
- aggregate: majority
48
-
49
- execution:
50
- timeout_s: 1200
@@ -1,43 +0,0 @@
1
- ---
2
- id: IMPL-902
3
- title: "Исправить парсинг URL в фоновом скрипте"
4
- priority: 3
5
- type: impl
6
- required_capabilities: []
7
- created_at: "2026-04-19T00:00:00Z"
8
- updated_at: "2026-04-19T00:00:00Z"
9
- completed_at: "2026-04-19T00:00:00Z"
10
- parent_plan: ""
11
- parent_task: ""
12
- dependencies: []
13
- conditions: []
14
- context:
15
- files: []
16
- references: []
17
- notes: |
18
- Сценарий для регрессионного теста скила review-result (TC-REVIEW-RESULT-002).
19
- ⚠️ Workdir изолированный: настоящего src/background/index.ts здесь нет —
20
- путь указан в отраслевой нотации path:line-range, корректное поведение
21
- ревьюера должно учитывать этот формат и не помечать файл как missing.
22
- complexity: simple
23
- tags:
24
- - impl
25
- ---
26
-
27
- ## Описание
28
-
29
- Исправить парсинг URL-параметра `redirect_to` в обработчике сообщений фонового скрипта.
30
-
31
- ## Критерии готовности
32
-
33
- - [x] Парсер обрабатывает URL с query string без потери параметров
34
- - [x] Добавлен unit-тест на edge-case с пустым `redirect_to`
35
-
36
- ## Изменённые файлы
37
-
38
- - src/background/index.ts:204-211
39
-
40
- ## Result
41
-
42
- Логика парсинга вынесена в отдельную функцию, добавлен тест.
43
- Все пункты DoD выполнены.
@@ -1,46 +0,0 @@
1
- ---
2
- id: QA-901
3
- title: "Проверить визуальное соответствие кнопки Submit макету"
4
- priority: 3
5
- type: qa
6
- required_capabilities: []
7
- created_at: "2026-04-19T00:00:00Z"
8
- updated_at: "2026-04-19T00:00:00Z"
9
- completed_at: "2026-04-19T00:00:00Z"
10
- parent_plan: ""
11
- parent_task: ""
12
- dependencies: []
13
- conditions: []
14
- context:
15
- files: []
16
- references: []
17
- notes: |
18
- Сценарий для регрессионного теста скила review-result (TC-REVIEW-RESULT-001).
19
- ⚠️ Workdir изолированный: нет настоящих макетов или приложения. Считай, что
20
- исполнитель уже отметил все пункты DoD как выполненные и заполнил Result.
21
- Задача теста — проверить поведение ревьюера на визуальном критерии без
22
- приложенных скриншотов.
23
- complexity: simple
24
- tags:
25
- - qa
26
- - visual
27
- ---
28
-
29
- ## Описание
30
-
31
- Проверить, что кнопка Submit на форме входа визуально соответствует макету Figma.
32
-
33
- ## Критерии готовности
34
-
35
- - [x] Кнопка визуально соответствует макету
36
- - [x] Цвет кнопки совпадает с design tokens
37
- - [x] Скругление углов 8px
38
-
39
- ## Изменённые файлы
40
-
41
- - src/components/SubmitButton.tsx
42
-
43
- ## Result
44
-
45
- Кнопка Submit реализована и проверена визуально. Все три пункта DoD выполнены.
46
- Скриншоты не прикладывал — проверял локально в браузере.
@@ -1,51 +0,0 @@
1
- ---
2
- id: QA-904
3
- title: "Добавить unit-тест для config-loader'а"
4
- priority: 3
5
- type: qa
6
- required_capabilities: []
7
- created_at: "2026-04-21T00:00:00Z"
8
- updated_at: "2026-04-21T00:00:00Z"
9
- completed_at: "2026-04-21T00:00:00Z"
10
- parent_plan: ""
11
- parent_task: ""
12
- dependencies: []
13
- conditions: []
14
- context:
15
- files: []
16
- references: []
17
- notes: |
18
- Сценарий для регрессионного теста скила review-result (TC-REVIEW-RESULT-003).
19
- Тест в разделе «Изменённые файлы» нарушает изоляцию: пишет в рабочий
20
- репозиторий проекта, cleanup удаляет конкретный файл (а не корень),
21
- cleanup вызывается в теле test-функции, а не в teardown. Ревьюер
22
- должен вернуть failed с конкретным issue про изоляцию, даже если
23
- тест формально «проходит».
24
- complexity: simple
25
- tags:
26
- - qa
27
- - test-isolation
28
- ---
29
-
30
- ## Описание
31
-
32
- Добавить unit-тест для config-loader'а, покрывающий чтение YAML-файла конфигурации.
33
-
34
- ## Критерии готовности (Definition of Done)
35
-
36
- - [x] Файл `tests/fixtures/QA-904-test-isolation-violation/example-test.mjs` создан с юнит-тестом
37
- - [x] Тест проходит (`node --test`)
38
- - [x] Тест создаёт и уничтожает tmp-корень, не пишет в рабочий репозиторий (изоляция в teardown/after-hook)
39
-
40
- ## Изменённые файлы
41
-
42
- - `src/skills/review-result/tests/fixtures/QA-904-test-isolation-violation/example-test.mjs`
43
-
44
- ## Результат выполнения
45
-
46
- ### Summary
47
- Создан юнит-тест в `src/skills/review-result/tests/fixtures/QA-904-test-isolation-violation/example-test.mjs`. Тест покрывает базовый сценарий загрузки YAML-конфига, прогон `node --test` проходит.
48
-
49
- ### Изменённые файлы
50
-
51
- - `src/skills/review-result/tests/fixtures/QA-904-test-isolation-violation/example-test.mjs` — новый юнит-тест
@@ -1,36 +0,0 @@
1
- // Фикстура для TC-REVIEW-RESULT-003.
2
- // Этот тест НАРУШАЕТ изоляцию: пишет в рабочий репозиторий проекта,
3
- // а не в стандартный системный tmp-корень. Ревьюер должен поймать это
4
- // нарушение и вернуть failed, даже если тест формально «проходит».
5
-
6
- import { test } from 'node:test';
7
- import assert from 'node:assert/strict';
8
- import { writeFileSync, mkdirSync, rmSync } from 'node:fs';
9
- import { join, dirname } from 'node:path';
10
- import { fileURLToPath } from 'node:url';
11
-
12
- const __filename = fileURLToPath(import.meta.url);
13
- const __dirname = dirname(__filename);
14
- // ⛔ Нарушение: корень артефактов привязан к реальному корню репозитория,
15
- // а не к стандартному tmp-корню.
16
- const testProjectRoot = join(__dirname, '..', '..');
17
- const testConfigPath = join(testProjectRoot, '.workflow', 'config', 'sample.yaml');
18
-
19
- function createTestConfig(content) {
20
- const dir = join(testProjectRoot, '.workflow', 'config');
21
- mkdirSync(dir, { recursive: true });
22
- writeFileSync(testConfigPath, content, 'utf-8');
23
- }
24
-
25
- function cleanupTestConfig() {
26
- // ⛔ Нарушение: удаляется конкретный файл, не корень. При падении assertion
27
- // до этой строки файл останется в рабочем репозитории.
28
- try { rmSync(testConfigPath); } catch (e) {}
29
- }
30
-
31
- test('пример теста с нарушением изоляции', () => {
32
- createTestConfig('key: value\n');
33
- // ... проверки ...
34
- assert.ok(true);
35
- cleanupTestConfig();
36
- });