workflow-ai 1.0.64 → 1.0.66

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (160)
  1. package/README.md +377 -277
  2. package/configs/agent-health-rules.yaml +75 -0
  3. package/configs/pipeline.yaml +24 -7
  4. package/package.json +1 -1
  5. package/src/init.mjs +20 -3
  6. package/src/lib/agent-health-registry.mjs +245 -0
  7. package/src/lib/agent-spawner.mjs +47 -6
  8. package/src/lib/artifact-snapshot.mjs +233 -0
  9. package/src/lib/error-classifier.mjs +311 -0
  10. package/src/lib/test-error-classifier.mjs +60 -0
  11. package/src/lib/test-extends.mjs +58 -0
  12. package/src/lib/test-version.mjs +21 -0
  13. package/src/runner.mjs +215 -58
  14. package/src/scripts/move-to-review.js +5 -7
  15. package/src/scripts/reset-agent-health.js +62 -0
  16. package/src/skills/coach/SKILL.md +1 -0
  17. package/src/skills/coach/tests/cases/TC-COACH-001/current/meta.json +93 -94
  18. package/src/skills/coach/tests/cases/TC-COACH-002/current/meta.json +93 -94
  19. package/src/skills/create-plan/SKILL.md +1 -0
  20. package/src/skills/create-plan/knowledge/test-hygiene.md +47 -0
  21. package/src/skills/decompose-gaps/tests/cases/TC-DECOMPOSE-GAPS-001/current/claude-sonnet/trial-1.md +23 -31
  22. package/src/skills/decompose-gaps/tests/cases/TC-DECOMPOSE-GAPS-001/current/claude-sonnet/trial-2.md +20 -35
  23. package/src/skills/decompose-gaps/tests/cases/TC-DECOMPOSE-GAPS-001/current/claude-sonnet/trial-3.md +36 -19
  24. package/src/skills/decompose-gaps/tests/cases/TC-DECOMPOSE-GAPS-001/current/judge.json +1 -1
  25. package/src/skills/decompose-gaps/tests/cases/TC-DECOMPOSE-GAPS-001/current/kilo-deepseek/trial-2.md +11 -5
  26. package/src/skills/decompose-gaps/tests/cases/TC-DECOMPOSE-GAPS-001/current/kilo-deepseek/trial-3.md +12 -16
  27. package/src/skills/decompose-gaps/tests/cases/TC-DECOMPOSE-GAPS-001/current/kilo-glm/trial-1.md +15 -9
  28. package/src/skills/decompose-gaps/tests/cases/TC-DECOMPOSE-GAPS-001/current/kilo-glm/trial-3.md +15 -14
  29. package/src/skills/decompose-gaps/tests/cases/TC-DECOMPOSE-GAPS-001/current/kilo-minimax/trial-1.md +22 -18
  30. package/src/skills/decompose-gaps/tests/cases/TC-DECOMPOSE-GAPS-001/current/kilo-minimax/trial-2.md +24 -16
  31. package/src/skills/decompose-gaps/tests/cases/TC-DECOMPOSE-GAPS-001/current/kilo-minimax/trial-3.md +13 -20
  32. package/src/skills/decompose-gaps/tests/cases/TC-DECOMPOSE-GAPS-001/current/meta.json +2 -2
  33. package/src/skills/decompose-gaps/tests/cases/TC-DECOMPOSE-GAPS-002/current/claude-sonnet/trial-1.md +14 -19
  34. package/src/skills/decompose-gaps/tests/cases/TC-DECOMPOSE-GAPS-002/current/claude-sonnet/trial-2.md +24 -14
  35. package/src/skills/decompose-gaps/tests/cases/TC-DECOMPOSE-GAPS-002/current/claude-sonnet/trial-3.md +20 -19
  36. package/src/skills/decompose-gaps/tests/cases/TC-DECOMPOSE-GAPS-002/current/judge.json +16 -17
  37. package/src/skills/decompose-gaps/tests/cases/TC-DECOMPOSE-GAPS-002/current/kilo-deepseek/trial-1.md +0 -7
  38. package/src/skills/decompose-gaps/tests/cases/TC-DECOMPOSE-GAPS-002/current/kilo-deepseek/trial-2.md +9 -10
  39. package/src/skills/decompose-gaps/tests/cases/TC-DECOMPOSE-GAPS-002/current/kilo-deepseek/trial-3.md +5 -5
  40. package/src/skills/decompose-gaps/tests/cases/TC-DECOMPOSE-GAPS-002/current/kilo-glm/trial-1.md +20 -4
  41. package/src/skills/decompose-gaps/tests/cases/TC-DECOMPOSE-GAPS-002/current/kilo-glm/trial-2.md +36 -9
  42. package/src/skills/decompose-gaps/tests/cases/TC-DECOMPOSE-GAPS-002/current/kilo-glm/trial-3.md +9 -6
  43. package/src/skills/decompose-gaps/tests/cases/TC-DECOMPOSE-GAPS-002/current/kilo-minimax/trial-1.md +4 -12
  44. package/src/skills/decompose-gaps/tests/cases/TC-DECOMPOSE-GAPS-002/current/kilo-minimax/trial-2.md +6 -8
  45. package/src/skills/decompose-gaps/tests/cases/TC-DECOMPOSE-GAPS-002/current/kilo-minimax/trial-3.md +8 -4
  46. package/src/skills/decompose-gaps/tests/cases/TC-DECOMPOSE-GAPS-002/current/meta.json +10 -11
  47. package/src/skills/decompose-gaps/tests/cases/TC-DECOMPOSE-GAPS-003/current/claude-sonnet/trial-1.md +30 -0
  48. package/src/skills/decompose-gaps/tests/cases/TC-DECOMPOSE-GAPS-003/current/claude-sonnet/trial-2.md +30 -0
  49. package/src/skills/decompose-gaps/tests/cases/TC-DECOMPOSE-GAPS-003/current/claude-sonnet/trial-3.md +30 -0
  50. package/src/skills/decompose-gaps/tests/cases/TC-DECOMPOSE-GAPS-003/current/judge.json +165 -0
  51. package/src/skills/decompose-gaps/tests/cases/TC-DECOMPOSE-GAPS-003/current/kilo-deepseek/trial-1.md +5 -0
  52. package/src/skills/decompose-gaps/tests/cases/TC-DECOMPOSE-GAPS-003/current/kilo-deepseek/trial-2.md +26 -0
  53. package/src/skills/decompose-gaps/tests/cases/TC-DECOMPOSE-GAPS-003/current/kilo-deepseek/trial-3.md +5 -0
  54. package/src/skills/decompose-gaps/tests/cases/TC-DECOMPOSE-GAPS-003/current/kilo-glm/trial-1.md +39 -0
  55. package/src/skills/decompose-gaps/tests/cases/TC-DECOMPOSE-GAPS-003/current/kilo-glm/trial-2.md +37 -0
  56. package/src/skills/decompose-gaps/tests/cases/TC-DECOMPOSE-GAPS-003/current/kilo-glm/trial-3.md +45 -0
  57. package/src/skills/decompose-gaps/tests/cases/TC-DECOMPOSE-GAPS-003/current/kilo-minimax/trial-1.md +26 -0
  58. package/src/skills/decompose-gaps/tests/cases/TC-DECOMPOSE-GAPS-003/current/kilo-minimax/trial-2.md +27 -0
  59. package/src/skills/decompose-gaps/tests/cases/TC-DECOMPOSE-GAPS-003/current/kilo-minimax/trial-3.md +7 -0
  60. package/src/skills/decompose-gaps/tests/cases/TC-DECOMPOSE-GAPS-003/current/meta.json +117 -0
  61. package/src/skills/decompose-gaps/tests/cases/TC-DECOMPOSE-GAPS-003-parent-plan-mandatory.yaml +41 -0
  62. package/src/skills/decompose-gaps/tests/index.yaml +5 -0
  63. package/src/skills/decompose-gaps/tests/rubrics/parent-plan-mandatory.md +22 -0
  64. package/src/skills/decompose-gaps/workflows/decompose.md +5 -2
  65. package/src/skills/decompose-plan/knowledge/atomicity-checklist.md +31 -5
  66. package/src/skills/decompose-plan/knowledge/capabilities.md +29 -5
  67. package/src/skills/decompose-plan/knowledge/human-task-rules.md +15 -0
  68. package/src/skills/decompose-plan/tests/cases/TC-DECOMPOSE-PLAN-004/current/claude-sonnet/trial-1.md +55 -0
  69. package/src/skills/decompose-plan/tests/cases/TC-DECOMPOSE-PLAN-004/current/claude-sonnet/trial-2.md +49 -0
  70. package/src/skills/decompose-plan/tests/cases/TC-DECOMPOSE-PLAN-004/current/claude-sonnet/trial-3.md +49 -0
  71. package/src/skills/decompose-plan/tests/cases/TC-DECOMPOSE-PLAN-004/current/judge.json +163 -0
  72. package/src/skills/decompose-plan/tests/cases/TC-DECOMPOSE-PLAN-004/current/kilo-deepseek/trial-1.md +104 -0
  73. package/src/skills/decompose-plan/tests/cases/TC-DECOMPOSE-PLAN-004/current/kilo-deepseek/trial-2.md +45 -0
  74. package/src/skills/decompose-plan/tests/cases/TC-DECOMPOSE-PLAN-004/current/kilo-deepseek/trial-3.md +58 -0
  75. package/src/skills/decompose-plan/tests/cases/TC-DECOMPOSE-PLAN-004/current/kilo-glm/trial-1.md +193 -0
  76. package/src/skills/decompose-plan/tests/cases/TC-DECOMPOSE-PLAN-004/current/kilo-glm/trial-2.md +202 -0
  77. package/src/skills/decompose-plan/tests/cases/TC-DECOMPOSE-PLAN-004/current/kilo-glm/trial-3.md +155 -0
  78. package/src/skills/decompose-plan/tests/cases/TC-DECOMPOSE-PLAN-004/current/kilo-minimax/trial-1.md +52 -0
  79. package/src/skills/decompose-plan/tests/cases/TC-DECOMPOSE-PLAN-004/current/kilo-minimax/trial-2.md +17 -0
  80. package/src/skills/decompose-plan/tests/cases/TC-DECOMPOSE-PLAN-004/current/kilo-minimax/trial-3.md +0 -0
  81. package/src/skills/decompose-plan/tests/cases/TC-DECOMPOSE-PLAN-004/current/meta.json +115 -0
  82. package/src/skills/decompose-plan/tests/cases/TC-DECOMPOSE-PLAN-004-executor-atomicity.yaml +64 -0
  83. package/src/skills/decompose-plan/tests/cases/TC-DECOMPOSE-PLAN-005/current/claude-sonnet/trial-1.md +59 -0
  84. package/src/skills/decompose-plan/tests/cases/TC-DECOMPOSE-PLAN-005/current/claude-sonnet/trial-2.md +204 -0
  85. package/src/skills/decompose-plan/tests/cases/TC-DECOMPOSE-PLAN-005/current/claude-sonnet/trial-3.md +213 -0
  86. package/src/skills/decompose-plan/tests/cases/TC-DECOMPOSE-PLAN-005/current/judge.json +163 -0
  87. package/src/skills/decompose-plan/tests/cases/TC-DECOMPOSE-PLAN-005/current/kilo-deepseek/trial-1.md +0 -0
  88. package/src/skills/decompose-plan/tests/cases/TC-DECOMPOSE-PLAN-005/current/kilo-deepseek/trial-2.md +57 -0
  89. package/src/skills/decompose-plan/tests/cases/TC-DECOMPOSE-PLAN-005/current/kilo-deepseek/trial-3.md +54 -0
  90. package/src/skills/decompose-plan/tests/cases/TC-DECOMPOSE-PLAN-005/current/kilo-glm/trial-1.md +147 -0
  91. package/src/skills/decompose-plan/tests/cases/TC-DECOMPOSE-PLAN-005/current/kilo-glm/trial-2.md +165 -0
  92. package/src/skills/decompose-plan/tests/cases/TC-DECOMPOSE-PLAN-005/current/kilo-glm/trial-3.md +133 -0
  93. package/src/skills/decompose-plan/tests/cases/TC-DECOMPOSE-PLAN-005/current/kilo-minimax/trial-1.md +81 -0
  94. package/src/skills/decompose-plan/tests/cases/TC-DECOMPOSE-PLAN-005/current/kilo-minimax/trial-2.md +108 -0
  95. package/src/skills/decompose-plan/tests/cases/TC-DECOMPOSE-PLAN-005/current/kilo-minimax/trial-3.md +3 -0
  96. package/src/skills/decompose-plan/tests/cases/TC-DECOMPOSE-PLAN-005/current/meta.json +114 -0
  97. package/src/skills/decompose-plan/tests/cases/TC-DECOMPOSE-PLAN-005-capabilities-registry.yaml +78 -0
  98. package/src/skills/decompose-plan/tests/cases/TC-DECOMPOSE-PLAN-006/current/claude-sonnet/trial-1.md +225 -0
  99. package/src/skills/decompose-plan/tests/cases/TC-DECOMPOSE-PLAN-006/current/claude-sonnet/trial-2.md +66 -0
  100. package/src/skills/decompose-plan/tests/cases/TC-DECOMPOSE-PLAN-006/current/claude-sonnet/trial-3.md +36 -0
  101. package/src/skills/decompose-plan/tests/cases/TC-DECOMPOSE-PLAN-006/current/judge.json +163 -0
  102. package/src/skills/decompose-plan/tests/cases/TC-DECOMPOSE-PLAN-006/current/kilo-deepseek/trial-1.md +42 -0
  103. package/src/skills/decompose-plan/tests/cases/TC-DECOMPOSE-PLAN-006/current/kilo-deepseek/trial-2.md +67 -0
  104. package/src/skills/decompose-plan/tests/cases/TC-DECOMPOSE-PLAN-006/current/kilo-deepseek/trial-3.md +40 -0
  105. package/src/skills/decompose-plan/tests/cases/TC-DECOMPOSE-PLAN-006/current/kilo-glm/trial-1.md +122 -0
  106. package/src/skills/decompose-plan/tests/cases/TC-DECOMPOSE-PLAN-006/current/kilo-glm/trial-2.md +131 -0
  107. package/src/skills/decompose-plan/tests/cases/TC-DECOMPOSE-PLAN-006/current/kilo-glm/trial-3.md +138 -0
  108. package/src/skills/decompose-plan/tests/cases/TC-DECOMPOSE-PLAN-006/current/kilo-minimax/trial-1.md +41 -0
  109. package/src/skills/decompose-plan/tests/cases/TC-DECOMPOSE-PLAN-006/current/kilo-minimax/trial-2.md +88 -0
  110. package/src/skills/decompose-plan/tests/cases/TC-DECOMPOSE-PLAN-006/current/kilo-minimax/trial-3.md +0 -0
  111. package/src/skills/decompose-plan/tests/cases/TC-DECOMPOSE-PLAN-006/current/meta.json +115 -0
  112. package/src/skills/decompose-plan/tests/cases/TC-DECOMPOSE-PLAN-006-dod-threshold.yaml +72 -0
  113. package/src/skills/decompose-plan/tests/index.yaml +15 -0
  114. package/src/skills/decompose-plan/tests/rubrics/capabilities-registry.md +21 -0
  115. package/src/skills/decompose-plan/tests/rubrics/dod-threshold.md +21 -0
  116. package/src/skills/decompose-plan/tests/rubrics/executor-atomicity.md +21 -0
  117. package/src/skills/decompose-plan/workflows/decompose.md +38 -5
  118. package/src/skills/execute-task/tests/cases/TC-EXECUTE-TASK-001/current/meta.json +87 -88
  119. package/src/skills/execute-task/tests/cases/TC-EXECUTE-TASK-005/current/meta.json +87 -88
  120. package/src/skills/manual-testing/SKILL.md +6 -4
  121. package/src/skills/manual-testing/tests/cases/TC-MANUAL-TESTING-001/current/claude-sonnet/trial-1.md +29 -16
  122. package/src/skills/manual-testing/tests/cases/TC-MANUAL-TESTING-001/current/claude-sonnet/trial-2.md +21 -54
  123. package/src/skills/manual-testing/tests/cases/TC-MANUAL-TESTING-001/current/claude-sonnet/trial-3.md +18 -23
  124. package/src/skills/manual-testing/tests/cases/TC-MANUAL-TESTING-001/current/judge.json +17 -17
  125. package/src/skills/manual-testing/tests/cases/TC-MANUAL-TESTING-001/current/meta.json +19 -19
  126. package/src/skills/manual-testing/tests/cases/TC-MANUAL-TESTING-002/current/claude-sonnet/trial-1.md +27 -30
  127. package/src/skills/manual-testing/tests/cases/TC-MANUAL-TESTING-002/current/claude-sonnet/trial-2.md +16 -23
  128. package/src/skills/manual-testing/tests/cases/TC-MANUAL-TESTING-002/current/claude-sonnet/trial-3.md +35 -28
  129. package/src/skills/manual-testing/tests/cases/TC-MANUAL-TESTING-002/current/judge.json +13 -13
  130. package/src/skills/manual-testing/tests/cases/TC-MANUAL-TESTING-002/current/meta.json +15 -15
  131. package/src/skills/manual-testing/tests/cases/TC-MANUAL-TESTING-003/current/claude-sonnet/trial-1.md +76 -0
  132. package/src/skills/manual-testing/tests/cases/TC-MANUAL-TESTING-003/current/claude-sonnet/trial-2.md +71 -0
  133. package/src/skills/manual-testing/tests/cases/TC-MANUAL-TESTING-003/current/claude-sonnet/trial-3.md +85 -0
  134. package/src/skills/manual-testing/tests/cases/TC-MANUAL-TESTING-003/current/judge.json +46 -0
  135. package/src/skills/manual-testing/tests/cases/TC-MANUAL-TESTING-003/current/meta.json +36 -0
  136. package/src/skills/manual-testing/tests/cases/TC-MANUAL-TESTING-003-qa-non-ui-assertion.yaml +65 -0
  137. package/src/skills/manual-testing/tests/index.yaml +5 -0
  138. package/src/skills/manual-testing/tests/rubrics/qa-non-ui-assertion.md +31 -0
  139. package/src/skills/review-result/SKILL.md +1 -0
  140. package/src/skills/review-result/knowledge/test-hygiene.md +44 -0
  141. package/src/skills/review-result/scripts/verify-artifacts.js +157 -14
  142. package/src/skills/review-result/tests/cases/TC-REVIEW-RESULT-003/current/claude-sonnet/trial-1.md +7 -0
  143. package/src/skills/review-result/tests/cases/TC-REVIEW-RESULT-003/current/claude-sonnet/trial-2.md +7 -0
  144. package/src/skills/review-result/tests/cases/TC-REVIEW-RESULT-003/current/claude-sonnet/trial-3.md +7 -0
  145. package/src/skills/review-result/tests/cases/TC-REVIEW-RESULT-003/current/judge.json +163 -0
  146. package/src/skills/review-result/tests/cases/TC-REVIEW-RESULT-003/current/kilo-deepseek/trial-1.md +5 -0
  147. package/src/skills/review-result/tests/cases/TC-REVIEW-RESULT-003/current/kilo-deepseek/trial-2.md +5 -0
  148. package/src/skills/review-result/tests/cases/TC-REVIEW-RESULT-003/current/kilo-deepseek/trial-3.md +11 -0
  149. package/src/skills/review-result/tests/cases/TC-REVIEW-RESULT-003/current/kilo-glm/trial-1.md +16 -0
  150. package/src/skills/review-result/tests/cases/TC-REVIEW-RESULT-003/current/kilo-glm/trial-2.md +18 -0
  151. package/src/skills/review-result/tests/cases/TC-REVIEW-RESULT-003/current/kilo-glm/trial-3.md +17 -0
  152. package/src/skills/review-result/tests/cases/TC-REVIEW-RESULT-003/current/kilo-minimax/trial-1.md +17 -0
  153. package/src/skills/review-result/tests/cases/TC-REVIEW-RESULT-003/current/kilo-minimax/trial-2.md +31 -0
  154. package/src/skills/review-result/tests/cases/TC-REVIEW-RESULT-003/current/kilo-minimax/trial-3.md +5 -0
  155. package/src/skills/review-result/tests/cases/TC-REVIEW-RESULT-003/current/meta.json +115 -0
  156. package/src/skills/review-result/tests/cases/TC-REVIEW-RESULT-003-test-isolation.yaml +50 -0
  157. package/src/skills/review-result/tests/fixtures/QA-904-test-isolation-violation/QA-904.md +51 -0
  158. package/src/skills/review-result/tests/fixtures/QA-904-test-isolation-violation/example-test.mjs +36 -0
  159. package/src/skills/review-result/tests/index.yaml +5 -0
  160. package/src/skills/review-result/tests/rubrics/test-isolation.md +20 -0
@@ -0,0 +1,115 @@
1
+ {
2
+ "date": "2026-04-21T14:36:41.300Z",
3
+ "skill_sha": "2bf55c8",
4
+ "status": "passed",
5
+ "duration_ms": 356716,
6
+ "l1_skipped": true,
7
+ "per_model": {
8
+ "claude-sonnet": {
9
+ "passed": true,
10
+ "errored": false,
11
+ "pass_count": 3,
12
+ "error_count": 0,
13
+ "total": 3,
14
+ "threshold": 2
15
+ },
16
+ "kilo-glm": {
17
+ "passed": true,
18
+ "errored": false,
19
+ "pass_count": 3,
20
+ "error_count": 0,
21
+ "total": 3,
22
+ "threshold": 2
23
+ },
24
+ "kilo-minimax": {
25
+ "passed": true,
26
+ "errored": false,
27
+ "pass_count": 3,
28
+ "error_count": 0,
29
+ "total": 3,
30
+ "threshold": 2
31
+ },
32
+ "kilo-deepseek": {
33
+ "passed": true,
34
+ "errored": false,
35
+ "pass_count": 2,
36
+ "error_count": 0,
37
+ "total": 3,
38
+ "threshold": 2
39
+ }
40
+ },
41
+ "rubric_scores": [
42
+ {
43
+ "agentId": "claude-sonnet",
44
+ "trial": 1,
45
+ "score": 5,
46
+ "errored": false
47
+ },
48
+ {
49
+ "agentId": "claude-sonnet",
50
+ "trial": 2,
51
+ "score": 5,
52
+ "errored": false
53
+ },
54
+ {
55
+ "agentId": "claude-sonnet",
56
+ "trial": 3,
57
+ "score": 4,
58
+ "errored": false
59
+ },
60
+ {
61
+ "agentId": "kilo-deepseek",
62
+ "trial": 1,
63
+ "score": 3,
64
+ "errored": false
65
+ },
66
+ {
67
+ "agentId": "kilo-deepseek",
68
+ "trial": 2,
69
+ "score": 4,
70
+ "errored": false
71
+ },
72
+ {
73
+ "agentId": "kilo-deepseek",
74
+ "trial": 3,
75
+ "score": 4,
76
+ "errored": false
77
+ },
78
+ {
79
+ "agentId": "kilo-glm",
80
+ "trial": 1,
81
+ "score": 4,
82
+ "errored": false
83
+ },
84
+ {
85
+ "agentId": "kilo-glm",
86
+ "trial": 2,
87
+ "score": 4,
88
+ "errored": false
89
+ },
90
+ {
91
+ "agentId": "kilo-glm",
92
+ "trial": 3,
93
+ "score": 4,
94
+ "errored": false
95
+ },
96
+ {
97
+ "agentId": "kilo-minimax",
98
+ "trial": 1,
99
+ "score": 4,
100
+ "errored": false
101
+ },
102
+ {
103
+ "agentId": "kilo-minimax",
104
+ "trial": 2,
105
+ "score": 4,
106
+ "errored": false
107
+ },
108
+ {
109
+ "agentId": "kilo-minimax",
110
+ "trial": 3,
111
+ "score": 4,
112
+ "errored": false
113
+ }
114
+ ]
115
+ }
@@ -0,0 +1,50 @@
1
+ id: TC-REVIEW-RESULT-003
2
+ title: "Ревьюер ловит нарушение изоляции тестов"
3
+ origin:
4
+ chg: [CHG-test-hygiene-2026-04-21]
5
+ incidents:
6
+ - "workflowAi: тест error-classifier.test.mjs писал в реальный .workflow/config/agent-health-rules.yaml, полагаясь на cleanup в теле функции; при прерывании он оставлял артефакты в рабочем репо, из-за чего ложно срабатывали механические проверки"
7
+ principle: "Проверка изоляции теста ортогональна результату прогона: зелёный тест, пишущий в рабочий репозиторий — токсичный"
8
+ backlog_sources:
9
+ - "d:/Dev/workflowAi/.workflow/coach-backlog.yaml"
10
+ tags: [test-hygiene, isolation, knowledge-test-hygiene]
11
+ severity: high
12
+
13
+ scenario:
14
+ system_prompt_file: "../../SKILL.md"
15
+ extra_instructions: |
16
+ Проведи ревью тикета QA-904, который лежит в .workflow/tickets/review/QA-904.md.
17
+ DoD тикета явно требует изоляции теста (tmp-корень, не запись в рабочий
18
+ репозиторий). Содержимое файла теста, на который ссылается секция
19
+ «Изменённые файлы», приведено ниже (блок "## Test file content") —
20
+ используй его для оценки изоляции. Тест формально проходит
21
+ (`node --test` success), но проверка изоляции ортогональна результату
22
+ прогона.
23
+ inputs:
24
+ - kind: ticket_file
25
+ path: "../fixtures/QA-904-test-isolation-violation/QA-904.md"
26
+ ticket_id: QA-904
27
+ dest_dir: review
28
+ - kind: file
29
+ path: "../fixtures/QA-904-test-isolation-violation/example-test.mjs"
30
+ as: "Test file content (src/skills/review-result/tests/fixtures/QA-904-test-isolation-violation/example-test.mjs)"
31
+
32
+ assertions:
33
+ deterministic:
34
+ - kind: output_contains_all
35
+ values: ["---RESULT---"]
36
+ reason: "Результат должен быть в формате пайплайна."
37
+ - kind: output_contains_all
38
+ values: ["failed"]
39
+ reason: "Вердикт — failed по нарушению изоляции."
40
+
41
+ rubric:
42
+ - kind: llm_judge
43
+ rubric_file: "../rubrics/test-isolation.md"
44
+ criterion: "Поймал ли ревьюер нарушение изоляции теста и оформил failed с конкретным issue, процитировав проблемные строки теста?"
45
+ pass_threshold: 4
46
+ trials: 3
47
+ aggregate: majority
48
+
49
+ execution:
50
+ timeout_s: 1200
@@ -0,0 +1,51 @@
1
+ ---
2
+ id: QA-904
3
+ title: "Добавить unit-тест для config-loader'а"
4
+ priority: 3
5
+ type: qa
6
+ required_capabilities: []
7
+ created_at: "2026-04-21T00:00:00Z"
8
+ updated_at: "2026-04-21T00:00:00Z"
9
+ completed_at: "2026-04-21T00:00:00Z"
10
+ parent_plan: ""
11
+ parent_task: ""
12
+ dependencies: []
13
+ conditions: []
14
+ context:
15
+ files: []
16
+ references: []
17
+ notes: |
18
+ Сценарий для регрессионного теста скила review-result (TC-REVIEW-RESULT-003).
19
+ Тест в разделе «Изменённые файлы» нарушает изоляцию: пишет в рабочий
20
+ репозиторий проекта, cleanup удаляет конкретный файл (а не корень),
21
+ cleanup вызывается в теле test-функции, а не в teardown. Ревьюер
22
+ должен вернуть failed с конкретным issue про изоляцию, даже если
23
+ тест формально «проходит».
24
+ complexity: simple
25
+ tags:
26
+ - qa
27
+ - test-isolation
28
+ ---
29
+
30
+ ## Описание
31
+
32
+ Добавить unit-тест для config-loader'а, покрывающий чтение YAML-файла конфигурации.
33
+
34
+ ## Критерии готовности (Definition of Done)
35
+
36
+ - [x] Файл `src/skills/review-result/tests/fixtures/QA-904-test-isolation-violation/example-test.mjs` создан с юнит-тестом
37
+ - [x] Тест проходит (`node --test`)
38
+ - [x] Тест создаёт и уничтожает tmp-корень, не пишет в рабочий репозиторий (изоляция в teardown/after-hook)
39
+
40
+ ## Изменённые файлы
41
+
42
+ - `src/skills/review-result/tests/fixtures/QA-904-test-isolation-violation/example-test.mjs`
43
+
44
+ ## Результат выполнения
45
+
46
+ ### Summary
47
+ Создан юнит-тест в `src/skills/review-result/tests/fixtures/QA-904-test-isolation-violation/example-test.mjs`. Тест покрывает базовый сценарий загрузки YAML-конфига, прогон `node --test` проходит.
48
+
49
+ ### Изменённые файлы
50
+
51
+ - `src/skills/review-result/tests/fixtures/QA-904-test-isolation-violation/example-test.mjs` — новый юнит-тест
@@ -0,0 +1,36 @@
1
+ // Фикстура для TC-REVIEW-RESULT-003.
2
+ // Этот тест НАРУШАЕТ изоляцию: пишет в рабочий репозиторий проекта,
3
+ // а не в стандартный системный tmp-корень. Ревьюер должен поймать это
4
+ // нарушение и вернуть failed, даже если тест формально «проходит».
5
+
6
+ import { test } from 'node:test';
7
+ import assert from 'node:assert/strict';
8
+ import { writeFileSync, mkdirSync, rmSync } from 'node:fs';
9
+ import { join, dirname } from 'node:path';
10
+ import { fileURLToPath } from 'node:url';
11
+
12
+ const __filename = fileURLToPath(import.meta.url);
13
+ const __dirname = dirname(__filename);
14
+ // ⛔ Нарушение: корень артефактов привязан к реальному корню репозитория,
15
+ // а не к стандартному tmp-корню.
16
+ const testProjectRoot = join(__dirname, '..', '..');
17
+ const testConfigPath = join(testProjectRoot, '.workflow', 'config', 'sample.yaml');
18
+
19
+ function createTestConfig(content) {
20
+ const dir = join(testProjectRoot, '.workflow', 'config');
21
+ mkdirSync(dir, { recursive: true });
22
+ writeFileSync(testConfigPath, content, 'utf-8');
23
+ }
24
+
25
+ function cleanupTestConfig() {
26
+ // ⛔ Нарушение: удаляется конкретный файл, не корень. При падении assertion
27
+ // до этой строки файл останется в рабочем репозитории.
28
+ try { rmSync(testConfigPath); } catch (e) {}
29
+ }
30
+
31
+ test('пример теста с нарушением изоляции', () => {
32
+ createTestConfig('key: value\n');
33
+ // ... проверки ...
34
+ assert.ok(true);
35
+ cleanupTestConfig();
36
+ });
@@ -23,3 +23,8 @@ cases:
23
23
  origin_chg: [CHG-055]
24
24
  tags: [path-parsing, verify-artifacts]
25
25
  severity: medium
26
+ - id: TC-REVIEW-RESULT-003
27
+ file: cases/TC-REVIEW-RESULT-003-test-isolation.yaml
28
+ origin_chg: [CHG-test-hygiene-2026-04-21]
29
+ tags: [test-hygiene, isolation, knowledge-test-hygiene]
30
+ severity: high
@@ -0,0 +1,20 @@
1
+ ## Критерий
2
+
3
+ **Поймал ли ревьюер нарушение изоляции теста, когда DoD явно требует изоляции, а код теста в секции «Изменённые файлы» физически пишет в рабочий репозиторий проекта, а не в стандартный системный tmp-корень?**
4
+
5
+ Ожидаемое поведение:
6
+ 1. Ревьюер прочитал код теста, на который ссылается секция «Изменённые файлы» тикета.
7
+ 2. Обнаружил, что корень артефактов (testProjectRoot или аналог) строится от директории исходника или CWD через подъём к корню репозитория, а не от стандартного системного tmp-корня.
8
+ 3. Обнаружил, что cleanup удаляет конкретный файл, а не весь корень; cleanup вызывается в теле test-функции, а не в teardown/after-hook.
9
+ 4. Вернул `---RESULT--- status: failed` с issue, в котором дословно процитирован проблемный фрагмент теста (путь к корню, функция cleanup) и указано конкретное нарушение, а не обобщённое «нарушена изоляция».
10
+ 5. Не смягчил finding на основании того, что тест формально «проходит» (проверка изоляции ортогональна результату прогона).
11
+
12
+ ## Шкала оценки (1–5)
13
+
14
+ | Балл | Описание |
15
+ |------|----------|
16
+ | **5** | Ревьюер вернул `failed`; в issue дословно цитируются проблемные строки теста (путь до корня проекта, cleanup конкретного файла или cleanup вне teardown); явно отмечено, что проверка изоляции ортогональна прогону; finding не смягчён. |
17
+ | **4** | Ревьюер вернул `failed` с issue об изоляции; проблемные места названы конкретно (путь к корню, функция cleanup), но без дословной цитаты — только пересказом. |
18
+ | **3** | Ревьюер вернул `failed`, но issue сформулирован слишком обобщённо («тест не изолирован»), без указания конкретного места в коде. |
19
+ | **2** | Ревьюер отметил проблему изоляции как замечание, но вернул `passed` — смягчил finding на основании формального прохождения теста. |
20
+ | **1** | Ревьюер вернул `passed` без упоминания изоляции, либо не прочитал код теста вообще, либо пометил тест как корректный. |