workflow-ai 1.1.0 → 1.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +61 -0
- package/README.md +37 -377
- package/configs/pipeline.yaml +113 -4
- package/package.json +54 -44
- package/src/lib/operations/tickets.mjs +305 -207
- package/src/lib/utils.mjs +286 -286
- package/src/runner.mjs +316 -35
- package/src/scripts/check-conditions.js +2 -2
- package/src/scripts/check-relevance.js +3 -1
- package/src/scripts/get-next-id.js +144 -41
- package/src/scripts/mark-blocked.js +160 -0
- package/src/scripts/move-ticket.js +305 -83
- package/src/scripts/pick-next-task.js +782 -93
- package/src/skills/__test-cal-001-1777553217513/SKILL.md +2 -0
- package/src/skills/__test-runner-1777553217483/SKILL.md +5 -0
- package/src/skills/coach/SKILL.md +2 -2
- package/src/skills/execute-task/SKILL.md +1 -1
- package/src/skills/manual-testing/SKILL.md +2 -0
- package/src/skills/review-result/SKILL.md +23 -1
- package/src/scripts/tests/timeout-cascade.test.js +0 -28
- package/src/skills/analyze-report/README.md +0 -44
- package/src/skills/analyze-report/algorithms/progress-assessment.md +0 -108
- package/src/skills/analyze-report/knowledge/analysis-frameworks.md +0 -66
- package/src/skills/analyze-report/knowledge/report-structure.md +0 -61
- package/src/skills/analyze-report/scripts/calc-plan-metrics.js +0 -234
- package/src/skills/analyze-report/templates/analysis-report.md +0 -80
- package/src/skills/analyze-report/tests/cases/TC-ANALYZE-REPORT-001/current/claude-sonnet/trial-1.md +0 -5
- package/src/skills/analyze-report/tests/cases/TC-ANALYZE-REPORT-001/current/claude-sonnet/trial-2.md +0 -98
- package/src/skills/analyze-report/tests/cases/TC-ANALYZE-REPORT-001/current/claude-sonnet/trial-3.md +0 -99
- package/src/skills/analyze-report/tests/cases/TC-ANALYZE-REPORT-001/current/judge.json +0 -163
- package/src/skills/analyze-report/tests/cases/TC-ANALYZE-REPORT-001/current/kilo-deepseek/trial-1.md +0 -89
- package/src/skills/analyze-report/tests/cases/TC-ANALYZE-REPORT-001/current/kilo-deepseek/trial-2.md +0 -88
- package/src/skills/analyze-report/tests/cases/TC-ANALYZE-REPORT-001/current/kilo-deepseek/trial-3.md +0 -100
- package/src/skills/analyze-report/tests/cases/TC-ANALYZE-REPORT-001/current/kilo-glm/trial-1.md +0 -77
- package/src/skills/analyze-report/tests/cases/TC-ANALYZE-REPORT-001/current/kilo-glm/trial-2.md +0 -64
- package/src/skills/analyze-report/tests/cases/TC-ANALYZE-REPORT-001/current/kilo-glm/trial-3.md +0 -110
- package/src/skills/analyze-report/tests/cases/TC-ANALYZE-REPORT-001/current/kilo-minimax/trial-1.md +0 -74
- package/src/skills/analyze-report/tests/cases/TC-ANALYZE-REPORT-001/current/kilo-minimax/trial-2.md +0 -38
- package/src/skills/analyze-report/tests/cases/TC-ANALYZE-REPORT-001/current/kilo-minimax/trial-3.md +0 -61
- package/src/skills/analyze-report/tests/cases/TC-ANALYZE-REPORT-001/current/meta.json +0 -115
- package/src/skills/analyze-report/tests/cases/TC-ANALYZE-REPORT-001-evidence-from-log.yaml +0 -60
- package/src/skills/analyze-report/tests/cases/TC-ANALYZE-REPORT-002/current/claude-sonnet/trial-1.md +0 -90
- package/src/skills/analyze-report/tests/cases/TC-ANALYZE-REPORT-002/current/claude-sonnet/trial-2.md +0 -89
- package/src/skills/analyze-report/tests/cases/TC-ANALYZE-REPORT-002/current/claude-sonnet/trial-3.md +0 -5
- package/src/skills/analyze-report/tests/cases/TC-ANALYZE-REPORT-002/current/judge.json +0 -163
- package/src/skills/analyze-report/tests/cases/TC-ANALYZE-REPORT-002/current/kilo-deepseek/trial-1.md +0 -84
- package/src/skills/analyze-report/tests/cases/TC-ANALYZE-REPORT-002/current/kilo-deepseek/trial-2.md +0 -77
- package/src/skills/analyze-report/tests/cases/TC-ANALYZE-REPORT-002/current/kilo-deepseek/trial-3.md +0 -89
- package/src/skills/analyze-report/tests/cases/TC-ANALYZE-REPORT-002/current/kilo-glm/trial-1.md +0 -103
- package/src/skills/analyze-report/tests/cases/TC-ANALYZE-REPORT-002/current/kilo-glm/trial-2.md +0 -103
- package/src/skills/analyze-report/tests/cases/TC-ANALYZE-REPORT-002/current/kilo-glm/trial-3.md +0 -103
- package/src/skills/analyze-report/tests/cases/TC-ANALYZE-REPORT-002/current/kilo-minimax/trial-1.md +0 -93
- package/src/skills/analyze-report/tests/cases/TC-ANALYZE-REPORT-002/current/kilo-minimax/trial-2.md +0 -93
- package/src/skills/analyze-report/tests/cases/TC-ANALYZE-REPORT-002/current/kilo-minimax/trial-3.md +0 -86
- package/src/skills/analyze-report/tests/cases/TC-ANALYZE-REPORT-002/current/meta.json +0 -115
- package/src/skills/analyze-report/tests/cases/TC-ANALYZE-REPORT-002-result-block-format.yaml +0 -44
- package/src/skills/analyze-report/tests/fixtures/REPORT-002-incorrect-attribution.md +0 -27
- package/src/skills/analyze-report/tests/fixtures/pipeline-2026-04-06_qa-001-skip.log +0 -32
- package/src/skills/analyze-report/tests/index.yaml +0 -25
- package/src/skills/analyze-report/tests/rubrics/evidence-from-log.md +0 -22
- package/src/skills/analyze-report/tests/rubrics/result-block-format.md +0 -22
- package/src/skills/analyze-report/workflows/progress.md +0 -158
- package/src/skills/analyze-report/workflows/retrospective.md +0 -143
- package/src/skills/coach/README.md +0 -43
- package/src/skills/coach/SKILL.md.legacy +0 -157
- package/src/skills/coach/algorithms/gap-analysis.md +0 -69
- package/src/skills/coach/algorithms/improvement-prioritization.md +0 -62
- package/src/skills/coach/algorithms/skill-scoring.md +0 -80
- package/src/skills/coach/knowledge/audit-applied-changes-clean.txt +0 -11
- package/src/skills/coach/knowledge/backlog-management.md +0 -67
- package/src/skills/coach/knowledge/backlog-management.md.legacy +0 -90
- package/src/skills/coach/knowledge/common-antipatterns.md +0 -76
- package/src/skills/coach/knowledge/prompt-engineering.md +0 -45
- package/src/skills/coach/knowledge/shared-knowledge-guide.md +0 -44
- package/src/skills/coach/knowledge/skill-anatomy.md +0 -49
- package/src/skills/coach/knowledge/test-authorship.md +0 -141
- package/src/skills/coach/templates/audit-report.md +0 -39
- package/src/skills/coach/templates/coach-backlog-init.yaml +0 -14
- package/src/skills/coach/templates/coach-backlog-init.yaml.legacy +0 -10
- package/src/skills/coach/templates/improvement-plan.md +0 -42
- package/src/skills/coach/templates/new-skill.md +0 -95
- package/src/skills/coach/tests/cases/TC-COACH-001/current/claude-sonnet/trial-1.md +0 -58
- package/src/skills/coach/tests/cases/TC-COACH-001/current/claude-sonnet/trial-2.md +0 -65
- package/src/skills/coach/tests/cases/TC-COACH-001/current/claude-sonnet/trial-3.md +0 -58
- package/src/skills/coach/tests/cases/TC-COACH-001/current/judge.json +0 -151
- package/src/skills/coach/tests/cases/TC-COACH-001/current/kilo-deepseek/trial-1.md +0 -46
- package/src/skills/coach/tests/cases/TC-COACH-001/current/kilo-deepseek/trial-2.md +0 -0
- package/src/skills/coach/tests/cases/TC-COACH-001/current/kilo-deepseek/trial-3.md +0 -75
- package/src/skills/coach/tests/cases/TC-COACH-001/current/kilo-glm/trial-1.md +0 -81
- package/src/skills/coach/tests/cases/TC-COACH-001/current/kilo-glm/trial-2.md +0 -101
- package/src/skills/coach/tests/cases/TC-COACH-001/current/kilo-glm/trial-3.md +0 -91
- package/src/skills/coach/tests/cases/TC-COACH-001/current/kilo-minimax/trial-1.md +0 -48
- package/src/skills/coach/tests/cases/TC-COACH-001/current/kilo-minimax/trial-2.md +0 -30
- package/src/skills/coach/tests/cases/TC-COACH-001/current/kilo-minimax/trial-3.md +0 -55
- package/src/skills/coach/tests/cases/TC-COACH-001/current/meta.json +0 -94
- package/src/skills/coach/tests/cases/TC-COACH-001-evidence-based-temporal-diagram.yaml +0 -53
- package/src/skills/coach/tests/cases/TC-COACH-002/current/claude-sonnet/trial-1.md +0 -46
- package/src/skills/coach/tests/cases/TC-COACH-002/current/claude-sonnet/trial-2.md +0 -50
- package/src/skills/coach/tests/cases/TC-COACH-002/current/claude-sonnet/trial-3.md +0 -48
- package/src/skills/coach/tests/cases/TC-COACH-002/current/judge.json +0 -151
- package/src/skills/coach/tests/cases/TC-COACH-002/current/kilo-deepseek/trial-1.md +0 -0
- package/src/skills/coach/tests/cases/TC-COACH-002/current/kilo-deepseek/trial-2.md +0 -37
- package/src/skills/coach/tests/cases/TC-COACH-002/current/kilo-deepseek/trial-3.md +0 -30
- package/src/skills/coach/tests/cases/TC-COACH-002/current/kilo-glm/trial-1.md +0 -23
- package/src/skills/coach/tests/cases/TC-COACH-002/current/kilo-glm/trial-2.md +0 -29
- package/src/skills/coach/tests/cases/TC-COACH-002/current/kilo-glm/trial-3.md +0 -35
- package/src/skills/coach/tests/cases/TC-COACH-002/current/kilo-minimax/trial-1.md +0 -13
- package/src/skills/coach/tests/cases/TC-COACH-002/current/kilo-minimax/trial-2.md +0 -19
- package/src/skills/coach/tests/cases/TC-COACH-002/current/kilo-minimax/trial-3.md +0 -33
- package/src/skills/coach/tests/cases/TC-COACH-002/current/meta.json +0 -94
- package/src/skills/coach/tests/cases/TC-COACH-002-root-cause-first.yaml +0 -57
- package/src/skills/coach/tests/fixtures/pipeline-2026-04-06_id-collision.log +0 -77
- package/src/skills/coach/tests/index.yaml +0 -29
- package/src/skills/coach/tests/rubrics/calibration/evidence-based-bad.md +0 -13
- package/src/skills/coach/tests/rubrics/calibration/evidence-based-good.md +0 -29
- package/src/skills/coach/tests/rubrics/evidence-based.md +0 -26
- package/src/skills/coach/tests/rubrics/root-cause-first.md +0 -21
- package/src/skills/coach/workflows/analyze.md +0 -79
- package/src/skills/coach/workflows/analyze.md.legacy +0 -64
- package/src/skills/coach/workflows/audit.md +0 -74
- package/src/skills/coach/workflows/audit.md.legacy +0 -59
- package/src/skills/coach/workflows/create.md +0 -80
- package/src/skills/coach/workflows/create.md.legacy +0 -67
- package/src/skills/coach/workflows/improve.md +0 -71
- package/src/skills/coach/workflows/improve.md.legacy +0 -60
- package/src/skills/coach/workflows/research.md +0 -55
- package/src/skills/coach/workflows/review.md +0 -52
- package/src/skills/coach/workflows/review.md.legacy +0 -48
- package/src/skills/coach/workflows/test.md +0 -97
- package/src/skills/create-plan/README.md +0 -39
- package/src/skills/create-plan/algorithms/risk-assessment.md +0 -73
- package/src/skills/create-plan/knowledge/plan-completeness.md +0 -67
- package/src/skills/create-plan/knowledge/plan-lifecycle.md +0 -33
- package/src/skills/create-plan/knowledge/task-verification-pairs.md +0 -151
- package/src/skills/create-plan/knowledge/test-hygiene.md +0 -47
- package/src/skills/create-plan/scripts/validate-completeness.js +0 -182
- package/src/skills/create-plan/tests/cases/TC-CREATE-PLAN-001/current/claude-sonnet/trial-1.md +0 -5
- package/src/skills/create-plan/tests/cases/TC-CREATE-PLAN-001/current/claude-sonnet/trial-2.md +0 -39
- package/src/skills/create-plan/tests/cases/TC-CREATE-PLAN-001/current/claude-sonnet/trial-3.md +0 -35
- package/src/skills/create-plan/tests/cases/TC-CREATE-PLAN-001/current/judge.json +0 -167
- package/src/skills/create-plan/tests/cases/TC-CREATE-PLAN-001/current/kilo-deepseek/trial-1.md +0 -5
- package/src/skills/create-plan/tests/cases/TC-CREATE-PLAN-001/current/kilo-deepseek/trial-2.md +0 -10
- package/src/skills/create-plan/tests/cases/TC-CREATE-PLAN-001/current/kilo-deepseek/trial-3.md +0 -5
- package/src/skills/create-plan/tests/cases/TC-CREATE-PLAN-001/current/kilo-glm/trial-1.md +0 -26
- package/src/skills/create-plan/tests/cases/TC-CREATE-PLAN-001/current/kilo-glm/trial-2.md +0 -86
- package/src/skills/create-plan/tests/cases/TC-CREATE-PLAN-001/current/kilo-glm/trial-3.md +0 -5
- package/src/skills/create-plan/tests/cases/TC-CREATE-PLAN-001/current/kilo-minimax/trial-1.md +0 -11
- package/src/skills/create-plan/tests/cases/TC-CREATE-PLAN-001/current/kilo-minimax/trial-2.md +0 -15
- package/src/skills/create-plan/tests/cases/TC-CREATE-PLAN-001/current/kilo-minimax/trial-3.md +0 -14
- package/src/skills/create-plan/tests/cases/TC-CREATE-PLAN-001/current/meta.json +0 -119
- package/src/skills/create-plan/tests/cases/TC-CREATE-PLAN-001-validate-completeness.yaml +0 -41
- package/src/skills/create-plan/tests/cases/TC-CREATE-PLAN-002/current/claude-sonnet/trial-1.md +0 -25
- package/src/skills/create-plan/tests/cases/TC-CREATE-PLAN-002/current/claude-sonnet/trial-2.md +0 -30
- package/src/skills/create-plan/tests/cases/TC-CREATE-PLAN-002/current/claude-sonnet/trial-3.md +0 -37
- package/src/skills/create-plan/tests/cases/TC-CREATE-PLAN-002/current/judge.json +0 -164
- package/src/skills/create-plan/tests/cases/TC-CREATE-PLAN-002/current/kilo-deepseek/trial-1.md +0 -3
- package/src/skills/create-plan/tests/cases/TC-CREATE-PLAN-002/current/kilo-deepseek/trial-2.md +0 -11
- package/src/skills/create-plan/tests/cases/TC-CREATE-PLAN-002/current/kilo-deepseek/trial-3.md +0 -13
- package/src/skills/create-plan/tests/cases/TC-CREATE-PLAN-002/current/kilo-glm/trial-1.md +0 -44
- package/src/skills/create-plan/tests/cases/TC-CREATE-PLAN-002/current/kilo-glm/trial-2.md +0 -5
- package/src/skills/create-plan/tests/cases/TC-CREATE-PLAN-002/current/kilo-glm/trial-3.md +0 -49
- package/src/skills/create-plan/tests/cases/TC-CREATE-PLAN-002/current/kilo-minimax/trial-1.md +0 -6
- package/src/skills/create-plan/tests/cases/TC-CREATE-PLAN-002/current/kilo-minimax/trial-2.md +0 -11
- package/src/skills/create-plan/tests/cases/TC-CREATE-PLAN-002/current/kilo-minimax/trial-3.md +0 -16
- package/src/skills/create-plan/tests/cases/TC-CREATE-PLAN-002/current/meta.json +0 -116
- package/src/skills/create-plan/tests/cases/TC-CREATE-PLAN-002-task-granularity.yaml +0 -39
- package/src/skills/create-plan/tests/index.yaml +0 -25
- package/src/skills/create-plan/tests/rubrics/task-granularity.md +0 -21
- package/src/skills/create-plan/tests/rubrics/validate-completeness.md +0 -21
- package/src/skills/create-plan/workflows/create.md +0 -136
- package/src/skills/create-report/README.md +0 -40
- package/src/skills/create-report/algorithms/metric-calculation.md +0 -93
- package/src/skills/create-report/knowledge/report-metrics.md +0 -82
- package/src/skills/create-report/scripts/calc-metrics.js +0 -383
- package/src/skills/create-report/tests/cases/TC-CREATE-REPORT-001/current/claude-sonnet/trial-1.md +0 -25
- package/src/skills/create-report/tests/cases/TC-CREATE-REPORT-001/current/claude-sonnet/trial-2.md +0 -26
- package/src/skills/create-report/tests/cases/TC-CREATE-REPORT-001/current/claude-sonnet/trial-3.md +0 -28
- package/src/skills/create-report/tests/cases/TC-CREATE-REPORT-001/current/judge.json +0 -163
- package/src/skills/create-report/tests/cases/TC-CREATE-REPORT-001/current/kilo-deepseek/trial-1.md +0 -4
- package/src/skills/create-report/tests/cases/TC-CREATE-REPORT-001/current/kilo-deepseek/trial-2.md +0 -3
- package/src/skills/create-report/tests/cases/TC-CREATE-REPORT-001/current/kilo-deepseek/trial-3.md +0 -6
- package/src/skills/create-report/tests/cases/TC-CREATE-REPORT-001/current/kilo-glm/trial-1.md +0 -8
- package/src/skills/create-report/tests/cases/TC-CREATE-REPORT-001/current/kilo-glm/trial-2.md +0 -12
- package/src/skills/create-report/tests/cases/TC-CREATE-REPORT-001/current/kilo-glm/trial-3.md +0 -7
- package/src/skills/create-report/tests/cases/TC-CREATE-REPORT-001/current/kilo-minimax/trial-1.md +0 -12
- package/src/skills/create-report/tests/cases/TC-CREATE-REPORT-001/current/kilo-minimax/trial-2.md +0 -22
- package/src/skills/create-report/tests/cases/TC-CREATE-REPORT-001/current/kilo-minimax/trial-3.md +0 -13
- package/src/skills/create-report/tests/cases/TC-CREATE-REPORT-001/current/meta.json +0 -115
- package/src/skills/create-report/tests/cases/TC-CREATE-REPORT-001-root-cause-attribution.yaml +0 -57
- package/src/skills/create-report/tests/index.yaml +0 -20
- package/src/skills/create-report/tests/rubrics/root-cause-attribution.md +0 -21
- package/src/skills/create-report/workflows/standard.md +0 -175
- package/src/skills/decompose-gaps/README.md +0 -39
- package/src/skills/decompose-gaps/algorithms/scope-check.md +0 -110
- package/src/skills/decompose-gaps/knowledge/scope-validation.md +0 -65
- package/src/skills/decompose-gaps/tests/cases/TC-DECOMPOSE-GAPS-001/current/claude-sonnet/trial-1.md +0 -41
- package/src/skills/decompose-gaps/tests/cases/TC-DECOMPOSE-GAPS-001/current/claude-sonnet/trial-2.md +0 -41
- package/src/skills/decompose-gaps/tests/cases/TC-DECOMPOSE-GAPS-001/current/claude-sonnet/trial-3.md +0 -56
- package/src/skills/decompose-gaps/tests/cases/TC-DECOMPOSE-GAPS-001/current/judge.json +0 -164
- package/src/skills/decompose-gaps/tests/cases/TC-DECOMPOSE-GAPS-001/current/kilo-deepseek/trial-1.md +0 -25
- package/src/skills/decompose-gaps/tests/cases/TC-DECOMPOSE-GAPS-001/current/kilo-deepseek/trial-2.md +0 -17
- package/src/skills/decompose-gaps/tests/cases/TC-DECOMPOSE-GAPS-001/current/kilo-deepseek/trial-3.md +0 -22
- package/src/skills/decompose-gaps/tests/cases/TC-DECOMPOSE-GAPS-001/current/kilo-glm/trial-1.md +0 -25
- package/src/skills/decompose-gaps/tests/cases/TC-DECOMPOSE-GAPS-001/current/kilo-glm/trial-2.md +0 -5
- package/src/skills/decompose-gaps/tests/cases/TC-DECOMPOSE-GAPS-001/current/kilo-glm/trial-3.md +0 -29
- package/src/skills/decompose-gaps/tests/cases/TC-DECOMPOSE-GAPS-001/current/kilo-minimax/trial-1.md +0 -27
- package/src/skills/decompose-gaps/tests/cases/TC-DECOMPOSE-GAPS-001/current/kilo-minimax/trial-2.md +0 -35
- package/src/skills/decompose-gaps/tests/cases/TC-DECOMPOSE-GAPS-001/current/kilo-minimax/trial-3.md +0 -18
- package/src/skills/decompose-gaps/tests/cases/TC-DECOMPOSE-GAPS-001/current/meta.json +0 -116
- package/src/skills/decompose-gaps/tests/cases/TC-DECOMPOSE-GAPS-001-scope-exclusion.yaml +0 -46
- package/src/skills/decompose-gaps/tests/cases/TC-DECOMPOSE-GAPS-002/current/claude-sonnet/trial-1.md +0 -27
- package/src/skills/decompose-gaps/tests/cases/TC-DECOMPOSE-GAPS-002/current/claude-sonnet/trial-2.md +0 -30
- package/src/skills/decompose-gaps/tests/cases/TC-DECOMPOSE-GAPS-002/current/claude-sonnet/trial-3.md +0 -27
- package/src/skills/decompose-gaps/tests/cases/TC-DECOMPOSE-GAPS-002/current/judge.json +0 -163
- package/src/skills/decompose-gaps/tests/cases/TC-DECOMPOSE-GAPS-002/current/kilo-deepseek/trial-1.md +0 -0
- package/src/skills/decompose-gaps/tests/cases/TC-DECOMPOSE-GAPS-002/current/kilo-deepseek/trial-2.md +0 -15
- package/src/skills/decompose-gaps/tests/cases/TC-DECOMPOSE-GAPS-002/current/kilo-deepseek/trial-3.md +0 -7
- package/src/skills/decompose-gaps/tests/cases/TC-DECOMPOSE-GAPS-002/current/kilo-glm/trial-1.md +0 -21
- package/src/skills/decompose-gaps/tests/cases/TC-DECOMPOSE-GAPS-002/current/kilo-glm/trial-2.md +0 -38
- package/src/skills/decompose-gaps/tests/cases/TC-DECOMPOSE-GAPS-002/current/kilo-glm/trial-3.md +0 -16
- package/src/skills/decompose-gaps/tests/cases/TC-DECOMPOSE-GAPS-002/current/kilo-minimax/trial-1.md +0 -5
- package/src/skills/decompose-gaps/tests/cases/TC-DECOMPOSE-GAPS-002/current/kilo-minimax/trial-2.md +0 -10
- package/src/skills/decompose-gaps/tests/cases/TC-DECOMPOSE-GAPS-002/current/kilo-minimax/trial-3.md +0 -9
- package/src/skills/decompose-gaps/tests/cases/TC-DECOMPOSE-GAPS-002/current/meta.json +0 -115
- package/src/skills/decompose-gaps/tests/cases/TC-DECOMPOSE-GAPS-002-glob-before-write.yaml +0 -36
- package/src/skills/decompose-gaps/tests/cases/TC-DECOMPOSE-GAPS-003/current/claude-sonnet/trial-1.md +0 -30
- package/src/skills/decompose-gaps/tests/cases/TC-DECOMPOSE-GAPS-003/current/claude-sonnet/trial-2.md +0 -30
- package/src/skills/decompose-gaps/tests/cases/TC-DECOMPOSE-GAPS-003/current/claude-sonnet/trial-3.md +0 -30
- package/src/skills/decompose-gaps/tests/cases/TC-DECOMPOSE-GAPS-003/current/judge.json +0 -165
- package/src/skills/decompose-gaps/tests/cases/TC-DECOMPOSE-GAPS-003/current/kilo-deepseek/trial-1.md +0 -5
- package/src/skills/decompose-gaps/tests/cases/TC-DECOMPOSE-GAPS-003/current/kilo-deepseek/trial-2.md +0 -26
- package/src/skills/decompose-gaps/tests/cases/TC-DECOMPOSE-GAPS-003/current/kilo-deepseek/trial-3.md +0 -5
- package/src/skills/decompose-gaps/tests/cases/TC-DECOMPOSE-GAPS-003/current/kilo-glm/trial-1.md +0 -39
- package/src/skills/decompose-gaps/tests/cases/TC-DECOMPOSE-GAPS-003/current/kilo-glm/trial-2.md +0 -37
- package/src/skills/decompose-gaps/tests/cases/TC-DECOMPOSE-GAPS-003/current/kilo-glm/trial-3.md +0 -45
- package/src/skills/decompose-gaps/tests/cases/TC-DECOMPOSE-GAPS-003/current/kilo-minimax/trial-1.md +0 -26
- package/src/skills/decompose-gaps/tests/cases/TC-DECOMPOSE-GAPS-003/current/kilo-minimax/trial-2.md +0 -27
- package/src/skills/decompose-gaps/tests/cases/TC-DECOMPOSE-GAPS-003/current/kilo-minimax/trial-3.md +0 -7
- package/src/skills/decompose-gaps/tests/cases/TC-DECOMPOSE-GAPS-003/current/meta.json +0 -117
- package/src/skills/decompose-gaps/tests/cases/TC-DECOMPOSE-GAPS-003-parent-plan-mandatory.yaml +0 -41
- package/src/skills/decompose-gaps/tests/index.yaml +0 -30
- package/src/skills/decompose-gaps/tests/rubrics/glob-before-write.md +0 -21
- package/src/skills/decompose-gaps/tests/rubrics/parent-plan-mandatory.md +0 -22
- package/src/skills/decompose-gaps/tests/rubrics/scope-exclusion.md +0 -21
- package/src/skills/decompose-gaps/workflows/decompose.md +0 -123
- package/src/skills/decompose-plan/README.md +0 -43
- package/src/skills/decompose-plan/algorithms/deduplication.md +0 -101
- package/src/skills/decompose-plan/knowledge/atomicity-checklist.md +0 -139
- package/src/skills/decompose-plan/knowledge/capabilities.md +0 -68
- package/src/skills/decompose-plan/knowledge/human-task-rules.md +0 -82
- package/src/skills/decompose-plan/knowledge/scope-guard-checklist.md +0 -73
- package/src/skills/decompose-plan/scripts/check-atomicity-limit.js +0 -47
- package/src/skills/decompose-plan/scripts/check-duplicates.js +0 -323
- package/src/skills/decompose-plan/scripts/verify-atomicity.js +0 -408
- package/src/skills/decompose-plan/tests/cases/TC-DECOMPOSE-PLAN-001/current/claude-sonnet/trial-1.md +0 -30
- package/src/skills/decompose-plan/tests/cases/TC-DECOMPOSE-PLAN-001/current/claude-sonnet/trial-2.md +0 -36
- package/src/skills/decompose-plan/tests/cases/TC-DECOMPOSE-PLAN-001/current/claude-sonnet/trial-3.md +0 -37
- package/src/skills/decompose-plan/tests/cases/TC-DECOMPOSE-PLAN-001/current/judge.json +0 -163
- package/src/skills/decompose-plan/tests/cases/TC-DECOMPOSE-PLAN-001/current/kilo-deepseek/trial-1.md +0 -20
- package/src/skills/decompose-plan/tests/cases/TC-DECOMPOSE-PLAN-001/current/kilo-deepseek/trial-2.md +0 -17
- package/src/skills/decompose-plan/tests/cases/TC-DECOMPOSE-PLAN-001/current/kilo-deepseek/trial-3.md +0 -28
- package/src/skills/decompose-plan/tests/cases/TC-DECOMPOSE-PLAN-001/current/kilo-glm/trial-1.md +0 -114
- package/src/skills/decompose-plan/tests/cases/TC-DECOMPOSE-PLAN-001/current/kilo-glm/trial-2.md +0 -137
- package/src/skills/decompose-plan/tests/cases/TC-DECOMPOSE-PLAN-001/current/kilo-glm/trial-3.md +0 -188
- package/src/skills/decompose-plan/tests/cases/TC-DECOMPOSE-PLAN-001/current/kilo-minimax/trial-1.md +0 -0
- package/src/skills/decompose-plan/tests/cases/TC-DECOMPOSE-PLAN-001/current/kilo-minimax/trial-2.md +0 -32
- package/src/skills/decompose-plan/tests/cases/TC-DECOMPOSE-PLAN-001/current/kilo-minimax/trial-3.md +0 -110
- package/src/skills/decompose-plan/tests/cases/TC-DECOMPOSE-PLAN-001/current/meta.json +0 -115
- package/src/skills/decompose-plan/tests/cases/TC-DECOMPOSE-PLAN-001-atomicity-no-1to1.yaml +0 -56
- package/src/skills/decompose-plan/tests/cases/TC-DECOMPOSE-PLAN-002/current/claude-sonnet/trial-1.md +0 -47
- package/src/skills/decompose-plan/tests/cases/TC-DECOMPOSE-PLAN-002/current/claude-sonnet/trial-2.md +0 -54
- package/src/skills/decompose-plan/tests/cases/TC-DECOMPOSE-PLAN-002/current/claude-sonnet/trial-3.md +0 -43
- package/src/skills/decompose-plan/tests/cases/TC-DECOMPOSE-PLAN-002/current/judge.json +0 -163
- package/src/skills/decompose-plan/tests/cases/TC-DECOMPOSE-PLAN-002/current/kilo-deepseek/trial-1.md +0 -15
- package/src/skills/decompose-plan/tests/cases/TC-DECOMPOSE-PLAN-002/current/kilo-deepseek/trial-2.md +0 -5
- package/src/skills/decompose-plan/tests/cases/TC-DECOMPOSE-PLAN-002/current/kilo-deepseek/trial-3.md +0 -12
- package/src/skills/decompose-plan/tests/cases/TC-DECOMPOSE-PLAN-002/current/kilo-glm/trial-1.md +0 -34
- package/src/skills/decompose-plan/tests/cases/TC-DECOMPOSE-PLAN-002/current/kilo-glm/trial-2.md +0 -30
- package/src/skills/decompose-plan/tests/cases/TC-DECOMPOSE-PLAN-002/current/kilo-glm/trial-3.md +0 -35
- package/src/skills/decompose-plan/tests/cases/TC-DECOMPOSE-PLAN-002/current/kilo-minimax/trial-1.md +0 -0
- package/src/skills/decompose-plan/tests/cases/TC-DECOMPOSE-PLAN-002/current/kilo-minimax/trial-2.md +0 -31
- package/src/skills/decompose-plan/tests/cases/TC-DECOMPOSE-PLAN-002/current/kilo-minimax/trial-3.md +0 -0
- package/src/skills/decompose-plan/tests/cases/TC-DECOMPOSE-PLAN-002/current/meta.json +0 -115
- package/src/skills/decompose-plan/tests/cases/TC-DECOMPOSE-PLAN-002-get-next-id-mandatory.yaml +0 -44
- package/src/skills/decompose-plan/tests/cases/TC-DECOMPOSE-PLAN-003/current/claude-sonnet/trial-1.md +0 -21
- package/src/skills/decompose-plan/tests/cases/TC-DECOMPOSE-PLAN-003/current/claude-sonnet/trial-2.md +0 -38
- package/src/skills/decompose-plan/tests/cases/TC-DECOMPOSE-PLAN-003/current/claude-sonnet/trial-3.md +0 -30
- package/src/skills/decompose-plan/tests/cases/TC-DECOMPOSE-PLAN-003/current/judge.json +0 -163
- package/src/skills/decompose-plan/tests/cases/TC-DECOMPOSE-PLAN-003/current/kilo-deepseek/trial-1.md +0 -31
- package/src/skills/decompose-plan/tests/cases/TC-DECOMPOSE-PLAN-003/current/kilo-deepseek/trial-2.md +0 -35
- package/src/skills/decompose-plan/tests/cases/TC-DECOMPOSE-PLAN-003/current/kilo-deepseek/trial-3.md +0 -48
- package/src/skills/decompose-plan/tests/cases/TC-DECOMPOSE-PLAN-003/current/kilo-glm/trial-1.md +0 -167
- package/src/skills/decompose-plan/tests/cases/TC-DECOMPOSE-PLAN-003/current/kilo-glm/trial-2.md +0 -62
- package/src/skills/decompose-plan/tests/cases/TC-DECOMPOSE-PLAN-003/current/kilo-glm/trial-3.md +0 -174
- package/src/skills/decompose-plan/tests/cases/TC-DECOMPOSE-PLAN-003/current/kilo-minimax/trial-1.md +0 -0
- package/src/skills/decompose-plan/tests/cases/TC-DECOMPOSE-PLAN-003/current/kilo-minimax/trial-2.md +0 -0
- package/src/skills/decompose-plan/tests/cases/TC-DECOMPOSE-PLAN-003/current/kilo-minimax/trial-3.md +0 -0
- package/src/skills/decompose-plan/tests/cases/TC-DECOMPOSE-PLAN-003/current/meta.json +0 -115
- package/src/skills/decompose-plan/tests/cases/TC-DECOMPOSE-PLAN-003-verbatim-dod-transfer.yaml +0 -42
- package/src/skills/decompose-plan/tests/cases/TC-DECOMPOSE-PLAN-004/current/claude-sonnet/trial-1.md +0 -55
- package/src/skills/decompose-plan/tests/cases/TC-DECOMPOSE-PLAN-004/current/claude-sonnet/trial-2.md +0 -49
- package/src/skills/decompose-plan/tests/cases/TC-DECOMPOSE-PLAN-004/current/claude-sonnet/trial-3.md +0 -49
- package/src/skills/decompose-plan/tests/cases/TC-DECOMPOSE-PLAN-004/current/judge.json +0 -163
- package/src/skills/decompose-plan/tests/cases/TC-DECOMPOSE-PLAN-004/current/kilo-deepseek/trial-1.md +0 -104
- package/src/skills/decompose-plan/tests/cases/TC-DECOMPOSE-PLAN-004/current/kilo-deepseek/trial-2.md +0 -45
- package/src/skills/decompose-plan/tests/cases/TC-DECOMPOSE-PLAN-004/current/kilo-deepseek/trial-3.md +0 -58
- package/src/skills/decompose-plan/tests/cases/TC-DECOMPOSE-PLAN-004/current/kilo-glm/trial-1.md +0 -193
- package/src/skills/decompose-plan/tests/cases/TC-DECOMPOSE-PLAN-004/current/kilo-glm/trial-2.md +0 -202
- package/src/skills/decompose-plan/tests/cases/TC-DECOMPOSE-PLAN-004/current/kilo-glm/trial-3.md +0 -155
- package/src/skills/decompose-plan/tests/cases/TC-DECOMPOSE-PLAN-004/current/kilo-minimax/trial-1.md +0 -52
- package/src/skills/decompose-plan/tests/cases/TC-DECOMPOSE-PLAN-004/current/kilo-minimax/trial-2.md +0 -17
- package/src/skills/decompose-plan/tests/cases/TC-DECOMPOSE-PLAN-004/current/kilo-minimax/trial-3.md +0 -0
- package/src/skills/decompose-plan/tests/cases/TC-DECOMPOSE-PLAN-004/current/meta.json +0 -115
- package/src/skills/decompose-plan/tests/cases/TC-DECOMPOSE-PLAN-004-executor-atomicity.yaml +0 -64
- package/src/skills/decompose-plan/tests/cases/TC-DECOMPOSE-PLAN-005/current/claude-sonnet/trial-1.md +0 -59
- package/src/skills/decompose-plan/tests/cases/TC-DECOMPOSE-PLAN-005/current/claude-sonnet/trial-2.md +0 -204
- package/src/skills/decompose-plan/tests/cases/TC-DECOMPOSE-PLAN-005/current/claude-sonnet/trial-3.md +0 -213
- package/src/skills/decompose-plan/tests/cases/TC-DECOMPOSE-PLAN-005/current/judge.json +0 -163
- package/src/skills/decompose-plan/tests/cases/TC-DECOMPOSE-PLAN-005/current/kilo-deepseek/trial-1.md +0 -0
- package/src/skills/decompose-plan/tests/cases/TC-DECOMPOSE-PLAN-005/current/kilo-deepseek/trial-2.md +0 -57
- package/src/skills/decompose-plan/tests/cases/TC-DECOMPOSE-PLAN-005/current/kilo-deepseek/trial-3.md +0 -54
- package/src/skills/decompose-plan/tests/cases/TC-DECOMPOSE-PLAN-005/current/kilo-glm/trial-1.md +0 -147
- package/src/skills/decompose-plan/tests/cases/TC-DECOMPOSE-PLAN-005/current/kilo-glm/trial-2.md +0 -165
- package/src/skills/decompose-plan/tests/cases/TC-DECOMPOSE-PLAN-005/current/kilo-glm/trial-3.md +0 -133
- package/src/skills/decompose-plan/tests/cases/TC-DECOMPOSE-PLAN-005/current/kilo-minimax/trial-1.md +0 -81
- package/src/skills/decompose-plan/tests/cases/TC-DECOMPOSE-PLAN-005/current/kilo-minimax/trial-2.md +0 -108
- package/src/skills/decompose-plan/tests/cases/TC-DECOMPOSE-PLAN-005/current/kilo-minimax/trial-3.md +0 -3
- package/src/skills/decompose-plan/tests/cases/TC-DECOMPOSE-PLAN-005/current/meta.json +0 -114
- package/src/skills/decompose-plan/tests/cases/TC-DECOMPOSE-PLAN-005-capabilities-registry.yaml +0 -78
- package/src/skills/decompose-plan/tests/cases/TC-DECOMPOSE-PLAN-006/current/claude-sonnet/trial-1.md +0 -225
- package/src/skills/decompose-plan/tests/cases/TC-DECOMPOSE-PLAN-006/current/claude-sonnet/trial-2.md +0 -66
- package/src/skills/decompose-plan/tests/cases/TC-DECOMPOSE-PLAN-006/current/claude-sonnet/trial-3.md +0 -36
- package/src/skills/decompose-plan/tests/cases/TC-DECOMPOSE-PLAN-006/current/judge.json +0 -163
- package/src/skills/decompose-plan/tests/cases/TC-DECOMPOSE-PLAN-006/current/kilo-deepseek/trial-1.md +0 -42
- package/src/skills/decompose-plan/tests/cases/TC-DECOMPOSE-PLAN-006/current/kilo-deepseek/trial-2.md +0 -67
- package/src/skills/decompose-plan/tests/cases/TC-DECOMPOSE-PLAN-006/current/kilo-deepseek/trial-3.md +0 -40
- package/src/skills/decompose-plan/tests/cases/TC-DECOMPOSE-PLAN-006/current/kilo-glm/trial-1.md +0 -122
- package/src/skills/decompose-plan/tests/cases/TC-DECOMPOSE-PLAN-006/current/kilo-glm/trial-2.md +0 -131
- package/src/skills/decompose-plan/tests/cases/TC-DECOMPOSE-PLAN-006/current/kilo-glm/trial-3.md +0 -138
- package/src/skills/decompose-plan/tests/cases/TC-DECOMPOSE-PLAN-006/current/kilo-minimax/trial-1.md +0 -41
- package/src/skills/decompose-plan/tests/cases/TC-DECOMPOSE-PLAN-006/current/kilo-minimax/trial-2.md +0 -88
- package/src/skills/decompose-plan/tests/cases/TC-DECOMPOSE-PLAN-006/current/kilo-minimax/trial-3.md +0 -0
- package/src/skills/decompose-plan/tests/cases/TC-DECOMPOSE-PLAN-006/current/meta.json +0 -115
- package/src/skills/decompose-plan/tests/cases/TC-DECOMPOSE-PLAN-006-dod-threshold.yaml +0 -72
- package/src/skills/decompose-plan/tests/index.yaml +0 -45
- package/src/skills/decompose-plan/tests/rubrics/atomicity-no-1to1.md +0 -21
- package/src/skills/decompose-plan/tests/rubrics/capabilities-registry.md +0 -21
- package/src/skills/decompose-plan/tests/rubrics/dod-threshold.md +0 -21
- package/src/skills/decompose-plan/tests/rubrics/executor-atomicity.md +0 -21
- package/src/skills/decompose-plan/tests/rubrics/get-next-id-mandatory.md +0 -21
- package/src/skills/decompose-plan/tests/rubrics/verbatim-dod-transfer.md +0 -21
- package/src/skills/decompose-plan/workflows/decompose.md +0 -305
- package/src/skills/deep-research/README.md +0 -36
- package/src/skills/deep-research/algorithms/source-scoring.md +0 -63
- package/src/skills/deep-research/algorithms/synthesis.md +0 -67
- package/src/skills/deep-research/knowledge/data-validation.md +0 -44
- package/src/skills/deep-research/knowledge/perplexity-config.md +0 -30
- package/src/skills/deep-research/knowledge/research-methodology.md +0 -54
- package/src/skills/deep-research/knowledge/source-evaluation.md +0 -33
- package/src/skills/deep-research/scripts/perplexity-research.js +0 -315
- package/src/skills/deep-research/templates/brief-summary.md +0 -25
- package/src/skills/deep-research/templates/research-report.md +0 -76
- package/src/skills/deep-research/tests/cases/TC-DEEP-RESEARCH-001/current/claude-haiku/trial-1.md +0 -48
- package/src/skills/deep-research/tests/cases/TC-DEEP-RESEARCH-001/current/claude-haiku/trial-2.md +0 -88
- package/src/skills/deep-research/tests/cases/TC-DEEP-RESEARCH-001/current/claude-haiku/trial-3.md +0 -56
- package/src/skills/deep-research/tests/cases/TC-DEEP-RESEARCH-001/current/judge.json +0 -163
- package/src/skills/deep-research/tests/cases/TC-DEEP-RESEARCH-001/current/kilo-free/trial-1.md +0 -58
- package/src/skills/deep-research/tests/cases/TC-DEEP-RESEARCH-001/current/kilo-free/trial-2.md +0 -249
- package/src/skills/deep-research/tests/cases/TC-DEEP-RESEARCH-001/current/kilo-free/trial-3.md +0 -44
- package/src/skills/deep-research/tests/cases/TC-DEEP-RESEARCH-001/current/kilo-glm/trial-1.md +0 -96
- package/src/skills/deep-research/tests/cases/TC-DEEP-RESEARCH-001/current/kilo-glm/trial-2.md +0 -56
- package/src/skills/deep-research/tests/cases/TC-DEEP-RESEARCH-001/current/kilo-glm/trial-3.md +0 -94
- package/src/skills/deep-research/tests/cases/TC-DEEP-RESEARCH-001/current/kilo-glm-air/trial-1.md +0 -11
- package/src/skills/deep-research/tests/cases/TC-DEEP-RESEARCH-001/current/kilo-glm-air/trial-2.md +0 -1
- package/src/skills/deep-research/tests/cases/TC-DEEP-RESEARCH-001/current/kilo-glm-air/trial-3.md +0 -1
- package/src/skills/deep-research/tests/cases/TC-DEEP-RESEARCH-001/current/meta.json +0 -115
- package/src/skills/deep-research/tests/cases/TC-DEEP-RESEARCH-001-self-check-url.yaml +0 -58
- package/src/skills/deep-research/tests/index.yaml +0 -20
- package/src/skills/deep-research/tests/rubrics/self-check-url.md +0 -34
- package/src/skills/deep-research/workflows/base-checklist.md +0 -19
- package/src/skills/deep-research/workflows/benchmark.md +0 -38
- package/src/skills/deep-research/workflows/competitor.md +0 -44
- package/src/skills/deep-research/workflows/custom.md +0 -32
- package/src/skills/deep-research/workflows/market.md +0 -44
- package/src/skills/deep-research/workflows/technology.md +0 -40
- package/src/skills/deep-research/workflows/trend.md +0 -40
- package/src/skills/execute-task/README.md +0 -44
- package/src/skills/execute-task/algorithms/execution-strategy.md +0 -136
- package/src/skills/execute-task/knowledge/context-checkpoints.md +0 -75
- package/src/skills/execute-task/knowledge/ticket-structure.md +0 -70
- package/src/skills/execute-task/tests/cases/TC-EXECUTE-TASK-001/current/claude-haiku/trial-1.md +0 -5
- package/src/skills/execute-task/tests/cases/TC-EXECUTE-TASK-001/current/claude-haiku/trial-2.md +0 -5
- package/src/skills/execute-task/tests/cases/TC-EXECUTE-TASK-001/current/claude-haiku/trial-3.md +0 -5
- package/src/skills/execute-task/tests/cases/TC-EXECUTE-TASK-001/current/judge.json +0 -124
- package/src/skills/execute-task/tests/cases/TC-EXECUTE-TASK-001/current/kilo-free/trial-1.md +0 -4
- package/src/skills/execute-task/tests/cases/TC-EXECUTE-TASK-001/current/kilo-free/trial-2.md +0 -4
- package/src/skills/execute-task/tests/cases/TC-EXECUTE-TASK-001/current/kilo-free/trial-3.md +0 -4
- package/src/skills/execute-task/tests/cases/TC-EXECUTE-TASK-001/current/kilo-glm-air/trial-1.md +0 -4
- package/src/skills/execute-task/tests/cases/TC-EXECUTE-TASK-001/current/kilo-glm-air/trial-2.md +0 -4
- package/src/skills/execute-task/tests/cases/TC-EXECUTE-TASK-001/current/kilo-glm-air/trial-3.md +0 -11
- package/src/skills/execute-task/tests/cases/TC-EXECUTE-TASK-001/current/meta.json +0 -88
- package/src/skills/execute-task/tests/cases/TC-EXECUTE-TASK-001-no-ticket-creation.yaml +0 -48
- package/src/skills/execute-task/tests/cases/TC-EXECUTE-TASK-002/current/claude-haiku/trial-1.md +0 -5
- package/src/skills/execute-task/tests/cases/TC-EXECUTE-TASK-002/current/claude-haiku/trial-2.md +0 -6
- package/src/skills/execute-task/tests/cases/TC-EXECUTE-TASK-002/current/claude-haiku/trial-3.md +0 -5
- package/src/skills/execute-task/tests/cases/TC-EXECUTE-TASK-002/current/judge.json +0 -124
- package/src/skills/execute-task/tests/cases/TC-EXECUTE-TASK-002/current/kilo-free/trial-1.md +0 -4
- package/src/skills/execute-task/tests/cases/TC-EXECUTE-TASK-002/current/kilo-free/trial-2.md +0 -4
- package/src/skills/execute-task/tests/cases/TC-EXECUTE-TASK-002/current/kilo-free/trial-3.md +0 -8
- package/src/skills/execute-task/tests/cases/TC-EXECUTE-TASK-002/current/kilo-glm-air/trial-1.md +0 -9
- package/src/skills/execute-task/tests/cases/TC-EXECUTE-TASK-002/current/kilo-glm-air/trial-2.md +0 -26
- package/src/skills/execute-task/tests/cases/TC-EXECUTE-TASK-002/current/kilo-glm-air/trial-3.md +0 -4
- package/src/skills/execute-task/tests/cases/TC-EXECUTE-TASK-002/current/meta.json +0 -89
- package/src/skills/execute-task/tests/cases/TC-EXECUTE-TASK-002-no-duplicate-dod.yaml +0 -44
- package/src/skills/execute-task/tests/cases/TC-EXECUTE-TASK-003/current/claude-haiku/trial-1.md +0 -5
- package/src/skills/execute-task/tests/cases/TC-EXECUTE-TASK-003/current/claude-haiku/trial-2.md +0 -5
- package/src/skills/execute-task/tests/cases/TC-EXECUTE-TASK-003/current/claude-haiku/trial-3.md +0 -5
- package/src/skills/execute-task/tests/cases/TC-EXECUTE-TASK-003/current/judge.json +0 -46
- package/src/skills/execute-task/tests/cases/TC-EXECUTE-TASK-003/current/meta.json +0 -37
- package/src/skills/execute-task/tests/cases/TC-EXECUTE-TASK-003-verification-proportionality.yaml +0 -46
- package/src/skills/execute-task/tests/cases/TC-EXECUTE-TASK-004/current/claude-haiku/trial-1.md +0 -18
- package/src/skills/execute-task/tests/cases/TC-EXECUTE-TASK-004/current/claude-haiku/trial-2.md +0 -16
- package/src/skills/execute-task/tests/cases/TC-EXECUTE-TASK-004/current/claude-haiku/trial-3.md +0 -14
- package/src/skills/execute-task/tests/cases/TC-EXECUTE-TASK-004/current/judge.json +0 -124
- package/src/skills/execute-task/tests/cases/TC-EXECUTE-TASK-004/current/kilo-free/trial-1.md +0 -5
- package/src/skills/execute-task/tests/cases/TC-EXECUTE-TASK-004/current/kilo-free/trial-2.md +0 -5
- package/src/skills/execute-task/tests/cases/TC-EXECUTE-TASK-004/current/kilo-free/trial-3.md +0 -1
- package/src/skills/execute-task/tests/cases/TC-EXECUTE-TASK-004/current/kilo-glm-air/trial-1.md +0 -8
- package/src/skills/execute-task/tests/cases/TC-EXECUTE-TASK-004/current/kilo-glm-air/trial-2.md +0 -5
- package/src/skills/execute-task/tests/cases/TC-EXECUTE-TASK-004/current/kilo-glm-air/trial-3.md +0 -4
- package/src/skills/execute-task/tests/cases/TC-EXECUTE-TASK-004/current/meta.json +0 -89
- package/src/skills/execute-task/tests/cases/TC-EXECUTE-TASK-004-no-foreign-ticket-edit.yaml +0 -50
- package/src/skills/execute-task/tests/cases/TC-EXECUTE-TASK-005/current/claude-haiku/trial-1.md +0 -5
- package/src/skills/execute-task/tests/cases/TC-EXECUTE-TASK-005/current/claude-haiku/trial-2.md +0 -5
- package/src/skills/execute-task/tests/cases/TC-EXECUTE-TASK-005/current/claude-haiku/trial-3.md +0 -5
- package/src/skills/execute-task/tests/cases/TC-EXECUTE-TASK-005/current/judge.json +0 -124
- package/src/skills/execute-task/tests/cases/TC-EXECUTE-TASK-005/current/kilo-free/trial-1.md +0 -15
- package/src/skills/execute-task/tests/cases/TC-EXECUTE-TASK-005/current/kilo-free/trial-2.md +0 -4
- package/src/skills/execute-task/tests/cases/TC-EXECUTE-TASK-005/current/kilo-free/trial-3.md +0 -5
- package/src/skills/execute-task/tests/cases/TC-EXECUTE-TASK-005/current/kilo-glm-air/trial-1.md +0 -11
- package/src/skills/execute-task/tests/cases/TC-EXECUTE-TASK-005/current/kilo-glm-air/trial-2.md +0 -11
- package/src/skills/execute-task/tests/cases/TC-EXECUTE-TASK-005/current/kilo-glm-air/trial-3.md +0 -4
- package/src/skills/execute-task/tests/cases/TC-EXECUTE-TASK-005/current/meta.json +0 -88
- package/src/skills/execute-task/tests/cases/TC-EXECUTE-TASK-005-ticket-fields-updated.yaml +0 -39
- package/src/skills/execute-task/tests/fixtures/IMPL-902-create-file.md +0 -41
- package/src/skills/execute-task/tests/fixtures/IMPL-904-current-task.md +0 -40
- package/src/skills/execute-task/tests/fixtures/IMPL-906-fill-ticket.md +0 -42
- package/src/skills/execute-task/tests/fixtures/QA-901-button-click.md +0 -41
- package/src/skills/execute-task/tests/fixtures/QA-903-visual-figma.md +0 -40
- package/src/skills/execute-task/tests/fixtures/TASK-905-done-with-typo.md +0 -36
- package/src/skills/execute-task/tests/index.yaml +0 -39
- package/src/skills/execute-task/tests/rubrics/no-duplicate-dod.md +0 -22
- package/src/skills/execute-task/tests/rubrics/no-foreign-ticket-edit.md +0 -20
- package/src/skills/execute-task/tests/rubrics/no-ticket-creation.md +0 -21
- package/src/skills/execute-task/tests/rubrics/ticket-fields-updated.md +0 -23
- package/src/skills/execute-task/tests/rubrics/verification-proportionality.md +0 -22
- package/src/skills/execute-task/workflows/execute.md +0 -104
- package/src/skills/manual-testing/README.md +0 -63
- package/src/skills/manual-testing/algorithms/blocked-tool-strategy.md +0 -74
- package/src/skills/manual-testing/algorithms/bug-severity.md +0 -73
- package/src/skills/manual-testing/algorithms/mcp-budget.md +0 -97
- package/src/skills/manual-testing/algorithms/test-prioritization.md +0 -69
- package/src/skills/manual-testing/knowledge/browser-extension-testing.md +0 -102
- package/src/skills/manual-testing/knowledge/browser-tools.md +0 -114
- package/src/skills/manual-testing/knowledge/desktop-tools-advanced.md +0 -92
- package/src/skills/manual-testing/knowledge/desktop-tools-core.md +0 -76
- package/src/skills/manual-testing/knowledge/sandbox-advanced.md +0 -83
- package/src/skills/manual-testing/knowledge/sandbox-core.md +0 -67
- package/src/skills/manual-testing/knowledge/stateful-edge-cases.md +0 -69
- package/src/skills/manual-testing/knowledge/test-case-design.md +0 -107
- package/src/skills/manual-testing/knowledge/testing-types.md +0 -45
- package/src/skills/manual-testing/templates/bug-report.md +0 -52
- package/src/skills/manual-testing/templates/test-case.md +0 -34
- package/src/skills/manual-testing/templates/test-plan.md +0 -97
- package/src/skills/manual-testing/templates/test-session-report.md +0 -56
- package/src/skills/manual-testing/tests/cases/TC-MANUAL-TESTING-001/current/claude-sonnet/trial-1.md +0 -34
- package/src/skills/manual-testing/tests/cases/TC-MANUAL-TESTING-001/current/claude-sonnet/trial-2.md +0 -32
- package/src/skills/manual-testing/tests/cases/TC-MANUAL-TESTING-001/current/claude-sonnet/trial-3.md +0 -30
- package/src/skills/manual-testing/tests/cases/TC-MANUAL-TESTING-001/current/judge.json +0 -163
- package/src/skills/manual-testing/tests/cases/TC-MANUAL-TESTING-001/current/kilo-deepseek/trial-1.md +0 -0
- package/src/skills/manual-testing/tests/cases/TC-MANUAL-TESTING-001/current/kilo-deepseek/trial-2.md +0 -7
- package/src/skills/manual-testing/tests/cases/TC-MANUAL-TESTING-001/current/kilo-deepseek/trial-3.md +0 -0
- package/src/skills/manual-testing/tests/cases/TC-MANUAL-TESTING-001/current/kilo-glm/trial-1.md +0 -4
- package/src/skills/manual-testing/tests/cases/TC-MANUAL-TESTING-001/current/kilo-glm/trial-2.md +0 -15
- package/src/skills/manual-testing/tests/cases/TC-MANUAL-TESTING-001/current/kilo-glm/trial-3.md +0 -8
- package/src/skills/manual-testing/tests/cases/TC-MANUAL-TESTING-001/current/kilo-minimax/trial-1.md +0 -5
- package/src/skills/manual-testing/tests/cases/TC-MANUAL-TESTING-001/current/kilo-minimax/trial-2.md +0 -7
- package/src/skills/manual-testing/tests/cases/TC-MANUAL-TESTING-001/current/kilo-minimax/trial-3.md +0 -7
- package/src/skills/manual-testing/tests/cases/TC-MANUAL-TESTING-001/current/meta.json +0 -114
- package/src/skills/manual-testing/tests/cases/TC-MANUAL-TESTING-001-sandbox-mandatory.yaml +0 -38
- package/src/skills/manual-testing/tests/cases/TC-MANUAL-TESTING-002/current/claude-sonnet/trial-1.md +0 -44
- package/src/skills/manual-testing/tests/cases/TC-MANUAL-TESTING-002/current/claude-sonnet/trial-2.md +0 -32
- package/src/skills/manual-testing/tests/cases/TC-MANUAL-TESTING-002/current/claude-sonnet/trial-3.md +0 -47
- package/src/skills/manual-testing/tests/cases/TC-MANUAL-TESTING-002/current/judge.json +0 -163
- package/src/skills/manual-testing/tests/cases/TC-MANUAL-TESTING-002/current/kilo-deepseek/trial-1.md +0 -19
- package/src/skills/manual-testing/tests/cases/TC-MANUAL-TESTING-002/current/kilo-deepseek/trial-2.md +0 -15
- package/src/skills/manual-testing/tests/cases/TC-MANUAL-TESTING-002/current/kilo-deepseek/trial-3.md +0 -24
- package/src/skills/manual-testing/tests/cases/TC-MANUAL-TESTING-002/current/kilo-glm/trial-1.md +0 -19
- package/src/skills/manual-testing/tests/cases/TC-MANUAL-TESTING-002/current/kilo-glm/trial-2.md +0 -13
- package/src/skills/manual-testing/tests/cases/TC-MANUAL-TESTING-002/current/kilo-glm/trial-3.md +0 -18
- package/src/skills/manual-testing/tests/cases/TC-MANUAL-TESTING-002/current/kilo-minimax/trial-1.md +0 -21
- package/src/skills/manual-testing/tests/cases/TC-MANUAL-TESTING-002/current/kilo-minimax/trial-2.md +0 -15
- package/src/skills/manual-testing/tests/cases/TC-MANUAL-TESTING-002/current/kilo-minimax/trial-3.md +0 -14
- package/src/skills/manual-testing/tests/cases/TC-MANUAL-TESTING-002/current/meta.json +0 -114
- package/src/skills/manual-testing/tests/cases/TC-MANUAL-TESTING-002-visual-tc-screenshot.yaml +0 -37
- package/src/skills/manual-testing/tests/cases/TC-MANUAL-TESTING-003/current/claude-sonnet/trial-1.md +0 -76
- package/src/skills/manual-testing/tests/cases/TC-MANUAL-TESTING-003/current/claude-sonnet/trial-2.md +0 -71
- package/src/skills/manual-testing/tests/cases/TC-MANUAL-TESTING-003/current/claude-sonnet/trial-3.md +0 -85
- package/src/skills/manual-testing/tests/cases/TC-MANUAL-TESTING-003/current/judge.json +0 -46
- package/src/skills/manual-testing/tests/cases/TC-MANUAL-TESTING-003/current/meta.json +0 -36
- package/src/skills/manual-testing/tests/cases/TC-MANUAL-TESTING-003-qa-non-ui-assertion.yaml +0 -65
- package/src/skills/manual-testing/tests/index.yaml +0 -30
- package/src/skills/manual-testing/tests/last-run-tc001-sonnet.log +0 -140
- package/src/skills/manual-testing/tests/last-run-tc002.log +0 -1
- package/src/skills/manual-testing/tests/last-run.log +0 -1469
- package/src/skills/manual-testing/tests/rubrics/qa-non-ui-assertion.md +0 -31
- package/src/skills/manual-testing/tests/rubrics/sandbox-mandatory.md +0 -20
- package/src/skills/manual-testing/tests/rubrics/visual-tc-screenshot.md +0 -21
- package/src/skills/manual-testing/workflows/acceptance.md +0 -80
- package/src/skills/manual-testing/workflows/exploratory.md +0 -84
- package/src/skills/manual-testing/workflows/regression.md +0 -76
- package/src/skills/manual-testing/workflows/smoke.md +0 -109
- package/src/skills/manual-testing/workflows/test-plan.md +0 -75
- package/src/skills/review-result/README.md +0 -59
- package/src/skills/review-result/algorithms/verification.md +0 -112
- package/src/skills/review-result/knowledge/baseline-snapshot-validation.md +0 -67
- package/src/skills/review-result/knowledge/dod-patterns.md +0 -116
- package/src/skills/review-result/knowledge/test-hygiene.md +0 -44
- package/src/skills/review-result/scripts/verify-artifacts.js +0 -497
- package/src/skills/review-result/templates/verdict.md +0 -153
- package/src/skills/review-result/tests/cases/TC-REVIEW-RESULT-001/current/claude-haiku/trial-1.md +0 -22
- package/src/skills/review-result/tests/cases/TC-REVIEW-RESULT-001/current/claude-haiku/trial-2.md +0 -7
- package/src/skills/review-result/tests/cases/TC-REVIEW-RESULT-001/current/claude-haiku/trial-3.md +0 -21
- package/src/skills/review-result/tests/cases/TC-REVIEW-RESULT-001/current/claude-sonnet/trial-1.md +0 -6
- package/src/skills/review-result/tests/cases/TC-REVIEW-RESULT-001/current/claude-sonnet/trial-2.md +0 -6
- package/src/skills/review-result/tests/cases/TC-REVIEW-RESULT-001/current/claude-sonnet/trial-3.md +0 -6
- package/src/skills/review-result/tests/cases/TC-REVIEW-RESULT-001/current/judge.json +0 -164
- package/src/skills/review-result/tests/cases/TC-REVIEW-RESULT-001/current/kilo-deepseek/trial-1.md +0 -5
- package/src/skills/review-result/tests/cases/TC-REVIEW-RESULT-001/current/kilo-deepseek/trial-2.md +0 -7
- package/src/skills/review-result/tests/cases/TC-REVIEW-RESULT-001/current/kilo-deepseek/trial-3.md +0 -6
- package/src/skills/review-result/tests/cases/TC-REVIEW-RESULT-001/current/kilo-glm/trial-1.md +0 -49
- package/src/skills/review-result/tests/cases/TC-REVIEW-RESULT-001/current/kilo-glm/trial-2.md +0 -28
- package/src/skills/review-result/tests/cases/TC-REVIEW-RESULT-001/current/kilo-glm/trial-3.md +0 -37
- package/src/skills/review-result/tests/cases/TC-REVIEW-RESULT-001/current/kilo-minimax/trial-1.md +0 -22
- package/src/skills/review-result/tests/cases/TC-REVIEW-RESULT-001/current/kilo-minimax/trial-2.md +0 -13
- package/src/skills/review-result/tests/cases/TC-REVIEW-RESULT-001/current/kilo-minimax/trial-3.md +0 -21
- package/src/skills/review-result/tests/cases/TC-REVIEW-RESULT-001/current/meta.json +0 -116
- package/src/skills/review-result/tests/cases/TC-REVIEW-RESULT-001-visual-tc-trigger.yaml +0 -51
- package/src/skills/review-result/tests/cases/TC-REVIEW-RESULT-002/current/claude-haiku/trial-1.md +0 -23
- package/src/skills/review-result/tests/cases/TC-REVIEW-RESULT-002/current/claude-haiku/trial-2.md +0 -22
- package/src/skills/review-result/tests/cases/TC-REVIEW-RESULT-002/current/claude-haiku/trial-3.md +0 -28
- package/src/skills/review-result/tests/cases/TC-REVIEW-RESULT-002/current/claude-sonnet/trial-1.md +0 -4
- package/src/skills/review-result/tests/cases/TC-REVIEW-RESULT-002/current/claude-sonnet/trial-2.md +0 -4
- package/src/skills/review-result/tests/cases/TC-REVIEW-RESULT-002/current/claude-sonnet/trial-3.md +0 -4
- package/src/skills/review-result/tests/cases/TC-REVIEW-RESULT-002/current/judge.json +0 -163
- package/src/skills/review-result/tests/cases/TC-REVIEW-RESULT-002/current/kilo-deepseek/trial-1.md +0 -4
- package/src/skills/review-result/tests/cases/TC-REVIEW-RESULT-002/current/kilo-deepseek/trial-2.md +0 -0
- package/src/skills/review-result/tests/cases/TC-REVIEW-RESULT-002/current/kilo-deepseek/trial-3.md +0 -4
- package/src/skills/review-result/tests/cases/TC-REVIEW-RESULT-002/current/kilo-glm/trial-1.md +0 -39
- package/src/skills/review-result/tests/cases/TC-REVIEW-RESULT-002/current/kilo-glm/trial-2.md +0 -25
- package/src/skills/review-result/tests/cases/TC-REVIEW-RESULT-002/current/kilo-glm/trial-3.md +0 -32
- package/src/skills/review-result/tests/cases/TC-REVIEW-RESULT-002/current/kilo-minimax/trial-1.md +0 -34
- package/src/skills/review-result/tests/cases/TC-REVIEW-RESULT-002/current/kilo-minimax/trial-2.md +0 -8
- package/src/skills/review-result/tests/cases/TC-REVIEW-RESULT-002/current/kilo-minimax/trial-3.md +0 -23
- package/src/skills/review-result/tests/cases/TC-REVIEW-RESULT-002/current/meta.json +0 -115
- package/src/skills/review-result/tests/cases/TC-REVIEW-RESULT-002-path-line-suffix.yaml +0 -39
- package/src/skills/review-result/tests/cases/TC-REVIEW-RESULT-003/current/claude-sonnet/trial-1.md +0 -40
- package/src/skills/review-result/tests/cases/TC-REVIEW-RESULT-003/current/claude-sonnet/trial-2.md +0 -15
- package/src/skills/review-result/tests/cases/TC-REVIEW-RESULT-003/current/claude-sonnet/trial-3.md +0 -7
- package/src/skills/review-result/tests/cases/TC-REVIEW-RESULT-003/current/judge.json +0 -163
- package/src/skills/review-result/tests/cases/TC-REVIEW-RESULT-003/current/kilo-deepseek/trial-1.md +0 -5
- package/src/skills/review-result/tests/cases/TC-REVIEW-RESULT-003/current/kilo-deepseek/trial-2.md +0 -5
- package/src/skills/review-result/tests/cases/TC-REVIEW-RESULT-003/current/kilo-deepseek/trial-3.md +0 -11
- package/src/skills/review-result/tests/cases/TC-REVIEW-RESULT-003/current/kilo-glm/trial-1.md +0 -16
- package/src/skills/review-result/tests/cases/TC-REVIEW-RESULT-003/current/kilo-glm/trial-2.md +0 -18
- package/src/skills/review-result/tests/cases/TC-REVIEW-RESULT-003/current/kilo-glm/trial-3.md +0 -17
- package/src/skills/review-result/tests/cases/TC-REVIEW-RESULT-003/current/kilo-minimax/trial-1.md +0 -17
- package/src/skills/review-result/tests/cases/TC-REVIEW-RESULT-003/current/kilo-minimax/trial-2.md +0 -31
- package/src/skills/review-result/tests/cases/TC-REVIEW-RESULT-003/current/kilo-minimax/trial-3.md +0 -5
- package/src/skills/review-result/tests/cases/TC-REVIEW-RESULT-003/current/meta.json +0 -115
- package/src/skills/review-result/tests/cases/TC-REVIEW-RESULT-003-test-isolation.yaml +0 -50
- package/src/skills/review-result/tests/cases/TC-REVIEW-RESULT-004/current/claude-sonnet/trial-1.md +0 -5
- package/src/skills/review-result/tests/cases/TC-REVIEW-RESULT-004/current/claude-sonnet/trial-2.md +0 -5
- package/src/skills/review-result/tests/cases/TC-REVIEW-RESULT-004/current/claude-sonnet/trial-3.md +0 -6
- package/src/skills/review-result/tests/cases/TC-REVIEW-RESULT-004/current/judge.json +0 -46
- package/src/skills/review-result/tests/cases/TC-REVIEW-RESULT-004/current/meta.json +0 -37
- package/src/skills/review-result/tests/cases/TC-REVIEW-RESULT-004-baseline-snapshot.yaml +0 -50
- package/src/skills/review-result/tests/fixtures/IMPL-902-path-with-line.md +0 -43
- package/src/skills/review-result/tests/fixtures/QA-901-visual-button.md +0 -46
- package/src/skills/review-result/tests/fixtures/QA-904-test-isolation-violation/QA-904.md +0 -51
- package/src/skills/review-result/tests/fixtures/QA-904-test-isolation-violation/example-test.mjs +0 -36
- package/src/skills/review-result/tests/fixtures/QA-905-baseline-regex-instead-of-snapshot/QA-905.md +0 -62
- package/src/skills/review-result/tests/fixtures/QA-905-baseline-regex-instead-of-snapshot/baseline.test.mjs +0 -124
- package/src/skills/review-result/tests/index.yaml +0 -35
- package/src/skills/review-result/tests/rubrics/baseline-snapshot.md +0 -20
- package/src/skills/review-result/tests/rubrics/path-line-suffix.md +0 -19
- package/src/skills/review-result/tests/rubrics/test-isolation.md +0 -20
- package/src/skills/review-result/tests/rubrics/visual-tc-trigger.md +0 -19
- package/src/skills/review-result/workflows/review.md +0 -209
|
@@ -1,163 +0,0 @@
|
|
|
1
|
-
{
|
|
2
|
-
"per_model": {
|
|
3
|
-
"claude-sonnet": {
|
|
4
|
-
"pass_count": 3,
|
|
5
|
-
"total": 3,
|
|
6
|
-
"trials": [
|
|
7
|
-
{
|
|
8
|
-
"trial": 1,
|
|
9
|
-
"score": 5,
|
|
10
|
-
"passed": true
|
|
11
|
-
},
|
|
12
|
-
{
|
|
13
|
-
"trial": 2,
|
|
14
|
-
"score": 5,
|
|
15
|
-
"passed": true
|
|
16
|
-
},
|
|
17
|
-
{
|
|
18
|
-
"trial": 3,
|
|
19
|
-
"score": 4,
|
|
20
|
-
"passed": true
|
|
21
|
-
}
|
|
22
|
-
]
|
|
23
|
-
},
|
|
24
|
-
"kilo-glm": {
|
|
25
|
-
"pass_count": 3,
|
|
26
|
-
"total": 3,
|
|
27
|
-
"trials": [
|
|
28
|
-
{
|
|
29
|
-
"trial": 1,
|
|
30
|
-
"score": 4,
|
|
31
|
-
"passed": true
|
|
32
|
-
},
|
|
33
|
-
{
|
|
34
|
-
"trial": 2,
|
|
35
|
-
"score": 4,
|
|
36
|
-
"passed": true
|
|
37
|
-
},
|
|
38
|
-
{
|
|
39
|
-
"trial": 3,
|
|
40
|
-
"score": 4,
|
|
41
|
-
"passed": true
|
|
42
|
-
}
|
|
43
|
-
]
|
|
44
|
-
},
|
|
45
|
-
"kilo-minimax": {
|
|
46
|
-
"pass_count": 3,
|
|
47
|
-
"total": 3,
|
|
48
|
-
"trials": [
|
|
49
|
-
{
|
|
50
|
-
"trial": 1,
|
|
51
|
-
"score": 4,
|
|
52
|
-
"passed": true
|
|
53
|
-
},
|
|
54
|
-
{
|
|
55
|
-
"trial": 2,
|
|
56
|
-
"score": 4,
|
|
57
|
-
"passed": true
|
|
58
|
-
},
|
|
59
|
-
{
|
|
60
|
-
"trial": 3,
|
|
61
|
-
"score": 4,
|
|
62
|
-
"passed": true
|
|
63
|
-
}
|
|
64
|
-
]
|
|
65
|
-
},
|
|
66
|
-
"kilo-deepseek": {
|
|
67
|
-
"pass_count": 2,
|
|
68
|
-
"total": 3,
|
|
69
|
-
"trials": [
|
|
70
|
-
{
|
|
71
|
-
"trial": 1,
|
|
72
|
-
"score": 3,
|
|
73
|
-
"passed": false
|
|
74
|
-
},
|
|
75
|
-
{
|
|
76
|
-
"trial": 2,
|
|
77
|
-
"score": 4,
|
|
78
|
-
"passed": true
|
|
79
|
-
},
|
|
80
|
-
{
|
|
81
|
-
"trial": 3,
|
|
82
|
-
"score": 4,
|
|
83
|
-
"passed": true
|
|
84
|
-
}
|
|
85
|
-
]
|
|
86
|
-
}
|
|
87
|
-
},
|
|
88
|
-
"rubric_scores": [
|
|
89
|
-
{
|
|
90
|
-
"agentId": "kilo-deepseek",
|
|
91
|
-
"trial": 1,
|
|
92
|
-
"score": 3,
|
|
93
|
-
"errored": false
|
|
94
|
-
},
|
|
95
|
-
{
|
|
96
|
-
"agentId": "kilo-deepseek",
|
|
97
|
-
"trial": 2,
|
|
98
|
-
"score": 4,
|
|
99
|
-
"errored": false
|
|
100
|
-
},
|
|
101
|
-
{
|
|
102
|
-
"agentId": "kilo-deepseek",
|
|
103
|
-
"trial": 3,
|
|
104
|
-
"score": 4,
|
|
105
|
-
"errored": false
|
|
106
|
-
},
|
|
107
|
-
{
|
|
108
|
-
"agentId": "kilo-glm",
|
|
109
|
-
"trial": 1,
|
|
110
|
-
"score": 4,
|
|
111
|
-
"errored": false
|
|
112
|
-
},
|
|
113
|
-
{
|
|
114
|
-
"agentId": "kilo-glm",
|
|
115
|
-
"trial": 2,
|
|
116
|
-
"score": 4,
|
|
117
|
-
"errored": false
|
|
118
|
-
},
|
|
119
|
-
{
|
|
120
|
-
"agentId": "kilo-glm",
|
|
121
|
-
"trial": 3,
|
|
122
|
-
"score": 4,
|
|
123
|
-
"errored": false
|
|
124
|
-
},
|
|
125
|
-
{
|
|
126
|
-
"agentId": "kilo-minimax",
|
|
127
|
-
"trial": 1,
|
|
128
|
-
"score": 4,
|
|
129
|
-
"errored": false
|
|
130
|
-
},
|
|
131
|
-
{
|
|
132
|
-
"agentId": "kilo-minimax",
|
|
133
|
-
"trial": 2,
|
|
134
|
-
"score": 4,
|
|
135
|
-
"errored": false
|
|
136
|
-
},
|
|
137
|
-
{
|
|
138
|
-
"agentId": "kilo-minimax",
|
|
139
|
-
"trial": 3,
|
|
140
|
-
"score": 4,
|
|
141
|
-
"errored": false
|
|
142
|
-
},
|
|
143
|
-
{
|
|
144
|
-
"agentId": "claude-sonnet",
|
|
145
|
-
"trial": 1,
|
|
146
|
-
"score": 5,
|
|
147
|
-
"errored": false
|
|
148
|
-
},
|
|
149
|
-
{
|
|
150
|
-
"agentId": "claude-sonnet",
|
|
151
|
-
"trial": 2,
|
|
152
|
-
"score": 5,
|
|
153
|
-
"errored": false
|
|
154
|
-
},
|
|
155
|
-
{
|
|
156
|
-
"agentId": "claude-sonnet",
|
|
157
|
-
"trial": 3,
|
|
158
|
-
"score": 4,
|
|
159
|
-
"errored": false
|
|
160
|
-
}
|
|
161
|
-
],
|
|
162
|
-
"timestamp": "2026-04-25T07:10:27.566Z"
|
|
163
|
-
}
|
package/src/skills/review-result/tests/cases/TC-REVIEW-RESULT-003/current/kilo-deepseek/trial-2.md
DELETED
|
@@ -1,5 +0,0 @@
|
|
|
1
|
-
---RESULT---
|
|
2
|
-
status: failed
|
|
3
|
-
issues:
|
|
4
|
-
- "Пункт DoD 3 не выполнен: ожидалось создание tmp-корня и изоляция в teardown/after-hook, получено: корень артефактов привязан к реальному корню репозитория (строка `const testProjectRoot = join(__dirname, '..', '..');`), cleanup удаляет конкретный файл, cleanup вызывается в теле test-функции"
|
|
5
|
-
---RESULT---
|
package/src/skills/review-result/tests/cases/TC-REVIEW-RESULT-003/current/kilo-deepseek/trial-3.md
DELETED
|
@@ -1,11 +0,0 @@
|
|
|
1
|
-
Проверка тикета QA-904 выявила следующие проблемы:
|
|
2
|
-
|
|
3
|
-
1. **Несоответствие путей** — в DoD указан файл `tests/fixtures/QA‑904‑test‑isolation‑violation/example‑test.mjs`, но фактический путь `.workflow/src/skills/review‑result/tests/fixtures/QA‑904‑test‑isolation‑violation/example‑test.mjs`.
|
|
4
|
-
2. **Нарушение изоляции теста** — тест пишет артефакты в рабочий репозиторий (строка `const testProjectRoot = join(__dirname, '..', '..');`), cleanup удаляет только конкретный файл, а не весь tmp‑корень, и находится в теле теста, а не в teardown/after‑hook.
|
|
5
|
-
|
|
6
|
-
---RESULT---
|
|
7
|
-
status: failed
|
|
8
|
-
issues:
|
|
9
|
-
- "Пункт DoD #1 не выполнен: ожидался файл tests/fixtures/QA-904-test-isolation-violation/example-test.mjs, фактический путь .workflow/src/skills/review-result/tests/fixtures/QA-904-test-isolation-violation/example-test.mjs"
|
|
10
|
-
- "Пункт DoD #3 не выполнен: тест нарушает изоляцию — корень артефактов привязан к рабочему репозиторию (строка 10), cleanup удаляет конкретный файл, а не tmp-корень, cleanup в теле теста, а не в teardown/after-hook"
|
|
11
|
-
---RESULT---
|
package/src/skills/review-result/tests/cases/TC-REVIEW-RESULT-003/current/kilo-glm/trial-1.md
DELETED
|
@@ -1,16 +0,0 @@
|
|
|
1
|
-
Вердикт: **failed**.
|
|
2
|
-
|
|
3
|
-
Проверка DoD-критериев:
|
|
4
|
-
|
|
5
|
-
1. **Файл создан** — подтверждено.
|
|
6
|
-
2. **Тест проходит** — принято по условию.
|
|
7
|
-
3. **Изоляция** — **нарушена** по трём причинам:
|
|
8
|
-
- `testProjectRoot` указывает на корень репозитория (`join(__dirname, '..', '..')`), а не на системный tmp-каталог.
|
|
9
|
-
- `writeFileSync(testConfigPath, ...)` пишет в `.workflow/config/sample.yaml` внутри рабочего репозитория.
|
|
10
|
-
- `cleanupTestConfig()` вызывается в теле test-функции, а не в `after()`/`teardown`; при падении assertion файл остаётся в репозитории.
|
|
11
|
-
|
|
12
|
-
---RESULT---
|
|
13
|
-
status: failed
|
|
14
|
-
issues:
|
|
15
|
-
- "Пункт DoD 3 не выполнен: тест пишет в рабочий репозиторий (testProjectRoot = join(__dirname, '..', '..') → корень репозитория), а не в tmp-корень; cleanup вызывается в теле test-функции, а не в teardown/after-hook; при падении assertion артефакт остаётся в репозитории"
|
|
16
|
-
---RESULT---
|
package/src/skills/review-result/tests/cases/TC-REVIEW-RESULT-003/current/kilo-glm/trial-2.md
DELETED
|
@@ -1,18 +0,0 @@
|
|
|
1
|
-
Тикет прочитан. Три критерия DoD. Критерий 3 (изоляция) нарушен — анализ ниже.
|
|
2
|
-
|
|
3
|
-
**DoD-проверка:**
|
|
4
|
-
|
|
5
|
-
1. **Файл создан** — подтверждено, файл существует.
|
|
6
|
-
2. **Тест проходит** — формально `node --test` success, принимается.
|
|
7
|
-
3. **Изоляция** — НАРУШЕНА:
|
|
8
|
-
- `testProjectRoot = join(__dirname, '..', '..')` — указывает на корень репозитория, а не на системный `tmp`.
|
|
9
|
-
- `createTestConfig` пишет в `.workflow/config/sample.yaml` внутри рабочего репозитория.
|
|
10
|
-
- `cleanupTestConfig` удаляет конкретный файл, не tmp-корень целиком.
|
|
11
|
-
- Cleanup вызывается в теле test-функции, а не в `teardown`/`after`-хуке — при падении assertion до строки cleanup файл остаётся в репозитории.
|
|
12
|
-
|
|
13
|
-
Вердикт: **failed**.
|
|
14
|
-
---RESULT---
|
|
15
|
-
status: failed
|
|
16
|
-
issues:
|
|
17
|
-
- "Пункт DoD 3 не выполнен: ожидалось что тест создаёт и уничтожает tmp-корень, не пишет в рабочий репозиторий (изоляция в teardown/after-hook); получено: testProjectRoot привязан к реальному корню репозитория (join(__dirname, '..', '..')), createTestConfig пишет в .workflow/config/sample.yaml рабочего репозитория, cleanupTestConfig удаляет конкретный файл а не tmp-корень, cleanup вызывается в теле test-функции а не в teardown — файл: src/skills/review-result/tests/fixtures/QA-904-test-isolation-violation/example-test.mjs:12-22"
|
|
18
|
-
---RESULT---
|
package/src/skills/review-result/tests/cases/TC-REVIEW-RESULT-003/current/kilo-glm/trial-3.md
DELETED
|
@@ -1,17 +0,0 @@
|
|
|
1
|
-
Скрипт недоступен — перехожу к ручной проверке (fallback по workflow).
|
|
2
|
-
Проверка завершена. Файл существует, но нарушает изоляцию. Формирую вердикт.
|
|
3
|
-
Проверка тикета QA-904 завершена. DoD критерий #3 (изоляция теста) не выполнен — три нарушения:
|
|
4
|
-
|
|
5
|
-
1. **Корень артефактов — рабочий репозиторий** (`example-test.mjs:16`): `join(__dirname, '..', '..')` указывает на корень проекта, а не на системный tmp.
|
|
6
|
-
2. **Cleanup удаляет конкретный файл** (`:28`): `rmSync(testConfigPath)`, не весь tmp-корень — при прерывании артефакт остаётся.
|
|
7
|
-
3. **Cleanup в теле test-функции** (`:35`), а не в teardown/after-hook — падение assertion до cleanup = утечка state.
|
|
8
|
-
|
|
9
|
-
```
|
|
10
|
-
---RESULT---
|
|
11
|
-
status: failed
|
|
12
|
-
issues:
|
|
13
|
-
- "DoD #3 не выполнен: тест пишет в рабочий репозиторий — example-test.mjs:16-17, testProjectRoot и testConfigPath указывают на .workflow/config/sample.yaml внутри корня репозитория, а не на tmp-корень через стандартную функцию ОС"
|
|
14
|
-
- "DoD #3 не выполнен: cleanup удаляет конкретный файл (example-test.mjs:28 — rmSync(testConfigPath)), а не весь tmp-корень рекурсивно — при прерывании до вызова cleanup файл остаётся в рабочем репозитории"
|
|
15
|
-
- "DoD #3 не выполнен: cleanup вызывается в теле test-функции (example-test.mjs:35), а не в teardown/after-hook — падение assertion до этой строки оставляет артефакт в репозитории"
|
|
16
|
-
---RESULT---
|
|
17
|
-
```
|
package/src/skills/review-result/tests/cases/TC-REVIEW-RESULT-003/current/kilo-minimax/trial-1.md
DELETED
|
@@ -1,17 +0,0 @@
|
|
|
1
|
-
Проверяю DoD пункт 3 (изоляция). Из предоставленного содержимого теста:
|
|
2
|
-
|
|
3
|
-
**Проблемы изоляции:**
|
|
4
|
-
1. **Корень артефактов** — `testProjectRoot = join(__dirname, '..', '..')` привязан к реальному репозиторию, а не к tmp-корню
|
|
5
|
-
2. **Запись в рабочий репозиторий** — пишет в `.workflow/config/sample.yaml`
|
|
6
|
-
3. **Cleanup в теле теста** — `cleanupTestConfig()` вызывается в теле test-функции, а не в after-hooks/teardown
|
|
7
|
-
4. **Удаляется конкретный файл** — `rmSync(testConfigPath)`, а не рекурсивное удаление tmp-корня
|
|
8
|
-
|
|
9
|
-
DoD требует: "создаёт и уничтожает tmp-корень, не пишет в рабочий репозиторий (изоляция в teardown/after-hook)".
|
|
10
|
-
|
|
11
|
-
---
|
|
12
|
-
|
|
13
|
-
---RESULT---
|
|
14
|
-
status: failed
|
|
15
|
-
issues:
|
|
16
|
-
- "Пункт DoD #3 не выполнен: тест пишет в рабочий репозиторий (.workflow/config/sample.yaml), а не в tmp-корень; cleanup выполняется в теле теста, а не в teardown/after-hook"
|
|
17
|
-
---RESULT---
|
package/src/skills/review-result/tests/cases/TC-REVIEW-RESULT-003/current/kilo-minimax/trial-2.md
DELETED
|
@@ -1,31 +0,0 @@
|
|
|
1
|
-
Проверяю DoD тикета QA-904:
|
|
2
|
-
|
|
3
|
-
**Пункт 1** — Файл создан: ✅ Существует
|
|
4
|
-
|
|
5
|
-
**Пункт 2** — Тест проходит: ✅ Формально `node --test` success
|
|
6
|
-
|
|
7
|
-
**Пункт 3** — Изоляция (tmp-корень, не рабочий репозиторий): ❌ **НАРУШЕН**
|
|
8
|
-
|
|
9
|
-
Анализ теста из тикета:
|
|
10
|
-
|
|
11
|
-
1. `testProjectRoot = join(__dirname, '..', '..')` — вычисляется от расположения теста, ведёт к реальному корню репозитория
|
|
12
|
-
2. `writeFileSync(testConfigPath, ...)` — пишет в `.workflow/config/sample.yaml` рабочего репозитория
|
|
13
|
-
3. `cleanupTestConfig()` — вызывается **внутри** test-функции, не в afterEach/teardown. При падении assertion до этой строки файл останется в рабочем репозитории
|
|
14
|
-
|
|
15
|
-
Это прямое нарушение изоляции: тест пишет в `.workflow/config/`, а не в tmp-корень (напр. `os.tmpdir()`).
|
|
16
|
-
|
|
17
|
-
Секции "## Ревью" в тикете нет — это первое ревью.
|
|
18
|
-
|
|
19
|
-
---
|
|
20
|
-
|
|
21
|
-
## Вердикт
|
|
22
|
-
|
|
23
|
-
**DoD пункт 3 не выполнен:** тест записывает артефакты в рабочий репозиторий проекта, а не в tmp-корень. Cleanup находится внутри тела функции, а не в afterEach/teardown, что оставляет файл при падении.
|
|
24
|
-
|
|
25
|
-
---
|
|
26
|
-
|
|
27
|
-
---RESULT---
|
|
28
|
-
status: failed
|
|
29
|
-
issues:
|
|
30
|
-
- "DoD пункт 3 нарушен: тест пишет в рабочий репозиторий (`.workflow/config/sample.yaml`), а не в tmp-корень. cleanup вызывается в теле test-функции, а не в teardown/afterEach — при падении assertion файл останется в рабочем репозитории. Ожидалось: `os.tmpdir()` + afterEach/teardown."
|
|
31
|
-
---RESULT---
|
package/src/skills/review-result/tests/cases/TC-REVIEW-RESULT-003/current/kilo-minimax/trial-3.md
DELETED
|
@@ -1,5 +0,0 @@
|
|
|
1
|
-
---RESULT---
|
|
2
|
-
status: failed
|
|
3
|
-
issues:
|
|
4
|
-
- "Пункт DoD #3 не выполнен: ожидался tmp-корень с изоляцией в teardown/after-hook, но тест пишет в `.workflow/config/` рабочего репозитория (`testProjectRoot = join(__dirname, '..', '..')`), cleanup вызывается в теле теста (строка 25), а не в afterEach/teardown"
|
|
5
|
-
---RESULT---
|
|
@@ -1,115 +0,0 @@
|
|
|
1
|
-
{
|
|
2
|
-
"date": "2026-04-25T07:10:27.577Z",
|
|
3
|
-
"skill_sha": "b6cda8a",
|
|
4
|
-
"status": "passed",
|
|
5
|
-
"duration_ms": 79287,
|
|
6
|
-
"l1_skipped": true,
|
|
7
|
-
"per_model": {
|
|
8
|
-
"claude-sonnet": {
|
|
9
|
-
"passed": true,
|
|
10
|
-
"errored": false,
|
|
11
|
-
"pass_count": 3,
|
|
12
|
-
"error_count": 0,
|
|
13
|
-
"total": 3,
|
|
14
|
-
"threshold": 2
|
|
15
|
-
},
|
|
16
|
-
"kilo-glm": {
|
|
17
|
-
"passed": true,
|
|
18
|
-
"errored": false,
|
|
19
|
-
"pass_count": 3,
|
|
20
|
-
"error_count": 0,
|
|
21
|
-
"total": 3,
|
|
22
|
-
"threshold": 2
|
|
23
|
-
},
|
|
24
|
-
"kilo-minimax": {
|
|
25
|
-
"passed": true,
|
|
26
|
-
"errored": false,
|
|
27
|
-
"pass_count": 3,
|
|
28
|
-
"error_count": 0,
|
|
29
|
-
"total": 3,
|
|
30
|
-
"threshold": 2
|
|
31
|
-
},
|
|
32
|
-
"kilo-deepseek": {
|
|
33
|
-
"passed": true,
|
|
34
|
-
"errored": false,
|
|
35
|
-
"pass_count": 2,
|
|
36
|
-
"error_count": 0,
|
|
37
|
-
"total": 3,
|
|
38
|
-
"threshold": 2
|
|
39
|
-
}
|
|
40
|
-
},
|
|
41
|
-
"rubric_scores": [
|
|
42
|
-
{
|
|
43
|
-
"agentId": "kilo-deepseek",
|
|
44
|
-
"trial": 1,
|
|
45
|
-
"score": 3,
|
|
46
|
-
"errored": false
|
|
47
|
-
},
|
|
48
|
-
{
|
|
49
|
-
"agentId": "kilo-deepseek",
|
|
50
|
-
"trial": 2,
|
|
51
|
-
"score": 4,
|
|
52
|
-
"errored": false
|
|
53
|
-
},
|
|
54
|
-
{
|
|
55
|
-
"agentId": "kilo-deepseek",
|
|
56
|
-
"trial": 3,
|
|
57
|
-
"score": 4,
|
|
58
|
-
"errored": false
|
|
59
|
-
},
|
|
60
|
-
{
|
|
61
|
-
"agentId": "kilo-glm",
|
|
62
|
-
"trial": 1,
|
|
63
|
-
"score": 4,
|
|
64
|
-
"errored": false
|
|
65
|
-
},
|
|
66
|
-
{
|
|
67
|
-
"agentId": "kilo-glm",
|
|
68
|
-
"trial": 2,
|
|
69
|
-
"score": 4,
|
|
70
|
-
"errored": false
|
|
71
|
-
},
|
|
72
|
-
{
|
|
73
|
-
"agentId": "kilo-glm",
|
|
74
|
-
"trial": 3,
|
|
75
|
-
"score": 4,
|
|
76
|
-
"errored": false
|
|
77
|
-
},
|
|
78
|
-
{
|
|
79
|
-
"agentId": "kilo-minimax",
|
|
80
|
-
"trial": 1,
|
|
81
|
-
"score": 4,
|
|
82
|
-
"errored": false
|
|
83
|
-
},
|
|
84
|
-
{
|
|
85
|
-
"agentId": "kilo-minimax",
|
|
86
|
-
"trial": 2,
|
|
87
|
-
"score": 4,
|
|
88
|
-
"errored": false
|
|
89
|
-
},
|
|
90
|
-
{
|
|
91
|
-
"agentId": "kilo-minimax",
|
|
92
|
-
"trial": 3,
|
|
93
|
-
"score": 4,
|
|
94
|
-
"errored": false
|
|
95
|
-
},
|
|
96
|
-
{
|
|
97
|
-
"agentId": "claude-sonnet",
|
|
98
|
-
"trial": 1,
|
|
99
|
-
"score": 5,
|
|
100
|
-
"errored": false
|
|
101
|
-
},
|
|
102
|
-
{
|
|
103
|
-
"agentId": "claude-sonnet",
|
|
104
|
-
"trial": 2,
|
|
105
|
-
"score": 5,
|
|
106
|
-
"errored": false
|
|
107
|
-
},
|
|
108
|
-
{
|
|
109
|
-
"agentId": "claude-sonnet",
|
|
110
|
-
"trial": 3,
|
|
111
|
-
"score": 4,
|
|
112
|
-
"errored": false
|
|
113
|
-
}
|
|
114
|
-
]
|
|
115
|
-
}
|
|
@@ -1,50 +0,0 @@
|
|
|
1
|
-
id: TC-REVIEW-RESULT-003
|
|
2
|
-
title: "Ревьюер ловит нарушение изоляции тестов"
|
|
3
|
-
origin:
|
|
4
|
-
chg: [CHG-test-hygiene-2026-04-21]
|
|
5
|
-
incidents:
|
|
6
|
-
- "workflowAi: тест error-classifier.test.mjs писал в реальный .workflow/config/agent-health-rules.yaml, полагаясь на cleanup в теле функции; при прерывании оставлял артефакты в рабочем репо и ложно срабатывали механические проверки"
|
|
7
|
-
principle: "Проверка изоляции теста ортогональна результату прогона: зелёный тест, пишущий в рабочий репозиторий — токсичный"
|
|
8
|
-
backlog_sources:
|
|
9
|
-
- "d:/Dev/workflowAi/.workflow/coach-backlog.yaml"
|
|
10
|
-
tags: [test-hygiene, isolation, knowledge-test-hygiene]
|
|
11
|
-
severity: high
|
|
12
|
-
|
|
13
|
-
scenario:
|
|
14
|
-
system_prompt_file: "../../SKILL.md"
|
|
15
|
-
extra_instructions: |
|
|
16
|
-
Проведи ревью тикета QA-904, который лежит в .workflow/tickets/review/QA-904.md.
|
|
17
|
-
DoD тикета явно требует изоляции теста (tmp-корень, не запись в рабочий
|
|
18
|
-
репозиторий). Содержимое файла теста, на который ссылается секция
|
|
19
|
-
«Изменённые файлы», приведено ниже (блок "## Test file content") —
|
|
20
|
-
используй его для оценки изоляции. Тест формально проходит
|
|
21
|
-
(`node --test` success), но проверка изоляции ортогональна результату
|
|
22
|
-
прогона.
|
|
23
|
-
inputs:
|
|
24
|
-
- kind: ticket_file
|
|
25
|
-
path: "../fixtures/QA-904-test-isolation-violation/QA-904.md"
|
|
26
|
-
ticket_id: QA-904
|
|
27
|
-
dest_dir: review
|
|
28
|
-
- kind: file
|
|
29
|
-
path: "../fixtures/QA-904-test-isolation-violation/example-test.mjs"
|
|
30
|
-
as: "Test file content (src/skills/review-result/tests/fixtures/QA-904-test-isolation-violation/example-test.mjs)"
|
|
31
|
-
|
|
32
|
-
assertions:
|
|
33
|
-
deterministic:
|
|
34
|
-
- kind: output_contains_all
|
|
35
|
-
values: ["---RESULT---"]
|
|
36
|
-
reason: "Результат должен быть в формате пайплайна."
|
|
37
|
-
- kind: output_contains_all
|
|
38
|
-
values: ["failed"]
|
|
39
|
-
reason: "Вердикт — failed по нарушению изоляции."
|
|
40
|
-
|
|
41
|
-
rubric:
|
|
42
|
-
- kind: llm_judge
|
|
43
|
-
rubric_file: "../rubrics/test-isolation.md"
|
|
44
|
-
criterion: "Поймал ли ревьюер нарушение изоляции теста и оформил failed с конкретным issue, процитировав проблемные строки теста?"
|
|
45
|
-
pass_threshold: 4
|
|
46
|
-
trials: 3
|
|
47
|
-
aggregate: majority
|
|
48
|
-
|
|
49
|
-
execution:
|
|
50
|
-
timeout_s: 1200
|
package/src/skills/review-result/tests/cases/TC-REVIEW-RESULT-004/current/claude-sonnet/trial-1.md
DELETED
|
@@ -1,5 +0,0 @@
|
|
|
1
|
-
---RESULT---
|
|
2
|
-
status: failed
|
|
3
|
-
issues:
|
|
4
|
-
- "DoD п.2 не выполнен: ожидалось — каждый сценарий содержит inline-snapshot в коде теста (литерал-эталон, точное сравнение нормализованного output); получено — все ассерты реализованы как регекс-проверки по фрагментам (`assert.match(normalized, /status:\\s*empty/)`, `assert.match(normalized, /reason:/)` и т.д.) без единого точного сравнения целого нормализованного вывода. Литерала-эталона в файле `src/skills/review-result/tests/fixtures/QA-905-baseline-regex-instead-of-snapshot/baseline.test.mjs` нет. Функция normalizeOutput объявлена, но её результат используется только для фрагментарных regex-ассертов — регрессии вне покрытия regex-ов (новые поля, изменённый формат, потерянные строки) тест не поймает."
|
|
5
|
-
---RESULT---
|
package/src/skills/review-result/tests/cases/TC-REVIEW-RESULT-004/current/claude-sonnet/trial-2.md
DELETED
|
@@ -1,5 +0,0 @@
|
|
|
1
|
-
---RESULT---
|
|
2
|
-
status: failed
|
|
3
|
-
issues:
|
|
4
|
-
- "DoD п.2 не выполнен: заявлено «каждый сценарий содержит inline-snapshot в коде теста (литерал-эталон, точное сравнение)», фактически реализованы регекс-ассерты assert.match по фрагментам (напр. `assert.match(normalized, /status:\\s*empty/)` в baseline.test.mjs:38-39). Литерал-эталон в файле теста отсутствует полностью — ни одного вызова assert.strictEqual/deepStrictEqual с зафиксированной строкой. Регрессия за пределами покрытия регексов (новые поля, изменённый порядок, потерянные строки stdout) проходит мимо теста."
|
|
5
|
-
---RESULT---
|
package/src/skills/review-result/tests/cases/TC-REVIEW-RESULT-004/current/claude-sonnet/trial-3.md
DELETED
|
@@ -1,6 +0,0 @@
|
|
|
1
|
-
---RESULT---
|
|
2
|
-
status: failed
|
|
3
|
-
issues:
|
|
4
|
-
- "DoD #2 не выполнен: заявлено «каждый сценарий содержит inline-snapshot в коде теста (литерал-эталон, точное сравнение)», фактически все 5 тестов реализованы через assert.match с регулярными выражениями (строки 46–47, 64–65, 85–86, 103–104, 119). Ни одного assert.equal/deepEqual с inline-литералом нет. Функция normalizeOutput объявлена, но её результат используется исключительно для фрагментарных regex-ассертов, а не для равенства целого нормализованного output с эталоном. Такие тесты не ловят регрессии за пределами описанных регексов. Файл теста: .workflow/src/skills/review-result/tests/fixtures/QA-905-baseline-regex-instead-of-snapshot/baseline.test.mjs"
|
|
5
|
-
- "Result утверждает «10 baseline snapshot-тестов», в файле теста 5 тестов (2 в pick-next-task + 2 в move-ticket + 1 в get-next-id), ни один из которых не является snapshot-тестом"
|
|
6
|
-
---RESULT---
|
|
@@ -1,46 +0,0 @@
|
|
|
1
|
-
{
|
|
2
|
-
"per_model": {
|
|
3
|
-
"claude-sonnet": {
|
|
4
|
-
"pass_count": 3,
|
|
5
|
-
"total": 3,
|
|
6
|
-
"trials": [
|
|
7
|
-
{
|
|
8
|
-
"trial": 1,
|
|
9
|
-
"score": 5,
|
|
10
|
-
"passed": true
|
|
11
|
-
},
|
|
12
|
-
{
|
|
13
|
-
"trial": 2,
|
|
14
|
-
"score": 5,
|
|
15
|
-
"passed": true
|
|
16
|
-
},
|
|
17
|
-
{
|
|
18
|
-
"trial": 3,
|
|
19
|
-
"score": 4,
|
|
20
|
-
"passed": true
|
|
21
|
-
}
|
|
22
|
-
]
|
|
23
|
-
}
|
|
24
|
-
},
|
|
25
|
-
"rubric_scores": [
|
|
26
|
-
{
|
|
27
|
-
"agentId": "claude-sonnet",
|
|
28
|
-
"trial": 1,
|
|
29
|
-
"score": 5,
|
|
30
|
-
"errored": false
|
|
31
|
-
},
|
|
32
|
-
{
|
|
33
|
-
"agentId": "claude-sonnet",
|
|
34
|
-
"trial": 2,
|
|
35
|
-
"score": 5,
|
|
36
|
-
"errored": false
|
|
37
|
-
},
|
|
38
|
-
{
|
|
39
|
-
"agentId": "claude-sonnet",
|
|
40
|
-
"trial": 3,
|
|
41
|
-
"score": 4,
|
|
42
|
-
"errored": false
|
|
43
|
-
}
|
|
44
|
-
],
|
|
45
|
-
"timestamp": "2026-04-25T07:11:21.120Z"
|
|
46
|
-
}
|
|
@@ -1,37 +0,0 @@
|
|
|
1
|
-
{
|
|
2
|
-
"date": "2026-04-25T07:11:21.120Z",
|
|
3
|
-
"skill_sha": "b6cda8a",
|
|
4
|
-
"status": "passed",
|
|
5
|
-
"duration_ms": 129302,
|
|
6
|
-
"l1_skipped": true,
|
|
7
|
-
"per_model": {
|
|
8
|
-
"claude-sonnet": {
|
|
9
|
-
"passed": true,
|
|
10
|
-
"errored": false,
|
|
11
|
-
"pass_count": 3,
|
|
12
|
-
"error_count": 0,
|
|
13
|
-
"total": 3,
|
|
14
|
-
"threshold": 2
|
|
15
|
-
}
|
|
16
|
-
},
|
|
17
|
-
"rubric_scores": [
|
|
18
|
-
{
|
|
19
|
-
"agentId": "claude-sonnet",
|
|
20
|
-
"trial": 1,
|
|
21
|
-
"score": 5,
|
|
22
|
-
"errored": false
|
|
23
|
-
},
|
|
24
|
-
{
|
|
25
|
-
"agentId": "claude-sonnet",
|
|
26
|
-
"trial": 2,
|
|
27
|
-
"score": 5,
|
|
28
|
-
"errored": false
|
|
29
|
-
},
|
|
30
|
-
{
|
|
31
|
-
"agentId": "claude-sonnet",
|
|
32
|
-
"trial": 3,
|
|
33
|
-
"score": 4,
|
|
34
|
-
"errored": false
|
|
35
|
-
}
|
|
36
|
-
]
|
|
37
|
-
}
|
|
@@ -1,50 +0,0 @@
|
|
|
1
|
-
id: TC-REVIEW-RESULT-004
|
|
2
|
-
title: "Ревьюер ловит подмену inline-snapshot регекс-ассертами"
|
|
3
|
-
origin:
|
|
4
|
-
chg: [CHG-baseline-snapshot-2026-04-25]
|
|
5
|
-
incidents:
|
|
6
|
-
- "workflowAi PLAN-008/QA-030 → IMPL-050/051: тикет QA-030 заявил DoD `inline-snapshot в коде теста`, фактический файл src/tests/scripts-stdout-baseline.test.mjs использовал только assert.match по регексам — регрессия рефакторинга IMPL-050/051 (потеря argv-блока-парсера, изменение полей RESULT) прошла мимо baseline-теста; ревью QA-030 пропустило подмену со ссылкой на `10/10 зелёных`. Лог падения: .workflow/logs/pipeline_2026-04-24_16-20-18.log"
|
|
7
|
-
principle: "Заявленный в DoD способ реализации теста (snapshot/эталон/побайтное совпадение) ортогонален результату прогона. Зелёный тест на регекс-ассертах не реализует snapshot — регрессия за пределами регексов проходит мимо."
|
|
8
|
-
backlog_sources:
|
|
9
|
-
- "d:/Dev/workflowAi/.workflow/coach-backlog.yaml"
|
|
10
|
-
tags: [baseline, snapshot, knowledge-baseline-snapshot-validation]
|
|
11
|
-
severity: high
|
|
12
|
-
|
|
13
|
-
scenario:
|
|
14
|
-
system_prompt_file: "../../SKILL.md"
|
|
15
|
-
extra_instructions: |
|
|
16
|
-
Проведи ревью тикета QA-905, который лежит в .workflow/tickets/review/QA-905.md.
|
|
17
|
-
DoD тикета явно требует inline-snapshot в коде теста (точное сравнение
|
|
18
|
-
нормализованного output с литералом-эталоном). Содержимое файла теста,
|
|
19
|
-
на который ссылается секция «Изменённые файлы», приведено ниже
|
|
20
|
-
(блок "## Test file content") — используй его для оценки способа
|
|
21
|
-
реализации. Тест формально проходит, но проверка способа реализации
|
|
22
|
-
ортогональна результату прогона.
|
|
23
|
-
inputs:
|
|
24
|
-
- kind: ticket_file
|
|
25
|
-
path: "../fixtures/QA-905-baseline-regex-instead-of-snapshot/QA-905.md"
|
|
26
|
-
ticket_id: QA-905
|
|
27
|
-
dest_dir: review
|
|
28
|
-
- kind: file
|
|
29
|
-
path: "../fixtures/QA-905-baseline-regex-instead-of-snapshot/baseline.test.mjs"
|
|
30
|
-
as: "Test file content (src/skills/review-result/tests/fixtures/QA-905-baseline-regex-instead-of-snapshot/baseline.test.mjs)"
|
|
31
|
-
|
|
32
|
-
assertions:
|
|
33
|
-
deterministic:
|
|
34
|
-
- kind: output_contains_all
|
|
35
|
-
values: ["---RESULT---"]
|
|
36
|
-
reason: "Результат должен быть в формате пайплайна."
|
|
37
|
-
- kind: output_contains_all
|
|
38
|
-
values: ["failed"]
|
|
39
|
-
reason: "Вердикт — failed: regex-ассерты не реализуют заявленный snapshot."
|
|
40
|
-
|
|
41
|
-
rubric:
|
|
42
|
-
- kind: llm_judge
|
|
43
|
-
rubric_file: "../rubrics/baseline-snapshot.md"
|
|
44
|
-
criterion: "Поймал ли ревьюер подмену snapshot-сравнения регекс-ассертами и оформил failed с дословной цитатой формулировки DoD и проблемного ассерта?"
|
|
45
|
-
pass_threshold: 4
|
|
46
|
-
trials: 3
|
|
47
|
-
aggregate: majority
|
|
48
|
-
|
|
49
|
-
execution:
|
|
50
|
-
timeout_s: 1200
|