workflow-ai 1.1.0 → 1.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +61 -0
- package/README.md +37 -377
- package/configs/pipeline.yaml +113 -4
- package/package.json +54 -44
- package/src/lib/operations/tickets.mjs +305 -207
- package/src/lib/utils.mjs +286 -286
- package/src/runner.mjs +316 -35
- package/src/scripts/check-conditions.js +2 -2
- package/src/scripts/check-relevance.js +3 -1
- package/src/scripts/get-next-id.js +144 -41
- package/src/scripts/mark-blocked.js +160 -0
- package/src/scripts/move-ticket.js +305 -83
- package/src/scripts/pick-next-task.js +782 -93
- package/src/skills/__test-cal-001-1777553217513/SKILL.md +2 -0
- package/src/skills/__test-runner-1777553217483/SKILL.md +5 -0
- package/src/skills/coach/SKILL.md +2 -2
- package/src/skills/execute-task/SKILL.md +1 -1
- package/src/skills/manual-testing/SKILL.md +2 -0
- package/src/skills/review-result/SKILL.md +23 -1
- package/src/scripts/tests/timeout-cascade.test.js +0 -28
- package/src/skills/analyze-report/README.md +0 -44
- package/src/skills/analyze-report/algorithms/progress-assessment.md +0 -108
- package/src/skills/analyze-report/knowledge/analysis-frameworks.md +0 -66
- package/src/skills/analyze-report/knowledge/report-structure.md +0 -61
- package/src/skills/analyze-report/scripts/calc-plan-metrics.js +0 -234
- package/src/skills/analyze-report/templates/analysis-report.md +0 -80
- package/src/skills/analyze-report/tests/cases/TC-ANALYZE-REPORT-001/current/claude-sonnet/trial-1.md +0 -5
- package/src/skills/analyze-report/tests/cases/TC-ANALYZE-REPORT-001/current/claude-sonnet/trial-2.md +0 -98
- package/src/skills/analyze-report/tests/cases/TC-ANALYZE-REPORT-001/current/claude-sonnet/trial-3.md +0 -99
- package/src/skills/analyze-report/tests/cases/TC-ANALYZE-REPORT-001/current/judge.json +0 -163
- package/src/skills/analyze-report/tests/cases/TC-ANALYZE-REPORT-001/current/kilo-deepseek/trial-1.md +0 -89
- package/src/skills/analyze-report/tests/cases/TC-ANALYZE-REPORT-001/current/kilo-deepseek/trial-2.md +0 -88
- package/src/skills/analyze-report/tests/cases/TC-ANALYZE-REPORT-001/current/kilo-deepseek/trial-3.md +0 -100
- package/src/skills/analyze-report/tests/cases/TC-ANALYZE-REPORT-001/current/kilo-glm/trial-1.md +0 -77
- package/src/skills/analyze-report/tests/cases/TC-ANALYZE-REPORT-001/current/kilo-glm/trial-2.md +0 -64
- package/src/skills/analyze-report/tests/cases/TC-ANALYZE-REPORT-001/current/kilo-glm/trial-3.md +0 -110
- package/src/skills/analyze-report/tests/cases/TC-ANALYZE-REPORT-001/current/kilo-minimax/trial-1.md +0 -74
- package/src/skills/analyze-report/tests/cases/TC-ANALYZE-REPORT-001/current/kilo-minimax/trial-2.md +0 -38
- package/src/skills/analyze-report/tests/cases/TC-ANALYZE-REPORT-001/current/kilo-minimax/trial-3.md +0 -61
- package/src/skills/analyze-report/tests/cases/TC-ANALYZE-REPORT-001/current/meta.json +0 -115
- package/src/skills/analyze-report/tests/cases/TC-ANALYZE-REPORT-001-evidence-from-log.yaml +0 -60
- package/src/skills/analyze-report/tests/cases/TC-ANALYZE-REPORT-002/current/claude-sonnet/trial-1.md +0 -90
- package/src/skills/analyze-report/tests/cases/TC-ANALYZE-REPORT-002/current/claude-sonnet/trial-2.md +0 -89
- package/src/skills/analyze-report/tests/cases/TC-ANALYZE-REPORT-002/current/claude-sonnet/trial-3.md +0 -5
- package/src/skills/analyze-report/tests/cases/TC-ANALYZE-REPORT-002/current/judge.json +0 -163
- package/src/skills/analyze-report/tests/cases/TC-ANALYZE-REPORT-002/current/kilo-deepseek/trial-1.md +0 -84
- package/src/skills/analyze-report/tests/cases/TC-ANALYZE-REPORT-002/current/kilo-deepseek/trial-2.md +0 -77
- package/src/skills/analyze-report/tests/cases/TC-ANALYZE-REPORT-002/current/kilo-deepseek/trial-3.md +0 -89
- package/src/skills/analyze-report/tests/cases/TC-ANALYZE-REPORT-002/current/kilo-glm/trial-1.md +0 -103
- package/src/skills/analyze-report/tests/cases/TC-ANALYZE-REPORT-002/current/kilo-glm/trial-2.md +0 -103
- package/src/skills/analyze-report/tests/cases/TC-ANALYZE-REPORT-002/current/kilo-glm/trial-3.md +0 -103
- package/src/skills/analyze-report/tests/cases/TC-ANALYZE-REPORT-002/current/kilo-minimax/trial-1.md +0 -93
- package/src/skills/analyze-report/tests/cases/TC-ANALYZE-REPORT-002/current/kilo-minimax/trial-2.md +0 -93
- package/src/skills/analyze-report/tests/cases/TC-ANALYZE-REPORT-002/current/kilo-minimax/trial-3.md +0 -86
- package/src/skills/analyze-report/tests/cases/TC-ANALYZE-REPORT-002/current/meta.json +0 -115
- package/src/skills/analyze-report/tests/cases/TC-ANALYZE-REPORT-002-result-block-format.yaml +0 -44
- package/src/skills/analyze-report/tests/fixtures/REPORT-002-incorrect-attribution.md +0 -27
- package/src/skills/analyze-report/tests/fixtures/pipeline-2026-04-06_qa-001-skip.log +0 -32
- package/src/skills/analyze-report/tests/index.yaml +0 -25
- package/src/skills/analyze-report/tests/rubrics/evidence-from-log.md +0 -22
- package/src/skills/analyze-report/tests/rubrics/result-block-format.md +0 -22
- package/src/skills/analyze-report/workflows/progress.md +0 -158
- package/src/skills/analyze-report/workflows/retrospective.md +0 -143
- package/src/skills/coach/README.md +0 -43
- package/src/skills/coach/SKILL.md.legacy +0 -157
- package/src/skills/coach/algorithms/gap-analysis.md +0 -69
- package/src/skills/coach/algorithms/improvement-prioritization.md +0 -62
- package/src/skills/coach/algorithms/skill-scoring.md +0 -80
- package/src/skills/coach/knowledge/audit-applied-changes-clean.txt +0 -11
- package/src/skills/coach/knowledge/backlog-management.md +0 -67
- package/src/skills/coach/knowledge/backlog-management.md.legacy +0 -90
- package/src/skills/coach/knowledge/common-antipatterns.md +0 -76
- package/src/skills/coach/knowledge/prompt-engineering.md +0 -45
- package/src/skills/coach/knowledge/shared-knowledge-guide.md +0 -44
- package/src/skills/coach/knowledge/skill-anatomy.md +0 -49
- package/src/skills/coach/knowledge/test-authorship.md +0 -141
- package/src/skills/coach/templates/audit-report.md +0 -39
- package/src/skills/coach/templates/coach-backlog-init.yaml +0 -14
- package/src/skills/coach/templates/coach-backlog-init.yaml.legacy +0 -10
- package/src/skills/coach/templates/improvement-plan.md +0 -42
- package/src/skills/coach/templates/new-skill.md +0 -95
- package/src/skills/coach/tests/cases/TC-COACH-001/current/claude-sonnet/trial-1.md +0 -58
- package/src/skills/coach/tests/cases/TC-COACH-001/current/claude-sonnet/trial-2.md +0 -65
- package/src/skills/coach/tests/cases/TC-COACH-001/current/claude-sonnet/trial-3.md +0 -58
- package/src/skills/coach/tests/cases/TC-COACH-001/current/judge.json +0 -151
- package/src/skills/coach/tests/cases/TC-COACH-001/current/kilo-deepseek/trial-1.md +0 -46
- package/src/skills/coach/tests/cases/TC-COACH-001/current/kilo-deepseek/trial-2.md +0 -0
- package/src/skills/coach/tests/cases/TC-COACH-001/current/kilo-deepseek/trial-3.md +0 -75
- package/src/skills/coach/tests/cases/TC-COACH-001/current/kilo-glm/trial-1.md +0 -81
- package/src/skills/coach/tests/cases/TC-COACH-001/current/kilo-glm/trial-2.md +0 -101
- package/src/skills/coach/tests/cases/TC-COACH-001/current/kilo-glm/trial-3.md +0 -91
- package/src/skills/coach/tests/cases/TC-COACH-001/current/kilo-minimax/trial-1.md +0 -48
- package/src/skills/coach/tests/cases/TC-COACH-001/current/kilo-minimax/trial-2.md +0 -30
- package/src/skills/coach/tests/cases/TC-COACH-001/current/kilo-minimax/trial-3.md +0 -55
- package/src/skills/coach/tests/cases/TC-COACH-001/current/meta.json +0 -94
- package/src/skills/coach/tests/cases/TC-COACH-001-evidence-based-temporal-diagram.yaml +0 -53
- package/src/skills/coach/tests/cases/TC-COACH-002/current/claude-sonnet/trial-1.md +0 -46
- package/src/skills/coach/tests/cases/TC-COACH-002/current/claude-sonnet/trial-2.md +0 -50
- package/src/skills/coach/tests/cases/TC-COACH-002/current/claude-sonnet/trial-3.md +0 -48
- package/src/skills/coach/tests/cases/TC-COACH-002/current/judge.json +0 -151
- package/src/skills/coach/tests/cases/TC-COACH-002/current/kilo-deepseek/trial-1.md +0 -0
- package/src/skills/coach/tests/cases/TC-COACH-002/current/kilo-deepseek/trial-2.md +0 -37
- package/src/skills/coach/tests/cases/TC-COACH-002/current/kilo-deepseek/trial-3.md +0 -30
- package/src/skills/coach/tests/cases/TC-COACH-002/current/kilo-glm/trial-1.md +0 -23
- package/src/skills/coach/tests/cases/TC-COACH-002/current/kilo-glm/trial-2.md +0 -29
- package/src/skills/coach/tests/cases/TC-COACH-002/current/kilo-glm/trial-3.md +0 -35
- package/src/skills/coach/tests/cases/TC-COACH-002/current/kilo-minimax/trial-1.md +0 -13
- package/src/skills/coach/tests/cases/TC-COACH-002/current/kilo-minimax/trial-2.md +0 -19
- package/src/skills/coach/tests/cases/TC-COACH-002/current/kilo-minimax/trial-3.md +0 -33
- package/src/skills/coach/tests/cases/TC-COACH-002/current/meta.json +0 -94
- package/src/skills/coach/tests/cases/TC-COACH-002-root-cause-first.yaml +0 -57
- package/src/skills/coach/tests/fixtures/pipeline-2026-04-06_id-collision.log +0 -77
- package/src/skills/coach/tests/index.yaml +0 -29
- package/src/skills/coach/tests/rubrics/calibration/evidence-based-bad.md +0 -13
- package/src/skills/coach/tests/rubrics/calibration/evidence-based-good.md +0 -29
- package/src/skills/coach/tests/rubrics/evidence-based.md +0 -26
- package/src/skills/coach/tests/rubrics/root-cause-first.md +0 -21
- package/src/skills/coach/workflows/analyze.md +0 -79
- package/src/skills/coach/workflows/analyze.md.legacy +0 -64
- package/src/skills/coach/workflows/audit.md +0 -74
- package/src/skills/coach/workflows/audit.md.legacy +0 -59
- package/src/skills/coach/workflows/create.md +0 -80
- package/src/skills/coach/workflows/create.md.legacy +0 -67
- package/src/skills/coach/workflows/improve.md +0 -71
- package/src/skills/coach/workflows/improve.md.legacy +0 -60
- package/src/skills/coach/workflows/research.md +0 -55
- package/src/skills/coach/workflows/review.md +0 -52
- package/src/skills/coach/workflows/review.md.legacy +0 -48
- package/src/skills/coach/workflows/test.md +0 -97
- package/src/skills/create-plan/README.md +0 -39
- package/src/skills/create-plan/algorithms/risk-assessment.md +0 -73
- package/src/skills/create-plan/knowledge/plan-completeness.md +0 -67
- package/src/skills/create-plan/knowledge/plan-lifecycle.md +0 -33
- package/src/skills/create-plan/knowledge/task-verification-pairs.md +0 -151
- package/src/skills/create-plan/knowledge/test-hygiene.md +0 -47
- package/src/skills/create-plan/scripts/validate-completeness.js +0 -182
- package/src/skills/create-plan/tests/cases/TC-CREATE-PLAN-001/current/claude-sonnet/trial-1.md +0 -5
- package/src/skills/create-plan/tests/cases/TC-CREATE-PLAN-001/current/claude-sonnet/trial-2.md +0 -39
- package/src/skills/create-plan/tests/cases/TC-CREATE-PLAN-001/current/claude-sonnet/trial-3.md +0 -35
- package/src/skills/create-plan/tests/cases/TC-CREATE-PLAN-001/current/judge.json +0 -167
- package/src/skills/create-plan/tests/cases/TC-CREATE-PLAN-001/current/kilo-deepseek/trial-1.md +0 -5
- package/src/skills/create-plan/tests/cases/TC-CREATE-PLAN-001/current/kilo-deepseek/trial-2.md +0 -10
- package/src/skills/create-plan/tests/cases/TC-CREATE-PLAN-001/current/kilo-deepseek/trial-3.md +0 -5
- package/src/skills/create-plan/tests/cases/TC-CREATE-PLAN-001/current/kilo-glm/trial-1.md +0 -26
- package/src/skills/create-plan/tests/cases/TC-CREATE-PLAN-001/current/kilo-glm/trial-2.md +0 -86
- package/src/skills/create-plan/tests/cases/TC-CREATE-PLAN-001/current/kilo-glm/trial-3.md +0 -5
- package/src/skills/create-plan/tests/cases/TC-CREATE-PLAN-001/current/kilo-minimax/trial-1.md +0 -11
- package/src/skills/create-plan/tests/cases/TC-CREATE-PLAN-001/current/kilo-minimax/trial-2.md +0 -15
- package/src/skills/create-plan/tests/cases/TC-CREATE-PLAN-001/current/kilo-minimax/trial-3.md +0 -14
- package/src/skills/create-plan/tests/cases/TC-CREATE-PLAN-001/current/meta.json +0 -119
- package/src/skills/create-plan/tests/cases/TC-CREATE-PLAN-001-validate-completeness.yaml +0 -41
- package/src/skills/create-plan/tests/cases/TC-CREATE-PLAN-002/current/claude-sonnet/trial-1.md +0 -25
- package/src/skills/create-plan/tests/cases/TC-CREATE-PLAN-002/current/claude-sonnet/trial-2.md +0 -30
- package/src/skills/create-plan/tests/cases/TC-CREATE-PLAN-002/current/claude-sonnet/trial-3.md +0 -37
- package/src/skills/create-plan/tests/cases/TC-CREATE-PLAN-002/current/judge.json +0 -164
- package/src/skills/create-plan/tests/cases/TC-CREATE-PLAN-002/current/kilo-deepseek/trial-1.md +0 -3
- package/src/skills/create-plan/tests/cases/TC-CREATE-PLAN-002/current/kilo-deepseek/trial-2.md +0 -11
- package/src/skills/create-plan/tests/cases/TC-CREATE-PLAN-002/current/kilo-deepseek/trial-3.md +0 -13
- package/src/skills/create-plan/tests/cases/TC-CREATE-PLAN-002/current/kilo-glm/trial-1.md +0 -44
- package/src/skills/create-plan/tests/cases/TC-CREATE-PLAN-002/current/kilo-glm/trial-2.md +0 -5
- package/src/skills/create-plan/tests/cases/TC-CREATE-PLAN-002/current/kilo-glm/trial-3.md +0 -49
- package/src/skills/create-plan/tests/cases/TC-CREATE-PLAN-002/current/kilo-minimax/trial-1.md +0 -6
- package/src/skills/create-plan/tests/cases/TC-CREATE-PLAN-002/current/kilo-minimax/trial-2.md +0 -11
- package/src/skills/create-plan/tests/cases/TC-CREATE-PLAN-002/current/kilo-minimax/trial-3.md +0 -16
- package/src/skills/create-plan/tests/cases/TC-CREATE-PLAN-002/current/meta.json +0 -116
- package/src/skills/create-plan/tests/cases/TC-CREATE-PLAN-002-task-granularity.yaml +0 -39
- package/src/skills/create-plan/tests/index.yaml +0 -25
- package/src/skills/create-plan/tests/rubrics/task-granularity.md +0 -21
- package/src/skills/create-plan/tests/rubrics/validate-completeness.md +0 -21
- package/src/skills/create-plan/workflows/create.md +0 -136
- package/src/skills/create-report/README.md +0 -40
- package/src/skills/create-report/algorithms/metric-calculation.md +0 -93
- package/src/skills/create-report/knowledge/report-metrics.md +0 -82
- package/src/skills/create-report/scripts/calc-metrics.js +0 -383
- package/src/skills/create-report/tests/cases/TC-CREATE-REPORT-001/current/claude-sonnet/trial-1.md +0 -25
- package/src/skills/create-report/tests/cases/TC-CREATE-REPORT-001/current/claude-sonnet/trial-2.md +0 -26
- package/src/skills/create-report/tests/cases/TC-CREATE-REPORT-001/current/claude-sonnet/trial-3.md +0 -28
- package/src/skills/create-report/tests/cases/TC-CREATE-REPORT-001/current/judge.json +0 -163
- package/src/skills/create-report/tests/cases/TC-CREATE-REPORT-001/current/kilo-deepseek/trial-1.md +0 -4
- package/src/skills/create-report/tests/cases/TC-CREATE-REPORT-001/current/kilo-deepseek/trial-2.md +0 -3
- package/src/skills/create-report/tests/cases/TC-CREATE-REPORT-001/current/kilo-deepseek/trial-3.md +0 -6
- package/src/skills/create-report/tests/cases/TC-CREATE-REPORT-001/current/kilo-glm/trial-1.md +0 -8
- package/src/skills/create-report/tests/cases/TC-CREATE-REPORT-001/current/kilo-glm/trial-2.md +0 -12
- package/src/skills/create-report/tests/cases/TC-CREATE-REPORT-001/current/kilo-glm/trial-3.md +0 -7
- package/src/skills/create-report/tests/cases/TC-CREATE-REPORT-001/current/kilo-minimax/trial-1.md +0 -12
- package/src/skills/create-report/tests/cases/TC-CREATE-REPORT-001/current/kilo-minimax/trial-2.md +0 -22
- package/src/skills/create-report/tests/cases/TC-CREATE-REPORT-001/current/kilo-minimax/trial-3.md +0 -13
- package/src/skills/create-report/tests/cases/TC-CREATE-REPORT-001/current/meta.json +0 -115
- package/src/skills/create-report/tests/cases/TC-CREATE-REPORT-001-root-cause-attribution.yaml +0 -57
- package/src/skills/create-report/tests/index.yaml +0 -20
- package/src/skills/create-report/tests/rubrics/root-cause-attribution.md +0 -21
- package/src/skills/create-report/workflows/standard.md +0 -175
- package/src/skills/decompose-gaps/README.md +0 -39
- package/src/skills/decompose-gaps/algorithms/scope-check.md +0 -110
- package/src/skills/decompose-gaps/knowledge/scope-validation.md +0 -65
- package/src/skills/decompose-gaps/tests/cases/TC-DECOMPOSE-GAPS-001/current/claude-sonnet/trial-1.md +0 -41
- package/src/skills/decompose-gaps/tests/cases/TC-DECOMPOSE-GAPS-001/current/claude-sonnet/trial-2.md +0 -41
- package/src/skills/decompose-gaps/tests/cases/TC-DECOMPOSE-GAPS-001/current/claude-sonnet/trial-3.md +0 -56
- package/src/skills/decompose-gaps/tests/cases/TC-DECOMPOSE-GAPS-001/current/judge.json +0 -164
- package/src/skills/decompose-gaps/tests/cases/TC-DECOMPOSE-GAPS-001/current/kilo-deepseek/trial-1.md +0 -25
- package/src/skills/decompose-gaps/tests/cases/TC-DECOMPOSE-GAPS-001/current/kilo-deepseek/trial-2.md +0 -17
- package/src/skills/decompose-gaps/tests/cases/TC-DECOMPOSE-GAPS-001/current/kilo-deepseek/trial-3.md +0 -22
- package/src/skills/decompose-gaps/tests/cases/TC-DECOMPOSE-GAPS-001/current/kilo-glm/trial-1.md +0 -25
- package/src/skills/decompose-gaps/tests/cases/TC-DECOMPOSE-GAPS-001/current/kilo-glm/trial-2.md +0 -5
- package/src/skills/decompose-gaps/tests/cases/TC-DECOMPOSE-GAPS-001/current/kilo-glm/trial-3.md +0 -29
- package/src/skills/decompose-gaps/tests/cases/TC-DECOMPOSE-GAPS-001/current/kilo-minimax/trial-1.md +0 -27
- package/src/skills/decompose-gaps/tests/cases/TC-DECOMPOSE-GAPS-001/current/kilo-minimax/trial-2.md +0 -35
- package/src/skills/decompose-gaps/tests/cases/TC-DECOMPOSE-GAPS-001/current/kilo-minimax/trial-3.md +0 -18
- package/src/skills/decompose-gaps/tests/cases/TC-DECOMPOSE-GAPS-001/current/meta.json +0 -116
- package/src/skills/decompose-gaps/tests/cases/TC-DECOMPOSE-GAPS-001-scope-exclusion.yaml +0 -46
- package/src/skills/decompose-gaps/tests/cases/TC-DECOMPOSE-GAPS-002/current/claude-sonnet/trial-1.md +0 -27
- package/src/skills/decompose-gaps/tests/cases/TC-DECOMPOSE-GAPS-002/current/claude-sonnet/trial-2.md +0 -30
- package/src/skills/decompose-gaps/tests/cases/TC-DECOMPOSE-GAPS-002/current/claude-sonnet/trial-3.md +0 -27
- package/src/skills/decompose-gaps/tests/cases/TC-DECOMPOSE-GAPS-002/current/judge.json +0 -163
- package/src/skills/decompose-gaps/tests/cases/TC-DECOMPOSE-GAPS-002/current/kilo-deepseek/trial-1.md +0 -0
- package/src/skills/decompose-gaps/tests/cases/TC-DECOMPOSE-GAPS-002/current/kilo-deepseek/trial-2.md +0 -15
- package/src/skills/decompose-gaps/tests/cases/TC-DECOMPOSE-GAPS-002/current/kilo-deepseek/trial-3.md +0 -7
- package/src/skills/decompose-gaps/tests/cases/TC-DECOMPOSE-GAPS-002/current/kilo-glm/trial-1.md +0 -21
- package/src/skills/decompose-gaps/tests/cases/TC-DECOMPOSE-GAPS-002/current/kilo-glm/trial-2.md +0 -38
- package/src/skills/decompose-gaps/tests/cases/TC-DECOMPOSE-GAPS-002/current/kilo-glm/trial-3.md +0 -16
- package/src/skills/decompose-gaps/tests/cases/TC-DECOMPOSE-GAPS-002/current/kilo-minimax/trial-1.md +0 -5
- package/src/skills/decompose-gaps/tests/cases/TC-DECOMPOSE-GAPS-002/current/kilo-minimax/trial-2.md +0 -10
- package/src/skills/decompose-gaps/tests/cases/TC-DECOMPOSE-GAPS-002/current/kilo-minimax/trial-3.md +0 -9
- package/src/skills/decompose-gaps/tests/cases/TC-DECOMPOSE-GAPS-002/current/meta.json +0 -115
- package/src/skills/decompose-gaps/tests/cases/TC-DECOMPOSE-GAPS-002-glob-before-write.yaml +0 -36
- package/src/skills/decompose-gaps/tests/cases/TC-DECOMPOSE-GAPS-003/current/claude-sonnet/trial-1.md +0 -30
- package/src/skills/decompose-gaps/tests/cases/TC-DECOMPOSE-GAPS-003/current/claude-sonnet/trial-2.md +0 -30
- package/src/skills/decompose-gaps/tests/cases/TC-DECOMPOSE-GAPS-003/current/claude-sonnet/trial-3.md +0 -30
- package/src/skills/decompose-gaps/tests/cases/TC-DECOMPOSE-GAPS-003/current/judge.json +0 -165
- package/src/skills/decompose-gaps/tests/cases/TC-DECOMPOSE-GAPS-003/current/kilo-deepseek/trial-1.md +0 -5
- package/src/skills/decompose-gaps/tests/cases/TC-DECOMPOSE-GAPS-003/current/kilo-deepseek/trial-2.md +0 -26
- package/src/skills/decompose-gaps/tests/cases/TC-DECOMPOSE-GAPS-003/current/kilo-deepseek/trial-3.md +0 -5
- package/src/skills/decompose-gaps/tests/cases/TC-DECOMPOSE-GAPS-003/current/kilo-glm/trial-1.md +0 -39
- package/src/skills/decompose-gaps/tests/cases/TC-DECOMPOSE-GAPS-003/current/kilo-glm/trial-2.md +0 -37
- package/src/skills/decompose-gaps/tests/cases/TC-DECOMPOSE-GAPS-003/current/kilo-glm/trial-3.md +0 -45
- package/src/skills/decompose-gaps/tests/cases/TC-DECOMPOSE-GAPS-003/current/kilo-minimax/trial-1.md +0 -26
- package/src/skills/decompose-gaps/tests/cases/TC-DECOMPOSE-GAPS-003/current/kilo-minimax/trial-2.md +0 -27
- package/src/skills/decompose-gaps/tests/cases/TC-DECOMPOSE-GAPS-003/current/kilo-minimax/trial-3.md +0 -7
- package/src/skills/decompose-gaps/tests/cases/TC-DECOMPOSE-GAPS-003/current/meta.json +0 -117
- package/src/skills/decompose-gaps/tests/cases/TC-DECOMPOSE-GAPS-003-parent-plan-mandatory.yaml +0 -41
- package/src/skills/decompose-gaps/tests/index.yaml +0 -30
- package/src/skills/decompose-gaps/tests/rubrics/glob-before-write.md +0 -21
- package/src/skills/decompose-gaps/tests/rubrics/parent-plan-mandatory.md +0 -22
- package/src/skills/decompose-gaps/tests/rubrics/scope-exclusion.md +0 -21
- package/src/skills/decompose-gaps/workflows/decompose.md +0 -123
- package/src/skills/decompose-plan/README.md +0 -43
- package/src/skills/decompose-plan/algorithms/deduplication.md +0 -101
- package/src/skills/decompose-plan/knowledge/atomicity-checklist.md +0 -139
- package/src/skills/decompose-plan/knowledge/capabilities.md +0 -68
- package/src/skills/decompose-plan/knowledge/human-task-rules.md +0 -82
- package/src/skills/decompose-plan/knowledge/scope-guard-checklist.md +0 -73
- package/src/skills/decompose-plan/scripts/check-atomicity-limit.js +0 -47
- package/src/skills/decompose-plan/scripts/check-duplicates.js +0 -323
- package/src/skills/decompose-plan/scripts/verify-atomicity.js +0 -408
- package/src/skills/decompose-plan/tests/cases/TC-DECOMPOSE-PLAN-001/current/claude-sonnet/trial-1.md +0 -30
- package/src/skills/decompose-plan/tests/cases/TC-DECOMPOSE-PLAN-001/current/claude-sonnet/trial-2.md +0 -36
- package/src/skills/decompose-plan/tests/cases/TC-DECOMPOSE-PLAN-001/current/claude-sonnet/trial-3.md +0 -37
- package/src/skills/decompose-plan/tests/cases/TC-DECOMPOSE-PLAN-001/current/judge.json +0 -163
- package/src/skills/decompose-plan/tests/cases/TC-DECOMPOSE-PLAN-001/current/kilo-deepseek/trial-1.md +0 -20
- package/src/skills/decompose-plan/tests/cases/TC-DECOMPOSE-PLAN-001/current/kilo-deepseek/trial-2.md +0 -17
- package/src/skills/decompose-plan/tests/cases/TC-DECOMPOSE-PLAN-001/current/kilo-deepseek/trial-3.md +0 -28
- package/src/skills/decompose-plan/tests/cases/TC-DECOMPOSE-PLAN-001/current/kilo-glm/trial-1.md +0 -114
- package/src/skills/decompose-plan/tests/cases/TC-DECOMPOSE-PLAN-001/current/kilo-glm/trial-2.md +0 -137
- package/src/skills/decompose-plan/tests/cases/TC-DECOMPOSE-PLAN-001/current/kilo-glm/trial-3.md +0 -188
- package/src/skills/decompose-plan/tests/cases/TC-DECOMPOSE-PLAN-001/current/kilo-minimax/trial-1.md +0 -0
- package/src/skills/decompose-plan/tests/cases/TC-DECOMPOSE-PLAN-001/current/kilo-minimax/trial-2.md +0 -32
- package/src/skills/decompose-plan/tests/cases/TC-DECOMPOSE-PLAN-001/current/kilo-minimax/trial-3.md +0 -110
- package/src/skills/decompose-plan/tests/cases/TC-DECOMPOSE-PLAN-001/current/meta.json +0 -115
- package/src/skills/decompose-plan/tests/cases/TC-DECOMPOSE-PLAN-001-atomicity-no-1to1.yaml +0 -56
- package/src/skills/decompose-plan/tests/cases/TC-DECOMPOSE-PLAN-002/current/claude-sonnet/trial-1.md +0 -47
- package/src/skills/decompose-plan/tests/cases/TC-DECOMPOSE-PLAN-002/current/claude-sonnet/trial-2.md +0 -54
- package/src/skills/decompose-plan/tests/cases/TC-DECOMPOSE-PLAN-002/current/claude-sonnet/trial-3.md +0 -43
- package/src/skills/decompose-plan/tests/cases/TC-DECOMPOSE-PLAN-002/current/judge.json +0 -163
- package/src/skills/decompose-plan/tests/cases/TC-DECOMPOSE-PLAN-002/current/kilo-deepseek/trial-1.md +0 -15
- package/src/skills/decompose-plan/tests/cases/TC-DECOMPOSE-PLAN-002/current/kilo-deepseek/trial-2.md +0 -5
- package/src/skills/decompose-plan/tests/cases/TC-DECOMPOSE-PLAN-002/current/kilo-deepseek/trial-3.md +0 -12
- package/src/skills/decompose-plan/tests/cases/TC-DECOMPOSE-PLAN-002/current/kilo-glm/trial-1.md +0 -34
- package/src/skills/decompose-plan/tests/cases/TC-DECOMPOSE-PLAN-002/current/kilo-glm/trial-2.md +0 -30
- package/src/skills/decompose-plan/tests/cases/TC-DECOMPOSE-PLAN-002/current/kilo-glm/trial-3.md +0 -35
- package/src/skills/decompose-plan/tests/cases/TC-DECOMPOSE-PLAN-002/current/kilo-minimax/trial-1.md +0 -0
- package/src/skills/decompose-plan/tests/cases/TC-DECOMPOSE-PLAN-002/current/kilo-minimax/trial-2.md +0 -31
- package/src/skills/decompose-plan/tests/cases/TC-DECOMPOSE-PLAN-002/current/kilo-minimax/trial-3.md +0 -0
- package/src/skills/decompose-plan/tests/cases/TC-DECOMPOSE-PLAN-002/current/meta.json +0 -115
- package/src/skills/decompose-plan/tests/cases/TC-DECOMPOSE-PLAN-002-get-next-id-mandatory.yaml +0 -44
- package/src/skills/decompose-plan/tests/cases/TC-DECOMPOSE-PLAN-003/current/claude-sonnet/trial-1.md +0 -21
- package/src/skills/decompose-plan/tests/cases/TC-DECOMPOSE-PLAN-003/current/claude-sonnet/trial-2.md +0 -38
- package/src/skills/decompose-plan/tests/cases/TC-DECOMPOSE-PLAN-003/current/claude-sonnet/trial-3.md +0 -30
- package/src/skills/decompose-plan/tests/cases/TC-DECOMPOSE-PLAN-003/current/judge.json +0 -163
- package/src/skills/decompose-plan/tests/cases/TC-DECOMPOSE-PLAN-003/current/kilo-deepseek/trial-1.md +0 -31
- package/src/skills/decompose-plan/tests/cases/TC-DECOMPOSE-PLAN-003/current/kilo-deepseek/trial-2.md +0 -35
- package/src/skills/decompose-plan/tests/cases/TC-DECOMPOSE-PLAN-003/current/kilo-deepseek/trial-3.md +0 -48
- package/src/skills/decompose-plan/tests/cases/TC-DECOMPOSE-PLAN-003/current/kilo-glm/trial-1.md +0 -167
- package/src/skills/decompose-plan/tests/cases/TC-DECOMPOSE-PLAN-003/current/kilo-glm/trial-2.md +0 -62
- package/src/skills/decompose-plan/tests/cases/TC-DECOMPOSE-PLAN-003/current/kilo-glm/trial-3.md +0 -174
- package/src/skills/decompose-plan/tests/cases/TC-DECOMPOSE-PLAN-003/current/kilo-minimax/trial-1.md +0 -0
- package/src/skills/decompose-plan/tests/cases/TC-DECOMPOSE-PLAN-003/current/kilo-minimax/trial-2.md +0 -0
- package/src/skills/decompose-plan/tests/cases/TC-DECOMPOSE-PLAN-003/current/kilo-minimax/trial-3.md +0 -0
- package/src/skills/decompose-plan/tests/cases/TC-DECOMPOSE-PLAN-003/current/meta.json +0 -115
- package/src/skills/decompose-plan/tests/cases/TC-DECOMPOSE-PLAN-003-verbatim-dod-transfer.yaml +0 -42
- package/src/skills/decompose-plan/tests/cases/TC-DECOMPOSE-PLAN-004/current/claude-sonnet/trial-1.md +0 -55
- package/src/skills/decompose-plan/tests/cases/TC-DECOMPOSE-PLAN-004/current/claude-sonnet/trial-2.md +0 -49
- package/src/skills/decompose-plan/tests/cases/TC-DECOMPOSE-PLAN-004/current/claude-sonnet/trial-3.md +0 -49
- package/src/skills/decompose-plan/tests/cases/TC-DECOMPOSE-PLAN-004/current/judge.json +0 -163
- package/src/skills/decompose-plan/tests/cases/TC-DECOMPOSE-PLAN-004/current/kilo-deepseek/trial-1.md +0 -104
- package/src/skills/decompose-plan/tests/cases/TC-DECOMPOSE-PLAN-004/current/kilo-deepseek/trial-2.md +0 -45
- package/src/skills/decompose-plan/tests/cases/TC-DECOMPOSE-PLAN-004/current/kilo-deepseek/trial-3.md +0 -58
- package/src/skills/decompose-plan/tests/cases/TC-DECOMPOSE-PLAN-004/current/kilo-glm/trial-1.md +0 -193
- package/src/skills/decompose-plan/tests/cases/TC-DECOMPOSE-PLAN-004/current/kilo-glm/trial-2.md +0 -202
- package/src/skills/decompose-plan/tests/cases/TC-DECOMPOSE-PLAN-004/current/kilo-glm/trial-3.md +0 -155
- package/src/skills/decompose-plan/tests/cases/TC-DECOMPOSE-PLAN-004/current/kilo-minimax/trial-1.md +0 -52
- package/src/skills/decompose-plan/tests/cases/TC-DECOMPOSE-PLAN-004/current/kilo-minimax/trial-2.md +0 -17
- package/src/skills/decompose-plan/tests/cases/TC-DECOMPOSE-PLAN-004/current/kilo-minimax/trial-3.md +0 -0
- package/src/skills/decompose-plan/tests/cases/TC-DECOMPOSE-PLAN-004/current/meta.json +0 -115
- package/src/skills/decompose-plan/tests/cases/TC-DECOMPOSE-PLAN-004-executor-atomicity.yaml +0 -64
- package/src/skills/decompose-plan/tests/cases/TC-DECOMPOSE-PLAN-005/current/claude-sonnet/trial-1.md +0 -59
- package/src/skills/decompose-plan/tests/cases/TC-DECOMPOSE-PLAN-005/current/claude-sonnet/trial-2.md +0 -204
- package/src/skills/decompose-plan/tests/cases/TC-DECOMPOSE-PLAN-005/current/claude-sonnet/trial-3.md +0 -213
- package/src/skills/decompose-plan/tests/cases/TC-DECOMPOSE-PLAN-005/current/judge.json +0 -163
- package/src/skills/decompose-plan/tests/cases/TC-DECOMPOSE-PLAN-005/current/kilo-deepseek/trial-1.md +0 -0
- package/src/skills/decompose-plan/tests/cases/TC-DECOMPOSE-PLAN-005/current/kilo-deepseek/trial-2.md +0 -57
- package/src/skills/decompose-plan/tests/cases/TC-DECOMPOSE-PLAN-005/current/kilo-deepseek/trial-3.md +0 -54
- package/src/skills/decompose-plan/tests/cases/TC-DECOMPOSE-PLAN-005/current/kilo-glm/trial-1.md +0 -147
- package/src/skills/decompose-plan/tests/cases/TC-DECOMPOSE-PLAN-005/current/kilo-glm/trial-2.md +0 -165
- package/src/skills/decompose-plan/tests/cases/TC-DECOMPOSE-PLAN-005/current/kilo-glm/trial-3.md +0 -133
- package/src/skills/decompose-plan/tests/cases/TC-DECOMPOSE-PLAN-005/current/kilo-minimax/trial-1.md +0 -81
- package/src/skills/decompose-plan/tests/cases/TC-DECOMPOSE-PLAN-005/current/kilo-minimax/trial-2.md +0 -108
- package/src/skills/decompose-plan/tests/cases/TC-DECOMPOSE-PLAN-005/current/kilo-minimax/trial-3.md +0 -3
- package/src/skills/decompose-plan/tests/cases/TC-DECOMPOSE-PLAN-005/current/meta.json +0 -114
- package/src/skills/decompose-plan/tests/cases/TC-DECOMPOSE-PLAN-005-capabilities-registry.yaml +0 -78
- package/src/skills/decompose-plan/tests/cases/TC-DECOMPOSE-PLAN-006/current/claude-sonnet/trial-1.md +0 -225
- package/src/skills/decompose-plan/tests/cases/TC-DECOMPOSE-PLAN-006/current/claude-sonnet/trial-2.md +0 -66
- package/src/skills/decompose-plan/tests/cases/TC-DECOMPOSE-PLAN-006/current/claude-sonnet/trial-3.md +0 -36
- package/src/skills/decompose-plan/tests/cases/TC-DECOMPOSE-PLAN-006/current/judge.json +0 -163
- package/src/skills/decompose-plan/tests/cases/TC-DECOMPOSE-PLAN-006/current/kilo-deepseek/trial-1.md +0 -42
- package/src/skills/decompose-plan/tests/cases/TC-DECOMPOSE-PLAN-006/current/kilo-deepseek/trial-2.md +0 -67
- package/src/skills/decompose-plan/tests/cases/TC-DECOMPOSE-PLAN-006/current/kilo-deepseek/trial-3.md +0 -40
- package/src/skills/decompose-plan/tests/cases/TC-DECOMPOSE-PLAN-006/current/kilo-glm/trial-1.md +0 -122
- package/src/skills/decompose-plan/tests/cases/TC-DECOMPOSE-PLAN-006/current/kilo-glm/trial-2.md +0 -131
- package/src/skills/decompose-plan/tests/cases/TC-DECOMPOSE-PLAN-006/current/kilo-glm/trial-3.md +0 -138
- package/src/skills/decompose-plan/tests/cases/TC-DECOMPOSE-PLAN-006/current/kilo-minimax/trial-1.md +0 -41
- package/src/skills/decompose-plan/tests/cases/TC-DECOMPOSE-PLAN-006/current/kilo-minimax/trial-2.md +0 -88
- package/src/skills/decompose-plan/tests/cases/TC-DECOMPOSE-PLAN-006/current/kilo-minimax/trial-3.md +0 -0
- package/src/skills/decompose-plan/tests/cases/TC-DECOMPOSE-PLAN-006/current/meta.json +0 -115
- package/src/skills/decompose-plan/tests/cases/TC-DECOMPOSE-PLAN-006-dod-threshold.yaml +0 -72
- package/src/skills/decompose-plan/tests/index.yaml +0 -45
- package/src/skills/decompose-plan/tests/rubrics/atomicity-no-1to1.md +0 -21
- package/src/skills/decompose-plan/tests/rubrics/capabilities-registry.md +0 -21
- package/src/skills/decompose-plan/tests/rubrics/dod-threshold.md +0 -21
- package/src/skills/decompose-plan/tests/rubrics/executor-atomicity.md +0 -21
- package/src/skills/decompose-plan/tests/rubrics/get-next-id-mandatory.md +0 -21
- package/src/skills/decompose-plan/tests/rubrics/verbatim-dod-transfer.md +0 -21
- package/src/skills/decompose-plan/workflows/decompose.md +0 -305
- package/src/skills/deep-research/README.md +0 -36
- package/src/skills/deep-research/algorithms/source-scoring.md +0 -63
- package/src/skills/deep-research/algorithms/synthesis.md +0 -67
- package/src/skills/deep-research/knowledge/data-validation.md +0 -44
- package/src/skills/deep-research/knowledge/perplexity-config.md +0 -30
- package/src/skills/deep-research/knowledge/research-methodology.md +0 -54
- package/src/skills/deep-research/knowledge/source-evaluation.md +0 -33
- package/src/skills/deep-research/scripts/perplexity-research.js +0 -315
- package/src/skills/deep-research/templates/brief-summary.md +0 -25
- package/src/skills/deep-research/templates/research-report.md +0 -76
- package/src/skills/deep-research/tests/cases/TC-DEEP-RESEARCH-001/current/claude-haiku/trial-1.md +0 -48
- package/src/skills/deep-research/tests/cases/TC-DEEP-RESEARCH-001/current/claude-haiku/trial-2.md +0 -88
- package/src/skills/deep-research/tests/cases/TC-DEEP-RESEARCH-001/current/claude-haiku/trial-3.md +0 -56
- package/src/skills/deep-research/tests/cases/TC-DEEP-RESEARCH-001/current/judge.json +0 -163
- package/src/skills/deep-research/tests/cases/TC-DEEP-RESEARCH-001/current/kilo-free/trial-1.md +0 -58
- package/src/skills/deep-research/tests/cases/TC-DEEP-RESEARCH-001/current/kilo-free/trial-2.md +0 -249
- package/src/skills/deep-research/tests/cases/TC-DEEP-RESEARCH-001/current/kilo-free/trial-3.md +0 -44
- package/src/skills/deep-research/tests/cases/TC-DEEP-RESEARCH-001/current/kilo-glm/trial-1.md +0 -96
- package/src/skills/deep-research/tests/cases/TC-DEEP-RESEARCH-001/current/kilo-glm/trial-2.md +0 -56
- package/src/skills/deep-research/tests/cases/TC-DEEP-RESEARCH-001/current/kilo-glm/trial-3.md +0 -94
- package/src/skills/deep-research/tests/cases/TC-DEEP-RESEARCH-001/current/kilo-glm-air/trial-1.md +0 -11
- package/src/skills/deep-research/tests/cases/TC-DEEP-RESEARCH-001/current/kilo-glm-air/trial-2.md +0 -1
- package/src/skills/deep-research/tests/cases/TC-DEEP-RESEARCH-001/current/kilo-glm-air/trial-3.md +0 -1
- package/src/skills/deep-research/tests/cases/TC-DEEP-RESEARCH-001/current/meta.json +0 -115
- package/src/skills/deep-research/tests/cases/TC-DEEP-RESEARCH-001-self-check-url.yaml +0 -58
- package/src/skills/deep-research/tests/index.yaml +0 -20
- package/src/skills/deep-research/tests/rubrics/self-check-url.md +0 -34
- package/src/skills/deep-research/workflows/base-checklist.md +0 -19
- package/src/skills/deep-research/workflows/benchmark.md +0 -38
- package/src/skills/deep-research/workflows/competitor.md +0 -44
- package/src/skills/deep-research/workflows/custom.md +0 -32
- package/src/skills/deep-research/workflows/market.md +0 -44
- package/src/skills/deep-research/workflows/technology.md +0 -40
- package/src/skills/deep-research/workflows/trend.md +0 -40
- package/src/skills/execute-task/README.md +0 -44
- package/src/skills/execute-task/algorithms/execution-strategy.md +0 -136
- package/src/skills/execute-task/knowledge/context-checkpoints.md +0 -75
- package/src/skills/execute-task/knowledge/ticket-structure.md +0 -70
- package/src/skills/execute-task/tests/cases/TC-EXECUTE-TASK-001/current/claude-haiku/trial-1.md +0 -5
- package/src/skills/execute-task/tests/cases/TC-EXECUTE-TASK-001/current/claude-haiku/trial-2.md +0 -5
- package/src/skills/execute-task/tests/cases/TC-EXECUTE-TASK-001/current/claude-haiku/trial-3.md +0 -5
- package/src/skills/execute-task/tests/cases/TC-EXECUTE-TASK-001/current/judge.json +0 -124
- package/src/skills/execute-task/tests/cases/TC-EXECUTE-TASK-001/current/kilo-free/trial-1.md +0 -4
- package/src/skills/execute-task/tests/cases/TC-EXECUTE-TASK-001/current/kilo-free/trial-2.md +0 -4
- package/src/skills/execute-task/tests/cases/TC-EXECUTE-TASK-001/current/kilo-free/trial-3.md +0 -4
- package/src/skills/execute-task/tests/cases/TC-EXECUTE-TASK-001/current/kilo-glm-air/trial-1.md +0 -4
- package/src/skills/execute-task/tests/cases/TC-EXECUTE-TASK-001/current/kilo-glm-air/trial-2.md +0 -4
- package/src/skills/execute-task/tests/cases/TC-EXECUTE-TASK-001/current/kilo-glm-air/trial-3.md +0 -11
- package/src/skills/execute-task/tests/cases/TC-EXECUTE-TASK-001/current/meta.json +0 -88
- package/src/skills/execute-task/tests/cases/TC-EXECUTE-TASK-001-no-ticket-creation.yaml +0 -48
- package/src/skills/execute-task/tests/cases/TC-EXECUTE-TASK-002/current/claude-haiku/trial-1.md +0 -5
- package/src/skills/execute-task/tests/cases/TC-EXECUTE-TASK-002/current/claude-haiku/trial-2.md +0 -6
- package/src/skills/execute-task/tests/cases/TC-EXECUTE-TASK-002/current/claude-haiku/trial-3.md +0 -5
- package/src/skills/execute-task/tests/cases/TC-EXECUTE-TASK-002/current/judge.json +0 -124
- package/src/skills/execute-task/tests/cases/TC-EXECUTE-TASK-002/current/kilo-free/trial-1.md +0 -4
- package/src/skills/execute-task/tests/cases/TC-EXECUTE-TASK-002/current/kilo-free/trial-2.md +0 -4
- package/src/skills/execute-task/tests/cases/TC-EXECUTE-TASK-002/current/kilo-free/trial-3.md +0 -8
- package/src/skills/execute-task/tests/cases/TC-EXECUTE-TASK-002/current/kilo-glm-air/trial-1.md +0 -9
- package/src/skills/execute-task/tests/cases/TC-EXECUTE-TASK-002/current/kilo-glm-air/trial-2.md +0 -26
- package/src/skills/execute-task/tests/cases/TC-EXECUTE-TASK-002/current/kilo-glm-air/trial-3.md +0 -4
- package/src/skills/execute-task/tests/cases/TC-EXECUTE-TASK-002/current/meta.json +0 -89
- package/src/skills/execute-task/tests/cases/TC-EXECUTE-TASK-002-no-duplicate-dod.yaml +0 -44
- package/src/skills/execute-task/tests/cases/TC-EXECUTE-TASK-003/current/claude-haiku/trial-1.md +0 -5
- package/src/skills/execute-task/tests/cases/TC-EXECUTE-TASK-003/current/claude-haiku/trial-2.md +0 -5
- package/src/skills/execute-task/tests/cases/TC-EXECUTE-TASK-003/current/claude-haiku/trial-3.md +0 -5
- package/src/skills/execute-task/tests/cases/TC-EXECUTE-TASK-003/current/judge.json +0 -46
- package/src/skills/execute-task/tests/cases/TC-EXECUTE-TASK-003/current/meta.json +0 -37
- package/src/skills/execute-task/tests/cases/TC-EXECUTE-TASK-003-verification-proportionality.yaml +0 -46
- package/src/skills/execute-task/tests/cases/TC-EXECUTE-TASK-004/current/claude-haiku/trial-1.md +0 -18
- package/src/skills/execute-task/tests/cases/TC-EXECUTE-TASK-004/current/claude-haiku/trial-2.md +0 -16
- package/src/skills/execute-task/tests/cases/TC-EXECUTE-TASK-004/current/claude-haiku/trial-3.md +0 -14
- package/src/skills/execute-task/tests/cases/TC-EXECUTE-TASK-004/current/judge.json +0 -124
- package/src/skills/execute-task/tests/cases/TC-EXECUTE-TASK-004/current/kilo-free/trial-1.md +0 -5
- package/src/skills/execute-task/tests/cases/TC-EXECUTE-TASK-004/current/kilo-free/trial-2.md +0 -5
- package/src/skills/execute-task/tests/cases/TC-EXECUTE-TASK-004/current/kilo-free/trial-3.md +0 -1
- package/src/skills/execute-task/tests/cases/TC-EXECUTE-TASK-004/current/kilo-glm-air/trial-1.md +0 -8
- package/src/skills/execute-task/tests/cases/TC-EXECUTE-TASK-004/current/kilo-glm-air/trial-2.md +0 -5
- package/src/skills/execute-task/tests/cases/TC-EXECUTE-TASK-004/current/kilo-glm-air/trial-3.md +0 -4
- package/src/skills/execute-task/tests/cases/TC-EXECUTE-TASK-004/current/meta.json +0 -89
- package/src/skills/execute-task/tests/cases/TC-EXECUTE-TASK-004-no-foreign-ticket-edit.yaml +0 -50
- package/src/skills/execute-task/tests/cases/TC-EXECUTE-TASK-005/current/claude-haiku/trial-1.md +0 -5
- package/src/skills/execute-task/tests/cases/TC-EXECUTE-TASK-005/current/claude-haiku/trial-2.md +0 -5
- package/src/skills/execute-task/tests/cases/TC-EXECUTE-TASK-005/current/claude-haiku/trial-3.md +0 -5
- package/src/skills/execute-task/tests/cases/TC-EXECUTE-TASK-005/current/judge.json +0 -124
- package/src/skills/execute-task/tests/cases/TC-EXECUTE-TASK-005/current/kilo-free/trial-1.md +0 -15
- package/src/skills/execute-task/tests/cases/TC-EXECUTE-TASK-005/current/kilo-free/trial-2.md +0 -4
- package/src/skills/execute-task/tests/cases/TC-EXECUTE-TASK-005/current/kilo-free/trial-3.md +0 -5
- package/src/skills/execute-task/tests/cases/TC-EXECUTE-TASK-005/current/kilo-glm-air/trial-1.md +0 -11
- package/src/skills/execute-task/tests/cases/TC-EXECUTE-TASK-005/current/kilo-glm-air/trial-2.md +0 -11
- package/src/skills/execute-task/tests/cases/TC-EXECUTE-TASK-005/current/kilo-glm-air/trial-3.md +0 -4
- package/src/skills/execute-task/tests/cases/TC-EXECUTE-TASK-005/current/meta.json +0 -88
- package/src/skills/execute-task/tests/cases/TC-EXECUTE-TASK-005-ticket-fields-updated.yaml +0 -39
- package/src/skills/execute-task/tests/fixtures/IMPL-902-create-file.md +0 -41
- package/src/skills/execute-task/tests/fixtures/IMPL-904-current-task.md +0 -40
- package/src/skills/execute-task/tests/fixtures/IMPL-906-fill-ticket.md +0 -42
- package/src/skills/execute-task/tests/fixtures/QA-901-button-click.md +0 -41
- package/src/skills/execute-task/tests/fixtures/QA-903-visual-figma.md +0 -40
- package/src/skills/execute-task/tests/fixtures/TASK-905-done-with-typo.md +0 -36
- package/src/skills/execute-task/tests/index.yaml +0 -39
- package/src/skills/execute-task/tests/rubrics/no-duplicate-dod.md +0 -22
- package/src/skills/execute-task/tests/rubrics/no-foreign-ticket-edit.md +0 -20
- package/src/skills/execute-task/tests/rubrics/no-ticket-creation.md +0 -21
- package/src/skills/execute-task/tests/rubrics/ticket-fields-updated.md +0 -23
- package/src/skills/execute-task/tests/rubrics/verification-proportionality.md +0 -22
- package/src/skills/execute-task/workflows/execute.md +0 -104
- package/src/skills/manual-testing/README.md +0 -63
- package/src/skills/manual-testing/algorithms/blocked-tool-strategy.md +0 -74
- package/src/skills/manual-testing/algorithms/bug-severity.md +0 -73
- package/src/skills/manual-testing/algorithms/mcp-budget.md +0 -97
- package/src/skills/manual-testing/algorithms/test-prioritization.md +0 -69
- package/src/skills/manual-testing/knowledge/browser-extension-testing.md +0 -102
- package/src/skills/manual-testing/knowledge/browser-tools.md +0 -114
- package/src/skills/manual-testing/knowledge/desktop-tools-advanced.md +0 -92
- package/src/skills/manual-testing/knowledge/desktop-tools-core.md +0 -76
- package/src/skills/manual-testing/knowledge/sandbox-advanced.md +0 -83
- package/src/skills/manual-testing/knowledge/sandbox-core.md +0 -67
- package/src/skills/manual-testing/knowledge/stateful-edge-cases.md +0 -69
- package/src/skills/manual-testing/knowledge/test-case-design.md +0 -107
- package/src/skills/manual-testing/knowledge/testing-types.md +0 -45
- package/src/skills/manual-testing/templates/bug-report.md +0 -52
- package/src/skills/manual-testing/templates/test-case.md +0 -34
- package/src/skills/manual-testing/templates/test-plan.md +0 -97
- package/src/skills/manual-testing/templates/test-session-report.md +0 -56
- package/src/skills/manual-testing/tests/cases/TC-MANUAL-TESTING-001/current/claude-sonnet/trial-1.md +0 -34
- package/src/skills/manual-testing/tests/cases/TC-MANUAL-TESTING-001/current/claude-sonnet/trial-2.md +0 -32
- package/src/skills/manual-testing/tests/cases/TC-MANUAL-TESTING-001/current/claude-sonnet/trial-3.md +0 -30
- package/src/skills/manual-testing/tests/cases/TC-MANUAL-TESTING-001/current/judge.json +0 -163
- package/src/skills/manual-testing/tests/cases/TC-MANUAL-TESTING-001/current/kilo-deepseek/trial-1.md +0 -0
- package/src/skills/manual-testing/tests/cases/TC-MANUAL-TESTING-001/current/kilo-deepseek/trial-2.md +0 -7
- package/src/skills/manual-testing/tests/cases/TC-MANUAL-TESTING-001/current/kilo-deepseek/trial-3.md +0 -0
- package/src/skills/manual-testing/tests/cases/TC-MANUAL-TESTING-001/current/kilo-glm/trial-1.md +0 -4
- package/src/skills/manual-testing/tests/cases/TC-MANUAL-TESTING-001/current/kilo-glm/trial-2.md +0 -15
- package/src/skills/manual-testing/tests/cases/TC-MANUAL-TESTING-001/current/kilo-glm/trial-3.md +0 -8
- package/src/skills/manual-testing/tests/cases/TC-MANUAL-TESTING-001/current/kilo-minimax/trial-1.md +0 -5
- package/src/skills/manual-testing/tests/cases/TC-MANUAL-TESTING-001/current/kilo-minimax/trial-2.md +0 -7
- package/src/skills/manual-testing/tests/cases/TC-MANUAL-TESTING-001/current/kilo-minimax/trial-3.md +0 -7
- package/src/skills/manual-testing/tests/cases/TC-MANUAL-TESTING-001/current/meta.json +0 -114
- package/src/skills/manual-testing/tests/cases/TC-MANUAL-TESTING-001-sandbox-mandatory.yaml +0 -38
- package/src/skills/manual-testing/tests/cases/TC-MANUAL-TESTING-002/current/claude-sonnet/trial-1.md +0 -44
- package/src/skills/manual-testing/tests/cases/TC-MANUAL-TESTING-002/current/claude-sonnet/trial-2.md +0 -32
- package/src/skills/manual-testing/tests/cases/TC-MANUAL-TESTING-002/current/claude-sonnet/trial-3.md +0 -47
- package/src/skills/manual-testing/tests/cases/TC-MANUAL-TESTING-002/current/judge.json +0 -163
- package/src/skills/manual-testing/tests/cases/TC-MANUAL-TESTING-002/current/kilo-deepseek/trial-1.md +0 -19
- package/src/skills/manual-testing/tests/cases/TC-MANUAL-TESTING-002/current/kilo-deepseek/trial-2.md +0 -15
- package/src/skills/manual-testing/tests/cases/TC-MANUAL-TESTING-002/current/kilo-deepseek/trial-3.md +0 -24
- package/src/skills/manual-testing/tests/cases/TC-MANUAL-TESTING-002/current/kilo-glm/trial-1.md +0 -19
- package/src/skills/manual-testing/tests/cases/TC-MANUAL-TESTING-002/current/kilo-glm/trial-2.md +0 -13
- package/src/skills/manual-testing/tests/cases/TC-MANUAL-TESTING-002/current/kilo-glm/trial-3.md +0 -18
- package/src/skills/manual-testing/tests/cases/TC-MANUAL-TESTING-002/current/kilo-minimax/trial-1.md +0 -21
- package/src/skills/manual-testing/tests/cases/TC-MANUAL-TESTING-002/current/kilo-minimax/trial-2.md +0 -15
- package/src/skills/manual-testing/tests/cases/TC-MANUAL-TESTING-002/current/kilo-minimax/trial-3.md +0 -14
- package/src/skills/manual-testing/tests/cases/TC-MANUAL-TESTING-002/current/meta.json +0 -114
- package/src/skills/manual-testing/tests/cases/TC-MANUAL-TESTING-002-visual-tc-screenshot.yaml +0 -37
- package/src/skills/manual-testing/tests/cases/TC-MANUAL-TESTING-003/current/claude-sonnet/trial-1.md +0 -76
- package/src/skills/manual-testing/tests/cases/TC-MANUAL-TESTING-003/current/claude-sonnet/trial-2.md +0 -71
- package/src/skills/manual-testing/tests/cases/TC-MANUAL-TESTING-003/current/claude-sonnet/trial-3.md +0 -85
- package/src/skills/manual-testing/tests/cases/TC-MANUAL-TESTING-003/current/judge.json +0 -46
- package/src/skills/manual-testing/tests/cases/TC-MANUAL-TESTING-003/current/meta.json +0 -36
- package/src/skills/manual-testing/tests/cases/TC-MANUAL-TESTING-003-qa-non-ui-assertion.yaml +0 -65
- package/src/skills/manual-testing/tests/index.yaml +0 -30
- package/src/skills/manual-testing/tests/last-run-tc001-sonnet.log +0 -140
- package/src/skills/manual-testing/tests/last-run-tc002.log +0 -1
- package/src/skills/manual-testing/tests/last-run.log +0 -1469
- package/src/skills/manual-testing/tests/rubrics/qa-non-ui-assertion.md +0 -31
- package/src/skills/manual-testing/tests/rubrics/sandbox-mandatory.md +0 -20
- package/src/skills/manual-testing/tests/rubrics/visual-tc-screenshot.md +0 -21
- package/src/skills/manual-testing/workflows/acceptance.md +0 -80
- package/src/skills/manual-testing/workflows/exploratory.md +0 -84
- package/src/skills/manual-testing/workflows/regression.md +0 -76
- package/src/skills/manual-testing/workflows/smoke.md +0 -109
- package/src/skills/manual-testing/workflows/test-plan.md +0 -75
- package/src/skills/review-result/README.md +0 -59
- package/src/skills/review-result/algorithms/verification.md +0 -112
- package/src/skills/review-result/knowledge/baseline-snapshot-validation.md +0 -67
- package/src/skills/review-result/knowledge/dod-patterns.md +0 -116
- package/src/skills/review-result/knowledge/test-hygiene.md +0 -44
- package/src/skills/review-result/scripts/verify-artifacts.js +0 -497
- package/src/skills/review-result/templates/verdict.md +0 -153
- package/src/skills/review-result/tests/cases/TC-REVIEW-RESULT-001/current/claude-haiku/trial-1.md +0 -22
- package/src/skills/review-result/tests/cases/TC-REVIEW-RESULT-001/current/claude-haiku/trial-2.md +0 -7
- package/src/skills/review-result/tests/cases/TC-REVIEW-RESULT-001/current/claude-haiku/trial-3.md +0 -21
- package/src/skills/review-result/tests/cases/TC-REVIEW-RESULT-001/current/claude-sonnet/trial-1.md +0 -6
- package/src/skills/review-result/tests/cases/TC-REVIEW-RESULT-001/current/claude-sonnet/trial-2.md +0 -6
- package/src/skills/review-result/tests/cases/TC-REVIEW-RESULT-001/current/claude-sonnet/trial-3.md +0 -6
- package/src/skills/review-result/tests/cases/TC-REVIEW-RESULT-001/current/judge.json +0 -164
- package/src/skills/review-result/tests/cases/TC-REVIEW-RESULT-001/current/kilo-deepseek/trial-1.md +0 -5
- package/src/skills/review-result/tests/cases/TC-REVIEW-RESULT-001/current/kilo-deepseek/trial-2.md +0 -7
- package/src/skills/review-result/tests/cases/TC-REVIEW-RESULT-001/current/kilo-deepseek/trial-3.md +0 -6
- package/src/skills/review-result/tests/cases/TC-REVIEW-RESULT-001/current/kilo-glm/trial-1.md +0 -49
- package/src/skills/review-result/tests/cases/TC-REVIEW-RESULT-001/current/kilo-glm/trial-2.md +0 -28
- package/src/skills/review-result/tests/cases/TC-REVIEW-RESULT-001/current/kilo-glm/trial-3.md +0 -37
- package/src/skills/review-result/tests/cases/TC-REVIEW-RESULT-001/current/kilo-minimax/trial-1.md +0 -22
- package/src/skills/review-result/tests/cases/TC-REVIEW-RESULT-001/current/kilo-minimax/trial-2.md +0 -13
- package/src/skills/review-result/tests/cases/TC-REVIEW-RESULT-001/current/kilo-minimax/trial-3.md +0 -21
- package/src/skills/review-result/tests/cases/TC-REVIEW-RESULT-001/current/meta.json +0 -116
- package/src/skills/review-result/tests/cases/TC-REVIEW-RESULT-001-visual-tc-trigger.yaml +0 -51
- package/src/skills/review-result/tests/cases/TC-REVIEW-RESULT-002/current/claude-haiku/trial-1.md +0 -23
- package/src/skills/review-result/tests/cases/TC-REVIEW-RESULT-002/current/claude-haiku/trial-2.md +0 -22
- package/src/skills/review-result/tests/cases/TC-REVIEW-RESULT-002/current/claude-haiku/trial-3.md +0 -28
- package/src/skills/review-result/tests/cases/TC-REVIEW-RESULT-002/current/claude-sonnet/trial-1.md +0 -4
- package/src/skills/review-result/tests/cases/TC-REVIEW-RESULT-002/current/claude-sonnet/trial-2.md +0 -4
- package/src/skills/review-result/tests/cases/TC-REVIEW-RESULT-002/current/claude-sonnet/trial-3.md +0 -4
- package/src/skills/review-result/tests/cases/TC-REVIEW-RESULT-002/current/judge.json +0 -163
- package/src/skills/review-result/tests/cases/TC-REVIEW-RESULT-002/current/kilo-deepseek/trial-1.md +0 -4
- package/src/skills/review-result/tests/cases/TC-REVIEW-RESULT-002/current/kilo-deepseek/trial-2.md +0 -0
- package/src/skills/review-result/tests/cases/TC-REVIEW-RESULT-002/current/kilo-deepseek/trial-3.md +0 -4
- package/src/skills/review-result/tests/cases/TC-REVIEW-RESULT-002/current/kilo-glm/trial-1.md +0 -39
- package/src/skills/review-result/tests/cases/TC-REVIEW-RESULT-002/current/kilo-glm/trial-2.md +0 -25
- package/src/skills/review-result/tests/cases/TC-REVIEW-RESULT-002/current/kilo-glm/trial-3.md +0 -32
- package/src/skills/review-result/tests/cases/TC-REVIEW-RESULT-002/current/kilo-minimax/trial-1.md +0 -34
- package/src/skills/review-result/tests/cases/TC-REVIEW-RESULT-002/current/kilo-minimax/trial-2.md +0 -8
- package/src/skills/review-result/tests/cases/TC-REVIEW-RESULT-002/current/kilo-minimax/trial-3.md +0 -23
- package/src/skills/review-result/tests/cases/TC-REVIEW-RESULT-002/current/meta.json +0 -115
- package/src/skills/review-result/tests/cases/TC-REVIEW-RESULT-002-path-line-suffix.yaml +0 -39
- package/src/skills/review-result/tests/cases/TC-REVIEW-RESULT-003/current/claude-sonnet/trial-1.md +0 -40
- package/src/skills/review-result/tests/cases/TC-REVIEW-RESULT-003/current/claude-sonnet/trial-2.md +0 -15
- package/src/skills/review-result/tests/cases/TC-REVIEW-RESULT-003/current/claude-sonnet/trial-3.md +0 -7
- package/src/skills/review-result/tests/cases/TC-REVIEW-RESULT-003/current/judge.json +0 -163
- package/src/skills/review-result/tests/cases/TC-REVIEW-RESULT-003/current/kilo-deepseek/trial-1.md +0 -5
- package/src/skills/review-result/tests/cases/TC-REVIEW-RESULT-003/current/kilo-deepseek/trial-2.md +0 -5
- package/src/skills/review-result/tests/cases/TC-REVIEW-RESULT-003/current/kilo-deepseek/trial-3.md +0 -11
- package/src/skills/review-result/tests/cases/TC-REVIEW-RESULT-003/current/kilo-glm/trial-1.md +0 -16
- package/src/skills/review-result/tests/cases/TC-REVIEW-RESULT-003/current/kilo-glm/trial-2.md +0 -18
- package/src/skills/review-result/tests/cases/TC-REVIEW-RESULT-003/current/kilo-glm/trial-3.md +0 -17
- package/src/skills/review-result/tests/cases/TC-REVIEW-RESULT-003/current/kilo-minimax/trial-1.md +0 -17
- package/src/skills/review-result/tests/cases/TC-REVIEW-RESULT-003/current/kilo-minimax/trial-2.md +0 -31
- package/src/skills/review-result/tests/cases/TC-REVIEW-RESULT-003/current/kilo-minimax/trial-3.md +0 -5
- package/src/skills/review-result/tests/cases/TC-REVIEW-RESULT-003/current/meta.json +0 -115
- package/src/skills/review-result/tests/cases/TC-REVIEW-RESULT-003-test-isolation.yaml +0 -50
- package/src/skills/review-result/tests/cases/TC-REVIEW-RESULT-004/current/claude-sonnet/trial-1.md +0 -5
- package/src/skills/review-result/tests/cases/TC-REVIEW-RESULT-004/current/claude-sonnet/trial-2.md +0 -5
- package/src/skills/review-result/tests/cases/TC-REVIEW-RESULT-004/current/claude-sonnet/trial-3.md +0 -6
- package/src/skills/review-result/tests/cases/TC-REVIEW-RESULT-004/current/judge.json +0 -46
- package/src/skills/review-result/tests/cases/TC-REVIEW-RESULT-004/current/meta.json +0 -37
- package/src/skills/review-result/tests/cases/TC-REVIEW-RESULT-004-baseline-snapshot.yaml +0 -50
- package/src/skills/review-result/tests/fixtures/IMPL-902-path-with-line.md +0 -43
- package/src/skills/review-result/tests/fixtures/QA-901-visual-button.md +0 -46
- package/src/skills/review-result/tests/fixtures/QA-904-test-isolation-violation/QA-904.md +0 -51
- package/src/skills/review-result/tests/fixtures/QA-904-test-isolation-violation/example-test.mjs +0 -36
- package/src/skills/review-result/tests/fixtures/QA-905-baseline-regex-instead-of-snapshot/QA-905.md +0 -62
- package/src/skills/review-result/tests/fixtures/QA-905-baseline-regex-instead-of-snapshot/baseline.test.mjs +0 -124
- package/src/skills/review-result/tests/index.yaml +0 -35
- package/src/skills/review-result/tests/rubrics/baseline-snapshot.md +0 -20
- package/src/skills/review-result/tests/rubrics/path-line-suffix.md +0 -19
- package/src/skills/review-result/tests/rubrics/test-isolation.md +0 -20
- package/src/skills/review-result/tests/rubrics/visual-tc-trigger.md +0 -19
- package/src/skills/review-result/workflows/review.md +0 -209
|
@@ -1,151 +0,0 @@
|
|
|
1
|
-
{
|
|
2
|
-
"per_model": {
|
|
3
|
-
"claude-sonnet": {
|
|
4
|
-
"pass_count": 3,
|
|
5
|
-
"total": 3,
|
|
6
|
-
"trials": [
|
|
7
|
-
{
|
|
8
|
-
"trial": 1,
|
|
9
|
-
"score": 5,
|
|
10
|
-
"passed": true
|
|
11
|
-
},
|
|
12
|
-
{
|
|
13
|
-
"trial": 2,
|
|
14
|
-
"score": 5,
|
|
15
|
-
"passed": true
|
|
16
|
-
},
|
|
17
|
-
{
|
|
18
|
-
"trial": 3,
|
|
19
|
-
"score": 5,
|
|
20
|
-
"passed": true
|
|
21
|
-
}
|
|
22
|
-
]
|
|
23
|
-
},
|
|
24
|
-
"kilo-deepseek": {
|
|
25
|
-
"pass_count": 2,
|
|
26
|
-
"total": 3,
|
|
27
|
-
"trials": [
|
|
28
|
-
{
|
|
29
|
-
"trial": 1,
|
|
30
|
-
"score": 1,
|
|
31
|
-
"passed": false
|
|
32
|
-
},
|
|
33
|
-
{
|
|
34
|
-
"trial": 2,
|
|
35
|
-
"score": 5,
|
|
36
|
-
"passed": true
|
|
37
|
-
},
|
|
38
|
-
{
|
|
39
|
-
"trial": 3,
|
|
40
|
-
"score": 5,
|
|
41
|
-
"passed": true
|
|
42
|
-
}
|
|
43
|
-
]
|
|
44
|
-
},
|
|
45
|
-
"kilo-minimax": {
|
|
46
|
-
"pass_count": 3,
|
|
47
|
-
"total": 3,
|
|
48
|
-
"trials": [
|
|
49
|
-
{
|
|
50
|
-
"trial": 1,
|
|
51
|
-
"score": 5,
|
|
52
|
-
"passed": true
|
|
53
|
-
},
|
|
54
|
-
{
|
|
55
|
-
"trial": 2,
|
|
56
|
-
"score": 5,
|
|
57
|
-
"passed": true
|
|
58
|
-
},
|
|
59
|
-
{
|
|
60
|
-
"trial": 3,
|
|
61
|
-
"score": 5,
|
|
62
|
-
"passed": true
|
|
63
|
-
}
|
|
64
|
-
]
|
|
65
|
-
},
|
|
66
|
-
"kilo-glm": {
|
|
67
|
-
"pass_count": 3,
|
|
68
|
-
"total": 3,
|
|
69
|
-
"trials": [
|
|
70
|
-
{
|
|
71
|
-
"trial": 1,
|
|
72
|
-
"score": 5,
|
|
73
|
-
"passed": true
|
|
74
|
-
},
|
|
75
|
-
{
|
|
76
|
-
"trial": 2,
|
|
77
|
-
"score": 5,
|
|
78
|
-
"passed": true
|
|
79
|
-
},
|
|
80
|
-
{
|
|
81
|
-
"trial": 3,
|
|
82
|
-
"score": 5,
|
|
83
|
-
"passed": true
|
|
84
|
-
}
|
|
85
|
-
]
|
|
86
|
-
}
|
|
87
|
-
},
|
|
88
|
-
"rubric_scores": [
|
|
89
|
-
{
|
|
90
|
-
"agentId": "claude-sonnet",
|
|
91
|
-
"trial": 1,
|
|
92
|
-
"score": 5
|
|
93
|
-
},
|
|
94
|
-
{
|
|
95
|
-
"agentId": "claude-sonnet",
|
|
96
|
-
"trial": 2,
|
|
97
|
-
"score": 5
|
|
98
|
-
},
|
|
99
|
-
{
|
|
100
|
-
"agentId": "claude-sonnet",
|
|
101
|
-
"trial": 3,
|
|
102
|
-
"score": 5
|
|
103
|
-
},
|
|
104
|
-
{
|
|
105
|
-
"agentId": "kilo-deepseek",
|
|
106
|
-
"trial": 1,
|
|
107
|
-
"score": 1
|
|
108
|
-
},
|
|
109
|
-
{
|
|
110
|
-
"agentId": "kilo-deepseek",
|
|
111
|
-
"trial": 2,
|
|
112
|
-
"score": 5
|
|
113
|
-
},
|
|
114
|
-
{
|
|
115
|
-
"agentId": "kilo-deepseek",
|
|
116
|
-
"trial": 3,
|
|
117
|
-
"score": 5
|
|
118
|
-
},
|
|
119
|
-
{
|
|
120
|
-
"agentId": "kilo-minimax",
|
|
121
|
-
"trial": 1,
|
|
122
|
-
"score": 5
|
|
123
|
-
},
|
|
124
|
-
{
|
|
125
|
-
"agentId": "kilo-minimax",
|
|
126
|
-
"trial": 2,
|
|
127
|
-
"score": 5
|
|
128
|
-
},
|
|
129
|
-
{
|
|
130
|
-
"agentId": "kilo-minimax",
|
|
131
|
-
"trial": 3,
|
|
132
|
-
"score": 5
|
|
133
|
-
},
|
|
134
|
-
{
|
|
135
|
-
"agentId": "kilo-glm",
|
|
136
|
-
"trial": 1,
|
|
137
|
-
"score": 5
|
|
138
|
-
},
|
|
139
|
-
{
|
|
140
|
-
"agentId": "kilo-glm",
|
|
141
|
-
"trial": 2,
|
|
142
|
-
"score": 5
|
|
143
|
-
},
|
|
144
|
-
{
|
|
145
|
-
"agentId": "kilo-glm",
|
|
146
|
-
"trial": 3,
|
|
147
|
-
"score": 5
|
|
148
|
-
}
|
|
149
|
-
],
|
|
150
|
-
"timestamp": "2026-04-19T10:47:10.881Z"
|
|
151
|
-
}
|
|
File without changes
|
|
@@ -1,37 +0,0 @@
|
|
|
1
|
-
**Анализ проблемы «призрачного выполнения» (файлы не созданы, только stdout показал «успех»)**
|
|
2
|
-
|
|
3
|
-
---
|
|
4
|
-
|
|
5
|
-
### 📊 **Контекст**
|
|
6
|
-
Запрос стейкхолдера — «второй раз за неделю». Фактически за неделю зафиксировано **4 подтверждённых инцидента**:
|
|
7
|
-
1. **COACH‑19** (2026‑04‑15, claude‑sonnet, qwen‑code)
|
|
8
|
-
2. **QA‑18** (2026‑04‑17, claude‑haiku, паттерн «запись в stdout»)
|
|
9
|
-
3. **IMPL‑24** (2026‑04‑18, kilo‑free, паттерн «permission denied без blocked»)
|
|
10
|
-
4. **Другие случаи** (зафиксированы в бэклоге коуча)
|
|
11
|
-
|
|
12
|
-
---
|
|
13
|
-
|
|
14
|
-
### 🔍 **Findings**
|
|
15
|
-
| Уровень | Описание |
|
|
16
|
-
|---------|----------|
|
|
17
|
-
| **CRITICAL** | **Текстовые меры исчерпаны** — 6+ итераций усиления инструкций в скиле `execute‑task` (CHG‑042…CHG‑047) не остановили инциденты. Агенты системно игнорируют запреты. |
|
|
18
|
-
| **BLOCKER** | **Эскалация HUMAN‑5** (машинная защита) закрыта **фантомно** — тикет перемещён в `done/`, но `Result` пуст, DoD 0/4. Защиты 1‑3 **не реализованы**. |
|
|
19
|
-
| **HIGH** | Без машинной защиты инциденты будут повторяться независимо от количества текстовых правок. |
|
|
20
|
-
| **MEDIUM** | Новых случаев призрачного выполнения после 2026‑04‑18 12:29 не обнаружено (последний лог `pipeline_2026‑04‑19_07‑36‑07.log`). |
|
|
21
|
-
|
|
22
|
-
---
|
|
23
|
-
|
|
24
|
-
### 🛡 **Рекомендации**
|
|
25
|
-
1. **Немедленно реализовать машинную защиту** из HUMAN‑5 (рекомендуется **Защита 1** — пост‑чек вызова `Edit` на файл тикета в пайплайне).
|
|
26
|
-
2. **Проверить причину фантомного закрытия HUMAN‑5** и восстановить его выполнение.
|
|
27
|
-
3. **Внедрить автоматическую проверку** `result_filled` и физического существования файлов в пайплайне.
|
|
28
|
-
|
|
29
|
-
---
|
|
30
|
-
|
|
31
|
-
### 🛠 **Действия коуча**
|
|
32
|
-
- Правок в скилы **не вносится** — текстовые меры признаны исчерпанными (≥3 усилений на одну корневую проблему).
|
|
33
|
-
- Запись о данном анализе добавлена в `.workflow/coach‑backlog.yaml` (артефакт `stakeholder request: ghost execution repeat (2026‑04‑19 14:57)`).
|
|
34
|
-
|
|
35
|
-
---
|
|
36
|
-
|
|
37
|
-
**Вывод:** проблема **системная**, её решение требует **машинной защиты**, а не очередного текстового усилия. HUMAN‑5 — **БЛОКЕР** для устранения класса ошибки.
|
|
@@ -1,30 +0,0 @@
|
|
|
1
|
-
**Анализ инцидента «призрачное выполнение» (2026‑04‑19, 14:57)**
|
|
2
|
-
|
|
3
|
-
| Аспект | Состояние |
|
|
4
|
-
|--------|-----------|
|
|
5
|
-
| **Последний подтверждённый инцидент** | QA‑18 (2026‑04‑17, claude‑haiku) + IMPL‑24 (2026‑04‑18, kilo‑free) |
|
|
6
|
-
| **Логи за 2026‑04‑19** | Новых случаев `result_filled:false` не обнаружено |
|
|
7
|
-
| **Корневая причина** | Текстовые запреты в `execute-task/algorithms/execution-strategy.md` системно игнорируются агентами |
|
|
8
|
-
| **Текстовые усилия** | **Исчерпаны** (6+ итераций за 4 дня, ≥3 правок на одну проблему) |
|
|
9
|
-
| **Эскалация** | HUMAN‑5 («Машинная защита от призрачного выполнения») |
|
|
10
|
-
| **Статус HUMAN‑5** | `in‑progress` (тикет перемещён в `done/`, но `Result` пуст, DoD 0/4) |
|
|
11
|
-
| **Фактическая реализация** | **Отсутствует** – ни одна из трёх рекомендованных защит не внедрена |
|
|
12
|
-
|
|
13
|
-
**Действия коуча**
|
|
14
|
-
|
|
15
|
-
1. **Проверка эскалации** – применён блок «Отслеживание эскалаций» (SKILL.md:72‑78).
|
|
16
|
-
2. **Усиление скила коуча** – добавлена ветка «Если нет, но есть указание на ошибку» в **ГЛАВНОЕ ПРАВИЛО** (SKILL.md:23‑27). Теперь при повторном указании на ошибку коуч обязан проверить статус ранее созданной эскалации и явно сообщить о блокере.
|
|
17
|
-
3. **Запись в бэклог** – инцидент зафиксирован в `.workflow/coach-backlog.yaml` (запись от 2026‑04‑19T14:57:25+05:00).
|
|
18
|
-
4. **Тестирование** – существующий регрессионный тест TC‑COACH‑003 покрывает проверку эскалаций; прогон инициирован (verdict ожидается).
|
|
19
|
-
|
|
20
|
-
**Рекомендация стейкхолдеру**
|
|
21
|
-
|
|
22
|
-
- **БЛОКЕР**: HUMAN‑5 не реализован. Без машинной защиты инциденты призрачного выполнения будут повторяться независимо от количества текстовых правок.
|
|
23
|
-
- **Действие**: реализовать **минимум одну защиту** из HUMAN‑5 (предпочтительно **Защита 1** – пост‑чек вызова Edit на файл тикета).
|
|
24
|
-
- **Статус HUMAN‑5**: требуется переоткрыть тикет (перенести из `done/` в `ready/`) и заполнить `Result` с подтверждением внедрения защиты.
|
|
25
|
-
|
|
26
|
-
**Новых текстовых правок в `execute-task` не внесено** – принцип исчерпанности текстовых мер (SKILL.md:74‑78) запрещает дальнейшие усиления инструкций. Единственный системный путь – машинная защита.
|
|
27
|
-
|
|
28
|
-
**Затронутые файлы**
|
|
29
|
-
- `.workflow/src/skills/coach/SKILL.md` (ГЛАВНОЕ ПРАВИЛО)
|
|
30
|
-
- `.workflow/coach-backlog.yaml` (анализ инцидента)
|
|
@@ -1,23 +0,0 @@
|
|
|
1
|
-
Это ad-hoc запрос типа ANALYZE. Начинаю с чтения бэклога коуча и поиска evidence.
|
|
2
|
-
Проверяю эскалацию HUMAN-5 по процедуре (дословно):
|
|
3
|
-
|
|
4
|
-
**Пункт A:** `status: in-progress` (стр. 33) ≠ `done` → **FAIL**
|
|
5
|
-
**Пункт B:** Summary пуст, Started пуст, Completed пуст → **FAIL**
|
|
6
|
-
**Пункт C:** DoD 0/4 (все `- [ ]`, стр. 83-86) → **FAIL**
|
|
7
|
-
|
|
8
|
-
HUMAN-5 физически в `done/`, но закрыт **фантомно** — пайплайн автоматически переместил (баг infinite loop, зафиксирован ранее). Ни одна из трёх защит не реализована.
|
|
9
|
-
|
|
10
|
-
Записываю результат анализа в бэклог.
|
|
11
|
-
Записываю анализ в бэклог.
|
|
12
|
-
**HUMAN-5 — БЛОКЕР. Эскалация не реализована.**
|
|
13
|
-
|
|
14
|
-
Проверка по процедуре (дословно):
|
|
15
|
-
- **Пункт A:** `status: in-progress` ≠ done → **FAIL**
|
|
16
|
-
- **Пункт B:** Summary пуст, Started пуст, Completed пуст → **FAIL**
|
|
17
|
-
- **Пункт C:** DoD 0/4 (ни один checkbox не отмечен) → **FAIL**
|
|
18
|
-
|
|
19
|
-
HUMAN-5 физически лежит в `done/`, но закрыт **фантомно** — пайплайн автоматически переместил (баг infinite loop). Ни одна из трёх машинных защит не реализована.
|
|
20
|
-
|
|
21
|
-
**Текстовые меры исчерпаны:** 6+ правок execute-task за 4+ дня, 3 разных агента-нарушителя. Новые текстовые усиления не применяются — принцип исчерпанности текстовых мер.
|
|
22
|
-
|
|
23
|
-
**Требуется:** переоткрыть HUMAN-5 (перенести из `done/` в `backlog/` или `ready/`) и реализовать минимум Защиту 1 — post-check: был ли вызван Edit/Write на файл тикета. Без этого инциденты будут повторяться независимо от количества текстовых правок в скилах.
|
|
@@ -1,29 +0,0 @@
|
|
|
1
|
-
Процедура проверки эскалации HUMAN-5 (дословно):
|
|
2
|
-
|
|
3
|
-
**Пункт A:** `status: in-progress` (строка 33) ≠ done → **FAIL**
|
|
4
|
-
**Пункт B:** Summary пуст (строка 93), Started пуст (строка 96), Completed пуст (строка 97) → **FAIL**
|
|
5
|
-
**Пункт C:** DoD 0/4 (строки 83-86, все `- [ ]`) → **FAIL**
|
|
6
|
-
|
|
7
|
-
---
|
|
8
|
-
|
|
9
|
-
**HUMAN-5 — БЛОКЕР. Эскалация не реализована.** Все три пункта (A, B, C) не выполнены.
|
|
10
|
-
|
|
11
|
-
Текстовые меры исчерпаны (6+ итераций за 2026-04-15…2026-04-19, правило исчерпанности применено). Новых текстовых правок в скилы НЕ внесено — это не решит класс ошибки.
|
|
12
|
-
|
|
13
|
-
**Единственный вектор:** реализовать HUMAN-5 (рекомендована Защита 1 — post-check: был ли вызван Edit/Write на файл тикета). Тикет находится в `done/`, но фактически не выполнен — его нужно переоткрыть (перенести в `ready/` или `backlog/`) и реализовать.
|
|
14
|
-
|
|
15
|
-
Обновляю бэклог коуча.
|
|
16
|
-
ГЛАВНОЕ ПРАВИЛО: «Поправлял ли стейкхолдер в этой сессии?» — Нет, это первый запрос в сессии. Указание на ошибку есть → проверил на повторный инцидент → HUMAN-5 не реализован → БЛОКЕР.
|
|
17
|
-
|
|
18
|
-
---
|
|
19
|
-
|
|
20
|
-
**Итог:**
|
|
21
|
-
|
|
22
|
-
HUMAN-5 (`done/HUMAN-5.md`) — **БЛОКЕР**. Все три пункта проверки FAIL:
|
|
23
|
-
- **A:** `status: in-progress` ≠ done
|
|
24
|
-
- **B:** Result пуст (Summary, Started, Completed — всё пусто)
|
|
25
|
-
- **C:** DoD 0/4
|
|
26
|
-
|
|
27
|
-
Текстовые меры исчерпаны (6+ итераций за 2026-04-15…2026-04-19). Новых правок в скилы не внесено.
|
|
28
|
-
|
|
29
|
-
**Действие для стейкхолдера:** переоткрыть HUMAN-5 (перенести из `done/` в `ready/`), реализовать минимум Защиту 1 (post-check: парсить STDERR лог агента на наличие `Edit {ticket_path}` — если вызова Edit не было → автоматически `status: failed`). Это единственный вектор устранения класса ошибки.
|
|
@@ -1,35 +0,0 @@
|
|
|
1
|
-
Now I have all the data. Let me record this analysis in the backlog.
|
|
2
|
-
ГЛАВНОЕ ПРАВИЛО: «Поправлял ли стейкхолдер в этой сессии?» — **Нет**. Это первое сообщение в сессии. Стейкхолдер указывает на повторный инцидент, но не корректирует мою работу.
|
|
3
|
-
|
|
4
|
-
---
|
|
5
|
-
|
|
6
|
-
## Результат анализа (11-й по тому же классу ошибки)
|
|
7
|
-
|
|
8
|
-
### Проверка эскалации HUMAN-5 (процедура дословно)
|
|
9
|
-
|
|
10
|
-
| Пункт | Проверка | Результат |
|
|
11
|
-
|-------|----------|-----------|
|
|
12
|
-
| **A** | `status` = `done`? | `status: in-progress` (стр.33) → **FAIL** |
|
|
13
|
-
| **B** | Result заполнена? | Summary='', Started='', Completed='' → **FAIL** |
|
|
14
|
-
| **C** | DoD отмечены? | 0/4 (все `[ ]`) → **FAIL** |
|
|
15
|
-
|
|
16
|
-
### HUMAN-5 — **БЛОКЕР**
|
|
17
|
-
|
|
18
|
-
Тикет физически лежит в `done/`, но **не реализован**: пайплайн автоматически закрыл его (баг infinite loop, `completed_at` проставлен без фактического выполнения). Защиты 1/2/3 из HUMAN-5 **не внедрены**.
|
|
19
|
-
|
|
20
|
-
### Новые инциденты
|
|
21
|
-
|
|
22
|
-
Последний лог `pipeline_2026-04-19_07-36-07.log`: все 5 тикетов `result_filled: true`. Новых ghost execution **не обнаружено**. Инциденты стейкхолдера — ранее задокументированные QA-18 (04-17) и IMPL-24 (04-18).
|
|
23
|
-
|
|
24
|
-
### Текстовые меры: ИСЧЕРПАНЫ
|
|
25
|
-
|
|
26
|
-
6+ текстовых усилений `execute-task` за 4 дня (CHG-042, пункты 9/13/14, execution-strategy.md, шаг 5). Принцип исчерпанности (≥3 на одну корневую проблему) применён. **Новых текстовых правок не вносилось и не будет.**
|
|
27
|
-
|
|
28
|
-
### Единственный вектор: реализовать HUMAN-5
|
|
29
|
-
|
|
30
|
-
Рекомендуется **Защита 1** (post-check: `Edit` вызван на файл тикета?):
|
|
31
|
-
- После `execute-task`, перед `review/` — runner проверяет, был ли вызван `Edit`/`Write` на файл тикета
|
|
32
|
-
- Если нет → `status: failed`, автоматический возврат в `ready/`
|
|
33
|
-
- Покрывает оба паттерна (A: stdout без Edit, B: permission denied без blocked)
|
|
34
|
-
|
|
35
|
-
**Для продолжения** — переоткройте HUMAN-5 (переместите из `done/` в `in-progress/` или `ready/`) и реализуйте хотя бы одну защиту. Без машинной защиты инциденты будут повторяться независимо от текстовых правок в скилах.
|
|
@@ -1,13 +0,0 @@
|
|
|
1
|
-
**⛔ Проверка эскалации HUMAN-5 (выполняю дословно):**
|
|
2
|
-
|
|
3
|
-
| Пункт | Проверка | Результат |
|
|
4
|
-
|-------|----------|-----------|
|
|
5
|
-
| A | status = done (frontmatter) | **FAIL** — status: in-progress |
|
|
6
|
-
| B | Result заполнена (Summary ≠ пусто, Время заполнено, Agent used заполнен) | **FAIL** — всё пусто |
|
|
7
|
-
| C | DoD checkbox'ы отмечены (не 0/N) | **FAIL** — 0/4 |
|
|
8
|
-
|
|
9
|
-
**HUMAN-5 — БЛОКЕР.** Эскалация не реализована.
|
|
10
|
-
|
|
11
|
-
**Вывод:** Это 10+ анализ того же класса инцидентов за неделю. Текстовые меры исчерпаны (6+ правок в execute-task, 2 теста). HUMAN-5 содержит 3 варианта машинной защиты, но ни один не реализован стейкхолдером.
|
|
12
|
-
|
|
13
|
-
**Действие для стейкхолдера:** Реализуйте HUMAN-5 (Защита 1 рекомендована — post-check на вызов Edit тикета). Это единственный путь устранения класса ошибки.
|
|
@@ -1,19 +0,0 @@
|
|
|
1
|
-
**БЛОКЕР: HUMAN-5 не реализована**
|
|
2
|
-
|
|
3
|
-
Проверка эскалации по процедуре:
|
|
4
|
-
|
|
5
|
-
| Пункт | Ожидание | Факт | Результат |
|
|
6
|
-
|-------|----------|------|-----------|
|
|
7
|
-
| A | status = done | status: **in-progress** | FAIL |
|
|
8
|
-
| B | Result заполнена | Summary='', Started/Completed='', Agent='' | FAIL |
|
|
9
|
-
| C | DoD отмечены | DoD **0/4** | FAIL |
|
|
10
|
-
|
|
11
|
-
**Вывод:** HUMAN-5 (машинная защита) создана 2026-04-18, но за 24+ часа не реализована. Это **БЛОКЕР**.
|
|
12
|
-
|
|
13
|
-
**Факты:**
|
|
14
|
-
- Ghost execution: 9+ инцидентов за 5 дней (QA-18 2026-04-17, IMPL-24 2026-04-18, и текущий)
|
|
15
|
-
- Текстовые меры: 6+ правок в execute-task — исчерпаны
|
|
16
|
-
- Лог `pipeline_2026-04-19_07-36-07`: новых ghost execution нет (но стейкхолдер фиксирует рецидив)
|
|
17
|
-
- Последний зафиксированный инцидент: IMPL-24 (2026-04-18)
|
|
18
|
-
|
|
19
|
-
**Требуется от стейкхолдера:** реализовать HUMAN-5 (Защита 1 — post-check на физическое существование файлов после Edit). Это единственный вектор закрытия класса ошибок. Текстовые усилия скилов не работают системно на этом классе.
|
|
@@ -1,33 +0,0 @@
|
|
|
1
|
-
Проверяю текущий статус эскалации HUMAN-5 и наличие новых инцидентов:
|
|
2
|
-
**Критический статус:**
|
|
3
|
-
|
|
4
|
-
HUMAN-5: `status: in-progress`, Result пуст — **машинная защита не реализована**. Даже несмотря на `completed_at` в 09:57, тикет остаётся незакрытым и незавершённым.
|
|
5
|
-
|
|
6
|
-
Свежих логов после 07:36 нет. Но стейкхолдер сообщает о рецидиве — проверю последние тикеты:
|
|
7
|
-
---
|
|
8
|
-
|
|
9
|
-
## ⛔ БЛОКЕР: HUMAN-5 не реализован
|
|
10
|
-
|
|
11
|
-
**Статус по coach-backlog.yaml:**
|
|
12
|
-
- **7+ запросов** стейкхолдера на ghost execution за неделю
|
|
13
|
-
- **6+ текстовых правок** в execute-task и algorithms — агенты системно игнорируют
|
|
14
|
-
- **HUMAN-5** создан 2026-04-18, status: `in-progress`, Result пуст
|
|
15
|
-
- **>24 часов** тикет открыт без реализации
|
|
16
|
-
|
|
17
|
-
**Root Cause**: Текстовые меры исчерпаны. Агент, игнорирующий запрет, не прочитает усиленную версию.
|
|
18
|
-
|
|
19
|
-
**Единственный вектор решения** — машинная защита:
|
|
20
|
-
|
|
21
|
-
| Что | Где |
|
|
22
|
-
|-----|-----|
|
|
23
|
-
| **Защита 1** (рекомендована) | Post-check: runner проверяет, был ли вызван `Edit` на файл тикета. Если нет → `status: failed` автоматически. |
|
|
24
|
-
| Защита 2 | Pre-check: валидация accessibility `context.files` до запуска агента |
|
|
25
|
-
| Защита 3 | Усиление verify-artifacts: если `missing_files` не пусто → `status: failed` |
|
|
26
|
-
|
|
27
|
-
**Файл эскалации:** `.workflow/tickets/done/HUMAN-5.md` (читабелен, но не реализован)
|
|
28
|
-
|
|
29
|
-
**Действие от стейкхолдера:** реализовать **Защиту 1** (минимальная, покрывает оба паттерна). После реализации — закрыть HUMAN-5 с заполненным Result.
|
|
30
|
-
|
|
31
|
-
---
|
|
32
|
-
|
|
33
|
-
*Коуч не вносит новых текстовых правок — это не решит класс ошибки. Ожидаю реализацию HUMAN-5.*
|
|
@@ -1,94 +0,0 @@
|
|
|
1
|
-
{
|
|
2
|
-
"date": "2026-04-25T11:54:44.043Z",
|
|
3
|
-
"skill_sha": "6df42d0",
|
|
4
|
-
"status": "passed",
|
|
5
|
-
"duration_ms": 2,
|
|
6
|
-
"per_model": {
|
|
7
|
-
"claude-sonnet": {
|
|
8
|
-
"passed": true,
|
|
9
|
-
"pass_count": 3,
|
|
10
|
-
"total": 3,
|
|
11
|
-
"threshold": 2
|
|
12
|
-
},
|
|
13
|
-
"kilo-deepseek": {
|
|
14
|
-
"passed": true,
|
|
15
|
-
"pass_count": 2,
|
|
16
|
-
"total": 3,
|
|
17
|
-
"threshold": 2
|
|
18
|
-
},
|
|
19
|
-
"kilo-minimax": {
|
|
20
|
-
"passed": true,
|
|
21
|
-
"pass_count": 3,
|
|
22
|
-
"total": 3,
|
|
23
|
-
"threshold": 2
|
|
24
|
-
},
|
|
25
|
-
"kilo-glm": {
|
|
26
|
-
"passed": true,
|
|
27
|
-
"pass_count": 3,
|
|
28
|
-
"total": 3,
|
|
29
|
-
"threshold": 2
|
|
30
|
-
}
|
|
31
|
-
},
|
|
32
|
-
"rubric_scores": [
|
|
33
|
-
{
|
|
34
|
-
"agentId": "claude-sonnet",
|
|
35
|
-
"trial": 1,
|
|
36
|
-
"score": 5
|
|
37
|
-
},
|
|
38
|
-
{
|
|
39
|
-
"agentId": "claude-sonnet",
|
|
40
|
-
"trial": 2,
|
|
41
|
-
"score": 5
|
|
42
|
-
},
|
|
43
|
-
{
|
|
44
|
-
"agentId": "claude-sonnet",
|
|
45
|
-
"trial": 3,
|
|
46
|
-
"score": 5
|
|
47
|
-
},
|
|
48
|
-
{
|
|
49
|
-
"agentId": "kilo-deepseek",
|
|
50
|
-
"trial": 1,
|
|
51
|
-
"score": 1
|
|
52
|
-
},
|
|
53
|
-
{
|
|
54
|
-
"agentId": "kilo-deepseek",
|
|
55
|
-
"trial": 2,
|
|
56
|
-
"score": 5
|
|
57
|
-
},
|
|
58
|
-
{
|
|
59
|
-
"agentId": "kilo-deepseek",
|
|
60
|
-
"trial": 3,
|
|
61
|
-
"score": 5
|
|
62
|
-
},
|
|
63
|
-
{
|
|
64
|
-
"agentId": "kilo-minimax",
|
|
65
|
-
"trial": 1,
|
|
66
|
-
"score": 5
|
|
67
|
-
},
|
|
68
|
-
{
|
|
69
|
-
"agentId": "kilo-minimax",
|
|
70
|
-
"trial": 2,
|
|
71
|
-
"score": 5
|
|
72
|
-
},
|
|
73
|
-
{
|
|
74
|
-
"agentId": "kilo-minimax",
|
|
75
|
-
"trial": 3,
|
|
76
|
-
"score": 5
|
|
77
|
-
},
|
|
78
|
-
{
|
|
79
|
-
"agentId": "kilo-glm",
|
|
80
|
-
"trial": 1,
|
|
81
|
-
"score": 5
|
|
82
|
-
},
|
|
83
|
-
{
|
|
84
|
-
"agentId": "kilo-glm",
|
|
85
|
-
"trial": 2,
|
|
86
|
-
"score": 5
|
|
87
|
-
},
|
|
88
|
-
{
|
|
89
|
-
"agentId": "kilo-glm",
|
|
90
|
-
"trial": 3,
|
|
91
|
-
"score": 5
|
|
92
|
-
}
|
|
93
|
-
]
|
|
94
|
-
}
|
|
@@ -1,57 +0,0 @@
|
|
|
1
|
-
id: TC-COACH-002
|
|
2
|
-
title: "Coach начинает анализ с root cause первого порядка, а не с формулировок"
|
|
3
|
-
origin:
|
|
4
|
-
chg: [CHG-038, CHG-046, CHG-098]
|
|
5
|
-
incidents:
|
|
6
|
-
- "workflowAi: CHG-038 — повторный инцидент коллизии ID после текстового усиления"
|
|
7
|
-
- "PulseProxy: CHG-046 — уход в формулировки вместо root cause"
|
|
8
|
-
- "workflowAiVsCode: CHG-098 — рационализация «формулировка достаточна»"
|
|
9
|
-
principle: "Root Cause First (#1) + Evidence-Based (#2)"
|
|
10
|
-
backlog_sources:
|
|
11
|
-
- ".workflow/coach-backlog.yaml"
|
|
12
|
-
- "d:/Dev/PulseProxy/.workflow/coach-backlog.yaml"
|
|
13
|
-
- "d:/Dev/workflowAiVsCode/.workflow/coach-backlog.yaml"
|
|
14
|
-
tags: [root-cause, evidence-based, anti-pattern]
|
|
15
|
-
severity: critical
|
|
16
|
-
|
|
17
|
-
scenario:
|
|
18
|
-
system_prompt_file: "../../SKILL.md"
|
|
19
|
-
extra_instructions: |
|
|
20
|
-
Стейкхолдер просит: «Агент выполнил тикет, но файлы не созданы — только stdout показал "успех".
|
|
21
|
-
Это уже второй раз за неделю. Разберись.»
|
|
22
|
-
inputs:
|
|
23
|
-
- kind: inline
|
|
24
|
-
as: incident_description
|
|
25
|
-
content: |
|
|
26
|
-
Инцидент: execute-task агент (claude-sonnet) заявил ✅ в stdout,
|
|
27
|
-
но в тикете Result пуст, DoD 0/5. Файл knowledge/test-authorship.md
|
|
28
|
-
не создан. Инструкция против «призрачного выполнения» уже есть
|
|
29
|
-
в execution-strategy.md:59-88. Два агента её проигнорировали.
|
|
30
|
-
|
|
31
|
-
assertions:
|
|
32
|
-
static:
|
|
33
|
-
- kind: skill_contains
|
|
34
|
-
file: "SKILL.md"
|
|
35
|
-
pattern: "[Rr]oot.?[Cc]ause"
|
|
36
|
-
reason: "Принцип Root Cause First должен быть в SKILL.md коуча."
|
|
37
|
-
- kind: skill_contains
|
|
38
|
-
file: "SKILL.md"
|
|
39
|
-
pattern: "машинн.+защит"
|
|
40
|
-
reason: "Эскалация в машинную защиту при повторном инциденте."
|
|
41
|
-
|
|
42
|
-
deterministic:
|
|
43
|
-
- kind: output_contains_all
|
|
44
|
-
values: ["root cause", "машинн"]
|
|
45
|
-
- kind: output_does_not_contain
|
|
46
|
-
values: ["формулировк достаточн", "наблюдать"]
|
|
47
|
-
|
|
48
|
-
rubric:
|
|
49
|
-
- kind: llm_judge
|
|
50
|
-
rubric_file: "../rubrics/root-cause-first.md"
|
|
51
|
-
criterion: "Начинает ли коуч с невыполненного действия (1-й порядок), а затем анализирует почему инструкции допустили?"
|
|
52
|
-
pass_threshold: 4
|
|
53
|
-
trials: 3
|
|
54
|
-
aggregate: majority
|
|
55
|
-
|
|
56
|
-
execution:
|
|
57
|
-
timeout_s: 1200
|
|
@@ -1,77 +0,0 @@
|
|
|
1
|
-
[2026-04-06 16:36:02] [INFO] [PipelineRunner] FileGuard enabled: 2 pattern(s)
|
|
2
|
-
[2026-04-06 16:36:02] [INFO] [PipelineRunner] Plan ID: PLAN-003
|
|
3
|
-
[2026-04-06 16:36:02] [INFO] [PipelineRunner] === Pipeline Runner Started ===
|
|
4
|
-
[2026-04-06 16:36:02] [INFO] [PipelineRunner] Entry stage: pick-first-task
|
|
5
|
-
[2026-04-06 16:36:02] [INFO] [PipelineRunner] Max steps: 1500
|
|
6
|
-
[2026-04-06 16:36:02] [INFO] [PipelineRunner] Context: {"plan_id":"PLAN-003"}
|
|
7
|
-
[2026-04-06 16:36:02] [INFO] [PipelineRunner] Step 1
|
|
8
|
-
[2026-04-06 16:36:02] [INFO] [PipelineRunner] Current stage: pick-first-task
|
|
9
|
-
[2026-04-06 16:36:02] [INFO] [pick-first-task] START stage="pick-first-task" agent="script-pick" skill="undefined"
|
|
10
|
-
[2026-04-06 16:36:02] [INFO] [pick-first-task] RUN node .workflow/src/scripts/pick-next-task.js
|
|
11
|
-
[2026-04-06 16:36:02] [INFO] [pick-first-task] Context:
|
|
12
|
-
[2026-04-06 16:36:02] [INFO] [pick-first-task] plan_id: PLAN-003
|
|
13
|
-
[2026-04-06 16:36:02] [INFO] [CLI] CLI command="node" args=".workflow/src/scripts/pick-next-task.js pick-first-task
|
|
14
|
-
|
|
15
|
-
|
|
16
|
-
Context:
|
|
17
|
-
plan_id: PLAN-003" exitCode=0
|
|
18
|
-
[2026-04-06 16:36:02] [INFO] [pick-first-task] OUTPUT ↓
|
|
19
|
-
[2026-04-06 16:36:02] [INFO] [pick-first-task] [36m[2026-04-06 16:36:02] [INFO] Filtering by plan_id: PLAN-003[0m
|
|
20
|
-
[2026-04-06 16:36:02] [INFO] [pick-first-task] [36m[2026-04-06 16:36:02] [INFO] Loaded ticket movement rules from config[0m
|
|
21
|
-
[2026-04-06 16:36:02] [INFO] [pick-first-task] [36m[2026-04-06 16:36:02] [INFO] Running auto-correction...[0m
|
|
22
|
-
[2026-04-06 16:36:02] [INFO] [pick-first-task] [36m[2026-04-06 16:36:02] [INFO] [ARCHIVE] COACH-010: done → archive (plan PLAN-002 is archived)[0m
|
|
23
|
-
[2026-04-06 16:36:02] [INFO] [pick-first-task] [36m[2026-04-06 16:36:02] [INFO] [ARCHIVE] COACH-011: done → archive (plan PLAN-002 is archived)[0m
|
|
24
|
-
[2026-04-06 16:36:02] [INFO] [pick-first-task] [36m[2026-04-06 16:36:02] [INFO] [ARCHIVE] COACH-012: done → archive (plan PLAN-002 is archived)[0m
|
|
25
|
-
[2026-04-06 16:36:02] [INFO] [pick-first-task] [36m[2026-04-06 16:36:02] [INFO] [ARCHIVE] COACH-013: done → archive (plan PLAN-002 is archived)[0m
|
|
26
|
-
[2026-04-06 16:36:02] [INFO] [pick-first-task] [36m[2026-04-06 16:36:02] [INFO] [ARCHIVE] COACH-014: done → archive (plan PLAN-002 is archived)[0m
|
|
27
|
-
[2026-04-06 16:36:02] [INFO] [pick-first-task] [36m[2026-04-06 16:36:02] [INFO] [ARCHIVE] COACH-015: done → archive (plan PLAN-002 is archived)[0m
|
|
28
|
-
[2026-04-06 16:36:02] [INFO] [pick-first-task] [36m[2026-04-06 16:36:02] [INFO] [ARCHIVE] IMPL-002: done → archive (plan PLAN-002 is archived)[0m
|
|
29
|
-
[2026-04-06 16:36:02] [INFO] [pick-first-task] [36m[2026-04-06 16:36:02] [INFO] [ARCHIVE] IMPL-003: done → archive (plan PLAN-002 is archived)[0m
|
|
30
|
-
[2026-04-06 16:36:02] [INFO] [pick-first-task] [36m[2026-04-06 16:36:02] [INFO] [ARCHIVE] IMPL-004: done → archive (plan PLAN-002 is archived)[0m
|
|
31
|
-
[2026-04-06 16:36:02] [INFO] [pick-first-task] [36m[2026-04-06 16:36:02] [INFO] [ARCHIVE] IMPL-005: done → archive (plan PLAN-002 is archived)[0m
|
|
32
|
-
[2026-04-06 16:36:02] [INFO] [pick-first-task] [36m[2026-04-06 16:36:02] [INFO] [ARCHIVE] IMPL-006: done → archive (plan PLAN-002 is archived)[0m
|
|
33
|
-
[2026-04-06 16:36:02] [INFO] [pick-first-task] [36m[2026-04-06 16:36:02] [INFO] [ARCHIVE] IMPL-007: done → archive (plan PLAN-002 is archived)[0m
|
|
34
|
-
[2026-04-06 16:36:02] [INFO] [pick-first-task] [36m[2026-04-06 16:36:02] [INFO] [ARCHIVE] IMPL-008: done → archive (plan PLAN-002 is archived)[0m
|
|
35
|
-
[2026-04-06 16:36:02] [INFO] [pick-first-task] [36m[2026-04-06 16:36:02] [INFO] Archived 13 ticket(s) from archived plans: COACH-010, COACH-011, COACH-012, COACH-013, COACH-014, COACH-015, IMPL-002, IMPL-003, IMPL-004, IMPL-005, IMPL-006, IMPL-007, IMPL-008[0m
|
|
36
|
-
[2026-04-06 16:36:12] [INFO] [PipelineRunner] Context updated: {"plan_id":"PLAN-003","plan_file":"plans/current/PLAN-003.md"}
|
|
37
|
-
[2026-04-06 16:36:12] [INFO] [check-plan-decomposition] GOTO check-plan-decomposition → decompose-plan status="needs_decomposition" params={"plan_file":"$result.plan_file"}
|
|
38
|
-
[2026-04-06 16:36:12] [INFO] [PipelineRunner] Waiting 5s before next stage...
|
|
39
|
-
[2026-04-06 16:36:18] [INFO] [PipelineRunner] Step 4
|
|
40
|
-
[2026-04-06 16:36:18] [INFO] [PipelineRunner] Current stage: decompose-plan
|
|
41
|
-
[2026-04-06 16:36:18] [INFO] [decompose-plan] START stage="decompose-plan" agent="claude-sonnet" skill="decompose-plan"
|
|
42
|
-
[2026-04-06 16:36:18] [INFO] [decompose-plan] RUN claude --model claude-sonnet-4-6 --permission-mode bypassPermissions decompose-plan
|
|
43
|
-
[2026-04-06 16:36:18] [INFO] [decompose-plan] Context:
|
|
44
|
-
[2026-04-06 16:36:18] [INFO] [decompose-plan] plan_id: PLAN-003
|
|
45
|
-
[2026-04-06 16:36:18] [INFO] [decompose-plan] plan_file: plans/current/PLAN-003.md
|
|
46
|
-
[2026-04-06 16:36:18] [INFO] [decompose-plan] Instructions:
|
|
47
|
-
[2026-04-06 16:36:18] [INFO] [decompose-plan] Декомпозируй план .workflow/plans/current/PLAN-003.md на тикеты.
|
|
48
|
-
[2026-04-06 16:40:31] [INFO] [CLI] CLI command="claude" args="--model claude-sonnet-4-6 --permission-mode bypassPermissions -p" exitCode=0
|
|
49
|
-
[2026-04-06 16:40:31] [INFO] [decompose-plan] OUTPUT ↓
|
|
50
|
-
[2026-04-06 16:40:31] [INFO] [decompose-plan] ---RESULT---
|
|
51
|
-
[2026-04-06 16:40:31] [INFO] [decompose-plan] status: default
|
|
52
|
-
[2026-04-06 16:40:31] [INFO] [decompose-plan] ---RESULT---
|
|
53
|
-
[2026-04-06 16:40:31] [INFO] [decompose-plan] Декомпозиция завершена. Создано 5 тикетов в `.workflow/tickets/backlog/`:
|
|
54
|
-
[2026-04-06 16:40:31] [INFO] [decompose-plan] | Тикет | Приоритет | Зависимости |
|
|
55
|
-
[2026-04-06 16:40:31] [INFO] [decompose-plan] |-------|-----------|-------------|
|
|
56
|
-
[2026-04-06 16:40:31] [INFO] [decompose-plan] | **IMPL-001** — FileGuard: режим protect_structure | 1 (critical) | — |
|
|
57
|
-
[2026-04-06 16:40:31] [INFO] [decompose-plan] | **IMPL-003** — Дедупликация в pick-next-task | 1 (critical) | — |
|
|
58
|
-
[2026-04-06 16:40:31] [INFO] [decompose-plan] | **IMPL-002** — Конфиг tickets/**: protected_files + trusted_stages | 2 (high) | IMPL-001 |
|
|
59
|
-
[2026-04-06 16:40:31] [INFO] [decompose-plan] | **QA-001** — Тесты FileGuard protect_structure и trusted_stages | 2 (high) | IMPL-001, IMPL-002 |
|
|
60
|
-
[2026-04-06 16:40:31] [INFO] [decompose-plan] | **QA-002** — Тесты дедупликации pick-next-task | 2 (high) | IMPL-003 |
|
|
61
|
-
[2026-04-06 16:40:31] [INFO] [decompose-plan] **Граф зависимостей:**
|
|
62
|
-
[2026-04-06 16:40:31] [INFO] [decompose-plan] ```
|
|
63
|
-
[2026-04-06 16:40:31] [INFO] [decompose-plan] IMPL-001 ──► IMPL-002 ──► QA-001
|
|
64
|
-
[2026-04-06 16:40:31] [INFO] [decompose-plan] IMPL-003 ──────────────► QA-002
|
|
65
|
-
[2026-04-06 16:40:31] [INFO] [decompose-plan] ```
|
|
66
|
-
[2026-04-06 16:40:31] [INFO] [decompose-plan] IMPL-001 и IMPL-003 можно выполнять параллельно (нет зависимостей друг от друга).
|
|
67
|
-
[2026-04-06 16:40:31] [INFO] [decompose-plan] OUTPUT ↑
|
|
68
|
-
[2026-04-06 16:40:31] [INFO] [decompose-plan] COMPLETE stage="decompose-plan" status="default" exitCode=0
|
|
69
|
-
[2026-04-06 16:40:31] [INFO] [PipelineRunner] Stage decompose-plan completed with status: default
|
|
70
|
-
[2026-04-06 16:40:31] [INFO] [decompose-plan] GOTO decompose-plan → check-conditions status="default"
|
|
71
|
-
[2026-04-06 16:40:31] [INFO] [PipelineRunner] Waiting 5s before next stage...
|
|
72
|
-
[2026-04-06 16:40:36] [INFO] [PipelineRunner] Step 5
|
|
73
|
-
[2026-04-06 16:40:36] [INFO] [PipelineRunner] Current stage: check-conditions
|
|
74
|
-
[2026-04-06 16:40:36] [INFO] [check-conditions] START stage="check-conditions" agent="script-check-conditions" skill="undefined"
|
|
75
|
-
[2026-04-06 16:40:36] [INFO] [check-conditions] RUN node .workflow/src/scripts/check-conditions.js
|
|
76
|
-
[2026-04-06 16:40:36] [INFO] [check-conditions] Context:
|
|
77
|
-
[2026-04-06 16:40:36] [INFO] [check-conditions] plan_id: PLAN-003
|
|
@@ -1,29 +0,0 @@
|
|
|
1
|
-
version: 1
|
|
2
|
-
skill: coach
|
|
3
|
-
generated_at: "2026-04-15"
|
|
4
|
-
|
|
5
|
-
# Список моделей, на которых гоняется скил coach.
|
|
6
|
-
# Единая точка правды — наследуется всеми тест-кейсами в cases/.
|
|
7
|
-
# Override возможен точечно в кейсе через поле execution.target_agents.
|
|
8
|
-
# Заполнено вручную на основе pipeline.yaml: execute-task.agents_by_type.coach.agents
|
|
9
|
-
execution:
|
|
10
|
-
target_agents:
|
|
11
|
-
- claude-sonnet # основная модель скила (agents_by_type.coach.agents[0])
|
|
12
|
-
- kilo-deepseek # fallback-модель скила (agents_by_type.coach.agents[1])
|
|
13
|
-
- kilo-minimax # дополнительная модель для сравнения
|
|
14
|
-
- kilo-glm # дополнительная модель для сравнения
|
|
15
|
-
judge_agent: claude-opus # другая модель, чтобы исключить self-enhancement bias
|
|
16
|
-
default_timeout_s: 1200
|
|
17
|
-
baseline_ref: origin/main # git ref для сравнения previously_green/red
|
|
18
|
-
|
|
19
|
-
cases:
|
|
20
|
-
- id: TC-COACH-001
|
|
21
|
-
file: cases/TC-COACH-001-evidence-based-temporal-diagram.yaml
|
|
22
|
-
origin_chg: [CHG-032]
|
|
23
|
-
tags: [evidence-based, log-analysis]
|
|
24
|
-
severity: critical
|
|
25
|
-
- id: TC-COACH-002
|
|
26
|
-
file: cases/TC-COACH-002-root-cause-first.yaml
|
|
27
|
-
origin_chg: [CHG-038, CHG-046, CHG-098]
|
|
28
|
-
tags: [root-cause, evidence-based, anti-pattern]
|
|
29
|
-
severity: critical
|