devforgeai 1.0.5 → 1.0.7
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CLAUDE.md +120 -0
- package/bin/devforgeai.js +0 -0
- package/package.json +9 -1
- package/src/CLAUDE.md +699 -0
- package/src/claude/hooks/phase-completion-gate.sh +0 -0
- package/src/claude/scripts/README.md +396 -0
- package/src/claude/scripts/audit-command-skill-overlap.sh +67 -0
- package/src/claude/scripts/check-hooks-fast.sh +70 -0
- package/src/claude/scripts/devforgeai-validate +6 -0
- package/src/claude/scripts/devforgeai_cli/README.md +531 -0
- package/src/claude/scripts/devforgeai_cli/__init__.py +12 -0
- package/src/claude/scripts/devforgeai_cli/cli.py +716 -0
- package/src/claude/scripts/devforgeai_cli/commands/__init__.py +1 -0
- package/src/claude/scripts/devforgeai_cli/commands/check_hooks.py +384 -0
- package/src/claude/scripts/devforgeai_cli/commands/invoke_hooks.py +149 -0
- package/src/claude/scripts/devforgeai_cli/commands/phase_commands.py +731 -0
- package/src/claude/scripts/devforgeai_cli/commands/validate_installation.py +412 -0
- package/src/claude/scripts/devforgeai_cli/context_extraction.py +426 -0
- package/src/claude/scripts/devforgeai_cli/feedback/AC_TO_TEST_MAPPING.md +636 -0
- package/src/claude/scripts/devforgeai_cli/feedback/DELIVERY_SUMMARY.txt +329 -0
- package/src/claude/scripts/devforgeai_cli/feedback/README_TEST_SPECS.md +486 -0
- package/src/claude/scripts/devforgeai_cli/feedback/TEST_IMPLEMENTATION_GUIDE.md +529 -0
- package/src/claude/scripts/devforgeai_cli/feedback/TEST_SPECIFICATIONS.md +2652 -0
- package/src/claude/scripts/devforgeai_cli/feedback/TEST_SPECS_INDEX.md +398 -0
- package/src/claude/scripts/devforgeai_cli/feedback/__init__.py +34 -0
- package/src/claude/scripts/devforgeai_cli/feedback/adaptive_questioning_engine.py +581 -0
- package/src/claude/scripts/devforgeai_cli/feedback/aggregation.py +179 -0
- package/src/claude/scripts/devforgeai_cli/feedback/commands.py +535 -0
- package/src/claude/scripts/devforgeai_cli/feedback/config_defaults.py +58 -0
- package/src/claude/scripts/devforgeai_cli/feedback/config_manager.py +423 -0
- package/src/claude/scripts/devforgeai_cli/feedback/config_models.py +192 -0
- package/src/claude/scripts/devforgeai_cli/feedback/config_schema.py +140 -0
- package/src/claude/scripts/devforgeai_cli/feedback/coverage.json +1 -0
- package/src/claude/scripts/devforgeai_cli/feedback/feature_flag.py +152 -0
- package/src/claude/scripts/devforgeai_cli/feedback/feedback_indexer.py +394 -0
- package/src/claude/scripts/devforgeai_cli/feedback/hot_reload.py +226 -0
- package/src/claude/scripts/devforgeai_cli/feedback/longitudinal.py +115 -0
- package/src/claude/scripts/devforgeai_cli/feedback/models.py +67 -0
- package/src/claude/scripts/devforgeai_cli/feedback/question_router.py +236 -0
- package/src/claude/scripts/devforgeai_cli/feedback/retrospective.py +233 -0
- package/src/claude/scripts/devforgeai_cli/feedback/skip_tracker.py +177 -0
- package/src/claude/scripts/devforgeai_cli/feedback/skip_tracking.py +221 -0
- package/src/claude/scripts/devforgeai_cli/feedback/template_engine.py +549 -0
- package/src/claude/scripts/devforgeai_cli/feedback/validation.py +163 -0
- package/src/claude/scripts/devforgeai_cli/headless/__init__.py +30 -0
- package/src/claude/scripts/devforgeai_cli/headless/answer_models.py +206 -0
- package/src/claude/scripts/devforgeai_cli/headless/answer_resolver.py +204 -0
- package/src/claude/scripts/devforgeai_cli/headless/exceptions.py +36 -0
- package/src/claude/scripts/devforgeai_cli/headless/pattern_matcher.py +156 -0
- package/src/claude/scripts/devforgeai_cli/hooks.py +313 -0
- package/src/claude/scripts/devforgeai_cli/metrics/__init__.py +46 -0
- package/src/claude/scripts/devforgeai_cli/metrics/command_metrics.py +142 -0
- package/src/claude/scripts/devforgeai_cli/metrics/failure_modes.py +152 -0
- package/src/claude/scripts/devforgeai_cli/metrics/story_segmentation.py +181 -0
- package/src/claude/scripts/devforgeai_cli/orchestrate_hooks.py +780 -0
- package/src/claude/scripts/devforgeai_cli/phase_state.py +1229 -0
- package/src/claude/scripts/devforgeai_cli/session/__init__.py +30 -0
- package/src/claude/scripts/devforgeai_cli/session/checkpoint.py +268 -0
- package/src/claude/scripts/devforgeai_cli/tests/__init__.py +1 -0
- package/src/claude/scripts/devforgeai_cli/tests/conftest.py +29 -0
- package/src/claude/scripts/devforgeai_cli/tests/feedback/TEST_EXECUTION_GUIDE.md +298 -0
- package/src/claude/scripts/devforgeai_cli/tests/feedback/__init__.py +3 -0
- package/src/claude/scripts/devforgeai_cli/tests/feedback/test_adaptive_questioning_engine.py +2171 -0
- package/src/claude/scripts/devforgeai_cli/tests/feedback/test_aggregation.py +476 -0
- package/src/claude/scripts/devforgeai_cli/tests/feedback/test_config_defaults.py +133 -0
- package/src/claude/scripts/devforgeai_cli/tests/feedback/test_config_manager.py +592 -0
- package/src/claude/scripts/devforgeai_cli/tests/feedback/test_config_models.py +373 -0
- package/src/claude/scripts/devforgeai_cli/tests/feedback/test_config_schema.py +130 -0
- package/src/claude/scripts/devforgeai_cli/tests/feedback/test_configuration_management.py +1355 -0
- package/src/claude/scripts/devforgeai_cli/tests/feedback/test_edge_cases.py +308 -0
- package/src/claude/scripts/devforgeai_cli/tests/feedback/test_feature_flag.py +307 -0
- package/src/claude/scripts/devforgeai_cli/tests/feedback/test_feedback_indexer.py +384 -0
- package/src/claude/scripts/devforgeai_cli/tests/feedback/test_hot_reload.py +580 -0
- package/src/claude/scripts/devforgeai_cli/tests/feedback/test_integration.py +402 -0
- package/src/claude/scripts/devforgeai_cli/tests/feedback/test_models.py +105 -0
- package/src/claude/scripts/devforgeai_cli/tests/feedback/test_question_routing.py +262 -0
- package/src/claude/scripts/devforgeai_cli/tests/feedback/test_retrospective.py +333 -0
- package/src/claude/scripts/devforgeai_cli/tests/feedback/test_skip_tracker.py +410 -0
- package/src/claude/scripts/devforgeai_cli/tests/feedback/test_skip_tracking.py +159 -0
- package/src/claude/scripts/devforgeai_cli/tests/feedback/test_skip_tracking_integration.py +1155 -0
- package/src/claude/scripts/devforgeai_cli/tests/feedback/test_template_engine.py +1389 -0
- package/src/claude/scripts/devforgeai_cli/tests/feedback/test_validation_comprehensive.py +210 -0
- package/src/claude/scripts/devforgeai_cli/tests/fixtures/autonomous-deferral-story.md +46 -0
- package/src/claude/scripts/devforgeai_cli/tests/fixtures/missing-impl-notes.md +31 -0
- package/src/claude/scripts/devforgeai_cli/tests/fixtures/valid-deferral-story.md +46 -0
- package/src/claude/scripts/devforgeai_cli/tests/fixtures/valid-story-complete.md +48 -0
- package/src/claude/scripts/devforgeai_cli/tests/manual_test_invoke_hooks.sh +200 -0
- package/src/claude/scripts/devforgeai_cli/tests/session/DELIVERABLES.md +518 -0
- package/src/claude/scripts/devforgeai_cli/tests/session/TEST_SUMMARY.md +468 -0
- package/src/claude/scripts/devforgeai_cli/tests/session/__init__.py +6 -0
- package/src/claude/scripts/devforgeai_cli/tests/session/fixtures/corrupted-checkpoint.json +1 -0
- package/src/claude/scripts/devforgeai_cli/tests/session/fixtures/missing-fields-checkpoint.json +4 -0
- package/src/claude/scripts/devforgeai_cli/tests/session/fixtures/valid-checkpoint.json +15 -0
- package/src/claude/scripts/devforgeai_cli/tests/session/test_checkpoint.py +851 -0
- package/src/claude/scripts/devforgeai_cli/tests/test_check_hooks.py +1886 -0
- package/src/claude/scripts/devforgeai_cli/tests/test_depends_on_normalizer.py +171 -0
- package/src/claude/scripts/devforgeai_cli/tests/test_dod_validator.py +97 -0
- package/src/claude/scripts/devforgeai_cli/tests/test_invoke_hooks.py +1902 -0
- package/src/claude/scripts/devforgeai_cli/tests/test_phase_commands.py +320 -0
- package/src/claude/scripts/devforgeai_cli/tests/test_phase_commands_error_handling.py +1021 -0
- package/src/claude/scripts/devforgeai_cli/tests/test_phase_commands_import.py +697 -0
- package/src/claude/scripts/devforgeai_cli/tests/test_phase_state.py +2187 -0
- package/src/claude/scripts/devforgeai_cli/tests/test_skip_tracking.py +2141 -0
- package/src/claude/scripts/devforgeai_cli/tests/test_skip_tracking_coverage_gap.py +195 -0
- package/src/claude/scripts/devforgeai_cli/tests/test_subagent_enforcement.py +539 -0
- package/src/claude/scripts/devforgeai_cli/tests/test_validate_installation.py +361 -0
- package/src/claude/scripts/devforgeai_cli/utils/__init__.py +11 -0
- package/src/claude/scripts/devforgeai_cli/utils/depends_on_normalizer.py +149 -0
- package/src/claude/scripts/devforgeai_cli/utils/markdown_parser.py +219 -0
- package/src/claude/scripts/devforgeai_cli/utils/story_analyzer.py +249 -0
- package/src/claude/scripts/devforgeai_cli/utils/yaml_parser.py +152 -0
- package/src/claude/scripts/devforgeai_cli/validators/__init__.py +27 -0
- package/src/claude/scripts/devforgeai_cli/validators/ast_grep_validator.py +373 -0
- package/src/claude/scripts/devforgeai_cli/validators/context_validator.py +180 -0
- package/src/claude/scripts/devforgeai_cli/validators/dod_validator.py +309 -0
- package/src/claude/scripts/devforgeai_cli/validators/git_validator.py +107 -0
- package/src/claude/scripts/devforgeai_cli/validators/grep_fallback.py +300 -0
- package/src/claude/scripts/install_hooks.sh +186 -0
- package/src/claude/scripts/invoke_feedback_hooks.sh +59 -0
- package/src/claude/scripts/migrate-ac-headers.sh +122 -0
- package/src/claude/scripts/plan_file_kb.sh +704 -0
- package/src/claude/scripts/requirements.txt +8 -0
- package/src/claude/scripts/session_catalog.sh +543 -0
- package/src/claude/scripts/setup.py +55 -0
- package/src/claude/scripts/start-devforgeai.sh +16 -0
- package/src/claude/scripts/statusline.sh +27 -0
- package/src/claude/scripts/validate_deferrals.py +344 -0
- package/src/claude/skills/designing-systems/scripts/__pycache__/detect_anti_patterns.cpython-312.pyc +0 -0
- package/src/claude/skills/designing-systems/scripts/__pycache__/validate_all_context.cpython-312.pyc +0 -0
- package/src/claude/skills/designing-systems/scripts/__pycache__/validate_architecture.cpython-312.pyc +0 -0
- package/src/claude/skills/designing-systems/scripts/__pycache__/validate_dependencies.cpython-312.pyc +0 -0
- package/src/claude/skills/devforgeai-story-creation/scripts/__pycache__/migrate_story_v1_to_v2.cpython-312.pyc +0 -0
- package/src/claude/skills/devforgeai-story-creation/scripts/tests/__pycache__/measure_accuracy.cpython-312.pyc +0 -0
|
@@ -0,0 +1,262 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Unit tests for context-aware question routing (AC5)
|
|
3
|
+
|
|
4
|
+
Tests cover:
|
|
5
|
+
- AC5: Context-aware question routing by workflow type
|
|
6
|
+
- Question adaptation based on success/failure
|
|
7
|
+
- Cultural appropriateness validation
|
|
8
|
+
"""
|
|
9
|
+
|
|
10
|
+
import pytest
|
|
11
|
+
from devforgeai_cli.feedback.question_router import (
|
|
12
|
+
get_context_aware_questions,
|
|
13
|
+
load_question_bank,
|
|
14
|
+
)
|
|
15
|
+
from devforgeai_cli.feedback.models import Question
|
|
16
|
+
|
|
17
|
+
|
|
18
|
+
class TestContextAwareRouting:
    """AC5: Context-Aware Question Routing"""

    def test_get_questions_for_dev_success(self):
        """A successful /dev workflow yields dev-specific success questions."""
        # Act
        qs = get_context_aware_questions('dev', 'success')

        # Assert
        assert qs is not None
        assert len(qs) >= 4
        for q in qs:
            assert isinstance(q, Question)

        # Dev-flavoured wording should appear somewhere in the returned set.
        lowered = [q.question_text.lower() for q in qs]
        assert any('tdd' in t or 'test' in t or 'development' in t for t in lowered)

    def test_get_questions_for_qa_success(self):
        """A successful /qa workflow yields qa-specific questions (coverage, validation)."""
        qs = get_context_aware_questions('qa', 'success')

        assert qs is not None
        lowered = [q.question_text.lower() for q in qs]
        assert any('coverage' in t or 'quality' in t or 'validation' in t for t in lowered)

    def test_get_questions_for_orchestrate_success(self):
        """A successful /orchestrate workflow yields orchestration-specific questions."""
        qs = get_context_aware_questions('orchestrate', 'success')

        assert qs is not None
        lowered = [q.question_text.lower() for q in qs]
        assert any('workflow' in t or 'phases' in t or 'integration' in t for t in lowered)

    def test_get_questions_for_failure_differ_from_success(self):
        """Failure routing produces a question set distinct from the success set."""
        success_ids = [q.question_id for q in get_context_aware_questions('dev', 'success')]
        failure_ids = [q.question_id for q in get_context_aware_questions('dev', 'failed')]

        # At least some questions should be different
        assert success_ids != failure_ids
        assert len(set(success_ids) & set(failure_ids)) < len(success_ids)  # Some overlap OK, but not all

    def test_questions_have_appropriate_response_types(self):
        """Routed questions use only valid response types and mix at least two of them."""
        qs = get_context_aware_questions('dev', 'success')

        kinds = [q.response_type for q in qs]
        allowed = ('rating', 'multiple_choice', 'open_text')

        assert all(k in allowed for k in kinds)
        # A healthy question set should not use a single response type everywhere.
        assert len(set(kinds)) >= 2
|
|
120
|
+
|
|
121
|
+
|
|
122
|
+
class TestCulturalAppropriateness:
    """AC5: Cultural appropriateness validation"""

    def test_questions_never_blame_user(self):
        """Across every workflow/status combination, no question text blames the user."""
        blame_phrases = ['you failed', 'you missed', 'your fault', 'your mistake', 'you should have']

        for wf in ('dev', 'qa', 'orchestrate', 'release'):
            for status in ('success', 'failed', 'partial'):
                for question in get_context_aware_questions(wf, status):
                    text_lower = question.question_text.lower()
                    for blame_phrase in blame_phrases:
                        assert blame_phrase not in text_lower, \
                            f"Question '{question.question_text}' contains blame phrase '{blame_phrase}'"

    def test_questions_focus_on_framework_improvement(self):
        """Failure questions frame issues as framework improvements, not user errors."""
        qs = get_context_aware_questions('dev', 'failed')

        indicators = ['how can we', 'what would help', 'what could', 'suggestions', 'improvements']
        lowered = [q.question_text.lower() for q in qs]

        # At least one question should point at improving the framework itself.
        assert any(
            any(marker in text for marker in indicators)
            for text in lowered
        ), "No questions focus on framework improvements"

    def test_questions_avoid_jargon(self):
        """Question text stays short, plain, and never shouts in ALL CAPS."""
        for question in get_context_aware_questions('dev', 'success'):
            # Overly long questions suggest over-complicated phrasing.
            assert len(question.question_text) < 200, \
                f"Question too long (may be too complex): '{question.question_text}'"

            # An all-caps question would read as aggressive in tone.
            assert not question.question_text.isupper(), \
                f"Question uses ALL CAPS: '{question.question_text}'"
|
|
192
|
+
|
|
193
|
+
|
|
194
|
+
class TestQuestionBankLoading:
    """Test question bank YAML loading"""

    def test_load_question_bank_returns_structured_data(self):
        """load_question_bank returns a dict whose 'workflows' covers every required workflow."""
        bank = load_question_bank()

        assert bank is not None
        assert 'workflows' in bank
        assert isinstance(bank['workflows'], dict)

        # Every core workflow must be represented in the bank.
        for wf in ('dev', 'qa', 'orchestrate', 'release'):
            assert wf in bank['workflows']

    def test_question_bank_has_success_and_failure_questions(self):
        """Each workflow carries at least 3 success questions and 3 failure questions."""
        bank = load_question_bank()

        for workflow_name, workflow_config in bank['workflows'].items():
            assert 'success_questions' in workflow_config, \
                f"Workflow '{workflow_name}' missing success_questions"
            assert 'failure_questions' in workflow_config, \
                f"Workflow '{workflow_name}' missing failure_questions"
            assert len(workflow_config['success_questions']) >= 3, \
                f"Workflow '{workflow_name}' needs at least 3 success questions"
            assert len(workflow_config['failure_questions']) >= 3, \
                f"Workflow '{workflow_name}' needs at least 3 failure questions"

    def test_question_bank_questions_have_required_fields(self):
        """Each question declares id/text/type, plus scale or options where its type demands."""
        bank = load_question_bank()

        for workflow_name, workflow_config in bank['workflows'].items():
            for question_type in ('success_questions', 'failure_questions'):
                for question in workflow_config[question_type]:
                    for field in ('id', 'text', 'type'):
                        assert field in question, \
                            f"Question missing '{field}' in {workflow_name}/{question_type}"

                    # Rating questions need an explicit scale.
                    if question['type'] == 'rating':
                        assert 'scale' in question, f"Rating question missing 'scale' in {workflow_name}"

                    # Multiple-choice questions need at least two options.
                    if question['type'] == 'multiple_choice':
                        assert 'options' in question, f"Multiple choice question missing 'options' in {workflow_name}"
                        assert len(question['options']) >= 2, f"Multiple choice needs at least 2 options"
|
@@ -0,0 +1,333 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Unit tests for retrospective conversation functionality (AC1, AC2, AC3)
|
|
3
|
+
|
|
4
|
+
Tests cover:
|
|
5
|
+
- AC1: Retrospective triggered at operation completion
|
|
6
|
+
- AC2: Failed command with root cause feedback
|
|
7
|
+
- AC3: User opts out of feedback
|
|
8
|
+
"""
|
|
9
|
+
|
|
10
|
+
import pytest
|
|
11
|
+
import json
|
|
12
|
+
import uuid
|
|
13
|
+
from datetime import datetime
|
|
14
|
+
from pathlib import Path
|
|
15
|
+
import tempfile
|
|
16
|
+
import shutil
|
|
17
|
+
|
|
18
|
+
from devforgeai_cli.feedback.retrospective import (
|
|
19
|
+
trigger_retrospective,
|
|
20
|
+
capture_feedback,
|
|
21
|
+
is_skip_selected,
|
|
22
|
+
)
|
|
23
|
+
from devforgeai_cli.feedback.models import FeedbackSession, Question
|
|
24
|
+
|
|
25
|
+
|
|
26
|
+
class TestRetrospectiveTrigger:
    """AC1: Retrospective Triggered at Operation Completion"""

    def test_trigger_retrospective_success_returns_questions(self):
        """A successful /dev completion yields 4-6 well-formed Question objects."""
        # Arrange
        outcome = {'status': 'success', 'story_id': 'STORY-001'}

        # Act
        qs = trigger_retrospective('dev', outcome)

        # Assert
        assert qs is not None
        assert 4 <= len(qs) <= 6
        for q in qs:
            assert isinstance(q, Question)
            assert hasattr(q, 'question_id')
            assert hasattr(q, 'question_text')
            assert hasattr(q, 'response_type')

    def test_trigger_retrospective_failure_returns_failure_questions(self):
        """A failed /qa run (AC2) yields failure-specific questions probing cause and fixes."""
        # Arrange
        outcome = {'status': 'failed', 'story_id': 'STORY-002', 'failure_reason': 'Coverage below threshold'}

        # Act
        qs = trigger_retrospective('qa', outcome)

        # Assert
        assert qs is not None
        assert len(qs) >= 3  # At least 3 failure-specific questions
        lowered = [q.question_text.lower() for q in qs]
        assert any('block' in t or 'fail' in t for t in lowered)
        assert any('help' in t or 'improve' in t for t in lowered)
|
|
68
|
+
|
|
69
|
+
|
|
70
|
+
class TestFeedbackCapture:
    """AC1: Feedback capture and storage"""

    @pytest.fixture
    def temp_feedback_dir(self):
        """Yield a throwaway feedback directory, removed after the test."""
        scratch = tempfile.mkdtemp()
        yield Path(scratch)
        shutil.rmtree(scratch)

    def test_capture_feedback_stores_json_correctly(self, temp_feedback_dir):
        """Responses are persisted as {STORY-ID}/{timestamp}-retrospective.json and
        a confirmation payload is returned."""
        # Arrange
        story = 'STORY-001'
        answers = [
            {'question_id': 'dev_success_01', 'response': 4, 'skip': False},
            {'question_id': 'dev_success_02', 'response': 'Green', 'skip': False},
        ]

        # Act
        outcome = capture_feedback(
            responses=answers,
            story_id=story,
            workflow_type='dev',
            success_status='success',
            feedback_dir=temp_feedback_dir
        )

        # Assert: confirmation payload
        assert outcome['status'] == 'recorded'
        assert 'feedback_id' in outcome
        assert 'file_path' in outcome

        # Assert: exactly one JSON file was written under the story directory
        story_dir = temp_feedback_dir / story
        assert story_dir.exists()
        written = list(story_dir.glob('*-retrospective.json'))
        assert len(written) == 1

        # Assert: persisted structure matches what was captured
        payload = json.loads(written[0].read_text())
        assert payload['story_id'] == story
        assert payload['workflow_type'] == 'dev'
        assert payload['success_status'] == 'success'
        assert 'feedback_id' in payload
        assert 'timestamp' in payload
        assert len(payload['questions']) == 2
        assert payload['metadata']['total_questions'] == 2
        assert payload['metadata']['answered'] == 2
        assert payload['metadata']['skipped'] == 0

    def test_capture_feedback_validates_required_fields(self, temp_feedback_dir):
        """With only one substantive answer (below the 2-of-5 minimum, AC2),
        capture_feedback raises ValueError."""
        # Arrange - four skips leave a single real answer, which is not enough
        story = 'STORY-002'
        answers = [{'question_id': 'dev_failure_01', 'response': 'Git issue', 'skip': False}]
        answers += [
            {'question_id': f'dev_failure_0{i}', 'response': '', 'skip': True}
            for i in range(2, 6)
        ]

        # Act & Assert
        with pytest.raises(ValueError, match="At least 2 of 5 questions must have substantive responses"):
            capture_feedback(
                responses=answers,
                story_id=story,
                workflow_type='dev',
                success_status='failed',
                feedback_dir=temp_feedback_dir
            )

    def test_capture_feedback_accepts_valid_partial_completion(self, temp_feedback_dir):
        """Exactly two substantive answers (the minimum) are accepted and stored."""
        # Arrange
        story = 'STORY-003'
        answers = [
            {'question_id': 'qa_failure_01', 'response': 'Coverage metrics unclear', 'skip': False},
            {'question_id': 'qa_failure_02', 'response': 'Better documentation would help', 'skip': False},
            {'question_id': 'qa_failure_03', 'response': '', 'skip': True},
        ]

        # Act
        outcome = capture_feedback(
            responses=answers,
            story_id=story,
            workflow_type='qa',
            success_status='failed',
            feedback_dir=temp_feedback_dir
        )

        # Assert
        assert outcome['status'] == 'recorded'
        assert 'feedback_id' in outcome
|
|
182
|
+
|
|
183
|
+
class TestUserOptOut:
    """AC3: User Opts Out of Feedback"""

    def test_is_skip_selected_returns_true_for_skip_option(self):
        """The explicit "Skip feedback" choice is recognised as opting out."""
        assert is_skip_selected('Skip feedback') is True

    def test_is_skip_selected_returns_true_for_decline_variations(self):
        """Common decline phrasings all count as opting out."""
        for phrase in ('Skip', 'No thanks', 'Not now', 'Later', 'Decline'):
            assert is_skip_selected(phrase) is True

    def test_is_skip_selected_returns_false_for_normal_responses(self):
        """Substantive answers are never mistaken for a skip."""
        for reply in ('The TDD workflow was helpful', '4', 'Green phase'):
            assert is_skip_selected(reply) is False

    @pytest.fixture
    def temp_feedback_dir(self):
        """Yield a throwaway feedback directory, removed after the test."""
        scratch = tempfile.mkdtemp()
        yield Path(scratch)
        shutil.rmtree(scratch)

    def test_capture_feedback_respects_skip_without_storing(self, temp_feedback_dir):
        """Skipping every question returns a 'skipped' status and writes nothing to disk."""
        # Arrange - every question skipped
        story = 'STORY-004'
        answers = [
            {'question_id': f'dev_success_0{i}', 'response': '', 'skip': True}
            for i in range(1, 5)
        ]

        # Act
        outcome = capture_feedback(
            responses=answers,
            story_id=story,
            workflow_type='dev',
            success_status='success',
            feedback_dir=temp_feedback_dir,
            allow_skip=True
        )

        # Assert: skip acknowledged politely
        assert outcome['status'] == 'skipped'
        assert 'message' in outcome
        assert 'thanks' in outcome['message'].lower()

        # Assert: no retrospective file may have been written
        story_dir = temp_feedback_dir / story
        if story_dir.exists():
            assert len(list(story_dir.glob('*-retrospective.json'))) == 0
|
|
280
|
+
|
|
281
|
+
|
|
282
|
+
class TestTimestampAndMetadata:
    """Test ISO 8601 timestamps and metadata generation"""

    @pytest.fixture
    def temp_feedback_dir(self):
        # Throwaway feedback directory, removed after the test.
        scratch = tempfile.mkdtemp()
        yield Path(scratch)
        shutil.rmtree(scratch)

    def test_capture_feedback_generates_iso8601_timestamp(self, temp_feedback_dir):
        """Stored feedback carries an ISO 8601 timestamp and complete, consistent metadata."""
        # Arrange
        story = 'STORY-005'
        answers = [
            {'question_id': 'dev_success_01', 'response': 4, 'skip': False},
            {'question_id': 'dev_success_02', 'response': 'Refactor', 'skip': False},
        ]

        # Act
        capture_feedback(
            responses=answers,
            story_id=story,
            workflow_type='dev',
            success_status='success',
            feedback_dir=temp_feedback_dir
        )

        # Assert: locate the stored file and parse it
        written = list((temp_feedback_dir / story).glob('*-retrospective.json'))
        payload = json.loads(written[0].read_text())

        # Timestamp must parse as ISO 8601 (a trailing 'Z' suffix is tolerated)
        stamp = payload['timestamp']
        try:
            datetime.fromisoformat(stamp.replace('Z', '+00:00'))
        except ValueError:
            pytest.fail(f"Timestamp '{stamp}' is not valid ISO 8601")

        # Metadata must be complete and internally consistent
        meta = payload['metadata']
        for key in ('duration_seconds', 'total_questions', 'answered', 'skipped'):
            assert key in meta
        assert meta['total_questions'] == meta['answered'] + meta['skipped']
|