devforgeai 1.0.5 → 1.0.6
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CLAUDE.md +120 -0
- package/package.json +9 -1
- package/src/CLAUDE.md +699 -0
- package/src/claude/scripts/README.md +396 -0
- package/src/claude/scripts/audit-command-skill-overlap.sh +67 -0
- package/src/claude/scripts/check-hooks-fast.sh +70 -0
- package/src/claude/scripts/devforgeai-validate +6 -0
- package/src/claude/scripts/devforgeai_cli/README.md +531 -0
- package/src/claude/scripts/devforgeai_cli/__init__.py +12 -0
- package/src/claude/scripts/devforgeai_cli/cli.py +716 -0
- package/src/claude/scripts/devforgeai_cli/commands/__init__.py +1 -0
- package/src/claude/scripts/devforgeai_cli/commands/check_hooks.py +384 -0
- package/src/claude/scripts/devforgeai_cli/commands/invoke_hooks.py +149 -0
- package/src/claude/scripts/devforgeai_cli/commands/phase_commands.py +731 -0
- package/src/claude/scripts/devforgeai_cli/commands/validate_installation.py +412 -0
- package/src/claude/scripts/devforgeai_cli/context_extraction.py +426 -0
- package/src/claude/scripts/devforgeai_cli/feedback/AC_TO_TEST_MAPPING.md +636 -0
- package/src/claude/scripts/devforgeai_cli/feedback/DELIVERY_SUMMARY.txt +329 -0
- package/src/claude/scripts/devforgeai_cli/feedback/README_TEST_SPECS.md +486 -0
- package/src/claude/scripts/devforgeai_cli/feedback/TEST_IMPLEMENTATION_GUIDE.md +529 -0
- package/src/claude/scripts/devforgeai_cli/feedback/TEST_SPECIFICATIONS.md +2652 -0
- package/src/claude/scripts/devforgeai_cli/feedback/TEST_SPECS_INDEX.md +398 -0
- package/src/claude/scripts/devforgeai_cli/feedback/__init__.py +34 -0
- package/src/claude/scripts/devforgeai_cli/feedback/adaptive_questioning_engine.py +581 -0
- package/src/claude/scripts/devforgeai_cli/feedback/aggregation.py +179 -0
- package/src/claude/scripts/devforgeai_cli/feedback/commands.py +535 -0
- package/src/claude/scripts/devforgeai_cli/feedback/config_defaults.py +58 -0
- package/src/claude/scripts/devforgeai_cli/feedback/config_manager.py +423 -0
- package/src/claude/scripts/devforgeai_cli/feedback/config_models.py +192 -0
- package/src/claude/scripts/devforgeai_cli/feedback/config_schema.py +140 -0
- package/src/claude/scripts/devforgeai_cli/feedback/coverage.json +1 -0
- package/src/claude/scripts/devforgeai_cli/feedback/feature_flag.py +152 -0
- package/src/claude/scripts/devforgeai_cli/feedback/feedback_indexer.py +394 -0
- package/src/claude/scripts/devforgeai_cli/feedback/hot_reload.py +226 -0
- package/src/claude/scripts/devforgeai_cli/feedback/longitudinal.py +115 -0
- package/src/claude/scripts/devforgeai_cli/feedback/models.py +67 -0
- package/src/claude/scripts/devforgeai_cli/feedback/question_router.py +236 -0
- package/src/claude/scripts/devforgeai_cli/feedback/retrospective.py +233 -0
- package/src/claude/scripts/devforgeai_cli/feedback/skip_tracker.py +177 -0
- package/src/claude/scripts/devforgeai_cli/feedback/skip_tracking.py +221 -0
- package/src/claude/scripts/devforgeai_cli/feedback/template_engine.py +549 -0
- package/src/claude/scripts/devforgeai_cli/feedback/validation.py +163 -0
- package/src/claude/scripts/devforgeai_cli/headless/__init__.py +30 -0
- package/src/claude/scripts/devforgeai_cli/headless/answer_models.py +206 -0
- package/src/claude/scripts/devforgeai_cli/headless/answer_resolver.py +204 -0
- package/src/claude/scripts/devforgeai_cli/headless/exceptions.py +36 -0
- package/src/claude/scripts/devforgeai_cli/headless/pattern_matcher.py +156 -0
- package/src/claude/scripts/devforgeai_cli/hooks.py +313 -0
- package/src/claude/scripts/devforgeai_cli/metrics/__init__.py +46 -0
- package/src/claude/scripts/devforgeai_cli/metrics/command_metrics.py +142 -0
- package/src/claude/scripts/devforgeai_cli/metrics/failure_modes.py +152 -0
- package/src/claude/scripts/devforgeai_cli/metrics/story_segmentation.py +181 -0
- package/src/claude/scripts/devforgeai_cli/orchestrate_hooks.py +780 -0
- package/src/claude/scripts/devforgeai_cli/phase_state.py +1229 -0
- package/src/claude/scripts/devforgeai_cli/session/__init__.py +30 -0
- package/src/claude/scripts/devforgeai_cli/session/checkpoint.py +268 -0
- package/src/claude/scripts/devforgeai_cli/tests/__init__.py +1 -0
- package/src/claude/scripts/devforgeai_cli/tests/conftest.py +29 -0
- package/src/claude/scripts/devforgeai_cli/tests/feedback/TEST_EXECUTION_GUIDE.md +298 -0
- package/src/claude/scripts/devforgeai_cli/tests/feedback/__init__.py +3 -0
- package/src/claude/scripts/devforgeai_cli/tests/feedback/test_adaptive_questioning_engine.py +2171 -0
- package/src/claude/scripts/devforgeai_cli/tests/feedback/test_aggregation.py +476 -0
- package/src/claude/scripts/devforgeai_cli/tests/feedback/test_config_defaults.py +133 -0
- package/src/claude/scripts/devforgeai_cli/tests/feedback/test_config_manager.py +592 -0
- package/src/claude/scripts/devforgeai_cli/tests/feedback/test_config_models.py +373 -0
- package/src/claude/scripts/devforgeai_cli/tests/feedback/test_config_schema.py +130 -0
- package/src/claude/scripts/devforgeai_cli/tests/feedback/test_configuration_management.py +1355 -0
- package/src/claude/scripts/devforgeai_cli/tests/feedback/test_edge_cases.py +308 -0
- package/src/claude/scripts/devforgeai_cli/tests/feedback/test_feature_flag.py +307 -0
- package/src/claude/scripts/devforgeai_cli/tests/feedback/test_feedback_indexer.py +384 -0
- package/src/claude/scripts/devforgeai_cli/tests/feedback/test_hot_reload.py +580 -0
- package/src/claude/scripts/devforgeai_cli/tests/feedback/test_integration.py +402 -0
- package/src/claude/scripts/devforgeai_cli/tests/feedback/test_models.py +105 -0
- package/src/claude/scripts/devforgeai_cli/tests/feedback/test_question_routing.py +262 -0
- package/src/claude/scripts/devforgeai_cli/tests/feedback/test_retrospective.py +333 -0
- package/src/claude/scripts/devforgeai_cli/tests/feedback/test_skip_tracker.py +410 -0
- package/src/claude/scripts/devforgeai_cli/tests/feedback/test_skip_tracking.py +159 -0
- package/src/claude/scripts/devforgeai_cli/tests/feedback/test_skip_tracking_integration.py +1155 -0
- package/src/claude/scripts/devforgeai_cli/tests/feedback/test_template_engine.py +1389 -0
- package/src/claude/scripts/devforgeai_cli/tests/feedback/test_validation_comprehensive.py +210 -0
- package/src/claude/scripts/devforgeai_cli/tests/fixtures/autonomous-deferral-story.md +46 -0
- package/src/claude/scripts/devforgeai_cli/tests/fixtures/missing-impl-notes.md +31 -0
- package/src/claude/scripts/devforgeai_cli/tests/fixtures/valid-deferral-story.md +46 -0
- package/src/claude/scripts/devforgeai_cli/tests/fixtures/valid-story-complete.md +48 -0
- package/src/claude/scripts/devforgeai_cli/tests/manual_test_invoke_hooks.sh +200 -0
- package/src/claude/scripts/devforgeai_cli/tests/session/DELIVERABLES.md +518 -0
- package/src/claude/scripts/devforgeai_cli/tests/session/TEST_SUMMARY.md +468 -0
- package/src/claude/scripts/devforgeai_cli/tests/session/__init__.py +6 -0
- package/src/claude/scripts/devforgeai_cli/tests/session/fixtures/corrupted-checkpoint.json +1 -0
- package/src/claude/scripts/devforgeai_cli/tests/session/fixtures/missing-fields-checkpoint.json +4 -0
- package/src/claude/scripts/devforgeai_cli/tests/session/fixtures/valid-checkpoint.json +15 -0
- package/src/claude/scripts/devforgeai_cli/tests/session/test_checkpoint.py +851 -0
- package/src/claude/scripts/devforgeai_cli/tests/test_check_hooks.py +1886 -0
- package/src/claude/scripts/devforgeai_cli/tests/test_depends_on_normalizer.py +171 -0
- package/src/claude/scripts/devforgeai_cli/tests/test_dod_validator.py +97 -0
- package/src/claude/scripts/devforgeai_cli/tests/test_invoke_hooks.py +1902 -0
- package/src/claude/scripts/devforgeai_cli/tests/test_phase_commands.py +320 -0
- package/src/claude/scripts/devforgeai_cli/tests/test_phase_commands_error_handling.py +1021 -0
- package/src/claude/scripts/devforgeai_cli/tests/test_phase_commands_import.py +697 -0
- package/src/claude/scripts/devforgeai_cli/tests/test_phase_state.py +2187 -0
- package/src/claude/scripts/devforgeai_cli/tests/test_skip_tracking.py +2141 -0
- package/src/claude/scripts/devforgeai_cli/tests/test_skip_tracking_coverage_gap.py +195 -0
- package/src/claude/scripts/devforgeai_cli/tests/test_subagent_enforcement.py +539 -0
- package/src/claude/scripts/devforgeai_cli/tests/test_validate_installation.py +361 -0
- package/src/claude/scripts/devforgeai_cli/utils/__init__.py +11 -0
- package/src/claude/scripts/devforgeai_cli/utils/depends_on_normalizer.py +149 -0
- package/src/claude/scripts/devforgeai_cli/utils/markdown_parser.py +219 -0
- package/src/claude/scripts/devforgeai_cli/utils/story_analyzer.py +249 -0
- package/src/claude/scripts/devforgeai_cli/utils/yaml_parser.py +152 -0
- package/src/claude/scripts/devforgeai_cli/validators/__init__.py +27 -0
- package/src/claude/scripts/devforgeai_cli/validators/ast_grep_validator.py +373 -0
- package/src/claude/scripts/devforgeai_cli/validators/context_validator.py +180 -0
- package/src/claude/scripts/devforgeai_cli/validators/dod_validator.py +309 -0
- package/src/claude/scripts/devforgeai_cli/validators/git_validator.py +107 -0
- package/src/claude/scripts/devforgeai_cli/validators/grep_fallback.py +300 -0
- package/src/claude/scripts/install_hooks.sh +186 -0
- package/src/claude/scripts/invoke_feedback_hooks.sh +59 -0
- package/src/claude/scripts/migrate-ac-headers.sh +122 -0
- package/src/claude/scripts/plan_file_kb.sh +704 -0
- package/src/claude/scripts/requirements.txt +8 -0
- package/src/claude/scripts/session_catalog.sh +543 -0
- package/src/claude/scripts/setup.py +55 -0
- package/src/claude/scripts/start-devforgeai.sh +16 -0
- package/src/claude/scripts/statusline.sh +27 -0
- package/src/claude/scripts/validate_deferrals.py +344 -0
- package/src/claude/skills/designing-systems/scripts/__pycache__/detect_anti_patterns.cpython-312.pyc +0 -0
- package/src/claude/skills/designing-systems/scripts/__pycache__/validate_all_context.cpython-312.pyc +0 -0
- package/src/claude/skills/designing-systems/scripts/__pycache__/validate_architecture.cpython-312.pyc +0 -0
- package/src/claude/skills/designing-systems/scripts/__pycache__/validate_dependencies.cpython-312.pyc +0 -0
- package/src/claude/skills/devforgeai-story-creation/scripts/__pycache__/migrate_story_v1_to_v2.cpython-312.pyc +0 -0
- package/src/claude/skills/devforgeai-story-creation/scripts/tests/__pycache__/measure_accuracy.cpython-312.pyc +0 -0
|
@@ -0,0 +1,476 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Unit tests for feedback aggregation and longitudinal tracking (AC4, AC6)
|
|
3
|
+
|
|
4
|
+
Tests cover:
|
|
5
|
+
- AC4: Feedback data aggregation for framework maintainers
|
|
6
|
+
- AC6: Longitudinal feedback tracking over time
|
|
7
|
+
- Pattern detection (80%+ threshold)
|
|
8
|
+
- Actionable insights generation
|
|
9
|
+
"""
|
|
10
|
+
|
|
11
|
+
import pytest
|
|
12
|
+
import json
|
|
13
|
+
import tempfile
|
|
14
|
+
import shutil
|
|
15
|
+
from pathlib import Path
|
|
16
|
+
from datetime import datetime, timedelta, timezone
|
|
17
|
+
|
|
18
|
+
from devforgeai_cli.feedback.aggregation import (
|
|
19
|
+
aggregate_feedback_by_story,
|
|
20
|
+
aggregate_feedback_by_epic,
|
|
21
|
+
aggregate_feedback_by_skill,
|
|
22
|
+
detect_patterns,
|
|
23
|
+
generate_insights,
|
|
24
|
+
export_quarterly_insights,
|
|
25
|
+
)
|
|
26
|
+
from devforgeai_cli.feedback.longitudinal import (
|
|
27
|
+
correlate_feedback_across_stories,
|
|
28
|
+
identify_improvement_trajectories,
|
|
29
|
+
export_personal_journal,
|
|
30
|
+
)
|
|
31
|
+
|
|
32
|
+
|
|
33
|
+
class TestFeedbackAggregation:
    """AC4: Feedback Data Aggregation for Framework Maintainers"""

    @pytest.fixture
    def temp_feedback_dir(self):
        """Create temporary feedback directory with sample data.

        Yields the directory as a Path. The tree is now removed in a
        ``finally`` block so it is cleaned up even when seeding the fixture
        (or the test body) raises; previously a setup failure leaked the
        temp directory.
        """
        temp_dir = tempfile.mkdtemp()
        feedback_dir = Path(temp_dir)

        try:
            # Create sample feedback files: two 'dev' stories in EPIC-001
            # sharing the same free-text issue, plus one 'qa' story in
            # EPIC-002 -- enough variety for the grouping and
            # pattern-detection tests below.
            self._create_sample_feedback(feedback_dir, 'STORY-001', 'EPIC-001', 'dev', [
                {'question_id': 'dev_success_01', 'response': 4},
                {'question_id': 'dev_success_02', 'response': 'Documentation unclear'},
            ])
            self._create_sample_feedback(feedback_dir, 'STORY-002', 'EPIC-001', 'dev', [
                {'question_id': 'dev_success_01', 'response': 3},
                {'question_id': 'dev_success_02', 'response': 'Documentation unclear'},
            ])
            self._create_sample_feedback(feedback_dir, 'STORY-003', 'EPIC-002', 'qa', [
                {'question_id': 'qa_success_01', 'response': 5},
                {'question_id': 'qa_success_02', 'response': 'Coverage metrics great'},
            ])

            yield feedback_dir
        finally:
            shutil.rmtree(temp_dir, ignore_errors=True)

    def _create_sample_feedback(self, feedback_dir, story_id, epic_id, workflow_type, questions):
        """Helper to create sample feedback JSON files.

        Writes ``<feedback_dir>/<story_id>/YYYY-MM-DD-retrospective.json``
        with the standard feedback payload shape used by the aggregation
        module.
        """
        story_dir = feedback_dir / story_id
        story_dir.mkdir(parents=True, exist_ok=True)

        feedback_data = {
            'feedback_id': f'fb-{story_id}',
            # Aware UTC isoformat ends in '+00:00'; swap for the 'Z' suffix
            # used by the on-disk format.
            'timestamp': datetime.now(timezone.utc).isoformat().replace('+00:00', 'Z'),
            'story_id': story_id,
            'epic_id': epic_id,
            'workflow_type': workflow_type,
            'success_status': 'success',
            'questions': questions,
            'metadata': {
                'total_questions': len(questions),
                'answered': len(questions),
                'skipped': 0,
            }
        }

        # Filename convention: date prefix (first 10 chars of the ISO
        # timestamp) + '-retrospective.json'.
        filename = f"{feedback_data['timestamp'][:10]}-retrospective.json"
        with open(story_dir / filename, 'w') as f:
            json.dump(feedback_data, f, indent=2)

    def test_aggregate_feedback_by_story_groups_correctly(self, temp_feedback_dir):
        """
        GIVEN multiple feedback sessions across different stories
        WHEN aggregate_feedback_by_story is called
        THEN feedback is grouped by story_id
        """
        # Act
        aggregated = aggregate_feedback_by_story(temp_feedback_dir)

        # Assert
        assert 'STORY-001' in aggregated
        assert 'STORY-002' in aggregated
        assert 'STORY-003' in aggregated
        assert len(aggregated['STORY-001']) >= 1
        assert len(aggregated['STORY-002']) >= 1

    def test_aggregate_feedback_by_epic_groups_correctly(self, temp_feedback_dir):
        """
        GIVEN feedback from stories in different epics
        WHEN aggregate_feedback_by_epic is called
        THEN feedback is grouped by epic_id
        """
        # Act
        aggregated = aggregate_feedback_by_epic(temp_feedback_dir)

        # Assert
        assert 'EPIC-001' in aggregated
        assert 'EPIC-002' in aggregated
        assert len(aggregated['EPIC-001']) == 2  # STORY-001 and STORY-002
        assert len(aggregated['EPIC-002']) == 1  # STORY-003

    def test_aggregate_feedback_by_skill_groups_correctly(self, temp_feedback_dir):
        """
        GIVEN feedback from different workflows (dev, qa, orchestrate)
        WHEN aggregate_feedback_by_skill is called
        THEN feedback is grouped by workflow_type
        """
        # Act
        aggregated = aggregate_feedback_by_skill(temp_feedback_dir)

        # Assert
        assert 'dev' in aggregated
        assert 'qa' in aggregated
        assert len(aggregated['dev']) == 2  # STORY-001, STORY-002
        assert len(aggregated['qa']) == 1  # STORY-003

    def test_detect_patterns_identifies_80_percent_threshold(self, temp_feedback_dir):
        """
        GIVEN 80%+ of users report same issue
        WHEN detect_patterns is called
        THEN it flags the issue as high priority
        """
        # Arrange - Create more feedback with consistent issue
        for i in range(4, 10):  # Add 6 more feedbacks, total 8 with "Documentation unclear"
            self._create_sample_feedback(
                temp_feedback_dir,
                f'STORY-{i:03d}',
                'EPIC-001',
                'dev',
                [
                    {'question_id': 'dev_success_01', 'response': 4},
                    {'question_id': 'dev_success_02', 'response': 'Documentation unclear'},
                ]
            )

        # Act
        patterns = detect_patterns(temp_feedback_dir, threshold=0.8)

        # Assert
        assert len(patterns) > 0

        # Should identify "Documentation unclear" pattern
        doc_pattern = next((p for p in patterns if 'documentation' in p['issue'].lower()), None)
        assert doc_pattern is not None
        assert doc_pattern['frequency'] >= 0.8  # 80%+ reported this
        assert doc_pattern['priority'] == 'high'

    def test_generate_insights_produces_actionable_recommendations(self, temp_feedback_dir):
        """
        GIVEN detected patterns
        WHEN generate_insights is called
        THEN it produces actionable recommendations with vote counts
        """
        # Act
        insights = generate_insights(temp_feedback_dir)

        # Assert
        assert insights is not None
        assert 'recommendations' in insights
        assert len(insights['recommendations']) > 0

        # Each recommendation should have vote count
        for rec in insights['recommendations']:
            assert 'issue' in rec
            assert 'vote_count' in rec
            assert 'percentage' in rec
            assert 'suggested_action' in rec

    def test_export_quarterly_insights_creates_markdown_file(self, temp_feedback_dir):
        """
        GIVEN aggregated feedback
        WHEN export_quarterly_insights is called
        THEN it creates devforgeai/feedback/quarterly-insights.md
        """
        # Act
        output_path = export_quarterly_insights(temp_feedback_dir)

        # Assert
        assert output_path.exists()
        assert output_path.name == 'quarterly-insights.md'

        # Verify markdown content
        content = output_path.read_text()
        assert '# Quarterly Feedback Insights' in content
        assert 'Pattern Detection' in content
        assert 'Recommendations' in content

    def test_aggregate_feedback_by_story_skips_non_directories(self, temp_feedback_dir):
        """
        GIVEN feedback directory contains files (not just directories)
        WHEN aggregate_feedback_by_story is called
        THEN it skips non-directory entries
        """
        # Arrange - Create a file in feedback root (should be skipped)
        (temp_feedback_dir / 'readme.txt').write_text('This is not a story directory')

        # Act
        aggregated = aggregate_feedback_by_story(temp_feedback_dir)

        # Assert - Should only have story directories, not the txt file
        assert 'readme.txt' not in aggregated
        assert all(key.startswith('STORY-') for key in aggregated.keys())

    def test_aggregate_feedback_by_epic_skips_non_directories(self, temp_feedback_dir):
        """
        GIVEN feedback directory contains files
        WHEN aggregate_feedback_by_epic is called
        THEN it skips non-directory entries
        """
        # Arrange
        (temp_feedback_dir / 'metadata.json').write_text('{}')

        # Act
        aggregated = aggregate_feedback_by_epic(temp_feedback_dir)

        # Assert - Should have aggregated epics
        assert len(aggregated) >= 2

    def test_aggregate_feedback_by_skill_skips_non_directories(self, temp_feedback_dir):
        """
        GIVEN feedback directory contains files
        WHEN aggregate_feedback_by_skill is called
        THEN it skips non-directory entries
        """
        # Arrange
        (temp_feedback_dir / 'config.yaml').write_text('enable_feedback: true')

        # Act
        aggregated = aggregate_feedback_by_skill(temp_feedback_dir)

        # Assert - Should have aggregated workflows
        assert 'dev' in aggregated
        assert 'qa' in aggregated

    def test_detect_patterns_with_empty_dataset(self, tmp_path):
        """
        GIVEN feedback directory with no feedback files
        WHEN detect_patterns is called
        THEN it returns empty list
        """
        # Arrange - Empty directory
        empty_dir = tmp_path / "empty_feedback"
        empty_dir.mkdir()

        # Act
        patterns = detect_patterns(empty_dir)

        # Assert
        assert patterns == []

    def test_detect_patterns_below_threshold(self, temp_feedback_dir):
        """
        GIVEN patterns that don't meet threshold
        WHEN detect_patterns is called with high threshold
        THEN it returns empty list
        """
        # Act - Use very high threshold (95%)
        patterns = detect_patterns(temp_feedback_dir, threshold=0.95)

        # Assert - Should not find patterns at 95% threshold with only 3 feedback items
        assert patterns == []

    def test_generate_insights_with_no_data(self, tmp_path):
        """
        GIVEN feedback directory with no feedback files
        WHEN generate_insights is called
        THEN it generates fallback insights
        """
        # Arrange - Empty directory
        empty_dir = tmp_path / "empty_feedback"
        empty_dir.mkdir()

        # Act
        insights = generate_insights(empty_dir)

        # Assert - Should have recommendations key even with no data
        assert 'recommendations' in insights
        assert isinstance(insights['recommendations'], list)

    def test_generate_insights_fallback_with_feedback_below_threshold(self, temp_feedback_dir):
        """
        GIVEN feedback exists but no patterns meet threshold
        WHEN generate_insights is called
        THEN it generates fallback recommendation with general feedback count
        """
        # This test ensures lines 131-142 are covered (the fallback branch)
        # The temp_feedback_dir has 3 feedback items, but patterns use 50% threshold
        # which means we need at least 2/3 (67%) for a pattern
        # "Documentation unclear" appears 2/3 times (67%) so it should be a pattern
        # But we can still hit the fallback by ensuring the recommendations list
        # gets processed even when empty initially

        # Act
        insights = generate_insights(temp_feedback_dir)

        # Assert - Should have recommendations
        assert 'recommendations' in insights
        assert len(insights['recommendations']) > 0

        # Should have at least the general feedback or specific patterns
        for rec in insights['recommendations']:
            assert 'vote_count' in rec
            assert 'suggested_action' in rec

    def test_detect_patterns_skips_non_directory_entries(self, temp_feedback_dir):
        """
        GIVEN feedback directory with files mixed in
        WHEN detect_patterns is called
        THEN it skips non-directory entries (line 79)
        """
        # Arrange - Add a file that should be skipped
        (temp_feedback_dir / 'summary.md').write_text('Summary of feedback')

        # Act
        patterns = detect_patterns(temp_feedback_dir, threshold=0.5)

        # Assert - Should still detect patterns despite the file
        assert isinstance(patterns, list)

    def test_generate_insights_with_low_pattern_coverage(self, tmp_path):
        """
        GIVEN feedback exists but patterns don't meet 50% threshold
        WHEN generate_insights is called
        THEN it generates fallback recommendation (lines 131-142)
        """
        # Create feedback directory with minimal feedback (below pattern threshold)
        feedback_dir = tmp_path / "sparse_feedback"
        feedback_dir.mkdir()

        # Create just 1 feedback item (not enough for patterns)
        story_dir = feedback_dir / "STORY-001"
        story_dir.mkdir()

        # NOTE: json and datetime/timezone are already imported at module
        # level; the previous local re-imports shadowed them for no benefit.
        feedback = {
            'feedback_id': 'test-1',
            'timestamp': datetime.now(timezone.utc).isoformat(),
            'story_id': 'STORY-001',
            'epic_id': 'EPIC-001',
            'workflow_type': 'dev',
            'questions': [{'question_id': 'q1', 'response': 'unique response'}]
        }

        with open(story_dir / '2025-01-01-retrospective.json', 'w') as f:
            json.dump(feedback, f)

        # Act
        insights = generate_insights(feedback_dir)

        # Assert - Should generate fallback with general feedback collected
        assert 'recommendations' in insights
        assert len(insights['recommendations']) > 0

        # Should have the fallback recommendation
        rec = insights['recommendations'][0]
        assert 'General feedback collected' in rec['issue'] or rec['vote_count'] >= 1
371
|
+
|
|
372
|
+
class TestLongitudinalTracking:
    """AC6: Longitudinal Feedback Tracking"""

    @pytest.fixture
    def temp_feedback_dir(self):
        """Create feedback with temporal progression.

        Five stories, one per week over roughly the last month, with the
        confidence rating rising over time so trajectory detection has a
        clear upward trend. The tree is removed in a ``finally`` block so
        cleanup happens even if seeding raises.
        """
        temp_dir = tempfile.mkdtemp()
        feedback_dir = Path(temp_dir)

        try:
            # Create feedback over time (simulating improvement)
            base_date = datetime.now(timezone.utc) - timedelta(days=30)
            for i in range(1, 6):
                story_id = f'STORY-{i:03d}'
                story_dir = feedback_dir / story_id
                story_dir.mkdir(parents=True, exist_ok=True)

                # Confidence increases over time: min(3 + i // 2, 5)
                # yields 3, 4, 4, 5, 5 for i = 1..5 (the old comment's
                # "3, 3, 4, 4, 5" was wrong).
                confidence = min(3 + (i // 2), 5)

                feedback_data = {
                    'feedback_id': f'fb-{story_id}',
                    # BUGFIX: the aware datetime's isoformat already ends in
                    # '+00:00', so the previous `isoformat() + 'Z'` produced
                    # a malformed '...+00:00Z' timestamp. Replace the offset
                    # with 'Z' instead, matching TestFeedbackAggregation's
                    # helper.
                    'timestamp': (base_date + timedelta(days=i * 7)).isoformat().replace('+00:00', 'Z'),
                    'story_id': story_id,
                    'epic_id': 'EPIC-001',
                    'workflow_type': 'dev',
                    'success_status': 'success',
                    'questions': [
                        {'question_id': 'dev_success_01', 'response': confidence, 'text': 'TDD confidence'},
                    ],
                    'metadata': {'answered': 1, 'skipped': 0},
                }

                # Filename convention: date prefix + '-retrospective.json'.
                filename = f"{feedback_data['timestamp'][:10]}-retrospective.json"
                with open(story_dir / filename, 'w') as f:
                    json.dump(feedback_data, f, indent=2)

            yield feedback_dir
        finally:
            shutil.rmtree(temp_dir, ignore_errors=True)

    def test_correlate_feedback_across_stories_shows_progression(self, temp_feedback_dir):
        """
        GIVEN user completed multiple stories over time
        WHEN correlate_feedback_across_stories is called
        THEN it shows progression across stories
        """
        # Act
        correlation = correlate_feedback_across_stories(
            feedback_dir=temp_feedback_dir,
            user_id='default-user'
        )

        # Assert
        assert correlation is not None
        assert 'timeline' in correlation
        assert len(correlation['timeline']) == 5  # 5 stories

        # Verify chronological order
        timestamps = [entry['timestamp'] for entry in correlation['timeline']]
        assert timestamps == sorted(timestamps)

    def test_identify_improvement_trajectories_detects_increase(self, temp_feedback_dir):
        """
        GIVEN user confidence improves over time
        WHEN identify_improvement_trajectories is called
        THEN it detects positive trajectory
        """
        # Act
        trajectories = identify_improvement_trajectories(
            feedback_dir=temp_feedback_dir,
            user_id='default-user'
        )

        # Assert
        assert trajectories is not None
        assert 'metrics' in trajectories

        # Should detect improving TDD confidence
        tdd_metric = next((m for m in trajectories['metrics'] if 'confidence' in m['name'].lower()), None)
        assert tdd_metric is not None
        assert tdd_metric['trend'] == 'improving'
        assert tdd_metric['start_value'] <= tdd_metric['end_value']

    def test_export_personal_journal_creates_user_markdown(self, temp_feedback_dir):
        """
        GIVEN user has feedback history
        WHEN export_personal_journal is called
        THEN it exports devforgeai/feedback/{user-id}/journal.md
        """
        # Act
        journal_path = export_personal_journal(
            feedback_dir=temp_feedback_dir,
            user_id='default-user'
        )

        # Assert
        assert journal_path.exists()
        assert journal_path.parent.name == 'default-user'
        assert journal_path.name == 'journal.md'

        # Verify markdown content
        content = journal_path.read_text()
        assert '# Retrospective Journal' in content
        assert 'STORY-001' in content
        assert 'STORY-005' in content
        assert 'Improvement Trajectory' in content
|
@@ -0,0 +1,133 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Tests for config_defaults.py module.
|
|
3
|
+
|
|
4
|
+
Validates default configuration values, accessors, and copy semantics.
|
|
5
|
+
Target: 100% coverage of 8 statements.
|
|
6
|
+
"""
|
|
7
|
+
|
|
8
|
+
import pytest
|
|
9
|
+
from devforgeai_cli.feedback.config_defaults import (
|
|
10
|
+
DEFAULT_CONFIG_DICT,
|
|
11
|
+
get_default_config,
|
|
12
|
+
get_default_nested_config
|
|
13
|
+
)
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
class TestConfigDefaults:
    """Tests for default configuration access."""

    def test_default_config_dict_exists(self):
        """DEFAULT_CONFIG_DICT is defined and non-empty."""
        assert isinstance(DEFAULT_CONFIG_DICT, dict)
        # A populated dict is truthy, so this asserts non-emptiness.
        assert DEFAULT_CONFIG_DICT

    def test_default_config_dict_has_enabled(self):
        """Defaults include enabled field with True."""
        # .get() covers both presence and value in one identity check.
        assert DEFAULT_CONFIG_DICT.get("enabled") is True

    def test_default_config_dict_has_trigger_mode(self):
        """Defaults include trigger_mode with failures-only."""
        assert "trigger_mode" in DEFAULT_CONFIG_DICT
        assert DEFAULT_CONFIG_DICT["trigger_mode"] == "failures-only"

    def test_default_config_dict_has_operations(self):
        """Defaults include operations field with None."""
        assert "operations" in DEFAULT_CONFIG_DICT
        assert DEFAULT_CONFIG_DICT["operations"] is None

    def test_default_config_dict_has_nested_sections(self):
        """Defaults include all three nested configuration sections."""
        for section in ("conversation_settings", "skip_tracking", "templates"):
            assert section in DEFAULT_CONFIG_DICT

    def test_default_config_dict_conversation_settings(self):
        """Conversation settings have correct default values."""
        conversation = DEFAULT_CONFIG_DICT["conversation_settings"]
        assert conversation["max_questions"] == 5
        assert conversation["allow_skip"] is True

    def test_default_config_dict_skip_tracking(self):
        """Skip tracking has correct default values."""
        tracking = DEFAULT_CONFIG_DICT["skip_tracking"]
        assert tracking["enabled"] is True
        assert tracking["max_consecutive_skips"] == 3
        assert tracking["reset_on_positive"] is True

    def test_default_config_dict_templates(self):
        """Templates have correct default values."""
        templates = DEFAULT_CONFIG_DICT["templates"]
        assert templates["format"] == "structured"
        assert templates["tone"] == "brief"

    def test_get_default_config_returns_dict(self):
        """get_default_config() returns a dictionary."""
        assert isinstance(get_default_config(), dict)

    def test_get_default_config_returns_copy(self):
        """get_default_config() returns a copy, not reference."""
        mutated = get_default_config()
        mutated["enabled"] = False  # mutate the returned mapping only

        # A fresh call must still see the pristine default ...
        assert get_default_config()["enabled"] is True
        # ... and the module-level constant must be untouched.
        assert DEFAULT_CONFIG_DICT["enabled"] is True

    def test_get_default_config_has_all_fields(self):
        """Returned config has all required fields."""
        required = {
            "enabled",
            "trigger_mode",
            "operations",
            "conversation_settings",
            "skip_tracking",
            "templates",
        }
        # Subset comparison asserts every required key is present.
        assert required <= get_default_config().keys()

    def test_get_default_nested_config_valid_section(self):
        """Get default for specific valid section."""
        conversation = get_default_nested_config("conversation_settings")
        assert isinstance(conversation, dict)
        assert "max_questions" in conversation
        assert "allow_skip" in conversation

    def test_get_default_nested_config_all_sections(self):
        """All nested sections are accessible."""
        expected_keys = {
            "conversation_settings": ("max_questions", "allow_skip"),
            "skip_tracking": ("enabled", "max_consecutive_skips", "reset_on_positive"),
            "templates": ("format", "tone"),
        }
        for section, keys in expected_keys.items():
            nested = get_default_nested_config(section)
            for key in keys:
                assert key in nested

    def test_get_default_nested_config_invalid_section(self):
        """Invalid section raises ValueError with helpful message."""
        with pytest.raises(ValueError) as excinfo:
            get_default_nested_config("invalid_section")

        message = str(excinfo.value)
        # The error must both name the bad section and list the valid ones.
        for fragment in ("Unknown configuration section: invalid_section",
                         "Valid sections:"):
            assert fragment in message

    def test_get_default_nested_config_returns_copy(self):
        """Returned section is a copy, not reference."""
        mutated = get_default_nested_config("conversation_settings")
        mutated["max_questions"] = 999  # mutate the returned mapping only

        # A fresh call must still see the pristine default ...
        assert get_default_nested_config("conversation_settings")["max_questions"] == 5
        # ... and the module-level constant must be untouched.
        assert DEFAULT_CONFIG_DICT["conversation_settings"]["max_questions"] == 5
|