devforgeai 1.0.4 → 1.0.6

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (134)
  1. package/CLAUDE.md +120 -0
  2. package/package.json +9 -1
  3. package/src/CLAUDE.md +699 -0
  4. package/src/claude/scripts/README.md +396 -0
  5. package/src/claude/scripts/audit-command-skill-overlap.sh +67 -0
  6. package/src/claude/scripts/check-hooks-fast.sh +70 -0
  7. package/src/claude/scripts/devforgeai-validate +6 -0
  8. package/src/claude/scripts/devforgeai_cli/README.md +531 -0
  9. package/src/claude/scripts/devforgeai_cli/__init__.py +12 -0
  10. package/src/claude/scripts/devforgeai_cli/cli.py +716 -0
  11. package/src/claude/scripts/devforgeai_cli/commands/__init__.py +1 -0
  12. package/src/claude/scripts/devforgeai_cli/commands/check_hooks.py +384 -0
  13. package/src/claude/scripts/devforgeai_cli/commands/invoke_hooks.py +149 -0
  14. package/src/claude/scripts/devforgeai_cli/commands/phase_commands.py +731 -0
  15. package/src/claude/scripts/devforgeai_cli/commands/validate_installation.py +412 -0
  16. package/src/claude/scripts/devforgeai_cli/context_extraction.py +426 -0
  17. package/src/claude/scripts/devforgeai_cli/feedback/AC_TO_TEST_MAPPING.md +636 -0
  18. package/src/claude/scripts/devforgeai_cli/feedback/DELIVERY_SUMMARY.txt +329 -0
  19. package/src/claude/scripts/devforgeai_cli/feedback/README_TEST_SPECS.md +486 -0
  20. package/src/claude/scripts/devforgeai_cli/feedback/TEST_IMPLEMENTATION_GUIDE.md +529 -0
  21. package/src/claude/scripts/devforgeai_cli/feedback/TEST_SPECIFICATIONS.md +2652 -0
  22. package/src/claude/scripts/devforgeai_cli/feedback/TEST_SPECS_INDEX.md +398 -0
  23. package/src/claude/scripts/devforgeai_cli/feedback/__init__.py +34 -0
  24. package/src/claude/scripts/devforgeai_cli/feedback/adaptive_questioning_engine.py +581 -0
  25. package/src/claude/scripts/devforgeai_cli/feedback/aggregation.py +179 -0
  26. package/src/claude/scripts/devforgeai_cli/feedback/commands.py +535 -0
  27. package/src/claude/scripts/devforgeai_cli/feedback/config_defaults.py +58 -0
  28. package/src/claude/scripts/devforgeai_cli/feedback/config_manager.py +423 -0
  29. package/src/claude/scripts/devforgeai_cli/feedback/config_models.py +192 -0
  30. package/src/claude/scripts/devforgeai_cli/feedback/config_schema.py +140 -0
  31. package/src/claude/scripts/devforgeai_cli/feedback/coverage.json +1 -0
  32. package/src/claude/scripts/devforgeai_cli/feedback/feature_flag.py +152 -0
  33. package/src/claude/scripts/devforgeai_cli/feedback/feedback_indexer.py +394 -0
  34. package/src/claude/scripts/devforgeai_cli/feedback/hot_reload.py +226 -0
  35. package/src/claude/scripts/devforgeai_cli/feedback/longitudinal.py +115 -0
  36. package/src/claude/scripts/devforgeai_cli/feedback/models.py +67 -0
  37. package/src/claude/scripts/devforgeai_cli/feedback/question_router.py +236 -0
  38. package/src/claude/scripts/devforgeai_cli/feedback/retrospective.py +233 -0
  39. package/src/claude/scripts/devforgeai_cli/feedback/skip_tracker.py +177 -0
  40. package/src/claude/scripts/devforgeai_cli/feedback/skip_tracking.py +221 -0
  41. package/src/claude/scripts/devforgeai_cli/feedback/template_engine.py +549 -0
  42. package/src/claude/scripts/devforgeai_cli/feedback/validation.py +163 -0
  43. package/src/claude/scripts/devforgeai_cli/headless/__init__.py +30 -0
  44. package/src/claude/scripts/devforgeai_cli/headless/answer_models.py +206 -0
  45. package/src/claude/scripts/devforgeai_cli/headless/answer_resolver.py +204 -0
  46. package/src/claude/scripts/devforgeai_cli/headless/exceptions.py +36 -0
  47. package/src/claude/scripts/devforgeai_cli/headless/pattern_matcher.py +156 -0
  48. package/src/claude/scripts/devforgeai_cli/hooks.py +313 -0
  49. package/src/claude/scripts/devforgeai_cli/metrics/__init__.py +46 -0
  50. package/src/claude/scripts/devforgeai_cli/metrics/command_metrics.py +142 -0
  51. package/src/claude/scripts/devforgeai_cli/metrics/failure_modes.py +152 -0
  52. package/src/claude/scripts/devforgeai_cli/metrics/story_segmentation.py +181 -0
  53. package/src/claude/scripts/devforgeai_cli/orchestrate_hooks.py +780 -0
  54. package/src/claude/scripts/devforgeai_cli/phase_state.py +1229 -0
  55. package/src/claude/scripts/devforgeai_cli/session/__init__.py +30 -0
  56. package/src/claude/scripts/devforgeai_cli/session/checkpoint.py +268 -0
  57. package/src/claude/scripts/devforgeai_cli/tests/__init__.py +1 -0
  58. package/src/claude/scripts/devforgeai_cli/tests/conftest.py +29 -0
  59. package/src/claude/scripts/devforgeai_cli/tests/feedback/TEST_EXECUTION_GUIDE.md +298 -0
  60. package/src/claude/scripts/devforgeai_cli/tests/feedback/__init__.py +3 -0
  61. package/src/claude/scripts/devforgeai_cli/tests/feedback/test_adaptive_questioning_engine.py +2171 -0
  62. package/src/claude/scripts/devforgeai_cli/tests/feedback/test_aggregation.py +476 -0
  63. package/src/claude/scripts/devforgeai_cli/tests/feedback/test_config_defaults.py +133 -0
  64. package/src/claude/scripts/devforgeai_cli/tests/feedback/test_config_manager.py +592 -0
  65. package/src/claude/scripts/devforgeai_cli/tests/feedback/test_config_models.py +373 -0
  66. package/src/claude/scripts/devforgeai_cli/tests/feedback/test_config_schema.py +130 -0
  67. package/src/claude/scripts/devforgeai_cli/tests/feedback/test_configuration_management.py +1355 -0
  68. package/src/claude/scripts/devforgeai_cli/tests/feedback/test_edge_cases.py +308 -0
  69. package/src/claude/scripts/devforgeai_cli/tests/feedback/test_feature_flag.py +307 -0
  70. package/src/claude/scripts/devforgeai_cli/tests/feedback/test_feedback_indexer.py +384 -0
  71. package/src/claude/scripts/devforgeai_cli/tests/feedback/test_hot_reload.py +580 -0
  72. package/src/claude/scripts/devforgeai_cli/tests/feedback/test_integration.py +402 -0
  73. package/src/claude/scripts/devforgeai_cli/tests/feedback/test_models.py +105 -0
  74. package/src/claude/scripts/devforgeai_cli/tests/feedback/test_question_routing.py +262 -0
  75. package/src/claude/scripts/devforgeai_cli/tests/feedback/test_retrospective.py +333 -0
  76. package/src/claude/scripts/devforgeai_cli/tests/feedback/test_skip_tracker.py +410 -0
  77. package/src/claude/scripts/devforgeai_cli/tests/feedback/test_skip_tracking.py +159 -0
  78. package/src/claude/scripts/devforgeai_cli/tests/feedback/test_skip_tracking_integration.py +1155 -0
  79. package/src/claude/scripts/devforgeai_cli/tests/feedback/test_template_engine.py +1389 -0
  80. package/src/claude/scripts/devforgeai_cli/tests/feedback/test_validation_comprehensive.py +210 -0
  81. package/src/claude/scripts/devforgeai_cli/tests/fixtures/autonomous-deferral-story.md +46 -0
  82. package/src/claude/scripts/devforgeai_cli/tests/fixtures/missing-impl-notes.md +31 -0
  83. package/src/claude/scripts/devforgeai_cli/tests/fixtures/valid-deferral-story.md +46 -0
  84. package/src/claude/scripts/devforgeai_cli/tests/fixtures/valid-story-complete.md +48 -0
  85. package/src/claude/scripts/devforgeai_cli/tests/manual_test_invoke_hooks.sh +200 -0
  86. package/src/claude/scripts/devforgeai_cli/tests/session/DELIVERABLES.md +518 -0
  87. package/src/claude/scripts/devforgeai_cli/tests/session/TEST_SUMMARY.md +468 -0
  88. package/src/claude/scripts/devforgeai_cli/tests/session/__init__.py +6 -0
  89. package/src/claude/scripts/devforgeai_cli/tests/session/fixtures/corrupted-checkpoint.json +1 -0
  90. package/src/claude/scripts/devforgeai_cli/tests/session/fixtures/missing-fields-checkpoint.json +4 -0
  91. package/src/claude/scripts/devforgeai_cli/tests/session/fixtures/valid-checkpoint.json +15 -0
  92. package/src/claude/scripts/devforgeai_cli/tests/session/test_checkpoint.py +851 -0
  93. package/src/claude/scripts/devforgeai_cli/tests/test_check_hooks.py +1886 -0
  94. package/src/claude/scripts/devforgeai_cli/tests/test_depends_on_normalizer.py +171 -0
  95. package/src/claude/scripts/devforgeai_cli/tests/test_dod_validator.py +97 -0
  96. package/src/claude/scripts/devforgeai_cli/tests/test_invoke_hooks.py +1902 -0
  97. package/src/claude/scripts/devforgeai_cli/tests/test_phase_commands.py +320 -0
  98. package/src/claude/scripts/devforgeai_cli/tests/test_phase_commands_error_handling.py +1021 -0
  99. package/src/claude/scripts/devforgeai_cli/tests/test_phase_commands_import.py +697 -0
  100. package/src/claude/scripts/devforgeai_cli/tests/test_phase_state.py +2187 -0
  101. package/src/claude/scripts/devforgeai_cli/tests/test_skip_tracking.py +2141 -0
  102. package/src/claude/scripts/devforgeai_cli/tests/test_skip_tracking_coverage_gap.py +195 -0
  103. package/src/claude/scripts/devforgeai_cli/tests/test_subagent_enforcement.py +539 -0
  104. package/src/claude/scripts/devforgeai_cli/tests/test_validate_installation.py +361 -0
  105. package/src/claude/scripts/devforgeai_cli/utils/__init__.py +11 -0
  106. package/src/claude/scripts/devforgeai_cli/utils/depends_on_normalizer.py +149 -0
  107. package/src/claude/scripts/devforgeai_cli/utils/markdown_parser.py +219 -0
  108. package/src/claude/scripts/devforgeai_cli/utils/story_analyzer.py +249 -0
  109. package/src/claude/scripts/devforgeai_cli/utils/yaml_parser.py +152 -0
  110. package/src/claude/scripts/devforgeai_cli/validators/__init__.py +27 -0
  111. package/src/claude/scripts/devforgeai_cli/validators/ast_grep_validator.py +373 -0
  112. package/src/claude/scripts/devforgeai_cli/validators/context_validator.py +180 -0
  113. package/src/claude/scripts/devforgeai_cli/validators/dod_validator.py +309 -0
  114. package/src/claude/scripts/devforgeai_cli/validators/git_validator.py +107 -0
  115. package/src/claude/scripts/devforgeai_cli/validators/grep_fallback.py +300 -0
  116. package/src/claude/scripts/install_hooks.sh +186 -0
  117. package/src/claude/scripts/invoke_feedback_hooks.sh +59 -0
  118. package/src/claude/scripts/migrate-ac-headers.sh +122 -0
  119. package/src/claude/scripts/plan_file_kb.sh +704 -0
  120. package/src/claude/scripts/requirements.txt +8 -0
  121. package/src/claude/scripts/session_catalog.sh +543 -0
  122. package/src/claude/scripts/setup.py +55 -0
  123. package/src/claude/scripts/start-devforgeai.sh +16 -0
  124. package/src/claude/scripts/statusline.sh +27 -0
  125. package/src/claude/scripts/validate_deferrals.py +344 -0
  126. package/src/claude/skills/devforgeai-qa/SKILL.md +1 -1
  127. package/src/claude/skills/researching-market/SKILL.md +2 -1
  128. package/src/cli/lib/copier.js +13 -1
  129. package/src/claude/skills/designing-systems/scripts/__pycache__/detect_anti_patterns.cpython-312.pyc +0 -0
  130. package/src/claude/skills/designing-systems/scripts/__pycache__/validate_all_context.cpython-312.pyc +0 -0
  131. package/src/claude/skills/designing-systems/scripts/__pycache__/validate_architecture.cpython-312.pyc +0 -0
  132. package/src/claude/skills/designing-systems/scripts/__pycache__/validate_dependencies.cpython-312.pyc +0 -0
  133. package/src/claude/skills/devforgeai-story-creation/scripts/__pycache__/migrate_story_v1_to_v2.cpython-312.pyc +0 -0
  134. package/src/claude/skills/devforgeai-story-creation/scripts/tests/__pycache__/measure_accuracy.cpython-312.pyc +0 -0
@@ -0,0 +1,152 @@
1
"""
Failure mode identification and ranking module.

AC#2: Identify and rank most common failure modes from error entries.

STORY-227: Calculate Workflow Success Metrics
"""
from collections import Counter
from typing import Any, Dict, List


# Maps known error_type values to broader failure categories.
# Anything not listed here is categorized as "unknown"
# (see categorize_failure_mode).
CATEGORY_MAP: Dict[str, str] = {
    "test_failure": "testing",
    "coverage_gap": "quality",
    "validation_failure": "validation",
    "timeout": "infrastructure",
}
19
+
20
+
21
def identify_failure_modes(error_entries: List[Dict[str, Any]]) -> List[str]:
    """
    Return the unique failure modes (error_type values) in first-seen order.

    Args:
        error_entries: List of error entries with 'error_type' field.

    Returns:
        List of unique failure mode strings, ordered by first occurrence.
        Entries without an error_type field are skipped.
        Returns empty list for empty input.
    """
    if not error_entries:
        return []

    # dict.fromkeys deduplicates while preserving insertion order. The
    # previous list(set(...)) form produced an order that varied across
    # runs (string hash randomization), making output non-reproducible.
    unique_modes = dict.fromkeys(
        entry["error_type"]
        for entry in error_entries
        if entry.get("error_type") is not None
    )
    return list(unique_modes)
44
+
45
+
46
def rank_failure_modes(error_entries: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
    """
    Rank failure modes by how often they occur, most frequent first.

    Args:
        error_entries: List of error entries with 'error_type' field.

    Returns:
        List of dicts, each containing:
        - error_type: str
        - count: int
        - percentage: float (0-100)
        Entries lacking an error_type are ignored. Returns an empty list
        when the input is empty or no entry carries an error_type.
    """
    if not error_entries:
        return []

    # Tally error types directly from a generator; entries without an
    # error_type field never reach the counter.
    counts = Counter(
        entry.get("error_type")
        for entry in error_entries
        if entry.get("error_type") is not None
    )
    if not counts:
        return []

    total = sum(counts.values())

    # most_common() yields (mode, count) pairs sorted by descending count,
    # with ties kept in first-encountered order.
    return [
        {
            "error_type": mode,
            "count": occurrences,
            "percentage": (occurrences / total) * 100.0,
        }
        for mode, occurrences in counts.most_common()
    ]
88
+
89
+
90
def categorize_failure_mode(error_type: str) -> str:
    """
    Map an error type to its broader failure category.

    Args:
        error_type: The error type string to categorize.

    Returns:
        Category string from CATEGORY_MAP:
        - "test_failure" -> "testing"
        - "coverage_gap" -> "quality"
        - "validation_failure" -> "validation"
        - "timeout" -> "infrastructure"
        Any unrecognized error type yields "unknown".
    """
    # EAFP lookup: unknown types fall through to the default category.
    try:
        return CATEGORY_MAP[error_type]
    except KeyError:
        return "unknown"
106
+
107
+
108
def get_failure_mode_summary(error_entries: List[Dict[str, Any]]) -> Dict[str, Any]:
    """
    Build an aggregate summary of the failure modes in *error_entries*.

    Args:
        error_entries: List of error entries with 'error_type' field.

    Returns:
        Dictionary containing:
        - total_errors: int (entries that carry an error_type)
        - unique_failure_modes: int
        - ranked_modes: List[Dict] (from rank_failure_modes)
        - by_category: Dict[str, int] (count per category)
    """
    if not error_entries:
        return {
            "total_errors": 0,
            "unique_failure_modes": 0,
            "ranked_modes": [],
            "by_category": {},
        }

    # Single pass tallies both the per-category counts and the number of
    # entries that actually carry an error_type.
    by_category: Dict[str, int] = {}
    counted = 0
    for entry in error_entries:
        mode = entry.get("error_type")
        if mode is None:
            continue
        counted += 1
        category = categorize_failure_mode(mode)
        by_category[category] = by_category.get(category, 0) + 1

    return {
        "total_errors": counted,
        "unique_failure_modes": len(identify_failure_modes(error_entries)),
        "ranked_modes": rank_failure_modes(error_entries),
        "by_category": by_category,
    }
@@ -0,0 +1,181 @@
1
"""
Story point segmentation and averages module.

AC#3: Segment metrics by story points (1, 2, 3, 5, 8 - Fibonacci-based).

STORY-227: Calculate Workflow Success Metrics
"""
from typing import Any, Dict, List, Optional


# Valid Fibonacci-based story points; the canonical segment keys used
# throughout this module.
VALID_STORY_POINTS: List[int] = [1, 2, 3, 5, 8]
13
+
14
+
15
def get_valid_story_points() -> List[int]:
    """
    Return the list of valid story point values.

    Returns:
        A fresh list [1, 2, 3, 5, 8] (Fibonacci-based story points). The
        module-level constant itself is never handed out, so callers may
        mutate the result freely.
    """
    return list(VALID_STORY_POINTS)
23
+
24
+
25
def is_valid_story_point(point: Any) -> bool:
    """
    Check whether *point* is one of the valid story point values.

    Args:
        point: The value to check.

    Returns:
        True if point is in [1, 2, 3, 5, 8], False otherwise.
        None always yields False.
    """
    # Short-circuit keeps the explicit None rejection of the original.
    return point is not None and point in VALID_STORY_POINTS
39
+
40
+
41
def segment_metrics_by_story_points(
    workflow_metrics: List[Dict[str, Any]]
) -> Dict[int, List[Dict[str, Any]]]:
    """
    Group workflow metrics into buckets keyed by story point value.

    Args:
        workflow_metrics: List of workflow metric entries with a
            'story_points' field.

    Returns:
        Dictionary with one key per valid story point (1, 2, 3, 5, 8),
        each mapping to the list of matching stories — empty lists for
        points with no stories, including when the input is empty.
        Stories whose story_points is missing or invalid are excluded.
    """
    # Pre-seed every valid point so the output shape is stable.
    segments: Dict[int, List[Dict[str, Any]]] = {
        point: [] for point in VALID_STORY_POINTS
    }

    # `or []` makes empty / falsy input iterate zero times, matching the
    # early-return behavior for empty input.
    for metric in workflow_metrics or []:
        points = metric.get("story_points")
        if is_valid_story_point(points):
            segments[points].append(metric)

    return segments
82
+
83
+
84
def calculate_segment_averages(
    segments: Dict[int, List[Dict[str, Any]]]
) -> Dict[int, Dict[str, Optional[float]]]:
    """
    Compute per-segment average completion and error rates.

    Args:
        segments: Story point segments, as produced by
            segment_metrics_by_story_points.

    Returns:
        Dictionary keyed by each valid story point, each value holding
        "avg_completion_rate" and "avg_error_rate" rounded to two
        decimal places. Empty (or missing) segments report 0.0 for both.
    """
    averages: Dict[int, Dict[str, Optional[float]]] = {}

    for point in VALID_STORY_POINTS:
        stories = segments.get(point, [])
        if stories:
            count = len(stories)
            # Missing rate fields contribute 0.0, as in the original.
            completion_sum = sum(s.get("completion_rate", 0.0) for s in stories)
            error_sum = sum(s.get("error_rate", 0.0) for s in stories)
            averages[point] = {
                "avg_completion_rate": round(completion_sum / count, 2),
                "avg_error_rate": round(error_sum / count, 2),
            }
        else:
            averages[point] = {
                "avg_completion_rate": 0.0,
                "avg_error_rate": 0.0,
            }

    return averages
131
+
132
+
133
def get_segmentation_summary(
    workflow_metrics: List[Dict[str, Any]]
) -> Dict[str, Any]:
    """
    Produce a full story-point segmentation report.

    Args:
        workflow_metrics: List of workflow metric entries.

    Returns:
        Dictionary containing:
        - total_stories: int (all stories, valid or not)
        - segmented_stories: int (stories with valid points)
        - excluded_stories: int (stories without valid points)
        - segments: Dict[int, List[Dict]] (from segment_metrics_by_story_points)
        - averages_by_segment: Dict[int, Dict] (from calculate_segment_averages)
    """
    if not workflow_metrics:
        # Empty input still yields the full segment/average structure.
        return {
            "total_stories": 0,
            "segmented_stories": 0,
            "excluded_stories": 0,
            "segments": {point: [] for point in VALID_STORY_POINTS},
            "averages_by_segment": {
                point: {"avg_completion_rate": 0.0, "avg_error_rate": 0.0}
                for point in VALID_STORY_POINTS
            },
        }

    segments = segment_metrics_by_story_points(workflow_metrics)
    valid_count = sum(len(bucket) for bucket in segments.values())

    return {
        "total_stories": len(workflow_metrics),
        "segmented_stories": valid_count,
        "excluded_stories": len(workflow_metrics) - valid_count,
        "segments": segments,
        "averages_by_segment": calculate_segment_averages(segments),
    }