ado-git-repo-insights 1.2.1__py3-none-any.whl → 2.7.4__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (28)
  1. ado_git_repo_insights/__init__.py +3 -3
  2. ado_git_repo_insights/cli.py +703 -354
  3. ado_git_repo_insights/config.py +186 -186
  4. ado_git_repo_insights/extractor/__init__.py +1 -1
  5. ado_git_repo_insights/extractor/ado_client.py +452 -246
  6. ado_git_repo_insights/extractor/pr_extractor.py +239 -239
  7. ado_git_repo_insights/ml/__init__.py +13 -0
  8. ado_git_repo_insights/ml/date_utils.py +70 -0
  9. ado_git_repo_insights/ml/forecaster.py +288 -0
  10. ado_git_repo_insights/ml/insights.py +497 -0
  11. ado_git_repo_insights/persistence/__init__.py +1 -1
  12. ado_git_repo_insights/persistence/database.py +193 -193
  13. ado_git_repo_insights/persistence/models.py +207 -145
  14. ado_git_repo_insights/persistence/repository.py +662 -376
  15. ado_git_repo_insights/transform/__init__.py +1 -1
  16. ado_git_repo_insights/transform/aggregators.py +950 -0
  17. ado_git_repo_insights/transform/csv_generator.py +132 -132
  18. ado_git_repo_insights/utils/__init__.py +1 -1
  19. ado_git_repo_insights/utils/datetime_utils.py +101 -101
  20. ado_git_repo_insights/utils/logging_config.py +172 -172
  21. ado_git_repo_insights/utils/run_summary.py +207 -206
  22. {ado_git_repo_insights-1.2.1.dist-info → ado_git_repo_insights-2.7.4.dist-info}/METADATA +56 -15
  23. ado_git_repo_insights-2.7.4.dist-info/RECORD +27 -0
  24. {ado_git_repo_insights-1.2.1.dist-info → ado_git_repo_insights-2.7.4.dist-info}/licenses/LICENSE +21 -21
  25. ado_git_repo_insights-1.2.1.dist-info/RECORD +0 -22
  26. {ado_git_repo_insights-1.2.1.dist-info → ado_git_repo_insights-2.7.4.dist-info}/WHEEL +0 -0
  27. {ado_git_repo_insights-1.2.1.dist-info → ado_git_repo_insights-2.7.4.dist-info}/entry_points.txt +0 -0
  28. {ado_git_repo_insights-1.2.1.dist-info → ado_git_repo_insights-2.7.4.dist-info}/top_level.txt +0 -0
@@ -1,132 +1,132 @@
1
- """CSV generator for PowerBI-compatible output.
2
-
3
- Generates CSVs that are:
4
- - Schema-compliant (exact columns, exact order - Invariants 1-4)
5
- - Deterministic (same DB → same bytes - Adjustment 3)
6
- """
7
-
8
- from __future__ import annotations
9
-
10
- import logging
11
- from pathlib import Path
12
- from typing import TYPE_CHECKING
13
-
14
- import pandas as pd
15
-
16
- from ..persistence.models import CSV_SCHEMAS, SORT_KEYS
17
-
18
- if TYPE_CHECKING:
19
- from ..persistence.database import DatabaseManager
20
-
21
- logger = logging.getLogger(__name__)
22
-
23
-
24
- class CSVGenerationError(Exception):
25
- """CSV generation failed."""
26
-
27
-
28
- class CSVGenerator:
29
- """Generates PowerBI-compatible CSV files from SQLite.
30
-
31
- Invariant 1: CSV schema is a hard contract.
32
- Invariant 3: CSV output must be deterministic.
33
- """
34
-
35
- def __init__(self, db: DatabaseManager, output_dir: Path) -> None:
36
- """Initialize the CSV generator.
37
-
38
- Args:
39
- db: Database manager instance.
40
- output_dir: Directory for CSV output files.
41
- """
42
- self.db = db
43
- self.output_dir = output_dir
44
-
45
- def generate_all(self) -> dict[str, int]:
46
- """Generate all CSV files.
47
-
48
- Returns:
49
- Dict mapping table names to row counts.
50
-
51
- Raises:
52
- CSVGenerationError: If generation fails.
53
- """
54
- self.output_dir.mkdir(parents=True, exist_ok=True)
55
-
56
- results: dict[str, int] = {}
57
-
58
- for table_name, columns in CSV_SCHEMAS.items():
59
- try:
60
- count = self._generate_table(table_name, columns)
61
- results[table_name] = count
62
- logger.info(f"Generated {table_name}.csv: {count} rows")
63
- except Exception as e:
64
- raise CSVGenerationError(
65
- f"Failed to generate {table_name}.csv: {e}"
66
- ) from e
67
-
68
- return results
69
-
70
- def _generate_table(self, table_name: str, columns: list[str]) -> int:
71
- """Generate a single CSV file.
72
-
73
- Args:
74
- table_name: Name of the table/CSV.
75
- columns: Expected column order (contract).
76
-
77
- Returns:
78
- Number of rows written.
79
- """
80
- # Query the table
81
- column_list = ", ".join(columns)
82
- df = pd.read_sql_query(
83
- f"SELECT {column_list} FROM {table_name}", # noqa: S608
84
- self.db.connection,
85
- )
86
-
87
- # Ensure column order matches contract exactly (Invariant 1)
88
- df = df[columns]
89
-
90
- # Deterministic row ordering (Adjustment 3)
91
- sort_keys = SORT_KEYS.get(table_name, columns[:1])
92
- df = df.sort_values(by=sort_keys, ascending=True)
93
-
94
- # Write CSV with deterministic settings
95
- output_path = self.output_dir / f"{table_name}.csv"
96
- df.to_csv(
97
- output_path,
98
- index=False,
99
- encoding="utf-8",
100
- lineterminator="\n", # Unix line endings for consistency
101
- date_format="%Y-%m-%dT%H:%M:%S", # Consistent datetime format
102
- )
103
-
104
- return len(df)
105
-
106
- def validate_schemas(self) -> bool:
107
- """Validate that generated CSVs match expected schemas.
108
-
109
- Returns:
110
- True if all schemas valid.
111
-
112
- Raises:
113
- CSVGenerationError: If any schema mismatch.
114
- """
115
- for table_name, expected_columns in CSV_SCHEMAS.items():
116
- csv_path = self.output_dir / f"{table_name}.csv"
117
-
118
- if not csv_path.exists():
119
- raise CSVGenerationError(f"Missing CSV: {csv_path}")
120
-
121
- df = pd.read_csv(csv_path, nrows=0) # Just read headers
122
- actual_columns = list(df.columns)
123
-
124
- if actual_columns != expected_columns:
125
- raise CSVGenerationError(
126
- f"Schema mismatch in {table_name}.csv:\n"
127
- f" Expected: {expected_columns}\n"
128
- f" Actual: {actual_columns}"
129
- )
130
-
131
- logger.info("All CSV schemas validated successfully")
132
- return True
1
+ """CSV generator for PowerBI-compatible output.
2
+
3
+ Generates CSVs that are:
4
+ - Schema-compliant (exact columns, exact order - Invariants 1-4)
5
+ - Deterministic (same DB → same bytes - Adjustment 3)
6
+ """
7
+
8
+ from __future__ import annotations
9
+
10
+ import logging
11
+ from pathlib import Path
12
+ from typing import TYPE_CHECKING
13
+
14
+ import pandas as pd
15
+
16
+ from ..persistence.models import CSV_SCHEMAS, SORT_KEYS
17
+
18
+ if TYPE_CHECKING:
19
+ from ..persistence.database import DatabaseManager
20
+
21
+ logger = logging.getLogger(__name__)
22
+
23
+
24
class CSVGenerationError(Exception):
    """CSV generation failed.

    Raised when a table cannot be exported to CSV or when a generated
    CSV's header row does not match its expected schema.
    """
26
+
27
+
28
class CSVGenerator:
    """Generates PowerBI-compatible CSV files from SQLite.

    Invariant 1: CSV schema is a hard contract (exact columns, exact order).
    Invariant 3: CSV output must be deterministic (same DB -> same bytes).
    """

    def __init__(self, db: DatabaseManager, output_dir: Path) -> None:
        """Initialize the CSV generator.

        Args:
            db: Database manager instance exposing a `.connection` handle.
            output_dir: Directory for CSV output files (created on demand).
        """
        self.db = db
        self.output_dir = output_dir

    def generate_all(self) -> dict[str, int]:
        """Generate one CSV file per table declared in CSV_SCHEMAS.

        Returns:
            Dict mapping table names to the number of rows written.

        Raises:
            CSVGenerationError: If generation of any table fails; the
                underlying exception is chained as the cause.
        """
        self.output_dir.mkdir(parents=True, exist_ok=True)

        results: dict[str, int] = {}

        for table_name, columns in CSV_SCHEMAS.items():
            try:
                count = self._generate_table(table_name, columns)
                results[table_name] = count
                logger.info(f"Generated {table_name}.csv: {count} rows")
            except Exception as e:
                # Wrap every failure in the module's exception type so
                # callers only need to catch CSVGenerationError.
                raise CSVGenerationError(
                    f"Failed to generate {table_name}.csv: {e}"
                ) from e

        return results

    def _generate_table(self, table_name: str, columns: list[str]) -> int:
        """Generate a single CSV file.

        Args:
            table_name: Name of the table/CSV.
            columns: Expected column order (contract).

        Returns:
            Number of rows written.
        """
        # Query the table. Identifiers come from the trusted CSV_SCHEMAS
        # constant, not from user input, hence the S608 suppression.
        column_list = ", ".join(columns)
        df = pd.read_sql_query(
            f"SELECT {column_list} FROM {table_name}",  # noqa: S608
            self.db.connection,
        )

        # Ensure column order matches contract exactly (Invariant 1)
        df = df[columns]

        # Deterministic row ordering (Adjustment 3). A stable sort is
        # required here: the SELECT above has no ORDER BY, so SQLite's row
        # order is unspecified, and pandas' default quicksort is unstable —
        # rows tied on the sort keys could otherwise be emitted in varying
        # order and break byte-for-byte determinism.
        sort_keys = SORT_KEYS.get(table_name, columns[:1])
        df = df.sort_values(by=sort_keys, ascending=True, kind="stable")

        # Write CSV with deterministic settings
        output_path = self.output_dir / f"{table_name}.csv"
        df.to_csv(
            output_path,
            index=False,
            encoding="utf-8",
            lineterminator="\n",  # Unix line endings for consistency
            date_format="%Y-%m-%dT%H:%M:%S",  # Consistent datetime format
        )

        return len(df)

    def validate_schemas(self) -> bool:
        """Validate that generated CSVs match expected schemas.

        Returns:
            True if all schemas are valid.

        Raises:
            CSVGenerationError: If a CSV is missing or its header row
                deviates from the contract in any way (name or order).
        """
        for table_name, expected_columns in CSV_SCHEMAS.items():
            csv_path = self.output_dir / f"{table_name}.csv"

            if not csv_path.exists():
                raise CSVGenerationError(f"Missing CSV: {csv_path}")

            df = pd.read_csv(csv_path, nrows=0)  # Just read headers
            actual_columns = list(df.columns)

            if actual_columns != expected_columns:
                raise CSVGenerationError(
                    f"Schema mismatch in {table_name}.csv:\n"
                    f" Expected: {expected_columns}\n"
                    f" Actual: {actual_columns}"
                )

        logger.info("All CSV schemas validated successfully")
        return True
@@ -1 +1 @@
1
- """Utilities module for shared helper functions."""
1
+ """Utilities module for shared helper functions."""
@@ -1,101 +1,101 @@
1
- """Datetime utilities for ado-git-repo-insights.
2
-
3
- Ported from the original generate_raw_data.py to ensure identical behavior.
4
- """
5
-
6
- from __future__ import annotations
7
-
8
- import logging
9
- from datetime import datetime
10
-
11
- logger = logging.getLogger(__name__)
12
-
13
-
14
- def parse_iso_datetime(date_str: str | None) -> datetime | None:
15
- """Parse ISO 8601 datetime strings from ADO API.
16
-
17
- Handles 7-digit microseconds and 'Z' suffix quirks from ADO API responses.
18
- Preserved from original implementation for compatibility.
19
-
20
- Args:
21
- date_str: ISO 8601 datetime string, or None.
22
-
23
- Returns:
24
- Parsed datetime, or None if parsing fails or input is None.
25
-
26
- Examples:
27
- >>> parse_iso_datetime("2024-01-15T10:30:45.1234567Z")
28
- datetime.datetime(2024, 1, 15, 10, 30, 45, 123456)
29
- >>> parse_iso_datetime(None)
30
- None
31
- """
32
- if not date_str:
33
- return None
34
-
35
- try:
36
- # Remove trailing 'Z' (Zulu/UTC indicator)
37
- date_str = date_str.rstrip("Z")
38
-
39
- if "." in date_str:
40
- # ADO API sometimes returns 7-digit microseconds, Python only supports 6
41
- date_part, microseconds = date_str.split(".")
42
- microseconds = microseconds[:6] # Truncate to 6 digits
43
- date_str = f"{date_part}.{microseconds}"
44
- return datetime.strptime(date_str, "%Y-%m-%dT%H:%M:%S.%f")
45
- else:
46
- # No microseconds
47
- return datetime.strptime(date_str, "%Y-%m-%dT%H:%M:%S")
48
-
49
- except ValueError as e:
50
- logger.warning(f"Failed to parse date '{date_str}': {e}")
51
- return None
52
-
53
-
54
- def calculate_cycle_time_minutes(
55
- creation_date: str | None, closed_date: str | None
56
- ) -> float | None:
57
- """Calculate PR cycle time in minutes.
58
-
59
- Cycle time is the duration from PR creation to closure.
60
- Minimum value is 1 minute to avoid zero/negative values.
61
-
62
- Args:
63
- creation_date: ISO 8601 creation date string.
64
- closed_date: ISO 8601 closed date string.
65
-
66
- Returns:
67
- Cycle time in minutes (minimum 1.0), or None if dates are invalid.
68
-
69
- Examples:
70
- >>> calculate_cycle_time_minutes(
71
- ... "2024-01-15T10:00:00Z",
72
- ... "2024-01-15T10:30:00Z"
73
- ... )
74
- 30.0
75
- """
76
- created = parse_iso_datetime(creation_date)
77
- closed = parse_iso_datetime(closed_date)
78
-
79
- if created and closed:
80
- delta_seconds = (closed - created).total_seconds()
81
- minutes = delta_seconds / 60
82
- # Minimum 1 minute, rounded to 2 decimal places
83
- return max(1.0, round(minutes, 2))
84
-
85
- return None
86
-
87
-
88
- def format_date_for_api(dt: datetime) -> str:
89
- """Format a datetime for ADO API queries.
90
-
91
- Args:
92
- dt: Datetime to format.
93
-
94
- Returns:
95
- ISO 8601 formatted string with 'Z' suffix.
96
-
97
- Examples:
98
- >>> format_date_for_api(datetime(2024, 1, 15, 10, 30, 0))
99
- '2024-01-15T10:30:00Z'
100
- """
101
- return dt.strftime("%Y-%m-%dT%H:%M:%SZ")
1
+ """Datetime utilities for ado-git-repo-insights.
2
+
3
+ Ported from the original generate_raw_data.py to ensure identical behavior.
4
+ """
5
+
6
+ from __future__ import annotations
7
+
8
+ import logging
9
+ from datetime import datetime
10
+
11
+ logger = logging.getLogger(__name__)
12
+
13
+
14
def parse_iso_datetime(date_str: str | None) -> datetime | None:
    """Parse ISO 8601 datetime strings from the ADO API.

    Handles two ADO quirks: a trailing 'Z' (Zulu/UTC) suffix, and
    fractional seconds with up to 7 digits where Python's %f accepts
    at most 6. Preserved from the original implementation for
    compatibility.

    Args:
        date_str: ISO 8601 datetime string, or None.

    Returns:
        Parsed datetime, or None if parsing fails or input is None.

    Examples:
        >>> parse_iso_datetime("2024-01-15T10:30:45.1234567Z")
        datetime.datetime(2024, 1, 15, 10, 30, 45, 123456)
        >>> parse_iso_datetime(None)
        None
    """
    if not date_str:
        return None

    # Drop the trailing 'Z' (UTC marker) before parsing; strptime has
    # no directive for it.
    cleaned = date_str.rstrip("Z")

    try:
        if "." in cleaned:
            # ADO may send 7-digit fractional seconds; keep only the
            # 6 digits that %f supports.
            stem, fraction = cleaned.split(".")
            cleaned = f"{stem}.{fraction[:6]}"
            fmt = "%Y-%m-%dT%H:%M:%S.%f"
        else:
            fmt = "%Y-%m-%dT%H:%M:%S"
        return datetime.strptime(cleaned, fmt)
    except ValueError as exc:
        logger.warning(f"Failed to parse date '{cleaned}': {exc}")
        return None
52
+
53
+
54
def calculate_cycle_time_minutes(
    creation_date: str | None, closed_date: str | None
) -> float | None:
    """Calculate PR cycle time in minutes.

    Cycle time is the duration from PR creation to closure, floored at
    one minute so the metric never reports zero or negative values.

    Args:
        creation_date: ISO 8601 creation date string.
        closed_date: ISO 8601 closed date string.

    Returns:
        Cycle time in minutes (minimum 1.0), or None if either date is
        missing or unparseable.

    Examples:
        >>> calculate_cycle_time_minutes(
        ...     "2024-01-15T10:00:00Z",
        ...     "2024-01-15T10:30:00Z"
        ... )
        30.0
    """
    opened_at = parse_iso_datetime(creation_date)
    closed_at = parse_iso_datetime(closed_date)

    # Bail out unless both endpoints parsed successfully.
    if opened_at is None or closed_at is None:
        return None

    elapsed_minutes = (closed_at - opened_at).total_seconds() / 60
    # Round to 2 decimal places, but never report below one minute.
    return max(1.0, round(elapsed_minutes, 2))
86
+
87
+
88
def format_date_for_api(dt: datetime) -> str:
    """Format a datetime for ADO API queries.

    Args:
        dt: Datetime to format.

    Returns:
        ISO 8601 formatted string with a 'Z' suffix.

    Examples:
        >>> format_date_for_api(datetime(2024, 1, 15, 10, 30, 0))
        '2024-01-15T10:30:00Z'
    """
    # The f-string format spec delegates to datetime.__format__, which
    # applies the same strftime directives as the explicit call.
    return f"{dt:%Y-%m-%dT%H:%M:%SZ}"