ado-git-repo-insights 1.2.1__py3-none-any.whl → 2.7.4__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (28)
  1. ado_git_repo_insights/__init__.py +3 -3
  2. ado_git_repo_insights/cli.py +703 -354
  3. ado_git_repo_insights/config.py +186 -186
  4. ado_git_repo_insights/extractor/__init__.py +1 -1
  5. ado_git_repo_insights/extractor/ado_client.py +452 -246
  6. ado_git_repo_insights/extractor/pr_extractor.py +239 -239
  7. ado_git_repo_insights/ml/__init__.py +13 -0
  8. ado_git_repo_insights/ml/date_utils.py +70 -0
  9. ado_git_repo_insights/ml/forecaster.py +288 -0
  10. ado_git_repo_insights/ml/insights.py +497 -0
  11. ado_git_repo_insights/persistence/__init__.py +1 -1
  12. ado_git_repo_insights/persistence/database.py +193 -193
  13. ado_git_repo_insights/persistence/models.py +207 -145
  14. ado_git_repo_insights/persistence/repository.py +662 -376
  15. ado_git_repo_insights/transform/__init__.py +1 -1
  16. ado_git_repo_insights/transform/aggregators.py +950 -0
  17. ado_git_repo_insights/transform/csv_generator.py +132 -132
  18. ado_git_repo_insights/utils/__init__.py +1 -1
  19. ado_git_repo_insights/utils/datetime_utils.py +101 -101
  20. ado_git_repo_insights/utils/logging_config.py +172 -172
  21. ado_git_repo_insights/utils/run_summary.py +207 -206
  22. {ado_git_repo_insights-1.2.1.dist-info → ado_git_repo_insights-2.7.4.dist-info}/METADATA +56 -15
  23. ado_git_repo_insights-2.7.4.dist-info/RECORD +27 -0
  24. {ado_git_repo_insights-1.2.1.dist-info → ado_git_repo_insights-2.7.4.dist-info}/licenses/LICENSE +21 -21
  25. ado_git_repo_insights-1.2.1.dist-info/RECORD +0 -22
  26. {ado_git_repo_insights-1.2.1.dist-info → ado_git_repo_insights-2.7.4.dist-info}/WHEEL +0 -0
  27. {ado_git_repo_insights-1.2.1.dist-info → ado_git_repo_insights-2.7.4.dist-info}/entry_points.txt +0 -0
  28. {ado_git_repo_insights-1.2.1.dist-info → ado_git_repo_insights-2.7.4.dist-info}/top_level.txt +0 -0
@@ -1,132 +1,132 @@
1
- """CSV generator for PowerBI-compatible output.
2
-
3
- Generates CSVs that are:
4
- - Schema-compliant (exact columns, exact order - Invariants 1-4)
5
- - Deterministic (same DB → same bytes - Adjustment 3)
6
- """
7
-
8
- from __future__ import annotations
9
-
10
- import logging
11
- from pathlib import Path
12
- from typing import TYPE_CHECKING
13
-
14
- import pandas as pd
15
-
16
- from ..persistence.models import CSV_SCHEMAS, SORT_KEYS
17
-
18
- if TYPE_CHECKING:
19
- from ..persistence.database import DatabaseManager
20
-
21
- logger = logging.getLogger(__name__)
22
-
23
-
24
- class CSVGenerationError(Exception):
25
- """CSV generation failed."""
26
-
27
-
28
- class CSVGenerator:
29
- """Generates PowerBI-compatible CSV files from SQLite.
30
-
31
- Invariant 1: CSV schema is a hard contract.
32
- Invariant 3: CSV output must be deterministic.
33
- """
34
-
35
- def __init__(self, db: DatabaseManager, output_dir: Path) -> None:
36
- """Initialize the CSV generator.
37
-
38
- Args:
39
- db: Database manager instance.
40
- output_dir: Directory for CSV output files.
41
- """
42
- self.db = db
43
- self.output_dir = output_dir
44
-
45
- def generate_all(self) -> dict[str, int]:
46
- """Generate all CSV files.
47
-
48
- Returns:
49
- Dict mapping table names to row counts.
50
-
51
- Raises:
52
- CSVGenerationError: If generation fails.
53
- """
54
- self.output_dir.mkdir(parents=True, exist_ok=True)
55
-
56
- results: dict[str, int] = {}
57
-
58
- for table_name, columns in CSV_SCHEMAS.items():
59
- try:
60
- count = self._generate_table(table_name, columns)
61
- results[table_name] = count
62
- logger.info(f"Generated {table_name}.csv: {count} rows")
63
- except Exception as e:
64
- raise CSVGenerationError(
65
- f"Failed to generate {table_name}.csv: {e}"
66
- ) from e
67
-
68
- return results
69
-
70
- def _generate_table(self, table_name: str, columns: list[str]) -> int:
71
- """Generate a single CSV file.
72
-
73
- Args:
74
- table_name: Name of the table/CSV.
75
- columns: Expected column order (contract).
76
-
77
- Returns:
78
- Number of rows written.
79
- """
80
- # Query the table
81
- column_list = ", ".join(columns)
82
- df = pd.read_sql_query(
83
- f"SELECT {column_list} FROM {table_name}", # noqa: S608
84
- self.db.connection,
85
- )
86
-
87
- # Ensure column order matches contract exactly (Invariant 1)
88
- df = df[columns]
89
-
90
- # Deterministic row ordering (Adjustment 3)
91
- sort_keys = SORT_KEYS.get(table_name, columns[:1])
92
- df = df.sort_values(by=sort_keys, ascending=True)
93
-
94
- # Write CSV with deterministic settings
95
- output_path = self.output_dir / f"{table_name}.csv"
96
- df.to_csv(
97
- output_path,
98
- index=False,
99
- encoding="utf-8",
100
- lineterminator="\n", # Unix line endings for consistency
101
- date_format="%Y-%m-%dT%H:%M:%S", # Consistent datetime format
102
- )
103
-
104
- return len(df)
105
-
106
- def validate_schemas(self) -> bool:
107
- """Validate that generated CSVs match expected schemas.
108
-
109
- Returns:
110
- True if all schemas valid.
111
-
112
- Raises:
113
- CSVGenerationError: If any schema mismatch.
114
- """
115
- for table_name, expected_columns in CSV_SCHEMAS.items():
116
- csv_path = self.output_dir / f"{table_name}.csv"
117
-
118
- if not csv_path.exists():
119
- raise CSVGenerationError(f"Missing CSV: {csv_path}")
120
-
121
- df = pd.read_csv(csv_path, nrows=0) # Just read headers
122
- actual_columns = list(df.columns)
123
-
124
- if actual_columns != expected_columns:
125
- raise CSVGenerationError(
126
- f"Schema mismatch in {table_name}.csv:\n"
127
- f" Expected: {expected_columns}\n"
128
- f" Actual: {actual_columns}"
129
- )
130
-
131
- logger.info("All CSV schemas validated successfully")
132
- return True
1
+ """CSV generator for PowerBI-compatible output.
2
+
3
+ Generates CSVs that are:
4
+ - Schema-compliant (exact columns, exact order - Invariants 1-4)
5
+ - Deterministic (same DB → same bytes - Adjustment 3)
6
+ """
7
+
8
+ from __future__ import annotations
9
+
10
+ import logging
11
+ from pathlib import Path
12
+ from typing import TYPE_CHECKING
13
+
14
+ import pandas as pd
15
+
16
+ from ..persistence.models import CSV_SCHEMAS, SORT_KEYS
17
+
18
+ if TYPE_CHECKING:
19
+ from ..persistence.database import DatabaseManager
20
+
21
+ logger = logging.getLogger(__name__)
22
+
23
+
24
class CSVGenerationError(Exception):
    """CSV generation failed.

    Raised when a table cannot be exported to CSV or when a generated
    CSV's header row does not match its expected schema.
    """
26
+
27
+
28
class CSVGenerator:
    """Generates PowerBI-compatible CSV files from SQLite.

    Invariant 1: CSV schema is a hard contract (exact columns, exact order).
    Invariant 3: CSV output must be deterministic (same DB -> same bytes).
    """

    def __init__(self, db: DatabaseManager, output_dir: Path) -> None:
        """Initialize the CSV generator.

        Args:
            db: Database manager instance exposing a `.connection` handle.
            output_dir: Directory for CSV output files (created on demand).
        """
        self.db = db
        self.output_dir = output_dir

    def generate_all(self) -> dict[str, int]:
        """Generate one CSV file per table declared in CSV_SCHEMAS.

        Returns:
            Dict mapping table names to the number of rows written.

        Raises:
            CSVGenerationError: If generation of any table fails; the
                underlying exception is chained as the cause.
        """
        self.output_dir.mkdir(parents=True, exist_ok=True)

        results: dict[str, int] = {}

        for table_name, columns in CSV_SCHEMAS.items():
            try:
                count = self._generate_table(table_name, columns)
                results[table_name] = count
                logger.info(f"Generated {table_name}.csv: {count} rows")
            except Exception as e:
                # Wrap every failure in the module's exception type so
                # callers only need to catch CSVGenerationError.
                raise CSVGenerationError(
                    f"Failed to generate {table_name}.csv: {e}"
                ) from e

        return results

    def _generate_table(self, table_name: str, columns: list[str]) -> int:
        """Generate a single CSV file.

        Args:
            table_name: Name of the table/CSV.
            columns: Expected column order (contract).

        Returns:
            Number of rows written.
        """
        # Query the table. Identifiers come from the trusted CSV_SCHEMAS
        # constant, not from user input, hence the S608 suppression.
        column_list = ", ".join(columns)
        df = pd.read_sql_query(
            f"SELECT {column_list} FROM {table_name}",  # noqa: S608
            self.db.connection,
        )

        # Ensure column order matches contract exactly (Invariant 1)
        df = df[columns]

        # Deterministic row ordering (Adjustment 3). A stable sort is
        # required here: the SELECT above has no ORDER BY, so SQLite's row
        # order is unspecified, and pandas' default quicksort is unstable —
        # rows tied on the sort keys could otherwise be emitted in varying
        # order and break byte-for-byte determinism.
        sort_keys = SORT_KEYS.get(table_name, columns[:1])
        df = df.sort_values(by=sort_keys, ascending=True, kind="stable")

        # Write CSV with deterministic settings
        output_path = self.output_dir / f"{table_name}.csv"
        df.to_csv(
            output_path,
            index=False,
            encoding="utf-8",
            lineterminator="\n",  # Unix line endings for consistency
            date_format="%Y-%m-%dT%H:%M:%S",  # Consistent datetime format
        )

        return len(df)

    def validate_schemas(self) -> bool:
        """Validate that generated CSVs match expected schemas.

        Returns:
            True if all schemas are valid.

        Raises:
            CSVGenerationError: If a CSV is missing or its header row
                deviates from the contract in any way (name or order).
        """
        for table_name, expected_columns in CSV_SCHEMAS.items():
            csv_path = self.output_dir / f"{table_name}.csv"

            if not csv_path.exists():
                raise CSVGenerationError(f"Missing CSV: {csv_path}")

            df = pd.read_csv(csv_path, nrows=0)  # Just read headers
            actual_columns = list(df.columns)

            if actual_columns != expected_columns:
                raise CSVGenerationError(
                    f"Schema mismatch in {table_name}.csv:\n"
                    f" Expected: {expected_columns}\n"
                    f" Actual: {actual_columns}"
                )

        logger.info("All CSV schemas validated successfully")
        return True
@@ -1 +1 @@
1
- """Utilities module for shared helper functions."""
1
+ """Utilities module for shared helper functions."""
@@ -1,101 +1,101 @@
1
- """Datetime utilities for ado-git-repo-insights.
2
-
3
- Ported from the original generate_raw_data.py to ensure identical behavior.
4
- """
5
-
6
- from __future__ import annotations
7
-
8
- import logging
9
- from datetime import datetime
10
-
11
- logger = logging.getLogger(__name__)
12
-
13
-
14
- def parse_iso_datetime(date_str: str | None) -> datetime | None:
15
- """Parse ISO 8601 datetime strings from ADO API.
16
-
17
- Handles 7-digit microseconds and 'Z' suffix quirks from ADO API responses.
18
- Preserved from original implementation for compatibility.
19
-
20
- Args:
21
- date_str: ISO 8601 datetime string, or None.
22
-
23
- Returns:
24
- Parsed datetime, or None if parsing fails or input is None.
25
-
26
- Examples:
27
- >>> parse_iso_datetime("2024-01-15T10:30:45.1234567Z")
28
- datetime.datetime(2024, 1, 15, 10, 30, 45, 123456)
29
- >>> parse_iso_datetime(None)
30
- None
31
- """
32
- if not date_str:
33
- return None
34
-
35
- try:
36
- # Remove trailing 'Z' (Zulu/UTC indicator)
37
- date_str = date_str.rstrip("Z")
38
-
39
- if "." in date_str:
40
- # ADO API sometimes returns 7-digit microseconds, Python only supports 6
41
- date_part, microseconds = date_str.split(".")
42
- microseconds = microseconds[:6] # Truncate to 6 digits
43
- date_str = f"{date_part}.{microseconds}"
44
- return datetime.strptime(date_str, "%Y-%m-%dT%H:%M:%S.%f")
45
- else:
46
- # No microseconds
47
- return datetime.strptime(date_str, "%Y-%m-%dT%H:%M:%S")
48
-
49
- except ValueError as e:
50
- logger.warning(f"Failed to parse date '{date_str}': {e}")
51
- return None
52
-
53
-
54
- def calculate_cycle_time_minutes(
55
- creation_date: str | None, closed_date: str | None
56
- ) -> float | None:
57
- """Calculate PR cycle time in minutes.
58
-
59
- Cycle time is the duration from PR creation to closure.
60
- Minimum value is 1 minute to avoid zero/negative values.
61
-
62
- Args:
63
- creation_date: ISO 8601 creation date string.
64
- closed_date: ISO 8601 closed date string.
65
-
66
- Returns:
67
- Cycle time in minutes (minimum 1.0), or None if dates are invalid.
68
-
69
- Examples:
70
- >>> calculate_cycle_time_minutes(
71
- ... "2024-01-15T10:00:00Z",
72
- ... "2024-01-15T10:30:00Z"
73
- ... )
74
- 30.0
75
- """
76
- created = parse_iso_datetime(creation_date)
77
- closed = parse_iso_datetime(closed_date)
78
-
79
- if created and closed:
80
- delta_seconds = (closed - created).total_seconds()
81
- minutes = delta_seconds / 60
82
- # Minimum 1 minute, rounded to 2 decimal places
83
- return max(1.0, round(minutes, 2))
84
-
85
- return None
86
-
87
-
88
- def format_date_for_api(dt: datetime) -> str:
89
- """Format a datetime for ADO API queries.
90
-
91
- Args:
92
- dt: Datetime to format.
93
-
94
- Returns:
95
- ISO 8601 formatted string with 'Z' suffix.
96
-
97
- Examples:
98
- >>> format_date_for_api(datetime(2024, 1, 15, 10, 30, 0))
99
- '2024-01-15T10:30:00Z'
100
- """
101
- return dt.strftime("%Y-%m-%dT%H:%M:%SZ")
1
+ """Datetime utilities for ado-git-repo-insights.
2
+
3
+ Ported from the original generate_raw_data.py to ensure identical behavior.
4
+ """
5
+
6
+ from __future__ import annotations
7
+
8
+ import logging
9
+ from datetime import datetime
10
+
11
+ logger = logging.getLogger(__name__)
12
+
13
+
14
def parse_iso_datetime(date_str: str | None) -> datetime | None:
    """Parse ISO 8601 datetime strings from the ADO API.

    Handles two ADO quirks: a trailing 'Z' (Zulu/UTC) suffix, and
    fractional seconds with up to 7 digits where Python's %f accepts
    at most 6. Preserved from the original implementation for
    compatibility.

    Args:
        date_str: ISO 8601 datetime string, or None.

    Returns:
        Parsed datetime, or None if parsing fails or input is None.

    Examples:
        >>> parse_iso_datetime("2024-01-15T10:30:45.1234567Z")
        datetime.datetime(2024, 1, 15, 10, 30, 45, 123456)
        >>> parse_iso_datetime(None)
        None
    """
    if not date_str:
        return None

    # Drop the trailing 'Z' (UTC marker) before parsing; strptime has
    # no directive for it.
    cleaned = date_str.rstrip("Z")

    try:
        if "." in cleaned:
            # ADO may send 7-digit fractional seconds; keep only the
            # 6 digits that %f supports.
            stem, fraction = cleaned.split(".")
            cleaned = f"{stem}.{fraction[:6]}"
            fmt = "%Y-%m-%dT%H:%M:%S.%f"
        else:
            fmt = "%Y-%m-%dT%H:%M:%S"
        return datetime.strptime(cleaned, fmt)
    except ValueError as exc:
        logger.warning(f"Failed to parse date '{cleaned}': {exc}")
        return None
52
+
53
+
54
def calculate_cycle_time_minutes(
    creation_date: str | None, closed_date: str | None
) -> float | None:
    """Calculate PR cycle time in minutes.

    Cycle time is the duration from PR creation to closure, floored at
    one minute so the metric never reports zero or negative values.

    Args:
        creation_date: ISO 8601 creation date string.
        closed_date: ISO 8601 closed date string.

    Returns:
        Cycle time in minutes (minimum 1.0), or None if either date is
        missing or unparseable.

    Examples:
        >>> calculate_cycle_time_minutes(
        ...     "2024-01-15T10:00:00Z",
        ...     "2024-01-15T10:30:00Z"
        ... )
        30.0
    """
    opened_at = parse_iso_datetime(creation_date)
    closed_at = parse_iso_datetime(closed_date)

    # Bail out unless both endpoints parsed successfully.
    if opened_at is None or closed_at is None:
        return None

    elapsed_minutes = (closed_at - opened_at).total_seconds() / 60
    # Round to 2 decimal places, but never report below one minute.
    return max(1.0, round(elapsed_minutes, 2))
86
+
87
+
88
def format_date_for_api(dt: datetime) -> str:
    """Format a datetime for ADO API queries.

    Args:
        dt: Datetime to format.

    Returns:
        ISO 8601 formatted string with a 'Z' suffix.

    Examples:
        >>> format_date_for_api(datetime(2024, 1, 15, 10, 30, 0))
        '2024-01-15T10:30:00Z'
    """
    # The f-string format spec delegates to datetime.__format__, which
    # applies the same strftime directives as the explicit call.
    return f"{dt:%Y-%m-%dT%H:%M:%SZ}"