ado-git-repo-insights 1.2.1__py3-none-any.whl → 2.7.4__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (28)
  1. ado_git_repo_insights/__init__.py +3 -3
  2. ado_git_repo_insights/cli.py +703 -354
  3. ado_git_repo_insights/config.py +186 -186
  4. ado_git_repo_insights/extractor/__init__.py +1 -1
  5. ado_git_repo_insights/extractor/ado_client.py +452 -246
  6. ado_git_repo_insights/extractor/pr_extractor.py +239 -239
  7. ado_git_repo_insights/ml/__init__.py +13 -0
  8. ado_git_repo_insights/ml/date_utils.py +70 -0
  9. ado_git_repo_insights/ml/forecaster.py +288 -0
  10. ado_git_repo_insights/ml/insights.py +497 -0
  11. ado_git_repo_insights/persistence/__init__.py +1 -1
  12. ado_git_repo_insights/persistence/database.py +193 -193
  13. ado_git_repo_insights/persistence/models.py +207 -145
  14. ado_git_repo_insights/persistence/repository.py +662 -376
  15. ado_git_repo_insights/transform/__init__.py +1 -1
  16. ado_git_repo_insights/transform/aggregators.py +950 -0
  17. ado_git_repo_insights/transform/csv_generator.py +132 -132
  18. ado_git_repo_insights/utils/__init__.py +1 -1
  19. ado_git_repo_insights/utils/datetime_utils.py +101 -101
  20. ado_git_repo_insights/utils/logging_config.py +172 -172
  21. ado_git_repo_insights/utils/run_summary.py +207 -206
  22. {ado_git_repo_insights-1.2.1.dist-info → ado_git_repo_insights-2.7.4.dist-info}/METADATA +56 -15
  23. ado_git_repo_insights-2.7.4.dist-info/RECORD +27 -0
  24. {ado_git_repo_insights-1.2.1.dist-info → ado_git_repo_insights-2.7.4.dist-info}/licenses/LICENSE +21 -21
  25. ado_git_repo_insights-1.2.1.dist-info/RECORD +0 -22
  26. {ado_git_repo_insights-1.2.1.dist-info → ado_git_repo_insights-2.7.4.dist-info}/WHEEL +0 -0
  27. {ado_git_repo_insights-1.2.1.dist-info → ado_git_repo_insights-2.7.4.dist-info}/entry_points.txt +0 -0
  28. {ado_git_repo_insights-1.2.1.dist-info → ado_git_repo_insights-2.7.4.dist-info}/top_level.txt +0 -0
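Among the new modules in 2.7.4 is ado_git_repo_insights/ml/forecaster.py (item 9), shown in full in the diff below. As a rough sketch of how that class would be driven — the DatabaseManager constructor arguments and the output directory name are assumptions for illustration, since neither appears in this diff — usage would look something like this:

from pathlib import Path

from ado_git_repo_insights.ml.forecaster import ProphetForecaster
from ado_git_repo_insights.persistence.database import DatabaseManager

# Hypothetical wiring: the DatabaseManager constructor and the output
# directory name are assumptions; only the module and class names are
# taken from this diff.
db = DatabaseManager(Path("insights.db"))
forecaster = ProphetForecaster(db=db, output_dir=Path("artifacts"))

# generate() returns True when predictions/trends.json was written and
# False when Prophet is not installed or every metric failed to fit.
if forecaster.generate():
    print("Wrote artifacts/predictions/trends.json")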
ado_git_repo_insights/ml/forecaster.py (new file)
@@ -0,0 +1,288 @@
+ from __future__ import annotations
+
+ import json
+ import logging
+ import time
+ from dataclasses import dataclass
+ from datetime import date, datetime, timedelta, timezone
+ from pathlib import Path
+ from typing import TYPE_CHECKING, Any
+
+ import pandas as pd
+
+ from .date_utils import align_to_monday
+
+ if TYPE_CHECKING:
+     from ..persistence.database import DatabaseManager
+
+ logger = logging.getLogger(__name__)
+
+ # Schema version (locked)
+ PREDICTIONS_SCHEMA_VERSION = 1
+ GENERATOR_ID = "prophet-v1.0"
+ HORIZON_WEEKS = 4
+
+ # Metric definitions
+ METRICS = [
+     ("pr_throughput", "count"),
+     ("cycle_time_minutes", "minutes"),
+     ("review_time_minutes", "minutes"),
+ ]
+
+
+ @dataclass
+ class ForecastValue:
+     """Single forecast value for a period."""
+
+     period_start: str  # YYYY-MM-DD (Monday-aligned)
+     predicted: float
+     lower_bound: float
+     upper_bound: float
+
+
+ @dataclass
+ class MetricForecast:
+     """Forecast for a single metric."""
+
+     metric: str
+     unit: str
+     horizon_weeks: int
+     values: list[dict[str, Any]]
+
+
+ class ProphetForecaster:
+     """Generate Prophet-based trend forecasts.
+
+     Reads weekly rollup data from SQLite and produces forecasts for:
+     - PR throughput (count per week)
+     - Cycle time (p50 in minutes)
+     - Review time (p50 in minutes, if available)
+     """
+
+     def __init__(
+         self,
+         db: DatabaseManager,
+         output_dir: Path,
+     ) -> None:
+         """Initialize the forecaster.
+
+         Args:
+             db: Database manager with PR data.
+             output_dir: Directory for output files.
+         """
+         self.db = db
+         self.output_dir = output_dir
+
+     def generate(self) -> bool:
+         """Generate predictions and write to trends.json.
+
+         Returns:
+             True if file was written successfully, False otherwise.
+
+         Behavior:
+             - No data available → write empty forecasts (valid schema, tab shows "empty state")
+             - Prophet fit error → don't write file (tab stays disabled)
+         """
+         start_time = time.perf_counter()
+
+         # Get weekly metrics from database
+         df = self._get_weekly_metrics()
+
+         if df.empty:
+             # No data - write empty forecasts
+             logger.info(
+                 "No PR data available for predictions - writing empty forecasts"
+             )
+             return self._write_predictions(forecasts=[])
+
+         # Try to import prophet
+         try:
+             from prophet import Prophet
+         except ImportError:
+             logger.warning(
+                 "Predictions skipped: Prophet not installed. "
+                 "Install with: pip install -e '.[ml]' "
+                 "and ensure cmdstan/prophet prerequisites are met. "
+                 "See https://facebook.github.io/prophet/docs/installation.html"
+             )
+             return False
+
+         forecasts: list[dict[str, Any]] = []
+
+         for metric, unit in METRICS:
+             try:
+                 forecast_data = self._forecast_metric(df, metric, unit, Prophet)
+                 if forecast_data:
+                     forecasts.append(forecast_data)
+             except Exception as e:
+                 logger.warning(f"Failed to forecast {metric}: {type(e).__name__}")
+                 # Continue with other metrics
+
+         if not forecasts:
+             # All metrics failed - don't write file
+             logger.warning("All metric forecasts failed - not writing predictions file")
+             return False
+
+         elapsed = time.perf_counter() - start_time
+         logger.info(f"Prophet forecasting completed in {elapsed:.2f}s")
+
+         return self._write_predictions(forecasts)
+
+     def _get_weekly_metrics(self) -> pd.DataFrame:
+         """Get weekly metrics from database.
+
+         Returns:
+             DataFrame with columns: week_start, pr_count, cycle_time_p50, review_time_p50
+         """
+         query = """
+             SELECT
+                 closed_date,
+                 cycle_time_minutes
+             FROM pull_requests
+             WHERE closed_date IS NOT NULL AND status = 'completed'
+             ORDER BY closed_date
+         """
+         df = pd.read_sql_query(query, self.db.connection)
+
+         if df.empty:
+             return pd.DataFrame()
+
+         # Convert to datetime and group by ISO week
+         df["closed_dt"] = pd.to_datetime(df["closed_date"])
+         df["iso_year"] = df["closed_dt"].dt.isocalendar().year
+         df["iso_week"] = df["closed_dt"].dt.isocalendar().week
+
+         # Aggregate by week
+         weekly = (
+             df.groupby(["iso_year", "iso_week"])
+             .agg(
+                 pr_count=("closed_date", "count"),
+                 cycle_time_p50=("cycle_time_minutes", lambda x: x.quantile(0.5)),
+             )
+             .reset_index()
+         )
+
+         # Calculate week start date (Monday) using dedicated utility
+         weekly["week_start"] = weekly.apply(
+             lambda row: align_to_monday(
+                 date.fromisocalendar(int(row["iso_year"]), int(row["iso_week"]), 1)
+             ),
+             axis=1,
+         )
+
+         # Rename for Prophet (ds = date, y = value)
+         weekly["ds"] = pd.to_datetime(weekly["week_start"])
+
+         return weekly
+
+     def _forecast_metric(
+         self,
+         df: pd.DataFrame,
+         metric: str,
+         unit: str,
+         prophet_cls: type,
+     ) -> dict[str, Any] | None:
+         """Forecast a single metric using Prophet.
+
+         Args:
+             df: Weekly metrics DataFrame.
+             metric: Metric name (pr_throughput, cycle_time_minutes, etc.)
+             unit: Unit for the metric.
+             prophet_cls: Prophet class (passed to avoid re-importing).
+
+         Returns:
+             Forecast dict or None if failed.
+         """
+         # Map metric to column
+         column_map = {
+             "pr_throughput": "pr_count",
+             "cycle_time_minutes": "cycle_time_p50",
+             "review_time_minutes": "cycle_time_p50",  # Use cycle time as proxy
+         }
+
+         column = column_map.get(metric)
+         if column not in df.columns:
+             return None
+
+         # Prepare Prophet DataFrame
+         prophet_df = df[["ds", column]].copy()
+         prophet_df = prophet_df.rename(columns={column: "y"})
+         prophet_df = prophet_df.dropna()
+
+         if len(prophet_df) < 2:
+             logger.warning(f"Insufficient data for {metric} forecast (need >= 2 weeks)")
+             return None
+
+         # Fit Prophet model
+         model = prophet_cls(
+             yearly_seasonality=False,
+             weekly_seasonality=False,
+             daily_seasonality=False,
+         )
+         model.fit(prophet_df)
+
+         # Generate future dataframe (next HORIZON_WEEKS weeks, Monday-aligned)
+         today = date.today()
+         next_monday = today + timedelta(days=(7 - today.weekday()) % 7)
+         if next_monday == today and today.weekday() != 0:
+             next_monday = today + timedelta(days=7)
+         # If today is Monday, start from today
+         if today.weekday() == 0:
+             next_monday = today
+
+         future_dates = [next_monday + timedelta(weeks=i) for i in range(HORIZON_WEEKS)]
+         future_df = pd.DataFrame({"ds": pd.to_datetime(future_dates)})
+
+         # Predict
+         forecast = model.predict(future_df)
+
+         # Build values
+         values: list[dict[str, Any]] = []
+         for _, row in forecast.iterrows():
+             period_start = pd.Timestamp(row["ds"]).date()
+
+             # Ensure Monday-aligned using utility
+             period_start = align_to_monday(period_start)
+
+             values.append(
+                 {
+                     "period_start": period_start.isoformat(),
+                     "predicted": round(float(row["yhat"]), 2),
+                     "lower_bound": max(0, round(float(row["yhat_lower"]), 2)),
+                     "upper_bound": round(float(row["yhat_upper"]), 2),
+                 }
+             )
+
+         return {
+             "metric": metric,
+             "unit": unit,
+             "horizon_weeks": HORIZON_WEEKS,
+             "values": values,
+         }
+
+     def _write_predictions(self, forecasts: list[dict[str, Any]]) -> bool:
+         """Write predictions to trends.json.
+
+         Args:
+             forecasts: List of forecast dicts.
+
+         Returns:
+             True if written successfully.
+         """
+         predictions_dir = self.output_dir / "predictions"
+         predictions_dir.mkdir(parents=True, exist_ok=True)
+
+         predictions = {
+             "schema_version": PREDICTIONS_SCHEMA_VERSION,
+             "generated_at": datetime.now(timezone.utc).isoformat(),
+             "is_stub": False,
+             "generated_by": GENERATOR_ID,
+             "forecasts": forecasts,
+         }
+
+         file_path = predictions_dir / "trends.json"
+         with file_path.open("w", encoding="utf-8") as f:
+             json.dump(predictions, f, indent=2, sort_keys=True)
+
+         logger.info(f"Generated predictions/trends.json with {len(forecasts)} metrics")
+         return True
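For orientation, the trends.json payload emitted by _write_predictions above can be read back as ordinary JSON. A hedged sketch follows; the path reuses the output directory assumed in the earlier example, the timestamps and numbers are purely illustrative, and only the key names, schema_version, and generator id come from the code in this diff.

import json
from pathlib import Path

# Load the file written by ProphetForecaster.generate(); the path assumes
# the same output_dir as the earlier sketch.
data = json.loads(
    Path("artifacts/predictions/trends.json").read_text(encoding="utf-8")
)

# The parsed structure resembles (numeric values illustrative):
# {
#     "schema_version": 1,
#     "generated_at": "2026-01-05T09:30:00+00:00",
#     "generated_by": "prophet-v1.0",
#     "is_stub": False,
#     "forecasts": [
#         {
#             "metric": "pr_throughput",
#             "unit": "count",
#             "horizon_weeks": 4,
#             "values": [
#                 {"period_start": "2026-01-12", "predicted": 12.4,
#                  "lower_bound": 8.1, "upper_bound": 16.9},
#                 # ... three more Monday-aligned weeks
#             ],
#         },
#         # ... cycle_time_minutes and review_time_minutes entries
#     ],
# }
for forecast in data["forecasts"]:
    print(forecast["metric"], forecast["values"][0]["predicted"])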