ado-git-repo-insights 1.2.1__py3-none-any.whl → 2.7.4__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- ado_git_repo_insights/__init__.py +3 -3
- ado_git_repo_insights/cli.py +703 -354
- ado_git_repo_insights/config.py +186 -186
- ado_git_repo_insights/extractor/__init__.py +1 -1
- ado_git_repo_insights/extractor/ado_client.py +452 -246
- ado_git_repo_insights/extractor/pr_extractor.py +239 -239
- ado_git_repo_insights/ml/__init__.py +13 -0
- ado_git_repo_insights/ml/date_utils.py +70 -0
- ado_git_repo_insights/ml/forecaster.py +288 -0
- ado_git_repo_insights/ml/insights.py +497 -0
- ado_git_repo_insights/persistence/__init__.py +1 -1
- ado_git_repo_insights/persistence/database.py +193 -193
- ado_git_repo_insights/persistence/models.py +207 -145
- ado_git_repo_insights/persistence/repository.py +662 -376
- ado_git_repo_insights/transform/__init__.py +1 -1
- ado_git_repo_insights/transform/aggregators.py +950 -0
- ado_git_repo_insights/transform/csv_generator.py +132 -132
- ado_git_repo_insights/utils/__init__.py +1 -1
- ado_git_repo_insights/utils/datetime_utils.py +101 -101
- ado_git_repo_insights/utils/logging_config.py +172 -172
- ado_git_repo_insights/utils/run_summary.py +207 -206
- {ado_git_repo_insights-1.2.1.dist-info → ado_git_repo_insights-2.7.4.dist-info}/METADATA +56 -15
- ado_git_repo_insights-2.7.4.dist-info/RECORD +27 -0
- {ado_git_repo_insights-1.2.1.dist-info → ado_git_repo_insights-2.7.4.dist-info}/licenses/LICENSE +21 -21
- ado_git_repo_insights-1.2.1.dist-info/RECORD +0 -22
- {ado_git_repo_insights-1.2.1.dist-info → ado_git_repo_insights-2.7.4.dist-info}/WHEEL +0 -0
- {ado_git_repo_insights-1.2.1.dist-info → ado_git_repo_insights-2.7.4.dist-info}/entry_points.txt +0 -0
- {ado_git_repo_insights-1.2.1.dist-info → ado_git_repo_insights-2.7.4.dist-info}/top_level.txt +0 -0
|
@@ -0,0 +1,288 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
import json
|
|
4
|
+
import logging
|
|
5
|
+
import time
|
|
6
|
+
from dataclasses import dataclass
|
|
7
|
+
from datetime import date, datetime, timedelta, timezone
|
|
8
|
+
from pathlib import Path
|
|
9
|
+
from typing import TYPE_CHECKING, Any
|
|
10
|
+
|
|
11
|
+
import pandas as pd
|
|
12
|
+
|
|
13
|
+
from .date_utils import align_to_monday
|
|
14
|
+
|
|
15
|
+
if TYPE_CHECKING:
|
|
16
|
+
from ..persistence.database import DatabaseManager
|
|
17
|
+
|
|
18
|
+
logger = logging.getLogger(__name__)

# Locked version number of the predictions JSON layout; emitted as the
# "schema_version" field of predictions/trends.json.
PREDICTIONS_SCHEMA_VERSION = 1

# Generator identifier emitted as the "generated_by" field.
GENERATOR_ID = "prophet-v1.0"

# How many weekly periods are forecast for each metric.
HORIZON_WEEKS = 4

# (metric name, unit) pairs, listed in the order they are forecast.
METRICS = [
    ("pr_throughput", "count"),
    ("cycle_time_minutes", "minutes"),
    ("review_time_minutes", "minutes"),
]
|
|
31
|
+
|
|
32
|
+
|
|
33
|
+
@dataclass
class ForecastValue:
    """One forecast point for a single weekly period.

    The field names match the keys of the per-period dicts that
    ``ProphetForecaster._forecast_metric`` builds.

    Attributes:
        period_start: Period start date as ``YYYY-MM-DD`` (Monday-aligned).
        predicted: Forecast value for the period.
        lower_bound: Lower bound reported alongside the forecast.
        upper_bound: Upper bound reported alongside the forecast.
    """

    period_start: str
    predicted: float
    lower_bound: float
    upper_bound: float
|
|
41
|
+
|
|
42
|
+
|
|
43
|
+
@dataclass
class MetricForecast:
    """Forecast series for one metric.

    The field names match the keys of the dict returned by
    ``ProphetForecaster._forecast_metric``.

    Attributes:
        metric: Metric name, e.g. ``"pr_throughput"``.
        unit: Unit of the forecast values, e.g. ``"count"`` or ``"minutes"``.
        horizon_weeks: Number of weekly periods covered by ``values``.
        values: One dict per forecast period.
    """

    metric: str
    unit: str
    horizon_weeks: int
    values: list[dict[str, Any]]
|
|
51
|
+
|
|
52
|
+
|
|
53
|
+
class ProphetForecaster:
    """Generate Prophet-based trend forecasts.

    Reads completed pull requests from the database, rolls them up per ISO
    week and produces forecasts for:
    - PR throughput (count per week)
    - Cycle time (p50 in minutes)
    - Review time (currently forecast from the cycle-time p50 as a proxy)

    Results are written to ``<output_dir>/predictions/trends.json``.
    """

    def __init__(
        self,
        db: DatabaseManager,
        output_dir: Path,
    ) -> None:
        """Initialize the forecaster.

        Args:
            db: Database manager with PR data.
            output_dir: Directory for output files.
        """
        self.db = db
        self.output_dir = output_dir

    def generate(self) -> bool:
        """Generate predictions and write to trends.json.

        Returns:
            True if the file was written, False if forecasting was skipped.

        Behavior:
            - No data available → write empty forecasts (valid schema, tab shows "empty state")
            - Prophet not installed → log a warning, don't write file
            - A metric that raises or has too little data is skipped; the
              remaining metrics are still forecast
            - No metric produced a forecast → don't write file (tab stays disabled)
        """
        start_time = time.perf_counter()

        # Get weekly metrics from database
        df = self._get_weekly_metrics()

        if df.empty:
            # No data - write empty forecasts. This happens before the Prophet
            # import, so it works even when Prophet is not installed.
            logger.info(
                "No PR data available for predictions - writing empty forecasts"
            )
            return self._write_predictions(forecasts=[])

        # Prophet is an optional dependency, so import it lazily.
        try:
            from prophet import Prophet
        except ImportError:
            logger.warning(
                "Predictions skipped: Prophet not installed. "
                "Install with: pip install -e '.[ml]' "
                "and ensure cmdstan/prophet prerequisites are met. "
                "See https://facebook.github.io/prophet/docs/installation.html"
            )
            return False

        forecasts: list[dict[str, Any]] = []

        for metric, unit in METRICS:
            try:
                forecast_data = self._forecast_metric(df, metric, unit, Prophet)
            except Exception as e:
                # Best effort per metric: one failing fit must not block the
                # others. Only the exception type is logged, not its message.
                logger.warning(f"Failed to forecast {metric}: {type(e).__name__}")
                continue
            if forecast_data:
                forecasts.append(forecast_data)

        if not forecasts:
            # All metrics failed - don't write file
            logger.warning("All metric forecasts failed - not writing predictions file")
            return False

        elapsed = time.perf_counter() - start_time
        logger.info(f"Prophet forecasting completed in {elapsed:.2f}s")

        return self._write_predictions(forecasts)

    def _get_weekly_metrics(self) -> pd.DataFrame:
        """Get weekly metrics from database.

        Returns:
            DataFrame with one row per ISO week and the columns iso_year,
            iso_week, pr_count, cycle_time_p50, week_start and ds (week_start
            as a pandas datetime). An empty DataFrame is returned when there
            are no completed pull requests.
        """
        query = """
            SELECT
                closed_date,
                cycle_time_minutes
            FROM pull_requests
            WHERE closed_date IS NOT NULL AND status = 'completed'
            ORDER BY closed_date
        """
        df = pd.read_sql_query(query, self.db.connection)

        if df.empty:
            return pd.DataFrame()

        # Convert to datetime and group by ISO week. The ISO calendar is
        # computed once and reused for both key columns.
        # NOTE(review): to_datetime is called without an explicit format;
        # confirm the stored closed_date strings share a single format.
        df["closed_dt"] = pd.to_datetime(df["closed_date"])
        iso_calendar = df["closed_dt"].dt.isocalendar()
        df["iso_year"] = iso_calendar.year
        df["iso_week"] = iso_calendar.week

        # Aggregate by week
        weekly = (
            df.groupby(["iso_year", "iso_week"])
            .agg(
                pr_count=("closed_date", "count"),
                cycle_time_p50=("cycle_time_minutes", lambda x: x.quantile(0.5)),
            )
            .reset_index()
        )

        # Calculate week start date (Monday) using dedicated utility
        weekly["week_start"] = [
            align_to_monday(date.fromisocalendar(int(iso_year), int(iso_week), 1))
            for iso_year, iso_week in zip(weekly["iso_year"], weekly["iso_week"])
        ]

        # Prophet expects the date column to be named "ds".
        weekly["ds"] = pd.to_datetime(weekly["week_start"])

        return weekly

    @staticmethod
    def _next_monday(today: date) -> date:
        """Return ``today`` if it is a Monday, otherwise the following Monday."""
        # weekday() is 0 for Monday, so the offset is 0 on Mondays and
        # 1..6 on every other day.
        return today + timedelta(days=(7 - today.weekday()) % 7)

    def _forecast_metric(
        self,
        df: pd.DataFrame,
        metric: str,
        unit: str,
        prophet_cls: type,
    ) -> dict[str, Any] | None:
        """Forecast a single metric using Prophet.

        Args:
            df: Weekly metrics DataFrame.
            metric: Metric name (pr_throughput, cycle_time_minutes, etc.)
            unit: Unit for the metric.
            prophet_cls: Prophet class (passed to avoid re-importing).

        Returns:
            Dict with the keys metric, unit, horizon_weeks and values, or
            None when the metric is unknown, its column is missing, or fewer
            than two weeks of data are available. Errors raised while fitting
            or predicting propagate to the caller.
        """
        # Map metric to column
        column_map = {
            "pr_throughput": "pr_count",
            "cycle_time_minutes": "cycle_time_p50",
            "review_time_minutes": "cycle_time_p50",  # Use cycle time as proxy
        }

        # An unknown metric maps to None, which is never a column name, so it
        # is rejected by the same check as a missing column.
        column = column_map.get(metric)
        if column not in df.columns:
            return None

        # Prepare Prophet DataFrame (ds = date, y = value)
        prophet_df = df[["ds", column]].rename(columns={column: "y"}).dropna()

        if len(prophet_df) < 2:
            logger.warning(f"Insufficient data for {metric} forecast (need >= 2 weeks)")
            return None

        # Fit Prophet model
        model = prophet_cls(
            yearly_seasonality=False,
            weekly_seasonality=False,
            daily_seasonality=False,
        )
        model.fit(prophet_df)

        # Generate future dataframe (next HORIZON_WEEKS weeks, Monday-aligned).
        # The first period is today when today is a Monday; date.today() uses
        # the local system date.
        first_period = self._next_monday(date.today())
        future_dates = [first_period + timedelta(weeks=i) for i in range(HORIZON_WEEKS)]
        future_df = pd.DataFrame({"ds": pd.to_datetime(future_dates)})

        # Predict
        forecast = model.predict(future_df)

        # Build values
        values: list[dict[str, Any]] = []
        for ds, yhat, yhat_lower, yhat_upper in zip(
            forecast["ds"],
            forecast["yhat"],
            forecast["yhat_lower"],
            forecast["yhat_upper"],
        ):
            # Ensure Monday-aligned using utility
            period_start = align_to_monday(pd.Timestamp(ds).date())

            values.append(
                {
                    "period_start": period_start.isoformat(),
                    "predicted": round(float(yhat), 2),
                    # Only the lower bound is clamped at zero.
                    "lower_bound": max(0, round(float(yhat_lower), 2)),
                    "upper_bound": round(float(yhat_upper), 2),
                }
            )

        return {
            "metric": metric,
            "unit": unit,
            "horizon_weeks": HORIZON_WEEKS,
            "values": values,
        }

    def _write_predictions(self, forecasts: list[dict[str, Any]]) -> bool:
        """Write predictions to trends.json.

        Creates ``<output_dir>/predictions`` if needed and overwrites
        ``trends.json`` inside it.

        Args:
            forecasts: List of forecast dicts.

        Returns:
            True once the file has been written. File-system errors are not
            caught here and propagate to the caller.
        """
        predictions_dir = self.output_dir / "predictions"
        predictions_dir.mkdir(parents=True, exist_ok=True)

        predictions = {
            "schema_version": PREDICTIONS_SCHEMA_VERSION,
            "generated_at": datetime.now(timezone.utc).isoformat(),
            "is_stub": False,
            "generated_by": GENERATOR_ID,
            "forecasts": forecasts,
        }

        file_path = predictions_dir / "trends.json"
        with file_path.open("w", encoding="utf-8") as f:
            json.dump(predictions, f, indent=2, sort_keys=True)

        logger.info(f"Generated predictions/trends.json with {len(forecasts)} metrics")
        return True
|