gitflow-analytics 1.0.1__py3-none-any.whl → 1.3.6__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (119)
  1. gitflow_analytics/__init__.py +11 -11
  2. gitflow_analytics/_version.py +2 -2
  3. gitflow_analytics/classification/__init__.py +31 -0
  4. gitflow_analytics/classification/batch_classifier.py +752 -0
  5. gitflow_analytics/classification/classifier.py +464 -0
  6. gitflow_analytics/classification/feature_extractor.py +725 -0
  7. gitflow_analytics/classification/linguist_analyzer.py +574 -0
  8. gitflow_analytics/classification/model.py +455 -0
  9. gitflow_analytics/cli.py +4490 -378
  10. gitflow_analytics/cli_rich.py +503 -0
  11. gitflow_analytics/config/__init__.py +43 -0
  12. gitflow_analytics/config/errors.py +261 -0
  13. gitflow_analytics/config/loader.py +904 -0
  14. gitflow_analytics/config/profiles.py +264 -0
  15. gitflow_analytics/config/repository.py +124 -0
  16. gitflow_analytics/config/schema.py +441 -0
  17. gitflow_analytics/config/validator.py +154 -0
  18. gitflow_analytics/config.py +44 -398
  19. gitflow_analytics/core/analyzer.py +1320 -172
  20. gitflow_analytics/core/branch_mapper.py +132 -132
  21. gitflow_analytics/core/cache.py +1554 -175
  22. gitflow_analytics/core/data_fetcher.py +1193 -0
  23. gitflow_analytics/core/identity.py +571 -185
  24. gitflow_analytics/core/metrics_storage.py +526 -0
  25. gitflow_analytics/core/progress.py +372 -0
  26. gitflow_analytics/core/schema_version.py +269 -0
  27. gitflow_analytics/extractors/base.py +13 -11
  28. gitflow_analytics/extractors/ml_tickets.py +1100 -0
  29. gitflow_analytics/extractors/story_points.py +77 -59
  30. gitflow_analytics/extractors/tickets.py +841 -89
  31. gitflow_analytics/identity_llm/__init__.py +6 -0
  32. gitflow_analytics/identity_llm/analysis_pass.py +231 -0
  33. gitflow_analytics/identity_llm/analyzer.py +464 -0
  34. gitflow_analytics/identity_llm/models.py +76 -0
  35. gitflow_analytics/integrations/github_integration.py +258 -87
  36. gitflow_analytics/integrations/jira_integration.py +572 -123
  37. gitflow_analytics/integrations/orchestrator.py +206 -82
  38. gitflow_analytics/metrics/activity_scoring.py +322 -0
  39. gitflow_analytics/metrics/branch_health.py +470 -0
  40. gitflow_analytics/metrics/dora.py +542 -179
  41. gitflow_analytics/models/database.py +986 -59
  42. gitflow_analytics/pm_framework/__init__.py +115 -0
  43. gitflow_analytics/pm_framework/adapters/__init__.py +50 -0
  44. gitflow_analytics/pm_framework/adapters/jira_adapter.py +1845 -0
  45. gitflow_analytics/pm_framework/base.py +406 -0
  46. gitflow_analytics/pm_framework/models.py +211 -0
  47. gitflow_analytics/pm_framework/orchestrator.py +652 -0
  48. gitflow_analytics/pm_framework/registry.py +333 -0
  49. gitflow_analytics/qualitative/__init__.py +29 -0
  50. gitflow_analytics/qualitative/chatgpt_analyzer.py +259 -0
  51. gitflow_analytics/qualitative/classifiers/__init__.py +13 -0
  52. gitflow_analytics/qualitative/classifiers/change_type.py +742 -0
  53. gitflow_analytics/qualitative/classifiers/domain_classifier.py +506 -0
  54. gitflow_analytics/qualitative/classifiers/intent_analyzer.py +535 -0
  55. gitflow_analytics/qualitative/classifiers/llm/__init__.py +35 -0
  56. gitflow_analytics/qualitative/classifiers/llm/base.py +193 -0
  57. gitflow_analytics/qualitative/classifiers/llm/batch_processor.py +383 -0
  58. gitflow_analytics/qualitative/classifiers/llm/cache.py +479 -0
  59. gitflow_analytics/qualitative/classifiers/llm/cost_tracker.py +435 -0
  60. gitflow_analytics/qualitative/classifiers/llm/openai_client.py +403 -0
  61. gitflow_analytics/qualitative/classifiers/llm/prompts.py +373 -0
  62. gitflow_analytics/qualitative/classifiers/llm/response_parser.py +287 -0
  63. gitflow_analytics/qualitative/classifiers/llm_commit_classifier.py +607 -0
  64. gitflow_analytics/qualitative/classifiers/risk_analyzer.py +438 -0
  65. gitflow_analytics/qualitative/core/__init__.py +13 -0
  66. gitflow_analytics/qualitative/core/llm_fallback.py +657 -0
  67. gitflow_analytics/qualitative/core/nlp_engine.py +382 -0
  68. gitflow_analytics/qualitative/core/pattern_cache.py +479 -0
  69. gitflow_analytics/qualitative/core/processor.py +673 -0
  70. gitflow_analytics/qualitative/enhanced_analyzer.py +2236 -0
  71. gitflow_analytics/qualitative/example_enhanced_usage.py +420 -0
  72. gitflow_analytics/qualitative/models/__init__.py +25 -0
  73. gitflow_analytics/qualitative/models/schemas.py +306 -0
  74. gitflow_analytics/qualitative/utils/__init__.py +13 -0
  75. gitflow_analytics/qualitative/utils/batch_processor.py +339 -0
  76. gitflow_analytics/qualitative/utils/cost_tracker.py +345 -0
  77. gitflow_analytics/qualitative/utils/metrics.py +361 -0
  78. gitflow_analytics/qualitative/utils/text_processing.py +285 -0
  79. gitflow_analytics/reports/__init__.py +100 -0
  80. gitflow_analytics/reports/analytics_writer.py +550 -18
  81. gitflow_analytics/reports/base.py +648 -0
  82. gitflow_analytics/reports/branch_health_writer.py +322 -0
  83. gitflow_analytics/reports/classification_writer.py +924 -0
  84. gitflow_analytics/reports/cli_integration.py +427 -0
  85. gitflow_analytics/reports/csv_writer.py +1700 -216
  86. gitflow_analytics/reports/data_models.py +504 -0
  87. gitflow_analytics/reports/database_report_generator.py +427 -0
  88. gitflow_analytics/reports/example_usage.py +344 -0
  89. gitflow_analytics/reports/factory.py +499 -0
  90. gitflow_analytics/reports/formatters.py +698 -0
  91. gitflow_analytics/reports/html_generator.py +1116 -0
  92. gitflow_analytics/reports/interfaces.py +489 -0
  93. gitflow_analytics/reports/json_exporter.py +2770 -0
  94. gitflow_analytics/reports/narrative_writer.py +2289 -158
  95. gitflow_analytics/reports/story_point_correlation.py +1144 -0
  96. gitflow_analytics/reports/weekly_trends_writer.py +389 -0
  97. gitflow_analytics/training/__init__.py +5 -0
  98. gitflow_analytics/training/model_loader.py +377 -0
  99. gitflow_analytics/training/pipeline.py +550 -0
  100. gitflow_analytics/tui/__init__.py +5 -0
  101. gitflow_analytics/tui/app.py +724 -0
  102. gitflow_analytics/tui/screens/__init__.py +8 -0
  103. gitflow_analytics/tui/screens/analysis_progress_screen.py +496 -0
  104. gitflow_analytics/tui/screens/configuration_screen.py +523 -0
  105. gitflow_analytics/tui/screens/loading_screen.py +348 -0
  106. gitflow_analytics/tui/screens/main_screen.py +321 -0
  107. gitflow_analytics/tui/screens/results_screen.py +722 -0
  108. gitflow_analytics/tui/widgets/__init__.py +7 -0
  109. gitflow_analytics/tui/widgets/data_table.py +255 -0
  110. gitflow_analytics/tui/widgets/export_modal.py +301 -0
  111. gitflow_analytics/tui/widgets/progress_widget.py +187 -0
  112. gitflow_analytics-1.3.6.dist-info/METADATA +1015 -0
  113. gitflow_analytics-1.3.6.dist-info/RECORD +122 -0
  114. gitflow_analytics-1.0.1.dist-info/METADATA +0 -463
  115. gitflow_analytics-1.0.1.dist-info/RECORD +0 -31
  116. {gitflow_analytics-1.0.1.dist-info → gitflow_analytics-1.3.6.dist-info}/WHEEL +0 -0
  117. {gitflow_analytics-1.0.1.dist-info → gitflow_analytics-1.3.6.dist-info}/entry_points.txt +0 -0
  118. {gitflow_analytics-1.0.1.dist-info → gitflow_analytics-1.3.6.dist-info}/licenses/LICENSE +0 -0
  119. {gitflow_analytics-1.0.1.dist-info → gitflow_analytics-1.3.6.dist-info}/top_level.txt +0 -0
gitflow_analytics/metrics/dora.py
@@ -1,289 +1,346 @@
 """DORA (DevOps Research and Assessment) metrics calculation."""
-from datetime import datetime
-from typing import Any, Dict, List
+
+from datetime import datetime, timedelta
+from typing import Any, Optional
 
 import numpy as np
+import pytz
 
 
 class DORAMetricsCalculator:
     """Calculate DORA metrics for software delivery performance."""
-
-    def __init__(self):
+
+    def __init__(self) -> None:
         """Initialize DORA metrics calculator."""
-        self.deployment_patterns = [
-            'deploy', 'release', 'ship', 'live', 'production', 'prod'
-        ]
-        self.failure_patterns = [
-            'revert', 'rollback', 'hotfix', 'emergency', 'incident', 'outage'
-        ]
-
-    def calculate_dora_metrics(self,
-                               commits: List[Dict[str, Any]],
-                               prs: List[Dict[str, Any]],
-                               start_date: datetime,
-                               end_date: datetime) -> Dict[str, Any]:
+        self.deployment_patterns = ["deploy", "release", "ship", "live", "production", "prod"]
+        self.failure_patterns = ["revert", "rollback", "hotfix", "emergency", "incident", "outage"]
+
+    def _normalize_timestamp_to_utc(self, timestamp: Optional[datetime]) -> Optional[datetime]:
+        """Normalize any timestamp to UTC timezone-aware datetime.
+
+        WHY: Ensures all timestamps are timezone-aware UTC to prevent
+        comparison errors when sorting mixed timezone objects.
+
+        Args:
+            timestamp: DateTime object that may be timezone-naive, timezone-aware, or None
+
+        Returns:
+            Timezone-aware datetime in UTC, or None if input is None
+        """
+        if timestamp is None:
+            return None
+
+        if timestamp.tzinfo is None:
+            # Assume naive timestamps are UTC
+            return timestamp.replace(tzinfo=pytz.UTC)
+        else:
+            # Convert timezone-aware timestamps to UTC
+            return timestamp.astimezone(pytz.UTC)
+
+    def calculate_dora_metrics(
+        self,
+        commits: list[dict[str, Any]],
+        prs: list[dict[str, Any]],
+        start_date: datetime,
+        end_date: datetime,
+    ) -> dict[str, Any]:
         """Calculate the four key DORA metrics."""
-
+
         # Identify deployments and failures
         deployments = self._identify_deployments(commits, prs)
         failures = self._identify_failures(commits, prs)
-
+
         # Calculate metrics
         deployment_frequency = self._calculate_deployment_frequency(
             deployments, start_date, end_date
         )
-
+
         lead_time = self._calculate_lead_time(prs, deployments)
-
-        change_failure_rate = self._calculate_change_failure_rate(
-            deployments, failures
-        )
-
+
+        change_failure_rate = self._calculate_change_failure_rate(deployments, failures)
+
         mttr = self._calculate_mttr(failures, commits)
-
+
        # Determine performance level
         performance_level = self._determine_performance_level(
             deployment_frequency, lead_time, change_failure_rate, mttr
         )
-
+
         return {
-            'deployment_frequency': deployment_frequency,
-            'lead_time_hours': lead_time,
-            'change_failure_rate': change_failure_rate,
-            'mttr_hours': mttr,
-            'performance_level': performance_level,
-            'total_deployments': len(deployments),
-            'total_failures': len(failures),
-            'metrics_period_weeks': (end_date - start_date).days / 7
+            "deployment_frequency": deployment_frequency,
+            "lead_time_hours": lead_time,
+            "change_failure_rate": change_failure_rate,
+            "mttr_hours": mttr,
+            "performance_level": performance_level,
+            "total_deployments": len(deployments),
+            "total_failures": len(failures),
+            "metrics_period_weeks": (end_date - start_date).days / 7,
         }
-
-    def _identify_deployments(self, commits: List[Dict[str, Any]],
-                              prs: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
+
+    def _identify_deployments(
+        self, commits: list[dict[str, Any]], prs: list[dict[str, Any]]
+    ) -> list[dict[str, Any]]:
         """Identify deployment events from commits and PRs."""
         deployments = []
-
+
         # Check commits for deployment patterns
         for commit in commits:
-            message_lower = commit['message'].lower()
+            message_lower = commit["message"].lower()
             if any(pattern in message_lower for pattern in self.deployment_patterns):
-                deployments.append({
-                    'type': 'commit',
-                    'timestamp': commit['timestamp'],
-                    'identifier': commit['hash'],
-                    'message': commit['message']
-                })
-
+                deployments.append(
+                    {
+                        "type": "commit",
+                        "timestamp": self._normalize_timestamp_to_utc(commit["timestamp"]),
+                        "identifier": commit["hash"],
+                        "message": commit["message"],
+                    }
+                )
+
         # Check PR titles and labels for deployments
         for pr in prs:
             # Check title
-            title_lower = pr.get('title', '').lower()
+            title_lower = pr.get("title", "").lower()
             if any(pattern in title_lower for pattern in self.deployment_patterns):
-                deployments.append({
-                    'type': 'pr',
-                    'timestamp': pr.get('merged_at', pr.get('created_at')),
-                    'identifier': f"PR#{pr['number']}",
-                    'message': pr['title']
-                })
+                raw_timestamp = pr.get("merged_at", pr.get("created_at"))
+                deployments.append(
+                    {
+                        "type": "pr",
+                        "timestamp": self._normalize_timestamp_to_utc(raw_timestamp),
+                        "identifier": f"PR#{pr.get('number', 'unknown')}",
+                        "message": pr["title"],
+                    }
+                )
                 continue
-
+
             # Check labels
-            labels_lower = [label.lower() for label in pr.get('labels', [])]
-            if any(any(pattern in label for pattern in self.deployment_patterns)
-                   for label in labels_lower):
-                deployments.append({
-                    'type': 'pr',
-                    'timestamp': pr.get('merged_at', pr.get('created_at')),
-                    'identifier': f"PR#{pr['number']}",
-                    'message': pr['title']
-                })
-
-        # Remove duplicates and sort by timestamp
+            labels_lower = [label.lower() for label in pr.get("labels", [])]
+            if any(
+                any(pattern in label for pattern in self.deployment_patterns)
+                for label in labels_lower
+            ):
+                raw_timestamp = pr.get("merged_at", pr.get("created_at"))
+                deployments.append(
+                    {
+                        "type": "pr",
+                        "timestamp": self._normalize_timestamp_to_utc(raw_timestamp),
+                        "identifier": f"PR#{pr.get('number', 'unknown')}",
+                        "message": pr["title"],
+                    }
+                )
+
+        # Filter out deployments with None timestamps
+        deployments = [d for d in deployments if d["timestamp"] is not None]
+
+        # Remove duplicates and sort by timestamp (now all are timezone-aware UTC)
         seen = set()
         unique_deployments = []
-        for dep in sorted(deployments, key=lambda x: x['timestamp']):
+        for dep in sorted(deployments, key=lambda x: x["timestamp"]):
             key = f"{dep['type']}:{dep['identifier']}"
             if key not in seen:
                 seen.add(key)
                 unique_deployments.append(dep)
-
+
         return unique_deployments
-
-    def _identify_failures(self, commits: List[Dict[str, Any]],
-                           prs: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
+
+    def _identify_failures(
+        self, commits: list[dict[str, Any]], prs: list[dict[str, Any]]
+    ) -> list[dict[str, Any]]:
         """Identify failure events from commits and PRs."""
         failures = []
-
+
         # Check commits for failure patterns
         for commit in commits:
-            message_lower = commit['message'].lower()
+            message_lower = commit["message"].lower()
             if any(pattern in message_lower for pattern in self.failure_patterns):
-                failures.append({
-                    'type': 'commit',
-                    'timestamp': commit['timestamp'],
-                    'identifier': commit['hash'],
-                    'message': commit['message'],
-                    'is_hotfix': 'hotfix' in message_lower or 'emergency' in message_lower
-                })
-
+                failures.append(
+                    {
+                        "type": "commit",
+                        "timestamp": self._normalize_timestamp_to_utc(commit["timestamp"]),
+                        "identifier": commit["hash"],
+                        "message": commit["message"],
+                        "is_hotfix": "hotfix" in message_lower or "emergency" in message_lower,
+                    }
+                )
+
         # Check PRs for failure patterns
         for pr in prs:
-            title_lower = pr.get('title', '').lower()
-            labels_lower = [label.lower() for label in pr.get('labels', [])]
-
-            is_failure = (
-                any(pattern in title_lower for pattern in self.failure_patterns) or
-                any(any(pattern in label for pattern in self.failure_patterns)
-                    for label in labels_lower)
+            title_lower = pr.get("title", "").lower()
+            labels_lower = [label.lower() for label in pr.get("labels", [])]
+
+            is_failure = any(pattern in title_lower for pattern in self.failure_patterns) or any(
+                any(pattern in label for pattern in self.failure_patterns) for label in labels_lower
             )
-
+
             if is_failure:
-                failures.append({
-                    'type': 'pr',
-                    'timestamp': pr.get('merged_at', pr.get('created_at')),
-                    'identifier': f"PR#{pr['number']}",
-                    'message': pr['title'],
-                    'is_hotfix': 'hotfix' in title_lower or 'emergency' in title_lower
-                })
-
+                raw_timestamp = pr.get("merged_at", pr.get("created_at"))
+                failures.append(
+                    {
+                        "type": "pr",
+                        "timestamp": self._normalize_timestamp_to_utc(raw_timestamp),
+                        "identifier": f"PR#{pr.get('number', 'unknown')}",
+                        "message": pr["title"],
+                        "is_hotfix": "hotfix" in title_lower or "emergency" in title_lower,
+                    }
+                )
+
+        # Filter out failures with None timestamps
+        failures = [f for f in failures if f["timestamp"] is not None]
+
         return failures
-
-    def _calculate_deployment_frequency(self, deployments: List[Dict[str, Any]],
-                                        start_date: datetime,
-                                        end_date: datetime) -> Dict[str, Any]:
+
+    def _calculate_deployment_frequency(
+        self, deployments: list[dict[str, Any]], start_date: datetime, end_date: datetime
+    ) -> dict[str, Any]:
         """Calculate deployment frequency metrics."""
         if not deployments:
-            return {
-                'daily_average': 0,
-                'weekly_average': 0,
-                'category': 'Low'
-            }
-
-        # Filter deployments in date range
+            return {"daily_average": 0, "weekly_average": 0, "category": "Low"}
+
+        # Normalize date range to timezone-aware UTC
+        start_date_utc = self._normalize_timestamp_to_utc(start_date)
+        end_date_utc = self._normalize_timestamp_to_utc(end_date)
+
+        # Handle case where normalization failed
+        if start_date_utc is None or end_date_utc is None:
+            return {"daily_average": 0, "weekly_average": 0, "category": "Low"}
+
+        # Filter deployments in date range (timestamps are already normalized to UTC)
         period_deployments = [
-            d for d in deployments
-            if start_date <= d['timestamp'] <= end_date
+            d for d in deployments if start_date_utc <= d["timestamp"] <= end_date_utc
         ]
-
-        days = (end_date - start_date).days
+
+        days = (end_date_utc - start_date_utc).days
         weeks = days / 7
-
+
         daily_avg = len(period_deployments) / days if days > 0 else 0
         weekly_avg = len(period_deployments) / weeks if weeks > 0 else 0
-
+
         # Categorize based on DORA standards
         if daily_avg >= 1:
-            category = 'Elite'  # Multiple deploys per day
+            category = "Elite"  # Multiple deploys per day
         elif weekly_avg >= 1:
-            category = 'High'  # Between once per day and once per week
+            category = "High"  # Between once per day and once per week
         elif weekly_avg >= 0.25:
-            category = 'Medium'  # Between once per week and once per month
+            category = "Medium"  # Between once per week and once per month
         else:
-            category = 'Low'  # Less than once per month
-
-        return {
-            'daily_average': daily_avg,
-            'weekly_average': weekly_avg,
-            'category': category
-        }
-
-    def _calculate_lead_time(self, prs: List[Dict[str, Any]],
-                             deployments: List[Dict[str, Any]]) -> float:
+            category = "Low"  # Less than once per month
+
+        return {"daily_average": daily_avg, "weekly_average": weekly_avg, "category": category}
+
+    def _calculate_lead_time(
+        self, prs: list[dict[str, Any]], deployments: list[dict[str, Any]]
+    ) -> float:
         """Calculate lead time for changes in hours."""
         if not prs:
             return 0
-
+
         lead_times = []
-
+
         for pr in prs:
-            if not pr.get('created_at') or not pr.get('merged_at'):
+            if not pr.get("created_at") or not pr.get("merged_at"):
                 continue
-
+
             # Calculate time from PR creation to merge
-            lead_time = (pr['merged_at'] - pr['created_at']).total_seconds() / 3600
+            # Normalize both timestamps to UTC
+            created_at = self._normalize_timestamp_to_utc(pr["created_at"])
+            merged_at = self._normalize_timestamp_to_utc(pr["merged_at"])
+
+            # Skip if either timestamp is None after normalization
+            if created_at is None or merged_at is None:
+                continue
+
+            lead_time = (merged_at - created_at).total_seconds() / 3600
             lead_times.append(lead_time)
-
+
         if not lead_times:
             return 0
-
+
         # Return median lead time
         return float(np.median(lead_times))
-
-    def _calculate_change_failure_rate(self, deployments: List[Dict[str, Any]],
-                                       failures: List[Dict[str, Any]]) -> float:
+
+    def _calculate_change_failure_rate(
+        self, deployments: list[dict[str, Any]], failures: list[dict[str, Any]]
+    ) -> float:
         """Calculate the percentage of deployments causing failures."""
         if not deployments:
             return 0
-
+
         # Count failures that occurred within 24 hours of a deployment
         failure_causing_deployments = 0
-
+
         for deployment in deployments:
-            deploy_time = deployment['timestamp']
-
+            deploy_time = deployment["timestamp"]  # Already normalized to UTC
+
             # Check if any failure occurred within 24 hours
             for failure in failures:
-                failure_time = failure['timestamp']
+                failure_time = failure["timestamp"]  # Already normalized to UTC
+
                 time_diff = abs((failure_time - deploy_time).total_seconds() / 3600)
-
+
                 if time_diff <= 24:  # Within 24 hours
                     failure_causing_deployments += 1
                     break
-
+
         return (failure_causing_deployments / len(deployments)) * 100
-
-    def _calculate_mttr(self, failures: List[Dict[str, Any]],
-                        commits: List[Dict[str, Any]]) -> float:
+
+    def _calculate_mttr(
+        self, failures: list[dict[str, Any]], commits: list[dict[str, Any]]
+    ) -> float:
         """Calculate mean time to recovery in hours."""
         if not failures:
             return 0
-
+
         recovery_times = []
-
+
         # For each failure, find the recovery time
-        for i, failure in enumerate(failures):
-            failure_time = failure['timestamp']
-
+        for _i, failure in enumerate(failures):
+            failure_time = failure["timestamp"]  # Already normalized to UTC
+
             # Look for recovery indicators in subsequent commits
             recovery_time = None
-
+
             # Check subsequent commits for recovery patterns
             for commit in commits:
-                if commit['timestamp'] <= failure_time:
+                commit_time = self._normalize_timestamp_to_utc(commit["timestamp"])
+
+                if commit_time <= failure_time:
                     continue
-
-                message_lower = commit['message'].lower()
-                recovery_patterns = ['fixed', 'resolved', 'recovery', 'restored']
-
+
+                message_lower = commit["message"].lower()
+                recovery_patterns = ["fixed", "resolved", "recovery", "restored"]
+
                 if any(pattern in message_lower for pattern in recovery_patterns):
-                    recovery_time = commit['timestamp']
+                    recovery_time = commit_time
                     break
-
+
             # If we found a recovery, calculate MTTR
             if recovery_time:
                 mttr = (recovery_time - failure_time).total_seconds() / 3600
                 recovery_times.append(mttr)
             # For hotfixes, assume quick recovery (2 hours)
-            elif failure.get('is_hotfix'):
+            elif failure.get("is_hotfix"):
                 recovery_times.append(2.0)
-
+
         if not recovery_times:
             # If no explicit recovery found, estimate based on failure type
             return 4.0  # Default 4 hours
-
+
         return float(np.mean(recovery_times))
-
-    def _determine_performance_level(self, deployment_freq: Dict[str, Any],
-                                     lead_time_hours: float,
-                                     change_failure_rate: float,
-                                     mttr_hours: float) -> str:
+
+    def _determine_performance_level(
+        self,
+        deployment_freq: dict[str, Any],
+        lead_time_hours: float,
+        change_failure_rate: float,
+        mttr_hours: float,
+    ) -> str:
         """Determine overall performance level based on DORA metrics."""
         scores = []
-
+
         # Deployment frequency score
-        freq_category = deployment_freq['category']
-        freq_scores = {'Elite': 4, 'High': 3, 'Medium': 2, 'Low': 1}
+        freq_category = deployment_freq["category"]
+        freq_scores = {"Elite": 4, "High": 3, "Medium": 2, "Low": 1}
         scores.append(freq_scores.get(freq_category, 1))
-
+
         # Lead time score
         if lead_time_hours < 24:  # Less than one day
             scores.append(4)  # Elite
@@ -293,7 +350,7 @@ class DORAMetricsCalculator:
             scores.append(2)  # Medium
         else:
             scores.append(1)  # Low
-
+
         # Change failure rate score
         if change_failure_rate <= 15:
             scores.append(4)  # Elite (0-15%)
@@ -303,7 +360,7 @@ class DORAMetricsCalculator:
             scores.append(2)  # Medium
         else:
             scores.append(1)  # Low
-
+
         # MTTR score
         if mttr_hours < 1:  # Less than one hour
             scores.append(4)  # Elite
@@ -313,15 +370,321 @@ class DORAMetricsCalculator:
             scores.append(2)  # Medium
         else:
             scores.append(1)  # Low
-
+
         # Average score determines overall level
         avg_score = sum(scores) / len(scores)
-
+
         if avg_score >= 3.5:
-            return 'Elite'
+            return "Elite"
         elif avg_score >= 2.5:
-            return 'High'
+            return "High"
         elif avg_score >= 1.5:
-            return 'Medium'
+            return "Medium"
         else:
-            return 'Low'
+            return "Low"
+
+    def calculate_weekly_dora_metrics(
+        self,
+        commits: list[dict[str, Any]],
+        prs: list[dict[str, Any]],
+        start_date: datetime,
+        end_date: datetime,
+    ) -> list[dict[str, Any]]:
+        """Calculate DORA metrics broken down by week.
+
+        WHY: Weekly breakdowns provide trend analysis and enable identification
+        of performance patterns over time. This helps teams track improvements
+        and identify periods of degraded performance.
+
+        DESIGN DECISION: Uses Monday-Sunday week boundaries for consistency
+        with other reporting functions. Includes rolling averages to smooth
+        out weekly variations and provide clearer trend indicators.
+
+        Args:
+            commits: List of commit data dictionaries
+            prs: List of pull request data dictionaries
+            start_date: Start of analysis period
+            end_date: End of analysis period
+
+        Returns:
+            List of weekly DORA metrics with trend analysis
+        """
+        # Normalize date range to timezone-aware UTC
+        start_date_utc = self._normalize_timestamp_to_utc(start_date)
+        end_date_utc = self._normalize_timestamp_to_utc(end_date)
+
+        if start_date_utc is None or end_date_utc is None:
+            return []
+
+        # Identify deployments and failures for the entire period
+        all_deployments = self._identify_deployments(commits, prs)
+        all_failures = self._identify_failures(commits, prs)
+
+        # Generate week boundaries
+        weeks = self._generate_week_boundaries(start_date_utc, end_date_utc)
+
+        weekly_metrics = []
+        previous_weeks_data = []  # For rolling averages
+
+        for week_start, week_end in weeks:
+            # Filter data for this week
+            week_deployments = [
+                d for d in all_deployments if week_start <= d["timestamp"] <= week_end
+            ]
+
+            week_failures = [f for f in all_failures if week_start <= f["timestamp"] <= week_end]
+
+            week_commits = [
+                c
+                for c in commits
+                if week_start <= self._normalize_timestamp_to_utc(c["timestamp"]) <= week_end
+            ]
+
+            week_prs = [
+                pr
+                for pr in prs
+                if pr.get("merged_at")
+                and week_start <= self._normalize_timestamp_to_utc(pr["merged_at"]) <= week_end
+            ]
+
+            # Calculate weekly metrics
+            deployment_frequency = len(week_deployments)
+
+            # Calculate lead time for PRs merged this week
+            lead_times = []
+            for pr in week_prs:
+                if pr.get("created_at") and pr.get("merged_at"):
+                    created_at = self._normalize_timestamp_to_utc(pr["created_at"])
+                    merged_at = self._normalize_timestamp_to_utc(pr["merged_at"])
+
+                    if created_at and merged_at:
+                        lead_time = (merged_at - created_at).total_seconds() / 3600
+                        lead_times.append(lead_time)
+
+            avg_lead_time = float(np.median(lead_times)) if lead_times else 0.0
+
+            # Calculate change failure rate
+            change_failure_rate = 0.0
+            if week_deployments:
+                failure_causing_deployments = 0
+                for deployment in week_deployments:
+                    deploy_time = deployment["timestamp"]
+
+                    # Check if any failure occurred within 24 hours
+                    for failure in week_failures:
+                        failure_time = failure["timestamp"]
+                        time_diff = abs((failure_time - deploy_time).total_seconds() / 3600)
+
+                        if time_diff <= 24:  # Within 24 hours
+                            failure_causing_deployments += 1
+                            break
+
+                change_failure_rate = (failure_causing_deployments / len(week_deployments)) * 100
+
+            # Calculate MTTR for failures this week
+            recovery_times = []
+            for failure in week_failures:
+                failure_time = failure["timestamp"]
+
+                # Look for recovery in subsequent commits within reasonable time
+                recovery_time = None
+                for commit in week_commits:
+                    commit_time = self._normalize_timestamp_to_utc(commit["timestamp"])
+
+                    if commit_time <= failure_time:
+                        continue
+
+                    message_lower = commit["message"].lower()
+                    recovery_patterns = ["fixed", "resolved", "recovery", "restored"]
+
+                    if any(pattern in message_lower for pattern in recovery_patterns):
+                        recovery_time = commit_time
+                        break
+
+                if recovery_time:
+                    mttr = (recovery_time - failure_time).total_seconds() / 3600
+                    recovery_times.append(mttr)
+                elif failure.get("is_hotfix"):
+                    recovery_times.append(2.0)  # Assume quick recovery for hotfixes
+
+            avg_mttr = float(np.mean(recovery_times)) if recovery_times else 0.0
+
+            # Store current week data
+            week_data = {
+                "week_start": week_start.strftime("%Y-%m-%d"),
+                "week_end": week_end.strftime("%Y-%m-%d"),
+                "deployment_frequency": deployment_frequency,
+                "lead_time_hours": round(avg_lead_time, 2),
+                "change_failure_rate": round(change_failure_rate, 2),
+                "mttr_hours": round(avg_mttr, 2),
+                "total_failures": len(week_failures),
+                "total_commits": len(week_commits),
+                "total_prs": len(week_prs),
+            }
+
+            # Calculate rolling averages (4-week window)
+            previous_weeks_data.append(week_data.copy())
+            if len(previous_weeks_data) > 4:
+                previous_weeks_data.pop(0)
+
+            # 4-week rolling averages
+            if len(previous_weeks_data) >= 2:
+                week_data["deployment_frequency_4w_avg"] = round(
+                    np.mean([w["deployment_frequency"] for w in previous_weeks_data]), 1
+                )
+
+                lead_times_4w = [
+                    w["lead_time_hours"] for w in previous_weeks_data if w["lead_time_hours"] > 0
+                ]
+                week_data["lead_time_4w_avg"] = round(
+                    np.mean(lead_times_4w) if lead_times_4w else 0, 1
+                )
+
+                cfr_4w = [
+                    w["change_failure_rate"]
+                    for w in previous_weeks_data
+                    if w["change_failure_rate"] > 0
+                ]
+                week_data["change_failure_rate_4w_avg"] = round(np.mean(cfr_4w) if cfr_4w else 0, 1)
+
+                mttr_4w = [w["mttr_hours"] for w in previous_weeks_data if w["mttr_hours"] > 0]
+                week_data["mttr_4w_avg"] = round(np.mean(mttr_4w) if mttr_4w else 0, 1)
+            else:
+                week_data["deployment_frequency_4w_avg"] = week_data["deployment_frequency"]
+                week_data["lead_time_4w_avg"] = week_data["lead_time_hours"]
+                week_data["change_failure_rate_4w_avg"] = week_data["change_failure_rate"]
+                week_data["mttr_4w_avg"] = week_data["mttr_hours"]
+
+            # Calculate week-over-week changes (if we have previous week)
+            if len(weekly_metrics) > 0:
+                prev_week = weekly_metrics[-1]
+
+                # Deployment frequency change
+                if prev_week["deployment_frequency"] > 0:
+                    df_change = (
+                        (week_data["deployment_frequency"] - prev_week["deployment_frequency"])
+                        / prev_week["deployment_frequency"]
+                        * 100
+                    )
+                    week_data["deployment_frequency_change_pct"] = round(df_change, 1)
+                else:
+                    week_data["deployment_frequency_change_pct"] = (
+                        0.0 if week_data["deployment_frequency"] == 0 else 100.0
+                    )
+
+                # Lead time change
+                if prev_week["lead_time_hours"] > 0:
+                    lt_change = (
+                        (week_data["lead_time_hours"] - prev_week["lead_time_hours"])
+                        / prev_week["lead_time_hours"]
+                        * 100
+                    )
+                    week_data["lead_time_change_pct"] = round(lt_change, 1)
+                else:
+                    week_data["lead_time_change_pct"] = (
+                        0.0 if week_data["lead_time_hours"] == 0 else 100.0
+                    )
+
+                # Change failure rate change
+                if prev_week["change_failure_rate"] > 0:
+                    cfr_change = (
+                        (week_data["change_failure_rate"] - prev_week["change_failure_rate"])
+                        / prev_week["change_failure_rate"]
+                        * 100
+                    )
+                    week_data["change_failure_rate_change_pct"] = round(cfr_change, 1)
+                else:
+                    week_data["change_failure_rate_change_pct"] = (
+                        0.0 if week_data["change_failure_rate"] == 0 else 100.0
+                    )
+
+                # MTTR change
+                if prev_week["mttr_hours"] > 0:
+                    mttr_change = (
+                        (week_data["mttr_hours"] - prev_week["mttr_hours"])
+                        / prev_week["mttr_hours"]
+                        * 100
+                    )
+                    week_data["mttr_change_pct"] = round(mttr_change, 1)
+                else:
+                    week_data["mttr_change_pct"] = 0.0 if week_data["mttr_hours"] == 0 else 100.0
+            else:
+                # First week - no changes to calculate
+                week_data["deployment_frequency_change_pct"] = 0.0
+                week_data["lead_time_change_pct"] = 0.0
+                week_data["change_failure_rate_change_pct"] = 0.0
+                week_data["mttr_change_pct"] = 0.0
+
+            # Add trend indicators
+            week_data["deployment_frequency_trend"] = self._get_trend_indicator(
+                week_data["deployment_frequency_change_pct"], "higher_better"
+            )
+            week_data["lead_time_trend"] = self._get_trend_indicator(
+                week_data["lead_time_change_pct"], "lower_better"
+            )
+            week_data["change_failure_rate_trend"] = self._get_trend_indicator(
+                week_data["change_failure_rate_change_pct"], "lower_better"
+            )
+            week_data["mttr_trend"] = self._get_trend_indicator(
+                week_data["mttr_change_pct"], "lower_better"
+            )
+
+            weekly_metrics.append(week_data)
+
+        return weekly_metrics
+
+    def _generate_week_boundaries(
+        self, start_date: datetime, end_date: datetime
+    ) -> list[tuple[datetime, datetime]]:
+        """Generate Monday-Sunday week boundaries for the given date range.
+
+        WHY: Consistent week boundaries ensure that weekly metrics align with
+        other reporting functions and provide predictable time buckets for analysis.
+
+        Args:
+            start_date: Start of analysis period (timezone-aware UTC)
+            end_date: End of analysis period (timezone-aware UTC)
+
+        Returns:
+            List of (week_start, week_end) tuples with Monday-Sunday boundaries
+        """
+        weeks = []
+
+        # Find the Monday of the week containing start_date
+        days_since_monday = start_date.weekday()
+        current_week_start = start_date - timedelta(days=days_since_monday)
+        current_week_start = current_week_start.replace(hour=0, minute=0, second=0, microsecond=0)
+
+        while current_week_start <= end_date:
+            week_end = current_week_start + timedelta(
+                days=6, hours=23, minutes=59, seconds=59, microseconds=999999
+            )
+
+            # Only include weeks that overlap with our analysis period
+            if week_end >= start_date:
+                weeks.append((current_week_start, week_end))
+
+            current_week_start += timedelta(days=7)
+
+        return weeks
+
+    def _get_trend_indicator(self, change_pct: float, direction: str) -> str:
+        """Get trend indicator based on change percentage and desired direction.
+
+        WHY: Provides intuitive trend indicators that account for whether
+        increases or decreases are desirable for each metric.
+
+        Args:
+            change_pct: Percentage change from previous period
+            direction: "higher_better" or "lower_better"
+
+        Returns:
+            Trend indicator: "improving", "declining", or "stable"
+        """
+        if abs(change_pct) < 5:  # Less than 5% change considered stable
+            return "stable"
+
+        if direction == "higher_better":
+            return "improving" if change_pct > 0 else "declining"
+        else:  # lower_better
+            return "improving" if change_pct < 0 else "declining"