gitflow_analytics-1.0.0-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,327 @@
+ """DORA (DevOps Research and Assessment) metrics calculation."""
+ from datetime import datetime, timedelta
+ from typing import List, Dict, Any, Optional
+ from collections import defaultdict
+ import numpy as np
+
+
+ class DORAMetricsCalculator:
+     """Calculate DORA metrics for software delivery performance."""
+
+     def __init__(self):
+         """Initialize DORA metrics calculator."""
+         self.deployment_patterns = [
+             'deploy', 'release', 'ship', 'live', 'production', 'prod'
+         ]
+         self.failure_patterns = [
+             'revert', 'rollback', 'hotfix', 'emergency', 'incident', 'outage'
+         ]
+
+     def calculate_dora_metrics(self,
+                                commits: List[Dict[str, Any]],
+                                prs: List[Dict[str, Any]],
+                                start_date: datetime,
+                                end_date: datetime) -> Dict[str, Any]:
+         """Calculate the four key DORA metrics."""
+
+         # Identify deployments and failures
+         deployments = self._identify_deployments(commits, prs)
+         failures = self._identify_failures(commits, prs)
+
+         # Calculate metrics
+         deployment_frequency = self._calculate_deployment_frequency(
+             deployments, start_date, end_date
+         )
+
+         lead_time = self._calculate_lead_time(prs, deployments)
+
+         change_failure_rate = self._calculate_change_failure_rate(
+             deployments, failures
+         )
+
+         mttr = self._calculate_mttr(failures, commits)
+
+         # Determine performance level
+         performance_level = self._determine_performance_level(
+             deployment_frequency, lead_time, change_failure_rate, mttr
+         )
+
+         return {
+             'deployment_frequency': deployment_frequency,
+             'lead_time_hours': lead_time,
+             'change_failure_rate': change_failure_rate,
+             'mttr_hours': mttr,
+             'performance_level': performance_level,
+             'total_deployments': len(deployments),
+             'total_failures': len(failures),
+             'metrics_period_weeks': (end_date - start_date).days / 7
+         }
+
+     def _identify_deployments(self, commits: List[Dict[str, Any]],
+                               prs: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
+         """Identify deployment events from commits and PRs."""
+         deployments = []
+
+         # Check commits for deployment patterns
+         for commit in commits:
+             message_lower = commit['message'].lower()
+             if any(pattern in message_lower for pattern in self.deployment_patterns):
+                 deployments.append({
+                     'type': 'commit',
+                     'timestamp': commit['timestamp'],
+                     'identifier': commit['hash'],
+                     'message': commit['message']
+                 })
+
+         # Check PR titles and labels for deployments
+         for pr in prs:
+             # Check title
+             title_lower = pr.get('title', '').lower()
+             if any(pattern in title_lower for pattern in self.deployment_patterns):
+                 deployments.append({
+                     'type': 'pr',
+                     'timestamp': pr.get('merged_at', pr.get('created_at')),
+                     'identifier': f"PR#{pr['number']}",
+                     'message': pr['title']
+                 })
+                 continue
+
+             # Check labels
+             labels_lower = [label.lower() for label in pr.get('labels', [])]
+             if any(any(pattern in label for pattern in self.deployment_patterns)
+                    for label in labels_lower):
+                 deployments.append({
+                     'type': 'pr',
+                     'timestamp': pr.get('merged_at', pr.get('created_at')),
+                     'identifier': f"PR#{pr['number']}",
+                     'message': pr['title']
+                 })
+
+         # Remove duplicates and sort by timestamp
+         seen = set()
+         unique_deployments = []
+         for dep in sorted(deployments, key=lambda x: x['timestamp']):
+             key = f"{dep['type']}:{dep['identifier']}"
+             if key not in seen:
+                 seen.add(key)
+                 unique_deployments.append(dep)
+
+         return unique_deployments
+
+     def _identify_failures(self, commits: List[Dict[str, Any]],
+                            prs: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
+         """Identify failure events from commits and PRs."""
+         failures = []
+
+         # Check commits for failure patterns
+         for commit in commits:
+             message_lower = commit['message'].lower()
+             if any(pattern in message_lower for pattern in self.failure_patterns):
+                 failures.append({
+                     'type': 'commit',
+                     'timestamp': commit['timestamp'],
+                     'identifier': commit['hash'],
+                     'message': commit['message'],
+                     'is_hotfix': 'hotfix' in message_lower or 'emergency' in message_lower
+                 })
+
+         # Check PRs for failure patterns
+         for pr in prs:
+             title_lower = pr.get('title', '').lower()
+             labels_lower = [label.lower() for label in pr.get('labels', [])]
+
+             is_failure = (
+                 any(pattern in title_lower for pattern in self.failure_patterns) or
+                 any(any(pattern in label for pattern in self.failure_patterns)
+                     for label in labels_lower)
+             )
+
+             if is_failure:
+                 failures.append({
+                     'type': 'pr',
+                     'timestamp': pr.get('merged_at', pr.get('created_at')),
+                     'identifier': f"PR#{pr['number']}",
+                     'message': pr['title'],
+                     'is_hotfix': 'hotfix' in title_lower or 'emergency' in title_lower
+                 })
+
+         return failures
+
+     def _calculate_deployment_frequency(self, deployments: List[Dict[str, Any]],
+                                         start_date: datetime,
+                                         end_date: datetime) -> Dict[str, Any]:
+         """Calculate deployment frequency metrics."""
+         if not deployments:
+             return {
+                 'daily_average': 0,
+                 'weekly_average': 0,
+                 'category': 'Low'
+             }
+
+         # Filter deployments in date range
+         period_deployments = [
+             d for d in deployments
+             if start_date <= d['timestamp'] <= end_date
+         ]
+
+         days = (end_date - start_date).days
+         weeks = days / 7
+
+         daily_avg = len(period_deployments) / days if days > 0 else 0
+         weekly_avg = len(period_deployments) / weeks if weeks > 0 else 0
+
+         # Categorize based on DORA standards
+         if daily_avg >= 1:
+             category = 'Elite'  # Multiple deploys per day
+         elif weekly_avg >= 1:
+             category = 'High'  # Between once per day and once per week
+         elif weekly_avg >= 0.25:
+             category = 'Medium'  # Between once per week and once per month
+         else:
+             category = 'Low'  # Less than once per month
+
+         return {
+             'daily_average': daily_avg,
+             'weekly_average': weekly_avg,
+             'category': category
+         }
+
+     def _calculate_lead_time(self, prs: List[Dict[str, Any]],
+                              deployments: List[Dict[str, Any]]) -> float:
+         """Calculate lead time for changes in hours."""
+         if not prs:
+             return 0
+
+         lead_times = []
+
+         for pr in prs:
+             if not pr.get('created_at') or not pr.get('merged_at'):
+                 continue
+
+             # Calculate time from PR creation to merge
+             lead_time = (pr['merged_at'] - pr['created_at']).total_seconds() / 3600
+             lead_times.append(lead_time)
+
+         if not lead_times:
+             return 0
+
+         # Return median lead time
+         return float(np.median(lead_times))
+
+     def _calculate_change_failure_rate(self, deployments: List[Dict[str, Any]],
+                                        failures: List[Dict[str, Any]]) -> float:
+         """Calculate the percentage of deployments causing failures."""
+         if not deployments:
+             return 0
+
+         # Count failures that occurred within 24 hours of a deployment
+         failure_causing_deployments = 0
+
+         for deployment in deployments:
+             deploy_time = deployment['timestamp']
+
+             # Check if any failure occurred within 24 hours
+             for failure in failures:
+                 failure_time = failure['timestamp']
+                 time_diff = abs((failure_time - deploy_time).total_seconds() / 3600)
+
+                 if time_diff <= 24:  # Within 24 hours
+                     failure_causing_deployments += 1
+                     break
+
+         return (failure_causing_deployments / len(deployments)) * 100
+
+     def _calculate_mttr(self, failures: List[Dict[str, Any]],
+                         commits: List[Dict[str, Any]]) -> float:
+         """Calculate mean time to recovery in hours."""
+         if not failures:
+             return 0
+
+         recovery_times = []
+
+         # For each failure, find the recovery time
+         for i, failure in enumerate(failures):
+             failure_time = failure['timestamp']
+
+             # Look for recovery indicators in subsequent commits
+             recovery_time = None
+
+             # Check subsequent commits for recovery patterns
+             for commit in commits:
+                 if commit['timestamp'] <= failure_time:
+                     continue
+
+                 message_lower = commit['message'].lower()
+                 recovery_patterns = ['fixed', 'resolved', 'recovery', 'restored']
+
+                 if any(pattern in message_lower for pattern in recovery_patterns):
+                     recovery_time = commit['timestamp']
+                     break
+
+             # If we found a recovery, calculate MTTR
+             if recovery_time:
+                 mttr = (recovery_time - failure_time).total_seconds() / 3600
+                 recovery_times.append(mttr)
+             # For hotfixes, assume quick recovery (2 hours)
+             elif failure.get('is_hotfix'):
+                 recovery_times.append(2.0)
+
+         if not recovery_times:
+             # If no explicit recovery found, estimate based on failure type
+             return 4.0  # Default 4 hours
+
+         return float(np.mean(recovery_times))
+
+     def _determine_performance_level(self, deployment_freq: Dict[str, Any],
+                                      lead_time_hours: float,
+                                      change_failure_rate: float,
+                                      mttr_hours: float) -> str:
+         """Determine overall performance level based on DORA metrics."""
+         scores = []
+
+         # Deployment frequency score
+         freq_category = deployment_freq['category']
+         freq_scores = {'Elite': 4, 'High': 3, 'Medium': 2, 'Low': 1}
+         scores.append(freq_scores.get(freq_category, 1))
+
+         # Lead time score
+         if lead_time_hours < 24:  # Less than one day
+             scores.append(4)  # Elite
+         elif lead_time_hours < 168:  # Less than one week
+             scores.append(3)  # High
+         elif lead_time_hours < 720:  # Less than one month
+             scores.append(2)  # Medium
+         else:
+             scores.append(1)  # Low
+
+         # Change failure rate score
+         if change_failure_rate <= 15:
+             scores.append(4)  # Elite (0-15%)
+         elif change_failure_rate <= 20:
+             scores.append(3)  # High
+         elif change_failure_rate <= 30:
+             scores.append(2)  # Medium
+         else:
+             scores.append(1)  # Low
+
+         # MTTR score
+         if mttr_hours < 1:  # Less than one hour
+             scores.append(4)  # Elite
+         elif mttr_hours < 24:  # Less than one day
+             scores.append(3)  # High
+         elif mttr_hours < 168:  # Less than one week
+             scores.append(2)  # Medium
+         else:
+             scores.append(1)  # Low
+
+         # Average score determines overall level
+         avg_score = sum(scores) / len(scores)
+
+         if avg_score >= 3.5:
+             return 'Elite'
+         elif avg_score >= 2.5:
+             return 'High'
+         elif avg_score >= 1.5:
+             return 'Medium'
+         else:
+             return 'Low'
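For orientation, here is a minimal usage sketch of the calculator above (an editorial illustration, not part of the packaged code). It assumes the class has been saved locally as dora_metrics.py, that numpy is installed, and that commit dicts carry the 'hash', 'message', and 'timestamp' keys and PR dicts the 'number', 'title', 'labels', 'created_at', and 'merged_at' keys that the methods read; all names and values below are made up.

from datetime import datetime, timezone
from dora_metrics import DORAMetricsCalculator  # hypothetical local module holding the class above

calculator = DORAMetricsCalculator()

commits = [
    {"hash": "abc123", "message": "Deploy v1.2 to production",
     "timestamp": datetime(2024, 1, 10, tzinfo=timezone.utc)},
    {"hash": "def456", "message": "Revert broken config (hotfix)",
     "timestamp": datetime(2024, 1, 11, tzinfo=timezone.utc)},
]
prs = [
    {"number": 42, "title": "Release 1.2", "labels": ["deploy"],
     "created_at": datetime(2024, 1, 8, tzinfo=timezone.utc),
     "merged_at": datetime(2024, 1, 10, tzinfo=timezone.utc)},
]

metrics = calculator.calculate_dora_metrics(
    commits, prs,
    start_date=datetime(2024, 1, 1, tzinfo=timezone.utc),
    end_date=datetime(2024, 1, 31, tzinfo=timezone.utc),
)
# The return dict above exposes keys such as 'performance_level' and 'deployment_frequency'
print(metrics["performance_level"], metrics["deployment_frequency"]["category"])

With these toy inputs, the "deploy"/"release" messages register as deployments and the "revert"/"hotfix" message as a failure, so all four metrics are populated.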
@@ -0,0 +1,171 @@
+ """Database models for GitFlow Analytics using SQLAlchemy."""
+ from datetime import datetime
+ from typing import Optional
+ from sqlalchemy import create_engine, Column, String, Integer, Float, DateTime, Boolean, JSON, Index
+ from sqlalchemy.ext.declarative import declarative_base
+ from sqlalchemy.orm import sessionmaker, Session
+ from pathlib import Path
+
+ Base = declarative_base()
+
+ class CachedCommit(Base):
+     """Cached commit analysis results."""
+     __tablename__ = 'cached_commits'
+
+     # Primary key
+     id = Column(Integer, primary_key=True)
+
+     # Commit identification
+     repo_path = Column(String, nullable=False)
+     commit_hash = Column(String, nullable=False)
+
+     # Commit data
+     author_name = Column(String)
+     author_email = Column(String)
+     message = Column(String)
+     timestamp = Column(DateTime)
+     branch = Column(String)
+     is_merge = Column(Boolean, default=False)
+
+     # Metrics
+     files_changed = Column(Integer)
+     insertions = Column(Integer)
+     deletions = Column(Integer)
+     complexity_delta = Column(Float)
+
+     # Extracted data
+     story_points = Column(Integer, nullable=True)
+     ticket_references = Column(JSON)  # List of ticket IDs
+
+     # Cache metadata
+     cached_at = Column(DateTime, default=datetime.utcnow)
+     cache_version = Column(String, default="1.0")
+
+     # Indexes for performance
+     __table_args__ = (
+         Index('idx_repo_commit', 'repo_path', 'commit_hash', unique=True),
+         Index('idx_timestamp', 'timestamp'),
+         Index('idx_cached_at', 'cached_at'),
+     )
+
+ class DeveloperIdentity(Base):
+     """Developer identity mappings."""
+     __tablename__ = 'developer_identities'
+
+     id = Column(Integer, primary_key=True)
+     canonical_id = Column(String, unique=True, nullable=False)
+     primary_name = Column(String, nullable=False)
+     primary_email = Column(String, nullable=False)
+     github_username = Column(String, nullable=True)
+
+     # Statistics
+     total_commits = Column(Integer, default=0)
+     total_story_points = Column(Integer, default=0)
+     first_seen = Column(DateTime, default=datetime.utcnow)
+     last_seen = Column(DateTime, default=datetime.utcnow)
+
+     # Metadata
+     created_at = Column(DateTime, default=datetime.utcnow)
+     updated_at = Column(DateTime, default=datetime.utcnow, onupdate=datetime.utcnow)
+
+     __table_args__ = (
+         Index('idx_primary_email', 'primary_email'),
+         Index('idx_canonical_id', 'canonical_id'),
+     )
+
+ class DeveloperAlias(Base):
+     """Alternative names/emails for developers."""
+     __tablename__ = 'developer_aliases'
+
+     id = Column(Integer, primary_key=True)
+     canonical_id = Column(String, nullable=False)  # Foreign key to DeveloperIdentity
+     name = Column(String, nullable=False)
+     email = Column(String, nullable=False)
+
+     __table_args__ = (
+         Index('idx_alias_email', 'email'),
+         Index('idx_alias_canonical_id', 'canonical_id'),
+         Index('idx_name_email', 'name', 'email', unique=True),
+     )
+
+ class PullRequestCache(Base):
+     """Cached pull request data."""
+     __tablename__ = 'pull_request_cache'
+
+     id = Column(Integer, primary_key=True)
+     repo_path = Column(String, nullable=False)
+     pr_number = Column(Integer, nullable=False)
+
+     # PR data
+     title = Column(String)
+     description = Column(String)
+     author = Column(String)
+     created_at = Column(DateTime)
+     merged_at = Column(DateTime, nullable=True)
+
+     # Extracted data
+     story_points = Column(Integer, nullable=True)
+     labels = Column(JSON)  # List of labels
+
+     # Associated commits
+     commit_hashes = Column(JSON)  # List of commit hashes
+
+     # Cache metadata
+     cached_at = Column(DateTime, default=datetime.utcnow)
+
+     __table_args__ = (
+         Index('idx_repo_pr', 'repo_path', 'pr_number', unique=True),
+     )
+
+ class IssueCache(Base):
+     """Cached issue data from various platforms."""
+     __tablename__ = 'issue_cache'
+
+     id = Column(Integer, primary_key=True)
+
+     # Issue identification
+     platform = Column(String, nullable=False)  # 'jira', 'github', 'clickup', 'linear'
+     issue_id = Column(String, nullable=False)
+     project_key = Column(String, nullable=False)
+
+     # Issue data
+     title = Column(String)
+     description = Column(String)
+     status = Column(String)
+     assignee = Column(String, nullable=True)
+     created_at = Column(DateTime)
+     updated_at = Column(DateTime)
+     resolved_at = Column(DateTime, nullable=True)
+
+     # Extracted data
+     story_points = Column(Integer, nullable=True)
+     labels = Column(JSON)
+
+     # Platform-specific data
+     platform_data = Column(JSON)  # Additional platform-specific fields
+
+     # Cache metadata
+     cached_at = Column(DateTime, default=datetime.utcnow)
+
+     __table_args__ = (
+         Index('idx_platform_issue', 'platform', 'issue_id', unique=True),
+         Index('idx_project_key', 'project_key'),
+     )
+
+ class Database:
+     """Database connection manager."""
+
+     def __init__(self, db_path: Path):
+         """Initialize database connection."""
+         db_path.parent.mkdir(parents=True, exist_ok=True)
+         self.engine = create_engine(f'sqlite:///{db_path}')
+         Base.metadata.create_all(self.engine)
+         self.SessionLocal = sessionmaker(bind=self.engine)
+
+     def get_session(self) -> Session:
+         """Get a new database session."""
+         return self.SessionLocal()
+
+     def init_db(self):
+         """Initialize database tables."""
+         Base.metadata.create_all(self.engine)
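A similarly hedged sketch of the models file (again an editorial illustration, not part of the packaged code). It assumes the file above is saved locally as models.py and that SQLAlchemy is installed; the paths and field values are made up. The Database constructor creates the SQLite file and tables, and the ORM classes are used through an ordinary session:

from datetime import datetime
from pathlib import Path

from models import Database, CachedCommit  # hypothetical local module holding the classes above

db = Database(Path("cache/gitflow_cache.db"))  # illustrative path; directory and tables are created on first use
session = db.get_session()
try:
    session.add(CachedCommit(
        repo_path="/repos/example",
        commit_hash="abc123",
        author_name="Jane Doe",
        author_email="jane@example.com",
        message="PROJ-42: deploy to production",
        timestamp=datetime(2024, 1, 10),
        files_changed=3,
        insertions=120,
        deletions=15,
        ticket_references=["PROJ-42"],
    ))
    session.commit()
    # Read the row back through the unique (repo_path, commit_hash) index
    cached = session.query(CachedCommit).filter_by(commit_hash="abc123").one()
    print(cached.repo_path, cached.ticket_references)
finally:
    session.close()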