gitflow-analytics 1.0.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- gitflow_analytics/__init__.py +22 -0
- gitflow_analytics/_version.py +4 -0
- gitflow_analytics/cli.py +441 -0
- gitflow_analytics/config.py +215 -0
- gitflow_analytics/core/__init__.py +0 -0
- gitflow_analytics/core/analyzer.py +195 -0
- gitflow_analytics/core/branch_mapper.py +221 -0
- gitflow_analytics/core/cache.py +275 -0
- gitflow_analytics/core/identity.py +402 -0
- gitflow_analytics/extractors/__init__.py +0 -0
- gitflow_analytics/extractors/base.py +41 -0
- gitflow_analytics/extractors/story_points.py +128 -0
- gitflow_analytics/extractors/tickets.py +157 -0
- gitflow_analytics/integrations/__init__.py +0 -0
- gitflow_analytics/integrations/github_integration.py +160 -0
- gitflow_analytics/integrations/orchestrator.py +119 -0
- gitflow_analytics/metrics/__init__.py +0 -0
- gitflow_analytics/metrics/dora.py +327 -0
- gitflow_analytics/models/__init__.py +0 -0
- gitflow_analytics/models/database.py +171 -0
- gitflow_analytics/reports/__init__.py +0 -0
- gitflow_analytics/reports/analytics_writer.py +454 -0
- gitflow_analytics/reports/csv_writer.py +311 -0
- gitflow_analytics/reports/narrative_writer.py +263 -0
- gitflow_analytics-1.0.0.dist-info/METADATA +201 -0
- gitflow_analytics-1.0.0.dist-info/RECORD +30 -0
- gitflow_analytics-1.0.0.dist-info/WHEEL +5 -0
- gitflow_analytics-1.0.0.dist-info/entry_points.txt +2 -0
- gitflow_analytics-1.0.0.dist-info/licenses/LICENSE +21 -0
- gitflow_analytics-1.0.0.dist-info/top_level.txt +1 -0
|
@@ -0,0 +1,402 @@
|
|
|
1
|
+
"""Developer identity resolution with persistence."""
|
|
2
|
+
import uuid
|
|
3
|
+
import difflib
|
|
4
|
+
from datetime import datetime
|
|
5
|
+
from typing import Dict, List, Optional, Set, Tuple, Any
|
|
6
|
+
from collections import defaultdict
|
|
7
|
+
from contextlib import contextmanager
|
|
8
|
+
|
|
9
|
+
from sqlalchemy.orm import Session
|
|
10
|
+
from sqlalchemy import and_, or_
|
|
11
|
+
|
|
12
|
+
from ..models.database import Database, DeveloperIdentity, DeveloperAlias
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
class DeveloperIdentityResolver:
    """Resolve and normalize developer identities across repositories.

    Developers often commit under several name/email combinations.  This
    class keeps one canonical ``DeveloperIdentity`` row per person plus any
    number of ``DeveloperAlias`` rows, and resolves incoming (name, email)
    pairs via an in-memory cache, exact email lookups, and finally a fuzzy
    name/domain similarity match.
    """

    def __init__(self, db_path, similarity_threshold: float = 0.85,
                 manual_mappings: Optional[List[Dict[str, Any]]] = None):
        """Initialize with database for persistence.

        Args:
            db_path: Path handed to ``Database`` for the backing store.
            similarity_threshold: Minimum fuzzy-match score (0..1) required
                to attach an unknown developer to an existing identity.
            manual_mappings: Optional list of ``{'canonical_email': ...,
                'aliases': [...]}`` dicts, applied later via
                :meth:`apply_manual_mappings`.
        """
        self.similarity_threshold = similarity_threshold
        self.db = Database(db_path)
        # In-memory cache for performance.  Holds two kinds of entries
        # (see _load_cache):
        #   canonical_id          -> {primary_name, primary_email, github_username}
        #   "email:name" (lower)  -> canonical_id
        self._cache = {}
        self._load_cache()

        # Store manual mappings to apply later, once identities exist.
        self.manual_mappings = manual_mappings

    @contextmanager
    def get_session(self):
        """Get database session context manager.

        Commits on clean exit, rolls back and re-raises on error, and
        always closes the session.
        """
        session = self.db.get_session()
        try:
            yield session
            session.commit()
        except Exception:
            session.rollback()
            raise
        finally:
            session.close()

    def _load_cache(self):
        """Load identities and aliases into the in-memory cache."""
        with self.get_session() as session:
            # Canonical identities, keyed by canonical_id.
            identities = session.query(DeveloperIdentity).all()
            for identity in identities:
                self._cache[identity.canonical_id] = {
                    'primary_name': identity.primary_name,
                    'primary_email': identity.primary_email,
                    'github_username': identity.github_username
                }

            # Aliases, keyed by "email:name" for exact-match lookups.
            aliases = session.query(DeveloperAlias).all()
            for alias in aliases:
                key = f"{alias.email.lower()}:{alias.name.lower()}"
                self._cache[key] = alias.canonical_id

    def _apply_manual_mappings(self, manual_mappings: List[Dict[str, Any]]):
        """Apply manual identity mappings from configuration.

        Each mapping links a canonical email to alias emails.  Alias emails
        that already exist as primary identities are merged into the
        canonical identity; others are simply recorded as aliases.
        """
        # Clear cache to ensure we get fresh data.
        self._cache.clear()
        self._load_cache()

        with self.get_session() as session:
            for mapping in manual_mappings:
                canonical_email = mapping.get('canonical_email', '').lower().strip()
                aliases = mapping.get('aliases', [])

                if not canonical_email or not aliases:
                    continue

                # Find the canonical identity.
                canonical_identity = session.query(DeveloperIdentity).filter(
                    DeveloperIdentity.primary_email == canonical_email
                ).first()

                if not canonical_identity:
                    # Skip if the canonical identity doesn't exist yet.
                    print(f"Warning: Canonical identity not found for email: {canonical_email}")
                    continue

                # Process each alias.
                for alias_email in aliases:
                    alias_email = alias_email.lower().strip()

                    # Check if the alias exists as a primary identity.
                    alias_identity = session.query(DeveloperIdentity).filter(
                        DeveloperIdentity.primary_email == alias_email
                    ).first()

                    if alias_identity:
                        if alias_identity.canonical_id != canonical_identity.canonical_id:
                            # Merge the identities - commit before merge to avoid locks.
                            session.commit()
                            print(f"Merging identity: {alias_identity.primary_name} ({alias_email}) into {canonical_identity.primary_name} ({canonical_email})")
                            self.merge_identities(canonical_identity.canonical_id, alias_identity.canonical_id)
                            # Refresh session after merge.
                            session.expire_all()
                    else:
                        # Just add as an alias if not a primary identity.
                        existing_alias = session.query(DeveloperAlias).filter(
                            and_(
                                DeveloperAlias.email == alias_email,
                                DeveloperAlias.canonical_id == canonical_identity.canonical_id
                            )
                        ).first()

                        if not existing_alias:
                            # Reuse the name from any existing alias with this
                            # email; fall back to the canonical primary name.
                            any_alias = session.query(DeveloperAlias).filter(
                                DeveloperAlias.email == alias_email
                            ).first()
                            if any_alias:
                                name_for_alias = any_alias.name
                            else:
                                name_for_alias = canonical_identity.primary_name

                            new_alias = DeveloperAlias(
                                canonical_id=canonical_identity.canonical_id,
                                name=name_for_alias,
                                email=alias_email
                            )
                            session.add(new_alias)
                            print(f"Added alias: {alias_email} for {canonical_identity.primary_name}")

        # Reload cache after all mappings.
        self._cache.clear()
        self._load_cache()

    def resolve_developer(self, name: str, email: str,
                          github_username: Optional[str] = None) -> str:
        """Resolve developer identity and return canonical ID.

        Resolution order: in-memory cache, exact alias-email match, exact
        primary-email match, fuzzy match above ``similarity_threshold``,
        and finally a brand-new identity.
        """
        # Normalize inputs.
        name = name.strip()
        email = email.lower().strip()

        # Check cache first.
        cache_key = f"{email}:{name.lower()}"
        if cache_key in self._cache:
            canonical_id = self._cache[cache_key]
            self._update_developer_stats(canonical_id)
            return canonical_id

        # Check exact email match in database.
        with self.get_session() as session:
            # Check aliases.
            alias = session.query(DeveloperAlias).filter(
                DeveloperAlias.email == email
            ).first()

            if alias:
                self._cache[cache_key] = alias.canonical_id
                self._update_developer_stats(alias.canonical_id)
                return alias.canonical_id

            # Check primary identities.
            identity = session.query(DeveloperIdentity).filter(
                DeveloperIdentity.primary_email == email
            ).first()

            if identity:
                # Add as alias if the name spelling is new.
                if identity.primary_name.lower() != name.lower():
                    self._add_alias(identity.canonical_id, name, email)
                self._cache[cache_key] = identity.canonical_id
                # Fix: keep last_seen up to date on this path too; the alias
                # branch above already does this.
                self._update_developer_stats(identity.canonical_id)
                return identity.canonical_id

        # Find similar developer.
        best_match = self._find_best_match(name, email)

        if best_match and best_match[1] >= self.similarity_threshold:
            canonical_id = best_match[0]
            self._add_alias(canonical_id, name, email)
            self._cache[cache_key] = canonical_id
            return canonical_id

        # Create new identity.
        canonical_id = self._create_identity(name, email, github_username)
        self._cache[cache_key] = canonical_id
        return canonical_id

    def _find_best_match(self, name: str, email: str) -> Optional[Tuple[str, float]]:
        """Find the best matching existing developer.

        Scoring: primary-name similarity (40%), matching email domain
        (30%), and the best alias-name similarity with a same-domain bonus
        (30%, capped).

        Returns:
            ``(canonical_id, score)`` for the best candidate, or ``None``
            when no identities exist yet.
        """
        best_score = 0.0
        best_canonical_id = None

        name_lower = name.lower().strip()
        email_domain = email.split('@')[1] if '@' in email else ''

        with self.get_session() as session:
            identities = session.query(DeveloperIdentity).all()

            # Load all aliases once and group by canonical_id instead of
            # issuing one query per identity (avoids N+1 queries).
            aliases_by_id = defaultdict(list)
            for alias in session.query(DeveloperAlias).all():
                aliases_by_id[alias.canonical_id].append(alias)

            for identity in identities:
                score = 0.0

                # Name similarity (40% weight).
                name_sim = difflib.SequenceMatcher(
                    None, name_lower, identity.primary_name.lower()
                ).ratio()
                score += name_sim * 0.4

                # Email domain similarity (30% weight).
                identity_domain = (identity.primary_email.split('@')[1]
                                   if '@' in identity.primary_email else '')
                if email_domain and email_domain == identity_domain:
                    score += 0.3

                # Best alias similarity (30% weight, capped at 0.3).
                best_alias_score = 0.0
                for alias in aliases_by_id.get(identity.canonical_id, []):
                    alias_name_sim = difflib.SequenceMatcher(
                        None, name_lower, alias.name.lower()
                    ).ratio()

                    # Bonus for same email domain in aliases.
                    alias_domain = alias.email.split('@')[1] if '@' in alias.email else ''
                    domain_bonus = 0.2 if alias_domain == email_domain else 0.0

                    best_alias_score = max(best_alias_score, alias_name_sim + domain_bonus)

                score += min(best_alias_score * 0.3, 0.3)

                if score > best_score:
                    best_score = score
                    best_canonical_id = identity.canonical_id

        return (best_canonical_id, best_score) if best_canonical_id else None

    def _create_identity(self, name: str, email: str,
                         github_username: Optional[str] = None) -> str:
        """Create a new developer identity and return its canonical ID."""
        canonical_id = str(uuid.uuid4())

        with self.get_session() as session:
            identity = DeveloperIdentity(
                canonical_id=canonical_id,
                primary_name=name,
                primary_email=email,
                github_username=github_username,
                total_commits=0,
                total_story_points=0
            )
            session.add(identity)

        # Update cache.
        self._cache[canonical_id] = {
            'primary_name': name,
            'primary_email': email,
            'github_username': github_username
        }

        return canonical_id

    def _add_alias(self, canonical_id: str, name: str, email: str):
        """Add an alias for an existing developer (no-op if it already exists)."""
        with self.get_session() as session:
            # Check if the alias already exists.
            existing = session.query(DeveloperAlias).filter(
                and_(
                    DeveloperAlias.canonical_id == canonical_id,
                    DeveloperAlias.email == email.lower()
                )
            ).first()

            if not existing:
                alias = DeveloperAlias(
                    canonical_id=canonical_id,
                    name=name,
                    email=email.lower()
                )
                session.add(alias)

    def _update_developer_stats(self, canonical_id: str):
        """Touch the developer's ``last_seen`` timestamp."""
        with self.get_session() as session:
            identity = session.query(DeveloperIdentity).filter(
                DeveloperIdentity.canonical_id == canonical_id
            ).first()

            if identity:
                identity.last_seen = datetime.utcnow()

    def merge_identities(self, canonical_id1: str, canonical_id2: str):
        """Merge two developer identities.

        ``canonical_id2`` is folded into ``canonical_id1``: counters are
        summed, aliases are re-pointed, identity2's primary (name, email)
        becomes an alias of identity1, and the identity2 row is deleted.

        Raises:
            ValueError: If either identity does not exist.
        """
        # Capture identity2's primary details in a short transaction so the
        # alias insert below doesn't hold locks during the merge.
        identity2_name = None
        identity2_email = None
        with self.get_session() as session:
            identity2 = session.query(DeveloperIdentity).filter(
                DeveloperIdentity.canonical_id == canonical_id2
            ).first()
            if identity2:
                identity2_name = identity2.primary_name
                identity2_email = identity2.primary_email

        # Fix: only add the alias when identity2 was actually found; the
        # original referenced unbound locals (NameError) when it was missing,
        # masking the intended ValueError below.
        if identity2_name is not None:
            self._add_alias(canonical_id1, identity2_name, identity2_email)

        # Now do the merge in a separate transaction.
        with self.get_session() as session:
            # Get both identities fresh.
            identity1 = session.query(DeveloperIdentity).filter(
                DeveloperIdentity.canonical_id == canonical_id1
            ).first()
            identity2 = session.query(DeveloperIdentity).filter(
                DeveloperIdentity.canonical_id == canonical_id2
            ).first()

            if not identity1 or not identity2:
                raise ValueError("One or both identities not found")

            # Keep identity1, merge identity2 into it.  Treat missing
            # counters as zero so rows with NULL counters don't break the sum.
            identity1.total_commits = (identity1.total_commits or 0) + (identity2.total_commits or 0)
            identity1.total_story_points = (identity1.total_story_points or 0) + (identity2.total_story_points or 0)

            # min()/max() raise TypeError on None (e.g. never-touched rows),
            # so guard each side explicitly.
            if identity1.first_seen and identity2.first_seen:
                identity1.first_seen = min(identity1.first_seen, identity2.first_seen)
            else:
                identity1.first_seen = identity1.first_seen or identity2.first_seen
            if identity1.last_seen and identity2.last_seen:
                identity1.last_seen = max(identity1.last_seen, identity2.last_seen)
            else:
                identity1.last_seen = identity1.last_seen or identity2.last_seen

            # Move all aliases from identity2 to identity1.
            aliases = session.query(DeveloperAlias).filter(
                DeveloperAlias.canonical_id == canonical_id2
            ).all()

            for alias in aliases:
                alias.canonical_id = canonical_id1

            # Delete identity2.
            session.delete(identity2)

        # Clear cache to force reload.
        self._cache.clear()
        self._load_cache()

    def get_developer_stats(self) -> List[Dict[str, Any]]:
        """Get statistics for all developers, sorted by total commits (desc)."""
        stats = []

        with self.get_session() as session:
            identities = session.query(DeveloperIdentity).all()

            for identity in identities:
                # Count aliases for this identity.
                alias_count = session.query(DeveloperAlias).filter(
                    DeveloperAlias.canonical_id == identity.canonical_id
                ).count()

                stats.append({
                    'canonical_id': identity.canonical_id,
                    'primary_name': identity.primary_name,
                    'primary_email': identity.primary_email,
                    'github_username': identity.github_username,
                    'total_commits': identity.total_commits,
                    'total_story_points': identity.total_story_points,
                    'alias_count': alias_count,
                    'first_seen': identity.first_seen,
                    'last_seen': identity.last_seen
                })

        # Sort by total commits.
        return sorted(stats, key=lambda x: x['total_commits'], reverse=True)

    def update_commit_stats(self, commits: List[Dict[str, Any]]):
        """Update developer statistics based on commits.

        Resolves each commit's author, accumulates commit and story-point
        counts per canonical ID, persists them, then applies any configured
        manual mappings.
        """
        # Aggregate stats by canonical ID.
        stats_by_dev = defaultdict(lambda: {'commits': 0, 'story_points': 0})

        for commit in commits:
            canonical_id = self.resolve_developer(
                commit['author_name'],
                commit['author_email']
            )

            stats_by_dev[canonical_id]['commits'] += 1
            stats_by_dev[canonical_id]['story_points'] += commit.get('story_points', 0) or 0

        # Update database.
        with self.get_session() as session:
            for canonical_id, stats in stats_by_dev.items():
                identity = session.query(DeveloperIdentity).filter(
                    DeveloperIdentity.canonical_id == canonical_id
                ).first()

                if identity:
                    identity.total_commits += stats['commits']
                    identity.total_story_points += stats['story_points']
                    identity.last_seen = datetime.utcnow()

        # Apply manual mappings after all identities are created.
        if self.manual_mappings:
            self.apply_manual_mappings()

    def apply_manual_mappings(self):
        """Apply manual mappings - can be called explicitly after identities are created."""
        if self.manual_mappings:
            self._apply_manual_mappings(self.manual_mappings)
|
|
File without changes
|
|
@@ -0,0 +1,41 @@
|
|
|
1
|
+
"""Base classes for pluggable extractors."""
|
|
2
|
+
from abc import ABC, abstractmethod
|
|
3
|
+
from typing import Any, Optional, List, Dict
|
|
4
|
+
|
|
5
|
+
|
|
6
|
+
class ExtractorBase(ABC):
    """Abstract parent shared by every pluggable extractor.

    Concrete subclasses must provide :meth:`extract_from_text`.
    """

    @abstractmethod
    def extract_from_text(self, text: str) -> Any:
        """Pull extractor-specific information out of free-form text."""
|
13
|
+
|
|
14
|
+
|
|
15
|
+
class StoryPointExtractorBase(ExtractorBase):
    """Abstract contract for story point extractors.

    Implementations must read points both from raw text and from pull
    request payloads (with an optional commit-message fallback).
    """

    @abstractmethod
    def extract_from_text(self, text: str) -> Optional[int]:
        """Return the story points found in *text*, or None."""

    @abstractmethod
    def extract_from_pr(self, pr_data: Dict[str, Any],
                        commit_messages: Optional[List[str]] = None) -> Optional[int]:
        """Return the story points for a pull request, or None."""
|
|
28
|
+
|
|
29
|
+
|
|
30
|
+
class TicketExtractorBase(ExtractorBase):
    """Abstract contract for ticket-reference extractors.

    Implementations must report references both as a flat list and grouped
    by issue-tracking platform.
    """

    @abstractmethod
    def extract_from_text(self, text: str) -> List[Dict[str, str]]:
        """Return all ticket references found in *text*."""

    @abstractmethod
    def extract_by_platform(self, text: str) -> Dict[str, List[str]]:
        """Return ticket ids found in *text*, keyed by platform name."""
|
|
@@ -0,0 +1,128 @@
|
|
|
1
|
+
"""Story point extraction from commits and pull requests."""
|
|
2
|
+
import re
|
|
3
|
+
from typing import Optional, List, Dict, Any
|
|
4
|
+
|
|
5
|
+
|
|
6
|
+
class StoryPointExtractor:
    """Extract story points from commit/PR text using configurable regexes.

    Every pattern must expose the numeric value as capture group 1.  The
    first pattern that yields a plausible value (1..100) wins.
    """

    def __init__(self, patterns: Optional[List[str]] = None):
        """Initialize with extraction patterns.

        Args:
            patterns: Optional list of regex strings, each compiled with
                ``re.IGNORECASE``.  Defaults cover common conventions:
                "SP: 5", "[3sp]", "#3sp", "estimate: 5", "SP5", "points: 8".
        """
        if patterns is None:
            patterns = [
                r'(?:story\s*points?|sp|pts?)\s*[:=]\s*(\d+)',  # SP: 5, story points = 3
                r'\[(\d+)\s*(?:sp|pts?)\]',  # [3sp], [5 pts]
                r'#(\d+)sp',  # #3sp
                r'estimate:\s*(\d+)',  # estimate: 5
                r'\bSP(\d+)\b',  # SP5, SP13
                r'points?:\s*(\d+)',  # points: 8
            ]

        self.patterns = [re.compile(pattern, re.IGNORECASE) for pattern in patterns]

    def extract_from_text(self, text: str) -> Optional[int]:
        """Return the first plausible story-point value in *text*, else None."""
        if not text:
            return None

        for pattern in self.patterns:
            match = pattern.search(text)
            if match:
                try:
                    points = int(match.group(1))
                    # Sanity check - story points should be reasonable;
                    # implausible values fall through to the next pattern.
                    if 0 < points <= 100:
                        return points
                except (ValueError, IndexError):
                    continue

        return None

    def extract_from_pr(self, pr_data: Dict[str, Any],
                        commit_messages: Optional[List[str]] = None) -> Optional[int]:
        """Extract story points from a PR with fallback to its commits.

        Checks the PR description, then title, then body; when none yield a
        value, falls back to the most common value across commit messages.
        """
        # Try PR description first (most authoritative).
        points = self.extract_from_text(pr_data.get('description', ''))
        if points:
            return points

        # Try PR title.
        points = self.extract_from_text(pr_data.get('title', ''))
        if points:
            return points

        # Try PR body (if different from description).
        if 'body' in pr_data:
            points = self.extract_from_text(pr_data['body'])
            if points:
                return points

        # Fallback to commit messages.
        if commit_messages:
            commit_points = []
            for message in commit_messages:
                points = self.extract_from_text(message)
                if points:
                    commit_points.append(points)

            if commit_points:
                # Use the most common value across commits.
                point_counts = Counter(commit_points)
                most_common = point_counts.most_common(1)
                if most_common:
                    return most_common[0][0]

        return None

    def aggregate_story_points(self, prs: List[Dict[str, Any]],
                               commits: List[Dict[str, Any]]) -> Dict[str, Any]:
        """Aggregate story points from PRs and commits.

        PR-associated commits are counted once (via the PR); commits not in
        any PR contribute independently.  Large commits without a PR are
        tracked as "orphans" for reporting.

        Returns:
            Dict with total/pr/commit story-point sums, plus lists of
            orphan commits and unestimated PRs.
        """
        # Map commit hashes to their PRs.
        pr_by_commit = {}
        for pr in prs:
            for commit_hash in pr.get('commit_hashes', []):
                pr_by_commit[commit_hash] = pr

        # Commits already accounted for via a PR.
        pr_commits = set(pr_by_commit.keys())

        # Aggregate results.
        results = {
            'total_story_points': 0,
            'pr_story_points': 0,
            'commit_story_points': 0,
            'orphan_commits': [],  # Commits without PRs
            'unestimated_prs': []  # PRs without story points
        }

        # Process PRs.
        for pr in prs:
            pr_points = pr.get('story_points', 0)
            if pr_points:
                results['pr_story_points'] += pr_points
                results['total_story_points'] += pr_points
            else:
                # Robustness fix: sparse PR dicts no longer raise KeyError.
                results['unestimated_prs'].append({
                    'number': pr.get('number'),
                    'title': pr.get('title', '')
                })

        # Process commits not in PRs.
        for commit in commits:
            if commit['hash'] not in pr_commits:
                commit_points = commit.get('story_points', 0)
                if commit_points:
                    results['commit_story_points'] += commit_points
                    results['total_story_points'] += commit_points

                # Track significant orphan commits.  Robustness fix: use
                # .get with zero defaults so commits missing change-size
                # metadata don't raise KeyError.
                if commit.get('files_changed', 0) > 5 or commit.get('insertions', 0) > 100:
                    results['orphan_commits'].append({
                        'hash': commit['hash'][:7],
                        'message': commit.get('message', '').split('\n')[0][:80],
                        'story_points': commit_points,
                        'files_changed': commit.get('files_changed', 0)
                    })

        return results
|