gitflow-analytics 1.0.0__py3-none-any.whl → 1.0.3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- gitflow_analytics/__init__.py +11 -9
- gitflow_analytics/_version.py +2 -2
- gitflow_analytics/cli.py +691 -243
- gitflow_analytics/cli_rich.py +353 -0
- gitflow_analytics/config.py +389 -96
- gitflow_analytics/core/analyzer.py +175 -78
- gitflow_analytics/core/branch_mapper.py +132 -132
- gitflow_analytics/core/cache.py +242 -173
- gitflow_analytics/core/identity.py +214 -178
- gitflow_analytics/extractors/base.py +13 -11
- gitflow_analytics/extractors/story_points.py +70 -59
- gitflow_analytics/extractors/tickets.py +111 -88
- gitflow_analytics/integrations/github_integration.py +91 -77
- gitflow_analytics/integrations/jira_integration.py +284 -0
- gitflow_analytics/integrations/orchestrator.py +99 -72
- gitflow_analytics/metrics/dora.py +183 -179
- gitflow_analytics/models/database.py +191 -54
- gitflow_analytics/qualitative/__init__.py +30 -0
- gitflow_analytics/qualitative/classifiers/__init__.py +13 -0
- gitflow_analytics/qualitative/classifiers/change_type.py +468 -0
- gitflow_analytics/qualitative/classifiers/domain_classifier.py +399 -0
- gitflow_analytics/qualitative/classifiers/intent_analyzer.py +436 -0
- gitflow_analytics/qualitative/classifiers/risk_analyzer.py +412 -0
- gitflow_analytics/qualitative/core/__init__.py +13 -0
- gitflow_analytics/qualitative/core/llm_fallback.py +653 -0
- gitflow_analytics/qualitative/core/nlp_engine.py +373 -0
- gitflow_analytics/qualitative/core/pattern_cache.py +457 -0
- gitflow_analytics/qualitative/core/processor.py +540 -0
- gitflow_analytics/qualitative/models/__init__.py +25 -0
- gitflow_analytics/qualitative/models/schemas.py +272 -0
- gitflow_analytics/qualitative/utils/__init__.py +13 -0
- gitflow_analytics/qualitative/utils/batch_processor.py +326 -0
- gitflow_analytics/qualitative/utils/cost_tracker.py +343 -0
- gitflow_analytics/qualitative/utils/metrics.py +347 -0
- gitflow_analytics/qualitative/utils/text_processing.py +243 -0
- gitflow_analytics/reports/analytics_writer.py +25 -8
- gitflow_analytics/reports/csv_writer.py +60 -32
- gitflow_analytics/reports/narrative_writer.py +21 -15
- gitflow_analytics/tui/__init__.py +5 -0
- gitflow_analytics/tui/app.py +721 -0
- gitflow_analytics/tui/screens/__init__.py +8 -0
- gitflow_analytics/tui/screens/analysis_progress_screen.py +487 -0
- gitflow_analytics/tui/screens/configuration_screen.py +547 -0
- gitflow_analytics/tui/screens/loading_screen.py +358 -0
- gitflow_analytics/tui/screens/main_screen.py +304 -0
- gitflow_analytics/tui/screens/results_screen.py +698 -0
- gitflow_analytics/tui/widgets/__init__.py +7 -0
- gitflow_analytics/tui/widgets/data_table.py +257 -0
- gitflow_analytics/tui/widgets/export_modal.py +301 -0
- gitflow_analytics/tui/widgets/progress_widget.py +192 -0
- gitflow_analytics-1.0.3.dist-info/METADATA +490 -0
- gitflow_analytics-1.0.3.dist-info/RECORD +62 -0
- gitflow_analytics-1.0.0.dist-info/METADATA +0 -201
- gitflow_analytics-1.0.0.dist-info/RECORD +0 -30
- {gitflow_analytics-1.0.0.dist-info → gitflow_analytics-1.0.3.dist-info}/WHEEL +0 -0
- {gitflow_analytics-1.0.0.dist-info → gitflow_analytics-1.0.3.dist-info}/entry_points.txt +0 -0
- {gitflow_analytics-1.0.0.dist-info → gitflow_analytics-1.0.3.dist-info}/licenses/LICENSE +0 -0
- {gitflow_analytics-1.0.0.dist-info → gitflow_analytics-1.0.3.dist-info}/top_level.txt +0 -0
|
@@ -1,31 +1,35 @@
|
|
|
1
1
|
"""Developer identity resolution with persistence."""
|
|
2
|
-
|
|
2
|
+
|
|
3
3
|
import difflib
|
|
4
|
-
|
|
5
|
-
from typing import Dict, List, Optional, Set, Tuple, Any
|
|
4
|
+
import uuid
|
|
6
5
|
from collections import defaultdict
|
|
7
6
|
from contextlib import contextmanager
|
|
7
|
+
from datetime import datetime
|
|
8
|
+
from typing import Any, Optional
|
|
8
9
|
|
|
9
|
-
from sqlalchemy
|
|
10
|
-
from sqlalchemy import and_, or_
|
|
10
|
+
from sqlalchemy import and_
|
|
11
11
|
|
|
12
|
-
from ..models.database import Database,
|
|
12
|
+
from ..models.database import Database, DeveloperAlias, DeveloperIdentity
|
|
13
13
|
|
|
14
14
|
|
|
15
15
|
class DeveloperIdentityResolver:
|
|
16
16
|
"""Resolve and normalize developer identities across repositories."""
|
|
17
|
-
|
|
18
|
-
def __init__(
|
|
19
|
-
|
|
17
|
+
|
|
18
|
+
def __init__(
|
|
19
|
+
self,
|
|
20
|
+
db_path: str,
|
|
21
|
+
similarity_threshold: float = 0.85,
|
|
22
|
+
manual_mappings: Optional[list[dict[str, Any]]] = None,
|
|
23
|
+
) -> None:
|
|
20
24
|
"""Initialize with database for persistence."""
|
|
21
25
|
self.similarity_threshold = similarity_threshold
|
|
22
26
|
self.db = Database(db_path)
|
|
23
|
-
self._cache = {} # In-memory cache for performance
|
|
27
|
+
self._cache: dict[str, str] = {} # In-memory cache for performance
|
|
24
28
|
self._load_cache()
|
|
25
|
-
|
|
29
|
+
|
|
26
30
|
# Store manual mappings to apply later
|
|
27
31
|
self.manual_mappings = manual_mappings
|
|
28
|
-
|
|
32
|
+
|
|
29
33
|
@contextmanager
|
|
30
34
|
def get_session(self):
|
|
31
35
|
"""Get database session context manager."""
|
|
@@ -38,105 +42,122 @@ class DeveloperIdentityResolver:
|
|
|
38
42
|
raise
|
|
39
43
|
finally:
|
|
40
44
|
session.close()
|
|
41
|
-
|
|
42
|
-
def _load_cache(self):
|
|
45
|
+
|
|
46
|
+
def _load_cache(self) -> None:
|
|
43
47
|
"""Load identities into memory cache."""
|
|
44
48
|
with self.get_session() as session:
|
|
45
49
|
# Load all identities
|
|
46
50
|
identities = session.query(DeveloperIdentity).all()
|
|
47
51
|
for identity in identities:
|
|
48
52
|
self._cache[identity.canonical_id] = {
|
|
49
|
-
|
|
50
|
-
|
|
51
|
-
|
|
53
|
+
"primary_name": identity.primary_name,
|
|
54
|
+
"primary_email": identity.primary_email,
|
|
55
|
+
"github_username": identity.github_username,
|
|
52
56
|
}
|
|
53
|
-
|
|
57
|
+
|
|
54
58
|
# Load all aliases
|
|
55
59
|
aliases = session.query(DeveloperAlias).all()
|
|
56
60
|
for alias in aliases:
|
|
57
61
|
key = f"{alias.email.lower()}:{alias.name.lower()}"
|
|
58
62
|
self._cache[key] = alias.canonical_id
|
|
59
|
-
|
|
60
|
-
def _apply_manual_mappings(self, manual_mappings:
|
|
63
|
+
|
|
64
|
+
def _apply_manual_mappings(self, manual_mappings: list[dict[str, Any]]) -> None:
|
|
61
65
|
"""Apply manual identity mappings from configuration."""
|
|
62
66
|
# Clear cache to ensure we get fresh data
|
|
63
67
|
self._cache.clear()
|
|
64
68
|
self._load_cache()
|
|
65
|
-
|
|
69
|
+
|
|
66
70
|
with self.get_session() as session:
|
|
67
71
|
for mapping in manual_mappings:
|
|
68
|
-
canonical_email = mapping.get(
|
|
69
|
-
aliases = mapping.get(
|
|
70
|
-
|
|
72
|
+
canonical_email = mapping.get("canonical_email", "").lower().strip()
|
|
73
|
+
aliases = mapping.get("aliases", [])
|
|
74
|
+
|
|
71
75
|
if not canonical_email or not aliases:
|
|
72
76
|
continue
|
|
73
|
-
|
|
77
|
+
|
|
74
78
|
# Find the canonical identity
|
|
75
|
-
canonical_identity =
|
|
76
|
-
DeveloperIdentity
|
|
77
|
-
|
|
78
|
-
|
|
79
|
+
canonical_identity = (
|
|
80
|
+
session.query(DeveloperIdentity)
|
|
81
|
+
.filter(DeveloperIdentity.primary_email == canonical_email)
|
|
82
|
+
.first()
|
|
83
|
+
)
|
|
84
|
+
|
|
79
85
|
if not canonical_identity:
|
|
80
86
|
# Skip if canonical identity doesn't exist yet
|
|
81
87
|
print(f"Warning: Canonical identity not found for email: {canonical_email}")
|
|
82
88
|
continue
|
|
83
|
-
|
|
89
|
+
|
|
84
90
|
# Process each alias
|
|
85
91
|
for alias_email in aliases:
|
|
86
92
|
alias_email = alias_email.lower().strip()
|
|
87
|
-
|
|
93
|
+
|
|
88
94
|
# Check if alias identity exists as a primary identity
|
|
89
|
-
alias_identity =
|
|
90
|
-
DeveloperIdentity
|
|
91
|
-
|
|
92
|
-
|
|
95
|
+
alias_identity = (
|
|
96
|
+
session.query(DeveloperIdentity)
|
|
97
|
+
.filter(DeveloperIdentity.primary_email == alias_email)
|
|
98
|
+
.first()
|
|
99
|
+
)
|
|
100
|
+
|
|
93
101
|
if alias_identity:
|
|
94
102
|
if alias_identity.canonical_id != canonical_identity.canonical_id:
|
|
95
103
|
# Merge the identities - commit before merge to avoid locks
|
|
96
104
|
session.commit()
|
|
97
|
-
print(
|
|
98
|
-
|
|
105
|
+
print(
|
|
106
|
+
f"Merging identity: {alias_identity.primary_name} ({alias_email}) into {canonical_identity.primary_name} ({canonical_email})"
|
|
107
|
+
)
|
|
108
|
+
self.merge_identities(
|
|
109
|
+
canonical_identity.canonical_id, alias_identity.canonical_id
|
|
110
|
+
)
|
|
99
111
|
# Refresh session after merge
|
|
100
112
|
session.expire_all()
|
|
101
113
|
else:
|
|
102
114
|
# Just add as an alias if not a primary identity
|
|
103
|
-
existing_alias =
|
|
104
|
-
|
|
105
|
-
|
|
106
|
-
|
|
115
|
+
existing_alias = (
|
|
116
|
+
session.query(DeveloperAlias)
|
|
117
|
+
.filter(
|
|
118
|
+
and_(
|
|
119
|
+
DeveloperAlias.email == alias_email,
|
|
120
|
+
DeveloperAlias.canonical_id == canonical_identity.canonical_id,
|
|
121
|
+
)
|
|
107
122
|
)
|
|
108
|
-
|
|
109
|
-
|
|
123
|
+
.first()
|
|
124
|
+
)
|
|
125
|
+
|
|
110
126
|
if not existing_alias:
|
|
111
127
|
# Get the name from any existing alias with this email
|
|
112
128
|
name_for_alias = None
|
|
113
|
-
any_alias =
|
|
114
|
-
DeveloperAlias
|
|
115
|
-
|
|
129
|
+
any_alias = (
|
|
130
|
+
session.query(DeveloperAlias)
|
|
131
|
+
.filter(DeveloperAlias.email == alias_email)
|
|
132
|
+
.first()
|
|
133
|
+
)
|
|
116
134
|
if any_alias:
|
|
117
135
|
name_for_alias = any_alias.name
|
|
118
136
|
else:
|
|
119
137
|
name_for_alias = canonical_identity.primary_name
|
|
120
|
-
|
|
138
|
+
|
|
121
139
|
new_alias = DeveloperAlias(
|
|
122
140
|
canonical_id=canonical_identity.canonical_id,
|
|
123
141
|
name=name_for_alias,
|
|
124
|
-
email=alias_email
|
|
142
|
+
email=alias_email,
|
|
125
143
|
)
|
|
126
144
|
session.add(new_alias)
|
|
127
|
-
print(
|
|
128
|
-
|
|
145
|
+
print(
|
|
146
|
+
f"Added alias: {alias_email} for {canonical_identity.primary_name}"
|
|
147
|
+
)
|
|
148
|
+
|
|
129
149
|
# Reload cache after all mappings
|
|
130
150
|
self._cache.clear()
|
|
131
151
|
self._load_cache()
|
|
132
|
-
|
|
133
|
-
def resolve_developer(
|
|
134
|
-
|
|
152
|
+
|
|
153
|
+
def resolve_developer(
|
|
154
|
+
self, name: str, email: str, github_username: Optional[str] = None
|
|
155
|
+
) -> str:
|
|
135
156
|
"""Resolve developer identity and return canonical ID."""
|
|
136
157
|
# Normalize inputs
|
|
137
158
|
name = name.strip()
|
|
138
159
|
email = email.lower().strip()
|
|
139
|
-
|
|
160
|
+
|
|
140
161
|
# Check cache first
|
|
141
162
|
cache_key = f"{email}:{name.lower()}"
|
|
142
163
|
if cache_key in self._cache:
|
|
@@ -144,103 +165,105 @@ class DeveloperIdentityResolver:
|
|
|
144
165
|
# Update stats
|
|
145
166
|
self._update_developer_stats(canonical_id)
|
|
146
167
|
return canonical_id
|
|
147
|
-
|
|
168
|
+
|
|
148
169
|
# Check exact email match in database
|
|
149
170
|
with self.get_session() as session:
|
|
150
171
|
# Check aliases
|
|
151
|
-
alias = session.query(DeveloperAlias).filter(
|
|
152
|
-
|
|
153
|
-
).first()
|
|
154
|
-
|
|
172
|
+
alias = session.query(DeveloperAlias).filter(DeveloperAlias.email == email).first()
|
|
173
|
+
|
|
155
174
|
if alias:
|
|
156
175
|
self._cache[cache_key] = alias.canonical_id
|
|
157
176
|
self._update_developer_stats(alias.canonical_id)
|
|
158
177
|
return alias.canonical_id
|
|
159
|
-
|
|
178
|
+
|
|
160
179
|
# Check primary identities
|
|
161
|
-
identity =
|
|
162
|
-
DeveloperIdentity
|
|
163
|
-
|
|
164
|
-
|
|
180
|
+
identity = (
|
|
181
|
+
session.query(DeveloperIdentity)
|
|
182
|
+
.filter(DeveloperIdentity.primary_email == email)
|
|
183
|
+
.first()
|
|
184
|
+
)
|
|
185
|
+
|
|
165
186
|
if identity:
|
|
166
187
|
# Add as alias if name is different
|
|
167
188
|
if identity.primary_name.lower() != name.lower():
|
|
168
189
|
self._add_alias(identity.canonical_id, name, email)
|
|
169
190
|
self._cache[cache_key] = identity.canonical_id
|
|
170
191
|
return identity.canonical_id
|
|
171
|
-
|
|
192
|
+
|
|
172
193
|
# Find similar developer
|
|
173
194
|
best_match = self._find_best_match(name, email)
|
|
174
|
-
|
|
195
|
+
|
|
175
196
|
if best_match and best_match[1] >= self.similarity_threshold:
|
|
176
197
|
canonical_id = best_match[0]
|
|
177
198
|
self._add_alias(canonical_id, name, email)
|
|
178
199
|
self._cache[cache_key] = canonical_id
|
|
179
200
|
return canonical_id
|
|
180
|
-
|
|
201
|
+
|
|
181
202
|
# Create new identity
|
|
182
203
|
canonical_id = self._create_identity(name, email, github_username)
|
|
183
204
|
self._cache[cache_key] = canonical_id
|
|
184
205
|
return canonical_id
|
|
185
|
-
|
|
186
|
-
def _find_best_match(self, name: str, email: str) -> Optional[
|
|
206
|
+
|
|
207
|
+
def _find_best_match(self, name: str, email: str) -> Optional[tuple[str, float]]:
|
|
187
208
|
"""Find the best matching existing developer."""
|
|
188
209
|
best_score = 0.0
|
|
189
210
|
best_canonical_id = None
|
|
190
|
-
|
|
211
|
+
|
|
191
212
|
name_lower = name.lower().strip()
|
|
192
|
-
email_domain = email.split(
|
|
193
|
-
|
|
213
|
+
email_domain = email.split("@")[1] if "@" in email else ""
|
|
214
|
+
|
|
194
215
|
with self.get_session() as session:
|
|
195
216
|
# Get all identities for comparison
|
|
196
217
|
identities = session.query(DeveloperIdentity).all()
|
|
197
|
-
|
|
218
|
+
|
|
198
219
|
for identity in identities:
|
|
199
220
|
score = 0.0
|
|
200
|
-
|
|
221
|
+
|
|
201
222
|
# Name similarity (40% weight)
|
|
202
223
|
name_sim = difflib.SequenceMatcher(
|
|
203
224
|
None, name_lower, identity.primary_name.lower()
|
|
204
225
|
).ratio()
|
|
205
226
|
score += name_sim * 0.4
|
|
206
|
-
|
|
227
|
+
|
|
207
228
|
# Email domain similarity (30% weight)
|
|
208
|
-
identity_domain = (
|
|
209
|
-
|
|
229
|
+
identity_domain = (
|
|
230
|
+
identity.primary_email.split("@")[1] if "@" in identity.primary_email else ""
|
|
231
|
+
)
|
|
210
232
|
if email_domain and email_domain == identity_domain:
|
|
211
233
|
score += 0.3
|
|
212
|
-
|
|
234
|
+
|
|
213
235
|
# Check aliases (30% weight)
|
|
214
|
-
aliases =
|
|
215
|
-
DeveloperAlias
|
|
216
|
-
|
|
217
|
-
|
|
236
|
+
aliases = (
|
|
237
|
+
session.query(DeveloperAlias)
|
|
238
|
+
.filter(DeveloperAlias.canonical_id == identity.canonical_id)
|
|
239
|
+
.all()
|
|
240
|
+
)
|
|
241
|
+
|
|
218
242
|
best_alias_score = 0.0
|
|
219
243
|
for alias in aliases:
|
|
220
244
|
alias_name_sim = difflib.SequenceMatcher(
|
|
221
245
|
None, name_lower, alias.name.lower()
|
|
222
246
|
).ratio()
|
|
223
|
-
|
|
247
|
+
|
|
224
248
|
# Bonus for same email domain in aliases
|
|
225
|
-
alias_domain = alias.email.split(
|
|
249
|
+
alias_domain = alias.email.split("@")[1] if "@" in alias.email else ""
|
|
226
250
|
domain_bonus = 0.2 if alias_domain == email_domain else 0.0
|
|
227
|
-
|
|
251
|
+
|
|
228
252
|
alias_score = alias_name_sim + domain_bonus
|
|
229
253
|
best_alias_score = max(best_alias_score, alias_score)
|
|
230
|
-
|
|
254
|
+
|
|
231
255
|
score += min(best_alias_score * 0.3, 0.3)
|
|
232
|
-
|
|
256
|
+
|
|
233
257
|
if score > best_score:
|
|
234
258
|
best_score = score
|
|
235
259
|
best_canonical_id = identity.canonical_id
|
|
236
|
-
|
|
260
|
+
|
|
237
261
|
return (best_canonical_id, best_score) if best_canonical_id else None
|
|
238
|
-
|
|
239
|
-
def _create_identity(self, name: str, email: str,
|
|
240
|
-
github_username: Optional[str] = None) -> str:
|
|
262
|
+
|
|
263
|
+
def _create_identity(self, name: str, email: str, github_username: Optional[str] = None) -> str:
|
|
241
264
|
"""Create new developer identity."""
|
|
242
265
|
canonical_id = str(uuid.uuid4())
|
|
243
|
-
|
|
266
|
+
|
|
244
267
|
with self.get_session() as session:
|
|
245
268
|
identity = DeveloperIdentity(
|
|
246
269
|
canonical_id=canonical_id,
|
|
@@ -248,155 +271,168 @@ class DeveloperIdentityResolver:
|
|
|
248
271
|
primary_email=email,
|
|
249
272
|
github_username=github_username,
|
|
250
273
|
total_commits=0,
|
|
251
|
-
total_story_points=0
|
|
274
|
+
total_story_points=0,
|
|
252
275
|
)
|
|
253
276
|
session.add(identity)
|
|
254
|
-
|
|
277
|
+
|
|
255
278
|
# Update cache
|
|
256
279
|
self._cache[canonical_id] = {
|
|
257
|
-
|
|
258
|
-
|
|
259
|
-
|
|
280
|
+
"primary_name": name,
|
|
281
|
+
"primary_email": email,
|
|
282
|
+
"github_username": github_username,
|
|
260
283
|
}
|
|
261
|
-
|
|
284
|
+
|
|
262
285
|
return canonical_id
|
|
263
|
-
|
|
286
|
+
|
|
264
287
|
def _add_alias(self, canonical_id: str, name: str, email: str):
|
|
265
288
|
"""Add alias for existing developer."""
|
|
266
289
|
with self.get_session() as session:
|
|
267
290
|
# Check if alias already exists
|
|
268
|
-
existing =
|
|
269
|
-
|
|
270
|
-
|
|
271
|
-
|
|
291
|
+
existing = (
|
|
292
|
+
session.query(DeveloperAlias)
|
|
293
|
+
.filter(
|
|
294
|
+
and_(
|
|
295
|
+
DeveloperAlias.canonical_id == canonical_id,
|
|
296
|
+
DeveloperAlias.email == email.lower(),
|
|
297
|
+
)
|
|
272
298
|
)
|
|
273
|
-
|
|
274
|
-
|
|
299
|
+
.first()
|
|
300
|
+
)
|
|
301
|
+
|
|
275
302
|
if not existing:
|
|
276
|
-
alias = DeveloperAlias(
|
|
277
|
-
canonical_id=canonical_id,
|
|
278
|
-
name=name,
|
|
279
|
-
email=email.lower()
|
|
280
|
-
)
|
|
303
|
+
alias = DeveloperAlias(canonical_id=canonical_id, name=name, email=email.lower())
|
|
281
304
|
session.add(alias)
|
|
282
|
-
|
|
305
|
+
|
|
283
306
|
def _update_developer_stats(self, canonical_id: str):
|
|
284
307
|
"""Update developer statistics."""
|
|
285
308
|
with self.get_session() as session:
|
|
286
|
-
identity =
|
|
287
|
-
DeveloperIdentity
|
|
288
|
-
|
|
289
|
-
|
|
309
|
+
identity = (
|
|
310
|
+
session.query(DeveloperIdentity)
|
|
311
|
+
.filter(DeveloperIdentity.canonical_id == canonical_id)
|
|
312
|
+
.first()
|
|
313
|
+
)
|
|
314
|
+
|
|
290
315
|
if identity:
|
|
291
316
|
identity.last_seen = datetime.utcnow()
|
|
292
|
-
|
|
317
|
+
|
|
293
318
|
def merge_identities(self, canonical_id1: str, canonical_id2: str):
|
|
294
319
|
"""Merge two developer identities."""
|
|
295
320
|
# First, add the alias outside of the main merge transaction
|
|
296
321
|
with self.get_session() as session:
|
|
297
|
-
identity2 =
|
|
298
|
-
DeveloperIdentity
|
|
299
|
-
|
|
322
|
+
identity2 = (
|
|
323
|
+
session.query(DeveloperIdentity)
|
|
324
|
+
.filter(DeveloperIdentity.canonical_id == canonical_id2)
|
|
325
|
+
.first()
|
|
326
|
+
)
|
|
300
327
|
if identity2:
|
|
301
328
|
identity2_name = identity2.primary_name
|
|
302
329
|
identity2_email = identity2.primary_email
|
|
303
|
-
|
|
330
|
+
|
|
304
331
|
# Add identity2's primary as alias to identity1 first
|
|
305
332
|
self._add_alias(canonical_id1, identity2_name, identity2_email)
|
|
306
|
-
|
|
333
|
+
|
|
307
334
|
# Now do the merge in a separate transaction
|
|
308
335
|
with self.get_session() as session:
|
|
309
336
|
# Get both identities fresh
|
|
310
|
-
identity1 =
|
|
311
|
-
DeveloperIdentity
|
|
312
|
-
|
|
313
|
-
|
|
314
|
-
|
|
315
|
-
|
|
316
|
-
|
|
337
|
+
identity1 = (
|
|
338
|
+
session.query(DeveloperIdentity)
|
|
339
|
+
.filter(DeveloperIdentity.canonical_id == canonical_id1)
|
|
340
|
+
.first()
|
|
341
|
+
)
|
|
342
|
+
identity2 = (
|
|
343
|
+
session.query(DeveloperIdentity)
|
|
344
|
+
.filter(DeveloperIdentity.canonical_id == canonical_id2)
|
|
345
|
+
.first()
|
|
346
|
+
)
|
|
347
|
+
|
|
317
348
|
if not identity1 or not identity2:
|
|
318
349
|
raise ValueError("One or both identities not found")
|
|
319
|
-
|
|
350
|
+
|
|
320
351
|
# Keep identity1, merge identity2 into it
|
|
321
352
|
identity1.total_commits += identity2.total_commits
|
|
322
353
|
identity1.total_story_points += identity2.total_story_points
|
|
323
354
|
identity1.first_seen = min(identity1.first_seen, identity2.first_seen)
|
|
324
355
|
identity1.last_seen = max(identity1.last_seen, identity2.last_seen)
|
|
325
|
-
|
|
356
|
+
|
|
326
357
|
# Move all aliases from identity2 to identity1
|
|
327
|
-
aliases =
|
|
328
|
-
DeveloperAlias
|
|
329
|
-
|
|
330
|
-
|
|
358
|
+
aliases = (
|
|
359
|
+
session.query(DeveloperAlias)
|
|
360
|
+
.filter(DeveloperAlias.canonical_id == canonical_id2)
|
|
361
|
+
.all()
|
|
362
|
+
)
|
|
363
|
+
|
|
331
364
|
for alias in aliases:
|
|
332
365
|
alias.canonical_id = canonical_id1
|
|
333
|
-
|
|
366
|
+
|
|
334
367
|
# Delete identity2
|
|
335
368
|
session.delete(identity2)
|
|
336
|
-
|
|
369
|
+
|
|
337
370
|
# Clear cache to force reload
|
|
338
371
|
self._cache.clear()
|
|
339
372
|
self._load_cache()
|
|
340
|
-
|
|
341
|
-
def get_developer_stats(self) ->
|
|
373
|
+
|
|
374
|
+
def get_developer_stats(self) -> list[dict[str, Any]]:
|
|
342
375
|
"""Get statistics for all developers."""
|
|
343
376
|
stats = []
|
|
344
|
-
|
|
377
|
+
|
|
345
378
|
with self.get_session() as session:
|
|
346
379
|
identities = session.query(DeveloperIdentity).all()
|
|
347
|
-
|
|
380
|
+
|
|
348
381
|
for identity in identities:
|
|
349
382
|
# Count aliases
|
|
350
|
-
alias_count =
|
|
351
|
-
DeveloperAlias
|
|
352
|
-
|
|
353
|
-
|
|
354
|
-
|
|
355
|
-
|
|
356
|
-
|
|
357
|
-
|
|
358
|
-
|
|
359
|
-
|
|
360
|
-
|
|
361
|
-
|
|
362
|
-
|
|
363
|
-
|
|
364
|
-
|
|
365
|
-
|
|
383
|
+
alias_count = (
|
|
384
|
+
session.query(DeveloperAlias)
|
|
385
|
+
.filter(DeveloperAlias.canonical_id == identity.canonical_id)
|
|
386
|
+
.count()
|
|
387
|
+
)
|
|
388
|
+
|
|
389
|
+
stats.append(
|
|
390
|
+
{
|
|
391
|
+
"canonical_id": identity.canonical_id,
|
|
392
|
+
"primary_name": identity.primary_name,
|
|
393
|
+
"primary_email": identity.primary_email,
|
|
394
|
+
"github_username": identity.github_username,
|
|
395
|
+
"total_commits": identity.total_commits,
|
|
396
|
+
"total_story_points": identity.total_story_points,
|
|
397
|
+
"alias_count": alias_count,
|
|
398
|
+
"first_seen": identity.first_seen,
|
|
399
|
+
"last_seen": identity.last_seen,
|
|
400
|
+
}
|
|
401
|
+
)
|
|
402
|
+
|
|
366
403
|
# Sort by total commits
|
|
367
|
-
return sorted(stats, key=lambda x: x[
|
|
368
|
-
|
|
369
|
-
def update_commit_stats(self, commits:
|
|
404
|
+
return sorted(stats, key=lambda x: x["total_commits"], reverse=True)
|
|
405
|
+
|
|
406
|
+
def update_commit_stats(self, commits: list[dict[str, Any]]):
|
|
370
407
|
"""Update developer statistics based on commits."""
|
|
371
408
|
# Aggregate stats by canonical ID
|
|
372
|
-
stats_by_dev = defaultdict(lambda: {
|
|
373
|
-
|
|
409
|
+
stats_by_dev = defaultdict(lambda: {"commits": 0, "story_points": 0})
|
|
410
|
+
|
|
374
411
|
for commit in commits:
|
|
375
|
-
canonical_id = self.resolve_developer(
|
|
376
|
-
|
|
377
|
-
|
|
378
|
-
)
|
|
379
|
-
|
|
380
|
-
stats_by_dev[canonical_id]['commits'] += 1
|
|
381
|
-
stats_by_dev[canonical_id]['story_points'] += commit.get('story_points', 0) or 0
|
|
382
|
-
|
|
412
|
+
canonical_id = self.resolve_developer(commit["author_name"], commit["author_email"])
|
|
413
|
+
|
|
414
|
+
stats_by_dev[canonical_id]["commits"] += 1
|
|
415
|
+
stats_by_dev[canonical_id]["story_points"] += commit.get("story_points", 0) or 0
|
|
416
|
+
|
|
383
417
|
# Update database
|
|
384
418
|
with self.get_session() as session:
|
|
385
419
|
for canonical_id, stats in stats_by_dev.items():
|
|
386
|
-
identity =
|
|
387
|
-
DeveloperIdentity
|
|
388
|
-
|
|
389
|
-
|
|
420
|
+
identity = (
|
|
421
|
+
session.query(DeveloperIdentity)
|
|
422
|
+
.filter(DeveloperIdentity.canonical_id == canonical_id)
|
|
423
|
+
.first()
|
|
424
|
+
)
|
|
425
|
+
|
|
390
426
|
if identity:
|
|
391
|
-
identity.total_commits += stats[
|
|
392
|
-
identity.total_story_points += stats[
|
|
427
|
+
identity.total_commits += stats["commits"]
|
|
428
|
+
identity.total_story_points += stats["story_points"]
|
|
393
429
|
identity.last_seen = datetime.utcnow()
|
|
394
|
-
|
|
430
|
+
|
|
395
431
|
# Apply manual mappings after all identities are created
|
|
396
432
|
if self.manual_mappings:
|
|
397
433
|
self.apply_manual_mappings()
|
|
398
|
-
|
|
434
|
+
|
|
399
435
|
def apply_manual_mappings(self):
|
|
400
436
|
"""Apply manual mappings - can be called explicitly after identities are created."""
|
|
401
437
|
if self.manual_mappings:
|
|
402
|
-
self._apply_manual_mappings(self.manual_mappings)
|
|
438
|
+
self._apply_manual_mappings(self.manual_mappings)
|
|
@@ -1,11 +1,12 @@
|
|
|
1
1
|
"""Base classes for pluggable extractors."""
|
|
2
|
+
|
|
2
3
|
from abc import ABC, abstractmethod
|
|
3
|
-
from typing import Any, Optional
|
|
4
|
+
from typing import Any, Optional
|
|
4
5
|
|
|
5
6
|
|
|
6
7
|
class ExtractorBase(ABC):
|
|
7
8
|
"""Base class for all extractors."""
|
|
8
|
-
|
|
9
|
+
|
|
9
10
|
@abstractmethod
|
|
10
11
|
def extract_from_text(self, text: str) -> Any:
|
|
11
12
|
"""Extract information from text."""
|
|
@@ -14,28 +15,29 @@ class ExtractorBase(ABC):
|
|
|
14
15
|
|
|
15
16
|
class StoryPointExtractorBase(ExtractorBase):
|
|
16
17
|
"""Base class for story point extractors."""
|
|
17
|
-
|
|
18
|
+
|
|
18
19
|
@abstractmethod
|
|
19
20
|
def extract_from_text(self, text: str) -> Optional[int]:
|
|
20
21
|
"""Extract story points from text."""
|
|
21
22
|
pass
|
|
22
|
-
|
|
23
|
+
|
|
23
24
|
@abstractmethod
|
|
24
|
-
def extract_from_pr(
|
|
25
|
-
|
|
25
|
+
def extract_from_pr(
|
|
26
|
+
self, pr_data: dict[str, Any], commit_messages: Optional[list[str]] = None
|
|
27
|
+
) -> Optional[int]:
|
|
26
28
|
"""Extract story points from pull request."""
|
|
27
29
|
pass
|
|
28
30
|
|
|
29
31
|
|
|
30
32
|
class TicketExtractorBase(ExtractorBase):
|
|
31
33
|
"""Base class for ticket extractors."""
|
|
32
|
-
|
|
34
|
+
|
|
33
35
|
@abstractmethod
|
|
34
|
-
def extract_from_text(self, text: str) ->
|
|
36
|
+
def extract_from_text(self, text: str) -> list[dict[str, str]]:
|
|
35
37
|
"""Extract ticket references from text."""
|
|
36
38
|
pass
|
|
37
|
-
|
|
39
|
+
|
|
38
40
|
@abstractmethod
|
|
39
|
-
def extract_by_platform(self, text: str) ->
|
|
41
|
+
def extract_by_platform(self, text: str) -> dict[str, list[str]]:
|
|
40
42
|
"""Extract tickets grouped by platform."""
|
|
41
|
-
pass
|
|
43
|
+
pass
|