gitflow-analytics 1.0.1-py3-none-any.whl → 1.0.3-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- gitflow_analytics/__init__.py +11 -11
- gitflow_analytics/_version.py +2 -2
- gitflow_analytics/cli.py +612 -258
- gitflow_analytics/cli_rich.py +353 -0
- gitflow_analytics/config.py +251 -141
- gitflow_analytics/core/analyzer.py +140 -103
- gitflow_analytics/core/branch_mapper.py +132 -132
- gitflow_analytics/core/cache.py +240 -169
- gitflow_analytics/core/identity.py +210 -173
- gitflow_analytics/extractors/base.py +13 -11
- gitflow_analytics/extractors/story_points.py +70 -59
- gitflow_analytics/extractors/tickets.py +101 -87
- gitflow_analytics/integrations/github_integration.py +84 -77
- gitflow_analytics/integrations/jira_integration.py +116 -104
- gitflow_analytics/integrations/orchestrator.py +86 -85
- gitflow_analytics/metrics/dora.py +181 -177
- gitflow_analytics/models/database.py +190 -53
- gitflow_analytics/qualitative/__init__.py +30 -0
- gitflow_analytics/qualitative/classifiers/__init__.py +13 -0
- gitflow_analytics/qualitative/classifiers/change_type.py +468 -0
- gitflow_analytics/qualitative/classifiers/domain_classifier.py +399 -0
- gitflow_analytics/qualitative/classifiers/intent_analyzer.py +436 -0
- gitflow_analytics/qualitative/classifiers/risk_analyzer.py +412 -0
- gitflow_analytics/qualitative/core/__init__.py +13 -0
- gitflow_analytics/qualitative/core/llm_fallback.py +653 -0
- gitflow_analytics/qualitative/core/nlp_engine.py +373 -0
- gitflow_analytics/qualitative/core/pattern_cache.py +457 -0
- gitflow_analytics/qualitative/core/processor.py +540 -0
- gitflow_analytics/qualitative/models/__init__.py +25 -0
- gitflow_analytics/qualitative/models/schemas.py +272 -0
- gitflow_analytics/qualitative/utils/__init__.py +13 -0
- gitflow_analytics/qualitative/utils/batch_processor.py +326 -0
- gitflow_analytics/qualitative/utils/cost_tracker.py +343 -0
- gitflow_analytics/qualitative/utils/metrics.py +347 -0
- gitflow_analytics/qualitative/utils/text_processing.py +243 -0
- gitflow_analytics/reports/analytics_writer.py +11 -4
- gitflow_analytics/reports/csv_writer.py +51 -31
- gitflow_analytics/reports/narrative_writer.py +16 -14
- gitflow_analytics/tui/__init__.py +5 -0
- gitflow_analytics/tui/app.py +721 -0
- gitflow_analytics/tui/screens/__init__.py +8 -0
- gitflow_analytics/tui/screens/analysis_progress_screen.py +487 -0
- gitflow_analytics/tui/screens/configuration_screen.py +547 -0
- gitflow_analytics/tui/screens/loading_screen.py +358 -0
- gitflow_analytics/tui/screens/main_screen.py +304 -0
- gitflow_analytics/tui/screens/results_screen.py +698 -0
- gitflow_analytics/tui/widgets/__init__.py +7 -0
- gitflow_analytics/tui/widgets/data_table.py +257 -0
- gitflow_analytics/tui/widgets/export_modal.py +301 -0
- gitflow_analytics/tui/widgets/progress_widget.py +192 -0
- {gitflow_analytics-1.0.1.dist-info → gitflow_analytics-1.0.3.dist-info}/METADATA +31 -4
- gitflow_analytics-1.0.3.dist-info/RECORD +62 -0
- gitflow_analytics-1.0.1.dist-info/RECORD +0 -31
- {gitflow_analytics-1.0.1.dist-info → gitflow_analytics-1.0.3.dist-info}/WHEEL +0 -0
- {gitflow_analytics-1.0.1.dist-info → gitflow_analytics-1.0.3.dist-info}/entry_points.txt +0 -0
- {gitflow_analytics-1.0.1.dist-info → gitflow_analytics-1.0.3.dist-info}/licenses/LICENSE +0 -0
- {gitflow_analytics-1.0.1.dist-info → gitflow_analytics-1.0.3.dist-info}/top_level.txt +0 -0
gitflow_analytics/core/identity.py
@@ -1,10 +1,11 @@
 """Developer identity resolution with persistence."""
+
 import difflib
 import uuid
 from collections import defaultdict
 from contextlib import contextmanager
 from datetime import datetime
-from typing import Any,
+from typing import Any, Optional
 
 from sqlalchemy import and_
 
@@ -13,18 +14,22 @@ from ..models.database import Database, DeveloperAlias, DeveloperIdentity
 
 class DeveloperIdentityResolver:
     """Resolve and normalize developer identities across repositories."""
-
-    def __init__(
-
+
+    def __init__(
+        self,
+        db_path: str,
+        similarity_threshold: float = 0.85,
+        manual_mappings: Optional[list[dict[str, Any]]] = None,
+    ) -> None:
         """Initialize with database for persistence."""
         self.similarity_threshold = similarity_threshold
         self.db = Database(db_path)
-        self._cache = {}  # In-memory cache for performance
+        self._cache: dict[str, str] = {}  # In-memory cache for performance
         self._load_cache()
-
+
         # Store manual mappings to apply later
         self.manual_mappings = manual_mappings
-
+
     @contextmanager
     def get_session(self):
         """Get database session context manager."""
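The hunk above reworks the constructor to take an explicit, typed `manual_mappings` argument and annotates the in-memory cache. A minimal instantiation sketch, assuming only the signature shown above (the `db_path` value and threshold are illustrative, not values the package mandates):

```python
# Sketch only: the signature comes from the 1.0.3 diff above; the concrete
# values are assumptions for illustration.
from gitflow_analytics.core.identity import DeveloperIdentityResolver

resolver = DeveloperIdentityResolver(
    db_path="identities.db",    # any path accepted by the project's Database wrapper
    similarity_threshold=0.85,  # default shown in the new signature
    manual_mappings=None,       # optional; see the mapping shape sketched further below
)
```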
@@ -37,105 +42,122 @@ class DeveloperIdentityResolver:
             raise
         finally:
             session.close()
-
-    def _load_cache(self):
+
+    def _load_cache(self) -> None:
         """Load identities into memory cache."""
         with self.get_session() as session:
             # Load all identities
             identities = session.query(DeveloperIdentity).all()
             for identity in identities:
                 self._cache[identity.canonical_id] = {
-
-
-
+                    "primary_name": identity.primary_name,
+                    "primary_email": identity.primary_email,
+                    "github_username": identity.github_username,
                 }
-
+
             # Load all aliases
             aliases = session.query(DeveloperAlias).all()
             for alias in aliases:
                 key = f"{alias.email.lower()}:{alias.name.lower()}"
                 self._cache[key] = alias.canonical_id
-
-    def _apply_manual_mappings(self, manual_mappings:
+
+    def _apply_manual_mappings(self, manual_mappings: list[dict[str, Any]]) -> None:
         """Apply manual identity mappings from configuration."""
         # Clear cache to ensure we get fresh data
         self._cache.clear()
         self._load_cache()
-
+
         with self.get_session() as session:
             for mapping in manual_mappings:
-                canonical_email = mapping.get(
-                aliases = mapping.get(
-
+                canonical_email = mapping.get("canonical_email", "").lower().strip()
+                aliases = mapping.get("aliases", [])
+
                 if not canonical_email or not aliases:
                     continue
-
+
                 # Find the canonical identity
-                canonical_identity =
-                DeveloperIdentity
-
-
+                canonical_identity = (
+                    session.query(DeveloperIdentity)
+                    .filter(DeveloperIdentity.primary_email == canonical_email)
+                    .first()
+                )
+
                 if not canonical_identity:
                     # Skip if canonical identity doesn't exist yet
                     print(f"Warning: Canonical identity not found for email: {canonical_email}")
                     continue
-
+
                 # Process each alias
                 for alias_email in aliases:
                     alias_email = alias_email.lower().strip()
-
+
                     # Check if alias identity exists as a primary identity
-                    alias_identity =
-                    DeveloperIdentity
-
-
+                    alias_identity = (
+                        session.query(DeveloperIdentity)
+                        .filter(DeveloperIdentity.primary_email == alias_email)
+                        .first()
+                    )
+
                     if alias_identity:
                         if alias_identity.canonical_id != canonical_identity.canonical_id:
                             # Merge the identities - commit before merge to avoid locks
                             session.commit()
-                            print(
-
+                            print(
+                                f"Merging identity: {alias_identity.primary_name} ({alias_email}) into {canonical_identity.primary_name} ({canonical_email})"
+                            )
+                            self.merge_identities(
+                                canonical_identity.canonical_id, alias_identity.canonical_id
+                            )
                             # Refresh session after merge
                             session.expire_all()
                     else:
                         # Just add as an alias if not a primary identity
-                        existing_alias =
-
-
-
+                        existing_alias = (
+                            session.query(DeveloperAlias)
+                            .filter(
+                                and_(
+                                    DeveloperAlias.email == alias_email,
+                                    DeveloperAlias.canonical_id == canonical_identity.canonical_id,
+                                )
                             )
-
-
+                            .first()
+                        )
+
                         if not existing_alias:
                             # Get the name from any existing alias with this email
                             name_for_alias = None
-                            any_alias =
-                            DeveloperAlias
-
+                            any_alias = (
+                                session.query(DeveloperAlias)
+                                .filter(DeveloperAlias.email == alias_email)
+                                .first()
+                            )
                             if any_alias:
                                 name_for_alias = any_alias.name
                             else:
                                 name_for_alias = canonical_identity.primary_name
-
+
                             new_alias = DeveloperAlias(
                                 canonical_id=canonical_identity.canonical_id,
                                 name=name_for_alias,
-                                email=alias_email
+                                email=alias_email,
                             )
                             session.add(new_alias)
-                            print(
-
+                            print(
+                                f"Added alias: {alias_email} for {canonical_identity.primary_name}"
+                            )
+
         # Reload cache after all mappings
         self._cache.clear()
         self._load_cache()
-
-    def resolve_developer(
-
+
+    def resolve_developer(
+        self, name: str, email: str, github_username: Optional[str] = None
+    ) -> str:
         """Resolve developer identity and return canonical ID."""
         # Normalize inputs
         name = name.strip()
         email = email.lower().strip()
-
+
         # Check cache first
         cache_key = f"{email}:{name.lower()}"
         if cache_key in self._cache:
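Based on the keys read by `_apply_manual_mappings` above (`canonical_email` and `aliases`), a manual mapping entry appears to be a plain dict. A sketch of the shape, with invented addresses, wired through the constructor and the public `apply_manual_mappings` wrapper shown later in this diff:

```python
# Shape inferred from mapping.get("canonical_email", ...) and mapping.get("aliases", [])
# in the hunk above; the email addresses are made up.
manual_mappings = [
    {
        "canonical_email": "jane@example.com",
        "aliases": [
            "jane.doe@users.noreply.github.com",
            "jdoe@oldcompany.com",
        ],
    }
]

resolver = DeveloperIdentityResolver("identities.db", manual_mappings=manual_mappings)
resolver.apply_manual_mappings()  # public wrapper defined further down in this file
```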
@@ -143,103 +165,105 @@
             # Update stats
             self._update_developer_stats(canonical_id)
             return canonical_id
-
+
         # Check exact email match in database
         with self.get_session() as session:
             # Check aliases
-            alias = session.query(DeveloperAlias).filter(
-
-            ).first()
-
+            alias = session.query(DeveloperAlias).filter(DeveloperAlias.email == email).first()
+
             if alias:
                 self._cache[cache_key] = alias.canonical_id
                 self._update_developer_stats(alias.canonical_id)
                 return alias.canonical_id
-
+
             # Check primary identities
-            identity =
-            DeveloperIdentity
-
-
+            identity = (
+                session.query(DeveloperIdentity)
+                .filter(DeveloperIdentity.primary_email == email)
+                .first()
+            )
+
             if identity:
                 # Add as alias if name is different
                 if identity.primary_name.lower() != name.lower():
                     self._add_alias(identity.canonical_id, name, email)
                 self._cache[cache_key] = identity.canonical_id
                 return identity.canonical_id
-
+
             # Find similar developer
             best_match = self._find_best_match(name, email)
-
+
             if best_match and best_match[1] >= self.similarity_threshold:
                 canonical_id = best_match[0]
                 self._add_alias(canonical_id, name, email)
                 self._cache[cache_key] = canonical_id
                 return canonical_id
-
+
             # Create new identity
             canonical_id = self._create_identity(name, email, github_username)
             self._cache[cache_key] = canonical_id
             return canonical_id
-
-    def _find_best_match(self, name: str, email: str) -> Optional[
+
+    def _find_best_match(self, name: str, email: str) -> Optional[tuple[str, float]]:
         """Find the best matching existing developer."""
         best_score = 0.0
         best_canonical_id = None
-
+
         name_lower = name.lower().strip()
-        email_domain = email.split(
-
+        email_domain = email.split("@")[1] if "@" in email else ""
+
         with self.get_session() as session:
             # Get all identities for comparison
             identities = session.query(DeveloperIdentity).all()
-
+
             for identity in identities:
                 score = 0.0
-
+
                 # Name similarity (40% weight)
                 name_sim = difflib.SequenceMatcher(
                     None, name_lower, identity.primary_name.lower()
                 ).ratio()
                 score += name_sim * 0.4
-
+
                 # Email domain similarity (30% weight)
-                identity_domain = (
-
+                identity_domain = (
+                    identity.primary_email.split("@")[1] if "@" in identity.primary_email else ""
+                )
                 if email_domain and email_domain == identity_domain:
                     score += 0.3
-
+
                 # Check aliases (30% weight)
-                aliases =
-                DeveloperAlias
-
-
+                aliases = (
+                    session.query(DeveloperAlias)
+                    .filter(DeveloperAlias.canonical_id == identity.canonical_id)
+                    .all()
+                )
+
                 best_alias_score = 0.0
                 for alias in aliases:
                     alias_name_sim = difflib.SequenceMatcher(
                         None, name_lower, alias.name.lower()
                     ).ratio()
-
+
                     # Bonus for same email domain in aliases
-                    alias_domain = alias.email.split(
+                    alias_domain = alias.email.split("@")[1] if "@" in alias.email else ""
                     domain_bonus = 0.2 if alias_domain == email_domain else 0.0
-
+
                     alias_score = alias_name_sim + domain_bonus
                     best_alias_score = max(best_alias_score, alias_score)
-
+
                 score += min(best_alias_score * 0.3, 0.3)
-
+
                 if score > best_score:
                     best_score = score
                     best_canonical_id = identity.canonical_id
-
+
         return (best_canonical_id, best_score) if best_canonical_id else None
-
-    def _create_identity(self, name: str, email: str,
-                         github_username: Optional[str] = None) -> str:
+
+    def _create_identity(self, name: str, email: str, github_username: Optional[str] = None) -> str:
         """Create new developer identity."""
         canonical_id = str(uuid.uuid4())
-
+
         with self.get_session() as session:
             identity = DeveloperIdentity(
                 canonical_id=canonical_id,
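The `_find_best_match` logic above weights name similarity at 0.4, a matching primary-email domain at a flat 0.3, and the best alias match at up to 0.3; `resolve_developer` then compares the total against `similarity_threshold` (default 0.85). A standalone sketch of that scoring on plain values rather than the ORM rows (the function and parameter names here are illustrative, not part of the package API):

```python
# Mirrors the weights shown in the hunk above: 0.4 name, 0.3 domain, up to 0.3 aliases.
import difflib


def similarity_score(
    name: str,
    email: str,
    candidate_name: str,
    candidate_email: str,
    candidate_aliases: list[tuple[str, str]],  # (alias_name, alias_email) pairs
) -> float:
    """Score a (name, email) pair against a candidate identity and its aliases."""
    name_lower = name.lower().strip()
    email_domain = email.split("@")[1] if "@" in email else ""

    # Name similarity contributes up to 0.4.
    score = difflib.SequenceMatcher(None, name_lower, candidate_name.lower()).ratio() * 0.4

    # A matching primary-email domain contributes a flat 0.3.
    candidate_domain = candidate_email.split("@")[1] if "@" in candidate_email else ""
    if email_domain and email_domain == candidate_domain:
        score += 0.3

    # The best alias match (name similarity plus a 0.2 same-domain bonus) contributes up to 0.3.
    best_alias = 0.0
    for alias_name, alias_email in candidate_aliases:
        alias_sim = difflib.SequenceMatcher(None, name_lower, alias_name.lower()).ratio()
        alias_domain = alias_email.split("@")[1] if "@" in alias_email else ""
        alias_sim += 0.2 if alias_domain == email_domain else 0.0
        best_alias = max(best_alias, alias_sim)
    score += min(best_alias * 0.3, 0.3)

    return score  # compared against similarity_threshold (default 0.85) by the resolver
```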
@@ -247,155 +271,168 @@
                 primary_email=email,
                 github_username=github_username,
                 total_commits=0,
-                total_story_points=0
+                total_story_points=0,
             )
             session.add(identity)
-
+
             # Update cache
             self._cache[canonical_id] = {
-
-
-
+                "primary_name": name,
+                "primary_email": email,
+                "github_username": github_username,
             }
-
+
         return canonical_id
-
+
     def _add_alias(self, canonical_id: str, name: str, email: str):
         """Add alias for existing developer."""
         with self.get_session() as session:
             # Check if alias already exists
-            existing =
-
-
-
+            existing = (
+                session.query(DeveloperAlias)
+                .filter(
+                    and_(
+                        DeveloperAlias.canonical_id == canonical_id,
+                        DeveloperAlias.email == email.lower(),
+                    )
                 )
-
-
+                .first()
+            )
+
             if not existing:
-                alias = DeveloperAlias(
-                    canonical_id=canonical_id,
-                    name=name,
-                    email=email.lower()
-                )
+                alias = DeveloperAlias(canonical_id=canonical_id, name=name, email=email.lower())
                 session.add(alias)
-
+
     def _update_developer_stats(self, canonical_id: str):
         """Update developer statistics."""
         with self.get_session() as session:
-            identity =
-            DeveloperIdentity
-
-
+            identity = (
+                session.query(DeveloperIdentity)
+                .filter(DeveloperIdentity.canonical_id == canonical_id)
+                .first()
+            )
+
             if identity:
                 identity.last_seen = datetime.utcnow()
-
+
     def merge_identities(self, canonical_id1: str, canonical_id2: str):
         """Merge two developer identities."""
         # First, add the alias outside of the main merge transaction
         with self.get_session() as session:
-            identity2 =
-            DeveloperIdentity
-
+            identity2 = (
+                session.query(DeveloperIdentity)
+                .filter(DeveloperIdentity.canonical_id == canonical_id2)
+                .first()
+            )
             if identity2:
                 identity2_name = identity2.primary_name
                 identity2_email = identity2.primary_email
-
+
                 # Add identity2's primary as alias to identity1 first
                 self._add_alias(canonical_id1, identity2_name, identity2_email)
-
+
         # Now do the merge in a separate transaction
         with self.get_session() as session:
             # Get both identities fresh
-            identity1 =
-            DeveloperIdentity
-
-
-
-
-
+            identity1 = (
+                session.query(DeveloperIdentity)
+                .filter(DeveloperIdentity.canonical_id == canonical_id1)
+                .first()
+            )
+            identity2 = (
+                session.query(DeveloperIdentity)
+                .filter(DeveloperIdentity.canonical_id == canonical_id2)
+                .first()
+            )
+
             if not identity1 or not identity2:
                 raise ValueError("One or both identities not found")
-
+
             # Keep identity1, merge identity2 into it
             identity1.total_commits += identity2.total_commits
             identity1.total_story_points += identity2.total_story_points
             identity1.first_seen = min(identity1.first_seen, identity2.first_seen)
             identity1.last_seen = max(identity1.last_seen, identity2.last_seen)
-
+
             # Move all aliases from identity2 to identity1
-            aliases =
-            DeveloperAlias
-
-
+            aliases = (
+                session.query(DeveloperAlias)
+                .filter(DeveloperAlias.canonical_id == canonical_id2)
+                .all()
+            )
+
             for alias in aliases:
                 alias.canonical_id = canonical_id1
-
+
             # Delete identity2
             session.delete(identity2)
-
+
             # Clear cache to force reload
             self._cache.clear()
             self._load_cache()
-
-    def get_developer_stats(self) ->
+
+    def get_developer_stats(self) -> list[dict[str, Any]]:
         """Get statistics for all developers."""
         stats = []
-
+
         with self.get_session() as session:
             identities = session.query(DeveloperIdentity).all()
-
+
             for identity in identities:
                 # Count aliases
-                alias_count =
-                DeveloperAlias
-
-
-
-
-
-
-
-
-
-
-
-
-
+                alias_count = (
+                    session.query(DeveloperAlias)
+                    .filter(DeveloperAlias.canonical_id == identity.canonical_id)
+                    .count()
+                )
+
+                stats.append(
+                    {
+                        "canonical_id": identity.canonical_id,
+                        "primary_name": identity.primary_name,
+                        "primary_email": identity.primary_email,
+                        "github_username": identity.github_username,
+                        "total_commits": identity.total_commits,
+                        "total_story_points": identity.total_story_points,
+                        "alias_count": alias_count,
+                        "first_seen": identity.first_seen,
+                        "last_seen": identity.last_seen,
+                    }
+                )
+
         # Sort by total commits
-        return sorted(stats, key=lambda x: x[
-
-    def update_commit_stats(self, commits:
+        return sorted(stats, key=lambda x: x["total_commits"], reverse=True)
+
+    def update_commit_stats(self, commits: list[dict[str, Any]]):
         """Update developer statistics based on commits."""
         # Aggregate stats by canonical ID
-        stats_by_dev = defaultdict(lambda: {
-
+        stats_by_dev = defaultdict(lambda: {"commits": 0, "story_points": 0})
+
         for commit in commits:
-            canonical_id = self.resolve_developer(
-
-
-            )
-
-            stats_by_dev[canonical_id]['commits'] += 1
-            stats_by_dev[canonical_id]['story_points'] += commit.get('story_points', 0) or 0
-
+            canonical_id = self.resolve_developer(commit["author_name"], commit["author_email"])
+
+            stats_by_dev[canonical_id]["commits"] += 1
+            stats_by_dev[canonical_id]["story_points"] += commit.get("story_points", 0) or 0
+
         # Update database
         with self.get_session() as session:
             for canonical_id, stats in stats_by_dev.items():
-                identity =
-                DeveloperIdentity
-
-
+                identity = (
+                    session.query(DeveloperIdentity)
+                    .filter(DeveloperIdentity.canonical_id == canonical_id)
+                    .first()
+                )
+
                 if identity:
-                    identity.total_commits += stats[
-                    identity.total_story_points += stats[
+                    identity.total_commits += stats["commits"]
+                    identity.total_story_points += stats["story_points"]
                     identity.last_seen = datetime.utcnow()
-
+
         # Apply manual mappings after all identities are created
         if self.manual_mappings:
             self.apply_manual_mappings()
-
+
     def apply_manual_mappings(self):
         """Apply manual mappings - can be called explicitly after identities are created."""
         if self.manual_mappings:
-            self._apply_manual_mappings(self.manual_mappings)
+            self._apply_manual_mappings(self.manual_mappings)
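Putting the public methods from this file together, a possible end-to-end flow (reusing the `resolver` instance sketched earlier; the commit dicts are invented, but the keys match what `update_commit_stats` reads in 1.0.3):

```python
commits = [
    {"author_name": "Jane Doe", "author_email": "jane@example.com", "story_points": 3},
    {"author_name": "J. Doe", "author_email": "jane.doe@users.noreply.github.com", "story_points": 0},
]

resolver.update_commit_stats(commits)       # resolves each author and accumulates totals

for dev in resolver.get_developer_stats():  # sorted by "total_commits", descending
    print(dev["primary_name"], dev["total_commits"], dev["total_story_points"], dev["alias_count"])
```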
gitflow_analytics/extractors/base.py
@@ -1,11 +1,12 @@
 """Base classes for pluggable extractors."""
+
 from abc import ABC, abstractmethod
-from typing import Any,
+from typing import Any, Optional
 
 
 class ExtractorBase(ABC):
     """Base class for all extractors."""
-
+
     @abstractmethod
     def extract_from_text(self, text: str) -> Any:
         """Extract information from text."""
@@ -14,28 +15,29 @@ class ExtractorBase(ABC):
 
 class StoryPointExtractorBase(ExtractorBase):
     """Base class for story point extractors."""
-
+
     @abstractmethod
     def extract_from_text(self, text: str) -> Optional[int]:
         """Extract story points from text."""
         pass
-
+
     @abstractmethod
-    def extract_from_pr(
-
+    def extract_from_pr(
+        self, pr_data: dict[str, Any], commit_messages: Optional[list[str]] = None
+    ) -> Optional[int]:
         """Extract story points from pull request."""
         pass
 
 
 class TicketExtractorBase(ExtractorBase):
     """Base class for ticket extractors."""
-
+
     @abstractmethod
-    def extract_from_text(self, text: str) ->
+    def extract_from_text(self, text: str) -> list[dict[str, str]]:
         """Extract ticket references from text."""
         pass
-
+
     @abstractmethod
-    def extract_by_platform(self, text: str) ->
+    def extract_by_platform(self, text: str) -> dict[str, list[str]]:
         """Extract tickets grouped by platform."""
-        pass
+        pass
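For reference, a minimal concrete implementation against the `TicketExtractorBase` signatures shown above; the regex and the returned dict keys are assumptions for illustration, not the extractors shipped in `gitflow_analytics/extractors/tickets.py`:

```python
# Toy subclass of the ABC above; "platform"/"id" keys and the JIRA-style regex are assumed.
import re

from gitflow_analytics.extractors.base import TicketExtractorBase

JIRA_KEY = re.compile(r"\b[A-Z][A-Z0-9]+-\d+\b")


class JiraOnlyTicketExtractor(TicketExtractorBase):
    """Recognises only JIRA-style keys such as PROJ-123."""

    def extract_from_text(self, text: str) -> list[dict[str, str]]:
        return [{"platform": "jira", "id": key} for key in JIRA_KEY.findall(text or "")]

    def extract_by_platform(self, text: str) -> dict[str, list[str]]:
        return {"jira": JIRA_KEY.findall(text or "")}
```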