gitflow-analytics 1.0.1__py3-none-any.whl → 1.3.6__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- gitflow_analytics/__init__.py +11 -11
- gitflow_analytics/_version.py +2 -2
- gitflow_analytics/classification/__init__.py +31 -0
- gitflow_analytics/classification/batch_classifier.py +752 -0
- gitflow_analytics/classification/classifier.py +464 -0
- gitflow_analytics/classification/feature_extractor.py +725 -0
- gitflow_analytics/classification/linguist_analyzer.py +574 -0
- gitflow_analytics/classification/model.py +455 -0
- gitflow_analytics/cli.py +4490 -378
- gitflow_analytics/cli_rich.py +503 -0
- gitflow_analytics/config/__init__.py +43 -0
- gitflow_analytics/config/errors.py +261 -0
- gitflow_analytics/config/loader.py +904 -0
- gitflow_analytics/config/profiles.py +264 -0
- gitflow_analytics/config/repository.py +124 -0
- gitflow_analytics/config/schema.py +441 -0
- gitflow_analytics/config/validator.py +154 -0
- gitflow_analytics/config.py +44 -398
- gitflow_analytics/core/analyzer.py +1320 -172
- gitflow_analytics/core/branch_mapper.py +132 -132
- gitflow_analytics/core/cache.py +1554 -175
- gitflow_analytics/core/data_fetcher.py +1193 -0
- gitflow_analytics/core/identity.py +571 -185
- gitflow_analytics/core/metrics_storage.py +526 -0
- gitflow_analytics/core/progress.py +372 -0
- gitflow_analytics/core/schema_version.py +269 -0
- gitflow_analytics/extractors/base.py +13 -11
- gitflow_analytics/extractors/ml_tickets.py +1100 -0
- gitflow_analytics/extractors/story_points.py +77 -59
- gitflow_analytics/extractors/tickets.py +841 -89
- gitflow_analytics/identity_llm/__init__.py +6 -0
- gitflow_analytics/identity_llm/analysis_pass.py +231 -0
- gitflow_analytics/identity_llm/analyzer.py +464 -0
- gitflow_analytics/identity_llm/models.py +76 -0
- gitflow_analytics/integrations/github_integration.py +258 -87
- gitflow_analytics/integrations/jira_integration.py +572 -123
- gitflow_analytics/integrations/orchestrator.py +206 -82
- gitflow_analytics/metrics/activity_scoring.py +322 -0
- gitflow_analytics/metrics/branch_health.py +470 -0
- gitflow_analytics/metrics/dora.py +542 -179
- gitflow_analytics/models/database.py +986 -59
- gitflow_analytics/pm_framework/__init__.py +115 -0
- gitflow_analytics/pm_framework/adapters/__init__.py +50 -0
- gitflow_analytics/pm_framework/adapters/jira_adapter.py +1845 -0
- gitflow_analytics/pm_framework/base.py +406 -0
- gitflow_analytics/pm_framework/models.py +211 -0
- gitflow_analytics/pm_framework/orchestrator.py +652 -0
- gitflow_analytics/pm_framework/registry.py +333 -0
- gitflow_analytics/qualitative/__init__.py +29 -0
- gitflow_analytics/qualitative/chatgpt_analyzer.py +259 -0
- gitflow_analytics/qualitative/classifiers/__init__.py +13 -0
- gitflow_analytics/qualitative/classifiers/change_type.py +742 -0
- gitflow_analytics/qualitative/classifiers/domain_classifier.py +506 -0
- gitflow_analytics/qualitative/classifiers/intent_analyzer.py +535 -0
- gitflow_analytics/qualitative/classifiers/llm/__init__.py +35 -0
- gitflow_analytics/qualitative/classifiers/llm/base.py +193 -0
- gitflow_analytics/qualitative/classifiers/llm/batch_processor.py +383 -0
- gitflow_analytics/qualitative/classifiers/llm/cache.py +479 -0
- gitflow_analytics/qualitative/classifiers/llm/cost_tracker.py +435 -0
- gitflow_analytics/qualitative/classifiers/llm/openai_client.py +403 -0
- gitflow_analytics/qualitative/classifiers/llm/prompts.py +373 -0
- gitflow_analytics/qualitative/classifiers/llm/response_parser.py +287 -0
- gitflow_analytics/qualitative/classifiers/llm_commit_classifier.py +607 -0
- gitflow_analytics/qualitative/classifiers/risk_analyzer.py +438 -0
- gitflow_analytics/qualitative/core/__init__.py +13 -0
- gitflow_analytics/qualitative/core/llm_fallback.py +657 -0
- gitflow_analytics/qualitative/core/nlp_engine.py +382 -0
- gitflow_analytics/qualitative/core/pattern_cache.py +479 -0
- gitflow_analytics/qualitative/core/processor.py +673 -0
- gitflow_analytics/qualitative/enhanced_analyzer.py +2236 -0
- gitflow_analytics/qualitative/example_enhanced_usage.py +420 -0
- gitflow_analytics/qualitative/models/__init__.py +25 -0
- gitflow_analytics/qualitative/models/schemas.py +306 -0
- gitflow_analytics/qualitative/utils/__init__.py +13 -0
- gitflow_analytics/qualitative/utils/batch_processor.py +339 -0
- gitflow_analytics/qualitative/utils/cost_tracker.py +345 -0
- gitflow_analytics/qualitative/utils/metrics.py +361 -0
- gitflow_analytics/qualitative/utils/text_processing.py +285 -0
- gitflow_analytics/reports/__init__.py +100 -0
- gitflow_analytics/reports/analytics_writer.py +550 -18
- gitflow_analytics/reports/base.py +648 -0
- gitflow_analytics/reports/branch_health_writer.py +322 -0
- gitflow_analytics/reports/classification_writer.py +924 -0
- gitflow_analytics/reports/cli_integration.py +427 -0
- gitflow_analytics/reports/csv_writer.py +1700 -216
- gitflow_analytics/reports/data_models.py +504 -0
- gitflow_analytics/reports/database_report_generator.py +427 -0
- gitflow_analytics/reports/example_usage.py +344 -0
- gitflow_analytics/reports/factory.py +499 -0
- gitflow_analytics/reports/formatters.py +698 -0
- gitflow_analytics/reports/html_generator.py +1116 -0
- gitflow_analytics/reports/interfaces.py +489 -0
- gitflow_analytics/reports/json_exporter.py +2770 -0
- gitflow_analytics/reports/narrative_writer.py +2289 -158
- gitflow_analytics/reports/story_point_correlation.py +1144 -0
- gitflow_analytics/reports/weekly_trends_writer.py +389 -0
- gitflow_analytics/training/__init__.py +5 -0
- gitflow_analytics/training/model_loader.py +377 -0
- gitflow_analytics/training/pipeline.py +550 -0
- gitflow_analytics/tui/__init__.py +5 -0
- gitflow_analytics/tui/app.py +724 -0
- gitflow_analytics/tui/screens/__init__.py +8 -0
- gitflow_analytics/tui/screens/analysis_progress_screen.py +496 -0
- gitflow_analytics/tui/screens/configuration_screen.py +523 -0
- gitflow_analytics/tui/screens/loading_screen.py +348 -0
- gitflow_analytics/tui/screens/main_screen.py +321 -0
- gitflow_analytics/tui/screens/results_screen.py +722 -0
- gitflow_analytics/tui/widgets/__init__.py +7 -0
- gitflow_analytics/tui/widgets/data_table.py +255 -0
- gitflow_analytics/tui/widgets/export_modal.py +301 -0
- gitflow_analytics/tui/widgets/progress_widget.py +187 -0
- gitflow_analytics-1.3.6.dist-info/METADATA +1015 -0
- gitflow_analytics-1.3.6.dist-info/RECORD +122 -0
- gitflow_analytics-1.0.1.dist-info/METADATA +0 -463
- gitflow_analytics-1.0.1.dist-info/RECORD +0 -31
- {gitflow_analytics-1.0.1.dist-info → gitflow_analytics-1.3.6.dist-info}/WHEEL +0 -0
- {gitflow_analytics-1.0.1.dist-info → gitflow_analytics-1.3.6.dist-info}/entry_points.txt +0 -0
- {gitflow_analytics-1.0.1.dist-info → gitflow_analytics-1.3.6.dist-info}/licenses/LICENSE +0 -0
- {gitflow_analytics-1.0.1.dist-info → gitflow_analytics-1.3.6.dist-info}/top_level.txt +0 -0
gitflow_analytics/core/identity.py

@@ -1,33 +1,134 @@
 """Developer identity resolution with persistence."""
+
 import difflib
+import logging
 import uuid
 from collections import defaultdict
 from contextlib import contextmanager
-from datetime import datetime
-from
+from datetime import datetime, timezone
+from pathlib import Path
+from typing import Any, Optional

 from sqlalchemy import and_

 from ..models.database import Database, DeveloperAlias, DeveloperIdentity

+logger = logging.getLogger(__name__)
+

 class DeveloperIdentityResolver:
     """Resolve and normalize developer identities across repositories."""
-
-    def __init__(
-
-
+
+    def __init__(
+        self,
+        db_path: str,
+        similarity_threshold: float = 0.85,
+        manual_mappings: Optional[list[dict[str, Any]]] = None,
+    ) -> None:
+        """
+        Initialize with database for persistence.
+
+        WHY: This initializer handles database connection issues gracefully,
+        allowing the system to continue functioning even when persistence fails.
+
+        Args:
+            db_path: Path to the SQLite database file
+            similarity_threshold: Threshold for fuzzy matching (0.0-1.0)
+            manual_mappings: Optional manual identity mappings from configuration
+        """
         self.similarity_threshold = similarity_threshold
-        self.
-        self._cache = {}  # In-memory cache for performance
-
-
+        self.db_path = Path(db_path)  # Convert string to Path
+        self._cache: dict[str, str] = {}  # In-memory cache for performance
+
+        # Initialize database with error handling
+        try:
+            self.db = Database(self.db_path)
+            self._database_available = True
+
+            # Warn user if using fallback database
+            if self.db.is_readonly_fallback:
+                logger.warning(
+                    "Using temporary database for identity resolution. "
+                    "Identity mappings will not persist between runs. "
+                    f"Check permissions on: {db_path}"
+                )
+
+            # Load existing data from database
+            self._load_cache()
+
+        except Exception as e:
+            logger.error(
+                f"Failed to initialize identity database at {db_path}: {e}. "
+                "Identity resolution will work but mappings won't persist."
+            )
+            self._database_available = False
+            self.db = None
+
         # Store manual mappings to apply later
         self.manual_mappings = manual_mappings
-
+
+        # When database is not available, we need in-memory fallback storage
+        if not self._database_available:
+            logger.info(
+                "Database unavailable, using in-memory identity resolution. "
+                "Identity mappings will not persist between runs."
+            )
+            self._in_memory_identities: dict[str, dict[str, Any]] = {}
+            self._in_memory_aliases: dict[str, str] = {}
+
+            # Apply manual mappings to in-memory storage if provided
+            if self.manual_mappings:
+                self._apply_manual_mappings_to_memory()
+        else:
+            # Apply manual mappings to database if provided
+            if self.manual_mappings:
+                self._apply_manual_mappings(self.manual_mappings)
+
     @contextmanager
     def get_session(self):
-        """
+        """
+        Get database session context manager with fallback handling.
+
+        WHY: When database is not available, we need to provide a no-op
+        context manager that allows the code to continue without failing.
+        """
+        if not self._database_available or not self.db:
+            # No-op context manager when database is not available
+            class NoOpSession:
+                def query(self, *args, **kwargs):
+                    return NoOpQuery()
+
+                def add(self, *args, **kwargs):
+                    pass
+
+                def delete(self, *args, **kwargs):
+                    pass
+
+                def commit(self):
+                    pass
+
+                def rollback(self):
+                    pass
+
+                def expire_all(self):
+                    pass
+
+            class NoOpQuery:
+                def filter(self, *args, **kwargs):
+                    return self
+
+                def first(self):
+                    return None
+
+                def all(self):
+                    return []
+
+                def count(self):
+                    return 0
+
+            yield NoOpSession()
+            return
+
         session = self.db.get_session()
         try:
             yield session
@@ -37,209 +138,285 @@ class DeveloperIdentityResolver:
             raise
         finally:
             session.close()
-
-    def _load_cache(self):
-        """
+
+    def _load_cache(self) -> None:
+        """
+        Load identities into memory cache.
+
+        WHY: When database is not available, we start with an empty cache
+        and rely on in-memory identity resolution for the current session.
+        """
+        if not self._database_available:
+            logger.debug("Database not available, starting with empty identity cache")
+            return
+
         with self.get_session() as session:
             # Load all identities
             identities = session.query(DeveloperIdentity).all()
             for identity in identities:
                 self._cache[identity.canonical_id] = {
-
-
-
+                    "primary_name": identity.primary_name,
+                    "primary_email": identity.primary_email,
+                    "github_username": identity.github_username,
                 }
-
+
             # Load all aliases
             aliases = session.query(DeveloperAlias).all()
             for alias in aliases:
                 key = f"{alias.email.lower()}:{alias.name.lower()}"
                 self._cache[key] = alias.canonical_id
-
-    def _apply_manual_mappings(self, manual_mappings:
+
+    def _apply_manual_mappings(self, manual_mappings: list[dict[str, Any]]) -> None:
         """Apply manual identity mappings from configuration."""
+        # Handle database unavailable scenario
+        if not self._database_available:
+            self._apply_manual_mappings_to_memory()
+            return
+
         # Clear cache to ensure we get fresh data
         self._cache.clear()
         self._load_cache()
-
+
         with self.get_session() as session:
             for mapping in manual_mappings:
-
-
-
+                # Support both canonical_email and primary_email for backward compatibility
+                canonical_email = (
+                    (mapping.get("primary_email", "") or mapping.get("canonical_email", ""))
+                    .lower()
+                    .strip()
+                )
+                aliases = mapping.get("aliases", [])
+                preferred_name = mapping.get("name")  # Optional display name
+
                 if not canonical_email or not aliases:
                     continue
-
-                # Find the canonical identity
-                canonical_identity =
-                    DeveloperIdentity
-
-
+
+                # Find or create the canonical identity
+                canonical_identity = (
+                    session.query(DeveloperIdentity)
+                    .filter(DeveloperIdentity.primary_email == canonical_email)
+                    .first()
+                )
+
                 if not canonical_identity:
-                    #
-
-
-
+                    # Create the canonical identity if it doesn't exist
+                    canonical_id = str(uuid.uuid4())
+                    canonical_identity = DeveloperIdentity(
+                        canonical_id=canonical_id,
+                        primary_name=preferred_name or canonical_email.split("@")[0],
+                        primary_email=canonical_email,
+                        first_seen=datetime.now(timezone.utc),
+                        last_seen=datetime.now(timezone.utc),
+                        total_commits=0,
+                        total_story_points=0,
+                    )
+                    session.add(canonical_identity)
+                    session.commit()
+                    print(
+                        f"Created canonical identity: {canonical_identity.primary_name} ({canonical_email})"
+                    )
+
+                # Update the preferred name if provided
+                if preferred_name and preferred_name != canonical_identity.primary_name:
+                    print(
+                        f"Updating display name: {canonical_identity.primary_name} → {preferred_name}"
+                    )
+                    canonical_identity.primary_name = preferred_name
+
                 # Process each alias
                 for alias_email in aliases:
                     alias_email = alias_email.lower().strip()
-
+
                     # Check if alias identity exists as a primary identity
-                    alias_identity =
-                        DeveloperIdentity
-
-
+                    alias_identity = (
+                        session.query(DeveloperIdentity)
+                        .filter(DeveloperIdentity.primary_email == alias_email)
+                        .first()
+                    )
+
                     if alias_identity:
                         if alias_identity.canonical_id != canonical_identity.canonical_id:
                             # Merge the identities - commit before merge to avoid locks
                             session.commit()
-                            print(
-
+                            print(
+                                f"Merging identity: {alias_identity.primary_name} ({alias_email}) into {canonical_identity.primary_name} ({canonical_email})"
+                            )
+                            self.merge_identities(
+                                canonical_identity.canonical_id, alias_identity.canonical_id
+                            )
                             # Refresh session after merge
                             session.expire_all()
                     else:
                         # Just add as an alias if not a primary identity
-                        existing_alias =
-
-
-
+                        existing_alias = (
+                            session.query(DeveloperAlias)
+                            .filter(
+                                and_(
+                                    DeveloperAlias.email == alias_email,
+                                    DeveloperAlias.canonical_id == canonical_identity.canonical_id,
+                                )
                             )
-
-
+                            .first()
+                        )
+
                         if not existing_alias:
                             # Get the name from any existing alias with this email
                             name_for_alias = None
-                            any_alias =
-                                DeveloperAlias
-
+                            any_alias = (
+                                session.query(DeveloperAlias)
+                                .filter(DeveloperAlias.email == alias_email)
+                                .first()
+                            )
                             if any_alias:
                                 name_for_alias = any_alias.name
                             else:
                                 name_for_alias = canonical_identity.primary_name
-
+
                             new_alias = DeveloperAlias(
                                 canonical_id=canonical_identity.canonical_id,
                                 name=name_for_alias,
-                                email=alias_email
+                                email=alias_email,
                             )
                             session.add(new_alias)
-                            print(
-
+                            print(
+                                f"Added alias: {alias_email} for {canonical_identity.primary_name}"
+                            )
+
         # Reload cache after all mappings
         self._cache.clear()
         self._load_cache()
-
-    def resolve_developer(
-
-
+
+    def resolve_developer(
+        self, name: str, email: str, github_username: Optional[str] = None
+    ) -> str:
+        """
+        Resolve developer identity and return canonical ID.
+
+        WHY: This method handles both database-backed and in-memory identity resolution,
+        allowing the system to function even when persistence is not available.
+        """
+        # Use fallback resolution when database is not available
+        if not self._database_available:
+            return self._fallback_identity_resolution(name, email)
+
         # Normalize inputs
         name = name.strip()
         email = email.lower().strip()
-
+
         # Check cache first
         cache_key = f"{email}:{name.lower()}"
         if cache_key in self._cache:
             canonical_id = self._cache[cache_key]
             # Update stats
             self._update_developer_stats(canonical_id)
+            logger.debug(f"Resolved {name} <{email}> from cache to {canonical_id}")
             return canonical_id
-
+
         # Check exact email match in database
         with self.get_session() as session:
             # Check aliases
-            alias = session.query(DeveloperAlias).filter(
-
-            ).first()
-
+            alias = session.query(DeveloperAlias).filter(DeveloperAlias.email == email).first()
+
             if alias:
+                # Found an alias with this email - add this name variant to cache and DB
                 self._cache[cache_key] = alias.canonical_id
                 self._update_developer_stats(alias.canonical_id)
+                logger.debug(f"Found alias for {email}, resolving {name} to {alias.canonical_id}")
+                # Add this name variant as an alias if it's different
+                if alias.name.lower() != name.lower():
+                    logger.debug(f"Adding name variant '{name}' as alias for {email}")
+                    self._add_alias(alias.canonical_id, name, email)
                 return alias.canonical_id
-
+
             # Check primary identities
-            identity =
-                DeveloperIdentity
-
-
+            identity = (
+                session.query(DeveloperIdentity)
+                .filter(DeveloperIdentity.primary_email == email)
+                .first()
+            )
+
             if identity:
                 # Add as alias if name is different
                 if identity.primary_name.lower() != name.lower():
                     self._add_alias(identity.canonical_id, name, email)
                 self._cache[cache_key] = identity.canonical_id
                 return identity.canonical_id
-
+
         # Find similar developer
         best_match = self._find_best_match(name, email)
-
+
         if best_match and best_match[1] >= self.similarity_threshold:
             canonical_id = best_match[0]
             self._add_alias(canonical_id, name, email)
             self._cache[cache_key] = canonical_id
             return canonical_id
-
+
         # Create new identity
+        logger.info(f"Creating new identity for {name} <{email}> - no matches found")
         canonical_id = self._create_identity(name, email, github_username)
         self._cache[cache_key] = canonical_id
         return canonical_id
-
-    def _find_best_match(self, name: str, email: str) -> Optional[
+
+    def _find_best_match(self, name: str, email: str) -> Optional[tuple[str, float]]:
         """Find the best matching existing developer."""
         best_score = 0.0
         best_canonical_id = None
-
+
         name_lower = name.lower().strip()
-        email_domain = email.split(
-
+        email_domain = email.split("@")[1] if "@" in email else ""
+
         with self.get_session() as session:
             # Get all identities for comparison
             identities = session.query(DeveloperIdentity).all()
-
+
             for identity in identities:
                 score = 0.0
-
+
                 # Name similarity (40% weight)
                 name_sim = difflib.SequenceMatcher(
                     None, name_lower, identity.primary_name.lower()
                 ).ratio()
                 score += name_sim * 0.4
-
+
                 # Email domain similarity (30% weight)
-                identity_domain = (
-
+                identity_domain = (
+                    identity.primary_email.split("@")[1] if "@" in identity.primary_email else ""
+                )
                 if email_domain and email_domain == identity_domain:
                     score += 0.3
-
+
                 # Check aliases (30% weight)
-                aliases =
-                    DeveloperAlias
-
-
+                aliases = (
+                    session.query(DeveloperAlias)
+                    .filter(DeveloperAlias.canonical_id == identity.canonical_id)
+                    .all()
+                )
+
                 best_alias_score = 0.0
                 for alias in aliases:
                     alias_name_sim = difflib.SequenceMatcher(
                         None, name_lower, alias.name.lower()
                     ).ratio()
-
+
                     # Bonus for same email domain in aliases
-                    alias_domain = alias.email.split(
+                    alias_domain = alias.email.split("@")[1] if "@" in alias.email else ""
                     domain_bonus = 0.2 if alias_domain == email_domain else 0.0
-
+
                     alias_score = alias_name_sim + domain_bonus
                     best_alias_score = max(best_alias_score, alias_score)
-
+
                 score += min(best_alias_score * 0.3, 0.3)
-
+
                 if score > best_score:
                     best_score = score
                     best_canonical_id = identity.canonical_id
-
+
         return (best_canonical_id, best_score) if best_canonical_id else None
-
-    def _create_identity(self, name: str, email: str,
-                         github_username: Optional[str] = None) -> str:
+
+    def _create_identity(self, name: str, email: str, github_username: Optional[str] = None) -> str:
         """Create new developer identity."""
         canonical_id = str(uuid.uuid4())
-
+
         with self.get_session() as session:
             identity = DeveloperIdentity(
                 canonical_id=canonical_id,
@@ -247,155 +424,364 @@ class DeveloperIdentityResolver:
                 primary_email=email,
                 github_username=github_username,
                 total_commits=0,
-                total_story_points=0
+                total_story_points=0,
             )
             session.add(identity)
-
+
             # Update cache
             self._cache[canonical_id] = {
-
-
-
+                "primary_name": name,
+                "primary_email": email,
+                "github_username": github_username,
             }
-
+
         return canonical_id
-
+
     def _add_alias(self, canonical_id: str, name: str, email: str):
         """Add alias for existing developer."""
         with self.get_session() as session:
             # Check if alias already exists
-            existing =
-
-
-
+            existing = (
+                session.query(DeveloperAlias)
+                .filter(
+                    and_(
+                        DeveloperAlias.canonical_id == canonical_id,
+                        DeveloperAlias.email == email.lower(),
+                    )
                 )
-
-
+                .first()
+            )
+
             if not existing:
-                alias = DeveloperAlias(
-                    canonical_id=canonical_id,
-                    name=name,
-                    email=email.lower()
-                )
+                alias = DeveloperAlias(canonical_id=canonical_id, name=name, email=email.lower())
                 session.add(alias)
-
+                # Update cache with the new alias
+                cache_key = f"{email.lower()}:{name.lower()}"
+                self._cache[cache_key] = canonical_id
+
     def _update_developer_stats(self, canonical_id: str):
         """Update developer statistics."""
         with self.get_session() as session:
-            identity =
-                DeveloperIdentity
-
-
+            identity = (
+                session.query(DeveloperIdentity)
+                .filter(DeveloperIdentity.canonical_id == canonical_id)
+                .first()
+            )
+
             if identity:
                 identity.last_seen = datetime.utcnow()
-
+
     def merge_identities(self, canonical_id1: str, canonical_id2: str):
         """Merge two developer identities."""
         # First, add the alias outside of the main merge transaction
         with self.get_session() as session:
-            identity2 =
-                DeveloperIdentity
-
+            identity2 = (
+                session.query(DeveloperIdentity)
+                .filter(DeveloperIdentity.canonical_id == canonical_id2)
+                .first()
+            )
             if identity2:
                 identity2_name = identity2.primary_name
                 identity2_email = identity2.primary_email
-
+
                 # Add identity2's primary as alias to identity1 first
                 self._add_alias(canonical_id1, identity2_name, identity2_email)
-
+
        # Now do the merge in a separate transaction
         with self.get_session() as session:
             # Get both identities fresh
-            identity1 =
-                DeveloperIdentity
-
-
-
-
-
+            identity1 = (
+                session.query(DeveloperIdentity)
+                .filter(DeveloperIdentity.canonical_id == canonical_id1)
+                .first()
+            )
+            identity2 = (
+                session.query(DeveloperIdentity)
+                .filter(DeveloperIdentity.canonical_id == canonical_id2)
+                .first()
+            )
+
             if not identity1 or not identity2:
                 raise ValueError("One or both identities not found")
-
+
             # Keep identity1, merge identity2 into it
             identity1.total_commits += identity2.total_commits
             identity1.total_story_points += identity2.total_story_points
             identity1.first_seen = min(identity1.first_seen, identity2.first_seen)
             identity1.last_seen = max(identity1.last_seen, identity2.last_seen)
-
+
             # Move all aliases from identity2 to identity1
-            aliases =
-                DeveloperAlias
-
-
+            aliases = (
+                session.query(DeveloperAlias)
+                .filter(DeveloperAlias.canonical_id == canonical_id2)
+                .all()
+            )
+
             for alias in aliases:
                 alias.canonical_id = canonical_id1
-
+
             # Delete identity2
             session.delete(identity2)
-
+
             # Clear cache to force reload
             self._cache.clear()
             self._load_cache()
-
-    def get_developer_stats(
-
+
+    def get_developer_stats(
+        self, ticket_coverage: Optional[dict[str, float]] = None
+    ) -> list[dict[str, Any]]:
+        """
+        Get statistics for all developers.
+
+        WHY: This method returns the authoritative developer information for reports,
+        including display names that have been updated through manual mappings.
+        It ensures that report generators get the correct canonical display names.
+
+        DESIGN DECISION: Accepts optional ticket_coverage parameter to replace the
+        previously hardcoded 0.0 ticket coverage values. This enables accurate
+        per-developer ticket coverage reporting that matches overall metrics.
+
+        Args:
+            ticket_coverage: Optional dict mapping canonical_id to coverage percentage
+
+        Returns:
+            List of developer statistics with accurate ticket coverage data
+        """
         stats = []
-
+
+        if not self._database_available:
+            # Handle in-memory fallback
+            for canonical_id, identity_data in self._in_memory_identities.items():
+                # Get actual ticket coverage if provided, otherwise default to 0.0
+                coverage_pct = 0.0
+                if ticket_coverage:
+                    coverage_pct = ticket_coverage.get(canonical_id, 0.0)
+
+                stats.append(
+                    {
+                        "canonical_id": canonical_id,
+                        "primary_name": identity_data["primary_name"],
+                        "primary_email": identity_data["primary_email"],
+                        "github_username": identity_data.get("github_username"),
+                        "total_commits": identity_data.get("total_commits", 0),
+                        "total_story_points": identity_data.get("total_story_points", 0),
+                        "alias_count": 0,  # Not tracked in memory
+                        "first_seen": None,
+                        "last_seen": None,
+                        "ticket_coverage_pct": coverage_pct,
+                    }
+                )
+            return sorted(stats, key=lambda x: x["total_commits"], reverse=True)
+
         with self.get_session() as session:
             identities = session.query(DeveloperIdentity).all()
-
+
             for identity in identities:
                 # Count aliases
-                alias_count =
-                    DeveloperAlias
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+                alias_count = (
+                    session.query(DeveloperAlias)
+                    .filter(DeveloperAlias.canonical_id == identity.canonical_id)
+                    .count()
+                )
+
+                # Get actual ticket coverage if provided, otherwise default to 0.0
+                coverage_pct = 0.0
+                if ticket_coverage:
+                    coverage_pct = ticket_coverage.get(identity.canonical_id, 0.0)
+
+                stats.append(
+                    {
+                        "canonical_id": identity.canonical_id,
+                        "primary_name": identity.primary_name,
+                        "primary_email": identity.primary_email,
+                        "github_username": identity.github_username,
+                        "total_commits": identity.total_commits,
+                        "total_story_points": identity.total_story_points,
+                        "alias_count": alias_count,
+                        "first_seen": identity.first_seen,
+                        "last_seen": identity.last_seen,
+                        "ticket_coverage_pct": coverage_pct,
+                    }
+                )
+
         # Sort by total commits
-        return sorted(stats, key=lambda x: x[
-
-    def update_commit_stats(self, commits:
+        return sorted(stats, key=lambda x: x["total_commits"], reverse=True)
+
+    def update_commit_stats(self, commits: list[dict[str, Any]]):
         """Update developer statistics based on commits."""
         # Aggregate stats by canonical ID
-        stats_by_dev = defaultdict(lambda: {
-
+        stats_by_dev = defaultdict(lambda: {"commits": 0, "story_points": 0})
+
         for commit in commits:
-
-
-            commit
-
-
-
-
+            # Debug: check if commit is actually a dictionary
+            if not isinstance(commit, dict):
+                print(f"Error: Expected commit to be dict, got {type(commit)}: {commit}")
+                continue
+
+            canonical_id = self.resolve_developer(commit["author_name"], commit["author_email"])
+            # Update the commit with the resolved canonical_id for later use in reports
+            commit["canonical_id"] = canonical_id
+
+            stats_by_dev[canonical_id]["commits"] += 1
+            stats_by_dev[canonical_id]["story_points"] += commit.get("story_points", 0) or 0
+
         # Update database
         with self.get_session() as session:
             for canonical_id, stats in stats_by_dev.items():
-                identity =
-                    DeveloperIdentity
-
-
+                identity = (
+                    session.query(DeveloperIdentity)
+                    .filter(DeveloperIdentity.canonical_id == canonical_id)
+                    .first()
+                )
+
                 if identity:
-                    identity.total_commits += stats[
-                    identity.total_story_points += stats[
+                    identity.total_commits += stats["commits"]
+                    identity.total_story_points += stats["story_points"]
                     identity.last_seen = datetime.utcnow()
-
+
         # Apply manual mappings after all identities are created
         if self.manual_mappings:
             self.apply_manual_mappings()
-
+
     def apply_manual_mappings(self):
         """Apply manual mappings - can be called explicitly after identities are created."""
         if self.manual_mappings:
-            self._apply_manual_mappings(self.manual_mappings)
+            self._apply_manual_mappings(self.manual_mappings)
+
+    def get_canonical_name(self, canonical_id: str) -> str:
+        """
+        Get the canonical display name for a given canonical ID.
+
+        WHY: Reports need to show the proper display name from manual mappings
+        instead of the original commit author name. This method provides the
+        authoritative display name for any canonical ID.
+
+        Args:
+            canonical_id: The canonical ID to get the display name for
+
+        Returns:
+            The display name that should be used in reports, or "Unknown" if not found
+        """
+        if not self._database_available:
+            # Check in-memory storage first
+            if canonical_id in self._in_memory_identities:
+                return self._in_memory_identities[canonical_id]["primary_name"]
+            # Check cache
+            if canonical_id in self._cache:
+                cache_entry = self._cache[canonical_id]
+                if isinstance(cache_entry, dict):
+                    return cache_entry.get("primary_name", "Unknown")
+            return "Unknown"
+
+        with self.get_session() as session:
+            identity = (
+                session.query(DeveloperIdentity)
+                .filter(DeveloperIdentity.canonical_id == canonical_id)
+                .first()
+            )
+
+            if identity:
+                return identity.primary_name
+
+        return "Unknown"
+
+    def _apply_manual_mappings_to_memory(self) -> None:
+        """
+        Apply manual mappings to in-memory storage when database is not available.
+
+        WHY: When persistence fails, we still need to apply user-configured
+        identity mappings for the current analysis session.
+        """
+        if not self.manual_mappings:
+            return
+
+        for mapping in self.manual_mappings:
+            # Support both canonical_email and primary_email for backward compatibility
+            canonical_email = (
+                (mapping.get("primary_email", "") or mapping.get("canonical_email", ""))
+                .lower()
+                .strip()
+            )
+            aliases = mapping.get("aliases", [])
+            preferred_name = mapping.get("name")  # Optional display name
+
+            if not canonical_email or not aliases:
+                continue
+
+            # Create canonical identity in memory
+            canonical_id = str(uuid.uuid4())
+            self._in_memory_identities[canonical_id] = {
+                "primary_name": preferred_name or canonical_email.split("@")[0],
+                "primary_email": canonical_email,
+                "github_username": None,
+                "total_commits": 0,
+                "total_story_points": 0,
+            }
+
+            # Add to cache
+            self._cache[canonical_id] = self._in_memory_identities[canonical_id]
+
+            # Process aliases
+            for alias_email in aliases:
+                alias_email = alias_email.lower().strip()
+                alias_key = f"{alias_email}:{preferred_name or canonical_email.split('@')[0]}"
+                self._in_memory_aliases[alias_key] = canonical_id
+                self._cache[alias_key] = canonical_id
+
+            logger.debug(
+                f"Applied in-memory mapping: {preferred_name or canonical_email.split('@')[0]} "
+                f"with {len(aliases)} aliases"
+            )
+
+    def _fallback_identity_resolution(self, name: str, email: str) -> str:
+        """
+        Fallback identity resolution when database is not available.
+
+        WHY: Even without persistence, we need consistent identity resolution
+        within a single analysis session to avoid duplicate developer entries.
+
+        Args:
+            name: Developer name
+            email: Developer email
+
+        Returns:
+            Canonical ID for the developer
+        """
+        # Normalize inputs
+        name = name.strip()
+        email = email.lower().strip()
+        cache_key = f"{email}:{name.lower()}"
+
+        # Check if already resolved
+        if cache_key in self._cache:
+            return self._cache[cache_key]
+
+        # Check in-memory aliases
+        if cache_key in self._in_memory_aliases:
+            canonical_id = self._in_memory_aliases[cache_key]
+            self._cache[cache_key] = canonical_id
+            return canonical_id
+
+        # Check for email match in existing identities
+        for canonical_id, identity in self._in_memory_identities.items():
+            if identity["primary_email"] == email:
+                # Add this name variant to cache
+                self._cache[cache_key] = canonical_id
+                return canonical_id
+
+        # Create new identity
+        canonical_id = str(uuid.uuid4())
+        self._in_memory_identities[canonical_id] = {
+            "primary_name": name,
+            "primary_email": email,
+            "github_username": None,
+            "total_commits": 0,
+            "total_story_points": 0,
+        }
+
+        # Add to cache
+        self._cache[canonical_id] = self._in_memory_identities[canonical_id]
+        self._cache[cache_key] = canonical_id
+
+        logger.debug(f"Created in-memory identity for {name} <{email}>")
+        return canonical_id