gitflow-analytics 1.0.3__py3-none-any.whl → 1.3.11__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- gitflow_analytics/_version.py +1 -1
- gitflow_analytics/classification/__init__.py +31 -0
- gitflow_analytics/classification/batch_classifier.py +752 -0
- gitflow_analytics/classification/classifier.py +464 -0
- gitflow_analytics/classification/feature_extractor.py +725 -0
- gitflow_analytics/classification/linguist_analyzer.py +574 -0
- gitflow_analytics/classification/model.py +455 -0
- gitflow_analytics/cli.py +4158 -350
- gitflow_analytics/cli_rich.py +198 -48
- gitflow_analytics/config/__init__.py +43 -0
- gitflow_analytics/config/errors.py +261 -0
- gitflow_analytics/config/loader.py +905 -0
- gitflow_analytics/config/profiles.py +264 -0
- gitflow_analytics/config/repository.py +124 -0
- gitflow_analytics/config/schema.py +444 -0
- gitflow_analytics/config/validator.py +154 -0
- gitflow_analytics/config.py +44 -508
- gitflow_analytics/core/analyzer.py +1209 -98
- gitflow_analytics/core/cache.py +1337 -29
- gitflow_analytics/core/data_fetcher.py +1285 -0
- gitflow_analytics/core/identity.py +363 -14
- gitflow_analytics/core/metrics_storage.py +526 -0
- gitflow_analytics/core/progress.py +372 -0
- gitflow_analytics/core/schema_version.py +269 -0
- gitflow_analytics/extractors/ml_tickets.py +1100 -0
- gitflow_analytics/extractors/story_points.py +8 -1
- gitflow_analytics/extractors/tickets.py +749 -11
- gitflow_analytics/identity_llm/__init__.py +6 -0
- gitflow_analytics/identity_llm/analysis_pass.py +231 -0
- gitflow_analytics/identity_llm/analyzer.py +464 -0
- gitflow_analytics/identity_llm/models.py +76 -0
- gitflow_analytics/integrations/github_integration.py +175 -11
- gitflow_analytics/integrations/jira_integration.py +461 -24
- gitflow_analytics/integrations/orchestrator.py +124 -1
- gitflow_analytics/metrics/activity_scoring.py +322 -0
- gitflow_analytics/metrics/branch_health.py +470 -0
- gitflow_analytics/metrics/dora.py +379 -20
- gitflow_analytics/models/database.py +843 -53
- gitflow_analytics/pm_framework/__init__.py +115 -0
- gitflow_analytics/pm_framework/adapters/__init__.py +50 -0
- gitflow_analytics/pm_framework/adapters/jira_adapter.py +1845 -0
- gitflow_analytics/pm_framework/base.py +406 -0
- gitflow_analytics/pm_framework/models.py +211 -0
- gitflow_analytics/pm_framework/orchestrator.py +652 -0
- gitflow_analytics/pm_framework/registry.py +333 -0
- gitflow_analytics/qualitative/__init__.py +9 -10
- gitflow_analytics/qualitative/chatgpt_analyzer.py +259 -0
- gitflow_analytics/qualitative/classifiers/__init__.py +3 -3
- gitflow_analytics/qualitative/classifiers/change_type.py +518 -244
- gitflow_analytics/qualitative/classifiers/domain_classifier.py +272 -165
- gitflow_analytics/qualitative/classifiers/intent_analyzer.py +321 -222
- gitflow_analytics/qualitative/classifiers/llm/__init__.py +35 -0
- gitflow_analytics/qualitative/classifiers/llm/base.py +193 -0
- gitflow_analytics/qualitative/classifiers/llm/batch_processor.py +383 -0
- gitflow_analytics/qualitative/classifiers/llm/cache.py +479 -0
- gitflow_analytics/qualitative/classifiers/llm/cost_tracker.py +435 -0
- gitflow_analytics/qualitative/classifiers/llm/openai_client.py +403 -0
- gitflow_analytics/qualitative/classifiers/llm/prompts.py +373 -0
- gitflow_analytics/qualitative/classifiers/llm/response_parser.py +287 -0
- gitflow_analytics/qualitative/classifiers/llm_commit_classifier.py +607 -0
- gitflow_analytics/qualitative/classifiers/risk_analyzer.py +215 -189
- gitflow_analytics/qualitative/core/__init__.py +4 -4
- gitflow_analytics/qualitative/core/llm_fallback.py +239 -235
- gitflow_analytics/qualitative/core/nlp_engine.py +157 -148
- gitflow_analytics/qualitative/core/pattern_cache.py +214 -192
- gitflow_analytics/qualitative/core/processor.py +381 -248
- gitflow_analytics/qualitative/enhanced_analyzer.py +2236 -0
- gitflow_analytics/qualitative/example_enhanced_usage.py +420 -0
- gitflow_analytics/qualitative/models/__init__.py +7 -7
- gitflow_analytics/qualitative/models/schemas.py +155 -121
- gitflow_analytics/qualitative/utils/__init__.py +4 -4
- gitflow_analytics/qualitative/utils/batch_processor.py +136 -123
- gitflow_analytics/qualitative/utils/cost_tracker.py +142 -140
- gitflow_analytics/qualitative/utils/metrics.py +172 -158
- gitflow_analytics/qualitative/utils/text_processing.py +146 -104
- gitflow_analytics/reports/__init__.py +100 -0
- gitflow_analytics/reports/analytics_writer.py +539 -14
- gitflow_analytics/reports/base.py +648 -0
- gitflow_analytics/reports/branch_health_writer.py +322 -0
- gitflow_analytics/reports/classification_writer.py +924 -0
- gitflow_analytics/reports/cli_integration.py +427 -0
- gitflow_analytics/reports/csv_writer.py +1676 -212
- gitflow_analytics/reports/data_models.py +504 -0
- gitflow_analytics/reports/database_report_generator.py +427 -0
- gitflow_analytics/reports/example_usage.py +344 -0
- gitflow_analytics/reports/factory.py +499 -0
- gitflow_analytics/reports/formatters.py +698 -0
- gitflow_analytics/reports/html_generator.py +1116 -0
- gitflow_analytics/reports/interfaces.py +489 -0
- gitflow_analytics/reports/json_exporter.py +2770 -0
- gitflow_analytics/reports/narrative_writer.py +2287 -158
- gitflow_analytics/reports/story_point_correlation.py +1144 -0
- gitflow_analytics/reports/weekly_trends_writer.py +389 -0
- gitflow_analytics/training/__init__.py +5 -0
- gitflow_analytics/training/model_loader.py +377 -0
- gitflow_analytics/training/pipeline.py +550 -0
- gitflow_analytics/tui/__init__.py +1 -1
- gitflow_analytics/tui/app.py +129 -126
- gitflow_analytics/tui/screens/__init__.py +3 -3
- gitflow_analytics/tui/screens/analysis_progress_screen.py +188 -179
- gitflow_analytics/tui/screens/configuration_screen.py +154 -178
- gitflow_analytics/tui/screens/loading_screen.py +100 -110
- gitflow_analytics/tui/screens/main_screen.py +89 -72
- gitflow_analytics/tui/screens/results_screen.py +305 -281
- gitflow_analytics/tui/widgets/__init__.py +2 -2
- gitflow_analytics/tui/widgets/data_table.py +67 -69
- gitflow_analytics/tui/widgets/export_modal.py +76 -76
- gitflow_analytics/tui/widgets/progress_widget.py +41 -46
- gitflow_analytics-1.3.11.dist-info/METADATA +1015 -0
- gitflow_analytics-1.3.11.dist-info/RECORD +122 -0
- gitflow_analytics-1.0.3.dist-info/METADATA +0 -490
- gitflow_analytics-1.0.3.dist-info/RECORD +0 -62
- {gitflow_analytics-1.0.3.dist-info → gitflow_analytics-1.3.11.dist-info}/WHEEL +0 -0
- {gitflow_analytics-1.0.3.dist-info → gitflow_analytics-1.3.11.dist-info}/entry_points.txt +0 -0
- {gitflow_analytics-1.0.3.dist-info → gitflow_analytics-1.3.11.dist-info}/licenses/LICENSE +0 -0
- {gitflow_analytics-1.0.3.dist-info → gitflow_analytics-1.3.11.dist-info}/top_level.txt +0 -0
|
@@ -1,16 +1,20 @@
|
|
|
1
1
|
"""Developer identity resolution with persistence."""
|
|
2
2
|
|
|
3
3
|
import difflib
|
|
4
|
+
import logging
|
|
4
5
|
import uuid
|
|
5
6
|
from collections import defaultdict
|
|
6
7
|
from contextlib import contextmanager
|
|
7
|
-
from datetime import datetime
|
|
8
|
+
from datetime import datetime, timezone
|
|
9
|
+
from pathlib import Path
|
|
8
10
|
from typing import Any, Optional
|
|
9
11
|
|
|
10
12
|
from sqlalchemy import and_
|
|
11
13
|
|
|
12
14
|
from ..models.database import Database, DeveloperAlias, DeveloperIdentity
|
|
13
15
|
|
|
16
|
+
logger = logging.getLogger(__name__)
|
|
17
|
+
|
|
14
18
|
|
|
15
19
|
class DeveloperIdentityResolver:
|
|
16
20
|
"""Resolve and normalize developer identities across repositories."""
|
|
@@ -21,18 +25,110 @@ class DeveloperIdentityResolver:
|
|
|
21
25
|
similarity_threshold: float = 0.85,
|
|
22
26
|
manual_mappings: Optional[list[dict[str, Any]]] = None,
|
|
23
27
|
) -> None:
|
|
24
|
-
"""
|
|
28
|
+
"""
|
|
29
|
+
Initialize with database for persistence.
|
|
30
|
+
|
|
31
|
+
WHY: This initializer handles database connection issues gracefully,
|
|
32
|
+
allowing the system to continue functioning even when persistence fails.
|
|
33
|
+
|
|
34
|
+
Args:
|
|
35
|
+
db_path: Path to the SQLite database file
|
|
36
|
+
similarity_threshold: Threshold for fuzzy matching (0.0-1.0)
|
|
37
|
+
manual_mappings: Optional manual identity mappings from configuration
|
|
38
|
+
"""
|
|
25
39
|
self.similarity_threshold = similarity_threshold
|
|
26
|
-
self.
|
|
40
|
+
self.db_path = Path(db_path) # Convert string to Path
|
|
27
41
|
self._cache: dict[str, str] = {} # In-memory cache for performance
|
|
28
|
-
|
|
42
|
+
|
|
43
|
+
# Initialize database with error handling
|
|
44
|
+
try:
|
|
45
|
+
self.db = Database(self.db_path)
|
|
46
|
+
self._database_available = True
|
|
47
|
+
|
|
48
|
+
# Warn user if using fallback database
|
|
49
|
+
if self.db.is_readonly_fallback:
|
|
50
|
+
logger.warning(
|
|
51
|
+
"Using temporary database for identity resolution. "
|
|
52
|
+
"Identity mappings will not persist between runs. "
|
|
53
|
+
f"Check permissions on: {db_path}"
|
|
54
|
+
)
|
|
55
|
+
|
|
56
|
+
# Load existing data from database
|
|
57
|
+
self._load_cache()
|
|
58
|
+
|
|
59
|
+
except Exception as e:
|
|
60
|
+
logger.error(
|
|
61
|
+
f"Failed to initialize identity database at {db_path}: {e}. "
|
|
62
|
+
"Identity resolution will work but mappings won't persist."
|
|
63
|
+
)
|
|
64
|
+
self._database_available = False
|
|
65
|
+
self.db = None
|
|
29
66
|
|
|
30
67
|
# Store manual mappings to apply later
|
|
31
68
|
self.manual_mappings = manual_mappings
|
|
32
69
|
|
|
70
|
+
# When database is not available, we need in-memory fallback storage
|
|
71
|
+
if not self._database_available:
|
|
72
|
+
logger.info(
|
|
73
|
+
"Database unavailable, using in-memory identity resolution. "
|
|
74
|
+
"Identity mappings will not persist between runs."
|
|
75
|
+
)
|
|
76
|
+
self._in_memory_identities: dict[str, dict[str, Any]] = {}
|
|
77
|
+
self._in_memory_aliases: dict[str, str] = {}
|
|
78
|
+
|
|
79
|
+
# Apply manual mappings to in-memory storage if provided
|
|
80
|
+
if self.manual_mappings:
|
|
81
|
+
self._apply_manual_mappings_to_memory()
|
|
82
|
+
else:
|
|
83
|
+
# Apply manual mappings to database if provided
|
|
84
|
+
if self.manual_mappings:
|
|
85
|
+
self._apply_manual_mappings(self.manual_mappings)
|
|
86
|
+
|
|
33
87
|
@contextmanager
|
|
34
88
|
def get_session(self):
|
|
35
|
-
"""
|
|
89
|
+
"""
|
|
90
|
+
Get database session context manager with fallback handling.
|
|
91
|
+
|
|
92
|
+
WHY: When database is not available, we need to provide a no-op
|
|
93
|
+
context manager that allows the code to continue without failing.
|
|
94
|
+
"""
|
|
95
|
+
if not self._database_available or not self.db:
|
|
96
|
+
# No-op context manager when database is not available
|
|
97
|
+
class NoOpSession:
|
|
98
|
+
def query(self, *args, **kwargs):
|
|
99
|
+
return NoOpQuery()
|
|
100
|
+
|
|
101
|
+
def add(self, *args, **kwargs):
|
|
102
|
+
pass
|
|
103
|
+
|
|
104
|
+
def delete(self, *args, **kwargs):
|
|
105
|
+
pass
|
|
106
|
+
|
|
107
|
+
def commit(self):
|
|
108
|
+
pass
|
|
109
|
+
|
|
110
|
+
def rollback(self):
|
|
111
|
+
pass
|
|
112
|
+
|
|
113
|
+
def expire_all(self):
|
|
114
|
+
pass
|
|
115
|
+
|
|
116
|
+
class NoOpQuery:
|
|
117
|
+
def filter(self, *args, **kwargs):
|
|
118
|
+
return self
|
|
119
|
+
|
|
120
|
+
def first(self):
|
|
121
|
+
return None
|
|
122
|
+
|
|
123
|
+
def all(self):
|
|
124
|
+
return []
|
|
125
|
+
|
|
126
|
+
def count(self):
|
|
127
|
+
return 0
|
|
128
|
+
|
|
129
|
+
yield NoOpSession()
|
|
130
|
+
return
|
|
131
|
+
|
|
36
132
|
session = self.db.get_session()
|
|
37
133
|
try:
|
|
38
134
|
yield session
|
|
@@ -44,7 +140,16 @@ class DeveloperIdentityResolver:
|
|
|
44
140
|
session.close()
|
|
45
141
|
|
|
46
142
|
def _load_cache(self) -> None:
|
|
47
|
-
"""
|
|
143
|
+
"""
|
|
144
|
+
Load identities into memory cache.
|
|
145
|
+
|
|
146
|
+
WHY: When database is not available, we start with an empty cache
|
|
147
|
+
and rely on in-memory identity resolution for the current session.
|
|
148
|
+
"""
|
|
149
|
+
if not self._database_available:
|
|
150
|
+
logger.debug("Database not available, starting with empty identity cache")
|
|
151
|
+
return
|
|
152
|
+
|
|
48
153
|
with self.get_session() as session:
|
|
49
154
|
# Load all identities
|
|
50
155
|
identities = session.query(DeveloperIdentity).all()
|
|
@@ -63,19 +168,30 @@ class DeveloperIdentityResolver:
|
|
|
63
168
|
|
|
64
169
|
def _apply_manual_mappings(self, manual_mappings: list[dict[str, Any]]) -> None:
|
|
65
170
|
"""Apply manual identity mappings from configuration."""
|
|
171
|
+
# Handle database unavailable scenario
|
|
172
|
+
if not self._database_available:
|
|
173
|
+
self._apply_manual_mappings_to_memory()
|
|
174
|
+
return
|
|
175
|
+
|
|
66
176
|
# Clear cache to ensure we get fresh data
|
|
67
177
|
self._cache.clear()
|
|
68
178
|
self._load_cache()
|
|
69
179
|
|
|
70
180
|
with self.get_session() as session:
|
|
71
181
|
for mapping in manual_mappings:
|
|
72
|
-
|
|
182
|
+
# Support both canonical_email and primary_email for backward compatibility
|
|
183
|
+
canonical_email = (
|
|
184
|
+
(mapping.get("primary_email", "") or mapping.get("canonical_email", ""))
|
|
185
|
+
.lower()
|
|
186
|
+
.strip()
|
|
187
|
+
)
|
|
73
188
|
aliases = mapping.get("aliases", [])
|
|
189
|
+
preferred_name = mapping.get("name") # Optional display name
|
|
74
190
|
|
|
75
191
|
if not canonical_email or not aliases:
|
|
76
192
|
continue
|
|
77
193
|
|
|
78
|
-
# Find the canonical identity
|
|
194
|
+
# Find or create the canonical identity
|
|
79
195
|
canonical_identity = (
|
|
80
196
|
session.query(DeveloperIdentity)
|
|
81
197
|
.filter(DeveloperIdentity.primary_email == canonical_email)
|
|
@@ -83,9 +199,29 @@ class DeveloperIdentityResolver:
|
|
|
83
199
|
)
|
|
84
200
|
|
|
85
201
|
if not canonical_identity:
|
|
86
|
-
#
|
|
87
|
-
|
|
88
|
-
|
|
202
|
+
# Create the canonical identity if it doesn't exist
|
|
203
|
+
canonical_id = str(uuid.uuid4())
|
|
204
|
+
canonical_identity = DeveloperIdentity(
|
|
205
|
+
canonical_id=canonical_id,
|
|
206
|
+
primary_name=preferred_name or canonical_email.split("@")[0],
|
|
207
|
+
primary_email=canonical_email,
|
|
208
|
+
first_seen=datetime.now(timezone.utc),
|
|
209
|
+
last_seen=datetime.now(timezone.utc),
|
|
210
|
+
total_commits=0,
|
|
211
|
+
total_story_points=0,
|
|
212
|
+
)
|
|
213
|
+
session.add(canonical_identity)
|
|
214
|
+
session.commit()
|
|
215
|
+
print(
|
|
216
|
+
f"Created canonical identity: {canonical_identity.primary_name} ({canonical_email})"
|
|
217
|
+
)
|
|
218
|
+
|
|
219
|
+
# Update the preferred name if provided
|
|
220
|
+
if preferred_name and preferred_name != canonical_identity.primary_name:
|
|
221
|
+
print(
|
|
222
|
+
f"Updating display name: {canonical_identity.primary_name} → {preferred_name}"
|
|
223
|
+
)
|
|
224
|
+
canonical_identity.primary_name = preferred_name
|
|
89
225
|
|
|
90
226
|
# Process each alias
|
|
91
227
|
for alias_email in aliases:
|
|
@@ -153,7 +289,16 @@ class DeveloperIdentityResolver:
|
|
|
153
289
|
def resolve_developer(
|
|
154
290
|
self, name: str, email: str, github_username: Optional[str] = None
|
|
155
291
|
) -> str:
|
|
156
|
-
"""
|
|
292
|
+
"""
|
|
293
|
+
Resolve developer identity and return canonical ID.
|
|
294
|
+
|
|
295
|
+
WHY: This method handles both database-backed and in-memory identity resolution,
|
|
296
|
+
allowing the system to function even when persistence is not available.
|
|
297
|
+
"""
|
|
298
|
+
# Use fallback resolution when database is not available
|
|
299
|
+
if not self._database_available:
|
|
300
|
+
return self._fallback_identity_resolution(name, email)
|
|
301
|
+
|
|
157
302
|
# Normalize inputs
|
|
158
303
|
name = name.strip()
|
|
159
304
|
email = email.lower().strip()
|
|
@@ -164,6 +309,7 @@ class DeveloperIdentityResolver:
|
|
|
164
309
|
canonical_id = self._cache[cache_key]
|
|
165
310
|
# Update stats
|
|
166
311
|
self._update_developer_stats(canonical_id)
|
|
312
|
+
logger.debug(f"Resolved {name} <{email}> from cache to {canonical_id}")
|
|
167
313
|
return canonical_id
|
|
168
314
|
|
|
169
315
|
# Check exact email match in database
|
|
@@ -172,8 +318,14 @@ class DeveloperIdentityResolver:
|
|
|
172
318
|
alias = session.query(DeveloperAlias).filter(DeveloperAlias.email == email).first()
|
|
173
319
|
|
|
174
320
|
if alias:
|
|
321
|
+
# Found an alias with this email - add this name variant to cache and DB
|
|
175
322
|
self._cache[cache_key] = alias.canonical_id
|
|
176
323
|
self._update_developer_stats(alias.canonical_id)
|
|
324
|
+
logger.debug(f"Found alias for {email}, resolving {name} to {alias.canonical_id}")
|
|
325
|
+
# Add this name variant as an alias if it's different
|
|
326
|
+
if alias.name.lower() != name.lower():
|
|
327
|
+
logger.debug(f"Adding name variant '{name}' as alias for {email}")
|
|
328
|
+
self._add_alias(alias.canonical_id, name, email)
|
|
177
329
|
return alias.canonical_id
|
|
178
330
|
|
|
179
331
|
# Check primary identities
|
|
@@ -200,6 +352,7 @@ class DeveloperIdentityResolver:
|
|
|
200
352
|
return canonical_id
|
|
201
353
|
|
|
202
354
|
# Create new identity
|
|
355
|
+
logger.info(f"Creating new identity for {name} <{email}> - no matches found")
|
|
203
356
|
canonical_id = self._create_identity(name, email, github_username)
|
|
204
357
|
self._cache[cache_key] = canonical_id
|
|
205
358
|
return canonical_id
|
|
@@ -302,6 +455,9 @@ class DeveloperIdentityResolver:
|
|
|
302
455
|
if not existing:
|
|
303
456
|
alias = DeveloperAlias(canonical_id=canonical_id, name=name, email=email.lower())
|
|
304
457
|
session.add(alias)
|
|
458
|
+
# Update cache with the new alias
|
|
459
|
+
cache_key = f"{email.lower()}:{name.lower()}"
|
|
460
|
+
self._cache[cache_key] = canonical_id
|
|
305
461
|
|
|
306
462
|
def _update_developer_stats(self, canonical_id: str):
|
|
307
463
|
"""Update developer statistics."""
|
|
@@ -371,10 +527,52 @@ class DeveloperIdentityResolver:
|
|
|
371
527
|
self._cache.clear()
|
|
372
528
|
self._load_cache()
|
|
373
529
|
|
|
374
|
-
def get_developer_stats(
|
|
375
|
-
|
|
530
|
+
def get_developer_stats(
|
|
531
|
+
self, ticket_coverage: Optional[dict[str, float]] = None
|
|
532
|
+
) -> list[dict[str, Any]]:
|
|
533
|
+
"""
|
|
534
|
+
Get statistics for all developers.
|
|
535
|
+
|
|
536
|
+
WHY: This method returns the authoritative developer information for reports,
|
|
537
|
+
including display names that have been updated through manual mappings.
|
|
538
|
+
It ensures that report generators get the correct canonical display names.
|
|
539
|
+
|
|
540
|
+
DESIGN DECISION: Accepts optional ticket_coverage parameter to replace the
|
|
541
|
+
previously hardcoded 0.0 ticket coverage values. This enables accurate
|
|
542
|
+
per-developer ticket coverage reporting that matches overall metrics.
|
|
543
|
+
|
|
544
|
+
Args:
|
|
545
|
+
ticket_coverage: Optional dict mapping canonical_id to coverage percentage
|
|
546
|
+
|
|
547
|
+
Returns:
|
|
548
|
+
List of developer statistics with accurate ticket coverage data
|
|
549
|
+
"""
|
|
376
550
|
stats = []
|
|
377
551
|
|
|
552
|
+
if not self._database_available:
|
|
553
|
+
# Handle in-memory fallback
|
|
554
|
+
for canonical_id, identity_data in self._in_memory_identities.items():
|
|
555
|
+
# Get actual ticket coverage if provided, otherwise default to 0.0
|
|
556
|
+
coverage_pct = 0.0
|
|
557
|
+
if ticket_coverage:
|
|
558
|
+
coverage_pct = ticket_coverage.get(canonical_id, 0.0)
|
|
559
|
+
|
|
560
|
+
stats.append(
|
|
561
|
+
{
|
|
562
|
+
"canonical_id": canonical_id,
|
|
563
|
+
"primary_name": identity_data["primary_name"],
|
|
564
|
+
"primary_email": identity_data["primary_email"],
|
|
565
|
+
"github_username": identity_data.get("github_username"),
|
|
566
|
+
"total_commits": identity_data.get("total_commits", 0),
|
|
567
|
+
"total_story_points": identity_data.get("total_story_points", 0),
|
|
568
|
+
"alias_count": 0, # Not tracked in memory
|
|
569
|
+
"first_seen": None,
|
|
570
|
+
"last_seen": None,
|
|
571
|
+
"ticket_coverage_pct": coverage_pct,
|
|
572
|
+
}
|
|
573
|
+
)
|
|
574
|
+
return sorted(stats, key=lambda x: x["total_commits"], reverse=True)
|
|
575
|
+
|
|
378
576
|
with self.get_session() as session:
|
|
379
577
|
identities = session.query(DeveloperIdentity).all()
|
|
380
578
|
|
|
@@ -386,6 +584,11 @@ class DeveloperIdentityResolver:
|
|
|
386
584
|
.count()
|
|
387
585
|
)
|
|
388
586
|
|
|
587
|
+
# Get actual ticket coverage if provided, otherwise default to 0.0
|
|
588
|
+
coverage_pct = 0.0
|
|
589
|
+
if ticket_coverage:
|
|
590
|
+
coverage_pct = ticket_coverage.get(identity.canonical_id, 0.0)
|
|
591
|
+
|
|
389
592
|
stats.append(
|
|
390
593
|
{
|
|
391
594
|
"canonical_id": identity.canonical_id,
|
|
@@ -397,6 +600,7 @@ class DeveloperIdentityResolver:
|
|
|
397
600
|
"alias_count": alias_count,
|
|
398
601
|
"first_seen": identity.first_seen,
|
|
399
602
|
"last_seen": identity.last_seen,
|
|
603
|
+
"ticket_coverage_pct": coverage_pct,
|
|
400
604
|
}
|
|
401
605
|
)
|
|
402
606
|
|
|
@@ -409,7 +613,14 @@ class DeveloperIdentityResolver:
|
|
|
409
613
|
stats_by_dev = defaultdict(lambda: {"commits": 0, "story_points": 0})
|
|
410
614
|
|
|
411
615
|
for commit in commits:
|
|
616
|
+
# Debug: check if commit is actually a dictionary
|
|
617
|
+
if not isinstance(commit, dict):
|
|
618
|
+
print(f"Error: Expected commit to be dict, got {type(commit)}: {commit}")
|
|
619
|
+
continue
|
|
620
|
+
|
|
412
621
|
canonical_id = self.resolve_developer(commit["author_name"], commit["author_email"])
|
|
622
|
+
# Update the commit with the resolved canonical_id for later use in reports
|
|
623
|
+
commit["canonical_id"] = canonical_id
|
|
413
624
|
|
|
414
625
|
stats_by_dev[canonical_id]["commits"] += 1
|
|
415
626
|
stats_by_dev[canonical_id]["story_points"] += commit.get("story_points", 0) or 0
|
|
@@ -436,3 +647,141 @@ class DeveloperIdentityResolver:
|
|
|
436
647
|
"""Apply manual mappings - can be called explicitly after identities are created."""
|
|
437
648
|
if self.manual_mappings:
|
|
438
649
|
self._apply_manual_mappings(self.manual_mappings)
|
|
650
|
+
|
|
651
|
+
def get_canonical_name(self, canonical_id: str) -> str:
|
|
652
|
+
"""
|
|
653
|
+
Get the canonical display name for a given canonical ID.
|
|
654
|
+
|
|
655
|
+
WHY: Reports need to show the proper display name from manual mappings
|
|
656
|
+
instead of the original commit author name. This method provides the
|
|
657
|
+
authoritative display name for any canonical ID.
|
|
658
|
+
|
|
659
|
+
Args:
|
|
660
|
+
canonical_id: The canonical ID to get the display name for
|
|
661
|
+
|
|
662
|
+
Returns:
|
|
663
|
+
The display name that should be used in reports, or "Unknown" if not found
|
|
664
|
+
"""
|
|
665
|
+
if not self._database_available:
|
|
666
|
+
# Check in-memory storage first
|
|
667
|
+
if canonical_id in self._in_memory_identities:
|
|
668
|
+
return self._in_memory_identities[canonical_id]["primary_name"]
|
|
669
|
+
# Check cache
|
|
670
|
+
if canonical_id in self._cache:
|
|
671
|
+
cache_entry = self._cache[canonical_id]
|
|
672
|
+
if isinstance(cache_entry, dict):
|
|
673
|
+
return cache_entry.get("primary_name", "Unknown")
|
|
674
|
+
return "Unknown"
|
|
675
|
+
|
|
676
|
+
with self.get_session() as session:
|
|
677
|
+
identity = (
|
|
678
|
+
session.query(DeveloperIdentity)
|
|
679
|
+
.filter(DeveloperIdentity.canonical_id == canonical_id)
|
|
680
|
+
.first()
|
|
681
|
+
)
|
|
682
|
+
|
|
683
|
+
if identity:
|
|
684
|
+
return identity.primary_name
|
|
685
|
+
|
|
686
|
+
return "Unknown"
|
|
687
|
+
|
|
688
|
+
def _apply_manual_mappings_to_memory(self) -> None:
|
|
689
|
+
"""
|
|
690
|
+
Apply manual mappings to in-memory storage when database is not available.
|
|
691
|
+
|
|
692
|
+
WHY: When persistence fails, we still need to apply user-configured
|
|
693
|
+
identity mappings for the current analysis session.
|
|
694
|
+
"""
|
|
695
|
+
if not self.manual_mappings:
|
|
696
|
+
return
|
|
697
|
+
|
|
698
|
+
for mapping in self.manual_mappings:
|
|
699
|
+
# Support both canonical_email and primary_email for backward compatibility
|
|
700
|
+
canonical_email = (
|
|
701
|
+
(mapping.get("primary_email", "") or mapping.get("canonical_email", ""))
|
|
702
|
+
.lower()
|
|
703
|
+
.strip()
|
|
704
|
+
)
|
|
705
|
+
aliases = mapping.get("aliases", [])
|
|
706
|
+
preferred_name = mapping.get("name") # Optional display name
|
|
707
|
+
|
|
708
|
+
if not canonical_email or not aliases:
|
|
709
|
+
continue
|
|
710
|
+
|
|
711
|
+
# Create canonical identity in memory
|
|
712
|
+
canonical_id = str(uuid.uuid4())
|
|
713
|
+
self._in_memory_identities[canonical_id] = {
|
|
714
|
+
"primary_name": preferred_name or canonical_email.split("@")[0],
|
|
715
|
+
"primary_email": canonical_email,
|
|
716
|
+
"github_username": None,
|
|
717
|
+
"total_commits": 0,
|
|
718
|
+
"total_story_points": 0,
|
|
719
|
+
}
|
|
720
|
+
|
|
721
|
+
# Add to cache
|
|
722
|
+
self._cache[canonical_id] = self._in_memory_identities[canonical_id]
|
|
723
|
+
|
|
724
|
+
# Process aliases
|
|
725
|
+
for alias_email in aliases:
|
|
726
|
+
alias_email = alias_email.lower().strip()
|
|
727
|
+
alias_key = f"{alias_email}:{preferred_name or canonical_email.split('@')[0]}"
|
|
728
|
+
self._in_memory_aliases[alias_key] = canonical_id
|
|
729
|
+
self._cache[alias_key] = canonical_id
|
|
730
|
+
|
|
731
|
+
logger.debug(
|
|
732
|
+
f"Applied in-memory mapping: {preferred_name or canonical_email.split('@')[0]} "
|
|
733
|
+
f"with {len(aliases)} aliases"
|
|
734
|
+
)
|
|
735
|
+
|
|
736
|
+
def _fallback_identity_resolution(self, name: str, email: str) -> str:
|
|
737
|
+
"""
|
|
738
|
+
Fallback identity resolution when database is not available.
|
|
739
|
+
|
|
740
|
+
WHY: Even without persistence, we need consistent identity resolution
|
|
741
|
+
within a single analysis session to avoid duplicate developer entries.
|
|
742
|
+
|
|
743
|
+
Args:
|
|
744
|
+
name: Developer name
|
|
745
|
+
email: Developer email
|
|
746
|
+
|
|
747
|
+
Returns:
|
|
748
|
+
Canonical ID for the developer
|
|
749
|
+
"""
|
|
750
|
+
# Normalize inputs
|
|
751
|
+
name = name.strip()
|
|
752
|
+
email = email.lower().strip()
|
|
753
|
+
cache_key = f"{email}:{name.lower()}"
|
|
754
|
+
|
|
755
|
+
# Check if already resolved
|
|
756
|
+
if cache_key in self._cache:
|
|
757
|
+
return self._cache[cache_key]
|
|
758
|
+
|
|
759
|
+
# Check in-memory aliases
|
|
760
|
+
if cache_key in self._in_memory_aliases:
|
|
761
|
+
canonical_id = self._in_memory_aliases[cache_key]
|
|
762
|
+
self._cache[cache_key] = canonical_id
|
|
763
|
+
return canonical_id
|
|
764
|
+
|
|
765
|
+
# Check for email match in existing identities
|
|
766
|
+
for canonical_id, identity in self._in_memory_identities.items():
|
|
767
|
+
if identity["primary_email"] == email:
|
|
768
|
+
# Add this name variant to cache
|
|
769
|
+
self._cache[cache_key] = canonical_id
|
|
770
|
+
return canonical_id
|
|
771
|
+
|
|
772
|
+
# Create new identity
|
|
773
|
+
canonical_id = str(uuid.uuid4())
|
|
774
|
+
self._in_memory_identities[canonical_id] = {
|
|
775
|
+
"primary_name": name,
|
|
776
|
+
"primary_email": email,
|
|
777
|
+
"github_username": None,
|
|
778
|
+
"total_commits": 0,
|
|
779
|
+
"total_story_points": 0,
|
|
780
|
+
}
|
|
781
|
+
|
|
782
|
+
# Add to cache
|
|
783
|
+
self._cache[canonical_id] = self._in_memory_identities[canonical_id]
|
|
784
|
+
self._cache[cache_key] = canonical_id
|
|
785
|
+
|
|
786
|
+
logger.debug(f"Created in-memory identity for {name} <{email}>")
|
|
787
|
+
return canonical_id
|