gitflow-analytics 1.0.1__py3-none-any.whl → 1.3.6__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (119)
  1. gitflow_analytics/__init__.py +11 -11
  2. gitflow_analytics/_version.py +2 -2
  3. gitflow_analytics/classification/__init__.py +31 -0
  4. gitflow_analytics/classification/batch_classifier.py +752 -0
  5. gitflow_analytics/classification/classifier.py +464 -0
  6. gitflow_analytics/classification/feature_extractor.py +725 -0
  7. gitflow_analytics/classification/linguist_analyzer.py +574 -0
  8. gitflow_analytics/classification/model.py +455 -0
  9. gitflow_analytics/cli.py +4490 -378
  10. gitflow_analytics/cli_rich.py +503 -0
  11. gitflow_analytics/config/__init__.py +43 -0
  12. gitflow_analytics/config/errors.py +261 -0
  13. gitflow_analytics/config/loader.py +904 -0
  14. gitflow_analytics/config/profiles.py +264 -0
  15. gitflow_analytics/config/repository.py +124 -0
  16. gitflow_analytics/config/schema.py +441 -0
  17. gitflow_analytics/config/validator.py +154 -0
  18. gitflow_analytics/config.py +44 -398
  19. gitflow_analytics/core/analyzer.py +1320 -172
  20. gitflow_analytics/core/branch_mapper.py +132 -132
  21. gitflow_analytics/core/cache.py +1554 -175
  22. gitflow_analytics/core/data_fetcher.py +1193 -0
  23. gitflow_analytics/core/identity.py +571 -185
  24. gitflow_analytics/core/metrics_storage.py +526 -0
  25. gitflow_analytics/core/progress.py +372 -0
  26. gitflow_analytics/core/schema_version.py +269 -0
  27. gitflow_analytics/extractors/base.py +13 -11
  28. gitflow_analytics/extractors/ml_tickets.py +1100 -0
  29. gitflow_analytics/extractors/story_points.py +77 -59
  30. gitflow_analytics/extractors/tickets.py +841 -89
  31. gitflow_analytics/identity_llm/__init__.py +6 -0
  32. gitflow_analytics/identity_llm/analysis_pass.py +231 -0
  33. gitflow_analytics/identity_llm/analyzer.py +464 -0
  34. gitflow_analytics/identity_llm/models.py +76 -0
  35. gitflow_analytics/integrations/github_integration.py +258 -87
  36. gitflow_analytics/integrations/jira_integration.py +572 -123
  37. gitflow_analytics/integrations/orchestrator.py +206 -82
  38. gitflow_analytics/metrics/activity_scoring.py +322 -0
  39. gitflow_analytics/metrics/branch_health.py +470 -0
  40. gitflow_analytics/metrics/dora.py +542 -179
  41. gitflow_analytics/models/database.py +986 -59
  42. gitflow_analytics/pm_framework/__init__.py +115 -0
  43. gitflow_analytics/pm_framework/adapters/__init__.py +50 -0
  44. gitflow_analytics/pm_framework/adapters/jira_adapter.py +1845 -0
  45. gitflow_analytics/pm_framework/base.py +406 -0
  46. gitflow_analytics/pm_framework/models.py +211 -0
  47. gitflow_analytics/pm_framework/orchestrator.py +652 -0
  48. gitflow_analytics/pm_framework/registry.py +333 -0
  49. gitflow_analytics/qualitative/__init__.py +29 -0
  50. gitflow_analytics/qualitative/chatgpt_analyzer.py +259 -0
  51. gitflow_analytics/qualitative/classifiers/__init__.py +13 -0
  52. gitflow_analytics/qualitative/classifiers/change_type.py +742 -0
  53. gitflow_analytics/qualitative/classifiers/domain_classifier.py +506 -0
  54. gitflow_analytics/qualitative/classifiers/intent_analyzer.py +535 -0
  55. gitflow_analytics/qualitative/classifiers/llm/__init__.py +35 -0
  56. gitflow_analytics/qualitative/classifiers/llm/base.py +193 -0
  57. gitflow_analytics/qualitative/classifiers/llm/batch_processor.py +383 -0
  58. gitflow_analytics/qualitative/classifiers/llm/cache.py +479 -0
  59. gitflow_analytics/qualitative/classifiers/llm/cost_tracker.py +435 -0
  60. gitflow_analytics/qualitative/classifiers/llm/openai_client.py +403 -0
  61. gitflow_analytics/qualitative/classifiers/llm/prompts.py +373 -0
  62. gitflow_analytics/qualitative/classifiers/llm/response_parser.py +287 -0
  63. gitflow_analytics/qualitative/classifiers/llm_commit_classifier.py +607 -0
  64. gitflow_analytics/qualitative/classifiers/risk_analyzer.py +438 -0
  65. gitflow_analytics/qualitative/core/__init__.py +13 -0
  66. gitflow_analytics/qualitative/core/llm_fallback.py +657 -0
  67. gitflow_analytics/qualitative/core/nlp_engine.py +382 -0
  68. gitflow_analytics/qualitative/core/pattern_cache.py +479 -0
  69. gitflow_analytics/qualitative/core/processor.py +673 -0
  70. gitflow_analytics/qualitative/enhanced_analyzer.py +2236 -0
  71. gitflow_analytics/qualitative/example_enhanced_usage.py +420 -0
  72. gitflow_analytics/qualitative/models/__init__.py +25 -0
  73. gitflow_analytics/qualitative/models/schemas.py +306 -0
  74. gitflow_analytics/qualitative/utils/__init__.py +13 -0
  75. gitflow_analytics/qualitative/utils/batch_processor.py +339 -0
  76. gitflow_analytics/qualitative/utils/cost_tracker.py +345 -0
  77. gitflow_analytics/qualitative/utils/metrics.py +361 -0
  78. gitflow_analytics/qualitative/utils/text_processing.py +285 -0
  79. gitflow_analytics/reports/__init__.py +100 -0
  80. gitflow_analytics/reports/analytics_writer.py +550 -18
  81. gitflow_analytics/reports/base.py +648 -0
  82. gitflow_analytics/reports/branch_health_writer.py +322 -0
  83. gitflow_analytics/reports/classification_writer.py +924 -0
  84. gitflow_analytics/reports/cli_integration.py +427 -0
  85. gitflow_analytics/reports/csv_writer.py +1700 -216
  86. gitflow_analytics/reports/data_models.py +504 -0
  87. gitflow_analytics/reports/database_report_generator.py +427 -0
  88. gitflow_analytics/reports/example_usage.py +344 -0
  89. gitflow_analytics/reports/factory.py +499 -0
  90. gitflow_analytics/reports/formatters.py +698 -0
  91. gitflow_analytics/reports/html_generator.py +1116 -0
  92. gitflow_analytics/reports/interfaces.py +489 -0
  93. gitflow_analytics/reports/json_exporter.py +2770 -0
  94. gitflow_analytics/reports/narrative_writer.py +2289 -158
  95. gitflow_analytics/reports/story_point_correlation.py +1144 -0
  96. gitflow_analytics/reports/weekly_trends_writer.py +389 -0
  97. gitflow_analytics/training/__init__.py +5 -0
  98. gitflow_analytics/training/model_loader.py +377 -0
  99. gitflow_analytics/training/pipeline.py +550 -0
  100. gitflow_analytics/tui/__init__.py +5 -0
  101. gitflow_analytics/tui/app.py +724 -0
  102. gitflow_analytics/tui/screens/__init__.py +8 -0
  103. gitflow_analytics/tui/screens/analysis_progress_screen.py +496 -0
  104. gitflow_analytics/tui/screens/configuration_screen.py +523 -0
  105. gitflow_analytics/tui/screens/loading_screen.py +348 -0
  106. gitflow_analytics/tui/screens/main_screen.py +321 -0
  107. gitflow_analytics/tui/screens/results_screen.py +722 -0
  108. gitflow_analytics/tui/widgets/__init__.py +7 -0
  109. gitflow_analytics/tui/widgets/data_table.py +255 -0
  110. gitflow_analytics/tui/widgets/export_modal.py +301 -0
  111. gitflow_analytics/tui/widgets/progress_widget.py +187 -0
  112. gitflow_analytics-1.3.6.dist-info/METADATA +1015 -0
  113. gitflow_analytics-1.3.6.dist-info/RECORD +122 -0
  114. gitflow_analytics-1.0.1.dist-info/METADATA +0 -463
  115. gitflow_analytics-1.0.1.dist-info/RECORD +0 -31
  116. {gitflow_analytics-1.0.1.dist-info → gitflow_analytics-1.3.6.dist-info}/WHEEL +0 -0
  117. {gitflow_analytics-1.0.1.dist-info → gitflow_analytics-1.3.6.dist-info}/entry_points.txt +0 -0
  118. {gitflow_analytics-1.0.1.dist-info → gitflow_analytics-1.3.6.dist-info}/licenses/LICENSE +0 -0
  119. {gitflow_analytics-1.0.1.dist-info → gitflow_analytics-1.3.6.dist-info}/top_level.txt +0 -0
@@ -1,33 +1,134 @@
1
1
  """Developer identity resolution with persistence."""
2
+
2
3
  import difflib
4
+ import logging
3
5
  import uuid
4
6
  from collections import defaultdict
5
7
  from contextlib import contextmanager
6
- from datetime import datetime
7
- from typing import Any, Dict, List, Optional, Tuple
8
+ from datetime import datetime, timezone
9
+ from pathlib import Path
10
+ from typing import Any, Optional
8
11
 
9
12
  from sqlalchemy import and_
10
13
 
11
14
  from ..models.database import Database, DeveloperAlias, DeveloperIdentity
12
15
 
16
+ logger = logging.getLogger(__name__)
17
+
13
18
 
14
19
  class DeveloperIdentityResolver:
15
20
  """Resolve and normalize developer identities across repositories."""
16
-
17
- def __init__(self, db_path, similarity_threshold: float = 0.85,
18
- manual_mappings: Optional[List[Dict[str, Any]]] = None):
19
- """Initialize with database for persistence."""
21
+
22
+ def __init__(
23
+ self,
24
+ db_path: str,
25
+ similarity_threshold: float = 0.85,
26
+ manual_mappings: Optional[list[dict[str, Any]]] = None,
27
+ ) -> None:
28
+ """
29
+ Initialize with database for persistence.
30
+
31
+ WHY: This initializer handles database connection issues gracefully,
32
+ allowing the system to continue functioning even when persistence fails.
33
+
34
+ Args:
35
+ db_path: Path to the SQLite database file
36
+ similarity_threshold: Threshold for fuzzy matching (0.0-1.0)
37
+ manual_mappings: Optional manual identity mappings from configuration
38
+ """
20
39
  self.similarity_threshold = similarity_threshold
21
- self.db = Database(db_path)
22
- self._cache = {} # In-memory cache for performance
23
- self._load_cache()
24
-
40
+ self.db_path = Path(db_path) # Convert string to Path
41
+ self._cache: dict[str, str] = {} # In-memory cache for performance
42
+
43
+ # Initialize database with error handling
44
+ try:
45
+ self.db = Database(self.db_path)
46
+ self._database_available = True
47
+
48
+ # Warn user if using fallback database
49
+ if self.db.is_readonly_fallback:
50
+ logger.warning(
51
+ "Using temporary database for identity resolution. "
52
+ "Identity mappings will not persist between runs. "
53
+ f"Check permissions on: {db_path}"
54
+ )
55
+
56
+ # Load existing data from database
57
+ self._load_cache()
58
+
59
+ except Exception as e:
60
+ logger.error(
61
+ f"Failed to initialize identity database at {db_path}: {e}. "
62
+ "Identity resolution will work but mappings won't persist."
63
+ )
64
+ self._database_available = False
65
+ self.db = None
66
+
25
67
  # Store manual mappings to apply later
26
68
  self.manual_mappings = manual_mappings
27
-
69
+
70
+ # When database is not available, we need in-memory fallback storage
71
+ if not self._database_available:
72
+ logger.info(
73
+ "Database unavailable, using in-memory identity resolution. "
74
+ "Identity mappings will not persist between runs."
75
+ )
76
+ self._in_memory_identities: dict[str, dict[str, Any]] = {}
77
+ self._in_memory_aliases: dict[str, str] = {}
78
+
79
+ # Apply manual mappings to in-memory storage if provided
80
+ if self.manual_mappings:
81
+ self._apply_manual_mappings_to_memory()
82
+ else:
83
+ # Apply manual mappings to database if provided
84
+ if self.manual_mappings:
85
+ self._apply_manual_mappings(self.manual_mappings)
86
+
28
87
  @contextmanager
29
88
  def get_session(self):
30
- """Get database session context manager."""
89
+ """
90
+ Get database session context manager with fallback handling.
91
+
92
+ WHY: When database is not available, we need to provide a no-op
93
+ context manager that allows the code to continue without failing.
94
+ """
95
+ if not self._database_available or not self.db:
96
+ # No-op context manager when database is not available
97
+ class NoOpSession:
98
+ def query(self, *args, **kwargs):
99
+ return NoOpQuery()
100
+
101
+ def add(self, *args, **kwargs):
102
+ pass
103
+
104
+ def delete(self, *args, **kwargs):
105
+ pass
106
+
107
+ def commit(self):
108
+ pass
109
+
110
+ def rollback(self):
111
+ pass
112
+
113
+ def expire_all(self):
114
+ pass
115
+
116
+ class NoOpQuery:
117
+ def filter(self, *args, **kwargs):
118
+ return self
119
+
120
+ def first(self):
121
+ return None
122
+
123
+ def all(self):
124
+ return []
125
+
126
+ def count(self):
127
+ return 0
128
+
129
+ yield NoOpSession()
130
+ return
131
+
31
132
  session = self.db.get_session()
32
133
  try:
33
134
  yield session
@@ -37,209 +138,285 @@ class DeveloperIdentityResolver:
37
138
  raise
38
139
  finally:
39
140
  session.close()
40
-
41
- def _load_cache(self):
42
- """Load identities into memory cache."""
141
+
142
+ def _load_cache(self) -> None:
143
+ """
144
+ Load identities into memory cache.
145
+
146
+ WHY: When database is not available, we start with an empty cache
147
+ and rely on in-memory identity resolution for the current session.
148
+ """
149
+ if not self._database_available:
150
+ logger.debug("Database not available, starting with empty identity cache")
151
+ return
152
+
43
153
  with self.get_session() as session:
44
154
  # Load all identities
45
155
  identities = session.query(DeveloperIdentity).all()
46
156
  for identity in identities:
47
157
  self._cache[identity.canonical_id] = {
48
- 'primary_name': identity.primary_name,
49
- 'primary_email': identity.primary_email,
50
- 'github_username': identity.github_username
158
+ "primary_name": identity.primary_name,
159
+ "primary_email": identity.primary_email,
160
+ "github_username": identity.github_username,
51
161
  }
52
-
162
+
53
163
  # Load all aliases
54
164
  aliases = session.query(DeveloperAlias).all()
55
165
  for alias in aliases:
56
166
  key = f"{alias.email.lower()}:{alias.name.lower()}"
57
167
  self._cache[key] = alias.canonical_id
58
-
59
- def _apply_manual_mappings(self, manual_mappings: List[Dict[str, Any]]):
168
+
169
+ def _apply_manual_mappings(self, manual_mappings: list[dict[str, Any]]) -> None:
60
170
  """Apply manual identity mappings from configuration."""
171
+ # Handle database unavailable scenario
172
+ if not self._database_available:
173
+ self._apply_manual_mappings_to_memory()
174
+ return
175
+
61
176
  # Clear cache to ensure we get fresh data
62
177
  self._cache.clear()
63
178
  self._load_cache()
64
-
179
+
65
180
  with self.get_session() as session:
66
181
  for mapping in manual_mappings:
67
- canonical_email = mapping.get('canonical_email', '').lower().strip()
68
- aliases = mapping.get('aliases', [])
69
-
182
+ # Support both canonical_email and primary_email for backward compatibility
183
+ canonical_email = (
184
+ (mapping.get("primary_email", "") or mapping.get("canonical_email", ""))
185
+ .lower()
186
+ .strip()
187
+ )
188
+ aliases = mapping.get("aliases", [])
189
+ preferred_name = mapping.get("name") # Optional display name
190
+
70
191
  if not canonical_email or not aliases:
71
192
  continue
72
-
73
- # Find the canonical identity
74
- canonical_identity = session.query(DeveloperIdentity).filter(
75
- DeveloperIdentity.primary_email == canonical_email
76
- ).first()
77
-
193
+
194
+ # Find or create the canonical identity
195
+ canonical_identity = (
196
+ session.query(DeveloperIdentity)
197
+ .filter(DeveloperIdentity.primary_email == canonical_email)
198
+ .first()
199
+ )
200
+
78
201
  if not canonical_identity:
79
- # Skip if canonical identity doesn't exist yet
80
- print(f"Warning: Canonical identity not found for email: {canonical_email}")
81
- continue
82
-
202
+ # Create the canonical identity if it doesn't exist
203
+ canonical_id = str(uuid.uuid4())
204
+ canonical_identity = DeveloperIdentity(
205
+ canonical_id=canonical_id,
206
+ primary_name=preferred_name or canonical_email.split("@")[0],
207
+ primary_email=canonical_email,
208
+ first_seen=datetime.now(timezone.utc),
209
+ last_seen=datetime.now(timezone.utc),
210
+ total_commits=0,
211
+ total_story_points=0,
212
+ )
213
+ session.add(canonical_identity)
214
+ session.commit()
215
+ print(
216
+ f"Created canonical identity: {canonical_identity.primary_name} ({canonical_email})"
217
+ )
218
+
219
+ # Update the preferred name if provided
220
+ if preferred_name and preferred_name != canonical_identity.primary_name:
221
+ print(
222
+ f"Updating display name: {canonical_identity.primary_name} → {preferred_name}"
223
+ )
224
+ canonical_identity.primary_name = preferred_name
225
+
83
226
  # Process each alias
84
227
  for alias_email in aliases:
85
228
  alias_email = alias_email.lower().strip()
86
-
229
+
87
230
  # Check if alias identity exists as a primary identity
88
- alias_identity = session.query(DeveloperIdentity).filter(
89
- DeveloperIdentity.primary_email == alias_email
90
- ).first()
91
-
231
+ alias_identity = (
232
+ session.query(DeveloperIdentity)
233
+ .filter(DeveloperIdentity.primary_email == alias_email)
234
+ .first()
235
+ )
236
+
92
237
  if alias_identity:
93
238
  if alias_identity.canonical_id != canonical_identity.canonical_id:
94
239
  # Merge the identities - commit before merge to avoid locks
95
240
  session.commit()
96
- print(f"Merging identity: {alias_identity.primary_name} ({alias_email}) into {canonical_identity.primary_name} ({canonical_email})")
97
- self.merge_identities(canonical_identity.canonical_id, alias_identity.canonical_id)
241
+ print(
242
+ f"Merging identity: {alias_identity.primary_name} ({alias_email}) into {canonical_identity.primary_name} ({canonical_email})"
243
+ )
244
+ self.merge_identities(
245
+ canonical_identity.canonical_id, alias_identity.canonical_id
246
+ )
98
247
  # Refresh session after merge
99
248
  session.expire_all()
100
249
  else:
101
250
  # Just add as an alias if not a primary identity
102
- existing_alias = session.query(DeveloperAlias).filter(
103
- and_(
104
- DeveloperAlias.email == alias_email,
105
- DeveloperAlias.canonical_id == canonical_identity.canonical_id
251
+ existing_alias = (
252
+ session.query(DeveloperAlias)
253
+ .filter(
254
+ and_(
255
+ DeveloperAlias.email == alias_email,
256
+ DeveloperAlias.canonical_id == canonical_identity.canonical_id,
257
+ )
106
258
  )
107
- ).first()
108
-
259
+ .first()
260
+ )
261
+
109
262
  if not existing_alias:
110
263
  # Get the name from any existing alias with this email
111
264
  name_for_alias = None
112
- any_alias = session.query(DeveloperAlias).filter(
113
- DeveloperAlias.email == alias_email
114
- ).first()
265
+ any_alias = (
266
+ session.query(DeveloperAlias)
267
+ .filter(DeveloperAlias.email == alias_email)
268
+ .first()
269
+ )
115
270
  if any_alias:
116
271
  name_for_alias = any_alias.name
117
272
  else:
118
273
  name_for_alias = canonical_identity.primary_name
119
-
274
+
120
275
  new_alias = DeveloperAlias(
121
276
  canonical_id=canonical_identity.canonical_id,
122
277
  name=name_for_alias,
123
- email=alias_email
278
+ email=alias_email,
124
279
  )
125
280
  session.add(new_alias)
126
- print(f"Added alias: {alias_email} for {canonical_identity.primary_name}")
127
-
281
+ print(
282
+ f"Added alias: {alias_email} for {canonical_identity.primary_name}"
283
+ )
284
+
128
285
  # Reload cache after all mappings
129
286
  self._cache.clear()
130
287
  self._load_cache()
131
-
132
- def resolve_developer(self, name: str, email: str,
133
- github_username: Optional[str] = None) -> str:
134
- """Resolve developer identity and return canonical ID."""
288
+
289
+ def resolve_developer(
290
+ self, name: str, email: str, github_username: Optional[str] = None
291
+ ) -> str:
292
+ """
293
+ Resolve developer identity and return canonical ID.
294
+
295
+ WHY: This method handles both database-backed and in-memory identity resolution,
296
+ allowing the system to function even when persistence is not available.
297
+ """
298
+ # Use fallback resolution when database is not available
299
+ if not self._database_available:
300
+ return self._fallback_identity_resolution(name, email)
301
+
135
302
  # Normalize inputs
136
303
  name = name.strip()
137
304
  email = email.lower().strip()
138
-
305
+
139
306
  # Check cache first
140
307
  cache_key = f"{email}:{name.lower()}"
141
308
  if cache_key in self._cache:
142
309
  canonical_id = self._cache[cache_key]
143
310
  # Update stats
144
311
  self._update_developer_stats(canonical_id)
312
+ logger.debug(f"Resolved {name} <{email}> from cache to {canonical_id}")
145
313
  return canonical_id
146
-
314
+
147
315
  # Check exact email match in database
148
316
  with self.get_session() as session:
149
317
  # Check aliases
150
- alias = session.query(DeveloperAlias).filter(
151
- DeveloperAlias.email == email
152
- ).first()
153
-
318
+ alias = session.query(DeveloperAlias).filter(DeveloperAlias.email == email).first()
319
+
154
320
  if alias:
321
+ # Found an alias with this email - add this name variant to cache and DB
155
322
  self._cache[cache_key] = alias.canonical_id
156
323
  self._update_developer_stats(alias.canonical_id)
324
+ logger.debug(f"Found alias for {email}, resolving {name} to {alias.canonical_id}")
325
+ # Add this name variant as an alias if it's different
326
+ if alias.name.lower() != name.lower():
327
+ logger.debug(f"Adding name variant '{name}' as alias for {email}")
328
+ self._add_alias(alias.canonical_id, name, email)
157
329
  return alias.canonical_id
158
-
330
+
159
331
  # Check primary identities
160
- identity = session.query(DeveloperIdentity).filter(
161
- DeveloperIdentity.primary_email == email
162
- ).first()
163
-
332
+ identity = (
333
+ session.query(DeveloperIdentity)
334
+ .filter(DeveloperIdentity.primary_email == email)
335
+ .first()
336
+ )
337
+
164
338
  if identity:
165
339
  # Add as alias if name is different
166
340
  if identity.primary_name.lower() != name.lower():
167
341
  self._add_alias(identity.canonical_id, name, email)
168
342
  self._cache[cache_key] = identity.canonical_id
169
343
  return identity.canonical_id
170
-
344
+
171
345
  # Find similar developer
172
346
  best_match = self._find_best_match(name, email)
173
-
347
+
174
348
  if best_match and best_match[1] >= self.similarity_threshold:
175
349
  canonical_id = best_match[0]
176
350
  self._add_alias(canonical_id, name, email)
177
351
  self._cache[cache_key] = canonical_id
178
352
  return canonical_id
179
-
353
+
180
354
  # Create new identity
355
+ logger.info(f"Creating new identity for {name} <{email}> - no matches found")
181
356
  canonical_id = self._create_identity(name, email, github_username)
182
357
  self._cache[cache_key] = canonical_id
183
358
  return canonical_id
184
-
185
- def _find_best_match(self, name: str, email: str) -> Optional[Tuple[str, float]]:
359
+
360
+ def _find_best_match(self, name: str, email: str) -> Optional[tuple[str, float]]:
186
361
  """Find the best matching existing developer."""
187
362
  best_score = 0.0
188
363
  best_canonical_id = None
189
-
364
+
190
365
  name_lower = name.lower().strip()
191
- email_domain = email.split('@')[1] if '@' in email else ''
192
-
366
+ email_domain = email.split("@")[1] if "@" in email else ""
367
+
193
368
  with self.get_session() as session:
194
369
  # Get all identities for comparison
195
370
  identities = session.query(DeveloperIdentity).all()
196
-
371
+
197
372
  for identity in identities:
198
373
  score = 0.0
199
-
374
+
200
375
  # Name similarity (40% weight)
201
376
  name_sim = difflib.SequenceMatcher(
202
377
  None, name_lower, identity.primary_name.lower()
203
378
  ).ratio()
204
379
  score += name_sim * 0.4
205
-
380
+
206
381
  # Email domain similarity (30% weight)
207
- identity_domain = (identity.primary_email.split('@')[1]
208
- if '@' in identity.primary_email else '')
382
+ identity_domain = (
383
+ identity.primary_email.split("@")[1] if "@" in identity.primary_email else ""
384
+ )
209
385
  if email_domain and email_domain == identity_domain:
210
386
  score += 0.3
211
-
387
+
212
388
  # Check aliases (30% weight)
213
- aliases = session.query(DeveloperAlias).filter(
214
- DeveloperAlias.canonical_id == identity.canonical_id
215
- ).all()
216
-
389
+ aliases = (
390
+ session.query(DeveloperAlias)
391
+ .filter(DeveloperAlias.canonical_id == identity.canonical_id)
392
+ .all()
393
+ )
394
+
217
395
  best_alias_score = 0.0
218
396
  for alias in aliases:
219
397
  alias_name_sim = difflib.SequenceMatcher(
220
398
  None, name_lower, alias.name.lower()
221
399
  ).ratio()
222
-
400
+
223
401
  # Bonus for same email domain in aliases
224
- alias_domain = alias.email.split('@')[1] if '@' in alias.email else ''
402
+ alias_domain = alias.email.split("@")[1] if "@" in alias.email else ""
225
403
  domain_bonus = 0.2 if alias_domain == email_domain else 0.0
226
-
404
+
227
405
  alias_score = alias_name_sim + domain_bonus
228
406
  best_alias_score = max(best_alias_score, alias_score)
229
-
407
+
230
408
  score += min(best_alias_score * 0.3, 0.3)
231
-
409
+
232
410
  if score > best_score:
233
411
  best_score = score
234
412
  best_canonical_id = identity.canonical_id
235
-
413
+
236
414
  return (best_canonical_id, best_score) if best_canonical_id else None
237
-
238
- def _create_identity(self, name: str, email: str,
239
- github_username: Optional[str] = None) -> str:
415
+
416
+ def _create_identity(self, name: str, email: str, github_username: Optional[str] = None) -> str:
240
417
  """Create new developer identity."""
241
418
  canonical_id = str(uuid.uuid4())
242
-
419
+
243
420
  with self.get_session() as session:
244
421
  identity = DeveloperIdentity(
245
422
  canonical_id=canonical_id,
@@ -247,155 +424,364 @@ class DeveloperIdentityResolver:
247
424
  primary_email=email,
248
425
  github_username=github_username,
249
426
  total_commits=0,
250
- total_story_points=0
427
+ total_story_points=0,
251
428
  )
252
429
  session.add(identity)
253
-
430
+
254
431
  # Update cache
255
432
  self._cache[canonical_id] = {
256
- 'primary_name': name,
257
- 'primary_email': email,
258
- 'github_username': github_username
433
+ "primary_name": name,
434
+ "primary_email": email,
435
+ "github_username": github_username,
259
436
  }
260
-
437
+
261
438
  return canonical_id
262
-
439
+
263
440
  def _add_alias(self, canonical_id: str, name: str, email: str):
264
441
  """Add alias for existing developer."""
265
442
  with self.get_session() as session:
266
443
  # Check if alias already exists
267
- existing = session.query(DeveloperAlias).filter(
268
- and_(
269
- DeveloperAlias.canonical_id == canonical_id,
270
- DeveloperAlias.email == email.lower()
444
+ existing = (
445
+ session.query(DeveloperAlias)
446
+ .filter(
447
+ and_(
448
+ DeveloperAlias.canonical_id == canonical_id,
449
+ DeveloperAlias.email == email.lower(),
450
+ )
271
451
  )
272
- ).first()
273
-
452
+ .first()
453
+ )
454
+
274
455
  if not existing:
275
- alias = DeveloperAlias(
276
- canonical_id=canonical_id,
277
- name=name,
278
- email=email.lower()
279
- )
456
+ alias = DeveloperAlias(canonical_id=canonical_id, name=name, email=email.lower())
280
457
  session.add(alias)
281
-
458
+ # Update cache with the new alias
459
+ cache_key = f"{email.lower()}:{name.lower()}"
460
+ self._cache[cache_key] = canonical_id
461
+
282
462
  def _update_developer_stats(self, canonical_id: str):
283
463
  """Update developer statistics."""
284
464
  with self.get_session() as session:
285
- identity = session.query(DeveloperIdentity).filter(
286
- DeveloperIdentity.canonical_id == canonical_id
287
- ).first()
288
-
465
+ identity = (
466
+ session.query(DeveloperIdentity)
467
+ .filter(DeveloperIdentity.canonical_id == canonical_id)
468
+ .first()
469
+ )
470
+
289
471
  if identity:
290
472
  identity.last_seen = datetime.utcnow()
291
-
473
+
292
474
  def merge_identities(self, canonical_id1: str, canonical_id2: str):
293
475
  """Merge two developer identities."""
294
476
  # First, add the alias outside of the main merge transaction
295
477
  with self.get_session() as session:
296
- identity2 = session.query(DeveloperIdentity).filter(
297
- DeveloperIdentity.canonical_id == canonical_id2
298
- ).first()
478
+ identity2 = (
479
+ session.query(DeveloperIdentity)
480
+ .filter(DeveloperIdentity.canonical_id == canonical_id2)
481
+ .first()
482
+ )
299
483
  if identity2:
300
484
  identity2_name = identity2.primary_name
301
485
  identity2_email = identity2.primary_email
302
-
486
+
303
487
  # Add identity2's primary as alias to identity1 first
304
488
  self._add_alias(canonical_id1, identity2_name, identity2_email)
305
-
489
+
306
490
  # Now do the merge in a separate transaction
307
491
  with self.get_session() as session:
308
492
  # Get both identities fresh
309
- identity1 = session.query(DeveloperIdentity).filter(
310
- DeveloperIdentity.canonical_id == canonical_id1
311
- ).first()
312
- identity2 = session.query(DeveloperIdentity).filter(
313
- DeveloperIdentity.canonical_id == canonical_id2
314
- ).first()
315
-
493
+ identity1 = (
494
+ session.query(DeveloperIdentity)
495
+ .filter(DeveloperIdentity.canonical_id == canonical_id1)
496
+ .first()
497
+ )
498
+ identity2 = (
499
+ session.query(DeveloperIdentity)
500
+ .filter(DeveloperIdentity.canonical_id == canonical_id2)
501
+ .first()
502
+ )
503
+
316
504
  if not identity1 or not identity2:
317
505
  raise ValueError("One or both identities not found")
318
-
506
+
319
507
  # Keep identity1, merge identity2 into it
320
508
  identity1.total_commits += identity2.total_commits
321
509
  identity1.total_story_points += identity2.total_story_points
322
510
  identity1.first_seen = min(identity1.first_seen, identity2.first_seen)
323
511
  identity1.last_seen = max(identity1.last_seen, identity2.last_seen)
324
-
512
+
325
513
  # Move all aliases from identity2 to identity1
326
- aliases = session.query(DeveloperAlias).filter(
327
- DeveloperAlias.canonical_id == canonical_id2
328
- ).all()
329
-
514
+ aliases = (
515
+ session.query(DeveloperAlias)
516
+ .filter(DeveloperAlias.canonical_id == canonical_id2)
517
+ .all()
518
+ )
519
+
330
520
  for alias in aliases:
331
521
  alias.canonical_id = canonical_id1
332
-
522
+
333
523
  # Delete identity2
334
524
  session.delete(identity2)
335
-
525
+
336
526
  # Clear cache to force reload
337
527
  self._cache.clear()
338
528
  self._load_cache()
339
-
340
- def get_developer_stats(self) -> List[Dict[str, Any]]:
341
- """Get statistics for all developers."""
529
+
530
+ def get_developer_stats(
531
+ self, ticket_coverage: Optional[dict[str, float]] = None
532
+ ) -> list[dict[str, Any]]:
533
+ """
534
+ Get statistics for all developers.
535
+
536
+ WHY: This method returns the authoritative developer information for reports,
537
+ including display names that have been updated through manual mappings.
538
+ It ensures that report generators get the correct canonical display names.
539
+
540
+ DESIGN DECISION: Accepts optional ticket_coverage parameter to replace the
541
+ previously hardcoded 0.0 ticket coverage values. This enables accurate
542
+ per-developer ticket coverage reporting that matches overall metrics.
543
+
544
+ Args:
545
+ ticket_coverage: Optional dict mapping canonical_id to coverage percentage
546
+
547
+ Returns:
548
+ List of developer statistics with accurate ticket coverage data
549
+ """
342
550
  stats = []
343
-
551
+
552
+ if not self._database_available:
553
+ # Handle in-memory fallback
554
+ for canonical_id, identity_data in self._in_memory_identities.items():
555
+ # Get actual ticket coverage if provided, otherwise default to 0.0
556
+ coverage_pct = 0.0
557
+ if ticket_coverage:
558
+ coverage_pct = ticket_coverage.get(canonical_id, 0.0)
559
+
560
+ stats.append(
561
+ {
562
+ "canonical_id": canonical_id,
563
+ "primary_name": identity_data["primary_name"],
564
+ "primary_email": identity_data["primary_email"],
565
+ "github_username": identity_data.get("github_username"),
566
+ "total_commits": identity_data.get("total_commits", 0),
567
+ "total_story_points": identity_data.get("total_story_points", 0),
568
+ "alias_count": 0, # Not tracked in memory
569
+ "first_seen": None,
570
+ "last_seen": None,
571
+ "ticket_coverage_pct": coverage_pct,
572
+ }
573
+ )
574
+ return sorted(stats, key=lambda x: x["total_commits"], reverse=True)
575
+
344
576
  with self.get_session() as session:
345
577
  identities = session.query(DeveloperIdentity).all()
346
-
578
+
347
579
  for identity in identities:
348
580
  # Count aliases
349
- alias_count = session.query(DeveloperAlias).filter(
350
- DeveloperAlias.canonical_id == identity.canonical_id
351
- ).count()
352
-
353
- stats.append({
354
- 'canonical_id': identity.canonical_id,
355
- 'primary_name': identity.primary_name,
356
- 'primary_email': identity.primary_email,
357
- 'github_username': identity.github_username,
358
- 'total_commits': identity.total_commits,
359
- 'total_story_points': identity.total_story_points,
360
- 'alias_count': alias_count,
361
- 'first_seen': identity.first_seen,
362
- 'last_seen': identity.last_seen
363
- })
364
-
581
+ alias_count = (
582
+ session.query(DeveloperAlias)
583
+ .filter(DeveloperAlias.canonical_id == identity.canonical_id)
584
+ .count()
585
+ )
586
+
587
+ # Get actual ticket coverage if provided, otherwise default to 0.0
588
+ coverage_pct = 0.0
589
+ if ticket_coverage:
590
+ coverage_pct = ticket_coverage.get(identity.canonical_id, 0.0)
591
+
592
+ stats.append(
593
+ {
594
+ "canonical_id": identity.canonical_id,
595
+ "primary_name": identity.primary_name,
596
+ "primary_email": identity.primary_email,
597
+ "github_username": identity.github_username,
598
+ "total_commits": identity.total_commits,
599
+ "total_story_points": identity.total_story_points,
600
+ "alias_count": alias_count,
601
+ "first_seen": identity.first_seen,
602
+ "last_seen": identity.last_seen,
603
+ "ticket_coverage_pct": coverage_pct,
604
+ }
605
+ )
606
+
365
607
  # Sort by total commits
366
- return sorted(stats, key=lambda x: x['total_commits'], reverse=True)
367
-
368
- def update_commit_stats(self, commits: List[Dict[str, Any]]):
608
+ return sorted(stats, key=lambda x: x["total_commits"], reverse=True)
609
+
610
def update_commit_stats(self, commits: list[dict[str, Any]]):
    """Update developer statistics based on commits.

    Aggregates commit counts and story points per resolved canonical ID,
    persists the aggregates to the identity database, and stamps each
    commit dict with its resolved ``canonical_id`` for later report use.

    Args:
        commits: List of commit dicts with at least ``author_name`` and
            ``author_email`` keys; ``story_points`` is optional.
    """
    # Aggregate stats by canonical ID
    stats_by_dev = defaultdict(lambda: {"commits": 0, "story_points": 0})

    for commit in commits:
        # Defensive: upstream occasionally hands us non-dict entries.
        # BUGFIX: route this through the module logger instead of a bare
        # print(), consistent with logging elsewhere in this module.
        if not isinstance(commit, dict):
            logger.warning("Expected commit to be dict, got %s: %s", type(commit), commit)
            continue

        canonical_id = self.resolve_developer(commit["author_name"], commit["author_email"])
        # Update the commit with the resolved canonical_id for later use in reports
        commit["canonical_id"] = canonical_id

        stats_by_dev[canonical_id]["commits"] += 1
        stats_by_dev[canonical_id]["story_points"] += commit.get("story_points", 0) or 0

    # Update database
    with self.get_session() as session:
        for canonical_id, stats in stats_by_dev.items():
            identity = (
                session.query(DeveloperIdentity)
                .filter(DeveloperIdentity.canonical_id == canonical_id)
                .first()
            )

            if identity:
                identity.total_commits += stats["commits"]
                identity.total_story_points += stats["story_points"]
                # NOTE(review): utcnow() is naive and deprecated in 3.12;
                # kept as-is for compatibility with existing stored rows.
                identity.last_seen = datetime.utcnow()

    # Apply manual mappings after all identities are created
    if self.manual_mappings:
        self.apply_manual_mappings()
def apply_manual_mappings(self):
    """Explicitly re-apply configured manual identity mappings.

    Public hook intended to be called after all identities have been
    created, so the configured mappings can take effect. A no-op when no
    manual mappings are configured.
    """
    if not self.manual_mappings:
        return
    self._apply_manual_mappings(self.manual_mappings)
def get_canonical_name(self, canonical_id: str) -> str:
    """
    Get the canonical display name for a given canonical ID.

    WHY: Reports need to show the proper display name from manual mappings
    instead of the original commit author name. This method provides the
    authoritative display name for any canonical ID.

    Args:
        canonical_id: The canonical ID to get the display name for

    Returns:
        The display name that should be used in reports, or "Unknown" if not found
    """
    if self._database_available:
        with self.get_session() as session:
            record = (
                session.query(DeveloperIdentity)
                .filter(DeveloperIdentity.canonical_id == canonical_id)
                .first()
            )
            if record:
                return record.primary_name
        return "Unknown"

    # In-memory fallback: the identity table is authoritative; fall back
    # to the cache only when the ID is missing from it.
    if canonical_id in self._in_memory_identities:
        return self._in_memory_identities[canonical_id]["primary_name"]

    cached = self._cache.get(canonical_id)
    if isinstance(cached, dict):
        return cached.get("primary_name", "Unknown")
    return "Unknown"
+ def _apply_manual_mappings_to_memory(self) -> None:
689
+ """
690
+ Apply manual mappings to in-memory storage when database is not available.
691
+
692
+ WHY: When persistence fails, we still need to apply user-configured
693
+ identity mappings for the current analysis session.
694
+ """
695
+ if not self.manual_mappings:
696
+ return
697
+
698
+ for mapping in self.manual_mappings:
699
+ # Support both canonical_email and primary_email for backward compatibility
700
+ canonical_email = (
701
+ (mapping.get("primary_email", "") or mapping.get("canonical_email", ""))
702
+ .lower()
703
+ .strip()
704
+ )
705
+ aliases = mapping.get("aliases", [])
706
+ preferred_name = mapping.get("name") # Optional display name
707
+
708
+ if not canonical_email or not aliases:
709
+ continue
710
+
711
+ # Create canonical identity in memory
712
+ canonical_id = str(uuid.uuid4())
713
+ self._in_memory_identities[canonical_id] = {
714
+ "primary_name": preferred_name or canonical_email.split("@")[0],
715
+ "primary_email": canonical_email,
716
+ "github_username": None,
717
+ "total_commits": 0,
718
+ "total_story_points": 0,
719
+ }
720
+
721
+ # Add to cache
722
+ self._cache[canonical_id] = self._in_memory_identities[canonical_id]
723
+
724
+ # Process aliases
725
+ for alias_email in aliases:
726
+ alias_email = alias_email.lower().strip()
727
+ alias_key = f"{alias_email}:{preferred_name or canonical_email.split('@')[0]}"
728
+ self._in_memory_aliases[alias_key] = canonical_id
729
+ self._cache[alias_key] = canonical_id
730
+
731
+ logger.debug(
732
+ f"Applied in-memory mapping: {preferred_name or canonical_email.split('@')[0]} "
733
+ f"with {len(aliases)} aliases"
734
+ )
735
+
736
+ def _fallback_identity_resolution(self, name: str, email: str) -> str:
737
+ """
738
+ Fallback identity resolution when database is not available.
739
+
740
+ WHY: Even without persistence, we need consistent identity resolution
741
+ within a single analysis session to avoid duplicate developer entries.
742
+
743
+ Args:
744
+ name: Developer name
745
+ email: Developer email
746
+
747
+ Returns:
748
+ Canonical ID for the developer
749
+ """
750
+ # Normalize inputs
751
+ name = name.strip()
752
+ email = email.lower().strip()
753
+ cache_key = f"{email}:{name.lower()}"
754
+
755
+ # Check if already resolved
756
+ if cache_key in self._cache:
757
+ return self._cache[cache_key]
758
+
759
+ # Check in-memory aliases
760
+ if cache_key in self._in_memory_aliases:
761
+ canonical_id = self._in_memory_aliases[cache_key]
762
+ self._cache[cache_key] = canonical_id
763
+ return canonical_id
764
+
765
+ # Check for email match in existing identities
766
+ for canonical_id, identity in self._in_memory_identities.items():
767
+ if identity["primary_email"] == email:
768
+ # Add this name variant to cache
769
+ self._cache[cache_key] = canonical_id
770
+ return canonical_id
771
+
772
+ # Create new identity
773
+ canonical_id = str(uuid.uuid4())
774
+ self._in_memory_identities[canonical_id] = {
775
+ "primary_name": name,
776
+ "primary_email": email,
777
+ "github_username": None,
778
+ "total_commits": 0,
779
+ "total_story_points": 0,
780
+ }
781
+
782
+ # Add to cache
783
+ self._cache[canonical_id] = self._in_memory_identities[canonical_id]
784
+ self._cache[cache_key] = canonical_id
785
+
786
+ logger.debug(f"Created in-memory identity for {name} <{email}>")
787
+ return canonical_id