gitflow-analytics 1.0.3__py3-none-any.whl → 1.3.11__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (116) hide show
  1. gitflow_analytics/_version.py +1 -1
  2. gitflow_analytics/classification/__init__.py +31 -0
  3. gitflow_analytics/classification/batch_classifier.py +752 -0
  4. gitflow_analytics/classification/classifier.py +464 -0
  5. gitflow_analytics/classification/feature_extractor.py +725 -0
  6. gitflow_analytics/classification/linguist_analyzer.py +574 -0
  7. gitflow_analytics/classification/model.py +455 -0
  8. gitflow_analytics/cli.py +4158 -350
  9. gitflow_analytics/cli_rich.py +198 -48
  10. gitflow_analytics/config/__init__.py +43 -0
  11. gitflow_analytics/config/errors.py +261 -0
  12. gitflow_analytics/config/loader.py +905 -0
  13. gitflow_analytics/config/profiles.py +264 -0
  14. gitflow_analytics/config/repository.py +124 -0
  15. gitflow_analytics/config/schema.py +444 -0
  16. gitflow_analytics/config/validator.py +154 -0
  17. gitflow_analytics/config.py +44 -508
  18. gitflow_analytics/core/analyzer.py +1209 -98
  19. gitflow_analytics/core/cache.py +1337 -29
  20. gitflow_analytics/core/data_fetcher.py +1285 -0
  21. gitflow_analytics/core/identity.py +363 -14
  22. gitflow_analytics/core/metrics_storage.py +526 -0
  23. gitflow_analytics/core/progress.py +372 -0
  24. gitflow_analytics/core/schema_version.py +269 -0
  25. gitflow_analytics/extractors/ml_tickets.py +1100 -0
  26. gitflow_analytics/extractors/story_points.py +8 -1
  27. gitflow_analytics/extractors/tickets.py +749 -11
  28. gitflow_analytics/identity_llm/__init__.py +6 -0
  29. gitflow_analytics/identity_llm/analysis_pass.py +231 -0
  30. gitflow_analytics/identity_llm/analyzer.py +464 -0
  31. gitflow_analytics/identity_llm/models.py +76 -0
  32. gitflow_analytics/integrations/github_integration.py +175 -11
  33. gitflow_analytics/integrations/jira_integration.py +461 -24
  34. gitflow_analytics/integrations/orchestrator.py +124 -1
  35. gitflow_analytics/metrics/activity_scoring.py +322 -0
  36. gitflow_analytics/metrics/branch_health.py +470 -0
  37. gitflow_analytics/metrics/dora.py +379 -20
  38. gitflow_analytics/models/database.py +843 -53
  39. gitflow_analytics/pm_framework/__init__.py +115 -0
  40. gitflow_analytics/pm_framework/adapters/__init__.py +50 -0
  41. gitflow_analytics/pm_framework/adapters/jira_adapter.py +1845 -0
  42. gitflow_analytics/pm_framework/base.py +406 -0
  43. gitflow_analytics/pm_framework/models.py +211 -0
  44. gitflow_analytics/pm_framework/orchestrator.py +652 -0
  45. gitflow_analytics/pm_framework/registry.py +333 -0
  46. gitflow_analytics/qualitative/__init__.py +9 -10
  47. gitflow_analytics/qualitative/chatgpt_analyzer.py +259 -0
  48. gitflow_analytics/qualitative/classifiers/__init__.py +3 -3
  49. gitflow_analytics/qualitative/classifiers/change_type.py +518 -244
  50. gitflow_analytics/qualitative/classifiers/domain_classifier.py +272 -165
  51. gitflow_analytics/qualitative/classifiers/intent_analyzer.py +321 -222
  52. gitflow_analytics/qualitative/classifiers/llm/__init__.py +35 -0
  53. gitflow_analytics/qualitative/classifiers/llm/base.py +193 -0
  54. gitflow_analytics/qualitative/classifiers/llm/batch_processor.py +383 -0
  55. gitflow_analytics/qualitative/classifiers/llm/cache.py +479 -0
  56. gitflow_analytics/qualitative/classifiers/llm/cost_tracker.py +435 -0
  57. gitflow_analytics/qualitative/classifiers/llm/openai_client.py +403 -0
  58. gitflow_analytics/qualitative/classifiers/llm/prompts.py +373 -0
  59. gitflow_analytics/qualitative/classifiers/llm/response_parser.py +287 -0
  60. gitflow_analytics/qualitative/classifiers/llm_commit_classifier.py +607 -0
  61. gitflow_analytics/qualitative/classifiers/risk_analyzer.py +215 -189
  62. gitflow_analytics/qualitative/core/__init__.py +4 -4
  63. gitflow_analytics/qualitative/core/llm_fallback.py +239 -235
  64. gitflow_analytics/qualitative/core/nlp_engine.py +157 -148
  65. gitflow_analytics/qualitative/core/pattern_cache.py +214 -192
  66. gitflow_analytics/qualitative/core/processor.py +381 -248
  67. gitflow_analytics/qualitative/enhanced_analyzer.py +2236 -0
  68. gitflow_analytics/qualitative/example_enhanced_usage.py +420 -0
  69. gitflow_analytics/qualitative/models/__init__.py +7 -7
  70. gitflow_analytics/qualitative/models/schemas.py +155 -121
  71. gitflow_analytics/qualitative/utils/__init__.py +4 -4
  72. gitflow_analytics/qualitative/utils/batch_processor.py +136 -123
  73. gitflow_analytics/qualitative/utils/cost_tracker.py +142 -140
  74. gitflow_analytics/qualitative/utils/metrics.py +172 -158
  75. gitflow_analytics/qualitative/utils/text_processing.py +146 -104
  76. gitflow_analytics/reports/__init__.py +100 -0
  77. gitflow_analytics/reports/analytics_writer.py +539 -14
  78. gitflow_analytics/reports/base.py +648 -0
  79. gitflow_analytics/reports/branch_health_writer.py +322 -0
  80. gitflow_analytics/reports/classification_writer.py +924 -0
  81. gitflow_analytics/reports/cli_integration.py +427 -0
  82. gitflow_analytics/reports/csv_writer.py +1676 -212
  83. gitflow_analytics/reports/data_models.py +504 -0
  84. gitflow_analytics/reports/database_report_generator.py +427 -0
  85. gitflow_analytics/reports/example_usage.py +344 -0
  86. gitflow_analytics/reports/factory.py +499 -0
  87. gitflow_analytics/reports/formatters.py +698 -0
  88. gitflow_analytics/reports/html_generator.py +1116 -0
  89. gitflow_analytics/reports/interfaces.py +489 -0
  90. gitflow_analytics/reports/json_exporter.py +2770 -0
  91. gitflow_analytics/reports/narrative_writer.py +2287 -158
  92. gitflow_analytics/reports/story_point_correlation.py +1144 -0
  93. gitflow_analytics/reports/weekly_trends_writer.py +389 -0
  94. gitflow_analytics/training/__init__.py +5 -0
  95. gitflow_analytics/training/model_loader.py +377 -0
  96. gitflow_analytics/training/pipeline.py +550 -0
  97. gitflow_analytics/tui/__init__.py +1 -1
  98. gitflow_analytics/tui/app.py +129 -126
  99. gitflow_analytics/tui/screens/__init__.py +3 -3
  100. gitflow_analytics/tui/screens/analysis_progress_screen.py +188 -179
  101. gitflow_analytics/tui/screens/configuration_screen.py +154 -178
  102. gitflow_analytics/tui/screens/loading_screen.py +100 -110
  103. gitflow_analytics/tui/screens/main_screen.py +89 -72
  104. gitflow_analytics/tui/screens/results_screen.py +305 -281
  105. gitflow_analytics/tui/widgets/__init__.py +2 -2
  106. gitflow_analytics/tui/widgets/data_table.py +67 -69
  107. gitflow_analytics/tui/widgets/export_modal.py +76 -76
  108. gitflow_analytics/tui/widgets/progress_widget.py +41 -46
  109. gitflow_analytics-1.3.11.dist-info/METADATA +1015 -0
  110. gitflow_analytics-1.3.11.dist-info/RECORD +122 -0
  111. gitflow_analytics-1.0.3.dist-info/METADATA +0 -490
  112. gitflow_analytics-1.0.3.dist-info/RECORD +0 -62
  113. {gitflow_analytics-1.0.3.dist-info → gitflow_analytics-1.3.11.dist-info}/WHEEL +0 -0
  114. {gitflow_analytics-1.0.3.dist-info → gitflow_analytics-1.3.11.dist-info}/entry_points.txt +0 -0
  115. {gitflow_analytics-1.0.3.dist-info → gitflow_analytics-1.3.11.dist-info}/licenses/LICENSE +0 -0
  116. {gitflow_analytics-1.0.3.dist-info → gitflow_analytics-1.3.11.dist-info}/top_level.txt +0 -0
@@ -1,16 +1,20 @@
1
1
  """Developer identity resolution with persistence."""
2
2
 
3
3
  import difflib
4
+ import logging
4
5
  import uuid
5
6
  from collections import defaultdict
6
7
  from contextlib import contextmanager
7
- from datetime import datetime
8
+ from datetime import datetime, timezone
9
+ from pathlib import Path
8
10
  from typing import Any, Optional
9
11
 
10
12
  from sqlalchemy import and_
11
13
 
12
14
  from ..models.database import Database, DeveloperAlias, DeveloperIdentity
13
15
 
16
+ logger = logging.getLogger(__name__)
17
+
14
18
 
15
19
  class DeveloperIdentityResolver:
16
20
  """Resolve and normalize developer identities across repositories."""
@@ -21,18 +25,110 @@ class DeveloperIdentityResolver:
21
25
  similarity_threshold: float = 0.85,
22
26
  manual_mappings: Optional[list[dict[str, Any]]] = None,
23
27
  ) -> None:
24
- """Initialize with database for persistence."""
28
+ """
29
+ Initialize with database for persistence.
30
+
31
+ WHY: This initializer handles database connection issues gracefully,
32
+ allowing the system to continue functioning even when persistence fails.
33
+
34
+ Args:
35
+ db_path: Path to the SQLite database file
36
+ similarity_threshold: Threshold for fuzzy matching (0.0-1.0)
37
+ manual_mappings: Optional manual identity mappings from configuration
38
+ """
25
39
  self.similarity_threshold = similarity_threshold
26
- self.db = Database(db_path)
40
+ self.db_path = Path(db_path) # Convert string to Path
27
41
  self._cache: dict[str, str] = {} # In-memory cache for performance
28
- self._load_cache()
42
+
43
+ # Initialize database with error handling
44
+ try:
45
+ self.db = Database(self.db_path)
46
+ self._database_available = True
47
+
48
+ # Warn user if using fallback database
49
+ if self.db.is_readonly_fallback:
50
+ logger.warning(
51
+ "Using temporary database for identity resolution. "
52
+ "Identity mappings will not persist between runs. "
53
+ f"Check permissions on: {db_path}"
54
+ )
55
+
56
+ # Load existing data from database
57
+ self._load_cache()
58
+
59
+ except Exception as e:
60
+ logger.error(
61
+ f"Failed to initialize identity database at {db_path}: {e}. "
62
+ "Identity resolution will work but mappings won't persist."
63
+ )
64
+ self._database_available = False
65
+ self.db = None
29
66
 
30
67
  # Store manual mappings to apply later
31
68
  self.manual_mappings = manual_mappings
32
69
 
70
+ # When database is not available, we need in-memory fallback storage
71
+ if not self._database_available:
72
+ logger.info(
73
+ "Database unavailable, using in-memory identity resolution. "
74
+ "Identity mappings will not persist between runs."
75
+ )
76
+ self._in_memory_identities: dict[str, dict[str, Any]] = {}
77
+ self._in_memory_aliases: dict[str, str] = {}
78
+
79
+ # Apply manual mappings to in-memory storage if provided
80
+ if self.manual_mappings:
81
+ self._apply_manual_mappings_to_memory()
82
+ else:
83
+ # Apply manual mappings to database if provided
84
+ if self.manual_mappings:
85
+ self._apply_manual_mappings(self.manual_mappings)
86
+
33
87
  @contextmanager
34
88
  def get_session(self):
35
- """Get database session context manager."""
89
+ """
90
+ Get database session context manager with fallback handling.
91
+
92
+ WHY: When database is not available, we need to provide a no-op
93
+ context manager that allows the code to continue without failing.
94
+ """
95
+ if not self._database_available or not self.db:
96
+ # No-op context manager when database is not available
97
+ class NoOpSession:
98
+ def query(self, *args, **kwargs):
99
+ return NoOpQuery()
100
+
101
+ def add(self, *args, **kwargs):
102
+ pass
103
+
104
+ def delete(self, *args, **kwargs):
105
+ pass
106
+
107
+ def commit(self):
108
+ pass
109
+
110
+ def rollback(self):
111
+ pass
112
+
113
+ def expire_all(self):
114
+ pass
115
+
116
+ class NoOpQuery:
117
+ def filter(self, *args, **kwargs):
118
+ return self
119
+
120
+ def first(self):
121
+ return None
122
+
123
+ def all(self):
124
+ return []
125
+
126
+ def count(self):
127
+ return 0
128
+
129
+ yield NoOpSession()
130
+ return
131
+
36
132
  session = self.db.get_session()
37
133
  try:
38
134
  yield session
@@ -44,7 +140,16 @@ class DeveloperIdentityResolver:
44
140
  session.close()
45
141
 
46
142
  def _load_cache(self) -> None:
47
- """Load identities into memory cache."""
143
+ """
144
+ Load identities into memory cache.
145
+
146
+ WHY: When database is not available, we start with an empty cache
147
+ and rely on in-memory identity resolution for the current session.
148
+ """
149
+ if not self._database_available:
150
+ logger.debug("Database not available, starting with empty identity cache")
151
+ return
152
+
48
153
  with self.get_session() as session:
49
154
  # Load all identities
50
155
  identities = session.query(DeveloperIdentity).all()
@@ -63,19 +168,30 @@ class DeveloperIdentityResolver:
63
168
 
64
169
  def _apply_manual_mappings(self, manual_mappings: list[dict[str, Any]]) -> None:
65
170
  """Apply manual identity mappings from configuration."""
171
+ # Handle database unavailable scenario
172
+ if not self._database_available:
173
+ self._apply_manual_mappings_to_memory()
174
+ return
175
+
66
176
  # Clear cache to ensure we get fresh data
67
177
  self._cache.clear()
68
178
  self._load_cache()
69
179
 
70
180
  with self.get_session() as session:
71
181
  for mapping in manual_mappings:
72
- canonical_email = mapping.get("canonical_email", "").lower().strip()
182
+ # Support both canonical_email and primary_email for backward compatibility
183
+ canonical_email = (
184
+ (mapping.get("primary_email", "") or mapping.get("canonical_email", ""))
185
+ .lower()
186
+ .strip()
187
+ )
73
188
  aliases = mapping.get("aliases", [])
189
+ preferred_name = mapping.get("name") # Optional display name
74
190
 
75
191
  if not canonical_email or not aliases:
76
192
  continue
77
193
 
78
- # Find the canonical identity
194
+ # Find or create the canonical identity
79
195
  canonical_identity = (
80
196
  session.query(DeveloperIdentity)
81
197
  .filter(DeveloperIdentity.primary_email == canonical_email)
@@ -83,9 +199,29 @@ class DeveloperIdentityResolver:
83
199
  )
84
200
 
85
201
  if not canonical_identity:
86
- # Skip if canonical identity doesn't exist yet
87
- print(f"Warning: Canonical identity not found for email: {canonical_email}")
88
- continue
202
+ # Create the canonical identity if it doesn't exist
203
+ canonical_id = str(uuid.uuid4())
204
+ canonical_identity = DeveloperIdentity(
205
+ canonical_id=canonical_id,
206
+ primary_name=preferred_name or canonical_email.split("@")[0],
207
+ primary_email=canonical_email,
208
+ first_seen=datetime.now(timezone.utc),
209
+ last_seen=datetime.now(timezone.utc),
210
+ total_commits=0,
211
+ total_story_points=0,
212
+ )
213
+ session.add(canonical_identity)
214
+ session.commit()
215
+ print(
216
+ f"Created canonical identity: {canonical_identity.primary_name} ({canonical_email})"
217
+ )
218
+
219
+ # Update the preferred name if provided
220
+ if preferred_name and preferred_name != canonical_identity.primary_name:
221
+ print(
222
+ f"Updating display name: {canonical_identity.primary_name} → {preferred_name}"
223
+ )
224
+ canonical_identity.primary_name = preferred_name
89
225
 
90
226
  # Process each alias
91
227
  for alias_email in aliases:
@@ -153,7 +289,16 @@ class DeveloperIdentityResolver:
153
289
  def resolve_developer(
154
290
  self, name: str, email: str, github_username: Optional[str] = None
155
291
  ) -> str:
156
- """Resolve developer identity and return canonical ID."""
292
+ """
293
+ Resolve developer identity and return canonical ID.
294
+
295
+ WHY: This method handles both database-backed and in-memory identity resolution,
296
+ allowing the system to function even when persistence is not available.
297
+ """
298
+ # Use fallback resolution when database is not available
299
+ if not self._database_available:
300
+ return self._fallback_identity_resolution(name, email)
301
+
157
302
  # Normalize inputs
158
303
  name = name.strip()
159
304
  email = email.lower().strip()
@@ -164,6 +309,7 @@ class DeveloperIdentityResolver:
164
309
  canonical_id = self._cache[cache_key]
165
310
  # Update stats
166
311
  self._update_developer_stats(canonical_id)
312
+ logger.debug(f"Resolved {name} <{email}> from cache to {canonical_id}")
167
313
  return canonical_id
168
314
 
169
315
  # Check exact email match in database
@@ -172,8 +318,14 @@ class DeveloperIdentityResolver:
172
318
  alias = session.query(DeveloperAlias).filter(DeveloperAlias.email == email).first()
173
319
 
174
320
  if alias:
321
+ # Found an alias with this email - add this name variant to cache and DB
175
322
  self._cache[cache_key] = alias.canonical_id
176
323
  self._update_developer_stats(alias.canonical_id)
324
+ logger.debug(f"Found alias for {email}, resolving {name} to {alias.canonical_id}")
325
+ # Add this name variant as an alias if it's different
326
+ if alias.name.lower() != name.lower():
327
+ logger.debug(f"Adding name variant '{name}' as alias for {email}")
328
+ self._add_alias(alias.canonical_id, name, email)
177
329
  return alias.canonical_id
178
330
 
179
331
  # Check primary identities
@@ -200,6 +352,7 @@ class DeveloperIdentityResolver:
200
352
  return canonical_id
201
353
 
202
354
  # Create new identity
355
+ logger.info(f"Creating new identity for {name} <{email}> - no matches found")
203
356
  canonical_id = self._create_identity(name, email, github_username)
204
357
  self._cache[cache_key] = canonical_id
205
358
  return canonical_id
@@ -302,6 +455,9 @@ class DeveloperIdentityResolver:
302
455
  if not existing:
303
456
  alias = DeveloperAlias(canonical_id=canonical_id, name=name, email=email.lower())
304
457
  session.add(alias)
458
+ # Update cache with the new alias
459
+ cache_key = f"{email.lower()}:{name.lower()}"
460
+ self._cache[cache_key] = canonical_id
305
461
 
306
462
  def _update_developer_stats(self, canonical_id: str):
307
463
  """Update developer statistics."""
@@ -371,10 +527,52 @@ class DeveloperIdentityResolver:
371
527
  self._cache.clear()
372
528
  self._load_cache()
373
529
 
374
- def get_developer_stats(self) -> list[dict[str, Any]]:
375
- """Get statistics for all developers."""
530
+ def get_developer_stats(
531
+ self, ticket_coverage: Optional[dict[str, float]] = None
532
+ ) -> list[dict[str, Any]]:
533
+ """
534
+ Get statistics for all developers.
535
+
536
+ WHY: This method returns the authoritative developer information for reports,
537
+ including display names that have been updated through manual mappings.
538
+ It ensures that report generators get the correct canonical display names.
539
+
540
+ DESIGN DECISION: Accepts optional ticket_coverage parameter to replace the
541
+ previously hardcoded 0.0 ticket coverage values. This enables accurate
542
+ per-developer ticket coverage reporting that matches overall metrics.
543
+
544
+ Args:
545
+ ticket_coverage: Optional dict mapping canonical_id to coverage percentage
546
+
547
+ Returns:
548
+ List of developer statistics with accurate ticket coverage data
549
+ """
376
550
  stats = []
377
551
 
552
+ if not self._database_available:
553
+ # Handle in-memory fallback
554
+ for canonical_id, identity_data in self._in_memory_identities.items():
555
+ # Get actual ticket coverage if provided, otherwise default to 0.0
556
+ coverage_pct = 0.0
557
+ if ticket_coverage:
558
+ coverage_pct = ticket_coverage.get(canonical_id, 0.0)
559
+
560
+ stats.append(
561
+ {
562
+ "canonical_id": canonical_id,
563
+ "primary_name": identity_data["primary_name"],
564
+ "primary_email": identity_data["primary_email"],
565
+ "github_username": identity_data.get("github_username"),
566
+ "total_commits": identity_data.get("total_commits", 0),
567
+ "total_story_points": identity_data.get("total_story_points", 0),
568
+ "alias_count": 0, # Not tracked in memory
569
+ "first_seen": None,
570
+ "last_seen": None,
571
+ "ticket_coverage_pct": coverage_pct,
572
+ }
573
+ )
574
+ return sorted(stats, key=lambda x: x["total_commits"], reverse=True)
575
+
378
576
  with self.get_session() as session:
379
577
  identities = session.query(DeveloperIdentity).all()
380
578
 
@@ -386,6 +584,11 @@ class DeveloperIdentityResolver:
386
584
  .count()
387
585
  )
388
586
 
587
+ # Get actual ticket coverage if provided, otherwise default to 0.0
588
+ coverage_pct = 0.0
589
+ if ticket_coverage:
590
+ coverage_pct = ticket_coverage.get(identity.canonical_id, 0.0)
591
+
389
592
  stats.append(
390
593
  {
391
594
  "canonical_id": identity.canonical_id,
@@ -397,6 +600,7 @@ class DeveloperIdentityResolver:
397
600
  "alias_count": alias_count,
398
601
  "first_seen": identity.first_seen,
399
602
  "last_seen": identity.last_seen,
603
+ "ticket_coverage_pct": coverage_pct,
400
604
  }
401
605
  )
402
606
 
@@ -409,7 +613,14 @@ class DeveloperIdentityResolver:
409
613
  stats_by_dev = defaultdict(lambda: {"commits": 0, "story_points": 0})
410
614
 
411
615
  for commit in commits:
616
+ # Debug: check if commit is actually a dictionary
617
+ if not isinstance(commit, dict):
618
+ print(f"Error: Expected commit to be dict, got {type(commit)}: {commit}")
619
+ continue
620
+
412
621
  canonical_id = self.resolve_developer(commit["author_name"], commit["author_email"])
622
+ # Update the commit with the resolved canonical_id for later use in reports
623
+ commit["canonical_id"] = canonical_id
413
624
 
414
625
  stats_by_dev[canonical_id]["commits"] += 1
415
626
  stats_by_dev[canonical_id]["story_points"] += commit.get("story_points", 0) or 0
@@ -436,3 +647,141 @@ class DeveloperIdentityResolver:
436
647
  """Apply manual mappings - can be called explicitly after identities are created."""
437
648
  if self.manual_mappings:
438
649
  self._apply_manual_mappings(self.manual_mappings)
650
+
651
def get_canonical_name(self, canonical_id: str) -> str:
    """
    Look up the display name that reports should show for a canonical ID.

    WHY: Manual mappings can override the author name found in commits, so
    report code asks the resolver for the authoritative name rather than
    trusting the raw commit metadata.

    Args:
        canonical_id: The canonical ID whose display name is wanted

    Returns:
        The stored primary name, or "Unknown" when the ID is not known
    """
    fallback_name = "Unknown"

    if self._database_available:
        # Persistent path: ask the database for the identity row.
        with self.get_session() as session:
            record = (
                session.query(DeveloperIdentity)
                .filter(DeveloperIdentity.canonical_id == canonical_id)
                .first()
            )
            return record.primary_name if record else fallback_name

    # In-memory path: the identity table takes precedence over the cache.
    if canonical_id in self._in_memory_identities:
        return self._in_memory_identities[canonical_id]["primary_name"]

    # The cache holds both "email:name" -> id strings and id -> identity
    # dicts; only the dict entries carry a display name.
    cached = self._cache.get(canonical_id)
    if isinstance(cached, dict):
        return cached.get("primary_name", fallback_name)

    return fallback_name
687
+
688
def _apply_manual_mappings_to_memory(self) -> None:
    """
    Apply manual mappings to in-memory storage when database is not available.

    WHY: When persistence fails, we still need to apply user-configured
    identity mappings for the current analysis session.

    Each mapping may supply ``primary_email`` (or the older
    ``canonical_email``), a list of ``aliases`` and an optional display
    ``name``. Mappings without a canonical email or without aliases are
    skipped.

    This method is safe to call more than once: an identity that already
    exists in memory for the canonical email is reused (and renamed when a
    display name is configured) instead of being duplicated under a new ID.
    """
    if not self.manual_mappings:
        return

    for mapping in self.manual_mappings:
        # Support both canonical_email and primary_email for backward
        # compatibility. The trailing `or ""` guards against a key that is
        # present but explicitly set to None.
        canonical_email = (
            (mapping.get("primary_email") or mapping.get("canonical_email") or "")
            .lower()
            .strip()
        )
        aliases = mapping.get("aliases", [])
        preferred_name = mapping.get("name")  # Optional display name

        if not canonical_email or not aliases:
            continue

        display_name = preferred_name or canonical_email.split("@")[0]

        # Reuse an identity already registered for this email so repeated
        # application of the mappings does not create duplicate developers.
        canonical_id = next(
            (
                existing_id
                for existing_id, identity in self._in_memory_identities.items()
                if identity["primary_email"] == canonical_email
            ),
            None,
        )
        if canonical_id is None:
            canonical_id = str(uuid.uuid4())
            self._in_memory_identities[canonical_id] = {
                "primary_name": display_name,
                "primary_email": canonical_email,
                "github_username": None,
                "total_commits": 0,
                "total_story_points": 0,
            }
        elif preferred_name:
            # A configured display name overrides whatever was stored before.
            self._in_memory_identities[canonical_id]["primary_name"] = preferred_name

        # Add to cache (id -> identity dict, used for display-name lookups)
        self._cache[canonical_id] = self._in_memory_identities[canonical_id]

        # Alias keys must use the lower-cased name: the in-memory resolver
        # builds its lookup keys as "<email>:<name.lower()>".
        name_key = display_name.lower()
        for alias_email in aliases:
            alias_email = alias_email.lower().strip()
            alias_key = f"{alias_email}:{name_key}"
            self._in_memory_aliases[alias_key] = canonical_id
            self._cache[alias_key] = canonical_id

        logger.debug(
            f"Applied in-memory mapping: {display_name} "
            f"with {len(aliases)} aliases"
        )
735
+
736
def _fallback_identity_resolution(self, name: str, email: str) -> str:
    """
    Fallback identity resolution when database is not available.

    WHY: Even without persistence, we need consistent identity resolution
    within a single analysis session to avoid duplicate developer entries.

    Resolution order:
      1. exact "<email>:<lower-cased name>" hit in the cache,
      2. exact hit in the in-memory alias table,
      3. alias table entry whose email part equals ``email`` (any name),
      4. in-memory identity whose primary email equals ``email``,
      5. otherwise a new in-memory identity is created.

    Args:
        name: Developer name
        email: Developer email

    Returns:
        Canonical ID for the developer
    """
    # Normalize inputs
    name = name.strip()
    email = email.lower().strip()
    cache_key = f"{email}:{name.lower()}"

    # Check if already resolved
    if cache_key in self._cache:
        return self._cache[cache_key]

    # Check in-memory aliases (exact email + name match)
    if cache_key in self._in_memory_aliases:
        canonical_id = self._in_memory_aliases[cache_key]
        self._cache[cache_key] = canonical_id
        return canonical_id

    # Check in-memory aliases by email alone. Alias keys have the form
    # "<email>:<name>", where the name is the mapping's display name rather
    # than the commit author name, so an exact-key match would miss commits
    # made from an alias address under a different author name.
    if email:
        for alias_key, canonical_id in self._in_memory_aliases.items():
            if alias_key.partition(":")[0] == email:
                self._cache[cache_key] = canonical_id
                return canonical_id

    # Check for email match in existing identities
    for canonical_id, identity in self._in_memory_identities.items():
        if identity["primary_email"] == email:
            # Add this name variant to cache
            self._cache[cache_key] = canonical_id
            return canonical_id

    # Create new identity
    canonical_id = str(uuid.uuid4())
    self._in_memory_identities[canonical_id] = {
        "primary_name": name,
        "primary_email": email,
        "github_username": None,
        "total_commits": 0,
        "total_story_points": 0,
    }

    # Add to cache: id -> identity dict for name lookups, key -> id for
    # subsequent resolutions of the same author.
    self._cache[canonical_id] = self._in_memory_identities[canonical_id]
    self._cache[cache_key] = canonical_id

    logger.debug(f"Created in-memory identity for {name} <{email}>")
    return canonical_id