gitflow-analytics 3.3.0__py3-none-any.whl → 3.5.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (36)
  1. gitflow_analytics/_version.py +1 -1
  2. gitflow_analytics/cli.py +517 -15
  3. gitflow_analytics/cli_wizards/__init__.py +10 -0
  4. gitflow_analytics/cli_wizards/install_wizard.py +1181 -0
  5. gitflow_analytics/cli_wizards/run_launcher.py +433 -0
  6. gitflow_analytics/config/__init__.py +3 -0
  7. gitflow_analytics/config/aliases.py +306 -0
  8. gitflow_analytics/config/loader.py +35 -1
  9. gitflow_analytics/config/schema.py +13 -0
  10. gitflow_analytics/constants.py +75 -0
  11. gitflow_analytics/core/cache.py +7 -3
  12. gitflow_analytics/core/data_fetcher.py +66 -30
  13. gitflow_analytics/core/git_timeout_wrapper.py +6 -4
  14. gitflow_analytics/core/progress.py +2 -4
  15. gitflow_analytics/core/subprocess_git.py +31 -5
  16. gitflow_analytics/identity_llm/analysis_pass.py +13 -3
  17. gitflow_analytics/identity_llm/analyzer.py +14 -2
  18. gitflow_analytics/identity_llm/models.py +7 -1
  19. gitflow_analytics/qualitative/classifiers/llm/openai_client.py +5 -3
  20. gitflow_analytics/security/config.py +6 -6
  21. gitflow_analytics/security/extractors/dependency_checker.py +14 -14
  22. gitflow_analytics/security/extractors/secret_detector.py +8 -14
  23. gitflow_analytics/security/extractors/vulnerability_scanner.py +9 -9
  24. gitflow_analytics/security/llm_analyzer.py +10 -10
  25. gitflow_analytics/security/security_analyzer.py +17 -17
  26. gitflow_analytics/tui/screens/analysis_progress_screen.py +1 -1
  27. gitflow_analytics/ui/progress_display.py +36 -29
  28. gitflow_analytics/verify_activity.py +23 -26
  29. {gitflow_analytics-3.3.0.dist-info → gitflow_analytics-3.5.2.dist-info}/METADATA +1 -1
  30. {gitflow_analytics-3.3.0.dist-info → gitflow_analytics-3.5.2.dist-info}/RECORD +34 -31
  31. gitflow_analytics/security/reports/__init__.py +0 -5
  32. gitflow_analytics/security/reports/security_report.py +0 -358
  33. {gitflow_analytics-3.3.0.dist-info → gitflow_analytics-3.5.2.dist-info}/WHEEL +0 -0
  34. {gitflow_analytics-3.3.0.dist-info → gitflow_analytics-3.5.2.dist-info}/entry_points.txt +0 -0
  35. {gitflow_analytics-3.3.0.dist-info → gitflow_analytics-3.5.2.dist-info}/licenses/LICENSE +0 -0
  36. {gitflow_analytics-3.3.0.dist-info → gitflow_analytics-3.5.2.dist-info}/top_level.txt +0 -0
gitflow_analytics/config/aliases.py
@@ -0,0 +1,306 @@
+ """Developer identity aliases management.
+
+ This module provides functionality for managing developer identity aliases
+ across multiple configuration files. Aliases can be shared to maintain
+ consistent identity resolution across different analysis configurations.
+ """
+
+ import logging
+ from dataclasses import dataclass, field
+ from pathlib import Path
+ from typing import Any, Optional
+
+ import yaml
+
+ logger = logging.getLogger(__name__)
+
+
+ @dataclass
+ class DeveloperAlias:
+     """A developer alias configuration.
+
+     Represents a single developer with their primary email and all known aliases.
+     Supports both manual and LLM-generated alias configurations with confidence scores.
+     """
+
+     primary_email: str
+     aliases: list[str] = field(default_factory=list)
+     name: Optional[str] = None
+     confidence: float = 1.0
+     reasoning: str = ""
+
+     def to_dict(self) -> dict[str, Any]:
+         """Convert to dictionary format for YAML serialization.
+
+         Returns:
+             Dictionary representation with optional fields omitted if not set
+         """
+         result: dict[str, Any] = {
+             "primary_email": self.primary_email,
+             "aliases": self.aliases,
+         }
+
+         if self.name:
+             result["name"] = self.name
+
+         # Only include confidence and reasoning for LLM-generated aliases
+         if self.confidence < 1.0:
+             result["confidence"] = round(self.confidence, 2)
+             if self.reasoning:
+                 result["reasoning"] = self.reasoning
+
+         return result
+
+
+ class AliasesManager:
+     """Manages developer identity aliases.
+
+     Provides functionality to load, save, and manipulate developer identity aliases.
+     Supports both manual aliases (confidence=1.0) and LLM-generated aliases with
+     confidence scores and reasoning.
+
+     Example:
+         >>> manager = AliasesManager(Path("aliases.yaml"))
+         >>> manager.load()
+         >>> manager.add_alias(DeveloperAlias(
+         ...     primary_email="john@company.com",
+         ...     aliases=["jdoe@gmail.com"],
+         ...     name="John Doe"
+         ... ))
+         >>> manager.save()
+     """
+
+     def __init__(self, aliases_path: Optional[Path] = None):
+         """Initialize aliases manager.
+
+         Args:
+             aliases_path: Path to aliases.yaml file. If None, aliases must be
+                 added programmatically or loaded from another source.
+         """
+         self.aliases_path = aliases_path
+         self.aliases: list[DeveloperAlias] = []
+
+         if aliases_path and aliases_path.exists():
+             self.load()
+
+     def load(self) -> None:
+         """Load aliases from file.
+
+         Loads developer aliases from the configured YAML file. If the file
+         doesn't exist or is empty, initializes with an empty alias list.
+
+         Raises:
+             yaml.YAMLError: If the YAML file is malformed
+         """
+         if not self.aliases_path or not self.aliases_path.exists():
+             logger.debug("No aliases file found or path not set")
+             return
+
+         try:
+             with open(self.aliases_path) as f:
+                 data = yaml.safe_load(f) or {}
+
+             self.aliases = []
+             for alias_data in data.get("developer_aliases", []):
+                 # Support both 'primary_email' (new) and 'canonical_email' (old)
+                 primary_email = alias_data.get("primary_email") or alias_data.get("canonical_email")
+
+                 if not primary_email:
+                     logger.warning(f"Skipping alias entry without primary_email: {alias_data}")
+                     continue
+
+                 self.aliases.append(
+                     DeveloperAlias(
+                         primary_email=primary_email,
+                         aliases=alias_data.get("aliases", []),
+                         name=alias_data.get("name"),
+                         confidence=alias_data.get("confidence", 1.0),
+                         reasoning=alias_data.get("reasoning", ""),
+                     )
+                 )
+
+             logger.info(f"Loaded {len(self.aliases)} developer aliases from {self.aliases_path}")
+
+         except yaml.YAMLError as e:
+             logger.error(f"Error parsing aliases file {self.aliases_path}: {e}")
+             raise
+         except Exception as e:
+             logger.error(f"Error loading aliases file {self.aliases_path}: {e}")
+             raise
+
+     def save(self) -> None:
+         """Save aliases to file.
+
+         Writes all developer aliases to the configured YAML file with proper
+         formatting and comments. Creates the parent directory if it doesn't exist.
+
+         Raises:
+             OSError: If file cannot be written
+         """
+         if not self.aliases_path:
+             logger.warning("No aliases path configured, cannot save")
+             return
+
+         # Ensure directory exists
+         self.aliases_path.parent.mkdir(parents=True, exist_ok=True)
+
+         try:
+             # Build data structure with comments
+             data = {
+                 "# Developer Identity Aliases": None,
+                 "# Generated by GitFlow Analytics": None,
+                 "# Share this file across multiple config files": None,
+                 "# Each alias maps multiple email addresses to a single developer": None,
+                 "developer_aliases": [alias.to_dict() for alias in self.aliases],
+             }
+
+             with open(self.aliases_path, "w") as f:
+                 # Custom YAML dump to preserve comments
+                 f.write("# Developer Identity Aliases\n")
+                 f.write("# Generated by GitFlow Analytics\n")
+                 f.write("# Share this file across multiple config files\n")
+                 f.write("# Each alias maps multiple email addresses to a single developer\n\n")
+
+                 # Write the aliases list
+                 yaml.dump(
+                     {"developer_aliases": data["developer_aliases"]},
+                     f,
+                     default_flow_style=False,
+                     sort_keys=False,
+                     allow_unicode=True,
+                 )
+
+             logger.info(f"Saved {len(self.aliases)} developer aliases to {self.aliases_path}")
+
+         except Exception as e:
+             logger.error(f"Error saving aliases file {self.aliases_path}: {e}")
+             raise
+
+     def add_alias(self, alias: DeveloperAlias) -> None:
+         """Add or update a developer alias.
+
+         If an alias with the same primary email already exists, it will be replaced.
+         This ensures there is only one alias configuration per developer.
+
+         Args:
+             alias: The developer alias to add or update
+         """
+         # Remove existing alias for same primary email
+         self.aliases = [a for a in self.aliases if a.primary_email != alias.primary_email]
+         self.aliases.append(alias)
+         logger.debug(f"Added/updated alias for {alias.primary_email}")
+
+     def remove_alias(self, primary_email: str) -> bool:
+         """Remove a developer alias by primary email.
+
+         Args:
+             primary_email: The primary email of the alias to remove
+
+         Returns:
+             True if an alias was removed, False if not found
+         """
+         original_count = len(self.aliases)
+         self.aliases = [a for a in self.aliases if a.primary_email != primary_email]
+         removed = len(self.aliases) < original_count
+         if removed:
+             logger.debug(f"Removed alias for {primary_email}")
+         return removed
+
+     def get_alias(self, primary_email: str) -> Optional[DeveloperAlias]:
+         """Get a developer alias by primary email.
+
+         Args:
+             primary_email: The primary email to look up
+
+         Returns:
+             The developer alias if found, None otherwise
+         """
+         for alias in self.aliases:
+             if alias.primary_email == primary_email:
+                 return alias
+         return None
+
+     def to_manual_mappings(self) -> list[dict[str, Any]]:
+         """Convert aliases to config manual_identity_mappings format.
+
+         Converts the internal alias representation to the format expected
+         by the GitFlow Analytics configuration's manual_identity_mappings field.
+
+         Returns:
+             List of manual identity mapping dictionaries
+         """
+         mappings = []
+         for alias in self.aliases:
+             mapping: dict[str, Any] = {"primary_email": alias.primary_email}
+
+             if alias.name:
+                 mapping["name"] = alias.name
+
+             mapping["aliases"] = alias.aliases
+
+             # Include confidence and reasoning for LLM-generated mappings
+             if alias.confidence < 1.0:
+                 mapping["confidence"] = alias.confidence
+                 if alias.reasoning:
+                     mapping["reasoning"] = alias.reasoning
+
+             mappings.append(mapping)
+
+         return mappings
+
+     def merge_from_mappings(self, mappings: list[dict[str, Any]]) -> None:
+         """Merge aliases from manual identity mappings.
+
+         Takes manual identity mappings from a config file and merges them
+         into the current alias set. Existing aliases are preserved unless
+         they conflict with the new mappings.
+
+         Args:
+             mappings: List of manual identity mapping dictionaries
+         """
+         for mapping in mappings:
+             # Support both field name variants
+             primary_email = mapping.get("primary_email") or mapping.get("canonical_email")
+
+             if not primary_email:
+                 logger.warning(f"Skipping mapping without primary_email: {mapping}")
+                 continue
+
+             alias = DeveloperAlias(
+                 primary_email=primary_email,
+                 aliases=mapping.get("aliases", []),
+                 name=mapping.get("name"),
+                 confidence=mapping.get("confidence", 1.0),
+                 reasoning=mapping.get("reasoning", ""),
+             )
+
+             self.add_alias(alias)
+
+     def get_statistics(self) -> dict[str, Any]:
+         """Get statistics about the aliases.
+
+         Returns:
+             Dictionary with statistics including total aliases, manual vs LLM-generated,
+             average confidence, etc.
+         """
+         if not self.aliases:
+             return {
+                 "total_aliases": 0,
+                 "manual_aliases": 0,
+                 "llm_aliases": 0,
+                 "avg_confidence": 0.0,
+                 "total_email_addresses": 0,
+             }
+
+         manual_count = sum(1 for a in self.aliases if a.confidence == 1.0)
+         llm_count = len(self.aliases) - manual_count
+         avg_confidence = sum(a.confidence for a in self.aliases) / len(self.aliases)
+         total_emails = sum(len(a.aliases) + 1 for a in self.aliases)  # +1 for primary
+
+         return {
+             "total_aliases": len(self.aliases),
+             "manual_aliases": manual_count,
+             "llm_aliases": llm_count,
+             "avg_confidence": round(avg_confidence, 3),
+             "total_email_addresses": total_emails,
+         }
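For orientation, here is a minimal usage sketch of the new aliases module; the file path, name, and email addresses are illustrative, not taken from the package:

from pathlib import Path

from gitflow_analytics.config.aliases import AliasesManager, DeveloperAlias

# Create (or load, if the file already exists) a shared aliases store
manager = AliasesManager(Path("aliases.yaml"))
manager.add_alias(
    DeveloperAlias(
        primary_email="jane@company.com",
        aliases=["jane.doe@users.noreply.github.com"],
        name="Jane Doe",
    )
)
manager.save()  # writes a commented header plus a developer_aliases list

print(manager.get_statistics())
# -> {'total_aliases': 1, 'manual_aliases': 1, 'llm_aliases': 0,
#     'avg_confidence': 1.0, 'total_email_addresses': 2}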
gitflow_analytics/config/loader.py
@@ -1,5 +1,6 @@
  """YAML configuration loading and environment variable expansion."""

+ import logging
  import os
  from pathlib import Path
  from typing import Any, Optional, Union
@@ -33,6 +34,8 @@ from .schema import (
  )
  from .validator import ConfigValidator

+ logger = logging.getLogger(__name__)
+

  class ConfigLoader:
      """Load and validate configuration from YAML files."""
@@ -533,6 +536,36 @@ class ConfigLoader:
              BranchAnalysisConfig(**branch_data) if branch_data else BranchAnalysisConfig()
          )

+         # Process aliases file and manual identity mappings
+         manual_mappings = list(analysis_data.get("identity", {}).get("manual_mappings", []))
+         aliases_file_path = None
+
+         # Load aliases from external file if specified
+         aliases_file = analysis_data.get("identity", {}).get("aliases_file")
+         if aliases_file:
+             aliases_path = Path(aliases_file).expanduser()
+             # Make relative paths relative to config file directory
+             if not aliases_path.is_absolute():
+                 aliases_path = config_path.parent / aliases_path
+
+             aliases_file_path = aliases_path
+
+             # Load and merge aliases if file exists
+             if aliases_path.exists():
+                 try:
+                     from .aliases import AliasesManager
+
+                     aliases_mgr = AliasesManager(aliases_path)
+                     # Merge aliases with existing manual mappings
+                     manual_mappings.extend(aliases_mgr.to_manual_mappings())
+                     logger.info(
+                         f"Loaded {len(aliases_mgr.aliases)} identity aliases from {aliases_path}"
+                     )
+                 except Exception as e:
+                     logger.warning(f"Could not load aliases file {aliases_path}: {e}")
+             else:
+                 logger.warning(f"Aliases file not found: {aliases_path}")
+
          return AnalysisConfig(
              story_point_patterns=analysis_data.get(
                  "story_point_patterns",
@@ -550,7 +583,8 @@
              similarity_threshold=analysis_data.get("identity", {}).get(
                  "similarity_threshold", 0.85
              ),
-             manual_identity_mappings=analysis_data.get("identity", {}).get("manual_mappings", []),
+             manual_identity_mappings=manual_mappings,
+             aliases_file=aliases_file_path,
              default_ticket_platform=analysis_data.get("default_ticket_platform"),
              branch_mapping_rules=analysis_data.get("branch_mapping_rules", {}),
              ticket_platforms=analysis_data.get("ticket_platforms"),
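The loader resolves aliases_file relative to the config file itself. A small sketch of that resolution logic, assuming the identity block is the same one the analysis_data lookups above read from; the YAML keys follow the loader code, while the paths are illustrative:

from pathlib import Path

import yaml

config_path = Path("/projects/acme/config.yaml")  # hypothetical config location
analysis_data = yaml.safe_load(
    "identity:\n"
    "  similarity_threshold: 0.85\n"
    "  aliases_file: ./aliases.yaml\n"
)

aliases_file = analysis_data.get("identity", {}).get("aliases_file")
aliases_path = Path(aliases_file).expanduser()
if not aliases_path.is_absolute():
    aliases_path = config_path.parent / aliases_path  # -> /projects/acme/aliases.yaml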
gitflow_analytics/config/schema.py
@@ -301,6 +301,7 @@ class AnalysisConfig:
      exclude_paths: list[str] = field(default_factory=list)
      similarity_threshold: float = 0.85
      manual_identity_mappings: list[dict[str, Any]] = field(default_factory=list)
+     aliases_file: Optional[Path] = None  # Path to shared aliases.yaml file
      default_ticket_platform: Optional[str] = None
      branch_mapping_rules: dict[str, list[str]] = field(default_factory=dict)
      ticket_platforms: Optional[list[str]] = None
@@ -379,6 +380,17 @@ class PMIntegrationConfig:
      platforms: dict[str, PMPlatformConfig] = field(default_factory=dict)


+ @dataclass
+ class LauncherPreferences:
+     """Interactive launcher preferences."""
+
+     last_selected_repos: list[str] = field(default_factory=list)
+     default_weeks: int = 4
+     auto_clear_cache: bool = False
+     skip_identity_analysis: bool = False
+     last_run: Optional[str] = None
+
+
  @dataclass
  class Config:
      """Main configuration container."""
@@ -393,6 +405,7 @@ class Config:
      pm: Optional[Any] = None  # Modern PM framework config
      pm_integration: Optional[PMIntegrationConfig] = None
      qualitative: Optional["QualitativeConfig"] = None
+     launcher: Optional[LauncherPreferences] = None

      def discover_organization_repositories(
          self, clone_base_path: Optional[Path] = None
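The new LauncherPreferences dataclass presumably stores settings for the interactive run launcher added in this release (gitflow_analytics/cli_wizards/run_launcher.py in the file list). A minimal construction sketch with made-up values:

from gitflow_analytics.config.schema import LauncherPreferences

prefs = LauncherPreferences(
    last_selected_repos=["org/service-api"],  # illustrative repository key
    default_weeks=12,
    auto_clear_cache=True,
)
# Config.launcher is Optional[LauncherPreferences] and defaults to None.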
gitflow_analytics/constants.py
@@ -0,0 +1,75 @@
+ """Application-wide constants and configuration values.
+
+ This module centralizes magic numbers and configuration defaults to improve
+ code maintainability and readability. Constants are organized by functional
+ area for easy navigation and updates.
+ """
+
+
+ class Timeouts:
+     """Timeout values in seconds for various git operations.
+
+     These timeouts protect against hanging operations when repositories
+     require authentication or have network issues.
+     """
+
+     # Git remote operations
+     GIT_FETCH = 30  # Fetch from remote repository
+     GIT_PULL = 30  # Pull latest changes
+
+     # Git local operations
+     GIT_BRANCH_ITERATION = 15  # Iterate commits for a branch/day
+     GIT_DIFF = 10  # Calculate diff statistics
+     GIT_CONFIG = 2  # Read git configuration
+     GIT_REMOTE_LIST = 5  # List remote branches
+
+     # Default timeout for generic git operations
+     DEFAULT_GIT_OPERATION = 30
+
+     # Process-level timeouts
+     SUBPROCESS_DEFAULT = 5  # Default subprocess timeout
+     THREAD_JOIN = 1  # Thread join timeout
+
+
+ class BatchSizes:
+     """Batch processing sizes for efficient data handling.
+
+     These sizes balance memory usage with performance gains from bulk operations.
+     Tunable based on repository size and system capabilities.
+     """
+
+     COMMIT_STORAGE = 1000  # Commits per bulk insert operation
+     TICKET_FETCH = 50  # Tickets fetched per JIRA batch
+     CACHE_WARMUP = 100  # Commits per cache warmup batch
+
+     # Estimation constants
+     COMMITS_PER_WEEK_ESTIMATE = 50  # Estimated commits for progress tracking
+     DEFAULT_PROGRESS_ESTIMATE = 100  # Default when estimation fails
+
+
+ class CacheTTL:
+     """Cache time-to-live values.
+
+     These values control how long cached data remains valid before
+     requiring refresh. Measured in hours unless otherwise specified.
+     """
+
+     ONE_WEEK_HOURS = 168  # Standard cache TTL (7 days * 24 hours)
+     IDENTITY_CACHE_DAYS = 7  # Developer identity analysis cache (in days)
+
+
+ class Thresholds:
+     """Various threshold values for analysis and reporting."""
+
+     # Cache performance
+     CACHE_HIT_RATE_GOOD = 50  # Percentage threshold for good cache performance
+
+     # Percentage calculations
+     PERCENTAGE_MULTIPLIER = 100  # Standard percentage calculation multiplier
+
+
+ class Estimations:
+     """Estimation constants for progress tracking and metrics."""
+
+     COMMITS_PER_WEEK = 50  # Estimated commits per week for progress bars
+     DEFAULT_ESTIMATE = 100  # Default estimate when actual count unavailable
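A brief sketch of how these constants are meant to be consumed; only the import path and constant names come from the diff, the surrounding calls are illustrative:

import subprocess

from gitflow_analytics.constants import BatchSizes, Timeouts

# Guard a remote git operation with the centralized timeout
subprocess.run(
    ["git", "fetch", "--all"],
    check=True,
    timeout=Timeouts.GIT_FETCH,  # 30 seconds
)

# Chunk bulk work to the documented batch size
commits = list(range(2500))  # stand-in for cached commit rows
for start in range(0, len(commits), BatchSizes.COMMIT_STORAGE):
    batch = commits[start : start + BatchSizes.COMMIT_STORAGE]  # 1000-row chunks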
gitflow_analytics/core/cache.py
@@ -12,6 +12,7 @@ from typing import Any, Optional, Union
  import git
  from sqlalchemy import and_

+ from ..constants import BatchSizes, CacheTTL, Thresholds
  from ..models.database import (
      CachedCommit,
      Database,
@@ -27,7 +28,10 @@ class GitAnalysisCache:
      """Cache for Git analysis results."""

      def __init__(
-         self, cache_dir: Union[Path, str], ttl_hours: int = 168, batch_size: int = 1000
+         self,
+         cache_dir: Union[Path, str],
+         ttl_hours: int = CacheTTL.ONE_WEEK_HOURS,
+         batch_size: int = BatchSizes.COMMIT_STORAGE,
      ) -> None:
          """Initialize cache with SQLite backend and configurable batch size.

@@ -37,7 +41,7 @@ class GitAnalysisCache:

          Args:
              cache_dir: Directory for cache database
-             ttl_hours: Time-to-live for cache entries in hours
+             ttl_hours: Time-to-live for cache entries in hours (default: 168 = 1 week)
              batch_size: Default batch size for bulk operations (default: 1000)
          """
          self.cache_dir = Path(cache_dir)  # Ensure it's a Path object
@@ -643,7 +647,7 @@ class GitAnalysisCache:
          # Performance insights
          if stats["hit_rate_percent"] > 80:
              print(" ✅ Excellent cache performance!")
-         elif stats["hit_rate_percent"] > 50:
+         elif stats["hit_rate_percent"] > Thresholds.CACHE_HIT_RATE_GOOD:
              print(" 👍 Good cache performance")
          elif stats["total_requests"] > 0:
              print(" ⚠️ Consider clearing stale cache entries")