gitflow-analytics 3.4.7__py3-none-any.whl → 3.6.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,4 +1,4 @@
"""Version information for gitflow-analytics."""

__version__ = "3.6.0"
# Numeric components of the dotted version string, in order.
__version_info__ = tuple(map(int, __version__.split(".")))
gitflow_analytics/cli.py CHANGED
@@ -342,11 +342,28 @@ def cli(ctx: click.Context) -> None:
342
342
  \b
343
343
  COMMANDS:
344
344
  analyze Analyze repositories and generate reports (default)
345
+ install Interactive installation wizard
346
+ run Interactive launcher with preferences
347
+ aliases Generate developer identity aliases using LLM
345
348
  identities Manage developer identity resolution
346
349
  train Train ML models for commit classification
347
350
  fetch Fetch external data (GitHub PRs, PM tickets)
348
351
  help Show detailed help and documentation
349
352
 
353
+ \b
354
+ EXAMPLES:
355
+ # Interactive installation
356
+ gitflow-analytics install
357
+
358
+ # Interactive launcher
359
+ gitflow-analytics run -c config.yaml
360
+
361
+ # Generate developer aliases
362
+ gitflow-analytics aliases -c config.yaml --apply
363
+
364
+ # Run analysis
365
+ gitflow-analytics -c config.yaml --weeks 4
366
+
350
367
  \b
351
368
  For detailed command help: gitflow-analytics COMMAND --help
352
369
  For documentation: https://github.com/yourusername/gitflow-analytics
@@ -4476,11 +4493,123 @@ def merge_identity(config: Path, dev1: str, dev2: str) -> None:
4476
4493
  sys.exit(1)
4477
4494
 
4478
4495
 
def _run_install_wizard_for(config_path: Path) -> Optional[Path]:
    """Run the installation wizard targeting ``config_path``.

    Args:
        config_path: Desired location of the configuration file. Its parent
            directory is passed to the wizard as ``output_dir`` and its file
            name is stored on the wizard as ``config_filename``.

    Returns:
        ``config_path`` when the wizard reports success, otherwise None.
    """
    # Imported lazily so the wizard module is only loaded when a config must be created.
    from .cli_wizards.install_wizard import InstallWizard

    wizard = InstallWizard(output_dir=config_path.parent, skip_validation=False)

    # Store the desired config filename for the wizard
    wizard.config_filename = config_path.name

    if not wizard.run():
        click.echo("\n❌ Installation wizard cancelled or failed.", err=True)
        return None

    click.echo(f"\n✅ Configuration created: {config_path}")
    click.echo("\n🎉 Ready to run analysis!\n")
    return config_path


def _resolve_config_path(config: Optional[Path]) -> Optional[Path]:
    """Resolve configuration file path, offering to create if missing.

    When ``config`` is given it is expanded (``~``) and resolved; if the file
    is missing the user is asked whether to create it with the installation
    wizard. When ``config`` is None, a fixed list of default locations is
    searched and, if none exists, the user is prompted for a destination and
    the wizard is launched.

    Args:
        config: User-specified config path or None

    Returns:
        Validated config path or None if user cancels
    """
    # Case 1: Config explicitly specified
    if config:
        # expanduser() keeps this branch consistent with the custom-path prompt
        # below: a literal "~" (e.g. from a quoted argument) must not end up as
        # a directory named "~" under the current working directory.
        config_path = Path(config).expanduser().resolve()
        if config_path.exists():
            return config_path

        click.echo(f"❌ Configuration file not found: {config_path}\n", err=True)

        if click.confirm("Would you like to create a new configuration?", default=True):
            click.echo("\n🚀 Launching installation wizard...\n")
            return _run_install_wizard_for(config_path)

        click.echo("\n💡 Create a configuration file with:")
        click.echo(" gitflow-analytics install")
        click.echo(f"\nOr manually create: {config_path}\n")
        return None

    # Case 2: No config specified, search for defaults
    click.echo("🔍 Looking for configuration files...\n")

    # Default config locations, checked in this order
    default_locations = [
        Path.cwd() / "config.yaml",
        Path.cwd() / ".gitflow-analytics.yaml",
        Path.home() / ".gitflow-analytics" / "config.yaml",
    ]
    for location in default_locations:
        if location.exists():
            click.echo(f"📋 Found configuration: {location}\n")
            return location

    # No config found anywhere
    click.echo("No configuration file found. Let's create one!\n")

    # Offer to create config
    locations = [
        ("./config.yaml", "Current directory"),
        (str(Path.home() / ".gitflow-analytics" / "config.yaml"), "User directory"),
    ]

    click.echo("Where would you like to save the configuration?")
    for i, (path, desc) in enumerate(locations, 1):
        click.echo(f" {i}. {path} ({desc})")
    click.echo(" 3. Custom path")

    try:
        choice = click.prompt("\nSelect option", type=click.Choice(["1", "2", "3"]), default="1")

        if choice == "1":
            config_path = Path.cwd() / "config.yaml"
        elif choice == "2":
            config_path = Path.home() / ".gitflow-analytics" / "config.yaml"
        else:
            custom_path = click.prompt("Enter configuration file path")
            config_path = Path(custom_path).expanduser().resolve()
    except (click.exceptions.Abort, EOFError):
        click.echo("\n⚠️ Cancelled by user.")
        return None

    # Launch install wizard
    click.echo(f"\n🚀 Creating configuration at: {config_path}")
    click.echo("Launching installation wizard...\n")

    return _run_install_wizard_for(config_path)
4606
+
4607
+
4479
4608
  @cli.command(name="run")
4480
4609
  @click.option(
4481
4610
  "--config",
4482
4611
  "-c",
4483
- type=click.Path(exists=True, path_type=Path),
4612
+ type=click.Path(path_type=Path), # Remove exists=True to allow missing files
4484
4613
  help="Path to configuration file (optional, will search for default)",
4485
4614
  )
4486
4615
  def run_launcher(config: Optional[Path]) -> None:
@@ -4516,11 +4645,21 @@ def run_launcher(config: Optional[Path]) -> None:
4516
4645
  5. Run analysis with your selections
4517
4646
  """
4518
4647
  try:
4648
+ # Handle missing config file gracefully
4649
+ config_path = _resolve_config_path(config)
4650
+
4651
+ if not config_path:
4652
+ # No config found or user cancelled
4653
+ sys.exit(1)
4654
+
4519
4655
  from .cli_wizards.run_launcher import run_interactive_launcher
4520
4656
 
4521
- success = run_interactive_launcher(config_path=config)
4657
+ success = run_interactive_launcher(config_path=config_path)
4522
4658
  sys.exit(0 if success else 1)
4523
4659
 
4660
+ except (KeyboardInterrupt, click.exceptions.Abort):
4661
+ click.echo("\n\n⚠️ Launcher cancelled by user.")
4662
+ sys.exit(130)
4524
4663
  except Exception as e:
4525
4664
  click.echo(f"❌ Launcher failed: {e}", err=True)
4526
4665
  logger.error(f"Launcher error: {type(e).__name__}")
@@ -4857,6 +4996,342 @@ def identities(config: Path, weeks: int, apply: bool) -> None:
4857
4996
  sys.exit(1)
4858
4997
 
4859
4998
 
def _build_alias_commit_analyzer(cfg, cache):
    """Create the GitAnalyzer used to collect commits for alias detection.

    Args:
        cfg: Configuration object returned by ``ConfigLoader.load``.
        cache: Analysis cache instance handed to the analyzer.

    Returns:
        A ``GitAnalyzer`` configured from the values under ``cfg.analysis``.
    """
    analysis_cfg = cfg.analysis

    # Prepare ML categorization config for analyzer
    ml_config = None
    if hasattr(analysis_cfg, "ml_categorization"):
        ml = analysis_cfg.ml_categorization
        ml_config = {
            "enabled": ml.enabled,
            "min_confidence": ml.min_confidence,
            "semantic_weight": ml.semantic_weight,
            "file_pattern_weight": ml.file_pattern_weight,
            "hybrid_threshold": ml.hybrid_threshold,
            "cache_duration_days": ml.cache_duration_days,
            "batch_size": ml.batch_size,
            "enable_caching": ml.enable_caching,
            "spacy_model": ml.spacy_model,
        }

    # LLM classification configuration
    llm = analysis_cfg.llm_classification
    llm_config = {
        "enabled": llm.enabled,
        "api_key": llm.api_key,
        "model": llm.model,
        "confidence_threshold": llm.confidence_threshold,
        "max_tokens": llm.max_tokens,
        "temperature": llm.temperature,
        "timeout_seconds": llm.timeout_seconds,
        "cache_duration_days": llm.cache_duration_days,
        "enable_caching": llm.enable_caching,
        "max_daily_requests": llm.max_daily_requests,
        "domain_terms": llm.domain_terms,
    }

    # Configure branch analysis
    branches = analysis_cfg.branch_analysis
    branch_analysis_config = {
        "strategy": branches.strategy,
        "max_branches_per_repo": branches.max_branches_per_repo,
        "active_days_threshold": branches.active_days_threshold,
        "include_main_branches": branches.include_main_branches,
        "always_include_patterns": branches.always_include_patterns,
        "always_exclude_patterns": branches.always_exclude_patterns,
        "enable_progress_logging": branches.enable_progress_logging,
        "branch_commit_limit": branches.branch_commit_limit,
    }

    return GitAnalyzer(
        cache,
        branch_mapping_rules=analysis_cfg.branch_mapping_rules,
        allowed_ticket_platforms=getattr(
            analysis_cfg, "ticket_platforms", ["jira", "github", "clickup", "linear"]
        ),
        exclude_paths=analysis_cfg.exclude_paths,
        story_point_patterns=analysis_cfg.story_point_patterns,
        ml_categorization_config=ml_config,
        llm_config=llm_config,
        branch_analysis_config=branch_analysis_config,
    )


def _resolve_env_placeholder(value: str) -> Optional[str]:
    """Return ``value``, or the environment variable it names as ``${VAR}``.

    Args:
        value: Literal value or a ``${VAR_NAME}`` placeholder.

    Returns:
        The literal value, or the environment variable's value (None when the
        variable is unset) if ``value`` has the ``${...}`` form.
    """
    if value.startswith("${") and value.endswith("}"):
        return os.getenv(value[2:-1])
    return value


def _apply_aliases_file_to_config(config: Path, output: Path) -> None:
    """Rewrite the config file so ``analysis.identity.aliases_file`` points at ``output``.

    Args:
        config: Path of the YAML configuration file to update in place.
        output: Path of the aliases file to reference.

    The stored reference is relative to the config's directory when the
    aliases file lives beneath it, and an absolute path otherwise.
    """
    click.echo(f"🔄 Updating {config} to reference aliases file...\n")

    # Read current config
    with open(config, encoding="utf-8") as f:
        # An empty YAML document loads as None; treat it as an empty mapping.
        config_data = yaml.safe_load(f) or {}

    # Ensure analysis/identity sections exist (a key with an empty value loads as None)
    analysis_section = config_data.get("analysis") or {}
    config_data["analysis"] = analysis_section
    identity_section = analysis_section.get("identity") or {}
    analysis_section["identity"] = identity_section

    # Compare resolved paths so relative CLI arguments are handled correctly and
    # the fallback really is absolute instead of being relative to the cwd.
    aliases_abs = output.resolve()
    try:
        aliases_ref = str(aliases_abs.relative_to(config.parent.resolve()))
    except ValueError:
        # Not located under the config directory, use absolute
        aliases_ref = str(aliases_abs)
    identity_section["aliases_file"] = aliases_ref

    # Remove manual_mappings if present
    # NOTE(review): the message below states these mappings are "now in aliases
    # file", but nothing in this command copies them there - confirm they are
    # migrated elsewhere before relying on this.
    if "manual_identity_mappings" in identity_section:
        del identity_section["manual_identity_mappings"]
        click.echo(" Removed inline manual_identity_mappings (now in aliases file)")

    # Serialize before opening the file for writing so a serialization error
    # cannot leave the user's config truncated.
    rendered = yaml.dump(config_data, default_flow_style=False, sort_keys=False)
    with open(config, "w", encoding="utf-8") as f:
        f.write(rendered)

    click.echo(f"✅ Updated {config}")
    click.echo(f" Added: analysis.identity.aliases_file = {aliases_ref}\n")


# Parameters (documented here rather than in the docstring, which click renders as --help text):
#   config: existing configuration file to load.
#   output: destination of aliases.yaml; defaults to the config's directory.
#   confidence_threshold: passed to LLMIdentityAnalyzer as its confidence threshold.
#   apply: when set, the config file is rewritten to reference the aliases file.
#   weeks: size of the commit-history window, counted back from now (UTC).
# Exits with status 1 when no repositories are configured, no commits are found,
# or any other error occurs.
@cli.command(name="aliases")
@click.option(
    "--config",
    "-c",
    type=click.Path(exists=True, path_type=Path),
    required=True,
    help="Path to configuration file",
)
@click.option(
    "--output",
    "-o",
    type=click.Path(path_type=Path),
    help="Output path for aliases.yaml (default: same dir as config)",
)
@click.option(
    "--confidence-threshold",
    type=float,
    default=0.9,
    help="Minimum confidence threshold for LLM matches (default: 0.9)",
)
@click.option(
    "--apply", is_flag=True, help="Automatically update config to use generated aliases file"
)
@click.option(
    "--weeks", type=int, default=12, help="Number of weeks of history to analyze (default: 12)"
)
def aliases_command(
    config: Path,
    output: Optional[Path],
    confidence_threshold: float,
    apply: bool,
    weeks: int,
) -> None:
    """Generate developer identity aliases using LLM analysis.

    \b
    This command analyzes commit history and uses LLM to identify
    developer aliases (same person with different email addresses).
    Results are saved to aliases.yaml which can be shared across
    multiple config files.

    \b
    EXAMPLES:
      # Generate aliases and review
      gitflow-analytics aliases -c config.yaml

      # Generate and apply automatically
      gitflow-analytics aliases -c config.yaml --apply

      # Save to specific location
      gitflow-analytics aliases -c config.yaml -o ~/shared/aliases.yaml

      # Use longer history for better accuracy
      gitflow-analytics aliases -c config.yaml --weeks 24

    \b
    CONFIGURATION:
      Aliases are saved to aliases.yaml and can be referenced in
      multiple config files for consistent identity resolution.
    """
    try:
        from .config.aliases import AliasesManager, DeveloperAlias
        from .identity_llm.analyzer import LLMIdentityAnalyzer

        # Load configuration
        click.echo(f"\n📋 Loading configuration from {config}...")
        cfg = ConfigLoader.load(config)

        # Determine output path
        if not output:
            output = config.parent / "aliases.yaml"

        click.echo(f"🔍 Analyzing developer identities (last {weeks} weeks)")
        click.echo(f"📊 Confidence threshold: {confidence_threshold:.0%}")
        click.echo(f"💾 Output: {output}\n")

        # Set up date range
        end_date = datetime.now(timezone.utc)
        start_date = end_date - timedelta(weeks=weeks)

        # Analyze repositories to collect commits
        click.echo("📥 Fetching commit history...\n")
        cache = GitAnalysisCache(cfg.cache.directory)
        analyzer = _build_alias_commit_analyzer(cfg, cache)

        all_commits = []

        # Get repositories to analyze
        repositories = cfg.repositories if cfg.repositories else []

        if not repositories:
            click.echo("❌ No repositories configured", err=True)
            sys.exit(1)

        # Collect commits from all repositories
        with click.progressbar(
            repositories,
            label="Analyzing repositories",
            item_show_func=lambda r: r.name if r else "",
        ) as repos:
            for repo_config in repos:
                try:
                    # Repositories whose path is missing are skipped without a message.
                    if not repo_config.path.exists():
                        continue

                    # Fetch commits
                    repo_commits = analyzer.analyze_repository(
                        repo_config.path, start_date=start_date, branch=repo_config.branch
                    )

                    if repo_commits:
                        all_commits.extend(repo_commits)

                except Exception as e:
                    # Best effort: one failing repository must not abort the whole run.
                    click.echo(f"\n⚠️ Warning: Failed to analyze repository: {e}", err=True)
                    continue

        click.echo(f"\n✅ Collected {len(all_commits)} commits\n")

        if not all_commits:
            click.echo("❌ No commits found to analyze", err=True)
            sys.exit(1)

        # Initialize LLM identity analyzer
        click.echo("🤖 Running LLM identity analysis...\n")

        # Get OpenRouter API key from config, resolving an environment variable if needed
        api_key = None
        if cfg.chatgpt and cfg.chatgpt.api_key:
            api_key = _resolve_env_placeholder(cfg.chatgpt.api_key)

        if not api_key:
            click.echo(
                "⚠️ No OpenRouter API key configured - using heuristic analysis only", err=True
            )

        llm_analyzer = LLMIdentityAnalyzer(
            api_key=api_key, confidence_threshold=confidence_threshold
        )

        # Run analysis
        result = llm_analyzer.analyze_identities(all_commits)

        click.echo("✅ Analysis complete:")
        click.echo(f" - Found {len(result.clusters)} identity clusters")
        click.echo(f" - {len(result.unresolved_identities)} unresolved identities")
        click.echo(f" - Method: {result.analysis_metadata.get('analysis_method', 'unknown')}\n")

        # Create aliases manager and add clusters
        aliases_mgr = AliasesManager(output)

        # Load existing aliases if file exists
        if output.exists():
            click.echo(f"📂 Loading existing aliases from {output}...")
            aliases_mgr.load()
            existing_count = len(aliases_mgr.aliases)
            click.echo(f" Found {existing_count} existing aliases\n")

        # Add new clusters
        new_count = 0
        updated_count = 0

        for cluster in result.clusters:
            # Check if this is a new or updated alias (looked up before adding)
            existing = aliases_mgr.get_alias(cluster.canonical_email)

            alias = DeveloperAlias(
                name=cluster.preferred_display_name or cluster.canonical_name,
                primary_email=cluster.canonical_email,
                aliases=[a.email for a in cluster.aliases],
                confidence=cluster.confidence,
                reasoning=(
                    cluster.reasoning[:200] if cluster.reasoning else ""
                ),  # Truncate for readability
            )

            if existing:
                updated_count += 1
            else:
                new_count += 1

            aliases_mgr.add_alias(alias)

        # Save aliases
        click.echo("💾 Saving aliases...\n")
        aliases_mgr.save()

        click.echo(f"✅ Saved to {output}")
        click.echo(f" - New aliases: {new_count}")
        click.echo(f" - Updated aliases: {updated_count}")
        click.echo(f" - Total aliases: {len(aliases_mgr.aliases)}\n")

        # Display summary
        if aliases_mgr.aliases:
            click.echo("📋 Generated Aliases:\n")

            for alias in sorted(aliases_mgr.aliases, key=lambda a: a.primary_email):
                name_display = (
                    f"{alias.name} <{alias.primary_email}>" if alias.name else alias.primary_email
                )
                click.echo(f" • {name_display}")

                if alias.aliases:
                    for alias_email in alias.aliases:
                        click.echo(f" → {alias_email}")

                if alias.confidence < 1.0:
                    if alias.confidence >= 0.9:
                        confidence_color = "green"
                    elif alias.confidence >= 0.8:
                        confidence_color = "yellow"
                    else:
                        confidence_color = "red"
                    click.echo(" Confidence: ", nl=False)
                    click.secho(f"{alias.confidence:.0%}", fg=confidence_color)

                click.echo()  # Blank line between aliases

        # Apply to config if requested
        if apply:
            _apply_aliases_file_to_config(config, output)

        # Summary and next steps
        click.echo("✨ Identity alias generation complete!\n")

        if not apply:
            click.echo("💡 Next steps:")
            click.echo(f" 1. Review the aliases in {output}")
            click.echo(" 2. Update your config.yaml to reference the aliases file:")
            click.echo(" analysis:")
            click.echo(" identity:")
            click.echo(f" aliases_file: {output.name}")
            click.echo(" 3. Or run with --apply flag to update automatically\n")

    except Exception as e:
        # sys.exit() calls above raise SystemExit, which is not caught here.
        click.echo(f"\n❌ Error generating aliases: {e}", err=True)
        import traceback

        # Full traceback only when explicitly requested through the environment.
        if os.getenv("GITFLOW_DEBUG"):
            traceback.print_exc()
        sys.exit(1)
5333
+
5334
+
4860
5335
  @cli.command()
4861
5336
  @click.option(
4862
5337
  "--config",