gitflow-analytics 1.0.3__py3-none-any.whl → 1.3.11__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (116) hide show
  1. gitflow_analytics/_version.py +1 -1
  2. gitflow_analytics/classification/__init__.py +31 -0
  3. gitflow_analytics/classification/batch_classifier.py +752 -0
  4. gitflow_analytics/classification/classifier.py +464 -0
  5. gitflow_analytics/classification/feature_extractor.py +725 -0
  6. gitflow_analytics/classification/linguist_analyzer.py +574 -0
  7. gitflow_analytics/classification/model.py +455 -0
  8. gitflow_analytics/cli.py +4158 -350
  9. gitflow_analytics/cli_rich.py +198 -48
  10. gitflow_analytics/config/__init__.py +43 -0
  11. gitflow_analytics/config/errors.py +261 -0
  12. gitflow_analytics/config/loader.py +905 -0
  13. gitflow_analytics/config/profiles.py +264 -0
  14. gitflow_analytics/config/repository.py +124 -0
  15. gitflow_analytics/config/schema.py +444 -0
  16. gitflow_analytics/config/validator.py +154 -0
  17. gitflow_analytics/config.py +44 -508
  18. gitflow_analytics/core/analyzer.py +1209 -98
  19. gitflow_analytics/core/cache.py +1337 -29
  20. gitflow_analytics/core/data_fetcher.py +1285 -0
  21. gitflow_analytics/core/identity.py +363 -14
  22. gitflow_analytics/core/metrics_storage.py +526 -0
  23. gitflow_analytics/core/progress.py +372 -0
  24. gitflow_analytics/core/schema_version.py +269 -0
  25. gitflow_analytics/extractors/ml_tickets.py +1100 -0
  26. gitflow_analytics/extractors/story_points.py +8 -1
  27. gitflow_analytics/extractors/tickets.py +749 -11
  28. gitflow_analytics/identity_llm/__init__.py +6 -0
  29. gitflow_analytics/identity_llm/analysis_pass.py +231 -0
  30. gitflow_analytics/identity_llm/analyzer.py +464 -0
  31. gitflow_analytics/identity_llm/models.py +76 -0
  32. gitflow_analytics/integrations/github_integration.py +175 -11
  33. gitflow_analytics/integrations/jira_integration.py +461 -24
  34. gitflow_analytics/integrations/orchestrator.py +124 -1
  35. gitflow_analytics/metrics/activity_scoring.py +322 -0
  36. gitflow_analytics/metrics/branch_health.py +470 -0
  37. gitflow_analytics/metrics/dora.py +379 -20
  38. gitflow_analytics/models/database.py +843 -53
  39. gitflow_analytics/pm_framework/__init__.py +115 -0
  40. gitflow_analytics/pm_framework/adapters/__init__.py +50 -0
  41. gitflow_analytics/pm_framework/adapters/jira_adapter.py +1845 -0
  42. gitflow_analytics/pm_framework/base.py +406 -0
  43. gitflow_analytics/pm_framework/models.py +211 -0
  44. gitflow_analytics/pm_framework/orchestrator.py +652 -0
  45. gitflow_analytics/pm_framework/registry.py +333 -0
  46. gitflow_analytics/qualitative/__init__.py +9 -10
  47. gitflow_analytics/qualitative/chatgpt_analyzer.py +259 -0
  48. gitflow_analytics/qualitative/classifiers/__init__.py +3 -3
  49. gitflow_analytics/qualitative/classifiers/change_type.py +518 -244
  50. gitflow_analytics/qualitative/classifiers/domain_classifier.py +272 -165
  51. gitflow_analytics/qualitative/classifiers/intent_analyzer.py +321 -222
  52. gitflow_analytics/qualitative/classifiers/llm/__init__.py +35 -0
  53. gitflow_analytics/qualitative/classifiers/llm/base.py +193 -0
  54. gitflow_analytics/qualitative/classifiers/llm/batch_processor.py +383 -0
  55. gitflow_analytics/qualitative/classifiers/llm/cache.py +479 -0
  56. gitflow_analytics/qualitative/classifiers/llm/cost_tracker.py +435 -0
  57. gitflow_analytics/qualitative/classifiers/llm/openai_client.py +403 -0
  58. gitflow_analytics/qualitative/classifiers/llm/prompts.py +373 -0
  59. gitflow_analytics/qualitative/classifiers/llm/response_parser.py +287 -0
  60. gitflow_analytics/qualitative/classifiers/llm_commit_classifier.py +607 -0
  61. gitflow_analytics/qualitative/classifiers/risk_analyzer.py +215 -189
  62. gitflow_analytics/qualitative/core/__init__.py +4 -4
  63. gitflow_analytics/qualitative/core/llm_fallback.py +239 -235
  64. gitflow_analytics/qualitative/core/nlp_engine.py +157 -148
  65. gitflow_analytics/qualitative/core/pattern_cache.py +214 -192
  66. gitflow_analytics/qualitative/core/processor.py +381 -248
  67. gitflow_analytics/qualitative/enhanced_analyzer.py +2236 -0
  68. gitflow_analytics/qualitative/example_enhanced_usage.py +420 -0
  69. gitflow_analytics/qualitative/models/__init__.py +7 -7
  70. gitflow_analytics/qualitative/models/schemas.py +155 -121
  71. gitflow_analytics/qualitative/utils/__init__.py +4 -4
  72. gitflow_analytics/qualitative/utils/batch_processor.py +136 -123
  73. gitflow_analytics/qualitative/utils/cost_tracker.py +142 -140
  74. gitflow_analytics/qualitative/utils/metrics.py +172 -158
  75. gitflow_analytics/qualitative/utils/text_processing.py +146 -104
  76. gitflow_analytics/reports/__init__.py +100 -0
  77. gitflow_analytics/reports/analytics_writer.py +539 -14
  78. gitflow_analytics/reports/base.py +648 -0
  79. gitflow_analytics/reports/branch_health_writer.py +322 -0
  80. gitflow_analytics/reports/classification_writer.py +924 -0
  81. gitflow_analytics/reports/cli_integration.py +427 -0
  82. gitflow_analytics/reports/csv_writer.py +1676 -212
  83. gitflow_analytics/reports/data_models.py +504 -0
  84. gitflow_analytics/reports/database_report_generator.py +427 -0
  85. gitflow_analytics/reports/example_usage.py +344 -0
  86. gitflow_analytics/reports/factory.py +499 -0
  87. gitflow_analytics/reports/formatters.py +698 -0
  88. gitflow_analytics/reports/html_generator.py +1116 -0
  89. gitflow_analytics/reports/interfaces.py +489 -0
  90. gitflow_analytics/reports/json_exporter.py +2770 -0
  91. gitflow_analytics/reports/narrative_writer.py +2287 -158
  92. gitflow_analytics/reports/story_point_correlation.py +1144 -0
  93. gitflow_analytics/reports/weekly_trends_writer.py +389 -0
  94. gitflow_analytics/training/__init__.py +5 -0
  95. gitflow_analytics/training/model_loader.py +377 -0
  96. gitflow_analytics/training/pipeline.py +550 -0
  97. gitflow_analytics/tui/__init__.py +1 -1
  98. gitflow_analytics/tui/app.py +129 -126
  99. gitflow_analytics/tui/screens/__init__.py +3 -3
  100. gitflow_analytics/tui/screens/analysis_progress_screen.py +188 -179
  101. gitflow_analytics/tui/screens/configuration_screen.py +154 -178
  102. gitflow_analytics/tui/screens/loading_screen.py +100 -110
  103. gitflow_analytics/tui/screens/main_screen.py +89 -72
  104. gitflow_analytics/tui/screens/results_screen.py +305 -281
  105. gitflow_analytics/tui/widgets/__init__.py +2 -2
  106. gitflow_analytics/tui/widgets/data_table.py +67 -69
  107. gitflow_analytics/tui/widgets/export_modal.py +76 -76
  108. gitflow_analytics/tui/widgets/progress_widget.py +41 -46
  109. gitflow_analytics-1.3.11.dist-info/METADATA +1015 -0
  110. gitflow_analytics-1.3.11.dist-info/RECORD +122 -0
  111. gitflow_analytics-1.0.3.dist-info/METADATA +0 -490
  112. gitflow_analytics-1.0.3.dist-info/RECORD +0 -62
  113. {gitflow_analytics-1.0.3.dist-info → gitflow_analytics-1.3.11.dist-info}/WHEEL +0 -0
  114. {gitflow_analytics-1.0.3.dist-info → gitflow_analytics-1.3.11.dist-info}/entry_points.txt +0 -0
  115. {gitflow_analytics-1.0.3.dist-info → gitflow_analytics-1.3.11.dist-info}/licenses/LICENSE +0 -0
  116. {gitflow_analytics-1.0.3.dist-info → gitflow_analytics-1.3.11.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,6 @@
1
+ """LLM-based developer identity resolution and auto-aliasing."""
2
+
3
+ from .analyzer import LLMIdentityAnalyzer
4
+ from .models import DeveloperCluster, IdentityAnalysisResult
5
+
6
+ __all__ = ["LLMIdentityAnalyzer", "IdentityAnalysisResult", "DeveloperCluster"]
@@ -0,0 +1,231 @@
1
+ """Analysis pass for auto-aliasing developer identities."""
2
+
3
+ import logging
4
+ import os
5
+ from pathlib import Path
6
+ from typing import Any, Optional
7
+
8
+ import yaml
9
+
10
+ from .analyzer import LLMIdentityAnalyzer
11
+ from .models import IdentityAnalysisResult
12
+
13
+ logger = logging.getLogger(__name__)
14
+
15
+
16
class IdentityAnalysisPass:
    """Performs an analysis pass to auto-alias developer identities.

    The pass reads a YAML configuration file, runs ``LLMIdentityAnalyzer`` over
    a list of commits and can optionally write a YAML report and/or merge the
    resulting identity mappings back into the configuration file.
    """

    def __init__(self, config_path: Path):
        """Initialize with configuration.

        Args:
            config_path: Path to the YAML configuration file. The file is read
                immediately and is rewritten in place by ``_apply_to_config``.
        """
        self.config_path = config_path
        self.config = self._load_config()

    def _load_config(self) -> dict[str, Any]:
        """Load configuration from file.

        If a ``.env`` file sits next to the configuration file it is loaded as
        well, so that ``${VAR}`` references can be resolved later on.

        Returns:
            The parsed configuration; an empty dict when the file is empty.
        """
        with open(self.config_path, encoding="utf-8") as f:
            # safe_load() returns None for an empty document; normalise it so
            # the ``in`` / ``.get`` lookups done later do not raise TypeError.
            config = yaml.safe_load(f) or {}

        # Handle environment variables
        env_file = self.config_path.parent / ".env"
        if env_file.exists():
            from dotenv import load_dotenv

            load_dotenv(env_file)

        return config

    def run_analysis(
        self,
        commits: list[dict[str, Any]],
        output_path: Optional[Path] = None,
        apply_to_config: bool = False,
    ) -> IdentityAnalysisResult:
        """Run identity analysis pass on commits.

        Args:
            commits: Commit records handed unchanged to the analyzer.
            output_path: When given, the YAML analysis report is written here.
            apply_to_config: When true, the discovered mappings are merged into
                the configuration file on disk.

        Returns:
            The result object produced by ``LLMIdentityAnalyzer``.
        """
        logger.info("Starting identity analysis pass...")

        # A section written as ``qualitative:`` with no body parses to None.
        qualitative = self.config.get("qualitative") or {}
        analysis = self.config.get("analysis") or {}

        # Get OpenRouter API key from config or env (only when the qualitative
        # section is enabled; otherwise the analyzer gets no key at all).
        api_key = None
        if qualitative.get("enabled"):
            api_key = self._resolve_env_var(qualitative.get("openrouter_api_key") or "")
            if api_key.startswith("${") and api_key.endswith("}"):
                # The referenced variable is not set. A literal "${VAR}" can
                # never be a valid key, so behave as if none was configured.
                logger.warning(
                    f"API key placeholder {api_key} could not be resolved from the "
                    "environment; continuing without an API key"
                )
                api_key = None

        # Initialize analyzer
        analyzer = LLMIdentityAnalyzer(
            api_key=api_key,
            model=qualitative.get("model", "openai/gpt-4o-mini"),
            confidence_threshold=analysis.get("similarity_threshold", 0.85),
        )

        # Run analysis
        result = analyzer.analyze_identities(commits)

        logger.info(f"Analysis complete: {len(result.clusters)} identity clusters found")

        # Save analysis report
        if output_path:
            self._save_analysis_report(result, output_path)

        # Apply to configuration if requested
        if apply_to_config:
            self._apply_to_config(result)

        return result

    def _resolve_env_var(self, value: str) -> str:
        """Resolve environment variable references.

        Args:
            value: Either a literal value or a reference of the form ``${NAME}``.

        Returns:
            The value of environment variable ``NAME`` when ``value`` is such a
            reference and the variable is set; otherwise ``value`` unchanged.
        """
        # Non-string values (e.g. an explicit YAML null) are passed through
        # untouched instead of failing on ``.startswith``.
        if not isinstance(value, str):
            return value
        if value.startswith("${") and value.endswith("}"):
            return os.getenv(value[2:-1], value)
        return value

    @staticmethod
    def _identity_to_dict(identity: Any) -> dict[str, Any]:
        """Return the report representation of a single developer identity."""
        return {
            "name": identity.name,
            "email": identity.email,
            "commit_count": identity.commit_count,
            "repositories": list(identity.repositories),
        }

    def _save_analysis_report(self, result: IdentityAnalysisResult, output_path: Path):
        """Save analysis report to file.

        The report is a YAML document with ``analysis_metadata``,
        ``identity_clusters`` and, only when there are any,
        ``unresolved_identities``.

        Args:
            result: Analysis result to serialise.
            output_path: Destination file; overwritten if it already exists.
        """
        report: dict[str, Any] = {
            "analysis_metadata": result.analysis_metadata,
            "identity_clusters": [
                {
                    "canonical_name": cluster.canonical_name,
                    "canonical_email": cluster.canonical_email,
                    "confidence": cluster.confidence,
                    "reasoning": cluster.reasoning,
                    "total_commits": cluster.total_commits,
                    "aliases": [self._identity_to_dict(alias) for alias in cluster.aliases],
                }
                for cluster in result.clusters
            ],
        }

        # Add unresolved identities
        if result.unresolved_identities:
            report["unresolved_identities"] = [
                self._identity_to_dict(identity) for identity in result.unresolved_identities
            ]

        # Write YAML report
        with open(output_path, "w", encoding="utf-8") as f:
            yaml.dump(report, f, default_flow_style=False, sort_keys=False)

        logger.info(f"Analysis report saved to: {output_path}")

    def _apply_to_config(self, result: IdentityAnalysisResult):
        """Apply analysis results to configuration file.

        Mappings whose canonical email is not yet present under
        ``analysis.manual_identity_mappings`` are appended; for emails that are
        already present the alias lists are merged (compared and stored
        lower-cased). The file is then rewritten with ``yaml.dump``, so YAML
        comments in the original file are not preserved.

        Args:
            result: Analysis result providing ``get_manual_mappings()``.
        """
        # Get manual mappings from analysis
        new_mappings = result.get_manual_mappings()

        if not new_mappings:
            logger.info("No new identity mappings to apply")
            return

        # Load current config (an empty file parses to None)
        with open(self.config_path, encoding="utf-8") as f:
            config = yaml.safe_load(f) or {}

        # Ensure analysis section exists (also covers ``analysis:`` set to null)
        if not config.get("analysis"):
            config["analysis"] = {}

        # Get existing manual mappings
        existing_mappings = config["analysis"].get("manual_identity_mappings") or []

        # Index the first mapping seen for each canonical email so that a merge
        # is a dictionary lookup instead of a scan over all mappings.
        mapping_by_email: dict[str, dict[str, Any]] = {}
        for mapping in existing_mappings:
            email_key = (mapping.get("canonical_email") or "").lower()
            mapping_by_email.setdefault(email_key, mapping)

        # Merge new mappings
        for new_mapping in new_mappings:
            canonical_email = new_mapping["canonical_email"].lower()
            existing = mapping_by_email.get(canonical_email)

            if existing is None:
                existing_mappings.append(new_mapping)
                # Register the addition so a later new mapping with the same
                # canonical email is merged rather than appended twice.
                mapping_by_email[canonical_email] = new_mapping
                logger.info(f"Added identity mapping for: {canonical_email}")
                continue

            # Update existing mapping with new aliases
            existing_aliases = {alias.lower() for alias in existing.get("aliases") or []}
            new_aliases = {alias.lower() for alias in new_mapping["aliases"]}
            # sorted() keeps the written file stable from run to run.
            existing["aliases"] = sorted(existing_aliases | new_aliases)
            if new_aliases - existing_aliases:
                logger.info(f"Updated aliases for: {canonical_email}")

        # Update config
        config["analysis"]["manual_identity_mappings"] = existing_mappings

        # Write updated config
        with open(self.config_path, "w", encoding="utf-8") as f:
            yaml.dump(config, f, default_flow_style=False, sort_keys=False)

        logger.info(f"Updated configuration with {len(new_mappings)} identity mappings")

    def generate_suggested_config(self, result: IdentityAnalysisResult) -> dict[str, Any]:
        """Generate suggested configuration snippet for manual review.

        Args:
            result: Analysis result providing ``get_manual_mappings()`` and
                ``unresolved_identities``.

        Returns:
            A dict with ``analysis.manual_identity_mappings`` and an ``exclude``
            entry. ``exclude`` lists the unresolved identity names that look
            like bots under ``authors``, or is an empty dict when none match.
        """
        # ``re`` is not imported at module level; import it once here rather
        # than on every iteration of the matching loop.
        import re

        manual_mappings = result.get_manual_mappings()

        # Also generate exclusions for bots
        bot_patterns = [
            r".*\[bot\]$",  # Matches "ewtn-version-bumper[bot]", "dependabot[bot]"
            r".*-bot$",  # Matches names ending in "-bot"
            r"^bot-.*",  # Matches names starting with "bot-"
            r".*\sBot$",  # Matches "CNA Studio Bot", "GitHub Bot", etc.
            r".*\sbot$",  # Matches "studio bot", "merge bot", etc.
            r"^Bot\s.*",  # Matches "Bot User", "Bot Account", etc.
            r"^bot\s.*",  # Matches "bot user", "bot account", etc.
        ]
        # Compile once; matching is case-insensitive, anchored at the start.
        compiled_patterns = [
            (pattern, re.compile(pattern, re.IGNORECASE)) for pattern in bot_patterns
        ]

        suggested_exclusions = []
        logger.debug(
            f"Bot detection: checking {len(result.unresolved_identities)} unresolved identities"
        )

        for identity in result.unresolved_identities:
            logger.debug(
                f"Bot detection: checking identity '{identity.name}' against {len(bot_patterns)} patterns"
            )
            # First pattern that matches, if any.
            matched_pattern = next(
                (pattern for pattern, regex in compiled_patterns if regex.match(identity.name)),
                None,
            )
            if matched_pattern is not None:
                logger.debug(
                    f"Bot detection: MATCH - '{identity.name}' matches pattern '{matched_pattern}'"
                )
                suggested_exclusions.append(identity.name)
            else:
                logger.debug(
                    f"Bot detection: NO MATCH - '{identity.name}' doesn't match any bot patterns"
                )

        if suggested_exclusions:
            logger.debug(
                f"Bot detection: found {len(suggested_exclusions)} bots to exclude: {suggested_exclusions}"
            )
        else:
            logger.debug("Bot detection: no bots detected for exclusion")

        return {
            "analysis": {"manual_identity_mappings": manual_mappings},
            "exclude": {"authors": suggested_exclusions} if suggested_exclusions else {},
        }