gitflow-analytics 1.0.3__py3-none-any.whl → 1.3.6__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- gitflow_analytics/_version.py +1 -1
- gitflow_analytics/classification/__init__.py +31 -0
- gitflow_analytics/classification/batch_classifier.py +752 -0
- gitflow_analytics/classification/classifier.py +464 -0
- gitflow_analytics/classification/feature_extractor.py +725 -0
- gitflow_analytics/classification/linguist_analyzer.py +574 -0
- gitflow_analytics/classification/model.py +455 -0
- gitflow_analytics/cli.py +4108 -350
- gitflow_analytics/cli_rich.py +198 -48
- gitflow_analytics/config/__init__.py +43 -0
- gitflow_analytics/config/errors.py +261 -0
- gitflow_analytics/config/loader.py +904 -0
- gitflow_analytics/config/profiles.py +264 -0
- gitflow_analytics/config/repository.py +124 -0
- gitflow_analytics/config/schema.py +441 -0
- gitflow_analytics/config/validator.py +154 -0
- gitflow_analytics/config.py +44 -508
- gitflow_analytics/core/analyzer.py +1209 -98
- gitflow_analytics/core/cache.py +1337 -29
- gitflow_analytics/core/data_fetcher.py +1193 -0
- gitflow_analytics/core/identity.py +363 -14
- gitflow_analytics/core/metrics_storage.py +526 -0
- gitflow_analytics/core/progress.py +372 -0
- gitflow_analytics/core/schema_version.py +269 -0
- gitflow_analytics/extractors/ml_tickets.py +1100 -0
- gitflow_analytics/extractors/story_points.py +8 -1
- gitflow_analytics/extractors/tickets.py +749 -11
- gitflow_analytics/identity_llm/__init__.py +6 -0
- gitflow_analytics/identity_llm/analysis_pass.py +231 -0
- gitflow_analytics/identity_llm/analyzer.py +464 -0
- gitflow_analytics/identity_llm/models.py +76 -0
- gitflow_analytics/integrations/github_integration.py +175 -11
- gitflow_analytics/integrations/jira_integration.py +461 -24
- gitflow_analytics/integrations/orchestrator.py +124 -1
- gitflow_analytics/metrics/activity_scoring.py +322 -0
- gitflow_analytics/metrics/branch_health.py +470 -0
- gitflow_analytics/metrics/dora.py +379 -20
- gitflow_analytics/models/database.py +843 -53
- gitflow_analytics/pm_framework/__init__.py +115 -0
- gitflow_analytics/pm_framework/adapters/__init__.py +50 -0
- gitflow_analytics/pm_framework/adapters/jira_adapter.py +1845 -0
- gitflow_analytics/pm_framework/base.py +406 -0
- gitflow_analytics/pm_framework/models.py +211 -0
- gitflow_analytics/pm_framework/orchestrator.py +652 -0
- gitflow_analytics/pm_framework/registry.py +333 -0
- gitflow_analytics/qualitative/__init__.py +9 -10
- gitflow_analytics/qualitative/chatgpt_analyzer.py +259 -0
- gitflow_analytics/qualitative/classifiers/__init__.py +3 -3
- gitflow_analytics/qualitative/classifiers/change_type.py +518 -244
- gitflow_analytics/qualitative/classifiers/domain_classifier.py +272 -165
- gitflow_analytics/qualitative/classifiers/intent_analyzer.py +321 -222
- gitflow_analytics/qualitative/classifiers/llm/__init__.py +35 -0
- gitflow_analytics/qualitative/classifiers/llm/base.py +193 -0
- gitflow_analytics/qualitative/classifiers/llm/batch_processor.py +383 -0
- gitflow_analytics/qualitative/classifiers/llm/cache.py +479 -0
- gitflow_analytics/qualitative/classifiers/llm/cost_tracker.py +435 -0
- gitflow_analytics/qualitative/classifiers/llm/openai_client.py +403 -0
- gitflow_analytics/qualitative/classifiers/llm/prompts.py +373 -0
- gitflow_analytics/qualitative/classifiers/llm/response_parser.py +287 -0
- gitflow_analytics/qualitative/classifiers/llm_commit_classifier.py +607 -0
- gitflow_analytics/qualitative/classifiers/risk_analyzer.py +215 -189
- gitflow_analytics/qualitative/core/__init__.py +4 -4
- gitflow_analytics/qualitative/core/llm_fallback.py +239 -235
- gitflow_analytics/qualitative/core/nlp_engine.py +157 -148
- gitflow_analytics/qualitative/core/pattern_cache.py +214 -192
- gitflow_analytics/qualitative/core/processor.py +381 -248
- gitflow_analytics/qualitative/enhanced_analyzer.py +2236 -0
- gitflow_analytics/qualitative/example_enhanced_usage.py +420 -0
- gitflow_analytics/qualitative/models/__init__.py +7 -7
- gitflow_analytics/qualitative/models/schemas.py +155 -121
- gitflow_analytics/qualitative/utils/__init__.py +4 -4
- gitflow_analytics/qualitative/utils/batch_processor.py +136 -123
- gitflow_analytics/qualitative/utils/cost_tracker.py +142 -140
- gitflow_analytics/qualitative/utils/metrics.py +172 -158
- gitflow_analytics/qualitative/utils/text_processing.py +146 -104
- gitflow_analytics/reports/__init__.py +100 -0
- gitflow_analytics/reports/analytics_writer.py +539 -14
- gitflow_analytics/reports/base.py +648 -0
- gitflow_analytics/reports/branch_health_writer.py +322 -0
- gitflow_analytics/reports/classification_writer.py +924 -0
- gitflow_analytics/reports/cli_integration.py +427 -0
- gitflow_analytics/reports/csv_writer.py +1676 -212
- gitflow_analytics/reports/data_models.py +504 -0
- gitflow_analytics/reports/database_report_generator.py +427 -0
- gitflow_analytics/reports/example_usage.py +344 -0
- gitflow_analytics/reports/factory.py +499 -0
- gitflow_analytics/reports/formatters.py +698 -0
- gitflow_analytics/reports/html_generator.py +1116 -0
- gitflow_analytics/reports/interfaces.py +489 -0
- gitflow_analytics/reports/json_exporter.py +2770 -0
- gitflow_analytics/reports/narrative_writer.py +2287 -158
- gitflow_analytics/reports/story_point_correlation.py +1144 -0
- gitflow_analytics/reports/weekly_trends_writer.py +389 -0
- gitflow_analytics/training/__init__.py +5 -0
- gitflow_analytics/training/model_loader.py +377 -0
- gitflow_analytics/training/pipeline.py +550 -0
- gitflow_analytics/tui/__init__.py +1 -1
- gitflow_analytics/tui/app.py +129 -126
- gitflow_analytics/tui/screens/__init__.py +3 -3
- gitflow_analytics/tui/screens/analysis_progress_screen.py +188 -179
- gitflow_analytics/tui/screens/configuration_screen.py +154 -178
- gitflow_analytics/tui/screens/loading_screen.py +100 -110
- gitflow_analytics/tui/screens/main_screen.py +89 -72
- gitflow_analytics/tui/screens/results_screen.py +305 -281
- gitflow_analytics/tui/widgets/__init__.py +2 -2
- gitflow_analytics/tui/widgets/data_table.py +67 -69
- gitflow_analytics/tui/widgets/export_modal.py +76 -76
- gitflow_analytics/tui/widgets/progress_widget.py +41 -46
- gitflow_analytics-1.3.6.dist-info/METADATA +1015 -0
- gitflow_analytics-1.3.6.dist-info/RECORD +122 -0
- gitflow_analytics-1.0.3.dist-info/METADATA +0 -490
- gitflow_analytics-1.0.3.dist-info/RECORD +0 -62
- {gitflow_analytics-1.0.3.dist-info → gitflow_analytics-1.3.6.dist-info}/WHEEL +0 -0
- {gitflow_analytics-1.0.3.dist-info → gitflow_analytics-1.3.6.dist-info}/entry_points.txt +0 -0
- {gitflow_analytics-1.0.3.dist-info → gitflow_analytics-1.3.6.dist-info}/licenses/LICENSE +0 -0
- {gitflow_analytics-1.0.3.dist-info → gitflow_analytics-1.3.6.dist-info}/top_level.txt +0 -0
|
@@ -0,0 +1,231 @@
|
|
|
1
|
+
"""Analysis pass for auto-aliasing developer identities."""
|
|
2
|
+
|
|
3
|
+
import logging
|
|
4
|
+
import os
|
|
5
|
+
from pathlib import Path
|
|
6
|
+
from typing import Any, Optional
|
|
7
|
+
|
|
8
|
+
import yaml
|
|
9
|
+
|
|
10
|
+
from .analyzer import LLMIdentityAnalyzer
|
|
11
|
+
from .models import IdentityAnalysisResult
|
|
12
|
+
|
|
13
|
+
logger = logging.getLogger(__name__)
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
class IdentityAnalysisPass:
    """Performs an analysis pass to auto-alias developer identities.

    The pass loads a YAML configuration file, runs ``LLMIdentityAnalyzer``
    over a list of commits and can optionally (a) write the result as a YAML
    report and (b) merge the discovered identity mappings back into the
    configuration file it was loaded from.
    """

    def __init__(self, config_path: Path):
        """Initialize with configuration.

        Args:
            config_path: Location of the YAML configuration file. It is read
                immediately and is rewritten in place by ``_apply_to_config``.
        """
        # Coerce so that a plain string path works with ``.parent`` below.
        self.config_path = Path(config_path)
        self.config = self._load_config()

    def _load_config(self) -> dict[str, Any]:
        """Load configuration from file.

        If a ``.env`` file sits next to the configuration file it is loaded
        into the process environment via ``dotenv.load_dotenv``.

        Returns:
            The parsed YAML mapping; an empty dict when the file is empty.
        """
        with open(self.config_path, encoding="utf-8") as f:
            # An empty YAML document parses to None; normalise it so the
            # ``in`` / ``.get`` lookups performed later do not raise.
            config = yaml.safe_load(f) or {}

        # Handle environment variables
        env_file = self.config_path.parent / ".env"
        if env_file.exists():
            # Imported lazily so python-dotenv is only needed when a .env exists.
            from dotenv import load_dotenv

            load_dotenv(env_file)

        return config

    def run_analysis(
        self,
        commits: list[dict[str, Any]],
        output_path: Optional[Path] = None,
        apply_to_config: bool = False,
    ) -> IdentityAnalysisResult:
        """Run identity analysis pass on commits.

        Args:
            commits: Commit records handed unchanged to
                ``LLMIdentityAnalyzer.analyze_identities``.
            output_path: When given, the YAML analysis report is written here.
            apply_to_config: When true, discovered mappings are merged into the
                configuration file on disk.

        Returns:
            The result object produced by the analyzer.
        """
        logger.info("Starting identity analysis pass...")

        # ``or {}`` also covers sections that are present but null in the YAML.
        qualitative = self.config.get("qualitative") or {}
        analysis = self.config.get("analysis") or {}

        # Get OpenRouter API key from config or env; only looked up when the
        # qualitative section is enabled, otherwise the analyzer gets None.
        api_key = None
        if qualitative.get("enabled"):
            api_key = self._resolve_env_var(qualitative.get("openrouter_api_key", ""))

        # Initialize analyzer
        analyzer = LLMIdentityAnalyzer(
            api_key=api_key,
            model=qualitative.get("model", "openai/gpt-4o-mini"),
            confidence_threshold=analysis.get("similarity_threshold", 0.85),
        )

        # Run analysis
        result = analyzer.analyze_identities(commits)

        logger.info(f"Analysis complete: {len(result.clusters)} identity clusters found")

        # Save analysis report
        if output_path:
            self._save_analysis_report(result, output_path)

        # Apply to configuration if requested
        if apply_to_config:
            self._apply_to_config(result)

        return result

    def _resolve_env_var(self, value: str) -> str:
        """Resolve environment variable references.

        A value of the exact form ``${NAME}`` is replaced by the content of the
        environment variable ``NAME``. If that variable is not set, the
        placeholder itself is returned unchanged. Any other string, and any
        non-string value (e.g. a null YAML entry), is returned as-is.
        """
        if not isinstance(value, str):
            # A null/non-string config entry has nothing to expand.
            return value
        if value.startswith("${") and value.endswith("}"):
            return os.getenv(value[2:-1], value)
        return value

    @staticmethod
    def _identity_to_dict(identity: Any) -> dict[str, Any]:
        """Convert one identity/alias object into a plain dict for the report."""
        return {
            "name": identity.name,
            "email": identity.email,
            "commit_count": identity.commit_count,
            "repositories": list(identity.repositories),
        }

    def _save_analysis_report(self, result: IdentityAnalysisResult, output_path: Path):
        """Save analysis report to file.

        The report is a YAML document with ``analysis_metadata``,
        ``identity_clusters`` and, only when there are any,
        ``unresolved_identities``.
        """
        report = {
            "analysis_metadata": result.analysis_metadata,
            "identity_clusters": [
                {
                    "canonical_name": cluster.canonical_name,
                    "canonical_email": cluster.canonical_email,
                    "confidence": cluster.confidence,
                    "reasoning": cluster.reasoning,
                    "total_commits": cluster.total_commits,
                    "aliases": [self._identity_to_dict(alias) for alias in cluster.aliases],
                }
                for cluster in result.clusters
            ],
        }

        # Add unresolved identities
        if result.unresolved_identities:
            report["unresolved_identities"] = [
                self._identity_to_dict(identity) for identity in result.unresolved_identities
            ]

        # Write YAML report
        with open(output_path, "w", encoding="utf-8") as f:
            yaml.dump(report, f, default_flow_style=False, sort_keys=False)

        logger.info(f"Analysis report saved to: {output_path}")

    def _apply_to_config(self, result: IdentityAnalysisResult):
        """Apply analysis results to configuration file.

        Mappings from ``result.get_manual_mappings()`` are merged into
        ``analysis.manual_identity_mappings`` of the configuration file, keyed
        by lower-cased ``canonical_email``: unknown emails are appended, known
        ones get their ``aliases`` extended. The file is then rewritten with
        ``yaml.dump`` (NOTE: comments in the original file are not preserved).
        """
        # Get manual mappings from analysis
        # (assumed to be dicts with "canonical_email" and "aliases" keys, as
        # that is how they are accessed below)
        new_mappings = result.get_manual_mappings()

        if not new_mappings:
            logger.info("No new identity mappings to apply")
            return

        # Load current config from disk rather than using self.config
        with open(self.config_path, encoding="utf-8") as f:
            config = yaml.safe_load(f) or {}

        # Ensure analysis section exists (also when present but null)
        if config.get("analysis") is None:
            config["analysis"] = {}

        # Get existing manual mappings
        existing_mappings = config["analysis"].get("manual_identity_mappings") or []

        # Index by canonical email; setdefault keeps the first entry per email,
        # which is the one a linear first-match scan would have selected.
        by_email: dict[str, Any] = {}
        for mapping in existing_mappings:
            by_email.setdefault((mapping.get("canonical_email") or "").lower(), mapping)

        # Merge new mappings
        for new_mapping in new_mappings:
            canonical_email = new_mapping["canonical_email"].lower()
            target = by_email.get(canonical_email)

            if target is None:
                existing_mappings.append(new_mapping)
                # Register it so a later mapping for the same email is merged
                # instead of being appended as a duplicate entry.
                by_email[canonical_email] = new_mapping
                logger.info(f"Added identity mapping for: {canonical_email}")
                continue

            # Update existing mapping with new aliases
            current = [alias.lower() for alias in target.get("aliases") or []]
            incoming = [alias.lower() for alias in new_mapping.get("aliases") or []]
            # dict.fromkeys de-duplicates while keeping a stable order, so the
            # rewritten config does not change between otherwise identical runs.
            target["aliases"] = list(dict.fromkeys(current + incoming))
            if set(incoming) - set(current):
                logger.info(f"Updated aliases for: {canonical_email}")

        # Update config
        config["analysis"]["manual_identity_mappings"] = existing_mappings

        # Write updated config
        with open(self.config_path, "w", encoding="utf-8") as f:
            yaml.dump(config, f, default_flow_style=False, sort_keys=False)

        logger.info(f"Updated configuration with {len(new_mappings)} identity mappings")

    def generate_suggested_config(self, result: IdentityAnalysisResult) -> dict[str, Any]:
        """Generate suggested configuration snippet for manual review.

        Returns:
            A dict with ``analysis.manual_identity_mappings`` taken from the
            result and an ``exclude`` section listing unresolved identity names
            that look like bots (an empty dict when none were detected).
        """
        import re

        manual_mappings = result.get_manual_mappings()

        # Also generate exclusions for bots
        bot_patterns = [
            r".*\[bot\]$",  # Matches "ewtn-version-bumper[bot]", "dependabot[bot]"
            r".*-bot$",  # Matches names ending in "-bot"
            r"^bot-.*",  # Matches names starting with "bot-"
            r".*\sBot$",  # Matches "CNA Studio Bot", "GitHub Bot", etc.
            r".*\sbot$",  # Matches "studio bot", "merge bot", etc.
            r"^Bot\s.*",  # Matches "Bot User", "Bot Account", etc.
            r"^bot\s.*",  # Matches "bot user", "bot account", etc.
        ]
        # Compile once up front; matching is case-insensitive.
        compiled_patterns = [re.compile(pattern, re.IGNORECASE) for pattern in bot_patterns]

        suggested_exclusions: list[str] = []
        already_suggested = set()
        logger.debug(
            "Bot detection: checking %d unresolved identities",
            len(result.unresolved_identities),
        )

        for identity in result.unresolved_identities:
            name = identity.name
            logger.debug(
                "Bot detection: checking identity '%s' against %d patterns",
                name,
                len(bot_patterns),
            )

            # The first matching pattern wins.
            matched = next((rx for rx in compiled_patterns if rx.match(name)), None)
            if matched is None:
                logger.debug(
                    "Bot detection: NO MATCH - '%s' doesn't match any bot patterns", name
                )
                continue

            logger.debug(
                "Bot detection: MATCH - '%s' matches pattern '%s'", name, matched.pattern
            )
            # The same bot name can appear under several unresolved identities
            # (e.g. different emails); list it only once.
            if name not in already_suggested:
                already_suggested.add(name)
                suggested_exclusions.append(name)

        if suggested_exclusions:
            logger.debug(
                "Bot detection: found %d bots to exclude: %s",
                len(suggested_exclusions),
                suggested_exclusions,
            )
        else:
            logger.debug("Bot detection: no bots detected for exclusion")

        return {
            "analysis": {"manual_identity_mappings": manual_mappings},
            "exclude": {"authors": suggested_exclusions} if suggested_exclusions else {},
        }
|