gitflow-analytics 1.0.3__py3-none-any.whl → 1.3.6__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- gitflow_analytics/_version.py +1 -1
- gitflow_analytics/classification/__init__.py +31 -0
- gitflow_analytics/classification/batch_classifier.py +752 -0
- gitflow_analytics/classification/classifier.py +464 -0
- gitflow_analytics/classification/feature_extractor.py +725 -0
- gitflow_analytics/classification/linguist_analyzer.py +574 -0
- gitflow_analytics/classification/model.py +455 -0
- gitflow_analytics/cli.py +4108 -350
- gitflow_analytics/cli_rich.py +198 -48
- gitflow_analytics/config/__init__.py +43 -0
- gitflow_analytics/config/errors.py +261 -0
- gitflow_analytics/config/loader.py +904 -0
- gitflow_analytics/config/profiles.py +264 -0
- gitflow_analytics/config/repository.py +124 -0
- gitflow_analytics/config/schema.py +441 -0
- gitflow_analytics/config/validator.py +154 -0
- gitflow_analytics/config.py +44 -508
- gitflow_analytics/core/analyzer.py +1209 -98
- gitflow_analytics/core/cache.py +1337 -29
- gitflow_analytics/core/data_fetcher.py +1193 -0
- gitflow_analytics/core/identity.py +363 -14
- gitflow_analytics/core/metrics_storage.py +526 -0
- gitflow_analytics/core/progress.py +372 -0
- gitflow_analytics/core/schema_version.py +269 -0
- gitflow_analytics/extractors/ml_tickets.py +1100 -0
- gitflow_analytics/extractors/story_points.py +8 -1
- gitflow_analytics/extractors/tickets.py +749 -11
- gitflow_analytics/identity_llm/__init__.py +6 -0
- gitflow_analytics/identity_llm/analysis_pass.py +231 -0
- gitflow_analytics/identity_llm/analyzer.py +464 -0
- gitflow_analytics/identity_llm/models.py +76 -0
- gitflow_analytics/integrations/github_integration.py +175 -11
- gitflow_analytics/integrations/jira_integration.py +461 -24
- gitflow_analytics/integrations/orchestrator.py +124 -1
- gitflow_analytics/metrics/activity_scoring.py +322 -0
- gitflow_analytics/metrics/branch_health.py +470 -0
- gitflow_analytics/metrics/dora.py +379 -20
- gitflow_analytics/models/database.py +843 -53
- gitflow_analytics/pm_framework/__init__.py +115 -0
- gitflow_analytics/pm_framework/adapters/__init__.py +50 -0
- gitflow_analytics/pm_framework/adapters/jira_adapter.py +1845 -0
- gitflow_analytics/pm_framework/base.py +406 -0
- gitflow_analytics/pm_framework/models.py +211 -0
- gitflow_analytics/pm_framework/orchestrator.py +652 -0
- gitflow_analytics/pm_framework/registry.py +333 -0
- gitflow_analytics/qualitative/__init__.py +9 -10
- gitflow_analytics/qualitative/chatgpt_analyzer.py +259 -0
- gitflow_analytics/qualitative/classifiers/__init__.py +3 -3
- gitflow_analytics/qualitative/classifiers/change_type.py +518 -244
- gitflow_analytics/qualitative/classifiers/domain_classifier.py +272 -165
- gitflow_analytics/qualitative/classifiers/intent_analyzer.py +321 -222
- gitflow_analytics/qualitative/classifiers/llm/__init__.py +35 -0
- gitflow_analytics/qualitative/classifiers/llm/base.py +193 -0
- gitflow_analytics/qualitative/classifiers/llm/batch_processor.py +383 -0
- gitflow_analytics/qualitative/classifiers/llm/cache.py +479 -0
- gitflow_analytics/qualitative/classifiers/llm/cost_tracker.py +435 -0
- gitflow_analytics/qualitative/classifiers/llm/openai_client.py +403 -0
- gitflow_analytics/qualitative/classifiers/llm/prompts.py +373 -0
- gitflow_analytics/qualitative/classifiers/llm/response_parser.py +287 -0
- gitflow_analytics/qualitative/classifiers/llm_commit_classifier.py +607 -0
- gitflow_analytics/qualitative/classifiers/risk_analyzer.py +215 -189
- gitflow_analytics/qualitative/core/__init__.py +4 -4
- gitflow_analytics/qualitative/core/llm_fallback.py +239 -235
- gitflow_analytics/qualitative/core/nlp_engine.py +157 -148
- gitflow_analytics/qualitative/core/pattern_cache.py +214 -192
- gitflow_analytics/qualitative/core/processor.py +381 -248
- gitflow_analytics/qualitative/enhanced_analyzer.py +2236 -0
- gitflow_analytics/qualitative/example_enhanced_usage.py +420 -0
- gitflow_analytics/qualitative/models/__init__.py +7 -7
- gitflow_analytics/qualitative/models/schemas.py +155 -121
- gitflow_analytics/qualitative/utils/__init__.py +4 -4
- gitflow_analytics/qualitative/utils/batch_processor.py +136 -123
- gitflow_analytics/qualitative/utils/cost_tracker.py +142 -140
- gitflow_analytics/qualitative/utils/metrics.py +172 -158
- gitflow_analytics/qualitative/utils/text_processing.py +146 -104
- gitflow_analytics/reports/__init__.py +100 -0
- gitflow_analytics/reports/analytics_writer.py +539 -14
- gitflow_analytics/reports/base.py +648 -0
- gitflow_analytics/reports/branch_health_writer.py +322 -0
- gitflow_analytics/reports/classification_writer.py +924 -0
- gitflow_analytics/reports/cli_integration.py +427 -0
- gitflow_analytics/reports/csv_writer.py +1676 -212
- gitflow_analytics/reports/data_models.py +504 -0
- gitflow_analytics/reports/database_report_generator.py +427 -0
- gitflow_analytics/reports/example_usage.py +344 -0
- gitflow_analytics/reports/factory.py +499 -0
- gitflow_analytics/reports/formatters.py +698 -0
- gitflow_analytics/reports/html_generator.py +1116 -0
- gitflow_analytics/reports/interfaces.py +489 -0
- gitflow_analytics/reports/json_exporter.py +2770 -0
- gitflow_analytics/reports/narrative_writer.py +2287 -158
- gitflow_analytics/reports/story_point_correlation.py +1144 -0
- gitflow_analytics/reports/weekly_trends_writer.py +389 -0
- gitflow_analytics/training/__init__.py +5 -0
- gitflow_analytics/training/model_loader.py +377 -0
- gitflow_analytics/training/pipeline.py +550 -0
- gitflow_analytics/tui/__init__.py +1 -1
- gitflow_analytics/tui/app.py +129 -126
- gitflow_analytics/tui/screens/__init__.py +3 -3
- gitflow_analytics/tui/screens/analysis_progress_screen.py +188 -179
- gitflow_analytics/tui/screens/configuration_screen.py +154 -178
- gitflow_analytics/tui/screens/loading_screen.py +100 -110
- gitflow_analytics/tui/screens/main_screen.py +89 -72
- gitflow_analytics/tui/screens/results_screen.py +305 -281
- gitflow_analytics/tui/widgets/__init__.py +2 -2
- gitflow_analytics/tui/widgets/data_table.py +67 -69
- gitflow_analytics/tui/widgets/export_modal.py +76 -76
- gitflow_analytics/tui/widgets/progress_widget.py +41 -46
- gitflow_analytics-1.3.6.dist-info/METADATA +1015 -0
- gitflow_analytics-1.3.6.dist-info/RECORD +122 -0
- gitflow_analytics-1.0.3.dist-info/METADATA +0 -490
- gitflow_analytics-1.0.3.dist-info/RECORD +0 -62
- {gitflow_analytics-1.0.3.dist-info → gitflow_analytics-1.3.6.dist-info}/WHEEL +0 -0
- {gitflow_analytics-1.0.3.dist-info → gitflow_analytics-1.3.6.dist-info}/entry_points.txt +0 -0
- {gitflow_analytics-1.0.3.dist-info → gitflow_analytics-1.3.6.dist-info}/licenses/LICENSE +0 -0
- {gitflow_analytics-1.0.3.dist-info → gitflow_analytics-1.3.6.dist-info}/top_level.txt +0 -0
|
@@ -0,0 +1,441 @@
|
|
|
1
|
+
"""Configuration schema definitions and defaults for GitFlow Analytics."""
|
|
2
|
+
|
|
3
|
+
from dataclasses import dataclass, field
|
|
4
|
+
from pathlib import Path
|
|
5
|
+
from typing import TYPE_CHECKING, Any, Optional
|
|
6
|
+
|
|
7
|
+
if TYPE_CHECKING:
|
|
8
|
+
from ..qualitative.models.schemas import QualitativeConfig
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
@dataclass
class RepositoryConfig:
    """Settings describing one git repository to analyze.

    After construction, ``path`` is always an absolute, user-expanded
    ``Path``, and ``project_key`` is always populated — derived from
    ``name`` when not supplied explicitly.
    """

    name: str
    path: Path
    github_repo: Optional[str] = None
    project_key: Optional[str] = None
    branch: Optional[str] = None

    def __post_init__(self) -> None:
        # Normalize the configured path: expand "~" and make it absolute.
        normalized = Path(self.path).expanduser().resolve()
        self.path = normalized
        # Default the project key to an upper-cased, underscore-separated
        # form of the repository name (e.g. "my-repo" -> "MY_REPO").
        if not self.project_key:
            self.project_key = self.name.upper().replace("-", "_")
|
|
25
|
+
|
|
26
|
+
|
|
27
|
+
@dataclass
class GitHubConfig:
    """GitHub API connection settings (token, owner/org, retry policy)."""

    token: Optional[str] = None
    owner: Optional[str] = None
    organization: Optional[str] = None
    base_url: str = "https://api.github.com"
    max_retries: int = 3
    backoff_factor: int = 2

    def get_repo_full_name(self, repo_name: str) -> str:
        """Return the fully qualified "owner/name" form of *repo_name*.

        A name that already contains a slash is passed through unchanged;
        otherwise the configured ``owner`` is prepended.

        Raises:
            ValueError: If *repo_name* is unqualified and no owner is set.
        """
        if "/" in repo_name:
            # Already qualified — nothing to do.
            return repo_name
        if not self.owner:
            raise ValueError(f"Repository {repo_name} needs owner specified")
        return f"{self.owner}/{repo_name}"
|
|
45
|
+
|
|
46
|
+
|
|
47
|
+
@dataclass
class MLCategorization:
    """ML-based commit categorization configuration.

    Controls the hybrid ML / rule-based commit categorizer: confidence
    gating, the blend of semantic vs. file-pattern signals, and caching.
    """

    enabled: bool = True
    # Minimum confidence required to accept an ML prediction — presumably
    # predictions below this fall back to rules; confirm against the classifier.
    min_confidence: float = 0.6
    # Relative weights of the two signal sources; the two are expected to
    # sum to 1.0 (0.7 + 0.3), though this is not enforced here.
    semantic_weight: float = 0.7
    file_pattern_weight: float = 0.3
    hybrid_threshold: float = 0.5  # Confidence threshold for using ML vs rule-based
    cache_duration_days: int = 30  # How long cached categorizations stay valid
    batch_size: int = 100  # Commits processed per batch
    enable_caching: bool = True
    spacy_model: str = "en_core_web_sm"  # Preferred spaCy model
|
|
60
|
+
|
|
61
|
+
|
|
62
|
+
@dataclass
class LLMClassificationConfig:
    """LLM-based commit classification configuration.

    This configuration enables Large Language Model-based commit classification
    via OpenRouter API for more accurate and context-aware categorization.
    """

    # Enable/disable LLM classification
    enabled: bool = False  # Disabled by default to avoid unexpected API costs

    # OpenRouter API configuration
    api_key: Optional[str] = None  # Set via environment variable or config
    api_base_url: str = "https://openrouter.ai/api/v1"
    model: str = "mistralai/mistral-7b-instruct"  # Fast, affordable model

    # Alternative models for different use cases:
    # - "meta-llama/llama-3-8b-instruct" (Higher accuracy, slightly more expensive)
    # - "openai/gpt-3.5-turbo" (Good balance, more expensive)

    # Classification parameters
    confidence_threshold: float = 0.7  # Minimum confidence for LLM predictions
    max_tokens: int = 50  # Keep responses short for cost optimization
    temperature: float = 0.1  # Low temperature for consistent results
    timeout_seconds: float = 30.0  # API request timeout

    # Caching configuration (aggressive caching for cost optimization)
    cache_duration_days: int = 90  # Long cache duration
    enable_caching: bool = True

    # Cost and rate limiting
    max_daily_requests: int = 1000  # Daily API request limit

    # Domain-specific terms for better classification accuracy.
    # Keys are domain names, values are keyword lists — presumably matched
    # against commit messages/paths by the classifier; confirm at call site.
    domain_terms: dict[str, list[str]] = field(
        default_factory=lambda: {
            "media": [
                "video",
                "audio",
                "streaming",
                "player",
                "media",
                "content",
                "broadcast",
                "live",
                "recording",
                "episode",
                "program",
                "tv",
                "radio",
                "podcast",
                "channel",
                "playlist",
            ],
            "localization": [
                "translation",
                "i18n",
                "l10n",
                "locale",
                "language",
                "spanish",
                "french",
                "german",
                "italian",
                "portuguese",
                "multilingual",
                "translate",
                "localize",
                "regional",
            ],
            "integration": [
                "api",
                "webhook",
                "third-party",
                "external",
                "service",
                "integration",
                "sync",
                "import",
                "export",
                "connector",
                "oauth",
                "auth",
                "authentication",
                "sso",
            ],
            "content": [
                "copy",
                "text",
                "wording",
                "messaging",
                "editorial",
                "article",
                "blog",
                "news",
                "story",
                "caption",
                "title",
                "headline",
                "description",
                "summary",
                "metadata",
            ],
        }
    )

    # Fallback behavior when LLM is unavailable
    fallback_to_rules: bool = True  # Fall back to rule-based classification
    fallback_to_ml: bool = True  # Fall back to existing ML classification
|
|
171
|
+
|
|
172
|
+
|
|
173
|
+
@dataclass
class CommitClassificationConfig:
    """Configuration for commit classification system.

    This configuration controls the Random Forest-based commit classification
    system that analyzes commits to categorize them into types like feature,
    bugfix, refactor, docs, test, etc.
    """

    enabled: bool = True
    confidence_threshold: float = 0.5  # Minimum confidence for reliable predictions
    batch_size: int = 100  # Commits processed per batch
    auto_retrain: bool = True  # Automatically check if model needs retraining
    retrain_threshold_days: int = 30  # Days after which to suggest retraining

    # Model hyperparameters (passed to the Random Forest — presumably
    # scikit-learn's RandomForestClassifier; confirm against the trainer).
    model: dict[str, Any] = field(
        default_factory=lambda: {
            "n_estimators": 100,  # Number of trees in random forest
            "max_depth": 20,  # Maximum depth of trees
            "min_samples_split": 5,  # Minimum samples to split a node
            "min_samples_leaf": 2,  # Minimum samples at leaf node
            "random_state": 42,  # For reproducible results
            "n_jobs": -1,  # Use all available CPU cores
        }
    )

    # Feature extraction settings
    feature_extraction: dict[str, Any] = field(
        default_factory=lambda: {
            "enable_temporal_features": True,
            "enable_author_features": True,
            "enable_file_analysis": True,
            "keyword_categories": [
                "feature",
                "bugfix",
                "refactor",
                "docs",
                "test",
                "config",
                "security",
                "performance",
                "ui",
                "api",
                "database",
                "deployment",
            ],
        }
    )

    # Training settings
    training: dict[str, Any] = field(
        default_factory=lambda: {
            "validation_split": 0.2,  # Fraction for validation
            "min_training_samples": 20,  # Minimum samples needed for training
            "cross_validation_folds": 5,  # K-fold cross validation
            "class_weight": "balanced",  # Handle class imbalance
        }
    )

    # Supported classification categories: label -> human-readable description.
    categories: dict[str, str] = field(
        default_factory=lambda: {
            "feature": "New functionality or capabilities",
            "bugfix": "Bug fixes and error corrections",
            "refactor": "Code restructuring and optimization",
            "docs": "Documentation changes and updates",
            "test": "Testing-related changes",
            "config": "Configuration and settings changes",
            "chore": "Maintenance and housekeeping tasks",
            "security": "Security-related changes",
            "hotfix": "Emergency production fixes",
            "style": "Code style and formatting changes",
            "build": "Build system and dependency changes",
            "ci": "Continuous integration changes",
            "revert": "Reverts of previous changes",
            "merge": "Merge commits and integration",
            "wip": "Work in progress commits",
        }
    )
|
|
253
|
+
|
|
254
|
+
|
|
255
|
+
@dataclass
class BranchAnalysisConfig:
    """Configuration for branch analysis optimization.

    This configuration controls how branches are analyzed to prevent performance
    issues on large organizations with many repositories and branches.
    """

    # Branch analysis strategy
    strategy: str = "smart"  # Options: "all", "smart", "main_only"

    # Smart analysis parameters
    max_branches_per_repo: int = 50  # Maximum branches to analyze per repository
    active_days_threshold: int = 90  # Days to consider a branch "active"
    include_main_branches: bool = True  # Always include main/master branches

    # Branch name patterns (regexes) to always include/exclude,
    # regardless of the activity heuristics above.
    always_include_patterns: list[str] = field(
        default_factory=lambda: [
            r"^(main|master|develop|dev)$",  # Main development branches
            r"^release/.*",  # Release branches
            r"^hotfix/.*",  # Hotfix branches
        ]
    )

    always_exclude_patterns: list[str] = field(
        default_factory=lambda: [
            r"^dependabot/.*",  # Dependabot branches
            r"^renovate/.*",  # Renovate branches
            r".*-backup$",  # Backup branches
            r".*-temp$",  # Temporary branches
        ]
    )

    # Performance limits
    enable_progress_logging: bool = True  # Log branch analysis progress
    branch_commit_limit: int = 1000  # Max commits to analyze per branch
|
|
292
|
+
|
|
293
|
+
|
|
294
|
+
@dataclass
class AnalysisConfig:
    """Analysis-specific configuration.

    Aggregates filters (authors, paths, messages), identity-resolution
    settings, and the nested branch-analysis / categorization configs.
    """

    # Extra regex patterns for extracting story points from commit messages.
    story_point_patterns: list[str] = field(default_factory=list)
    # Authors / message patterns / paths excluded from analysis.
    exclude_authors: list[str] = field(default_factory=list)
    exclude_message_patterns: list[str] = field(default_factory=list)
    exclude_paths: list[str] = field(default_factory=list)
    # Name-similarity threshold used for identity merging (0.0-1.0).
    similarity_threshold: float = 0.85
    manual_identity_mappings: list[dict[str, Any]] = field(default_factory=list)
    default_ticket_platform: Optional[str] = None
    branch_mapping_rules: dict[str, list[str]] = field(default_factory=dict)
    ticket_platforms: Optional[list[str]] = None
    auto_identity_analysis: bool = True  # Enable automatic identity analysis by default
    branch_analysis: BranchAnalysisConfig = field(default_factory=BranchAnalysisConfig)
    ml_categorization: MLCategorization = field(default_factory=MLCategorization)
    commit_classification: CommitClassificationConfig = field(
        default_factory=CommitClassificationConfig
    )
    llm_classification: LLMClassificationConfig = field(default_factory=LLMClassificationConfig)
|
|
314
|
+
|
|
315
|
+
|
|
316
|
+
@dataclass
class OutputConfig:
    """Output configuration: report destination, formats, and anonymization."""

    directory: Optional[Path] = None  # Where reports are written (None = caller decides)
    formats: list[str] = field(default_factory=lambda: ["csv", "markdown"])
    csv_delimiter: str = ","
    csv_encoding: str = "utf-8"
    anonymize_enabled: bool = False
    anonymize_fields: list[str] = field(default_factory=list)  # Fields to anonymize
    anonymize_method: str = "hash"  # One of "hash", "random", "sequential" per validator
|
|
327
|
+
|
|
328
|
+
|
|
329
|
+
@dataclass
class CacheConfig:
    """Cache configuration for analysis results."""

    directory: Path = Path(".gitflow-cache")  # Cache location, relative to CWD by default
    ttl_hours: int = 168  # Cache time-to-live: 168h = 7 days
    max_size_mb: int = 500  # Soft size cap — enforcement happens elsewhere, if at all
|
|
336
|
+
|
|
337
|
+
|
|
338
|
+
@dataclass
class JIRAConfig:
    """JIRA connection credentials.

    Both ``access_user`` and ``access_token`` are required; ``base_url``
    identifies the JIRA instance when set.
    """

    access_user: str
    access_token: str
    base_url: Optional[str] = None
|
|
345
|
+
|
|
346
|
+
|
|
347
|
+
@dataclass
class JIRAIntegrationConfig:
    """JIRA integration specific configuration."""

    enabled: bool = True
    fetch_story_points: bool = True
    project_keys: list[str] = field(default_factory=list)  # JIRA projects to query
    # Candidate field names holding story points; JIRA custom-field ids vary
    # per instance, hence the multiple defaults.
    story_point_fields: list[str] = field(
        default_factory=lambda: ["customfield_10016", "customfield_10021", "Story Points"]
    )
|
|
357
|
+
|
|
358
|
+
|
|
359
|
+
@dataclass
class PMPlatformConfig:
    """Base PM platform configuration.

    Generic container for one project-management platform entry; the
    ``config`` dict holds platform-specific settings.
    """

    enabled: bool = True
    platform_type: str = ""  # e.g. platform identifier; empty until configured
    config: dict[str, Any] = field(default_factory=dict)
|
|
366
|
+
|
|
367
|
+
|
|
368
|
+
@dataclass
class PMIntegrationConfig:
    """PM framework integration configuration.

    Disabled by default; ``platforms`` maps platform names to their
    individual :class:`PMPlatformConfig` entries.
    """

    enabled: bool = False
    primary_platform: Optional[str] = None
    correlation: dict[str, Any] = field(default_factory=dict)  # Commit<->ticket correlation options
    platforms: dict[str, PMPlatformConfig] = field(default_factory=dict)
|
|
376
|
+
|
|
377
|
+
|
|
378
|
+
@dataclass
class Config:
    """Main configuration container.

    Aggregates every sub-configuration (repositories, GitHub, analysis,
    output, cache, and the optional PM/JIRA/qualitative sections).
    """

    repositories: list[RepositoryConfig]
    github: GitHubConfig
    analysis: AnalysisConfig
    output: OutputConfig
    cache: CacheConfig
    jira: Optional[JIRAConfig] = None
    jira_integration: Optional[JIRAIntegrationConfig] = None
    pm: Optional[Any] = None  # Modern PM framework config
    pm_integration: Optional[PMIntegrationConfig] = None
    qualitative: Optional["QualitativeConfig"] = None

    def discover_organization_repositories(
        self, clone_base_path: Optional[Path] = None
    ) -> list[RepositoryConfig]:
        """Discover repositories from GitHub organization.

        Args:
            clone_base_path: Base directory where repos should be cloned/found.
                If None, uses output directory.

        Returns:
            List of discovered repository configurations. Empty when no
            organization or token is configured.

        Raises:
            ValueError: If no base path is available, or if the GitHub API
                query fails (the original exception is chained).
        """
        if not self.github.organization or not self.github.token:
            # Discovery needs both an organization name and an API token.
            return []

        # Resolve and validate the base path *before* touching the GitHub
        # API, so a missing path is reported as the local configuration
        # error it is, instead of being wrapped by the except below into
        # a misleading "Failed to discover repositories..." message.
        base_path = clone_base_path or self.output.directory
        if base_path is None:
            raise ValueError("No base path available for repository cloning")

        # Imported lazily: PyGithub is only required when discovery is used.
        from github import Github

        github_client = Github(self.github.token, base_url=self.github.base_url)

        try:
            org = github_client.get_organization(self.github.organization)
            discovered_repos = []

            for repo in org.get_repos():
                # Skip archived repositories — they no longer receive commits.
                if repo.archived:
                    continue

                # Mirror RepositoryConfig's own project-key derivation.
                repo_config = RepositoryConfig(
                    name=repo.name,
                    path=base_path / repo.name,
                    github_repo=repo.full_name,
                    project_key=repo.name.upper().replace("-", "_"),
                    branch=repo.default_branch,
                )
                discovered_repos.append(repo_config)

            return discovered_repos

        except Exception as e:
            # Boundary wrap: surface API/auth failures with context,
            # chaining the original cause for debugging.
            raise ValueError(
                f"Failed to discover repositories from organization {self.github.organization}: {e}"
            ) from e
|
|
@@ -0,0 +1,154 @@
|
|
|
1
|
+
"""Configuration validation logic for GitFlow Analytics."""
|
|
2
|
+
|
|
3
|
+
from pathlib import Path
|
|
4
|
+
|
|
5
|
+
from .schema import Config
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
class ConfigValidator:
    """Validates configuration settings.

    ``validate_config`` returns non-fatal warnings; the section-level
    validators raise ``InvalidValueError`` on hard errors. Imports from
    ``.errors`` are deliberately local (presumably to avoid circular
    imports) and deferred to the raise sites.
    """

    @staticmethod
    def validate_config(config: "Config") -> list[str]:
        """Validate configuration and return list of warnings.

        Args:
            config: Configuration to validate

        Returns:
            List of warning messages (empty when everything checks out).
        """
        warnings = []

        # Check repository paths exist and look like git checkouts.
        for repo in config.repositories:
            if not repo.path.exists():
                warnings.append(f"Repository path does not exist: {repo.path}")
            elif not (repo.path / ".git").exists():
                warnings.append(f"Path is not a git repository: {repo.path}")

        # Check GitHub token if GitHub repos are specified.
        has_github_repos = any(r.github_repo for r in config.repositories)
        if has_github_repos and not config.github.token:
            warnings.append("GitHub repositories specified but no GitHub token provided")

        # Unqualified "repo" names need a configured owner to resolve.
        for repo in config.repositories:
            if repo.github_repo and "/" not in repo.github_repo and not config.github.owner:
                warnings.append(f"Repository {repo.github_repo} needs owner specified")

        # Probe cache directory permissions (NOTE: this creates the
        # directory as a side effect of validation).
        try:
            config.cache.directory.mkdir(exist_ok=True, parents=True)
        except PermissionError:
            warnings.append(f"Cannot create cache directory: {config.cache.directory}")

        return warnings

    @staticmethod
    def _require_unit_interval(option: str, value: Any, config_path: Path) -> None:
        """Raise InvalidValueError unless *value* lies within [0.0, 1.0].

        Shared by every ratio/threshold option so the error wording stays
        consistent across settings.
        """
        if 0.0 <= value <= 1.0:
            return
        from .errors import InvalidValueError

        raise InvalidValueError(
            option,
            value,
            "must be between 0.0 and 1.0",
            config_path,
            valid_values=["0.0 to 1.0"],
        )

    @staticmethod
    def validate_analysis_config(analysis_config: dict, config_path: Path) -> None:
        """Validate analysis configuration section.

        Args:
            analysis_config: Analysis configuration dictionary
            config_path: Path to configuration file (for error messages)

        Raises:
            InvalidValueError: If configuration values are invalid
        """
        # Validate similarity threshold (identity resolution).
        if "identity" in analysis_config:
            threshold = analysis_config["identity"].get("similarity_threshold")
            if threshold is not None:
                ConfigValidator._require_unit_interval(
                    "similarity_threshold", threshold, config_path
                )

        # Validate ML categorization settings.
        if "ml_categorization" in analysis_config:
            ml_config = analysis_config["ml_categorization"]

            if "min_confidence" in ml_config:
                ConfigValidator._require_unit_interval(
                    "ml_categorization.min_confidence", ml_config["min_confidence"], config_path
                )

            if "semantic_weight" in ml_config:
                ConfigValidator._require_unit_interval(
                    "ml_categorization.semantic_weight", ml_config["semantic_weight"], config_path
                )

        # Validate branch analysis strategy.
        if "branch_analysis" in analysis_config:
            branch_config = analysis_config["branch_analysis"]
            if "strategy" in branch_config:
                strategy = branch_config["strategy"]
                valid_strategies = ["all", "smart", "main_only"]
                if strategy not in valid_strategies:
                    from .errors import InvalidValueError

                    raise InvalidValueError(
                        "branch_analysis.strategy",
                        strategy,
                        "invalid branch analysis strategy",
                        config_path,
                        valid_values=valid_strategies,
                    )

    @staticmethod
    def validate_output_config(output_config: dict, config_path: Path) -> None:
        """Validate output configuration section.

        Args:
            output_config: Output configuration dictionary
            config_path: Path to configuration file (for error messages)

        Raises:
            InvalidValueError: If configuration values are invalid
        """
        # Validate output formats.
        if "formats" in output_config:
            formats = output_config["formats"]
            valid_formats = ["csv", "markdown", "json"]
            for fmt in formats:
                if fmt not in valid_formats:
                    from .errors import InvalidValueError

                    raise InvalidValueError(
                        "output.formats",
                        fmt,
                        "invalid output format",
                        config_path,
                        valid_values=valid_formats,
                    )

        # Validate anonymization method.
        if "anonymization" in output_config:
            anon_config = output_config["anonymization"]
            if "method" in anon_config:
                method = anon_config["method"]
                valid_methods = ["hash", "random", "sequential"]
                if method not in valid_methods:
                    from .errors import InvalidValueError

                    raise InvalidValueError(
                        "output.anonymization.method",
                        method,
                        "invalid anonymization method",
                        config_path,
                        valid_values=valid_methods,
                    )