gitflow-analytics 1.0.3__py3-none-any.whl → 1.3.6__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- gitflow_analytics/_version.py +1 -1
- gitflow_analytics/classification/__init__.py +31 -0
- gitflow_analytics/classification/batch_classifier.py +752 -0
- gitflow_analytics/classification/classifier.py +464 -0
- gitflow_analytics/classification/feature_extractor.py +725 -0
- gitflow_analytics/classification/linguist_analyzer.py +574 -0
- gitflow_analytics/classification/model.py +455 -0
- gitflow_analytics/cli.py +4108 -350
- gitflow_analytics/cli_rich.py +198 -48
- gitflow_analytics/config/__init__.py +43 -0
- gitflow_analytics/config/errors.py +261 -0
- gitflow_analytics/config/loader.py +904 -0
- gitflow_analytics/config/profiles.py +264 -0
- gitflow_analytics/config/repository.py +124 -0
- gitflow_analytics/config/schema.py +441 -0
- gitflow_analytics/config/validator.py +154 -0
- gitflow_analytics/config.py +44 -508
- gitflow_analytics/core/analyzer.py +1209 -98
- gitflow_analytics/core/cache.py +1337 -29
- gitflow_analytics/core/data_fetcher.py +1193 -0
- gitflow_analytics/core/identity.py +363 -14
- gitflow_analytics/core/metrics_storage.py +526 -0
- gitflow_analytics/core/progress.py +372 -0
- gitflow_analytics/core/schema_version.py +269 -0
- gitflow_analytics/extractors/ml_tickets.py +1100 -0
- gitflow_analytics/extractors/story_points.py +8 -1
- gitflow_analytics/extractors/tickets.py +749 -11
- gitflow_analytics/identity_llm/__init__.py +6 -0
- gitflow_analytics/identity_llm/analysis_pass.py +231 -0
- gitflow_analytics/identity_llm/analyzer.py +464 -0
- gitflow_analytics/identity_llm/models.py +76 -0
- gitflow_analytics/integrations/github_integration.py +175 -11
- gitflow_analytics/integrations/jira_integration.py +461 -24
- gitflow_analytics/integrations/orchestrator.py +124 -1
- gitflow_analytics/metrics/activity_scoring.py +322 -0
- gitflow_analytics/metrics/branch_health.py +470 -0
- gitflow_analytics/metrics/dora.py +379 -20
- gitflow_analytics/models/database.py +843 -53
- gitflow_analytics/pm_framework/__init__.py +115 -0
- gitflow_analytics/pm_framework/adapters/__init__.py +50 -0
- gitflow_analytics/pm_framework/adapters/jira_adapter.py +1845 -0
- gitflow_analytics/pm_framework/base.py +406 -0
- gitflow_analytics/pm_framework/models.py +211 -0
- gitflow_analytics/pm_framework/orchestrator.py +652 -0
- gitflow_analytics/pm_framework/registry.py +333 -0
- gitflow_analytics/qualitative/__init__.py +9 -10
- gitflow_analytics/qualitative/chatgpt_analyzer.py +259 -0
- gitflow_analytics/qualitative/classifiers/__init__.py +3 -3
- gitflow_analytics/qualitative/classifiers/change_type.py +518 -244
- gitflow_analytics/qualitative/classifiers/domain_classifier.py +272 -165
- gitflow_analytics/qualitative/classifiers/intent_analyzer.py +321 -222
- gitflow_analytics/qualitative/classifiers/llm/__init__.py +35 -0
- gitflow_analytics/qualitative/classifiers/llm/base.py +193 -0
- gitflow_analytics/qualitative/classifiers/llm/batch_processor.py +383 -0
- gitflow_analytics/qualitative/classifiers/llm/cache.py +479 -0
- gitflow_analytics/qualitative/classifiers/llm/cost_tracker.py +435 -0
- gitflow_analytics/qualitative/classifiers/llm/openai_client.py +403 -0
- gitflow_analytics/qualitative/classifiers/llm/prompts.py +373 -0
- gitflow_analytics/qualitative/classifiers/llm/response_parser.py +287 -0
- gitflow_analytics/qualitative/classifiers/llm_commit_classifier.py +607 -0
- gitflow_analytics/qualitative/classifiers/risk_analyzer.py +215 -189
- gitflow_analytics/qualitative/core/__init__.py +4 -4
- gitflow_analytics/qualitative/core/llm_fallback.py +239 -235
- gitflow_analytics/qualitative/core/nlp_engine.py +157 -148
- gitflow_analytics/qualitative/core/pattern_cache.py +214 -192
- gitflow_analytics/qualitative/core/processor.py +381 -248
- gitflow_analytics/qualitative/enhanced_analyzer.py +2236 -0
- gitflow_analytics/qualitative/example_enhanced_usage.py +420 -0
- gitflow_analytics/qualitative/models/__init__.py +7 -7
- gitflow_analytics/qualitative/models/schemas.py +155 -121
- gitflow_analytics/qualitative/utils/__init__.py +4 -4
- gitflow_analytics/qualitative/utils/batch_processor.py +136 -123
- gitflow_analytics/qualitative/utils/cost_tracker.py +142 -140
- gitflow_analytics/qualitative/utils/metrics.py +172 -158
- gitflow_analytics/qualitative/utils/text_processing.py +146 -104
- gitflow_analytics/reports/__init__.py +100 -0
- gitflow_analytics/reports/analytics_writer.py +539 -14
- gitflow_analytics/reports/base.py +648 -0
- gitflow_analytics/reports/branch_health_writer.py +322 -0
- gitflow_analytics/reports/classification_writer.py +924 -0
- gitflow_analytics/reports/cli_integration.py +427 -0
- gitflow_analytics/reports/csv_writer.py +1676 -212
- gitflow_analytics/reports/data_models.py +504 -0
- gitflow_analytics/reports/database_report_generator.py +427 -0
- gitflow_analytics/reports/example_usage.py +344 -0
- gitflow_analytics/reports/factory.py +499 -0
- gitflow_analytics/reports/formatters.py +698 -0
- gitflow_analytics/reports/html_generator.py +1116 -0
- gitflow_analytics/reports/interfaces.py +489 -0
- gitflow_analytics/reports/json_exporter.py +2770 -0
- gitflow_analytics/reports/narrative_writer.py +2287 -158
- gitflow_analytics/reports/story_point_correlation.py +1144 -0
- gitflow_analytics/reports/weekly_trends_writer.py +389 -0
- gitflow_analytics/training/__init__.py +5 -0
- gitflow_analytics/training/model_loader.py +377 -0
- gitflow_analytics/training/pipeline.py +550 -0
- gitflow_analytics/tui/__init__.py +1 -1
- gitflow_analytics/tui/app.py +129 -126
- gitflow_analytics/tui/screens/__init__.py +3 -3
- gitflow_analytics/tui/screens/analysis_progress_screen.py +188 -179
- gitflow_analytics/tui/screens/configuration_screen.py +154 -178
- gitflow_analytics/tui/screens/loading_screen.py +100 -110
- gitflow_analytics/tui/screens/main_screen.py +89 -72
- gitflow_analytics/tui/screens/results_screen.py +305 -281
- gitflow_analytics/tui/widgets/__init__.py +2 -2
- gitflow_analytics/tui/widgets/data_table.py +67 -69
- gitflow_analytics/tui/widgets/export_modal.py +76 -76
- gitflow_analytics/tui/widgets/progress_widget.py +41 -46
- gitflow_analytics-1.3.6.dist-info/METADATA +1015 -0
- gitflow_analytics-1.3.6.dist-info/RECORD +122 -0
- gitflow_analytics-1.0.3.dist-info/METADATA +0 -490
- gitflow_analytics-1.0.3.dist-info/RECORD +0 -62
- {gitflow_analytics-1.0.3.dist-info → gitflow_analytics-1.3.6.dist-info}/WHEEL +0 -0
- {gitflow_analytics-1.0.3.dist-info → gitflow_analytics-1.3.6.dist-info}/entry_points.txt +0 -0
- {gitflow_analytics-1.0.3.dist-info → gitflow_analytics-1.3.6.dist-info}/licenses/LICENSE +0 -0
- {gitflow_analytics-1.0.3.dist-info → gitflow_analytics-1.3.6.dist-info}/top_level.txt +0 -0
|
@@ -0,0 +1,648 @@
|
|
|
1
|
+
"""Base classes for report generation abstraction layer.
|
|
2
|
+
|
|
3
|
+
This module provides the foundation for all report generators in GitFlow Analytics,
|
|
4
|
+
ensuring consistency, extensibility, and maintainability across different report formats.
|
|
5
|
+
"""
|
|
6
|
+
|
|
7
|
+
import logging
|
|
8
|
+
from abc import ABC, abstractmethod
|
|
9
|
+
from dataclasses import dataclass, field
|
|
10
|
+
from datetime import datetime, timezone
|
|
11
|
+
from pathlib import Path
|
|
12
|
+
from typing import Any, Dict, List, Optional, Set, Union
|
|
13
|
+
|
|
14
|
+
from ..models.database import Database
|
|
15
|
+
|
|
16
|
+
logger = logging.getLogger(__name__)
|
|
17
|
+
|
|
18
|
+
|
|
19
|
+
@dataclass
class ReportMetadata:
    """Metadata for report generation."""

    # When this report was generated (timezone-aware UTC).
    generated_at: datetime = field(default_factory=lambda: datetime.now(timezone.utc))
    # Wall-clock time spent producing the report, in seconds.
    generation_time_seconds: float = 0.0
    # Repositories whose history contributed to this report.
    source_repositories: List[str] = field(default_factory=list)
    # Length of the analysis window, in weeks.
    analysis_period_weeks: int = 0
    # Inclusive start/end of the analysis window (None if unknown).
    start_date: Optional[datetime] = None
    end_date: Optional[datetime] = None
    # Aggregate counts over the analyzed data set.
    total_commits: int = 0
    total_developers: int = 0
    # Author identities that were filtered out of the report.
    excluded_authors: List[str] = field(default_factory=list)
    # Version of the report schema/format.
    report_version: str = "1.0.0"
    # Name of the generator that produced the report.
    generator_name: str = ""
    # Free-form extra metadata supplied by individual generators.
    additional_info: Dict[str, Any] = field(default_factory=dict)
|
|
35
|
+
|
|
36
|
+
|
|
37
|
+
@dataclass
class ReportData:
    """Standardized data container for report generation.

    Bundles commits, pull requests, developer statistics, derived metrics,
    and configuration into one structure so every report generator consumes
    the same interface.
    """

    # Core data
    commits: List[Dict[str, Any]] = field(default_factory=list)
    pull_requests: List[Dict[str, Any]] = field(default_factory=list)
    developer_stats: List[Dict[str, Any]] = field(default_factory=list)

    # Analysis results
    activity_data: List[Dict[str, Any]] = field(default_factory=list)
    focus_data: List[Dict[str, Any]] = field(default_factory=list)
    insights_data: List[Dict[str, Any]] = field(default_factory=list)
    ticket_analysis: Dict[str, Any] = field(default_factory=dict)

    # Metrics
    pr_metrics: Dict[str, Any] = field(default_factory=dict)
    dora_metrics: Dict[str, Any] = field(default_factory=dict)
    branch_health_metrics: List[Dict[str, Any]] = field(default_factory=list)

    # Project management data
    pm_data: Optional[Dict[str, Any]] = None
    story_points_data: Optional[Dict[str, Any]] = None

    # Qualitative analysis
    qualitative_results: List[Dict[str, Any]] = field(default_factory=list)
    chatgpt_summary: Optional[str] = None

    # Metadata
    metadata: ReportMetadata = field(default_factory=ReportMetadata)

    # Configuration
    config: Dict[str, Any] = field(default_factory=dict)

    def get_required_fields(self) -> Set[str]:
        """Return the field names that must be populated for basic reports."""
        return {"commits", "developer_stats"}

    def validate(self) -> bool:
        """Check that required data is present and minimally well-formed."""
        # Guard clauses: stop at the first empty or missing required field.
        for field_name in self.get_required_fields():
            if getattr(self, field_name, None):
                continue
            logger.warning(f"Required field '{field_name}' is empty or missing")
            return False

        # Spot-check the first commit for the keys every commit must carry.
        # NOTE: only the first record is inspected, as in the original design.
        if self.commits:
            first_commit = self.commits[0]
            missing_fields = {"hash", "author_email", "timestamp"} - set(first_commit.keys())
            if missing_fields:
                logger.warning(f"Commits missing required fields: {missing_fields}")
                return False

        return True
|
|
97
|
+
|
|
98
|
+
|
|
99
|
+
@dataclass
class ReportOutput:
    """Container for report generation output."""

    # Whether generation completed without fatal errors.
    success: bool
    # Path the report was written to, if a file was produced.
    file_path: Optional[Path] = None
    # In-memory report content (text or binary), if retained.
    content: Optional[Union[str, bytes]] = None
    # Format identifier (e.g. 'csv', 'markdown', 'json', 'html').
    format: str = ""
    # Size of the generated content in bytes.
    size_bytes: int = 0
    # Fatal problems encountered during generation.
    errors: List[str] = field(default_factory=list)
    # Non-fatal problems encountered during generation.
    warnings: List[str] = field(default_factory=list)
    # Generator-specific extra information about the run.
    metadata: Dict[str, Any] = field(default_factory=dict)
|
|
111
|
+
|
|
112
|
+
|
|
113
|
+
class BaseReportGenerator(ABC):
    """Abstract base class for all report generators.

    Defines the interface every report generator must implement and provides
    shared behavior: author-exclusion filtering, identity anonymization, and
    file writing.
    """

    def __init__(
        self,
        anonymize: bool = False,
        exclude_authors: Optional[List[str]] = None,
        identity_resolver: Optional[Any] = None,
        config: Optional[Dict[str, Any]] = None
    ):
        """Initialize the report generator.

        Args:
            anonymize: Whether to anonymize developer identities
            exclude_authors: List of authors to exclude from reports
            identity_resolver: Identity resolver for consolidating developer identities
            config: Additional configuration options
        """
        self.anonymize = anonymize
        self.exclude_authors = exclude_authors or []
        self.identity_resolver = identity_resolver
        self.config = config or {}
        # Maps original identity strings to stable "DeveloperNNN" aliases.
        self._anonymization_map: Dict[str, str] = {}
        self._anonymous_counter = 0

        # Per-subclass logger so log lines identify the concrete generator.
        self.logger = logging.getLogger(self.__class__.__name__)

    @abstractmethod
    def generate(self, data: ReportData, output_path: Optional[Path] = None) -> ReportOutput:
        """Generate the report.

        Args:
            data: Standardized report data
            output_path: Optional path to write the report to

        Returns:
            ReportOutput containing the results of generation
        """
        pass

    @abstractmethod
    def get_required_fields(self) -> List[str]:
        """Get the list of required data fields for this report generator.

        Returns:
            List of field names that must be present in ReportData
        """
        pass

    @abstractmethod
    def get_format_type(self) -> str:
        """Get the format type this generator produces.

        Returns:
            Format identifier (e.g., 'csv', 'markdown', 'json', 'html')
        """
        pass

    def validate_data(self, data: ReportData) -> bool:
        """Validate that the required data is present and properly formatted.

        Args:
            data: Report data to validate

        Returns:
            True if data is valid, False otherwise
        """
        required_fields = self.get_required_fields()

        for field_name in required_fields:
            if not hasattr(data, field_name):
                self.logger.error(f"Missing required field: {field_name}")
                return False

            field_value = getattr(data, field_name)
            if field_value is None:
                self.logger.error(f"Required field '{field_name}' is None")
                return False

            # Check if collections are empty when they shouldn't be
            if isinstance(field_value, (list, dict)) and not field_value:
                if field_name in ["commits", "developer_stats"]:  # Core required fields
                    self.logger.error(f"Required field '{field_name}' is empty")
                    return False

        return True

    def pre_process(self, data: ReportData) -> ReportData:
        """Pre-process data before report generation.

        Applies author exclusion and anonymization if configured. Subclasses
        may override to add further transformation or filtering.

        Args:
            data: Input report data

        Returns:
            Processed report data
        """
        if self.exclude_authors:
            data = self._filter_excluded_authors(data)

        if self.anonymize:
            data = self._anonymize_data(data)

        return data

    def post_process(self, output: ReportOutput) -> ReportOutput:
        """Post-process the report output.

        Default implementation is a no-op; subclasses may override.

        Args:
            output: Initial report output

        Returns:
            Processed report output
        """
        return output

    def _filter_excluded_authors(self, data: ReportData) -> ReportData:
        """Filter out excluded authors from the report data.

        Filters commits, developer stats, and per-developer analysis lists
        in place on the ReportData instance.

        Args:
            data: Input report data

        Returns:
            Filtered report data
        """
        if not self.exclude_authors:
            return data

        # Exclusion matching is case-insensitive.
        excluded_lower = [author.lower() for author in self.exclude_authors]

        # Filter commits
        if data.commits:
            data.commits = [
                commit for commit in data.commits
                if not self._should_exclude_author(commit, excluded_lower)
            ]

        # Filter developer stats
        if data.developer_stats:
            data.developer_stats = [
                dev for dev in data.developer_stats
                if not self._should_exclude_developer(dev, excluded_lower)
            ]

        # Update other data structures as needed
        for field_name in ["activity_data", "focus_data", "insights_data"]:
            field_value = getattr(data, field_name, None)
            if field_value:
                filtered = [
                    item for item in field_value
                    if not self._should_exclude_item(item, excluded_lower)
                ]
                setattr(data, field_name, filtered)

        return data

    def _should_exclude_author(self, commit: Dict[str, Any], excluded_lower: List[str]) -> bool:
        """Check if a commit author should be excluded.

        Matches the commit's identity fields against the exclusion list and
        also excludes authors whose name/email matches common bot patterns.

        Args:
            commit: Commit data
            excluded_lower: Lowercase list of excluded authors

        Returns:
            True if author should be excluded
        """
        # Check canonical_id first
        canonical_id = commit.get("canonical_id", "")
        if canonical_id and canonical_id.lower() in excluded_lower:
            return True

        # Check other identity fields
        for field in ["author_email", "author_name", "author"]:
            value = commit.get(field, "")
            if value and value.lower() in excluded_lower:
                return True

        # Check for bot patterns.
        # BUG FIX: `commit.get(key, "")` returns None when the key exists with a
        # None value, and `.lower()` on it raised AttributeError. Coalesce to ""
        # before lowercasing.
        author_name = (commit.get("author_name") or "").lower()
        author_email = (commit.get("author_email") or "").lower()

        bot_indicators = ["[bot]", "bot@", "-bot", "_bot", ".bot"]
        for indicator in bot_indicators:
            if indicator in author_name or indicator in author_email:
                return True

        return False

    def _should_exclude_developer(self, dev: Dict[str, Any], excluded_lower: List[str]) -> bool:
        """Check if a developer should be excluded.

        Args:
            dev: Developer data
            excluded_lower: Lowercase list of excluded authors

        Returns:
            True if developer should be excluded
        """
        # Check various identity fields
        identity_fields = [
            "canonical_id", "primary_email", "primary_name",
            "developer", "author", "name", "display_name"
        ]

        for field in identity_fields:
            value = dev.get(field, "")
            if value and value.lower() in excluded_lower:
                return True

        return False

    def _should_exclude_item(self, item: Dict[str, Any], excluded_lower: List[str]) -> bool:
        """Generic exclusion check for data items.

        Args:
            item: Data item to check
            excluded_lower: Lowercase list of excluded authors

        Returns:
            True if item should be excluded
        """
        # Try common identity fields
        identity_fields = [
            "canonical_id", "developer", "author", "author_email",
            "primary_email", "name", "display_name"
        ]

        for field in identity_fields:
            value = item.get(field, "")
            if value and value.lower() in excluded_lower:
                return True

        return False

    def _anonymize_data(self, data: ReportData) -> ReportData:
        """Anonymize developer identities in the report data.

        Args:
            data: Input report data

        Returns:
            Anonymized report data
        """
        # Anonymize commits
        if data.commits:
            for commit in data.commits:
                self._anonymize_commit(commit)

        # Anonymize developer stats
        if data.developer_stats:
            for dev in data.developer_stats:
                self._anonymize_developer(dev)

        # Anonymize other data structures
        for field_name in ["activity_data", "focus_data", "insights_data"]:
            field_value = getattr(data, field_name, None)
            if field_value:
                for item in field_value:
                    self._anonymize_item(item)

        return data

    def _anonymize_commit(self, commit: Dict[str, Any]) -> None:
        """Anonymize a commit record in-place.

        Args:
            commit: Commit data to anonymize
        """
        for field in ["author_name", "author_email", "canonical_id"]:
            if field in commit:
                commit[field] = self._get_anonymous_name(commit[field])

    def _anonymize_developer(self, dev: Dict[str, Any]) -> None:
        """Anonymize a developer record in-place.

        Args:
            dev: Developer data to anonymize
        """
        identity_fields = [
            "canonical_id", "primary_email", "primary_name",
            "developer", "author", "name", "display_name"
        ]

        for field in identity_fields:
            if field in dev:
                dev[field] = self._get_anonymous_name(dev[field])

    def _anonymize_item(self, item: Dict[str, Any]) -> None:
        """Anonymize a generic data item in-place.

        Args:
            item: Data item to anonymize
        """
        identity_fields = [
            "canonical_id", "developer", "author", "author_email",
            "primary_email", "name", "display_name", "author_name"
        ]

        for field in identity_fields:
            if field in item:
                item[field] = self._get_anonymous_name(item[field])

    def _get_anonymous_name(self, original: str) -> str:
        """Get an anonymous name for a given original name.

        The mapping is stable within one generator instance, so the same
        original identity always receives the same alias.

        Args:
            original: Original name to anonymize

        Returns:
            Anonymous name
        """
        # Empty/None identities pass through unchanged.
        if not original:
            return original

        if original not in self._anonymization_map:
            self._anonymous_counter += 1
            self._anonymization_map[original] = f"Developer{self._anonymous_counter:03d}"

        return self._anonymization_map[original]

    def write_to_file(self, content: Union[str, bytes], output_path: Path) -> None:
        """Write report content to a file.

        Creates parent directories as needed; text is written as UTF-8.

        Args:
            content: Report content to write
            output_path: Path to write to
        """
        output_path.parent.mkdir(parents=True, exist_ok=True)

        if isinstance(content, bytes):
            output_path.write_bytes(content)
        else:
            output_path.write_text(content, encoding="utf-8")

        self.logger.info(f"Report written to {output_path}")
|
|
460
|
+
|
|
461
|
+
|
|
462
|
+
class CompositeReportGenerator(BaseReportGenerator):
    """Generator that can produce multiple report formats in a single run."""

    def __init__(self, generators: List[BaseReportGenerator], **kwargs):
        """Initialize composite generator with multiple sub-generators.

        Args:
            generators: List of report generators to compose
            **kwargs: Additional arguments passed to base class
        """
        super().__init__(**kwargs)
        self.generators = generators

    def generate(self, data: ReportData, output_path: Optional[Path] = None) -> ReportOutput:
        """Generate reports using all configured generators.

        Args:
            data: Report data
            output_path: Base output path (will be modified per generator)

        Returns:
            Composite report output
        """
        outputs = []
        errors = []
        warnings = []

        for generator in self.generators:
            try:
                # Determine output path for this generator, swapping the file
                # suffix to match the generator's format.
                gen_output_path = None
                if output_path:
                    suffix = self._get_suffix_for_format(generator.get_format_type())
                    gen_output_path = output_path.with_suffix(suffix)

                # Generate report
                output = generator.generate(data, gen_output_path)
                outputs.append(output)

                # Collect errors and warnings
                errors.extend(output.errors)
                warnings.extend(output.warnings)

            except Exception as e:
                self.logger.error(f"Error in {generator.__class__.__name__}: {e}")
                errors.append(f"{generator.__class__.__name__}: {str(e)}")

        # Create composite output.
        # BUG FIX: previously success was `all(o.success for o in outputs)`,
        # which is vacuously True when every generator raised (outputs empty)
        # and ignored generator-level exceptions when some succeeded. Require
        # at least one output, no collected errors, and all sub-outputs OK.
        return ReportOutput(
            success=bool(outputs) and not errors and all(o.success for o in outputs),
            errors=errors,
            warnings=warnings,
            metadata={"outputs": outputs}
        )

    def get_required_fields(self) -> List[str]:
        """Get union of all required fields from sub-generators."""
        required = set()
        for generator in self.generators:
            required.update(generator.get_required_fields())
        return list(required)

    def get_format_type(self) -> str:
        """Get composite format type."""
        formats = [g.get_format_type() for g in self.generators]
        return f"composite[{','.join(formats)}]"

    def _get_suffix_for_format(self, format_type: str) -> str:
        """Get file suffix for a given format type.

        Args:
            format_type: Format type identifier

        Returns:
            File suffix including dot
        """
        suffix_map = {
            "csv": ".csv",
            "markdown": ".md",
            "json": ".json",
            "html": ".html",
            "xml": ".xml",
            "yaml": ".yaml",
            "pdf": ".pdf"
        }
        # Unknown formats fall back to ".<format_type>".
        return suffix_map.get(format_type, f".{format_type}")
|
|
548
|
+
|
|
549
|
+
|
|
550
|
+
class ChainedReportGenerator(BaseReportGenerator):
    """Generator that chains multiple generators, passing output of one as input to the next."""

    def __init__(self, generators: List[BaseReportGenerator], **kwargs):
        """Initialize chained generator.

        Args:
            generators: Ordered list of generators to chain
            **kwargs: Additional arguments passed to base class
        """
        super().__init__(**kwargs)
        self.generators = generators

    def generate(self, data: ReportData, output_path: Optional[Path] = None) -> ReportOutput:
        """Generate reports in sequence, chaining outputs.

        Only the last generator writes to ``output_path``; intermediate
        results are threaded through as input to the next generator.

        Args:
            data: Initial report data
            output_path: Final output path

        Returns:
            Final report output
        """
        current_data = data
        outputs = []

        for i, generator in enumerate(self.generators):
            try:
                # Only the final generator in the chain writes to disk.
                is_last = (i == len(self.generators) - 1)
                gen_output_path = output_path if is_last else None

                output = generator.generate(current_data, gen_output_path)
                outputs.append(output)

                if not output.success:
                    return ReportOutput(
                        success=False,
                        errors=[f"Chain broken at {generator.__class__.__name__}"] + output.errors,
                        metadata={"completed_steps": outputs}
                    )

                # Transform output to input for next generator if not last
                if not is_last and output.content:
                    current_data = self._transform_output_to_input(output, current_data)

            except Exception as e:
                self.logger.error(f"Error in chain at {generator.__class__.__name__}: {e}")
                return ReportOutput(
                    success=False,
                    errors=[f"Chain error at {generator.__class__.__name__}: {str(e)}"],
                    metadata={"completed_steps": outputs}
                )

        # Return the final output
        return outputs[-1] if outputs else ReportOutput(success=False, errors=["No generators in chain"])

    def get_required_fields(self) -> List[str]:
        """Get required fields from first generator in chain."""
        return self.generators[0].get_required_fields() if self.generators else []

    def get_format_type(self) -> str:
        """Get format type of final generator in chain."""
        return self.generators[-1].get_format_type() if self.generators else "unknown"

    def _transform_output_to_input(self, output: ReportOutput, original_data: ReportData) -> ReportData:
        """Transform generator output to input for next generator.

        Args:
            output: Output from previous generator
            original_data: Original input data

        Returns:
            Transformed data for next generator
        """
        # Default implementation: carry the original data forward and expose
        # the previous step's output to the next generator via config.
        # NOTE: collections other than config are shared by reference with
        # original_data; generators must not mutate them here.
        new_data = ReportData(
            commits=original_data.commits,
            pull_requests=original_data.pull_requests,
            developer_stats=original_data.developer_stats,
            activity_data=original_data.activity_data,
            focus_data=original_data.focus_data,
            insights_data=original_data.insights_data,
            ticket_analysis=original_data.ticket_analysis,
            pr_metrics=original_data.pr_metrics,
            dora_metrics=original_data.dora_metrics,
            branch_health_metrics=original_data.branch_health_metrics,
            pm_data=original_data.pm_data,
            story_points_data=original_data.story_points_data,
            qualitative_results=original_data.qualitative_results,
            chatgpt_summary=original_data.chatgpt_summary,
            metadata=original_data.metadata,
            # BUG FIX: previously the config dict was passed by reference, so
            # the "previous_output" insertion below mutated the caller's
            # original_data.config. Copy it so the chain step stays isolated.
            config=dict(original_data.config)
        )

        # Add previous output to config for next generator
        new_data.config["previous_output"] = output

        return new_data
|