gitflow-analytics 1.0.3__py3-none-any.whl → 1.3.6__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (116)
  1. gitflow_analytics/_version.py +1 -1
  2. gitflow_analytics/classification/__init__.py +31 -0
  3. gitflow_analytics/classification/batch_classifier.py +752 -0
  4. gitflow_analytics/classification/classifier.py +464 -0
  5. gitflow_analytics/classification/feature_extractor.py +725 -0
  6. gitflow_analytics/classification/linguist_analyzer.py +574 -0
  7. gitflow_analytics/classification/model.py +455 -0
  8. gitflow_analytics/cli.py +4108 -350
  9. gitflow_analytics/cli_rich.py +198 -48
  10. gitflow_analytics/config/__init__.py +43 -0
  11. gitflow_analytics/config/errors.py +261 -0
  12. gitflow_analytics/config/loader.py +904 -0
  13. gitflow_analytics/config/profiles.py +264 -0
  14. gitflow_analytics/config/repository.py +124 -0
  15. gitflow_analytics/config/schema.py +441 -0
  16. gitflow_analytics/config/validator.py +154 -0
  17. gitflow_analytics/config.py +44 -508
  18. gitflow_analytics/core/analyzer.py +1209 -98
  19. gitflow_analytics/core/cache.py +1337 -29
  20. gitflow_analytics/core/data_fetcher.py +1193 -0
  21. gitflow_analytics/core/identity.py +363 -14
  22. gitflow_analytics/core/metrics_storage.py +526 -0
  23. gitflow_analytics/core/progress.py +372 -0
  24. gitflow_analytics/core/schema_version.py +269 -0
  25. gitflow_analytics/extractors/ml_tickets.py +1100 -0
  26. gitflow_analytics/extractors/story_points.py +8 -1
  27. gitflow_analytics/extractors/tickets.py +749 -11
  28. gitflow_analytics/identity_llm/__init__.py +6 -0
  29. gitflow_analytics/identity_llm/analysis_pass.py +231 -0
  30. gitflow_analytics/identity_llm/analyzer.py +464 -0
  31. gitflow_analytics/identity_llm/models.py +76 -0
  32. gitflow_analytics/integrations/github_integration.py +175 -11
  33. gitflow_analytics/integrations/jira_integration.py +461 -24
  34. gitflow_analytics/integrations/orchestrator.py +124 -1
  35. gitflow_analytics/metrics/activity_scoring.py +322 -0
  36. gitflow_analytics/metrics/branch_health.py +470 -0
  37. gitflow_analytics/metrics/dora.py +379 -20
  38. gitflow_analytics/models/database.py +843 -53
  39. gitflow_analytics/pm_framework/__init__.py +115 -0
  40. gitflow_analytics/pm_framework/adapters/__init__.py +50 -0
  41. gitflow_analytics/pm_framework/adapters/jira_adapter.py +1845 -0
  42. gitflow_analytics/pm_framework/base.py +406 -0
  43. gitflow_analytics/pm_framework/models.py +211 -0
  44. gitflow_analytics/pm_framework/orchestrator.py +652 -0
  45. gitflow_analytics/pm_framework/registry.py +333 -0
  46. gitflow_analytics/qualitative/__init__.py +9 -10
  47. gitflow_analytics/qualitative/chatgpt_analyzer.py +259 -0
  48. gitflow_analytics/qualitative/classifiers/__init__.py +3 -3
  49. gitflow_analytics/qualitative/classifiers/change_type.py +518 -244
  50. gitflow_analytics/qualitative/classifiers/domain_classifier.py +272 -165
  51. gitflow_analytics/qualitative/classifiers/intent_analyzer.py +321 -222
  52. gitflow_analytics/qualitative/classifiers/llm/__init__.py +35 -0
  53. gitflow_analytics/qualitative/classifiers/llm/base.py +193 -0
  54. gitflow_analytics/qualitative/classifiers/llm/batch_processor.py +383 -0
  55. gitflow_analytics/qualitative/classifiers/llm/cache.py +479 -0
  56. gitflow_analytics/qualitative/classifiers/llm/cost_tracker.py +435 -0
  57. gitflow_analytics/qualitative/classifiers/llm/openai_client.py +403 -0
  58. gitflow_analytics/qualitative/classifiers/llm/prompts.py +373 -0
  59. gitflow_analytics/qualitative/classifiers/llm/response_parser.py +287 -0
  60. gitflow_analytics/qualitative/classifiers/llm_commit_classifier.py +607 -0
  61. gitflow_analytics/qualitative/classifiers/risk_analyzer.py +215 -189
  62. gitflow_analytics/qualitative/core/__init__.py +4 -4
  63. gitflow_analytics/qualitative/core/llm_fallback.py +239 -235
  64. gitflow_analytics/qualitative/core/nlp_engine.py +157 -148
  65. gitflow_analytics/qualitative/core/pattern_cache.py +214 -192
  66. gitflow_analytics/qualitative/core/processor.py +381 -248
  67. gitflow_analytics/qualitative/enhanced_analyzer.py +2236 -0
  68. gitflow_analytics/qualitative/example_enhanced_usage.py +420 -0
  69. gitflow_analytics/qualitative/models/__init__.py +7 -7
  70. gitflow_analytics/qualitative/models/schemas.py +155 -121
  71. gitflow_analytics/qualitative/utils/__init__.py +4 -4
  72. gitflow_analytics/qualitative/utils/batch_processor.py +136 -123
  73. gitflow_analytics/qualitative/utils/cost_tracker.py +142 -140
  74. gitflow_analytics/qualitative/utils/metrics.py +172 -158
  75. gitflow_analytics/qualitative/utils/text_processing.py +146 -104
  76. gitflow_analytics/reports/__init__.py +100 -0
  77. gitflow_analytics/reports/analytics_writer.py +539 -14
  78. gitflow_analytics/reports/base.py +648 -0
  79. gitflow_analytics/reports/branch_health_writer.py +322 -0
  80. gitflow_analytics/reports/classification_writer.py +924 -0
  81. gitflow_analytics/reports/cli_integration.py +427 -0
  82. gitflow_analytics/reports/csv_writer.py +1676 -212
  83. gitflow_analytics/reports/data_models.py +504 -0
  84. gitflow_analytics/reports/database_report_generator.py +427 -0
  85. gitflow_analytics/reports/example_usage.py +344 -0
  86. gitflow_analytics/reports/factory.py +499 -0
  87. gitflow_analytics/reports/formatters.py +698 -0
  88. gitflow_analytics/reports/html_generator.py +1116 -0
  89. gitflow_analytics/reports/interfaces.py +489 -0
  90. gitflow_analytics/reports/json_exporter.py +2770 -0
  91. gitflow_analytics/reports/narrative_writer.py +2287 -158
  92. gitflow_analytics/reports/story_point_correlation.py +1144 -0
  93. gitflow_analytics/reports/weekly_trends_writer.py +389 -0
  94. gitflow_analytics/training/__init__.py +5 -0
  95. gitflow_analytics/training/model_loader.py +377 -0
  96. gitflow_analytics/training/pipeline.py +550 -0
  97. gitflow_analytics/tui/__init__.py +1 -1
  98. gitflow_analytics/tui/app.py +129 -126
  99. gitflow_analytics/tui/screens/__init__.py +3 -3
  100. gitflow_analytics/tui/screens/analysis_progress_screen.py +188 -179
  101. gitflow_analytics/tui/screens/configuration_screen.py +154 -178
  102. gitflow_analytics/tui/screens/loading_screen.py +100 -110
  103. gitflow_analytics/tui/screens/main_screen.py +89 -72
  104. gitflow_analytics/tui/screens/results_screen.py +305 -281
  105. gitflow_analytics/tui/widgets/__init__.py +2 -2
  106. gitflow_analytics/tui/widgets/data_table.py +67 -69
  107. gitflow_analytics/tui/widgets/export_modal.py +76 -76
  108. gitflow_analytics/tui/widgets/progress_widget.py +41 -46
  109. gitflow_analytics-1.3.6.dist-info/METADATA +1015 -0
  110. gitflow_analytics-1.3.6.dist-info/RECORD +122 -0
  111. gitflow_analytics-1.0.3.dist-info/METADATA +0 -490
  112. gitflow_analytics-1.0.3.dist-info/RECORD +0 -62
  113. {gitflow_analytics-1.0.3.dist-info → gitflow_analytics-1.3.6.dist-info}/WHEEL +0 -0
  114. {gitflow_analytics-1.0.3.dist-info → gitflow_analytics-1.3.6.dist-info}/entry_points.txt +0 -0
  115. {gitflow_analytics-1.0.3.dist-info → gitflow_analytics-1.3.6.dist-info}/licenses/LICENSE +0 -0
  116. {gitflow_analytics-1.0.3.dist-info → gitflow_analytics-1.3.6.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,389 @@
1
+ """Weekly classification trends CSV report generation."""
2
+
3
+ import logging
4
+ from collections import defaultdict
5
+ from datetime import datetime, timedelta
6
+ from pathlib import Path
7
+ from typing import Any, Dict, List, Optional
8
+
9
+ import pandas as pd
10
+
11
+ logger = logging.getLogger(__name__)
12
+
13
+
14
class WeeklyTrendsWriter:
    """Generate weekly classification trends CSV reports.

    WHY: Week-over-week classification trends provide insights into changing
    development patterns, helping identify evolving team practices, seasonal
    patterns, and the impact of process changes on development work types.

    DESIGN DECISION: Generate separate developer and project trend reports
    to allow analysis at different granularities. Include percentage changes
    to highlight velocity and pattern shifts. Both reports share one
    parameterized implementation (`_generate_weekly_trends`) so the two
    outputs cannot drift apart.
    """

    def __init__(self) -> None:
        """Initialize weekly trends writer."""
        # Closed set of categories emitted as <category>_count /
        # <category>_pct_change columns; 'other' is the fallback bucket.
        self.classification_categories = [
            'feature', 'bug_fix', 'refactor', 'documentation',
            'maintenance', 'test', 'style', 'build', 'other'
        ]

    def generate_weekly_trends_reports(
        self,
        commits: List[Dict[str, Any]],
        output_dir: Path,
        weeks: int = 12,
        date_suffix: str = ""
    ) -> Dict[str, Path]:
        """Generate both developer and project weekly trends reports.

        WHY: Providing both perspectives allows analysis of individual developer
        patterns as well as project-level trend analysis. This enables both
        personal development tracking and project health monitoring.

        Args:
            commits: List of commit data with classifications and timestamps
            output_dir: Directory to write CSV reports to
            weeks: Number of weeks to analyze (commits older than this are skipped)
            date_suffix: Date suffix for output filenames

        Returns:
            Dictionary mapping report type ('developer_trends' /
            'project_trends') to output file paths
        """
        output_paths: Dict[str, Path] = {}

        # Developer-level report
        developer_trends_path = output_dir / f"developer_weekly_trends{date_suffix}.csv"
        self._generate_developer_weekly_trends(commits, developer_trends_path, weeks)
        output_paths['developer_trends'] = developer_trends_path

        # Project-level report
        project_trends_path = output_dir / f"project_weekly_trends{date_suffix}.csv"
        self._generate_project_weekly_trends(commits, project_trends_path, weeks)
        output_paths['project_trends'] = project_trends_path

        logger.info(f"Generated weekly trends reports: {len(output_paths)} files")
        return output_paths

    def _generate_developer_weekly_trends(
        self,
        commits: List[Dict[str, Any]],
        output_path: Path,
        weeks: int
    ) -> None:
        """Generate developer weekly classification trends CSV.

        WHY: Developer-level trends help identify individual development patterns,
        skill progression, and changing work focus over time. This enables
        targeted coaching and recognition of evolving expertise.

        Args:
            commits: List of commit data with developer and classification info
            output_path: Path to write the CSV file
            weeks: Number of weeks for trend analysis
        """
        # Identity resolution priority: canonical_id (post identity-merge),
        # then author_email, then author_name, finally 'Unknown'.
        self._generate_weekly_trends(
            commits,
            output_path,
            weeks,
            entity_column='developer',
            entity_label='developer',
            entity_extractor=lambda c: (
                c.get('canonical_id')
                or c.get('author_email')
                or c.get('author_name', 'Unknown')
            ),
        )

    def _generate_project_weekly_trends(
        self,
        commits: List[Dict[str, Any]],
        output_path: Path,
        weeks: int
    ) -> None:
        """Generate project weekly classification trends CSV.

        WHY: Project-level trends reveal changing development patterns within
        specific codebases, helping identify technical debt accumulation,
        feature development cycles, and maintenance patterns.

        Args:
            commits: List of commit data with project and classification info
            output_path: Path to write the CSV file
            weeks: Number of weeks for trend analysis
        """
        self._generate_weekly_trends(
            commits,
            output_path,
            weeks,
            entity_column='project',
            entity_label='project',
            entity_extractor=lambda c: c.get('project_key', 'UNKNOWN'),
        )

    def _generate_weekly_trends(
        self,
        commits: List[Dict[str, Any]],
        output_path: Path,
        weeks: int,
        entity_column: str,
        entity_label: str,
        entity_extractor: Any,
    ) -> None:
        """Shared implementation for developer/project weekly trend CSVs.

        WHY: The developer and project reports differ only in how a commit is
        mapped to an entity and in column/log labels; one implementation keeps
        their week bucketing and percentage-change math identical.

        Args:
            commits: Commit dictionaries (must carry 'timestamp' to be counted)
            output_path: Path to write the CSV file
            weeks: Analysis window; commits in week >= weeks are excluded
            entity_column: CSV column name for the grouping key
            entity_label: Human-readable label used in log messages
            entity_extractor: Callable mapping a commit dict to its entity key
        """
        if not commits:
            logger.warning(f"No commits provided for {entity_label} weekly trends analysis")
            self._write_empty_trends_csv(output_path, entity_column, entity_label)
            return

        # Most-recent-first ordering so the newest commit defines week 0.
        sorted_commits = sorted(
            [c for c in commits if c.get('timestamp')],
            key=lambda x: x['timestamp'],
            reverse=True,
        )

        if not sorted_commits:
            logger.warning(f"No commits with timestamps for {entity_label} weekly trends analysis")
            self._write_empty_trends_csv(output_path, entity_column, entity_label)
            return

        # Normalize datetimes to dates so the day arithmetic below is uniform.
        latest_date = sorted_commits[0]['timestamp']
        if hasattr(latest_date, 'date'):
            latest_date = latest_date.date()

        # entity -> week_num -> classification -> count
        # (week 0 = current/most-recent week, 1 = previous week, ...)
        entity_weeks: Dict[Any, Dict[int, Dict[str, int]]] = defaultdict(
            lambda: defaultdict(lambda: defaultdict(int))
        )

        for commit in sorted_commits:
            timestamp = commit.get('timestamp')
            if not timestamp:
                continue

            commit_date = timestamp.date() if hasattr(timestamp, 'date') else timestamp

            days_diff = (latest_date - commit_date).days
            week_num = days_diff // 7

            # Drop commits outside the requested analysis window.
            if week_num >= weeks:
                continue

            entity = entity_extractor(commit)
            classification = self._get_commit_classification(commit)
            entity_weeks[entity][week_num][classification] += 1

        rows = []
        for entity, weeks_data in entity_weeks.items():
            # Ascending week numbers: index i+1 is the chronologically
            # previous *recorded* week. NOTE(review): weeks with zero commits
            # are absent, so across a gap the "previous week" is the nearest
            # older active week rather than the literal prior calendar week.
            sorted_weeks = sorted(weeks_data.keys())

            for i, week_num in enumerate(sorted_weeks):
                week_data = weeks_data[week_num]
                week_start = latest_date - timedelta(days=week_num * 7)

                row: Dict[str, Any] = {
                    'week_start': week_start.strftime('%Y-%m-%d'),
                    entity_column: entity,
                    'week_number': week_num,
                }
                row['total_commits'] = sum(week_data.values())

                for category in self.classification_categories:
                    count = week_data.get(category, 0)
                    row[f'{category}_count'] = count

                    if i < len(sorted_weeks) - 1:  # a previous week exists
                        prev_count = weeks_data[sorted_weeks[i + 1]].get(category, 0)
                        if prev_count > 0:
                            pct_change = ((count - prev_count) / prev_count) * 100
                        elif count > 0:
                            pct_change = 100.0  # activity appeared from zero
                        else:
                            pct_change = 0.0
                    else:
                        pct_change = 0.0  # oldest week: no baseline

                    row[f'{category}_pct_change'] = round(pct_change, 1)

                rows.append(row)

        df = pd.DataFrame(rows)
        if not df.empty:
            df = df.sort_values([entity_column, 'week_number'])

        df.to_csv(output_path, index=False)
        logger.info(f"Generated {entity_label} weekly trends CSV: {output_path} ({len(df)} rows)")

    def _get_commit_classification(self, commit: Dict[str, Any]) -> str:
        """Extract commit classification from commit data.

        WHY: Commits may have classification data in different fields depending
        on the extraction method used (ML vs rule-based vs cached). This method
        provides a consistent way to extract the classification.

        DESIGN DECISION: Priority order for classification sources:
        1. predicted_class (from ML classification)
        2. category (from rule-based classification)
        3. classification (ticket extractor categorization)
        4. 'other' (fallback for unclassified commits)

        Args:
            commit: Commit data dictionary

        Returns:
            Classification category string
        """
        if commit.get('predicted_class'):
            return commit['predicted_class']

        if commit.get('category'):
            return commit['category']

        if 'classification' in commit:
            return commit['classification']

        return 'other'

    def _write_empty_trends_csv(
        self, output_path: Path, entity_column: str, entity_label: str
    ) -> None:
        """Write an empty trends CSV with the standard header row.

        Args:
            output_path: Path to write the empty CSV file
            entity_column: Name of the grouping column (e.g. 'developer')
            entity_label: Human-readable label used in the log message
        """
        columns = ['week_start', entity_column, 'week_number', 'total_commits']
        for category in self.classification_categories:
            columns.extend([f'{category}_count', f'{category}_pct_change'])

        pd.DataFrame(columns=columns).to_csv(output_path, index=False)
        logger.info(f"Generated empty {entity_label} weekly trends CSV: {output_path}")

    def _write_empty_developer_trends_csv(self, output_path: Path) -> None:
        """Write an empty developer trends CSV with proper headers.

        Args:
            output_path: Path to write the empty CSV file
        """
        self._write_empty_trends_csv(output_path, 'developer', 'developer')

    def _write_empty_project_trends_csv(self, output_path: Path) -> None:
        """Write an empty project trends CSV with proper headers.

        Args:
            output_path: Path to write the empty CSV file
        """
        self._write_empty_trends_csv(output_path, 'project', 'project')
@@ -0,0 +1,5 @@
1
+ """Training module for commit classification."""
2
+
3
+ from .pipeline import CommitClassificationTrainer
4
+
5
+ __all__ = ["CommitClassificationTrainer"]