gitflow-analytics 1.0.1__py3-none-any.whl → 1.3.6__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (119) hide show
  1. gitflow_analytics/__init__.py +11 -11
  2. gitflow_analytics/_version.py +2 -2
  3. gitflow_analytics/classification/__init__.py +31 -0
  4. gitflow_analytics/classification/batch_classifier.py +752 -0
  5. gitflow_analytics/classification/classifier.py +464 -0
  6. gitflow_analytics/classification/feature_extractor.py +725 -0
  7. gitflow_analytics/classification/linguist_analyzer.py +574 -0
  8. gitflow_analytics/classification/model.py +455 -0
  9. gitflow_analytics/cli.py +4490 -378
  10. gitflow_analytics/cli_rich.py +503 -0
  11. gitflow_analytics/config/__init__.py +43 -0
  12. gitflow_analytics/config/errors.py +261 -0
  13. gitflow_analytics/config/loader.py +904 -0
  14. gitflow_analytics/config/profiles.py +264 -0
  15. gitflow_analytics/config/repository.py +124 -0
  16. gitflow_analytics/config/schema.py +441 -0
  17. gitflow_analytics/config/validator.py +154 -0
  18. gitflow_analytics/config.py +44 -398
  19. gitflow_analytics/core/analyzer.py +1320 -172
  20. gitflow_analytics/core/branch_mapper.py +132 -132
  21. gitflow_analytics/core/cache.py +1554 -175
  22. gitflow_analytics/core/data_fetcher.py +1193 -0
  23. gitflow_analytics/core/identity.py +571 -185
  24. gitflow_analytics/core/metrics_storage.py +526 -0
  25. gitflow_analytics/core/progress.py +372 -0
  26. gitflow_analytics/core/schema_version.py +269 -0
  27. gitflow_analytics/extractors/base.py +13 -11
  28. gitflow_analytics/extractors/ml_tickets.py +1100 -0
  29. gitflow_analytics/extractors/story_points.py +77 -59
  30. gitflow_analytics/extractors/tickets.py +841 -89
  31. gitflow_analytics/identity_llm/__init__.py +6 -0
  32. gitflow_analytics/identity_llm/analysis_pass.py +231 -0
  33. gitflow_analytics/identity_llm/analyzer.py +464 -0
  34. gitflow_analytics/identity_llm/models.py +76 -0
  35. gitflow_analytics/integrations/github_integration.py +258 -87
  36. gitflow_analytics/integrations/jira_integration.py +572 -123
  37. gitflow_analytics/integrations/orchestrator.py +206 -82
  38. gitflow_analytics/metrics/activity_scoring.py +322 -0
  39. gitflow_analytics/metrics/branch_health.py +470 -0
  40. gitflow_analytics/metrics/dora.py +542 -179
  41. gitflow_analytics/models/database.py +986 -59
  42. gitflow_analytics/pm_framework/__init__.py +115 -0
  43. gitflow_analytics/pm_framework/adapters/__init__.py +50 -0
  44. gitflow_analytics/pm_framework/adapters/jira_adapter.py +1845 -0
  45. gitflow_analytics/pm_framework/base.py +406 -0
  46. gitflow_analytics/pm_framework/models.py +211 -0
  47. gitflow_analytics/pm_framework/orchestrator.py +652 -0
  48. gitflow_analytics/pm_framework/registry.py +333 -0
  49. gitflow_analytics/qualitative/__init__.py +29 -0
  50. gitflow_analytics/qualitative/chatgpt_analyzer.py +259 -0
  51. gitflow_analytics/qualitative/classifiers/__init__.py +13 -0
  52. gitflow_analytics/qualitative/classifiers/change_type.py +742 -0
  53. gitflow_analytics/qualitative/classifiers/domain_classifier.py +506 -0
  54. gitflow_analytics/qualitative/classifiers/intent_analyzer.py +535 -0
  55. gitflow_analytics/qualitative/classifiers/llm/__init__.py +35 -0
  56. gitflow_analytics/qualitative/classifiers/llm/base.py +193 -0
  57. gitflow_analytics/qualitative/classifiers/llm/batch_processor.py +383 -0
  58. gitflow_analytics/qualitative/classifiers/llm/cache.py +479 -0
  59. gitflow_analytics/qualitative/classifiers/llm/cost_tracker.py +435 -0
  60. gitflow_analytics/qualitative/classifiers/llm/openai_client.py +403 -0
  61. gitflow_analytics/qualitative/classifiers/llm/prompts.py +373 -0
  62. gitflow_analytics/qualitative/classifiers/llm/response_parser.py +287 -0
  63. gitflow_analytics/qualitative/classifiers/llm_commit_classifier.py +607 -0
  64. gitflow_analytics/qualitative/classifiers/risk_analyzer.py +438 -0
  65. gitflow_analytics/qualitative/core/__init__.py +13 -0
  66. gitflow_analytics/qualitative/core/llm_fallback.py +657 -0
  67. gitflow_analytics/qualitative/core/nlp_engine.py +382 -0
  68. gitflow_analytics/qualitative/core/pattern_cache.py +479 -0
  69. gitflow_analytics/qualitative/core/processor.py +673 -0
  70. gitflow_analytics/qualitative/enhanced_analyzer.py +2236 -0
  71. gitflow_analytics/qualitative/example_enhanced_usage.py +420 -0
  72. gitflow_analytics/qualitative/models/__init__.py +25 -0
  73. gitflow_analytics/qualitative/models/schemas.py +306 -0
  74. gitflow_analytics/qualitative/utils/__init__.py +13 -0
  75. gitflow_analytics/qualitative/utils/batch_processor.py +339 -0
  76. gitflow_analytics/qualitative/utils/cost_tracker.py +345 -0
  77. gitflow_analytics/qualitative/utils/metrics.py +361 -0
  78. gitflow_analytics/qualitative/utils/text_processing.py +285 -0
  79. gitflow_analytics/reports/__init__.py +100 -0
  80. gitflow_analytics/reports/analytics_writer.py +550 -18
  81. gitflow_analytics/reports/base.py +648 -0
  82. gitflow_analytics/reports/branch_health_writer.py +322 -0
  83. gitflow_analytics/reports/classification_writer.py +924 -0
  84. gitflow_analytics/reports/cli_integration.py +427 -0
  85. gitflow_analytics/reports/csv_writer.py +1700 -216
  86. gitflow_analytics/reports/data_models.py +504 -0
  87. gitflow_analytics/reports/database_report_generator.py +427 -0
  88. gitflow_analytics/reports/example_usage.py +344 -0
  89. gitflow_analytics/reports/factory.py +499 -0
  90. gitflow_analytics/reports/formatters.py +698 -0
  91. gitflow_analytics/reports/html_generator.py +1116 -0
  92. gitflow_analytics/reports/interfaces.py +489 -0
  93. gitflow_analytics/reports/json_exporter.py +2770 -0
  94. gitflow_analytics/reports/narrative_writer.py +2289 -158
  95. gitflow_analytics/reports/story_point_correlation.py +1144 -0
  96. gitflow_analytics/reports/weekly_trends_writer.py +389 -0
  97. gitflow_analytics/training/__init__.py +5 -0
  98. gitflow_analytics/training/model_loader.py +377 -0
  99. gitflow_analytics/training/pipeline.py +550 -0
  100. gitflow_analytics/tui/__init__.py +5 -0
  101. gitflow_analytics/tui/app.py +724 -0
  102. gitflow_analytics/tui/screens/__init__.py +8 -0
  103. gitflow_analytics/tui/screens/analysis_progress_screen.py +496 -0
  104. gitflow_analytics/tui/screens/configuration_screen.py +523 -0
  105. gitflow_analytics/tui/screens/loading_screen.py +348 -0
  106. gitflow_analytics/tui/screens/main_screen.py +321 -0
  107. gitflow_analytics/tui/screens/results_screen.py +722 -0
  108. gitflow_analytics/tui/widgets/__init__.py +7 -0
  109. gitflow_analytics/tui/widgets/data_table.py +255 -0
  110. gitflow_analytics/tui/widgets/export_modal.py +301 -0
  111. gitflow_analytics/tui/widgets/progress_widget.py +187 -0
  112. gitflow_analytics-1.3.6.dist-info/METADATA +1015 -0
  113. gitflow_analytics-1.3.6.dist-info/RECORD +122 -0
  114. gitflow_analytics-1.0.1.dist-info/METADATA +0 -463
  115. gitflow_analytics-1.0.1.dist-info/RECORD +0 -31
  116. {gitflow_analytics-1.0.1.dist-info → gitflow_analytics-1.3.6.dist-info}/WHEEL +0 -0
  117. {gitflow_analytics-1.0.1.dist-info → gitflow_analytics-1.3.6.dist-info}/entry_points.txt +0 -0
  118. {gitflow_analytics-1.0.1.dist-info → gitflow_analytics-1.3.6.dist-info}/licenses/LICENSE +0 -0
  119. {gitflow_analytics-1.0.1.dist-info → gitflow_analytics-1.3.6.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,285 @@
1
+ """Text processing utilities for qualitative analysis."""
2
+
3
+ import hashlib
4
+ import re
5
+
6
+
7
class TextProcessor:
    """Utility class for text preprocessing and feature extraction.

    Provides the text operations shared across the qualitative analysis
    pipeline: commit-message normalization, keyword extraction, semantic
    fingerprinting, keyword-overlap similarity, file-path pattern counting
    and simple commit complexity metrics.
    """

    # Names counted under "special_files" by extract_file_patterns(). They are
    # matched as substrings of the lowercased basename, so "readme" also
    # matches "README.md".
    _SPECIAL_FILES: tuple[str, ...] = (
        "dockerfile",
        "makefile",
        "readme",
        "license",
        "changelog",
        "package.json",
        "requirements.txt",
        "setup.py",
        "pom.xml",
    )

    def __init__(self) -> None:
        """Initialize text processor with common patterns."""
        # Common patterns for normalization
        self.url_pattern = re.compile(r"https?://[^\s]+")
        # The TLD class is [A-Za-z]: a "|" inside a character class is a
        # literal pipe, not alternation, so it must not appear here.
        self.email_pattern = re.compile(r"\b[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\.[A-Za-z]{2,}\b")
        self.hash_pattern = re.compile(r"\b[a-f0-9]{7,40}\b")  # Git hashes
        self.ticket_pattern = re.compile(r"\b(?:JIRA|TICKET|ISSUE|BUG|TASK)-?\d+\b", re.IGNORECASE)

        # Stop words for feature extraction
        self.stop_words: set[str] = {
            "the", "a", "an", "and", "or", "but", "in", "on", "at", "to",
            "for", "of", "with", "by", "is", "are", "was", "were", "be",
            "been", "being", "have", "has", "had", "do", "does", "did",
            "will", "would", "could", "should", "may", "might", "can",
            "this", "that", "these", "those",
        }

    def normalize_message(self, message: str) -> str:
        """Normalize commit message for consistent processing.

        Lowercases the message and replaces URLs, e-mail addresses, ticket
        references and hex hashes with the placeholders ``[URL]``,
        ``[EMAIL]``, ``[TICKET]`` and ``[HASH]``, then collapses runs of
        whitespace into single spaces.

        Args:
            message: Raw commit message

        Returns:
            Normalized message suitable for classification; an empty string
            when ``message`` is empty.
        """
        if not message:
            return ""

        # Convert to lowercase for consistency
        normalized = message.lower().strip()

        normalized = self.url_pattern.sub("[URL]", normalized)
        normalized = self.email_pattern.sub("[EMAIL]", normalized)

        # Tickets must be replaced before hashes: the digits of a reference
        # such as "jira-12345678" also satisfy the hash pattern, which would
        # otherwise leave "jira-[HASH]" and hide the ticket.
        normalized = self.ticket_pattern.sub("[TICKET]", normalized)
        normalized = self.hash_pattern.sub("[HASH]", normalized)

        # Remove extra whitespace
        normalized = re.sub(r"\s+", " ", normalized)

        return normalized.strip()

    def extract_keywords(self, text: str, min_length: int = 3) -> list[str]:
        """Extract meaningful keywords from text.

        Finds purely alphabetic words in the lowercased text and drops stop
        words and words shorter than ``min_length``. Order and duplicates
        are preserved.

        Args:
            text: Input text to extract keywords from
            min_length: Minimum length for keywords

        Returns:
            List of extracted keywords (empty when ``text`` is empty)
        """
        if not text:
            return []

        candidates = re.findall(r"\b[a-zA-Z]+\b", text.lower())
        return [
            word
            for word in candidates
            if len(word) >= min_length and word not in self.stop_words
        ]

    def create_semantic_fingerprint(self, message: str, files: list[str]) -> str:
        """Create a semantic fingerprint for similarity matching.

        The fingerprint is the MD5 digest of the sorted first ten keywords
        of the normalized message joined with the sorted, de-duplicated
        extension/directory markers of the first ten files.

        Args:
            message: Commit message
            files: List of changed files

        Returns:
            Hex-encoded fingerprint string (32 hexadecimal characters)
        """
        keywords = self.extract_keywords(self.normalize_message(message))

        file_patterns: set[str] = set()
        for file_path in files[:10]:  # Limit to prevent huge fingerprints
            if "." in file_path:
                # The extension is still taken from the whole path, exactly as
                # before, so the fingerprint text (and therefore the digest)
                # is unchanged for every input.
                ext = file_path.rsplit(".", 1)[-1].lower()
                file_patterns.add(f"ext:{ext}")

            parts = file_path.split("/")
            if len(parts) > 1:
                # First directory
                file_patterns.add(f"dir:{parts[0]}")
                # Last directory before file
                if len(parts) > 2:
                    file_patterns.add(f"dir:{parts[-2]}")

        semantic_elements = sorted(keywords[:10]) + sorted(file_patterns)
        fingerprint_text = "|".join(semantic_elements)

        # MD5 is used purely as a compact, stable key here, not for security;
        # saying so keeps the call usable on builds that restrict MD5.
        return hashlib.md5(fingerprint_text.encode(), usedforsecurity=False).hexdigest()

    def calculate_message_similarity(self, msg1: str, msg2: str) -> float:
        """Calculate semantic similarity between two commit messages.

        Computes the Jaccard similarity (intersection over union) of the
        keyword sets extracted from the two normalized messages.

        Args:
            msg1: First commit message
            msg2: Second commit message

        Returns:
            Similarity score between 0.0 and 1.0; 0.0 when either message is
            empty or yields no keywords.
        """
        if not msg1 or not msg2:
            return 0.0

        keywords1 = set(self.extract_keywords(self.normalize_message(msg1)))
        keywords2 = set(self.extract_keywords(self.normalize_message(msg2)))

        if not keywords1 or not keywords2:
            return 0.0

        # Both sets are non-empty here, so the union is never empty.
        return len(keywords1 & keywords2) / len(keywords1 | keywords2)

    def extract_file_patterns(self, files: list[str]) -> dict[str, dict[str, int]]:
        """Extract file patterns for domain classification.

        Counts, across all given paths, the file extensions, every directory
        component, and occurrences of well-known special file names.

        Args:
            files: List of file paths (``/``-separated)

        Returns:
            Dictionary with the keys ``"extensions"``, ``"directories"`` and
            ``"special_files"``, each mapping a pattern to its count.
        """
        extensions: dict[str, int] = {}
        directories: dict[str, int] = {}
        special_files: dict[str, int] = {}

        for file_path in files:
            parts = file_path.split("/")
            filename = parts[-1].lower()

            # Take the extension from the basename only, so a dot in a
            # directory name ("a.b/Makefile") is not mistaken for one.
            if "." in filename:
                ext = filename.rsplit(".", 1)[-1]
                if ext:  # a trailing dot carries no extension
                    extensions[ext] = extensions.get(ext, 0) + 1

            # Directory patterns (everything except the filename)
            for part in parts[:-1]:
                if part:  # Skip empty parts, e.g. from a leading "/"
                    directories[part] = directories.get(part, 0) + 1

            # Special files
            for special in self._SPECIAL_FILES:
                if special in filename:
                    special_files[special] = special_files.get(special, 0) + 1

        return {
            "extensions": extensions,
            "directories": directories,
            "special_files": special_files,
        }

    def calculate_commit_complexity(
        self, message: str, files: list[str], insertions: int, deletions: int
    ) -> dict[str, float]:
        """Calculate various complexity metrics for a commit.

        Each partial score is capped at 1.0: keywords are scaled by 10,
        changed files by 20 and changed lines by 500. The overall score is
        the weighted sum 0.2 * message + 0.3 * files + 0.5 * changes.

        Args:
            message: Commit message
            files: List of changed files
            insertions: Number of lines inserted
            deletions: Number of lines deleted

        Returns:
            Dictionary of complexity metrics with the keys
            ``message_length``, ``keyword_count``, ``message_complexity``,
            ``files_changed``, ``file_complexity``, ``total_changes``,
            ``change_complexity`` and ``overall_complexity``.
        """
        # Tolerate a missing message the same way the other methods do.
        text = message or ""
        keyword_count = len(self.extract_keywords(text))
        total_changes = insertions + deletions

        message_complexity = min(1.0, keyword_count / 10.0)
        file_complexity = min(1.0, len(files) / 20.0)
        change_complexity = min(1.0, total_changes / 500.0)

        return {
            # Message complexity (length, keywords)
            "message_length": len(text),
            "keyword_count": keyword_count,
            "message_complexity": message_complexity,
            # File complexity
            "files_changed": len(files),
            "file_complexity": file_complexity,
            # Line change complexity
            "total_changes": total_changes,
            "change_complexity": change_complexity,
            # Overall complexity score (0.0 to 1.0)
            "overall_complexity": (
                message_complexity * 0.2
                + file_complexity * 0.3
                + change_complexity * 0.5
            ),
        }
@@ -0,0 +1,100 @@
1
+ # Reports package
2
+
3
+ # Legacy imports for backward compatibility
4
+ from .analytics_writer import AnalyticsReportGenerator
5
+
6
+ # New abstraction layer components
7
+ from .base import (
8
+ BaseReportGenerator,
9
+ ChainedReportGenerator,
10
+ CompositeReportGenerator,
11
+ ReportData,
12
+ ReportMetadata,
13
+ ReportOutput,
14
+ )
15
+ from .csv_writer import CSVReportGenerator
16
+ from .data_models import (
17
+ CommitData,
18
+ CommitType,
19
+ DeveloperIdentity,
20
+ DeveloperMetrics,
21
+ DORAMetrics,
22
+ ProjectMetrics,
23
+ PullRequestData,
24
+ ReportSummary,
25
+ TicketMetrics,
26
+ WeeklyMetrics,
27
+ WorkStyle,
28
+ )
29
+ from .factory import (
30
+ ReportBuilder,
31
+ ReportFactory,
32
+ create_multiple_reports,
33
+ create_report,
34
+ get_default_factory,
35
+ )
36
+ from .formatters import (
37
+ CSVFormatter,
38
+ DateFormatter,
39
+ JSONFormatter,
40
+ MarkdownFormatter,
41
+ MetricFormatter,
42
+ NumberFormatter,
43
+ TextFormatter,
44
+ )
45
+ from .html_generator import HTMLReportGenerator
46
+ from .interfaces import ReportField, ReportFormat, ReportSchema, ReportType
47
+ from .json_exporter import ComprehensiveJSONExporter
48
+ from .narrative_writer import NarrativeReportGenerator
49
+
50
# Public API of the reports package, grouped by the module family each name
# is imported from above.
__all__ = [
    # Concrete report generators and exporters
    "CSVReportGenerator",
    "AnalyticsReportGenerator",
    "NarrativeReportGenerator",
    "ComprehensiveJSONExporter",
    "HTMLReportGenerator",
    # Base classes (from .base)
    "BaseReportGenerator",
    "CompositeReportGenerator",
    "ChainedReportGenerator",
    "ReportData",
    "ReportOutput",
    "ReportMetadata",
    # Interfaces (from .interfaces)
    "ReportFormat",
    "ReportType",
    "ReportField",
    "ReportSchema",
    # Factory helpers (from .factory)
    "ReportFactory",
    "ReportBuilder",
    "create_report",
    "create_multiple_reports",
    "get_default_factory",
    # Formatters (from .formatters)
    "DateFormatter",
    "NumberFormatter",
    "TextFormatter",
    "MarkdownFormatter",
    "CSVFormatter",
    "JSONFormatter",
    "MetricFormatter",
    # Data models (from .data_models)
    "CommitData",
    "PullRequestData",
    "DeveloperMetrics",
    "ProjectMetrics",
    "WeeklyMetrics",
    "TicketMetrics",
    "DORAMetrics",
    "ReportSummary",
    "DeveloperIdentity",
    "CommitType",
    "WorkStyle",
]