gitflow-analytics 1.0.1__py3-none-any.whl → 1.3.6__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (119) hide show
  1. gitflow_analytics/__init__.py +11 -11
  2. gitflow_analytics/_version.py +2 -2
  3. gitflow_analytics/classification/__init__.py +31 -0
  4. gitflow_analytics/classification/batch_classifier.py +752 -0
  5. gitflow_analytics/classification/classifier.py +464 -0
  6. gitflow_analytics/classification/feature_extractor.py +725 -0
  7. gitflow_analytics/classification/linguist_analyzer.py +574 -0
  8. gitflow_analytics/classification/model.py +455 -0
  9. gitflow_analytics/cli.py +4490 -378
  10. gitflow_analytics/cli_rich.py +503 -0
  11. gitflow_analytics/config/__init__.py +43 -0
  12. gitflow_analytics/config/errors.py +261 -0
  13. gitflow_analytics/config/loader.py +904 -0
  14. gitflow_analytics/config/profiles.py +264 -0
  15. gitflow_analytics/config/repository.py +124 -0
  16. gitflow_analytics/config/schema.py +441 -0
  17. gitflow_analytics/config/validator.py +154 -0
  18. gitflow_analytics/config.py +44 -398
  19. gitflow_analytics/core/analyzer.py +1320 -172
  20. gitflow_analytics/core/branch_mapper.py +132 -132
  21. gitflow_analytics/core/cache.py +1554 -175
  22. gitflow_analytics/core/data_fetcher.py +1193 -0
  23. gitflow_analytics/core/identity.py +571 -185
  24. gitflow_analytics/core/metrics_storage.py +526 -0
  25. gitflow_analytics/core/progress.py +372 -0
  26. gitflow_analytics/core/schema_version.py +269 -0
  27. gitflow_analytics/extractors/base.py +13 -11
  28. gitflow_analytics/extractors/ml_tickets.py +1100 -0
  29. gitflow_analytics/extractors/story_points.py +77 -59
  30. gitflow_analytics/extractors/tickets.py +841 -89
  31. gitflow_analytics/identity_llm/__init__.py +6 -0
  32. gitflow_analytics/identity_llm/analysis_pass.py +231 -0
  33. gitflow_analytics/identity_llm/analyzer.py +464 -0
  34. gitflow_analytics/identity_llm/models.py +76 -0
  35. gitflow_analytics/integrations/github_integration.py +258 -87
  36. gitflow_analytics/integrations/jira_integration.py +572 -123
  37. gitflow_analytics/integrations/orchestrator.py +206 -82
  38. gitflow_analytics/metrics/activity_scoring.py +322 -0
  39. gitflow_analytics/metrics/branch_health.py +470 -0
  40. gitflow_analytics/metrics/dora.py +542 -179
  41. gitflow_analytics/models/database.py +986 -59
  42. gitflow_analytics/pm_framework/__init__.py +115 -0
  43. gitflow_analytics/pm_framework/adapters/__init__.py +50 -0
  44. gitflow_analytics/pm_framework/adapters/jira_adapter.py +1845 -0
  45. gitflow_analytics/pm_framework/base.py +406 -0
  46. gitflow_analytics/pm_framework/models.py +211 -0
  47. gitflow_analytics/pm_framework/orchestrator.py +652 -0
  48. gitflow_analytics/pm_framework/registry.py +333 -0
  49. gitflow_analytics/qualitative/__init__.py +29 -0
  50. gitflow_analytics/qualitative/chatgpt_analyzer.py +259 -0
  51. gitflow_analytics/qualitative/classifiers/__init__.py +13 -0
  52. gitflow_analytics/qualitative/classifiers/change_type.py +742 -0
  53. gitflow_analytics/qualitative/classifiers/domain_classifier.py +506 -0
  54. gitflow_analytics/qualitative/classifiers/intent_analyzer.py +535 -0
  55. gitflow_analytics/qualitative/classifiers/llm/__init__.py +35 -0
  56. gitflow_analytics/qualitative/classifiers/llm/base.py +193 -0
  57. gitflow_analytics/qualitative/classifiers/llm/batch_processor.py +383 -0
  58. gitflow_analytics/qualitative/classifiers/llm/cache.py +479 -0
  59. gitflow_analytics/qualitative/classifiers/llm/cost_tracker.py +435 -0
  60. gitflow_analytics/qualitative/classifiers/llm/openai_client.py +403 -0
  61. gitflow_analytics/qualitative/classifiers/llm/prompts.py +373 -0
  62. gitflow_analytics/qualitative/classifiers/llm/response_parser.py +287 -0
  63. gitflow_analytics/qualitative/classifiers/llm_commit_classifier.py +607 -0
  64. gitflow_analytics/qualitative/classifiers/risk_analyzer.py +438 -0
  65. gitflow_analytics/qualitative/core/__init__.py +13 -0
  66. gitflow_analytics/qualitative/core/llm_fallback.py +657 -0
  67. gitflow_analytics/qualitative/core/nlp_engine.py +382 -0
  68. gitflow_analytics/qualitative/core/pattern_cache.py +479 -0
  69. gitflow_analytics/qualitative/core/processor.py +673 -0
  70. gitflow_analytics/qualitative/enhanced_analyzer.py +2236 -0
  71. gitflow_analytics/qualitative/example_enhanced_usage.py +420 -0
  72. gitflow_analytics/qualitative/models/__init__.py +25 -0
  73. gitflow_analytics/qualitative/models/schemas.py +306 -0
  74. gitflow_analytics/qualitative/utils/__init__.py +13 -0
  75. gitflow_analytics/qualitative/utils/batch_processor.py +339 -0
  76. gitflow_analytics/qualitative/utils/cost_tracker.py +345 -0
  77. gitflow_analytics/qualitative/utils/metrics.py +361 -0
  78. gitflow_analytics/qualitative/utils/text_processing.py +285 -0
  79. gitflow_analytics/reports/__init__.py +100 -0
  80. gitflow_analytics/reports/analytics_writer.py +550 -18
  81. gitflow_analytics/reports/base.py +648 -0
  82. gitflow_analytics/reports/branch_health_writer.py +322 -0
  83. gitflow_analytics/reports/classification_writer.py +924 -0
  84. gitflow_analytics/reports/cli_integration.py +427 -0
  85. gitflow_analytics/reports/csv_writer.py +1700 -216
  86. gitflow_analytics/reports/data_models.py +504 -0
  87. gitflow_analytics/reports/database_report_generator.py +427 -0
  88. gitflow_analytics/reports/example_usage.py +344 -0
  89. gitflow_analytics/reports/factory.py +499 -0
  90. gitflow_analytics/reports/formatters.py +698 -0
  91. gitflow_analytics/reports/html_generator.py +1116 -0
  92. gitflow_analytics/reports/interfaces.py +489 -0
  93. gitflow_analytics/reports/json_exporter.py +2770 -0
  94. gitflow_analytics/reports/narrative_writer.py +2289 -158
  95. gitflow_analytics/reports/story_point_correlation.py +1144 -0
  96. gitflow_analytics/reports/weekly_trends_writer.py +389 -0
  97. gitflow_analytics/training/__init__.py +5 -0
  98. gitflow_analytics/training/model_loader.py +377 -0
  99. gitflow_analytics/training/pipeline.py +550 -0
  100. gitflow_analytics/tui/__init__.py +5 -0
  101. gitflow_analytics/tui/app.py +724 -0
  102. gitflow_analytics/tui/screens/__init__.py +8 -0
  103. gitflow_analytics/tui/screens/analysis_progress_screen.py +496 -0
  104. gitflow_analytics/tui/screens/configuration_screen.py +523 -0
  105. gitflow_analytics/tui/screens/loading_screen.py +348 -0
  106. gitflow_analytics/tui/screens/main_screen.py +321 -0
  107. gitflow_analytics/tui/screens/results_screen.py +722 -0
  108. gitflow_analytics/tui/widgets/__init__.py +7 -0
  109. gitflow_analytics/tui/widgets/data_table.py +255 -0
  110. gitflow_analytics/tui/widgets/export_modal.py +301 -0
  111. gitflow_analytics/tui/widgets/progress_widget.py +187 -0
  112. gitflow_analytics-1.3.6.dist-info/METADATA +1015 -0
  113. gitflow_analytics-1.3.6.dist-info/RECORD +122 -0
  114. gitflow_analytics-1.0.1.dist-info/METADATA +0 -463
  115. gitflow_analytics-1.0.1.dist-info/RECORD +0 -31
  116. {gitflow_analytics-1.0.1.dist-info → gitflow_analytics-1.3.6.dist-info}/WHEEL +0 -0
  117. {gitflow_analytics-1.0.1.dist-info → gitflow_analytics-1.3.6.dist-info}/entry_points.txt +0 -0
  118. {gitflow_analytics-1.0.1.dist-info → gitflow_analytics-1.3.6.dist-info}/licenses/LICENSE +0 -0
  119. {gitflow_analytics-1.0.1.dist-info → gitflow_analytics-1.3.6.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,506 @@
1
+ """Domain classifier for identifying technical domains of commits."""
2
+
3
+ import importlib.util
4
+ import logging
5
+ import re
6
+ from collections import defaultdict
7
+ from typing import Any
8
+
9
+ from ..models.schemas import DomainConfig
10
+
11
+ # Check if spacy is available without importing it
12
+ SPACY_AVAILABLE = importlib.util.find_spec("spacy") is not None
13
+
14
+ if SPACY_AVAILABLE:
15
+ from spacy.tokens import Doc
16
+ else:
17
+ Doc = Any
18
+
19
+
20
class DomainClassifier:
    """Classify commits by technical domain (frontend, backend, etc.).

    This classifier determines the technical domain or business area
    affected by a commit by analyzing both the commit message content
    and the patterns of files that were changed.

    Domains identified:
    - frontend: UI/UX, client-side code
    - backend: Server-side logic, APIs
    - database: Data models, migrations, queries
    - infrastructure: Deployment, configuration, DevOps
    - mobile: Mobile app development
    - devops: CI/CD, build tools, automation
    """

    # NOTE: annotations are strings so the class can be defined without
    # evaluating DomainConfig / the optional spacy Doc type at class-creation
    # time (Doc is only a real type when spacy is installed).
    def __init__(self, config: "DomainConfig"):
        """Initialize domain classifier.

        Args:
            config: Configuration for domain classification. Must provide
                ``file_patterns`` (domain -> list of glob patterns),
                ``keyword_patterns`` (domain -> list of keywords) and
                ``min_confidence`` (float threshold used by ``classify``).
        """
        self.config = config
        self.logger = logging.getLogger(__name__)

        # Compile file patterns once up front for efficient per-commit matching
        self._compile_file_patterns()

        # Keyword patterns for semantic analysis of commit messages
        # (assumed dict[str, list[str]] — supplied by DomainConfig)
        self.keyword_patterns = config.keyword_patterns

        # Directory patterns that strongly indicate domains. Multi-segment
        # entries (e.g. "src/components") are matched against the full path;
        # single-segment entries are matched against individual path parts.
        self.directory_indicators = {
            "frontend": {
                "src/components",
                "src/pages",
                "src/views",
                "public",
                "assets",
                "static",
                "styles",
                "css",
                "scss",
                "ui",
                "components",
                "pages",
            },
            "backend": {
                "src/controllers",
                "src/services",
                "src/api",
                "api",
                "server",
                "controllers",
                "services",
                "handlers",
                "routes",
                "middleware",
            },
            "database": {
                "migrations",
                "models",
                "schemas",
                "seeds",
                "data",
                "sql",
                "database",
                "db",
                "repositories",
            },
            "infrastructure": {
                "terraform",
                "ansible",
                "k8s",
                "kubernetes",
                "helm",
                "charts",
                "infrastructure",
                "deploy",
                "deployment",
                "ops",
            },
            "mobile": {
                "android",
                "ios",
                "mobile",
                "app",
                "native",
                "react-native",
                "flutter",
                "swift",
                "kotlin",
            },
            "devops": {
                ".github",
                ".gitlab",
                "ci",
                "cd",
                "scripts",
                "build",
                "docker",
                "jenkins",
                "actions",
                "workflows",
            },
        }

        # Technology stack indicators used to boost message-based scores
        # when a domain keyword match is already present.
        self.tech_indicators = {
            "frontend": {
                "react",
                "vue",
                "angular",
                "svelte",
                "jquery",
                "bootstrap",
                "tailwind",
                "css",
                "html",
                "javascript",
                "typescript",
                "jsx",
                "tsx",
            },
            "backend": {
                "django",
                "flask",
                "fastapi",
                "express",
                "spring",
                "rails",
                "laravel",
                "api",
                "endpoint",
                "service",
                "controller",
            },
            "database": {
                "mysql",
                "postgresql",
                "mongodb",
                "redis",
                "elasticsearch",
                "migration",
                "schema",
                "query",
                "orm",
                "sql",
            },
            "infrastructure": {
                "aws",
                "gcp",
                "azure",
                "docker",
                "kubernetes",
                "terraform",
                "ansible",
                "helm",
                "nginx",
                "apache",
            },
            "mobile": {
                "android",
                "ios",
                "swift",
                "kotlin",
                "flutter",
                "react-native",
                "xamarin",
                "cordova",
                "ionic",
            },
            "devops": {
                "jenkins",
                "gitlab-ci",
                "github-actions",
                "circleci",
                "travis",
                "docker",
                "kubernetes",
                "helm",
                "terraform",
            },
        }

    def _compile_file_patterns(self) -> None:
        """Compile configured glob file patterns into case-insensitive regexes.

        Populates ``self.compiled_file_patterns`` (domain -> list of compiled
        patterns). Invalid patterns are logged and skipped rather than
        aborting classifier construction.
        """
        self.compiled_file_patterns = {}

        for domain, patterns in self.config.file_patterns.items():
            compiled_patterns = []
            for pattern in patterns:
                try:
                    # Convert glob patterns to regex
                    regex_pattern = self._glob_to_regex(pattern)
                    compiled_patterns.append(re.compile(regex_pattern, re.IGNORECASE))
                except re.error as e:
                    # Best-effort: a bad pattern disables itself, not the domain
                    self.logger.warning(
                        f"Invalid file pattern '{pattern}' for domain {domain}: {e}"
                    )

            self.compiled_file_patterns[domain] = compiled_patterns

    def _glob_to_regex(self, pattern: str) -> str:
        """Convert glob pattern to regex.

        Args:
            pattern: Glob pattern (e.g., '*.js', '**/models/**')

        Returns:
            Equivalent anchored regex pattern.

        Note:
            This is a deliberately simple conversion: '*' maps to '.*' (and
            therefore also matches '/'), so '*.js' matches paths in any
            directory. '**' degenerates to '.*.*', which matches the same
            strings as '.*'.
        """
        # Escape literal dots first, then widen the glob wildcards.
        pattern = pattern.replace(".", r"\.")
        pattern = pattern.replace("*", ".*")
        pattern = pattern.replace("?", ".")
        pattern = f"^{pattern}$"
        return pattern

    def classify(self, message: str, doc: "Doc", files: list[str]) -> tuple[str, float]:
        """Classify commit domain with confidence score.

        Args:
            message: Commit message
            doc: spaCy processed document (may be None)
            files: List of changed files

        Returns:
            Tuple of (domain, confidence_score). Returns ("unknown", score)
            when there is no input or the best score is below
            ``config.min_confidence``.
        """
        if not message and not files:
            return "unknown", 0.0

        # Analyze file patterns (primary signal)
        file_scores = self._analyze_file_patterns(files)

        # Analyze directory patterns
        dir_scores = self._analyze_directory_patterns(files)

        # Analyze message content
        message_scores = self._analyze_message_content(message, doc)

        # Combine all signals
        combined_scores = self._combine_domain_scores(file_scores, dir_scores, message_scores)

        if not combined_scores:
            return "unknown", 0.0

        # Select best domain
        best_domain = max(combined_scores.keys(), key=lambda k: combined_scores[k])
        confidence = combined_scores[best_domain]

        # Apply confidence threshold
        if confidence < self.config.min_confidence:
            return "unknown", confidence

        return best_domain, confidence

    def _analyze_file_patterns(self, files: list[str]) -> dict[str, float]:
        """Analyze file patterns to determine domain.

        Args:
            files: List of file paths

        Returns:
            Dictionary of domain -> confidence_score
        """
        if not files:
            return {}

        domain_matches = defaultdict(int)

        for file_path in files:
            for domain, patterns in self.compiled_file_patterns.items():
                for pattern in patterns:
                    if pattern.search(file_path):
                        domain_matches[domain] += 1
                        break  # Don't double-count same file for same domain

        # Convert to confidence scores
        scores = {}
        total_files = len(files)

        for domain, matches in domain_matches.items():
            # Confidence based on proportion of matching files
            confidence = matches / total_files
            scores[domain] = min(1.0, confidence * 2)  # Boost confidence for strong signals

        return scores

    def _analyze_directory_patterns(self, files: list[str]) -> dict[str, float]:
        """Analyze directory patterns for domain signals.

        Args:
            files: List of file paths

        Returns:
            Dictionary of domain -> confidence_score (normalized so the
            strongest domain scores 1.0)
        """
        if not files:
            return {}

        domain_scores = defaultdict(float)

        for file_path in files:
            # Normalize path separators and convert to lowercase
            normalized_path = file_path.replace("\\", "/").lower()
            path_parts = normalized_path.split("/")

            # Check each domain's directory indicators
            for domain, indicators in self.directory_indicators.items():
                for indicator in indicators:
                    # Single path segment containing the indicator: full credit
                    if any(indicator in part for part in path_parts):
                        domain_scores[domain] += 1.0
                        break
                    # Multi-segment indicators (e.g. "src/components") can only
                    # match the joined path: partial credit, keep scanning
                    elif indicator in normalized_path:
                        domain_scores[domain] += 0.5

        # Normalize scores relative to the strongest domain
        scores = {}
        max_score = max(domain_scores.values()) if domain_scores else 0

        if max_score > 0:
            for domain, score in domain_scores.items():
                scores[domain] = min(1.0, score / max_score)

        return scores

    def _analyze_message_content(self, message: str, doc: "Doc") -> dict[str, float]:
        """Analyze commit message content for domain keywords.

        Args:
            message: Commit message
            doc: spaCy processed document (may be None)

        Returns:
            Dictionary of domain -> confidence_score
        """
        if not message:
            return {}

        # Convert message to lowercase for analysis
        message_lower = message.lower()

        # Extract keywords from message
        if SPACY_AVAILABLE and doc:
            # Use spaCy for better keyword extraction
            keywords = self._extract_keywords_from_doc(doc)
        else:
            # Fallback to simple word extraction
            keywords = set(re.findall(r"\b\w+\b", message_lower))

        # Score domains based on keyword matches
        domain_scores = {}

        for domain, domain_keywords in self.keyword_patterns.items():
            keyword_matches = len(
                keywords.intersection(set(word.lower() for word in domain_keywords))
            )

            if keyword_matches > 0:
                # Base score from keyword matches (3+ matches saturate at 1.0)
                base_score = min(1.0, keyword_matches / 3.0)

                # Boost score for technology indicators (capped at +0.3)
                tech_keywords = self.tech_indicators.get(domain, set())
                tech_matches = len(keywords.intersection(tech_keywords))
                tech_boost = min(0.3, tech_matches * 0.1)

                domain_scores[domain] = min(1.0, base_score + tech_boost)

        return domain_scores

    def _extract_keywords_from_doc(self, doc: "Doc") -> set[str]:
        """Extract meaningful keywords from spaCy document.

        Args:
            doc: spaCy processed document

        Returns:
            Set of extracted keywords (lowercased lemmas of content words
            plus named-entity surface forms)
        """
        keywords = set()

        for token in doc:
            if (
                not token.is_stop
                and not token.is_punct
                and len(token.text) > 2
                and token.pos_ in ["NOUN", "PROPN", "ADJ", "VERB"]
            ):
                keywords.add(token.lemma_.lower())

        # Add named entities
        for ent in doc.ents:
            if len(ent.text) > 2:
                keywords.add(ent.text.lower())

        return keywords

    def _combine_domain_scores(
        self,
        file_scores: dict[str, float],
        dir_scores: dict[str, float],
        message_scores: dict[str, float],
    ) -> dict[str, float]:
        """Combine scores from different analysis methods.

        Args:
            file_scores: Scores from file pattern analysis
            dir_scores: Scores from directory pattern analysis
            message_scores: Scores from message content analysis

        Returns:
            Combined scores dictionary (only domains with a positive
            combined score are included; scores capped at 1.0)
        """
        all_domains = set(file_scores.keys()) | set(dir_scores.keys()) | set(message_scores.keys())
        combined_scores = {}

        # Weights for different signal types
        weights = {
            "file": 0.5,  # File patterns are strongest signal
            "directory": 0.3,  # Directory patterns are also strong
            "message": 0.2,  # Message content provides additional context
        }

        for domain in all_domains:
            file_score = file_scores.get(domain, 0.0)
            dir_score = dir_scores.get(domain, 0.0)
            message_score = message_scores.get(domain, 0.0)

            # Weighted combination
            combined_score = (
                file_score * weights["file"]
                + dir_score * weights["directory"]
                + message_score * weights["message"]
            )

            # Bonus for multiple signal types agreeing
            signal_count = sum(1 for score in [file_score, dir_score, message_score] if score > 0)
            if signal_count > 1:
                combined_score *= 1.0 + (signal_count - 1) * 0.1  # 10% bonus per additional signal

            if combined_score > 0:
                combined_scores[domain] = min(1.0, combined_score)

        return combined_scores

    def get_domain_statistics(self, files: list[str]) -> dict[str, Any]:
        """Get detailed domain analysis statistics for debugging.

        Args:
            files: List of file paths

        Returns:
            Dictionary with detailed analysis breakdown
        """
        stats = {
            "total_files": len(files),
            "file_analysis": self._analyze_file_patterns(files),
            "directory_analysis": self._analyze_directory_patterns(files),
            "file_extensions": {},
            "directory_breakdown": {},
        }

        # File extension breakdown
        extensions = defaultdict(int)
        directories = defaultdict(int)

        for file_path in files:
            # Normalize separators to match _analyze_directory_patterns
            normalized_path = file_path.replace("\\", "/")
            path_parts = normalized_path.split("/")

            # Extract extension from the basename only — splitting the whole
            # path would misreport files inside dotted directories
            # (e.g. "src.v2/Makefile" is extensionless, not "v2/makefile").
            basename = path_parts[-1]
            if "." in basename:
                ext = basename.split(".")[-1].lower()
                extensions[ext] += 1

            # Extract directories
            for part in path_parts[:-1]:  # Exclude filename
                if part:
                    directories[part] += 1

        stats["file_extensions"] = dict(extensions)
        stats["directory_breakdown"] = dict(directories)

        return stats