gitflow-analytics 1.0.1__py3-none-any.whl → 1.0.3__py3-none-any.whl

This diff shows the content of publicly available package versions released to one of the supported registries. It is provided for informational purposes only and reflects the changes between the two versions as they appear in their public registry.
Files changed (57)
  1. gitflow_analytics/__init__.py +11 -11
  2. gitflow_analytics/_version.py +2 -2
  3. gitflow_analytics/cli.py +612 -258
  4. gitflow_analytics/cli_rich.py +353 -0
  5. gitflow_analytics/config.py +251 -141
  6. gitflow_analytics/core/analyzer.py +140 -103
  7. gitflow_analytics/core/branch_mapper.py +132 -132
  8. gitflow_analytics/core/cache.py +240 -169
  9. gitflow_analytics/core/identity.py +210 -173
  10. gitflow_analytics/extractors/base.py +13 -11
  11. gitflow_analytics/extractors/story_points.py +70 -59
  12. gitflow_analytics/extractors/tickets.py +101 -87
  13. gitflow_analytics/integrations/github_integration.py +84 -77
  14. gitflow_analytics/integrations/jira_integration.py +116 -104
  15. gitflow_analytics/integrations/orchestrator.py +86 -85
  16. gitflow_analytics/metrics/dora.py +181 -177
  17. gitflow_analytics/models/database.py +190 -53
  18. gitflow_analytics/qualitative/__init__.py +30 -0
  19. gitflow_analytics/qualitative/classifiers/__init__.py +13 -0
  20. gitflow_analytics/qualitative/classifiers/change_type.py +468 -0
  21. gitflow_analytics/qualitative/classifiers/domain_classifier.py +399 -0
  22. gitflow_analytics/qualitative/classifiers/intent_analyzer.py +436 -0
  23. gitflow_analytics/qualitative/classifiers/risk_analyzer.py +412 -0
  24. gitflow_analytics/qualitative/core/__init__.py +13 -0
  25. gitflow_analytics/qualitative/core/llm_fallback.py +653 -0
  26. gitflow_analytics/qualitative/core/nlp_engine.py +373 -0
  27. gitflow_analytics/qualitative/core/pattern_cache.py +457 -0
  28. gitflow_analytics/qualitative/core/processor.py +540 -0
  29. gitflow_analytics/qualitative/models/__init__.py +25 -0
  30. gitflow_analytics/qualitative/models/schemas.py +272 -0
  31. gitflow_analytics/qualitative/utils/__init__.py +13 -0
  32. gitflow_analytics/qualitative/utils/batch_processor.py +326 -0
  33. gitflow_analytics/qualitative/utils/cost_tracker.py +343 -0
  34. gitflow_analytics/qualitative/utils/metrics.py +347 -0
  35. gitflow_analytics/qualitative/utils/text_processing.py +243 -0
  36. gitflow_analytics/reports/analytics_writer.py +11 -4
  37. gitflow_analytics/reports/csv_writer.py +51 -31
  38. gitflow_analytics/reports/narrative_writer.py +16 -14
  39. gitflow_analytics/tui/__init__.py +5 -0
  40. gitflow_analytics/tui/app.py +721 -0
  41. gitflow_analytics/tui/screens/__init__.py +8 -0
  42. gitflow_analytics/tui/screens/analysis_progress_screen.py +487 -0
  43. gitflow_analytics/tui/screens/configuration_screen.py +547 -0
  44. gitflow_analytics/tui/screens/loading_screen.py +358 -0
  45. gitflow_analytics/tui/screens/main_screen.py +304 -0
  46. gitflow_analytics/tui/screens/results_screen.py +698 -0
  47. gitflow_analytics/tui/widgets/__init__.py +7 -0
  48. gitflow_analytics/tui/widgets/data_table.py +257 -0
  49. gitflow_analytics/tui/widgets/export_modal.py +301 -0
  50. gitflow_analytics/tui/widgets/progress_widget.py +192 -0
  51. {gitflow_analytics-1.0.1.dist-info → gitflow_analytics-1.0.3.dist-info}/METADATA +31 -4
  52. gitflow_analytics-1.0.3.dist-info/RECORD +62 -0
  53. gitflow_analytics-1.0.1.dist-info/RECORD +0 -31
  54. {gitflow_analytics-1.0.1.dist-info → gitflow_analytics-1.0.3.dist-info}/WHEEL +0 -0
  55. {gitflow_analytics-1.0.1.dist-info → gitflow_analytics-1.0.3.dist-info}/entry_points.txt +0 -0
  56. {gitflow_analytics-1.0.1.dist-info → gitflow_analytics-1.0.3.dist-info}/licenses/LICENSE +0 -0
  57. {gitflow_analytics-1.0.1.dist-info → gitflow_analytics-1.0.3.dist-info}/top_level.txt +0 -0
gitflow_analytics/qualitative/core/pattern_cache.py
@@ -0,0 +1,457 @@
+ """Pattern caching system for qualitative analysis optimization."""
+
+ import hashlib
+ import json
+ import logging
+ import time
+ from datetime import datetime, timedelta
+ from pathlib import Path
+ from typing import Dict, List, Optional, Any, Tuple
+ from collections import defaultdict
+
+ from sqlalchemy.orm import Session
+ from sqlalchemy import and_, desc
+
+ from ...models.database import Database, PatternCache as PatternCacheModel
+ from ..models.schemas import CacheConfig, QualitativeCommitData
+ from ..utils.text_processing import TextProcessor
+
+
+ class PatternCache:
+     """Intelligent caching system for qualitative analysis patterns.
+
+     This system learns from successful classifications to speed up future
+     processing and improve accuracy through pattern recognition.
+
+     Features:
+     - Semantic fingerprinting for pattern matching
+     - Hit count tracking for popular patterns
+     - Automatic cache cleanup and optimization
+     - Pattern learning from successful classifications
+     """
+
+     def __init__(self, config: CacheConfig, database: Database):
+         """Initialize pattern cache.
+
+         Args:
+             config: Cache configuration
+             database: Database instance for persistence
+         """
+         self.config = config
+         self.database = database
+         self.logger = logging.getLogger(__name__)
+
+         # Initialize text processor for fingerprinting
+         self.text_processor = TextProcessor()
+
+         # In-memory cache for frequently accessed patterns
+         self._memory_cache: Dict[str, Dict[str, Any]] = {}
+         self._memory_cache_hits = defaultdict(int)
+
+         # Statistics
+         self.cache_hits = 0
+         self.cache_misses = 0
+         self.pattern_learning_count = 0
+
+         # Initialize cache directory if using file-based caching
+         cache_dir = Path(config.cache_dir)
+         cache_dir.mkdir(exist_ok=True, parents=True)
+
+         self.logger.info(f"Pattern cache initialized with TTL: {config.pattern_cache_ttl_hours}h")
+
+     def lookup_pattern(self, message: str, files: List[str]) -> Optional[Dict[str, Any]]:
+         """Look up cached classification for a commit pattern.
+
+         Args:
+             message: Commit message
+             files: List of changed files
+
+         Returns:
+             Cached classification result or None if not found
+         """
+         # Generate semantic fingerprint
+         fingerprint = self.text_processor.create_semantic_fingerprint(message, files)
+
+         # Check in-memory cache first
+         if fingerprint in self._memory_cache:
+             self._memory_cache_hits[fingerprint] += 1
+             self.cache_hits += 1
+             self.logger.debug(f"Memory cache hit for pattern: {fingerprint[:8]}")
+             return self._memory_cache[fingerprint]
+
+         # Check database cache
+         with self.database.get_session() as session:
+             cached_pattern = session.query(PatternCacheModel).filter(
+                 PatternCacheModel.semantic_fingerprint == fingerprint
+             ).first()
+
+             if cached_pattern:
+                 # Check if pattern is still valid (not expired)
+                 cutoff_time = datetime.utcnow() - timedelta(hours=self.config.pattern_cache_ttl_hours)
+
+                 if cached_pattern.created_at > cutoff_time:
+                     # Update hit count and last used
+                     cached_pattern.hit_count += 1
+                     cached_pattern.last_used = datetime.utcnow()
+                     session.commit()
+
+                     # Add to memory cache for faster future access
+                     result = cached_pattern.classification_result
+                     self._add_to_memory_cache(fingerprint, result)
+
+                     self.cache_hits += 1
+                     self.logger.debug(f"Database cache hit for pattern: {fingerprint[:8]}")
+                     return result
+                 else:
+                     # Pattern is expired, remove it
+                     session.delete(cached_pattern)
+                     session.commit()
+                     self.logger.debug(f"Expired pattern removed: {fingerprint[:8]}")
+
+         self.cache_misses += 1
+         return None
+
+     def store_pattern(self, message: str, files: List[str],
+                       classification_result: Dict[str, Any],
+                       confidence_score: float, source_method: str,
+                       processing_time_ms: float = 0.0) -> None:
+         """Store a new pattern in the cache.
+
+         Args:
+             message: Commit message
+             files: List of changed files
+             classification_result: Classification results to cache
+             confidence_score: Confidence in the classification
+             source_method: Method that produced this result ('nlp' or 'llm')
+             processing_time_ms: Time taken to process
+         """
+         # Only cache high-confidence results
+         if confidence_score < 0.6:
+             return
+
+         fingerprint = self.text_processor.create_semantic_fingerprint(message, files)
+         message_hash = hashlib.md5(message.encode()).hexdigest()
+
+         # Add to memory cache
+         self._add_to_memory_cache(fingerprint, classification_result)
+
+         # Store in database
+         with self.database.get_session() as session:
+             # Check if pattern already exists
+             existing_pattern = session.query(PatternCacheModel).filter(
+                 PatternCacheModel.semantic_fingerprint == fingerprint
+             ).first()
+
+             if existing_pattern:
+                 # Update existing pattern with new data
+                 existing_pattern.hit_count += 1
+                 existing_pattern.last_used = datetime.utcnow()
+
+                 # Update confidence if new result is more confident
+                 if confidence_score > existing_pattern.confidence_score:
+                     existing_pattern.classification_result = classification_result
+                     existing_pattern.confidence_score = confidence_score
+                     existing_pattern.source_method = source_method
+
+                 # Update average processing time
+                 if processing_time_ms > 0:
+                     if existing_pattern.avg_processing_time_ms:
+                         existing_pattern.avg_processing_time_ms = (
+                             existing_pattern.avg_processing_time_ms + processing_time_ms
+                         ) / 2
+                     else:
+                         existing_pattern.avg_processing_time_ms = processing_time_ms
+             else:
+                 # Create new pattern
+                 new_pattern = PatternCacheModel(
+                     message_hash=message_hash,
+                     semantic_fingerprint=fingerprint,
+                     classification_result=classification_result,
+                     confidence_score=confidence_score,
+                     source_method=source_method,
+                     avg_processing_time_ms=processing_time_ms
+                 )
+                 session.add(new_pattern)
+                 self.pattern_learning_count += 1
+
+             session.commit()
+
+         self.logger.debug(
+             f"Stored pattern: {fingerprint[:8]} "
+             f"(confidence: {confidence_score:.2f}, method: {source_method})"
+         )
+
+     def learn_from_results(self, results: List[QualitativeCommitData]) -> None:
+         """Learn patterns from successful classification results.
+
+         Args:
+             results: List of classification results to learn from
+         """
+         learned_patterns = 0
+
+         for result in results:
+             if result.confidence_score >= 0.7:  # Only learn from high-confidence results
+                 classification_data = {
+                     'change_type': result.change_type,
+                     'change_type_confidence': result.change_type_confidence,
+                     'business_domain': result.business_domain,
+                     'domain_confidence': result.domain_confidence,
+                     'risk_level': result.risk_level,
+                     'confidence_score': result.confidence_score
+                 }
+
+                 self.store_pattern(
+                     message=result.message,
+                     files=result.files_changed,
+                     classification_result=classification_data,
+                     confidence_score=result.confidence_score,
+                     source_method=result.processing_method,
+                     processing_time_ms=result.processing_time_ms
+                 )
+                 learned_patterns += 1
+
+         if learned_patterns > 0:
+             self.logger.info(f"Learned {learned_patterns} new patterns from results")
+
+     def _add_to_memory_cache(self, fingerprint: str, result: Dict[str, Any]) -> None:
+         """Add result to in-memory cache with size management.
+
+         Args:
+             fingerprint: Pattern fingerprint
+             result: Classification result
+         """
+         # Manage memory cache size
+         if len(self._memory_cache) >= self.config.semantic_cache_size:
+             # Remove least recently used items
+             sorted_items = sorted(
+                 self._memory_cache_hits.items(),
+                 key=lambda x: x[1]
+             )
+
+             # Remove bottom 20% of items
+             items_to_remove = len(sorted_items) // 5
+             for fingerprint_to_remove, _ in sorted_items[:items_to_remove]:
+                 self._memory_cache.pop(fingerprint_to_remove, None)
+                 self._memory_cache_hits.pop(fingerprint_to_remove, None)
+
+         self._memory_cache[fingerprint] = result
+         self._memory_cache_hits[fingerprint] = 1
+
+     def cleanup_cache(self) -> Dict[str, int]:
+         """Clean up expired and low-quality cache entries.
+
+         Returns:
+             Dictionary with cleanup statistics
+         """
+         stats = {
+             'expired_removed': 0,
+             'low_confidence_removed': 0,
+             'low_usage_removed': 0,
+             'total_remaining': 0
+         }
+
+         cutoff_time = datetime.utcnow() - timedelta(hours=self.config.pattern_cache_ttl_hours)
+
+         with self.database.get_session() as session:
+             # Remove expired patterns
+             expired_patterns = session.query(PatternCacheModel).filter(
+                 PatternCacheModel.created_at < cutoff_time
+             )
+             stats['expired_removed'] = expired_patterns.count()
+             expired_patterns.delete()
+
+             # Remove very low confidence patterns (< 0.4)
+             low_confidence_patterns = session.query(PatternCacheModel).filter(
+                 PatternCacheModel.confidence_score < 0.4
+             )
+             stats['low_confidence_removed'] = low_confidence_patterns.count()
+             low_confidence_patterns.delete()
+
+             # Remove patterns with very low usage (hit_count = 1 and older than 7 days)
+             week_ago = datetime.utcnow() - timedelta(days=7)
+             low_usage_patterns = session.query(PatternCacheModel).filter(
+                 and_(
+                     PatternCacheModel.hit_count == 1,
+                     PatternCacheModel.created_at < week_ago
+                 )
+             )
+             stats['low_usage_removed'] = low_usage_patterns.count()
+             low_usage_patterns.delete()
+
+             # Count remaining patterns
+             stats['total_remaining'] = session.query(PatternCacheModel).count()
+
+             session.commit()
+
+         # Clear memory cache to force refresh
+         self._memory_cache.clear()
+         self._memory_cache_hits.clear()
+
+         self.logger.info(
+             f"Cache cleanup completed: {stats['expired_removed']} expired, "
+             f"{stats['low_confidence_removed']} low-confidence, "
+             f"{stats['low_usage_removed']} low-usage removed. "
+             f"{stats['total_remaining']} patterns remaining."
+         )
+
+         return stats
+
+     def get_cache_statistics(self) -> Dict[str, Any]:
+         """Get comprehensive cache statistics.
+
+         Returns:
+             Dictionary with cache statistics
+         """
+         with self.database.get_session() as session:
+             # Basic counts
+             total_patterns = session.query(PatternCacheModel).count()
+
+             # Method breakdown
+             nlp_patterns = session.query(PatternCacheModel).filter(
+                 PatternCacheModel.source_method == 'nlp'
+             ).count()
+
+             llm_patterns = session.query(PatternCacheModel).filter(
+                 PatternCacheModel.source_method == 'llm'
+             ).count()
+
+             # Confidence distribution
+             high_confidence = session.query(PatternCacheModel).filter(
+                 PatternCacheModel.confidence_score > 0.8
+             ).count()
+
+             medium_confidence = session.query(PatternCacheModel).filter(
+                 and_(
+                     PatternCacheModel.confidence_score >= 0.6,
+                     PatternCacheModel.confidence_score <= 0.8
+                 )
+             ).count()
+
+             # Usage statistics
+             top_patterns = session.query(PatternCacheModel).order_by(
+                 desc(PatternCacheModel.hit_count)
+             ).limit(10).all()
+
+             # Age statistics
+             week_ago = datetime.utcnow() - timedelta(days=7)
+             recent_patterns = session.query(PatternCacheModel).filter(
+                 PatternCacheModel.created_at > week_ago
+             ).count()
+
+             # Calculate hit rate
+             total_requests = self.cache_hits + self.cache_misses
+             hit_rate = (self.cache_hits / total_requests) if total_requests > 0 else 0.0
+
+             return {
+                 'total_patterns': total_patterns,
+                 'method_breakdown': {
+                     'nlp_patterns': nlp_patterns,
+                     'llm_patterns': llm_patterns
+                 },
+                 'confidence_distribution': {
+                     'high_confidence': high_confidence,
+                     'medium_confidence': medium_confidence,
+                     'low_confidence': total_patterns - high_confidence - medium_confidence
+                 },
+                 'usage_stats': {
+                     'cache_hits': self.cache_hits,
+                     'cache_misses': self.cache_misses,
+                     'hit_rate': hit_rate,
+                     'memory_cache_size': len(self._memory_cache)
+                 },
+                 'top_patterns': [
+                     {
+                         'fingerprint': p.semantic_fingerprint[:8],
+                         'hit_count': p.hit_count,
+                         'confidence': p.confidence_score,
+                         'method': p.source_method
+                     }
+                     for p in top_patterns
+                 ],
+                 'recent_patterns': recent_patterns,
+                 'patterns_learned': self.pattern_learning_count,
+                 'config': {
+                     'ttl_hours': self.config.pattern_cache_ttl_hours,
+                     'max_memory_size': self.config.semantic_cache_size,
+                     'learning_enabled': self.config.enable_pattern_learning
+                 }
+             }
+
+     def optimize_cache(self) -> Dict[str, Any]:
+         """Optimize cache for better performance.
+
+         Returns:
+             Dictionary with optimization results
+         """
+         optimization_stats = {}
+
+         # Step 1: Cleanup expired and low-quality entries
+         cleanup_stats = self.cleanup_cache()
+         optimization_stats['cleanup'] = cleanup_stats
+
+         # Step 2: Promote high-usage patterns to memory cache
+         with self.database.get_session() as session:
+             # Get top patterns by hit count
+             popular_patterns = session.query(PatternCacheModel).filter(
+                 PatternCacheModel.hit_count >= 5
+             ).order_by(desc(PatternCacheModel.hit_count)).limit(
+                 self.config.semantic_cache_size // 2  # Fill half of memory cache
+             ).all()
+
+             promoted_count = 0
+             for pattern in popular_patterns:
+                 if pattern.semantic_fingerprint not in self._memory_cache:
+                     self._add_to_memory_cache(
+                         pattern.semantic_fingerprint,
+                         pattern.classification_result
+                     )
+                     promoted_count += 1
+
+             optimization_stats['promoted_to_memory'] = promoted_count
+
+         # Step 3: Update learning threshold based on cache size
+         total_patterns = cleanup_stats['total_remaining']
+         if total_patterns > 1000:
+             # Increase learning threshold for large caches
+             self.config.learning_threshold = min(20, self.config.learning_threshold + 2)
+         elif total_patterns < 100:
+             # Decrease learning threshold for small caches
+             self.config.learning_threshold = max(5, self.config.learning_threshold - 1)
+
+         optimization_stats['learning_threshold'] = self.config.learning_threshold
+
+         self.logger.info(f"Cache optimization completed: {optimization_stats}")
+         return optimization_stats
+
+     def export_patterns(self, output_path: Path, min_confidence: float = 0.8) -> int:
+         """Export high-quality patterns for analysis or backup.
+
+         Args:
+             output_path: Path to export file
+             min_confidence: Minimum confidence threshold for export
+
+         Returns:
+             Number of patterns exported
+         """
+         with self.database.get_session() as session:
+             patterns = session.query(PatternCacheModel).filter(
+                 PatternCacheModel.confidence_score >= min_confidence
+             ).order_by(desc(PatternCacheModel.hit_count)).all()
+
+             export_data = []
+             for pattern in patterns:
+                 export_data.append({
+                     'semantic_fingerprint': pattern.semantic_fingerprint,
+                     'classification_result': pattern.classification_result,
+                     'confidence_score': pattern.confidence_score,
+                     'hit_count': pattern.hit_count,
+                     'source_method': pattern.source_method,
+                     'created_at': pattern.created_at.isoformat(),
+                     'last_used': pattern.last_used.isoformat()
+                 })
+
+             with open(output_path, 'w') as f:
+                 json.dump(export_data, f, indent=2)
+
+             self.logger.info(f"Exported {len(patterns)} patterns to {output_path}")
+             return len(patterns)
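
Taken together, the new PatternCache class exposes a small lookup, store, and maintain lifecycle. The sketch below wires those pieces together; it is assembled from this diff alone, so treat it as a minimal sketch: the CacheConfig and Database constructor arguments shown are assumptions, not the package's documented API (only the attribute and method names used above are known from the diff).

    from pathlib import Path

    from gitflow_analytics.models.database import Database
    from gitflow_analytics.qualitative.models.schemas import CacheConfig
    from gitflow_analytics.qualitative.core.pattern_cache import PatternCache

    # Assumed constructor signatures: CacheConfig and Database may take
    # different arguments; only attribute names such as cache_dir and
    # pattern_cache_ttl_hours appear in the diffed source.
    config = CacheConfig(cache_dir=".qualitative_cache")
    database = Database(Path(".qualitative_cache/patterns.db"))
    cache = PatternCache(config, database)

    message = "fix: handle null pointer in payment flow"
    files = ["src/payments/processor.py"]

    # 1. Cheap path: reuse a previously learned classification.
    cached = cache.lookup_pattern(message, files)

    if cached is None:
        # 2. Cache miss: classify by other means (NLP/LLM), then store.
        #    Note that store_pattern() silently drops results below 0.6 confidence.
        result = {"change_type": "bugfix", "risk_level": "medium"}
        cache.store_pattern(
            message,
            files,
            classification_result=result,
            confidence_score=0.85,
            source_method="nlp",
            processing_time_ms=12.5,
        )

    # 3. Periodic maintenance: evict expired, low-confidence, and single-hit
    #    entries, then inspect hit rates.
    cache.cleanup_cache()
    print(cache.get_cache_statistics()["usage_stats"])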
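
Every cache operation keys on TextProcessor.create_semantic_fingerprint, which lives in gitflow_analytics/qualitative/utils/text_processing.py (file 35 in the list above) and is not part of this hunk. As a purely hypothetical illustration of the idea (normalize the commit message, mix in a coarse signal from the touched files, and hash the result so near-duplicate commits land on the same key), a minimal fingerprint could look like:

    import hashlib
    import re
    from pathlib import PurePosixPath
    from typing import List

    def toy_fingerprint(message: str, files: List[str]) -> str:
        # Hypothetical stand-in for TextProcessor.create_semantic_fingerprint;
        # the real implementation is not shown in this diff.
        # Lower-case and strip digits/ticket references so that
        # "Fix bug #123" and "fix bug #456" map to the same key...
        normalized = re.sub(r"[#\d]+", "", message.lower()).strip()
        # ...and mix in the sorted set of file extensions so the same words
        # against different parts of the codebase stay distinct.
        extensions = ",".join(sorted({PurePosixPath(f).suffix for f in files}))
        return hashlib.sha256(f"{normalized}|{extensions}".encode()).hexdigest()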