gitflow-analytics 1.0.3__py3-none-any.whl → 1.3.11__py3-none-any.whl

This diff compares publicly available package versions as released to their public registries. It is provided for informational purposes only and reflects the changes between those versions as published.
Files changed (116)
  1. gitflow_analytics/_version.py +1 -1
  2. gitflow_analytics/classification/__init__.py +31 -0
  3. gitflow_analytics/classification/batch_classifier.py +752 -0
  4. gitflow_analytics/classification/classifier.py +464 -0
  5. gitflow_analytics/classification/feature_extractor.py +725 -0
  6. gitflow_analytics/classification/linguist_analyzer.py +574 -0
  7. gitflow_analytics/classification/model.py +455 -0
  8. gitflow_analytics/cli.py +4158 -350
  9. gitflow_analytics/cli_rich.py +198 -48
  10. gitflow_analytics/config/__init__.py +43 -0
  11. gitflow_analytics/config/errors.py +261 -0
  12. gitflow_analytics/config/loader.py +905 -0
  13. gitflow_analytics/config/profiles.py +264 -0
  14. gitflow_analytics/config/repository.py +124 -0
  15. gitflow_analytics/config/schema.py +444 -0
  16. gitflow_analytics/config/validator.py +154 -0
  17. gitflow_analytics/config.py +44 -508
  18. gitflow_analytics/core/analyzer.py +1209 -98
  19. gitflow_analytics/core/cache.py +1337 -29
  20. gitflow_analytics/core/data_fetcher.py +1285 -0
  21. gitflow_analytics/core/identity.py +363 -14
  22. gitflow_analytics/core/metrics_storage.py +526 -0
  23. gitflow_analytics/core/progress.py +372 -0
  24. gitflow_analytics/core/schema_version.py +269 -0
  25. gitflow_analytics/extractors/ml_tickets.py +1100 -0
  26. gitflow_analytics/extractors/story_points.py +8 -1
  27. gitflow_analytics/extractors/tickets.py +749 -11
  28. gitflow_analytics/identity_llm/__init__.py +6 -0
  29. gitflow_analytics/identity_llm/analysis_pass.py +231 -0
  30. gitflow_analytics/identity_llm/analyzer.py +464 -0
  31. gitflow_analytics/identity_llm/models.py +76 -0
  32. gitflow_analytics/integrations/github_integration.py +175 -11
  33. gitflow_analytics/integrations/jira_integration.py +461 -24
  34. gitflow_analytics/integrations/orchestrator.py +124 -1
  35. gitflow_analytics/metrics/activity_scoring.py +322 -0
  36. gitflow_analytics/metrics/branch_health.py +470 -0
  37. gitflow_analytics/metrics/dora.py +379 -20
  38. gitflow_analytics/models/database.py +843 -53
  39. gitflow_analytics/pm_framework/__init__.py +115 -0
  40. gitflow_analytics/pm_framework/adapters/__init__.py +50 -0
  41. gitflow_analytics/pm_framework/adapters/jira_adapter.py +1845 -0
  42. gitflow_analytics/pm_framework/base.py +406 -0
  43. gitflow_analytics/pm_framework/models.py +211 -0
  44. gitflow_analytics/pm_framework/orchestrator.py +652 -0
  45. gitflow_analytics/pm_framework/registry.py +333 -0
  46. gitflow_analytics/qualitative/__init__.py +9 -10
  47. gitflow_analytics/qualitative/chatgpt_analyzer.py +259 -0
  48. gitflow_analytics/qualitative/classifiers/__init__.py +3 -3
  49. gitflow_analytics/qualitative/classifiers/change_type.py +518 -244
  50. gitflow_analytics/qualitative/classifiers/domain_classifier.py +272 -165
  51. gitflow_analytics/qualitative/classifiers/intent_analyzer.py +321 -222
  52. gitflow_analytics/qualitative/classifiers/llm/__init__.py +35 -0
  53. gitflow_analytics/qualitative/classifiers/llm/base.py +193 -0
  54. gitflow_analytics/qualitative/classifiers/llm/batch_processor.py +383 -0
  55. gitflow_analytics/qualitative/classifiers/llm/cache.py +479 -0
  56. gitflow_analytics/qualitative/classifiers/llm/cost_tracker.py +435 -0
  57. gitflow_analytics/qualitative/classifiers/llm/openai_client.py +403 -0
  58. gitflow_analytics/qualitative/classifiers/llm/prompts.py +373 -0
  59. gitflow_analytics/qualitative/classifiers/llm/response_parser.py +287 -0
  60. gitflow_analytics/qualitative/classifiers/llm_commit_classifier.py +607 -0
  61. gitflow_analytics/qualitative/classifiers/risk_analyzer.py +215 -189
  62. gitflow_analytics/qualitative/core/__init__.py +4 -4
  63. gitflow_analytics/qualitative/core/llm_fallback.py +239 -235
  64. gitflow_analytics/qualitative/core/nlp_engine.py +157 -148
  65. gitflow_analytics/qualitative/core/pattern_cache.py +214 -192
  66. gitflow_analytics/qualitative/core/processor.py +381 -248
  67. gitflow_analytics/qualitative/enhanced_analyzer.py +2236 -0
  68. gitflow_analytics/qualitative/example_enhanced_usage.py +420 -0
  69. gitflow_analytics/qualitative/models/__init__.py +7 -7
  70. gitflow_analytics/qualitative/models/schemas.py +155 -121
  71. gitflow_analytics/qualitative/utils/__init__.py +4 -4
  72. gitflow_analytics/qualitative/utils/batch_processor.py +136 -123
  73. gitflow_analytics/qualitative/utils/cost_tracker.py +142 -140
  74. gitflow_analytics/qualitative/utils/metrics.py +172 -158
  75. gitflow_analytics/qualitative/utils/text_processing.py +146 -104
  76. gitflow_analytics/reports/__init__.py +100 -0
  77. gitflow_analytics/reports/analytics_writer.py +539 -14
  78. gitflow_analytics/reports/base.py +648 -0
  79. gitflow_analytics/reports/branch_health_writer.py +322 -0
  80. gitflow_analytics/reports/classification_writer.py +924 -0
  81. gitflow_analytics/reports/cli_integration.py +427 -0
  82. gitflow_analytics/reports/csv_writer.py +1676 -212
  83. gitflow_analytics/reports/data_models.py +504 -0
  84. gitflow_analytics/reports/database_report_generator.py +427 -0
  85. gitflow_analytics/reports/example_usage.py +344 -0
  86. gitflow_analytics/reports/factory.py +499 -0
  87. gitflow_analytics/reports/formatters.py +698 -0
  88. gitflow_analytics/reports/html_generator.py +1116 -0
  89. gitflow_analytics/reports/interfaces.py +489 -0
  90. gitflow_analytics/reports/json_exporter.py +2770 -0
  91. gitflow_analytics/reports/narrative_writer.py +2287 -158
  92. gitflow_analytics/reports/story_point_correlation.py +1144 -0
  93. gitflow_analytics/reports/weekly_trends_writer.py +389 -0
  94. gitflow_analytics/training/__init__.py +5 -0
  95. gitflow_analytics/training/model_loader.py +377 -0
  96. gitflow_analytics/training/pipeline.py +550 -0
  97. gitflow_analytics/tui/__init__.py +1 -1
  98. gitflow_analytics/tui/app.py +129 -126
  99. gitflow_analytics/tui/screens/__init__.py +3 -3
  100. gitflow_analytics/tui/screens/analysis_progress_screen.py +188 -179
  101. gitflow_analytics/tui/screens/configuration_screen.py +154 -178
  102. gitflow_analytics/tui/screens/loading_screen.py +100 -110
  103. gitflow_analytics/tui/screens/main_screen.py +89 -72
  104. gitflow_analytics/tui/screens/results_screen.py +305 -281
  105. gitflow_analytics/tui/widgets/__init__.py +2 -2
  106. gitflow_analytics/tui/widgets/data_table.py +67 -69
  107. gitflow_analytics/tui/widgets/export_modal.py +76 -76
  108. gitflow_analytics/tui/widgets/progress_widget.py +41 -46
  109. gitflow_analytics-1.3.11.dist-info/METADATA +1015 -0
  110. gitflow_analytics-1.3.11.dist-info/RECORD +122 -0
  111. gitflow_analytics-1.0.3.dist-info/METADATA +0 -490
  112. gitflow_analytics-1.0.3.dist-info/RECORD +0 -62
  113. {gitflow_analytics-1.0.3.dist-info → gitflow_analytics-1.3.11.dist-info}/WHEEL +0 -0
  114. {gitflow_analytics-1.0.3.dist-info → gitflow_analytics-1.3.11.dist-info}/entry_points.txt +0 -0
  115. {gitflow_analytics-1.0.3.dist-info → gitflow_analytics-1.3.11.dist-info}/licenses/LICENSE +0 -0
  116. {gitflow_analytics-1.0.3.dist-info → gitflow_analytics-1.3.11.dist-info}/top_level.txt +0 -0
@@ -1,25 +1,23 @@
 """Performance and accuracy metrics for qualitative analysis."""
 
 import logging
-import time
+import statistics
 from collections import defaultdict, deque
 from dataclasses import dataclass
 from datetime import datetime, timedelta
-from typing import Dict, List, Optional, Tuple
-import statistics
 
 
-@dataclass
+@dataclass
 class ProcessingMetrics:
     """Metrics for a single processing operation."""
-
+
     operation: str
     processing_time_ms: float
     items_processed: int
     confidence_score: float
     method_used: str  # 'nlp' or 'llm'
     timestamp: datetime
-
+
     @property
     def items_per_second(self) -> float:
         """Calculate processing rate."""
@@ -30,47 +28,52 @@ class ProcessingMetrics:
 
 class PerformanceMetrics:
     """Track and analyze performance metrics for qualitative analysis.
-
+
     This class provides comprehensive performance monitoring including
     processing times, accuracy metrics, cost tracking, and system health
     indicators for the qualitative analysis pipeline.
     """
-
+
     def __init__(self, max_history: int = 10000):
         """Initialize performance metrics tracker.
-
+
         Args:
             max_history: Maximum number of metrics to keep in memory
         """
         self.max_history = max_history
         self.logger = logging.getLogger(__name__)
-
+
         # Processing metrics
         self.processing_metrics: deque[ProcessingMetrics] = deque(maxlen=max_history)
-
+
         # Method usage tracking
         self.method_usage = defaultdict(int)
         self.method_performance = defaultdict(list)
-
+
         # Confidence tracking
         self.confidence_history = deque(maxlen=max_history)
-
+
         # Error tracking
         self.error_counts = defaultdict(int)
         self.error_history = deque(maxlen=1000)
-
+
         # Cache performance
         self.cache_hits = 0
        self.cache_misses = 0
-
+
         # Quality metrics
         self.classification_accuracy = deque(maxlen=1000)
-
-    def record_processing(self, operation: str, processing_time_ms: float,
-                          items_processed: int, confidence_score: float,
-                          method_used: str) -> None:
+
+    def record_processing(
+        self,
+        operation: str,
+        processing_time_ms: float,
+        items_processed: int,
+        confidence_score: float,
+        method_used: str,
+    ) -> None:
         """Record a processing operation.
-
+
         Args:
             operation: Type of operation (e.g., 'classification', 'analysis')
             processing_time_ms: Processing time in milliseconds
@@ -84,264 +87,275 @@ class PerformanceMetrics:
             items_processed=items_processed,
             confidence_score=confidence_score,
             method_used=method_used,
-            timestamp=datetime.utcnow()
+            timestamp=datetime.utcnow(),
         )
-
+
         self.processing_metrics.append(metric)
         self.method_usage[method_used] += items_processed
         self.method_performance[method_used].append(processing_time_ms / items_processed)
         self.confidence_history.append(confidence_score)
-
+
     def record_cache_hit(self) -> None:
         """Record a cache hit."""
         self.cache_hits += 1
-
+
     def record_cache_miss(self) -> None:
         """Record a cache miss."""
         self.cache_misses += 1
-
+
     def record_error(self, error_type: str, error_message: str) -> None:
         """Record an error occurrence.
-
+
         Args:
             error_type: Type of error
             error_message: Error message
         """
         self.error_counts[error_type] += 1
-        self.error_history.append({
-            'type': error_type,
-            'message': error_message,
-            'timestamp': datetime.utcnow()
-        })
-
+        self.error_history.append(
+            {"type": error_type, "message": error_message, "timestamp": datetime.utcnow()}
+        )
+
     def record_classification_accuracy(self, accuracy: float) -> None:
         """Record classification accuracy measurement.
-
+
         Args:
             accuracy: Accuracy score (0.0 to 1.0)
         """
         self.classification_accuracy.append(accuracy)
-
-    def get_processing_stats(self, hours: int = 24) -> Dict[str, any]:
+
+    def get_processing_stats(self, hours: int = 24) -> dict[str, any]:
         """Get processing statistics for the last N hours.
-
+
         Args:
             hours: Number of hours to analyze
-
+
         Returns:
             Dictionary with processing statistics
         """
         cutoff_time = datetime.utcnow() - timedelta(hours=hours)
-        recent_metrics = [
-            m for m in self.processing_metrics
-            if m.timestamp >= cutoff_time
-        ]
-
+        recent_metrics = [m for m in self.processing_metrics if m.timestamp >= cutoff_time]
+
         if not recent_metrics:
             return {
-                'total_operations': 0,
-                'total_items_processed': 0,
-                'avg_processing_time_ms': 0.0,
-                'avg_items_per_second': 0.0,
-                'avg_confidence': 0.0,
-                'method_breakdown': {},
-                'cache_hit_rate': 0.0
+                "total_operations": 0,
+                "total_items_processed": 0,
+                "avg_processing_time_ms": 0.0,
+                "avg_items_per_second": 0.0,
+                "avg_confidence": 0.0,
+                "method_breakdown": {},
+                "cache_hit_rate": 0.0,
             }
-
+
         # Calculate statistics
         total_items = sum(m.items_processed for m in recent_metrics)
         total_time = sum(m.processing_time_ms for m in recent_metrics)
-
+
         avg_processing_time = total_time / len(recent_metrics)
         avg_items_per_second = statistics.mean([m.items_per_second for m in recent_metrics])
         avg_confidence = statistics.mean([m.confidence_score for m in recent_metrics])
-
+
         # Method breakdown
         method_breakdown = {}
-        for method in ['nlp', 'llm']:
+        for method in ["nlp", "llm"]:
             method_metrics = [m for m in recent_metrics if m.method_used == method]
             if method_metrics:
                 method_items = sum(m.items_processed for m in method_metrics)
                 method_breakdown[method] = {
-                    'items_processed': method_items,
-                    'percentage': (method_items / total_items) * 100 if total_items > 0 else 0,
-                    'avg_confidence': statistics.mean([m.confidence_score for m in method_metrics]),
-                    'avg_processing_time_ms': statistics.mean([m.processing_time_ms for m in method_metrics])
+                    "items_processed": method_items,
+                    "percentage": (method_items / total_items) * 100 if total_items > 0 else 0,
+                    "avg_confidence": statistics.mean([m.confidence_score for m in method_metrics]),
+                    "avg_processing_time_ms": statistics.mean(
+                        [m.processing_time_ms for m in method_metrics]
+                    ),
                 }
-
+
         # Cache hit rate
         total_cache_requests = self.cache_hits + self.cache_misses
-        cache_hit_rate = (self.cache_hits / total_cache_requests) if total_cache_requests > 0 else 0.0
-
+        cache_hit_rate = (
+            (self.cache_hits / total_cache_requests) if total_cache_requests > 0 else 0.0
+        )
+
         return {
-            'total_operations': len(recent_metrics),
-            'total_items_processed': total_items,
-            'avg_processing_time_ms': avg_processing_time,
-            'avg_items_per_second': avg_items_per_second,
-            'avg_confidence': avg_confidence,
-            'method_breakdown': method_breakdown,
-            'cache_hit_rate': cache_hit_rate,
-            'time_period_hours': hours
+            "total_operations": len(recent_metrics),
+            "total_items_processed": total_items,
+            "avg_processing_time_ms": avg_processing_time,
+            "avg_items_per_second": avg_items_per_second,
+            "avg_confidence": avg_confidence,
+            "method_breakdown": method_breakdown,
+            "cache_hit_rate": cache_hit_rate,
+            "time_period_hours": hours,
         }
-
-    def get_quality_metrics(self) -> Dict[str, any]:
+
+    def get_quality_metrics(self) -> dict[str, any]:
         """Get quality and accuracy metrics.
-
+
         Returns:
             Dictionary with quality metrics
         """
         if not self.confidence_history:
             return {
-                'avg_confidence': 0.0,
-                'confidence_distribution': {},
-                'classification_accuracy': 0.0,
-                'quality_trend': 'stable'
+                "avg_confidence": 0.0,
+                "confidence_distribution": {},
+                "classification_accuracy": 0.0,
+                "quality_trend": "stable",
             }
-
+
         # Confidence statistics
         confidences = list(self.confidence_history)
         avg_confidence = statistics.mean(confidences)
-
+
         # Confidence distribution
         confidence_buckets = {
-            'high (>0.8)': sum(1 for c in confidences if c > 0.8),
-            'medium (0.6-0.8)': sum(1 for c in confidences if 0.6 <= c <= 0.8),
-            'low (<0.6)': sum(1 for c in confidences if c < 0.6)
+            "high (>0.8)": sum(1 for c in confidences if c > 0.8),
+            "medium (0.6-0.8)": sum(1 for c in confidences if 0.6 <= c <= 0.8),
+            "low (<0.6)": sum(1 for c in confidences if c < 0.6),
         }
-
+
         # Quality trend (comparing recent vs. older metrics)
         if len(confidences) >= 100:
             recent_confidence = statistics.mean(confidences[-50:])
             older_confidence = statistics.mean(confidences[-100:-50])
-
+
             if recent_confidence > older_confidence + 0.05:
-                quality_trend = 'improving'
+                quality_trend = "improving"
             elif recent_confidence < older_confidence - 0.05:
-                quality_trend = 'declining'
+                quality_trend = "declining"
             else:
-                quality_trend = 'stable'
+                quality_trend = "stable"
         else:
-            quality_trend = 'insufficient_data'
-
+            quality_trend = "insufficient_data"
+
         # Classification accuracy
-        avg_accuracy = statistics.mean(self.classification_accuracy) if self.classification_accuracy else 0.0
-
+        avg_accuracy = (
+            statistics.mean(self.classification_accuracy) if self.classification_accuracy else 0.0
+        )
+
         return {
-            'avg_confidence': avg_confidence,
-            'confidence_distribution': confidence_buckets,
-            'classification_accuracy': avg_accuracy,
-            'quality_trend': quality_trend,
-            'total_samples': len(confidences)
+            "avg_confidence": avg_confidence,
+            "confidence_distribution": confidence_buckets,
+            "classification_accuracy": avg_accuracy,
+            "quality_trend": quality_trend,
+            "total_samples": len(confidences),
         }
-
-    def get_error_analysis(self) -> Dict[str, any]:
+
+    def get_error_analysis(self) -> dict[str, any]:
         """Get error analysis and system health metrics.
-
+
         Returns:
             Dictionary with error analysis
         """
         # Recent errors (last 24 hours)
         cutoff_time = datetime.utcnow() - timedelta(hours=24)
-        recent_errors = [
-            error for error in self.error_history
-            if error['timestamp'] >= cutoff_time
-        ]
-
+        recent_errors = [error for error in self.error_history if error["timestamp"] >= cutoff_time]
+
         # Error type breakdown
         error_type_counts = defaultdict(int)
         for error in recent_errors:
-            error_type_counts[error['type']] += 1
-
+            error_type_counts[error["type"]] += 1
+
         # Total operations for error rate calculation
-        total_operations = len([
-            m for m in self.processing_metrics
-            if m.timestamp >= cutoff_time
-        ])
-
+        total_operations = len([m for m in self.processing_metrics if m.timestamp >= cutoff_time])
+
         error_rate = len(recent_errors) / total_operations if total_operations > 0 else 0.0
-
+
         return {
-            'total_errors_24h': len(recent_errors),
-            'error_rate': error_rate,
-            'error_types': dict(error_type_counts),
-            'most_common_error': max(error_type_counts.keys(), key=error_type_counts.get) if error_type_counts else None,
-            'system_health': 'healthy' if error_rate < 0.01 else 'degraded' if error_rate < 0.05 else 'unhealthy'
+            "total_errors_24h": len(recent_errors),
+            "error_rate": error_rate,
+            "error_types": dict(error_type_counts),
+            "most_common_error": (
+                max(error_type_counts.keys(), key=error_type_counts.get)
+                if error_type_counts
+                else None
+            ),
+            "system_health": (
+                "healthy" if error_rate < 0.01 else "degraded" if error_rate < 0.05 else "unhealthy"
+            ),
         }
-
-    def get_performance_alerts(self) -> List[str]:
+
+    def get_performance_alerts(self) -> list[str]:
         """Get performance alerts and recommendations.
-
+
         Returns:
             List of alert messages
         """
         alerts = []
-
+
         # Check recent performance
         stats = self.get_processing_stats(hours=1)
         quality = self.get_quality_metrics()
         errors = self.get_error_analysis()
-
+
         # Processing speed alerts
-        if stats['avg_items_per_second'] < 50:  # Less than 50 items/second
+        if stats["avg_items_per_second"] < 50:  # Less than 50 items/second
             alerts.append("Processing speed below target (< 50 items/second)")
-
+
         # Confidence alerts
-        if quality['avg_confidence'] < 0.6:
+        if quality["avg_confidence"] < 0.6:
             alerts.append("Average confidence below threshold (< 0.6)")
-
+
         # Method balance alerts
-        if 'llm' in stats['method_breakdown']:
-            llm_percentage = stats['method_breakdown']['llm']['percentage']
+        if "llm" in stats["method_breakdown"]:
+            llm_percentage = stats["method_breakdown"]["llm"]["percentage"]
             if llm_percentage > 20:  # More than 20% using LLM
-                alerts.append(f"High LLM usage ({llm_percentage:.1f}%) - consider tuning NLP thresholds")
-
+                alerts.append(
+                    f"High LLM usage ({llm_percentage:.1f}%) - consider tuning NLP thresholds"
+                )
+
         # Error rate alerts
-        if errors['error_rate'] > 0.05:  # More than 5% error rate
+        if errors["error_rate"] > 0.05:  # More than 5% error rate
             alerts.append(f"High error rate ({errors['error_rate']:.1%})")
-
+
         # Cache performance alerts
-        if stats['cache_hit_rate'] < 0.3:  # Less than 30% cache hit rate
+        if stats["cache_hit_rate"] < 0.3:  # Less than 30% cache hit rate
             alerts.append("Low cache hit rate - pattern learning may be ineffective")
-
+
         # Quality trend alerts
-        if quality['quality_trend'] == 'declining':
+        if quality["quality_trend"] == "declining":
             alerts.append("Quality trend declining - review recent changes")
-
+
         return alerts
-
-    def get_optimization_suggestions(self) -> List[str]:
+
+    def get_optimization_suggestions(self) -> list[str]:
         """Get optimization suggestions based on metrics.
-
+
         Returns:
             List of optimization suggestions
         """
         suggestions = []
-
+
         stats = self.get_processing_stats(hours=24)
         quality = self.get_quality_metrics()
-
+
         # Performance optimizations
-        if stats['avg_items_per_second'] < 100:
+        if stats["avg_items_per_second"] < 100:
             suggestions.append("Consider increasing batch size or enabling parallel processing")
-
+
         # Method optimization
-        method_breakdown = stats['method_breakdown']
-        if 'llm' in method_breakdown and method_breakdown['llm']['percentage'] > 15:
-            suggestions.append("High LLM usage - consider lowering confidence threshold or improving NLP patterns")
-
-        if 'nlp' in method_breakdown and method_breakdown['nlp']['avg_confidence'] < 0.7:
+        method_breakdown = stats["method_breakdown"]
+        if "llm" in method_breakdown and method_breakdown["llm"]["percentage"] > 15:
+            suggestions.append(
+                "High LLM usage - consider lowering confidence threshold or improving NLP patterns"
+            )
+
+        if "nlp" in method_breakdown and method_breakdown["nlp"]["avg_confidence"] < 0.7:
             suggestions.append("NLP confidence low - consider updating classification patterns")
-
+
         # Quality optimizations
-        if quality['avg_confidence'] < 0.7:
-            suggestions.append("Overall confidence low - review classification accuracy and update models")
-
-        confidence_dist = quality['confidence_distribution']
-        if confidence_dist.get('low (<0.6)', 0) > confidence_dist.get('high (>0.8)', 0):
-            suggestions.append("Many low-confidence predictions - consider additional training data")
-
+        if quality["avg_confidence"] < 0.7:
+            suggestions.append(
+                "Overall confidence low - review classification accuracy and update models"
+            )
+
+        confidence_dist = quality["confidence_distribution"]
+        if confidence_dist.get("low (<0.6)", 0) > confidence_dist.get("high (>0.8)", 0):
+            suggestions.append(
+                "Many low-confidence predictions - consider additional training data"
+            )
+
         # Cache optimizations
-        if stats['cache_hit_rate'] < 0.5:
-            suggestions.append("Low cache hit rate - increase cache size or improve pattern matching")
-
-        return suggestions
+        if stats["cache_hit_rate"] < 0.5:
+            suggestions.append(
+                "Low cache hit rate - increase cache size or improve pattern matching"
+            )
+
+        return suggestions
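
For orientation, a minimal usage sketch of the PerformanceMetrics API as it reads after this diff. The import path follows the gitflow_analytics/qualitative/utils/metrics.py module shown above; all values are illustrative, and the sketch assumes the 1.3.11 wheel is installed:

    from gitflow_analytics.qualitative.utils.metrics import PerformanceMetrics

    metrics = PerformanceMetrics(max_history=10000)

    # Record one batch: 120 commits classified by the NLP path in 850 ms
    # (hypothetical numbers, chosen only to exercise the API).
    metrics.record_processing(
        operation="classification",
        processing_time_ms=850.0,
        items_processed=120,
        confidence_score=0.82,
        method_used="nlp",
    )
    metrics.record_cache_hit()

    # Aggregate stats over the last 24 hours, then run the threshold-based
    # health checks built on top of them.
    stats = metrics.get_processing_stats(hours=24)
    print(stats["avg_items_per_second"], stats["cache_hit_rate"])

    for alert in metrics.get_performance_alerts():
        print("ALERT:", alert)
    for tip in metrics.get_optimization_suggestions():
        print("TIP:", tip)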