gitflow-analytics 1.0.1-py3-none-any.whl → 1.0.3-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (57)
  1. gitflow_analytics/__init__.py +11 -11
  2. gitflow_analytics/_version.py +2 -2
  3. gitflow_analytics/cli.py +612 -258
  4. gitflow_analytics/cli_rich.py +353 -0
  5. gitflow_analytics/config.py +251 -141
  6. gitflow_analytics/core/analyzer.py +140 -103
  7. gitflow_analytics/core/branch_mapper.py +132 -132
  8. gitflow_analytics/core/cache.py +240 -169
  9. gitflow_analytics/core/identity.py +210 -173
  10. gitflow_analytics/extractors/base.py +13 -11
  11. gitflow_analytics/extractors/story_points.py +70 -59
  12. gitflow_analytics/extractors/tickets.py +101 -87
  13. gitflow_analytics/integrations/github_integration.py +84 -77
  14. gitflow_analytics/integrations/jira_integration.py +116 -104
  15. gitflow_analytics/integrations/orchestrator.py +86 -85
  16. gitflow_analytics/metrics/dora.py +181 -177
  17. gitflow_analytics/models/database.py +190 -53
  18. gitflow_analytics/qualitative/__init__.py +30 -0
  19. gitflow_analytics/qualitative/classifiers/__init__.py +13 -0
  20. gitflow_analytics/qualitative/classifiers/change_type.py +468 -0
  21. gitflow_analytics/qualitative/classifiers/domain_classifier.py +399 -0
  22. gitflow_analytics/qualitative/classifiers/intent_analyzer.py +436 -0
  23. gitflow_analytics/qualitative/classifiers/risk_analyzer.py +412 -0
  24. gitflow_analytics/qualitative/core/__init__.py +13 -0
  25. gitflow_analytics/qualitative/core/llm_fallback.py +653 -0
  26. gitflow_analytics/qualitative/core/nlp_engine.py +373 -0
  27. gitflow_analytics/qualitative/core/pattern_cache.py +457 -0
  28. gitflow_analytics/qualitative/core/processor.py +540 -0
  29. gitflow_analytics/qualitative/models/__init__.py +25 -0
  30. gitflow_analytics/qualitative/models/schemas.py +272 -0
  31. gitflow_analytics/qualitative/utils/__init__.py +13 -0
  32. gitflow_analytics/qualitative/utils/batch_processor.py +326 -0
  33. gitflow_analytics/qualitative/utils/cost_tracker.py +343 -0
  34. gitflow_analytics/qualitative/utils/metrics.py +347 -0
  35. gitflow_analytics/qualitative/utils/text_processing.py +243 -0
  36. gitflow_analytics/reports/analytics_writer.py +11 -4
  37. gitflow_analytics/reports/csv_writer.py +51 -31
  38. gitflow_analytics/reports/narrative_writer.py +16 -14
  39. gitflow_analytics/tui/__init__.py +5 -0
  40. gitflow_analytics/tui/app.py +721 -0
  41. gitflow_analytics/tui/screens/__init__.py +8 -0
  42. gitflow_analytics/tui/screens/analysis_progress_screen.py +487 -0
  43. gitflow_analytics/tui/screens/configuration_screen.py +547 -0
  44. gitflow_analytics/tui/screens/loading_screen.py +358 -0
  45. gitflow_analytics/tui/screens/main_screen.py +304 -0
  46. gitflow_analytics/tui/screens/results_screen.py +698 -0
  47. gitflow_analytics/tui/widgets/__init__.py +7 -0
  48. gitflow_analytics/tui/widgets/data_table.py +257 -0
  49. gitflow_analytics/tui/widgets/export_modal.py +301 -0
  50. gitflow_analytics/tui/widgets/progress_widget.py +192 -0
  51. {gitflow_analytics-1.0.1.dist-info → gitflow_analytics-1.0.3.dist-info}/METADATA +31 -4
  52. gitflow_analytics-1.0.3.dist-info/RECORD +62 -0
  53. gitflow_analytics-1.0.1.dist-info/RECORD +0 -31
  54. {gitflow_analytics-1.0.1.dist-info → gitflow_analytics-1.0.3.dist-info}/WHEEL +0 -0
  55. {gitflow_analytics-1.0.1.dist-info → gitflow_analytics-1.0.3.dist-info}/entry_points.txt +0 -0
  56. {gitflow_analytics-1.0.1.dist-info → gitflow_analytics-1.0.3.dist-info}/licenses/LICENSE +0 -0
  57. {gitflow_analytics-1.0.1.dist-info → gitflow_analytics-1.0.3.dist-info}/top_level.txt +0 -0
gitflow_analytics/qualitative/utils/cost_tracker.py
@@ -0,0 +1,343 @@
+"""Cost tracking utilities for LLM usage monitoring."""
+
+import json
+import logging
+from dataclasses import dataclass
+from datetime import datetime, timedelta
+from pathlib import Path
+from typing import Any, Dict, List, Optional
+
+
+@dataclass
+class LLMCall:
+    """Record of a single LLM API call."""
+
+    timestamp: datetime
+    model: str
+    input_tokens: int
+    output_tokens: int
+    processing_time_ms: float
+    estimated_cost: float
+    batch_size: int = 1
+    success: bool = True
+    error_message: Optional[str] = None
+
+
+class CostTracker:
+    """Track and manage LLM API usage costs.
+
+    This class provides cost monitoring, budgeting, and optimization
+    features to keep LLM usage within acceptable limits while
+    maintaining analysis quality.
+    """
+
+    # OpenRouter pricing (approximate, in USD per 1M tokens)
+    MODEL_PRICING = {
+        # Anthropic models
+        'anthropic/claude-3-haiku': {'input': 0.25, 'output': 1.25},
+        'anthropic/claude-3-sonnet': {'input': 3.0, 'output': 15.0},
+        'anthropic/claude-3-opus': {'input': 15.0, 'output': 75.0},
+
+        # OpenAI models
+        'openai/gpt-3.5-turbo': {'input': 0.5, 'output': 1.5},
+        'openai/gpt-4': {'input': 30.0, 'output': 60.0},
+        'openai/gpt-4-turbo': {'input': 10.0, 'output': 30.0},
+
+        # Free models (Llama)
+        'meta-llama/llama-3.1-8b-instruct:free': {'input': 0.0, 'output': 0.0},
+        'meta-llama/llama-3.1-70b-instruct:free': {'input': 0.0, 'output': 0.0},
+
+        # Other popular models
+        'google/gemini-pro': {'input': 0.5, 'output': 1.5},
+        'mistralai/mixtral-8x7b-instruct': {'input': 0.27, 'output': 0.27},
+    }
+
+    def __init__(self, cache_dir: Optional[Path] = None, daily_budget: float = 5.0):
+        """Initialize cost tracker.
+
+        Args:
+            cache_dir: Directory to store cost tracking data.
+            daily_budget: Maximum daily spending in USD.
+        """
+        self.daily_budget = daily_budget
+        self.cache_dir = cache_dir or Path(".qualitative_cache")
+        self.cache_dir.mkdir(exist_ok=True)
+
+        self.cost_file = self.cache_dir / "llm_costs.json"
+        self.calls: List[LLMCall] = []
+        self.logger = logging.getLogger(__name__)
+
+        # Load existing cost data
+        self._load_cost_data()
+
+    def record_call(self, model: str, input_tokens: int, output_tokens: int,
+                    processing_time: float, batch_size: int = 1,
+                    success: bool = True, error_message: Optional[str] = None) -> float:
+        """Record an LLM API call and return its estimated cost.
+
+        Args:
+            model: Model name used.
+            input_tokens: Number of input tokens.
+            output_tokens: Number of output tokens.
+            processing_time: Processing time in seconds.
+            batch_size: Number of commits processed in this call.
+            success: Whether the call succeeded.
+            error_message: Error message if the call failed.
+
+        Returns:
+            Estimated cost in USD.
+        """
+        estimated_cost = self._calculate_cost(model, input_tokens, output_tokens)
+
+        call = LLMCall(
+            timestamp=datetime.utcnow(),
+            model=model,
+            input_tokens=input_tokens,
+            output_tokens=output_tokens,
+            processing_time_ms=processing_time * 1000,
+            estimated_cost=estimated_cost,
+            batch_size=batch_size,
+            success=success,
+            error_message=error_message,
+        )
+
+        self.calls.append(call)
+        self._save_cost_data()
+
+        # Log cost information
+        self.logger.info(
+            f"LLM call: {model} | tokens: {input_tokens}+{output_tokens} | "
+            f"cost: ${estimated_cost:.4f} | batch: {batch_size}"
+        )
+
+        return estimated_cost
+
+    def get_daily_spend(self, date: Optional[datetime] = None) -> float:
+        """Get total spending for a specific date.
+
+        Args:
+            date: Date to check (defaults to today).
+
+        Returns:
+            Total spending in USD for the date.
+        """
+        if date is None:
+            date = datetime.utcnow()
+
+        start_of_day = date.replace(hour=0, minute=0, second=0, microsecond=0)
+        end_of_day = start_of_day + timedelta(days=1)
+
+        daily_spend = sum(
+            call.estimated_cost for call in self.calls
+            if start_of_day <= call.timestamp < end_of_day and call.success
+        )
+
+        return daily_spend
+
+    def check_budget_remaining(self) -> float:
+        """Check remaining budget for today.
+
+        Returns:
+            Remaining budget in USD (negative if over budget).
+        """
+        daily_spend = self.get_daily_spend()
+        return self.daily_budget - daily_spend
+
+    def can_afford_call(self, model: str, estimated_tokens: int) -> bool:
+        """Check whether an API call fits within the remaining budget.
+
+        Args:
+            model: Model to use.
+            estimated_tokens: Estimated total tokens (input + output).
+
+        Returns:
+            True if the call is within budget.
+        """
+        # Assume an even input/output split for the estimate
+        estimated_cost = self._calculate_cost(model, estimated_tokens // 2, estimated_tokens // 2)
+        remaining_budget = self.check_budget_remaining()
+
+        return remaining_budget >= estimated_cost
+
+    def get_usage_stats(self, days: int = 7) -> Dict[str, Any]:
+        """Get usage statistics for the last N days.
+
+        Args:
+            days: Number of days to analyze.
+
+        Returns:
+            Dictionary with usage statistics.
+        """
+        cutoff_date = datetime.utcnow() - timedelta(days=days)
+        recent_calls = [call for call in self.calls if call.timestamp >= cutoff_date]
+
+        if not recent_calls:
+            return {
+                'total_calls': 0,
+                'total_cost': 0.0,
+                'total_tokens': 0,
+                'avg_cost_per_call': 0.0,
+                'model_usage': {},
+                'success_rate': 1.0,
+            }
+
+        successful_calls = [call for call in recent_calls if call.success]
+
+        # Calculate statistics
+        total_cost = sum(call.estimated_cost for call in successful_calls)
+        total_tokens = sum(call.input_tokens + call.output_tokens for call in recent_calls)
+
+        # Model usage breakdown
+        model_usage = {}
+        for call in recent_calls:
+            if call.model not in model_usage:
+                model_usage[call.model] = {'calls': 0, 'cost': 0.0, 'tokens': 0}
+            model_usage[call.model]['calls'] += 1
+            model_usage[call.model]['cost'] += call.estimated_cost
+            model_usage[call.model]['tokens'] += call.input_tokens + call.output_tokens
+
+        return {
+            'total_calls': len(recent_calls),
+            'successful_calls': len(successful_calls),
+            'total_cost': total_cost,
+            'total_tokens': total_tokens,
+            'avg_cost_per_call': total_cost / len(successful_calls) if successful_calls else 0.0,
+            'model_usage': model_usage,
+            'success_rate': len(successful_calls) / len(recent_calls),
+            'daily_average_cost': total_cost / days,
+        }
+
+    def suggest_cost_optimizations(self) -> List[str]:
+        """Suggest ways to optimize costs based on usage patterns.
+
+        Returns:
+            List of optimization suggestions.
+        """
+        suggestions = []
+        stats = self.get_usage_stats(days=7)
+
+        if stats['total_calls'] == 0:
+            return suggestions
+
+        # Check whether expensive models are overused
+        model_usage = stats['model_usage']
+        total_cost = stats['total_cost']
+
+        expensive_models = ['anthropic/claude-3-opus', 'openai/gpt-4']
+        expensive_usage = sum(
+            model_usage.get(model, {}).get('cost', 0)
+            for model in expensive_models
+        )
+
+        if expensive_usage > total_cost * 0.3:
+            suggestions.append(
+                "Consider using cheaper models (Claude Haiku, GPT-3.5) for routine classification"
+            )
+
+        # Check for free model opportunities
+        free_usage = model_usage.get('meta-llama/llama-3.1-8b-instruct:free', {}).get('calls', 0)
+        if free_usage < stats['total_calls'] * 0.5:
+            suggestions.append(
+                "Increase usage of free Llama models for simple classification tasks"
+            )
+
+        # Check daily spend
+        if self.get_daily_spend() > self.daily_budget * 0.8:
+            suggestions.append(
+                "Approaching daily budget limit - consider increasing NLP confidence threshold"
+            )
+
+        # Check batch efficiency over the last 50 calls
+        recent = self.calls[-50:]
+        avg_batch_size = sum(call.batch_size for call in recent) / len(recent)
+
+        if avg_batch_size < 3:
+            suggestions.append(
+                "Increase batch size for LLM calls to improve cost efficiency"
+            )
+
+        return suggestions
+
+    def _calculate_cost(self, model: str, input_tokens: int, output_tokens: int) -> float:
+        """Calculate the estimated cost of an API call.
+
+        Args:
+            model: Model name.
+            input_tokens: Number of input tokens.
+            output_tokens: Number of output tokens.
+
+        Returns:
+            Estimated cost in USD.
+        """
+        if model not in self.MODEL_PRICING:
+            # Default to moderate pricing for unknown models
+            input_price = 1.0
+            output_price = 3.0
+            self.logger.warning(f"Unknown model pricing for {model}, using default rates")
+        else:
+            pricing = self.MODEL_PRICING[model]
+            input_price = pricing['input']
+            output_price = pricing['output']
+
+        # Calculate cost (pricing is per 1M tokens)
+        input_cost = (input_tokens / 1_000_000) * input_price
+        output_cost = (output_tokens / 1_000_000) * output_price
+
+        return input_cost + output_cost
+
+    def _load_cost_data(self) -> None:
+        """Load cost tracking data from file."""
+        if not self.cost_file.exists():
+            return
+
+        try:
+            with open(self.cost_file, 'r') as f:
+                data = json.load(f)
+
+            self.calls = []
+            for call_data in data.get('calls', []):
+                call = LLMCall(
+                    timestamp=datetime.fromisoformat(call_data['timestamp']),
+                    model=call_data['model'],
+                    input_tokens=call_data['input_tokens'],
+                    output_tokens=call_data['output_tokens'],
+                    processing_time_ms=call_data['processing_time_ms'],
+                    estimated_cost=call_data['estimated_cost'],
+                    batch_size=call_data.get('batch_size', 1),
+                    success=call_data.get('success', True),
+                    error_message=call_data.get('error_message'),
+                )
+                self.calls.append(call)
+
+        except Exception as e:
+            self.logger.error(f"Failed to load cost data: {e}")
+            self.calls = []
+
+    def _save_cost_data(self) -> None:
+        """Save cost tracking data to file."""
+        try:
+            # Keep only the last 1000 calls to bound file size
+            recent_calls = self.calls[-1000:]
+
+            data = {
+                'calls': [
+                    {
+                        'timestamp': call.timestamp.isoformat(),
+                        'model': call.model,
+                        'input_tokens': call.input_tokens,
+                        'output_tokens': call.output_tokens,
+                        'processing_time_ms': call.processing_time_ms,
+                        'estimated_cost': call.estimated_cost,
+                        'batch_size': call.batch_size,
+                        'success': call.success,
+                        'error_message': call.error_message,
+                    }
+                    for call in recent_calls
+                ]
+            }
+
+            with open(self.cost_file, 'w') as f:
+                json.dump(data, f, indent=2)
+
+        except Exception as e:
+            self.logger.error(f"Failed to save cost data: {e}")
gitflow_analytics/qualitative/utils/metrics.py
@@ -0,0 +1,347 @@
+"""Performance and accuracy metrics for qualitative analysis."""
+
+import logging
+import statistics
+from collections import defaultdict, deque
+from dataclasses import dataclass
+from datetime import datetime, timedelta
+from typing import Any, Dict, List
+
+
+@dataclass
+class ProcessingMetrics:
+    """Metrics for a single processing operation."""
+
+    operation: str
+    processing_time_ms: float
+    items_processed: int
+    confidence_score: float
+    method_used: str  # 'nlp' or 'llm'
+    timestamp: datetime
+
+    @property
+    def items_per_second(self) -> float:
+        """Calculate the processing rate."""
+        if self.processing_time_ms <= 0:
+            return 0.0
+        return (self.items_processed * 1000) / self.processing_time_ms
+
+
+class PerformanceMetrics:
+    """Track and analyze performance metrics for qualitative analysis.
+
+    This class provides comprehensive performance monitoring including
+    processing times, accuracy metrics, cost tracking, and system health
+    indicators for the qualitative analysis pipeline.
+    """
+
+    def __init__(self, max_history: int = 10000):
+        """Initialize performance metrics tracker.
+
+        Args:
+            max_history: Maximum number of metrics to keep in memory.
+        """
+        self.max_history = max_history
+        self.logger = logging.getLogger(__name__)
+
+        # Processing metrics
+        self.processing_metrics: deque[ProcessingMetrics] = deque(maxlen=max_history)
+
+        # Method usage tracking
+        self.method_usage = defaultdict(int)
+        self.method_performance = defaultdict(list)
+
+        # Confidence tracking
+        self.confidence_history = deque(maxlen=max_history)
+
+        # Error tracking
+        self.error_counts = defaultdict(int)
+        self.error_history = deque(maxlen=1000)
+
+        # Cache performance
+        self.cache_hits = 0
+        self.cache_misses = 0
+
+        # Quality metrics
+        self.classification_accuracy = deque(maxlen=1000)
+
+    def record_processing(self, operation: str, processing_time_ms: float,
+                          items_processed: int, confidence_score: float,
+                          method_used: str) -> None:
+        """Record a processing operation.
+
+        Args:
+            operation: Type of operation (e.g., 'classification', 'analysis').
+            processing_time_ms: Processing time in milliseconds.
+            items_processed: Number of items processed.
+            confidence_score: Average confidence score.
+            method_used: Method used ('nlp' or 'llm').
+        """
+        metric = ProcessingMetrics(
+            operation=operation,
+            processing_time_ms=processing_time_ms,
+            items_processed=items_processed,
+            confidence_score=confidence_score,
+            method_used=method_used,
+            timestamp=datetime.utcnow(),
+        )
+
+        self.processing_metrics.append(metric)
+        self.method_usage[method_used] += items_processed
+        if items_processed > 0:  # Guard against division by zero on empty batches
+            self.method_performance[method_used].append(processing_time_ms / items_processed)
+        self.confidence_history.append(confidence_score)
+
+    def record_cache_hit(self) -> None:
+        """Record a cache hit."""
+        self.cache_hits += 1
+
+    def record_cache_miss(self) -> None:
+        """Record a cache miss."""
+        self.cache_misses += 1
+
+    def record_error(self, error_type: str, error_message: str) -> None:
+        """Record an error occurrence.
+
+        Args:
+            error_type: Type of error.
+            error_message: Error message.
+        """
+        self.error_counts[error_type] += 1
+        self.error_history.append({
+            'type': error_type,
+            'message': error_message,
+            'timestamp': datetime.utcnow(),
+        })
+
+    def record_classification_accuracy(self, accuracy: float) -> None:
+        """Record a classification accuracy measurement.
+
+        Args:
+            accuracy: Accuracy score (0.0 to 1.0).
+        """
+        self.classification_accuracy.append(accuracy)
+
+    def get_processing_stats(self, hours: int = 24) -> Dict[str, Any]:
+        """Get processing statistics for the last N hours.
+
+        Args:
+            hours: Number of hours to analyze.
+
+        Returns:
+            Dictionary with processing statistics.
+        """
+        cutoff_time = datetime.utcnow() - timedelta(hours=hours)
+        recent_metrics = [
+            m for m in self.processing_metrics
+            if m.timestamp >= cutoff_time
+        ]
+
+        if not recent_metrics:
+            return {
+                'total_operations': 0,
+                'total_items_processed': 0,
+                'avg_processing_time_ms': 0.0,
+                'avg_items_per_second': 0.0,
+                'avg_confidence': 0.0,
+                'method_breakdown': {},
+                'cache_hit_rate': 0.0,
+                'time_period_hours': hours,
+            }
+
+        # Calculate statistics
+        total_items = sum(m.items_processed for m in recent_metrics)
+        total_time = sum(m.processing_time_ms for m in recent_metrics)
+
+        avg_processing_time = total_time / len(recent_metrics)
+        avg_items_per_second = statistics.mean([m.items_per_second for m in recent_metrics])
+        avg_confidence = statistics.mean([m.confidence_score for m in recent_metrics])
+
+        # Method breakdown
+        method_breakdown = {}
+        for method in ['nlp', 'llm']:
+            method_metrics = [m for m in recent_metrics if m.method_used == method]
+            if method_metrics:
+                method_items = sum(m.items_processed for m in method_metrics)
+                method_breakdown[method] = {
+                    'items_processed': method_items,
+                    'percentage': (method_items / total_items) * 100 if total_items > 0 else 0,
+                    'avg_confidence': statistics.mean([m.confidence_score for m in method_metrics]),
+                    'avg_processing_time_ms': statistics.mean([m.processing_time_ms for m in method_metrics]),
+                }
+
+        # Cache hit rate
+        total_cache_requests = self.cache_hits + self.cache_misses
+        cache_hit_rate = (self.cache_hits / total_cache_requests) if total_cache_requests > 0 else 0.0
+
+        return {
+            'total_operations': len(recent_metrics),
+            'total_items_processed': total_items,
+            'avg_processing_time_ms': avg_processing_time,
+            'avg_items_per_second': avg_items_per_second,
+            'avg_confidence': avg_confidence,
+            'method_breakdown': method_breakdown,
+            'cache_hit_rate': cache_hit_rate,
+            'time_period_hours': hours,
+        }
+
+    def get_quality_metrics(self) -> Dict[str, Any]:
+        """Get quality and accuracy metrics.
+
+        Returns:
+            Dictionary with quality metrics.
+        """
+        if not self.confidence_history:
+            return {
+                'avg_confidence': 0.0,
+                'confidence_distribution': {},
+                'classification_accuracy': 0.0,
+                'quality_trend': 'stable',
+            }
+
+        # Confidence statistics
+        confidences = list(self.confidence_history)
+        avg_confidence = statistics.mean(confidences)
+
+        # Confidence distribution
+        confidence_buckets = {
+            'high (>0.8)': sum(1 for c in confidences if c > 0.8),
+            'medium (0.6-0.8)': sum(1 for c in confidences if 0.6 <= c <= 0.8),
+            'low (<0.6)': sum(1 for c in confidences if c < 0.6),
+        }
+
+        # Quality trend (comparing recent vs. older samples)
+        if len(confidences) >= 100:
+            recent_confidence = statistics.mean(confidences[-50:])
+            older_confidence = statistics.mean(confidences[-100:-50])
+
+            if recent_confidence > older_confidence + 0.05:
+                quality_trend = 'improving'
+            elif recent_confidence < older_confidence - 0.05:
+                quality_trend = 'declining'
+            else:
+                quality_trend = 'stable'
+        else:
+            quality_trend = 'insufficient_data'
+
+        # Classification accuracy
+        avg_accuracy = statistics.mean(self.classification_accuracy) if self.classification_accuracy else 0.0
+
+        return {
+            'avg_confidence': avg_confidence,
+            'confidence_distribution': confidence_buckets,
+            'classification_accuracy': avg_accuracy,
+            'quality_trend': quality_trend,
+            'total_samples': len(confidences),
+        }
+
+    def get_error_analysis(self) -> Dict[str, Any]:
+        """Get error analysis and system health metrics.
+
+        Returns:
+            Dictionary with error analysis.
+        """
+        # Recent errors (last 24 hours)
+        cutoff_time = datetime.utcnow() - timedelta(hours=24)
+        recent_errors = [
+            error for error in self.error_history
+            if error['timestamp'] >= cutoff_time
+        ]
+
+        # Error type breakdown
+        error_type_counts = defaultdict(int)
+        for error in recent_errors:
+            error_type_counts[error['type']] += 1
+
+        # Total operations for error rate calculation
+        total_operations = len([
+            m for m in self.processing_metrics
+            if m.timestamp >= cutoff_time
+        ])
+
+        error_rate = len(recent_errors) / total_operations if total_operations > 0 else 0.0
+
+        return {
+            'total_errors_24h': len(recent_errors),
+            'error_rate': error_rate,
+            'error_types': dict(error_type_counts),
+            'most_common_error': max(error_type_counts, key=error_type_counts.get) if error_type_counts else None,
+            'system_health': 'healthy' if error_rate < 0.01 else 'degraded' if error_rate < 0.05 else 'unhealthy',
+        }
+
+    def get_performance_alerts(self) -> List[str]:
+        """Get performance alerts and recommendations.
+
+        Returns:
+            List of alert messages.
+        """
+        alerts = []
+
+        # Check recent performance
+        stats = self.get_processing_stats(hours=1)
+        quality = self.get_quality_metrics()
+        errors = self.get_error_analysis()
+
+        # Processing speed alerts
+        if stats['avg_items_per_second'] < 50:  # Less than 50 items/second
+            alerts.append("Processing speed below target (< 50 items/second)")
+
+        # Confidence alerts
+        if quality['avg_confidence'] < 0.6:
+            alerts.append("Average confidence below threshold (< 0.6)")
+
+        # Method balance alerts
+        if 'llm' in stats['method_breakdown']:
+            llm_percentage = stats['method_breakdown']['llm']['percentage']
+            if llm_percentage > 20:  # More than 20% routed to the LLM
+                alerts.append(f"High LLM usage ({llm_percentage:.1f}%) - consider tuning NLP thresholds")
+
+        # Error rate alerts
+        if errors['error_rate'] > 0.05:  # More than 5% error rate
+            alerts.append(f"High error rate ({errors['error_rate']:.1%})")
+
+        # Cache performance alerts
+        if stats['cache_hit_rate'] < 0.3:  # Less than 30% cache hit rate
+            alerts.append("Low cache hit rate - pattern learning may be ineffective")
+
+        # Quality trend alerts
+        if quality['quality_trend'] == 'declining':
+            alerts.append("Quality trend declining - review recent changes")
+
+        return alerts
+
+    def get_optimization_suggestions(self) -> List[str]:
+        """Get optimization suggestions based on metrics.
+
+        Returns:
+            List of optimization suggestions.
+        """
+        suggestions = []
+
+        stats = self.get_processing_stats(hours=24)
+        quality = self.get_quality_metrics()
+
+        # Performance optimizations
+        if stats['avg_items_per_second'] < 100:
+            suggestions.append("Consider increasing batch size or enabling parallel processing")
+
+        # Method optimization
+        method_breakdown = stats['method_breakdown']
+        if 'llm' in method_breakdown and method_breakdown['llm']['percentage'] > 15:
+            suggestions.append("High LLM usage - consider lowering confidence threshold or improving NLP patterns")
+
+        if 'nlp' in method_breakdown and method_breakdown['nlp']['avg_confidence'] < 0.7:
+            suggestions.append("NLP confidence low - consider updating classification patterns")
+
+        # Quality optimizations
+        if quality['avg_confidence'] < 0.7:
+            suggestions.append("Overall confidence low - review classification accuracy and update models")
+
+        confidence_dist = quality['confidence_distribution']
+        if confidence_dist.get('low (<0.6)', 0) > confidence_dist.get('high (>0.8)', 0):
+            suggestions.append("Many low-confidence predictions - consider additional training data")
+
+        # Cache optimizations
+        if stats['cache_hit_rate'] < 0.5:
+            suggestions.append("Low cache hit rate - increase cache size or improve pattern matching")
+
+        return suggestions
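A similar sketch for the new PerformanceMetrics class (again not part of the diff; the recorded operation, timings, and confidence values are invented for illustration):

from gitflow_analytics.qualitative.utils.metrics import PerformanceMetrics

metrics = PerformanceMetrics(max_history=10_000)

# Hypothetical batch: 40 commits classified by the NLP path in 350 ms.
metrics.record_processing(
    operation='classification',
    processing_time_ms=350.0,
    items_processed=40,
    confidence_score=0.82,
    method_used='nlp',
)
metrics.record_cache_hit()
metrics.record_cache_miss()

# ~114 items/second for the batch above (40 * 1000 / 350)
print(metrics.get_processing_stats(hours=24)['avg_items_per_second'])
for alert in metrics.get_performance_alerts():
    print("ALERT:", alert)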