gitflow-analytics 1.0.1__py3-none-any.whl → 1.3.6__py3-none-any.whl

This diff shows the changes between two publicly released versions of this package, as published to a supported registry. It is provided for informational purposes only.
Files changed (119)
  1. gitflow_analytics/__init__.py +11 -11
  2. gitflow_analytics/_version.py +2 -2
  3. gitflow_analytics/classification/__init__.py +31 -0
  4. gitflow_analytics/classification/batch_classifier.py +752 -0
  5. gitflow_analytics/classification/classifier.py +464 -0
  6. gitflow_analytics/classification/feature_extractor.py +725 -0
  7. gitflow_analytics/classification/linguist_analyzer.py +574 -0
  8. gitflow_analytics/classification/model.py +455 -0
  9. gitflow_analytics/cli.py +4490 -378
  10. gitflow_analytics/cli_rich.py +503 -0
  11. gitflow_analytics/config/__init__.py +43 -0
  12. gitflow_analytics/config/errors.py +261 -0
  13. gitflow_analytics/config/loader.py +904 -0
  14. gitflow_analytics/config/profiles.py +264 -0
  15. gitflow_analytics/config/repository.py +124 -0
  16. gitflow_analytics/config/schema.py +441 -0
  17. gitflow_analytics/config/validator.py +154 -0
  18. gitflow_analytics/config.py +44 -398
  19. gitflow_analytics/core/analyzer.py +1320 -172
  20. gitflow_analytics/core/branch_mapper.py +132 -132
  21. gitflow_analytics/core/cache.py +1554 -175
  22. gitflow_analytics/core/data_fetcher.py +1193 -0
  23. gitflow_analytics/core/identity.py +571 -185
  24. gitflow_analytics/core/metrics_storage.py +526 -0
  25. gitflow_analytics/core/progress.py +372 -0
  26. gitflow_analytics/core/schema_version.py +269 -0
  27. gitflow_analytics/extractors/base.py +13 -11
  28. gitflow_analytics/extractors/ml_tickets.py +1100 -0
  29. gitflow_analytics/extractors/story_points.py +77 -59
  30. gitflow_analytics/extractors/tickets.py +841 -89
  31. gitflow_analytics/identity_llm/__init__.py +6 -0
  32. gitflow_analytics/identity_llm/analysis_pass.py +231 -0
  33. gitflow_analytics/identity_llm/analyzer.py +464 -0
  34. gitflow_analytics/identity_llm/models.py +76 -0
  35. gitflow_analytics/integrations/github_integration.py +258 -87
  36. gitflow_analytics/integrations/jira_integration.py +572 -123
  37. gitflow_analytics/integrations/orchestrator.py +206 -82
  38. gitflow_analytics/metrics/activity_scoring.py +322 -0
  39. gitflow_analytics/metrics/branch_health.py +470 -0
  40. gitflow_analytics/metrics/dora.py +542 -179
  41. gitflow_analytics/models/database.py +986 -59
  42. gitflow_analytics/pm_framework/__init__.py +115 -0
  43. gitflow_analytics/pm_framework/adapters/__init__.py +50 -0
  44. gitflow_analytics/pm_framework/adapters/jira_adapter.py +1845 -0
  45. gitflow_analytics/pm_framework/base.py +406 -0
  46. gitflow_analytics/pm_framework/models.py +211 -0
  47. gitflow_analytics/pm_framework/orchestrator.py +652 -0
  48. gitflow_analytics/pm_framework/registry.py +333 -0
  49. gitflow_analytics/qualitative/__init__.py +29 -0
  50. gitflow_analytics/qualitative/chatgpt_analyzer.py +259 -0
  51. gitflow_analytics/qualitative/classifiers/__init__.py +13 -0
  52. gitflow_analytics/qualitative/classifiers/change_type.py +742 -0
  53. gitflow_analytics/qualitative/classifiers/domain_classifier.py +506 -0
  54. gitflow_analytics/qualitative/classifiers/intent_analyzer.py +535 -0
  55. gitflow_analytics/qualitative/classifiers/llm/__init__.py +35 -0
  56. gitflow_analytics/qualitative/classifiers/llm/base.py +193 -0
  57. gitflow_analytics/qualitative/classifiers/llm/batch_processor.py +383 -0
  58. gitflow_analytics/qualitative/classifiers/llm/cache.py +479 -0
  59. gitflow_analytics/qualitative/classifiers/llm/cost_tracker.py +435 -0
  60. gitflow_analytics/qualitative/classifiers/llm/openai_client.py +403 -0
  61. gitflow_analytics/qualitative/classifiers/llm/prompts.py +373 -0
  62. gitflow_analytics/qualitative/classifiers/llm/response_parser.py +287 -0
  63. gitflow_analytics/qualitative/classifiers/llm_commit_classifier.py +607 -0
  64. gitflow_analytics/qualitative/classifiers/risk_analyzer.py +438 -0
  65. gitflow_analytics/qualitative/core/__init__.py +13 -0
  66. gitflow_analytics/qualitative/core/llm_fallback.py +657 -0
  67. gitflow_analytics/qualitative/core/nlp_engine.py +382 -0
  68. gitflow_analytics/qualitative/core/pattern_cache.py +479 -0
  69. gitflow_analytics/qualitative/core/processor.py +673 -0
  70. gitflow_analytics/qualitative/enhanced_analyzer.py +2236 -0
  71. gitflow_analytics/qualitative/example_enhanced_usage.py +420 -0
  72. gitflow_analytics/qualitative/models/__init__.py +25 -0
  73. gitflow_analytics/qualitative/models/schemas.py +306 -0
  74. gitflow_analytics/qualitative/utils/__init__.py +13 -0
  75. gitflow_analytics/qualitative/utils/batch_processor.py +339 -0
  76. gitflow_analytics/qualitative/utils/cost_tracker.py +345 -0
  77. gitflow_analytics/qualitative/utils/metrics.py +361 -0
  78. gitflow_analytics/qualitative/utils/text_processing.py +285 -0
  79. gitflow_analytics/reports/__init__.py +100 -0
  80. gitflow_analytics/reports/analytics_writer.py +550 -18
  81. gitflow_analytics/reports/base.py +648 -0
  82. gitflow_analytics/reports/branch_health_writer.py +322 -0
  83. gitflow_analytics/reports/classification_writer.py +924 -0
  84. gitflow_analytics/reports/cli_integration.py +427 -0
  85. gitflow_analytics/reports/csv_writer.py +1700 -216
  86. gitflow_analytics/reports/data_models.py +504 -0
  87. gitflow_analytics/reports/database_report_generator.py +427 -0
  88. gitflow_analytics/reports/example_usage.py +344 -0
  89. gitflow_analytics/reports/factory.py +499 -0
  90. gitflow_analytics/reports/formatters.py +698 -0
  91. gitflow_analytics/reports/html_generator.py +1116 -0
  92. gitflow_analytics/reports/interfaces.py +489 -0
  93. gitflow_analytics/reports/json_exporter.py +2770 -0
  94. gitflow_analytics/reports/narrative_writer.py +2289 -158
  95. gitflow_analytics/reports/story_point_correlation.py +1144 -0
  96. gitflow_analytics/reports/weekly_trends_writer.py +389 -0
  97. gitflow_analytics/training/__init__.py +5 -0
  98. gitflow_analytics/training/model_loader.py +377 -0
  99. gitflow_analytics/training/pipeline.py +550 -0
  100. gitflow_analytics/tui/__init__.py +5 -0
  101. gitflow_analytics/tui/app.py +724 -0
  102. gitflow_analytics/tui/screens/__init__.py +8 -0
  103. gitflow_analytics/tui/screens/analysis_progress_screen.py +496 -0
  104. gitflow_analytics/tui/screens/configuration_screen.py +523 -0
  105. gitflow_analytics/tui/screens/loading_screen.py +348 -0
  106. gitflow_analytics/tui/screens/main_screen.py +321 -0
  107. gitflow_analytics/tui/screens/results_screen.py +722 -0
  108. gitflow_analytics/tui/widgets/__init__.py +7 -0
  109. gitflow_analytics/tui/widgets/data_table.py +255 -0
  110. gitflow_analytics/tui/widgets/export_modal.py +301 -0
  111. gitflow_analytics/tui/widgets/progress_widget.py +187 -0
  112. gitflow_analytics-1.3.6.dist-info/METADATA +1015 -0
  113. gitflow_analytics-1.3.6.dist-info/RECORD +122 -0
  114. gitflow_analytics-1.0.1.dist-info/METADATA +0 -463
  115. gitflow_analytics-1.0.1.dist-info/RECORD +0 -31
  116. {gitflow_analytics-1.0.1.dist-info → gitflow_analytics-1.3.6.dist-info}/WHEEL +0 -0
  117. {gitflow_analytics-1.0.1.dist-info → gitflow_analytics-1.3.6.dist-info}/entry_points.txt +0 -0
  118. {gitflow_analytics-1.0.1.dist-info → gitflow_analytics-1.3.6.dist-info}/licenses/LICENSE +0 -0
  119. {gitflow_analytics-1.0.1.dist-info → gitflow_analytics-1.3.6.dist-info}/top_level.txt +0 -0
gitflow_analytics/qualitative/utils/cost_tracker.py
@@ -0,0 +1,345 @@
+ """Cost tracking utilities for LLM usage monitoring."""
+
+ import json
+ import logging
+ from dataclasses import dataclass
+ from datetime import datetime, timedelta
+ from pathlib import Path
+ from typing import Any, Optional
+
+
+ @dataclass
+ class LLMCall:
+     """Record of a single LLM API call."""
+
+     timestamp: datetime
+     model: str
+     input_tokens: int
+     output_tokens: int
+     processing_time_ms: float
+     estimated_cost: float
+     batch_size: int = 1
+     success: bool = True
+     error_message: Optional[str] = None
+
+
+ class CostTracker:
+     """Track and manage LLM API usage costs.
+
+     This class provides cost monitoring, budgeting, and optimization
+     features to keep LLM usage within acceptable limits while
+     maintaining analysis quality.
+     """
+
+     # OpenRouter pricing (approximate, in USD per 1M tokens)
+     MODEL_PRICING = {
+         # Anthropic models
+         "anthropic/claude-3-haiku": {"input": 0.25, "output": 1.25},
+         "anthropic/claude-3-sonnet": {"input": 3.0, "output": 15.0},
+         "anthropic/claude-3-opus": {"input": 15.0, "output": 75.0},
+         # OpenAI models
+         "openai/gpt-3.5-turbo": {"input": 0.5, "output": 1.5},
+         "openai/gpt-4": {"input": 30.0, "output": 60.0},
+         "openai/gpt-4-turbo": {"input": 10.0, "output": 30.0},
+         # Free models (Llama)
+         "meta-llama/llama-3.1-8b-instruct:free": {"input": 0.0, "output": 0.0},
+         "meta-llama/llama-3.1-70b-instruct:free": {"input": 0.0, "output": 0.0},
+         # Other popular models
+         "google/gemini-pro": {"input": 0.5, "output": 1.5},
+         "mistralai/mixtral-8x7b-instruct": {"input": 0.27, "output": 0.27},
+     }
+
+     def __init__(self, cache_dir: Optional[Path] = None, daily_budget: float = 5.0):
+         """Initialize cost tracker.
+
+         Args:
+             cache_dir: Directory to store cost tracking data
+             daily_budget: Maximum daily spending in USD
+         """
+         self.daily_budget = daily_budget
+         self.cache_dir = cache_dir or Path(".qualitative_cache")
+         self.cache_dir.mkdir(exist_ok=True)
+
+         self.cost_file = self.cache_dir / "llm_costs.json"
+         self.calls: list[LLMCall] = []
+         self.logger = logging.getLogger(__name__)
+
+         # Load existing cost data
+         self._load_cost_data()
+
+     def record_call(
+         self,
+         model: str,
+         input_tokens: int,
+         output_tokens: int,
+         processing_time: float,
+         batch_size: int = 1,
+         success: bool = True,
+         error_message: Optional[str] = None,
+     ) -> float:
+         """Record an LLM API call and return estimated cost.
+
+         Args:
+             model: Model name used
+             input_tokens: Number of input tokens
+             output_tokens: Number of output tokens
+             processing_time: Processing time in seconds
+             batch_size: Number of commits processed in this call
+             success: Whether the call was successful
+             error_message: Error message if call failed
+
+         Returns:
+             Estimated cost in USD
+         """
+         estimated_cost = self._calculate_cost(model, input_tokens, output_tokens)
+
+         call = LLMCall(
+             timestamp=datetime.utcnow(),
+             model=model,
+             input_tokens=input_tokens,
+             output_tokens=output_tokens,
+             processing_time_ms=processing_time * 1000,
+             estimated_cost=estimated_cost,
+             batch_size=batch_size,
+             success=success,
+             error_message=error_message,
+         )
+
+         self.calls.append(call)
+         self._save_cost_data()
+
+         # Log cost information
+         self.logger.info(
+             f"LLM call: {model} | tokens: {input_tokens}+{output_tokens} | "
+             f"cost: ${estimated_cost:.4f} | batch: {batch_size}"
+         )
+
+         return estimated_cost
+
+     def get_daily_spend(self, date: Optional[datetime] = None) -> float:
+         """Get total spending for a specific date.
+
+         Args:
+             date: Date to check (defaults to today)
+
+         Returns:
+             Total spending in USD for the date
+         """
+         if date is None:
+             date = datetime.utcnow()
+
+         start_of_day = date.replace(hour=0, minute=0, second=0, microsecond=0)
+         end_of_day = start_of_day + timedelta(days=1)
+
+         daily_spend = sum(
+             call.estimated_cost
+             for call in self.calls
+             if start_of_day <= call.timestamp < end_of_day and call.success
+         )
+
+         return daily_spend
+
+     def check_budget_remaining(self) -> float:
+         """Check remaining budget for today.
+
+         Returns:
+             Remaining budget in USD (negative if over budget)
+         """
+         daily_spend = self.get_daily_spend()
+         return self.daily_budget - daily_spend
+
+     def can_afford_call(self, model: str, estimated_tokens: int) -> bool:
+         """Check if we can afford an API call within budget.
+
+         Args:
+             model: Model to use
+             estimated_tokens: Estimated total tokens (input + output)
+
+         Returns:
+             True if call is within budget
+         """
+         estimated_cost = self._calculate_cost(model, estimated_tokens // 2, estimated_tokens // 2)
+         remaining_budget = self.check_budget_remaining()
+
+         return remaining_budget >= estimated_cost
+
+     def get_usage_stats(self, days: int = 7) -> dict[str, Any]:
+         """Get usage statistics for the last N days.
+
+         Args:
+             days: Number of days to analyze
+
+         Returns:
+             Dictionary with usage statistics
+         """
+         cutoff_date = datetime.utcnow() - timedelta(days=days)
+         recent_calls = [call for call in self.calls if call.timestamp >= cutoff_date]
+
+         if not recent_calls:
+             return {
+                 "total_calls": 0,
+                 "total_cost": 0.0,
+                 "total_tokens": 0,
+                 "avg_cost_per_call": 0.0,
+                 "model_usage": {},
+                 "success_rate": 1.0,
+             }
+
+         successful_calls = [call for call in recent_calls if call.success]
+
+         # Calculate statistics
+         total_cost = sum(call.estimated_cost for call in successful_calls)
+         total_tokens = sum(call.input_tokens + call.output_tokens for call in recent_calls)
+
+         # Model usage breakdown
+         model_usage = {}
+         for call in recent_calls:
+             if call.model not in model_usage:
+                 model_usage[call.model] = {"calls": 0, "cost": 0.0, "tokens": 0}
+             model_usage[call.model]["calls"] += 1
+             model_usage[call.model]["cost"] += call.estimated_cost
+             model_usage[call.model]["tokens"] += call.input_tokens + call.output_tokens
+
+         return {
+             "total_calls": len(recent_calls),
+             "successful_calls": len(successful_calls),
+             "total_cost": total_cost,
+             "total_tokens": total_tokens,
+             "avg_cost_per_call": total_cost / len(successful_calls) if successful_calls else 0.0,
+             "model_usage": model_usage,
+             "success_rate": len(successful_calls) / len(recent_calls) if recent_calls else 1.0,
+             "daily_average_cost": total_cost / days,
+         }
+
+     def suggest_cost_optimizations(self) -> list[str]:
+         """Suggest ways to optimize costs based on usage patterns.
+
+         Returns:
+             List of optimization suggestions
+         """
+         suggestions = []
+         stats = self.get_usage_stats(days=7)
+
+         if stats["total_calls"] == 0:
+             return suggestions
+
+         # Check if expensive models are overused
+         model_usage = stats["model_usage"]
+         total_cost = stats["total_cost"]
+
+         expensive_models = ["anthropic/claude-3-opus", "openai/gpt-4"]
+         expensive_usage = sum(
+             model_usage.get(model, {}).get("cost", 0) for model in expensive_models
+         )
+
+         if expensive_usage > total_cost * 0.3:
+             suggestions.append(
+                 "Consider using cheaper models (Claude Haiku, GPT-3.5) for routine classification"
+             )
+
+         # Check for free model opportunities
+         free_usage = model_usage.get("meta-llama/llama-3.1-8b-instruct:free", {}).get("calls", 0)
+         if free_usage < stats["total_calls"] * 0.5:
+             suggestions.append(
+                 "Increase usage of free Llama models for simple classification tasks"
+             )
+
+         # Check daily spend
+         if self.get_daily_spend() > self.daily_budget * 0.8:
+             suggestions.append(
+                 "Approaching daily budget limit - consider increasing NLP confidence threshold"
+             )
+
+         # Check batch efficiency over the last 50 calls
+         avg_batch_size = sum(call.batch_size for call in self.calls[-50:]) / min(
+             50, len(self.calls)
+         )
+
+         if avg_batch_size < 3:
+             suggestions.append("Increase batch size for LLM calls to improve cost efficiency")
+
+         return suggestions
+
+     def _calculate_cost(self, model: str, input_tokens: int, output_tokens: int) -> float:
+         """Calculate estimated cost for an API call.
+
+         Args:
+             model: Model name
+             input_tokens: Number of input tokens
+             output_tokens: Number of output tokens
+
+         Returns:
+             Estimated cost in USD
+         """
+         if model not in self.MODEL_PRICING:
+             # Default to moderate pricing for unknown models
+             input_price = 1.0
+             output_price = 3.0
+             self.logger.warning(f"Unknown model pricing for {model}, using default rates")
+         else:
+             pricing = self.MODEL_PRICING[model]
+             input_price = pricing["input"]
+             output_price = pricing["output"]
+
+         # Calculate cost (pricing is per 1M tokens)
+         input_cost = (input_tokens / 1_000_000) * input_price
+         output_cost = (output_tokens / 1_000_000) * output_price
+
+         return input_cost + output_cost
+
+     def _load_cost_data(self) -> None:
+         """Load cost tracking data from file."""
+         if not self.cost_file.exists():
+             return
+
+         try:
+             with open(self.cost_file) as f:
+                 data = json.load(f)
+
+             self.calls = []
+             for call_data in data.get("calls", []):
+                 call = LLMCall(
+                     timestamp=datetime.fromisoformat(call_data["timestamp"]),
+                     model=call_data["model"],
+                     input_tokens=call_data["input_tokens"],
+                     output_tokens=call_data["output_tokens"],
+                     processing_time_ms=call_data["processing_time_ms"],
+                     estimated_cost=call_data["estimated_cost"],
+                     batch_size=call_data.get("batch_size", 1),
+                     success=call_data.get("success", True),
+                     error_message=call_data.get("error_message"),
+                 )
+                 self.calls.append(call)
+
+         except Exception as e:
+             self.logger.error(f"Failed to load cost data: {e}")
+             self.calls = []
+
+     def _save_cost_data(self) -> None:
+         """Save cost tracking data to file."""
+         try:
+             # Keep only last 1000 calls to prevent file from growing too large
+             recent_calls = self.calls[-1000:]
+
+             data = {
+                 "calls": [
+                     {
+                         "timestamp": call.timestamp.isoformat(),
+                         "model": call.model,
+                         "input_tokens": call.input_tokens,
+                         "output_tokens": call.output_tokens,
+                         "processing_time_ms": call.processing_time_ms,
+                         "estimated_cost": call.estimated_cost,
+                         "batch_size": call.batch_size,
+                         "success": call.success,
+                         "error_message": call.error_message,
+                     }
+                     for call in recent_calls
+                 ]
+             }
+
+             with open(self.cost_file, "w") as f:
+                 json.dump(data, f, indent=2)
+
+         except Exception as e:
+             self.logger.error(f"Failed to save cost data: {e}")
gitflow_analytics/qualitative/utils/metrics.py
@@ -0,0 +1,361 @@
+ """Performance and accuracy metrics for qualitative analysis."""
+
+ import logging
+ import statistics
+ from collections import defaultdict, deque
+ from dataclasses import dataclass
+ from datetime import datetime, timedelta
+ from typing import Any
+
+
+ @dataclass
+ class ProcessingMetrics:
+     """Metrics for a single processing operation."""
+
+     operation: str
+     processing_time_ms: float
+     items_processed: int
+     confidence_score: float
+     method_used: str  # 'nlp' or 'llm'
+     timestamp: datetime
+
+     @property
+     def items_per_second(self) -> float:
+         """Calculate processing rate."""
+         if self.processing_time_ms <= 0:
+             return 0.0
+         return (self.items_processed * 1000) / self.processing_time_ms
+
+
+ class PerformanceMetrics:
+     """Track and analyze performance metrics for qualitative analysis.
+
+     This class provides comprehensive performance monitoring including
+     processing times, accuracy metrics, cost tracking, and system health
+     indicators for the qualitative analysis pipeline.
+     """
+
+     def __init__(self, max_history: int = 10000):
+         """Initialize performance metrics tracker.
+
+         Args:
+             max_history: Maximum number of metrics to keep in memory
+         """
+         self.max_history = max_history
+         self.logger = logging.getLogger(__name__)
+
+         # Processing metrics
+         self.processing_metrics: deque[ProcessingMetrics] = deque(maxlen=max_history)
+
+         # Method usage tracking
+         self.method_usage = defaultdict(int)
+         self.method_performance = defaultdict(list)
+
+         # Confidence tracking
+         self.confidence_history = deque(maxlen=max_history)
+
+         # Error tracking
+         self.error_counts = defaultdict(int)
+         self.error_history = deque(maxlen=1000)
+
+         # Cache performance
+         self.cache_hits = 0
+         self.cache_misses = 0
+
+         # Quality metrics
+         self.classification_accuracy = deque(maxlen=1000)
+
+     def record_processing(
+         self,
+         operation: str,
+         processing_time_ms: float,
+         items_processed: int,
+         confidence_score: float,
+         method_used: str,
+     ) -> None:
+         """Record a processing operation.
+
+         Args:
+             operation: Type of operation (e.g., 'classification', 'analysis')
+             processing_time_ms: Processing time in milliseconds
+             items_processed: Number of items processed
+             confidence_score: Average confidence score
+             method_used: Method used ('nlp' or 'llm')
+         """
+         metric = ProcessingMetrics(
+             operation=operation,
+             processing_time_ms=processing_time_ms,
+             items_processed=items_processed,
+             confidence_score=confidence_score,
+             method_used=method_used,
+             timestamp=datetime.utcnow(),
+         )
+
+         self.processing_metrics.append(metric)
+         self.method_usage[method_used] += items_processed
+         if items_processed > 0:
+             # Per-item latency; guard against ZeroDivisionError on empty batches
+             self.method_performance[method_used].append(processing_time_ms / items_processed)
+         self.confidence_history.append(confidence_score)
+
+     def record_cache_hit(self) -> None:
+         """Record a cache hit."""
+         self.cache_hits += 1
+
+     def record_cache_miss(self) -> None:
+         """Record a cache miss."""
+         self.cache_misses += 1
+
+     def record_error(self, error_type: str, error_message: str) -> None:
+         """Record an error occurrence.
+
+         Args:
+             error_type: Type of error
+             error_message: Error message
+         """
+         self.error_counts[error_type] += 1
+         self.error_history.append(
+             {"type": error_type, "message": error_message, "timestamp": datetime.utcnow()}
+         )
+
+     def record_classification_accuracy(self, accuracy: float) -> None:
+         """Record classification accuracy measurement.
+
+         Args:
+             accuracy: Accuracy score (0.0 to 1.0)
+         """
+         self.classification_accuracy.append(accuracy)
+
+     def get_processing_stats(self, hours: int = 24) -> dict[str, Any]:
+         """Get processing statistics for the last N hours.
+
+         Args:
+             hours: Number of hours to analyze
+
+         Returns:
+             Dictionary with processing statistics
+         """
+         cutoff_time = datetime.utcnow() - timedelta(hours=hours)
+         recent_metrics = [m for m in self.processing_metrics if m.timestamp >= cutoff_time]
+
+         if not recent_metrics:
+             return {
+                 "total_operations": 0,
+                 "total_items_processed": 0,
+                 "avg_processing_time_ms": 0.0,
+                 "avg_items_per_second": 0.0,
+                 "avg_confidence": 0.0,
+                 "method_breakdown": {},
+                 "cache_hit_rate": 0.0,
+             }
+
+         # Calculate statistics
+         total_items = sum(m.items_processed for m in recent_metrics)
+         total_time = sum(m.processing_time_ms for m in recent_metrics)
+
+         avg_processing_time = total_time / len(recent_metrics)
+         avg_items_per_second = statistics.mean([m.items_per_second for m in recent_metrics])
+         avg_confidence = statistics.mean([m.confidence_score for m in recent_metrics])
+
+         # Method breakdown
+         method_breakdown = {}
+         for method in ["nlp", "llm"]:
+             method_metrics = [m for m in recent_metrics if m.method_used == method]
+             if method_metrics:
+                 method_items = sum(m.items_processed for m in method_metrics)
+                 method_breakdown[method] = {
+                     "items_processed": method_items,
+                     "percentage": (method_items / total_items) * 100 if total_items > 0 else 0,
+                     "avg_confidence": statistics.mean([m.confidence_score for m in method_metrics]),
+                     "avg_processing_time_ms": statistics.mean(
+                         [m.processing_time_ms for m in method_metrics]
+                     ),
+                 }
+
+         # Cache hit rate
+         total_cache_requests = self.cache_hits + self.cache_misses
+         cache_hit_rate = (
+             (self.cache_hits / total_cache_requests) if total_cache_requests > 0 else 0.0
+         )
+
+         return {
+             "total_operations": len(recent_metrics),
+             "total_items_processed": total_items,
+             "avg_processing_time_ms": avg_processing_time,
+             "avg_items_per_second": avg_items_per_second,
+             "avg_confidence": avg_confidence,
+             "method_breakdown": method_breakdown,
+             "cache_hit_rate": cache_hit_rate,
+             "time_period_hours": hours,
+         }
+
+     def get_quality_metrics(self) -> dict[str, Any]:
+         """Get quality and accuracy metrics.
+
+         Returns:
+             Dictionary with quality metrics
+         """
+         if not self.confidence_history:
+             return {
+                 "avg_confidence": 0.0,
+                 "confidence_distribution": {},
+                 "classification_accuracy": 0.0,
+                 "quality_trend": "stable",
+             }
+
+         # Confidence statistics
+         confidences = list(self.confidence_history)
+         avg_confidence = statistics.mean(confidences)
+
+         # Confidence distribution
+         confidence_buckets = {
+             "high (>0.8)": sum(1 for c in confidences if c > 0.8),
+             "medium (0.6-0.8)": sum(1 for c in confidences if 0.6 <= c <= 0.8),
+             "low (<0.6)": sum(1 for c in confidences if c < 0.6),
+         }
+
+         # Quality trend (comparing recent vs. older metrics)
+         if len(confidences) >= 100:
+             recent_confidence = statistics.mean(confidences[-50:])
+             older_confidence = statistics.mean(confidences[-100:-50])
+
+             if recent_confidence > older_confidence + 0.05:
+                 quality_trend = "improving"
+             elif recent_confidence < older_confidence - 0.05:
+                 quality_trend = "declining"
+             else:
+                 quality_trend = "stable"
+         else:
+             quality_trend = "insufficient_data"
+
+         # Classification accuracy
+         avg_accuracy = (
+             statistics.mean(self.classification_accuracy) if self.classification_accuracy else 0.0
+         )
+
+         return {
+             "avg_confidence": avg_confidence,
+             "confidence_distribution": confidence_buckets,
+             "classification_accuracy": avg_accuracy,
+             "quality_trend": quality_trend,
+             "total_samples": len(confidences),
+         }
+
+     def get_error_analysis(self) -> dict[str, Any]:
+         """Get error analysis and system health metrics.
+
+         Returns:
+             Dictionary with error analysis
+         """
+         # Recent errors (last 24 hours)
+         cutoff_time = datetime.utcnow() - timedelta(hours=24)
+         recent_errors = [error for error in self.error_history if error["timestamp"] >= cutoff_time]
+
+         # Error type breakdown
+         error_type_counts = defaultdict(int)
+         for error in recent_errors:
+             error_type_counts[error["type"]] += 1
+
+         # Total operations for error rate calculation
+         total_operations = len([m for m in self.processing_metrics if m.timestamp >= cutoff_time])
+
+         error_rate = len(recent_errors) / total_operations if total_operations > 0 else 0.0
+
+         return {
+             "total_errors_24h": len(recent_errors),
+             "error_rate": error_rate,
+             "error_types": dict(error_type_counts),
+             "most_common_error": (
+                 max(error_type_counts.keys(), key=error_type_counts.get)
+                 if error_type_counts
+                 else None
+             ),
+             "system_health": (
+                 "healthy" if error_rate < 0.01 else "degraded" if error_rate < 0.05 else "unhealthy"
+             ),
+         }
+
+     def get_performance_alerts(self) -> list[str]:
+         """Get performance alerts and recommendations.
+
+         Returns:
+             List of alert messages
+         """
+         alerts = []
+
+         # Check recent performance
+         stats = self.get_processing_stats(hours=1)
+         quality = self.get_quality_metrics()
+         errors = self.get_error_analysis()
+
+         # Processing speed alerts
+         if stats["avg_items_per_second"] < 50:  # Less than 50 items/second
+             alerts.append("Processing speed below target (< 50 items/second)")
+
+         # Confidence alerts
+         if quality["avg_confidence"] < 0.6:
+             alerts.append("Average confidence below threshold (< 0.6)")
+
+         # Method balance alerts
+         if "llm" in stats["method_breakdown"]:
+             llm_percentage = stats["method_breakdown"]["llm"]["percentage"]
+             if llm_percentage > 20:  # More than 20% using LLM
+                 alerts.append(
+                     f"High LLM usage ({llm_percentage:.1f}%) - consider tuning NLP thresholds"
+                 )
+
+         # Error rate alerts
+         if errors["error_rate"] > 0.05:  # More than 5% error rate
+             alerts.append(f"High error rate ({errors['error_rate']:.1%})")
+
+         # Cache performance alerts
+         if stats["cache_hit_rate"] < 0.3:  # Less than 30% cache hit rate
+             alerts.append("Low cache hit rate - pattern learning may be ineffective")
+
+         # Quality trend alerts
+         if quality["quality_trend"] == "declining":
+             alerts.append("Quality trend declining - review recent changes")
+
+         return alerts
+
+     def get_optimization_suggestions(self) -> list[str]:
+         """Get optimization suggestions based on metrics.
+
+         Returns:
+             List of optimization suggestions
+         """
+         suggestions = []
+
+         stats = self.get_processing_stats(hours=24)
+         quality = self.get_quality_metrics()
+
+         # Performance optimizations
+         if stats["avg_items_per_second"] < 100:
+             suggestions.append("Consider increasing batch size or enabling parallel processing")
+
+         # Method optimization
+         method_breakdown = stats["method_breakdown"]
+         if "llm" in method_breakdown and method_breakdown["llm"]["percentage"] > 15:
+             suggestions.append(
+                 "High LLM usage - consider lowering confidence threshold or improving NLP patterns"
+             )
+
+         if "nlp" in method_breakdown and method_breakdown["nlp"]["avg_confidence"] < 0.7:
+             suggestions.append("NLP confidence low - consider updating classification patterns")
+
+         # Quality optimizations
+         if quality["avg_confidence"] < 0.7:
+             suggestions.append(
+                 "Overall confidence low - review classification accuracy and update models"
+             )
+
+         confidence_dist = quality["confidence_distribution"]
+         if confidence_dist.get("low (<0.6)", 0) > confidence_dist.get("high (>0.8)", 0):
+             suggestions.append(
+                 "Many low-confidence predictions - consider additional training data"
+             )
+
+         # Cache optimizations
+         if stats["cache_hit_rate"] < 0.5:
+             suggestions.append(
+                 "Low cache hit rate - increase cache size or improve pattern matching"
+             )
+
+         return suggestions
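
Likewise, a minimal sketch of the new PerformanceMetrics tracker (again a sketch against the code above; the operation name, timings, and counts are illustrative):

from gitflow_analytics.qualitative.utils.metrics import PerformanceMetrics

metrics = PerformanceMetrics(max_history=1000)

# Record a hypothetical batch of 20 commits classified by the NLP path in 150 ms.
metrics.record_processing(
    operation="classification",
    processing_time_ms=150.0,
    items_processed=20,
    confidence_score=0.82,
    method_used="nlp",
)
metrics.record_cache_hit()
metrics.record_cache_miss()

stats = metrics.get_processing_stats(hours=24)
print(stats["avg_items_per_second"])  # (20 * 1000) / 150 ≈ 133.3
print(stats["cache_hit_rate"])        # 1 hit / 2 requests = 0.5

for alert in metrics.get_performance_alerts():
    print("ALERT:", alert)

Unlike CostTracker, this tracker is in-memory only (bounded deques, no JSON persistence), so its stats reset with each process.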