gitflow-analytics 1.0.1__py3-none-any.whl → 1.3.6__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- gitflow_analytics/__init__.py +11 -11
- gitflow_analytics/_version.py +2 -2
- gitflow_analytics/classification/__init__.py +31 -0
- gitflow_analytics/classification/batch_classifier.py +752 -0
- gitflow_analytics/classification/classifier.py +464 -0
- gitflow_analytics/classification/feature_extractor.py +725 -0
- gitflow_analytics/classification/linguist_analyzer.py +574 -0
- gitflow_analytics/classification/model.py +455 -0
- gitflow_analytics/cli.py +4490 -378
- gitflow_analytics/cli_rich.py +503 -0
- gitflow_analytics/config/__init__.py +43 -0
- gitflow_analytics/config/errors.py +261 -0
- gitflow_analytics/config/loader.py +904 -0
- gitflow_analytics/config/profiles.py +264 -0
- gitflow_analytics/config/repository.py +124 -0
- gitflow_analytics/config/schema.py +441 -0
- gitflow_analytics/config/validator.py +154 -0
- gitflow_analytics/config.py +44 -398
- gitflow_analytics/core/analyzer.py +1320 -172
- gitflow_analytics/core/branch_mapper.py +132 -132
- gitflow_analytics/core/cache.py +1554 -175
- gitflow_analytics/core/data_fetcher.py +1193 -0
- gitflow_analytics/core/identity.py +571 -185
- gitflow_analytics/core/metrics_storage.py +526 -0
- gitflow_analytics/core/progress.py +372 -0
- gitflow_analytics/core/schema_version.py +269 -0
- gitflow_analytics/extractors/base.py +13 -11
- gitflow_analytics/extractors/ml_tickets.py +1100 -0
- gitflow_analytics/extractors/story_points.py +77 -59
- gitflow_analytics/extractors/tickets.py +841 -89
- gitflow_analytics/identity_llm/__init__.py +6 -0
- gitflow_analytics/identity_llm/analysis_pass.py +231 -0
- gitflow_analytics/identity_llm/analyzer.py +464 -0
- gitflow_analytics/identity_llm/models.py +76 -0
- gitflow_analytics/integrations/github_integration.py +258 -87
- gitflow_analytics/integrations/jira_integration.py +572 -123
- gitflow_analytics/integrations/orchestrator.py +206 -82
- gitflow_analytics/metrics/activity_scoring.py +322 -0
- gitflow_analytics/metrics/branch_health.py +470 -0
- gitflow_analytics/metrics/dora.py +542 -179
- gitflow_analytics/models/database.py +986 -59
- gitflow_analytics/pm_framework/__init__.py +115 -0
- gitflow_analytics/pm_framework/adapters/__init__.py +50 -0
- gitflow_analytics/pm_framework/adapters/jira_adapter.py +1845 -0
- gitflow_analytics/pm_framework/base.py +406 -0
- gitflow_analytics/pm_framework/models.py +211 -0
- gitflow_analytics/pm_framework/orchestrator.py +652 -0
- gitflow_analytics/pm_framework/registry.py +333 -0
- gitflow_analytics/qualitative/__init__.py +29 -0
- gitflow_analytics/qualitative/chatgpt_analyzer.py +259 -0
- gitflow_analytics/qualitative/classifiers/__init__.py +13 -0
- gitflow_analytics/qualitative/classifiers/change_type.py +742 -0
- gitflow_analytics/qualitative/classifiers/domain_classifier.py +506 -0
- gitflow_analytics/qualitative/classifiers/intent_analyzer.py +535 -0
- gitflow_analytics/qualitative/classifiers/llm/__init__.py +35 -0
- gitflow_analytics/qualitative/classifiers/llm/base.py +193 -0
- gitflow_analytics/qualitative/classifiers/llm/batch_processor.py +383 -0
- gitflow_analytics/qualitative/classifiers/llm/cache.py +479 -0
- gitflow_analytics/qualitative/classifiers/llm/cost_tracker.py +435 -0
- gitflow_analytics/qualitative/classifiers/llm/openai_client.py +403 -0
- gitflow_analytics/qualitative/classifiers/llm/prompts.py +373 -0
- gitflow_analytics/qualitative/classifiers/llm/response_parser.py +287 -0
- gitflow_analytics/qualitative/classifiers/llm_commit_classifier.py +607 -0
- gitflow_analytics/qualitative/classifiers/risk_analyzer.py +438 -0
- gitflow_analytics/qualitative/core/__init__.py +13 -0
- gitflow_analytics/qualitative/core/llm_fallback.py +657 -0
- gitflow_analytics/qualitative/core/nlp_engine.py +382 -0
- gitflow_analytics/qualitative/core/pattern_cache.py +479 -0
- gitflow_analytics/qualitative/core/processor.py +673 -0
- gitflow_analytics/qualitative/enhanced_analyzer.py +2236 -0
- gitflow_analytics/qualitative/example_enhanced_usage.py +420 -0
- gitflow_analytics/qualitative/models/__init__.py +25 -0
- gitflow_analytics/qualitative/models/schemas.py +306 -0
- gitflow_analytics/qualitative/utils/__init__.py +13 -0
- gitflow_analytics/qualitative/utils/batch_processor.py +339 -0
- gitflow_analytics/qualitative/utils/cost_tracker.py +345 -0
- gitflow_analytics/qualitative/utils/metrics.py +361 -0
- gitflow_analytics/qualitative/utils/text_processing.py +285 -0
- gitflow_analytics/reports/__init__.py +100 -0
- gitflow_analytics/reports/analytics_writer.py +550 -18
- gitflow_analytics/reports/base.py +648 -0
- gitflow_analytics/reports/branch_health_writer.py +322 -0
- gitflow_analytics/reports/classification_writer.py +924 -0
- gitflow_analytics/reports/cli_integration.py +427 -0
- gitflow_analytics/reports/csv_writer.py +1700 -216
- gitflow_analytics/reports/data_models.py +504 -0
- gitflow_analytics/reports/database_report_generator.py +427 -0
- gitflow_analytics/reports/example_usage.py +344 -0
- gitflow_analytics/reports/factory.py +499 -0
- gitflow_analytics/reports/formatters.py +698 -0
- gitflow_analytics/reports/html_generator.py +1116 -0
- gitflow_analytics/reports/interfaces.py +489 -0
- gitflow_analytics/reports/json_exporter.py +2770 -0
- gitflow_analytics/reports/narrative_writer.py +2289 -158
- gitflow_analytics/reports/story_point_correlation.py +1144 -0
- gitflow_analytics/reports/weekly_trends_writer.py +389 -0
- gitflow_analytics/training/__init__.py +5 -0
- gitflow_analytics/training/model_loader.py +377 -0
- gitflow_analytics/training/pipeline.py +550 -0
- gitflow_analytics/tui/__init__.py +5 -0
- gitflow_analytics/tui/app.py +724 -0
- gitflow_analytics/tui/screens/__init__.py +8 -0
- gitflow_analytics/tui/screens/analysis_progress_screen.py +496 -0
- gitflow_analytics/tui/screens/configuration_screen.py +523 -0
- gitflow_analytics/tui/screens/loading_screen.py +348 -0
- gitflow_analytics/tui/screens/main_screen.py +321 -0
- gitflow_analytics/tui/screens/results_screen.py +722 -0
- gitflow_analytics/tui/widgets/__init__.py +7 -0
- gitflow_analytics/tui/widgets/data_table.py +255 -0
- gitflow_analytics/tui/widgets/export_modal.py +301 -0
- gitflow_analytics/tui/widgets/progress_widget.py +187 -0
- gitflow_analytics-1.3.6.dist-info/METADATA +1015 -0
- gitflow_analytics-1.3.6.dist-info/RECORD +122 -0
- gitflow_analytics-1.0.1.dist-info/METADATA +0 -463
- gitflow_analytics-1.0.1.dist-info/RECORD +0 -31
- {gitflow_analytics-1.0.1.dist-info → gitflow_analytics-1.3.6.dist-info}/WHEEL +0 -0
- {gitflow_analytics-1.0.1.dist-info → gitflow_analytics-1.3.6.dist-info}/entry_points.txt +0 -0
- {gitflow_analytics-1.0.1.dist-info → gitflow_analytics-1.3.6.dist-info}/licenses/LICENSE +0 -0
- {gitflow_analytics-1.0.1.dist-info → gitflow_analytics-1.3.6.dist-info}/top_level.txt +0 -0
|
@@ -0,0 +1,345 @@
|
|
|
1
|
+
"""Cost tracking utilities for LLM usage monitoring."""
|
|
2
|
+
|
|
3
|
+
import json
|
|
4
|
+
import logging
|
|
5
|
+
from dataclasses import dataclass
|
|
6
|
+
from datetime import datetime, timedelta
|
|
7
|
+
from pathlib import Path
|
|
8
|
+
from typing import Optional
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
@dataclass
class LLMCall:
    """Record of a single LLM API call.

    Captures token counts, timing, and the estimated cost of one request
    so that aggregate spend can be computed by CostTracker.
    """

    timestamp: datetime  # UTC time the call was recorded (naive; from datetime.utcnow())
    model: str  # OpenRouter-style model identifier, e.g. "openai/gpt-4"
    input_tokens: int  # prompt tokens sent
    output_tokens: int  # completion tokens received
    processing_time_ms: float  # wall-clock duration of the call, in milliseconds
    estimated_cost: float  # estimated cost of this call, in USD
    batch_size: int = 1  # number of items (e.g. commits) processed in this one call
    success: bool = True  # False when the call failed
    error_message: Optional[str] = None  # failure detail when success is False
|
|
24
|
+
|
|
25
|
+
|
|
26
|
+
class CostTracker:
    """Track and manage LLM API usage costs.

    This class provides cost monitoring, budgeting, and optimization
    features to keep LLM usage within acceptable limits while
    maintaining analysis quality.  Call history is persisted as JSON in
    ``cache_dir`` so spend tracking survives across process runs.
    """

    # OpenRouter pricing (approximate, in USD per 1M tokens)
    MODEL_PRICING = {
        # Anthropic models
        "anthropic/claude-3-haiku": {"input": 0.25, "output": 1.25},
        "anthropic/claude-3-sonnet": {"input": 3.0, "output": 15.0},
        "anthropic/claude-3-opus": {"input": 15.0, "output": 75.0},
        # OpenAI models
        "openai/gpt-3.5-turbo": {"input": 0.5, "output": 1.5},
        "openai/gpt-4": {"input": 30.0, "output": 60.0},
        "openai/gpt-4-turbo": {"input": 10.0, "output": 30.0},
        # Free models (Llama)
        "meta-llama/llama-3.1-8b-instruct:free": {"input": 0.0, "output": 0.0},
        "meta-llama/llama-3.1-70b-instruct:free": {"input": 0.0, "output": 0.0},
        # Other popular models
        "google/gemini-pro": {"input": 0.5, "output": 1.5},
        "mistralai/mixtral-8x7b-instruct": {"input": 0.27, "output": 0.27},
    }

    # Fallback per-1M-token rates for models missing from MODEL_PRICING.
    _DEFAULT_INPUT_PRICE = 1.0
    _DEFAULT_OUTPUT_PRICE = 3.0

    # Cap on the number of calls persisted to disk (bounds file growth).
    _MAX_PERSISTED_CALLS = 1000

    def __init__(self, cache_dir: Optional[Path] = None, daily_budget: float = 5.0):
        """Initialize cost tracker.

        Args:
            cache_dir: Directory to store cost tracking data (defaults to
                ``.qualitative_cache`` under the current working directory).
            daily_budget: Maximum daily spending in USD.
        """
        self.daily_budget = daily_budget
        self.cache_dir = cache_dir or Path(".qualitative_cache")
        # parents=True: a nested cache path no longer raises FileNotFoundError.
        self.cache_dir.mkdir(parents=True, exist_ok=True)

        self.cost_file = self.cache_dir / "llm_costs.json"
        self.calls: list[LLMCall] = []
        self.logger = logging.getLogger(__name__)

        # Load existing cost data (no-op when the file does not exist yet).
        self._load_cost_data()

    def record_call(
        self,
        model: str,
        input_tokens: int,
        output_tokens: int,
        processing_time: float,
        batch_size: int = 1,
        success: bool = True,
        error_message: Optional[str] = None,
    ) -> float:
        """Record an LLM API call and return its estimated cost.

        Args:
            model: Model name used.
            input_tokens: Number of input tokens.
            output_tokens: Number of output tokens.
            processing_time: Processing time in seconds.
            batch_size: Number of commits processed in this call.
            success: Whether the call was successful.
            error_message: Error message if the call failed.

        Returns:
            Estimated cost in USD.
        """
        estimated_cost = self._calculate_cost(model, input_tokens, output_tokens)

        call = LLMCall(
            # NOTE: naive UTC timestamps are used consistently in this class
            # and in the persisted JSON; switching to aware datetimes would
            # break comparisons against previously stored data.
            timestamp=datetime.utcnow(),
            model=model,
            input_tokens=input_tokens,
            output_tokens=output_tokens,
            processing_time_ms=processing_time * 1000,
            estimated_cost=estimated_cost,
            batch_size=batch_size,
            success=success,
            error_message=error_message,
        )

        self.calls.append(call)
        self._save_cost_data()

        # Lazy %-style args: the message is only formatted if INFO is enabled.
        self.logger.info(
            "LLM call: %s | tokens: %d+%d | cost: $%.4f | batch: %d",
            model,
            input_tokens,
            output_tokens,
            estimated_cost,
            batch_size,
        )

        return estimated_cost

    def get_daily_spend(self, date: Optional[datetime] = None) -> float:
        """Get total spending for a specific date.

        Only successful calls count toward spend.

        Args:
            date: Date to check (defaults to today, UTC).

        Returns:
            Total spending in USD for the date.
        """
        if date is None:
            date = datetime.utcnow()

        start_of_day = date.replace(hour=0, minute=0, second=0, microsecond=0)
        end_of_day = start_of_day + timedelta(days=1)

        return sum(
            call.estimated_cost
            for call in self.calls
            if start_of_day <= call.timestamp < end_of_day and call.success
        )

    def check_budget_remaining(self) -> float:
        """Check remaining budget for today.

        Returns:
            Remaining budget in USD (negative if over budget).
        """
        return self.daily_budget - self.get_daily_spend()

    def can_afford_call(self, model: str, estimated_tokens: int) -> bool:
        """Check if an API call fits within today's remaining budget.

        Args:
            model: Model to use.
            estimated_tokens: Estimated total tokens (input + output);
                assumed to split evenly between input and output.

        Returns:
            True if the call is within budget.
        """
        estimated_cost = self._calculate_cost(model, estimated_tokens // 2, estimated_tokens // 2)
        return self.check_budget_remaining() >= estimated_cost

    def get_usage_stats(self, days: int = 7) -> dict[str, object]:
        """Get usage statistics for the last N days.

        Args:
            days: Number of days to analyze.

        Returns:
            Dictionary with usage statistics.  ``total_cost`` counts only
            successful calls, while ``total_tokens`` counts all calls
            (tokens may be consumed even when a call fails).
        """
        cutoff_date = datetime.utcnow() - timedelta(days=days)
        recent_calls = [call for call in self.calls if call.timestamp >= cutoff_date]

        if not recent_calls:
            # Same keys as the populated branch so callers can index the
            # result unconditionally (previously "successful_calls" and
            # "daily_average_cost" were missing here).
            return {
                "total_calls": 0,
                "successful_calls": 0,
                "total_cost": 0.0,
                "total_tokens": 0,
                "avg_cost_per_call": 0.0,
                "model_usage": {},
                "success_rate": 1.0,
                "daily_average_cost": 0.0,
            }

        successful_calls = [call for call in recent_calls if call.success]

        total_cost = sum(call.estimated_cost for call in successful_calls)
        total_tokens = sum(call.input_tokens + call.output_tokens for call in recent_calls)

        # Per-model usage breakdown.
        model_usage = {}
        for call in recent_calls:
            usage = model_usage.setdefault(call.model, {"calls": 0, "cost": 0.0, "tokens": 0})
            usage["calls"] += 1
            usage["cost"] += call.estimated_cost
            usage["tokens"] += call.input_tokens + call.output_tokens

        return {
            "total_calls": len(recent_calls),
            "successful_calls": len(successful_calls),
            "total_cost": total_cost,
            "total_tokens": total_tokens,
            "avg_cost_per_call": total_cost / len(successful_calls) if successful_calls else 0.0,
            "model_usage": model_usage,
            "success_rate": len(successful_calls) / len(recent_calls),
            "daily_average_cost": total_cost / days,
        }

    def suggest_cost_optimizations(self) -> list[str]:
        """Suggest ways to optimize costs based on usage patterns.

        Returns:
            List of optimization suggestions (empty when there is no
            recent usage to analyze).
        """
        suggestions: list[str] = []
        stats = self.get_usage_stats(days=7)

        if stats["total_calls"] == 0:
            return suggestions

        model_usage = stats["model_usage"]
        total_cost = stats["total_cost"]

        # Flag over-reliance on premium models (> 30% of total spend).
        expensive_models = ["anthropic/claude-3-opus", "openai/gpt-4"]
        expensive_usage = sum(
            model_usage.get(model, {}).get("cost", 0) for model in expensive_models
        )
        if expensive_usage > total_cost * 0.3:
            suggestions.append(
                "Consider using cheaper models (Claude Haiku, GPT-3.5) for routine classification"
            )

        # Check for free model opportunities.
        free_usage = model_usage.get("meta-llama/llama-3.1-8b-instruct:free", {}).get("calls", 0)
        if free_usage < stats["total_calls"] * 0.5:
            suggestions.append(
                "Increase usage of free Llama models for simple classification tasks"
            )

        # Warn when close to the daily budget.
        if self.get_daily_spend() > self.daily_budget * 0.8:
            suggestions.append(
                "Approaching daily budget limit - consider increasing NLP confidence threshold"
            )

        # Batch efficiency over the most recent calls.  The explicit guard
        # removes the latent ZeroDivisionError of the old
        # sum(...) / min(50, len(self.calls)) formulation.
        recent = self.calls[-50:]
        if recent:
            avg_batch_size = sum(call.batch_size for call in recent) / len(recent)
            if avg_batch_size < 3:
                suggestions.append(
                    "Increase batch size for LLM calls to improve cost efficiency"
                )

        return suggestions

    def _calculate_cost(self, model: str, input_tokens: int, output_tokens: int) -> float:
        """Calculate estimated cost for an API call.

        Args:
            model: Model name.
            input_tokens: Number of input tokens.
            output_tokens: Number of output tokens.

        Returns:
            Estimated cost in USD.
        """
        pricing = self.MODEL_PRICING.get(model)
        if pricing is None:
            # Unknown model: fall back to moderate default rates.
            input_price = self._DEFAULT_INPUT_PRICE
            output_price = self._DEFAULT_OUTPUT_PRICE
            self.logger.warning("Unknown model pricing for %s, using default rates", model)
        else:
            input_price = pricing["input"]
            output_price = pricing["output"]

        # Pricing is expressed per 1M tokens.
        input_cost = (input_tokens / 1_000_000) * input_price
        output_cost = (output_tokens / 1_000_000) * output_price

        return input_cost + output_cost

    def _load_cost_data(self) -> None:
        """Load persisted cost data; reset to empty history on any failure."""
        if not self.cost_file.exists():
            return

        try:
            with open(self.cost_file, encoding="utf-8") as f:
                data = json.load(f)

            self.calls = [
                LLMCall(
                    timestamp=datetime.fromisoformat(call_data["timestamp"]),
                    model=call_data["model"],
                    input_tokens=call_data["input_tokens"],
                    output_tokens=call_data["output_tokens"],
                    processing_time_ms=call_data["processing_time_ms"],
                    estimated_cost=call_data["estimated_cost"],
                    batch_size=call_data.get("batch_size", 1),
                    success=call_data.get("success", True),
                    error_message=call_data.get("error_message"),
                )
                for call_data in data.get("calls", [])
            ]

        except Exception as e:
            # A corrupt or unreadable history file must not break tracking.
            self.logger.error("Failed to load cost data: %s", e)
            self.calls = []

    def _save_cost_data(self) -> None:
        """Persist recent call history to ``self.cost_file`` as JSON.

        Persistence is best-effort: failures are logged, never raised.
        """
        try:
            # Keep only the most recent calls to bound file size.
            recent_calls = self.calls[-self._MAX_PERSISTED_CALLS:]

            data = {
                "calls": [
                    {
                        "timestamp": call.timestamp.isoformat(),
                        "model": call.model,
                        "input_tokens": call.input_tokens,
                        "output_tokens": call.output_tokens,
                        "processing_time_ms": call.processing_time_ms,
                        "estimated_cost": call.estimated_cost,
                        "batch_size": call.batch_size,
                        "success": call.success,
                        "error_message": call.error_message,
                    }
                    for call in recent_calls
                ]
            }

            with open(self.cost_file, "w", encoding="utf-8") as f:
                json.dump(data, f, indent=2)

        except Exception as e:
            self.logger.error("Failed to save cost data: %s", e)
|
|
@@ -0,0 +1,361 @@
|
|
|
1
|
+
"""Performance and accuracy metrics for qualitative analysis."""
|
|
2
|
+
|
|
3
|
+
import logging
|
|
4
|
+
import statistics
|
|
5
|
+
from collections import defaultdict, deque
|
|
6
|
+
from dataclasses import dataclass
|
|
7
|
+
from datetime import datetime, timedelta
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
@dataclass
class ProcessingMetrics:
    """Metrics captured for one processing operation.

    Holds the timing, volume, and confidence figures for a single
    pipeline step so that PerformanceMetrics can aggregate them.
    """

    operation: str
    processing_time_ms: float
    items_processed: int
    confidence_score: float
    method_used: str  # 'nlp' or 'llm'
    timestamp: datetime

    @property
    def items_per_second(self) -> float:
        """Throughput of this operation; 0.0 when no time elapsed."""
        elapsed_ms = self.processing_time_ms
        return 0.0 if elapsed_ms <= 0 else self.items_processed * 1000 / elapsed_ms
|
|
27
|
+
|
|
28
|
+
|
|
29
|
+
class PerformanceMetrics:
    """Track and analyze performance metrics for qualitative analysis.

    Provides processing-time, confidence, cache, and error monitoring
    for the qualitative analysis pipeline, plus derived alerts and
    optimization suggestions.
    """

    def __init__(self, max_history: int = 10000):
        """Initialize performance metrics tracker.

        Args:
            max_history: Maximum number of metrics to keep in memory.
        """
        self.max_history = max_history
        self.logger = logging.getLogger(__name__)

        # Rolling window of per-operation metrics.
        self.processing_metrics: deque[ProcessingMetrics] = deque(maxlen=max_history)

        # Total items handled per method ('nlp'/'llm') and the observed
        # per-item processing times for each method.
        self.method_usage = defaultdict(int)
        self.method_performance = defaultdict(list)

        # Rolling window of confidence scores.
        self.confidence_history = deque(maxlen=max_history)

        # Error counters plus a bounded log of recent error records.
        self.error_counts = defaultdict(int)
        self.error_history = deque(maxlen=1000)

        # Lifetime cache counters (monotonic; not time-windowed).
        self.cache_hits = 0
        self.cache_misses = 0

        # Rolling window of accuracy measurements.
        self.classification_accuracy = deque(maxlen=1000)

    def record_processing(
        self,
        operation: str,
        processing_time_ms: float,
        items_processed: int,
        confidence_score: float,
        method_used: str,
    ) -> None:
        """Record a processing operation.

        Args:
            operation: Type of operation (e.g., 'classification', 'analysis')
            processing_time_ms: Processing time in milliseconds
            items_processed: Number of items processed
            confidence_score: Average confidence score
            method_used: Method used ('nlp' or 'llm')
        """
        metric = ProcessingMetrics(
            operation=operation,
            processing_time_ms=processing_time_ms,
            items_processed=items_processed,
            confidence_score=confidence_score,
            method_used=method_used,
            timestamp=datetime.utcnow(),
        )

        self.processing_metrics.append(metric)
        self.method_usage[method_used] += items_processed
        # Guard: items_processed == 0 previously raised ZeroDivisionError.
        if items_processed > 0:
            self.method_performance[method_used].append(processing_time_ms / items_processed)
        self.confidence_history.append(confidence_score)

    def record_cache_hit(self) -> None:
        """Record a cache hit."""
        self.cache_hits += 1

    def record_cache_miss(self) -> None:
        """Record a cache miss."""
        self.cache_misses += 1

    def record_error(self, error_type: str, error_message: str) -> None:
        """Record an error occurrence.

        Args:
            error_type: Type of error
            error_message: Error message
        """
        self.error_counts[error_type] += 1
        self.error_history.append(
            {"type": error_type, "message": error_message, "timestamp": datetime.utcnow()}
        )

    def record_classification_accuracy(self, accuracy: float) -> None:
        """Record classification accuracy measurement.

        Args:
            accuracy: Accuracy score (0.0 to 1.0)
        """
        self.classification_accuracy.append(accuracy)

    def get_processing_stats(self, hours: int = 24) -> dict[str, object]:
        """Get processing statistics for the last N hours.

        Args:
            hours: Number of hours to analyze

        Returns:
            Dictionary with processing statistics.  ``cache_hit_rate`` is
            derived from the lifetime cache counters (not time-windowed).
        """
        cutoff_time = datetime.utcnow() - timedelta(hours=hours)
        recent_metrics = [m for m in self.processing_metrics if m.timestamp >= cutoff_time]

        # Compute the cache hit rate up front so both branches report the
        # real value (previously the empty branch always reported 0.0
        # regardless of actual cache activity).
        total_cache_requests = self.cache_hits + self.cache_misses
        cache_hit_rate = (
            self.cache_hits / total_cache_requests if total_cache_requests > 0 else 0.0
        )

        if not recent_metrics:
            # Same keys as the populated branch, so callers can index the
            # result unconditionally.
            return {
                "total_operations": 0,
                "total_items_processed": 0,
                "avg_processing_time_ms": 0.0,
                "avg_items_per_second": 0.0,
                "avg_confidence": 0.0,
                "method_breakdown": {},
                "cache_hit_rate": cache_hit_rate,
                "time_period_hours": hours,
            }

        total_items = sum(m.items_processed for m in recent_metrics)
        total_time = sum(m.processing_time_ms for m in recent_metrics)

        avg_processing_time = total_time / len(recent_metrics)
        avg_items_per_second = statistics.mean(m.items_per_second for m in recent_metrics)
        avg_confidence = statistics.mean(m.confidence_score for m in recent_metrics)

        # Per-method breakdown.
        method_breakdown = {}
        for method in ("nlp", "llm"):
            method_metrics = [m for m in recent_metrics if m.method_used == method]
            if method_metrics:
                method_items = sum(m.items_processed for m in method_metrics)
                method_breakdown[method] = {
                    "items_processed": method_items,
                    "percentage": (method_items / total_items) * 100 if total_items > 0 else 0,
                    "avg_confidence": statistics.mean(
                        m.confidence_score for m in method_metrics
                    ),
                    "avg_processing_time_ms": statistics.mean(
                        m.processing_time_ms for m in method_metrics
                    ),
                }

        return {
            "total_operations": len(recent_metrics),
            "total_items_processed": total_items,
            "avg_processing_time_ms": avg_processing_time,
            "avg_items_per_second": avg_items_per_second,
            "avg_confidence": avg_confidence,
            "method_breakdown": method_breakdown,
            "cache_hit_rate": cache_hit_rate,
            "time_period_hours": hours,
        }

    def get_quality_metrics(self) -> dict[str, object]:
        """Get quality and accuracy metrics.

        Returns:
            Dictionary with quality metrics
        """
        if not self.confidence_history:
            # Same keys as the populated branch (previously "total_samples"
            # was missing here).
            return {
                "avg_confidence": 0.0,
                "confidence_distribution": {},
                "classification_accuracy": 0.0,
                "quality_trend": "stable",
                "total_samples": 0,
            }

        # Confidence statistics.
        confidences = list(self.confidence_history)
        avg_confidence = statistics.mean(confidences)

        # Confidence distribution.
        confidence_buckets = {
            "high (>0.8)": sum(1 for c in confidences if c > 0.8),
            "medium (0.6-0.8)": sum(1 for c in confidences if 0.6 <= c <= 0.8),
            "low (<0.6)": sum(1 for c in confidences if c < 0.6),
        }

        # Trend: compare the most recent 50 samples to the 50 before them.
        if len(confidences) >= 100:
            recent_confidence = statistics.mean(confidences[-50:])
            older_confidence = statistics.mean(confidences[-100:-50])

            if recent_confidence > older_confidence + 0.05:
                quality_trend = "improving"
            elif recent_confidence < older_confidence - 0.05:
                quality_trend = "declining"
            else:
                quality_trend = "stable"
        else:
            quality_trend = "insufficient_data"

        # Classification accuracy.
        avg_accuracy = (
            statistics.mean(self.classification_accuracy) if self.classification_accuracy else 0.0
        )

        return {
            "avg_confidence": avg_confidence,
            "confidence_distribution": confidence_buckets,
            "classification_accuracy": avg_accuracy,
            "quality_trend": quality_trend,
            "total_samples": len(confidences),
        }

    def get_error_analysis(self) -> dict[str, object]:
        """Get error analysis and system health metrics for the last 24 hours.

        Returns:
            Dictionary with error analysis
        """
        cutoff_time = datetime.utcnow() - timedelta(hours=24)
        recent_errors = [error for error in self.error_history if error["timestamp"] >= cutoff_time]

        # Error type breakdown.
        error_type_counts = defaultdict(int)
        for error in recent_errors:
            error_type_counts[error["type"]] += 1

        # Error rate relative to operations in the same window.
        total_operations = len([m for m in self.processing_metrics if m.timestamp >= cutoff_time])
        error_rate = len(recent_errors) / total_operations if total_operations > 0 else 0.0

        return {
            "total_errors_24h": len(recent_errors),
            "error_rate": error_rate,
            "error_types": dict(error_type_counts),
            "most_common_error": (
                max(error_type_counts, key=error_type_counts.get) if error_type_counts else None
            ),
            "system_health": (
                "healthy" if error_rate < 0.01 else "degraded" if error_rate < 0.05 else "unhealthy"
            ),
        }

    def get_performance_alerts(self) -> list[str]:
        """Get performance alerts and recommendations.

        Alerts that require data (speed, confidence, cache) are suppressed
        until the relevant activity has occurred, so an idle tracker no
        longer reports spurious alerts.

        Returns:
            List of alert messages
        """
        alerts: list[str] = []

        stats = self.get_processing_stats(hours=1)
        quality = self.get_quality_metrics()
        errors = self.get_error_analysis()

        # Processing speed (only meaningful once work has been recorded).
        if stats["total_operations"] > 0 and stats["avg_items_per_second"] < 50:
            alerts.append("Processing speed below target (< 50 items/second)")

        # Confidence (only meaningful once samples exist).
        if quality["total_samples"] > 0 and quality["avg_confidence"] < 0.6:
            alerts.append("Average confidence below threshold (< 0.6)")

        # Method balance: flag heavy reliance on the expensive LLM path.
        llm_stats = stats["method_breakdown"].get("llm")
        if llm_stats is not None:
            llm_percentage = llm_stats["percentage"]
            if llm_percentage > 20:
                alerts.append(
                    f"High LLM usage ({llm_percentage:.1f}%) - consider tuning NLP thresholds"
                )

        # Error rate above 5%.
        if errors["error_rate"] > 0.05:
            alerts.append(f"High error rate ({errors['error_rate']:.1%})")

        # Cache (only meaningful once the cache has been exercised).
        if self.cache_hits + self.cache_misses > 0 and stats["cache_hit_rate"] < 0.3:
            alerts.append("Low cache hit rate - pattern learning may be ineffective")

        if quality["quality_trend"] == "declining":
            alerts.append("Quality trend declining - review recent changes")

        return alerts

    def get_optimization_suggestions(self) -> list[str]:
        """Get optimization suggestions based on the last 24 hours of metrics.

        Returns:
            List of optimization suggestions
        """
        suggestions: list[str] = []

        stats = self.get_processing_stats(hours=24)
        quality = self.get_quality_metrics()

        # Throughput (skip when nothing has been processed yet).
        if stats["total_operations"] > 0 and stats["avg_items_per_second"] < 100:
            suggestions.append("Consider increasing batch size or enabling parallel processing")

        # Method optimization.
        method_breakdown = stats["method_breakdown"]
        if "llm" in method_breakdown and method_breakdown["llm"]["percentage"] > 15:
            suggestions.append(
                "High LLM usage - consider lowering confidence threshold or improving NLP patterns"
            )

        if "nlp" in method_breakdown and method_breakdown["nlp"]["avg_confidence"] < 0.7:
            suggestions.append("NLP confidence low - consider updating classification patterns")

        # Quality (skip when no confidence samples exist yet).
        if quality["total_samples"] > 0 and quality["avg_confidence"] < 0.7:
            suggestions.append(
                "Overall confidence low - review classification accuracy and update models"
            )

        confidence_dist = quality["confidence_distribution"]
        if confidence_dist.get("low (<0.6)", 0) > confidence_dist.get("high (>0.8)", 0):
            suggestions.append(
                "Many low-confidence predictions - consider additional training data"
            )

        # Cache (skip until the cache has been exercised).
        if self.cache_hits + self.cache_misses > 0 and stats["cache_hit_rate"] < 0.5:
            suggestions.append(
                "Low cache hit rate - increase cache size or improve pattern matching"
            )

        return suggestions
|