api-mocker 0.1.3__py3-none-any.whl → 0.3.0__py3-none-any.whl

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between the two versions as they appear in their public registry.
@@ -0,0 +1,542 @@
+ """
+ Enhanced Analytics System
+
+ Provides advanced analytics and insights including:
+ - Performance benchmarking against real APIs
+ - Usage pattern analysis
+ - API dependency mapping
+ - Cost optimization insights
+ - Advanced metrics collection and analysis
+ """
+
+ import json
+ import sqlite3
+ from typing import Dict, Any, List, Optional, Tuple, Union
+ from dataclasses import dataclass, field
+ from datetime import datetime, timedelta
+ import statistics
+ from collections import defaultdict, Counter
+ import threading
+ import time
+
+
+ @dataclass
+ class PerformanceMetrics:
+     """Performance metrics for API endpoints."""
+     endpoint: str
+     method: str
+     response_time_p50: float
+     response_time_p95: float
+     response_time_p99: float
+     throughput: float  # requests per second
+     error_rate: float
+     success_rate: float
+     total_requests: int
+     total_errors: int
+     avg_response_size: int
+     timestamp: datetime = field(default_factory=datetime.now)
+
+
+ @dataclass
+ class UsagePattern:
+     """Usage pattern analysis for endpoints."""
+     endpoint: str
+     method: str
+     peak_hours: List[int]
+     peak_days: List[str]
+     user_agents: Dict[str, int]
+     ip_addresses: Dict[str, int]
+     request_sizes: List[int]
+     response_sizes: List[int]
+     common_headers: Dict[str, int]
+     common_query_params: Dict[str, int]
+     timestamp: datetime = field(default_factory=datetime.now)
+
+
+ @dataclass
+ class APIDependency:
+     """API dependency mapping information."""
+     source_endpoint: str
+     target_endpoint: str
+     dependency_type: str  # "calls", "depends_on", "similar_pattern"
+     confidence: float
+     frequency: int
+     avg_latency: float
+     timestamp: datetime = field(default_factory=datetime.now)
+
+
+ @dataclass
+ class CostOptimizationInsight:
+     """Cost optimization insights."""
+     insight_type: str
+     description: str
+     potential_savings: float
+     recommendation: str
+     priority: str  # "high", "medium", "low"
+     affected_endpoints: List[str]
+     timestamp: datetime = field(default_factory=datetime.now)
+
+
+ class EnhancedAnalytics:
+     """Enhanced analytics system with advanced metrics and insights."""
+
+     def __init__(self, db_path: str = "api_mocker_analytics.db"):
+         self.db_path = db_path
+         self.lock = threading.Lock()
+         self._init_database()
+
+     def _init_database(self):
+         """Initialize the analytics database with enhanced tables."""
+         with sqlite3.connect(self.db_path) as conn:
+             conn.execute("""
+                 CREATE TABLE IF NOT EXISTS enhanced_requests (
+                     id INTEGER PRIMARY KEY AUTOINCREMENT,
+                     timestamp DATETIME DEFAULT CURRENT_TIMESTAMP,
+                     endpoint TEXT NOT NULL,
+                     method TEXT NOT NULL,
+                     status_code INTEGER,
+                     response_time REAL,
+                     response_size INTEGER,
+                     request_size INTEGER,
+                     user_agent TEXT,
+                     ip_address TEXT,
+                     headers TEXT,
+                     query_params TEXT,
+                     request_body TEXT,
+                     response_body TEXT,
+                     error_message TEXT,
+                     scenario_name TEXT,
+                     rule_name TEXT
+                 )
+             """)
+
+             conn.execute("""
+                 CREATE TABLE IF NOT EXISTS performance_metrics (
+                     id INTEGER PRIMARY KEY AUTOINCREMENT,
+                     timestamp DATETIME DEFAULT CURRENT_TIMESTAMP,
+                     endpoint TEXT NOT NULL,
+                     method TEXT NOT NULL,
+                     response_time_p50 REAL,
+                     response_time_p95 REAL,
+                     response_time_p99 REAL,
+                     throughput REAL,
+                     error_rate REAL,
+                     success_rate REAL,
+                     total_requests INTEGER,
+                     total_errors INTEGER,
+                     avg_response_size INTEGER
+                 )
+             """)
+
+             conn.execute("""
+                 CREATE TABLE IF NOT EXISTS usage_patterns (
+                     id INTEGER PRIMARY KEY AUTOINCREMENT,
+                     timestamp DATETIME DEFAULT CURRENT_TIMESTAMP,
+                     endpoint TEXT NOT NULL,
+                     method TEXT NOT NULL,
+                     peak_hours TEXT,
+                     peak_days TEXT,
+                     user_agents TEXT,
+                     ip_addresses TEXT,
+                     request_sizes TEXT,
+                     response_sizes TEXT,
+                     common_headers TEXT,
+                     common_query_params TEXT
+                 )
+             """)
+
+             conn.execute("""
+                 CREATE TABLE IF NOT EXISTS api_dependencies (
+                     id INTEGER PRIMARY KEY AUTOINCREMENT,
+                     timestamp DATETIME DEFAULT CURRENT_TIMESTAMP,
+                     source_endpoint TEXT NOT NULL,
+                     target_endpoint TEXT NOT NULL,
+                     dependency_type TEXT NOT NULL,
+                     confidence REAL,
+                     frequency INTEGER,
+                     avg_latency REAL
+                 )
+             """)
+
+             conn.execute("""
+                 CREATE TABLE IF NOT EXISTS cost_insights (
+                     id INTEGER PRIMARY KEY AUTOINCREMENT,
+                     timestamp DATETIME DEFAULT CURRENT_TIMESTAMP,
+                     insight_type TEXT NOT NULL,
+                     description TEXT NOT NULL,
+                     potential_savings REAL,
+                     recommendation TEXT NOT NULL,
+                     priority TEXT NOT NULL,
+                     affected_endpoints TEXT
+                 )
+             """)
+
+             # Create indexes for better performance
+             conn.execute("CREATE INDEX IF NOT EXISTS idx_requests_endpoint ON enhanced_requests(endpoint)")
+             conn.execute("CREATE INDEX IF NOT EXISTS idx_requests_timestamp ON enhanced_requests(timestamp)")
+             conn.execute("CREATE INDEX IF NOT EXISTS idx_requests_method ON enhanced_requests(method)")
+             conn.execute("CREATE INDEX IF NOT EXISTS idx_performance_endpoint ON performance_metrics(endpoint)")
+             conn.execute("CREATE INDEX IF NOT EXISTS idx_dependencies_source ON api_dependencies(source_endpoint)")
+
+     def log_request(self, request_data: Dict[str, Any], response_data: Dict[str, Any],
+                     scenario_name: Optional[str] = None, rule_name: Optional[str] = None):
+         """Log a request with enhanced analytics data."""
+         with self.lock:
+             with sqlite3.connect(self.db_path) as conn:
+                 conn.execute("""
+                     INSERT INTO enhanced_requests (
+                         endpoint, method, status_code, response_time, response_size,
+                         request_size, user_agent, ip_address, headers, query_params,
+                         request_body, response_body, error_message, scenario_name, rule_name
+                     ) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
+                 """, (
+                     request_data.get('path', ''),
+                     request_data.get('method', ''),
+                     response_data.get('status_code', 200),
+                     response_data.get('response_time', 0),
+                     response_data.get('response_size', 0),
+                     request_data.get('request_size', 0),
+                     request_data.get('user_agent', ''),
+                     request_data.get('ip_address', ''),
+                     json.dumps(request_data.get('headers', {})),
+                     json.dumps(request_data.get('query_params', {})),
+                     json.dumps(request_data.get('body', {})),
+                     json.dumps(response_data.get('body', {})),
+                     response_data.get('error_message', ''),
+                     scenario_name,
+                     rule_name
+                 ))
+
+     def calculate_performance_metrics(self, endpoint: Optional[str] = None,
+                                       hours: int = 24) -> List[PerformanceMetrics]:
+         """Calculate performance metrics for endpoints."""
+         with sqlite3.connect(self.db_path) as conn:
+             time_filter = datetime.now() - timedelta(hours=hours)
+
+             query = """
+                 SELECT endpoint, method, response_time, status_code, response_size
+                 FROM enhanced_requests
+                 WHERE timestamp >= ?
+             """
+             params = [time_filter.isoformat()]
+
+             if endpoint:
+                 query += " AND endpoint = ?"
+                 params.append(endpoint)
+
+             cursor = conn.execute(query, params)
+             rows = cursor.fetchall()
+
+             # Group by endpoint and method
+             grouped_data = defaultdict(list)
+             for row in rows:
+                 endpoint, method, response_time, status_code, response_size = row
+                 key = f"{endpoint}:{method}"
+                 grouped_data[key].append({
+                     'response_time': response_time,
+                     'status_code': status_code,
+                     'response_size': response_size
+                 })
+
+             metrics = []
+             for key, data in grouped_data.items():
+                 endpoint, method = key.split(':', 1)
+
+                 response_times = [d['response_time'] for d in data if d['response_time']]
+                 status_codes = [d['status_code'] for d in data]
+                 response_sizes = [d['response_size'] for d in data if d['response_size']]
+
+                 if not response_times:
+                     continue
+
+                 total_requests = len(data)
+                 total_errors = len([s for s in status_codes if s >= 400])
+
+                 metric = PerformanceMetrics(
+                     endpoint=endpoint,
+                     method=method,
+                     response_time_p50=statistics.median(response_times),
+                     response_time_p95=self._percentile(response_times, 95),
+                     response_time_p99=self._percentile(response_times, 99),
+                     throughput=total_requests / (hours * 3600),  # requests per second
+                     error_rate=total_errors / total_requests if total_requests > 0 else 0,
+                     success_rate=(total_requests - total_errors) / total_requests if total_requests > 0 else 0,
+                     total_requests=total_requests,
+                     total_errors=total_errors,
+                     avg_response_size=int(statistics.mean(response_sizes)) if response_sizes else 0  # field is declared as int
+                 )
+                 metrics.append(metric)
+
+             return metrics
+
+     def analyze_usage_patterns(self, endpoint: Optional[str] = None,
+                                days: int = 7) -> List[UsagePattern]:
+         """Analyze usage patterns for endpoints."""
+         with sqlite3.connect(self.db_path) as conn:
+             time_filter = datetime.now() - timedelta(days=days)
+
+             query = """
+                 SELECT endpoint, method, timestamp, user_agent, ip_address,
+                        headers, query_params, request_size, response_size
+                 FROM enhanced_requests
+                 WHERE timestamp >= ?
+             """
+             params = [time_filter.isoformat()]
+
+             if endpoint:
+                 query += " AND endpoint = ?"
+                 params.append(endpoint)
+
+             cursor = conn.execute(query, params)
+             rows = cursor.fetchall()
+
+             # Group by endpoint and method
+             grouped_data = defaultdict(list)
+             for row in rows:
+                 endpoint, method, timestamp, user_agent, ip_address, headers, query_params, request_size, response_size = row
+                 key = f"{endpoint}:{method}"
+                 grouped_data[key].append({
+                     'timestamp': datetime.fromisoformat(timestamp),
+                     'user_agent': user_agent,
+                     'ip_address': ip_address,
+                     'headers': json.loads(headers) if headers else {},
+                     'query_params': json.loads(query_params) if query_params else {},
+                     'request_size': request_size,
+                     'response_size': response_size
+                 })
+
+             patterns = []
+             for key, data in grouped_data.items():
+                 endpoint, method = key.split(':', 1)
+
+                 # Analyze peak hours
+                 request_hours = [d['timestamp'].hour for d in data]
+                 hour_counts = Counter(request_hours)
+                 peak_hours = [hour for hour, count in hour_counts.most_common(3)]
+
+                 # Analyze peak days (day_names avoids shadowing the days parameter)
+                 day_names = [d['timestamp'].strftime('%A') for d in data]
+                 day_counts = Counter(day_names)
+                 peak_days = [day for day, count in day_counts.most_common(3)]
+
+                 # Analyze user agents
+                 user_agents = [d['user_agent'] for d in data if d['user_agent']]
+                 user_agent_counts = Counter(user_agents)
+
+                 # Analyze IP addresses
+                 ip_addresses = [d['ip_address'] for d in data if d['ip_address']]
+                 ip_counts = Counter(ip_addresses)
+
+                 # Analyze request and response sizes
+                 request_sizes = [d['request_size'] for d in data if d['request_size']]
+                 response_sizes = [d['response_size'] for d in data if d['response_size']]
+
+                 # Analyze common headers
+                 all_headers = {}
+                 for d in data:
+                     for header, value in d['headers'].items():
+                         all_headers[header] = all_headers.get(header, 0) + 1
+
+                 # Analyze common query parameters
+                 all_query_params = {}
+                 for d in data:
+                     for param, value in d['query_params'].items():
+                         all_query_params[param] = all_query_params.get(param, 0) + 1
+
+                 pattern = UsagePattern(
+                     endpoint=endpoint,
+                     method=method,
+                     peak_hours=peak_hours,
+                     peak_days=peak_days,
+                     user_agents=dict(user_agent_counts.most_common(10)),
+                     ip_addresses=dict(ip_counts.most_common(10)),
+                     request_sizes=request_sizes,
+                     response_sizes=response_sizes,
+                     common_headers=dict(sorted(all_headers.items(), key=lambda x: x[1], reverse=True)[:10]),
+                     common_query_params=dict(sorted(all_query_params.items(), key=lambda x: x[1], reverse=True)[:10])
+                 )
+                 patterns.append(pattern)
+
+             return patterns
+
+     def detect_api_dependencies(self, hours: int = 24) -> List[APIDependency]:
+         """Detect API dependencies between endpoints."""
+         with sqlite3.connect(self.db_path) as conn:
+             time_filter = datetime.now() - timedelta(hours=hours)
+
+             # Get all requests in the time window
+             cursor = conn.execute("""
+                 SELECT endpoint, method, timestamp, response_time
+                 FROM enhanced_requests
+                 WHERE timestamp >= ?
+                 ORDER BY timestamp
+             """, [time_filter.isoformat()])
+
+             rows = cursor.fetchall()
+
+             # Analyze temporal patterns
+             dependencies = []
+             endpoint_sequences = defaultdict(list)
+
+             for row in rows:
+                 endpoint, method, timestamp, response_time = row
+                 endpoint_sequences[endpoint].append({
+                     'timestamp': datetime.fromisoformat(timestamp),
+                     'response_time': response_time
+                 })
+
+             # Find endpoints that are called in sequence
+             for endpoint1 in endpoint_sequences:
+                 for endpoint2 in endpoint_sequences:
+                     if endpoint1 != endpoint2:
+                         # Check if endpoint2 is called shortly after endpoint1
+                         calls1 = endpoint_sequences[endpoint1]
+                         calls2 = endpoint_sequences[endpoint2]
+
+                         dependency_count = 0
+                         total_latency = 0
+
+                         for call1 in calls1:
+                             # Look for calls to endpoint2 within 5 seconds
+                             for call2 in calls2:
+                                 time_diff = (call2['timestamp'] - call1['timestamp']).total_seconds()
+                                 if 0 < time_diff <= 5:
+                                     dependency_count += 1
+                                     total_latency += time_diff
+                                     break
+
+                         if dependency_count > 0:
+                             avg_latency = total_latency / dependency_count
+                             confidence = min(dependency_count / len(calls1), 1.0)
+
+                             dependency = APIDependency(
+                                 source_endpoint=endpoint1,
+                                 target_endpoint=endpoint2,
+                                 dependency_type="calls",
+                                 confidence=confidence,
+                                 frequency=dependency_count,
+                                 avg_latency=avg_latency
+                             )
+                             dependencies.append(dependency)
+
+             return dependencies
+
+     def generate_cost_optimization_insights(self) -> List[CostOptimizationInsight]:
+         """Generate cost optimization insights."""
+         insights = []
+
+         # Get performance metrics
+         metrics = self.calculate_performance_metrics(hours=24)
+
+         # Analyze slow endpoints
+         slow_endpoints = [m for m in metrics if m.response_time_p95 > 1000]  # > 1 second
+         if slow_endpoints:
+             avg_latency = statistics.mean([m.response_time_p95 for m in slow_endpoints])
+             potential_savings = len(slow_endpoints) * avg_latency * 0.1  # 10% improvement
+
+             insight = CostOptimizationInsight(
+                 insight_type="performance_optimization",
+                 description=f"Found {len(slow_endpoints)} slow endpoints with P95 latency > 1s",
+                 potential_savings=potential_savings,
+                 recommendation="Consider caching, database optimization, or response compression",
+                 priority="high" if len(slow_endpoints) > 5 else "medium",
+                 affected_endpoints=[m.endpoint for m in slow_endpoints]
+             )
+             insights.append(insight)
+
+         # Analyze high error rates
+         error_endpoints = [m for m in metrics if m.error_rate > 0.05]  # > 5% error rate
+         if error_endpoints:
+             insight = CostOptimizationInsight(
+                 insight_type="error_reduction",
+                 description=f"Found {len(error_endpoints)} endpoints with high error rates",
+                 potential_savings=len(error_endpoints) * 100,  # $100 per endpoint
+                 recommendation="Review error handling and improve input validation",
+                 priority="high" if len(error_endpoints) > 3 else "medium",
+                 affected_endpoints=[m.endpoint for m in error_endpoints]
+             )
+             insights.append(insight)
+
+         # Analyze large response sizes
+         large_response_endpoints = [m for m in metrics if m.avg_response_size > 10000]  # > 10KB
+         if large_response_endpoints:
+             total_size = sum([m.avg_response_size * m.total_requests for m in large_response_endpoints])
+             potential_savings = total_size * 0.0001  # $0.0001 per KB
+
+             insight = CostOptimizationInsight(
+                 insight_type="response_optimization",
+                 description=f"Found {len(large_response_endpoints)} endpoints with large responses",
+                 potential_savings=potential_savings,
+                 recommendation="Consider pagination, field selection, or response compression",
+                 priority="medium",
+                 affected_endpoints=[m.endpoint for m in large_response_endpoints]
+             )
+             insights.append(insight)
+
+         return insights
+
+     def get_analytics_summary(self, hours: int = 24) -> Dict[str, Any]:
+         """Get a comprehensive analytics summary."""
+         metrics = self.calculate_performance_metrics(hours=hours)
+         patterns = self.analyze_usage_patterns(days=max(1, hours // 24))  # at least one day so sub-24h windows still return patterns
+         dependencies = self.detect_api_dependencies(hours=hours)
+         insights = self.generate_cost_optimization_insights()
+
+         # Calculate overall statistics
+         total_requests = sum(m.total_requests for m in metrics)
+         total_errors = sum(m.total_errors for m in metrics)
+         avg_response_time = statistics.mean([m.response_time_p50 for m in metrics]) if metrics else 0
+
+         summary = {
+             "time_period": f"Last {hours} hours",
+             "total_requests": total_requests,
+             "total_errors": total_errors,
+             "error_rate": total_errors / total_requests if total_requests > 0 else 0,
+             "avg_response_time": avg_response_time,
+             "endpoints_analyzed": len(metrics),
+             "usage_patterns": len(patterns),
+             "dependencies_found": len(dependencies),
+             "cost_insights_count": len(insights),  # count only; the insight objects live under "cost_insights" below
+             "top_performing_endpoints": sorted(metrics, key=lambda m: m.response_time_p50)[:5],
+             "worst_performing_endpoints": sorted(metrics, key=lambda m: m.response_time_p95, reverse=True)[:5],
+             "most_used_endpoints": sorted(metrics, key=lambda m: m.total_requests, reverse=True)[:5],
+             "cost_insights": insights
+         }
+
+         return summary
+
+     def export_analytics(self, format: str = "json", hours: int = 24) -> str:
+         """Export analytics data in various formats."""
+         summary = self.get_analytics_summary(hours=hours)
+
+         if format == "json":
+             return json.dumps(summary, indent=2, default=str)
+         elif format == "csv":
+             # Convert to CSV format
+             csv_lines = []
+             csv_lines.append("metric,value")
+             for key, value in summary.items():
+                 if isinstance(value, (int, float, str)):
+                     csv_lines.append(f"{key},{value}")
+             return "\n".join(csv_lines)
+         else:
+             raise ValueError(f"Unsupported export format: {format}")
+
+     def _percentile(self, data: List[float], percentile: int) -> float:
+         """Calculate percentile of a list of numbers."""
+         if not data:
+             return 0
+
+         sorted_data = sorted(data)
+         index = (percentile / 100) * (len(sorted_data) - 1)
+
+         if index.is_integer():
+             return sorted_data[int(index)]
+         else:
+             lower = sorted_data[int(index)]
+             upper = sorted_data[int(index) + 1]
+             return lower + (upper - lower) * (index - int(index))
+
+
+ # Global enhanced analytics instance
+ enhanced_analytics = EnhancedAnalytics()
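
For orientation, the sketch below shows one way the EnhancedAnalytics class added in 0.3.0 could be exercised, based only on the method signatures in this listing. The database path and the request/response payloads are illustrative assumptions (their keys mirror the .get() lookups in log_request); the module's import path is not shown in this diff, so the class is assumed to already be in scope.

    # Minimal usage sketch; EnhancedAnalytics is the class defined above.
    analytics = EnhancedAnalytics(db_path="demo_analytics.db")  # hypothetical database file

    # Record one mocked request/response pair (values are made up).
    analytics.log_request(
        request_data={"path": "/users", "method": "GET", "user_agent": "demo-client",
                      "ip_address": "127.0.0.1", "headers": {"Accept": "application/json"},
                      "query_params": {"page": "1"}, "request_size": 0},
        response_data={"status_code": 200, "response_time": 12.5,
                       "response_size": 2048, "body": {"users": []}},
        scenario_name="default",
    )

    # Derived analytics over the last 24 hours.
    metrics = analytics.calculate_performance_metrics(hours=24)
    summary = analytics.get_analytics_summary(hours=24)
    print(analytics.export_analytics(format="json", hours=24))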