api-mocker 0.4.0__py3-none-any.whl → 0.5.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- api_mocker/auth_system.py +643 -0
- api_mocker/cli.py +294 -1
- api_mocker/core.py +26 -6
- api_mocker/database_integration.py +588 -0
- api_mocker/graphql_mock.py +602 -0
- api_mocker/ml_integration.py +716 -0
- api_mocker/mock_responses.py +21 -28
- api_mocker/resources.py +176 -0
- api_mocker/server.py +77 -7
- api_mocker/websocket_mock.py +476 -0
- api_mocker-0.5.1.dist-info/METADATA +782 -0
- api_mocker-0.5.1.dist-info/RECORD +29 -0
- {api_mocker-0.4.0.dist-info → api_mocker-0.5.1.dist-info}/WHEEL +1 -1
- api_mocker-0.4.0.dist-info/METADATA +0 -464
- api_mocker-0.4.0.dist-info/RECORD +0 -23
- {api_mocker-0.4.0.dist-info → api_mocker-0.5.1.dist-info}/entry_points.txt +0 -0
- {api_mocker-0.4.0.dist-info → api_mocker-0.5.1.dist-info}/licenses/LICENSE +0 -0
- {api_mocker-0.4.0.dist-info → api_mocker-0.5.1.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,716 @@

```python
"""
Machine Learning Integration System

This module provides comprehensive ML capabilities for API mocking including:
- Intelligent response generation using ML models
- Request pattern analysis and prediction
- Anomaly detection for API behavior
- Smart caching based on usage patterns
- Automated test case generation
- Performance optimization recommendations
"""

import json
import numpy as np
import pandas as pd
from typing import Any, Dict, List, Optional, Union, Tuple, Callable
from dataclasses import dataclass, field
from enum import Enum
from datetime import datetime, timedelta
import pickle
import joblib
from sklearn.ensemble import RandomForestClassifier, IsolationForest
from sklearn.cluster import KMeans, DBSCAN
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, classification_report
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.linear_model import LinearRegression
from sklearn.neural_network import MLPClassifier
import asyncio
import threading
from collections import defaultdict, deque
import hashlib


class MLModelType(Enum):
    """ML model types"""
    CLASSIFICATION = "classification"
    REGRESSION = "regression"
    CLUSTERING = "clustering"
    ANOMALY_DETECTION = "anomaly_detection"
    TEXT_ANALYSIS = "text_analysis"
    RECOMMENDATION = "recommendation"


class PredictionType(Enum):
    """Prediction types"""
    RESPONSE_TIME = "response_time"
    ERROR_PROBABILITY = "error_probability"
    USER_BEHAVIOR = "user_behavior"
    CACHE_HIT = "cache_hit"
    RESOURCE_USAGE = "resource_usage"
    ANOMALY_SCORE = "anomaly_score"


@dataclass
class MLModel:
    """ML model representation"""
    name: str
    model_type: MLModelType
    model: Any
    features: List[str]
    target: str
    accuracy: float = 0.0
    created_at: datetime = field(default_factory=datetime.now)
    last_trained: datetime = field(default_factory=datetime.now)
    training_samples: int = 0
    metadata: Dict[str, Any] = field(default_factory=dict)


@dataclass
class PredictionRequest:
    """Prediction request"""
    features: Dict[str, Any]
    model_name: str
    prediction_type: PredictionType
    confidence_threshold: float = 0.5


@dataclass
class PredictionResult:
    """Prediction result"""
    prediction: Any
    confidence: float
    model_name: str
    features_used: List[str]
    timestamp: datetime = field(default_factory=datetime.now)
    metadata: Dict[str, Any] = field(default_factory=dict)


@dataclass
class TrainingData:
    """Training data for ML models"""
    features: List[Dict[str, Any]]
    targets: List[Any]
    feature_names: List[str]
    target_name: str
    created_at: datetime = field(default_factory=datetime.now)
```
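A minimal sketch of how these dataclasses fit together, assuming the module is importable as `api_mocker.ml_integration` (the model name below is hypothetical, not from the package):

```python
from sklearn.ensemble import RandomForestClassifier
from api_mocker.ml_integration import MLModel, MLModelType

# Wrap an sklearn estimator in the MLModel dataclass; features name the
# columns the model will consume, target names what it predicts.
model = MLModel(
    name="status_class_predictor",  # hypothetical name
    model_type=MLModelType.CLASSIFICATION,
    model=RandomForestClassifier(n_estimators=100, random_state=42),
    features=["method_encoded", "path_length", "body_length"],
    target="is_success",
)
print(model.name, model.model_type.value, model.accuracy)  # accuracy defaults to 0.0
```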
```python
class FeatureExtractor:
    """Feature extraction for ML models"""

    def __init__(self):
        self.text_vectorizer = TfidfVectorizer(max_features=1000, stop_words='english')
        self.label_encoders: Dict[str, LabelEncoder] = {}
        self.scaler = StandardScaler()

    def extract_request_features(self, request_data: Dict[str, Any]) -> Dict[str, Any]:
        """Extract features from API request data"""
        features = {}

        # Basic features
        features['method_encoded'] = self._encode_categorical(request_data.get('method', ''), 'method')
        features['path_length'] = len(request_data.get('path', ''))
        features['has_query_params'] = 1 if '?' in request_data.get('path', '') else 0
        features['has_path_params'] = 1 if '{' in request_data.get('path', '') else 0

        # Header features
        headers = {k.lower(): v for k, v in request_data.get('headers', {}).items()}
        features['header_count'] = len(headers)
        features['has_auth_header'] = 1 if 'authorization' in headers else 0
        features['has_content_type'] = 1 if 'content-type' in headers else 0

        # Body features
        body = request_data.get('body', '')
        if isinstance(body, str):
            features['body_length'] = len(body)
            features['is_json'] = 1 if body.startswith('{') or body.startswith('[') else 0
        else:
            features['body_length'] = 0
            features['is_json'] = 0

        # Time-based features
        now = datetime.now()
        features['hour_of_day'] = now.hour
        features['day_of_week'] = now.weekday()
        features['is_weekend'] = 1 if now.weekday() >= 5 else 0

        return features

    def extract_response_features(self, response_data: Dict[str, Any]) -> Dict[str, Any]:
        """Extract features from API response data"""
        features = {}

        features['status_code'] = response_data.get('status_code', 200)
        features['is_success'] = 1 if 200 <= features['status_code'] < 300 else 0
        features['is_client_error'] = 1 if 400 <= features['status_code'] < 500 else 0
        features['is_server_error'] = 1 if 500 <= features['status_code'] < 600 else 0

        # Response body features
        body = response_data.get('body', '')
        if isinstance(body, str):
            features['response_length'] = len(body)
        else:
            features['response_length'] = 0

        # Header features
        headers = response_data.get('headers', {})
        features['response_header_count'] = len(headers)
        features['has_cache_header'] = 1 if 'cache-control' in headers else 0

        return features

    def _encode_categorical(self, value: str, field_name: str) -> int:
        """Encode categorical values"""
        if field_name not in self.label_encoders:
            self.label_encoders[field_name] = LabelEncoder()
            # Fit with known values
            known_values = ['GET', 'POST', 'PUT', 'DELETE', 'PATCH', 'HEAD', 'OPTIONS']
            self.label_encoders[field_name].fit(known_values)

        try:
            return self.label_encoders[field_name].transform([value])[0]
        except ValueError:
            return 0  # Unknown value
```
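For orientation, a short usage sketch (an editor's illustration, not part of the diff): `extract_request_features` flattens a raw request dict into the numeric features the models consume.

```python
from api_mocker.ml_integration import FeatureExtractor

extractor = FeatureExtractor()
features = extractor.extract_request_features({
    "method": "POST",
    "path": "/users?active=1",
    "headers": {"Authorization": "Bearer token", "Content-Type": "application/json"},
    "body": '{"name": "ada"}',
})
# LabelEncoder sorts its classes alphabetically, so 'POST' encodes to 5 here.
print(features["method_encoded"], features["has_query_params"], features["is_json"])  # 5 1 1
```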
```python
class MLModelManager:
    """ML model management system"""

    def __init__(self):
        self.models: Dict[str, MLModel] = {}
        self.feature_extractor = FeatureExtractor()
        self.training_data: Dict[str, List[Dict[str, Any]]] = defaultdict(list)
        self.model_lock = threading.Lock()

    def add_model(self, model: MLModel) -> None:
        """Add a model to the manager"""
        with self.model_lock:
            self.models[model.name] = model

    def get_model(self, name: str) -> Optional[MLModel]:
        """Get a model by name"""
        return self.models.get(name)

    def train_model(self, model_name: str, training_data: TrainingData) -> Dict[str, Any]:
        """Train a model with the provided data"""
        if model_name not in self.models:
            return {"success": False, "error": "Model not found"}

        model = self.models[model_name]

        try:
            # Prepare features and targets. Select only the features the model
            # declares, in a fixed order, so that training and prediction agree
            # on the column layout.
            X = []
            y = []

            for i, features in enumerate(training_data.features):
                X.append([features.get(f, 0) for f in model.features])
                y.append(training_data.targets[i])

            X = np.array(X)
            y = np.array(y)

            # Split data
            X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

            # Scale features. Note: the scaler is shared across models, so the
            # most recently trained model determines its fitted state.
            X_train_scaled = self.feature_extractor.scaler.fit_transform(X_train)
            X_test_scaled = self.feature_extractor.scaler.transform(X_test)

            # Train model
            model.model.fit(X_train_scaled, y_train)

            # Evaluate
            y_pred = model.model.predict(X_test_scaled)

            if model.model_type == MLModelType.REGRESSION:
                from sklearn.metrics import r2_score
                # For regression, "accuracy" is the R^2 score, which can be
                # negative when the model fits worse than the mean.
                accuracy = r2_score(y_test, y_pred)
            else:
                accuracy = accuracy_score(y_test, y_pred)

            # Update model
            model.accuracy = accuracy
            model.last_trained = datetime.now()
            model.training_samples = len(training_data.features)

            return {
                "success": True,
                "accuracy": accuracy,
                "training_samples": len(training_data.features),
                "test_samples": len(X_test)
            }

        except Exception as e:
            return {"success": False, "error": str(e)}

    def predict(self, request: PredictionRequest) -> PredictionResult:
        """Make a prediction using a model"""
        if request.model_name not in self.models:
            return PredictionResult(
                prediction=None,
                confidence=0.0,
                model_name=request.model_name,
                features_used=[],
                metadata={"error": "Model not found"}
            )

        model = self.models[request.model_name]

        try:
            # Extract features from the raw request data
            features = self.feature_extractor.extract_request_features(request.features)
            feature_values = [features.get(f, 0) for f in model.features]

            # Scale features
            feature_array = np.array(feature_values).reshape(1, -1)
            scaled_features = self.feature_extractor.scaler.transform(feature_array)

            # Make prediction
            if model.model_type == MLModelType.CLASSIFICATION:
                prediction = model.model.predict(scaled_features)[0]
                confidence = model.model.predict_proba(scaled_features).max()
            else:
                prediction = model.model.predict(scaled_features)[0]
                confidence = 1.0  # Regression models do not produce confidence scores

            return PredictionResult(
                prediction=prediction,
                confidence=confidence,
                model_name=request.model_name,
                features_used=model.features,
                metadata={"model_accuracy": model.accuracy}
            )

        except Exception as e:
            return PredictionResult(
                prediction=None,
                confidence=0.0,
                model_name=request.model_name,
                features_used=[],
                metadata={"error": str(e)}
            )

    def save_model(self, model_name: str, filepath: str) -> bool:
        """Save a model to disk"""
        if model_name not in self.models:
            return False

        try:
            model = self.models[model_name]
            model_data = {
                "name": model.name,
                "model_type": model.model_type.value,
                "features": model.features,
                "target": model.target,
                "accuracy": model.accuracy,
                "created_at": model.created_at.isoformat(),
                "last_trained": model.last_trained.isoformat(),
                "training_samples": model.training_samples,
                "metadata": model.metadata
            }

            # Save model and metadata
            joblib.dump(model.model, f"{filepath}_model.pkl")
            with open(f"{filepath}_metadata.json", 'w') as f:
                json.dump(model_data, f, indent=2)

            return True
        except Exception as e:
            print(f"Error saving model: {e}")
            return False

    def load_model(self, model_name: str, filepath: str) -> bool:
        """Load a model from disk"""
        try:
            # Load metadata
            with open(f"{filepath}_metadata.json", 'r') as f:
                model_data = json.load(f)

            # Load model
            model_obj = joblib.load(f"{filepath}_model.pkl")

            # Create model
            model = MLModel(
                name=model_data["name"],
                model_type=MLModelType(model_data["model_type"]),
                model=model_obj,
                features=model_data["features"],
                target=model_data["target"],
                accuracy=model_data["accuracy"],
                created_at=datetime.fromisoformat(model_data["created_at"]),
                last_trained=datetime.fromisoformat(model_data["last_trained"]),
                training_samples=model_data["training_samples"],
                metadata=model_data["metadata"]
            )

            self.add_model(model)
            return True

        except Exception as e:
            print(f"Error loading model: {e}")
            return False
```
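A hedged sketch of the train/predict cycle (the model name `error_clf` and the synthetic labels are illustrative only). Note that `predict()` takes a raw request dict and re-extracts features itself:

```python
import random
from sklearn.ensemble import RandomForestClassifier
from api_mocker.ml_integration import (
    MLModel, MLModelType, MLModelManager, TrainingData,
    PredictionRequest, PredictionType,
)

manager = MLModelManager()
manager.add_model(MLModel(
    name="error_clf",                         # hypothetical model name
    model_type=MLModelType.CLASSIFICATION,
    model=RandomForestClassifier(n_estimators=10, random_state=0),
    features=["path_length", "body_length"],  # must match the training rows below
    target="is_error",
))

# Synthetic training rows: long bodies are labelled as errors purely for illustration.
rows = [{"path_length": random.randint(5, 40),
         "body_length": random.randint(0, 500)} for _ in range(50)]
labels = [1 if r["body_length"] > 250 else 0 for r in rows]
print(manager.train_model("error_clf", TrainingData(
    features=rows, targets=labels,
    feature_names=["path_length", "body_length"], target_name="is_error",
)))

# predict() expects raw request data, not pre-computed features:
result = manager.predict(PredictionRequest(
    features={"method": "POST", "path": "/orders", "body": "x" * 300},
    model_name="error_clf",
    prediction_type=PredictionType.ERROR_PROBABILITY,
))
print(result.prediction, result.confidence)
```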
```python
class AnomalyDetector:
    """Anomaly detection system"""

    def __init__(self):
        self.isolation_forest = IsolationForest(contamination=0.1, random_state=42)
        self.is_fitted = False
        self.feature_extractor = FeatureExtractor()

    def fit(self, normal_data: List[Dict[str, Any]]) -> None:
        """Fit the anomaly detector with normal data"""
        features = []
        for data in normal_data:
            features.append(list(self.feature_extractor.extract_request_features(data).values()))

        X = np.array(features)
        self.isolation_forest.fit(X)
        self.is_fitted = True

    def detect_anomaly(self, request_data: Dict[str, Any]) -> Dict[str, Any]:
        """Detect if request is anomalous"""
        if not self.is_fitted:
            return {"is_anomaly": False, "score": 0.0, "message": "Model not fitted"}

        features = self.feature_extractor.extract_request_features(request_data)
        feature_values = list(features.values())
        X = np.array(feature_values).reshape(1, -1)

        anomaly_score = self.isolation_forest.decision_function(X)[0]
        is_anomaly = self.isolation_forest.predict(X)[0] == -1

        return {
            "is_anomaly": bool(is_anomaly),
            "score": float(anomaly_score),
            "confidence": abs(anomaly_score),
            "features": features
        }
```
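Another illustrative sketch: fit the detector on uniform "normal" traffic, then score an outlier. Whether a given request is actually flagged depends on the fitted data and IsolationForest's contamination setting.

```python
from api_mocker.ml_integration import AnomalyDetector

detector = AnomalyDetector()
# Fit on "normal" traffic: short GET requests with empty bodies.
normal = [{"method": "GET", "path": f"/items/{i}", "headers": {}, "body": ""}
          for i in range(200)]
detector.fit(normal)

# An unusually large POST should score as more anomalous than the fitted traffic.
verdict = detector.detect_anomaly({
    "method": "POST", "path": "/items", "headers": {}, "body": "x" * 10000,
})
print(verdict["is_anomaly"], verdict["score"])
```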
```python
class SmartCache:
    """ML-powered smart caching system"""

    def __init__(self, max_size: int = 1000):
        self.cache: Dict[str, Any] = {}
        self.access_times: Dict[str, datetime] = {}
        self.access_counts: Dict[str, int] = defaultdict(int)
        self.max_size = max_size
        self.ml_manager = MLModelManager()
        self._setup_cache_model()

    def _setup_cache_model(self) -> None:
        """Setup ML model for cache prediction"""
        # Create a simple model to predict cache hit probability. The model
        # starts untrained, so predictions return 0.0 until it is trained.
        model = MLModel(
            name="cache_predictor",
            model_type=MLModelType.CLASSIFICATION,
            model=RandomForestClassifier(n_estimators=100, random_state=42),
            features=['path_length', 'has_query_params', 'hour_of_day', 'day_of_week'],
            target="cache_hit"
        )
        self.ml_manager.add_model(model)

    def get(self, key: str) -> Optional[Any]:
        """Get value from cache"""
        if key in self.cache:
            self.access_times[key] = datetime.now()
            self.access_counts[key] += 1
            return self.cache[key]
        return None

    def set(self, key: str, value: Any, ttl: Optional[int] = None) -> None:
        """Set value in cache (ttl is accepted but not currently enforced)"""
        if len(self.cache) >= self.max_size:
            self._evict_least_used()

        self.cache[key] = value
        self.access_times[key] = datetime.now()
        self.access_counts[key] = 0

    def predict_cache_hit(self, request_data: Dict[str, Any]) -> float:
        """Predict cache hit probability for a request"""
        features = self.ml_manager.feature_extractor.extract_request_features(request_data)

        # Use only the features available in the model
        model_features = {k: features.get(k, 0) for k in self.ml_manager.get_model("cache_predictor").features}

        request = PredictionRequest(
            features=model_features,
            model_name="cache_predictor",
            prediction_type=PredictionType.CACHE_HIT
        )

        result = self.ml_manager.predict(request)
        # Confidence of a predicted hit; 0.0 when no hit (or no model) is predicted
        return result.confidence if result.prediction else 0.0

    def _evict_least_used(self) -> None:
        """Evict the least used item from the cache"""
        if not self.cache:
            return

        # Find the item with the lowest access count, breaking ties by oldest
        # access time
        least_used_key = min(
            self.cache.keys(),
            key=lambda k: (self.access_counts[k], self.access_times[k])
        )

        del self.cache[least_used_key]
        del self.access_times[least_used_key]
        del self.access_counts[least_used_key]
```
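A quick sketch of the cache behaviour (editor's illustration): eviction prefers the lowest access count, then the oldest access time, and `predict_cache_hit` returns 0.0 until the `cache_predictor` model has been trained.

```python
from api_mocker.ml_integration import SmartCache

cache = SmartCache(max_size=2)
cache.set("/users", {"status_code": 200, "body": "[]"})
cache.set("/orders", {"status_code": 200, "body": "[]"})
cache.get("/users")                                      # bumps /users' access count
cache.set("/items", {"status_code": 200, "body": "[]"})  # evicts /orders (least used)

print(cache.get("/orders"))                              # None
print(cache.predict_cache_hit({"method": "GET", "path": "/users"}))  # 0.0 (untrained model)
```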
```python
class MLIntegration:
    """Main ML integration system"""

    def __init__(self):
        self.model_manager = MLModelManager()
        self.anomaly_detector = AnomalyDetector()
        self.smart_cache = SmartCache()
        self.request_history: deque = deque(maxlen=10000)
        self.response_patterns: Dict[str, List[Dict[str, Any]]] = defaultdict(list)

    def record_request(self, request_data: Dict[str, Any], response_data: Dict[str, Any]) -> None:
        """Record a request-response pair for ML training"""
        self.request_history.append({
            "request": request_data,
            "response": response_data,
            "timestamp": datetime.now()
        })

        # Update response patterns
        path = request_data.get('path', '')
        self.response_patterns[path].append(response_data)

    def create_response_time_model(self) -> MLModel:
        """Create a model to predict response times"""
        model = MLModel(
            name="response_time_predictor",
            model_type=MLModelType.REGRESSION,
            model=LinearRegression(),
            features=['method_encoded', 'path_length', 'body_length', 'header_count', 'hour_of_day'],
            target="response_time"
        )
        self.model_manager.add_model(model)
        return model

    def create_error_probability_model(self) -> MLModel:
        """Create a model to predict error probability"""
        model = MLModel(
            name="error_probability_predictor",
            model_type=MLModelType.CLASSIFICATION,
            model=MLPClassifier(hidden_layer_sizes=(100, 50), random_state=42),
            features=['method_encoded', 'path_length', 'body_length', 'hour_of_day', 'day_of_week'],
            target="error_probability"
        )
        self.model_manager.add_model(model)
        return model

    def train_models(self) -> Dict[str, Any]:
        """Train all models with collected data"""
        results = {}

        # Prepare training data
        if len(self.request_history) < 100:
            return {"error": "Insufficient training data"}

        # Response time model
        response_time_model = self.create_response_time_model()
        response_time_data = self._prepare_response_time_data()
        if response_time_data:
            results["response_time"] = self.model_manager.train_model(
                "response_time_predictor", response_time_data
            )

        # Error probability model
        error_model = self.create_error_probability_model()
        error_data = self._prepare_error_data()
        if error_data:
            results["error_probability"] = self.model_manager.train_model(
                "error_probability_predictor", error_data
            )

        # Anomaly detector
        normal_data = [item["request"] for item in self.request_history]
        self.anomaly_detector.fit(normal_data)
        results["anomaly_detector"] = {"fitted": True}

        return results

    def _prepare_response_time_data(self) -> Optional[TrainingData]:
        """Prepare training data for response time prediction"""
        features = []
        targets = []

        for item in self.request_history:
            request = item["request"]
            response = item["response"]

            # Extract features
            request_features = self.model_manager.feature_extractor.extract_request_features(request)
            response_features = self.model_manager.feature_extractor.extract_response_features(response)

            # Combine features
            combined_features = {**request_features, **response_features}
            features.append(combined_features)

            # Target is response time (simulated)
            targets.append(np.random.uniform(0.1, 2.0))  # Simulated response time

        if not features:
            return None

        return TrainingData(
            features=features,
            targets=targets,
            feature_names=list(features[0].keys()),
            target_name="response_time"
        )

    def _prepare_error_data(self) -> Optional[TrainingData]:
        """Prepare training data for error probability prediction"""
        features = []
        targets = []

        for item in self.request_history:
            request = item["request"]
            response = item["response"]

            # Extract features
            request_features = self.model_manager.feature_extractor.extract_request_features(request)
            features.append(request_features)

            # Target is error probability (1 if error, 0 if success)
            is_error = response.get("status_code", 200) >= 400
            targets.append(1 if is_error else 0)

        if not features:
            return None

        return TrainingData(
            features=features,
            targets=targets,
            feature_names=list(features[0].keys()),
            target_name="error_probability"
        )

    def predict_response_time(self, request_data: Dict[str, Any]) -> float:
        """Predict response time for a request"""
        request = PredictionRequest(
            features=request_data,
            model_name="response_time_predictor",
            prediction_type=PredictionType.RESPONSE_TIME
        )

        result = self.model_manager.predict(request)
        # Fall back to 1.0 only when no prediction is available; a predicted
        # value of 0.0 is still a valid prediction.
        return result.prediction if result.prediction is not None else 1.0

    def predict_error_probability(self, request_data: Dict[str, Any]) -> float:
        """Predict error probability for a request"""
        request = PredictionRequest(
            features=request_data,
            model_name="error_probability_predictor",
            prediction_type=PredictionType.ERROR_PROBABILITY
        )

        result = self.model_manager.predict(request)
        # The classifier's confidence stands in for the error probability when
        # an error is predicted; otherwise 0.0
        return result.confidence if result.prediction else 0.0

    def detect_anomaly(self, request_data: Dict[str, Any]) -> Dict[str, Any]:
        """Detect anomalies in a request"""
        return self.anomaly_detector.detect_anomaly(request_data)

    def get_cache_recommendation(self, request_data: Dict[str, Any]) -> Dict[str, Any]:
        """Get a cache recommendation for a request"""
        cache_hit_probability = self.smart_cache.predict_cache_hit(request_data)

        return {
            "should_cache": cache_hit_probability > 0.7,
            "cache_hit_probability": cache_hit_probability,
            "recommended_ttl": int(3600 * cache_hit_probability)  # TTL in seconds
        }

    def generate_smart_response(self, request_data: Dict[str, Any]) -> Dict[str, Any]:
        """Generate a smart response using ML predictions"""
        # Predict response characteristics
        response_time = self.predict_response_time(request_data)
        error_probability = self.predict_error_probability(request_data)
        anomaly_result = self.detect_anomaly(request_data)
        cache_recommendation = self.get_cache_recommendation(request_data)

        # Generate response based on predictions
        if error_probability > 0.5:
            status_code = np.random.choice([400, 401, 403, 404, 500], p=[0.3, 0.2, 0.1, 0.3, 0.1])
        else:
            status_code = 200

        response = {
            "status_code": status_code,
            "headers": {
                "Content-Type": "application/json",
                "X-ML-Predicted": "true",
                "X-Response-Time": str(response_time),
                "X-Error-Probability": str(error_probability)
            },
            "body": {
                "message": "ML-generated response",
                "predicted_response_time": response_time,
                "error_probability": error_probability,
                "is_anomaly": anomaly_result["is_anomaly"],
                "cache_recommendation": cache_recommendation
            }
        }

        # Add cache headers if recommended
        if cache_recommendation["should_cache"]:
            response["headers"]["Cache-Control"] = f"max-age={cache_recommendation['recommended_ttl']}"

        return response
```
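Putting it together, a sketch of the full loop, assuming at least 100 recorded pairs (the threshold `train_models()` enforces); the traffic below is synthetic:

```python
import random
from api_mocker.ml_integration import MLIntegration

ml = MLIntegration()
# Record 120 request/response pairs so train_models() has enough data.
for i in range(120):
    ml.record_request(
        {"method": "GET", "path": f"/products/{i}", "headers": {}, "body": ""},
        {"status_code": random.choice([200, 200, 200, 500]), "headers": {}, "body": "{}"},
    )
print(ml.train_models())

# Generate a fully ML-driven mock response for a new request.
response = ml.generate_smart_response(
    {"method": "GET", "path": "/products/1", "headers": {}, "body": ""}
)
print(response["status_code"], response["body"]["is_anomaly"])
```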
```python
# Global ML integration instance
ml_integration = MLIntegration()


# Convenience functions
def create_ml_model(name: str, model_type: MLModelType, features: List[str], target: str) -> MLModel:
    """Create a new ML model"""
    if model_type == MLModelType.CLASSIFICATION:
        model = RandomForestClassifier(n_estimators=100, random_state=42)
    elif model_type == MLModelType.REGRESSION:
        model = LinearRegression()
    else:
        model = RandomForestClassifier(n_estimators=100, random_state=42)

    ml_model = MLModel(
        name=name,
        model_type=model_type,
        model=model,
        features=features,
        target=target
    )

    ml_integration.model_manager.add_model(ml_model)
    return ml_model


def train_ml_models() -> Dict[str, Any]:
    """Train all ML models"""
    return ml_integration.train_models()


def predict_response_characteristics(request_data: Dict[str, Any]) -> Dict[str, Any]:
    """Predict response characteristics for a request"""
    return {
        "response_time": ml_integration.predict_response_time(request_data),
        "error_probability": ml_integration.predict_error_probability(request_data),
        "anomaly_detection": ml_integration.detect_anomaly(request_data),
        "cache_recommendation": ml_integration.get_cache_recommendation(request_data)
    }
```
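Finally, the module-level helpers can be driven as below (the model name is hypothetical; on a fresh process the global instance has no recorded traffic, hence the "Insufficient training data" result):

```python
from api_mocker.ml_integration import (
    create_ml_model, train_ml_models, predict_response_characteristics, MLModelType,
)

create_ml_model(
    name="latency_model",  # hypothetical name
    model_type=MLModelType.REGRESSION,
    features=["path_length", "body_length"],
    target="response_time",
)
print(train_ml_models())  # {'error': 'Insufficient training data'} until 100+ pairs are recorded
print(predict_response_characteristics(
    {"method": "GET", "path": "/health", "headers": {}, "body": ""}
))
```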