createsonline-0.1.26-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- createsonline/__init__.py +46 -0
- createsonline/admin/__init__.py +7 -0
- createsonline/admin/content.py +526 -0
- createsonline/admin/crud.py +805 -0
- createsonline/admin/field_builder.py +559 -0
- createsonline/admin/integration.py +482 -0
- createsonline/admin/interface.py +2562 -0
- createsonline/admin/model_creator.py +513 -0
- createsonline/admin/model_manager.py +388 -0
- createsonline/admin/modern_dashboard.py +498 -0
- createsonline/admin/permissions.py +264 -0
- createsonline/admin/user_forms.py +594 -0
- createsonline/ai/__init__.py +202 -0
- createsonline/ai/fields.py +1226 -0
- createsonline/ai/orm.py +325 -0
- createsonline/ai/services.py +1244 -0
- createsonline/app.py +506 -0
- createsonline/auth/__init__.py +8 -0
- createsonline/auth/management.py +228 -0
- createsonline/auth/models.py +552 -0
- createsonline/cli/__init__.py +5 -0
- createsonline/cli/commands/__init__.py +122 -0
- createsonline/cli/commands/database.py +416 -0
- createsonline/cli/commands/info.py +173 -0
- createsonline/cli/commands/initdb.py +218 -0
- createsonline/cli/commands/project.py +545 -0
- createsonline/cli/commands/serve.py +173 -0
- createsonline/cli/commands/shell.py +93 -0
- createsonline/cli/commands/users.py +148 -0
- createsonline/cli/main.py +2041 -0
- createsonline/cli/manage.py +274 -0
- createsonline/config/__init__.py +9 -0
- createsonline/config/app.py +2577 -0
- createsonline/config/database.py +179 -0
- createsonline/config/docs.py +384 -0
- createsonline/config/errors.py +160 -0
- createsonline/config/orm.py +43 -0
- createsonline/config/request.py +93 -0
- createsonline/config/settings.py +176 -0
- createsonline/data/__init__.py +23 -0
- createsonline/data/dataframe.py +925 -0
- createsonline/data/io.py +453 -0
- createsonline/data/series.py +557 -0
- createsonline/database/__init__.py +60 -0
- createsonline/database/abstraction.py +440 -0
- createsonline/database/assistant.py +585 -0
- createsonline/database/fields.py +442 -0
- createsonline/database/migrations.py +132 -0
- createsonline/database/models.py +604 -0
- createsonline/database.py +438 -0
- createsonline/http/__init__.py +28 -0
- createsonline/http/client.py +535 -0
- createsonline/ml/__init__.py +55 -0
- createsonline/ml/classification.py +552 -0
- createsonline/ml/clustering.py +680 -0
- createsonline/ml/metrics.py +542 -0
- createsonline/ml/neural.py +560 -0
- createsonline/ml/preprocessing.py +784 -0
- createsonline/ml/regression.py +501 -0
- createsonline/performance/__init__.py +19 -0
- createsonline/performance/cache.py +444 -0
- createsonline/performance/compression.py +335 -0
- createsonline/performance/core.py +419 -0
- createsonline/project_init.py +789 -0
- createsonline/routing.py +528 -0
- createsonline/security/__init__.py +34 -0
- createsonline/security/core.py +811 -0
- createsonline/security/encryption.py +349 -0
- createsonline/server.py +295 -0
- createsonline/static/css/admin.css +263 -0
- createsonline/static/css/common.css +358 -0
- createsonline/static/css/dashboard.css +89 -0
- createsonline/static/favicon.ico +0 -0
- createsonline/static/icons/icon-128x128.png +0 -0
- createsonline/static/icons/icon-128x128.webp +0 -0
- createsonline/static/icons/icon-16x16.png +0 -0
- createsonline/static/icons/icon-16x16.webp +0 -0
- createsonline/static/icons/icon-180x180.png +0 -0
- createsonline/static/icons/icon-180x180.webp +0 -0
- createsonline/static/icons/icon-192x192.png +0 -0
- createsonline/static/icons/icon-192x192.webp +0 -0
- createsonline/static/icons/icon-256x256.png +0 -0
- createsonline/static/icons/icon-256x256.webp +0 -0
- createsonline/static/icons/icon-32x32.png +0 -0
- createsonline/static/icons/icon-32x32.webp +0 -0
- createsonline/static/icons/icon-384x384.png +0 -0
- createsonline/static/icons/icon-384x384.webp +0 -0
- createsonline/static/icons/icon-48x48.png +0 -0
- createsonline/static/icons/icon-48x48.webp +0 -0
- createsonline/static/icons/icon-512x512.png +0 -0
- createsonline/static/icons/icon-512x512.webp +0 -0
- createsonline/static/icons/icon-64x64.png +0 -0
- createsonline/static/icons/icon-64x64.webp +0 -0
- createsonline/static/image/android-chrome-192x192.png +0 -0
- createsonline/static/image/android-chrome-512x512.png +0 -0
- createsonline/static/image/apple-touch-icon.png +0 -0
- createsonline/static/image/favicon-16x16.png +0 -0
- createsonline/static/image/favicon-32x32.png +0 -0
- createsonline/static/image/favicon.ico +0 -0
- createsonline/static/image/favicon.svg +17 -0
- createsonline/static/image/icon-128x128.png +0 -0
- createsonline/static/image/icon-128x128.webp +0 -0
- createsonline/static/image/icon-16x16.png +0 -0
- createsonline/static/image/icon-16x16.webp +0 -0
- createsonline/static/image/icon-180x180.png +0 -0
- createsonline/static/image/icon-180x180.webp +0 -0
- createsonline/static/image/icon-192x192.png +0 -0
- createsonline/static/image/icon-192x192.webp +0 -0
- createsonline/static/image/icon-256x256.png +0 -0
- createsonline/static/image/icon-256x256.webp +0 -0
- createsonline/static/image/icon-32x32.png +0 -0
- createsonline/static/image/icon-32x32.webp +0 -0
- createsonline/static/image/icon-384x384.png +0 -0
- createsonline/static/image/icon-384x384.webp +0 -0
- createsonline/static/image/icon-48x48.png +0 -0
- createsonline/static/image/icon-48x48.webp +0 -0
- createsonline/static/image/icon-512x512.png +0 -0
- createsonline/static/image/icon-512x512.webp +0 -0
- createsonline/static/image/icon-64x64.png +0 -0
- createsonline/static/image/icon-64x64.webp +0 -0
- createsonline/static/image/logo-header-h100.png +0 -0
- createsonline/static/image/logo-header-h100.webp +0 -0
- createsonline/static/image/logo-header-h200@2x.png +0 -0
- createsonline/static/image/logo-header-h200@2x.webp +0 -0
- createsonline/static/image/logo.png +0 -0
- createsonline/static/js/admin.js +274 -0
- createsonline/static/site.webmanifest +35 -0
- createsonline/static/templates/admin/base.html +87 -0
- createsonline/static/templates/admin/dashboard.html +217 -0
- createsonline/static/templates/admin/model_form.html +270 -0
- createsonline/static/templates/admin/model_list.html +202 -0
- createsonline/static/test_script.js +15 -0
- createsonline/static/test_styles.css +59 -0
- createsonline/static_files.py +365 -0
- createsonline/templates/404.html +100 -0
- createsonline/templates/admin_login.html +169 -0
- createsonline/templates/base.html +102 -0
- createsonline/templates/index.html +151 -0
- createsonline/templates.py +205 -0
- createsonline/testing.py +322 -0
- createsonline/utils.py +448 -0
- createsonline/validation/__init__.py +49 -0
- createsonline/validation/fields.py +598 -0
- createsonline/validation/models.py +504 -0
- createsonline/validation/validators.py +561 -0
- createsonline/views.py +184 -0
- createsonline-0.1.26.dist-info/METADATA +46 -0
- createsonline-0.1.26.dist-info/RECORD +152 -0
- createsonline-0.1.26.dist-info/WHEEL +5 -0
- createsonline-0.1.26.dist-info/entry_points.txt +2 -0
- createsonline-0.1.26.dist-info/licenses/LICENSE +21 -0
- createsonline-0.1.26.dist-info/top_level.txt +1 -0
@@ -0,0 +1,542 @@
"""
CREATESONLINE ML Metrics

Pure Python evaluation metrics.
"""

import numpy as np
from typing import Union, Dict, Any, List, Optional
import math

def accuracy_score(y_true: Union[np.ndarray, list], y_pred: Union[np.ndarray, list]) -> float:
    """
    Calculate accuracy score

    Args:
        y_true: True labels
        y_pred: Predicted labels

    Returns:
        Accuracy score (0-1)
    """
    y_true = np.array(y_true) if not isinstance(y_true, np.ndarray) else y_true
    y_pred = np.array(y_pred) if not isinstance(y_pred, np.ndarray) else y_pred

    if len(y_true) != len(y_pred):
        raise ValueError("y_true and y_pred must have the same length")

    return np.mean(y_true == y_pred)

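As a quick sanity check of accuracy_score, a minimal sketch (editor's illustration, not part of the wheel contents; it assumes the package is installed so that createsonline.ml.metrics imports as listed in the file tree above):

# Illustrative usage sketch (assumes the wheel is installed; not package code)
from createsonline.ml.metrics import accuracy_score

print(accuracy_score([1, 0, 1, 1], [1, 0, 0, 1]))  # 3 of 4 labels match -> 0.75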
def precision_score(
    y_true: Union[np.ndarray, list],
    y_pred: Union[np.ndarray, list],
    average: str = 'binary',
    pos_label: Union[str, int] = 1
) -> Union[float, np.ndarray]:
    """
    Calculate precision score

    Args:
        y_true: True labels
        y_pred: Predicted labels
        average: Averaging strategy ('binary', 'micro', 'macro', 'weighted', None)
        pos_label: Positive class label for binary classification

    Returns:
        Precision score(s)
    """
    y_true = np.array(y_true) if not isinstance(y_true, np.ndarray) else y_true
    y_pred = np.array(y_pred) if not isinstance(y_pred, np.ndarray) else y_pred

    if len(y_true) != len(y_pred):
        raise ValueError("y_true and y_pred must have the same length")

    classes = np.unique(np.concatenate([y_true, y_pred]))

    if average == 'binary':
        if len(classes) > 2:
            raise ValueError("Binary classification requires exactly 2 classes")

        tp = np.sum((y_true == pos_label) & (y_pred == pos_label))
        fp = np.sum((y_true != pos_label) & (y_pred == pos_label))

        return tp / (tp + fp) if (tp + fp) > 0 else 0.0

    # Multi-class metrics
    precisions = []
    for cls in classes:
        tp = np.sum((y_true == cls) & (y_pred == cls))
        fp = np.sum((y_true != cls) & (y_pred == cls))

        precision = tp / (tp + fp) if (tp + fp) > 0 else 0.0
        precisions.append(precision)

    precisions = np.array(precisions)

    if average is None:
        return precisions
    elif average == 'macro':
        return np.mean(precisions)
    elif average == 'micro':
        tp_total = sum(np.sum((y_true == cls) & (y_pred == cls)) for cls in classes)
        fp_total = sum(np.sum((y_true != cls) & (y_pred == cls)) for cls in classes)
        return tp_total / (tp_total + fp_total) if (tp_total + fp_total) > 0 else 0.0
    elif average == 'weighted':
        weights = [np.sum(y_true == cls) for cls in classes]
        return np.average(precisions, weights=weights)
    else:
        raise ValueError(f"Unknown average: {average}")

def recall_score(
    y_true: Union[np.ndarray, list],
    y_pred: Union[np.ndarray, list],
    average: str = 'binary',
    pos_label: Union[str, int] = 1
) -> Union[float, np.ndarray]:
    """
    Calculate recall score

    Args:
        y_true: True labels
        y_pred: Predicted labels
        average: Averaging strategy ('binary', 'micro', 'macro', 'weighted', None)
        pos_label: Positive class label for binary classification

    Returns:
        Recall score(s)
    """
    y_true = np.array(y_true) if not isinstance(y_true, np.ndarray) else y_true
    y_pred = np.array(y_pred) if not isinstance(y_pred, np.ndarray) else y_pred

    if len(y_true) != len(y_pred):
        raise ValueError("y_true and y_pred must have the same length")

    classes = np.unique(np.concatenate([y_true, y_pred]))

    if average == 'binary':
        if len(classes) > 2:
            raise ValueError("Binary classification requires exactly 2 classes")

        tp = np.sum((y_true == pos_label) & (y_pred == pos_label))
        fn = np.sum((y_true == pos_label) & (y_pred != pos_label))

        return tp / (tp + fn) if (tp + fn) > 0 else 0.0

    # Multi-class metrics
    recalls = []
    for cls in classes:
        tp = np.sum((y_true == cls) & (y_pred == cls))
        fn = np.sum((y_true == cls) & (y_pred != cls))

        recall = tp / (tp + fn) if (tp + fn) > 0 else 0.0
        recalls.append(recall)

    recalls = np.array(recalls)

    if average is None:
        return recalls
    elif average == 'macro':
        return np.mean(recalls)
    elif average == 'micro':
        tp_total = sum(np.sum((y_true == cls) & (y_pred == cls)) for cls in classes)
        fn_total = sum(np.sum((y_true == cls) & (y_pred != cls)) for cls in classes)
        return tp_total / (tp_total + fn_total) if (tp_total + fn_total) > 0 else 0.0
    elif average == 'weighted':
        weights = [np.sum(y_true == cls) for cls in classes]
        return np.average(recalls, weights=weights)
    else:
        raise ValueError(f"Unknown average: {average}")

def f1_score(
    y_true: Union[np.ndarray, list],
    y_pred: Union[np.ndarray, list],
    average: str = 'binary',
    pos_label: Union[str, int] = 1
) -> Union[float, np.ndarray]:
    """
    Calculate F1 score (harmonic mean of precision and recall)

    Args:
        y_true: True labels
        y_pred: Predicted labels
        average: Averaging strategy ('binary', 'micro', 'macro', 'weighted', None)
        pos_label: Positive class label for binary classification

    Returns:
        F1 score(s)
    """
    precision = precision_score(y_true, y_pred, average=average, pos_label=pos_label)
    recall = recall_score(y_true, y_pred, average=average, pos_label=pos_label)

    if isinstance(precision, np.ndarray):
        # Handle array case
        f1_scores = np.zeros_like(precision)
        mask = (precision + recall) > 0
        f1_scores[mask] = 2 * precision[mask] * recall[mask] / (precision[mask] + recall[mask])
        return f1_scores
    else:
        # Handle scalar case
        return 2 * precision * recall / (precision + recall) if (precision + recall) > 0 else 0.0

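The F1 relationship is easy to verify by hand. A minimal sketch (editor's illustration, not package code), using the binary default where tp=2, fp=1, fn=1:

# Illustrative sketch: F1 as the harmonic mean of precision and recall
from createsonline.ml.metrics import precision_score, recall_score, f1_score

y_true = [1, 1, 1, 0, 0]
y_pred = [1, 1, 0, 1, 0]
p = precision_score(y_true, y_pred)  # tp=2, fp=1 -> 2/3
r = recall_score(y_true, y_pred)     # tp=2, fn=1 -> 2/3
print(f1_score(y_true, y_pred))      # 2*p*r / (p+r) = 2/3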
def confusion_matrix(y_true: Union[np.ndarray, list], y_pred: Union[np.ndarray, list]) -> np.ndarray:
    """
    Calculate confusion matrix

    Args:
        y_true: True labels
        y_pred: Predicted labels

    Returns:
        Confusion matrix (n_classes, n_classes)
    """
    y_true = np.array(y_true) if not isinstance(y_true, np.ndarray) else y_true
    y_pred = np.array(y_pred) if not isinstance(y_pred, np.ndarray) else y_pred

    if len(y_true) != len(y_pred):
        raise ValueError("y_true and y_pred must have the same length")

    classes = np.unique(np.concatenate([y_true, y_pred]))
    n_classes = len(classes)

    # Create class to index mapping
    class_to_idx = {cls: i for i, cls in enumerate(classes)}

    # Initialize confusion matrix
    cm = np.zeros((n_classes, n_classes), dtype=int)

    # Fill confusion matrix
    for true_label, pred_label in zip(y_true, y_pred):
        true_idx = class_to_idx[true_label]
        pred_idx = class_to_idx[pred_label]
        cm[true_idx, pred_idx] += 1

    return cm

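Rows of the returned matrix index true classes and columns index predicted classes, both in sorted label order. A small sketch (editor's illustration, not package code):

# Illustrative sketch: one class-0 sample is misclassified as class 1
from createsonline.ml.metrics import confusion_matrix

print(confusion_matrix([0, 0, 1, 1], [0, 1, 1, 1]))
# [[1 1]
#  [0 2]]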
def classification_report(
    y_true: Union[np.ndarray, list],
    y_pred: Union[np.ndarray, list],
    target_names: Optional[List[str]] = None
) -> Dict[str, Any]:
    """
    Generate classification report with precision, recall, F1-score for each class

    Args:
        y_true: True labels
        y_pred: Predicted labels
        target_names: Optional names for classes

    Returns:
        Classification report dictionary
    """
    y_true = np.array(y_true) if not isinstance(y_true, np.ndarray) else y_true
    y_pred = np.array(y_pred) if not isinstance(y_pred, np.ndarray) else y_pred

    classes = np.unique(np.concatenate([y_true, y_pred]))

    if target_names is None:
        target_names = [str(cls) for cls in classes]
    elif len(target_names) != len(classes):
        raise ValueError("target_names length must match number of classes")

    # Calculate metrics for each class
    precisions = precision_score(y_true, y_pred, average=None)
    recalls = recall_score(y_true, y_pred, average=None)
    f1_scores = f1_score(y_true, y_pred, average=None)

    # Calculate support (number of true instances for each class)
    supports = [np.sum(y_true == cls) for cls in classes]

    # Build report
    report = {}

    for i, (cls, name) in enumerate(zip(classes, target_names)):
        report[name] = {
            'precision': float(precisions[i]),
            'recall': float(recalls[i]),
            'f1-score': float(f1_scores[i]),
            'support': int(supports[i])
        }

    # Calculate macro averages
    report['macro avg'] = {
        'precision': float(np.mean(precisions)),
        'recall': float(np.mean(recalls)),
        'f1-score': float(np.mean(f1_scores)),
        'support': int(np.sum(supports))
    }

    # Calculate weighted averages
    weights = np.array(supports) / np.sum(supports)
    report['weighted avg'] = {
        'precision': float(np.average(precisions, weights=weights)),
        'recall': float(np.average(recalls, weights=weights)),
        'f1-score': float(np.average(f1_scores, weights=weights)),
        'support': int(np.sum(supports))
    }

    # Overall accuracy
    report['accuracy'] = float(accuracy_score(y_true, y_pred))

    return report

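Unlike scikit-learn's default string report, this returns a nested dict keyed by class name plus 'macro avg', 'weighted avg', and 'accuracy'. A minimal sketch (editor's illustration, not package code):

# Illustrative sketch of the report structure
from createsonline.ml.metrics import classification_report

report = classification_report([0, 0, 1, 1], [0, 1, 1, 1], target_names=["neg", "pos"])
print(report["pos"]["precision"])  # 2 of 3 predicted "pos" are correct -> 0.666...
print(report["accuracy"])          # 0.75
print(sorted(report))              # ['accuracy', 'macro avg', 'neg', 'pos', 'weighted avg']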
# Regression Metrics

def mean_squared_error(y_true: Union[np.ndarray, list], y_pred: Union[np.ndarray, list]) -> float:
    """
    Calculate Mean Squared Error

    Args:
        y_true: True values
        y_pred: Predicted values

    Returns:
        Mean squared error
    """
    y_true = np.array(y_true) if not isinstance(y_true, np.ndarray) else y_true
    y_pred = np.array(y_pred) if not isinstance(y_pred, np.ndarray) else y_pred

    if len(y_true) != len(y_pred):
        raise ValueError("y_true and y_pred must have the same length")

    return np.mean((y_true - y_pred) ** 2)


def mean_absolute_error(y_true: Union[np.ndarray, list], y_pred: Union[np.ndarray, list]) -> float:
    """
    Calculate Mean Absolute Error

    Args:
        y_true: True values
        y_pred: Predicted values

    Returns:
        Mean absolute error
    """
    y_true = np.array(y_true) if not isinstance(y_true, np.ndarray) else y_true
    y_pred = np.array(y_pred) if not isinstance(y_pred, np.ndarray) else y_pred

    if len(y_true) != len(y_pred):
        raise ValueError("y_true and y_pred must have the same length")

    return np.mean(np.abs(y_true - y_pred))


def root_mean_squared_error(y_true: Union[np.ndarray, list], y_pred: Union[np.ndarray, list]) -> float:
    """
    Calculate Root Mean Squared Error

    Args:
        y_true: True values
        y_pred: Predicted values

    Returns:
        Root mean squared error
    """
    return np.sqrt(mean_squared_error(y_true, y_pred))


def r2_score(y_true: Union[np.ndarray, list], y_pred: Union[np.ndarray, list]) -> float:
    """
    Calculate R-squared (coefficient of determination)

    Args:
        y_true: True values
        y_pred: Predicted values

    Returns:
        R-squared score
    """
    y_true = np.array(y_true) if not isinstance(y_true, np.ndarray) else y_true
    y_pred = np.array(y_pred) if not isinstance(y_pred, np.ndarray) else y_pred

    if len(y_true) != len(y_pred):
        raise ValueError("y_true and y_pred must have the same length")

    ss_res = np.sum((y_true - y_pred) ** 2)
    ss_tot = np.sum((y_true - np.mean(y_true)) ** 2)

    return 1 - (ss_res / ss_tot) if ss_tot != 0 else 0.0


def mean_absolute_percentage_error(y_true: Union[np.ndarray, list], y_pred: Union[np.ndarray, list]) -> float:
    """
    Calculate Mean Absolute Percentage Error

    Args:
        y_true: True values
        y_pred: Predicted values

    Returns:
        Mean absolute percentage error
    """
    y_true = np.array(y_true) if not isinstance(y_true, np.ndarray) else y_true
    y_pred = np.array(y_pred) if not isinstance(y_pred, np.ndarray) else y_pred

    if len(y_true) != len(y_pred):
        raise ValueError("y_true and y_pred must have the same length")

    # Avoid division by zero
    mask = y_true != 0
    if not np.any(mask):
        return 0.0

    return np.mean(np.abs((y_true[mask] - y_pred[mask]) / y_true[mask])) * 100

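The regression metrics are hand-checkable on tiny inputs. A minimal sketch (editor's illustration, not package code):

# Illustrative sketch: only the last prediction is off, by 2.0
from createsonline.ml.metrics import (
    mean_squared_error, mean_absolute_error, root_mean_squared_error,
    r2_score, mean_absolute_percentage_error,
)

y_true = [1.0, 2.0, 3.0, 4.0]
y_pred = [1.0, 2.0, 3.0, 6.0]
print(mean_squared_error(y_true, y_pred))              # 4/4 = 1.0
print(mean_absolute_error(y_true, y_pred))             # 2/4 = 0.5
print(root_mean_squared_error(y_true, y_pred))         # sqrt(1.0) = 1.0
print(r2_score(y_true, y_pred))                        # 1 - 4/5 = 0.2
print(mean_absolute_percentage_error(y_true, y_pred))  # mean(0, 0, 0, 0.5) * 100 = 12.5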
# Clustering Metrics

def adjusted_rand_score(labels_true: Union[np.ndarray, list], labels_pred: Union[np.ndarray, list]) -> float:
    """
    Calculate Adjusted Rand Index for clustering evaluation

    Args:
        labels_true: True cluster labels
        labels_pred: Predicted cluster labels

    Returns:
        Adjusted Rand Index (-1 to 1, higher is better)
    """
    labels_true = np.array(labels_true) if not isinstance(labels_true, np.ndarray) else labels_true
    labels_pred = np.array(labels_pred) if not isinstance(labels_pred, np.ndarray) else labels_pred

    if len(labels_true) != len(labels_pred):
        raise ValueError("labels_true and labels_pred must have the same length")

    # Create contingency table
    classes_true = np.unique(labels_true)
    classes_pred = np.unique(labels_pred)

    contingency = np.zeros((len(classes_true), len(classes_pred)), dtype=int)

    for i, cls_true in enumerate(classes_true):
        for j, cls_pred in enumerate(classes_pred):
            contingency[i, j] = np.sum((labels_true == cls_true) & (labels_pred == cls_pred))

    # Calculate ARI
    n = len(labels_true)

    sum_comb_c = sum([math.comb(n_ij, 2) for n_ij in contingency.flatten() if n_ij >= 2])
    sum_comb_k = sum([math.comb(int(np.sum(contingency[i, :])), 2) for i in range(len(classes_true))])
    sum_comb_c_prime = sum([math.comb(int(np.sum(contingency[:, j])), 2) for j in range(len(classes_pred))])

    expected_index = sum_comb_k * sum_comb_c_prime / math.comb(n, 2) if n >= 2 else 0
    max_index = (sum_comb_k + sum_comb_c_prime) / 2

    if max_index == expected_index:
        return 1.0

    return (sum_comb_c - expected_index) / (max_index - expected_index)

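In the code above, sum_comb_c is the pair-count index over contingency cells, while sum_comb_k and sum_comb_c_prime are its row and column marginals, giving ARI = (Index - ExpectedIndex) / (MaxIndex - ExpectedIndex). Two hand-checkable cases (editor's sketch, not package code):

# Illustrative sketch of ARI behavior
from createsonline.ml.metrics import adjusted_rand_score

print(adjusted_rand_score([0, 0, 1, 1], [0, 0, 1, 1]))  # identical partitions -> 1.0
print(adjusted_rand_score([0, 0, 1, 1], [0, 1, 0, 1]))  # each cluster split across both -> -0.5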
def silhouette_score(X: Union[np.ndarray, list], labels: Union[np.ndarray, list], metric: str = 'euclidean') -> float:
    """
    Calculate Silhouette Score for clustering evaluation

    Args:
        X: Data points
        labels: Cluster labels
        metric: Distance metric ('euclidean', 'manhattan')

    Returns:
        Silhouette score (-1 to 1, higher is better)
    """
    X = np.array(X) if not isinstance(X, np.ndarray) else X
    labels = np.array(labels) if not isinstance(labels, np.ndarray) else labels

    if X.ndim == 1:
        X = X.reshape(-1, 1)

    if len(X) != len(labels):
        raise ValueError("X and labels must have the same length")

    unique_labels = np.unique(labels)
    if len(unique_labels) <= 1:
        return 0.0

    def distance(x1, x2):
        if metric == 'euclidean':
            return np.linalg.norm(x1 - x2)
        elif metric == 'manhattan':
            return np.sum(np.abs(x1 - x2))
        else:
            raise ValueError(f"Unknown metric: {metric}")

    silhouette_scores = []

    for i, point in enumerate(X):
        own_cluster = labels[i]

        # Calculate a(i): average distance to points in same cluster
        same_cluster_points = X[labels == own_cluster]
        if len(same_cluster_points) > 1:
            a_i = np.mean([distance(point, other_point) for other_point in same_cluster_points if not np.array_equal(point, other_point)])
        else:
            a_i = 0.0

        # Calculate b(i): minimum average distance to points in other clusters
        b_i = float('inf')

        for other_cluster in unique_labels:
            if other_cluster != own_cluster:
                other_cluster_points = X[labels == other_cluster]
                if len(other_cluster_points) > 0:
                    avg_dist = np.mean([distance(point, other_point) for other_point in other_cluster_points])
                    b_i = min(b_i, avg_dist)

        # Calculate silhouette score for this point
        if b_i == float('inf'):
            s_i = 0.0
        else:
            s_i = (b_i - a_i) / max(a_i, b_i) if max(a_i, b_i) > 0 else 0.0

        silhouette_scores.append(s_i)

    return np.mean(silhouette_scores)

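For intuition: with two tight, well-separated clusters, a(i) is small and b(i) large, so the score approaches 1. A 1-D sketch (editor's illustration, not package code; the exact value depends on the spacing chosen here):

# Illustrative sketch: near-perfect clustering scores close to 1
from createsonline.ml.metrics import silhouette_score

X = [0.0, 0.1, 10.0, 10.1]
labels = [0, 0, 1, 1]
print(silhouette_score(X, labels))  # a(i)=0.1, b(i)~10 -> roughly 0.99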
# Distance and Similarity Metrics

def euclidean_distance(x1: Union[np.ndarray, list], x2: Union[np.ndarray, list]) -> float:
    """Calculate Euclidean distance between two points"""
    x1 = np.array(x1) if not isinstance(x1, np.ndarray) else x1
    x2 = np.array(x2) if not isinstance(x2, np.ndarray) else x2

    return np.linalg.norm(x1 - x2)


def manhattan_distance(x1: Union[np.ndarray, list], x2: Union[np.ndarray, list]) -> float:
    """Calculate Manhattan distance between two points"""
    x1 = np.array(x1) if not isinstance(x1, np.ndarray) else x1
    x2 = np.array(x2) if not isinstance(x2, np.ndarray) else x2

    return np.sum(np.abs(x1 - x2))


def cosine_similarity(x1: Union[np.ndarray, list], x2: Union[np.ndarray, list]) -> float:
    """Calculate cosine similarity between two vectors"""
    x1 = np.array(x1) if not isinstance(x1, np.ndarray) else x1
    x2 = np.array(x2) if not isinstance(x2, np.ndarray) else x2

    dot_product = np.dot(x1, x2)
    norm_x1 = np.linalg.norm(x1)
    norm_x2 = np.linalg.norm(x2)

    if norm_x1 == 0 or norm_x2 == 0:
        return 0.0

    return dot_product / (norm_x1 * norm_x2)


def jaccard_similarity(set1: set, set2: set) -> float:
    """Calculate Jaccard similarity between two sets"""
    intersection = len(set1.intersection(set2))
    union = len(set1.union(set2))

    return intersection / union if union > 0 else 0.0
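Taken together, the module covers classification, regression, clustering, and vector/set similarity with no dependency beyond NumPy. A final sketch for the distance and similarity helpers (editor's illustration, not package code):

# Illustrative sketch of the distance/similarity helpers
from createsonline.ml.metrics import (
    euclidean_distance, manhattan_distance, cosine_similarity, jaccard_similarity,
)

print(euclidean_distance([0, 0], [3, 4]))        # sqrt(9 + 16) = 5.0
print(manhattan_distance([0, 0], [3, 4]))        # |3| + |4| = 7
print(cosine_similarity([1, 0], [1, 1]))         # 1/sqrt(2) ~ 0.7071
print(jaccard_similarity({1, 2, 3}, {2, 3, 4}))  # |{2,3}| / |{1,2,3,4}| = 0.5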