createsonline 0.1.26__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (152)
  1. createsonline/__init__.py +46 -0
  2. createsonline/admin/__init__.py +7 -0
  3. createsonline/admin/content.py +526 -0
  4. createsonline/admin/crud.py +805 -0
  5. createsonline/admin/field_builder.py +559 -0
  6. createsonline/admin/integration.py +482 -0
  7. createsonline/admin/interface.py +2562 -0
  8. createsonline/admin/model_creator.py +513 -0
  9. createsonline/admin/model_manager.py +388 -0
  10. createsonline/admin/modern_dashboard.py +498 -0
  11. createsonline/admin/permissions.py +264 -0
  12. createsonline/admin/user_forms.py +594 -0
  13. createsonline/ai/__init__.py +202 -0
  14. createsonline/ai/fields.py +1226 -0
  15. createsonline/ai/orm.py +325 -0
  16. createsonline/ai/services.py +1244 -0
  17. createsonline/app.py +506 -0
  18. createsonline/auth/__init__.py +8 -0
  19. createsonline/auth/management.py +228 -0
  20. createsonline/auth/models.py +552 -0
  21. createsonline/cli/__init__.py +5 -0
  22. createsonline/cli/commands/__init__.py +122 -0
  23. createsonline/cli/commands/database.py +416 -0
  24. createsonline/cli/commands/info.py +173 -0
  25. createsonline/cli/commands/initdb.py +218 -0
  26. createsonline/cli/commands/project.py +545 -0
  27. createsonline/cli/commands/serve.py +173 -0
  28. createsonline/cli/commands/shell.py +93 -0
  29. createsonline/cli/commands/users.py +148 -0
  30. createsonline/cli/main.py +2041 -0
  31. createsonline/cli/manage.py +274 -0
  32. createsonline/config/__init__.py +9 -0
  33. createsonline/config/app.py +2577 -0
  34. createsonline/config/database.py +179 -0
  35. createsonline/config/docs.py +384 -0
  36. createsonline/config/errors.py +160 -0
  37. createsonline/config/orm.py +43 -0
  38. createsonline/config/request.py +93 -0
  39. createsonline/config/settings.py +176 -0
  40. createsonline/data/__init__.py +23 -0
  41. createsonline/data/dataframe.py +925 -0
  42. createsonline/data/io.py +453 -0
  43. createsonline/data/series.py +557 -0
  44. createsonline/database/__init__.py +60 -0
  45. createsonline/database/abstraction.py +440 -0
  46. createsonline/database/assistant.py +585 -0
  47. createsonline/database/fields.py +442 -0
  48. createsonline/database/migrations.py +132 -0
  49. createsonline/database/models.py +604 -0
  50. createsonline/database.py +438 -0
  51. createsonline/http/__init__.py +28 -0
  52. createsonline/http/client.py +535 -0
  53. createsonline/ml/__init__.py +55 -0
  54. createsonline/ml/classification.py +552 -0
  55. createsonline/ml/clustering.py +680 -0
  56. createsonline/ml/metrics.py +542 -0
  57. createsonline/ml/neural.py +560 -0
  58. createsonline/ml/preprocessing.py +784 -0
  59. createsonline/ml/regression.py +501 -0
  60. createsonline/performance/__init__.py +19 -0
  61. createsonline/performance/cache.py +444 -0
  62. createsonline/performance/compression.py +335 -0
  63. createsonline/performance/core.py +419 -0
  64. createsonline/project_init.py +789 -0
  65. createsonline/routing.py +528 -0
  66. createsonline/security/__init__.py +34 -0
  67. createsonline/security/core.py +811 -0
  68. createsonline/security/encryption.py +349 -0
  69. createsonline/server.py +295 -0
  70. createsonline/static/css/admin.css +263 -0
  71. createsonline/static/css/common.css +358 -0
  72. createsonline/static/css/dashboard.css +89 -0
  73. createsonline/static/favicon.ico +0 -0
  74. createsonline/static/icons/icon-128x128.png +0 -0
  75. createsonline/static/icons/icon-128x128.webp +0 -0
  76. createsonline/static/icons/icon-16x16.png +0 -0
  77. createsonline/static/icons/icon-16x16.webp +0 -0
  78. createsonline/static/icons/icon-180x180.png +0 -0
  79. createsonline/static/icons/icon-180x180.webp +0 -0
  80. createsonline/static/icons/icon-192x192.png +0 -0
  81. createsonline/static/icons/icon-192x192.webp +0 -0
  82. createsonline/static/icons/icon-256x256.png +0 -0
  83. createsonline/static/icons/icon-256x256.webp +0 -0
  84. createsonline/static/icons/icon-32x32.png +0 -0
  85. createsonline/static/icons/icon-32x32.webp +0 -0
  86. createsonline/static/icons/icon-384x384.png +0 -0
  87. createsonline/static/icons/icon-384x384.webp +0 -0
  88. createsonline/static/icons/icon-48x48.png +0 -0
  89. createsonline/static/icons/icon-48x48.webp +0 -0
  90. createsonline/static/icons/icon-512x512.png +0 -0
  91. createsonline/static/icons/icon-512x512.webp +0 -0
  92. createsonline/static/icons/icon-64x64.png +0 -0
  93. createsonline/static/icons/icon-64x64.webp +0 -0
  94. createsonline/static/image/android-chrome-192x192.png +0 -0
  95. createsonline/static/image/android-chrome-512x512.png +0 -0
  96. createsonline/static/image/apple-touch-icon.png +0 -0
  97. createsonline/static/image/favicon-16x16.png +0 -0
  98. createsonline/static/image/favicon-32x32.png +0 -0
  99. createsonline/static/image/favicon.ico +0 -0
  100. createsonline/static/image/favicon.svg +17 -0
  101. createsonline/static/image/icon-128x128.png +0 -0
  102. createsonline/static/image/icon-128x128.webp +0 -0
  103. createsonline/static/image/icon-16x16.png +0 -0
  104. createsonline/static/image/icon-16x16.webp +0 -0
  105. createsonline/static/image/icon-180x180.png +0 -0
  106. createsonline/static/image/icon-180x180.webp +0 -0
  107. createsonline/static/image/icon-192x192.png +0 -0
  108. createsonline/static/image/icon-192x192.webp +0 -0
  109. createsonline/static/image/icon-256x256.png +0 -0
  110. createsonline/static/image/icon-256x256.webp +0 -0
  111. createsonline/static/image/icon-32x32.png +0 -0
  112. createsonline/static/image/icon-32x32.webp +0 -0
  113. createsonline/static/image/icon-384x384.png +0 -0
  114. createsonline/static/image/icon-384x384.webp +0 -0
  115. createsonline/static/image/icon-48x48.png +0 -0
  116. createsonline/static/image/icon-48x48.webp +0 -0
  117. createsonline/static/image/icon-512x512.png +0 -0
  118. createsonline/static/image/icon-512x512.webp +0 -0
  119. createsonline/static/image/icon-64x64.png +0 -0
  120. createsonline/static/image/icon-64x64.webp +0 -0
  121. createsonline/static/image/logo-header-h100.png +0 -0
  122. createsonline/static/image/logo-header-h100.webp +0 -0
  123. createsonline/static/image/logo-header-h200@2x.png +0 -0
  124. createsonline/static/image/logo-header-h200@2x.webp +0 -0
  125. createsonline/static/image/logo.png +0 -0
  126. createsonline/static/js/admin.js +274 -0
  127. createsonline/static/site.webmanifest +35 -0
  128. createsonline/static/templates/admin/base.html +87 -0
  129. createsonline/static/templates/admin/dashboard.html +217 -0
  130. createsonline/static/templates/admin/model_form.html +270 -0
  131. createsonline/static/templates/admin/model_list.html +202 -0
  132. createsonline/static/test_script.js +15 -0
  133. createsonline/static/test_styles.css +59 -0
  134. createsonline/static_files.py +365 -0
  135. createsonline/templates/404.html +100 -0
  136. createsonline/templates/admin_login.html +169 -0
  137. createsonline/templates/base.html +102 -0
  138. createsonline/templates/index.html +151 -0
  139. createsonline/templates.py +205 -0
  140. createsonline/testing.py +322 -0
  141. createsonline/utils.py +448 -0
  142. createsonline/validation/__init__.py +49 -0
  143. createsonline/validation/fields.py +598 -0
  144. createsonline/validation/models.py +504 -0
  145. createsonline/validation/validators.py +561 -0
  146. createsonline/views.py +184 -0
  147. createsonline-0.1.26.dist-info/METADATA +46 -0
  148. createsonline-0.1.26.dist-info/RECORD +152 -0
  149. createsonline-0.1.26.dist-info/WHEEL +5 -0
  150. createsonline-0.1.26.dist-info/entry_points.txt +2 -0
  151. createsonline-0.1.26.dist-info/licenses/LICENSE +21 -0
  152. createsonline-0.1.26.dist-info/top_level.txt +1 -0
createsonline/ml/metrics.py
@@ -0,0 +1,542 @@
+ """
+ CREATESONLINE ML Metrics
+
+ Pure Python evaluation metrics built on NumPy (no scikit-learn dependency).
+ """
+
+ import numpy as np
+ from typing import Union, Dict, Any, List, Optional
+ import math
+
+
+ def accuracy_score(y_true: Union[np.ndarray, list], y_pred: Union[np.ndarray, list]) -> float:
+     """
+     Calculate accuracy score
+
+     Args:
+         y_true: True labels
+         y_pred: Predicted labels
+
+     Returns:
+         Accuracy score (0-1)
+     """
+     y_true = np.array(y_true) if not isinstance(y_true, np.ndarray) else y_true
+     y_pred = np.array(y_pred) if not isinstance(y_pred, np.ndarray) else y_pred
+
+     if len(y_true) != len(y_pred):
+         raise ValueError("y_true and y_pred must have the same length")
+
+     return np.mean(y_true == y_pred)
+
+
+ def precision_score(
+     y_true: Union[np.ndarray, list],
+     y_pred: Union[np.ndarray, list],
+     average: Optional[str] = 'binary',
+     pos_label: Union[str, int] = 1
+ ) -> Union[float, np.ndarray]:
+     """
+     Calculate precision score
+
+     Args:
+         y_true: True labels
+         y_pred: Predicted labels
+         average: Averaging strategy ('binary', 'micro', 'macro', 'weighted', None)
+         pos_label: Positive class label for binary classification
+
+     Returns:
+         Precision score(s)
+     """
+     y_true = np.array(y_true) if not isinstance(y_true, np.ndarray) else y_true
+     y_pred = np.array(y_pred) if not isinstance(y_pred, np.ndarray) else y_pred
+
+     if len(y_true) != len(y_pred):
+         raise ValueError("y_true and y_pred must have the same length")
+
+     classes = np.unique(np.concatenate([y_true, y_pred]))
+
+     if average == 'binary':
+         if len(classes) > 2:
+             raise ValueError("Binary classification requires exactly 2 classes")
+
+         tp = np.sum((y_true == pos_label) & (y_pred == pos_label))
+         fp = np.sum((y_true != pos_label) & (y_pred == pos_label))
+
+         return tp / (tp + fp) if (tp + fp) > 0 else 0.0
+
+     # Multi-class metrics
+     precisions = []
+     for cls in classes:
+         tp = np.sum((y_true == cls) & (y_pred == cls))
+         fp = np.sum((y_true != cls) & (y_pred == cls))
+
+         precision = tp / (tp + fp) if (tp + fp) > 0 else 0.0
+         precisions.append(precision)
+
+     precisions = np.array(precisions)
+
+     if average is None:
+         return precisions
+     elif average == 'macro':
+         return np.mean(precisions)
+     elif average == 'micro':
+         tp_total = sum(np.sum((y_true == cls) & (y_pred == cls)) for cls in classes)
+         fp_total = sum(np.sum((y_true != cls) & (y_pred == cls)) for cls in classes)
+         return tp_total / (tp_total + fp_total) if (tp_total + fp_total) > 0 else 0.0
+     elif average == 'weighted':
+         weights = [np.sum(y_true == cls) for cls in classes]
+         return np.average(precisions, weights=weights)
+     else:
+         raise ValueError(f"Unknown average: {average}")
+
+
+ def recall_score(
+     y_true: Union[np.ndarray, list],
+     y_pred: Union[np.ndarray, list],
+     average: Optional[str] = 'binary',
+     pos_label: Union[str, int] = 1
+ ) -> Union[float, np.ndarray]:
+     """
+     Calculate recall score
+
+     Args:
+         y_true: True labels
+         y_pred: Predicted labels
+         average: Averaging strategy ('binary', 'micro', 'macro', 'weighted', None)
+         pos_label: Positive class label for binary classification
+
+     Returns:
+         Recall score(s)
+     """
+     y_true = np.array(y_true) if not isinstance(y_true, np.ndarray) else y_true
+     y_pred = np.array(y_pred) if not isinstance(y_pred, np.ndarray) else y_pred
+
+     if len(y_true) != len(y_pred):
+         raise ValueError("y_true and y_pred must have the same length")
+
+     classes = np.unique(np.concatenate([y_true, y_pred]))
+
+     if average == 'binary':
+         if len(classes) > 2:
+             raise ValueError("Binary classification requires exactly 2 classes")
+
+         tp = np.sum((y_true == pos_label) & (y_pred == pos_label))
+         fn = np.sum((y_true == pos_label) & (y_pred != pos_label))
+
+         return tp / (tp + fn) if (tp + fn) > 0 else 0.0
+
+     # Multi-class metrics
+     recalls = []
+     for cls in classes:
+         tp = np.sum((y_true == cls) & (y_pred == cls))
+         fn = np.sum((y_true == cls) & (y_pred != cls))
+
+         recall = tp / (tp + fn) if (tp + fn) > 0 else 0.0
+         recalls.append(recall)
+
+     recalls = np.array(recalls)
+
+     if average is None:
+         return recalls
+     elif average == 'macro':
+         return np.mean(recalls)
+     elif average == 'micro':
+         tp_total = sum(np.sum((y_true == cls) & (y_pred == cls)) for cls in classes)
+         fn_total = sum(np.sum((y_true == cls) & (y_pred != cls)) for cls in classes)
+         return tp_total / (tp_total + fn_total) if (tp_total + fn_total) > 0 else 0.0
+     elif average == 'weighted':
+         weights = [np.sum(y_true == cls) for cls in classes]
+         return np.average(recalls, weights=weights)
+     else:
+         raise ValueError(f"Unknown average: {average}")
+
+
+ def f1_score(
+     y_true: Union[np.ndarray, list],
+     y_pred: Union[np.ndarray, list],
+     average: Optional[str] = 'binary',
+     pos_label: Union[str, int] = 1
+ ) -> Union[float, np.ndarray]:
+     """
+     Calculate F1 score (harmonic mean of precision and recall)
+
+     Args:
+         y_true: True labels
+         y_pred: Predicted labels
+         average: Averaging strategy ('binary', 'micro', 'macro', 'weighted', None)
+         pos_label: Positive class label for binary classification
+
+     Returns:
+         F1 score(s)
+     """
+     precision = precision_score(y_true, y_pred, average=average, pos_label=pos_label)
+     recall = recall_score(y_true, y_pred, average=average, pos_label=pos_label)
+
+     if isinstance(precision, np.ndarray):
+         # Handle array case (average=None): element-wise harmonic mean
+         f1_scores = np.zeros_like(precision)
+         mask = (precision + recall) > 0
+         f1_scores[mask] = 2 * precision[mask] * recall[mask] / (precision[mask] + recall[mask])
+         return f1_scores
+     else:
+         # Handle scalar case
+         return 2 * precision * recall / (precision + recall) if (precision + recall) > 0 else 0.0
+
+
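The four classification metrics above share a contract: equal-length label sequences, with pos_label marking the positive class. A minimal usage sketch on illustrative data (the import path follows the package layout in the file list; with TP=2, FP=1, FN=1 all four scores happen to coincide at 2/3):

from createsonline.ml.metrics import accuracy_score, precision_score, recall_score, f1_score

y_true = [1, 1, 0, 1, 0, 0]
y_pred = [1, 0, 0, 1, 0, 1]

print(accuracy_score(y_true, y_pred))   # 4 of 6 correct -> ~0.667
print(precision_score(y_true, y_pred))  # TP=2, FP=1 -> ~0.667
print(recall_score(y_true, y_pred))     # TP=2, FN=1 -> ~0.667
print(f1_score(y_true, y_pred))         # harmonic mean -> ~0.667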
+ def confusion_matrix(y_true: Union[np.ndarray, list], y_pred: Union[np.ndarray, list]) -> np.ndarray:
+     """
+     Calculate confusion matrix
+
+     Args:
+         y_true: True labels
+         y_pred: Predicted labels
+
+     Returns:
+         Confusion matrix (n_classes, n_classes)
+     """
+     y_true = np.array(y_true) if not isinstance(y_true, np.ndarray) else y_true
+     y_pred = np.array(y_pred) if not isinstance(y_pred, np.ndarray) else y_pred
+
+     if len(y_true) != len(y_pred):
+         raise ValueError("y_true and y_pred must have the same length")
+
+     classes = np.unique(np.concatenate([y_true, y_pred]))
+     n_classes = len(classes)
+
+     # Create class to index mapping
+     class_to_idx = {cls: i for i, cls in enumerate(classes)}
+
+     # Initialize confusion matrix
+     cm = np.zeros((n_classes, n_classes), dtype=int)
+
+     # Fill confusion matrix
+     for true_label, pred_label in zip(y_true, y_pred):
+         true_idx = class_to_idx[true_label]
+         pred_idx = class_to_idx[pred_label]
+         cm[true_idx, pred_idx] += 1
+
+     return cm
+
+
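A quick sketch of confusion_matrix with illustrative string labels. Rows are true classes and columns are predictions, both in sorted label order, because the class axis comes from np.unique:

from createsonline.ml.metrics import confusion_matrix

y_true = ["bird", "cat", "cat", "dog"]
y_pred = ["bird", "cat", "cat", "cat"]

print(confusion_matrix(y_true, y_pred))
# [[1 0 0]   bird row
#  [0 2 0]   cat row
#  [0 1 0]]  dog row: the one dog was predicted as cat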
+ def classification_report(
+     y_true: Union[np.ndarray, list],
+     y_pred: Union[np.ndarray, list],
+     target_names: Optional[List[str]] = None
+ ) -> Dict[str, Any]:
+     """
+     Generate classification report with precision, recall, F1-score for each class
+
+     Args:
+         y_true: True labels
+         y_pred: Predicted labels
+         target_names: Optional names for classes
+
+     Returns:
+         Classification report dictionary
+     """
+     y_true = np.array(y_true) if not isinstance(y_true, np.ndarray) else y_true
+     y_pred = np.array(y_pred) if not isinstance(y_pred, np.ndarray) else y_pred
+
+     classes = np.unique(np.concatenate([y_true, y_pred]))
+
+     if target_names is None:
+         target_names = [str(cls) for cls in classes]
+     elif len(target_names) != len(classes):
+         raise ValueError("target_names length must match number of classes")
+
+     # Calculate metrics for each class
+     precisions = precision_score(y_true, y_pred, average=None)
+     recalls = recall_score(y_true, y_pred, average=None)
+     f1_scores = f1_score(y_true, y_pred, average=None)
+
+     # Calculate support (number of true instances for each class)
+     supports = [np.sum(y_true == cls) for cls in classes]
+
+     # Build report
+     report = {}
+
+     for i, (cls, name) in enumerate(zip(classes, target_names)):
+         report[name] = {
+             'precision': float(precisions[i]),
+             'recall': float(recalls[i]),
+             'f1-score': float(f1_scores[i]),
+             'support': int(supports[i])
+         }
+
+     # Calculate macro averages
+     report['macro avg'] = {
+         'precision': float(np.mean(precisions)),
+         'recall': float(np.mean(recalls)),
+         'f1-score': float(np.mean(f1_scores)),
+         'support': int(np.sum(supports))
+     }
+
+     # Calculate weighted averages
+     weights = np.array(supports) / np.sum(supports)
+     report['weighted avg'] = {
+         'precision': float(np.average(precisions, weights=weights)),
+         'recall': float(np.average(recalls, weights=weights)),
+         'f1-score': float(np.average(f1_scores, weights=weights)),
+         'support': int(np.sum(supports))
+     }
+
+     # Overall accuracy
+     report['accuracy'] = float(accuracy_score(y_true, y_pred))
+
+     return report
+
+
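Unlike scikit-learn's default, classification_report returns a plain dict rather than a formatted string. An illustrative call on toy data:

from createsonline.ml.metrics import classification_report

report = classification_report([0, 1, 1, 0], [0, 1, 0, 0], target_names=["neg", "pos"])
print(report["pos"])       # {'precision': 1.0, 'recall': 0.5, 'f1-score': ~0.667, 'support': 2}
print(report["accuracy"])  # 0.75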
+ # Regression Metrics
+
+ def mean_squared_error(y_true: Union[np.ndarray, list], y_pred: Union[np.ndarray, list]) -> float:
+     """
+     Calculate Mean Squared Error
+
+     Args:
+         y_true: True values
+         y_pred: Predicted values
+
+     Returns:
+         Mean squared error
+     """
+     y_true = np.array(y_true) if not isinstance(y_true, np.ndarray) else y_true
+     y_pred = np.array(y_pred) if not isinstance(y_pred, np.ndarray) else y_pred
+
+     if len(y_true) != len(y_pred):
+         raise ValueError("y_true and y_pred must have the same length")
+
+     return np.mean((y_true - y_pred) ** 2)
+
+
+ def mean_absolute_error(y_true: Union[np.ndarray, list], y_pred: Union[np.ndarray, list]) -> float:
+     """
+     Calculate Mean Absolute Error
+
+     Args:
+         y_true: True values
+         y_pred: Predicted values
+
+     Returns:
+         Mean absolute error
+     """
+     y_true = np.array(y_true) if not isinstance(y_true, np.ndarray) else y_true
+     y_pred = np.array(y_pred) if not isinstance(y_pred, np.ndarray) else y_pred
+
+     if len(y_true) != len(y_pred):
+         raise ValueError("y_true and y_pred must have the same length")
+
+     return np.mean(np.abs(y_true - y_pred))
+
+
+ def root_mean_squared_error(y_true: Union[np.ndarray, list], y_pred: Union[np.ndarray, list]) -> float:
+     """
+     Calculate Root Mean Squared Error
+
+     Args:
+         y_true: True values
+         y_pred: Predicted values
+
+     Returns:
+         Root mean squared error
+     """
+     return np.sqrt(mean_squared_error(y_true, y_pred))
+
+
+ def r2_score(y_true: Union[np.ndarray, list], y_pred: Union[np.ndarray, list]) -> float:
+     """
+     Calculate R-squared (coefficient of determination)
+
+     Args:
+         y_true: True values
+         y_pred: Predicted values
+
+     Returns:
+         R-squared score
+     """
+     y_true = np.array(y_true) if not isinstance(y_true, np.ndarray) else y_true
+     y_pred = np.array(y_pred) if not isinstance(y_pred, np.ndarray) else y_pred
+
+     if len(y_true) != len(y_pred):
+         raise ValueError("y_true and y_pred must have the same length")
+
+     ss_res = np.sum((y_true - y_pred) ** 2)
+     ss_tot = np.sum((y_true - np.mean(y_true)) ** 2)
+
+     return 1 - (ss_res / ss_tot) if ss_tot != 0 else 0.0
+
+
+ def mean_absolute_percentage_error(y_true: Union[np.ndarray, list], y_pred: Union[np.ndarray, list]) -> float:
+     """
+     Calculate Mean Absolute Percentage Error
+
+     Args:
+         y_true: True values
+         y_pred: Predicted values
+
+     Returns:
+         Mean absolute percentage error
+     """
+     y_true = np.array(y_true) if not isinstance(y_true, np.ndarray) else y_true
+     y_pred = np.array(y_pred) if not isinstance(y_pred, np.ndarray) else y_pred
+
+     if len(y_true) != len(y_pred):
+         raise ValueError("y_true and y_pred must have the same length")
+
+     # Avoid division by zero
+     mask = y_true != 0
+     if not np.any(mask):
+         return 0.0
+
+     return np.mean(np.abs((y_true[mask] - y_pred[mask]) / y_true[mask])) * 100
+
+
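The regression helpers follow the same length-checked contract. A short sketch on illustrative values:

from createsonline.ml.metrics import (
    mean_squared_error, mean_absolute_error, root_mean_squared_error,
    r2_score, mean_absolute_percentage_error,
)

y_true = [3.0, 5.0, 2.0, 7.0]
y_pred = [2.5, 5.0, 3.0, 8.0]

print(mean_absolute_error(y_true, y_pred))             # 0.625
print(mean_squared_error(y_true, y_pred))              # 0.5625
print(root_mean_squared_error(y_true, y_pred))         # 0.75
print(r2_score(y_true, y_pred))                        # ~0.847
print(mean_absolute_percentage_error(y_true, y_pred))  # ~20.2 (a percentage)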
+ # Clustering Metrics
+
+ def adjusted_rand_score(labels_true: Union[np.ndarray, list], labels_pred: Union[np.ndarray, list]) -> float:
+     """
+     Calculate Adjusted Rand Index for clustering evaluation
+
+     Args:
+         labels_true: True cluster labels
+         labels_pred: Predicted cluster labels
+
+     Returns:
+         Adjusted Rand Index (-1 to 1, higher is better)
+     """
+     labels_true = np.array(labels_true) if not isinstance(labels_true, np.ndarray) else labels_true
+     labels_pred = np.array(labels_pred) if not isinstance(labels_pred, np.ndarray) else labels_pred
+
+     if len(labels_true) != len(labels_pred):
+         raise ValueError("labels_true and labels_pred must have the same length")
+
+     # Create contingency table
+     classes_true = np.unique(labels_true)
+     classes_pred = np.unique(labels_pred)
+
+     contingency = np.zeros((len(classes_true), len(classes_pred)), dtype=int)
+
+     for i, cls_true in enumerate(classes_true):
+         for j, cls_pred in enumerate(classes_pred):
+             contingency[i, j] = np.sum((labels_true == cls_true) & (labels_pred == cls_pred))
+
+     # Calculate ARI
+     n = len(labels_true)
+
+     sum_comb_c = sum([math.comb(n_ij, 2) for n_ij in contingency.flatten() if n_ij >= 2])
+     sum_comb_k = sum([math.comb(int(np.sum(contingency[i, :])), 2) for i in range(len(classes_true))])
+     sum_comb_c_prime = sum([math.comb(int(np.sum(contingency[:, j])), 2) for j in range(len(classes_pred))])
+
+     expected_index = sum_comb_k * sum_comb_c_prime / math.comb(n, 2) if n >= 2 else 0
+     max_index = (sum_comb_k + sum_comb_c_prime) / 2
+
+     if max_index == expected_index:
+         return 1.0
+
+     return (sum_comb_c - expected_index) / (max_index - expected_index)
+
+
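The ARI is invariant to label permutation: a partition that matches perfectly scores 1.0 even when the cluster IDs are swapped, and worse-than-chance assignments go negative. Illustrative values:

from createsonline.ml.metrics import adjusted_rand_score

print(adjusted_rand_score([0, 0, 1, 1], [1, 1, 0, 0]))  # 1.0  (same partition, swapped IDs)
print(adjusted_rand_score([0, 0, 1, 1], [0, 1, 0, 1]))  # -0.5 (worse than chance)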
+ def silhouette_score(X: Union[np.ndarray, list], labels: Union[np.ndarray, list], metric: str = 'euclidean') -> float:
+     """
+     Calculate Silhouette Score for clustering evaluation
+
+     Args:
+         X: Data points
+         labels: Cluster labels
+         metric: Distance metric ('euclidean', 'manhattan')
+
+     Returns:
+         Silhouette score (-1 to 1, higher is better)
+     """
+     X = np.array(X) if not isinstance(X, np.ndarray) else X
+     labels = np.array(labels) if not isinstance(labels, np.ndarray) else labels
+
+     if X.ndim == 1:
+         X = X.reshape(-1, 1)
+
+     if len(X) != len(labels):
+         raise ValueError("X and labels must have the same length")
+
+     unique_labels = np.unique(labels)
+     if len(unique_labels) <= 1:
+         return 0.0
+
+     def distance(x1, x2):
+         if metric == 'euclidean':
+             return np.linalg.norm(x1 - x2)
+         elif metric == 'manhattan':
+             return np.sum(np.abs(x1 - x2))
+         else:
+             raise ValueError(f"Unknown metric: {metric}")
+
+     silhouette_scores = []
+
+     for i, point in enumerate(X):
+         own_cluster = labels[i]
+
+         same_cluster_points = X[labels == own_cluster]
+         if len(same_cluster_points) <= 1:
+             # By convention, points in singleton clusters get s(i) = 0
+             silhouette_scores.append(0.0)
+             continue
+
+         # Calculate a(i): average distance to other points in the same cluster
+         intra_dists = [distance(point, other_point) for other_point in same_cluster_points if not np.array_equal(point, other_point)]
+         a_i = np.mean(intra_dists) if intra_dists else 0.0
+
+         # Calculate b(i): minimum average distance to points in other clusters
+         b_i = float('inf')
+
+         for other_cluster in unique_labels:
+             if other_cluster != own_cluster:
+                 other_cluster_points = X[labels == other_cluster]
+                 if len(other_cluster_points) > 0:
+                     avg_dist = np.mean([distance(point, other_point) for other_point in other_cluster_points])
+                     b_i = min(b_i, avg_dist)
+
+         # Calculate silhouette score for this point
+         if b_i == float('inf'):
+             s_i = 0.0
+         else:
+             s_i = (b_i - a_i) / max(a_i, b_i) if max(a_i, b_i) > 0 else 0.0
+
+         silhouette_scores.append(s_i)
+
+     return np.mean(silhouette_scores)
+
+
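A silhouette sketch on two tight, well-separated 1-D clusters (illustrative data); the score approaches 1 when within-cluster distances are small relative to the nearest other cluster:

from createsonline.ml.metrics import silhouette_score

X = [[0.0], [0.2], [5.0], [5.2]]
labels = [0, 0, 1, 1]
print(silhouette_score(X, labels))  # ~0.96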
+ # Distance and Similarity Metrics
+
+ def euclidean_distance(x1: Union[np.ndarray, list], x2: Union[np.ndarray, list]) -> float:
+     """Calculate Euclidean distance between two points"""
+     x1 = np.array(x1) if not isinstance(x1, np.ndarray) else x1
+     x2 = np.array(x2) if not isinstance(x2, np.ndarray) else x2
+
+     return np.linalg.norm(x1 - x2)
+
+
+ def manhattan_distance(x1: Union[np.ndarray, list], x2: Union[np.ndarray, list]) -> float:
+     """Calculate Manhattan distance between two points"""
+     x1 = np.array(x1) if not isinstance(x1, np.ndarray) else x1
+     x2 = np.array(x2) if not isinstance(x2, np.ndarray) else x2
+
+     return np.sum(np.abs(x1 - x2))
+
+
+ def cosine_similarity(x1: Union[np.ndarray, list], x2: Union[np.ndarray, list]) -> float:
+     """Calculate cosine similarity between two vectors"""
+     x1 = np.array(x1) if not isinstance(x1, np.ndarray) else x1
+     x2 = np.array(x2) if not isinstance(x2, np.ndarray) else x2
+
+     dot_product = np.dot(x1, x2)
+     norm_x1 = np.linalg.norm(x1)
+     norm_x2 = np.linalg.norm(x2)
+
+     if norm_x1 == 0 or norm_x2 == 0:
+         return 0.0
+
+     return dot_product / (norm_x1 * norm_x2)
+
+
+ def jaccard_similarity(set1: set, set2: set) -> float:
+     """Calculate Jaccard similarity between two sets"""
+     intersection = len(set1.intersection(set2))
+     union = len(set1.union(set2))
+
+     return intersection / union if union > 0 else 0.0
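
Finally, a sketch of the pairwise helpers on illustrative inputs; note that jaccard_similarity takes Python sets while the other three take vectors:

from createsonline.ml.metrics import (
    euclidean_distance, manhattan_distance, cosine_similarity, jaccard_similarity,
)

print(euclidean_distance([0, 0], [3, 4]))        # 5.0
print(manhattan_distance([0, 0], [3, 4]))        # 7
print(cosine_similarity([1, 0], [1, 1]))         # ~0.707 (cosine of 45 degrees)
print(jaccard_similarity({1, 2, 3}, {2, 3, 4}))  # 2/4 = 0.5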