ilovetools-0.2.3-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,601 @@
+ """
+ Model evaluation metrics for ML workflows
+ """
+
+ from typing import Dict, List
+
+ __all__ = [
+     'accuracy_score',
+     'precision_score',
+     'recall_score',
+     'f1_score',
+     'confusion_matrix',
+     'classification_report',
+     'mean_squared_error',
+     'mean_absolute_error',
+     'root_mean_squared_error',
+     'r2_score',
+     'roc_auc_score',
+     # Aliases
+     'mse',
+     'mae',
+     'rmse',
+ ]
+
+
+ def accuracy_score(y_true: List, y_pred: List) -> float:
+     """
+     Calculate accuracy score for classification.
+
+     Accuracy = (Correct Predictions) / (Total Predictions)
+
+     Args:
+         y_true: True labels
+         y_pred: Predicted labels
+
+     Returns:
+         float: Accuracy score (0.0 to 1.0)
+
+     Examples:
+         >>> from ilovetools.ml import accuracy_score
+
+         # Perfect predictions
+         >>> y_true = [1, 0, 1, 1, 0]
+         >>> y_pred = [1, 0, 1, 1, 0]
+         >>> accuracy_score(y_true, y_pred)
+         1.0
+
+         # 80% accuracy
+         >>> y_true = [1, 0, 1, 1, 0]
+         >>> y_pred = [1, 0, 1, 0, 0]
+         >>> accuracy_score(y_true, y_pred)
+         0.8
+
+         # Real-world: Email spam detection
+         >>> actual = [1, 1, 0, 0, 1, 0, 1, 0]
+         >>> predicted = [1, 0, 0, 0, 1, 0, 1, 1]
+         >>> acc = accuracy_score(actual, predicted)
+         >>> print(f"Model accuracy: {acc:.2%}")
+         Model accuracy: 75.00%
+
+     Notes:
+         - Use for balanced datasets
+         - Don't use for imbalanced datasets
+         - Range: 0.0 (worst) to 1.0 (best)
+         - Simple and intuitive metric
+     """
+     if len(y_true) != len(y_pred):
+         raise ValueError("y_true and y_pred must have same length")
+
+     correct = sum(1 for true, pred in zip(y_true, y_pred) if true == pred)
+     return correct / len(y_true)
+
+
+ def precision_score(y_true: List, y_pred: List, positive_label: int = 1) -> float:
+     """
+     Calculate precision score for binary classification.
+
+     Precision = TP / (TP + FP)
+     "Of all positive predictions, how many were correct?"
+
+     Args:
+         y_true: True labels
+         y_pred: Predicted labels
+         positive_label: Label considered as positive class. Default: 1
+
+     Returns:
+         float: Precision score (0.0 to 1.0)
+
+     Examples:
+         >>> from ilovetools.ml import precision_score
+
+         # High precision (few false positives)
+         >>> y_true = [1, 0, 1, 1, 0, 0, 1, 0]
+         >>> y_pred = [1, 0, 1, 1, 0, 0, 0, 0]
+         >>> precision_score(y_true, y_pred)
+         1.0
+
+         # Lower precision (some false positives)
+         >>> y_true = [1, 0, 1, 1, 0]
+         >>> y_pred = [1, 1, 1, 1, 0]
+         >>> precision_score(y_true, y_pred)
+         0.75
+
+         # Spam detection (don't mark important emails as spam)
+         >>> actual_spam = [1, 1, 0, 0, 1, 0, 1, 0]
+         >>> predicted_spam = [1, 1, 1, 0, 1, 0, 1, 0]
+         >>> prec = precision_score(actual_spam, predicted_spam)
+         >>> print(f"Precision: {prec:.2%}")
+         Precision: 80.00%
+
+     Notes:
+         - Use when false positives are costly
+         - High precision = Few false alarms
+         - Example: Spam detection, fraud detection
+         - Returns 0.0 if no positive predictions
+     """
+     if len(y_true) != len(y_pred):
+         raise ValueError("y_true and y_pred must have same length")
+
+     tp = sum(1 for true, pred in zip(y_true, y_pred)
+              if true == positive_label and pred == positive_label)
+     fp = sum(1 for true, pred in zip(y_true, y_pred)
+              if true != positive_label and pred == positive_label)
+
+     if tp + fp == 0:
+         return 0.0
+
+     return tp / (tp + fp)
+
+
+ def recall_score(y_true: List, y_pred: List, positive_label: int = 1) -> float:
+     """
+     Calculate recall score (sensitivity) for binary classification.
+
+     Recall = TP / (TP + FN)
+     "Of all actual positives, how many did we catch?"
+
+     Args:
+         y_true: True labels
+         y_pred: Predicted labels
+         positive_label: Label considered as positive class. Default: 1
+
+     Returns:
+         float: Recall score (0.0 to 1.0)
+
+     Examples:
+         >>> from ilovetools.ml import recall_score
+
+         # High recall (caught most positives)
+         >>> y_true = [1, 0, 1, 1, 0, 0, 1, 0]
+         >>> y_pred = [1, 1, 1, 1, 0, 0, 1, 0]
+         >>> recall_score(y_true, y_pred)
+         1.0
+
+         # Lower recall (missed some positives)
+         >>> y_true = [1, 0, 1, 1, 0]
+         >>> y_pred = [1, 0, 0, 1, 0]
+         >>> recall_score(y_true, y_pred)
+         0.6666666666666666
+
+         # Cancer detection (don't miss any cases)
+         >>> actual_cancer = [1, 1, 0, 0, 1, 0, 1, 0]
+         >>> predicted_cancer = [1, 1, 0, 1, 1, 0, 0, 0]
+         >>> rec = recall_score(actual_cancer, predicted_cancer)
+         >>> print(f"Recall: {rec:.2%}")
+         Recall: 75.00%
+
+     Notes:
+         - Use when false negatives are costly
+         - High recall = Few missed cases
+         - Example: Disease detection, fraud detection
+         - Returns 0.0 if no actual positives
+     """
+     if len(y_true) != len(y_pred):
+         raise ValueError("y_true and y_pred must have same length")
+
+     tp = sum(1 for true, pred in zip(y_true, y_pred)
+              if true == positive_label and pred == positive_label)
+     fn = sum(1 for true, pred in zip(y_true, y_pred)
+              if true == positive_label and pred != positive_label)
+
+     if tp + fn == 0:
+         return 0.0
+
+     return tp / (tp + fn)
+
+
+ def f1_score(y_true: List, y_pred: List, positive_label: int = 1) -> float:
+     """
+     Calculate F1 score (harmonic mean of precision and recall).
+
+     F1 = 2 * (Precision * Recall) / (Precision + Recall)
+
+     Args:
+         y_true: True labels
+         y_pred: Predicted labels
+         positive_label: Label considered as positive class. Default: 1
+
+     Returns:
+         float: F1 score (0.0 to 1.0)
+
+     Examples:
+         >>> from ilovetools.ml import f1_score
+
+         # Balanced precision and recall
+         >>> y_true = [1, 0, 1, 1, 0, 0, 1, 0]
+         >>> y_pred = [1, 0, 1, 1, 0, 0, 1, 1]
+         >>> f1_score(y_true, y_pred)
+         0.8888888888888888
+
+         # Imbalanced dataset
+         >>> y_true = [1, 0, 0, 0, 0, 0, 0, 0, 0, 0]
+         >>> y_pred = [1, 0, 0, 0, 0, 0, 0, 0, 0, 0]
+         >>> f1 = f1_score(y_true, y_pred)
+         >>> print(f"F1 Score: {f1:.2%}")
+         F1 Score: 100.00%
+
+     Notes:
+         - Best metric for imbalanced datasets
+         - Balances precision and recall
+         - Range: 0.0 (worst) to 1.0 (best)
+         - Use when both false positives and negatives matter
+     """
+     precision = precision_score(y_true, y_pred, positive_label)
+     recall = recall_score(y_true, y_pred, positive_label)
+
+     if precision + recall == 0:
+         return 0.0
+
+     return 2 * (precision * recall) / (precision + recall)
+
+
+ def confusion_matrix(y_true: List, y_pred: List) -> List[List[int]]:
+     """
+     Calculate confusion matrix for binary classification.
+
+     Returns 2x2 matrix:
+     [[TN, FP],
+      [FN, TP]]
+
+     Args:
+         y_true: True labels
+         y_pred: Predicted labels
+
+     Returns:
+         list: 2x2 confusion matrix
+
+     Examples:
+         >>> from ilovetools.ml import confusion_matrix
+
+         # Perfect predictions
+         >>> y_true = [1, 0, 1, 1, 0]
+         >>> y_pred = [1, 0, 1, 1, 0]
+         >>> cm = confusion_matrix(y_true, y_pred)
+         >>> print(cm)
+         [[2, 0], [0, 3]]
+
+         # With errors
+         >>> y_true = [1, 0, 1, 1, 0, 0, 1, 0]
+         >>> y_pred = [1, 0, 1, 0, 0, 1, 1, 0]
+         >>> cm = confusion_matrix(y_true, y_pred)
+         >>> print(cm)
+         [[3, 1], [1, 3]]
+
+         # Interpret results
+         >>> tn, fp, fn, tp = cm[0][0], cm[0][1], cm[1][0], cm[1][1]
+         >>> print(f"True Negatives: {tn}")
+         True Negatives: 3
+         >>> print(f"False Positives: {fp}")
+         False Positives: 1
+         >>> print(f"False Negatives: {fn}")
+         False Negatives: 1
+         >>> print(f"True Positives: {tp}")
+         True Positives: 3
+
+     Notes:
+         - Foundation of classification metrics
+         - Shows all types of errors
+         - Format: [[TN, FP], [FN, TP]]
+         - Use to understand model behavior
+     """
+     if len(y_true) != len(y_pred):
+         raise ValueError("y_true and y_pred must have same length")
+
+     tn = sum(1 for true, pred in zip(y_true, y_pred) if true == 0 and pred == 0)
+     fp = sum(1 for true, pred in zip(y_true, y_pred) if true == 0 and pred == 1)
+     fn = sum(1 for true, pred in zip(y_true, y_pred) if true == 1 and pred == 0)
+     tp = sum(1 for true, pred in zip(y_true, y_pred) if true == 1 and pred == 1)
+
+     return [[tn, fp], [fn, tp]]
+
+
+ def classification_report(y_true: List, y_pred: List) -> Dict[str, float]:
+     """
+     Generate comprehensive classification report.
+
+     Returns accuracy, precision, recall, and F1 score.
+
+     Args:
+         y_true: True labels
+         y_pred: Predicted labels
+
+     Returns:
+         dict: Dictionary with all metrics
+
+     Examples:
+         >>> from ilovetools.ml import classification_report
+
+         >>> y_true = [1, 0, 1, 1, 0, 0, 1, 0]
+         >>> y_pred = [1, 0, 1, 0, 0, 1, 1, 0]
+         >>> report = classification_report(y_true, y_pred)
+         >>> print(report)
+         {'accuracy': 0.75, 'precision': 0.75, 'recall': 0.75, 'f1_score': 0.75}
+
+         # Pretty print
+         >>> for metric, value in report.items():
+         ...     print(f"{metric}: {value:.2%}")
+         accuracy: 75.00%
+         precision: 75.00%
+         recall: 75.00%
+         f1_score: 75.00%
+
+     Notes:
+         - Comprehensive overview of model performance
+         - All metrics in one call
+         - Easy to compare models
+         - Returns dictionary for flexibility
+     """
+     return {
+         'accuracy': accuracy_score(y_true, y_pred),
+         'precision': precision_score(y_true, y_pred),
+         'recall': recall_score(y_true, y_pred),
+         'f1_score': f1_score(y_true, y_pred)
+     }
+
+
+ def mean_squared_error(y_true: List[float], y_pred: List[float]) -> float:
+     """
+     Calculate Mean Squared Error for regression.
+
+     Alias: mse()
+
+     MSE = Average of (actual - predicted)^2
+
+     Args:
+         y_true: True values
+         y_pred: Predicted values
+
+     Returns:
+         float: MSE value
+
+     Examples:
+         >>> from ilovetools.ml import mse  # Short alias
+
+         # Perfect predictions
+         >>> y_true = [1.0, 2.0, 3.0, 4.0]
+         >>> y_pred = [1.0, 2.0, 3.0, 4.0]
+         >>> mse(y_true, y_pred)
+         0.0
+
+         # With errors
+         >>> y_true = [100, 200, 300, 400]
+         >>> y_pred = [110, 190, 310, 390]
+         >>> error = mse(y_true, y_pred)
+         >>> print(f"MSE: {error:.2f}")
+         MSE: 100.00
+
+         >>> from ilovetools.ml import mean_squared_error  # Full name
+         >>> error = mean_squared_error(y_true, y_pred)
+
+     Notes:
+         - Penalizes large errors heavily
+         - Not in original units (squared)
+         - Sensitive to outliers
+         - Lower is better
+     """
+     if len(y_true) != len(y_pred):
+         raise ValueError("y_true and y_pred must have same length")
+
+     squared_errors = [(true - pred) ** 2 for true, pred in zip(y_true, y_pred)]
+     return sum(squared_errors) / len(squared_errors)
+
+
+ # Create alias
+ mse = mean_squared_error
+
+
+ def mean_absolute_error(y_true: List[float], y_pred: List[float]) -> float:
+     """
+     Calculate Mean Absolute Error for regression.
+
+     Alias: mae()
+
+     MAE = Average of |actual - predicted|
+
+     Args:
+         y_true: True values
+         y_pred: Predicted values
+
+     Returns:
+         float: MAE value
+
+     Examples:
+         >>> from ilovetools.ml import mae  # Short alias
+
+         # Perfect predictions
+         >>> y_true = [1.0, 2.0, 3.0, 4.0]
+         >>> y_pred = [1.0, 2.0, 3.0, 4.0]
+         >>> mae(y_true, y_pred)
+         0.0
+
+         # With errors
+         >>> y_true = [100, 200, 300, 400]
+         >>> y_pred = [110, 190, 310, 390]
+         >>> error = mae(y_true, y_pred)
+         >>> print(f"MAE: ${error:.2f}")
+         MAE: $10.00
+
+         >>> from ilovetools.ml import mean_absolute_error  # Full name
+         >>> error = mean_absolute_error(y_true, y_pred)
+
+     Notes:
+         - Easy to interpret
+         - Same units as target variable
+         - Less sensitive to outliers than MSE
+         - Lower is better
+     """
+     if len(y_true) != len(y_pred):
+         raise ValueError("y_true and y_pred must have same length")
+
+     absolute_errors = [abs(true - pred) for true, pred in zip(y_true, y_pred)]
+     return sum(absolute_errors) / len(absolute_errors)
+
+
+ # Create alias
+ mae = mean_absolute_error
+
+
+ def root_mean_squared_error(y_true: List[float], y_pred: List[float]) -> float:
+     """
+     Calculate Root Mean Squared Error for regression.
+
+     Alias: rmse()
+
+     RMSE = sqrt(MSE)
+
+     Args:
+         y_true: True values
+         y_pred: Predicted values
+
+     Returns:
+         float: RMSE value
+
+     Examples:
+         >>> from ilovetools.ml import rmse  # Short alias
+
+         # Perfect predictions
+         >>> y_true = [1.0, 2.0, 3.0, 4.0]
+         >>> y_pred = [1.0, 2.0, 3.0, 4.0]
+         >>> rmse(y_true, y_pred)
+         0.0
+
+         # With errors
+         >>> y_true = [100, 200, 300, 400]
+         >>> y_pred = [110, 190, 310, 390]
+         >>> error = rmse(y_true, y_pred)
+         >>> print(f"RMSE: {error:.2f}")
+         RMSE: 10.00
+
+         >>> from ilovetools.ml import root_mean_squared_error  # Full name
+         >>> error = root_mean_squared_error(y_true, y_pred)
+
+     Notes:
+         - Most common regression metric
+         - Same units as target variable
+         - Penalizes large errors
+         - Lower is better
+     """
+     mse_value = mean_squared_error(y_true, y_pred)
+     return mse_value ** 0.5
+
+
+ # Create alias
+ rmse = root_mean_squared_error
+
+
+ def r2_score(y_true: List[float], y_pred: List[float]) -> float:
+     """
+     Calculate R-squared (coefficient of determination) for regression.
+
+     R² = 1 - (SS_res / SS_tot)
+     Proportion of variance explained by the model.
+
+     Args:
+         y_true: True values
+         y_pred: Predicted values
+
+     Returns:
+         float: R² value (-inf to 1.0, higher is better)
+
+     Examples:
+         >>> from ilovetools.ml import r2_score
+
+         # Perfect predictions
+         >>> y_true = [1.0, 2.0, 3.0, 4.0]
+         >>> y_pred = [1.0, 2.0, 3.0, 4.0]
+         >>> r2_score(y_true, y_pred)
+         1.0
+
+         # Good predictions
+         >>> y_true = [100, 200, 300, 400, 500]
+         >>> y_pred = [110, 190, 310, 390, 510]
+         >>> r2 = r2_score(y_true, y_pred)
+         >>> print(f"R²: {r2:.2%}")
+         R²: 99.50%
+
+         # Interpretation
+         >>> r2 = 0.85
+         >>> print(f"Model explains {r2:.0%} of variance")
+         Model explains 85% of variance
+
+     Notes:
+         - Range: -inf to 1.0 (1.0 is perfect)
+         - 0.0 = Model as good as mean baseline
+         - Negative = Model worse than mean
+         - Easy to interpret as percentage
+     """
+     if len(y_true) != len(y_pred):
+         raise ValueError("y_true and y_pred must have same length")
+
+     mean_true = sum(y_true) / len(y_true)
+
+     ss_tot = sum((true - mean_true) ** 2 for true in y_true)
+     ss_res = sum((true - pred) ** 2 for true, pred in zip(y_true, y_pred))
+
+     if ss_tot == 0:
+         return 0.0
+
+     return 1 - (ss_res / ss_tot)
+
+
+ def roc_auc_score(y_true: List[int], y_scores: List[float]) -> float:
+     """
+     Calculate ROC AUC score for binary classification.
+
+     AUC = Area Under the ROC Curve
+     Measures model's ability to distinguish between classes.
+
+     Args:
+         y_true: True binary labels (0 or 1)
+         y_scores: Predicted probabilities or scores
+
+     Returns:
+         float: AUC score (0.0 to 1.0)
+
+     Examples:
+         >>> from ilovetools.ml import roc_auc_score
+
+         # Perfect separation
+         >>> y_true = [0, 0, 1, 1]
+         >>> y_scores = [0.1, 0.2, 0.8, 0.9]
+         >>> roc_auc_score(y_true, y_scores)
+         1.0
+
+         # Good separation (one negative outscores a positive)
+         >>> y_true = [0, 0, 1, 1, 0, 1]
+         >>> y_scores = [0.2, 0.75, 0.7, 0.8, 0.4, 0.9]
+         >>> auc = roc_auc_score(y_true, y_scores)
+         >>> print(f"AUC: {auc:.2%}")
+         AUC: 88.89%
+
+     Notes:
+         - 1.0 = Perfect classifier
+         - 0.5 = Random guessing
+         - < 0.5 = Worse than random
+         - Threshold-independent metric
+     """
+     if len(y_true) != len(y_scores):
+         raise ValueError("y_true and y_scores must have same length")
+
+     # Sort samples by score, highest first
+     pairs = sorted(zip(y_scores, y_true), reverse=True)
+
+     # Count positive and negative samples
+     n_pos = sum(y_true)
+     n_neg = len(y_true) - n_pos
+
+     if n_pos == 0 or n_neg == 0:
+         return 0.5
+
+     # Calculate AUC by counting correctly ranked (positive, negative) pairs:
+     # each negative contributes one pair per positive ranked above it.
+     # Note: a positive and a negative with tied scores are counted as correctly ranked.
+     tp = 0
+     auc = 0.0
+
+     for _, label in pairs:
+         if label == 1:
+             tp += 1
+         else:
+             auc += tp
+
+     return auc / (n_pos * n_neg)
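
Taken together, the functions above cover a typical evaluation pass: build a confusion matrix, summarize it with classification_report, score ranking quality with roc_auc_score, and check regression error with rmse. The sketch below is a minimal illustration, not part of the package diff; it assumes the module is importable as ilovetools.ml (as the doctests in this release suggest) and uses made-up labels and scores.

# Minimal end-to-end sketch (assumes `ilovetools.ml` is the import path shown in the doctests;
# the labels and scores here are invented for illustration only).
from ilovetools.ml import classification_report, confusion_matrix, rmse, roc_auc_score

y_true = [1, 0, 1, 1, 0, 0, 1, 0]                       # ground-truth classes
y_scores = [0.9, 0.2, 0.8, 0.4, 0.1, 0.7, 0.6, 0.3]     # model scores in [0, 1]
y_pred = [1 if s >= 0.5 else 0 for s in y_scores]       # threshold at 0.5

print(confusion_matrix(y_true, y_pred))                 # [[TN, FP], [FN, TP]]
for name, value in classification_report(y_true, y_pred).items():
    print(f"{name}: {value:.2%}")
print(f"AUC: {roc_auc_score(y_true, y_scores):.2%}")    # threshold-independent ranking quality

# Regression metrics take numeric targets and predictions
print(f"RMSE: {rmse([100, 200, 300], [110, 195, 290]):.2f}")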