ilovetools 0.2.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,872 @@
+ """
+ Ensemble methods utilities for ML workflows
+ Each function has TWO names: full descriptive name + abbreviated alias
+ """
+
+ from typing import List, Dict, Any, Callable, Optional, Tuple
+ import random
+
+ __all__ = [
+     # Full names
+     'voting_classifier',
+     'voting_regressor',
+     'bagging_predictions',
+     'boosting_sequential',
+     'stacking_ensemble',
+     'weighted_average_ensemble',
+     'majority_vote',
+     'soft_vote',
+     'bootstrap_sample',
+     'out_of_bag_score',
+     'ensemble_diversity',
+     'blend_predictions',
+     # Abbreviated aliases
+     'vote_clf',
+     'vote_reg',
+     'bagging',
+     'boosting',
+     'stacking',
+     'weighted_avg',
+     'hard_vote',
+     'soft_vote_alias',
+     'bootstrap',
+     'oob_score',
+     'diversity',
+     'blend',
+ ]
+
+
+ def voting_classifier(
+     predictions: List[List[int]],
+     method: str = 'hard',
+     weights: Optional[List[float]] = None
+ ) -> List[int]:
+     """
+     Combine multiple classifier predictions using voting.
+
+     Alias: vote_clf()
+
+     Args:
+         predictions: List of prediction arrays from different models
+         method: 'hard' (majority vote); probability-based 'soft' voting is handled by soft_vote()
+         weights: Optional weights for each model
+
+     Returns:
+         list: Combined predictions
+
+     Examples:
+         >>> from ilovetools.ml import vote_clf  # Short alias
+
+         # Hard voting (majority)
+         >>> model1_pred = [0, 1, 1, 0, 1]
+         >>> model2_pred = [0, 1, 0, 0, 1]
+         >>> model3_pred = [1, 1, 1, 0, 1]
+         >>> predictions = [model1_pred, model2_pred, model3_pred]
+         >>> result = vote_clf(predictions, method='hard')
+         >>> print(result)
+         [0, 1, 1, 0, 1]
+
+         # Weighted voting
+         >>> weights = [0.5, 0.3, 0.2]  # Trust model1 more
+         >>> result = vote_clf(predictions, weights=weights)
+
+         >>> from ilovetools.ml import voting_classifier  # Full name
+         >>> result = voting_classifier(predictions, method='hard')
+
+     Notes:
+         - Hard voting: majority class wins
+         - Soft voting: averages probabilities (needs predict_proba); see soft_vote()
+         - Weighted: give more importance to better models
+         - An odd number of models avoids ties
+     """
+     if not predictions:
+         raise ValueError("predictions cannot be empty")
+
+     if method != 'hard':
+         raise ValueError(
+             f"Unsupported method: {method}. This function votes on class labels; "
+             "use soft_vote() for probability-based soft voting."
+         )
+
+     n_samples = len(predictions[0])
+     n_models = len(predictions)
+
+     if weights is None:
+         weights = [1.0] * n_models
+
+     if len(weights) != n_models:
+         raise ValueError("weights must match number of models")
+
+     result = []
+
+     for i in range(n_samples):
+         votes = {}
+         for model_idx, model_preds in enumerate(predictions):
+             pred = model_preds[i]
+             weight = weights[model_idx]
+             votes[pred] = votes.get(pred, 0) + weight
+
+         # Get class with highest weighted vote
+         final_pred = max(votes.items(), key=lambda x: x[1])[0]
+         result.append(final_pred)
+
+     return result
+
+
+ # Create alias
+ vote_clf = voting_classifier
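A quick editorial illustration (not part of the package source) of how per-model weights can overturn a plain majority, using the import path shown in the docstrings:

from ilovetools.ml import vote_clf

preds = [
    [0, 1],  # model 1's labels for two samples
    [1, 1],  # model 2
    [1, 0],  # model 3
]
print(vote_clf(preds))                           # [1, 1]  (unweighted majority)
print(vote_clf(preds, weights=[0.6, 0.2, 0.2]))  # [0, 1]  (model 1 outweighs the other two on sample 0)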
+
+
+ def voting_regressor(
+     predictions: List[List[float]],
+     method: str = 'mean',
+     weights: Optional[List[float]] = None
+ ) -> List[float]:
+     """
+     Combine multiple regressor predictions.
+
+     Alias: vote_reg()
+
+     Args:
+         predictions: List of prediction arrays from different models
+         method: 'mean', 'median', or 'weighted'
+         weights: Optional weights for weighted average
+
+     Returns:
+         list: Combined predictions
+
+     Examples:
+         >>> from ilovetools.ml import vote_reg  # Short alias
+
+         # Mean averaging
+         >>> model1_pred = [100, 200, 300]
+         >>> model2_pred = [110, 190, 310]
+         >>> model3_pred = [105, 195, 305]
+         >>> predictions = [model1_pred, model2_pred, model3_pred]
+         >>> result = vote_reg(predictions, method='mean')
+         >>> print(result)
+         [105.0, 195.0, 305.0]
+
+         # Weighted average
+         >>> weights = [0.5, 0.3, 0.2]
+         >>> result = vote_reg(predictions, method='weighted', weights=weights)
+
+         # Median (robust to outliers)
+         >>> result = vote_reg(predictions, method='median')
+
+         >>> from ilovetools.ml import voting_regressor  # Full name
+         >>> result = voting_regressor(predictions, method='mean')
+
+     Notes:
+         - Mean: Simple average
+         - Median: Robust to outliers
+         - Weighted: Trust better models more
+         - Use median for noisy predictions
+     """
+     if not predictions:
+         raise ValueError("predictions cannot be empty")
+
+     n_samples = len(predictions[0])
+     n_models = len(predictions)
+
+     result = []
+
+     for i in range(n_samples):
+         values = [model_preds[i] for model_preds in predictions]
+
+         if method == 'mean':
+             combined = sum(values) / len(values)
+         elif method == 'median':
+             sorted_values = sorted(values)
+             mid = len(sorted_values) // 2
+             if len(sorted_values) % 2 == 0:
+                 combined = (sorted_values[mid-1] + sorted_values[mid]) / 2
+             else:
+                 combined = sorted_values[mid]
+         elif method == 'weighted':
+             if weights is None:
+                 raise ValueError("weights required for weighted method")
+             if len(weights) != n_models:
+                 raise ValueError("weights must match number of models")
+             combined = sum(v * w for v, w in zip(values, weights)) / sum(weights)
+         else:
+             raise ValueError(f"Unknown method: {method}")
+
+         result.append(combined)
+
+     return result
+
+
+ # Create alias
+ vote_reg = voting_regressor
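For intuition on the method choice, a small editorial sketch of why the Notes suggest the median when one model misbehaves:

from ilovetools.ml import vote_reg

preds = [[10.0], [12.0], [200.0]]  # the third model produced an outlier
print(vote_reg(preds, method='mean'))    # [74.0]  -- dragged up by the outlier
print(vote_reg(preds, method='median'))  # [12.0]  -- robust to it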
+
+
+ def bagging_predictions(
+     X: List,
+     y: List,
+     model_func: Callable,
+     n_models: int = 10,
+     sample_size: float = 1.0,
+     random_state: Optional[int] = None
+ ) -> Tuple[List[Any], List[List]]:
+     """
+     Bootstrap Aggregating (Bagging) ensemble.
+
+     Alias: bagging()
+
+     Train multiple models on bootstrap samples and average predictions.
+
+     Args:
+         X: Feature data
+         y: Target data
+         model_func: Function(X_train, y_train, X_test) -> predictions
+         n_models: Number of models to train. Default: 10
+         sample_size: Proportion of data for each bootstrap. Default: 1.0
+         random_state: Random seed for reproducibility
+
+     Returns:
+         tuple: (final_predictions, all_model_predictions)
+
+     Examples:
+         >>> from ilovetools.ml import bagging  # Short alias
+
+         >>> X = [[1], [2], [3], [4], [5]]
+         >>> y = [1, 2, 3, 4, 5]
+         >>>
+         >>> def simple_model(X_tr, y_tr, X_te):
+         ...     avg = sum(y_tr) / len(y_tr)
+         ...     return [avg] * len(X_te)
+         >>>
+         >>> final_pred, all_preds = bagging(X, y, simple_model, n_models=5)
+         >>> print(f"Trained {len(all_preds)} models")
+         Trained 5 models
+
+         >>> from ilovetools.ml import bagging_predictions  # Full name
+         >>> final_pred, all_preds = bagging_predictions(X, y, simple_model)
+
+     Notes:
+         - Reduces variance (overfitting)
+         - Each model sees different data
+         - Random Forest uses bagging
+         - More models = more stable
+     """
+     if random_state is not None:
+         random.seed(random_state)
+
+     n_samples = len(X)
+     bootstrap_size = int(n_samples * sample_size)
+
+     all_predictions = []
+
+     for _ in range(n_models):
+         # Bootstrap sample (with replacement)
+         indices = [random.randint(0, n_samples - 1) for _ in range(bootstrap_size)]
+         X_bootstrap = [X[i] for i in indices]
+         y_bootstrap = [y[i] for i in indices]
+
+         # Train model and predict on original data
+         predictions = model_func(X_bootstrap, y_bootstrap, X)
+         all_predictions.append(predictions)
+
+     # Average predictions
+     final_predictions = []
+     for i in range(n_samples):
+         avg = sum(preds[i] for preds in all_predictions) / n_models
+         final_predictions.append(avg)
+
+     return final_predictions, all_predictions
+
+
+ # Create alias
+ bagging = bagging_predictions
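An editorial sketch of the return shape: with a constant (bootstrap-mean) learner, every entry of final_predictions is the same average of the per-model constants, and all_model_predictions holds one prediction list per model.

from ilovetools.ml import bagging

X = [[1], [2], [3], [4], [5]]
y = [10, 20, 30, 40, 50]

def mean_model(X_tr, y_tr, X_te):
    m = sum(y_tr) / len(y_tr)   # constant predictor: mean of the bootstrap targets
    return [m] * len(X_te)

final, per_model = bagging(X, y, mean_model, n_models=3, random_state=0)
assert len(per_model) == 3   # one prediction list per bootstrap model
assert len(set(final)) == 1  # a constant learner averages to a constant ensemble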
+
+
+ def boosting_sequential(
+     X: List,
+     y: List,
+     model_func: Callable,
+     n_models: int = 10,
+     learning_rate: float = 0.1
+ ) -> Tuple[List[float], List[List]]:
+     """
+     Sequential Boosting ensemble.
+
+     Alias: boosting()
+
+     Train models sequentially, each focusing on previous errors.
+
+     Args:
+         X: Feature data
+         y: Target data
+         model_func: Function(X_train, y_train, weights) -> predictions
+         n_models: Number of models to train. Default: 10
+         learning_rate: Shrinkage parameter. Default: 0.1
+
+     Returns:
+         tuple: (final_predictions, all_model_predictions)
+
+     Examples:
+         >>> from ilovetools.ml import boosting  # Short alias
+
+         >>> X = [[1], [2], [3], [4], [5]]
+         >>> y = [1.0, 2.0, 3.0, 4.0, 5.0]
+         >>>
+         >>> def simple_model(X_tr, y_tr, weights):
+         ...     # Weighted average
+         ...     total_weight = sum(weights)
+         ...     weighted_sum = sum(y * w for y, w in zip(y_tr, weights))
+         ...     avg = weighted_sum / total_weight
+         ...     return [avg] * len(X_tr)
+         >>>
+         >>> final_pred, all_preds = boosting(X, y, simple_model, n_models=3)
+
+         >>> from ilovetools.ml import boosting_sequential  # Full name
+         >>> final_pred, all_preds = boosting_sequential(X, y, simple_model)
+
+     Notes:
+         - Reduces bias (underfitting)
+         - Each model fixes previous errors
+         - XGBoost, AdaBoost use boosting
+         - Lower learning_rate = more models needed
+     """
+     n_samples = len(X)
+
+     # Initialize weights uniformly
+     weights = [1.0 / n_samples] * n_samples
+
+     all_predictions = []
+     final_predictions = [0.0] * n_samples
+
+     for _ in range(n_models):
+         # Train model with current weights
+         predictions = model_func(X, y, weights)
+         all_predictions.append(predictions)
+
+         # Update final predictions
+         for i in range(n_samples):
+             final_predictions[i] += learning_rate * predictions[i]
+
+         # Calculate errors
+         errors = [abs(y[i] - final_predictions[i]) for i in range(n_samples)]
+
+         # Update weights (focus on high error samples)
+         total_error = sum(errors)
+         if total_error > 0:
+             weights = [e / total_error for e in errors]
+         else:
+             weights = [1.0 / n_samples] * n_samples
+
+     return final_predictions, all_predictions
+
+
+ # Create alias
+ boosting = boosting_sequential
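A small editorial trace of the reweighting loop (values worked out by hand for this toy setup): after round 1 the hardest sample carries most of the weight, so the round-2 learner is pulled toward it.

from ilovetools.ml import boosting

X = [[1], [2], [3]]
y = [1.0, 2.0, 9.0]  # the last target is far from the others

def weighted_mean(X_tr, y_tr, w):
    m = sum(v * wi for v, wi in zip(y_tr, w)) / sum(w)
    return [m] * len(X_tr)

final, rounds = boosting(X, y, weighted_mean, n_models=2, learning_rate=0.5)
print(rounds[0][0], rounds[1][0])  # 4.0 8.0 -- round 2 shifts toward the underfit target 9.0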
+
+
+ def stacking_ensemble(
+     base_predictions: List[List],
+     y_true: List,
+     meta_model_func: Callable
+ ) -> List:
+     """
+     Stacking ensemble with meta-model.
+
+     Alias: stacking()
+
+     Train meta-model to combine base model predictions.
+
+     Args:
+         base_predictions: List of prediction arrays from base models
+         y_true: True target values
+         meta_model_func: Function(X_meta, y_meta) -> meta_model
+
+     Returns:
+         list: Meta-model predictions
+
+     Examples:
+         >>> from ilovetools.ml import stacking  # Short alias
+
+         >>> # Base model predictions
+         >>> model1_pred = [1.0, 2.0, 3.0, 4.0, 5.0]
+         >>> model2_pred = [1.1, 1.9, 3.1, 3.9, 5.1]
+         >>> model3_pred = [0.9, 2.1, 2.9, 4.1, 4.9]
+         >>> base_preds = [model1_pred, model2_pred, model3_pred]
+         >>> y_true = [1.0, 2.0, 3.0, 4.0, 5.0]
+         >>>
+         >>> def meta_model(X_meta, y_meta):
+         ...     # Simple weighted average learner
+         ...     def predict(X_test):
+         ...         return [sum(x) / len(x) for x in X_test]
+         ...     return predict
+         >>>
+         >>> meta_preds = stacking(base_preds, y_true, meta_model)
+
+         >>> from ilovetools.ml import stacking_ensemble  # Full name
+         >>> meta_preds = stacking_ensemble(base_preds, y_true, meta_model)
+
+     Notes:
+         - Most powerful ensemble method
+         - Meta-model learns optimal combination
+         - Kaggle winners use stacking
+         - Requires more data and compute
+     """
+     if not base_predictions:
+         raise ValueError("base_predictions cannot be empty")
+
+     n_samples = len(base_predictions[0])
+     n_models = len(base_predictions)
+
+     # Create meta-features (transpose predictions)
+     X_meta = []
+     for i in range(n_samples):
+         meta_features = [base_preds[i] for base_preds in base_predictions]
+         X_meta.append(meta_features)
+
+     # Train meta-model
+     meta_model = meta_model_func(X_meta, y_true)
+
+     # Get meta-predictions
+     meta_predictions = meta_model(X_meta)
+
+     return meta_predictions
+
+
+ # Create alias
+ stacking = stacking_ensemble
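A minimal editorial sketch of what the meta-model receives: the base predictions are transposed into one row per sample (one column per base model) before the meta-learner sees them.

from ilovetools.ml import stacking

base_preds = [[1.0, 2.0], [3.0, 4.0]]  # 2 base models x 2 samples
y_true = [2.0, 3.0]

def mean_meta(X_meta, y_meta):
    # here X_meta == [[1.0, 3.0], [2.0, 4.0]]: one row per sample
    return lambda rows: [sum(r) / len(r) for r in rows]

print(stacking(base_preds, y_true, mean_meta))  # [2.0, 3.0]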
+
+
+ def weighted_average_ensemble(
+     predictions: List[List[float]],
+     weights: List[float]
+ ) -> List[float]:
+     """
+     Weighted average of predictions.
+
+     Alias: weighted_avg()
+
+     Args:
+         predictions: List of prediction arrays
+         weights: Weight for each model
+
+     Returns:
+         list: Weighted average predictions
+
+     Examples:
+         >>> from ilovetools.ml import weighted_avg  # Short alias
+
+         >>> model1 = [100, 200, 300]
+         >>> model2 = [110, 190, 310]
+         >>> model3 = [105, 195, 305]
+         >>> predictions = [model1, model2, model3]
+         >>> weights = [0.5, 0.3, 0.2]  # Trust model1 most
+         >>> result = weighted_avg(predictions, weights)
+         >>> print(result)
+         [104.0, 196.0, 304.0]
+
+         >>> from ilovetools.ml import weighted_average_ensemble  # Full name
+         >>> result = weighted_average_ensemble(predictions, weights)
+
+     Notes:
+         - Give more weight to better models
+         - Weights should sum to 1.0 (the result is not renormalized)
+         - Use CV to find optimal weights
+         - Simple but effective
+     """
+     if len(predictions) != len(weights):
+         raise ValueError("predictions and weights must have same length")
+
+     n_samples = len(predictions[0])
+     result = []
+
+     for i in range(n_samples):
+         weighted_sum = sum(preds[i] * w for preds, w in zip(predictions, weights))
+         result.append(weighted_sum)
+
+     return result
+
+
+ # Create alias
+ weighted_avg = weighted_average_ensemble
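Arithmetic check for the corrected docstring output above (editorial, not part of the module):

from ilovetools.ml import weighted_avg

preds = [[100, 200, 300], [110, 190, 310], [105, 195, 305]]
print(weighted_avg(preds, [0.5, 0.3, 0.2]))  # [104.0, 196.0, 304.0]
# first entry by hand: 100*0.5 + 110*0.3 + 105*0.2 = 50 + 33 + 21 = 104.0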
+
+
+ def majority_vote(predictions: List[List[int]]) -> List[int]:
+     """
+     Hard voting (majority vote) for classification.
+
+     Alias: hard_vote()
+
+     Args:
+         predictions: List of prediction arrays
+
+     Returns:
+         list: Majority vote predictions
+
+     Examples:
+         >>> from ilovetools.ml import hard_vote  # Short alias
+
+         >>> model1 = [0, 1, 1, 0, 1]
+         >>> model2 = [0, 1, 0, 0, 1]
+         >>> model3 = [1, 1, 1, 0, 1]
+         >>> predictions = [model1, model2, model3]
+         >>> result = hard_vote(predictions)
+         >>> print(result)
+         [0, 1, 1, 0, 1]
+
+         >>> from ilovetools.ml import majority_vote  # Full name
+         >>> result = majority_vote(predictions)
+
+     Notes:
+         - Simple majority wins
+         - Use odd number of models
+         - Fast and interpretable
+         - Good for balanced models
+     """
+     n_samples = len(predictions[0])
+     result = []
+
+     for i in range(n_samples):
+         votes = [preds[i] for preds in predictions]
+         # Count votes
+         vote_counts = {}
+         for vote in votes:
+             vote_counts[vote] = vote_counts.get(vote, 0) + 1
+         # Get majority
+         majority = max(vote_counts.items(), key=lambda x: x[1])[0]
+         result.append(majority)
+
+     return result
+
+
+ # Create alias
+ hard_vote = majority_vote
+
+
+ def soft_vote(
+     probabilities: List[List[List[float]]],
+     weights: Optional[List[float]] = None
+ ) -> List[int]:
+     """
+     Soft voting using predicted probabilities.
+
+     Alias: soft_vote_alias()
+
+     Args:
+         probabilities: List of probability arrays [n_models][n_samples][n_classes]
+         weights: Optional weights for each model
+
+     Returns:
+         list: Predicted classes based on averaged probabilities
+
+     Examples:
+         >>> from ilovetools.ml import soft_vote_alias  # Short alias
+
+         # Binary classification probabilities
+         >>> model1_proba = [[0.8, 0.2], [0.3, 0.7], [0.6, 0.4]]
+         >>> model2_proba = [[0.7, 0.3], [0.4, 0.6], [0.5, 0.5]]
+         >>> probabilities = [model1_proba, model2_proba]
+         >>> result = soft_vote_alias(probabilities)
+         >>> print(result)
+         [0, 1, 0]
+
+         >>> from ilovetools.ml import soft_vote  # Full name
+         >>> result = soft_vote(probabilities)
+
+     Notes:
+         - Uses probability information
+         - More nuanced than hard voting
+         - Requires predict_proba
+         - Better for uncertain predictions
+     """
+     n_models = len(probabilities)
+     n_samples = len(probabilities[0])
+     n_classes = len(probabilities[0][0])
+
+     if weights is None:
+         weights = [1.0] * n_models
+
+     result = []
+
+     for i in range(n_samples):
+         # Average probabilities across models
+         avg_proba = [0.0] * n_classes
+         for model_idx, model_proba in enumerate(probabilities):
+             for class_idx in range(n_classes):
+                 avg_proba[class_idx] += model_proba[i][class_idx] * weights[model_idx]
+
+         # Normalize
+         total = sum(avg_proba)
+         avg_proba = [p / total for p in avg_proba]
+
+         # Get class with highest probability
+         predicted_class = avg_proba.index(max(avg_proba))
+         result.append(predicted_class)
+
+     return result
+
+
+ # Create alias
+ soft_vote_alias = soft_vote
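A short editorial example of how model weights interact with soft voting: trusting the second model more flips the predicted class for this single sample.

from ilovetools.ml import soft_vote

proba = [
    [[0.9, 0.1]],  # model 1: one sample, two classes
    [[0.4, 0.6]],  # model 2
]
print(soft_vote(proba))                      # [0]  -- equal weights favour class 0
print(soft_vote(proba, weights=[0.1, 0.9]))  # [1]  -- weighting model 2 flips the call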
+
+
+ def bootstrap_sample(
+     X: List,
+     y: List,
+     sample_size: Optional[int] = None,
+     random_state: Optional[int] = None
+ ) -> Tuple[List, List, List[int]]:
+     """
+     Create bootstrap sample (sampling with replacement).
+
+     Alias: bootstrap()
+
+     Args:
+         X: Feature data
+         y: Target data
+         sample_size: Size of bootstrap sample. Default: len(X)
+         random_state: Random seed
+
+     Returns:
+         tuple: (X_bootstrap, y_bootstrap, indices)
+
+     Examples:
+         >>> from ilovetools.ml import bootstrap  # Short alias
+
+         >>> X = [1, 2, 3, 4, 5]
+         >>> y = [10, 20, 30, 40, 50]
+         >>> X_boot, y_boot, indices = bootstrap(X, y, random_state=42)
+         >>> print(f"Bootstrap size: {len(X_boot)}")
+         Bootstrap size: 5
+         >>> print(f"Unique samples: {len(set(indices))}")
+
+         >>> from ilovetools.ml import bootstrap_sample  # Full name
+         >>> X_boot, y_boot, indices = bootstrap_sample(X, y)
+
+     Notes:
+         - Sampling with replacement
+         - Some samples appear multiple times
+         - ~63% unique samples on average
+         - Foundation of bagging
+     """
+     if random_state is not None:
+         random.seed(random_state)
+
+     n = len(X)
+     if sample_size is None:
+         sample_size = n
+
+     indices = [random.randint(0, n - 1) for _ in range(sample_size)]
+     X_bootstrap = [X[i] for i in indices]
+     y_bootstrap = [y[i] for i in indices]
+
+     return X_bootstrap, y_bootstrap, indices
+
+
+ # Create alias
+ bootstrap = bootstrap_sample
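An editorial sanity check of the "~63% unique samples" note: for reasonably large n, the unique fraction of a full-size bootstrap lands near 1 - 1/e.

from ilovetools.ml import bootstrap

n = 10000
X = list(range(n))
y = list(range(n))
_, _, idx = bootstrap(X, y, random_state=0)
print(len(set(idx)) / n)  # prints a value close to 0.63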
+
+
+ def out_of_bag_score(
+     X: List,
+     y: List,
+     model_func: Callable,
+     n_models: int = 10,
+     random_state: Optional[int] = None
+ ) -> float:
+     """
+     Calculate Out-of-Bag (OOB) score for bagging.
+
+     Alias: oob_score()
+
+     Args:
+         X: Feature data
+         y: Target data
+         model_func: Function(X_train, y_train, X_test) -> predictions
+         n_models: Number of bootstrap models. Default: 10
+         random_state: Random seed
+
+     Returns:
+         float: OOB score (accuracy for classification)
+
+     Examples:
+         >>> from ilovetools.ml import oob_score  # Short alias
+
+         >>> X = [[1], [2], [3], [4], [5]]
+         >>> y = [1, 2, 3, 4, 5]
+         >>>
+         >>> def model(X_tr, y_tr, X_te):
+         ...     avg = sum(y_tr) / len(y_tr)
+         ...     return [avg] * len(X_te)
+         >>>
+         >>> score = oob_score(X, y, model, n_models=5, random_state=42)
+         >>> print(f"OOB Score: {score:.2f}")
+
+         >>> from ilovetools.ml import out_of_bag_score  # Full name
+         >>> score = out_of_bag_score(X, y, model)
+
+     Notes:
+         - Free validation without separate test set
+         - Uses samples not in bootstrap
+         - ~37% samples are OOB per model
+         - Good estimate of generalization
+     """
+     if random_state is not None:
+         random.seed(random_state)
+
+     n_samples = len(X)
+     oob_predictions = [[] for _ in range(n_samples)]
+
+     for _ in range(n_models):
+         # Bootstrap sample
+         indices = [random.randint(0, n_samples - 1) for _ in range(n_samples)]
+         X_bootstrap = [X[i] for i in indices]
+         y_bootstrap = [y[i] for i in indices]
+
+         # Find OOB samples (those never drawn into this bootstrap)
+         index_set = set(indices)
+         oob_indices = [i for i in range(n_samples) if i not in index_set]
+
+         if not oob_indices:
+             continue
+
+         X_oob = [X[i] for i in oob_indices]
+
+         # Predict on OOB samples
+         predictions = model_func(X_bootstrap, y_bootstrap, X_oob)
+
+         # Store OOB predictions
+         for idx, pred in zip(oob_indices, predictions):
+             oob_predictions[idx].append(pred)
+
+     # Calculate OOB score
+     correct = 0
+     total = 0
+
+     for i, preds in enumerate(oob_predictions):
+         if preds:  # Has OOB predictions
+             avg_pred = sum(preds) / len(preds)
+             if abs(avg_pred - y[i]) < 0.5:  # For classification
+                 correct += 1
+             total += 1
+
+     return correct / total if total > 0 else 0.0
+
+
+ # Create alias
+ oob_score = out_of_bag_score
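A tiny editorial usage sketch: with identical targets, a bootstrap-mean learner predicts every out-of-bag sample within the 0.5 tolerance, so the score comes out as 1.0.

from ilovetools.ml import oob_score

X = [[i] for i in range(6)]
y = [1, 1, 1, 1, 1, 1]

def const_model(X_tr, y_tr, X_te):
    m = sum(y_tr) / len(y_tr)
    return [m] * len(X_te)

print(oob_score(X, y, const_model, n_models=10, random_state=0))  # 1.0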
+
+
+ def ensemble_diversity(
+     predictions: List[List[int]]
+ ) -> float:
+     """
+     Calculate diversity among ensemble models.
+
+     Alias: diversity()
+
+     Higher diversity = Better ensemble potential
+
+     Args:
+         predictions: List of prediction arrays
+
+     Returns:
+         float: Diversity score (0.0 to 1.0)
+
+     Examples:
+         >>> from ilovetools.ml import diversity  # Short alias
+
+         # High diversity (different predictions)
+         >>> model1 = [0, 1, 0, 1, 0]
+         >>> model2 = [1, 0, 1, 0, 1]
+         >>> model3 = [0, 0, 1, 1, 0]
+         >>> predictions = [model1, model2, model3]
+         >>> div = diversity(predictions)
+         >>> print(f"Diversity: {div:.2%}")
+
+         # Low diversity (similar predictions)
+         >>> model1 = [0, 1, 0, 1, 0]
+         >>> model2 = [0, 1, 0, 1, 0]
+         >>> model3 = [0, 1, 0, 1, 1]
+         >>> predictions = [model1, model2, model3]
+         >>> div = diversity(predictions)
+
+         >>> from ilovetools.ml import ensemble_diversity  # Full name
+         >>> div = ensemble_diversity(predictions)
+
+     Notes:
+         - High diversity = Models make different errors
+         - Low diversity = Models too similar
+         - Aim for diverse but accurate models
+         - Use different algorithms for diversity
+     """
+     n_models = len(predictions)
+     n_samples = len(predictions[0])
+
+     if n_models < 2:
+         return 0.0
+
+     # Calculate pairwise disagreement
+     total_disagreement = 0
+     pairs = 0
+
+     for i in range(n_models):
+         for j in range(i + 1, n_models):
+             disagreement = sum(1 for k in range(n_samples)
+                                if predictions[i][k] != predictions[j][k])
+             total_disagreement += disagreement / n_samples
+             pairs += 1
+
+     return total_disagreement / pairs if pairs > 0 else 0.0
+
+
+ # Create alias
+ diversity = ensemble_diversity
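Worked numbers for the disagreement measure (editorial): the score is the mean pairwise disagreement rate across all model pairs.

from ilovetools.ml import diversity

a = [0, 0, 1, 1]
b = [0, 1, 1, 0]
c = [0, 0, 1, 1]
# pairwise disagreement rates: (a,b) = 2/4, (a,c) = 0/4, (b,c) = 2/4 -> mean = 1/3
print(round(diversity([a, b, c]), 4))  # 0.3333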
+
+
+ def blend_predictions(
+     train_predictions: List[List],
+     test_predictions: List[List],
+     y_train: List,
+     blend_func: Callable
+ ) -> List:
+     """
+     Blend predictions using a blending function.
+
+     Alias: blend()
+
+     Args:
+         train_predictions: Base model predictions on training set
+         test_predictions: Base model predictions on test set
+         y_train: Training labels
+         blend_func: Function to learn blending weights
+
+     Returns:
+         list: Blended test predictions
+
+     Examples:
+         >>> from ilovetools.ml import blend  # Short alias
+
+         >>> train_preds = [[1, 2, 3], [1.1, 1.9, 3.1]]
+         >>> test_preds = [[4, 5], [3.9, 5.1]]
+         >>> y_train = [1, 2, 3]
+         >>>
+         >>> def simple_blend(train_p, y_tr):
+         ...     # Learn to average
+         ...     def predict(test_p):
+         ...         return [sum(p)/len(p) for p in zip(*test_p)]
+         ...     return predict
+         >>>
+         >>> result = blend(train_preds, test_preds, y_train, simple_blend)
+
+         >>> from ilovetools.ml import blend_predictions  # Full name
+         >>> result = blend_predictions(train_preds, test_preds, y_train, simple_blend)
+
+     Notes:
+         - Similar to stacking but simpler
+         - Uses holdout set for blending
+         - Less prone to overfitting than stacking
+         - Popular in Kaggle competitions
+     """
+     # Learn blending function on training predictions
+     blender = blend_func(train_predictions, y_train)
+
+     # Apply to test predictions
+     blended_predictions = blender(test_predictions)
+
+     return blended_predictions
+
+
+ # Create alias
+ blend = blend_predictions