ilovetools 0.2.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,781 @@
+ """
+ Hyperparameter tuning utilities for ML workflows
+ Each function has TWO names: full descriptive name + abbreviated alias
+ """
+
+ from typing import List, Dict, Any, Callable, Optional, Tuple
+ import random
+ import itertools
+
+ __all__ = [
+     # Full names
+     'grid_search_cv',
+     'random_search_cv',
+     'generate_param_grid',
+     'extract_best_params',
+     'format_cv_results',
+     'learning_curve_data',
+     'validation_curve_data',
+     'early_stopping_monitor',
+     'compare_models_cv',
+     'bayesian_search_simple',
+     # Abbreviated aliases
+     'gridsearch',
+     'randomsearch',
+     'param_grid',
+     'best_params',
+     'cv_results',
+     'learning_curve',
+     'val_curve',
+     'early_stop',
+     'compare_models',
+     'bayesopt',
+ ]
+
+
+ def grid_search_cv(
+     X: List,
+     y: List,
+     model_func: Callable,
+     param_grid: Dict[str, List],
+     metric_func: Callable,
+     cv_splits: int = 5
+ ) -> Dict[str, Any]:
+     """
+     Grid Search Cross-Validation for hyperparameter tuning.
+
+     Alias: gridsearch()
+
+     Exhaustively searches all parameter combinations.
+
+     Args:
+         X: Feature data
+         y: Target data
+         model_func: Function(params, X_train, y_train, X_val) -> predictions
+         param_grid: Dictionary of parameter lists
+         metric_func: Function(y_true, y_pred) -> score
+         cv_splits: Number of CV folds. Default: 5
+
+     Returns:
+         dict: Best parameters, best score, all results
+
+     Examples:
+         >>> from ilovetools.ml import gridsearch  # Short alias
+         >>> X = [[1], [2], [3], [4], [5]]
+         >>> y = [1, 2, 3, 4, 5]
+         >>>
+         >>> def model(params, X_tr, y_tr, X_val):
+         ...     # Simple model with threshold param
+         ...     threshold = params['threshold']
+         ...     avg = sum(y_tr) / len(y_tr)
+         ...     return [avg + threshold] * len(X_val)
+         >>>
+         >>> def metric(y_true, y_pred):
+         ...     return -sum(abs(t - p) for t, p in zip(y_true, y_pred)) / len(y_true)
+         >>>
+         >>> param_grid = {'threshold': [0, 0.5, 1.0]}
+         >>> results = gridsearch(X, y, model, param_grid, metric, cv_splits=3)
+         >>> print(results['best_params'])
+
+         >>> from ilovetools.ml import grid_search_cv  # Full name
+         >>> results = grid_search_cv(X, y, model, param_grid, metric)
+
+     Notes:
+         - Tries all combinations
+         - Exhaustive but slow
+         - Good for small parameter spaces
+         - Guaranteed to find best in grid
+     """
+     from .cross_validation import k_fold_cross_validation
+
+     # Generate all parameter combinations
+     param_names = list(param_grid.keys())
+     param_values = [param_grid[name] for name in param_names]
+     param_combinations = list(itertools.product(*param_values))
+
+     results = []
+
+     for combo in param_combinations:
+         params = dict(zip(param_names, combo))
+
+         # Perform CV
+         splits = k_fold_cross_validation(X, y, k=cv_splits)
+         scores = []
+
+         for train_idx, val_idx in splits:
+             X_train = [X[i] for i in train_idx]
+             y_train = [y[i] for i in train_idx]
+             X_val = [X[i] for i in val_idx]
+             y_val = [y[i] for i in val_idx]
+
+             y_pred = model_func(params, X_train, y_train, X_val)
+             score = metric_func(y_val, y_pred)
+             scores.append(score)
+
+         mean_score = sum(scores) / len(scores)
+
+         results.append({
+             'params': params,
+             'mean_score': mean_score,
+             'scores': scores
+         })
+
+     # Find best
+     best_result = max(results, key=lambda x: x['mean_score'])
+
+     return {
+         'best_params': best_result['params'],
+         'best_score': best_result['mean_score'],
+         'all_results': results,
+         'n_combinations': len(param_combinations)
+     }
+
+
+ # Create alias
+ gridsearch = grid_search_cv
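Editorial usage sketch (not part of the packaged file): the toy `model` and `metric` below mirror the docstring example above, and the snippet shows how the returned dict composes with `format_cv_results` and `extract_best_params` defined later in this module.

    from ilovetools.ml import grid_search_cv, format_cv_results, extract_best_params

    X = [[1], [2], [3], [4], [5]]
    y = [1, 2, 3, 4, 5]

    def model(params, X_tr, y_tr, X_val):
        avg = sum(y_tr) / len(y_tr)
        return [avg + params['threshold']] * len(X_val)

    def metric(y_true, y_pred):
        return -sum(abs(t - p) for t, p in zip(y_true, y_pred)) / len(y_true)

    search = grid_search_cv(X, y, model, {'threshold': [0, 0.5, 1.0]}, metric, cv_splits=3)
    print(search['best_params'], search['best_score'])
    top3 = format_cv_results(search, top_n=3)   # three best parameter combinations
    chosen = extract_best_params(search)        # plain dict of the winning settings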
+
+
+ def random_search_cv(
+     X: List,
+     y: List,
+     model_func: Callable,
+     param_distributions: Dict[str, List],
+     metric_func: Callable,
+     n_iter: int = 10,
+     cv_splits: int = 5,
+     random_state: Optional[int] = None
+ ) -> Dict[str, Any]:
+     """
+     Random Search Cross-Validation for hyperparameter tuning.
+
+     Alias: randomsearch()
+
+     Randomly samples parameter combinations. Faster than grid search.
+
+     Args:
+         X: Feature data
+         y: Target data
+         model_func: Function(params, X_train, y_train, X_val) -> predictions
+         param_distributions: Dictionary of parameter lists
+         metric_func: Function(y_true, y_pred) -> score
+         n_iter: Number of random combinations to try. Default: 10
+         cv_splits: Number of CV folds. Default: 5
+         random_state: Random seed for reproducibility
+
+     Returns:
+         dict: Best parameters, best score, all results
+
+     Examples:
+         >>> from ilovetools.ml import randomsearch  # Short alias
+         >>> X = [[1], [2], [3], [4], [5]]
+         >>> y = [1, 2, 3, 4, 5]
+         >>>
+         >>> def model(params, X_tr, y_tr, X_val):
+         ...     alpha = params['alpha']
+         ...     avg = sum(y_tr) / len(y_tr)
+         ...     return [avg * alpha] * len(X_val)
+         >>>
+         >>> def metric(y_true, y_pred):
+         ...     return -sum(abs(t - p) for t, p in zip(y_true, y_pred)) / len(y_true)
+         >>>
+         >>> param_dist = {'alpha': [0.5, 0.8, 1.0, 1.2, 1.5]}
+         >>> results = randomsearch(X, y, model, param_dist, metric, n_iter=3)
+
+         >>> from ilovetools.ml import random_search_cv  # Full name
+         >>> results = random_search_cv(X, y, model, param_dist, metric)
+
+     Notes:
+         - Faster than grid search
+         - Often finds good params quickly
+         - Good for large parameter spaces
+         - May miss optimal combination
+     """
+     from .cross_validation import k_fold_cross_validation
+
+     if random_state is not None:
+         random.seed(random_state)
+
+     param_names = list(param_distributions.keys())
+     results = []
+
+     for _ in range(n_iter):
+         # Random sample
+         params = {name: random.choice(param_distributions[name]) for name in param_names}
+
+         # Perform CV
+         splits = k_fold_cross_validation(X, y, k=cv_splits)
+         scores = []
+
+         for train_idx, val_idx in splits:
+             X_train = [X[i] for i in train_idx]
+             y_train = [y[i] for i in train_idx]
+             X_val = [X[i] for i in val_idx]
+             y_val = [y[i] for i in val_idx]
+
+             y_pred = model_func(params, X_train, y_train, X_val)
+             score = metric_func(y_val, y_pred)
+             scores.append(score)
+
+         mean_score = sum(scores) / len(scores)
+
+         results.append({
+             'params': params,
+             'mean_score': mean_score,
+             'scores': scores
+         })
+
+     # Find best
+     best_result = max(results, key=lambda x: x['mean_score'])
+
+     return {
+         'best_params': best_result['params'],
+         'best_score': best_result['mean_score'],
+         'all_results': results,
+         'n_iterations': n_iter
+     }
+
+
+ # Create alias
+ randomsearch = random_search_cv
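Reproducibility sketch (illustrative only; `X`, `y`, `model`, `metric`, and `param_dist` are the toy objects from the docstring above): because the sampler seeds Python's global `random` module, the same `random_state` yields the same sequence of sampled parameter combinations.

    from ilovetools.ml import random_search_cv

    r1 = random_search_cv(X, y, model, param_dist, metric, n_iter=3, random_state=42)
    r2 = random_search_cv(X, y, model, param_dist, metric, n_iter=3, random_state=42)
    # Identical parameter draws across the two runs
    assert [r['params'] for r in r1['all_results']] == [r['params'] for r in r2['all_results']]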
+
+
+ def generate_param_grid(
+     param_ranges: Dict[str, Tuple[float, float, int]]
+ ) -> Dict[str, List[float]]:
+     """
+     Generate parameter grid from ranges.
+
+     Alias: param_grid()
+
+     Creates evenly spaced parameter values.
+
+     Args:
+         param_ranges: Dict of (min, max, n_values) tuples
+
+     Returns:
+         dict: Parameter grid
+
+     Examples:
+         >>> from ilovetools.ml import param_grid  # Short alias
+         >>> ranges = {
+         ...     'learning_rate': (0.001, 0.1, 5),
+         ...     'max_depth': (3, 10, 4)
+         ... }
+         >>> grid = param_grid(ranges)
+         >>> print(grid)
+         {'learning_rate': [0.001, 0.02575, 0.0505, 0.07525, 0.1], 'max_depth': [3.0, 5.333, 7.667, 10.0]}
+
+         >>> from ilovetools.ml import generate_param_grid  # Full name
+         >>> grid = generate_param_grid(ranges)
+
+     Notes:
+         - Creates evenly spaced values
+         - Useful for continuous parameters
+         - Combine with grid_search_cv
+         - Adjust n_values for granularity
+     """
+     grid = {}
+
+     for param_name, (min_val, max_val, n_values) in param_ranges.items():
+         if n_values == 1:
+             grid[param_name] = [min_val]
+         else:
+             step = (max_val - min_val) / (n_values - 1)
+             grid[param_name] = [min_val + i * step for i in range(n_values)]
+
+     return grid
+
+
+ # Create alias
+ param_grid = generate_param_grid
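For reference, the spacing is linear: step = (max - min) / (n_values - 1), so (0.001, 0.1, 5) in the docstring steps by 0.02475. A minimal sketch of feeding a generated grid into `grid_search_cv` (it assumes the toy `X`, `y`, `model`, and `metric` from the grid_search_cv docstring):

    from ilovetools.ml import generate_param_grid, grid_search_cv

    grid = generate_param_grid({'threshold': (0.0, 1.0, 5)})  # {'threshold': [0.0, 0.25, 0.5, 0.75, 1.0]}
    results = grid_search_cv(X, y, model, grid, metric, cv_splits=3)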
+
+
+ def extract_best_params(search_results: Dict[str, Any]) -> Dict[str, Any]:
+     """
+     Extract best parameters from search results.
+
+     Alias: best_params()
+
+     Args:
+         search_results: Results from grid_search_cv or random_search_cv
+
+     Returns:
+         dict: Best parameters
+
+     Examples:
+         >>> from ilovetools.ml import best_params  # Short alias
+         >>> results = {'best_params': {'alpha': 0.5}, 'best_score': 0.95}
+         >>> params = best_params(results)
+         >>> print(params)
+         {'alpha': 0.5}
+
+         >>> from ilovetools.ml import extract_best_params  # Full name
+         >>> params = extract_best_params(results)
+
+     Notes:
+         - Simple extraction utility
+         - Works with any search method
+         - Returns clean parameter dict
+         - Use for model training
+     """
+     return search_results.get('best_params', {})
+
+
+ # Create alias
+ best_params = extract_best_params
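A typical follow-up, sketched under the assumption that `search` is a dict returned by `grid_search_cv` or `random_search_cv` above and that `model`, `X`, and `y` are the toy objects from their docstrings (model signature `model(params, X_train, y_train, X_val)`):

    from ilovetools.ml import extract_best_params

    chosen = extract_best_params(search)         # e.g. {'threshold': 0.5}
    final_predictions = model(chosen, X, y, X)   # refit on the full dataset with the chosen params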
+
+
+ def format_cv_results(search_results: Dict[str, Any], top_n: int = 5) -> List[Dict]:
+     """
+     Format CV results for easy viewing.
+
+     Alias: cv_results()
+
+     Args:
+         search_results: Results from search
+         top_n: Number of top results to return. Default: 5
+
+     Returns:
+         list: Formatted top results
+
+     Examples:
+         >>> from ilovetools.ml import cv_results  # Short alias
+         >>> results = {
+         ...     'all_results': [
+         ...         {'params': {'a': 1}, 'mean_score': 0.8},
+         ...         {'params': {'a': 2}, 'mean_score': 0.9}
+         ...     ]
+         ... }
+         >>> top = cv_results(results, top_n=2)
+
+         >>> from ilovetools.ml import format_cv_results  # Full name
+         >>> top = format_cv_results(results)
+
+     Notes:
+         - Shows top performing combinations
+         - Sorted by score
+         - Easy comparison
+         - Use for analysis
+     """
+     all_results = search_results.get('all_results', [])
+     sorted_results = sorted(all_results, key=lambda x: x['mean_score'], reverse=True)
+
+     return sorted_results[:top_n]
+
+
+ # Create alias
+ cv_results = format_cv_results
+
+
+ def learning_curve_data(
+     X: List,
+     y: List,
+     model_func: Callable,
+     metric_func: Callable,
+     train_sizes: Optional[List[float]] = None
+ ) -> Dict[str, List]:
+     """
+     Generate learning curve data.
+
+     Alias: learning_curve()
+
+     Shows how model performance changes with training set size.
+
+     Args:
+         X: Feature data
+         y: Target data
+         model_func: Function(X_train, y_train, X_val) -> predictions
+         metric_func: Function(y_true, y_pred) -> score
+         train_sizes: List of training set proportions. Default: [0.2, 0.4, 0.6, 0.8, 1.0]
+
+     Returns:
+         dict: Training sizes, train scores, validation scores
+
+     Examples:
+         >>> from ilovetools.ml import learning_curve  # Short alias
+         >>> X = list(range(20))
+         >>> y = [i * 2 for i in X]
+         >>>
+         >>> def model(X_tr, y_tr, X_val):
+         ...     avg = sum(y_tr) / len(y_tr)
+         ...     return [avg] * len(X_val)
+         >>>
+         >>> def metric(y_true, y_pred):
+         ...     return -sum(abs(t - p) for t, p in zip(y_true, y_pred)) / len(y_true)
+         >>>
+         >>> curve = learning_curve(X, y, model, metric)
+
+         >>> from ilovetools.ml import learning_curve_data  # Full name
+         >>> curve = learning_curve_data(X, y, model, metric)
+
+     Notes:
+         - Diagnose overfitting/underfitting
+         - Shows if more data helps
+         - Plot train vs val scores
+         - Use for model selection
+     """
+     if train_sizes is None:
+         train_sizes = [0.2, 0.4, 0.6, 0.8, 1.0]
+
+     from .cross_validation import holdout_validation_split
+
+     train_scores = []
+     val_scores = []
+
+     for size in train_sizes:
+         # Split data; keep at least one held-out sample so the validation
+         # set is never empty when size == 1.0
+         X_train, X_val, y_train, y_val = holdout_validation_split(
+             X, y, test_size=max(1 - size, 1 / len(X))
+         )
+
+         # Train and evaluate
+         y_train_pred = model_func(X_train, y_train, X_train)
+         y_val_pred = model_func(X_train, y_train, X_val)
+
+         train_score = metric_func(y_train, y_train_pred)
+         val_score = metric_func(y_val, y_val_pred)
+
+         train_scores.append(train_score)
+         val_scores.append(val_score)
+
+     return {
+         'train_sizes': train_sizes,
+         'train_scores': train_scores,
+         'val_scores': val_scores
+     }
+
+
+ # Create alias
+ learning_curve = learning_curve_data
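A reading sketch for the returned dict (illustrative; `X`, `y`, `model`, and `metric` are the toy objects from the docstring above, where the model signature is `model(X_train, y_train, X_val)`):

    from ilovetools.ml import learning_curve_data

    curve = learning_curve_data(X, y, model, metric)
    for size, tr, va in zip(curve['train_sizes'], curve['train_scores'], curve['val_scores']):
        print(f"train fraction {size:.1f}: train={tr:.3f}, val={va:.3f}")
    # A large, persistent gap between train and val scores points to overfitting;
    # two low curves that have already converged suggest underfitting (more data alone will not help).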
+
+
+ def validation_curve_data(
+     X: List,
+     y: List,
+     model_func: Callable,
+     metric_func: Callable,
+     param_name: str,
+     param_range: List
+ ) -> Dict[str, List]:
+     """
+     Generate validation curve data.
+
+     Alias: val_curve()
+
+     Shows how model performance changes with a hyperparameter.
+
+     Args:
+         X: Feature data
+         y: Target data
+         model_func: Function(param_value, X_train, y_train, X_val) -> predictions
+         metric_func: Function(y_true, y_pred) -> score
+         param_name: Name of parameter to vary
+         param_range: List of parameter values to try
+
+     Returns:
+         dict: Parameter values, train scores, validation scores
+
+     Examples:
+         >>> from ilovetools.ml import val_curve  # Short alias
+         >>> X = list(range(10))
+         >>> y = [i * 2 for i in X]
+         >>>
+         >>> def model(param_val, X_tr, y_tr, X_val):
+         ...     avg = sum(y_tr) / len(y_tr)
+         ...     return [avg * param_val] * len(X_val)
+         >>>
+         >>> def metric(y_true, y_pred):
+         ...     return -sum(abs(t - p) for t, p in zip(y_true, y_pred)) / len(y_true)
+         >>>
+         >>> curve = val_curve(X, y, model, metric, 'alpha', [0.5, 1.0, 1.5])
+
+         >>> from ilovetools.ml import validation_curve_data  # Full name
+         >>> curve = validation_curve_data(X, y, model, metric, 'alpha', [0.5, 1.0])
+
+     Notes:
+         - Visualize hyperparameter impact
+         - Find optimal parameter value
+         - Detect overfitting
+         - Use for tuning guidance
+     """
+     from .cross_validation import holdout_validation_split
+
+     train_scores = []
+     val_scores = []
+
+     for param_value in param_range:
+         # Split data
+         X_train, X_val, y_train, y_val = holdout_validation_split(X, y)
+
+         # Train and evaluate
+         y_train_pred = model_func(param_value, X_train, y_train, X_train)
+         y_val_pred = model_func(param_value, X_train, y_train, X_val)
+
+         train_score = metric_func(y_train, y_train_pred)
+         val_score = metric_func(y_val, y_val_pred)
+
+         train_scores.append(train_score)
+         val_scores.append(val_score)
+
+     return {
+         'param_name': param_name,
+         'param_range': param_range,
+         'train_scores': train_scores,
+         'val_scores': val_scores
+     }
+
+
+ # Create alias
+ val_curve = validation_curve_data
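A short sketch for picking a value off the curve (illustrative; `X`, `y`, `model`, and `metric` are the toy objects from the docstring above, with model signature `model(param_value, X_train, y_train, X_val)`):

    from ilovetools.ml import validation_curve_data

    curve = validation_curve_data(X, y, model, metric, 'alpha', [0.5, 1.0, 1.5])
    best_i = max(range(len(curve['param_range'])), key=lambda i: curve['val_scores'][i])
    print(curve['param_name'], '->', curve['param_range'][best_i])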
+
+
+ def early_stopping_monitor(
+     scores: List[float],
+     patience: int = 5,
+     min_delta: float = 0.001
+ ) -> bool:
+     """
+     Monitor for early stopping.
+
+     Alias: early_stop()
+
+     Stops training if no improvement for patience epochs.
+
+     Args:
+         scores: List of validation scores (higher is better)
+         patience: Number of epochs to wait. Default: 5
+         min_delta: Minimum improvement threshold. Default: 0.001
+
+     Returns:
+         bool: True if should stop, False otherwise
+
+     Examples:
+         >>> from ilovetools.ml import early_stop  # Short alias
+         >>> scores = [0.7, 0.75, 0.78, 0.78, 0.78, 0.78]
+         >>> should_stop = early_stop(scores, patience=3)
+         >>> print(should_stop)
+         True
+
+         >>> from ilovetools.ml import early_stopping_monitor  # Full name
+         >>> should_stop = early_stopping_monitor(scores, patience=5)
+
+     Notes:
+         - Prevents overfitting
+         - Saves training time
+         - Common in neural networks
+         - Adjust patience for stability
+     """
+     if len(scores) < patience + 1:
+         return False
+
+     best_score = max(scores[:-patience])
+     recent_scores = scores[-patience:]
+
+     # Check if any recent score improved
+     for score in recent_scores:
+         if score > best_score + min_delta:
+             return False
+
+     return True
+
+
+ # Create alias
+ early_stop = early_stopping_monitor
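How this monitor would sit inside a training loop (sketch only; `train_one_epoch` and `evaluate` are hypothetical placeholders for the caller's own training step and validation metric, where higher scores are better):

    from ilovetools.ml import early_stopping_monitor

    val_scores = []
    for epoch in range(100):
        train_one_epoch()              # placeholder: update the model for one epoch
        val_scores.append(evaluate())  # placeholder: validation score for this epoch
        if early_stopping_monitor(val_scores, patience=5, min_delta=0.001):
            print(f"stopping early at epoch {epoch}")
            break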
+
+
+ def compare_models_cv(
+     X: List,
+     y: List,
+     models: Dict[str, Callable],
+     metric_func: Callable,
+     cv_splits: int = 5
+ ) -> Dict[str, Dict]:
+     """
+     Compare multiple models using cross-validation.
+
+     Alias: compare_models()
+
+     Args:
+         X: Feature data
+         y: Target data
+         models: Dict of model_name: model_func
+         metric_func: Function(y_true, y_pred) -> score
+         cv_splits: Number of CV folds. Default: 5
+
+     Returns:
+         dict: Results for each model
+
+     Examples:
+         >>> from ilovetools.ml import compare_models  # Short alias
+         >>> X = [[1], [2], [3], [4], [5]]
+         >>> y = [1, 2, 3, 4, 5]
+         >>>
+         >>> def model1(X_tr, y_tr, X_val):
+         ...     avg = sum(y_tr) / len(y_tr)
+         ...     return [avg] * len(X_val)
+         >>>
+         >>> def model2(X_tr, y_tr, X_val):
+         ...     avg = sum(y_tr) / len(y_tr)
+         ...     return [avg + 0.5] * len(X_val)
+         >>>
+         >>> def metric(y_true, y_pred):
+         ...     return -sum(abs(t - p) for t, p in zip(y_true, y_pred)) / len(y_true)
+         >>>
+         >>> models = {'Model1': model1, 'Model2': model2}
+         >>> results = compare_models(X, y, models, metric)
+
+         >>> from ilovetools.ml import compare_models_cv  # Full name
+         >>> results = compare_models_cv(X, y, models, metric)
+
+     Notes:
+         - Compare multiple algorithms
+         - Fair comparison with same CV splits
+         - Returns mean and std for each
+         - Use for model selection
+     """
+     from .cross_validation import k_fold_cross_validation
+
+     # Materialize the folds once so every model is scored on the same splits
+     splits = list(k_fold_cross_validation(X, y, k=cv_splits))
+     results = {}
+
+     for model_name, model_func in models.items():
+         scores = []
+
+         for train_idx, val_idx in splits:
+             X_train = [X[i] for i in train_idx]
+             y_train = [y[i] for i in train_idx]
+             X_val = [X[i] for i in val_idx]
+             y_val = [y[i] for i in val_idx]
+
+             y_pred = model_func(X_train, y_train, X_val)
+             score = metric_func(y_val, y_pred)
+             scores.append(score)
+
+         mean_score = sum(scores) / len(scores)
+
+         results[model_name] = {
+             'mean_score': mean_score,
+             'std_score': (sum((s - mean_score) ** 2 for s in scores) / len(scores)) ** 0.5,
+             'scores': scores
+         }
+
+     return results
+
+
+ # Create alias
+ compare_models = compare_models_cv
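A ranking sketch for the returned dict (illustrative; `X`, `y`, `model1`, `model2`, and `metric` are the toy objects from the docstring above):

    from ilovetools.ml import compare_models_cv

    results = compare_models_cv(X, y, {'Model1': model1, 'Model2': model2}, metric)
    ranked = sorted(results.items(), key=lambda kv: kv[1]['mean_score'], reverse=True)
    for name, stats in ranked:
        print(f"{name}: {stats['mean_score']:.3f} +/- {stats['std_score']:.3f}")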
+
+
+ def bayesian_search_simple(
+     X: List,
+     y: List,
+     model_func: Callable,
+     param_bounds: Dict[str, Tuple[float, float]],
+     metric_func: Callable,
+     n_iter: int = 10,
+     cv_splits: int = 5
+ ) -> Dict[str, Any]:
+     """
+     Simple Bayesian optimization for hyperparameter tuning.
+
+     Alias: bayesopt()
+
+     Uses past results to guide search. More efficient than random search.
+
+     Args:
+         X: Feature data
+         y: Target data
+         model_func: Function(params, X_train, y_train, X_val) -> predictions
+         param_bounds: Dict of (min, max) tuples
+         metric_func: Function(y_true, y_pred) -> score
+         n_iter: Number of iterations. Default: 10
+         cv_splits: Number of CV folds. Default: 5
+
+     Returns:
+         dict: Best parameters, best score, all results
+
+     Examples:
+         >>> from ilovetools.ml import bayesopt  # Short alias
+         >>> X = [[1], [2], [3], [4], [5]]
+         >>> y = [1, 2, 3, 4, 5]
+         >>>
+         >>> def model(params, X_tr, y_tr, X_val):
+         ...     alpha = params['alpha']
+         ...     avg = sum(y_tr) / len(y_tr)
+         ...     return [avg * alpha] * len(X_val)
+         >>>
+         >>> def metric(y_true, y_pred):
+         ...     return -sum(abs(t - p) for t, p in zip(y_true, y_pred)) / len(y_true)
+         >>>
+         >>> bounds = {'alpha': (0.5, 1.5)}
+         >>> results = bayesopt(X, y, model, bounds, metric, n_iter=5)
+
+         >>> from ilovetools.ml import bayesian_search_simple  # Full name
+         >>> results = bayesian_search_simple(X, y, model, bounds, metric)
+
+     Notes:
+         - More efficient than random search
+         - Learns from past evaluations
+         - Good for expensive models
+         - Simplified implementation
+     """
+     from .cross_validation import k_fold_cross_validation
+
+     results = []
+
+     # Initial random samples
+     n_random = min(3, n_iter)
+
+     for i in range(n_iter):
+         if i < n_random:
+             # Random sampling initially
+             params = {
+                 name: random.uniform(bounds[0], bounds[1])
+                 for name, bounds in param_bounds.items()
+             }
+         else:
+             # Exploit best region
+             best_params = max(results, key=lambda x: x['mean_score'])['params']
+             params = {
+                 name: best_params[name] + random.uniform(-0.1, 0.1) * (bounds[1] - bounds[0])
+                 for name, bounds in param_bounds.items()
+             }
+             # Clip to bounds
+             params = {
+                 name: max(param_bounds[name][0], min(param_bounds[name][1], val))
+                 for name, val in params.items()
+             }
+
+         # Evaluate
+         splits = k_fold_cross_validation(X, y, k=cv_splits)
+         scores = []
+
+         for train_idx, val_idx in splits:
+             X_train = [X[i] for i in train_idx]
+             y_train = [y[i] for i in train_idx]
+             X_val = [X[i] for i in val_idx]
+             y_val = [y[i] for i in val_idx]
+
+             y_pred = model_func(params, X_train, y_train, X_val)
+             score = metric_func(y_val, y_pred)
+             scores.append(score)
+
+         mean_score = sum(scores) / len(scores)
+
+         results.append({
+             'params': params,
+             'mean_score': mean_score,
+             'scores': scores
+         })
+
+     # Find best
+     best_result = max(results, key=lambda x: x['mean_score'])
+
+     return {
+         'best_params': best_result['params'],
+         'best_score': best_result['mean_score'],
+         'all_results': results,
+         'n_iterations': n_iter
+     }
+
+
+ # Create alias
+ bayesopt = bayesian_search_simple
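A closing note, derived from the code above rather than any external documentation: the first min(3, n_iter) iterations sample each parameter uniformly within its bounds, and every later iteration perturbs the best parameters found so far by up to ±10% of each bound's range before clipping back into the bounds; in other words, it is a greedy explore-then-exploit scheme rather than a full Bayesian optimizer. Illustrative usage with the docstring's toy `X`, `y`, `model`, and `metric`:

    from ilovetools.ml import bayesian_search_simple

    bounds = {'alpha': (0.5, 1.5)}
    search = bayesian_search_simple(X, y, model, bounds, metric, n_iter=10, cv_splits=3)
    print(search['best_params'], round(search['best_score'], 4))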