npcpy 1.2.36__py3-none-any.whl → 1.3.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
npcpy/ml_funcs.py ADDED
@@ -0,0 +1,746 @@
1
+ """
2
+ ml_funcs.py - NumPy-like interface for ML model operations
3
+
4
+ Parallels llm_funcs but for traditional ML:
5
+ - sklearn models
6
+ - PyTorch models
7
+ - Time series models
8
+ - Ensemble operations
9
+
10
+ Same interface pattern as llm_funcs:
11
+ - Single call does single operation
12
+ - matrix parameter enables cartesian product
13
+ - n_samples enables multiple samples
14
+ """
15
+
16
+ from __future__ import annotations
17
+ import copy
18
+ import itertools
19
+ import pickle
20
+ from concurrent.futures import ThreadPoolExecutor, as_completed
21
+ from dataclasses import dataclass, field
22
+ from typing import Any, Callable, Dict, List, Optional, Tuple, Union
23
+ import numpy as np
24
+
25
+ # Lazy imports for optional dependencies
26
+ _sklearn_available = False
27
+ _torch_available = False
28
+ _xgboost_available = False
29
+ _statsmodels_available = False
30
+
31
+ try:
32
+ import sklearn
33
+ from sklearn.base import clone, BaseEstimator
34
+ _sklearn_available = True
35
+ except ImportError:
36
+ pass
37
+
38
+ try:
39
+ import torch
40
+ import torch.nn as nn
41
+ _torch_available = True
42
+ except ImportError:
43
+ pass
44
+
45
+ try:
46
+ import xgboost as xgb
47
+ _xgboost_available = True
48
+ except ImportError:
49
+ pass
50
+
51
+ try:
52
+ import statsmodels.api as sm
53
+ _statsmodels_available = True
54
+ except ImportError:
55
+ pass
56
+
57
+
58
+ # ==================== Model Registry ====================
59
+
60
# Registry mapping short model names to fully-qualified import paths.
# Consumed by _get_model_instance(), which dynamically imports each path.
SKLEARN_MODELS = {
    # Classification
    'LogisticRegression': 'sklearn.linear_model.LogisticRegression',
    'RandomForestClassifier': 'sklearn.ensemble.RandomForestClassifier',
    'GradientBoostingClassifier': 'sklearn.ensemble.GradientBoostingClassifier',
    'SVC': 'sklearn.svm.SVC',
    'KNeighborsClassifier': 'sklearn.neighbors.KNeighborsClassifier',
    'DecisionTreeClassifier': 'sklearn.tree.DecisionTreeClassifier',
    'AdaBoostClassifier': 'sklearn.ensemble.AdaBoostClassifier',
    'GaussianNB': 'sklearn.naive_bayes.GaussianNB',
    'MLPClassifier': 'sklearn.neural_network.MLPClassifier',

    # Regression
    'LinearRegression': 'sklearn.linear_model.LinearRegression',
    'Ridge': 'sklearn.linear_model.Ridge',
    'Lasso': 'sklearn.linear_model.Lasso',
    'ElasticNet': 'sklearn.linear_model.ElasticNet',
    'RandomForestRegressor': 'sklearn.ensemble.RandomForestRegressor',
    'GradientBoostingRegressor': 'sklearn.ensemble.GradientBoostingRegressor',
    'SVR': 'sklearn.svm.SVR',
    'KNeighborsRegressor': 'sklearn.neighbors.KNeighborsRegressor',
    'DecisionTreeRegressor': 'sklearn.tree.DecisionTreeRegressor',
    'MLPRegressor': 'sklearn.neural_network.MLPRegressor',

    # Clustering
    'KMeans': 'sklearn.cluster.KMeans',
    'DBSCAN': 'sklearn.cluster.DBSCAN',
    'AgglomerativeClustering': 'sklearn.cluster.AgglomerativeClustering',

    # Dimensionality Reduction
    'PCA': 'sklearn.decomposition.PCA',
    'TSNE': 'sklearn.manifold.TSNE',
    # NOTE(review): despite the dict name, this entry imports from the
    # third-party umap-learn package, not sklearn — the lookup will raise
    # ImportError if umap-learn is not installed. Confirm this is intended.
    'UMAP': 'umap.UMAP',
}
94
+
95
+
96
+ def _import_model_class(model_path: str):
97
+ """Dynamically import a model class from path"""
98
+ parts = model_path.rsplit('.', 1)
99
+ if len(parts) == 2:
100
+ module_path, class_name = parts
101
+ import importlib
102
+ module = importlib.import_module(module_path)
103
+ return getattr(module, class_name)
104
+ raise ValueError(f"Invalid model path: {model_path}")
105
+
106
+
107
def _get_model_instance(model_name: str, **kwargs):
    """Instantiate a model by registry name.

    Looks the name up in SKLEARN_MODELS first; names starting with 'xgb'
    fall back to XGBoost (classifier if the name contains 'classifier',
    regressor otherwise) when xgboost is installed.

    Raises:
        ValueError: if the name matches neither the registry nor XGBoost.
    """
    registry_path = SKLEARN_MODELS.get(model_name)
    if registry_path is not None:
        return _import_model_class(registry_path)(**kwargs)

    lowered = model_name.lower()
    if _xgboost_available and lowered.startswith('xgb'):
        cls = xgb.XGBClassifier if 'classifier' in lowered else xgb.XGBRegressor
        return cls(**kwargs)

    raise ValueError(f"Unknown model: {model_name}")
119
+
120
+
121
+ # ==================== Core ML Functions ====================
122
+
123
def fit_model(
    X: Any,
    y: Any = None,
    model: Union[str, Any] = "RandomForestClassifier",
    n_samples: int = 1,
    matrix: Optional[Dict[str, List[Any]]] = None,
    parallel: bool = True,
    **kwargs
) -> Dict[str, Any]:
    """
    Fit ML model(s) to data.

    Similar interface to get_llm_response but for model fitting.

    Args:
        X: Training features
        y: Training targets (optional for unsupervised)
        model: Model name (see SKLEARN_MODELS), or an estimator instance
        n_samples: Number of models to fit (with different random seeds)
        matrix: Dict of param -> list of values for grid search
        parallel: Whether to parallelize fitting across a thread pool
        **kwargs: Model hyperparameters applied to every fit

    Returns:
        Dict with:
            - 'model': First fitted model (or None if all fits failed)
            - 'models': List of all fitted models
            - 'scores': Training scores when available, else None
            - 'results': Per-task dicts (model/score/params/sample_index,
              or 'error' when a task raised)

    Raises:
        ImportError: if scikit-learn is not installed.
        ValueError: if *model* is neither a name nor a fit-able estimator.
    """
    if not _sklearn_available:
        raise ImportError("sklearn required. Install with: pip install scikit-learn")

    def _build_instance(params: Dict[str, Any]):
        # One fresh estimator per task: by registry name, or by cloning the
        # provided instance and overriding matching hyperparameters. (The
        # original applied params only in the grid path; applying them here
        # too makes the single-fit path consistent with the docstring.)
        if isinstance(model, str):
            return _get_model_instance(model, **params)
        if hasattr(model, 'fit'):
            instance = clone(model)
            for key, value in params.items():
                if hasattr(instance, key):
                    setattr(instance, key, value)
            return instance
        raise ValueError(f"Invalid model: {model}")

    def _fit_single(model_instance, X, y, seed=None):
        # Distinct seeds give independent samples for estimators that
        # expose random_state.
        if seed is not None and hasattr(model_instance, 'random_state'):
            model_instance.random_state = seed
        model_instance.fit(X, y)
        score = None
        if hasattr(model_instance, 'score') and y is not None:
            try:
                score = model_instance.score(X, y)
            except Exception:
                # Scoring is best-effort; some estimators cannot score.
                score = None
        return {'model': model_instance, 'score': score}

    use_matrix = matrix is not None and len(matrix) > 0
    multi_sample = n_samples and n_samples > 1

    if not use_matrix and not multi_sample:
        # Fast path: exactly one fit, no task bookkeeping.
        result = _fit_single(_build_instance(dict(kwargs)), X, y)
        return {
            'model': result['model'],
            'models': [result['model']],
            'scores': [result['score']] if result['score'] is not None else None
        }

    # Build the cartesian product of grid values (scalars are promoted
    # to single-element lists).
    if use_matrix:
        keys = list(matrix.keys())
        value_lists = [
            matrix[k] if isinstance(matrix[k], list) else [matrix[k]]
            for k in keys
        ]
        combos = [dict(zip(keys, vals)) for vals in itertools.product(*value_lists)]
    else:
        combos = [{}]

    # Cross every combo with every sample index.
    all_tasks = [
        (combo, sample_idx)
        for combo in combos
        for sample_idx in range(max(1, n_samples))
    ]

    def _run_task(combo, sample_idx):
        # Explicit combo values win over baseline kwargs.
        result = _fit_single(_build_instance({**kwargs, **combo}), X, y, sample_idx)
        result['params'] = combo
        result['sample_index'] = sample_idx
        return result

    results = []
    if parallel and len(all_tasks) > 1:
        with ThreadPoolExecutor(max_workers=min(8, len(all_tasks))) as executor:
            futures = {
                executor.submit(_run_task, combo, sample_idx): (combo, sample_idx)
                for combo, sample_idx in all_tasks
            }
            for future in as_completed(futures):
                combo, sample_idx = futures[future]
                try:
                    results.append(future.result())
                except Exception as e:
                    results.append({'error': str(e), 'params': combo, 'sample_index': sample_idx})
    else:
        for combo, sample_idx in all_tasks:
            try:
                results.append(_run_task(combo, sample_idx))
            except Exception as e:
                results.append({'error': str(e), 'params': combo, 'sample_index': sample_idx})

    models = [r['model'] for r in results if 'model' in r]
    scores = [r['score'] for r in results if 'score' in r and r['score'] is not None]

    return {
        'model': models[0] if models else None,
        'models': models,
        'scores': scores if scores else None,
        'results': results
    }
260
+
261
+
262
def predict_model(
    X: Any,
    model: Any,
    n_samples: int = 1,
    matrix: Optional[Dict[str, List[Any]]] = None,
    parallel: bool = True,
    method: str = "predict",
    **kwargs
) -> Dict[str, Any]:
    """
    Make predictions with ML model(s).

    Args:
        X: Input features
        model: Fitted model or list of models
        n_samples: Number of prediction samples (for probabilistic models)
        matrix: Not typically used for prediction
        parallel: Whether to parallelize across models
        method: 'predict', 'predict_proba', 'transform'
        **kwargs: Additional prediction params forwarded to the method

    Returns:
        Dict with:
            - 'predictions': Predictions from the first/main model
            - 'all_predictions': Predictions from every model, in input order
              (a failed model contributes an "Error: ..." string)
    """
    model_list = model if isinstance(model, list) else [model]

    def _run_one(estimator, method_name):
        fn = getattr(estimator, method_name, None)
        if fn is not None:
            return fn(X, **kwargs)
        # Fall back to hard predictions when probabilities are unavailable.
        if method_name == "predict_proba" and hasattr(estimator, "predict"):
            return estimator.predict(X, **kwargs)
        raise ValueError(f"Model has no {method_name} method")

    if parallel and len(model_list) > 1:
        indexed = []
        with ThreadPoolExecutor(max_workers=min(8, len(model_list))) as pool:
            pending = {
                pool.submit(_run_one, est, method): pos
                for pos, est in enumerate(model_list)
            }
            for done in as_completed(pending):
                pos = pending[done]
                try:
                    indexed.append((pos, done.result()))
                except Exception as exc:
                    indexed.append((pos, f"Error: {exc}"))
        # Restore submission order regardless of completion order.
        indexed.sort(key=lambda pair: pair[0])
        predictions = [value for _, value in indexed]
    else:
        predictions = [_run_one(est, method) for est in model_list]

    return {
        'predictions': predictions[0] if predictions else None,
        'all_predictions': predictions
    }
320
+
321
+
322
def score_model(
    X: Any,
    y: Any,
    model: Any,
    metrics: List[str] = None,
    parallel: bool = True
) -> Dict[str, Any]:
    """
    Score model(s) on test data.

    Args:
        X: Test features
        y: Test targets
        model: Fitted model or list of models
        metrics: Metric names to compute; supported keys are
            'accuracy', 'f1', 'precision', 'recall', 'mse', 'mae', 'r2'.
            Unrecognized names are silently skipped. Defaults to ['accuracy'].
        parallel: Accepted for API symmetry with the other functions here;
            scoring currently runs sequentially.

    Returns:
        Dict with:
            - 'scores': Metric dict for a single model, or list of dicts
            - 'all_scores': List of metric dicts, one per model
            A metric that raises (e.g. a classification metric on a
            regression target) is recorded as None rather than failing.
    """
    from sklearn.metrics import (
        accuracy_score, f1_score, precision_score, recall_score,
        mean_squared_error, mean_absolute_error, r2_score
    )

    # 'weighted' averaging makes the classification metrics defined for
    # multiclass targets.
    metric_funcs = {
        'accuracy': accuracy_score,
        'f1': lambda y_true, y_pred: f1_score(y_true, y_pred, average='weighted'),
        'precision': lambda y_true, y_pred: precision_score(y_true, y_pred, average='weighted'),
        'recall': lambda y_true, y_pred: recall_score(y_true, y_pred, average='weighted'),
        'mse': mean_squared_error,
        'mae': mean_absolute_error,
        'r2': r2_score,
    }

    if metrics is None:
        metrics = ['accuracy']

    models = model if isinstance(model, list) else [model]

    all_scores = []
    for m in models:
        preds = m.predict(X)
        model_scores = {}
        for metric_name in metrics:
            if metric_name in metric_funcs:
                try:
                    model_scores[metric_name] = metric_funcs[metric_name](y, preds)
                except Exception:
                    # Metric inapplicable to this target/prediction type.
                    model_scores[metric_name] = None
        all_scores.append(model_scores)

    return {
        'scores': all_scores[0] if len(all_scores) == 1 else all_scores,
        'all_scores': all_scores
    }
378
+
379
+
380
+ # ==================== PyTorch Functions ====================
381
+
382
def fit_torch(
    model: Any,
    train_loader: Any,
    epochs: int = 10,
    optimizer: str = "Adam",
    lr: float = 0.001,
    criterion: str = "CrossEntropyLoss",
    device: str = "cpu",
    val_loader: Any = None,
    **kwargs
) -> Dict[str, Any]:
    """
    Train a PyTorch model with a simple epoch loop.

    Args:
        model: nn.Module instance
        train_loader: DataLoader (or iterable of batches) for training
        epochs: Number of training epochs
        optimizer: Optimizer class name looked up on torch.optim (e.g. "Adam")
        lr: Learning rate
        criterion: Loss class name looked up on torch.nn (e.g. "CrossEntropyLoss")
        device: Device to train on ("cpu", "cuda", ...)
        val_loader: Optional validation DataLoader; when given, a no-grad
            validation pass runs after each epoch
        **kwargs: Accepted but currently unused.
            NOTE(review): confirm whether these were meant to be forwarded
            to the optimizer.

    Returns:
        Dict with:
            - 'model': the trained model (same object, moved to *device*)
            - 'history': {'train_loss': [...], 'val_loss': [...]} per epoch
            - 'final_train_loss' / 'final_val_loss': last entries or None

    Raises:
        ImportError: if torch is not installed.
    """
    if not _torch_available:
        raise ImportError("PyTorch required. Install with: pip install torch")

    model = model.to(device)

    # Resolve optimizer class by name from torch.optim.
    opt_class = getattr(torch.optim, optimizer)
    opt = opt_class(model.parameters(), lr=lr)

    # Resolve loss class by name from torch.nn.
    crit_class = getattr(nn, criterion)
    crit = crit_class()

    history = {'train_loss': [], 'val_loss': []}

    for epoch in range(epochs):
        model.train()
        epoch_loss = 0.0

        for batch in train_loader:
            # Batches may be (inputs, targets) pairs or bare inputs.
            if isinstance(batch, (list, tuple)):
                inputs, targets = batch[0].to(device), batch[1].to(device)
            else:
                inputs = batch.to(device)
                targets = None

            opt.zero_grad()
            outputs = model(inputs)

            if targets is not None:
                loss = crit(outputs, targets)
            else:
                # No targets: the model's forward output is treated as the
                # loss itself (self-supervised / custom-loss models).
                loss = outputs  # Assume model returns loss

            loss.backward()
            opt.step()
            epoch_loss += loss.item()

        # Mean loss over batches for this epoch.
        history['train_loss'].append(epoch_loss / len(train_loader))

        # Validation pass (no gradients, eval mode) after each epoch.
        if val_loader is not None:
            model.eval()
            val_loss = 0.0
            with torch.no_grad():
                for batch in val_loader:
                    if isinstance(batch, (list, tuple)):
                        inputs, targets = batch[0].to(device), batch[1].to(device)
                    else:
                        inputs = batch.to(device)
                        targets = None

                    outputs = model(inputs)
                    # NOTE(review): target-less validation batches contribute
                    # nothing to val_loss but still count in the divisor below.
                    if targets is not None:
                        loss = crit(outputs, targets)
                        val_loss += loss.item()

            history['val_loss'].append(val_loss / len(val_loader))

    return {
        'model': model,
        'history': history,
        'final_train_loss': history['train_loss'][-1] if history['train_loss'] else None,
        'final_val_loss': history['val_loss'][-1] if history['val_loss'] else None
    }
474
+
475
+
476
def forward_torch(
    model: Any,
    inputs: Any,
    device: str = "cpu",
    grad: bool = False
) -> Dict[str, Any]:
    """
    Run a forward pass on a PyTorch model in eval mode.

    Args:
        model: nn.Module instance
        inputs: Input tensor or batch (moved to *device* if it supports .to)
        device: Device to run on
        grad: When True, gradients are tracked; otherwise torch.no_grad()

    Returns:
        Dict with:
            - 'outputs': raw model output
            - 'output_numpy': numpy copy when the output is a tensor,
              otherwise the output unchanged

    Raises:
        ImportError: if torch is not installed.
    """
    if not _torch_available:
        raise ImportError("PyTorch required. Install with: pip install torch")

    net = model.to(device)
    net.eval()

    batch = inputs.to(device) if hasattr(inputs, 'to') else inputs

    if grad:
        result = net(batch)
    else:
        # Inference only: skip autograd bookkeeping.
        with torch.no_grad():
            result = net(batch)

    as_numpy = result.cpu().numpy() if hasattr(result, 'cpu') else result
    return {
        'outputs': result,
        'output_numpy': as_numpy
    }
513
+
514
+
515
+ # ==================== Time Series Functions ====================
516
+
517
def fit_timeseries(
    series: Any,
    method: str = "arima",
    order: Tuple[int, int, int] = (1, 1, 1),
    seasonal_order: Tuple[int, int, int, int] = None,
    **kwargs
) -> Dict[str, Any]:
    """
    Fit a time series model.

    Args:
        series: Time series data (array-like)
        method: 'arima', 'sarima', or 'exp_smoothing'
        order: ARIMA order (p, d, q)
        seasonal_order: Seasonal order (P, D, Q, s); enables SARIMAX when
            method='sarima'
        **kwargs: Additional model constructor params

    Returns:
        Dict with the fitted results object plus diagnostics:
        'aic'/'bic'/'summary' for (S)ARIMA, 'aic'/'sse' for exp_smoothing.

    Raises:
        ImportError: if statsmodels is not installed.
        ValueError: for an unrecognized *method*.
    """
    method_key = method.lower()

    # Every supported method needs statsmodels; check up front so all
    # branches fail with the same actionable message. (Previously only the
    # arima branch checked, so exp_smoothing raised a raw ImportError.)
    if method_key in ('arima', 'sarima', 'exp_smoothing') and not _statsmodels_available:
        raise ImportError("statsmodels required. Install with: pip install statsmodels")

    if method_key in ('arima', 'sarima'):
        from statsmodels.tsa.arima.model import ARIMA
        from statsmodels.tsa.statespace.sarimax import SARIMAX

        if method_key == 'sarima' and seasonal_order:
            model = SARIMAX(series, order=order, seasonal_order=seasonal_order, **kwargs)
        else:
            model = ARIMA(series, order=order, **kwargs)

        fitted = model.fit()

        return {
            'model': fitted,
            'aic': fitted.aic,
            'bic': fitted.bic,
            'summary': str(fitted.summary())
        }

    elif method_key == 'exp_smoothing':
        from statsmodels.tsa.holtwinters import ExponentialSmoothing

        model = ExponentialSmoothing(series, **kwargs)
        fitted = model.fit()

        return {
            'model': fitted,
            'aic': fitted.aic,
            'sse': fitted.sse
        }

    else:
        raise ValueError(f"Unknown time series method: {method}")
572
+
573
+
574
def forecast_timeseries(
    model: Any,
    horizon: int,
    **kwargs
) -> Dict[str, Any]:
    """
    Generate forecasts from a fitted time series model.

    Args:
        model: Fitted time series results object
        horizon: Number of periods to forecast
        **kwargs: Additional params forwarded to model.forecast

    Returns:
        Dict with 'forecast'; when the model exposes get_forecast, also
        'conf_int' (confidence intervals) and 'forecast_mean'.

    Raises:
        ValueError: if the model supports neither forecast nor predict.
    """
    forecast_fn = getattr(model, 'forecast', None)
    if forecast_fn is not None:
        point_forecast = forecast_fn(steps=horizon, **kwargs)
    elif hasattr(model, 'predict'):
        # Predict the horizon immediately following the training sample.
        n_obs = len(model.data.endog)
        point_forecast = model.predict(start=n_obs, end=n_obs + horizon - 1)
    else:
        raise ValueError("Model has no forecast or predict method")

    result = {'forecast': point_forecast}

    # Confidence intervals are only available via get_forecast (statsmodels
    # state-space results expose it).
    if hasattr(model, 'get_forecast'):
        interval = model.get_forecast(steps=horizon)
        result['conf_int'] = interval.conf_int()
        result['forecast_mean'] = interval.predicted_mean

    return result
606
+
607
+
608
+ # ==================== Ensemble Functions ====================
609
+
610
def ensemble_predict(
    X: Any,
    models: List[Any],
    method: str = "vote",
    weights: List[float] = None
) -> Dict[str, Any]:
    """
    Combine predictions from multiple fitted models.

    Args:
        X: Input features
        models: List of fitted models (each must implement predict)
        method: 'vote' (majority, classification), 'average', or 'weighted'
        weights: Per-model weights for method='weighted'; defaults to
            uniform 1/len(models)

    Returns:
        Dict with:
            - 'predictions': combined predictions
            - 'individual_predictions': (n_models, n_points) array
            - 'method': the method used

    Raises:
        ValueError: for an unknown method, or when len(weights) does not
            match len(models).
    """
    # Stack per-model predictions into a (n_models, n_points) array.
    all_preds = np.array([m.predict(X) for m in models])

    if method == "vote":
        # Majority voting for classification labels.
        from scipy import stats
        ensemble_pred, _ = stats.mode(all_preds, axis=0)
        ensemble_pred = ensemble_pred.flatten()

    elif method == "average":
        ensemble_pred = np.mean(all_preds, axis=0)

    elif method == "weighted":
        if weights is None:
            weights = [1.0 / len(models)] * len(models)
        # Previously a mismatched weights length silently broadcast or
        # produced a cryptic numpy error; fail fast with a clear message.
        if len(weights) != len(models):
            raise ValueError(
                f"weights length ({len(weights)}) must match number of models ({len(models)})"
            )
        weight_col = np.array(weights).reshape(-1, 1)
        ensemble_pred = np.sum(all_preds * weight_col, axis=0)

    else:
        raise ValueError(f"Unknown ensemble method: {method}")

    return {
        'predictions': ensemble_pred,
        'individual_predictions': all_preds,
        'method': method
    }
659
+
660
+
661
def cross_validate(
    X: Any,
    y: Any,
    model: Union[str, Any],
    cv: int = 5,
    metrics: List[str] = None,
    parallel: bool = True,
    **kwargs
) -> Dict[str, Any]:
    """
    Cross-validate a model with sklearn's cross_val_score.

    Args:
        X: Features
        y: Targets
        model: Model name (see SKLEARN_MODELS) or estimator instance
        cv: Number of folds
        metrics: Metric names; only sklearn scoring names in
            {'accuracy', 'f1', 'precision', 'recall', 'r2',
            'neg_mean_squared_error'} are computed, others are skipped.
            Defaults to ['accuracy'].
        parallel: When True, folds run in parallel (n_jobs=-1)
        **kwargs: Hyperparameters used when *model* is a name

    Returns:
        Dict mapping each computed metric to
        {'mean': ..., 'std': ..., 'scores': [...]}.
    """
    from sklearn.model_selection import cross_val_score

    if isinstance(model, str):
        model_instance = _get_model_instance(model, **kwargs)
    else:
        model_instance = model

    if metrics is None:
        metrics = ['accuracy']

    # Only pass scoring names sklearn recognizes; anything else is skipped.
    supported = {'accuracy', 'f1', 'precision', 'recall', 'r2', 'neg_mean_squared_error'}

    # The parallel flag was previously accepted but ignored; wire it to
    # sklearn's n_jobs so folds actually run concurrently.
    n_jobs = -1 if parallel else None

    results = {}
    for metric in metrics:
        if metric in supported:
            scores = cross_val_score(model_instance, X, y, cv=cv, scoring=metric, n_jobs=n_jobs)
            results[metric] = {
                'mean': np.mean(scores),
                'std': np.std(scores),
                'scores': scores.tolist()
            }

    return results
707
+
708
+
709
+ # ==================== Utility Functions ====================
710
+
711
def serialize_model(model: Any, path: str = None) -> bytes:
    """Pickle a model to bytes; optionally also write the bytes to *path*."""
    payload = pickle.dumps(model)
    if path:
        with open(path, 'wb') as handle:
            handle.write(payload)
    return payload
718
+
719
+
720
def deserialize_model(data: Union[bytes, str]) -> Any:
    """Unpickle a model from raw bytes, or from a file path given as str.

    NOTE(security): pickle.loads can execute arbitrary code — only load
    data from trusted sources.
    """
    if isinstance(data, str):
        with open(data, 'rb') as handle:
            data = handle.read()
    return pickle.loads(data)
726
+
727
+
728
def get_model_params(model: Any) -> Dict[str, Any]:
    """Return hyperparameters (sklearn-style get_params), torch parameter
    names (state_dict keys), or an empty dict for anything else."""
    if hasattr(model, 'get_params'):
        return model.get_params()
    if hasattr(model, 'state_dict'):
        return {'type': 'torch', 'params': list(model.state_dict().keys())}
    return {}
736
+
737
+
738
def set_model_params(model: Any, params: Dict[str, Any]) -> Any:
    """Apply hyperparameters: via set_params when available, otherwise by
    setting each attribute the model already has (unknown keys skipped)."""
    setter = getattr(model, 'set_params', None)
    if setter is not None:
        return setter(**params)
    for name, value in params.items():
        if hasattr(model, name):
            setattr(model, name, value)
    return model