npcpy 1.2.36-py3-none-any.whl → 1.2.37-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- npcpy/__init__.py +10 -2
- npcpy/gen/image_gen.py +5 -2
- npcpy/gen/response.py +262 -64
- npcpy/llm_funcs.py +478 -832
- npcpy/ml_funcs.py +746 -0
- npcpy/npc_array.py +1294 -0
- npcpy/npc_compiler.py +320 -257
- npcpy/npc_sysenv.py +17 -2
- npcpy/serve.py +162 -14
- npcpy/sql/npcsql.py +96 -59
- {npcpy-1.2.36.dist-info → npcpy-1.2.37.dist-info}/METADATA +173 -1
- {npcpy-1.2.36.dist-info → npcpy-1.2.37.dist-info}/RECORD +15 -13
- {npcpy-1.2.36.dist-info → npcpy-1.2.37.dist-info}/WHEEL +0 -0
- {npcpy-1.2.36.dist-info → npcpy-1.2.37.dist-info}/licenses/LICENSE +0 -0
- {npcpy-1.2.36.dist-info → npcpy-1.2.37.dist-info}/top_level.txt +0 -0
npcpy/ml_funcs.py
ADDED
@@ -0,0 +1,746 @@
"""
ml_funcs.py - NumPy-like interface for ML model operations

Parallels llm_funcs but for traditional ML:
- sklearn models
- PyTorch models
- Time series models
- Ensemble operations

Same interface pattern as llm_funcs:
- Single call does single operation
- matrix parameter enables cartesian product
- n_samples enables multiple samples
"""

from __future__ import annotations
import copy
import itertools
import pickle
from concurrent.futures import ThreadPoolExecutor, as_completed
from dataclasses import dataclass, field
from typing import Any, Callable, Dict, List, Optional, Tuple, Union
import numpy as np

# Lazy imports for optional dependencies
_sklearn_available = False
_torch_available = False
_xgboost_available = False
_statsmodels_available = False

try:
    import sklearn
    from sklearn.base import clone, BaseEstimator
    _sklearn_available = True
except ImportError:
    pass

try:
    import torch
    import torch.nn as nn
    _torch_available = True
except ImportError:
    pass

try:
    import xgboost as xgb
    _xgboost_available = True
except ImportError:
    pass

try:
    import statsmodels.api as sm
    _statsmodels_available = True
except ImportError:
    pass
# ==================== Model Registry ====================

SKLEARN_MODELS = {
    # Classification
    'LogisticRegression': 'sklearn.linear_model.LogisticRegression',
    'RandomForestClassifier': 'sklearn.ensemble.RandomForestClassifier',
    'GradientBoostingClassifier': 'sklearn.ensemble.GradientBoostingClassifier',
    'SVC': 'sklearn.svm.SVC',
    'KNeighborsClassifier': 'sklearn.neighbors.KNeighborsClassifier',
    'DecisionTreeClassifier': 'sklearn.tree.DecisionTreeClassifier',
    'AdaBoostClassifier': 'sklearn.ensemble.AdaBoostClassifier',
    'GaussianNB': 'sklearn.naive_bayes.GaussianNB',
    'MLPClassifier': 'sklearn.neural_network.MLPClassifier',

    # Regression
    'LinearRegression': 'sklearn.linear_model.LinearRegression',
    'Ridge': 'sklearn.linear_model.Ridge',
    'Lasso': 'sklearn.linear_model.Lasso',
    'ElasticNet': 'sklearn.linear_model.ElasticNet',
    'RandomForestRegressor': 'sklearn.ensemble.RandomForestRegressor',
    'GradientBoostingRegressor': 'sklearn.ensemble.GradientBoostingRegressor',
    'SVR': 'sklearn.svm.SVR',
    'KNeighborsRegressor': 'sklearn.neighbors.KNeighborsRegressor',
    'DecisionTreeRegressor': 'sklearn.tree.DecisionTreeRegressor',
    'MLPRegressor': 'sklearn.neural_network.MLPRegressor',

    # Clustering
    'KMeans': 'sklearn.cluster.KMeans',
    'DBSCAN': 'sklearn.cluster.DBSCAN',
    'AgglomerativeClustering': 'sklearn.cluster.AgglomerativeClustering',

    # Dimensionality Reduction
    'PCA': 'sklearn.decomposition.PCA',
    'TSNE': 'sklearn.manifold.TSNE',
    'UMAP': 'umap.UMAP',
}


def _import_model_class(model_path: str):
    """Dynamically import a model class from path"""
    parts = model_path.rsplit('.', 1)
    if len(parts) == 2:
        module_path, class_name = parts
        import importlib
        module = importlib.import_module(module_path)
        return getattr(module, class_name)
    raise ValueError(f"Invalid model path: {model_path}")


def _get_model_instance(model_name: str, **kwargs):
    """Get model instance from name"""
    if model_name in SKLEARN_MODELS:
        model_class = _import_model_class(SKLEARN_MODELS[model_name])
        return model_class(**kwargs)
    elif _xgboost_available and model_name.lower().startswith('xgb'):
        if 'classifier' in model_name.lower():
            return xgb.XGBClassifier(**kwargs)
        else:
            return xgb.XGBRegressor(**kwargs)
    else:
        raise ValueError(f"Unknown model: {model_name}")
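For orientation (an illustration, not part of the diff): the registry above maps plain names to import paths, and the internal helper resolves them lazily. A minimal sketch, assuming scikit-learn is installed:

```python
# Hypothetical use of the internal helpers above; hyperparameters pass through to the constructor.
from npcpy.ml_funcs import _get_model_instance

clf = _get_model_instance("RandomForestClassifier", n_estimators=50)  # sklearn.ensemble.RandomForestClassifier
reg = _get_model_instance("Ridge", alpha=0.5)                         # sklearn.linear_model.Ridge
print(type(clf).__name__, type(reg).__name__)
```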
# ==================== Core ML Functions ====================

def fit_model(
    X: Any,
    y: Any = None,
    model: Union[str, Any] = "RandomForestClassifier",
    n_samples: int = 1,
    matrix: Optional[Dict[str, List[Any]]] = None,
    parallel: bool = True,
    **kwargs
) -> Dict[str, Any]:
    """
    Fit ML model(s) to data.

    Similar interface to get_llm_response but for model fitting.

    Args:
        X: Training features
        y: Training targets (optional for unsupervised)
        model: Model name, class, or instance
        n_samples: Number of models to fit (with different random seeds)
        matrix: Dict of param -> list for grid search
        parallel: Whether to parallelize fitting
        **kwargs: Model hyperparameters

    Returns:
        Dict with:
        - 'model': Fitted model(s)
        - 'models': List of all fitted models (if multiple)
        - 'scores': Training scores if available
    """
    if not _sklearn_available:
        raise ImportError("sklearn required. Install with: pip install scikit-learn")

    def _fit_single(model_instance, X, y, seed=None):
        if seed is not None and hasattr(model_instance, 'random_state'):
            model_instance.random_state = seed
        model_instance.fit(X, y)
        score = None
        if hasattr(model_instance, 'score') and y is not None:
            try:
                score = model_instance.score(X, y)
            except:
                pass
        return {'model': model_instance, 'score': score}

    # Handle matrix (grid search)
    use_matrix = matrix is not None and len(matrix) > 0
    multi_sample = n_samples and n_samples > 1

    if not use_matrix and not multi_sample:
        # Single fit
        if isinstance(model, str):
            model_instance = _get_model_instance(model, **kwargs)
        elif hasattr(model, 'fit'):
            model_instance = clone(model) if _sklearn_available else copy.deepcopy(model)
        else:
            raise ValueError(f"Invalid model: {model}")

        result = _fit_single(model_instance, X, y)
        return {
            'model': result['model'],
            'models': [result['model']],
            'scores': [result['score']] if result['score'] is not None else None
        }

    # Build all combinations
    combos = []
    if use_matrix:
        keys = list(matrix.keys())
        values = [matrix[k] if isinstance(matrix[k], list) else [matrix[k]] for k in keys]
        for combo_values in itertools.product(*values):
            combo = dict(zip(keys, combo_values))
            combos.append(combo)
    else:
        combos = [{}]

    # Add sampling
    all_tasks = []
    for combo in combos:
        for sample_idx in range(max(1, n_samples)):
            all_tasks.append((combo, sample_idx))

    # Execute fits
    results = []
    if parallel and len(all_tasks) > 1:
        with ThreadPoolExecutor(max_workers=min(8, len(all_tasks))) as executor:
            futures = {}
            for combo, sample_idx in all_tasks:
                merged_kwargs = {**kwargs, **combo}
                if isinstance(model, str):
                    model_instance = _get_model_instance(model, **merged_kwargs)
                else:
                    model_instance = clone(model)
                    for k, v in merged_kwargs.items():
                        if hasattr(model_instance, k):
                            setattr(model_instance, k, v)

                future = executor.submit(_fit_single, model_instance, X, y, sample_idx)
                futures[future] = (combo, sample_idx)

            for future in as_completed(futures):
                combo, sample_idx = futures[future]
                try:
                    result = future.result()
                    result['params'] = combo
                    result['sample_index'] = sample_idx
                    results.append(result)
                except Exception as e:
                    results.append({'error': str(e), 'params': combo, 'sample_index': sample_idx})
    else:
        for combo, sample_idx in all_tasks:
            merged_kwargs = {**kwargs, **combo}
            if isinstance(model, str):
                model_instance = _get_model_instance(model, **merged_kwargs)
            else:
                model_instance = clone(model)
                for k, v in merged_kwargs.items():
                    if hasattr(model_instance, k):
                        setattr(model_instance, k, v)

            try:
                result = _fit_single(model_instance, X, y, sample_idx)
                result['params'] = combo
                result['sample_index'] = sample_idx
                results.append(result)
            except Exception as e:
                results.append({'error': str(e), 'params': combo, 'sample_index': sample_idx})

    # Aggregate
    models = [r['model'] for r in results if 'model' in r]
    scores = [r['score'] for r in results if 'score' in r and r['score'] is not None]

    return {
        'model': models[0] if models else None,
        'models': models,
        'scores': scores if scores else None,
        'results': results
    }
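Not part of the diff: a minimal sketch of how the matrix and n_samples parameters combine in fit_model, assuming scikit-learn is installed. The matrix expands into a cartesian product of hyperparameters and each combination is fitted n_samples times.

```python
# Usage sketch based on the fit_model signature above.
from sklearn.datasets import load_iris
from npcpy.ml_funcs import fit_model

X, y = load_iris(return_X_y=True)

result = fit_model(
    X, y,
    model="RandomForestClassifier",
    matrix={"n_estimators": [50, 100], "max_depth": [3, None]},
    n_samples=2,
)
print(len(result["models"]))  # 2 * 2 combinations * 2 samples = 8 fitted models
print(result["scores"])       # training scores, one per fit
```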
def predict_model(
    X: Any,
    model: Any,
    n_samples: int = 1,
    matrix: Optional[Dict[str, List[Any]]] = None,
    parallel: bool = True,
    method: str = "predict",
    **kwargs
) -> Dict[str, Any]:
    """
    Make predictions with ML model(s).

    Args:
        X: Input features
        model: Fitted model or list of models
        n_samples: Number of prediction samples (for probabilistic models)
        matrix: Not typically used for prediction
        parallel: Whether to parallelize
        method: 'predict', 'predict_proba', 'transform'
        **kwargs: Additional prediction params

    Returns:
        Dict with:
        - 'predictions': Predictions from first/main model
        - 'all_predictions': All predictions (if multiple models)
    """
    models = model if isinstance(model, list) else [model]

    def _predict_single(m, method_name):
        if hasattr(m, method_name):
            pred_fn = getattr(m, method_name)
            return pred_fn(X, **kwargs)
        elif method_name == "predict_proba" and hasattr(m, "predict"):
            return m.predict(X, **kwargs)
        else:
            raise ValueError(f"Model has no {method_name} method")

    results = []
    if parallel and len(models) > 1:
        with ThreadPoolExecutor(max_workers=min(8, len(models))) as executor:
            futures = {executor.submit(_predict_single, m, method): i for i, m in enumerate(models)}
            for future in as_completed(futures):
                idx = futures[future]
                try:
                    pred = future.result()
                    results.append((idx, pred))
                except Exception as e:
                    results.append((idx, f"Error: {e}"))

        results.sort(key=lambda x: x[0])
        predictions = [r[1] for r in results]
    else:
        predictions = [_predict_single(m, method) for m in models]

    return {
        'predictions': predictions[0] if predictions else None,
        'all_predictions': predictions
    }
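Continuing the fit_model sketch above (illustration only; 'result' is the dict it returned):

```python
# Usage sketch based on the predict_model signature above.
from npcpy.ml_funcs import predict_model

preds = predict_model(X, model=result["models"], method="predict")
print(preds["predictions"].shape)       # predictions from the first fitted model
print(len(preds["all_predictions"]))    # one prediction array per model

proba = predict_model(X, model=result["model"], method="predict_proba")
print(proba["predictions"].shape)       # (n_samples, n_classes) for a single classifier
```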
def score_model(
    X: Any,
    y: Any,
    model: Any,
    metrics: List[str] = None,
    parallel: bool = True
) -> Dict[str, Any]:
    """
    Score model(s) on test data.

    Args:
        X: Test features
        y: Test targets
        model: Fitted model or list of models
        metrics: List of metric names ('accuracy', 'f1', 'mse', 'r2', etc.)
        parallel: Whether to parallelize

    Returns:
        Dict with scores for each metric
    """
    from sklearn.metrics import (
        accuracy_score, f1_score, precision_score, recall_score,
        mean_squared_error, mean_absolute_error, r2_score
    )

    metric_funcs = {
        'accuracy': accuracy_score,
        'f1': lambda y_true, y_pred: f1_score(y_true, y_pred, average='weighted'),
        'precision': lambda y_true, y_pred: precision_score(y_true, y_pred, average='weighted'),
        'recall': lambda y_true, y_pred: recall_score(y_true, y_pred, average='weighted'),
        'mse': mean_squared_error,
        'mae': mean_absolute_error,
        'r2': r2_score,
    }

    if metrics is None:
        metrics = ['accuracy']

    models = model if isinstance(model, list) else [model]

    all_scores = []
    for m in models:
        preds = m.predict(X)
        model_scores = {}
        for metric_name in metrics:
            if metric_name in metric_funcs:
                try:
                    model_scores[metric_name] = metric_funcs[metric_name](y, preds)
                except:
                    model_scores[metric_name] = None
        all_scores.append(model_scores)

    return {
        'scores': all_scores[0] if len(all_scores) == 1 else all_scores,
        'all_scores': all_scores
    }
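An illustration (not in the diff) of score_model on a held-out split, assuming scikit-learn is installed:

```python
# Usage sketch based on the fit_model and score_model signatures above.
from sklearn.model_selection import train_test_split
from npcpy.ml_funcs import fit_model, score_model

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=0)
fitted = fit_model(X_train, y_train, model="LogisticRegression", max_iter=500)
scores = score_model(X_test, y_test, model=fitted["model"], metrics=["accuracy", "f1"])
print(scores["scores"])  # e.g. {'accuracy': 0.9..., 'f1': 0.9...}
```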
# ==================== PyTorch Functions ====================

def fit_torch(
    model: Any,
    train_loader: Any,
    epochs: int = 10,
    optimizer: str = "Adam",
    lr: float = 0.001,
    criterion: str = "CrossEntropyLoss",
    device: str = "cpu",
    val_loader: Any = None,
    **kwargs
) -> Dict[str, Any]:
    """
    Train PyTorch model.

    Args:
        model: nn.Module instance
        train_loader: DataLoader for training
        epochs: Number of training epochs
        optimizer: Optimizer name
        lr: Learning rate
        criterion: Loss function name
        device: Device to train on
        val_loader: Optional validation DataLoader

    Returns:
        Dict with trained model and training history
    """
    if not _torch_available:
        raise ImportError("PyTorch required. Install with: pip install torch")

    model = model.to(device)

    # Get optimizer
    opt_class = getattr(torch.optim, optimizer)
    opt = opt_class(model.parameters(), lr=lr)

    # Get criterion
    crit_class = getattr(nn, criterion)
    crit = crit_class()

    history = {'train_loss': [], 'val_loss': []}

    for epoch in range(epochs):
        model.train()
        epoch_loss = 0.0

        for batch in train_loader:
            if isinstance(batch, (list, tuple)):
                inputs, targets = batch[0].to(device), batch[1].to(device)
            else:
                inputs = batch.to(device)
                targets = None

            opt.zero_grad()
            outputs = model(inputs)

            if targets is not None:
                loss = crit(outputs, targets)
            else:
                loss = outputs  # Assume model returns loss

            loss.backward()
            opt.step()
            epoch_loss += loss.item()

        history['train_loss'].append(epoch_loss / len(train_loader))

        # Validation
        if val_loader is not None:
            model.eval()
            val_loss = 0.0
            with torch.no_grad():
                for batch in val_loader:
                    if isinstance(batch, (list, tuple)):
                        inputs, targets = batch[0].to(device), batch[1].to(device)
                    else:
                        inputs = batch.to(device)
                        targets = None

                    outputs = model(inputs)
                    if targets is not None:
                        loss = crit(outputs, targets)
                        val_loss += loss.item()

            history['val_loss'].append(val_loss / len(val_loader))

    return {
        'model': model,
        'history': history,
        'final_train_loss': history['train_loss'][-1] if history['train_loss'] else None,
        'final_val_loss': history['val_loss'][-1] if history['val_loss'] else None
    }


def forward_torch(
    model: Any,
    inputs: Any,
    device: str = "cpu",
    grad: bool = False
) -> Dict[str, Any]:
    """
    Run forward pass on PyTorch model.

    Args:
        model: nn.Module instance
        inputs: Input tensor or batch
        device: Device to run on
        grad: Whether to compute gradients

    Returns:
        Dict with outputs
    """
    if not _torch_available:
        raise ImportError("PyTorch required. Install with: pip install torch")

    model = model.to(device)
    model.eval()

    if hasattr(inputs, 'to'):
        inputs = inputs.to(device)

    if grad:
        outputs = model(inputs)
    else:
        with torch.no_grad():
            outputs = model(inputs)

    return {
        'outputs': outputs,
        'output_numpy': outputs.cpu().numpy() if hasattr(outputs, 'cpu') else outputs
    }
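A minimal fit_torch/forward_torch sketch on toy tensors (not in the diff), assuming torch is installed:

```python
# Usage sketch based on the fit_torch and forward_torch signatures above; the toy data is illustrative only.
import torch
from torch import nn
from torch.utils.data import DataLoader, TensorDataset
from npcpy.ml_funcs import fit_torch, forward_torch

X_t = torch.randn(64, 4)
y_t = torch.randint(0, 3, (64,))
loader = DataLoader(TensorDataset(X_t, y_t), batch_size=16)

net = nn.Sequential(nn.Linear(4, 16), nn.ReLU(), nn.Linear(16, 3))
trained = fit_torch(net, loader, epochs=5, optimizer="Adam", lr=1e-3, criterion="CrossEntropyLoss")
print(trained["final_train_loss"])

out = forward_torch(trained["model"], X_t)
print(out["output_numpy"].shape)  # (64, 3)
```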
# ==================== Time Series Functions ====================

def fit_timeseries(
    series: Any,
    method: str = "arima",
    order: Tuple[int, int, int] = (1, 1, 1),
    seasonal_order: Tuple[int, int, int, int] = None,
    **kwargs
) -> Dict[str, Any]:
    """
    Fit time series model.

    Args:
        series: Time series data (array-like)
        method: 'arima', 'sarima', 'exp_smoothing', 'prophet'
        order: ARIMA order (p, d, q)
        seasonal_order: Seasonal order (P, D, Q, s) for SARIMA
        **kwargs: Additional model params

    Returns:
        Dict with fitted model and diagnostics
    """
    if method.lower() in ('arima', 'sarima'):
        if not _statsmodels_available:
            raise ImportError("statsmodels required. Install with: pip install statsmodels")

        from statsmodels.tsa.arima.model import ARIMA
        from statsmodels.tsa.statespace.sarimax import SARIMAX

        if method.lower() == 'sarima' and seasonal_order:
            model = SARIMAX(series, order=order, seasonal_order=seasonal_order, **kwargs)
        else:
            model = ARIMA(series, order=order, **kwargs)

        fitted = model.fit()

        return {
            'model': fitted,
            'aic': fitted.aic,
            'bic': fitted.bic,
            'summary': str(fitted.summary())
        }

    elif method.lower() == 'exp_smoothing':
        from statsmodels.tsa.holtwinters import ExponentialSmoothing

        model = ExponentialSmoothing(series, **kwargs)
        fitted = model.fit()

        return {
            'model': fitted,
            'aic': fitted.aic,
            'sse': fitted.sse
        }

    else:
        raise ValueError(f"Unknown time series method: {method}")


def forecast_timeseries(
    model: Any,
    horizon: int,
    **kwargs
) -> Dict[str, Any]:
    """
    Generate forecasts from fitted time series model.

    Args:
        model: Fitted time series model
        horizon: Number of periods to forecast
        **kwargs: Additional forecast params

    Returns:
        Dict with forecasts and confidence intervals
    """
    if hasattr(model, 'forecast'):
        forecast = model.forecast(steps=horizon, **kwargs)
    elif hasattr(model, 'predict'):
        forecast = model.predict(start=len(model.data.endog), end=len(model.data.endog) + horizon - 1)
    else:
        raise ValueError("Model has no forecast or predict method")

    result = {'forecast': forecast}

    # Try to get confidence intervals
    if hasattr(model, 'get_forecast'):
        fc = model.get_forecast(steps=horizon)
        result['conf_int'] = fc.conf_int()
        result['forecast_mean'] = fc.predicted_mean

    return result
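A sketch of the time series helpers on a synthetic series (not in the diff), assuming statsmodels is installed:

```python
# Usage sketch based on the fit_timeseries and forecast_timeseries signatures above.
import numpy as np
from npcpy.ml_funcs import fit_timeseries, forecast_timeseries

t = np.arange(120)
series = 10 + 0.05 * t + np.sin(2 * np.pi * t / 12) + np.random.normal(0, 0.2, size=t.size)

fit = fit_timeseries(series, method="arima", order=(2, 1, 1))
print(fit["aic"], fit["bic"])

fc = forecast_timeseries(fit["model"], horizon=12)
print(fc["forecast"][:3])  # next three forecast steps
print(fc["conf_int"][:3])  # confidence intervals, present when the model exposes get_forecast
```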
# ==================== Ensemble Functions ====================

def ensemble_predict(
    X: Any,
    models: List[Any],
    method: str = "vote",
    weights: List[float] = None
) -> Dict[str, Any]:
    """
    Ensemble predictions from multiple models.

    Args:
        X: Input features
        models: List of fitted models
        method: 'vote', 'average', 'weighted', 'stack'
        weights: Model weights for weighted averaging

    Returns:
        Dict with ensemble predictions
    """
    # Get individual predictions
    all_preds = []
    for m in models:
        pred = m.predict(X)
        all_preds.append(pred)

    all_preds = np.array(all_preds)

    if method == "vote":
        # Majority voting (for classification)
        from scipy import stats
        ensemble_pred, _ = stats.mode(all_preds, axis=0)
        ensemble_pred = ensemble_pred.flatten()

    elif method == "average":
        ensemble_pred = np.mean(all_preds, axis=0)

    elif method == "weighted":
        if weights is None:
            weights = [1.0 / len(models)] * len(models)
        weights = np.array(weights).reshape(-1, 1)
        ensemble_pred = np.sum(all_preds * weights, axis=0)

    else:
        raise ValueError(f"Unknown ensemble method: {method}")

    return {
        'predictions': ensemble_pred,
        'individual_predictions': all_preds,
        'method': method
    }
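A sketch of ensemble_predict over a few heterogeneous classifiers (not in the diff), reusing the train/test split from the score_model sketch; assumes scikit-learn and scipy are installed:

```python
# Usage sketch based on the ensemble_predict signature above.
from npcpy.ml_funcs import fit_model, ensemble_predict

members = [
    fit_model(X_train, y_train, model=name)["model"]
    for name in ("LogisticRegression", "RandomForestClassifier", "KNeighborsClassifier")
]
vote = ensemble_predict(X_test, members, method="vote")
print(vote["predictions"][:10])              # majority-vote labels
print(vote["individual_predictions"].shape)  # (3, n_test_samples)
```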
def cross_validate(
    X: Any,
    y: Any,
    model: Union[str, Any],
    cv: int = 5,
    metrics: List[str] = None,
    parallel: bool = True,
    **kwargs
) -> Dict[str, Any]:
    """
    Cross-validate model.

    Args:
        X: Features
        y: Targets
        model: Model name or instance
        cv: Number of folds
        metrics: Metrics to compute
        parallel: Parallelize folds
        **kwargs: Model hyperparameters

    Returns:
        Dict with CV scores
    """
    from sklearn.model_selection import cross_val_score, KFold

    if isinstance(model, str):
        model_instance = _get_model_instance(model, **kwargs)
    else:
        model_instance = model

    if metrics is None:
        metrics = ['accuracy']

    results = {}
    for metric in metrics:
        scoring = metric if metric in ['accuracy', 'f1', 'precision', 'recall', 'r2', 'neg_mean_squared_error'] else None
        if scoring:
            scores = cross_val_score(model_instance, X, y, cv=cv, scoring=scoring)
            results[metric] = {
                'mean': np.mean(scores),
                'std': np.std(scores),
                'scores': scores.tolist()
            }

    return results
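For reference (not in the diff): cross_validate delegates to sklearn.model_selection.cross_val_score, so a call is a thin wrapper around k-fold scoring. A minimal sketch, assuming scikit-learn is installed:

```python
# Usage sketch based on the cross_validate signature above.
from npcpy.ml_funcs import cross_validate

cv_scores = cross_validate(X, y, model="RandomForestClassifier", cv=5,
                           metrics=["accuracy"], n_estimators=100)
print(cv_scores["accuracy"]["mean"], cv_scores["accuracy"]["std"])
```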
# ==================== Utility Functions ====================

def serialize_model(model: Any, path: str = None) -> bytes:
    """Serialize model to bytes or file"""
    data = pickle.dumps(model)
    if path:
        with open(path, 'wb') as f:
            f.write(data)
    return data


def deserialize_model(data: Union[bytes, str]) -> Any:
    """Deserialize model from bytes or file path"""
    if isinstance(data, str):
        with open(data, 'rb') as f:
            data = f.read()
    return pickle.loads(data)


def get_model_params(model: Any) -> Dict[str, Any]:
    """Get model hyperparameters"""
    if hasattr(model, 'get_params'):
        return model.get_params()
    elif hasattr(model, 'state_dict'):
        return {'type': 'torch', 'params': list(model.state_dict().keys())}
    else:
        return {}


def set_model_params(model: Any, params: Dict[str, Any]) -> Any:
    """Set model hyperparameters"""
    if hasattr(model, 'set_params'):
        return model.set_params(**params)
    else:
        for k, v in params.items():
            if hasattr(model, k):
                setattr(model, k, v)
        return model
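A round-trip sketch for the pickle-based helpers (not in the diff), reusing 'fitted' from the score_model sketch:

```python
# Usage sketch based on the serialize/deserialize helpers above.
from npcpy.ml_funcs import serialize_model, deserialize_model, get_model_params

blob = serialize_model(fitted["model"])   # bytes; pass path=... to also write a file
restored = deserialize_model(blob)
print(get_model_params(restored) == get_model_params(fitted["model"]))  # True for sklearn estimators
```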