oikan 0.0.3.10__py3-none-any.whl → 0.0.3.12__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- oikan/elasticnet.py +9 -9
- oikan/model.py +119 -60
- {oikan-0.0.3.10.dist-info → oikan-0.0.3.12.dist-info}/METADATA +12 -15
- oikan-0.0.3.12.dist-info/RECORD +11 -0
- oikan-0.0.3.10.dist-info/RECORD +0 -11
- {oikan-0.0.3.10.dist-info → oikan-0.0.3.12.dist-info}/WHEEL +0 -0
- {oikan-0.0.3.10.dist-info → oikan-0.0.3.12.dist-info}/licenses/LICENSE +0 -0
- {oikan-0.0.3.10.dist-info → oikan-0.0.3.12.dist-info}/top_level.txt +0 -0
oikan/elasticnet.py
CHANGED
@@ -3,7 +3,7 @@ import torch
|
|
3
3
|
import numpy as np
|
4
4
|
|
5
5
|
class ElasticNet(nn.Module):
|
6
|
-
def __init__(self, alpha=1.0, l1_ratio=0.5, fit_intercept=False, max_iter=5000, tol=1e-4, random_state=
|
6
|
+
def __init__(self, alpha=1.0, l1_ratio=0.5, fit_intercept=False, max_iter=5000, tol=1e-4, random_state=42):
|
7
7
|
super().__init__()
|
8
8
|
self.alpha = alpha
|
9
9
|
self.l1_ratio = l1_ratio
|
@@ -21,10 +21,9 @@ class ElasticNet(nn.Module):
|
|
21
21
|
if y.ndim == 1:
|
22
22
|
y = y.reshape(-1, 1)
|
23
23
|
n_targets = y.shape[1]
|
24
|
-
|
25
|
-
|
26
|
-
|
27
|
-
np.random.seed(self.random_state)
|
24
|
+
|
25
|
+
torch.manual_seed(self.random_state)
|
26
|
+
np.random.seed(self.random_state)
|
28
27
|
|
29
28
|
X_tensor = torch.tensor(X, dtype=torch.float32)
|
30
29
|
y_tensor = torch.tensor(y, dtype=torch.float32)
|
@@ -64,8 +63,9 @@ class ElasticNet(nn.Module):
|
|
64
63
|
X = np.asarray(X, dtype=np.float32)
|
65
64
|
if self.coef_ is None:
|
66
65
|
raise RuntimeError("Model not fitted yet.")
|
67
|
-
|
68
|
-
|
69
|
-
|
70
|
-
|
66
|
+
with torch.no_grad():
|
67
|
+
W = self.coef_.T if self.coef_.ndim == 2 else self.coef_
|
68
|
+
y_pred = X @ W
|
69
|
+
if self.intercept_ is not None:
|
70
|
+
y_pred += self.intercept_
|
71
71
|
return y_pred
|
oikan/model.py
CHANGED
@@ -2,17 +2,17 @@ import numpy as np
|
|
2
2
|
import torch
|
3
3
|
import torch.nn as nn
|
4
4
|
import torch.optim as optim
|
5
|
-
from
|
6
|
-
from abc import ABC, abstractmethod
|
5
|
+
from tqdm import tqdm
|
7
6
|
import json
|
7
|
+
import sys
|
8
|
+
from abc import ABC, abstractmethod
|
9
|
+
from typing import List, Optional, Union
|
8
10
|
from .elasticnet import ElasticNet
|
9
11
|
from .neural import TabularNet
|
10
12
|
from .utils import evaluate_basis_functions, get_features_involved, sympify_formula, get_latex_formula
|
11
13
|
from sklearn.model_selection import train_test_split
|
12
14
|
from sklearn.metrics import r2_score, accuracy_score
|
13
15
|
from .exceptions import *
|
14
|
-
import sys
|
15
|
-
from tqdm import tqdm
|
16
16
|
|
17
17
|
class OIKAN(ABC):
|
18
18
|
"""
|
@@ -24,7 +24,7 @@ class OIKAN(ABC):
|
|
24
24
|
List of hidden layer sizes for the neural network.
|
25
25
|
activation : str, optional (default='relu')
|
26
26
|
Activation function for the neural network ('relu', 'tanh', 'leaky_relu', 'elu', 'swish', 'gelu').
|
27
|
-
augmentation_factor : int, optional (default=
|
27
|
+
augmentation_factor : int, optional (default=1)
|
28
28
|
Number of augmented samples per original sample.
|
29
29
|
alpha : float, optional (default=1.0)
|
30
30
|
ElasticNet regularization strength.
|
@@ -45,33 +45,34 @@ class OIKAN(ABC):
|
|
45
45
|
Whether to display training progress.
|
46
46
|
evaluate_nn : bool, optional (default=False)
|
47
47
|
Whether to evaluate neural network performance before full training.
|
48
|
-
random_state: int, optional (default=
|
48
|
+
random_state: int, optional (default=42)
|
49
49
|
Random seed for reproducibility.
|
50
50
|
"""
|
51
|
-
def __init__(
|
52
|
-
|
53
|
-
|
51
|
+
def __init__(
|
52
|
+
self,
|
53
|
+
hidden_sizes: List[int] = [64, 64],
|
54
|
+
activation: str = 'relu',
|
55
|
+
augmentation_factor: int = 1,
|
56
|
+
alpha: float = 1.0,
|
57
|
+
l1_ratio: float = 0.5,
|
58
|
+
sigma: float = 5.0,
|
59
|
+
epochs: int = 100,
|
60
|
+
lr: float = 0.001,
|
61
|
+
batch_size: int = 32,
|
62
|
+
verbose: bool = False,
|
63
|
+
evaluate_nn: bool = False,
|
64
|
+
top_k: int = 5,
|
65
|
+
random_state: int = 42
|
66
|
+
) -> None:
|
54
67
|
if not isinstance(hidden_sizes, list) or not all(isinstance(x, int) and x > 0 for x in hidden_sizes):
|
55
68
|
raise InvalidParameterError("hidden_sizes must be a list of positive integers")
|
56
69
|
if activation not in ['relu', 'tanh', 'leaky_relu', 'elu', 'swish', 'gelu']:
|
57
70
|
raise InvalidParameterError(f"Unsupported activation function: {activation}")
|
58
|
-
if
|
59
|
-
raise InvalidParameterError("augmentation_factor must be
|
60
|
-
if not
|
61
|
-
raise InvalidParameterError("
|
62
|
-
|
63
|
-
raise InvalidParameterError("Learning rate must be between 0 and 1")
|
64
|
-
if not isinstance(batch_size, int) or batch_size < 1:
|
65
|
-
raise InvalidParameterError("batch_size must be a positive integer")
|
66
|
-
if not isinstance(epochs, int) or epochs < 1:
|
67
|
-
raise InvalidParameterError("epochs must be a positive integer")
|
68
|
-
if not 0 <= alpha <= 1:
|
69
|
-
raise InvalidParameterError("alpha must be between 0 and 1")
|
70
|
-
if not 0 <= l1_ratio <= 1:
|
71
|
-
raise InvalidParameterError("l1_ratio must be between 0 and 1")
|
72
|
-
if sigma <= 0:
|
73
|
-
raise InvalidParameterError("sigma must be positive")
|
74
|
-
|
71
|
+
if augmentation_factor < 1 or top_k < 1 or batch_size < 1 or epochs < 1:
|
72
|
+
raise InvalidParameterError("augmentation_factor, top_k, batch_size, and epochs must be positive integers")
|
73
|
+
if not 0 < lr < 1 or not 0 <= alpha <= 1 or not 0 <= l1_ratio <= 1 or sigma <= 0:
|
74
|
+
raise InvalidParameterError("Invalid values for lr, alpha, l1_ratio, or sigma")
|
75
|
+
|
75
76
|
self.hidden_sizes = hidden_sizes
|
76
77
|
self.activation = activation
|
77
78
|
self.augmentation_factor = augmentation_factor
|
@@ -84,24 +85,24 @@ class OIKAN(ABC):
|
|
84
85
|
self.verbose = verbose
|
85
86
|
self.evaluate_nn = evaluate_nn
|
86
87
|
self.top_k = top_k
|
87
|
-
self.neural_net = None
|
88
|
-
self.symbolic_model = None
|
88
|
+
self.neural_net: Optional[TabularNet] = None
|
89
|
+
self.symbolic_model: Optional[dict] = None
|
89
90
|
self.evaluation_done = False
|
90
91
|
self.random_state = random_state
|
91
|
-
|
92
|
-
|
93
|
-
|
94
|
-
|
92
|
+
self.__version__ = '0.0.3'
|
93
|
+
|
94
|
+
torch.manual_seed(self.random_state)
|
95
|
+
np.random.seed(self.random_state)
|
95
96
|
|
96
97
|
@abstractmethod
|
97
|
-
def fit(self, X, y):
|
98
|
+
def fit(self, X: np.ndarray, y: np.ndarray) -> None:
|
98
99
|
pass
|
99
100
|
|
100
101
|
@abstractmethod
|
101
|
-
def predict(self, X):
|
102
|
+
def predict(self, X: np.ndarray) -> np.ndarray:
|
102
103
|
pass
|
103
104
|
|
104
|
-
def get_formula(self, type='original'):
|
105
|
+
def get_formula(self, type: str = 'original') -> Union[str, List[str]]:
|
105
106
|
"""
|
106
107
|
Returns the symbolic formula(s) as a string (regression) or list of strings (classification).
|
107
108
|
|
@@ -149,19 +150,24 @@ class OIKAN(ABC):
|
|
149
150
|
formulas.append(f"Class {self.classes_[c]}: {formula}")
|
150
151
|
return formulas
|
151
152
|
|
152
|
-
def feature_importances(self):
|
153
|
+
def feature_importances(self, column_names: Optional[List[str]] = None) -> Union[np.ndarray, tuple]:
|
153
154
|
"""
|
154
155
|
Computes the importance of each original feature based on the symbolic model.
|
155
156
|
|
156
157
|
Returns:
|
157
158
|
--------
|
158
|
-
numpy.ndarray : Normalized feature importances.
|
159
|
+
numpy.ndarray : Normalized feature importances. If column_names was provided, also returns a dict mapping feature name -> feature label (symbolic)
|
159
160
|
"""
|
160
161
|
if self.symbolic_model is None:
|
161
162
|
raise ValueError("Model not fitted yet.")
|
162
163
|
basis_functions = self.symbolic_model['basis_functions']
|
163
164
|
n_features = self.symbolic_model['n_features']
|
164
165
|
importances = np.zeros(n_features)
|
166
|
+
|
167
|
+
# Validation of column names
|
168
|
+
if column_names is not None:
|
169
|
+
if not hasattr(column_names, '__len__') or len(column_names) != n_features:
|
170
|
+
raise InvalidParameterError("column_names must be a sequence with length equal to number of features")
|
165
171
|
|
166
172
|
# Handle regression case
|
167
173
|
if 'coefficients' in self.symbolic_model:
|
@@ -170,7 +176,8 @@ class OIKAN(ABC):
|
|
170
176
|
if coefficients[i] != 0:
|
171
177
|
features_involved = get_features_involved(func)
|
172
178
|
for idx in features_involved:
|
173
|
-
|
179
|
+
if 0 <= idx < n_features:
|
180
|
+
importances[idx] += np.abs(coefficients[i])
|
174
181
|
# Handle classification case with multiple coefficient sets
|
175
182
|
else:
|
176
183
|
for coef in self.symbolic_model['coefficients_list']:
|
@@ -178,12 +185,22 @@ class OIKAN(ABC):
|
|
178
185
|
if coef[i] != 0:
|
179
186
|
features_involved = get_features_involved(func)
|
180
187
|
for idx in features_involved:
|
181
|
-
|
188
|
+
if 0 <= idx < n_features:
|
189
|
+
importances[idx] += np.abs(coef[i])
|
182
190
|
|
183
191
|
total = importances.sum()
|
184
|
-
|
192
|
+
normalized = importances / total if total > 0 else importances
|
193
|
+
|
194
|
+
if column_names is not None:
|
195
|
+
feature_map = dict()
|
196
|
+
# map feature name -> formula label
|
197
|
+
for idx, feature_name in enumerate(column_names):
|
198
|
+
feature_map[feature_name] = f'x{idx}'
|
199
|
+
return normalized, feature_map
|
200
|
+
|
201
|
+
return normalized
|
185
202
|
|
186
|
-
def save(self, path):
|
203
|
+
def save(self, path: str) -> None:
|
187
204
|
"""
|
188
205
|
Saves the symbolic model to a .json file.
|
189
206
|
|
@@ -220,7 +237,7 @@ class OIKAN(ABC):
|
|
220
237
|
if self.verbose:
|
221
238
|
print(f"Model saved to {path}")
|
222
239
|
|
223
|
-
def load(self, path):
|
240
|
+
def load(self, path: str) -> None:
|
224
241
|
"""
|
225
242
|
Loads the symbolic model from a .json file.
|
226
243
|
|
@@ -253,9 +270,9 @@ class OIKAN(ABC):
|
|
253
270
|
if self.verbose:
|
254
271
|
print(f"Model loaded from {path}")
|
255
272
|
|
256
|
-
def _evaluate_neural_net(self, X, y, output_size, loss_fn):
|
273
|
+
def _evaluate_neural_net(self, X: np.ndarray, y: np.ndarray, output_size: int, loss_fn: nn.Module) -> None:
|
257
274
|
"""Evaluates neural network performance on train-test split."""
|
258
|
-
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=
|
275
|
+
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=self.random_state)
|
259
276
|
|
260
277
|
input_size = X.shape[1]
|
261
278
|
self.neural_net = TabularNet(input_size, self.hidden_sizes, output_size, self.activation)
|
@@ -289,7 +306,7 @@ class OIKAN(ABC):
|
|
289
306
|
# Retrain on full dataset
|
290
307
|
self._train_neural_net(X, y, output_size, loss_fn)
|
291
308
|
|
292
|
-
def _train_neural_net(self, X, y, output_size, loss_fn):
|
309
|
+
def _train_neural_net(self, X: np.ndarray, y: np.ndarray, output_size: int, loss_fn: nn.Module) -> None:
|
293
310
|
"""Trains the neural network on the input data."""
|
294
311
|
if self.evaluate_nn and not self.evaluation_done:
|
295
312
|
self.evaluation_done = True
|
@@ -300,8 +317,10 @@ class OIKAN(ABC):
|
|
300
317
|
if self.neural_net is None:
|
301
318
|
self.neural_net = TabularNet(input_size, self.hidden_sizes, output_size, self.activation)
|
302
319
|
optimizer = optim.Adam(self.neural_net.parameters(), lr=self.lr)
|
303
|
-
dataset = torch.utils.data.TensorDataset(
|
304
|
-
|
320
|
+
dataset = torch.utils.data.TensorDataset(
|
321
|
+
torch.tensor(X, dtype=torch.float32),
|
322
|
+
torch.tensor(y, dtype=torch.float32)
|
323
|
+
)
|
305
324
|
loader = torch.utils.data.DataLoader(dataset, batch_size=self.batch_size, shuffle=True)
|
306
325
|
self.neural_net.train()
|
307
326
|
|
@@ -324,7 +343,7 @@ class OIKAN(ABC):
|
|
324
343
|
if self.verbose:
|
325
344
|
epoch_iterator.set_postfix({'loss': f'{total_loss/len(loader):.4f}'})
|
326
345
|
|
327
|
-
def _generate_augmented_data(self, X):
|
346
|
+
def _generate_augmented_data(self, X: np.ndarray) -> np.ndarray:
|
328
347
|
"""Generates augmented data by adding Gaussian noise."""
|
329
348
|
if self.augmentation_factor == 1:
|
330
349
|
return np.array([]).reshape(0, X.shape[1])
|
@@ -337,7 +356,7 @@ class OIKAN(ABC):
|
|
337
356
|
|
338
357
|
return np.vstack(X_aug)
|
339
358
|
|
340
|
-
def _perform_symbolic_regression(self, X, y):
|
359
|
+
def _perform_symbolic_regression(self, X: np.ndarray, y: np.ndarray) -> None:
|
341
360
|
"""
|
342
361
|
Performs hierarchical symbolic regression using a two-stage approach.
|
343
362
|
|
@@ -362,14 +381,32 @@ class OIKAN(ABC):
|
|
362
381
|
|
363
382
|
if self.verbose:
|
364
383
|
print("\nStage 1: Coarse Model Fitting")
|
365
|
-
|
366
|
-
coarse_degree = 2 # Fixed low degree for coarse model
|
367
|
-
poly_coarse = PolynomialFeatures(degree=coarse_degree, include_bias=True)
|
368
384
|
|
385
|
+
# Generate polynomial features
|
369
386
|
if self.verbose:
|
370
387
|
print("Generating polynomial features...")
|
371
|
-
|
372
|
-
|
388
|
+
n_samples, n_features = X.shape
|
389
|
+
|
390
|
+
bias = np.ones((n_samples, 1))
|
391
|
+
features = X
|
392
|
+
powers_of_2 = X ** 2
|
393
|
+
|
394
|
+
X_poly_coarse = np.hstack([bias, features, powers_of_2])
|
395
|
+
basis_functions_coarse = ['1'] + [f'x{i}' for i in range(n_features)] + [f'x{i}^2' for i in range(n_features)]
|
396
|
+
|
397
|
+
# Generate random interaction features (O(N^2) -> O(N))
|
398
|
+
if self.verbose:
|
399
|
+
print("Generating random interaction features...")
|
400
|
+
rng = np.random.default_rng(self.random_state)
|
401
|
+
random_pairs = rng.choice(n_features, size=(n_features // 2, 2), replace=False)
|
402
|
+
interaction_features = np.array([X[:, i] * X[:, j] for i, j in random_pairs]).T
|
403
|
+
interaction_feature_names = [f"x{i} x{j}" for i, j in random_pairs]
|
404
|
+
|
405
|
+
# Combine all features
|
406
|
+
X_poly_coarse = np.hstack([X_poly_coarse, interaction_features])
|
407
|
+
basis_functions_coarse.extend(interaction_feature_names)
|
408
|
+
|
409
|
+
# Fit coarse elastic net model
|
373
410
|
if self.verbose:
|
374
411
|
print("Fitting coarse elastic net model...")
|
375
412
|
model_coarse = ElasticNet(alpha=self.alpha, l1_ratio=self.l1_ratio, fit_intercept=False, random_state=self.random_state)
|
@@ -377,7 +414,7 @@ class OIKAN(ABC):
|
|
377
414
|
|
378
415
|
if self.verbose:
|
379
416
|
print("Computing feature importances...")
|
380
|
-
|
417
|
+
|
381
418
|
if len(y.shape) == 1 or y.shape[1] == 1:
|
382
419
|
coef_coarse = model_coarse.coef_.flatten()
|
383
420
|
else:
|
@@ -443,7 +480,7 @@ class OIKAN(ABC):
|
|
443
480
|
coef = model_refined.coef_[c]
|
444
481
|
indices = np.where(np.abs(coef) > 1e-6)[0]
|
445
482
|
selected_indices.update(indices)
|
446
|
-
selected_indices =
|
483
|
+
selected_indices = [i for i in selected_indices if i < len(basis_functions_refined)]
|
447
484
|
basis_functions = [basis_functions_refined[i] for i in selected_indices]
|
448
485
|
for c in range(y.shape[1]):
|
449
486
|
coef = model_refined.coef_[c]
|
@@ -454,10 +491,26 @@ class OIKAN(ABC):
|
|
454
491
|
'basis_functions': basis_functions,
|
455
492
|
'coefficients_list': coefficients_list
|
456
493
|
}
|
494
|
+
|
495
|
+
def _print_system_info(self) -> None:
|
496
|
+
"""Prints system information (for debugging purposes)."""
|
497
|
+
import platform
|
498
|
+
import os
|
499
|
+
print("\n" + "="*30)
|
500
|
+
print("System Information:")
|
501
|
+
print(f"OIKAN version: {self.__version__}")
|
502
|
+
print(f"Python version: {platform.python_version()}")
|
503
|
+
print(f"NumPy version: {np.__version__}")
|
504
|
+
print(f"Torch version: {torch.__version__}")
|
505
|
+
print(f"Device: {'CUDA' if torch.cuda.is_available() else 'CPU'}")
|
506
|
+
print(f"Number of CPU cores: {os.cpu_count()}")
|
507
|
+
print(f"Memory Usage: {torch.cuda.memory_allocated() / (1024 ** 2) if torch.cuda.is_available() else 'N/A'} MB")
|
508
|
+
print(f"Architecture: {platform.machine()}")
|
509
|
+
print("="*30 + "\n")
|
457
510
|
|
458
511
|
class OIKANRegressor(OIKAN):
|
459
512
|
"""OIKAN model for regression tasks."""
|
460
|
-
def fit(self, X, y):
|
513
|
+
def fit(self, X: np.ndarray, y: np.ndarray) -> None:
|
461
514
|
"""
|
462
515
|
Fits the regressor to the data.
|
463
516
|
|
@@ -468,6 +521,9 @@ class OIKANRegressor(OIKAN):
|
|
468
521
|
y : array-like of shape (n_samples,)
|
469
522
|
Target values.
|
470
523
|
"""
|
524
|
+
if self.verbose:
|
525
|
+
self._print_system_info()
|
526
|
+
|
471
527
|
X = np.asarray(X)
|
472
528
|
y = np.asarray(y).reshape(-1, 1)
|
473
529
|
|
@@ -499,7 +555,7 @@ class OIKANRegressor(OIKAN):
|
|
499
555
|
if self.verbose:
|
500
556
|
print("OIKANRegressor model training completed successfully!")
|
501
557
|
|
502
|
-
def predict(self, X):
|
558
|
+
def predict(self, X: np.ndarray) -> np.ndarray:
|
503
559
|
"""
|
504
560
|
Predicts target values for the input data.
|
505
561
|
|
@@ -522,7 +578,7 @@ class OIKANRegressor(OIKAN):
|
|
522
578
|
|
523
579
|
class OIKANClassifier(OIKAN):
|
524
580
|
"""OIKAN model for classification tasks."""
|
525
|
-
def fit(self, X, y):
|
581
|
+
def fit(self, X: np.ndarray, y: np.ndarray) -> None:
|
526
582
|
"""
|
527
583
|
Fits the classifier to the data.
|
528
584
|
|
@@ -533,6 +589,9 @@ class OIKANClassifier(OIKAN):
|
|
533
589
|
y : array-like of shape (n_samples,)
|
534
590
|
Target labels.
|
535
591
|
"""
|
592
|
+
if self.verbose:
|
593
|
+
self._print_system_info()
|
594
|
+
|
536
595
|
X = np.asarray(X)
|
537
596
|
from sklearn.preprocessing import LabelEncoder
|
538
597
|
le = LabelEncoder()
|
@@ -569,7 +628,7 @@ class OIKANClassifier(OIKAN):
|
|
569
628
|
if self.verbose:
|
570
629
|
print("OIKANClassifier model training completed successfully!")
|
571
630
|
|
572
|
-
def predict(self, X):
|
631
|
+
def predict(self, X: np.ndarray) -> np.ndarray:
|
573
632
|
"""
|
574
633
|
Predicts class labels for the input data.
|
575
634
|
|
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.4
|
2
2
|
Name: oikan
|
3
|
-
Version: 0.0.3.
|
3
|
+
Version: 0.0.3.12
|
4
4
|
Summary: OIKAN: Neuro-Symbolic ML for Scientific Discovery
|
5
5
|
Author: Arman Zhalgasbayev
|
6
6
|
License: MIT
|
@@ -9,7 +9,7 @@ Project-URL: Bug Tracker, https://github.com/silvermete0r/oikan/issues
|
|
9
9
|
Classifier: Programming Language :: Python :: 3
|
10
10
|
Classifier: License :: OSI Approved :: MIT License
|
11
11
|
Classifier: Operating System :: OS Independent
|
12
|
-
Requires-Python: >=3.
|
12
|
+
Requires-Python: >=3.11
|
13
13
|
Description-Content-Type: text/markdown
|
14
14
|
License-File: LICENSE
|
15
15
|
Requires-Dist: torch
|
@@ -62,12 +62,9 @@ OIKAN is a neuro-symbolic machine learning framework inspired by Kolmogorov-Arno
|
|
62
62
|
class OIKAN:
|
63
63
|
def __init__(self, hidden_sizes=[64, 64], activation='relu',
|
64
64
|
polynomial_degree=2, alpha=0.1):
|
65
|
-
#
|
66
|
-
self.
|
67
|
-
#
|
68
|
-
self.augmented_data = self.augment_data(X, y, augmentation_factor=5)
|
69
|
-
# Symbolic regression for interpretable formulas
|
70
|
-
self.symbolic_regression = SymbolicRegression(alpha=alpha)
|
65
|
+
self.neural_net = TabularNet(input_size, hidden_sizes, activation) # NN for predicting values in Data Augmentation (Additional)
|
66
|
+
self.augmented_data = self.augment_data(X, y, augmentation_factor=5) # Generate augmented samples (Additional)
|
67
|
+
self.symbolic_regression = SymbolicRegression(alpha=alpha, l1_ratio=0.5) # ElasticNet for symbolic regression (Main part)
|
71
68
|
```
|
72
69
|
|
73
70
|
2. **Basis Functions**: Core set of interpretable transformations:
|
@@ -111,7 +108,7 @@ pip install -e . # Install in development mode
|
|
111
108
|
|
112
109
|
| Requirement | Details |
|
113
110
|
|-------------------|--------------------------------------|
|
114
|
-
| Python | Version 3.
|
111
|
+
| Python | Version 3.11 or higher |
|
115
112
|
| Operating System | Platform independent (Windows/macOS/Linux) |
|
116
113
|
| Memory | Recommended minimum 4GB RAM |
|
117
114
|
| Disk Space | ~100MB for installation (including dependencies) |
|
@@ -120,7 +117,7 @@ pip install -e . # Install in development mode
|
|
120
117
|
|
121
118
|
### Regression Example
|
122
119
|
|
123
|
-
> **Suggestion:** Please ensure that the data is normalized using standard scaling (or another suitable normalization method), as
|
120
|
+
> **Suggestion:** Please ensure that the data is normalized using standard scaling (or another suitable normalization method), as ElasticNet assumes that the model intercept has already been accounted for.
|
124
121
|
|
125
122
|
```python
|
126
123
|
from oikan import OIKANRegressor
|
@@ -130,7 +127,7 @@ from sklearn.metrics import mean_squared_error
|
|
130
127
|
model = OIKANRegressor(
|
131
128
|
hidden_sizes=[32, 32], # Hidden layer sizes
|
132
129
|
activation='relu', # Activation function (other options: 'tanh', 'leaky_relu', 'elu', 'swish', 'gelu')
|
133
|
-
augmentation_factor=5, # Augmentation factor for data generation
|
130
|
+
augmentation_factor=5, # Augmentation factor for data generation (default: 1)
|
134
131
|
alpha=1.0, # ElasticNet regularization strength (Symbolic regression)
|
135
132
|
l1_ratio=0.5, # ElasticNet mixing parameter (0 <= l1_ratio <= 1). 0 is equivalent to Ridge regression, 1 is equivalent to Lasso (Symbolic regression)
|
136
133
|
sigma=5, # Standard deviation of Gaussian noise for data augmentation
|
@@ -140,7 +137,7 @@ model = OIKANRegressor(
|
|
140
137
|
batch_size=32, # Batch size for training
|
141
138
|
verbose=True, # Verbose output during training
|
142
139
|
evaluate_nn=True, # Validate neural network performance before full process
|
143
|
-
random_state=42 # Random seed for reproducibility
|
140
|
+
random_state=42 # Random seed for reproducibility (default: 42)
|
144
141
|
)
|
145
142
|
|
146
143
|
# Fit the model
|
@@ -174,7 +171,7 @@ loaded_model.load("outputs/model.json")
|
|
174
171
|
|
175
172
|
### Classification Example
|
176
173
|
|
177
|
-
> **Suggestion:** Please ensure that the data is normalized using standard scaling (or another suitable normalization method), as
|
174
|
+
> **Suggestion:** Please ensure that the data is normalized using standard scaling (or another suitable normalization method), as ElasticNet assumes that the model intercept has already been accounted for.
|
178
175
|
|
179
176
|
```python
|
180
177
|
from oikan import OIKANClassifier
|
@@ -184,7 +181,7 @@ from sklearn.metrics import accuracy_score
|
|
184
181
|
model = OIKANClassifier(
|
185
182
|
hidden_sizes=[32, 32], # Hidden layer sizes
|
186
183
|
activation='relu', # Activation function (other options: 'tanh', 'leaky_relu', 'elu', 'swish', 'gelu')
|
187
|
-
augmentation_factor=10, # Augmentation factor for data generation
|
184
|
+
augmentation_factor=10, # Augmentation factor for data generation (default: 1)
|
188
185
|
alpha=1.0, # ElasticNet regularization strength (Symbolic regression)
|
189
186
|
l1_ratio=0.5, # ElasticNet mixing parameter (0 <= l1_ratio <= 1). 0 is equivalent to Ridge regression, 1 is equivalent to Lasso (Symbolic regression)
|
190
187
|
sigma=5, # Standard deviation of Gaussian noise for data augmentation
|
@@ -194,7 +191,7 @@ model = OIKANClassifier(
|
|
194
191
|
batch_size=32, # Batch size for training
|
195
192
|
verbose=True, # Verbose output during training
|
196
193
|
evaluate_nn=True, # Validate neural network performance before full process
|
197
|
-
random_state=42 # Random seed for reproducibility
|
194
|
+
random_state=42 # Random seed for reproducibility (default: 42)
|
198
195
|
)
|
199
196
|
|
200
197
|
# Fit the model
|
@@ -0,0 +1,11 @@
|
|
1
|
+
oikan/__init__.py,sha256=Dh1Rf9ONRdm75B6tFiv9Y9P6NNiHAiKPCGDMuag6TTE,724
|
2
|
+
oikan/elasticnet.py,sha256=37vy1eCtzME7RQzcYWQ_VNCwLF4Vg6cW7-qJFpLgqKg,2616
|
3
|
+
oikan/exceptions.py,sha256=GhHWqy2Q5LVBcteTy4ngnqxr7FOoLNyD8dNt1kfRXyw,901
|
4
|
+
oikan/model.py,sha256=C-ykucsDEIc6Zejw8_0Wn2K2SlhpZ853j1CNiAqIpgw,28846
|
5
|
+
oikan/neural.py,sha256=PZjaffSuABuCNxu-7PinU1GR6ji0Y6xRgSQ3n5HRDxI,1572
|
6
|
+
oikan/utils.py,sha256=7UCm9obO-8Q2zhetdAkukMDOZvGSBWUL_dSF04XqM7k,8808
|
7
|
+
oikan-0.0.3.12.dist-info/licenses/LICENSE,sha256=75ASVmU-XIpN-M4LbVmJ_ibgbzbvRLVti8FhnR0BTf8,1096
|
8
|
+
oikan-0.0.3.12.dist-info/METADATA,sha256=WJq7-4Q8zA0o90pdlQL-X8WM91q6xkz_bFmlr9PIXNo,13172
|
9
|
+
oikan-0.0.3.12.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
|
10
|
+
oikan-0.0.3.12.dist-info/top_level.txt,sha256=XwnwKwTJddZwIvtrUsAz-l-58BJRj6HjAGWrfYi_3QY,6
|
11
|
+
oikan-0.0.3.12.dist-info/RECORD,,
|
oikan-0.0.3.10.dist-info/RECORD
DELETED
@@ -1,11 +0,0 @@
|
|
1
|
-
oikan/__init__.py,sha256=Dh1Rf9ONRdm75B6tFiv9Y9P6NNiHAiKPCGDMuag6TTE,724
|
2
|
-
oikan/elasticnet.py,sha256=eeuK4lJ-7lsGZIyiXNH6NKYxhbbKbPp1kp8vLe4t7_4,2614
|
3
|
-
oikan/exceptions.py,sha256=GhHWqy2Q5LVBcteTy4ngnqxr7FOoLNyD8dNt1kfRXyw,901
|
4
|
-
oikan/model.py,sha256=lI5YTJr7a9ohVQv4B4lDcOueNIC5NARbuap3OKIr_gs,26213
|
5
|
-
oikan/neural.py,sha256=PZjaffSuABuCNxu-7PinU1GR6ji0Y6xRgSQ3n5HRDxI,1572
|
6
|
-
oikan/utils.py,sha256=7UCm9obO-8Q2zhetdAkukMDOZvGSBWUL_dSF04XqM7k,8808
|
7
|
-
oikan-0.0.3.10.dist-info/licenses/LICENSE,sha256=75ASVmU-XIpN-M4LbVmJ_ibgbzbvRLVti8FhnR0BTf8,1096
|
8
|
-
oikan-0.0.3.10.dist-info/METADATA,sha256=aVF1sUBOPHZeaiIebwFvpIwIw9W-XXqI13B0kkXK3X8,13127
|
9
|
-
oikan-0.0.3.10.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
|
10
|
-
oikan-0.0.3.10.dist-info/top_level.txt,sha256=XwnwKwTJddZwIvtrUsAz-l-58BJRj6HjAGWrfYi_3QY,6
|
11
|
-
oikan-0.0.3.10.dist-info/RECORD,,
|
File without changes
|
File without changes
|
File without changes
|