oikan 0.0.3.11__tar.gz → 0.0.3.12__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {oikan-0.0.3.11 → oikan-0.0.3.12}/PKG-INFO +10 -13
- {oikan-0.0.3.11 → oikan-0.0.3.12}/README.md +8 -11
- {oikan-0.0.3.11 → oikan-0.0.3.12}/oikan/elasticnet.py +9 -9
- {oikan-0.0.3.11 → oikan-0.0.3.12}/oikan/model.py +68 -51
- {oikan-0.0.3.11 → oikan-0.0.3.12}/oikan.egg-info/PKG-INFO +10 -13
- {oikan-0.0.3.11 → oikan-0.0.3.12}/pyproject.toml +2 -2
- {oikan-0.0.3.11 → oikan-0.0.3.12}/LICENSE +0 -0
- {oikan-0.0.3.11 → oikan-0.0.3.12}/oikan/__init__.py +0 -0
- {oikan-0.0.3.11 → oikan-0.0.3.12}/oikan/exceptions.py +0 -0
- {oikan-0.0.3.11 → oikan-0.0.3.12}/oikan/neural.py +0 -0
- {oikan-0.0.3.11 → oikan-0.0.3.12}/oikan/utils.py +0 -0
- {oikan-0.0.3.11 → oikan-0.0.3.12}/oikan.egg-info/SOURCES.txt +0 -0
- {oikan-0.0.3.11 → oikan-0.0.3.12}/oikan.egg-info/dependency_links.txt +0 -0
- {oikan-0.0.3.11 → oikan-0.0.3.12}/oikan.egg-info/requires.txt +0 -0
- {oikan-0.0.3.11 → oikan-0.0.3.12}/oikan.egg-info/top_level.txt +0 -0
- {oikan-0.0.3.11 → oikan-0.0.3.12}/setup.cfg +0 -0
- {oikan-0.0.3.11 → oikan-0.0.3.12}/setup.py +0 -0
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.4
|
2
2
|
Name: oikan
|
3
|
-
Version: 0.0.3.11
|
3
|
+
Version: 0.0.3.12
|
4
4
|
Summary: OIKAN: Neuro-Symbolic ML for Scientific Discovery
|
5
5
|
Author: Arman Zhalgasbayev
|
6
6
|
License: MIT
|
@@ -9,7 +9,7 @@ Project-URL: Bug Tracker, https://github.com/silvermete0r/oikan/issues
|
|
9
9
|
Classifier: Programming Language :: Python :: 3
|
10
10
|
Classifier: License :: OSI Approved :: MIT License
|
11
11
|
Classifier: Operating System :: OS Independent
|
12
|
-
Requires-Python: >=3.
|
12
|
+
Requires-Python: >=3.11
|
13
13
|
Description-Content-Type: text/markdown
|
14
14
|
License-File: LICENSE
|
15
15
|
Requires-Dist: torch
|
@@ -62,12 +62,9 @@ OIKAN is a neuro-symbolic machine learning framework inspired by Kolmogorov-Arno
|
|
62
62
|
class OIKAN:
|
63
63
|
def __init__(self, hidden_sizes=[64, 64], activation='relu',
|
64
64
|
polynomial_degree=2, alpha=0.1):
|
65
|
-
#
|
66
|
-
self.
|
67
|
-
#
|
68
|
-
self.augmented_data = self.augment_data(X, y, augmentation_factor=5)
|
69
|
-
# Symbolic regression for interpretable formulas
|
70
|
-
self.symbolic_regression = SymbolicRegression(alpha=alpha, l1_ratio=0.5)
|
65
|
+
self.neural_net = TabularNet(input_size, hidden_sizes, activation) # NN for predicting values in Data Augmentation (Additional)
|
66
|
+
self.augmented_data = self.augment_data(X, y, augmentation_factor=5) # Generate augmented samples (Additional)
|
67
|
+
self.symbolic_regression = SymbolicRegression(alpha=alpha, l1_ratio=0.5) # ElasticNet for symbolic regression (Main part)
|
71
68
|
```
|
72
69
|
|
73
70
|
2. **Basis Functions**: Core set of interpretable transformations:
|
@@ -111,7 +108,7 @@ pip install -e . # Install in development mode
|
|
111
108
|
|
112
109
|
| Requirement | Details |
|
113
110
|
|-------------------|--------------------------------------|
|
114
|
-
| Python | Version 3.
|
111
|
+
| Python | Version 3.11 or higher |
|
115
112
|
| Operating System | Platform independent (Windows/macOS/Linux) |
|
116
113
|
| Memory | Recommended minimum 4GB RAM |
|
117
114
|
| Disk Space | ~100MB for installation (including dependencies) |
|
@@ -130,7 +127,7 @@ from sklearn.metrics import mean_squared_error
|
|
130
127
|
model = OIKANRegressor(
|
131
128
|
hidden_sizes=[32, 32], # Hidden layer sizes
|
132
129
|
activation='relu', # Activation function (other options: 'tanh', 'leaky_relu', 'elu', 'swish', 'gelu')
|
133
|
-
augmentation_factor=5, # Augmentation factor for data generation
|
130
|
+
augmentation_factor=5, # Augmentation factor for data generation (default: 1)
|
134
131
|
alpha=1.0, # ElasticNet regularization strength (Symbolic regression)
|
135
132
|
l1_ratio=0.5, # ElasticNet mixing parameter (0 <= l1_ratio <= 1). 0 is equivalent to Ridge regression, 1 is equivalent to Lasso (Symbolic regression)
|
136
133
|
sigma=5, # Standard deviation of Gaussian noise for data augmentation
|
@@ -140,7 +137,7 @@ model = OIKANRegressor(
|
|
140
137
|
batch_size=32, # Batch size for training
|
141
138
|
verbose=True, # Verbose output during training
|
142
139
|
evaluate_nn=True, # Validate neural network performance before full process
|
143
|
-
random_state=42 # Random seed for reproducibility
|
140
|
+
random_state=42 # Random seed for reproducibility (default: 42)
|
144
141
|
)
|
145
142
|
|
146
143
|
# Fit the model
|
@@ -184,7 +181,7 @@ from sklearn.metrics import accuracy_score
|
|
184
181
|
model = OIKANClassifier(
|
185
182
|
hidden_sizes=[32, 32], # Hidden layer sizes
|
186
183
|
activation='relu', # Activation function (other options: 'tanh', 'leaky_relu', 'elu', 'swish', 'gelu')
|
187
|
-
augmentation_factor=10, # Augmentation factor for data generation
|
184
|
+
augmentation_factor=10, # Augmentation factor for data generation (default: 1)
|
188
185
|
alpha=1.0, # ElasticNet regularization strength (Symbolic regression)
|
189
186
|
l1_ratio=0.5, # ElasticNet mixing parameter (0 <= l1_ratio <= 1). 0 is equivalent to Ridge regression, 1 is equivalent to Lasso (Symbolic regression)
|
190
187
|
sigma=5, # Standard deviation of Gaussian noise for data augmentation
|
@@ -194,7 +191,7 @@ model = OIKANClassifier(
|
|
194
191
|
batch_size=32, # Batch size for training
|
195
192
|
verbose=True, # Verbose output during training
|
196
193
|
evaluate_nn=True, # Validate neural network performance before full process
|
197
|
-
random_state=42 # Random seed for reproducibility
|
194
|
+
random_state=42 # Random seed for reproducibility (default: 42)
|
198
195
|
)
|
199
196
|
|
200
197
|
# Fit the model
|
@@ -41,12 +41,9 @@ OIKAN is a neuro-symbolic machine learning framework inspired by Kolmogorov-Arno
|
|
41
41
|
class OIKAN:
|
42
42
|
def __init__(self, hidden_sizes=[64, 64], activation='relu',
|
43
43
|
polynomial_degree=2, alpha=0.1):
|
44
|
-
#
|
45
|
-
self.
|
46
|
-
#
|
47
|
-
self.augmented_data = self.augment_data(X, y, augmentation_factor=5)
|
48
|
-
# Symbolic regression for interpretable formulas
|
49
|
-
self.symbolic_regression = SymbolicRegression(alpha=alpha, l1_ratio=0.5)
|
44
|
+
self.neural_net = TabularNet(input_size, hidden_sizes, activation) # NN for predicting values in Data Augmentation (Additional)
|
45
|
+
self.augmented_data = self.augment_data(X, y, augmentation_factor=5) # Generate augmented samples (Additional)
|
46
|
+
self.symbolic_regression = SymbolicRegression(alpha=alpha, l1_ratio=0.5) # ElasticNet for symbolic regression (Main part)
|
50
47
|
```
|
51
48
|
|
52
49
|
2. **Basis Functions**: Core set of interpretable transformations:
|
@@ -90,7 +87,7 @@ pip install -e . # Install in development mode
|
|
90
87
|
|
91
88
|
| Requirement | Details |
|
92
89
|
|-------------------|--------------------------------------|
|
93
|
-
| Python | Version 3.
|
90
|
+
| Python | Version 3.11 or higher |
|
94
91
|
| Operating System | Platform independent (Windows/macOS/Linux) |
|
95
92
|
| Memory | Recommended minimum 4GB RAM |
|
96
93
|
| Disk Space | ~100MB for installation (including dependencies) |
|
@@ -109,7 +106,7 @@ from sklearn.metrics import mean_squared_error
|
|
109
106
|
model = OIKANRegressor(
|
110
107
|
hidden_sizes=[32, 32], # Hidden layer sizes
|
111
108
|
activation='relu', # Activation function (other options: 'tanh', 'leaky_relu', 'elu', 'swish', 'gelu')
|
112
|
-
augmentation_factor=5, # Augmentation factor for data generation
|
109
|
+
augmentation_factor=5, # Augmentation factor for data generation (default: 1)
|
113
110
|
alpha=1.0, # ElasticNet regularization strength (Symbolic regression)
|
114
111
|
l1_ratio=0.5, # ElasticNet mixing parameter (0 <= l1_ratio <= 1). 0 is equivalent to Ridge regression, 1 is equivalent to Lasso (Symbolic regression)
|
115
112
|
sigma=5, # Standard deviation of Gaussian noise for data augmentation
|
@@ -119,7 +116,7 @@ model = OIKANRegressor(
|
|
119
116
|
batch_size=32, # Batch size for training
|
120
117
|
verbose=True, # Verbose output during training
|
121
118
|
evaluate_nn=True, # Validate neural network performance before full process
|
122
|
-
random_state=42 # Random seed for reproducibility
|
119
|
+
random_state=42 # Random seed for reproducibility (default: 42)
|
123
120
|
)
|
124
121
|
|
125
122
|
# Fit the model
|
@@ -163,7 +160,7 @@ from sklearn.metrics import accuracy_score
|
|
163
160
|
model = OIKANClassifier(
|
164
161
|
hidden_sizes=[32, 32], # Hidden layer sizes
|
165
162
|
activation='relu', # Activation function (other options: 'tanh', 'leaky_relu', 'elu', 'swish', 'gelu')
|
166
|
-
augmentation_factor=10, # Augmentation factor for data generation
|
163
|
+
augmentation_factor=10, # Augmentation factor for data generation (default: 1)
|
167
164
|
alpha=1.0, # ElasticNet regularization strength (Symbolic regression)
|
168
165
|
l1_ratio=0.5, # ElasticNet mixing parameter (0 <= l1_ratio <= 1). 0 is equivalent to Ridge regression, 1 is equivalent to Lasso (Symbolic regression)
|
169
166
|
sigma=5, # Standard deviation of Gaussian noise for data augmentation
|
@@ -173,7 +170,7 @@ model = OIKANClassifier(
|
|
173
170
|
batch_size=32, # Batch size for training
|
174
171
|
verbose=True, # Verbose output during training
|
175
172
|
evaluate_nn=True, # Validate neural network performance before full process
|
176
|
-
random_state=42 # Random seed for reproducibility
|
173
|
+
random_state=42 # Random seed for reproducibility (default: 42)
|
177
174
|
)
|
178
175
|
|
179
176
|
# Fit the model
|
@@ -3,7 +3,7 @@ import torch
|
|
3
3
|
import numpy as np
|
4
4
|
|
5
5
|
class ElasticNet(nn.Module):
|
6
|
-
def __init__(self, alpha=1.0, l1_ratio=0.5, fit_intercept=False, max_iter=5000, tol=1e-4, random_state=
|
6
|
+
def __init__(self, alpha=1.0, l1_ratio=0.5, fit_intercept=False, max_iter=5000, tol=1e-4, random_state=42):
|
7
7
|
super().__init__()
|
8
8
|
self.alpha = alpha
|
9
9
|
self.l1_ratio = l1_ratio
|
@@ -21,10 +21,9 @@ class ElasticNet(nn.Module):
|
|
21
21
|
if y.ndim == 1:
|
22
22
|
y = y.reshape(-1, 1)
|
23
23
|
n_targets = y.shape[1]
|
24
|
-
|
25
|
-
|
26
|
-
|
27
|
-
np.random.seed(self.random_state)
|
24
|
+
|
25
|
+
torch.manual_seed(self.random_state)
|
26
|
+
np.random.seed(self.random_state)
|
28
27
|
|
29
28
|
X_tensor = torch.tensor(X, dtype=torch.float32)
|
30
29
|
y_tensor = torch.tensor(y, dtype=torch.float32)
|
@@ -64,8 +63,9 @@ class ElasticNet(nn.Module):
|
|
64
63
|
X = np.asarray(X, dtype=np.float32)
|
65
64
|
if self.coef_ is None:
|
66
65
|
raise RuntimeError("Model not fitted yet.")
|
67
|
-
|
68
|
-
|
69
|
-
|
70
|
-
|
66
|
+
with torch.no_grad():
|
67
|
+
W = self.coef_.T if self.coef_.ndim == 2 else self.coef_
|
68
|
+
y_pred = X @ W
|
69
|
+
if self.intercept_ is not None:
|
70
|
+
y_pred += self.intercept_
|
71
71
|
return y_pred
|
@@ -2,16 +2,17 @@ import numpy as np
|
|
2
2
|
import torch
|
3
3
|
import torch.nn as nn
|
4
4
|
import torch.optim as optim
|
5
|
-
from
|
5
|
+
from tqdm import tqdm
|
6
6
|
import json
|
7
|
+
import sys
|
8
|
+
from abc import ABC, abstractmethod
|
9
|
+
from typing import List, Optional, Union
|
7
10
|
from .elasticnet import ElasticNet
|
8
11
|
from .neural import TabularNet
|
9
12
|
from .utils import evaluate_basis_functions, get_features_involved, sympify_formula, get_latex_formula
|
10
13
|
from sklearn.model_selection import train_test_split
|
11
14
|
from sklearn.metrics import r2_score, accuracy_score
|
12
15
|
from .exceptions import *
|
13
|
-
import sys
|
14
|
-
from tqdm import tqdm
|
15
16
|
|
16
17
|
class OIKAN(ABC):
|
17
18
|
"""
|
@@ -23,7 +24,7 @@ class OIKAN(ABC):
|
|
23
24
|
List of hidden layer sizes for the neural network.
|
24
25
|
activation : str, optional (default='relu')
|
25
26
|
Activation function for the neural network ('relu', 'tanh', 'leaky_relu', 'elu', 'swish', 'gelu').
|
26
|
-
augmentation_factor : int, optional (default=
|
27
|
+
augmentation_factor : int, optional (default=1)
|
27
28
|
Number of augmented samples per original sample.
|
28
29
|
alpha : float, optional (default=1.0)
|
29
30
|
ElasticNet regularization strength.
|
@@ -44,33 +45,34 @@ class OIKAN(ABC):
|
|
44
45
|
Whether to display training progress.
|
45
46
|
evaluate_nn : bool, optional (default=False)
|
46
47
|
Whether to evaluate neural network performance before full training.
|
47
|
-
random_state: int, optional (default=
|
48
|
+
random_state: int, optional (default=42)
|
48
49
|
Random seed for reproducibility.
|
49
50
|
"""
|
50
|
-
def __init__(
|
51
|
-
|
52
|
-
|
51
|
+
def __init__(
|
52
|
+
self,
|
53
|
+
hidden_sizes: List[int] = [64, 64],
|
54
|
+
activation: str = 'relu',
|
55
|
+
augmentation_factor: int = 1,
|
56
|
+
alpha: float = 1.0,
|
57
|
+
l1_ratio: float = 0.5,
|
58
|
+
sigma: float = 5.0,
|
59
|
+
epochs: int = 100,
|
60
|
+
lr: float = 0.001,
|
61
|
+
batch_size: int = 32,
|
62
|
+
verbose: bool = False,
|
63
|
+
evaluate_nn: bool = False,
|
64
|
+
top_k: int = 5,
|
65
|
+
random_state: int = 42
|
66
|
+
) -> None:
|
53
67
|
if not isinstance(hidden_sizes, list) or not all(isinstance(x, int) and x > 0 for x in hidden_sizes):
|
54
68
|
raise InvalidParameterError("hidden_sizes must be a list of positive integers")
|
55
69
|
if activation not in ['relu', 'tanh', 'leaky_relu', 'elu', 'swish', 'gelu']:
|
56
70
|
raise InvalidParameterError(f"Unsupported activation function: {activation}")
|
57
|
-
if
|
58
|
-
raise InvalidParameterError("augmentation_factor must be
|
59
|
-
if not
|
60
|
-
raise InvalidParameterError("
|
61
|
-
|
62
|
-
raise InvalidParameterError("Learning rate must be between 0 and 1")
|
63
|
-
if not isinstance(batch_size, int) or batch_size < 1:
|
64
|
-
raise InvalidParameterError("batch_size must be a positive integer")
|
65
|
-
if not isinstance(epochs, int) or epochs < 1:
|
66
|
-
raise InvalidParameterError("epochs must be a positive integer")
|
67
|
-
if not 0 <= alpha <= 1:
|
68
|
-
raise InvalidParameterError("alpha must be between 0 and 1")
|
69
|
-
if not 0 <= l1_ratio <= 1:
|
70
|
-
raise InvalidParameterError("l1_ratio must be between 0 and 1")
|
71
|
-
if sigma <= 0:
|
72
|
-
raise InvalidParameterError("sigma must be positive")
|
73
|
-
|
71
|
+
if augmentation_factor < 1 or top_k < 1 or batch_size < 1 or epochs < 1:
|
72
|
+
raise InvalidParameterError("augmentation_factor, top_k, batch_size, and epochs must be positive integers")
|
73
|
+
if not 0 < lr < 1 or not 0 <= alpha <= 1 or not 0 <= l1_ratio <= 1 or sigma <= 0:
|
74
|
+
raise InvalidParameterError("Invalid values for lr, alpha, l1_ratio, or sigma")
|
75
|
+
|
74
76
|
self.hidden_sizes = hidden_sizes
|
75
77
|
self.activation = activation
|
76
78
|
self.augmentation_factor = augmentation_factor
|
@@ -83,25 +85,24 @@ class OIKAN(ABC):
|
|
83
85
|
self.verbose = verbose
|
84
86
|
self.evaluate_nn = evaluate_nn
|
85
87
|
self.top_k = top_k
|
86
|
-
self.neural_net = None
|
87
|
-
self.symbolic_model = None
|
88
|
+
self.neural_net: Optional[TabularNet] = None
|
89
|
+
self.symbolic_model: Optional[dict] = None
|
88
90
|
self.evaluation_done = False
|
89
91
|
self.random_state = random_state
|
90
92
|
self.__version__ = '0.0.3'
|
91
|
-
|
92
|
-
|
93
|
-
|
94
|
-
np.random.seed(self.random_state)
|
93
|
+
|
94
|
+
torch.manual_seed(self.random_state)
|
95
|
+
np.random.seed(self.random_state)
|
95
96
|
|
96
97
|
@abstractmethod
|
97
|
-
def fit(self, X, y):
|
98
|
+
def fit(self, X: np.ndarray, y: np.ndarray) -> None:
|
98
99
|
pass
|
99
100
|
|
100
101
|
@abstractmethod
|
101
|
-
def predict(self, X):
|
102
|
+
def predict(self, X: np.ndarray) -> np.ndarray:
|
102
103
|
pass
|
103
104
|
|
104
|
-
def get_formula(self, type='original'):
|
105
|
+
def get_formula(self, type: str = 'original') -> Union[str, List[str]]:
|
105
106
|
"""
|
106
107
|
Returns the symbolic formula(s) as a string (regression) or list of strings (classification).
|
107
108
|
|
@@ -149,19 +150,24 @@ class OIKAN(ABC):
|
|
149
150
|
formulas.append(f"Class {self.classes_[c]}: {formula}")
|
150
151
|
return formulas
|
151
152
|
|
152
|
-
def feature_importances(self):
|
153
|
+
def feature_importances(self, column_names: Optional[List[str]] = None) -> Union[np.ndarray, tuple]:
|
153
154
|
"""
|
154
155
|
Computes the importance of each original feature based on the symbolic model.
|
155
156
|
|
156
157
|
Returns:
|
157
158
|
--------
|
158
|
-
numpy.ndarray : Normalized feature importances.
|
159
|
+
numpy.ndarray : Normalized feature importances. If column_names was provided, also returns a dict mapping feature name -> feature label (symbolic)
|
159
160
|
"""
|
160
161
|
if self.symbolic_model is None:
|
161
162
|
raise ValueError("Model not fitted yet.")
|
162
163
|
basis_functions = self.symbolic_model['basis_functions']
|
163
164
|
n_features = self.symbolic_model['n_features']
|
164
165
|
importances = np.zeros(n_features)
|
166
|
+
|
167
|
+
# Validation of column names
|
168
|
+
if column_names is not None:
|
169
|
+
if not hasattr(column_names, '__len__') or len(column_names) != n_features:
|
170
|
+
raise InvalidParameterError("column_names must be a sequence with length equal to number of features")
|
165
171
|
|
166
172
|
# Handle regression case
|
167
173
|
if 'coefficients' in self.symbolic_model:
|
@@ -170,7 +176,8 @@ class OIKAN(ABC):
|
|
170
176
|
if coefficients[i] != 0:
|
171
177
|
features_involved = get_features_involved(func)
|
172
178
|
for idx in features_involved:
|
173
|
-
|
179
|
+
if 0 <= idx < n_features:
|
180
|
+
importances[idx] += np.abs(coefficients[i])
|
174
181
|
# Handle classification case with multiple coefficient sets
|
175
182
|
else:
|
176
183
|
for coef in self.symbolic_model['coefficients_list']:
|
@@ -178,12 +185,22 @@ class OIKAN(ABC):
|
|
178
185
|
if coef[i] != 0:
|
179
186
|
features_involved = get_features_involved(func)
|
180
187
|
for idx in features_involved:
|
181
|
-
|
188
|
+
if 0 <= idx < n_features:
|
189
|
+
importances[idx] += np.abs(coef[i])
|
182
190
|
|
183
191
|
total = importances.sum()
|
184
|
-
|
192
|
+
normalized = importances / total if total > 0 else importances
|
193
|
+
|
194
|
+
if column_names is not None:
|
195
|
+
feature_map = dict()
|
196
|
+
# map feature name -> formula label
|
197
|
+
for idx, feature_name in enumerate(column_names):
|
198
|
+
feature_map[feature_name] = f'x{idx}'
|
199
|
+
return normalized, feature_map
|
200
|
+
|
201
|
+
return normalized
|
185
202
|
|
186
|
-
def save(self, path):
|
203
|
+
def save(self, path: str) -> None:
|
187
204
|
"""
|
188
205
|
Saves the symbolic model to a .json file.
|
189
206
|
|
@@ -220,7 +237,7 @@ class OIKAN(ABC):
|
|
220
237
|
if self.verbose:
|
221
238
|
print(f"Model saved to {path}")
|
222
239
|
|
223
|
-
def load(self, path):
|
240
|
+
def load(self, path: str) -> None:
|
224
241
|
"""
|
225
242
|
Loads the symbolic model from a .json file.
|
226
243
|
|
@@ -253,9 +270,9 @@ class OIKAN(ABC):
|
|
253
270
|
if self.verbose:
|
254
271
|
print(f"Model loaded from {path}")
|
255
272
|
|
256
|
-
def _evaluate_neural_net(self, X, y, output_size, loss_fn):
|
273
|
+
def _evaluate_neural_net(self, X: np.ndarray, y: np.ndarray, output_size: int, loss_fn: nn.Module) -> None:
|
257
274
|
"""Evaluates neural network performance on train-test split."""
|
258
|
-
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=
|
275
|
+
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=self.random_state)
|
259
276
|
|
260
277
|
input_size = X.shape[1]
|
261
278
|
self.neural_net = TabularNet(input_size, self.hidden_sizes, output_size, self.activation)
|
@@ -289,7 +306,7 @@ class OIKAN(ABC):
|
|
289
306
|
# Retrain on full dataset
|
290
307
|
self._train_neural_net(X, y, output_size, loss_fn)
|
291
308
|
|
292
|
-
def _train_neural_net(self, X, y, output_size, loss_fn):
|
309
|
+
def _train_neural_net(self, X: np.ndarray, y: np.ndarray, output_size: int, loss_fn: nn.Module) -> None:
|
293
310
|
"""Trains the neural network on the input data."""
|
294
311
|
if self.evaluate_nn and not self.evaluation_done:
|
295
312
|
self.evaluation_done = True
|
@@ -326,7 +343,7 @@ class OIKAN(ABC):
|
|
326
343
|
if self.verbose:
|
327
344
|
epoch_iterator.set_postfix({'loss': f'{total_loss/len(loader):.4f}'})
|
328
345
|
|
329
|
-
def _generate_augmented_data(self, X):
|
346
|
+
def _generate_augmented_data(self, X: np.ndarray) -> np.ndarray:
|
330
347
|
"""Generates augmented data by adding Gaussian noise."""
|
331
348
|
if self.augmentation_factor == 1:
|
332
349
|
return np.array([]).reshape(0, X.shape[1])
|
@@ -339,7 +356,7 @@ class OIKAN(ABC):
|
|
339
356
|
|
340
357
|
return np.vstack(X_aug)
|
341
358
|
|
342
|
-
def _perform_symbolic_regression(self, X, y):
|
359
|
+
def _perform_symbolic_regression(self, X: np.ndarray, y: np.ndarray) -> None:
|
343
360
|
"""
|
344
361
|
Performs hierarchical symbolic regression using a two-stage approach.
|
345
362
|
|
@@ -475,7 +492,7 @@ class OIKAN(ABC):
|
|
475
492
|
'coefficients_list': coefficients_list
|
476
493
|
}
|
477
494
|
|
478
|
-
def _print_system_info(self):
|
495
|
+
def _print_system_info(self) -> None:
|
479
496
|
"""Prints system information (for debugging purposes)."""
|
480
497
|
import platform
|
481
498
|
import os
|
@@ -493,7 +510,7 @@ class OIKAN(ABC):
|
|
493
510
|
|
494
511
|
class OIKANRegressor(OIKAN):
|
495
512
|
"""OIKAN model for regression tasks."""
|
496
|
-
def fit(self, X, y):
|
513
|
+
def fit(self, X: np.ndarray, y: np.ndarray) -> None:
|
497
514
|
"""
|
498
515
|
Fits the regressor to the data.
|
499
516
|
|
@@ -538,7 +555,7 @@ class OIKANRegressor(OIKAN):
|
|
538
555
|
if self.verbose:
|
539
556
|
print("OIKANRegressor model training completed successfully!")
|
540
557
|
|
541
|
-
def predict(self, X):
|
558
|
+
def predict(self, X: np.ndarray) -> np.ndarray:
|
542
559
|
"""
|
543
560
|
Predicts target values for the input data.
|
544
561
|
|
@@ -561,7 +578,7 @@ class OIKANRegressor(OIKAN):
|
|
561
578
|
|
562
579
|
class OIKANClassifier(OIKAN):
|
563
580
|
"""OIKAN model for classification tasks."""
|
564
|
-
def fit(self, X, y):
|
581
|
+
def fit(self, X: np.ndarray, y: np.ndarray) -> None:
|
565
582
|
"""
|
566
583
|
Fits the classifier to the data.
|
567
584
|
|
@@ -611,7 +628,7 @@ class OIKANClassifier(OIKAN):
|
|
611
628
|
if self.verbose:
|
612
629
|
print("OIKANClassifier model training completed successfully!")
|
613
630
|
|
614
|
-
def predict(self, X):
|
631
|
+
def predict(self, X: np.ndarray) -> np.ndarray:
|
615
632
|
"""
|
616
633
|
Predicts class labels for the input data.
|
617
634
|
|
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.4
|
2
2
|
Name: oikan
|
3
|
-
Version: 0.0.3.11
|
3
|
+
Version: 0.0.3.12
|
4
4
|
Summary: OIKAN: Neuro-Symbolic ML for Scientific Discovery
|
5
5
|
Author: Arman Zhalgasbayev
|
6
6
|
License: MIT
|
@@ -9,7 +9,7 @@ Project-URL: Bug Tracker, https://github.com/silvermete0r/oikan/issues
|
|
9
9
|
Classifier: Programming Language :: Python :: 3
|
10
10
|
Classifier: License :: OSI Approved :: MIT License
|
11
11
|
Classifier: Operating System :: OS Independent
|
12
|
-
Requires-Python: >=3.
|
12
|
+
Requires-Python: >=3.11
|
13
13
|
Description-Content-Type: text/markdown
|
14
14
|
License-File: LICENSE
|
15
15
|
Requires-Dist: torch
|
@@ -62,12 +62,9 @@ OIKAN is a neuro-symbolic machine learning framework inspired by Kolmogorov-Arno
|
|
62
62
|
class OIKAN:
|
63
63
|
def __init__(self, hidden_sizes=[64, 64], activation='relu',
|
64
64
|
polynomial_degree=2, alpha=0.1):
|
65
|
-
#
|
66
|
-
self.
|
67
|
-
#
|
68
|
-
self.augmented_data = self.augment_data(X, y, augmentation_factor=5)
|
69
|
-
# Symbolic regression for interpretable formulas
|
70
|
-
self.symbolic_regression = SymbolicRegression(alpha=alpha, l1_ratio=0.5)
|
65
|
+
self.neural_net = TabularNet(input_size, hidden_sizes, activation) # NN for predicting values in Data Augmentation (Additional)
|
66
|
+
self.augmented_data = self.augment_data(X, y, augmentation_factor=5) # Generate augmented samples (Additional)
|
67
|
+
self.symbolic_regression = SymbolicRegression(alpha=alpha, l1_ratio=0.5) # ElasticNet for symbolic regression (Main part)
|
71
68
|
```
|
72
69
|
|
73
70
|
2. **Basis Functions**: Core set of interpretable transformations:
|
@@ -111,7 +108,7 @@ pip install -e . # Install in development mode
|
|
111
108
|
|
112
109
|
| Requirement | Details |
|
113
110
|
|-------------------|--------------------------------------|
|
114
|
-
| Python | Version 3.
|
111
|
+
| Python | Version 3.11 or higher |
|
115
112
|
| Operating System | Platform independent (Windows/macOS/Linux) |
|
116
113
|
| Memory | Recommended minimum 4GB RAM |
|
117
114
|
| Disk Space | ~100MB for installation (including dependencies) |
|
@@ -130,7 +127,7 @@ from sklearn.metrics import mean_squared_error
|
|
130
127
|
model = OIKANRegressor(
|
131
128
|
hidden_sizes=[32, 32], # Hidden layer sizes
|
132
129
|
activation='relu', # Activation function (other options: 'tanh', 'leaky_relu', 'elu', 'swish', 'gelu')
|
133
|
-
augmentation_factor=5, # Augmentation factor for data generation
|
130
|
+
augmentation_factor=5, # Augmentation factor for data generation (default: 1)
|
134
131
|
alpha=1.0, # ElasticNet regularization strength (Symbolic regression)
|
135
132
|
l1_ratio=0.5, # ElasticNet mixing parameter (0 <= l1_ratio <= 1). 0 is equivalent to Ridge regression, 1 is equivalent to Lasso (Symbolic regression)
|
136
133
|
sigma=5, # Standard deviation of Gaussian noise for data augmentation
|
@@ -140,7 +137,7 @@ model = OIKANRegressor(
|
|
140
137
|
batch_size=32, # Batch size for training
|
141
138
|
verbose=True, # Verbose output during training
|
142
139
|
evaluate_nn=True, # Validate neural network performance before full process
|
143
|
-
random_state=42 # Random seed for reproducibility
|
140
|
+
random_state=42 # Random seed for reproducibility (default: 42)
|
144
141
|
)
|
145
142
|
|
146
143
|
# Fit the model
|
@@ -184,7 +181,7 @@ from sklearn.metrics import accuracy_score
|
|
184
181
|
model = OIKANClassifier(
|
185
182
|
hidden_sizes=[32, 32], # Hidden layer sizes
|
186
183
|
activation='relu', # Activation function (other options: 'tanh', 'leaky_relu', 'elu', 'swish', 'gelu')
|
187
|
-
augmentation_factor=10, # Augmentation factor for data generation
|
184
|
+
augmentation_factor=10, # Augmentation factor for data generation (default: 1)
|
188
185
|
alpha=1.0, # ElasticNet regularization strength (Symbolic regression)
|
189
186
|
l1_ratio=0.5, # ElasticNet mixing parameter (0 <= l1_ratio <= 1). 0 is equivalent to Ridge regression, 1 is equivalent to Lasso (Symbolic regression)
|
190
187
|
sigma=5, # Standard deviation of Gaussian noise for data augmentation
|
@@ -194,7 +191,7 @@ model = OIKANClassifier(
|
|
194
191
|
batch_size=32, # Batch size for training
|
195
192
|
verbose=True, # Verbose output during training
|
196
193
|
evaluate_nn=True, # Validate neural network performance before full process
|
197
|
-
random_state=42 # Random seed for reproducibility
|
194
|
+
random_state=42 # Random seed for reproducibility (default: 42)
|
198
195
|
)
|
199
196
|
|
200
197
|
# Fit the model
|
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
|
|
4
4
|
|
5
5
|
[project]
|
6
6
|
name = "oikan"
|
7
|
-
version = "0.0.3.11"
|
7
|
+
version = "0.0.3.12"
|
8
8
|
description = "OIKAN: Neuro-Symbolic ML for Scientific Discovery"
|
9
9
|
readme = "README.md"
|
10
10
|
authors = [{name = "Arman Zhalgasbayev"}]
|
@@ -15,7 +15,7 @@ dependencies = [
|
|
15
15
|
"tqdm",
|
16
16
|
"sympy"
|
17
17
|
]
|
18
|
-
requires-python = ">=3.
|
18
|
+
requires-python = ">=3.11"
|
19
19
|
license = {text = "MIT"}
|
20
20
|
classifiers = [
|
21
21
|
"Programming Language :: Python :: 3",
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|