oikan 0.0.3.1__tar.gz → 0.0.3.2__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {oikan-0.0.3.1 → oikan-0.0.3.2}/PKG-INFO +3 -3
- {oikan-0.0.3.1 → oikan-0.0.3.2}/README.md +2 -2
- {oikan-0.0.3.1 → oikan-0.0.3.2}/oikan/model.py +62 -4
- {oikan-0.0.3.1 → oikan-0.0.3.2}/oikan.egg-info/PKG-INFO +3 -3
- {oikan-0.0.3.1 → oikan-0.0.3.2}/oikan.egg-info/SOURCES.txt +0 -1
- {oikan-0.0.3.1 → oikan-0.0.3.2}/pyproject.toml +1 -1
- oikan-0.0.3.1/oikan/symbolic.py +0 -55
- {oikan-0.0.3.1 → oikan-0.0.3.2}/LICENSE +0 -0
- {oikan-0.0.3.1 → oikan-0.0.3.2}/oikan/__init__.py +0 -0
- {oikan-0.0.3.1 → oikan-0.0.3.2}/oikan/exceptions.py +0 -0
- {oikan-0.0.3.1 → oikan-0.0.3.2}/oikan/neural.py +0 -0
- {oikan-0.0.3.1 → oikan-0.0.3.2}/oikan/utils.py +0 -0
- {oikan-0.0.3.1 → oikan-0.0.3.2}/oikan.egg-info/dependency_links.txt +0 -0
- {oikan-0.0.3.1 → oikan-0.0.3.2}/oikan.egg-info/requires.txt +0 -0
- {oikan-0.0.3.1 → oikan-0.0.3.2}/oikan.egg-info/top_level.txt +0 -0
- {oikan-0.0.3.1 → oikan-0.0.3.2}/setup.cfg +0 -0
- {oikan-0.0.3.1 → oikan-0.0.3.2}/setup.py +0 -0
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.4
|
2
2
|
Name: oikan
|
3
|
-
Version: 0.0.3.1
|
3
|
+
Version: 0.0.3.2
|
4
4
|
Summary: OIKAN: Neuro-Symbolic ML for Scientific Discovery
|
5
5
|
Author: Arman Zhalgasbayev
|
6
6
|
License: MIT
|
@@ -202,7 +202,7 @@ loaded_model.load("outputs/model.json")
|
|
202
202
|
|
203
203
|
### Architecture Diagram
|
204
204
|
|
205
|
-
|
205
|
+
-architecture-oop.png)
|
206
206
|
|
207
207
|
## Contributing
|
208
208
|
|
@@ -222,7 +222,7 @@ If you use OIKAN in your research, please cite:
|
|
222
222
|
|
223
223
|
```bibtex
|
224
224
|
@software{oikan2025,
|
225
|
-
title = {OIKAN:
|
225
|
+
title = {OIKAN: Neuro-Symbolic ML for Scientific Discovery},
|
226
226
|
author = {Zhalgasbayev, Arman},
|
227
227
|
year = {2025},
|
228
228
|
url = {https://github.com/silvermete0r/OIKAN}
|
@@ -184,7 +184,7 @@ loaded_model.load("outputs/model.json")
|
|
184
184
|
|
185
185
|
### Architecture Diagram
|
186
186
|
|
187
|
-
|
187
|
+
-architecture-oop.png)
|
188
188
|
|
189
189
|
## Contributing
|
190
190
|
|
@@ -204,7 +204,7 @@ If you use OIKAN in your research, please cite:
|
|
204
204
|
|
205
205
|
```bibtex
|
206
206
|
@software{oikan2025,
|
207
|
-
title = {OIKAN:
|
207
|
+
title = {OIKAN: Neuro-Symbolic ML for Scientific Discovery},
|
208
208
|
author = {Zhalgasbayev, Arman},
|
209
209
|
year = {2025},
|
210
210
|
url = {https://github.com/silvermete0r/OIKAN}
|
@@ -8,6 +8,9 @@ from abc import ABC, abstractmethod
|
|
8
8
|
import json
|
9
9
|
from .neural import TabularNet
|
10
10
|
from .utils import evaluate_basis_functions, get_features_involved
|
11
|
+
from sklearn.model_selection import train_test_split
|
12
|
+
from sklearn.metrics import r2_score, accuracy_score
|
13
|
+
import sys
|
11
14
|
|
12
15
|
class OIKAN(ABC):
|
13
16
|
"""
|
@@ -18,7 +21,7 @@ class OIKAN(ABC):
|
|
18
21
|
hidden_sizes : list, optional (default=[64, 64])
|
19
22
|
List of hidden layer sizes for the neural network.
|
20
23
|
activation : str, optional (default='relu')
|
21
|
-
Activation function for the neural network ('relu'
|
24
|
+
Activation function for the neural network ('relu', 'tanh', 'leaky_relu', 'elu', 'swish', 'gelu').
|
22
25
|
augmentation_factor : int, optional (default=10)
|
23
26
|
Number of augmented samples per original sample.
|
24
27
|
polynomial_degree : int, optional (default=2)
|
@@ -35,10 +38,12 @@ class OIKAN(ABC):
|
|
35
38
|
Batch size for neural network training.
|
36
39
|
verbose : bool, optional (default=False)
|
37
40
|
Whether to display training progress.
|
41
|
+
evaluate_nn : bool, optional (default=False)
|
42
|
+
Whether to evaluate neural network performance before full training.
|
38
43
|
"""
|
39
44
|
def __init__(self, hidden_sizes=[64, 64], activation='relu', augmentation_factor=10,
|
40
45
|
polynomial_degree=2, alpha=0.1, sigma=0.1, epochs=100, lr=0.001, batch_size=32,
|
41
|
-
verbose=False):
|
46
|
+
verbose=False, evaluate_nn=False):
|
42
47
|
self.hidden_sizes = hidden_sizes
|
43
48
|
self.activation = activation
|
44
49
|
self.augmentation_factor = augmentation_factor
|
@@ -49,8 +54,10 @@ class OIKAN(ABC):
|
|
49
54
|
self.lr = lr
|
50
55
|
self.batch_size = batch_size
|
51
56
|
self.verbose = verbose
|
57
|
+
self.evaluate_nn = evaluate_nn
|
52
58
|
self.neural_net = None
|
53
59
|
self.symbolic_model = None
|
60
|
+
self.evaluation_done = False
|
54
61
|
|
55
62
|
@abstractmethod
|
56
63
|
def fit(self, X, y):
|
@@ -61,7 +68,7 @@ class OIKAN(ABC):
|
|
61
68
|
pass
|
62
69
|
|
63
70
|
def get_formula(self):
|
64
|
-
"""Returns the symbolic formula(s) as a string or list of strings."""
|
71
|
+
"""Returns the symbolic formula(s) as a string (regression) or list of strings (classification)."""
|
65
72
|
if self.symbolic_model is None:
|
66
73
|
raise ValueError("Model not fitted yet.")
|
67
74
|
basis_functions = self.symbolic_model['basis_functions']
|
@@ -172,10 +179,53 @@ class OIKAN(ABC):
|
|
172
179
|
if 'classes' in model_data:
|
173
180
|
self.classes_ = np.array(model_data['classes'])
|
174
181
|
|
182
|
+
def _evaluate_neural_net(self, X, y, output_size, loss_fn):
|
183
|
+
"""Evaluates neural network performance on train-test split."""
|
184
|
+
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
|
185
|
+
|
186
|
+
input_size = X.shape[1]
|
187
|
+
self.neural_net = TabularNet(input_size, self.hidden_sizes, output_size, self.activation)
|
188
|
+
optimizer = optim.Adam(self.neural_net.parameters(), lr=self.lr)
|
189
|
+
|
190
|
+
# Train on the training set
|
191
|
+
self._train_neural_net(X_train, y_train, output_size, loss_fn)
|
192
|
+
|
193
|
+
# Evaluate on test set
|
194
|
+
self.neural_net.eval()
|
195
|
+
with torch.no_grad():
|
196
|
+
y_pred = self.neural_net(torch.tensor(X_test, dtype=torch.float32))
|
197
|
+
if output_size == 1: # Regression
|
198
|
+
y_pred = y_pred.numpy()
|
199
|
+
score = r2_score(y_test, y_pred)
|
200
|
+
metric_name = "R² Score"
|
201
|
+
else: # Classification
|
202
|
+
y_pred = torch.argmax(y_pred, dim=1).numpy()
|
203
|
+
y_test = torch.argmax(y_test, dim=1).numpy()
|
204
|
+
score = accuracy_score(y_test, y_pred)
|
205
|
+
metric_name = "Accuracy"
|
206
|
+
|
207
|
+
print(f"\nNeural Network Evaluation:")
|
208
|
+
print(f"Train size: {len(X_train)}, Test size: {len(X_test)}")
|
209
|
+
print(f"{metric_name}: {score:.4f}")
|
210
|
+
|
211
|
+
# Ask user for confirmation
|
212
|
+
response = input("\nProceed with full training and symbolic regression? [Y/n]: ").lower()
|
213
|
+
if response not in ['y', 'yes']:
|
214
|
+
sys.exit("Training cancelled by user.")
|
215
|
+
|
216
|
+
# Retrain on full dataset
|
217
|
+
self._train_neural_net(X, y, output_size, loss_fn)
|
218
|
+
|
175
219
|
def _train_neural_net(self, X, y, output_size, loss_fn):
|
176
220
|
"""Trains the neural network on the input data."""
|
221
|
+
if self.evaluate_nn and not self.evaluation_done:
|
222
|
+
self.evaluation_done = True
|
223
|
+
self._evaluate_neural_net(X, y, output_size, loss_fn)
|
224
|
+
return
|
225
|
+
|
177
226
|
input_size = X.shape[1]
|
178
|
-
self.neural_net
|
227
|
+
if self.neural_net is None:
|
228
|
+
self.neural_net = TabularNet(input_size, self.hidden_sizes, output_size, self.activation)
|
179
229
|
optimizer = optim.Adam(self.neural_net.parameters(), lr=self.lr)
|
180
230
|
dataset = torch.utils.data.TensorDataset(torch.tensor(X, dtype=torch.float32),
|
181
231
|
torch.tensor(y, dtype=torch.float32))
|
@@ -263,10 +313,14 @@ class OIKANRegressor(OIKAN):
|
|
263
313
|
X = np.asarray(X)
|
264
314
|
y = np.asarray(y).reshape(-1, 1)
|
265
315
|
self._train_neural_net(X, y, output_size=1, loss_fn=nn.MSELoss())
|
316
|
+
if self.verbose:
|
317
|
+
print(f"Original data: features shape: {X.shape} | target shape: {y.shape}")
|
266
318
|
X_aug = self._generate_augmented_data(X)
|
267
319
|
self.neural_net.eval()
|
268
320
|
with torch.no_grad():
|
269
321
|
y_aug = self.neural_net(torch.tensor(X_aug, dtype=torch.float32)).detach().numpy()
|
322
|
+
if self.verbose:
|
323
|
+
print(f"Augmented data: features shape: {X_aug.shape} | target shape: {y_aug.shape}")
|
270
324
|
self._perform_symbolic_regression(X_aug, y_aug)
|
271
325
|
|
272
326
|
def predict(self, X):
|
@@ -311,10 +365,14 @@ class OIKANClassifier(OIKAN):
|
|
311
365
|
n_classes = len(self.classes_)
|
312
366
|
y_onehot = nn.functional.one_hot(torch.tensor(y_encoded), num_classes=n_classes).float()
|
313
367
|
self._train_neural_net(X, y_onehot, output_size=n_classes, loss_fn=nn.CrossEntropyLoss())
|
368
|
+
if self.verbose:
|
369
|
+
print(f"Original data: features shape: {X.shape} | target shape: {y.shape}")
|
314
370
|
X_aug = self._generate_augmented_data(X)
|
315
371
|
self.neural_net.eval()
|
316
372
|
with torch.no_grad():
|
317
373
|
logits_aug = self.neural_net(torch.tensor(X_aug, dtype=torch.float32)).detach().numpy()
|
374
|
+
if self.verbose:
|
375
|
+
print(f"Augmented data: features shape: {X_aug.shape} | target shape: {logits_aug.shape}")
|
318
376
|
self._perform_symbolic_regression(X_aug, logits_aug)
|
319
377
|
|
320
378
|
def predict(self, X):
|
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.4
|
2
2
|
Name: oikan
|
3
|
-
Version: 0.0.3.1
|
3
|
+
Version: 0.0.3.2
|
4
4
|
Summary: OIKAN: Neuro-Symbolic ML for Scientific Discovery
|
5
5
|
Author: Arman Zhalgasbayev
|
6
6
|
License: MIT
|
@@ -202,7 +202,7 @@ loaded_model.load("outputs/model.json")
|
|
202
202
|
|
203
203
|
### Architecture Diagram
|
204
204
|
|
205
|
-
|
205
|
+
-architecture-oop.png)
|
206
206
|
|
207
207
|
## Contributing
|
208
208
|
|
@@ -222,7 +222,7 @@ If you use OIKAN in your research, please cite:
|
|
222
222
|
|
223
223
|
```bibtex
|
224
224
|
@software{oikan2025,
|
225
|
-
title = {OIKAN:
|
225
|
+
title = {OIKAN: Neuro-Symbolic ML for Scientific Discovery},
|
226
226
|
author = {Zhalgasbayev, Arman},
|
227
227
|
year = {2025},
|
228
228
|
url = {https://github.com/silvermete0r/OIKAN}
|
oikan-0.0.3.1/oikan/symbolic.py
DELETED
@@ -1,55 +0,0 @@
|
|
1
|
-
import numpy as np
|
2
|
-
from sklearn.preprocessing import PolynomialFeatures
|
3
|
-
from sklearn.linear_model import Lasso
|
4
|
-
|
5
|
-
def symbolic_regression(X, y, degree=2, alpha=0.1):
|
6
|
-
"""
|
7
|
-
Performs symbolic regression on the input data.
|
8
|
-
|
9
|
-
Parameters:
|
10
|
-
-----------
|
11
|
-
X : array-like of shape (n_samples, n_features)
|
12
|
-
Input data.
|
13
|
-
y : array-like of shape (n_samples,) or (n_samples, n_targets)
|
14
|
-
Target values.
|
15
|
-
degree : int, optional (default=2)
|
16
|
-
Maximum polynomial degree.
|
17
|
-
alpha : float, optional (default=0.1)
|
18
|
-
L1 regularization strength.
|
19
|
-
|
20
|
-
Returns:
|
21
|
-
--------
|
22
|
-
dict : Contains 'basis_functions', 'coefficients' (or 'coefficients_list'), 'n_features', 'degree'
|
23
|
-
"""
|
24
|
-
poly = PolynomialFeatures(degree=degree, include_bias=True)
|
25
|
-
X_poly = poly.fit_transform(X)
|
26
|
-
model = Lasso(alpha=alpha, fit_intercept=False)
|
27
|
-
model.fit(X_poly, y)
|
28
|
-
if len(y.shape) == 1 or y.shape[1] == 1:
|
29
|
-
coef = model.coef_.flatten()
|
30
|
-
selected_indices = np.where(np.abs(coef) > 1e-6)[0]
|
31
|
-
return {
|
32
|
-
'n_features': X.shape[1],
|
33
|
-
'degree': degree,
|
34
|
-
'basis_functions': poly.get_feature_names_out()[selected_indices].tolist(),
|
35
|
-
'coefficients': coef[selected_indices].tolist()
|
36
|
-
}
|
37
|
-
else:
|
38
|
-
coefficients_list = []
|
39
|
-
selected_indices = set()
|
40
|
-
for c in range(y.shape[1]):
|
41
|
-
coef = model.coef_[c]
|
42
|
-
indices = np.where(np.abs(coef) > 1e-6)[0]
|
43
|
-
selected_indices.update(indices)
|
44
|
-
selected_indices = list(selected_indices)
|
45
|
-
basis_functions = poly.get_feature_names_out()[selected_indices].tolist()
|
46
|
-
for c in range(y.shape[1]):
|
47
|
-
coef = model.coef_[c]
|
48
|
-
coef_selected = coef[selected_indices].tolist()
|
49
|
-
coefficients_list.append(coef_selected)
|
50
|
-
return {
|
51
|
-
'n_features': X.shape[1],
|
52
|
-
'degree': degree,
|
53
|
-
'basis_functions': basis_functions,
|
54
|
-
'coefficients_list': coefficients_list
|
55
|
-
}
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|