oikan 0.0.2.5__py3-none-any.whl → 0.0.3.2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- oikan/__init__.py +14 -0
- oikan/exceptions.py +5 -13
- oikan/model.py +359 -441
- oikan/neural.py +43 -0
- oikan/utils.py +59 -49
- oikan-0.0.3.2.dist-info/METADATA +233 -0
- oikan-0.0.3.2.dist-info/RECORD +10 -0
- {oikan-0.0.2.5.dist-info → oikan-0.0.3.2.dist-info}/WHEEL +1 -1
- oikan-0.0.2.5.dist-info/METADATA +0 -195
- oikan-0.0.2.5.dist-info/RECORD +0 -9
- {oikan-0.0.2.5.dist-info → oikan-0.0.3.2.dist-info}/licenses/LICENSE +0 -0
- {oikan-0.0.2.5.dist-info → oikan-0.0.3.2.dist-info}/top_level.txt +0 -0
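The 0.0.3.x line replaces the per-edge symbolic KAN layers of 0.0.2.5 (SymbolicEdge, KANLayer, EdgeActivation, all deleted below) with a two-stage pipeline: train a TabularNet on the data, then distill it into a sparse polynomial formula with Lasso. A minimal usage sketch of the new API, based on the classes added in this diff (the synthetic data and hyperparameter values are illustrative, not from the package):

    import numpy as np
    from oikan.model import OIKANRegressor

    X = np.random.rand(200, 2)
    y = 3 * X[:, 0] ** 2 + 2 * X[:, 1]          # illustrative target

    model = OIKANRegressor(polynomial_degree=2, alpha=0.1, epochs=50)
    model.fit(X, y)             # trains the neural net, augments data, fits Lasso
    y_pred = model.predict(X)   # evaluates only the extracted symbolic formula
    print(model.get_formula())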
oikan/model.py
CHANGED
@@ -1,481 +1,399 @@
+import numpy as np
 import torch
 import torch.nn as nn
-import
-from sklearn.
-from .
-from
-
-
-
-
-
-
-        self.activation = EdgeActivation()
-
-    def forward(self, x):
-        return self.activation(x)
-
-    def get_symbolic_repr(self, threshold=1e-4):
-        return self.activation.get_symbolic_repr(threshold)
+import torch.optim as optim
+from sklearn.preprocessing import PolynomialFeatures
+from sklearn.linear_model import Lasso
+from abc import ABC, abstractmethod
+import json
+from .neural import TabularNet
+from .utils import evaluate_basis_functions, get_features_involved
+from sklearn.model_selection import train_test_split
+from sklearn.metrics import r2_score, accuracy_score
+import sys
 
-class
-    """
-
-        super().__init__()
-        self.input_dim = input_dim
-        self.output_dim = output_dim
-
-        self.edges = nn.ModuleList([
-            nn.ModuleList([SymbolicEdge() for _ in range(output_dim)])
-            for _ in range(input_dim)
-        ])
-
-        # Updated initialization using Xavier uniform initialization
-        self.combination_weights = nn.Parameter(
-            nn.init.xavier_uniform_(torch.empty(input_dim, output_dim))
-        )
-
-    def forward(self, x):
-        x_split = x.split(1, dim=1) # list of (batch, 1) tensors for each input feature
-        edge_outputs = torch.stack([
-            torch.stack([edge(x_i).squeeze() for edge in edge_list], dim=1)
-            for x_i, edge_list in zip(x_split, self.edges)
-        ], dim=1) # shape: (batch, input_dim, output_dim)
-        combined = edge_outputs * self.combination_weights.unsqueeze(0)
-        return combined.sum(dim=1)
+class OIKAN(ABC):
+    """
+    Base class for the OIKAN neuro-symbolic framework.
 
-
-
-
-    for
-
-
-
-
-
-
-
-
-
+    Parameters:
+    -----------
+    hidden_sizes : list, optional (default=[64, 64])
+        List of hidden layer sizes for the neural network.
+    activation : str, optional (default='relu')
+        Activation function for the neural network ('relu', 'tanh', 'leaky_relu', 'elu', 'swish', 'gelu').
+    augmentation_factor : int, optional (default=10)
+        Number of augmented samples per original sample.
+    polynomial_degree : int, optional (default=2)
+        Maximum degree of polynomial features for symbolic regression.
+    alpha : float, optional (default=0.1)
+        L1 regularization strength for Lasso in symbolic regression.
+    sigma : float, optional (default=0.1)
+        Standard deviation of Gaussian noise for data augmentation.
+    epochs : int, optional (default=100)
+        Number of epochs for neural network training.
+    lr : float, optional (default=0.001)
+        Learning rate for neural network optimization.
+    batch_size : int, optional (default=32)
+        Batch size for neural network training.
+    verbose : bool, optional (default=False)
+        Whether to display training progress.
+    evaluate_nn : bool, optional (default=False)
+        Whether to evaluate neural network performance before full training.
+    """
+    def __init__(self, hidden_sizes=[64, 64], activation='relu', augmentation_factor=10,
+                 polynomial_degree=2, alpha=0.1, sigma=0.1, epochs=100, lr=0.001, batch_size=32,
+                 verbose=False, evaluate_nn=False):
+        self.hidden_sizes = hidden_sizes
+        self.activation = activation
+        self.augmentation_factor = augmentation_factor
+        self.polynomial_degree = polynomial_degree
+        self.alpha = alpha
+        self.sigma = sigma
+        self.epochs = epochs
+        self.lr = lr
+        self.batch_size = batch_size
+        self.verbose = verbose
+        self.evaluate_nn = evaluate_nn
+        self.neural_net = None
+        self.symbolic_model = None
+        self.evaluation_done = False
 
-
-
-
-        self.hidden_dims = hidden_dims
-        self.dropout = dropout # Dropout probability for uncertainty quantification
-        self.device = torch.device('cuda' if torch.cuda.is_available() else 'cpu') # Auto device chooser
-        self.model = None
-        self._is_fitted = False
-        self.__name = "OIKAN v0.0.2" # Manual configured version
-        self.loss_history = [] # <-- new attribute to store loss values
-
-    def _build_network(self, input_dim, output_dim):
-        layers = []
-        prev_dim = input_dim
-        for hidden_dim in self.hidden_dims:
-            layers.append(KANLayer(prev_dim, hidden_dim))
-            layers.append(nn.BatchNorm1d(hidden_dim)) # Added batch normalization
-            layers.append(nn.ReLU()) # Added activation function
-            layers.append(nn.Dropout(self.dropout)) # Apply dropout for uncertainty quantification
-            prev_dim = hidden_dim
-        layers.append(KANLayer(prev_dim, output_dim))
-        return nn.Sequential(*layers).to(self.device)
-
-    def _validate_data(self, X, y=None):
-        if not isinstance(X, torch.Tensor):
-            X = torch.FloatTensor(X)
-        if y is not None and not isinstance(y, torch.Tensor):
-            y = torch.FloatTensor(y)
-        return X.to(self.device), (y.to(self.device) if y is not None else None)
+    @abstractmethod
+    def fit(self, X, y):
+        pass
 
-
-
-
-        for term in edge_formula.split(" + "):
-            if term and term != "0":
-                if "*" in term:
-                    coef_str, rest = term.split("*", 1)
-                    try:
-                        coef = float(coef_str)
-                        terms.append(f"{(coef * weight):.4f}*{rest}")
-                    except Exception:
-                        terms.append(term) # fallback
-                else:
-                    try:
-                        terms.append(f"{(float(term) * weight):.4f}")
-                    except Exception:
-                        terms.append(term)
-        return " + ".join(terms) if terms else "0"
+    @abstractmethod
+    def predict(self, X):
+        pass
 
-    def
-        """
-        if
-            raise
-
-
-
-
-
-
-
-        for i in range(n_features):
-            for j in range(n_classes):
-                weight = first_layer.combination_weights[i, j].item()
-                if abs(weight) > 1e-4:
-                    # Use improved threshold for formula extraction
-                    edge_formula = first_layer.edges[i][j].get_symbolic_repr(threshold=1e-6)
-                    formulas[i][j] = self._process_edge_formula(edge_formula, weight)
-                else:
-                    formulas[i][j] = "0"
-        self.symbolic_formula = formulas
-        return formulas
-        else: # Regressor
+    def get_formula(self):
+        """Returns the symbolic formula(s) as a string (regression) or list of strings (classification)."""
+        if self.symbolic_model is None:
+            raise ValueError("Model not fitted yet.")
+        basis_functions = self.symbolic_model['basis_functions']
+        if 'coefficients' in self.symbolic_model:
+            coefficients = self.symbolic_model['coefficients']
+            formula = " + ".join([f"{coefficients[i]:.3f}*{basis_functions[i]}"
+                                  for i in range(len(coefficients)) if coefficients[i] != 0])
+            return formula if formula else "0"
+        else:
             formulas = []
-
-
-
-            formulas.append(self._process_edge_formula(edge_formula, 1.0))
-        self.symbolic_formula = formulas
+            for c, coef in enumerate(self.symbolic_model['coefficients_list']):
+                formula = " + ".join([f"{coef[i]:.3f}*{basis_functions[i]}"
+                                      for i in range(len(coef)) if coef[i] != 0])
+                formulas.append(f"Class {self.classes_[c]}: {formula if formula else '0'}")
             return formulas
 
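For orientation: get_formula joins the nonzero Lasso coefficients with the PolynomialFeatures basis names (x0, x1, x0^2, x0 x1, ...), so a fitted regressor returns a string along these lines (coefficient values are illustrative):

    >>> model.get_formula()
    '2.987*x0^2 + 1.994*x1'

A fitted classifier instead returns a list with one such string per class, prefixed with the class label.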
-    def
-        """
+    def feature_importances(self):
+        """
+        Computes the importance of each original feature based on the symbolic model.
 
-
-
-
-        - A general formula, including softmax for classification
-        - Recommendations and performance results.
+        Returns:
+        --------
+        numpy.ndarray : Normalized feature importances.
         """
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+        if self.symbolic_model is None:
+            raise ValueError("Model not fitted yet.")
+        basis_functions = self.symbolic_model['basis_functions']
+        n_features = self.symbolic_model['n_features']
+        importances = np.zeros(n_features)
+
+        # Handle regression case
+        if 'coefficients' in self.symbolic_model:
+            coefficients = self.symbolic_model['coefficients']
+            for i, func in enumerate(basis_functions):
+                if coefficients[i] != 0:
+                    features_involved = get_features_involved(func)
+                    for idx in features_involved:
+                        importances[idx] += np.abs(coefficients[i])
+        # Handle classification case with multiple coefficient sets
         else:
-
-
-
-
-
-
-            "• Consider the symbolic formula for lightweight and interpretable inference.\n"
-            "• Validate approximation accuracy against the neural model.\n")
-
-        # Disclaimer regarding experimental usage
-        disclaimer = ("\nDisclaimer:\n"
-            "This experimental model is intended for research purposes only and is not production-ready. "
-            "Feel free to fork and build your own project based on this research: "
-            "https://github.com/silvermete0r/oikan\n")
+            for coef in self.symbolic_model['coefficients_list']:
+                for i, func in enumerate(basis_functions):
+                    if coef[i] != 0:
+                        features_involved = get_features_involved(func)
+                        for idx in features_involved:
+                            importances[idx] += np.abs(coef[i])
 
-
-
-            f.write(output)
-        print(f"Symbolic formulas saved to {filename}")
+        total = importances.sum()
+        return importances / total if total > 0 else importances
 
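feature_importances attributes each nonzero term's |coefficient| to every feature appearing in its basis function, then normalizes the result to sum to 1. A short usage sketch (output values illustrative):

    importances = model.feature_importances()
    for i, imp in enumerate(importances):
        print(f"x{i}: {imp:.3f}")   # e.g. x0: 0.600, x1: 0.400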
-    def
-        """
-
-        raise NotFittedError("Model must be fitted before computing scores")
+    def save(self, path):
+        """
+        Saves the symbolic model to a .json file.
 
-
-
-
-
-        """
-
-
+        Parameters:
+        -----------
+        path : str
+            File path to save the model. Should end with .json
+        """
+        if self.symbolic_model is None:
+            raise ValueError("Model not fitted yet.")
+
+        if not path.endswith('.json'):
+            path = path + '.json'
+
+        # Convert numpy arrays and other non-serializable types to lists
+        model_data = {
+            'n_features': self.symbolic_model['n_features'],
+            'degree': self.symbolic_model['degree'],
+            'basis_functions': self.symbolic_model['basis_functions']
+        }
 
-        if
-
+        if 'coefficients' in self.symbolic_model:
+            model_data['coefficients'] = self.symbolic_model['coefficients']
+        else:
+            model_data['coefficients_list'] = [coef for coef in self.symbolic_model['coefficients_list']]
+            if hasattr(self, 'classes_'):
+                model_data['classes'] = self.classes_.tolist()
 
-
-
-
+        with open(path, 'w') as f:
+            json.dump(model_data, f, indent=2)
+
+    def load(self, path):
+        """
+        Loads the symbolic model from a .json file.
 
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+        Parameters:
+        -----------
+        path : str
+            File path to load the model from. Should end with .json
+        """
+        if not path.endswith('.json'):
+            path = path + '.json'
+
+        with open(path, 'r') as f:
+            model_data = json.load(f)
+
+        self.symbolic_model = {
+            'n_features': model_data['n_features'],
+            'degree': model_data['degree'],
+            'basis_functions': model_data['basis_functions']
+        }
 
-
+        if 'coefficients' in model_data:
+            self.symbolic_model['coefficients'] = model_data['coefficients']
+        else:
+            self.symbolic_model['coefficients_list'] = model_data['coefficients_list']
+            if 'classes' in model_data:
+                self.classes_ = np.array(model_data['classes'])
 
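Because save/load serialize only the basis functions and coefficients, a persisted model can be reloaded and used for prediction without retraining the neural network. A sketch of the round trip (file name and X_new are illustrative):

    model.save("symbolic_model.json")    # basis functions + coefficients as JSON

    restored = OIKANRegressor()
    restored.load("symbolic_model.json")
    y_pred = restored.predict(X_new)     # inference uses only the JSON contents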
-    def
-        """
-
-        raise NotFittedError("Model must be fitted before prediction")
+    def _evaluate_neural_net(self, X, y, output_size, loss_fn):
+        """Evaluates neural network performance on train-test split."""
+        X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
 
-
-
-
+        input_size = X.shape[1]
+        self.neural_net = TabularNet(input_size, self.hidden_sizes, output_size, self.activation)
+        optimizer = optim.Adam(self.neural_net.parameters(), lr=self.lr)
 
-
-
-
-
-
-
-
-
-
-
-
+        # Train on the training set
+        self._train_neural_net(X_train, y_train, output_size, loss_fn)
+
+        # Evaluate on test set
+        self.neural_net.eval()
+        with torch.no_grad():
+            y_pred = self.neural_net(torch.tensor(X_test, dtype=torch.float32))
+            if output_size == 1: # Regression
+                y_pred = y_pred.numpy()
+                score = r2_score(y_test, y_pred)
+                metric_name = "R² Score"
+            else: # Classification
+                y_pred = torch.argmax(y_pred, dim=1).numpy()
+                y_test = torch.argmax(y_test, dim=1).numpy()
+                score = accuracy_score(y_test, y_pred)
+                metric_name = "Accuracy"
+
+        print(f"\nNeural Network Evaluation:")
+        print(f"Train size: {len(X_train)}, Test size: {len(X_test)}")
+        print(f"{metric_name}: {score:.4f}")
+
+        # Ask user for confirmation
+        response = input("\nProceed with full training and symbolic regression? [Y/n]: ").lower()
+        if response not in ['y', 'yes']:
+            sys.exit("Training cancelled by user.")
 
-
-
-        from .utils import ADVANCED_LIB # needed to retrieve basis functions
-        with open(filename, "r") as f:
-            content = f.read()
-        # Regex to extract coefficient and function notation.
-        # Matches patterns like: "(-?\d+\.\d+)\*?([\w\(\)\^]+)"
-        matches = re.findall(r"(-?\d+\.\d+)\*?([\w\(\)\^]+)", content)
-        compiled_terms = []
-        for coef_str, func_name in matches:
-            try:
-                coef = float(coef_str)
-                # Search for a matching basis function in ADVANCED_LIB (e.g. 'x', 'x^2', etc.)
-                for key, (notation, func) in ADVANCED_LIB.items():
-                    if notation.strip() == func_name.strip():
-                        compiled_terms.append((coef, func))
-                        break
-            except Exception:
-                continue
-        def prediction_function(x):
-            pred = 0
-            for coef, func in compiled_terms:
-                pred += coef * func(x)
-            return pred
-        return prediction_function
+        # Retrain on full dataset
+        self._train_neural_net(X, y, output_size, loss_fn)
 
-    def
-        """
-        if self.
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-        raise NotFittedError("No model architecture available. Provide input_dim and output_dim to rebuild the model.")
-        self.model = self._build_network(input_dim, output_dim)
-        loaded = torch.load(filepath, map_location=self.device)
-        if isinstance(loaded, dict) and 'state_dict' in loaded:
-            self.model.load_state_dict(loaded['state_dict'])
-            if 'classes_' in loaded:
-                self.classes_ = torch.tensor(loaded['classes_'])
+    def _train_neural_net(self, X, y, output_size, loss_fn):
+        """Trains the neural network on the input data."""
+        if self.evaluate_nn and not self.evaluation_done:
+            self.evaluation_done = True
+            self._evaluate_neural_net(X, y, output_size, loss_fn)
+            return
+
+        input_size = X.shape[1]
+        if self.neural_net is None:
+            self.neural_net = TabularNet(input_size, self.hidden_sizes, output_size, self.activation)
+        optimizer = optim.Adam(self.neural_net.parameters(), lr=self.lr)
+        dataset = torch.utils.data.TensorDataset(torch.tensor(X, dtype=torch.float32),
+                                                 torch.tensor(y, dtype=torch.float32))
+        loader = torch.utils.data.DataLoader(dataset, batch_size=self.batch_size, shuffle=True)
+        self.neural_net.train()
+
+        if self.verbose:
+            from tqdm import tqdm
+            epoch_iterator = tqdm(range(self.epochs), desc="Training")
         else:
-            self.
-        self._is_fitted = True # Mark model as fitted after loading
-        print(f"Model loaded from {filepath}")
+            epoch_iterator = range(self.epochs)
 
-
-
-
+        for epoch in epoch_iterator:
+            total_loss = 0
+            for batch_X, batch_y in loader:
+                optimizer.zero_grad()
+                outputs = self.neural_net(batch_X)
+                loss = loss_fn(outputs, batch_y)
+                loss.backward()
+                optimizer.step()
+                total_loss += loss.item()
 
-
-
-    def fit(self, X, y, epochs=100, lr=0.01, verbose=True):
-        X, y = self._validate_data(X, y)
-        if len(y.shape) == 1:
-            y = y.reshape(-1, 1)
-
-        if self.model is None:
-            self.model = self._build_network(X.shape[1], y.shape[1])
-
-        criterion = nn.MSELoss()
-        optimizer = torch.optim.Adam(self.model.parameters(), lr=lr, weight_decay=1e-5)
-
-        self.model.train()
-        self.loss_history = [] # <-- reset loss history at start of training
-        for epoch in range(epochs):
-            optimizer.zero_grad()
-            y_pred = self.model(X)
-            loss = criterion(y_pred, y)
-
-            if torch.isnan(loss):
-                print("Warning: NaN loss detected, reinitializing model...")
-                self.model = None
-                return self.fit(X, y, epochs, lr/10, verbose)
-
-            loss.backward()
-
-            # Clip gradients
-            torch.nn.utils.clip_grad_norm_(self.model.parameters(), max_norm=1.0)
-
-            optimizer.step()
-
-            self.loss_history.append(loss.item()) # <-- save loss value for epoch
-
-            if verbose and (epoch + 1) % 10 == 0:
-                print(f"Epoch [{epoch+1}/{epochs}], Loss: {loss.item():.4f}")
-
-        self._is_fitted = True
-        return self
+            if self.verbose:
+                epoch_iterator.set_postfix({'loss': f'{total_loss/len(loader):.4f}'})
 
-    def
-
-
-
-
-
-
-
+    def _generate_augmented_data(self, X):
+        """Generates augmented data by adding Gaussian noise."""
+        n_samples = X.shape[0]
+        X_aug = []
+        for _ in range(self.augmentation_factor):
+            noise = np.random.normal(0, self.sigma, X.shape)
+            X_perturbed = X + noise
+            X_aug.append(X_perturbed)
+        return np.vstack(X_aug)
 
-
-
-
-
-        self.
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+    def _perform_symbolic_regression(self, X, y):
+        """Performs symbolic regression using polynomial features and Lasso."""
+        poly = PolynomialFeatures(degree=self.polynomial_degree, include_bias=True)
+        X_poly = poly.fit_transform(X)
+        model = Lasso(alpha=self.alpha, fit_intercept=False)
+        model.fit(X_poly, y)
+        if len(y.shape) == 1 or y.shape[1] == 1:
+            coef = model.coef_.flatten()
+            selected_indices = np.where(np.abs(coef) > 1e-6)[0]
+            self.symbolic_model = {
+                'n_features': X.shape[1],
+                'degree': self.polynomial_degree,
+                'basis_functions': poly.get_feature_names_out()[selected_indices].tolist(),
+                'coefficients': coef[selected_indices].tolist()
+            }
+        else:
+            coefficients_list = []
+            # Note: Using the same basis functions across classes for simplicity
+            selected_indices = set()
+            for c in range(y.shape[1]):
+                coef = model.coef_[c]
+                indices = np.where(np.abs(coef) > 1e-6)[0]
+                selected_indices.update(indices)
+            selected_indices = list(selected_indices)
+            basis_functions = poly.get_feature_names_out()[selected_indices].tolist()
+            for c in range(y.shape[1]):
+                coef = model.coef_[c]
+                coef_selected = coef[selected_indices].tolist()
+                coefficients_list.append(coef_selected)
+            self.symbolic_model = {
+                'n_features': X.shape[1],
+                'degree': self.polynomial_degree,
+                'basis_functions': basis_functions,
+                'coefficients_list': coefficients_list
+            }
 
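The distillation step above is plain scikit-learn: polynomial feature expansion followed by L1-regularized linear regression, with coefficients below 1e-6 pruned. A standalone sketch of the same idea, with teacher predictions standing in for the neural network's outputs (all names and values illustrative):

    import numpy as np
    from sklearn.preprocessing import PolynomialFeatures
    from sklearn.linear_model import Lasso

    X_aug = np.random.rand(500, 2)                       # stand-in for augmented inputs
    y_teacher = 3 * X_aug[:, 0] ** 2 + 2 * X_aug[:, 1]   # stand-in for net outputs

    poly = PolynomialFeatures(degree=2, include_bias=True)
    X_poly = poly.fit_transform(X_aug)
    lasso = Lasso(alpha=0.1, fit_intercept=False).fit(X_poly, y_teacher)

    keep = np.abs(lasso.coef_) > 1e-6                    # same pruning threshold as above
    print(list(zip(poly.get_feature_names_out()[keep], lasso.coef_[keep])))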
-
-
-
-
-
-
+class OIKANRegressor(OIKAN):
+    """OIKAN model for regression tasks."""
+    def fit(self, X, y):
+        """
+        Fits the regressor to the data.
+
+        Parameters:
+        -----------
+        X : array-like of shape (n_samples, n_features)
+            Training data.
+        y : array-like of shape (n_samples,)
+            Target values.
+        """
+        X = np.asarray(X)
+        y = np.asarray(y).reshape(-1, 1)
+        self._train_neural_net(X, y, output_size=1, loss_fn=nn.MSELoss())
+        if self.verbose:
+            print(f"Original data: features shape: {X.shape} | target shape: {y.shape}")
+        X_aug = self._generate_augmented_data(X)
+        self.neural_net.eval()
         with torch.no_grad():
-
-
-
-
-        else:
-            return torch.softmax(logits, dim=1).cpu().numpy()
+            y_aug = self.neural_net(torch.tensor(X_aug, dtype=torch.float32)).detach().numpy()
+        if self.verbose:
+            print(f"Augmented data: features shape: {X_aug.shape} | target shape: {y_aug.shape}")
+        self._perform_symbolic_regression(X_aug, y_aug)
 
     def predict(self, X):
-
-
-
-    def symbolic_predict_proba(self, X):
-        """Predict class probabilities using only the extracted symbolic formula."""
-        if not self._is_fitted:
-            raise NotFittedError("Model must be fitted before prediction")
-
-        if not isinstance(X, np.ndarray):
-            X = np.array(X)
-
-        # Scale input data similar to training
-        X_scaled = (X - X.mean(axis=0)) / (X.std(axis=0) + 1e-8)
-
-        formulas = self.get_symbolic_formula()
-        n_classes = len(self.classes_)
-        predictions = np.zeros((X.shape[0], n_classes))
-
-        # Evaluate each feature's contribution to each class
-        for i in range(X.shape[1]): # For each feature
-            x = X_scaled[:, i] # Use scaled data
-            for j in range(n_classes): # For each class
-                formula = formulas[i][j]
-                if formula and formula != "0":
-                    predictions[:, j] += self._eval_formula(formula, x)
-
-        # Apply softmax with temperature for better separation
-        temperature = 1.0
-        exp_preds = np.exp(predictions / temperature)
-        probas = exp_preds / exp_preds.sum(axis=1, keepdims=True)
+        """
+        Predicts target values for the input data.
 
-
-
-
+        Parameters:
+        -----------
+        X : array-like of shape (n_samples, n_features)
+            Input data.
 
-
-
-
-
-
-
+        Returns:
+        --------
+        y_pred : ndarray of shape (n_samples,)
+            Predicted values.
+        """
+        if self.symbolic_model is None:
+            raise ValueError("Model not fitted yet.")
+        X = np.asarray(X)
+        X_transformed = evaluate_basis_functions(X, self.symbolic_model['basis_functions'],
+                                                 self.symbolic_model['n_features'])
+        return np.dot(X_transformed, self.symbolic_model['coefficients'])
+
+class OIKANClassifier(OIKAN):
+    """OIKAN model for classification tasks."""
+    def fit(self, X, y):
+        """
+        Fits the classifier to the data.
 
-
+        Parameters:
+        -----------
+        X : array-like of shape (n_samples, n_features)
+            Training data.
+        y : array-like of shape (n_samples,)
+            Target labels.
+        """
+        X = np.asarray(X)
+        from sklearn.preprocessing import LabelEncoder
+        le = LabelEncoder()
+        y_encoded = le.fit_transform(y)
+        self.classes_ = le.classes_
         n_classes = len(self.classes_)
-
+        y_onehot = nn.functional.one_hot(torch.tensor(y_encoded), num_classes=n_classes).float()
+        self._train_neural_net(X, y_onehot, output_size=n_classes, loss_fn=nn.CrossEntropyLoss())
+        if self.verbose:
+            print(f"Original data: features shape: {X.shape} | target shape: {y.shape}")
+        X_aug = self._generate_augmented_data(X)
+        self.neural_net.eval()
+        with torch.no_grad():
+            logits_aug = self.neural_net(torch.tensor(X_aug, dtype=torch.float32)).detach().numpy()
+        if self.verbose:
+            print(f"Augmented data: features shape: {X_aug.shape} | target shape: {logits_aug.shape}")
+        self._perform_symbolic_regression(X_aug, logits_aug)
+
+    def predict(self, X):
+        """
+        Predicts class labels for the input data.
 
-
-
-
-
-        weight = first_layer.combination_weights[i, j].item()
-
-        if abs(weight) > 1e-4:
-            # Improved precision by using a lower threshold
-            edge_formula = edge.get_symbolic_repr(threshold=1e-6)
-            terms = []
-            for term in edge_formula.split(" + "):
-                if term and term != "0":
-                    if "*" in term:
-                        coef, rest = term.split("*", 1)
-                        coef = float(coef) * weight
-                        terms.append(f"{coef:.4f}*{rest}")
-                    else:
-                        terms.append(f"{float(term) * weight:.4f}")
-
-            formulas[i][j] = " + ".join(terms) if terms else "0"
-        else:
-            formulas[i][j] = "0"
+        Parameters:
+        -----------
+        X : array-like of shape (n_samples, n_features)
+            Input data.
 
-
-
-
-
-
-
+        Returns:
+        --------
+        y_pred : ndarray of shape (n_samples,)
+            Predicted class labels.
+        """
+        if self.symbolic_model is None:
+            raise ValueError("Model not fitted yet.")
+        X = np.asarray(X)
+        X_transformed = evaluate_basis_functions(X, self.symbolic_model['basis_functions'],
+                                                 self.symbolic_model['n_features'])
+        logits = np.dot(X_transformed, np.array(self.symbolic_model['coefficients_list']).T)
+        probabilities = nn.functional.softmax(torch.tensor(logits), dim=1).numpy()
+        return self.classes_[np.argmax(probabilities, axis=1)]
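Classification mirrors the regressor: the symbolic model is fitted to the network's logits on augmented data, and predict recovers labels via softmax plus argmax. A usage sketch (X and labels are illustrative):

    from oikan.model import OIKANClassifier

    clf = OIKANClassifier(epochs=50)
    clf.fit(X, labels)        # one-hot targets, CrossEntropyLoss, distillation on logits
    print(clf.predict(X))     # class labels
    print(clf.get_formula())  # one formula string per class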