PyPI - oikan - Versions diffs - 0.0.3.2__py3-none-any.whl → 0.0.3.4__py3-none-any.whl - Mend

oikan 0.0.3.2__py3-none-any.whl → 0.0.3.4__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (9) hide show

oikan/exceptions.py +25 -1
oikan/model.py +194 -68
oikan/utils.py +208 -15
{oikan-0.0.3.2.dist-info → oikan-0.0.3.4.dist-info}/METADATA +73 -12
oikan-0.0.3.4.dist-info/RECORD +10 -0
{oikan-0.0.3.2.dist-info → oikan-0.0.3.4.dist-info}/WHEEL +1 -1
oikan-0.0.3.2.dist-info/RECORD +0 -10
{oikan-0.0.3.2.dist-info → oikan-0.0.3.4.dist-info}/licenses/LICENSE +0 -0
{oikan-0.0.3.2.dist-info → oikan-0.0.3.4.dist-info}/top_level.txt +0 -0

oikan/exceptions.py CHANGED Viewed

@@ -4,4 +4,28 @@ class OIKANError(Exception):
 class ModelNotFittedError(OIKANError):
     """Raised when a method requires a fitted model."""
-    pass
+    pass
+class InvalidParameterError(OIKANError):
+    """Raised when an invalid parameter value is provided."""
+    pass
+class DataDimensionError(OIKANError):
+    """Raised when input data has incorrect dimensions."""
+    pass
+class NumericalInstabilityError(OIKANError):
+    """Raised when numerical computations become unstable."""
+    pass
+class FeatureExtractionError(OIKANError):
+    """Raised when feature extraction or transformation fails."""
+    pass
+class ModelSerializationError(OIKANError):
+    """Raised when model saving/loading operations fail."""
+    pass
+class ConvergenceError(OIKANError):
+    """Raised when the model fails to converge during training."""
+    pass

oikan/model.py CHANGED Viewed

@@ -3,13 +3,14 @@ import torch
 import torch.nn as nn
 import torch.optim as optim
 from sklearn.preprocessing import PolynomialFeatures
-from sklearn.linear_model import Lasso
+from sklearn.linear_model import ElasticNet
 from abc import ABC, abstractmethod
 import json
 from .neural import TabularNet
-from .utils import evaluate_basis_functions, get_features_involved
+from .utils import evaluate_basis_functions, get_features_involved, sympify_formula, get_latex_formula
 from sklearn.model_selection import train_test_split
 from sklearn.metrics import r2_score, accuracy_score
+from .exceptions import *
 import sys
 class OIKAN(ABC):
@@ -24,12 +25,12 @@ class OIKAN(ABC):
         Activation function for the neural network ('relu', 'tanh', 'leaky_relu', 'elu', 'swish', 'gelu').
     augmentation_factor : int, optional (default=10)
         Number of augmented samples per original sample.
-    polynomial_degree : int, optional (default=2)
-        Maximum degree of polynomial features for symbolic regression.
     alpha : float, optional (default=0.1)
         L1 regularization strength for Lasso in symbolic regression.
     sigma : float, optional (default=0.1)
         Standard deviation of Gaussian noise for data augmentation.
+    top_k : int, optional (default=5)
+        Number of top features to select in hierarchical symbolic regression.
     epochs : int, optional (default=100)
         Number of epochs for neural network training.
     lr : float, optional (default=0.001)
@@ -42,12 +43,30 @@ class OIKAN(ABC):
         Whether to evaluate neural network performance before full training.
     """
     def __init__(self, hidden_sizes=[64, 64], activation='relu', augmentation_factor=10,
-                 polynomial_degree=2, alpha=0.1, sigma=0.1, epochs=100, lr=0.001, batch_size=32,
-                 verbose=False, evaluate_nn=False):
+                 alpha=0.1, sigma=0.1, epochs=100, lr=0.001, batch_size=32,
+                 verbose=False, evaluate_nn=False, top_k=5):
+        if not isinstance(hidden_sizes, list) or not all(isinstance(x, int) and x > 0 for x in hidden_sizes):
+            raise InvalidParameterError("hidden_sizes must be a list of positive integers")
+        if activation not in ['relu', 'tanh', 'leaky_relu', 'elu', 'swish', 'gelu']:
+            raise InvalidParameterError(f"Unsupported activation function: {activation}")
+        if not isinstance(augmentation_factor, int) or augmentation_factor < 1:
+            raise InvalidParameterError("augmentation_factor must be a positive integer")
+        if not isinstance(top_k, int) or top_k < 1:
+            raise InvalidParameterError("top_k must be a positive integer")
+        if not 0 < lr < 1:
+            raise InvalidParameterError("Learning rate must be between 0 and 1")
+        if not isinstance(batch_size, int) or batch_size < 1:
+            raise InvalidParameterError("batch_size must be a positive integer")
+        if not isinstance(epochs, int) or epochs < 1:
+            raise InvalidParameterError("epochs must be a positive integer")
+        if not 0 <= alpha <= 1:
+            raise InvalidParameterError("alpha must be between 0 and 1")
+        if sigma <= 0:
+            raise InvalidParameterError("sigma must be positive")
         self.hidden_sizes = hidden_sizes
         self.activation = activation
         self.augmentation_factor = augmentation_factor
-        self.polynomial_degree = polynomial_degree
         self.alpha = alpha
         self.sigma = sigma
         self.epochs = epochs
@@ -55,6 +74,7 @@ class OIKAN(ABC):
         self.batch_size = batch_size
         self.verbose = verbose
         self.evaluate_nn = evaluate_nn
+        self.top_k = top_k
         self.neural_net = None
         self.symbolic_model = None
         self.evaluation_done = False
@@ -67,23 +87,53 @@ class OIKAN(ABC):
     def predict(self, X):
         pass
-    def get_formula(self):
-        """Returns the symbolic formula(s) as a string (regression) or list of strings (classification)."""
+    def get_formula(self, type='original'):
+        """
+        Returns the symbolic formula(s) as a string (regression) or list of strings (classification).
+        Parameter:
+        --------
+        type : str, optional (default='original') other options: 'sympied', 'latex'
+            'original' returns the original formula with coefficients, 'sympied' returns sympy simplified formula.
+        """
+        if type.lower() not in ['original', 'sympied', 'latex']:
+            raise InvalidParameterError("Invalid type. Choose 'original', 'sympied', 'latex'.")
         if self.symbolic_model is None:
             raise ValueError("Model not fitted yet.")
         basis_functions = self.symbolic_model['basis_functions']
-        if 'coefficients' in self.symbolic_model:
-            coefficients = self.symbolic_model['coefficients']
-            formula = " + ".join([f"{coefficients[i]:.3f}*{basis_functions[i]}"
-                                for i in range(len(coefficients)) if coefficients[i] != 0])
-            return formula if formula else "0"
+        if type.lower() == 'original':
+            if 'coefficients' in self.symbolic_model:
+                coefficients = self.symbolic_model['coefficients']
+                formula = " + ".join([f"{coefficients[i]:.6f}*{basis_functions[i]}"
+                                    for i in range(len(coefficients)) if coefficients[i] != 0])
+                return formula if formula else "0"
+            else:
+                formulas = []
+                for c, coef in enumerate(self.symbolic_model['coefficients_list']):
+                    formula = " + ".join([f"{coef[i]:.6f}*{basis_functions[i]}"
+                                        for i in range(len(coef)) if coef[i] != 0])
+                    formulas.append(f"Class {self.classes_[c]}: {formula if formula else '0'}")
+                return formulas
+        elif type.lower() == 'sympied':
+            if 'coefficients' in self.symbolic_model:
+                formula = sympify_formula(self.symbolic_model['basis_functions'], self.symbolic_model['coefficients'], self.symbolic_model['n_features'])
+                return formula
+            else:
+                formulas = []
+                for c, coef in enumerate(self.symbolic_model['coefficients_list']):
+                    formula = sympify_formula(self.symbolic_model['basis_functions'], coef, self.symbolic_model['n_features'])
+                    formulas.append(f"Class {self.classes_[c]}: {formula}")
+                return formulas
         else:
-            formulas = []
-            for c, coef in enumerate(self.symbolic_model['coefficients_list']):
-                formula = " + ".join([f"{coef[i]:.3f}*{basis_functions[i]}"
-                                    for i in range(len(coef)) if coef[i] != 0])
-                formulas.append(f"Class {self.classes_[c]}: {formula if formula else '0'}")
-            return formulas
+            if 'coefficients' in self.symbolic_model:
+                formula = get_latex_formula(self.symbolic_model['basis_functions'], self.symbolic_model['coefficients'], self.symbolic_model['n_features'])
+                return formula
+            else:
+                formulas = []
+                for c, coef in enumerate(self.symbolic_model['coefficients_list']):
+                    formula = get_latex_formula(self.symbolic_model['basis_functions'], coef, self.symbolic_model['n_features'])
+                    formulas.append(f"Class {self.classes_[c]}: {formula}")
+                return formulas
     def feature_importances(self):
         """
@@ -129,27 +179,32 @@ class OIKAN(ABC):
             File path to save the model. Should end with .json
         """
         if self.symbolic_model is None:
-            raise ValueError("Model not fitted yet.")
+            raise ModelNotFittedError("Model must be fitted before saving")
         if not path.endswith('.json'):
             path = path + '.json'
-        # Convert numpy arrays and other non-serializable types to lists
-        model_data = {
-            'n_features': self.symbolic_model['n_features'],
-            'degree': self.symbolic_model['degree'],
-            'basis_functions': self.symbolic_model['basis_functions']
-        }
-        if 'coefficients' in self.symbolic_model:
-            model_data['coefficients'] = self.symbolic_model['coefficients']
-        else:
-            model_data['coefficients_list'] = [coef for coef in self.symbolic_model['coefficients_list']]
-            if hasattr(self, 'classes_'):
-                model_data['classes'] = self.classes_.tolist()
+        try:
+            # Convert numpy arrays and other non-serializable types to lists
+            model_data = {
+                'n_features': self.symbolic_model['n_features'],
+                'basis_functions': self.symbolic_model['basis_functions']
+            }
+            if 'coefficients' in self.symbolic_model:
+                model_data['coefficients'] = self.symbolic_model['coefficients']
+            else:
+                model_data['coefficients_list'] = [coef for coef in self.symbolic_model['coefficients_list']]
+                if hasattr(self, 'classes_'):
+                    model_data['classes'] = self.classes_.tolist()
+            with open(path, 'w') as f:
+                json.dump(model_data, f, indent=2)
+        except Exception as e:
+            raise ModelSerializationError(f"Failed to save model: {str(e)}")
-        with open(path, 'w') as f:
-            json.dump(model_data, f, indent=2)
+        if self.verbose:
+            print(f"Model saved to {path}")
     def load(self, path):
         """
@@ -162,22 +217,27 @@ class OIKAN(ABC):
         """
         if not path.endswith('.json'):
             path = path + '.json'
+        try:
+            with open(path, 'r') as f:
+                model_data = json.load(f)
+            self.symbolic_model = {
+                'n_features': model_data['n_features'],
+                'basis_functions': model_data['basis_functions']
+            }
-        with open(path, 'r') as f:
-            model_data = json.load(f)
-        self.symbolic_model = {
-            'n_features': model_data['n_features'],
-            'degree': model_data['degree'],
-            'basis_functions': model_data['basis_functions']
-        }
+            if 'coefficients' in model_data:
+                self.symbolic_model['coefficients'] = model_data['coefficients']
+            else:
+                self.symbolic_model['coefficients_list'] = model_data['coefficients_list']
+                if 'classes' in model_data:
+                    self.classes_ = np.array(model_data['classes'])
+        except Exception as e:
+            raise ModelSerializationError(f"Failed to load model: {str(e)}")
-        if 'coefficients' in model_data:
-            self.symbolic_model['coefficients'] = model_data['coefficients']
-        else:
-            self.symbolic_model['coefficients_list'] = model_data['coefficients_list']
-            if 'classes' in model_data:
-                self.classes_ = np.array(model_data['classes'])
+        if self.verbose:
+            print(f"Model loaded from {path}")
     def _evaluate_neural_net(self, X, y, output_size, loss_fn):
         """Evaluates neural network performance on train-test split."""
@@ -185,7 +245,6 @@ class OIKAN(ABC):
         input_size = X.shape[1]
         self.neural_net = TabularNet(input_size, self.hidden_sizes, output_size, self.activation)
-        optimizer = optim.Adam(self.neural_net.parameters(), lr=self.lr)
         # Train on the training set
         self._train_neural_net(X_train, y_train, output_size, loss_fn)
@@ -253,7 +312,6 @@ class OIKAN(ABC):
     def _generate_augmented_data(self, X):
         """Generates augmented data by adding Gaussian noise."""
-        n_samples = X.shape[0]
         X_aug = []
         for _ in range(self.augmentation_factor):
             noise = np.random.normal(0, self.sigma, X.shape)
@@ -262,37 +320,105 @@ class OIKAN(ABC):
         return np.vstack(X_aug)
     def _perform_symbolic_regression(self, X, y):
-        """Performs symbolic regression using polynomial features and Lasso."""
-        poly = PolynomialFeatures(degree=self.polynomial_degree, include_bias=True)
-        X_poly = poly.fit_transform(X)
-        model = Lasso(alpha=self.alpha, fit_intercept=False)
-        model.fit(X_poly, y)
+        """
+        Performs hierarchical symbolic regression using a two-stage approach.
+        Parameters:
+        -----------
+        X : array-like of shape (n_samples, n_features)
+            Input data.
+        y : array-like of shape (n_samples,) or (n_samples, n_classes)
+            Target values or logits.
+        """
+        n_features = X.shape[1]
+        self.top_k = min(self.top_k, n_features)
+        if self.top_k < 1:
+            raise InvalidParameterError("top_k must be at least 1")
+        if np.any(np.isnan(X)) or np.any(np.isnan(y)):
+            raise NumericalInstabilityError("Input data contains NaN values")
+        if np.any(np.isinf(X)) or np.any(np.isinf(y)):
+            raise NumericalInstabilityError("Input data contains infinite values")
+        # Stage 1: Coarse Model
+        coarse_degree = 2  # Fixed low degree for coarse model
+        poly_coarse = PolynomialFeatures(degree=coarse_degree, include_bias=True)
+        X_poly_coarse = poly_coarse.fit_transform(X)
+        model_coarse = ElasticNet(alpha=self.alpha, fit_intercept=False)
+        model_coarse.fit(X_poly_coarse, y)
+        # Compute feature importances for original features
+        basis_functions_coarse = poly_coarse.get_feature_names_out()
+        if len(y.shape) == 1 or y.shape[1] == 1:
+            coef_coarse = model_coarse.coef_.flatten()
+        else:
+            coef_coarse = np.sum(np.abs(model_coarse.coef_), axis=0)
+        importances = np.zeros(X.shape[1])
+        for i, func in enumerate(basis_functions_coarse):
+            features_involved = get_features_involved(func)
+            for idx in features_involved:
+                importances[idx] += np.abs(coef_coarse[i])
+        if np.all(importances == 0):
+            raise FeatureExtractionError("Failed to compute feature importances - all values are zero")
+        # Select top K features
+        top_k_indices = np.argsort(importances)[::-1][:self.top_k]
+        # Stage 2: Refined Model
+        # ~ generate additional non-linear features for top K features
+        additional_features = []
+        additional_names = []
+        for i in top_k_indices:
+            # Higher-degree polynomial
+            additional_features.append(X[:, i]**3)
+            additional_names.append(f'x{i}^3')
+            # Non-linear transformations
+            additional_features.append(np.log1p(np.abs(X[:, i])))
+            additional_names.append(f'log1p_x{i}')
+            additional_features.append(np.exp(np.clip(X[:, i], -10, 10)))
+            additional_names.append(f'exp_x{i}')
+            additional_features.append(np.sin(X[:, i]))
+            additional_names.append(f'sin_x{i}')
+        # Combine features
+        X_additional = np.column_stack(additional_features)
+        X_refined = np.hstack([X_poly_coarse, X_additional])
+        basis_functions_refined = list(basis_functions_coarse) + additional_names
+        # Fit refined model
+        model_refined = ElasticNet(alpha=self.alpha, fit_intercept=False)
+        model_refined.fit(X_refined, y)
+        # Store symbolic model
         if len(y.shape) == 1 or y.shape[1] == 1:
-            coef = model.coef_.flatten()
-            selected_indices = np.where(np.abs(coef) > 1e-6)[0]
+            # Regression
+            coef_refined = model_refined.coef_.flatten()
+            selected_indices = np.where(np.abs(coef_refined) > 1e-6)[0]
             self.symbolic_model = {
                 'n_features': X.shape[1],
-                'degree': self.polynomial_degree,
-                'basis_functions': poly.get_feature_names_out()[selected_indices].tolist(),
-                'coefficients': coef[selected_indices].tolist()
+                'basis_functions': [basis_functions_refined[i] for i in selected_indices],
+                'coefficients': coef_refined[selected_indices].tolist()
             }
         else:
+            # Classification
             coefficients_list = []
-            # Note: Using the same basis functions across classes for simplicity
             selected_indices = set()
             for c in range(y.shape[1]):
-                coef = model.coef_[c]
+                coef = model_refined.coef_[c]
                 indices = np.where(np.abs(coef) > 1e-6)[0]
                 selected_indices.update(indices)
             selected_indices = list(selected_indices)
-            basis_functions = poly.get_feature_names_out()[selected_indices].tolist()
+            basis_functions = [basis_functions_refined[i] for i in selected_indices]
             for c in range(y.shape[1]):
-                coef = model.coef_[c]
+                coef = model_refined.coef_[c]
                 coef_selected = coef[selected_indices].tolist()
                 coefficients_list.append(coef_selected)
             self.symbolic_model = {
                 'n_features': X.shape[1],
-                'degree': self.polynomial_degree,
                 'basis_functions': basis_functions,
                 'coefficients_list': coefficients_list
             }

oikan/utils.py CHANGED Viewed

@@ -1,4 +1,7 @@
 import numpy as np
+import sympy as sp
+import json
+from functools import lru_cache
 def evaluate_basis_functions(X, basis_functions, n_features):
     """
@@ -9,7 +12,7 @@ def evaluate_basis_functions(X, basis_functions, n_features):
     X : array-like of shape (n_samples, n_features)
         Input data.
     basis_functions : list
-        List of basis function strings (e.g., '1', 'x0', 'x0^2', 'x0 x1').
+        List of basis function strings (e.g., '1', 'x0', 'x0^2', 'x0 x1', 'log1p_x0').
     n_features : int
         Number of input features.
@@ -22,15 +25,26 @@ def evaluate_basis_functions(X, basis_functions, n_features):
     for i, func in enumerate(basis_functions):
         if func == '1':
             X_transformed[:, i] = 1
+        elif func.startswith('log1p_x'):
+            idx = int(func.split('_')[1][1:])
+            X_transformed[:, i] = np.log1p(np.abs(X[:, idx]))
+        elif func.startswith('exp_x'):
+            idx = int(func.split('_')[1][1:])
+            X_transformed[:, i] = np.exp(np.clip(X[:, idx], -10, 10))
+        elif func.startswith('sin_x'):
+            idx = int(func.split('_')[1][1:])
+            X_transformed[:, i] = np.sin(X[:, idx])
         elif '^' in func:
             var, power = func.split('^')
             idx = int(var[1:])
             X_transformed[:, i] = X[:, idx] ** int(power)
         elif ' ' in func:
-            var1, var2 = func.split(' ')
-            idx1 = int(var1[1:])
-            idx2 = int(var2[1:])
-            X_transformed[:, i] = X[:, idx1] * X[:, idx2]
+            vars = func.split(' ')
+            result = np.ones(X.shape[0])
+            for var in vars:
+                idx = int(var[1:])
+                result *= X[:, idx]
+            X_transformed[:, i] = result
         else:
             idx = int(func[1:])
             X_transformed[:, i] = X[:, idx]
@@ -43,21 +57,200 @@ def get_features_involved(basis_function):
     Parameters:
     -----------
     basis_function : str
-        String representation of the basis function, e.g., 'x0', 'x0^2', 'x0 x1'.
+        String representation of the basis function, e.g., 'x0', 'x0^2', 'x0 x1', 'log1p_x0'.
     Returns:
     --------
     set : Set of feature indices involved.
     """
-    if basis_function == '1':  # Constant term involves no features
+    if basis_function == '1':
         return set()
     features = set()
-    for part in basis_function.split():  # Split by space for interaction terms
-        if part.startswith('x'):
-            if '^' in part:  # Handle powers, e.g., 'x0^2'
-                var = part.split('^')[0]  # Take 'x0'
-            else:
-                var = part  # Take 'x0' as is
-            idx = int(var[1:])  # Extract index, e.g., 0
+    if '_' in basis_function:  # Handle non-linear functions like 'log1p_x0'
+        parts = basis_function.split('_')
+        if len(parts) == 2 and parts[1].startswith('x'):
+            idx = int(parts[1][1:])
             features.add(idx)
-    return features
+    elif '^' in basis_function:  # Handle powers, e.g., 'x0^2'
+        var = basis_function.split('^')[0]
+        idx = int(var[1:])
+        features.add(idx)
+    elif ' ' in basis_function:  # Handle interactions, e.g., 'x0 x1'
+        for part in basis_function.split():
+            idx = int(part[1:])
+            features.add(idx)
+    elif basis_function.startswith('x'):
+        idx = int(basis_function[1:])
+        features.add(idx)
+    return features
+@lru_cache(maxsize=1000)
+def _cached_sympify_formula(basis_functions_tuple, coefficients_tuple, n_features, threshold):
+    """
+    Internal function to perform SymPy formula simplification with caching.
+    Parameters:
+    -----------
+    basis_functions_tuple : tuple
+        Tuple of basis function strings.
+    coefficients_tuple : tuple
+        Tuple of coefficients.
+    n_features : int
+        Number of input features.
+    threshold : float
+        Coefficients with absolute value below this are excluded.
+    Returns:
+    --------
+    str
+        Simplified formula as a string, or '0' if empty.
+    """
+    # Convert tuples back to lists
+    basis_functions = list(basis_functions_tuple)
+    coefficients = list(coefficients_tuple)
+    # Define symbolic variables
+    x = sp.symbols(f'x0:{n_features}')
+    expr = 0
+    # Build the expression
+    for coef, func in zip(coefficients, basis_functions):
+        if abs(coef) < threshold:
+            continue  # Skip negligible coefficients
+        if func == '1':
+            term = coef
+        elif func.startswith('log1p_x'):
+            idx = int(func.split('_')[1][1:])
+            term = coef * sp.log(1 + sp.Abs(x[idx]))
+        elif func.startswith('exp_x'):
+            idx = int(func.split('_')[1][1:])
+            term = coef * sp.exp(x[idx])
+        elif func.startswith('sin_x'):
+            idx = int(func.split('_')[1][1:])
+            term = coef * sp.sin(x[idx])
+        elif '^' in func:
+            var, power = func.split('^')
+            idx = int(var[1:])
+            term = coef * x[idx]**int(power)
+        elif ' ' in func:
+            vars = func.split(' ')
+            term = coef
+            for var in vars:
+                idx = int(var[1:])
+                term *= x[idx]
+        else:
+            idx = int(func[1:])
+            term = coef * x[idx]
+        expr += term
+    # Simplify the expression
+    simplified_expr = sp.simplify(expr)
+    # Convert to string with rounded coefficients
+    def format_term(term):
+        if term.is_Mul:
+            coeff = 1
+            factors = []
+            for factor in term.args:
+                if factor.is_Number:
+                    coeff *= float(factor)
+                else:
+                    factors.append(str(factor))
+            if abs(coeff) < threshold:
+                return None
+            return f"{coeff:.5f}*{'*'.join(factors)}" if factors else f"{coeff:.5f}"
+        elif term.is_Add:
+            return None  # Handle in recursion
+        elif term.is_Number:
+            return f"{float(term):.5f}" if abs(float(term)) >= threshold else None
+        else:
+            return f"{1.0:.5f}*{term}" if abs(1.0) >= threshold else None
+    terms = []
+    if simplified_expr.is_Add:
+        for term in simplified_expr.args:
+            formatted = format_term(term)
+            if formatted:
+                terms.append(formatted)
+    else:
+        formatted = format_term(simplified_expr)
+        if formatted:
+            terms.append(formatted)
+    formula = " + ".join(terms).replace("+ -", "- ")
+    return formula if formula else "0"
+def sympify_formula(basis_functions, coefficients, n_features, threshold=0.00005):
+    """
+    Simplifies a symbolic formula using SymPy with caching.
+    Parameters:
+    -----------
+    basis_functions : list
+        List of basis function strings (e.g., 'x0', 'x0^2', 'x0 x1', 'exp_x0').
+    coefficients : list
+        List of coefficients corresponding to each basis function.
+    n_features : int
+        Number of input features.
+    threshold : float, optional (default=0.00005)
+        Coefficients with absolute value below this are excluded.
+    Returns:
+    --------
+    str
+        Simplified formula as a string, or '0' if empty.
+    """
+    # Convert inputs to hashable types
+    basis_functions_tuple = tuple(basis_functions)
+    coefficients_tuple = tuple(coefficients)
+    # Call cached function
+    return _cached_sympify_formula(basis_functions_tuple, coefficients_tuple, n_features, threshold)
+@lru_cache(maxsize=1000)
+def _cached_get_latex_formula(formula):
+    """
+    Internal function to convert a simplified formula to LaTeX with caching.
+    Parameters:
+    -----------
+    formula : str
+        Simplified formula string.
+    Returns:
+    --------
+    str
+        LaTeX formula as a string.
+    """
+    return sp.latex(sp.sympify(formula))
+def get_latex_formula(basis_functions, coefficients, n_features, threshold=0.00005):
+    """
+    Generates a LaTeX formula from the basis functions and coefficients with caching.
+    Parameters:
+    -----------
+    basis_functions : list
+        List of basis function strings (e.g., 'x0', 'x0^2', 'x0 x1', 'exp_x0').
+    coefficients : list
+        List of coefficients corresponding to each basis function.
+    n_features : int
+        Number of input features.
+    threshold : float, optional (default=0.00005)
+        Coefficients with absolute value below this are excluded.
+    Returns:
+    --------
+    str
+        LaTeX formula as a string, or '0' if empty.
+    """
+    # Get simplified formula (cached)
+    formula = sympify_formula(basis_functions, coefficients, n_features, threshold)
+    # Convert to LaTeX (cached)
+    return _cached_get_latex_formula(formula)
+if __name__ == "__main__":
+    with open('outputs/california_housing_model.json', 'r') as f:
+        model = json.load(f)
+    print('Sympified formula:', sympify_formula(model['basis_functions'], model['coefficients'], model['n_features']))
+    print('LaTeX formula:', get_latex_formula(model['basis_functions'], model['coefficients'], model['n_features']))

{oikan-0.0.3.2.dist-info → oikan-0.0.3.4.dist-info}/METADATA RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: oikan
-Version: 0.0.3.2
+Version: 0.0.3.4
 Summary: OIKAN: Neuro-Symbolic ML for Scientific Discovery
 Author: Arman Zhalgasbayev
 License: MIT
@@ -14,6 +14,7 @@ Requires-Dist: torch
 Requires-Dist: numpy
 Requires-Dist: scikit-learn
 Requires-Dist: tqdm
+Requires-Dist: sympy
 Dynamic: license-file
 <!-- logo in the center -->
@@ -57,7 +58,7 @@ OIKAN implements a modern interpretation of the Kolmogorov-Arnold Representation
 2. **Neural Implementation**: OIKAN uses a specialized architecture combining:
    - Feature transformation layers with interpretable basis functions
-   - Symbolic regression for formula extraction
+   - Symbolic regression for formula extraction (ElasticNet-based)
    - Automatic pruning of insignificant terms
    ```python
@@ -76,15 +77,19 @@ OIKAN implements a modern interpretation of the Kolmogorov-Arnold Representation
    SYMBOLIC_FUNCTIONS = {
        'linear': 'x',           # Direct relationships
        'quadratic': 'x^2',      # Non-linear patterns
+       'cubic': 'x^3',         # Higher-order relationships
        'interaction': 'x_i x_j', # Feature interactions
-       'higher_order': 'x^n'    # Polynomial terms
+       'higher_order': 'x^n',    # Polynomial terms
+       'trigonometric': 'sin(x)', # Trigonometric functions
+       'exponential': 'exp(x)',  # Exponential growth
+       'logarithmic': 'log(x)'  # Logarithmic relationships
    }
    ```
 4. **Formula Extraction Process**:
    - Train neural network on raw data
    - Generate augmented samples for better coverage
-   - Perform L1-regularized symbolic regression
+   - Perform L1-regularized symbolic regression (alpha)
    - Prune terms with coefficients below threshold
    - Export human-readable mathematical expressions
@@ -114,13 +119,14 @@ model = OIKANRegressor(
     hidden_sizes=[32, 32], # Hidden layer sizes
     activation='relu', # Activation function (other options: 'tanh', 'leaky_relu', 'elu', 'swish', 'gelu')
     augmentation_factor=5, # Augmentation factor for data generation
-    polynomial_degree=2, # Degree of polynomial basis functions
-    alpha=0.1, # L1 regularization strength
+    alpha=0.1, # L1 regularization strength (Symbolic regression)
     sigma=0.1, # Standard deviation of Gaussian noise for data augmentation
+    top_k=5, # Number of top features to select (Symbolic regression)
     epochs=100, # Number of training epochs
     lr=0.001, # Learning rate
     batch_size=32, # Batch size for training
-    verbose=True # Verbose output during training
+    verbose=True, # Verbose output during training
+    evaluate_nn=True # Validate neural network performance before full process
 )
 # Fit the model
@@ -134,7 +140,7 @@ mse = mean_squared_error(y_test, y_pred)
 print("Mean Squared Error:", mse)
 # Get symbolic formula
-formula = model.get_formula()
+formula = model.get_formula() # default: type='original' -> returns all formula without pruning | other options: 'sympied' -> simplified formula using sympy; 'latex' -> LaTeX format
 print("Symbolic Formula:", formula)
 # Get feature importances
@@ -162,13 +168,14 @@ model = OIKANClassifier(
     hidden_sizes=[32, 32], # Hidden layer sizes
     activation='relu', # Activation function (other options: 'tanh', 'leaky_relu', 'elu', 'swish', 'gelu')
     augmentation_factor=10, # Augmentation factor for data generation
-    polynomial_degree=2, # Degree of polynomial basis functions
-    alpha=0.1, # L1 regularization strength
+    alpha=0.1, # L1 regularization strength (Symbolic regression)
     sigma=0.1, # Standard deviation of Gaussian noise for data augmentation
+    top_k=5, # Number of top features to select (Symbolic regression)
     epochs=100, # Number of training epochs
     lr=0.001, # Learning rate
     batch_size=32, # Batch size for training
-    verbose=True # Verbose output during training
+    verbose=True, # Verbose output during training
+    evaluate_nn=True # Validate neural network performance before full process
 )
 # Fit the model
@@ -182,7 +189,7 @@ accuracy = model.score(X_test, y_test)
 print("Accuracy:", accuracy)
 # Get symbolic formulas for each class
-formulas = model.get_formula()
+formulas = model.get_formula() # default: type='original' -> returns the full formulas without pruning | other options: 'sympied' -> simplified formulas using sympy; 'latex' -> LaTeX format
 for i, formula in enumerate(formulas):
     print(f"Class {i} Formula:", formula)
@@ -204,6 +211,60 @@ loaded_model.load("outputs/model.json")
 ![OIKAN v0.0.3(1) Architecture](https://raw.githubusercontent.com/silvermete0r/oikan/main/docs/media/oikan-v0.0.3(1)-architecture-oop.png)
+## OIKAN Symbolic Model Compilers
+OIKAN provides a set of symbolic model compilers to convert the symbolic formulas generated by the OIKAN model into different programming languages.
+*Currently, we support: `Python`, `C++`, `C`, `JavaScript`, `Rust`, and `Go`. This allows users to easily integrate the generated formulas into their applications or systems.*
+All compilers: [model_compilers/](model_compilers)
+### Example of Python Compiler
+1. Regression Model:
+```python
+import numpy as np
+import json
+def predict(X, symbolic_model):
+    X = np.asarray(X)
+    X_transformed = evaluate_basis_functions(X, symbolic_model['basis_functions'],
+                                            symbolic_model['n_features'])
+    return np.dot(X_transformed, symbolic_model['coefficients'])
+if __name__ == "__main__":
+    with open('outputs/california_housing_model.json', 'r') as f:
+        symbolic_model = json.load(f)
+    X = np.random.rand(10, symbolic_model['n_features'])
+    y_pred = predict(X, symbolic_model)
+    print(y_pred)
+```
+2. Classification Model:
+```python
+import numpy as np
+import json
+def predict(X, symbolic_model):
+    X = np.asarray(X)
+    X_transformed = evaluate_basis_functions(X, symbolic_model['basis_functions'],
+                                            symbolic_model['n_features'])
+    logits = np.dot(X_transformed, np.array(symbolic_model['coefficients_list']).T)
+    probabilities = np.exp(logits) / np.sum(np.exp(logits), axis=1, keepdims=True)
+    return np.argmax(probabilities, axis=1)
+if __name__ == "__main__":
+    with open('outputs/iris_model.json', 'r') as f:
+        symbolic_model = json.load(f)
+    X = np.array([[5.1, 3.5, 1.4, 0.2],
+                  [7.0, 3.2, 4.7, 1.4],
+                  [6.3, 3.3, 6.0, 2.5]])
+    y_pred = predict(X, symbolic_model)
+    print(y_pred)
+```
 ## Contributing
 We welcome contributions! Key areas of interest:

oikan-0.0.3.4.dist-info/RECORD ADDED Viewed

@@ -0,0 +1,10 @@
+oikan/__init__.py,sha256=zEzhm1GYLT4vNaIQ4CgZcNpUk3uo8SWnoaHYtHW_XSQ,628
+oikan/exceptions.py,sha256=GhHWqy2Q5LVBcteTy4ngnqxr7FOoLNyD8dNt1kfRXyw,901
+oikan/model.py,sha256=-EqCxTMeejdOCh2T08ibc87YIDYWUvybFe7jfb1XYbA,23167
+oikan/neural.py,sha256=wxmGgzmtpwJ3lvH6u6D4i4BiAzg018czrIdw49phSCY,1558
+oikan/utils.py,sha256=7UCm9obO-8Q2zhetdAkukMDOZvGSBWUL_dSF04XqM7k,8808
+oikan-0.0.3.4.dist-info/licenses/LICENSE,sha256=75ASVmU-XIpN-M4LbVmJ_ibgbzbvRLVti8FhnR0BTf8,1096
+oikan-0.0.3.4.dist-info/METADATA,sha256=P-2T0xtWDyTNhfZYknMK0TPm9EcvCUBD-O2bdfCwpFc,11138
+oikan-0.0.3.4.dist-info/WHEEL,sha256=DnLRTWE75wApRYVsjgc6wsVswC54sMSJhAEd4xhDpBk,91
+oikan-0.0.3.4.dist-info/top_level.txt,sha256=XwnwKwTJddZwIvtrUsAz-l-58BJRj6HjAGWrfYi_3QY,6
+oikan-0.0.3.4.dist-info/RECORD,,

{oikan-0.0.3.2.dist-info → oikan-0.0.3.4.dist-info}/WHEEL RENAMED Viewed

@@ -1,5 +1,5 @@
 Wheel-Version: 1.0
-Generator: setuptools (80.3.1)
+Generator: setuptools (80.4.0)
 Root-Is-Purelib: true
 Tag: py3-none-any

oikan-0.0.3.2.dist-info/RECORD DELETED Viewed

@@ -1,10 +0,0 @@
-oikan/__init__.py,sha256=zEzhm1GYLT4vNaIQ4CgZcNpUk3uo8SWnoaHYtHW_XSQ,628
-oikan/exceptions.py,sha256=Is0jG4apxO8QJQREIiJQYMjANYWibWeS-103q9KWbfg,192
-oikan/model.py,sha256=oZtx5uFxMj4q89ODKDBeTJ0whbqiMIR2tKwgmYVXHUY,16887
-oikan/neural.py,sha256=wxmGgzmtpwJ3lvH6u6D4i4BiAzg018czrIdw49phSCY,1558
-oikan/utils.py,sha256=xMGRa1qhn8BWn9UxpVeJIuGb-UvQmbjiFSsvAdF0bMU,2095
-oikan-0.0.3.2.dist-info/licenses/LICENSE,sha256=75ASVmU-XIpN-M4LbVmJ_ibgbzbvRLVti8FhnR0BTf8,1096
-oikan-0.0.3.2.dist-info/METADATA,sha256=PPsMSll3Ds6E9J3ZnXxo8Yno0ZsHrSb55kZ-035jJZE,8441
-oikan-0.0.3.2.dist-info/WHEEL,sha256=0CuiUZ_p9E4cD6NyLD6UG80LBXYyiSYZOKDm5lp32xk,91
-oikan-0.0.3.2.dist-info/top_level.txt,sha256=XwnwKwTJddZwIvtrUsAz-l-58BJRj6HjAGWrfYi_3QY,6
-oikan-0.0.3.2.dist-info/RECORD,,

{oikan-0.0.3.2.dist-info → oikan-0.0.3.4.dist-info}/licenses/LICENSE RENAMED Viewed

File without changes

{oikan-0.0.3.2.dist-info → oikan-0.0.3.4.dist-info}/top_level.txt RENAMED Viewed

File without changes

oikan 0.0.3.2__py3-none-any.whl → 0.0.3.4__py3-none-any.whl

oikan 0.0.3.2__py3-none-any.whl → 0.0.3.4__py3-none-any.whl