PyPI - oikan - Versions diffs - 0.0.3.3__tar.gz → 0.0.3.4__tar.gz - Mend

oikan-0.0.3.3.tar.gz → oikan-0.0.3.4.tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (17)

{oikan-0.0.3.3 → oikan-0.0.3.4}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: oikan
-Version: 0.0.3.3
+Version: 0.0.3.4
 Summary: OIKAN: Neuro-Symbolic ML for Scientific Discovery
 Author: Arman Zhalgasbayev
 License: MIT
@@ -14,6 +14,7 @@ Requires-Dist: torch
 Requires-Dist: numpy
 Requires-Dist: scikit-learn
 Requires-Dist: tqdm
+Requires-Dist: sympy
 Dynamic: license-file
 <!-- logo in the center -->
@@ -118,7 +119,6 @@ model = OIKANRegressor(
     hidden_sizes=[32, 32], # Hidden layer sizes
     activation='relu', # Activation function (other options: 'tanh', 'leaky_relu', 'elu', 'swish', 'gelu')
     augmentation_factor=5, # Augmentation factor for data generation
-    polynomial_degree=2, # Degree of polynomial basis functions
     alpha=0.1, # L1 regularization strength (Symbolic regression)
     sigma=0.1, # Standard deviation of Gaussian noise for data augmentation
     top_k=5, # Number of top features to select (Symbolic regression)
@@ -140,7 +140,7 @@ mse = mean_squared_error(y_test, y_pred)
 print("Mean Squared Error:", mse)
 # Get symbolic formula
-formula = model.get_formula()
+formula = model.get_formula() # default: type='original' -> returns all formula without pruning | other options: 'sympied' -> simplified formula using sympy; 'latex' -> LaTeX format
 print("Symbolic Formula:", formula)
 # Get feature importances
@@ -168,7 +168,6 @@ model = OIKANClassifier(
     hidden_sizes=[32, 32], # Hidden layer sizes
     activation='relu', # Activation function (other options: 'tanh', 'leaky_relu', 'elu', 'swish', 'gelu')
     augmentation_factor=10, # Augmentation factor for data generation
-    polynomial_degree=2, # Degree of polynomial basis functions
     alpha=0.1, # L1 regularization strength (Symbolic regression)
     sigma=0.1, # Standard deviation of Gaussian noise for data augmentation
     top_k=5, # Number of top features to select (Symbolic regression)
@@ -190,7 +189,7 @@ accuracy = model.score(X_test, y_test)
 print("Accuracy:", accuracy)
 # Get symbolic formulas for each class
-formulas = model.get_formula()
+formulas = model.get_formula() # default: type='original' -> returns all formula without pruning | other options: 'sympied' -> simplified formula using sympy; 'latex' -> LaTeX format
 for i, formula in enumerate(formulas):
     print(f"Class {i} Formula:", formula)
@@ -212,6 +211,60 @@ loaded_model.load("outputs/model.json")
 ![OIKAN v0.0.3(1) Architecture](https://raw.githubusercontent.com/silvermete0r/oikan/main/docs/media/oikan-v0.0.3(1)-architecture-oop.png)
+## OIKAN Symbolic Model Compilers
+OIKAN provides a set of symbolic model compilers to convert the symbolic formulas generated by the OIKAN model into different programming languages.
+*Currently, we support: `Python`, `C++`, `C`, `JavaScript`, `Rust`, and `Go`. This allows users to easily integrate the generated formulas into their applications or systems.*
+All compilers: [model_compilers/](model_compilers)
+### Example of Python Compiler
+1. Regression Model:
+```python
+import numpy as np
+import json
+def predict(X, symbolic_model):
+    X = np.asarray(X)
+    X_transformed = evaluate_basis_functions(X, symbolic_model['basis_functions'],
+                                            symbolic_model['n_features'])
+    return np.dot(X_transformed, symbolic_model['coefficients'])
+if __name__ == "__main__":
+    with open('outputs/california_housing_model.json', 'r') as f:
+        symbolic_model = json.load(f)
+    X = np.random.rand(10, symbolic_model['n_features'])
+    y_pred = predict(X, symbolic_model)
+    print(y_pred)
+```
+2. Classification Model:
+```python
+import numpy as np
+import json
+def predict(X, symbolic_model):
+    X = np.asarray(X)
+    X_transformed = evaluate_basis_functions(X, symbolic_model['basis_functions'],
+                                            symbolic_model['n_features'])
+    logits = np.dot(X_transformed, np.array(symbolic_model['coefficients_list']).T)
+    probabilities = np.exp(logits) / np.sum(np.exp(logits), axis=1, keepdims=True)
+    return np.argmax(probabilities, axis=1)
+if __name__ == "__main__":
+    with open('outputs/iris_model.json', 'r') as f:
+        symbolic_model = json.load(f)
+    X = np.array([[5.1, 3.5, 1.4, 0.2],
+                  [7.0, 3.2, 4.7, 1.4],
+                  [6.3, 3.3, 6.0, 2.5]])
+    y_pred = predict(X, symbolic_model)
+    print(y_pred)
+```
 ## Contributing
 We welcome contributions! Key areas of interest:

{oikan-0.0.3.3 → oikan-0.0.3.4}/README.md RENAMED Viewed

@@ -100,7 +100,6 @@ model = OIKANRegressor(
     hidden_sizes=[32, 32], # Hidden layer sizes
     activation='relu', # Activation function (other options: 'tanh', 'leaky_relu', 'elu', 'swish', 'gelu')
     augmentation_factor=5, # Augmentation factor for data generation
-    polynomial_degree=2, # Degree of polynomial basis functions
     alpha=0.1, # L1 regularization strength (Symbolic regression)
     sigma=0.1, # Standard deviation of Gaussian noise for data augmentation
     top_k=5, # Number of top features to select (Symbolic regression)
@@ -122,7 +121,7 @@ mse = mean_squared_error(y_test, y_pred)
 print("Mean Squared Error:", mse)
 # Get symbolic formula
-formula = model.get_formula()
+formula = model.get_formula() # default: type='original' -> returns all formula without pruning | other options: 'sympied' -> simplified formula using sympy; 'latex' -> LaTeX format
 print("Symbolic Formula:", formula)
 # Get feature importances
@@ -150,7 +149,6 @@ model = OIKANClassifier(
     hidden_sizes=[32, 32], # Hidden layer sizes
     activation='relu', # Activation function (other options: 'tanh', 'leaky_relu', 'elu', 'swish', 'gelu')
     augmentation_factor=10, # Augmentation factor for data generation
-    polynomial_degree=2, # Degree of polynomial basis functions
     alpha=0.1, # L1 regularization strength (Symbolic regression)
     sigma=0.1, # Standard deviation of Gaussian noise for data augmentation
     top_k=5, # Number of top features to select (Symbolic regression)
@@ -172,7 +170,7 @@ accuracy = model.score(X_test, y_test)
 print("Accuracy:", accuracy)
 # Get symbolic formulas for each class
-formulas = model.get_formula()
+formulas = model.get_formula() # default: type='original' -> returns all formula without pruning | other options: 'sympied' -> simplified formula using sympy; 'latex' -> LaTeX format
 for i, formula in enumerate(formulas):
     print(f"Class {i} Formula:", formula)
@@ -194,6 +192,60 @@ loaded_model.load("outputs/model.json")
 ![OIKAN v0.0.3(1) Architecture](https://raw.githubusercontent.com/silvermete0r/oikan/main/docs/media/oikan-v0.0.3(1)-architecture-oop.png)
+## OIKAN Symbolic Model Compilers
+OIKAN provides a set of symbolic model compilers to convert the symbolic formulas generated by the OIKAN model into different programming languages.
+*Currently, we support: `Python`, `C++`, `C`, `JavaScript`, `Rust`, and `Go`. This allows users to easily integrate the generated formulas into their applications or systems.*
+All compilers: [model_compilers/](model_compilers)
+### Example of Python Compiler
+1. Regression Model:
+```python
+import numpy as np
+import json
+def predict(X, symbolic_model):
+    X = np.asarray(X)
+    X_transformed = evaluate_basis_functions(X, symbolic_model['basis_functions'],
+                                            symbolic_model['n_features'])
+    return np.dot(X_transformed, symbolic_model['coefficients'])
+if __name__ == "__main__":
+    with open('outputs/california_housing_model.json', 'r') as f:
+        symbolic_model = json.load(f)
+    X = np.random.rand(10, symbolic_model['n_features'])
+    y_pred = predict(X, symbolic_model)
+    print(y_pred)
+```
+2. Classification Model:
+```python
+import numpy as np
+import json
+def predict(X, symbolic_model):
+    X = np.asarray(X)
+    X_transformed = evaluate_basis_functions(X, symbolic_model['basis_functions'],
+                                            symbolic_model['n_features'])
+    logits = np.dot(X_transformed, np.array(symbolic_model['coefficients_list']).T)
+    probabilities = np.exp(logits) / np.sum(np.exp(logits), axis=1, keepdims=True)
+    return np.argmax(probabilities, axis=1)
+if __name__ == "__main__":
+    with open('outputs/iris_model.json', 'r') as f:
+        symbolic_model = json.load(f)
+    X = np.array([[5.1, 3.5, 1.4, 0.2],
+                  [7.0, 3.2, 4.7, 1.4],
+                  [6.3, 3.3, 6.0, 2.5]])
+    y_pred = predict(X, symbolic_model)
+    print(y_pred)
+```
 ## Contributing
 We welcome contributions! Key areas of interest:

{oikan-0.0.3.3 → oikan-0.0.3.4}/oikan/model.py RENAMED Viewed

@@ -7,7 +7,7 @@ from sklearn.linear_model import ElasticNet
 from abc import ABC, abstractmethod
 import json
 from .neural import TabularNet
-from .utils import evaluate_basis_functions, get_features_involved
+from .utils import evaluate_basis_functions, get_features_involved, sympify_formula, get_latex_formula
 from sklearn.model_selection import train_test_split
 from sklearn.metrics import r2_score, accuracy_score
 from .exceptions import *
@@ -25,8 +25,6 @@ class OIKAN(ABC):
         Activation function for the neural network ('relu', 'tanh', 'leaky_relu', 'elu', 'swish', 'gelu').
     augmentation_factor : int, optional (default=10)
         Number of augmented samples per original sample.
-    polynomial_degree : int, optional (default=2)
-        Maximum degree of polynomial features for symbolic regression.
     alpha : float, optional (default=0.1)
         L1 regularization strength for Lasso in symbolic regression.
     sigma : float, optional (default=0.1)
@@ -45,7 +43,7 @@ class OIKAN(ABC):
         Whether to evaluate neural network performance before full training.
     """
     def __init__(self, hidden_sizes=[64, 64], activation='relu', augmentation_factor=10,
-                 polynomial_degree=2, alpha=0.1, sigma=0.1, epochs=100, lr=0.001, batch_size=32,
+                 alpha=0.1, sigma=0.1, epochs=100, lr=0.001, batch_size=32,
                  verbose=False, evaluate_nn=False, top_k=5):
         if not isinstance(hidden_sizes, list) or not all(isinstance(x, int) and x > 0 for x in hidden_sizes):
             raise InvalidParameterError("hidden_sizes must be a list of positive integers")
@@ -53,8 +51,6 @@ class OIKAN(ABC):
             raise InvalidParameterError(f"Unsupported activation function: {activation}")
         if not isinstance(augmentation_factor, int) or augmentation_factor < 1:
             raise InvalidParameterError("augmentation_factor must be a positive integer")
-        if not isinstance(polynomial_degree, int) or polynomial_degree < 1:
-            raise InvalidParameterError("polynomial_degree must be a positive integer")
         if not isinstance(top_k, int) or top_k < 1:
             raise InvalidParameterError("top_k must be a positive integer")
         if not 0 < lr < 1:
@@ -71,7 +67,6 @@ class OIKAN(ABC):
         self.hidden_sizes = hidden_sizes
         self.activation = activation
         self.augmentation_factor = augmentation_factor
-        self.polynomial_degree = polynomial_degree
         self.alpha = alpha
         self.sigma = sigma
         self.epochs = epochs
@@ -92,23 +87,53 @@ class OIKAN(ABC):
     def predict(self, X):
         pass
-    def get_formula(self):
-        """Returns the symbolic formula(s) as a string (regression) or list of strings (classification)."""
+    def get_formula(self, type='original'):
+        """
+        Returns the symbolic formula(s) as a string (regression) or list of strings (classification).
+        Parameter:
+        --------
+        type : str, optional (default='original') other options: 'sympied', 'latex'
+            'original' returns the original formula with coefficients, 'sympied' returns sympy simplified formula.
+        """
+        if type.lower() not in ['original', 'sympied', 'latex']:
+            raise InvalidParameterError("Invalid type. Choose 'original', 'sympied', 'latex'.")
         if self.symbolic_model is None:
             raise ValueError("Model not fitted yet.")
         basis_functions = self.symbolic_model['basis_functions']
-        if 'coefficients' in self.symbolic_model:
-            coefficients = self.symbolic_model['coefficients']
-            formula = " + ".join([f"{coefficients[i]:.5f}*{basis_functions[i]}"
-                                for i in range(len(coefficients)) if coefficients[i] != 0])
-            return formula if formula else "0"
+        if type.lower() == 'original':
+            if 'coefficients' in self.symbolic_model:
+                coefficients = self.symbolic_model['coefficients']
+                formula = " + ".join([f"{coefficients[i]:.6f}*{basis_functions[i]}"
+                                    for i in range(len(coefficients)) if coefficients[i] != 0])
+                return formula if formula else "0"
+            else:
+                formulas = []
+                for c, coef in enumerate(self.symbolic_model['coefficients_list']):
+                    formula = " + ".join([f"{coef[i]:.6f}*{basis_functions[i]}"
+                                        for i in range(len(coef)) if coef[i] != 0])
+                    formulas.append(f"Class {self.classes_[c]}: {formula if formula else '0'}")
+                return formulas
+        elif type.lower() == 'sympied':
+            if 'coefficients' in self.symbolic_model:
+                formula = sympify_formula(self.symbolic_model['basis_functions'], self.symbolic_model['coefficients'], self.symbolic_model['n_features'])
+                return formula
+            else:
+                formulas = []
+                for c, coef in enumerate(self.symbolic_model['coefficients_list']):
+                    formula = sympify_formula(self.symbolic_model['basis_functions'], coef, self.symbolic_model['n_features'])
+                    formulas.append(f"Class {self.classes_[c]}: {formula}")
+                return formulas
         else:
-            formulas = []
-            for c, coef in enumerate(self.symbolic_model['coefficients_list']):
-                formula = " + ".join([f"{coef[i]:.5f}*{basis_functions[i]}"
-                                    for i in range(len(coef)) if coef[i] != 0])
-                formulas.append(f"Class {self.classes_[c]}: {formula if formula else '0'}")
-            return formulas
+            if 'coefficients' in self.symbolic_model:
+                formula = get_latex_formula(self.symbolic_model['basis_functions'], self.symbolic_model['coefficients'], self.symbolic_model['n_features'])
+                return formula
+            else:
+                formulas = []
+                for c, coef in enumerate(self.symbolic_model['coefficients_list']):
+                    formula = get_latex_formula(self.symbolic_model['basis_functions'], coef, self.symbolic_model['n_features'])
+                    formulas.append(f"Class {self.classes_[c]}: {formula}")
+                return formulas
     def feature_importances(self):
         """
@@ -163,7 +188,6 @@ class OIKAN(ABC):
             # Convert numpy arrays and other non-serializable types to lists
             model_data = {
                 'n_features': self.symbolic_model['n_features'],
-                'degree': self.symbolic_model['degree'],
                 'basis_functions': self.symbolic_model['basis_functions']
             }
@@ -200,7 +224,6 @@ class OIKAN(ABC):
             self.symbolic_model = {
                 'n_features': model_data['n_features'],
-                'degree': model_data['degree'],
                 'basis_functions': model_data['basis_functions']
             }
@@ -222,7 +245,6 @@ class OIKAN(ABC):
         input_size = X.shape[1]
         self.neural_net = TabularNet(input_size, self.hidden_sizes, output_size, self.activation)
-        optimizer = optim.Adam(self.neural_net.parameters(), lr=self.lr)
         # Train on the training set
         self._train_neural_net(X_train, y_train, output_size, loss_fn)
@@ -378,7 +400,6 @@ class OIKAN(ABC):
             selected_indices = np.where(np.abs(coef_refined) > 1e-6)[0]
             self.symbolic_model = {
                 'n_features': X.shape[1],
-                'degree': self.polynomial_degree,
                 'basis_functions': [basis_functions_refined[i] for i in selected_indices],
                 'coefficients': coef_refined[selected_indices].tolist()
             }
@@ -398,7 +419,6 @@ class OIKAN(ABC):
                 coefficients_list.append(coef_selected)
             self.symbolic_model = {
                 'n_features': X.shape[1],
-                'degree': self.polynomial_degree,
                 'basis_functions': basis_functions,
                 'coefficients_list': coefficients_list
             }

oikan-0.0.3.4/oikan/utils.py ADDED Viewed

@@ -0,0 +1,256 @@
+import numpy as np
+import sympy as sp
+import json
+from functools import lru_cache
+def evaluate_basis_functions(X, basis_functions, n_features):
+    """
+    Evaluates basis functions on the input data.
+    Parameters:
+    -----------
+    X : array-like of shape (n_samples, n_features)
+        Input data.
+    basis_functions : list
+        List of basis function strings (e.g., '1', 'x0', 'x0^2', 'x0 x1', 'log1p_x0').
+    n_features : int
+        Number of input features.
+    Returns:
+    --------
+    X_transformed : ndarray of shape (n_samples, n_basis_functions)
+        Transformed data matrix.
+    """
+    X_transformed = np.zeros((X.shape[0], len(basis_functions)))
+    for i, func in enumerate(basis_functions):
+        if func == '1':
+            X_transformed[:, i] = 1
+        elif func.startswith('log1p_x'):
+            idx = int(func.split('_')[1][1:])
+            X_transformed[:, i] = np.log1p(np.abs(X[:, idx]))
+        elif func.startswith('exp_x'):
+            idx = int(func.split('_')[1][1:])
+            X_transformed[:, i] = np.exp(np.clip(X[:, idx], -10, 10))
+        elif func.startswith('sin_x'):
+            idx = int(func.split('_')[1][1:])
+            X_transformed[:, i] = np.sin(X[:, idx])
+        elif '^' in func:
+            var, power = func.split('^')
+            idx = int(var[1:])
+            X_transformed[:, i] = X[:, idx] ** int(power)
+        elif ' ' in func:
+            vars = func.split(' ')
+            result = np.ones(X.shape[0])
+            for var in vars:
+                idx = int(var[1:])
+                result *= X[:, idx]
+            X_transformed[:, i] = result
+        else:
+            idx = int(func[1:])
+            X_transformed[:, i] = X[:, idx]
+    return X_transformed
+def get_features_involved(basis_function):
+    """
+    Extracts the feature indices involved in a basis function string.
+    Parameters:
+    -----------
+    basis_function : str
+        String representation of the basis function, e.g., 'x0', 'x0^2', 'x0 x1', 'log1p_x0'.
+    Returns:
+    --------
+    set : Set of feature indices involved.
+    """
+    if basis_function == '1':
+        return set()
+    features = set()
+    if '_' in basis_function:  # Handle non-linear functions like 'log1p_x0'
+        parts = basis_function.split('_')
+        if len(parts) == 2 and parts[1].startswith('x'):
+            idx = int(parts[1][1:])
+            features.add(idx)
+    elif '^' in basis_function:  # Handle powers, e.g., 'x0^2'
+        var = basis_function.split('^')[0]
+        idx = int(var[1:])
+        features.add(idx)
+    elif ' ' in basis_function:  # Handle interactions, e.g., 'x0 x1'
+        for part in basis_function.split():
+            idx = int(part[1:])
+            features.add(idx)
+    elif basis_function.startswith('x'):
+        idx = int(basis_function[1:])
+        features.add(idx)
+    return features
+@lru_cache(maxsize=1000)
+def _cached_sympify_formula(basis_functions_tuple, coefficients_tuple, n_features, threshold):
+    """
+    Internal function to perform SymPy formula simplification with caching.
+    Parameters:
+    -----------
+    basis_functions_tuple : tuple
+        Tuple of basis function strings.
+    coefficients_tuple : tuple
+        Tuple of coefficients.
+    n_features : int
+        Number of input features.
+    threshold : float
+        Coefficients with absolute value below this are excluded.
+    Returns:
+    --------
+    str
+        Simplified formula as a string, or '0' if empty.
+    """
+    # Convert tuples back to lists
+    basis_functions = list(basis_functions_tuple)
+    coefficients = list(coefficients_tuple)
+    # Define symbolic variables
+    x = sp.symbols(f'x0:{n_features}')
+    expr = 0
+    # Build the expression
+    for coef, func in zip(coefficients, basis_functions):
+        if abs(coef) < threshold:
+            continue  # Skip negligible coefficients
+        if func == '1':
+            term = coef
+        elif func.startswith('log1p_x'):
+            idx = int(func.split('_')[1][1:])
+            term = coef * sp.log(1 + sp.Abs(x[idx]))
+        elif func.startswith('exp_x'):
+            idx = int(func.split('_')[1][1:])
+            term = coef * sp.exp(x[idx])
+        elif func.startswith('sin_x'):
+            idx = int(func.split('_')[1][1:])
+            term = coef * sp.sin(x[idx])
+        elif '^' in func:
+            var, power = func.split('^')
+            idx = int(var[1:])
+            term = coef * x[idx]**int(power)
+        elif ' ' in func:
+            vars = func.split(' ')
+            term = coef
+            for var in vars:
+                idx = int(var[1:])
+                term *= x[idx]
+        else:
+            idx = int(func[1:])
+            term = coef * x[idx]
+        expr += term
+    # Simplify the expression
+    simplified_expr = sp.simplify(expr)
+    # Convert to string with rounded coefficients
+    def format_term(term):
+        if term.is_Mul:
+            coeff = 1
+            factors = []
+            for factor in term.args:
+                if factor.is_Number:
+                    coeff *= float(factor)
+                else:
+                    factors.append(str(factor))
+            if abs(coeff) < threshold:
+                return None
+            return f"{coeff:.5f}*{'*'.join(factors)}" if factors else f"{coeff:.5f}"
+        elif term.is_Add:
+            return None  # Handle in recursion
+        elif term.is_Number:
+            return f"{float(term):.5f}" if abs(float(term)) >= threshold else None
+        else:
+            return f"{1.0:.5f}*{term}" if abs(1.0) >= threshold else None
+    terms = []
+    if simplified_expr.is_Add:
+        for term in simplified_expr.args:
+            formatted = format_term(term)
+            if formatted:
+                terms.append(formatted)
+    else:
+        formatted = format_term(simplified_expr)
+        if formatted:
+            terms.append(formatted)
+    formula = " + ".join(terms).replace("+ -", "- ")
+    return formula if formula else "0"
+def sympify_formula(basis_functions, coefficients, n_features, threshold=0.00005):
+    """
+    Simplifies a symbolic formula using SymPy with caching.
+    Parameters:
+    -----------
+    basis_functions : list
+        List of basis function strings (e.g., 'x0', 'x0^2', 'x0 x1', 'exp_x0').
+    coefficients : list
+        List of coefficients corresponding to each basis function.
+    n_features : int
+        Number of input features.
+    threshold : float, optional (default=0.00005)
+        Coefficients with absolute value below this are excluded.
+    Returns:
+    --------
+    str
+        Simplified formula as a string, or '0' if empty.
+    """
+    # Convert inputs to hashable types
+    basis_functions_tuple = tuple(basis_functions)
+    coefficients_tuple = tuple(coefficients)
+    # Call cached function
+    return _cached_sympify_formula(basis_functions_tuple, coefficients_tuple, n_features, threshold)
+@lru_cache(maxsize=1000)
+def _cached_get_latex_formula(formula):
+    """
+    Internal function to convert a simplified formula to LaTeX with caching.
+    Parameters:
+    -----------
+    formula : str
+        Simplified formula string.
+    Returns:
+    --------
+    str
+        LaTeX formula as a string.
+    """
+    return sp.latex(sp.sympify(formula))
+def get_latex_formula(basis_functions, coefficients, n_features, threshold=0.00005):
+    """
+    Generates a LaTeX formula from the basis functions and coefficients with caching.
+    Parameters:
+    -----------
+    basis_functions : list
+        List of basis function strings (e.g., 'x0', 'x0^2', 'x0 x1', 'exp_x0').
+    coefficients : list
+        List of coefficients corresponding to each basis function.
+    n_features : int
+        Number of input features.
+    threshold : float, optional (default=0.00005)
+        Coefficients with absolute value below this are excluded.
+    Returns:
+    --------
+    str
+        LaTeX formula as a string, or '0' if empty.
+    """
+    # Get simplified formula (cached)
+    formula = sympify_formula(basis_functions, coefficients, n_features, threshold)
+    # Convert to LaTeX (cached)
+    return _cached_get_latex_formula(formula)
+if __name__ == "__main__":
+    with open('outputs/california_housing_model.json', 'r') as f:
+        model = json.load(f)
+    print('Sympified formula:', sympify_formula(model['basis_functions'], model['coefficients'], model['n_features']))
+    print('LaTeX formula:', get_latex_formula(model['basis_functions'], model['coefficients'], model['n_features']))

{oikan-0.0.3.3 → oikan-0.0.3.4}/oikan.egg-info/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: oikan
-Version: 0.0.3.3
+Version: 0.0.3.4
 Summary: OIKAN: Neuro-Symbolic ML for Scientific Discovery
 Author: Arman Zhalgasbayev
 License: MIT
@@ -14,6 +14,7 @@ Requires-Dist: torch
 Requires-Dist: numpy
 Requires-Dist: scikit-learn
 Requires-Dist: tqdm
+Requires-Dist: sympy
 Dynamic: license-file
 <!-- logo in the center -->
@@ -118,7 +119,6 @@ model = OIKANRegressor(
     hidden_sizes=[32, 32], # Hidden layer sizes
     activation='relu', # Activation function (other options: 'tanh', 'leaky_relu', 'elu', 'swish', 'gelu')
     augmentation_factor=5, # Augmentation factor for data generation
-    polynomial_degree=2, # Degree of polynomial basis functions
     alpha=0.1, # L1 regularization strength (Symbolic regression)
     sigma=0.1, # Standard deviation of Gaussian noise for data augmentation
     top_k=5, # Number of top features to select (Symbolic regression)
@@ -140,7 +140,7 @@ mse = mean_squared_error(y_test, y_pred)
 print("Mean Squared Error:", mse)
 # Get symbolic formula
-formula = model.get_formula()
+formula = model.get_formula() # default: type='original' -> returns all formula without pruning | other options: 'sympied' -> simplified formula using sympy; 'latex' -> LaTeX format
 print("Symbolic Formula:", formula)
 # Get feature importances
@@ -168,7 +168,6 @@ model = OIKANClassifier(
     hidden_sizes=[32, 32], # Hidden layer sizes
     activation='relu', # Activation function (other options: 'tanh', 'leaky_relu', 'elu', 'swish', 'gelu')
     augmentation_factor=10, # Augmentation factor for data generation
-    polynomial_degree=2, # Degree of polynomial basis functions
     alpha=0.1, # L1 regularization strength (Symbolic regression)
     sigma=0.1, # Standard deviation of Gaussian noise for data augmentation
     top_k=5, # Number of top features to select (Symbolic regression)
@@ -190,7 +189,7 @@ accuracy = model.score(X_test, y_test)
 print("Accuracy:", accuracy)
 # Get symbolic formulas for each class
-formulas = model.get_formula()
+formulas = model.get_formula() # default: type='original' -> returns all formula without pruning | other options: 'sympied' -> simplified formula using sympy; 'latex' -> LaTeX format
 for i, formula in enumerate(formulas):
     print(f"Class {i} Formula:", formula)
@@ -212,6 +211,60 @@ loaded_model.load("outputs/model.json")
 ![OIKAN v0.0.3(1) Architecture](https://raw.githubusercontent.com/silvermete0r/oikan/main/docs/media/oikan-v0.0.3(1)-architecture-oop.png)
+## OIKAN Symbolic Model Compilers
+OIKAN provides a set of symbolic model compilers to convert the symbolic formulas generated by the OIKAN model into different programming languages.
+*Currently, we support: `Python`, `C++`, `C`, `JavaScript`, `Rust`, and `Go`. This allows users to easily integrate the generated formulas into their applications or systems.*
+All compilers: [model_compilers/](model_compilers)
+### Example of Python Compiler
+1. Regression Model:
+```python
+import numpy as np
+import json
+def predict(X, symbolic_model):
+    X = np.asarray(X)
+    X_transformed = evaluate_basis_functions(X, symbolic_model['basis_functions'],
+                                            symbolic_model['n_features'])
+    return np.dot(X_transformed, symbolic_model['coefficients'])
+if __name__ == "__main__":
+    with open('outputs/california_housing_model.json', 'r') as f:
+        symbolic_model = json.load(f)
+    X = np.random.rand(10, symbolic_model['n_features'])
+    y_pred = predict(X, symbolic_model)
+    print(y_pred)
+```
+2. Classification Model:
+```python
+import numpy as np
+import json
+def predict(X, symbolic_model):
+    X = np.asarray(X)
+    X_transformed = evaluate_basis_functions(X, symbolic_model['basis_functions'],
+                                            symbolic_model['n_features'])
+    logits = np.dot(X_transformed, np.array(symbolic_model['coefficients_list']).T)
+    probabilities = np.exp(logits) / np.sum(np.exp(logits), axis=1, keepdims=True)
+    return np.argmax(probabilities, axis=1)
+if __name__ == "__main__":
+    with open('outputs/iris_model.json', 'r') as f:
+        symbolic_model = json.load(f)
+    X = np.array([[5.1, 3.5, 1.4, 0.2],
+                  [7.0, 3.2, 4.7, 1.4],
+                  [6.3, 3.3, 6.0, 2.5]])
+    y_pred = predict(X, symbolic_model)
+    print(y_pred)
+```
 ## Contributing
 We welcome contributions! Key areas of interest:

{oikan-0.0.3.3 → oikan-0.0.3.4}/oikan.egg-info/requires.txt RENAMED Viewed

@@ -2,3 +2,4 @@ torch
 numpy
 scikit-learn
 tqdm
+sympy

{oikan-0.0.3.3 → oikan-0.0.3.4}/pyproject.toml RENAMED Viewed

@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
 [project]
 name = "oikan"
-version = "0.0.3.3"
+version = "0.0.3.4"
 description = "OIKAN: Neuro-Symbolic ML for Scientific Discovery"
 readme = "README.md"
 authors = [{name = "Arman Zhalgasbayev"}]
@@ -12,7 +12,8 @@ dependencies = [
     "torch",
     "numpy",
     "scikit-learn",
-    "tqdm"
+    "tqdm",
+    "sympy"
 ]
 requires-python = ">=3.7"
 license = {text = "MIT"}

{oikan-0.0.3.3 → oikan-0.0.3.4}/setup.py RENAMED Viewed

@@ -7,6 +7,7 @@ setup(
         "torch",
         "numpy",
         "scikit-learn",
-        "tqdm"
+        "tqdm",
+        "sympy"
     ]
 )

oikan-0.0.3.3/oikan/utils.py DELETED Viewed

@@ -1,82 +0,0 @@
-import numpy as np
-def evaluate_basis_functions(X, basis_functions, n_features):
-    """
-    Evaluates basis functions on the input data.
-    Parameters:
-    -----------
-    X : array-like of shape (n_samples, n_features)
-        Input data.
-    basis_functions : list
-        List of basis function strings (e.g., '1', 'x0', 'x0^2', 'x0 x1', 'log1p_x0').
-    n_features : int
-        Number of input features.
-    Returns:
-    --------
-    X_transformed : ndarray of shape (n_samples, n_basis_functions)
-        Transformed data matrix.
-    """
-    X_transformed = np.zeros((X.shape[0], len(basis_functions)))
-    for i, func in enumerate(basis_functions):
-        if func == '1':
-            X_transformed[:, i] = 1
-        elif func.startswith('log1p_x'):
-            idx = int(func.split('_')[1][1:])
-            X_transformed[:, i] = np.log1p(np.abs(X[:, idx]))
-        elif func.startswith('exp_x'):
-            idx = int(func.split('_')[1][1:])
-            X_transformed[:, i] = np.exp(np.clip(X[:, idx], -10, 10))
-        elif func.startswith('sin_x'):
-            idx = int(func.split('_')[1][1:])
-            X_transformed[:, i] = np.sin(X[:, idx])
-        elif '^' in func:
-            var, power = func.split('^')
-            idx = int(var[1:])
-            X_transformed[:, i] = X[:, idx] ** int(power)
-        elif ' ' in func:
-            vars = func.split(' ')
-            result = np.ones(X.shape[0])
-            for var in vars:
-                idx = int(var[1:])
-                result *= X[:, idx]
-            X_transformed[:, i] = result
-        else:
-            idx = int(func[1:])
-            X_transformed[:, i] = X[:, idx]
-    return X_transformed
-def get_features_involved(basis_function):
-    """
-    Extracts the feature indices involved in a basis function string.
-    Parameters:
-    -----------
-    basis_function : str
-        String representation of the basis function, e.g., 'x0', 'x0^2', 'x0 x1', 'log1p_x0'.
-    Returns:
-    --------
-    set : Set of feature indices involved.
-    """
-    if basis_function == '1':
-        return set()
-    features = set()
-    if '_' in basis_function:  # Handle non-linear functions like 'log1p_x0'
-        parts = basis_function.split('_')
-        if len(parts) == 2 and parts[1].startswith('x'):
-            idx = int(parts[1][1:])
-            features.add(idx)
-    elif '^' in basis_function:  # Handle powers, e.g., 'x0^2'
-        var = basis_function.split('^')[0]
-        idx = int(var[1:])
-        features.add(idx)
-    elif ' ' in basis_function:  # Handle interactions, e.g., 'x0 x1'
-        for part in basis_function.split():
-            idx = int(part[1:])
-            features.add(idx)
-    elif basis_function.startswith('x'):
-        idx = int(basis_function[1:])
-        features.add(idx)
-    return features