PyPI - oikan - Versions diffs - 0.0.2.4__py3-none-any.whl → 0.0.3.1__py3-none-any.whl - Mend

oikan 0.0.2.4__py3-none-any.whl → 0.0.3.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (13) hide show

oikan/__init__.py +14 -0
oikan/exceptions.py +5 -13
oikan/model.py +307 -426
oikan/neural.py +43 -0
oikan/symbolic.py +52 -25
oikan/utils.py +59 -37
oikan-0.0.3.1.dist-info/METADATA +233 -0
oikan-0.0.3.1.dist-info/RECORD +11 -0
{oikan-0.0.2.4.dist-info → oikan-0.0.3.1.dist-info}/WHEEL +1 -1
oikan-0.0.2.4.dist-info/METADATA +0 -214
oikan-0.0.2.4.dist-info/RECORD +0 -10
{oikan-0.0.2.4.dist-info → oikan-0.0.3.1.dist-info}/licenses/LICENSE +0 -0
{oikan-0.0.2.4.dist-info → oikan-0.0.3.1.dist-info}/top_level.txt +0 -0

oikan/neural.py ADDED Viewed

@@ -0,0 +1,43 @@
+import torch.nn as nn
+class TabularNet(nn.Module):
+    """
+    Feedforward neural network for tabular data.
+    Parameters:
+    -----------
+    input_size : int
+        Number of input features.
+    hidden_sizes : list
+        List of hidden layer sizes.
+    output_size : int
+        Number of output units.
+    activation : str, optional (default='relu')
+        Activation function ('relu', 'tanh', 'leaky_relu', 'elu', 'swish', 'gelu').
+    """
+    def __init__(self, input_size, hidden_sizes, output_size, activation='relu'):
+        super(TabularNet, self).__init__()
+        layers = []
+        in_size = input_size
+        for hidden_size in hidden_sizes:
+            layers.append(nn.Linear(in_size, hidden_size))
+            if activation == 'relu':
+                layers.append(nn.ReLU())
+            elif activation == 'tanh':
+                layers.append(nn.Tanh())
+            elif activation == 'leaky_relu':
+                layers.append(nn.LeakyReLU(negative_slope=0.01))
+            elif activation == 'elu':
+                layers.append(nn.ELU(alpha=1.0))
+            elif activation == 'swish':
+                layers.append(nn.SiLU())
+            elif activation == 'gelu':
+                layers.append(nn.GELU())
+            else:
+                raise ValueError("Unsupported activation function.")
+            in_size = hidden_size
+        layers.append(nn.Linear(in_size, output_size))
+        self.net = nn.Sequential(*layers)
+    def forward(self, x):
+        return self.net(x)

oikan/symbolic.py CHANGED Viewed

@@ -1,28 +1,55 @@
-from .utils import ADVANCED_LIB
+import numpy as np
+from sklearn.preprocessing import PolynomialFeatures
+from sklearn.linear_model import Lasso
-def symbolic_edge_repr(weights, bias=None, threshold=1e-4):
+def symbolic_regression(X, y, degree=2, alpha=0.1):
     """
-    Given a list of weights (floats) and an optional bias,
-    returns a list of structured terms (coefficient, basis function string).
+    Performs symbolic regression on the input data.
+    Parameters:
+    -----------
+    X : array-like of shape (n_samples, n_features)
+        Input data.
+    y : array-like of shape (n_samples,) or (n_samples, n_targets)
+        Target values.
+    degree : int, optional (default=2)
+        Maximum polynomial degree.
+    alpha : float, optional (default=0.1)
+        L1 regularization strength.
+    Returns:
+    --------
+    dict : Contains 'basis_functions', 'coefficients' (or 'coefficients_list'), 'n_features', 'degree'
     """
-    terms = []
-    # weights should be in the same order as ADVANCED_LIB.items()
-    for (_, (notation, _)), w in zip(ADVANCED_LIB.items(), weights):
-        if abs(w) > threshold:
-            terms.append((w, notation))
-    if bias is not None and abs(bias) > threshold:
-        # use "1" to represent the constant term
-        terms.append((bias, "1"))
-    return terms
-def format_symbolic_terms(terms):
-    """
-    Formats a list of structured symbolic terms (coef, basis) to a string.
-    """
-    formatted_terms = []
-    for coef, basis in terms:
-        if basis == "1":
-            formatted_terms.append(f"{coef:.4f}")
-        else:
-            formatted_terms.append(f"{coef:.4f}*{basis}")
-    return " + ".join(formatted_terms) if formatted_terms else "0"
+    poly = PolynomialFeatures(degree=degree, include_bias=True)
+    X_poly = poly.fit_transform(X)
+    model = Lasso(alpha=alpha, fit_intercept=False)
+    model.fit(X_poly, y)
+    if len(y.shape) == 1 or y.shape[1] == 1:
+        coef = model.coef_.flatten()
+        selected_indices = np.where(np.abs(coef) > 1e-6)[0]
+        return {
+            'n_features': X.shape[1],
+            'degree': degree,
+            'basis_functions': poly.get_feature_names_out()[selected_indices].tolist(),
+            'coefficients': coef[selected_indices].tolist()
+        }
+    else:
+        coefficients_list = []
+        selected_indices = set()
+        for c in range(y.shape[1]):
+            coef = model.coef_[c]
+            indices = np.where(np.abs(coef) > 1e-6)[0]
+            selected_indices.update(indices)
+        selected_indices = list(selected_indices)
+        basis_functions = poly.get_feature_names_out()[selected_indices].tolist()
+        for c in range(y.shape[1]):
+            coef = model.coef_[c]
+            coef_selected = coef[selected_indices].tolist()
+            coefficients_list.append(coef_selected)
+        return {
+            'n_features': X.shape[1],
+            'degree': degree,
+            'basis_functions': basis_functions,
+            'coefficients_list': coefficients_list
+        }

oikan/utils.py CHANGED Viewed

@@ -1,41 +1,63 @@
-from .exceptions import *
-import torch
-import torch.nn as nn
 import numpy as np
-ADVANCED_LIB = {
-    'x':    ('x', lambda x: x),
-    'x^2':  ('x^2', lambda x: x**2),
-    'sin':  ('sin(x)', lambda x: np.sin(x)),
-    'tanh': ('tanh(x)', lambda x: np.tanh(x))
-}
+def evaluate_basis_functions(X, basis_functions, n_features):
+    """
+    Evaluates basis functions on the input data.
+    Parameters:
+    -----------
+    X : array-like of shape (n_samples, n_features)
+        Input data.
+    basis_functions : list
+        List of basis function strings (e.g., '1', 'x0', 'x0^2', 'x0 x1').
+    n_features : int
+        Number of input features.
+    Returns:
+    --------
+    X_transformed : ndarray of shape (n_samples, n_basis_functions)
+        Transformed data matrix.
+    """
+    X_transformed = np.zeros((X.shape[0], len(basis_functions)))
+    for i, func in enumerate(basis_functions):
+        if func == '1':
+            X_transformed[:, i] = 1
+        elif '^' in func:
+            var, power = func.split('^')
+            idx = int(var[1:])
+            X_transformed[:, i] = X[:, idx] ** int(power)
+        elif ' ' in func:
+            var1, var2 = func.split(' ')
+            idx1 = int(var1[1:])
+            idx2 = int(var2[1:])
+            X_transformed[:, i] = X[:, idx1] * X[:, idx2]
+        else:
+            idx = int(func[1:])
+            X_transformed[:, i] = X[:, idx]
+    return X_transformed
-class EdgeActivation(nn.Module):
-    """Learnable edge-based activation function."""
-    def __init__(self):
-        super().__init__()
-        self.weights = nn.Parameter(torch.randn(len(ADVANCED_LIB)))
-        self.bias = nn.Parameter(torch.zeros(1))
-    def forward(self, x):
-        features = []
-        for _, func in ADVANCED_LIB.values():
-            feat = torch.tensor(func(x.detach().cpu().numpy()),
-                              dtype=torch.float32).to(x.device)
-            features.append(feat)
-        features = torch.stack(features, dim=-1)
-        return torch.matmul(features, self.weights.unsqueeze(0).T) + self.bias
+def get_features_involved(basis_function):
+    """
+    Extracts the feature indices involved in a basis function string.
+    Parameters:
+    -----------
+    basis_function : str
+        String representation of the basis function, e.g., 'x0', 'x0^2', 'x0 x1'.
-    def get_symbolic_repr(self, threshold=1e-4):
-        """Get symbolic representation of the activation function."""
-        significant_terms = []
-        for (notation, _), weight in zip(ADVANCED_LIB.values(),
-                                       self.weights.detach().cpu().numpy()):
-            if abs(weight) > threshold:
-                significant_terms.append(f"{weight:.4f}*{notation}")
-        if abs(self.bias.item()) > threshold:
-            significant_terms.append(f"{self.bias.item():.4f}")
-        return " + ".join(significant_terms) if significant_terms else "0"
+    Returns:
+    --------
+    set : Set of feature indices involved.
+    """
+    if basis_function == '1':  # Constant term involves no features
+        return set()
+    features = set()
+    for part in basis_function.split():  # Split by space for interaction terms
+        if part.startswith('x'):
+            if '^' in part:  # Handle powers, e.g., 'x0^2'
+                var = part.split('^')[0]  # Take 'x0'
+            else:
+                var = part  # Take 'x0' as is
+            idx = int(var[1:])  # Extract index, e.g., 0
+            features.add(idx)
+    return features

oikan-0.0.3.1.dist-info/METADATA ADDED Viewed

@@ -0,0 +1,233 @@
+Metadata-Version: 2.4
+Name: oikan
+Version: 0.0.3.1
+Summary: OIKAN: Neuro-Symbolic ML for Scientific Discovery
+Author: Arman Zhalgasbayev
+License: MIT
+Classifier: Programming Language :: Python :: 3
+Classifier: License :: OSI Approved :: MIT License
+Classifier: Operating System :: OS Independent
+Requires-Python: >=3.7
+Description-Content-Type: text/markdown
+License-File: LICENSE
+Requires-Dist: torch
+Requires-Dist: numpy
+Requires-Dist: scikit-learn
+Requires-Dist: tqdm
+Dynamic: license-file
+<!-- logo in the center -->
+<div align="center">
+<img src="https://raw.githubusercontent.com/silvermete0r/oikan/main/docs/media/oikan_logo.png" alt="OIKAN Logo" width="200"/>
+<h1>OIKAN: Neuro-Symbolic ML for Scientific Discovery</h1>
+</div>
+## Overview
+OIKAN is a neuro-symbolic machine learning framework inspired by the Kolmogorov-Arnold representation theorem. It combines the power of modern neural networks with techniques for extracting clear, interpretable symbolic formulas from data. OIKAN is designed to make machine learning models both accurate and interpretable.
+[![PyPI version](https://badge.fury.io/py/oikan.svg)](https://badge.fury.io/py/oikan)
+[![PyPI Downloads per month](https://img.shields.io/pypi/dm/oikan.svg)](https://pypistats.org/packages/oikan)
+[![PyPI Total Downloads](https://static.pepy.tech/badge/oikan)](https://pepy.tech/projects/oikan)
+[![License](https://img.shields.io/badge/License-MIT-blue.svg)](https://opensource.org/licenses/MIT)
+[![GitHub issues](https://img.shields.io/github/issues/silvermete0r/OIKAN.svg)](https://github.com/silvermete0r/oikan/issues)
+[![Docs](https://img.shields.io/badge/docs-passing-brightgreen)](https://silvermete0r.github.io/oikan/)
+> **Important Disclaimer**: OIKAN is an experimental research project. It is not intended for production use or real-world applications. This framework is designed for research purposes, experimentation, and academic exploration of neuro-symbolic machine learning concepts.
+## Key Features
+- 🧠 **Neuro-Symbolic ML**: Combines neural network learning with symbolic mathematics
+- 📊 **Automatic Formula Extraction**: Generates human-readable mathematical expressions
+- 🎯 **Scikit-learn Compatible**: Familiar `.fit()` and `.predict()` interface
+- 🔬 **Research-Focused**: Designed for academic exploration and experimentation
+- 📈 **Multi-Task**: Supports both regression and classification problems
+## Scientific Foundation
+OIKAN implements a modern interpretation of the Kolmogorov-Arnold Representation Theorem through a hybrid neural architecture:
+1. **Theoretical Foundation**: The Kolmogorov-Arnold theorem states that any continuous function of n variables can be represented as a finite superposition of continuous single-variable functions and addition:
+   ```
+   f(x₁,...,xₙ) = ∑(j=0 to 2n){ φⱼ( ∑(i=1 to n) ψᵢⱼ(xᵢ) ) }
+   ```
+   where φⱼ and ψᵢⱼ are continuous univariate functions.
+2. **Neural Implementation**: OIKAN uses a specialized architecture combining:
+   - Feature transformation layers with interpretable basis functions
+   - Symbolic regression for formula extraction
+   - Automatic pruning of insignificant terms
+   ```python
+   class OIKANRegressor:
+       def __init__(self, hidden_sizes=[64, 64], activation='relu',
+                    polynomial_degree=2, alpha=0.1):
+           # Neural network for learning complex patterns
+           self.neural_net = TabularNet(input_size, hidden_sizes, activation)
+           # Symbolic regression for interpretable formulas
+           self.symbolic_model = None
+   ```
+3. **Basis Functions**: Core set of interpretable transformations:
+   ```python
+   SYMBOLIC_FUNCTIONS = {
+       'linear': 'x',           # Direct relationships
+       'quadratic': 'x^2',      # Non-linear patterns
+       'interaction': 'x_i x_j', # Feature interactions
+       'higher_order': 'x^n'    # Polynomial terms
+   }
+   ```
+4. **Formula Extraction Process**:
+   - Train neural network on raw data
+   - Generate augmented samples for better coverage
+   - Perform L1-regularized symbolic regression
+   - Prune terms with coefficients below threshold
+   - Export human-readable mathematical expressions
+## Quick Start
+### Installation
+#### Method 1: Via PyPI (Recommended)
+```bash
+pip install -qU oikan
+```
+#### Method 2: Local Development
+```bash
+git clone https://github.com/silvermete0r/OIKAN.git
+cd OIKAN
+pip install -e .  # Install in development mode
+```
+### Regression Example
+```python
+from oikan.model import OIKANRegressor
+from sklearn.metrics import mean_squared_error
+# Initialize model
+model = OIKANRegressor(
+    hidden_sizes=[32, 32], # Hidden layer sizes
+    activation='relu', # Activation function (other options: 'tanh', 'leaky_relu', 'elu', 'swish', 'gelu')
+    augmentation_factor=5, # Augmentation factor for data generation
+    polynomial_degree=2, # Degree of polynomial basis functions
+    alpha=0.1, # L1 regularization strength
+    sigma=0.1, # Standard deviation of Gaussian noise for data augmentation
+    epochs=100, # Number of training epochs
+    lr=0.001, # Learning rate
+    batch_size=32, # Batch size for training
+    verbose=True # Verbose output during training
+)
+# Fit the model
+model.fit(X_train, y_train)
+# Make predictions
+y_pred = model.predict(X_test)
+# Evaluate performance
+mse = mean_squared_error(y_test, y_pred)
+print("Mean Squared Error:", mse)
+# Get symbolic formula
+formula = model.get_formula()
+print("Symbolic Formula:", formula)
+# Get feature importances
+importances = model.feature_importances()
+print("Feature Importances:", importances)
+# Save the model (optional)
+model.save("outputs/model.json")
+# Load the model (optional)
+loaded_model = OIKANRegressor()
+loaded_model.load("outputs/model.json")
+```
+*Example of the saved symbolic formula (regression model): [outputs/california_housing_model.json](outputs/california_housing_model.json)*
+### Classification Example
+```python
+from oikan.model import OIKANClassifier
+from sklearn.metrics import accuracy_score
+# Initialize model
+model = OIKANClassifier(
+    hidden_sizes=[32, 32], # Hidden layer sizes
+    activation='relu', # Activation function (other options: 'tanh', 'leaky_relu', 'elu', 'swish', 'gelu')
+    augmentation_factor=10, # Augmentation factor for data generation
+    polynomial_degree=2, # Degree of polynomial basis functions
+    alpha=0.1, # L1 regularization strength
+    sigma=0.1, # Standard deviation of Gaussian noise for data augmentation
+    epochs=100, # Number of training epochs
+    lr=0.001, # Learning rate
+    batch_size=32, # Batch size for training
+    verbose=True # Verbose output during training
+)
+# Fit the model
+model.fit(X_train, y_train)
+# Make predictions
+y_pred = model.predict(X_test)
+# Evaluate performance
+accuracy = model.score(X_test, y_test)
+print("Accuracy:", accuracy)
+# Get symbolic formulas for each class
+formulas = model.get_formula()
+for i, formula in enumerate(formulas):
+    print(f"Class {i} Formula:", formula)
+# Get feature importances
+importances = model.feature_importances()
+print("Feature Importances:", importances)
+# Save the model (optional)
+model.save("outputs/model.json")
+# Load the model (optional)
+loaded_model = OIKANClassifier()
+loaded_model.load("outputs/model.json")
+```
+*Example of the saved symbolic formula (classification model): [outputs/iris_model.json](outputs/iris_model.json)*
+### Architecture Diagram
+*Will be updated soon.*
+## Contributing
+We welcome contributions! Key areas of interest:
+- Model architecture improvements
+- Novel basis function implementations
+- Improved symbolic extraction algorithms
+- Real-world case studies and applications
+- Performance optimizations
+Please see [CONTRIBUTING.md](CONTRIBUTING.md) for guidelines.
+## Citation
+If you use OIKAN in your research, please cite:
+```bibtex
+@software{oikan2025,
+  title = {OIKAN: Optimized Interpretable Kolmogorov-Arnold Networks},
+  author = {Zhalgasbayev, Arman},
+  year = {2025},
+  url = {https://github.com/silvermete0r/OIKAN}
+}
+```
+## License
+This project is licensed under the MIT License - see the [LICENSE](LICENSE) file for details.

oikan-0.0.3.1.dist-info/RECORD ADDED Viewed

@@ -0,0 +1,11 @@
+oikan/__init__.py,sha256=zEzhm1GYLT4vNaIQ4CgZcNpUk3uo8SWnoaHYtHW_XSQ,628
+oikan/exceptions.py,sha256=Is0jG4apxO8QJQREIiJQYMjANYWibWeS-103q9KWbfg,192
+oikan/model.py,sha256=-LuvcljM5fqQsqwmhfol_e-_zVQzTAfq8SedQ3HYQQQ,14032
+oikan/neural.py,sha256=wxmGgzmtpwJ3lvH6u6D4i4BiAzg018czrIdw49phSCY,1558
+oikan/symbolic.py,sha256=3gtBndqFFC9ny2-PekKkUgr_t1HEpfkbk68e94yPpbI,2083
+oikan/utils.py,sha256=xMGRa1qhn8BWn9UxpVeJIuGb-UvQmbjiFSsvAdF0bMU,2095
+oikan-0.0.3.1.dist-info/licenses/LICENSE,sha256=75ASVmU-XIpN-M4LbVmJ_ibgbzbvRLVti8FhnR0BTf8,1096
+oikan-0.0.3.1.dist-info/METADATA,sha256=BAYWIvUqQ-al4TPraOnx0tx6eGSFUOvl4_Mxfxo61Qw,8335
+oikan-0.0.3.1.dist-info/WHEEL,sha256=0CuiUZ_p9E4cD6NyLD6UG80LBXYyiSYZOKDm5lp32xk,91
+oikan-0.0.3.1.dist-info/top_level.txt,sha256=XwnwKwTJddZwIvtrUsAz-l-58BJRj6HjAGWrfYi_3QY,6
+oikan-0.0.3.1.dist-info/RECORD,,

{oikan-0.0.2.4.dist-info → oikan-0.0.3.1.dist-info}/WHEEL RENAMED Viewed

@@ -1,5 +1,5 @@
 Wheel-Version: 1.0
-Generator: setuptools (78.1.0)
+Generator: setuptools (80.3.1)
 Root-Is-Purelib: true
 Tag: py3-none-any

oikan 0.0.2.4__py3-none-any.whl → 0.0.3.1__py3-none-any.whl

oikan 0.0.2.4__py3-none-any.whl → 0.0.3.1__py3-none-any.whl