nmn 0.1.4__py3-none-any.whl → 0.1.6__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- nmn/nnx/loss/__init__.py +0 -0
- nmn/nnx/nmn.py +25 -14
- nmn/nnx/squashers/__init__.py +9 -0
- nmn/nnx/squashers/soft_tanh.py +29 -0
- nmn/nnx/squashers/softer_sigmoid.py +29 -0
- nmn/nnx/squashers/softermax.py +38 -0
- nmn/nnx/yatattention.py +36 -110
- nmn/nnx/yatconv.py +19 -2
- nmn-0.1.6.dist-info/METADATA +176 -0
- nmn-0.1.6.dist-info/RECORD +19 -0
- nmn-0.1.4.dist-info/METADATA +0 -119
- nmn-0.1.4.dist-info/RECORD +0 -14
- {nmn-0.1.4.dist-info → nmn-0.1.6.dist-info}/WHEEL +0 -0
- {nmn-0.1.4.dist-info → nmn-0.1.6.dist-info}/licenses/LICENSE +0 -0
nmn/nnx/loss/__init__.py
ADDED
File without changes
nmn/nnx/nmn.py
CHANGED
@@ -4,26 +4,18 @@ import typing as tp

import jax
import jax.numpy as jnp
-import numpy as np
from jax import lax
-import opt_einsum

-from flax.core.frozen_dict import FrozenDict
from flax import nnx
-from flax.nnx import rnglib
-from flax.nnx.module import Module
+from flax.nnx import rnglib
+from flax.nnx.module import Module
from flax.nnx.nn import dtypes, initializers
from flax.typing import (
  Dtype,
-  Shape,
  Initializer,
  PrecisionLike,
  DotGeneralT,
-  ConvGeneralDilatedT,
-  PaddingLike,
-  LaxPadding,
  PromoteDtypeFn,
-  EinsumT,
)

Array = jax.Array
@@ -60,21 +52,26 @@ class YatNMN(Module):
    in_features: the number of input features.
    out_features: the number of output features.
    use_bias: whether to add a bias to the output (default: True).
+    use_alpha: whether to use alpha scaling (default: True).
+    use_dropconnect: whether to use DropConnect (default: False).
    dtype: the dtype of the computation (default: infer from input and params).
    param_dtype: the dtype passed to parameter initializers (default: float32).
    precision: numerical precision of the computation see ``jax.lax.Precision``
      for details.
    kernel_init: initializer function for the weight matrix.
    bias_init: initializer function for the bias.
+    alpha_init: initializer function for the alpha.
    dot_general: dot product function.
    promote_dtype: function to promote the dtype of the arrays to the desired
      dtype. The function should accept a tuple of ``(inputs, kernel, bias)``
      and a ``dtype`` keyword argument, and return a tuple of arrays with the
      promoted dtype.
+    epsilon: A small float added to the denominator to prevent division by zero.
+    drop_rate: dropout rate for DropConnect (default: 0.0).
    rngs: rng key.
  """

-  __data__ = ('kernel', 'bias')
+  __data__ = ('kernel', 'bias', 'alpha', 'dropconnect_key')

  def __init__(
    self,
@@ -83,6 +80,7 @@ class YatNMN(Module):
    *,
    use_bias: bool = True,
    use_alpha: bool = True,
+    use_dropconnect: bool = False,
    dtype: tp.Optional[Dtype] = None,
    param_dtype: Dtype = jnp.float32,
    precision: PrecisionLike = None,
@@ -91,8 +89,9 @@ class YatNMN(Module):
    alpha_init: Initializer = default_alpha_init,
    dot_general: DotGeneralT = lax.dot_general,
    promote_dtype: PromoteDtypeFn = dtypes.promote_dtype,
-    rngs: rnglib.Rngs,
    epsilon: float = 1e-5,
+    drop_rate: float = 0.0,
+    rngs: rnglib.Rngs,
  ):

    kernel_key = rngs.params()
@@ -117,6 +116,7 @@ class YatNMN(Module):
    self.out_features = out_features
    self.use_bias = use_bias
    self.use_alpha = use_alpha
+    self.use_dropconnect = use_dropconnect
    self.dtype = dtype
    self.param_dtype = param_dtype
    self.precision = precision
@@ -125,12 +125,19 @@ class YatNMN(Module):
    self.dot_general = dot_general
    self.promote_dtype = promote_dtype
    self.epsilon = epsilon
+    self.drop_rate = drop_rate
+
+    if use_dropconnect:
+      self.dropconnect_key = rngs.params()
+    else:
+      self.dropconnect_key = None

-  def __call__(self, inputs: Array) -> Array:
+  def __call__(self, inputs: Array, *, deterministic: bool = False) -> Array:
    """Applies a linear transformation to the inputs along the last dimension.

    Args:
      inputs: The nd-array to be transformed.
+      deterministic: If true, DropConnect is not applied (e.g., during inference).

    Returns:
      The transformed input.
@@ -139,6 +146,11 @@ class YatNMN(Module):
    bias = self.bias.value if self.bias is not None else None
    alpha = self.alpha.value if self.alpha is not None else None

+    if self.use_dropconnect and not deterministic and self.drop_rate > 0.0:
+      keep_prob = 1.0 - self.drop_rate
+      mask = jax.random.bernoulli(self.dropconnect_key, p=keep_prob, shape=kernel.shape)
+      kernel = (kernel * mask) / keep_prob
+
    inputs, kernel, bias, alpha = self.promote_dtype(
      (inputs, kernel, bias, alpha), dtype=self.dtype
    )
@@ -166,5 +178,4 @@ class YatNMN(Module):
    scale = (jnp.sqrt(self.out_features) / jnp.log(1 + self.out_features)) ** alpha
    y = y * scale

-
    return y
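*Note: a minimal usage sketch (not taken from the package) of the DropConnect options this release adds to `YatNMN`. The constructor and `deterministic` keyword follow the diff above; the concrete sizes, keys, and rate are arbitrary.*

```python
import jax
from flax import nnx
from nmn.nnx.nmn import YatNMN

# DropConnect is opt-in: use_dropconnect, drop_rate and the deterministic
# flag are the knobs introduced in 0.1.6.
layer = YatNMN(
    in_features=3,
    out_features=4,
    use_dropconnect=True,
    drop_rate=0.1,
    rngs=nnx.Rngs(params=jax.random.key(0)),
)
x = jax.random.normal(jax.random.key(1), (2, 3))
y_train = layer(x, deterministic=False)  # kernel mask sampled and rescaled by 1/keep_prob
y_eval = layer(x, deterministic=True)    # mask skipped at inference time
print(y_train.shape, y_eval.shape)       # (2, 4) (2, 4)
```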
nmn/nnx/squashers/soft_tanh.py
ADDED
@@ -0,0 +1,29 @@
+import jax.numpy as jnp
+from jax import Array
+
+def soft_tanh(
+    x: Array,
+    n: float = 1.0,
+) -> Array:
+    """
+    Maps a non-negative score to the range [-1, 1) using the soft-tanh function.
+
+    The soft-tanh function is defined as:
+    .. math::
+        \\text{soft-tanh}_n(x) = \\frac{x^n - 1}{1 + x^n}
+
+    The power `n` again controls the transition sharpness: higher `n` makes the
+    function approach -1 more quickly for large `x`.
+
+    Args:
+        x (Array): A JAX array of non-negative scores (x >= 0).
+        n (float, optional): The power to raise the score to. Defaults to 1.0.
+
+    Returns:
+        Array: The mapped scores in the range [-1, 1).
+    """
+    if n <= 0:
+        raise ValueError("Power 'n' must be positive.")
+
+    x_n = jnp.power(x, n)
+    return (x_n - 1.0) / (1.0 + x_n)
nmn/nnx/squashers/softer_sigmoid.py
ADDED
@@ -0,0 +1,29 @@
+import jax.numpy as jnp
+from jax import Array
+
+def softer_sigmoid(
+    x: Array,
+    n: float = 1.0,
+) -> Array:
+    """
+    Squashes a non-negative score into the range [0, 1) using the soft-sigmoid function.
+
+    The soft-sigmoid function is defined as:
+    .. math::
+        \\text{soft-sigmoid}_n(x) = \\frac{x^n}{1 + x^n}
+
+    The power `n` modulates the softness: higher `n` makes the function approach
+    zero faster for large `x`, while `n < 1` makes the decay slower.
+
+    Args:
+        x (Array): A JAX array of non-negative scores (x >= 0).
+        n (float, optional): The power to raise the score to. Defaults to 1.0.
+
+    Returns:
+        Array: The squashed scores in the range [0, 1).
+    """
+    if n <= 0:
+        raise ValueError("Power 'n' must be positive.")
+
+    x_n = jnp.power(x, n)
+    return x_n / (1.0 + x_n)
nmn/nnx/squashers/softermax.py
ADDED
@@ -0,0 +1,38 @@
+import jax.numpy as jnp
+from jax import Array
+from typing import Optional
+
+def softermax(
+    x: Array,
+    n: float = 1.0,
+    epsilon: float = 1e-12,
+    axis: Optional[int] = -1,
+) -> Array:
+    """
+    Normalizes a set of non-negative scores using the Softermax function.
+
+    The Softermax function is defined as:
+    .. math::
+        \\text{softermax}_n(x_k, \\{x_i\\}) = \\frac{x_k^n}{\\epsilon + \\sum_i x_i^n}
+
+    The power `n` controls the sharpness of the distribution: `n=1` recovers
+    the original Softermax, while `n > 1` makes the distribution harder (more
+    peaked), and `0 < n < 1` makes it softer.
+
+    Args:
+        x (Array): A JAX array of non-negative scores.
+        n (float, optional): The power to raise each score to. Defaults to 1.0.
+        epsilon (float, optional): A small constant for numerical stability.
+            Defaults to 1e-12.
+        axis (Optional[int], optional): The axis to perform the sum over.
+            Defaults to -1.
+
+    Returns:
+        Array: The normalized scores.
+    """
+    if n <= 0:
+        raise ValueError("Power 'n' must be positive.")
+
+    x_n = jnp.power(x, n)
+    sum_x_n = jnp.sum(x_n, axis=axis, keepdims=True)
+    return x_n / (epsilon + sum_x_n)
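*Note: a quick numerical check of the three new squashers (a sketch, not package code). It assumes the functions are re-exported from `nmn.nnx.squashers`, whose `__init__.py` is also added in this release but not shown above.*

```python
import jax.numpy as jnp
from nmn.nnx.squashers import soft_tanh, softer_sigmoid, softermax

scores = jnp.array([[0.5, 1.0, 2.0]])   # non-negative scores, e.g. yat attention logits
print(softermax(scores))                # rows sum to ~1: [[0.1429, 0.2857, 0.5714]]
print(softer_sigmoid(scores))           # x/(1+x), values in [0, 1): [[0.333, 0.5, 0.667]]
print(soft_tanh(scores))                # (x-1)/(1+x), values in [-1, 1): [[-0.333, 0.0, 0.333]]
```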
nmn/nnx/yatattention.py
CHANGED
@@ -26,7 +26,8 @@ from flax.typing import (
  DotGeneralT,
)

-
+from nmn.nnx.nmn import YatNMN
+from jax import Array

def yat_attention_weights(
  query: Array,
@@ -153,91 +154,6 @@ def yat_attention(
    '...hqk,...khd->...qhd', attn_weights, value, precision=precision
  )

-Array = jax.Array
-
-# Add YatNMN class implementation
-default_bias_init = initializers.zeros_init()
-default_alpha_init = initializers.ones_init()
-
-class YatNMN(Module):
-  """A linear transformation with custom distance-based computation."""
-
-  def __init__(
-    self,
-    in_features: int,
-    out_features: int,
-    *,
-    use_bias: bool = True,
-    use_alpha: bool = True,
-    dtype: Optional[Dtype] = None,
-    param_dtype: Dtype = jnp.float32,
-    precision: PrecisionLike = None,
-    kernel_init: Initializer = default_kernel_init,
-    bias_init: Initializer = default_bias_init,
-    alpha_init: Initializer = default_alpha_init,
-    dot_general: DotGeneralT = lax.dot_general,
-    rngs: rnglib.Rngs,
-    epsilon: float = 1e-5,
-  ):
-
-    kernel_key = rngs.params()
-    self.kernel = nnx.Param(
-      kernel_init(kernel_key, (in_features, out_features), param_dtype)
-    )
-    self.bias: nnx.Param[jax.Array] | None
-    if use_bias:
-      bias_key = rngs.params()
-      self.bias = nnx.Param(bias_init(bias_key, (out_features,), param_dtype))
-    else:
-      self.bias = None
-
-    self.alpha: nnx.Param[jax.Array] | None
-    if use_alpha:
-      alpha_key = rngs.params()
-      self.alpha = nnx.Param(alpha_init(alpha_key, (1,), param_dtype))
-    else:
-      self.alpha = None
-
-    self.in_features = in_features
-    self.out_features = out_features
-    self.use_bias = use_bias
-    self.use_alpha = use_alpha
-    self.dtype = dtype
-    self.param_dtype = param_dtype
-    self.precision = precision
-    self.kernel_init = kernel_init
-    self.bias_init = bias_init
-    self.dot_general = dot_general
-    self.epsilon = epsilon
-
-  def __call__(self, inputs: Array) -> Array:
-    """Applies YatNMN transformation to inputs."""
-    kernel = self.kernel.value
-    bias = self.bias.value if self.bias is not None else None
-    alpha = self.alpha.value if self.alpha is not None else None
-
-    y = self.dot_general(
-      inputs,
-      kernel,
-      (((inputs.ndim - 1,), (0,)), ((), ())),
-      precision=self.precision,
-    )
-
-    inputs_squared_sum = jnp.sum(inputs**2, axis=-1, keepdims=True)
-    kernel_squared_sum = jnp.sum(kernel**2, axis=0, keepdims=True)
-    distances = inputs_squared_sum + kernel_squared_sum - 2 * y
-
-    # Element-wise operation
-    y = y ** 2 / (distances + self.epsilon)
-
-    if bias is not None:
-      y += jnp.reshape(bias, (1,) * (y.ndim - 1) + (-1,))
-
-    if alpha is not None:
-      scale = (jnp.sqrt(self.out_features) / jnp.log(1 + self.out_features)) ** alpha
-      y = y * scale
-
-    return y


def dot_product_attention_weights(
@@ -435,6 +351,10 @@ class MultiHeadAttention(Module):
    attention_fn: Callable[..., Array] = yat_attention,
    decode: bool | None = None,
    normalize_qk: bool = False,
+    use_alpha: bool = True,
+    alpha_init: Initializer = initializers.ones_init(),
+    use_dropconnect: bool = False,
+    dropconnect_rate: float = 0.0,
    # Deprecated, will be removed.
    qkv_dot_general: DotGeneralT | None = None,
    out_dot_general: DotGeneralT | None = None,
@@ -470,6 +390,10 @@ class MultiHeadAttention(Module):
    self.qkv_dot_general_cls = qkv_dot_general_cls
    self.out_dot_general_cls = out_dot_general_cls
    self.epsilon = epsilon
+    self.use_alpha = use_alpha
+    self.alpha_init = alpha_init
+    self.use_dropconnect = use_dropconnect
+    self.dropconnect_rate = dropconnect_rate

    if self.qkv_features % self.num_heads != 0:
      raise ValueError(
@@ -491,6 +415,10 @@ class MultiHeadAttention(Module):
      use_bias=self.use_bias,
      precision=self.precision,
      epsilon=self.epsilon,
+      use_alpha=self.use_alpha,
+      alpha_init=self.alpha_init,
+      use_dropconnect=self.use_dropconnect,
+      drop_rate=self.dropconnect_rate,
    )

    # project inputs_q to multi-headed q/k/v
@@ -590,10 +518,23 @@ class MultiHeadAttention(Module):
        f'but module expects {self.in_features}.'
      )

+    is_deterministic: bool = False
+    if self.dropout_rate > 0.0 or (
+      self.use_dropconnect and self.dropconnect_rate > 0.0
+    ):
+      is_deterministic = first_from(
+        deterministic,
+        self.deterministic,
+        error_msg="""No `deterministic` argument was provided to MultiHeadAttention
+          as either a __call__ argument, class attribute, or nnx.flag.""",
+      )
+    else:
+      is_deterministic = True
+
    # Apply YatNMN transformations and reshape to multi-head format
-    query =
-    key =
-    value =
+    query = self.query(inputs_q, deterministic=is_deterministic)
+    key = self.key(inputs_k, deterministic=is_deterministic)
+    value = self.value(inputs_v, deterministic=is_deterministic)

    # Reshape from [batch..., length, qkv_features] to [batch..., length, num_heads, head_dim]
    query = query.reshape(query.shape[:-1] + (self.num_heads, self.head_dim))
@@ -660,26 +601,11 @@ class MultiHeadAttention(Module):
      ),
    )

-
-
-
-
-
-        self.deterministic,
-        error_msg="""No `deterministic` argument was provided to MultiHeadAttention
-          as either a __call__ argument, class attribute, or nnx.flag.""",
-      )
-      if not deterministic:
-        if rngs is None:
-          raise ValueError(
-            "'rngs' must be provided if 'dropout_rng' is not given."
-          )
-        dropout_rng = rngs.dropout()
-      else:
-        dropout_rng = None
-    else:
-      deterministic = True
-      dropout_rng = None
+    dropout_rng = None
+    if self.dropout_rate > 0.0 and not is_deterministic:
+      if rngs is None:
+        raise ValueError("'rngs' must be provided for dropout.")
+      dropout_rng = rngs.dropout()

    # apply attention with epsilon parameter for YatNMN
    x = self.attention_fn(
@@ -690,7 +616,7 @@ class MultiHeadAttention(Module):
      dropout_rng=dropout_rng,
      dropout_rate=self.dropout_rate,
      broadcast_dropout=self.broadcast_dropout,
-      deterministic=
+      deterministic=is_deterministic,
      dtype=self.dtype,
      precision=self.precision,
      module=self if sow_weights else None,
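*Note: a hedged sketch (not from the package docs) of driving the new `use_dropconnect` / `dropconnect_rate` options end to end. It assumes the `MultiHeadAttention` constructor mirrors `flax.nnx.MultiHeadAttention` (`num_heads`, `in_features`, `qkv_features`, `rngs`), which the diff references but does not show in full.*

```python
import jax
from flax import nnx
from nmn.nnx.yatattention import MultiHeadAttention

# Assumed constructor shape; only the DropConnect/alpha flags are confirmed
# by the diff above.
attn = MultiHeadAttention(
    num_heads=4,
    in_features=16,
    qkv_features=16,
    decode=False,
    use_dropconnect=True,        # new in 0.1.6
    dropconnect_rate=0.1,        # forwarded to each YatNMN projection as drop_rate
    rngs=nnx.Rngs(params=jax.random.key(0), dropout=jax.random.key(1)),
)
x = jax.random.normal(jax.random.key(2), (2, 10, 16))  # (batch, length, features)
out = attn(x, deterministic=False)  # DropConnect masks applied in the q/k/v projections
print(out.shape)                    # expected: (2, 10, 16)
```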
nmn/nnx/yatconv.py
CHANGED
@@ -110,6 +110,8 @@ class YatConv(Module):
    feature_group_count: integer, default 1. If specified divides the input
      features into groups.
    use_bias: whether to add a bias to the output (default: True).
+    use_alpha: whether to use alpha scaling (default: True).
+    use_dropconnect: whether to use DropConnect (default: False).
    mask: Optional mask for the weights during masked convolution. The mask must
      be the same shape as the convolution weight matrix.
    dtype: the dtype of the computation (default: infer from input and params).
@@ -123,10 +125,11 @@ class YatConv(Module):
      and a ``dtype`` keyword argument, and return a tuple of arrays with the
      promoted dtype.
    epsilon: A small float added to the denominator to prevent division by zero.
+    drop_rate: dropout rate for DropConnect (default: 0.0).
    rngs: rng key.
  """

-  __data__ = ('kernel', 'bias', 'mask')
+  __data__ = ('kernel', 'bias', 'mask', 'dropconnect_key')

  def __init__(
    self,
@@ -142,6 +145,7 @@ class YatConv(Module):

    use_bias: bool = True,
    use_alpha: bool = True,
+    use_dropconnect: bool = False,
    kernel_init: Initializer = default_kernel_init,
    bias_init: Initializer = default_bias_init,
    alpha_init: Initializer = default_alpha_init,
@@ -153,6 +157,7 @@ class YatConv(Module):
    conv_general_dilated: ConvGeneralDilatedT = lax.conv_general_dilated,
    promote_dtype: PromoteDtypeFn = dtypes.promote_dtype,
    epsilon: float = 1e-5,
+    drop_rate: float = 0.0,
    rngs: rnglib.Rngs,
  ):
    if isinstance(kernel_size, int):
@@ -185,6 +190,7 @@ class YatConv(Module):
    self.feature_group_count = feature_group_count
    self.use_bias = use_bias
    self.use_alpha = use_alpha
+    self.use_dropconnect = use_dropconnect

    self.mask = mask
    self.dtype = dtype
@@ -195,6 +201,7 @@ class YatConv(Module):
    self.conv_general_dilated = conv_general_dilated
    self.promote_dtype = promote_dtype
    self.epsilon = epsilon
+    self.drop_rate = drop_rate

    if use_alpha:
      alpha_key = rngs.params()
@@ -202,8 +209,12 @@ class YatConv(Module):
    else:
      self.alpha = None

+    if use_dropconnect:
+      self.dropconnect_key = rngs.params()
+    else:
+      self.dropconnect_key = None

-  def __call__(self, inputs: Array) -> Array:
+  def __call__(self, inputs: Array, *, deterministic: bool = False) -> Array:
    assert isinstance(self.kernel_size, tuple)

    def maybe_broadcast(
@@ -261,6 +272,12 @@ class YatConv(Module):

    kernel_val = self.kernel.value

+    # Apply DropConnect if enabled and not in deterministic mode
+    if self.use_dropconnect and not deterministic and self.drop_rate > 0.0:
+      keep_prob = 1.0 - self.drop_rate
+      mask = jax.random.bernoulli(self.dropconnect_key, p=keep_prob, shape=kernel_val.shape)
+      kernel_val = (kernel_val * mask) / keep_prob
+
    current_mask = self.mask
    if current_mask is not None:
      if current_mask.shape != self.kernel_shape:
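*Note: a minimal sketch (not from the package) of the matching DropConnect flags on `YatConv`. The basic constructor arguments mirror the README example in the METADATA below; `use_dropconnect`, `drop_rate`, and `deterministic` come from the diff above, and the output spatial size assumes the default padding behaves like `'SAME'`.*

```python
import jax
from flax import nnx
from nmn.nnx.yatconv import YatConv

conv = YatConv(
    in_features=3,
    out_features=8,
    kernel_size=(3, 3),
    use_dropconnect=True,   # new in 0.1.6
    drop_rate=0.1,          # new in 0.1.6
    rngs=nnx.Rngs(params=jax.random.key(0)),
)
img = jax.random.normal(jax.random.key(1), (1, 28, 28, 3))
out = conv(img, deterministic=False)  # kernel mask sampled as in the diff above
print(out.shape)                      # e.g. (1, 28, 28, 8) with 'SAME'-style padding
```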
nmn-0.1.6.dist-info/METADATA
ADDED
@@ -0,0 +1,176 @@
+Metadata-Version: 2.4
+Name: nmn
+Version: 0.1.6
+Summary: a neuron that matter
+Project-URL: Homepage, https://github.com/mlnomadpy/nmn
+Project-URL: Bug Tracker, https://github.com/mlnomadpy/my_package/issues
+Author-email: Taha Bouhsine <yat@mlnomads.com>
+License-File: LICENSE
+Classifier: License :: OSI Approved :: GNU Affero General Public License v3
+Classifier: Operating System :: OS Independent
+Classifier: Programming Language :: Python :: 3
+Requires-Python: >=3.8
+Description-Content-Type: text/markdown
+
+# nmn
+Not the neurons we want, but the neurons we need
+
+[](https://pypi.org/project/nmn/)
+[](https://pepy.tech/project/nmn)
+[](https://pepy.tech/project/nmn)
+[](https://github.com/mlnomadpy/nmn)
+[](https://github.com/mlnomadpy/nmn)
+[](https://github.com/mlnomadpy/nmn/issues)
+[](https://pypi.org/project/nmn/)
+[](https://pypi.org/project/nmn/)
+
+## Features
+
+* **Activation-Free Non-linearity:** Learns complex, non-linear relationships without separate activation functions.
+* **Multiple Frameworks:** Supports Flax (Linen & NNX), Keras, PyTorch, and TensorFlow.
+* **Yat-Product & Yat-Conv:** Implements novel Yat-Product and Yat-Conv operations.
+* **Inspired by Research:** Based on the principles from "Deep Learning 2.0/2.1: Artificial Neurons that Matter".
+
+## Overview
+
+**nmn** provides neural network layers for multiple frameworks (Flax, NNX, Keras, PyTorch, TensorFlow) that do not require activation functions to learn non-linearity. The main goal is to enable deep learning architectures where the layer itself is inherently non-linear, inspired by the papers:
+
+> Deep Learning 2.0: Artificial Neurons that Matter: Reject Correlation - Embrace Orthogonality
+>
+> Deep Learning 2.1: Deep Learning 2.1: Mind and Cosmos - Towards Cosmos-Inspired Interpretable Neural Networks
+
+## Math
+
+Yat-Product:
+$$
+ⵟ(\mathbf{w},\mathbf{x}) := \frac{\langle \mathbf{w}, \mathbf{x} \rangle^2}{\|\mathbf{w} - \mathbf{x}\|^2 + \epsilon} = \frac{ \|\mathbf{x}\|^2 \|\mathbf{w}\|^2 \cos^2 \theta}{\|\mathbf{w}\|^2 - 2\mathbf{w}^\top\mathbf{x} + \|\mathbf{x}\|^2 + \epsilon} = \frac{ \|\mathbf{x}\|^2 \|\mathbf{w}\|^2 \cos^2 \theta}{((\mathbf{x}-\mathbf{w})\cdot(\mathbf{x}-\mathbf{w}))^2 + \epsilon}.
+$$
+
+**Explanation:**
+- $\mathbf{w}$ is the weight vector, $\mathbf{x}$ is the input vector.
+- $\langle \mathbf{w}, \mathbf{x} \rangle$ is the dot product between $\mathbf{w}$ and $\mathbf{x}$.
+- $\|\mathbf{w} - \mathbf{x}\|^2$ is the squared Euclidean distance between $\mathbf{w}$ and $\mathbf{x}$.
+- $\epsilon$ is a small constant for numerical stability.
+- $\theta$ is the angle between $\mathbf{w}$ and $\mathbf{x}$.
+
+This operation:
+- **Numerator:** Squares the similarity (dot product) between $\mathbf{w}$ and $\mathbf{x}$, emphasizing strong alignments.
+- **Denominator:** Penalizes large distances, so the response is high only when $\mathbf{w}$ and $\mathbf{x}$ are both similar in direction and close in space.
+- **No activation needed:** The non-linearity is built into the operation itself, allowing the layer to learn complex, non-linear relationships without a separate activation function.
+- **Geometric view:** The output is maximized when $\mathbf{w}$ and $\mathbf{x}$ are both large in norm, closely aligned (small $\theta$), and close together in Euclidean space.
+
+Yat-Conv:
+$$
+ⵟ^*(\mathbf{W}, \mathbf{X}) := \frac{\langle \mathbf{w}, \mathbf{x} \rangle^2}{\|\mathbf{w} - \mathbf{x}\|^2 + \epsilon}
+= \frac{\left(\sum_{i,j} w_{ij} x_{ij}\right)^2}{\sum_{i,j} (w_{ij} - x_{ij})^2 + \epsilon}
+$$
+
+Where:
+- $\mathbf{W}$ and $\mathbf{X}$ are local patches (e.g., kernel and input patch in convolution)
+- $w_{ij}$ and $x_{ij}$ are elements of the kernel and input patch, respectively
+- $\epsilon$ is a small constant for numerical stability
+
+This generalizes the Yat-product to convolutional (patch-wise) operations.
+
+
+## Supported Frameworks & API
+
+The `YatNMN` layer (for dense operations) and `YatConv` (for convolutional operations) are the core components. Below is a summary of their availability and features per framework:
+
+| Framework | `YatNMN` Path | `YatConv` Path | Core Layer | DropConnect | Ternary Network | Recurrent Layer |
+|----------------|-------------------------------|-------------------------------|------------|-------------|-----------------|-----------------|
+| **Flax (Linen)** | `src/nmn/linen/nmn.py` | (Available) | ✅ | | | 🚧 |
+| **Flax (NNX)** | `src/nmn/nnx/nmn.py` | `src/nmn/nnx/yatconv.py` | ✅ | ✅ | 🚧 | 🚧 |
+| **Keras** | `src/nmn/keras/nmn.py` | (Available) | ✅ | | | 🚧 |
+| **PyTorch** | `src/nmn/torch/nmn.py` | (Available) | ✅ | | | 🚧 |
+| **TensorFlow** | `src/nmn/tf/nmn.py` | (Available) | ✅ | | | 🚧 |
+
+*Legend: ✅ Implemented, 🚧 To be implemented / In Progress, (Available) - Assumed available if NMN is, specific path might vary or be part of the NMN module.*
+
+## Installation
+
+```bash
+pip install nmn
+```
+
+## Usage Example (Flax NNX)
+
+```python
+import jax
+import jax.numpy as jnp
+from flax import nnx
+from nmn.nnx.nmn import YatNMN
+from nmn.nnx.yatconv import YatConv
+
+# Example YatNMN (Dense Layer)
+model_key, param_key, drop_key, input_key = jax.random.split(jax.random.key(0), 4)
+in_features, out_features = 3, 4
+layer = YatNMN(in_features=in_features, out_features=out_features, rngs=nnx.Rngs(params=param_key, dropout=drop_key))
+dummy_input = jax.random.normal(input_key, (2, in_features)) # Batch size 2
+output = layer(dummy_input)
+print("YatNMN Output Shape:", output.shape)
+
+# Example YatConv (Convolutional Layer)
+conv_key, conv_param_key, conv_input_key = jax.random.split(jax.random.key(1), 3)
+in_channels, out_channels = 3, 8
+kernel_size = (3, 3)
+conv_layer = YatConv(
+    in_features=in_channels,
+    out_features=out_channels,
+    kernel_size=kernel_size,
+    rngs=nnx.Rngs(params=conv_param_key)
+)
+dummy_conv_input = jax.random.normal(conv_input_key, (1, 28, 28, in_channels)) # Batch 1, 28x28 image, in_channels
+conv_output = conv_layer(dummy_conv_input)
+print("YatConv Output Shape:", conv_output.shape)
+
+```
+*Note: Examples for other frameworks (Keras, PyTorch, TensorFlow, Flax Linen) can be found in their respective `nmn.<framework>` modules and upcoming documentation.*
+
+## Roadmap
+
+- [ ] Implement recurrent layers (`YatRNN`, `YatLSTM`, `YatGRU`) for all supported frameworks.
+- [ ] Develop Ternary Network versions of Yat layers for NNX.
+- [ ] Add more comprehensive examples and benchmark scripts for various tasks (vision, language).
+- [ ] Publish detailed documentation and API references.
+- [ ] Conduct and publish thorough performance benchmarks against traditional layers.
+
+## Contributing
+
+Contributions are welcome! If you'd like to contribute, please feel free to:
+- Open an issue on the [Bug Tracker](https://github.com/mlnomadpy/nmn/issues) to report bugs or suggest features.
+- Submit a pull request with your improvements.
+- Help expand the documentation or add more examples.
+
+## License
+
+This project is licensed under the **GNU Affero General Public License v3**. See the [LICENSE](LICENSE) file for details.
+
+## Citation
+
+If you use `nmn` in your research, please consider citing the original papers that inspired this work:
+
+> Deep Learning 2.0: Artificial Neurons that Matter: Reject Correlation - Embrace Orthogonality
+>
+> Deep Learning 2.1: Mind and Cosmos - Towards Cosmos-Inspired Interpretable Neural Networks
+
+A BibTeX entry will be provided once the accompanying paper for this library is published.
+
+## Citing
+
+If you use this work, please cite the paper:
+
+```bibtex
+@article{taha2024dl2,
+  author = {Taha Bouhsine},
+  title = {Deep Learning 2.0: Artificial Neurons that Matter: Reject Correlation - Embrace Orthogonality},
+}
+```
+
+
+```bibtex
+@article{taha2025dl2,
+  author = {Taha Bouhsine},
+  title = {Deep Learning 2.1: Mind and Cosmos - Towards Cosmos-Inspired Interpretable Neural Networks},
+}
+```
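*Note: a standalone numerical check (not package code) of the Yat-Product formula quoted in the README above. It illustrates why no separate activation is needed: the response is large only when the input is both aligned with and close to the weight vector.*

```python
import jax.numpy as jnp

def yat_product(w, x, eps=1e-5):
    # <w, x>^2 / (||w - x||^2 + eps), as defined in the README's Math section
    return jnp.dot(w, x) ** 2 / (jnp.sum((w - x) ** 2) + eps)

w = jnp.array([1.0, 0.0])
print(yat_product(w, jnp.array([2.0, 0.0])))   # 4 / (1 + eps)  ≈ 4.00: aligned and close
print(yat_product(w, jnp.array([-2.0, 0.0])))  # 4 / (9 + eps)  ≈ 0.44: same |cos|, but far away
print(yat_product(w, jnp.array([0.0, 2.0])))   # 0 / (5 + eps)  = 0.00: orthogonal
```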
nmn-0.1.6.dist-info/RECORD
ADDED
@@ -0,0 +1,19 @@
+nmn/__init__.py,sha256=F_5o-lCggdEdWfR1l1YC_jfR01mJmveugwUndoRx8n8,83
+nmn/keras/nmn.py,sha256=E7V7kyFB09PfMG1Da_TA2FirOiTCeAXYp3JWACV8h_c,5908
+nmn/linen/nmn.py,sha256=j4v6Z793wliE0xEAITde7jXu9Qras9u75NqdOSPSM4Q,3722
+nmn/nnx/nmn.py,sha256=tPNUtF8Lmv_B1TgMoVXfMQ9x0IPGKjSyAP6HnZ-YBsM,5651
+nmn/nnx/yatattention.py,sha256=i6XfCGHISyb2P6KrgYFnhhdzqSTWAyshFhy1XEeuEWc,24642
+nmn/nnx/yatconv.py,sha256=EOAAWfuv5QA-QTru-JyYKYNoGqxcklu7ph9a-CtmYsA,13123
+nmn/nnx/examples/language/mingpt.py,sha256=RveY3NwriTGPBdj8HNKDNtnXMaH0pgux8554m4Bhho4,61080
+nmn/nnx/examples/vision/cnn_cifar.py,sha256=UcK52-SCwuE2hl2BkpEbyg7N3Jwvvz8iFxiqhI7B9ew,73961
+nmn/nnx/loss/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+nmn/nnx/squashers/__init__.py,sha256=zXYPa3yzqMXxkIPvNHiaV6pcZRDOdVrzaVdYVDGALTY,180
+nmn/nnx/squashers/soft_tanh.py,sha256=WSJkxD6L9WU1eqPwsK2AW4V6OJbw5pSWYjKwkiWtLdo,812
+nmn/nnx/squashers/softer_sigmoid.py,sha256=vE6IWorZdBb2cww6fskARnwzdjTcWB2kKohuaJWVGNs,845
+nmn/nnx/squashers/softermax.py,sha256=NfxEDbogLUysyTvtVCTpDt27PplYvKRQLTZbYCL-Wfg,1226
+nmn/tf/nmn.py,sha256=A-K65z9_aN62tAy12b0553nXxrzOofK1umGMRGJYjqw,6036
+nmn/torch/nmn.py,sha256=8K0S3nwpGprT7apbCqpaYpKpxq8F8g8EL8PHIezgMCY,4658
+nmn-0.1.6.dist-info/METADATA,sha256=Y9MByC16wz1MGYVZRmZA0wJQATB6Kj6w6TOL5lPzl0Q,8800
+nmn-0.1.6.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
+nmn-0.1.6.dist-info/licenses/LICENSE,sha256=kbZSd5WewnN2PSjvAC6DprP7pXx6NUNsnltmU2Mz1yA,34519
+nmn-0.1.6.dist-info/RECORD,,
nmn-0.1.4.dist-info/METADATA
DELETED
@@ -1,119 +0,0 @@
-Metadata-Version: 2.4
-Name: nmn
-Version: 0.1.4
-Summary: a neuron that matter
-Project-URL: Homepage, https://github.com/mlnomadpy/nmn
-Project-URL: Bug Tracker, https://github.com/mlnomadpy/my_package/issues
-Author-email: Taha Bouhsine <yat@mlnomads.com>
-License-File: LICENSE
-Classifier: License :: OSI Approved :: GNU Affero General Public License v3
-Classifier: Operating System :: OS Independent
-Classifier: Programming Language :: Python :: 3
-Requires-Python: >=3.8
-Description-Content-Type: text/markdown
-
-# nmn
-Not the neurons we want, but the neurons we need
-
-[](https://pypi.org/project/nmn/)
-[](https://pepy.tech/project/nmn)
-[](https://pepy.tech/project/nmn)
-[](https://github.com/mlnomadpy/nmn)
-[](https://github.com/mlnomadpy/nmn)
-[](https://github.com/mlnomadpy/nmn/issues)
-[](https://pypi.org/project/nmn/)
-[](https://pypi.org/project/nmn/)
-
-## Overview
-
-**nmn** provides neural network layers for multiple frameworks (Flax, NNX, Keras, PyTorch, TensorFlow) that do not require activation functions to learn non-linearity. The main goal is to enable deep learning architectures where the layer itself is inherently non-linear, inspired by the paper:
-
-> Deep Learning 2.0: Artificial Neurons that Matter: Reject Correlation - Embrace Orthogonality
-
-## Math
-
-Yat-Product:
-$$
-ⵟ(\mathbf{w},\mathbf{x}) := \frac{\langle \mathbf{w}, \mathbf{x} \rangle^2}{\|\mathbf{w} - \mathbf{x}\|^2 + \epsilon} = \frac{ \|\mathbf{x}\|^2 \|\mathbf{w}\|^2 \cos^2 \theta}{\|\mathbf{w}\|^2 - 2\mathbf{w}^\top\mathbf{x} + \|\mathbf{x}\|^2 + \epsilon} = \frac{ \|\mathbf{x}\|^2 \|\mathbf{w}\|^2 \cos^2 \theta}{((\mathbf{x}-\mathbf{w})\cdot(\mathbf{x}-\mathbf{w}))^2 + \epsilon}.
-$$
-
-**Explanation:**
-- $\mathbf{w}$ is the weight vector, $\mathbf{x}$ is the input vector.
-- $\langle \mathbf{w}, \mathbf{x} \rangle$ is the dot product between $\mathbf{w}$ and $\mathbf{x}$.
-- $\|\mathbf{w} - \mathbf{x}\|^2$ is the squared Euclidean distance between $\mathbf{w}$ and $\mathbf{x}$.
-- $\epsilon$ is a small constant for numerical stability.
-- $\theta$ is the angle between $\mathbf{w}$ and $\mathbf{x}$.
-
-This operation:
-- **Numerator:** Squares the similarity (dot product) between $\mathbf{w}$ and $\mathbf{x}$, emphasizing strong alignments.
-- **Denominator:** Penalizes large distances, so the response is high only when $\mathbf{w}$ and $\mathbf{x}$ are both similar in direction and close in space.
-- **No activation needed:** The non-linearity is built into the operation itself, allowing the layer to learn complex, non-linear relationships without a separate activation function.
-- **Geometric view:** The output is maximized when $\mathbf{w}$ and $\mathbf{x}$ are both large in norm, closely aligned (small $\theta$), and close together in Euclidean space.
-
-Yat-Conv:
-$$
-ⵟ^*(\mathbf{W}, \mathbf{X}) := \frac{\langle \mathbf{w}, \mathbf{x} \rangle^2}{\|\mathbf{w} - \mathbf{x}\|^2 + \epsilon}
-= \frac{\left(\sum_{i,j} w_{ij} x_{ij}\right)^2}{\sum_{i,j} (w_{ij} - x_{ij})^2 + \epsilon}
-$$
-
-Where:
-- $\mathbf{W}$ and $\mathbf{X}$ are local patches (e.g., kernel and input patch in convolution)
-- $w_{ij}$ and $x_{ij}$ are elements of the kernel and input patch, respectively
-- $\epsilon$ is a small constant for numerical stability
-
-This generalizes the Yat-product to convolutional (patch-wise) operations.
-
-
-## Supported Frameworks & Tasks
-
-### Flax (JAX)
-- `YatNMN` layer implemented in `src/nmn/linen/nmn.py`
-- **Tasks:**
-  - [x] Core layer implementation
-  - [ ] Recurrent layer (to be implemented)
-
-### NNX (Flax NNX)
-- `YatNMN` layer implemented in `src/nmn/nnx/nmn.py`
-- **Tasks:**
-  - [x] Core layer implementation
-  - [ ] Recurrent layer (to be implemented)
-
-### Keras
-- `YatNMN` layer implemented in `src/nmn/keras/nmn.py`
-- **Tasks:**
-  - [x] Core layer implementation
-  - [ ] Recurrent layer (to be implemented)
-
-### PyTorch
-- `YatNMN` layer implemented in `src/nmn/torch/nmn.py`
-- **Tasks:**
-  - [x] Core layer implementation
-  - [ ] Recurrent layer (to be implemented)
-
-### TensorFlow
-- `YatNMN` layer implemented in `src/nmn/tf/nmn.py`
-- **Tasks:**
-  - [x] Core layer implementation
-  - [ ] Recurrent layer (to be implemented)
-
-## Installation
-
-```bash
-pip install nmn
-```
-
-## Usage Example (Flax)
-
-```python
-from nmn.nnx.nmn import YatNMN
-from nmn.nnx.yatconv import YatConv
-# ... use as a Flax module ...
-```
-
-## Roadmap
-- [ ] Implement recurrent layers for all frameworks
-- [ ] Add more examples and benchmarks
-- [ ] Improve documentation and API consistency
-
-## License
-GNU Affero General Public License v3
nmn-0.1.4.dist-info/RECORD
DELETED
@@ -1,14 +0,0 @@
-nmn/__init__.py,sha256=F_5o-lCggdEdWfR1l1YC_jfR01mJmveugwUndoRx8n8,83
-nmn/keras/nmn.py,sha256=E7V7kyFB09PfMG1Da_TA2FirOiTCeAXYp3JWACV8h_c,5908
-nmn/linen/nmn.py,sha256=j4v6Z793wliE0xEAITde7jXu9Qras9u75NqdOSPSM4Q,3722
-nmn/nnx/nmn.py,sha256=gWe8EL-aUm7be03M9O5R3XdBb92EpBEFsylrY6BA60c,4871
-nmn/nnx/yatattention.py,sha256=chjtUKJtaR7ROPnNqkicbvMs7hzZKE0fIo_8cTNiju8,26601
-nmn/nnx/yatconv.py,sha256=xUH9NBY1fIDZeTA9GdgmqR_DJiQJgwU2uDrgxqirKmU,12308
-nmn/nnx/examples/language/mingpt.py,sha256=RveY3NwriTGPBdj8HNKDNtnXMaH0pgux8554m4Bhho4,61080
-nmn/nnx/examples/vision/cnn_cifar.py,sha256=UcK52-SCwuE2hl2BkpEbyg7N3Jwvvz8iFxiqhI7B9ew,73961
-nmn/tf/nmn.py,sha256=A-K65z9_aN62tAy12b0553nXxrzOofK1umGMRGJYjqw,6036
-nmn/torch/nmn.py,sha256=8K0S3nwpGprT7apbCqpaYpKpxq8F8g8EL8PHIezgMCY,4658
-nmn-0.1.4.dist-info/METADATA,sha256=k28p055Dr6WWVQcb01uinFRiT5R-CAvdKz33fqZ85g4,5032
-nmn-0.1.4.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
-nmn-0.1.4.dist-info/licenses/LICENSE,sha256=kbZSd5WewnN2PSjvAC6DprP7pXx6NUNsnltmU2Mz1yA,34519
-nmn-0.1.4.dist-info/RECORD,,
{nmn-0.1.4.dist-info → nmn-0.1.6.dist-info}/WHEEL
File without changes

{nmn-0.1.4.dist-info → nmn-0.1.6.dist-info}/licenses/LICENSE
File without changes