nmn 0.1.2__py3-none-any.whl → 0.1.4__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- nmn/nnx/examples/language/mingpt.py +1650 -0
- nmn/nnx/examples/vision/cnn_cifar.py +1769 -0
- nmn/nnx/nmn.py +1 -1
- nmn/nnx/yatattention.py +764 -0
- nmn/nnx/yatconv.py +22 -2
- nmn/torch/nmn.py +2 -1
- {nmn-0.1.2.dist-info → nmn-0.1.4.dist-info}/METADATA +2 -2
- nmn-0.1.4.dist-info/RECORD +14 -0
- nmn-0.1.2.dist-info/RECORD +0 -11
- {nmn-0.1.2.dist-info → nmn-0.1.4.dist-info}/WHEEL +0 -0
- {nmn-0.1.2.dist-info → nmn-0.1.4.dist-info}/licenses/LICENSE +0 -0
nmn/nnx/yatconv.py
CHANGED
@@ -24,6 +24,7 @@ Array = jax.Array
 # Default initializers
 default_kernel_init = initializers.lecun_normal()
 default_bias_init = initializers.zeros_init()
+default_alpha_init = initializers.ones_init()
 
 # Helper functions
 def canonicalize_padding(padding: PaddingLike, rank: int) -> LaxPadding:
@@ -138,13 +139,17 @@ class YatConv(Module):
     input_dilation: tp.Union[None, int, tp.Sequence[int]] = 1,
     kernel_dilation: tp.Union[None, int, tp.Sequence[int]] = 1,
     feature_group_count: int = 1,
+
     use_bias: bool = True,
+    use_alpha: bool = True,
+    kernel_init: Initializer = default_kernel_init,
+    bias_init: Initializer = default_bias_init,
+    alpha_init: Initializer = default_alpha_init,
+
     mask: tp.Optional[Array] = None,
     dtype: tp.Optional[Dtype] = None,
     param_dtype: Dtype = jnp.float32,
     precision: PrecisionLike = None,
-    kernel_init: Initializer = default_kernel_init,
-    bias_init: Initializer = default_bias_init,
     conv_general_dilated: ConvGeneralDilatedT = lax.conv_general_dilated,
     promote_dtype: PromoteDtypeFn = dtypes.promote_dtype,
     epsilon: float = 1e-5,
@@ -179,6 +184,8 @@ class YatConv(Module):
     self.kernel_dilation = kernel_dilation
     self.feature_group_count = feature_group_count
     self.use_bias = use_bias
+    self.use_alpha = use_alpha
+
     self.mask = mask
     self.dtype = dtype
     self.param_dtype = param_dtype
@@ -189,6 +196,13 @@ class YatConv(Module):
     self.promote_dtype = promote_dtype
     self.epsilon = epsilon
 
+    if use_alpha:
+      alpha_key = rngs.params()
+      self.alpha = nnx.Param(alpha_init(alpha_key, (1,), param_dtype))
+    else:
+      self.alpha = None
+
+
   def __call__(self, inputs: Array) -> Array:
     assert isinstance(self.kernel_size, tuple)
 
@@ -257,6 +271,7 @@ class YatConv(Module):
       kernel_val *= current_mask
 
     bias_val = self.bias.value if self.bias is not None else None
+    alpha = self.alpha.value if self.alpha is not None else None
 
     inputs_promoted, kernel_promoted, bias_promoted = self.promote_dtype(
       (inputs_flat, kernel_val, bias_val), dtype=self.dtype
@@ -314,6 +329,11 @@ class YatConv(Module):
       bias_reshape_dims = (1,) * (y.ndim - 1) + (-1,)
       y += jnp.reshape(bias_val, bias_reshape_dims)
 
+    assert self.use_alpha == (alpha is not None)
+    if alpha is not None:
+      scale = (jnp.sqrt(self.out_features) / jnp.log(1 + self.out_features)) ** alpha
+      y = y * scale
+
     if num_batch_dimensions != 1:
       output_shape = input_batch_shape + y.shape[1:]
       y = jnp.reshape(y, output_shape)
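The functional change above is the new learnable alpha scaling: when use_alpha is enabled, YatConv stores a single-element nnx.Param (initialized to ones) and multiplies the convolution output by (sqrt(out_features) / log(1 + out_features)) ** alpha. A minimal jax.numpy sketch of that scale factor; the channel count and alpha value below are illustrative, not taken from the package:

import jax.numpy as jnp

def yat_alpha_scale(out_features: int, alpha: float) -> jnp.ndarray:
    # Same expression as the scaling added in YatConv.__call__:
    # (sqrt(out_features) / log(1 + out_features)) ** alpha
    return (jnp.sqrt(out_features) / jnp.log(1.0 + out_features)) ** alpha

# With the default alpha_init = ones_init(), alpha starts at 1.0,
# so a 64-channel layer would be scaled by sqrt(64) / log(65) ≈ 1.92.
print(yat_alpha_scale(64, 1.0))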
nmn/torch/nmn.py
CHANGED
@@ -10,7 +10,8 @@ class YatNMN(nn.Module):
     Attributes:
         in_features (int): Size of each input sample
         out_features (int): Size of each output sample
-
+        bias (bool): Whether to add a bias to the output
+        alpha (bool): Whether to multiply with alpha
         dtype (torch.dtype): Data type for computation
        epsilon (float): Small constant to avoid division by zero
        kernel_init (callable): Initializer for the weight matrix
{nmn-0.1.2.dist-info → nmn-0.1.4.dist-info}/METADATA
CHANGED
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: nmn
-Version: 0.1.2
+Version: 0.1.4
 Summary: a neuron that matter
 Project-URL: Homepage, https://github.com/mlnomadpy/nmn
 Project-URL: Bug Tracker, https://github.com/mlnomadpy/my_package/issues
@@ -34,7 +34,7 @@ Not the neurons we want, but the neurons we need
 
 Yat-Product:
 $$
-ⵟ(\mathbf{w},\mathbf{x}) := \frac{\langle \mathbf{w}, \mathbf{x} \rangle^2}{\|\mathbf{w} - \mathbf{x}\|^2 + \epsilon} = \frac{ \|\mathbf{x}\|^2 \|\mathbf{w}\|^2 \cos^2 \theta}{\|\mathbf{w}\|^2 - 2\mathbf{w}^\top\mathbf{x} + \|\mathbf{x}\|^2 + \epsilon} = \frac{ \|\mathbf{x}\|^2 \|\mathbf{w}\|^2 \cos^2 \theta}{((\mathbf{x}-\mathbf{w})\cdot(\mathbf{x}-\mathbf{w})) + \epsilon}.
+ⵟ(\mathbf{w},\mathbf{x}) := \frac{\langle \mathbf{w}, \mathbf{x} \rangle^2}{\|\mathbf{w} - \mathbf{x}\|^2 + \epsilon} = \frac{ \|\mathbf{x}\|^2 \|\mathbf{w}\|^2 \cos^2 \theta}{\|\mathbf{w}\|^2 - 2\mathbf{w}^\top\mathbf{x} + \|\mathbf{x}\|^2 + \epsilon} = \frac{ \|\mathbf{x}\|^2 \|\mathbf{w}\|^2 \cos^2 \theta}{((\mathbf{x}-\mathbf{w})\cdot(\mathbf{x}-\mathbf{w}))^2 + \epsilon}.
 $$
 
 **Explanation:**
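In its leading form, the Yat-Product shown above is ⟨w, x⟩² / (‖w − x‖² + ε). A minimal jax.numpy sketch of that leading form; the vectors and ε below are illustrative, not taken from the package code:

import jax.numpy as jnp

def yat_product(w, x, epsilon=1e-5):
    # <w, x>^2 / (||w - x||^2 + epsilon): squared dot product over
    # squared Euclidean distance, with epsilon guarding division by zero
    return jnp.dot(w, x) ** 2 / (jnp.sum((w - x) ** 2) + epsilon)

w = jnp.array([1.0, 2.0, 3.0])
x = jnp.array([0.5, 1.0, -1.0])
print(yat_product(w, x))  # a scalar similarity score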
nmn-0.1.4.dist-info/RECORD
ADDED
@@ -0,0 +1,14 @@
+nmn/__init__.py,sha256=F_5o-lCggdEdWfR1l1YC_jfR01mJmveugwUndoRx8n8,83
+nmn/keras/nmn.py,sha256=E7V7kyFB09PfMG1Da_TA2FirOiTCeAXYp3JWACV8h_c,5908
+nmn/linen/nmn.py,sha256=j4v6Z793wliE0xEAITde7jXu9Qras9u75NqdOSPSM4Q,3722
+nmn/nnx/nmn.py,sha256=gWe8EL-aUm7be03M9O5R3XdBb92EpBEFsylrY6BA60c,4871
+nmn/nnx/yatattention.py,sha256=chjtUKJtaR7ROPnNqkicbvMs7hzZKE0fIo_8cTNiju8,26601
+nmn/nnx/yatconv.py,sha256=xUH9NBY1fIDZeTA9GdgmqR_DJiQJgwU2uDrgxqirKmU,12308
+nmn/nnx/examples/language/mingpt.py,sha256=RveY3NwriTGPBdj8HNKDNtnXMaH0pgux8554m4Bhho4,61080
+nmn/nnx/examples/vision/cnn_cifar.py,sha256=UcK52-SCwuE2hl2BkpEbyg7N3Jwvvz8iFxiqhI7B9ew,73961
+nmn/tf/nmn.py,sha256=A-K65z9_aN62tAy12b0553nXxrzOofK1umGMRGJYjqw,6036
+nmn/torch/nmn.py,sha256=8K0S3nwpGprT7apbCqpaYpKpxq8F8g8EL8PHIezgMCY,4658
+nmn-0.1.4.dist-info/METADATA,sha256=k28p055Dr6WWVQcb01uinFRiT5R-CAvdKz33fqZ85g4,5032
+nmn-0.1.4.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
+nmn-0.1.4.dist-info/licenses/LICENSE,sha256=kbZSd5WewnN2PSjvAC6DprP7pXx6NUNsnltmU2Mz1yA,34519
+nmn-0.1.4.dist-info/RECORD,,
nmn-0.1.2.dist-info/RECORD
DELETED
@@ -1,11 +0,0 @@
-nmn/__init__.py,sha256=F_5o-lCggdEdWfR1l1YC_jfR01mJmveugwUndoRx8n8,83
-nmn/keras/nmn.py,sha256=E7V7kyFB09PfMG1Da_TA2FirOiTCeAXYp3JWACV8h_c,5908
-nmn/linen/nmn.py,sha256=j4v6Z793wliE0xEAITde7jXu9Qras9u75NqdOSPSM4Q,3722
-nmn/nnx/nmn.py,sha256=hZDgMnGnSnBSqMbk-z7qUt8QsHEM-2o6CVWacXZfz3E,4870
-nmn/nnx/yatconv.py,sha256=EZx6g-KcuwrPNEVPl8YdQ16ZXkly_m0XvYCIoWVwFc0,11742
-nmn/tf/nmn.py,sha256=A-K65z9_aN62tAy12b0553nXxrzOofK1umGMRGJYjqw,6036
-nmn/torch/nmn.py,sha256=qOFOlH4_pCOQr_4ctGpEbnW3DAGQotijDTKu5aIEXaE,4609
-nmn-0.1.2.dist-info/METADATA,sha256=MxRIZIm8TIcvUAyW-5gYBu88g4hF-upahr3e2tfrWE8,5030
-nmn-0.1.2.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
-nmn-0.1.2.dist-info/licenses/LICENSE,sha256=kbZSd5WewnN2PSjvAC6DprP7pXx6NUNsnltmU2Mz1yA,34519
-nmn-0.1.2.dist-info/RECORD,,
{nmn-0.1.2.dist-info → nmn-0.1.4.dist-info}/WHEEL
File without changes
{nmn-0.1.2.dist-info → nmn-0.1.4.dist-info}/licenses/LICENSE
File without changes