nmn 0.1.1.tar.gz → 0.1.2.tar.gz

This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the package versions as they appear in their respective public registries.
@@ -1,6 +1,6 @@
  Metadata-Version: 2.4
  Name: nmn
- Version: 0.1.1
+ Version: 0.1.2
  Summary: a neuron that matter
  Project-URL: Homepage, https://github.com/mlnomadpy/nmn
  Project-URL: Bug Tracker, https://github.com/mlnomadpy/my_package/issues
@@ -30,6 +30,40 @@ Not the neurons we want, but the neurons we need
 
  > Deep Learning 2.0: Artificial Neurons that Matter: Reject Correlation - Embrace Orthogonality
 
+ ## Math
+
+ Yat-Product:
+ $$
+ ⵟ(\mathbf{w},\mathbf{x}) := \frac{\langle \mathbf{w}, \mathbf{x} \rangle^2}{\|\mathbf{w} - \mathbf{x}\|^2 + \epsilon} = \frac{\|\mathbf{x}\|^2 \|\mathbf{w}\|^2 \cos^2 \theta}{\|\mathbf{w}\|^2 - 2\mathbf{w}^\top\mathbf{x} + \|\mathbf{x}\|^2 + \epsilon} = \frac{\|\mathbf{x}\|^2 \|\mathbf{w}\|^2 \cos^2 \theta}{((\mathbf{x}-\mathbf{w})\cdot(\mathbf{x}-\mathbf{w})) + \epsilon}.
+ $$
+
+ **Explanation:**
+ - $\mathbf{w}$ is the weight vector, $\mathbf{x}$ is the input vector.
+ - $\langle \mathbf{w}, \mathbf{x} \rangle$ is the dot product between $\mathbf{w}$ and $\mathbf{x}$.
+ - $\|\mathbf{w} - \mathbf{x}\|^2$ is the squared Euclidean distance between $\mathbf{w}$ and $\mathbf{x}$.
+ - $\epsilon$ is a small constant for numerical stability.
+ - $\theta$ is the angle between $\mathbf{w}$ and $\mathbf{x}$.
+
+ This operation:
+ - **Numerator:** Squares the similarity (dot product) between $\mathbf{w}$ and $\mathbf{x}$, emphasizing strong alignments.
+ - **Denominator:** Penalizes large distances, so the response is high only when $\mathbf{w}$ and $\mathbf{x}$ are both similar in direction and close in space.
+ - **No activation needed:** The non-linearity is built into the operation itself, allowing the layer to learn complex, non-linear relationships without a separate activation function.
+ - **Geometric view:** The output is maximized when $\mathbf{w}$ and $\mathbf{x}$ are both large in norm, closely aligned (small $\theta$), and close together in Euclidean space.
+
+ Yat-Conv:
+ $$
+ ⵟ^*(\mathbf{W}, \mathbf{X}) := \frac{\langle \mathbf{W}, \mathbf{X} \rangle^2}{\|\mathbf{W} - \mathbf{X}\|^2 + \epsilon}
+ = \frac{\left(\sum_{i,j} w_{ij} x_{ij}\right)^2}{\sum_{i,j} (w_{ij} - x_{ij})^2 + \epsilon}
+ $$
+
+ Where:
+ - $\mathbf{W}$ and $\mathbf{X}$ are local patches (e.g., the kernel and an input patch in convolution)
+ - $w_{ij}$ and $x_{ij}$ are elements of the kernel and input patch, respectively
+ - $\epsilon$ is a small constant for numerical stability
+
+ This generalizes the Yat-product to convolutional (patch-wise) operations.
+
+
  ## Supported Frameworks & Tasks
 
  ### Flax (JAX)
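
The Yat-product formula added above is straightforward to prototype. The snippet below is a minimal illustrative sketch in JAX (the framework the package targets); the function name `yat_product` and the epsilon default are assumptions for illustration, not the nmn package's API. It also checks numerically that the expanded denominator matches the squared distance, as in the second form of the equation.

```python
import jax.numpy as jnp

def yat_product(w: jnp.ndarray, x: jnp.ndarray, eps: float = 1e-6) -> jnp.ndarray:
    """Squared dot product divided by the squared Euclidean distance (plus eps)."""
    dot = jnp.dot(w, x)              # <w, x>
    sq_dist = jnp.sum((w - x) ** 2)  # ||w - x||^2
    return dot ** 2 / (sq_dist + eps)

w = jnp.array([1.0, 2.0, 3.0])
x = jnp.array([1.5, 1.0, 2.5])

# ||w - x||^2 == ||w||^2 - 2 w.x + ||x||^2, matching the expanded form above.
expanded = jnp.sum(w ** 2) - 2 * jnp.dot(w, x) + jnp.sum(x ** 2)
assert jnp.allclose(jnp.sum((w - x) ** 2), expanded)

print(yat_product(w, x))  # large when w and x are both aligned and close together
```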
@@ -16,6 +16,40 @@ Not the neurons we want, but the neurons we need
 
  > Deep Learning 2.0: Artificial Neurons that Matter: Reject Correlation - Embrace Orthogonality
 
+ ## Math
+
+ Yat-Product:
+ $$
+ ⵟ(\mathbf{w},\mathbf{x}) := \frac{\langle \mathbf{w}, \mathbf{x} \rangle^2}{\|\mathbf{w} - \mathbf{x}\|^2 + \epsilon} = \frac{\|\mathbf{x}\|^2 \|\mathbf{w}\|^2 \cos^2 \theta}{\|\mathbf{w}\|^2 - 2\mathbf{w}^\top\mathbf{x} + \|\mathbf{x}\|^2 + \epsilon} = \frac{\|\mathbf{x}\|^2 \|\mathbf{w}\|^2 \cos^2 \theta}{((\mathbf{x}-\mathbf{w})\cdot(\mathbf{x}-\mathbf{w})) + \epsilon}.
+ $$
+
+ **Explanation:**
+ - $\mathbf{w}$ is the weight vector, $\mathbf{x}$ is the input vector.
+ - $\langle \mathbf{w}, \mathbf{x} \rangle$ is the dot product between $\mathbf{w}$ and $\mathbf{x}$.
+ - $\|\mathbf{w} - \mathbf{x}\|^2$ is the squared Euclidean distance between $\mathbf{w}$ and $\mathbf{x}$.
+ - $\epsilon$ is a small constant for numerical stability.
+ - $\theta$ is the angle between $\mathbf{w}$ and $\mathbf{x}$.
+
+ This operation:
+ - **Numerator:** Squares the similarity (dot product) between $\mathbf{w}$ and $\mathbf{x}$, emphasizing strong alignments.
+ - **Denominator:** Penalizes large distances, so the response is high only when $\mathbf{w}$ and $\mathbf{x}$ are both similar in direction and close in space.
+ - **No activation needed:** The non-linearity is built into the operation itself, allowing the layer to learn complex, non-linear relationships without a separate activation function.
+ - **Geometric view:** The output is maximized when $\mathbf{w}$ and $\mathbf{x}$ are both large in norm, closely aligned (small $\theta$), and close together in Euclidean space.
+
+ Yat-Conv:
+ $$
+ ⵟ^*(\mathbf{W}, \mathbf{X}) := \frac{\langle \mathbf{W}, \mathbf{X} \rangle^2}{\|\mathbf{W} - \mathbf{X}\|^2 + \epsilon}
+ = \frac{\left(\sum_{i,j} w_{ij} x_{ij}\right)^2}{\sum_{i,j} (w_{ij} - x_{ij})^2 + \epsilon}
+ $$
+
+ Where:
+ - $\mathbf{W}$ and $\mathbf{X}$ are local patches (e.g., the kernel and an input patch in convolution)
+ - $w_{ij}$ and $x_{ij}$ are elements of the kernel and input patch, respectively
+ - $\epsilon$ is a small constant for numerical stability
+
+ This generalizes the Yat-product to convolutional (patch-wise) operations.
+
+
  ## Supported Frameworks & Tasks
 
  ### Flax (JAX)
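
The Yat-Conv formula above applies the same ratio patch-wise. Below is a minimal sketch assuming a single-channel 2D input, a single kernel, stride 1, and valid padding; `yat_conv2d` is an illustrative name and a naive loop implementation, not the package's actual convolution code.

```python
import jax.numpy as jnp

def yat_conv2d(image: jnp.ndarray, kernel: jnp.ndarray, eps: float = 1e-6) -> jnp.ndarray:
    """Naive patch-wise Yat-product map over a single-channel 2D input."""
    kh, kw = kernel.shape
    h, w = image.shape
    out = jnp.zeros((h - kh + 1, w - kw + 1))
    for i in range(h - kh + 1):
        for j in range(w - kw + 1):
            patch = image[i:i + kh, j:j + kw]
            dot = jnp.sum(kernel * patch)             # sum_ij w_ij x_ij
            sq_dist = jnp.sum((kernel - patch) ** 2)  # sum_ij (w_ij - x_ij)^2
            out = out.at[i, j].set(dot ** 2 / (sq_dist + eps))
    return out

image = jnp.arange(16.0).reshape(4, 4)
kernel = jnp.ones((2, 2))
print(yat_conv2d(image, kernel))  # (3, 3) map of patch-wise Yat-products
```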
@@ -4,7 +4,7 @@ build-backend = "hatchling.build"
 
  [project]
  name = "nmn"
- version = "0.1.1"
+ version = "0.1.2"
  authors = [
  { name="Taha Bouhsine", email="yat@mlnomads.com" },
  ]
All other files in the package are unchanged.