PyPI - codon-model - Versions diffs - 0.0.3b2__tar.gz → 0.0.4__tar.gz - Mend

codon-model 0.0.3b2__tar.gz → 0.0.4__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (89) hide show

{codon_model-0.0.3b2/codon_model.egg-info → codon_model-0.0.4}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: codon-model
-Version: 0.0.3b2
+Version: 0.0.4
 Summary: Codon model package
 Author: CodonTeam
 Requires-Python: >=3.8

{codon_model-0.0.3b2 → codon_model-0.0.4}/codon/__init__.py RENAMED Viewed

@@ -1,5 +1,5 @@
 from typing import Optional
-__version__ = '0.0.3b2'
+__version__ = '0.0.4'
 __seed__: Optional[int] = None

{codon_model-0.0.3b2 → codon_model-0.0.4}/codon/base.py RENAMED Viewed

@@ -1,5 +1,6 @@
 import torch
 import torch.nn as nn
+import torch.nn.functional as F
 from typing import Callable, Any, Iterator, Union
@@ -132,7 +133,7 @@ class BasicModel(nn.Module):
         return total
-    def load_pretrained(self, path: str) -> None:
+    def load_pretrained(self, path: str) -> 'BasicModel':
         '''
         Load a pretrained model from a file.
@@ -141,7 +142,7 @@ class BasicModel(nn.Module):
         '''
         if path.endswith('.safetensors'):
             safe_load_model(self, path)
-            return
+            return self
         state_dict = torch.load(path, map_location=self.device)
@@ -152,8 +153,10 @@ class BasicModel(nn.Module):
                 state_dict = state_dict['state_dict']
         self.load_state_dict(state_dict)
+        return self
-    def save_pretrained(self, path: str) -> None:
+    def save_pretrained(self, path: str) -> 'BasicModel':
         '''
         Save the model to a file.
@@ -165,3 +168,26 @@ class BasicModel(nn.Module):
         else:
             state_dict = self.state_dict()
             torch.save(state_dict, path)
+        return self
+    def freeze(self) -> 'BasicModel':
+        '''
+        Freeze all parameters in the model by setting requires_grad to False.
+        Returns:
+            BasicModel: The model itself for method chaining.
+        '''
+        for param in self.parameters():
+            param.requires_grad = False
+        return self
+    def unfreeze(self) -> 'BasicModel':
+        '''
+        Unfreeze all parameters in the model by setting requires_grad to True.
+        Returns:
+            BasicModel: The model itself for method chaining.
+        '''
+        for param in self.parameters():
+            param.requires_grad = True
+        return self

{codon_model-0.0.3b2 → codon_model-0.0.4}/codon/block/__init__.py RENAMED Viewed

@@ -31,6 +31,11 @@ from .transformer import (
     TransformerMoEDecoder,
     _TransformerDecoder,
 )
+from .manifold import (
+    MainfoldLoss,
+    BasicManifoldLinear, RiemannianManifoldLinear,
+    BasicManifoldConv2d, RiemannianManifoldConv2d
+)
 __all__ = [
     # attention
@@ -80,4 +85,10 @@ __all__ = [
     'TransformerDecoderOutput',
     'TransformerDenseDecoder',
     'TransformerMoEDecoder',
+    # manifold
+    'MainfoldLoss',
+    'BasicManifoldLinear',
+    'RiemannianManifoldLinear',
+    'BasicManifoldConv2d',
+    'RiemannianManifoldConv2d'
 ]

codon_model-0.0.4/codon/block/bio/__init__.py ADDED Viewed

@@ -0,0 +1,9 @@
+from .hebian import HebianOutput, Hebian
+from .predictive import PredictiveCodingOutput, PredictiveCoding
+__all__ = [
+    'HebianOutput',
+    'Hebian',
+    'PredictiveCodingOutput',
+    'PredictiveCoding'
+]

{codon_model-0.0.3b2 → codon_model-0.0.4}/codon/block/lora.py RENAMED Viewed

@@ -50,6 +50,8 @@ class BasicLoRA(BasicModel):
         super().__init__()
         self.gradient_checkpointing = gradient_checkpointing
         self.original_layer = original_layer
+        self.lora_dropout_p = lora_dropout
+        self.merge_weights = merge_weights
         # Freeze original layer
         for p in self.original_layer.parameters():

codon_model-0.0.3b2/codon/exp/block/manifold_conv.py → codon_model-0.0.4/codon/block/manifold.py RENAMED Viewed

@@ -1,11 +1,231 @@
+from codon.base import *
 import torch.nn.functional as F
-from codon.base import *
+from typing import Tuple
+from dataclasses import dataclass
+from codon.ops.manifold import riemannian_manifold_linear, riemannian_manifold_conv2d
+@dataclass
+class MainfoldLoss:
+    '''
+    Dataclass for storing manifold-related loss components.
+    Attributes:
+        cosine (torch.Tensor): The cosine similarity loss.
+        laplacian (torch.Tensor): The Laplacian regularization loss.
+    '''
+    cosine: torch.Tensor
+    laplacian: torch.Tensor
+    def factor_loss(self, factor_cos: float = 0.013, factor_lap: float = 0.012) -> torch.Tensor:
+        '''
+        Calculates the weighted sum of cosine and Laplacian losses.
+        Args:
+            factor_cos (float): The weight factor for the cosine loss.
+            factor_lap (float): The weight factor for the Laplacian loss.
+        Returns:
+            torch.Tensor: The calculated total loss value.
+        '''
+        return self.cosine * factor_cos + self.laplacian * factor_lap
+class BasicManifoldLinear(BasicModel):
+    '''
+    Base class for manifold-based neural network layers.
+    Attributes:
+        in_features (int): Size of each input sample.
+        out_features (int): Size of each output sample.
+        k_neighbors (int): Number of nearest neighbors to consider for Laplacian loss.
+        weight (nn.Parameter): The learnable weights of the layer.
+    '''
+    def __init__(
+        self,
+        in_features: int,
+        out_features: int,
+        k_neighbors: int = 2,
+    ) -> None:
+        '''
+        Initializes the BasicManifoldLinear layer.
-import math
-from typing import Tuple, Union
+        Args:
+            in_features (int): Size of each input sample.
+            out_features (int): Size of each output sample.
+            k_neighbors (int): Number of nearest neighbors for the Laplacian graph.
+        '''
+        super().__init__()
-from .manifold import MainfoldLoss
+        self.in_features = in_features
+        self.out_features = out_features
+        self.k_neighbors = min(k_neighbors, out_features - 1)
+        self.weight = nn.Parameter(torch.Tensor(out_features, in_features))
+    @property
+    def loss_cosine(self) -> torch.Tensor:
+        '''
+        Calculates the cosine similarity penalty loss among the weight vectors.
+        Returns:
+            torch.Tensor: The computed cosine penalty loss.
+        '''
+        w_norm = F.normalize(self.weight, p=2, dim=1)
+        C = torch.matmul(w_norm, w_norm.T)
+        I = torch.eye(self.out_features, device=C.device)
+        return torch.sum((C * (1 - I)) ** 2) / (self.out_features * (self.out_features - 1))
+    @property
+    def loss_laplacian(self) -> torch.Tensor:
+        '''
+        Calculates the Laplacian regularization loss based on k-nearest neighbors.
+        Returns:
+            torch.Tensor: The computed Laplacian regularization loss.
+        '''
+        w_norm = F.normalize(self.weight, p=2, dim=1)
+        C = torch.matmul(w_norm, w_norm.T)
+        I = torch.eye(self.out_features, device=C.device)
+        _, topk_idx = torch.topk(C, self.k_neighbors + 1, dim=1)
+        A = torch.zeros_like(C)
+        A.scatter_(1, topk_idx, 1.0)
+        A = A - I
+        A = torch.max(A, A.T)
+        return torch.sum(A * (1.0 - C)) / torch.sum(A)
+    def compute_loss(self) -> MainfoldLoss:
+        '''
+        Computes both the cosine and Laplacian losses and returns them in a MainfoldLoss object.
+        Returns:
+            MainfoldLoss: An object containing the computed cosine and Laplacian losses.
+        '''
+        w_norm = F.normalize(self.weight, p=2, dim=1)
+        C = torch.matmul(w_norm, w_norm.T)
+        I = torch.eye(self.out_features, device=C.device)
+        loss_cos = torch.sum((C * (1 - I)) ** 2) / (self.out_features * (self.out_features - 1))
+        _, topk_idx = torch.topk(C, self.k_neighbors + 1, dim=1)
+        A = torch.zeros_like(C)
+        A.scatter_(1, topk_idx, 1.0)
+        A = A - I
+        A = torch.max(A, A.T)
+        loss_lap = torch.sum(A * (1.0 - C)) / torch.sum(A)
+        return MainfoldLoss(cosine=loss_cos, laplacian=loss_lap)
+    def extra_repr(self) -> str:
+        '''
+        Sets the extra representation of the module for printing.
+        '''
+        return f'in_features={self.in_features}, out_features={self.out_features}, k_neighbors={self.k_neighbors}'
+class RiemannianManifoldLinear(BasicManifoldLinear):
+    '''
+    A linear layer projecting data onto a Riemannian manifold (hypersphere).
+    Attributes:
+        kappa (nn.Parameter): Concentration parameter for the von Mises-Fisher (vMF) distribution.
+        lambda_rate (nn.Parameter): Gravitational attraction coefficient.
+        scale (nn.Parameter): Vector amplifier for the hyperspherical network.
+        bias (nn.Parameter): Manifold bias vector.
+    '''
+    def __init__(
+        self,
+        in_features: int,
+        out_features: int,
+        kappa_init: float = 2.0,
+        lambda_init: float = 0.1,
+        scale_init: float = 15.0,
+        k_neighbors: int = 2,
+        rule: str = 'near'
+    ) -> None:
+        '''
+        Initializes the RiemannianManifoldLinear layer.
+        Args:
+            in_features (int): Size of each input sample.
+            out_features (int): Size of each output sample.
+            kappa_init (float): Initial value for the vMF concentration parameter.
+            lambda_init (float): Initial value for the gravitational attraction coefficient.
+            scale_init (float): Initial value for the vector amplifier scale.
+            k_neighbors (int): Number of nearest neighbors for the Laplacian graph.
+            rule (str): Attraction rule, either 'near' or 'far'.
+        '''
+        super().__init__(
+            in_features=in_features,
+            out_features=out_features,
+            k_neighbors=k_neighbors
+        )
+        self.kappa_init = kappa_init
+        self.lambda_init = lambda_init
+        self.scale_init = scale_init
+        self.rule = rule.lower()
+        if not self.rule in ['far', 'near']:
+            raise ValueError(f"Invalid rule: {self.rule}, must be 'far' or 'near'")
+        # Concentration parameter for the vMF distribution
+        self.kappa = nn.Parameter(torch.tensor(float(kappa_init)))
+        # Gravitational attraction coefficient
+        self.lambda_rate = nn.Parameter(torch.tensor(float(lambda_init)))
+        # Vector amplifier for the hyperspherical network
+        self.scale = nn.Parameter(torch.ones(out_features) * scale_init)
+        # Manifold bias vector
+        self.bias = nn.Parameter(torch.zeros(out_features))
+        self.reset_parameters()
+    def reset_parameters(self) -> None:
+        '''
+        Resets the parameters of the layer.
+        '''
+        nn.init.normal_(self.weight, 0, 0.01)
+    def forward(self, input_tensor: torch.Tensor) -> torch.Tensor:
+        '''
+        Defines the computation performed at every call.
+        Args:
+            input_tensor (torch.Tensor): The input data with shape (batch_size, in_features).
+        Returns:
+            torch.Tensor: The output data with shape (batch_size, out_features).
+        '''
+        return riemannian_manifold_linear(
+            input_tensor=input_tensor,
+            weight=self.weight,
+            kappa=self.kappa,
+            lambda_rate=self.lambda_rate,
+            scale=self.scale,
+            bias=self.bias,
+            rule=self.rule
+        )
+    def extra_repr(self) -> str:
+        main_str = super().extra_repr()
+        return f'{main_str}, rule={self.rule}, kappa={self.kappa.item():.4f}, lambda={self.lambda_rate.item():.4f}'
 class BasicManifoldConv2d(BasicModel):
@@ -126,6 +346,16 @@ class BasicManifoldConv2d(BasicModel):
         loss_lap = torch.sum(A * (1.0 - C)) / torch.sum(A)
         return MainfoldLoss(cosine=loss_cos, laplacian=loss_lap)
+    def extra_repr(self) -> str:
+        s = ('{in_channels}, {out_channels}, kernel_size={kernel_size}'
+             ', stride={stride}')
+        if self.padding != 0:
+            s += ', padding={padding}'
+        if self.dilation != 1:
+            s += ', dilation={dilation}'
+        s += f', rule={self.rule}, use_norm={self.use_norm}'
+        return s.format(**self.__dict__)
 class RiemannianManifoldConv2d(BasicManifoldConv2d):
@@ -138,6 +368,7 @@ class RiemannianManifoldConv2d(BasicManifoldConv2d):
         scale (nn.Parameter): Vector amplifier for the hyperspherical network.
         bias (nn.Parameter): Manifold bias vector.
         weight_ones (torch.Tensor): Fixed all-ones kernel for computing patch norm rapidly.
+        use_norm (bool): Whether to scale the output by the input patch norm.
     '''
     def __init__(
@@ -152,7 +383,9 @@ class RiemannianManifoldConv2d(BasicManifoldConv2d):
         lambda_init: float = 0.1,
         scale_init: float = 15.0,
         k_neighbors: int = 2,
-        rule: str = 'near'
+        rule: str = 'near',
+        use_norm_gate: bool = False,
+        use_norm: bool = False
     ) -> None:
         '''
         Initializes the RiemannianManifoldConv2d layer.
@@ -169,6 +402,7 @@ class RiemannianManifoldConv2d(BasicManifoldConv2d):
             scale_init (float): Initial value for the vector amplifier scale.
             k_neighbors (int): Number of nearest neighbors for the Laplacian graph.
             rule (str): Attraction rule, either 'near' or 'far'.
+            use_norm (bool): Whether to scale the output by the input patch norm. Default: True.
         '''
         super().__init__(in_channels, out_channels, kernel_size, stride, padding, dilation, k_neighbors)
@@ -177,6 +411,8 @@ class RiemannianManifoldConv2d(BasicManifoldConv2d):
         self.lambda_rate = nn.Parameter(torch.tensor(float(lambda_init)))
         self.scale = nn.Parameter(torch.ones(out_channels) * scale_init)
         self.bias = nn.Parameter(torch.zeros(out_channels))
+        self.use_norm_gate = use_norm_gate
+        self.use_norm = use_norm
         # All-ones kernel for ultra-fast calculation of patch norm
         weight_ones = torch.ones(1, in_channels, *self.kernel_size)
@@ -199,132 +435,17 @@ class RiemannianManifoldConv2d(BasicManifoldConv2d):
         Returns:
             torch.Tensor: The output manifold projection tensor.
         '''
-        # 1. Weight normalization
-        w_flat = self.weight.view(self.out_channels, -1)
-        w_norm_flat = F.normalize(w_flat, p=2, dim=1)
-        w_norm = w_norm_flat.view_as(self.weight)
-        # 2. Ultra-fast calculation of the norm for each sliding patch of the input image
-        # x_sq: [batch, 1, H_out, W_out]
-        x_sq = F.conv2d(input_tensor ** 2, self.weight_ones, stride=self.stride, padding=self.padding, dilation=self.dilation)
-        x_norm_val = torch.sqrt(torch.clamp(x_sq, min=1e-6))
-        # 3. Calculate Cosine Feature Map
-        # cosine: [batch, out_channels, H_out, W_out]
-        conv_proj = F.conv2d(input_tensor, w_norm, stride=self.stride, padding=self.padding, dilation=self.dilation)
-        cosine = conv_proj / (x_norm_val + 1e-6)
-        cosine = torch.clamp(cosine, -1.0 + 1e-6, 1.0 - 1e-6)
-        # 4. vMF gravitational field calculation (applied pixel-wise)
-        theta = torch.acos(cosine)
-        exp_val = torch.exp(self.kappa * (cosine - 1.0))
-        attraction = exp_val if self.rule == 'near' else 1.0 - exp_val
-        # 5. Riemannian geodesic pullback
-        safe_lambda = torch.clamp(self.lambda_rate, 1e-6, 1.0 - 1e-4)
-        effective_theta = theta * (1.0 - safe_lambda * attraction)
-        # 6. Reconstruct the output (note the shape broadcasting)
-        scale_view = self.scale.view(1, -1, 1, 1)
-        bias_view = self.bias.view(1, -1, 1, 1)
-        output = scale_view * torch.cos(effective_theta) + bias_view
-        return output
-class EuclideanManifoldConv2d(BasicManifoldConv2d):
-    '''
-    A 2D convolutional layer simulating a manifold structure in Euclidean space.
-    Attributes:
-        tau (nn.Parameter): Temperature or radius parameter for the basin of attraction.
-        lambda_rate (nn.Parameter): Gravitational strength parameter.
-        bias (nn.Parameter): Translation bias vector.
-        weight_ones (torch.Tensor): Fixed all-ones kernel for computing patch norm rapidly.
-    '''
-    def __init__(
-        self,
-        in_channels: int,
-        out_channels: int,
-        kernel_size: int,
-        stride: int = 1,
-        padding: int = 0,
-        dilation: int = 1,
-        tau_init: float = 5.0,
-        lambda_init: float = 0.5,
-        k_neighbors: int = 2,
-        rule: str = 'near'
-    ) -> None:
-        '''
-        Initializes the EuclideanManifoldConv2d layer.
-        Args:
-            in_channels (int): Number of channels in the input image.
-            out_channels (int): Number of channels produced by the convolution.
-            kernel_size (int): Size of the convolving kernel.
-            stride (int): Stride of the convolution. Default: 1.
-            padding (int): Zero-padding added to both sides of the input. Default: 0.
-            dilation (int): Spacing between kernel elements. Default: 1.
-            tau_init (float): Initial value for the basin temperature/radius.
-            lambda_init (float): Initial value for the gravitational strength.
-            k_neighbors (int): Number of nearest neighbors for the Laplacian graph.
-            rule (str): Attraction rule, either 'near' or 'far'.
-        '''
-        super().__init__(in_channels, out_channels, kernel_size, stride, padding, dilation, k_neighbors)
-        self.rule = rule.lower()
-        self.tau = nn.Parameter(torch.tensor(float(tau_init)))
-        self.lambda_rate = nn.Parameter(torch.tensor(float(lambda_init)))
-        self.bias = nn.Parameter(torch.Tensor(out_channels))
-        weight_ones = torch.ones(1, in_channels, *self.kernel_size)
-        self.register_buffer('weight_ones', weight_ones)
-        self.reset_parameters()
-    def reset_parameters(self) -> None:
-        '''
-        Resets the parameters of the layer using Kaiming uniform initialization.
-        '''
-        nn.init.kaiming_uniform_(self.weight, a=math.sqrt(5))
-        if self.bias is not None:
-            fan_in, _ = nn.init._calculate_fan_in_and_fan_out(self.weight)
-            bound = 1 / math.sqrt(fan_in) if fan_in > 0 else 0
-            nn.init.uniform_(self.bias, -bound, bound)
-    def forward(self, input_tensor: torch.Tensor) -> torch.Tensor:
-        '''
-        Defines the computation performed at every call.
-        Args:
-            input_tensor (torch.Tensor): The input data tensor.
-        Returns:
-            torch.Tensor: The output manifold projection tensor.
-        '''
-        # 1. Base physical projection
-        # base_proj: [batch, out_channels, H_out, W_out]
-        base_proj = F.conv2d(input_tensor, self.weight, stride=self.stride, padding=self.padding, dilation=self.dilation)
-        # 2. Ultra-fast algebraic expansion of the squared L2 distance for local patches
-        # ||patch - W||^2 = ||patch||^2 + ||W||^2 - 2<patch, W>
-        x_sq = F.conv2d(input_tensor ** 2, self.weight_ones, stride=self.stride, padding=self.padding, dilation=self.dilation)
-        w_sq = torch.sum(self.weight ** 2, dim=(1,2,3)).view(1, -1, 1, 1)
-        dist_sq = x_sq + w_sq - 2 * base_proj
-        dist_sq = torch.clamp(dist_sq, min=1e-6)
-        # 3. Compute the attraction index
-        exp_val = torch.exp(-dist_sq / (self.tau ** 2 + 1e-8))
-        attraction = exp_val if self.rule == 'near' else 1.0 - exp_val
-        # 4. Gravitational correction
-        safe_lambda = torch.clamp(self.lambda_rate, 1e-6, 1.0 - 1e-4)
-        correction = safe_lambda * attraction * (w_sq - base_proj)
-        # 5. Combine outputs
-        output = base_proj + correction + self.bias.view(1, -1, 1, 1)
-        return output
+        return riemannian_manifold_conv2d(
+            input_tensor=input_tensor,
+            weight=self.weight,
+            weight_ones=self.weight_ones,
+            kappa=self.kappa,
+            lambda_rate=self.lambda_rate,
+            scale=self.scale,
+            bias=self.bias,
+            stride=self.stride,
+            padding=self.padding,
+            dilation=self.dilation,
+            rule=self.rule,
+            use_norm=self.use_norm
+        )

codon-model 0.0.3b2__tar.gz → 0.0.4__tar.gz

codon-model 0.0.3b2__tar.gz → 0.0.4__tar.gz