PyPI - orbit-torch - Versions diffs - 0.0.4a1__py3-none-any.whl → 0.1.0b1__py3-none-any.whl - Mend

orbit-torch 0.0.4a1 (py3-none-any.whl) → 0.1.0b1 (py3-none-any.whl)

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (62) hide show

orbit/__init__.py +3 -1
orbit/callback.py +4 -3
orbit/dataset/__init__.py +1 -0
orbit/dataset/cogn.py +138 -0
orbit/dataset/data/cogn_en.jsonl +45 -0
orbit/dataset/data/cogn_zh.jsonl +113 -0
orbit/engine.py +210 -146
orbit/kit/__init__.py +2 -0
orbit/kit/interface.py +154 -0
orbit/kit/wrapper.py +157 -0
orbit/model/__init__.py +5 -0
orbit/model/base.py +125 -0
orbit/model/block/__init__.py +34 -0
orbit/model/block/attention.py +265 -0
orbit/model/block/bio.py +537 -0
orbit/model/block/codebook.py +122 -0
orbit/model/block/conv.py +505 -0
orbit/model/block/embedding.py +252 -0
orbit/model/block/film.py +176 -0
orbit/model/block/fusion.py +335 -0
orbit/model/block/gate.py +334 -0
orbit/model/block/lora.py +776 -0
orbit/model/block/mlp.py +68 -0
orbit/model/block/moe.py +94 -0
orbit/model/block/tcn.py +99 -0
orbit/model/config.py +62 -0
orbit/model/kit/__init__.py +6 -0
orbit/model/kit/discriminator.py +46 -0
orbit/model/kit/losses.py +193 -0
orbit/model/motif/__init__.py +0 -0
orbit/model/motif/vision/__init__.py +0 -0
orbit/model/motif/vision/v1.py +645 -0
orbit/model/registry.py +53 -0
orbit/optim/__init__.py +2 -2
orbit/optim/sam.py +10 -3
orbit/plugin/__init__.py +12 -8
orbit/plugin/board.py +1 -2
orbit/plugin/checkpoint.py +137 -62
orbit/plugin/classification.py +2 -2
orbit/plugin/display_model.py +1 -2
orbit/plugin/early_stopping.py +1 -2
orbit/plugin/ema.py +1 -2
orbit/plugin/gradient_accumulation.py +1 -2
orbit/plugin/lora.py +346 -0
orbit/plugin/memory_estimator.py +1 -2
orbit/plugin/warmup.py +1 -2
orbit/utils/__init__.py +24 -1
orbit/utils/cuda.py +10 -0
orbit/utils/freeze.py +61 -17
orbit/utils/image.py +164 -0
orbit/utils/initialization.py +184 -94
orbit/utils/layer_io.py +66 -7
orbit/utils/lora.py +480 -0
orbit/utils/moe.py +55 -0
orbit/utils/seed.py +3 -19
orbit/utils/sft.py +93 -0
orbit_torch-0.1.0b1.dist-info/METADATA +208 -0
orbit_torch-0.1.0b1.dist-info/RECORD +65 -0
orbit_torch-0.0.4a1.dist-info/METADATA +0 -25
orbit_torch-0.0.4a1.dist-info/RECORD +0 -29
{orbit_torch-0.0.4a1.dist-info → orbit_torch-0.1.0b1.dist-info}/WHEEL +0 -0
{orbit_torch-0.0.4a1.dist-info → orbit_torch-0.1.0b1.dist-info}/top_level.txt +0 -0

orbit/model/block/mlp.py ADDED Viewed

@@ -0,0 +1,68 @@
+import torch
+import torch.nn as nn
+import torch.nn.functional as F
+from orbit.model import BaseBlock, register_model
+@register_model()
+class MLP(BaseBlock):
+    ''' Multi-layer perceptron (MLP) block.
+    Supports a plain two-layer MLP and a gated MLP; both use SiLU as the
+    activation.
+    Args:
+        in_features (int): Input feature dimension.
+        hidden_features (int): Hidden layer feature dimension.
+        out_features (int, optional): Output feature dimension. Falls back to
+            in_features when None (or any other falsy value). Defaults to None.
+        bias (bool, optional): Whether the linear layers carry a bias term.
+            Defaults to True.
+        use_gate (bool, optional): Whether to build the gated variant
+            (gate_proj / up_proj / down_proj) instead of fc1 / fc2.
+            Defaults to False.
+        dropout (float, optional): Dropout probability, applied after the
+            hidden activation and after the output projection in both
+            variants. Defaults to 0.0.
+    '''
+    def __init__(
+        self,
+        in_features: int,
+        hidden_features: int,
+        out_features: int = None,
+        bias: bool = True,
+        use_gate: bool = False,
+        dropout: float = 0.0
+    ):
+        super(MLP, self).__init__()
+        out_features = out_features or in_features
+        self.in_features = in_features
+        self.hidden_features = hidden_features
+        self.out_features = out_features
+        self.bias = bias
+        self.use_gate = use_gate
+        self.dropout = nn.Dropout(dropout)
+        self.act = nn.SiLU()
+        if use_gate:
+            self.gate_proj = nn.Linear(in_features, hidden_features, bias=bias)
+            self.up_proj = nn.Linear(in_features, hidden_features, bias=bias)
+            self.down_proj = nn.Linear(hidden_features, out_features, bias=bias)
+        else:
+            self.fc1 = nn.Linear(in_features, hidden_features, bias=bias)
+            self.fc2 = nn.Linear(hidden_features, out_features, bias=bias)
+    def forward(self, x: torch.Tensor) -> torch.Tensor:
+        ''' Forward pass.
+        Args:
+            x (torch.Tensor): Input tensor whose last dimension is in_features.
+        Returns:
+            torch.Tensor: Output tensor whose last dimension is out_features.
+        '''
+        if self.use_gate:
+            # Gated path: the activated gate branch modulates the up branch.
+            hidden = self.act(self.gate_proj(x)) * self.up_proj(x)
+            # Apply dropout at the same two points as the plain path so the
+            # `dropout` argument is honoured regardless of `use_gate`
+            # (previously it was silently ignored here).
+            hidden = self.dropout(hidden)
+            return self.dropout(self.down_proj(hidden))
+        x = self.fc1(x)
+        x = self.act(x)
+        x = self.dropout(x)
+        x = self.fc2(x)
+        x = self.dropout(x)
+        return x

orbit/model/block/moe.py ADDED Viewed

@@ -0,0 +1,94 @@
+import torch
+import torch.nn as nn
+import torch.nn.functional as F
+from orbit.model import BaseBlock, register_model
+from orbit.model.block.mlp import MLP
+from orbit.model.block.gate import TopKGate
+@register_model()
+class MoE(BaseBlock):
+    ''' Sparse mixture-of-experts block.
+    A TopKGate router selects `top_k` of `num_experts` MLP experts for every
+    token; the selected experts' outputs are combined using the weights
+    returned by the router.
+    Args:
+        in_features (int): Input feature dimension.
+        out_features (int): Output feature dimension of every expert.
+        num_experts (int, optional): Number of experts. Defaults to 4.
+        top_k (int, optional): Experts selected per token. Defaults to 2.
+        hidden_features (int, optional): Hidden dimension passed to the experts
+            and to the router. Falls back to in_features when None (or any
+            other falsy value). Defaults to None.
+        dropout (float, optional): Dropout probability passed to each expert.
+            Defaults to 0.1.
+        use_gate (bool, optional): Passed to each expert MLP as `use_gate`.
+            Defaults to False.
+        use_mlp_router (bool, optional): Passed to the router as `use_mlp`.
+            Defaults to False.
+    '''
+    def __init__(
+        self,
+        in_features: int,
+        out_features: int,
+        num_experts: int = 4,
+        top_k: int = 2,
+        hidden_features: int = None,
+        dropout: float = 0.1,
+        use_gate: bool = False,
+        use_mlp_router: bool = False
+    ):
+        super(MoE, self).__init__()
+        hidden_features = hidden_features or in_features
+        self.in_features = in_features
+        self.hidden_features = hidden_features
+        self.out_features = out_features
+        self.dropout = dropout
+        self.num_experts = num_experts
+        self.top_k = top_k
+        self.use_gate = use_gate
+        self.use_mlp_router = use_mlp_router
+        self.router = TopKGate(
+            in_features=in_features,
+            out_features=num_experts,
+            k=top_k,
+            use_mlp=use_mlp_router,
+            hidden_features=hidden_features,
+            post_softmax=True
+        )
+        self.experts = nn.ModuleList([
+            MLP(
+                in_features=in_features,
+                hidden_features=hidden_features,
+                out_features=out_features,
+                dropout=dropout,
+                use_gate=use_gate
+            )
+            for _ in range(num_experts)
+        ])
+    def forward(self, x: torch.Tensor) -> tuple[torch.Tensor, torch.Tensor]:
+        ''' Forward pass.
+        Args:
+            x (torch.Tensor): Input tensor. Shape: [..., in_features], typically
+                [batch_size, seq_len, in_features]. Non-contiguous inputs are
+                accepted.
+        Returns:
+            tuple[torch.Tensor, torch.Tensor]:
+                - Output tensor. Shape: [..., out_features] with the same
+                  leading dimensions as `x`.
+                - Auxiliary load-balancing loss (scalar).
+        '''
+        lead_shape = x.shape[:-1]
+        # reshape (not view) so transposed / sliced inputs do not raise.
+        x_flat = x.reshape(-1, x.shape[-1])
+        # The router output is used through its `logits`, `indices` and
+        # `values` attributes; indices/values are indexed as [token, k].
+        gate_output = self.router(x_flat)
+        routing_probs = F.softmax(gate_output.logits, dim=-1)
+        # One-hot style mask marking which experts were selected per token.
+        selection_mask = torch.zeros_like(routing_probs).scatter_(1, gate_output.indices, 1.0)
+        fraction = selection_mask.mean(dim=0)
+        mean_probs = routing_probs.mean(dim=0)
+        aux_loss = self.num_experts * (fraction * mean_probs).sum()
+        final_output = torch.zeros(x_flat.shape[0], self.out_features, device=x.device, dtype=x.dtype)
+        for i, expert in enumerate(self.experts):
+            token_idx, k_idx = torch.where(gate_output.indices == i)
+            if token_idx.numel() == 0:
+                continue
+            expert_out = expert(x_flat[token_idx])
+            w = gate_output.values[token_idx, k_idx].unsqueeze(-1)
+            # index_add_ requires source and destination dtypes to match; the
+            # cast is a no-op when they already do.
+            contribution = (expert_out * w).to(final_output.dtype)
+            final_output.index_add_(0, token_idx, contribution)
+        return final_output.view(*lead_shape, self.out_features), aux_loss

orbit/model/block/tcn.py ADDED Viewed

@@ -0,0 +1,99 @@
+import torch
+import torch.nn as nn
+from typing import List, Optional
+from orbit.model import BaseBlock, register_model
+from orbit.model.block.conv import CausalConv1d
+@register_model()
+class TCN(BaseBlock):
+    '''
+    Temporal Convolutional Network (TCN).
+    A stack of CausalConv1d layers, built either from an explicit list of
+    per-layer channel counts or automatically from a target receptive field.
+    '''
+    def __init__(
+        self,
+        in_channels: int,
+        num_channels: Optional[List[int]] = None,
+        out_channels: Optional[int] = None,
+        step: Optional[int] = None,
+        kernel_size: int = 3,
+        dropout: float = 0.2,
+        use_res: bool = True,
+        norm: str = None,
+        activation: str = 'leaky_relu',
+        leaky_relu: float = 0.1
+    ):
+        '''
+        Build the TCN.
+        Args:
+            in_channels (int): Number of input channels.
+            num_channels (List[int], optional): Output channels of each layer.
+                When given, out_channels and step are ignored.
+            out_channels (int, optional): Output channels used in automatic mode.
+            step (int, optional): Target receptive field (time steps) used in
+                automatic mode.
+            kernel_size (int, optional): Convolution kernel size. Defaults to 3.
+            dropout (float, optional): Dropout probability. Defaults to 0.2.
+            use_res (bool, optional): Whether to use residual connections.
+                Defaults to True.
+            norm (str, optional): Normalization type forwarded to CausalConv1d.
+                Defaults to None.
+            activation (str, optional): Activation type. Defaults to 'leaky_relu'.
+            leaky_relu (float, optional): Negative slope forwarded to
+                CausalConv1d. Defaults to 0.1.
+        Raises:
+            ValueError: If neither num_channels nor both step and out_channels
+                are provided.
+        '''
+        super(TCN, self).__init__()
+        self.in_channels = in_channels
+        self.kernel_size = kernel_size
+        self.dropout = dropout
+        # Keyword arguments shared by both construction modes.
+        conv_kwargs = dict(
+            kernel_size=kernel_size,
+            norm=norm,
+            activation=activation,
+            leaky_relu=leaky_relu,
+            use_res=use_res,
+            dropout=dropout
+        )
+        if num_channels is None:
+            # Automatic mode needs both the receptive field and the width.
+            if step is None or out_channels is None:
+                raise ValueError("Must provide either 'num_channels' (list) or both 'step' and 'out_channels' (int).")
+            self.network = CausalConv1d.auto_block(
+                in_channels=in_channels,
+                out_channels=out_channels,
+                step=step,
+                **conv_kwargs
+            )
+            self.out_channels = out_channels
+            return
+        # Manual mode: layer `level` maps widths[level] -> widths[level + 1]
+        # with dilation 2 ** level.
+        widths = [in_channels, *num_channels]
+        stages = [
+            CausalConv1d(
+                in_channels=width_in,
+                out_channels=width_out,
+                dilation=2 ** level,
+                **conv_kwargs
+            )
+            for level, (width_in, width_out) in enumerate(zip(widths, widths[1:]))
+        ]
+        self.network = nn.Sequential(*stages)
+        self.out_channels = num_channels[-1]
+    def forward(self, x: torch.Tensor) -> torch.Tensor:
+        '''
+        Forward pass.
+        Args:
+            x (torch.Tensor): Input tensor. Shape: [Batch, in_channels, Seq_Len]
+        Returns:
+            torch.Tensor: Output tensor. Shape: [Batch, out_channels, Seq_Len]
+        '''
+        out = self.network(x)
+        return out

orbit/model/config.py ADDED Viewed

@@ -0,0 +1,62 @@
+import json
+import os
+from typing import Any, Dict
+class ModelConfig:
+    '''Base configuration class for managing model hyperparameters.
+    Supports loading from and saving to JSON files, plus dict-style access
+    to the stored attributes.
+    '''
+    def __init__(self, **kwargs):
+        # Every keyword argument becomes an instance attribute.
+        for k, v in kwargs.items():
+            setattr(self, k, v)
+    @classmethod
+    def from_pretrained(cls, path: str) -> 'ModelConfig':
+        '''Load a configuration from a JSON file.
+        Args:
+            path (str): Path to the JSON file.
+        Returns:
+            ModelConfig: The loaded configuration object.
+        Raises:
+            FileNotFoundError: If `path` does not exist.
+        '''
+        if not os.path.exists(path):
+            raise FileNotFoundError(f"Config file not found: {path}")
+        with open(path, 'r', encoding='utf-8') as f:
+            config_dict = json.load(f)
+        return cls(**config_dict)
+    def save_pretrained(self, path: str):
+        '''Save the configuration to a JSON file.
+        Missing parent directories are created.
+        Args:
+            path (str): Destination file path.
+        '''
+        directory = os.path.dirname(path)
+        if directory and not os.path.exists(directory):
+            # exist_ok closes the check-then-create race: another process may
+            # create the directory between the exists() test and makedirs().
+            os.makedirs(directory, exist_ok=True)
+        with open(path, 'w', encoding='utf-8') as f:
+            json.dump(self.to_dict(), f, indent=4, ensure_ascii=False)
+    def to_dict(self) -> Dict[str, Any]:
+        '''Return the instance attributes whose names do not start with "_".'''
+        return {k: v for k, v in self.__dict__.items() if not k.startswith('_')}
+    def __getitem__(self, key):
+        # Delegates to getattr, so a missing key raises AttributeError
+        # (not KeyError).
+        return getattr(self, key)
+    def __setitem__(self, key, value):
+        setattr(self, key, value)
+    def __contains__(self, key):
+        # hasattr-based: also True for methods and class-level attributes.
+        return hasattr(self, key)
+    def get(self, key, default=None):
+        '''Return attribute `key`, or `default` when it is not set.'''
+        return getattr(self, key, default)
+    def __repr__(self):
+        return f"{self.__class__.__name__}({self.to_dict()})"

orbit/model/kit/__init__.py ADDED Viewed

@@ -0,0 +1,6 @@
+from .discriminator import (
+    NLayerDiscriminator
+)
+from .losses import (
+    VQGANLossOutput, VQGANDiscriminatorLog, VQGANGeneratorLog, VQGANLoss
+)

orbit/model/kit/discriminator.py ADDED Viewed

@@ -0,0 +1,46 @@
+import torch.nn as nn
+from orbit.model import BaseBlock, register_model
+@register_model()
+class NLayerDiscriminator(BaseBlock):
+    '''
+    PatchGAN discriminator.
+    The output is not a single scalar but a 2-D map of logits, one per image
+    patch, each scoring whether that patch is real or fake.
+    Args:
+        input_nc (int): Number of input image channels. Defaults to 3.
+        ndf (int): Base number of filters of the first convolution. Defaults to 64.
+        n_layers (int): Controls the number of stride-2 stages; the channel
+            multiplier doubles per stage and is capped at 8. Defaults to 3.
+    '''
+    def __init__(self, input_nc=3, ndf=64, n_layers=3):
+        super().__init__()
+        def normed_stage(mult_in, mult_out, stride):
+            # Conv (no bias, BatchNorm follows) -> BatchNorm -> LeakyReLU.
+            return [
+                nn.Conv2d(ndf * mult_in, ndf * mult_out, kernel_size=4, stride=stride, padding=1, bias=False),
+                nn.BatchNorm2d(ndf * mult_out),
+                nn.LeakyReLU(0.2, True)
+            ]
+        # Stem: strided convolution without normalization.
+        layers = [
+            nn.Conv2d(input_nc, ndf, kernel_size=4, stride=2, padding=1),
+            nn.LeakyReLU(0.2, True)
+        ]
+        # Down-sampling stages with stride 2.
+        mult_prev = 1
+        for depth in range(1, n_layers):
+            mult = min(2 ** depth, 8)
+            layers.extend(normed_stage(mult_prev, mult, 2))
+            mult_prev = mult
+        # One more stage at stride 1, then the single-channel logit head.
+        mult_last = min(2 ** n_layers, 8)
+        layers.extend(normed_stage(mult_prev, mult_last, 1))
+        layers.append(nn.Conv2d(ndf * mult_last, 1, kernel_size=4, stride=1, padding=1))
+        self.main = nn.Sequential(*layers)
+    def forward(self, input):
+        ''' Return the patch logits produced by the convolution stack. '''
+        logits = self.main(input)
+        return logits

orbit/model/kit/losses.py ADDED Viewed

@@ -0,0 +1,193 @@
+import torch
+import torch.nn as nn
+import torch.nn.functional as F
+from lpips import LPIPS
+from dataclasses import dataclass
+from typing import Dict, Tuple, Optional, Union
+@dataclass
+class VQGANGeneratorLog:
+    ''' Log record for the VQGAN generator stage.
+    Attributes:
+        total_loss (torch.Tensor): Total loss.
+        quant_loss (torch.Tensor): Quantization loss.
+        nll_loss (torch.Tensor): Negative log-likelihood loss (reconstruction loss).
+        p_loss (torch.Tensor): Perceptual loss.
+        rec_loss (torch.Tensor): Total reconstruction loss (pixel + perceptual).
+        d_weight (torch.Tensor): Adaptive weight of the adversarial loss.
+        g_loss (torch.Tensor): Generator adversarial loss.
+    '''
+    total_loss: torch.Tensor
+    quant_loss: torch.Tensor
+    nll_loss: torch.Tensor
+    p_loss: torch.Tensor
+    rec_loss: torch.Tensor
+    d_weight: torch.Tensor
+    g_loss: torch.Tensor
+@dataclass
+class VQGANDiscriminatorLog:
+    ''' Log record for the VQGAN discriminator stage.
+    Attributes:
+        disc_loss (torch.Tensor): Total discriminator loss.
+        logits_real (torch.Tensor): Mean logits on real samples.
+        logits_fake (torch.Tensor): Mean logits on generated samples.
+    '''
+    disc_loss: torch.Tensor
+    logits_real: torch.Tensor
+    logits_fake: torch.Tensor
+@dataclass
+class VQGANLossOutput:
+    ''' Output record of VQGANLoss.
+    Attributes:
+        loss (torch.Tensor): Total loss scalar.
+        log (Union[VQGANGeneratorLog, VQGANDiscriminatorLog]): Loss log object
+            for the stage that was evaluated.
+    '''
+    loss: torch.Tensor
+    log: Union[VQGANGeneratorLog, VQGANDiscriminatorLog]
+class VQGANLoss(nn.Module):
+    ''' Loss module for VQGAN training.
+    Combines an L1 reconstruction loss, an LPIPS perceptual loss, a hinge
+    adversarial loss and the quantizer (codebook) loss.
+    '''
+    def __init__(
+            self,
+            disc_start: int = 10000,
+            kl_weight: float = 1.0,
+            pixelloss_weight: float = 1.0,
+            perceptual_weight: float = 1.0,
+            disc_weight: float = 0.8,
+            disc_factor: float = 1.0
+        ):
+        ''' Initialize VQGANLoss.
+        Args:
+            disc_start (int): Global step from which the adversarial terms are
+                enabled. Defaults to 10000.
+            kl_weight (float): Weight applied to `quantizer_loss` in the
+                generator loss. Defaults to 1.0.
+            pixelloss_weight (float): Weight of the pixel-level reconstruction
+                loss. Defaults to 1.0.
+            perceptual_weight (float): Weight of the perceptual loss; the LPIPS
+                forward pass is skipped unless it is > 0. Defaults to 1.0.
+            disc_weight (float): Multiplier applied to the adaptive adversarial
+                weight. Defaults to 0.8.
+            disc_factor (float): Scale of the adversarial terms once
+                `global_step >= disc_start` (0 before that). Defaults to 1.0.
+        '''
+        super().__init__()
+        self.kl_weight = kl_weight
+        self.pixel_weight = pixelloss_weight
+        self.perceptual_weight = perceptual_weight
+        self.disc_factor = disc_factor
+        self.disc_weight = disc_weight
+        self.disc_start = disc_start
+        # NOTE(review): .eval() here is reset if a parent later calls .train()
+        # on this module, and the LPIPS parameters are not frozen explicitly -
+        # confirm whether that is intended.
+        self.perceptual_loss = LPIPS(net='vgg', verbose=False).eval()
+    def calculate_adaptive_weight(self, nll_loss, g_loss, last_layer_weights):
+        ''' Compute the adaptive adversarial loss weight (lambda).
+        The weight is the ratio of the gradient norms of the two losses with
+        respect to `last_layer_weights`, clamped to [0, 1e4], detached and
+        scaled by `disc_weight`.
+        Args:
+            nll_loss (torch.Tensor): Reconstruction loss.
+            g_loss (torch.Tensor): Generator adversarial loss.
+            last_layer_weights (torch.Tensor): Weights of the decoder's last layer.
+        Returns:
+            torch.Tensor: The adaptive weight.
+        '''
+        nll_grads = torch.autograd.grad(nll_loss, last_layer_weights, retain_graph=True)[0]
+        g_grads = torch.autograd.grad(g_loss, last_layer_weights, retain_graph=True)[0]
+        # 1e-4 keeps the ratio finite when the adversarial gradient vanishes.
+        d_weight = torch.norm(nll_grads) / (torch.norm(g_grads) + 1e-4)
+        d_weight = torch.clamp(d_weight, 0.0, 1e4).detach()
+        return d_weight * self.disc_weight
+    def _reconstruction_terms(self, inputs, reconstructions, mask):
+        ''' Return (nll_loss, p_loss_scalar, rec_loss_total) for the generator stage. '''
+        rec_loss_tensor = torch.abs(inputs - reconstructions)
+        if mask is not None:
+            # Bring the mask to the spatial size of the images before use.
+            if mask.shape[-2:] != rec_loss_tensor.shape[-2:]:
+                mask = F.interpolate(mask, size=rec_loss_tensor.shape[-2:], mode='nearest')
+            mask_expanded = mask.expand_as(rec_loss_tensor)
+            # Average over the masked elements only; 1e-6 guards an all-zero mask.
+            nll_loss = (rec_loss_tensor * mask_expanded).sum() / (mask_expanded.sum() + 1e-6)
+        else:
+            nll_loss = torch.mean(rec_loss_tensor)
+        p_loss_scalar = torch.tensor(0.0, device=inputs.device)
+        if self.perceptual_weight > 0:
+            p_loss = self.perceptual_loss(inputs, reconstructions)
+            p_loss_scalar = p_loss.mean()
+        rec_loss_total = nll_loss * self.pixel_weight + self.perceptual_weight * p_loss_scalar
+        return nll_loss, p_loss_scalar, rec_loss_total
+    def forward(
+            self,
+            inputs: torch.Tensor,
+            reconstructions: torch.Tensor,
+            quantizer_loss: torch.Tensor,
+            global_step: int,
+            last_layer_weights: torch.Tensor,
+            discriminator: nn.Module,
+            optimizer_idx: int,
+            mask: torch.Tensor = None
+        ) -> VQGANLossOutput:
+        ''' Compute the loss for one optimizer stage.
+        Args:
+            inputs (torch.Tensor): Original input images.
+            reconstructions (torch.Tensor): Reconstructed images.
+            quantizer_loss (torch.Tensor): Quantizer loss.
+            global_step (int): Current global step.
+            last_layer_weights (torch.Tensor): Weights of the decoder's last layer.
+            discriminator (nn.Module): Discriminator model.
+            optimizer_idx (int): Optimizer index (0 = generator, 1 = discriminator).
+            mask (torch.Tensor, optional): Valid-region mask used by the
+                generator stage's reconstruction loss. Defaults to None.
+        Returns:
+            VQGANLossOutput: Object holding `loss` and `log`.
+        Raises:
+            ValueError: If `optimizer_idx` is neither 0 nor 1.
+            RuntimeError: If the adaptive weight cannot be computed while the
+                module is in training mode.
+        '''
+        if optimizer_idx not in (0, 1):
+            # Previously this fell through and returned None.
+            raise ValueError(f"optimizer_idx must be 0 (generator) or 1 (discriminator), got {optimizer_idx!r}")
+        # Honour the configured disc_factor (it used to be hard-coded to 1).
+        disc_factor = self.disc_factor if global_step >= self.disc_start else 0.0
+        if optimizer_idx == 1:
+            # Discriminator stage: detach so no gradient reaches the generator.
+            logits_real = discriminator(inputs.detach())
+            logits_fake = discriminator(reconstructions.detach())
+            # Hinge Loss
+            loss_real = torch.mean(F.relu(1. - logits_real))
+            loss_fake = torch.mean(F.relu(1. + logits_fake))
+            d_loss = disc_factor * 0.5 * (loss_real + loss_fake)
+            log = VQGANDiscriminatorLog(
+                disc_loss=d_loss.detach(),
+                logits_real=logits_real.mean().detach(),
+                logits_fake=logits_fake.mean().detach()
+            )
+            return VQGANLossOutput(loss=d_loss, log=log)
+        # Generator stage. The reconstruction / perceptual terms are only
+        # needed here, so they are not computed for the discriminator stage.
+        nll_loss, p_loss_scalar, rec_loss_total = self._reconstruction_terms(inputs, reconstructions, mask)
+        logits_fake = discriminator(reconstructions)
+        g_loss = -torch.mean(logits_fake)
+        try:
+            d_weight = self.calculate_adaptive_weight(rec_loss_total, g_loss, last_layer_weights)
+        except RuntimeError:
+            # Without a graph (e.g. evaluation under no_grad) the gradients
+            # cannot be taken; fall back to 0 there, but never hide the error
+            # during training. An explicit check survives `python -O`.
+            if self.training:
+                raise
+            d_weight = torch.tensor(0.0, device=inputs.device)
+        loss = rec_loss_total + \
+               self.kl_weight * quantizer_loss + \
+               d_weight * disc_factor * g_loss
+        log = VQGANGeneratorLog(
+            total_loss=loss.detach(),
+            quant_loss=quantizer_loss.detach(),
+            nll_loss=nll_loss.detach(),
+            p_loss=p_loss_scalar.detach(),
+            rec_loss=rec_loss_total.detach(),
+            d_weight=d_weight.detach(),
+            g_loss=g_loss.detach()
+        )
+        return VQGANLossOutput(loss=loss, log=log)

orbit/model/motif/__init__.py ADDED Viewed

File without changes

orbit/model/motif/vision/__init__.py ADDED Viewed

File without changes

orbit-torch 0.0.4a1__py3-none-any.whl → 0.1.0b1__py3-none-any.whl

orbit-torch 0.0.4a1 (py3-none-any.whl) → 0.1.0b1 (py3-none-any.whl)