deeplotx-0.6.1-py3-none-any.whl → deeplotx-0.8.1-py3-none-any.whl

This diff shows the contents of two publicly released versions of the package, as published to their respective public registries. It is provided for informational purposes only.
deeplotx/__init__.py CHANGED
@@ -5,14 +5,16 @@ __ROOT__ = os.path.dirname(os.path.abspath(__file__))
 
 from .encoder import Encoder, LongTextEncoder, LongformerEncoder
 from .nn import (
-    BaseNeuralNetwork,
     FeedForward,
     LinearRegression,
     LogisticRegression,
     SoftmaxRegression,
     RecursiveSequential,
     LongContextRecursiveSequential,
-    SelfAttention,
+    RoPE,
+    Attention,
+    MultiHeadAttention,
+    RoFormerEncoder,
    AutoRegression,
    LongContextAutoRegression
 )
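
With this change the top-level namespace gains the new rotary-attention stack and no longer re-exports BaseNeuralNetwork or SelfAttention, so downstream code imports the new names directly:

    from deeplotx import RoPE, Attention, MultiHeadAttention, RoFormerEncoder
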
deeplotx/encoder/long_text_encoder.py CHANGED
@@ -17,6 +17,7 @@ class LongTextEncoder(Encoder):
                  overlapping: int = 32, model_name_or_path: str = DEFAULT_BERT,
                  cache_capacity: int = 64, max_workers: int = 8, device: str | None = None):
         super().__init__(model_name_or_path=model_name_or_path, device=device)
+        assert overlapping < chunk_size, f'overlapping ({overlapping}) must be less than chunk size ({chunk_size}).'
         self._max_length = max_length
         self._chunk_size = chunk_size
         self._overlapping = overlapping
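
The new assertion guards the chunking arithmetic: consecutive chunks advance by chunk_size - overlapping tokens, which must stay positive. A minimal sketch of that invariant (the actual chunking loop is not part of this diff, and the chunk_size value below is illustrative):

    # Illustrative only: consecutive chunks step by (chunk_size - overlapping).
    def chunk_spans(num_tokens: int, chunk_size: int, overlapping: int = 32) -> list[tuple[int, int]]:
        assert overlapping < chunk_size, 'otherwise the stride is <= 0 and chunking never advances'
        stride = chunk_size - overlapping
        return [(start, min(start + chunk_size, num_tokens))
                for start in range(0, num_tokens, stride)]

    print(chunk_spans(1000, chunk_size=448))  # [(0, 448), (416, 864), (832, 1000)]
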
deeplotx/nn/__init__.py CHANGED
@@ -5,6 +5,9 @@ from .logistic_regression import LogisticRegression
 from .softmax_regression import SoftmaxRegression
 from .recursive_sequential import RecursiveSequential
 from .long_context_recursive_sequential import LongContextRecursiveSequential
-from .self_attention import SelfAttention
+from .rope import RoPE
+from .attention import Attention
+from .multi_head_attention import MultiHeadAttention
+from .roformer_encoder import RoFormerEncoder
 from .auto_regression import AutoRegression
 from .long_context_auto_regression import LongContextAutoRegression
deeplotx/nn/self_attention.py → deeplotx/nn/attention.py RENAMED
@@ -4,14 +4,17 @@ import torch
 
 from deeplotx.nn.base_neural_network import BaseNeuralNetwork
 from deeplotx.nn.feed_forward import FeedForward
+from deeplotx.nn.rope import RoPE, DEFAULT_THETA
 
 
-class SelfAttention(BaseNeuralNetwork):
-    def __init__(self, feature_dim: int, bias: bool = True, proj_layers: int = 1,
-                 proj_expansion_factor: int | float = 1.5, dropout_rate: float = 0.02,
-                 model_name: str | None = None, device: str | None = None, dtype: torch.dtype | None = None):
+class Attention(BaseNeuralNetwork):
+    def __init__(self, feature_dim: int, bias: bool = True, positional: bool = True,
+                 proj_layers: int = 1, proj_expansion_factor: int | float = 1.5, dropout_rate: float = 0.02,
+                 model_name: str | None = None, device: str | None = None, dtype: torch.dtype | None = None,
+                 **kwargs):
         super().__init__(in_features=feature_dim, out_features=feature_dim, model_name=model_name,
                          device=device, dtype=dtype)
+        self._positional = positional
         self._feature_dim = feature_dim
         self.q_proj = FeedForward(feature_dim=self._feature_dim, num_layers=proj_layers,
                                   expansion_factor=proj_expansion_factor,
@@ -22,18 +25,24 @@ class SelfAttention(BaseNeuralNetwork):
         self.v_proj = FeedForward(feature_dim=self._feature_dim, num_layers=proj_layers,
                                   expansion_factor=proj_expansion_factor,
                                   bias=bias, dropout_rate=dropout_rate, device=self.device, dtype=self.dtype)
+        if self._positional:
+            self.rope = RoPE(feature_dim=self._feature_dim, theta=kwargs.get('theta', DEFAULT_THETA),
+                             device=self.device, dtype=self.dtype)
 
-    def _attention(self, x: torch.Tensor, mask: torch.Tensor | None = None) -> torch.Tensor:
-        q, k = self.q_proj(x), self.k_proj(x)
+    def _attention(self, x: torch.Tensor, y: torch.Tensor, mask: torch.Tensor | None = None) -> torch.Tensor:
+        q, k = self.q_proj(x), self.k_proj(y)
+        if self._positional:
+            q, k = self.rope(q), self.rope(k)
         attn = torch.matmul(q, k.transpose(-2, -1))
         attn = attn / (self._feature_dim ** 0.5)
         attn = attn.masked_fill(mask == 0, -1e9) if mask is not None else attn
         return torch.softmax(attn, dim=-1)
 
     @override
-    def forward(self, x: torch.Tensor, mask: torch.Tensor | None = None) -> torch.Tensor:
+    def forward(self, x: torch.Tensor, y: torch.Tensor | None = None, mask: torch.Tensor | None = None) -> torch.Tensor:
         x = self.ensure_device_and_dtype(x, device=self.device, dtype=self.dtype)
+        y = x if y is None else self.ensure_device_and_dtype(y, device=self.device, dtype=self.dtype)
         if mask is not None:
             mask = self.ensure_device_and_dtype(mask, device=self.device, dtype=self.dtype)
-        v = self.v_proj(x)
-        return torch.matmul(self._attention(x, mask), v)
+        v = self.v_proj(y)
+        return torch.matmul(self._attention(x, y, mask), v)
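
forward() now accepts an optional second sequence, so one module covers both self-attention (y defaults to x) and cross-attention, with RoPE applied to q and k whenever positional=True. A shape sketch, assuming [batch, seq_len, feature_dim] inputs:

    import torch
    from deeplotx.nn import Attention

    attn = Attention(feature_dim=64, positional=True)
    x = torch.randn(2, 10, 64)        # query sequence
    y = torch.randn(2, 16, 64)        # key/value sequence
    self_out = attn(x)                # y is None -> attends over x itself
    cross_out = attn(x, y)            # queries from x, keys/values from y
    print(self_out.shape, cross_out.shape)  # both torch.Size([2, 10, 64])
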
deeplotx/nn/auto_regression.py CHANGED
@@ -4,12 +4,11 @@ from deeplotx.nn import RecursiveSequential
 
 
 class AutoRegression(RecursiveSequential):
-    def __init__(self, feature_dim: int, hidden_dim: int | None = None,
-                 recursive_layers: int = 2, ffn_layers: int = 1, ffn_expansion_factor: int | float = 2,
-                 ffn_bias: bool = True, ffn_dropout_rate: float = 0.05, model_name: str | None = None,
-                 device: str | None = None, dtype: torch.dtype | None = None):
-        super().__init__(input_dim=feature_dim, output_dim=feature_dim,
-                         hidden_dim=hidden_dim, recursive_layers=recursive_layers,
+    def __init__(self, feature_dim: int, bias: bool = True,
+                 recursive_layers: int = 1, recursive_hidden_dim: int | None = None,
+                 ffn_layers: int = 1, ffn_expansion_factor: int | float = 2, dropout_rate: float = 0.05,
+                 model_name: str | None = None, device: str | None = None, dtype: torch.dtype | None = None):
+        super().__init__(input_dim=feature_dim, output_dim=feature_dim, bias=bias,
+                         recursive_layers=recursive_layers, recursive_hidden_dim=recursive_hidden_dim,
                          ffn_layers=ffn_layers, ffn_expansion_factor=ffn_expansion_factor,
-                         ffn_bias=ffn_bias, ffn_dropout_rate=ffn_dropout_rate,
-                         model_name=model_name, device=device, dtype=dtype)
+                         dropout_rate=dropout_rate, model_name=model_name, device=device, dtype=dtype)
deeplotx/nn/base_neural_network.py CHANGED
@@ -3,6 +3,7 @@ from abc import abstractmethod
 
 import torch
 from torch import nn
+from torch.nn import init
 
 DEFAULT_SUFFIX = 'dlx'
 
@@ -36,6 +37,44 @@ class BaseNeuralNetwork(nn.Module):
             x = x.to(dtype)
         return x
 
+    def initialize_weights(self):
+        for m in self.modules():
+            match m.__class__:
+                case nn.Linear:
+                    init.kaiming_normal_(m.weight, mode='fan_in', nonlinearity='leaky_relu')
+                    if m.bias is not None:
+                        init.constant_(m.bias, 0)
+                case nn.BatchNorm2d | nn.BatchNorm1d | nn.BatchNorm3d:
+                    init.constant_(m.weight, 1)
+                    init.constant_(m.bias, 0)
+                case nn.LSTM | nn.GRU:
+                    for name, param in m.named_parameters():
+                        _tmp_name = name.lower()
+                        if 'weight_ih' in _tmp_name:
+                            init.kaiming_normal_(param, mode='fan_in', nonlinearity='sigmoid')
+                        elif 'weight_hh' in _tmp_name:
+                            init.orthogonal_(param)
+                        elif 'bias' in _tmp_name:
+                            init.constant_(param, 0)
+                case _:
+                    pass
+        return self
+
+    def size(self) -> dict:
+        total_params = trainable_params = non_trainable_params = 0
+        for param in self.parameters():
+            params = param.numel()
+            total_params += params
+            if param.requires_grad:
+                trainable_params += params
+            else:
+                non_trainable_params += params
+        return {
+            'total': total_params,
+            'trainable': trainable_params,
+            'non_trainable': non_trainable_params
+        }
+
     def l1(self, _lambda: float = 1e-4) -> torch.Tensor:
         def _l1() -> torch.Tensor:
             l2_reg = torch.tensor(0., device=self.device, dtype=self.dtype)
@@ -77,3 +116,25 @@ class BaseNeuralNetwork(nn.Module):
         model_file_name = f'{model_name}.{_suffix}' if model_name is not None else f'{self._model_name}.{_suffix}'
         self.load_state_dict(torch.load(os.path.join(model_dir, model_file_name), map_location=self.device, weights_only=True))
         return self
+
+    def __str__(self):
+        formatted = super().__str__()
+        _line_len = len(sorted(formatted.splitlines(), key=lambda _: len(_), reverse=True)[0])
+        _splitter_1 = '=' * (_line_len + 10)
+        _splitter_2 = '-' * (_line_len + 10)
+        _size = self.size()
+        total_param = _size['total']
+        trainable_param = _size['trainable']
+        non_trainable_param = _size['non_trainable']
+        formatted = (f'{_splitter_1}\n'
+                     f'Model_Name: {self._model_name}\n'
+                     f'In_Features: {self.in_features}\n'
+                     f'Out_Features: {self.out_features}\n'
+                     f'Device: {self.device}\n'
+                     f'Dtype: {self.dtype}\n'
+                     f'Total_Parameters: {total_param}\n'
+                     f'Trainable_Parameters: {trainable_param}\n'
+                     f'NonTrainable_Parameters: {non_trainable_param}\n'
+                     f'{_splitter_2}'
+                     f'\n{formatted}\n{_splitter_1}')
+        return formatted
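
initialize_weights() returns self, so it chains at construction time, and size() backs the parameter counts printed by the new __str__ banner. A short sketch with AutoRegression (any BaseNeuralNetwork subclass behaves the same):

    from deeplotx.nn import AutoRegression

    model = AutoRegression(feature_dim=32).initialize_weights()  # Kaiming / orthogonal init, returns self
    print(model.size())  # e.g. {'total': ..., 'trainable': ..., 'non_trainable': ...}
    print(model)         # framed summary: model name, in/out features, device, dtype, parameter counts
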
deeplotx/nn/feed_forward.py CHANGED
@@ -12,13 +12,13 @@ class FeedForwardUnit(BaseNeuralNetwork):
                  device: str | None = None, dtype: torch.dtype | None = None):
         super().__init__(in_features=feature_dim, out_features=feature_dim, model_name=model_name, device=device, dtype=dtype)
         self._dropout_rate = dropout_rate
-        self.fc1 = nn.Linear(feature_dim, int(feature_dim * expansion_factor), bias=bias,
-                             device=self.device, dtype=self.dtype)
-        self.fc2 = nn.Linear(int(feature_dim * expansion_factor), feature_dim, bias=bias,
-                             device=self.device, dtype=self.dtype)
-        self.parametric_relu_1 = nn.PReLU(num_parameters=1, init=5e-3,
-                                          device=self.device, dtype=self.dtype)
-        self.layer_norm = nn.LayerNorm(normalized_shape=self.fc1.in_features, eps=1e-9,
+        self.up_proj = nn.Linear(in_features=feature_dim, out_features=int(feature_dim * expansion_factor),
+                                 bias=bias, device=self.device, dtype=self.dtype)
+        self.down_proj = nn.Linear(in_features=int(feature_dim * expansion_factor), out_features=feature_dim,
+                                   bias=bias, device=self.device, dtype=self.dtype)
+        self.parametric_relu = nn.PReLU(num_parameters=1, init=5e-3,
+                                        device=self.device, dtype=self.dtype)
+        self.layer_norm = nn.LayerNorm(normalized_shape=self.up_proj.in_features, eps=1e-9,
                                        device=self.device, dtype=self.dtype)
 
     @override
@@ -26,11 +26,11 @@ class FeedForwardUnit(BaseNeuralNetwork):
         x = self.ensure_device_and_dtype(x, device=self.device, dtype=self.dtype)
         residual = x
         x = self.layer_norm(x)
-        x = self.fc1(x)
-        x = self.parametric_relu_1(x)
+        x = self.up_proj(x)
+        x = self.parametric_relu(x)
         if self._dropout_rate > .0:
             x = torch.dropout(x, p=self._dropout_rate, train=self.training)
-        return self.fc2(x) + residual
+        return self.down_proj(x) + residual
 
 
 class FeedForward(BaseNeuralNetwork):
@@ -43,7 +43,7 @@ class FeedForward(BaseNeuralNetwork):
         self.ffn_layers = nn.ModuleList([FeedForwardUnit(feature_dim=feature_dim,
                                                          expansion_factor=expansion_factor, bias=bias,
                                                          dropout_rate=dropout_rate,
-                                                         device=self.device, dtype=self.dtype)] * num_layers)
+                                                         device=self.device, dtype=self.dtype) for _ in range(num_layers)])
 
     @override
     def forward(self, x: torch.Tensor) -> torch.Tensor:
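
The ModuleList change is a behavioral fix, not a rename: [unit] * num_layers registers one FeedForwardUnit num_layers times, so every "layer" shares the same weights, while the comprehension builds independent units. The difference in plain PyTorch:

    import torch
    from torch import nn

    shared = nn.ModuleList([nn.Linear(4, 4)] * 3)                     # one module, three references
    independent = nn.ModuleList([nn.Linear(4, 4) for _ in range(3)])  # three distinct modules
    print(shared[0] is shared[2])            # True  -> gradients accumulate into one weight matrix
    print(independent[0] is independent[2])  # False -> each layer trains separately
    print(sum(p.numel() for p in shared.parameters()),
          sum(p.numel() for p in independent.parameters()))  # 20 60
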
deeplotx/nn/long_context_auto_regression.py CHANGED
@@ -4,12 +4,13 @@ from deeplotx.nn import LongContextRecursiveSequential
 
 
 class LongContextAutoRegression(LongContextRecursiveSequential):
-    def __init__(self, feature_dim: int, hidden_dim: int | None = None,
-                 recursive_layers: int = 2, ffn_layers: int = 1, ffn_expansion_factor: int | float = 2,
-                 ffn_bias: bool = True, ffn_dropout_rate: float = 0.05, model_name: str | None = None,
-                 device: str | None = None, dtype: torch.dtype | None = None):
-        super().__init__(input_dim=feature_dim, output_dim=feature_dim,
-                         hidden_dim=hidden_dim, recursive_layers=recursive_layers,
-                         ffn_layers=ffn_layers, ffn_expansion_factor=ffn_expansion_factor,
-                         ffn_bias=ffn_bias, ffn_dropout_rate=ffn_dropout_rate,
-                         model_name=model_name, device=device, dtype=dtype)
+    def __init__(self, feature_dim: int, bias: bool = True,
+                 encoder_layers: int = 1, attn_heads: int = 1, recursive_layers: int = 1, recursive_hidden_dim: int | None = None,
+                 ffn_layers: int = 1, ffn_expansion_factor: int | float = 2, dropout_rate: float = 0.05,
+                 model_name: str | None = None, device: str | None = None, dtype: torch.dtype | None = None,
+                 **kwargs):
+        super().__init__(input_dim=feature_dim, output_dim=feature_dim, bias=bias,
+                         encoder_layers=encoder_layers, attn_heads=attn_heads,
+                         recursive_layers=recursive_layers, recursive_hidden_dim=recursive_hidden_dim,
+                         ffn_layers=ffn_layers, ffn_expansion_factor=ffn_expansion_factor, dropout_rate=dropout_rate,
+                         model_name=model_name, device=device, dtype=dtype, **kwargs)
deeplotx/nn/long_context_recursive_sequential.py CHANGED
@@ -3,32 +3,34 @@ from typing_extensions import override
 import torch
 from torch import nn
 
+from deeplotx.nn.attention import DEFAULT_THETA
 from deeplotx.nn.recursive_sequential import RecursiveSequential
-from deeplotx.nn.self_attention import SelfAttention
+from deeplotx.nn.roformer_encoder import RoFormerEncoder
 
 
 class LongContextRecursiveSequential(RecursiveSequential):
-    def __init__(self, input_dim: int, output_dim: int,
-                 hidden_dim: int | None = None, recursive_layers: int = 2,
-                 ffn_layers: int = 1, ffn_expansion_factor: int | float = 2,
-                 ffn_bias: bool = True, ffn_dropout_rate: float = 0.05,
-                 model_name: str | None = None, device: str | None = None,
-                 dtype: torch.dtype | None = None, **kwargs):
-        super().__init__(input_dim=input_dim, output_dim=output_dim,
-                         hidden_dim=hidden_dim, recursive_layers=recursive_layers,
-                         ffn_layers=ffn_layers, ffn_expansion_factor=ffn_expansion_factor,
-                         ffn_bias=ffn_bias, ffn_dropout_rate=ffn_dropout_rate,
+    def __init__(self, input_dim: int, output_dim: int, bias: bool = True,
+                 encoder_layers: int = 1, attn_heads: int = 1, recursive_layers: int = 2, recursive_hidden_dim: int | None = None,
+                 ffn_layers: int = 1, ffn_expansion_factor: int | float = 2, dropout_rate: float = 0.05,
+                 model_name: str | None = None, device: str | None = None, dtype: torch.dtype | None = None,
+                 **kwargs):
+        super().__init__(input_dim=input_dim, output_dim=output_dim, bias=bias,
+                         recursive_layers=recursive_layers, recursive_hidden_dim=recursive_hidden_dim,
+                         ffn_layers=ffn_layers, ffn_expansion_factor=ffn_expansion_factor, dropout_rate=dropout_rate,
                          model_name=model_name, device=device, dtype=dtype)
-        self.self_attention = SelfAttention(feature_dim=input_dim, bias=kwargs.get('attn_proj_bias', ffn_bias),
-                                            proj_layers=kwargs.get('attn_proj_layers', 1),
-                                            proj_expansion_factor=kwargs.get('attn_proj_expansion_factor', ffn_expansion_factor),
-                                            dropout_rate=kwargs.get('attn_proj_dropout_rate', ffn_dropout_rate))
-        self.__proj = nn.Linear(in_features=input_dim * 2, out_features=input_dim,
-                                bias=ffn_bias, device=self.device, dtype=self.dtype)
+        self.roformer_encoders = nn.ModuleList([RoFormerEncoder(feature_dim=input_dim, attn_heads=attn_heads, bias=bias,
+                                                                ffn_layers=kwargs.get('encoder_ffn_layers', ffn_layers),
+                                                                ffn_expansion_factor=kwargs.get('encoder_expansion_factor', ffn_expansion_factor),
+                                                                dropout_rate=kwargs.get('encoder_dropout_rate', dropout_rate),
+                                                                attn_ffn_layers=kwargs.get('attn_ffn_layers', 1),
+                                                                attn_expansion_factor=kwargs.get('attn_expansion_factor', ffn_expansion_factor),
+                                                                attn_dropout_rate=kwargs.get('attn_dropout_rate', dropout_rate),
+                                                                theta=kwargs.get('theta', DEFAULT_THETA),
+                                                                device=self.device, dtype=self.dtype) for _ in range(encoder_layers)])
 
     @override
     def forward(self, x: torch.Tensor, state: tuple[torch.Tensor, torch.Tensor]) -> tuple[torch.Tensor, tuple[torch.Tensor, torch.Tensor]]:
         x = self.ensure_device_and_dtype(x, device=self.device, dtype=self.dtype)
-        x = torch.cat([self.self_attention(x), x], dim=-1)
-        x = nn.LayerNorm(normalized_shape=x.shape[-1], eps=1e-9, device=self.device, dtype=self.dtype)(x)
-        return super().forward(self.__proj(x), state)
+        for roformer_encoder in self.roformer_encoders:
+            x = roformer_encoder(x)
+        return super().forward(x, state)
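
The single SelfAttention + LayerNorm + projection block is replaced by a stack of encoder_layers RoFormer encoders, tuned through the encoder_*/attn_* kwargs read above. A construction sketch based on this signature (the state tuple comes from RecursiveSequential.initial_state()):

    import torch
    from deeplotx.nn import LongContextRecursiveSequential

    model = LongContextRecursiveSequential(input_dim=128, output_dim=1,
                                           encoder_layers=2, attn_heads=4,
                                           recursive_layers=2, recursive_hidden_dim=256,
                                           encoder_dropout_rate=0.1, theta=10_000)  # theta forwarded via **kwargs
    x = torch.randn(1, 50, 128)                    # [batch, seq_len, input_dim]
    out, state = model(x, model.initial_state(1))
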
deeplotx/nn/multi_head_attention.py ADDED
@@ -0,0 +1,34 @@
+from typing_extensions import override
+
+import torch
+from torch import nn
+
+from deeplotx.nn.base_neural_network import BaseNeuralNetwork
+from deeplotx.nn.attention import Attention
+
+
+class MultiHeadAttention(BaseNeuralNetwork):
+    def __init__(self, feature_dim: int, num_heads: int = 1, bias: bool = True, positional: bool = True,
+                 proj_layers: int = 1, proj_expansion_factor: int | float = 1.5, dropout_rate: float = 0.02,
+                 model_name: str | None = None, device: str | None = None, dtype: torch.dtype | None = None,
+                 **kwargs):
+        super().__init__(in_features=feature_dim, out_features=feature_dim, model_name=model_name,
+                         device=device, dtype=dtype)
+        self._num_heads = num_heads
+        self.expand_proj = nn.Linear(in_features=feature_dim, out_features=feature_dim * self._num_heads, bias=bias,
+                                     device=self.device, dtype=self.dtype)
+        self.attn_heads = nn.ModuleList([Attention(feature_dim=feature_dim, bias=bias, positional=positional,
+                                                   proj_layers=proj_layers, proj_expansion_factor=proj_expansion_factor,
+                                                   dropout_rate=dropout_rate, device=self.device, dtype=self.dtype,
+                                                   **kwargs) for _ in range(self._num_heads)])
+        self.out_proj = nn.Linear(in_features=feature_dim * self._num_heads, out_features=feature_dim, bias=bias,
+                                  device=self.device, dtype=self.dtype)
+
+    @override
+    def forward(self, x: torch.Tensor, y: torch.Tensor | None = None, mask: torch.Tensor | None = None) -> torch.Tensor:
+        x = self.ensure_device_and_dtype(x, device=self.device, dtype=self.dtype)
+        y = x if y is None else self.ensure_device_and_dtype(y, device=self.device, dtype=self.dtype)
+        x, y = self.expand_proj(x), self.expand_proj(y)
+        x_heads, y_heads = x.split(self.in_features, dim=-1), y.split(self.in_features, dim=-1)
+        head_outs = [self.attn_heads[_](x=x_heads[_], y=y_heads[_], mask=mask) for _ in range(self._num_heads)]
+        return self.out_proj(torch.concat(head_outs, dim=-1))
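
Unlike the classic multi-head layout that splits feature_dim across heads, this design widens the input to feature_dim * num_heads so every head attends over a full-width slice, then out_proj maps the concatenation back down. A quick shape check, assuming [batch, seq_len, feature_dim] inputs:

    import torch
    from deeplotx.nn import MultiHeadAttention

    mha = MultiHeadAttention(feature_dim=64, num_heads=4)
    x = torch.randn(2, 10, 64)
    print(mha(x).shape)  # torch.Size([2, 10, 64]); 4 heads of width 64, concat -> 256 -> out_proj -> 64
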
deeplotx/nn/recursive_sequential.py CHANGED
@@ -8,22 +8,22 @@ from deeplotx.nn.feed_forward import FeedForward
 
 
 class RecursiveSequential(BaseNeuralNetwork):
-    def __init__(self, input_dim: int, output_dim: int,
-                 hidden_dim: int | None = None, recursive_layers: int = 2,
-                 ffn_layers: int = 1, ffn_expansion_factor: int | float = 2,
-                 ffn_bias: bool = True, ffn_dropout_rate: float = 0.05,
-                 model_name: str | None = None, device: str | None = None,
-                 dtype: torch.dtype | None = None):
-        super().__init__(in_features=input_dim, out_features=output_dim, model_name=model_name, device=device, dtype=dtype)
-        if hidden_dim is None:
-            hidden_dim = input_dim
-        self.lstm = nn.LSTM(input_size=input_dim, hidden_size=hidden_dim,
+    def __init__(self, input_dim: int, output_dim: int, bias: bool = True,
+                 recursive_layers: int = 1, recursive_hidden_dim: int | None = None,
+                 ffn_layers: int = 1, ffn_expansion_factor: int | float = 2, dropout_rate: float = 0.05,
+                 model_name: str | None = None, device: str | None = None, dtype: torch.dtype | None = None):
+        super().__init__(in_features=input_dim, out_features=output_dim, model_name=model_name,
+                         device=device, dtype=dtype)
+        if recursive_hidden_dim is None:
+            recursive_hidden_dim = input_dim
+        self.lstm = nn.LSTM(input_size=input_dim, hidden_size=recursive_hidden_dim,
                             num_layers=recursive_layers, batch_first=True,
                             bias=True, bidirectional=True, device=self.device,
                             dtype=self.dtype)
-        self.ffn = FeedForward(feature_dim=hidden_dim * 2, num_layers=ffn_layers, expansion_factor=ffn_expansion_factor,
-                               bias=ffn_bias, dropout_rate=ffn_dropout_rate, device=self.device, dtype=self.dtype)
-        self.__proj = nn.Linear(in_features=hidden_dim * 2, out_features=output_dim, bias=ffn_bias,
+        self.ffn = FeedForward(feature_dim=recursive_hidden_dim * 2, num_layers=ffn_layers,
+                               expansion_factor=ffn_expansion_factor, bias=bias, dropout_rate=dropout_rate,
+                               device=self.device, dtype=self.dtype)
+        self.__proj = nn.Linear(in_features=recursive_hidden_dim * 2, out_features=output_dim, bias=bias,
                                 device=self.device, dtype=self.dtype)
 
     def initial_state(self, batch_size: int = 1) -> tuple[torch.Tensor, torch.Tensor]:
deeplotx/nn/roformer_encoder.py ADDED
@@ -0,0 +1,40 @@
+from typing_extensions import override
+
+import torch
+from torch import nn
+
+from deeplotx.nn.base_neural_network import BaseNeuralNetwork
+from deeplotx.nn.feed_forward import FeedForward
+from deeplotx.nn.multi_head_attention import MultiHeadAttention
+
+
+class RoFormerEncoder(BaseNeuralNetwork):
+    def __init__(self, feature_dim: int, attn_heads: int = 2, bias: bool = True,
+                 ffn_layers: int = 1, ffn_expansion_factor: int | float = 2,
+                 dropout_rate: float = 0.02, model_name: str | None = None,
+                 device: str | None = None, dtype: torch.dtype | None = None, **kwargs):
+        super().__init__(in_features=feature_dim, out_features=feature_dim,
+                         model_name=model_name, device=device, dtype=dtype)
+        self.attn = MultiHeadAttention(feature_dim=feature_dim, num_heads=attn_heads,
+                                       bias=bias, positional=True,
+                                       proj_layers=kwargs.get('attn_ffn_layers', 1),
+                                       proj_expansion_factor=kwargs.get('attn_expansion_factor', ffn_expansion_factor),
+                                       dropout_rate=kwargs.get('attn_dropout_rate', dropout_rate),
+                                       device=self.device, dtype=self.dtype, **kwargs)
+        self.ffn = FeedForward(feature_dim=feature_dim * 2, num_layers=ffn_layers,
+                               expansion_factor=ffn_expansion_factor,
+                               bias=bias, dropout_rate=dropout_rate,
+                               device=self.device, dtype=self.dtype)
+        self.layer_norm = nn.LayerNorm(normalized_shape=feature_dim, eps=1e-9,
+                                       device=self.device, dtype=self.dtype)
+        self.__proj = nn.Linear(in_features=feature_dim * 2, out_features=feature_dim,
+                                bias=bias, device=self.device, dtype=self.dtype)
+
+    @override
+    def forward(self, x: torch.Tensor, mask: torch.Tensor | None = None) -> torch.Tensor:
+        x = self.ensure_device_and_dtype(x, device=self.device, dtype=self.dtype)
+        if mask is not None:
+            mask = self.ensure_device_and_dtype(mask, device=self.device, dtype=self.dtype)
+        attn = self.attn(x=self.layer_norm(x), y=None, mask=mask)
+        x = torch.concat([attn, x], dim=-1)
+        return self.__proj(self.ffn(x))
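
The block is pre-norm with a concatenative residual: the attention output over the normalized input is concatenated with the raw input (doubling the width), then mapped back to feature_dim by the FFN and the final projection. A shape sketch:

    import torch
    from deeplotx.nn import RoFormerEncoder

    enc = RoFormerEncoder(feature_dim=64, attn_heads=2)
    x = torch.randn(2, 10, 64)
    print(enc(x).shape)  # torch.Size([2, 10, 64]); [attn, x] is 128-wide before the 128 -> 64 projection
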
deeplotx/nn/rope.py ADDED
@@ -0,0 +1,42 @@
+from typing_extensions import override
+
+import torch
+
+from deeplotx.nn.base_neural_network import BaseNeuralNetwork
+
+DEFAULT_THETA = 10_000
+
+
+class RoPE(BaseNeuralNetwork):
+    def __init__(self, feature_dim: int, theta: int = DEFAULT_THETA,
+                 device: str | None = None, dtype: torch.dtype = torch.float32):
+        super().__init__(in_features=feature_dim, out_features=feature_dim, model_name=None,
+                         device=device, dtype=dtype)
+        assert feature_dim % 2 == 0, f'feature_dim ({feature_dim}) is not divisible by 2.'
+        self._theta = theta
+        self._num_groups = feature_dim // 2
+        self._inv_freq = 1.0 / (theta ** (torch.arange(start=0, end=self._num_groups, step=1,
+                                                       device=self.device, dtype=self.dtype).float() / self._num_groups))
+        self.register_buffer('inv_freq', self._inv_freq)
+
+    @property
+    def dim(self):
+        return self.in_features
+
+    @property
+    def theta(self):
+        return self._theta
+
+    def rotate_half(self, _t: torch.Tensor) -> torch.Tensor:
+        return torch.cat((- _t[..., self._num_groups:], _t[..., :self._num_groups]), dim=-1)
+
+    @override
+    def forward(self, x: torch.Tensor) -> torch.Tensor:
+        x = self.ensure_device_and_dtype(x, device=self.device, dtype=self.dtype)
+        *other_dims, seq_len, feature_dim = x.shape
+        assert feature_dim == self.in_features, f"feature_dim of x doesn't match with defined feature_dim {self.in_features}."
+        t = torch.arange(start=0, end=seq_len, step=1, device=self.device, dtype=self.dtype)
+        freq = torch.outer(t, self._inv_freq)
+        emb = torch.cat((freq, freq), dim=-1)
+        sin_emb, cos_emb = emb.sin(), emb.cos()
+        return x * cos_emb + self.rotate_half(x) * sin_emb
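
Each channel pair (i, i + feature_dim/2) is rotated by the angle position * inv_freq[i], a pure 2D rotation, so norms are preserved and q·k products depend on relative offsets only. A quick check of the norm-preservation property, assuming the [batch, seq_len, feature_dim] layout used by Attention:

    import torch
    from deeplotx.nn import RoPE

    rope = RoPE(feature_dim=8)
    x = torch.randn(1, 5, 8)
    x_rot = rope(x)
    print(torch.allclose(x.norm(dim=-1), x_rot.norm(dim=-1), atol=1e-5))  # True: rotation preserves norms
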
deeplotx/trainer/text_binary_classification_trainer.py CHANGED
@@ -6,6 +6,7 @@ from torch import nn, optim
 from torch.utils.data import DataLoader, TensorDataset
 
 from deeplotx.encoder.long_text_encoder import LongTextEncoder
+from deeplotx.nn.attention import DEFAULT_THETA
 from deeplotx.nn.long_context_recursive_sequential import LongContextRecursiveSequential
 from deeplotx.trainer.base_trainer import BaseTrainer
 
@@ -24,8 +25,8 @@ class TextBinaryClassifierTrainer(BaseTrainer):
     def train(self, positive_texts: list[str], negative_texts: list[str],
               num_epochs: int, learning_rate: float = 2e-6, balancing_dataset: bool = True,
               train_loss_threshold: float = 0.0, valid_loss_threshold: float = 0.0,
-              alpha: float = 1e-4, rho: float = 0.2,
-              hidden_dim: int = 256, recursive_layers: int = 2, **kwargs) -> LongContextRecursiveSequential:
+              alpha: float = 1e-4, rho: float = 0.2, encoder_layers: int = 4, attn_heads: int = 6,
+              recursive_layers: int = 2, recursive_hidden_dim: int = 256, **kwargs) -> LongContextRecursiveSequential:
         if balancing_dataset:
             min_length = min(len(positive_texts), len(negative_texts))
             positive_texts = positive_texts[:min_length]
@@ -50,21 +51,24 @@ class TextBinaryClassifierTrainer(BaseTrainer):
         if self.model is None:
             ffn_layers = kwargs.get('ffn_layers', 5)
             ffn_expansion_factor = kwargs.get('ffn_expansion_factor', 2)
-            ffn_bias = kwargs.get('ffn_bias', True)
-            ffn_dropout_rate = kwargs.get('ffn_dropout_rate', 0.1)
-            self.model = LongContextRecursiveSequential(input_dim=feature_dim, output_dim=1,
-                                                        hidden_dim=hidden_dim,
-                                                        recursive_layers=recursive_layers,
-                                                        ffn_layers=ffn_layers,
-                                                        ffn_expansion_factor=ffn_expansion_factor,
-                                                        ffn_bias=ffn_bias,
-                                                        ffn_dropout_rate=ffn_dropout_rate,
-                                                        attn_proj_layers=kwargs.get('attn_proj_layers', ffn_layers),
-                                                        attn_proj_bias=kwargs.get('attn_proj_bias', ffn_bias),
-                                                        attn_proj_expansion_factor=kwargs.get('attn_proj_expansion_factor', ffn_expansion_factor),
-                                                        attn_proj_dropout_rate=kwargs.get('attn_proj_dropout_rate', ffn_dropout_rate),
-                                                        device=self.device, dtype=dtype)
-            logger.debug(f'Training Model: {self.model}')
+            bias = kwargs.get('bias', True)
+            dropout_rate = kwargs.get('dropout_rate', 0.1)
+            encoder_ffn_layers = kwargs.get('encoder_ffn_layers', ffn_layers)
+            encoder_expansion_factor = kwargs.get('encoder_expansion_factor', ffn_expansion_factor)
+            encoder_dropout_rate = kwargs.get('encoder_dropout_rate', dropout_rate)
+            attn_ffn_layers = kwargs.get('attn_ffn_layers', 1)
+            attn_expansion_factor = kwargs.get('attn_expansion_factor', ffn_expansion_factor)
+            attn_dropout_rate = kwargs.get('attn_dropout_rate', dropout_rate)
+            theta = kwargs.get('theta', DEFAULT_THETA)
+            self.model = LongContextRecursiveSequential(input_dim=feature_dim, output_dim=1, bias=bias,
+                                                        encoder_layers=encoder_layers, attn_heads=attn_heads,
+                                                        recursive_layers=recursive_layers, recursive_hidden_dim=recursive_hidden_dim,
+                                                        ffn_layers=ffn_layers, ffn_expansion_factor=ffn_expansion_factor, dropout_rate=dropout_rate,
+                                                        encoder_ffn_layers=encoder_ffn_layers, encoder_expansion_factor=encoder_expansion_factor,
+                                                        encoder_dropout_rate=encoder_dropout_rate, attn_ffn_layers=attn_ffn_layers,
+                                                        attn_expansion_factor=attn_expansion_factor, attn_dropout_rate=attn_dropout_rate,
+                                                        theta=theta).initialize_weights()
+            logger.debug(f'Training Model: \n{self.model}')
         loss_function = nn.BCELoss()
         optimizer = optim.Adamax(self.model.parameters(), lr=learning_rate)
         for epoch in range(num_epochs):
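
train() now drives the architecture directly through encoder_layers / attn_heads / recursive_hidden_dim, with the remaining knobs travelling in **kwargs as read above. A usage sketch; the trainer and encoder constructors are not part of this diff, so their arguments here are assumptions:

    from deeplotx import LongTextEncoder
    from deeplotx.trainer import TextBinaryClassifierTrainer  # assumed export of trainer/__init__.py

    # Hypothetical setup: only train()'s signature is shown in this diff.
    encoder = LongTextEncoder(max_length=2048, chunk_size=448, overlapping=32)
    trainer = TextBinaryClassifierTrainer(long_text_encoder=encoder)  # assumed constructor
    model = trainer.train(positive_texts, negative_texts,  # list[str] each
                          num_epochs=36, learning_rate=2e-6,
                          encoder_layers=4, attn_heads=6,
                          recursive_layers=2, recursive_hidden_dim=256,
                          ffn_layers=5, dropout_rate=0.1)
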
deeplotx/util/__init__.py CHANGED
@@ -1,2 +1,2 @@
-from .hash import md5, sha1
+from .hash import md5, sha1, sha256, sha512
 from .read_file import read_file, get_files
deeplotx-0.6.1.dist-info/METADATA → deeplotx-0.8.1.dist-info/METADATA RENAMED
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: deeplotx
-Version: 0.6.1
+Version: 0.8.1
 Summary: Easy-2-use long text NLP toolkit.
 Requires-Python: >=3.10
 Description-Content-Type: text/markdown
@@ -13,7 +13,7 @@ Requires-Dist: python-dotenv
 Requires-Dist: torch
 Requires-Dist: transformers
 Requires-Dist: typing-extensions
-Requires-Dist: vortezwohl>=0.0.6
+Requires-Dist: vortezwohl>=0.0.8
 Dynamic: license-file
 
 [![Ask DeepWiki](https://deepwiki.com/badge.svg)](https://deepwiki.com/vortezwohl/DeepLoTX)
deeplotx-0.8.1.dist-info/RECORD ADDED
@@ -0,0 +1,34 @@
+deeplotx/__init__.py,sha256=oNeA-vNu5YGiEQg0IcpKEdGh_Y_2uPvo2nqaNL_Zgv8,1159
+deeplotx/encoder/__init__.py,sha256=BrsF5_4O-4pfihYF2wjExDOoAY-03kGJTH-Mhez4tsE,129
+deeplotx/encoder/encoder.py,sha256=oSBdA-MiwMKNfTFJWR-RdvNS0G0qfX-Qchwy4LuwB00,3985
+deeplotx/encoder/long_text_encoder.py,sha256=PFR6jjGyg1N58TQlKsPaNQEd-EDl13Hyhu7A1KtGBbA,3743
+deeplotx/encoder/longformer_encoder.py,sha256=A8FXqd4mdHxSn_o_R689XtpT73ISDT788EgMQRGLC2g,1822
+deeplotx/nn/__init__.py,sha256=01I_yqx9GTa4wy3uNyAqhtxp66tDqxgMLC4Ky5Vnkrg,651
+deeplotx/nn/attention.py,sha256=HZ4nfFtkk7AnJ9nuoDSK6zIlIhZ_hbpZc3o6JQIBqJ8,2861
+deeplotx/nn/auto_regression.py,sha256=uISx29t_zkDGS8s2wvGB6wOGYZitQ4hQ7wyoQl4lcqY,857
+deeplotx/nn/base_neural_network.py,sha256=FjQEDFH810fJS7JV3aLgJZnaMqC6DH--wlBvuj-ghTc,5900
+deeplotx/nn/feed_forward.py,sha256=4ozj7EDalO9pb6JUhZtsJqE0r8bIHFApHRt2zTrl4ho,2931
+deeplotx/nn/linear_regression.py,sha256=QybSRfsf9PpgJAWixvrSNn3OYRKJXpSZMfqdzpw-Kd8,1280
+deeplotx/nn/logistic_regression.py,sha256=WfgHVNGIvAYsX2iea2wRlLgfbubYWyZkBLYpnpwOiyU,937
+deeplotx/nn/long_context_auto_regression.py,sha256=uy0k_g8wEfMH5nd5HCfrHA8dgEsuWBA2x8U-g3h4vQc,1054
+deeplotx/nn/long_context_recursive_sequential.py,sha256=i7kUml9RV_mkLRJ114UHsj9Gxw7LzJVQ4z8-REHa8-w,2682
+deeplotx/nn/multi_head_attention.py,sha256=3z73uGbvy3jszRy1B9nxGOJjlttHpcpRF8Qd09OEams,2267
+deeplotx/nn/recursive_sequential.py,sha256=8Z8vT70xTygusL-3w3QlB_B_k0xQSUU2ZTgC1LhEmzQ,2805
+deeplotx/nn/roformer_encoder.py,sha256=UJjKniNdMd0rfoYQcsX6bPo6Ceq_Z6EhwHe2kgqWC_k,2426
+deeplotx/nn/rope.py,sha256=RTOjnllubktdy2rzFWxBfkuLuGjhEMyDd06uojdqPhM,1848
+deeplotx/nn/softmax_regression.py,sha256=PN_1Zr_B_z5zYC_s_8k6c5fllOtxfJEvVvCmC9GRmx0,958
+deeplotx/similarity/__init__.py,sha256=s3u-KSgxjnMcWpIItKgXNltFMPQ7YY3CqsqHI-5F1c8,724
+deeplotx/similarity/distribution.py,sha256=wQGouuuW531pZeBRKBujXsdsoz4fDnPw7_GW81jwepc,1066
+deeplotx/similarity/set.py,sha256=zhGFxtSIXlWqvipBYzoiPahp4g0boAIoUiMfG0wl07A,686
+deeplotx/similarity/vector.py,sha256=WVbDHqykt-fvuILVrhUCtIFAOEjY_zvttrXGM9eylG0,1125
+deeplotx/trainer/__init__.py,sha256=Fl5DR9UecQc5VtBcczU9sx_HtPNoFohpuELOh-Jrsks,77
+deeplotx/trainer/base_trainer.py,sha256=z0MeAT-rRYmjeBXt0ckt7J1itYArR0Cx02wHesXUoZE,385
+deeplotx/trainer/text_binary_classification_trainer.py,sha256=QMLR4cC8NCUP-v7SOYVtCykNwahENmWHv9adaeTbYmA,6528
+deeplotx/util/__init__.py,sha256=5CH4MTeSgsmCe3LPMfvKoSBpwh6jDSBuHVElJvzQzgs,90
+deeplotx/util/hash.py,sha256=qbNU3RLBWGQYFVte9WZBAkZ1BkdjCXiKLDaKPN54KFk,662
+deeplotx/util/read_file.py,sha256=ptzouvEQeeW8KU5BrWNJlXw-vFXVrpS9SkAUxsu6A8A,612
+deeplotx-0.8.1.dist-info/licenses/LICENSE,sha256=IwGE9guuL-ryRPEKi6wFPI_zOhg7zDZbTYuHbSt_SAk,35823
+deeplotx-0.8.1.dist-info/METADATA,sha256=zMKRLmdsEibLnN_hAx3OM7AbX3SiM7X1-8w4eFJGxNY,12251
+deeplotx-0.8.1.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
+deeplotx-0.8.1.dist-info/top_level.txt,sha256=hKg4pVDXZ-WWxkRfJFczRIll1Sv7VyfKCmzHLXbuh1U,9
+deeplotx-0.8.1.dist-info/RECORD,,
deeplotx-0.6.1.dist-info/RECORD DELETED
@@ -1,31 +0,0 @@
-deeplotx/__init__.py,sha256=S0hLmRkHdoaxv7IPKVXh5Oat27pt_FGKGmKjp3aAyMU,1129
-deeplotx/encoder/__init__.py,sha256=BrsF5_4O-4pfihYF2wjExDOoAY-03kGJTH-Mhez4tsE,129
-deeplotx/encoder/encoder.py,sha256=oSBdA-MiwMKNfTFJWR-RdvNS0G0qfX-Qchwy4LuwB00,3985
-deeplotx/encoder/long_text_encoder.py,sha256=It0hXuSe0Hq5Y_3QhjEqvF1JbtX6Hc2VzVabzOu7fLA,3625
-deeplotx/encoder/longformer_encoder.py,sha256=A8FXqd4mdHxSn_o_R689XtpT73ISDT788EgMQRGLC2g,1822
-deeplotx/nn/__init__.py,sha256=f7f6Qx1Xkw3Nn3Lvafe7Pq4pUO7ZcESIA8KZxnSL_OM,535
-deeplotx/nn/auto_regression.py,sha256=8eEdXhOjRLKP4MpgX1wt9L1grU4_fS49ejVoNzFs7LM,877
-deeplotx/nn/base_neural_network.py,sha256=s7jHe7HprOelD1wZRbFdqb5Hxqs3sjLXLEo7OyDBHtk,3215
-deeplotx/nn/feed_forward.py,sha256=3lWV_snCp_PiqjxTYoiNlL9EF2heekWbMkKXoPlljkM,2839
-deeplotx/nn/linear_regression.py,sha256=QybSRfsf9PpgJAWixvrSNn3OYRKJXpSZMfqdzpw-Kd8,1280
-deeplotx/nn/logistic_regression.py,sha256=WfgHVNGIvAYsX2iea2wRlLgfbubYWyZkBLYpnpwOiyU,937
-deeplotx/nn/long_context_auto_regression.py,sha256=oMrxeVuCa1M2EQJSbOYlpTjl5NrkKGAHers8qIaZdU8,911
-deeplotx/nn/long_context_recursive_sequential.py,sha256=sU_22QH7Z6EJurMbTVEYPd83wC2dzadMIeztVIcc04I,2173
-deeplotx/nn/recursive_sequential.py,sha256=WsmXaIgTdpudo2bYcpBX8bKeJgPnT-atwEmLSXqQEco,2743
-deeplotx/nn/self_attention.py,sha256=HW9ZB3S6-yfTQc2745rJ6TM7L01P8ewxt7nGHosE2r8,2291
-deeplotx/nn/softmax_regression.py,sha256=PN_1Zr_B_z5zYC_s_8k6c5fllOtxfJEvVvCmC9GRmx0,958
-deeplotx/similarity/__init__.py,sha256=s3u-KSgxjnMcWpIItKgXNltFMPQ7YY3CqsqHI-5F1c8,724
-deeplotx/similarity/distribution.py,sha256=wQGouuuW531pZeBRKBujXsdsoz4fDnPw7_GW81jwepc,1066
-deeplotx/similarity/set.py,sha256=zhGFxtSIXlWqvipBYzoiPahp4g0boAIoUiMfG0wl07A,686
-deeplotx/similarity/vector.py,sha256=WVbDHqykt-fvuILVrhUCtIFAOEjY_zvttrXGM9eylG0,1125
-deeplotx/trainer/__init__.py,sha256=Fl5DR9UecQc5VtBcczU9sx_HtPNoFohpuELOh-Jrsks,77
-deeplotx/trainer/base_trainer.py,sha256=z0MeAT-rRYmjeBXt0ckt7J1itYArR0Cx02wHesXUoZE,385
-deeplotx/trainer/text_binary_classification_trainer.py,sha256=7oLzgXvdmFpQiBy7ncJ0smdqnMGr8xdZs6nTWpj6qfw,6085
-deeplotx/util/__init__.py,sha256=JxqAK_WOOHcYVSTHBT1-WuBwWrPEVDTV3titeVWvNUM,74
-deeplotx/util/hash.py,sha256=qbNU3RLBWGQYFVte9WZBAkZ1BkdjCXiKLDaKPN54KFk,662
-deeplotx/util/read_file.py,sha256=ptzouvEQeeW8KU5BrWNJlXw-vFXVrpS9SkAUxsu6A8A,612
-deeplotx-0.6.1.dist-info/licenses/LICENSE,sha256=IwGE9guuL-ryRPEKi6wFPI_zOhg7zDZbTYuHbSt_SAk,35823
-deeplotx-0.6.1.dist-info/METADATA,sha256=a1KcBHaewfyOwIywZ3wtBr8mdly4ofdb7Z4g2KYVzUk,12251
-deeplotx-0.6.1.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
-deeplotx-0.6.1.dist-info/top_level.txt,sha256=hKg4pVDXZ-WWxkRfJFczRIll1Sv7VyfKCmzHLXbuh1U,9
-deeplotx-0.6.1.dist-info/RECORD,,