deeplotx 0.6.1__tar.gz → 0.8.1__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {deeplotx-0.6.1 → deeplotx-0.8.1}/PKG-INFO +2 -2
- {deeplotx-0.6.1 → deeplotx-0.8.1}/deeplotx/__init__.py +4 -2
- {deeplotx-0.6.1 → deeplotx-0.8.1}/deeplotx/encoder/long_text_encoder.py +1 -0
- {deeplotx-0.6.1 → deeplotx-0.8.1}/deeplotx/nn/__init__.py +4 -1
- deeplotx-0.6.1/deeplotx/nn/self_attention.py → deeplotx-0.8.1/deeplotx/nn/attention.py +18 -9
- deeplotx-0.8.1/deeplotx/nn/auto_regression.py +14 -0
- {deeplotx-0.6.1 → deeplotx-0.8.1}/deeplotx/nn/base_neural_network.py +61 -0
- {deeplotx-0.6.1 → deeplotx-0.8.1}/deeplotx/nn/feed_forward.py +11 -11
- deeplotx-0.8.1/deeplotx/nn/long_context_auto_regression.py +16 -0
- deeplotx-0.8.1/deeplotx/nn/long_context_recursive_sequential.py +36 -0
- deeplotx-0.8.1/deeplotx/nn/multi_head_attention.py +34 -0
- {deeplotx-0.6.1 → deeplotx-0.8.1}/deeplotx/nn/recursive_sequential.py +13 -13
- deeplotx-0.8.1/deeplotx/nn/roformer_encoder.py +40 -0
- deeplotx-0.8.1/deeplotx/nn/rope.py +42 -0
- {deeplotx-0.6.1 → deeplotx-0.8.1}/deeplotx/trainer/text_binary_classification_trainer.py +21 -17
- deeplotx-0.8.1/deeplotx/util/__init__.py +2 -0
- {deeplotx-0.6.1 → deeplotx-0.8.1}/deeplotx.egg-info/PKG-INFO +2 -2
- {deeplotx-0.6.1 → deeplotx-0.8.1}/deeplotx.egg-info/SOURCES.txt +4 -1
- {deeplotx-0.6.1 → deeplotx-0.8.1}/deeplotx.egg-info/requires.txt +1 -1
- {deeplotx-0.6.1 → deeplotx-0.8.1}/pyproject.toml +2 -2
- deeplotx-0.6.1/deeplotx/nn/auto_regression.py +0 -15
- deeplotx-0.6.1/deeplotx/nn/long_context_auto_regression.py +0 -15
- deeplotx-0.6.1/deeplotx/nn/long_context_recursive_sequential.py +0 -34
- deeplotx-0.6.1/deeplotx/util/__init__.py +0 -2
- {deeplotx-0.6.1 → deeplotx-0.8.1}/LICENSE +0 -0
- {deeplotx-0.6.1 → deeplotx-0.8.1}/README.md +0 -0
- {deeplotx-0.6.1 → deeplotx-0.8.1}/deeplotx/encoder/__init__.py +0 -0
- {deeplotx-0.6.1 → deeplotx-0.8.1}/deeplotx/encoder/encoder.py +0 -0
- {deeplotx-0.6.1 → deeplotx-0.8.1}/deeplotx/encoder/longformer_encoder.py +0 -0
- {deeplotx-0.6.1 → deeplotx-0.8.1}/deeplotx/nn/linear_regression.py +0 -0
- {deeplotx-0.6.1 → deeplotx-0.8.1}/deeplotx/nn/logistic_regression.py +0 -0
- {deeplotx-0.6.1 → deeplotx-0.8.1}/deeplotx/nn/softmax_regression.py +0 -0
- {deeplotx-0.6.1 → deeplotx-0.8.1}/deeplotx/similarity/__init__.py +0 -0
- {deeplotx-0.6.1 → deeplotx-0.8.1}/deeplotx/similarity/distribution.py +0 -0
- {deeplotx-0.6.1 → deeplotx-0.8.1}/deeplotx/similarity/set.py +0 -0
- {deeplotx-0.6.1 → deeplotx-0.8.1}/deeplotx/similarity/vector.py +0 -0
- {deeplotx-0.6.1 → deeplotx-0.8.1}/deeplotx/trainer/__init__.py +0 -0
- {deeplotx-0.6.1 → deeplotx-0.8.1}/deeplotx/trainer/base_trainer.py +0 -0
- {deeplotx-0.6.1 → deeplotx-0.8.1}/deeplotx/util/hash.py +0 -0
- {deeplotx-0.6.1 → deeplotx-0.8.1}/deeplotx/util/read_file.py +0 -0
- {deeplotx-0.6.1 → deeplotx-0.8.1}/deeplotx.egg-info/dependency_links.txt +0 -0
- {deeplotx-0.6.1 → deeplotx-0.8.1}/deeplotx.egg-info/top_level.txt +0 -0
- {deeplotx-0.6.1 → deeplotx-0.8.1}/setup.cfg +0 -0
{deeplotx-0.6.1 → deeplotx-0.8.1}/PKG-INFO

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: deeplotx
-Version: 0.6.1
+Version: 0.8.1
 Summary: Easy-2-use long text NLP toolkit.
 Requires-Python: >=3.10
 Description-Content-Type: text/markdown
@@ -13,7 +13,7 @@ Requires-Dist: python-dotenv
 Requires-Dist: torch
 Requires-Dist: transformers
 Requires-Dist: typing-extensions
-Requires-Dist: vortezwohl>=0.0.
+Requires-Dist: vortezwohl>=0.0.8
 Dynamic: license-file

 [](https://deepwiki.com/vortezwohl/DeepLoTX)
{deeplotx-0.6.1 → deeplotx-0.8.1}/deeplotx/__init__.py

@@ -5,14 +5,16 @@ __ROOT__ = os.path.dirname(os.path.abspath(__file__))

 from .encoder import Encoder, LongTextEncoder, LongformerEncoder
 from .nn import (
-    BaseNeuralNetwork,
     FeedForward,
     LinearRegression,
     LogisticRegression,
     SoftmaxRegression,
     RecursiveSequential,
     LongContextRecursiveSequential,
-
+    RoPE,
+    Attention,
+    MultiHeadAttention,
+    RoFormerEncoder,
     AutoRegression,
     LongContextAutoRegression
 )
{deeplotx-0.6.1 → deeplotx-0.8.1}/deeplotx/encoder/long_text_encoder.py

@@ -17,6 +17,7 @@ class LongTextEncoder(Encoder):
                  overlapping: int = 32, model_name_or_path: str = DEFAULT_BERT,
                  cache_capacity: int = 64, max_workers: int = 8, device: str | None = None):
         super().__init__(model_name_or_path=model_name_or_path, device=device)
+        assert overlapping < chunk_size, f'overlapping ({overlapping}) must be less than chunk size ({chunk_size}).'
         self._max_length = max_length
         self._chunk_size = chunk_size
         self._overlapping = overlapping
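The only functional change here is the new sanity check on chunk overlap. A small illustrative sketch, assuming deeplotx 0.8.1 is installed; the keyword names `max_length` and `chunk_size` are inferred from the assignments in the hunk above, and constructing the encoder loads the default BERT weights:

```python
# Hypothetical values; overlapping must now be strictly smaller than chunk_size.
from deeplotx import LongTextEncoder

encoder = LongTextEncoder(max_length=2048, chunk_size=448, overlapping=32)   # passes the new assert
# LongTextEncoder(max_length=2048, chunk_size=448, overlapping=448)          # would raise AssertionError
```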
{deeplotx-0.6.1 → deeplotx-0.8.1}/deeplotx/nn/__init__.py

@@ -5,6 +5,9 @@ from .logistic_regression import LogisticRegression
 from .softmax_regression import SoftmaxRegression
 from .recursive_sequential import RecursiveSequential
 from .long_context_recursive_sequential import LongContextRecursiveSequential
-from .self_attention import SelfAttention
+from .rope import RoPE
+from .attention import Attention
+from .multi_head_attention import MultiHeadAttention
+from .roformer_encoder import RoFormerEncoder
 from .auto_regression import AutoRegression
 from .long_context_auto_regression import LongContextAutoRegression
deeplotx-0.6.1/deeplotx/nn/self_attention.py → deeplotx-0.8.1/deeplotx/nn/attention.py

@@ -4,14 +4,17 @@ import torch

 from deeplotx.nn.base_neural_network import BaseNeuralNetwork
 from deeplotx.nn.feed_forward import FeedForward
+from deeplotx.nn.rope import RoPE, DEFAULT_THETA


-class SelfAttention(BaseNeuralNetwork):
-    def __init__(self, feature_dim: int, bias: bool = True,
-                 proj_expansion_factor: int | float = 1.5, dropout_rate: float = 0.02,
-                 model_name: str | None = None, device: str | None = None, dtype: torch.dtype | None = None
+class Attention(BaseNeuralNetwork):
+    def __init__(self, feature_dim: int, bias: bool = True, positional: bool = True,
+                 proj_layers: int = 1, proj_expansion_factor: int | float = 1.5, dropout_rate: float = 0.02,
+                 model_name: str | None = None, device: str | None = None, dtype: torch.dtype | None = None,
+                 **kwargs):
         super().__init__(in_features=feature_dim, out_features=feature_dim, model_name=model_name,
                          device=device, dtype=dtype)
+        self._positional = positional
         self._feature_dim = feature_dim
         self.q_proj = FeedForward(feature_dim=self._feature_dim, num_layers=proj_layers,
                                   expansion_factor=proj_expansion_factor,
@@ -22,18 +25,24 @@ class SelfAttention(BaseNeuralNetwork):
         self.v_proj = FeedForward(feature_dim=self._feature_dim, num_layers=proj_layers,
                                   expansion_factor=proj_expansion_factor,
                                   bias=bias, dropout_rate=dropout_rate, device=self.device, dtype=self.dtype)
+        if self._positional:
+            self.rope = RoPE(feature_dim=self._feature_dim, theta=kwargs.get('theta', DEFAULT_THETA),
+                             device=self.device, dtype=self.dtype)

-    def _attention(self, x: torch.Tensor, mask: torch.Tensor | None = None) -> torch.Tensor:
-        q, k = self.q_proj(x), self.k_proj(x)
+    def _attention(self, x: torch.Tensor, y: torch.Tensor, mask: torch.Tensor | None = None) -> torch.Tensor:
+        q, k = self.q_proj(x), self.k_proj(y)
+        if self._positional:
+            q, k = self.rope(q), self.rope(k)
         attn = torch.matmul(q, k.transpose(-2, -1))
         attn = attn / (self._feature_dim ** 0.5)
         attn = attn.masked_fill(mask == 0, -1e9) if mask is not None else attn
         return torch.softmax(attn, dim=-1)

     @override
-    def forward(self, x: torch.Tensor, mask: torch.Tensor | None = None) -> torch.Tensor:
+    def forward(self, x: torch.Tensor, y: torch.Tensor | None = None, mask: torch.Tensor | None = None) -> torch.Tensor:
         x = self.ensure_device_and_dtype(x, device=self.device, dtype=self.dtype)
+        y = x if y is None else self.ensure_device_and_dtype(y, device=self.device, dtype=self.dtype)
         if mask is not None:
             mask = self.ensure_device_and_dtype(mask, device=self.device, dtype=self.dtype)
-        v = self.v_proj(x)
-        return torch.matmul(self._attention(x, mask), v)
+        v = self.v_proj(y)
+        return torch.matmul(self._attention(x, y, mask), v)
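For orientation, a minimal usage sketch of the renamed block, assuming the 0.8.1 signatures shown above (`forward(x, y=None, mask=None)`, with RoPE applied to q/k when `positional=True`); the tensor shapes are illustrative:

```python
import torch
from deeplotx.nn import Attention

attn = Attention(feature_dim=64, positional=True)
x = torch.randn(2, 16, 64)      # (batch, seq_len, feature_dim)
ctx = torch.randn(2, 24, 64)

self_out = attn(x)              # y defaults to x -> self-attention
cross_out = attn(x, ctx)        # q from x, k/v from ctx -> cross-attention
print(self_out.shape, cross_out.shape)   # both torch.Size([2, 16, 64])
```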
deeplotx-0.8.1/deeplotx/nn/auto_regression.py (new file)

@@ -0,0 +1,14 @@
+import torch
+
+from deeplotx.nn import RecursiveSequential
+
+
+class AutoRegression(RecursiveSequential):
+    def __init__(self, feature_dim: int, bias: bool = True,
+                 recursive_layers: int = 1, recursive_hidden_dim: int | None = None,
+                 ffn_layers: int = 1, ffn_expansion_factor: int | float = 2, dropout_rate: float = 0.05,
+                 model_name: str | None = None, device: str | None = None, dtype: torch.dtype | None = None):
+        super().__init__(input_dim=feature_dim, output_dim=feature_dim, bias=bias,
+                         recursive_layers=recursive_layers, recursive_hidden_dim=recursive_hidden_dim,
+                         ffn_layers=ffn_layers, ffn_expansion_factor=ffn_expansion_factor,
+                         dropout_rate=dropout_rate, model_name=model_name, device=device, dtype=dtype)
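A short sketch of how the rewritten constructor might be used for next-step prediction, assuming `RecursiveSequential.forward(x, state)` and `initial_state()` behave as in the hunks further down:

```python
import torch
from deeplotx.nn import AutoRegression

model = AutoRegression(feature_dim=32, recursive_layers=1, ffn_layers=2)
x = torch.randn(1, 10, 32)                           # (batch, seq_len, feature_dim)
out, state = model(x, model.initial_state(batch_size=1))
print(out.shape)                                     # output_dim == feature_dim for auto-regression
```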
{deeplotx-0.6.1 → deeplotx-0.8.1}/deeplotx/nn/base_neural_network.py

@@ -3,6 +3,7 @@ from abc import abstractmethod

 import torch
 from torch import nn
+from torch.nn import init

 DEFAULT_SUFFIX = 'dlx'

@@ -36,6 +37,44 @@ class BaseNeuralNetwork(nn.Module):
         x = x.to(dtype)
         return x

+    def initialize_weights(self):
+        for m in self.modules():
+            match m.__class__:
+                case nn.Linear:
+                    init.kaiming_normal_(m.weight, mode='fan_in', nonlinearity='leaky_relu')
+                    if m.bias is not None:
+                        init.constant_(m.bias, 0)
+                case nn.BatchNorm2d | nn.BatchNorm1d | nn.BatchNorm3d:
+                    init.constant_(m.weight, 1)
+                    init.constant_(m.bias, 0)
+                case nn.LSTM | nn.GRU:
+                    for name, param in m.named_parameters():
+                        _tmp_name = name.lower()
+                        if 'weight_ih' in _tmp_name:
+                            init.kaiming_normal_(param, mode='fan_in', nonlinearity='sigmoid')
+                        elif 'weight_hh' in _tmp_name:
+                            init.orthogonal_(param)
+                        elif 'bias' in _tmp_name:
+                            init.constant_(param, 0)
+                case _:
+                    pass
+        return self
+
+    def size(self) -> dict:
+        total_params = trainable_params = non_trainable_params = 0
+        for param in self.parameters():
+            params = param.numel()
+            total_params += params
+            if param.requires_grad:
+                trainable_params += params
+            else:
+                non_trainable_params += params
+        return {
+            'total': total_params,
+            'trainable': trainable_params,
+            'non_trainable': non_trainable_params
+        }
+
     def l1(self, _lambda: float = 1e-4) -> torch.Tensor:
         def _l1() -> torch.Tensor:
             l2_reg = torch.tensor(0., device=self.device, dtype=self.dtype)
@@ -77,3 +116,25 @@ class BaseNeuralNetwork(nn.Module):
         model_file_name = f'{model_name}.{_suffix}' if model_name is not None else f'{self._model_name}.{_suffix}'
         self.load_state_dict(torch.load(os.path.join(model_dir, model_file_name), map_location=self.device, weights_only=True))
         return self
+
+    def __str__(self):
+        formatted = super().__str__()
+        _line_len = len([sorted(formatted.splitlines(), key=lambda _: len(_), reverse=True)][0])
+        _splitter_1 = '=' * (_line_len + 10)
+        _splitter_2 = '-' * (_line_len + 10)
+        _size = self.size()
+        total_param = _size['total']
+        trainable_param = _size['trainable']
+        non_trainable_param = _size['non_trainable']
+        formatted = (f'{_splitter_1}\n'
+                     f'Model_Name: {self._model_name}\n'
+                     f'In_Features: {self.in_features}\n'
+                     f'Out_Features: {self.out_features}\n'
+                     f'Device: {self.device}\n'
+                     f'Dtype: {self.dtype}\n'
+                     f'Total_Parameters: {total_param}\n'
+                     f'Trainable_Parameters: {trainable_param}\n'
+                     f'NonTrainable_Parameters: {non_trainable_param}\n'
+                     f'{_splitter_2}'
+                     f'\n{formatted}\n{_splitter_1}')
+        return formatted
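A small sketch of the three new helpers on any `BaseNeuralNetwork` subclass (here `AutoRegression`, whose 0.8.1 constructor appears earlier in this diff); exact parameter counts depend on the configuration:

```python
from deeplotx.nn import AutoRegression

model = AutoRegression(feature_dim=32).initialize_weights()   # re-inits Linear / BatchNorm / LSTM / GRU weights
print(model.size())   # {'total': ..., 'trainable': ..., 'non_trainable': ...}
print(model)          # __str__ now prepends a summary block before the usual nn.Module repr
```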
{deeplotx-0.6.1 → deeplotx-0.8.1}/deeplotx/nn/feed_forward.py

@@ -12,13 +12,13 @@ class FeedForwardUnit(BaseNeuralNetwork):
                  device: str | None = None, dtype: torch.dtype | None = None):
         super().__init__(in_features=feature_dim, out_features=feature_dim, model_name=model_name, device=device, dtype=dtype)
         self._dropout_rate = dropout_rate
-        self.
-
-        self.
-
-        self.
-
-        self.layer_norm = nn.LayerNorm(normalized_shape=self.
+        self.up_proj = nn.Linear(in_features=feature_dim, out_features=int(feature_dim * expansion_factor),
+                                 bias=bias, device=self.device, dtype=self.dtype)
+        self.down_proj = nn.Linear(in_features=int(feature_dim * expansion_factor), out_features=feature_dim,
+                                   bias=bias, device=self.device, dtype=self.dtype)
+        self.parametric_relu = nn.PReLU(num_parameters=1, init=5e-3,
+                                        device=self.device, dtype=self.dtype)
+        self.layer_norm = nn.LayerNorm(normalized_shape=self.up_proj.in_features, eps=1e-9,
                                        device=self.device, dtype=self.dtype)

     @override
@@ -26,11 +26,11 @@ class FeedForwardUnit(BaseNeuralNetwork):
         x = self.ensure_device_and_dtype(x, device=self.device, dtype=self.dtype)
         residual = x
         x = self.layer_norm(x)
-        x = self.
-        x = self.
+        x = self.up_proj(x)
+        x = self.parametric_relu(x)
         if self._dropout_rate > .0:
             x = torch.dropout(x, p=self._dropout_rate, train=self.training)
-        return self.
+        return self.down_proj(x) + residual


 class FeedForward(BaseNeuralNetwork):
@@ -43,7 +43,7 @@ class FeedForward(BaseNeuralNetwork):
         self.ffn_layers = nn.ModuleList([FeedForwardUnit(feature_dim=feature_dim,
                                                          expansion_factor=expansion_factor, bias=bias,
                                                          dropout_rate=dropout_rate,
-                                                         device=self.device, dtype=self.dtype)
+                                                         device=self.device, dtype=self.dtype) for _ in range(num_layers)])

     @override
     def forward(self, x: torch.Tensor) -> torch.Tensor:
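Besides the renamed projections, the `for _ in range(num_layers)` fix means the stack now really contains `num_layers` residual units. A minimal sketch, assuming the 0.8.1 keyword arguments shown above; the shape is preserved because each unit is residual:

```python
import torch
from deeplotx.nn import FeedForward

ffn = FeedForward(feature_dim=64, num_layers=3, expansion_factor=2)
x = torch.randn(8, 64)
print(ffn(x).shape)   # torch.Size([8, 64])
```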
deeplotx-0.8.1/deeplotx/nn/long_context_auto_regression.py (new file)

@@ -0,0 +1,16 @@
+import torch
+
+from deeplotx.nn import LongContextRecursiveSequential
+
+
+class LongContextAutoRegression(LongContextRecursiveSequential):
+    def __init__(self, feature_dim: int, bias: bool = True,
+                 encoder_layers: int = 1, attn_heads: int = 1, recursive_layers: int = 1, recursive_hidden_dim: int | None = None,
+                 ffn_layers: int = 1, ffn_expansion_factor: int | float = 2, dropout_rate: float = 0.05,
+                 model_name: str | None = None, device: str | None = None, dtype: torch.dtype | None = None,
+                 **kwargs):
+        super().__init__(input_dim=feature_dim, output_dim=feature_dim, bias=bias,
+                         encoder_layers=encoder_layers, attn_heads=attn_heads,
+                         recursive_layers=recursive_layers, recursive_hidden_dim=recursive_hidden_dim,
+                         ffn_layers=ffn_layers, ffn_expansion_factor=ffn_expansion_factor, dropout_rate=dropout_rate,
+                         model_name=model_name, device=device, dtype=dtype, **kwargs)
deeplotx-0.8.1/deeplotx/nn/long_context_recursive_sequential.py (new file)

@@ -0,0 +1,36 @@
+from typing_extensions import override
+
+import torch
+from torch import nn
+
+from deeplotx.nn.attention import DEFAULT_THETA
+from deeplotx.nn.recursive_sequential import RecursiveSequential
+from deeplotx.nn.roformer_encoder import RoFormerEncoder
+
+
+class LongContextRecursiveSequential(RecursiveSequential):
+    def __init__(self, input_dim: int, output_dim: int, bias: bool = True,
+                 encoder_layers: int = 1, attn_heads: int = 1, recursive_layers: int = 2, recursive_hidden_dim: int | None = None,
+                 ffn_layers: int = 1, ffn_expansion_factor: int | float = 2, dropout_rate: float = 0.05,
+                 model_name: str | None = None, device: str | None = None, dtype: torch.dtype | None = None,
+                 **kwargs):
+        super().__init__(input_dim=input_dim, output_dim=output_dim, bias=bias,
+                         recursive_layers=recursive_layers, recursive_hidden_dim=recursive_hidden_dim,
+                         ffn_layers=ffn_layers, ffn_expansion_factor=ffn_expansion_factor, dropout_rate=dropout_rate,
+                         model_name=model_name, device=device, dtype=dtype)
+        self.roformer_encoders = nn.ModuleList([RoFormerEncoder(feature_dim=input_dim, attn_heads=attn_heads, bias=bias,
+                                                                ffn_layers=kwargs.get('encoder_ffn_layers', ffn_layers),
+                                                                ffn_expansion_factor=kwargs.get('encoder_expansion_factor', ffn_expansion_factor),
+                                                                dropout_rate=kwargs.get('encoder_dropout_rate', dropout_rate),
+                                                                attn_ffn_layers=kwargs.get('attn_ffn_layers', 1),
+                                                                attn_expansion_factor=kwargs.get('attn_expansion_factor', ffn_expansion_factor),
+                                                                attn_dropout_rate=kwargs.get('attn_dropout_rate', dropout_rate),
+                                                                theta=kwargs.get('theta', DEFAULT_THETA),
+                                                                device=self.device, dtype=self.dtype) for _ in range(encoder_layers)])
+
+    @override
+    def forward(self, x: torch.Tensor, state: tuple[torch.Tensor, torch.Tensor]) -> tuple[torch.Tensor, tuple[torch.Tensor, torch.Tensor]]:
+        x = self.ensure_device_and_dtype(x, device=self.device, dtype=self.dtype)
+        for roformer_encoder in self.roformer_encoders:
+            x = roformer_encoder(x)
+        return super().forward(x, state)
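A usage sketch of the new architecture (a stack of `RoFormerEncoder` blocks feeding the bidirectional-LSTM head), assuming the constructor defaults shown above; the values are illustrative:

```python
import torch
from deeplotx.nn import LongContextRecursiveSequential

model = LongContextRecursiveSequential(input_dim=64, output_dim=1,
                                       encoder_layers=2, attn_heads=4)
x = torch.randn(1, 32, 64)                           # (batch, seq_len, input_dim)
out, state = model(x, model.initial_state(batch_size=1))
print(out.shape)
```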
deeplotx-0.8.1/deeplotx/nn/multi_head_attention.py (new file)

@@ -0,0 +1,34 @@
+from typing_extensions import override
+
+import torch
+from torch import nn
+
+from deeplotx.nn.base_neural_network import BaseNeuralNetwork
+from deeplotx.nn.attention import Attention
+
+
+class MultiHeadAttention(BaseNeuralNetwork):
+    def __init__(self, feature_dim: int, num_heads: int = 1, bias: bool = True, positional: bool = True,
+                 proj_layers: int = 1, proj_expansion_factor: int | float = 1.5, dropout_rate: float = 0.02,
+                 model_name: str | None = None, device: str | None = None, dtype: torch.dtype | None = None,
+                 **kwargs):
+        super().__init__(in_features=feature_dim, out_features=feature_dim, model_name=model_name,
+                         device=device, dtype=dtype)
+        self._num_heads = num_heads
+        self.expand_proj = nn.Linear(in_features=feature_dim, out_features=feature_dim * self._num_heads, bias=bias,
+                                     device=self.device, dtype=self.dtype)
+        self.attn_heads = nn.ModuleList([Attention(feature_dim=feature_dim, bias=bias, positional=positional,
+                                                   proj_layers=proj_layers, proj_expansion_factor=proj_expansion_factor,
+                                                   dropout_rate=dropout_rate, device=self.device, dtype=self.dtype,
+                                                   **kwargs) for _ in range(self._num_heads)])
+        self.out_proj = nn.Linear(in_features=feature_dim * self._num_heads, out_features=feature_dim, bias=bias,
+                                  device=self.device, dtype=self.dtype)
+
+    @override
+    def forward(self, x: torch.Tensor, y: torch.Tensor | None = None, mask: torch.Tensor | None = None) -> torch.Tensor:
+        x = self.ensure_device_and_dtype(x, device=self.device, dtype=self.dtype)
+        y = x if y is None else self.ensure_device_and_dtype(y, device=self.device, dtype=self.dtype)
+        x, y = self.expand_proj(x), self.expand_proj(y)
+        x_heads, y_heads = x.split(self.in_features, dim=-1), y.split(self.in_features, dim=-1)
+        head_outs = [self.attn_heads[_](x=x_heads[_], y=y_heads[_], mask=mask) for _ in range(self._num_heads)]
+        return self.out_proj(torch.concat(head_outs, dim=-1))
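A minimal sketch of the new multi-head wrapper, assuming the signatures above: the input is expanded to `feature_dim * num_heads`, split per head, run through independent `Attention` modules, then projected back:

```python
import torch
from deeplotx.nn import MultiHeadAttention

mha = MultiHeadAttention(feature_dim=64, num_heads=4, positional=True)
x = torch.randn(2, 16, 64)
print(mha(x).shape)   # torch.Size([2, 16, 64])
```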
{deeplotx-0.6.1 → deeplotx-0.8.1}/deeplotx/nn/recursive_sequential.py

@@ -8,22 +8,22 @@ from deeplotx.nn.feed_forward import FeedForward


 class RecursiveSequential(BaseNeuralNetwork):
-    def __init__(self, input_dim: int, output_dim: int,
-
-                 ffn_layers: int = 1, ffn_expansion_factor: int | float = 2,
-
-
-
-
-
-
-        self.lstm = nn.LSTM(input_size=input_dim, hidden_size=hidden_dim,
+    def __init__(self, input_dim: int, output_dim: int, bias: bool = True,
+                 recursive_layers: int = 1, recursive_hidden_dim: int | None = None,
+                 ffn_layers: int = 1, ffn_expansion_factor: int | float = 2, dropout_rate: float = 0.05,
+                 model_name: str | None = None, device: str | None = None, dtype: torch.dtype | None = None):
+        super().__init__(in_features=input_dim, out_features=output_dim, model_name=model_name,
+                         device=device, dtype=dtype)
+        if recursive_hidden_dim is None:
+            recursive_hidden_dim = input_dim
+        self.lstm = nn.LSTM(input_size=input_dim, hidden_size=recursive_hidden_dim,
                             num_layers=recursive_layers, batch_first=True,
                             bias=True, bidirectional=True, device=self.device,
                             dtype=self.dtype)
-        self.ffn = FeedForward(feature_dim=
-
-
+        self.ffn = FeedForward(feature_dim=recursive_hidden_dim * 2, num_layers=ffn_layers,
+                               expansion_factor=ffn_expansion_factor, bias=bias, dropout_rate=dropout_rate,
+                               device=self.device, dtype=self.dtype)
+        self.__proj = nn.Linear(in_features=recursive_hidden_dim * 2, out_features=output_dim, bias=bias,
                                 device=self.device, dtype=self.dtype)

     def initial_state(self, batch_size: int = 1) -> tuple[torch.Tensor, torch.Tensor]:
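A sketch of the reworked constructor, assuming the 0.8.1 signatures above; note that `recursive_hidden_dim` replaces the old `hidden_dim` and falls back to `input_dim` when omitted:

```python
import torch
from deeplotx.nn import RecursiveSequential

model = RecursiveSequential(input_dim=64, output_dim=2,
                            recursive_layers=2, recursive_hidden_dim=128)
x = torch.randn(4, 20, 64)                            # (batch, seq_len, input_dim)
out, (h, c) = model(x, model.initial_state(batch_size=4))
print(out.shape)
```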
deeplotx-0.8.1/deeplotx/nn/roformer_encoder.py (new file)

@@ -0,0 +1,40 @@
+from typing_extensions import override
+
+import torch
+from torch import nn
+
+from deeplotx.nn.base_neural_network import BaseNeuralNetwork
+from deeplotx.nn.feed_forward import FeedForward
+from deeplotx.nn.multi_head_attention import MultiHeadAttention
+
+
+class RoFormerEncoder(BaseNeuralNetwork):
+    def __init__(self, feature_dim: int, attn_heads: int = 2, bias: bool = True,
+                 ffn_layers: int = 1, ffn_expansion_factor: int | float = 2,
+                 dropout_rate: float = 0.02, model_name: str | None = None,
+                 device: str | None = None, dtype: torch.dtype | None = None, **kwargs):
+        super().__init__(in_features=feature_dim, out_features=feature_dim,
+                         model_name=model_name, device=device, dtype=dtype)
+        self.attn = MultiHeadAttention(feature_dim=feature_dim, num_heads=attn_heads,
+                                       bias=bias, positional=True,
+                                       proj_layers=kwargs.get('attn_ffn_layers', 1),
+                                       proj_expansion_factor=kwargs.get('attn_expansion_factor', ffn_expansion_factor),
+                                       dropout_rate=kwargs.get('attn_dropout_rate', dropout_rate),
+                                       device=self.device, dtype=self.dtype, **kwargs)
+        self.ffn = FeedForward(feature_dim=feature_dim * 2, num_layers=ffn_layers,
+                               expansion_factor=ffn_expansion_factor,
+                               bias=bias, dropout_rate=dropout_rate,
+                               device=self.device, dtype=self.dtype)
+        self.layer_norm = nn.LayerNorm(normalized_shape=feature_dim, eps=1e-9,
+                                       device=self.device, dtype=self.dtype)
+        self.__proj = nn.Linear(in_features=feature_dim * 2, out_features=feature_dim,
+                                bias=bias, device=self.device, dtype=self.dtype)
+
+    @override
+    def forward(self, x: torch.Tensor, mask: torch.Tensor | None = None) -> torch.Tensor:
+        x = self.ensure_device_and_dtype(x, device=self.device, dtype=self.dtype)
+        if mask is not None:
+            mask = self.ensure_device_and_dtype(mask, device=self.device, dtype=self.dtype)
+        attn = self.attn(x=self.layer_norm(x), y=None, mask=mask)
+        x = torch.concat([attn, x], dim=-1)
+        return self.__proj(self.ffn(x))
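A minimal sketch of one encoder block as defined above: pre-norm rotary multi-head attention is concatenated with the residual stream, then passed through the FFN and projected back to `feature_dim`:

```python
import torch
from deeplotx.nn import RoFormerEncoder

encoder_block = RoFormerEncoder(feature_dim=64, attn_heads=2)
x = torch.randn(2, 16, 64)
print(encoder_block(x).shape)   # torch.Size([2, 16, 64])
```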
deeplotx-0.8.1/deeplotx/nn/rope.py (new file)

@@ -0,0 +1,42 @@
+from typing_extensions import override
+
+import torch
+
+from deeplotx.nn.base_neural_network import BaseNeuralNetwork
+
+DEFAULT_THETA = 10_000
+
+
+class RoPE(BaseNeuralNetwork):
+    def __init__(self, feature_dim: int, theta: int = DEFAULT_THETA,
+                 device: str | None = None, dtype: torch.dtype = torch.float32):
+        super().__init__(in_features=feature_dim, out_features=feature_dim, model_name=None,
+                         device=device, dtype=dtype)
+        assert feature_dim % 2 == 0, f'feature_dim ({feature_dim}) is not divisible by 2.'
+        self._theta = theta
+        self._num_groups = feature_dim // 2
+        self._inv_freq = 1.0 / (theta ** (torch.arange(start=0, end=self._num_groups, step=1,
+                                                       device=self.device, dtype=self.dtype).float() / self._num_groups))
+        self.register_buffer('inv_freq', self._inv_freq)
+
+    @property
+    def dim(self):
+        return self._dim
+
+    @property
+    def theta(self):
+        return self._theta
+
+    def rotate_half(self, _t: torch.Tensor) -> torch.Tensor:
+        return torch.cat((- _t[..., self._num_groups:], _t[..., :self._num_groups]), dim=-1)
+
+    @override
+    def forward(self, x: torch.Tensor) -> torch.Tensor:
+        x = self.ensure_device_and_dtype(x, device=self.device, dtype=self.dtype)
+        *other_dims, seq_len, feature_dim = x.shape
+        assert feature_dim == self.in_features, f"feature_dim of x doesn't match with defined feature_dim {self.in_features}."
+        t = torch.arange(start=0, end=seq_len, step=1, device=self.device, dtype=self.dtype)
+        freq = torch.outer(t, self._inv_freq)
+        emb = torch.cat((freq, freq), dim=-1)
+        sin_emb, cos_emb = emb.sin(), emb.cos()
+        return x * cos_emb + self.rotate_half(x) * sin_emb
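A minimal sketch of the standalone rotary positional module as added above; `feature_dim` must be even, and the last two input dimensions are treated as `(seq_len, feature_dim)`:

```python
import torch
from deeplotx.nn import RoPE

rope = RoPE(feature_dim=64)
q = torch.randn(2, 16, 64)
print(rope(q).shape)   # torch.Size([2, 16, 64]) -- same shape, positions rotated in
```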
{deeplotx-0.6.1 → deeplotx-0.8.1}/deeplotx/trainer/text_binary_classification_trainer.py

@@ -6,6 +6,7 @@ from torch import nn, optim
 from torch.utils.data import DataLoader, TensorDataset

 from deeplotx.encoder.long_text_encoder import LongTextEncoder
+from deeplotx.nn.attention import DEFAULT_THETA
 from deeplotx.nn.long_context_recursive_sequential import LongContextRecursiveSequential
 from deeplotx.trainer.base_trainer import BaseTrainer

@@ -24,8 +25,8 @@ class TextBinaryClassifierTrainer(BaseTrainer):
     def train(self, positive_texts: list[str], negative_texts: list[str],
               num_epochs: int, learning_rate: float = 2e-6, balancing_dataset: bool = True,
               train_loss_threshold: float = 0.0, valid_loss_threshold: float = 0.0,
-              alpha: float = 1e-4, rho: float = 0.2,
-
+              alpha: float = 1e-4, rho: float = 0.2, encoder_layers: int = 4, attn_heads: int = 6,
+              recursive_layers: int = 2, recursive_hidden_dim: int = 256, **kwargs) -> LongContextRecursiveSequential:
         if balancing_dataset:
             min_length = min(len(positive_texts), len(negative_texts))
             positive_texts = positive_texts[:min_length]
@@ -50,21 +51,24 @@ class TextBinaryClassifierTrainer(BaseTrainer):
         if self.model is None:
             ffn_layers = kwargs.get('ffn_layers', 5)
             ffn_expansion_factor = kwargs.get('ffn_expansion_factor', 2)
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+            bias = kwargs.get('bias', True)
+            dropout_rate = kwargs.get('dropout_rate', 0.1)
+            encoder_ffn_layers = kwargs.get('encoder_ffn_layers', ffn_layers)
+            encoder_expansion_factor = kwargs.get('encoder_expansion_factor', ffn_expansion_factor)
+            encoder_dropout_rate = kwargs.get('encoder_dropout_rate', dropout_rate)
+            attn_ffn_layers = kwargs.get('attn_ffn_layers', 1)
+            attn_expansion_factor = kwargs.get('attn_expansion_factor', ffn_expansion_factor)
+            attn_dropout_rate = kwargs.get('attn_dropout_rate', dropout_rate)
+            theta = kwargs.get('theta', DEFAULT_THETA)
+            self.model = LongContextRecursiveSequential(input_dim=feature_dim, output_dim=1, bias=bias,
+                                                        encoder_layers=encoder_layers, attn_heads=attn_heads,
+                                                        recursive_layers=recursive_layers, recursive_hidden_dim=recursive_hidden_dim,
+                                                        ffn_layers=ffn_layers, ffn_expansion_factor=ffn_expansion_factor, dropout_rate=dropout_rate,
+                                                        encoder_ffn_layers=encoder_ffn_layers, encoder_expansion_factor=encoder_expansion_factor,
+                                                        encoder_dropout_rate=encoder_dropout_rate, attn_ffn_layers=attn_ffn_layers,
+                                                        attn_expansion_factor=attn_expansion_factor, attn_dropout_rate=attn_dropout_rate,
+                                                        theta=theta).initialize_weights()
+        logger.debug(f'Training Model: \n{self.model}')
         loss_function = nn.BCELoss()
         optimizer = optim.Adamax(self.model.parameters(), lr=learning_rate)
         for epoch in range(num_epochs):
{deeplotx-0.6.1 → deeplotx-0.8.1}/deeplotx.egg-info/PKG-INFO

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: deeplotx
-Version: 0.6.1
+Version: 0.8.1
 Summary: Easy-2-use long text NLP toolkit.
 Requires-Python: >=3.10
 Description-Content-Type: text/markdown
@@ -13,7 +13,7 @@ Requires-Dist: python-dotenv
 Requires-Dist: torch
 Requires-Dist: transformers
 Requires-Dist: typing-extensions
-Requires-Dist: vortezwohl>=0.0.
+Requires-Dist: vortezwohl>=0.0.8
 Dynamic: license-file

 [](https://deepwiki.com/vortezwohl/DeepLoTX)
{deeplotx-0.6.1 → deeplotx-0.8.1}/deeplotx.egg-info/SOURCES.txt

@@ -12,6 +12,7 @@ deeplotx/encoder/encoder.py
 deeplotx/encoder/long_text_encoder.py
 deeplotx/encoder/longformer_encoder.py
 deeplotx/nn/__init__.py
+deeplotx/nn/attention.py
 deeplotx/nn/auto_regression.py
 deeplotx/nn/base_neural_network.py
 deeplotx/nn/feed_forward.py
@@ -19,8 +20,10 @@ deeplotx/nn/linear_regression.py
 deeplotx/nn/logistic_regression.py
 deeplotx/nn/long_context_auto_regression.py
 deeplotx/nn/long_context_recursive_sequential.py
+deeplotx/nn/multi_head_attention.py
 deeplotx/nn/recursive_sequential.py
-deeplotx/nn/self_attention.py
+deeplotx/nn/roformer_encoder.py
+deeplotx/nn/rope.py
 deeplotx/nn/softmax_regression.py
 deeplotx/similarity/__init__.py
 deeplotx/similarity/distribution.py
{deeplotx-0.6.1 → deeplotx-0.8.1}/pyproject.toml

@@ -1,6 +1,6 @@
 [project]
 name = "deeplotx"
-version = "0.6.1"
+version = "0.8.1"
 description = "Easy-2-use long text NLP toolkit."
 readme = "README.md"
 requires-python = ">=3.10"
@@ -13,5 +13,5 @@ dependencies = [
     "torch",
     "transformers",
     "typing-extensions",
-    "vortezwohl>=0.0.
+    "vortezwohl>=0.0.8",
 ]
deeplotx-0.6.1/deeplotx/nn/auto_regression.py (removed in 0.8.1)

@@ -1,15 +0,0 @@
-import torch
-
-from deeplotx.nn import RecursiveSequential
-
-
-class AutoRegression(RecursiveSequential):
-    def __init__(self, feature_dim: int, hidden_dim: int | None = None,
-                 recursive_layers: int = 2, ffn_layers: int = 1, ffn_expansion_factor: int | float = 2,
-                 ffn_bias: bool = True, ffn_dropout_rate: float = 0.05, model_name: str | None = None,
-                 device: str | None = None, dtype: torch.dtype | None = None):
-        super().__init__(input_dim=feature_dim, output_dim=feature_dim,
-                         hidden_dim=hidden_dim, recursive_layers=recursive_layers,
-                         ffn_layers=ffn_layers, ffn_expansion_factor=ffn_expansion_factor,
-                         ffn_bias=ffn_bias, ffn_dropout_rate=ffn_dropout_rate,
-                         model_name=model_name, device=device, dtype=dtype)
deeplotx-0.6.1/deeplotx/nn/long_context_auto_regression.py (removed in 0.8.1)

@@ -1,15 +0,0 @@
-import torch
-
-from deeplotx.nn import LongContextRecursiveSequential
-
-
-class LongContextAutoRegression(LongContextRecursiveSequential):
-    def __init__(self, feature_dim: int, hidden_dim: int | None = None,
-                 recursive_layers: int = 2, ffn_layers: int = 1, ffn_expansion_factor: int | float = 2,
-                 ffn_bias: bool = True, ffn_dropout_rate: float = 0.05, model_name: str | None = None,
-                 device: str | None = None, dtype: torch.dtype | None = None):
-        super().__init__(input_dim=feature_dim, output_dim=feature_dim,
-                         hidden_dim=hidden_dim, recursive_layers=recursive_layers,
-                         ffn_layers=ffn_layers, ffn_expansion_factor=ffn_expansion_factor,
-                         ffn_bias=ffn_bias, ffn_dropout_rate=ffn_dropout_rate,
-                         model_name=model_name, device=device, dtype=dtype)
deeplotx-0.6.1/deeplotx/nn/long_context_recursive_sequential.py (removed in 0.8.1)

@@ -1,34 +0,0 @@
-from typing_extensions import override
-
-import torch
-from torch import nn
-
-from deeplotx.nn.recursive_sequential import RecursiveSequential
-from deeplotx.nn.self_attention import SelfAttention
-
-
-class LongContextRecursiveSequential(RecursiveSequential):
-    def __init__(self, input_dim: int, output_dim: int,
-                 hidden_dim: int | None = None, recursive_layers: int = 2,
-                 ffn_layers: int = 1, ffn_expansion_factor: int | float = 2,
-                 ffn_bias: bool = True, ffn_dropout_rate: float = 0.05,
-                 model_name: str | None = None, device: str | None = None,
-                 dtype: torch.dtype | None = None, **kwargs):
-        super().__init__(input_dim=input_dim, output_dim=output_dim,
-                         hidden_dim=hidden_dim, recursive_layers=recursive_layers,
-                         ffn_layers=ffn_layers, ffn_expansion_factor=ffn_expansion_factor,
-                         ffn_bias=ffn_bias, ffn_dropout_rate=ffn_dropout_rate,
-                         model_name=model_name, device=device, dtype=dtype)
-        self.self_attention = SelfAttention(feature_dim=input_dim, bias=kwargs.get('attn_proj_bias', ffn_bias),
-                                            proj_layers=kwargs.get('attn_proj_layers', 1),
-                                            proj_expansion_factor=kwargs.get('attn_proj_expansion_factor', ffn_expansion_factor),
-                                            dropout_rate=kwargs.get('attn_proj_dropout_rate', ffn_dropout_rate))
-        self.__proj = nn.Linear(in_features=input_dim * 2, out_features=input_dim,
-                                bias=ffn_bias, device=self.device, dtype=self.dtype)
-
-    @override
-    def forward(self, x: torch.Tensor, state: tuple[torch.Tensor, torch.Tensor]) -> tuple[torch.Tensor, tuple[torch.Tensor, torch.Tensor]]:
-        x = self.ensure_device_and_dtype(x, device=self.device, dtype=self.dtype)
-        x = torch.cat([self.self_attention(x), x], dim=-1)
-        x = nn.LayerNorm(normalized_shape=x.shape[-1], eps=1e-9, device=self.device, dtype=self.dtype)(x)
-        return super().forward(self.__proj(x), state)
All remaining files listed above with +0 -0 are unchanged between 0.6.1 and 0.8.1.