deeplotx 0.5.6__py3-none-any.whl → 0.8.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
deeplotx/__init__.py CHANGED
@@ -5,12 +5,16 @@ __ROOT__ = os.path.dirname(os.path.abspath(__file__))
 
 from .encoder import Encoder, LongTextEncoder, LongformerEncoder
 from .nn import (
+    FeedForward,
     LinearRegression,
     LogisticRegression,
     SoftmaxRegression,
     RecursiveSequential,
     LongContextRecursiveSequential,
-    SelfAttention,
+    RoPE,
+    Attention,
+    MultiHeadAttention,
+    RoFormerEncoder,
     AutoRegression,
     LongContextAutoRegression
 )
deeplotx/encoder/encoder.py CHANGED
@@ -1,6 +1,7 @@
 import logging
 import os
 import math
+from requests.exceptions import ConnectTimeout, SSLError
 
 import torch
 from torch import nn
@@ -18,10 +19,27 @@ class Encoder(nn.Module):
         super().__init__()
         self.device = torch.device(device) if device is not None \
             else torch.device('cuda' if torch.cuda.is_available() else 'cpu')
-        self.tokenizer = AutoTokenizer.from_pretrained(pretrained_model_name_or_path=model_name_or_path,
-                                                       cache_dir=CACHE_PATH, _from_auto=True)
-        self.encoder = AutoModel.from_pretrained(pretrained_model_name_or_path=model_name_or_path,
-                                                 cache_dir=CACHE_PATH, _from_auto=True).to(self.device)
+        try:
+            self.tokenizer = AutoTokenizer.from_pretrained(pretrained_model_name_or_path=model_name_or_path,
+                                                           cache_dir=CACHE_PATH, _from_auto=True,
+                                                           trust_remote_code=True)
+            self.encoder = AutoModel.from_pretrained(pretrained_model_name_or_path=model_name_or_path,
+                                                     cache_dir=CACHE_PATH, _from_auto=True,
+                                                     trust_remote_code=True).to(self.device)
+        except ConnectTimeout:
+            self.tokenizer = AutoTokenizer.from_pretrained(pretrained_model_name_or_path=model_name_or_path,
+                                                           cache_dir=CACHE_PATH, _from_auto=True,
+                                                           trust_remote_code=True, local_files_only=True)
+            self.encoder = AutoModel.from_pretrained(pretrained_model_name_or_path=model_name_or_path,
+                                                     cache_dir=CACHE_PATH, _from_auto=True,
+                                                     trust_remote_code=True, local_files_only=True).to(self.device)
+        except SSLError:
+            self.tokenizer = AutoTokenizer.from_pretrained(pretrained_model_name_or_path=model_name_or_path,
+                                                           cache_dir=CACHE_PATH, _from_auto=True,
+                                                           trust_remote_code=True, local_files_only=True)
+            self.encoder = AutoModel.from_pretrained(pretrained_model_name_or_path=model_name_or_path,
+                                                     cache_dir=CACHE_PATH, _from_auto=True,
+                                                     trust_remote_code=True, local_files_only=True).to(self.device)
         self.embed_dim = self.encoder.config.max_position_embeddings
         logger.debug(f'{Encoder.__name__} initialized on device: {self.device}.')
 
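Both new exception branches retry the same `AutoTokenizer`/`AutoModel` loads with `local_files_only=True`, so a `ConnectTimeout` or `SSLError` during the online lookup falls back to whatever is already in the local cache. A condensed sketch of the same pattern written with a single combined `except` clause; the `load_encoder` helper and its return value are illustrative, not part of the package:

```python
from requests.exceptions import ConnectTimeout, SSLError
from transformers import AutoModel, AutoTokenizer

def load_encoder(model_name_or_path: str, cache_dir: str):
    # Shared keyword arguments for both the online attempt and the offline fallback.
    common = dict(pretrained_model_name_or_path=model_name_or_path,
                  cache_dir=cache_dir, trust_remote_code=True)
    try:
        # Online path: may reach out to the Hugging Face Hub.
        return AutoTokenizer.from_pretrained(**common), AutoModel.from_pretrained(**common)
    except (ConnectTimeout, SSLError):
        # Offline fallback: only use files already present in the cache directory.
        return (AutoTokenizer.from_pretrained(**common, local_files_only=True),
                AutoModel.from_pretrained(**common, local_files_only=True))
```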
deeplotx/encoder/long_text_encoder.py CHANGED
@@ -15,12 +15,14 @@ logger = logging.getLogger('deeplotx.embedding')
 class LongTextEncoder(Encoder):
     def __init__(self, max_length: int, chunk_size: int = 448,
                  overlapping: int = 32, model_name_or_path: str = DEFAULT_BERT,
-                 cache_capacity: int = 64, device: str | None = None):
+                 cache_capacity: int = 64, max_workers: int = 8, device: str | None = None):
         super().__init__(model_name_or_path=model_name_or_path, device=device)
+        assert overlapping < chunk_size, f'overlapping ({overlapping}) must be less than chunk size ({chunk_size}).'
         self._max_length = max_length
         self._chunk_size = chunk_size
         self._overlapping = overlapping
         self._cache = LRUCache(capacity=cache_capacity)
+        self._worker_group = ThreadPool(max_workers=max_workers)
 
     def __chunk_embedding(self, idx: int, x: torch.Tensor, mask: torch.Tensor) -> tuple[int, torch.Tensor]:
         return idx, super().forward(x, attention_mask=mask)
@@ -63,7 +65,7 @@ class LongTextEncoder(Encoder):
             _tmp_right = (i + 1) * self._chunk_size + self._overlapping
             chunks.append((i, torch.tensor([_text_to_input_ids[_tmp_left: _tmp_right]], dtype=torch.int, device=self.device),
                            torch.tensor([_text_to_input_ids_att_mask[_tmp_left: _tmp_right]], dtype=torch.int, device=self.device)))
-        embeddings = list(ThreadPool(max_workers=min(num_chunks + 1, 8)).map(self.__chunk_embedding, chunks))
+        embeddings = list(self._worker_group.map(self.__chunk_embedding, chunks))
         embeddings = sorted([x.returns for x in embeddings], key=lambda x: x[0], reverse=False)
         fin_embedding = [x[1] for x in embeddings]
         # write cache
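`LongTextEncoder` now owns a persistent `ThreadPool` sized by the new `max_workers` argument instead of creating one per call, and it validates `overlapping < chunk_size` up front. A construction sketch with illustrative sizes (the checkpoint defaults to whatever `DEFAULT_BERT` resolves to):

```python
from deeplotx import LongTextEncoder

# Illustrative values: 448-token chunks with a 32-token overlap, embedded by up to 8 worker threads.
encoder = LongTextEncoder(max_length=4096, chunk_size=448, overlapping=32,
                          cache_capacity=64, max_workers=8)
# Passing overlapping >= chunk_size would now fail the constructor's assertion.
```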
deeplotx/nn/__init__.py CHANGED
@@ -1,8 +1,13 @@
+from .base_neural_network import BaseNeuralNetwork
+from .feed_forward import FeedForward
 from .linear_regression import LinearRegression
 from .logistic_regression import LogisticRegression
 from .softmax_regression import SoftmaxRegression
 from .recursive_sequential import RecursiveSequential
 from .long_context_recursive_sequential import LongContextRecursiveSequential
-from .self_attention import SelfAttention
+from .rope import RoPE
+from .attention import Attention
+from .multi_head_attention import MultiHeadAttention
+from .roformer_encoder import RoFormerEncoder
 from .auto_regression import AutoRegression
 from .long_context_auto_regression import LongContextAutoRegression
deeplotx/nn/attention.py ADDED
@@ -0,0 +1,48 @@
+from typing_extensions import override
+
+import torch
+
+from deeplotx.nn.base_neural_network import BaseNeuralNetwork
+from deeplotx.nn.feed_forward import FeedForward
+from deeplotx.nn.rope import RoPE, DEFAULT_THETA
+
+
+class Attention(BaseNeuralNetwork):
+    def __init__(self, feature_dim: int, bias: bool = True, positional: bool = True,
+                 proj_layers: int = 1, proj_expansion_factor: int | float = 1.5, dropout_rate: float = 0.02,
+                 model_name: str | None = None, device: str | None = None, dtype: torch.dtype | None = None,
+                 **kwargs):
+        super().__init__(in_features=feature_dim, out_features=feature_dim, model_name=model_name,
+                         device=device, dtype=dtype)
+        self._positional = positional
+        self._feature_dim = feature_dim
+        self.q_proj = FeedForward(feature_dim=self._feature_dim, num_layers=proj_layers,
+                                  expansion_factor=proj_expansion_factor,
+                                  bias=bias, dropout_rate=dropout_rate, device=self.device, dtype=self.dtype)
+        self.k_proj = FeedForward(feature_dim=self._feature_dim, num_layers=proj_layers,
+                                  expansion_factor=proj_expansion_factor,
+                                  bias=bias, dropout_rate=dropout_rate, device=self.device, dtype=self.dtype)
+        self.v_proj = FeedForward(feature_dim=self._feature_dim, num_layers=proj_layers,
+                                  expansion_factor=proj_expansion_factor,
+                                  bias=bias, dropout_rate=dropout_rate, device=self.device, dtype=self.dtype)
+        if self._positional:
+            self.rope = RoPE(feature_dim=self._feature_dim, theta=kwargs.get('theta', DEFAULT_THETA),
+                             device=self.device, dtype=self.dtype)
+
+    def _attention(self, x: torch.Tensor, y: torch.Tensor, mask: torch.Tensor | None = None) -> torch.Tensor:
+        q, k = self.q_proj(x), self.k_proj(y)
+        if self._positional:
+            q, k = self.rope(q), self.rope(k)
+        attn = torch.matmul(q, k.transpose(-2, -1))
+        attn = attn / (self._feature_dim ** 0.5)
+        attn = attn.masked_fill(mask == 0, -1e9) if mask is not None else attn
+        return torch.softmax(attn, dim=-1)
+
+    @override
+    def forward(self, x: torch.Tensor, y: torch.Tensor | None = None, mask: torch.Tensor | None = None) -> torch.Tensor:
+        x = self.ensure_device_and_dtype(x, device=self.device, dtype=self.dtype)
+        y = x if y is None else self.ensure_device_and_dtype(y, device=self.device, dtype=self.dtype)
+        if mask is not None:
+            mask = self.ensure_device_and_dtype(mask, device=self.device, dtype=self.dtype)
+        v = self.v_proj(y)
+        return torch.matmul(self._attention(x, y, mask), v)
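The new `Attention` block is standard scaled dot-product attention, softmax(QK^T / sqrt(d)) V, with `FeedForward` stacks as the Q/K/V projections and optional rotary position embeddings applied to Q and K. A minimal self- and cross-attention sketch; the shapes and the `device='cpu'` pin are illustrative, assuming the usual (batch, seq_len, feature_dim) layout:

```python
import torch
from deeplotx import Attention

attn = Attention(feature_dim=64, positional=True, device='cpu')  # feature_dim must be even for RoPE
x = torch.randn(2, 16, 64)
out = attn(x)                      # self-attention: y defaults to x
print(out.shape)                   # torch.Size([2, 16, 64])

y = torch.randn(2, 24, 64)
mask = torch.ones(2, 16, 24)       # zero entries are filled with -1e9 before the softmax
out = attn(x, y=y, mask=mask)      # cross-attention over y
```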
deeplotx/nn/auto_regression.py CHANGED
@@ -4,9 +4,11 @@ from deeplotx.nn import RecursiveSequential
 
 
 class AutoRegression(RecursiveSequential):
-    def __init__(self, feature_dim: int, hidden_dim: int | None = None,
-                 recursive_layers: int = 2, model_name: str | None = None,
-                 device: str | None = None, dtype: torch.dtype | None = None):
-        super().__init__(input_dim=feature_dim, output_dim=feature_dim,
-                         hidden_dim=hidden_dim, recursive_layers=recursive_layers,
-                         model_name=model_name, device=device, dtype=dtype)
+    def __init__(self, feature_dim: int, bias: bool = True,
+                 recursive_layers: int = 1, recursive_hidden_dim: int | None = None,
+                 ffn_layers: int = 1, ffn_expansion_factor: int | float = 2, dropout_rate: float = 0.05,
+                 model_name: str | None = None, device: str | None = None, dtype: torch.dtype | None = None):
+        super().__init__(input_dim=feature_dim, output_dim=feature_dim, bias=bias,
+                         recursive_layers=recursive_layers, recursive_hidden_dim=recursive_hidden_dim,
+                         ffn_layers=ffn_layers, ffn_expansion_factor=ffn_expansion_factor,
+                         dropout_rate=dropout_rate, model_name=model_name, device=device, dtype=dtype)
deeplotx/nn/base_neural_network.py CHANGED
@@ -3,12 +3,14 @@ from abc import abstractmethod
 
 import torch
 from torch import nn
+from torch.nn import init
 
 DEFAULT_SUFFIX = 'dlx'
 
 
 class BaseNeuralNetwork(nn.Module):
-    def __init__(self, model_name: str | None = None, device: str | None = None, dtype: torch.dtype | None = None):
+    def __init__(self, in_features: int, out_features: int, model_name: str | None = None,
+                 device: str | None = None, dtype: torch.dtype | None = None):
         super().__init__()
         self._model_name = model_name \
             if model_name is not None \
@@ -16,6 +18,16 @@ class BaseNeuralNetwork(nn.Module):
         self.device = torch.device(device) if device is not None \
             else torch.device('cuda' if torch.cuda.is_available() else 'cpu')
         self.dtype = dtype if dtype is not None else torch.float32
+        self._in_features = in_features
+        self._out_features = out_features
+
+    @property
+    def in_features(self) -> int:
+        return self._in_features
+
+    @property
+    def out_features(self) -> int:
+        return self._out_features
 
     @staticmethod
     def ensure_device_and_dtype(x: torch.Tensor, device: torch.device, dtype: torch.dtype) -> torch.Tensor:
@@ -25,6 +37,44 @@ class BaseNeuralNetwork(nn.Module):
         x = x.to(dtype)
         return x
 
+    def initialize_weights(self):
+        for m in self.modules():
+            match m.__class__:
+                case nn.Linear:
+                    init.kaiming_normal_(m.weight, mode='fan_in', nonlinearity='leaky_relu')
+                    if m.bias is not None:
+                        init.constant_(m.bias, 0)
+                case nn.BatchNorm2d | nn.BatchNorm1d | nn.BatchNorm3d:
+                    init.constant_(m.weight, 1)
+                    init.constant_(m.bias, 0)
+                case nn.LSTM | nn.GRU:
+                    for name, param in m.named_parameters():
+                        _tmp_name = name.lower()
+                        if 'weight_ih' in _tmp_name:
+                            init.kaiming_normal_(param, mode='fan_in', nonlinearity='sigmoid')
+                        elif 'weight_hh' in _tmp_name:
+                            init.orthogonal_(param)
+                        elif 'bias' in _tmp_name:
+                            init.constant_(param, 0)
+                case _:
+                    pass
+        return self
+
+    def size(self) -> dict:
+        total_params = trainable_params = non_trainable_params = 0
+        for param in self.parameters():
+            params = param.numel()
+            total_params += params
+            if param.requires_grad:
+                trainable_params += params
+            else:
+                non_trainable_params += params
+        return {
+            'total': total_params,
+            'trainable': trainable_params,
+            'non_trainable': non_trainable_params
+        }
+
     def l1(self, _lambda: float = 1e-4) -> torch.Tensor:
         def _l1() -> torch.Tensor:
             l2_reg = torch.tensor(0., device=self.device, dtype=self.dtype)
@@ -66,3 +116,25 @@ class BaseNeuralNetwork(nn.Module):
         model_file_name = f'{model_name}.{_suffix}' if model_name is not None else f'{self._model_name}.{_suffix}'
         self.load_state_dict(torch.load(os.path.join(model_dir, model_file_name), map_location=self.device, weights_only=True))
         return self
+
+    def __str__(self):
+        formatted = super().__str__()
+        _line_len = len([sorted(formatted.splitlines(), key=lambda _: len(_), reverse=True)][0])
+        _splitter_1 = '=' * (_line_len + 10)
+        _splitter_2 = '-' * (_line_len + 10)
+        _size = self.size()
+        total_param = _size['total']
+        trainable_param = _size['trainable']
+        non_trainable_param = _size['non_trainable']
+        formatted = (f'{_splitter_1}\n'
+                     f'Model_Name: {self._model_name}\n'
+                     f'In_Features: {self.in_features}\n'
+                     f'Out_Features: {self.out_features}\n'
+                     f'Device: {self.device}\n'
+                     f'Dtype: {self.dtype}\n'
+                     f'Total_Parameters: {total_param}\n'
+                     f'Trainable_Parameters: {trainable_param}\n'
+                     f'NonTrainable_Parameters: {non_trainable_param}\n'
+                     f'{_splitter_2}'
+                     f'\n{formatted}\n{_splitter_1}')
+        return formatted
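`BaseNeuralNetwork` now records `in_features`/`out_features` and gains `initialize_weights()` (Kaiming/orthogonal initialization per module type), `size()` (parameter counts), and a banner-style `__str__`. A short sketch using `LinearRegression` purely as a convenient concrete subclass:

```python
from deeplotx import LinearRegression

model = LinearRegression(input_dim=256, output_dim=1).initialize_weights()
print(model.in_features, model.out_features)  # 256 1
print(model.size())   # {'total': ..., 'trainable': ..., 'non_trainable': ...}
print(model)          # banner with name, features, device, dtype and parameter counts
```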
deeplotx/nn/feed_forward.py ADDED
@@ -0,0 +1,53 @@
+from typing_extensions import override
+
+import torch
+from torch import nn
+
+from deeplotx.nn.base_neural_network import BaseNeuralNetwork
+
+
+class FeedForwardUnit(BaseNeuralNetwork):
+    def __init__(self, feature_dim: int, expansion_factor: int | float = 2,
+                 bias: bool = True, dropout_rate: float = 0.05, model_name: str | None = None,
+                 device: str | None = None, dtype: torch.dtype | None = None):
+        super().__init__(in_features=feature_dim, out_features=feature_dim, model_name=model_name, device=device, dtype=dtype)
+        self._dropout_rate = dropout_rate
+        self.up_proj = nn.Linear(in_features=feature_dim, out_features=int(feature_dim * expansion_factor),
+                                 bias=bias, device=self.device, dtype=self.dtype)
+        self.down_proj = nn.Linear(in_features=int(feature_dim * expansion_factor), out_features=feature_dim,
+                                   bias=bias, device=self.device, dtype=self.dtype)
+        self.parametric_relu = nn.PReLU(num_parameters=1, init=5e-3,
+                                        device=self.device, dtype=self.dtype)
+        self.layer_norm = nn.LayerNorm(normalized_shape=self.up_proj.in_features, eps=1e-9,
+                                       device=self.device, dtype=self.dtype)
+
+    @override
+    def forward(self, x: torch.Tensor) -> torch.Tensor:
+        x = self.ensure_device_and_dtype(x, device=self.device, dtype=self.dtype)
+        residual = x
+        x = self.layer_norm(x)
+        x = self.up_proj(x)
+        x = self.parametric_relu(x)
+        if self._dropout_rate > .0:
+            x = torch.dropout(x, p=self._dropout_rate, train=self.training)
+        return self.down_proj(x) + residual
+
+
+class FeedForward(BaseNeuralNetwork):
+    def __init__(self, feature_dim: int, num_layers: int = 1, expansion_factor: int | float = 2,
+                 bias: bool = True, dropout_rate: float = 0.05, model_name: str | None = None,
+                 device: str | None = None, dtype: torch.dtype | None = None):
+        if num_layers < 1:
+            raise ValueError('num_layers cannot be less than 1.')
+        super().__init__(in_features=feature_dim, out_features=feature_dim, model_name=model_name, device=device, dtype=dtype)
+        self.ffn_layers = nn.ModuleList([FeedForwardUnit(feature_dim=feature_dim,
+                                                         expansion_factor=expansion_factor, bias=bias,
+                                                         dropout_rate=dropout_rate,
+                                                         device=self.device, dtype=self.dtype) for _ in range(num_layers)])
+
+    @override
+    def forward(self, x: torch.Tensor) -> torch.Tensor:
+        x = self.ensure_device_and_dtype(x, device=self.device, dtype=self.dtype)
+        for ffn in self.ffn_layers:
+            x = ffn(x)
+        return x
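Each `FeedForwardUnit` is a pre-norm residual block (LayerNorm, up-projection, PReLU, optional dropout, down-projection, plus the input), and `FeedForward` chains `num_layers` independent units, so the feature dimension is preserved end to end. A minimal sketch with illustrative sizes:

```python
import torch
from deeplotx import FeedForward

ffn = FeedForward(feature_dim=128, num_layers=2, expansion_factor=2, dropout_rate=0.05, device='cpu')
x = torch.randn(4, 128)
print(ffn(x).shape)   # torch.Size([4, 128]); shape preserved by the residual blocks
```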
deeplotx/nn/linear_regression.py CHANGED
@@ -4,34 +4,22 @@ import torch
 from torch import nn
 
 from deeplotx.nn.base_neural_network import BaseNeuralNetwork
+from deeplotx.nn.feed_forward import FeedForward
 
 
 class LinearRegression(BaseNeuralNetwork):
-    def __init__(self, input_dim: int, output_dim: int, model_name: str | None = None,
-                 device: str | None = None, dtype: torch.dtype | None = None):
-        super().__init__(model_name=model_name, device=device, dtype=dtype)
-        self.fc1 = nn.Linear(input_dim, 1024, device=self.device, dtype=self.dtype)
-        self.fc1_to_fc4_res = nn.Linear(1024, 64, device=self.device, dtype=self.dtype)
-        self.fc2 = nn.Linear(1024, 768, device=self.device, dtype=self.dtype)
-        self.fc3 = nn.Linear(768, 128, device=self.device, dtype=self.dtype)
-        self.fc4 = nn.Linear(128, 64, device=self.device, dtype=self.dtype)
-        self.fc5 = nn.Linear(64, output_dim, device=self.device, dtype=self.dtype)
-        self.parametric_relu_1 = nn.PReLU(num_parameters=1, init=5e-3, device=self.device, dtype=self.dtype)
-        self.parametric_relu_2 = nn.PReLU(num_parameters=1, init=5e-3, device=self.device, dtype=self.dtype)
-        self.parametric_relu_3 = nn.PReLU(num_parameters=1, init=5e-3, device=self.device, dtype=self.dtype)
-        self.parametric_relu_4 = nn.PReLU(num_parameters=1, init=5e-3, device=self.device, dtype=self.dtype)
+    def __init__(self, input_dim: int, output_dim: int, num_layers: int = 1,
+                 expansion_factor: int | float = 1.5, bias: bool = True, dropout_rate: float = 0.1,
+                 model_name: str | None = None, device: str | None = None, dtype: torch.dtype | None = None):
+        super().__init__(in_features=input_dim, out_features=output_dim, model_name=model_name, device=device, dtype=dtype)
+        self.ffn = FeedForward(feature_dim=input_dim, num_layers=num_layers, expansion_factor=expansion_factor,
+                               bias=bias, dropout_rate=dropout_rate, device=self.device, dtype=self.dtype)
+        self.proj = nn.Linear(in_features=input_dim, out_features=output_dim,
+                              bias=bias, device=self.device, dtype=self.dtype)
 
     @override
     def forward(self, x: torch.Tensor) -> torch.Tensor:
         x = self.ensure_device_and_dtype(x, device=self.device, dtype=self.dtype)
-        fc1_out = self.parametric_relu_1(self.fc1(x))
-        x = nn.LayerNorm(normalized_shape=1024, eps=1e-9, device=self.device, dtype=self.dtype)(fc1_out)
-        x = torch.dropout(x, p=0.2, train=self.training)
-        x = self.parametric_relu_2(self.fc2(x))
-        x = nn.LayerNorm(normalized_shape=768, eps=1e-9, device=self.device, dtype=self.dtype)(x)
-        x = torch.dropout(x, p=0.2, train=self.training)
-        x = self.parametric_relu_3(self.fc3(x))
-        x = torch.dropout(x, p=0.2, train=self.training)
-        x = self.parametric_relu_4(self.fc4(x)) + self.fc1_to_fc4_res(fc1_out)
-        x = self.fc5(x)
-        return x
+        residual = x
+        x = self.ffn(x) + residual
+        return self.proj(x)
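`LinearRegression` is now a thin head on top of `FeedForward`: the input passes through the residual FFN stack (with one extra residual added here) and a final `nn.Linear` projection to `output_dim`. A usage sketch with illustrative dimensions:

```python
import torch
from deeplotx import LinearRegression

reg = LinearRegression(input_dim=256, output_dim=3, num_layers=2, dropout_rate=0.1, device='cpu')
x = torch.randn(8, 256)
print(reg(x).shape)   # torch.Size([8, 3])
```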
deeplotx/nn/logistic_regression.py CHANGED
@@ -6,9 +6,12 @@ from deeplotx.nn.linear_regression import LinearRegression
 
 
 class LogisticRegression(LinearRegression):
-    def __init__(self, input_dim: int, output_dim: int = 1, model_name: str | None = None,
+    def __init__(self, input_dim: int, output_dim: int = 1, num_layers: int = 1, expansion_factor: int | float = 1.5,
+                 bias: bool = True, dropout_rate: float = 0.1, model_name: str | None = None,
                  device: str | None = None, dtype: torch.dtype | None = None):
-        super().__init__(input_dim=input_dim, output_dim=output_dim, model_name=model_name, device=device, dtype=dtype)
+        super().__init__(input_dim=input_dim, output_dim=output_dim, num_layers=num_layers,
+                         expansion_factor=expansion_factor, bias=bias, dropout_rate=dropout_rate,
+                         model_name=model_name, device=device, dtype=dtype)
 
     @override
     def forward(self, x: torch.Tensor) -> torch.Tensor:
deeplotx/nn/long_context_auto_regression.py CHANGED
@@ -4,9 +4,13 @@ from deeplotx.nn import LongContextRecursiveSequential
 
 
 class LongContextAutoRegression(LongContextRecursiveSequential):
-    def __init__(self, feature_dim: int, hidden_dim: int | None = None,
-                 recursive_layers: int = 2, model_name: str | None = None,
-                 device: str | None = None, dtype: torch.dtype | None = None):
-        super().__init__(input_dim=feature_dim, output_dim=feature_dim,
-                         hidden_dim=hidden_dim, recursive_layers=recursive_layers,
-                         model_name=model_name, device=device, dtype=dtype)
+    def __init__(self, feature_dim: int, bias: bool = True,
+                 encoder_layers: int = 1, attn_heads: int = 1, recursive_layers: int = 1, recursive_hidden_dim: int | None = None,
+                 ffn_layers: int = 1, ffn_expansion_factor: int | float = 2, dropout_rate: float = 0.05,
+                 model_name: str | None = None, device: str | None = None, dtype: torch.dtype | None = None,
+                 **kwargs):
+        super().__init__(input_dim=feature_dim, output_dim=feature_dim, bias=bias,
+                         encoder_layers=encoder_layers, attn_heads=attn_heads,
+                         recursive_layers=recursive_layers, recursive_hidden_dim=recursive_hidden_dim,
+                         ffn_layers=ffn_layers, ffn_expansion_factor=ffn_expansion_factor, dropout_rate=dropout_rate,
+                         model_name=model_name, device=device, dtype=dtype, **kwargs)
deeplotx/nn/long_context_recursive_sequential.py CHANGED
@@ -3,26 +3,34 @@ from typing_extensions import override
 import torch
 from torch import nn
 
+from deeplotx.nn.attention import DEFAULT_THETA
 from deeplotx.nn.recursive_sequential import RecursiveSequential
-from deeplotx.nn.self_attention import SelfAttention
+from deeplotx.nn.roformer_encoder import RoFormerEncoder
 
 
 class LongContextRecursiveSequential(RecursiveSequential):
-    def __init__(self, input_dim: int, output_dim: int,
-                 hidden_dim: int | None = None, recursive_layers: int = 2,
-                 model_name: str | None = None, device: str | None = None,
-                 dtype: torch.dtype | None = None):
-        super().__init__(input_dim=input_dim, output_dim=output_dim,
-                         hidden_dim=hidden_dim, recursive_layers=recursive_layers,
+    def __init__(self, input_dim: int, output_dim: int, bias: bool = True,
+                 encoder_layers: int = 1, attn_heads: int = 1, recursive_layers: int = 2, recursive_hidden_dim: int | None = None,
+                 ffn_layers: int = 1, ffn_expansion_factor: int | float = 2, dropout_rate: float = 0.05,
+                 model_name: str | None = None, device: str | None = None, dtype: torch.dtype | None = None,
+                 **kwargs):
+        super().__init__(input_dim=input_dim, output_dim=output_dim, bias=bias,
+                         recursive_layers=recursive_layers, recursive_hidden_dim=recursive_hidden_dim,
+                         ffn_layers=ffn_layers, ffn_expansion_factor=ffn_expansion_factor, dropout_rate=dropout_rate,
                          model_name=model_name, device=device, dtype=dtype)
-        self._feature_dim = input_dim
-        self.self_attention = SelfAttention(feature_dim=input_dim)
-        self.proj = nn.Linear(in_features=input_dim * 2, out_features=input_dim,
-                              bias=True, device=self.device, dtype=self.dtype)
+        self.roformer_encoders = nn.ModuleList([RoFormerEncoder(feature_dim=input_dim, attn_heads=attn_heads, bias=bias,
+                                                                ffn_layers=kwargs.get('encoder_ffn_layers', ffn_layers),
+                                                                ffn_expansion_factor=kwargs.get('encoder_expansion_factor', ffn_expansion_factor),
+                                                                dropout_rate=kwargs.get('encoder_dropout_rate', dropout_rate),
+                                                                attn_ffn_layers=kwargs.get('attn_ffn_layers', 1),
+                                                                attn_expansion_factor=kwargs.get('attn_expansion_factor', ffn_expansion_factor),
+                                                                attn_dropout_rate=kwargs.get('attn_dropout_rate', dropout_rate),
+                                                                theta=kwargs.get('theta', DEFAULT_THETA),
+                                                                device=self.device, dtype=self.dtype) for _ in range(encoder_layers)])
 
     @override
     def forward(self, x: torch.Tensor, state: tuple[torch.Tensor, torch.Tensor]) -> tuple[torch.Tensor, tuple[torch.Tensor, torch.Tensor]]:
         x = self.ensure_device_and_dtype(x, device=self.device, dtype=self.dtype)
-        x = torch.cat([self.self_attention(x), x], dim=-1)
-        x = nn.LayerNorm(normalized_shape=x.shape[-1], eps=1e-9, device=self.device, dtype=self.dtype)(x)
-        return super().forward(self.proj(x), state)
+        for roformer_encoder in self.roformer_encoders:
+            x = roformer_encoder(x)
+        return super().forward(x, state)
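`LongContextRecursiveSequential` now runs the input through `encoder_layers` RoFormer blocks before the recurrent core, with encoder- and attention-specific settings picked out of `**kwargs` (`encoder_ffn_layers`, `encoder_expansion_factor`, `encoder_dropout_rate`, `attn_ffn_layers`, `attn_expansion_factor`, `attn_dropout_rate`, `theta`). A configuration sketch using only keys read in this hunk; the values and the CPU pin are illustrative:

```python
from deeplotx import LongContextRecursiveSequential

model = LongContextRecursiveSequential(
    input_dim=768, output_dim=1,
    encoder_layers=2, attn_heads=4,
    recursive_layers=2, recursive_hidden_dim=256,
    ffn_layers=2, ffn_expansion_factor=2, dropout_rate=0.05,
    # optional overrides, read via kwargs.get(...) above
    encoder_ffn_layers=1, attn_expansion_factor=1.5, theta=10_000,
    device='cpu',
)
state = model.initial_state(batch_size=4)   # inherited from RecursiveSequential
```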
deeplotx/nn/multi_head_attention.py ADDED
@@ -0,0 +1,34 @@
+from typing_extensions import override
+
+import torch
+from torch import nn
+
+from deeplotx.nn.base_neural_network import BaseNeuralNetwork
+from deeplotx.nn.attention import Attention
+
+
+class MultiHeadAttention(BaseNeuralNetwork):
+    def __init__(self, feature_dim: int, num_heads: int = 1, bias: bool = True, positional: bool = True,
+                 proj_layers: int = 1, proj_expansion_factor: int | float = 1.5, dropout_rate: float = 0.02,
+                 model_name: str | None = None, device: str | None = None, dtype: torch.dtype | None = None,
+                 **kwargs):
+        super().__init__(in_features=feature_dim, out_features=feature_dim, model_name=model_name,
+                         device=device, dtype=dtype)
+        self._num_heads = num_heads
+        self.expand_proj = nn.Linear(in_features=feature_dim, out_features=feature_dim * self._num_heads, bias=bias,
+                                     device=self.device, dtype=self.dtype)
+        self.attn_heads = nn.ModuleList([Attention(feature_dim=feature_dim, bias=bias, positional=positional,
+                                                   proj_layers=proj_layers, proj_expansion_factor=proj_expansion_factor,
+                                                   dropout_rate=dropout_rate, device=self.device, dtype=self.dtype,
+                                                   **kwargs) for _ in range(self._num_heads)])
+        self.out_proj = nn.Linear(in_features=feature_dim * self._num_heads, out_features=feature_dim, bias=bias,
+                                  device=self.device, dtype=self.dtype)
+
+    @override
+    def forward(self, x: torch.Tensor, y: torch.Tensor | None = None, mask: torch.Tensor | None = None) -> torch.Tensor:
+        x = self.ensure_device_and_dtype(x, device=self.device, dtype=self.dtype)
+        y = x if y is None else self.ensure_device_and_dtype(y, device=self.device, dtype=self.dtype)
+        x, y = self.expand_proj(x), self.expand_proj(y)
+        x_heads, y_heads = x.split(self.in_features, dim=-1), y.split(self.in_features, dim=-1)
+        head_outs = [self.attn_heads[_](x=x_heads[_], y=y_heads[_], mask=mask) for _ in range(self._num_heads)]
+        return self.out_proj(torch.concat(head_outs, dim=-1))
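Rather than reshaping into heads, `MultiHeadAttention` expands the input to `feature_dim * num_heads`, splits the expansion into per-head slices, runs an independent `Attention` module on each slice, and projects the concatenated head outputs back to `feature_dim`. A quick sketch (shapes and the CPU pin are illustrative):

```python
import torch
from deeplotx import MultiHeadAttention

mha = MultiHeadAttention(feature_dim=64, num_heads=4, positional=True, device='cpu')
x = torch.randn(2, 16, 64)
print(mha(x).shape)   # torch.Size([2, 16, 64])
```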
deeplotx/nn/recursive_sequential.py CHANGED
@@ -4,23 +4,27 @@ import torch
 from torch import nn
 
 from deeplotx.nn.base_neural_network import BaseNeuralNetwork
-from deeplotx.nn import LinearRegression
+from deeplotx.nn.feed_forward import FeedForward
 
 
 class RecursiveSequential(BaseNeuralNetwork):
-    def __init__(self, input_dim: int, output_dim: int,
-                 hidden_dim: int | None = None, recursive_layers: int = 2,
-                 model_name: str | None = None, device: str | None = None,
-                 dtype: torch.dtype | None = None):
-        super().__init__(model_name=model_name, device=device, dtype=dtype)
-        if hidden_dim is None:
-            hidden_dim = input_dim
-        self.lstm = nn.LSTM(input_size=input_dim, hidden_size=hidden_dim,
+    def __init__(self, input_dim: int, output_dim: int, bias: bool = True,
+                 recursive_layers: int = 1, recursive_hidden_dim: int | None = None,
+                 ffn_layers: int = 1, ffn_expansion_factor: int | float = 2, dropout_rate: float = 0.05,
+                 model_name: str | None = None, device: str | None = None, dtype: torch.dtype | None = None):
+        super().__init__(in_features=input_dim, out_features=output_dim, model_name=model_name,
+                         device=device, dtype=dtype)
+        if recursive_hidden_dim is None:
+            recursive_hidden_dim = input_dim
+        self.lstm = nn.LSTM(input_size=input_dim, hidden_size=recursive_hidden_dim,
                             num_layers=recursive_layers, batch_first=True,
                             bias=True, bidirectional=True, device=self.device,
                             dtype=self.dtype)
-        self.regressive_head = LinearRegression(input_dim=hidden_dim * 2, output_dim=output_dim,
-                                                device=self.device, dtype=self.dtype)
+        self.ffn = FeedForward(feature_dim=recursive_hidden_dim * 2, num_layers=ffn_layers,
+                               expansion_factor=ffn_expansion_factor, bias=bias, dropout_rate=dropout_rate,
+                               device=self.device, dtype=self.dtype)
+        self.__proj = nn.Linear(in_features=recursive_hidden_dim * 2, out_features=output_dim, bias=bias,
+                                device=self.device, dtype=self.dtype)
 
     def initial_state(self, batch_size: int = 1) -> tuple[torch.Tensor, torch.Tensor]:
         zeros = torch.zeros(self.lstm.num_layers * 2, batch_size, self.lstm.hidden_size, device=self.device, dtype=self.dtype)
@@ -32,7 +36,10 @@ class RecursiveSequential(BaseNeuralNetwork):
         state = (self.ensure_device_and_dtype(state[0], device=self.device, dtype=self.dtype),
                  self.ensure_device_and_dtype(state[1], device=self.device, dtype=self.dtype))
         x, (hidden_state, cell_state) = self.lstm(x, state)
-        x = self.regressive_head(x[:, -1, :])
+        x = x[:, -1, :]
+        residual = x
+        x = self.ffn(x) + residual
+        x = self.__proj(x)
         return x, (hidden_state, cell_state)
 
     @override
deeplotx/nn/roformer_encoder.py ADDED
@@ -0,0 +1,40 @@
+from typing_extensions import override
+
+import torch
+from torch import nn
+
+from deeplotx.nn.base_neural_network import BaseNeuralNetwork
+from deeplotx.nn.feed_forward import FeedForward
+from deeplotx.nn.multi_head_attention import MultiHeadAttention
+
+
+class RoFormerEncoder(BaseNeuralNetwork):
+    def __init__(self, feature_dim: int, attn_heads: int = 2, bias: bool = True,
+                 ffn_layers: int = 1, ffn_expansion_factor: int | float = 2,
+                 dropout_rate: float = 0.02, model_name: str | None = None,
+                 device: str | None = None, dtype: torch.dtype | None = None, **kwargs):
+        super().__init__(in_features=feature_dim, out_features=feature_dim,
+                         model_name=model_name, device=device, dtype=dtype)
+        self.attn = MultiHeadAttention(feature_dim=feature_dim, num_heads=attn_heads,
+                                       bias=bias, positional=True,
+                                       proj_layers=kwargs.get('attn_ffn_layers', 1),
+                                       proj_expansion_factor=kwargs.get('attn_expansion_factor', ffn_expansion_factor),
+                                       dropout_rate=kwargs.get('attn_dropout_rate', dropout_rate),
+                                       device=self.device, dtype=self.dtype, **kwargs)
+        self.ffn = FeedForward(feature_dim=feature_dim * 2, num_layers=ffn_layers,
+                               expansion_factor=ffn_expansion_factor,
+                               bias=bias, dropout_rate=dropout_rate,
+                               device=self.device, dtype=self.dtype)
+        self.layer_norm = nn.LayerNorm(normalized_shape=feature_dim, eps=1e-9,
+                                       device=self.device, dtype=self.dtype)
+        self.__proj = nn.Linear(in_features=feature_dim * 2, out_features=feature_dim,
+                                bias=bias, device=self.device, dtype=self.dtype)
+
+    @override
+    def forward(self, x: torch.Tensor, mask: torch.Tensor | None = None) -> torch.Tensor:
+        x = self.ensure_device_and_dtype(x, device=self.device, dtype=self.dtype)
+        if mask is not None:
+            mask = self.ensure_device_and_dtype(mask, device=self.device, dtype=self.dtype)
+        attn = self.attn(x=self.layer_norm(x), y=None, mask=mask)
+        x = torch.concat([attn, x], dim=-1)
+        return self.__proj(self.ffn(x))
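A `RoFormerEncoder` block pre-norms its input, applies rotary multi-head attention, concatenates the attention output with the raw input (doubling the width), and maps back to `feature_dim` through the `FeedForward` stack and a linear projection. A small sketch with illustrative shapes:

```python
import torch
from deeplotx import RoFormerEncoder

enc = RoFormerEncoder(feature_dim=64, attn_heads=2, device='cpu')
x = torch.randn(2, 16, 64)
print(enc(x).shape)   # torch.Size([2, 16, 64])
```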
deeplotx/nn/rope.py ADDED
@@ -0,0 +1,41 @@
+from typing_extensions import override
+
+import torch
+
+from deeplotx.nn.base_neural_network import BaseNeuralNetwork
+
+DEFAULT_THETA = 10_000
+
+
+class RoPE(BaseNeuralNetwork):
+    def __init__(self, feature_dim: int, theta: int = DEFAULT_THETA,
+                 device: str | None = None, dtype: torch.dtype = torch.float32):
+        super().__init__(in_features=feature_dim, out_features=feature_dim, model_name=None,
+                         device=device, dtype=dtype)
+        assert feature_dim % 2 == 0, f'feature_dim ({feature_dim}) is not divisible by 2.'
+        self._theta = theta
+        self._num_groups = feature_dim // 2
+        self._inv_freq = 1.0 / (theta ** (torch.arange(start=0, end=self._num_groups, step=1).float() / self._num_groups))
+        self.register_buffer('inv_freq', self._inv_freq)
+
+    @property
+    def dim(self):
+        return self._dim
+
+    @property
+    def theta(self):
+        return self._theta
+
+    def rotate_half(self, _t: torch.Tensor) -> torch.Tensor:
+        return torch.cat((- _t[..., self._num_groups:], _t[..., :self._num_groups]), dim=-1)
+
+    @override
+    def forward(self, x: torch.Tensor) -> torch.Tensor:
+        x = self.ensure_device_and_dtype(x, device=self.device, dtype=self.dtype)
+        *other_dims, seq_len, feature_dim = x.shape
+        assert feature_dim == self.in_features, f"feature_dim of x doesn't match with defined feature_dim {self.in_features}."
+        t = torch.arange(start=0, end=seq_len, step=1, device=self.device, dtype=self.dtype)
+        freq = torch.outer(t, self._inv_freq)
+        emb = torch.cat((freq, freq), dim=-1)
+        sin_emb, cos_emb = emb.sin(), emb.cos()
+        return x * cos_emb + self.rotate_half(x) * sin_emb
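`RoPE` precomputes inverse frequencies for `feature_dim // 2` rotation groups and applies the usual rotary identity `x * cos + rotate_half(x) * sin` per position. (The `dim` property returns `self._dim`, which does not appear to be assigned in `__init__`; the configured dimension is available as `in_features`.) A minimal sketch, shapes chosen for illustration:

```python
import torch
from deeplotx import RoPE

rope = RoPE(feature_dim=64, device='cpu')   # feature_dim must be even
x = torch.randn(2, 16, 64)                  # (batch, seq_len, feature_dim)
print(rope(x).shape)                        # torch.Size([2, 16, 64])
```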
deeplotx/nn/softmax_regression.py CHANGED
@@ -6,9 +6,12 @@ from deeplotx.nn.linear_regression import LinearRegression
 
 
 class SoftmaxRegression(LinearRegression):
-    def __init__(self, input_dim: int, output_dim: int, model_name: str | None = None,
+    def __init__(self, input_dim: int, output_dim: int, num_layers: int = 1, expansion_factor: int | float = 1.5,
+                 bias: bool = True, dropout_rate: float = 0.1, model_name: str | None = None,
                  device: str | None = None, dtype: torch.dtype | None = None):
-        super().__init__(input_dim=input_dim, output_dim=output_dim, model_name=model_name, device=device, dtype=dtype)
+        super().__init__(input_dim=input_dim, output_dim=output_dim, num_layers=num_layers,
+                         expansion_factor=expansion_factor, bias=bias, dropout_rate=dropout_rate,
+                         model_name=model_name, device=device, dtype=dtype)
 
     @override
     def forward(self, x: torch.Tensor) -> torch.Tensor:
deeplotx/trainer/text_binary_classification_trainer.py CHANGED
@@ -6,6 +6,7 @@ from torch import nn, optim
 from torch.utils.data import DataLoader, TensorDataset
 
 from deeplotx.encoder.long_text_encoder import LongTextEncoder
+from deeplotx.nn.attention import DEFAULT_THETA
 from deeplotx.nn.long_context_recursive_sequential import LongContextRecursiveSequential
 from deeplotx.trainer.base_trainer import BaseTrainer
 
@@ -24,8 +25,8 @@ class TextBinaryClassifierTrainer(BaseTrainer):
     def train(self, positive_texts: list[str], negative_texts: list[str],
               num_epochs: int, learning_rate: float = 2e-6, balancing_dataset: bool = True,
              train_loss_threshold: float = 0.0, valid_loss_threshold: float = 0.0,
-              alpha: float = 1e-4, rho: float = 0.2,
-              hidden_dim: int = 256, recursive_layers: int = 2) -> LongContextRecursiveSequential:
+              alpha: float = 1e-4, rho: float = 0.2, encoder_layers: int = 4, attn_heads: int = 6,
+              recursive_layers: int = 2, recursive_hidden_dim: int = 256, **kwargs) -> LongContextRecursiveSequential:
         if balancing_dataset:
             min_length = min(len(positive_texts), len(negative_texts))
             positive_texts = positive_texts[:min_length]
@@ -44,15 +45,30 @@ class TextBinaryClassifierTrainer(BaseTrainer):
         valid_dataset = TensorDataset(inputs[train_size:], labels[train_size:])
         self.train_dataset_loader = DataLoader(train_dataset, batch_size=self._batch_size, shuffle=True)
         self.valid_dataset_loader = DataLoader(valid_dataset, batch_size=self._batch_size, shuffle=True)
-
-        if self.model is not None and self.model.fc1.in_features != feature_dim:
+        if self.model is not None and self.model.in_features != feature_dim:
             logger.warning("The dimension of features doesn't match. A new model instance will be created.")
             self.model = None
         if self.model is None:
-            self.model = LongContextRecursiveSequential(input_dim=feature_dim, output_dim=1,
-                                                        hidden_dim=hidden_dim,
-                                                        recursive_layers=recursive_layers,
-                                                        device=self.device, dtype=dtype)
+            ffn_layers = kwargs.get('ffn_layers', 5)
+            ffn_expansion_factor = kwargs.get('ffn_expansion_factor', 2)
+            bias = kwargs.get('bias', True)
+            dropout_rate = kwargs.get('dropout_rate', 0.1)
+            encoder_ffn_layers = kwargs.get('encoder_ffn_layers', ffn_layers)
+            encoder_expansion_factor = kwargs.get('encoder_expansion_factor', ffn_expansion_factor)
+            encoder_dropout_rate = kwargs.get('encoder_dropout_rate', dropout_rate)
+            attn_ffn_layers = kwargs.get('attn_ffn_layers', 1)
+            attn_expansion_factor = kwargs.get('attn_expansion_factor', ffn_expansion_factor)
+            attn_dropout_rate = kwargs.get('attn_dropout_rate', dropout_rate)
+            theta = kwargs.get('theta', DEFAULT_THETA)
+            self.model = LongContextRecursiveSequential(input_dim=feature_dim, output_dim=1, bias=bias,
+                                                        encoder_layers=encoder_layers, attn_heads=attn_heads,
+                                                        recursive_layers=recursive_layers, recursive_hidden_dim=recursive_hidden_dim,
+                                                        ffn_layers=ffn_layers, ffn_expansion_factor=ffn_expansion_factor, dropout_rate=dropout_rate,
+                                                        encoder_ffn_layers=encoder_ffn_layers, encoder_expansion_factor=encoder_expansion_factor,
+                                                        encoder_dropout_rate=encoder_dropout_rate, attn_ffn_layers=attn_ffn_layers,
+                                                        attn_expansion_factor=attn_expansion_factor, attn_dropout_rate=attn_dropout_rate,
+                                                        theta=theta).initialize_weights()
+            logger.debug(f'Training Model: \n{self.model}')
         loss_function = nn.BCELoss()
         optimizer = optim.Adamax(self.model.parameters(), lr=learning_rate)
         for epoch in range(num_epochs):
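`TextBinaryClassifierTrainer.train()` now exposes the encoder and attention hyperparameters of `LongContextRecursiveSequential` directly and forwards the rest through `**kwargs`, then calls `initialize_weights()` on the freshly built model. A hedged sketch of a call, assuming a trainer instance named `trainer` has already been constructed (its constructor is not part of this diff) and using made-up text lists and hyperparameter values:

```python
# pos_texts / neg_texts are hypothetical lists of strings.
model = trainer.train(
    positive_texts=pos_texts, negative_texts=neg_texts,
    num_epochs=10, learning_rate=2e-6,
    encoder_layers=4, attn_heads=6,
    recursive_layers=2, recursive_hidden_dim=256,
    ffn_layers=5, dropout_rate=0.1, theta=10_000,   # forwarded via **kwargs
)
```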
deeplotx/util/__init__.py CHANGED
@@ -1,2 +1,2 @@
-from .hash import md5, sha1
+from .hash import md5, sha1, sha256, sha512
 from .read_file import read_file, get_files
{deeplotx-0.5.6.dist-info → deeplotx-0.8.0.dist-info}/METADATA RENAMED
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: deeplotx
-Version: 0.5.6
+Version: 0.8.0
 Summary: Easy-2-use long text NLP toolkit.
 Requires-Python: >=3.10
 Description-Content-Type: text/markdown
@@ -13,7 +13,7 @@ Requires-Dist: python-dotenv
 Requires-Dist: torch
 Requires-Dist: transformers
 Requires-Dist: typing-extensions
-Requires-Dist: vortezwohl>=0.0.6
+Requires-Dist: vortezwohl>=0.0.8
 Dynamic: license-file
 
 [![Ask DeepWiki](https://deepwiki.com/badge.svg)](https://deepwiki.com/vortezwohl/DeepLoTX)
@@ -163,6 +163,8 @@ Dynamic: license-file
 
 ```python
 from deeplotx import (
+    BaseNeuralNetwork,  # base class for deep neural networks
+    FeedForward,  # feed-forward network
     LinearRegression,  # linear regression
     LogisticRegression,  # logistic regression / binary / multi-label classification
     SoftmaxRegression,  # Softmax regression / multi-class classification
@@ -181,38 +183,54 @@ Dynamic: license-file
 
 import torch
 from torch import nn
-
+
 from deeplotx.nn.base_neural_network import BaseNeuralNetwork
-
-
-class LinearRegression(BaseNeuralNetwork):
-    def __init__(self, input_dim: int, output_dim: int, model_name: str | None = None,
-                 device: str | None = None, dtype: torch.dtype | None = None):
-        super().__init__(model_name=model_name, device=device, dtype=dtype)
-        self.fc1 = nn.Linear(input_dim, 1024, device=self.device, dtype=self.dtype)
-        self.fc1_to_fc4_res = nn.Linear(1024, 64, device=self.device, dtype=self.dtype)
-        self.fc2 = nn.Linear(1024, 768, device=self.device, dtype=self.dtype)
-        self.fc3 = nn.Linear(768, 128, device=self.device, dtype=self.dtype)
-        self.fc4 = nn.Linear(128, 64, device=self.device, dtype=self.dtype)
-        self.fc5 = nn.Linear(64, output_dim, device=self.device, dtype=self.dtype)
-        self.parametric_relu_1 = nn.PReLU(num_parameters=1, init=5e-3, device=self.device, dtype=self.dtype)
-        self.parametric_relu_2 = nn.PReLU(num_parameters=1, init=5e-3, device=self.device, dtype=self.dtype)
-        self.parametric_relu_3 = nn.PReLU(num_parameters=1, init=5e-3, device=self.device, dtype=self.dtype)
-        self.parametric_relu_4 = nn.PReLU(num_parameters=1, init=5e-3, device=self.device, dtype=self.dtype)
-
+
+
+class FeedForwardUnit(BaseNeuralNetwork):
+    def __init__(self, feature_dim: int, expansion_factor: int | float = 2,
+                 bias: bool = True, dropout_rate: float = 0.05, model_name: str | None = None,
+                 device: str | None = None, dtype: torch.dtype | None = None):
+        super().__init__(in_features=feature_dim, out_features=feature_dim, model_name=model_name, device=device, dtype=dtype)
+        self._dropout_rate = dropout_rate
+        self.fc1 = nn.Linear(feature_dim, int(feature_dim * expansion_factor), bias=bias,
+                             device=self.device, dtype=self.dtype)
+        self.fc2 = nn.Linear(int(feature_dim * expansion_factor), feature_dim, bias=bias,
+                             device=self.device, dtype=self.dtype)
+        self.parametric_relu_1 = nn.PReLU(num_parameters=1, init=5e-3,
+                                          device=self.device, dtype=self.dtype)
+        self.layer_norm = nn.LayerNorm(normalized_shape=self.fc1.in_features, eps=1e-9,
+                                       device=self.device, dtype=self.dtype)
+
     @override
-    def forward(self, x) -> torch.Tensor:
+    def forward(self, x: torch.Tensor) -> torch.Tensor:
         x = self.ensure_device_and_dtype(x, device=self.device, dtype=self.dtype)
-        fc1_out = self.parametric_relu_1(self.fc1(x))
-        x = nn.LayerNorm(normalized_shape=1024, eps=1e-9, device=self.device, dtype=self.dtype)(fc1_out)
-        x = torch.dropout(x, p=0.2, train=self.training)
-        x = self.parametric_relu_2(self.fc2(x))
-        x = nn.LayerNorm(normalized_shape=768, eps=1e-9, device=self.device, dtype=self.dtype)(x)
-        x = torch.dropout(x, p=0.2, train=self.training)
-        x = self.parametric_relu_3(self.fc3(x))
-        x = torch.dropout(x, p=0.2, train=self.training)
-        x = self.parametric_relu_4(self.fc4(x)) + self.fc1_to_fc4_res(fc1_out)
-        x = self.fc5(x)
+        residual = x
+        x = self.layer_norm(x)
+        x = self.fc1(x)
+        x = self.parametric_relu_1(x)
+        if self._dropout_rate > .0:
+            x = torch.dropout(x, p=self._dropout_rate, train=self.training)
+        return self.fc2(x) + residual
+
+
+class FeedForward(BaseNeuralNetwork):
+    def __init__(self, feature_dim: int, num_layers: int = 1, expansion_factor: int | float = 2,
+                 bias: bool = True, dropout_rate: float = 0.05, model_name: str | None = None,
+                 device: str | None = None, dtype: torch.dtype | None = None):
+        if num_layers < 1:
+            raise ValueError('num_layers cannot be less than 1.')
+        super().__init__(in_features=feature_dim, out_features=feature_dim, model_name=model_name, device=device, dtype=dtype)
+        self.ffn_layers = nn.ModuleList([FeedForwardUnit(feature_dim=feature_dim,
+                                                         expansion_factor=expansion_factor, bias=bias,
+                                                         dropout_rate=dropout_rate,
+                                                         device=self.device, dtype=self.dtype)] * num_layers)
+
+    @override
+    def forward(self, x: torch.Tensor) -> torch.Tensor:
+        x = self.ensure_device_and_dtype(x, device=self.device, dtype=self.dtype)
+        for ffn in self.ffn_layers:
+            x = ffn(x)
         return x
 ```
 
@@ -222,29 +240,34 @@ Dynamic: license-file
 from typing_extensions import override
 
 import torch
-from torch import nn, softmax
 
 from deeplotx.nn.base_neural_network import BaseNeuralNetwork
+from deeplotx.nn.feed_forward import FeedForward
 
 
 class SelfAttention(BaseNeuralNetwork):
-    def __init__(self, feature_dim: int, model_name: str | None = None,
-                 device: str | None = None, dtype: torch.dtype | None = None):
-        super().__init__(model_name=model_name, device=device, dtype=dtype)
+    def __init__(self, feature_dim: int, bias: bool = True, proj_layers: int = 1,
+                 proj_expansion_factor: int | float = 1.5, dropout_rate: float = 0.02,
+                 model_name: str | None = None, device: str | None = None, dtype: torch.dtype | None = None):
+        super().__init__(in_features=feature_dim, out_features=feature_dim, model_name=model_name,
+                         device=device, dtype=dtype)
         self._feature_dim = feature_dim
-        self.q_proj = nn.Linear(in_features=self._feature_dim, out_features=self._feature_dim,
-                                bias=True, device=self.device, dtype=self.dtype)
-        self.k_proj = nn.Linear(in_features=self._feature_dim, out_features=self._feature_dim,
-                                bias=True, device=self.device, dtype=self.dtype)
-        self.v_proj = nn.Linear(in_features=self._feature_dim, out_features=self._feature_dim,
-                                bias=True, device=self.device, dtype=self.dtype)
+        self.q_proj = FeedForward(feature_dim=self._feature_dim, num_layers=proj_layers,
+                                  expansion_factor=proj_expansion_factor,
+                                  bias=bias, dropout_rate=dropout_rate, device=self.device, dtype=self.dtype)
+        self.k_proj = FeedForward(feature_dim=self._feature_dim, num_layers=proj_layers,
+                                  expansion_factor=proj_expansion_factor,
+                                  bias=bias, dropout_rate=dropout_rate, device=self.device, dtype=self.dtype)
+        self.v_proj = FeedForward(feature_dim=self._feature_dim, num_layers=proj_layers,
+                                  expansion_factor=proj_expansion_factor,
+                                  bias=bias, dropout_rate=dropout_rate, device=self.device, dtype=self.dtype)
 
     def _attention(self, x: torch.Tensor, mask: torch.Tensor | None = None) -> torch.Tensor:
         q, k = self.q_proj(x), self.k_proj(x)
         attn = torch.matmul(q, k.transpose(-2, -1))
         attn = attn / (self._feature_dim ** 0.5)
         attn = attn.masked_fill(mask == 0, -1e9) if mask is not None else attn
-        return softmax(attn, dim=-1)
+        return torch.softmax(attn, dim=-1)
 
     @override
     def forward(self, x: torch.Tensor, mask: torch.Tensor | None = None) -> torch.Tensor:
deeplotx-0.8.0.dist-info/RECORD ADDED
@@ -0,0 +1,34 @@
+deeplotx/__init__.py,sha256=oNeA-vNu5YGiEQg0IcpKEdGh_Y_2uPvo2nqaNL_Zgv8,1159
+deeplotx/encoder/__init__.py,sha256=BrsF5_4O-4pfihYF2wjExDOoAY-03kGJTH-Mhez4tsE,129
+deeplotx/encoder/encoder.py,sha256=oSBdA-MiwMKNfTFJWR-RdvNS0G0qfX-Qchwy4LuwB00,3985
+deeplotx/encoder/long_text_encoder.py,sha256=PFR6jjGyg1N58TQlKsPaNQEd-EDl13Hyhu7A1KtGBbA,3743
+deeplotx/encoder/longformer_encoder.py,sha256=A8FXqd4mdHxSn_o_R689XtpT73ISDT788EgMQRGLC2g,1822
+deeplotx/nn/__init__.py,sha256=01I_yqx9GTa4wy3uNyAqhtxp66tDqxgMLC4Ky5Vnkrg,651
+deeplotx/nn/attention.py,sha256=HZ4nfFtkk7AnJ9nuoDSK6zIlIhZ_hbpZc3o6JQIBqJ8,2861
+deeplotx/nn/auto_regression.py,sha256=uISx29t_zkDGS8s2wvGB6wOGYZitQ4hQ7wyoQl4lcqY,857
+deeplotx/nn/base_neural_network.py,sha256=FjQEDFH810fJS7JV3aLgJZnaMqC6DH--wlBvuj-ghTc,5900
+deeplotx/nn/feed_forward.py,sha256=4ozj7EDalO9pb6JUhZtsJqE0r8bIHFApHRt2zTrl4ho,2931
+deeplotx/nn/linear_regression.py,sha256=QybSRfsf9PpgJAWixvrSNn3OYRKJXpSZMfqdzpw-Kd8,1280
+deeplotx/nn/logistic_regression.py,sha256=WfgHVNGIvAYsX2iea2wRlLgfbubYWyZkBLYpnpwOiyU,937
+deeplotx/nn/long_context_auto_regression.py,sha256=uy0k_g8wEfMH5nd5HCfrHA8dgEsuWBA2x8U-g3h4vQc,1054
+deeplotx/nn/long_context_recursive_sequential.py,sha256=i7kUml9RV_mkLRJ114UHsj9Gxw7LzJVQ4z8-REHa8-w,2682
+deeplotx/nn/multi_head_attention.py,sha256=3z73uGbvy3jszRy1B9nxGOJjlttHpcpRF8Qd09OEams,2267
+deeplotx/nn/recursive_sequential.py,sha256=8Z8vT70xTygusL-3w3QlB_B_k0xQSUU2ZTgC1LhEmzQ,2805
+deeplotx/nn/roformer_encoder.py,sha256=UJjKniNdMd0rfoYQcsX6bPo6Ceq_Z6EhwHe2kgqWC_k,2426
+deeplotx/nn/rope.py,sha256=r3hfENCxJv-td55L0CBfF8MkhEPd9V1vU_U6pDfCfr0,1754
+deeplotx/nn/softmax_regression.py,sha256=PN_1Zr_B_z5zYC_s_8k6c5fllOtxfJEvVvCmC9GRmx0,958
+deeplotx/similarity/__init__.py,sha256=s3u-KSgxjnMcWpIItKgXNltFMPQ7YY3CqsqHI-5F1c8,724
+deeplotx/similarity/distribution.py,sha256=wQGouuuW531pZeBRKBujXsdsoz4fDnPw7_GW81jwepc,1066
+deeplotx/similarity/set.py,sha256=zhGFxtSIXlWqvipBYzoiPahp4g0boAIoUiMfG0wl07A,686
+deeplotx/similarity/vector.py,sha256=WVbDHqykt-fvuILVrhUCtIFAOEjY_zvttrXGM9eylG0,1125
+deeplotx/trainer/__init__.py,sha256=Fl5DR9UecQc5VtBcczU9sx_HtPNoFohpuELOh-Jrsks,77
+deeplotx/trainer/base_trainer.py,sha256=z0MeAT-rRYmjeBXt0ckt7J1itYArR0Cx02wHesXUoZE,385
+deeplotx/trainer/text_binary_classification_trainer.py,sha256=QMLR4cC8NCUP-v7SOYVtCykNwahENmWHv9adaeTbYmA,6528
+deeplotx/util/__init__.py,sha256=5CH4MTeSgsmCe3LPMfvKoSBpwh6jDSBuHVElJvzQzgs,90
+deeplotx/util/hash.py,sha256=qbNU3RLBWGQYFVte9WZBAkZ1BkdjCXiKLDaKPN54KFk,662
+deeplotx/util/read_file.py,sha256=ptzouvEQeeW8KU5BrWNJlXw-vFXVrpS9SkAUxsu6A8A,612
+deeplotx-0.8.0.dist-info/licenses/LICENSE,sha256=IwGE9guuL-ryRPEKi6wFPI_zOhg7zDZbTYuHbSt_SAk,35823
+deeplotx-0.8.0.dist-info/METADATA,sha256=KprDhH6R0zsqk6tPUoC9FpWeljaaJTaTsYm2Au0qQwY,12251
+deeplotx-0.8.0.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
+deeplotx-0.8.0.dist-info/top_level.txt,sha256=hKg4pVDXZ-WWxkRfJFczRIll1Sv7VyfKCmzHLXbuh1U,9
+deeplotx-0.8.0.dist-info/RECORD,,
deeplotx/nn/self_attention.py DELETED
@@ -1,34 +0,0 @@
-from typing_extensions import override
-
-import torch
-from torch import nn, softmax
-
-from deeplotx.nn.base_neural_network import BaseNeuralNetwork
-
-
-class SelfAttention(BaseNeuralNetwork):
-    def __init__(self, feature_dim: int, model_name: str | None = None,
-                 device: str | None = None, dtype: torch.dtype | None = None):
-        super().__init__(model_name=model_name, device=device, dtype=dtype)
-        self._feature_dim = feature_dim
-        self.q_proj = nn.Linear(in_features=self._feature_dim, out_features=self._feature_dim,
-                                bias=True, device=self.device, dtype=self.dtype)
-        self.k_proj = nn.Linear(in_features=self._feature_dim, out_features=self._feature_dim,
-                                bias=True, device=self.device, dtype=self.dtype)
-        self.v_proj = nn.Linear(in_features=self._feature_dim, out_features=self._feature_dim,
-                                bias=True, device=self.device, dtype=self.dtype)
-
-    def _attention(self, x: torch.Tensor, mask: torch.Tensor | None = None) -> torch.Tensor:
-        q, k = self.q_proj(x), self.k_proj(x)
-        attn = torch.matmul(q, k.transpose(-2, -1))
-        attn = attn / (self._feature_dim ** 0.5)
-        attn = attn.masked_fill(mask == 0, -1e9) if mask is not None else attn
-        return softmax(attn, dim=-1)
-
-    @override
-    def forward(self, x: torch.Tensor, mask: torch.Tensor | None = None) -> torch.Tensor:
-        x = self.ensure_device_and_dtype(x, device=self.device, dtype=self.dtype)
-        if mask is not None:
-            mask = self.ensure_device_and_dtype(mask, device=self.device, dtype=self.dtype)
-        v = self.v_proj(x)
-        return torch.matmul(self._attention(x, mask), v)
deeplotx-0.5.6.dist-info/RECORD DELETED
@@ -1,30 +0,0 @@
-deeplotx/__init__.py,sha256=6El66QXHDrgNMsNIG9bG97WO8BhPK5btXbTikzx2ce4,1087
-deeplotx/encoder/__init__.py,sha256=BrsF5_4O-4pfihYF2wjExDOoAY-03kGJTH-Mhez4tsE,129
-deeplotx/encoder/encoder.py,sha256=2e1ZnZ37PkFQ5BePndmq42xmHp8YZh65Q1bd0dxejPI,2417
-deeplotx/encoder/long_text_encoder.py,sha256=4445FdVwubvDiebCWoT9wAUpYlMj6Mmd0OBxbFZ3ZIo,3565
-deeplotx/encoder/longformer_encoder.py,sha256=A8FXqd4mdHxSn_o_R689XtpT73ISDT788EgMQRGLC2g,1822
-deeplotx/nn/__init__.py,sha256=CS0UwyYKa8wI6vu6FBIYxvm-HAmw39MTMFlZDtqi6UA,444
-deeplotx/nn/auto_regression.py,sha256=7P63opWCWMqE2DigwbsL6kfXtFtJPz00Yo1RqflBz4A,572
-deeplotx/nn/base_neural_network.py,sha256=o9s0NqxkDcFZdipX8UrlbBmwYHOg7wPmzbjBEeGw63s,2902
-deeplotx/nn/linear_regression.py,sha256=7TbbplBgY70b1l5lKvTJMzDWQ8khQfnRCyMjObhVdEc,2180
-deeplotx/nn/logistic_regression.py,sha256=YiSLAon8gLDtMXAkPQ210sauod24eyJYYH50fPhj6T8,667
-deeplotx/nn/long_context_auto_regression.py,sha256=Z67Enq1kc1bERIrQW4jHeDQQmisOXhhjrtaPklnHkyw,605
-deeplotx/nn/long_context_recursive_sequential.py,sha256=_fKpPA7wt6B0kPyyig4xuhmLxygK19FSLgxW1Xa453M,1487
-deeplotx/nn/recursive_sequential.py,sha256=8YHZ-IdLyMJN5QVWPMuizDxLodAE9Bgdg1_YtIxFw7o,2247
-deeplotx/nn/self_attention.py,sha256=fb34wXnfgAGYJEhqa1l9AxMa-AHcCTOLbUlAfaGIK7Q,1766
-deeplotx/nn/softmax_regression.py,sha256=BeVk0G2H3zKG6bsQgPRNWuTxnnNmVI2zFZtCHgARAAc,688
-deeplotx/similarity/__init__.py,sha256=s3u-KSgxjnMcWpIItKgXNltFMPQ7YY3CqsqHI-5F1c8,724
-deeplotx/similarity/distribution.py,sha256=wQGouuuW531pZeBRKBujXsdsoz4fDnPw7_GW81jwepc,1066
-deeplotx/similarity/set.py,sha256=zhGFxtSIXlWqvipBYzoiPahp4g0boAIoUiMfG0wl07A,686
-deeplotx/similarity/vector.py,sha256=WVbDHqykt-fvuILVrhUCtIFAOEjY_zvttrXGM9eylG0,1125
-deeplotx/trainer/__init__.py,sha256=Fl5DR9UecQc5VtBcczU9sx_HtPNoFohpuELOh-Jrsks,77
-deeplotx/trainer/base_trainer.py,sha256=z0MeAT-rRYmjeBXt0ckt7J1itYArR0Cx02wHesXUoZE,385
-deeplotx/trainer/text_binary_classification_trainer.py,sha256=umuvikc09Op4SB43EqmYo8W3ung8DBjEOrMG3hCVFz8,4915
-deeplotx/util/__init__.py,sha256=JxqAK_WOOHcYVSTHBT1-WuBwWrPEVDTV3titeVWvNUM,74
-deeplotx/util/hash.py,sha256=qbNU3RLBWGQYFVte9WZBAkZ1BkdjCXiKLDaKPN54KFk,662
-deeplotx/util/read_file.py,sha256=ptzouvEQeeW8KU5BrWNJlXw-vFXVrpS9SkAUxsu6A8A,612
-deeplotx-0.5.6.dist-info/licenses/LICENSE,sha256=IwGE9guuL-ryRPEKi6wFPI_zOhg7zDZbTYuHbSt_SAk,35823
-deeplotx-0.5.6.dist-info/METADATA,sha256=vBUVgshgGG_vZmJT07C7CPEhMfBUmwbCtsIY06D_14g,10925
-deeplotx-0.5.6.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
-deeplotx-0.5.6.dist-info/top_level.txt,sha256=hKg4pVDXZ-WWxkRfJFczRIll1Sv7VyfKCmzHLXbuh1U,9
-deeplotx-0.5.6.dist-info/RECORD,,