PyPI - deeplotx - Versions diffs - 0.5.5__tar.gz → 0.6.1__tar.gz - Mend

deeplotx 0.5.5.tar.gz → 0.6.1.tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (39) hide show

{deeplotx-0.5.5 → deeplotx-0.6.1}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: deeplotx
-Version: 0.5.5
+Version: 0.6.1
 Summary: Easy-2-use long text NLP toolkit.
 Requires-Python: >=3.10
 Description-Content-Type: text/markdown
@@ -163,6 +163,8 @@ Dynamic: license-file
     ```python
     from deeplotx import (
+        BaseNeuralNetwork,  # 深度神经网络基类
+        FeedForward,  # 前馈神经网络
         LinearRegression,  # 线性回归
         LogisticRegression,  # 逻辑回归 / 二分类 / 多标签分类
         SoftmaxRegression,  # Softmax 回归 / 多分类
@@ -181,38 +183,54 @@ Dynamic: license-file
     import torch
     from torch import nn
     from deeplotx.nn.base_neural_network import BaseNeuralNetwork
-    class LinearRegression(BaseNeuralNetwork):
-        def __init__(self, input_dim: int, output_dim: int, model_name: str | None = None,
-                     device: str | None = None, dtype: torch.dtype | None = None):
-            super().__init__(model_name=model_name, device=device, dtype=dtype)
-            self.fc1 = nn.Linear(input_dim, 1024, device=self.device, dtype=self.dtype)
-            self.fc1_to_fc4_res = nn.Linear(1024, 64, device=self.device, dtype=self.dtype)
-            self.fc2 = nn.Linear(1024, 768, device=self.device, dtype=self.dtype)
-            self.fc3 = nn.Linear(768, 128, device=self.device, dtype=self.dtype)
-            self.fc4 = nn.Linear(128, 64, device=self.device, dtype=self.dtype)
-            self.fc5 = nn.Linear(64, output_dim, device=self.device, dtype=self.dtype)
-            self.parametric_relu_1 = nn.PReLU(num_parameters=1, init=5e-3, device=self.device, dtype=self.dtype)
-            self.parametric_relu_2 = nn.PReLU(num_parameters=1, init=5e-3, device=self.device, dtype=self.dtype)
-            self.parametric_relu_3 = nn.PReLU(num_parameters=1, init=5e-3, device=self.device, dtype=self.dtype)
-            self.parametric_relu_4 = nn.PReLU(num_parameters=1, init=5e-3, device=self.device, dtype=self.dtype)
+    class FeedForwardUnit(BaseNeuralNetwork):
+        def __init__(self, feature_dim: int, expansion_factor: int | float = 2,
+                    bias: bool = True, dropout_rate: float = 0.05, model_name: str | None = None,
+                    device: str | None = None, dtype: torch.dtype | None = None):
+            super().__init__(in_features=feature_dim, out_features=feature_dim, model_name=model_name, device=device, dtype=dtype)
+            self._dropout_rate = dropout_rate
+            self.fc1 = nn.Linear(feature_dim, int(feature_dim * expansion_factor), bias=bias,
+                                device=self.device, dtype=self.dtype)
+            self.fc2 = nn.Linear(int(feature_dim * expansion_factor), feature_dim, bias=bias,
+                                device=self.device, dtype=self.dtype)
+            self.parametric_relu_1 = nn.PReLU(num_parameters=1, init=5e-3,
+                                            device=self.device, dtype=self.dtype)
+            self.layer_norm = nn.LayerNorm(normalized_shape=self.fc1.in_features, eps=1e-9,
+                                        device=self.device, dtype=self.dtype)
         @override
-        def forward(self, x) -> torch.Tensor:
+        def forward(self, x: torch.Tensor) -> torch.Tensor:
             x = self.ensure_device_and_dtype(x, device=self.device, dtype=self.dtype)
-            fc1_out = self.parametric_relu_1(self.fc1(x))
-            x = nn.LayerNorm(normalized_shape=1024, eps=1e-9, device=self.device, dtype=self.dtype)(fc1_out)
-            x = torch.dropout(x, p=0.2, train=self.training)
-            x = self.parametric_relu_2(self.fc2(x))
-            x = nn.LayerNorm(normalized_shape=768, eps=1e-9, device=self.device, dtype=self.dtype)(x)
-            x = torch.dropout(x, p=0.2, train=self.training)
-            x = self.parametric_relu_3(self.fc3(x))
-            x = torch.dropout(x, p=0.2, train=self.training)
-            x = self.parametric_relu_4(self.fc4(x)) + self.fc1_to_fc4_res(fc1_out)
-            x = self.fc5(x)
+            residual = x
+            x = self.layer_norm(x)
+            x = self.fc1(x)
+            x = self.parametric_relu_1(x)
+            if self._dropout_rate > .0:
+                x = torch.dropout(x, p=self._dropout_rate, train=self.training)
+            return self.fc2(x) + residual
+    class FeedForward(BaseNeuralNetwork):
+        def __init__(self, feature_dim: int, num_layers: int = 1, expansion_factor: int | float = 2,
+                    bias: bool = True, dropout_rate: float = 0.05, model_name: str | None = None,
+                    device: str | None = None, dtype: torch.dtype | None = None):
+            if num_layers < 1:
+                raise ValueError('num_layers cannot be less than 1.')
+            super().__init__(in_features=feature_dim, out_features=feature_dim, model_name=model_name, device=device, dtype=dtype)
+            self.ffn_layers = nn.ModuleList([FeedForwardUnit(feature_dim=feature_dim,
+                                                            expansion_factor=expansion_factor, bias=bias,
+                                                            dropout_rate=dropout_rate,
+                                                            device=self.device, dtype=self.dtype)] * num_layers)
+        @override
+        def forward(self, x: torch.Tensor) -> torch.Tensor:
+            x = self.ensure_device_and_dtype(x, device=self.device, dtype=self.dtype)
+            for ffn in self.ffn_layers:
+                x = ffn(x)
             return x
     ```
@@ -222,29 +240,34 @@ Dynamic: license-file
     from typing_extensions import override
     import torch
-    from torch import nn, softmax
     from deeplotx.nn.base_neural_network import BaseNeuralNetwork
+    from deeplotx.nn.feed_forward import FeedForward
     class SelfAttention(BaseNeuralNetwork):
-        def __init__(self, feature_dim: int, model_name: str | None = None,
-                    device: str | None = None, dtype: torch.dtype | None = None):
-            super().__init__(model_name=model_name, device=device, dtype=dtype)
+        def __init__(self, feature_dim: int, bias: bool = True, proj_layers: int = 1,
+                    proj_expansion_factor: int | float = 1.5, dropout_rate: float = 0.02,
+                    model_name: str | None = None, device: str | None = None, dtype: torch.dtype | None = None):
+            super().__init__(in_features=feature_dim, out_features=feature_dim, model_name=model_name,
+                            device=device, dtype=dtype)
             self._feature_dim = feature_dim
-            self.q_proj = nn.Linear(in_features=self._feature_dim, out_features=self._feature_dim,
-                                    bias=True, device=self.device, dtype=self.dtype)
-            self.k_proj = nn.Linear(in_features=self._feature_dim, out_features=self._feature_dim,
-                                    bias=True, device=self.device, dtype=self.dtype)
-            self.v_proj = nn.Linear(in_features=self._feature_dim, out_features=self._feature_dim,
-                                    bias=True, device=self.device, dtype=self.dtype)
+            self.q_proj = FeedForward(feature_dim=self._feature_dim, num_layers=proj_layers,
+                                    expansion_factor=proj_expansion_factor,
+                                    bias=bias, dropout_rate=dropout_rate, device=self.device, dtype=self.dtype)
+            self.k_proj = FeedForward(feature_dim=self._feature_dim, num_layers=proj_layers,
+                                    expansion_factor=proj_expansion_factor,
+                                    bias=bias, dropout_rate=dropout_rate, device=self.device, dtype=self.dtype)
+            self.v_proj = FeedForward(feature_dim=self._feature_dim, num_layers=proj_layers,
+                                    expansion_factor=proj_expansion_factor,
+                                    bias=bias, dropout_rate=dropout_rate, device=self.device, dtype=self.dtype)
         def _attention(self, x: torch.Tensor, mask: torch.Tensor | None = None) -> torch.Tensor:
             q, k = self.q_proj(x), self.k_proj(x)
             attn = torch.matmul(q, k.transpose(-2, -1))
             attn = attn / (self._feature_dim ** 0.5)
             attn = attn.masked_fill(mask == 0, -1e9) if mask is not None else attn
-            return softmax(attn, dim=-1)
+            return torch.softmax(attn, dim=-1)
         @override
         def forward(self, x: torch.Tensor, mask: torch.Tensor | None = None) -> torch.Tensor:
@@ -265,7 +288,8 @@ Dynamic: license-file
     long_text_encoder = LongTextEncoder(
         max_length=2048,  # 最大文本大小, 超出截断
         chunk_size=448,  # 块大小 (按 Token 计)
-        overlapping=32  # 块间重叠大小 (按 Token 计)
+        overlapping=32,  # 块间重叠大小 (按 Token 计)
+        cache_capacity=512  # 缓存大小
     )
     trainer = TextBinaryClassifierTrainer(

{deeplotx-0.5.5 → deeplotx-0.6.1}/README.md RENAMED Viewed

@@ -145,6 +145,8 @@
     ```python
     from deeplotx import (
+        BaseNeuralNetwork,  # 深度神经网络基类
+        FeedForward,  # 前馈神经网络
         LinearRegression,  # 线性回归
         LogisticRegression,  # 逻辑回归 / 二分类 / 多标签分类
         SoftmaxRegression,  # Softmax 回归 / 多分类
@@ -163,38 +165,54 @@
     import torch
     from torch import nn
     from deeplotx.nn.base_neural_network import BaseNeuralNetwork
-    class LinearRegression(BaseNeuralNetwork):
-        def __init__(self, input_dim: int, output_dim: int, model_name: str | None = None,
-                     device: str | None = None, dtype: torch.dtype | None = None):
-            super().__init__(model_name=model_name, device=device, dtype=dtype)
-            self.fc1 = nn.Linear(input_dim, 1024, device=self.device, dtype=self.dtype)
-            self.fc1_to_fc4_res = nn.Linear(1024, 64, device=self.device, dtype=self.dtype)
-            self.fc2 = nn.Linear(1024, 768, device=self.device, dtype=self.dtype)
-            self.fc3 = nn.Linear(768, 128, device=self.device, dtype=self.dtype)
-            self.fc4 = nn.Linear(128, 64, device=self.device, dtype=self.dtype)
-            self.fc5 = nn.Linear(64, output_dim, device=self.device, dtype=self.dtype)
-            self.parametric_relu_1 = nn.PReLU(num_parameters=1, init=5e-3, device=self.device, dtype=self.dtype)
-            self.parametric_relu_2 = nn.PReLU(num_parameters=1, init=5e-3, device=self.device, dtype=self.dtype)
-            self.parametric_relu_3 = nn.PReLU(num_parameters=1, init=5e-3, device=self.device, dtype=self.dtype)
-            self.parametric_relu_4 = nn.PReLU(num_parameters=1, init=5e-3, device=self.device, dtype=self.dtype)
+    class FeedForwardUnit(BaseNeuralNetwork):
+        def __init__(self, feature_dim: int, expansion_factor: int | float = 2,
+                    bias: bool = True, dropout_rate: float = 0.05, model_name: str | None = None,
+                    device: str | None = None, dtype: torch.dtype | None = None):
+            super().__init__(in_features=feature_dim, out_features=feature_dim, model_name=model_name, device=device, dtype=dtype)
+            self._dropout_rate = dropout_rate
+            self.fc1 = nn.Linear(feature_dim, int(feature_dim * expansion_factor), bias=bias,
+                                device=self.device, dtype=self.dtype)
+            self.fc2 = nn.Linear(int(feature_dim * expansion_factor), feature_dim, bias=bias,
+                                device=self.device, dtype=self.dtype)
+            self.parametric_relu_1 = nn.PReLU(num_parameters=1, init=5e-3,
+                                            device=self.device, dtype=self.dtype)
+            self.layer_norm = nn.LayerNorm(normalized_shape=self.fc1.in_features, eps=1e-9,
+                                        device=self.device, dtype=self.dtype)
         @override
-        def forward(self, x) -> torch.Tensor:
+        def forward(self, x: torch.Tensor) -> torch.Tensor:
             x = self.ensure_device_and_dtype(x, device=self.device, dtype=self.dtype)
-            fc1_out = self.parametric_relu_1(self.fc1(x))
-            x = nn.LayerNorm(normalized_shape=1024, eps=1e-9, device=self.device, dtype=self.dtype)(fc1_out)
-            x = torch.dropout(x, p=0.2, train=self.training)
-            x = self.parametric_relu_2(self.fc2(x))
-            x = nn.LayerNorm(normalized_shape=768, eps=1e-9, device=self.device, dtype=self.dtype)(x)
-            x = torch.dropout(x, p=0.2, train=self.training)
-            x = self.parametric_relu_3(self.fc3(x))
-            x = torch.dropout(x, p=0.2, train=self.training)
-            x = self.parametric_relu_4(self.fc4(x)) + self.fc1_to_fc4_res(fc1_out)
-            x = self.fc5(x)
+            residual = x
+            x = self.layer_norm(x)
+            x = self.fc1(x)
+            x = self.parametric_relu_1(x)
+            if self._dropout_rate > .0:
+                x = torch.dropout(x, p=self._dropout_rate, train=self.training)
+            return self.fc2(x) + residual
+    class FeedForward(BaseNeuralNetwork):
+        def __init__(self, feature_dim: int, num_layers: int = 1, expansion_factor: int | float = 2,
+                    bias: bool = True, dropout_rate: float = 0.05, model_name: str | None = None,
+                    device: str | None = None, dtype: torch.dtype | None = None):
+            if num_layers < 1:
+                raise ValueError('num_layers cannot be less than 1.')
+            super().__init__(in_features=feature_dim, out_features=feature_dim, model_name=model_name, device=device, dtype=dtype)
+            self.ffn_layers = nn.ModuleList([FeedForwardUnit(feature_dim=feature_dim,
+                                                            expansion_factor=expansion_factor, bias=bias,
+                                                            dropout_rate=dropout_rate,
+                                                            device=self.device, dtype=self.dtype)] * num_layers)
+        @override
+        def forward(self, x: torch.Tensor) -> torch.Tensor:
+            x = self.ensure_device_and_dtype(x, device=self.device, dtype=self.dtype)
+            for ffn in self.ffn_layers:
+                x = ffn(x)
             return x
     ```
@@ -204,29 +222,34 @@
     from typing_extensions import override
     import torch
-    from torch import nn, softmax
     from deeplotx.nn.base_neural_network import BaseNeuralNetwork
+    from deeplotx.nn.feed_forward import FeedForward
     class SelfAttention(BaseNeuralNetwork):
-        def __init__(self, feature_dim: int, model_name: str | None = None,
-                    device: str | None = None, dtype: torch.dtype | None = None):
-            super().__init__(model_name=model_name, device=device, dtype=dtype)
+        def __init__(self, feature_dim: int, bias: bool = True, proj_layers: int = 1,
+                    proj_expansion_factor: int | float = 1.5, dropout_rate: float = 0.02,
+                    model_name: str | None = None, device: str | None = None, dtype: torch.dtype | None = None):
+            super().__init__(in_features=feature_dim, out_features=feature_dim, model_name=model_name,
+                            device=device, dtype=dtype)
             self._feature_dim = feature_dim
-            self.q_proj = nn.Linear(in_features=self._feature_dim, out_features=self._feature_dim,
-                                    bias=True, device=self.device, dtype=self.dtype)
-            self.k_proj = nn.Linear(in_features=self._feature_dim, out_features=self._feature_dim,
-                                    bias=True, device=self.device, dtype=self.dtype)
-            self.v_proj = nn.Linear(in_features=self._feature_dim, out_features=self._feature_dim,
-                                    bias=True, device=self.device, dtype=self.dtype)
+            self.q_proj = FeedForward(feature_dim=self._feature_dim, num_layers=proj_layers,
+                                    expansion_factor=proj_expansion_factor,
+                                    bias=bias, dropout_rate=dropout_rate, device=self.device, dtype=self.dtype)
+            self.k_proj = FeedForward(feature_dim=self._feature_dim, num_layers=proj_layers,
+                                    expansion_factor=proj_expansion_factor,
+                                    bias=bias, dropout_rate=dropout_rate, device=self.device, dtype=self.dtype)
+            self.v_proj = FeedForward(feature_dim=self._feature_dim, num_layers=proj_layers,
+                                    expansion_factor=proj_expansion_factor,
+                                    bias=bias, dropout_rate=dropout_rate, device=self.device, dtype=self.dtype)
         def _attention(self, x: torch.Tensor, mask: torch.Tensor | None = None) -> torch.Tensor:
             q, k = self.q_proj(x), self.k_proj(x)
             attn = torch.matmul(q, k.transpose(-2, -1))
             attn = attn / (self._feature_dim ** 0.5)
             attn = attn.masked_fill(mask == 0, -1e9) if mask is not None else attn
-            return softmax(attn, dim=-1)
+            return torch.softmax(attn, dim=-1)
         @override
         def forward(self, x: torch.Tensor, mask: torch.Tensor | None = None) -> torch.Tensor:
@@ -247,7 +270,8 @@
     long_text_encoder = LongTextEncoder(
         max_length=2048,  # 最大文本大小, 超出截断
         chunk_size=448,  # 块大小 (按 Token 计)
-        overlapping=32  # 块间重叠大小 (按 Token 计)
+        overlapping=32,  # 块间重叠大小 (按 Token 计)
+        cache_capacity=512  # 缓存大小
     )
     trainer = TextBinaryClassifierTrainer(

{deeplotx-0.5.5 → deeplotx-0.6.1}/deeplotx/__init__.py RENAMED Viewed

@@ -5,6 +5,8 @@ __ROOT__ = os.path.dirname(os.path.abspath(__file__))
 from .encoder import Encoder, LongTextEncoder, LongformerEncoder
 from .nn import (
+    BaseNeuralNetwork,
+    FeedForward,
     LinearRegression,
     LogisticRegression,
     SoftmaxRegression,

deeplotx-0.6.1/deeplotx/encoder/encoder.py ADDED Viewed

@@ -0,0 +1,66 @@
+import logging
+import os
+import math
+from requests.exceptions import ConnectTimeout, SSLError
+import torch
+from torch import nn
+from transformers import AutoTokenizer, AutoModel
+from deeplotx import __ROOT__
+CACHE_PATH = os.path.join(__ROOT__, '.cache')
+DEFAULT_BERT = 'FacebookAI/xlm-roberta-base'
+logger = logging.getLogger('deeplotx.embedding')
+class Encoder(nn.Module):
+    def __init__(self, model_name_or_path: str = DEFAULT_BERT, device: str | None = None):
+        super().__init__()
+        self.device = torch.device(device) if device is not None \
+            else torch.device('cuda' if torch.cuda.is_available() else 'cpu')
+        try:
+            self.tokenizer = AutoTokenizer.from_pretrained(pretrained_model_name_or_path=model_name_or_path,
+                                                           cache_dir=CACHE_PATH, _from_auto=True,
+                                                           trust_remote_code=True)
+            self.encoder = AutoModel.from_pretrained(pretrained_model_name_or_path=model_name_or_path,
+                                                     cache_dir=CACHE_PATH, _from_auto=True,
+                                                     trust_remote_code=True).to(self.device)
+        except ConnectTimeout:
+            self.tokenizer = AutoTokenizer.from_pretrained(pretrained_model_name_or_path=model_name_or_path,
+                                                           cache_dir=CACHE_PATH, _from_auto=True,
+                                                           trust_remote_code=True, local_files_only=True)
+            self.encoder = AutoModel.from_pretrained(pretrained_model_name_or_path=model_name_or_path,
+                                                     cache_dir=CACHE_PATH, _from_auto=True,
+                                                     trust_remote_code=True, local_files_only=True).to(self.device)
+        except SSLError:
+            self.tokenizer = AutoTokenizer.from_pretrained(pretrained_model_name_or_path=model_name_or_path,
+                                                           cache_dir=CACHE_PATH, _from_auto=True,
+                                                           trust_remote_code=True, local_files_only=True)
+            self.encoder = AutoModel.from_pretrained(pretrained_model_name_or_path=model_name_or_path,
+                                                     cache_dir=CACHE_PATH, _from_auto=True,
+                                                     trust_remote_code=True, local_files_only=True).to(self.device)
+        self.embed_dim = self.encoder.config.max_position_embeddings
+        logger.debug(f'{Encoder.__name__} initialized on device: {self.device}.')
+    def forward(self, input_ids: torch.Tensor, attention_mask: torch.Tensor, *args, **kwargs) -> torch.Tensor:
+        def _encoder(_input_tup: tuple[torch.Tensor, torch.Tensor]) -> torch.Tensor:
+            return self.encoder.forward(_input_tup[0], attention_mask=_input_tup[1]).last_hidden_state[:, 0, :]
+        num_chunks = math.ceil(input_ids.shape[-1] / self.embed_dim)
+        chunks = chunk_results = []
+        for i in range(num_chunks):
+            start_idx = i * self.embed_dim
+            end_idx = min(start_idx + self.embed_dim, input_ids.shape[-1])
+            chunks.append((input_ids[:, start_idx: end_idx], attention_mask[:, start_idx: end_idx]))
+        ori_mode = self.encoder.training
+        self.encoder.eval()
+        with torch.no_grad():
+            chunk_results = [_encoder(x) for x in chunks]
+        self.encoder.train(mode=ori_mode)
+        return torch.cat(chunk_results, dim=-1)
+    def encode(self, text: str) -> torch.Tensor:
+        _input_ids = torch.tensor([self.tokenizer.encode(text)], dtype=torch.long, device=self.device)
+        _att_mask = torch.tensor([[1] * _input_ids.shape[-1]], dtype=torch.int, device=self.device)
+        return self.forward(_input_ids, _att_mask).squeeze()

{deeplotx-0.5.5 → deeplotx-0.6.1}/deeplotx/encoder/long_text_encoder.py RENAMED Viewed

@@ -15,16 +15,21 @@ logger = logging.getLogger('deeplotx.embedding')
 class LongTextEncoder(Encoder):
     def __init__(self, max_length: int, chunk_size: int = 448,
                  overlapping: int = 32, model_name_or_path: str = DEFAULT_BERT,
-                 cache_capacity: int = 64, device: str | None = None):
+                 cache_capacity: int = 64, max_workers: int = 8, device: str | None = None):
         super().__init__(model_name_or_path=model_name_or_path, device=device)
         self._max_length = max_length
         self._chunk_size = chunk_size
         self._overlapping = overlapping
         self._cache = LRUCache(capacity=cache_capacity)
+        self._worker_group = ThreadPool(max_workers=max_workers)
     def __chunk_embedding(self, idx: int, x: torch.Tensor, mask: torch.Tensor) -> tuple[int, torch.Tensor]:
         return idx, super().forward(x, attention_mask=mask)
+    @override
+    def forward(self, text: str, flatten: bool = False, *args, **kwargs) -> torch.Tensor:
+        return self.encode(text=text, flatten=flatten)
     @override
     def encode(self, text: str, flatten: bool = False) -> torch.Tensor:
         def postprocess(tensors: list[torch.Tensor], _flatten: bool) -> torch.Tensor:
@@ -59,7 +64,7 @@ class LongTextEncoder(Encoder):
             _tmp_right = (i + 1) * self._chunk_size + self._overlapping
             chunks.append((i, torch.tensor([_text_to_input_ids[_tmp_left: _tmp_right]], dtype=torch.int, device=self.device),
                            torch.tensor([_text_to_input_ids_att_mask[_tmp_left: _tmp_right]], dtype=torch.int, device=self.device)))
-        embeddings = list(ThreadPool(max_workers=min(num_chunks + 1, 8)).map(self.__chunk_embedding, chunks))
+        embeddings = list(self._worker_group.map(self.__chunk_embedding, chunks))
         embeddings = sorted([x.returns for x in embeddings], key=lambda x: x[0], reverse=False)
         fin_embedding = [x[1] for x in embeddings]
         # write cache

{deeplotx-0.5.5 → deeplotx-0.6.1}/deeplotx/nn/__init__.py RENAMED Viewed

@@ -1,3 +1,5 @@
+from .base_neural_network import BaseNeuralNetwork
+from .feed_forward import FeedForward
 from .linear_regression import LinearRegression
 from .logistic_regression import LogisticRegression
 from .softmax_regression import SoftmaxRegression

{deeplotx-0.5.5 → deeplotx-0.6.1}/deeplotx/nn/auto_regression.py RENAMED Viewed

@@ -5,8 +5,11 @@ from deeplotx.nn import RecursiveSequential
 class AutoRegression(RecursiveSequential):
     def __init__(self, feature_dim: int, hidden_dim: int | None = None,
-                 recursive_layers: int = 2, model_name: str | None = None,
+                 recursive_layers: int = 2, ffn_layers: int = 1, ffn_expansion_factor: int | float = 2,
+                 ffn_bias: bool = True, ffn_dropout_rate: float = 0.05, model_name: str | None = None,
                  device: str | None = None, dtype: torch.dtype | None = None):
         super().__init__(input_dim=feature_dim, output_dim=feature_dim,
                          hidden_dim=hidden_dim, recursive_layers=recursive_layers,
+                         ffn_layers=ffn_layers, ffn_expansion_factor=ffn_expansion_factor,
+                         ffn_bias=ffn_bias, ffn_dropout_rate=ffn_dropout_rate,
                          model_name=model_name, device=device, dtype=dtype)

{deeplotx-0.5.5 → deeplotx-0.6.1}/deeplotx/nn/base_neural_network.py RENAMED Viewed

@@ -8,7 +8,8 @@ DEFAULT_SUFFIX = 'dlx'
 class BaseNeuralNetwork(nn.Module):
-    def __init__(self, model_name: str | None = None, device: str | None = None, dtype: torch.dtype | None = None):
+    def __init__(self, in_features: int, out_features: int, model_name: str | None = None,
+                 device: str | None = None, dtype: torch.dtype | None = None):
         super().__init__()
         self._model_name = model_name \
             if model_name is not None \
@@ -16,6 +17,16 @@ class BaseNeuralNetwork(nn.Module):
         self.device = torch.device(device) if device is not None \
             else torch.device('cuda' if torch.cuda.is_available() else 'cpu')
         self.dtype = dtype if dtype is not None else torch.float32
+        self._in_features = in_features
+        self._out_features = out_features
+    @property
+    def in_features(self) -> int:
+        return self._in_features
+    @property
+    def out_features(self) -> int:
+        return self._out_features
     @staticmethod
     def ensure_device_and_dtype(x: torch.Tensor, device: torch.device, dtype: torch.dtype) -> torch.Tensor:

deeplotx-0.6.1/deeplotx/nn/feed_forward.py ADDED Viewed

@@ -0,0 +1,53 @@
+from typing_extensions import override
+import torch
+from torch import nn
+from deeplotx.nn.base_neural_network import BaseNeuralNetwork
+class FeedForwardUnit(BaseNeuralNetwork):
+    def __init__(self, feature_dim: int, expansion_factor: int | float = 2,
+                 bias: bool = True, dropout_rate: float = 0.05, model_name: str | None = None,
+                 device: str | None = None, dtype: torch.dtype | None = None):
+        super().__init__(in_features=feature_dim, out_features=feature_dim, model_name=model_name, device=device, dtype=dtype)
+        self._dropout_rate = dropout_rate
+        self.fc1 = nn.Linear(feature_dim, int(feature_dim * expansion_factor), bias=bias,
+                             device=self.device, dtype=self.dtype)
+        self.fc2 = nn.Linear(int(feature_dim * expansion_factor), feature_dim, bias=bias,
+                             device=self.device, dtype=self.dtype)
+        self.parametric_relu_1 = nn.PReLU(num_parameters=1, init=5e-3,
+                                          device=self.device, dtype=self.dtype)
+        self.layer_norm = nn.LayerNorm(normalized_shape=self.fc1.in_features, eps=1e-9,
+                                       device=self.device, dtype=self.dtype)
+    @override
+    def forward(self, x: torch.Tensor) -> torch.Tensor:
+        x = self.ensure_device_and_dtype(x, device=self.device, dtype=self.dtype)
+        residual = x
+        x = self.layer_norm(x)
+        x = self.fc1(x)
+        x = self.parametric_relu_1(x)
+        if self._dropout_rate > .0:
+            x = torch.dropout(x, p=self._dropout_rate, train=self.training)
+        return self.fc2(x) + residual
+class FeedForward(BaseNeuralNetwork):
+    def __init__(self, feature_dim: int, num_layers: int = 1, expansion_factor: int | float = 2,
+                 bias: bool = True, dropout_rate: float = 0.05, model_name: str | None = None,
+                 device: str | None = None, dtype: torch.dtype | None = None):
+        if num_layers < 1:
+            raise ValueError('num_layers cannot be less than 1.')
+        super().__init__(in_features=feature_dim, out_features=feature_dim, model_name=model_name, device=device, dtype=dtype)
+        self.ffn_layers = nn.ModuleList([FeedForwardUnit(feature_dim=feature_dim,
+                                                         expansion_factor=expansion_factor, bias=bias,
+                                                         dropout_rate=dropout_rate,
+                                                         device=self.device, dtype=self.dtype)] * num_layers)
+    @override
+    def forward(self, x: torch.Tensor) -> torch.Tensor:
+        x = self.ensure_device_and_dtype(x, device=self.device, dtype=self.dtype)
+        for ffn in self.ffn_layers:
+            x = ffn(x)
+        return x

deeplotx-0.6.1/deeplotx/nn/linear_regression.py ADDED Viewed

@@ -0,0 +1,25 @@
+from typing_extensions import override
+import torch
+from torch import nn
+from deeplotx.nn.base_neural_network import BaseNeuralNetwork
+from deeplotx.nn.feed_forward import FeedForward
+class LinearRegression(BaseNeuralNetwork):
+    def __init__(self, input_dim: int, output_dim: int, num_layers: int = 1,
+                 expansion_factor: int | float = 1.5, bias: bool = True, dropout_rate: float = 0.1,
+                 model_name: str | None = None, device: str | None = None, dtype: torch.dtype | None = None):
+        super().__init__(in_features=input_dim, out_features=output_dim, model_name=model_name, device=device, dtype=dtype)
+        self.ffn = FeedForward(feature_dim=input_dim, num_layers=num_layers, expansion_factor=expansion_factor,
+                               bias=bias, dropout_rate=dropout_rate, device=self.device, dtype=self.dtype)
+        self.proj = nn.Linear(in_features=input_dim, out_features=output_dim,
+                              bias=bias, device=self.device, dtype=self.dtype)
+    @override
+    def forward(self, x: torch.Tensor) -> torch.Tensor:
+        x = self.ensure_device_and_dtype(x, device=self.device, dtype=self.dtype)
+        residual = x
+        x = self.ffn(x) + residual
+        return self.proj(x)

{deeplotx-0.5.5 → deeplotx-0.6.1}/deeplotx/nn/logistic_regression.py RENAMED Viewed

@@ -6,9 +6,12 @@ from deeplotx.nn.linear_regression import LinearRegression
 class LogisticRegression(LinearRegression):
-    def __init__(self, input_dim: int, output_dim: int = 1, model_name: str | None = None,
+    def __init__(self, input_dim: int, output_dim: int = 1, num_layers: int = 1, expansion_factor: int | float = 1.5,
+                 bias: bool = True, dropout_rate: float = 0.1, model_name: str | None = None,
                  device: str | None = None, dtype: torch.dtype | None = None):
-        super().__init__(input_dim=input_dim, output_dim=output_dim, model_name=model_name, device=device, dtype=dtype)
+        super().__init__(input_dim=input_dim, output_dim=output_dim, num_layers=num_layers,
+                         expansion_factor=expansion_factor, bias=bias, dropout_rate=dropout_rate,
+                         model_name=model_name, device=device, dtype=dtype)
     @override
     def forward(self, x: torch.Tensor) -> torch.Tensor:

{deeplotx-0.5.5 → deeplotx-0.6.1}/deeplotx/nn/long_context_auto_regression.py RENAMED Viewed

@@ -5,8 +5,11 @@ from deeplotx.nn import LongContextRecursiveSequential
 class LongContextAutoRegression(LongContextRecursiveSequential):
     def __init__(self, feature_dim: int, hidden_dim: int | None = None,
-                 recursive_layers: int = 2, model_name: str | None = None,
+                 recursive_layers: int = 2, ffn_layers: int = 1, ffn_expansion_factor: int | float = 2,
+                 ffn_bias: bool = True, ffn_dropout_rate: float = 0.05,  model_name: str | None = None,
                  device: str | None = None, dtype: torch.dtype | None = None):
         super().__init__(input_dim=feature_dim, output_dim=feature_dim,
                          hidden_dim=hidden_dim, recursive_layers=recursive_layers,
+                         ffn_layers=ffn_layers, ffn_expansion_factor=ffn_expansion_factor,
+                         ffn_bias=ffn_bias, ffn_dropout_rate=ffn_dropout_rate,
                          model_name=model_name, device=device, dtype=dtype)

{deeplotx-0.5.5 → deeplotx-0.6.1}/deeplotx/nn/long_context_recursive_sequential.py RENAMED Viewed

@@ -10,19 +10,25 @@ from deeplotx.nn.self_attention import SelfAttention
 class LongContextRecursiveSequential(RecursiveSequential):
     def __init__(self, input_dim: int, output_dim: int,
                  hidden_dim: int | None = None, recursive_layers: int = 2,
+                 ffn_layers: int = 1, ffn_expansion_factor: int | float = 2,
+                 ffn_bias: bool = True, ffn_dropout_rate: float = 0.05,
                  model_name: str | None = None, device: str | None = None,
-                 dtype: torch.dtype | None = None):
+                 dtype: torch.dtype | None = None, **kwargs):
         super().__init__(input_dim=input_dim, output_dim=output_dim,
                          hidden_dim=hidden_dim, recursive_layers=recursive_layers,
+                         ffn_layers=ffn_layers, ffn_expansion_factor=ffn_expansion_factor,
+                         ffn_bias=ffn_bias, ffn_dropout_rate=ffn_dropout_rate,
                          model_name=model_name, device=device, dtype=dtype)
-        self._feature_dim = input_dim
-        self.self_attention = SelfAttention(feature_dim=input_dim)
-        self.proj = nn.Linear(in_features=input_dim * 2, out_features=input_dim,
-                              bias=True, device=self.device, dtype=self.dtype)
+        self.self_attention = SelfAttention(feature_dim=input_dim, bias=kwargs.get('attn_proj_bias', ffn_bias),
+                                            proj_layers=kwargs.get('attn_proj_layers', 1),
+                                            proj_expansion_factor=kwargs.get('attn_proj_expansion_factor', ffn_expansion_factor),
+                                            dropout_rate=kwargs.get('attn_proj_dropout_rate', ffn_dropout_rate))
+        self.__proj = nn.Linear(in_features=input_dim * 2, out_features=input_dim,
+                                bias=ffn_bias, device=self.device, dtype=self.dtype)
     @override
     def forward(self, x: torch.Tensor, state: tuple[torch.Tensor, torch.Tensor]) -> tuple[torch.Tensor, tuple[torch.Tensor, torch.Tensor]]:
         x = self.ensure_device_and_dtype(x, device=self.device, dtype=self.dtype)
         x = torch.cat([self.self_attention(x), x], dim=-1)
         x = nn.LayerNorm(normalized_shape=x.shape[-1], eps=1e-9, device=self.device, dtype=self.dtype)(x)
-        return super().forward(self.proj(x), state)
+        return super().forward(self.__proj(x), state)

{deeplotx-0.5.5 → deeplotx-0.6.1}/deeplotx/nn/recursive_sequential.py RENAMED Viewed

@@ -4,23 +4,27 @@ import torch
 from torch import nn
 from deeplotx.nn.base_neural_network import BaseNeuralNetwork
-from deeplotx.nn import LinearRegression
+from deeplotx.nn.feed_forward import FeedForward
 class RecursiveSequential(BaseNeuralNetwork):
     def __init__(self, input_dim: int, output_dim: int,
                  hidden_dim: int | None = None, recursive_layers: int = 2,
+                 ffn_layers: int = 1, ffn_expansion_factor: int | float = 2,
+                 ffn_bias: bool = True, ffn_dropout_rate: float = 0.05,
                  model_name: str | None = None, device: str | None = None,
                  dtype: torch.dtype | None = None):
-        super().__init__(model_name=model_name, device=device, dtype=dtype)
+        super().__init__(in_features=input_dim, out_features=output_dim, model_name=model_name, device=device, dtype=dtype)
         if hidden_dim is None:
             hidden_dim = input_dim
         self.lstm = nn.LSTM(input_size=input_dim, hidden_size=hidden_dim,
                             num_layers=recursive_layers, batch_first=True,
                             bias=True, bidirectional=True, device=self.device,
                             dtype=self.dtype)
-        self.regressive_head = LinearRegression(input_dim=hidden_dim * 2, output_dim=output_dim,
-                                                device=self.device, dtype=self.dtype)
+        self.ffn = FeedForward(feature_dim=hidden_dim * 2, num_layers=ffn_layers, expansion_factor=ffn_expansion_factor,
+                               bias=ffn_bias, dropout_rate=ffn_dropout_rate, device=self.device, dtype=self.dtype)
+        self.__proj = nn.Linear(in_features=hidden_dim * 2, out_features=output_dim, bias=ffn_bias,
+                                device=self.device, dtype=self.dtype)
     def initial_state(self, batch_size: int = 1) -> tuple[torch.Tensor, torch.Tensor]:
         zeros = torch.zeros(self.lstm.num_layers * 2, batch_size, self.lstm.hidden_size, device=self.device, dtype=self.dtype)
@@ -32,7 +36,10 @@ class RecursiveSequential(BaseNeuralNetwork):
         state = (self.ensure_device_and_dtype(state[0], device=self.device, dtype=self.dtype),
                  self.ensure_device_and_dtype(state[1], device=self.device, dtype=self.dtype))
         x, (hidden_state, cell_state) = self.lstm(x, state)
-        x = self.regressive_head(x[:, -1, :])
+        x = x[:, -1, :]
+        residual = x
+        x = self.ffn(x) + residual
+        x = self.__proj(x)
         return x, (hidden_state, cell_state)
     @override

deeplotx-0.6.1/deeplotx/nn/self_attention.py ADDED Viewed

@@ -0,0 +1,39 @@
+from typing_extensions import override
+import torch
+from deeplotx.nn.base_neural_network import BaseNeuralNetwork
+from deeplotx.nn.feed_forward import FeedForward
+class SelfAttention(BaseNeuralNetwork):
+    def __init__(self, feature_dim: int, bias: bool = True, proj_layers: int = 1,
+                 proj_expansion_factor: int | float = 1.5, dropout_rate: float = 0.02,
+                 model_name: str | None = None, device: str | None = None, dtype: torch.dtype | None = None):
+        super().__init__(in_features=feature_dim, out_features=feature_dim, model_name=model_name,
+                         device=device, dtype=dtype)
+        self._feature_dim = feature_dim
+        self.q_proj = FeedForward(feature_dim=self._feature_dim, num_layers=proj_layers,
+                                  expansion_factor=proj_expansion_factor,
+                                  bias=bias, dropout_rate=dropout_rate, device=self.device, dtype=self.dtype)
+        self.k_proj = FeedForward(feature_dim=self._feature_dim, num_layers=proj_layers,
+                                  expansion_factor=proj_expansion_factor,
+                                  bias=bias, dropout_rate=dropout_rate, device=self.device, dtype=self.dtype)
+        self.v_proj = FeedForward(feature_dim=self._feature_dim, num_layers=proj_layers,
+                                  expansion_factor=proj_expansion_factor,
+                                  bias=bias, dropout_rate=dropout_rate, device=self.device, dtype=self.dtype)
+    def _attention(self, x: torch.Tensor, mask: torch.Tensor | None = None) -> torch.Tensor:
+        q, k = self.q_proj(x), self.k_proj(x)
+        attn = torch.matmul(q, k.transpose(-2, -1))
+        attn = attn / (self._feature_dim ** 0.5)
+        attn = attn.masked_fill(mask == 0, -1e9) if mask is not None else attn
+        return torch.softmax(attn, dim=-1)
+    @override
+    def forward(self, x: torch.Tensor, mask: torch.Tensor | None = None) -> torch.Tensor:
+        x = self.ensure_device_and_dtype(x, device=self.device, dtype=self.dtype)
+        if mask is not None:
+            mask = self.ensure_device_and_dtype(mask, device=self.device, dtype=self.dtype)
+        v = self.v_proj(x)
+        return torch.matmul(self._attention(x, mask), v)

{deeplotx-0.5.5 → deeplotx-0.6.1}/deeplotx/nn/softmax_regression.py RENAMED Viewed

@@ -6,9 +6,12 @@ from deeplotx.nn.linear_regression import LinearRegression
 class SoftmaxRegression(LinearRegression):
-    def __init__(self, input_dim: int, output_dim: int, model_name: str | None = None,
+    def __init__(self, input_dim: int, output_dim: int, num_layers: int = 1, expansion_factor: int | float = 1.5,
+                 bias: bool = True, dropout_rate: float = 0.1, model_name: str | None = None,
                  device: str | None = None, dtype: torch.dtype | None = None):
-        super().__init__(input_dim=input_dim, output_dim=output_dim, model_name=model_name, device=device, dtype=dtype)
+        super().__init__(input_dim=input_dim, output_dim=output_dim, num_layers=num_layers,
+                         expansion_factor=expansion_factor, bias=bias, dropout_rate=dropout_rate,
+                         model_name=model_name, device=device, dtype=dtype)
     @override
     def forward(self, x: torch.Tensor) -> torch.Tensor:

{deeplotx-0.5.5 → deeplotx-0.6.1}/deeplotx/trainer/text_binary_classification_trainer.py RENAMED Viewed

@@ -25,13 +25,13 @@ class TextBinaryClassifierTrainer(BaseTrainer):
               num_epochs: int, learning_rate: float = 2e-6, balancing_dataset: bool = True,
               train_loss_threshold: float = 0.0, valid_loss_threshold: float = 0.0,
               alpha: float = 1e-4, rho: float = 0.2,
-              hidden_dim: int = 256, recursive_layers: int = 2) -> LongContextRecursiveSequential:
+              hidden_dim: int = 256, recursive_layers: int = 2, **kwargs) -> LongContextRecursiveSequential:
         if balancing_dataset:
             min_length = min(len(positive_texts), len(negative_texts))
             positive_texts = positive_texts[:min_length]
             negative_texts = negative_texts[:min_length]
         all_texts = positive_texts + negative_texts
-        text_embeddings = [self._long_text_encoder.encode(x, flatten=False, use_cache=True) for x in all_texts]
+        text_embeddings = [self._long_text_encoder.encode(x, flatten=False) for x in all_texts]
         feature_dim = text_embeddings[0].shape[-1]
         dtype = text_embeddings[0].dtype
         labels = ([torch.tensor([1.], dtype=dtype, device=self.device) for _ in range(len(positive_texts))]
@@ -44,15 +44,27 @@ class TextBinaryClassifierTrainer(BaseTrainer):
         valid_dataset = TensorDataset(inputs[train_size:], labels[train_size:])
         self.train_dataset_loader = DataLoader(train_dataset, batch_size=self._batch_size, shuffle=True)
         self.valid_dataset_loader = DataLoader(valid_dataset, batch_size=self._batch_size, shuffle=True)
-        if self.model is not None and self.model.fc1.in_features != feature_dim:
+        if self.model is not None and self.model.in_features != feature_dim:
             logger.warning("The dimension of features doesn't match. A new model instance will be created.")
             self.model = None
         if self.model is None:
+            ffn_layers = kwargs.get('ffn_layers', 5)
+            ffn_expansion_factor = kwargs.get('ffn_expansion_factor', 2)
+            ffn_bias = kwargs.get('ffn_bias', True)
+            ffn_dropout_rate = kwargs.get('ffn_dropout_rate', 0.1)
             self.model = LongContextRecursiveSequential(input_dim=feature_dim, output_dim=1,
                                                         hidden_dim=hidden_dim,
                                                         recursive_layers=recursive_layers,
+                                                        ffn_layers=ffn_layers,
+                                                        ffn_expansion_factor=ffn_expansion_factor,
+                                                        ffn_bias=ffn_bias,
+                                                        ffn_dropout_rate=ffn_dropout_rate,
+                                                        attn_proj_layers=kwargs.get('attn_proj_layers', ffn_layers),
+                                                        attn_proj_bias=kwargs.get('attn_proj_bias', ffn_bias),
+                                                        attn_proj_expansion_factor=kwargs.get('attn_proj_expansion_factor', ffn_expansion_factor),
+                                                        attn_proj_dropout_rate=kwargs.get('attn_proj_dropout_rate', ffn_dropout_rate),
                                                         device=self.device, dtype=dtype)
+        logger.debug(f'Training Model: {self.model}')
         loss_function = nn.BCELoss()
         optimizer = optim.Adamax(self.model.parameters(), lr=learning_rate)
         for epoch in range(num_epochs):

{deeplotx-0.5.5 → deeplotx-0.6.1}/deeplotx.egg-info/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: deeplotx
-Version: 0.5.5
+Version: 0.6.1
 Summary: Easy-2-use long text NLP toolkit.
 Requires-Python: >=3.10
 Description-Content-Type: text/markdown
@@ -163,6 +163,8 @@ Dynamic: license-file
     ```python
     from deeplotx import (
+        BaseNeuralNetwork,  # 深度神经网络基类
+        FeedForward,  # 前馈神经网络
         LinearRegression,  # 线性回归
         LogisticRegression,  # 逻辑回归 / 二分类 / 多标签分类
         SoftmaxRegression,  # Softmax 回归 / 多分类
@@ -181,38 +183,54 @@ Dynamic: license-file
     import torch
     from torch import nn
     from deeplotx.nn.base_neural_network import BaseNeuralNetwork
-    class LinearRegression(BaseNeuralNetwork):
-        def __init__(self, input_dim: int, output_dim: int, model_name: str | None = None,
-                     device: str | None = None, dtype: torch.dtype | None = None):
-            super().__init__(model_name=model_name, device=device, dtype=dtype)
-            self.fc1 = nn.Linear(input_dim, 1024, device=self.device, dtype=self.dtype)
-            self.fc1_to_fc4_res = nn.Linear(1024, 64, device=self.device, dtype=self.dtype)
-            self.fc2 = nn.Linear(1024, 768, device=self.device, dtype=self.dtype)
-            self.fc3 = nn.Linear(768, 128, device=self.device, dtype=self.dtype)
-            self.fc4 = nn.Linear(128, 64, device=self.device, dtype=self.dtype)
-            self.fc5 = nn.Linear(64, output_dim, device=self.device, dtype=self.dtype)
-            self.parametric_relu_1 = nn.PReLU(num_parameters=1, init=5e-3, device=self.device, dtype=self.dtype)
-            self.parametric_relu_2 = nn.PReLU(num_parameters=1, init=5e-3, device=self.device, dtype=self.dtype)
-            self.parametric_relu_3 = nn.PReLU(num_parameters=1, init=5e-3, device=self.device, dtype=self.dtype)
-            self.parametric_relu_4 = nn.PReLU(num_parameters=1, init=5e-3, device=self.device, dtype=self.dtype)
+    class FeedForwardUnit(BaseNeuralNetwork):
+        def __init__(self, feature_dim: int, expansion_factor: int | float = 2,
+                    bias: bool = True, dropout_rate: float = 0.05, model_name: str | None = None,
+                    device: str | None = None, dtype: torch.dtype | None = None):
+            super().__init__(in_features=feature_dim, out_features=feature_dim, model_name=model_name, device=device, dtype=dtype)
+            self._dropout_rate = dropout_rate
+            self.fc1 = nn.Linear(feature_dim, int(feature_dim * expansion_factor), bias=bias,
+                                device=self.device, dtype=self.dtype)
+            self.fc2 = nn.Linear(int(feature_dim * expansion_factor), feature_dim, bias=bias,
+                                device=self.device, dtype=self.dtype)
+            self.parametric_relu_1 = nn.PReLU(num_parameters=1, init=5e-3,
+                                            device=self.device, dtype=self.dtype)
+            self.layer_norm = nn.LayerNorm(normalized_shape=self.fc1.in_features, eps=1e-9,
+                                        device=self.device, dtype=self.dtype)
         @override
-        def forward(self, x) -> torch.Tensor:
+        def forward(self, x: torch.Tensor) -> torch.Tensor:
             x = self.ensure_device_and_dtype(x, device=self.device, dtype=self.dtype)
-            fc1_out = self.parametric_relu_1(self.fc1(x))
-            x = nn.LayerNorm(normalized_shape=1024, eps=1e-9, device=self.device, dtype=self.dtype)(fc1_out)
-            x = torch.dropout(x, p=0.2, train=self.training)
-            x = self.parametric_relu_2(self.fc2(x))
-            x = nn.LayerNorm(normalized_shape=768, eps=1e-9, device=self.device, dtype=self.dtype)(x)
-            x = torch.dropout(x, p=0.2, train=self.training)
-            x = self.parametric_relu_3(self.fc3(x))
-            x = torch.dropout(x, p=0.2, train=self.training)
-            x = self.parametric_relu_4(self.fc4(x)) + self.fc1_to_fc4_res(fc1_out)
-            x = self.fc5(x)
+            residual = x
+            x = self.layer_norm(x)
+            x = self.fc1(x)
+            x = self.parametric_relu_1(x)
+            if self._dropout_rate > .0:
+                x = torch.dropout(x, p=self._dropout_rate, train=self.training)
+            return self.fc2(x) + residual
+    class FeedForward(BaseNeuralNetwork):
+        def __init__(self, feature_dim: int, num_layers: int = 1, expansion_factor: int | float = 2,
+                    bias: bool = True, dropout_rate: float = 0.05, model_name: str | None = None,
+                    device: str | None = None, dtype: torch.dtype | None = None):
+            if num_layers < 1:
+                raise ValueError('num_layers cannot be less than 1.')
+            super().__init__(in_features=feature_dim, out_features=feature_dim, model_name=model_name, device=device, dtype=dtype)
+            self.ffn_layers = nn.ModuleList([FeedForwardUnit(feature_dim=feature_dim,
+                                                            expansion_factor=expansion_factor, bias=bias,
+                                                            dropout_rate=dropout_rate,
+                                                            device=self.device, dtype=self.dtype)] * num_layers)
+        @override
+        def forward(self, x: torch.Tensor) -> torch.Tensor:
+            x = self.ensure_device_and_dtype(x, device=self.device, dtype=self.dtype)
+            for ffn in self.ffn_layers:
+                x = ffn(x)
             return x
     ```
@@ -222,29 +240,34 @@ Dynamic: license-file
     from typing_extensions import override
     import torch
-    from torch import nn, softmax
     from deeplotx.nn.base_neural_network import BaseNeuralNetwork
+    from deeplotx.nn.feed_forward import FeedForward
     class SelfAttention(BaseNeuralNetwork):
-        def __init__(self, feature_dim: int, model_name: str | None = None,
-                    device: str | None = None, dtype: torch.dtype | None = None):
-            super().__init__(model_name=model_name, device=device, dtype=dtype)
+        def __init__(self, feature_dim: int, bias: bool = True, proj_layers: int = 1,
+                    proj_expansion_factor: int | float = 1.5, dropout_rate: float = 0.02,
+                    model_name: str | None = None, device: str | None = None, dtype: torch.dtype | None = None):
+            super().__init__(in_features=feature_dim, out_features=feature_dim, model_name=model_name,
+                            device=device, dtype=dtype)
             self._feature_dim = feature_dim
-            self.q_proj = nn.Linear(in_features=self._feature_dim, out_features=self._feature_dim,
-                                    bias=True, device=self.device, dtype=self.dtype)
-            self.k_proj = nn.Linear(in_features=self._feature_dim, out_features=self._feature_dim,
-                                    bias=True, device=self.device, dtype=self.dtype)
-            self.v_proj = nn.Linear(in_features=self._feature_dim, out_features=self._feature_dim,
-                                    bias=True, device=self.device, dtype=self.dtype)
+            self.q_proj = FeedForward(feature_dim=self._feature_dim, num_layers=proj_layers,
+                                    expansion_factor=proj_expansion_factor,
+                                    bias=bias, dropout_rate=dropout_rate, device=self.device, dtype=self.dtype)
+            self.k_proj = FeedForward(feature_dim=self._feature_dim, num_layers=proj_layers,
+                                    expansion_factor=proj_expansion_factor,
+                                    bias=bias, dropout_rate=dropout_rate, device=self.device, dtype=self.dtype)
+            self.v_proj = FeedForward(feature_dim=self._feature_dim, num_layers=proj_layers,
+                                    expansion_factor=proj_expansion_factor,
+                                    bias=bias, dropout_rate=dropout_rate, device=self.device, dtype=self.dtype)
         def _attention(self, x: torch.Tensor, mask: torch.Tensor | None = None) -> torch.Tensor:
             q, k = self.q_proj(x), self.k_proj(x)
             attn = torch.matmul(q, k.transpose(-2, -1))
             attn = attn / (self._feature_dim ** 0.5)
             attn = attn.masked_fill(mask == 0, -1e9) if mask is not None else attn
-            return softmax(attn, dim=-1)
+            return torch.softmax(attn, dim=-1)
         @override
         def forward(self, x: torch.Tensor, mask: torch.Tensor | None = None) -> torch.Tensor:
@@ -265,7 +288,8 @@ Dynamic: license-file
     long_text_encoder = LongTextEncoder(
         max_length=2048,  # 最大文本大小, 超出截断
         chunk_size=448,  # 块大小 (按 Token 计)
-        overlapping=32  # 块间重叠大小 (按 Token 计)
+        overlapping=32,  # 块间重叠大小 (按 Token 计)
+        cache_capacity=512  # 缓存大小
     )
     trainer = TextBinaryClassifierTrainer(

{deeplotx-0.5.5 → deeplotx-0.6.1}/deeplotx.egg-info/SOURCES.txt RENAMED Viewed

@@ -14,6 +14,7 @@ deeplotx/encoder/longformer_encoder.py
 deeplotx/nn/__init__.py
 deeplotx/nn/auto_regression.py
 deeplotx/nn/base_neural_network.py
+deeplotx/nn/feed_forward.py
 deeplotx/nn/linear_regression.py
 deeplotx/nn/logistic_regression.py
 deeplotx/nn/long_context_auto_regression.py

{deeplotx-0.5.5 → deeplotx-0.6.1}/pyproject.toml RENAMED Viewed

@@ -1,6 +1,6 @@
 [project]
 name = "deeplotx"
-version = "0.5.5"
+version = "0.6.1"
 description = "Easy-2-use long text NLP toolkit."
 readme = "README.md"
 requires-python = ">=3.10"

deeplotx-0.5.5/deeplotx/encoder/encoder.py DELETED Viewed

@@ -1,48 +0,0 @@
-import logging
-import os
-import math
-import torch
-from torch import nn
-from transformers import AutoTokenizer, AutoModel
-from deeplotx import __ROOT__
-CACHE_PATH = os.path.join(__ROOT__, '.cache')
-DEFAULT_BERT = 'FacebookAI/xlm-roberta-base'
-logger = logging.getLogger('deeplotx.embedding')
-class Encoder(nn.Module):
-    def __init__(self, model_name_or_path: str = DEFAULT_BERT, device: str | None = None):
-        super().__init__()
-        self.device = torch.device(device) if device is not None \
-            else torch.device('cuda' if torch.cuda.is_available() else 'cpu')
-        self.tokenizer = AutoTokenizer.from_pretrained(pretrained_model_name_or_path=model_name_or_path,
-                                                       cache_dir=CACHE_PATH, _from_auto=True)
-        self.encoder = AutoModel.from_pretrained(pretrained_model_name_or_path=model_name_or_path,
-                                                 cache_dir=CACHE_PATH, _from_auto=True).to(self.device)
-        self.embed_dim = self.encoder.config.max_position_embeddings
-        logger.debug(f'{Encoder.__name__} initialized on device: {self.device}.')
-    def forward(self, input_ids: torch.Tensor, attention_mask: torch.Tensor) -> torch.Tensor:
-        def _encoder(_input_tup: tuple[torch.Tensor, torch.Tensor]) -> torch.Tensor:
-            return self.encoder.forward(_input_tup[0], attention_mask=_input_tup[1]).last_hidden_state[:, 0, :]
-        num_chunks = math.ceil(input_ids.shape[-1] / self.embed_dim)
-        chunks = chunk_results = []
-        for i in range(num_chunks):
-            start_idx = i * self.embed_dim
-            end_idx = min(start_idx + self.embed_dim, input_ids.shape[-1])
-            chunks.append((input_ids[:, start_idx: end_idx], attention_mask[:, start_idx: end_idx]))
-        ori_mode = self.encoder.training
-        self.encoder.eval()
-        with torch.no_grad():
-            chunk_results = [_encoder(x) for x in chunks]
-        self.encoder.train(mode=ori_mode)
-        return torch.cat(chunk_results, dim=-1)
-    def encode(self, text: str) -> torch.Tensor:
-        _input_ids = torch.tensor([self.tokenizer.encode(text)], dtype=torch.long, device=self.device)
-        _att_mask = torch.tensor([[1] * _input_ids.shape[-1]], dtype=torch.int, device=self.device)
-        return self.forward(_input_ids, _att_mask).squeeze()

deeplotx-0.5.5/deeplotx/nn/linear_regression.py DELETED Viewed

@@ -1,37 +0,0 @@
-from typing_extensions import override
-import torch
-from torch import nn
-from deeplotx.nn.base_neural_network import BaseNeuralNetwork
-class LinearRegression(BaseNeuralNetwork):
-    def __init__(self, input_dim: int, output_dim: int, model_name: str | None = None,
-                 device: str | None = None, dtype: torch.dtype | None = None):
-        super().__init__(model_name=model_name, device=device, dtype=dtype)
-        self.fc1 = nn.Linear(input_dim, 1024, device=self.device, dtype=self.dtype)
-        self.fc1_to_fc4_res = nn.Linear(1024, 64, device=self.device, dtype=self.dtype)
-        self.fc2 = nn.Linear(1024, 768, device=self.device, dtype=self.dtype)
-        self.fc3 = nn.Linear(768, 128, device=self.device, dtype=self.dtype)
-        self.fc4 = nn.Linear(128, 64, device=self.device, dtype=self.dtype)
-        self.fc5 = nn.Linear(64, output_dim, device=self.device, dtype=self.dtype)
-        self.parametric_relu_1 = nn.PReLU(num_parameters=1, init=5e-3, device=self.device, dtype=self.dtype)
-        self.parametric_relu_2 = nn.PReLU(num_parameters=1, init=5e-3, device=self.device, dtype=self.dtype)
-        self.parametric_relu_3 = nn.PReLU(num_parameters=1, init=5e-3, device=self.device, dtype=self.dtype)
-        self.parametric_relu_4 = nn.PReLU(num_parameters=1, init=5e-3, device=self.device, dtype=self.dtype)
-    @override
-    def forward(self, x: torch.Tensor) -> torch.Tensor:
-        x = self.ensure_device_and_dtype(x, device=self.device, dtype=self.dtype)
-        fc1_out = self.parametric_relu_1(self.fc1(x))
-        x = nn.LayerNorm(normalized_shape=1024, eps=1e-9, device=self.device, dtype=self.dtype)(fc1_out)
-        x = torch.dropout(x, p=0.2, train=self.training)
-        x = self.parametric_relu_2(self.fc2(x))
-        x = nn.LayerNorm(normalized_shape=768, eps=1e-9, device=self.device, dtype=self.dtype)(x)
-        x = torch.dropout(x, p=0.2, train=self.training)
-        x = self.parametric_relu_3(self.fc3(x))
-        x = torch.dropout(x, p=0.2, train=self.training)
-        x = self.parametric_relu_4(self.fc4(x)) + self.fc1_to_fc4_res(fc1_out)
-        x = self.fc5(x)
-        return x

deeplotx-0.5.5/deeplotx/nn/self_attention.py DELETED Viewed

@@ -1,34 +0,0 @@
-from typing_extensions import override
-import torch
-from torch import nn, softmax
-from deeplotx.nn.base_neural_network import BaseNeuralNetwork
-class SelfAttention(BaseNeuralNetwork):
-    def __init__(self, feature_dim: int, model_name: str | None = None,
-                 device: str | None = None, dtype: torch.dtype | None = None):
-        super().__init__(model_name=model_name, device=device, dtype=dtype)
-        self._feature_dim = feature_dim
-        self.q_proj = nn.Linear(in_features=self._feature_dim, out_features=self._feature_dim,
-                                bias=True, device=self.device, dtype=self.dtype)
-        self.k_proj = nn.Linear(in_features=self._feature_dim, out_features=self._feature_dim,
-                                bias=True, device=self.device, dtype=self.dtype)
-        self.v_proj = nn.Linear(in_features=self._feature_dim, out_features=self._feature_dim,
-                                bias=True, device=self.device, dtype=self.dtype)
-    def _attention(self, x: torch.Tensor, mask: torch.Tensor | None = None) -> torch.Tensor:
-        q, k = self.q_proj(x), self.k_proj(x)
-        attn = torch.matmul(q, k.transpose(-2, -1))
-        attn = attn / (self._feature_dim ** 0.5)
-        attn = attn.masked_fill(mask == 0, -1e9) if mask is not None else attn
-        return softmax(attn, dim=-1)
-    @override
-    def forward(self, x: torch.Tensor, mask: torch.Tensor | None = None) -> torch.Tensor:
-        x = self.ensure_device_and_dtype(x, device=self.device, dtype=self.dtype)
-        if mask is not None:
-            mask = self.ensure_device_and_dtype(mask, device=self.device, dtype=self.dtype)
-        v = self.v_proj(x)
-        return torch.matmul(self._attention(x, mask), v)