deeplotx 0.8.2__tar.gz → 0.8.5__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {deeplotx-0.8.2 → deeplotx-0.8.5}/PKG-INFO +54 -37
- {deeplotx-0.8.2 → deeplotx-0.8.5}/README.md +53 -36
- {deeplotx-0.8.2 → deeplotx-0.8.5}/deeplotx/__init__.py +1 -0
- {deeplotx-0.8.2 → deeplotx-0.8.5}/deeplotx/encoder/long_text_encoder.py +13 -8
- deeplotx-0.8.5/deeplotx/encoder/longformer_encoder.py +55 -0
- {deeplotx-0.8.2 → deeplotx-0.8.5}/deeplotx/nn/__init__.py +1 -0
- {deeplotx-0.8.2 → deeplotx-0.8.5}/deeplotx/nn/auto_regression.py +2 -2
- {deeplotx-0.8.2 → deeplotx-0.8.5}/deeplotx/nn/linear_regression.py +5 -4
- {deeplotx-0.8.2 → deeplotx-0.8.5}/deeplotx/nn/logistic_regression.py +4 -4
- {deeplotx-0.8.2 → deeplotx-0.8.5}/deeplotx/nn/long_context_recursive_sequential.py +2 -3
- deeplotx-0.8.5/deeplotx/nn/multi_head_feed_forward.py +32 -0
- {deeplotx-0.8.2 → deeplotx-0.8.5}/deeplotx/nn/recursive_sequential.py +5 -5
- {deeplotx-0.8.2 → deeplotx-0.8.5}/deeplotx/nn/softmax_regression.py +4 -4
- {deeplotx-0.8.2 → deeplotx-0.8.5}/deeplotx/trainer/text_binary_classification_trainer.py +6 -5
- {deeplotx-0.8.2 → deeplotx-0.8.5}/deeplotx.egg-info/PKG-INFO +54 -37
- {deeplotx-0.8.2 → deeplotx-0.8.5}/deeplotx.egg-info/SOURCES.txt +1 -0
- {deeplotx-0.8.2 → deeplotx-0.8.5}/pyproject.toml +17 -17
- deeplotx-0.8.2/deeplotx/encoder/longformer_encoder.py +0 -37
- {deeplotx-0.8.2 → deeplotx-0.8.5}/LICENSE +0 -0
- {deeplotx-0.8.2 → deeplotx-0.8.5}/deeplotx/encoder/__init__.py +0 -0
- {deeplotx-0.8.2 → deeplotx-0.8.5}/deeplotx/encoder/encoder.py +0 -0
- {deeplotx-0.8.2 → deeplotx-0.8.5}/deeplotx/nn/attention.py +0 -0
- {deeplotx-0.8.2 → deeplotx-0.8.5}/deeplotx/nn/base_neural_network.py +0 -0
- {deeplotx-0.8.2 → deeplotx-0.8.5}/deeplotx/nn/feed_forward.py +0 -0
- {deeplotx-0.8.2 → deeplotx-0.8.5}/deeplotx/nn/long_context_auto_regression.py +0 -0
- {deeplotx-0.8.2 → deeplotx-0.8.5}/deeplotx/nn/multi_head_attention.py +0 -0
- {deeplotx-0.8.2 → deeplotx-0.8.5}/deeplotx/nn/roformer_encoder.py +0 -0
- {deeplotx-0.8.2 → deeplotx-0.8.5}/deeplotx/nn/rope.py +0 -0
- {deeplotx-0.8.2 → deeplotx-0.8.5}/deeplotx/similarity/__init__.py +0 -0
- {deeplotx-0.8.2 → deeplotx-0.8.5}/deeplotx/similarity/distribution.py +0 -0
- {deeplotx-0.8.2 → deeplotx-0.8.5}/deeplotx/similarity/set.py +0 -0
- {deeplotx-0.8.2 → deeplotx-0.8.5}/deeplotx/similarity/vector.py +0 -0
- {deeplotx-0.8.2 → deeplotx-0.8.5}/deeplotx/trainer/__init__.py +0 -0
- {deeplotx-0.8.2 → deeplotx-0.8.5}/deeplotx/trainer/base_trainer.py +0 -0
- {deeplotx-0.8.2 → deeplotx-0.8.5}/deeplotx/util/__init__.py +0 -0
- {deeplotx-0.8.2 → deeplotx-0.8.5}/deeplotx/util/hash.py +0 -0
- {deeplotx-0.8.2 → deeplotx-0.8.5}/deeplotx/util/read_file.py +0 -0
- {deeplotx-0.8.2 → deeplotx-0.8.5}/deeplotx.egg-info/dependency_links.txt +0 -0
- {deeplotx-0.8.2 → deeplotx-0.8.5}/deeplotx.egg-info/requires.txt +0 -0
- {deeplotx-0.8.2 → deeplotx-0.8.5}/deeplotx.egg-info/top_level.txt +0 -0
- {deeplotx-0.8.2 → deeplotx-0.8.5}/setup.cfg +0 -0
--- deeplotx-0.8.2/PKG-INFO
+++ deeplotx-0.8.5/PKG-INFO
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: deeplotx
-Version: 0.8.2
+Version: 0.8.5
 Summary: Easy-2-use long text NLP toolkit.
 Requires-Python: >=3.10
 Description-Content-Type: text/markdown
(The remaining PKG-INFO hunks, @@ -48,24 +48,23 @@ through @@ -306,10 +322,11 @@, update the embedded long description and are identical to the README.md changes shown in full below.)
--- deeplotx-0.8.2/README.md
+++ deeplotx-0.8.5/README.md
@@ -30,24 +30,23 @@
 
 - ### 长文本嵌入
 
-- **基于通用 BERT 的长文本嵌入** (最大支持长度, 无限长,
+- **基于通用 BERT 的长文本嵌入** (最大支持长度, 无限长, 可通过 max_length 限制长度)
 
 ```python
 from deeplotx import LongTextEncoder
 
-#
+# 块大小为 448 个 tokens, 块间重叠部分为 32 个 tokens.
 encoder = LongTextEncoder(
-
-
-    overlapping=64
+    chunk_size=448,
+    overlapping=32
 )
-# 对 "我是吴子豪, 这是一个测试文本." 计算嵌入,
-encoder.encode('我是吴子豪, 这是一个测试文本.', flatten=
+# 对 "我是吴子豪, 这是一个测试文本." 计算嵌入, 并堆叠.
+encoder.encode('我是吴子豪, 这是一个测试文本.', flatten=False)
 ```
 
 输出:
 ```
-tensor([
+tensor([ 2.2316e-01, 2.0300e-01, ..., 1.5578e-01, -6.6735e-02])
 ```
 
 - **基于 Longformer 的长文本嵌入** (最大支持长度 4096 个 tokens)
@@ -59,6 +58,11 @@
 encoder.encode('我是吴子豪, 这是一个测试文本.')
 ```
 
+输出:
+```
+tensor([-2.7490e-02, 6.6503e-02, ..., -6.5937e-02, 6.7802e-03])
+```
+
 - ### 相似性计算
 
 - **基于向量的相似性**
@@ -145,14 +149,17 @@
 
 ```python
 from deeplotx import (
-    BaseNeuralNetwork, # 深度神经网络基类
     FeedForward, # 前馈神经网络
+    MultiHeadFeedForward, # 多头前馈神经网络
     LinearRegression, # 线性回归
     LogisticRegression, # 逻辑回归 / 二分类 / 多标签分类
     SoftmaxRegression, # Softmax 回归 / 多分类
     RecursiveSequential, # 序列模型 / 循环神经网络
     LongContextRecursiveSequential, # 长上下文序列模型 / 自注意力融合循环神经网络
-
+    RoPE, # RoPE 位置编码
+    Attention, # 自注意力 / 交叉注意力
+    MultiHeadAttention, # 并行多头注意力
+    RoFormerEncoder, # Roformer (Transformer + RoPE) 编码器模型
     AutoRegression, # 自回归模型 / 循环神经网络
     LongContextAutoRegression # 长上下文自回归模型 / 自注意力融合循环神经网络
 )
@@ -175,13 +182,13 @@
                  device: str | None = None, dtype: torch.dtype | None = None):
         super().__init__(in_features=feature_dim, out_features=feature_dim, model_name=model_name, device=device, dtype=dtype)
         self._dropout_rate = dropout_rate
-        self.
-
-        self.
-
-        self.
+        self.up_proj = nn.Linear(in_features=feature_dim, out_features=int(feature_dim * expansion_factor),
+                                 bias=bias, device=self.device, dtype=self.dtype)
+        self.down_proj = nn.Linear(in_features=int(feature_dim * expansion_factor), out_features=feature_dim,
+                                   bias=bias, device=self.device, dtype=self.dtype)
+        self.parametric_relu = nn.PReLU(num_parameters=1, init=5e-3,
                                         device=self.device, dtype=self.dtype)
-        self.layer_norm = nn.LayerNorm(normalized_shape=self.
+        self.layer_norm = nn.LayerNorm(normalized_shape=self.up_proj.in_features, eps=1e-9,
                                        device=self.device, dtype=self.dtype)
 
     @override
@@ -189,11 +196,11 @@
         x = self.ensure_device_and_dtype(x, device=self.device, dtype=self.dtype)
         residual = x
         x = self.layer_norm(x)
-        x = self.
-        x = self.
+        x = self.up_proj(x)
+        x = self.parametric_relu(x)
         if self._dropout_rate > .0:
             x = torch.dropout(x, p=self._dropout_rate, train=self.training)
-        return self.
+        return self.down_proj(x) + residual
 
 
 class FeedForward(BaseNeuralNetwork):
@@ -206,7 +213,7 @@
         self.ffn_layers = nn.ModuleList([FeedForwardUnit(feature_dim=feature_dim,
                                                          expansion_factor=expansion_factor, bias=bias,
                                                          dropout_rate=dropout_rate,
-                                                         device=self.device, dtype=self.dtype)
+                                                         device=self.device, dtype=self.dtype) for _ in range(num_layers)])
 
     @override
     def forward(self, x: torch.Tensor) -> torch.Tensor:
@@ -216,7 +223,7 @@
         return x
 ```
 
-
+注意力模块:
 
 ```python
 from typing_extensions import override
@@ -225,14 +232,17 @@
 
 from deeplotx.nn.base_neural_network import BaseNeuralNetwork
 from deeplotx.nn.feed_forward import FeedForward
+from deeplotx.nn.rope import RoPE, DEFAULT_THETA
 
 
-class
-    def __init__(self, feature_dim: int, bias: bool = True,
-                 proj_expansion_factor: int | float = 1.5, dropout_rate: float = 0.02,
-                 model_name: str | None = None, device: str | None = None, dtype: torch.dtype | None = None
+class Attention(BaseNeuralNetwork):
+    def __init__(self, feature_dim: int, bias: bool = True, positional: bool = True,
+                 proj_layers: int = 1, proj_expansion_factor: int | float = 1.5, dropout_rate: float = 0.02,
+                 model_name: str | None = None, device: str | None = None, dtype: torch.dtype | None = None,
+                 **kwargs):
         super().__init__(in_features=feature_dim, out_features=feature_dim, model_name=model_name,
                          device=device, dtype=dtype)
+        self._positional = positional
         self._feature_dim = feature_dim
         self.q_proj = FeedForward(feature_dim=self._feature_dim, num_layers=proj_layers,
                                   expansion_factor=proj_expansion_factor,
@@ -243,21 +253,27 @@
         self.v_proj = FeedForward(feature_dim=self._feature_dim, num_layers=proj_layers,
                                   expansion_factor=proj_expansion_factor,
                                   bias=bias, dropout_rate=dropout_rate, device=self.device, dtype=self.dtype)
+        if self._positional:
+            self.rope = RoPE(feature_dim=self._feature_dim, theta=kwargs.get('theta', DEFAULT_THETA),
+                             device=self.device, dtype=self.dtype)
 
-    def _attention(self, x: torch.Tensor, mask: torch.Tensor | None = None) -> torch.Tensor:
-        q, k = self.q_proj(x), self.k_proj(
+    def _attention(self, x: torch.Tensor, y: torch.Tensor, mask: torch.Tensor | None = None) -> torch.Tensor:
+        q, k = self.q_proj(x), self.k_proj(y)
+        if self._positional:
+            q, k = self.rope(q), self.rope(k)
         attn = torch.matmul(q, k.transpose(-2, -1))
         attn = attn / (self._feature_dim ** 0.5)
         attn = attn.masked_fill(mask == 0, -1e9) if mask is not None else attn
-        return torch.softmax(attn, dim=-1)
+        return torch.softmax(attn, dtype=self.dtype, dim=-1)
 
     @override
-    def forward(self, x: torch.Tensor, mask: torch.Tensor | None = None) -> torch.Tensor:
+    def forward(self, x: torch.Tensor, y: torch.Tensor | None = None, mask: torch.Tensor | None = None) -> torch.Tensor:
         x = self.ensure_device_and_dtype(x, device=self.device, dtype=self.dtype)
+        y = x if y is None else self.ensure_device_and_dtype(y, device=self.device, dtype=self.dtype)
         if mask is not None:
             mask = self.ensure_device_and_dtype(mask, device=self.device, dtype=self.dtype)
-        v = self.v_proj(
-        return torch.matmul(self._attention(x, mask), v)
+        v = self.v_proj(y)
+        return torch.matmul(self._attention(x, y, mask), v)
 ```
 
 - ### 使用预定义训练器实现文本二分类任务
@@ -266,7 +282,7 @@
 from deeplotx import TextBinaryClassifierTrainer, LongTextEncoder
 from deeplotx.util import get_files, read_file
 
-# 定义向量编码策略 (默认使用
+# 定义向量编码策略 (默认使用 FacebookAI/xlm-roberta-base 作为嵌入模型)
 long_text_encoder = LongTextEncoder(
     max_length=2048, # 最大文本大小, 超出截断
     chunk_size=448, # 块大小 (按 Token 计)
@@ -288,10 +304,11 @@
 
 # 开始训练
 model = trainer.train(pos_data, neg_data,
-
-
-
-
+                      num_epochs=36, learning_rate=2e-5,
+                      balancing_dataset=True, alpha=1e-4,
+                      rho=.2, encoder_layers=2, # 2 层 Roformer 编码器
+                      attn_heads=8, # 8 个注意力头
+                      recursive_layers=2) # 2 层 Bi-LSTM
 
 # 保存模型权重
 model.save(model_name='test_model', model_dir='model')
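The README's Attention example above gains an optional second input y, so the same module covers self-attention (y omitted) and cross-attention (y supplied). A minimal usage sketch; the tensor shapes and batch layout are illustrative assumptions, not taken from the package docs:

```python
import torch
from deeplotx import Attention

attn = Attention(feature_dim=256, positional=True)
x = torch.randn(2, 16, 256)    # queries are projected from x
y = torch.randn(2, 24, 256)    # keys/values are projected from y
self_out = attn(x)             # y=None: plain self-attention over x
cross_out = attn(x, y)         # cross-attention from x onto y
```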
--- deeplotx-0.8.2/deeplotx/encoder/long_text_encoder.py
+++ deeplotx-0.8.5/deeplotx/encoder/long_text_encoder.py
@@ -13,9 +13,9 @@ logger = logging.getLogger('deeplotx.embedding')
 
 
 class LongTextEncoder(Encoder):
-    def __init__(self,
-
-
+    def __init__(self, chunk_size: int = 448, overlapping: int = 32, max_length: int = -1,
+                 model_name_or_path: str = DEFAULT_BERT, cache_capacity: int = 64,
+                 max_workers: int = 8, device: str | None = None):
         super().__init__(model_name_or_path=model_name_or_path, device=device)
         assert overlapping < chunk_size, f'overlapping ({overlapping}) must be less than chunk size ({chunk_size}).'
         self._max_length = max_length
@@ -41,23 +41,28 @@ class LongTextEncoder(Encoder):
             _fin_emb_tensor = torch.cat((_fin_emb_tensor.detach().clone(), _emb.detach().clone()), dim=-1)
         return _fin_emb_tensor.squeeze()
 
+        _tmp_max_length = self._max_length
         _text_to_show = text.replace("\n", str())
         logger.debug(f'Embedding \"{_text_to_show if len(_text_to_show) < 128 else _text_to_show[:128] + "..."}\".')
         # read cache
         _text_hash = sha512(text)
         if _text_hash in self._cache:
             return postprocess(self._cache[_text_hash], flatten)
-        _text_to_input_ids = self.tokenizer.encode(text.strip())
+        _text_to_input_ids = self.tokenizer.encode(text.strip())
+        # variable length
+        if _tmp_max_length < 0:
+            _tmp_max_length = len(_text_to_input_ids)
+        _text_to_input_ids = _text_to_input_ids[:_tmp_max_length]
         _text_to_input_ids_att_mask = []
         # padding
         pad_token = self.tokenizer.pad_token_type_id
-        if len(_text_to_input_ids) <
-            _text_to_input_ids.extend([pad_token] * (
+        if len(_text_to_input_ids) < _tmp_max_length:
+            _text_to_input_ids.extend([pad_token] * (_tmp_max_length - len(_text_to_input_ids)))
         pads = _text_to_input_ids.count(pad_token)
-        non_pads =
+        non_pads = _tmp_max_length - pads
         _text_to_input_ids_att_mask.extend([1] * non_pads)
         _text_to_input_ids_att_mask.extend([0] * pads)
-        num_chunks = math.ceil(
+        num_chunks = math.ceil(_tmp_max_length / self._chunk_size)
         # split chunks
         chunks = []
         for i in range(num_chunks):
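In the hunk above, LongTextEncoder now defaults to max_length=-1, and the new code path treats a negative value as "use the tokenized length of the input" rather than a fixed ceiling. A minimal sketch of the two modes, assuming the constructor defaults shown in the diff:

```python
from deeplotx import LongTextEncoder

# Variable-length mode (new default): no truncation; chunking follows the
# actual token count of each input.
encoder = LongTextEncoder(chunk_size=448, overlapping=32)

# Fixed-length mode: inputs longer than 2048 tokens are truncated, shorter
# ones are padded up to 2048 before chunking.
bounded = LongTextEncoder(max_length=2048, chunk_size=448, overlapping=32)
emb = bounded.encode('我是吴子豪, 这是一个测试文本.', flatten=False)
```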
--- /dev/null
+++ deeplotx-0.8.5/deeplotx/encoder/longformer_encoder.py
@@ -0,0 +1,55 @@
+import logging
+import os
+
+import torch
+from torch import nn
+from transformers import AutoModel, AutoTokenizer
+from requests.exceptions import ConnectTimeout, SSLError
+
+from deeplotx import __ROOT__
+
+CACHE_PATH = os.path.join(__ROOT__, '.cache')
+DEFAULT_LONGFORMER = 'allenai/longformer-base-4096'
+logger = logging.getLogger('deeplotx.embedding')
+
+
+class LongformerEncoder(nn.Module):
+    def __init__(self, model_name_or_path: str = DEFAULT_LONGFORMER, device: str | None = None):
+        super().__init__()
+        self.device = torch.device(device) if device is not None \
+            else torch.device('cuda' if torch.cuda.is_available() else 'cpu')
+        try:
+            self.tokenizer = AutoTokenizer.from_pretrained(pretrained_model_name_or_path=model_name_or_path,
+                                                           cache_dir=CACHE_PATH, _from_auto=True,
+                                                           trust_remote_code=True)
+            self.encoder = AutoModel.from_pretrained(pretrained_model_name_or_path=model_name_or_path,
+                                                     cache_dir=CACHE_PATH, _from_auto=True,
+                                                     trust_remote_code=True).to(self.device)
+        except ConnectTimeout:
+            self.tokenizer = AutoTokenizer.from_pretrained(pretrained_model_name_or_path=model_name_or_path,
+                                                           cache_dir=CACHE_PATH, _from_auto=True,
+                                                           trust_remote_code=True, local_files_only=True)
+            self.encoder = AutoModel.from_pretrained(pretrained_model_name_or_path=model_name_or_path,
+                                                     cache_dir=CACHE_PATH, _from_auto=True,
+                                                     trust_remote_code=True, local_files_only=True).to(self.device)
+        except SSLError:
+            self.tokenizer = AutoTokenizer.from_pretrained(pretrained_model_name_or_path=model_name_or_path,
+                                                           cache_dir=CACHE_PATH, _from_auto=True,
+                                                           trust_remote_code=True, local_files_only=True)
+            self.encoder = AutoModel.from_pretrained(pretrained_model_name_or_path=model_name_or_path,
+                                                     cache_dir=CACHE_PATH, _from_auto=True,
+                                                     trust_remote_code=True, local_files_only=True).to(self.device)
+        logger.debug(f'{LongformerEncoder.__name__} initialized on device: {self.device}.')
+
+    def forward(self, input_ids: torch.Tensor, attention_mask: torch.Tensor) -> torch.Tensor:
+        ori_mode = self.encoder.training
+        self.encoder.eval()
+        with torch.no_grad():
+            res = self.encoder.forward(input_ids, attention_mask=attention_mask).last_hidden_state[:, 0, :]
+        self.encoder.train(mode=ori_mode)
+        return res
+
+    def encode(self, text: str) -> torch.Tensor:
+        _input_ids = torch.tensor([self.tokenizer.encode(text)], dtype=torch.long, device=self.device)
+        _att_mask = torch.tensor([[1] * _input_ids.shape[-1]], dtype=torch.int, device=self.device)
+        return self.forward(_input_ids, _att_mask).squeeze()
--- deeplotx-0.8.2/deeplotx/nn/__init__.py
+++ deeplotx-0.8.5/deeplotx/nn/__init__.py
@@ -1,5 +1,6 @@
 from .base_neural_network import BaseNeuralNetwork
 from .feed_forward import FeedForward
+from .multi_head_feed_forward import MultiHeadFeedForward
 from .linear_regression import LinearRegression
 from .logistic_regression import LogisticRegression
 from .softmax_regression import SoftmaxRegression
--- deeplotx-0.8.2/deeplotx/nn/auto_regression.py
+++ deeplotx-0.8.5/deeplotx/nn/auto_regression.py
@@ -7,8 +7,8 @@ class AutoRegression(RecursiveSequential):
     def __init__(self, feature_dim: int, bias: bool = True,
                  recursive_layers: int = 1, recursive_hidden_dim: int | None = None,
                  ffn_layers: int = 1, ffn_expansion_factor: int | float = 2, dropout_rate: float = 0.05,
-                 model_name: str | None = None, device: str | None = None, dtype: torch.dtype | None = None):
+                 model_name: str | None = None, device: str | None = None, dtype: torch.dtype | None = None, **kwargs):
         super().__init__(input_dim=feature_dim, output_dim=feature_dim, bias=bias,
                          recursive_layers=recursive_layers, recursive_hidden_dim=recursive_hidden_dim,
                          ffn_layers=ffn_layers, ffn_expansion_factor=ffn_expansion_factor,
-                         dropout_rate=dropout_rate, model_name=model_name, device=device, dtype=dtype)
+                         dropout_rate=dropout_rate, model_name=model_name, device=device, dtype=dtype, **kwargs)
--- deeplotx-0.8.2/deeplotx/nn/linear_regression.py
+++ deeplotx-0.8.5/deeplotx/nn/linear_regression.py
@@ -4,16 +4,17 @@ import torch
 from torch import nn
 
 from deeplotx.nn.base_neural_network import BaseNeuralNetwork
-from deeplotx.nn.
+from deeplotx.nn.multi_head_feed_forward import MultiHeadFeedForward
 
 
 class LinearRegression(BaseNeuralNetwork):
-    def __init__(self, input_dim: int, output_dim: int, num_layers: int = 1,
+    def __init__(self, input_dim: int, output_dim: int, num_heads: int = 1, num_layers: int = 1,
                  expansion_factor: int | float = 1.5, bias: bool = True, dropout_rate: float = 0.1,
                  model_name: str | None = None, device: str | None = None, dtype: torch.dtype | None = None):
         super().__init__(in_features=input_dim, out_features=output_dim, model_name=model_name, device=device, dtype=dtype)
-        self.ffn =
-
+        self.ffn = MultiHeadFeedForward(feature_dim=input_dim, num_heads=num_heads,
+                                        num_layers=num_layers, expansion_factor=expansion_factor,
+                                        bias=bias, dropout_rate=dropout_rate, device=self.device, dtype=self.dtype)
         self.proj = nn.Linear(in_features=input_dim, out_features=output_dim,
                               bias=bias, device=self.device, dtype=self.dtype)
 
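With this change, LinearRegression builds its hidden block from the new MultiHeadFeedForward, exposing num_heads next to num_layers. A minimal construction sketch with assumed dimensions (the output shape is an assumption based on the proj layer above):

```python
import torch
from deeplotx import LinearRegression

# 256-dim features regressed to one output through 2 parallel FFN heads.
model = LinearRegression(input_dim=256, output_dim=1, num_heads=2, num_layers=3)
pred = model(torch.randn(4, 256))  # expected shape: (4, 1)
```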
--- deeplotx-0.8.2/deeplotx/nn/logistic_regression.py
+++ deeplotx-0.8.5/deeplotx/nn/logistic_regression.py
@@ -6,10 +6,10 @@ from deeplotx.nn.linear_regression import LinearRegression
 
 
 class LogisticRegression(LinearRegression):
-    def __init__(self, input_dim: int, output_dim: int = 1,
-                 bias: bool = True, dropout_rate: float = 0.1,
-                 device: str | None = None, dtype: torch.dtype | None = None):
-        super().__init__(input_dim=input_dim, output_dim=output_dim, num_layers=num_layers,
+    def __init__(self, input_dim: int, output_dim: int = 1, num_heads: int = 1, num_layers: int = 1,
+                 expansion_factor: int | float = 1.5, bias: bool = True, dropout_rate: float = 0.1,
+                 model_name: str | None = None, device: str | None = None, dtype: torch.dtype | None = None):
+        super().__init__(input_dim=input_dim, output_dim=output_dim, num_heads=num_heads, num_layers=num_layers,
                          expansion_factor=expansion_factor, bias=bias, dropout_rate=dropout_rate,
                          model_name=model_name, device=device, dtype=dtype)
 
--- deeplotx-0.8.2/deeplotx/nn/long_context_recursive_sequential.py
+++ deeplotx-0.8.5/deeplotx/nn/long_context_recursive_sequential.py
@@ -12,12 +12,11 @@ class LongContextRecursiveSequential(RecursiveSequential):
     def __init__(self, input_dim: int, output_dim: int, bias: bool = True,
                  encoder_layers: int = 1, attn_heads: int = 1, recursive_layers: int = 2, recursive_hidden_dim: int | None = None,
                  ffn_layers: int = 1, ffn_expansion_factor: int | float = 2, dropout_rate: float = 0.05,
-                 model_name: str | None = None, device: str | None = None, dtype: torch.dtype | None = None,
-                 **kwargs):
+                 model_name: str | None = None, device: str | None = None, dtype: torch.dtype | None = None, **kwargs):
         super().__init__(input_dim=input_dim, output_dim=output_dim, bias=bias,
                          recursive_layers=recursive_layers, recursive_hidden_dim=recursive_hidden_dim,
                          ffn_layers=ffn_layers, ffn_expansion_factor=ffn_expansion_factor, dropout_rate=dropout_rate,
-                         model_name=model_name, device=device, dtype=dtype)
+                         model_name=model_name, device=device, dtype=dtype, **kwargs)
         self.roformer_encoders = nn.ModuleList([RoFormerEncoder(feature_dim=input_dim, attn_heads=attn_heads, bias=bias,
                                                                  ffn_layers=kwargs.get('encoder_ffn_layers', ffn_layers),
                                                                  ffn_expansion_factor=kwargs.get('encoder_expansion_factor', ffn_expansion_factor),
--- /dev/null
+++ deeplotx-0.8.5/deeplotx/nn/multi_head_feed_forward.py
@@ -0,0 +1,32 @@
+from typing_extensions import override
+
+import torch
+from torch import nn
+
+from deeplotx.nn.base_neural_network import BaseNeuralNetwork
+from deeplotx.nn.feed_forward import FeedForward
+
+
+class MultiHeadFeedForward(BaseNeuralNetwork):
+    def __init__(self, feature_dim: int, num_heads: int = 1, num_layers: int = 1, expansion_factor: int | float = 2,
+                 bias: bool = True, dropout_rate: float = 0.05, model_name: str | None = None,
+                 device: str | None = None, dtype: torch.dtype | None = None):
+        super().__init__(in_features=feature_dim, out_features=feature_dim, model_name=model_name,
+                         device=device, dtype=dtype)
+        self._num_heads = num_heads
+        self.expand_proj = nn.Linear(in_features=feature_dim, out_features=feature_dim * self._num_heads, bias=bias,
+                                     device=self.device, dtype=self.dtype)
+        self.ffn_heads = nn.ModuleList([FeedForward(feature_dim=feature_dim, num_layers=num_layers,
+                                                    expansion_factor=expansion_factor, bias=bias,
+                                                    dropout_rate=dropout_rate, device=self.device,
+                                                    dtype=self.dtype) for _ in range(self._num_heads)])
+        self.out_proj = nn.Linear(in_features=feature_dim * self._num_heads, out_features=feature_dim, bias=bias,
+                                  device=self.device, dtype=self.dtype)
+
+    @override
+    def forward(self, x: torch.Tensor) -> torch.Tensor:
+        x = self.ensure_device_and_dtype(x, device=self.device, dtype=self.dtype)
+        x = self.expand_proj(x)
+        x_heads = x.split(self.in_features, dim=-1)
+        head_outs = [self.ffn_heads[_](x_heads[_]) for _ in range(self._num_heads)]
+        return self.out_proj(torch.concat(head_outs, dim=-1))
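MultiHeadFeedForward (new file above) expands the input into num_heads slices with expand_proj, runs an independent FeedForward over each slice, and projects the concatenation back to feature_dim, so the output shape matches the input. A minimal sketch with assumed shapes:

```python
import torch
from deeplotx.nn import MultiHeadFeedForward

mhff = MultiHeadFeedForward(feature_dim=128, num_heads=4, num_layers=2)
out = mhff(torch.randn(8, 128))  # shape preserved: (8, 128)
```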
--- deeplotx-0.8.2/deeplotx/nn/recursive_sequential.py
+++ deeplotx-0.8.5/deeplotx/nn/recursive_sequential.py
@@ -4,14 +4,14 @@ import torch
 from torch import nn
 
 from deeplotx.nn.base_neural_network import BaseNeuralNetwork
-from deeplotx.nn.
+from deeplotx.nn.multi_head_feed_forward import MultiHeadFeedForward
 
 
 class RecursiveSequential(BaseNeuralNetwork):
     def __init__(self, input_dim: int, output_dim: int, bias: bool = True,
                  recursive_layers: int = 1, recursive_hidden_dim: int | None = None,
                  ffn_layers: int = 1, ffn_expansion_factor: int | float = 2, dropout_rate: float = 0.05,
-                 model_name: str | None = None, device: str | None = None, dtype: torch.dtype | None = None):
+                 model_name: str | None = None, device: str | None = None, dtype: torch.dtype | None = None, **kwargs):
         super().__init__(in_features=input_dim, out_features=output_dim, model_name=model_name,
                          device=device, dtype=dtype)
         if recursive_hidden_dim is None:
@@ -20,9 +20,9 @@ class RecursiveSequential(BaseNeuralNetwork):
                             num_layers=recursive_layers, batch_first=True,
                             bias=True, bidirectional=True, device=self.device,
                             dtype=self.dtype)
-        self.ffn =
-
-
+        self.ffn = MultiHeadFeedForward(feature_dim=recursive_hidden_dim * 2, num_heads=kwargs.get('ffn_heads', 1),
+                                        num_layers=ffn_layers, expansion_factor=ffn_expansion_factor,
+                                        bias=bias, dropout_rate=dropout_rate, device=self.device, dtype=self.dtype)
         self.__proj = nn.Linear(in_features=recursive_hidden_dim * 2, out_features=output_dim, bias=bias,
                                 device=self.device, dtype=self.dtype)
 
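RecursiveSequential now accepts **kwargs and reads ffn_heads from them when building its MultiHeadFeedForward block over the Bi-LSTM output. A construction-only sketch with assumed dimensions (the forward/state API is unchanged and not shown here):

```python
from deeplotx import RecursiveSequential

# ffn_heads is consumed via **kwargs by the MultiHeadFeedForward shown above.
seq = RecursiveSequential(input_dim=64, output_dim=2, recursive_layers=2, ffn_heads=2)
```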
--- deeplotx-0.8.2/deeplotx/nn/softmax_regression.py
+++ deeplotx-0.8.5/deeplotx/nn/softmax_regression.py
@@ -6,10 +6,10 @@ from deeplotx.nn.linear_regression import LinearRegression
 
 
 class SoftmaxRegression(LinearRegression):
-    def __init__(self, input_dim: int, output_dim: int,
-                 bias: bool = True, dropout_rate: float = 0.1,
-                 device: str | None = None, dtype: torch.dtype | None = None):
-        super().__init__(input_dim=input_dim, output_dim=output_dim, num_layers=num_layers,
+    def __init__(self, input_dim: int, output_dim: int, num_heads: int = 1, num_layers: int = 1,
+                 expansion_factor: int | float = 1.5, bias: bool = True, dropout_rate: float = 0.1,
+                 model_name: str | None = None, device: str | None = None, dtype: torch.dtype | None = None):
+        super().__init__(input_dim=input_dim, output_dim=output_dim, num_heads=num_heads, num_layers=num_layers,
                          expansion_factor=expansion_factor, bias=bias, dropout_rate=dropout_rate,
                          model_name=model_name, device=device, dtype=dtype)
 
--- deeplotx-0.8.2/deeplotx/trainer/text_binary_classification_trainer.py
+++ deeplotx-0.8.5/deeplotx/trainer/text_binary_classification_trainer.py
@@ -49,6 +49,7 @@ class TextBinaryClassifierTrainer(BaseTrainer):
                 logger.warning("The dimension of features doesn't match. A new model instance will be created.")
                 self.model = None
         if self.model is None:
+            ffn_heads = kwargs.get('ffn_heads', 2)
             ffn_layers = kwargs.get('ffn_layers', 5)
             ffn_expansion_factor = kwargs.get('ffn_expansion_factor', 2)
             bias = kwargs.get('bias', True)
@@ -63,11 +64,11 @@
             self.model = LongContextRecursiveSequential(input_dim=feature_dim, output_dim=1, bias=bias,
                                                         encoder_layers=encoder_layers, attn_heads=attn_heads,
                                                         recursive_layers=recursive_layers, recursive_hidden_dim=recursive_hidden_dim,
-                                                        ffn_layers=ffn_layers,
-
-
-
-                                                        theta=theta).initialize_weights()
+                                                        ffn_layers=ffn_layers, ffn_heads=ffn_heads, ffn_expansion_factor=ffn_expansion_factor,
+                                                        dropout_rate=dropout_rate, encoder_ffn_layers=encoder_ffn_layers,
+                                                        encoder_expansion_factor=encoder_expansion_factor, encoder_dropout_rate=encoder_dropout_rate,
+                                                        attn_ffn_layers=attn_ffn_layers, attn_expansion_factor=attn_expansion_factor,
+                                                        attn_dropout_rate=attn_dropout_rate, theta=theta).initialize_weights()
             logger.debug(f'Training Model: \n{self.model}')
             loss_function = nn.BCELoss()
             optimizer = optim.Adamax(self.model.parameters(), lr=learning_rate)
--- deeplotx-0.8.2/deeplotx.egg-info/PKG-INFO
+++ deeplotx-0.8.5/deeplotx.egg-info/PKG-INFO
(Identical to the PKG-INFO changes shown above.)
--- deeplotx-0.8.2/deeplotx.egg-info/SOURCES.txt
+++ deeplotx-0.8.5/deeplotx.egg-info/SOURCES.txt
@@ -21,6 +21,7 @@ deeplotx/nn/logistic_regression.py
 deeplotx/nn/long_context_auto_regression.py
 deeplotx/nn/long_context_recursive_sequential.py
 deeplotx/nn/multi_head_attention.py
+deeplotx/nn/multi_head_feed_forward.py
 deeplotx/nn/recursive_sequential.py
 deeplotx/nn/roformer_encoder.py
 deeplotx/nn/rope.py
--- deeplotx-0.8.2/pyproject.toml
+++ deeplotx-0.8.5/pyproject.toml
@@ -1,17 +1,17 @@
-[project]
-name = "deeplotx"
-version = "0.8.2"
-description = "Easy-2-use long text NLP toolkit."
-readme = "README.md"
-requires-python = ">=3.10"
-dependencies = [
-    "hf-xet",
-    "jupyter",
-    "numpy",
-    "protobuf",
-    "python-dotenv",
-    "torch",
-    "transformers",
-    "typing-extensions",
-    "vortezwohl>=0.0.8",
-]
+[project]
+name = "deeplotx"
+version = "0.8.5"
+description = "Easy-2-use long text NLP toolkit."
+readme = "README.md"
+requires-python = ">=3.10"
+dependencies = [
+    "hf-xet",
+    "jupyter",
+    "numpy",
+    "protobuf",
+    "python-dotenv",
+    "torch",
+    "transformers",
+    "typing-extensions",
+    "vortezwohl>=0.0.8",
+]
--- deeplotx-0.8.2/deeplotx/encoder/longformer_encoder.py
+++ /dev/null
@@ -1,37 +0,0 @@
-import logging
-import os
-
-import torch
-from torch import nn
-from transformers import LongformerTokenizer, LongformerModel
-
-from deeplotx import __ROOT__
-
-CACHE_PATH = os.path.join(__ROOT__, '.cache')
-DEFAULT_LONGFORMER = 'allenai/longformer-base-4096'
-logger = logging.getLogger('deeplotx.embedding')
-
-
-class LongformerEncoder(nn.Module):
-    def __init__(self, model_name_or_path: str = DEFAULT_LONGFORMER, device: str | None = None):
-        super().__init__()
-        self.device = torch.device(device) if device is not None \
-            else torch.device('cuda' if torch.cuda.is_available() else 'cpu')
-        self.tokenizer = LongformerTokenizer.from_pretrained(pretrained_model_name_or_path=model_name_or_path,
-                                                             cache_dir=CACHE_PATH, _from_auto=True)
-        self.bert = LongformerModel.from_pretrained(pretrained_model_name_or_path=model_name_or_path,
-                                                    cache_dir=CACHE_PATH, _from_auto=True).to(self.device)
-        logger.debug(f'{LongformerEncoder.__name__} initialized on device: {self.device}.')
-
-    def forward(self, input_ids: torch.Tensor, attention_mask: torch.Tensor) -> torch.Tensor:
-        ori_mode = self.bert.training
-        self.bert.eval()
-        with torch.no_grad():
-            res = self.bert.forward(input_ids, attention_mask=attention_mask).last_hidden_state[:, 0, :]
-        self.bert.train(mode=ori_mode)
-        return res
-
-    def encode(self, text: str) -> torch.Tensor:
-        _input_ids = torch.tensor([self.tokenizer.encode(text)], dtype=torch.long, device=self.device)
-        _att_mask = torch.tensor([[1] * _input_ids.shape[-1]], dtype=torch.int, device=self.device)
-        return self.forward(_input_ids, _att_mask).squeeze()