deeplotx 0.8.2__py3-none-any.whl → 0.8.5__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
deeplotx/__init__.py CHANGED
@@ -6,6 +6,7 @@ __ROOT__ = os.path.dirname(os.path.abspath(__file__))
 from .encoder import Encoder, LongTextEncoder, LongformerEncoder
 from .nn import (
     FeedForward,
+    MultiHeadFeedForward,
     LinearRegression,
     LogisticRegression,
     SoftmaxRegression,
deeplotx/encoder/long_text_encoder.py CHANGED
@@ -13,9 +13,9 @@ logger = logging.getLogger('deeplotx.embedding')


 class LongTextEncoder(Encoder):
-    def __init__(self, max_length: int, chunk_size: int = 448,
-                 overlapping: int = 32, model_name_or_path: str = DEFAULT_BERT,
-                 cache_capacity: int = 64, max_workers: int = 8, device: str | None = None):
+    def __init__(self, chunk_size: int = 448, overlapping: int = 32, max_length: int = -1,
+                 model_name_or_path: str = DEFAULT_BERT, cache_capacity: int = 64,
+                 max_workers: int = 8, device: str | None = None):
         super().__init__(model_name_or_path=model_name_or_path, device=device)
         assert overlapping < chunk_size, f'overlapping ({overlapping}) must be less than chunk size ({chunk_size}).'
         self._max_length = max_length
@@ -41,23 +41,28 @@ class LongTextEncoder(Encoder):
                 _fin_emb_tensor = torch.cat((_fin_emb_tensor.detach().clone(), _emb.detach().clone()), dim=-1)
             return _fin_emb_tensor.squeeze()

+        _tmp_max_length = self._max_length
         _text_to_show = text.replace("\n", str())
         logger.debug(f'Embedding \"{_text_to_show if len(_text_to_show) < 128 else _text_to_show[:128] + "..."}\".')
         # read cache
         _text_hash = sha512(text)
         if _text_hash in self._cache:
             return postprocess(self._cache[_text_hash], flatten)
-        _text_to_input_ids = self.tokenizer.encode(text.strip())[:self._max_length]
+        _text_to_input_ids = self.tokenizer.encode(text.strip())
+        # variable length
+        if _tmp_max_length < 0:
+            _tmp_max_length = len(_text_to_input_ids)
+        _text_to_input_ids = _text_to_input_ids[:_tmp_max_length]
         _text_to_input_ids_att_mask = []
         # padding
         pad_token = self.tokenizer.pad_token_type_id
-        if len(_text_to_input_ids) < self._max_length:
-            _text_to_input_ids.extend([pad_token] * (self._max_length - len(_text_to_input_ids)))
+        if len(_text_to_input_ids) < _tmp_max_length:
+            _text_to_input_ids.extend([pad_token] * (_tmp_max_length - len(_text_to_input_ids)))
         pads = _text_to_input_ids.count(pad_token)
-        non_pads = self._max_length - pads
+        non_pads = _tmp_max_length - pads
         _text_to_input_ids_att_mask.extend([1] * non_pads)
         _text_to_input_ids_att_mask.extend([0] * pads)
-        num_chunks = math.ceil(self._max_length / self._chunk_size)
+        num_chunks = math.ceil(_tmp_max_length / self._chunk_size)
         # split chunks
         chunks = []
         for i in range(num_chunks):
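With this change, `max_length` is optional: the new default of `-1` pads, masks, and chunks each input to its actual token count, while a positive value still truncates and pads to a fixed budget. A minimal usage sketch (the `flatten` keyword follows the README example shown later in this diff; the sample text is illustrative):

```python
from deeplotx import LongTextEncoder

# Variable-length mode (new default): chunking follows each input's own token count.
encoder = LongTextEncoder(chunk_size=448, overlapping=32)

# Fixed-budget mode still works: inputs are truncated or padded to 2048 tokens.
capped = LongTextEncoder(chunk_size=448, overlapping=32, max_length=2048)

stacked = encoder.encode('an arbitrarily long document ...', flatten=False)  # per-chunk embeddings
flat = encoder.encode('an arbitrarily long document ...', flatten=True)      # single flattened vector
```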
deeplotx/encoder/longformer_encoder.py CHANGED
@@ -3,7 +3,8 @@ import os

 import torch
 from torch import nn
-from transformers import LongformerTokenizer, LongformerModel
+from transformers import AutoModel, AutoTokenizer
+from requests.exceptions import ConnectTimeout, SSLError

 from deeplotx import __ROOT__

@@ -17,18 +18,35 @@ class LongformerEncoder(nn.Module):
         super().__init__()
         self.device = torch.device(device) if device is not None \
             else torch.device('cuda' if torch.cuda.is_available() else 'cpu')
-        self.tokenizer = LongformerTokenizer.from_pretrained(pretrained_model_name_or_path=model_name_or_path,
-                                                             cache_dir=CACHE_PATH, _from_auto=True)
-        self.bert = LongformerModel.from_pretrained(pretrained_model_name_or_path=model_name_or_path,
-                                                    cache_dir=CACHE_PATH, _from_auto=True).to(self.device)
+        try:
+            self.tokenizer = AutoTokenizer.from_pretrained(pretrained_model_name_or_path=model_name_or_path,
+                                                           cache_dir=CACHE_PATH, _from_auto=True,
+                                                           trust_remote_code=True)
+            self.encoder = AutoModel.from_pretrained(pretrained_model_name_or_path=model_name_or_path,
+                                                     cache_dir=CACHE_PATH, _from_auto=True,
+                                                     trust_remote_code=True).to(self.device)
+        except ConnectTimeout:
+            self.tokenizer = AutoTokenizer.from_pretrained(pretrained_model_name_or_path=model_name_or_path,
+                                                           cache_dir=CACHE_PATH, _from_auto=True,
+                                                           trust_remote_code=True, local_files_only=True)
+            self.encoder = AutoModel.from_pretrained(pretrained_model_name_or_path=model_name_or_path,
+                                                     cache_dir=CACHE_PATH, _from_auto=True,
+                                                     trust_remote_code=True, local_files_only=True).to(self.device)
+        except SSLError:
+            self.tokenizer = AutoTokenizer.from_pretrained(pretrained_model_name_or_path=model_name_or_path,
+                                                           cache_dir=CACHE_PATH, _from_auto=True,
+                                                           trust_remote_code=True, local_files_only=True)
+            self.encoder = AutoModel.from_pretrained(pretrained_model_name_or_path=model_name_or_path,
+                                                     cache_dir=CACHE_PATH, _from_auto=True,
+                                                     trust_remote_code=True, local_files_only=True).to(self.device)
         logger.debug(f'{LongformerEncoder.__name__} initialized on device: {self.device}.')

     def forward(self, input_ids: torch.Tensor, attention_mask: torch.Tensor) -> torch.Tensor:
-        ori_mode = self.bert.training
-        self.bert.eval()
+        ori_mode = self.encoder.training
+        self.encoder.eval()
         with torch.no_grad():
-            res = self.bert.forward(input_ids, attention_mask=attention_mask).last_hidden_state[:, 0, :]
-        self.bert.train(mode=ori_mode)
+            res = self.encoder.forward(input_ids, attention_mask=attention_mask).last_hidden_state[:, 0, :]
+        self.encoder.train(mode=ori_mode)
         return res

     def encode(self, text: str) -> torch.Tensor:
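The encoder now loads checkpoints through `AutoTokenizer`/`AutoModel` with `trust_remote_code=True`, and a `ConnectTimeout` or `SSLError` from the Hub triggers a retry with `local_files_only=True`, so a previously cached model keeps working offline. A minimal usage sketch (the checkpoint name is only an example; any AutoModel-compatible encoder with a cached copy should behave the same way):

```python
from deeplotx import LongformerEncoder

# Loads from the Hugging Face Hub when reachable; on ConnectTimeout / SSLError the
# constructor retries with local_files_only=True and falls back to the local cache.
encoder = LongformerEncoder(model_name_or_path='allenai/longformer-base-4096')
embedding = encoder.encode('a short test sentence')  # tensor built from the first-token hidden state
print(embedding.shape)
```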
deeplotx/nn/__init__.py CHANGED
@@ -1,5 +1,6 @@
 from .base_neural_network import BaseNeuralNetwork
 from .feed_forward import FeedForward
+from .multi_head_feed_forward import MultiHeadFeedForward
 from .linear_regression import LinearRegression
 from .logistic_regression import LogisticRegression
 from .softmax_regression import SoftmaxRegression
deeplotx/nn/auto_regression.py CHANGED
@@ -7,8 +7,8 @@ class AutoRegression(RecursiveSequential):
     def __init__(self, feature_dim: int, bias: bool = True,
                  recursive_layers: int = 1, recursive_hidden_dim: int | None = None,
                  ffn_layers: int = 1, ffn_expansion_factor: int | float = 2, dropout_rate: float = 0.05,
-                 model_name: str | None = None, device: str | None = None, dtype: torch.dtype | None = None):
+                 model_name: str | None = None, device: str | None = None, dtype: torch.dtype | None = None, **kwargs):
         super().__init__(input_dim=feature_dim, output_dim=feature_dim, bias=bias,
                          recursive_layers=recursive_layers, recursive_hidden_dim=recursive_hidden_dim,
                          ffn_layers=ffn_layers, ffn_expansion_factor=ffn_expansion_factor,
-                         dropout_rate=dropout_rate, model_name=model_name, device=device, dtype=dtype)
+                         dropout_rate=dropout_rate, model_name=model_name, device=device, dtype=dtype, **kwargs)
deeplotx/nn/linear_regression.py CHANGED
@@ -4,16 +4,17 @@ import torch
 from torch import nn

 from deeplotx.nn.base_neural_network import BaseNeuralNetwork
-from deeplotx.nn.feed_forward import FeedForward
+from deeplotx.nn.multi_head_feed_forward import MultiHeadFeedForward


 class LinearRegression(BaseNeuralNetwork):
-    def __init__(self, input_dim: int, output_dim: int, num_layers: int = 1,
+    def __init__(self, input_dim: int, output_dim: int, num_heads: int = 1, num_layers: int = 1,
                  expansion_factor: int | float = 1.5, bias: bool = True, dropout_rate: float = 0.1,
                  model_name: str | None = None, device: str | None = None, dtype: torch.dtype | None = None):
         super().__init__(in_features=input_dim, out_features=output_dim, model_name=model_name, device=device, dtype=dtype)
-        self.ffn = FeedForward(feature_dim=input_dim, num_layers=num_layers, expansion_factor=expansion_factor,
-                               bias=bias, dropout_rate=dropout_rate, device=self.device, dtype=self.dtype)
+        self.ffn = MultiHeadFeedForward(feature_dim=input_dim, num_heads=num_heads,
+                                        num_layers=num_layers, expansion_factor=expansion_factor,
+                                        bias=bias, dropout_rate=dropout_rate, device=self.device, dtype=self.dtype)
         self.proj = nn.Linear(in_features=input_dim, out_features=output_dim,
                               bias=bias, device=self.device, dtype=self.dtype)

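The regression heads gain a `num_heads` argument that is forwarded to the new `MultiHeadFeedForward` backbone; it defaults to 1, so existing call sites keep their previous behaviour. A hedged sketch of the new signature (the shape comment is an assumption, since `forward` is not part of this hunk):

```python
import torch
from deeplotx import LinearRegression

# num_heads=2 gives the regression head a two-head feed-forward backbone.
model = LinearRegression(input_dim=768, output_dim=3, num_heads=2, num_layers=2)
out = model(torch.randn(4, 768))  # assumed to map (batch, input_dim) -> (batch, output_dim)
```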
deeplotx/nn/logistic_regression.py CHANGED
@@ -6,10 +6,10 @@ from deeplotx.nn.linear_regression import LinearRegression


 class LogisticRegression(LinearRegression):
-    def __init__(self, input_dim: int, output_dim: int = 1, num_layers: int = 1, expansion_factor: int | float = 1.5,
-                 bias: bool = True, dropout_rate: float = 0.1, model_name: str | None = None,
-                 device: str | None = None, dtype: torch.dtype | None = None):
-        super().__init__(input_dim=input_dim, output_dim=output_dim, num_layers=num_layers,
+    def __init__(self, input_dim: int, output_dim: int = 1, num_heads: int = 1, num_layers: int = 1,
+                 expansion_factor: int | float = 1.5, bias: bool = True, dropout_rate: float = 0.1,
+                 model_name: str | None = None, device: str | None = None, dtype: torch.dtype | None = None):
+        super().__init__(input_dim=input_dim, output_dim=output_dim, num_heads=num_heads, num_layers=num_layers,
                          expansion_factor=expansion_factor, bias=bias, dropout_rate=dropout_rate,
                          model_name=model_name, device=device, dtype=dtype)

deeplotx/nn/long_context_recursive_sequential.py CHANGED
@@ -12,12 +12,11 @@ class LongContextRecursiveSequential(RecursiveSequential):
     def __init__(self, input_dim: int, output_dim: int, bias: bool = True,
                  encoder_layers: int = 1, attn_heads: int = 1, recursive_layers: int = 2, recursive_hidden_dim: int | None = None,
                  ffn_layers: int = 1, ffn_expansion_factor: int | float = 2, dropout_rate: float = 0.05,
-                 model_name: str | None = None, device: str | None = None, dtype: torch.dtype | None = None,
-                 **kwargs):
+                 model_name: str | None = None, device: str | None = None, dtype: torch.dtype | None = None, **kwargs):
         super().__init__(input_dim=input_dim, output_dim=output_dim, bias=bias,
                          recursive_layers=recursive_layers, recursive_hidden_dim=recursive_hidden_dim,
                          ffn_layers=ffn_layers, ffn_expansion_factor=ffn_expansion_factor, dropout_rate=dropout_rate,
-                         model_name=model_name, device=device, dtype=dtype)
+                         model_name=model_name, device=device, dtype=dtype, **kwargs)
         self.roformer_encoders = nn.ModuleList([RoFormerEncoder(feature_dim=input_dim, attn_heads=attn_heads, bias=bias,
                                                                 ffn_layers=kwargs.get('encoder_ffn_layers', ffn_layers),
                                                                 ffn_expansion_factor=kwargs.get('encoder_expansion_factor', ffn_expansion_factor),
deeplotx/nn/multi_head_feed_forward.py ADDED
@@ -0,0 +1,32 @@
+from typing_extensions import override
+
+import torch
+from torch import nn
+
+from deeplotx.nn.base_neural_network import BaseNeuralNetwork
+from deeplotx.nn.feed_forward import FeedForward
+
+
+class MultiHeadFeedForward(BaseNeuralNetwork):
+    def __init__(self, feature_dim: int, num_heads: int = 1, num_layers: int = 1, expansion_factor: int | float = 2,
+                 bias: bool = True, dropout_rate: float = 0.05, model_name: str | None = None,
+                 device: str | None = None, dtype: torch.dtype | None = None):
+        super().__init__(in_features=feature_dim, out_features=feature_dim, model_name=model_name,
+                         device=device, dtype=dtype)
+        self._num_heads = num_heads
+        self.expand_proj = nn.Linear(in_features=feature_dim, out_features=feature_dim * self._num_heads, bias=bias,
+                                     device=self.device, dtype=self.dtype)
+        self.ffn_heads = nn.ModuleList([FeedForward(feature_dim=feature_dim, num_layers=num_layers,
+                                                    expansion_factor=expansion_factor, bias=bias,
+                                                    dropout_rate=dropout_rate, device=self.device,
+                                                    dtype=self.dtype) for _ in range(self._num_heads)])
+        self.out_proj = nn.Linear(in_features=feature_dim * self._num_heads, out_features=feature_dim, bias=bias,
+                                  device=self.device, dtype=self.dtype)
+
+    @override
+    def forward(self, x: torch.Tensor) -> torch.Tensor:
+        x = self.ensure_device_and_dtype(x, device=self.device, dtype=self.dtype)
+        x = self.expand_proj(x)
+        x_heads = x.split(self.in_features, dim=-1)
+        head_outs = [self.ffn_heads[_](x_heads[_]) for _ in range(self._num_heads)]
+        return self.out_proj(torch.concat(head_outs, dim=-1))
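In the new module, `expand_proj` widens the input to `feature_dim * num_heads`, each head's `FeedForward` processes its own `feature_dim`-wide slice, and `out_proj` maps the concatenated head outputs back to `feature_dim`, so input and output shapes match. A minimal usage sketch:

```python
import torch
from deeplotx import MultiHeadFeedForward

ffn = MultiHeadFeedForward(feature_dim=256, num_heads=4, num_layers=2, expansion_factor=2)
x = torch.randn(8, 256)  # (batch, feature_dim)
y = ffn(x)               # each head sees one 256-dim slice of the expanded projection
print(y.shape)           # torch.Size([8, 256]); the shape is preserved by out_proj
```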
deeplotx/nn/recursive_sequential.py CHANGED
@@ -4,14 +4,14 @@ import torch
 from torch import nn

 from deeplotx.nn.base_neural_network import BaseNeuralNetwork
-from deeplotx.nn.feed_forward import FeedForward
+from deeplotx.nn.multi_head_feed_forward import MultiHeadFeedForward


 class RecursiveSequential(BaseNeuralNetwork):
     def __init__(self, input_dim: int, output_dim: int, bias: bool = True,
                  recursive_layers: int = 1, recursive_hidden_dim: int | None = None,
                  ffn_layers: int = 1, ffn_expansion_factor: int | float = 2, dropout_rate: float = 0.05,
-                 model_name: str | None = None, device: str | None = None, dtype: torch.dtype | None = None):
+                 model_name: str | None = None, device: str | None = None, dtype: torch.dtype | None = None, **kwargs):
         super().__init__(in_features=input_dim, out_features=output_dim, model_name=model_name,
                          device=device, dtype=dtype)
         if recursive_hidden_dim is None:
@@ -20,9 +20,9 @@ class RecursiveSequential(BaseNeuralNetwork):
                             num_layers=recursive_layers, batch_first=True,
                             bias=True, bidirectional=True, device=self.device,
                             dtype=self.dtype)
-        self.ffn = FeedForward(feature_dim=recursive_hidden_dim * 2, num_layers=ffn_layers,
-                               expansion_factor=ffn_expansion_factor, bias=bias, dropout_rate=dropout_rate,
-                               device=self.device, dtype=self.dtype)
+        self.ffn = MultiHeadFeedForward(feature_dim=recursive_hidden_dim * 2, num_heads=kwargs.get('ffn_heads', 1),
+                                        num_layers=ffn_layers, expansion_factor=ffn_expansion_factor,
+                                        bias=bias, dropout_rate=dropout_rate, device=self.device, dtype=self.dtype)
         self.__proj = nn.Linear(in_features=recursive_hidden_dim * 2, out_features=output_dim, bias=bias,
                                 device=self.device, dtype=self.dtype)

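`RecursiveSequential` now accepts extra keyword arguments, and `ffn_heads` (default 1) sets how many heads the post-LSTM `MultiHeadFeedForward` applies to the `recursive_hidden_dim * 2` features; `LongContextRecursiveSequential`, `AutoRegression`, and the binary-classification trainer forward the same kwarg. A small sketch of the constructor (instantiation only, since the forward signature is not shown in this diff):

```python
from deeplotx import RecursiveSequential

# ffn_heads is read from **kwargs: the Bi-LSTM output (2 * recursive_hidden_dim features)
# is post-processed by a 2-head MultiHeadFeedForward instead of a single FeedForward.
model = RecursiveSequential(input_dim=256, output_dim=1, recursive_layers=2,
                            recursive_hidden_dim=128, ffn_layers=1, ffn_heads=2)
print(model)  # the module tree shows a MultiHeadFeedForward with two FeedForward heads
```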
deeplotx/nn/softmax_regression.py CHANGED
@@ -6,10 +6,10 @@ from deeplotx.nn.linear_regression import LinearRegression


 class SoftmaxRegression(LinearRegression):
-    def __init__(self, input_dim: int, output_dim: int, num_layers: int = 1, expansion_factor: int | float = 1.5,
-                 bias: bool = True, dropout_rate: float = 0.1, model_name: str | None = None,
-                 device: str | None = None, dtype: torch.dtype | None = None):
-        super().__init__(input_dim=input_dim, output_dim=output_dim, num_layers=num_layers,
+    def __init__(self, input_dim: int, output_dim: int, num_heads: int = 1, num_layers: int = 1,
+                 expansion_factor: int | float = 1.5, bias: bool = True, dropout_rate: float = 0.1,
+                 model_name: str | None = None, device: str | None = None, dtype: torch.dtype | None = None):
+        super().__init__(input_dim=input_dim, output_dim=output_dim, num_heads=num_heads, num_layers=num_layers,
                          expansion_factor=expansion_factor, bias=bias, dropout_rate=dropout_rate,
                          model_name=model_name, device=device, dtype=dtype)

deeplotx/trainer/text_binary_classification_trainer.py CHANGED
@@ -49,6 +49,7 @@ class TextBinaryClassifierTrainer(BaseTrainer):
            logger.warning("The dimension of features doesn't match. A new model instance will be created.")
            self.model = None
        if self.model is None:
+           ffn_heads = kwargs.get('ffn_heads', 2)
            ffn_layers = kwargs.get('ffn_layers', 5)
            ffn_expansion_factor = kwargs.get('ffn_expansion_factor', 2)
            bias = kwargs.get('bias', True)
@@ -63,11 +64,11 @@ class TextBinaryClassifierTrainer(BaseTrainer):
            self.model = LongContextRecursiveSequential(input_dim=feature_dim, output_dim=1, bias=bias,
                                                        encoder_layers=encoder_layers, attn_heads=attn_heads,
                                                        recursive_layers=recursive_layers, recursive_hidden_dim=recursive_hidden_dim,
-                                                       ffn_layers=ffn_layers, ffn_expansion_factor=ffn_expansion_factor, dropout_rate=dropout_rate,
-                                                       encoder_ffn_layers=encoder_ffn_layers, encoder_expansion_factor=encoder_expansion_factor,
-                                                       encoder_dropout_rate=encoder_dropout_rate, attn_ffn_layers=attn_ffn_layers,
-                                                       attn_expansion_factor=attn_expansion_factor, attn_dropout_rate=attn_dropout_rate,
-                                                       theta=theta).initialize_weights()
+                                                       ffn_layers=ffn_layers, ffn_heads=ffn_heads, ffn_expansion_factor=ffn_expansion_factor,
+                                                       dropout_rate=dropout_rate, encoder_ffn_layers=encoder_ffn_layers,
+                                                       encoder_expansion_factor=encoder_expansion_factor, encoder_dropout_rate=encoder_dropout_rate,
+                                                       attn_ffn_layers=attn_ffn_layers, attn_expansion_factor=attn_expansion_factor,
+                                                       attn_dropout_rate=attn_dropout_rate, theta=theta).initialize_weights()
            logger.debug(f'Training Model: \n{self.model}')
            loss_function = nn.BCELoss()
            optimizer = optim.Adamax(self.model.parameters(), lr=learning_rate)
deeplotx-0.8.2.dist-info/METADATA → deeplotx-0.8.5.dist-info/METADATA CHANGED
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: deeplotx
-Version: 0.8.2
+Version: 0.8.5
 Summary: Easy-2-use long text NLP toolkit.
 Requires-Python: >=3.10
 Description-Content-Type: text/markdown
@@ -48,24 +48,23 @@ Dynamic: license-file

 - ### Long Text Embedding

-  - **Long text embedding based on a general-purpose BERT** (max supported length: unlimited, defined via max_length)
+  - **Long text embedding based on a general-purpose BERT** (max supported length: unlimited, optionally capped via max_length)

     ```python
     from deeplotx import LongTextEncoder

-    # Max text length of 2048 tokens, chunk size of 512 tokens, 64-token overlap between chunks.
+    # Chunk size of 448 tokens, 32-token overlap between chunks.
     encoder = LongTextEncoder(
-        max_length=2048,
-        chunk_size=512,
-        overlapping=64
+        chunk_size=448,
+        overlapping=32
     )
-    # Compute the embedding of "我是吴子豪, 这是一个测试文本." and flatten it.
-    encoder.encode('我是吴子豪, 这是一个测试文本.', flatten=True, use_cache=True)
+    # Compute the embedding of "我是吴子豪, 这是一个测试文本." and keep it stacked.
+    encoder.encode('我是吴子豪, 这是一个测试文本.', flatten=False)
     ```

     Output:
     ```
-    tensor([ 0.5163, 0.2497, 0.5896, ..., -0.9815, -0.3095, 0.4232])
+    tensor([ 2.2316e-01, 2.0300e-01, ..., 1.5578e-01, -6.6735e-02])
     ```

   - **Long text embedding based on Longformer** (max supported length: 4096 tokens)
@@ -77,6 +76,11 @@ Dynamic: license-file
     encoder.encode('我是吴子豪, 这是一个测试文本.')
     ```

+    Output:
+    ```
+    tensor([-2.7490e-02, 6.6503e-02, ..., -6.5937e-02, 6.7802e-03])
+    ```
+
 - ### Similarity Computation

   - **Vector-based similarity**
@@ -163,14 +167,17 @@ Dynamic: license-file

   ```python
   from deeplotx import (
-      BaseNeuralNetwork,  # base class for deep neural networks
       FeedForward,  # feed-forward network
+      MultiHeadFeedForward,  # multi-head feed-forward network
       LinearRegression,  # linear regression
       LogisticRegression,  # logistic regression / binary classification / multi-label classification
       SoftmaxRegression,  # softmax regression / multi-class classification
       RecursiveSequential,  # sequence model / recurrent neural network
       LongContextRecursiveSequential,  # long-context sequence model / RNN fused with self-attention
-      SelfAttention,  # self-attention module
+      RoPE,  # RoPE positional encoding
+      Attention,  # self-attention / cross-attention
+      MultiHeadAttention,  # parallel multi-head attention
+      RoFormerEncoder,  # RoFormer (Transformer + RoPE) encoder
       AutoRegression,  # autoregressive model / recurrent neural network
       LongContextAutoRegression  # long-context autoregressive model / RNN fused with self-attention
   )
@@ -193,13 +200,13 @@ Dynamic: license-file
                  device: str | None = None, dtype: torch.dtype | None = None):
         super().__init__(in_features=feature_dim, out_features=feature_dim, model_name=model_name, device=device, dtype=dtype)
         self._dropout_rate = dropout_rate
-        self.fc1 = nn.Linear(feature_dim, int(feature_dim * expansion_factor), bias=bias,
-                             device=self.device, dtype=self.dtype)
-        self.fc2 = nn.Linear(int(feature_dim * expansion_factor), feature_dim, bias=bias,
-                             device=self.device, dtype=self.dtype)
-        self.parametric_relu_1 = nn.PReLU(num_parameters=1, init=5e-3,
+        self.up_proj = nn.Linear(in_features=feature_dim, out_features=int(feature_dim * expansion_factor),
+                                 bias=bias, device=self.device, dtype=self.dtype)
+        self.down_proj = nn.Linear(in_features=int(feature_dim * expansion_factor), out_features=feature_dim,
+                                   bias=bias, device=self.device, dtype=self.dtype)
+        self.parametric_relu = nn.PReLU(num_parameters=1, init=5e-3,
                                         device=self.device, dtype=self.dtype)
-        self.layer_norm = nn.LayerNorm(normalized_shape=self.fc1.in_features, eps=1e-9,
+        self.layer_norm = nn.LayerNorm(normalized_shape=self.up_proj.in_features, eps=1e-9,
                                        device=self.device, dtype=self.dtype)

     @override
@@ -207,11 +214,11 @@ Dynamic: license-file
         x = self.ensure_device_and_dtype(x, device=self.device, dtype=self.dtype)
         residual = x
         x = self.layer_norm(x)
-        x = self.fc1(x)
-        x = self.parametric_relu_1(x)
+        x = self.up_proj(x)
+        x = self.parametric_relu(x)
         if self._dropout_rate > .0:
             x = torch.dropout(x, p=self._dropout_rate, train=self.training)
-        return self.fc2(x) + residual
+        return self.down_proj(x) + residual


 class FeedForward(BaseNeuralNetwork):
@@ -224,7 +231,7 @@ Dynamic: license-file
         self.ffn_layers = nn.ModuleList([FeedForwardUnit(feature_dim=feature_dim,
                                                          expansion_factor=expansion_factor, bias=bias,
                                                          dropout_rate=dropout_rate,
-                                                         device=self.device, dtype=self.dtype)] * num_layers)
+                                                         device=self.device, dtype=self.dtype) for _ in range(num_layers)])

     @override
     def forward(self, x: torch.Tensor) -> torch.Tensor:
@@ -234,7 +241,7 @@ Dynamic: license-file
         return x
   ```

-  Self-attention module:
+  Attention module:

   ```python
   from typing_extensions import override
@@ -243,14 +250,17 @@ Dynamic: license-file

 from deeplotx.nn.base_neural_network import BaseNeuralNetwork
 from deeplotx.nn.feed_forward import FeedForward
+from deeplotx.nn.rope import RoPE, DEFAULT_THETA


-class SelfAttention(BaseNeuralNetwork):
-    def __init__(self, feature_dim: int, bias: bool = True, proj_layers: int = 1,
-                 proj_expansion_factor: int | float = 1.5, dropout_rate: float = 0.02,
-                 model_name: str | None = None, device: str | None = None, dtype: torch.dtype | None = None):
+class Attention(BaseNeuralNetwork):
+    def __init__(self, feature_dim: int, bias: bool = True, positional: bool = True,
+                 proj_layers: int = 1, proj_expansion_factor: int | float = 1.5, dropout_rate: float = 0.02,
+                 model_name: str | None = None, device: str | None = None, dtype: torch.dtype | None = None,
+                 **kwargs):
         super().__init__(in_features=feature_dim, out_features=feature_dim, model_name=model_name,
                          device=device, dtype=dtype)
+        self._positional = positional
         self._feature_dim = feature_dim
         self.q_proj = FeedForward(feature_dim=self._feature_dim, num_layers=proj_layers,
                                   expansion_factor=proj_expansion_factor,
@@ -261,21 +271,27 @@ Dynamic: license-file
         self.v_proj = FeedForward(feature_dim=self._feature_dim, num_layers=proj_layers,
                                   expansion_factor=proj_expansion_factor,
                                   bias=bias, dropout_rate=dropout_rate, device=self.device, dtype=self.dtype)
+        if self._positional:
+            self.rope = RoPE(feature_dim=self._feature_dim, theta=kwargs.get('theta', DEFAULT_THETA),
+                             device=self.device, dtype=self.dtype)

-    def _attention(self, x: torch.Tensor, mask: torch.Tensor | None = None) -> torch.Tensor:
-        q, k = self.q_proj(x), self.k_proj(x)
+    def _attention(self, x: torch.Tensor, y: torch.Tensor, mask: torch.Tensor | None = None) -> torch.Tensor:
+        q, k = self.q_proj(x), self.k_proj(y)
+        if self._positional:
+            q, k = self.rope(q), self.rope(k)
         attn = torch.matmul(q, k.transpose(-2, -1))
         attn = attn / (self._feature_dim ** 0.5)
         attn = attn.masked_fill(mask == 0, -1e9) if mask is not None else attn
-        return torch.softmax(attn, dim=-1)
+        return torch.softmax(attn, dtype=self.dtype, dim=-1)

     @override
-    def forward(self, x: torch.Tensor, mask: torch.Tensor | None = None) -> torch.Tensor:
+    def forward(self, x: torch.Tensor, y: torch.Tensor | None = None, mask: torch.Tensor | None = None) -> torch.Tensor:
         x = self.ensure_device_and_dtype(x, device=self.device, dtype=self.dtype)
+        y = x if y is None else self.ensure_device_and_dtype(y, device=self.device, dtype=self.dtype)
         if mask is not None:
             mask = self.ensure_device_and_dtype(mask, device=self.device, dtype=self.dtype)
-        v = self.v_proj(x)
-        return torch.matmul(self._attention(x, mask), v)
+        v = self.v_proj(y)
+        return torch.matmul(self._attention(x, y, mask), v)
   ```

 - ### Text Binary Classification with the Predefined Trainer
@@ -284,7 +300,7 @@ Dynamic: license-file
   from deeplotx import TextBinaryClassifierTrainer, LongTextEncoder
   from deeplotx.util import get_files, read_file

-  # Define the embedding strategy (bert-base-uncased is the default embedding model)
+  # Define the embedding strategy (FacebookAI/xlm-roberta-base is the default embedding model)
   long_text_encoder = LongTextEncoder(
       max_length=2048,  # maximum text length; longer inputs are truncated
       chunk_size=448,  # chunk size (in tokens)
@@ -306,10 +322,11 @@ Dynamic: license-file

   # Start training
   model = trainer.train(pos_data, neg_data,
-                        num_epochs=36, learning_rate=2e-5,  # number of epochs and learning rate
-                        balancing_dataset=True,  # whether to balance the dataset
-                        alpha=1e-4, rho=.2,  # elastic net regularization hyperparameters alpha and rho
-                        hidden_dim=256, recursive_layers=2)  # recurrent network structure
+                        num_epochs=36, learning_rate=2e-5,
+                        balancing_dataset=True, alpha=1e-4,
+                        rho=.2, encoder_layers=2,  # 2 RoFormer encoder layers
+                        attn_heads=8,  # 8 attention heads
+                        recursive_layers=2)  # 2 Bi-LSTM layers

   # Save the model weights
   model.save(model_name='test_model', model_dir='model')
deeplotx-0.8.2.dist-info/RECORD → deeplotx-0.8.5.dist-info/RECORD CHANGED
@@ -1,34 +1,35 @@
-deeplotx/__init__.py,sha256=oNeA-vNu5YGiEQg0IcpKEdGh_Y_2uPvo2nqaNL_Zgv8,1159
+deeplotx/__init__.py,sha256=xEq8WQ2LpEZoLX_Z464d0dy4aemFGrEV6ZMJr6ioFnQ,1186
 deeplotx/encoder/__init__.py,sha256=BrsF5_4O-4pfihYF2wjExDOoAY-03kGJTH-Mhez4tsE,129
 deeplotx/encoder/encoder.py,sha256=oSBdA-MiwMKNfTFJWR-RdvNS0G0qfX-Qchwy4LuwB00,3985
-deeplotx/encoder/long_text_encoder.py,sha256=PFR6jjGyg1N58TQlKsPaNQEd-EDl13Hyhu7A1KtGBbA,3743
-deeplotx/encoder/longformer_encoder.py,sha256=A8FXqd4mdHxSn_o_R689XtpT73ISDT788EgMQRGLC2g,1822
-deeplotx/nn/__init__.py,sha256=01I_yqx9GTa4wy3uNyAqhtxp66tDqxgMLC4Ky5Vnkrg,651
+deeplotx/encoder/long_text_encoder.py,sha256=3ScdKDi65J5tdO8PFCXBjCzNUCLlJRwVhpDR0BrphG4,3951
+deeplotx/encoder/longformer_encoder.py,sha256=NNYLr5I9tdeh0C8Ir7QcbEMU9gDk6U7CiF3Tbg6NEsE,3372
+deeplotx/nn/__init__.py,sha256=YILwbxb-NHdiJjfOwBKH8F7PuZSDZSrGpTznPDucTro,710
 deeplotx/nn/attention.py,sha256=R-i-Rd7gnsh6hwXDeYfqLQOJvfSZIGfQbFzRlC91XLo,2879
-deeplotx/nn/auto_regression.py,sha256=uISx29t_zkDGS8s2wvGB6wOGYZitQ4hQ7wyoQl4lcqY,857
+deeplotx/nn/auto_regression.py,sha256=j_R7WGPq9REngjpLuX5c0AaNqOpgGm2Vfrolw-XjWXw,877
 deeplotx/nn/base_neural_network.py,sha256=FjQEDFH810fJS7JV3aLgJZnaMqC6DH--wlBvuj-ghTc,5900
 deeplotx/nn/feed_forward.py,sha256=4ozj7EDalO9pb6JUhZtsJqE0r8bIHFApHRt2zTrl4ho,2931
-deeplotx/nn/linear_regression.py,sha256=QybSRfsf9PpgJAWixvrSNn3OYRKJXpSZMfqdzpw-Kd8,1280
-deeplotx/nn/logistic_regression.py,sha256=WfgHVNGIvAYsX2iea2wRlLgfbubYWyZkBLYpnpwOiyU,937
+deeplotx/nn/linear_regression.py,sha256=EotBCCam7FH5iaAv0ma4TfYId2YfhBnrQlMirF0xoq4,1400
+deeplotx/nn/logistic_regression.py,sha256=6vlXuP5el6EdXEhUbpVTKstcf-pikD50Xezw66l-aUc,978
 deeplotx/nn/long_context_auto_regression.py,sha256=uy0k_g8wEfMH5nd5HCfrHA8dgEsuWBA2x8U-g3h4vQc,1054
-deeplotx/nn/long_context_recursive_sequential.py,sha256=i7kUml9RV_mkLRJ114UHsj9Gxw7LzJVQ4z8-REHa8-w,2682
+deeplotx/nn/long_context_recursive_sequential.py,sha256=pcZfnrIHBqbp2BssfUTS1klpuykZwowikfAIaOnvRUI,2674
 deeplotx/nn/multi_head_attention.py,sha256=3z73uGbvy3jszRy1B9nxGOJjlttHpcpRF8Qd09OEams,2267
-deeplotx/nn/recursive_sequential.py,sha256=8Z8vT70xTygusL-3w3QlB_B_k0xQSUU2ZTgC1LhEmzQ,2805
+deeplotx/nn/multi_head_feed_forward.py,sha256=hD9ScrVJZ9kNksoFASf0xaPgEnNgCeRivW-XjYOPjj8,1908
+deeplotx/nn/recursive_sequential.py,sha256=crD3rEUPPjwu-uSJSiX9kqaM8OPI8SYspbDPlZb2J2Y,2900
 deeplotx/nn/roformer_encoder.py,sha256=UJjKniNdMd0rfoYQcsX6bPo6Ceq_Z6EhwHe2kgqWC_k,2426
 deeplotx/nn/rope.py,sha256=RTOjnllubktdy2rzFWxBfkuLuGjhEMyDd06uojdqPhM,1848
-deeplotx/nn/softmax_regression.py,sha256=PN_1Zr_B_z5zYC_s_8k6c5fllOtxfJEvVvCmC9GRmx0,958
+deeplotx/nn/softmax_regression.py,sha256=1brNbnj8qI0VfycZmZQlfn52myKZZe8BF_ziq1JQfPY,999
 deeplotx/similarity/__init__.py,sha256=s3u-KSgxjnMcWpIItKgXNltFMPQ7YY3CqsqHI-5F1c8,724
 deeplotx/similarity/distribution.py,sha256=wQGouuuW531pZeBRKBujXsdsoz4fDnPw7_GW81jwepc,1066
 deeplotx/similarity/set.py,sha256=zhGFxtSIXlWqvipBYzoiPahp4g0boAIoUiMfG0wl07A,686
 deeplotx/similarity/vector.py,sha256=WVbDHqykt-fvuILVrhUCtIFAOEjY_zvttrXGM9eylG0,1125
 deeplotx/trainer/__init__.py,sha256=Fl5DR9UecQc5VtBcczU9sx_HtPNoFohpuELOh-Jrsks,77
 deeplotx/trainer/base_trainer.py,sha256=z0MeAT-rRYmjeBXt0ckt7J1itYArR0Cx02wHesXUoZE,385
-deeplotx/trainer/text_binary_classification_trainer.py,sha256=QMLR4cC8NCUP-v7SOYVtCykNwahENmWHv9adaeTbYmA,6528
+deeplotx/trainer/text_binary_classification_trainer.py,sha256=TFxOX8rWU_zKliI9zm7F5ZH7snR2d-sk95s3pfTmm78,6601
 deeplotx/util/__init__.py,sha256=5CH4MTeSgsmCe3LPMfvKoSBpwh6jDSBuHVElJvzQzgs,90
 deeplotx/util/hash.py,sha256=qbNU3RLBWGQYFVte9WZBAkZ1BkdjCXiKLDaKPN54KFk,662
 deeplotx/util/read_file.py,sha256=ptzouvEQeeW8KU5BrWNJlXw-vFXVrpS9SkAUxsu6A8A,612
-deeplotx-0.8.2.dist-info/licenses/LICENSE,sha256=IwGE9guuL-ryRPEKi6wFPI_zOhg7zDZbTYuHbSt_SAk,35823
-deeplotx-0.8.2.dist-info/METADATA,sha256=nYbTzZTrgKWDcUMv_QFQDxcnuTgxt1Icg1SWzUbLXks,12251
-deeplotx-0.8.2.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
-deeplotx-0.8.2.dist-info/top_level.txt,sha256=hKg4pVDXZ-WWxkRfJFczRIll1Sv7VyfKCmzHLXbuh1U,9
-deeplotx-0.8.2.dist-info/RECORD,,
+deeplotx-0.8.5.dist-info/licenses/LICENSE,sha256=IwGE9guuL-ryRPEKi6wFPI_zOhg7zDZbTYuHbSt_SAk,35823
+deeplotx-0.8.5.dist-info/METADATA,sha256=aM49grLNXqwEDdA4PwOEgiBKH1uCPjFuu7OCf5-_5aU,13138
+deeplotx-0.8.5.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
+deeplotx-0.8.5.dist-info/top_level.txt,sha256=hKg4pVDXZ-WWxkRfJFczRIll1Sv7VyfKCmzHLXbuh1U,9
+deeplotx-0.8.5.dist-info/RECORD,,