deeplotx 0.8.2__py3-none-any.whl → 0.8.5__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- deeplotx/__init__.py +1 -0
- deeplotx/encoder/long_text_encoder.py +13 -8
- deeplotx/encoder/longformer_encoder.py +27 -9
- deeplotx/nn/__init__.py +1 -0
- deeplotx/nn/auto_regression.py +2 -2
- deeplotx/nn/linear_regression.py +5 -4
- deeplotx/nn/logistic_regression.py +4 -4
- deeplotx/nn/long_context_recursive_sequential.py +2 -3
- deeplotx/nn/multi_head_feed_forward.py +32 -0
- deeplotx/nn/recursive_sequential.py +5 -5
- deeplotx/nn/softmax_regression.py +4 -4
- deeplotx/trainer/text_binary_classification_trainer.py +6 -5
- {deeplotx-0.8.2.dist-info → deeplotx-0.8.5.dist-info}/METADATA +54 -37
- {deeplotx-0.8.2.dist-info → deeplotx-0.8.5.dist-info}/RECORD +17 -16
- {deeplotx-0.8.2.dist-info → deeplotx-0.8.5.dist-info}/WHEEL +0 -0
- {deeplotx-0.8.2.dist-info → deeplotx-0.8.5.dist-info}/licenses/LICENSE +0 -0
- {deeplotx-0.8.2.dist-info → deeplotx-0.8.5.dist-info}/top_level.txt +0 -0
deeplotx/__init__.py
CHANGED
deeplotx/encoder/long_text_encoder.py
CHANGED
@@ -13,9 +13,9 @@ logger = logging.getLogger('deeplotx.embedding')


  class LongTextEncoder(Encoder):
- def __init__(self,
-
-
+ def __init__(self, chunk_size: int = 448, overlapping: int = 32, max_length: int = -1,
+ model_name_or_path: str = DEFAULT_BERT, cache_capacity: int = 64,
+ max_workers: int = 8, device: str | None = None):
  super().__init__(model_name_or_path=model_name_or_path, device=device)
  assert overlapping < chunk_size, f'overlapping ({overlapping}) must be less than chunk size ({chunk_size}).'
  self._max_length = max_length
@@ -41,23 +41,28 @@ class LongTextEncoder(Encoder):
  _fin_emb_tensor = torch.cat((_fin_emb_tensor.detach().clone(), _emb.detach().clone()), dim=-1)
  return _fin_emb_tensor.squeeze()

+ _tmp_max_length = self._max_length
  _text_to_show = text.replace("\n", str())
  logger.debug(f'Embedding \"{_text_to_show if len(_text_to_show) < 128 else _text_to_show[:128] + "..."}\".')
  # read cache
  _text_hash = sha512(text)
  if _text_hash in self._cache:
  return postprocess(self._cache[_text_hash], flatten)
- _text_to_input_ids = self.tokenizer.encode(text.strip())
+ _text_to_input_ids = self.tokenizer.encode(text.strip())
+ # variable length
+ if _tmp_max_length < 0:
+ _tmp_max_length = len(_text_to_input_ids)
+ _text_to_input_ids = _text_to_input_ids[:_tmp_max_length]
  _text_to_input_ids_att_mask = []
  # padding
  pad_token = self.tokenizer.pad_token_type_id
- if len(_text_to_input_ids) <
- _text_to_input_ids.extend([pad_token] * (
+ if len(_text_to_input_ids) < _tmp_max_length:
+ _text_to_input_ids.extend([pad_token] * (_tmp_max_length - len(_text_to_input_ids)))
  pads = _text_to_input_ids.count(pad_token)
- non_pads =
+ non_pads = _tmp_max_length - pads
  _text_to_input_ids_att_mask.extend([1] * non_pads)
  _text_to_input_ids_att_mask.extend([0] * pads)
- num_chunks = math.ceil(
+ num_chunks = math.ceil(_tmp_max_length / self._chunk_size)
  # split chunks
  chunks = []
  for i in range(num_chunks):
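For orientation, a minimal usage sketch of the reworked constructor and the variable-length behavior introduced in this hunk (defaults are taken from the new signature above; the input text is illustrative):

```python
from deeplotx import LongTextEncoder

# Defaults mirror the new __init__ signature above. max_length=-1 now means
# "variable length": the tokenized text is used as-is instead of being padded
# or truncated to a fixed size.
encoder = LongTextEncoder(chunk_size=448, overlapping=32, max_length=-1)

# flatten matches the postprocess(..., flatten) call in the cached-return path above:
# False keeps the per-chunk embeddings stacked.
embedding = encoder.encode('An arbitrarily long document ...', flatten=False)
```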
deeplotx/encoder/longformer_encoder.py
CHANGED
@@ -3,7 +3,8 @@ import os

  import torch
  from torch import nn
- from transformers import
+ from transformers import AutoModel, AutoTokenizer
+ from requests.exceptions import ConnectTimeout, SSLError

  from deeplotx import __ROOT__

@@ -17,18 +18,35 @@ class LongformerEncoder(nn.Module):
  super().__init__()
  self.device = torch.device(device) if device is not None \
  else torch.device('cuda' if torch.cuda.is_available() else 'cpu')
-
-
-
-
+ try:
+ self.tokenizer = AutoTokenizer.from_pretrained(pretrained_model_name_or_path=model_name_or_path,
+ cache_dir=CACHE_PATH, _from_auto=True,
+ trust_remote_code=True)
+ self.encoder = AutoModel.from_pretrained(pretrained_model_name_or_path=model_name_or_path,
+ cache_dir=CACHE_PATH, _from_auto=True,
+ trust_remote_code=True).to(self.device)
+ except ConnectTimeout:
+ self.tokenizer = AutoTokenizer.from_pretrained(pretrained_model_name_or_path=model_name_or_path,
+ cache_dir=CACHE_PATH, _from_auto=True,
+ trust_remote_code=True, local_files_only=True)
+ self.encoder = AutoModel.from_pretrained(pretrained_model_name_or_path=model_name_or_path,
+ cache_dir=CACHE_PATH, _from_auto=True,
+ trust_remote_code=True, local_files_only=True).to(self.device)
+ except SSLError:
+ self.tokenizer = AutoTokenizer.from_pretrained(pretrained_model_name_or_path=model_name_or_path,
+ cache_dir=CACHE_PATH, _from_auto=True,
+ trust_remote_code=True, local_files_only=True)
+ self.encoder = AutoModel.from_pretrained(pretrained_model_name_or_path=model_name_or_path,
+ cache_dir=CACHE_PATH, _from_auto=True,
+ trust_remote_code=True, local_files_only=True).to(self.device)
  logger.debug(f'{LongformerEncoder.__name__} initialized on device: {self.device}.')

  def forward(self, input_ids: torch.Tensor, attention_mask: torch.Tensor) -> torch.Tensor:
- ori_mode = self.
- self.
+ ori_mode = self.encoder.training
+ self.encoder.eval()
  with torch.no_grad():
- res = self.
- self.
+ res = self.encoder.forward(input_ids, attention_mask=attention_mask).last_hidden_state[:, 0, :]
+ self.encoder.train(mode=ori_mode)
  return res

  def encode(self, text: str) -> torch.Tensor:
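The hunk above switches to AutoTokenizer/AutoModel and retries with locally cached weights when the Hugging Face Hub is unreachable. A stripped-down sketch of that fallback pattern (the helper name and arguments here are illustrative, not part of the package):

```python
import torch
from requests.exceptions import ConnectTimeout, SSLError
from transformers import AutoModel, AutoTokenizer

def load_encoder(model_name_or_path: str, cache_dir: str, device: torch.device):
    try:
        # First attempt may touch the network to resolve or refresh the model.
        tokenizer = AutoTokenizer.from_pretrained(model_name_or_path, cache_dir=cache_dir,
                                                  trust_remote_code=True)
        model = AutoModel.from_pretrained(model_name_or_path, cache_dir=cache_dir,
                                          trust_remote_code=True)
    except (ConnectTimeout, SSLError):
        # Offline fallback: reuse whatever is already in the local cache.
        tokenizer = AutoTokenizer.from_pretrained(model_name_or_path, cache_dir=cache_dir,
                                                  trust_remote_code=True, local_files_only=True)
        model = AutoModel.from_pretrained(model_name_or_path, cache_dir=cache_dir,
                                          trust_remote_code=True, local_files_only=True)
    return tokenizer, model.to(device)
```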
deeplotx/nn/__init__.py
CHANGED
@@ -1,5 +1,6 @@
  from .base_neural_network import BaseNeuralNetwork
  from .feed_forward import FeedForward
+ from .multi_head_feed_forward import MultiHeadFeedForward
  from .linear_regression import LinearRegression
  from .logistic_regression import LogisticRegression
  from .softmax_regression import SoftmaxRegression
deeplotx/nn/auto_regression.py
CHANGED
@@ -7,8 +7,8 @@ class AutoRegression(RecursiveSequential):
  def __init__(self, feature_dim: int, bias: bool = True,
  recursive_layers: int = 1, recursive_hidden_dim: int | None = None,
  ffn_layers: int = 1, ffn_expansion_factor: int | float = 2, dropout_rate: float = 0.05,
- model_name: str | None = None, device: str | None = None, dtype: torch.dtype | None = None):
+ model_name: str | None = None, device: str | None = None, dtype: torch.dtype | None = None, **kwargs):
  super().__init__(input_dim=feature_dim, output_dim=feature_dim, bias=bias,
  recursive_layers=recursive_layers, recursive_hidden_dim=recursive_hidden_dim,
  ffn_layers=ffn_layers, ffn_expansion_factor=ffn_expansion_factor,
- dropout_rate=dropout_rate, model_name=model_name, device=device, dtype=dtype)
+ dropout_rate=dropout_rate, model_name=model_name, device=device, dtype=dtype, **kwargs)
deeplotx/nn/linear_regression.py
CHANGED
@@ -4,16 +4,17 @@ import torch
  from torch import nn

  from deeplotx.nn.base_neural_network import BaseNeuralNetwork
- from deeplotx.nn.
+ from deeplotx.nn.multi_head_feed_forward import MultiHeadFeedForward


  class LinearRegression(BaseNeuralNetwork):
- def __init__(self, input_dim: int, output_dim: int, num_layers: int = 1,
+ def __init__(self, input_dim: int, output_dim: int, num_heads: int = 1, num_layers: int = 1,
  expansion_factor: int | float = 1.5, bias: bool = True, dropout_rate: float = 0.1,
  model_name: str | None = None, device: str | None = None, dtype: torch.dtype | None = None):
  super().__init__(in_features=input_dim, out_features=output_dim, model_name=model_name, device=device, dtype=dtype)
- self.ffn =
-
+ self.ffn = MultiHeadFeedForward(feature_dim=input_dim, num_heads=num_heads,
+ num_layers=num_layers, expansion_factor=expansion_factor,
+ bias=bias, dropout_rate=dropout_rate, device=self.device, dtype=self.dtype)
  self.proj = nn.Linear(in_features=input_dim, out_features=output_dim,
  bias=bias, device=self.device, dtype=self.dtype)

deeplotx/nn/logistic_regression.py
CHANGED
@@ -6,10 +6,10 @@ from deeplotx.nn.linear_regression import LinearRegression


  class LogisticRegression(LinearRegression):
- def __init__(self, input_dim: int, output_dim: int = 1,
- bias: bool = True, dropout_rate: float = 0.1,
- device: str | None = None, dtype: torch.dtype | None = None):
- super().__init__(input_dim=input_dim, output_dim=output_dim, num_layers=num_layers,
+ def __init__(self, input_dim: int, output_dim: int = 1, num_heads: int = 1, num_layers: int = 1,
+ expansion_factor: int | float = 1.5, bias: bool = True, dropout_rate: float = 0.1,
+ model_name: str | None = None, device: str | None = None, dtype: torch.dtype | None = None):
+ super().__init__(input_dim=input_dim, output_dim=output_dim, num_heads=num_heads, num_layers=num_layers,
  expansion_factor=expansion_factor, bias=bias, dropout_rate=dropout_rate,
  model_name=model_name, device=device, dtype=dtype)

deeplotx/nn/long_context_recursive_sequential.py
CHANGED
@@ -12,12 +12,11 @@ class LongContextRecursiveSequential(RecursiveSequential):
  def __init__(self, input_dim: int, output_dim: int, bias: bool = True,
  encoder_layers: int = 1, attn_heads: int = 1, recursive_layers: int = 2, recursive_hidden_dim: int | None = None,
  ffn_layers: int = 1, ffn_expansion_factor: int | float = 2, dropout_rate: float = 0.05,
- model_name: str | None = None, device: str | None = None, dtype: torch.dtype | None = None,
- **kwargs):
+ model_name: str | None = None, device: str | None = None, dtype: torch.dtype | None = None, **kwargs):
  super().__init__(input_dim=input_dim, output_dim=output_dim, bias=bias,
  recursive_layers=recursive_layers, recursive_hidden_dim=recursive_hidden_dim,
  ffn_layers=ffn_layers, ffn_expansion_factor=ffn_expansion_factor, dropout_rate=dropout_rate,
- model_name=model_name, device=device, dtype=dtype)
+ model_name=model_name, device=device, dtype=dtype, **kwargs)
  self.roformer_encoders = nn.ModuleList([RoFormerEncoder(feature_dim=input_dim, attn_heads=attn_heads, bias=bias,
  ffn_layers=kwargs.get('encoder_ffn_layers', ffn_layers),
  ffn_expansion_factor=kwargs.get('encoder_expansion_factor', ffn_expansion_factor),
deeplotx/nn/multi_head_feed_forward.py
ADDED
@@ -0,0 +1,32 @@
+ from typing_extensions import override
+
+ import torch
+ from torch import nn
+
+ from deeplotx.nn.base_neural_network import BaseNeuralNetwork
+ from deeplotx.nn.feed_forward import FeedForward
+
+
+ class MultiHeadFeedForward(BaseNeuralNetwork):
+ def __init__(self, feature_dim: int, num_heads: int = 1, num_layers: int = 1, expansion_factor: int | float = 2,
+ bias: bool = True, dropout_rate: float = 0.05, model_name: str | None = None,
+ device: str | None = None, dtype: torch.dtype | None = None):
+ super().__init__(in_features=feature_dim, out_features=feature_dim, model_name=model_name,
+ device=device, dtype=dtype)
+ self._num_heads = num_heads
+ self.expand_proj = nn.Linear(in_features=feature_dim, out_features=feature_dim * self._num_heads, bias=bias,
+ device=self.device, dtype=self.dtype)
+ self.ffn_heads = nn.ModuleList([FeedForward(feature_dim=feature_dim, num_layers=num_layers,
+ expansion_factor=expansion_factor, bias=bias,
+ dropout_rate=dropout_rate, device=self.device,
+ dtype=self.dtype) for _ in range(self._num_heads)])
+ self.out_proj = nn.Linear(in_features=feature_dim * self._num_heads, out_features=feature_dim, bias=bias,
+ device=self.device, dtype=self.dtype)
+
+ @override
+ def forward(self, x: torch.Tensor) -> torch.Tensor:
+ x = self.ensure_device_and_dtype(x, device=self.device, dtype=self.dtype)
+ x = self.expand_proj(x)
+ x_heads = x.split(self.in_features, dim=-1)
+ head_outs = [self.ffn_heads[_](x_heads[_]) for _ in range(self._num_heads)]
+ return self.out_proj(torch.concat(head_outs, dim=-1))
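The new module widens the input with expand_proj into num_heads feature_dim-sized slices, runs each slice through its own FeedForward, and projects the concatenation back to feature_dim, so input and output widths match. A small usage sketch (dimensions are illustrative):

```python
import torch
from deeplotx.nn import MultiHeadFeedForward  # exported from deeplotx.nn as of 0.8.5

mhff = MultiHeadFeedForward(feature_dim=256, num_heads=4, num_layers=2)
x = torch.randn(8, 256)
y = mhff(x)       # expand -> 4 independent FeedForward heads -> concat -> project back
print(y.shape)    # torch.Size([8, 256]); the output keeps feature_dim
```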
deeplotx/nn/recursive_sequential.py
CHANGED
@@ -4,14 +4,14 @@ import torch
  from torch import nn

  from deeplotx.nn.base_neural_network import BaseNeuralNetwork
- from deeplotx.nn.
+ from deeplotx.nn.multi_head_feed_forward import MultiHeadFeedForward


  class RecursiveSequential(BaseNeuralNetwork):
  def __init__(self, input_dim: int, output_dim: int, bias: bool = True,
  recursive_layers: int = 1, recursive_hidden_dim: int | None = None,
  ffn_layers: int = 1, ffn_expansion_factor: int | float = 2, dropout_rate: float = 0.05,
- model_name: str | None = None, device: str | None = None, dtype: torch.dtype | None = None):
+ model_name: str | None = None, device: str | None = None, dtype: torch.dtype | None = None, **kwargs):
  super().__init__(in_features=input_dim, out_features=output_dim, model_name=model_name,
  device=device, dtype=dtype)
  if recursive_hidden_dim is None:
@@ -20,9 +20,9 @@ class RecursiveSequential(BaseNeuralNetwork):
  num_layers=recursive_layers, batch_first=True,
  bias=True, bidirectional=True, device=self.device,
  dtype=self.dtype)
- self.ffn =
-
-
+ self.ffn = MultiHeadFeedForward(feature_dim=recursive_hidden_dim * 2, num_heads=kwargs.get('ffn_heads', 1),
+ num_layers=ffn_layers, expansion_factor=ffn_expansion_factor,
+ bias=bias, dropout_rate=dropout_rate, device=self.device, dtype=self.dtype)
  self.__proj = nn.Linear(in_features=recursive_hidden_dim * 2, out_features=output_dim, bias=bias,
  device=self.device, dtype=self.dtype)

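With **kwargs now accepted, the number of feed-forward heads can be chosen when the model is built; a brief sketch of the new knob (dimensions are illustrative, and ffn_heads falls back to 1 as in the kwargs.get call above):

```python
from deeplotx import RecursiveSequential

# The extra ffn_heads keyword is routed through **kwargs into the
# MultiHeadFeedForward block applied to the Bi-LSTM output.
model = RecursiveSequential(input_dim=768, output_dim=2, recursive_layers=2, ffn_heads=4)
```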
deeplotx/nn/softmax_regression.py
CHANGED
@@ -6,10 +6,10 @@ from deeplotx.nn.linear_regression import LinearRegression


  class SoftmaxRegression(LinearRegression):
- def __init__(self, input_dim: int, output_dim: int,
- bias: bool = True, dropout_rate: float = 0.1,
- device: str | None = None, dtype: torch.dtype | None = None):
- super().__init__(input_dim=input_dim, output_dim=output_dim, num_layers=num_layers,
+ def __init__(self, input_dim: int, output_dim: int, num_heads: int = 1, num_layers: int = 1,
+ expansion_factor: int | float = 1.5, bias: bool = True, dropout_rate: float = 0.1,
+ model_name: str | None = None, device: str | None = None, dtype: torch.dtype | None = None):
+ super().__init__(input_dim=input_dim, output_dim=output_dim, num_heads=num_heads, num_layers=num_layers,
  expansion_factor=expansion_factor, bias=bias, dropout_rate=dropout_rate,
  model_name=model_name, device=device, dtype=dtype)

deeplotx/trainer/text_binary_classification_trainer.py
CHANGED
@@ -49,6 +49,7 @@ class TextBinaryClassifierTrainer(BaseTrainer):
  logger.warning("The dimension of features doesn't match. A new model instance will be created.")
  self.model = None
  if self.model is None:
+ ffn_heads = kwargs.get('ffn_heads', 2)
  ffn_layers = kwargs.get('ffn_layers', 5)
  ffn_expansion_factor = kwargs.get('ffn_expansion_factor', 2)
  bias = kwargs.get('bias', True)
@@ -63,11 +64,11 @@ class TextBinaryClassifierTrainer(BaseTrainer):
  self.model = LongContextRecursiveSequential(input_dim=feature_dim, output_dim=1, bias=bias,
  encoder_layers=encoder_layers, attn_heads=attn_heads,
  recursive_layers=recursive_layers, recursive_hidden_dim=recursive_hidden_dim,
- ffn_layers=ffn_layers,
-
-
-
- theta=theta).initialize_weights()
+ ffn_layers=ffn_layers, ffn_heads=ffn_heads, ffn_expansion_factor=ffn_expansion_factor,
+ dropout_rate=dropout_rate, encoder_ffn_layers=encoder_ffn_layers,
+ encoder_expansion_factor=encoder_expansion_factor, encoder_dropout_rate=encoder_dropout_rate,
+ attn_ffn_layers=attn_ffn_layers, attn_expansion_factor=attn_expansion_factor,
+ attn_dropout_rate=attn_dropout_rate, theta=theta).initialize_weights()
  logger.debug(f'Training Model: \n{self.model}')
  loss_function = nn.BCELoss()
  optimizer = optim.Adamax(self.model.parameters(), lr=learning_rate)
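Since the trainer reads ffn_heads from its keyword arguments (default 2), the head count can be set from the training call alongside the existing knobs; a hedged sketch, assuming trainer, pos_data and neg_data are set up as in the README example further down:

```python
# Extra keyword arguments are forwarded into LongContextRecursiveSequential
# when the trainer builds a fresh model instance (see the hunk above).
model = trainer.train(pos_data, neg_data,
                      num_epochs=36, learning_rate=2e-5,
                      ffn_heads=2,      # new in 0.8.5; defaults to 2 if omitted
                      ffn_layers=5, attn_heads=8)
```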
{deeplotx-0.8.2.dist-info → deeplotx-0.8.5.dist-info}/METADATA
CHANGED
@@ -1,6 +1,6 @@
  Metadata-Version: 2.4
  Name: deeplotx
- Version: 0.8.2
+ Version: 0.8.5
  Summary: Easy-2-use long text NLP toolkit.
  Requires-Python: >=3.10
  Description-Content-Type: text/markdown
@@ -48,24 +48,23 @@ Dynamic: license-file

  - ### Long-text embedding

- - **General-purpose BERT-based long-text embedding** (maximum supported length: unlimited,
+ - **General-purpose BERT-based long-text embedding** (maximum supported length: unlimited; can be capped via max_length)

  ```python
  from deeplotx import LongTextEncoder

- #
+ # Chunk size of 448 tokens, with a 32-token overlap between adjacent chunks.
  encoder = LongTextEncoder(
-
-
- overlapping=64
+ chunk_size=448,
+ overlapping=32
  )
- # Compute the embedding of "我是吴子豪, 这是一个测试文本.",
- encoder.encode('我是吴子豪, 这是一个测试文本.', flatten=
+ # Compute the embedding of "我是吴子豪, 这是一个测试文本." and keep the chunk embeddings stacked.
+ encoder.encode('我是吴子豪, 这是一个测试文本.', flatten=False)
  ```

  Output:
  ```
- tensor([
+ tensor([ 2.2316e-01, 2.0300e-01, ..., 1.5578e-01, -6.6735e-02])
  ```

  - **Longformer-based long-text embedding** (maximum supported length: 4096 tokens)
@@ -77,6 +76,11 @@ Dynamic: license-file
  encoder.encode('我是吴子豪, 这是一个测试文本.')
  ```

+ Output:
+ ```
+ tensor([-2.7490e-02, 6.6503e-02, ..., -6.5937e-02, 6.7802e-03])
+ ```
+
  - ### Similarity computation

  - **Vector-based similarity**
@@ -163,14 +167,17 @@ Dynamic: license-file

  ```python
  from deeplotx import (
- BaseNeuralNetwork, # base class for deep neural networks
  FeedForward, # feed-forward network
+ MultiHeadFeedForward, # multi-head feed-forward network
  LinearRegression, # linear regression
  LogisticRegression, # logistic regression / binary classification / multi-label classification
  SoftmaxRegression, # softmax regression / multi-class classification
  RecursiveSequential, # sequence model / recurrent neural network
  LongContextRecursiveSequential, # long-context sequence model / RNN fused with self-attention
-
+ RoPE, # RoPE positional encoding
+ Attention, # self-attention / cross-attention
+ MultiHeadAttention, # parallel multi-head attention
+ RoFormerEncoder, # RoFormer (Transformer + RoPE) encoder
  AutoRegression, # autoregressive model / recurrent neural network
  LongContextAutoRegression # long-context autoregressive model / RNN fused with self-attention
  )
@@ -193,13 +200,13 @@ Dynamic: license-file
  device: str | None = None, dtype: torch.dtype | None = None):
  super().__init__(in_features=feature_dim, out_features=feature_dim, model_name=model_name, device=device, dtype=dtype)
  self._dropout_rate = dropout_rate
- self.
-
- self.
-
- self.
+ self.up_proj = nn.Linear(in_features=feature_dim, out_features=int(feature_dim * expansion_factor),
+ bias=bias, device=self.device, dtype=self.dtype)
+ self.down_proj = nn.Linear(in_features=int(feature_dim * expansion_factor), out_features=feature_dim,
+ bias=bias, device=self.device, dtype=self.dtype)
+ self.parametric_relu = nn.PReLU(num_parameters=1, init=5e-3,
  device=self.device, dtype=self.dtype)
- self.layer_norm = nn.LayerNorm(normalized_shape=self.
+ self.layer_norm = nn.LayerNorm(normalized_shape=self.up_proj.in_features, eps=1e-9,
  device=self.device, dtype=self.dtype)

  @override
@@ -207,11 +214,11 @@ Dynamic: license-file
  x = self.ensure_device_and_dtype(x, device=self.device, dtype=self.dtype)
  residual = x
  x = self.layer_norm(x)
- x = self.
- x = self.
+ x = self.up_proj(x)
+ x = self.parametric_relu(x)
  if self._dropout_rate > .0:
  x = torch.dropout(x, p=self._dropout_rate, train=self.training)
- return self.
+ return self.down_proj(x) + residual


  class FeedForward(BaseNeuralNetwork):
@@ -224,7 +231,7 @@ Dynamic: license-file
  self.ffn_layers = nn.ModuleList([FeedForwardUnit(feature_dim=feature_dim,
  expansion_factor=expansion_factor, bias=bias,
  dropout_rate=dropout_rate,
- device=self.device, dtype=self.dtype)
+ device=self.device, dtype=self.dtype) for _ in range(num_layers)])

  @override
  def forward(self, x: torch.Tensor) -> torch.Tensor:
@@ -234,7 +241,7 @@ Dynamic: license-file
  return x
  ```

-
+ Attention module:

  ```python
  from typing_extensions import override
@@ -243,14 +250,17 @@ Dynamic: license-file

  from deeplotx.nn.base_neural_network import BaseNeuralNetwork
  from deeplotx.nn.feed_forward import FeedForward
+ from deeplotx.nn.rope import RoPE, DEFAULT_THETA


- class
- def __init__(self, feature_dim: int, bias: bool = True,
- proj_expansion_factor: int | float = 1.5, dropout_rate: float = 0.02,
- model_name: str | None = None, device: str | None = None, dtype: torch.dtype | None = None
+ class Attention(BaseNeuralNetwork):
+ def __init__(self, feature_dim: int, bias: bool = True, positional: bool = True,
+ proj_layers: int = 1, proj_expansion_factor: int | float = 1.5, dropout_rate: float = 0.02,
+ model_name: str | None = None, device: str | None = None, dtype: torch.dtype | None = None,
+ **kwargs):
  super().__init__(in_features=feature_dim, out_features=feature_dim, model_name=model_name,
  device=device, dtype=dtype)
+ self._positional = positional
  self._feature_dim = feature_dim
  self.q_proj = FeedForward(feature_dim=self._feature_dim, num_layers=proj_layers,
  expansion_factor=proj_expansion_factor,
@@ -261,21 +271,27 @@ Dynamic: license-file
  self.v_proj = FeedForward(feature_dim=self._feature_dim, num_layers=proj_layers,
  expansion_factor=proj_expansion_factor,
  bias=bias, dropout_rate=dropout_rate, device=self.device, dtype=self.dtype)
+ if self._positional:
+ self.rope = RoPE(feature_dim=self._feature_dim, theta=kwargs.get('theta', DEFAULT_THETA),
+ device=self.device, dtype=self.dtype)

- def _attention(self, x: torch.Tensor, mask: torch.Tensor | None = None) -> torch.Tensor:
- q, k = self.q_proj(x), self.k_proj(
+ def _attention(self, x: torch.Tensor, y: torch.Tensor, mask: torch.Tensor | None = None) -> torch.Tensor:
+ q, k = self.q_proj(x), self.k_proj(y)
+ if self._positional:
+ q, k = self.rope(q), self.rope(k)
  attn = torch.matmul(q, k.transpose(-2, -1))
  attn = attn / (self._feature_dim ** 0.5)
  attn = attn.masked_fill(mask == 0, -1e9) if mask is not None else attn
- return torch.softmax(attn, dim=-1)
+ return torch.softmax(attn, dtype=self.dtype, dim=-1)

  @override
- def forward(self, x: torch.Tensor, mask: torch.Tensor | None = None) -> torch.Tensor:
+ def forward(self, x: torch.Tensor, y: torch.Tensor | None = None, mask: torch.Tensor | None = None) -> torch.Tensor:
  x = self.ensure_device_and_dtype(x, device=self.device, dtype=self.dtype)
+ y = x if y is None else self.ensure_device_and_dtype(y, device=self.device, dtype=self.dtype)
  if mask is not None:
  mask = self.ensure_device_and_dtype(mask, device=self.device, dtype=self.dtype)
- v = self.v_proj(
- return torch.matmul(self._attention(x, mask), v)
+ v = self.v_proj(y)
+ return torch.matmul(self._attention(x, y, mask), v)
  ```

  - ### Text binary classification with the predefined trainer
@@ -284,7 +300,7 @@ Dynamic: license-file
  from deeplotx import TextBinaryClassifierTrainer, LongTextEncoder
  from deeplotx.util import get_files, read_file

- # Define the embedding strategy (by default uses
+ # Define the embedding strategy (FacebookAI/xlm-roberta-base is the default embedding model)
  long_text_encoder = LongTextEncoder(
  max_length=2048, # maximum text length; longer inputs are truncated
  chunk_size=448, # chunk size (in tokens)
@@ -306,10 +322,11 @@ Dynamic: license-file

  # Start training
  model = trainer.train(pos_data, neg_data,
-
-
-
-
+ num_epochs=36, learning_rate=2e-5,
+ balancing_dataset=True, alpha=1e-4,
+ rho=.2, encoder_layers=2, # 2 RoFormer encoder layers
+ attn_heads=8, # 8 attention heads
+ recursive_layers=2) # 2 Bi-LSTM layers

  # Save the model weights
  model.save(model_name='test_model', model_dir='model')
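The README hunks above also document that Attention now takes an optional second tensor y: self-attention when y is omitted, cross-attention otherwise. A minimal sketch of the two call forms (shapes are illustrative):

```python
import torch
from deeplotx import Attention

attn = Attention(feature_dim=64)   # positional=True by default, so RoPE is applied to q and k
x = torch.randn(2, 10, 64)         # query-side sequence
y = torch.randn(2, 20, 64)         # key/value-side sequence

self_out = attn(x)                 # y defaults to x -> self-attention
cross_out = attn(x, y)             # q from x, k and v from y -> cross-attention
```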
{deeplotx-0.8.2.dist-info → deeplotx-0.8.5.dist-info}/RECORD
CHANGED
@@ -1,34 +1,35 @@
- deeplotx/__init__.py,sha256=
+ deeplotx/__init__.py,sha256=xEq8WQ2LpEZoLX_Z464d0dy4aemFGrEV6ZMJr6ioFnQ,1186
  deeplotx/encoder/__init__.py,sha256=BrsF5_4O-4pfihYF2wjExDOoAY-03kGJTH-Mhez4tsE,129
  deeplotx/encoder/encoder.py,sha256=oSBdA-MiwMKNfTFJWR-RdvNS0G0qfX-Qchwy4LuwB00,3985
- deeplotx/encoder/long_text_encoder.py,sha256=
- deeplotx/encoder/longformer_encoder.py,sha256=
- deeplotx/nn/__init__.py,sha256=
+ deeplotx/encoder/long_text_encoder.py,sha256=3ScdKDi65J5tdO8PFCXBjCzNUCLlJRwVhpDR0BrphG4,3951
+ deeplotx/encoder/longformer_encoder.py,sha256=NNYLr5I9tdeh0C8Ir7QcbEMU9gDk6U7CiF3Tbg6NEsE,3372
+ deeplotx/nn/__init__.py,sha256=YILwbxb-NHdiJjfOwBKH8F7PuZSDZSrGpTznPDucTro,710
  deeplotx/nn/attention.py,sha256=R-i-Rd7gnsh6hwXDeYfqLQOJvfSZIGfQbFzRlC91XLo,2879
- deeplotx/nn/auto_regression.py,sha256=
+ deeplotx/nn/auto_regression.py,sha256=j_R7WGPq9REngjpLuX5c0AaNqOpgGm2Vfrolw-XjWXw,877
  deeplotx/nn/base_neural_network.py,sha256=FjQEDFH810fJS7JV3aLgJZnaMqC6DH--wlBvuj-ghTc,5900
  deeplotx/nn/feed_forward.py,sha256=4ozj7EDalO9pb6JUhZtsJqE0r8bIHFApHRt2zTrl4ho,2931
- deeplotx/nn/linear_regression.py,sha256=
- deeplotx/nn/logistic_regression.py,sha256=
+ deeplotx/nn/linear_regression.py,sha256=EotBCCam7FH5iaAv0ma4TfYId2YfhBnrQlMirF0xoq4,1400
+ deeplotx/nn/logistic_regression.py,sha256=6vlXuP5el6EdXEhUbpVTKstcf-pikD50Xezw66l-aUc,978
  deeplotx/nn/long_context_auto_regression.py,sha256=uy0k_g8wEfMH5nd5HCfrHA8dgEsuWBA2x8U-g3h4vQc,1054
- deeplotx/nn/long_context_recursive_sequential.py,sha256=
+ deeplotx/nn/long_context_recursive_sequential.py,sha256=pcZfnrIHBqbp2BssfUTS1klpuykZwowikfAIaOnvRUI,2674
  deeplotx/nn/multi_head_attention.py,sha256=3z73uGbvy3jszRy1B9nxGOJjlttHpcpRF8Qd09OEams,2267
- deeplotx/nn/
+ deeplotx/nn/multi_head_feed_forward.py,sha256=hD9ScrVJZ9kNksoFASf0xaPgEnNgCeRivW-XjYOPjj8,1908
+ deeplotx/nn/recursive_sequential.py,sha256=crD3rEUPPjwu-uSJSiX9kqaM8OPI8SYspbDPlZb2J2Y,2900
  deeplotx/nn/roformer_encoder.py,sha256=UJjKniNdMd0rfoYQcsX6bPo6Ceq_Z6EhwHe2kgqWC_k,2426
  deeplotx/nn/rope.py,sha256=RTOjnllubktdy2rzFWxBfkuLuGjhEMyDd06uojdqPhM,1848
- deeplotx/nn/softmax_regression.py,sha256=
+ deeplotx/nn/softmax_regression.py,sha256=1brNbnj8qI0VfycZmZQlfn52myKZZe8BF_ziq1JQfPY,999
  deeplotx/similarity/__init__.py,sha256=s3u-KSgxjnMcWpIItKgXNltFMPQ7YY3CqsqHI-5F1c8,724
  deeplotx/similarity/distribution.py,sha256=wQGouuuW531pZeBRKBujXsdsoz4fDnPw7_GW81jwepc,1066
  deeplotx/similarity/set.py,sha256=zhGFxtSIXlWqvipBYzoiPahp4g0boAIoUiMfG0wl07A,686
  deeplotx/similarity/vector.py,sha256=WVbDHqykt-fvuILVrhUCtIFAOEjY_zvttrXGM9eylG0,1125
  deeplotx/trainer/__init__.py,sha256=Fl5DR9UecQc5VtBcczU9sx_HtPNoFohpuELOh-Jrsks,77
  deeplotx/trainer/base_trainer.py,sha256=z0MeAT-rRYmjeBXt0ckt7J1itYArR0Cx02wHesXUoZE,385
- deeplotx/trainer/text_binary_classification_trainer.py,sha256=
+ deeplotx/trainer/text_binary_classification_trainer.py,sha256=TFxOX8rWU_zKliI9zm7F5ZH7snR2d-sk95s3pfTmm78,6601
  deeplotx/util/__init__.py,sha256=5CH4MTeSgsmCe3LPMfvKoSBpwh6jDSBuHVElJvzQzgs,90
  deeplotx/util/hash.py,sha256=qbNU3RLBWGQYFVte9WZBAkZ1BkdjCXiKLDaKPN54KFk,662
  deeplotx/util/read_file.py,sha256=ptzouvEQeeW8KU5BrWNJlXw-vFXVrpS9SkAUxsu6A8A,612
- deeplotx-0.8.
- deeplotx-0.8.
- deeplotx-0.8.
- deeplotx-0.8.
- deeplotx-0.8.
+ deeplotx-0.8.5.dist-info/licenses/LICENSE,sha256=IwGE9guuL-ryRPEKi6wFPI_zOhg7zDZbTYuHbSt_SAk,35823
+ deeplotx-0.8.5.dist-info/METADATA,sha256=aM49grLNXqwEDdA4PwOEgiBKH1uCPjFuu7OCf5-_5aU,13138
+ deeplotx-0.8.5.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
+ deeplotx-0.8.5.dist-info/top_level.txt,sha256=hKg4pVDXZ-WWxkRfJFczRIll1Sv7VyfKCmzHLXbuh1U,9
+ deeplotx-0.8.5.dist-info/RECORD,,
{deeplotx-0.8.2.dist-info → deeplotx-0.8.5.dist-info}/WHEEL
File without changes
{deeplotx-0.8.2.dist-info → deeplotx-0.8.5.dist-info}/licenses/LICENSE
File without changes
{deeplotx-0.8.2.dist-info → deeplotx-0.8.5.dist-info}/top_level.txt
File without changes