deeplotx 0.5.6__py3-none-any.whl → 0.8.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- deeplotx/__init__.py +5 -1
- deeplotx/encoder/encoder.py +22 -4
- deeplotx/encoder/long_text_encoder.py +4 -2
- deeplotx/nn/__init__.py +6 -1
- deeplotx/nn/attention.py +48 -0
- deeplotx/nn/auto_regression.py +8 -6
- deeplotx/nn/base_neural_network.py +73 -1
- deeplotx/nn/feed_forward.py +53 -0
- deeplotx/nn/linear_regression.py +12 -24
- deeplotx/nn/logistic_regression.py +5 -2
- deeplotx/nn/long_context_auto_regression.py +10 -6
- deeplotx/nn/long_context_recursive_sequential.py +22 -14
- deeplotx/nn/multi_head_attention.py +34 -0
- deeplotx/nn/recursive_sequential.py +19 -12
- deeplotx/nn/roformer_encoder.py +40 -0
- deeplotx/nn/rope.py +41 -0
- deeplotx/nn/softmax_regression.py +5 -2
- deeplotx/trainer/text_binary_classification_trainer.py +24 -8
- deeplotx/util/__init__.py +1 -1
- {deeplotx-0.5.6.dist-info → deeplotx-0.8.0.dist-info}/METADATA +65 -42
- deeplotx-0.8.0.dist-info/RECORD +34 -0
- deeplotx/nn/self_attention.py +0 -34
- deeplotx-0.5.6.dist-info/RECORD +0 -30
- {deeplotx-0.5.6.dist-info → deeplotx-0.8.0.dist-info}/WHEEL +0 -0
- {deeplotx-0.5.6.dist-info → deeplotx-0.8.0.dist-info}/licenses/LICENSE +0 -0
- {deeplotx-0.5.6.dist-info → deeplotx-0.8.0.dist-info}/top_level.txt +0 -0
deeplotx/__init__.py
CHANGED
@@ -5,12 +5,16 @@ __ROOT__ = os.path.dirname(os.path.abspath(__file__))
 
 from .encoder import Encoder, LongTextEncoder, LongformerEncoder
 from .nn import (
+    FeedForward,
     LinearRegression,
     LogisticRegression,
     SoftmaxRegression,
     RecursiveSequential,
     LongContextRecursiveSequential,
-
+    RoPE,
+    Attention,
+    MultiHeadAttention,
+    RoFormerEncoder,
     AutoRegression,
     LongContextAutoRegression
 )
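The package root now re-exports the new building blocks alongside the existing regression and recurrent models. A minimal import sketch, assuming deeplotx 0.8.0 is installed:

```python
# Illustrative only: these names are exported by deeplotx/__init__.py as of 0.8.0.
from deeplotx import (
    FeedForward,
    RoPE,
    Attention,
    MultiHeadAttention,
    RoFormerEncoder,
)

print(FeedForward, Attention, RoFormerEncoder)
```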
deeplotx/encoder/encoder.py
CHANGED
@@ -1,6 +1,7 @@
 import logging
 import os
 import math
+from requests.exceptions import ConnectTimeout, SSLError
 
 import torch
 from torch import nn
@@ -18,10 +19,27 @@ class Encoder(nn.Module):
         super().__init__()
         self.device = torch.device(device) if device is not None \
             else torch.device('cuda' if torch.cuda.is_available() else 'cpu')
-
-
-
-
+        try:
+            self.tokenizer = AutoTokenizer.from_pretrained(pretrained_model_name_or_path=model_name_or_path,
+                                                           cache_dir=CACHE_PATH, _from_auto=True,
+                                                           trust_remote_code=True)
+            self.encoder = AutoModel.from_pretrained(pretrained_model_name_or_path=model_name_or_path,
+                                                     cache_dir=CACHE_PATH, _from_auto=True,
+                                                     trust_remote_code=True).to(self.device)
+        except ConnectTimeout:
+            self.tokenizer = AutoTokenizer.from_pretrained(pretrained_model_name_or_path=model_name_or_path,
+                                                           cache_dir=CACHE_PATH, _from_auto=True,
+                                                           trust_remote_code=True, local_files_only=True)
+            self.encoder = AutoModel.from_pretrained(pretrained_model_name_or_path=model_name_or_path,
+                                                     cache_dir=CACHE_PATH, _from_auto=True,
+                                                     trust_remote_code=True, local_files_only=True).to(self.device)
+        except SSLError:
+            self.tokenizer = AutoTokenizer.from_pretrained(pretrained_model_name_or_path=model_name_or_path,
+                                                           cache_dir=CACHE_PATH, _from_auto=True,
+                                                           trust_remote_code=True, local_files_only=True)
+            self.encoder = AutoModel.from_pretrained(pretrained_model_name_or_path=model_name_or_path,
+                                                     cache_dir=CACHE_PATH, _from_auto=True,
+                                                     trust_remote_code=True, local_files_only=True).to(self.device)
         self.embed_dim = self.encoder.config.max_position_embeddings
         logger.debug(f'{Encoder.__name__} initialized on device: {self.device}.')
 
deeplotx/encoder/long_text_encoder.py
CHANGED
@@ -15,12 +15,14 @@ logger = logging.getLogger('deeplotx.embedding')
 class LongTextEncoder(Encoder):
     def __init__(self, max_length: int, chunk_size: int = 448,
                  overlapping: int = 32, model_name_or_path: str = DEFAULT_BERT,
-                 cache_capacity: int = 64, device: str | None = None):
+                 cache_capacity: int = 64, max_workers: int = 8, device: str | None = None):
         super().__init__(model_name_or_path=model_name_or_path, device=device)
+        assert overlapping < chunk_size, f'overlapping ({overlapping}) must be less than chunk size ({chunk_size}).'
         self._max_length = max_length
         self._chunk_size = chunk_size
         self._overlapping = overlapping
         self._cache = LRUCache(capacity=cache_capacity)
+        self._worker_group = ThreadPool(max_workers=max_workers)
 
     def __chunk_embedding(self, idx: int, x: torch.Tensor, mask: torch.Tensor) -> tuple[int, torch.Tensor]:
         return idx, super().forward(x, attention_mask=mask)
@@ -63,7 +65,7 @@ class LongTextEncoder(Encoder):
             _tmp_right = (i + 1) * self._chunk_size + self._overlapping
             chunks.append((i, torch.tensor([_text_to_input_ids[_tmp_left: _tmp_right]], dtype=torch.int, device=self.device),
                            torch.tensor([_text_to_input_ids_att_mask[_tmp_left: _tmp_right]], dtype=torch.int, device=self.device)))
-        embeddings = list(
+        embeddings = list(self._worker_group.map(self.__chunk_embedding, chunks))
        embeddings = sorted([x.returns for x in embeddings], key=lambda x: x[0], reverse=False)
        fin_embedding = [x[1] for x in embeddings]
        # write cache
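The encoder now embeds chunks through a thread pool and rejects overlap sizes that are not smaller than the chunk size. A construction sketch only (the values below are illustrative, not project defaults beyond what the signature shows):

```python
# Sketch: constructor arguments taken from the signature in this diff; encoding calls are not shown here.
from deeplotx import LongTextEncoder

encoder = LongTextEncoder(
    max_length=2048,   # maximum number of tokens considered per text
    chunk_size=448,    # tokens per chunk
    overlapping=32,    # must stay below chunk_size, or the new assert raises
    max_workers=8,     # size of the thread pool that embeds chunks in parallel (new in 0.8.0)
)
```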
deeplotx/nn/__init__.py
CHANGED
@@ -1,8 +1,13 @@
+from .base_neural_network import BaseNeuralNetwork
+from .feed_forward import FeedForward
 from .linear_regression import LinearRegression
 from .logistic_regression import LogisticRegression
 from .softmax_regression import SoftmaxRegression
 from .recursive_sequential import RecursiveSequential
 from .long_context_recursive_sequential import LongContextRecursiveSequential
-from .
+from .rope import RoPE
+from .attention import Attention
+from .multi_head_attention import MultiHeadAttention
+from .roformer_encoder import RoFormerEncoder
 from .auto_regression import AutoRegression
 from .long_context_auto_regression import LongContextAutoRegression
deeplotx/nn/attention.py
ADDED
@@ -0,0 +1,48 @@
+from typing_extensions import override
+
+import torch
+
+from deeplotx.nn.base_neural_network import BaseNeuralNetwork
+from deeplotx.nn.feed_forward import FeedForward
+from deeplotx.nn.rope import RoPE, DEFAULT_THETA
+
+
+class Attention(BaseNeuralNetwork):
+    def __init__(self, feature_dim: int, bias: bool = True, positional: bool = True,
+                 proj_layers: int = 1, proj_expansion_factor: int | float = 1.5, dropout_rate: float = 0.02,
+                 model_name: str | None = None, device: str | None = None, dtype: torch.dtype | None = None,
+                 **kwargs):
+        super().__init__(in_features=feature_dim, out_features=feature_dim, model_name=model_name,
+                         device=device, dtype=dtype)
+        self._positional = positional
+        self._feature_dim = feature_dim
+        self.q_proj = FeedForward(feature_dim=self._feature_dim, num_layers=proj_layers,
+                                  expansion_factor=proj_expansion_factor,
+                                  bias=bias, dropout_rate=dropout_rate, device=self.device, dtype=self.dtype)
+        self.k_proj = FeedForward(feature_dim=self._feature_dim, num_layers=proj_layers,
+                                  expansion_factor=proj_expansion_factor,
+                                  bias=bias, dropout_rate=dropout_rate, device=self.device, dtype=self.dtype)
+        self.v_proj = FeedForward(feature_dim=self._feature_dim, num_layers=proj_layers,
+                                  expansion_factor=proj_expansion_factor,
+                                  bias=bias, dropout_rate=dropout_rate, device=self.device, dtype=self.dtype)
+        if self._positional:
+            self.rope = RoPE(feature_dim=self._feature_dim, theta=kwargs.get('theta', DEFAULT_THETA),
+                             device=self.device, dtype=self.dtype)
+
+    def _attention(self, x: torch.Tensor, y: torch.Tensor, mask: torch.Tensor | None = None) -> torch.Tensor:
+        q, k = self.q_proj(x), self.k_proj(y)
+        if self._positional:
+            q, k = self.rope(q), self.rope(k)
+        attn = torch.matmul(q, k.transpose(-2, -1))
+        attn = attn / (self._feature_dim ** 0.5)
+        attn = attn.masked_fill(mask == 0, -1e9) if mask is not None else attn
+        return torch.softmax(attn, dim=-1)
+
+    @override
+    def forward(self, x: torch.Tensor, y: torch.Tensor | None = None, mask: torch.Tensor | None = None) -> torch.Tensor:
+        x = self.ensure_device_and_dtype(x, device=self.device, dtype=self.dtype)
+        y = x if y is None else self.ensure_device_and_dtype(y, device=self.device, dtype=self.dtype)
+        if mask is not None:
+            mask = self.ensure_device_and_dtype(mask, device=self.device, dtype=self.dtype)
+        v = self.v_proj(y)
+        return torch.matmul(self._attention(x, y, mask), v)
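The new `Attention` block projects queries, keys and values through `FeedForward` stacks and rotates q/k with RoPE when `positional=True`. A minimal usage sketch (shapes and hyperparameters are illustrative; the device is pinned to CPU for portability of the sketch):

```python
# Sketch only: assumes deeplotx 0.8.0; feature_dim must be even when RoPE is enabled.
import torch
from deeplotx.nn import Attention

attn = Attention(feature_dim=64, positional=True, device='cpu')
x = torch.randn(2, 10, 64)                      # (batch, seq_len, feature_dim)
out = attn(x)                                   # y defaults to x, i.e. self-attention
cross = attn(x, y=torch.randn(2, 10, 64))       # cross-attention against a second sequence
print(out.shape, cross.shape)                   # torch.Size([2, 10, 64]) twice
```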
deeplotx/nn/auto_regression.py
CHANGED
@@ -4,9 +4,11 @@ from deeplotx.nn import RecursiveSequential
 
 
 class AutoRegression(RecursiveSequential):
-    def __init__(self, feature_dim: int,
-                 recursive_layers: int =
-
-
-
-
+    def __init__(self, feature_dim: int, bias: bool = True,
+                 recursive_layers: int = 1, recursive_hidden_dim: int | None = None,
+                 ffn_layers: int = 1, ffn_expansion_factor: int | float = 2, dropout_rate: float = 0.05,
+                 model_name: str | None = None, device: str | None = None, dtype: torch.dtype | None = None):
+        super().__init__(input_dim=feature_dim, output_dim=feature_dim, bias=bias,
+                         recursive_layers=recursive_layers, recursive_hidden_dim=recursive_hidden_dim,
+                         ffn_layers=ffn_layers, ffn_expansion_factor=ffn_expansion_factor,
+                         dropout_rate=dropout_rate, model_name=model_name, device=device, dtype=dtype)
deeplotx/nn/base_neural_network.py
CHANGED
@@ -3,12 +3,14 @@ from abc import abstractmethod
 
 import torch
 from torch import nn
+from torch.nn import init
 
 DEFAULT_SUFFIX = 'dlx'
 
 
 class BaseNeuralNetwork(nn.Module):
-    def __init__(self,
+    def __init__(self, in_features: int, out_features: int, model_name: str | None = None,
+                 device: str | None = None, dtype: torch.dtype | None = None):
         super().__init__()
         self._model_name = model_name \
             if model_name is not None \
@@ -16,6 +18,16 @@ class BaseNeuralNetwork(nn.Module):
         self.device = torch.device(device) if device is not None \
             else torch.device('cuda' if torch.cuda.is_available() else 'cpu')
         self.dtype = dtype if dtype is not None else torch.float32
+        self._in_features = in_features
+        self._out_features = out_features
+
+    @property
+    def in_features(self) -> int:
+        return self._in_features
+
+    @property
+    def out_features(self) -> int:
+        return self._out_features
 
     @staticmethod
     def ensure_device_and_dtype(x: torch.Tensor, device: torch.device, dtype: torch.dtype) -> torch.Tensor:
@@ -25,6 +37,44 @@ class BaseNeuralNetwork(nn.Module):
             x = x.to(dtype)
         return x
 
+    def initialize_weights(self):
+        for m in self.modules():
+            match m.__class__:
+                case nn.Linear:
+                    init.kaiming_normal_(m.weight, mode='fan_in', nonlinearity='leaky_relu')
+                    if m.bias is not None:
+                        init.constant_(m.bias, 0)
+                case nn.BatchNorm2d | nn.BatchNorm1d | nn.BatchNorm3d:
+                    init.constant_(m.weight, 1)
+                    init.constant_(m.bias, 0)
+                case nn.LSTM | nn.GRU:
+                    for name, param in m.named_parameters():
+                        _tmp_name = name.lower()
+                        if 'weight_ih' in _tmp_name:
+                            init.kaiming_normal_(param, mode='fan_in', nonlinearity='sigmoid')
+                        elif 'weight_hh' in _tmp_name:
+                            init.orthogonal_(param)
+                        elif 'bias' in _tmp_name:
+                            init.constant_(param, 0)
+                case _:
+                    pass
+        return self
+
+    def size(self) -> dict:
+        total_params = trainable_params = non_trainable_params = 0
+        for param in self.parameters():
+            params = param.numel()
+            total_params += params
+            if param.requires_grad:
+                trainable_params += params
+            else:
+                non_trainable_params += params
+        return {
+            'total': total_params,
+            'trainable': trainable_params,
+            'non_trainable': non_trainable_params
+        }
+
     def l1(self, _lambda: float = 1e-4) -> torch.Tensor:
         def _l1() -> torch.Tensor:
             l2_reg = torch.tensor(0., device=self.device, dtype=self.dtype)
@@ -66,3 +116,25 @@ class BaseNeuralNetwork(nn.Module):
         model_file_name = f'{model_name}.{_suffix}' if model_name is not None else f'{self._model_name}.{_suffix}'
         self.load_state_dict(torch.load(os.path.join(model_dir, model_file_name), map_location=self.device, weights_only=True))
         return self
+
+    def __str__(self):
+        formatted = super().__str__()
+        _line_len = len([sorted(formatted.splitlines(), key=lambda _: len(_), reverse=True)][0])
+        _splitter_1 = '=' * (_line_len + 10)
+        _splitter_2 = '-' * (_line_len + 10)
+        _size = self.size()
+        total_param = _size['total']
+        trainable_param = _size['trainable']
+        non_trainable_param = _size['non_trainable']
+        formatted = (f'{_splitter_1}\n'
+                     f'Model_Name: {self._model_name}\n'
+                     f'In_Features: {self.in_features}\n'
+                     f'Out_Features: {self.out_features}\n'
+                     f'Device: {self.device}\n'
+                     f'Dtype: {self.dtype}\n'
+                     f'Total_Parameters: {total_param}\n'
+                     f'Trainable_Parameters: {trainable_param}\n'
+                     f'NonTrainable_Parameters: {non_trainable_param}\n'
+                     f'{_splitter_2}'
+                     f'\n{formatted}\n{_splitter_1}')
+        return formatted
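The base class now tracks `in_features`/`out_features` and gains `initialize_weights()`, `size()` and a richer `__str__`. A sketch of the new helpers, exercised through the `FeedForward` subclass added later in this diff:

```python
# Sketch only: assumes deeplotx 0.8.0.
from deeplotx.nn import FeedForward

model = FeedForward(feature_dim=128, num_layers=2).initialize_weights()  # re-init Linear/BatchNorm/LSTM/GRU weights
print(model.size())   # {'total': ..., 'trainable': ..., 'non_trainable': ...}
print(model)          # __str__ now reports name, feature dims, device, dtype and parameter counts
```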
deeplotx/nn/feed_forward.py
ADDED
@@ -0,0 +1,53 @@
+from typing_extensions import override
+
+import torch
+from torch import nn
+
+from deeplotx.nn.base_neural_network import BaseNeuralNetwork
+
+
+class FeedForwardUnit(BaseNeuralNetwork):
+    def __init__(self, feature_dim: int, expansion_factor: int | float = 2,
+                 bias: bool = True, dropout_rate: float = 0.05, model_name: str | None = None,
+                 device: str | None = None, dtype: torch.dtype | None = None):
+        super().__init__(in_features=feature_dim, out_features=feature_dim, model_name=model_name, device=device, dtype=dtype)
+        self._dropout_rate = dropout_rate
+        self.up_proj = nn.Linear(in_features=feature_dim, out_features=int(feature_dim * expansion_factor),
+                                 bias=bias, device=self.device, dtype=self.dtype)
+        self.down_proj = nn.Linear(in_features=int(feature_dim * expansion_factor), out_features=feature_dim,
+                                   bias=bias, device=self.device, dtype=self.dtype)
+        self.parametric_relu = nn.PReLU(num_parameters=1, init=5e-3,
+                                        device=self.device, dtype=self.dtype)
+        self.layer_norm = nn.LayerNorm(normalized_shape=self.up_proj.in_features, eps=1e-9,
+                                       device=self.device, dtype=self.dtype)
+
+    @override
+    def forward(self, x: torch.Tensor) -> torch.Tensor:
+        x = self.ensure_device_and_dtype(x, device=self.device, dtype=self.dtype)
+        residual = x
+        x = self.layer_norm(x)
+        x = self.up_proj(x)
+        x = self.parametric_relu(x)
+        if self._dropout_rate > .0:
+            x = torch.dropout(x, p=self._dropout_rate, train=self.training)
+        return self.down_proj(x) + residual
+
+
+class FeedForward(BaseNeuralNetwork):
+    def __init__(self, feature_dim: int, num_layers: int = 1, expansion_factor: int | float = 2,
+                 bias: bool = True, dropout_rate: float = 0.05, model_name: str | None = None,
+                 device: str | None = None, dtype: torch.dtype | None = None):
+        if num_layers < 1:
+            raise ValueError('num_layers cannot be less than 1.')
+        super().__init__(in_features=feature_dim, out_features=feature_dim, model_name=model_name, device=device, dtype=dtype)
+        self.ffn_layers = nn.ModuleList([FeedForwardUnit(feature_dim=feature_dim,
+                                                         expansion_factor=expansion_factor, bias=bias,
+                                                         dropout_rate=dropout_rate,
+                                                         device=self.device, dtype=self.dtype) for _ in range(num_layers)])
+
+    @override
+    def forward(self, x: torch.Tensor) -> torch.Tensor:
+        x = self.ensure_device_and_dtype(x, device=self.device, dtype=self.dtype)
+        for ffn in self.ffn_layers:
+            x = ffn(x)
+        return x
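Each `FeedForwardUnit` applies LayerNorm, an up-projection, PReLU, optional dropout and a down-projection, then adds the input back as a residual, so the output keeps the input's shape. A short usage sketch:

```python
# Sketch only: assumes deeplotx 0.8.0.
import torch
from deeplotx.nn import FeedForward

ffn = FeedForward(feature_dim=256, num_layers=3, expansion_factor=2)
x = torch.randn(8, 256)
print(ffn(x).shape)  # torch.Size([8, 256])
```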
deeplotx/nn/linear_regression.py
CHANGED
@@ -4,34 +4,22 @@ import torch
 from torch import nn
 
 from deeplotx.nn.base_neural_network import BaseNeuralNetwork
+from deeplotx.nn.feed_forward import FeedForward
 
 
 class LinearRegression(BaseNeuralNetwork):
-    def __init__(self, input_dim: int, output_dim: int,
-
-
-
-        self.
-
-        self.
-
-        self.fc5 = nn.Linear(64, output_dim, device=self.device, dtype=self.dtype)
-        self.parametric_relu_1 = nn.PReLU(num_parameters=1, init=5e-3, device=self.device, dtype=self.dtype)
-        self.parametric_relu_2 = nn.PReLU(num_parameters=1, init=5e-3, device=self.device, dtype=self.dtype)
-        self.parametric_relu_3 = nn.PReLU(num_parameters=1, init=5e-3, device=self.device, dtype=self.dtype)
-        self.parametric_relu_4 = nn.PReLU(num_parameters=1, init=5e-3, device=self.device, dtype=self.dtype)
+    def __init__(self, input_dim: int, output_dim: int, num_layers: int = 1,
+                 expansion_factor: int | float = 1.5, bias: bool = True, dropout_rate: float = 0.1,
+                 model_name: str | None = None, device: str | None = None, dtype: torch.dtype | None = None):
+        super().__init__(in_features=input_dim, out_features=output_dim, model_name=model_name, device=device, dtype=dtype)
+        self.ffn = FeedForward(feature_dim=input_dim, num_layers=num_layers, expansion_factor=expansion_factor,
+                               bias=bias, dropout_rate=dropout_rate, device=self.device, dtype=self.dtype)
+        self.proj = nn.Linear(in_features=input_dim, out_features=output_dim,
+                              bias=bias, device=self.device, dtype=self.dtype)
 
     @override
     def forward(self, x: torch.Tensor) -> torch.Tensor:
         x = self.ensure_device_and_dtype(x, device=self.device, dtype=self.dtype)
-
-        x =
-
-        x = self.parametric_relu_2(self.fc2(x))
-        x = nn.LayerNorm(normalized_shape=768, eps=1e-9, device=self.device, dtype=self.dtype)(x)
-        x = torch.dropout(x, p=0.2, train=self.training)
-        x = self.parametric_relu_3(self.fc3(x))
-        x = torch.dropout(x, p=0.2, train=self.training)
-        x = self.parametric_relu_4(self.fc4(x)) + self.fc1_to_fc4_res(fc1_out)
-        x = self.fc5(x)
-        return x
+        residual = x
+        x = self.ffn(x) + residual
+        return self.proj(x)
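The regression head is now a `FeedForward` stack with a residual connection followed by a single linear projection. A usage sketch (shapes are illustrative):

```python
# Sketch only: assumes deeplotx 0.8.0.
import torch
from deeplotx.nn import LinearRegression

model = LinearRegression(input_dim=256, output_dim=1, num_layers=2)
x = torch.randn(8, 256)
print(model(x).shape)  # torch.Size([8, 1])
```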
deeplotx/nn/logistic_regression.py
CHANGED
@@ -6,9 +6,12 @@ from deeplotx.nn.linear_regression import LinearRegression
 
 
 class LogisticRegression(LinearRegression):
-    def __init__(self, input_dim: int, output_dim: int = 1,
+    def __init__(self, input_dim: int, output_dim: int = 1, num_layers: int = 1, expansion_factor: int | float = 1.5,
+                 bias: bool = True, dropout_rate: float = 0.1, model_name: str | None = None,
                  device: str | None = None, dtype: torch.dtype | None = None):
-        super().__init__(input_dim=input_dim, output_dim=output_dim,
+        super().__init__(input_dim=input_dim, output_dim=output_dim, num_layers=num_layers,
+                         expansion_factor=expansion_factor, bias=bias, dropout_rate=dropout_rate,
+                         model_name=model_name, device=device, dtype=dtype)
 
     @override
     def forward(self, x: torch.Tensor) -> torch.Tensor:
deeplotx/nn/long_context_auto_regression.py
CHANGED
@@ -4,9 +4,13 @@ from deeplotx.nn import LongContextRecursiveSequential
 
 
 class LongContextAutoRegression(LongContextRecursiveSequential):
-    def __init__(self, feature_dim: int,
-                 recursive_layers: int =
-
-
-
-
+    def __init__(self, feature_dim: int, bias: bool = True,
+                 encoder_layers: int = 1, attn_heads: int = 1, recursive_layers: int = 1, recursive_hidden_dim: int | None = None,
+                 ffn_layers: int = 1, ffn_expansion_factor: int | float = 2, dropout_rate: float = 0.05,
+                 model_name: str | None = None, device: str | None = None, dtype: torch.dtype | None = None,
+                 **kwargs):
+        super().__init__(input_dim=feature_dim, output_dim=feature_dim, bias=bias,
+                         encoder_layers=encoder_layers, attn_heads=attn_heads,
+                         recursive_layers=recursive_layers, recursive_hidden_dim=recursive_hidden_dim,
+                         ffn_layers=ffn_layers, ffn_expansion_factor=ffn_expansion_factor, dropout_rate=dropout_rate,
+                         model_name=model_name, device=device, dtype=dtype, **kwargs)
deeplotx/nn/long_context_recursive_sequential.py
CHANGED
@@ -3,26 +3,34 @@ from typing_extensions import override
 import torch
 from torch import nn
 
+from deeplotx.nn.attention import DEFAULT_THETA
 from deeplotx.nn.recursive_sequential import RecursiveSequential
-from deeplotx.nn.
+from deeplotx.nn.roformer_encoder import RoFormerEncoder
 
 
 class LongContextRecursiveSequential(RecursiveSequential):
-    def __init__(self, input_dim: int, output_dim: int,
-
-
-                 dtype: torch.dtype | None = None
-
-
+    def __init__(self, input_dim: int, output_dim: int, bias: bool = True,
+                 encoder_layers: int = 1, attn_heads: int = 1, recursive_layers: int = 2, recursive_hidden_dim: int | None = None,
+                 ffn_layers: int = 1, ffn_expansion_factor: int | float = 2, dropout_rate: float = 0.05,
+                 model_name: str | None = None, device: str | None = None, dtype: torch.dtype | None = None,
+                 **kwargs):
+        super().__init__(input_dim=input_dim, output_dim=output_dim, bias=bias,
+                         recursive_layers=recursive_layers, recursive_hidden_dim=recursive_hidden_dim,
+                         ffn_layers=ffn_layers, ffn_expansion_factor=ffn_expansion_factor, dropout_rate=dropout_rate,
                          model_name=model_name, device=device, dtype=dtype)
-        self.
-
-
+        self.roformer_encoders = nn.ModuleList([RoFormerEncoder(feature_dim=input_dim, attn_heads=attn_heads, bias=bias,
+                                                                ffn_layers=kwargs.get('encoder_ffn_layers', ffn_layers),
+                                                                ffn_expansion_factor=kwargs.get('encoder_expansion_factor', ffn_expansion_factor),
+                                                                dropout_rate=kwargs.get('encoder_dropout_rate', dropout_rate),
+                                                                attn_ffn_layers=kwargs.get('attn_ffn_layers', 1),
+                                                                attn_expansion_factor=kwargs.get('attn_expansion_factor', ffn_expansion_factor),
+                                                                attn_dropout_rate=kwargs.get('attn_dropout_rate', dropout_rate),
+                                                                theta=kwargs.get('theta', DEFAULT_THETA),
+                                                                device=self.device, dtype=self.dtype) for _ in range(encoder_layers)])
 
     @override
     def forward(self, x: torch.Tensor, state: tuple[torch.Tensor, torch.Tensor]) -> tuple[torch.Tensor, tuple[torch.Tensor, torch.Tensor]]:
         x = self.ensure_device_and_dtype(x, device=self.device, dtype=self.dtype)
-
-
-        return super().forward(
+        for roformer_encoder in self.roformer_encoders:
+            x = roformer_encoder(x)
+        return super().forward(x, state)
deeplotx/nn/multi_head_attention.py
ADDED
@@ -0,0 +1,34 @@
+from typing_extensions import override
+
+import torch
+from torch import nn
+
+from deeplotx.nn.base_neural_network import BaseNeuralNetwork
+from deeplotx.nn.attention import Attention
+
+
+class MultiHeadAttention(BaseNeuralNetwork):
+    def __init__(self, feature_dim: int, num_heads: int = 1, bias: bool = True, positional: bool = True,
+                 proj_layers: int = 1, proj_expansion_factor: int | float = 1.5, dropout_rate: float = 0.02,
+                 model_name: str | None = None, device: str | None = None, dtype: torch.dtype | None = None,
+                 **kwargs):
+        super().__init__(in_features=feature_dim, out_features=feature_dim, model_name=model_name,
+                         device=device, dtype=dtype)
+        self._num_heads = num_heads
+        self.expand_proj = nn.Linear(in_features=feature_dim, out_features=feature_dim * self._num_heads, bias=bias,
+                                     device=self.device, dtype=self.dtype)
+        self.attn_heads = nn.ModuleList([Attention(feature_dim=feature_dim, bias=bias, positional=positional,
+                                                   proj_layers=proj_layers, proj_expansion_factor=proj_expansion_factor,
+                                                   dropout_rate=dropout_rate, device=self.device, dtype=self.dtype,
+                                                   **kwargs) for _ in range(self._num_heads)])
+        self.out_proj = nn.Linear(in_features=feature_dim * self._num_heads, out_features=feature_dim, bias=bias,
+                                  device=self.device, dtype=self.dtype)
+
+    @override
+    def forward(self, x: torch.Tensor, y: torch.Tensor | None = None, mask: torch.Tensor | None = None) -> torch.Tensor:
+        x = self.ensure_device_and_dtype(x, device=self.device, dtype=self.dtype)
+        y = x if y is None else self.ensure_device_and_dtype(y, device=self.device, dtype=self.dtype)
+        x, y = self.expand_proj(x), self.expand_proj(y)
+        x_heads, y_heads = x.split(self.in_features, dim=-1), y.split(self.in_features, dim=-1)
+        head_outs = [self.attn_heads[_](x=x_heads[_], y=y_heads[_], mask=mask) for _ in range(self._num_heads)]
+        return self.out_proj(torch.concat(head_outs, dim=-1))
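`expand_proj` widens the features to `feature_dim * num_heads`, each slice runs through its own `Attention`, and `out_proj` maps the concatenated heads back to `feature_dim`. A usage sketch (values illustrative, device pinned to CPU for the sketch):

```python
# Sketch only: assumes deeplotx 0.8.0.
import torch
from deeplotx.nn import MultiHeadAttention

mha = MultiHeadAttention(feature_dim=64, num_heads=4, device='cpu')
x = torch.randn(2, 10, 64)
print(mha(x).shape)  # torch.Size([2, 10, 64])
```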
deeplotx/nn/recursive_sequential.py
CHANGED
@@ -4,23 +4,27 @@ import torch
 from torch import nn
 
 from deeplotx.nn.base_neural_network import BaseNeuralNetwork
-from deeplotx.nn import
+from deeplotx.nn.feed_forward import FeedForward
 
 
 class RecursiveSequential(BaseNeuralNetwork):
-    def __init__(self, input_dim: int, output_dim: int,
-
-
-                 dtype: torch.dtype | None = None):
-        super().__init__(
-
-
-
+    def __init__(self, input_dim: int, output_dim: int, bias: bool = True,
+                 recursive_layers: int = 1, recursive_hidden_dim: int | None = None,
+                 ffn_layers: int = 1, ffn_expansion_factor: int | float = 2, dropout_rate: float = 0.05,
+                 model_name: str | None = None, device: str | None = None, dtype: torch.dtype | None = None):
+        super().__init__(in_features=input_dim, out_features=output_dim, model_name=model_name,
+                         device=device, dtype=dtype)
+        if recursive_hidden_dim is None:
+            recursive_hidden_dim = input_dim
+        self.lstm = nn.LSTM(input_size=input_dim, hidden_size=recursive_hidden_dim,
                             num_layers=recursive_layers, batch_first=True,
                             bias=True, bidirectional=True, device=self.device,
                             dtype=self.dtype)
-        self.
-
+        self.ffn = FeedForward(feature_dim=recursive_hidden_dim * 2, num_layers=ffn_layers,
+                               expansion_factor=ffn_expansion_factor, bias=bias, dropout_rate=dropout_rate,
+                               device=self.device, dtype=self.dtype)
+        self.__proj = nn.Linear(in_features=recursive_hidden_dim * 2, out_features=output_dim, bias=bias,
+                                device=self.device, dtype=self.dtype)
 
     def initial_state(self, batch_size: int = 1) -> tuple[torch.Tensor, torch.Tensor]:
         zeros = torch.zeros(self.lstm.num_layers * 2, batch_size, self.lstm.hidden_size, device=self.device, dtype=self.dtype)
@@ -32,7 +36,10 @@ class RecursiveSequential(BaseNeuralNetwork):
         state = (self.ensure_device_and_dtype(state[0], device=self.device, dtype=self.dtype),
                  self.ensure_device_and_dtype(state[1], device=self.device, dtype=self.dtype))
         x, (hidden_state, cell_state) = self.lstm(x, state)
-        x =
+        x = x[:, -1, :]
+        residual = x
+        x = self.ffn(x) + residual
+        x = self.__proj(x)
         return x, (hidden_state, cell_state)
 
     @override
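The bidirectional LSTM output at the last time step now goes through the `FeedForward` block (with a residual) and a final projection to `output_dim`. A usage sketch with an explicit initial state:

```python
# Sketch only: assumes deeplotx 0.8.0; shapes are illustrative.
import torch
from deeplotx.nn import RecursiveSequential

model = RecursiveSequential(input_dim=64, output_dim=2, recursive_layers=2)
x = torch.randn(4, 10, 64)                                # (batch, seq_len, input_dim)
out, state = model(x, model.initial_state(batch_size=4))
print(out.shape)                                          # torch.Size([4, 2])
```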
deeplotx/nn/roformer_encoder.py
ADDED
@@ -0,0 +1,40 @@
+from typing_extensions import override
+
+import torch
+from torch import nn
+
+from deeplotx.nn.base_neural_network import BaseNeuralNetwork
+from deeplotx.nn.feed_forward import FeedForward
+from deeplotx.nn.multi_head_attention import MultiHeadAttention
+
+
+class RoFormerEncoder(BaseNeuralNetwork):
+    def __init__(self, feature_dim: int, attn_heads: int = 2, bias: bool = True,
+                 ffn_layers: int = 1, ffn_expansion_factor: int | float = 2,
+                 dropout_rate: float = 0.02, model_name: str | None = None,
+                 device: str | None = None, dtype: torch.dtype | None = None, **kwargs):
+        super().__init__(in_features=feature_dim, out_features=feature_dim,
+                         model_name=model_name, device=device, dtype=dtype)
+        self.attn = MultiHeadAttention(feature_dim=feature_dim, num_heads=attn_heads,
+                                       bias=bias, positional=True,
+                                       proj_layers=kwargs.get('attn_ffn_layers', 1),
+                                       proj_expansion_factor=kwargs.get('attn_expansion_factor', ffn_expansion_factor),
+                                       dropout_rate=kwargs.get('attn_dropout_rate', dropout_rate),
+                                       device=self.device, dtype=self.dtype, **kwargs)
+        self.ffn = FeedForward(feature_dim=feature_dim * 2, num_layers=ffn_layers,
+                               expansion_factor=ffn_expansion_factor,
+                               bias=bias, dropout_rate=dropout_rate,
+                               device=self.device, dtype=self.dtype)
+        self.layer_norm = nn.LayerNorm(normalized_shape=feature_dim, eps=1e-9,
+                                       device=self.device, dtype=self.dtype)
+        self.__proj = nn.Linear(in_features=feature_dim * 2, out_features=feature_dim,
+                                bias=bias, device=self.device, dtype=self.dtype)
+
+    @override
+    def forward(self, x: torch.Tensor, mask: torch.Tensor | None = None) -> torch.Tensor:
+        x = self.ensure_device_and_dtype(x, device=self.device, dtype=self.dtype)
+        if mask is not None:
+            mask = self.ensure_device_and_dtype(mask, device=self.device, dtype=self.dtype)
+        attn = self.attn(x=self.layer_norm(x), y=None, mask=mask)
+        x = torch.concat([attn, x], dim=-1)
+        return self.__proj(self.ffn(x))
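The encoder applies pre-norm multi-head self-attention with RoPE, concatenates the attention output with the raw input, then pushes the result through a `FeedForward` block and projects back to `feature_dim`. A usage sketch (device pinned to CPU for the sketch):

```python
# Sketch only: assumes deeplotx 0.8.0.
import torch
from deeplotx.nn import RoFormerEncoder

enc = RoFormerEncoder(feature_dim=64, attn_heads=2, device='cpu')
x = torch.randn(2, 10, 64)
print(enc(x).shape)  # torch.Size([2, 10, 64])
```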
deeplotx/nn/rope.py
ADDED
@@ -0,0 +1,41 @@
+from typing_extensions import override
+
+import torch
+
+from deeplotx.nn.base_neural_network import BaseNeuralNetwork
+
+DEFAULT_THETA = 10_000
+
+
+class RoPE(BaseNeuralNetwork):
+    def __init__(self, feature_dim: int, theta: int = DEFAULT_THETA,
+                 device: str | None = None, dtype: torch.dtype = torch.float32):
+        super().__init__(in_features=feature_dim, out_features=feature_dim, model_name=None,
+                         device=device, dtype=dtype)
+        assert feature_dim % 2 == 0, f'feature_dim ({feature_dim}) is not divisible by 2.'
+        self._theta = theta
+        self._num_groups = feature_dim // 2
+        self._inv_freq = 1.0 / (theta ** (torch.arange(start=0, end=self._num_groups, step=1).float() / self._num_groups))
+        self.register_buffer('inv_freq', self._inv_freq)
+
+    @property
+    def dim(self):
+        return self._dim
+
+    @property
+    def theta(self):
+        return self._theta
+
+    def rotate_half(self, _t: torch.Tensor) -> torch.Tensor:
+        return torch.cat((- _t[..., self._num_groups:], _t[..., :self._num_groups]), dim=-1)
+
+    @override
+    def forward(self, x: torch.Tensor) -> torch.Tensor:
+        x = self.ensure_device_and_dtype(x, device=self.device, dtype=self.dtype)
+        *other_dims, seq_len, feature_dim = x.shape
+        assert feature_dim == self.in_features, f"feature_dim of x doesn't match with defined feature_dim {self.in_features}."
+        t = torch.arange(start=0, end=seq_len, step=1, device=self.device, dtype=self.dtype)
+        freq = torch.outer(t, self._inv_freq)
+        emb = torch.cat((freq, freq), dim=-1)
+        sin_emb, cos_emb = emb.sin(), emb.cos()
+        return x * cos_emb + self.rotate_half(x) * sin_emb
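RoPE rotates pairs of feature channels by a position-dependent angle, so the tensor's shape is preserved while absolute positions are encoded into the rotation. A usage sketch (device pinned to CPU for the sketch):

```python
# Sketch only: assumes deeplotx 0.8.0; feature_dim must be even.
import torch
from deeplotx.nn import RoPE

rope = RoPE(feature_dim=64, device='cpu')
x = torch.randn(2, 10, 64)      # (batch, seq_len, feature_dim)
print(rope(x).shape)            # torch.Size([2, 10, 64])
```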
deeplotx/nn/softmax_regression.py
CHANGED
@@ -6,9 +6,12 @@ from deeplotx.nn.linear_regression import LinearRegression
 
 
 class SoftmaxRegression(LinearRegression):
-    def __init__(self, input_dim: int, output_dim: int,
+    def __init__(self, input_dim: int, output_dim: int, num_layers: int = 1, expansion_factor: int | float = 1.5,
+                 bias: bool = True, dropout_rate: float = 0.1, model_name: str | None = None,
                  device: str | None = None, dtype: torch.dtype | None = None):
-        super().__init__(input_dim=input_dim, output_dim=output_dim,
+        super().__init__(input_dim=input_dim, output_dim=output_dim, num_layers=num_layers,
+                         expansion_factor=expansion_factor, bias=bias, dropout_rate=dropout_rate,
+                         model_name=model_name, device=device, dtype=dtype)
 
     @override
     def forward(self, x: torch.Tensor) -> torch.Tensor:
deeplotx/trainer/text_binary_classification_trainer.py
CHANGED
@@ -6,6 +6,7 @@ from torch import nn, optim
 from torch.utils.data import DataLoader, TensorDataset
 
 from deeplotx.encoder.long_text_encoder import LongTextEncoder
+from deeplotx.nn.attention import DEFAULT_THETA
 from deeplotx.nn.long_context_recursive_sequential import LongContextRecursiveSequential
 from deeplotx.trainer.base_trainer import BaseTrainer
 
@@ -24,8 +25,8 @@ class TextBinaryClassifierTrainer(BaseTrainer):
     def train(self, positive_texts: list[str], negative_texts: list[str],
               num_epochs: int, learning_rate: float = 2e-6, balancing_dataset: bool = True,
              train_loss_threshold: float = 0.0, valid_loss_threshold: float = 0.0,
-              alpha: float = 1e-4, rho: float = 0.2,
-
+              alpha: float = 1e-4, rho: float = 0.2, encoder_layers: int = 4, attn_heads: int = 6,
+              recursive_layers: int = 2, recursive_hidden_dim: int = 256, **kwargs) -> LongContextRecursiveSequential:
         if balancing_dataset:
             min_length = min(len(positive_texts), len(negative_texts))
             positive_texts = positive_texts[:min_length]
@@ -44,15 +45,30 @@ class TextBinaryClassifierTrainer(BaseTrainer):
         valid_dataset = TensorDataset(inputs[train_size:], labels[train_size:])
         self.train_dataset_loader = DataLoader(train_dataset, batch_size=self._batch_size, shuffle=True)
         self.valid_dataset_loader = DataLoader(valid_dataset, batch_size=self._batch_size, shuffle=True)
-
-        if self.model is not None and self.model.fc1.in_features != feature_dim:
+        if self.model is not None and self.model.in_features != feature_dim:
             logger.warning("The dimension of features doesn't match. A new model instance will be created.")
             self.model = None
         if self.model is None:
-
-
-
-
+            ffn_layers = kwargs.get('ffn_layers', 5)
+            ffn_expansion_factor = kwargs.get('ffn_expansion_factor', 2)
+            bias = kwargs.get('bias', True)
+            dropout_rate = kwargs.get('dropout_rate', 0.1)
+            encoder_ffn_layers = kwargs.get('encoder_ffn_layers', ffn_layers)
+            encoder_expansion_factor = kwargs.get('encoder_expansion_factor', ffn_expansion_factor)
+            encoder_dropout_rate = kwargs.get('encoder_dropout_rate', dropout_rate)
+            attn_ffn_layers = kwargs.get('attn_ffn_layers', 1)
+            attn_expansion_factor = kwargs.get('attn_expansion_factor', ffn_expansion_factor)
+            attn_dropout_rate = kwargs.get('attn_dropout_rate', dropout_rate)
+            theta = kwargs.get('theta', DEFAULT_THETA)
+            self.model = LongContextRecursiveSequential(input_dim=feature_dim, output_dim=1, bias=bias,
+                                                        encoder_layers=encoder_layers, attn_heads=attn_heads,
+                                                        recursive_layers=recursive_layers, recursive_hidden_dim=recursive_hidden_dim,
+                                                        ffn_layers=ffn_layers, ffn_expansion_factor=ffn_expansion_factor, dropout_rate=dropout_rate,
+                                                        encoder_ffn_layers=encoder_ffn_layers, encoder_expansion_factor=encoder_expansion_factor,
+                                                        encoder_dropout_rate=encoder_dropout_rate, attn_ffn_layers=attn_ffn_layers,
+                                                        attn_expansion_factor=attn_expansion_factor, attn_dropout_rate=attn_dropout_rate,
+                                                        theta=theta).initialize_weights()
+            logger.debug(f'Training Model: \n{self.model}')
         loss_function = nn.BCELoss()
         optimizer = optim.Adamax(self.model.parameters(), lr=learning_rate)
         for epoch in range(num_epochs):
deeplotx/util/__init__.py
CHANGED
@@ -1,2 +1,2 @@
-from .hash import md5, sha1
+from .hash import md5, sha1, sha256, sha512
 from .read_file import read_file, get_files
{deeplotx-0.5.6.dist-info → deeplotx-0.8.0.dist-info}/METADATA
CHANGED
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: deeplotx
-Version: 0.
+Version: 0.8.0
 Summary: Easy-2-use long text NLP toolkit.
 Requires-Python: >=3.10
 Description-Content-Type: text/markdown
@@ -13,7 +13,7 @@ Requires-Dist: python-dotenv
 Requires-Dist: torch
 Requires-Dist: transformers
 Requires-Dist: typing-extensions
-Requires-Dist: vortezwohl>=0.0.
+Requires-Dist: vortezwohl>=0.0.8
 Dynamic: license-file
 
 [](https://deepwiki.com/vortezwohl/DeepLoTX)
@@ -163,6 +163,8 @@ Dynamic: license-file
 
 ```python
 from deeplotx import (
+    BaseNeuralNetwork,  # base class for deep neural networks
+    FeedForward,  # feed-forward network
     LinearRegression,  # linear regression
     LogisticRegression,  # logistic regression / binary classification / multi-label classification
     SoftmaxRegression,  # Softmax regression / multi-class classification
@@ -181,38 +183,54 @@ Dynamic: license-file
 
 import torch
 from torch import nn
-
+
 from deeplotx.nn.base_neural_network import BaseNeuralNetwork
-
-
-class
-    def __init__(self,
-
-
-
-        self.
-        self.
-
-        self.
-
-        self.parametric_relu_1 = nn.PReLU(num_parameters=1, init=5e-3,
-
-        self.
-
-
+
+
+class FeedForwardUnit(BaseNeuralNetwork):
+    def __init__(self, feature_dim: int, expansion_factor: int | float = 2,
+                 bias: bool = True, dropout_rate: float = 0.05, model_name: str | None = None,
+                 device: str | None = None, dtype: torch.dtype | None = None):
+        super().__init__(in_features=feature_dim, out_features=feature_dim, model_name=model_name, device=device, dtype=dtype)
+        self._dropout_rate = dropout_rate
+        self.fc1 = nn.Linear(feature_dim, int(feature_dim * expansion_factor), bias=bias,
+                             device=self.device, dtype=self.dtype)
+        self.fc2 = nn.Linear(int(feature_dim * expansion_factor), feature_dim, bias=bias,
+                             device=self.device, dtype=self.dtype)
+        self.parametric_relu_1 = nn.PReLU(num_parameters=1, init=5e-3,
+                                          device=self.device, dtype=self.dtype)
+        self.layer_norm = nn.LayerNorm(normalized_shape=self.fc1.in_features, eps=1e-9,
+                                       device=self.device, dtype=self.dtype)
+
     @override
-    def forward(self, x) -> torch.Tensor:
+    def forward(self, x: torch.Tensor) -> torch.Tensor:
         x = self.ensure_device_and_dtype(x, device=self.device, dtype=self.dtype)
-
-        x =
-        x =
-        x = self.
-
-
-
-
-
-
+        residual = x
+        x = self.layer_norm(x)
+        x = self.fc1(x)
+        x = self.parametric_relu_1(x)
+        if self._dropout_rate > .0:
+            x = torch.dropout(x, p=self._dropout_rate, train=self.training)
+        return self.fc2(x) + residual
+
+
+class FeedForward(BaseNeuralNetwork):
+    def __init__(self, feature_dim: int, num_layers: int = 1, expansion_factor: int | float = 2,
+                 bias: bool = True, dropout_rate: float = 0.05, model_name: str | None = None,
+                 device: str | None = None, dtype: torch.dtype | None = None):
+        if num_layers < 1:
+            raise ValueError('num_layers cannot be less than 1.')
+        super().__init__(in_features=feature_dim, out_features=feature_dim, model_name=model_name, device=device, dtype=dtype)
+        self.ffn_layers = nn.ModuleList([FeedForwardUnit(feature_dim=feature_dim,
+                                                         expansion_factor=expansion_factor, bias=bias,
+                                                         dropout_rate=dropout_rate,
+                                                         device=self.device, dtype=self.dtype)] * num_layers)
+
+    @override
+    def forward(self, x: torch.Tensor) -> torch.Tensor:
+        x = self.ensure_device_and_dtype(x, device=self.device, dtype=self.dtype)
+        for ffn in self.ffn_layers:
+            x = ffn(x)
         return x
 ```
 
@@ -222,29 +240,34 @@ Dynamic: license-file
 from typing_extensions import override
 
 import torch
-from torch import nn, softmax
 
 from deeplotx.nn.base_neural_network import BaseNeuralNetwork
+from deeplotx.nn.feed_forward import FeedForward
 
 
 class SelfAttention(BaseNeuralNetwork):
-    def __init__(self, feature_dim: int,
-
-
+    def __init__(self, feature_dim: int, bias: bool = True, proj_layers: int = 1,
+                 proj_expansion_factor: int | float = 1.5, dropout_rate: float = 0.02,
+                 model_name: str | None = None, device: str | None = None, dtype: torch.dtype | None = None):
+        super().__init__(in_features=feature_dim, out_features=feature_dim, model_name=model_name,
+                         device=device, dtype=dtype)
         self._feature_dim = feature_dim
-        self.q_proj =
-
-
-
-
-        bias=
+        self.q_proj = FeedForward(feature_dim=self._feature_dim, num_layers=proj_layers,
+                                  expansion_factor=proj_expansion_factor,
+                                  bias=bias, dropout_rate=dropout_rate, device=self.device, dtype=self.dtype)
+        self.k_proj = FeedForward(feature_dim=self._feature_dim, num_layers=proj_layers,
+                                  expansion_factor=proj_expansion_factor,
+                                  bias=bias, dropout_rate=dropout_rate, device=self.device, dtype=self.dtype)
+        self.v_proj = FeedForward(feature_dim=self._feature_dim, num_layers=proj_layers,
+                                  expansion_factor=proj_expansion_factor,
+                                  bias=bias, dropout_rate=dropout_rate, device=self.device, dtype=self.dtype)
 
     def _attention(self, x: torch.Tensor, mask: torch.Tensor | None = None) -> torch.Tensor:
         q, k = self.q_proj(x), self.k_proj(x)
         attn = torch.matmul(q, k.transpose(-2, -1))
         attn = attn / (self._feature_dim ** 0.5)
         attn = attn.masked_fill(mask == 0, -1e9) if mask is not None else attn
-        return softmax(attn, dim=-1)
+        return torch.softmax(attn, dim=-1)
 
     @override
     def forward(self, x: torch.Tensor, mask: torch.Tensor | None = None) -> torch.Tensor:
deeplotx-0.8.0.dist-info/RECORD
ADDED
@@ -0,0 +1,34 @@
+deeplotx/__init__.py,sha256=oNeA-vNu5YGiEQg0IcpKEdGh_Y_2uPvo2nqaNL_Zgv8,1159
+deeplotx/encoder/__init__.py,sha256=BrsF5_4O-4pfihYF2wjExDOoAY-03kGJTH-Mhez4tsE,129
+deeplotx/encoder/encoder.py,sha256=oSBdA-MiwMKNfTFJWR-RdvNS0G0qfX-Qchwy4LuwB00,3985
+deeplotx/encoder/long_text_encoder.py,sha256=PFR6jjGyg1N58TQlKsPaNQEd-EDl13Hyhu7A1KtGBbA,3743
+deeplotx/encoder/longformer_encoder.py,sha256=A8FXqd4mdHxSn_o_R689XtpT73ISDT788EgMQRGLC2g,1822
+deeplotx/nn/__init__.py,sha256=01I_yqx9GTa4wy3uNyAqhtxp66tDqxgMLC4Ky5Vnkrg,651
+deeplotx/nn/attention.py,sha256=HZ4nfFtkk7AnJ9nuoDSK6zIlIhZ_hbpZc3o6JQIBqJ8,2861
+deeplotx/nn/auto_regression.py,sha256=uISx29t_zkDGS8s2wvGB6wOGYZitQ4hQ7wyoQl4lcqY,857
+deeplotx/nn/base_neural_network.py,sha256=FjQEDFH810fJS7JV3aLgJZnaMqC6DH--wlBvuj-ghTc,5900
+deeplotx/nn/feed_forward.py,sha256=4ozj7EDalO9pb6JUhZtsJqE0r8bIHFApHRt2zTrl4ho,2931
+deeplotx/nn/linear_regression.py,sha256=QybSRfsf9PpgJAWixvrSNn3OYRKJXpSZMfqdzpw-Kd8,1280
+deeplotx/nn/logistic_regression.py,sha256=WfgHVNGIvAYsX2iea2wRlLgfbubYWyZkBLYpnpwOiyU,937
+deeplotx/nn/long_context_auto_regression.py,sha256=uy0k_g8wEfMH5nd5HCfrHA8dgEsuWBA2x8U-g3h4vQc,1054
+deeplotx/nn/long_context_recursive_sequential.py,sha256=i7kUml9RV_mkLRJ114UHsj9Gxw7LzJVQ4z8-REHa8-w,2682
+deeplotx/nn/multi_head_attention.py,sha256=3z73uGbvy3jszRy1B9nxGOJjlttHpcpRF8Qd09OEams,2267
+deeplotx/nn/recursive_sequential.py,sha256=8Z8vT70xTygusL-3w3QlB_B_k0xQSUU2ZTgC1LhEmzQ,2805
+deeplotx/nn/roformer_encoder.py,sha256=UJjKniNdMd0rfoYQcsX6bPo6Ceq_Z6EhwHe2kgqWC_k,2426
+deeplotx/nn/rope.py,sha256=r3hfENCxJv-td55L0CBfF8MkhEPd9V1vU_U6pDfCfr0,1754
+deeplotx/nn/softmax_regression.py,sha256=PN_1Zr_B_z5zYC_s_8k6c5fllOtxfJEvVvCmC9GRmx0,958
+deeplotx/similarity/__init__.py,sha256=s3u-KSgxjnMcWpIItKgXNltFMPQ7YY3CqsqHI-5F1c8,724
+deeplotx/similarity/distribution.py,sha256=wQGouuuW531pZeBRKBujXsdsoz4fDnPw7_GW81jwepc,1066
+deeplotx/similarity/set.py,sha256=zhGFxtSIXlWqvipBYzoiPahp4g0boAIoUiMfG0wl07A,686
+deeplotx/similarity/vector.py,sha256=WVbDHqykt-fvuILVrhUCtIFAOEjY_zvttrXGM9eylG0,1125
+deeplotx/trainer/__init__.py,sha256=Fl5DR9UecQc5VtBcczU9sx_HtPNoFohpuELOh-Jrsks,77
+deeplotx/trainer/base_trainer.py,sha256=z0MeAT-rRYmjeBXt0ckt7J1itYArR0Cx02wHesXUoZE,385
+deeplotx/trainer/text_binary_classification_trainer.py,sha256=QMLR4cC8NCUP-v7SOYVtCykNwahENmWHv9adaeTbYmA,6528
+deeplotx/util/__init__.py,sha256=5CH4MTeSgsmCe3LPMfvKoSBpwh6jDSBuHVElJvzQzgs,90
+deeplotx/util/hash.py,sha256=qbNU3RLBWGQYFVte9WZBAkZ1BkdjCXiKLDaKPN54KFk,662
+deeplotx/util/read_file.py,sha256=ptzouvEQeeW8KU5BrWNJlXw-vFXVrpS9SkAUxsu6A8A,612
+deeplotx-0.8.0.dist-info/licenses/LICENSE,sha256=IwGE9guuL-ryRPEKi6wFPI_zOhg7zDZbTYuHbSt_SAk,35823
+deeplotx-0.8.0.dist-info/METADATA,sha256=KprDhH6R0zsqk6tPUoC9FpWeljaaJTaTsYm2Au0qQwY,12251
+deeplotx-0.8.0.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
+deeplotx-0.8.0.dist-info/top_level.txt,sha256=hKg4pVDXZ-WWxkRfJFczRIll1Sv7VyfKCmzHLXbuh1U,9
+deeplotx-0.8.0.dist-info/RECORD,,
deeplotx/nn/self_attention.py
DELETED
@@ -1,34 +0,0 @@
-from typing_extensions import override
-
-import torch
-from torch import nn, softmax
-
-from deeplotx.nn.base_neural_network import BaseNeuralNetwork
-
-
-class SelfAttention(BaseNeuralNetwork):
-    def __init__(self, feature_dim: int, model_name: str | None = None,
-                 device: str | None = None, dtype: torch.dtype | None = None):
-        super().__init__(model_name=model_name, device=device, dtype=dtype)
-        self._feature_dim = feature_dim
-        self.q_proj = nn.Linear(in_features=self._feature_dim, out_features=self._feature_dim,
-                                bias=True, device=self.device, dtype=self.dtype)
-        self.k_proj = nn.Linear(in_features=self._feature_dim, out_features=self._feature_dim,
-                                bias=True, device=self.device, dtype=self.dtype)
-        self.v_proj = nn.Linear(in_features=self._feature_dim, out_features=self._feature_dim,
-                                bias=True, device=self.device, dtype=self.dtype)
-
-    def _attention(self, x: torch.Tensor, mask: torch.Tensor | None = None) -> torch.Tensor:
-        q, k = self.q_proj(x), self.k_proj(x)
-        attn = torch.matmul(q, k.transpose(-2, -1))
-        attn = attn / (self._feature_dim ** 0.5)
-        attn = attn.masked_fill(mask == 0, -1e9) if mask is not None else attn
-        return softmax(attn, dim=-1)
-
-    @override
-    def forward(self, x: torch.Tensor, mask: torch.Tensor | None = None) -> torch.Tensor:
-        x = self.ensure_device_and_dtype(x, device=self.device, dtype=self.dtype)
-        if mask is not None:
-            mask = self.ensure_device_and_dtype(mask, device=self.device, dtype=self.dtype)
-        v = self.v_proj(x)
-        return torch.matmul(self._attention(x, mask), v)
deeplotx-0.5.6.dist-info/RECORD
DELETED
@@ -1,30 +0,0 @@
-deeplotx/__init__.py,sha256=6El66QXHDrgNMsNIG9bG97WO8BhPK5btXbTikzx2ce4,1087
-deeplotx/encoder/__init__.py,sha256=BrsF5_4O-4pfihYF2wjExDOoAY-03kGJTH-Mhez4tsE,129
-deeplotx/encoder/encoder.py,sha256=2e1ZnZ37PkFQ5BePndmq42xmHp8YZh65Q1bd0dxejPI,2417
-deeplotx/encoder/long_text_encoder.py,sha256=4445FdVwubvDiebCWoT9wAUpYlMj6Mmd0OBxbFZ3ZIo,3565
-deeplotx/encoder/longformer_encoder.py,sha256=A8FXqd4mdHxSn_o_R689XtpT73ISDT788EgMQRGLC2g,1822
-deeplotx/nn/__init__.py,sha256=CS0UwyYKa8wI6vu6FBIYxvm-HAmw39MTMFlZDtqi6UA,444
-deeplotx/nn/auto_regression.py,sha256=7P63opWCWMqE2DigwbsL6kfXtFtJPz00Yo1RqflBz4A,572
-deeplotx/nn/base_neural_network.py,sha256=o9s0NqxkDcFZdipX8UrlbBmwYHOg7wPmzbjBEeGw63s,2902
-deeplotx/nn/linear_regression.py,sha256=7TbbplBgY70b1l5lKvTJMzDWQ8khQfnRCyMjObhVdEc,2180
-deeplotx/nn/logistic_regression.py,sha256=YiSLAon8gLDtMXAkPQ210sauod24eyJYYH50fPhj6T8,667
-deeplotx/nn/long_context_auto_regression.py,sha256=Z67Enq1kc1bERIrQW4jHeDQQmisOXhhjrtaPklnHkyw,605
-deeplotx/nn/long_context_recursive_sequential.py,sha256=_fKpPA7wt6B0kPyyig4xuhmLxygK19FSLgxW1Xa453M,1487
-deeplotx/nn/recursive_sequential.py,sha256=8YHZ-IdLyMJN5QVWPMuizDxLodAE9Bgdg1_YtIxFw7o,2247
-deeplotx/nn/self_attention.py,sha256=fb34wXnfgAGYJEhqa1l9AxMa-AHcCTOLbUlAfaGIK7Q,1766
-deeplotx/nn/softmax_regression.py,sha256=BeVk0G2H3zKG6bsQgPRNWuTxnnNmVI2zFZtCHgARAAc,688
-deeplotx/similarity/__init__.py,sha256=s3u-KSgxjnMcWpIItKgXNltFMPQ7YY3CqsqHI-5F1c8,724
-deeplotx/similarity/distribution.py,sha256=wQGouuuW531pZeBRKBujXsdsoz4fDnPw7_GW81jwepc,1066
-deeplotx/similarity/set.py,sha256=zhGFxtSIXlWqvipBYzoiPahp4g0boAIoUiMfG0wl07A,686
-deeplotx/similarity/vector.py,sha256=WVbDHqykt-fvuILVrhUCtIFAOEjY_zvttrXGM9eylG0,1125
-deeplotx/trainer/__init__.py,sha256=Fl5DR9UecQc5VtBcczU9sx_HtPNoFohpuELOh-Jrsks,77
-deeplotx/trainer/base_trainer.py,sha256=z0MeAT-rRYmjeBXt0ckt7J1itYArR0Cx02wHesXUoZE,385
-deeplotx/trainer/text_binary_classification_trainer.py,sha256=umuvikc09Op4SB43EqmYo8W3ung8DBjEOrMG3hCVFz8,4915
-deeplotx/util/__init__.py,sha256=JxqAK_WOOHcYVSTHBT1-WuBwWrPEVDTV3titeVWvNUM,74
-deeplotx/util/hash.py,sha256=qbNU3RLBWGQYFVte9WZBAkZ1BkdjCXiKLDaKPN54KFk,662
-deeplotx/util/read_file.py,sha256=ptzouvEQeeW8KU5BrWNJlXw-vFXVrpS9SkAUxsu6A8A,612
-deeplotx-0.5.6.dist-info/licenses/LICENSE,sha256=IwGE9guuL-ryRPEKi6wFPI_zOhg7zDZbTYuHbSt_SAk,35823
-deeplotx-0.5.6.dist-info/METADATA,sha256=vBUVgshgGG_vZmJT07C7CPEhMfBUmwbCtsIY06D_14g,10925
-deeplotx-0.5.6.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
-deeplotx-0.5.6.dist-info/top_level.txt,sha256=hKg4pVDXZ-WWxkRfJFczRIll1Sv7VyfKCmzHLXbuh1U,9
-deeplotx-0.5.6.dist-info/RECORD,,
{deeplotx-0.5.6.dist-info → deeplotx-0.8.0.dist-info}/WHEEL
File without changes
{deeplotx-0.5.6.dist-info → deeplotx-0.8.0.dist-info}/licenses/LICENSE
File without changes
{deeplotx-0.5.6.dist-info → deeplotx-0.8.0.dist-info}/top_level.txt
File without changes