deeplotx 0.6.1__tar.gz → 0.8.1__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {deeplotx-0.6.1 → deeplotx-0.8.1}/PKG-INFO +2 -2
- {deeplotx-0.6.1 → deeplotx-0.8.1}/deeplotx/__init__.py +4 -2
- {deeplotx-0.6.1 → deeplotx-0.8.1}/deeplotx/encoder/long_text_encoder.py +1 -0
- {deeplotx-0.6.1 → deeplotx-0.8.1}/deeplotx/nn/__init__.py +4 -1
- deeplotx-0.6.1/deeplotx/nn/self_attention.py → deeplotx-0.8.1/deeplotx/nn/attention.py +18 -9
- deeplotx-0.8.1/deeplotx/nn/auto_regression.py +14 -0
- {deeplotx-0.6.1 → deeplotx-0.8.1}/deeplotx/nn/base_neural_network.py +61 -0
- {deeplotx-0.6.1 → deeplotx-0.8.1}/deeplotx/nn/feed_forward.py +11 -11
- deeplotx-0.8.1/deeplotx/nn/long_context_auto_regression.py +16 -0
- deeplotx-0.8.1/deeplotx/nn/long_context_recursive_sequential.py +36 -0
- deeplotx-0.8.1/deeplotx/nn/multi_head_attention.py +34 -0
- {deeplotx-0.6.1 → deeplotx-0.8.1}/deeplotx/nn/recursive_sequential.py +13 -13
- deeplotx-0.8.1/deeplotx/nn/roformer_encoder.py +40 -0
- deeplotx-0.8.1/deeplotx/nn/rope.py +42 -0
- {deeplotx-0.6.1 → deeplotx-0.8.1}/deeplotx/trainer/text_binary_classification_trainer.py +21 -17
- deeplotx-0.8.1/deeplotx/util/__init__.py +2 -0
- {deeplotx-0.6.1 → deeplotx-0.8.1}/deeplotx.egg-info/PKG-INFO +2 -2
- {deeplotx-0.6.1 → deeplotx-0.8.1}/deeplotx.egg-info/SOURCES.txt +4 -1
- {deeplotx-0.6.1 → deeplotx-0.8.1}/deeplotx.egg-info/requires.txt +1 -1
- {deeplotx-0.6.1 → deeplotx-0.8.1}/pyproject.toml +2 -2
- deeplotx-0.6.1/deeplotx/nn/auto_regression.py +0 -15
- deeplotx-0.6.1/deeplotx/nn/long_context_auto_regression.py +0 -15
- deeplotx-0.6.1/deeplotx/nn/long_context_recursive_sequential.py +0 -34
- deeplotx-0.6.1/deeplotx/util/__init__.py +0 -2
- {deeplotx-0.6.1 → deeplotx-0.8.1}/LICENSE +0 -0
- {deeplotx-0.6.1 → deeplotx-0.8.1}/README.md +0 -0
- {deeplotx-0.6.1 → deeplotx-0.8.1}/deeplotx/encoder/__init__.py +0 -0
- {deeplotx-0.6.1 → deeplotx-0.8.1}/deeplotx/encoder/encoder.py +0 -0
- {deeplotx-0.6.1 → deeplotx-0.8.1}/deeplotx/encoder/longformer_encoder.py +0 -0
- {deeplotx-0.6.1 → deeplotx-0.8.1}/deeplotx/nn/linear_regression.py +0 -0
- {deeplotx-0.6.1 → deeplotx-0.8.1}/deeplotx/nn/logistic_regression.py +0 -0
- {deeplotx-0.6.1 → deeplotx-0.8.1}/deeplotx/nn/softmax_regression.py +0 -0
- {deeplotx-0.6.1 → deeplotx-0.8.1}/deeplotx/similarity/__init__.py +0 -0
- {deeplotx-0.6.1 → deeplotx-0.8.1}/deeplotx/similarity/distribution.py +0 -0
- {deeplotx-0.6.1 → deeplotx-0.8.1}/deeplotx/similarity/set.py +0 -0
- {deeplotx-0.6.1 → deeplotx-0.8.1}/deeplotx/similarity/vector.py +0 -0
- {deeplotx-0.6.1 → deeplotx-0.8.1}/deeplotx/trainer/__init__.py +0 -0
- {deeplotx-0.6.1 → deeplotx-0.8.1}/deeplotx/trainer/base_trainer.py +0 -0
- {deeplotx-0.6.1 → deeplotx-0.8.1}/deeplotx/util/hash.py +0 -0
- {deeplotx-0.6.1 → deeplotx-0.8.1}/deeplotx/util/read_file.py +0 -0
- {deeplotx-0.6.1 → deeplotx-0.8.1}/deeplotx.egg-info/dependency_links.txt +0 -0
- {deeplotx-0.6.1 → deeplotx-0.8.1}/deeplotx.egg-info/top_level.txt +0 -0
- {deeplotx-0.6.1 → deeplotx-0.8.1}/setup.cfg +0 -0
{deeplotx-0.6.1 → deeplotx-0.8.1}/PKG-INFO

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: deeplotx
-Version: 0.6.1
+Version: 0.8.1
 Summary: Easy-2-use long text NLP toolkit.
 Requires-Python: >=3.10
 Description-Content-Type: text/markdown
@@ -13,7 +13,7 @@ Requires-Dist: python-dotenv
 Requires-Dist: torch
 Requires-Dist: transformers
 Requires-Dist: typing-extensions
-Requires-Dist: vortezwohl>=0.0.
+Requires-Dist: vortezwohl>=0.0.8
 Dynamic: license-file

 [](https://deepwiki.com/vortezwohl/DeepLoTX)
{deeplotx-0.6.1 → deeplotx-0.8.1}/deeplotx/__init__.py

@@ -5,14 +5,16 @@ __ROOT__ = os.path.dirname(os.path.abspath(__file__))

 from .encoder import Encoder, LongTextEncoder, LongformerEncoder
 from .nn import (
-    BaseNeuralNetwork,
     FeedForward,
     LinearRegression,
     LogisticRegression,
     SoftmaxRegression,
     RecursiveSequential,
     LongContextRecursiveSequential,
-
+    RoPE,
+    Attention,
+    MultiHeadAttention,
+    RoFormerEncoder,
     AutoRegression,
     LongContextAutoRegression
 )
{deeplotx-0.6.1 → deeplotx-0.8.1}/deeplotx/encoder/long_text_encoder.py

@@ -17,6 +17,7 @@ class LongTextEncoder(Encoder):
                  overlapping: int = 32, model_name_or_path: str = DEFAULT_BERT,
                  cache_capacity: int = 64, max_workers: int = 8, device: str | None = None):
         super().__init__(model_name_or_path=model_name_or_path, device=device)
+        assert overlapping < chunk_size, f'overlapping ({overlapping}) must be less than chunk size ({chunk_size}).'
         self._max_length = max_length
         self._chunk_size = chunk_size
         self._overlapping = overlapping
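The only functional change here is the new sanity check on chunk overlap. A small illustrative sketch, assuming deeplotx 0.8.1 is installed; the keyword names `max_length` and `chunk_size` are inferred from the assignments in the hunk above, and constructing the encoder loads the default BERT weights:

```python
# Hypothetical values; overlapping must now be strictly smaller than chunk_size.
from deeplotx import LongTextEncoder

encoder = LongTextEncoder(max_length=2048, chunk_size=448, overlapping=32)   # passes the new assert
# LongTextEncoder(max_length=2048, chunk_size=448, overlapping=448)          # would raise AssertionError
```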
{deeplotx-0.6.1 → deeplotx-0.8.1}/deeplotx/nn/__init__.py

@@ -5,6 +5,9 @@ from .logistic_regression import LogisticRegression
 from .softmax_regression import SoftmaxRegression
 from .recursive_sequential import RecursiveSequential
 from .long_context_recursive_sequential import LongContextRecursiveSequential
-from .self_attention import SelfAttention
+from .rope import RoPE
+from .attention import Attention
+from .multi_head_attention import MultiHeadAttention
+from .roformer_encoder import RoFormerEncoder
 from .auto_regression import AutoRegression
 from .long_context_auto_regression import LongContextAutoRegression
deeplotx-0.6.1/deeplotx/nn/self_attention.py → deeplotx-0.8.1/deeplotx/nn/attention.py

@@ -4,14 +4,17 @@ import torch

 from deeplotx.nn.base_neural_network import BaseNeuralNetwork
 from deeplotx.nn.feed_forward import FeedForward
+from deeplotx.nn.rope import RoPE, DEFAULT_THETA


-class SelfAttention(BaseNeuralNetwork):
-    def __init__(self, feature_dim: int, bias: bool = True,
-                 proj_expansion_factor: int | float = 1.5, dropout_rate: float = 0.02,
-                 model_name: str | None = None, device: str | None = None, dtype: torch.dtype | None = None
+class Attention(BaseNeuralNetwork):
+    def __init__(self, feature_dim: int, bias: bool = True, positional: bool = True,
+                 proj_layers: int = 1, proj_expansion_factor: int | float = 1.5, dropout_rate: float = 0.02,
+                 model_name: str | None = None, device: str | None = None, dtype: torch.dtype | None = None,
+                 **kwargs):
         super().__init__(in_features=feature_dim, out_features=feature_dim, model_name=model_name,
                          device=device, dtype=dtype)
+        self._positional = positional
         self._feature_dim = feature_dim
         self.q_proj = FeedForward(feature_dim=self._feature_dim, num_layers=proj_layers,
                                   expansion_factor=proj_expansion_factor,
@@ -22,18 +25,24 @@ class SelfAttention(BaseNeuralNetwork):
         self.v_proj = FeedForward(feature_dim=self._feature_dim, num_layers=proj_layers,
                                   expansion_factor=proj_expansion_factor,
                                   bias=bias, dropout_rate=dropout_rate, device=self.device, dtype=self.dtype)
+        if self._positional:
+            self.rope = RoPE(feature_dim=self._feature_dim, theta=kwargs.get('theta', DEFAULT_THETA),
+                             device=self.device, dtype=self.dtype)

-    def _attention(self, x: torch.Tensor, mask: torch.Tensor | None = None) -> torch.Tensor:
-        q, k = self.q_proj(x), self.k_proj(x)
+    def _attention(self, x: torch.Tensor, y: torch.Tensor, mask: torch.Tensor | None = None) -> torch.Tensor:
+        q, k = self.q_proj(x), self.k_proj(y)
+        if self._positional:
+            q, k = self.rope(q), self.rope(k)
         attn = torch.matmul(q, k.transpose(-2, -1))
         attn = attn / (self._feature_dim ** 0.5)
         attn = attn.masked_fill(mask == 0, -1e9) if mask is not None else attn
         return torch.softmax(attn, dim=-1)

     @override
-    def forward(self, x: torch.Tensor, mask: torch.Tensor | None = None) -> torch.Tensor:
+    def forward(self, x: torch.Tensor, y: torch.Tensor | None = None, mask: torch.Tensor | None = None) -> torch.Tensor:
         x = self.ensure_device_and_dtype(x, device=self.device, dtype=self.dtype)
+        y = x if y is None else self.ensure_device_and_dtype(y, device=self.device, dtype=self.dtype)
         if mask is not None:
             mask = self.ensure_device_and_dtype(mask, device=self.device, dtype=self.dtype)
-        v = self.v_proj(x)
-        return torch.matmul(self._attention(x, mask), v)
+        v = self.v_proj(y)
+        return torch.matmul(self._attention(x, y, mask), v)
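For orientation, a minimal usage sketch of the renamed block, assuming the 0.8.1 signatures shown above (`forward(x, y=None, mask=None)`, with RoPE applied to q/k when `positional=True`); the tensor shapes are illustrative:

```python
import torch
from deeplotx.nn import Attention

attn = Attention(feature_dim=64, positional=True)
x = torch.randn(2, 16, 64)      # (batch, seq_len, feature_dim)
ctx = torch.randn(2, 24, 64)

self_out = attn(x)              # y defaults to x -> self-attention
cross_out = attn(x, ctx)        # q from x, k/v from ctx -> cross-attention
print(self_out.shape, cross_out.shape)   # both torch.Size([2, 16, 64])
```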
deeplotx-0.8.1/deeplotx/nn/auto_regression.py (new file)

@@ -0,0 +1,14 @@
+import torch
+
+from deeplotx.nn import RecursiveSequential
+
+
+class AutoRegression(RecursiveSequential):
+    def __init__(self, feature_dim: int, bias: bool = True,
+                 recursive_layers: int = 1, recursive_hidden_dim: int | None = None,
+                 ffn_layers: int = 1, ffn_expansion_factor: int | float = 2, dropout_rate: float = 0.05,
+                 model_name: str | None = None, device: str | None = None, dtype: torch.dtype | None = None):
+        super().__init__(input_dim=feature_dim, output_dim=feature_dim, bias=bias,
+                         recursive_layers=recursive_layers, recursive_hidden_dim=recursive_hidden_dim,
+                         ffn_layers=ffn_layers, ffn_expansion_factor=ffn_expansion_factor,
+                         dropout_rate=dropout_rate, model_name=model_name, device=device, dtype=dtype)
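A short sketch of how the rewritten constructor might be used for next-step prediction, assuming `RecursiveSequential.forward(x, state)` and `initial_state()` behave as in the hunks further down:

```python
import torch
from deeplotx.nn import AutoRegression

model = AutoRegression(feature_dim=32, recursive_layers=1, ffn_layers=2)
x = torch.randn(1, 10, 32)                           # (batch, seq_len, feature_dim)
out, state = model(x, model.initial_state(batch_size=1))
print(out.shape)                                     # output_dim == feature_dim for auto-regression
```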
{deeplotx-0.6.1 → deeplotx-0.8.1}/deeplotx/nn/base_neural_network.py

@@ -3,6 +3,7 @@ from abc import abstractmethod

 import torch
 from torch import nn
+from torch.nn import init

 DEFAULT_SUFFIX = 'dlx'

@@ -36,6 +37,44 @@ class BaseNeuralNetwork(nn.Module):
         x = x.to(dtype)
         return x

+    def initialize_weights(self):
+        for m in self.modules():
+            match m.__class__:
+                case nn.Linear:
+                    init.kaiming_normal_(m.weight, mode='fan_in', nonlinearity='leaky_relu')
+                    if m.bias is not None:
+                        init.constant_(m.bias, 0)
+                case nn.BatchNorm2d | nn.BatchNorm1d | nn.BatchNorm3d:
+                    init.constant_(m.weight, 1)
+                    init.constant_(m.bias, 0)
+                case nn.LSTM | nn.GRU:
+                    for name, param in m.named_parameters():
+                        _tmp_name = name.lower()
+                        if 'weight_ih' in _tmp_name:
+                            init.kaiming_normal_(param, mode='fan_in', nonlinearity='sigmoid')
+                        elif 'weight_hh' in _tmp_name:
+                            init.orthogonal_(param)
+                        elif 'bias' in _tmp_name:
+                            init.constant_(param, 0)
+                case _:
+                    pass
+        return self
+
+    def size(self) -> dict:
+        total_params = trainable_params = non_trainable_params = 0
+        for param in self.parameters():
+            params = param.numel()
+            total_params += params
+            if param.requires_grad:
+                trainable_params += params
+            else:
+                non_trainable_params += params
+        return {
+            'total': total_params,
+            'trainable': trainable_params,
+            'non_trainable': non_trainable_params
+        }
+
     def l1(self, _lambda: float = 1e-4) -> torch.Tensor:
         def _l1() -> torch.Tensor:
             l2_reg = torch.tensor(0., device=self.device, dtype=self.dtype)
@@ -77,3 +116,25 @@ class BaseNeuralNetwork(nn.Module):
         model_file_name = f'{model_name}.{_suffix}' if model_name is not None else f'{self._model_name}.{_suffix}'
         self.load_state_dict(torch.load(os.path.join(model_dir, model_file_name), map_location=self.device, weights_only=True))
         return self
+
+    def __str__(self):
+        formatted = super().__str__()
+        _line_len = len([sorted(formatted.splitlines(), key=lambda _: len(_), reverse=True)][0])
+        _splitter_1 = '=' * (_line_len + 10)
+        _splitter_2 = '-' * (_line_len + 10)
+        _size = self.size()
+        total_param = _size['total']
+        trainable_param = _size['trainable']
+        non_trainable_param = _size['non_trainable']
+        formatted = (f'{_splitter_1}\n'
+                     f'Model_Name: {self._model_name}\n'
+                     f'In_Features: {self.in_features}\n'
+                     f'Out_Features: {self.out_features}\n'
+                     f'Device: {self.device}\n'
+                     f'Dtype: {self.dtype}\n'
+                     f'Total_Parameters: {total_param}\n'
+                     f'Trainable_Parameters: {trainable_param}\n'
+                     f'NonTrainable_Parameters: {non_trainable_param}\n'
+                     f'{_splitter_2}'
+                     f'\n{formatted}\n{_splitter_1}')
+        return formatted
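A small sketch of the three new helpers on any `BaseNeuralNetwork` subclass (here `AutoRegression`, whose 0.8.1 constructor appears earlier in this diff); exact parameter counts depend on the configuration:

```python
from deeplotx.nn import AutoRegression

model = AutoRegression(feature_dim=32).initialize_weights()   # re-inits Linear / BatchNorm / LSTM / GRU weights
print(model.size())   # {'total': ..., 'trainable': ..., 'non_trainable': ...}
print(model)          # __str__ now prepends a summary block before the usual nn.Module repr
```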
{deeplotx-0.6.1 → deeplotx-0.8.1}/deeplotx/nn/feed_forward.py

@@ -12,13 +12,13 @@ class FeedForwardUnit(BaseNeuralNetwork):
                  device: str | None = None, dtype: torch.dtype | None = None):
         super().__init__(in_features=feature_dim, out_features=feature_dim, model_name=model_name, device=device, dtype=dtype)
         self._dropout_rate = dropout_rate
-        self.
-
-        self.
-
-        self.
-
-        self.layer_norm = nn.LayerNorm(normalized_shape=self.
+        self.up_proj = nn.Linear(in_features=feature_dim, out_features=int(feature_dim * expansion_factor),
+                                 bias=bias, device=self.device, dtype=self.dtype)
+        self.down_proj = nn.Linear(in_features=int(feature_dim * expansion_factor), out_features=feature_dim,
+                                   bias=bias, device=self.device, dtype=self.dtype)
+        self.parametric_relu = nn.PReLU(num_parameters=1, init=5e-3,
+                                        device=self.device, dtype=self.dtype)
+        self.layer_norm = nn.LayerNorm(normalized_shape=self.up_proj.in_features, eps=1e-9,
                                        device=self.device, dtype=self.dtype)

     @override
@@ -26,11 +26,11 @@ class FeedForwardUnit(BaseNeuralNetwork):
         x = self.ensure_device_and_dtype(x, device=self.device, dtype=self.dtype)
         residual = x
         x = self.layer_norm(x)
-        x = self.
-        x = self.
+        x = self.up_proj(x)
+        x = self.parametric_relu(x)
         if self._dropout_rate > .0:
             x = torch.dropout(x, p=self._dropout_rate, train=self.training)
-        return self.
+        return self.down_proj(x) + residual


 class FeedForward(BaseNeuralNetwork):
@@ -43,7 +43,7 @@ class FeedForward(BaseNeuralNetwork):
         self.ffn_layers = nn.ModuleList([FeedForwardUnit(feature_dim=feature_dim,
                                                          expansion_factor=expansion_factor, bias=bias,
                                                          dropout_rate=dropout_rate,
-                                                         device=self.device, dtype=self.dtype)
+                                                         device=self.device, dtype=self.dtype) for _ in range(num_layers)])

     @override
     def forward(self, x: torch.Tensor) -> torch.Tensor:
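Besides the renamed projections, the `for _ in range(num_layers)` fix means the stack now really contains `num_layers` residual units. A minimal sketch, assuming the 0.8.1 keyword arguments shown above; the shape is preserved because each unit is residual:

```python
import torch
from deeplotx.nn import FeedForward

ffn = FeedForward(feature_dim=64, num_layers=3, expansion_factor=2)
x = torch.randn(8, 64)
print(ffn(x).shape)   # torch.Size([8, 64])
```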
deeplotx-0.8.1/deeplotx/nn/long_context_auto_regression.py (new file)

@@ -0,0 +1,16 @@
+import torch
+
+from deeplotx.nn import LongContextRecursiveSequential
+
+
+class LongContextAutoRegression(LongContextRecursiveSequential):
+    def __init__(self, feature_dim: int, bias: bool = True,
+                 encoder_layers: int = 1, attn_heads: int = 1, recursive_layers: int = 1, recursive_hidden_dim: int | None = None,
+                 ffn_layers: int = 1, ffn_expansion_factor: int | float = 2, dropout_rate: float = 0.05,
+                 model_name: str | None = None, device: str | None = None, dtype: torch.dtype | None = None,
+                 **kwargs):
+        super().__init__(input_dim=feature_dim, output_dim=feature_dim, bias=bias,
+                         encoder_layers=encoder_layers, attn_heads=attn_heads,
+                         recursive_layers=recursive_layers, recursive_hidden_dim=recursive_hidden_dim,
+                         ffn_layers=ffn_layers, ffn_expansion_factor=ffn_expansion_factor, dropout_rate=dropout_rate,
+                         model_name=model_name, device=device, dtype=dtype, **kwargs)
deeplotx-0.8.1/deeplotx/nn/long_context_recursive_sequential.py (new file)

@@ -0,0 +1,36 @@
+from typing_extensions import override
+
+import torch
+from torch import nn
+
+from deeplotx.nn.attention import DEFAULT_THETA
+from deeplotx.nn.recursive_sequential import RecursiveSequential
+from deeplotx.nn.roformer_encoder import RoFormerEncoder
+
+
+class LongContextRecursiveSequential(RecursiveSequential):
+    def __init__(self, input_dim: int, output_dim: int, bias: bool = True,
+                 encoder_layers: int = 1, attn_heads: int = 1, recursive_layers: int = 2, recursive_hidden_dim: int | None = None,
+                 ffn_layers: int = 1, ffn_expansion_factor: int | float = 2, dropout_rate: float = 0.05,
+                 model_name: str | None = None, device: str | None = None, dtype: torch.dtype | None = None,
+                 **kwargs):
+        super().__init__(input_dim=input_dim, output_dim=output_dim, bias=bias,
+                         recursive_layers=recursive_layers, recursive_hidden_dim=recursive_hidden_dim,
+                         ffn_layers=ffn_layers, ffn_expansion_factor=ffn_expansion_factor, dropout_rate=dropout_rate,
+                         model_name=model_name, device=device, dtype=dtype)
+        self.roformer_encoders = nn.ModuleList([RoFormerEncoder(feature_dim=input_dim, attn_heads=attn_heads, bias=bias,
+                                                                ffn_layers=kwargs.get('encoder_ffn_layers', ffn_layers),
+                                                                ffn_expansion_factor=kwargs.get('encoder_expansion_factor', ffn_expansion_factor),
+                                                                dropout_rate=kwargs.get('encoder_dropout_rate', dropout_rate),
+                                                                attn_ffn_layers=kwargs.get('attn_ffn_layers', 1),
+                                                                attn_expansion_factor=kwargs.get('attn_expansion_factor', ffn_expansion_factor),
+                                                                attn_dropout_rate=kwargs.get('attn_dropout_rate', dropout_rate),
+                                                                theta=kwargs.get('theta', DEFAULT_THETA),
+                                                                device=self.device, dtype=self.dtype) for _ in range(encoder_layers)])
+
+    @override
+    def forward(self, x: torch.Tensor, state: tuple[torch.Tensor, torch.Tensor]) -> tuple[torch.Tensor, tuple[torch.Tensor, torch.Tensor]]:
+        x = self.ensure_device_and_dtype(x, device=self.device, dtype=self.dtype)
+        for roformer_encoder in self.roformer_encoders:
+            x = roformer_encoder(x)
+        return super().forward(x, state)
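A usage sketch of the new architecture (a stack of `RoFormerEncoder` blocks feeding the bidirectional-LSTM head), assuming the constructor defaults shown above; the values are illustrative:

```python
import torch
from deeplotx.nn import LongContextRecursiveSequential

model = LongContextRecursiveSequential(input_dim=64, output_dim=1,
                                       encoder_layers=2, attn_heads=4)
x = torch.randn(1, 32, 64)                           # (batch, seq_len, input_dim)
out, state = model(x, model.initial_state(batch_size=1))
print(out.shape)
```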
deeplotx-0.8.1/deeplotx/nn/multi_head_attention.py (new file)

@@ -0,0 +1,34 @@
+from typing_extensions import override
+
+import torch
+from torch import nn
+
+from deeplotx.nn.base_neural_network import BaseNeuralNetwork
+from deeplotx.nn.attention import Attention
+
+
+class MultiHeadAttention(BaseNeuralNetwork):
+    def __init__(self, feature_dim: int, num_heads: int = 1, bias: bool = True, positional: bool = True,
+                 proj_layers: int = 1, proj_expansion_factor: int | float = 1.5, dropout_rate: float = 0.02,
+                 model_name: str | None = None, device: str | None = None, dtype: torch.dtype | None = None,
+                 **kwargs):
+        super().__init__(in_features=feature_dim, out_features=feature_dim, model_name=model_name,
+                         device=device, dtype=dtype)
+        self._num_heads = num_heads
+        self.expand_proj = nn.Linear(in_features=feature_dim, out_features=feature_dim * self._num_heads, bias=bias,
+                                     device=self.device, dtype=self.dtype)
+        self.attn_heads = nn.ModuleList([Attention(feature_dim=feature_dim, bias=bias, positional=positional,
+                                                   proj_layers=proj_layers, proj_expansion_factor=proj_expansion_factor,
+                                                   dropout_rate=dropout_rate, device=self.device, dtype=self.dtype,
+                                                   **kwargs) for _ in range(self._num_heads)])
+        self.out_proj = nn.Linear(in_features=feature_dim * self._num_heads, out_features=feature_dim, bias=bias,
+                                  device=self.device, dtype=self.dtype)
+
+    @override
+    def forward(self, x: torch.Tensor, y: torch.Tensor | None = None, mask: torch.Tensor | None = None) -> torch.Tensor:
+        x = self.ensure_device_and_dtype(x, device=self.device, dtype=self.dtype)
+        y = x if y is None else self.ensure_device_and_dtype(y, device=self.device, dtype=self.dtype)
+        x, y = self.expand_proj(x), self.expand_proj(y)
+        x_heads, y_heads = x.split(self.in_features, dim=-1), y.split(self.in_features, dim=-1)
+        head_outs = [self.attn_heads[_](x=x_heads[_], y=y_heads[_], mask=mask) for _ in range(self._num_heads)]
+        return self.out_proj(torch.concat(head_outs, dim=-1))
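A minimal sketch of the new multi-head wrapper, assuming the signatures above: the input is expanded to `feature_dim * num_heads`, split per head, run through independent `Attention` modules, then projected back:

```python
import torch
from deeplotx.nn import MultiHeadAttention

mha = MultiHeadAttention(feature_dim=64, num_heads=4, positional=True)
x = torch.randn(2, 16, 64)
print(mha(x).shape)   # torch.Size([2, 16, 64])
```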
{deeplotx-0.6.1 → deeplotx-0.8.1}/deeplotx/nn/recursive_sequential.py

@@ -8,22 +8,22 @@ from deeplotx.nn.feed_forward import FeedForward


 class RecursiveSequential(BaseNeuralNetwork):
-    def __init__(self, input_dim: int, output_dim: int,
-
-                 ffn_layers: int = 1, ffn_expansion_factor: int | float = 2,
-
-
-
-
-
-
-        self.lstm = nn.LSTM(input_size=input_dim, hidden_size=hidden_dim,
+    def __init__(self, input_dim: int, output_dim: int, bias: bool = True,
+                 recursive_layers: int = 1, recursive_hidden_dim: int | None = None,
+                 ffn_layers: int = 1, ffn_expansion_factor: int | float = 2, dropout_rate: float = 0.05,
+                 model_name: str | None = None, device: str | None = None, dtype: torch.dtype | None = None):
+        super().__init__(in_features=input_dim, out_features=output_dim, model_name=model_name,
+                         device=device, dtype=dtype)
+        if recursive_hidden_dim is None:
+            recursive_hidden_dim = input_dim
+        self.lstm = nn.LSTM(input_size=input_dim, hidden_size=recursive_hidden_dim,
                             num_layers=recursive_layers, batch_first=True,
                             bias=True, bidirectional=True, device=self.device,
                             dtype=self.dtype)
-        self.ffn = FeedForward(feature_dim=
-
-
+        self.ffn = FeedForward(feature_dim=recursive_hidden_dim * 2, num_layers=ffn_layers,
+                               expansion_factor=ffn_expansion_factor, bias=bias, dropout_rate=dropout_rate,
+                               device=self.device, dtype=self.dtype)
+        self.__proj = nn.Linear(in_features=recursive_hidden_dim * 2, out_features=output_dim, bias=bias,
                                 device=self.device, dtype=self.dtype)

     def initial_state(self, batch_size: int = 1) -> tuple[torch.Tensor, torch.Tensor]:
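A sketch of the reworked constructor, assuming the 0.8.1 signatures above; note that `recursive_hidden_dim` replaces the old `hidden_dim` and falls back to `input_dim` when omitted:

```python
import torch
from deeplotx.nn import RecursiveSequential

model = RecursiveSequential(input_dim=64, output_dim=2,
                            recursive_layers=2, recursive_hidden_dim=128)
x = torch.randn(4, 20, 64)                            # (batch, seq_len, input_dim)
out, (h, c) = model(x, model.initial_state(batch_size=4))
print(out.shape)
```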
deeplotx-0.8.1/deeplotx/nn/roformer_encoder.py (new file)

@@ -0,0 +1,40 @@
+from typing_extensions import override
+
+import torch
+from torch import nn
+
+from deeplotx.nn.base_neural_network import BaseNeuralNetwork
+from deeplotx.nn.feed_forward import FeedForward
+from deeplotx.nn.multi_head_attention import MultiHeadAttention
+
+
+class RoFormerEncoder(BaseNeuralNetwork):
+    def __init__(self, feature_dim: int, attn_heads: int = 2, bias: bool = True,
+                 ffn_layers: int = 1, ffn_expansion_factor: int | float = 2,
+                 dropout_rate: float = 0.02, model_name: str | None = None,
+                 device: str | None = None, dtype: torch.dtype | None = None, **kwargs):
+        super().__init__(in_features=feature_dim, out_features=feature_dim,
+                         model_name=model_name, device=device, dtype=dtype)
+        self.attn = MultiHeadAttention(feature_dim=feature_dim, num_heads=attn_heads,
+                                       bias=bias, positional=True,
+                                       proj_layers=kwargs.get('attn_ffn_layers', 1),
+                                       proj_expansion_factor=kwargs.get('attn_expansion_factor', ffn_expansion_factor),
+                                       dropout_rate=kwargs.get('attn_dropout_rate', dropout_rate),
+                                       device=self.device, dtype=self.dtype, **kwargs)
+        self.ffn = FeedForward(feature_dim=feature_dim * 2, num_layers=ffn_layers,
+                               expansion_factor=ffn_expansion_factor,
+                               bias=bias, dropout_rate=dropout_rate,
+                               device=self.device, dtype=self.dtype)
+        self.layer_norm = nn.LayerNorm(normalized_shape=feature_dim, eps=1e-9,
+                                       device=self.device, dtype=self.dtype)
+        self.__proj = nn.Linear(in_features=feature_dim * 2, out_features=feature_dim,
+                                bias=bias, device=self.device, dtype=self.dtype)
+
+    @override
+    def forward(self, x: torch.Tensor, mask: torch.Tensor | None = None) -> torch.Tensor:
+        x = self.ensure_device_and_dtype(x, device=self.device, dtype=self.dtype)
+        if mask is not None:
+            mask = self.ensure_device_and_dtype(mask, device=self.device, dtype=self.dtype)
+        attn = self.attn(x=self.layer_norm(x), y=None, mask=mask)
+        x = torch.concat([attn, x], dim=-1)
+        return self.__proj(self.ffn(x))
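A minimal sketch of one encoder block as defined above: pre-norm rotary multi-head attention is concatenated with the residual stream, then passed through the FFN and projected back to `feature_dim`:

```python
import torch
from deeplotx.nn import RoFormerEncoder

encoder_block = RoFormerEncoder(feature_dim=64, attn_heads=2)
x = torch.randn(2, 16, 64)
print(encoder_block(x).shape)   # torch.Size([2, 16, 64])
```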
deeplotx-0.8.1/deeplotx/nn/rope.py (new file)

@@ -0,0 +1,42 @@
+from typing_extensions import override
+
+import torch
+
+from deeplotx.nn.base_neural_network import BaseNeuralNetwork
+
+DEFAULT_THETA = 10_000
+
+
+class RoPE(BaseNeuralNetwork):
+    def __init__(self, feature_dim: int, theta: int = DEFAULT_THETA,
+                 device: str | None = None, dtype: torch.dtype = torch.float32):
+        super().__init__(in_features=feature_dim, out_features=feature_dim, model_name=None,
+                         device=device, dtype=dtype)
+        assert feature_dim % 2 == 0, f'feature_dim ({feature_dim}) is not divisible by 2.'
+        self._theta = theta
+        self._num_groups = feature_dim // 2
+        self._inv_freq = 1.0 / (theta ** (torch.arange(start=0, end=self._num_groups, step=1,
+                                                       device=self.device, dtype=self.dtype).float() / self._num_groups))
+        self.register_buffer('inv_freq', self._inv_freq)
+
+    @property
+    def dim(self):
+        return self._dim
+
+    @property
+    def theta(self):
+        return self._theta
+
+    def rotate_half(self, _t: torch.Tensor) -> torch.Tensor:
+        return torch.cat((- _t[..., self._num_groups:], _t[..., :self._num_groups]), dim=-1)
+
+    @override
+    def forward(self, x: torch.Tensor) -> torch.Tensor:
+        x = self.ensure_device_and_dtype(x, device=self.device, dtype=self.dtype)
+        *other_dims, seq_len, feature_dim = x.shape
+        assert feature_dim == self.in_features, f"feature_dim of x doesn't match with defined feature_dim {self.in_features}."
+        t = torch.arange(start=0, end=seq_len, step=1, device=self.device, dtype=self.dtype)
+        freq = torch.outer(t, self._inv_freq)
+        emb = torch.cat((freq, freq), dim=-1)
+        sin_emb, cos_emb = emb.sin(), emb.cos()
+        return x * cos_emb + self.rotate_half(x) * sin_emb
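A minimal sketch of the standalone rotary positional module as added above; `feature_dim` must be even, and the last two input dimensions are treated as `(seq_len, feature_dim)`:

```python
import torch
from deeplotx.nn import RoPE

rope = RoPE(feature_dim=64)
q = torch.randn(2, 16, 64)
print(rope(q).shape)   # torch.Size([2, 16, 64]) -- same shape, positions rotated in
```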
{deeplotx-0.6.1 → deeplotx-0.8.1}/deeplotx/trainer/text_binary_classification_trainer.py

@@ -6,6 +6,7 @@ from torch import nn, optim
 from torch.utils.data import DataLoader, TensorDataset

 from deeplotx.encoder.long_text_encoder import LongTextEncoder
+from deeplotx.nn.attention import DEFAULT_THETA
 from deeplotx.nn.long_context_recursive_sequential import LongContextRecursiveSequential
 from deeplotx.trainer.base_trainer import BaseTrainer

@@ -24,8 +25,8 @@ class TextBinaryClassifierTrainer(BaseTrainer):
     def train(self, positive_texts: list[str], negative_texts: list[str],
               num_epochs: int, learning_rate: float = 2e-6, balancing_dataset: bool = True,
               train_loss_threshold: float = 0.0, valid_loss_threshold: float = 0.0,
-              alpha: float = 1e-4, rho: float = 0.2,
-
+              alpha: float = 1e-4, rho: float = 0.2, encoder_layers: int = 4, attn_heads: int = 6,
+              recursive_layers: int = 2, recursive_hidden_dim: int = 256, **kwargs) -> LongContextRecursiveSequential:
         if balancing_dataset:
             min_length = min(len(positive_texts), len(negative_texts))
             positive_texts = positive_texts[:min_length]
@@ -50,21 +51,24 @@ class TextBinaryClassifierTrainer(BaseTrainer):
         if self.model is None:
             ffn_layers = kwargs.get('ffn_layers', 5)
             ffn_expansion_factor = kwargs.get('ffn_expansion_factor', 2)
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+            bias = kwargs.get('bias', True)
+            dropout_rate = kwargs.get('dropout_rate', 0.1)
+            encoder_ffn_layers = kwargs.get('encoder_ffn_layers', ffn_layers)
+            encoder_expansion_factor = kwargs.get('encoder_expansion_factor', ffn_expansion_factor)
+            encoder_dropout_rate = kwargs.get('encoder_dropout_rate', dropout_rate)
+            attn_ffn_layers = kwargs.get('attn_ffn_layers', 1)
+            attn_expansion_factor = kwargs.get('attn_expansion_factor', ffn_expansion_factor)
+            attn_dropout_rate = kwargs.get('attn_dropout_rate', dropout_rate)
+            theta = kwargs.get('theta', DEFAULT_THETA)
+            self.model = LongContextRecursiveSequential(input_dim=feature_dim, output_dim=1, bias=bias,
+                                                        encoder_layers=encoder_layers, attn_heads=attn_heads,
+                                                        recursive_layers=recursive_layers, recursive_hidden_dim=recursive_hidden_dim,
+                                                        ffn_layers=ffn_layers, ffn_expansion_factor=ffn_expansion_factor, dropout_rate=dropout_rate,
+                                                        encoder_ffn_layers=encoder_ffn_layers, encoder_expansion_factor=encoder_expansion_factor,
+                                                        encoder_dropout_rate=encoder_dropout_rate, attn_ffn_layers=attn_ffn_layers,
+                                                        attn_expansion_factor=attn_expansion_factor, attn_dropout_rate=attn_dropout_rate,
+                                                        theta=theta).initialize_weights()
+        logger.debug(f'Training Model: \n{self.model}')
         loss_function = nn.BCELoss()
         optimizer = optim.Adamax(self.model.parameters(), lr=learning_rate)
         for epoch in range(num_epochs):
{deeplotx-0.6.1 → deeplotx-0.8.1}/deeplotx.egg-info/PKG-INFO

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: deeplotx
-Version: 0.6.1
+Version: 0.8.1
 Summary: Easy-2-use long text NLP toolkit.
 Requires-Python: >=3.10
 Description-Content-Type: text/markdown
@@ -13,7 +13,7 @@ Requires-Dist: python-dotenv
 Requires-Dist: torch
 Requires-Dist: transformers
 Requires-Dist: typing-extensions
-Requires-Dist: vortezwohl>=0.0.
+Requires-Dist: vortezwohl>=0.0.8
 Dynamic: license-file

 [](https://deepwiki.com/vortezwohl/DeepLoTX)
{deeplotx-0.6.1 → deeplotx-0.8.1}/deeplotx.egg-info/SOURCES.txt

@@ -12,6 +12,7 @@ deeplotx/encoder/encoder.py
 deeplotx/encoder/long_text_encoder.py
 deeplotx/encoder/longformer_encoder.py
 deeplotx/nn/__init__.py
+deeplotx/nn/attention.py
 deeplotx/nn/auto_regression.py
 deeplotx/nn/base_neural_network.py
 deeplotx/nn/feed_forward.py
@@ -19,8 +20,10 @@ deeplotx/nn/linear_regression.py
 deeplotx/nn/logistic_regression.py
 deeplotx/nn/long_context_auto_regression.py
 deeplotx/nn/long_context_recursive_sequential.py
+deeplotx/nn/multi_head_attention.py
 deeplotx/nn/recursive_sequential.py
-deeplotx/nn/self_attention.py
+deeplotx/nn/roformer_encoder.py
+deeplotx/nn/rope.py
 deeplotx/nn/softmax_regression.py
 deeplotx/similarity/__init__.py
 deeplotx/similarity/distribution.py
{deeplotx-0.6.1 → deeplotx-0.8.1}/pyproject.toml

@@ -1,6 +1,6 @@
 [project]
 name = "deeplotx"
-version = "0.6.1"
+version = "0.8.1"
 description = "Easy-2-use long text NLP toolkit."
 readme = "README.md"
 requires-python = ">=3.10"
@@ -13,5 +13,5 @@ dependencies = [
     "torch",
     "transformers",
     "typing-extensions",
-    "vortezwohl>=0.0.
+    "vortezwohl>=0.0.8",
 ]
deeplotx-0.6.1/deeplotx/nn/auto_regression.py (removed in 0.8.1)

@@ -1,15 +0,0 @@
-import torch
-
-from deeplotx.nn import RecursiveSequential
-
-
-class AutoRegression(RecursiveSequential):
-    def __init__(self, feature_dim: int, hidden_dim: int | None = None,
-                 recursive_layers: int = 2, ffn_layers: int = 1, ffn_expansion_factor: int | float = 2,
-                 ffn_bias: bool = True, ffn_dropout_rate: float = 0.05, model_name: str | None = None,
-                 device: str | None = None, dtype: torch.dtype | None = None):
-        super().__init__(input_dim=feature_dim, output_dim=feature_dim,
-                         hidden_dim=hidden_dim, recursive_layers=recursive_layers,
-                         ffn_layers=ffn_layers, ffn_expansion_factor=ffn_expansion_factor,
-                         ffn_bias=ffn_bias, ffn_dropout_rate=ffn_dropout_rate,
-                         model_name=model_name, device=device, dtype=dtype)
deeplotx-0.6.1/deeplotx/nn/long_context_auto_regression.py (removed in 0.8.1)

@@ -1,15 +0,0 @@
-import torch
-
-from deeplotx.nn import LongContextRecursiveSequential
-
-
-class LongContextAutoRegression(LongContextRecursiveSequential):
-    def __init__(self, feature_dim: int, hidden_dim: int | None = None,
-                 recursive_layers: int = 2, ffn_layers: int = 1, ffn_expansion_factor: int | float = 2,
-                 ffn_bias: bool = True, ffn_dropout_rate: float = 0.05, model_name: str | None = None,
-                 device: str | None = None, dtype: torch.dtype | None = None):
-        super().__init__(input_dim=feature_dim, output_dim=feature_dim,
-                         hidden_dim=hidden_dim, recursive_layers=recursive_layers,
-                         ffn_layers=ffn_layers, ffn_expansion_factor=ffn_expansion_factor,
-                         ffn_bias=ffn_bias, ffn_dropout_rate=ffn_dropout_rate,
-                         model_name=model_name, device=device, dtype=dtype)
deeplotx-0.6.1/deeplotx/nn/long_context_recursive_sequential.py (removed in 0.8.1)

@@ -1,34 +0,0 @@
-from typing_extensions import override
-
-import torch
-from torch import nn
-
-from deeplotx.nn.recursive_sequential import RecursiveSequential
-from deeplotx.nn.self_attention import SelfAttention
-
-
-class LongContextRecursiveSequential(RecursiveSequential):
-    def __init__(self, input_dim: int, output_dim: int,
-                 hidden_dim: int | None = None, recursive_layers: int = 2,
-                 ffn_layers: int = 1, ffn_expansion_factor: int | float = 2,
-                 ffn_bias: bool = True, ffn_dropout_rate: float = 0.05,
-                 model_name: str | None = None, device: str | None = None,
-                 dtype: torch.dtype | None = None, **kwargs):
-        super().__init__(input_dim=input_dim, output_dim=output_dim,
-                         hidden_dim=hidden_dim, recursive_layers=recursive_layers,
-                         ffn_layers=ffn_layers, ffn_expansion_factor=ffn_expansion_factor,
-                         ffn_bias=ffn_bias, ffn_dropout_rate=ffn_dropout_rate,
-                         model_name=model_name, device=device, dtype=dtype)
-        self.self_attention = SelfAttention(feature_dim=input_dim, bias=kwargs.get('attn_proj_bias', ffn_bias),
-                                            proj_layers=kwargs.get('attn_proj_layers', 1),
-                                            proj_expansion_factor=kwargs.get('attn_proj_expansion_factor', ffn_expansion_factor),
-                                            dropout_rate=kwargs.get('attn_proj_dropout_rate', ffn_dropout_rate))
-        self.__proj = nn.Linear(in_features=input_dim * 2, out_features=input_dim,
-                                bias=ffn_bias, device=self.device, dtype=self.dtype)
-
-    @override
-    def forward(self, x: torch.Tensor, state: tuple[torch.Tensor, torch.Tensor]) -> tuple[torch.Tensor, tuple[torch.Tensor, torch.Tensor]]:
-        x = self.ensure_device_and_dtype(x, device=self.device, dtype=self.dtype)
-        x = torch.cat([self.self_attention(x), x], dim=-1)
-        x = nn.LayerNorm(normalized_shape=x.shape[-1], eps=1e-9, device=self.device, dtype=self.dtype)(x)
-        return super().forward(self.__proj(x), state)
All remaining files listed above with +0 -0 are unchanged between 0.6.1 and 0.8.1.