deeplotx-0.6.1-py3-none-any.whl → deeplotx-0.8.1-py3-none-any.whl

This diff shows the contents of two publicly released versions of the package, as published to their respective public registries. It is provided for informational purposes only.
deeplotx/__init__.py CHANGED
@@ -5,14 +5,16 @@ __ROOT__ = os.path.dirname(os.path.abspath(__file__))
 
 from .encoder import Encoder, LongTextEncoder, LongformerEncoder
 from .nn import (
-    BaseNeuralNetwork,
     FeedForward,
     LinearRegression,
     LogisticRegression,
     SoftmaxRegression,
     RecursiveSequential,
     LongContextRecursiveSequential,
-    SelfAttention,
+    RoPE,
+    Attention,
+    MultiHeadAttention,
+    RoFormerEncoder,
    AutoRegression,
    LongContextAutoRegression
 )
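
With this change the top-level namespace gains the new rotary-attention stack and no longer re-exports BaseNeuralNetwork or SelfAttention, so downstream code imports the new names directly:

    from deeplotx import RoPE, Attention, MultiHeadAttention, RoFormerEncoder
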
deeplotx/encoder/long_text_encoder.py CHANGED
@@ -17,6 +17,7 @@ class LongTextEncoder(Encoder):
                  overlapping: int = 32, model_name_or_path: str = DEFAULT_BERT,
                  cache_capacity: int = 64, max_workers: int = 8, device: str | None = None):
         super().__init__(model_name_or_path=model_name_or_path, device=device)
+        assert overlapping < chunk_size, f'overlapping ({overlapping}) must be less than chunk size ({chunk_size}).'
         self._max_length = max_length
         self._chunk_size = chunk_size
         self._overlapping = overlapping
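
The new assertion guards the chunking arithmetic: consecutive chunks advance by chunk_size - overlapping tokens, which must stay positive. A minimal sketch of that invariant (the actual chunking loop is not part of this diff, and the chunk_size value below is illustrative):

    # Illustrative only: consecutive chunks step by (chunk_size - overlapping).
    def chunk_spans(num_tokens: int, chunk_size: int, overlapping: int = 32) -> list[tuple[int, int]]:
        assert overlapping < chunk_size, 'otherwise the stride is <= 0 and chunking never advances'
        stride = chunk_size - overlapping
        return [(start, min(start + chunk_size, num_tokens))
                for start in range(0, num_tokens, stride)]

    print(chunk_spans(1000, chunk_size=448))  # [(0, 448), (416, 864), (832, 1000)]
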
deeplotx/nn/__init__.py CHANGED
@@ -5,6 +5,9 @@ from .logistic_regression import LogisticRegression
 from .softmax_regression import SoftmaxRegression
 from .recursive_sequential import RecursiveSequential
 from .long_context_recursive_sequential import LongContextRecursiveSequential
-from .self_attention import SelfAttention
+from .rope import RoPE
+from .attention import Attention
+from .multi_head_attention import MultiHeadAttention
+from .roformer_encoder import RoFormerEncoder
 from .auto_regression import AutoRegression
 from .long_context_auto_regression import LongContextAutoRegression
deeplotx/nn/self_attention.py → deeplotx/nn/attention.py RENAMED
@@ -4,14 +4,17 @@ import torch
 
 from deeplotx.nn.base_neural_network import BaseNeuralNetwork
 from deeplotx.nn.feed_forward import FeedForward
+from deeplotx.nn.rope import RoPE, DEFAULT_THETA
 
 
-class SelfAttention(BaseNeuralNetwork):
-    def __init__(self, feature_dim: int, bias: bool = True, proj_layers: int = 1,
-                 proj_expansion_factor: int | float = 1.5, dropout_rate: float = 0.02,
-                 model_name: str | None = None, device: str | None = None, dtype: torch.dtype | None = None):
+class Attention(BaseNeuralNetwork):
+    def __init__(self, feature_dim: int, bias: bool = True, positional: bool = True,
+                 proj_layers: int = 1, proj_expansion_factor: int | float = 1.5, dropout_rate: float = 0.02,
+                 model_name: str | None = None, device: str | None = None, dtype: torch.dtype | None = None,
+                 **kwargs):
         super().__init__(in_features=feature_dim, out_features=feature_dim, model_name=model_name,
                          device=device, dtype=dtype)
+        self._positional = positional
         self._feature_dim = feature_dim
         self.q_proj = FeedForward(feature_dim=self._feature_dim, num_layers=proj_layers,
                                   expansion_factor=proj_expansion_factor,
@@ -22,18 +25,24 @@ class SelfAttention(BaseNeuralNetwork):
         self.v_proj = FeedForward(feature_dim=self._feature_dim, num_layers=proj_layers,
                                   expansion_factor=proj_expansion_factor,
                                   bias=bias, dropout_rate=dropout_rate, device=self.device, dtype=self.dtype)
+        if self._positional:
+            self.rope = RoPE(feature_dim=self._feature_dim, theta=kwargs.get('theta', DEFAULT_THETA),
+                             device=self.device, dtype=self.dtype)
 
-    def _attention(self, x: torch.Tensor, mask: torch.Tensor | None = None) -> torch.Tensor:
-        q, k = self.q_proj(x), self.k_proj(x)
+    def _attention(self, x: torch.Tensor, y: torch.Tensor, mask: torch.Tensor | None = None) -> torch.Tensor:
+        q, k = self.q_proj(x), self.k_proj(y)
+        if self._positional:
+            q, k = self.rope(q), self.rope(k)
         attn = torch.matmul(q, k.transpose(-2, -1))
         attn = attn / (self._feature_dim ** 0.5)
         attn = attn.masked_fill(mask == 0, -1e9) if mask is not None else attn
         return torch.softmax(attn, dim=-1)
 
     @override
-    def forward(self, x: torch.Tensor, mask: torch.Tensor | None = None) -> torch.Tensor:
+    def forward(self, x: torch.Tensor, y: torch.Tensor | None = None, mask: torch.Tensor | None = None) -> torch.Tensor:
         x = self.ensure_device_and_dtype(x, device=self.device, dtype=self.dtype)
+        y = x if y is None else self.ensure_device_and_dtype(y, device=self.device, dtype=self.dtype)
         if mask is not None:
             mask = self.ensure_device_and_dtype(mask, device=self.device, dtype=self.dtype)
-        v = self.v_proj(x)
-        return torch.matmul(self._attention(x, mask), v)
+        v = self.v_proj(y)
+        return torch.matmul(self._attention(x, y, mask), v)
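
forward() now accepts an optional second sequence, so one module covers both self-attention (y defaults to x) and cross-attention, with RoPE applied to q and k whenever positional=True. A shape sketch, assuming [batch, seq_len, feature_dim] inputs:

    import torch
    from deeplotx.nn import Attention

    attn = Attention(feature_dim=64, positional=True)
    x = torch.randn(2, 10, 64)        # query sequence
    y = torch.randn(2, 16, 64)        # key/value sequence
    self_out = attn(x)                # y is None -> attends over x itself
    cross_out = attn(x, y)            # queries from x, keys/values from y
    print(self_out.shape, cross_out.shape)  # both torch.Size([2, 10, 64])
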
deeplotx/nn/auto_regression.py CHANGED
@@ -4,12 +4,11 @@ from deeplotx.nn import RecursiveSequential
 
 
 class AutoRegression(RecursiveSequential):
-    def __init__(self, feature_dim: int, hidden_dim: int | None = None,
-                 recursive_layers: int = 2, ffn_layers: int = 1, ffn_expansion_factor: int | float = 2,
-                 ffn_bias: bool = True, ffn_dropout_rate: float = 0.05, model_name: str | None = None,
-                 device: str | None = None, dtype: torch.dtype | None = None):
-        super().__init__(input_dim=feature_dim, output_dim=feature_dim,
-                         hidden_dim=hidden_dim, recursive_layers=recursive_layers,
+    def __init__(self, feature_dim: int, bias: bool = True,
+                 recursive_layers: int = 1, recursive_hidden_dim: int | None = None,
+                 ffn_layers: int = 1, ffn_expansion_factor: int | float = 2, dropout_rate: float = 0.05,
+                 model_name: str | None = None, device: str | None = None, dtype: torch.dtype | None = None):
+        super().__init__(input_dim=feature_dim, output_dim=feature_dim, bias=bias,
+                         recursive_layers=recursive_layers, recursive_hidden_dim=recursive_hidden_dim,
                          ffn_layers=ffn_layers, ffn_expansion_factor=ffn_expansion_factor,
-                         ffn_bias=ffn_bias, ffn_dropout_rate=ffn_dropout_rate,
-                         model_name=model_name, device=device, dtype=dtype)
+                         dropout_rate=dropout_rate, model_name=model_name, device=device, dtype=dtype)
deeplotx/nn/base_neural_network.py CHANGED
@@ -3,6 +3,7 @@ from abc import abstractmethod
 
 import torch
 from torch import nn
+from torch.nn import init
 
 DEFAULT_SUFFIX = 'dlx'
 
@@ -36,6 +37,44 @@ class BaseNeuralNetwork(nn.Module):
             x = x.to(dtype)
         return x
 
+    def initialize_weights(self):
+        for m in self.modules():
+            match m.__class__:
+                case nn.Linear:
+                    init.kaiming_normal_(m.weight, mode='fan_in', nonlinearity='leaky_relu')
+                    if m.bias is not None:
+                        init.constant_(m.bias, 0)
+                case nn.BatchNorm2d | nn.BatchNorm1d | nn.BatchNorm3d:
+                    init.constant_(m.weight, 1)
+                    init.constant_(m.bias, 0)
+                case nn.LSTM | nn.GRU:
+                    for name, param in m.named_parameters():
+                        _tmp_name = name.lower()
+                        if 'weight_ih' in _tmp_name:
+                            init.kaiming_normal_(param, mode='fan_in', nonlinearity='sigmoid')
+                        elif 'weight_hh' in _tmp_name:
+                            init.orthogonal_(param)
+                        elif 'bias' in _tmp_name:
+                            init.constant_(param, 0)
+                case _:
+                    pass
+        return self
+
+    def size(self) -> dict:
+        total_params = trainable_params = non_trainable_params = 0
+        for param in self.parameters():
+            params = param.numel()
+            total_params += params
+            if param.requires_grad:
+                trainable_params += params
+            else:
+                non_trainable_params += params
+        return {
+            'total': total_params,
+            'trainable': trainable_params,
+            'non_trainable': non_trainable_params
+        }
+
     def l1(self, _lambda: float = 1e-4) -> torch.Tensor:
         def _l1() -> torch.Tensor:
             l2_reg = torch.tensor(0., device=self.device, dtype=self.dtype)
@@ -77,3 +116,25 @@ class BaseNeuralNetwork(nn.Module):
         model_file_name = f'{model_name}.{_suffix}' if model_name is not None else f'{self._model_name}.{_suffix}'
         self.load_state_dict(torch.load(os.path.join(model_dir, model_file_name), map_location=self.device, weights_only=True))
         return self
+
+    def __str__(self):
+        formatted = super().__str__()
+        _line_len = len(sorted(formatted.splitlines(), key=lambda _: len(_), reverse=True)[0])
+        _splitter_1 = '=' * (_line_len + 10)
+        _splitter_2 = '-' * (_line_len + 10)
+        _size = self.size()
+        total_param = _size['total']
+        trainable_param = _size['trainable']
+        non_trainable_param = _size['non_trainable']
+        formatted = (f'{_splitter_1}\n'
+                     f'Model_Name: {self._model_name}\n'
+                     f'In_Features: {self.in_features}\n'
+                     f'Out_Features: {self.out_features}\n'
+                     f'Device: {self.device}\n'
+                     f'Dtype: {self.dtype}\n'
+                     f'Total_Parameters: {total_param}\n'
+                     f'Trainable_Parameters: {trainable_param}\n'
+                     f'NonTrainable_Parameters: {non_trainable_param}\n'
+                     f'{_splitter_2}'
+                     f'\n{formatted}\n{_splitter_1}')
+        return formatted
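
initialize_weights() returns self, so it chains at construction time, and size() backs the parameter counts printed by the new __str__ banner. A short sketch with AutoRegression (any BaseNeuralNetwork subclass behaves the same):

    from deeplotx.nn import AutoRegression

    model = AutoRegression(feature_dim=32).initialize_weights()  # Kaiming / orthogonal init, returns self
    print(model.size())  # e.g. {'total': ..., 'trainable': ..., 'non_trainable': ...}
    print(model)         # framed summary: model name, in/out features, device, dtype, parameter counts
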
deeplotx/nn/feed_forward.py CHANGED
@@ -12,13 +12,13 @@ class FeedForwardUnit(BaseNeuralNetwork):
                  device: str | None = None, dtype: torch.dtype | None = None):
         super().__init__(in_features=feature_dim, out_features=feature_dim, model_name=model_name, device=device, dtype=dtype)
         self._dropout_rate = dropout_rate
-        self.fc1 = nn.Linear(feature_dim, int(feature_dim * expansion_factor), bias=bias,
-                             device=self.device, dtype=self.dtype)
-        self.fc2 = nn.Linear(int(feature_dim * expansion_factor), feature_dim, bias=bias,
-                             device=self.device, dtype=self.dtype)
-        self.parametric_relu_1 = nn.PReLU(num_parameters=1, init=5e-3,
-                                          device=self.device, dtype=self.dtype)
-        self.layer_norm = nn.LayerNorm(normalized_shape=self.fc1.in_features, eps=1e-9,
+        self.up_proj = nn.Linear(in_features=feature_dim, out_features=int(feature_dim * expansion_factor),
+                                 bias=bias, device=self.device, dtype=self.dtype)
+        self.down_proj = nn.Linear(in_features=int(feature_dim * expansion_factor), out_features=feature_dim,
+                                   bias=bias, device=self.device, dtype=self.dtype)
+        self.parametric_relu = nn.PReLU(num_parameters=1, init=5e-3,
+                                        device=self.device, dtype=self.dtype)
+        self.layer_norm = nn.LayerNorm(normalized_shape=self.up_proj.in_features, eps=1e-9,
                                        device=self.device, dtype=self.dtype)
 
     @override
@@ -26,11 +26,11 @@ class FeedForwardUnit(BaseNeuralNetwork):
         x = self.ensure_device_and_dtype(x, device=self.device, dtype=self.dtype)
         residual = x
         x = self.layer_norm(x)
-        x = self.fc1(x)
-        x = self.parametric_relu_1(x)
+        x = self.up_proj(x)
+        x = self.parametric_relu(x)
         if self._dropout_rate > .0:
             x = torch.dropout(x, p=self._dropout_rate, train=self.training)
-        return self.fc2(x) + residual
+        return self.down_proj(x) + residual
 
 
 class FeedForward(BaseNeuralNetwork):
@@ -43,7 +43,7 @@ class FeedForward(BaseNeuralNetwork):
         self.ffn_layers = nn.ModuleList([FeedForwardUnit(feature_dim=feature_dim,
                                                          expansion_factor=expansion_factor, bias=bias,
                                                          dropout_rate=dropout_rate,
-                                                         device=self.device, dtype=self.dtype)] * num_layers)
+                                                         device=self.device, dtype=self.dtype) for _ in range(num_layers)])
 
     @override
     def forward(self, x: torch.Tensor) -> torch.Tensor:
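
The ModuleList change is a behavioral fix, not a rename: [unit] * num_layers registers one FeedForwardUnit num_layers times, so every "layer" shares the same weights, while the comprehension builds independent units. The difference in plain PyTorch:

    import torch
    from torch import nn

    shared = nn.ModuleList([nn.Linear(4, 4)] * 3)                     # one module, three references
    independent = nn.ModuleList([nn.Linear(4, 4) for _ in range(3)])  # three distinct modules
    print(shared[0] is shared[2])            # True  -> gradients accumulate into one weight matrix
    print(independent[0] is independent[2])  # False -> each layer trains separately
    print(sum(p.numel() for p in shared.parameters()),
          sum(p.numel() for p in independent.parameters()))  # 20 60
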
deeplotx/nn/long_context_auto_regression.py CHANGED
@@ -4,12 +4,13 @@ from deeplotx.nn import LongContextRecursiveSequential
 
 
 class LongContextAutoRegression(LongContextRecursiveSequential):
-    def __init__(self, feature_dim: int, hidden_dim: int | None = None,
-                 recursive_layers: int = 2, ffn_layers: int = 1, ffn_expansion_factor: int | float = 2,
-                 ffn_bias: bool = True, ffn_dropout_rate: float = 0.05, model_name: str | None = None,
-                 device: str | None = None, dtype: torch.dtype | None = None):
-        super().__init__(input_dim=feature_dim, output_dim=feature_dim,
-                         hidden_dim=hidden_dim, recursive_layers=recursive_layers,
-                         ffn_layers=ffn_layers, ffn_expansion_factor=ffn_expansion_factor,
-                         ffn_bias=ffn_bias, ffn_dropout_rate=ffn_dropout_rate,
-                         model_name=model_name, device=device, dtype=dtype)
+    def __init__(self, feature_dim: int, bias: bool = True,
+                 encoder_layers: int = 1, attn_heads: int = 1, recursive_layers: int = 1, recursive_hidden_dim: int | None = None,
+                 ffn_layers: int = 1, ffn_expansion_factor: int | float = 2, dropout_rate: float = 0.05,
+                 model_name: str | None = None, device: str | None = None, dtype: torch.dtype | None = None,
+                 **kwargs):
+        super().__init__(input_dim=feature_dim, output_dim=feature_dim, bias=bias,
+                         encoder_layers=encoder_layers, attn_heads=attn_heads,
+                         recursive_layers=recursive_layers, recursive_hidden_dim=recursive_hidden_dim,
+                         ffn_layers=ffn_layers, ffn_expansion_factor=ffn_expansion_factor, dropout_rate=dropout_rate,
+                         model_name=model_name, device=device, dtype=dtype, **kwargs)
deeplotx/nn/long_context_recursive_sequential.py CHANGED
@@ -3,32 +3,34 @@ from typing_extensions import override
 import torch
 from torch import nn
 
+from deeplotx.nn.attention import DEFAULT_THETA
 from deeplotx.nn.recursive_sequential import RecursiveSequential
-from deeplotx.nn.self_attention import SelfAttention
+from deeplotx.nn.roformer_encoder import RoFormerEncoder
 
 
 class LongContextRecursiveSequential(RecursiveSequential):
-    def __init__(self, input_dim: int, output_dim: int,
-                 hidden_dim: int | None = None, recursive_layers: int = 2,
-                 ffn_layers: int = 1, ffn_expansion_factor: int | float = 2,
-                 ffn_bias: bool = True, ffn_dropout_rate: float = 0.05,
-                 model_name: str | None = None, device: str | None = None,
-                 dtype: torch.dtype | None = None, **kwargs):
-        super().__init__(input_dim=input_dim, output_dim=output_dim,
-                         hidden_dim=hidden_dim, recursive_layers=recursive_layers,
-                         ffn_layers=ffn_layers, ffn_expansion_factor=ffn_expansion_factor,
-                         ffn_bias=ffn_bias, ffn_dropout_rate=ffn_dropout_rate,
+    def __init__(self, input_dim: int, output_dim: int, bias: bool = True,
+                 encoder_layers: int = 1, attn_heads: int = 1, recursive_layers: int = 2, recursive_hidden_dim: int | None = None,
+                 ffn_layers: int = 1, ffn_expansion_factor: int | float = 2, dropout_rate: float = 0.05,
+                 model_name: str | None = None, device: str | None = None, dtype: torch.dtype | None = None,
+                 **kwargs):
+        super().__init__(input_dim=input_dim, output_dim=output_dim, bias=bias,
+                         recursive_layers=recursive_layers, recursive_hidden_dim=recursive_hidden_dim,
+                         ffn_layers=ffn_layers, ffn_expansion_factor=ffn_expansion_factor, dropout_rate=dropout_rate,
                          model_name=model_name, device=device, dtype=dtype)
-        self.self_attention = SelfAttention(feature_dim=input_dim, bias=kwargs.get('attn_proj_bias', ffn_bias),
-                                            proj_layers=kwargs.get('attn_proj_layers', 1),
-                                            proj_expansion_factor=kwargs.get('attn_proj_expansion_factor', ffn_expansion_factor),
-                                            dropout_rate=kwargs.get('attn_proj_dropout_rate', ffn_dropout_rate))
-        self.__proj = nn.Linear(in_features=input_dim * 2, out_features=input_dim,
-                                bias=ffn_bias, device=self.device, dtype=self.dtype)
+        self.roformer_encoders = nn.ModuleList([RoFormerEncoder(feature_dim=input_dim, attn_heads=attn_heads, bias=bias,
+                                                                ffn_layers=kwargs.get('encoder_ffn_layers', ffn_layers),
+                                                                ffn_expansion_factor=kwargs.get('encoder_expansion_factor', ffn_expansion_factor),
+                                                                dropout_rate=kwargs.get('encoder_dropout_rate', dropout_rate),
+                                                                attn_ffn_layers=kwargs.get('attn_ffn_layers', 1),
+                                                                attn_expansion_factor=kwargs.get('attn_expansion_factor', ffn_expansion_factor),
+                                                                attn_dropout_rate=kwargs.get('attn_dropout_rate', dropout_rate),
+                                                                theta=kwargs.get('theta', DEFAULT_THETA),
+                                                                device=self.device, dtype=self.dtype) for _ in range(encoder_layers)])
 
     @override
     def forward(self, x: torch.Tensor, state: tuple[torch.Tensor, torch.Tensor]) -> tuple[torch.Tensor, tuple[torch.Tensor, torch.Tensor]]:
         x = self.ensure_device_and_dtype(x, device=self.device, dtype=self.dtype)
-        x = torch.cat([self.self_attention(x), x], dim=-1)
-        x = nn.LayerNorm(normalized_shape=x.shape[-1], eps=1e-9, device=self.device, dtype=self.dtype)(x)
-        return super().forward(self.__proj(x), state)
+        for roformer_encoder in self.roformer_encoders:
+            x = roformer_encoder(x)
+        return super().forward(x, state)
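
The single SelfAttention + LayerNorm + projection block is replaced by a stack of encoder_layers RoFormer encoders, tuned through the encoder_*/attn_* kwargs read above. A construction sketch based on this signature (the state tuple comes from RecursiveSequential.initial_state()):

    import torch
    from deeplotx.nn import LongContextRecursiveSequential

    model = LongContextRecursiveSequential(input_dim=128, output_dim=1,
                                           encoder_layers=2, attn_heads=4,
                                           recursive_layers=2, recursive_hidden_dim=256,
                                           encoder_dropout_rate=0.1, theta=10_000)  # theta forwarded via **kwargs
    x = torch.randn(1, 50, 128)                    # [batch, seq_len, input_dim]
    out, state = model(x, model.initial_state(1))
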
deeplotx/nn/multi_head_attention.py ADDED
@@ -0,0 +1,34 @@
+from typing_extensions import override
+
+import torch
+from torch import nn
+
+from deeplotx.nn.base_neural_network import BaseNeuralNetwork
+from deeplotx.nn.attention import Attention
+
+
+class MultiHeadAttention(BaseNeuralNetwork):
+    def __init__(self, feature_dim: int, num_heads: int = 1, bias: bool = True, positional: bool = True,
+                 proj_layers: int = 1, proj_expansion_factor: int | float = 1.5, dropout_rate: float = 0.02,
+                 model_name: str | None = None, device: str | None = None, dtype: torch.dtype | None = None,
+                 **kwargs):
+        super().__init__(in_features=feature_dim, out_features=feature_dim, model_name=model_name,
+                         device=device, dtype=dtype)
+        self._num_heads = num_heads
+        self.expand_proj = nn.Linear(in_features=feature_dim, out_features=feature_dim * self._num_heads, bias=bias,
+                                     device=self.device, dtype=self.dtype)
+        self.attn_heads = nn.ModuleList([Attention(feature_dim=feature_dim, bias=bias, positional=positional,
+                                                   proj_layers=proj_layers, proj_expansion_factor=proj_expansion_factor,
+                                                   dropout_rate=dropout_rate, device=self.device, dtype=self.dtype,
+                                                   **kwargs) for _ in range(self._num_heads)])
+        self.out_proj = nn.Linear(in_features=feature_dim * self._num_heads, out_features=feature_dim, bias=bias,
+                                  device=self.device, dtype=self.dtype)
+
+    @override
+    def forward(self, x: torch.Tensor, y: torch.Tensor | None = None, mask: torch.Tensor | None = None) -> torch.Tensor:
+        x = self.ensure_device_and_dtype(x, device=self.device, dtype=self.dtype)
+        y = x if y is None else self.ensure_device_and_dtype(y, device=self.device, dtype=self.dtype)
+        x, y = self.expand_proj(x), self.expand_proj(y)
+        x_heads, y_heads = x.split(self.in_features, dim=-1), y.split(self.in_features, dim=-1)
+        head_outs = [self.attn_heads[_](x=x_heads[_], y=y_heads[_], mask=mask) for _ in range(self._num_heads)]
+        return self.out_proj(torch.concat(head_outs, dim=-1))
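
Unlike the classic multi-head layout that splits feature_dim across heads, this design widens the input to feature_dim * num_heads so every head attends over a full-width slice, then out_proj maps the concatenation back down. A quick shape check, assuming [batch, seq_len, feature_dim] inputs:

    import torch
    from deeplotx.nn import MultiHeadAttention

    mha = MultiHeadAttention(feature_dim=64, num_heads=4)
    x = torch.randn(2, 10, 64)
    print(mha(x).shape)  # torch.Size([2, 10, 64]); 4 heads of width 64, concat -> 256 -> out_proj -> 64
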
deeplotx/nn/recursive_sequential.py CHANGED
@@ -8,22 +8,22 @@ from deeplotx.nn.feed_forward import FeedForward
 
 
 class RecursiveSequential(BaseNeuralNetwork):
-    def __init__(self, input_dim: int, output_dim: int,
-                 hidden_dim: int | None = None, recursive_layers: int = 2,
-                 ffn_layers: int = 1, ffn_expansion_factor: int | float = 2,
-                 ffn_bias: bool = True, ffn_dropout_rate: float = 0.05,
-                 model_name: str | None = None, device: str | None = None,
-                 dtype: torch.dtype | None = None):
-        super().__init__(in_features=input_dim, out_features=output_dim, model_name=model_name, device=device, dtype=dtype)
-        if hidden_dim is None:
-            hidden_dim = input_dim
-        self.lstm = nn.LSTM(input_size=input_dim, hidden_size=hidden_dim,
+    def __init__(self, input_dim: int, output_dim: int, bias: bool = True,
+                 recursive_layers: int = 1, recursive_hidden_dim: int | None = None,
+                 ffn_layers: int = 1, ffn_expansion_factor: int | float = 2, dropout_rate: float = 0.05,
+                 model_name: str | None = None, device: str | None = None, dtype: torch.dtype | None = None):
+        super().__init__(in_features=input_dim, out_features=output_dim, model_name=model_name,
+                         device=device, dtype=dtype)
+        if recursive_hidden_dim is None:
+            recursive_hidden_dim = input_dim
+        self.lstm = nn.LSTM(input_size=input_dim, hidden_size=recursive_hidden_dim,
                             num_layers=recursive_layers, batch_first=True,
                             bias=True, bidirectional=True, device=self.device,
                             dtype=self.dtype)
-        self.ffn = FeedForward(feature_dim=hidden_dim * 2, num_layers=ffn_layers, expansion_factor=ffn_expansion_factor,
-                               bias=ffn_bias, dropout_rate=ffn_dropout_rate, device=self.device, dtype=self.dtype)
-        self.__proj = nn.Linear(in_features=hidden_dim * 2, out_features=output_dim, bias=ffn_bias,
+        self.ffn = FeedForward(feature_dim=recursive_hidden_dim * 2, num_layers=ffn_layers,
+                               expansion_factor=ffn_expansion_factor, bias=bias, dropout_rate=dropout_rate,
+                               device=self.device, dtype=self.dtype)
+        self.__proj = nn.Linear(in_features=recursive_hidden_dim * 2, out_features=output_dim, bias=bias,
                                 device=self.device, dtype=self.dtype)
 
     def initial_state(self, batch_size: int = 1) -> tuple[torch.Tensor, torch.Tensor]:
deeplotx/nn/roformer_encoder.py ADDED
@@ -0,0 +1,40 @@
+from typing_extensions import override
+
+import torch
+from torch import nn
+
+from deeplotx.nn.base_neural_network import BaseNeuralNetwork
+from deeplotx.nn.feed_forward import FeedForward
+from deeplotx.nn.multi_head_attention import MultiHeadAttention
+
+
+class RoFormerEncoder(BaseNeuralNetwork):
+    def __init__(self, feature_dim: int, attn_heads: int = 2, bias: bool = True,
+                 ffn_layers: int = 1, ffn_expansion_factor: int | float = 2,
+                 dropout_rate: float = 0.02, model_name: str | None = None,
+                 device: str | None = None, dtype: torch.dtype | None = None, **kwargs):
+        super().__init__(in_features=feature_dim, out_features=feature_dim,
+                         model_name=model_name, device=device, dtype=dtype)
+        self.attn = MultiHeadAttention(feature_dim=feature_dim, num_heads=attn_heads,
+                                       bias=bias, positional=True,
+                                       proj_layers=kwargs.get('attn_ffn_layers', 1),
+                                       proj_expansion_factor=kwargs.get('attn_expansion_factor', ffn_expansion_factor),
+                                       dropout_rate=kwargs.get('attn_dropout_rate', dropout_rate),
+                                       device=self.device, dtype=self.dtype, **kwargs)
+        self.ffn = FeedForward(feature_dim=feature_dim * 2, num_layers=ffn_layers,
+                               expansion_factor=ffn_expansion_factor,
+                               bias=bias, dropout_rate=dropout_rate,
+                               device=self.device, dtype=self.dtype)
+        self.layer_norm = nn.LayerNorm(normalized_shape=feature_dim, eps=1e-9,
+                                       device=self.device, dtype=self.dtype)
+        self.__proj = nn.Linear(in_features=feature_dim * 2, out_features=feature_dim,
+                                bias=bias, device=self.device, dtype=self.dtype)
+
+    @override
+    def forward(self, x: torch.Tensor, mask: torch.Tensor | None = None) -> torch.Tensor:
+        x = self.ensure_device_and_dtype(x, device=self.device, dtype=self.dtype)
+        if mask is not None:
+            mask = self.ensure_device_and_dtype(mask, device=self.device, dtype=self.dtype)
+        attn = self.attn(x=self.layer_norm(x), y=None, mask=mask)
+        x = torch.concat([attn, x], dim=-1)
+        return self.__proj(self.ffn(x))
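
The block is pre-norm with a concatenative residual: the attention output over the normalized input is concatenated with the raw input (doubling the width), then mapped back to feature_dim by the FFN and the final projection. A shape sketch:

    import torch
    from deeplotx.nn import RoFormerEncoder

    enc = RoFormerEncoder(feature_dim=64, attn_heads=2)
    x = torch.randn(2, 10, 64)
    print(enc(x).shape)  # torch.Size([2, 10, 64]); [attn, x] is 128-wide before the 128 -> 64 projection
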
deeplotx/nn/rope.py ADDED
@@ -0,0 +1,42 @@
+from typing_extensions import override
+
+import torch
+
+from deeplotx.nn.base_neural_network import BaseNeuralNetwork
+
+DEFAULT_THETA = 10_000
+
+
+class RoPE(BaseNeuralNetwork):
+    def __init__(self, feature_dim: int, theta: int = DEFAULT_THETA,
+                 device: str | None = None, dtype: torch.dtype = torch.float32):
+        super().__init__(in_features=feature_dim, out_features=feature_dim, model_name=None,
+                         device=device, dtype=dtype)
+        assert feature_dim % 2 == 0, f'feature_dim ({feature_dim}) is not divisible by 2.'
+        self._theta = theta
+        self._num_groups = feature_dim // 2
+        self._inv_freq = 1.0 / (theta ** (torch.arange(start=0, end=self._num_groups, step=1,
+                                                       device=self.device, dtype=self.dtype).float() / self._num_groups))
+        self.register_buffer('inv_freq', self._inv_freq)
+
+    @property
+    def dim(self):
+        return self.in_features
+
+    @property
+    def theta(self):
+        return self._theta
+
+    def rotate_half(self, _t: torch.Tensor) -> torch.Tensor:
+        return torch.cat((- _t[..., self._num_groups:], _t[..., :self._num_groups]), dim=-1)
+
+    @override
+    def forward(self, x: torch.Tensor) -> torch.Tensor:
+        x = self.ensure_device_and_dtype(x, device=self.device, dtype=self.dtype)
+        *other_dims, seq_len, feature_dim = x.shape
+        assert feature_dim == self.in_features, f"feature_dim of x doesn't match with defined feature_dim {self.in_features}."
+        t = torch.arange(start=0, end=seq_len, step=1, device=self.device, dtype=self.dtype)
+        freq = torch.outer(t, self._inv_freq)
+        emb = torch.cat((freq, freq), dim=-1)
+        sin_emb, cos_emb = emb.sin(), emb.cos()
+        return x * cos_emb + self.rotate_half(x) * sin_emb
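
Each channel pair (i, i + feature_dim/2) is rotated by the angle position * inv_freq[i], a pure 2D rotation, so norms are preserved and q·k products depend on relative offsets only. A quick check of the norm-preservation property, assuming the [batch, seq_len, feature_dim] layout used by Attention:

    import torch
    from deeplotx.nn import RoPE

    rope = RoPE(feature_dim=8)
    x = torch.randn(1, 5, 8)
    x_rot = rope(x)
    print(torch.allclose(x.norm(dim=-1), x_rot.norm(dim=-1), atol=1e-5))  # True: rotation preserves norms
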
deeplotx/trainer/text_binary_classification_trainer.py CHANGED
@@ -6,6 +6,7 @@ from torch import nn, optim
 from torch.utils.data import DataLoader, TensorDataset
 
 from deeplotx.encoder.long_text_encoder import LongTextEncoder
+from deeplotx.nn.attention import DEFAULT_THETA
 from deeplotx.nn.long_context_recursive_sequential import LongContextRecursiveSequential
 from deeplotx.trainer.base_trainer import BaseTrainer
 
@@ -24,8 +25,8 @@ class TextBinaryClassifierTrainer(BaseTrainer):
     def train(self, positive_texts: list[str], negative_texts: list[str],
               num_epochs: int, learning_rate: float = 2e-6, balancing_dataset: bool = True,
               train_loss_threshold: float = 0.0, valid_loss_threshold: float = 0.0,
-              alpha: float = 1e-4, rho: float = 0.2,
-              hidden_dim: int = 256, recursive_layers: int = 2, **kwargs) -> LongContextRecursiveSequential:
+              alpha: float = 1e-4, rho: float = 0.2, encoder_layers: int = 4, attn_heads: int = 6,
+              recursive_layers: int = 2, recursive_hidden_dim: int = 256, **kwargs) -> LongContextRecursiveSequential:
         if balancing_dataset:
             min_length = min(len(positive_texts), len(negative_texts))
             positive_texts = positive_texts[:min_length]
@@ -50,21 +51,24 @@ class TextBinaryClassifierTrainer(BaseTrainer):
         if self.model is None:
             ffn_layers = kwargs.get('ffn_layers', 5)
             ffn_expansion_factor = kwargs.get('ffn_expansion_factor', 2)
-            ffn_bias = kwargs.get('ffn_bias', True)
-            ffn_dropout_rate = kwargs.get('ffn_dropout_rate', 0.1)
-            self.model = LongContextRecursiveSequential(input_dim=feature_dim, output_dim=1,
-                                                        hidden_dim=hidden_dim,
-                                                        recursive_layers=recursive_layers,
-                                                        ffn_layers=ffn_layers,
-                                                        ffn_expansion_factor=ffn_expansion_factor,
-                                                        ffn_bias=ffn_bias,
-                                                        ffn_dropout_rate=ffn_dropout_rate,
-                                                        attn_proj_layers=kwargs.get('attn_proj_layers', ffn_layers),
-                                                        attn_proj_bias=kwargs.get('attn_proj_bias', ffn_bias),
-                                                        attn_proj_expansion_factor=kwargs.get('attn_proj_expansion_factor', ffn_expansion_factor),
-                                                        attn_proj_dropout_rate=kwargs.get('attn_proj_dropout_rate', ffn_dropout_rate),
-                                                        device=self.device, dtype=dtype)
-            logger.debug(f'Training Model: {self.model}')
+            bias = kwargs.get('bias', True)
+            dropout_rate = kwargs.get('dropout_rate', 0.1)
+            encoder_ffn_layers = kwargs.get('encoder_ffn_layers', ffn_layers)
+            encoder_expansion_factor = kwargs.get('encoder_expansion_factor', ffn_expansion_factor)
+            encoder_dropout_rate = kwargs.get('encoder_dropout_rate', dropout_rate)
+            attn_ffn_layers = kwargs.get('attn_ffn_layers', 1)
+            attn_expansion_factor = kwargs.get('attn_expansion_factor', ffn_expansion_factor)
+            attn_dropout_rate = kwargs.get('attn_dropout_rate', dropout_rate)
+            theta = kwargs.get('theta', DEFAULT_THETA)
+            self.model = LongContextRecursiveSequential(input_dim=feature_dim, output_dim=1, bias=bias,
+                                                        encoder_layers=encoder_layers, attn_heads=attn_heads,
+                                                        recursive_layers=recursive_layers, recursive_hidden_dim=recursive_hidden_dim,
+                                                        ffn_layers=ffn_layers, ffn_expansion_factor=ffn_expansion_factor, dropout_rate=dropout_rate,
+                                                        encoder_ffn_layers=encoder_ffn_layers, encoder_expansion_factor=encoder_expansion_factor,
+                                                        encoder_dropout_rate=encoder_dropout_rate, attn_ffn_layers=attn_ffn_layers,
+                                                        attn_expansion_factor=attn_expansion_factor, attn_dropout_rate=attn_dropout_rate,
+                                                        theta=theta).initialize_weights()
+            logger.debug(f'Training Model: \n{self.model}')
         loss_function = nn.BCELoss()
         optimizer = optim.Adamax(self.model.parameters(), lr=learning_rate)
         for epoch in range(num_epochs):
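
train() now drives the architecture directly through encoder_layers / attn_heads / recursive_hidden_dim, with the remaining knobs travelling in **kwargs as read above. A usage sketch; the trainer and encoder constructors are not part of this diff, so their arguments here are assumptions:

    from deeplotx import LongTextEncoder
    from deeplotx.trainer import TextBinaryClassifierTrainer  # assumed export of trainer/__init__.py

    # Hypothetical setup: only train()'s signature is shown in this diff.
    encoder = LongTextEncoder(max_length=2048, chunk_size=448, overlapping=32)
    trainer = TextBinaryClassifierTrainer(long_text_encoder=encoder)  # assumed constructor
    model = trainer.train(positive_texts, negative_texts,  # list[str] each
                          num_epochs=36, learning_rate=2e-6,
                          encoder_layers=4, attn_heads=6,
                          recursive_layers=2, recursive_hidden_dim=256,
                          ffn_layers=5, dropout_rate=0.1)
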
deeplotx/util/__init__.py CHANGED
@@ -1,2 +1,2 @@
-from .hash import md5, sha1
+from .hash import md5, sha1, sha256, sha512
 from .read_file import read_file, get_files
deeplotx-0.6.1.dist-info/METADATA → deeplotx-0.8.1.dist-info/METADATA RENAMED
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: deeplotx
-Version: 0.6.1
+Version: 0.8.1
 Summary: Easy-2-use long text NLP toolkit.
 Requires-Python: >=3.10
 Description-Content-Type: text/markdown
@@ -13,7 +13,7 @@ Requires-Dist: python-dotenv
 Requires-Dist: torch
 Requires-Dist: transformers
 Requires-Dist: typing-extensions
-Requires-Dist: vortezwohl>=0.0.6
+Requires-Dist: vortezwohl>=0.0.8
 Dynamic: license-file
 
 [![Ask DeepWiki](https://deepwiki.com/badge.svg)](https://deepwiki.com/vortezwohl/DeepLoTX)
deeplotx-0.8.1.dist-info/RECORD ADDED
@@ -0,0 +1,34 @@
+deeplotx/__init__.py,sha256=oNeA-vNu5YGiEQg0IcpKEdGh_Y_2uPvo2nqaNL_Zgv8,1159
+deeplotx/encoder/__init__.py,sha256=BrsF5_4O-4pfihYF2wjExDOoAY-03kGJTH-Mhez4tsE,129
+deeplotx/encoder/encoder.py,sha256=oSBdA-MiwMKNfTFJWR-RdvNS0G0qfX-Qchwy4LuwB00,3985
+deeplotx/encoder/long_text_encoder.py,sha256=PFR6jjGyg1N58TQlKsPaNQEd-EDl13Hyhu7A1KtGBbA,3743
+deeplotx/encoder/longformer_encoder.py,sha256=A8FXqd4mdHxSn_o_R689XtpT73ISDT788EgMQRGLC2g,1822
+deeplotx/nn/__init__.py,sha256=01I_yqx9GTa4wy3uNyAqhtxp66tDqxgMLC4Ky5Vnkrg,651
+deeplotx/nn/attention.py,sha256=HZ4nfFtkk7AnJ9nuoDSK6zIlIhZ_hbpZc3o6JQIBqJ8,2861
+deeplotx/nn/auto_regression.py,sha256=uISx29t_zkDGS8s2wvGB6wOGYZitQ4hQ7wyoQl4lcqY,857
+deeplotx/nn/base_neural_network.py,sha256=FjQEDFH810fJS7JV3aLgJZnaMqC6DH--wlBvuj-ghTc,5900
+deeplotx/nn/feed_forward.py,sha256=4ozj7EDalO9pb6JUhZtsJqE0r8bIHFApHRt2zTrl4ho,2931
+deeplotx/nn/linear_regression.py,sha256=QybSRfsf9PpgJAWixvrSNn3OYRKJXpSZMfqdzpw-Kd8,1280
+deeplotx/nn/logistic_regression.py,sha256=WfgHVNGIvAYsX2iea2wRlLgfbubYWyZkBLYpnpwOiyU,937
+deeplotx/nn/long_context_auto_regression.py,sha256=uy0k_g8wEfMH5nd5HCfrHA8dgEsuWBA2x8U-g3h4vQc,1054
+deeplotx/nn/long_context_recursive_sequential.py,sha256=i7kUml9RV_mkLRJ114UHsj9Gxw7LzJVQ4z8-REHa8-w,2682
+deeplotx/nn/multi_head_attention.py,sha256=3z73uGbvy3jszRy1B9nxGOJjlttHpcpRF8Qd09OEams,2267
+deeplotx/nn/recursive_sequential.py,sha256=8Z8vT70xTygusL-3w3QlB_B_k0xQSUU2ZTgC1LhEmzQ,2805
+deeplotx/nn/roformer_encoder.py,sha256=UJjKniNdMd0rfoYQcsX6bPo6Ceq_Z6EhwHe2kgqWC_k,2426
+deeplotx/nn/rope.py,sha256=RTOjnllubktdy2rzFWxBfkuLuGjhEMyDd06uojdqPhM,1848
+deeplotx/nn/softmax_regression.py,sha256=PN_1Zr_B_z5zYC_s_8k6c5fllOtxfJEvVvCmC9GRmx0,958
+deeplotx/similarity/__init__.py,sha256=s3u-KSgxjnMcWpIItKgXNltFMPQ7YY3CqsqHI-5F1c8,724
+deeplotx/similarity/distribution.py,sha256=wQGouuuW531pZeBRKBujXsdsoz4fDnPw7_GW81jwepc,1066
+deeplotx/similarity/set.py,sha256=zhGFxtSIXlWqvipBYzoiPahp4g0boAIoUiMfG0wl07A,686
+deeplotx/similarity/vector.py,sha256=WVbDHqykt-fvuILVrhUCtIFAOEjY_zvttrXGM9eylG0,1125
+deeplotx/trainer/__init__.py,sha256=Fl5DR9UecQc5VtBcczU9sx_HtPNoFohpuELOh-Jrsks,77
+deeplotx/trainer/base_trainer.py,sha256=z0MeAT-rRYmjeBXt0ckt7J1itYArR0Cx02wHesXUoZE,385
+deeplotx/trainer/text_binary_classification_trainer.py,sha256=QMLR4cC8NCUP-v7SOYVtCykNwahENmWHv9adaeTbYmA,6528
+deeplotx/util/__init__.py,sha256=5CH4MTeSgsmCe3LPMfvKoSBpwh6jDSBuHVElJvzQzgs,90
+deeplotx/util/hash.py,sha256=qbNU3RLBWGQYFVte9WZBAkZ1BkdjCXiKLDaKPN54KFk,662
+deeplotx/util/read_file.py,sha256=ptzouvEQeeW8KU5BrWNJlXw-vFXVrpS9SkAUxsu6A8A,612
+deeplotx-0.8.1.dist-info/licenses/LICENSE,sha256=IwGE9guuL-ryRPEKi6wFPI_zOhg7zDZbTYuHbSt_SAk,35823
+deeplotx-0.8.1.dist-info/METADATA,sha256=zMKRLmdsEibLnN_hAx3OM7AbX3SiM7X1-8w4eFJGxNY,12251
+deeplotx-0.8.1.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
+deeplotx-0.8.1.dist-info/top_level.txt,sha256=hKg4pVDXZ-WWxkRfJFczRIll1Sv7VyfKCmzHLXbuh1U,9
+deeplotx-0.8.1.dist-info/RECORD,,
deeplotx-0.6.1.dist-info/RECORD DELETED
@@ -1,31 +0,0 @@
-deeplotx/__init__.py,sha256=S0hLmRkHdoaxv7IPKVXh5Oat27pt_FGKGmKjp3aAyMU,1129
-deeplotx/encoder/__init__.py,sha256=BrsF5_4O-4pfihYF2wjExDOoAY-03kGJTH-Mhez4tsE,129
-deeplotx/encoder/encoder.py,sha256=oSBdA-MiwMKNfTFJWR-RdvNS0G0qfX-Qchwy4LuwB00,3985
-deeplotx/encoder/long_text_encoder.py,sha256=It0hXuSe0Hq5Y_3QhjEqvF1JbtX6Hc2VzVabzOu7fLA,3625
-deeplotx/encoder/longformer_encoder.py,sha256=A8FXqd4mdHxSn_o_R689XtpT73ISDT788EgMQRGLC2g,1822
-deeplotx/nn/__init__.py,sha256=f7f6Qx1Xkw3Nn3Lvafe7Pq4pUO7ZcESIA8KZxnSL_OM,535
-deeplotx/nn/auto_regression.py,sha256=8eEdXhOjRLKP4MpgX1wt9L1grU4_fS49ejVoNzFs7LM,877
-deeplotx/nn/base_neural_network.py,sha256=s7jHe7HprOelD1wZRbFdqb5Hxqs3sjLXLEo7OyDBHtk,3215
-deeplotx/nn/feed_forward.py,sha256=3lWV_snCp_PiqjxTYoiNlL9EF2heekWbMkKXoPlljkM,2839
-deeplotx/nn/linear_regression.py,sha256=QybSRfsf9PpgJAWixvrSNn3OYRKJXpSZMfqdzpw-Kd8,1280
-deeplotx/nn/logistic_regression.py,sha256=WfgHVNGIvAYsX2iea2wRlLgfbubYWyZkBLYpnpwOiyU,937
-deeplotx/nn/long_context_auto_regression.py,sha256=oMrxeVuCa1M2EQJSbOYlpTjl5NrkKGAHers8qIaZdU8,911
-deeplotx/nn/long_context_recursive_sequential.py,sha256=sU_22QH7Z6EJurMbTVEYPd83wC2dzadMIeztVIcc04I,2173
-deeplotx/nn/recursive_sequential.py,sha256=WsmXaIgTdpudo2bYcpBX8bKeJgPnT-atwEmLSXqQEco,2743
-deeplotx/nn/self_attention.py,sha256=HW9ZB3S6-yfTQc2745rJ6TM7L01P8ewxt7nGHosE2r8,2291
-deeplotx/nn/softmax_regression.py,sha256=PN_1Zr_B_z5zYC_s_8k6c5fllOtxfJEvVvCmC9GRmx0,958
-deeplotx/similarity/__init__.py,sha256=s3u-KSgxjnMcWpIItKgXNltFMPQ7YY3CqsqHI-5F1c8,724
-deeplotx/similarity/distribution.py,sha256=wQGouuuW531pZeBRKBujXsdsoz4fDnPw7_GW81jwepc,1066
-deeplotx/similarity/set.py,sha256=zhGFxtSIXlWqvipBYzoiPahp4g0boAIoUiMfG0wl07A,686
-deeplotx/similarity/vector.py,sha256=WVbDHqykt-fvuILVrhUCtIFAOEjY_zvttrXGM9eylG0,1125
-deeplotx/trainer/__init__.py,sha256=Fl5DR9UecQc5VtBcczU9sx_HtPNoFohpuELOh-Jrsks,77
-deeplotx/trainer/base_trainer.py,sha256=z0MeAT-rRYmjeBXt0ckt7J1itYArR0Cx02wHesXUoZE,385
-deeplotx/trainer/text_binary_classification_trainer.py,sha256=7oLzgXvdmFpQiBy7ncJ0smdqnMGr8xdZs6nTWpj6qfw,6085
-deeplotx/util/__init__.py,sha256=JxqAK_WOOHcYVSTHBT1-WuBwWrPEVDTV3titeVWvNUM,74
-deeplotx/util/hash.py,sha256=qbNU3RLBWGQYFVte9WZBAkZ1BkdjCXiKLDaKPN54KFk,662
-deeplotx/util/read_file.py,sha256=ptzouvEQeeW8KU5BrWNJlXw-vFXVrpS9SkAUxsu6A8A,612
-deeplotx-0.6.1.dist-info/licenses/LICENSE,sha256=IwGE9guuL-ryRPEKi6wFPI_zOhg7zDZbTYuHbSt_SAk,35823
-deeplotx-0.6.1.dist-info/METADATA,sha256=a1KcBHaewfyOwIywZ3wtBr8mdly4ofdb7Z4g2KYVzUk,12251
-deeplotx-0.6.1.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
-deeplotx-0.6.1.dist-info/top_level.txt,sha256=hKg4pVDXZ-WWxkRfJFczRIll1Sv7VyfKCmzHLXbuh1U,9
-deeplotx-0.6.1.dist-info/RECORD,,