deeplotx 0.5.6__py3-none-any.whl → 0.8.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
deeplotx/__init__.py CHANGED
@@ -5,12 +5,16 @@ __ROOT__ = os.path.dirname(os.path.abspath(__file__))
 
 from .encoder import Encoder, LongTextEncoder, LongformerEncoder
 from .nn import (
+    FeedForward,
     LinearRegression,
     LogisticRegression,
     SoftmaxRegression,
     RecursiveSequential,
     LongContextRecursiveSequential,
-    SelfAttention,
+    RoPE,
+    Attention,
+    MultiHeadAttention,
+    RoFormerEncoder,
     AutoRegression,
     LongContextAutoRegression
 )
deeplotx/encoder/encoder.py CHANGED
@@ -1,6 +1,7 @@
 import logging
 import os
 import math
+from requests.exceptions import ConnectTimeout, SSLError
 
 import torch
 from torch import nn
@@ -18,10 +19,27 @@ class Encoder(nn.Module):
         super().__init__()
         self.device = torch.device(device) if device is not None \
             else torch.device('cuda' if torch.cuda.is_available() else 'cpu')
-        self.tokenizer = AutoTokenizer.from_pretrained(pretrained_model_name_or_path=model_name_or_path,
-                                                       cache_dir=CACHE_PATH, _from_auto=True)
-        self.encoder = AutoModel.from_pretrained(pretrained_model_name_or_path=model_name_or_path,
-                                                 cache_dir=CACHE_PATH, _from_auto=True).to(self.device)
+        try:
+            self.tokenizer = AutoTokenizer.from_pretrained(pretrained_model_name_or_path=model_name_or_path,
+                                                           cache_dir=CACHE_PATH, _from_auto=True,
+                                                           trust_remote_code=True)
+            self.encoder = AutoModel.from_pretrained(pretrained_model_name_or_path=model_name_or_path,
+                                                     cache_dir=CACHE_PATH, _from_auto=True,
+                                                     trust_remote_code=True).to(self.device)
+        except ConnectTimeout:
+            self.tokenizer = AutoTokenizer.from_pretrained(pretrained_model_name_or_path=model_name_or_path,
+                                                           cache_dir=CACHE_PATH, _from_auto=True,
+                                                           trust_remote_code=True, local_files_only=True)
+            self.encoder = AutoModel.from_pretrained(pretrained_model_name_or_path=model_name_or_path,
+                                                     cache_dir=CACHE_PATH, _from_auto=True,
+                                                     trust_remote_code=True, local_files_only=True).to(self.device)
+        except SSLError:
+            self.tokenizer = AutoTokenizer.from_pretrained(pretrained_model_name_or_path=model_name_or_path,
+                                                           cache_dir=CACHE_PATH, _from_auto=True,
+                                                           trust_remote_code=True, local_files_only=True)
+            self.encoder = AutoModel.from_pretrained(pretrained_model_name_or_path=model_name_or_path,
+                                                     cache_dir=CACHE_PATH, _from_auto=True,
+                                                     trust_remote_code=True, local_files_only=True).to(self.device)
         self.embed_dim = self.encoder.config.max_position_embeddings
         logger.debug(f'{Encoder.__name__} initialized on device: {self.device}.')
 
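Both new exception branches retry the same `AutoTokenizer`/`AutoModel` loads with `local_files_only=True`, so a `ConnectTimeout` or `SSLError` during the online lookup falls back to whatever is already in the local cache. A condensed sketch of the same pattern written with a single combined `except` clause; the `load_encoder` helper and its return value are illustrative, not part of the package:

```python
from requests.exceptions import ConnectTimeout, SSLError
from transformers import AutoModel, AutoTokenizer

def load_encoder(model_name_or_path: str, cache_dir: str):
    # Shared keyword arguments for both the online attempt and the offline fallback.
    common = dict(pretrained_model_name_or_path=model_name_or_path,
                  cache_dir=cache_dir, trust_remote_code=True)
    try:
        # Online path: may reach out to the Hugging Face Hub.
        return AutoTokenizer.from_pretrained(**common), AutoModel.from_pretrained(**common)
    except (ConnectTimeout, SSLError):
        # Offline fallback: only use files already present in the cache directory.
        return (AutoTokenizer.from_pretrained(**common, local_files_only=True),
                AutoModel.from_pretrained(**common, local_files_only=True))
```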
deeplotx/encoder/long_text_encoder.py CHANGED
@@ -15,12 +15,14 @@ logger = logging.getLogger('deeplotx.embedding')
 class LongTextEncoder(Encoder):
     def __init__(self, max_length: int, chunk_size: int = 448,
                  overlapping: int = 32, model_name_or_path: str = DEFAULT_BERT,
-                 cache_capacity: int = 64, device: str | None = None):
+                 cache_capacity: int = 64, max_workers: int = 8, device: str | None = None):
         super().__init__(model_name_or_path=model_name_or_path, device=device)
+        assert overlapping < chunk_size, f'overlapping ({overlapping}) must be less than chunk size ({chunk_size}).'
         self._max_length = max_length
         self._chunk_size = chunk_size
         self._overlapping = overlapping
         self._cache = LRUCache(capacity=cache_capacity)
+        self._worker_group = ThreadPool(max_workers=max_workers)
 
     def __chunk_embedding(self, idx: int, x: torch.Tensor, mask: torch.Tensor) -> tuple[int, torch.Tensor]:
         return idx, super().forward(x, attention_mask=mask)
@@ -63,7 +65,7 @@ class LongTextEncoder(Encoder):
             _tmp_right = (i + 1) * self._chunk_size + self._overlapping
             chunks.append((i, torch.tensor([_text_to_input_ids[_tmp_left: _tmp_right]], dtype=torch.int, device=self.device),
                            torch.tensor([_text_to_input_ids_att_mask[_tmp_left: _tmp_right]], dtype=torch.int, device=self.device)))
-        embeddings = list(ThreadPool(max_workers=min(num_chunks + 1, 8)).map(self.__chunk_embedding, chunks))
+        embeddings = list(self._worker_group.map(self.__chunk_embedding, chunks))
         embeddings = sorted([x.returns for x in embeddings], key=lambda x: x[0], reverse=False)
         fin_embedding = [x[1] for x in embeddings]
         # write cache
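`LongTextEncoder` now owns a persistent `ThreadPool` sized by the new `max_workers` argument instead of creating one per call, and it validates `overlapping < chunk_size` up front. A construction sketch with illustrative sizes (the checkpoint defaults to whatever `DEFAULT_BERT` resolves to):

```python
from deeplotx import LongTextEncoder

# Illustrative values: 448-token chunks with a 32-token overlap, embedded by up to 8 worker threads.
encoder = LongTextEncoder(max_length=4096, chunk_size=448, overlapping=32,
                          cache_capacity=64, max_workers=8)
# Passing overlapping >= chunk_size would now fail the constructor's assertion.
```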
deeplotx/nn/__init__.py CHANGED
@@ -1,8 +1,13 @@
+from .base_neural_network import BaseNeuralNetwork
+from .feed_forward import FeedForward
 from .linear_regression import LinearRegression
 from .logistic_regression import LogisticRegression
 from .softmax_regression import SoftmaxRegression
 from .recursive_sequential import RecursiveSequential
 from .long_context_recursive_sequential import LongContextRecursiveSequential
-from .self_attention import SelfAttention
+from .rope import RoPE
+from .attention import Attention
+from .multi_head_attention import MultiHeadAttention
+from .roformer_encoder import RoFormerEncoder
 from .auto_regression import AutoRegression
 from .long_context_auto_regression import LongContextAutoRegression
deeplotx/nn/attention.py ADDED
@@ -0,0 +1,48 @@
+from typing_extensions import override
+
+import torch
+
+from deeplotx.nn.base_neural_network import BaseNeuralNetwork
+from deeplotx.nn.feed_forward import FeedForward
+from deeplotx.nn.rope import RoPE, DEFAULT_THETA
+
+
+class Attention(BaseNeuralNetwork):
+    def __init__(self, feature_dim: int, bias: bool = True, positional: bool = True,
+                 proj_layers: int = 1, proj_expansion_factor: int | float = 1.5, dropout_rate: float = 0.02,
+                 model_name: str | None = None, device: str | None = None, dtype: torch.dtype | None = None,
+                 **kwargs):
+        super().__init__(in_features=feature_dim, out_features=feature_dim, model_name=model_name,
+                         device=device, dtype=dtype)
+        self._positional = positional
+        self._feature_dim = feature_dim
+        self.q_proj = FeedForward(feature_dim=self._feature_dim, num_layers=proj_layers,
+                                  expansion_factor=proj_expansion_factor,
+                                  bias=bias, dropout_rate=dropout_rate, device=self.device, dtype=self.dtype)
+        self.k_proj = FeedForward(feature_dim=self._feature_dim, num_layers=proj_layers,
+                                  expansion_factor=proj_expansion_factor,
+                                  bias=bias, dropout_rate=dropout_rate, device=self.device, dtype=self.dtype)
+        self.v_proj = FeedForward(feature_dim=self._feature_dim, num_layers=proj_layers,
+                                  expansion_factor=proj_expansion_factor,
+                                  bias=bias, dropout_rate=dropout_rate, device=self.device, dtype=self.dtype)
+        if self._positional:
+            self.rope = RoPE(feature_dim=self._feature_dim, theta=kwargs.get('theta', DEFAULT_THETA),
+                             device=self.device, dtype=self.dtype)
+
+    def _attention(self, x: torch.Tensor, y: torch.Tensor, mask: torch.Tensor | None = None) -> torch.Tensor:
+        q, k = self.q_proj(x), self.k_proj(y)
+        if self._positional:
+            q, k = self.rope(q), self.rope(k)
+        attn = torch.matmul(q, k.transpose(-2, -1))
+        attn = attn / (self._feature_dim ** 0.5)
+        attn = attn.masked_fill(mask == 0, -1e9) if mask is not None else attn
+        return torch.softmax(attn, dim=-1)
+
+    @override
+    def forward(self, x: torch.Tensor, y: torch.Tensor | None = None, mask: torch.Tensor | None = None) -> torch.Tensor:
+        x = self.ensure_device_and_dtype(x, device=self.device, dtype=self.dtype)
+        y = x if y is None else self.ensure_device_and_dtype(y, device=self.device, dtype=self.dtype)
+        if mask is not None:
+            mask = self.ensure_device_and_dtype(mask, device=self.device, dtype=self.dtype)
+        v = self.v_proj(y)
+        return torch.matmul(self._attention(x, y, mask), v)
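The new `Attention` block is standard scaled dot-product attention, softmax(QK^T / sqrt(d)) V, with `FeedForward` stacks as the Q/K/V projections and optional rotary position embeddings applied to Q and K. A minimal self- and cross-attention sketch; the shapes and the `device='cpu'` pin are illustrative, assuming the usual (batch, seq_len, feature_dim) layout:

```python
import torch
from deeplotx import Attention

attn = Attention(feature_dim=64, positional=True, device='cpu')  # feature_dim must be even for RoPE
x = torch.randn(2, 16, 64)
out = attn(x)                      # self-attention: y defaults to x
print(out.shape)                   # torch.Size([2, 16, 64])

y = torch.randn(2, 24, 64)
mask = torch.ones(2, 16, 24)       # zero entries are filled with -1e9 before the softmax
out = attn(x, y=y, mask=mask)      # cross-attention over y
```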
deeplotx/nn/auto_regression.py CHANGED
@@ -4,9 +4,11 @@ from deeplotx.nn import RecursiveSequential
 
 
 class AutoRegression(RecursiveSequential):
-    def __init__(self, feature_dim: int, hidden_dim: int | None = None,
-                 recursive_layers: int = 2, model_name: str | None = None,
-                 device: str | None = None, dtype: torch.dtype | None = None):
-        super().__init__(input_dim=feature_dim, output_dim=feature_dim,
-                         hidden_dim=hidden_dim, recursive_layers=recursive_layers,
-                         model_name=model_name, device=device, dtype=dtype)
+    def __init__(self, feature_dim: int, bias: bool = True,
+                 recursive_layers: int = 1, recursive_hidden_dim: int | None = None,
+                 ffn_layers: int = 1, ffn_expansion_factor: int | float = 2, dropout_rate: float = 0.05,
+                 model_name: str | None = None, device: str | None = None, dtype: torch.dtype | None = None):
+        super().__init__(input_dim=feature_dim, output_dim=feature_dim, bias=bias,
+                         recursive_layers=recursive_layers, recursive_hidden_dim=recursive_hidden_dim,
+                         ffn_layers=ffn_layers, ffn_expansion_factor=ffn_expansion_factor,
+                         dropout_rate=dropout_rate, model_name=model_name, device=device, dtype=dtype)
deeplotx/nn/base_neural_network.py CHANGED
@@ -3,12 +3,14 @@ from abc import abstractmethod
 
 import torch
 from torch import nn
+from torch.nn import init
 
 DEFAULT_SUFFIX = 'dlx'
 
 
 class BaseNeuralNetwork(nn.Module):
-    def __init__(self, model_name: str | None = None, device: str | None = None, dtype: torch.dtype | None = None):
+    def __init__(self, in_features: int, out_features: int, model_name: str | None = None,
+                 device: str | None = None, dtype: torch.dtype | None = None):
         super().__init__()
         self._model_name = model_name \
             if model_name is not None \
@@ -16,6 +18,16 @@ class BaseNeuralNetwork(nn.Module):
         self.device = torch.device(device) if device is not None \
             else torch.device('cuda' if torch.cuda.is_available() else 'cpu')
         self.dtype = dtype if dtype is not None else torch.float32
+        self._in_features = in_features
+        self._out_features = out_features
+
+    @property
+    def in_features(self) -> int:
+        return self._in_features
+
+    @property
+    def out_features(self) -> int:
+        return self._out_features
 
     @staticmethod
     def ensure_device_and_dtype(x: torch.Tensor, device: torch.device, dtype: torch.dtype) -> torch.Tensor:
@@ -25,6 +37,44 @@ class BaseNeuralNetwork(nn.Module):
         x = x.to(dtype)
         return x
 
+    def initialize_weights(self):
+        for m in self.modules():
+            match m.__class__:
+                case nn.Linear:
+                    init.kaiming_normal_(m.weight, mode='fan_in', nonlinearity='leaky_relu')
+                    if m.bias is not None:
+                        init.constant_(m.bias, 0)
+                case nn.BatchNorm2d | nn.BatchNorm1d | nn.BatchNorm3d:
+                    init.constant_(m.weight, 1)
+                    init.constant_(m.bias, 0)
+                case nn.LSTM | nn.GRU:
+                    for name, param in m.named_parameters():
+                        _tmp_name = name.lower()
+                        if 'weight_ih' in _tmp_name:
+                            init.kaiming_normal_(param, mode='fan_in', nonlinearity='sigmoid')
+                        elif 'weight_hh' in _tmp_name:
+                            init.orthogonal_(param)
+                        elif 'bias' in _tmp_name:
+                            init.constant_(param, 0)
+                case _:
+                    pass
+        return self
+
+    def size(self) -> dict:
+        total_params = trainable_params = non_trainable_params = 0
+        for param in self.parameters():
+            params = param.numel()
+            total_params += params
+            if param.requires_grad:
+                trainable_params += params
+            else:
+                non_trainable_params += params
+        return {
+            'total': total_params,
+            'trainable': trainable_params,
+            'non_trainable': non_trainable_params
+        }
+
     def l1(self, _lambda: float = 1e-4) -> torch.Tensor:
         def _l1() -> torch.Tensor:
             l2_reg = torch.tensor(0., device=self.device, dtype=self.dtype)
@@ -66,3 +116,25 @@ class BaseNeuralNetwork(nn.Module):
         model_file_name = f'{model_name}.{_suffix}' if model_name is not None else f'{self._model_name}.{_suffix}'
         self.load_state_dict(torch.load(os.path.join(model_dir, model_file_name), map_location=self.device, weights_only=True))
         return self
+
+    def __str__(self):
+        formatted = super().__str__()
+        _line_len = len([sorted(formatted.splitlines(), key=lambda _: len(_), reverse=True)][0])
+        _splitter_1 = '=' * (_line_len + 10)
+        _splitter_2 = '-' * (_line_len + 10)
+        _size = self.size()
+        total_param = _size['total']
+        trainable_param = _size['trainable']
+        non_trainable_param = _size['non_trainable']
+        formatted = (f'{_splitter_1}\n'
+                     f'Model_Name: {self._model_name}\n'
+                     f'In_Features: {self.in_features}\n'
+                     f'Out_Features: {self.out_features}\n'
+                     f'Device: {self.device}\n'
+                     f'Dtype: {self.dtype}\n'
+                     f'Total_Parameters: {total_param}\n'
+                     f'Trainable_Parameters: {trainable_param}\n'
+                     f'NonTrainable_Parameters: {non_trainable_param}\n'
+                     f'{_splitter_2}'
+                     f'\n{formatted}\n{_splitter_1}')
+        return formatted
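`BaseNeuralNetwork` now records `in_features`/`out_features` and gains `initialize_weights()` (Kaiming/orthogonal initialization per module type), `size()` (parameter counts), and a banner-style `__str__`. A short sketch using `LinearRegression` purely as a convenient concrete subclass:

```python
from deeplotx import LinearRegression

model = LinearRegression(input_dim=256, output_dim=1).initialize_weights()
print(model.in_features, model.out_features)  # 256 1
print(model.size())   # {'total': ..., 'trainable': ..., 'non_trainable': ...}
print(model)          # banner with name, features, device, dtype and parameter counts
```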
deeplotx/nn/feed_forward.py ADDED
@@ -0,0 +1,53 @@
+from typing_extensions import override
+
+import torch
+from torch import nn
+
+from deeplotx.nn.base_neural_network import BaseNeuralNetwork
+
+
+class FeedForwardUnit(BaseNeuralNetwork):
+    def __init__(self, feature_dim: int, expansion_factor: int | float = 2,
+                 bias: bool = True, dropout_rate: float = 0.05, model_name: str | None = None,
+                 device: str | None = None, dtype: torch.dtype | None = None):
+        super().__init__(in_features=feature_dim, out_features=feature_dim, model_name=model_name, device=device, dtype=dtype)
+        self._dropout_rate = dropout_rate
+        self.up_proj = nn.Linear(in_features=feature_dim, out_features=int(feature_dim * expansion_factor),
+                                 bias=bias, device=self.device, dtype=self.dtype)
+        self.down_proj = nn.Linear(in_features=int(feature_dim * expansion_factor), out_features=feature_dim,
+                                   bias=bias, device=self.device, dtype=self.dtype)
+        self.parametric_relu = nn.PReLU(num_parameters=1, init=5e-3,
+                                        device=self.device, dtype=self.dtype)
+        self.layer_norm = nn.LayerNorm(normalized_shape=self.up_proj.in_features, eps=1e-9,
+                                       device=self.device, dtype=self.dtype)
+
+    @override
+    def forward(self, x: torch.Tensor) -> torch.Tensor:
+        x = self.ensure_device_and_dtype(x, device=self.device, dtype=self.dtype)
+        residual = x
+        x = self.layer_norm(x)
+        x = self.up_proj(x)
+        x = self.parametric_relu(x)
+        if self._dropout_rate > .0:
+            x = torch.dropout(x, p=self._dropout_rate, train=self.training)
+        return self.down_proj(x) + residual
+
+
+class FeedForward(BaseNeuralNetwork):
+    def __init__(self, feature_dim: int, num_layers: int = 1, expansion_factor: int | float = 2,
+                 bias: bool = True, dropout_rate: float = 0.05, model_name: str | None = None,
+                 device: str | None = None, dtype: torch.dtype | None = None):
+        if num_layers < 1:
+            raise ValueError('num_layers cannot be less than 1.')
+        super().__init__(in_features=feature_dim, out_features=feature_dim, model_name=model_name, device=device, dtype=dtype)
+        self.ffn_layers = nn.ModuleList([FeedForwardUnit(feature_dim=feature_dim,
+                                                         expansion_factor=expansion_factor, bias=bias,
+                                                         dropout_rate=dropout_rate,
+                                                         device=self.device, dtype=self.dtype) for _ in range(num_layers)])
+
+    @override
+    def forward(self, x: torch.Tensor) -> torch.Tensor:
+        x = self.ensure_device_and_dtype(x, device=self.device, dtype=self.dtype)
+        for ffn in self.ffn_layers:
+            x = ffn(x)
+        return x
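Each `FeedForwardUnit` is a pre-norm residual block (LayerNorm, up-projection, PReLU, optional dropout, down-projection, plus the input), and `FeedForward` chains `num_layers` independent units, so the feature dimension is preserved end to end. A minimal sketch with illustrative sizes:

```python
import torch
from deeplotx import FeedForward

ffn = FeedForward(feature_dim=128, num_layers=2, expansion_factor=2, dropout_rate=0.05, device='cpu')
x = torch.randn(4, 128)
print(ffn(x).shape)   # torch.Size([4, 128]); shape preserved by the residual blocks
```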
deeplotx/nn/linear_regression.py CHANGED
@@ -4,34 +4,22 @@ import torch
 from torch import nn
 
 from deeplotx.nn.base_neural_network import BaseNeuralNetwork
+from deeplotx.nn.feed_forward import FeedForward
 
 
 class LinearRegression(BaseNeuralNetwork):
-    def __init__(self, input_dim: int, output_dim: int, model_name: str | None = None,
-                 device: str | None = None, dtype: torch.dtype | None = None):
-        super().__init__(model_name=model_name, device=device, dtype=dtype)
-        self.fc1 = nn.Linear(input_dim, 1024, device=self.device, dtype=self.dtype)
-        self.fc1_to_fc4_res = nn.Linear(1024, 64, device=self.device, dtype=self.dtype)
-        self.fc2 = nn.Linear(1024, 768, device=self.device, dtype=self.dtype)
-        self.fc3 = nn.Linear(768, 128, device=self.device, dtype=self.dtype)
-        self.fc4 = nn.Linear(128, 64, device=self.device, dtype=self.dtype)
-        self.fc5 = nn.Linear(64, output_dim, device=self.device, dtype=self.dtype)
-        self.parametric_relu_1 = nn.PReLU(num_parameters=1, init=5e-3, device=self.device, dtype=self.dtype)
-        self.parametric_relu_2 = nn.PReLU(num_parameters=1, init=5e-3, device=self.device, dtype=self.dtype)
-        self.parametric_relu_3 = nn.PReLU(num_parameters=1, init=5e-3, device=self.device, dtype=self.dtype)
-        self.parametric_relu_4 = nn.PReLU(num_parameters=1, init=5e-3, device=self.device, dtype=self.dtype)
+    def __init__(self, input_dim: int, output_dim: int, num_layers: int = 1,
+                 expansion_factor: int | float = 1.5, bias: bool = True, dropout_rate: float = 0.1,
+                 model_name: str | None = None, device: str | None = None, dtype: torch.dtype | None = None):
+        super().__init__(in_features=input_dim, out_features=output_dim, model_name=model_name, device=device, dtype=dtype)
+        self.ffn = FeedForward(feature_dim=input_dim, num_layers=num_layers, expansion_factor=expansion_factor,
+                               bias=bias, dropout_rate=dropout_rate, device=self.device, dtype=self.dtype)
+        self.proj = nn.Linear(in_features=input_dim, out_features=output_dim,
+                              bias=bias, device=self.device, dtype=self.dtype)
 
     @override
     def forward(self, x: torch.Tensor) -> torch.Tensor:
         x = self.ensure_device_and_dtype(x, device=self.device, dtype=self.dtype)
-        fc1_out = self.parametric_relu_1(self.fc1(x))
-        x = nn.LayerNorm(normalized_shape=1024, eps=1e-9, device=self.device, dtype=self.dtype)(fc1_out)
-        x = torch.dropout(x, p=0.2, train=self.training)
-        x = self.parametric_relu_2(self.fc2(x))
-        x = nn.LayerNorm(normalized_shape=768, eps=1e-9, device=self.device, dtype=self.dtype)(x)
-        x = torch.dropout(x, p=0.2, train=self.training)
-        x = self.parametric_relu_3(self.fc3(x))
-        x = torch.dropout(x, p=0.2, train=self.training)
-        x = self.parametric_relu_4(self.fc4(x)) + self.fc1_to_fc4_res(fc1_out)
-        x = self.fc5(x)
-        return x
+        residual = x
+        x = self.ffn(x) + residual
+        return self.proj(x)
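`LinearRegression` is now a thin head on top of `FeedForward`: the input passes through the residual FFN stack (with one extra residual added here) and a final `nn.Linear` projection to `output_dim`. A usage sketch with illustrative dimensions:

```python
import torch
from deeplotx import LinearRegression

reg = LinearRegression(input_dim=256, output_dim=3, num_layers=2, dropout_rate=0.1, device='cpu')
x = torch.randn(8, 256)
print(reg(x).shape)   # torch.Size([8, 3])
```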
deeplotx/nn/logistic_regression.py CHANGED
@@ -6,9 +6,12 @@ from deeplotx.nn.linear_regression import LinearRegression
 
 
 class LogisticRegression(LinearRegression):
-    def __init__(self, input_dim: int, output_dim: int = 1, model_name: str | None = None,
+    def __init__(self, input_dim: int, output_dim: int = 1, num_layers: int = 1, expansion_factor: int | float = 1.5,
+                 bias: bool = True, dropout_rate: float = 0.1, model_name: str | None = None,
                  device: str | None = None, dtype: torch.dtype | None = None):
-        super().__init__(input_dim=input_dim, output_dim=output_dim, model_name=model_name, device=device, dtype=dtype)
+        super().__init__(input_dim=input_dim, output_dim=output_dim, num_layers=num_layers,
+                         expansion_factor=expansion_factor, bias=bias, dropout_rate=dropout_rate,
+                         model_name=model_name, device=device, dtype=dtype)
 
     @override
     def forward(self, x: torch.Tensor) -> torch.Tensor:
deeplotx/nn/long_context_auto_regression.py CHANGED
@@ -4,9 +4,13 @@ from deeplotx.nn import LongContextRecursiveSequential
 
 
 class LongContextAutoRegression(LongContextRecursiveSequential):
-    def __init__(self, feature_dim: int, hidden_dim: int | None = None,
-                 recursive_layers: int = 2, model_name: str | None = None,
-                 device: str | None = None, dtype: torch.dtype | None = None):
-        super().__init__(input_dim=feature_dim, output_dim=feature_dim,
-                         hidden_dim=hidden_dim, recursive_layers=recursive_layers,
-                         model_name=model_name, device=device, dtype=dtype)
+    def __init__(self, feature_dim: int, bias: bool = True,
+                 encoder_layers: int = 1, attn_heads: int = 1, recursive_layers: int = 1, recursive_hidden_dim: int | None = None,
+                 ffn_layers: int = 1, ffn_expansion_factor: int | float = 2, dropout_rate: float = 0.05,
+                 model_name: str | None = None, device: str | None = None, dtype: torch.dtype | None = None,
+                 **kwargs):
+        super().__init__(input_dim=feature_dim, output_dim=feature_dim, bias=bias,
+                         encoder_layers=encoder_layers, attn_heads=attn_heads,
+                         recursive_layers=recursive_layers, recursive_hidden_dim=recursive_hidden_dim,
+                         ffn_layers=ffn_layers, ffn_expansion_factor=ffn_expansion_factor, dropout_rate=dropout_rate,
+                         model_name=model_name, device=device, dtype=dtype, **kwargs)
deeplotx/nn/long_context_recursive_sequential.py CHANGED
@@ -3,26 +3,34 @@ from typing_extensions import override
 import torch
 from torch import nn
 
+from deeplotx.nn.attention import DEFAULT_THETA
 from deeplotx.nn.recursive_sequential import RecursiveSequential
-from deeplotx.nn.self_attention import SelfAttention
+from deeplotx.nn.roformer_encoder import RoFormerEncoder
 
 
 class LongContextRecursiveSequential(RecursiveSequential):
-    def __init__(self, input_dim: int, output_dim: int,
-                 hidden_dim: int | None = None, recursive_layers: int = 2,
-                 model_name: str | None = None, device: str | None = None,
-                 dtype: torch.dtype | None = None):
-        super().__init__(input_dim=input_dim, output_dim=output_dim,
-                         hidden_dim=hidden_dim, recursive_layers=recursive_layers,
+    def __init__(self, input_dim: int, output_dim: int, bias: bool = True,
+                 encoder_layers: int = 1, attn_heads: int = 1, recursive_layers: int = 2, recursive_hidden_dim: int | None = None,
+                 ffn_layers: int = 1, ffn_expansion_factor: int | float = 2, dropout_rate: float = 0.05,
+                 model_name: str | None = None, device: str | None = None, dtype: torch.dtype | None = None,
+                 **kwargs):
+        super().__init__(input_dim=input_dim, output_dim=output_dim, bias=bias,
+                         recursive_layers=recursive_layers, recursive_hidden_dim=recursive_hidden_dim,
+                         ffn_layers=ffn_layers, ffn_expansion_factor=ffn_expansion_factor, dropout_rate=dropout_rate,
                          model_name=model_name, device=device, dtype=dtype)
-        self._feature_dim = input_dim
-        self.self_attention = SelfAttention(feature_dim=input_dim)
-        self.proj = nn.Linear(in_features=input_dim * 2, out_features=input_dim,
-                              bias=True, device=self.device, dtype=self.dtype)
+        self.roformer_encoders = nn.ModuleList([RoFormerEncoder(feature_dim=input_dim, attn_heads=attn_heads, bias=bias,
+                                                                ffn_layers=kwargs.get('encoder_ffn_layers', ffn_layers),
+                                                                ffn_expansion_factor=kwargs.get('encoder_expansion_factor', ffn_expansion_factor),
+                                                                dropout_rate=kwargs.get('encoder_dropout_rate', dropout_rate),
+                                                                attn_ffn_layers=kwargs.get('attn_ffn_layers', 1),
+                                                                attn_expansion_factor=kwargs.get('attn_expansion_factor', ffn_expansion_factor),
+                                                                attn_dropout_rate=kwargs.get('attn_dropout_rate', dropout_rate),
+                                                                theta=kwargs.get('theta', DEFAULT_THETA),
+                                                                device=self.device, dtype=self.dtype) for _ in range(encoder_layers)])
 
     @override
     def forward(self, x: torch.Tensor, state: tuple[torch.Tensor, torch.Tensor]) -> tuple[torch.Tensor, tuple[torch.Tensor, torch.Tensor]]:
         x = self.ensure_device_and_dtype(x, device=self.device, dtype=self.dtype)
-        x = torch.cat([self.self_attention(x), x], dim=-1)
-        x = nn.LayerNorm(normalized_shape=x.shape[-1], eps=1e-9, device=self.device, dtype=self.dtype)(x)
-        return super().forward(self.proj(x), state)
+        for roformer_encoder in self.roformer_encoders:
+            x = roformer_encoder(x)
+        return super().forward(x, state)
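`LongContextRecursiveSequential` now runs the input through `encoder_layers` RoFormer blocks before the recurrent core, with encoder- and attention-specific settings picked out of `**kwargs` (`encoder_ffn_layers`, `encoder_expansion_factor`, `encoder_dropout_rate`, `attn_ffn_layers`, `attn_expansion_factor`, `attn_dropout_rate`, `theta`). A configuration sketch using only keys read in this hunk; the values and the CPU pin are illustrative:

```python
from deeplotx import LongContextRecursiveSequential

model = LongContextRecursiveSequential(
    input_dim=768, output_dim=1,
    encoder_layers=2, attn_heads=4,
    recursive_layers=2, recursive_hidden_dim=256,
    ffn_layers=2, ffn_expansion_factor=2, dropout_rate=0.05,
    # optional overrides, read via kwargs.get(...) above
    encoder_ffn_layers=1, attn_expansion_factor=1.5, theta=10_000,
    device='cpu',
)
state = model.initial_state(batch_size=4)   # inherited from RecursiveSequential
```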
deeplotx/nn/multi_head_attention.py ADDED
@@ -0,0 +1,34 @@
+from typing_extensions import override
+
+import torch
+from torch import nn
+
+from deeplotx.nn.base_neural_network import BaseNeuralNetwork
+from deeplotx.nn.attention import Attention
+
+
+class MultiHeadAttention(BaseNeuralNetwork):
+    def __init__(self, feature_dim: int, num_heads: int = 1, bias: bool = True, positional: bool = True,
+                 proj_layers: int = 1, proj_expansion_factor: int | float = 1.5, dropout_rate: float = 0.02,
+                 model_name: str | None = None, device: str | None = None, dtype: torch.dtype | None = None,
+                 **kwargs):
+        super().__init__(in_features=feature_dim, out_features=feature_dim, model_name=model_name,
+                         device=device, dtype=dtype)
+        self._num_heads = num_heads
+        self.expand_proj = nn.Linear(in_features=feature_dim, out_features=feature_dim * self._num_heads, bias=bias,
+                                     device=self.device, dtype=self.dtype)
+        self.attn_heads = nn.ModuleList([Attention(feature_dim=feature_dim, bias=bias, positional=positional,
+                                                   proj_layers=proj_layers, proj_expansion_factor=proj_expansion_factor,
+                                                   dropout_rate=dropout_rate, device=self.device, dtype=self.dtype,
+                                                   **kwargs) for _ in range(self._num_heads)])
+        self.out_proj = nn.Linear(in_features=feature_dim * self._num_heads, out_features=feature_dim, bias=bias,
+                                  device=self.device, dtype=self.dtype)
+
+    @override
+    def forward(self, x: torch.Tensor, y: torch.Tensor | None = None, mask: torch.Tensor | None = None) -> torch.Tensor:
+        x = self.ensure_device_and_dtype(x, device=self.device, dtype=self.dtype)
+        y = x if y is None else self.ensure_device_and_dtype(y, device=self.device, dtype=self.dtype)
+        x, y = self.expand_proj(x), self.expand_proj(y)
+        x_heads, y_heads = x.split(self.in_features, dim=-1), y.split(self.in_features, dim=-1)
+        head_outs = [self.attn_heads[_](x=x_heads[_], y=y_heads[_], mask=mask) for _ in range(self._num_heads)]
+        return self.out_proj(torch.concat(head_outs, dim=-1))
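Rather than reshaping into heads, `MultiHeadAttention` expands the input to `feature_dim * num_heads`, splits the expansion into per-head slices, runs an independent `Attention` module on each slice, and projects the concatenated head outputs back to `feature_dim`. A quick sketch (shapes and the CPU pin are illustrative):

```python
import torch
from deeplotx import MultiHeadAttention

mha = MultiHeadAttention(feature_dim=64, num_heads=4, positional=True, device='cpu')
x = torch.randn(2, 16, 64)
print(mha(x).shape)   # torch.Size([2, 16, 64])
```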
deeplotx/nn/recursive_sequential.py CHANGED
@@ -4,23 +4,27 @@ import torch
 from torch import nn
 
 from deeplotx.nn.base_neural_network import BaseNeuralNetwork
-from deeplotx.nn import LinearRegression
+from deeplotx.nn.feed_forward import FeedForward
 
 
 class RecursiveSequential(BaseNeuralNetwork):
-    def __init__(self, input_dim: int, output_dim: int,
-                 hidden_dim: int | None = None, recursive_layers: int = 2,
-                 model_name: str | None = None, device: str | None = None,
-                 dtype: torch.dtype | None = None):
-        super().__init__(model_name=model_name, device=device, dtype=dtype)
-        if hidden_dim is None:
-            hidden_dim = input_dim
-        self.lstm = nn.LSTM(input_size=input_dim, hidden_size=hidden_dim,
+    def __init__(self, input_dim: int, output_dim: int, bias: bool = True,
+                 recursive_layers: int = 1, recursive_hidden_dim: int | None = None,
+                 ffn_layers: int = 1, ffn_expansion_factor: int | float = 2, dropout_rate: float = 0.05,
+                 model_name: str | None = None, device: str | None = None, dtype: torch.dtype | None = None):
+        super().__init__(in_features=input_dim, out_features=output_dim, model_name=model_name,
+                         device=device, dtype=dtype)
+        if recursive_hidden_dim is None:
+            recursive_hidden_dim = input_dim
+        self.lstm = nn.LSTM(input_size=input_dim, hidden_size=recursive_hidden_dim,
                             num_layers=recursive_layers, batch_first=True,
                             bias=True, bidirectional=True, device=self.device,
                             dtype=self.dtype)
-        self.regressive_head = LinearRegression(input_dim=hidden_dim * 2, output_dim=output_dim,
-                                                device=self.device, dtype=self.dtype)
+        self.ffn = FeedForward(feature_dim=recursive_hidden_dim * 2, num_layers=ffn_layers,
+                               expansion_factor=ffn_expansion_factor, bias=bias, dropout_rate=dropout_rate,
+                               device=self.device, dtype=self.dtype)
+        self.__proj = nn.Linear(in_features=recursive_hidden_dim * 2, out_features=output_dim, bias=bias,
+                                device=self.device, dtype=self.dtype)
 
     def initial_state(self, batch_size: int = 1) -> tuple[torch.Tensor, torch.Tensor]:
         zeros = torch.zeros(self.lstm.num_layers * 2, batch_size, self.lstm.hidden_size, device=self.device, dtype=self.dtype)
@@ -32,7 +36,10 @@ class RecursiveSequential(BaseNeuralNetwork):
         state = (self.ensure_device_and_dtype(state[0], device=self.device, dtype=self.dtype),
                  self.ensure_device_and_dtype(state[1], device=self.device, dtype=self.dtype))
         x, (hidden_state, cell_state) = self.lstm(x, state)
-        x = self.regressive_head(x[:, -1, :])
+        x = x[:, -1, :]
+        residual = x
+        x = self.ffn(x) + residual
+        x = self.__proj(x)
         return x, (hidden_state, cell_state)
 
     @override
deeplotx/nn/roformer_encoder.py ADDED
@@ -0,0 +1,40 @@
+from typing_extensions import override
+
+import torch
+from torch import nn
+
+from deeplotx.nn.base_neural_network import BaseNeuralNetwork
+from deeplotx.nn.feed_forward import FeedForward
+from deeplotx.nn.multi_head_attention import MultiHeadAttention
+
+
+class RoFormerEncoder(BaseNeuralNetwork):
+    def __init__(self, feature_dim: int, attn_heads: int = 2, bias: bool = True,
+                 ffn_layers: int = 1, ffn_expansion_factor: int | float = 2,
+                 dropout_rate: float = 0.02, model_name: str | None = None,
+                 device: str | None = None, dtype: torch.dtype | None = None, **kwargs):
+        super().__init__(in_features=feature_dim, out_features=feature_dim,
+                         model_name=model_name, device=device, dtype=dtype)
+        self.attn = MultiHeadAttention(feature_dim=feature_dim, num_heads=attn_heads,
+                                       bias=bias, positional=True,
+                                       proj_layers=kwargs.get('attn_ffn_layers', 1),
+                                       proj_expansion_factor=kwargs.get('attn_expansion_factor', ffn_expansion_factor),
+                                       dropout_rate=kwargs.get('attn_dropout_rate', dropout_rate),
+                                       device=self.device, dtype=self.dtype, **kwargs)
+        self.ffn = FeedForward(feature_dim=feature_dim * 2, num_layers=ffn_layers,
+                               expansion_factor=ffn_expansion_factor,
+                               bias=bias, dropout_rate=dropout_rate,
+                               device=self.device, dtype=self.dtype)
+        self.layer_norm = nn.LayerNorm(normalized_shape=feature_dim, eps=1e-9,
+                                       device=self.device, dtype=self.dtype)
+        self.__proj = nn.Linear(in_features=feature_dim * 2, out_features=feature_dim,
+                                bias=bias, device=self.device, dtype=self.dtype)
+
+    @override
+    def forward(self, x: torch.Tensor, mask: torch.Tensor | None = None) -> torch.Tensor:
+        x = self.ensure_device_and_dtype(x, device=self.device, dtype=self.dtype)
+        if mask is not None:
+            mask = self.ensure_device_and_dtype(mask, device=self.device, dtype=self.dtype)
+        attn = self.attn(x=self.layer_norm(x), y=None, mask=mask)
+        x = torch.concat([attn, x], dim=-1)
+        return self.__proj(self.ffn(x))
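A `RoFormerEncoder` block pre-norms its input, applies rotary multi-head attention, concatenates the attention output with the raw input (doubling the width), and maps back to `feature_dim` through the `FeedForward` stack and a linear projection. A small sketch with illustrative shapes:

```python
import torch
from deeplotx import RoFormerEncoder

enc = RoFormerEncoder(feature_dim=64, attn_heads=2, device='cpu')
x = torch.randn(2, 16, 64)
print(enc(x).shape)   # torch.Size([2, 16, 64])
```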
deeplotx/nn/rope.py ADDED
@@ -0,0 +1,41 @@
+from typing_extensions import override
+
+import torch
+
+from deeplotx.nn.base_neural_network import BaseNeuralNetwork
+
+DEFAULT_THETA = 10_000
+
+
+class RoPE(BaseNeuralNetwork):
+    def __init__(self, feature_dim: int, theta: int = DEFAULT_THETA,
+                 device: str | None = None, dtype: torch.dtype = torch.float32):
+        super().__init__(in_features=feature_dim, out_features=feature_dim, model_name=None,
+                         device=device, dtype=dtype)
+        assert feature_dim % 2 == 0, f'feature_dim ({feature_dim}) is not divisible by 2.'
+        self._theta = theta
+        self._num_groups = feature_dim // 2
+        self._inv_freq = 1.0 / (theta ** (torch.arange(start=0, end=self._num_groups, step=1).float() / self._num_groups))
+        self.register_buffer('inv_freq', self._inv_freq)
+
+    @property
+    def dim(self):
+        return self._dim
+
+    @property
+    def theta(self):
+        return self._theta
+
+    def rotate_half(self, _t: torch.Tensor) -> torch.Tensor:
+        return torch.cat((- _t[..., self._num_groups:], _t[..., :self._num_groups]), dim=-1)
+
+    @override
+    def forward(self, x: torch.Tensor) -> torch.Tensor:
+        x = self.ensure_device_and_dtype(x, device=self.device, dtype=self.dtype)
+        *other_dims, seq_len, feature_dim = x.shape
+        assert feature_dim == self.in_features, f"feature_dim of x doesn't match with defined feature_dim {self.in_features}."
+        t = torch.arange(start=0, end=seq_len, step=1, device=self.device, dtype=self.dtype)
+        freq = torch.outer(t, self._inv_freq)
+        emb = torch.cat((freq, freq), dim=-1)
+        sin_emb, cos_emb = emb.sin(), emb.cos()
+        return x * cos_emb + self.rotate_half(x) * sin_emb
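`RoPE` precomputes inverse frequencies for `feature_dim // 2` rotation groups and applies the usual rotary identity `x * cos + rotate_half(x) * sin` per position. (The `dim` property returns `self._dim`, which does not appear to be assigned in `__init__`; the configured dimension is available as `in_features`.) A minimal sketch, shapes chosen for illustration:

```python
import torch
from deeplotx import RoPE

rope = RoPE(feature_dim=64, device='cpu')   # feature_dim must be even
x = torch.randn(2, 16, 64)                  # (batch, seq_len, feature_dim)
print(rope(x).shape)                        # torch.Size([2, 16, 64])
```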
deeplotx/nn/softmax_regression.py CHANGED
@@ -6,9 +6,12 @@ from deeplotx.nn.linear_regression import LinearRegression
 
 
 class SoftmaxRegression(LinearRegression):
-    def __init__(self, input_dim: int, output_dim: int, model_name: str | None = None,
+    def __init__(self, input_dim: int, output_dim: int, num_layers: int = 1, expansion_factor: int | float = 1.5,
+                 bias: bool = True, dropout_rate: float = 0.1, model_name: str | None = None,
                  device: str | None = None, dtype: torch.dtype | None = None):
-        super().__init__(input_dim=input_dim, output_dim=output_dim, model_name=model_name, device=device, dtype=dtype)
+        super().__init__(input_dim=input_dim, output_dim=output_dim, num_layers=num_layers,
+                         expansion_factor=expansion_factor, bias=bias, dropout_rate=dropout_rate,
+                         model_name=model_name, device=device, dtype=dtype)
 
     @override
     def forward(self, x: torch.Tensor) -> torch.Tensor:
deeplotx/trainer/text_binary_classification_trainer.py CHANGED
@@ -6,6 +6,7 @@ from torch import nn, optim
 from torch.utils.data import DataLoader, TensorDataset
 
 from deeplotx.encoder.long_text_encoder import LongTextEncoder
+from deeplotx.nn.attention import DEFAULT_THETA
 from deeplotx.nn.long_context_recursive_sequential import LongContextRecursiveSequential
 from deeplotx.trainer.base_trainer import BaseTrainer
 
@@ -24,8 +25,8 @@ class TextBinaryClassifierTrainer(BaseTrainer):
     def train(self, positive_texts: list[str], negative_texts: list[str],
               num_epochs: int, learning_rate: float = 2e-6, balancing_dataset: bool = True,
              train_loss_threshold: float = 0.0, valid_loss_threshold: float = 0.0,
-              alpha: float = 1e-4, rho: float = 0.2,
-              hidden_dim: int = 256, recursive_layers: int = 2) -> LongContextRecursiveSequential:
+              alpha: float = 1e-4, rho: float = 0.2, encoder_layers: int = 4, attn_heads: int = 6,
+              recursive_layers: int = 2, recursive_hidden_dim: int = 256, **kwargs) -> LongContextRecursiveSequential:
         if balancing_dataset:
             min_length = min(len(positive_texts), len(negative_texts))
             positive_texts = positive_texts[:min_length]
@@ -44,15 +45,30 @@ class TextBinaryClassifierTrainer(BaseTrainer):
         valid_dataset = TensorDataset(inputs[train_size:], labels[train_size:])
         self.train_dataset_loader = DataLoader(train_dataset, batch_size=self._batch_size, shuffle=True)
         self.valid_dataset_loader = DataLoader(valid_dataset, batch_size=self._batch_size, shuffle=True)
-
-        if self.model is not None and self.model.fc1.in_features != feature_dim:
+        if self.model is not None and self.model.in_features != feature_dim:
             logger.warning("The dimension of features doesn't match. A new model instance will be created.")
             self.model = None
         if self.model is None:
-            self.model = LongContextRecursiveSequential(input_dim=feature_dim, output_dim=1,
-                                                        hidden_dim=hidden_dim,
-                                                        recursive_layers=recursive_layers,
-                                                        device=self.device, dtype=dtype)
+            ffn_layers = kwargs.get('ffn_layers', 5)
+            ffn_expansion_factor = kwargs.get('ffn_expansion_factor', 2)
+            bias = kwargs.get('bias', True)
+            dropout_rate = kwargs.get('dropout_rate', 0.1)
+            encoder_ffn_layers = kwargs.get('encoder_ffn_layers', ffn_layers)
+            encoder_expansion_factor = kwargs.get('encoder_expansion_factor', ffn_expansion_factor)
+            encoder_dropout_rate = kwargs.get('encoder_dropout_rate', dropout_rate)
+            attn_ffn_layers = kwargs.get('attn_ffn_layers', 1)
+            attn_expansion_factor = kwargs.get('attn_expansion_factor', ffn_expansion_factor)
+            attn_dropout_rate = kwargs.get('attn_dropout_rate', dropout_rate)
+            theta = kwargs.get('theta', DEFAULT_THETA)
+            self.model = LongContextRecursiveSequential(input_dim=feature_dim, output_dim=1, bias=bias,
+                                                        encoder_layers=encoder_layers, attn_heads=attn_heads,
+                                                        recursive_layers=recursive_layers, recursive_hidden_dim=recursive_hidden_dim,
+                                                        ffn_layers=ffn_layers, ffn_expansion_factor=ffn_expansion_factor, dropout_rate=dropout_rate,
+                                                        encoder_ffn_layers=encoder_ffn_layers, encoder_expansion_factor=encoder_expansion_factor,
+                                                        encoder_dropout_rate=encoder_dropout_rate, attn_ffn_layers=attn_ffn_layers,
+                                                        attn_expansion_factor=attn_expansion_factor, attn_dropout_rate=attn_dropout_rate,
+                                                        theta=theta).initialize_weights()
+            logger.debug(f'Training Model: \n{self.model}')
         loss_function = nn.BCELoss()
         optimizer = optim.Adamax(self.model.parameters(), lr=learning_rate)
         for epoch in range(num_epochs):
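`TextBinaryClassifierTrainer.train()` now exposes the encoder and attention hyperparameters of `LongContextRecursiveSequential` directly and forwards the rest through `**kwargs`, then calls `initialize_weights()` on the freshly built model. A hedged sketch of a call, assuming a trainer instance named `trainer` has already been constructed (its constructor is not part of this diff) and using made-up text lists and hyperparameter values:

```python
# pos_texts / neg_texts are hypothetical lists of strings.
model = trainer.train(
    positive_texts=pos_texts, negative_texts=neg_texts,
    num_epochs=10, learning_rate=2e-6,
    encoder_layers=4, attn_heads=6,
    recursive_layers=2, recursive_hidden_dim=256,
    ffn_layers=5, dropout_rate=0.1, theta=10_000,   # forwarded via **kwargs
)
```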
deeplotx/util/__init__.py CHANGED
@@ -1,2 +1,2 @@
-from .hash import md5, sha1
+from .hash import md5, sha1, sha256, sha512
 from .read_file import read_file, get_files
{deeplotx-0.5.6.dist-info → deeplotx-0.8.0.dist-info}/METADATA RENAMED
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: deeplotx
-Version: 0.5.6
+Version: 0.8.0
 Summary: Easy-2-use long text NLP toolkit.
 Requires-Python: >=3.10
 Description-Content-Type: text/markdown
@@ -13,7 +13,7 @@ Requires-Dist: python-dotenv
 Requires-Dist: torch
 Requires-Dist: transformers
 Requires-Dist: typing-extensions
-Requires-Dist: vortezwohl>=0.0.6
+Requires-Dist: vortezwohl>=0.0.8
 Dynamic: license-file
 
 [![Ask DeepWiki](https://deepwiki.com/badge.svg)](https://deepwiki.com/vortezwohl/DeepLoTX)
@@ -163,6 +163,8 @@ Dynamic: license-file
 
 ```python
 from deeplotx import (
+    BaseNeuralNetwork,  # base class for deep neural networks
+    FeedForward,  # feed-forward network
     LinearRegression,  # linear regression
     LogisticRegression,  # logistic regression / binary / multi-label classification
     SoftmaxRegression,  # Softmax regression / multi-class classification
@@ -181,38 +183,54 @@ Dynamic: license-file
 
 import torch
 from torch import nn
-
+
 from deeplotx.nn.base_neural_network import BaseNeuralNetwork
-
-
-class LinearRegression(BaseNeuralNetwork):
-    def __init__(self, input_dim: int, output_dim: int, model_name: str | None = None,
-                 device: str | None = None, dtype: torch.dtype | None = None):
-        super().__init__(model_name=model_name, device=device, dtype=dtype)
-        self.fc1 = nn.Linear(input_dim, 1024, device=self.device, dtype=self.dtype)
-        self.fc1_to_fc4_res = nn.Linear(1024, 64, device=self.device, dtype=self.dtype)
-        self.fc2 = nn.Linear(1024, 768, device=self.device, dtype=self.dtype)
-        self.fc3 = nn.Linear(768, 128, device=self.device, dtype=self.dtype)
-        self.fc4 = nn.Linear(128, 64, device=self.device, dtype=self.dtype)
-        self.fc5 = nn.Linear(64, output_dim, device=self.device, dtype=self.dtype)
-        self.parametric_relu_1 = nn.PReLU(num_parameters=1, init=5e-3, device=self.device, dtype=self.dtype)
-        self.parametric_relu_2 = nn.PReLU(num_parameters=1, init=5e-3, device=self.device, dtype=self.dtype)
-        self.parametric_relu_3 = nn.PReLU(num_parameters=1, init=5e-3, device=self.device, dtype=self.dtype)
-        self.parametric_relu_4 = nn.PReLU(num_parameters=1, init=5e-3, device=self.device, dtype=self.dtype)
-
+
+
+class FeedForwardUnit(BaseNeuralNetwork):
+    def __init__(self, feature_dim: int, expansion_factor: int | float = 2,
+                 bias: bool = True, dropout_rate: float = 0.05, model_name: str | None = None,
+                 device: str | None = None, dtype: torch.dtype | None = None):
+        super().__init__(in_features=feature_dim, out_features=feature_dim, model_name=model_name, device=device, dtype=dtype)
+        self._dropout_rate = dropout_rate
+        self.fc1 = nn.Linear(feature_dim, int(feature_dim * expansion_factor), bias=bias,
+                             device=self.device, dtype=self.dtype)
+        self.fc2 = nn.Linear(int(feature_dim * expansion_factor), feature_dim, bias=bias,
+                             device=self.device, dtype=self.dtype)
+        self.parametric_relu_1 = nn.PReLU(num_parameters=1, init=5e-3,
+                                          device=self.device, dtype=self.dtype)
+        self.layer_norm = nn.LayerNorm(normalized_shape=self.fc1.in_features, eps=1e-9,
+                                       device=self.device, dtype=self.dtype)
+
     @override
-    def forward(self, x) -> torch.Tensor:
+    def forward(self, x: torch.Tensor) -> torch.Tensor:
         x = self.ensure_device_and_dtype(x, device=self.device, dtype=self.dtype)
-        fc1_out = self.parametric_relu_1(self.fc1(x))
-        x = nn.LayerNorm(normalized_shape=1024, eps=1e-9, device=self.device, dtype=self.dtype)(fc1_out)
-        x = torch.dropout(x, p=0.2, train=self.training)
-        x = self.parametric_relu_2(self.fc2(x))
-        x = nn.LayerNorm(normalized_shape=768, eps=1e-9, device=self.device, dtype=self.dtype)(x)
-        x = torch.dropout(x, p=0.2, train=self.training)
-        x = self.parametric_relu_3(self.fc3(x))
-        x = torch.dropout(x, p=0.2, train=self.training)
-        x = self.parametric_relu_4(self.fc4(x)) + self.fc1_to_fc4_res(fc1_out)
-        x = self.fc5(x)
+        residual = x
+        x = self.layer_norm(x)
+        x = self.fc1(x)
+        x = self.parametric_relu_1(x)
+        if self._dropout_rate > .0:
+            x = torch.dropout(x, p=self._dropout_rate, train=self.training)
+        return self.fc2(x) + residual
+
+
+class FeedForward(BaseNeuralNetwork):
+    def __init__(self, feature_dim: int, num_layers: int = 1, expansion_factor: int | float = 2,
+                 bias: bool = True, dropout_rate: float = 0.05, model_name: str | None = None,
+                 device: str | None = None, dtype: torch.dtype | None = None):
+        if num_layers < 1:
+            raise ValueError('num_layers cannot be less than 1.')
+        super().__init__(in_features=feature_dim, out_features=feature_dim, model_name=model_name, device=device, dtype=dtype)
+        self.ffn_layers = nn.ModuleList([FeedForwardUnit(feature_dim=feature_dim,
+                                                         expansion_factor=expansion_factor, bias=bias,
+                                                         dropout_rate=dropout_rate,
+                                                         device=self.device, dtype=self.dtype)] * num_layers)
+
+    @override
+    def forward(self, x: torch.Tensor) -> torch.Tensor:
+        x = self.ensure_device_and_dtype(x, device=self.device, dtype=self.dtype)
+        for ffn in self.ffn_layers:
+            x = ffn(x)
         return x
 ```
 
@@ -222,29 +240,34 @@ Dynamic: license-file
 from typing_extensions import override
 
 import torch
-from torch import nn, softmax
 
 from deeplotx.nn.base_neural_network import BaseNeuralNetwork
+from deeplotx.nn.feed_forward import FeedForward
 
 
 class SelfAttention(BaseNeuralNetwork):
-    def __init__(self, feature_dim: int, model_name: str | None = None,
-                 device: str | None = None, dtype: torch.dtype | None = None):
-        super().__init__(model_name=model_name, device=device, dtype=dtype)
+    def __init__(self, feature_dim: int, bias: bool = True, proj_layers: int = 1,
+                 proj_expansion_factor: int | float = 1.5, dropout_rate: float = 0.02,
+                 model_name: str | None = None, device: str | None = None, dtype: torch.dtype | None = None):
+        super().__init__(in_features=feature_dim, out_features=feature_dim, model_name=model_name,
+                         device=device, dtype=dtype)
         self._feature_dim = feature_dim
-        self.q_proj = nn.Linear(in_features=self._feature_dim, out_features=self._feature_dim,
-                                bias=True, device=self.device, dtype=self.dtype)
-        self.k_proj = nn.Linear(in_features=self._feature_dim, out_features=self._feature_dim,
-                                bias=True, device=self.device, dtype=self.dtype)
-        self.v_proj = nn.Linear(in_features=self._feature_dim, out_features=self._feature_dim,
-                                bias=True, device=self.device, dtype=self.dtype)
+        self.q_proj = FeedForward(feature_dim=self._feature_dim, num_layers=proj_layers,
+                                  expansion_factor=proj_expansion_factor,
+                                  bias=bias, dropout_rate=dropout_rate, device=self.device, dtype=self.dtype)
+        self.k_proj = FeedForward(feature_dim=self._feature_dim, num_layers=proj_layers,
+                                  expansion_factor=proj_expansion_factor,
+                                  bias=bias, dropout_rate=dropout_rate, device=self.device, dtype=self.dtype)
+        self.v_proj = FeedForward(feature_dim=self._feature_dim, num_layers=proj_layers,
+                                  expansion_factor=proj_expansion_factor,
+                                  bias=bias, dropout_rate=dropout_rate, device=self.device, dtype=self.dtype)
 
     def _attention(self, x: torch.Tensor, mask: torch.Tensor | None = None) -> torch.Tensor:
         q, k = self.q_proj(x), self.k_proj(x)
         attn = torch.matmul(q, k.transpose(-2, -1))
         attn = attn / (self._feature_dim ** 0.5)
         attn = attn.masked_fill(mask == 0, -1e9) if mask is not None else attn
-        return softmax(attn, dim=-1)
+        return torch.softmax(attn, dim=-1)
 
     @override
     def forward(self, x: torch.Tensor, mask: torch.Tensor | None = None) -> torch.Tensor:
deeplotx-0.8.0.dist-info/RECORD ADDED
@@ -0,0 +1,34 @@
+deeplotx/__init__.py,sha256=oNeA-vNu5YGiEQg0IcpKEdGh_Y_2uPvo2nqaNL_Zgv8,1159
+deeplotx/encoder/__init__.py,sha256=BrsF5_4O-4pfihYF2wjExDOoAY-03kGJTH-Mhez4tsE,129
+deeplotx/encoder/encoder.py,sha256=oSBdA-MiwMKNfTFJWR-RdvNS0G0qfX-Qchwy4LuwB00,3985
+deeplotx/encoder/long_text_encoder.py,sha256=PFR6jjGyg1N58TQlKsPaNQEd-EDl13Hyhu7A1KtGBbA,3743
+deeplotx/encoder/longformer_encoder.py,sha256=A8FXqd4mdHxSn_o_R689XtpT73ISDT788EgMQRGLC2g,1822
+deeplotx/nn/__init__.py,sha256=01I_yqx9GTa4wy3uNyAqhtxp66tDqxgMLC4Ky5Vnkrg,651
+deeplotx/nn/attention.py,sha256=HZ4nfFtkk7AnJ9nuoDSK6zIlIhZ_hbpZc3o6JQIBqJ8,2861
+deeplotx/nn/auto_regression.py,sha256=uISx29t_zkDGS8s2wvGB6wOGYZitQ4hQ7wyoQl4lcqY,857
+deeplotx/nn/base_neural_network.py,sha256=FjQEDFH810fJS7JV3aLgJZnaMqC6DH--wlBvuj-ghTc,5900
+deeplotx/nn/feed_forward.py,sha256=4ozj7EDalO9pb6JUhZtsJqE0r8bIHFApHRt2zTrl4ho,2931
+deeplotx/nn/linear_regression.py,sha256=QybSRfsf9PpgJAWixvrSNn3OYRKJXpSZMfqdzpw-Kd8,1280
+deeplotx/nn/logistic_regression.py,sha256=WfgHVNGIvAYsX2iea2wRlLgfbubYWyZkBLYpnpwOiyU,937
+deeplotx/nn/long_context_auto_regression.py,sha256=uy0k_g8wEfMH5nd5HCfrHA8dgEsuWBA2x8U-g3h4vQc,1054
+deeplotx/nn/long_context_recursive_sequential.py,sha256=i7kUml9RV_mkLRJ114UHsj9Gxw7LzJVQ4z8-REHa8-w,2682
+deeplotx/nn/multi_head_attention.py,sha256=3z73uGbvy3jszRy1B9nxGOJjlttHpcpRF8Qd09OEams,2267
+deeplotx/nn/recursive_sequential.py,sha256=8Z8vT70xTygusL-3w3QlB_B_k0xQSUU2ZTgC1LhEmzQ,2805
+deeplotx/nn/roformer_encoder.py,sha256=UJjKniNdMd0rfoYQcsX6bPo6Ceq_Z6EhwHe2kgqWC_k,2426
+deeplotx/nn/rope.py,sha256=r3hfENCxJv-td55L0CBfF8MkhEPd9V1vU_U6pDfCfr0,1754
+deeplotx/nn/softmax_regression.py,sha256=PN_1Zr_B_z5zYC_s_8k6c5fllOtxfJEvVvCmC9GRmx0,958
+deeplotx/similarity/__init__.py,sha256=s3u-KSgxjnMcWpIItKgXNltFMPQ7YY3CqsqHI-5F1c8,724
+deeplotx/similarity/distribution.py,sha256=wQGouuuW531pZeBRKBujXsdsoz4fDnPw7_GW81jwepc,1066
+deeplotx/similarity/set.py,sha256=zhGFxtSIXlWqvipBYzoiPahp4g0boAIoUiMfG0wl07A,686
+deeplotx/similarity/vector.py,sha256=WVbDHqykt-fvuILVrhUCtIFAOEjY_zvttrXGM9eylG0,1125
+deeplotx/trainer/__init__.py,sha256=Fl5DR9UecQc5VtBcczU9sx_HtPNoFohpuELOh-Jrsks,77
+deeplotx/trainer/base_trainer.py,sha256=z0MeAT-rRYmjeBXt0ckt7J1itYArR0Cx02wHesXUoZE,385
+deeplotx/trainer/text_binary_classification_trainer.py,sha256=QMLR4cC8NCUP-v7SOYVtCykNwahENmWHv9adaeTbYmA,6528
+deeplotx/util/__init__.py,sha256=5CH4MTeSgsmCe3LPMfvKoSBpwh6jDSBuHVElJvzQzgs,90
+deeplotx/util/hash.py,sha256=qbNU3RLBWGQYFVte9WZBAkZ1BkdjCXiKLDaKPN54KFk,662
+deeplotx/util/read_file.py,sha256=ptzouvEQeeW8KU5BrWNJlXw-vFXVrpS9SkAUxsu6A8A,612
+deeplotx-0.8.0.dist-info/licenses/LICENSE,sha256=IwGE9guuL-ryRPEKi6wFPI_zOhg7zDZbTYuHbSt_SAk,35823
+deeplotx-0.8.0.dist-info/METADATA,sha256=KprDhH6R0zsqk6tPUoC9FpWeljaaJTaTsYm2Au0qQwY,12251
+deeplotx-0.8.0.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
+deeplotx-0.8.0.dist-info/top_level.txt,sha256=hKg4pVDXZ-WWxkRfJFczRIll1Sv7VyfKCmzHLXbuh1U,9
+deeplotx-0.8.0.dist-info/RECORD,,
deeplotx/nn/self_attention.py DELETED
@@ -1,34 +0,0 @@
-from typing_extensions import override
-
-import torch
-from torch import nn, softmax
-
-from deeplotx.nn.base_neural_network import BaseNeuralNetwork
-
-
-class SelfAttention(BaseNeuralNetwork):
-    def __init__(self, feature_dim: int, model_name: str | None = None,
-                 device: str | None = None, dtype: torch.dtype | None = None):
-        super().__init__(model_name=model_name, device=device, dtype=dtype)
-        self._feature_dim = feature_dim
-        self.q_proj = nn.Linear(in_features=self._feature_dim, out_features=self._feature_dim,
-                                bias=True, device=self.device, dtype=self.dtype)
-        self.k_proj = nn.Linear(in_features=self._feature_dim, out_features=self._feature_dim,
-                                bias=True, device=self.device, dtype=self.dtype)
-        self.v_proj = nn.Linear(in_features=self._feature_dim, out_features=self._feature_dim,
-                                bias=True, device=self.device, dtype=self.dtype)
-
-    def _attention(self, x: torch.Tensor, mask: torch.Tensor | None = None) -> torch.Tensor:
-        q, k = self.q_proj(x), self.k_proj(x)
-        attn = torch.matmul(q, k.transpose(-2, -1))
-        attn = attn / (self._feature_dim ** 0.5)
-        attn = attn.masked_fill(mask == 0, -1e9) if mask is not None else attn
-        return softmax(attn, dim=-1)
-
-    @override
-    def forward(self, x: torch.Tensor, mask: torch.Tensor | None = None) -> torch.Tensor:
-        x = self.ensure_device_and_dtype(x, device=self.device, dtype=self.dtype)
-        if mask is not None:
-            mask = self.ensure_device_and_dtype(mask, device=self.device, dtype=self.dtype)
-        v = self.v_proj(x)
-        return torch.matmul(self._attention(x, mask), v)
deeplotx-0.5.6.dist-info/RECORD DELETED
@@ -1,30 +0,0 @@
-deeplotx/__init__.py,sha256=6El66QXHDrgNMsNIG9bG97WO8BhPK5btXbTikzx2ce4,1087
-deeplotx/encoder/__init__.py,sha256=BrsF5_4O-4pfihYF2wjExDOoAY-03kGJTH-Mhez4tsE,129
-deeplotx/encoder/encoder.py,sha256=2e1ZnZ37PkFQ5BePndmq42xmHp8YZh65Q1bd0dxejPI,2417
-deeplotx/encoder/long_text_encoder.py,sha256=4445FdVwubvDiebCWoT9wAUpYlMj6Mmd0OBxbFZ3ZIo,3565
-deeplotx/encoder/longformer_encoder.py,sha256=A8FXqd4mdHxSn_o_R689XtpT73ISDT788EgMQRGLC2g,1822
-deeplotx/nn/__init__.py,sha256=CS0UwyYKa8wI6vu6FBIYxvm-HAmw39MTMFlZDtqi6UA,444
-deeplotx/nn/auto_regression.py,sha256=7P63opWCWMqE2DigwbsL6kfXtFtJPz00Yo1RqflBz4A,572
-deeplotx/nn/base_neural_network.py,sha256=o9s0NqxkDcFZdipX8UrlbBmwYHOg7wPmzbjBEeGw63s,2902
-deeplotx/nn/linear_regression.py,sha256=7TbbplBgY70b1l5lKvTJMzDWQ8khQfnRCyMjObhVdEc,2180
-deeplotx/nn/logistic_regression.py,sha256=YiSLAon8gLDtMXAkPQ210sauod24eyJYYH50fPhj6T8,667
-deeplotx/nn/long_context_auto_regression.py,sha256=Z67Enq1kc1bERIrQW4jHeDQQmisOXhhjrtaPklnHkyw,605
-deeplotx/nn/long_context_recursive_sequential.py,sha256=_fKpPA7wt6B0kPyyig4xuhmLxygK19FSLgxW1Xa453M,1487
-deeplotx/nn/recursive_sequential.py,sha256=8YHZ-IdLyMJN5QVWPMuizDxLodAE9Bgdg1_YtIxFw7o,2247
-deeplotx/nn/self_attention.py,sha256=fb34wXnfgAGYJEhqa1l9AxMa-AHcCTOLbUlAfaGIK7Q,1766
-deeplotx/nn/softmax_regression.py,sha256=BeVk0G2H3zKG6bsQgPRNWuTxnnNmVI2zFZtCHgARAAc,688
-deeplotx/similarity/__init__.py,sha256=s3u-KSgxjnMcWpIItKgXNltFMPQ7YY3CqsqHI-5F1c8,724
-deeplotx/similarity/distribution.py,sha256=wQGouuuW531pZeBRKBujXsdsoz4fDnPw7_GW81jwepc,1066
-deeplotx/similarity/set.py,sha256=zhGFxtSIXlWqvipBYzoiPahp4g0boAIoUiMfG0wl07A,686
-deeplotx/similarity/vector.py,sha256=WVbDHqykt-fvuILVrhUCtIFAOEjY_zvttrXGM9eylG0,1125
-deeplotx/trainer/__init__.py,sha256=Fl5DR9UecQc5VtBcczU9sx_HtPNoFohpuELOh-Jrsks,77
-deeplotx/trainer/base_trainer.py,sha256=z0MeAT-rRYmjeBXt0ckt7J1itYArR0Cx02wHesXUoZE,385
-deeplotx/trainer/text_binary_classification_trainer.py,sha256=umuvikc09Op4SB43EqmYo8W3ung8DBjEOrMG3hCVFz8,4915
-deeplotx/util/__init__.py,sha256=JxqAK_WOOHcYVSTHBT1-WuBwWrPEVDTV3titeVWvNUM,74
-deeplotx/util/hash.py,sha256=qbNU3RLBWGQYFVte9WZBAkZ1BkdjCXiKLDaKPN54KFk,662
-deeplotx/util/read_file.py,sha256=ptzouvEQeeW8KU5BrWNJlXw-vFXVrpS9SkAUxsu6A8A,612
-deeplotx-0.5.6.dist-info/licenses/LICENSE,sha256=IwGE9guuL-ryRPEKi6wFPI_zOhg7zDZbTYuHbSt_SAk,35823
-deeplotx-0.5.6.dist-info/METADATA,sha256=vBUVgshgGG_vZmJT07C7CPEhMfBUmwbCtsIY06D_14g,10925
-deeplotx-0.5.6.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
-deeplotx-0.5.6.dist-info/top_level.txt,sha256=hKg4pVDXZ-WWxkRfJFczRIll1Sv7VyfKCmzHLXbuh1U,9
-deeplotx-0.5.6.dist-info/RECORD,,