deeplotx 0.4.15__py3-none-any.whl → 0.5.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
deeplotx/__init__.py CHANGED
@@ -9,7 +9,10 @@ from .nn import (
  LogisticRegression,
  SoftmaxRegression,
  RecursiveSequential,
- AutoRegression
+ LongContextRecursiveSequential,
+ SelfAttention,
+ AutoRegression,
+ LongContextAutoRegression
  )
  from .trainer import TextBinaryClassifierTrainer
 
deeplotx/nn/__init__.py CHANGED
@@ -2,4 +2,7 @@ from .linear_regression import LinearRegression
  from .logistic_regression import LogisticRegression
  from .softmax_regression import SoftmaxRegression
  from .recursive_sequential import RecursiveSequential
+ from .long_context_recursive_sequential import LongContextRecursiveSequential
+ from .self_attention import SelfAttention
  from .auto_regression import AutoRegression
+ from .long_context_auto_regression import LongContextAutoRegression

deeplotx/nn/base_neural_network.py CHANGED
@@ -1,8 +1,11 @@
+ import os
  from abc import abstractmethod

  import torch
  from torch import nn

+ DEFAULT_SUFFIX = 'dlx'
+

  class BaseNeuralNetwork(nn.Module):
  def __init__(self, model_name: str | None = None, device: str | None = None, dtype: torch.dtype | None = None):
@@ -44,7 +47,7 @@ class BaseNeuralNetwork(nn.Module):
  @abstractmethod
  def forward(self, *args, **kwargs) -> torch.Tensor: ...

- def predict(self, x) -> torch.Tensor:
+ def predict(self, x: torch.Tensor) -> torch.Tensor:
  x = self.ensure_device_and_dtype(x, device=self.device, dtype=self.dtype)
  __train = self.training
  self.training = False
@@ -53,10 +56,13 @@ class BaseNeuralNetwork(nn.Module):
  self.training = __train
  return res

- def save(self):
- torch.save(self.state_dict(), f'{self._model_name}.deeplotx')
+ def save(self, model_name: str | None = None, model_dir: str = '.', _suffix: str = DEFAULT_SUFFIX):
+ os.makedirs(model_dir, exist_ok=True)
+ model_file_name = f'{model_name}.{_suffix}' if model_name is not None else f'{self._model_name}.{_suffix}'
+ torch.save(self.state_dict(), os.path.join(model_dir, model_file_name))
  return self

- def load(self):
- self.load_state_dict(torch.load(f'{self._model_name}.deeplotx', map_location=self.device, weights_only=True))
+ def load(self, model_name: str | None = None, model_dir: str = '.', _suffix: str = DEFAULT_SUFFIX):
+ model_file_name = f'{model_name}.{_suffix}' if model_name is not None else f'{self._model_name}.{_suffix}'
+ self.load_state_dict(torch.load(os.path.join(model_dir, model_file_name), map_location=self.device, weights_only=True))
  return self
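
With this change, checkpoints default to a '.dlx' suffix and can target an arbitrary directory. A minimal sketch of the new save/load calls, assuming a LogisticRegression with hypothetical dimensions (the argument names come from the signatures above; the file and directory names are illustrative):

from deeplotx.nn import LogisticRegression

# Hypothetical dimensions; every BaseNeuralNetwork subclass exposes the same save/load API.
model = LogisticRegression(input_dim=768, output_dim=1, model_name='demo_classifier')

# New in 0.5.x: save() writes '<model_dir>/<model_name>.dlx' and creates the directory if needed.
model.save(model_name='demo_classifier', model_dir='checkpoints')

# load() resolves the same path; omitting model_name falls back to the name given at construction.
model.load(model_name='demo_classifier', model_dir='checkpoints')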

deeplotx/nn/linear_regression.py CHANGED
@@ -22,7 +22,7 @@ class LinearRegression(BaseNeuralNetwork):
  self.parametric_relu_4 = nn.PReLU(num_parameters=1, init=5e-3, device=self.device, dtype=self.dtype)

  @override
- def forward(self, x) -> torch.Tensor:
+ def forward(self, x: torch.Tensor) -> torch.Tensor:
  x = self.ensure_device_and_dtype(x, device=self.device, dtype=self.dtype)
  fc1_out = self.parametric_relu_1(self.fc1(x))
  x = nn.LayerNorm(normalized_shape=1024, eps=1e-9, device=self.device, dtype=self.dtype)(fc1_out)

deeplotx/nn/logistic_regression.py CHANGED
@@ -11,6 +11,6 @@ class LogisticRegression(LinearRegression):
  super().__init__(input_dim=input_dim, output_dim=output_dim, model_name=model_name, device=device, dtype=dtype)

  @override
- def forward(self, x) -> torch.Tensor:
+ def forward(self, x: torch.Tensor) -> torch.Tensor:
  x = self.ensure_device_and_dtype(x, device=self.device, dtype=self.dtype)
  return torch.sigmoid(super().forward(x))

deeplotx/nn/long_context_auto_regression.py ADDED
@@ -0,0 +1,12 @@
1
+ import torch
2
+
3
+ from deeplotx.nn import LongContextRecursiveSequential
4
+
5
+
6
+ class LongContextAutoRegression(LongContextRecursiveSequential):
7
+ def __init__(self, feature_dim: int, hidden_dim: int | None = None,
8
+ recursive_layers: int = 2, model_name: str | None = None,
9
+ device: str | None = None, dtype: torch.dtype | None = None):
10
+ super().__init__(input_dim=feature_dim, output_dim=feature_dim,
11
+ hidden_dim=hidden_dim, recursive_layers=recursive_layers,
12
+ model_name=model_name, device=device, dtype=dtype)
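
LongContextAutoRegression ties input_dim and output_dim to the same feature_dim, so the model maps a sequence of feature vectors to predictions in that same feature space. A minimal sketch, assuming a (batch, sequence, feature) input layout and that the inherited predict() initializes the recurrent state itself; neither detail is shown in this diff:

import torch

from deeplotx.nn import LongContextAutoRegression

# Hypothetical sizes: 32 time steps of 256-dimensional features.
model = LongContextAutoRegression(feature_dim=256, hidden_dim=512, recursive_layers=2)
sequence = torch.randn(1, 32, 256)

# predict() is inherited from RecursiveSequential and runs under torch.no_grad();
# because output_dim == feature_dim, the output lives in the same feature space as the input.
next_step = model.predict(sequence)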

deeplotx/nn/long_context_recursive_sequential.py ADDED
@@ -0,0 +1,28 @@
+ from typing_extensions import override
+
+ import torch
+ from torch import nn
+
+ from deeplotx.nn.recursive_sequential import RecursiveSequential
+ from deeplotx.nn.self_attention import SelfAttention
+
+
+ class LongContextRecursiveSequential(RecursiveSequential):
+ def __init__(self, input_dim: int, output_dim: int,
+ hidden_dim: int | None = None, recursive_layers: int = 2,
+ model_name: str | None = None, device: str | None = None,
+ dtype: torch.dtype | None = None):
+ super().__init__(input_dim=input_dim, output_dim=output_dim,
+ hidden_dim=hidden_dim, recursive_layers=recursive_layers,
+ model_name=model_name, device=device, dtype=dtype)
+ self._feature_dim = input_dim
+ self.self_attention = SelfAttention(feature_dim=input_dim)
+ self.proj = nn.Linear(in_features=input_dim * 2, out_features=input_dim,
+ bias=True, device=self.device, dtype=self.dtype)
+
+ @override
+ def forward(self, x: torch.Tensor, state: tuple[torch.Tensor, torch.Tensor]) -> tuple[torch.Tensor, tuple[torch.Tensor, torch.Tensor]]:
+ x = self.ensure_device_and_dtype(x, device=self.device, dtype=self.dtype)
+ x = torch.cat([self.self_attention(x), x], dim=-1)
+ x = nn.LayerNorm(normalized_shape=x.shape[-1], eps=1e-9, device=self.device, dtype=self.dtype)(x)
+ return super().forward(self.proj(x), state)
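
The long-context variant prepends a self-attention pass to the recurrent stack: attention output is concatenated with the raw input, layer-normalized, and projected back down to input_dim before the inherited forward() consumes it along with the (hidden, cell) state. A minimal sketch of the shape flow, assuming a (batch, sequence, feature) layout and that the inherited predict() manages the recurrent state; both are assumptions:

import torch

from deeplotx.nn import LongContextRecursiveSequential

# Hypothetical sizes: 16 steps of 128-dimensional features, scored with a single output.
model = LongContextRecursiveSequential(input_dim=128, output_dim=1,
                                       hidden_dim=256, recursive_layers=2)
x = torch.randn(2, 16, 128)

# Inside forward(): self_attention(x) keeps the last dim at 128, the concat doubles it to 256,
# LayerNorm normalizes over those 256 features, and proj maps them back to 128 for the
# recurrent layers, which also receive the (hidden, cell) state tuple.
y = model.predict(x)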

deeplotx/nn/recursive_sequential.py CHANGED
@@ -27,7 +27,7 @@ class RecursiveSequential(BaseNeuralNetwork):
  return zeros, zeros

  @override
- def forward(self, x, state: tuple[torch.Tensor, torch.Tensor]) -> tuple[torch.Tensor, tuple[torch.Tensor, torch.Tensor]]:
+ def forward(self, x: torch.Tensor, state: tuple[torch.Tensor, torch.Tensor]) -> tuple[torch.Tensor, tuple[torch.Tensor, torch.Tensor]]:
  x = self.ensure_device_and_dtype(x, device=self.device, dtype=self.dtype)
  state = (self.ensure_device_and_dtype(state[0], device=self.device, dtype=self.dtype),
  self.ensure_device_and_dtype(state[1], device=self.device, dtype=self.dtype))
@@ -36,7 +36,7 @@ class RecursiveSequential(BaseNeuralNetwork):
  return x, (hidden_state, cell_state)

  @override
- def predict(self, x) -> torch.Tensor:
+ def predict(self, x: torch.Tensor) -> torch.Tensor:
  __train = self.training
  self.training = False
  with torch.no_grad():

deeplotx/nn/self_attention.py ADDED
@@ -0,0 +1,34 @@
+ from typing_extensions import override
+
+ import torch
+ from torch import nn, softmax
+
+ from deeplotx.nn.base_neural_network import BaseNeuralNetwork
+
+
+ class SelfAttention(BaseNeuralNetwork):
+ def __init__(self, feature_dim: int, model_name: str | None = None,
+ device: str | None = None, dtype: torch.dtype | None = None):
+ super().__init__(model_name=model_name, device=device, dtype=dtype)
+ self._feature_dim = feature_dim
+ self.q_proj = nn.Linear(in_features=self._feature_dim, out_features=self._feature_dim,
+ bias=True, device=self.device, dtype=self.dtype)
+ self.k_proj = nn.Linear(in_features=self._feature_dim, out_features=self._feature_dim,
+ bias=True, device=self.device, dtype=self.dtype)
+ self.v_proj = nn.Linear(in_features=self._feature_dim, out_features=self._feature_dim,
+ bias=True, device=self.device, dtype=self.dtype)
+
+ def _attention(self, x: torch.Tensor, mask: torch.Tensor | None = None) -> torch.Tensor:
+ q, k = self.q_proj(x), self.k_proj(x)
+ attn = torch.matmul(q, k.transpose(-2, -1))
+ attn = attn / (self._feature_dim ** 0.5)
+ attn = attn.masked_fill(mask == 0, -1e9) if mask is not None else attn
+ return softmax(attn, dim=-1)
+
+ @override
+ def forward(self, x: torch.Tensor, mask: torch.Tensor | None = None) -> torch.Tensor:
+ x = self.ensure_device_and_dtype(x, device=self.device, dtype=self.dtype)
+ if mask is not None:
+ mask = self.ensure_device_and_dtype(mask, device=self.device, dtype=self.dtype)
+ v = self.v_proj(x)
+ return torch.matmul(self._attention(x, mask), v)
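
SelfAttention is single-head scaled dot-product attention: Q, K and V are linear projections of the same input, scores are scaled by sqrt(feature_dim), optionally masked, and softmax-normalized before weighting V. A small usage sketch; the tensor shapes and the mask layout are assumptions, not something this diff specifies:

import torch

from deeplotx.nn import SelfAttention

attention = SelfAttention(feature_dim=128)

# Hypothetical input: 4 sequences of 10 positions, 128-dimensional features each.
x = torch.randn(4, 10, 128)

# Positions where the mask equals 0 are filled with -1e9 before the softmax, so their
# weights collapse to ~0; the mask only has to broadcast against the (4, 10, 10) scores.
mask = torch.ones(4, 1, 10)

out = attention(x, mask)  # same shape as x: (4, 10, 128)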

deeplotx/nn/softmax_regression.py CHANGED
@@ -11,6 +11,6 @@ class SoftmaxRegression(LinearRegression):
  super().__init__(input_dim=input_dim, output_dim=output_dim, model_name=model_name, device=device, dtype=dtype)

  @override
- def forward(self, x) -> torch.Tensor:
+ def forward(self, x: torch.Tensor) -> torch.Tensor:
  x = self.ensure_device_and_dtype(x, device=self.device, dtype=self.dtype)
  return torch.softmax(super().forward(x), dim=-1, dtype=self.dtype)

deeplotx/trainer/text_binary_classification_trainer.py CHANGED
@@ -6,7 +6,7 @@ from torch import nn, optim
  from torch.utils.data import DataLoader, TensorDataset

  from deeplotx.encoder.long_text_encoder import LongTextEncoder
- from deeplotx.nn.recursive_sequential import RecursiveSequential
+ from deeplotx.nn.long_context_recursive_sequential import LongContextRecursiveSequential
  from deeplotx.trainer.base_trainer import BaseTrainer

  logger = logging.getLogger('deeplotx.trainer')
@@ -23,7 +23,7 @@ class TextBinaryClassifierTrainer(BaseTrainer):
  num_epochs: int, learning_rate: float = 2e-6, balancing_dataset: bool = True,
  train_loss_threshold: float = 0.0, valid_loss_threshold: float = 0.0,
  alpha: float = 1e-4, rho: float = 0.2,
- hidden_dim: int = 256, recursive_layers: int = 2) -> RecursiveSequential:
+ hidden_dim: int = 256, recursive_layers: int = 2) -> LongContextRecursiveSequential:
  if balancing_dataset:
  min_length = min(len(positive_texts), len(negative_texts))
  positive_texts = positive_texts[:min_length]
@@ -46,10 +46,10 @@ class TextBinaryClassifierTrainer(BaseTrainer):
  logger.warning("The dimension of features doesn't match. A new model instance will be created.")
  self.model = None
  if self.model is None:
- self.model = RecursiveSequential(input_dim=feature_dim, output_dim=1,
- hidden_dim=hidden_dim,
- recursive_layers=recursive_layers,
- device=self.device, dtype=dtype)
+ self.model = LongContextRecursiveSequential(input_dim=feature_dim, output_dim=1,
+ hidden_dim=hidden_dim,
+ recursive_layers=recursive_layers,
+ device=self.device, dtype=dtype)
  loss_function = nn.BCELoss()
  optimizer = optim.Adamax(self.model.parameters(), lr=learning_rate)
  for epoch in range(num_epochs):
@@ -76,7 +76,8 @@ class TextBinaryClassifierTrainer(BaseTrainer):
  f"Valid Loss: {total_valid_loss:.4f}")
  if total_valid_loss < valid_loss_threshold:
  break
- logger.debug(f"Epoch {epoch + 1}/{num_epochs} | Train Loss: {total_loss:.4f}")
+ else:
+ logger.debug(f"Epoch {epoch + 1}/{num_epochs} | Train Loss: {total_loss:.4f}")
  if total_loss < train_loss_threshold:
  break
  return self.model
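
With this change the trainer assembles and returns a LongContextRecursiveSequential, so trained text classifiers pick up the self-attention front end automatically. A hedged sketch of how the result might be used together with the new checkpoint API; the trainer's constructor and the keyword names positive_texts/negative_texts are inferred from the method body above, not from a full signature in this diff:

# `trainer` is assumed to be an already-constructed TextBinaryClassifierTrainer.
model = trainer.train(positive_texts=positive_texts, negative_texts=negative_texts,
                      num_epochs=10, learning_rate=2e-6,
                      hidden_dim=256, recursive_layers=2)

# The returned LongContextRecursiveSequential inherits the new save()/load(),
# so checkpoints default to the 0.5.x '.dlx' suffix.
model.save(model_name='text_binary_classifier', model_dir='checkpoints')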

{deeplotx-0.4.15.dist-info → deeplotx-0.5.1.dist-info}/METADATA RENAMED
@@ -1,6 +1,6 @@
  Metadata-Version: 2.4
  Name: deeplotx
- Version: 0.4.15
+ Version: 0.5.1
  Summary: Easy-2-use long text NLP toolkit.
  Requires-Python: >=3.10
  Description-Content-Type: text/markdown

deeplotx-0.5.1.dist-info/RECORD ADDED
@@ -0,0 +1,30 @@
+ deeplotx/__init__.py,sha256=3rVjGSRdcxpxZzHIQohT8dheB5mVdeXIrBkfH2yorcQ,1091
+ deeplotx/encoder/__init__.py,sha256=EM-xrTsHoGaiiFpj-iFAxilMHXC_sQKWYrcq1qCnI3U,138
+ deeplotx/encoder/bert_encoder.py,sha256=uLqGcXH6AGY6CcjjbYbh09VWYqSpsg-y-jHYB6Fmp3w,2377
+ deeplotx/encoder/long_text_encoder.py,sha256=hl_O8kR9o1kcII9YfSx2rf_Pk0l_Rv7LNbsS9UsTU0c,3373
+ deeplotx/encoder/longformer_encoder.py,sha256=A8FXqd4mdHxSn_o_R689XtpT73ISDT788EgMQRGLC2g,1822
+ deeplotx/nn/__init__.py,sha256=CS0UwyYKa8wI6vu6FBIYxvm-HAmw39MTMFlZDtqi6UA,444
+ deeplotx/nn/auto_regression.py,sha256=7P63opWCWMqE2DigwbsL6kfXtFtJPz00Yo1RqflBz4A,572
+ deeplotx/nn/base_neural_network.py,sha256=o9s0NqxkDcFZdipX8UrlbBmwYHOg7wPmzbjBEeGw63s,2902
+ deeplotx/nn/linear_regression.py,sha256=7TbbplBgY70b1l5lKvTJMzDWQ8khQfnRCyMjObhVdEc,2180
+ deeplotx/nn/logistic_regression.py,sha256=YiSLAon8gLDtMXAkPQ210sauod24eyJYYH50fPhj6T8,667
+ deeplotx/nn/long_context_auto_regression.py,sha256=Z67Enq1kc1bERIrQW4jHeDQQmisOXhhjrtaPklnHkyw,605
+ deeplotx/nn/long_context_recursive_sequential.py,sha256=_fKpPA7wt6B0kPyyig4xuhmLxygK19FSLgxW1Xa453M,1487
+ deeplotx/nn/recursive_sequential.py,sha256=8YHZ-IdLyMJN5QVWPMuizDxLodAE9Bgdg1_YtIxFw7o,2247
+ deeplotx/nn/self_attention.py,sha256=fb34wXnfgAGYJEhqa1l9AxMa-AHcCTOLbUlAfaGIK7Q,1766
+ deeplotx/nn/softmax_regression.py,sha256=BeVk0G2H3zKG6bsQgPRNWuTxnnNmVI2zFZtCHgARAAc,688
+ deeplotx/similarity/__init__.py,sha256=s3u-KSgxjnMcWpIItKgXNltFMPQ7YY3CqsqHI-5F1c8,724
+ deeplotx/similarity/distribution.py,sha256=wQGouuuW531pZeBRKBujXsdsoz4fDnPw7_GW81jwepc,1066
+ deeplotx/similarity/set.py,sha256=zhGFxtSIXlWqvipBYzoiPahp4g0boAIoUiMfG0wl07A,686
+ deeplotx/similarity/vector.py,sha256=WVbDHqykt-fvuILVrhUCtIFAOEjY_zvttrXGM9eylG0,1125
+ deeplotx/trainer/__init__.py,sha256=Fl5DR9UecQc5VtBcczU9sx_HtPNoFohpuELOh-Jrsks,77
+ deeplotx/trainer/base_trainer.py,sha256=z0MeAT-rRYmjeBXt0ckt7J1itYArR0Cx02wHesXUoZE,385
+ deeplotx/trainer/text_binary_classification_trainer.py,sha256=Ktdk4rCNHgTFdXVFmbTnvIlGIJi1gphGRkuRgL2bVOo,4793
+ deeplotx/util/__init__.py,sha256=JxqAK_WOOHcYVSTHBT1-WuBwWrPEVDTV3titeVWvNUM,74
+ deeplotx/util/hash.py,sha256=wwsC6kOQvbpuvwKsNQOARd78_wePmW9i3oaUuXRUnpc,352
+ deeplotx/util/read_file.py,sha256=ptzouvEQeeW8KU5BrWNJlXw-vFXVrpS9SkAUxsu6A8A,612
+ deeplotx-0.5.1.dist-info/licenses/LICENSE,sha256=IwGE9guuL-ryRPEKi6wFPI_zOhg7zDZbTYuHbSt_SAk,35823
+ deeplotx-0.5.1.dist-info/METADATA,sha256=LatUJZ1YzKrlPMDNI2UiOqSf5h9mP57kf4f5ngnfa8Q,6954
+ deeplotx-0.5.1.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
+ deeplotx-0.5.1.dist-info/top_level.txt,sha256=hKg4pVDXZ-WWxkRfJFczRIll1Sv7VyfKCmzHLXbuh1U,9
+ deeplotx-0.5.1.dist-info/RECORD,,

deeplotx-0.4.15.dist-info/RECORD DELETED
@@ -1,27 +0,0 @@
- deeplotx/__init__.py,sha256=wMN_AI14V-0BPbQghYpvd2y7eUGfhr7jKTTuur-5Upg,1002
- deeplotx/encoder/__init__.py,sha256=EM-xrTsHoGaiiFpj-iFAxilMHXC_sQKWYrcq1qCnI3U,138
- deeplotx/encoder/bert_encoder.py,sha256=uLqGcXH6AGY6CcjjbYbh09VWYqSpsg-y-jHYB6Fmp3w,2377
- deeplotx/encoder/long_text_encoder.py,sha256=hl_O8kR9o1kcII9YfSx2rf_Pk0l_Rv7LNbsS9UsTU0c,3373
- deeplotx/encoder/longformer_encoder.py,sha256=A8FXqd4mdHxSn_o_R689XtpT73ISDT788EgMQRGLC2g,1822
- deeplotx/nn/__init__.py,sha256=oQ-vYXyuaGelfCOs2im_gZXAiiBlCCVXh1uw9yjvRMs,253
- deeplotx/nn/auto_regression.py,sha256=7P63opWCWMqE2DigwbsL6kfXtFtJPz00Yo1RqflBz4A,572
- deeplotx/nn/base_neural_network.py,sha256=oGlqY6ZZ3DGOXWQQ9nZ7ktJpfyIaUrSraGnFRbGD1jM,2384
- deeplotx/nn/linear_regression.py,sha256=_LQFrOKBbQxvuNzb_B8Mr6PAQJUg-pFeu3h7_jQz04o,2166
- deeplotx/nn/logistic_regression.py,sha256=j8QGe0e7In97RMOXApJRID85qf1rOUCOk3V368CBfqs,653
- deeplotx/nn/recursive_sequential.py,sha256=pHZChjzw9cuMQ0lmv42lxxVgxCU6D7owOgph0Irj-w4,2219
- deeplotx/nn/softmax_regression.py,sha256=SlhvHho-Oufp7adAjm1t1ygidu-FrnHQ9aleMXyS_s8,674
- deeplotx/similarity/__init__.py,sha256=s3u-KSgxjnMcWpIItKgXNltFMPQ7YY3CqsqHI-5F1c8,724
- deeplotx/similarity/distribution.py,sha256=wQGouuuW531pZeBRKBujXsdsoz4fDnPw7_GW81jwepc,1066
- deeplotx/similarity/set.py,sha256=zhGFxtSIXlWqvipBYzoiPahp4g0boAIoUiMfG0wl07A,686
- deeplotx/similarity/vector.py,sha256=WVbDHqykt-fvuILVrhUCtIFAOEjY_zvttrXGM9eylG0,1125
- deeplotx/trainer/__init__.py,sha256=Fl5DR9UecQc5VtBcczU9sx_HtPNoFohpuELOh-Jrsks,77
- deeplotx/trainer/base_trainer.py,sha256=z0MeAT-rRYmjeBXt0ckt7J1itYArR0Cx02wHesXUoZE,385
- deeplotx/trainer/text_binary_classification_trainer.py,sha256=Wq_pGO78zgdXxFeBjam4yp__-dTvsuwl4H81HSl_kjE,4691
- deeplotx/util/__init__.py,sha256=JxqAK_WOOHcYVSTHBT1-WuBwWrPEVDTV3titeVWvNUM,74
- deeplotx/util/hash.py,sha256=wwsC6kOQvbpuvwKsNQOARd78_wePmW9i3oaUuXRUnpc,352
- deeplotx/util/read_file.py,sha256=ptzouvEQeeW8KU5BrWNJlXw-vFXVrpS9SkAUxsu6A8A,612
- deeplotx-0.4.15.dist-info/licenses/LICENSE,sha256=IwGE9guuL-ryRPEKi6wFPI_zOhg7zDZbTYuHbSt_SAk,35823
- deeplotx-0.4.15.dist-info/METADATA,sha256=HB6VHdLgyuMclJYLykBMnbnLa7s-rwfHyhrgjNdoRFQ,6955
- deeplotx-0.4.15.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
- deeplotx-0.4.15.dist-info/top_level.txt,sha256=hKg4pVDXZ-WWxkRfJFczRIll1Sv7VyfKCmzHLXbuh1U,9
- deeplotx-0.4.15.dist-info/RECORD,,