deeplotx 0.5.6__tar.gz → 0.8.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (47)
  1. {deeplotx-0.5.6 → deeplotx-0.8.0}/PKG-INFO +65 -42
  2. {deeplotx-0.5.6 → deeplotx-0.8.0}/README.md +63 -40
  3. {deeplotx-0.5.6 → deeplotx-0.8.0}/deeplotx/__init__.py +5 -1
  4. deeplotx-0.8.0/deeplotx/encoder/encoder.py +66 -0
  5. {deeplotx-0.5.6 → deeplotx-0.8.0}/deeplotx/encoder/long_text_encoder.py +4 -2
  6. {deeplotx-0.5.6 → deeplotx-0.8.0}/deeplotx/nn/__init__.py +6 -1
  7. deeplotx-0.8.0/deeplotx/nn/attention.py +48 -0
  8. deeplotx-0.8.0/deeplotx/nn/auto_regression.py +14 -0
  9. deeplotx-0.8.0/deeplotx/nn/base_neural_network.py +140 -0
  10. deeplotx-0.8.0/deeplotx/nn/feed_forward.py +53 -0
  11. deeplotx-0.8.0/deeplotx/nn/linear_regression.py +25 -0
  12. {deeplotx-0.5.6 → deeplotx-0.8.0}/deeplotx/nn/logistic_regression.py +5 -2
  13. deeplotx-0.8.0/deeplotx/nn/long_context_auto_regression.py +16 -0
  14. deeplotx-0.8.0/deeplotx/nn/long_context_recursive_sequential.py +36 -0
  15. deeplotx-0.8.0/deeplotx/nn/multi_head_attention.py +34 -0
  16. {deeplotx-0.5.6 → deeplotx-0.8.0}/deeplotx/nn/recursive_sequential.py +19 -12
  17. deeplotx-0.8.0/deeplotx/nn/roformer_encoder.py +40 -0
  18. deeplotx-0.8.0/deeplotx/nn/rope.py +41 -0
  19. {deeplotx-0.5.6 → deeplotx-0.8.0}/deeplotx/nn/softmax_regression.py +5 -2
  20. {deeplotx-0.5.6 → deeplotx-0.8.0}/deeplotx/trainer/text_binary_classification_trainer.py +24 -8
  21. deeplotx-0.8.0/deeplotx/util/__init__.py +2 -0
  22. {deeplotx-0.5.6 → deeplotx-0.8.0}/deeplotx.egg-info/PKG-INFO +65 -42
  23. {deeplotx-0.5.6 → deeplotx-0.8.0}/deeplotx.egg-info/SOURCES.txt +5 -1
  24. {deeplotx-0.5.6 → deeplotx-0.8.0}/deeplotx.egg-info/requires.txt +1 -1
  25. {deeplotx-0.5.6 → deeplotx-0.8.0}/pyproject.toml +2 -2
  26. deeplotx-0.5.6/deeplotx/encoder/encoder.py +0 -48
  27. deeplotx-0.5.6/deeplotx/nn/auto_regression.py +0 -12
  28. deeplotx-0.5.6/deeplotx/nn/base_neural_network.py +0 -68
  29. deeplotx-0.5.6/deeplotx/nn/linear_regression.py +0 -37
  30. deeplotx-0.5.6/deeplotx/nn/long_context_auto_regression.py +0 -12
  31. deeplotx-0.5.6/deeplotx/nn/long_context_recursive_sequential.py +0 -28
  32. deeplotx-0.5.6/deeplotx/nn/self_attention.py +0 -34
  33. deeplotx-0.5.6/deeplotx/util/__init__.py +0 -2
  34. {deeplotx-0.5.6 → deeplotx-0.8.0}/LICENSE +0 -0
  35. {deeplotx-0.5.6 → deeplotx-0.8.0}/deeplotx/encoder/__init__.py +0 -0
  36. {deeplotx-0.5.6 → deeplotx-0.8.0}/deeplotx/encoder/longformer_encoder.py +0 -0
  37. {deeplotx-0.5.6 → deeplotx-0.8.0}/deeplotx/similarity/__init__.py +0 -0
  38. {deeplotx-0.5.6 → deeplotx-0.8.0}/deeplotx/similarity/distribution.py +0 -0
  39. {deeplotx-0.5.6 → deeplotx-0.8.0}/deeplotx/similarity/set.py +0 -0
  40. {deeplotx-0.5.6 → deeplotx-0.8.0}/deeplotx/similarity/vector.py +0 -0
  41. {deeplotx-0.5.6 → deeplotx-0.8.0}/deeplotx/trainer/__init__.py +0 -0
  42. {deeplotx-0.5.6 → deeplotx-0.8.0}/deeplotx/trainer/base_trainer.py +0 -0
  43. {deeplotx-0.5.6 → deeplotx-0.8.0}/deeplotx/util/hash.py +0 -0
  44. {deeplotx-0.5.6 → deeplotx-0.8.0}/deeplotx/util/read_file.py +0 -0
  45. {deeplotx-0.5.6 → deeplotx-0.8.0}/deeplotx.egg-info/dependency_links.txt +0 -0
  46. {deeplotx-0.5.6 → deeplotx-0.8.0}/deeplotx.egg-info/top_level.txt +0 -0
  47. {deeplotx-0.5.6 → deeplotx-0.8.0}/setup.cfg +0 -0
@@ -1,6 +1,6 @@
  Metadata-Version: 2.4
  Name: deeplotx
- Version: 0.5.6
+ Version: 0.8.0
  Summary: Easy-2-use long text NLP toolkit.
  Requires-Python: >=3.10
  Description-Content-Type: text/markdown
@@ -13,7 +13,7 @@ Requires-Dist: python-dotenv
  Requires-Dist: torch
  Requires-Dist: transformers
  Requires-Dist: typing-extensions
- Requires-Dist: vortezwohl>=0.0.6
+ Requires-Dist: vortezwohl>=0.0.8
  Dynamic: license-file
 
  [![Ask DeepWiki](https://deepwiki.com/badge.svg)](https://deepwiki.com/vortezwohl/DeepLoTX)
@@ -163,6 +163,8 @@ Dynamic: license-file
 
  ```python
  from deeplotx import (
+     BaseNeuralNetwork,  # base class for deep neural networks
+     FeedForward,  # feed-forward network
      LinearRegression,  # linear regression
      LogisticRegression,  # logistic regression / binary classification / multi-label classification
      SoftmaxRegression,  # Softmax regression / multi-class classification
@@ -181,38 +183,54 @@ Dynamic: license-file
 
  import torch
  from torch import nn
-
+
  from deeplotx.nn.base_neural_network import BaseNeuralNetwork
-
-
- class LinearRegression(BaseNeuralNetwork):
-     def __init__(self, input_dim: int, output_dim: int, model_name: str | None = None,
-                  device: str | None = None, dtype: torch.dtype | None = None):
-         super().__init__(model_name=model_name, device=device, dtype=dtype)
-         self.fc1 = nn.Linear(input_dim, 1024, device=self.device, dtype=self.dtype)
-         self.fc1_to_fc4_res = nn.Linear(1024, 64, device=self.device, dtype=self.dtype)
-         self.fc2 = nn.Linear(1024, 768, device=self.device, dtype=self.dtype)
-         self.fc3 = nn.Linear(768, 128, device=self.device, dtype=self.dtype)
-         self.fc4 = nn.Linear(128, 64, device=self.device, dtype=self.dtype)
-         self.fc5 = nn.Linear(64, output_dim, device=self.device, dtype=self.dtype)
-         self.parametric_relu_1 = nn.PReLU(num_parameters=1, init=5e-3, device=self.device, dtype=self.dtype)
-         self.parametric_relu_2 = nn.PReLU(num_parameters=1, init=5e-3, device=self.device, dtype=self.dtype)
-         self.parametric_relu_3 = nn.PReLU(num_parameters=1, init=5e-3, device=self.device, dtype=self.dtype)
-         self.parametric_relu_4 = nn.PReLU(num_parameters=1, init=5e-3, device=self.device, dtype=self.dtype)
-
+
+
+ class FeedForwardUnit(BaseNeuralNetwork):
+     def __init__(self, feature_dim: int, expansion_factor: int | float = 2,
+                  bias: bool = True, dropout_rate: float = 0.05, model_name: str | None = None,
+                  device: str | None = None, dtype: torch.dtype | None = None):
+         super().__init__(in_features=feature_dim, out_features=feature_dim, model_name=model_name, device=device, dtype=dtype)
+         self._dropout_rate = dropout_rate
+         self.fc1 = nn.Linear(feature_dim, int(feature_dim * expansion_factor), bias=bias,
+                              device=self.device, dtype=self.dtype)
+         self.fc2 = nn.Linear(int(feature_dim * expansion_factor), feature_dim, bias=bias,
+                              device=self.device, dtype=self.dtype)
+         self.parametric_relu_1 = nn.PReLU(num_parameters=1, init=5e-3,
+                                           device=self.device, dtype=self.dtype)
+         self.layer_norm = nn.LayerNorm(normalized_shape=self.fc1.in_features, eps=1e-9,
+                                        device=self.device, dtype=self.dtype)
+
      @override
-     def forward(self, x) -> torch.Tensor:
+     def forward(self, x: torch.Tensor) -> torch.Tensor:
          x = self.ensure_device_and_dtype(x, device=self.device, dtype=self.dtype)
-         fc1_out = self.parametric_relu_1(self.fc1(x))
-         x = nn.LayerNorm(normalized_shape=1024, eps=1e-9, device=self.device, dtype=self.dtype)(fc1_out)
-         x = torch.dropout(x, p=0.2, train=self.training)
-         x = self.parametric_relu_2(self.fc2(x))
-         x = nn.LayerNorm(normalized_shape=768, eps=1e-9, device=self.device, dtype=self.dtype)(x)
-         x = torch.dropout(x, p=0.2, train=self.training)
-         x = self.parametric_relu_3(self.fc3(x))
-         x = torch.dropout(x, p=0.2, train=self.training)
-         x = self.parametric_relu_4(self.fc4(x)) + self.fc1_to_fc4_res(fc1_out)
-         x = self.fc5(x)
+         residual = x
+         x = self.layer_norm(x)
+         x = self.fc1(x)
+         x = self.parametric_relu_1(x)
+         if self._dropout_rate > .0:
+             x = torch.dropout(x, p=self._dropout_rate, train=self.training)
+         return self.fc2(x) + residual
+
+
+ class FeedForward(BaseNeuralNetwork):
+     def __init__(self, feature_dim: int, num_layers: int = 1, expansion_factor: int | float = 2,
+                  bias: bool = True, dropout_rate: float = 0.05, model_name: str | None = None,
+                  device: str | None = None, dtype: torch.dtype | None = None):
+         if num_layers < 1:
+             raise ValueError('num_layers cannot be less than 1.')
+         super().__init__(in_features=feature_dim, out_features=feature_dim, model_name=model_name, device=device, dtype=dtype)
+         self.ffn_layers = nn.ModuleList([FeedForwardUnit(feature_dim=feature_dim,
+                                                          expansion_factor=expansion_factor, bias=bias,
+                                                          dropout_rate=dropout_rate,
+                                                          device=self.device, dtype=self.dtype)] * num_layers)
+
+     @override
+     def forward(self, x: torch.Tensor) -> torch.Tensor:
+         x = self.ensure_device_and_dtype(x, device=self.device, dtype=self.dtype)
+         for ffn in self.ffn_layers:
+             x = ffn(x)
          return x
  ```
 
@@ -222,29 +240,34 @@ Dynamic: license-file
  from typing_extensions import override
 
  import torch
- from torch import nn, softmax
 
  from deeplotx.nn.base_neural_network import BaseNeuralNetwork
+ from deeplotx.nn.feed_forward import FeedForward
 
 
  class SelfAttention(BaseNeuralNetwork):
-     def __init__(self, feature_dim: int, model_name: str | None = None,
-                  device: str | None = None, dtype: torch.dtype | None = None):
-         super().__init__(model_name=model_name, device=device, dtype=dtype)
+     def __init__(self, feature_dim: int, bias: bool = True, proj_layers: int = 1,
+                  proj_expansion_factor: int | float = 1.5, dropout_rate: float = 0.02,
+                  model_name: str | None = None, device: str | None = None, dtype: torch.dtype | None = None):
+         super().__init__(in_features=feature_dim, out_features=feature_dim, model_name=model_name,
+                          device=device, dtype=dtype)
          self._feature_dim = feature_dim
-         self.q_proj = nn.Linear(in_features=self._feature_dim, out_features=self._feature_dim,
-                                 bias=True, device=self.device, dtype=self.dtype)
-         self.k_proj = nn.Linear(in_features=self._feature_dim, out_features=self._feature_dim,
-                                 bias=True, device=self.device, dtype=self.dtype)
-         self.v_proj = nn.Linear(in_features=self._feature_dim, out_features=self._feature_dim,
-                                 bias=True, device=self.device, dtype=self.dtype)
+         self.q_proj = FeedForward(feature_dim=self._feature_dim, num_layers=proj_layers,
+                                   expansion_factor=proj_expansion_factor,
+                                   bias=bias, dropout_rate=dropout_rate, device=self.device, dtype=self.dtype)
+         self.k_proj = FeedForward(feature_dim=self._feature_dim, num_layers=proj_layers,
+                                   expansion_factor=proj_expansion_factor,
+                                   bias=bias, dropout_rate=dropout_rate, device=self.device, dtype=self.dtype)
+         self.v_proj = FeedForward(feature_dim=self._feature_dim, num_layers=proj_layers,
+                                   expansion_factor=proj_expansion_factor,
+                                   bias=bias, dropout_rate=dropout_rate, device=self.device, dtype=self.dtype)
 
      def _attention(self, x: torch.Tensor, mask: torch.Tensor | None = None) -> torch.Tensor:
          q, k = self.q_proj(x), self.k_proj(x)
          attn = torch.matmul(q, k.transpose(-2, -1))
          attn = attn / (self._feature_dim ** 0.5)
          attn = attn.masked_fill(mask == 0, -1e9) if mask is not None else attn
-         return softmax(attn, dim=-1)
+         return torch.softmax(attn, dim=-1)
 
      @override
      def forward(self, x: torch.Tensor, mask: torch.Tensor | None = None) -> torch.Tensor:
@@ -145,6 +145,8 @@
 
  ```python
  from deeplotx import (
+     BaseNeuralNetwork,  # base class for deep neural networks
+     FeedForward,  # feed-forward network
      LinearRegression,  # linear regression
      LogisticRegression,  # logistic regression / binary classification / multi-label classification
      SoftmaxRegression,  # Softmax regression / multi-class classification
@@ -163,38 +165,54 @@
 
  import torch
  from torch import nn
-
+
  from deeplotx.nn.base_neural_network import BaseNeuralNetwork
-
-
- class LinearRegression(BaseNeuralNetwork):
-     def __init__(self, input_dim: int, output_dim: int, model_name: str | None = None,
-                  device: str | None = None, dtype: torch.dtype | None = None):
-         super().__init__(model_name=model_name, device=device, dtype=dtype)
-         self.fc1 = nn.Linear(input_dim, 1024, device=self.device, dtype=self.dtype)
-         self.fc1_to_fc4_res = nn.Linear(1024, 64, device=self.device, dtype=self.dtype)
-         self.fc2 = nn.Linear(1024, 768, device=self.device, dtype=self.dtype)
-         self.fc3 = nn.Linear(768, 128, device=self.device, dtype=self.dtype)
-         self.fc4 = nn.Linear(128, 64, device=self.device, dtype=self.dtype)
-         self.fc5 = nn.Linear(64, output_dim, device=self.device, dtype=self.dtype)
-         self.parametric_relu_1 = nn.PReLU(num_parameters=1, init=5e-3, device=self.device, dtype=self.dtype)
-         self.parametric_relu_2 = nn.PReLU(num_parameters=1, init=5e-3, device=self.device, dtype=self.dtype)
-         self.parametric_relu_3 = nn.PReLU(num_parameters=1, init=5e-3, device=self.device, dtype=self.dtype)
-         self.parametric_relu_4 = nn.PReLU(num_parameters=1, init=5e-3, device=self.device, dtype=self.dtype)
-
+
+
+ class FeedForwardUnit(BaseNeuralNetwork):
+     def __init__(self, feature_dim: int, expansion_factor: int | float = 2,
+                  bias: bool = True, dropout_rate: float = 0.05, model_name: str | None = None,
+                  device: str | None = None, dtype: torch.dtype | None = None):
+         super().__init__(in_features=feature_dim, out_features=feature_dim, model_name=model_name, device=device, dtype=dtype)
+         self._dropout_rate = dropout_rate
+         self.fc1 = nn.Linear(feature_dim, int(feature_dim * expansion_factor), bias=bias,
+                              device=self.device, dtype=self.dtype)
+         self.fc2 = nn.Linear(int(feature_dim * expansion_factor), feature_dim, bias=bias,
+                              device=self.device, dtype=self.dtype)
+         self.parametric_relu_1 = nn.PReLU(num_parameters=1, init=5e-3,
+                                           device=self.device, dtype=self.dtype)
+         self.layer_norm = nn.LayerNorm(normalized_shape=self.fc1.in_features, eps=1e-9,
+                                        device=self.device, dtype=self.dtype)
+
      @override
-     def forward(self, x) -> torch.Tensor:
+     def forward(self, x: torch.Tensor) -> torch.Tensor:
          x = self.ensure_device_and_dtype(x, device=self.device, dtype=self.dtype)
-         fc1_out = self.parametric_relu_1(self.fc1(x))
-         x = nn.LayerNorm(normalized_shape=1024, eps=1e-9, device=self.device, dtype=self.dtype)(fc1_out)
-         x = torch.dropout(x, p=0.2, train=self.training)
-         x = self.parametric_relu_2(self.fc2(x))
-         x = nn.LayerNorm(normalized_shape=768, eps=1e-9, device=self.device, dtype=self.dtype)(x)
-         x = torch.dropout(x, p=0.2, train=self.training)
-         x = self.parametric_relu_3(self.fc3(x))
-         x = torch.dropout(x, p=0.2, train=self.training)
-         x = self.parametric_relu_4(self.fc4(x)) + self.fc1_to_fc4_res(fc1_out)
-         x = self.fc5(x)
+         residual = x
+         x = self.layer_norm(x)
+         x = self.fc1(x)
+         x = self.parametric_relu_1(x)
+         if self._dropout_rate > .0:
+             x = torch.dropout(x, p=self._dropout_rate, train=self.training)
+         return self.fc2(x) + residual
+
+
+ class FeedForward(BaseNeuralNetwork):
+     def __init__(self, feature_dim: int, num_layers: int = 1, expansion_factor: int | float = 2,
+                  bias: bool = True, dropout_rate: float = 0.05, model_name: str | None = None,
+                  device: str | None = None, dtype: torch.dtype | None = None):
+         if num_layers < 1:
+             raise ValueError('num_layers cannot be less than 1.')
+         super().__init__(in_features=feature_dim, out_features=feature_dim, model_name=model_name, device=device, dtype=dtype)
+         self.ffn_layers = nn.ModuleList([FeedForwardUnit(feature_dim=feature_dim,
+                                                          expansion_factor=expansion_factor, bias=bias,
+                                                          dropout_rate=dropout_rate,
+                                                          device=self.device, dtype=self.dtype)] * num_layers)
+
+     @override
+     def forward(self, x: torch.Tensor) -> torch.Tensor:
+         x = self.ensure_device_and_dtype(x, device=self.device, dtype=self.dtype)
+         for ffn in self.ffn_layers:
+             x = ffn(x)
          return x
  ```
 
@@ -204,29 +222,34 @@
  from typing_extensions import override
 
  import torch
- from torch import nn, softmax
 
  from deeplotx.nn.base_neural_network import BaseNeuralNetwork
+ from deeplotx.nn.feed_forward import FeedForward
 
 
  class SelfAttention(BaseNeuralNetwork):
-     def __init__(self, feature_dim: int, model_name: str | None = None,
-                  device: str | None = None, dtype: torch.dtype | None = None):
-         super().__init__(model_name=model_name, device=device, dtype=dtype)
+     def __init__(self, feature_dim: int, bias: bool = True, proj_layers: int = 1,
+                  proj_expansion_factor: int | float = 1.5, dropout_rate: float = 0.02,
+                  model_name: str | None = None, device: str | None = None, dtype: torch.dtype | None = None):
+         super().__init__(in_features=feature_dim, out_features=feature_dim, model_name=model_name,
+                          device=device, dtype=dtype)
          self._feature_dim = feature_dim
-         self.q_proj = nn.Linear(in_features=self._feature_dim, out_features=self._feature_dim,
-                                 bias=True, device=self.device, dtype=self.dtype)
-         self.k_proj = nn.Linear(in_features=self._feature_dim, out_features=self._feature_dim,
-                                 bias=True, device=self.device, dtype=self.dtype)
-         self.v_proj = nn.Linear(in_features=self._feature_dim, out_features=self._feature_dim,
-                                 bias=True, device=self.device, dtype=self.dtype)
+         self.q_proj = FeedForward(feature_dim=self._feature_dim, num_layers=proj_layers,
+                                   expansion_factor=proj_expansion_factor,
+                                   bias=bias, dropout_rate=dropout_rate, device=self.device, dtype=self.dtype)
+         self.k_proj = FeedForward(feature_dim=self._feature_dim, num_layers=proj_layers,
+                                   expansion_factor=proj_expansion_factor,
+                                   bias=bias, dropout_rate=dropout_rate, device=self.device, dtype=self.dtype)
+         self.v_proj = FeedForward(feature_dim=self._feature_dim, num_layers=proj_layers,
+                                   expansion_factor=proj_expansion_factor,
+                                   bias=bias, dropout_rate=dropout_rate, device=self.device, dtype=self.dtype)
 
      def _attention(self, x: torch.Tensor, mask: torch.Tensor | None = None) -> torch.Tensor:
          q, k = self.q_proj(x), self.k_proj(x)
          attn = torch.matmul(q, k.transpose(-2, -1))
          attn = attn / (self._feature_dim ** 0.5)
          attn = attn.masked_fill(mask == 0, -1e9) if mask is not None else attn
-         return softmax(attn, dim=-1)
+         return torch.softmax(attn, dim=-1)
 
      @override
      def forward(self, x: torch.Tensor, mask: torch.Tensor | None = None) -> torch.Tensor:
@@ -5,12 +5,16 @@ __ROOT__ = os.path.dirname(os.path.abspath(__file__))
 
  from .encoder import Encoder, LongTextEncoder, LongformerEncoder
  from .nn import (
+     FeedForward,
      LinearRegression,
      LogisticRegression,
      SoftmaxRegression,
      RecursiveSequential,
      LongContextRecursiveSequential,
-     SelfAttention,
+     RoPE,
+     Attention,
+     MultiHeadAttention,
+     RoFormerEncoder,
      AutoRegression,
      LongContextAutoRegression
  )
@@ -0,0 +1,66 @@
+ import logging
+ import os
+ import math
+ from requests.exceptions import ConnectTimeout, SSLError
+
+ import torch
+ from torch import nn
+ from transformers import AutoTokenizer, AutoModel
+
+ from deeplotx import __ROOT__
+
+ CACHE_PATH = os.path.join(__ROOT__, '.cache')
+ DEFAULT_BERT = 'FacebookAI/xlm-roberta-base'
+ logger = logging.getLogger('deeplotx.embedding')
+
+
+ class Encoder(nn.Module):
+     def __init__(self, model_name_or_path: str = DEFAULT_BERT, device: str | None = None):
+         super().__init__()
+         self.device = torch.device(device) if device is not None \
+             else torch.device('cuda' if torch.cuda.is_available() else 'cpu')
+         try:
+             self.tokenizer = AutoTokenizer.from_pretrained(pretrained_model_name_or_path=model_name_or_path,
+                                                            cache_dir=CACHE_PATH, _from_auto=True,
+                                                            trust_remote_code=True)
+             self.encoder = AutoModel.from_pretrained(pretrained_model_name_or_path=model_name_or_path,
+                                                      cache_dir=CACHE_PATH, _from_auto=True,
+                                                      trust_remote_code=True).to(self.device)
+         except ConnectTimeout:
+             self.tokenizer = AutoTokenizer.from_pretrained(pretrained_model_name_or_path=model_name_or_path,
+                                                            cache_dir=CACHE_PATH, _from_auto=True,
+                                                            trust_remote_code=True, local_files_only=True)
+             self.encoder = AutoModel.from_pretrained(pretrained_model_name_or_path=model_name_or_path,
+                                                      cache_dir=CACHE_PATH, _from_auto=True,
+                                                      trust_remote_code=True, local_files_only=True).to(self.device)
+         except SSLError:
+             self.tokenizer = AutoTokenizer.from_pretrained(pretrained_model_name_or_path=model_name_or_path,
+                                                            cache_dir=CACHE_PATH, _from_auto=True,
+                                                            trust_remote_code=True, local_files_only=True)
+             self.encoder = AutoModel.from_pretrained(pretrained_model_name_or_path=model_name_or_path,
+                                                      cache_dir=CACHE_PATH, _from_auto=True,
+                                                      trust_remote_code=True, local_files_only=True).to(self.device)
+         self.embed_dim = self.encoder.config.max_position_embeddings
+         logger.debug(f'{Encoder.__name__} initialized on device: {self.device}.')
+
+     def forward(self, input_ids: torch.Tensor, attention_mask: torch.Tensor, *args, **kwargs) -> torch.Tensor:
+         def _encoder(_input_tup: tuple[torch.Tensor, torch.Tensor]) -> torch.Tensor:
+             return self.encoder.forward(_input_tup[0], attention_mask=_input_tup[1]).last_hidden_state[:, 0, :]
+
+         num_chunks = math.ceil(input_ids.shape[-1] / self.embed_dim)
+         chunks = chunk_results = []
+         for i in range(num_chunks):
+             start_idx = i * self.embed_dim
+             end_idx = min(start_idx + self.embed_dim, input_ids.shape[-1])
+             chunks.append((input_ids[:, start_idx: end_idx], attention_mask[:, start_idx: end_idx]))
+         ori_mode = self.encoder.training
+         self.encoder.eval()
+         with torch.no_grad():
+             chunk_results = [_encoder(x) for x in chunks]
+         self.encoder.train(mode=ori_mode)
+         return torch.cat(chunk_results, dim=-1)
+
+     def encode(self, text: str) -> torch.Tensor:
+         _input_ids = torch.tensor([self.tokenizer.encode(text)], dtype=torch.long, device=self.device)
+         _att_mask = torch.tensor([[1] * _input_ids.shape[-1]], dtype=torch.int, device=self.device)
+         return self.forward(_input_ids, _att_mask).squeeze()
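A minimal sketch of the new Encoder in use, assuming the default checkpoint (FacebookAI/xlm-roberta-base) can be downloaded or is already cached under deeplotx/.cache; the sample sentence is arbitrary.

```python
from deeplotx import Encoder

encoder = Encoder()   # defaults to DEFAULT_BERT = 'FacebookAI/xlm-roberta-base'
emb = encoder.encode('DeepLoTX is an easy-to-use long text NLP toolkit.')
print(emb.shape)      # 1-D tensor: the [CLS] hidden state, concatenated across chunks for long inputs
```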
@@ -15,12 +15,14 @@ logger = logging.getLogger('deeplotx.embedding')
  class LongTextEncoder(Encoder):
      def __init__(self, max_length: int, chunk_size: int = 448,
                   overlapping: int = 32, model_name_or_path: str = DEFAULT_BERT,
-                  cache_capacity: int = 64, device: str | None = None):
+                  cache_capacity: int = 64, max_workers: int = 8, device: str | None = None):
          super().__init__(model_name_or_path=model_name_or_path, device=device)
+         assert overlapping < chunk_size, f'overlapping ({overlapping}) must be less than chunk size ({chunk_size}).'
          self._max_length = max_length
          self._chunk_size = chunk_size
          self._overlapping = overlapping
          self._cache = LRUCache(capacity=cache_capacity)
+         self._worker_group = ThreadPool(max_workers=max_workers)
 
      def __chunk_embedding(self, idx: int, x: torch.Tensor, mask: torch.Tensor) -> tuple[int, torch.Tensor]:
          return idx, super().forward(x, attention_mask=mask)
@@ -63,7 +65,7 @@ class LongTextEncoder(Encoder):
              _tmp_right = (i + 1) * self._chunk_size + self._overlapping
              chunks.append((i, torch.tensor([_text_to_input_ids[_tmp_left: _tmp_right]], dtype=torch.int, device=self.device),
                             torch.tensor([_text_to_input_ids_att_mask[_tmp_left: _tmp_right]], dtype=torch.int, device=self.device)))
-         embeddings = list(ThreadPool(max_workers=min(num_chunks + 1, 8)).map(self.__chunk_embedding, chunks))
+         embeddings = list(self._worker_group.map(self.__chunk_embedding, chunks))
          embeddings = sorted([x.returns for x in embeddings], key=lambda x: x[0], reverse=False)
          fin_embedding = [x[1] for x in embeddings]
          # write cache
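The two hunks above create the chunk-embedding worker pool once in __init__ (sized by the new max_workers argument) and assert that overlapping stays below chunk_size. A construction sketch with illustrative values:

```python
from deeplotx import LongTextEncoder

long_encoder = LongTextEncoder(
    max_length=2048,     # illustrative token budget
    chunk_size=448,
    overlapping=32,      # must be smaller than chunk_size, or the new assertion fires
    cache_capacity=64,
    max_workers=8,       # size of the ThreadPool reused across encoding calls (new in 0.8.0)
)
```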
@@ -1,8 +1,13 @@
+ from .base_neural_network import BaseNeuralNetwork
+ from .feed_forward import FeedForward
  from .linear_regression import LinearRegression
  from .logistic_regression import LogisticRegression
  from .softmax_regression import SoftmaxRegression
  from .recursive_sequential import RecursiveSequential
  from .long_context_recursive_sequential import LongContextRecursiveSequential
- from .self_attention import SelfAttention
+ from .rope import RoPE
+ from .attention import Attention
+ from .multi_head_attention import MultiHeadAttention
+ from .roformer_encoder import RoFormerEncoder
  from .auto_regression import AutoRegression
  from .long_context_auto_regression import LongContextAutoRegression
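Since deeplotx.nn now exports FeedForward directly, here is a minimal usage sketch of the block added in 0.8.0; the feature dimension, batch size, and layer count are illustrative values, not anything shipped with the package.

```python
import torch
from deeplotx.nn import FeedForward

ffn = FeedForward(feature_dim=768, num_layers=2, expansion_factor=2, dropout_rate=0.05)  # illustrative sizes
x = torch.randn(4, 768)      # a batch of 4 feature vectors
y = ffn(x)                   # pre-norm residual FFN units applied in sequence
print(y.shape)               # torch.Size([4, 768]) -- the feature dimension is preserved
```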
@@ -0,0 +1,48 @@
+ from typing_extensions import override
+
+ import torch
+
+ from deeplotx.nn.base_neural_network import BaseNeuralNetwork
+ from deeplotx.nn.feed_forward import FeedForward
+ from deeplotx.nn.rope import RoPE, DEFAULT_THETA
+
+
+ class Attention(BaseNeuralNetwork):
+     def __init__(self, feature_dim: int, bias: bool = True, positional: bool = True,
+                  proj_layers: int = 1, proj_expansion_factor: int | float = 1.5, dropout_rate: float = 0.02,
+                  model_name: str | None = None, device: str | None = None, dtype: torch.dtype | None = None,
+                  **kwargs):
+         super().__init__(in_features=feature_dim, out_features=feature_dim, model_name=model_name,
+                          device=device, dtype=dtype)
+         self._positional = positional
+         self._feature_dim = feature_dim
+         self.q_proj = FeedForward(feature_dim=self._feature_dim, num_layers=proj_layers,
+                                   expansion_factor=proj_expansion_factor,
+                                   bias=bias, dropout_rate=dropout_rate, device=self.device, dtype=self.dtype)
+         self.k_proj = FeedForward(feature_dim=self._feature_dim, num_layers=proj_layers,
+                                   expansion_factor=proj_expansion_factor,
+                                   bias=bias, dropout_rate=dropout_rate, device=self.device, dtype=self.dtype)
+         self.v_proj = FeedForward(feature_dim=self._feature_dim, num_layers=proj_layers,
+                                   expansion_factor=proj_expansion_factor,
+                                   bias=bias, dropout_rate=dropout_rate, device=self.device, dtype=self.dtype)
+         if self._positional:
+             self.rope = RoPE(feature_dim=self._feature_dim, theta=kwargs.get('theta', DEFAULT_THETA),
+                              device=self.device, dtype=self.dtype)
+
+     def _attention(self, x: torch.Tensor, y: torch.Tensor, mask: torch.Tensor | None = None) -> torch.Tensor:
+         q, k = self.q_proj(x), self.k_proj(y)
+         if self._positional:
+             q, k = self.rope(q), self.rope(k)
+         attn = torch.matmul(q, k.transpose(-2, -1))
+         attn = attn / (self._feature_dim ** 0.5)
+         attn = attn.masked_fill(mask == 0, -1e9) if mask is not None else attn
+         return torch.softmax(attn, dim=-1)
+
+     @override
+     def forward(self, x: torch.Tensor, y: torch.Tensor | None = None, mask: torch.Tensor | None = None) -> torch.Tensor:
+         x = self.ensure_device_and_dtype(x, device=self.device, dtype=self.dtype)
+         y = x if y is None else self.ensure_device_and_dtype(y, device=self.device, dtype=self.dtype)
+         if mask is not None:
+             mask = self.ensure_device_and_dtype(mask, device=self.device, dtype=self.dtype)
+         v = self.v_proj(y)
+         return torch.matmul(self._attention(x, y, mask), v)
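A usage sketch for the new Attention module; the tensor shapes (batch, sequence length, feature_dim) are assumptions for illustration, and the rotary-position path is exercised because positional defaults to True.

```python
import torch
from deeplotx.nn import Attention

attn = Attention(feature_dim=256, positional=True)   # RoPE applied to q and k
x = torch.randn(2, 16, 256)                          # (batch, seq_len, feature_dim), illustrative
self_out = attn(x)                                   # y defaults to x -> self-attention
y = torch.randn(2, 16, 256)
cross_out = attn(x, y)                               # queries from x, keys/values from y
print(self_out.shape, cross_out.shape)               # both keep the input shape
```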
@@ -0,0 +1,14 @@
+ import torch
+
+ from deeplotx.nn import RecursiveSequential
+
+
+ class AutoRegression(RecursiveSequential):
+     def __init__(self, feature_dim: int, bias: bool = True,
+                  recursive_layers: int = 1, recursive_hidden_dim: int | None = None,
+                  ffn_layers: int = 1, ffn_expansion_factor: int | float = 2, dropout_rate: float = 0.05,
+                  model_name: str | None = None, device: str | None = None, dtype: torch.dtype | None = None):
+         super().__init__(input_dim=feature_dim, output_dim=feature_dim, bias=bias,
+                          recursive_layers=recursive_layers, recursive_hidden_dim=recursive_hidden_dim,
+                          ffn_layers=ffn_layers, ffn_expansion_factor=ffn_expansion_factor,
+                          dropout_rate=dropout_rate, model_name=model_name, device=device, dtype=dtype)
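AutoRegression is now a thin wrapper that instantiates RecursiveSequential with input_dim == output_dim == feature_dim and passes through the new FFN-related arguments. A construction-only sketch (sizes are arbitrary):

```python
from deeplotx.nn import AutoRegression

model = AutoRegression(feature_dim=256, recursive_layers=2,
                       ffn_layers=1, ffn_expansion_factor=2, dropout_rate=0.05)
print(model.size())   # parameter counts from BaseNeuralNetwork.size(): total / trainable / non_trainable
```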
@@ -0,0 +1,140 @@
+ import os
+ from abc import abstractmethod
+
+ import torch
+ from torch import nn
+ from torch.nn import init
+
+ DEFAULT_SUFFIX = 'dlx'
+
+
+ class BaseNeuralNetwork(nn.Module):
+     def __init__(self, in_features: int, out_features: int, model_name: str | None = None,
+                  device: str | None = None, dtype: torch.dtype | None = None):
+         super().__init__()
+         self._model_name = model_name \
+             if model_name is not None \
+             else self.__class__.__name__
+         self.device = torch.device(device) if device is not None \
+             else torch.device('cuda' if torch.cuda.is_available() else 'cpu')
+         self.dtype = dtype if dtype is not None else torch.float32
+         self._in_features = in_features
+         self._out_features = out_features
+
+     @property
+     def in_features(self) -> int:
+         return self._in_features
+
+     @property
+     def out_features(self) -> int:
+         return self._out_features
+
+     @staticmethod
+     def ensure_device_and_dtype(x: torch.Tensor, device: torch.device, dtype: torch.dtype) -> torch.Tensor:
+         if x.device != device:
+             x = x.to(device)
+         if x.dtype != dtype:
+             x = x.to(dtype)
+         return x
+
+     def initialize_weights(self):
+         for m in self.modules():
+             match m.__class__:
+                 case nn.Linear:
+                     init.kaiming_normal_(m.weight, mode='fan_in', nonlinearity='leaky_relu')
+                     if m.bias is not None:
+                         init.constant_(m.bias, 0)
+                 case nn.BatchNorm2d | nn.BatchNorm1d | nn.BatchNorm3d:
+                     init.constant_(m.weight, 1)
+                     init.constant_(m.bias, 0)
+                 case nn.LSTM | nn.GRU:
+                     for name, param in m.named_parameters():
+                         _tmp_name = name.lower()
+                         if 'weight_ih' in _tmp_name:
+                             init.kaiming_normal_(param, mode='fan_in', nonlinearity='sigmoid')
+                         elif 'weight_hh' in _tmp_name:
+                             init.orthogonal_(param)
+                         elif 'bias' in _tmp_name:
+                             init.constant_(param, 0)
+                 case _:
+                     pass
+         return self
+
+     def size(self) -> dict:
+         total_params = trainable_params = non_trainable_params = 0
+         for param in self.parameters():
+             params = param.numel()
+             total_params += params
+             if param.requires_grad:
+                 trainable_params += params
+             else:
+                 non_trainable_params += params
+         return {
+             'total': total_params,
+             'trainable': trainable_params,
+             'non_trainable': non_trainable_params
+         }
+
+     def l1(self, _lambda: float = 1e-4) -> torch.Tensor:
+         def _l1() -> torch.Tensor:
+             l2_reg = torch.tensor(0., device=self.device, dtype=self.dtype)
+             for param in self.parameters():
+                 l2_reg += (torch.abs(param)).sum()
+             return l2_reg
+         return _lambda * _l1()
+
+     def l2(self, _lambda: float = 1e-4) -> torch.Tensor:
+         def _l2() -> torch.Tensor:
+             l2_reg = torch.tensor(0., device=self.device, dtype=self.dtype)
+             for param in self.parameters():
+                 l2_reg += (torch.pow(param, exponent=2.)).sum()
+             return l2_reg
+         return _lambda * _l2() / 2.
+
+     def elastic_net(self, alpha: float = 1e-4, rho: float = 0.5) -> torch.Tensor:
+         return alpha * (rho * self.l1(_lambda=1.) + (1 - rho) * self.l2(_lambda=1.))
+
+     @abstractmethod
+     def forward(self, *args, **kwargs) -> torch.Tensor: ...
+
+     def predict(self, x: torch.Tensor) -> torch.Tensor:
+         x = self.ensure_device_and_dtype(x, device=self.device, dtype=self.dtype)
+         __train = self.training
+         self.training = False
+         with torch.no_grad():
+             res = self.forward(x)
+         self.training = __train
+         return res
+
+     def save(self, model_name: str | None = None, model_dir: str = '.', _suffix: str = DEFAULT_SUFFIX):
+         os.makedirs(model_dir, exist_ok=True)
+         model_file_name = f'{model_name}.{_suffix}' if model_name is not None else f'{self._model_name}.{_suffix}'
+         torch.save(self.state_dict(), os.path.join(model_dir, model_file_name))
+         return self
+
+     def load(self, model_name: str | None = None, model_dir: str = '.', _suffix: str = DEFAULT_SUFFIX):
+         model_file_name = f'{model_name}.{_suffix}' if model_name is not None else f'{self._model_name}.{_suffix}'
+         self.load_state_dict(torch.load(os.path.join(model_dir, model_file_name), map_location=self.device, weights_only=True))
+         return self
+
+     def __str__(self):
+         formatted = super().__str__()
+         _line_len = len([sorted(formatted.splitlines(), key=lambda _: len(_), reverse=True)][0])
+         _splitter_1 = '=' * (_line_len + 10)
+         _splitter_2 = '-' * (_line_len + 10)
+         _size = self.size()
+         total_param = _size['total']
+         trainable_param = _size['trainable']
+         non_trainable_param = _size['non_trainable']
+         formatted = (f'{_splitter_1}\n'
+                      f'Model_Name: {self._model_name}\n'
+                      f'In_Features: {self.in_features}\n'
+                      f'Out_Features: {self.out_features}\n'
+                      f'Device: {self.device}\n'
+                      f'Dtype: {self.dtype}\n'
+                      f'Total_Parameters: {total_param}\n'
+                      f'Trainable_Parameters: {trainable_param}\n'
+                      f'NonTrainable_Parameters: {non_trainable_param}\n'
+                      f'{_splitter_2}'
+                      f'\n{formatted}\n{_splitter_1}')
+         return formatted
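To show how the reworked base class is meant to be subclassed, a minimal sketch follows; the class name and layer sizes are made up, while the constructor arguments, ensure_device_and_dtype, elastic_net, save, and load all come from the code above.

```python
import torch
from torch import nn
from deeplotx.nn import BaseNeuralNetwork


class TinyRegressor(BaseNeuralNetwork):   # hypothetical subclass for illustration
    def __init__(self, device: str | None = None, dtype: torch.dtype | None = None):
        super().__init__(in_features=8, out_features=1, device=device, dtype=dtype)
        self.fc = nn.Linear(8, 1, device=self.device, dtype=self.dtype)

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        x = self.ensure_device_and_dtype(x, device=self.device, dtype=self.dtype)
        return self.fc(x)


model = TinyRegressor().initialize_weights()
x = torch.randn(4, 8)
target = torch.zeros(4, 1, device=model.device, dtype=model.dtype)
loss = nn.functional.mse_loss(model(x), target) + model.elastic_net(alpha=1e-4, rho=0.5)
model.save(model_dir='.')                       # writes ./TinyRegressor.dlx
restored = TinyRegressor().load(model_dir='.')  # reloads the state dict onto the model's device
```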