deeplotx 0.5.5__tar.gz → 0.6.1__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (39)
  1. {deeplotx-0.5.5 → deeplotx-0.6.1}/PKG-INFO +66 -42
  2. {deeplotx-0.5.5 → deeplotx-0.6.1}/README.md +65 -41
  3. {deeplotx-0.5.5 → deeplotx-0.6.1}/deeplotx/__init__.py +2 -0
  4. deeplotx-0.6.1/deeplotx/encoder/encoder.py +66 -0
  5. {deeplotx-0.5.5 → deeplotx-0.6.1}/deeplotx/encoder/long_text_encoder.py +7 -2
  6. {deeplotx-0.5.5 → deeplotx-0.6.1}/deeplotx/nn/__init__.py +2 -0
  7. {deeplotx-0.5.5 → deeplotx-0.6.1}/deeplotx/nn/auto_regression.py +4 -1
  8. {deeplotx-0.5.5 → deeplotx-0.6.1}/deeplotx/nn/base_neural_network.py +12 -1
  9. deeplotx-0.6.1/deeplotx/nn/feed_forward.py +53 -0
  10. deeplotx-0.6.1/deeplotx/nn/linear_regression.py +25 -0
  11. {deeplotx-0.5.5 → deeplotx-0.6.1}/deeplotx/nn/logistic_regression.py +5 -2
  12. {deeplotx-0.5.5 → deeplotx-0.6.1}/deeplotx/nn/long_context_auto_regression.py +4 -1
  13. {deeplotx-0.5.5 → deeplotx-0.6.1}/deeplotx/nn/long_context_recursive_sequential.py +12 -6
  14. {deeplotx-0.5.5 → deeplotx-0.6.1}/deeplotx/nn/recursive_sequential.py +12 -5
  15. deeplotx-0.6.1/deeplotx/nn/self_attention.py +39 -0
  16. {deeplotx-0.5.5 → deeplotx-0.6.1}/deeplotx/nn/softmax_regression.py +5 -2
  17. {deeplotx-0.5.5 → deeplotx-0.6.1}/deeplotx/trainer/text_binary_classification_trainer.py +16 -4
  18. {deeplotx-0.5.5 → deeplotx-0.6.1}/deeplotx.egg-info/PKG-INFO +66 -42
  19. {deeplotx-0.5.5 → deeplotx-0.6.1}/deeplotx.egg-info/SOURCES.txt +1 -0
  20. {deeplotx-0.5.5 → deeplotx-0.6.1}/pyproject.toml +1 -1
  21. deeplotx-0.5.5/deeplotx/encoder/encoder.py +0 -48
  22. deeplotx-0.5.5/deeplotx/nn/linear_regression.py +0 -37
  23. deeplotx-0.5.5/deeplotx/nn/self_attention.py +0 -34
  24. {deeplotx-0.5.5 → deeplotx-0.6.1}/LICENSE +0 -0
  25. {deeplotx-0.5.5 → deeplotx-0.6.1}/deeplotx/encoder/__init__.py +0 -0
  26. {deeplotx-0.5.5 → deeplotx-0.6.1}/deeplotx/encoder/longformer_encoder.py +0 -0
  27. {deeplotx-0.5.5 → deeplotx-0.6.1}/deeplotx/similarity/__init__.py +0 -0
  28. {deeplotx-0.5.5 → deeplotx-0.6.1}/deeplotx/similarity/distribution.py +0 -0
  29. {deeplotx-0.5.5 → deeplotx-0.6.1}/deeplotx/similarity/set.py +0 -0
  30. {deeplotx-0.5.5 → deeplotx-0.6.1}/deeplotx/similarity/vector.py +0 -0
  31. {deeplotx-0.5.5 → deeplotx-0.6.1}/deeplotx/trainer/__init__.py +0 -0
  32. {deeplotx-0.5.5 → deeplotx-0.6.1}/deeplotx/trainer/base_trainer.py +0 -0
  33. {deeplotx-0.5.5 → deeplotx-0.6.1}/deeplotx/util/__init__.py +0 -0
  34. {deeplotx-0.5.5 → deeplotx-0.6.1}/deeplotx/util/hash.py +0 -0
  35. {deeplotx-0.5.5 → deeplotx-0.6.1}/deeplotx/util/read_file.py +0 -0
  36. {deeplotx-0.5.5 → deeplotx-0.6.1}/deeplotx.egg-info/dependency_links.txt +0 -0
  37. {deeplotx-0.5.5 → deeplotx-0.6.1}/deeplotx.egg-info/requires.txt +0 -0
  38. {deeplotx-0.5.5 → deeplotx-0.6.1}/deeplotx.egg-info/top_level.txt +0 -0
  39. {deeplotx-0.5.5 → deeplotx-0.6.1}/setup.cfg +0 -0

{deeplotx-0.5.5 → deeplotx-0.6.1}/PKG-INFO

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: deeplotx
-Version: 0.5.5
+Version: 0.6.1
 Summary: Easy-2-use long text NLP toolkit.
 Requires-Python: >=3.10
 Description-Content-Type: text/markdown
@@ -163,6 +163,8 @@ Dynamic: license-file
 
 ```python
 from deeplotx import (
+    BaseNeuralNetwork,   # base class for deep neural networks
+    FeedForward,         # feed-forward network
     LinearRegression,    # linear regression
     LogisticRegression,  # logistic regression / binary or multi-label classification
     SoftmaxRegression,   # softmax regression / multi-class classification
@@ -181,38 +183,54 @@ Dynamic: license-file
 
 import torch
 from torch import nn
-
+
 from deeplotx.nn.base_neural_network import BaseNeuralNetwork
-
-
-class LinearRegression(BaseNeuralNetwork):
-    def __init__(self, input_dim: int, output_dim: int, model_name: str | None = None,
-                 device: str | None = None, dtype: torch.dtype | None = None):
-        super().__init__(model_name=model_name, device=device, dtype=dtype)
-        self.fc1 = nn.Linear(input_dim, 1024, device=self.device, dtype=self.dtype)
-        self.fc1_to_fc4_res = nn.Linear(1024, 64, device=self.device, dtype=self.dtype)
-        self.fc2 = nn.Linear(1024, 768, device=self.device, dtype=self.dtype)
-        self.fc3 = nn.Linear(768, 128, device=self.device, dtype=self.dtype)
-        self.fc4 = nn.Linear(128, 64, device=self.device, dtype=self.dtype)
-        self.fc5 = nn.Linear(64, output_dim, device=self.device, dtype=self.dtype)
-        self.parametric_relu_1 = nn.PReLU(num_parameters=1, init=5e-3, device=self.device, dtype=self.dtype)
-        self.parametric_relu_2 = nn.PReLU(num_parameters=1, init=5e-3, device=self.device, dtype=self.dtype)
-        self.parametric_relu_3 = nn.PReLU(num_parameters=1, init=5e-3, device=self.device, dtype=self.dtype)
-        self.parametric_relu_4 = nn.PReLU(num_parameters=1, init=5e-3, device=self.device, dtype=self.dtype)
-
+
+
+class FeedForwardUnit(BaseNeuralNetwork):
+    def __init__(self, feature_dim: int, expansion_factor: int | float = 2,
+                 bias: bool = True, dropout_rate: float = 0.05, model_name: str | None = None,
+                 device: str | None = None, dtype: torch.dtype | None = None):
+        super().__init__(in_features=feature_dim, out_features=feature_dim, model_name=model_name, device=device, dtype=dtype)
+        self._dropout_rate = dropout_rate
+        self.fc1 = nn.Linear(feature_dim, int(feature_dim * expansion_factor), bias=bias,
+                             device=self.device, dtype=self.dtype)
+        self.fc2 = nn.Linear(int(feature_dim * expansion_factor), feature_dim, bias=bias,
+                             device=self.device, dtype=self.dtype)
+        self.parametric_relu_1 = nn.PReLU(num_parameters=1, init=5e-3,
+                                          device=self.device, dtype=self.dtype)
+        self.layer_norm = nn.LayerNorm(normalized_shape=self.fc1.in_features, eps=1e-9,
+                                       device=self.device, dtype=self.dtype)
+
     @override
-    def forward(self, x) -> torch.Tensor:
+    def forward(self, x: torch.Tensor) -> torch.Tensor:
         x = self.ensure_device_and_dtype(x, device=self.device, dtype=self.dtype)
-        fc1_out = self.parametric_relu_1(self.fc1(x))
-        x = nn.LayerNorm(normalized_shape=1024, eps=1e-9, device=self.device, dtype=self.dtype)(fc1_out)
-        x = torch.dropout(x, p=0.2, train=self.training)
-        x = self.parametric_relu_2(self.fc2(x))
-        x = nn.LayerNorm(normalized_shape=768, eps=1e-9, device=self.device, dtype=self.dtype)(x)
-        x = torch.dropout(x, p=0.2, train=self.training)
-        x = self.parametric_relu_3(self.fc3(x))
-        x = torch.dropout(x, p=0.2, train=self.training)
-        x = self.parametric_relu_4(self.fc4(x)) + self.fc1_to_fc4_res(fc1_out)
-        x = self.fc5(x)
+        residual = x
+        x = self.layer_norm(x)
+        x = self.fc1(x)
+        x = self.parametric_relu_1(x)
+        if self._dropout_rate > .0:
+            x = torch.dropout(x, p=self._dropout_rate, train=self.training)
+        return self.fc2(x) + residual
+
+
+class FeedForward(BaseNeuralNetwork):
+    def __init__(self, feature_dim: int, num_layers: int = 1, expansion_factor: int | float = 2,
+                 bias: bool = True, dropout_rate: float = 0.05, model_name: str | None = None,
+                 device: str | None = None, dtype: torch.dtype | None = None):
+        if num_layers < 1:
+            raise ValueError('num_layers cannot be less than 1.')
+        super().__init__(in_features=feature_dim, out_features=feature_dim, model_name=model_name, device=device, dtype=dtype)
+        self.ffn_layers = nn.ModuleList([FeedForwardUnit(feature_dim=feature_dim,
+                                                         expansion_factor=expansion_factor, bias=bias,
+                                                         dropout_rate=dropout_rate,
+                                                         device=self.device, dtype=self.dtype)] * num_layers)
+
+    @override
+    def forward(self, x: torch.Tensor) -> torch.Tensor:
+        x = self.ensure_device_and_dtype(x, device=self.device, dtype=self.dtype)
+        for ffn in self.ffn_layers:
+            x = ffn(x)
         return x
 ```
 
@@ -222,29 +240,34 @@ Dynamic: license-file
 
 from typing_extensions import override
 
 import torch
-from torch import nn, softmax
 
 from deeplotx.nn.base_neural_network import BaseNeuralNetwork
+from deeplotx.nn.feed_forward import FeedForward
 
 
 class SelfAttention(BaseNeuralNetwork):
-    def __init__(self, feature_dim: int, model_name: str | None = None,
-                 device: str | None = None, dtype: torch.dtype | None = None):
-        super().__init__(model_name=model_name, device=device, dtype=dtype)
+    def __init__(self, feature_dim: int, bias: bool = True, proj_layers: int = 1,
+                 proj_expansion_factor: int | float = 1.5, dropout_rate: float = 0.02,
+                 model_name: str | None = None, device: str | None = None, dtype: torch.dtype | None = None):
+        super().__init__(in_features=feature_dim, out_features=feature_dim, model_name=model_name,
+                         device=device, dtype=dtype)
         self._feature_dim = feature_dim
-        self.q_proj = nn.Linear(in_features=self._feature_dim, out_features=self._feature_dim,
-                                bias=True, device=self.device, dtype=self.dtype)
-        self.k_proj = nn.Linear(in_features=self._feature_dim, out_features=self._feature_dim,
-                                bias=True, device=self.device, dtype=self.dtype)
-        self.v_proj = nn.Linear(in_features=self._feature_dim, out_features=self._feature_dim,
-                                bias=True, device=self.device, dtype=self.dtype)
+        self.q_proj = FeedForward(feature_dim=self._feature_dim, num_layers=proj_layers,
+                                  expansion_factor=proj_expansion_factor,
+                                  bias=bias, dropout_rate=dropout_rate, device=self.device, dtype=self.dtype)
+        self.k_proj = FeedForward(feature_dim=self._feature_dim, num_layers=proj_layers,
+                                  expansion_factor=proj_expansion_factor,
+                                  bias=bias, dropout_rate=dropout_rate, device=self.device, dtype=self.dtype)
+        self.v_proj = FeedForward(feature_dim=self._feature_dim, num_layers=proj_layers,
+                                  expansion_factor=proj_expansion_factor,
+                                  bias=bias, dropout_rate=dropout_rate, device=self.device, dtype=self.dtype)
 
     def _attention(self, x: torch.Tensor, mask: torch.Tensor | None = None) -> torch.Tensor:
         q, k = self.q_proj(x), self.k_proj(x)
         attn = torch.matmul(q, k.transpose(-2, -1))
         attn = attn / (self._feature_dim ** 0.5)
         attn = attn.masked_fill(mask == 0, -1e9) if mask is not None else attn
-        return softmax(attn, dim=-1)
+        return torch.softmax(attn, dim=-1)
 
     @override
     def forward(self, x: torch.Tensor, mask: torch.Tensor | None = None) -> torch.Tensor:
@@ -265,7 +288,8 @@ Dynamic: license-file
 long_text_encoder = LongTextEncoder(
     max_length=2048,    # maximum text length (in tokens); longer inputs are truncated
     chunk_size=448,     # chunk size (in tokens)
-    overlapping=32      # overlap between adjacent chunks (in tokens)
+    overlapping=32,     # overlap between adjacent chunks (in tokens)
+    cache_capacity=512  # embedding cache capacity
 )
 
 trainer = TextBinaryClassifierTrainer(

{deeplotx-0.5.5 → deeplotx-0.6.1}/README.md

@@ -145,6 +145,8 @@
 
 ```python
 from deeplotx import (
+    BaseNeuralNetwork,   # base class for deep neural networks
+    FeedForward,         # feed-forward network
     LinearRegression,    # linear regression
     LogisticRegression,  # logistic regression / binary or multi-label classification
     SoftmaxRegression,   # softmax regression / multi-class classification
@@ -163,38 +165,54 @@
 
 import torch
 from torch import nn
-
+
 from deeplotx.nn.base_neural_network import BaseNeuralNetwork
-
-
-class LinearRegression(BaseNeuralNetwork):
-    def __init__(self, input_dim: int, output_dim: int, model_name: str | None = None,
-                 device: str | None = None, dtype: torch.dtype | None = None):
-        super().__init__(model_name=model_name, device=device, dtype=dtype)
-        self.fc1 = nn.Linear(input_dim, 1024, device=self.device, dtype=self.dtype)
-        self.fc1_to_fc4_res = nn.Linear(1024, 64, device=self.device, dtype=self.dtype)
-        self.fc2 = nn.Linear(1024, 768, device=self.device, dtype=self.dtype)
-        self.fc3 = nn.Linear(768, 128, device=self.device, dtype=self.dtype)
-        self.fc4 = nn.Linear(128, 64, device=self.device, dtype=self.dtype)
-        self.fc5 = nn.Linear(64, output_dim, device=self.device, dtype=self.dtype)
-        self.parametric_relu_1 = nn.PReLU(num_parameters=1, init=5e-3, device=self.device, dtype=self.dtype)
-        self.parametric_relu_2 = nn.PReLU(num_parameters=1, init=5e-3, device=self.device, dtype=self.dtype)
-        self.parametric_relu_3 = nn.PReLU(num_parameters=1, init=5e-3, device=self.device, dtype=self.dtype)
-        self.parametric_relu_4 = nn.PReLU(num_parameters=1, init=5e-3, device=self.device, dtype=self.dtype)
-
+
+
+class FeedForwardUnit(BaseNeuralNetwork):
+    def __init__(self, feature_dim: int, expansion_factor: int | float = 2,
+                 bias: bool = True, dropout_rate: float = 0.05, model_name: str | None = None,
+                 device: str | None = None, dtype: torch.dtype | None = None):
+        super().__init__(in_features=feature_dim, out_features=feature_dim, model_name=model_name, device=device, dtype=dtype)
+        self._dropout_rate = dropout_rate
+        self.fc1 = nn.Linear(feature_dim, int(feature_dim * expansion_factor), bias=bias,
+                             device=self.device, dtype=self.dtype)
+        self.fc2 = nn.Linear(int(feature_dim * expansion_factor), feature_dim, bias=bias,
+                             device=self.device, dtype=self.dtype)
+        self.parametric_relu_1 = nn.PReLU(num_parameters=1, init=5e-3,
+                                          device=self.device, dtype=self.dtype)
+        self.layer_norm = nn.LayerNorm(normalized_shape=self.fc1.in_features, eps=1e-9,
+                                       device=self.device, dtype=self.dtype)
+
     @override
-    def forward(self, x) -> torch.Tensor:
+    def forward(self, x: torch.Tensor) -> torch.Tensor:
         x = self.ensure_device_and_dtype(x, device=self.device, dtype=self.dtype)
-        fc1_out = self.parametric_relu_1(self.fc1(x))
-        x = nn.LayerNorm(normalized_shape=1024, eps=1e-9, device=self.device, dtype=self.dtype)(fc1_out)
-        x = torch.dropout(x, p=0.2, train=self.training)
-        x = self.parametric_relu_2(self.fc2(x))
-        x = nn.LayerNorm(normalized_shape=768, eps=1e-9, device=self.device, dtype=self.dtype)(x)
-        x = torch.dropout(x, p=0.2, train=self.training)
-        x = self.parametric_relu_3(self.fc3(x))
-        x = torch.dropout(x, p=0.2, train=self.training)
-        x = self.parametric_relu_4(self.fc4(x)) + self.fc1_to_fc4_res(fc1_out)
-        x = self.fc5(x)
+        residual = x
+        x = self.layer_norm(x)
+        x = self.fc1(x)
+        x = self.parametric_relu_1(x)
+        if self._dropout_rate > .0:
+            x = torch.dropout(x, p=self._dropout_rate, train=self.training)
+        return self.fc2(x) + residual
+
+
+class FeedForward(BaseNeuralNetwork):
+    def __init__(self, feature_dim: int, num_layers: int = 1, expansion_factor: int | float = 2,
+                 bias: bool = True, dropout_rate: float = 0.05, model_name: str | None = None,
+                 device: str | None = None, dtype: torch.dtype | None = None):
+        if num_layers < 1:
+            raise ValueError('num_layers cannot be less than 1.')
+        super().__init__(in_features=feature_dim, out_features=feature_dim, model_name=model_name, device=device, dtype=dtype)
+        self.ffn_layers = nn.ModuleList([FeedForwardUnit(feature_dim=feature_dim,
+                                                         expansion_factor=expansion_factor, bias=bias,
+                                                         dropout_rate=dropout_rate,
+                                                         device=self.device, dtype=self.dtype)] * num_layers)
+
+    @override
+    def forward(self, x: torch.Tensor) -> torch.Tensor:
+        x = self.ensure_device_and_dtype(x, device=self.device, dtype=self.dtype)
+        for ffn in self.ffn_layers:
+            x = ffn(x)
         return x
 ```
 
@@ -204,29 +222,34 @@
 
 from typing_extensions import override
 
 import torch
-from torch import nn, softmax
 
 from deeplotx.nn.base_neural_network import BaseNeuralNetwork
+from deeplotx.nn.feed_forward import FeedForward
 
 
 class SelfAttention(BaseNeuralNetwork):
-    def __init__(self, feature_dim: int, model_name: str | None = None,
-                 device: str | None = None, dtype: torch.dtype | None = None):
-        super().__init__(model_name=model_name, device=device, dtype=dtype)
+    def __init__(self, feature_dim: int, bias: bool = True, proj_layers: int = 1,
+                 proj_expansion_factor: int | float = 1.5, dropout_rate: float = 0.02,
+                 model_name: str | None = None, device: str | None = None, dtype: torch.dtype | None = None):
+        super().__init__(in_features=feature_dim, out_features=feature_dim, model_name=model_name,
+                         device=device, dtype=dtype)
         self._feature_dim = feature_dim
-        self.q_proj = nn.Linear(in_features=self._feature_dim, out_features=self._feature_dim,
-                                bias=True, device=self.device, dtype=self.dtype)
-        self.k_proj = nn.Linear(in_features=self._feature_dim, out_features=self._feature_dim,
-                                bias=True, device=self.device, dtype=self.dtype)
-        self.v_proj = nn.Linear(in_features=self._feature_dim, out_features=self._feature_dim,
-                                bias=True, device=self.device, dtype=self.dtype)
+        self.q_proj = FeedForward(feature_dim=self._feature_dim, num_layers=proj_layers,
+                                  expansion_factor=proj_expansion_factor,
+                                  bias=bias, dropout_rate=dropout_rate, device=self.device, dtype=self.dtype)
+        self.k_proj = FeedForward(feature_dim=self._feature_dim, num_layers=proj_layers,
+                                  expansion_factor=proj_expansion_factor,
+                                  bias=bias, dropout_rate=dropout_rate, device=self.device, dtype=self.dtype)
+        self.v_proj = FeedForward(feature_dim=self._feature_dim, num_layers=proj_layers,
+                                  expansion_factor=proj_expansion_factor,
+                                  bias=bias, dropout_rate=dropout_rate, device=self.device, dtype=self.dtype)
 
     def _attention(self, x: torch.Tensor, mask: torch.Tensor | None = None) -> torch.Tensor:
         q, k = self.q_proj(x), self.k_proj(x)
         attn = torch.matmul(q, k.transpose(-2, -1))
         attn = attn / (self._feature_dim ** 0.5)
         attn = attn.masked_fill(mask == 0, -1e9) if mask is not None else attn
-        return softmax(attn, dim=-1)
+        return torch.softmax(attn, dim=-1)
 
     @override
     def forward(self, x: torch.Tensor, mask: torch.Tensor | None = None) -> torch.Tensor:
@@ -247,7 +270,8 @@
 long_text_encoder = LongTextEncoder(
     max_length=2048,    # maximum text length (in tokens); longer inputs are truncated
     chunk_size=448,     # chunk size (in tokens)
-    overlapping=32      # overlap between adjacent chunks (in tokens)
+    overlapping=32,     # overlap between adjacent chunks (in tokens)
+    cache_capacity=512  # embedding cache capacity
 )
 
 trainer = TextBinaryClassifierTrainer(

{deeplotx-0.5.5 → deeplotx-0.6.1}/deeplotx/__init__.py

@@ -5,6 +5,8 @@ __ROOT__ = os.path.dirname(os.path.abspath(__file__))
 
 from .encoder import Encoder, LongTextEncoder, LongformerEncoder
 from .nn import (
+    BaseNeuralNetwork,
+    FeedForward,
     LinearRegression,
    LogisticRegression,
     SoftmaxRegression,

deeplotx-0.6.1/deeplotx/encoder/encoder.py (new file)

@@ -0,0 +1,66 @@
+import logging
+import os
+import math
+from requests.exceptions import ConnectTimeout, SSLError
+
+import torch
+from torch import nn
+from transformers import AutoTokenizer, AutoModel
+
+from deeplotx import __ROOT__
+
+CACHE_PATH = os.path.join(__ROOT__, '.cache')
+DEFAULT_BERT = 'FacebookAI/xlm-roberta-base'
+logger = logging.getLogger('deeplotx.embedding')
+
+
+class Encoder(nn.Module):
+    def __init__(self, model_name_or_path: str = DEFAULT_BERT, device: str | None = None):
+        super().__init__()
+        self.device = torch.device(device) if device is not None \
+            else torch.device('cuda' if torch.cuda.is_available() else 'cpu')
+        try:
+            self.tokenizer = AutoTokenizer.from_pretrained(pretrained_model_name_or_path=model_name_or_path,
+                                                           cache_dir=CACHE_PATH, _from_auto=True,
+                                                           trust_remote_code=True)
+            self.encoder = AutoModel.from_pretrained(pretrained_model_name_or_path=model_name_or_path,
+                                                     cache_dir=CACHE_PATH, _from_auto=True,
+                                                     trust_remote_code=True).to(self.device)
+        except ConnectTimeout:
+            self.tokenizer = AutoTokenizer.from_pretrained(pretrained_model_name_or_path=model_name_or_path,
+                                                           cache_dir=CACHE_PATH, _from_auto=True,
+                                                           trust_remote_code=True, local_files_only=True)
+            self.encoder = AutoModel.from_pretrained(pretrained_model_name_or_path=model_name_or_path,
+                                                     cache_dir=CACHE_PATH, _from_auto=True,
+                                                     trust_remote_code=True, local_files_only=True).to(self.device)
+        except SSLError:
+            self.tokenizer = AutoTokenizer.from_pretrained(pretrained_model_name_or_path=model_name_or_path,
+                                                           cache_dir=CACHE_PATH, _from_auto=True,
+                                                           trust_remote_code=True, local_files_only=True)
+            self.encoder = AutoModel.from_pretrained(pretrained_model_name_or_path=model_name_or_path,
+                                                     cache_dir=CACHE_PATH, _from_auto=True,
+                                                     trust_remote_code=True, local_files_only=True).to(self.device)
+        self.embed_dim = self.encoder.config.max_position_embeddings
+        logger.debug(f'{Encoder.__name__} initialized on device: {self.device}.')
+
+    def forward(self, input_ids: torch.Tensor, attention_mask: torch.Tensor, *args, **kwargs) -> torch.Tensor:
+        def _encoder(_input_tup: tuple[torch.Tensor, torch.Tensor]) -> torch.Tensor:
+            return self.encoder.forward(_input_tup[0], attention_mask=_input_tup[1]).last_hidden_state[:, 0, :]
+
+        num_chunks = math.ceil(input_ids.shape[-1] / self.embed_dim)
+        chunks = chunk_results = []
+        for i in range(num_chunks):
+            start_idx = i * self.embed_dim
+            end_idx = min(start_idx + self.embed_dim, input_ids.shape[-1])
+            chunks.append((input_ids[:, start_idx: end_idx], attention_mask[:, start_idx: end_idx]))
+        ori_mode = self.encoder.training
+        self.encoder.eval()
+        with torch.no_grad():
+            chunk_results = [_encoder(x) for x in chunks]
+        self.encoder.train(mode=ori_mode)
+        return torch.cat(chunk_results, dim=-1)
+
+    def encode(self, text: str) -> torch.Tensor:
+        _input_ids = torch.tensor([self.tokenizer.encode(text)], dtype=torch.long, device=self.device)
+        _att_mask = torch.tensor([[1] * _input_ids.shape[-1]], dtype=torch.int, device=self.device)
+        return self.forward(_input_ids, _att_mask).squeeze()
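
For orientation, a minimal usage sketch of the new `Encoder` (values are illustrative; the default backbone is `FacebookAI/xlm-roberta-base`, with a local-files-only fallback when the download times out or fails SSL verification):

```python
from deeplotx import Encoder

# Downloads (or reuses from deeplotx/.cache) the default backbone.
encoder = Encoder(device='cpu')          # 'cuda' is chosen automatically when device is None and available
embedding = encoder.encode('An example sentence.')
print(embedding.shape)                   # per-chunk CLS embeddings concatenated along the last dimension
```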

{deeplotx-0.5.5 → deeplotx-0.6.1}/deeplotx/encoder/long_text_encoder.py

@@ -15,16 +15,21 @@ logger = logging.getLogger('deeplotx.embedding')
 class LongTextEncoder(Encoder):
     def __init__(self, max_length: int, chunk_size: int = 448,
                  overlapping: int = 32, model_name_or_path: str = DEFAULT_BERT,
-                 cache_capacity: int = 64, device: str | None = None):
+                 cache_capacity: int = 64, max_workers: int = 8, device: str | None = None):
         super().__init__(model_name_or_path=model_name_or_path, device=device)
         self._max_length = max_length
         self._chunk_size = chunk_size
         self._overlapping = overlapping
         self._cache = LRUCache(capacity=cache_capacity)
+        self._worker_group = ThreadPool(max_workers=max_workers)
 
     def __chunk_embedding(self, idx: int, x: torch.Tensor, mask: torch.Tensor) -> tuple[int, torch.Tensor]:
         return idx, super().forward(x, attention_mask=mask)
 
+    @override
+    def forward(self, text: str, flatten: bool = False, *args, **kwargs) -> torch.Tensor:
+        return self.encode(text=text, flatten=flatten)
+
     @override
     def encode(self, text: str, flatten: bool = False) -> torch.Tensor:
         def postprocess(tensors: list[torch.Tensor], _flatten: bool) -> torch.Tensor:
@@ -59,7 +64,7 @@ class LongTextEncoder(Encoder):
             _tmp_right = (i + 1) * self._chunk_size + self._overlapping
             chunks.append((i, torch.tensor([_text_to_input_ids[_tmp_left: _tmp_right]], dtype=torch.int, device=self.device),
                            torch.tensor([_text_to_input_ids_att_mask[_tmp_left: _tmp_right]], dtype=torch.int, device=self.device)))
-        embeddings = list(ThreadPool(max_workers=min(num_chunks + 1, 8)).map(self.__chunk_embedding, chunks))
+        embeddings = list(self._worker_group.map(self.__chunk_embedding, chunks))
         embeddings = sorted([x.returns for x in embeddings], key=lambda x: x[0], reverse=False)
         fin_embedding = [x[1] for x in embeddings]
         # write cache
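
A short sketch of the updated constructor; `max_workers` now sizes a persistent thread pool instead of a per-call pool, and `cache_capacity` bounds the LRU embedding cache. Values here are illustrative:

```python
from deeplotx import LongTextEncoder

long_text_encoder = LongTextEncoder(
    max_length=2048,     # truncate inputs beyond this many tokens
    chunk_size=448,      # tokens per chunk
    overlapping=32,      # tokens shared between adjacent chunks
    cache_capacity=512,  # LRU cache entries
    max_workers=8        # threads that embed chunks in parallel (new in 0.6.1)
)
embedding = long_text_encoder.encode('A very long document ...', flatten=False)
```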

{deeplotx-0.5.5 → deeplotx-0.6.1}/deeplotx/nn/__init__.py

@@ -1,3 +1,5 @@
+from .base_neural_network import BaseNeuralNetwork
+from .feed_forward import FeedForward
 from .linear_regression import LinearRegression
 from .logistic_regression import LogisticRegression
 from .softmax_regression import SoftmaxRegression

{deeplotx-0.5.5 → deeplotx-0.6.1}/deeplotx/nn/auto_regression.py

@@ -5,8 +5,11 @@ from deeplotx.nn import RecursiveSequential
 
 class AutoRegression(RecursiveSequential):
     def __init__(self, feature_dim: int, hidden_dim: int | None = None,
-                 recursive_layers: int = 2, model_name: str | None = None,
+                 recursive_layers: int = 2, ffn_layers: int = 1, ffn_expansion_factor: int | float = 2,
+                 ffn_bias: bool = True, ffn_dropout_rate: float = 0.05, model_name: str | None = None,
                  device: str | None = None, dtype: torch.dtype | None = None):
         super().__init__(input_dim=feature_dim, output_dim=feature_dim,
                          hidden_dim=hidden_dim, recursive_layers=recursive_layers,
+                         ffn_layers=ffn_layers, ffn_expansion_factor=ffn_expansion_factor,
+                         ffn_bias=ffn_bias, ffn_dropout_rate=ffn_dropout_rate,
                          model_name=model_name, device=device, dtype=dtype)

{deeplotx-0.5.5 → deeplotx-0.6.1}/deeplotx/nn/base_neural_network.py

@@ -8,7 +8,8 @@ DEFAULT_SUFFIX = 'dlx'
 
 
 class BaseNeuralNetwork(nn.Module):
-    def __init__(self, model_name: str | None = None, device: str | None = None, dtype: torch.dtype | None = None):
+    def __init__(self, in_features: int, out_features: int, model_name: str | None = None,
+                 device: str | None = None, dtype: torch.dtype | None = None):
         super().__init__()
         self._model_name = model_name \
             if model_name is not None \
@@ -16,6 +17,16 @@ class BaseNeuralNetwork(nn.Module):
         self.device = torch.device(device) if device is not None \
             else torch.device('cuda' if torch.cuda.is_available() else 'cpu')
         self.dtype = dtype if dtype is not None else torch.float32
+        self._in_features = in_features
+        self._out_features = out_features
+
+    @property
+    def in_features(self) -> int:
+        return self._in_features
+
+    @property
+    def out_features(self) -> int:
+        return self._out_features
 
     @staticmethod
     def ensure_device_and_dtype(x: torch.Tensor, device: torch.device, dtype: torch.dtype) -> torch.Tensor:
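
The new `in_features` / `out_features` properties give every model a uniform way to report its dimensions; the trainer change further below relies on `self.model.in_features` instead of reaching into `fc1`. A tiny sketch with illustrative dimensions:

```python
from deeplotx import LinearRegression

model = LinearRegression(input_dim=768, output_dim=1)
assert model.in_features == 768 and model.out_features == 1
```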

deeplotx-0.6.1/deeplotx/nn/feed_forward.py (new file)

@@ -0,0 +1,53 @@
+from typing_extensions import override
+
+import torch
+from torch import nn
+
+from deeplotx.nn.base_neural_network import BaseNeuralNetwork
+
+
+class FeedForwardUnit(BaseNeuralNetwork):
+    def __init__(self, feature_dim: int, expansion_factor: int | float = 2,
+                 bias: bool = True, dropout_rate: float = 0.05, model_name: str | None = None,
+                 device: str | None = None, dtype: torch.dtype | None = None):
+        super().__init__(in_features=feature_dim, out_features=feature_dim, model_name=model_name, device=device, dtype=dtype)
+        self._dropout_rate = dropout_rate
+        self.fc1 = nn.Linear(feature_dim, int(feature_dim * expansion_factor), bias=bias,
+                             device=self.device, dtype=self.dtype)
+        self.fc2 = nn.Linear(int(feature_dim * expansion_factor), feature_dim, bias=bias,
+                             device=self.device, dtype=self.dtype)
+        self.parametric_relu_1 = nn.PReLU(num_parameters=1, init=5e-3,
+                                          device=self.device, dtype=self.dtype)
+        self.layer_norm = nn.LayerNorm(normalized_shape=self.fc1.in_features, eps=1e-9,
+                                       device=self.device, dtype=self.dtype)
+
+    @override
+    def forward(self, x: torch.Tensor) -> torch.Tensor:
+        x = self.ensure_device_and_dtype(x, device=self.device, dtype=self.dtype)
+        residual = x
+        x = self.layer_norm(x)
+        x = self.fc1(x)
+        x = self.parametric_relu_1(x)
+        if self._dropout_rate > .0:
+            x = torch.dropout(x, p=self._dropout_rate, train=self.training)
+        return self.fc2(x) + residual
+
+
+class FeedForward(BaseNeuralNetwork):
+    def __init__(self, feature_dim: int, num_layers: int = 1, expansion_factor: int | float = 2,
+                 bias: bool = True, dropout_rate: float = 0.05, model_name: str | None = None,
+                 device: str | None = None, dtype: torch.dtype | None = None):
+        if num_layers < 1:
+            raise ValueError('num_layers cannot be less than 1.')
+        super().__init__(in_features=feature_dim, out_features=feature_dim, model_name=model_name, device=device, dtype=dtype)
+        self.ffn_layers = nn.ModuleList([FeedForwardUnit(feature_dim=feature_dim,
+                                                         expansion_factor=expansion_factor, bias=bias,
+                                                         dropout_rate=dropout_rate,
+                                                         device=self.device, dtype=self.dtype)] * num_layers)
+
+    @override
+    def forward(self, x: torch.Tensor) -> torch.Tensor:
+        x = self.ensure_device_and_dtype(x, device=self.device, dtype=self.dtype)
+        for ffn in self.ffn_layers:
+            x = ffn(x)
+        return x
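
A hedged sketch of the new block: each `FeedForwardUnit` applies pre-LayerNorm, an expanded linear layer with PReLU and optional dropout, then adds the residual, so the feature dimension is preserved end to end. Shapes below are illustrative:

```python
import torch
from deeplotx import FeedForward

ffn = FeedForward(feature_dim=256, num_layers=2, expansion_factor=2, dropout_rate=0.05)
x = torch.randn(8, 256)   # (batch, feature_dim)
y = ffn(x)                # residual units keep the last dimension at feature_dim
assert y.shape == x.shape
```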

deeplotx-0.6.1/deeplotx/nn/linear_regression.py (new file)

@@ -0,0 +1,25 @@
+from typing_extensions import override
+
+import torch
+from torch import nn
+
+from deeplotx.nn.base_neural_network import BaseNeuralNetwork
+from deeplotx.nn.feed_forward import FeedForward
+
+
+class LinearRegression(BaseNeuralNetwork):
+    def __init__(self, input_dim: int, output_dim: int, num_layers: int = 1,
+                 expansion_factor: int | float = 1.5, bias: bool = True, dropout_rate: float = 0.1,
+                 model_name: str | None = None, device: str | None = None, dtype: torch.dtype | None = None):
+        super().__init__(in_features=input_dim, out_features=output_dim, model_name=model_name, device=device, dtype=dtype)
+        self.ffn = FeedForward(feature_dim=input_dim, num_layers=num_layers, expansion_factor=expansion_factor,
+                               bias=bias, dropout_rate=dropout_rate, device=self.device, dtype=self.dtype)
+        self.proj = nn.Linear(in_features=input_dim, out_features=output_dim,
+                              bias=bias, device=self.device, dtype=self.dtype)
+
+    @override
+    def forward(self, x: torch.Tensor) -> torch.Tensor:
+        x = self.ensure_device_and_dtype(x, device=self.device, dtype=self.dtype)
+        residual = x
+        x = self.ffn(x) + residual
+        return self.proj(x)

{deeplotx-0.5.5 → deeplotx-0.6.1}/deeplotx/nn/logistic_regression.py

@@ -6,9 +6,12 @@ from deeplotx.nn.linear_regression import LinearRegression
 
 
 class LogisticRegression(LinearRegression):
-    def __init__(self, input_dim: int, output_dim: int = 1, model_name: str | None = None,
+    def __init__(self, input_dim: int, output_dim: int = 1, num_layers: int = 1, expansion_factor: int | float = 1.5,
+                 bias: bool = True, dropout_rate: float = 0.1, model_name: str | None = None,
                  device: str | None = None, dtype: torch.dtype | None = None):
-        super().__init__(input_dim=input_dim, output_dim=output_dim, model_name=model_name, device=device, dtype=dtype)
+        super().__init__(input_dim=input_dim, output_dim=output_dim, num_layers=num_layers,
+                         expansion_factor=expansion_factor, bias=bias, dropout_rate=dropout_rate,
+                         model_name=model_name, device=device, dtype=dtype)
 
     @override
     def forward(self, x: torch.Tensor) -> torch.Tensor:

{deeplotx-0.5.5 → deeplotx-0.6.1}/deeplotx/nn/long_context_auto_regression.py

@@ -5,8 +5,11 @@ from deeplotx.nn import LongContextRecursiveSequential
 
 class LongContextAutoRegression(LongContextRecursiveSequential):
     def __init__(self, feature_dim: int, hidden_dim: int | None = None,
-                 recursive_layers: int = 2, model_name: str | None = None,
+                 recursive_layers: int = 2, ffn_layers: int = 1, ffn_expansion_factor: int | float = 2,
+                 ffn_bias: bool = True, ffn_dropout_rate: float = 0.05, model_name: str | None = None,
                  device: str | None = None, dtype: torch.dtype | None = None):
         super().__init__(input_dim=feature_dim, output_dim=feature_dim,
                          hidden_dim=hidden_dim, recursive_layers=recursive_layers,
+                         ffn_layers=ffn_layers, ffn_expansion_factor=ffn_expansion_factor,
+                         ffn_bias=ffn_bias, ffn_dropout_rate=ffn_dropout_rate,
                          model_name=model_name, device=device, dtype=dtype)

{deeplotx-0.5.5 → deeplotx-0.6.1}/deeplotx/nn/long_context_recursive_sequential.py

@@ -10,19 +10,25 @@ from deeplotx.nn.self_attention import SelfAttention
 class LongContextRecursiveSequential(RecursiveSequential):
     def __init__(self, input_dim: int, output_dim: int,
                  hidden_dim: int | None = None, recursive_layers: int = 2,
+                 ffn_layers: int = 1, ffn_expansion_factor: int | float = 2,
+                 ffn_bias: bool = True, ffn_dropout_rate: float = 0.05,
                  model_name: str | None = None, device: str | None = None,
-                 dtype: torch.dtype | None = None):
+                 dtype: torch.dtype | None = None, **kwargs):
         super().__init__(input_dim=input_dim, output_dim=output_dim,
                          hidden_dim=hidden_dim, recursive_layers=recursive_layers,
+                         ffn_layers=ffn_layers, ffn_expansion_factor=ffn_expansion_factor,
+                         ffn_bias=ffn_bias, ffn_dropout_rate=ffn_dropout_rate,
                          model_name=model_name, device=device, dtype=dtype)
-        self._feature_dim = input_dim
-        self.self_attention = SelfAttention(feature_dim=input_dim)
-        self.proj = nn.Linear(in_features=input_dim * 2, out_features=input_dim,
-                              bias=True, device=self.device, dtype=self.dtype)
+        self.self_attention = SelfAttention(feature_dim=input_dim, bias=kwargs.get('attn_proj_bias', ffn_bias),
+                                            proj_layers=kwargs.get('attn_proj_layers', 1),
+                                            proj_expansion_factor=kwargs.get('attn_proj_expansion_factor', ffn_expansion_factor),
+                                            dropout_rate=kwargs.get('attn_proj_dropout_rate', ffn_dropout_rate))
+        self.__proj = nn.Linear(in_features=input_dim * 2, out_features=input_dim,
+                                bias=ffn_bias, device=self.device, dtype=self.dtype)
 
     @override
     def forward(self, x: torch.Tensor, state: tuple[torch.Tensor, torch.Tensor]) -> tuple[torch.Tensor, tuple[torch.Tensor, torch.Tensor]]:
         x = self.ensure_device_and_dtype(x, device=self.device, dtype=self.dtype)
         x = torch.cat([self.self_attention(x), x], dim=-1)
         x = nn.LayerNorm(normalized_shape=x.shape[-1], eps=1e-9, device=self.device, dtype=self.dtype)(x)
-        return super().forward(self.proj(x), state)
+        return super().forward(self.__proj(x), state)

{deeplotx-0.5.5 → deeplotx-0.6.1}/deeplotx/nn/recursive_sequential.py

@@ -4,23 +4,27 @@ import torch
 from torch import nn
 
 from deeplotx.nn.base_neural_network import BaseNeuralNetwork
-from deeplotx.nn import LinearRegression
+from deeplotx.nn.feed_forward import FeedForward
 
 
 class RecursiveSequential(BaseNeuralNetwork):
     def __init__(self, input_dim: int, output_dim: int,
                  hidden_dim: int | None = None, recursive_layers: int = 2,
+                 ffn_layers: int = 1, ffn_expansion_factor: int | float = 2,
+                 ffn_bias: bool = True, ffn_dropout_rate: float = 0.05,
                  model_name: str | None = None, device: str | None = None,
                  dtype: torch.dtype | None = None):
-        super().__init__(model_name=model_name, device=device, dtype=dtype)
+        super().__init__(in_features=input_dim, out_features=output_dim, model_name=model_name, device=device, dtype=dtype)
         if hidden_dim is None:
             hidden_dim = input_dim
         self.lstm = nn.LSTM(input_size=input_dim, hidden_size=hidden_dim,
                             num_layers=recursive_layers, batch_first=True,
                             bias=True, bidirectional=True, device=self.device,
                             dtype=self.dtype)
-        self.regressive_head = LinearRegression(input_dim=hidden_dim * 2, output_dim=output_dim,
-                                                device=self.device, dtype=self.dtype)
+        self.ffn = FeedForward(feature_dim=hidden_dim * 2, num_layers=ffn_layers, expansion_factor=ffn_expansion_factor,
+                               bias=ffn_bias, dropout_rate=ffn_dropout_rate, device=self.device, dtype=self.dtype)
+        self.__proj = nn.Linear(in_features=hidden_dim * 2, out_features=output_dim, bias=ffn_bias,
+                                device=self.device, dtype=self.dtype)
 
     def initial_state(self, batch_size: int = 1) -> tuple[torch.Tensor, torch.Tensor]:
         zeros = torch.zeros(self.lstm.num_layers * 2, batch_size, self.lstm.hidden_size, device=self.device, dtype=self.dtype)
@@ -32,7 +36,10 @@ class RecursiveSequential(BaseNeuralNetwork):
         state = (self.ensure_device_and_dtype(state[0], device=self.device, dtype=self.dtype),
                  self.ensure_device_and_dtype(state[1], device=self.device, dtype=self.dtype))
         x, (hidden_state, cell_state) = self.lstm(x, state)
-        x = self.regressive_head(x[:, -1, :])
+        x = x[:, -1, :]
+        residual = x
+        x = self.ffn(x) + residual
+        x = self.__proj(x)
         return x, (hidden_state, cell_state)
 
     @override

deeplotx-0.6.1/deeplotx/nn/self_attention.py (new file)

@@ -0,0 +1,39 @@
+from typing_extensions import override
+
+import torch
+
+from deeplotx.nn.base_neural_network import BaseNeuralNetwork
+from deeplotx.nn.feed_forward import FeedForward
+
+
+class SelfAttention(BaseNeuralNetwork):
+    def __init__(self, feature_dim: int, bias: bool = True, proj_layers: int = 1,
+                 proj_expansion_factor: int | float = 1.5, dropout_rate: float = 0.02,
+                 model_name: str | None = None, device: str | None = None, dtype: torch.dtype | None = None):
+        super().__init__(in_features=feature_dim, out_features=feature_dim, model_name=model_name,
+                         device=device, dtype=dtype)
+        self._feature_dim = feature_dim
+        self.q_proj = FeedForward(feature_dim=self._feature_dim, num_layers=proj_layers,
+                                  expansion_factor=proj_expansion_factor,
+                                  bias=bias, dropout_rate=dropout_rate, device=self.device, dtype=self.dtype)
+        self.k_proj = FeedForward(feature_dim=self._feature_dim, num_layers=proj_layers,
+                                  expansion_factor=proj_expansion_factor,
+                                  bias=bias, dropout_rate=dropout_rate, device=self.device, dtype=self.dtype)
+        self.v_proj = FeedForward(feature_dim=self._feature_dim, num_layers=proj_layers,
+                                  expansion_factor=proj_expansion_factor,
+                                  bias=bias, dropout_rate=dropout_rate, device=self.device, dtype=self.dtype)
+
+    def _attention(self, x: torch.Tensor, mask: torch.Tensor | None = None) -> torch.Tensor:
+        q, k = self.q_proj(x), self.k_proj(x)
+        attn = torch.matmul(q, k.transpose(-2, -1))
+        attn = attn / (self._feature_dim ** 0.5)
+        attn = attn.masked_fill(mask == 0, -1e9) if mask is not None else attn
+        return torch.softmax(attn, dim=-1)
+
+    @override
+    def forward(self, x: torch.Tensor, mask: torch.Tensor | None = None) -> torch.Tensor:
+        x = self.ensure_device_and_dtype(x, device=self.device, dtype=self.dtype)
+        if mask is not None:
+            mask = self.ensure_device_and_dtype(mask, device=self.device, dtype=self.dtype)
+        v = self.v_proj(x)
+        return torch.matmul(self._attention(x, mask), v)
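
A small sketch of the reworked attention module, whose Q/K/V projections are now `FeedForward` stacks rather than single linear layers; a zero in the optional mask blocks attention to that position. Shapes are illustrative:

```python
import torch
from deeplotx.nn.self_attention import SelfAttention

attn = SelfAttention(feature_dim=256, proj_layers=1, proj_expansion_factor=1.5)
x = torch.randn(4, 128, 256)      # (batch, seq_len, feature_dim)
mask = torch.ones(4, 128, 128)    # (batch, seq_len, seq_len); 0 = masked out
out = attn(x, mask)               # same shape as x
```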

{deeplotx-0.5.5 → deeplotx-0.6.1}/deeplotx/nn/softmax_regression.py

@@ -6,9 +6,12 @@ from deeplotx.nn.linear_regression import LinearRegression
 
 
 class SoftmaxRegression(LinearRegression):
-    def __init__(self, input_dim: int, output_dim: int, model_name: str | None = None,
+    def __init__(self, input_dim: int, output_dim: int, num_layers: int = 1, expansion_factor: int | float = 1.5,
+                 bias: bool = True, dropout_rate: float = 0.1, model_name: str | None = None,
                  device: str | None = None, dtype: torch.dtype | None = None):
-        super().__init__(input_dim=input_dim, output_dim=output_dim, model_name=model_name, device=device, dtype=dtype)
+        super().__init__(input_dim=input_dim, output_dim=output_dim, num_layers=num_layers,
+                         expansion_factor=expansion_factor, bias=bias, dropout_rate=dropout_rate,
+                         model_name=model_name, device=device, dtype=dtype)
 
     @override
     def forward(self, x: torch.Tensor) -> torch.Tensor:
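
The regression and classification heads now surface the same `FeedForward` hyper-parameters (`num_layers`, `expansion_factor`, `bias`, `dropout_rate`). A sketch with illustrative values:

```python
from deeplotx import LogisticRegression, SoftmaxRegression

binary_head = LogisticRegression(input_dim=768, output_dim=1,
                                 num_layers=2, expansion_factor=1.5, dropout_rate=0.1)
multi_class_head = SoftmaxRegression(input_dim=768, output_dim=5,
                                     num_layers=2, expansion_factor=1.5, dropout_rate=0.1)
```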

{deeplotx-0.5.5 → deeplotx-0.6.1}/deeplotx/trainer/text_binary_classification_trainer.py

@@ -25,13 +25,13 @@ class TextBinaryClassifierTrainer(BaseTrainer):
               num_epochs: int, learning_rate: float = 2e-6, balancing_dataset: bool = True,
               train_loss_threshold: float = 0.0, valid_loss_threshold: float = 0.0,
               alpha: float = 1e-4, rho: float = 0.2,
-              hidden_dim: int = 256, recursive_layers: int = 2) -> LongContextRecursiveSequential:
+              hidden_dim: int = 256, recursive_layers: int = 2, **kwargs) -> LongContextRecursiveSequential:
         if balancing_dataset:
             min_length = min(len(positive_texts), len(negative_texts))
             positive_texts = positive_texts[:min_length]
             negative_texts = negative_texts[:min_length]
         all_texts = positive_texts + negative_texts
-        text_embeddings = [self._long_text_encoder.encode(x, flatten=False, use_cache=True) for x in all_texts]
+        text_embeddings = [self._long_text_encoder.encode(x, flatten=False) for x in all_texts]
         feature_dim = text_embeddings[0].shape[-1]
         dtype = text_embeddings[0].dtype
         labels = ([torch.tensor([1.], dtype=dtype, device=self.device) for _ in range(len(positive_texts))]
@@ -44,15 +44,27 @@ class TextBinaryClassifierTrainer(BaseTrainer):
         valid_dataset = TensorDataset(inputs[train_size:], labels[train_size:])
         self.train_dataset_loader = DataLoader(train_dataset, batch_size=self._batch_size, shuffle=True)
         self.valid_dataset_loader = DataLoader(valid_dataset, batch_size=self._batch_size, shuffle=True)
-
-        if self.model is not None and self.model.fc1.in_features != feature_dim:
+        if self.model is not None and self.model.in_features != feature_dim:
             logger.warning("The dimension of features doesn't match. A new model instance will be created.")
             self.model = None
         if self.model is None:
+            ffn_layers = kwargs.get('ffn_layers', 5)
+            ffn_expansion_factor = kwargs.get('ffn_expansion_factor', 2)
+            ffn_bias = kwargs.get('ffn_bias', True)
+            ffn_dropout_rate = kwargs.get('ffn_dropout_rate', 0.1)
             self.model = LongContextRecursiveSequential(input_dim=feature_dim, output_dim=1,
                                                         hidden_dim=hidden_dim,
                                                         recursive_layers=recursive_layers,
+                                                        ffn_layers=ffn_layers,
+                                                        ffn_expansion_factor=ffn_expansion_factor,
+                                                        ffn_bias=ffn_bias,
+                                                        ffn_dropout_rate=ffn_dropout_rate,
+                                                        attn_proj_layers=kwargs.get('attn_proj_layers', ffn_layers),
+                                                        attn_proj_bias=kwargs.get('attn_proj_bias', ffn_bias),
+                                                        attn_proj_expansion_factor=kwargs.get('attn_proj_expansion_factor', ffn_expansion_factor),
+                                                        attn_proj_dropout_rate=kwargs.get('attn_proj_dropout_rate', ffn_dropout_rate),
                                                         device=self.device, dtype=dtype)
+            logger.debug(f'Training Model: {self.model}')
         loss_function = nn.BCELoss()
         optimizer = optim.Adamax(self.model.parameters(), lr=learning_rate)
         for epoch in range(num_epochs):
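
The training method shown above (assumed here to be named `train`) now forwards extra keyword arguments such as `ffn_layers` and `attn_proj_layers` into the `LongContextRecursiveSequential` it builds. A hedged sketch; the trainer constructor arguments and the text lists are illustrative and not shown in this diff:

```python
from deeplotx import LongTextEncoder
from deeplotx.trainer.text_binary_classification_trainer import TextBinaryClassifierTrainer

encoder = LongTextEncoder(max_length=2048, chunk_size=448, overlapping=32, cache_capacity=512)
trainer = TextBinaryClassifierTrainer(encoder, batch_size=2, train_ratio=0.9)  # constructor args assumed
positive_texts, negative_texts = ['a positive sample ...'], ['a negative sample ...']  # placeholders
model = trainer.train(positive_texts, negative_texts,
                      num_epochs=36, learning_rate=2e-5,
                      hidden_dim=256, recursive_layers=2,
                      ffn_layers=5, ffn_expansion_factor=2, ffn_dropout_rate=0.1,
                      attn_proj_layers=5)  # extra **kwargs reach LongContextRecursiveSequential
```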

{deeplotx-0.5.5 → deeplotx-0.6.1}/deeplotx.egg-info/SOURCES.txt

@@ -14,6 +14,7 @@ deeplotx/encoder/longformer_encoder.py
 deeplotx/nn/__init__.py
 deeplotx/nn/auto_regression.py
 deeplotx/nn/base_neural_network.py
+deeplotx/nn/feed_forward.py
 deeplotx/nn/linear_regression.py
 deeplotx/nn/logistic_regression.py
 deeplotx/nn/long_context_auto_regression.py

{deeplotx-0.5.5 → deeplotx-0.6.1}/pyproject.toml

@@ -1,6 +1,6 @@
 [project]
 name = "deeplotx"
-version = "0.5.5"
+version = "0.6.1"
 description = "Easy-2-use long text NLP toolkit."
 readme = "README.md"
 requires-python = ">=3.10"

deeplotx-0.5.5/deeplotx/encoder/encoder.py (removed)

@@ -1,48 +0,0 @@
-import logging
-import os
-import math
-
-import torch
-from torch import nn
-from transformers import AutoTokenizer, AutoModel
-
-from deeplotx import __ROOT__
-
-CACHE_PATH = os.path.join(__ROOT__, '.cache')
-DEFAULT_BERT = 'FacebookAI/xlm-roberta-base'
-logger = logging.getLogger('deeplotx.embedding')
-
-
-class Encoder(nn.Module):
-    def __init__(self, model_name_or_path: str = DEFAULT_BERT, device: str | None = None):
-        super().__init__()
-        self.device = torch.device(device) if device is not None \
-            else torch.device('cuda' if torch.cuda.is_available() else 'cpu')
-        self.tokenizer = AutoTokenizer.from_pretrained(pretrained_model_name_or_path=model_name_or_path,
-                                                       cache_dir=CACHE_PATH, _from_auto=True)
-        self.encoder = AutoModel.from_pretrained(pretrained_model_name_or_path=model_name_or_path,
-                                                 cache_dir=CACHE_PATH, _from_auto=True).to(self.device)
-        self.embed_dim = self.encoder.config.max_position_embeddings
-        logger.debug(f'{Encoder.__name__} initialized on device: {self.device}.')
-
-    def forward(self, input_ids: torch.Tensor, attention_mask: torch.Tensor) -> torch.Tensor:
-        def _encoder(_input_tup: tuple[torch.Tensor, torch.Tensor]) -> torch.Tensor:
-            return self.encoder.forward(_input_tup[0], attention_mask=_input_tup[1]).last_hidden_state[:, 0, :]
-
-        num_chunks = math.ceil(input_ids.shape[-1] / self.embed_dim)
-        chunks = chunk_results = []
-        for i in range(num_chunks):
-            start_idx = i * self.embed_dim
-            end_idx = min(start_idx + self.embed_dim, input_ids.shape[-1])
-            chunks.append((input_ids[:, start_idx: end_idx], attention_mask[:, start_idx: end_idx]))
-        ori_mode = self.encoder.training
-        self.encoder.eval()
-        with torch.no_grad():
-            chunk_results = [_encoder(x) for x in chunks]
-        self.encoder.train(mode=ori_mode)
-        return torch.cat(chunk_results, dim=-1)
-
-    def encode(self, text: str) -> torch.Tensor:
-        _input_ids = torch.tensor([self.tokenizer.encode(text)], dtype=torch.long, device=self.device)
-        _att_mask = torch.tensor([[1] * _input_ids.shape[-1]], dtype=torch.int, device=self.device)
-        return self.forward(_input_ids, _att_mask).squeeze()

deeplotx-0.5.5/deeplotx/nn/linear_regression.py (removed)

@@ -1,37 +0,0 @@
-from typing_extensions import override
-
-import torch
-from torch import nn
-
-from deeplotx.nn.base_neural_network import BaseNeuralNetwork
-
-
-class LinearRegression(BaseNeuralNetwork):
-    def __init__(self, input_dim: int, output_dim: int, model_name: str | None = None,
-                 device: str | None = None, dtype: torch.dtype | None = None):
-        super().__init__(model_name=model_name, device=device, dtype=dtype)
-        self.fc1 = nn.Linear(input_dim, 1024, device=self.device, dtype=self.dtype)
-        self.fc1_to_fc4_res = nn.Linear(1024, 64, device=self.device, dtype=self.dtype)
-        self.fc2 = nn.Linear(1024, 768, device=self.device, dtype=self.dtype)
-        self.fc3 = nn.Linear(768, 128, device=self.device, dtype=self.dtype)
-        self.fc4 = nn.Linear(128, 64, device=self.device, dtype=self.dtype)
-        self.fc5 = nn.Linear(64, output_dim, device=self.device, dtype=self.dtype)
-        self.parametric_relu_1 = nn.PReLU(num_parameters=1, init=5e-3, device=self.device, dtype=self.dtype)
-        self.parametric_relu_2 = nn.PReLU(num_parameters=1, init=5e-3, device=self.device, dtype=self.dtype)
-        self.parametric_relu_3 = nn.PReLU(num_parameters=1, init=5e-3, device=self.device, dtype=self.dtype)
-        self.parametric_relu_4 = nn.PReLU(num_parameters=1, init=5e-3, device=self.device, dtype=self.dtype)
-
-    @override
-    def forward(self, x: torch.Tensor) -> torch.Tensor:
-        x = self.ensure_device_and_dtype(x, device=self.device, dtype=self.dtype)
-        fc1_out = self.parametric_relu_1(self.fc1(x))
-        x = nn.LayerNorm(normalized_shape=1024, eps=1e-9, device=self.device, dtype=self.dtype)(fc1_out)
-        x = torch.dropout(x, p=0.2, train=self.training)
-        x = self.parametric_relu_2(self.fc2(x))
-        x = nn.LayerNorm(normalized_shape=768, eps=1e-9, device=self.device, dtype=self.dtype)(x)
-        x = torch.dropout(x, p=0.2, train=self.training)
-        x = self.parametric_relu_3(self.fc3(x))
-        x = torch.dropout(x, p=0.2, train=self.training)
-        x = self.parametric_relu_4(self.fc4(x)) + self.fc1_to_fc4_res(fc1_out)
-        x = self.fc5(x)
-        return x

deeplotx-0.5.5/deeplotx/nn/self_attention.py (removed)

@@ -1,34 +0,0 @@
-from typing_extensions import override
-
-import torch
-from torch import nn, softmax
-
-from deeplotx.nn.base_neural_network import BaseNeuralNetwork
-
-
-class SelfAttention(BaseNeuralNetwork):
-    def __init__(self, feature_dim: int, model_name: str | None = None,
-                 device: str | None = None, dtype: torch.dtype | None = None):
-        super().__init__(model_name=model_name, device=device, dtype=dtype)
-        self._feature_dim = feature_dim
-        self.q_proj = nn.Linear(in_features=self._feature_dim, out_features=self._feature_dim,
-                                bias=True, device=self.device, dtype=self.dtype)
-        self.k_proj = nn.Linear(in_features=self._feature_dim, out_features=self._feature_dim,
-                                bias=True, device=self.device, dtype=self.dtype)
-        self.v_proj = nn.Linear(in_features=self._feature_dim, out_features=self._feature_dim,
-                                bias=True, device=self.device, dtype=self.dtype)
-
-    def _attention(self, x: torch.Tensor, mask: torch.Tensor | None = None) -> torch.Tensor:
-        q, k = self.q_proj(x), self.k_proj(x)
-        attn = torch.matmul(q, k.transpose(-2, -1))
-        attn = attn / (self._feature_dim ** 0.5)
-        attn = attn.masked_fill(mask == 0, -1e9) if mask is not None else attn
-        return softmax(attn, dim=-1)
-
-    @override
-    def forward(self, x: torch.Tensor, mask: torch.Tensor | None = None) -> torch.Tensor:
-        x = self.ensure_device_and_dtype(x, device=self.device, dtype=self.dtype)
-        if mask is not None:
-            mask = self.ensure_device_and_dtype(mask, device=self.device, dtype=self.dtype)
-        v = self.v_proj(x)
-        return torch.matmul(self._attention(x, mask), v)