deeplotx 0.5.5__tar.gz → 0.6.1__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {deeplotx-0.5.5 → deeplotx-0.6.1}/PKG-INFO +66 -42
- {deeplotx-0.5.5 → deeplotx-0.6.1}/README.md +65 -41
- {deeplotx-0.5.5 → deeplotx-0.6.1}/deeplotx/__init__.py +2 -0
- deeplotx-0.6.1/deeplotx/encoder/encoder.py +66 -0
- {deeplotx-0.5.5 → deeplotx-0.6.1}/deeplotx/encoder/long_text_encoder.py +7 -2
- {deeplotx-0.5.5 → deeplotx-0.6.1}/deeplotx/nn/__init__.py +2 -0
- {deeplotx-0.5.5 → deeplotx-0.6.1}/deeplotx/nn/auto_regression.py +4 -1
- {deeplotx-0.5.5 → deeplotx-0.6.1}/deeplotx/nn/base_neural_network.py +12 -1
- deeplotx-0.6.1/deeplotx/nn/feed_forward.py +53 -0
- deeplotx-0.6.1/deeplotx/nn/linear_regression.py +25 -0
- {deeplotx-0.5.5 → deeplotx-0.6.1}/deeplotx/nn/logistic_regression.py +5 -2
- {deeplotx-0.5.5 → deeplotx-0.6.1}/deeplotx/nn/long_context_auto_regression.py +4 -1
- {deeplotx-0.5.5 → deeplotx-0.6.1}/deeplotx/nn/long_context_recursive_sequential.py +12 -6
- {deeplotx-0.5.5 → deeplotx-0.6.1}/deeplotx/nn/recursive_sequential.py +12 -5
- deeplotx-0.6.1/deeplotx/nn/self_attention.py +39 -0
- {deeplotx-0.5.5 → deeplotx-0.6.1}/deeplotx/nn/softmax_regression.py +5 -2
- {deeplotx-0.5.5 → deeplotx-0.6.1}/deeplotx/trainer/text_binary_classification_trainer.py +16 -4
- {deeplotx-0.5.5 → deeplotx-0.6.1}/deeplotx.egg-info/PKG-INFO +66 -42
- {deeplotx-0.5.5 → deeplotx-0.6.1}/deeplotx.egg-info/SOURCES.txt +1 -0
- {deeplotx-0.5.5 → deeplotx-0.6.1}/pyproject.toml +1 -1
- deeplotx-0.5.5/deeplotx/encoder/encoder.py +0 -48
- deeplotx-0.5.5/deeplotx/nn/linear_regression.py +0 -37
- deeplotx-0.5.5/deeplotx/nn/self_attention.py +0 -34
- {deeplotx-0.5.5 → deeplotx-0.6.1}/LICENSE +0 -0
- {deeplotx-0.5.5 → deeplotx-0.6.1}/deeplotx/encoder/__init__.py +0 -0
- {deeplotx-0.5.5 → deeplotx-0.6.1}/deeplotx/encoder/longformer_encoder.py +0 -0
- {deeplotx-0.5.5 → deeplotx-0.6.1}/deeplotx/similarity/__init__.py +0 -0
- {deeplotx-0.5.5 → deeplotx-0.6.1}/deeplotx/similarity/distribution.py +0 -0
- {deeplotx-0.5.5 → deeplotx-0.6.1}/deeplotx/similarity/set.py +0 -0
- {deeplotx-0.5.5 → deeplotx-0.6.1}/deeplotx/similarity/vector.py +0 -0
- {deeplotx-0.5.5 → deeplotx-0.6.1}/deeplotx/trainer/__init__.py +0 -0
- {deeplotx-0.5.5 → deeplotx-0.6.1}/deeplotx/trainer/base_trainer.py +0 -0
- {deeplotx-0.5.5 → deeplotx-0.6.1}/deeplotx/util/__init__.py +0 -0
- {deeplotx-0.5.5 → deeplotx-0.6.1}/deeplotx/util/hash.py +0 -0
- {deeplotx-0.5.5 → deeplotx-0.6.1}/deeplotx/util/read_file.py +0 -0
- {deeplotx-0.5.5 → deeplotx-0.6.1}/deeplotx.egg-info/dependency_links.txt +0 -0
- {deeplotx-0.5.5 → deeplotx-0.6.1}/deeplotx.egg-info/requires.txt +0 -0
- {deeplotx-0.5.5 → deeplotx-0.6.1}/deeplotx.egg-info/top_level.txt +0 -0
- {deeplotx-0.5.5 → deeplotx-0.6.1}/setup.cfg +0 -0
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.4
|
2
2
|
Name: deeplotx
|
3
|
-
Version: 0.5.5
|
3
|
+
Version: 0.6.1
|
4
4
|
Summary: Easy-2-use long text NLP toolkit.
|
5
5
|
Requires-Python: >=3.10
|
6
6
|
Description-Content-Type: text/markdown
|
@@ -163,6 +163,8 @@ Dynamic: license-file
|
|
163
163
|
|
164
164
|
```python
|
165
165
|
from deeplotx import (
|
166
|
+
BaseNeuralNetwork, # 深度神经网络基类
|
167
|
+
FeedForward, # 前馈神经网络
|
166
168
|
LinearRegression, # 线性回归
|
167
169
|
LogisticRegression, # 逻辑回归 / 二分类 / 多标签分类
|
168
170
|
SoftmaxRegression, # Softmax 回归 / 多分类
|
@@ -181,38 +183,54 @@ Dynamic: license-file
|
|
181
183
|
|
182
184
|
import torch
|
183
185
|
from torch import nn
|
184
|
-
|
186
|
+
|
185
187
|
from deeplotx.nn.base_neural_network import BaseNeuralNetwork
|
186
|
-
|
187
|
-
|
188
|
-
class
|
189
|
-
def __init__(self,
|
190
|
-
|
191
|
-
|
192
|
-
|
193
|
-
self.
|
194
|
-
self.
|
195
|
-
|
196
|
-
self.
|
197
|
-
|
198
|
-
self.parametric_relu_1 = nn.PReLU(num_parameters=1, init=5e-3,
|
199
|
-
|
200
|
-
self.
|
201
|
-
|
202
|
-
|
188
|
+
|
189
|
+
|
190
|
+
class FeedForwardUnit(BaseNeuralNetwork):
|
191
|
+
def __init__(self, feature_dim: int, expansion_factor: int | float = 2,
|
192
|
+
bias: bool = True, dropout_rate: float = 0.05, model_name: str | None = None,
|
193
|
+
device: str | None = None, dtype: torch.dtype | None = None):
|
194
|
+
super().__init__(in_features=feature_dim, out_features=feature_dim, model_name=model_name, device=device, dtype=dtype)
|
195
|
+
self._dropout_rate = dropout_rate
|
196
|
+
self.fc1 = nn.Linear(feature_dim, int(feature_dim * expansion_factor), bias=bias,
|
197
|
+
device=self.device, dtype=self.dtype)
|
198
|
+
self.fc2 = nn.Linear(int(feature_dim * expansion_factor), feature_dim, bias=bias,
|
199
|
+
device=self.device, dtype=self.dtype)
|
200
|
+
self.parametric_relu_1 = nn.PReLU(num_parameters=1, init=5e-3,
|
201
|
+
device=self.device, dtype=self.dtype)
|
202
|
+
self.layer_norm = nn.LayerNorm(normalized_shape=self.fc1.in_features, eps=1e-9,
|
203
|
+
device=self.device, dtype=self.dtype)
|
204
|
+
|
203
205
|
@override
|
204
|
-
def forward(self, x) -> torch.Tensor:
|
206
|
+
def forward(self, x: torch.Tensor) -> torch.Tensor:
|
205
207
|
x = self.ensure_device_and_dtype(x, device=self.device, dtype=self.dtype)
|
206
|
-
|
207
|
-
x =
|
208
|
-
x =
|
209
|
-
x = self.
|
210
|
-
|
211
|
-
|
212
|
-
|
213
|
-
|
214
|
-
|
215
|
-
|
208
|
+
residual = x
|
209
|
+
x = self.layer_norm(x)
|
210
|
+
x = self.fc1(x)
|
211
|
+
x = self.parametric_relu_1(x)
|
212
|
+
if self._dropout_rate > .0:
|
213
|
+
x = torch.dropout(x, p=self._dropout_rate, train=self.training)
|
214
|
+
return self.fc2(x) + residual
|
215
|
+
|
216
|
+
|
217
|
+
class FeedForward(BaseNeuralNetwork):
|
218
|
+
def __init__(self, feature_dim: int, num_layers: int = 1, expansion_factor: int | float = 2,
|
219
|
+
bias: bool = True, dropout_rate: float = 0.05, model_name: str | None = None,
|
220
|
+
device: str | None = None, dtype: torch.dtype | None = None):
|
221
|
+
if num_layers < 1:
|
222
|
+
raise ValueError('num_layers cannot be less than 1.')
|
223
|
+
super().__init__(in_features=feature_dim, out_features=feature_dim, model_name=model_name, device=device, dtype=dtype)
|
224
|
+
self.ffn_layers = nn.ModuleList([FeedForwardUnit(feature_dim=feature_dim,
|
225
|
+
expansion_factor=expansion_factor, bias=bias,
|
226
|
+
dropout_rate=dropout_rate,
|
227
|
+
device=self.device, dtype=self.dtype)] * num_layers)
|
228
|
+
|
229
|
+
@override
|
230
|
+
def forward(self, x: torch.Tensor) -> torch.Tensor:
|
231
|
+
x = self.ensure_device_and_dtype(x, device=self.device, dtype=self.dtype)
|
232
|
+
for ffn in self.ffn_layers:
|
233
|
+
x = ffn(x)
|
216
234
|
return x
|
217
235
|
```
|
218
236
|
|
@@ -222,29 +240,34 @@ Dynamic: license-file
|
|
222
240
|
from typing_extensions import override
|
223
241
|
|
224
242
|
import torch
|
225
|
-
from torch import nn, softmax
|
226
243
|
|
227
244
|
from deeplotx.nn.base_neural_network import BaseNeuralNetwork
|
245
|
+
from deeplotx.nn.feed_forward import FeedForward
|
228
246
|
|
229
247
|
|
230
248
|
class SelfAttention(BaseNeuralNetwork):
|
231
|
-
def __init__(self, feature_dim: int,
|
232
|
-
|
233
|
-
|
249
|
+
def __init__(self, feature_dim: int, bias: bool = True, proj_layers: int = 1,
|
250
|
+
proj_expansion_factor: int | float = 1.5, dropout_rate: float = 0.02,
|
251
|
+
model_name: str | None = None, device: str | None = None, dtype: torch.dtype | None = None):
|
252
|
+
super().__init__(in_features=feature_dim, out_features=feature_dim, model_name=model_name,
|
253
|
+
device=device, dtype=dtype)
|
234
254
|
self._feature_dim = feature_dim
|
235
|
-
self.q_proj =
|
236
|
-
|
237
|
-
|
238
|
-
|
239
|
-
|
240
|
-
bias=
|
255
|
+
self.q_proj = FeedForward(feature_dim=self._feature_dim, num_layers=proj_layers,
|
256
|
+
expansion_factor=proj_expansion_factor,
|
257
|
+
bias=bias, dropout_rate=dropout_rate, device=self.device, dtype=self.dtype)
|
258
|
+
self.k_proj = FeedForward(feature_dim=self._feature_dim, num_layers=proj_layers,
|
259
|
+
expansion_factor=proj_expansion_factor,
|
260
|
+
bias=bias, dropout_rate=dropout_rate, device=self.device, dtype=self.dtype)
|
261
|
+
self.v_proj = FeedForward(feature_dim=self._feature_dim, num_layers=proj_layers,
|
262
|
+
expansion_factor=proj_expansion_factor,
|
263
|
+
bias=bias, dropout_rate=dropout_rate, device=self.device, dtype=self.dtype)
|
241
264
|
|
242
265
|
def _attention(self, x: torch.Tensor, mask: torch.Tensor | None = None) -> torch.Tensor:
|
243
266
|
q, k = self.q_proj(x), self.k_proj(x)
|
244
267
|
attn = torch.matmul(q, k.transpose(-2, -1))
|
245
268
|
attn = attn / (self._feature_dim ** 0.5)
|
246
269
|
attn = attn.masked_fill(mask == 0, -1e9) if mask is not None else attn
|
247
|
-
return softmax(attn, dim=-1)
|
270
|
+
return torch.softmax(attn, dim=-1)
|
248
271
|
|
249
272
|
@override
|
250
273
|
def forward(self, x: torch.Tensor, mask: torch.Tensor | None = None) -> torch.Tensor:
|
@@ -265,7 +288,8 @@ Dynamic: license-file
|
|
265
288
|
long_text_encoder = LongTextEncoder(
|
266
289
|
max_length=2048, # 最大文本大小, 超出截断
|
267
290
|
chunk_size=448, # 块大小 (按 Token 计)
|
268
|
-
overlapping=32 # 块间重叠大小 (按 Token 计)
|
291
|
+
overlapping=32, # 块间重叠大小 (按 Token 计)
|
292
|
+
cache_capacity=512 # 缓存大小
|
269
293
|
)
|
270
294
|
|
271
295
|
trainer = TextBinaryClassifierTrainer(
|
@@ -145,6 +145,8 @@
|
|
145
145
|
|
146
146
|
```python
|
147
147
|
from deeplotx import (
|
148
|
+
BaseNeuralNetwork, # 深度神经网络基类
|
149
|
+
FeedForward, # 前馈神经网络
|
148
150
|
LinearRegression, # 线性回归
|
149
151
|
LogisticRegression, # 逻辑回归 / 二分类 / 多标签分类
|
150
152
|
SoftmaxRegression, # Softmax 回归 / 多分类
|
@@ -163,38 +165,54 @@
|
|
163
165
|
|
164
166
|
import torch
|
165
167
|
from torch import nn
|
166
|
-
|
168
|
+
|
167
169
|
from deeplotx.nn.base_neural_network import BaseNeuralNetwork
|
168
|
-
|
169
|
-
|
170
|
-
class
|
171
|
-
def __init__(self,
|
172
|
-
|
173
|
-
|
174
|
-
|
175
|
-
self.
|
176
|
-
self.
|
177
|
-
|
178
|
-
self.
|
179
|
-
|
180
|
-
self.parametric_relu_1 = nn.PReLU(num_parameters=1, init=5e-3,
|
181
|
-
|
182
|
-
self.
|
183
|
-
|
184
|
-
|
170
|
+
|
171
|
+
|
172
|
+
class FeedForwardUnit(BaseNeuralNetwork):
|
173
|
+
def __init__(self, feature_dim: int, expansion_factor: int | float = 2,
|
174
|
+
bias: bool = True, dropout_rate: float = 0.05, model_name: str | None = None,
|
175
|
+
device: str | None = None, dtype: torch.dtype | None = None):
|
176
|
+
super().__init__(in_features=feature_dim, out_features=feature_dim, model_name=model_name, device=device, dtype=dtype)
|
177
|
+
self._dropout_rate = dropout_rate
|
178
|
+
self.fc1 = nn.Linear(feature_dim, int(feature_dim * expansion_factor), bias=bias,
|
179
|
+
device=self.device, dtype=self.dtype)
|
180
|
+
self.fc2 = nn.Linear(int(feature_dim * expansion_factor), feature_dim, bias=bias,
|
181
|
+
device=self.device, dtype=self.dtype)
|
182
|
+
self.parametric_relu_1 = nn.PReLU(num_parameters=1, init=5e-3,
|
183
|
+
device=self.device, dtype=self.dtype)
|
184
|
+
self.layer_norm = nn.LayerNorm(normalized_shape=self.fc1.in_features, eps=1e-9,
|
185
|
+
device=self.device, dtype=self.dtype)
|
186
|
+
|
185
187
|
@override
|
186
|
-
def forward(self, x) -> torch.Tensor:
|
188
|
+
def forward(self, x: torch.Tensor) -> torch.Tensor:
|
187
189
|
x = self.ensure_device_and_dtype(x, device=self.device, dtype=self.dtype)
|
188
|
-
|
189
|
-
x =
|
190
|
-
x =
|
191
|
-
x = self.
|
192
|
-
|
193
|
-
|
194
|
-
|
195
|
-
|
196
|
-
|
197
|
-
|
190
|
+
residual = x
|
191
|
+
x = self.layer_norm(x)
|
192
|
+
x = self.fc1(x)
|
193
|
+
x = self.parametric_relu_1(x)
|
194
|
+
if self._dropout_rate > .0:
|
195
|
+
x = torch.dropout(x, p=self._dropout_rate, train=self.training)
|
196
|
+
return self.fc2(x) + residual
|
197
|
+
|
198
|
+
|
199
|
+
class FeedForward(BaseNeuralNetwork):
|
200
|
+
def __init__(self, feature_dim: int, num_layers: int = 1, expansion_factor: int | float = 2,
|
201
|
+
bias: bool = True, dropout_rate: float = 0.05, model_name: str | None = None,
|
202
|
+
device: str | None = None, dtype: torch.dtype | None = None):
|
203
|
+
if num_layers < 1:
|
204
|
+
raise ValueError('num_layers cannot be less than 1.')
|
205
|
+
super().__init__(in_features=feature_dim, out_features=feature_dim, model_name=model_name, device=device, dtype=dtype)
|
206
|
+
self.ffn_layers = nn.ModuleList([FeedForwardUnit(feature_dim=feature_dim,
|
207
|
+
expansion_factor=expansion_factor, bias=bias,
|
208
|
+
dropout_rate=dropout_rate,
|
209
|
+
device=self.device, dtype=self.dtype)] * num_layers)
|
210
|
+
|
211
|
+
@override
|
212
|
+
def forward(self, x: torch.Tensor) -> torch.Tensor:
|
213
|
+
x = self.ensure_device_and_dtype(x, device=self.device, dtype=self.dtype)
|
214
|
+
for ffn in self.ffn_layers:
|
215
|
+
x = ffn(x)
|
198
216
|
return x
|
199
217
|
```
|
200
218
|
|
@@ -204,29 +222,34 @@
|
|
204
222
|
from typing_extensions import override
|
205
223
|
|
206
224
|
import torch
|
207
|
-
from torch import nn, softmax
|
208
225
|
|
209
226
|
from deeplotx.nn.base_neural_network import BaseNeuralNetwork
|
227
|
+
from deeplotx.nn.feed_forward import FeedForward
|
210
228
|
|
211
229
|
|
212
230
|
class SelfAttention(BaseNeuralNetwork):
|
213
|
-
def __init__(self, feature_dim: int,
|
214
|
-
|
215
|
-
|
231
|
+
def __init__(self, feature_dim: int, bias: bool = True, proj_layers: int = 1,
|
232
|
+
proj_expansion_factor: int | float = 1.5, dropout_rate: float = 0.02,
|
233
|
+
model_name: str | None = None, device: str | None = None, dtype: torch.dtype | None = None):
|
234
|
+
super().__init__(in_features=feature_dim, out_features=feature_dim, model_name=model_name,
|
235
|
+
device=device, dtype=dtype)
|
216
236
|
self._feature_dim = feature_dim
|
217
|
-
self.q_proj =
|
218
|
-
|
219
|
-
|
220
|
-
|
221
|
-
|
222
|
-
bias=
|
237
|
+
self.q_proj = FeedForward(feature_dim=self._feature_dim, num_layers=proj_layers,
|
238
|
+
expansion_factor=proj_expansion_factor,
|
239
|
+
bias=bias, dropout_rate=dropout_rate, device=self.device, dtype=self.dtype)
|
240
|
+
self.k_proj = FeedForward(feature_dim=self._feature_dim, num_layers=proj_layers,
|
241
|
+
expansion_factor=proj_expansion_factor,
|
242
|
+
bias=bias, dropout_rate=dropout_rate, device=self.device, dtype=self.dtype)
|
243
|
+
self.v_proj = FeedForward(feature_dim=self._feature_dim, num_layers=proj_layers,
|
244
|
+
expansion_factor=proj_expansion_factor,
|
245
|
+
bias=bias, dropout_rate=dropout_rate, device=self.device, dtype=self.dtype)
|
223
246
|
|
224
247
|
def _attention(self, x: torch.Tensor, mask: torch.Tensor | None = None) -> torch.Tensor:
|
225
248
|
q, k = self.q_proj(x), self.k_proj(x)
|
226
249
|
attn = torch.matmul(q, k.transpose(-2, -1))
|
227
250
|
attn = attn / (self._feature_dim ** 0.5)
|
228
251
|
attn = attn.masked_fill(mask == 0, -1e9) if mask is not None else attn
|
229
|
-
return softmax(attn, dim=-1)
|
252
|
+
return torch.softmax(attn, dim=-1)
|
230
253
|
|
231
254
|
@override
|
232
255
|
def forward(self, x: torch.Tensor, mask: torch.Tensor | None = None) -> torch.Tensor:
|
@@ -247,7 +270,8 @@
|
|
247
270
|
long_text_encoder = LongTextEncoder(
|
248
271
|
max_length=2048, # 最大文本大小, 超出截断
|
249
272
|
chunk_size=448, # 块大小 (按 Token 计)
|
250
|
-
overlapping=32 # 块间重叠大小 (按 Token 计)
|
273
|
+
overlapping=32, # 块间重叠大小 (按 Token 计)
|
274
|
+
cache_capacity=512 # 缓存大小
|
251
275
|
)
|
252
276
|
|
253
277
|
trainer = TextBinaryClassifierTrainer(
|
@@ -0,0 +1,66 @@
|
|
1
|
+
import logging
|
2
|
+
import os
|
3
|
+
import math
|
4
|
+
from requests.exceptions import ConnectTimeout, SSLError
|
5
|
+
|
6
|
+
import torch
|
7
|
+
from torch import nn
|
8
|
+
from transformers import AutoTokenizer, AutoModel
|
9
|
+
|
10
|
+
from deeplotx import __ROOT__
|
11
|
+
|
12
|
+
CACHE_PATH = os.path.join(__ROOT__, '.cache')
|
13
|
+
DEFAULT_BERT = 'FacebookAI/xlm-roberta-base'
|
14
|
+
logger = logging.getLogger('deeplotx.embedding')
|
15
|
+
|
16
|
+
|
17
|
+
class Encoder(nn.Module):
|
18
|
+
def __init__(self, model_name_or_path: str = DEFAULT_BERT, device: str | None = None):
|
19
|
+
super().__init__()
|
20
|
+
self.device = torch.device(device) if device is not None \
|
21
|
+
else torch.device('cuda' if torch.cuda.is_available() else 'cpu')
|
22
|
+
try:
|
23
|
+
self.tokenizer = AutoTokenizer.from_pretrained(pretrained_model_name_or_path=model_name_or_path,
|
24
|
+
cache_dir=CACHE_PATH, _from_auto=True,
|
25
|
+
trust_remote_code=True)
|
26
|
+
self.encoder = AutoModel.from_pretrained(pretrained_model_name_or_path=model_name_or_path,
|
27
|
+
cache_dir=CACHE_PATH, _from_auto=True,
|
28
|
+
trust_remote_code=True).to(self.device)
|
29
|
+
except ConnectTimeout:
|
30
|
+
self.tokenizer = AutoTokenizer.from_pretrained(pretrained_model_name_or_path=model_name_or_path,
|
31
|
+
cache_dir=CACHE_PATH, _from_auto=True,
|
32
|
+
trust_remote_code=True, local_files_only=True)
|
33
|
+
self.encoder = AutoModel.from_pretrained(pretrained_model_name_or_path=model_name_or_path,
|
34
|
+
cache_dir=CACHE_PATH, _from_auto=True,
|
35
|
+
trust_remote_code=True, local_files_only=True).to(self.device)
|
36
|
+
except SSLError:
|
37
|
+
self.tokenizer = AutoTokenizer.from_pretrained(pretrained_model_name_or_path=model_name_or_path,
|
38
|
+
cache_dir=CACHE_PATH, _from_auto=True,
|
39
|
+
trust_remote_code=True, local_files_only=True)
|
40
|
+
self.encoder = AutoModel.from_pretrained(pretrained_model_name_or_path=model_name_or_path,
|
41
|
+
cache_dir=CACHE_PATH, _from_auto=True,
|
42
|
+
trust_remote_code=True, local_files_only=True).to(self.device)
|
43
|
+
self.embed_dim = self.encoder.config.max_position_embeddings
|
44
|
+
logger.debug(f'{Encoder.__name__} initialized on device: {self.device}.')
|
45
|
+
|
46
|
+
def forward(self, input_ids: torch.Tensor, attention_mask: torch.Tensor, *args, **kwargs) -> torch.Tensor:
|
47
|
+
def _encoder(_input_tup: tuple[torch.Tensor, torch.Tensor]) -> torch.Tensor:
|
48
|
+
return self.encoder.forward(_input_tup[0], attention_mask=_input_tup[1]).last_hidden_state[:, 0, :]
|
49
|
+
|
50
|
+
num_chunks = math.ceil(input_ids.shape[-1] / self.embed_dim)
|
51
|
+
chunks = chunk_results = []
|
52
|
+
for i in range(num_chunks):
|
53
|
+
start_idx = i * self.embed_dim
|
54
|
+
end_idx = min(start_idx + self.embed_dim, input_ids.shape[-1])
|
55
|
+
chunks.append((input_ids[:, start_idx: end_idx], attention_mask[:, start_idx: end_idx]))
|
56
|
+
ori_mode = self.encoder.training
|
57
|
+
self.encoder.eval()
|
58
|
+
with torch.no_grad():
|
59
|
+
chunk_results = [_encoder(x) for x in chunks]
|
60
|
+
self.encoder.train(mode=ori_mode)
|
61
|
+
return torch.cat(chunk_results, dim=-1)
|
62
|
+
|
63
|
+
def encode(self, text: str) -> torch.Tensor:
|
64
|
+
_input_ids = torch.tensor([self.tokenizer.encode(text)], dtype=torch.long, device=self.device)
|
65
|
+
_att_mask = torch.tensor([[1] * _input_ids.shape[-1]], dtype=torch.int, device=self.device)
|
66
|
+
return self.forward(_input_ids, _att_mask).squeeze()
|
@@ -15,16 +15,21 @@ logger = logging.getLogger('deeplotx.embedding')
|
|
15
15
|
class LongTextEncoder(Encoder):
|
16
16
|
def __init__(self, max_length: int, chunk_size: int = 448,
|
17
17
|
overlapping: int = 32, model_name_or_path: str = DEFAULT_BERT,
|
18
|
-
cache_capacity: int = 64, device: str | None = None):
|
18
|
+
cache_capacity: int = 64, max_workers: int = 8, device: str | None = None):
|
19
19
|
super().__init__(model_name_or_path=model_name_or_path, device=device)
|
20
20
|
self._max_length = max_length
|
21
21
|
self._chunk_size = chunk_size
|
22
22
|
self._overlapping = overlapping
|
23
23
|
self._cache = LRUCache(capacity=cache_capacity)
|
24
|
+
self._worker_group = ThreadPool(max_workers=max_workers)
|
24
25
|
|
25
26
|
def __chunk_embedding(self, idx: int, x: torch.Tensor, mask: torch.Tensor) -> tuple[int, torch.Tensor]:
|
26
27
|
return idx, super().forward(x, attention_mask=mask)
|
27
28
|
|
29
|
+
@override
|
30
|
+
def forward(self, text: str, flatten: bool = False, *args, **kwargs) -> torch.Tensor:
|
31
|
+
return self.encode(text=text, flatten=flatten)
|
32
|
+
|
28
33
|
@override
|
29
34
|
def encode(self, text: str, flatten: bool = False) -> torch.Tensor:
|
30
35
|
def postprocess(tensors: list[torch.Tensor], _flatten: bool) -> torch.Tensor:
|
@@ -59,7 +64,7 @@ class LongTextEncoder(Encoder):
|
|
59
64
|
_tmp_right = (i + 1) * self._chunk_size + self._overlapping
|
60
65
|
chunks.append((i, torch.tensor([_text_to_input_ids[_tmp_left: _tmp_right]], dtype=torch.int, device=self.device),
|
61
66
|
torch.tensor([_text_to_input_ids_att_mask[_tmp_left: _tmp_right]], dtype=torch.int, device=self.device)))
|
62
|
-
embeddings = list(
|
67
|
+
embeddings = list(self._worker_group.map(self.__chunk_embedding, chunks))
|
63
68
|
embeddings = sorted([x.returns for x in embeddings], key=lambda x: x[0], reverse=False)
|
64
69
|
fin_embedding = [x[1] for x in embeddings]
|
65
70
|
# write cache
|
@@ -5,8 +5,11 @@ from deeplotx.nn import RecursiveSequential
|
|
5
5
|
|
6
6
|
class AutoRegression(RecursiveSequential):
|
7
7
|
def __init__(self, feature_dim: int, hidden_dim: int | None = None,
|
8
|
-
recursive_layers: int = 2,
|
8
|
+
recursive_layers: int = 2, ffn_layers: int = 1, ffn_expansion_factor: int | float = 2,
|
9
|
+
ffn_bias: bool = True, ffn_dropout_rate: float = 0.05, model_name: str | None = None,
|
9
10
|
device: str | None = None, dtype: torch.dtype | None = None):
|
10
11
|
super().__init__(input_dim=feature_dim, output_dim=feature_dim,
|
11
12
|
hidden_dim=hidden_dim, recursive_layers=recursive_layers,
|
13
|
+
ffn_layers=ffn_layers, ffn_expansion_factor=ffn_expansion_factor,
|
14
|
+
ffn_bias=ffn_bias, ffn_dropout_rate=ffn_dropout_rate,
|
12
15
|
model_name=model_name, device=device, dtype=dtype)
|
@@ -8,7 +8,8 @@ DEFAULT_SUFFIX = 'dlx'
|
|
8
8
|
|
9
9
|
|
10
10
|
class BaseNeuralNetwork(nn.Module):
|
11
|
-
def __init__(self,
|
11
|
+
def __init__(self, in_features: int, out_features: int, model_name: str | None = None,
|
12
|
+
device: str | None = None, dtype: torch.dtype | None = None):
|
12
13
|
super().__init__()
|
13
14
|
self._model_name = model_name \
|
14
15
|
if model_name is not None \
|
@@ -16,6 +17,16 @@ class BaseNeuralNetwork(nn.Module):
|
|
16
17
|
self.device = torch.device(device) if device is not None \
|
17
18
|
else torch.device('cuda' if torch.cuda.is_available() else 'cpu')
|
18
19
|
self.dtype = dtype if dtype is not None else torch.float32
|
20
|
+
self._in_features = in_features
|
21
|
+
self._out_features = out_features
|
22
|
+
|
23
|
+
@property
|
24
|
+
def in_features(self) -> int:
|
25
|
+
return self._in_features
|
26
|
+
|
27
|
+
@property
|
28
|
+
def out_features(self) -> int:
|
29
|
+
return self._out_features
|
19
30
|
|
20
31
|
@staticmethod
|
21
32
|
def ensure_device_and_dtype(x: torch.Tensor, device: torch.device, dtype: torch.dtype) -> torch.Tensor:
|
@@ -0,0 +1,53 @@
|
|
1
|
+
from typing_extensions import override
|
2
|
+
|
3
|
+
import torch
|
4
|
+
from torch import nn
|
5
|
+
|
6
|
+
from deeplotx.nn.base_neural_network import BaseNeuralNetwork
|
7
|
+
|
8
|
+
|
9
|
+
class FeedForwardUnit(BaseNeuralNetwork):
|
10
|
+
def __init__(self, feature_dim: int, expansion_factor: int | float = 2,
|
11
|
+
bias: bool = True, dropout_rate: float = 0.05, model_name: str | None = None,
|
12
|
+
device: str | None = None, dtype: torch.dtype | None = None):
|
13
|
+
super().__init__(in_features=feature_dim, out_features=feature_dim, model_name=model_name, device=device, dtype=dtype)
|
14
|
+
self._dropout_rate = dropout_rate
|
15
|
+
self.fc1 = nn.Linear(feature_dim, int(feature_dim * expansion_factor), bias=bias,
|
16
|
+
device=self.device, dtype=self.dtype)
|
17
|
+
self.fc2 = nn.Linear(int(feature_dim * expansion_factor), feature_dim, bias=bias,
|
18
|
+
device=self.device, dtype=self.dtype)
|
19
|
+
self.parametric_relu_1 = nn.PReLU(num_parameters=1, init=5e-3,
|
20
|
+
device=self.device, dtype=self.dtype)
|
21
|
+
self.layer_norm = nn.LayerNorm(normalized_shape=self.fc1.in_features, eps=1e-9,
|
22
|
+
device=self.device, dtype=self.dtype)
|
23
|
+
|
24
|
+
@override
|
25
|
+
def forward(self, x: torch.Tensor) -> torch.Tensor:
|
26
|
+
x = self.ensure_device_and_dtype(x, device=self.device, dtype=self.dtype)
|
27
|
+
residual = x
|
28
|
+
x = self.layer_norm(x)
|
29
|
+
x = self.fc1(x)
|
30
|
+
x = self.parametric_relu_1(x)
|
31
|
+
if self._dropout_rate > .0:
|
32
|
+
x = torch.dropout(x, p=self._dropout_rate, train=self.training)
|
33
|
+
return self.fc2(x) + residual
|
34
|
+
|
35
|
+
|
36
|
+
class FeedForward(BaseNeuralNetwork):
|
37
|
+
def __init__(self, feature_dim: int, num_layers: int = 1, expansion_factor: int | float = 2,
|
38
|
+
bias: bool = True, dropout_rate: float = 0.05, model_name: str | None = None,
|
39
|
+
device: str | None = None, dtype: torch.dtype | None = None):
|
40
|
+
if num_layers < 1:
|
41
|
+
raise ValueError('num_layers cannot be less than 1.')
|
42
|
+
super().__init__(in_features=feature_dim, out_features=feature_dim, model_name=model_name, device=device, dtype=dtype)
|
43
|
+
self.ffn_layers = nn.ModuleList([FeedForwardUnit(feature_dim=feature_dim,
|
44
|
+
expansion_factor=expansion_factor, bias=bias,
|
45
|
+
dropout_rate=dropout_rate,
|
46
|
+
device=self.device, dtype=self.dtype)] * num_layers)
|
47
|
+
|
48
|
+
@override
|
49
|
+
def forward(self, x: torch.Tensor) -> torch.Tensor:
|
50
|
+
x = self.ensure_device_and_dtype(x, device=self.device, dtype=self.dtype)
|
51
|
+
for ffn in self.ffn_layers:
|
52
|
+
x = ffn(x)
|
53
|
+
return x
|
@@ -0,0 +1,25 @@
|
|
1
|
+
from typing_extensions import override
|
2
|
+
|
3
|
+
import torch
|
4
|
+
from torch import nn
|
5
|
+
|
6
|
+
from deeplotx.nn.base_neural_network import BaseNeuralNetwork
|
7
|
+
from deeplotx.nn.feed_forward import FeedForward
|
8
|
+
|
9
|
+
|
10
|
+
class LinearRegression(BaseNeuralNetwork):
|
11
|
+
def __init__(self, input_dim: int, output_dim: int, num_layers: int = 1,
|
12
|
+
expansion_factor: int | float = 1.5, bias: bool = True, dropout_rate: float = 0.1,
|
13
|
+
model_name: str | None = None, device: str | None = None, dtype: torch.dtype | None = None):
|
14
|
+
super().__init__(in_features=input_dim, out_features=output_dim, model_name=model_name, device=device, dtype=dtype)
|
15
|
+
self.ffn = FeedForward(feature_dim=input_dim, num_layers=num_layers, expansion_factor=expansion_factor,
|
16
|
+
bias=bias, dropout_rate=dropout_rate, device=self.device, dtype=self.dtype)
|
17
|
+
self.proj = nn.Linear(in_features=input_dim, out_features=output_dim,
|
18
|
+
bias=bias, device=self.device, dtype=self.dtype)
|
19
|
+
|
20
|
+
@override
|
21
|
+
def forward(self, x: torch.Tensor) -> torch.Tensor:
|
22
|
+
x = self.ensure_device_and_dtype(x, device=self.device, dtype=self.dtype)
|
23
|
+
residual = x
|
24
|
+
x = self.ffn(x) + residual
|
25
|
+
return self.proj(x)
|
@@ -6,9 +6,12 @@ from deeplotx.nn.linear_regression import LinearRegression
|
|
6
6
|
|
7
7
|
|
8
8
|
class LogisticRegression(LinearRegression):
|
9
|
-
def __init__(self, input_dim: int, output_dim: int = 1,
|
9
|
+
def __init__(self, input_dim: int, output_dim: int = 1, num_layers: int = 1, expansion_factor: int | float = 1.5,
|
10
|
+
bias: bool = True, dropout_rate: float = 0.1, model_name: str | None = None,
|
10
11
|
device: str | None = None, dtype: torch.dtype | None = None):
|
11
|
-
super().__init__(input_dim=input_dim, output_dim=output_dim,
|
12
|
+
super().__init__(input_dim=input_dim, output_dim=output_dim, num_layers=num_layers,
|
13
|
+
expansion_factor=expansion_factor, bias=bias, dropout_rate=dropout_rate,
|
14
|
+
model_name=model_name, device=device, dtype=dtype)
|
12
15
|
|
13
16
|
@override
|
14
17
|
def forward(self, x: torch.Tensor) -> torch.Tensor:
|
@@ -5,8 +5,11 @@ from deeplotx.nn import LongContextRecursiveSequential
|
|
5
5
|
|
6
6
|
class LongContextAutoRegression(LongContextRecursiveSequential):
|
7
7
|
def __init__(self, feature_dim: int, hidden_dim: int | None = None,
|
8
|
-
recursive_layers: int = 2,
|
8
|
+
recursive_layers: int = 2, ffn_layers: int = 1, ffn_expansion_factor: int | float = 2,
|
9
|
+
ffn_bias: bool = True, ffn_dropout_rate: float = 0.05, model_name: str | None = None,
|
9
10
|
device: str | None = None, dtype: torch.dtype | None = None):
|
10
11
|
super().__init__(input_dim=feature_dim, output_dim=feature_dim,
|
11
12
|
hidden_dim=hidden_dim, recursive_layers=recursive_layers,
|
13
|
+
ffn_layers=ffn_layers, ffn_expansion_factor=ffn_expansion_factor,
|
14
|
+
ffn_bias=ffn_bias, ffn_dropout_rate=ffn_dropout_rate,
|
12
15
|
model_name=model_name, device=device, dtype=dtype)
|
@@ -10,19 +10,25 @@ from deeplotx.nn.self_attention import SelfAttention
|
|
10
10
|
class LongContextRecursiveSequential(RecursiveSequential):
|
11
11
|
def __init__(self, input_dim: int, output_dim: int,
|
12
12
|
hidden_dim: int | None = None, recursive_layers: int = 2,
|
13
|
+
ffn_layers: int = 1, ffn_expansion_factor: int | float = 2,
|
14
|
+
ffn_bias: bool = True, ffn_dropout_rate: float = 0.05,
|
13
15
|
model_name: str | None = None, device: str | None = None,
|
14
|
-
dtype: torch.dtype | None = None):
|
16
|
+
dtype: torch.dtype | None = None, **kwargs):
|
15
17
|
super().__init__(input_dim=input_dim, output_dim=output_dim,
|
16
18
|
hidden_dim=hidden_dim, recursive_layers=recursive_layers,
|
19
|
+
ffn_layers=ffn_layers, ffn_expansion_factor=ffn_expansion_factor,
|
20
|
+
ffn_bias=ffn_bias, ffn_dropout_rate=ffn_dropout_rate,
|
17
21
|
model_name=model_name, device=device, dtype=dtype)
|
18
|
-
self.
|
19
|
-
|
20
|
-
|
21
|
-
|
22
|
+
self.self_attention = SelfAttention(feature_dim=input_dim, bias=kwargs.get('attn_proj_bias', ffn_bias),
|
23
|
+
proj_layers=kwargs.get('attn_proj_layers', 1),
|
24
|
+
proj_expansion_factor=kwargs.get('attn_proj_expansion_factor', ffn_expansion_factor),
|
25
|
+
dropout_rate=kwargs.get('attn_proj_dropout_rate', ffn_dropout_rate))
|
26
|
+
self.__proj = nn.Linear(in_features=input_dim * 2, out_features=input_dim,
|
27
|
+
bias=ffn_bias, device=self.device, dtype=self.dtype)
|
22
28
|
|
23
29
|
@override
|
24
30
|
def forward(self, x: torch.Tensor, state: tuple[torch.Tensor, torch.Tensor]) -> tuple[torch.Tensor, tuple[torch.Tensor, torch.Tensor]]:
|
25
31
|
x = self.ensure_device_and_dtype(x, device=self.device, dtype=self.dtype)
|
26
32
|
x = torch.cat([self.self_attention(x), x], dim=-1)
|
27
33
|
x = nn.LayerNorm(normalized_shape=x.shape[-1], eps=1e-9, device=self.device, dtype=self.dtype)(x)
|
28
|
-
return super().forward(self.
|
34
|
+
return super().forward(self.__proj(x), state)
|
@@ -4,23 +4,27 @@ import torch
|
|
4
4
|
from torch import nn
|
5
5
|
|
6
6
|
from deeplotx.nn.base_neural_network import BaseNeuralNetwork
|
7
|
-
from deeplotx.nn import
|
7
|
+
from deeplotx.nn.feed_forward import FeedForward
|
8
8
|
|
9
9
|
|
10
10
|
class RecursiveSequential(BaseNeuralNetwork):
|
11
11
|
def __init__(self, input_dim: int, output_dim: int,
|
12
12
|
hidden_dim: int | None = None, recursive_layers: int = 2,
|
13
|
+
ffn_layers: int = 1, ffn_expansion_factor: int | float = 2,
|
14
|
+
ffn_bias: bool = True, ffn_dropout_rate: float = 0.05,
|
13
15
|
model_name: str | None = None, device: str | None = None,
|
14
16
|
dtype: torch.dtype | None = None):
|
15
|
-
super().__init__(model_name=model_name, device=device, dtype=dtype)
|
17
|
+
super().__init__(in_features=input_dim, out_features=output_dim, model_name=model_name, device=device, dtype=dtype)
|
16
18
|
if hidden_dim is None:
|
17
19
|
hidden_dim = input_dim
|
18
20
|
self.lstm = nn.LSTM(input_size=input_dim, hidden_size=hidden_dim,
|
19
21
|
num_layers=recursive_layers, batch_first=True,
|
20
22
|
bias=True, bidirectional=True, device=self.device,
|
21
23
|
dtype=self.dtype)
|
22
|
-
self.
|
23
|
-
|
24
|
+
self.ffn = FeedForward(feature_dim=hidden_dim * 2, num_layers=ffn_layers, expansion_factor=ffn_expansion_factor,
|
25
|
+
bias=ffn_bias, dropout_rate=ffn_dropout_rate, device=self.device, dtype=self.dtype)
|
26
|
+
self.__proj = nn.Linear(in_features=hidden_dim * 2, out_features=output_dim, bias=ffn_bias,
|
27
|
+
device=self.device, dtype=self.dtype)
|
24
28
|
|
25
29
|
def initial_state(self, batch_size: int = 1) -> tuple[torch.Tensor, torch.Tensor]:
|
26
30
|
zeros = torch.zeros(self.lstm.num_layers * 2, batch_size, self.lstm.hidden_size, device=self.device, dtype=self.dtype)
|
@@ -32,7 +36,10 @@ class RecursiveSequential(BaseNeuralNetwork):
|
|
32
36
|
state = (self.ensure_device_and_dtype(state[0], device=self.device, dtype=self.dtype),
|
33
37
|
self.ensure_device_and_dtype(state[1], device=self.device, dtype=self.dtype))
|
34
38
|
x, (hidden_state, cell_state) = self.lstm(x, state)
|
35
|
-
x =
|
39
|
+
x = x[:, -1, :]
|
40
|
+
residual = x
|
41
|
+
x = self.ffn(x) + residual
|
42
|
+
x = self.__proj(x)
|
36
43
|
return x, (hidden_state, cell_state)
|
37
44
|
|
38
45
|
@override
|
@@ -0,0 +1,39 @@
|
|
1
|
+
from typing_extensions import override
|
2
|
+
|
3
|
+
import torch
|
4
|
+
|
5
|
+
from deeplotx.nn.base_neural_network import BaseNeuralNetwork
|
6
|
+
from deeplotx.nn.feed_forward import FeedForward
|
7
|
+
|
8
|
+
|
9
|
+
class SelfAttention(BaseNeuralNetwork):
|
10
|
+
def __init__(self, feature_dim: int, bias: bool = True, proj_layers: int = 1,
|
11
|
+
proj_expansion_factor: int | float = 1.5, dropout_rate: float = 0.02,
|
12
|
+
model_name: str | None = None, device: str | None = None, dtype: torch.dtype | None = None):
|
13
|
+
super().__init__(in_features=feature_dim, out_features=feature_dim, model_name=model_name,
|
14
|
+
device=device, dtype=dtype)
|
15
|
+
self._feature_dim = feature_dim
|
16
|
+
self.q_proj = FeedForward(feature_dim=self._feature_dim, num_layers=proj_layers,
|
17
|
+
expansion_factor=proj_expansion_factor,
|
18
|
+
bias=bias, dropout_rate=dropout_rate, device=self.device, dtype=self.dtype)
|
19
|
+
self.k_proj = FeedForward(feature_dim=self._feature_dim, num_layers=proj_layers,
|
20
|
+
expansion_factor=proj_expansion_factor,
|
21
|
+
bias=bias, dropout_rate=dropout_rate, device=self.device, dtype=self.dtype)
|
22
|
+
self.v_proj = FeedForward(feature_dim=self._feature_dim, num_layers=proj_layers,
|
23
|
+
expansion_factor=proj_expansion_factor,
|
24
|
+
bias=bias, dropout_rate=dropout_rate, device=self.device, dtype=self.dtype)
|
25
|
+
|
26
|
+
def _attention(self, x: torch.Tensor, mask: torch.Tensor | None = None) -> torch.Tensor:
|
27
|
+
q, k = self.q_proj(x), self.k_proj(x)
|
28
|
+
attn = torch.matmul(q, k.transpose(-2, -1))
|
29
|
+
attn = attn / (self._feature_dim ** 0.5)
|
30
|
+
attn = attn.masked_fill(mask == 0, -1e9) if mask is not None else attn
|
31
|
+
return torch.softmax(attn, dim=-1)
|
32
|
+
|
33
|
+
@override
|
34
|
+
def forward(self, x: torch.Tensor, mask: torch.Tensor | None = None) -> torch.Tensor:
|
35
|
+
x = self.ensure_device_and_dtype(x, device=self.device, dtype=self.dtype)
|
36
|
+
if mask is not None:
|
37
|
+
mask = self.ensure_device_and_dtype(mask, device=self.device, dtype=self.dtype)
|
38
|
+
v = self.v_proj(x)
|
39
|
+
return torch.matmul(self._attention(x, mask), v)
|
@@ -6,9 +6,12 @@ from deeplotx.nn.linear_regression import LinearRegression
|
|
6
6
|
|
7
7
|
|
8
8
|
class SoftmaxRegression(LinearRegression):
|
9
|
-
def __init__(self, input_dim: int, output_dim: int,
|
9
|
+
def __init__(self, input_dim: int, output_dim: int, num_layers: int = 1, expansion_factor: int | float = 1.5,
|
10
|
+
bias: bool = True, dropout_rate: float = 0.1, model_name: str | None = None,
|
10
11
|
device: str | None = None, dtype: torch.dtype | None = None):
|
11
|
-
super().__init__(input_dim=input_dim, output_dim=output_dim,
|
12
|
+
super().__init__(input_dim=input_dim, output_dim=output_dim, num_layers=num_layers,
|
13
|
+
expansion_factor=expansion_factor, bias=bias, dropout_rate=dropout_rate,
|
14
|
+
model_name=model_name, device=device, dtype=dtype)
|
12
15
|
|
13
16
|
@override
|
14
17
|
def forward(self, x: torch.Tensor) -> torch.Tensor:
|
@@ -25,13 +25,13 @@ class TextBinaryClassifierTrainer(BaseTrainer):
|
|
25
25
|
num_epochs: int, learning_rate: float = 2e-6, balancing_dataset: bool = True,
|
26
26
|
train_loss_threshold: float = 0.0, valid_loss_threshold: float = 0.0,
|
27
27
|
alpha: float = 1e-4, rho: float = 0.2,
|
28
|
-
hidden_dim: int = 256, recursive_layers: int = 2) -> LongContextRecursiveSequential:
|
28
|
+
hidden_dim: int = 256, recursive_layers: int = 2, **kwargs) -> LongContextRecursiveSequential:
|
29
29
|
if balancing_dataset:
|
30
30
|
min_length = min(len(positive_texts), len(negative_texts))
|
31
31
|
positive_texts = positive_texts[:min_length]
|
32
32
|
negative_texts = negative_texts[:min_length]
|
33
33
|
all_texts = positive_texts + negative_texts
|
34
|
-
text_embeddings = [self._long_text_encoder.encode(x, flatten=False
|
34
|
+
text_embeddings = [self._long_text_encoder.encode(x, flatten=False) for x in all_texts]
|
35
35
|
feature_dim = text_embeddings[0].shape[-1]
|
36
36
|
dtype = text_embeddings[0].dtype
|
37
37
|
labels = ([torch.tensor([1.], dtype=dtype, device=self.device) for _ in range(len(positive_texts))]
|
@@ -44,15 +44,27 @@ class TextBinaryClassifierTrainer(BaseTrainer):
|
|
44
44
|
valid_dataset = TensorDataset(inputs[train_size:], labels[train_size:])
|
45
45
|
self.train_dataset_loader = DataLoader(train_dataset, batch_size=self._batch_size, shuffle=True)
|
46
46
|
self.valid_dataset_loader = DataLoader(valid_dataset, batch_size=self._batch_size, shuffle=True)
|
47
|
-
|
48
|
-
if self.model is not None and self.model.fc1.in_features != feature_dim:
|
47
|
+
if self.model is not None and self.model.in_features != feature_dim:
|
49
48
|
logger.warning("The dimension of features doesn't match. A new model instance will be created.")
|
50
49
|
self.model = None
|
51
50
|
if self.model is None:
|
51
|
+
ffn_layers = kwargs.get('ffn_layers', 5)
|
52
|
+
ffn_expansion_factor = kwargs.get('ffn_expansion_factor', 2)
|
53
|
+
ffn_bias = kwargs.get('ffn_bias', True)
|
54
|
+
ffn_dropout_rate = kwargs.get('ffn_dropout_rate', 0.1)
|
52
55
|
self.model = LongContextRecursiveSequential(input_dim=feature_dim, output_dim=1,
|
53
56
|
hidden_dim=hidden_dim,
|
54
57
|
recursive_layers=recursive_layers,
|
58
|
+
ffn_layers=ffn_layers,
|
59
|
+
ffn_expansion_factor=ffn_expansion_factor,
|
60
|
+
ffn_bias=ffn_bias,
|
61
|
+
ffn_dropout_rate=ffn_dropout_rate,
|
62
|
+
attn_proj_layers=kwargs.get('attn_proj_layers', ffn_layers),
|
63
|
+
attn_proj_bias=kwargs.get('attn_proj_bias', ffn_bias),
|
64
|
+
attn_proj_expansion_factor=kwargs.get('attn_proj_expansion_factor', ffn_expansion_factor),
|
65
|
+
attn_proj_dropout_rate=kwargs.get('attn_proj_dropout_rate', ffn_dropout_rate),
|
55
66
|
device=self.device, dtype=dtype)
|
67
|
+
logger.debug(f'Training Model: {self.model}')
|
56
68
|
loss_function = nn.BCELoss()
|
57
69
|
optimizer = optim.Adamax(self.model.parameters(), lr=learning_rate)
|
58
70
|
for epoch in range(num_epochs):
|
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.4
|
2
2
|
Name: deeplotx
|
3
|
-
Version: 0.
|
3
|
+
Version: 0.6.1
|
4
4
|
Summary: Easy-2-use long text NLP toolkit.
|
5
5
|
Requires-Python: >=3.10
|
6
6
|
Description-Content-Type: text/markdown
|
@@ -163,6 +163,8 @@ Dynamic: license-file
|
|
163
163
|
|
164
164
|
```python
|
165
165
|
from deeplotx import (
|
166
|
+
BaseNeuralNetwork, # 深度神经网络基类
|
167
|
+
FeedForward, # 前馈神经网络
|
166
168
|
LinearRegression, # 线性回归
|
167
169
|
LogisticRegression, # 逻辑回归 / 二分类 / 多标签分类
|
168
170
|
SoftmaxRegression, # Softmax 回归 / 多分类
|
@@ -181,38 +183,54 @@ Dynamic: license-file
|
|
181
183
|
|
182
184
|
import torch
|
183
185
|
from torch import nn
|
184
|
-
|
186
|
+
|
185
187
|
from deeplotx.nn.base_neural_network import BaseNeuralNetwork
|
186
|
-
|
187
|
-
|
188
|
-
class
|
189
|
-
def __init__(self,
|
190
|
-
|
191
|
-
|
192
|
-
|
193
|
-
self.
|
194
|
-
self.
|
195
|
-
|
196
|
-
self.
|
197
|
-
|
198
|
-
self.parametric_relu_1 = nn.PReLU(num_parameters=1, init=5e-3,
|
199
|
-
|
200
|
-
self.
|
201
|
-
|
202
|
-
|
188
|
+
|
189
|
+
|
190
|
+
class FeedForwardUnit(BaseNeuralNetwork):
|
191
|
+
def __init__(self, feature_dim: int, expansion_factor: int | float = 2,
|
192
|
+
bias: bool = True, dropout_rate: float = 0.05, model_name: str | None = None,
|
193
|
+
device: str | None = None, dtype: torch.dtype | None = None):
|
194
|
+
super().__init__(in_features=feature_dim, out_features=feature_dim, model_name=model_name, device=device, dtype=dtype)
|
195
|
+
self._dropout_rate = dropout_rate
|
196
|
+
self.fc1 = nn.Linear(feature_dim, int(feature_dim * expansion_factor), bias=bias,
|
197
|
+
device=self.device, dtype=self.dtype)
|
198
|
+
self.fc2 = nn.Linear(int(feature_dim * expansion_factor), feature_dim, bias=bias,
|
199
|
+
device=self.device, dtype=self.dtype)
|
200
|
+
self.parametric_relu_1 = nn.PReLU(num_parameters=1, init=5e-3,
|
201
|
+
device=self.device, dtype=self.dtype)
|
202
|
+
self.layer_norm = nn.LayerNorm(normalized_shape=self.fc1.in_features, eps=1e-9,
|
203
|
+
device=self.device, dtype=self.dtype)
|
204
|
+
|
203
205
|
@override
|
204
|
-
def forward(self, x) -> torch.Tensor:
|
206
|
+
def forward(self, x: torch.Tensor) -> torch.Tensor:
|
205
207
|
x = self.ensure_device_and_dtype(x, device=self.device, dtype=self.dtype)
|
206
|
-
|
207
|
-
x =
|
208
|
-
x =
|
209
|
-
x = self.
|
210
|
-
|
211
|
-
|
212
|
-
|
213
|
-
|
214
|
-
|
215
|
-
|
208
|
+
residual = x
|
209
|
+
x = self.layer_norm(x)
|
210
|
+
x = self.fc1(x)
|
211
|
+
x = self.parametric_relu_1(x)
|
212
|
+
if self._dropout_rate > .0:
|
213
|
+
x = torch.dropout(x, p=self._dropout_rate, train=self.training)
|
214
|
+
return self.fc2(x) + residual
|
215
|
+
|
216
|
+
|
217
|
+
class FeedForward(BaseNeuralNetwork):
|
218
|
+
def __init__(self, feature_dim: int, num_layers: int = 1, expansion_factor: int | float = 2,
|
219
|
+
bias: bool = True, dropout_rate: float = 0.05, model_name: str | None = None,
|
220
|
+
device: str | None = None, dtype: torch.dtype | None = None):
|
221
|
+
if num_layers < 1:
|
222
|
+
raise ValueError('num_layers cannot be less than 1.')
|
223
|
+
super().__init__(in_features=feature_dim, out_features=feature_dim, model_name=model_name, device=device, dtype=dtype)
|
224
|
+
self.ffn_layers = nn.ModuleList([FeedForwardUnit(feature_dim=feature_dim,
|
225
|
+
expansion_factor=expansion_factor, bias=bias,
|
226
|
+
dropout_rate=dropout_rate,
|
227
|
+
device=self.device, dtype=self.dtype)] * num_layers)
|
228
|
+
|
229
|
+
@override
|
230
|
+
def forward(self, x: torch.Tensor) -> torch.Tensor:
|
231
|
+
x = self.ensure_device_and_dtype(x, device=self.device, dtype=self.dtype)
|
232
|
+
for ffn in self.ffn_layers:
|
233
|
+
x = ffn(x)
|
216
234
|
return x
|
217
235
|
```
|
218
236
|
|
@@ -222,29 +240,34 @@ Dynamic: license-file
|
|
222
240
|
from typing_extensions import override
|
223
241
|
|
224
242
|
import torch
|
225
|
-
from torch import nn, softmax
|
226
243
|
|
227
244
|
from deeplotx.nn.base_neural_network import BaseNeuralNetwork
|
245
|
+
from deeplotx.nn.feed_forward import FeedForward
|
228
246
|
|
229
247
|
|
230
248
|
class SelfAttention(BaseNeuralNetwork):
|
231
|
-
def __init__(self, feature_dim: int,
|
232
|
-
|
233
|
-
|
249
|
+
def __init__(self, feature_dim: int, bias: bool = True, proj_layers: int = 1,
|
250
|
+
proj_expansion_factor: int | float = 1.5, dropout_rate: float = 0.02,
|
251
|
+
model_name: str | None = None, device: str | None = None, dtype: torch.dtype | None = None):
|
252
|
+
super().__init__(in_features=feature_dim, out_features=feature_dim, model_name=model_name,
|
253
|
+
device=device, dtype=dtype)
|
234
254
|
self._feature_dim = feature_dim
|
235
|
-
self.q_proj =
|
236
|
-
|
237
|
-
|
238
|
-
|
239
|
-
|
240
|
-
bias=
|
255
|
+
self.q_proj = FeedForward(feature_dim=self._feature_dim, num_layers=proj_layers,
|
256
|
+
expansion_factor=proj_expansion_factor,
|
257
|
+
bias=bias, dropout_rate=dropout_rate, device=self.device, dtype=self.dtype)
|
258
|
+
self.k_proj = FeedForward(feature_dim=self._feature_dim, num_layers=proj_layers,
|
259
|
+
expansion_factor=proj_expansion_factor,
|
260
|
+
bias=bias, dropout_rate=dropout_rate, device=self.device, dtype=self.dtype)
|
261
|
+
self.v_proj = FeedForward(feature_dim=self._feature_dim, num_layers=proj_layers,
|
262
|
+
expansion_factor=proj_expansion_factor,
|
263
|
+
bias=bias, dropout_rate=dropout_rate, device=self.device, dtype=self.dtype)
|
241
264
|
|
242
265
|
def _attention(self, x: torch.Tensor, mask: torch.Tensor | None = None) -> torch.Tensor:
|
243
266
|
q, k = self.q_proj(x), self.k_proj(x)
|
244
267
|
attn = torch.matmul(q, k.transpose(-2, -1))
|
245
268
|
attn = attn / (self._feature_dim ** 0.5)
|
246
269
|
attn = attn.masked_fill(mask == 0, -1e9) if mask is not None else attn
|
247
|
-
return softmax(attn, dim=-1)
|
270
|
+
return torch.softmax(attn, dim=-1)
|
248
271
|
|
249
272
|
@override
|
250
273
|
def forward(self, x: torch.Tensor, mask: torch.Tensor | None = None) -> torch.Tensor:
|
@@ -265,7 +288,8 @@ Dynamic: license-file
|
|
265
288
|
long_text_encoder = LongTextEncoder(
|
266
289
|
max_length=2048, # 最大文本大小, 超出截断
|
267
290
|
chunk_size=448, # 块大小 (按 Token 计)
|
268
|
-
overlapping=32 # 块间重叠大小 (按 Token 计)
|
291
|
+
overlapping=32, # 块间重叠大小 (按 Token 计)
|
292
|
+
cache_capacity=512 # 缓存大小
|
269
293
|
)
|
270
294
|
|
271
295
|
trainer = TextBinaryClassifierTrainer(
|
@@ -14,6 +14,7 @@ deeplotx/encoder/longformer_encoder.py
|
|
14
14
|
deeplotx/nn/__init__.py
|
15
15
|
deeplotx/nn/auto_regression.py
|
16
16
|
deeplotx/nn/base_neural_network.py
|
17
|
+
deeplotx/nn/feed_forward.py
|
17
18
|
deeplotx/nn/linear_regression.py
|
18
19
|
deeplotx/nn/logistic_regression.py
|
19
20
|
deeplotx/nn/long_context_auto_regression.py
|
@@ -1,48 +0,0 @@
|
|
1
|
-
import logging
|
2
|
-
import os
|
3
|
-
import math
|
4
|
-
|
5
|
-
import torch
|
6
|
-
from torch import nn
|
7
|
-
from transformers import AutoTokenizer, AutoModel
|
8
|
-
|
9
|
-
from deeplotx import __ROOT__
|
10
|
-
|
11
|
-
CACHE_PATH = os.path.join(__ROOT__, '.cache')
|
12
|
-
DEFAULT_BERT = 'FacebookAI/xlm-roberta-base'
|
13
|
-
logger = logging.getLogger('deeplotx.embedding')
|
14
|
-
|
15
|
-
|
16
|
-
class Encoder(nn.Module):
|
17
|
-
def __init__(self, model_name_or_path: str = DEFAULT_BERT, device: str | None = None):
|
18
|
-
super().__init__()
|
19
|
-
self.device = torch.device(device) if device is not None \
|
20
|
-
else torch.device('cuda' if torch.cuda.is_available() else 'cpu')
|
21
|
-
self.tokenizer = AutoTokenizer.from_pretrained(pretrained_model_name_or_path=model_name_or_path,
|
22
|
-
cache_dir=CACHE_PATH, _from_auto=True)
|
23
|
-
self.encoder = AutoModel.from_pretrained(pretrained_model_name_or_path=model_name_or_path,
|
24
|
-
cache_dir=CACHE_PATH, _from_auto=True).to(self.device)
|
25
|
-
self.embed_dim = self.encoder.config.max_position_embeddings
|
26
|
-
logger.debug(f'{Encoder.__name__} initialized on device: {self.device}.')
|
27
|
-
|
28
|
-
def forward(self, input_ids: torch.Tensor, attention_mask: torch.Tensor) -> torch.Tensor:
|
29
|
-
def _encoder(_input_tup: tuple[torch.Tensor, torch.Tensor]) -> torch.Tensor:
|
30
|
-
return self.encoder.forward(_input_tup[0], attention_mask=_input_tup[1]).last_hidden_state[:, 0, :]
|
31
|
-
|
32
|
-
num_chunks = math.ceil(input_ids.shape[-1] / self.embed_dim)
|
33
|
-
chunks = chunk_results = []
|
34
|
-
for i in range(num_chunks):
|
35
|
-
start_idx = i * self.embed_dim
|
36
|
-
end_idx = min(start_idx + self.embed_dim, input_ids.shape[-1])
|
37
|
-
chunks.append((input_ids[:, start_idx: end_idx], attention_mask[:, start_idx: end_idx]))
|
38
|
-
ori_mode = self.encoder.training
|
39
|
-
self.encoder.eval()
|
40
|
-
with torch.no_grad():
|
41
|
-
chunk_results = [_encoder(x) for x in chunks]
|
42
|
-
self.encoder.train(mode=ori_mode)
|
43
|
-
return torch.cat(chunk_results, dim=-1)
|
44
|
-
|
45
|
-
def encode(self, text: str) -> torch.Tensor:
|
46
|
-
_input_ids = torch.tensor([self.tokenizer.encode(text)], dtype=torch.long, device=self.device)
|
47
|
-
_att_mask = torch.tensor([[1] * _input_ids.shape[-1]], dtype=torch.int, device=self.device)
|
48
|
-
return self.forward(_input_ids, _att_mask).squeeze()
|
@@ -1,37 +0,0 @@
|
|
1
|
-
from typing_extensions import override
|
2
|
-
|
3
|
-
import torch
|
4
|
-
from torch import nn
|
5
|
-
|
6
|
-
from deeplotx.nn.base_neural_network import BaseNeuralNetwork
|
7
|
-
|
8
|
-
|
9
|
-
class LinearRegression(BaseNeuralNetwork):
|
10
|
-
def __init__(self, input_dim: int, output_dim: int, model_name: str | None = None,
|
11
|
-
device: str | None = None, dtype: torch.dtype | None = None):
|
12
|
-
super().__init__(model_name=model_name, device=device, dtype=dtype)
|
13
|
-
self.fc1 = nn.Linear(input_dim, 1024, device=self.device, dtype=self.dtype)
|
14
|
-
self.fc1_to_fc4_res = nn.Linear(1024, 64, device=self.device, dtype=self.dtype)
|
15
|
-
self.fc2 = nn.Linear(1024, 768, device=self.device, dtype=self.dtype)
|
16
|
-
self.fc3 = nn.Linear(768, 128, device=self.device, dtype=self.dtype)
|
17
|
-
self.fc4 = nn.Linear(128, 64, device=self.device, dtype=self.dtype)
|
18
|
-
self.fc5 = nn.Linear(64, output_dim, device=self.device, dtype=self.dtype)
|
19
|
-
self.parametric_relu_1 = nn.PReLU(num_parameters=1, init=5e-3, device=self.device, dtype=self.dtype)
|
20
|
-
self.parametric_relu_2 = nn.PReLU(num_parameters=1, init=5e-3, device=self.device, dtype=self.dtype)
|
21
|
-
self.parametric_relu_3 = nn.PReLU(num_parameters=1, init=5e-3, device=self.device, dtype=self.dtype)
|
22
|
-
self.parametric_relu_4 = nn.PReLU(num_parameters=1, init=5e-3, device=self.device, dtype=self.dtype)
|
23
|
-
|
24
|
-
@override
|
25
|
-
def forward(self, x: torch.Tensor) -> torch.Tensor:
|
26
|
-
x = self.ensure_device_and_dtype(x, device=self.device, dtype=self.dtype)
|
27
|
-
fc1_out = self.parametric_relu_1(self.fc1(x))
|
28
|
-
x = nn.LayerNorm(normalized_shape=1024, eps=1e-9, device=self.device, dtype=self.dtype)(fc1_out)
|
29
|
-
x = torch.dropout(x, p=0.2, train=self.training)
|
30
|
-
x = self.parametric_relu_2(self.fc2(x))
|
31
|
-
x = nn.LayerNorm(normalized_shape=768, eps=1e-9, device=self.device, dtype=self.dtype)(x)
|
32
|
-
x = torch.dropout(x, p=0.2, train=self.training)
|
33
|
-
x = self.parametric_relu_3(self.fc3(x))
|
34
|
-
x = torch.dropout(x, p=0.2, train=self.training)
|
35
|
-
x = self.parametric_relu_4(self.fc4(x)) + self.fc1_to_fc4_res(fc1_out)
|
36
|
-
x = self.fc5(x)
|
37
|
-
return x
|
@@ -1,34 +0,0 @@
|
|
1
|
-
from typing_extensions import override
|
2
|
-
|
3
|
-
import torch
|
4
|
-
from torch import nn, softmax
|
5
|
-
|
6
|
-
from deeplotx.nn.base_neural_network import BaseNeuralNetwork
|
7
|
-
|
8
|
-
|
9
|
-
class SelfAttention(BaseNeuralNetwork):
|
10
|
-
def __init__(self, feature_dim: int, model_name: str | None = None,
|
11
|
-
device: str | None = None, dtype: torch.dtype | None = None):
|
12
|
-
super().__init__(model_name=model_name, device=device, dtype=dtype)
|
13
|
-
self._feature_dim = feature_dim
|
14
|
-
self.q_proj = nn.Linear(in_features=self._feature_dim, out_features=self._feature_dim,
|
15
|
-
bias=True, device=self.device, dtype=self.dtype)
|
16
|
-
self.k_proj = nn.Linear(in_features=self._feature_dim, out_features=self._feature_dim,
|
17
|
-
bias=True, device=self.device, dtype=self.dtype)
|
18
|
-
self.v_proj = nn.Linear(in_features=self._feature_dim, out_features=self._feature_dim,
|
19
|
-
bias=True, device=self.device, dtype=self.dtype)
|
20
|
-
|
21
|
-
def _attention(self, x: torch.Tensor, mask: torch.Tensor | None = None) -> torch.Tensor:
|
22
|
-
q, k = self.q_proj(x), self.k_proj(x)
|
23
|
-
attn = torch.matmul(q, k.transpose(-2, -1))
|
24
|
-
attn = attn / (self._feature_dim ** 0.5)
|
25
|
-
attn = attn.masked_fill(mask == 0, -1e9) if mask is not None else attn
|
26
|
-
return softmax(attn, dim=-1)
|
27
|
-
|
28
|
-
@override
|
29
|
-
def forward(self, x: torch.Tensor, mask: torch.Tensor | None = None) -> torch.Tensor:
|
30
|
-
x = self.ensure_device_and_dtype(x, device=self.device, dtype=self.dtype)
|
31
|
-
if mask is not None:
|
32
|
-
mask = self.ensure_device_and_dtype(mask, device=self.device, dtype=self.dtype)
|
33
|
-
v = self.v_proj(x)
|
34
|
-
return torch.matmul(self._attention(x, mask), v)
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|