deeplotx 0.5.6__tar.gz → 0.8.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {deeplotx-0.5.6 → deeplotx-0.8.0}/PKG-INFO +65 -42
- {deeplotx-0.5.6 → deeplotx-0.8.0}/README.md +63 -40
- {deeplotx-0.5.6 → deeplotx-0.8.0}/deeplotx/__init__.py +5 -1
- deeplotx-0.8.0/deeplotx/encoder/encoder.py +66 -0
- {deeplotx-0.5.6 → deeplotx-0.8.0}/deeplotx/encoder/long_text_encoder.py +4 -2
- {deeplotx-0.5.6 → deeplotx-0.8.0}/deeplotx/nn/__init__.py +6 -1
- deeplotx-0.8.0/deeplotx/nn/attention.py +48 -0
- deeplotx-0.8.0/deeplotx/nn/auto_regression.py +14 -0
- deeplotx-0.8.0/deeplotx/nn/base_neural_network.py +140 -0
- deeplotx-0.8.0/deeplotx/nn/feed_forward.py +53 -0
- deeplotx-0.8.0/deeplotx/nn/linear_regression.py +25 -0
- {deeplotx-0.5.6 → deeplotx-0.8.0}/deeplotx/nn/logistic_regression.py +5 -2
- deeplotx-0.8.0/deeplotx/nn/long_context_auto_regression.py +16 -0
- deeplotx-0.8.0/deeplotx/nn/long_context_recursive_sequential.py +36 -0
- deeplotx-0.8.0/deeplotx/nn/multi_head_attention.py +34 -0
- {deeplotx-0.5.6 → deeplotx-0.8.0}/deeplotx/nn/recursive_sequential.py +19 -12
- deeplotx-0.8.0/deeplotx/nn/roformer_encoder.py +40 -0
- deeplotx-0.8.0/deeplotx/nn/rope.py +41 -0
- {deeplotx-0.5.6 → deeplotx-0.8.0}/deeplotx/nn/softmax_regression.py +5 -2
- {deeplotx-0.5.6 → deeplotx-0.8.0}/deeplotx/trainer/text_binary_classification_trainer.py +24 -8
- deeplotx-0.8.0/deeplotx/util/__init__.py +2 -0
- {deeplotx-0.5.6 → deeplotx-0.8.0}/deeplotx.egg-info/PKG-INFO +65 -42
- {deeplotx-0.5.6 → deeplotx-0.8.0}/deeplotx.egg-info/SOURCES.txt +5 -1
- {deeplotx-0.5.6 → deeplotx-0.8.0}/deeplotx.egg-info/requires.txt +1 -1
- {deeplotx-0.5.6 → deeplotx-0.8.0}/pyproject.toml +2 -2
- deeplotx-0.5.6/deeplotx/encoder/encoder.py +0 -48
- deeplotx-0.5.6/deeplotx/nn/auto_regression.py +0 -12
- deeplotx-0.5.6/deeplotx/nn/base_neural_network.py +0 -68
- deeplotx-0.5.6/deeplotx/nn/linear_regression.py +0 -37
- deeplotx-0.5.6/deeplotx/nn/long_context_auto_regression.py +0 -12
- deeplotx-0.5.6/deeplotx/nn/long_context_recursive_sequential.py +0 -28
- deeplotx-0.5.6/deeplotx/nn/self_attention.py +0 -34
- deeplotx-0.5.6/deeplotx/util/__init__.py +0 -2
- {deeplotx-0.5.6 → deeplotx-0.8.0}/LICENSE +0 -0
- {deeplotx-0.5.6 → deeplotx-0.8.0}/deeplotx/encoder/__init__.py +0 -0
- {deeplotx-0.5.6 → deeplotx-0.8.0}/deeplotx/encoder/longformer_encoder.py +0 -0
- {deeplotx-0.5.6 → deeplotx-0.8.0}/deeplotx/similarity/__init__.py +0 -0
- {deeplotx-0.5.6 → deeplotx-0.8.0}/deeplotx/similarity/distribution.py +0 -0
- {deeplotx-0.5.6 → deeplotx-0.8.0}/deeplotx/similarity/set.py +0 -0
- {deeplotx-0.5.6 → deeplotx-0.8.0}/deeplotx/similarity/vector.py +0 -0
- {deeplotx-0.5.6 → deeplotx-0.8.0}/deeplotx/trainer/__init__.py +0 -0
- {deeplotx-0.5.6 → deeplotx-0.8.0}/deeplotx/trainer/base_trainer.py +0 -0
- {deeplotx-0.5.6 → deeplotx-0.8.0}/deeplotx/util/hash.py +0 -0
- {deeplotx-0.5.6 → deeplotx-0.8.0}/deeplotx/util/read_file.py +0 -0
- {deeplotx-0.5.6 → deeplotx-0.8.0}/deeplotx.egg-info/dependency_links.txt +0 -0
- {deeplotx-0.5.6 → deeplotx-0.8.0}/deeplotx.egg-info/top_level.txt +0 -0
- {deeplotx-0.5.6 → deeplotx-0.8.0}/setup.cfg +0 -0
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.4
|
2
2
|
Name: deeplotx
|
3
|
-
Version: 0.5.6
|
3
|
+
Version: 0.8.0
|
4
4
|
Summary: Easy-2-use long text NLP toolkit.
|
5
5
|
Requires-Python: >=3.10
|
6
6
|
Description-Content-Type: text/markdown
|
@@ -13,7 +13,7 @@ Requires-Dist: python-dotenv
|
|
13
13
|
Requires-Dist: torch
|
14
14
|
Requires-Dist: transformers
|
15
15
|
Requires-Dist: typing-extensions
|
16
|
-
Requires-Dist: vortezwohl>=0.0.
|
16
|
+
Requires-Dist: vortezwohl>=0.0.8
|
17
17
|
Dynamic: license-file
|
18
18
|
|
19
19
|
[](https://deepwiki.com/vortezwohl/DeepLoTX)
|
@@ -163,6 +163,8 @@ Dynamic: license-file
|
|
163
163
|
|
164
164
|
```python
|
165
165
|
from deeplotx import (
|
166
|
+
BaseNeuralNetwork, # 深度神经网络基类
|
167
|
+
FeedForward, # 前馈神经网络
|
166
168
|
LinearRegression, # 线性回归
|
167
169
|
LogisticRegression, # 逻辑回归 / 二分类 / 多标签分类
|
168
170
|
SoftmaxRegression, # Softmax 回归 / 多分类
|
@@ -181,38 +183,54 @@ Dynamic: license-file
|
|
181
183
|
|
182
184
|
import torch
|
183
185
|
from torch import nn
|
184
|
-
|
186
|
+
|
185
187
|
from deeplotx.nn.base_neural_network import BaseNeuralNetwork
|
186
|
-
|
187
|
-
|
188
|
-
class
|
189
|
-
def __init__(self,
|
190
|
-
|
191
|
-
|
192
|
-
|
193
|
-
self.
|
194
|
-
self.
|
195
|
-
|
196
|
-
self.
|
197
|
-
|
198
|
-
self.parametric_relu_1 = nn.PReLU(num_parameters=1, init=5e-3,
|
199
|
-
|
200
|
-
self.
|
201
|
-
|
202
|
-
|
188
|
+
|
189
|
+
|
190
|
+
class FeedForwardUnit(BaseNeuralNetwork):
|
191
|
+
def __init__(self, feature_dim: int, expansion_factor: int | float = 2,
|
192
|
+
bias: bool = True, dropout_rate: float = 0.05, model_name: str | None = None,
|
193
|
+
device: str | None = None, dtype: torch.dtype | None = None):
|
194
|
+
super().__init__(in_features=feature_dim, out_features=feature_dim, model_name=model_name, device=device, dtype=dtype)
|
195
|
+
self._dropout_rate = dropout_rate
|
196
|
+
self.fc1 = nn.Linear(feature_dim, int(feature_dim * expansion_factor), bias=bias,
|
197
|
+
device=self.device, dtype=self.dtype)
|
198
|
+
self.fc2 = nn.Linear(int(feature_dim * expansion_factor), feature_dim, bias=bias,
|
199
|
+
device=self.device, dtype=self.dtype)
|
200
|
+
self.parametric_relu_1 = nn.PReLU(num_parameters=1, init=5e-3,
|
201
|
+
device=self.device, dtype=self.dtype)
|
202
|
+
self.layer_norm = nn.LayerNorm(normalized_shape=self.fc1.in_features, eps=1e-9,
|
203
|
+
device=self.device, dtype=self.dtype)
|
204
|
+
|
203
205
|
@override
|
204
|
-
def forward(self, x) -> torch.Tensor:
|
206
|
+
def forward(self, x: torch.Tensor) -> torch.Tensor:
|
205
207
|
x = self.ensure_device_and_dtype(x, device=self.device, dtype=self.dtype)
|
206
|
-
|
207
|
-
x =
|
208
|
-
x =
|
209
|
-
x = self.
|
210
|
-
|
211
|
-
|
212
|
-
|
213
|
-
|
214
|
-
|
215
|
-
|
208
|
+
residual = x
|
209
|
+
x = self.layer_norm(x)
|
210
|
+
x = self.fc1(x)
|
211
|
+
x = self.parametric_relu_1(x)
|
212
|
+
if self._dropout_rate > .0:
|
213
|
+
x = torch.dropout(x, p=self._dropout_rate, train=self.training)
|
214
|
+
return self.fc2(x) + residual
|
215
|
+
|
216
|
+
|
217
|
+
class FeedForward(BaseNeuralNetwork):
    """Stack of ``num_layers`` ``FeedForwardUnit`` blocks applied in sequence.

    Input and output width are both ``feature_dim``.
    """

    def __init__(self, feature_dim: int, num_layers: int = 1, expansion_factor: int | float = 2,
                 bias: bool = True, dropout_rate: float = 0.05, model_name: str | None = None,
                 device: str | None = None, dtype: torch.dtype | None = None):
        """
        Args:
            feature_dim: Width of the input and output features.
            num_layers: Number of stacked units; must be at least 1.
            expansion_factor: Forwarded to every ``FeedForwardUnit``.
            bias: Forwarded to every ``FeedForwardUnit``.
            dropout_rate: Forwarded to every ``FeedForwardUnit``.
            model_name: Forwarded to ``BaseNeuralNetwork``.
            device: Forwarded to ``BaseNeuralNetwork``.
            dtype: Forwarded to ``BaseNeuralNetwork``.

        Raises:
            ValueError: If ``num_layers`` is less than 1.
        """
        if num_layers < 1:
            raise ValueError('num_layers cannot be less than 1.')
        super().__init__(in_features=feature_dim, out_features=feature_dim, model_name=model_name, device=device, dtype=dtype)
        # Construct a fresh unit per layer. ``[unit] * num_layers`` would put the
        # *same* module object into the list repeatedly, so all layers would
        # share one set of parameters.
        self.ffn_layers = nn.ModuleList([
            FeedForwardUnit(feature_dim=feature_dim,
                            expansion_factor=expansion_factor, bias=bias,
                            dropout_rate=dropout_rate,
                            device=self.device, dtype=self.dtype)
            for _ in range(num_layers)
        ])

    @override
    def forward(self, x: torch.Tensor) -> torch.Tensor:
        """Move ``x`` to this module's device/dtype and pass it through every unit in order."""
        x = self.ensure_device_and_dtype(x, device=self.device, dtype=self.dtype)
        for ffn in self.ffn_layers:
            x = ffn(x)
        return x
|
217
235
|
```
|
218
236
|
|
@@ -222,29 +240,34 @@ Dynamic: license-file
|
|
222
240
|
from typing_extensions import override
|
223
241
|
|
224
242
|
import torch
|
225
|
-
from torch import nn, softmax
|
226
243
|
|
227
244
|
from deeplotx.nn.base_neural_network import BaseNeuralNetwork
|
245
|
+
from deeplotx.nn.feed_forward import FeedForward
|
228
246
|
|
229
247
|
|
230
248
|
class SelfAttention(BaseNeuralNetwork):
|
231
|
-
def __init__(self, feature_dim: int,
|
232
|
-
|
233
|
-
|
249
|
+
def __init__(self, feature_dim: int, bias: bool = True, proj_layers: int = 1,
|
250
|
+
proj_expansion_factor: int | float = 1.5, dropout_rate: float = 0.02,
|
251
|
+
model_name: str | None = None, device: str | None = None, dtype: torch.dtype | None = None):
|
252
|
+
super().__init__(in_features=feature_dim, out_features=feature_dim, model_name=model_name,
|
253
|
+
device=device, dtype=dtype)
|
234
254
|
self._feature_dim = feature_dim
|
235
|
-
self.q_proj =
|
236
|
-
|
237
|
-
|
238
|
-
|
239
|
-
|
240
|
-
bias=
|
255
|
+
self.q_proj = FeedForward(feature_dim=self._feature_dim, num_layers=proj_layers,
|
256
|
+
expansion_factor=proj_expansion_factor,
|
257
|
+
bias=bias, dropout_rate=dropout_rate, device=self.device, dtype=self.dtype)
|
258
|
+
self.k_proj = FeedForward(feature_dim=self._feature_dim, num_layers=proj_layers,
|
259
|
+
expansion_factor=proj_expansion_factor,
|
260
|
+
bias=bias, dropout_rate=dropout_rate, device=self.device, dtype=self.dtype)
|
261
|
+
self.v_proj = FeedForward(feature_dim=self._feature_dim, num_layers=proj_layers,
|
262
|
+
expansion_factor=proj_expansion_factor,
|
263
|
+
bias=bias, dropout_rate=dropout_rate, device=self.device, dtype=self.dtype)
|
241
264
|
|
242
265
|
def _attention(self, x: torch.Tensor, mask: torch.Tensor | None = None) -> torch.Tensor:
|
243
266
|
q, k = self.q_proj(x), self.k_proj(x)
|
244
267
|
attn = torch.matmul(q, k.transpose(-2, -1))
|
245
268
|
attn = attn / (self._feature_dim ** 0.5)
|
246
269
|
attn = attn.masked_fill(mask == 0, -1e9) if mask is not None else attn
|
247
|
-
return softmax(attn, dim=-1)
|
270
|
+
return torch.softmax(attn, dim=-1)
|
248
271
|
|
249
272
|
@override
|
250
273
|
def forward(self, x: torch.Tensor, mask: torch.Tensor | None = None) -> torch.Tensor:
|
@@ -145,6 +145,8 @@
|
|
145
145
|
|
146
146
|
```python
|
147
147
|
from deeplotx import (
|
148
|
+
BaseNeuralNetwork, # 深度神经网络基类
|
149
|
+
FeedForward, # 前馈神经网络
|
148
150
|
LinearRegression, # 线性回归
|
149
151
|
LogisticRegression, # 逻辑回归 / 二分类 / 多标签分类
|
150
152
|
SoftmaxRegression, # Softmax 回归 / 多分类
|
@@ -163,38 +165,54 @@
|
|
163
165
|
|
164
166
|
import torch
|
165
167
|
from torch import nn
|
166
|
-
|
168
|
+
|
167
169
|
from deeplotx.nn.base_neural_network import BaseNeuralNetwork
|
168
|
-
|
169
|
-
|
170
|
-
class
|
171
|
-
def __init__(self,
|
172
|
-
|
173
|
-
|
174
|
-
|
175
|
-
self.
|
176
|
-
self.
|
177
|
-
|
178
|
-
self.
|
179
|
-
|
180
|
-
self.parametric_relu_1 = nn.PReLU(num_parameters=1, init=5e-3,
|
181
|
-
|
182
|
-
self.
|
183
|
-
|
184
|
-
|
170
|
+
|
171
|
+
|
172
|
+
class FeedForwardUnit(BaseNeuralNetwork):
|
173
|
+
def __init__(self, feature_dim: int, expansion_factor: int | float = 2,
|
174
|
+
bias: bool = True, dropout_rate: float = 0.05, model_name: str | None = None,
|
175
|
+
device: str | None = None, dtype: torch.dtype | None = None):
|
176
|
+
super().__init__(in_features=feature_dim, out_features=feature_dim, model_name=model_name, device=device, dtype=dtype)
|
177
|
+
self._dropout_rate = dropout_rate
|
178
|
+
self.fc1 = nn.Linear(feature_dim, int(feature_dim * expansion_factor), bias=bias,
|
179
|
+
device=self.device, dtype=self.dtype)
|
180
|
+
self.fc2 = nn.Linear(int(feature_dim * expansion_factor), feature_dim, bias=bias,
|
181
|
+
device=self.device, dtype=self.dtype)
|
182
|
+
self.parametric_relu_1 = nn.PReLU(num_parameters=1, init=5e-3,
|
183
|
+
device=self.device, dtype=self.dtype)
|
184
|
+
self.layer_norm = nn.LayerNorm(normalized_shape=self.fc1.in_features, eps=1e-9,
|
185
|
+
device=self.device, dtype=self.dtype)
|
186
|
+
|
185
187
|
@override
|
186
|
-
def forward(self, x) -> torch.Tensor:
|
188
|
+
def forward(self, x: torch.Tensor) -> torch.Tensor:
|
187
189
|
x = self.ensure_device_and_dtype(x, device=self.device, dtype=self.dtype)
|
188
|
-
|
189
|
-
x =
|
190
|
-
x =
|
191
|
-
x = self.
|
192
|
-
|
193
|
-
|
194
|
-
|
195
|
-
|
196
|
-
|
197
|
-
|
190
|
+
residual = x
|
191
|
+
x = self.layer_norm(x)
|
192
|
+
x = self.fc1(x)
|
193
|
+
x = self.parametric_relu_1(x)
|
194
|
+
if self._dropout_rate > .0:
|
195
|
+
x = torch.dropout(x, p=self._dropout_rate, train=self.training)
|
196
|
+
return self.fc2(x) + residual
|
197
|
+
|
198
|
+
|
199
|
+
class FeedForward(BaseNeuralNetwork):
    """Stack of ``num_layers`` ``FeedForwardUnit`` blocks applied in sequence.

    Input and output width are both ``feature_dim``.
    """

    def __init__(self, feature_dim: int, num_layers: int = 1, expansion_factor: int | float = 2,
                 bias: bool = True, dropout_rate: float = 0.05, model_name: str | None = None,
                 device: str | None = None, dtype: torch.dtype | None = None):
        """
        Args:
            feature_dim: Width of the input and output features.
            num_layers: Number of stacked units; must be at least 1.
            expansion_factor: Forwarded to every ``FeedForwardUnit``.
            bias: Forwarded to every ``FeedForwardUnit``.
            dropout_rate: Forwarded to every ``FeedForwardUnit``.
            model_name: Forwarded to ``BaseNeuralNetwork``.
            device: Forwarded to ``BaseNeuralNetwork``.
            dtype: Forwarded to ``BaseNeuralNetwork``.

        Raises:
            ValueError: If ``num_layers`` is less than 1.
        """
        if num_layers < 1:
            raise ValueError('num_layers cannot be less than 1.')
        super().__init__(in_features=feature_dim, out_features=feature_dim, model_name=model_name, device=device, dtype=dtype)
        # Construct a fresh unit per layer. ``[unit] * num_layers`` would put the
        # *same* module object into the list repeatedly, so all layers would
        # share one set of parameters.
        self.ffn_layers = nn.ModuleList([
            FeedForwardUnit(feature_dim=feature_dim,
                            expansion_factor=expansion_factor, bias=bias,
                            dropout_rate=dropout_rate,
                            device=self.device, dtype=self.dtype)
            for _ in range(num_layers)
        ])

    @override
    def forward(self, x: torch.Tensor) -> torch.Tensor:
        """Move ``x`` to this module's device/dtype and pass it through every unit in order."""
        x = self.ensure_device_and_dtype(x, device=self.device, dtype=self.dtype)
        for ffn in self.ffn_layers:
            x = ffn(x)
        return x
|
199
217
|
```
|
200
218
|
|
@@ -204,29 +222,34 @@
|
|
204
222
|
from typing_extensions import override
|
205
223
|
|
206
224
|
import torch
|
207
|
-
from torch import nn, softmax
|
208
225
|
|
209
226
|
from deeplotx.nn.base_neural_network import BaseNeuralNetwork
|
227
|
+
from deeplotx.nn.feed_forward import FeedForward
|
210
228
|
|
211
229
|
|
212
230
|
class SelfAttention(BaseNeuralNetwork):
|
213
|
-
def __init__(self, feature_dim: int,
|
214
|
-
|
215
|
-
|
231
|
+
def __init__(self, feature_dim: int, bias: bool = True, proj_layers: int = 1,
|
232
|
+
proj_expansion_factor: int | float = 1.5, dropout_rate: float = 0.02,
|
233
|
+
model_name: str | None = None, device: str | None = None, dtype: torch.dtype | None = None):
|
234
|
+
super().__init__(in_features=feature_dim, out_features=feature_dim, model_name=model_name,
|
235
|
+
device=device, dtype=dtype)
|
216
236
|
self._feature_dim = feature_dim
|
217
|
-
self.q_proj =
|
218
|
-
|
219
|
-
|
220
|
-
|
221
|
-
|
222
|
-
bias=
|
237
|
+
self.q_proj = FeedForward(feature_dim=self._feature_dim, num_layers=proj_layers,
|
238
|
+
expansion_factor=proj_expansion_factor,
|
239
|
+
bias=bias, dropout_rate=dropout_rate, device=self.device, dtype=self.dtype)
|
240
|
+
self.k_proj = FeedForward(feature_dim=self._feature_dim, num_layers=proj_layers,
|
241
|
+
expansion_factor=proj_expansion_factor,
|
242
|
+
bias=bias, dropout_rate=dropout_rate, device=self.device, dtype=self.dtype)
|
243
|
+
self.v_proj = FeedForward(feature_dim=self._feature_dim, num_layers=proj_layers,
|
244
|
+
expansion_factor=proj_expansion_factor,
|
245
|
+
bias=bias, dropout_rate=dropout_rate, device=self.device, dtype=self.dtype)
|
223
246
|
|
224
247
|
def _attention(self, x: torch.Tensor, mask: torch.Tensor | None = None) -> torch.Tensor:
|
225
248
|
q, k = self.q_proj(x), self.k_proj(x)
|
226
249
|
attn = torch.matmul(q, k.transpose(-2, -1))
|
227
250
|
attn = attn / (self._feature_dim ** 0.5)
|
228
251
|
attn = attn.masked_fill(mask == 0, -1e9) if mask is not None else attn
|
229
|
-
return softmax(attn, dim=-1)
|
252
|
+
return torch.softmax(attn, dim=-1)
|
230
253
|
|
231
254
|
@override
|
232
255
|
def forward(self, x: torch.Tensor, mask: torch.Tensor | None = None) -> torch.Tensor:
|
@@ -5,12 +5,16 @@ __ROOT__ = os.path.dirname(os.path.abspath(__file__))
|
|
5
5
|
|
6
6
|
from .encoder import Encoder, LongTextEncoder, LongformerEncoder
|
7
7
|
from .nn import (
|
8
|
+
FeedForward,
|
8
9
|
LinearRegression,
|
9
10
|
LogisticRegression,
|
10
11
|
SoftmaxRegression,
|
11
12
|
RecursiveSequential,
|
12
13
|
LongContextRecursiveSequential,
|
13
|
-
|
14
|
+
RoPE,
|
15
|
+
Attention,
|
16
|
+
MultiHeadAttention,
|
17
|
+
RoFormerEncoder,
|
14
18
|
AutoRegression,
|
15
19
|
LongContextAutoRegression
|
16
20
|
)
|
@@ -0,0 +1,66 @@
|
|
1
|
+
import logging
|
2
|
+
import os
|
3
|
+
import math
|
4
|
+
from requests.exceptions import ConnectTimeout, SSLError
|
5
|
+
|
6
|
+
import torch
|
7
|
+
from torch import nn
|
8
|
+
from transformers import AutoTokenizer, AutoModel
|
9
|
+
|
10
|
+
from deeplotx import __ROOT__
|
11
|
+
|
12
|
+
CACHE_PATH = os.path.join(__ROOT__, '.cache')
|
13
|
+
DEFAULT_BERT = 'FacebookAI/xlm-roberta-base'
|
14
|
+
logger = logging.getLogger('deeplotx.embedding')
|
15
|
+
|
16
|
+
|
17
|
+
class Encoder(nn.Module):
    """Tokenizer/model pair that returns first-position hidden states.

    The tokenizer and model are loaded with ``AutoTokenizer`` / ``AutoModel``
    using ``CACHE_PATH`` as cache directory. If loading raises
    ``ConnectTimeout`` or ``SSLError``, both are reloaded with
    ``local_files_only=True``.
    """

    def __init__(self, model_name_or_path: str = DEFAULT_BERT, device: str | None = None):
        """
        Args:
            model_name_or_path: Name or path handed to ``from_pretrained``.
            device: Torch device string. When ``None``, ``'cuda'`` is used if
                available, otherwise ``'cpu'``.
        """
        super().__init__()
        self.device = torch.device(device) if device is not None \
            else torch.device('cuda' if torch.cuda.is_available() else 'cpu')
        try:
            self.tokenizer, self.encoder = self._load_pretrained(model_name_or_path, local_files_only=False)
        except (ConnectTimeout, SSLError):
            # Remote fetch failed: load both parts strictly from the local cache.
            self.tokenizer, self.encoder = self._load_pretrained(model_name_or_path, local_files_only=True)
        # NOTE(review): despite the name, this holds config.max_position_embeddings
        # and is used as the chunk length in forward(), not as a hidden size.
        # Some models may accept fewer tokens than this value - confirm.
        self.embed_dim = self.encoder.config.max_position_embeddings
        logger.debug(f'{Encoder.__name__} initialized on device: {self.device}.')

    def _load_pretrained(self, model_name_or_path: str, local_files_only: bool):
        """Load the tokenizer and the model (moved to ``self.device``) with shared options."""
        # NOTE(security): trust_remote_code=True allows code shipped with the
        # model repository to run at load time; only pass trusted model names.
        options = dict(pretrained_model_name_or_path=model_name_or_path,
                       cache_dir=CACHE_PATH, _from_auto=True,
                       trust_remote_code=True)
        if local_files_only:
            options['local_files_only'] = True
        tokenizer = AutoTokenizer.from_pretrained(**options)
        model = AutoModel.from_pretrained(**options).to(self.device)
        return tokenizer, model

    def forward(self, input_ids: torch.Tensor, attention_mask: torch.Tensor, *args, **kwargs) -> torch.Tensor:
        """Encode ``input_ids`` in chunks and concatenate the per-chunk results.

        The last dimension of ``input_ids`` / ``attention_mask`` is cut into
        consecutive slices of at most ``self.embed_dim`` positions. Each slice
        is run through the model in eval mode without gradients, and
        ``last_hidden_state[:, 0, :]`` of every slice is concatenated along
        the last dimension. Extra ``*args`` / ``**kwargs`` are ignored.
        """
        def _encoder(_input_tup: tuple[torch.Tensor, torch.Tensor]) -> torch.Tensor:
            return self.encoder.forward(_input_tup[0], attention_mask=_input_tup[1]).last_hidden_state[:, 0, :]

        chunk_len = self.embed_dim
        seq_len = input_ids.shape[-1]
        # Slicing past the end is clipped, so the last chunk may be shorter.
        chunks = [(input_ids[:, start: start + chunk_len], attention_mask[:, start: start + chunk_len])
                  for start in range(0, seq_len, chunk_len)]
        ori_mode = self.encoder.training
        self.encoder.eval()
        try:
            with torch.no_grad():
                chunk_results = [_encoder(x) for x in chunks]
        finally:
            # Restore the previous mode even if encoding raised.
            self.encoder.train(mode=ori_mode)
        return torch.cat(chunk_results, dim=-1)

    def encode(self, text: str) -> torch.Tensor:
        """Tokenize ``text``, run ``forward`` with an all-ones mask and squeeze the result."""
        _input_ids = torch.tensor([self.tokenizer.encode(text)], dtype=torch.long, device=self.device)
        _att_mask = torch.tensor([[1] * _input_ids.shape[-1]], dtype=torch.int, device=self.device)
        return self.forward(_input_ids, _att_mask).squeeze()
|
@@ -15,12 +15,14 @@ logger = logging.getLogger('deeplotx.embedding')
|
|
15
15
|
class LongTextEncoder(Encoder):
|
16
16
|
def __init__(self, max_length: int, chunk_size: int = 448,
|
17
17
|
overlapping: int = 32, model_name_or_path: str = DEFAULT_BERT,
|
18
|
-
cache_capacity: int = 64, device: str | None = None):
|
18
|
+
cache_capacity: int = 64, max_workers: int = 8, device: str | None = None):
|
19
19
|
super().__init__(model_name_or_path=model_name_or_path, device=device)
|
20
|
+
assert overlapping < chunk_size, f'overlapping ({overlapping}) must be less than chunk size ({chunk_size}).'
|
20
21
|
self._max_length = max_length
|
21
22
|
self._chunk_size = chunk_size
|
22
23
|
self._overlapping = overlapping
|
23
24
|
self._cache = LRUCache(capacity=cache_capacity)
|
25
|
+
self._worker_group = ThreadPool(max_workers=max_workers)
|
24
26
|
|
25
27
|
def __chunk_embedding(self, idx: int, x: torch.Tensor, mask: torch.Tensor) -> tuple[int, torch.Tensor]:
|
26
28
|
return idx, super().forward(x, attention_mask=mask)
|
@@ -63,7 +65,7 @@ class LongTextEncoder(Encoder):
|
|
63
65
|
_tmp_right = (i + 1) * self._chunk_size + self._overlapping
|
64
66
|
chunks.append((i, torch.tensor([_text_to_input_ids[_tmp_left: _tmp_right]], dtype=torch.int, device=self.device),
|
65
67
|
torch.tensor([_text_to_input_ids_att_mask[_tmp_left: _tmp_right]], dtype=torch.int, device=self.device)))
|
66
|
-
embeddings = list(
|
68
|
+
embeddings = list(self._worker_group.map(self.__chunk_embedding, chunks))
|
67
69
|
embeddings = sorted([x.returns for x in embeddings], key=lambda x: x[0], reverse=False)
|
68
70
|
fin_embedding = [x[1] for x in embeddings]
|
69
71
|
# write cache
|
@@ -1,8 +1,13 @@
|
|
1
|
+
from .base_neural_network import BaseNeuralNetwork
|
2
|
+
from .feed_forward import FeedForward
|
1
3
|
from .linear_regression import LinearRegression
|
2
4
|
from .logistic_regression import LogisticRegression
|
3
5
|
from .softmax_regression import SoftmaxRegression
|
4
6
|
from .recursive_sequential import RecursiveSequential
|
5
7
|
from .long_context_recursive_sequential import LongContextRecursiveSequential
|
6
|
-
from .
|
8
|
+
from .rope import RoPE
|
9
|
+
from .attention import Attention
|
10
|
+
from .multi_head_attention import MultiHeadAttention
|
11
|
+
from .roformer_encoder import RoFormerEncoder
|
7
12
|
from .auto_regression import AutoRegression
|
8
13
|
from .long_context_auto_regression import LongContextAutoRegression
|
@@ -0,0 +1,48 @@
|
|
1
|
+
from typing_extensions import override
|
2
|
+
|
3
|
+
import torch
|
4
|
+
|
5
|
+
from deeplotx.nn.base_neural_network import BaseNeuralNetwork
|
6
|
+
from deeplotx.nn.feed_forward import FeedForward
|
7
|
+
from deeplotx.nn.rope import RoPE, DEFAULT_THETA
|
8
|
+
|
9
|
+
|
10
|
+
class Attention(BaseNeuralNetwork):
    """Scaled dot-product attention with ``FeedForward`` projections.

    Queries are projected from ``x``; keys and values are projected from
    ``y`` (``y`` defaults to ``x``). When ``positional`` is true, ``RoPE`` is
    applied to the projected queries and keys.
    """

    def __init__(self, feature_dim: int, bias: bool = True, positional: bool = True,
                 proj_layers: int = 1, proj_expansion_factor: int | float = 1.5, dropout_rate: float = 0.02,
                 model_name: str | None = None, device: str | None = None, dtype: torch.dtype | None = None,
                 **kwargs):
        """
        Args:
            feature_dim: Feature width of inputs and outputs.
            bias: Forwarded to the three ``FeedForward`` projections.
            positional: Whether to create and apply ``RoPE``.
            proj_layers: ``num_layers`` of each projection.
            proj_expansion_factor: ``expansion_factor`` of each projection.
            dropout_rate: ``dropout_rate`` of each projection.
            model_name: Forwarded to ``BaseNeuralNetwork``.
            device: Forwarded to ``BaseNeuralNetwork``.
            dtype: Forwarded to ``BaseNeuralNetwork``.
            **kwargs: Only ``theta`` is read (default ``DEFAULT_THETA``) and
                passed to ``RoPE``; other keys are ignored.
        """
        super().__init__(in_features=feature_dim, out_features=feature_dim, model_name=model_name,
                         device=device, dtype=dtype)
        self._positional = positional
        self._feature_dim = feature_dim
        self.q_proj = FeedForward(feature_dim=self._feature_dim, num_layers=proj_layers,
                                  expansion_factor=proj_expansion_factor,
                                  bias=bias, dropout_rate=dropout_rate, device=self.device, dtype=self.dtype)
        self.k_proj = FeedForward(feature_dim=self._feature_dim, num_layers=proj_layers,
                                  expansion_factor=proj_expansion_factor,
                                  bias=bias, dropout_rate=dropout_rate, device=self.device, dtype=self.dtype)
        self.v_proj = FeedForward(feature_dim=self._feature_dim, num_layers=proj_layers,
                                  expansion_factor=proj_expansion_factor,
                                  bias=bias, dropout_rate=dropout_rate, device=self.device, dtype=self.dtype)
        if self._positional:
            self.rope = RoPE(feature_dim=self._feature_dim, theta=kwargs.get('theta', DEFAULT_THETA),
                             device=self.device, dtype=self.dtype)

    def _attention(self, x: torch.Tensor, y: torch.Tensor, mask: torch.Tensor | None = None) -> torch.Tensor:
        """Return softmax-normalised attention weights of ``x`` over ``y``.

        Scores are ``q @ k^T`` over the last two dimensions, divided by
        ``sqrt(feature_dim)``. Positions where ``mask == 0`` are filled with a
        large negative value before the softmax over the last dimension.
        """
        q, k = self.q_proj(x), self.k_proj(y)
        if self._positional:
            q, k = self.rope(q), self.rope(k)
        attn = torch.matmul(q, k.transpose(-2, -1))
        attn = attn / (self._feature_dim ** 0.5)
        if mask is not None:
            # -1e9 is not representable in float16; clamp the fill value to the
            # score dtype's range. For float32 this is still exactly -1e9.
            fill_value = max(-1e9, torch.finfo(attn.dtype).min)
            attn = attn.masked_fill(mask == 0, fill_value)
        return torch.softmax(attn, dim=-1)

    @override
    def forward(self, x: torch.Tensor, y: torch.Tensor | None = None, mask: torch.Tensor | None = None) -> torch.Tensor:
        """Attend from ``x`` to ``y`` and return the weighted values.

        Args:
            x: Query source.
            y: Key/value source; ``x`` is used when ``None``.
            mask: Optional mask; entries equal to 0 are masked out.
                Presumably broadcastable to the score matrix - confirm against callers.
        """
        x = self.ensure_device_and_dtype(x, device=self.device, dtype=self.dtype)
        y = x if y is None else self.ensure_device_and_dtype(y, device=self.device, dtype=self.dtype)
        if mask is not None:
            mask = self.ensure_device_and_dtype(mask, device=self.device, dtype=self.dtype)
        # Values are projected before the scores, as in the original ordering.
        v = self.v_proj(y)
        return torch.matmul(self._attention(x, y, mask), v)
|
@@ -0,0 +1,14 @@
|
|
1
|
+
import torch
|
2
|
+
|
3
|
+
from deeplotx.nn import RecursiveSequential
|
4
|
+
|
5
|
+
|
6
|
+
class AutoRegression(RecursiveSequential):
    """``RecursiveSequential`` whose input and output widths are both ``feature_dim``."""

    def __init__(self, feature_dim: int, bias: bool = True,
                 recursive_layers: int = 1, recursive_hidden_dim: int | None = None,
                 ffn_layers: int = 1, ffn_expansion_factor: int | float = 2, dropout_rate: float = 0.05,
                 model_name: str | None = None, device: str | None = None, dtype: torch.dtype | None = None):
        """Forward every option to ``RecursiveSequential`` with ``input_dim == output_dim == feature_dim``."""
        # Options handed through to the parent constructor unchanged.
        passthrough = {
            'bias': bias,
            'recursive_layers': recursive_layers,
            'recursive_hidden_dim': recursive_hidden_dim,
            'ffn_layers': ffn_layers,
            'ffn_expansion_factor': ffn_expansion_factor,
            'dropout_rate': dropout_rate,
            'model_name': model_name,
            'device': device,
            'dtype': dtype,
        }
        super().__init__(input_dim=feature_dim, output_dim=feature_dim, **passthrough)
|
@@ -0,0 +1,140 @@
|
|
1
|
+
import os
|
2
|
+
from abc import abstractmethod
|
3
|
+
|
4
|
+
import torch
|
5
|
+
from torch import nn
|
6
|
+
from torch.nn import init
|
7
|
+
|
8
|
+
DEFAULT_SUFFIX = 'dlx'
|
9
|
+
|
10
|
+
|
11
|
+
class BaseNeuralNetwork(nn.Module):
|
12
|
+
def __init__(self, in_features: int, out_features: int, model_name: str | None = None,
|
13
|
+
device: str | None = None, dtype: torch.dtype | None = None):
|
14
|
+
super().__init__()
|
15
|
+
self._model_name = model_name \
|
16
|
+
if model_name is not None \
|
17
|
+
else self.__class__.__name__
|
18
|
+
self.device = torch.device(device) if device is not None \
|
19
|
+
else torch.device('cuda' if torch.cuda.is_available() else 'cpu')
|
20
|
+
self.dtype = dtype if dtype is not None else torch.float32
|
21
|
+
self._in_features = in_features
|
22
|
+
self._out_features = out_features
|
23
|
+
|
24
|
+
@property
|
25
|
+
def in_features(self) -> int:
|
26
|
+
return self._in_features
|
27
|
+
|
28
|
+
@property
|
29
|
+
def out_features(self) -> int:
|
30
|
+
return self._out_features
|
31
|
+
|
32
|
+
@staticmethod
|
33
|
+
def ensure_device_and_dtype(x: torch.Tensor, device: torch.device, dtype: torch.dtype) -> torch.Tensor:
|
34
|
+
if x.device != device:
|
35
|
+
x = x.to(device)
|
36
|
+
if x.dtype != dtype:
|
37
|
+
x = x.to(dtype)
|
38
|
+
return x
|
39
|
+
|
40
|
+
def initialize_weights(self):
|
41
|
+
for m in self.modules():
|
42
|
+
match m.__class__:
|
43
|
+
case nn.Linear:
|
44
|
+
init.kaiming_normal_(m.weight, mode='fan_in', nonlinearity='leaky_relu')
|
45
|
+
if m.bias is not None:
|
46
|
+
init.constant_(m.bias, 0)
|
47
|
+
case nn.BatchNorm2d | nn.BatchNorm1d | nn.BatchNorm3d:
|
48
|
+
init.constant_(m.weight, 1)
|
49
|
+
init.constant_(m.bias, 0)
|
50
|
+
case nn.LSTM | nn.GRU:
|
51
|
+
for name, param in m.named_parameters():
|
52
|
+
_tmp_name = name.lower()
|
53
|
+
if 'weight_ih' in _tmp_name:
|
54
|
+
init.kaiming_normal_(param, mode='fan_in', nonlinearity='sigmoid')
|
55
|
+
elif 'weight_hh' in _tmp_name:
|
56
|
+
init.orthogonal_(param)
|
57
|
+
elif 'bias' in _tmp_name:
|
58
|
+
init.constant_(param, 0)
|
59
|
+
case _:
|
60
|
+
pass
|
61
|
+
return self
|
62
|
+
|
63
|
+
def size(self) -> dict:
|
64
|
+
total_params = trainable_params = non_trainable_params = 0
|
65
|
+
for param in self.parameters():
|
66
|
+
params = param.numel()
|
67
|
+
total_params += params
|
68
|
+
if param.requires_grad:
|
69
|
+
trainable_params += params
|
70
|
+
else:
|
71
|
+
non_trainable_params += params
|
72
|
+
return {
|
73
|
+
'total': total_params,
|
74
|
+
'trainable': trainable_params,
|
75
|
+
'non_trainable': non_trainable_params
|
76
|
+
}
|
77
|
+
|
78
|
+
def l1(self, _lambda: float = 1e-4) -> torch.Tensor:
|
79
|
+
def _l1() -> torch.Tensor:
|
80
|
+
l2_reg = torch.tensor(0., device=self.device, dtype=self.dtype)
|
81
|
+
for param in self.parameters():
|
82
|
+
l2_reg += (torch.abs(param)).sum()
|
83
|
+
return l2_reg
|
84
|
+
return _lambda * _l1()
|
85
|
+
|
86
|
+
def l2(self, _lambda: float = 1e-4) -> torch.Tensor:
|
87
|
+
def _l2() -> torch.Tensor:
|
88
|
+
l2_reg = torch.tensor(0., device=self.device, dtype=self.dtype)
|
89
|
+
for param in self.parameters():
|
90
|
+
l2_reg += (torch.pow(param, exponent=2.)).sum()
|
91
|
+
return l2_reg
|
92
|
+
return _lambda * _l2() / 2.
|
93
|
+
|
94
|
+
def elastic_net(self, alpha: float = 1e-4, rho: float = 0.5) -> torch.Tensor:
|
95
|
+
return alpha * (rho * self.l1(_lambda=1.) + (1 - rho) * self.l2(_lambda=1.))
|
96
|
+
|
97
|
+
@abstractmethod
|
98
|
+
def forward(self, *args, **kwargs) -> torch.Tensor: ...
|
99
|
+
|
100
|
+
def predict(self, x: torch.Tensor) -> torch.Tensor:
|
101
|
+
x = self.ensure_device_and_dtype(x, device=self.device, dtype=self.dtype)
|
102
|
+
__train = self.training
|
103
|
+
self.training = False
|
104
|
+
with torch.no_grad():
|
105
|
+
res = self.forward(x)
|
106
|
+
self.training = __train
|
107
|
+
return res
|
108
|
+
|
109
|
+
def save(self, model_name: str | None = None, model_dir: str = '.', _suffix: str = DEFAULT_SUFFIX):
|
110
|
+
os.makedirs(model_dir, exist_ok=True)
|
111
|
+
model_file_name = f'{model_name}.{_suffix}' if model_name is not None else f'{self._model_name}.{_suffix}'
|
112
|
+
torch.save(self.state_dict(), os.path.join(model_dir, model_file_name))
|
113
|
+
return self
|
114
|
+
|
115
|
+
def load(self, model_name: str | None = None, model_dir: str = '.', _suffix: str = DEFAULT_SUFFIX):
|
116
|
+
model_file_name = f'{model_name}.{_suffix}' if model_name is not None else f'{self._model_name}.{_suffix}'
|
117
|
+
self.load_state_dict(torch.load(os.path.join(model_dir, model_file_name), map_location=self.device, weights_only=True))
|
118
|
+
return self
|
119
|
+
|
120
|
+
def __str__(self):
|
121
|
+
formatted = super().__str__()
|
122
|
+
_line_len = len([sorted(formatted.splitlines(), key=lambda _: len(_), reverse=True)][0])
|
123
|
+
_splitter_1 = '=' * (_line_len + 10)
|
124
|
+
_splitter_2 = '-' * (_line_len + 10)
|
125
|
+
_size = self.size()
|
126
|
+
total_param = _size['total']
|
127
|
+
trainable_param = _size['trainable']
|
128
|
+
non_trainable_param = _size['non_trainable']
|
129
|
+
formatted = (f'{_splitter_1}\n'
|
130
|
+
f'Model_Name: {self._model_name}\n'
|
131
|
+
f'In_Features: {self.in_features}\n'
|
132
|
+
f'Out_Features: {self.out_features}\n'
|
133
|
+
f'Device: {self.device}\n'
|
134
|
+
f'Dtype: {self.dtype}\n'
|
135
|
+
f'Total_Parameters: {total_param}\n'
|
136
|
+
f'Trainable_Parameters: {trainable_param}\n'
|
137
|
+
f'NonTrainable_Parameters: {non_trainable_param}\n'
|
138
|
+
f'{_splitter_2}'
|
139
|
+
f'\n{formatted}\n{_splitter_1}')
|
140
|
+
return formatted
|