PyPI - deeplotx - Versions diffs - 0.2.20__py3-none-any.whl → 0.2.21__py3-none-any.whl - Mend

deeplotx 0.2.20__py3-none-any.whl → 0.2.21__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (9) hide show

deeplotx/encoder/bert_encoder.py CHANGED Viewed

@@ -13,19 +13,20 @@ class BertEncoder(nn.Module):
     def __init__(self, model_name_or_path: str = DEFAULT_BERT):
         super().__init__()
         self.tokenizer = BertTokenizer.from_pretrained(pretrained_model_name_or_path=model_name_or_path,
-                                                       cache_dir=CACHE_PATH)
+                                                       cache_dir=CACHE_PATH, _from_auto=True)
         self.bert = BertModel.from_pretrained(pretrained_model_name_or_path=model_name_or_path,
-                                              cache_dir=CACHE_PATH)
+                                              cache_dir=CACHE_PATH, _from_auto=True)
+        self.embed_dim = self.bert.config.max_position_embeddings
-    def forward(self, input_ids, attention_mask: torch.Tensor) -> torch.Tensor:
+    def forward(self, input_ids: torch.Tensor, attention_mask: torch.Tensor) -> torch.Tensor:
         def _encoder(_input_tup: tuple[torch.Tensor, torch.Tensor]) -> torch.Tensor:
             return self.bert.forward(_input_tup[0], attention_mask=_input_tup[1]).last_hidden_state[:, 0, :]
-        num_chunks = math.ceil(input_ids.shape[-1] / 512)
+        num_chunks = math.ceil(input_ids.shape[-1] / self.embed_dim)
         chunks = chunk_results = []
         for i in range(num_chunks):
-            start_idx = i * 512
-            end_idx = min(start_idx + 512, input_ids.shape[-1])
+            start_idx = i * self.embed_dim
+            end_idx = min(start_idx + self.embed_dim, input_ids.shape[-1])
             chunks.append((input_ids[:, start_idx: end_idx], attention_mask[:, start_idx: end_idx]))
         ori_mode = self.bert.training
         self.bert.eval()

deeplotx/encoder/long_text_encoder.py CHANGED Viewed

@@ -24,7 +24,7 @@ class LongTextEncoder(BertEncoder):
         return input_tup[0], super().forward(input_tup[1], attention_mask=input_tup[2])
     @override
-    def encode(self, text: str) -> torch.Tensor:
+    def encode(self, text: str, use_cache: bool = True) -> torch.Tensor:
         _text_to_show = text.replace("\n", str())
         logger.debug(f'Embedding \"{_text_to_show if len(_text_to_show) < 128 else _text_to_show[:128] + "..."}\".')
         # read cache
@@ -58,5 +58,6 @@ class LongTextEncoder(BertEncoder):
             fin_emb_tensor = torch.cat((fin_emb_tensor.detach().clone(), emb.detach().clone()), dim=-1)
         fin_emb_tensor = fin_emb_tensor.squeeze()
         # write cache
-        self._cache[_text_hash] = fin_emb_tensor
+        if use_cache:
+            self._cache[_text_hash] = fin_emb_tensor
         return fin_emb_tensor

deeplotx/encoder/longformer_encoder.py ADDED Viewed

@@ -0,0 +1,30 @@
+import torch
+from torch import nn
+from transformers import LongformerTokenizer, LongformerModel
+from deeplotx import __ROOT__
+CACHE_PATH = f'{__ROOT__}\\.cache'
+DEFAULT_LONGFORMER = 'allenai/longformer-base-4096'
+class LongformerEncoder(nn.Module):
+    def __init__(self, model_name_or_path: str = DEFAULT_LONGFORMER):
+        super().__init__()
+        self.tokenizer = LongformerTokenizer.from_pretrained(pretrained_model_name_or_path=model_name_or_path,
+                                                             cache_dir=CACHE_PATH, _from_auto=True)
+        self.bert = LongformerModel.from_pretrained(pretrained_model_name_or_path=model_name_or_path,
+                                                    cache_dir=CACHE_PATH, _from_auto=True)
+    def forward(self, input_ids: torch.Tensor, attention_mask: torch.Tensor) -> torch.Tensor:
+        ori_mode = self.bert.training
+        self.bert.eval()
+        with torch.no_grad():
+            res = self.bert.forward(input_ids, attention_mask=attention_mask).last_hidden_state[:, 0, :]
+        self.bert.train(mode=ori_mode)
+        return res
+    def encode(self, text: str) -> torch.Tensor:
+        _input_ids = torch.tensor([self.tokenizer.encode(text)], dtype=torch.long)
+        _att_mask = torch.tensor([[1] * _input_ids.shape[-1]], dtype=torch.int)
+        return self.forward(_input_ids, _att_mask).squeeze()

deeplotx/nn/base_neural_network.py CHANGED Viewed

@@ -1,3 +1,5 @@
+from abc import abstractmethod
 import torch
 from torch import nn
@@ -28,6 +30,7 @@ class BaseNeuralNetwork(nn.Module):
     def elastic_net(self, alpha: float = 1e-4, rho: float = 0.5) -> torch.Tensor:
         return alpha * (rho * self.l1(_lambda=1.) + (1 - rho) * self.l2(_lambda=1.))
+    @abstractmethod
     def forward(self, x) -> torch.Tensor: ...
     def predict(self, x) -> torch.Tensor:

{deeplotx-0.2.20.dist-info → deeplotx-0.2.21.dist-info}/METADATA RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: deeplotx
-Version: 0.2.20
+Version: 0.2.21
 Summary: Easy-2-use long text classifier trainers.
 Requires-Python: >=3.10
 Description-Content-Type: text/markdown
@@ -21,13 +21,19 @@ Dynamic: license-file
 - Install with pip
     ```
-    pip install git+https://github.com/vortezwohl/DeepLoTX.git
+    pip install -U deeplotx
     ```
 - Install with uv
     ```
-    uv add git+https://github.com/vortezwohl/DeepLoTX.git
+    uv add -U deeplotx
+    ```
+- Install from github
+    ```
+    pip install -U git+https://github.com/vortezwohl/DeepLoTX.git
     ```
 ## Quick Start

{deeplotx-0.2.20.dist-info → deeplotx-0.2.21.dist-info}/RECORD RENAMED Viewed

@@ -1,9 +1,10 @@
 deeplotx/__init__.py,sha256=Bhxc6HRnuhPZCMNlBc6oKcFTpJbWRGrZmt00vVOsNf0,916
 deeplotx/encoder/__init__.py,sha256=x7k8IE0FXvDl7kCJGWPsetOHFdvNCiCXHbYOdvo7_JQ,87
-deeplotx/encoder/bert_encoder.py,sha256=rdT8YgZzvRoqYqtzPW95ilagSQTAQgUl7mMVetGKxCY,1822
-deeplotx/encoder/long_text_encoder.py,sha256=yEEtTVZYHJ0W3OSbh7BHm6xI33nJmVYlSrgD5RVcJLY,2967
+deeplotx/encoder/bert_encoder.py,sha256=A-B7Gj94xv6UhvsFTBH7tnkAdGHRhfUZA2QjSnTKB6c,1970
+deeplotx/encoder/long_text_encoder.py,sha256=V6VxaHW6bMMaZHgU1UZ8n19UfSIV2f2sarWXquiFffQ,3018
+deeplotx/encoder/longformer_encoder.py,sha256=mZpC5TrGHQo98-ydGtVQQ9KRHgCGl1sRoxcQs7r4SSo,1409
 deeplotx/nn/__init__.py,sha256=9gh8rhKqVWtJyvryU_wHPTLEQIorwOBhAQRc0DtNamM,153
-deeplotx/nn/base_neural_network.py,sha256=MXuID5bagdHyrFOkoybW1oiXAY2d4FGnzZoR37LZfUI,1566
+deeplotx/nn/base_neural_network.py,sha256=Rkwu58mXXcuusf-59yLX89MywQx-EvTsSXOvlzUptRE,1621
 deeplotx/nn/linear_regression.py,sha256=D4mEWVOq6q1Fm2otm57rgZ_E06HJLZBV5k636PprAf4,1520
 deeplotx/nn/logistic_regression.py,sha256=QAtZp2oyqOW8-1pJWVcahsSM83bzfA68EHObg-wSHHY,463
 deeplotx/nn/softmax_regression.py,sha256=eUn3mVNlye9ewVdw3McPHZuKbUvvaamsUgFIJMVMgBU,487
@@ -13,8 +14,8 @@ deeplotx/trainer/text_binary_classification_trainer.py,sha256=5O-5dwVMCj5EDX9gjJ
 deeplotx/util/__init__.py,sha256=JxqAK_WOOHcYVSTHBT1-WuBwWrPEVDTV3titeVWvNUM,74
 deeplotx/util/hash.py,sha256=wwsC6kOQvbpuvwKsNQOARd78_wePmW9i3oaUuXRUnpc,352
 deeplotx/util/read_file.py,sha256=ptzouvEQeeW8KU5BrWNJlXw-vFXVrpS9SkAUxsu6A8A,612
-deeplotx-0.2.20.dist-info/licenses/LICENSE,sha256=IwGE9guuL-ryRPEKi6wFPI_zOhg7zDZbTYuHbSt_SAk,35823
-deeplotx-0.2.20.dist-info/METADATA,sha256=NQgRWucDSAI4awAJNf9984IujFRo9PurR1qrqpmWIzA,1573
-deeplotx-0.2.20.dist-info/WHEEL,sha256=0CuiUZ_p9E4cD6NyLD6UG80LBXYyiSYZOKDm5lp32xk,91
-deeplotx-0.2.20.dist-info/top_level.txt,sha256=hKg4pVDXZ-WWxkRfJFczRIll1Sv7VyfKCmzHLXbuh1U,9
-deeplotx-0.2.20.dist-info/RECORD,,
+deeplotx-0.2.21.dist-info/licenses/LICENSE,sha256=IwGE9guuL-ryRPEKi6wFPI_zOhg7zDZbTYuHbSt_SAk,35823
+deeplotx-0.2.21.dist-info/METADATA,sha256=mNUcUO4dSccX1Sz8868nrbq3qWo3cINJXPVv8XtVpzY,1617
+deeplotx-0.2.21.dist-info/WHEEL,sha256=DnLRTWE75wApRYVsjgc6wsVswC54sMSJhAEd4xhDpBk,91
+deeplotx-0.2.21.dist-info/top_level.txt,sha256=hKg4pVDXZ-WWxkRfJFczRIll1Sv7VyfKCmzHLXbuh1U,9
+deeplotx-0.2.21.dist-info/RECORD,,

{deeplotx-0.2.20.dist-info → deeplotx-0.2.21.dist-info}/WHEEL RENAMED Viewed

@@ -1,5 +1,5 @@
 Wheel-Version: 1.0
-Generator: setuptools (80.3.1)
+Generator: setuptools (80.4.0)
 Root-Is-Purelib: true
 Tag: py3-none-any

{deeplotx-0.2.20.dist-info → deeplotx-0.2.21.dist-info}/licenses/LICENSE RENAMED Viewed

File without changes

{deeplotx-0.2.20.dist-info → deeplotx-0.2.21.dist-info}/top_level.txt RENAMED Viewed

File without changes

deeplotx 0.2.20__py3-none-any.whl → 0.2.21__py3-none-any.whl

deeplotx 0.2.20__py3-none-any.whl → 0.2.21__py3-none-any.whl