PyPI - deeplotx - Versions diffs - 0.2.20__tar.gz → 0.2.21__tar.gz - Mend

deeplotx 0.2.20__tar.gz → 0.2.21__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (26)

{deeplotx-0.2.20 → deeplotx-0.2.21}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: deeplotx
-Version: 0.2.20
+Version: 0.2.21
 Summary: Easy-2-use long text classifier trainers.
 Requires-Python: >=3.10
 Description-Content-Type: text/markdown
@@ -21,13 +21,19 @@ Dynamic: license-file
 - Install with pip
     ```
-    pip install git+https://github.com/vortezwohl/DeepLoTX.git
+    pip install -U deeplotx
     ```
 - Install with uv
     ```
-    uv add git+https://github.com/vortezwohl/DeepLoTX.git
+    uv add -U deeplotx
+    ```
+- Install from github
+    ```
+    pip install -U git+https://github.com/vortezwohl/DeepLoTX.git
     ```
 ## Quick Start

{deeplotx-0.2.20 → deeplotx-0.2.21}/README.md RENAMED Viewed

@@ -5,13 +5,19 @@
 - Install with pip
     ```
-    pip install git+https://github.com/vortezwohl/DeepLoTX.git
+    pip install -U deeplotx
     ```
 - Install with uv
     ```
-    uv add git+https://github.com/vortezwohl/DeepLoTX.git
+    uv add -U deeplotx
+    ```
+- Install from github
+    ```
+    pip install -U git+https://github.com/vortezwohl/DeepLoTX.git
     ```
 ## Quick Start

{deeplotx-0.2.20 → deeplotx-0.2.21}/deeplotx/encoder/bert_encoder.py RENAMED Viewed

@@ -13,19 +13,20 @@ class BertEncoder(nn.Module):
     def __init__(self, model_name_or_path: str = DEFAULT_BERT):
         super().__init__()
         self.tokenizer = BertTokenizer.from_pretrained(pretrained_model_name_or_path=model_name_or_path,
-                                                       cache_dir=CACHE_PATH)
+                                                       cache_dir=CACHE_PATH, _from_auto=True)
         self.bert = BertModel.from_pretrained(pretrained_model_name_or_path=model_name_or_path,
-                                              cache_dir=CACHE_PATH)
+                                              cache_dir=CACHE_PATH, _from_auto=True)
+        self.embed_dim = self.bert.config.max_position_embeddings
-    def forward(self, input_ids, attention_mask: torch.Tensor) -> torch.Tensor:
+    def forward(self, input_ids: torch.Tensor, attention_mask: torch.Tensor) -> torch.Tensor:
         def _encoder(_input_tup: tuple[torch.Tensor, torch.Tensor]) -> torch.Tensor:
             return self.bert.forward(_input_tup[0], attention_mask=_input_tup[1]).last_hidden_state[:, 0, :]
-        num_chunks = math.ceil(input_ids.shape[-1] / 512)
+        num_chunks = math.ceil(input_ids.shape[-1] / self.embed_dim)
         chunks = chunk_results = []
         for i in range(num_chunks):
-            start_idx = i * 512
-            end_idx = min(start_idx + 512, input_ids.shape[-1])
+            start_idx = i * self.embed_dim
+            end_idx = min(start_idx + self.embed_dim, input_ids.shape[-1])
             chunks.append((input_ids[:, start_idx: end_idx], attention_mask[:, start_idx: end_idx]))
         ori_mode = self.bert.training
         self.bert.eval()

{deeplotx-0.2.20 → deeplotx-0.2.21}/deeplotx/encoder/long_text_encoder.py RENAMED Viewed

@@ -24,7 +24,7 @@ class LongTextEncoder(BertEncoder):
         return input_tup[0], super().forward(input_tup[1], attention_mask=input_tup[2])
     @override
-    def encode(self, text: str) -> torch.Tensor:
+    def encode(self, text: str, use_cache: bool = True) -> torch.Tensor:
         _text_to_show = text.replace("\n", str())
         logger.debug(f'Embedding \"{_text_to_show if len(_text_to_show) < 128 else _text_to_show[:128] + "..."}\".')
         # read cache
@@ -58,5 +58,6 @@ class LongTextEncoder(BertEncoder):
             fin_emb_tensor = torch.cat((fin_emb_tensor.detach().clone(), emb.detach().clone()), dim=-1)
         fin_emb_tensor = fin_emb_tensor.squeeze()
         # write cache
-        self._cache[_text_hash] = fin_emb_tensor
+        if use_cache:
+            self._cache[_text_hash] = fin_emb_tensor
         return fin_emb_tensor

deeplotx-0.2.21/deeplotx/encoder/longformer_encoder.py ADDED Viewed

@@ -0,0 +1,30 @@
+import torch
+from torch import nn
+from transformers import LongformerTokenizer, LongformerModel
+from deeplotx import __ROOT__
+CACHE_PATH = f'{__ROOT__}\\.cache'
+DEFAULT_LONGFORMER = 'allenai/longformer-base-4096'
+class LongformerEncoder(nn.Module):
+    def __init__(self, model_name_or_path: str = DEFAULT_LONGFORMER):
+        super().__init__()
+        self.tokenizer = LongformerTokenizer.from_pretrained(pretrained_model_name_or_path=model_name_or_path,
+                                                             cache_dir=CACHE_PATH, _from_auto=True)
+        self.bert = LongformerModel.from_pretrained(pretrained_model_name_or_path=model_name_or_path,
+                                                    cache_dir=CACHE_PATH, _from_auto=True)
+    def forward(self, input_ids: torch.Tensor, attention_mask: torch.Tensor) -> torch.Tensor:
+        ori_mode = self.bert.training
+        self.bert.eval()
+        with torch.no_grad():
+            res = self.bert.forward(input_ids, attention_mask=attention_mask).last_hidden_state[:, 0, :]
+        self.bert.train(mode=ori_mode)
+        return res
+    def encode(self, text: str) -> torch.Tensor:
+        _input_ids = torch.tensor([self.tokenizer.encode(text)], dtype=torch.long)
+        _att_mask = torch.tensor([[1] * _input_ids.shape[-1]], dtype=torch.int)
+        return self.forward(_input_ids, _att_mask).squeeze()

{deeplotx-0.2.20 → deeplotx-0.2.21}/deeplotx/nn/base_neural_network.py RENAMED Viewed

@@ -1,3 +1,5 @@
+from abc import abstractmethod
 import torch
 from torch import nn
@@ -28,6 +30,7 @@ class BaseNeuralNetwork(nn.Module):
     def elastic_net(self, alpha: float = 1e-4, rho: float = 0.5) -> torch.Tensor:
         return alpha * (rho * self.l1(_lambda=1.) + (1 - rho) * self.l2(_lambda=1.))
+    @abstractmethod
     def forward(self, x) -> torch.Tensor: ...
     def predict(self, x) -> torch.Tensor:

{deeplotx-0.2.20 → deeplotx-0.2.21}/deeplotx.egg-info/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: deeplotx
-Version: 0.2.20
+Version: 0.2.21
 Summary: Easy-2-use long text classifier trainers.
 Requires-Python: >=3.10
 Description-Content-Type: text/markdown
@@ -21,13 +21,19 @@ Dynamic: license-file
 - Install with pip
     ```
-    pip install git+https://github.com/vortezwohl/DeepLoTX.git
+    pip install -U deeplotx
     ```
 - Install with uv
     ```
-    uv add git+https://github.com/vortezwohl/DeepLoTX.git
+    uv add -U deeplotx
+    ```
+- Install from github
+    ```
+    pip install -U git+https://github.com/vortezwohl/DeepLoTX.git
     ```
 ## Quick Start

{deeplotx-0.2.20 → deeplotx-0.2.21}/deeplotx.egg-info/SOURCES.txt RENAMED Viewed

@@ -10,6 +10,7 @@ deeplotx.egg-info/top_level.txt
 deeplotx/encoder/__init__.py
 deeplotx/encoder/bert_encoder.py
 deeplotx/encoder/long_text_encoder.py
+deeplotx/encoder/longformer_encoder.py
 deeplotx/nn/__init__.py
 deeplotx/nn/base_neural_network.py
 deeplotx/nn/linear_regression.py

{deeplotx-0.2.20 → deeplotx-0.2.21}/pyproject.toml RENAMED Viewed

@@ -1,6 +1,6 @@
 [project]
 name = "deeplotx"
-version = "0.2.20"
+version = "0.2.21"
 description = "Easy-2-use long text classifier trainers."
 readme = "README.md"
 requires-python = ">=3.10"

{deeplotx-0.2.20 → deeplotx-0.2.21}/LICENSE RENAMED Viewed

File without changes

{deeplotx-0.2.20 → deeplotx-0.2.21}/deeplotx/__init__.py RENAMED Viewed

File without changes

{deeplotx-0.2.20 → deeplotx-0.2.21}/deeplotx/encoder/__init__.py RENAMED Viewed

File without changes

{deeplotx-0.2.20 → deeplotx-0.2.21}/deeplotx/nn/__init__.py RENAMED Viewed

File without changes

{deeplotx-0.2.20 → deeplotx-0.2.21}/deeplotx/nn/linear_regression.py RENAMED Viewed

File without changes

{deeplotx-0.2.20 → deeplotx-0.2.21}/deeplotx/nn/logistic_regression.py RENAMED Viewed

File without changes

{deeplotx-0.2.20 → deeplotx-0.2.21}/deeplotx/nn/softmax_regression.py RENAMED Viewed

File without changes

{deeplotx-0.2.20 → deeplotx-0.2.21}/deeplotx/trainer/__init__.py RENAMED Viewed

File without changes

{deeplotx-0.2.20 → deeplotx-0.2.21}/deeplotx/trainer/base_trainer.py RENAMED Viewed

File without changes

{deeplotx-0.2.20 → deeplotx-0.2.21}/deeplotx/trainer/text_binary_classification_trainer.py RENAMED Viewed

File without changes

{deeplotx-0.2.20 → deeplotx-0.2.21}/deeplotx/util/__init__.py RENAMED Viewed

File without changes

{deeplotx-0.2.20 → deeplotx-0.2.21}/deeplotx/util/hash.py RENAMED Viewed

File without changes

{deeplotx-0.2.20 → deeplotx-0.2.21}/deeplotx/util/read_file.py RENAMED Viewed

File without changes

{deeplotx-0.2.20 → deeplotx-0.2.21}/deeplotx.egg-info/dependency_links.txt RENAMED Viewed

File without changes

{deeplotx-0.2.20 → deeplotx-0.2.21}/deeplotx.egg-info/requires.txt RENAMED Viewed

File without changes

{deeplotx-0.2.20 → deeplotx-0.2.21}/deeplotx.egg-info/top_level.txt RENAMED Viewed

File without changes

{deeplotx-0.2.20 → deeplotx-0.2.21}/setup.cfg RENAMED Viewed

File without changes

deeplotx 0.2.20__tar.gz → 0.2.21__tar.gz

deeplotx 0.2.20__tar.gz → 0.2.21__tar.gz