deeplotx 0.5.5__tar.gz → 0.5.6__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (35)
  1. {deeplotx-0.5.5 → deeplotx-0.5.6}/PKG-INFO +3 -2
  2. {deeplotx-0.5.5 → deeplotx-0.5.6}/README.md +2 -1
  3. {deeplotx-0.5.5 → deeplotx-0.5.6}/deeplotx/encoder/encoder.py +1 -1
  4. {deeplotx-0.5.5 → deeplotx-0.5.6}/deeplotx/encoder/long_text_encoder.py +4 -0
  5. {deeplotx-0.5.5 → deeplotx-0.5.6}/deeplotx/trainer/text_binary_classification_trainer.py +1 -1
  6. {deeplotx-0.5.5 → deeplotx-0.5.6}/deeplotx.egg-info/PKG-INFO +3 -2
  7. {deeplotx-0.5.5 → deeplotx-0.5.6}/pyproject.toml +1 -1
  8. {deeplotx-0.5.5 → deeplotx-0.5.6}/LICENSE +0 -0
  9. {deeplotx-0.5.5 → deeplotx-0.5.6}/deeplotx/__init__.py +0 -0
  10. {deeplotx-0.5.5 → deeplotx-0.5.6}/deeplotx/encoder/__init__.py +0 -0
  11. {deeplotx-0.5.5 → deeplotx-0.5.6}/deeplotx/encoder/longformer_encoder.py +0 -0
  12. {deeplotx-0.5.5 → deeplotx-0.5.6}/deeplotx/nn/__init__.py +0 -0
  13. {deeplotx-0.5.5 → deeplotx-0.5.6}/deeplotx/nn/auto_regression.py +0 -0
  14. {deeplotx-0.5.5 → deeplotx-0.5.6}/deeplotx/nn/base_neural_network.py +0 -0
  15. {deeplotx-0.5.5 → deeplotx-0.5.6}/deeplotx/nn/linear_regression.py +0 -0
  16. {deeplotx-0.5.5 → deeplotx-0.5.6}/deeplotx/nn/logistic_regression.py +0 -0
  17. {deeplotx-0.5.5 → deeplotx-0.5.6}/deeplotx/nn/long_context_auto_regression.py +0 -0
  18. {deeplotx-0.5.5 → deeplotx-0.5.6}/deeplotx/nn/long_context_recursive_sequential.py +0 -0
  19. {deeplotx-0.5.5 → deeplotx-0.5.6}/deeplotx/nn/recursive_sequential.py +0 -0
  20. {deeplotx-0.5.5 → deeplotx-0.5.6}/deeplotx/nn/self_attention.py +0 -0
  21. {deeplotx-0.5.5 → deeplotx-0.5.6}/deeplotx/nn/softmax_regression.py +0 -0
  22. {deeplotx-0.5.5 → deeplotx-0.5.6}/deeplotx/similarity/__init__.py +0 -0
  23. {deeplotx-0.5.5 → deeplotx-0.5.6}/deeplotx/similarity/distribution.py +0 -0
  24. {deeplotx-0.5.5 → deeplotx-0.5.6}/deeplotx/similarity/set.py +0 -0
  25. {deeplotx-0.5.5 → deeplotx-0.5.6}/deeplotx/similarity/vector.py +0 -0
  26. {deeplotx-0.5.5 → deeplotx-0.5.6}/deeplotx/trainer/__init__.py +0 -0
  27. {deeplotx-0.5.5 → deeplotx-0.5.6}/deeplotx/trainer/base_trainer.py +0 -0
  28. {deeplotx-0.5.5 → deeplotx-0.5.6}/deeplotx/util/__init__.py +0 -0
  29. {deeplotx-0.5.5 → deeplotx-0.5.6}/deeplotx/util/hash.py +0 -0
  30. {deeplotx-0.5.5 → deeplotx-0.5.6}/deeplotx/util/read_file.py +0 -0
  31. {deeplotx-0.5.5 → deeplotx-0.5.6}/deeplotx.egg-info/SOURCES.txt +0 -0
  32. {deeplotx-0.5.5 → deeplotx-0.5.6}/deeplotx.egg-info/dependency_links.txt +0 -0
  33. {deeplotx-0.5.5 → deeplotx-0.5.6}/deeplotx.egg-info/requires.txt +0 -0
  34. {deeplotx-0.5.5 → deeplotx-0.5.6}/deeplotx.egg-info/top_level.txt +0 -0
  35. {deeplotx-0.5.5 → deeplotx-0.5.6}/setup.cfg +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: deeplotx
3
- Version: 0.5.5
3
+ Version: 0.5.6
4
4
  Summary: Easy-2-use long text NLP toolkit.
5
5
  Requires-Python: >=3.10
6
6
  Description-Content-Type: text/markdown
@@ -265,7 +265,8 @@ Dynamic: license-file
265
265
  long_text_encoder = LongTextEncoder(
266
266
  max_length=2048, # 最大文本大小, 超出截断
267
267
  chunk_size=448, # 块大小 (按 Token 计)
268
- overlapping=32 # 块间重叠大小 (按 Token 计)
268
+ overlapping=32, # 块间重叠大小 (按 Token 计)
269
+ cache_capacity=512 # 缓存大小
269
270
  )
270
271
 
271
272
  trainer = TextBinaryClassifierTrainer(
@@ -247,7 +247,8 @@
247
247
  long_text_encoder = LongTextEncoder(
248
248
  max_length=2048, # 最大文本大小, 超出截断
249
249
  chunk_size=448, # 块大小 (按 Token 计)
250
- overlapping=32 # 块间重叠大小 (按 Token 计)
250
+ overlapping=32, # 块间重叠大小 (按 Token 计)
251
+ cache_capacity=512 # 缓存大小
251
252
  )
252
253
 
253
254
  trainer = TextBinaryClassifierTrainer(
@@ -25,7 +25,7 @@ class Encoder(nn.Module):
25
25
  self.embed_dim = self.encoder.config.max_position_embeddings
26
26
  logger.debug(f'{Encoder.__name__} initialized on device: {self.device}.')
27
27
 
28
- def forward(self, input_ids: torch.Tensor, attention_mask: torch.Tensor) -> torch.Tensor:
28
+ def forward(self, input_ids: torch.Tensor, attention_mask: torch.Tensor, *args, **kwargs) -> torch.Tensor:
29
29
  def _encoder(_input_tup: tuple[torch.Tensor, torch.Tensor]) -> torch.Tensor:
30
30
  return self.encoder.forward(_input_tup[0], attention_mask=_input_tup[1]).last_hidden_state[:, 0, :]
31
31
 
@@ -25,6 +25,10 @@ class LongTextEncoder(Encoder):
25
25
  def __chunk_embedding(self, idx: int, x: torch.Tensor, mask: torch.Tensor) -> tuple[int, torch.Tensor]:
26
26
  return idx, super().forward(x, attention_mask=mask)
27
27
 
28
+ @override
29
+ def forward(self, text: str, flatten: bool = False, *args, **kwargs) -> torch.Tensor:
30
+ return self.encode(text=text, flatten=flatten)
31
+
28
32
  @override
29
33
  def encode(self, text: str, flatten: bool = False) -> torch.Tensor:
30
34
  def postprocess(tensors: list[torch.Tensor], _flatten: bool) -> torch.Tensor:
@@ -31,7 +31,7 @@ class TextBinaryClassifierTrainer(BaseTrainer):
31
31
  positive_texts = positive_texts[:min_length]
32
32
  negative_texts = negative_texts[:min_length]
33
33
  all_texts = positive_texts + negative_texts
34
- text_embeddings = [self._long_text_encoder.encode(x, flatten=False, use_cache=True) for x in all_texts]
34
+ text_embeddings = [self._long_text_encoder.encode(x, flatten=False) for x in all_texts]
35
35
  feature_dim = text_embeddings[0].shape[-1]
36
36
  dtype = text_embeddings[0].dtype
37
37
  labels = ([torch.tensor([1.], dtype=dtype, device=self.device) for _ in range(len(positive_texts))]
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: deeplotx
3
- Version: 0.5.5
3
+ Version: 0.5.6
4
4
  Summary: Easy-2-use long text NLP toolkit.
5
5
  Requires-Python: >=3.10
6
6
  Description-Content-Type: text/markdown
@@ -265,7 +265,8 @@ Dynamic: license-file
265
265
  long_text_encoder = LongTextEncoder(
266
266
  max_length=2048, # 最大文本大小, 超出截断
267
267
  chunk_size=448, # 块大小 (按 Token 计)
268
- overlapping=32 # 块间重叠大小 (按 Token 计)
268
+ overlapping=32, # 块间重叠大小 (按 Token 计)
269
+ cache_capacity=512 # 缓存大小
269
270
  )
270
271
 
271
272
  trainer = TextBinaryClassifierTrainer(
@@ -1,6 +1,6 @@
1
1
  [project]
2
2
  name = "deeplotx"
3
- version = "0.5.5"
3
+ version = "0.5.6"
4
4
  description = "Easy-2-use long text NLP toolkit."
5
5
  readme = "README.md"
6
6
  requires-python = ">=3.10"
File without changes
File without changes
File without changes
File without changes