deeplotx 0.9.5__tar.gz → 0.9.6__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (45) hide show
  1. {deeplotx-0.9.5 → deeplotx-0.9.6}/PKG-INFO +23 -1
  2. {deeplotx-0.9.5 → deeplotx-0.9.6}/README.md +22 -0
  3. {deeplotx-0.9.5 → deeplotx-0.9.6}/deeplotx/ner/bert_ner.py +17 -11
  4. {deeplotx-0.9.5 → deeplotx-0.9.6}/deeplotx.egg-info/PKG-INFO +23 -1
  5. {deeplotx-0.9.5 → deeplotx-0.9.6}/pyproject.toml +1 -1
  6. {deeplotx-0.9.5 → deeplotx-0.9.6}/LICENSE +0 -0
  7. {deeplotx-0.9.5 → deeplotx-0.9.6}/deeplotx/__init__.py +0 -0
  8. {deeplotx-0.9.5 → deeplotx-0.9.6}/deeplotx/encoder/__init__.py +0 -0
  9. {deeplotx-0.9.5 → deeplotx-0.9.6}/deeplotx/encoder/encoder.py +0 -0
  10. {deeplotx-0.9.5 → deeplotx-0.9.6}/deeplotx/encoder/long_text_encoder.py +0 -0
  11. {deeplotx-0.9.5 → deeplotx-0.9.6}/deeplotx/encoder/longformer_encoder.py +0 -0
  12. {deeplotx-0.9.5 → deeplotx-0.9.6}/deeplotx/ner/__init__.py +0 -0
  13. {deeplotx-0.9.5 → deeplotx-0.9.6}/deeplotx/ner/base_ner.py +0 -0
  14. {deeplotx-0.9.5 → deeplotx-0.9.6}/deeplotx/ner/n2g/__init__.py +0 -0
  15. {deeplotx-0.9.5 → deeplotx-0.9.6}/deeplotx/ner/named_entity.py +0 -0
  16. {deeplotx-0.9.5 → deeplotx-0.9.6}/deeplotx/nn/__init__.py +0 -0
  17. {deeplotx-0.9.5 → deeplotx-0.9.6}/deeplotx/nn/attention.py +0 -0
  18. {deeplotx-0.9.5 → deeplotx-0.9.6}/deeplotx/nn/auto_regression.py +0 -0
  19. {deeplotx-0.9.5 → deeplotx-0.9.6}/deeplotx/nn/base_neural_network.py +0 -0
  20. {deeplotx-0.9.5 → deeplotx-0.9.6}/deeplotx/nn/feed_forward.py +0 -0
  21. {deeplotx-0.9.5 → deeplotx-0.9.6}/deeplotx/nn/linear_regression.py +0 -0
  22. {deeplotx-0.9.5 → deeplotx-0.9.6}/deeplotx/nn/logistic_regression.py +0 -0
  23. {deeplotx-0.9.5 → deeplotx-0.9.6}/deeplotx/nn/long_context_auto_regression.py +0 -0
  24. {deeplotx-0.9.5 → deeplotx-0.9.6}/deeplotx/nn/long_context_recursive_sequential.py +0 -0
  25. {deeplotx-0.9.5 → deeplotx-0.9.6}/deeplotx/nn/multi_head_attention.py +0 -0
  26. {deeplotx-0.9.5 → deeplotx-0.9.6}/deeplotx/nn/multi_head_feed_forward.py +0 -0
  27. {deeplotx-0.9.5 → deeplotx-0.9.6}/deeplotx/nn/recursive_sequential.py +0 -0
  28. {deeplotx-0.9.5 → deeplotx-0.9.6}/deeplotx/nn/roformer_encoder.py +0 -0
  29. {deeplotx-0.9.5 → deeplotx-0.9.6}/deeplotx/nn/rope.py +0 -0
  30. {deeplotx-0.9.5 → deeplotx-0.9.6}/deeplotx/nn/softmax_regression.py +0 -0
  31. {deeplotx-0.9.5 → deeplotx-0.9.6}/deeplotx/similarity/__init__.py +0 -0
  32. {deeplotx-0.9.5 → deeplotx-0.9.6}/deeplotx/similarity/distribution.py +0 -0
  33. {deeplotx-0.9.5 → deeplotx-0.9.6}/deeplotx/similarity/set.py +0 -0
  34. {deeplotx-0.9.5 → deeplotx-0.9.6}/deeplotx/similarity/vector.py +0 -0
  35. {deeplotx-0.9.5 → deeplotx-0.9.6}/deeplotx/trainer/__init__.py +0 -0
  36. {deeplotx-0.9.5 → deeplotx-0.9.6}/deeplotx/trainer/base_trainer.py +0 -0
  37. {deeplotx-0.9.5 → deeplotx-0.9.6}/deeplotx/trainer/text_binary_classification_trainer.py +0 -0
  38. {deeplotx-0.9.5 → deeplotx-0.9.6}/deeplotx/util/__init__.py +0 -0
  39. {deeplotx-0.9.5 → deeplotx-0.9.6}/deeplotx/util/hash.py +0 -0
  40. {deeplotx-0.9.5 → deeplotx-0.9.6}/deeplotx/util/read_file.py +0 -0
  41. {deeplotx-0.9.5 → deeplotx-0.9.6}/deeplotx.egg-info/SOURCES.txt +0 -0
  42. {deeplotx-0.9.5 → deeplotx-0.9.6}/deeplotx.egg-info/dependency_links.txt +0 -0
  43. {deeplotx-0.9.5 → deeplotx-0.9.6}/deeplotx.egg-info/requires.txt +0 -0
  44. {deeplotx-0.9.5 → deeplotx-0.9.6}/deeplotx.egg-info/top_level.txt +0 -0
  45. {deeplotx-0.9.5 → deeplotx-0.9.6}/setup.cfg +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: deeplotx
3
- Version: 0.9.5
3
+ Version: 0.9.6
4
4
  Summary: An out-of-the-box long-text NLP framework.
5
5
  Requires-Python: >=3.10
6
6
  Description-Content-Type: text/markdown
@@ -27,6 +27,28 @@ Dynamic: license-file
27
27
 
28
28
  > Author: [vortezwohl](https://github.com/vortezwohl)
29
29
 
30
+ ## Citation
31
+
32
+ If you are incorporating the `DeepLoTX` framework into your research, please remember to properly cite it to acknowledge its contribution to your work.
33
+
34
+ Если вы интегрируете фреймворк `DeepLoTX` в своё исследование, пожалуйста, не забудьте правильно сослаться на него, указывая его вклад в вашу работу.
35
+
36
+ もしあなたが研究に `DeepLoTX` フレームワークを組み入れているなら、その貢献を認めるために適切に引用することを忘れないでください。
37
+
38
+ 如果您正在將 `DeepLoTX` 框架整合到您的研究中,請務必正確引用它,以聲明它對您工作的貢獻。
39
+
40
+ ```bibtex
41
+ @software{Wu_DeepLoTX_2025,
42
+ author = {Wu, Zihao},
43
+ license = {GPL-3.0},
44
+ month = aug,
45
+ title = {{DeepLoTX}},
46
+ url = {https://github.com/vortezwohl/DeepLoTX},
47
+ version = {0.9.5},
48
+ year = {2025}
49
+ }
50
+ ```
51
+
30
52
  ## Installation
31
53
 
32
54
  - **With pip**
@@ -6,6 +6,28 @@
6
6
 
7
7
  > Author: [vortezwohl](https://github.com/vortezwohl)
8
8
 
9
+ ## Citation
10
+
11
+ If you are incorporating the `DeepLoTX` framework into your research, please remember to properly cite it to acknowledge its contribution to your work.
12
+
13
+ Если вы интегрируете фреймворк `DeepLoTX` в своё исследование, пожалуйста, не забудьте правильно сослаться на него, указывая его вклад в вашу работу.
14
+
15
+ もしあなたが研究に `DeepLoTX` フレームワークを組み入れているなら、その貢献を認めるために適切に引用することを忘れないでください。
16
+
17
+ 如果您正在將 `DeepLoTX` 框架整合到您的研究中,請務必正確引用它,以聲明它對您工作的貢獻。
18
+
19
+ ```bibtex
20
+ @software{Wu_DeepLoTX_2025,
21
+ author = {Wu, Zihao},
22
+ license = {GPL-3.0},
23
+ month = aug,
24
+ title = {{DeepLoTX}},
25
+ url = {https://github.com/vortezwohl/DeepLoTX},
26
+ version = {0.9.5},
27
+ year = {2025}
28
+ }
29
+ ```
30
+
9
31
  ## Installation
10
32
 
11
33
  - **With pip**
@@ -12,7 +12,7 @@ from deeplotx.ner.named_entity import NamedEntity, NamedPerson
12
12
 
13
13
  CACHE_PATH = os.path.join(__ROOT__, '.cache')
14
14
  NEW_LINE, BLANK = '\n', ' '
15
- DEFAULT_LENGTH_THRESHOLD = 384
15
+ DEFAULT_LENGTH_THRESHOLD = 448
16
16
  DEFAULT_BERT_NER = 'Davlan/xlm-roberta-base-ner-hrl'
17
17
  N2G_MODEL: list[Name2Gender] = []
18
18
  logger = logging.getLogger('deeplotx.ner')
@@ -50,7 +50,7 @@ class BertNER(BaseNER):
50
50
 
51
51
  def _fast_extract(self, s: str, with_gender: bool = True, prob_threshold: float = .0) -> list[NamedEntity]:
52
52
  assert prob_threshold <= 1., f'prob_threshold ({prob_threshold}) cannot be larger than 1.'
53
- s = f' {s.replace(NEW_LINE, BLANK)} '
53
+ s = f' {s.replace(NEW_LINE, BLANK * 2)} '
54
54
  raw_entities = self._ner_pipeline(s)
55
55
  entities = []
56
56
  for ent in raw_entities:
@@ -60,7 +60,10 @@ class BertNER(BaseNER):
60
60
  if len(ent[0].strip()) < 1:
61
61
  del entities[i]
62
62
  if ent[1].upper().startswith('I') and entities[i - 1][1].upper().startswith('B'):
63
- entities[i - 1][0] += ent[0]
63
+ if entities[i - 1][0] + ent[0] in s:
64
+ entities[i - 1][0] += ent[0]
65
+ else:
66
+ entities[i - 1][0] += f' {ent[0]}'
64
67
  entities[i - 1][2] *= ent[2]
65
68
  del entities[i]
66
69
  _continue = False
@@ -102,18 +105,20 @@ class BertNER(BaseNER):
102
105
  gender_probability=gender_prob)
103
106
  return entities
104
107
 
105
- def _slow_extract(self, s: str, with_gender: bool = True, prob_threshold: float = .0, deduplicate: bool = True) -> list[NamedEntity]:
106
- _length_threshold = DEFAULT_LENGTH_THRESHOLD
108
+ def _slow_extract(self, s: str, with_gender: bool = True, prob_threshold: float = .0,
109
+ window_size: int = DEFAULT_LENGTH_THRESHOLD, deduplicate: bool = True) -> list[NamedEntity]:
107
110
  _s_seq = self.tokenizer.encode(s, add_special_tokens=False)
108
111
  _entities = self._fast_extract(self.tokenizer.decode(_s_seq, skip_special_tokens=True),
109
112
  with_gender=with_gender,
110
- prob_threshold=prob_threshold) if len(_s_seq) < _length_threshold else []
113
+ prob_threshold=prob_threshold) if len(_s_seq) < window_size else []
111
114
  # sliding window extracting
112
- if len(_s_seq) >= _length_threshold:
113
- _window_size = _length_threshold
114
- _stride = _length_threshold // 4
115
- for i in range(0, len(_s_seq) + _stride, _stride):
116
- _window_text = self.tokenizer.decode(_s_seq[i: i + _window_size], skip_special_tokens=True)
115
+ if len(_s_seq) >= window_size:
116
+ _stride = window_size // 4
117
+ for i in range(0, len(_s_seq) + window_size, _stride):
118
+ _tmp_s_seq = _s_seq[i: i + window_size]
119
+ if len(_tmp_s_seq) < 1:
120
+ continue
121
+ _window_text = self.tokenizer.decode(_tmp_s_seq, skip_special_tokens=True)
117
122
  _entities.extend(self._fast_extract(_window_text, with_gender=with_gender, prob_threshold=prob_threshold))
118
123
  # entity combination
119
124
  _tmp_entities = sorted(_entities, key=lambda x: len(x.text), reverse=True)
@@ -155,4 +160,5 @@ class BertNER(BaseNER):
155
160
  return self._fast_extract(s=s, with_gender=with_gender, prob_threshold=prob_threshold)
156
161
  else:
157
162
  return self._slow_extract(s=s, with_gender=with_gender, prob_threshold=prob_threshold,
163
+ window_size=kwargs.get('window_size', DEFAULT_LENGTH_THRESHOLD),
158
164
  deduplicate=kwargs.get('deduplicate', True))
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: deeplotx
3
- Version: 0.9.5
3
+ Version: 0.9.6
4
4
  Summary: An out-of-the-box long-text NLP framework.
5
5
  Requires-Python: >=3.10
6
6
  Description-Content-Type: text/markdown
@@ -27,6 +27,28 @@ Dynamic: license-file
27
27
 
28
28
  > Author: [vortezwohl](https://github.com/vortezwohl)
29
29
 
30
+ ## Citation
31
+
32
+ If you are incorporating the `DeepLoTX` framework into your research, please remember to properly cite it to acknowledge its contribution to your work.
33
+
34
+ Если вы интегрируете фреймворк `DeepLoTX` в своё исследование, пожалуйста, не забудьте правильно сослаться на него, указывая его вклад в вашу работу.
35
+
36
+ もしあなたが研究に `DeepLoTX` フレームワークを組み入れているなら、その貢献を認めるために適切に引用することを忘れないでください。
37
+
38
+ 如果您正在將 `DeepLoTX` 框架整合到您的研究中,請務必正確引用它,以聲明它對您工作的貢獻。
39
+
40
+ ```bibtex
41
+ @software{Wu_DeepLoTX_2025,
42
+ author = {Wu, Zihao},
43
+ license = {GPL-3.0},
44
+ month = aug,
45
+ title = {{DeepLoTX}},
46
+ url = {https://github.com/vortezwohl/DeepLoTX},
47
+ version = {0.9.5},
48
+ year = {2025}
49
+ }
50
+ ```
51
+
30
52
  ## Installation
31
53
 
32
54
  - **With pip**
@@ -1,6 +1,6 @@
1
1
  [project]
2
2
  name = "deeplotx"
3
- version = "0.9.5"
3
+ version = '0.9.6'
4
4
  description = "An out-of-the-box long-text NLP framework."
5
5
  readme = "README.md"
6
6
  requires-python = ">=3.10"
File without changes
File without changes
File without changes
File without changes
File without changes