deeplotx 0.9.5__tar.gz → 0.9.7__tar.gz

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
Files changed (45) hide show
  1. {deeplotx-0.9.5 → deeplotx-0.9.7}/PKG-INFO +23 -1
  2. {deeplotx-0.9.5 → deeplotx-0.9.7}/README.md +22 -0
  3. {deeplotx-0.9.5 → deeplotx-0.9.7}/deeplotx/ner/bert_ner.py +19 -11
  4. {deeplotx-0.9.5 → deeplotx-0.9.7}/deeplotx/util/read_file.py +11 -0
  5. {deeplotx-0.9.5 → deeplotx-0.9.7}/deeplotx.egg-info/PKG-INFO +23 -1
  6. {deeplotx-0.9.5 → deeplotx-0.9.7}/pyproject.toml +1 -1
  7. {deeplotx-0.9.5 → deeplotx-0.9.7}/LICENSE +0 -0
  8. {deeplotx-0.9.5 → deeplotx-0.9.7}/deeplotx/__init__.py +0 -0
  9. {deeplotx-0.9.5 → deeplotx-0.9.7}/deeplotx/encoder/__init__.py +0 -0
  10. {deeplotx-0.9.5 → deeplotx-0.9.7}/deeplotx/encoder/encoder.py +0 -0
  11. {deeplotx-0.9.5 → deeplotx-0.9.7}/deeplotx/encoder/long_text_encoder.py +0 -0
  12. {deeplotx-0.9.5 → deeplotx-0.9.7}/deeplotx/encoder/longformer_encoder.py +0 -0
  13. {deeplotx-0.9.5 → deeplotx-0.9.7}/deeplotx/ner/__init__.py +0 -0
  14. {deeplotx-0.9.5 → deeplotx-0.9.7}/deeplotx/ner/base_ner.py +0 -0
  15. {deeplotx-0.9.5 → deeplotx-0.9.7}/deeplotx/ner/n2g/__init__.py +0 -0
  16. {deeplotx-0.9.5 → deeplotx-0.9.7}/deeplotx/ner/named_entity.py +0 -0
  17. {deeplotx-0.9.5 → deeplotx-0.9.7}/deeplotx/nn/__init__.py +0 -0
  18. {deeplotx-0.9.5 → deeplotx-0.9.7}/deeplotx/nn/attention.py +0 -0
  19. {deeplotx-0.9.5 → deeplotx-0.9.7}/deeplotx/nn/auto_regression.py +0 -0
  20. {deeplotx-0.9.5 → deeplotx-0.9.7}/deeplotx/nn/base_neural_network.py +0 -0
  21. {deeplotx-0.9.5 → deeplotx-0.9.7}/deeplotx/nn/feed_forward.py +0 -0
  22. {deeplotx-0.9.5 → deeplotx-0.9.7}/deeplotx/nn/linear_regression.py +0 -0
  23. {deeplotx-0.9.5 → deeplotx-0.9.7}/deeplotx/nn/logistic_regression.py +0 -0
  24. {deeplotx-0.9.5 → deeplotx-0.9.7}/deeplotx/nn/long_context_auto_regression.py +0 -0
  25. {deeplotx-0.9.5 → deeplotx-0.9.7}/deeplotx/nn/long_context_recursive_sequential.py +0 -0
  26. {deeplotx-0.9.5 → deeplotx-0.9.7}/deeplotx/nn/multi_head_attention.py +0 -0
  27. {deeplotx-0.9.5 → deeplotx-0.9.7}/deeplotx/nn/multi_head_feed_forward.py +0 -0
  28. {deeplotx-0.9.5 → deeplotx-0.9.7}/deeplotx/nn/recursive_sequential.py +0 -0
  29. {deeplotx-0.9.5 → deeplotx-0.9.7}/deeplotx/nn/roformer_encoder.py +0 -0
  30. {deeplotx-0.9.5 → deeplotx-0.9.7}/deeplotx/nn/rope.py +0 -0
  31. {deeplotx-0.9.5 → deeplotx-0.9.7}/deeplotx/nn/softmax_regression.py +0 -0
  32. {deeplotx-0.9.5 → deeplotx-0.9.7}/deeplotx/similarity/__init__.py +0 -0
  33. {deeplotx-0.9.5 → deeplotx-0.9.7}/deeplotx/similarity/distribution.py +0 -0
  34. {deeplotx-0.9.5 → deeplotx-0.9.7}/deeplotx/similarity/set.py +0 -0
  35. {deeplotx-0.9.5 → deeplotx-0.9.7}/deeplotx/similarity/vector.py +0 -0
  36. {deeplotx-0.9.5 → deeplotx-0.9.7}/deeplotx/trainer/__init__.py +0 -0
  37. {deeplotx-0.9.5 → deeplotx-0.9.7}/deeplotx/trainer/base_trainer.py +0 -0
  38. {deeplotx-0.9.5 → deeplotx-0.9.7}/deeplotx/trainer/text_binary_classification_trainer.py +0 -0
  39. {deeplotx-0.9.5 → deeplotx-0.9.7}/deeplotx/util/__init__.py +0 -0
  40. {deeplotx-0.9.5 → deeplotx-0.9.7}/deeplotx/util/hash.py +0 -0
  41. {deeplotx-0.9.5 → deeplotx-0.9.7}/deeplotx.egg-info/SOURCES.txt +0 -0
  42. {deeplotx-0.9.5 → deeplotx-0.9.7}/deeplotx.egg-info/dependency_links.txt +0 -0
  43. {deeplotx-0.9.5 → deeplotx-0.9.7}/deeplotx.egg-info/requires.txt +0 -0
  44. {deeplotx-0.9.5 → deeplotx-0.9.7}/deeplotx.egg-info/top_level.txt +0 -0
  45. {deeplotx-0.9.5 → deeplotx-0.9.7}/setup.cfg +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: deeplotx
3
- Version: 0.9.5
3
+ Version: 0.9.7
4
4
  Summary: An out-of-the-box long-text NLP framework.
5
5
  Requires-Python: >=3.10
6
6
  Description-Content-Type: text/markdown
@@ -27,6 +27,28 @@ Dynamic: license-file
27
27
 
28
28
  > Author: [vortezwohl](https://github.com/vortezwohl)
29
29
 
30
+ ## Citation
31
+
32
+ If you are incorporating the `DeepLoTX` framework into your research, please remember to properly cite it to acknowledge its contribution to your work.
33
+
34
+ Если вы интегрируете фреймворк `DeepLoTX` в своё исследование, пожалуйста, не забудьте правильно сослаться на него, указывая его вклад в вашу работу.
35
+
36
+ もしあなたが研究に `DeepLoTX` フレームワークを組み入れているなら、その貢献を認めるために適切に引用することを忘れないでください。
37
+
38
+ 如果您正在將 `DeepLoTX` 框架整合到您的研究中，請務必正確引用它，以聲明它對您工作的貢獻。
39
+
40
+ ```bibtex
41
+ @software{Wu_DeepLoTX_2025,
42
+ author = {Wu, Zihao},
43
+ license = {GPL-3.0},
44
+ month = aug,
45
+ title = {{DeepLoTX}},
46
+ url = {https://github.com/vortezwohl/DeepLoTX},
47
+ version = {0.9.5},
48
+ year = {2025}
49
+ }
50
+ ```
51
+
30
52
  ## Installation
31
53
 
32
54
  - **With pip**
@@ -6,6 +6,28 @@
6
6
 
7
7
  > Author: [vortezwohl](https://github.com/vortezwohl)
8
8
 
9
+ ## Citation
10
+
11
+ If you are incorporating the `DeepLoTX` framework into your research, please remember to properly cite it to acknowledge its contribution to your work.
12
+
13
+ Если вы интегрируете фреймворк `DeepLoTX` в своё исследование, пожалуйста, не забудьте правильно сослаться на него, указывая его вклад в вашу работу.
14
+
15
+ もしあなたが研究に `DeepLoTX` フレームワークを組み入れているなら、その貢献を認めるために適切に引用することを忘れないでください。
16
+
17
+ 如果您正在將 `DeepLoTX` 框架整合到您的研究中，請務必正確引用它，以聲明它對您工作的貢獻。
18
+
19
+ ```bibtex
20
+ @software{Wu_DeepLoTX_2025,
21
+ author = {Wu, Zihao},
22
+ license = {GPL-3.0},
23
+ month = aug,
24
+ title = {{DeepLoTX}},
25
+ url = {https://github.com/vortezwohl/DeepLoTX},
26
+ version = {0.9.5},
27
+ year = {2025}
28
+ }
29
+ ```
30
+
9
31
  ## Installation
10
32
 
11
33
  - **With pip**
@@ -12,7 +12,7 @@ from deeplotx.ner.named_entity import NamedEntity, NamedPerson
12
12
 
13
13
  CACHE_PATH = os.path.join(__ROOT__, '.cache')
14
14
  NEW_LINE, BLANK = '\n', ' '
15
- DEFAULT_LENGTH_THRESHOLD = 384
15
+ DEFAULT_LENGTH_THRESHOLD = 448
16
16
  DEFAULT_BERT_NER = 'Davlan/xlm-roberta-base-ner-hrl'
17
17
  N2G_MODEL: list[Name2Gender] = []
18
18
  logger = logging.getLogger('deeplotx.ner')
@@ -50,7 +50,7 @@ class BertNER(BaseNER):
50
50
 
51
51
  def _fast_extract(self, s: str, with_gender: bool = True, prob_threshold: float = .0) -> list[NamedEntity]:
52
52
  assert prob_threshold <= 1., f'prob_threshold ({prob_threshold}) cannot be larger than 1.'
53
- s = f' {s.replace(NEW_LINE, BLANK)} '
53
+ s = f' {s.replace(NEW_LINE, BLANK * 2)} '
54
54
  raw_entities = self._ner_pipeline(s)
55
55
  entities = []
56
56
  for ent in raw_entities:
@@ -60,7 +60,10 @@ class BertNER(BaseNER):
60
60
  if len(ent[0].strip()) < 1:
61
61
  del entities[i]
62
62
  if ent[1].upper().startswith('I') and entities[i - 1][1].upper().startswith('B'):
63
- entities[i - 1][0] += ent[0]
63
+ if entities[i - 1][0] + ent[0] in s:
64
+ entities[i - 1][0] += ent[0]
65
+ else:
66
+ entities[i - 1][0] += f' {ent[0]}'
64
67
  entities[i - 1][2] *= ent[2]
65
68
  del entities[i]
66
69
  _continue = False
@@ -71,6 +74,8 @@ class BertNER(BaseNER):
71
74
  break
72
75
  for ent in entities:
73
76
  ent[0] = ent[0].strip()
77
+ if len(ent[0]) < 1:
78
+ ent[0] = ' '
74
79
  # stripping
75
80
  while not ent[0][0].isalpha():
76
81
  if len(ent[0]) < 2:
@@ -102,18 +107,20 @@ class BertNER(BaseNER):
102
107
  gender_probability=gender_prob)
103
108
  return entities
104
109
 
105
- def _slow_extract(self, s: str, with_gender: bool = True, prob_threshold: float = .0, deduplicate: bool = True) -> list[NamedEntity]:
106
- _length_threshold = DEFAULT_LENGTH_THRESHOLD
110
+ def _slow_extract(self, s: str, with_gender: bool = True, prob_threshold: float = .0,
111
+ window_size: int = DEFAULT_LENGTH_THRESHOLD, deduplicate: bool = True) -> list[NamedEntity]:
107
112
  _s_seq = self.tokenizer.encode(s, add_special_tokens=False)
108
113
  _entities = self._fast_extract(self.tokenizer.decode(_s_seq, skip_special_tokens=True),
109
114
  with_gender=with_gender,
110
- prob_threshold=prob_threshold) if len(_s_seq) < _length_threshold else []
115
+ prob_threshold=prob_threshold) if len(_s_seq) < window_size else []
111
116
  # sliding window extracting
112
- if len(_s_seq) >= _length_threshold:
113
- _window_size = _length_threshold
114
- _stride = _length_threshold // 4
115
- for i in range(0, len(_s_seq) + _stride, _stride):
116
- _window_text = self.tokenizer.decode(_s_seq[i: i + _window_size], skip_special_tokens=True)
117
+ if len(_s_seq) >= window_size:
118
+ _stride = window_size // 4
119
+ for i in range(0, len(_s_seq) + window_size, _stride):
120
+ _tmp_s_seq = _s_seq[i: i + window_size]
121
+ if len(_tmp_s_seq) < 1:
122
+ continue
123
+ _window_text = self.tokenizer.decode(_tmp_s_seq, skip_special_tokens=True)
117
124
  _entities.extend(self._fast_extract(_window_text, with_gender=with_gender, prob_threshold=prob_threshold))
118
125
  # entity combination
119
126
  _tmp_entities = sorted(_entities, key=lambda x: len(x.text), reverse=True)
@@ -155,4 +162,5 @@ class BertNER(BaseNER):
155
162
  return self._fast_extract(s=s, with_gender=with_gender, prob_threshold=prob_threshold)
156
163
  else:
157
164
  return self._slow_extract(s=s, with_gender=with_gender, prob_threshold=prob_threshold,
165
+ window_size=kwargs.get('window_size', DEFAULT_LENGTH_THRESHOLD),
158
166
  deduplicate=kwargs.get('deduplicate', True))
@@ -13,6 +13,17 @@ def read_file(path: str, encoding: str = 'utf-8') -> str:
13
13
  pass
14
14
 
15
15
 
16
+ def write_file(content: str | bytes, path: str, encoding: str = 'utf-8') -> str:
17
+ os.makedirs(os.path.dirname(path), exist_ok=True)
18
+ if isinstance(content, bytes):
19
+ with open(path, mode='wb') as f:
20
+ f.write(content)
21
+ return path
22
+ with open(path, mode='w', encoding=encoding) as f:
23
+ f.write(content)
24
+ return path
25
+
26
+
16
27
  def get_files(path: str) -> list:
17
28
  if os.path.exists(path):
18
29
  entries = os.listdir(path)
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: deeplotx
3
- Version: 0.9.5
3
+ Version: 0.9.7
4
4
  Summary: An out-of-the-box long-text NLP framework.
5
5
  Requires-Python: >=3.10
6
6
  Description-Content-Type: text/markdown
@@ -27,6 +27,28 @@ Dynamic: license-file
27
27
 
28
28
  > Author: [vortezwohl](https://github.com/vortezwohl)
29
29
 
30
+ ## Citation
31
+
32
+ If you are incorporating the `DeepLoTX` framework into your research, please remember to properly cite it to acknowledge its contribution to your work.
33
+
34
+ Если вы интегрируете фреймворк `DeepLoTX` в своё исследование, пожалуйста, не забудьте правильно сослаться на него, указывая его вклад в вашу работу.
35
+
36
+ もしあなたが研究に `DeepLoTX` フレームワークを組み入れているなら、その貢献を認めるために適切に引用することを忘れないでください。
37
+
38
+ 如果您正在將 `DeepLoTX` 框架整合到您的研究中，請務必正確引用它，以聲明它對您工作的貢獻。
39
+
40
+ ```bibtex
41
+ @software{Wu_DeepLoTX_2025,
42
+ author = {Wu, Zihao},
43
+ license = {GPL-3.0},
44
+ month = aug,
45
+ title = {{DeepLoTX}},
46
+ url = {https://github.com/vortezwohl/DeepLoTX},
47
+ version = {0.9.5},
48
+ year = {2025}
49
+ }
50
+ ```
51
+
30
52
  ## Installation
31
53
 
32
54
  - **With pip**
@@ -1,6 +1,6 @@
1
1
  [project]
2
2
  name = "deeplotx"
3
- version = "0.9.5"
3
+ version = '0.9.7'
4
4
  description = "An out-of-the-box long-text NLP framework."
5
5
  readme = "README.md"
6
6
  requires-python = ">=3.10"
File without changes
File without changes
File without changes
File without changes
File without changes