deeplotx 0.9.5__tar.gz → 0.9.6__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {deeplotx-0.9.5 → deeplotx-0.9.6}/PKG-INFO +23 -1
- {deeplotx-0.9.5 → deeplotx-0.9.6}/README.md +22 -0
- {deeplotx-0.9.5 → deeplotx-0.9.6}/deeplotx/ner/bert_ner.py +17 -11
- {deeplotx-0.9.5 → deeplotx-0.9.6}/deeplotx.egg-info/PKG-INFO +23 -1
- {deeplotx-0.9.5 → deeplotx-0.9.6}/pyproject.toml +1 -1
- {deeplotx-0.9.5 → deeplotx-0.9.6}/LICENSE +0 -0
- {deeplotx-0.9.5 → deeplotx-0.9.6}/deeplotx/__init__.py +0 -0
- {deeplotx-0.9.5 → deeplotx-0.9.6}/deeplotx/encoder/__init__.py +0 -0
- {deeplotx-0.9.5 → deeplotx-0.9.6}/deeplotx/encoder/encoder.py +0 -0
- {deeplotx-0.9.5 → deeplotx-0.9.6}/deeplotx/encoder/long_text_encoder.py +0 -0
- {deeplotx-0.9.5 → deeplotx-0.9.6}/deeplotx/encoder/longformer_encoder.py +0 -0
- {deeplotx-0.9.5 → deeplotx-0.9.6}/deeplotx/ner/__init__.py +0 -0
- {deeplotx-0.9.5 → deeplotx-0.9.6}/deeplotx/ner/base_ner.py +0 -0
- {deeplotx-0.9.5 → deeplotx-0.9.6}/deeplotx/ner/n2g/__init__.py +0 -0
- {deeplotx-0.9.5 → deeplotx-0.9.6}/deeplotx/ner/named_entity.py +0 -0
- {deeplotx-0.9.5 → deeplotx-0.9.6}/deeplotx/nn/__init__.py +0 -0
- {deeplotx-0.9.5 → deeplotx-0.9.6}/deeplotx/nn/attention.py +0 -0
- {deeplotx-0.9.5 → deeplotx-0.9.6}/deeplotx/nn/auto_regression.py +0 -0
- {deeplotx-0.9.5 → deeplotx-0.9.6}/deeplotx/nn/base_neural_network.py +0 -0
- {deeplotx-0.9.5 → deeplotx-0.9.6}/deeplotx/nn/feed_forward.py +0 -0
- {deeplotx-0.9.5 → deeplotx-0.9.6}/deeplotx/nn/linear_regression.py +0 -0
- {deeplotx-0.9.5 → deeplotx-0.9.6}/deeplotx/nn/logistic_regression.py +0 -0
- {deeplotx-0.9.5 → deeplotx-0.9.6}/deeplotx/nn/long_context_auto_regression.py +0 -0
- {deeplotx-0.9.5 → deeplotx-0.9.6}/deeplotx/nn/long_context_recursive_sequential.py +0 -0
- {deeplotx-0.9.5 → deeplotx-0.9.6}/deeplotx/nn/multi_head_attention.py +0 -0
- {deeplotx-0.9.5 → deeplotx-0.9.6}/deeplotx/nn/multi_head_feed_forward.py +0 -0
- {deeplotx-0.9.5 → deeplotx-0.9.6}/deeplotx/nn/recursive_sequential.py +0 -0
- {deeplotx-0.9.5 → deeplotx-0.9.6}/deeplotx/nn/roformer_encoder.py +0 -0
- {deeplotx-0.9.5 → deeplotx-0.9.6}/deeplotx/nn/rope.py +0 -0
- {deeplotx-0.9.5 → deeplotx-0.9.6}/deeplotx/nn/softmax_regression.py +0 -0
- {deeplotx-0.9.5 → deeplotx-0.9.6}/deeplotx/similarity/__init__.py +0 -0
- {deeplotx-0.9.5 → deeplotx-0.9.6}/deeplotx/similarity/distribution.py +0 -0
- {deeplotx-0.9.5 → deeplotx-0.9.6}/deeplotx/similarity/set.py +0 -0
- {deeplotx-0.9.5 → deeplotx-0.9.6}/deeplotx/similarity/vector.py +0 -0
- {deeplotx-0.9.5 → deeplotx-0.9.6}/deeplotx/trainer/__init__.py +0 -0
- {deeplotx-0.9.5 → deeplotx-0.9.6}/deeplotx/trainer/base_trainer.py +0 -0
- {deeplotx-0.9.5 → deeplotx-0.9.6}/deeplotx/trainer/text_binary_classification_trainer.py +0 -0
- {deeplotx-0.9.5 → deeplotx-0.9.6}/deeplotx/util/__init__.py +0 -0
- {deeplotx-0.9.5 → deeplotx-0.9.6}/deeplotx/util/hash.py +0 -0
- {deeplotx-0.9.5 → deeplotx-0.9.6}/deeplotx/util/read_file.py +0 -0
- {deeplotx-0.9.5 → deeplotx-0.9.6}/deeplotx.egg-info/SOURCES.txt +0 -0
- {deeplotx-0.9.5 → deeplotx-0.9.6}/deeplotx.egg-info/dependency_links.txt +0 -0
- {deeplotx-0.9.5 → deeplotx-0.9.6}/deeplotx.egg-info/requires.txt +0 -0
- {deeplotx-0.9.5 → deeplotx-0.9.6}/deeplotx.egg-info/top_level.txt +0 -0
- {deeplotx-0.9.5 → deeplotx-0.9.6}/setup.cfg +0 -0
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.4
|
2
2
|
Name: deeplotx
|
3
|
-
Version: 0.9.5
|
3
|
+
Version: 0.9.6
|
4
4
|
Summary: An out-of-the-box long-text NLP framework.
|
5
5
|
Requires-Python: >=3.10
|
6
6
|
Description-Content-Type: text/markdown
|
@@ -27,6 +27,28 @@ Dynamic: license-file
|
|
27
27
|
|
28
28
|
> Author: [vortezwohl](https://github.com/vortezwohl)
|
29
29
|
|
30
|
+
## Citation
|
31
|
+
|
32
|
+
If you are incorporating the `DeepLoTX` framework into your research, please remember to properly cite it to acknowledge its contribution to your work.
|
33
|
+
|
34
|
+
Если вы интегрируете фреймворк `DeepLoTX` в своё исследование, пожалуйста, не забудьте правильно сослаться на него, указывая его вклад в вашу работу.
|
35
|
+
|
36
|
+
もしあなたが研究に `DeepLoTX` フレームワークを組み入れているなら、その貢献を認めるために適切に引用することを忘れないでください.
|
37
|
+
|
38
|
+
如果您正在將 `DeepLoTX` 框架整合到您的研究中,請務必正確引用它,以聲明它對您工作的貢獻.
|
39
|
+
|
40
|
+
```bibtex
|
41
|
+
@software{Wu_DeepLoTX_2025,
|
42
|
+
author = {Wu, Zihao},
|
43
|
+
license = {GPL-3.0},
|
44
|
+
month = aug,
|
45
|
+
title = {{DeepLoTX}},
|
46
|
+
url = {https://github.com/vortezwohl/DeepLoTX},
|
47
|
+
version = {0.9.5},
|
48
|
+
year = {2025}
|
49
|
+
}
|
50
|
+
```
|
51
|
+
|
30
52
|
## Installation
|
31
53
|
|
32
54
|
- **With pip**
|
@@ -6,6 +6,28 @@
|
|
6
6
|
|
7
7
|
> Author: [vortezwohl](https://github.com/vortezwohl)
|
8
8
|
|
9
|
+
## Citation
|
10
|
+
|
11
|
+
If you are incorporating the `DeepLoTX` framework into your research, please remember to properly cite it to acknowledge its contribution to your work.
|
12
|
+
|
13
|
+
Если вы интегрируете фреймворк `DeepLoTX` в своё исследование, пожалуйста, не забудьте правильно сослаться на него, указывая его вклад в вашу работу.
|
14
|
+
|
15
|
+
もしあなたが研究に `DeepLoTX` フレームワークを組み入れているなら、その貢献を認めるために適切に引用することを忘れないでください.
|
16
|
+
|
17
|
+
如果您正在將 `DeepLoTX` 框架整合到您的研究中,請務必正確引用它,以聲明它對您工作的貢獻.
|
18
|
+
|
19
|
+
```bibtex
|
20
|
+
@software{Wu_DeepLoTX_2025,
|
21
|
+
author = {Wu, Zihao},
|
22
|
+
license = {GPL-3.0},
|
23
|
+
month = aug,
|
24
|
+
title = {{DeepLoTX}},
|
25
|
+
url = {https://github.com/vortezwohl/DeepLoTX},
|
26
|
+
version = {0.9.5},
|
27
|
+
year = {2025}
|
28
|
+
}
|
29
|
+
```
|
30
|
+
|
9
31
|
## Installation
|
10
32
|
|
11
33
|
- **With pip**
|
@@ -12,7 +12,7 @@ from deeplotx.ner.named_entity import NamedEntity, NamedPerson
|
|
12
12
|
|
13
13
|
CACHE_PATH = os.path.join(__ROOT__, '.cache')
|
14
14
|
NEW_LINE, BLANK = '\n', ' '
|
15
|
-
DEFAULT_LENGTH_THRESHOLD =
|
15
|
+
DEFAULT_LENGTH_THRESHOLD = 448
|
16
16
|
DEFAULT_BERT_NER = 'Davlan/xlm-roberta-base-ner-hrl'
|
17
17
|
N2G_MODEL: list[Name2Gender] = []
|
18
18
|
logger = logging.getLogger('deeplotx.ner')
|
@@ -50,7 +50,7 @@ class BertNER(BaseNER):
|
|
50
50
|
|
51
51
|
def _fast_extract(self, s: str, with_gender: bool = True, prob_threshold: float = .0) -> list[NamedEntity]:
|
52
52
|
assert prob_threshold <= 1., f'prob_threshold ({prob_threshold}) cannot be larger than 1.'
|
53
|
-
s = f' {s.replace(NEW_LINE, BLANK)} '
|
53
|
+
s = f' {s.replace(NEW_LINE, BLANK * 2)} '
|
54
54
|
raw_entities = self._ner_pipeline(s)
|
55
55
|
entities = []
|
56
56
|
for ent in raw_entities:
|
@@ -60,7 +60,10 @@ class BertNER(BaseNER):
|
|
60
60
|
if len(ent[0].strip()) < 1:
|
61
61
|
del entities[i]
|
62
62
|
if ent[1].upper().startswith('I') and entities[i - 1][1].upper().startswith('B'):
|
63
|
-
entities[i - 1][0]
|
63
|
+
if entities[i - 1][0] + ent[0] in s:
|
64
|
+
entities[i - 1][0] += ent[0]
|
65
|
+
else:
|
66
|
+
entities[i - 1][0] += f' {ent[0]}'
|
64
67
|
entities[i - 1][2] *= ent[2]
|
65
68
|
del entities[i]
|
66
69
|
_continue = False
|
@@ -102,18 +105,20 @@ class BertNER(BaseNER):
|
|
102
105
|
gender_probability=gender_prob)
|
103
106
|
return entities
|
104
107
|
|
105
|
-
def _slow_extract(self, s: str, with_gender: bool = True, prob_threshold: float = .0,
|
106
|
-
|
108
|
+
def _slow_extract(self, s: str, with_gender: bool = True, prob_threshold: float = .0,
|
109
|
+
window_size: int = DEFAULT_LENGTH_THRESHOLD, deduplicate: bool = True) -> list[NamedEntity]:
|
107
110
|
_s_seq = self.tokenizer.encode(s, add_special_tokens=False)
|
108
111
|
_entities = self._fast_extract(self.tokenizer.decode(_s_seq, skip_special_tokens=True),
|
109
112
|
with_gender=with_gender,
|
110
|
-
prob_threshold=prob_threshold) if len(_s_seq) <
|
113
|
+
prob_threshold=prob_threshold) if len(_s_seq) < window_size else []
|
111
114
|
# sliding window extracting
|
112
|
-
if len(_s_seq) >=
|
113
|
-
|
114
|
-
|
115
|
-
|
116
|
-
|
115
|
+
if len(_s_seq) >= window_size:
|
116
|
+
_stride = window_size // 4
|
117
|
+
for i in range(0, len(_s_seq) + window_size, _stride):
|
118
|
+
_tmp_s_seq = _s_seq[i: i + window_size]
|
119
|
+
if len(_tmp_s_seq) < 1:
|
120
|
+
continue
|
121
|
+
_window_text = self.tokenizer.decode(_tmp_s_seq, skip_special_tokens=True)
|
117
122
|
_entities.extend(self._fast_extract(_window_text, with_gender=with_gender, prob_threshold=prob_threshold))
|
118
123
|
# entity combination
|
119
124
|
_tmp_entities = sorted(_entities, key=lambda x: len(x.text), reverse=True)
|
@@ -155,4 +160,5 @@ class BertNER(BaseNER):
|
|
155
160
|
return self._fast_extract(s=s, with_gender=with_gender, prob_threshold=prob_threshold)
|
156
161
|
else:
|
157
162
|
return self._slow_extract(s=s, with_gender=with_gender, prob_threshold=prob_threshold,
|
163
|
+
window_size=kwargs.get('window_size', DEFAULT_LENGTH_THRESHOLD),
|
158
164
|
deduplicate=kwargs.get('deduplicate', True))
|
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.4
|
2
2
|
Name: deeplotx
|
3
|
-
Version: 0.9.5
|
3
|
+
Version: 0.9.6
|
4
4
|
Summary: An out-of-the-box long-text NLP framework.
|
5
5
|
Requires-Python: >=3.10
|
6
6
|
Description-Content-Type: text/markdown
|
@@ -27,6 +27,28 @@ Dynamic: license-file
|
|
27
27
|
|
28
28
|
> Author: [vortezwohl](https://github.com/vortezwohl)
|
29
29
|
|
30
|
+
## Citation
|
31
|
+
|
32
|
+
If you are incorporating the `DeepLoTX` framework into your research, please remember to properly cite it to acknowledge its contribution to your work.
|
33
|
+
|
34
|
+
Если вы интегрируете фреймворк `DeepLoTX` в своё исследование, пожалуйста, не забудьте правильно сослаться на него, указывая его вклад в вашу работу.
|
35
|
+
|
36
|
+
もしあなたが研究に `DeepLoTX` フレームワークを組み入れているなら、その貢献を認めるために適切に引用することを忘れないでください.
|
37
|
+
|
38
|
+
如果您正在將 `DeepLoTX` 框架整合到您的研究中,請務必正確引用它,以聲明它對您工作的貢獻.
|
39
|
+
|
40
|
+
```bibtex
|
41
|
+
@software{Wu_DeepLoTX_2025,
|
42
|
+
author = {Wu, Zihao},
|
43
|
+
license = {GPL-3.0},
|
44
|
+
month = aug,
|
45
|
+
title = {{DeepLoTX}},
|
46
|
+
url = {https://github.com/vortezwohl/DeepLoTX},
|
47
|
+
version = {0.9.5},
|
48
|
+
year = {2025}
|
49
|
+
}
|
50
|
+
```
|
51
|
+
|
30
52
|
## Installation
|
31
53
|
|
32
54
|
- **With pip**
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|