deeplotx 0.9.5__tar.gz → 0.9.7__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {deeplotx-0.9.5 → deeplotx-0.9.7}/PKG-INFO +23 -1
- {deeplotx-0.9.5 → deeplotx-0.9.7}/README.md +22 -0
- {deeplotx-0.9.5 → deeplotx-0.9.7}/deeplotx/ner/bert_ner.py +19 -11
- {deeplotx-0.9.5 → deeplotx-0.9.7}/deeplotx/util/read_file.py +11 -0
- {deeplotx-0.9.5 → deeplotx-0.9.7}/deeplotx.egg-info/PKG-INFO +23 -1
- {deeplotx-0.9.5 → deeplotx-0.9.7}/pyproject.toml +1 -1
- {deeplotx-0.9.5 → deeplotx-0.9.7}/LICENSE +0 -0
- {deeplotx-0.9.5 → deeplotx-0.9.7}/deeplotx/__init__.py +0 -0
- {deeplotx-0.9.5 → deeplotx-0.9.7}/deeplotx/encoder/__init__.py +0 -0
- {deeplotx-0.9.5 → deeplotx-0.9.7}/deeplotx/encoder/encoder.py +0 -0
- {deeplotx-0.9.5 → deeplotx-0.9.7}/deeplotx/encoder/long_text_encoder.py +0 -0
- {deeplotx-0.9.5 → deeplotx-0.9.7}/deeplotx/encoder/longformer_encoder.py +0 -0
- {deeplotx-0.9.5 → deeplotx-0.9.7}/deeplotx/ner/__init__.py +0 -0
- {deeplotx-0.9.5 → deeplotx-0.9.7}/deeplotx/ner/base_ner.py +0 -0
- {deeplotx-0.9.5 → deeplotx-0.9.7}/deeplotx/ner/n2g/__init__.py +0 -0
- {deeplotx-0.9.5 → deeplotx-0.9.7}/deeplotx/ner/named_entity.py +0 -0
- {deeplotx-0.9.5 → deeplotx-0.9.7}/deeplotx/nn/__init__.py +0 -0
- {deeplotx-0.9.5 → deeplotx-0.9.7}/deeplotx/nn/attention.py +0 -0
- {deeplotx-0.9.5 → deeplotx-0.9.7}/deeplotx/nn/auto_regression.py +0 -0
- {deeplotx-0.9.5 → deeplotx-0.9.7}/deeplotx/nn/base_neural_network.py +0 -0
- {deeplotx-0.9.5 → deeplotx-0.9.7}/deeplotx/nn/feed_forward.py +0 -0
- {deeplotx-0.9.5 → deeplotx-0.9.7}/deeplotx/nn/linear_regression.py +0 -0
- {deeplotx-0.9.5 → deeplotx-0.9.7}/deeplotx/nn/logistic_regression.py +0 -0
- {deeplotx-0.9.5 → deeplotx-0.9.7}/deeplotx/nn/long_context_auto_regression.py +0 -0
- {deeplotx-0.9.5 → deeplotx-0.9.7}/deeplotx/nn/long_context_recursive_sequential.py +0 -0
- {deeplotx-0.9.5 → deeplotx-0.9.7}/deeplotx/nn/multi_head_attention.py +0 -0
- {deeplotx-0.9.5 → deeplotx-0.9.7}/deeplotx/nn/multi_head_feed_forward.py +0 -0
- {deeplotx-0.9.5 → deeplotx-0.9.7}/deeplotx/nn/recursive_sequential.py +0 -0
- {deeplotx-0.9.5 → deeplotx-0.9.7}/deeplotx/nn/roformer_encoder.py +0 -0
- {deeplotx-0.9.5 → deeplotx-0.9.7}/deeplotx/nn/rope.py +0 -0
- {deeplotx-0.9.5 → deeplotx-0.9.7}/deeplotx/nn/softmax_regression.py +0 -0
- {deeplotx-0.9.5 → deeplotx-0.9.7}/deeplotx/similarity/__init__.py +0 -0
- {deeplotx-0.9.5 → deeplotx-0.9.7}/deeplotx/similarity/distribution.py +0 -0
- {deeplotx-0.9.5 → deeplotx-0.9.7}/deeplotx/similarity/set.py +0 -0
- {deeplotx-0.9.5 → deeplotx-0.9.7}/deeplotx/similarity/vector.py +0 -0
- {deeplotx-0.9.5 → deeplotx-0.9.7}/deeplotx/trainer/__init__.py +0 -0
- {deeplotx-0.9.5 → deeplotx-0.9.7}/deeplotx/trainer/base_trainer.py +0 -0
- {deeplotx-0.9.5 → deeplotx-0.9.7}/deeplotx/trainer/text_binary_classification_trainer.py +0 -0
- {deeplotx-0.9.5 → deeplotx-0.9.7}/deeplotx/util/__init__.py +0 -0
- {deeplotx-0.9.5 → deeplotx-0.9.7}/deeplotx/util/hash.py +0 -0
- {deeplotx-0.9.5 → deeplotx-0.9.7}/deeplotx.egg-info/SOURCES.txt +0 -0
- {deeplotx-0.9.5 → deeplotx-0.9.7}/deeplotx.egg-info/dependency_links.txt +0 -0
- {deeplotx-0.9.5 → deeplotx-0.9.7}/deeplotx.egg-info/requires.txt +0 -0
- {deeplotx-0.9.5 → deeplotx-0.9.7}/deeplotx.egg-info/top_level.txt +0 -0
- {deeplotx-0.9.5 → deeplotx-0.9.7}/setup.cfg +0 -0
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.4
|
2
2
|
Name: deeplotx
|
3
|
-
Version: 0.9.5
|
3
|
+
Version: 0.9.7
|
4
4
|
Summary: An out-of-the-box long-text NLP framework.
|
5
5
|
Requires-Python: >=3.10
|
6
6
|
Description-Content-Type: text/markdown
|
@@ -27,6 +27,28 @@ Dynamic: license-file
|
|
27
27
|
|
28
28
|
> Author: [vortezwohl](https://github.com/vortezwohl)
|
29
29
|
|
30
|
+
## Citation
|
31
|
+
|
32
|
+
If you are incorporating the `DeepLoTX` framework into your research, please remember to properly cite it to acknowledge its contribution to your work.
|
33
|
+
|
34
|
+
Если вы интегрируете фреймворк `DeepLoTX` в своё исследование, пожалуйста, не забудьте правильно сослаться на него, указывая его вклад в вашу работу.
|
35
|
+
|
36
|
+
もしあなたが研究に `DeepLoTX` フレームワークを組み入れているなら、その貢献を認めるために適切に引用することを忘れないでください.
|
37
|
+
|
38
|
+
如果您正在將 `DeepLoTX` 框架整合到您的研究中,請務必正確引用它,以聲明它對您工作的貢獻.
|
39
|
+
|
40
|
+
```bibtex
|
41
|
+
@software{Wu_DeepLoTX_2025,
|
42
|
+
author = {Wu, Zihao},
|
43
|
+
license = {GPL-3.0},
|
44
|
+
month = aug,
|
45
|
+
title = {{DeepLoTX}},
|
46
|
+
url = {https://github.com/vortezwohl/DeepLoTX},
|
47
|
+
version = {0.9.5},
|
48
|
+
year = {2025}
|
49
|
+
}
|
50
|
+
```
|
51
|
+
|
30
52
|
## Installation
|
31
53
|
|
32
54
|
- **With pip**
|
@@ -6,6 +6,28 @@
|
|
6
6
|
|
7
7
|
> Author: [vortezwohl](https://github.com/vortezwohl)
|
8
8
|
|
9
|
+
## Citation
|
10
|
+
|
11
|
+
If you are incorporating the `DeepLoTX` framework into your research, please remember to properly cite it to acknowledge its contribution to your work.
|
12
|
+
|
13
|
+
Если вы интегрируете фреймворк `DeepLoTX` в своё исследование, пожалуйста, не забудьте правильно сослаться на него, указывая его вклад в вашу работу.
|
14
|
+
|
15
|
+
もしあなたが研究に `DeepLoTX` フレームワークを組み入れているなら、その貢献を認めるために適切に引用することを忘れないでください.
|
16
|
+
|
17
|
+
如果您正在將 `DeepLoTX` 框架整合到您的研究中,請務必正確引用它,以聲明它對您工作的貢獻.
|
18
|
+
|
19
|
+
```bibtex
|
20
|
+
@software{Wu_DeepLoTX_2025,
|
21
|
+
author = {Wu, Zihao},
|
22
|
+
license = {GPL-3.0},
|
23
|
+
month = aug,
|
24
|
+
title = {{DeepLoTX}},
|
25
|
+
url = {https://github.com/vortezwohl/DeepLoTX},
|
26
|
+
version = {0.9.5},
|
27
|
+
year = {2025}
|
28
|
+
}
|
29
|
+
```
|
30
|
+
|
9
31
|
## Installation
|
10
32
|
|
11
33
|
- **With pip**
|
@@ -12,7 +12,7 @@ from deeplotx.ner.named_entity import NamedEntity, NamedPerson
|
|
12
12
|
|
13
13
|
CACHE_PATH = os.path.join(__ROOT__, '.cache')
|
14
14
|
NEW_LINE, BLANK = '\n', ' '
|
15
|
-
DEFAULT_LENGTH_THRESHOLD =
|
15
|
+
DEFAULT_LENGTH_THRESHOLD = 448
|
16
16
|
DEFAULT_BERT_NER = 'Davlan/xlm-roberta-base-ner-hrl'
|
17
17
|
N2G_MODEL: list[Name2Gender] = []
|
18
18
|
logger = logging.getLogger('deeplotx.ner')
|
@@ -50,7 +50,7 @@ class BertNER(BaseNER):
|
|
50
50
|
|
51
51
|
def _fast_extract(self, s: str, with_gender: bool = True, prob_threshold: float = .0) -> list[NamedEntity]:
|
52
52
|
assert prob_threshold <= 1., f'prob_threshold ({prob_threshold}) cannot be larger than 1.'
|
53
|
-
s = f' {s.replace(NEW_LINE, BLANK)} '
|
53
|
+
s = f' {s.replace(NEW_LINE, BLANK * 2)} '
|
54
54
|
raw_entities = self._ner_pipeline(s)
|
55
55
|
entities = []
|
56
56
|
for ent in raw_entities:
|
@@ -60,7 +60,10 @@ class BertNER(BaseNER):
|
|
60
60
|
if len(ent[0].strip()) < 1:
|
61
61
|
del entities[i]
|
62
62
|
if ent[1].upper().startswith('I') and entities[i - 1][1].upper().startswith('B'):
|
63
|
-
entities[i - 1][0]
|
63
|
+
if entities[i - 1][0] + ent[0] in s:
|
64
|
+
entities[i - 1][0] += ent[0]
|
65
|
+
else:
|
66
|
+
entities[i - 1][0] += f' {ent[0]}'
|
64
67
|
entities[i - 1][2] *= ent[2]
|
65
68
|
del entities[i]
|
66
69
|
_continue = False
|
@@ -71,6 +74,8 @@ class BertNER(BaseNER):
|
|
71
74
|
break
|
72
75
|
for ent in entities:
|
73
76
|
ent[0] = ent[0].strip()
|
77
|
+
if len(ent[0]) < 1:
|
78
|
+
ent[0] = ' '
|
74
79
|
# stripping
|
75
80
|
while not ent[0][0].isalpha():
|
76
81
|
if len(ent[0]) < 2:
|
@@ -102,18 +107,20 @@ class BertNER(BaseNER):
|
|
102
107
|
gender_probability=gender_prob)
|
103
108
|
return entities
|
104
109
|
|
105
|
-
def _slow_extract(self, s: str, with_gender: bool = True, prob_threshold: float = .0,
|
106
|
-
|
110
|
+
def _slow_extract(self, s: str, with_gender: bool = True, prob_threshold: float = .0,
|
111
|
+
window_size: int = DEFAULT_LENGTH_THRESHOLD, deduplicate: bool = True) -> list[NamedEntity]:
|
107
112
|
_s_seq = self.tokenizer.encode(s, add_special_tokens=False)
|
108
113
|
_entities = self._fast_extract(self.tokenizer.decode(_s_seq, skip_special_tokens=True),
|
109
114
|
with_gender=with_gender,
|
110
|
-
prob_threshold=prob_threshold) if len(_s_seq) <
|
115
|
+
prob_threshold=prob_threshold) if len(_s_seq) < window_size else []
|
111
116
|
# sliding window extracting
|
112
|
-
if len(_s_seq) >=
|
113
|
-
|
114
|
-
|
115
|
-
|
116
|
-
|
117
|
+
if len(_s_seq) >= window_size:
|
118
|
+
_stride = window_size // 4
|
119
|
+
for i in range(0, len(_s_seq) + window_size, _stride):
|
120
|
+
_tmp_s_seq = _s_seq[i: i + window_size]
|
121
|
+
if len(_tmp_s_seq) < 1:
|
122
|
+
continue
|
123
|
+
_window_text = self.tokenizer.decode(_tmp_s_seq, skip_special_tokens=True)
|
117
124
|
_entities.extend(self._fast_extract(_window_text, with_gender=with_gender, prob_threshold=prob_threshold))
|
118
125
|
# entity combination
|
119
126
|
_tmp_entities = sorted(_entities, key=lambda x: len(x.text), reverse=True)
|
@@ -155,4 +162,5 @@ class BertNER(BaseNER):
|
|
155
162
|
return self._fast_extract(s=s, with_gender=with_gender, prob_threshold=prob_threshold)
|
156
163
|
else:
|
157
164
|
return self._slow_extract(s=s, with_gender=with_gender, prob_threshold=prob_threshold,
|
165
|
+
window_size=kwargs.get('window_size', DEFAULT_LENGTH_THRESHOLD),
|
158
166
|
deduplicate=kwargs.get('deduplicate', True))
|
@@ -13,6 +13,17 @@ def read_file(path: str, encoding: str = 'utf-8') -> str:
|
|
13
13
|
pass
|
14
14
|
|
15
15
|
|
16
|
+
def write_file(content: str | bytes, path: str, encoding: str = 'utf-8') -> str:
|
17
|
+
os.makedirs(os.path.dirname(path), exist_ok=True)
|
18
|
+
if isinstance(content, bytes):
|
19
|
+
with open(path, mode='wb') as f:
|
20
|
+
f.write(content)
|
21
|
+
return path
|
22
|
+
with open(path, mode='w', encoding=encoding) as f:
|
23
|
+
f.write(content)
|
24
|
+
return path
|
25
|
+
|
26
|
+
|
16
27
|
def get_files(path: str) -> list:
|
17
28
|
if os.path.exists(path):
|
18
29
|
entries = os.listdir(path)
|
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.4
|
2
2
|
Name: deeplotx
|
3
|
-
Version: 0.9.5
|
3
|
+
Version: 0.9.7
|
4
4
|
Summary: An out-of-the-box long-text NLP framework.
|
5
5
|
Requires-Python: >=3.10
|
6
6
|
Description-Content-Type: text/markdown
|
@@ -27,6 +27,28 @@ Dynamic: license-file
|
|
27
27
|
|
28
28
|
> Author: [vortezwohl](https://github.com/vortezwohl)
|
29
29
|
|
30
|
+
## Citation
|
31
|
+
|
32
|
+
If you are incorporating the `DeepLoTX` framework into your research, please remember to properly cite it to acknowledge its contribution to your work.
|
33
|
+
|
34
|
+
Если вы интегрируете фреймворк `DeepLoTX` в своё исследование, пожалуйста, не забудьте правильно сослаться на него, указывая его вклад в вашу работу.
|
35
|
+
|
36
|
+
もしあなたが研究に `DeepLoTX` フレームワークを組み入れているなら、その貢献を認めるために適切に引用することを忘れないでください.
|
37
|
+
|
38
|
+
如果您正在將 `DeepLoTX` 框架整合到您的研究中,請務必正確引用它,以聲明它對您工作的貢獻.
|
39
|
+
|
40
|
+
```bibtex
|
41
|
+
@software{Wu_DeepLoTX_2025,
|
42
|
+
author = {Wu, Zihao},
|
43
|
+
license = {GPL-3.0},
|
44
|
+
month = aug,
|
45
|
+
title = {{DeepLoTX}},
|
46
|
+
url = {https://github.com/vortezwohl/DeepLoTX},
|
47
|
+
version = {0.9.5},
|
48
|
+
year = {2025}
|
49
|
+
}
|
50
|
+
```
|
51
|
+
|
30
52
|
## Installation
|
31
53
|
|
32
54
|
- **With pip**
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|