nlpertools 1.0.5__py3-none-any.whl → 1.0.8__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- nlpertools/__init__.py +23 -20
- nlpertools/algo/ac.py +18 -0
- nlpertools/algo/bit_ops.py +28 -0
- nlpertools/algo/kmp.py +94 -55
- nlpertools/algo/num_ops.py +12 -0
- nlpertools/algo/template.py +116 -0
- nlpertools/algo/union.py +13 -0
- nlpertools/cli.py +87 -0
- nlpertools/data_client.py +426 -257
- nlpertools/data_structure/base_structure.py +109 -13
- nlpertools/dataprocess.py +627 -3
- nlpertools/default_db_config.yml +41 -0
- nlpertools/draw/__init__.py +0 -0
- nlpertools/draw/draw.py +83 -0
- nlpertools/draw/math_func.py +33 -0
- nlpertools/get_2fa.py +0 -0
- nlpertools/io/__init__.py +3 -3
- nlpertools/io/dir.py +86 -36
- nlpertools/io/file.py +283 -222
- nlpertools/ml.py +511 -460
- nlpertools/monitor/__init__.py +0 -0
- nlpertools/monitor/gpu.py +18 -0
- nlpertools/monitor/memory.py +24 -0
- nlpertools/movie.py +36 -0
- nlpertools/nlpertools_config.yml +1 -0
- nlpertools/{openApi.py → open_api.py} +65 -65
- nlpertools/other.py +475 -249
- nlpertools/pic.py +288 -0
- nlpertools/plugin.py +43 -43
- nlpertools/reminder.py +98 -87
- nlpertools/utils/__init__.py +3 -3
- nlpertools/utils/lazy.py +727 -0
- nlpertools/utils/log_util.py +20 -0
- nlpertools/utils/package.py +89 -76
- nlpertools/utils/package_v1.py +94 -0
- nlpertools/utils/package_v2.py +117 -0
- nlpertools/utils_for_nlpertools.py +93 -93
- nlpertools/vector_index_demo.py +108 -0
- nlpertools/wrapper.py +161 -96
- {nlpertools-1.0.5.dist-info → nlpertools-1.0.8.dist-info}/LICENSE +200 -200
- nlpertools-1.0.8.dist-info/METADATA +132 -0
- nlpertools-1.0.8.dist-info/RECORD +49 -0
- {nlpertools-1.0.5.dist-info → nlpertools-1.0.8.dist-info}/WHEEL +1 -1
- nlpertools-1.0.8.dist-info/entry_points.txt +2 -0
- nlpertools-1.0.8.dist-info/top_level.txt +2 -0
- nlpertools_helper/__init__.py +10 -0
- nlpertools-1.0.5.dist-info/METADATA +0 -85
- nlpertools-1.0.5.dist-info/RECORD +0 -25
- nlpertools-1.0.5.dist-info/top_level.txt +0 -1
nlpertools/draw/draw.py
ADDED
@@ -0,0 +1,83 @@
|
|
1
|
+
#!/usr/bin/python3.8
|
2
|
+
# -*- coding: utf-8 -*-
|
3
|
+
# @Author : youshu.Ji
|
4
|
+
from ..utils.package import plt
|
5
|
+
|
6
|
+
|
7
|
+
def confused_matrix(confuse_matrix):
    """Render a confusion matrix as an annotated heatmap, show it, then save it.

    :param confuse_matrix: matrix-like data handed straight to
        ``seaborn.heatmap``. The tick labels are fixed to three class names,
        so a 3x3 matrix is presumably expected (TODO confirm against callers).
    :return: None. The figure is displayed and then written to ``tmp.jpg``
        in the current working directory.
    """
    import seaborn as sns

    # Fixed class names used for both axes.
    class_names = ["l1", "l2", "l31"]

    sns.set()
    figure, axes = plt.subplots()

    # Draw the heatmap with the cell values printed inside each cell.
    heatmap_options = dict(annot=True, fmt=".3g", cmap='rainbow',
                           xticklabels=class_names, yticklabels=class_names)
    sns.heatmap(confuse_matrix, ax=axes, **heatmap_options)

    axes.set_title('confusion matrix')
    axes.set_xlabel('predict')
    axes.set_ylabel('true')

    plt.show()
    figure.savefig('tmp.jpg', bbox_inches='tight')
|
21
|
+
|
22
|
+
|
23
|
+
def plot_histogram(data, bin_size):
    """Plot a histogram of ``data`` with one overflow bin for values above 1000.

    Bins are ``bin_size`` wide and start at 0. Every value greater than 1000
    is counted in the final bin, whose right-hand tick is labelled "∞". The
    mean of the original (unclipped) ``data`` is drawn as a dashed red line
    and every bar is annotated with its count.

    :param data: re-iterable collection of numbers (e.g. a list); it is
        iterated once for binning and also passed to ``np.mean``.
    :param bin_size: width of each histogram bin.
    :return: None. The figure is displayed with ``plt.show()``.
    """
    import matplotlib.pyplot as plt
    import numpy as np
    from matplotlib.ticker import MaxNLocator

    upper_limit = 1000

    # Edges run one bin past the limit so the final bin can hold the overflow.
    bins = np.arange(0, upper_limit + 2 * bin_size, bin_size)

    # Put overflowing values at the centre of the final bin. A fixed sentinel
    # such as 1003 falls outside the edges for bin_size 1 or 2 (and is then
    # silently dropped) and lands in an ordinary bin whenever bin_size does
    # not divide the limit.
    overflow_value = (bins[-2] + bins[-1]) / 2
    processed_data = [length if length <= upper_limit else overflow_value for length in data]

    plt.figure(figsize=(12, 8))
    # Draw the histogram.
    n, _, patches = plt.hist(processed_data, bins=bins, edgecolor='black', color='skyblue', alpha=0.7,
                             linewidth=0)

    # One tick per bin edge; the last edge is relabelled "∞".
    axis = plt.gca()
    axis.set_xticks(bins)
    axis.set_xticklabels([str(i) for i in plt.xticks()[0][:-1]] + ["∞"])

    # The mean is taken over the raw data, not the clipped values.
    mean_val = np.mean(data)
    plt.axvline(mean_val, color='red', linestyle='dashed', linewidth=1)
    plt.text(mean_val + bin_size / 10, max(n) * 0.9, f'Mean: {mean_val:.2f}', color='red')

    # Title and axis labels.
    plt.title('Module Line Number Distribution', fontsize=16, fontweight='bold')
    plt.xlabel('module line number', fontsize=14)
    plt.ylabel('frequency', fontsize=14)

    # Grid.
    plt.grid(True, linestyle='--', alpha=0.6)

    # Tick label sizes.
    plt.xticks(fontsize=12)
    plt.yticks(fontsize=12)

    # Print the count above every bar.
    for count, patch in zip(n, patches):
        plt.text(patch.get_x() + patch.get_width() / 2, patch.get_height(),
                 str(int(count)), ha='center', va='bottom', fontsize=12)
    plt.gca().yaxis.set_major_locator(MaxNLocator(integer=True))
    # Show the chart.
    plt.show()
|
77
|
+
|
78
|
+
|
79
|
+
if __name__ == '__main__':
    # Demo input: sample module lengths, including values above 1000.
    sample_lengths = [1, 100, 999, 1000, 1002, 1100, 1150]
    # Bin width for the demo histogram; change it to adjust the resolution.
    bin_size = 50
    plot_histogram(sample_lengths, bin_size)
|
@@ -0,0 +1,33 @@
|
|
1
|
+
# 数学函数
|
2
|
+
def draw_log():
    """Plot the natural logarithm on [0.1, 10] and display the figure.

    Dashed grey guide lines mark x = 1 and y = 0, which is where the curve
    crosses the x-axis.

    :return: None. The figure is shown with ``plt.show()``.
    """
    import matplotlib.pyplot as plt
    import numpy as np
    from matplotlib.ticker import MultipleLocator, FormatStrFormatter

    # Sample 100 points; np.log is the natural logarithm (base e).
    xs = np.linspace(0.1, 10, 100)
    ys = np.log(xs)

    # New figure with a single set of axes holding the curve.
    figure, axes = plt.subplots()
    axes.plot(xs, ys)

    # Title and axis labels.
    axes.set_title("Logarithmic Function")
    axes.set_xlabel("x")
    axes.set_ylabel("log(x)")

    # Major x ticks every 1 unit, printed with one decimal place.
    x_axis = axes.xaxis
    x_axis.set_major_locator(MultipleLocator(1))
    x_axis.set_major_formatter(FormatStrFormatter("%.1f"))

    # Dashed guide lines at x = 1 and y = 0.
    guide_style = dict(linestyle="--", color="gray")
    axes.axvline(x=1, **guide_style)
    axes.axhline(y=0, **guide_style)

    # Show the figure.
    plt.show()
|
nlpertools/get_2fa.py
ADDED
File without changes
|
nlpertools/io/__init__.py
CHANGED
@@ -1,3 +1,3 @@
|
|
1
|
-
#!/usr/bin/python3.8
|
2
|
-
# -*- coding: utf-8 -*-
|
3
|
-
# @Author : youshu.Ji
|
1
|
+
#!/usr/bin/python3.8
|
2
|
+
# -*- coding: utf-8 -*-
|
3
|
+
# @Author : youshu.Ji
|
nlpertools/io/dir.py
CHANGED
@@ -1,36 +1,86 @@
|
|
1
|
-
#!/usr/bin/python3.8
|
2
|
-
# -*- coding: utf-8 -*-
|
3
|
-
# @Author : youshu.Ji
|
4
|
-
import os
|
5
|
-
|
6
|
-
|
7
|
-
|
8
|
-
|
9
|
-
|
10
|
-
|
11
|
-
|
12
|
-
|
13
|
-
|
14
|
-
|
15
|
-
|
16
|
-
|
17
|
-
|
18
|
-
|
19
|
-
|
20
|
-
return
|
21
|
-
|
22
|
-
|
23
|
-
def
|
24
|
-
#
|
25
|
-
|
26
|
-
|
27
|
-
|
28
|
-
|
29
|
-
|
30
|
-
|
31
|
-
|
32
|
-
|
33
|
-
|
34
|
-
|
35
|
-
|
36
|
-
|
1
|
+
#!/usr/bin/python3.8
|
2
|
+
# -*- coding: utf-8 -*-
|
3
|
+
# @Author : youshu.Ji
|
4
|
+
import os
|
5
|
+
from pathlib import Path
|
6
|
+
|
7
|
+
|
8
|
+
# dir ----------------------------------------------------------------------
|
9
|
+
def j_mkdir(name):
    """Create directory ``name`` together with any missing parent directories.

    An already existing directory is not an error.

    :param name: path of the directory to create.
    :return: None
    """
    os.makedirs(name=name, exist_ok=True)
|
11
|
+
|
12
|
+
|
13
|
+
def j_walk(name, suffix=None):
    """Recursively collect the paths of all files below ``name``.

    :param name: root directory handed to ``os.walk``.
    :param suffix: optional ending passed to ``str.endswith``; when it is
        falsy every file is returned.
    :return: list of paths, each built by joining the walked directory with
        the file name.
    """
    collected = []
    for folder, _subdirs, file_names in os.walk(name):
        for file_name in file_names:
            full_path = os.path.join(folder, file_name)
            # Skip files that do not carry the requested suffix.
            if suffix and not full_path.endswith(suffix):
                continue
            collected.append(full_path)
    return collected
|
21
|
+
|
22
|
+
|
23
|
+
def windows_to_wsl_path(windows_path):
    """Convert a Windows path into the matching WSL path.

    Backslashes become forward slashes, and a leading drive prefix such as
    ``C:\\`` or ``C:/`` is rewritten to ``/mnt/c/``. Paths without a drive
    prefix only get their separators converted.

    :param windows_path: Windows-style path string.
    :return: the converted path string.
    """
    # Normalise separators first so both "C:\\dir" and "C:/dir" are recognised
    # as drive paths.
    unix_style = windows_path.replace('\\', '/')

    # Drive prefix: one character, a colon and a separator.
    if unix_style[1:3] == ':/':
        drive_letter = unix_style[0].lower()
        return f'/mnt/{drive_letter}{unix_style[2:]}'

    # No drive prefix: only the separators change. Single quotes are left
    # untouched; the earlier .replace("'", "\'") was a no-op because "\'" is
    # just "'".
    return unix_style
|
34
|
+
|
35
|
+
|
36
|
+
def get_filename(path, suffix=True) -> str:
    """Return the final component (the file name) of ``path``.

    :param path: file path, e.g. ``'dir/sub/name.txt'``.
    :param suffix: keep the extension when True. When False only the last
        extension is removed, so ``'report.v2.txt'`` becomes ``'report.v2'``
        and dot-files such as ``'.bashrc'`` are returned unchanged.
    :return: the file name, with or without its extension.
    """
    filename = os.path.basename(path)
    if not suffix:
        # splitext drops only the final extension and leaves a leading dot
        # alone; cutting at the first '.' truncated dotted names and turned
        # dot-files into an empty string.
        filename = os.path.splitext(filename)[0]
    return filename
|
47
|
+
|
48
|
+
|
49
|
+
def j_listdir(dir_name, including_dir=True):
    """List the entries of ``dir_name``.

    :param dir_name: directory to list with ``os.listdir``.
    :param including_dir: when truthy each entry is joined onto ``dir_name``;
        otherwise the bare entry names are returned.
    :return: list of entry paths or entry names.
    """
    entries = os.listdir(dir_name)
    if not including_dir:
        return list(entries)
    return [os.path.join(dir_name, entry) for entry in entries]
|
55
|
+
|
56
|
+
|
57
|
+
def j_listdir_yield(dir_name, including_dir=True):
    """Yield the entries of ``dir_name`` one at a time.

    The directory is listed with ``os.listdir`` when iteration starts.

    :param dir_name: directory to list.
    :param including_dir: when truthy each entry is joined onto ``dir_name``;
        otherwise the bare entry name is yielded.
    :return: generator of entry paths or entry names.
    """
    for entry in os.listdir(dir_name):
        yield os.path.join(dir_name, entry) if including_dir else entry
|
64
|
+
|
65
|
+
|
66
|
+
# 合并文件 TODO 还没写
|
67
|
+
def imgrate_files(path):
    """Placeholder for merging the files in ``path``; not implemented yet.

    For now the directory is only listed and the result is discarded, so a
    missing or unreadable ``path`` still raises from ``os.listdir``.

    :param path: directory whose files are meant to be merged.
    :return: always None.
    """
    # TODO: implement the merge; the listing is not used yet.
    os.listdir(path)
    return None
|
70
|
+
|
71
|
+
|
72
|
+
def case_sensitive_path_exists(path: str, relative_path=False):
    """Check whether ``path`` exists with exactly the spelling that was given.

    Reference: https://juejin.cn/post/7316725867086692391

    The path is compared with its ``Path.resolve()`` form. ``resolve()``
    always returns an absolute path, so a relative ``path`` is first anchored
    at the current working directory; otherwise the comparison could never
    succeed for relative input.

    :param path: path to check.
    :param relative_path: kept for backward compatibility. Relative paths are
        anchored at the current working directory whether or not it is set.
    :return: True when the path exists and its resolved form equals the
        absolute path as written; False otherwise. Paths that ``resolve()``
        rewrites (symlinks, ``..`` components) therefore return False.
    """
    candidate = Path(path)
    if relative_path or not candidate.is_absolute():
        # Joining onto cwd leaves an already absolute path unchanged.
        candidate = Path.cwd() / candidate
    if not candidate.exists():
        return False
    # NOTE(review): relies on resolve() reporting the on-disk spelling on
    # case-insensitive file systems -- platform dependent, confirm on targets.
    return str(candidate.resolve()) == str(candidate)
|