nlpertools 1.0.10__py3-none-any.whl → 1.0.11__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- nlpertools/__init__.py +2 -1
- nlpertools/cli.py +14 -17
- nlpertools/dataprocess/__init__.py +1 -0
- nlpertools/dataprocess/dedupl.py +9 -0
- nlpertools/{dataprocess.py → dataprocess/dp_main.py} +1 -1
- nlpertools/io/dir.py +25 -5
- nlpertools/io/file.py +46 -43
- nlpertools/llm/__init__.py +3 -0
- nlpertools/llm/call_llm_once.py +34 -4
- nlpertools/llm/infer.py +50 -5
- nlpertools/other.py +77 -51
- nlpertools/utils/package.py +9 -10
- nlpertools/wrapper.py +6 -4
- {nlpertools-1.0.10.dist-info → nlpertools-1.0.11.dist-info}/METADATA +3 -1
- {nlpertools-1.0.10.dist-info → nlpertools-1.0.11.dist-info}/RECORD +19 -17
- {nlpertools-1.0.10.dist-info → nlpertools-1.0.11.dist-info}/WHEEL +0 -0
- {nlpertools-1.0.10.dist-info → nlpertools-1.0.11.dist-info}/entry_points.txt +0 -0
- {nlpertools-1.0.10.dist-info → nlpertools-1.0.11.dist-info}/licenses/LICENSE +0 -0
- {nlpertools-1.0.10.dist-info → nlpertools-1.0.11.dist-info}/top_level.txt +0 -0
nlpertools/__init__.py
CHANGED
@@ -4,6 +4,7 @@
|
|
4
4
|
from .algo.kmp import *
|
5
5
|
from .data_structure.base_structure import *
|
6
6
|
from .draw import *
|
7
|
+
from .dataprocess.dp_main import *
|
7
8
|
from .dataprocess import *
|
8
9
|
from .io.dir import *
|
9
10
|
from .io.file import *
|
@@ -20,4 +21,4 @@ from .cli import *
|
|
20
21
|
from .llm import *
|
21
22
|
|
22
23
|
|
23
|
-
__version__ = "1.0.
|
24
|
+
__version__ = "1.0.11"
|
nlpertools/cli.py
CHANGED
@@ -2,7 +2,7 @@ import argparse
|
|
2
2
|
import os
|
3
3
|
import uuid
|
4
4
|
import sys
|
5
|
-
from .dataprocess import startwith
|
5
|
+
from .dataprocess.dp_main import startwith
|
6
6
|
|
7
7
|
|
8
8
|
def run_git_command(command):
|
@@ -17,7 +17,7 @@ def run_git_command(command):
|
|
17
17
|
info = os.system(command)
|
18
18
|
print(str(info))
|
19
19
|
# 检查命令执行结果,若未出现错误则认为执行成功
|
20
|
-
if not startwith(str(info), ["fatal", "error", "128", "1"]):
|
20
|
+
if (not startwith(str(info), ["fatal", "error", "128", "1"])) and "fatal" not in str(info):
|
21
21
|
print("success")
|
22
22
|
print(f"success info : ##{info}##")
|
23
23
|
break
|
@@ -25,7 +25,7 @@ def run_git_command(command):
|
|
25
25
|
|
26
26
|
def get_mac_address():
|
27
27
|
mac = uuid.UUID(int=uuid.getnode()).hex[-12:]
|
28
|
-
mac_address = ":".join([mac[e:e + 2] for e in range(0, 11, 2)])
|
28
|
+
mac_address = ":".join([mac[e : e + 2] for e in range(0, 11, 2)])
|
29
29
|
print("mac address 不一定准确")
|
30
30
|
print(mac_address)
|
31
31
|
return mac_address
|
@@ -33,6 +33,7 @@ def get_mac_address():
|
|
33
33
|
|
34
34
|
def get_2af_value(key):
|
35
35
|
import pyotp
|
36
|
+
|
36
37
|
"""
|
37
38
|
key应该是7位的
|
38
39
|
"""
|
@@ -65,15 +66,11 @@ def start_gpu_usage_notify_client():
|
|
65
66
|
from plyer import notification
|
66
67
|
import time
|
67
68
|
|
68
|
-
SERVER_URL =
|
69
|
+
SERVER_URL = "http://127.0.0.1:5000/notify" # 服务器的 API 地址
|
69
70
|
|
70
71
|
def notify(text):
|
71
72
|
# 使用 plyer 发送通知
|
72
|
-
notification.notify(
|
73
|
-
title='远程通知',
|
74
|
-
message=text,
|
75
|
-
timeout=10 # 10秒的通知显示时间
|
76
|
-
)
|
73
|
+
notification.notify(title="远程通知", message=text, timeout=10) # 10秒的通知显示时间
|
77
74
|
|
78
75
|
"""定时轮询服务器获取通知"""
|
79
76
|
while True:
|
@@ -94,12 +91,12 @@ def start_gpu_usage_notify_client():
|
|
94
91
|
|
95
92
|
def main():
|
96
93
|
parser = argparse.ArgumentParser(description="CLI tool for git operations and other functions.")
|
97
|
-
parser.add_argument(
|
98
|
-
parser.add_argument(
|
99
|
-
parser.add_argument(
|
100
|
-
parser.add_argument(
|
101
|
-
parser.add_argument(
|
102
|
-
parser.add_argument(
|
94
|
+
parser.add_argument("git_command", nargs="*", help="Any git command (e.g., push, pull)")
|
95
|
+
parser.add_argument("--mac_address", action="store_true", help="Get the MAC address.")
|
96
|
+
parser.add_argument("--get_2fa", action="store_true", help="Get the 2fa value.")
|
97
|
+
parser.add_argument("--get_2fa_key", type=str, help="Get the 2fa value.")
|
98
|
+
parser.add_argument("--monitor_gpu_cli", action="store_true", help="monitor gpu cli")
|
99
|
+
parser.add_argument("--monitor_gpu_ser", action="store_true", help="monitor gpu ser")
|
103
100
|
|
104
101
|
args = parser.parse_args()
|
105
102
|
|
@@ -121,5 +118,5 @@ def main():
|
|
121
118
|
print("No operation specified.")
|
122
119
|
|
123
120
|
|
124
|
-
if __name__ ==
|
125
|
-
main()
|
121
|
+
if __name__ == "__main__":
|
122
|
+
main()
|
@@ -0,0 +1 @@
|
|
1
|
+
from .dedupl import *
|
@@ -8,7 +8,7 @@ from typing import List
|
|
8
8
|
import numpy as np
|
9
9
|
|
10
10
|
# from . import DB_CONFIG_FILE # cannot import name 'DB_CONFIG_FILE' from partially initialized module 'nlpertools'
|
11
|
-
from
|
11
|
+
from ..utils.package import *
|
12
12
|
|
13
13
|
main_special_characters = string.punctuation + string.digits + string.whitespace
|
14
14
|
other_special_characters = (
|
nlpertools/io/dir.py
CHANGED
@@ -3,6 +3,7 @@
|
|
3
3
|
# @Author : youshu.Ji
|
4
4
|
import os
|
5
5
|
from pathlib import Path
|
6
|
+
from typing import overload,Literal,Union
|
6
7
|
|
7
8
|
|
8
9
|
# dir ----------------------------------------------------------------------
|
@@ -45,15 +46,34 @@ def get_filename(path, suffix=True) -> str:
|
|
45
46
|
filename = filename.split('.')[0]
|
46
47
|
return filename
|
47
48
|
|
48
|
-
|
49
|
-
|
50
|
-
|
49
|
+
"""
|
50
|
+
因为os.listdir无法支持Path类型,虽然是bytelikepath,但是传入Path后只会返回字符串
|
51
|
+
且无法只返回文件名
|
52
|
+
故重新实现
|
53
|
+
"""
|
54
|
+
@overload
|
55
|
+
def listdir(dir_name: Path, including_dir: Literal[True]) -> list[Path]: ...
|
56
|
+
@overload
|
57
|
+
def listdir(dir_name: str, including_dir: Literal[True]) -> list[str]: ...
|
58
|
+
@overload
|
59
|
+
def listdir(dir_name: Path, including_dir: Literal[False] = False) -> list[str]: ...
|
60
|
+
@overload
|
61
|
+
def listdir(dir_name: str, including_dir: Literal[False] = False) -> list[str]: ...
|
62
|
+
|
63
|
+
def listdir(dir_name: Union[Path, str], including_dir: bool = False) -> list[Path] | list[str]:
|
64
|
+
"""
|
65
|
+
including_dir=True -> list[Path] or list[str]
|
66
|
+
including_dir=False -> list[str]
|
67
|
+
"""
|
68
|
+
filenames = os.listdir(str(dir_name))
|
51
69
|
if including_dir:
|
52
|
-
|
70
|
+
if isinstance(dir_name, Path):
|
71
|
+
return [dir_name / filename for filename in filenames]
|
72
|
+
else:
|
73
|
+
return [os.path.join(dir_name, filename) for filename in filenames]
|
53
74
|
else:
|
54
75
|
return list(filenames)
|
55
76
|
|
56
|
-
|
57
77
|
def listdir_yield(dir_name, including_dir=True):
|
58
78
|
filenames = os.listdir(dir_name)
|
59
79
|
for filename in filenames:
|
nlpertools/io/file.py
CHANGED
@@ -5,8 +5,11 @@ import codecs
|
|
5
5
|
import json
|
6
6
|
import pickle
|
7
7
|
import random
|
8
|
-
from itertools import
|
8
|
+
from itertools import takewhile, repeat
|
9
|
+
from typing import Optional
|
10
|
+
from pathlib import Path
|
9
11
|
import pandas as pd
|
12
|
+
|
10
13
|
# import omegaconf
|
11
14
|
# import yaml
|
12
15
|
from ..utils.package import *
|
@@ -15,18 +18,18 @@ LARGE_FILE_THRESHOLD = 1e5
|
|
15
18
|
|
16
19
|
|
17
20
|
def safe_filename(filename: str) -> str:
|
18
|
-
for char in [
|
19
|
-
filename = filename.replace(char,
|
21
|
+
for char in ["\\", "/", ":", "*", "?", '"', "<", ">", "|"]:
|
22
|
+
filename = filename.replace(char, "_")
|
20
23
|
return filename
|
21
24
|
|
22
25
|
|
23
26
|
def read_yaml(path, omega=False):
|
24
27
|
if omega:
|
25
28
|
return omegaconf.OmegaConf.load(path)
|
26
|
-
return yaml.load(codecs.open(path, encoding=
|
29
|
+
return yaml.load(codecs.open(path, encoding="utf-8"), Loader=yaml.FullLoader)
|
27
30
|
|
28
31
|
|
29
|
-
def
|
32
|
+
def merge_file(filelist, save_filename, shuffle=False):
|
30
33
|
contents = []
|
31
34
|
for file in filelist:
|
32
35
|
content = readtxt_list_all_strip(file)
|
@@ -43,9 +46,9 @@ def iter_count(file_name):
|
|
43
46
|
author: unknown
|
44
47
|
"""
|
45
48
|
buffer = 1024 * 1024
|
46
|
-
with codecs.open(file_name,
|
49
|
+
with codecs.open(file_name, "r", "utf-8") as f:
|
47
50
|
buf_gen = takewhile(lambda x: x, (f.read(buffer) for _ in repeat(None)))
|
48
|
-
return sum(buf.count(
|
51
|
+
return sum(buf.count("\n") for buf in buf_gen)
|
49
52
|
|
50
53
|
|
51
54
|
# 需要加入进度条的函数包括
|
@@ -57,24 +60,24 @@ load_from_json
|
|
57
60
|
|
58
61
|
|
59
62
|
# 读txt文件 一次全读完 返回list 去换行
|
60
|
-
def readtxt_list_all_strip(path, encoding=
|
63
|
+
def readtxt_list_all_strip(path, encoding="utf-8") -> list:
|
61
64
|
file_line_num = iter_count(path)
|
62
65
|
lines = []
|
63
|
-
with codecs.open(path,
|
66
|
+
with codecs.open(path, "r", encoding) as r:
|
64
67
|
if file_line_num > LARGE_FILE_THRESHOLD:
|
65
68
|
iter_obj = tqdm(enumerate(r.readlines()), total=file_line_num)
|
66
69
|
else:
|
67
70
|
iter_obj = enumerate(r.readlines())
|
68
71
|
|
69
72
|
for ldx, line in iter_obj:
|
70
|
-
lines.append(line.strip(
|
73
|
+
lines.append(line.strip("\n").strip("\r"))
|
71
74
|
return lines
|
72
75
|
|
73
76
|
|
74
77
|
# 读txt 一次读一行 最后返回list
|
75
78
|
def readtxt_list_each(path) -> list:
|
76
79
|
lines = []
|
77
|
-
with codecs.open(path,
|
80
|
+
with codecs.open(path, "r", "utf-8") as r:
|
78
81
|
line = r.readline()
|
79
82
|
while line:
|
80
83
|
lines.append(line)
|
@@ -82,11 +85,11 @@ def readtxt_list_each(path) -> list:
|
|
82
85
|
return lines
|
83
86
|
|
84
87
|
|
85
|
-
def readtxt_list_each_strip(path
|
88
|
+
def readtxt_list_each_strip(path: Optional[str | Path]):
|
86
89
|
"""
|
87
90
|
yield方法
|
88
91
|
"""
|
89
|
-
with codecs.open(path,
|
92
|
+
with codecs.open(path, "r", "utf-8") as r:
|
90
93
|
line = r.readline()
|
91
94
|
while line:
|
92
95
|
yield line.strip("\n").strip("\r")
|
@@ -95,51 +98,51 @@ def readtxt_list_each_strip(path) -> list:
|
|
95
98
|
|
96
99
|
# 读txt文件 一次全读完 返回list
|
97
100
|
def readtxt_list_all(path) -> list:
|
98
|
-
with codecs.open(path,
|
101
|
+
with codecs.open(path, "r", "utf-8") as r:
|
99
102
|
lines = r.readlines()
|
100
103
|
return lines
|
101
104
|
|
102
105
|
|
103
106
|
# 读byte文件 读成一条string
|
104
107
|
def readtxt_byte(path, encoding="utf-8") -> str:
|
105
|
-
with codecs.open(path,
|
108
|
+
with codecs.open(path, "rb") as r:
|
106
109
|
lines = r.read()
|
107
110
|
lines = lines.decode(encoding)
|
108
|
-
return lines.replace(
|
111
|
+
return lines.replace("\r", "")
|
109
112
|
|
110
113
|
|
111
114
|
# 读txt文件 读成一条string
|
112
|
-
def
|
113
|
-
with codecs.open(path,
|
115
|
+
def read_text(path, encoding="utf-8") -> str:
|
116
|
+
with codecs.open(path, "r", encoding) as r:
|
114
117
|
lines = r.read()
|
115
|
-
return lines.replace(
|
118
|
+
return lines.replace("\r", "")
|
116
119
|
|
117
120
|
|
118
121
|
# 写txt文件覆盖
|
119
|
-
def writetxt_w(txt, path, r=
|
120
|
-
with codecs.open(path, r,
|
122
|
+
def writetxt_w(txt, path, r="w"):
|
123
|
+
with codecs.open(path, r, "utf-8") as w:
|
121
124
|
w.writelines(txt)
|
122
125
|
|
123
126
|
|
124
127
|
# 写txt文件追加
|
125
128
|
def writetxt_a(txt, path):
|
126
|
-
with codecs.open(path,
|
129
|
+
with codecs.open(path, "a", "utf-8") as w:
|
127
130
|
w.writelines(txt)
|
128
131
|
|
129
132
|
|
130
133
|
def writetxt(txt, path, encoding="utf-8"):
|
131
|
-
with codecs.open(path,
|
134
|
+
with codecs.open(path, "w", encoding) as w:
|
132
135
|
w.write(txt)
|
133
136
|
|
134
137
|
|
135
138
|
def writetxt_wb(txt, path):
|
136
|
-
with codecs.open(path,
|
139
|
+
with codecs.open(path, "wb") as w:
|
137
140
|
w.write(txt)
|
138
141
|
|
139
142
|
|
140
143
|
# 写list 覆盖
|
141
144
|
def writetxt_w_list(list, path, num_lf=1):
|
142
|
-
with codecs.open(path,
|
145
|
+
with codecs.open(path, "w", "utf-8") as w:
|
143
146
|
for i in list:
|
144
147
|
w.write(i)
|
145
148
|
w.write("\n" * num_lf)
|
@@ -147,7 +150,7 @@ def writetxt_w_list(list, path, num_lf=1):
|
|
147
150
|
|
148
151
|
# 写list 追加
|
149
152
|
def writetxt_a_list(list, path, num_lf=2):
|
150
|
-
with codecs.open(path,
|
153
|
+
with codecs.open(path, "a", "utf-8") as w:
|
151
154
|
for i in list:
|
152
155
|
w.write(i)
|
153
156
|
w.write("\n" * num_lf)
|
@@ -158,7 +161,7 @@ def save_to_json(content, path):
|
|
158
161
|
json.dump(content, w, ensure_ascii=False, indent=1)
|
159
162
|
|
160
163
|
|
161
|
-
def load_from_json(path):
|
164
|
+
def load_from_json(path: Optional[str | Path]):
|
162
165
|
with codecs.open(path, "r", "utf-8") as r:
|
163
166
|
content = json.load(r)
|
164
167
|
return content
|
@@ -167,60 +170,60 @@ def load_from_json(path):
|
|
167
170
|
# 读txt文件 读成一条string if gb2312
|
168
171
|
def readtxt_string_all_encoding(path):
|
169
172
|
try:
|
170
|
-
with codecs.open(path,
|
173
|
+
with codecs.open(path, "rb", "utf-8-sig") as r:
|
171
174
|
lines = r.read()
|
172
175
|
return lines
|
173
176
|
except:
|
174
177
|
try:
|
175
|
-
with codecs.open(path,
|
178
|
+
with codecs.open(path, "rb", "utf-8") as r:
|
176
179
|
lines = r.reacd()
|
177
180
|
return lines
|
178
181
|
except:
|
179
182
|
try:
|
180
|
-
with codecs.open(path,
|
183
|
+
with codecs.open(path, "rb", "big5") as r:
|
181
184
|
lines = r.read()
|
182
185
|
return lines
|
183
186
|
except:
|
184
187
|
print(path)
|
185
|
-
with codecs.open(path,
|
188
|
+
with codecs.open(path, "rb", "gb2312", errors="ignore") as r:
|
186
189
|
lines = r.read()
|
187
190
|
return lines
|
188
191
|
|
189
192
|
|
190
193
|
def readtxt_list_all_encoding(path):
|
191
194
|
try:
|
192
|
-
with codecs.open(path,
|
195
|
+
with codecs.open(path, "rb", "utf-8-sig") as r:
|
193
196
|
lines = r.readlines()
|
194
197
|
return lines
|
195
198
|
except:
|
196
199
|
try:
|
197
|
-
with codecs.open(path,
|
200
|
+
with codecs.open(path, "rb", "utf-8") as r:
|
198
201
|
lines = r.readlines()
|
199
202
|
return lines
|
200
203
|
except:
|
201
204
|
try:
|
202
|
-
with codecs.open(path,
|
205
|
+
with codecs.open(path, "rb", "big5") as r:
|
203
206
|
lines = r.readlines()
|
204
207
|
return lines
|
205
208
|
except:
|
206
|
-
with codecs.open(path,
|
209
|
+
with codecs.open(path, "rb", "gb2312", errors="ignore") as r:
|
207
210
|
lines = r.readlines()
|
208
211
|
return lines
|
209
212
|
|
210
213
|
|
211
214
|
# line by line
|
212
215
|
def save_to_jsonl(corpus, path):
|
213
|
-
with open(path,
|
216
|
+
with open(path, "w", encoding="utf-8") as wt:
|
214
217
|
for i in corpus:
|
215
218
|
wt.write(json.dumps(i, ensure_ascii=False))
|
216
|
-
wt.write(
|
219
|
+
wt.write("\n")
|
217
220
|
|
218
221
|
|
219
222
|
# line by line
|
220
223
|
def load_from_jsonl(path):
|
221
224
|
file_line_num = iter_count(path)
|
222
225
|
if file_line_num > 1e5:
|
223
|
-
with open(path,
|
226
|
+
with open(path, "r", encoding="utf-8") as rd:
|
224
227
|
corpus = []
|
225
228
|
while True:
|
226
229
|
line = rd.readline()
|
@@ -230,7 +233,7 @@ def load_from_jsonl(path):
|
|
230
233
|
break
|
231
234
|
return corpus
|
232
235
|
else:
|
233
|
-
with open(path,
|
236
|
+
with open(path, "r", encoding="utf-8") as rd:
|
234
237
|
corpus = []
|
235
238
|
while True:
|
236
239
|
line = rd.readline()
|
@@ -242,20 +245,20 @@ def load_from_jsonl(path):
|
|
242
245
|
|
243
246
|
|
244
247
|
def save_pkl(data, path):
|
245
|
-
with open(path,
|
248
|
+
with open(path, "wb") as f:
|
246
249
|
pickle.dump(data, f)
|
247
250
|
|
248
251
|
|
249
252
|
def load_pkl(path):
|
250
|
-
with open(path,
|
253
|
+
with open(path, "rb") as f:
|
251
254
|
data = pickle.load(f)
|
252
255
|
return data
|
253
256
|
|
254
257
|
|
255
258
|
def save_to_csv(df, save_path, index_flag=False):
|
256
|
-
with open(save_path,
|
259
|
+
with open(save_path, "wb+") as csvfile:
|
257
260
|
csvfile.write(codecs.BOM_UTF8)
|
258
|
-
df.to_csv(save_path, mode=
|
261
|
+
df.to_csv(save_path, mode="a", index=index_flag)
|
259
262
|
|
260
263
|
|
261
264
|
def save_to_mongo():
|
nlpertools/llm/__init__.py
CHANGED
nlpertools/llm/call_llm_once.py
CHANGED
@@ -1,7 +1,6 @@
|
|
1
|
-
from ..io.file import
|
1
|
+
from ..io.file import read_yaml
|
2
2
|
from tqdm import tqdm
|
3
3
|
import os
|
4
|
-
from openai import Openai
|
5
4
|
from typing import Optional, Union
|
6
5
|
|
7
6
|
"""
|
@@ -9,8 +8,38 @@ from typing import Optional, Union
|
|
9
8
|
"""
|
10
9
|
|
11
10
|
|
11
|
+
def call_once_stream(
|
12
|
+
client, input: Optional[Union[str, list]], model_name: str = "qwen3-0626-e4", max_tokens: int = 8192, temperature=0.2
|
13
|
+
) -> str:
|
14
|
+
"""
|
15
|
+
调用LLM模型进行一次推理
|
16
|
+
:param prompt: 输入的提示文本
|
17
|
+
:param model_name: 模型名称
|
18
|
+
:param max_tokens: 最大输出token数
|
19
|
+
:return: 模型的输出文本
|
20
|
+
"""
|
21
|
+
from openai import OpenAI
|
22
|
+
|
23
|
+
if isinstance(input, str):
|
24
|
+
message = [{"role": "user", "content": input}]
|
25
|
+
elif isinstance(input, list):
|
26
|
+
message = input
|
27
|
+
|
28
|
+
completion = client.chat.completions.create(model=model_name, messages=message, max_tokens=max_tokens, stream=True)
|
29
|
+
text = ""
|
30
|
+
for chunk in completion:
|
31
|
+
if chunk.choices:
|
32
|
+
c = chunk.choices[0].delta.content or ""
|
33
|
+
text += c
|
34
|
+
print(c, end="")
|
35
|
+
else:
|
36
|
+
print()
|
37
|
+
print(chunk.usage)
|
38
|
+
return text
|
39
|
+
|
40
|
+
|
12
41
|
def call_once(
|
13
|
-
client
|
42
|
+
client, input: Optional[Union[str, list]], model_name: str = "qwen3-0626-e4", max_tokens: int = 8192, temperature=0.8
|
14
43
|
) -> str:
|
15
44
|
"""
|
16
45
|
调用LLM模型进行一次推理
|
@@ -19,12 +48,13 @@ def call_once(
|
|
19
48
|
:param max_tokens: 最大输出token数
|
20
49
|
:return: 模型的输出文本
|
21
50
|
"""
|
51
|
+
from openai import OpenAI
|
22
52
|
|
23
53
|
if isinstance(input, str):
|
24
54
|
message = [{"role": "user", "content": input}]
|
25
55
|
elif isinstance(input, list):
|
26
56
|
message = input
|
27
57
|
|
28
|
-
response = client.chat.completions.create(model=model_name, messages=message, max_tokens=max_tokens)
|
58
|
+
response = client.chat.completions.create(model=model_name, messages=message, max_tokens=max_tokens,temperature=temperature)
|
29
59
|
|
30
60
|
return response.choices[0].message.content
|
nlpertools/llm/infer.py
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
import os
|
2
2
|
from tqdm import tqdm
|
3
|
-
from openai import OpenAI
|
4
3
|
import concurrent.futures
|
4
|
+
import itertools
|
5
5
|
|
6
6
|
|
7
7
|
INFER_PARAS = {
|
@@ -15,14 +15,17 @@ INFER_PARAS = {
|
|
15
15
|
|
16
16
|
|
17
17
|
def parse_infer_data(infer_data: list):
|
18
|
+
# 解释一下为什么要[][],因为message本来就必须得是[]
|
18
19
|
if isinstance(infer_data[0], str):
|
19
|
-
message = [{"role": "user", "content": i} for i in infer_data]
|
20
|
+
message = [[{"role": "user", "content": i}] for i in infer_data]
|
20
21
|
elif isinstance(infer_data[0], list):
|
21
22
|
message = infer_data
|
22
23
|
return message
|
23
24
|
|
24
25
|
|
25
|
-
def common_api_infer_func(model_name, infer_data: list, infer_paras, client
|
26
|
+
def common_api_infer_func(model_name, infer_data: list, infer_paras, client):
|
27
|
+
from openai import OpenAI
|
28
|
+
|
26
29
|
"""
|
27
30
|
infer_data: list of messages/prompt
|
28
31
|
"""
|
@@ -31,16 +34,58 @@ def common_api_infer_func(model_name, infer_data: list, infer_paras, client: Ope
|
|
31
34
|
def get_response(model_name, messages, infer_paras):
|
32
35
|
responses = []
|
33
36
|
infer_times = infer_paras.get("infer_times", 1)
|
37
|
+
|
34
38
|
for _ in range(infer_times):
|
35
39
|
# 使用OpenAI API进行推理
|
36
|
-
response = client.chat.completions.create(
|
40
|
+
response = client.chat.completions.create(
|
41
|
+
model=model_name,
|
42
|
+
messages=messages,
|
43
|
+
temperature=infer_paras.get("temperature", 0.7),
|
44
|
+
max_tokens=infer_paras.get("max_tokens", 8192),
|
45
|
+
)
|
37
46
|
text = response.choices[0].message.content
|
38
47
|
responses.append({"text": text})
|
39
48
|
return responses
|
40
49
|
|
41
50
|
with concurrent.futures.ThreadPoolExecutor(16) as executor:
|
42
51
|
futures = [executor.submit(get_response, model_name, message, infer_paras) for message in messages]
|
43
|
-
results = [future.result() for future in concurrent.futures.as_completed(futures)]
|
52
|
+
# results = [future.result() for future in tqdm(concurrent.futures.as_completed(futures))] # 乱序
|
53
|
+
results = [future.result() for future in tqdm(futures)]
|
54
|
+
|
55
|
+
return results
|
56
|
+
|
57
|
+
|
58
|
+
def common_api_infer_func_multi_client(model_name, infer_data: list, infer_paras, clients: list):
|
59
|
+
"""
|
60
|
+
infer_data: list of messages/prompt
|
61
|
+
"""
|
62
|
+
messages = parse_infer_data(infer_data)
|
63
|
+
iter_cycle = itertools.cycle(clients)
|
64
|
+
|
65
|
+
def get_response(model_name, messages, infer_paras):
|
66
|
+
client = next(iter_cycle)
|
67
|
+
# print(client.base_url)
|
68
|
+
responses = []
|
69
|
+
infer_times = infer_paras.get("infer_times", 1)
|
70
|
+
for _ in range(infer_times):
|
71
|
+
# 使用OpenAI API进行推理
|
72
|
+
try:
|
73
|
+
response = client.chat.completions.create(
|
74
|
+
model=model_name,
|
75
|
+
messages=messages,
|
76
|
+
temperature=infer_paras.get("temperature", 0.7),
|
77
|
+
max_tokens=infer_paras.get("max_tokens", 8192),
|
78
|
+
)
|
79
|
+
text = response.choices[0].message.content
|
80
|
+
except Exception as e:
|
81
|
+
print(e.__str__())
|
82
|
+
text = ""
|
83
|
+
responses.append({"text": text})
|
84
|
+
return responses
|
85
|
+
|
86
|
+
with concurrent.futures.ThreadPoolExecutor(128) as executor:
|
87
|
+
futures = [executor.submit(get_response, model_name, message, infer_paras) for message in messages]
|
88
|
+
results = [future.result() for future in tqdm(futures)]
|
44
89
|
|
45
90
|
return results
|
46
91
|
|
nlpertools/other.py
CHANGED
@@ -3,7 +3,6 @@
|
|
3
3
|
# @Author : youshu.Ji
|
4
4
|
import itertools
|
5
5
|
import os
|
6
|
-
import re
|
7
6
|
import string
|
8
7
|
import subprocess
|
9
8
|
import threading
|
@@ -13,7 +12,10 @@ import math
|
|
13
12
|
import datetime
|
14
13
|
import difflib
|
15
14
|
import psutil
|
15
|
+
import sys
|
16
|
+
|
16
17
|
from .io.file import writetxt_w_list, writetxt_a
|
18
|
+
|
17
19
|
# import numpy as np
|
18
20
|
# import psutil
|
19
21
|
# import pyquery as pq
|
@@ -25,9 +27,9 @@ from .io.file import writetxt_w_list, writetxt_a
|
|
25
27
|
# from win32evtlogutil import langid
|
26
28
|
from .utils.package import *
|
27
29
|
|
28
|
-
CHINESE_PUNCTUATION = list(
|
29
|
-
ENGLISH_PUNCTUATION = list('
|
30
|
-
OTHER_PUNCTUATION = list(
|
30
|
+
CHINESE_PUNCTUATION = list(",。;:‘’“”!?《》「」【】<>()、")
|
31
|
+
ENGLISH_PUNCTUATION = list(",.;:'\"!?<>()")
|
32
|
+
OTHER_PUNCTUATION = list("!@#$%^&*")
|
31
33
|
|
32
34
|
|
33
35
|
def setup_logging(log_file):
|
@@ -40,11 +42,23 @@ def setup_logging(log_file):
|
|
40
42
|
logging.basicConfig(
|
41
43
|
filename=log_file,
|
42
44
|
level=logging.INFO,
|
43
|
-
format=
|
44
|
-
datefmt=
|
45
|
+
format="%(asctime)s - %(levelname)s - %(message)s",
|
46
|
+
datefmt="%Y-%m-%d %H:%M:%S",
|
45
47
|
)
|
46
48
|
|
47
49
|
|
50
|
+
def stop():
|
51
|
+
sys.exit()
|
52
|
+
|
53
|
+
|
54
|
+
def exit():
|
55
|
+
sys.exit()
|
56
|
+
|
57
|
+
|
58
|
+
def round2(num):
|
59
|
+
return round(num * 100, 2)
|
60
|
+
|
61
|
+
|
48
62
|
def get_diff_parts(str1, str2):
|
49
63
|
# 创建一个 SequenceMatcher 对象
|
50
64
|
matcher = difflib.SequenceMatcher(None, str1, str2)
|
@@ -52,7 +66,7 @@ def get_diff_parts(str1, str2):
|
|
52
66
|
# 获取差异部分
|
53
67
|
diff_parts = []
|
54
68
|
for tag, i1, i2, j1, j2 in matcher.get_opcodes():
|
55
|
-
if tag ==
|
69
|
+
if tag == "replace" or tag == "delete" or tag == "insert":
|
56
70
|
diff_parts.append((tag, str1[i1:i2], str2[j1:j2]))
|
57
71
|
|
58
72
|
return diff_parts
|
@@ -62,8 +76,9 @@ def run_cmd_with_timeout(cmd, timeout):
|
|
62
76
|
"""
|
63
77
|
https://juejin.cn/post/7391703459803086848
|
64
78
|
"""
|
65
|
-
process = subprocess.Popen(
|
66
|
-
|
79
|
+
process = subprocess.Popen(
|
80
|
+
cmd, shell=True, encoding="utf-8", errors="ignore", stdout=subprocess.PIPE, stderr=subprocess.PIPE
|
81
|
+
)
|
67
82
|
res = [None]
|
68
83
|
|
69
84
|
def target():
|
@@ -144,8 +159,11 @@ def print_three_line_table(df):
|
|
144
159
|
border-bottom: (third_line_px)px solid black;
|
145
160
|
}
|
146
161
|
</style>"""
|
147
|
-
style =
|
148
|
-
"(
|
162
|
+
style = (
|
163
|
+
style.replace("(first_line_px)", first_line_px)
|
164
|
+
.replace("(second_line_px)", second_line_px)
|
165
|
+
.replace("(third_line_px)", third_line_px)
|
166
|
+
)
|
149
167
|
# 将 CSS 样式和 HTML 表格结合起来
|
150
168
|
html = f"{style}{html_table}"
|
151
169
|
print(html)
|
@@ -153,7 +171,7 @@ def print_three_line_table(df):
|
|
153
171
|
# 将 HTML 保存到文件中
|
154
172
|
with open(temp_file_path, "w") as f:
|
155
173
|
f.write(html)
|
156
|
-
webbrowser.open(
|
174
|
+
webbrowser.open("file://" + os.path.realpath(temp_file_path))
|
157
175
|
|
158
176
|
|
159
177
|
def jprint(obj, depth=0):
|
@@ -178,6 +196,7 @@ def print_split(sign="=", num=20, char: str = None):
|
|
178
196
|
|
179
197
|
def seed_everything():
|
180
198
|
import torch
|
199
|
+
|
181
200
|
# seed everything
|
182
201
|
seed = 7777777
|
183
202
|
np.random.seed(seed)
|
@@ -192,22 +211,23 @@ def sent_email(mail_user, mail_pass, receiver, title, content, attach_path=None)
|
|
192
211
|
from email.mime.text import MIMEText
|
193
212
|
from email.mime.application import MIMEApplication
|
194
213
|
|
195
|
-
mail_host =
|
214
|
+
mail_host = "smtp.qq.com"
|
196
215
|
mail_user = mail_user
|
197
216
|
mail_pass = mail_pass
|
198
217
|
sender = mail_user
|
199
218
|
|
200
219
|
message = MIMEMultipart()
|
201
|
-
message.attach(MIMEText(content,
|
220
|
+
message.attach(MIMEText(content, "plain", "utf-8"))
|
202
221
|
if attach_path:
|
203
|
-
attachment = MIMEApplication(open(attach_path,
|
204
|
-
attachment["Content-Type"] =
|
205
|
-
attachment.add_header(
|
206
|
-
|
222
|
+
attachment = MIMEApplication(open(attach_path, "rb").read())
|
223
|
+
attachment["Content-Type"] = "application/octet-stream"
|
224
|
+
attachment.add_header(
|
225
|
+
"Content-Dispositon", "attachment", filename=("utf-8", "", attach_path)
|
226
|
+
) # 注意:此处basename要转换为gbk编码,否则中文会有乱码。
|
207
227
|
message.attach(attachment)
|
208
|
-
message[
|
209
|
-
message[
|
210
|
-
message[
|
228
|
+
message["Subject"] = title
|
229
|
+
message["From"] = sender
|
230
|
+
message["To"] = receiver
|
211
231
|
|
212
232
|
try:
|
213
233
|
smtp_obj = smtplib.SMTP()
|
@@ -215,9 +235,9 @@ def sent_email(mail_user, mail_pass, receiver, title, content, attach_path=None)
|
|
215
235
|
smtp_obj.login(mail_user, mail_pass)
|
216
236
|
smtp_obj.sendmail(sender, receiver, message.as_string())
|
217
237
|
smtp_obj.quit()
|
218
|
-
print(
|
238
|
+
print("send email success")
|
219
239
|
except smtplib.SMTPException as e:
|
220
|
-
print(
|
240
|
+
print("send failed", e)
|
221
241
|
|
222
242
|
|
223
243
|
def convert_np_to_py(obj):
|
@@ -247,12 +267,12 @@ def camel_to_snake(s: str) -> str:
|
|
247
267
|
:param s: camel case variable
|
248
268
|
:return:
|
249
269
|
"""
|
250
|
-
return reduce(lambda x, y: x + (
|
270
|
+
return reduce(lambda x, y: x + ("_" if y.isupper() else "") + y, s).lower()
|
251
271
|
|
252
272
|
|
253
273
|
# other ----------------------------------------------------------------------
|
254
274
|
# 统计词频
|
255
|
-
def calc_word_count(list_word, mode, path=
|
275
|
+
def calc_word_count(list_word, mode, path="tempcount.txt", sort_id=1, is_reverse=True):
|
256
276
|
word_count = {}
|
257
277
|
for key in list_word:
|
258
278
|
if key not in word_count:
|
@@ -260,20 +280,20 @@ def calc_word_count(list_word, mode, path='tempcount.txt', sort_id=1, is_reverse
|
|
260
280
|
else:
|
261
281
|
word_count[key] += 1
|
262
282
|
word_dict_sort = sorted(word_count.items(), key=lambda x: x[sort_id], reverse=is_reverse)
|
263
|
-
if mode ==
|
283
|
+
if mode == "w":
|
264
284
|
for key in word_dict_sort:
|
265
|
-
writetxt_a(str(key[0]) +
|
266
|
-
elif mode ==
|
285
|
+
writetxt_a(str(key[0]) + "\t" + str(key[1]) + "\n", path)
|
286
|
+
elif mode == "p":
|
267
287
|
for key in word_dict_sort:
|
268
|
-
print(str(key[0]) +
|
269
|
-
elif mode ==
|
288
|
+
print(str(key[0]) + "\t" + str(key[1]))
|
289
|
+
elif mode == "u":
|
270
290
|
return word_dict_sort
|
271
291
|
|
272
292
|
|
273
293
|
# 字典去重
|
274
294
|
def dupl_dict(dict_list, key):
|
275
295
|
new_dict_list, value_set = [], []
|
276
|
-
print(
|
296
|
+
print("去重中...")
|
277
297
|
for i in tqdm(dict_list):
|
278
298
|
if i[key] not in value_set:
|
279
299
|
new_dict_list.append(i)
|
@@ -288,9 +308,9 @@ def multi_thread_run(_task, data):
|
|
288
308
|
|
289
309
|
|
290
310
|
def del_special_char(sentence):
|
291
|
-
special_chars = [
|
311
|
+
special_chars = ["\ufeff", "\xa0", "\u3000", "\xa0", "\ue627"]
|
292
312
|
for i in special_chars:
|
293
|
-
sentence = sentence.replace(i,
|
313
|
+
sentence = sentence.replace(i, "")
|
294
314
|
return sentence
|
295
315
|
|
296
316
|
|
@@ -306,20 +326,20 @@ def spider(url):
|
|
306
326
|
:param url:
|
307
327
|
:return:
|
308
328
|
"""
|
309
|
-
if
|
329
|
+
if "baijiahao" in url:
|
310
330
|
content = requests.get(url)
|
311
331
|
# print(content.text)
|
312
332
|
html = pq.PyQuery(content.text)
|
313
|
-
title = html(
|
314
|
-
res = html(
|
315
|
-
return
|
333
|
+
title = html(".index-module_articleTitle_28fPT").text()
|
334
|
+
res = html(".index-module_articleWrap_2Zphx").text().rstrip("举报/反馈")
|
335
|
+
return "{}\n{}".format(title, res)
|
316
336
|
|
317
337
|
|
318
338
|
def eda(sentence):
|
319
|
-
url =
|
339
|
+
url = "https://x.x.x.x:x/eda"
|
320
340
|
json_data = dict({"sentence": sentence})
|
321
341
|
res = requests.post(url, json=json_data)
|
322
|
-
return res.json()[
|
342
|
+
return res.json()["eda"]
|
323
343
|
|
324
344
|
|
325
345
|
def find_language(text):
|
@@ -353,8 +373,8 @@ def print_prf(y_true, y_pred, label=None):
|
|
353
373
|
for i in range(len(label)):
|
354
374
|
res = []
|
355
375
|
for k in result:
|
356
|
-
res.append(
|
357
|
-
print(
|
376
|
+
res.append("%.5f" % k[i])
|
377
|
+
print("{}: {} {} {}".format(label[i], *res[:3]))
|
358
378
|
|
359
379
|
|
360
380
|
def print_cpu():
|
@@ -375,14 +395,16 @@ def squeeze_list(high_dim_list):
|
|
375
395
|
|
376
396
|
def unsqueeze_list(flatten_list, each_element_len):
|
377
397
|
# 该函数是错的,被split_list替代了
|
378
|
-
two_dim_list = [
|
379
|
-
|
398
|
+
two_dim_list = [
|
399
|
+
flatten_list[i * each_element_len : (i + 1) * each_element_len]
|
400
|
+
for i in range(len(flatten_list) // each_element_len)
|
401
|
+
]
|
380
402
|
return two_dim_list
|
381
403
|
|
382
404
|
|
383
405
|
def split_list(input_list, chunk_size):
|
384
406
|
# 使用列表推导式将列表分割成二维数组
|
385
|
-
return [input_list[i:i + chunk_size] for i in range(0, len(input_list), chunk_size)]
|
407
|
+
return [input_list[i : i + chunk_size] for i in range(0, len(input_list), chunk_size)]
|
386
408
|
|
387
409
|
|
388
410
|
def auto_close():
|
@@ -392,6 +414,7 @@ def auto_close():
|
|
392
414
|
import pyautogui as pg
|
393
415
|
import time
|
394
416
|
import os
|
417
|
+
|
395
418
|
cmd = 'schtasks /create /tn shut /tr "shutdown -s -f" /sc once /st 23:30'
|
396
419
|
os.system(cmd)
|
397
420
|
while 1:
|
@@ -405,10 +428,13 @@ def tf_idf(corpus, save_path):
|
|
405
428
|
vectorizer = CountVectorizer() # 该类会将文本中的词语转换为词频矩阵,矩阵元素a[i][j] 表示j词在i类文本下的词频
|
406
429
|
transformer = TfidfTransformer() # 该类会统计每个词语的tf-idf权值
|
407
430
|
tfidf = transformer.fit_transform(
|
408
|
-
vectorizer.fit_transform(corpus)
|
431
|
+
vectorizer.fit_transform(corpus)
|
432
|
+
) # 第一个fit_transform是计算tf-idf,第二个fit_transform是将文本转为词频矩阵
|
409
433
|
word = vectorizer.get_feature_names() # 获取词袋模型中的所有词语
|
410
434
|
weight = tfidf.toarray() # 将tf-idf矩阵抽取出来,元素a[i][j]表示j词在i类文本中的tf-idf权重
|
411
|
-
for i in range(
|
435
|
+
for i in range(
|
436
|
+
len(weight)
|
437
|
+
): # 打印每类文本的tf-idf词语权重,第一个for遍历所有文本,第二个for便利某一类文本下的词语权重
|
412
438
|
for j in range(len(word)):
|
413
439
|
getword = word[j]
|
414
440
|
getvalue = weight[i][j]
|
@@ -418,7 +444,7 @@ def tf_idf(corpus, save_path):
|
|
418
444
|
else:
|
419
445
|
tfidfdict.update({getword: getvalue})
|
420
446
|
sorted_tfidf = sorted(tfidfdict.items(), key=lambda d: d[1], reverse=True)
|
421
|
-
to_write = [
|
447
|
+
to_write = ["{} {}".format(i[0], i[1]) for i in sorted_tfidf]
|
422
448
|
writetxt_w_list(to_write, save_path, num_lf=1)
|
423
449
|
|
424
450
|
|
@@ -427,7 +453,7 @@ class GaussDecay(object):
|
|
427
453
|
当前只实现了时间的,全部使用默认值
|
428
454
|
"""
|
429
455
|
|
430
|
-
def __init__(self, origin=
|
456
|
+
def __init__(self, origin="2022-08-02", scale="90d", offset="5d", decay=0.5, task="time"):
|
431
457
|
self.origin = origin
|
432
458
|
self.task = task
|
433
459
|
self.scale, self.offset = self.translate(scale, offset)
|
@@ -451,7 +477,7 @@ class GaussDecay(object):
|
|
451
477
|
@staticmethod
|
452
478
|
def translated_minus(field_value):
|
453
479
|
origin = datetime.datetime.now()
|
454
|
-
field_value = datetime.datetime.strptime(field_value,
|
480
|
+
field_value = datetime.datetime.strptime(field_value, "%Y-%m-%d %H:%M:%S")
|
455
481
|
return (origin - field_value).days
|
456
482
|
|
457
483
|
def calc_exp(self):
|
@@ -469,13 +495,13 @@ class GaussDecay(object):
|
|
469
495
|
:return:
|
470
496
|
"""
|
471
497
|
numerator = max(0, (abs(self.translated_minus(field_value)) - self.offset)) ** 2
|
472
|
-
sigma_square = -1 * self.scale
|
498
|
+
sigma_square = -1 * self.scale**2 / (2 * math.log(self.decay, math.e))
|
473
499
|
denominator = 2 * sigma_square
|
474
500
|
s = math.exp(-1 * numerator / denominator)
|
475
501
|
return round(self.time_coefficient * s + self.related_coefficient * raw_score, 7)
|
476
502
|
|
477
503
|
|
478
|
-
if __name__ ==
|
504
|
+
if __name__ == "__main__":
|
479
505
|
gauss_decay = GaussDecay()
|
480
506
|
res = gauss_decay.calc_gauss(raw_score=1, field_value="2021-05-29 14:31:13")
|
481
507
|
print(res)
|
nlpertools/utils/package.py
CHANGED
@@ -37,20 +37,19 @@ def lazy_import(importer_name, to_import):
|
|
37
37
|
module = importlib.import_module(importer_name)
|
38
38
|
import_mapping = {}
|
39
39
|
for name in to_import:
|
40
|
-
importing, _, binding = name.partition(
|
40
|
+
importing, _, binding = name.partition(" as ")
|
41
41
|
if not binding:
|
42
|
-
_, _, binding = importing.rpartition(
|
42
|
+
_, _, binding = importing.rpartition(".")
|
43
43
|
import_mapping[binding] = importing
|
44
44
|
|
45
45
|
def __getattr__(name):
|
46
46
|
if name not in import_mapping:
|
47
|
-
message = f
|
47
|
+
message = f"module {importer_name!r} has no attribute {name!r}"
|
48
48
|
raise AttributeError(message)
|
49
49
|
importing = import_mapping[name]
|
50
50
|
# imortlib.import_module() implicitly sets submodules on this module as
|
51
51
|
# appropriate for direct imports.
|
52
|
-
imported = importlib.import_module(importing,
|
53
|
-
module.__spec__.parent)
|
52
|
+
imported = importlib.import_module(importing, module.__spec__.parent)
|
54
53
|
setattr(module, name, imported)
|
55
54
|
return imported
|
56
55
|
|
@@ -75,15 +74,15 @@ KafkaConsumer = try_import("kafka", "KafkaConsumer")
|
|
75
74
|
np = try_import("numpy", None)
|
76
75
|
plt = try_import("matplotlib", "pyplot")
|
77
76
|
WordNetLemmatizer = try_import("nltk.stem", "WordNetLemmatizer")
|
78
|
-
metrics = try_import("sklearn", "metrics")
|
77
|
+
# metrics = try_import("sklearn", "metrics")
|
79
78
|
requests = try_import("requests", None)
|
80
79
|
pq = try_import("pyquery", None)
|
81
|
-
CountVectorizer = try_import("sklearn.feature_extraction.text", "CountVectorizer")
|
82
|
-
precision_recall_fscore_support = try_import("sklearn.metrics", "precision_recall_fscore_support")
|
80
|
+
# CountVectorizer = try_import("sklearn.feature_extraction.text", "CountVectorizer")
|
81
|
+
# precision_recall_fscore_support = try_import("sklearn.metrics", "precision_recall_fscore_support")
|
83
82
|
tqdm = try_import("tqdm", "tqdm")
|
84
83
|
# TODO 自动导出langid和win32evtlogutil输出有bug
|
85
84
|
langid = try_import("langid", None)
|
86
85
|
win32evtlogutil = try_import("win32evtlogutil", None)
|
87
|
-
TfidfTransformer = try_import("sklearn.feature_extraction.text", "TfidfTransformer")
|
86
|
+
# TfidfTransformer = try_import("sklearn.feature_extraction.text", "TfidfTransformer")
|
88
87
|
yaml = try_import("yaml", None)
|
89
|
-
omegaconf = try_import("omegaconf", None)
|
88
|
+
omegaconf = try_import("omegaconf", None)
|
nlpertools/wrapper.py
CHANGED
@@ -7,16 +7,18 @@ import time
|
|
7
7
|
from functools import wraps
|
8
8
|
import asyncio
|
9
9
|
|
10
|
+
|
10
11
|
def fn_async_timer(function):
|
11
12
|
"""
|
12
13
|
针对异步函数的装饰器
|
13
14
|
"""
|
15
|
+
|
14
16
|
@wraps(function)
|
15
17
|
async def function_timer(*args, **kwargs):
|
16
18
|
t0 = time.time()
|
17
19
|
result = await function(*args, **kwargs)
|
18
20
|
t1 = time.time()
|
19
|
-
print(
|
21
|
+
print("[finished {func_name} in {time:.2f}s]".format(func_name=function.__name__, time=t1 - t0))
|
20
22
|
return result
|
21
23
|
|
22
24
|
return function_timer
|
@@ -36,14 +38,14 @@ def fn_timer(async_func=False, analyse=False):
|
|
36
38
|
t0 = time.time()
|
37
39
|
result = await asyncio.create_task(func(*args, **kwargs))
|
38
40
|
t1 = time.time()
|
39
|
-
print(
|
41
|
+
print("[finished {func_name} in {time:.2f}s]".format(func_name=func.__name__, time=t1 - t0))
|
40
42
|
return result
|
41
43
|
|
42
44
|
def func_time(*args, **kwargs):
|
43
45
|
t0 = time.time()
|
44
46
|
result = func(*args, **kwargs)
|
45
47
|
t1 = time.time()
|
46
|
-
print(
|
48
|
+
print("[finished {func_name} in {time:.2f}s]".format(func_name=func.__name__, time=t1 - t0))
|
47
49
|
return result
|
48
50
|
|
49
51
|
def func_time_analyse(*args, **kwargs):
|
@@ -114,7 +116,7 @@ def fn_try(parameter):
|
|
114
116
|
return result
|
115
117
|
except Exception as e:
|
116
118
|
msg = "报错!"
|
117
|
-
print(
|
119
|
+
print("[func_name: {func_name} {msg}]".format(func_name=function.__name__, msg=msg))
|
118
120
|
parameter["msg"] = parameter["msg"].format(str(e))
|
119
121
|
return parameter
|
120
122
|
finally:
|
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.4
|
2
2
|
Name: nlpertools
|
3
|
-
Version: 1.0.
|
3
|
+
Version: 1.0.11
|
4
4
|
Summary: A small package about small basic IO operation when coding
|
5
5
|
Home-page: https://github.com/lvzii/nlpertools
|
6
6
|
Author: youshuJi
|
@@ -64,6 +64,8 @@ json_data = nlpertools.load_from_json('res.json')
|
|
64
64
|
```bash
|
65
65
|
## git, 连接github不稳定的时候非常有用
|
66
66
|
ncli git pull
|
67
|
+
## 带有参数时,加上--以避免-u被解析
|
68
|
+
ncli -- git push -u origin main
|
67
69
|
|
68
70
|
# 生成pypi双因素认证的实时密钥(需要提供key)
|
69
71
|
ncli --get_2fa --get_2fa_key your_key
|
@@ -1,20 +1,19 @@
|
|
1
|
-
nlpertools/__init__.py,sha256=
|
2
|
-
nlpertools/cli.py,sha256=
|
1
|
+
nlpertools/__init__.py,sha256=VnH7GWVSTcV010_kD4VtsOAwIjzhe8prax8Wj17uc20,537
|
2
|
+
nlpertools/cli.py,sha256=uCIUkiBXqTWJaxSQd5MlliGcTfxWzymo1UyQ3z_uhak,3612
|
3
3
|
nlpertools/data_client.py,sha256=esX8lUQrTui4uVkqPfhpHVok7Eq6ywpuemKjLeqoglc,14674
|
4
|
-
nlpertools/dataprocess.py,sha256=3ayCZAFc5t-Ov06oenRhMoGmnQrmCy-gtPhswecjEa4,23451
|
5
4
|
nlpertools/default_db_config.yml,sha256=E1K9k_xzXVlsf-HJQh8kyHXHYuvTpD12jD4Hfe5rUk8,606
|
6
5
|
nlpertools/get_2fa.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
7
6
|
nlpertools/ml.py,sha256=fjI-WMM1lRnOnRFfTLEGplUx_Uamgr2gfmoAvGlgF7E,18994
|
8
7
|
nlpertools/movie.py,sha256=rkyOnAXdsbWfMSbi1sE1VNRT7f66Hp9BnZsN_58Afmw,897
|
9
8
|
nlpertools/nlpertools_config.yml,sha256=ksXejxFs7pxR47tNAsrN88_4gvq9PCA2ZMO07H-dJXY,26
|
10
9
|
nlpertools/open_api.py,sha256=uyTY00OUlM57Cn0Wm0yZXcIS8vAszy9rKnDMBEWfWJM,1744
|
11
|
-
nlpertools/other.py,sha256=
|
10
|
+
nlpertools/other.py,sha256=LaNZRQ8wWJqZP6Gycq7eThEqcGXIANg7WzT6nh5QiKQ,15262
|
12
11
|
nlpertools/pic.py,sha256=13aaFJh3USGYGs4Y9tAKTvWjmdQR4YDjl3LlIhJheOA,9906
|
13
12
|
nlpertools/plugin.py,sha256=LB7j9GdoQi6TITddH-6EglHlOa0WIHLUT7X5vb_aIZY,1168
|
14
13
|
nlpertools/reminder.py,sha256=wiXwZQmxMck5vY3EvG8_oakP3FAdjGTikAIOiTPUQrs,2977
|
15
14
|
nlpertools/utils_for_nlpertools.py,sha256=SJqjfMc2Vd8ZCqzQiJCkSxjJxEKzvEUgAgbhKPtC6ww,3583
|
16
15
|
nlpertools/vector_index_demo.py,sha256=CSCzXD13bUIo9AG-bjen668H10B02HFU1Kbxakvrs68,2924
|
17
|
-
nlpertools/wrapper.py,sha256=
|
16
|
+
nlpertools/wrapper.py,sha256=8ReHv7LrBGX6wHma8rf_EhFPg0FJNoDjbn4p0O2UHzs,4350
|
18
17
|
nlpertools/algo/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
19
18
|
nlpertools/algo/ac.py,sha256=4BSiJdG8-S78w_KRqvGOkYjxuTDRiBsskRv-6Doi7oE,422
|
20
19
|
nlpertools/algo/bit_ops.py,sha256=l14-j5VOqrab80CA_uBs1AiAJbzJUJH9dJXc7O9F5d0,501
|
@@ -24,15 +23,18 @@ nlpertools/algo/template.py,sha256=9vsHr4g3jZZ5KVU_2I9i97o8asRXq-8pSaCXIv0sHeM,2
|
|
24
23
|
nlpertools/algo/union.py,sha256=0l7lGZbw1qIfW1z5TE8Oo3tybL1bKIP5rzpa5ZT-vLQ,249
|
25
24
|
nlpertools/data_structure/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
26
25
|
nlpertools/data_structure/base_structure.py,sha256=gVUvJZ5jsCAswRETTpMwcEjLKoageWiTuCKNEwIWKWk,2641
|
26
|
+
nlpertools/dataprocess/__init__.py,sha256=YPBPsZ8vAoMS6GJ7GlCqj01Cx1q8dDARc_gW-ysORyk,21
|
27
|
+
nlpertools/dataprocess/dedupl.py,sha256=WIBOrM6LfX3txcDa0xF7rqeBIpfqwrDBgepa6bavpt0,289
|
28
|
+
nlpertools/dataprocess/dp_main.py,sha256=iyDsmKzUx5lD8EUNwkWIlTGKVQQDVx8p3pXFv2_kR64,23452
|
27
29
|
nlpertools/draw/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
28
30
|
nlpertools/draw/draw.py,sha256=19dskkr0wrgczxPJnphEszliwYshEh5SjD8Zz07nlk0,2615
|
29
31
|
nlpertools/draw/math_func.py,sha256=0NQ22Dfi9DFG6Bg_hXnCT27w65-dqpOOIgZX7oUIW-Q,881
|
30
32
|
nlpertools/io/__init__.py,sha256=YMuKtC2Ddh5dL5MvXjyUKYOOuqzFYUhBPFaP2kyFG9I,68
|
31
|
-
nlpertools/io/dir.py,sha256=
|
32
|
-
nlpertools/io/file.py,sha256=
|
33
|
-
nlpertools/llm/__init__.py,sha256=
|
34
|
-
nlpertools/llm/call_llm_once.py,sha256=
|
35
|
-
nlpertools/llm/infer.py,sha256=
|
33
|
+
nlpertools/io/dir.py,sha256=jpJuCwLeBInr03iCSUfffmlchWShZ2Cjq38n0D0dILI,3106
|
34
|
+
nlpertools/io/file.py,sha256=NF1xV5iazl86-TDdMQJ-LLrqCnuW29uuFb_NA55YNr4,7274
|
35
|
+
nlpertools/llm/__init__.py,sha256=SdbGjzhu1lCeq55mC0tgsah9yzVxvvNrWMf2z8kDEoQ,71
|
36
|
+
nlpertools/llm/call_llm_once.py,sha256=W0J2Ab8dHnVZ8q_KgfTKbee7NlJnA-ewjsne80ALLXY,1793
|
37
|
+
nlpertools/llm/infer.py,sha256=q7asgwdJwo27d6rdBNQLys_bPEF0g-UNDKjt3S-Ltvs,4133
|
36
38
|
nlpertools/llm/price.py,sha256=8zzEaLrbGiDUbTFSnuBGAduiSfDVXQUk4Oc_lE6eJFw,544
|
37
39
|
nlpertools/monitor/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
38
40
|
nlpertools/monitor/gpu.py,sha256=M59O6i0hlew7AzXZlaVZqbZA5IR93OhBY2WI0-T_HtY,531
|
@@ -41,13 +43,13 @@ nlpertools/template/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSu
|
|
41
43
|
nlpertools/utils/__init__.py,sha256=YMuKtC2Ddh5dL5MvXjyUKYOOuqzFYUhBPFaP2kyFG9I,68
|
42
44
|
nlpertools/utils/lazy.py,sha256=SAeHLjxsYhpKWvcQKjs2eY0Nn5n3CJlqtxOLVOe1WjQ,29280
|
43
45
|
nlpertools/utils/log_util.py,sha256=ftJDoTOtroLH-LadOygZljeyltOQn0D2Xb5x7Td1Qdg,428
|
44
|
-
nlpertools/utils/package.py,sha256=
|
46
|
+
nlpertools/utils/package.py,sha256=8TLbrD3nmukpJw9lSpHHbUYK74qyAaSM_jUrCJOG6mo,3227
|
45
47
|
nlpertools/utils/package_v1.py,sha256=sqgFb-zbTdMd5ziJLY6YUPqR49qUNZjxBH35DnyR5Wg,3542
|
46
48
|
nlpertools/utils/package_v2.py,sha256=WOcsguWfUd4XSAfmPgCtL8HtUbqJ6GRSMHb0OsB47r0,3932
|
47
|
-
nlpertools-1.0.
|
49
|
+
nlpertools-1.0.11.dist-info/licenses/LICENSE,sha256=SBcMozykvTbZJ--MqSiKUmHLLROdnr25V70xCQgEwqw,11331
|
48
50
|
nlpertools_helper/__init__.py,sha256=obxRUdZDctvcvK_iA1Dx2HmQFMlMzJto-xDPryq1lJ0,198
|
49
|
-
nlpertools-1.0.
|
50
|
-
nlpertools-1.0.
|
51
|
-
nlpertools-1.0.
|
52
|
-
nlpertools-1.0.
|
53
|
-
nlpertools-1.0.
|
51
|
+
nlpertools-1.0.11.dist-info/METADATA,sha256=3KXxqbO2wWDMXLmnZJm2RvETybvIMekPelhSxE_ovKk,3386
|
52
|
+
nlpertools-1.0.11.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
|
53
|
+
nlpertools-1.0.11.dist-info/entry_points.txt,sha256=XEazQ4vUwJMoMAgAwk1Lq4PRQGklPkPBaFkiP0zN_JE,45
|
54
|
+
nlpertools-1.0.11.dist-info/top_level.txt,sha256=_4q4MIFvMr4cAUbhWKWYdRXIXsF4PJDg4BUsZvgk94s,29
|
55
|
+
nlpertools-1.0.11.dist-info/RECORD,,
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|