nlpertools 1.0.9__py3-none-any.whl → 1.0.11__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- nlpertools/__init__.py +3 -2
- nlpertools/cli.py +26 -47
- nlpertools/dataprocess/__init__.py +1 -0
- nlpertools/dataprocess/dedupl.py +9 -0
- nlpertools/{dataprocess.py → dataprocess/dp_main.py} +13 -1
- nlpertools/io/dir.py +25 -5
- nlpertools/io/file.py +46 -43
- nlpertools/llm/__init__.py +3 -0
- nlpertools/llm/call_llm_once.py +60 -0
- nlpertools/llm/infer.py +119 -0
- nlpertools/llm/price.py +13 -0
- nlpertools/ml.py +72 -59
- nlpertools/other.py +82 -53
- nlpertools/utils/package.py +9 -10
- nlpertools/wrapper.py +6 -4
- {nlpertools-1.0.9.dist-info → nlpertools-1.0.11.dist-info}/METADATA +27 -25
- {nlpertools-1.0.9.dist-info → nlpertools-1.0.11.dist-info}/RECORD +21 -15
- {nlpertools-1.0.9.dist-info → nlpertools-1.0.11.dist-info}/WHEEL +1 -1
- {nlpertools-1.0.9.dist-info → nlpertools-1.0.11.dist-info}/entry_points.txt +0 -0
- {nlpertools-1.0.9.dist-info → nlpertools-1.0.11.dist-info/licenses}/LICENSE +0 -0
- {nlpertools-1.0.9.dist-info → nlpertools-1.0.11.dist-info}/top_level.txt +0 -0
nlpertools/other.py
CHANGED
@@ -3,7 +3,6 @@
|
|
3
3
|
# @Author : youshu.Ji
|
4
4
|
import itertools
|
5
5
|
import os
|
6
|
-
import re
|
7
6
|
import string
|
8
7
|
import subprocess
|
9
8
|
import threading
|
@@ -13,7 +12,10 @@ import math
|
|
13
12
|
import datetime
|
14
13
|
import difflib
|
15
14
|
import psutil
|
15
|
+
import sys
|
16
|
+
|
16
17
|
from .io.file import writetxt_w_list, writetxt_a
|
18
|
+
|
17
19
|
# import numpy as np
|
18
20
|
# import psutil
|
19
21
|
# import pyquery as pq
|
@@ -25,9 +27,9 @@ from .io.file import writetxt_w_list, writetxt_a
|
|
25
27
|
# from win32evtlogutil import langid
|
26
28
|
from .utils.package import *
|
27
29
|
|
28
|
-
CHINESE_PUNCTUATION = list(
|
29
|
-
ENGLISH_PUNCTUATION = list('
|
30
|
-
OTHER_PUNCTUATION = list(
|
30
|
+
CHINESE_PUNCTUATION = list(",。;:‘’“”!?《》「」【】<>()、")
|
31
|
+
ENGLISH_PUNCTUATION = list(",.;:'\"!?<>()")
|
32
|
+
OTHER_PUNCTUATION = list("!@#$%^&*")
|
31
33
|
|
32
34
|
|
33
35
|
def setup_logging(log_file):
|
@@ -40,11 +42,23 @@ def setup_logging(log_file):
|
|
40
42
|
logging.basicConfig(
|
41
43
|
filename=log_file,
|
42
44
|
level=logging.INFO,
|
43
|
-
format=
|
44
|
-
datefmt=
|
45
|
+
format="%(asctime)s - %(levelname)s - %(message)s",
|
46
|
+
datefmt="%Y-%m-%d %H:%M:%S",
|
45
47
|
)
|
46
48
|
|
47
49
|
|
50
|
+
def stop():
|
51
|
+
sys.exit()
|
52
|
+
|
53
|
+
|
54
|
+
def exit():
|
55
|
+
sys.exit()
|
56
|
+
|
57
|
+
|
58
|
+
def round2(num):
|
59
|
+
return round(num * 100, 2)
|
60
|
+
|
61
|
+
|
48
62
|
def get_diff_parts(str1, str2):
|
49
63
|
# 创建一个 SequenceMatcher 对象
|
50
64
|
matcher = difflib.SequenceMatcher(None, str1, str2)
|
@@ -52,7 +66,7 @@ def get_diff_parts(str1, str2):
|
|
52
66
|
# 获取差异部分
|
53
67
|
diff_parts = []
|
54
68
|
for tag, i1, i2, j1, j2 in matcher.get_opcodes():
|
55
|
-
if tag ==
|
69
|
+
if tag == "replace" or tag == "delete" or tag == "insert":
|
56
70
|
diff_parts.append((tag, str1[i1:i2], str2[j1:j2]))
|
57
71
|
|
58
72
|
return diff_parts
|
@@ -62,8 +76,9 @@ def run_cmd_with_timeout(cmd, timeout):
|
|
62
76
|
"""
|
63
77
|
https://juejin.cn/post/7391703459803086848
|
64
78
|
"""
|
65
|
-
process = subprocess.Popen(
|
66
|
-
|
79
|
+
process = subprocess.Popen(
|
80
|
+
cmd, shell=True, encoding="utf-8", errors="ignore", stdout=subprocess.PIPE, stderr=subprocess.PIPE
|
81
|
+
)
|
67
82
|
res = [None]
|
68
83
|
|
69
84
|
def target():
|
@@ -144,8 +159,11 @@ def print_three_line_table(df):
|
|
144
159
|
border-bottom: (third_line_px)px solid black;
|
145
160
|
}
|
146
161
|
</style>"""
|
147
|
-
style =
|
148
|
-
"(
|
162
|
+
style = (
|
163
|
+
style.replace("(first_line_px)", first_line_px)
|
164
|
+
.replace("(second_line_px)", second_line_px)
|
165
|
+
.replace("(third_line_px)", third_line_px)
|
166
|
+
)
|
149
167
|
# 将 CSS 样式和 HTML 表格结合起来
|
150
168
|
html = f"{style}{html_table}"
|
151
169
|
print(html)
|
@@ -153,7 +171,7 @@ def print_three_line_table(df):
|
|
153
171
|
# 将 HTML 保存到文件中
|
154
172
|
with open(temp_file_path, "w") as f:
|
155
173
|
f.write(html)
|
156
|
-
webbrowser.open(
|
174
|
+
webbrowser.open("file://" + os.path.realpath(temp_file_path))
|
157
175
|
|
158
176
|
|
159
177
|
def jprint(obj, depth=0):
|
@@ -169,12 +187,16 @@ def jprint(obj, depth=0):
|
|
169
187
|
print(obj)
|
170
188
|
|
171
189
|
|
172
|
-
def print_split(sign="=", num=20):
|
173
|
-
|
190
|
+
def print_split(sign="=", num=20, char: str = None):
|
191
|
+
if char:
|
192
|
+
print(sign * num // 2, char, sign * num // 2)
|
193
|
+
else:
|
194
|
+
print(sign * num)
|
174
195
|
|
175
196
|
|
176
197
|
def seed_everything():
|
177
198
|
import torch
|
199
|
+
|
178
200
|
# seed everything
|
179
201
|
seed = 7777777
|
180
202
|
np.random.seed(seed)
|
@@ -189,22 +211,23 @@ def sent_email(mail_user, mail_pass, receiver, title, content, attach_path=None)
|
|
189
211
|
from email.mime.text import MIMEText
|
190
212
|
from email.mime.application import MIMEApplication
|
191
213
|
|
192
|
-
mail_host =
|
214
|
+
mail_host = "smtp.qq.com"
|
193
215
|
mail_user = mail_user
|
194
216
|
mail_pass = mail_pass
|
195
217
|
sender = mail_user
|
196
218
|
|
197
219
|
message = MIMEMultipart()
|
198
|
-
message.attach(MIMEText(content,
|
220
|
+
message.attach(MIMEText(content, "plain", "utf-8"))
|
199
221
|
if attach_path:
|
200
|
-
attachment = MIMEApplication(open(attach_path,
|
201
|
-
attachment["Content-Type"] =
|
202
|
-
attachment.add_header(
|
203
|
-
|
222
|
+
attachment = MIMEApplication(open(attach_path, "rb").read())
|
223
|
+
attachment["Content-Type"] = "application/octet-stream"
|
224
|
+
attachment.add_header(
|
225
|
+
"Content-Dispositon", "attachment", filename=("utf-8", "", attach_path)
|
226
|
+
) # 注意:此处basename要转换为gbk编码,否则中文会有乱码。
|
204
227
|
message.attach(attachment)
|
205
|
-
message[
|
206
|
-
message[
|
207
|
-
message[
|
228
|
+
message["Subject"] = title
|
229
|
+
message["From"] = sender
|
230
|
+
message["To"] = receiver
|
208
231
|
|
209
232
|
try:
|
210
233
|
smtp_obj = smtplib.SMTP()
|
@@ -212,9 +235,9 @@ def sent_email(mail_user, mail_pass, receiver, title, content, attach_path=None)
|
|
212
235
|
smtp_obj.login(mail_user, mail_pass)
|
213
236
|
smtp_obj.sendmail(sender, receiver, message.as_string())
|
214
237
|
smtp_obj.quit()
|
215
|
-
print(
|
238
|
+
print("send email success")
|
216
239
|
except smtplib.SMTPException as e:
|
217
|
-
print(
|
240
|
+
print("send failed", e)
|
218
241
|
|
219
242
|
|
220
243
|
def convert_np_to_py(obj):
|
@@ -244,12 +267,12 @@ def camel_to_snake(s: str) -> str:
|
|
244
267
|
:param s: camel case variable
|
245
268
|
:return:
|
246
269
|
"""
|
247
|
-
return reduce(lambda x, y: x + (
|
270
|
+
return reduce(lambda x, y: x + ("_" if y.isupper() else "") + y, s).lower()
|
248
271
|
|
249
272
|
|
250
273
|
# other ----------------------------------------------------------------------
|
251
274
|
# 统计词频
|
252
|
-
def calc_word_count(list_word, mode, path=
|
275
|
+
def calc_word_count(list_word, mode, path="tempcount.txt", sort_id=1, is_reverse=True):
|
253
276
|
word_count = {}
|
254
277
|
for key in list_word:
|
255
278
|
if key not in word_count:
|
@@ -257,20 +280,20 @@ def calc_word_count(list_word, mode, path='tempcount.txt', sort_id=1, is_reverse
|
|
257
280
|
else:
|
258
281
|
word_count[key] += 1
|
259
282
|
word_dict_sort = sorted(word_count.items(), key=lambda x: x[sort_id], reverse=is_reverse)
|
260
|
-
if mode ==
|
283
|
+
if mode == "w":
|
261
284
|
for key in word_dict_sort:
|
262
|
-
writetxt_a(str(key[0]) +
|
263
|
-
elif mode ==
|
285
|
+
writetxt_a(str(key[0]) + "\t" + str(key[1]) + "\n", path)
|
286
|
+
elif mode == "p":
|
264
287
|
for key in word_dict_sort:
|
265
|
-
print(str(key[0]) +
|
266
|
-
elif mode ==
|
288
|
+
print(str(key[0]) + "\t" + str(key[1]))
|
289
|
+
elif mode == "u":
|
267
290
|
return word_dict_sort
|
268
291
|
|
269
292
|
|
270
293
|
# 字典去重
|
271
294
|
def dupl_dict(dict_list, key):
|
272
295
|
new_dict_list, value_set = [], []
|
273
|
-
print(
|
296
|
+
print("去重中...")
|
274
297
|
for i in tqdm(dict_list):
|
275
298
|
if i[key] not in value_set:
|
276
299
|
new_dict_list.append(i)
|
@@ -285,9 +308,9 @@ def multi_thread_run(_task, data):
|
|
285
308
|
|
286
309
|
|
287
310
|
def del_special_char(sentence):
|
288
|
-
special_chars = [
|
311
|
+
special_chars = ["\ufeff", "\xa0", "\u3000", "\xa0", "\ue627"]
|
289
312
|
for i in special_chars:
|
290
|
-
sentence = sentence.replace(i,
|
313
|
+
sentence = sentence.replace(i, "")
|
291
314
|
return sentence
|
292
315
|
|
293
316
|
|
@@ -303,20 +326,20 @@ def spider(url):
|
|
303
326
|
:param url:
|
304
327
|
:return:
|
305
328
|
"""
|
306
|
-
if
|
329
|
+
if "baijiahao" in url:
|
307
330
|
content = requests.get(url)
|
308
331
|
# print(content.text)
|
309
332
|
html = pq.PyQuery(content.text)
|
310
|
-
title = html(
|
311
|
-
res = html(
|
312
|
-
return
|
333
|
+
title = html(".index-module_articleTitle_28fPT").text()
|
334
|
+
res = html(".index-module_articleWrap_2Zphx").text().rstrip("举报/反馈")
|
335
|
+
return "{}\n{}".format(title, res)
|
313
336
|
|
314
337
|
|
315
338
|
def eda(sentence):
|
316
|
-
url =
|
339
|
+
url = "https://x.x.x.x:x/eda"
|
317
340
|
json_data = dict({"sentence": sentence})
|
318
341
|
res = requests.post(url, json=json_data)
|
319
|
-
return res.json()[
|
342
|
+
return res.json()["eda"]
|
320
343
|
|
321
344
|
|
322
345
|
def find_language(text):
|
@@ -350,8 +373,8 @@ def print_prf(y_true, y_pred, label=None):
|
|
350
373
|
for i in range(len(label)):
|
351
374
|
res = []
|
352
375
|
for k in result:
|
353
|
-
res.append(
|
354
|
-
print(
|
376
|
+
res.append("%.5f" % k[i])
|
377
|
+
print("{}: {} {} {}".format(label[i], *res[:3]))
|
355
378
|
|
356
379
|
|
357
380
|
def print_cpu():
|
@@ -372,14 +395,16 @@ def squeeze_list(high_dim_list):
|
|
372
395
|
|
373
396
|
def unsqueeze_list(flatten_list, each_element_len):
|
374
397
|
# 该函数是错的,被split_list替代了
|
375
|
-
two_dim_list = [
|
376
|
-
|
398
|
+
two_dim_list = [
|
399
|
+
flatten_list[i * each_element_len : (i + 1) * each_element_len]
|
400
|
+
for i in range(len(flatten_list) // each_element_len)
|
401
|
+
]
|
377
402
|
return two_dim_list
|
378
403
|
|
379
404
|
|
380
405
|
def split_list(input_list, chunk_size):
|
381
406
|
# 使用列表推导式将列表分割成二维数组
|
382
|
-
return [input_list[i:i + chunk_size] for i in range(0, len(input_list), chunk_size)]
|
407
|
+
return [input_list[i : i + chunk_size] for i in range(0, len(input_list), chunk_size)]
|
383
408
|
|
384
409
|
|
385
410
|
def auto_close():
|
@@ -389,6 +414,7 @@ def auto_close():
|
|
389
414
|
import pyautogui as pg
|
390
415
|
import time
|
391
416
|
import os
|
417
|
+
|
392
418
|
cmd = 'schtasks /create /tn shut /tr "shutdown -s -f" /sc once /st 23:30'
|
393
419
|
os.system(cmd)
|
394
420
|
while 1:
|
@@ -402,10 +428,13 @@ def tf_idf(corpus, save_path):
|
|
402
428
|
vectorizer = CountVectorizer() # 该类会将文本中的词语转换为词频矩阵,矩阵元素a[i][j] 表示j词在i类文本下的词频
|
403
429
|
transformer = TfidfTransformer() # 该类会统计每个词语的tf-idf权值
|
404
430
|
tfidf = transformer.fit_transform(
|
405
|
-
vectorizer.fit_transform(corpus)
|
431
|
+
vectorizer.fit_transform(corpus)
|
432
|
+
) # 第一个fit_transform是计算tf-idf,第二个fit_transform是将文本转为词频矩阵
|
406
433
|
word = vectorizer.get_feature_names() # 获取词袋模型中的所有词语
|
407
434
|
weight = tfidf.toarray() # 将tf-idf矩阵抽取出来,元素a[i][j]表示j词在i类文本中的tf-idf权重
|
408
|
-
for i in range(
|
435
|
+
for i in range(
|
436
|
+
len(weight)
|
437
|
+
): # 打印每类文本的tf-idf词语权重,第一个for遍历所有文本,第二个for便利某一类文本下的词语权重
|
409
438
|
for j in range(len(word)):
|
410
439
|
getword = word[j]
|
411
440
|
getvalue = weight[i][j]
|
@@ -415,7 +444,7 @@ def tf_idf(corpus, save_path):
|
|
415
444
|
else:
|
416
445
|
tfidfdict.update({getword: getvalue})
|
417
446
|
sorted_tfidf = sorted(tfidfdict.items(), key=lambda d: d[1], reverse=True)
|
418
|
-
to_write = [
|
447
|
+
to_write = ["{} {}".format(i[0], i[1]) for i in sorted_tfidf]
|
419
448
|
writetxt_w_list(to_write, save_path, num_lf=1)
|
420
449
|
|
421
450
|
|
@@ -424,7 +453,7 @@ class GaussDecay(object):
|
|
424
453
|
当前只实现了时间的,全部使用默认值
|
425
454
|
"""
|
426
455
|
|
427
|
-
def __init__(self, origin=
|
456
|
+
def __init__(self, origin="2022-08-02", scale="90d", offset="5d", decay=0.5, task="time"):
|
428
457
|
self.origin = origin
|
429
458
|
self.task = task
|
430
459
|
self.scale, self.offset = self.translate(scale, offset)
|
@@ -448,7 +477,7 @@ class GaussDecay(object):
|
|
448
477
|
@staticmethod
|
449
478
|
def translated_minus(field_value):
|
450
479
|
origin = datetime.datetime.now()
|
451
|
-
field_value = datetime.datetime.strptime(field_value,
|
480
|
+
field_value = datetime.datetime.strptime(field_value, "%Y-%m-%d %H:%M:%S")
|
452
481
|
return (origin - field_value).days
|
453
482
|
|
454
483
|
def calc_exp(self):
|
@@ -466,13 +495,13 @@ class GaussDecay(object):
|
|
466
495
|
:return:
|
467
496
|
"""
|
468
497
|
numerator = max(0, (abs(self.translated_minus(field_value)) - self.offset)) ** 2
|
469
|
-
sigma_square = -1 * self.scale
|
498
|
+
sigma_square = -1 * self.scale**2 / (2 * math.log(self.decay, math.e))
|
470
499
|
denominator = 2 * sigma_square
|
471
500
|
s = math.exp(-1 * numerator / denominator)
|
472
501
|
return round(self.time_coefficient * s + self.related_coefficient * raw_score, 7)
|
473
502
|
|
474
503
|
|
475
|
-
if __name__ ==
|
504
|
+
if __name__ == "__main__":
|
476
505
|
gauss_decay = GaussDecay()
|
477
506
|
res = gauss_decay.calc_gauss(raw_score=1, field_value="2021-05-29 14:31:13")
|
478
507
|
print(res)
|
nlpertools/utils/package.py
CHANGED
@@ -37,20 +37,19 @@ def lazy_import(importer_name, to_import):
|
|
37
37
|
module = importlib.import_module(importer_name)
|
38
38
|
import_mapping = {}
|
39
39
|
for name in to_import:
|
40
|
-
importing, _, binding = name.partition(
|
40
|
+
importing, _, binding = name.partition(" as ")
|
41
41
|
if not binding:
|
42
|
-
_, _, binding = importing.rpartition(
|
42
|
+
_, _, binding = importing.rpartition(".")
|
43
43
|
import_mapping[binding] = importing
|
44
44
|
|
45
45
|
def __getattr__(name):
|
46
46
|
if name not in import_mapping:
|
47
|
-
message = f
|
47
|
+
message = f"module {importer_name!r} has no attribute {name!r}"
|
48
48
|
raise AttributeError(message)
|
49
49
|
importing = import_mapping[name]
|
50
50
|
# imortlib.import_module() implicitly sets submodules on this module as
|
51
51
|
# appropriate for direct imports.
|
52
|
-
imported = importlib.import_module(importing,
|
53
|
-
module.__spec__.parent)
|
52
|
+
imported = importlib.import_module(importing, module.__spec__.parent)
|
54
53
|
setattr(module, name, imported)
|
55
54
|
return imported
|
56
55
|
|
@@ -75,15 +74,15 @@ KafkaConsumer = try_import("kafka", "KafkaConsumer")
|
|
75
74
|
np = try_import("numpy", None)
|
76
75
|
plt = try_import("matplotlib", "pyplot")
|
77
76
|
WordNetLemmatizer = try_import("nltk.stem", "WordNetLemmatizer")
|
78
|
-
metrics = try_import("sklearn", "metrics")
|
77
|
+
# metrics = try_import("sklearn", "metrics")
|
79
78
|
requests = try_import("requests", None)
|
80
79
|
pq = try_import("pyquery", None)
|
81
|
-
CountVectorizer = try_import("sklearn.feature_extraction.text", "CountVectorizer")
|
82
|
-
precision_recall_fscore_support = try_import("sklearn.metrics", "precision_recall_fscore_support")
|
80
|
+
# CountVectorizer = try_import("sklearn.feature_extraction.text", "CountVectorizer")
|
81
|
+
# precision_recall_fscore_support = try_import("sklearn.metrics", "precision_recall_fscore_support")
|
83
82
|
tqdm = try_import("tqdm", "tqdm")
|
84
83
|
# TODO 自动导出langid和win32evtlogutil输出有bug
|
85
84
|
langid = try_import("langid", None)
|
86
85
|
win32evtlogutil = try_import("win32evtlogutil", None)
|
87
|
-
TfidfTransformer = try_import("sklearn.feature_extraction.text", "TfidfTransformer")
|
86
|
+
# TfidfTransformer = try_import("sklearn.feature_extraction.text", "TfidfTransformer")
|
88
87
|
yaml = try_import("yaml", None)
|
89
|
-
omegaconf = try_import("omegaconf", None)
|
88
|
+
omegaconf = try_import("omegaconf", None)
|
nlpertools/wrapper.py
CHANGED
@@ -7,16 +7,18 @@ import time
|
|
7
7
|
from functools import wraps
|
8
8
|
import asyncio
|
9
9
|
|
10
|
+
|
10
11
|
def fn_async_timer(function):
|
11
12
|
"""
|
12
13
|
针对异步函数的装饰器
|
13
14
|
"""
|
15
|
+
|
14
16
|
@wraps(function)
|
15
17
|
async def function_timer(*args, **kwargs):
|
16
18
|
t0 = time.time()
|
17
19
|
result = await function(*args, **kwargs)
|
18
20
|
t1 = time.time()
|
19
|
-
print(
|
21
|
+
print("[finished {func_name} in {time:.2f}s]".format(func_name=function.__name__, time=t1 - t0))
|
20
22
|
return result
|
21
23
|
|
22
24
|
return function_timer
|
@@ -36,14 +38,14 @@ def fn_timer(async_func=False, analyse=False):
|
|
36
38
|
t0 = time.time()
|
37
39
|
result = await asyncio.create_task(func(*args, **kwargs))
|
38
40
|
t1 = time.time()
|
39
|
-
print(
|
41
|
+
print("[finished {func_name} in {time:.2f}s]".format(func_name=func.__name__, time=t1 - t0))
|
40
42
|
return result
|
41
43
|
|
42
44
|
def func_time(*args, **kwargs):
|
43
45
|
t0 = time.time()
|
44
46
|
result = func(*args, **kwargs)
|
45
47
|
t1 = time.time()
|
46
|
-
print(
|
48
|
+
print("[finished {func_name} in {time:.2f}s]".format(func_name=func.__name__, time=t1 - t0))
|
47
49
|
return result
|
48
50
|
|
49
51
|
def func_time_analyse(*args, **kwargs):
|
@@ -114,7 +116,7 @@ def fn_try(parameter):
|
|
114
116
|
return result
|
115
117
|
except Exception as e:
|
116
118
|
msg = "报错!"
|
117
|
-
print(
|
119
|
+
print("[func_name: {func_name} {msg}]".format(func_name=function.__name__, msg=msg))
|
118
120
|
parameter["msg"] = parameter["msg"].format(str(e))
|
119
121
|
return parameter
|
120
122
|
finally:
|
@@ -1,6 +1,6 @@
|
|
1
|
-
Metadata-Version: 2.
|
1
|
+
Metadata-Version: 2.4
|
2
2
|
Name: nlpertools
|
3
|
-
Version: 1.0.
|
3
|
+
Version: 1.0.11
|
4
4
|
Summary: A small package about small basic IO operation when coding
|
5
5
|
Home-page: https://github.com/lvzii/nlpertools
|
6
6
|
Author: youshuJi
|
@@ -15,8 +15,10 @@ License-File: LICENSE
|
|
15
15
|
Requires-Dist: numpy
|
16
16
|
Requires-Dist: pandas
|
17
17
|
Requires-Dist: psutil
|
18
|
+
Requires-Dist: openai
|
18
19
|
Provides-Extra: torch
|
19
20
|
Requires-Dist: torch; extra == "torch"
|
21
|
+
Dynamic: license-file
|
20
22
|
Dynamic: provides-extra
|
21
23
|
Dynamic: requires-dist
|
22
24
|
|
@@ -50,6 +52,25 @@ nlpertools
|
|
50
52
|
|
51
53
|
```
|
52
54
|
|
55
|
+
# 最常用/喜欢的功能(使用示例)
|
56
|
+
```python
|
57
|
+
# 读txt, json文件
|
58
|
+
import nlpertools
|
59
|
+
|
60
|
+
txt_data = nlpertools.readtxt_list_all_strip('res.txt')
|
61
|
+
json_data = nlpertools.load_from_json('res.json')
|
62
|
+
```
|
63
|
+
|
64
|
+
```bash
|
65
|
+
## git, 连接github不稳定的时候非常有用
|
66
|
+
ncli git pull
|
67
|
+
## 带有参数时,加上--以避免-u被解析
|
68
|
+
ncli -- git push -u origin main
|
69
|
+
|
70
|
+
# 生成pypi双因素认证的实时密钥(需要提供key)
|
71
|
+
ncli --get_2fa --get_2fa_key your_key
|
72
|
+
```
|
73
|
+
|
53
74
|
# 安装
|
54
75
|
|
55
76
|
Install the latest release version
|
@@ -101,30 +122,7 @@ https://nlpertools.readthedocs.io/en/latest/
|
|
101
122
|
|
102
123
|
一些可能需要配置才能用的函数,写上示例
|
103
124
|
|
104
|
-
## 使用示例
|
105
|
-
|
106
|
-
```python
|
107
|
-
import nlpertools
|
108
|
-
|
109
|
-
a = nlpertools.readtxt_list_all_strip('res.txt')
|
110
|
-
# 或
|
111
|
-
b = nlpertools.io.file.readtxt_list_all_strip('res.txt')
|
112
|
-
```
|
113
125
|
|
114
|
-
```bash
|
115
|
-
# 生成pypi双因素认证的实时密钥(需要提供key)
|
116
|
-
python -m nlpertools.cli --get_2fa --get_2fa_key your_key
|
117
|
-
|
118
|
-
## git
|
119
|
-
python -m nlpertools.cli --git_push
|
120
|
-
python -m nlpertools.cli --git_pull
|
121
|
-
|
122
|
-
# 以下功能被nvitop替代,不推荐使用
|
123
|
-
## 监控gpu显存
|
124
|
-
python -m nlpertools.monitor.gpu
|
125
|
-
## 监控cpu
|
126
|
-
python -m nlpertools.monitor.memory
|
127
|
-
```
|
128
126
|
|
129
127
|
## 一些常用项目
|
130
128
|
|
@@ -132,3 +130,7 @@ nvitop
|
|
132
130
|
|
133
131
|
ydata-profiling
|
134
132
|
|
133
|
+
## 贡献
|
134
|
+
|
135
|
+
https://github.com/bigscience-workshop/data-preparation
|
136
|
+
|
@@ -1,20 +1,19 @@
|
|
1
|
-
nlpertools/__init__.py,sha256=
|
2
|
-
nlpertools/cli.py,sha256=
|
1
|
+
nlpertools/__init__.py,sha256=VnH7GWVSTcV010_kD4VtsOAwIjzhe8prax8Wj17uc20,537
|
2
|
+
nlpertools/cli.py,sha256=uCIUkiBXqTWJaxSQd5MlliGcTfxWzymo1UyQ3z_uhak,3612
|
3
3
|
nlpertools/data_client.py,sha256=esX8lUQrTui4uVkqPfhpHVok7Eq6ywpuemKjLeqoglc,14674
|
4
|
-
nlpertools/dataprocess.py,sha256=v1mobuYN7I3dT6xIKlNOHVtcg31YtjF6FwNPTxeBFFY,23153
|
5
4
|
nlpertools/default_db_config.yml,sha256=E1K9k_xzXVlsf-HJQh8kyHXHYuvTpD12jD4Hfe5rUk8,606
|
6
5
|
nlpertools/get_2fa.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
7
|
-
nlpertools/ml.py,sha256=
|
6
|
+
nlpertools/ml.py,sha256=fjI-WMM1lRnOnRFfTLEGplUx_Uamgr2gfmoAvGlgF7E,18994
|
8
7
|
nlpertools/movie.py,sha256=rkyOnAXdsbWfMSbi1sE1VNRT7f66Hp9BnZsN_58Afmw,897
|
9
8
|
nlpertools/nlpertools_config.yml,sha256=ksXejxFs7pxR47tNAsrN88_4gvq9PCA2ZMO07H-dJXY,26
|
10
9
|
nlpertools/open_api.py,sha256=uyTY00OUlM57Cn0Wm0yZXcIS8vAszy9rKnDMBEWfWJM,1744
|
11
|
-
nlpertools/other.py,sha256=
|
10
|
+
nlpertools/other.py,sha256=LaNZRQ8wWJqZP6Gycq7eThEqcGXIANg7WzT6nh5QiKQ,15262
|
12
11
|
nlpertools/pic.py,sha256=13aaFJh3USGYGs4Y9tAKTvWjmdQR4YDjl3LlIhJheOA,9906
|
13
12
|
nlpertools/plugin.py,sha256=LB7j9GdoQi6TITddH-6EglHlOa0WIHLUT7X5vb_aIZY,1168
|
14
13
|
nlpertools/reminder.py,sha256=wiXwZQmxMck5vY3EvG8_oakP3FAdjGTikAIOiTPUQrs,2977
|
15
14
|
nlpertools/utils_for_nlpertools.py,sha256=SJqjfMc2Vd8ZCqzQiJCkSxjJxEKzvEUgAgbhKPtC6ww,3583
|
16
15
|
nlpertools/vector_index_demo.py,sha256=CSCzXD13bUIo9AG-bjen668H10B02HFU1Kbxakvrs68,2924
|
17
|
-
nlpertools/wrapper.py,sha256=
|
16
|
+
nlpertools/wrapper.py,sha256=8ReHv7LrBGX6wHma8rf_EhFPg0FJNoDjbn4p0O2UHzs,4350
|
18
17
|
nlpertools/algo/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
19
18
|
nlpertools/algo/ac.py,sha256=4BSiJdG8-S78w_KRqvGOkYjxuTDRiBsskRv-6Doi7oE,422
|
20
19
|
nlpertools/algo/bit_ops.py,sha256=l14-j5VOqrab80CA_uBs1AiAJbzJUJH9dJXc7O9F5d0,501
|
@@ -24,12 +23,19 @@ nlpertools/algo/template.py,sha256=9vsHr4g3jZZ5KVU_2I9i97o8asRXq-8pSaCXIv0sHeM,2
|
|
24
23
|
nlpertools/algo/union.py,sha256=0l7lGZbw1qIfW1z5TE8Oo3tybL1bKIP5rzpa5ZT-vLQ,249
|
25
24
|
nlpertools/data_structure/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
26
25
|
nlpertools/data_structure/base_structure.py,sha256=gVUvJZ5jsCAswRETTpMwcEjLKoageWiTuCKNEwIWKWk,2641
|
26
|
+
nlpertools/dataprocess/__init__.py,sha256=YPBPsZ8vAoMS6GJ7GlCqj01Cx1q8dDARc_gW-ysORyk,21
|
27
|
+
nlpertools/dataprocess/dedupl.py,sha256=WIBOrM6LfX3txcDa0xF7rqeBIpfqwrDBgepa6bavpt0,289
|
28
|
+
nlpertools/dataprocess/dp_main.py,sha256=iyDsmKzUx5lD8EUNwkWIlTGKVQQDVx8p3pXFv2_kR64,23452
|
27
29
|
nlpertools/draw/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
28
30
|
nlpertools/draw/draw.py,sha256=19dskkr0wrgczxPJnphEszliwYshEh5SjD8Zz07nlk0,2615
|
29
31
|
nlpertools/draw/math_func.py,sha256=0NQ22Dfi9DFG6Bg_hXnCT27w65-dqpOOIgZX7oUIW-Q,881
|
30
32
|
nlpertools/io/__init__.py,sha256=YMuKtC2Ddh5dL5MvXjyUKYOOuqzFYUhBPFaP2kyFG9I,68
|
31
|
-
nlpertools/io/dir.py,sha256=
|
32
|
-
nlpertools/io/file.py,sha256=
|
33
|
+
nlpertools/io/dir.py,sha256=jpJuCwLeBInr03iCSUfffmlchWShZ2Cjq38n0D0dILI,3106
|
34
|
+
nlpertools/io/file.py,sha256=NF1xV5iazl86-TDdMQJ-LLrqCnuW29uuFb_NA55YNr4,7274
|
35
|
+
nlpertools/llm/__init__.py,sha256=SdbGjzhu1lCeq55mC0tgsah9yzVxvvNrWMf2z8kDEoQ,71
|
36
|
+
nlpertools/llm/call_llm_once.py,sha256=W0J2Ab8dHnVZ8q_KgfTKbee7NlJnA-ewjsne80ALLXY,1793
|
37
|
+
nlpertools/llm/infer.py,sha256=q7asgwdJwo27d6rdBNQLys_bPEF0g-UNDKjt3S-Ltvs,4133
|
38
|
+
nlpertools/llm/price.py,sha256=8zzEaLrbGiDUbTFSnuBGAduiSfDVXQUk4Oc_lE6eJFw,544
|
33
39
|
nlpertools/monitor/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
34
40
|
nlpertools/monitor/gpu.py,sha256=M59O6i0hlew7AzXZlaVZqbZA5IR93OhBY2WI0-T_HtY,531
|
35
41
|
nlpertools/monitor/memory.py,sha256=9t6q9BC8VVx4o3G4sBCn7IoQRx272zMPjSnL3yvTBAQ,657
|
@@ -37,13 +43,13 @@ nlpertools/template/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSu
|
|
37
43
|
nlpertools/utils/__init__.py,sha256=YMuKtC2Ddh5dL5MvXjyUKYOOuqzFYUhBPFaP2kyFG9I,68
|
38
44
|
nlpertools/utils/lazy.py,sha256=SAeHLjxsYhpKWvcQKjs2eY0Nn5n3CJlqtxOLVOe1WjQ,29280
|
39
45
|
nlpertools/utils/log_util.py,sha256=ftJDoTOtroLH-LadOygZljeyltOQn0D2Xb5x7Td1Qdg,428
|
40
|
-
nlpertools/utils/package.py,sha256=
|
46
|
+
nlpertools/utils/package.py,sha256=8TLbrD3nmukpJw9lSpHHbUYK74qyAaSM_jUrCJOG6mo,3227
|
41
47
|
nlpertools/utils/package_v1.py,sha256=sqgFb-zbTdMd5ziJLY6YUPqR49qUNZjxBH35DnyR5Wg,3542
|
42
48
|
nlpertools/utils/package_v2.py,sha256=WOcsguWfUd4XSAfmPgCtL8HtUbqJ6GRSMHb0OsB47r0,3932
|
49
|
+
nlpertools-1.0.11.dist-info/licenses/LICENSE,sha256=SBcMozykvTbZJ--MqSiKUmHLLROdnr25V70xCQgEwqw,11331
|
43
50
|
nlpertools_helper/__init__.py,sha256=obxRUdZDctvcvK_iA1Dx2HmQFMlMzJto-xDPryq1lJ0,198
|
44
|
-
nlpertools-1.0.
|
45
|
-
nlpertools-1.0.
|
46
|
-
nlpertools-1.0.
|
47
|
-
nlpertools-1.0.
|
48
|
-
nlpertools-1.0.
|
49
|
-
nlpertools-1.0.9.dist-info/RECORD,,
|
51
|
+
nlpertools-1.0.11.dist-info/METADATA,sha256=3KXxqbO2wWDMXLmnZJm2RvETybvIMekPelhSxE_ovKk,3386
|
52
|
+
nlpertools-1.0.11.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
|
53
|
+
nlpertools-1.0.11.dist-info/entry_points.txt,sha256=XEazQ4vUwJMoMAgAwk1Lq4PRQGklPkPBaFkiP0zN_JE,45
|
54
|
+
nlpertools-1.0.11.dist-info/top_level.txt,sha256=_4q4MIFvMr4cAUbhWKWYdRXIXsF4PJDg4BUsZvgk94s,29
|
55
|
+
nlpertools-1.0.11.dist-info/RECORD,,
|
File without changes
|
File without changes
|
File without changes
|