nlpertools 1.0.10__py3-none-any.whl → 1.0.11__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
nlpertools/__init__.py CHANGED
@@ -4,6 +4,7 @@
  from .algo.kmp import *
  from .data_structure.base_structure import *
  from .draw import *
+ from .dataprocess.dp_main import *
  from .dataprocess import *
  from .io.dir import *
  from .io.file import *
@@ -20,4 +21,4 @@ from .cli import *
  from .llm import *


- __version__ = "1.0.10"
+ __version__ = "1.0.11"
nlpertools/cli.py CHANGED
@@ -2,7 +2,7 @@ import argparse
  import os
  import uuid
  import sys
- from .dataprocess import startwith
+ from .dataprocess.dp_main import startwith


  def run_git_command(command):
@@ -17,7 +17,7 @@ def run_git_command(command):
  info = os.system(command)
  print(str(info))
  # Check the command's result; treat it as success if no error marker appears
- if not startwith(str(info), ["fatal", "error", "128", "1"]):
+ if (not startwith(str(info), ["fatal", "error", "128", "1"])) and "fatal" not in str(info):
  print("success")
  print(f"success info : ##{info}##")
  break
@@ -25,7 +25,7 @@ def run_git_command(command):

  def get_mac_address():
  mac = uuid.UUID(int=uuid.getnode()).hex[-12:]
- mac_address = ":".join([mac[e:e + 2] for e in range(0, 11, 2)])
+ mac_address = ":".join([mac[e : e + 2] for e in range(0, 11, 2)])
  print("mac address 不一定准确")
  print(mac_address)
  return mac_address
@@ -33,6 +33,7 @@ def get_mac_address():

  def get_2af_value(key):
  import pyotp
+
  """
  The key should be 7 characters long
  """
@@ -65,15 +66,11 @@ def start_gpu_usage_notify_client():
  from plyer import notification
  import time

- SERVER_URL = 'http://127.0.0.1:5000/notify' # the server's API address
+ SERVER_URL = "http://127.0.0.1:5000/notify" # the server's API address

  def notify(text):
  # send the notification via plyer
- notification.notify(
- title='远程通知',
- message=text,
- timeout=10 # show the notification for 10 seconds
- )
+ notification.notify(title="远程通知", message=text, timeout=10) # show the notification for 10 seconds

  """Poll the server periodically for notifications"""
  while True:
@@ -94,12 +91,12 @@ def start_gpu_usage_notify_client():

  def main():
  parser = argparse.ArgumentParser(description="CLI tool for git operations and other functions.")
- parser.add_argument('git_command', nargs='*', help='Any git command (e.g., push, pull)')
- parser.add_argument('--mac_address', action='store_true', help='Get the MAC address.')
- parser.add_argument('--get_2fa', action='store_true', help='Get the 2fa value.')
- parser.add_argument('--get_2fa_key', type=str, help='Get the 2fa value.')
- parser.add_argument('--monitor_gpu_cli', action='store_true', help='monitor gpu cli')
- parser.add_argument('--monitor_gpu_ser', action='store_true', help='monitor gpu ser')
+ parser.add_argument("git_command", nargs="*", help="Any git command (e.g., push, pull)")
+ parser.add_argument("--mac_address", action="store_true", help="Get the MAC address.")
+ parser.add_argument("--get_2fa", action="store_true", help="Get the 2fa value.")
+ parser.add_argument("--get_2fa_key", type=str, help="Get the 2fa value.")
+ parser.add_argument("--monitor_gpu_cli", action="store_true", help="monitor gpu cli")
+ parser.add_argument("--monitor_gpu_ser", action="store_true", help="monitor gpu ser")

  args = parser.parse_args()

@@ -121,5 +118,5 @@ def main():
  print("No operation specified.")


- if __name__ == '__main__':
- main()
+ if __name__ == "__main__":
+ main()
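The tightened guard in `run_git_command` now both checks the prefix list and scans the whole message, since `os.system` can produce output whose string form does not begin with an error marker. A minimal sketch of the intended behavior, assuming `startwith(s, prefixes)` simply tests whether `s` begins with any of the given prefixes (the real helper lives in `nlpertools/dataprocess/dp_main.py` and is not shown in this diff):

```python
# Hypothetical stand-in for the imported helper, for illustration only.
def startwith(text: str, prefixes: list) -> bool:
    # True if text begins with any of the given prefixes
    return any(text.startswith(p) for p in prefixes)

info = "fatal: unable to access remote"
# mirrors the new guard: the prefix check AND the substring scan must both pass
is_success = (not startwith(info, ["fatal", "error", "128", "1"])) and "fatal" not in info
print(is_success)  # False, so the retry loop keeps running
```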
nlpertools/dataprocess/__init__.py ADDED
@@ -0,0 +1 @@
+ from .dedupl import *
nlpertools/dataprocess/dedupl.py ADDED
@@ -0,0 +1,9 @@
+ # Deduplicate a list of dicts by the given key
+ def deduplicate_dict_list(dict_list: list, key: str) -> list:
+ seen = set()
+ result = []
+ for d in dict_list:
+ if key in d and d[key] not in seen:
+ seen.add(d[key])
+ result.append(d)
+ return result
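A quick usage sketch of the new helper: it keeps the first occurrence per key value and silently drops entries that lack the key (the sample records below are illustrative; key values must be hashable):

```python
records = [
    {"id": 1, "text": "a"},
    {"id": 1, "text": "a, duplicated"},
    {"id": 2, "text": "b"},
    {"text": "no id, dropped"},
]
# first occurrence of each id wins
print(deduplicate_dict_list(records, key="id"))
# -> [{'id': 1, 'text': 'a'}, {'id': 2, 'text': 'b'}]
```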
nlpertools/dataprocess.py → nlpertools/dataprocess/dp_main.py RENAMED
@@ -8,7 +8,7 @@ from typing import List
  import numpy as np

  # from . import DB_CONFIG_FILE # cannot import name 'DB_CONFIG_FILE' from partially initialized module 'nlpertools'
- from .utils.package import *
+ from ..utils.package import *

  main_special_characters = string.punctuation + string.digits + string.whitespace
  other_special_characters = (
nlpertools/io/dir.py CHANGED
@@ -3,6 +3,7 @@
  # @Author : youshu.Ji
  import os
  from pathlib import Path
+ from typing import overload, Literal, Union


  # dir ----------------------------------------------------------------------
@@ -45,15 +46,34 @@ def get_filename(path, suffix=True) -> str:
  filename = filename.split('.')[0]
  return filename

-
- def listdir(dir_name, including_dir=True):
- filenames = os.listdir(dir_name)
+ """
+ Reimplemented because os.listdir does not really support Path: although a Path is a
+ byte-like path, passing one in still returns plain strings,
+ and there is no way to get back only the filenames.
+ """
+ @overload
+ def listdir(dir_name: Path, including_dir: Literal[True]) -> list[Path]: ...
+ @overload
+ def listdir(dir_name: str, including_dir: Literal[True]) -> list[str]: ...
+ @overload
+ def listdir(dir_name: Path, including_dir: Literal[False] = False) -> list[str]: ...
+ @overload
+ def listdir(dir_name: str, including_dir: Literal[False] = False) -> list[str]: ...
+
+ def listdir(dir_name: Union[Path, str], including_dir: bool = False) -> list[Path] | list[str]:
+ """
+ including_dir=True -> list[Path] or list[str]
+ including_dir=False -> list[str]
+ """
+ filenames = os.listdir(str(dir_name))
  if including_dir:
- return [os.path.join(dir_name, filename) for filename in filenames]
+ if isinstance(dir_name, Path):
+ return [dir_name / filename for filename in filenames]
+ else:
+ return [os.path.join(dir_name, filename) for filename in filenames]
  else:
  return list(filenames)

-
  def listdir_yield(dir_name, including_dir=True):
  filenames = os.listdir(dir_name)
  for filename in filenames:
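Note that the default for `including_dir` flipped from `True` to `False`, so existing callers of `listdir(dir_name)` now get bare filenames instead of joined paths. Under the new overloads the return type follows the argument types; a small sketch (the directory name is illustrative):

```python
from pathlib import Path
from nlpertools.io.dir import listdir

names = listdir("some_dir")               # list[str] of bare filenames (new default)
joined = listdir("some_dir", True)        # list[str], joined via os.path.join
paths = listdir(Path("some_dir"), True)   # list[Path], joined with the / operator
```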
nlpertools/io/file.py CHANGED
@@ -5,8 +5,11 @@ import codecs
  import json
  import pickle
  import random
- from itertools import (takewhile, repeat)
+ from itertools import takewhile, repeat
+ from typing import Optional
+ from pathlib import Path
  import pandas as pd
+
  # import omegaconf
  # import yaml
  from ..utils.package import *
@@ -15,18 +18,18 @@ LARGE_FILE_THRESHOLD = 1e5


  def safe_filename(filename: str) -> str:
- for char in ['\\', '/', ':', '*', '?', '"', '<', '>', '|']:
- filename = filename.replace(char, '_')
+ for char in ["\\", "/", ":", "*", "?", '"', "<", ">", "|"]:
+ filename = filename.replace(char, "_")
  return filename


  def read_yaml(path, omega=False):
  if omega:
  return omegaconf.OmegaConf.load(path)
- return yaml.load(codecs.open(path, encoding='utf-8'), Loader=yaml.FullLoader)
+ return yaml.load(codecs.open(path, encoding="utf-8"), Loader=yaml.FullLoader)


- def _merge_file(filelist, save_filename, shuffle=False):
+ def merge_file(filelist, save_filename, shuffle=False):
  contents = []
  for file in filelist:
  content = readtxt_list_all_strip(file)
@@ -43,9 +46,9 @@ def iter_count(file_name):
  author: unknown
  """
  buffer = 1024 * 1024
- with codecs.open(file_name, 'r', 'utf-8') as f:
+ with codecs.open(file_name, "r", "utf-8") as f:
  buf_gen = takewhile(lambda x: x, (f.read(buffer) for _ in repeat(None)))
- return sum(buf.count('\n') for buf in buf_gen)
+ return sum(buf.count("\n") for buf in buf_gen)


  # Functions that should gain a progress bar include
@@ -57,24 +60,24 @@ load_from_json


  # Read a whole txt file at once; return a list with newlines stripped
- def readtxt_list_all_strip(path, encoding='utf-8') -> list:
+ def readtxt_list_all_strip(path, encoding="utf-8") -> list:
  file_line_num = iter_count(path)
  lines = []
- with codecs.open(path, 'r', encoding) as r:
+ with codecs.open(path, "r", encoding) as r:
  if file_line_num > LARGE_FILE_THRESHOLD:
  iter_obj = tqdm(enumerate(r.readlines()), total=file_line_num)
  else:
  iter_obj = enumerate(r.readlines())

  for ldx, line in iter_obj:
- lines.append(line.strip('\n').strip("\r"))
+ lines.append(line.strip("\n").strip("\r"))
  return lines


  # Read a txt file one line at a time; return a list at the end
  def readtxt_list_each(path) -> list:
  lines = []
- with codecs.open(path, 'r', 'utf-8') as r:
+ with codecs.open(path, "r", "utf-8") as r:
  line = r.readline()
  while line:
  lines.append(line)
@@ -82,11 +85,11 @@ def readtxt_list_each(path) -> list:
  return lines


- def readtxt_list_each_strip(path) -> list:
+ def readtxt_list_each_strip(path: Optional[str | Path]):
  """
  yield-based variant
  """
- with codecs.open(path, 'r', 'utf-8') as r:
+ with codecs.open(path, "r", "utf-8") as r:
  line = r.readline()
  while line:
  yield line.strip("\n").strip("\r")
@@ -95,51 +98,51 @@ def readtxt_list_each_strip(path) -> list:

  # Read a whole txt file at once; return a list
  def readtxt_list_all(path) -> list:
- with codecs.open(path, 'r', 'utf-8') as r:
+ with codecs.open(path, "r", "utf-8") as r:
  lines = r.readlines()
  return lines


  # Read a byte file into a single string
  def readtxt_byte(path, encoding="utf-8") -> str:
- with codecs.open(path, 'rb') as r:
+ with codecs.open(path, "rb") as r:
  lines = r.read()
  lines = lines.decode(encoding)
- return lines.replace('\r', '')
+ return lines.replace("\r", "")


  # Read a txt file into a single string
- def readtxt_string(path, encoding="utf-8") -> str:
- with codecs.open(path, 'r', encoding) as r:
+ def read_text(path, encoding="utf-8") -> str:
+ with codecs.open(path, "r", encoding) as r:
  lines = r.read()
- return lines.replace('\r', '')
+ return lines.replace("\r", "")


  # Write a txt file (overwrite)
- def writetxt_w(txt, path, r='w'):
- with codecs.open(path, r, 'utf-8') as w:
+ def writetxt_w(txt, path, r="w"):
+ with codecs.open(path, r, "utf-8") as w:
  w.writelines(txt)


  # Write a txt file (append)
  def writetxt_a(txt, path):
- with codecs.open(path, 'a', 'utf-8') as w:
+ with codecs.open(path, "a", "utf-8") as w:
  w.writelines(txt)


  def writetxt(txt, path, encoding="utf-8"):
- with codecs.open(path, 'w', encoding) as w:
+ with codecs.open(path, "w", encoding) as w:
  w.write(txt)


  def writetxt_wb(txt, path):
- with codecs.open(path, 'wb') as w:
+ with codecs.open(path, "wb") as w:
  w.write(txt)


  # Write a list (overwrite)
  def writetxt_w_list(list, path, num_lf=1):
- with codecs.open(path, 'w', "utf-8") as w:
+ with codecs.open(path, "w", "utf-8") as w:
  for i in list:
  w.write(i)
  w.write("\n" * num_lf)
@@ -147,7 +150,7 @@ def writetxt_w_list(list, path, num_lf=1):

  # Write a list (append)
  def writetxt_a_list(list, path, num_lf=2):
- with codecs.open(path, 'a', "utf-8") as w:
+ with codecs.open(path, "a", "utf-8") as w:
  for i in list:
  w.write(i)
  w.write("\n" * num_lf)
@@ -158,7 +161,7 @@ def save_to_json(content, path):
  json.dump(content, w, ensure_ascii=False, indent=1)


- def load_from_json(path):
+ def load_from_json(path: Optional[str | Path]):
  with codecs.open(path, "r", "utf-8") as r:
  content = json.load(r)
  return content
@@ -167,60 +170,60 @@ def load_from_json(path):

  # Read a txt file into a single string, with gb2312 fallback
  def readtxt_string_all_encoding(path):
  try:
- with codecs.open(path, 'rb', "utf-8-sig") as r:
+ with codecs.open(path, "rb", "utf-8-sig") as r:
  lines = r.read()
  return lines
  except:
  try:
- with codecs.open(path, 'rb', "utf-8") as r:
+ with codecs.open(path, "rb", "utf-8") as r:
  lines = r.reacd()
  return lines
  except:
  try:
- with codecs.open(path, 'rb', "big5") as r:
+ with codecs.open(path, "rb", "big5") as r:
  lines = r.read()
  return lines
  except:
  print(path)
- with codecs.open(path, 'rb', "gb2312", errors='ignore') as r:
+ with codecs.open(path, "rb", "gb2312", errors="ignore") as r:
  lines = r.read()
  return lines


  def readtxt_list_all_encoding(path):
  try:
- with codecs.open(path, 'rb', "utf-8-sig") as r:
+ with codecs.open(path, "rb", "utf-8-sig") as r:
  lines = r.readlines()
  return lines
  except:
  try:
- with codecs.open(path, 'rb', "utf-8") as r:
+ with codecs.open(path, "rb", "utf-8") as r:
  lines = r.readlines()
  return lines
  except:
  try:
- with codecs.open(path, 'rb', "big5") as r:
+ with codecs.open(path, "rb", "big5") as r:
  lines = r.readlines()
  return lines
  except:
- with codecs.open(path, 'rb', "gb2312", errors='ignore') as r:
+ with codecs.open(path, "rb", "gb2312", errors="ignore") as r:
  lines = r.readlines()
  return lines


  # line by line
  def save_to_jsonl(corpus, path):
- with open(path, 'w', encoding='utf-8') as wt:
+ with open(path, "w", encoding="utf-8") as wt:
  for i in corpus:
  wt.write(json.dumps(i, ensure_ascii=False))
- wt.write('\n')
+ wt.write("\n")


  # line by line
  def load_from_jsonl(path):
  file_line_num = iter_count(path)
  if file_line_num > 1e5:
- with open(path, 'r', encoding='utf-8') as rd:
+ with open(path, "r", encoding="utf-8") as rd:
  corpus = []
  while True:
  line = rd.readline()
@@ -230,7 +233,7 @@ def load_from_jsonl(path):
  break
  return corpus
  else:
- with open(path, 'r', encoding='utf-8') as rd:
+ with open(path, "r", encoding="utf-8") as rd:
  corpus = []
  while True:
  line = rd.readline()
@@ -242,20 +245,20 @@ def load_from_jsonl(path):


  def save_pkl(data, path):
- with open(path, 'wb') as f:
+ with open(path, "wb") as f:
  pickle.dump(data, f)


  def load_pkl(path):
- with open(path, 'rb') as f:
+ with open(path, "rb") as f:
  data = pickle.load(f)
  return data


  def save_to_csv(df, save_path, index_flag=False):
- with open(save_path, 'wb+') as csvfile:
+ with open(save_path, "wb+") as csvfile:
  csvfile.write(codecs.BOM_UTF8)
- df.to_csv(save_path, mode='a', index=index_flag)
+ df.to_csv(save_path, mode="a", index=index_flag)


  def save_to_mongo():
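Two renames here change the public API: `readtxt_string` is now `read_text`, and the private `_merge_file` is exposed as `merge_file`, so callers of the old names will break. A usage sketch against the new names (the file paths are illustrative; the helpers are re-exported at package level via `from .io.file import *`):

```python
import nlpertools

text = nlpertools.read_text("notes.txt")              # formerly readtxt_string
nlpertools.merge_file(["a.txt", "b.txt"], "all.txt")  # formerly the private _merge_file
data = nlpertools.load_from_json("res.json")          # now annotated to accept str or Path
```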
nlpertools/llm/__init__.py CHANGED
@@ -0,0 +1,3 @@
+ from .call_llm_once import *
+ from .infer import *
+ from .price import *
nlpertools/llm/call_llm_once.py CHANGED
@@ -1,7 +1,6 @@
- from ..io.file import readtxt_string, read_yaml
+ from ..io.file import read_yaml
  from tqdm import tqdm
  import os
- from openai import Openai
  from typing import Optional, Union

  """
@@ -9,8 +8,38 @@ from typing import Optional, Union
  """


+ def call_once_stream(
+ client, input: Optional[Union[str, list]], model_name: str = "qwen3-0626-e4", max_tokens: int = 8192, temperature=0.2
+ ) -> str:
+ """
+ Run a single inference call against the LLM (streaming)
+ :param prompt: the input prompt text
+ :param model_name: the model name
+ :param max_tokens: maximum number of output tokens
+ :return: the model's output text
+ """
+ from openai import OpenAI
+
+ if isinstance(input, str):
+ message = [{"role": "user", "content": input}]
+ elif isinstance(input, list):
+ message = input
+
+ completion = client.chat.completions.create(model=model_name, messages=message, max_tokens=max_tokens, stream=True)
+ text = ""
+ for chunk in completion:
+ if chunk.choices:
+ c = chunk.choices[0].delta.content or ""
+ text += c
+ print(c, end="")
+ else:
+ print()
+ print(chunk.usage)
+ return text
+
+
  def call_once(
- client: Openai, input: Optional[Union[str, list]], model_name: str = "qwen3-0626-e4", max_tokens: int = 8192
+ client, input: Optional[Union[str, list]], model_name: str = "qwen3-0626-e4", max_tokens: int = 8192, temperature=0.8
  ) -> str:
  """
  Run a single inference call against the LLM
@@ -19,12 +48,13 @@ def call_once(
  :param max_tokens: maximum number of output tokens
  :return: the model's output text
  """
+ from openai import OpenAI

  if isinstance(input, str):
  message = [{"role": "user", "content": input}]
  elif isinstance(input, list):
  message = input

- response = client.chat.completions.create(model=model_name, messages=message, max_tokens=max_tokens)
+ response = client.chat.completions.create(model=model_name, messages=message, max_tokens=max_tokens, temperature=temperature)

  return response.choices[0].message.content
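The module-level `from openai import Openai` (a misspelling of `OpenAI`) is gone; both functions now import `OpenAI` lazily, so the dependency is only needed when they are called. A usage sketch against an OpenAI-compatible endpoint (base URL, API key, and model name below are placeholders):

```python
from openai import OpenAI
from nlpertools.llm.call_llm_once import call_once, call_once_stream

client = OpenAI(base_url="http://localhost:8000/v1", api_key="EMPTY")

# one-shot call; temperature now defaults to 0.8 here
answer = call_once(client, "Say hi in one word.", model_name="my-model")

# streaming variant prints each chunk as it arrives and returns the full text
answer = call_once_stream(client, [{"role": "user", "content": "Say hi."}], model_name="my-model")
```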
nlpertools/llm/infer.py CHANGED
@@ -1,7 +1,7 @@
  import os
  from tqdm import tqdm
- from openai import OpenAI
  import concurrent.futures
+ import itertools


  INFER_PARAS = {
@@ -15,14 +15,17 @@ INFER_PARAS = {


  def parse_infer_data(infer_data: list):
+ # Why the nested [[...]]: each message itself must already be a list
  if isinstance(infer_data[0], str):
- message = [{"role": "user", "content": i} for i in infer_data]
+ message = [[{"role": "user", "content": i}] for i in infer_data]
  elif isinstance(infer_data[0], list):
  message = infer_data
  return message


- def common_api_infer_func(model_name, infer_data: list, infer_paras, client: OpenAI):
+ def common_api_infer_func(model_name, infer_data: list, infer_paras, client):
+ from openai import OpenAI
+
  """
  infer_data: list of messages/prompt
  """
@@ -31,16 +34,58 @@ def common_api_infer_func(model_name, infer_data: list, infer_paras, client: Ope
  def get_response(model_name, messages, infer_paras):
  responses = []
  infer_times = infer_paras.get("infer_times", 1)
+
  for _ in range(infer_times):
  # run inference via the OpenAI API
- response = client.chat.completions.create(model=model_name, messages=messages, **infer_paras)
+ response = client.chat.completions.create(
+ model=model_name,
+ messages=messages,
+ temperature=infer_paras.get("temperature", 0.7),
+ max_tokens=infer_paras.get("max_tokens", 8192),
+ )
  text = response.choices[0].message.content
  responses.append({"text": text})
  return responses

  with concurrent.futures.ThreadPoolExecutor(16) as executor:
  futures = [executor.submit(get_response, model_name, message, infer_paras) for message in messages]
- results = [future.result() for future in concurrent.futures.as_completed(futures)]
+ # results = [future.result() for future in tqdm(concurrent.futures.as_completed(futures))] # out of order
+ results = [future.result() for future in tqdm(futures)]
+
+ return results
+
+
+ def common_api_infer_func_multi_client(model_name, infer_data: list, infer_paras, clients: list):
+ """
+ infer_data: list of messages/prompt
+ """
+ messages = parse_infer_data(infer_data)
+ iter_cycle = itertools.cycle(clients)
+
+ def get_response(model_name, messages, infer_paras):
+ client = next(iter_cycle)
+ # print(client.base_url)
+ responses = []
+ infer_times = infer_paras.get("infer_times", 1)
+ for _ in range(infer_times):
+ # run inference via the OpenAI API
+ try:
+ response = client.chat.completions.create(
+ model=model_name,
+ messages=messages,
+ temperature=infer_paras.get("temperature", 0.7),
+ max_tokens=infer_paras.get("max_tokens", 8192),
+ )
+ text = response.choices[0].message.content
+ except Exception as e:
+ print(e.__str__())
+ text = ""
+ responses.append({"text": text})
+ return responses
+
+ with concurrent.futures.ThreadPoolExecutor(128) as executor:
+ futures = [executor.submit(get_response, model_name, message, infer_paras) for message in messages]
+ results = [future.result() for future in tqdm(futures)]

  return results

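The new multi-client variant round-robins requests across several endpoints via `itertools.cycle` and swallows per-request failures as empty strings. A sketch of how it might be driven (the endpoints and model name are placeholders):

```python
from openai import OpenAI
from nlpertools.llm.infer import common_api_infer_func_multi_client

clients = [
    OpenAI(base_url="http://host-a:8000/v1", api_key="EMPTY"),
    OpenAI(base_url="http://host-b:8000/v1", api_key="EMPTY"),
]
prompts = ["Translate 'hello' to French.", "What is 2 + 2?"]

# each prompt becomes [{"role": "user", "content": ...}] via parse_infer_data;
# results is one entry per prompt, each a list of {"text": ...} of length infer_times
results = common_api_infer_func_multi_client("my-model", prompts, {"infer_times": 1}, clients)
```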
nlpertools/other.py CHANGED
@@ -3,7 +3,6 @@
  # @Author : youshu.Ji
  import itertools
  import os
- import re
  import string
  import subprocess
  import threading
@@ -13,7 +12,10 @@ import math
  import datetime
  import difflib
  import psutil
+ import sys
+
  from .io.file import writetxt_w_list, writetxt_a
+
  # import numpy as np
  # import psutil
  # import pyquery as pq
@@ -25,9 +27,9 @@ from .io.file import writetxt_w_list, writetxt_a
  # from win32evtlogutil import langid
  from .utils.package import *

- CHINESE_PUNCTUATION = list(',。;:‘’“”!?《》「」【】<>()、')
- ENGLISH_PUNCTUATION = list(',.;:\'"!?<>()')
- OTHER_PUNCTUATION = list('!@#$%^&*')
+ CHINESE_PUNCTUATION = list(",。;:‘’“”!?《》「」【】<>()、")
+ ENGLISH_PUNCTUATION = list(",.;:'\"!?<>()")
+ OTHER_PUNCTUATION = list("!@#$%^&*")


  def setup_logging(log_file):
@@ -40,11 +42,23 @@ def setup_logging(log_file):
  logging.basicConfig(
  filename=log_file,
  level=logging.INFO,
- format='%(asctime)s - %(levelname)s - %(message)s',
- datefmt='%Y-%m-%d %H:%M:%S'
+ format="%(asctime)s - %(levelname)s - %(message)s",
+ datefmt="%Y-%m-%d %H:%M:%S",
  )


+ def stop():
+ sys.exit()
+
+
+ def exit():
+ sys.exit()
+
+
+ def round2(num):
+ return round(num * 100, 2)
+
+
  def get_diff_parts(str1, str2):
  # create a SequenceMatcher object
  matcher = difflib.SequenceMatcher(None, str1, str2)
@@ -52,7 +66,7 @@ def get_diff_parts(str1, str2):
  # collect the differing parts
  diff_parts = []
  for tag, i1, i2, j1, j2 in matcher.get_opcodes():
- if tag == 'replace' or tag == 'delete' or tag == 'insert':
+ if tag == "replace" or tag == "delete" or tag == "insert":
  diff_parts.append((tag, str1[i1:i2], str2[j1:j2]))

  return diff_parts
@@ -62,8 +76,9 @@ def run_cmd_with_timeout(cmd, timeout):
  """
  https://juejin.cn/post/7391703459803086848
  """
- process = subprocess.Popen(cmd, shell=True, encoding="utf-8", errors="ignore", stdout=subprocess.PIPE,
- stderr=subprocess.PIPE)
+ process = subprocess.Popen(
+ cmd, shell=True, encoding="utf-8", errors="ignore", stdout=subprocess.PIPE, stderr=subprocess.PIPE
+ )
  res = [None]

  def target():
@@ -144,8 +159,11 @@ def print_three_line_table(df):
  border-bottom: (third_line_px)px solid black;
  }
  </style>"""
- style = style.replace("(first_line_px)", first_line_px).replace("(second_line_px)", second_line_px).replace(
- "(third_line_px)", third_line_px)
+ style = (
+ style.replace("(first_line_px)", first_line_px)
+ .replace("(second_line_px)", second_line_px)
+ .replace("(third_line_px)", third_line_px)
+ )
  # combine the CSS style with the HTML table
  html = f"{style}{html_table}"
  print(html)
@@ -153,7 +171,7 @@ def print_three_line_table(df):
  # save the HTML to a file
  with open(temp_file_path, "w") as f:
  f.write(html)
- webbrowser.open('file://' + os.path.realpath(temp_file_path))
+ webbrowser.open("file://" + os.path.realpath(temp_file_path))


  def jprint(obj, depth=0):
@@ -178,6 +196,7 @@ def print_split(sign="=", num=20, char: str = None):

  def seed_everything():
  import torch
+
  # seed everything
  seed = 7777777
  np.random.seed(seed)
@@ -192,22 +211,23 @@ def sent_email(mail_user, mail_pass, receiver, title, content, attach_path=None)
  from email.mime.text import MIMEText
  from email.mime.application import MIMEApplication

- mail_host = 'smtp.qq.com'
+ mail_host = "smtp.qq.com"
  mail_user = mail_user
  mail_pass = mail_pass
  sender = mail_user

  message = MIMEMultipart()
- message.attach(MIMEText(content, 'plain', 'utf-8'))
+ message.attach(MIMEText(content, "plain", "utf-8"))
  if attach_path:
- attachment = MIMEApplication(open(attach_path, 'rb').read())
- attachment["Content-Type"] = 'application/octet-stream'
- attachment.add_header('Content-Dispositon', 'attachment',
- filename=('utf-8', '', attach_path)) # Note: the basename must be converted to gbk here, otherwise Chinese characters are garbled.
+ attachment = MIMEApplication(open(attach_path, "rb").read())
+ attachment["Content-Type"] = "application/octet-stream"
+ attachment.add_header(
+ "Content-Dispositon", "attachment", filename=("utf-8", "", attach_path)
+ ) # Note: the basename must be converted to gbk here, otherwise Chinese characters are garbled.
  message.attach(attachment)
- message['Subject'] = title
- message['From'] = sender
- message['To'] = receiver
+ message["Subject"] = title
+ message["From"] = sender
+ message["To"] = receiver

  try:
  smtp_obj = smtplib.SMTP()
@@ -215,9 +235,9 @@ def sent_email(mail_user, mail_pass, receiver, title, content, attach_path=None)
  smtp_obj.login(mail_user, mail_pass)
  smtp_obj.sendmail(sender, receiver, message.as_string())
  smtp_obj.quit()
- print('send email success')
+ print("send email success")
  except smtplib.SMTPException as e:
- print('send failed', e)
+ print("send failed", e)


  def convert_np_to_py(obj):
@@ -247,12 +267,12 @@ def camel_to_snake(s: str) -> str:
  :param s: camel case variable
  :return:
  """
- return reduce(lambda x, y: x + ('_' if y.isupper() else '') + y, s).lower()
+ return reduce(lambda x, y: x + ("_" if y.isupper() else "") + y, s).lower()


  # other ----------------------------------------------------------------------
  # count word frequencies
- def calc_word_count(list_word, mode, path='tempcount.txt', sort_id=1, is_reverse=True):
+ def calc_word_count(list_word, mode, path="tempcount.txt", sort_id=1, is_reverse=True):
  word_count = {}
  for key in list_word:
  if key not in word_count:
@@ -260,20 +280,20 @@ def calc_word_count(list_word, mode, path='tempcount.txt', sort_id=1, is_reverse
  else:
  word_count[key] += 1
  word_dict_sort = sorted(word_count.items(), key=lambda x: x[sort_id], reverse=is_reverse)
- if mode == 'w':
+ if mode == "w":
  for key in word_dict_sort:
- writetxt_a(str(key[0]) + '\t' + str(key[1]) + '\n', path)
- elif mode == 'p':
+ writetxt_a(str(key[0]) + "\t" + str(key[1]) + "\n", path)
+ elif mode == "p":
  for key in word_dict_sort:
- print(str(key[0]) + '\t' + str(key[1]))
- elif mode == 'u':
+ print(str(key[0]) + "\t" + str(key[1]))
+ elif mode == "u":
  return word_dict_sort


  # deduplicate a list of dicts
  def dupl_dict(dict_list, key):
  new_dict_list, value_set = [], []
- print('去重中...')
+ print("去重中...")
  for i in tqdm(dict_list):
  if i[key] not in value_set:
  new_dict_list.append(i)
@@ -288,9 +308,9 @@ def multi_thread_run(_task, data):


  def del_special_char(sentence):
- special_chars = ['\ufeff', '\xa0', '\u3000', '\xa0', '\ue627']
+ special_chars = ["\ufeff", "\xa0", "\u3000", "\xa0", "\ue627"]
  for i in special_chars:
- sentence = sentence.replace(i, '')
+ sentence = sentence.replace(i, "")
  return sentence


@@ -306,20 +326,20 @@ def spider(url):
  :param url:
  :return:
  """
- if 'baijiahao' in url:
+ if "baijiahao" in url:
  content = requests.get(url)
  # print(content.text)
  html = pq.PyQuery(content.text)
- title = html('.index-module_articleTitle_28fPT').text()
- res = html('.index-module_articleWrap_2Zphx').text().rstrip('举报/反馈')
- return '{}\n{}'.format(title, res)
+ title = html(".index-module_articleTitle_28fPT").text()
+ res = html(".index-module_articleWrap_2Zphx").text().rstrip("举报/反馈")
+ return "{}\n{}".format(title, res)


  def eda(sentence):
- url = 'https://x.x.x.x:x/eda'
+ url = "https://x.x.x.x:x/eda"
  json_data = dict({"sentence": sentence})
  res = requests.post(url, json=json_data)
- return res.json()['eda']
+ return res.json()["eda"]


  def find_language(text):
@@ -353,8 +373,8 @@ def print_prf(y_true, y_pred, label=None):
  for i in range(len(label)):
  res = []
  for k in result:
- res.append('%.5f' % k[i])
- print('{}: {} {} {}'.format(label[i], *res[:3]))
+ res.append("%.5f" % k[i])
+ print("{}: {} {} {}".format(label[i], *res[:3]))


  def print_cpu():
@@ -375,14 +395,16 @@ def squeeze_list(high_dim_list):

  def unsqueeze_list(flatten_list, each_element_len):
  # this function is wrong and has been superseded by split_list
- two_dim_list = [flatten_list[i * each_element_len:(i + 1) * each_element_len] for i in
- range(len(flatten_list) // each_element_len)]
+ two_dim_list = [
+ flatten_list[i * each_element_len : (i + 1) * each_element_len]
+ for i in range(len(flatten_list) // each_element_len)
+ ]
  return two_dim_list


  def split_list(input_list, chunk_size):
  # split the list into a 2-D array with a list comprehension
- return [input_list[i:i + chunk_size] for i in range(0, len(input_list), chunk_size)]
+ return [input_list[i : i + chunk_size] for i in range(0, len(input_list), chunk_size)]


  def auto_close():
@@ -392,6 +414,7 @@ def auto_close():
  import pyautogui as pg
  import time
  import os
+
  cmd = 'schtasks /create /tn shut /tr "shutdown -s -f" /sc once /st 23:30'
  os.system(cmd)
  while 1:
@@ -405,10 +428,13 @@ def tf_idf(corpus, save_path):
  vectorizer = CountVectorizer() # turns the texts into a term-frequency matrix; element a[i][j] is the frequency of word j in document i
  transformer = TfidfTransformer() # computes the tf-idf weight of each word
  tfidf = transformer.fit_transform(
- vectorizer.fit_transform(corpus)) # the first fit_transform computes tf-idf, the second turns the texts into a term-frequency matrix
+ vectorizer.fit_transform(corpus)
+ ) # the first fit_transform computes tf-idf, the second turns the texts into a term-frequency matrix
  word = vectorizer.get_feature_names() # all words of the bag-of-words model
  weight = tfidf.toarray() # extract the tf-idf matrix; element a[i][j] is the tf-idf weight of word j in document i
- for i in range(len(weight)): # print the tf-idf weights per document: the outer loop walks the documents, the inner loop walks the word weights of one document
+ for i in range(
+ len(weight)
+ ): # print the tf-idf weights per document: the outer loop walks the documents, the inner loop walks the word weights of one document
  for j in range(len(word)):
  getword = word[j]
  getvalue = weight[i][j]
@@ -418,7 +444,7 @@ def tf_idf(corpus, save_path):
  else:
  tfidfdict.update({getword: getvalue})
  sorted_tfidf = sorted(tfidfdict.items(), key=lambda d: d[1], reverse=True)
- to_write = ['{} {}'.format(i[0], i[1]) for i in sorted_tfidf]
+ to_write = ["{} {}".format(i[0], i[1]) for i in sorted_tfidf]
  writetxt_w_list(to_write, save_path, num_lf=1)


@@ -427,7 +453,7 @@ class GaussDecay(object):
  Only time-based decay is implemented so far; all defaults are used
  """

- def __init__(self, origin='2022-08-02', scale='90d', offset='5d', decay=0.5, task="time"):
+ def __init__(self, origin="2022-08-02", scale="90d", offset="5d", decay=0.5, task="time"):
  self.origin = origin
  self.task = task
  self.scale, self.offset = self.translate(scale, offset)
@@ -451,7 +477,7 @@ class GaussDecay(object):
  @staticmethod
  def translated_minus(field_value):
  origin = datetime.datetime.now()
- field_value = datetime.datetime.strptime(field_value, '%Y-%m-%d %H:%M:%S')
+ field_value = datetime.datetime.strptime(field_value, "%Y-%m-%d %H:%M:%S")
  return (origin - field_value).days

  def calc_exp(self):
@@ -469,13 +495,13 @@ class GaussDecay(object):
  :return:
  """
  numerator = max(0, (abs(self.translated_minus(field_value)) - self.offset)) ** 2
- sigma_square = -1 * self.scale ** 2 / (2 * math.log(self.decay, math.e))
+ sigma_square = -1 * self.scale**2 / (2 * math.log(self.decay, math.e))
  denominator = 2 * sigma_square
  s = math.exp(-1 * numerator / denominator)
  return round(self.time_coefficient * s + self.related_coefficient * raw_score, 7)


- if __name__ == '__main__':
+ if __name__ == "__main__":
  gauss_decay = GaussDecay()
  res = gauss_decay.calc_gauss(raw_score=1, field_value="2021-05-29 14:31:13")
  print(res)
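For reference, `calc_gauss` implements a standard Gaussian decay (the same shape as Elasticsearch's `gauss` scoring function) blended with the raw relevance score. With Δ the age of the field value in days, the code above computes:

```latex
s = \exp\left(-\frac{\max\bigl(0,\ |\Delta| - \mathrm{offset}\bigr)^{2}}{2\sigma^{2}}\right),
\qquad
\sigma^{2} = -\frac{\mathrm{scale}^{2}}{2\ln(\mathrm{decay})},
\qquad
\mathrm{score} = c_{\mathrm{time}}\, s + c_{\mathrm{related}} \cdot \mathrm{raw\_score}
```

so `s` equals `decay` exactly when the document sits `scale` days beyond the `offset` grace window.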
nlpertools/utils/package.py CHANGED
@@ -37,20 +37,19 @@ def lazy_import(importer_name, to_import):
  module = importlib.import_module(importer_name)
  import_mapping = {}
  for name in to_import:
- importing, _, binding = name.partition(' as ')
+ importing, _, binding = name.partition(" as ")
  if not binding:
- _, _, binding = importing.rpartition('.')
+ _, _, binding = importing.rpartition(".")
  import_mapping[binding] = importing

  def __getattr__(name):
  if name not in import_mapping:
- message = f'module {importer_name!r} has no attribute {name!r}'
+ message = f"module {importer_name!r} has no attribute {name!r}"
  raise AttributeError(message)
  importing = import_mapping[name]
  # importlib.import_module() implicitly sets submodules on this module as
  # appropriate for direct imports.
- imported = importlib.import_module(importing,
- module.__spec__.parent)
+ imported = importlib.import_module(importing, module.__spec__.parent)
  setattr(module, name, imported)
  return imported

@@ -75,15 +74,15 @@ KafkaConsumer = try_import("kafka", "KafkaConsumer")
  np = try_import("numpy", None)
  plt = try_import("matplotlib", "pyplot")
  WordNetLemmatizer = try_import("nltk.stem", "WordNetLemmatizer")
- metrics = try_import("sklearn", "metrics")
+ # metrics = try_import("sklearn", "metrics")
  requests = try_import("requests", None)
  pq = try_import("pyquery", None)
- CountVectorizer = try_import("sklearn.feature_extraction.text", "CountVectorizer")
- precision_recall_fscore_support = try_import("sklearn.metrics", "precision_recall_fscore_support")
+ # CountVectorizer = try_import("sklearn.feature_extraction.text", "CountVectorizer")
+ # precision_recall_fscore_support = try_import("sklearn.metrics", "precision_recall_fscore_support")
  tqdm = try_import("tqdm", "tqdm")
  # TODO auto-exporting langid and win32evtlogutil is buggy
  langid = try_import("langid", None)
  win32evtlogutil = try_import("win32evtlogutil", None)
- TfidfTransformer = try_import("sklearn.feature_extraction.text", "TfidfTransformer")
+ # TfidfTransformer = try_import("sklearn.feature_extraction.text", "TfidfTransformer")
  yaml = try_import("yaml", None)
- omegaconf = try_import("omegaconf", None)
+ omegaconf = try_import("omegaconf", None)
nlpertools/wrapper.py CHANGED
@@ -7,16 +7,18 @@ import time
  from functools import wraps
  import asyncio

+
  def fn_async_timer(function):
  """
  Decorator for async functions
  """
+
  @wraps(function)
  async def function_timer(*args, **kwargs):
  t0 = time.time()
  result = await function(*args, **kwargs)
  t1 = time.time()
- print('[finished {func_name} in {time:.2f}s]'.format(func_name=function.__name__, time=t1 - t0))
+ print("[finished {func_name} in {time:.2f}s]".format(func_name=function.__name__, time=t1 - t0))
  return result

  return function_timer
@@ -36,14 +38,14 @@ def fn_timer(async_func=False, analyse=False):
  t0 = time.time()
  result = await asyncio.create_task(func(*args, **kwargs))
  t1 = time.time()
- print('[finished {func_name} in {time:.2f}s]'.format(func_name=func.__name__, time=t1 - t0))
+ print("[finished {func_name} in {time:.2f}s]".format(func_name=func.__name__, time=t1 - t0))
  return result

  def func_time(*args, **kwargs):
  t0 = time.time()
  result = func(*args, **kwargs)
  t1 = time.time()
- print('[finished {func_name} in {time:.2f}s]'.format(func_name=func.__name__, time=t1 - t0))
+ print("[finished {func_name} in {time:.2f}s]".format(func_name=func.__name__, time=t1 - t0))
  return result

  def func_time_analyse(*args, **kwargs):
@@ -114,7 +116,7 @@ def fn_try(parameter):
  return result
  except Exception as e:
  msg = "报错!"
- print('[func_name: {func_name} {msg}]'.format(func_name=function.__name__, msg=msg))
+ print("[func_name: {func_name} {msg}]".format(func_name=function.__name__, msg=msg))
  parameter["msg"] = parameter["msg"].format(str(e))
  return parameter
  finally:
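A minimal usage sketch of the async timing decorator shown above:

```python
import asyncio
from nlpertools.wrapper import fn_async_timer

@fn_async_timer
async def fetch():
    # stand-in for real async work
    await asyncio.sleep(0.1)
    return "done"

print(asyncio.run(fetch()))  # prints "[finished fetch in 0.10s]", then "done"
```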
nlpertools-1.0.10.dist-info/METADATA → nlpertools-1.0.11.dist-info/METADATA RENAMED
@@ -1,6 +1,6 @@
  Metadata-Version: 2.4
  Name: nlpertools
- Version: 1.0.10
+ Version: 1.0.11
  Summary: A small package about small basic IO operation when coding
  Home-page: https://github.com/lvzii/nlpertools
  Author: youshuJi
@@ -64,6 +64,8 @@ json_data = nlpertools.load_from_json('res.json')
  ```bash
  ## git, very useful when the connection to GitHub is unstable
  ncli git pull
+ ## when passing flags, add -- so that -u is not parsed as an ncli option
+ ncli -- git push -u origin main

  # generate the real-time PyPI two-factor authentication code (a key must be provided)
  ncli --get_2fa --get_2fa_key your_key
nlpertools-1.0.10.dist-info/RECORD → nlpertools-1.0.11.dist-info/RECORD RENAMED
@@ -1,20 +1,19 @@
- nlpertools/__init__.py,sha256=3tjuCeGz_Q2DAGXn2K6n58YEQ8dpwGx4yg_rh_npw9M,502
- nlpertools/cli.py,sha256=LlHZV9x9ZeqC9rILG4aYmNM2PymdkzYVc7lcbu1tMRw,3615
+ nlpertools/__init__.py,sha256=VnH7GWVSTcV010_kD4VtsOAwIjzhe8prax8Wj17uc20,537
+ nlpertools/cli.py,sha256=uCIUkiBXqTWJaxSQd5MlliGcTfxWzymo1UyQ3z_uhak,3612
  nlpertools/data_client.py,sha256=esX8lUQrTui4uVkqPfhpHVok7Eq6ywpuemKjLeqoglc,14674
- nlpertools/dataprocess.py,sha256=3ayCZAFc5t-Ov06oenRhMoGmnQrmCy-gtPhswecjEa4,23451
  nlpertools/default_db_config.yml,sha256=E1K9k_xzXVlsf-HJQh8kyHXHYuvTpD12jD4Hfe5rUk8,606
  nlpertools/get_2fa.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
  nlpertools/ml.py,sha256=fjI-WMM1lRnOnRFfTLEGplUx_Uamgr2gfmoAvGlgF7E,18994
  nlpertools/movie.py,sha256=rkyOnAXdsbWfMSbi1sE1VNRT7f66Hp9BnZsN_58Afmw,897
  nlpertools/nlpertools_config.yml,sha256=ksXejxFs7pxR47tNAsrN88_4gvq9PCA2ZMO07H-dJXY,26
  nlpertools/open_api.py,sha256=uyTY00OUlM57Cn0Wm0yZXcIS8vAszy9rKnDMBEWfWJM,1744
- nlpertools/other.py,sha256=WWUPwdBkRQrWpsmAMOYBm6GFFnKlyN1ANlFx5bLkj8s,15125
+ nlpertools/other.py,sha256=LaNZRQ8wWJqZP6Gycq7eThEqcGXIANg7WzT6nh5QiKQ,15262
  nlpertools/pic.py,sha256=13aaFJh3USGYGs4Y9tAKTvWjmdQR4YDjl3LlIhJheOA,9906
  nlpertools/plugin.py,sha256=LB7j9GdoQi6TITddH-6EglHlOa0WIHLUT7X5vb_aIZY,1168
  nlpertools/reminder.py,sha256=wiXwZQmxMck5vY3EvG8_oakP3FAdjGTikAIOiTPUQrs,2977
  nlpertools/utils_for_nlpertools.py,sha256=SJqjfMc2Vd8ZCqzQiJCkSxjJxEKzvEUgAgbhKPtC6ww,3583
  nlpertools/vector_index_demo.py,sha256=CSCzXD13bUIo9AG-bjen668H10B02HFU1Kbxakvrs68,2924
- nlpertools/wrapper.py,sha256=xQ7UUAAqSEaRJweoZyGSTjM4B3FQNLl5Gou968Sl_hA,4348
+ nlpertools/wrapper.py,sha256=8ReHv7LrBGX6wHma8rf_EhFPg0FJNoDjbn4p0O2UHzs,4350
  nlpertools/algo/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
  nlpertools/algo/ac.py,sha256=4BSiJdG8-S78w_KRqvGOkYjxuTDRiBsskRv-6Doi7oE,422
  nlpertools/algo/bit_ops.py,sha256=l14-j5VOqrab80CA_uBs1AiAJbzJUJH9dJXc7O9F5d0,501
@@ -24,15 +23,18 @@ nlpertools/algo/template.py,sha256=9vsHr4g3jZZ5KVU_2I9i97o8asRXq-8pSaCXIv0sHeM,2
  nlpertools/algo/union.py,sha256=0l7lGZbw1qIfW1z5TE8Oo3tybL1bKIP5rzpa5ZT-vLQ,249
  nlpertools/data_structure/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
  nlpertools/data_structure/base_structure.py,sha256=gVUvJZ5jsCAswRETTpMwcEjLKoageWiTuCKNEwIWKWk,2641
+ nlpertools/dataprocess/__init__.py,sha256=YPBPsZ8vAoMS6GJ7GlCqj01Cx1q8dDARc_gW-ysORyk,21
+ nlpertools/dataprocess/dedupl.py,sha256=WIBOrM6LfX3txcDa0xF7rqeBIpfqwrDBgepa6bavpt0,289
+ nlpertools/dataprocess/dp_main.py,sha256=iyDsmKzUx5lD8EUNwkWIlTGKVQQDVx8p3pXFv2_kR64,23452
  nlpertools/draw/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
  nlpertools/draw/draw.py,sha256=19dskkr0wrgczxPJnphEszliwYshEh5SjD8Zz07nlk0,2615
  nlpertools/draw/math_func.py,sha256=0NQ22Dfi9DFG6Bg_hXnCT27w65-dqpOOIgZX7oUIW-Q,881
  nlpertools/io/__init__.py,sha256=YMuKtC2Ddh5dL5MvXjyUKYOOuqzFYUhBPFaP2kyFG9I,68
- nlpertools/io/dir.py,sha256=FPY62COQN8Ji72pk0dYRoXkrORYaUlybKNcL4474uUI,2263
- nlpertools/io/file.py,sha256=mLWl09IEi0rWPN4tTq3LwdYMvAjj4e_QsjEMhufuPPo,7192
- nlpertools/llm/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
- nlpertools/llm/call_llm_once.py,sha256=vswnPDZmDZO2Gz2U1m7X7OhaCHUkyVnEzDy4g7CQhVU,856
- nlpertools/llm/infer.py,sha256=u9DbopRY1-xQymcNGucsnVwo9Bgyrqg2ncWlK1f00rA,2483
+ nlpertools/io/dir.py,sha256=jpJuCwLeBInr03iCSUfffmlchWShZ2Cjq38n0D0dILI,3106
+ nlpertools/io/file.py,sha256=NF1xV5iazl86-TDdMQJ-LLrqCnuW29uuFb_NA55YNr4,7274
+ nlpertools/llm/__init__.py,sha256=SdbGjzhu1lCeq55mC0tgsah9yzVxvvNrWMf2z8kDEoQ,71
+ nlpertools/llm/call_llm_once.py,sha256=W0J2Ab8dHnVZ8q_KgfTKbee7NlJnA-ewjsne80ALLXY,1793
+ nlpertools/llm/infer.py,sha256=q7asgwdJwo27d6rdBNQLys_bPEF0g-UNDKjt3S-Ltvs,4133
  nlpertools/llm/price.py,sha256=8zzEaLrbGiDUbTFSnuBGAduiSfDVXQUk4Oc_lE6eJFw,544
  nlpertools/monitor/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
  nlpertools/monitor/gpu.py,sha256=M59O6i0hlew7AzXZlaVZqbZA5IR93OhBY2WI0-T_HtY,531
@@ -41,13 +43,13 @@ nlpertools/template/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSu
  nlpertools/utils/__init__.py,sha256=YMuKtC2Ddh5dL5MvXjyUKYOOuqzFYUhBPFaP2kyFG9I,68
  nlpertools/utils/lazy.py,sha256=SAeHLjxsYhpKWvcQKjs2eY0Nn5n3CJlqtxOLVOe1WjQ,29280
  nlpertools/utils/log_util.py,sha256=ftJDoTOtroLH-LadOygZljeyltOQn0D2Xb5x7Td1Qdg,428
- nlpertools/utils/package.py,sha256=wLg_M8j7Y6ReRjWHWCWoZJHrzEwuAr9TyG2jvb7OQCo,3261
+ nlpertools/utils/package.py,sha256=8TLbrD3nmukpJw9lSpHHbUYK74qyAaSM_jUrCJOG6mo,3227
  nlpertools/utils/package_v1.py,sha256=sqgFb-zbTdMd5ziJLY6YUPqR49qUNZjxBH35DnyR5Wg,3542
  nlpertools/utils/package_v2.py,sha256=WOcsguWfUd4XSAfmPgCtL8HtUbqJ6GRSMHb0OsB47r0,3932
- nlpertools-1.0.10.dist-info/licenses/LICENSE,sha256=SBcMozykvTbZJ--MqSiKUmHLLROdnr25V70xCQgEwqw,11331
+ nlpertools-1.0.11.dist-info/licenses/LICENSE,sha256=SBcMozykvTbZJ--MqSiKUmHLLROdnr25V70xCQgEwqw,11331
  nlpertools_helper/__init__.py,sha256=obxRUdZDctvcvK_iA1Dx2HmQFMlMzJto-xDPryq1lJ0,198
- nlpertools-1.0.10.dist-info/METADATA,sha256=z6WqwEQxdq4xOF3Pw8QXMcrckcMTYfaeRyEqs0aM428,3304
- nlpertools-1.0.10.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
- nlpertools-1.0.10.dist-info/entry_points.txt,sha256=XEazQ4vUwJMoMAgAwk1Lq4PRQGklPkPBaFkiP0zN_JE,45
- nlpertools-1.0.10.dist-info/top_level.txt,sha256=_4q4MIFvMr4cAUbhWKWYdRXIXsF4PJDg4BUsZvgk94s,29
- nlpertools-1.0.10.dist-info/RECORD,,
+ nlpertools-1.0.11.dist-info/METADATA,sha256=3KXxqbO2wWDMXLmnZJm2RvETybvIMekPelhSxE_ovKk,3386
+ nlpertools-1.0.11.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
+ nlpertools-1.0.11.dist-info/entry_points.txt,sha256=XEazQ4vUwJMoMAgAwk1Lq4PRQGklPkPBaFkiP0zN_JE,45
+ nlpertools-1.0.11.dist-info/top_level.txt,sha256=_4q4MIFvMr4cAUbhWKWYdRXIXsF4PJDg4BUsZvgk94s,29
+ nlpertools-1.0.11.dist-info/RECORD,,