nlpertools-1.0.9-py3-none-any.whl → nlpertools-1.0.10-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
nlpertools/__init__.py CHANGED
@@ -17,7 +17,7 @@ from .utils_for_nlpertools import *
 from .wrapper import *
 from .monitor import *
 from .cli import *
+from .llm import *
 
 
-
-__version__ = '1.0.9'
+__version__ = "1.0.10"
nlpertools/cli.py CHANGED
@@ -2,39 +2,24 @@ import argparse
 import os
 import uuid
 import sys
+from .dataprocess import startwith
 
-"""
-How to debug cli.py
-"""
 
-
-def git_push():
+def run_git_command(command):
     """
-    Pushing to GitHub often fails from mainland China; retry automatically
+    Run the git command in a loop until it succeeds
     """
+    print(command)
     num = -1
-    while 1:
-        num += 1
-        print("retry num: {}".format(num))
-        info = os.system("git push --set-upstream origin main")
-        print(str(info))
-        if not str(info).startswith("fatal"):
-            print("scucess")
-            break
-
-
-def git_pull():
-    """
-    Pulling from GitHub often fails from mainland China; retry automatically
-    """
-    num = -1
-    while 1:
+    while True:
         num += 1
-        print("retry num: {}".format(num))
-        info = os.system("git pull")
+        print(f"retry num: {num}")
+        info = os.system(command)
         print(str(info))
-        if not str(info).startswith("fatal") and not str(info).startswith("error"):
-            print("scucess")
+        # Check the command result; treat it as success if no error marker appears
+        if not startwith(str(info), ["fatal", "error", "128", "1"]):
+            print("success")
+            print(f"success info : ##{info}##")
             break
 
 
@@ -108,22 +93,19 @@ def start_gpu_usage_notify_client():
 
 
 def main():
-    parser = argparse.ArgumentParser(description="CLI tool for git operations and getting MAC address.")
-    parser.add_argument('--gitpush', action='store_true', help='Perform git push operation.')
-    parser.add_argument('--gitpull', action='store_true', help='Perform git pull operation.')
+    parser = argparse.ArgumentParser(description="CLI tool for git operations and other functions.")
+    parser.add_argument('git_command', nargs='*', help='Any git command (e.g., push, pull)')
     parser.add_argument('--mac_address', action='store_true', help='Get the MAC address.')
-
     parser.add_argument('--get_2fa', action='store_true', help='Get the 2fa value.')
     parser.add_argument('--get_2fa_key', type=str, help='Get the 2fa value.')
-    parser.add_argument('--monitor_gpu_cli', action='store_true', help='Get the 2fa value.')
-    parser.add_argument('--monitor_gpu_ser', action='store_true', help='Get the 2fa value.')
+    parser.add_argument('--monitor_gpu_cli', action='store_true', help='monitor gpu cli')
+    parser.add_argument('--monitor_gpu_ser', action='store_true', help='monitor gpu ser')
 
     args = parser.parse_args()
 
-    if args.gitpush:
-        git_push()
-    elif args.gitpull:
-        git_pull()
+    if args.git_command:
+        git_cmd = " ".join(args.git_command)
+        run_git_command(git_cmd)
     elif args.mac_address:
         get_mac_address()
     elif args.monitor_gpu_cli:
@@ -140,4 +122,4 @@ def main():
 
 
 if __name__ == '__main__':
-    main()
+    main()
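For context, the rewritten CLI funnels any git subcommand through the new `run_git_command` helper, which shells out via `os.system` and retries until the returned status carries no error marker. A minimal usage sketch (hypothetical call; `ncli` is the console script declared in entry_points.txt):

```python
# Equivalent to running `ncli git push --set-upstream origin main` from the shell.
from nlpertools.cli import run_git_command

# Retries in a loop; note os.system() returns the exit status, not the
# command's output, so the markers are matched against that status string.
run_git_command("git push --set-upstream origin main")
```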
nlpertools/dataprocess.py CHANGED
@@ -19,6 +19,18 @@ other_special_characters = (
     "」﴾》"
 )
 
+def startwith(text: str, pattern_list: list) -> bool:
+    """
+    Check whether text starts with any pattern in pattern_list
+    :param text:
+    :param pattern_list:
+    :return:
+    """
+    for pattern in pattern_list:
+        if text.startswith(pattern):
+            return True
+    return False
+
 
 class Pattern:
     """
nlpertools/llm/__init__.py ADDED (empty file, no content to show)
nlpertools/llm/call_llm_once.py ADDED
@@ -0,0 +1,30 @@
+from ..io.file import readtxt_string, read_yaml
+from tqdm import tqdm
+import os
+from openai import OpenAI
+from typing import Optional, Union
+
+"""
+Find the .key file in your current project to get the url and key
+"""
+
+
+def call_once(
+    client: OpenAI, input: Optional[Union[str, list]], model_name: str = "qwen3-0626-e4", max_tokens: int = 8192
+) -> str:
+    """
+    Run a single LLM inference call
+    :param input: the prompt text, or a list of messages
+    :param model_name: model name
+    :param max_tokens: maximum number of output tokens
+    :return: the model's output text
+    """
+
+    if isinstance(input, str):
+        message = [{"role": "user", "content": input}]
+    elif isinstance(input, list):
+        message = input
+
+    response = client.chat.completions.create(model=model_name, messages=message, max_tokens=max_tokens)
+
+    return response.choices[0].message.content
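A minimal usage sketch for `call_once`, assuming an OpenAI-compatible endpoint; the base_url, api_key, and model name below are placeholders, not values shipped with the package:

```python
from openai import OpenAI
from nlpertools.llm.call_llm_once import call_once

# Placeholder endpoint and key; the module docstring suggests reading the
# real url and key from a .key file in your project.
client = OpenAI(base_url="http://localhost:8000/v1", api_key="sk-placeholder")

# A plain string is wrapped into a single user message.
print(call_once(client, "Say hi in one word.", model_name="my-model"))

# A prepared message list is passed through unchanged.
messages = [{"role": "user", "content": "Say hi in one word."}]
print(call_once(client, messages, model_name="my-model"))
```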
nlpertools/llm/infer.py ADDED
@@ -0,0 +1,77 @@
+import os
+from tqdm import tqdm
+from openai import OpenAI
+import concurrent.futures
+
+
+INFER_PARAS = {
+    "temperature": 0.7,
+    "infer_times": 1,
+    "max_tokens": 8192,
+    "top_p": 0.95,
+    "top_k": 40,
+    "repetition_penalty": 1.0,
+}
+
+
+def parse_infer_data(infer_data: list):
+    if isinstance(infer_data[0], str):
+        # wrap each prompt as its own message list so the API receives a list of messages
+        message = [[{"role": "user", "content": i}] for i in infer_data]
+    elif isinstance(infer_data[0], list):
+        message = infer_data
+    return message
+
+
+def common_api_infer_func(model_name, infer_data: list, infer_paras, client: OpenAI):
+    """
+    infer_data: list of messages/prompt
+    """
+    messages = parse_infer_data(infer_data)
+
+    def get_response(model_name, messages, infer_paras):
+        responses = []
+        infer_times = infer_paras.get("infer_times", 1)
+        # only forward parameters the OpenAI chat API accepts
+        api_paras = {k: v for k, v in infer_paras.items() if k in ("temperature", "max_tokens", "top_p")}
+        for _ in range(infer_times):
+            # run inference through the OpenAI API
+            response = client.chat.completions.create(model=model_name, messages=messages, **api_paras)
+            text = response.choices[0].message.content
+            responses.append({"text": text})
+        return responses
+
+    with concurrent.futures.ThreadPoolExecutor(16) as executor:
+        futures = [executor.submit(get_response, model_name, message, infer_paras) for message in messages]
+        results = [future.result() for future in concurrent.futures.as_completed(futures)]
+
+    return results
+
+
+def common_vllm_infer_func(model_path, infer_data: list, infer_paras: dict):
+    """
+    infer_data: list of messages/prompt
+    """
+    messages = parse_infer_data(infer_data)
+    from vllm import LLM, SamplingParams
+
+    temperature = infer_paras.get("temperature", 0.7)
+    infer_times = infer_paras.get("infer_times", 1)
+    vllm_card_num = len(os.environ["CUDA_VISIBLE_DEVICES"].split(","))
+
+    llm = LLM(model=model_path, tensor_parallel_size=vllm_card_num, trust_remote_code=True, gpu_memory_utilization=0.85)
+    sampling_params = SamplingParams(
+        temperature=temperature,
+        n=infer_times,
+        max_tokens=8192,
+        # recommended params for qwen3 non-thinking mode
+        # **infer_paras.get(template_name, {}),
+        # recommended params for qwen3 thinking mode
+    )
+    conversation = messages
+    outputs = llm.chat(conversation, sampling_params=sampling_params, use_tqdm=True)
+    return_texts = []
+    for idx, output in tqdm(enumerate(outputs)):
+        result = [{"text": i.text} for i in output.outputs]
+        return_texts.append(result)
+    return return_texts
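A sketch of driving the API inference path, again assuming an OpenAI-compatible server; the endpoint, key, and model name are placeholders:

```python
from openai import OpenAI
from nlpertools.llm.infer import INFER_PARAS, common_api_infer_func

# Placeholder endpoint, key, and model name.
client = OpenAI(base_url="http://localhost:8000/v1", api_key="sk-placeholder")
prompts = ["What is 1 + 1?", "Name a prime number."]

# Each prompt becomes its own conversation; a 16-thread pool issues the calls.
# Each element of results is a list of {"text": ...} dicts, one per infer_times
# repetition. Note that as_completed() yields in completion order, so results
# are not guaranteed to line up with the input order.
results = common_api_infer_func("my-model", prompts, INFER_PARAS, client)
print(results)

# The vLLM path runs the same data on local GPUs instead:
# results = common_vllm_infer_func("/path/to/model", prompts, INFER_PARAS)
```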
nlpertools/llm/price.py ADDED
@@ -0,0 +1,13 @@
+def estimate_cost(input_token_num, output_token_num, example_num=1, input_price=1, output_price=4):
+    """
+    Estimate cost
+    :param input_token_num: number of input tokens
+    :param output_token_num: number of output tokens
+    :param example_num: number of examples
+    :param input_price: price per 1M input tokens
+    :param output_price: price per 1M output tokens
+    :return: cost
+    """
+    price = (input_token_num * input_price + output_token_num * output_price) * example_num / 1000000
+    print(f"Estimated cost: {price:.2f} CNY")
+    return price
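The arithmetic is (input_tokens × input_price + output_tokens × output_price) × example_num / 1,000,000, with prices quoted per million tokens. A worked check:

```python
from nlpertools.llm.price import estimate_cost

# 1,000 input and 500 output tokens per example, 100 examples, at the default
# 1 CNY / 1M input tokens and 4 CNY / 1M output tokens:
# (1000 * 1 + 500 * 4) * 100 / 1_000_000 = 0.30
cost = estimate_cost(1000, 500, example_num=100)
assert abs(cost - 0.30) < 1e-9
```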
nlpertools/ml.py CHANGED
@@ -2,9 +2,11 @@
 import codecs
 import os
 import random
+import itertools
 
 from .io.dir import j_mkdir
 from .io.file import readtxt_list_all_strip, writetxt_w_list, save_to_csv
+
 # import numpy as np
 # import seaborn as sns
 # import torch
@@ -17,10 +19,11 @@ from .io.file import readtxt_list_all_strip, writetxt_w_list, save_to_csv
 from .utils.package import *
 
 
-def estimate_pass_at_k(num_samples:list, num_correct:list, k):
+def estimate_pass_at_k(num_samples: list, num_correct: list, k):
     """
     copy from https://huggingface.co/spaces/evaluate-metric/code_eval/blob/main/code_eval.py
     num_samples: list
+    Note: if num_samples < k, acc = 1, which is counterintuitive
     """
     """Estimates pass@k of each problem and returns them in an array."""
 
@@ -39,8 +42,21 @@ def estimate_pass_at_k(num_samples:list, num_correct:list, k):
     return np.array([estimator(int(n), int(c), k) for n, c in zip(num_samples_it, num_correct)])
 
 
+def estimate_pass_at_k_fixed(num_samples: list, num_correct: list, k):
+    """
+    Handles the case where num_samples is less than k
+    """
+    num_samples = [k if i < k else i for i in num_samples]
+    return estimate_pass_at_k(num_samples, num_correct, k)
+
+
+def estimate_pass_at_k_return_num(num_samples: list, num_correct: list, k):
+    """Return the averaged score directly"""
+    return round(estimate_pass_at_k(num_samples, num_correct, k).mean() * 100, 2)
+
+
 def calc_llm_train_activation_memory(
-        model_name, sequence_length, batch_size, hidden_dim, lay_number, attention_heads_num, gpu_num=1
+    model_name, sequence_length, batch_size, hidden_dim, lay_number, attention_heads_num, gpu_num=1
 ):
     """
     return bytes
@@ -54,18 +70,19 @@ calc_llm_train_activation_memory(
     # FFN
     # Layer Norm
     r1 = (
-            sequence_length
-            * batch_size
-            * hidden_dim
-            * lay_number
-            * (34 + 5 * attention_heads_num * sequence_length / hidden_dim)
+        sequence_length
+        * batch_size
+        * hidden_dim
+        * lay_number
+        * (34 + 5 * attention_heads_num * sequence_length / hidden_dim)
     )
     # reference2
     r2 = (
-            lay_number * (2 * sequence_length * attention_heads_num + 16 * hidden_dim)
-            * sequence_length
-            * batch_size
-            / gpu_num
+        lay_number
+        * (2 * sequence_length * attention_heads_num + 16 * hidden_dim)
+        * sequence_length
+        * batch_size
+        / gpu_num
     )
     print(r1)
     print(r2)
@@ -100,9 +117,7 @@ class DataStructure:
         "source": "baidu",
     }
     ner_input_example = "这句话一共有两个实体分别为大象和老鼠。"
-    ner_label_example = (
-        list("OOOOOOOOOOOOO") + ["B-s", "I-s"] + ["O"] + ["B-o", "I-o"] + ["O"]
-    )
+    ner_label_example = list("OOOOOOOOOOOOO") + ["B-s", "I-s"] + ["O"] + ["B-o", "I-o"] + ["O"]
 
 
 def text_jaccard(ipt1, ipt2, ipt_level="char", sim_level="char"):
@@ -156,7 +171,7 @@ class STEM(object):
             if each_srl:
                 args = []
                 for arg in each_srl:
-                    args.extend(seg[arg[1]: arg[2] + 1])
+                    args.extend(seg[arg[1] : arg[2] + 1])
                 # add the predicate as well
                 args.insert(each_srl[0][2] - each_srl[0][1] + 1, seg[wdx])
                 events.append(args)
@@ -195,7 +210,7 @@ def subject_object_labeling(spo_list, text):
     q_list_length = len(q_list)
     k_list_length = len(k_list)
     for idx in range(k_list_length - q_list_length + 1):
-        t = [q == k for q, k in zip(q_list, k_list[idx: idx + q_list_length])]
+        t = [q == k for q, k in zip(q_list, k_list[idx : idx + q_list_length])]
         # print(idx, t)
         if all(t):
             # print(idx)
@@ -208,9 +223,7 @@ def subject_object_labeling(spo_list, text):
         if len(spo) == 2:
             labeling_list[idx_start + 1] = "I-" + spo_type
         elif len(spo) >= 3:
-            labeling_list[idx_start + 1: idx_start + len(spo)] = ["I-" + spo_type] * (
-                len(spo) - 1
-            )
+            labeling_list[idx_start + 1 : idx_start + len(spo)] = ["I-" + spo_type] * (len(spo) - 1)
         else:
             pass
 
@@ -219,7 +232,7 @@ def subject_object_labeling(spo_list, text):
     # count = 0
     for predicate, spo_list_form in spo_predicate_dict.items():
         if predicate in text:
-            for (spo_subject, spo_object) in spo_list_form:
+            for spo_subject, spo_object in spo_list_form:
                 # if predicate not in spo_subject and predicate not in spo_object:
                 _labeling_type(spo_subject, "SUB")
                 _labeling_type(spo_object, "OBJ")
@@ -241,10 +254,7 @@ def label(text, labels):
     :return:
     """
     train_sequence = "\n".join(
-        [
-            "\t".join(i) if i[0] != " " else "[null]\t{}".format(i[1])
-            for i in zip(list(text), labels)
-        ]
+        ["\t".join(i) if i[0] != " " else "[null]\t{}".format(i[1]) for i in zip(list(text), labels)]
     )
     return train_sequence
 
@@ -260,16 +270,12 @@ def convert_crf_format_10_fold(corpus, objdir_path):
     split_position = int(len(corpus) / 10)
     for k in range(0, 10):
         if k == 9:
-            dev_set = corpus[k * split_position:]
+            dev_set = corpus[k * split_position :]
             train_set = corpus[: k * split_position]
         else:
-            dev_set = corpus[k * split_position: (k + 1) * split_position]
-            train_set = (
-                corpus[: k * split_position] + corpus[(k + 1) * split_position:]
-            )
-            writetxt_w_list(
-                train_set, os.path.join(objdir_path, "train{}.txt".format(k + 1))
-            )
+            dev_set = corpus[k * split_position : (k + 1) * split_position]
+            train_set = corpus[: k * split_position] + corpus[(k + 1) * split_position :]
+            writetxt_w_list(train_set, os.path.join(objdir_path, "train{}.txt".format(k + 1)))
         writetxt_w_list(dev_set, os.path.join(objdir_path, "test{}.txt".format(k + 1)))
         writetxt_w_list(dev_set, os.path.join(objdir_path, "dev{}.txt".format(k + 1)))
 
@@ -305,31 +311,19 @@ def read_seq_res(path, labels):
     return text, raw_label, predict_label
 
 
-def kfold_txt(corpus, path, k=9, is_shuffle=True):
-    """
-    k is how many of the 10 folds go to the training set
-    """
-    j_mkdir(path)
-    if is_shuffle:
-        random.shuffle(corpus)
-    split_position = int(len(corpus) / 10)
-    train_set, dev_set = corpus[: k * split_position], corpus[k * split_position:]
-    writetxt_w_list(train_set, os.path.join(path, "train.tsv"), num_lf=1)
-    writetxt_w_list(dev_set, os.path.join(path, "test.tsv"), num_lf=1)
-    writetxt_w_list(dev_set, os.path.join(path, "dev.tsv"), num_lf=1)
-
-
 def sample():
     import pandas as pd
     from sklearn.model_selection import StratifiedShuffleSplit
 
     # assume df is your DataFrame
 
-    df = pd.DataFrame({
-        "count_line": [i for i in range(100)],
-        "x": [i for i in range(100)],
-        "y": [i // 10 for i in range(100)],
-    })
+    df = pd.DataFrame(
+        {
+            "count_line": [i for i in range(100)],
+            "x": [i for i in range(100)],
+            "y": [i // 10 for i in range(100)],
+        }
+    )
     print(df)
     # count_line is the field used for stratified sampling
 
@@ -337,7 +331,7 @@ def sample():
     split = StratifiedShuffleSplit(n_splits=1, test_size=0.1, random_state=42)
 
     # get the train and test set indices
-    train_index, test_index = next(split.split(df, df['y']))
+    train_index, test_index = next(split.split(df, df["y"]))
 
     # split into train and test sets by index
     train_df = df.loc[train_index]
@@ -348,6 +342,27 @@ def sample():
     print("test set rows:", len(test_df))
 
 
+def kfold_txt(corpus, path, k=9, is_shuffle=True):
+    """
+    k is how many of the 10 folds go to the training set
+    """
+    j_mkdir(path)
+    if is_shuffle:
+        random.shuffle(corpus)
+    split_position = int(len(corpus) / 10)
+    train_set, dev_set = corpus[: k * split_position], corpus[k * split_position :]
+    writetxt_w_list(train_set, os.path.join(path, "train.tsv"), num_lf=1)
+    writetxt_w_list(dev_set, os.path.join(path, "test.tsv"), num_lf=1)
+    writetxt_w_list(dev_set, os.path.join(path, "dev.tsv"), num_lf=1)
+
+
+def kfold_list(list_data):
+    """
+    sklearn.model_selection.train_test_split
+    """
+    pass
+
+
 def kfold_df(df, save_dir=None):
     """
     Split into train/test/val sets and write csv readable on Windows.
@@ -360,9 +375,7 @@ def kfold_df(df, save_dir=None):
 
     train_idx, test_and_val_idx = KFold(n_splits=8, shuffle=True).split(df).__next__()
     df_test_and_val = df.iloc[test_and_val_idx]
-    test_idx, val_idx = (
-        KFold(n_splits=2, shuffle=True).split(df_test_and_val).__next__()
-    )
+    test_idx, val_idx = KFold(n_splits=2, shuffle=True).split(df_test_and_val).__next__()
    df_train = df.iloc[train_idx]
    df_val = df.iloc[val_idx]
    df_test = df.iloc[test_idx]
@@ -439,7 +452,7 @@ def split_sentence(sentence, language="chinese", cross_line=True):
     for idx, char in enumerate(sentence):
         if idx == len(sentence) - 1:
             if char in split_signs:
-                sentences.append(sentence[start_idx: idx + 1].strip())
+                sentences.append(sentence[start_idx : idx + 1].strip())
                 start_idx = idx + 1
             else:
                 sentences.append(sentence[start_idx:].strip())
@@ -449,10 +462,10 @@
             if idx < len(sentence) - 2:
                 # handle sequences like 。”。
                 if sentence[idx + 2] not in split_signs:
-                    sentences.append(sentence[start_idx: idx + 2].strip())
+                    sentences.append(sentence[start_idx : idx + 2].strip())
                     start_idx = idx + 2
                 elif sentence[idx + 1] not in split_signs:
-                    sentences.append(sentence[start_idx: idx + 1].strip())
+                    sentences.append(sentence[start_idx : idx + 1].strip())
                     start_idx = idx + 1
     return sentences
 
@@ -528,6 +541,6 @@ if __name__ == "__main__":
         hidden_dim=4096,
         lay_number=28,
         attention_heads_num=32,
-        gpu_num=1
+        gpu_num=1,
     )
     print(res, "G")
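For reference, the estimator extended here implements the unbiased pass@k formula from the code_eval metric, pass@k = 1 − C(n−c, k)/C(n, k) per problem, and the new `estimate_pass_at_k_fixed` clamps n up to k before applying it. A small sketch with made-up counts:

```python
from nlpertools.ml import estimate_pass_at_k_fixed

num_samples = [10, 10, 2]  # n generations per problem (the last has n < k)
num_correct = [3, 0, 1]    # c correct generations per problem

# Per-problem pass@5; n=2 is clamped up to k=5, giving ≈ [0.9167, 0.0, 1.0]
print(estimate_pass_at_k_fixed(num_samples, num_correct, k=5))
```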
nlpertools/other.py CHANGED
@@ -169,8 +169,11 @@ def jprint(obj, depth=0):
         print(obj)
 
 
-def print_split(sign="=", num=20):
-    print(sign * num)
+def print_split(sign="=", num=20, char: str = None):
+    if char:
+        print(sign * (num // 2), char, sign * (num // 2))
+    else:
+        print(sign * num)
 
 
 def seed_everything():
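The new optional `char` argument centers a label between two half-width rules. A short sketch, with the expected output in comments:

```python
from nlpertools.other import print_split

print_split()               # ====================
print_split(char="config")  # ========== config ==========
```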
nlpertools-1.0.9.dist-info/METADATA → nlpertools-1.0.10.dist-info/METADATA RENAMED
@@ -1,6 +1,6 @@
-Metadata-Version: 2.2
+Metadata-Version: 2.4
 Name: nlpertools
-Version: 1.0.9
+Version: 1.0.10
 Summary: A small package about small basic IO operation when coding
 Home-page: https://github.com/lvzii/nlpertools
 Author: youshuJi
@@ -15,8 +15,10 @@ License-File: LICENSE
 Requires-Dist: numpy
 Requires-Dist: pandas
 Requires-Dist: psutil
+Requires-Dist: openai
 Provides-Extra: torch
 Requires-Dist: torch; extra == "torch"
+Dynamic: license-file
 Dynamic: provides-extra
 Dynamic: requires-dist
 
@@ -50,6 +52,23 @@ nlpertools
 
 ```
 
+# Most-used / favorite features (usage examples)
+```python
+# read txt and json files
+import nlpertools
+
+txt_data = nlpertools.readtxt_list_all_strip('res.txt')
+json_data = nlpertools.load_from_json('res.json')
+```
+
+```bash
+## git, very useful when the connection to GitHub is unstable
+ncli git pull
+
+# generate the real-time key for PyPI two-factor auth (requires your key)
+ncli --get_2fa --get_2fa_key your_key
+```
+
 # Installation
 
 Install the latest release version
@@ -101,30 +120,7 @@ https://nlpertools.readthedocs.io/en/latest/
 
 Some functions need configuration before they can be used; examples are provided.
 
-## Usage examples
-
-```python
-import nlpertools
-
-a = nlpertools.readtxt_list_all_strip('res.txt')
-# or
-b = nlpertools.io.file.readtxt_list_all_strip('res.txt')
-```
 
-```bash
-# generate the real-time key for PyPI two-factor auth (requires your key)
-python -m nlpertools.cli --get_2fa --get_2fa_key your_key
-
-## git
-python -m nlpertools.cli --git_push
-python -m nlpertools.cli --git_pull
-
-# the following features are superseded by nvitop and no longer recommended
-## monitor GPU memory
-python -m nlpertools.monitor.gpu
-## monitor CPU
-python -m nlpertools.monitor.memory
-```
 
 ## Commonly used related projects
 
@@ -132,3 +128,7 @@ nvitop
 
 ydata-profiling
 
+## Contributing
+
+https://github.com/bigscience-workshop/data-preparation
+
nlpertools-1.0.9.dist-info/RECORD → nlpertools-1.0.10.dist-info/RECORD RENAMED
@@ -1,14 +1,14 @@
-nlpertools/__init__.py,sha256=5ka-NeGW2AUDJ4YZ12DD64xcxuxf9PlQUurxDp5DHbQ,483
-nlpertools/cli.py,sha256=4Ik1NyFaoZpZLsYLAFRLk6xuYQk0IvexPr1Ieq08viE,3932
+nlpertools/__init__.py,sha256=3tjuCeGz_Q2DAGXn2K6n58YEQ8dpwGx4yg_rh_npw9M,502
+nlpertools/cli.py,sha256=LlHZV9x9ZeqC9rILG4aYmNM2PymdkzYVc7lcbu1tMRw,3615
 nlpertools/data_client.py,sha256=esX8lUQrTui4uVkqPfhpHVok7Eq6ywpuemKjLeqoglc,14674
-nlpertools/dataprocess.py,sha256=v1mobuYN7I3dT6xIKlNOHVtcg31YtjF6FwNPTxeBFFY,23153
+nlpertools/dataprocess.py,sha256=3ayCZAFc5t-Ov06oenRhMoGmnQrmCy-gtPhswecjEa4,23451
 nlpertools/default_db_config.yml,sha256=E1K9k_xzXVlsf-HJQh8kyHXHYuvTpD12jD4Hfe5rUk8,606
 nlpertools/get_2fa.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-nlpertools/ml.py,sha256=qhUBCLuHfcFy8g5ZHNGYq4eH2vYWiGetyKucv8n60-A,18523
+nlpertools/ml.py,sha256=fjI-WMM1lRnOnRFfTLEGplUx_Uamgr2gfmoAvGlgF7E,18994
 nlpertools/movie.py,sha256=rkyOnAXdsbWfMSbi1sE1VNRT7f66Hp9BnZsN_58Afmw,897
 nlpertools/nlpertools_config.yml,sha256=ksXejxFs7pxR47tNAsrN88_4gvq9PCA2ZMO07H-dJXY,26
 nlpertools/open_api.py,sha256=uyTY00OUlM57Cn0Wm0yZXcIS8vAszy9rKnDMBEWfWJM,1744
-nlpertools/other.py,sha256=JWJiXHRI8mhiUV3k4CZ4kQQS9QN3mw67SmGgTqZFtjs,15026
+nlpertools/other.py,sha256=WWUPwdBkRQrWpsmAMOYBm6GFFnKlyN1ANlFx5bLkj8s,15125
 nlpertools/pic.py,sha256=13aaFJh3USGYGs4Y9tAKTvWjmdQR4YDjl3LlIhJheOA,9906
 nlpertools/plugin.py,sha256=LB7j9GdoQi6TITddH-6EglHlOa0WIHLUT7X5vb_aIZY,1168
 nlpertools/reminder.py,sha256=wiXwZQmxMck5vY3EvG8_oakP3FAdjGTikAIOiTPUQrs,2977
@@ -30,6 +30,10 @@ nlpertools/draw/math_func.py,sha256=0NQ22Dfi9DFG6Bg_hXnCT27w65-dqpOOIgZX7oUIW-Q,
 nlpertools/io/__init__.py,sha256=YMuKtC2Ddh5dL5MvXjyUKYOOuqzFYUhBPFaP2kyFG9I,68
 nlpertools/io/dir.py,sha256=FPY62COQN8Ji72pk0dYRoXkrORYaUlybKNcL4474uUI,2263
 nlpertools/io/file.py,sha256=mLWl09IEi0rWPN4tTq3LwdYMvAjj4e_QsjEMhufuPPo,7192
+nlpertools/llm/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+nlpertools/llm/call_llm_once.py,sha256=vswnPDZmDZO2Gz2U1m7X7OhaCHUkyVnEzDy4g7CQhVU,856
+nlpertools/llm/infer.py,sha256=u9DbopRY1-xQymcNGucsnVwo9Bgyrqg2ncWlK1f00rA,2483
+nlpertools/llm/price.py,sha256=8zzEaLrbGiDUbTFSnuBGAduiSfDVXQUk4Oc_lE6eJFw,544
 nlpertools/monitor/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 nlpertools/monitor/gpu.py,sha256=M59O6i0hlew7AzXZlaVZqbZA5IR93OhBY2WI0-T_HtY,531
 nlpertools/monitor/memory.py,sha256=9t6q9BC8VVx4o3G4sBCn7IoQRx272zMPjSnL3yvTBAQ,657
@@ -40,10 +44,10 @@ nlpertools/utils/log_util.py,sha256=ftJDoTOtroLH-LadOygZljeyltOQn0D2Xb5x7Td1Qdg,
 nlpertools/utils/package.py,sha256=wLg_M8j7Y6ReRjWHWCWoZJHrzEwuAr9TyG2jvb7OQCo,3261
 nlpertools/utils/package_v1.py,sha256=sqgFb-zbTdMd5ziJLY6YUPqR49qUNZjxBH35DnyR5Wg,3542
 nlpertools/utils/package_v2.py,sha256=WOcsguWfUd4XSAfmPgCtL8HtUbqJ6GRSMHb0OsB47r0,3932
+nlpertools-1.0.10.dist-info/licenses/LICENSE,sha256=SBcMozykvTbZJ--MqSiKUmHLLROdnr25V70xCQgEwqw,11331
 nlpertools_helper/__init__.py,sha256=obxRUdZDctvcvK_iA1Dx2HmQFMlMzJto-xDPryq1lJ0,198
-nlpertools-1.0.9.dist-info/LICENSE,sha256=SBcMozykvTbZJ--MqSiKUmHLLROdnr25V70xCQgEwqw,11331
-nlpertools-1.0.9.dist-info/METADATA,sha256=lcKmxc7_mtYH47mPj8UHOM8-5T5YtrDwhHWVZkfHZXU,3330
-nlpertools-1.0.9.dist-info/WHEEL,sha256=In9FTNxeP60KnTkGw7wk6mJPYd_dQSjEZmXdBdMCI-8,91
-nlpertools-1.0.9.dist-info/entry_points.txt,sha256=XEazQ4vUwJMoMAgAwk1Lq4PRQGklPkPBaFkiP0zN_JE,45
-nlpertools-1.0.9.dist-info/top_level.txt,sha256=_4q4MIFvMr4cAUbhWKWYdRXIXsF4PJDg4BUsZvgk94s,29
-nlpertools-1.0.9.dist-info/RECORD,,
+nlpertools-1.0.10.dist-info/METADATA,sha256=z6WqwEQxdq4xOF3Pw8QXMcrckcMTYfaeRyEqs0aM428,3304
+nlpertools-1.0.10.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
+nlpertools-1.0.10.dist-info/entry_points.txt,sha256=XEazQ4vUwJMoMAgAwk1Lq4PRQGklPkPBaFkiP0zN_JE,45
+nlpertools-1.0.10.dist-info/top_level.txt,sha256=_4q4MIFvMr4cAUbhWKWYdRXIXsF4PJDg4BUsZvgk94s,29
+nlpertools-1.0.10.dist-info/RECORD,,
nlpertools-1.0.9.dist-info/WHEEL → nlpertools-1.0.10.dist-info/WHEEL RENAMED
@@ -1,5 +1,5 @@
 Wheel-Version: 1.0
-Generator: setuptools (75.8.0)
+Generator: setuptools (80.9.0)
 Root-Is-Purelib: true
 Tag: py3-none-any
 