nlpertools 1.0.10__tar.gz → 1.0.11__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (65) hide show
  1. {nlpertools-1.0.10/src/nlpertools.egg-info → nlpertools-1.0.11}/PKG-INFO +3 -1
  2. {nlpertools-1.0.10 → nlpertools-1.0.11}/README.md +2 -0
  3. {nlpertools-1.0.10 → nlpertools-1.0.11}/src/nlpertools/__init__.py +2 -1
  4. {nlpertools-1.0.10 → nlpertools-1.0.11}/src/nlpertools/cli.py +14 -17
  5. nlpertools-1.0.11/src/nlpertools/dataprocess/__init__.py +1 -0
  6. nlpertools-1.0.11/src/nlpertools/dataprocess/dedupl.py +9 -0
  7. nlpertools-1.0.10/src/nlpertools/dataprocess.py → nlpertools-1.0.11/src/nlpertools/dataprocess/dp_main.py +1 -1
  8. {nlpertools-1.0.10 → nlpertools-1.0.11}/src/nlpertools/io/dir.py +25 -5
  9. {nlpertools-1.0.10 → nlpertools-1.0.11}/src/nlpertools/io/file.py +46 -43
  10. nlpertools-1.0.11/src/nlpertools/llm/__init__.py +3 -0
  11. nlpertools-1.0.11/src/nlpertools/llm/call_llm_once.py +60 -0
  12. {nlpertools-1.0.10 → nlpertools-1.0.11}/src/nlpertools/llm/infer.py +50 -5
  13. {nlpertools-1.0.10 → nlpertools-1.0.11}/src/nlpertools/other.py +77 -51
  14. {nlpertools-1.0.10 → nlpertools-1.0.11}/src/nlpertools/utils/package.py +9 -10
  15. {nlpertools-1.0.10 → nlpertools-1.0.11}/src/nlpertools/wrapper.py +6 -4
  16. {nlpertools-1.0.10 → nlpertools-1.0.11/src/nlpertools.egg-info}/PKG-INFO +3 -1
  17. {nlpertools-1.0.10 → nlpertools-1.0.11}/src/nlpertools.egg-info/SOURCES.txt +3 -1
  18. nlpertools-1.0.10/src/nlpertools/llm/call_llm_once.py +0 -30
  19. nlpertools-1.0.10/src/nlpertools/template/__init__.py +0 -0
  20. {nlpertools-1.0.10 → nlpertools-1.0.11}/LICENSE +0 -0
  21. {nlpertools-1.0.10 → nlpertools-1.0.11}/pyproject.toml +0 -0
  22. {nlpertools-1.0.10 → nlpertools-1.0.11}/setup.cfg +0 -0
  23. {nlpertools-1.0.10 → nlpertools-1.0.11}/setup.py +0 -0
  24. {nlpertools-1.0.10 → nlpertools-1.0.11}/src/nlpertools/algo/__init__.py +0 -0
  25. {nlpertools-1.0.10 → nlpertools-1.0.11}/src/nlpertools/algo/ac.py +0 -0
  26. {nlpertools-1.0.10 → nlpertools-1.0.11}/src/nlpertools/algo/bit_ops.py +0 -0
  27. {nlpertools-1.0.10 → nlpertools-1.0.11}/src/nlpertools/algo/kmp.py +0 -0
  28. {nlpertools-1.0.10 → nlpertools-1.0.11}/src/nlpertools/algo/num_ops.py +0 -0
  29. {nlpertools-1.0.10 → nlpertools-1.0.11}/src/nlpertools/algo/template.py +0 -0
  30. {nlpertools-1.0.10 → nlpertools-1.0.11}/src/nlpertools/algo/union.py +0 -0
  31. {nlpertools-1.0.10 → nlpertools-1.0.11}/src/nlpertools/data_client.py +0 -0
  32. {nlpertools-1.0.10 → nlpertools-1.0.11}/src/nlpertools/data_structure/__init__.py +0 -0
  33. {nlpertools-1.0.10 → nlpertools-1.0.11}/src/nlpertools/data_structure/base_structure.py +0 -0
  34. {nlpertools-1.0.10 → nlpertools-1.0.11}/src/nlpertools/default_db_config.yml +0 -0
  35. {nlpertools-1.0.10 → nlpertools-1.0.11}/src/nlpertools/draw/__init__.py +0 -0
  36. {nlpertools-1.0.10 → nlpertools-1.0.11}/src/nlpertools/draw/draw.py +0 -0
  37. {nlpertools-1.0.10 → nlpertools-1.0.11}/src/nlpertools/draw/math_func.py +0 -0
  38. {nlpertools-1.0.10 → nlpertools-1.0.11}/src/nlpertools/get_2fa.py +0 -0
  39. {nlpertools-1.0.10 → nlpertools-1.0.11}/src/nlpertools/io/__init__.py +0 -0
  40. {nlpertools-1.0.10 → nlpertools-1.0.11}/src/nlpertools/llm/price.py +0 -0
  41. {nlpertools-1.0.10 → nlpertools-1.0.11}/src/nlpertools/ml.py +0 -0
  42. {nlpertools-1.0.10/src/nlpertools/llm → nlpertools-1.0.11/src/nlpertools/monitor}/__init__.py +0 -0
  43. {nlpertools-1.0.10 → nlpertools-1.0.11}/src/nlpertools/monitor/gpu.py +0 -0
  44. {nlpertools-1.0.10 → nlpertools-1.0.11}/src/nlpertools/monitor/memory.py +0 -0
  45. {nlpertools-1.0.10 → nlpertools-1.0.11}/src/nlpertools/movie.py +0 -0
  46. {nlpertools-1.0.10 → nlpertools-1.0.11}/src/nlpertools/nlpertools_config.yml +0 -0
  47. {nlpertools-1.0.10 → nlpertools-1.0.11}/src/nlpertools/open_api.py +0 -0
  48. {nlpertools-1.0.10 → nlpertools-1.0.11}/src/nlpertools/pic.py +0 -0
  49. {nlpertools-1.0.10 → nlpertools-1.0.11}/src/nlpertools/plugin.py +0 -0
  50. {nlpertools-1.0.10 → nlpertools-1.0.11}/src/nlpertools/reminder.py +0 -0
  51. {nlpertools-1.0.10/src/nlpertools/monitor → nlpertools-1.0.11/src/nlpertools/template}/__init__.py +0 -0
  52. {nlpertools-1.0.10 → nlpertools-1.0.11}/src/nlpertools/utils/__init__.py +0 -0
  53. {nlpertools-1.0.10 → nlpertools-1.0.11}/src/nlpertools/utils/lazy.py +0 -0
  54. {nlpertools-1.0.10 → nlpertools-1.0.11}/src/nlpertools/utils/log_util.py +0 -0
  55. {nlpertools-1.0.10 → nlpertools-1.0.11}/src/nlpertools/utils/package_v1.py +0 -0
  56. {nlpertools-1.0.10 → nlpertools-1.0.11}/src/nlpertools/utils/package_v2.py +0 -0
  57. {nlpertools-1.0.10 → nlpertools-1.0.11}/src/nlpertools/utils_for_nlpertools.py +0 -0
  58. {nlpertools-1.0.10 → nlpertools-1.0.11}/src/nlpertools/vector_index_demo.py +0 -0
  59. {nlpertools-1.0.10 → nlpertools-1.0.11}/src/nlpertools.egg-info/dependency_links.txt +0 -0
  60. {nlpertools-1.0.10 → nlpertools-1.0.11}/src/nlpertools.egg-info/entry_points.txt +0 -0
  61. {nlpertools-1.0.10 → nlpertools-1.0.11}/src/nlpertools.egg-info/requires.txt +0 -0
  62. {nlpertools-1.0.10 → nlpertools-1.0.11}/src/nlpertools.egg-info/top_level.txt +0 -0
  63. {nlpertools-1.0.10 → nlpertools-1.0.11}/src/nlpertools_helper/__init__.py +0 -0
  64. {nlpertools-1.0.10 → nlpertools-1.0.11}/tests/test_kmp.py +0 -0
  65. {nlpertools-1.0.10 → nlpertools-1.0.11}/tests/test_path_exists.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: nlpertools
3
- Version: 1.0.10
3
+ Version: 1.0.11
4
4
  Summary: A small package about small basic IO operation when coding
5
5
  Home-page: https://github.com/lvzii/nlpertools
6
6
  Author: youshuJi
@@ -64,6 +64,8 @@ json_data = nlpertools.load_from_json('res.json')
64
64
  ```bash
65
65
  ## git, 连接github不稳定的时候非常有用
66
66
  ncli git pull
67
+ ## 带有参数时,加上--以避免-u被解析
68
+ ncli -- git push -u origin main
67
69
 
68
70
  # 生成pypi双因素认证的实时密钥(需要提供key)
69
71
  ncli --get_2fa --get_2fa_key your_key
@@ -40,6 +40,8 @@ json_data = nlpertools.load_from_json('res.json')
40
40
  ```bash
41
41
  ## git, 连接github不稳定的时候非常有用
42
42
  ncli git pull
43
+ ## 带有参数时,加上--以避免-u被解析
44
+ ncli -- git push -u origin main
43
45
 
44
46
  # 生成pypi双因素认证的实时密钥(需要提供key)
45
47
  ncli --get_2fa --get_2fa_key your_key
@@ -4,6 +4,7 @@
4
4
  from .algo.kmp import *
5
5
  from .data_structure.base_structure import *
6
6
  from .draw import *
7
+ from .dataprocess.dp_main import *
7
8
  from .dataprocess import *
8
9
  from .io.dir import *
9
10
  from .io.file import *
@@ -20,4 +21,4 @@ from .cli import *
20
21
  from .llm import *
21
22
 
22
23
 
23
- __version__ = "1.0.10"
24
+ __version__ = "1.0.11"
@@ -2,7 +2,7 @@ import argparse
2
2
  import os
3
3
  import uuid
4
4
  import sys
5
- from .dataprocess import startwith
5
+ from .dataprocess.dp_main import startwith
6
6
 
7
7
 
8
8
  def run_git_command(command):
@@ -17,7 +17,7 @@ def run_git_command(command):
17
17
  info = os.system(command)
18
18
  print(str(info))
19
19
  # 检查命令执行结果,若未出现错误则认为执行成功
20
- if not startwith(str(info), ["fatal", "error", "128", "1"]):
20
+ if (not startwith(str(info), ["fatal", "error", "128", "1"])) and "fatal" not in str(info):
21
21
  print("success")
22
22
  print(f"success info : ##{info}##")
23
23
  break
@@ -25,7 +25,7 @@ def run_git_command(command):
25
25
 
26
26
  def get_mac_address():
27
27
  mac = uuid.UUID(int=uuid.getnode()).hex[-12:]
28
- mac_address = ":".join([mac[e:e + 2] for e in range(0, 11, 2)])
28
+ mac_address = ":".join([mac[e : e + 2] for e in range(0, 11, 2)])
29
29
  print("mac address 不一定准确")
30
30
  print(mac_address)
31
31
  return mac_address
@@ -33,6 +33,7 @@ def get_mac_address():
33
33
 
34
34
  def get_2af_value(key):
35
35
  import pyotp
36
+
36
37
  """
37
38
  key应该是7位的
38
39
  """
@@ -65,15 +66,11 @@ def start_gpu_usage_notify_client():
65
66
  from plyer import notification
66
67
  import time
67
68
 
68
- SERVER_URL = 'http://127.0.0.1:5000/notify' # 服务器的 API 地址
69
+ SERVER_URL = "http://127.0.0.1:5000/notify" # 服务器的 API 地址
69
70
 
70
71
  def notify(text):
71
72
  # 使用 plyer 发送通知
72
- notification.notify(
73
- title='远程通知',
74
- message=text,
75
- timeout=10 # 10秒的通知显示时间
76
- )
73
+ notification.notify(title="远程通知", message=text, timeout=10) # 10秒的通知显示时间
77
74
 
78
75
  """定时轮询服务器获取通知"""
79
76
  while True:
@@ -94,12 +91,12 @@ def start_gpu_usage_notify_client():
94
91
 
95
92
  def main():
96
93
  parser = argparse.ArgumentParser(description="CLI tool for git operations and other functions.")
97
- parser.add_argument('git_command', nargs='*', help='Any git command (e.g., push, pull)')
98
- parser.add_argument('--mac_address', action='store_true', help='Get the MAC address.')
99
- parser.add_argument('--get_2fa', action='store_true', help='Get the 2fa value.')
100
- parser.add_argument('--get_2fa_key', type=str, help='Get the 2fa value.')
101
- parser.add_argument('--monitor_gpu_cli', action='store_true', help='monitor gpu cli')
102
- parser.add_argument('--monitor_gpu_ser', action='store_true', help='monitor gpu ser')
94
+ parser.add_argument("git_command", nargs="*", help="Any git command (e.g., push, pull)")
95
+ parser.add_argument("--mac_address", action="store_true", help="Get the MAC address.")
96
+ parser.add_argument("--get_2fa", action="store_true", help="Get the 2fa value.")
97
+ parser.add_argument("--get_2fa_key", type=str, help="Get the 2fa value.")
98
+ parser.add_argument("--monitor_gpu_cli", action="store_true", help="monitor gpu cli")
99
+ parser.add_argument("--monitor_gpu_ser", action="store_true", help="monitor gpu ser")
103
100
 
104
101
  args = parser.parse_args()
105
102
 
@@ -121,5 +118,5 @@ def main():
121
118
  print("No operation specified.")
122
119
 
123
120
 
124
- if __name__ == '__main__':
125
- main()
121
+ if __name__ == "__main__":
122
+ main()
@@ -0,0 +1 @@
1
+ from .dedupl import *
@@ -0,0 +1,9 @@
# Deduplicate a list of dicts by the value stored under one field.
def deduplicate_dict_list(dict_list: list, key: str) -> list:
    """Return the dicts from ``dict_list`` that carry a not-yet-seen value under ``key``.

    The first dict for each distinct value is kept and the input order is
    preserved. Dicts that do not contain ``key`` are left out of the result.
    The values stored under ``key`` must be hashable, because they are tracked
    in a set.

    :param dict_list: list of dicts to filter
    :param key: field whose value decides whether two dicts are duplicates
    :return: a new list; the dicts themselves are not copied
    """
    seen = set()
    result = []
    for d in dict_list:
        if key not in d:
            # No value to compare on: such entries are dropped, not kept.
            continue
        value = d[key]
        if value in seen:
            continue
        seen.add(value)
        result.append(d)
    return result
@@ -8,7 +8,7 @@ from typing import List
8
8
  import numpy as np
9
9
 
10
10
  # from . import DB_CONFIG_FILE # cannot import name 'DB_CONFIG_FILE' from partially initialized module 'nlpertools'
11
- from .utils.package import *
11
+ from ..utils.package import *
12
12
 
13
13
  main_special_characters = string.punctuation + string.digits + string.whitespace
14
14
  other_special_characters = (
@@ -3,6 +3,7 @@
3
3
  # @Author : youshu.Ji
4
4
  import os
5
5
  from pathlib import Path
6
+ from typing import overload,Literal,Union
6
7
 
7
8
 
8
9
  # dir ----------------------------------------------------------------------
@@ -45,15 +46,34 @@ def get_filename(path, suffix=True) -> str:
45
46
  filename = filename.split('.')[0]
46
47
  return filename
47
48
 
48
-
49
- def listdir(dir_name, including_dir=True):
50
- filenames = os.listdir(dir_name)
49
+ """
50
+ 因为os.listdir无法支持Path类型,虽然是bytelikepath,但是传入Path后只会返回字符串
51
+ 且无法只返回文件名
52
+ 故重新实现
53
+ """
54
+ @overload
55
+ def listdir(dir_name: Path, including_dir: Literal[True]) -> list[Path]: ...
56
+ @overload
57
+ def listdir(dir_name: str, including_dir: Literal[True]) -> list[str]: ...
58
+ @overload
59
+ def listdir(dir_name: Path, including_dir: Literal[False] = False) -> list[str]: ...
60
+ @overload
61
+ def listdir(dir_name: str, including_dir: Literal[False] = False) -> list[str]: ...
62
+
63
+ def listdir(dir_name: Union[Path, str], including_dir: bool = False) -> list[Path] | list[str]:
64
+ """
65
+ including_dir=True -> list[Path] or list[str]
66
+ including_dir=False -> list[str]
67
+ """
68
+ filenames = os.listdir(str(dir_name))
51
69
  if including_dir:
52
- return [os.path.join(dir_name, filename) for filename in filenames]
70
+ if isinstance(dir_name, Path):
71
+ return [dir_name / filename for filename in filenames]
72
+ else:
73
+ return [os.path.join(dir_name, filename) for filename in filenames]
53
74
  else:
54
75
  return list(filenames)
55
76
 
56
-
57
77
  def listdir_yield(dir_name, including_dir=True):
58
78
  filenames = os.listdir(dir_name)
59
79
  for filename in filenames:
@@ -5,8 +5,11 @@ import codecs
5
5
  import json
6
6
  import pickle
7
7
  import random
8
- from itertools import (takewhile, repeat)
8
+ from itertools import takewhile, repeat
9
+ from typing import Optional
10
+ from pathlib import Path
9
11
  import pandas as pd
12
+
10
13
  # import omegaconf
11
14
  # import yaml
12
15
  from ..utils.package import *
@@ -15,18 +18,18 @@ LARGE_FILE_THRESHOLD = 1e5
15
18
 
16
19
 
17
20
  def safe_filename(filename: str) -> str:
18
- for char in ['\\', '/', ':', '*', '?', '"', '<', '>', '|']:
19
- filename = filename.replace(char, '_')
21
+ for char in ["\\", "/", ":", "*", "?", '"', "<", ">", "|"]:
22
+ filename = filename.replace(char, "_")
20
23
  return filename
21
24
 
22
25
 
23
26
  def read_yaml(path, omega=False):
24
27
  if omega:
25
28
  return omegaconf.OmegaConf.load(path)
26
- return yaml.load(codecs.open(path, encoding='utf-8'), Loader=yaml.FullLoader)
29
+ return yaml.load(codecs.open(path, encoding="utf-8"), Loader=yaml.FullLoader)
27
30
 
28
31
 
29
- def _merge_file(filelist, save_filename, shuffle=False):
32
+ def merge_file(filelist, save_filename, shuffle=False):
30
33
  contents = []
31
34
  for file in filelist:
32
35
  content = readtxt_list_all_strip(file)
@@ -43,9 +46,9 @@ def iter_count(file_name):
43
46
  author: unknown
44
47
  """
45
48
  buffer = 1024 * 1024
46
- with codecs.open(file_name, 'r', 'utf-8') as f:
49
+ with codecs.open(file_name, "r", "utf-8") as f:
47
50
  buf_gen = takewhile(lambda x: x, (f.read(buffer) for _ in repeat(None)))
48
- return sum(buf.count('\n') for buf in buf_gen)
51
+ return sum(buf.count("\n") for buf in buf_gen)
49
52
 
50
53
 
51
54
  # 需要加入进度条的函数包括
@@ -57,24 +60,24 @@ load_from_json
57
60
 
58
61
 
59
62
  # 读txt文件 一次全读完 返回list 去换行
60
- def readtxt_list_all_strip(path, encoding='utf-8') -> list:
63
+ def readtxt_list_all_strip(path, encoding="utf-8") -> list:
61
64
  file_line_num = iter_count(path)
62
65
  lines = []
63
- with codecs.open(path, 'r', encoding) as r:
66
+ with codecs.open(path, "r", encoding) as r:
64
67
  if file_line_num > LARGE_FILE_THRESHOLD:
65
68
  iter_obj = tqdm(enumerate(r.readlines()), total=file_line_num)
66
69
  else:
67
70
  iter_obj = enumerate(r.readlines())
68
71
 
69
72
  for ldx, line in iter_obj:
70
- lines.append(line.strip('\n').strip("\r"))
73
+ lines.append(line.strip("\n").strip("\r"))
71
74
  return lines
72
75
 
73
76
 
74
77
  # 读txt 一次读一行 最后返回list
75
78
  def readtxt_list_each(path) -> list:
76
79
  lines = []
77
- with codecs.open(path, 'r', 'utf-8') as r:
80
+ with codecs.open(path, "r", "utf-8") as r:
78
81
  line = r.readline()
79
82
  while line:
80
83
  lines.append(line)
@@ -82,11 +85,11 @@ def readtxt_list_each(path) -> list:
82
85
  return lines
83
86
 
84
87
 
85
- def readtxt_list_each_strip(path) -> list:
88
+ def readtxt_list_each_strip(path: Optional[str | Path]):
86
89
  """
87
90
  yield方法
88
91
  """
89
- with codecs.open(path, 'r', 'utf-8') as r:
92
+ with codecs.open(path, "r", "utf-8") as r:
90
93
  line = r.readline()
91
94
  while line:
92
95
  yield line.strip("\n").strip("\r")
@@ -95,51 +98,51 @@ def readtxt_list_each_strip(path) -> list:
95
98
 
96
99
  # 读txt文件 一次全读完 返回list
97
100
  def readtxt_list_all(path) -> list:
98
- with codecs.open(path, 'r', 'utf-8') as r:
101
+ with codecs.open(path, "r", "utf-8") as r:
99
102
  lines = r.readlines()
100
103
  return lines
101
104
 
102
105
 
103
106
  # 读byte文件 读成一条string
104
107
  def readtxt_byte(path, encoding="utf-8") -> str:
105
- with codecs.open(path, 'rb') as r:
108
+ with codecs.open(path, "rb") as r:
106
109
  lines = r.read()
107
110
  lines = lines.decode(encoding)
108
- return lines.replace('\r', '')
111
+ return lines.replace("\r", "")
109
112
 
110
113
 
111
114
  # 读txt文件 读成一条string
112
- def readtxt_string(path, encoding="utf-8") -> str:
113
- with codecs.open(path, 'r', encoding) as r:
115
+ def read_text(path, encoding="utf-8") -> str:
116
+ with codecs.open(path, "r", encoding) as r:
114
117
  lines = r.read()
115
- return lines.replace('\r', '')
118
+ return lines.replace("\r", "")
116
119
 
117
120
 
118
121
  # 写txt文件覆盖
119
- def writetxt_w(txt, path, r='w'):
120
- with codecs.open(path, r, 'utf-8') as w:
122
+ def writetxt_w(txt, path, r="w"):
123
+ with codecs.open(path, r, "utf-8") as w:
121
124
  w.writelines(txt)
122
125
 
123
126
 
124
127
  # 写txt文件追加
125
128
  def writetxt_a(txt, path):
126
- with codecs.open(path, 'a', 'utf-8') as w:
129
+ with codecs.open(path, "a", "utf-8") as w:
127
130
  w.writelines(txt)
128
131
 
129
132
 
130
133
  def writetxt(txt, path, encoding="utf-8"):
131
- with codecs.open(path, 'w', encoding) as w:
134
+ with codecs.open(path, "w", encoding) as w:
132
135
  w.write(txt)
133
136
 
134
137
 
135
138
  def writetxt_wb(txt, path):
136
- with codecs.open(path, 'wb') as w:
139
+ with codecs.open(path, "wb") as w:
137
140
  w.write(txt)
138
141
 
139
142
 
140
143
  # 写list 覆盖
141
144
  def writetxt_w_list(list, path, num_lf=1):
142
- with codecs.open(path, 'w', "utf-8") as w:
145
+ with codecs.open(path, "w", "utf-8") as w:
143
146
  for i in list:
144
147
  w.write(i)
145
148
  w.write("\n" * num_lf)
@@ -147,7 +150,7 @@ def writetxt_w_list(list, path, num_lf=1):
147
150
 
148
151
  # 写list 追加
149
152
  def writetxt_a_list(list, path, num_lf=2):
150
- with codecs.open(path, 'a', "utf-8") as w:
153
+ with codecs.open(path, "a", "utf-8") as w:
151
154
  for i in list:
152
155
  w.write(i)
153
156
  w.write("\n" * num_lf)
@@ -158,7 +161,7 @@ def save_to_json(content, path):
158
161
  json.dump(content, w, ensure_ascii=False, indent=1)
159
162
 
160
163
 
161
- def load_from_json(path):
164
+ def load_from_json(path: Optional[str | Path]):
162
165
  with codecs.open(path, "r", "utf-8") as r:
163
166
  content = json.load(r)
164
167
  return content
@@ -167,60 +170,60 @@ def load_from_json(path):
# Read a text file into a single string, trying several encodings in turn
# (gb2312 is the last resort).
def readtxt_string_all_encoding(path):
    """Read the whole file at ``path`` as one string, guessing its encoding.

    The encodings utf-8-sig, utf-8 and big5 are tried strictly, in that order;
    the first one that decodes the whole file wins. If all three fail, the
    path is printed and the file is decoded as gb2312 with undecodable bytes
    ignored.

    :param path: file to read
    :return: the decoded file content
    :raises OSError: if the file cannot be opened (no longer masked by a bare except)
    """
    for encoding in ("utf-8-sig", "utf-8", "big5"):
        try:
            with codecs.open(path, "rb", encoding) as r:
                return r.read()
        except UnicodeError:
            # Wrong guess: fall through to the next candidate encoding.
            continue
    # Every strict attempt failed; report which file needed the lossy fallback.
    print(path)
    with codecs.open(path, "rb", "gb2312", errors="ignore") as r:
        return r.read()
188
191
 
189
192
 
def readtxt_list_all_encoding(path):
    """Read the file at ``path`` as a list of lines, guessing its encoding.

    The encodings utf-8-sig, utf-8 and big5 are tried strictly, in that order;
    the first one that decodes the whole file wins. If all three fail, the
    file is decoded as gb2312 with undecodable bytes ignored. Lines are
    returned as produced by ``readlines()``, i.e. with their line endings.

    :param path: file to read
    :return: list of decoded lines
    :raises OSError: if the file cannot be opened (no longer masked by a bare except)
    """
    for encoding in ("utf-8-sig", "utf-8", "big5"):
        try:
            with codecs.open(path, "rb", encoding) as r:
                return r.readlines()
        except UnicodeError:
            # Wrong guess: fall through to the next candidate encoding.
            continue
    with codecs.open(path, "rb", "gb2312", errors="ignore") as r:
        return r.readlines()
209
212
 
210
213
 
211
214
  # line by line
212
215
  def save_to_jsonl(corpus, path):
213
- with open(path, 'w', encoding='utf-8') as wt:
216
+ with open(path, "w", encoding="utf-8") as wt:
214
217
  for i in corpus:
215
218
  wt.write(json.dumps(i, ensure_ascii=False))
216
- wt.write('\n')
219
+ wt.write("\n")
217
220
 
218
221
 
219
222
  # line by line
220
223
  def load_from_jsonl(path):
221
224
  file_line_num = iter_count(path)
222
225
  if file_line_num > 1e5:
223
- with open(path, 'r', encoding='utf-8') as rd:
226
+ with open(path, "r", encoding="utf-8") as rd:
224
227
  corpus = []
225
228
  while True:
226
229
  line = rd.readline()
@@ -230,7 +233,7 @@ def load_from_jsonl(path):
230
233
  break
231
234
  return corpus
232
235
  else:
233
- with open(path, 'r', encoding='utf-8') as rd:
236
+ with open(path, "r", encoding="utf-8") as rd:
234
237
  corpus = []
235
238
  while True:
236
239
  line = rd.readline()
@@ -242,20 +245,20 @@ def load_from_jsonl(path):
242
245
 
243
246
 
244
247
  def save_pkl(data, path):
245
- with open(path, 'wb') as f:
248
+ with open(path, "wb") as f:
246
249
  pickle.dump(data, f)
247
250
 
248
251
 
249
252
  def load_pkl(path):
250
- with open(path, 'rb') as f:
253
+ with open(path, "rb") as f:
251
254
  data = pickle.load(f)
252
255
  return data
253
256
 
254
257
 
255
258
  def save_to_csv(df, save_path, index_flag=False):
256
- with open(save_path, 'wb+') as csvfile:
259
+ with open(save_path, "wb+") as csvfile:
257
260
  csvfile.write(codecs.BOM_UTF8)
258
- df.to_csv(save_path, mode='a', index=index_flag)
261
+ df.to_csv(save_path, mode="a", index=index_flag)
259
262
 
260
263
 
261
264
  def save_to_mongo():
@@ -0,0 +1,3 @@
1
+ from .call_llm_once import *
2
+ from .infer import *
3
+ from .price import *
@@ -0,0 +1,60 @@
1
+ from ..io.file import read_yaml
2
+ from tqdm import tqdm
3
+ import os
4
+ from typing import Optional, Union
5
+
6
+ """
7
+ 从你当前的项目里找到.key文件 获取url和key
8
+ """
9
+
10
+
def call_once_stream(
    client, input: Optional[Union[str, list]], model_name: str = "qwen3-0626-e4", max_tokens: int = 8192, temperature=0.2
) -> str:
    """Run one streaming chat completion and echo the text to stdout as it arrives.

    :param client: object exposing ``chat.completions.create`` (e.g. an OpenAI client)
    :param input: a prompt string (sent as a single user message) or a ready-made message list
    :param model_name: model name
    :param max_tokens: maximum number of output tokens
    :param temperature: sampling temperature, forwarded to the API
    :return: the concatenated text of all streamed chunks
    :raises TypeError: if ``input`` is neither a str nor a list
    """
    # The client is supplied by the caller, so no openai import is needed here.
    if isinstance(input, str):
        messages = [{"role": "user", "content": input}]
    elif isinstance(input, list):
        messages = input
    else:
        raise TypeError(f"input must be a str or a list of messages, got {type(input).__name__}")

    completion = client.chat.completions.create(
        model=model_name,
        messages=messages,
        max_tokens=max_tokens,
        temperature=temperature,
        stream=True,
    )
    pieces = []
    for chunk in completion:
        if chunk.choices:
            # delta.content may be None on some chunks; treat that as empty text.
            piece = chunk.choices[0].delta.content or ""
            pieces.append(piece)
            print(piece, end="")
        else:
            # A chunk without choices: end the echoed line and print its usage field.
            print()
            print(chunk.usage)
    return "".join(pieces)
39
+
40
+
def call_once(
    client, input: Optional[Union[str, list]], model_name: str = "qwen3-0626-e4", max_tokens: int = 8192, temperature=0.8
) -> str:
    """Run one non-streaming chat completion and return the reply text.

    :param client: object exposing ``chat.completions.create`` (e.g. an OpenAI client)
    :param input: a prompt string (sent as a single user message) or a ready-made message list
    :param model_name: model name
    :param max_tokens: maximum number of output tokens
    :param temperature: sampling temperature, forwarded to the API
    :return: ``message.content`` of the first returned choice
    :raises TypeError: if ``input`` is neither a str nor a list
    """
    # The client is supplied by the caller, so no openai import is needed here.
    if isinstance(input, str):
        messages = [{"role": "user", "content": input}]
    elif isinstance(input, list):
        messages = input
    else:
        raise TypeError(f"input must be a str or a list of messages, got {type(input).__name__}")

    response = client.chat.completions.create(
        model=model_name,
        messages=messages,
        max_tokens=max_tokens,
        temperature=temperature,
    )
    return response.choices[0].message.content
@@ -1,7 +1,7 @@
1
1
  import os
2
2
  from tqdm import tqdm
3
- from openai import OpenAI
4
3
  import concurrent.futures
4
+ import itertools
5
5
 
6
6
 
7
7
  INFER_PARAS = {
@@ -15,14 +15,17 @@ INFER_PARAS = {
15
15
 
16
16
 
def parse_infer_data(infer_data: list):
    """Normalize ``infer_data`` into a list of chat message lists, one per request.

    The type of the first element decides how the whole list is treated:
    a list of prompt strings is wrapped so that each prompt becomes a
    single-user-message conversation, while a list of message lists is
    returned unchanged.

    :param infer_data: list of prompt strings, or list of message lists
    :return: list of message lists (empty for empty input)
    :raises TypeError: if the first element is neither a str nor a list
    """
    if not infer_data:
        # Nothing to infer; avoids indexing into an empty list.
        return []
    first = infer_data[0]
    if isinstance(first, str):
        # Each request's `messages` must itself be a list, hence list-of-lists.
        return [[{"role": "user", "content": i}] for i in infer_data]
    if isinstance(first, list):
        return infer_data
    raise TypeError(f"infer_data items must be str or list, got {type(first).__name__}")
23
24
 
24
25
 
25
- def common_api_infer_func(model_name, infer_data: list, infer_paras, client: OpenAI):
26
+ def common_api_infer_func(model_name, infer_data: list, infer_paras, client):
27
+ from openai import OpenAI
28
+
26
29
  """
27
30
  infer_data: list of messages/prompt
28
31
  """
@@ -31,16 +34,58 @@ def common_api_infer_func(model_name, infer_data: list, infer_paras, client: Ope
31
34
  def get_response(model_name, messages, infer_paras):
32
35
  responses = []
33
36
  infer_times = infer_paras.get("infer_times", 1)
37
+
34
38
  for _ in range(infer_times):
35
39
  # 使用OpenAI API进行推理
36
- response = client.chat.completions.create(model=model_name, messages=messages, **infer_paras)
40
+ response = client.chat.completions.create(
41
+ model=model_name,
42
+ messages=messages,
43
+ temperature=infer_paras.get("temperature", 0.7),
44
+ max_tokens=infer_paras.get("max_tokens", 8192),
45
+ )
37
46
  text = response.choices[0].message.content
38
47
  responses.append({"text": text})
39
48
  return responses
40
49
 
41
50
  with concurrent.futures.ThreadPoolExecutor(16) as executor:
42
51
  futures = [executor.submit(get_response, model_name, message, infer_paras) for message in messages]
43
- results = [future.result() for future in concurrent.futures.as_completed(futures)]
52
+ # results = [future.result() for future in tqdm(concurrent.futures.as_completed(futures))] # 乱序
53
+ results = [future.result() for future in tqdm(futures)]
54
+
55
+ return results
56
+
57
+
def common_api_infer_func_multi_client(model_name, infer_data: list, infer_paras, clients: list):
    """Run chat completions for every item of ``infer_data``, spread over several clients.

    Requests are assigned to ``clients`` round-robin in submission order and
    executed on a thread pool. Results are returned in the same order as
    ``infer_data``.

    :param model_name: model name passed to every request
    :param infer_data: list of messages/prompt (see ``parse_infer_data``)
    :param infer_paras: dict; reads ``infer_times`` (default 1), ``temperature``
        (default 0.7) and ``max_tokens`` (default 8192)
    :param clients: non-empty list of objects exposing ``chat.completions.create``
    :return: one list per input item, holding ``{"text": ...}`` dicts, one per inference
    :raises ValueError: if ``clients`` is empty
    """
    if not clients:
        raise ValueError("clients must contain at least one client")
    messages = parse_infer_data(infer_data)

    def get_response(client, model_name, messages, infer_paras):
        responses = []
        infer_times = infer_paras.get("infer_times", 1)
        for _ in range(infer_times):
            # Best effort: a failed request is reported and recorded as empty text
            # so that the remaining requests still complete.
            try:
                response = client.chat.completions.create(
                    model=model_name,
                    messages=messages,
                    temperature=infer_paras.get("temperature", 0.7),
                    max_tokens=infer_paras.get("max_tokens", 8192),
                )
                text = response.choices[0].message.content
            except Exception as e:
                print(e)
                text = ""
            responses.append({"text": text})
        return responses

    with concurrent.futures.ThreadPoolExecutor(128) as executor:
        # Pair each request with its client here, in the submitting thread, so the
        # cycling iterator is never advanced from worker threads.
        futures = [
            executor.submit(get_response, client, model_name, message, infer_paras)
            for message, client in zip(messages, itertools.cycle(clients))
        ]
        # Iterating the futures in submission order keeps results aligned with the input.
        results = [future.result() for future in tqdm(futures)]

    return results
46
91