nlpertools 1.0.8__tar.gz → 1.0.10__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {nlpertools-1.0.8/src/nlpertools.egg-info → nlpertools-1.0.10}/PKG-INFO +28 -26
- {nlpertools-1.0.8 → nlpertools-1.0.10}/README.md +22 -24
- {nlpertools-1.0.8 → nlpertools-1.0.10}/setup.py +8 -10
- {nlpertools-1.0.8 → nlpertools-1.0.10}/src/nlpertools/__init__.py +2 -2
- nlpertools-1.0.10/src/nlpertools/cli.py +125 -0
- {nlpertools-1.0.8 → nlpertools-1.0.10}/src/nlpertools/dataprocess.py +12 -0
- {nlpertools-1.0.8 → nlpertools-1.0.10}/src/nlpertools/draw/draw.py +5 -7
- {nlpertools-1.0.8 → nlpertools-1.0.10}/src/nlpertools/io/dir.py +2 -2
- {nlpertools-1.0.8 → nlpertools-1.0.10}/src/nlpertools/io/file.py +2 -2
- nlpertools-1.0.10/src/nlpertools/llm/call_llm_once.py +30 -0
- nlpertools-1.0.10/src/nlpertools/llm/infer.py +74 -0
- nlpertools-1.0.10/src/nlpertools/llm/price.py +13 -0
- {nlpertools-1.0.8 → nlpertools-1.0.10}/src/nlpertools/ml.py +93 -58
- {nlpertools-1.0.8 → nlpertools-1.0.10}/src/nlpertools/other.py +22 -2
- nlpertools-1.0.10/src/nlpertools/template/__init__.py +0 -0
- {nlpertools-1.0.8 → nlpertools-1.0.10/src/nlpertools.egg-info}/PKG-INFO +28 -26
- {nlpertools-1.0.8 → nlpertools-1.0.10}/src/nlpertools.egg-info/SOURCES.txt +4 -0
- {nlpertools-1.0.8 → nlpertools-1.0.10}/src/nlpertools.egg-info/requires.txt +1 -0
- nlpertools-1.0.8/src/nlpertools/cli.py +0 -87
- {nlpertools-1.0.8 → nlpertools-1.0.10}/LICENSE +0 -0
- {nlpertools-1.0.8 → nlpertools-1.0.10}/pyproject.toml +0 -0
- {nlpertools-1.0.8 → nlpertools-1.0.10}/setup.cfg +0 -0
- {nlpertools-1.0.8 → nlpertools-1.0.10}/src/nlpertools/algo/__init__.py +0 -0
- {nlpertools-1.0.8 → nlpertools-1.0.10}/src/nlpertools/algo/ac.py +0 -0
- {nlpertools-1.0.8 → nlpertools-1.0.10}/src/nlpertools/algo/bit_ops.py +0 -0
- {nlpertools-1.0.8 → nlpertools-1.0.10}/src/nlpertools/algo/kmp.py +0 -0
- {nlpertools-1.0.8 → nlpertools-1.0.10}/src/nlpertools/algo/num_ops.py +0 -0
- {nlpertools-1.0.8 → nlpertools-1.0.10}/src/nlpertools/algo/template.py +0 -0
- {nlpertools-1.0.8 → nlpertools-1.0.10}/src/nlpertools/algo/union.py +0 -0
- {nlpertools-1.0.8 → nlpertools-1.0.10}/src/nlpertools/data_client.py +0 -0
- {nlpertools-1.0.8 → nlpertools-1.0.10}/src/nlpertools/data_structure/__init__.py +0 -0
- {nlpertools-1.0.8 → nlpertools-1.0.10}/src/nlpertools/data_structure/base_structure.py +0 -0
- {nlpertools-1.0.8 → nlpertools-1.0.10}/src/nlpertools/default_db_config.yml +0 -0
- {nlpertools-1.0.8 → nlpertools-1.0.10}/src/nlpertools/draw/__init__.py +0 -0
- {nlpertools-1.0.8 → nlpertools-1.0.10}/src/nlpertools/draw/math_func.py +0 -0
- {nlpertools-1.0.8 → nlpertools-1.0.10}/src/nlpertools/get_2fa.py +0 -0
- {nlpertools-1.0.8 → nlpertools-1.0.10}/src/nlpertools/io/__init__.py +0 -0
- {nlpertools-1.0.8/src/nlpertools/monitor → nlpertools-1.0.10/src/nlpertools/llm}/__init__.py +0 -0
- {nlpertools-1.0.8/src/nlpertools/template → nlpertools-1.0.10/src/nlpertools/monitor}/__init__.py +0 -0
- {nlpertools-1.0.8 → nlpertools-1.0.10}/src/nlpertools/monitor/gpu.py +0 -0
- {nlpertools-1.0.8 → nlpertools-1.0.10}/src/nlpertools/monitor/memory.py +0 -0
- {nlpertools-1.0.8 → nlpertools-1.0.10}/src/nlpertools/movie.py +0 -0
- {nlpertools-1.0.8 → nlpertools-1.0.10}/src/nlpertools/nlpertools_config.yml +0 -0
- {nlpertools-1.0.8 → nlpertools-1.0.10}/src/nlpertools/open_api.py +0 -0
- {nlpertools-1.0.8 → nlpertools-1.0.10}/src/nlpertools/pic.py +0 -0
- {nlpertools-1.0.8 → nlpertools-1.0.10}/src/nlpertools/plugin.py +0 -0
- {nlpertools-1.0.8 → nlpertools-1.0.10}/src/nlpertools/reminder.py +0 -0
- {nlpertools-1.0.8 → nlpertools-1.0.10}/src/nlpertools/utils/__init__.py +0 -0
- {nlpertools-1.0.8 → nlpertools-1.0.10}/src/nlpertools/utils/lazy.py +0 -0
- {nlpertools-1.0.8 → nlpertools-1.0.10}/src/nlpertools/utils/log_util.py +0 -0
- {nlpertools-1.0.8 → nlpertools-1.0.10}/src/nlpertools/utils/package.py +0 -0
- {nlpertools-1.0.8 → nlpertools-1.0.10}/src/nlpertools/utils/package_v1.py +0 -0
- {nlpertools-1.0.8 → nlpertools-1.0.10}/src/nlpertools/utils/package_v2.py +0 -0
- {nlpertools-1.0.8 → nlpertools-1.0.10}/src/nlpertools/utils_for_nlpertools.py +0 -0
- {nlpertools-1.0.8 → nlpertools-1.0.10}/src/nlpertools/vector_index_demo.py +0 -0
- {nlpertools-1.0.8 → nlpertools-1.0.10}/src/nlpertools/wrapper.py +0 -0
- {nlpertools-1.0.8 → nlpertools-1.0.10}/src/nlpertools.egg-info/dependency_links.txt +0 -0
- {nlpertools-1.0.8 → nlpertools-1.0.10}/src/nlpertools.egg-info/entry_points.txt +0 -0
- {nlpertools-1.0.8 → nlpertools-1.0.10}/src/nlpertools.egg-info/top_level.txt +0 -0
- {nlpertools-1.0.8 → nlpertools-1.0.10}/src/nlpertools_helper/__init__.py +0 -0
- {nlpertools-1.0.8 → nlpertools-1.0.10}/tests/test_kmp.py +0 -0
- {nlpertools-1.0.8 → nlpertools-1.0.10}/tests/test_path_exists.py +0 -0
@@ -1,6 +1,6 @@
|
|
1
|
-
Metadata-Version: 2.
|
1
|
+
Metadata-Version: 2.4
|
2
2
|
Name: nlpertools
|
3
|
-
Version: 1.0.
|
3
|
+
Version: 1.0.10
|
4
4
|
Summary: A small package about small basic IO operation when coding
|
5
5
|
Home-page: https://github.com/lvzii/nlpertools
|
6
6
|
Author: youshuJi
|
@@ -15,8 +15,12 @@ License-File: LICENSE
|
|
15
15
|
Requires-Dist: numpy
|
16
16
|
Requires-Dist: pandas
|
17
17
|
Requires-Dist: psutil
|
18
|
+
Requires-Dist: openai
|
18
19
|
Provides-Extra: torch
|
19
20
|
Requires-Dist: torch; extra == "torch"
|
21
|
+
Dynamic: license-file
|
22
|
+
Dynamic: provides-extra
|
23
|
+
Dynamic: requires-dist
|
20
24
|
|
21
25
|
<div align="center">
|
22
26
|
<h4 align="center">
|
@@ -35,7 +39,7 @@ Requires-Dist: torch; extra == "torch"
|
|
35
39
|
|
36
40
|
它解决了什么问题:
|
37
41
|
|
38
|
-
- 很多函数是记不住的,
|
42
|
+
- 很多函数是记不住的, 每次写都要~~搜~~问大模型 ,例如pandas排序
|
39
43
|
- 刷题的时候,树结构的题目很难调试
|
40
44
|
|
41
45
|
|
@@ -48,6 +52,23 @@ nlpertools
|
|
48
52
|
|
49
53
|
```
|
50
54
|
|
55
|
+
# 最常用/喜欢的功能(使用示例)
|
56
|
+
```python
|
57
|
+
# 读txt, json文件
|
58
|
+
import nlpertools
|
59
|
+
|
60
|
+
txt_data = nlpertools.readtxt_list_all_strip('res.txt')
|
61
|
+
json_data = nlpertools.load_from_json('res.json')
|
62
|
+
```
|
63
|
+
|
64
|
+
```bash
|
65
|
+
## git, 连接github不稳定的时候非常有用
|
66
|
+
ncli git pull
|
67
|
+
|
68
|
+
# 生成pypi双因素认证的实时密钥(需要提供key)
|
69
|
+
ncli --get_2fa --get_2fa_key your_key
|
70
|
+
```
|
71
|
+
|
51
72
|
# 安装
|
52
73
|
|
53
74
|
Install the latest release version
|
@@ -99,30 +120,7 @@ https://nlpertools.readthedocs.io/en/latest/
|
|
99
120
|
|
100
121
|
一些可能需要配置才能用的函数,写上示例
|
101
122
|
|
102
|
-
## 使用示例
|
103
|
-
|
104
|
-
```python
|
105
|
-
import nlpertools
|
106
|
-
|
107
|
-
a = nlpertools.readtxt_list_all_strip('res.txt')
|
108
|
-
# 或
|
109
|
-
b = nlpertools.io.file.readtxt_list_all_strip('res.txt')
|
110
|
-
```
|
111
123
|
|
112
|
-
```bash
|
113
|
-
# 生成pypi双因素认证的实时密钥(需要提供key)
|
114
|
-
python -m nlpertools.get_2fa your_key
|
115
|
-
|
116
|
-
## git
|
117
|
-
python nlpertools.cli --git_push
|
118
|
-
python nlpertools.cli --git_pull
|
119
|
-
|
120
|
-
# 以下功能被nvitop替代,不推荐使用
|
121
|
-
## 监控gpu显存
|
122
|
-
python -m nlpertools.monitor.gpu
|
123
|
-
## 监控cpu
|
124
|
-
python -m nlpertools.monitor.memory
|
125
|
-
```
|
126
124
|
|
127
125
|
## 一些常用项目
|
128
126
|
|
@@ -130,3 +128,7 @@ nvitop
|
|
130
128
|
|
131
129
|
ydata-profiling
|
132
130
|
|
131
|
+
## 贡献
|
132
|
+
|
133
|
+
https://github.com/bigscience-workshop/data-preparation
|
134
|
+
|
@@ -15,7 +15,7 @@
|
|
15
15
|
|
16
16
|
它解决了什么问题:
|
17
17
|
|
18
|
-
- 很多函数是记不住的,
|
18
|
+
- 很多函数是记不住的, 每次写都要~~搜~~问大模型 ,例如pandas排序
|
19
19
|
- 刷题的时候,树结构的题目很难调试
|
20
20
|
|
21
21
|
|
@@ -28,6 +28,23 @@ nlpertools
|
|
28
28
|
|
29
29
|
```
|
30
30
|
|
31
|
+
# 最常用/喜欢的功能(使用示例)
|
32
|
+
```python
|
33
|
+
# 读txt, json文件
|
34
|
+
import nlpertools
|
35
|
+
|
36
|
+
txt_data = nlpertools.readtxt_list_all_strip('res.txt')
|
37
|
+
json_data = nlpertools.load_from_json('res.json')
|
38
|
+
```
|
39
|
+
|
40
|
+
```bash
|
41
|
+
## git, 连接github不稳定的时候非常有用
|
42
|
+
ncli git pull
|
43
|
+
|
44
|
+
# 生成pypi双因素认证的实时密钥(需要提供key)
|
45
|
+
ncli --get_2fa --get_2fa_key your_key
|
46
|
+
```
|
47
|
+
|
31
48
|
# 安装
|
32
49
|
|
33
50
|
Install the latest release version
|
@@ -79,30 +96,7 @@ https://nlpertools.readthedocs.io/en/latest/
|
|
79
96
|
|
80
97
|
一些可能需要配置才能用的函数,写上示例
|
81
98
|
|
82
|
-
## 使用示例
|
83
|
-
|
84
|
-
```python
|
85
|
-
import nlpertools
|
86
|
-
|
87
|
-
a = nlpertools.readtxt_list_all_strip('res.txt')
|
88
|
-
# 或
|
89
|
-
b = nlpertools.io.file.readtxt_list_all_strip('res.txt')
|
90
|
-
```
|
91
99
|
|
92
|
-
```bash
|
93
|
-
# 生成pypi双因素认证的实时密钥(需要提供key)
|
94
|
-
python -m nlpertools.get_2fa your_key
|
95
|
-
|
96
|
-
## git
|
97
|
-
python nlpertools.cli --git_push
|
98
|
-
python nlpertools.cli --git_pull
|
99
|
-
|
100
|
-
# 以下功能被nvitop替代,不推荐使用
|
101
|
-
## 监控gpu显存
|
102
|
-
python -m nlpertools.monitor.gpu
|
103
|
-
## 监控cpu
|
104
|
-
python -m nlpertools.monitor.memory
|
105
|
-
```
|
106
100
|
|
107
101
|
## 一些常用项目
|
108
102
|
|
@@ -110,3 +104,7 @@ nvitop
|
|
110
104
|
|
111
105
|
ydata-profiling
|
112
106
|
|
107
|
+
## 贡献
|
108
|
+
|
109
|
+
https://github.com/bigscience-workshop/data-preparation
|
110
|
+
|
@@ -7,19 +7,17 @@ from setuptools import setup
|
|
7
7
|
def get_version():
|
8
8
|
with open(os.path.join("src", "nlpertools", "__init__.py"), "r", encoding="utf-8") as f:
|
9
9
|
file_content = f.read()
|
10
|
-
pattern = r"{}\W*=\W
|
11
|
-
|
12
|
-
|
10
|
+
pattern = r"{}\W*=\W*[\"']([^\"']+)[\"']".format("__version__")
|
11
|
+
matches = re.findall(pattern, file_content)
|
12
|
+
if not matches:
|
13
|
+
raise ValueError(f"Could not find __version__ in __init__.py")
|
14
|
+
return matches[0]
|
13
15
|
|
14
16
|
|
15
17
|
def main():
|
16
18
|
setup(
|
17
19
|
# https://juejin.cn/post/7369349560421040128
|
18
|
-
install_requires=[
|
19
|
-
"numpy",
|
20
|
-
"pandas",
|
21
|
-
"psutil"
|
22
|
-
],
|
20
|
+
install_requires=["numpy", "pandas", "psutil", "openai"],
|
23
21
|
extras_require={
|
24
22
|
"torch": ["torch"],
|
25
23
|
},
|
@@ -28,9 +26,9 @@ def main():
|
|
28
26
|
"console_scripts": [
|
29
27
|
"ncli=nlpertools.cli:main",
|
30
28
|
]
|
31
|
-
}
|
29
|
+
},
|
32
30
|
)
|
33
31
|
|
34
32
|
|
35
|
-
if __name__ ==
|
33
|
+
if __name__ == "__main__":
|
36
34
|
main()
|
@@ -0,0 +1,125 @@
|
|
1
|
+
import argparse
|
2
|
+
import os
|
3
|
+
import uuid
|
4
|
+
import sys
|
5
|
+
from .dataprocess import startwith
|
6
|
+
|
7
|
+
|
8
|
+
def run_git_command(command, max_retries=None, retry_delay=0.0):
    """Run a shell command repeatedly until it exits successfully.

    The command is executed with ``os.system``. ``os.system`` returns an
    integer status (an encoded wait status on POSIX, the exit code on
    Windows), never the command's textual output, so success is detected
    by comparing the status with ``0`` rather than by looking for
    "fatal"/"error" prefixes in its string form.

    Args:
        command: Full shell command line to run (e.g. ``"git pull"``).
        max_retries: Maximum number of retries after the first attempt.
            ``None`` (the default) keeps retrying until the command succeeds.
        retry_delay: Seconds to sleep between attempts. Defaults to no delay.

    Returns:
        ``True`` once the command exits with status 0, ``False`` if
        ``max_retries`` was exhausted without a successful run.
    """
    import time  # local import: only needed when a delay between retries is requested

    print(command)
    num = 0
    while True:
        print(f"retry num: {num}")
        status = os.system(command)
        print(str(status))
        # A zero status is the only portable signal of success.
        if status == 0:
            print("success")
            print(f"success info : ##{status}##")
            return True
        if max_retries is not None and num >= max_retries:
            return False
        num += 1
        if retry_delay > 0:
            time.sleep(retry_delay)
|
24
|
+
|
25
|
+
|
26
|
+
def get_mac_address():
|
27
|
+
mac = uuid.UUID(int=uuid.getnode()).hex[-12:]
|
28
|
+
mac_address = ":".join([mac[e:e + 2] for e in range(0, 11, 2)])
|
29
|
+
print("mac address 不一定准确")
|
30
|
+
print(mac_address)
|
31
|
+
return mac_address
|
32
|
+
|
33
|
+
|
34
|
+
def get_2af_value(key):
|
35
|
+
import pyotp
|
36
|
+
"""
|
37
|
+
key应该是7位的
|
38
|
+
"""
|
39
|
+
print(key)
|
40
|
+
totp = pyotp.TOTP(key)
|
41
|
+
print(totp.now())
|
42
|
+
|
43
|
+
|
44
|
+
def start_gpu_usage_notify_server():
|
45
|
+
from flask import Flask
|
46
|
+
|
47
|
+
app = Flask(__name__)
|
48
|
+
|
49
|
+
@app.route("/notify", methods=["GET"])
|
50
|
+
def notify():
|
51
|
+
# 这里可以根据需要动态生成通知内容
|
52
|
+
usage = os.popen("nvidia-smi --query-gpu=memory.used --format=csv").read().split("\n")[1:]
|
53
|
+
res = 0
|
54
|
+
for edx, each in enumerate(usage):
|
55
|
+
if each.startswith("0"):
|
56
|
+
res += 1
|
57
|
+
print(res)
|
58
|
+
return str(res), 200
|
59
|
+
|
60
|
+
app.run(host="0.0.0.0", port=5000)
|
61
|
+
|
62
|
+
|
63
|
+
def start_gpu_usage_notify_client():
|
64
|
+
import requests
|
65
|
+
from plyer import notification
|
66
|
+
import time
|
67
|
+
|
68
|
+
SERVER_URL = 'http://127.0.0.1:5000/notify' # 服务器的 API 地址
|
69
|
+
|
70
|
+
def notify(text):
|
71
|
+
# 使用 plyer 发送通知
|
72
|
+
notification.notify(
|
73
|
+
title='远程通知',
|
74
|
+
message=text,
|
75
|
+
timeout=10 # 10秒的通知显示时间
|
76
|
+
)
|
77
|
+
|
78
|
+
"""定时轮询服务器获取通知"""
|
79
|
+
while True:
|
80
|
+
try:
|
81
|
+
response = requests.get(SERVER_URL)
|
82
|
+
if response.status_code == 200:
|
83
|
+
num = int(response.text)
|
84
|
+
if num > 0:
|
85
|
+
notify(f"服务器有{num}张卡")
|
86
|
+
print(f"服务器有{num}张卡")
|
87
|
+
else:
|
88
|
+
print("服务器没有新通知")
|
89
|
+
except Exception as e:
|
90
|
+
print(f"与服务器连接失败: {e}")
|
91
|
+
|
92
|
+
time.sleep(1)
|
93
|
+
|
94
|
+
|
95
|
+
def main():
|
96
|
+
parser = argparse.ArgumentParser(description="CLI tool for git operations and other functions.")
|
97
|
+
parser.add_argument('git_command', nargs='*', help='Any git command (e.g., push, pull)')
|
98
|
+
parser.add_argument('--mac_address', action='store_true', help='Get the MAC address.')
|
99
|
+
parser.add_argument('--get_2fa', action='store_true', help='Get the 2fa value.')
|
100
|
+
parser.add_argument('--get_2fa_key', type=str, help='Get the 2fa value.')
|
101
|
+
parser.add_argument('--monitor_gpu_cli', action='store_true', help='monitor gpu cli')
|
102
|
+
parser.add_argument('--monitor_gpu_ser', action='store_true', help='monitor gpu ser')
|
103
|
+
|
104
|
+
args = parser.parse_args()
|
105
|
+
|
106
|
+
if args.git_command:
|
107
|
+
git_cmd = " ".join(args.git_command)
|
108
|
+
run_git_command(git_cmd)
|
109
|
+
elif args.mac_address:
|
110
|
+
get_mac_address()
|
111
|
+
elif args.monitor_gpu_cli:
|
112
|
+
start_gpu_usage_notify_client()
|
113
|
+
elif args.monitor_gpu_ser:
|
114
|
+
start_gpu_usage_notify_server()
|
115
|
+
elif args.get_2fa:
|
116
|
+
if args.get_2fa_key:
|
117
|
+
get_2af_value(args.get_2fa_key)
|
118
|
+
else:
|
119
|
+
print("Please provide a key as an argument.")
|
120
|
+
else:
|
121
|
+
print("No operation specified.")
|
122
|
+
|
123
|
+
|
124
|
+
if __name__ == '__main__':
|
125
|
+
main()
|
@@ -19,6 +19,18 @@ other_special_characters = (
|
|
19
19
|
"」﴾》"
|
20
20
|
)
|
21
21
|
|
22
|
+
def startwith(text: str, pattern_list: list) -> bool:
|
23
|
+
"""
|
24
|
+
判断text是否以pattern_list中的某个pattern开头
|
25
|
+
:param text:
|
26
|
+
:param pattern_list:
|
27
|
+
:return:
|
28
|
+
"""
|
29
|
+
for pattern in pattern_list:
|
30
|
+
if text.startswith(pattern):
|
31
|
+
return True
|
32
|
+
return False
|
33
|
+
|
22
34
|
|
23
35
|
class Pattern:
|
24
36
|
"""
|
@@ -20,7 +20,7 @@ def confused_matrix(confuse_matrix):
|
|
20
20
|
f.savefig('tmp.jpg', bbox_inches='tight')
|
21
21
|
|
22
22
|
|
23
|
-
def plot_histogram(data, bin_size):
|
23
|
+
def plot_histogram(data, bin_size, max_bin):
|
24
24
|
"""
|
25
25
|
画直方图,超过1000的统一按1000算
|
26
26
|
:param data:
|
@@ -33,15 +33,15 @@ def plot_histogram(data, bin_size):
|
|
33
33
|
from matplotlib.ticker import MaxNLocator
|
34
34
|
# 将超过1000的值改为1000
|
35
35
|
def process_lengths(data):
|
36
|
-
return [length if length <=
|
36
|
+
return [length if length <= max_bin else max_bin + 3 for length in data]
|
37
37
|
|
38
38
|
# 前闭后开
|
39
|
-
min_num, max_num = 0, 1000
|
39
|
+
# min_num, max_num = 0, 1000
|
40
40
|
# min_num, max_num = min(data), max(data)
|
41
41
|
|
42
42
|
plt.figure(figsize=(12, 8))
|
43
43
|
processed_data = process_lengths(data)
|
44
|
-
bins = np.arange(0,
|
44
|
+
bins = np.arange(0, max_bin + 2 * bin_size, bin_size)
|
45
45
|
# 绘制直方图
|
46
46
|
n, new_bins, patches = plt.hist(processed_data, bins=bins, edgecolor='black', color='skyblue', alpha=0.7,
|
47
47
|
linewidth=0)
|
@@ -60,10 +60,8 @@ def plot_histogram(data, bin_size):
|
|
60
60
|
plt.xlabel('module line number', fontsize=14)
|
61
61
|
plt.ylabel('frequency', fontsize=14)
|
62
62
|
|
63
|
-
# 添加网格
|
64
63
|
plt.grid(True, linestyle='--', alpha=0.6)
|
65
64
|
|
66
|
-
# 美化x轴和y轴的刻度
|
67
65
|
plt.xticks(fontsize=12)
|
68
66
|
plt.yticks(fontsize=12)
|
69
67
|
|
@@ -80,4 +78,4 @@ if __name__ == '__main__':
|
|
80
78
|
# 调整区间大小
|
81
79
|
bin_size = 50
|
82
80
|
# 示例模块长度数据
|
83
|
-
plot_histogram([1, 100, 999, 1000, 1002, 1100, 1150], bin_size)
|
81
|
+
plot_histogram([1, 100, 999, 1000, 1002, 1100, 1150], bin_size, max_bin=1000)
|
@@ -46,7 +46,7 @@ def get_filename(path, suffix=True) -> str:
|
|
46
46
|
return filename
|
47
47
|
|
48
48
|
|
49
|
-
def
|
49
|
+
def listdir(dir_name, including_dir=True):
|
50
50
|
filenames = os.listdir(dir_name)
|
51
51
|
if including_dir:
|
52
52
|
return [os.path.join(dir_name, filename) for filename in filenames]
|
@@ -54,7 +54,7 @@ def j_listdir(dir_name, including_dir=True):
|
|
54
54
|
return list(filenames)
|
55
55
|
|
56
56
|
|
57
|
-
def
|
57
|
+
def listdir_yield(dir_name, including_dir=True):
|
58
58
|
filenames = os.listdir(dir_name)
|
59
59
|
for filename in filenames:
|
60
60
|
if including_dir:
|
@@ -241,12 +241,12 @@ def load_from_jsonl(path):
|
|
241
241
|
return corpus
|
242
242
|
|
243
243
|
|
244
|
-
def
|
244
|
+
def save_pkl(data, path):
|
245
245
|
with open(path, 'wb') as f:
|
246
246
|
pickle.dump(data, f)
|
247
247
|
|
248
248
|
|
249
|
-
def
|
249
|
+
def load_pkl(path):
|
250
250
|
with open(path, 'rb') as f:
|
251
251
|
data = pickle.load(f)
|
252
252
|
return data
|
@@ -0,0 +1,30 @@
|
|
1
|
+
import os
from typing import Optional, Union

from openai import OpenAI
from openai import OpenAI as Openai  # alias kept so code still using the old misspelled name keeps working
from tqdm import tqdm

from ..io.file import readtxt_string, read_yaml
|
6
|
+
|
7
|
+
"""
|
8
|
+
从你当前的项目里找到.key文件 获取url和key
|
9
|
+
"""
|
10
|
+
|
11
|
+
|
12
|
+
def call_once(
    client: "OpenAI", input: Optional[Union[str, list]], model_name: str = "qwen3-0626-e4", max_tokens: int = 8192
) -> str:
    """Send one chat-completion request and return the reply text.

    Args:
        client: Client object exposing ``chat.completions.create``.
        input: Either a prompt string, which is wrapped into a single
            ``user`` message, or a ready-made list of message dicts, which
            is sent unchanged.
        model_name: Name of the model to query.
        max_tokens: Maximum number of tokens the model may generate.

    Returns:
        The ``content`` of the first choice in the response.

    Raises:
        TypeError: If ``input`` is neither a string nor a list (including
            ``None``), instead of failing later on an unbound local.
    """
    if isinstance(input, str):
        messages = [{"role": "user", "content": input}]
    elif isinstance(input, list):
        messages = input
    else:
        raise TypeError(f"input must be a str or a list of messages, got {type(input).__name__}")

    response = client.chat.completions.create(model=model_name, messages=messages, max_tokens=max_tokens)

    return response.choices[0].message.content
|
@@ -0,0 +1,74 @@
|
|
1
|
+
import os
|
2
|
+
from tqdm import tqdm
|
3
|
+
from openai import OpenAI
|
4
|
+
import concurrent.futures
|
5
|
+
|
6
|
+
|
7
|
+
INFER_PARAS = {
|
8
|
+
"temperature": 0.7,
|
9
|
+
"infer_times": 1,
|
10
|
+
"max_tokens": 8192,
|
11
|
+
"top_p": 0.95,
|
12
|
+
"top_k": 40,
|
13
|
+
"repetition_penalty": 1.0,
|
14
|
+
}
|
15
|
+
|
16
|
+
|
17
|
+
def parse_infer_data(infer_data: list):
    """Normalize inference inputs into a list of chat conversations.

    Args:
        infer_data: Either a list of prompt strings, or a list of
            conversations where each conversation is itself a list of
            ``{"role": ..., "content": ...}`` message dicts. The kind is
            decided from the first element.

    Returns:
        A list of conversations. Every prompt string becomes its own
        single-message conversation ``[{"role": "user", "content": prompt}]``;
        a list of conversations is returned unchanged; empty input yields ``[]``.

    Raises:
        TypeError: If the first element is neither a string nor a list.
    """
    if not infer_data:
        return []
    first = infer_data[0]
    if isinstance(first, str):
        # One conversation per prompt, so each request gets a proper message list.
        return [[{"role": "user", "content": prompt}] for prompt in infer_data]
    if isinstance(first, list):
        return infer_data
    raise TypeError(f"infer_data items must be str or list, got {type(first).__name__}")


def common_api_infer_func(model_name, infer_data: list, infer_paras, client: "OpenAI"):
    """Run chat-completion requests for many inputs through ``client``.

    Args:
        model_name: Name of the model to query.
        infer_data: List of prompt strings or list of conversations
            (see ``parse_infer_data``).
        infer_paras: Dict of inference parameters. ``infer_times`` controls
            how many times each conversation is sent and is not forwarded
            to the API. ``top_k`` and ``repetition_penalty`` are not keyword
            arguments of the OpenAI client, so they are forwarded inside
            ``extra_body``. All remaining keys are passed as keyword
            arguments to ``client.chat.completions.create``. The dict is
            not modified.
        client: Client object exposing ``chat.completions.create``.

    Returns:
        A list aligned with ``infer_data``: element ``i`` is a list of
        ``{"text": reply}`` dicts, one per repetition, for input ``i``.
    """
    conversations = parse_infer_data(infer_data)

    # Work on a copy so a shared dict such as INFER_PARAS is left untouched.
    request_kwargs = dict(infer_paras or {})
    infer_times = request_kwargs.pop("infer_times", 1)
    extra_body = dict(request_kwargs.pop("extra_body", None) or {})
    for key in ("top_k", "repetition_penalty"):
        if key in request_kwargs:
            # An explicit extra_body entry from the caller takes precedence.
            extra_body.setdefault(key, request_kwargs.pop(key))
    if extra_body:
        request_kwargs["extra_body"] = extra_body

    def get_response(conversation):
        responses = []
        for _ in range(infer_times):
            response = client.chat.completions.create(model=model_name, messages=conversation, **request_kwargs)
            text = response.choices[0].message.content
            responses.append({"text": text})
        return responses

    with concurrent.futures.ThreadPoolExecutor(16) as executor:
        # Executor.map yields results in submission order, keeping outputs
        # aligned with the inputs regardless of which request finishes first.
        results = list(executor.map(get_response, conversations))

    return results
|
46
|
+
|
47
|
+
|
48
|
+
def common_vllm_infer_func(model_path, infer_data: list, infer_paras: dict):
|
49
|
+
"""
|
50
|
+
infer_data: list of messages/prompt
|
51
|
+
"""
|
52
|
+
messages = parse_infer_data(infer_data)
|
53
|
+
from vllm import LLM, SamplingParams
|
54
|
+
|
55
|
+
temperature = infer_paras.get("temperature", 0.7)
|
56
|
+
infer_times = infer_paras.get("infer_times", 1)
|
57
|
+
vllm_card_num = len(os.environ["CUDA_VISIBLE_DEVICES"].split(","))
|
58
|
+
|
59
|
+
llm = LLM(model=model_path, tensor_parallel_size=vllm_card_num, trust_remote_code=True, gpu_memory_utilization=0.85)
|
60
|
+
sampling_params = SamplingParams(
|
61
|
+
temperature=temperature,
|
62
|
+
n=infer_times,
|
63
|
+
max_tokens=8192,
|
64
|
+
# qwen3非思考模式推荐参数
|
65
|
+
# **infer_paras.get(template_name, {}),
|
66
|
+
# qwen3思考模式推荐参数
|
67
|
+
)
|
68
|
+
conversation = messages
|
69
|
+
outputs = llm.chat(conversation, sampling_params=sampling_params, use_tqdm=True)
|
70
|
+
return_texts = []
|
71
|
+
for idx, output in tqdm(enumerate(outputs)):
|
72
|
+
result = [{"text": i.text} for i in output.outputs]
|
73
|
+
return_texts.append(result)
|
74
|
+
return return_texts
|
@@ -0,0 +1,13 @@
|
|
1
|
+
def estimate_cost(input_token_num, output_token_num, example_num=1, input_price=1, output_price=4):
|
2
|
+
"""
|
3
|
+
估算成本
|
4
|
+
:param input_token_num: 输入token数量
|
5
|
+
:param output_token_num: 输出token数量
|
6
|
+
:param example_num: 示例数量
|
7
|
+
:param input_price: 输入token单价 / 1M
|
8
|
+
:param output_price: 输出token单价 / 1M
|
9
|
+
:return: 成本
|
10
|
+
"""
|
11
|
+
price = (input_token_num * input_price + output_token_num * output_price) * example_num / 1000000
|
12
|
+
print(f"Estimated cost: {price:.2f} 元")
|
13
|
+
return price
|
@@ -2,9 +2,11 @@
|
|
2
2
|
import codecs
|
3
3
|
import os
|
4
4
|
import random
|
5
|
+
import itertools
|
5
6
|
|
6
7
|
from .io.dir import j_mkdir
|
7
8
|
from .io.file import readtxt_list_all_strip, writetxt_w_list, save_to_csv
|
9
|
+
|
8
10
|
# import numpy as np
|
9
11
|
# import seaborn as sns
|
10
12
|
# import torch
|
@@ -17,8 +19,44 @@ from .io.file import readtxt_list_all_strip, writetxt_w_list, save_to_csv
|
|
17
19
|
from .utils.package import *
|
18
20
|
|
19
21
|
|
22
|
+
def estimate_pass_at_k(num_samples: list, num_correct: list, k):
|
23
|
+
"""
|
24
|
+
copy from https://huggingface.co/spaces/evaluate-metric/code_eval/blob/main/code_eval.py
|
25
|
+
num_samples: list
|
26
|
+
Note: if num sample < k, acc = 1, it's incomprehensibly
|
27
|
+
"""
|
28
|
+
"""Estimates pass@k of each problem and returns them in an array."""
|
29
|
+
|
30
|
+
def estimator(n: int, c: int, k: int) -> float:
|
31
|
+
"""Calculates 1 - comb(n - c, k) / comb(n, k)."""
|
32
|
+
if n - c < k:
|
33
|
+
return 1.0
|
34
|
+
return 1.0 - np.prod(1.0 - k / np.arange(n - c + 1, n + 1))
|
35
|
+
|
36
|
+
if isinstance(num_samples, int):
|
37
|
+
num_samples_it = itertools.repeat(num_samples, len(num_correct))
|
38
|
+
else:
|
39
|
+
assert len(num_samples) == len(num_correct)
|
40
|
+
num_samples_it = iter(num_samples)
|
41
|
+
|
42
|
+
return np.array([estimator(int(n), int(c), k) for n, c in zip(num_samples_it, num_correct)])
|
43
|
+
|
44
|
+
|
45
|
+
def estimate_pass_at_k_fixed(num_samples: list, num_correct: list, k):
|
46
|
+
"""
|
47
|
+
优化了num_samples小于 k的情况
|
48
|
+
"""
|
49
|
+
num_samples = [k if i < k else i for i in num_samples]
|
50
|
+
return estimate_pass_at_k(num_samples, num_correct, k)
|
51
|
+
|
52
|
+
|
53
|
+
def estimate_pass_at_k_return_num(num_samples: list, num_correct: list, k):
|
54
|
+
"""直接返回求完平均的"""
|
55
|
+
return round(estimate_pass_at_k(num_samples, num_correct, k).mean() * 100, 2)
|
56
|
+
|
57
|
+
|
20
58
|
def calc_llm_train_activation_memory(
|
21
|
-
|
59
|
+
model_name, sequence_length, batch_size, hidden_dim, lay_number, attention_heads_num, gpu_num=1
|
22
60
|
):
|
23
61
|
"""
|
24
62
|
return bytes
|
@@ -32,18 +70,19 @@ def calc_llm_train_activation_memory(
|
|
32
70
|
# FFN
|
33
71
|
# Layer Norm
|
34
72
|
r1 = (
|
35
|
-
|
36
|
-
|
37
|
-
|
38
|
-
|
39
|
-
|
73
|
+
sequence_length
|
74
|
+
* batch_size
|
75
|
+
* hidden_dim
|
76
|
+
* lay_number
|
77
|
+
* (34 + 5 * attention_heads_num * sequence_length / hidden_dim)
|
40
78
|
)
|
41
79
|
# reference2
|
42
80
|
r2 = (
|
43
|
-
|
44
|
-
|
45
|
-
|
46
|
-
|
81
|
+
lay_number
|
82
|
+
* (2 * sequence_length * attention_heads_num + 16 * hidden_dim)
|
83
|
+
* sequence_length
|
84
|
+
* batch_size
|
85
|
+
/ gpu_num
|
47
86
|
)
|
48
87
|
print(r1)
|
49
88
|
print(r2)
|
@@ -78,9 +117,7 @@ class DataStructure:
|
|
78
117
|
"source": "baidu",
|
79
118
|
}
|
80
119
|
ner_input_example = "这句话一共有两个实体分别为大象和老鼠。"
|
81
|
-
ner_label_example = (
|
82
|
-
list("OOOOOOOOOOOOO") + ["B-s", "I-s"] + ["O"] + ["B-o", "I-o"] + ["O"]
|
83
|
-
)
|
120
|
+
ner_label_example = list("OOOOOOOOOOOOO") + ["B-s", "I-s"] + ["O"] + ["B-o", "I-o"] + ["O"]
|
84
121
|
|
85
122
|
|
86
123
|
def text_jaccard(ipt1, ipt2, ipt_level="char", sim_level="char"):
|
@@ -134,7 +171,7 @@ class STEM(object):
|
|
134
171
|
if each_srl:
|
135
172
|
args = []
|
136
173
|
for arg in each_srl:
|
137
|
-
args.extend(seg[arg[1]: arg[2] + 1])
|
174
|
+
args.extend(seg[arg[1] : arg[2] + 1])
|
138
175
|
# 添加上谓词
|
139
176
|
args.insert(each_srl[0][2] - each_srl[0][1] + 1, seg[wdx])
|
140
177
|
events.append(args)
|
@@ -173,7 +210,7 @@ def subject_object_labeling(spo_list, text):
|
|
173
210
|
q_list_length = len(q_list)
|
174
211
|
k_list_length = len(k_list)
|
175
212
|
for idx in range(k_list_length - q_list_length + 1):
|
176
|
-
t = [q == k for q, k in zip(q_list, k_list[idx: idx + q_list_length])]
|
213
|
+
t = [q == k for q, k in zip(q_list, k_list[idx : idx + q_list_length])]
|
177
214
|
# print(idx, t)
|
178
215
|
if all(t):
|
179
216
|
# print(idx)
|
@@ -186,9 +223,7 @@ def subject_object_labeling(spo_list, text):
|
|
186
223
|
if len(spo) == 2:
|
187
224
|
labeling_list[idx_start + 1] = "I-" + spo_type
|
188
225
|
elif len(spo) >= 3:
|
189
|
-
labeling_list[idx_start + 1: idx_start + len(spo)] = ["I-" + spo_type] * (
|
190
|
-
len(spo) - 1
|
191
|
-
)
|
226
|
+
labeling_list[idx_start + 1 : idx_start + len(spo)] = ["I-" + spo_type] * (len(spo) - 1)
|
192
227
|
else:
|
193
228
|
pass
|
194
229
|
|
@@ -197,7 +232,7 @@ def subject_object_labeling(spo_list, text):
|
|
197
232
|
# count = 0
|
198
233
|
for predicate, spo_list_form in spo_predicate_dict.items():
|
199
234
|
if predicate in text:
|
200
|
-
for
|
235
|
+
for spo_subject, spo_object in spo_list_form:
|
201
236
|
# if predicate not in spo_subject and predicate not in spo_object:
|
202
237
|
_labeling_type(spo_subject, "SUB")
|
203
238
|
_labeling_type(spo_object, "OBJ")
|
@@ -219,10 +254,7 @@ def label(text, labels):
|
|
219
254
|
:return:
|
220
255
|
"""
|
221
256
|
train_sequence = "\n".join(
|
222
|
-
[
|
223
|
-
"\t".join(i) if i[0] != " " else "[null]\t{}".format(i[1])
|
224
|
-
for i in zip(list(text), labels)
|
225
|
-
]
|
257
|
+
["\t".join(i) if i[0] != " " else "[null]\t{}".format(i[1]) for i in zip(list(text), labels)]
|
226
258
|
)
|
227
259
|
return train_sequence
|
228
260
|
|
@@ -238,16 +270,12 @@ def convert_crf_format_10_fold(corpus, objdir_path):
|
|
238
270
|
split_position = int(len(corpus) / 10)
|
239
271
|
for k in range(0, 10):
|
240
272
|
if k == 9:
|
241
|
-
dev_set = corpus[k * split_position:]
|
273
|
+
dev_set = corpus[k * split_position :]
|
242
274
|
train_set = corpus[: k * split_position]
|
243
275
|
else:
|
244
|
-
dev_set = corpus[k * split_position: (k + 1) * split_position]
|
245
|
-
train_set = (
|
246
|
-
|
247
|
-
)
|
248
|
-
writetxt_w_list(
|
249
|
-
train_set, os.path.join(objdir_path, "train{}.txt".format(k + 1))
|
250
|
-
)
|
276
|
+
dev_set = corpus[k * split_position : (k + 1) * split_position]
|
277
|
+
train_set = corpus[: k * split_position] + corpus[(k + 1) * split_position :]
|
278
|
+
writetxt_w_list(train_set, os.path.join(objdir_path, "train{}.txt".format(k + 1)))
|
251
279
|
writetxt_w_list(dev_set, os.path.join(objdir_path, "test{}.txt".format(k + 1)))
|
252
280
|
writetxt_w_list(dev_set, os.path.join(objdir_path, "dev{}.txt".format(k + 1)))
|
253
281
|
|
@@ -283,31 +311,19 @@ def read_seq_res(path, labels):
|
|
283
311
|
return text, raw_label, predict_label
|
284
312
|
|
285
313
|
|
286
|
-
def kfold_txt(corpus, path, k=9, is_shuffle=True):
|
287
|
-
"""
|
288
|
-
k是10份中训练集占了几份
|
289
|
-
"""
|
290
|
-
j_mkdir(path)
|
291
|
-
if is_shuffle:
|
292
|
-
random.shuffle(corpus)
|
293
|
-
split_position = int(len(corpus) / 10)
|
294
|
-
train_set, dev_set = corpus[: k * split_position], corpus[k * split_position:]
|
295
|
-
writetxt_w_list(train_set, os.path.join(path, "train.tsv"), num_lf=1)
|
296
|
-
writetxt_w_list(dev_set, os.path.join(path, "test.tsv"), num_lf=1)
|
297
|
-
writetxt_w_list(dev_set, os.path.join(path, "dev.tsv"), num_lf=1)
|
298
|
-
|
299
|
-
|
300
314
|
def sample():
|
301
315
|
import pandas as pd
|
302
316
|
from sklearn.model_selection import StratifiedShuffleSplit
|
303
317
|
|
304
318
|
# 假设 df 是你的 DataFrame
|
305
319
|
|
306
|
-
df = pd.DataFrame(
|
307
|
-
|
308
|
-
|
309
|
-
|
310
|
-
|
320
|
+
df = pd.DataFrame(
|
321
|
+
{
|
322
|
+
"count_line": [i for i in range(100)],
|
323
|
+
"x": [i for i in range(100)],
|
324
|
+
"y": [i // 10 for i in range(100)],
|
325
|
+
}
|
326
|
+
)
|
311
327
|
print(df)
|
312
328
|
# count_line 是用于分层抽样的字段
|
313
329
|
|
@@ -315,7 +331,7 @@ def sample():
|
|
315
331
|
split = StratifiedShuffleSplit(n_splits=1, test_size=0.1, random_state=42)
|
316
332
|
|
317
333
|
# 获取训练集和测试集的索引
|
318
|
-
train_index, test_index = next(split.split(df, df[
|
334
|
+
train_index, test_index = next(split.split(df, df["y"]))
|
319
335
|
|
320
336
|
# 根据索引划分训练集和测试集
|
321
337
|
train_df = df.loc[train_index]
|
@@ -326,6 +342,27 @@ def sample():
|
|
326
342
|
print("测试集行数:", len(test_df))
|
327
343
|
|
328
344
|
|
345
|
+
def kfold_txt(corpus, path, k=9, is_shuffle=True):
|
346
|
+
"""
|
347
|
+
k是10份中训练集占了几份
|
348
|
+
"""
|
349
|
+
j_mkdir(path)
|
350
|
+
if is_shuffle:
|
351
|
+
random.shuffle(corpus)
|
352
|
+
split_position = int(len(corpus) / 10)
|
353
|
+
train_set, dev_set = corpus[: k * split_position], corpus[k * split_position :]
|
354
|
+
writetxt_w_list(train_set, os.path.join(path, "train.tsv"), num_lf=1)
|
355
|
+
writetxt_w_list(dev_set, os.path.join(path, "test.tsv"), num_lf=1)
|
356
|
+
writetxt_w_list(dev_set, os.path.join(path, "dev.tsv"), num_lf=1)
|
357
|
+
|
358
|
+
|
359
|
+
def kfold_list(list_data):
|
360
|
+
"""
|
361
|
+
sklearn.model_selection.train_test_split
|
362
|
+
"""
|
363
|
+
pass
|
364
|
+
|
365
|
+
|
329
366
|
def kfold_df(df, save_dir=None):
|
330
367
|
"""
|
331
368
|
划分train test val集, 写为windows可读的csv。
|
@@ -338,9 +375,7 @@ def kfold_df(df, save_dir=None):
|
|
338
375
|
|
339
376
|
train_idx, test_and_val_idx = KFold(n_splits=8, shuffle=True).split(df).__next__()
|
340
377
|
df_test_and_val = df.iloc[test_and_val_idx]
|
341
|
-
test_idx, val_idx = (
|
342
|
-
KFold(n_splits=2, shuffle=True).split(df_test_and_val).__next__()
|
343
|
-
)
|
378
|
+
test_idx, val_idx = KFold(n_splits=2, shuffle=True).split(df_test_and_val).__next__()
|
344
379
|
df_train = df.iloc[train_idx]
|
345
380
|
df_val = df.iloc[val_idx]
|
346
381
|
df_test = df.iloc[test_idx]
|
@@ -417,7 +452,7 @@ def split_sentence(sentence, language="chinese", cross_line=True):
|
|
417
452
|
for idx, char in enumerate(sentence):
|
418
453
|
if idx == len(sentence) - 1:
|
419
454
|
if char in split_signs:
|
420
|
-
sentences.append(sentence[start_idx: idx + 1].strip())
|
455
|
+
sentences.append(sentence[start_idx : idx + 1].strip())
|
421
456
|
start_idx = idx + 1
|
422
457
|
else:
|
423
458
|
sentences.append(sentence[start_idx:].strip())
|
@@ -427,10 +462,10 @@ def split_sentence(sentence, language="chinese", cross_line=True):
|
|
427
462
|
if idx < len(sentence) - 2:
|
428
463
|
# 处理。”。
|
429
464
|
if sentence[idx + 2] not in split_signs:
|
430
|
-
sentences.append(sentence[start_idx: idx + 2].strip())
|
465
|
+
sentences.append(sentence[start_idx : idx + 2].strip())
|
431
466
|
start_idx = idx + 2
|
432
467
|
elif sentence[idx + 1] not in split_signs:
|
433
|
-
sentences.append(sentence[start_idx: idx + 1].strip())
|
468
|
+
sentences.append(sentence[start_idx : idx + 1].strip())
|
434
469
|
start_idx = idx + 1
|
435
470
|
return sentences
|
436
471
|
|
@@ -506,6 +541,6 @@ if __name__ == "__main__":
|
|
506
541
|
hidden_dim=4096,
|
507
542
|
lay_number=28,
|
508
543
|
attention_heads_num=32,
|
509
|
-
gpu_num=1
|
544
|
+
gpu_num=1,
|
510
545
|
)
|
511
546
|
print(res, "G")
|
@@ -30,6 +30,21 @@ ENGLISH_PUNCTUATION = list(',.;:\'"!?<>()')
|
|
30
30
|
OTHER_PUNCTUATION = list('!@#$%^&*')
|
31
31
|
|
32
32
|
|
33
|
+
def setup_logging(log_file):
    """
    Configure the root logger to write INFO-level records to a file.

    Args:
        log_file (str): Path to the log file.
    """
    # Collected in one mapping so the whole configuration is visible at a glance.
    settings = {
        "filename": log_file,
        "level": logging.INFO,
        "format": '%(asctime)s - %(levelname)s - %(message)s',
        "datefmt": '%Y-%m-%d %H:%M:%S',
    }
    logging.basicConfig(**settings)
|
46
|
+
|
47
|
+
|
33
48
|
def get_diff_parts(str1, str2):
|
34
49
|
# 创建一个 SequenceMatcher 对象
|
35
50
|
matcher = difflib.SequenceMatcher(None, str1, str2)
|
@@ -154,8 +169,11 @@ def jprint(obj, depth=0):
|
|
154
169
|
print(obj)
|
155
170
|
|
156
171
|
|
157
|
-
def print_split(sign="=", num=20):
|
158
|
-
|
172
|
+
def print_split(sign="=", num=20, char: str = None):
    """
    Print a separator line, optionally with a label in the middle.

    Args:
        sign: String repeated to build the separator.
        num: Number of repetitions of ``sign`` for an unlabeled separator.
        char: Optional label. When given (and non-empty), ``num // 2``
            repetitions of ``sign`` are printed on each side of it.
    """
    if char:
        # Parentheses are required: ``sign * num // 2`` is evaluated as
        # ``(sign * num) // 2``, i.e. str // int, which raises TypeError.
        half = sign * (num // 2)
        print(half, char, half)
    else:
        print(sign * num)
|
159
177
|
|
160
178
|
|
161
179
|
def seed_everything():
|
@@ -361,10 +379,12 @@ def unsqueeze_list(flatten_list, each_element_len):
|
|
361
379
|
range(len(flatten_list) // each_element_len)]
|
362
380
|
return two_dim_list
|
363
381
|
|
382
|
+
|
364
383
|
def split_list(input_list, chunk_size):
|
365
384
|
# 使用列表推导式将列表分割成二维数组
|
366
385
|
return [input_list[i:i + chunk_size] for i in range(0, len(input_list), chunk_size)]
|
367
386
|
|
387
|
+
|
368
388
|
def auto_close():
|
369
389
|
"""
|
370
390
|
针对企业微信15分钟会显示离开的机制,假装自己还在上班
|
File without changes
|
@@ -1,6 +1,6 @@
|
|
1
|
-
Metadata-Version: 2.
|
1
|
+
Metadata-Version: 2.4
|
2
2
|
Name: nlpertools
|
3
|
-
Version: 1.0.
|
3
|
+
Version: 1.0.10
|
4
4
|
Summary: A small package about small basic IO operation when coding
|
5
5
|
Home-page: https://github.com/lvzii/nlpertools
|
6
6
|
Author: youshuJi
|
@@ -15,8 +15,12 @@ License-File: LICENSE
|
|
15
15
|
Requires-Dist: numpy
|
16
16
|
Requires-Dist: pandas
|
17
17
|
Requires-Dist: psutil
|
18
|
+
Requires-Dist: openai
|
18
19
|
Provides-Extra: torch
|
19
20
|
Requires-Dist: torch; extra == "torch"
|
21
|
+
Dynamic: license-file
|
22
|
+
Dynamic: provides-extra
|
23
|
+
Dynamic: requires-dist
|
20
24
|
|
21
25
|
<div align="center">
|
22
26
|
<h4 align="center">
|
@@ -35,7 +39,7 @@ Requires-Dist: torch; extra == "torch"
|
|
35
39
|
|
36
40
|
它解决了什么问题:
|
37
41
|
|
38
|
-
- 很多函数是记不住的,
|
42
|
+
- 很多函数是记不住的,每次写都要~~搜~~问大模型,例如pandas排序
|
39
43
|
- 刷题的时候,树结构的题目很难调试
|
40
44
|
|
41
45
|
|
@@ -48,6 +52,23 @@ nlpertools
|
|
48
52
|
|
49
53
|
```
|
50
54
|
|
55
|
+
# 最常用/喜欢的功能(使用示例)
|
56
|
+
```python
|
57
|
+
# 读txt, json文件
|
58
|
+
import nlpertools
|
59
|
+
|
60
|
+
txt_data = nlpertools.readtxt_list_all_strip('res.txt')
|
61
|
+
json_data = nlpertools.load_from_json('res.json')
|
62
|
+
```
|
63
|
+
|
64
|
+
```bash
|
65
|
+
## git, 连接github不稳定的时候非常有用
|
66
|
+
ncli git pull
|
67
|
+
|
68
|
+
# 生成pypi双因素认证的实时密钥(需要提供key)
|
69
|
+
ncli --get_2fa --get_2fa_key your_key
|
70
|
+
```
|
71
|
+
|
51
72
|
# 安装
|
52
73
|
|
53
74
|
Install the latest release version
|
@@ -99,30 +120,7 @@ https://nlpertools.readthedocs.io/en/latest/
|
|
99
120
|
|
100
121
|
一些可能需要配置才能用的函数,写上示例
|
101
122
|
|
102
|
-
## 使用示例
|
103
|
-
|
104
|
-
```python
|
105
|
-
import nlpertools
|
106
|
-
|
107
|
-
a = nlpertools.readtxt_list_all_strip('res.txt')
|
108
|
-
# 或
|
109
|
-
b = nlpertools.io.file.readtxt_list_all_strip('res.txt')
|
110
|
-
```
|
111
123
|
|
112
|
-
```bash
|
113
|
-
# 生成pypi双因素认证的实时密钥(需要提供key)
|
114
|
-
python -m nlpertools.get_2fa your_key
|
115
|
-
|
116
|
-
## git
|
117
|
-
python nlpertools.cli --git_push
|
118
|
-
python nlpertools.cli --git_pull
|
119
|
-
|
120
|
-
# 以下功能被nvitop替代,不推荐使用
|
121
|
-
## 监控gpu显存
|
122
|
-
python -m nlpertools.monitor.gpu
|
123
|
-
## 监控cpu
|
124
|
-
python -m nlpertools.monitor.memory
|
125
|
-
```
|
126
124
|
|
127
125
|
## 一些常用项目
|
128
126
|
|
@@ -130,3 +128,7 @@ nvitop
|
|
130
128
|
|
131
129
|
ydata-profiling
|
132
130
|
|
131
|
+
## 贡献
|
132
|
+
|
133
|
+
https://github.com/bigscience-workshop/data-preparation
|
134
|
+
|
@@ -41,6 +41,10 @@ src/nlpertools/draw/math_func.py
|
|
41
41
|
src/nlpertools/io/__init__.py
|
42
42
|
src/nlpertools/io/dir.py
|
43
43
|
src/nlpertools/io/file.py
|
44
|
+
src/nlpertools/llm/__init__.py
|
45
|
+
src/nlpertools/llm/call_llm_once.py
|
46
|
+
src/nlpertools/llm/infer.py
|
47
|
+
src/nlpertools/llm/price.py
|
44
48
|
src/nlpertools/monitor/__init__.py
|
45
49
|
src/nlpertools/monitor/gpu.py
|
46
50
|
src/nlpertools/monitor/memory.py
|
@@ -1,87 +0,0 @@
|
|
1
|
-
import argparse
|
2
|
-
import os
|
3
|
-
import uuid
|
4
|
-
import sys
|
5
|
-
|
6
|
-
import pyotp
|
7
|
-
|
8
|
-
"""
|
9
|
-
如何Debug cli.py
|
10
|
-
"""
|
11
|
-
|
12
|
-
|
13
|
-
def git_push():
    """
    Keep running ``git push`` until it succeeds.

    Pushing to GitHub frequently fails on unstable connections (the original
    motivation: networks in mainland China), so the command is retried
    automatically. There is no retry limit: the loop only ends once the
    command exits with status 0.
    """
    num = -1
    while 1:
        num += 1
        print("retry num: {}".format(num))
        # os.system returns the command's exit status (an int), never git's
        # textual output, so success must be detected from the status code.
        info = os.system("git push --set-upstream origin main")
        print(str(info))
        if info == 0:
            print("scucess")
            break
|
26
|
-
|
27
|
-
|
28
|
-
def git_pull():
    """
    Keep running ``git pull`` until it succeeds.

    Pulling from GitHub frequently fails on unstable connections (the original
    motivation: networks in mainland China), so the command is retried
    automatically. There is no retry limit: the loop only ends once the
    command exits with status 0.
    """
    num = -1
    while 1:
        num += 1
        print("retry num: {}".format(num))
        # os.system returns the command's exit status (an int), never git's
        # "fatal"/"error" text, so success must be detected from the status.
        info = os.system("git pull")
        print(str(info))
        if info == 0:
            print("scucess")
            break
|
41
|
-
|
42
|
-
|
43
|
-
def get_mac_address():
    """
    Print and return this machine's MAC address as ``xx:xx:xx:xx:xx:xx``.

    The value is derived from ``uuid.getnode()``; as the printed notice says,
    it is not guaranteed to be accurate.
    """
    # The last 12 hex digits of the UUID built from the node id are the
    # 48-bit hardware address.
    hex_digits = uuid.UUID(int=uuid.getnode()).hex[-12:]
    octets = []
    for start in range(0, 11, 2):
        octets.append(hex_digits[start:start + 2])
    mac_address = ":".join(octets)
    print("mac address 不一定准确")
    print(mac_address)
    return mac_address
|
49
|
-
|
50
|
-
|
51
|
-
def get_2af_value(key):
    """
    Print the given key followed by its current TOTP code.

    Original author's note: the key should be 7 characters long.
    """
    print(key)
    current_code = pyotp.TOTP(key).now()
    print(current_code)
|
58
|
-
|
59
|
-
|
60
|
-
def main():
    """
    Command-line entry point: parse the flags and run the selected operation.

    Only the first matching operation is executed, checked in this order:
    ``--gitpush``, ``--gitpull``, ``--mac_address``, ``--get_2fa`` (which
    additionally requires ``--get_2fa_key``).
    """
    parser = argparse.ArgumentParser(description="CLI tool for git operations and getting MAC address.")
    parser.add_argument('--gitpush', action='store_true', help='Perform git push operation.')
    # The help text previously said "push" for this flag (copy-paste slip).
    parser.add_argument('--gitpull', action='store_true', help='Perform git pull operation.')
    parser.add_argument('--mac_address', action='store_true', help='Get the MAC address.')

    parser.add_argument('--get_2fa', action='store_true', help='Get the 2fa value.')
    parser.add_argument('--get_2fa_key', type=str, help='Key used to compute the 2fa value.')

    args = parser.parse_args()

    if args.gitpush:
        git_push()
    elif args.gitpull:
        git_pull()
    elif args.mac_address:
        get_mac_address()
    elif args.get_2fa:
        if args.get_2fa_key:
            get_2af_value(args.get_2fa_key)
        else:
            print("Please provide a key as an argument.")
    else:
        # List the flags that actually exist; the old hint named a
        # non-existent --get_mac_address option.
        print("No operation specified. Use --gitpush, --gitpull, --mac_address or --get_2fa.")
|
84
|
-
|
85
|
-
|
86
|
-
# Run the CLI only when this file is executed directly, not when imported.
if __name__ == "__main__":
    main()
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
{nlpertools-1.0.8/src/nlpertools/monitor → nlpertools-1.0.10/src/nlpertools/llm}/__init__.py
RENAMED
File without changes
|
{nlpertools-1.0.8/src/nlpertools/template → nlpertools-1.0.10/src/nlpertools/monitor}/__init__.py
RENAMED
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|