nlpertools 1.0.6.dev0__py3-none-any.whl → 1.0.9__py3-none-any.whl

Sign up to get free protection for your applications and to get access to all the features.
nlpertools/__init__.py CHANGED
@@ -3,6 +3,7 @@
3
3
  # @Author : youshu.Ji
4
4
  from .algo.kmp import *
5
5
  from .data_structure.base_structure import *
6
+ from .draw import *
6
7
  from .dataprocess import *
7
8
  from .io.dir import *
8
9
  from .io.file import *
@@ -15,10 +16,8 @@ from .reminder import *
15
16
  from .utils_for_nlpertools import *
16
17
  from .wrapper import *
17
18
  from .monitor import *
19
+ from .cli import *
18
20
 
19
- import os
20
21
 
21
22
 
22
- DB_CONFIG_FILE = os.path.join(os.path.dirname(__file__),"default_db_config.yml")
23
-
24
- __version__ = '1.0.5'
23
+ __version__ = '1.0.9'
nlpertools/cli.py ADDED
@@ -0,0 +1,143 @@
1
+ import argparse
2
+ import os
3
+ import uuid
4
+ import sys
5
+
6
+ """
7
+ 如何Debug cli.py
8
+ """
9
+
10
+
11
def git_push(max_retries=None):
    """
    Run ``git push --set-upstream origin main`` repeatedly until it succeeds.

    Written for networks where pushing to GitHub fails intermittently.

    :param max_retries: number of retries allowed after the first attempt;
        ``None`` (the default) retries without limit.
    :return: True once a push exits with status 0, False if the retries ran out.
    """
    num = -1
    while max_retries is None or num < max_retries:
        num += 1
        print("retry num: {}".format(num))
        # os.system returns the command's exit status (an int), never git's
        # text output, so success has to be read from the status itself.
        info = os.system("git push --set-upstream origin main")
        print(str(info))
        if info == 0:
            print("scucess")
            return True
    return False
24
+
25
+
26
def git_pull(max_retries=None):
    """
    Run ``git pull`` repeatedly until it succeeds.

    Written for networks where talking to GitHub fails intermittently.

    :param max_retries: number of retries allowed after the first attempt;
        ``None`` (the default) retries without limit.
    :return: True once a pull exits with status 0, False if the retries ran out.
    """
    num = -1
    while max_retries is None or num < max_retries:
        num += 1
        print("retry num: {}".format(num))
        # os.system returns the command's exit status (an int), never git's
        # "fatal"/"error" text, so success has to be read from the status itself.
        info = os.system("git pull")
        print(str(info))
        if info == 0:
            print("scucess")
            return True
    return False
39
+
40
+
41
def get_mac_address():
    """
    Print and return the node id from ``uuid.getnode()`` as six colon-separated hex pairs.

    A caveat (in Chinese) that the address is not necessarily accurate is
    printed before the value.

    :return: the formatted address string
    """
    # Same 12 hex digits as the tail of uuid.UUID(int=node).hex.
    node_hex = format(uuid.getnode(), "032x")[-12:]
    pairs = []
    for start in range(0, 12, 2):
        pairs.append(node_hex[start:start + 2])
    mac_address = ":".join(pairs)
    print("mac address 不一定准确")
    print(mac_address)
    return mac_address
47
+
48
+
49
def get_2af_value(key):
    """
    Print and return the current TOTP code for ``key``.

    :param key: secret handed to ``pyotp.TOTP``
        (NOTE(review): the original note said the key "should be 7 characters" - confirm)
    :return: the value of ``TOTP.now()`` for that secret
    """
    # Imported here so the module still loads when pyotp is not installed.
    import pyotp

    print(key)
    totp = pyotp.TOTP(key)
    code = totp.now()
    print(code)
    return code
57
+
58
+
59
def start_gpu_usage_notify_server():
    """
    Serve ``GET /notify`` on 0.0.0.0:5000, answering with a count of idle GPUs.

    A GPU is counted when its ``memory.used`` row in the ``nvidia-smi`` csv
    output starts with "0". The call blocks inside ``app.run``.
    """
    from flask import Flask

    app = Flask(__name__)

    @app.route("/notify", methods=["GET"])
    def notify():
        # The response body can be built dynamically here as needed.
        # The pipe is closed explicitly instead of being left to the garbage collector.
        with os.popen("nvidia-smi --query-gpu=memory.used --format=csv") as pipe:
            usage = pipe.read().split("\n")[1:]  # [1:] drops the csv header row
        res = sum(1 for each in usage if each.startswith("0"))
        print(res)
        return str(res), 200

    app.run(host="0.0.0.0", port=5000)
76
+
77
+
78
def start_gpu_usage_notify_client():
    """
    Poll the GPU notify server once per second and pop a desktop notification
    whenever it reports at least one idle GPU.

    Runs until the process is interrupted; request failures are printed and
    polling continues.
    """
    import requests
    from plyer import notification
    import time

    SERVER_URL = 'http://127.0.0.1:5000/notify'  # API address of the server
    REQUEST_TIMEOUT = 5  # seconds; without it a stalled server blocks the loop forever

    def notify(text):
        # Send the desktop notification through plyer.
        notification.notify(
            title='远程通知',
            message=text,
            timeout=10  # keep the notification visible for 10 seconds
        )

    # Poll the server periodically for notifications.
    while True:
        try:
            response = requests.get(SERVER_URL, timeout=REQUEST_TIMEOUT)
            if response.status_code == 200:
                num = int(response.text)
                # NOTE(review): the source's indentation was lost; this else is
                # assumed to pair with `num > 0` - confirm.
                if num > 0:
                    notify(f"服务器有{num}张卡")
                    print(f"服务器有{num}张卡")
                else:
                    print("服务器没有新通知")
        except Exception as e:
            # Deliberately broad: any failure is reported and polling goes on.
            print(f"与服务器连接失败: {e}")

        time.sleep(1)
108
+
109
+
110
def main(argv=None):
    """
    Command-line entry point: run the single operation selected by a flag.

    :param argv: argument list to parse; ``None`` (the default) makes argparse
        read ``sys.argv[1:]``.
    """
    parser = argparse.ArgumentParser(description="CLI tool for git operations and getting MAC address.")
    parser.add_argument('--gitpush', action='store_true', help='Perform git push operation.')
    parser.add_argument('--gitpull', action='store_true', help='Perform git pull operation.')
    parser.add_argument('--mac_address', action='store_true', help='Get the MAC address.')

    parser.add_argument('--get_2fa', action='store_true', help='Get the 2fa value.')
    parser.add_argument('--get_2fa_key', type=str, help='Secret key used with --get_2fa.')
    parser.add_argument('--monitor_gpu_cli', action='store_true', help='Start the GPU usage notify client.')
    parser.add_argument('--monitor_gpu_ser', action='store_true', help='Start the GPU usage notify server.')

    args = parser.parse_args(argv)

    # Only the first matching flag is acted on.
    if args.gitpush:
        git_push()
    elif args.gitpull:
        git_pull()
    elif args.mac_address:
        get_mac_address()
    elif args.monitor_gpu_cli:
        start_gpu_usage_notify_client()
    elif args.monitor_gpu_ser:
        start_gpu_usage_notify_server()
    elif args.get_2fa:
        if args.get_2fa_key:
            get_2af_value(args.get_2fa_key)
        else:
            print("Please provide a key as an argument.")
    else:
        print("No operation specified.")
140
+
141
+
142
+ if __name__ == '__main__':
143
+ main()
nlpertools/data_client.py CHANGED
@@ -1,3 +1,4 @@
1
+ #encoding=utf-8
1
2
  # !/usr/bin/python3.8
2
3
  # -*- coding: utf-8 -*-
3
4
  # @Author : youshu.Ji
@@ -5,9 +6,11 @@ import datetime
5
6
  import json
6
7
  import logging
7
8
 
8
- from . import DB_CONFIG_FILE
9
9
  from .io.file import read_yaml
10
10
  from .utils.package import *
11
+ import os
12
+
13
+ DB_CONFIG_FILE = os.path.join(os.path.dirname(__file__), "default_db_config.yml")
11
14
 
12
15
  # import aioredis
13
16
  # import happybase
@@ -28,21 +31,24 @@ class Neo4jOps(object):
28
31
  NEO4J_TIMEOUT = 0.3
29
32
  pass
30
33
 
34
+
31
35
class SqliteOps(object):
    # Placeholder. The previous class body opened 'xx.db' with sqlite3 and ran
    # ad-hoc queries at class-definition time (i.e. on import); that code is
    # disabled, leaving an empty class.
    pass
51
+
46
52
 
47
53
  class MysqlOps(object):
48
54
  import pandas as pd
@@ -116,6 +122,41 @@ class EsOps(object):
116
122
  print(f"批量保存数据: {_res}")
117
123
 
118
124
 
125
class MongoDB_BETA:
    """
    Small wrapper around one MongoDB collection.

    Call :meth:`connect` (or use the instance as a context manager) before
    inserting or querying; call :meth:`close` when done.
    """

    def __init__(self, host='localhost', port=27017, db_name=None, collection_name=None):
        """
        Store connection settings; no connection is opened here.

        :param host: server host name
        :param port: server port
        :param db_name: database to select in :meth:`connect`
        :param collection_name: collection to select in :meth:`connect`
        """
        self.host = host
        self.port = port
        self.db_name = db_name
        self.collection_name = collection_name
        self.client = None
        self.db = None
        self.collection = None

    def connect(self):
        """Open the client and select the configured database and collection."""
        # NOTE(review): MongoClient is expected at module scope - confirm it is
        # provided by the module's imports.
        self.client = MongoClient(self.host, self.port)
        self.db = self.client[self.db_name]
        self.collection = self.db[self.collection_name]

    def close(self):
        """Close the client if one is open and drop the cached handles."""
        if self.client is not None:
            self.client.close()
        # Reset so a closed wrapper cannot be mistaken for a connected one.
        self.client = None
        self.db = None
        self.collection = None

    def __enter__(self):
        self.connect()
        return self

    def __exit__(self, exc_type, exc_value, traceback):
        self.close()
        return False

    def insert_data(self, data):
        """
        Insert one document, or every document of a list.

        :param data: a single document or a list of documents; an empty list is a no-op
        """
        if isinstance(data, list):
            if not data:
                # Nothing to insert; insert_many rejects an empty list.
                return
            self.collection.insert_many(data)
        else:
            self.collection.insert_one(data)

    def check_data_exists(self, query):
        """
        Check whether any document matches ``query``.

        :param query: filter passed to ``count_documents``
        :return: True when at least one document matches
        """
        return self.collection.count_documents(query) > 0
157
+
158
+
159
+
119
160
  class MongoOps(object):
120
161
  from pymongo import MongoClient
121
162
  def __init__(self, config=global_db_config["mongo"]):
@@ -348,8 +389,6 @@ class KafkaOps(object):
348
389
  print(recv)
349
390
 
350
391
 
351
-
352
-
353
392
  class MilvusOps(object):
354
393
  def __init__(self, config=global_db_config.milvus):
355
394
  from pymilvus import connections, Collection
nlpertools/dataprocess.py CHANGED
@@ -55,9 +55,9 @@ class Pattern:
55
55
  # 中文人名
56
56
  chinese_name_pattern = "(?:[\u4e00-\u9fa5·]{2,3})"
57
57
  # 英文人名
58
- english_name_pattern = "(^[a-zA-Z][a-zA-Z\s]{0,20}[a-zA-Z]$)"
58
+ english_name_pattern = r"(^[a-zA-Z][a-zA-Z\s]{0,20}[a-zA-Z]$)"
59
59
  # 纯数字
60
- pure_num_pattern = "\d+"
60
+ pure_num_pattern = r"\d+"
61
61
  # xxxx图/表 之类的表述
62
62
  pic_table_descript_pattern = ".{1,15}图"
63
63
 
@@ -66,20 +66,20 @@ class Pattern:
66
66
  hlink_pattern = (
67
67
  r"(https?|ftp|file)://[-A-Za-z0-9+&@#/%?=~_|!:,.;]+[-A-Za-z0-9+&@#/%=~_|]"
68
68
  )
69
- http_pattern = "(http|https):\/\/([\w.]+\/?)\S*/\S*"
69
+ http_pattern = r"(http|https):\/\/([\w.]+\/?)\S*/\S*"
70
70
  # 邮箱
71
- email_pattern = "[A-Za-z0-9\u4e00-\u9fa5]+@[a-zA-Z0-9_-]+(\.[a-zA-Z0-9_-]+)+"
71
+ email_pattern = r"[A-Za-z0-9\u4e00-\u9fa5]+@[a-zA-Z0-9_-]+(\.[a-zA-Z0-9_-]+)+"
72
72
  # html 可能过于严格了
73
- html_pattern = "<[\s\S]*?>"
73
+ html_pattern = r"<[\s\S]*?>"
74
74
  # 重复 “asdasdasdasd”
75
75
# Raw string so that \1 stays a regex backreference; in a plain string literal
# it is the control character chr(1) and the pattern never matches repeats.
repeat_pattern = r"(.)\1+"
76
76
  # 日期
77
- day_time_pattern = "\d{1,4}(-)(1[0-2]|0?[1-9])\1(0?[1-9]|[1-2]\d|30|31)"
77
+ day_time_pattern = r"\d{1,4}(-)(1[0-2]|0?[1-9])\1(0?[1-9]|[1-2]\d|30|31)"
78
78
  # 小时
79
- hour_time_pattern = "(?:[01]\d|2[0-3]):[0-5]\d:[0-5]\d"
79
+ hour_time_pattern = r"(?:[01]\d|2[0-3]):[0-5]\d:[0-5]\d"
80
80
  # 股票
81
81
  stock_pattern = (
82
- "(s[hz]|S[HZ])(000[\d]{3}|002[\d]{3}|300[\d]{3}|600[\d]{3}|60[\d]{4})"
82
+ r"(s[hz]|S[HZ])(000[\d]{3}|002[\d]{3}|300[\d]{3}|600[\d]{3}|60[\d]{4})"
83
83
  )
84
84
 
85
85
  # 一般是需要替换的
@@ -91,7 +91,7 @@ class Pattern:
91
91
  # 微博视频等
92
92
  weibo_pattern = r"([\s]\w+(的微博视频)|#|【|】|转发微博)"
93
93
  # @
94
- at_pattern = "@\w+"
94
+ at_pattern = r"@\w+"
95
95
 
96
96
  # from https://github.com/bigscience-workshop/data-preparation pii
97
97
  year_patterns = [
@@ -116,7 +116,7 @@ class Pattern:
116
116
  ipv4_pattern = r'(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)(?:\.(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)){3}'
117
117
  ipv6_pattern = r'(?:[0-9a-fA-F]{1,4}:){7,7}[0-9a-fA-F]{1,4}|(?:[0-9a-fA-F]{1,4}:){1,7}:|(?:[0-9a-fA-F]{1,4}:){1,6}:[0-9a-fA-F]{1,4}|(?:[0-9a-fA-F]{1,4}:){1,5}(?::[0-9a-fA-F]{1,4}){1,2}|(?:[0-9a-fA-F]{1,4}:){1,4}(?::[0-9a-fA-F]{1,4}){1,3}|(?:[0-9a-fA-F]{1,4}:){1,3}(?::[0-9a-fA-F]{1,4}){1,4}|(?:[0-9a-fA-F]{1,4}:){1,2}(?::[0-9a-fA-F]{1,4}){1,5}|[0-9a-fA-F]{1,4}:(?:(?::[0-9a-fA-F]{1,4}){1,6})|:(?:(?::[0-9a-fA-F]{1,4}){1,7}|:)|fe80:(?::[0-9a-fA-F]{0,4}){0,4}%[0-9a-zA-Z]{1,}|::(?:ffff(?::0{1,4}){0,1}:){0,1}(?:(?:25[0-5]|(?:2[0-4]|1{0,1}[0-9]){0,1}[0-9])\.){3,3}(?:25[0-5]|(?:2[0-4]|1{0,1}[0-9]){0,1}[0-9])|(?:[0-9a-fA-F]{1,4}:){1,4}:(?:(?:25[0-5]|(?:2[0-4]|1{0,1}[0-9]){0,1}[0-9])\.){3,3}(25[0-5]|(?:2[0-4]|1{0,1}[0-9]){0,1}[0-9])'
118
118
  ip_pattern = r"(?:^|[\b\s@?,!;:\'\")(.\p{Han}])(" + r"|".join(
119
- [ipv4_pattern, ipv6_pattern]) + ")(?:$|[\s@,?!;:\'\"(.\p{Han}])"
119
+ [ipv4_pattern, ipv6_pattern]) + r")(?:$|[\s@,?!;:\'\"(.\p{Han}])"
120
120
 
121
121
  # https://regex101.com/r/EpA5B7/1
122
122
  email_line_pattern = r'''
@@ -466,7 +466,7 @@ class TextProcess(object):
466
466
  p = re.compile(pattern, re.S)
467
467
  text = p.sub("", text)
468
468
 
469
- dr = re.compile("@\w+", re.S)
469
+ dr = re.compile(r"@\w+", re.S)
470
470
  text = dr.sub("", text)
471
471
 
472
472
  return text
@@ -527,7 +527,7 @@ class TextProcess(object):
527
527
  text = re.sub(pattern, replace, text)
528
528
  return text
529
529
 
530
- def calc_proportion_zh(self,text):
530
+ def calc_proportion_zh(self, text):
531
531
  text = text.strip()
532
532
  # 如果是中国英文的情况,并且英文有空格分开
533
533
  if " " in text:
@@ -538,6 +538,8 @@ class TextProcess(object):
538
538
  chinese_count += 1
539
539
  else:
540
540
  pass
541
+
542
+
541
543
  class CopyFunc():
542
544
  # from https://github.com/lemon234071/clean-dialog
543
545
  def is_chinese_char(cp):
@@ -597,6 +599,20 @@ def convert_basic2fullwidth(sentence):
597
599
  new_sentence += char
598
600
  return new_sentence
599
601
 
602
+
603
def clean_illegal_chars_for_excel(df):
    """
    Return a copy of ``df`` with the control characters that openpyxl rejects
    when writing Excel files removed from every string cell.

    Non-string cells are returned unchanged; tab, newline and carriage return
    are kept.

    :param df: pandas DataFrame
    :return: new DataFrame with cleaned string cells
    """
    # Compiled once instead of once per cell.
    illegal_chars = re.compile(r'[\x00-\x08\x0B\x0C\x0E-\x1F]')

    def remove_illegal_chars(s):
        if isinstance(s, str):
            return illegal_chars.sub('', s)
        return s

    # DataFrame.map only exists from pandas 2.1; older releases offer the same
    # element-wise operation as applymap.
    elementwise = df.map if hasattr(df, "map") else df.applymap
    return elementwise(remove_illegal_chars)
614
+
615
+
600
616
  if __name__ == "__main__":
601
617
  pattern_for_filter = [
602
618
  Pattern.redundancy_space_pattern,
File without changes
@@ -0,0 +1,81 @@
1
+ #!/usr/bin/python3.8
2
+ # -*- coding: utf-8 -*-
3
+ # @Author : youshu.Ji
4
+ from ..utils.package import plt
5
+
6
+
7
def confused_matrix(confuse_matrix, ticklabels=None, save_path='tmp.jpg'):
    """
    Draw a confusion matrix as an annotated seaborn heatmap, save it, then show it.

    :param confuse_matrix: matrix handed to ``sns.heatmap``
    :param ticklabels: labels for both axes; defaults to ``["l1", "l2", "l31"]``
    :param save_path: where the figure is written; defaults to ``'tmp.jpg'``
    """
    import seaborn as sns

    if ticklabels is None:
        ticklabels = ["l1", "l2", "l31"]

    sns.set()
    f, ax = plt.subplots()
    sns.heatmap(confuse_matrix, annot=True, fmt=".3g", ax=ax, cmap='rainbow',
                xticklabels=ticklabels, yticklabels=ticklabels)  # draw the heatmap

    ax.set_title('confusion matrix')  # title
    ax.set_xlabel('predict')  # x axis
    ax.set_ylabel('true')  # y axis

    # Save before the blocking show() so the file is written no matter what
    # happens to the figure once the window is closed.
    f.savefig(save_path, bbox_inches='tight')
    plt.show()
21
+
22
+
23
def plot_histogram(data, bin_size, max_bin):
    """
    Plot a histogram of ``data`` in which every value above ``max_bin`` is
    counted in the last bar, whose tick is labelled "∞".

    :param data: iterable of numbers
    :param bin_size: width of each bin
    :param max_bin: values greater than this are pooled into the last bar
    :return: None; the figure is shown with ``plt.show()``
    """
    import matplotlib.pyplot as plt
    import numpy as np
    from matplotlib.ticker import MaxNLocator

    # Place overflowing values in the middle of the bin that starts at max_bin.
    # A fixed offset (max_bin + 3) lands beyond the last bin edge when bin_size <= 3.
    overflow_value = max_bin + bin_size / 2
    processed_data = [length if length <= max_bin else overflow_value for length in data]

    plt.figure(figsize=(12, 8))
    bins = np.arange(0, max_bin + 2 * bin_size, bin_size)
    # Draw the histogram.
    n, _, patches = plt.hist(processed_data, bins=bins, edgecolor='black', color='skyblue', alpha=0.7,
                             linewidth=0)

    # Label the last tick "∞".
    axes = plt.gca()
    axes.set_xticks(bins)
    axes.set_xticklabels([str(i) for i in plt.xticks()[0][:-1]] + ["∞"])

    # The mean is taken over the raw data, not the clamped values.
    mean_val = np.mean(data)
    plt.axvline(mean_val, color='red', linestyle='dashed', linewidth=1)
    plt.text(mean_val + bin_size / 10, max(n) * 0.9, f'Mean: {mean_val:.2f}', color='red')

    # Title and axis labels.
    plt.title('Module Line Number Distribution', fontsize=16, fontweight='bold')
    plt.xlabel('module line number', fontsize=14)
    plt.ylabel('frequency', fontsize=14)

    plt.grid(True, linestyle='--', alpha=0.6)

    plt.xticks(fontsize=12)
    plt.yticks(fontsize=12)

    # Write the count on top of every bar.
    for count, patch in zip(n, patches):
        plt.text(patch.get_x() + patch.get_width() / 2, patch.get_height(),
                 str(int(count)), ha='center', va='bottom', fontsize=12)
    plt.gca().yaxis.set_major_locator(MaxNLocator(integer=True))
    # Show the chart.
    plt.show()
75
+
76
+
77
+ if __name__ == '__main__':
78
+ # 调整区间大小
79
+ bin_size = 50
80
+ # 示例模块长度数据
81
+ plot_histogram([1, 100, 999, 1000, 1002, 1100, 1150], bin_size, max_bin=1000)
@@ -0,0 +1,33 @@
1
+ # 数学函数
2
def draw_log():
    """Plot ``np.log`` over [0.1, 10] with dashed guide lines at x=1 and y=0, then show it."""
    import matplotlib.pyplot as plt
    import numpy as np
    from matplotlib.ticker import MultipleLocator, FormatStrFormatter

    # Sample data; np.log is the natural logarithm.
    xs = np.linspace(0.1, 10, 100)
    ys = np.log(xs)

    # New figure and axes, then the curve itself.
    fig, ax = plt.subplots()
    ax.plot(xs, ys)

    # Title and axis labels.
    ax.set_title("Logarithmic Function")
    ax.set_xlabel("x")
    ax.set_ylabel("log(x)")

    # Major x ticks every 1 unit, formatted with one decimal.
    x_axis = ax.xaxis
    x_axis.set_major_locator(MultipleLocator(1))
    x_axis.set_major_formatter(FormatStrFormatter("%.1f"))

    # Dashed guides: vertical at x=1, horizontal at y=0.
    ax.axvline(x=1, linestyle="--", color="gray")
    ax.axhline(y=0, linestyle="--", color="gray")

    # Show the figure.
    plt.show()
nlpertools/get_2fa.py ADDED
File without changes
nlpertools/io/dir.py CHANGED
@@ -10,7 +10,30 @@ def j_mkdir(name):
10
10
  os.makedirs(name, exist_ok=True)
11
11
 
12
12
 
13
- def get_filename(path) -> str:
13
def j_walk(name, suffix=None):
    """
    Collect the paths of all files under ``name``, recursively.

    :param name: directory to walk
    :param suffix: when truthy, keep only paths ending with it
    :return: list of file paths
    """
    collected = []
    for folder, _subdirs, filenames in os.walk(name):
        for filename in filenames:
            full_path = os.path.join(folder, filename)
            if suffix and not full_path.endswith(suffix):
                continue
            collected.append(full_path)
    return collected
21
+
22
+
23
def windows_to_wsl_path(windows_path):
    """
    Convert a Windows path to its WSL form.

    ``C:\\dir\\f`` and ``C:/dir/f`` become ``/mnt/c/dir/f``; a path without a
    drive prefix only has its backslashes turned into forward slashes.

    :param windows_path: path string in Windows notation
    :return: the converted path string
    """
    # Drive prefix: any first character, then ':' and a separator of either kind.
    if windows_path[1:3] in (':\\', ':/'):
        drive_letter = windows_path[0].lower()
        rest = windows_path[2:].replace('\\', '/')
        return f'/mnt/{drive_letter}{rest}'
    # No drive prefix: only the separators need converting.
    return windows_path.replace('\\', '/')
34
+
35
+
36
def get_filename(path, suffix=True) -> str:
    """
    Return the last component of ``path``.

    :param path: file path, e.g. ``'***/**/***.txt'``
    :param suffix: keep the extension when True (default); strip it when False
    :return: the file name
    """
    filename = os.path.split(path)[-1]
    if not suffix:
        # splitext removes only the final extension, so dotted names such as
        # 'v1.2.report.txt' keep their stem and dotfiles are not emptied.
        filename = os.path.splitext(filename)[0]
    return filename
22
47
 
23
48
 
24
- def j_listdir(dir_name, including_dir=True):
25
- # yield
49
def listdir(dir_name, including_dir=True):
    """
    List the entries of ``dir_name``.

    :param dir_name: directory to list
    :param including_dir: when True, prefix every entry with ``dir_name``;
        when False, return the bare entry names
    :return: list of entries
    """
    entries = os.listdir(dir_name)
    if not including_dir:
        return list(entries)
    return [os.path.join(dir_name, entry) for entry in entries]
55
+
56
+
57
+ def listdir_yield(dir_name, including_dir=True):
26
58
  filenames = os.listdir(dir_name)
27
59
  for filename in filenames:
28
60
  if including_dir:
nlpertools/io/file.py CHANGED
@@ -5,7 +5,6 @@ import codecs
5
5
  import json
6
6
  import pickle
7
7
  import random
8
- import time
9
8
  from itertools import (takewhile, repeat)
10
9
  import pandas as pd
11
10
  # import omegaconf
@@ -15,10 +14,16 @@ from ..utils.package import *
15
14
  LARGE_FILE_THRESHOLD = 1e5
16
15
 
17
16
 
17
def safe_filename(filename: str) -> str:
    """Return ``filename`` with each of the characters ``\\ / : * ? " < > |`` replaced by ``_``."""
    forbidden = '\\/:*?"<>|'
    table = str.maketrans({char: '_' for char in forbidden})
    return filename.translate(table)
21
+
22
+
18
23
def read_yaml(path, omega=False):
    """
    Load a YAML file.

    :param path: path of the YAML file, read as UTF-8
    :param omega: when True, load through ``omegaconf.OmegaConf.load`` instead
    :return: the parsed content
    """
    if omega:
        return omegaconf.OmegaConf.load(path)
    # The file is closed as soon as parsing finishes instead of being left open.
    # NOTE(review): yaml.FullLoader is not meant for untrusted input - confirm
    # callers only pass trusted files.
    with codecs.open(path, encoding='utf-8') as f:
        return yaml.load(f, Loader=yaml.FullLoader)
22
27
 
23
28
 
24
29
  def _merge_file(filelist, save_filename, shuffle=False):
@@ -52,7 +57,7 @@ load_from_json
52
57
 
53
58
 
54
59
  # 读txt文件 一次全读完 返回list 去换行
55
- def readtxt_list_all_strip(path, encoding='utf-8'):
60
+ def readtxt_list_all_strip(path, encoding='utf-8') -> list:
56
61
  file_line_num = iter_count(path)
57
62
  lines = []
58
63
  with codecs.open(path, 'r', encoding) as r:
@@ -67,7 +72,7 @@ def readtxt_list_all_strip(path, encoding='utf-8'):
67
72
 
68
73
 
69
74
  # 读txt 一次读一行 最后返回list
70
- def readtxt_list_each(path):
75
+ def readtxt_list_each(path) -> list:
71
76
  lines = []
72
77
  with codecs.open(path, 'r', 'utf-8') as r:
73
78
  line = r.readline()
@@ -77,7 +82,7 @@ def readtxt_list_each(path):
77
82
  return lines
78
83
 
79
84
 
80
- def readtxt_list_each_strip(path):
85
+ def readtxt_list_each_strip(path) -> list:
81
86
  """
82
87
  yield方法
83
88
  """
@@ -89,14 +94,14 @@ def readtxt_list_each_strip(path):
89
94
 
90
95
 
91
96
  # 读txt文件 一次全读完 返回list
92
- def readtxt_list_all(path):
97
+ def readtxt_list_all(path) -> list:
93
98
  with codecs.open(path, 'r', 'utf-8') as r:
94
99
  lines = r.readlines()
95
100
  return lines
96
101
 
97
102
 
98
103
  # 读byte文件 读成一条string
99
- def readtxt_byte(path, encoding="utf-8"):
104
+ def readtxt_byte(path, encoding="utf-8") -> str:
100
105
  with codecs.open(path, 'rb') as r:
101
106
  lines = r.read()
102
107
  lines = lines.decode(encoding)
@@ -104,7 +109,7 @@ def readtxt_byte(path, encoding="utf-8"):
104
109
 
105
110
 
106
111
  # 读txt文件 读成一条string
107
- def readtxt_string(path, encoding="utf-8"):
112
+ def readtxt_string(path, encoding="utf-8") -> str:
108
113
  with codecs.open(path, 'r', encoding) as r:
109
114
  lines = r.read()
110
115
  return lines.replace('\r', '')
@@ -236,12 +241,12 @@ def load_from_jsonl(path):
236
241
  return corpus
237
242
 
238
243
 
239
def save_pkl(data, path):
    """
    Pickle ``data`` into the file at ``path``.

    :param data: any picklable object
    :param path: destination file, opened in binary write mode
    """
    with open(path, 'wb') as f:
        pickle.dump(data, f)


# Backward-compatible alias: this function was called pickle_save before the rename.
pickle_save = save_pkl
242
247
 
243
248
 
244
def load_pkl(path):
    """
    Load and return the pickled object stored at ``path``.

    NOTE: unpickling runs whatever the file instructs; only load trusted files.

    :param path: pickle file, opened in binary read mode
    :return: the unpickled object
    """
    with open(path, 'rb') as f:
        data = pickle.load(f)
    return data


# Backward-compatible alias: this function was called pickle_load before the rename.
pickle_load = load_pkl
@@ -261,6 +266,7 @@ def save_to_mongo():
261
266
  """
262
267
  pass
263
268
 
269
+
264
270
  def load_from_mongo():
265
271
  pass
266
272
 
@@ -274,4 +280,4 @@ def unmerge_cells_df(df) -> pd.DataFrame:
274
280
  else:
275
281
  values.append(i)
276
282
  df[column] = values
277
- return df
283
+ return df
nlpertools/ml.py CHANGED
@@ -17,10 +17,31 @@ from .io.file import readtxt_list_all_strip, writetxt_w_list, save_to_csv
17
17
  from .utils.package import *
18
18
 
19
19
 
20
def estimate_pass_at_k(num_samples: list, num_correct: list, k):
    """
    Estimate pass@k for each problem and return the estimates in an array.

    Copied from https://huggingface.co/spaces/evaluate-metric/code_eval/blob/main/code_eval.py

    :param num_samples: samples per problem; a single int is applied to every
        problem, otherwise a sequence aligned with ``num_correct``
    :param num_correct: number of correct samples per problem
    :param k: the k in pass@k
    :return: numpy array with one estimate per problem
    """

    def _single_estimate(n: int, c: int) -> float:
        # 1 - comb(n - c, k) / comb(n, k), evaluated as a product.
        if n - c < k:
            return 1.0
        return 1.0 - np.prod(1.0 - k / np.arange(n - c + 1, n + 1))

    if isinstance(num_samples, int):
        samples_per_problem = itertools.repeat(num_samples, len(num_correct))
    else:
        assert len(num_samples) == len(num_correct)
        samples_per_problem = iter(num_samples)

    estimates = []
    for n, c in zip(samples_per_problem, num_correct):
        estimates.append(_single_estimate(int(n), int(c)))
    return np.array(estimates)
40
+
41
+
20
42
  def calc_llm_train_activation_memory(
21
- model_name, sequence_length, batch_size, hidden_dim, lay_number, attention_heads_num, gpu_num=1
43
+ model_name, sequence_length, batch_size, hidden_dim, lay_number, attention_heads_num, gpu_num=1
22
44
  ):
23
-
24
45
  """
25
46
  return bytes
26
47
 
@@ -33,18 +54,18 @@ def calc_llm_train_activation_memory(
33
54
  # FFN
34
55
  # Layer Norm
35
56
  r1 = (
36
- sequence_length
37
- * batch_size
38
- * hidden_dim
39
- * lay_number
40
- * (34 + 5 * attention_heads_num * sequence_length / hidden_dim)
57
+ sequence_length
58
+ * batch_size
59
+ * hidden_dim
60
+ * lay_number
61
+ * (34 + 5 * attention_heads_num * sequence_length / hidden_dim)
41
62
  )
42
63
  # reference2
43
64
  r2 = (
44
- lay_number*(2 * sequence_length * attention_heads_num + 16 * hidden_dim)
45
- * sequence_length
46
- * batch_size
47
- / gpu_num
65
+ lay_number * (2 * sequence_length * attention_heads_num + 16 * hidden_dim)
66
+ * sequence_length
67
+ * batch_size
68
+ / gpu_num
48
69
  )
49
70
  print(r1)
50
71
  print(r2)
@@ -80,7 +101,7 @@ class DataStructure:
80
101
  }
81
102
  ner_input_example = "这句话一共有两个实体分别为大象和老鼠。"
82
103
  ner_label_example = (
83
- list("OOOOOOOOOOOOO") + ["B-s", "I-s"] + ["O"] + ["B-o", "I-o"] + ["O"]
104
+ list("OOOOOOOOOOOOO") + ["B-s", "I-s"] + ["O"] + ["B-o", "I-o"] + ["O"]
84
105
  )
85
106
 
86
107
 
@@ -135,7 +156,7 @@ class STEM(object):
135
156
  if each_srl:
136
157
  args = []
137
158
  for arg in each_srl:
138
- args.extend(seg[arg[1] : arg[2] + 1])
159
+ args.extend(seg[arg[1]: arg[2] + 1])
139
160
  # 添加上谓词
140
161
  args.insert(each_srl[0][2] - each_srl[0][1] + 1, seg[wdx])
141
162
  events.append(args)
@@ -174,7 +195,7 @@ def subject_object_labeling(spo_list, text):
174
195
  q_list_length = len(q_list)
175
196
  k_list_length = len(k_list)
176
197
  for idx in range(k_list_length - q_list_length + 1):
177
- t = [q == k for q, k in zip(q_list, k_list[idx : idx + q_list_length])]
198
+ t = [q == k for q, k in zip(q_list, k_list[idx: idx + q_list_length])]
178
199
  # print(idx, t)
179
200
  if all(t):
180
201
  # print(idx)
@@ -187,8 +208,8 @@ def subject_object_labeling(spo_list, text):
187
208
  if len(spo) == 2:
188
209
  labeling_list[idx_start + 1] = "I-" + spo_type
189
210
  elif len(spo) >= 3:
190
- labeling_list[idx_start + 1 : idx_start + len(spo)] = ["I-" + spo_type] * (
191
- len(spo) - 1
211
+ labeling_list[idx_start + 1: idx_start + len(spo)] = ["I-" + spo_type] * (
212
+ len(spo) - 1
192
213
  )
193
214
  else:
194
215
  pass
@@ -239,12 +260,12 @@ def convert_crf_format_10_fold(corpus, objdir_path):
239
260
  split_position = int(len(corpus) / 10)
240
261
  for k in range(0, 10):
241
262
  if k == 9:
242
- dev_set = corpus[k * split_position :]
263
+ dev_set = corpus[k * split_position:]
243
264
  train_set = corpus[: k * split_position]
244
265
  else:
245
- dev_set = corpus[k * split_position : (k + 1) * split_position]
266
+ dev_set = corpus[k * split_position: (k + 1) * split_position]
246
267
  train_set = (
247
- corpus[: k * split_position] + corpus[(k + 1) * split_position :]
268
+ corpus[: k * split_position] + corpus[(k + 1) * split_position:]
248
269
  )
249
270
  writetxt_w_list(
250
271
  train_set, os.path.join(objdir_path, "train{}.txt".format(k + 1))
@@ -292,12 +313,41 @@ def kfold_txt(corpus, path, k=9, is_shuffle=True):
292
313
  if is_shuffle:
293
314
  random.shuffle(corpus)
294
315
  split_position = int(len(corpus) / 10)
295
- train_set, dev_set = corpus[: k * split_position], corpus[k * split_position :]
316
+ train_set, dev_set = corpus[: k * split_position], corpus[k * split_position:]
296
317
  writetxt_w_list(train_set, os.path.join(path, "train.tsv"), num_lf=1)
297
318
  writetxt_w_list(dev_set, os.path.join(path, "test.tsv"), num_lf=1)
298
319
  writetxt_w_list(dev_set, os.path.join(path, "dev.tsv"), num_lf=1)
299
320
 
300
321
 
322
def sample():
    """
    Demonstrate a stratified 90/10 train/test split on a synthetic DataFrame.

    Builds 100 rows, stratifies on column ``y`` and prints the frame and the
    sizes of the two resulting parts.
    """
    import pandas as pd
    from sklearn.model_selection import StratifiedShuffleSplit

    # Stand-in for a real DataFrame.
    df = pd.DataFrame({
        "count_line": list(range(100)),
        "x": list(range(100)),
        "y": [i // 10 for i in range(100)],
    })
    print(df)

    # Splitter with a test share of 0.1; stratification is driven by df['y'] below.
    split = StratifiedShuffleSplit(n_splits=1, test_size=0.1, random_state=42)

    # Indices of the train and test rows.
    train_index, test_index = next(split.split(df, df['y']))

    # split() yields positional indices, so rows are selected by position;
    # label-based .loc only agrees with them for a default RangeIndex.
    train_df = df.iloc[train_index]
    test_df = df.iloc[test_index]

    # Print the number of rows in each part.
    print("训练集行数:", len(train_df))
    print("测试集行数:", len(test_df))
349
+
350
+
301
351
  def kfold_df(df, save_dir=None):
302
352
  """
303
353
  划分train test val集, 写为windows可读的csv。
@@ -389,7 +439,7 @@ def split_sentence(sentence, language="chinese", cross_line=True):
389
439
  for idx, char in enumerate(sentence):
390
440
  if idx == len(sentence) - 1:
391
441
  if char in split_signs:
392
- sentences.append(sentence[start_idx : idx + 1].strip())
442
+ sentences.append(sentence[start_idx: idx + 1].strip())
393
443
  start_idx = idx + 1
394
444
  else:
395
445
  sentences.append(sentence[start_idx:].strip())
@@ -399,10 +449,10 @@ def split_sentence(sentence, language="chinese", cross_line=True):
399
449
  if idx < len(sentence) - 2:
400
450
  # 处理。”。
401
451
  if sentence[idx + 2] not in split_signs:
402
- sentences.append(sentence[start_idx : idx + 2].strip())
452
+ sentences.append(sentence[start_idx: idx + 2].strip())
403
453
  start_idx = idx + 2
404
454
  elif sentence[idx + 1] not in split_signs:
405
- sentences.append(sentence[start_idx : idx + 1].strip())
455
+ sentences.append(sentence[start_idx: idx + 1].strip())
406
456
  start_idx = idx + 1
407
457
  return sentences
408
458
 
@@ -480,4 +530,4 @@ if __name__ == "__main__":
480
530
  attention_heads_num=32,
481
531
  gpu_num=1
482
532
  )
483
- print(res, "G")
533
+ print(res, "G")
nlpertools/other.py CHANGED
@@ -5,10 +5,13 @@ import itertools
5
5
  import os
6
6
  import re
7
7
  import string
8
+ import subprocess
9
+ import threading
8
10
  from concurrent.futures import ThreadPoolExecutor
9
11
  from functools import reduce
10
12
  import math
11
13
  import datetime
14
+ import difflib
12
15
  import psutil
13
16
  from .io.file import writetxt_w_list, writetxt_a
14
17
  # import numpy as np
@@ -27,6 +30,149 @@ ENGLISH_PUNCTUATION = list(',.;:\'"!?<>()')
27
30
  OTHER_PUNCTUATION = list('!@#$%^&*')
28
31
 
29
32
 
33
def setup_logging(log_file):
    """
    Configure logging through ``logging.basicConfig``: INFO level, timestamped
    records, written to ``log_file``.

    Args:
        log_file (str): Path to the log file.
    """
    settings = {
        "filename": log_file,
        "level": logging.INFO,
        "format": '%(asctime)s - %(levelname)s - %(message)s',
        "datefmt": '%Y-%m-%d %H:%M:%S',
    }
    logging.basicConfig(**settings)
46
+
47
+
48
def get_diff_parts(str1, str2):
    """
    List the differing segments between two sequences.

    :param str1: first sequence
    :param str2: second sequence
    :return: list of ``(tag, piece_of_str1, piece_of_str2)`` tuples for every
        'replace', 'delete' or 'insert' opcode of ``difflib.SequenceMatcher``
    """
    changed_tags = ('replace', 'delete', 'insert')
    opcodes = difflib.SequenceMatcher(None, str1, str2).get_opcodes()
    return [
        (tag, str1[i1:i2], str2[j1:j2])
        for tag, i1, i2, j1, j2 in opcodes
        if tag in changed_tags
    ]
59
+
60
+
61
def run_cmd_with_timeout(cmd, timeout):
    """
    Run a shell command and give up after ``timeout`` seconds.

    Based on https://juejin.cn/post/7391703459803086848

    :param cmd: command line, executed through the shell
    :param timeout: seconds to wait before the command is killed
    :return: ``(True, stdout)`` when the command exits with status 0,
        ``(False, stdout)`` when it exits with another status, and
        ``(False, message)`` when it timed out
    """
    process = subprocess.Popen(cmd, shell=True, encoding="utf-8", errors="ignore", stdout=subprocess.PIPE,
                               stderr=subprocess.PIPE)
    try:
        # communicate() enforces the timeout itself, so no helper thread is needed.
        stdout, _stderr = process.communicate(timeout=timeout)
    except subprocess.TimeoutExpired:
        print(f"Terminating {cmd}")
        process.kill()
        try:
            # Reap the process and drain its pipes, but do not wait forever in
            # case a grandchild started by the shell still holds them open.
            process.communicate(timeout=1)
        except subprocess.TimeoutExpired:
            pass
        print("Terminated successfully")
        return False, f"{cmd} is running over {timeout}s"
    # The second item is stdout in both the success and the failure case.
    return process.returncode == 0, stdout
91
+
92
+
93
def print_three_line_table(df):
    """
    Render ``df`` as a three-line (booktabs-style) HTML table, print the HTML,
    write it to ``temp.html`` in the working directory and open it in a browser.

    :param df: pandas DataFrame to render (the index is not included)
    """
    # TODO: support red highlighting of cells like in Excel.
    import webbrowser

    # Convert the DataFrame to an HTML table without the default borders.
    html_table = df.to_html(index=False)
    html_table = html_table.replace('border="1"', 'border="0"')

    first_line_px = str(2)
    second_line_px = str(1)
    third_line_px = str(2)
    # CSS of the three-line table (thead = header, tr = row, td = cell).
    style = """
<style>

table {
    border-collapse: collapse;
}

tr, td, th {
    text-align: center; /* 水平居中文本 */
    vertical-align: middle; /* 垂直居中文本 */
}
thead tr {
    border-top: (first_line_px)px solid black;
    border-bottom: (second_line_px)px solid black;
}

thead th {
    border-bottom: (second_line_px)px solid black;
}

tbody tr td {
    border-bottom: 0px solid black;
}

tbody tr:last-child td {
    border-bottom: (third_line_px)px solid black;
}
</style>"""
    style = style.replace("(first_line_px)", first_line_px).replace("(second_line_px)", second_line_px).replace(
        "(third_line_px)", third_line_px)
    # The document head was previously built but never emitted; it carries the
    # charset declaration the browser needs to decode non-ASCII cell text.
    head = f"""<!DOCTYPE html>
<html lang="zh">
<head>
<meta charset="UTF-8">
<title>页面标题</title>{style}
</head>"""
    # Combine head, style and table into one complete document.
    html = f"{head}\n<body>\n{html_table}\n</body>\n</html>"
    print(html)
    temp_file_path = "temp.html"
    # Written as UTF-8 to match the declared charset rather than the locale default.
    with open(temp_file_path, "w", encoding="utf-8") as f:
        f.write(html)
    webbrowser.open('file://' + os.path.realpath(temp_file_path))
157
+
158
+
159
def jprint(obj, depth=0):
    """
    Print a nested dict/list structure, one key header or leaf value per line.

    Each dict key is printed as a header prefixed with ``depth`` "|" characters
    and framed by dashes that get shorter as the depth grows.

    :param obj: dict, list or leaf value
    :param depth: current nesting depth (callers normally leave it at 0)
    """
    if isinstance(obj, dict):
        sep = "-" * (10 - depth * 3)
        for k, v in obj.items():
            print(depth * "|", sep, k, sep)
            # Pass the depth on; without it nested dicts restart at depth 0.
            jprint(v, depth + 1)
    elif isinstance(obj, list):
        for v in obj:
            jprint(v, depth + 1)
    else:
        print(obj)
170
+
171
+
172
def print_split(sign="=", num=20):
    """Print a separator line: ``sign`` repeated ``num`` times."""
    divider = sign * num
    print(divider)
174
+
175
+
30
176
  def seed_everything():
31
177
  import torch
32
178
  # seed everything
@@ -82,21 +228,6 @@ def convert_np_to_py(obj):
82
228
  return obj
83
229
 
84
230
 
85
- def git_push():
86
- """
87
- 针对国内提交github经常失败,自动提交
88
- """
89
- num = -1
90
- while 1:
91
- num += 1
92
- print("retry num: {}".format(num))
93
- info = os.system("git push --set-upstream origin main")
94
- print(str(info))
95
- if not str(info).startswith("fatal"):
96
- print("scucess")
97
- break
98
-
99
-
100
231
  def snake_to_camel(s: str) -> str:
101
232
  """
102
233
  author: u
@@ -235,25 +366,22 @@ def stress_test(func, ipts):
235
366
  return results
236
367
 
237
368
 
238
- def get_substring_loc(text, subtext):
239
- res = re.finditer(
240
- subtext.replace('\\', '\\\\').replace('?', '\?').replace('(', '\(').replace(')', '\)').replace(']',
241
- '\]').replace(
242
- '[', '\[').replace('+', '\+'), text)
243
- l, r = [i for i in res][0].regs[0]
244
- return l, r
245
-
246
-
247
369
def squeeze_list(high_dim_list):
    """Flatten exactly one level of nesting and return a new list.

    Args:
        high_dim_list: An iterable whose elements are themselves iterables.

    Returns:
        A list with the elements of every inner iterable, in order.
    """
    flattened = []
    for inner in high_dim_list:
        flattened.extend(inner)
    return flattened
249
371
 
250
372
 
251
373
def unsqueeze_list(flatten_list, each_element_len):
    """Regroup a flat list into consecutive rows of ``each_element_len`` items.

    NOTE: the original author flags this function as incorrect and superseded
    by ``split_list``. Only complete rows are produced: when the length is not
    a multiple of ``each_element_len`` the trailing elements are dropped.

    Args:
        flatten_list: The flat sequence to regroup.
        each_element_len: Number of items per row.

    Returns:
        A list of slices of ``flatten_list``, each ``each_element_len`` long.
    """
    # Floor division is what discards the incomplete final row.
    full_rows = len(flatten_list) // each_element_len
    rows = []
    for row_idx in range(full_rows):
        start = row_idx * each_element_len
        rows.append(flatten_list[start:start + each_element_len])
    return rows
255
378
 
256
379
 
380
def split_list(input_list, chunk_size):
    """Split a sequence into consecutive chunks of at most ``chunk_size`` items.

    Unlike a floor-division based split, the final chunk is kept even when it
    is shorter than ``chunk_size``, so no elements are lost.

    Args:
        input_list: The sequence to split (anything supporting len and slicing).
        chunk_size: Maximum number of items per chunk; must be positive.

    Returns:
        A list of slices of ``input_list``; empty when the input is empty.

    Raises:
        ValueError: If ``chunk_size`` is not positive.
    """
    # A negative step would make range() empty and silently drop every
    # element; a zero step fails with an unrelated-looking range() error.
    if chunk_size <= 0:
        raise ValueError("chunk_size must be a positive integer, got {!r}".format(chunk_size))
    return [input_list[start:start + chunk_size] for start in range(0, len(input_list), chunk_size)]
383
+
384
+
257
385
  def auto_close():
258
386
  """
259
387
  针对企业微信15分钟会显示离开的机制,假装自己还在上班
@@ -1,6 +1,6 @@
1
- Metadata-Version: 2.1
1
+ Metadata-Version: 2.2
2
2
  Name: nlpertools
3
- Version: 1.0.6.dev0
3
+ Version: 1.0.9
4
4
  Summary: A small package about small basic IO operation when coding
5
5
  Home-page: https://github.com/lvzii/nlpertools
6
6
  Author: youshuJi
@@ -12,6 +12,13 @@ Classifier: Operating System :: OS Independent
12
12
  Requires-Python: >=3.6
13
13
  Description-Content-Type: text/markdown
14
14
  License-File: LICENSE
15
+ Requires-Dist: numpy
16
+ Requires-Dist: pandas
17
+ Requires-Dist: psutil
18
+ Provides-Extra: torch
19
+ Requires-Dist: torch; extra == "torch"
20
+ Dynamic: provides-extra
21
+ Dynamic: requires-dist
15
22
 
16
23
  <div align="center">
17
24
  <h4 align="center">
@@ -23,9 +30,6 @@ License-File: LICENSE
23
30
  </div>
24
31
 
25
32
 
26
- # 当前版本
27
-
28
- 1.0.5
29
33
 
30
34
  # 说明
31
35
 
@@ -33,7 +37,7 @@ License-File: LICENSE
33
37
 
34
38
  它解决了什么问题:
35
39
 
36
- - 很多函数是记不住的, ~~每次写每次都要搜~~ 每次都要问大模型 ,例如pandas排序
40
+ - 很多函数是记不住的, 每次写都要~~搜~~问大模型 ,例如pandas排序
37
41
  - 刷题的时候,树结构的题目很难调试
38
42
 
39
43
 
@@ -75,9 +79,9 @@ https://nlpertools.readthedocs.io/en/latest/
75
79
  def __init__(self, IPT_MODEL_PATH):
76
80
  self.ltp = LTP(IPT_MODEL_PATH)
77
81
  ```
78
- 通过pyinstrument判断,超过1s的包即采用这种方式
82
+ 通过`pyinstrument`判断,超过1s的包即采用这种方式
79
83
  - 2s+ happybase、seaborn、torch、jieba
80
- - 1s+
84
+ - 1s+ /
81
85
  - 0.5s+ pandas elasticsearch transformers xgboost nltk mongo
82
86
 
83
87
 
@@ -85,6 +89,8 @@ https://nlpertools.readthedocs.io/en/latest/
85
89
 
86
90
  - [readthedoc 检查文档构建状况](https://readthedocs.org/projects/nlpertools/builds)
87
91
 
92
+ - [打包发布指南](https://juejin.cn/post/7369413136224878644)
93
+
88
94
  - 发布版本需要加tag
89
95
 
90
96
  ## 开发哲学
@@ -106,6 +112,23 @@ b = nlpertools.io.file.readtxt_list_all_strip('res.txt')
106
112
  ```
107
113
 
108
114
  ```bash
109
- # 监控gpu显存
110
- python -m nlpertools
115
+ # 生成pypi双因素认证的实时密钥(需要提供key)
116
+ python -m nlpertools.cli --get_2fa --get_2fa_key your_key
117
+
118
+ ## git
119
+ python -m nlpertools.cli --git_push
120
+ python -m nlpertools.cli --git_pull
121
+
122
+ # 以下功能被nvitop替代,不推荐使用
123
+ ## 监控gpu显存
124
+ python -m nlpertools.monitor.gpu
125
+ ## 监控cpu
126
+ python -m nlpertools.monitor.memory
111
127
  ```
128
+
129
+ ## 一些常用项目
130
+
131
+ nvitop
132
+
133
+ ydata-profiling
134
+
@@ -1,12 +1,14 @@
1
- nlpertools/__init__.py,sha256=MTicDqOofy0loriBCpRcUdy8yQ9j5c3dFrwCVWWa8ic,536
2
- nlpertools/data_client.py,sha256=83jv7r7CsL8FYt7fJy_8ZKNG0XfAYiU8UPTYUURx4m8,13534
3
- nlpertools/dataprocess.py,sha256=z3nLWvWbGVi8N2mmOm70itgUhb_vhQYGVWWpVMwziNg,22658
1
+ nlpertools/__init__.py,sha256=5ka-NeGW2AUDJ4YZ12DD64xcxuxf9PlQUurxDp5DHbQ,483
2
+ nlpertools/cli.py,sha256=4Ik1NyFaoZpZLsYLAFRLk6xuYQk0IvexPr1Ieq08viE,3932
3
+ nlpertools/data_client.py,sha256=esX8lUQrTui4uVkqPfhpHVok7Eq6ywpuemKjLeqoglc,14674
4
+ nlpertools/dataprocess.py,sha256=v1mobuYN7I3dT6xIKlNOHVtcg31YtjF6FwNPTxeBFFY,23153
4
5
  nlpertools/default_db_config.yml,sha256=E1K9k_xzXVlsf-HJQh8kyHXHYuvTpD12jD4Hfe5rUk8,606
5
- nlpertools/ml.py,sha256=n_WZMCAuD4KaaeYixq0RRiHiU1RuYLMV3dkGV7k4OaE,16798
6
+ nlpertools/get_2fa.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
7
+ nlpertools/ml.py,sha256=qhUBCLuHfcFy8g5ZHNGYq4eH2vYWiGetyKucv8n60-A,18523
6
8
  nlpertools/movie.py,sha256=rkyOnAXdsbWfMSbi1sE1VNRT7f66Hp9BnZsN_58Afmw,897
7
9
  nlpertools/nlpertools_config.yml,sha256=ksXejxFs7pxR47tNAsrN88_4gvq9PCA2ZMO07H-dJXY,26
8
10
  nlpertools/open_api.py,sha256=uyTY00OUlM57Cn0Wm0yZXcIS8vAszy9rKnDMBEWfWJM,1744
9
- nlpertools/other.py,sha256=TuMx0B4qL_0kIJu469k94gE5NsncCE-IEo3ejt3gH9A,11583
11
+ nlpertools/other.py,sha256=JWJiXHRI8mhiUV3k4CZ4kQQS9QN3mw67SmGgTqZFtjs,15026
10
12
  nlpertools/pic.py,sha256=13aaFJh3USGYGs4Y9tAKTvWjmdQR4YDjl3LlIhJheOA,9906
11
13
  nlpertools/plugin.py,sha256=LB7j9GdoQi6TITddH-6EglHlOa0WIHLUT7X5vb_aIZY,1168
12
14
  nlpertools/reminder.py,sha256=wiXwZQmxMck5vY3EvG8_oakP3FAdjGTikAIOiTPUQrs,2977
@@ -22,9 +24,12 @@ nlpertools/algo/template.py,sha256=9vsHr4g3jZZ5KVU_2I9i97o8asRXq-8pSaCXIv0sHeM,2
22
24
  nlpertools/algo/union.py,sha256=0l7lGZbw1qIfW1z5TE8Oo3tybL1bKIP5rzpa5ZT-vLQ,249
23
25
  nlpertools/data_structure/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
24
26
  nlpertools/data_structure/base_structure.py,sha256=gVUvJZ5jsCAswRETTpMwcEjLKoageWiTuCKNEwIWKWk,2641
27
+ nlpertools/draw/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
28
+ nlpertools/draw/draw.py,sha256=19dskkr0wrgczxPJnphEszliwYshEh5SjD8Zz07nlk0,2615
29
+ nlpertools/draw/math_func.py,sha256=0NQ22Dfi9DFG6Bg_hXnCT27w65-dqpOOIgZX7oUIW-Q,881
25
30
  nlpertools/io/__init__.py,sha256=YMuKtC2Ddh5dL5MvXjyUKYOOuqzFYUhBPFaP2kyFG9I,68
26
- nlpertools/io/dir.py,sha256=cK65qSZ9Tu5HrNrDiNyx0PEPrPldRSq34LpCWLz9WHc,1272
27
- nlpertools/io/file.py,sha256=tALfmzFRWztYpsmmBNvz-U6DXBe6cxef9j3_KexWdRM,6970
31
+ nlpertools/io/dir.py,sha256=FPY62COQN8Ji72pk0dYRoXkrORYaUlybKNcL4474uUI,2263
32
+ nlpertools/io/file.py,sha256=mLWl09IEi0rWPN4tTq3LwdYMvAjj4e_QsjEMhufuPPo,7192
28
33
  nlpertools/monitor/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
29
34
  nlpertools/monitor/gpu.py,sha256=M59O6i0hlew7AzXZlaVZqbZA5IR93OhBY2WI0-T_HtY,531
30
35
  nlpertools/monitor/memory.py,sha256=9t6q9BC8VVx4o3G4sBCn7IoQRx272zMPjSnL3yvTBAQ,657
@@ -36,8 +41,9 @@ nlpertools/utils/package.py,sha256=wLg_M8j7Y6ReRjWHWCWoZJHrzEwuAr9TyG2jvb7OQCo,3
36
41
  nlpertools/utils/package_v1.py,sha256=sqgFb-zbTdMd5ziJLY6YUPqR49qUNZjxBH35DnyR5Wg,3542
37
42
  nlpertools/utils/package_v2.py,sha256=WOcsguWfUd4XSAfmPgCtL8HtUbqJ6GRSMHb0OsB47r0,3932
38
43
  nlpertools_helper/__init__.py,sha256=obxRUdZDctvcvK_iA1Dx2HmQFMlMzJto-xDPryq1lJ0,198
39
- nlpertools-1.0.6.dev0.dist-info/LICENSE,sha256=SBcMozykvTbZJ--MqSiKUmHLLROdnr25V70xCQgEwqw,11331
40
- nlpertools-1.0.6.dev0.dist-info/METADATA,sha256=VLFgFPh2o0YeWXEEoPIinO_rn6--mhUFU4vBASPPoNc,2772
41
- nlpertools-1.0.6.dev0.dist-info/WHEEL,sha256=GJ7t_kWBFywbagK5eo9IoUwLW6oyOeTKmQ-9iHFVNxQ,92
42
- nlpertools-1.0.6.dev0.dist-info/top_level.txt,sha256=_4q4MIFvMr4cAUbhWKWYdRXIXsF4PJDg4BUsZvgk94s,29
43
- nlpertools-1.0.6.dev0.dist-info/RECORD,,
44
+ nlpertools-1.0.9.dist-info/LICENSE,sha256=SBcMozykvTbZJ--MqSiKUmHLLROdnr25V70xCQgEwqw,11331
45
+ nlpertools-1.0.9.dist-info/METADATA,sha256=lcKmxc7_mtYH47mPj8UHOM8-5T5YtrDwhHWVZkfHZXU,3330
46
+ nlpertools-1.0.9.dist-info/WHEEL,sha256=In9FTNxeP60KnTkGw7wk6mJPYd_dQSjEZmXdBdMCI-8,91
47
+ nlpertools-1.0.9.dist-info/entry_points.txt,sha256=XEazQ4vUwJMoMAgAwk1Lq4PRQGklPkPBaFkiP0zN_JE,45
48
+ nlpertools-1.0.9.dist-info/top_level.txt,sha256=_4q4MIFvMr4cAUbhWKWYdRXIXsF4PJDg4BUsZvgk94s,29
49
+ nlpertools-1.0.9.dist-info/RECORD,,
@@ -1,5 +1,5 @@
1
1
  Wheel-Version: 1.0
2
- Generator: bdist_wheel (0.43.0)
2
+ Generator: setuptools (75.8.0)
3
3
  Root-Is-Purelib: true
4
4
  Tag: py3-none-any
5
5
 
@@ -0,0 +1,2 @@
1
+ [console_scripts]
2
+ ncli = nlpertools.cli:main