nlpertools 1.0.6.dev0__py3-none-any.whl → 1.0.8__py3-none-any.whl

nlpertools/__init__.py CHANGED
@@ -3,6 +3,7 @@
  # @Author : youshu.Ji
  from .algo.kmp import *
  from .data_structure.base_structure import *
+ from .draw import *
  from .dataprocess import *
  from .io.dir import *
  from .io.file import *
@@ -15,10 +16,8 @@ from .reminder import *
  from .utils_for_nlpertools import *
  from .wrapper import *
  from .monitor import *
+ from .cli import *
 
- import os
 
 
- DB_CONFIG_FILE = os.path.join(os.path.dirname(__file__),"default_db_config.yml")
-
- __version__ = '1.0.5'
+ __version__ = '1.0.8'
nlpertools/cli.py ADDED
@@ -0,0 +1,87 @@
+ import argparse
+ import os
+ import uuid
+ import sys
+
+ import pyotp
+
+ """
+ How to debug cli.py
+ """
+
+
+ def git_push():
+     """
+     Pushing to GitHub often fails from mainland China; retry automatically.
+     """
+     num = -1
+     while 1:
+         num += 1
+         print("retry num: {}".format(num))
+         info = os.system("git push --set-upstream origin main")
+         print(str(info))
+         if not str(info).startswith("fatal"):
+             print("success")
+             break
+
+
+ def git_pull():
+     """
+     Pulling from GitHub often fails from mainland China; retry automatically.
+     """
+     num = -1
+     while 1:
+         num += 1
+         print("retry num: {}".format(num))
+         info = os.system("git pull")
+         print(str(info))
+         if not str(info).startswith("fatal") and not str(info).startswith("error"):
+             print("success")
+             break
+
+
+ def get_mac_address():
+     mac = uuid.UUID(int=uuid.getnode()).hex[-12:]
+     mac_address = ":".join([mac[e:e + 2] for e in range(0, 11, 2)])
+     print("The MAC address is not guaranteed to be accurate")
+     print(mac_address)
+     return mac_address
+
+
+ def get_2af_value(key):
+     """
+     The key should be 7 characters long.
+     """
+     print(key)
+     totp = pyotp.TOTP(key)
+     print(totp.now())
+
+
+ def main():
+     parser = argparse.ArgumentParser(description="CLI tool for git operations and getting MAC address.")
+     parser.add_argument('--gitpush', action='store_true', help='Perform git push operation.')
+     parser.add_argument('--gitpull', action='store_true', help='Perform git pull operation.')
+     parser.add_argument('--mac_address', action='store_true', help='Get the MAC address.')
+
+     parser.add_argument('--get_2fa', action='store_true', help='Get the 2fa value.')
+     parser.add_argument('--get_2fa_key', type=str, help='The key used to generate the 2fa value.')
+
+     args = parser.parse_args()
+
+     if args.gitpush:
+         git_push()
+     elif args.gitpull:
+         git_pull()
+     elif args.mac_address:
+         get_mac_address()
+     elif args.get_2fa:
+         if args.get_2fa_key:
+             get_2af_value(args.get_2fa_key)
+         else:
+             print("Please provide a key as an argument.")
+     else:
+         print("No operation specified. Use --gitpush, --gitpull, --mac_address or --get_2fa.")
+
+
+ if __name__ == '__main__':
+     main()
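The new module also works as an importable helper. A minimal sketch (not part of the diff) of exercising it directly, assuming the 1.0.8 wheel and `pyotp` are installed; the secret shown is a placeholder base32 string, not a real credential:

```python
# Sketch only: calling the new CLI helpers directly.
# "JBSWY3DPEHPK3PXP" is a placeholder TOTP secret, not a real key.
from nlpertools.cli import get_mac_address, get_2af_value

get_mac_address()                  # prints a best-effort MAC address and returns it
get_2af_value("JBSWY3DPEHPK3PXP")  # prints the current TOTP code via pyotp
```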
nlpertools/data_client.py CHANGED
@@ -1,3 +1,4 @@
+ # encoding=utf-8
  # !/usr/bin/python3.8
  # -*- coding: utf-8 -*-
  # @Author : youshu.Ji
@@ -5,9 +6,11 @@ import datetime
  import json
  import logging
 
- from . import DB_CONFIG_FILE
  from .io.file import read_yaml
  from .utils.package import *
+ import os
+
+ DB_CONFIG_FILE = os.path.join(os.path.dirname(__file__), "default_db_config.yml")
 
  # import aioredis
  # import happybase
@@ -28,21 +31,24 @@ class Neo4jOps(object):
      NEO4J_TIMEOUT = 0.3
      pass
 
+
  class SqliteOps(object):
-     import sqlite3
-     database_path = r'xx.db'
-     conn = sqlite3.connect(database_path)
-     c = conn.cursor()
-     sql = "select name from sqlite_master where type='table' order by name"
-     c.execute(sql)
-     print(c.fetchall())
-     sql = "select * from typecho_contents"
-     c.execute(sql)
-     res = c.fetchall()
-     print(res[3])
-
-     conn.commit()
-     conn.close()
+     pass
+     # import sqlite3
+     # database_path = r'xx.db'
+     # conn = sqlite3.connect(database_path)
+     # c = conn.cursor()
+     # sql = "select name from sqlite_master where type='table' order by name"
+     # c.execute(sql)
+     # print(c.fetchall())
+     # sql = "select * from typecho_contents"
+     # c.execute(sql)
+     # res = c.fetchall()
+     # print(res[3])
+     #
+     # conn.commit()
+     # conn.close()
+
 
  class MysqlOps(object):
      import pandas as pd
@@ -116,6 +122,41 @@ class EsOps(object):
          print(f"批量保存数据: {_res}")
 
 
+ class MongoDB_BETA:
+     def __init__(self, host='localhost', port=27017, db_name=None, collection_name=None):
+         self.host = host
+         self.port = port
+         self.db_name = db_name
+         self.collection_name = collection_name
+         self.client = None
+         self.db = None
+         self.collection = None
+
+     def connect(self):
+         self.client = MongoClient(self.host, self.port)
+         self.db = self.client[self.db_name]
+         self.collection = self.db[self.collection_name]
+
+     def close(self):
+         if self.client:
+             self.client.close()
+
+     def insert_data(self, data):
+         if isinstance(data, list):
+             self.collection.insert_many(data)
+         else:
+             self.collection.insert_one(data)
+
+     def check_data_exists(self, query):
+         """
+         Check whether a record already exists in the collection.
+         :param query: query filter
+         :return: bool indicating whether matching data exists
+         """
+         return self.collection.count_documents(query) > 0
+
+
+
  class MongoOps(object):
      from pymongo import MongoClient
      def __init__(self, config=global_db_config["mongo"]):
@@ -348,8 +389,6 @@ class KafkaOps(object):
          print(recv)
 
 
-
-
  class MilvusOps(object):
      def __init__(self, config=global_db_config.milvus):
          from pymilvus import connections, Collection
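A hedged usage sketch for the new `MongoDB_BETA` class, assuming a local MongoDB instance and that `pymongo` is installed and `MongoClient` is resolvable where the class runs (the class itself does not import it):

```python
# Sketch only: exercising MongoDB_BETA against a local MongoDB instance.
from nlpertools.data_client import MongoDB_BETA

db = MongoDB_BETA(host="localhost", port=27017,
                  db_name="test_db", collection_name="docs")
db.connect()
db.insert_data({"_id": 1, "text": "hello"})   # single document -> insert_one
db.insert_data([{"_id": 2}, {"_id": 3}])      # list -> insert_many
print(db.check_data_exists({"_id": 1}))       # True if a matching document exists
db.close()
```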
nlpertools/dataprocess.py CHANGED
@@ -55,9 +55,9 @@ class Pattern:
      # Chinese person names
      chinese_name_pattern = "(?:[\u4e00-\u9fa5·]{2,3})"
      # English person names
-     english_name_pattern = "(^[a-zA-Z][a-zA-Z\s]{0,20}[a-zA-Z]$)"
+     english_name_pattern = r"(^[a-zA-Z][a-zA-Z\s]{0,20}[a-zA-Z]$)"
      # digits only
-     pure_num_pattern = "\d+"
+     pure_num_pattern = r"\d+"
      # wording like "xxxx图" (figure/table references)
      pic_table_descript_pattern = ".{1,15}图"
 
@@ -66,20 +66,20 @@ class Pattern:
      hlink_pattern = (
          r"(https?|ftp|file)://[-A-Za-z0-9+&@#/%?=~_|!:,.;]+[-A-Za-z0-9+&@#/%=~_|]"
      )
-     http_pattern = "(http|https):\/\/([\w.]+\/?)\S*/\S*"
+     http_pattern = r"(http|https):\/\/([\w.]+\/?)\S*/\S*"
      # email addresses
-     email_pattern = "[A-Za-z0-9\u4e00-\u9fa5]+@[a-zA-Z0-9_-]+(\.[a-zA-Z0-9_-]+)+"
+     email_pattern = r"[A-Za-z0-9\u4e00-\u9fa5]+@[a-zA-Z0-9_-]+(\.[a-zA-Z0-9_-]+)+"
      # html (possibly too strict)
-     html_pattern = "<[\s\S]*?>"
+     html_pattern = r"<[\s\S]*?>"
      # repeats such as "asdasdasdasd"
      repeat_pattern = "(.)\1+"
      # dates
-     day_time_pattern = "\d{1,4}(-)(1[0-2]|0?[1-9])\1(0?[1-9]|[1-2]\d|30|31)"
+     day_time_pattern = r"\d{1,4}(-)(1[0-2]|0?[1-9])\1(0?[1-9]|[1-2]\d|30|31)"
      # time of day
-     hour_time_pattern = "(?:[01]\d|2[0-3]):[0-5]\d:[0-5]\d"
+     hour_time_pattern = r"(?:[01]\d|2[0-3]):[0-5]\d:[0-5]\d"
      # stock codes
      stock_pattern = (
-         "(s[hz]|S[HZ])(000[\d]{3}|002[\d]{3}|300[\d]{3}|600[\d]{3}|60[\d]{4})"
+         r"(s[hz]|S[HZ])(000[\d]{3}|002[\d]{3}|300[\d]{3}|600[\d]{3}|60[\d]{4})"
      )
 
      # these usually need to be replaced
@@ -91,7 +91,7 @@ class Pattern:
      # Weibo videos and similar markers
      weibo_pattern = r"([\s]\w+(的微博视频)|#|【|】|转发微博)"
      # @mentions
-     at_pattern = "@\w+"
+     at_pattern = r"@\w+"
 
      # from https://github.com/bigscience-workshop/data-preparation pii
      year_patterns = [
@@ -116,7 +116,7 @@ class Pattern:
      ipv4_pattern = r'(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)(?:\.(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)){3}'
      ipv6_pattern = r'(?:[0-9a-fA-F]{1,4}:){7,7}[0-9a-fA-F]{1,4}|(?:[0-9a-fA-F]{1,4}:){1,7}:|(?:[0-9a-fA-F]{1,4}:){1,6}:[0-9a-fA-F]{1,4}|(?:[0-9a-fA-F]{1,4}:){1,5}(?::[0-9a-fA-F]{1,4}){1,2}|(?:[0-9a-fA-F]{1,4}:){1,4}(?::[0-9a-fA-F]{1,4}){1,3}|(?:[0-9a-fA-F]{1,4}:){1,3}(?::[0-9a-fA-F]{1,4}){1,4}|(?:[0-9a-fA-F]{1,4}:){1,2}(?::[0-9a-fA-F]{1,4}){1,5}|[0-9a-fA-F]{1,4}:(?:(?::[0-9a-fA-F]{1,4}){1,6})|:(?:(?::[0-9a-fA-F]{1,4}){1,7}|:)|fe80:(?::[0-9a-fA-F]{0,4}){0,4}%[0-9a-zA-Z]{1,}|::(?:ffff(?::0{1,4}){0,1}:){0,1}(?:(?:25[0-5]|(?:2[0-4]|1{0,1}[0-9]){0,1}[0-9])\.){3,3}(?:25[0-5]|(?:2[0-4]|1{0,1}[0-9]){0,1}[0-9])|(?:[0-9a-fA-F]{1,4}:){1,4}:(?:(?:25[0-5]|(?:2[0-4]|1{0,1}[0-9]){0,1}[0-9])\.){3,3}(25[0-5]|(?:2[0-4]|1{0,1}[0-9]){0,1}[0-9])'
      ip_pattern = r"(?:^|[\b\s@?,!;:\'\")(.\p{Han}])(" + r"|".join(
-         [ipv4_pattern, ipv6_pattern]) + ")(?:$|[\s@,?!;:\'\"(.\p{Han}])"
+         [ipv4_pattern, ipv6_pattern]) + r")(?:$|[\s@,?!;:\'\"(.\p{Han}])"
 
      # https://regex101.com/r/EpA5B7/1
      email_line_pattern = r'''
@@ -466,7 +466,7 @@ class TextProcess(object):
          p = re.compile(pattern, re.S)
          text = p.sub("", text)
 
-         dr = re.compile("@\w+", re.S)
+         dr = re.compile(r"@\w+", re.S)
          text = dr.sub("", text)
 
          return text
@@ -527,7 +527,7 @@ class TextProcess(object):
          text = re.sub(pattern, replace, text)
          return text
 
-     def calc_proportion_zh(self,text):
+     def calc_proportion_zh(self, text):
          text = text.strip()
          # mixed Chinese/English text where the English words are space-separated
          if " " in text:
@@ -538,6 +538,8 @@ class TextProcess(object):
                  chinese_count += 1
              else:
                  pass
+
+
  class CopyFunc():
      # from https://github.com/lemon234071/clean-dialog
      def is_chinese_char(cp):
@@ -597,6 +599,20 @@ def convert_basic2fullwidth(sentence):
              new_sentence += char
      return new_sentence
 
+
+ def clean_illegal_chars_for_excel(df):
+     # openpyxl rejects certain illegal characters when writing Excel files; they must be removed
+     # define a helper that strips the illegal characters from a string
+     def remove_illegal_chars(s):
+         if isinstance(s, str):
+             # drop characters whose ASCII codes fall in the illegal ranges
+             return re.sub(r'[\x00-\x08\x0B\x0C\x0E-\x1F]', '', s)
+         return s
+
+     # apply the cleaning function to every element of the DataFrame
+     return df.map(remove_illegal_chars)
+
+
  if __name__ == "__main__":
      pattern_for_filter = [
          Pattern.redundancy_space_pattern,
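A small usage sketch for the new `clean_illegal_chars_for_excel` helper. Note that `DataFrame.map` requires pandas 2.1+ (on older pandas, `applymap` is the equivalent call); the file name and the module-level re-export are assumptions:

```python
# Sketch only: strip control characters that openpyxl rejects before writing Excel.
import pandas as pd
import nlpertools

df = pd.DataFrame({"text": ["ok", "bad\x02char", "fine"]})
cleaned = nlpertools.clean_illegal_chars_for_excel(df)  # needs pandas >= 2.1 (DataFrame.map)
cleaned.to_excel("out.xlsx", index=False)               # needs openpyxl installed
```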
nlpertools/draw/__init__.py ADDED
File without changes
nlpertools/draw/draw.py ADDED
@@ -0,0 +1,83 @@
+ #!/usr/bin/python3.8
+ # -*- coding: utf-8 -*-
+ # @Author : youshu.Ji
+ from ..utils.package import plt
+
+
+ def confused_matrix(confuse_matrix):
+     import seaborn as sns
+     sns.set()
+     f, ax = plt.subplots()
+     ticklabels = ["l1", "l2", "l31"]
+     sns.heatmap(confuse_matrix, annot=True, fmt=".3g", ax=ax, cmap='rainbow',
+                 xticklabels=ticklabels, yticklabels=ticklabels)  # draw the heatmap
+
+     ax.set_title('confusion matrix')  # title
+     ax.set_xlabel('predict')  # x axis
+     ax.set_ylabel('true')  # y axis
+     plt.show()
+
+     f.savefig('tmp.jpg', bbox_inches='tight')
+
+
+ def plot_histogram(data, bin_size):
+     """
+     Plot a histogram; values above 1000 are all counted as 1000.
+     :param data:
+     :param bin_size:
+     :return:
+     """
+     import matplotlib.pyplot as plt
+     import numpy as np
+     import pandas as pd
+     from matplotlib.ticker import MaxNLocator
+     # map values above 1000 into the overflow bin
+     def process_lengths(data):
+         return [length if length <= 1000 else 1003 for length in data]
+
+     # intervals are closed on the left, open on the right
+     min_num, max_num = 0, 1000
+     # min_num, max_num = min(data), max(data)
+
+     plt.figure(figsize=(12, 8))
+     processed_data = process_lengths(data)
+     bins = np.arange(0, 1000 + 2 * bin_size, bin_size)
+     # draw the histogram
+     n, new_bins, patches = plt.hist(processed_data, bins=bins, edgecolor='black', color='skyblue', alpha=0.7,
+                                     linewidth=0)
+
+     # add the "∞" label for the overflow bin
+     # note that bins will change
+     plt.gca().set_xticks(bins)
+     plt.gca().set_xticklabels([str(i) for i in plt.xticks()[0][:-1]] + ["∞"])
+
+     mean_val = np.mean(data)
+     plt.axvline(mean_val, color='red', linestyle='dashed', linewidth=1)
+     plt.text(mean_val + bin_size / 10, max(n) * 0.9, f'Mean: {mean_val:.2f}', color='red')
+
+     # title and axis labels
+     plt.title('Module Line Number Distribution', fontsize=16, fontweight='bold')
+     plt.xlabel('module line number', fontsize=14)
+     plt.ylabel('frequency', fontsize=14)
+
+     # grid
+     plt.grid(True, linestyle='--', alpha=0.6)
+
+     # prettify the x- and y-axis ticks
+     plt.xticks(fontsize=12)
+     plt.yticks(fontsize=12)
+
+     # show the count on top of each bar
+     for i in range(len(patches)):
+         plt.text(patches[i].get_x() + patches[i].get_width() / 2, patches[i].get_height(),
+                  str(int(n[i])), ha='center', va='bottom', fontsize=12)
+     plt.gca().yaxis.set_major_locator(MaxNLocator(integer=True))
+     # show the figure
+     plt.show()
+
+
+ if __name__ == '__main__':
+     # adjust the bin size
+     bin_size = 50
+     # example module-length data
+     plot_histogram([1, 100, 999, 1000, 1002, 1100, 1150], bin_size)
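A hedged example of driving the new confusion-matrix helper, assuming `matplotlib` and `seaborn` are installed; the 3×3 counts are made-up illustration data matching the three hard-coded tick labels:

```python
# Sketch only: the heatmap helper expects a square matrix of counts whose size
# matches the hard-coded tick labels ["l1", "l2", "l31"].
from nlpertools.draw.draw import confused_matrix

cm = [[50, 2, 1],
      [4, 45, 3],
      [0, 5, 40]]
confused_matrix(cm)  # shows the heatmap and also writes tmp.jpg
```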
nlpertools/draw/math_func.py ADDED
@@ -0,0 +1,33 @@
+ # math function plots
+ def draw_log():
+     import matplotlib.pyplot as plt
+     import numpy as np
+     from matplotlib.ticker import MultipleLocator, FormatStrFormatter
+
+     # generate some data
+     x = np.linspace(0.1, 10, 100)
+     # np.log defaults to the natural logarithm (log e)
+     y = np.log(x)
+
+     # create a new figure and axes
+     fig, ax = plt.subplots()
+
+     # plot the log curve
+     ax.plot(x, y)
+
+     # set the title and axis labels
+     ax.set_title("Logarithmic Function")
+     ax.set_xlabel("x")
+     ax.set_ylabel("log(x)")
+     # set the x-axis major tick interval to 1
+     ax.xaxis.set_major_locator(MultipleLocator(1))
+
+     # set the x-axis tick label format
+     ax.xaxis.set_major_formatter(FormatStrFormatter("%.1f"))
+     # add a dashed line at x=1
+     ax.axvline(x=1, linestyle="--", color="gray")
+     # add a dashed line at y=0
+     ax.axhline(y=0, linestyle="--", color="gray")
+
+     # show the figure
+     plt.show()
nlpertools/get_2fa.py ADDED
File without changes
nlpertools/io/dir.py CHANGED
@@ -10,7 +10,30 @@ def j_mkdir(name):
      os.makedirs(name, exist_ok=True)
 
 
- def get_filename(path) -> str:
+ def j_walk(name, suffix=None):
+     paths = []
+     for root, dirs, files in os.walk(name):
+         for file in files:
+             path = os.path.join(root, file)
+             if not (suffix and not path.endswith(suffix)):
+                 paths.append(path)
+     return paths
+
+
+ def windows_to_wsl_path(windows_path):
+     # convert the drive letter
+     if windows_path[1:3] == ':\\':
+         drive_letter = windows_path[0].lower()
+         path = windows_path[2:].replace('\\', '/')
+         wsl_path = f'/mnt/{drive_letter}{path}'
+     else:
+         # if the path does not start with a drive letter, just swap the separators
+         wsl_path = windows_path.replace('\\', '/').replace("'", "\'")
+
+     return wsl_path
+
+
+ def get_filename(path, suffix=True) -> str:
      """
      Return the final file name of a path.
      :param path:
@@ -18,11 +41,20 @@ def get_filename(path) -> str:
      """
      # path = r'***/**/***.txt'
      filename = os.path.split(path)[-1]
+     if not suffix:
+         filename = filename.split('.')[0]
      return filename
 
 
  def j_listdir(dir_name, including_dir=True):
-     # yield
+     filenames = os.listdir(dir_name)
+     if including_dir:
+         return [os.path.join(dir_name, filename) for filename in filenames]
+     else:
+         return list(filenames)
+
+
+ def j_listdir_yield(dir_name, including_dir=True):
      filenames = os.listdir(dir_name)
      for filename in filenames:
          if including_dir:
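A quick sketch of the new directory helpers; the example paths are illustrative only:

```python
# Sketch only: new directory helpers added in this release.
from nlpertools.io.dir import j_walk, windows_to_wsl_path

print(windows_to_wsl_path(r"C:\Users\me\data.txt"))  # -> /mnt/c/Users/me/data.txt
py_files = j_walk("nlpertools", suffix=".py")        # recursive walk filtered by suffix
print(len(py_files))
```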
nlpertools/io/file.py CHANGED
@@ -5,7 +5,6 @@ import codecs
  import json
  import pickle
  import random
- import time
  from itertools import (takewhile, repeat)
  import pandas as pd
  # import omegaconf
@@ -15,10 +14,16 @@ from ..utils.package import *
  LARGE_FILE_THRESHOLD = 1e5
 
 
+ def safe_filename(filename: str) -> str:
+     for char in ['\\', '/', ':', '*', '?', '"', '<', '>', '|']:
+         filename = filename.replace(char, '_')
+     return filename
+
+
  def read_yaml(path, omega=False):
      if omega:
          return omegaconf.OmegaConf.load(path)
-     return yaml.load(codecs.open(path), Loader=yaml.FullLoader)
+     return yaml.load(codecs.open(path, encoding='utf-8'), Loader=yaml.FullLoader)
 
 
  def _merge_file(filelist, save_filename, shuffle=False):
@@ -52,7 +57,7 @@ load_from_json
 
 
  # read a txt file in one go, return a list with newlines stripped
- def readtxt_list_all_strip(path, encoding='utf-8'):
+ def readtxt_list_all_strip(path, encoding='utf-8') -> list:
      file_line_num = iter_count(path)
      lines = []
      with codecs.open(path, 'r', encoding) as r:
@@ -67,7 +72,7 @@ def readtxt_list_all_strip(path, encoding='utf-8'):
 
 
  # read a txt file line by line, return a list
- def readtxt_list_each(path):
+ def readtxt_list_each(path) -> list:
      lines = []
      with codecs.open(path, 'r', 'utf-8') as r:
          line = r.readline()
@@ -77,7 +82,7 @@
      return lines
 
 
- def readtxt_list_each_strip(path):
+ def readtxt_list_each_strip(path) -> list:
      """
      yield-based version
      """
@@ -89,14 +94,14 @@ def readtxt_list_each_strip(path):
 
 
  # read a txt file in one go, return a list
- def readtxt_list_all(path):
+ def readtxt_list_all(path) -> list:
      with codecs.open(path, 'r', 'utf-8') as r:
          lines = r.readlines()
      return lines
 
 
  # read a byte file into a single string
- def readtxt_byte(path, encoding="utf-8"):
+ def readtxt_byte(path, encoding="utf-8") -> str:
      with codecs.open(path, 'rb') as r:
          lines = r.read()
          lines = lines.decode(encoding)
@@ -104,7 +109,7 @@ def readtxt_byte(path, encoding="utf-8"):
 
 
  # read a txt file into a single string
- def readtxt_string(path, encoding="utf-8"):
+ def readtxt_string(path, encoding="utf-8") -> str:
      with codecs.open(path, 'r', encoding) as r:
          lines = r.read()
      return lines.replace('\r', '')
@@ -261,6 +266,7 @@ def save_to_mongo():
      """
      pass
 
+
  def load_from_mongo():
      pass
 
@@ -274,4 +280,4 @@ def unmerge_cells_df(df) -> pd.DataFrame:
          else:
              values.append(i)
      df[column] = values
-     return df
+     return df
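A short sketch of the new `safe_filename` helper and the now UTF-8-aware `read_yaml`; the file names here are placeholders:

```python
# Sketch only: sanitize a string for use as a file name, then read a YAML config.
from nlpertools.io.file import safe_filename, read_yaml

name = safe_filename('results: run "A/B" <v1>?.json')
print(name)  # results_ run _A_B_ _v1__.json  (every \ / : * ? " < > | becomes _)
config = read_yaml("some_config.yml")  # decoded as UTF-8 since this release
```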
nlpertools/ml.py CHANGED
@@ -18,9 +18,8 @@ from .utils.package import *
 
 
  def calc_llm_train_activation_memory(
-     model_name, sequence_length, batch_size, hidden_dim, lay_number, attention_heads_num, gpu_num=1
+         model_name, sequence_length, batch_size, hidden_dim, lay_number, attention_heads_num, gpu_num=1
  ):
-
      """
      return bytes
 
@@ -33,18 +32,18 @@ def calc_llm_train_activation_memory(
      # FFN
      # Layer Norm
      r1 = (
-         sequence_length
-         * batch_size
-         * hidden_dim
-         * lay_number
-         * (34 + 5 * attention_heads_num * sequence_length / hidden_dim)
+             sequence_length
+             * batch_size
+             * hidden_dim
+             * lay_number
+             * (34 + 5 * attention_heads_num * sequence_length / hidden_dim)
      )
      # reference2
      r2 = (
-         lay_number*(2 * sequence_length * attention_heads_num + 16 * hidden_dim)
-         * sequence_length
-         * batch_size
-         / gpu_num
+             lay_number * (2 * sequence_length * attention_heads_num + 16 * hidden_dim)
+             * sequence_length
+             * batch_size
+             / gpu_num
      )
      print(r1)
      print(r2)
@@ -80,7 +79,7 @@ class DataStructure:
      }
      ner_input_example = "这句话一共有两个实体分别为大象和老鼠。"
      ner_label_example = (
-         list("OOOOOOOOOOOOO") + ["B-s", "I-s"] + ["O"] + ["B-o", "I-o"] + ["O"]
+             list("OOOOOOOOOOOOO") + ["B-s", "I-s"] + ["O"] + ["B-o", "I-o"] + ["O"]
      )
 
 
@@ -135,7 +134,7 @@ class STEM(object):
          if each_srl:
              args = []
              for arg in each_srl:
-                 args.extend(seg[arg[1] : arg[2] + 1])
+                 args.extend(seg[arg[1]: arg[2] + 1])
              # append the predicate
              args.insert(each_srl[0][2] - each_srl[0][1] + 1, seg[wdx])
              events.append(args)
@@ -174,7 +173,7 @@ def subject_object_labeling(spo_list, text):
      q_list_length = len(q_list)
      k_list_length = len(k_list)
      for idx in range(k_list_length - q_list_length + 1):
-         t = [q == k for q, k in zip(q_list, k_list[idx : idx + q_list_length])]
+         t = [q == k for q, k in zip(q_list, k_list[idx: idx + q_list_length])]
          # print(idx, t)
          if all(t):
              # print(idx)
@@ -187,8 +186,8 @@ def subject_object_labeling(spo_list, text):
          if len(spo) == 2:
              labeling_list[idx_start + 1] = "I-" + spo_type
          elif len(spo) >= 3:
-             labeling_list[idx_start + 1 : idx_start + len(spo)] = ["I-" + spo_type] * (
-                 len(spo) - 1
+             labeling_list[idx_start + 1: idx_start + len(spo)] = ["I-" + spo_type] * (
+                     len(spo) - 1
              )
          else:
              pass
@@ -239,12 +238,12 @@ def convert_crf_format_10_fold(corpus, objdir_path):
      split_position = int(len(corpus) / 10)
      for k in range(0, 10):
          if k == 9:
-             dev_set = corpus[k * split_position :]
+             dev_set = corpus[k * split_position:]
              train_set = corpus[: k * split_position]
          else:
-             dev_set = corpus[k * split_position : (k + 1) * split_position]
+             dev_set = corpus[k * split_position: (k + 1) * split_position]
              train_set = (
-                 corpus[: k * split_position] + corpus[(k + 1) * split_position :]
+                     corpus[: k * split_position] + corpus[(k + 1) * split_position:]
              )
          writetxt_w_list(
              train_set, os.path.join(objdir_path, "train{}.txt".format(k + 1))
@@ -292,12 +291,41 @@ def kfold_txt(corpus, path, k=9, is_shuffle=True):
      if is_shuffle:
          random.shuffle(corpus)
      split_position = int(len(corpus) / 10)
-     train_set, dev_set = corpus[: k * split_position], corpus[k * split_position :]
+     train_set, dev_set = corpus[: k * split_position], corpus[k * split_position:]
      writetxt_w_list(train_set, os.path.join(path, "train.tsv"), num_lf=1)
      writetxt_w_list(dev_set, os.path.join(path, "test.tsv"), num_lf=1)
      writetxt_w_list(dev_set, os.path.join(path, "dev.tsv"), num_lf=1)
 
 
+ def sample():
+     import pandas as pd
+     from sklearn.model_selection import StratifiedShuffleSplit
+
+     # assume df is your DataFrame
+
+     df = pd.DataFrame({
+         "count_line": [i for i in range(100)],
+         "x": [i for i in range(100)],
+         "y": [i // 10 for i in range(100)],
+     })
+     print(df)
+     # count_line is the field used for stratified sampling
+
+     # create a StratifiedShuffleSplit object with a test fraction of 0.1
+     split = StratifiedShuffleSplit(n_splits=1, test_size=0.1, random_state=42)
+
+     # get the train and test indices
+     train_index, test_index = next(split.split(df, df['y']))
+
+     # split into train and test sets by index
+     train_df = df.loc[train_index]
+     test_df = df.loc[test_index]
+
+     # print the number of rows in each split
+     print("训练集行数:", len(train_df))
+     print("测试集行数:", len(test_df))
+
+
  def kfold_df(df, save_dir=None):
      """
      Split into train/test/val sets and write CSVs readable on Windows.
@@ -389,7 +417,7 @@ def split_sentence(sentence, language="chinese", cross_line=True):
      for idx, char in enumerate(sentence):
          if idx == len(sentence) - 1:
              if char in split_signs:
-                 sentences.append(sentence[start_idx : idx + 1].strip())
+                 sentences.append(sentence[start_idx: idx + 1].strip())
                  start_idx = idx + 1
              else:
                  sentences.append(sentence[start_idx:].strip())
@@ -399,10 +427,10 @@ def split_sentence(sentence, language="chinese", cross_line=True):
              if idx < len(sentence) - 2:
                  # handle sequences like 。”。
                  if sentence[idx + 2] not in split_signs:
-                     sentences.append(sentence[start_idx : idx + 2].strip())
+                     sentences.append(sentence[start_idx: idx + 2].strip())
                      start_idx = idx + 2
              elif sentence[idx + 1] not in split_signs:
-                 sentences.append(sentence[start_idx : idx + 1].strip())
+                 sentences.append(sentence[start_idx: idx + 1].strip())
                  start_idx = idx + 1
      return sentences
 
@@ -480,4 +508,4 @@ if __name__ == "__main__":
          attention_heads_num=32,
          gpu_num=1
      )
-     print(res, "G")
+     print(res, "G")
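For reference, the first activation-memory estimate in `calc_llm_train_activation_memory` is roughly `seq * batch * hidden * layers * (34 + 5 * heads * seq / hidden)` bytes. A hedged back-of-the-envelope check with LLaMA-7B-like shapes (the numbers below are illustrative, not from the package):

```python
# Sketch only: rough activation-memory estimate using the first formula above.
seq, batch, hidden, layers, heads = 2048, 1, 4096, 32, 32

activation_bytes = seq * batch * hidden * layers * (34 + 5 * heads * seq / hidden)
print(f"{activation_bytes / 1024**3:.1f} GiB")  # ~28.5 GiB for these shapes
```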
nlpertools/other.py CHANGED
@@ -5,10 +5,13 @@ import itertools
  import os
  import re
  import string
+ import subprocess
+ import threading
  from concurrent.futures import ThreadPoolExecutor
  from functools import reduce
  import math
  import datetime
+ import difflib
  import psutil
  from .io.file import writetxt_w_list, writetxt_a
  # import numpy as np
@@ -27,6 +30,134 @@ ENGLISH_PUNCTUATION = list(',.;:\'"!?<>()')
  OTHER_PUNCTUATION = list('!@#$%^&*')
 
 
+ def get_diff_parts(str1, str2):
+     # create a SequenceMatcher object
+     matcher = difflib.SequenceMatcher(None, str1, str2)
+
+     # collect the differing parts
+     diff_parts = []
+     for tag, i1, i2, j1, j2 in matcher.get_opcodes():
+         if tag == 'replace' or tag == 'delete' or tag == 'insert':
+             diff_parts.append((tag, str1[i1:i2], str2[j1:j2]))
+
+     return diff_parts
+
+
+ def run_cmd_with_timeout(cmd, timeout):
+     """
+     https://juejin.cn/post/7391703459803086848
+     """
+     process = subprocess.Popen(cmd, shell=True, encoding="utf-8", errors="ignore", stdout=subprocess.PIPE,
+                                stderr=subprocess.PIPE)
+     res = [None]
+
+     def target():
+         try:
+             ans = process.communicate()
+             res[0] = ans
+         except subprocess.TimeoutExpired:
+             process.kill()
+             process.communicate()
+
+     thread = threading.Thread(target=target)
+     thread.start()
+     thread.join(timeout)
+     if thread.is_alive():
+         print(f"Terminating {cmd}")
+         process.terminate()
+         thread.join()
+         print("Terminated successfully")
+         return False, f"{cmd} is running over {timeout}s"
+     if process.returncode == 0:
+         # res[0][0] is the stdout output
+         return True, res[0][0]
+     else:
+         return False, res[0][0]
+
+
+ def print_three_line_table(df):
+     # TODO add support for highlighting cells in red, as in Excel
+     import webbrowser
+
+     # import pandas as pd
+     # data = {'from_pc': ['valid_data', 'illegal_char', 'more_data'],
+     #         'rom_pc': ['another_valid_data', 'illegal_char', 'data']}
+     # df = pd.DataFrame(data)
+
+     # convert the DataFrame to an HTML table
+     html_table = df.to_html(index=False)
+     html_table = html_table.replace('border="1"', 'border="0"')
+
+     first_line_px = str(2)
+     second_line_px = str(1)
+     third_line_px = str(2)
+     # CSS style for a three-line (booktabs-style) table
+     # // thead: table header
+     # // tr: row
+     # // td: cell
+     head = """<!DOCTYPE html>
+     <html lang="zh">
+     <head>
+         <meta charset="UTF-8">
+         <title>页面标题</title>
+     </head>"""
+     style = """
+     <style>
+
+     table {
+         border-collapse: collapse;
+     }
+
+     tr, td, th {
+         text-align: center; /* center text horizontally */
+         vertical-align: middle; /* center text vertically */
+     }
+     thead tr {
+         border-top: (first_line_px)px solid black;
+         border-bottom: (second_line_px)px solid black;
+     }
+
+     thead th {
+         border-bottom: (second_line_px)px solid black;
+     }
+
+     tbody tr td {
+         border-bottom: 0px solid black;
+     }
+
+     tbody tr:last-child td {
+         border-bottom: (third_line_px)px solid black;
+     }
+     </style>"""
+     style = style.replace("(first_line_px)", first_line_px).replace("(second_line_px)", second_line_px).replace(
+         "(third_line_px)", third_line_px)
+     # combine the CSS style with the HTML table
+     html = f"{style}{html_table}"
+     print(html)
+     temp_file_path = "temp.html"
+     # save the HTML to a file
+     with open(temp_file_path, "w") as f:
+         f.write(html)
+     webbrowser.open('file://' + os.path.realpath(temp_file_path))
+
+
+ def jprint(obj, depth=0):
+     if isinstance(obj, dict):
+         sep = "-" * (10 - depth * 3)
+         for k, v in obj.items():
+             print(depth * "|", sep, k, sep)
+             jprint(v)
+     elif isinstance(obj, list):
+         for v in obj:
+             jprint(v, depth + 1)
+     else:
+         print(obj)
+
+
+ def print_split(sign="=", num=20):
+     print(sign * num)
+
+
  def seed_everything():
      import torch
      # seed everything
@@ -82,21 +213,6 @@ def convert_np_to_py(obj):
      return obj
 
 
- def git_push():
-     """
-     Pushing to GitHub often fails from mainland China; retry automatically.
-     """
-     num = -1
-     while 1:
-         num += 1
-         print("retry num: {}".format(num))
-         info = os.system("git push --set-upstream origin main")
-         print(str(info))
-         if not str(info).startswith("fatal"):
-             print("scucess")
-             break
-
-
  def snake_to_camel(s: str) -> str:
      """
      author: u
@@ -235,24 +351,19 @@ def stress_test(func, ipts):
      return results
 
 
- def get_substring_loc(text, subtext):
-     res = re.finditer(
-         subtext.replace('\\', '\\\\').replace('?', '\?').replace('(', '\(').replace(')', '\)').replace(']',
-                                                                                                        '\]').replace(
-             '[', '\[').replace('+', '\+'), text)
-     l, r = [i for i in res][0].regs[0]
-     return l, r
-
-
  def squeeze_list(high_dim_list):
      return list(itertools.chain.from_iterable(high_dim_list))
 
 
  def unsqueeze_list(flatten_list, each_element_len):
+     # this function is wrong and has been replaced by split_list
      two_dim_list = [flatten_list[i * each_element_len:(i + 1) * each_element_len] for i in
                      range(len(flatten_list) // each_element_len)]
      return two_dim_list
 
+ def split_list(input_list, chunk_size):
+     # use a list comprehension to split the list into chunks
+     return [input_list[i:i + chunk_size] for i in range(0, len(input_list), chunk_size)]
 
  def auto_close():
      """
nlpertools-1.0.8.dist-info/METADATA CHANGED
@@ -1,6 +1,6 @@
  Metadata-Version: 2.1
  Name: nlpertools
- Version: 1.0.6.dev0
+ Version: 1.0.8
  Summary: A small package about small basic IO operation when coding
  Home-page: https://github.com/lvzii/nlpertools
  Author: youshuJi
@@ -12,6 +12,11 @@ Classifier: Operating System :: OS Independent
  Requires-Python: >=3.6
  Description-Content-Type: text/markdown
  License-File: LICENSE
+ Requires-Dist: numpy
+ Requires-Dist: pandas
+ Requires-Dist: psutil
+ Provides-Extra: torch
+ Requires-Dist: torch; extra == "torch"
 
  <div align="center">
  <h4 align="center">
@@ -23,9 +28,6 @@ License-File: LICENSE
  </div>
 
 
- # Current version
-
- 1.0.5
 
  # About
 
@@ -75,9 +77,9 @@ https://nlpertools.readthedocs.io/en/latest/
      def __init__(self, IPT_MODEL_PATH):
          self.ltp = LTP(IPT_MODEL_PATH)
  ```
- Packages that take more than 1 s to import (as measured with pyinstrument) are loaded this way
+ Packages that take more than 1 s to import (as measured with `pyinstrument`) are loaded this way
  - 2s+ happybase、seaborn、torch、jieba
- - 1s+
+ - 1s+ /
  - 0.5s+ pandas elasticsearch transformers xgboost nltk mongo
 
 
@@ -85,6 +87,8 @@ https://nlpertools.readthedocs.io/en/latest/
 
  - [readthedocs: check the documentation build status](https://readthedocs.org/projects/nlpertools/builds)
 
+ - [Packaging and release guide](https://juejin.cn/post/7369413136224878644)
+
  - Releases must be tagged
 
  ## Development philosophy
@@ -106,6 +110,23 @@ b = nlpertools.io.file.readtxt_list_all_strip('res.txt')
  ```
 
  ```bash
- # Monitor GPU memory
- python -m nlpertools
+ # Generate the live PyPI two-factor-authentication code (a key must be provided)
+ python -m nlpertools.get_2fa your_key
+
+ ## git
+ python -m nlpertools.cli --gitpush
+ python -m nlpertools.cli --gitpull
+
+ # The features below are superseded by nvitop and are no longer recommended
+ ## Monitor GPU memory
+ python -m nlpertools.monitor.gpu
+ ## Monitor CPU memory
+ python -m nlpertools.monitor.memory
  ```
+
+ ## Some frequently used companion projects
+
+ nvitop
+
+ ydata-profiling
+
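The README passage above describes deferring slow imports (as measured with `pyinstrument`) into the call sites that need them. A generic sketch of that pattern, not the package's actual loader; the class name is made up for illustration:

```python
# Sketch only: the lazy-import pattern the README describes.
class SentenceParser:
    def __init__(self, model_path):
        # heavy dependency imported only when the class is actually instantiated
        from ltp import LTP
        self.ltp = LTP(model_path)
```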
nlpertools-1.0.8.dist-info/RECORD CHANGED
@@ -1,12 +1,14 @@
- nlpertools/__init__.py,sha256=MTicDqOofy0loriBCpRcUdy8yQ9j5c3dFrwCVWWa8ic,536
- nlpertools/data_client.py,sha256=83jv7r7CsL8FYt7fJy_8ZKNG0XfAYiU8UPTYUURx4m8,13534
- nlpertools/dataprocess.py,sha256=z3nLWvWbGVi8N2mmOm70itgUhb_vhQYGVWWpVMwziNg,22658
+ nlpertools/__init__.py,sha256=h7JJEN_JRn3iKcqIcaFgYtAjP90XiT1KILrm8utoHvQ,483
+ nlpertools/cli.py,sha256=xDl_tWl9pfqQ3PUdd7oesvgM2FVqnaw8dFFliEX5c4Y,2203
+ nlpertools/data_client.py,sha256=esX8lUQrTui4uVkqPfhpHVok7Eq6ywpuemKjLeqoglc,14674
+ nlpertools/dataprocess.py,sha256=v1mobuYN7I3dT6xIKlNOHVtcg31YtjF6FwNPTxeBFFY,23153
  nlpertools/default_db_config.yml,sha256=E1K9k_xzXVlsf-HJQh8kyHXHYuvTpD12jD4Hfe5rUk8,606
- nlpertools/ml.py,sha256=n_WZMCAuD4KaaeYixq0RRiHiU1RuYLMV3dkGV7k4OaE,16798
+ nlpertools/get_2fa.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+ nlpertools/ml.py,sha256=z-0ep9svAyzcS2n7Lsyqo65VEQRGzWKFMLdZofCv1LQ,17716
  nlpertools/movie.py,sha256=rkyOnAXdsbWfMSbi1sE1VNRT7f66Hp9BnZsN_58Afmw,897
  nlpertools/nlpertools_config.yml,sha256=ksXejxFs7pxR47tNAsrN88_4gvq9PCA2ZMO07H-dJXY,26
  nlpertools/open_api.py,sha256=uyTY00OUlM57Cn0Wm0yZXcIS8vAszy9rKnDMBEWfWJM,1744
- nlpertools/other.py,sha256=TuMx0B4qL_0kIJu469k94gE5NsncCE-IEo3ejt3gH9A,11583
+ nlpertools/other.py,sha256=CeUea17Oe5MV_r-CmeYdAhdj5kWLvmxoDDgRc56o7bE,14704
  nlpertools/pic.py,sha256=13aaFJh3USGYGs4Y9tAKTvWjmdQR4YDjl3LlIhJheOA,9906
  nlpertools/plugin.py,sha256=LB7j9GdoQi6TITddH-6EglHlOa0WIHLUT7X5vb_aIZY,1168
  nlpertools/reminder.py,sha256=wiXwZQmxMck5vY3EvG8_oakP3FAdjGTikAIOiTPUQrs,2977
@@ -22,9 +24,12 @@ nlpertools/algo/template.py,sha256=9vsHr4g3jZZ5KVU_2I9i97o8asRXq-8pSaCXIv0sHeM,2
  nlpertools/algo/union.py,sha256=0l7lGZbw1qIfW1z5TE8Oo3tybL1bKIP5rzpa5ZT-vLQ,249
  nlpertools/data_structure/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
  nlpertools/data_structure/base_structure.py,sha256=gVUvJZ5jsCAswRETTpMwcEjLKoageWiTuCKNEwIWKWk,2641
+ nlpertools/draw/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+ nlpertools/draw/draw.py,sha256=PgdG7unpCtbbQdYISODTYMV7p10GwWDh9czeURkG0x4,2629
+ nlpertools/draw/math_func.py,sha256=0NQ22Dfi9DFG6Bg_hXnCT27w65-dqpOOIgZX7oUIW-Q,881
  nlpertools/io/__init__.py,sha256=YMuKtC2Ddh5dL5MvXjyUKYOOuqzFYUhBPFaP2kyFG9I,68
- nlpertools/io/dir.py,sha256=cK65qSZ9Tu5HrNrDiNyx0PEPrPldRSq34LpCWLz9WHc,1272
- nlpertools/io/file.py,sha256=tALfmzFRWztYpsmmBNvz-U6DXBe6cxef9j3_KexWdRM,6970
+ nlpertools/io/dir.py,sha256=p7J34qUxYCqKSO5DQMhL8FxFcHDrwn_1lIxNl0klasU,2267
+ nlpertools/io/file.py,sha256=CsFdluEczuz3fonbeZi9dHPasL1Hm18JL3Aux2ziQMU,7198
  nlpertools/monitor/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
  nlpertools/monitor/gpu.py,sha256=M59O6i0hlew7AzXZlaVZqbZA5IR93OhBY2WI0-T_HtY,531
  nlpertools/monitor/memory.py,sha256=9t6q9BC8VVx4o3G4sBCn7IoQRx272zMPjSnL3yvTBAQ,657
@@ -36,8 +41,9 @@ nlpertools/utils/package.py,sha256=wLg_M8j7Y6ReRjWHWCWoZJHrzEwuAr9TyG2jvb7OQCo,3
  nlpertools/utils/package_v1.py,sha256=sqgFb-zbTdMd5ziJLY6YUPqR49qUNZjxBH35DnyR5Wg,3542
  nlpertools/utils/package_v2.py,sha256=WOcsguWfUd4XSAfmPgCtL8HtUbqJ6GRSMHb0OsB47r0,3932
  nlpertools_helper/__init__.py,sha256=obxRUdZDctvcvK_iA1Dx2HmQFMlMzJto-xDPryq1lJ0,198
- nlpertools-1.0.6.dev0.dist-info/LICENSE,sha256=SBcMozykvTbZJ--MqSiKUmHLLROdnr25V70xCQgEwqw,11331
- nlpertools-1.0.6.dev0.dist-info/METADATA,sha256=VLFgFPh2o0YeWXEEoPIinO_rn6--mhUFU4vBASPPoNc,2772
- nlpertools-1.0.6.dev0.dist-info/WHEEL,sha256=GJ7t_kWBFywbagK5eo9IoUwLW6oyOeTKmQ-9iHFVNxQ,92
- nlpertools-1.0.6.dev0.dist-info/top_level.txt,sha256=_4q4MIFvMr4cAUbhWKWYdRXIXsF4PJDg4BUsZvgk94s,29
- nlpertools-1.0.6.dev0.dist-info/RECORD,,
+ nlpertools-1.0.8.dist-info/LICENSE,sha256=SBcMozykvTbZJ--MqSiKUmHLLROdnr25V70xCQgEwqw,11331
+ nlpertools-1.0.8.dist-info/METADATA,sha256=v2doRda1amZbXXfIYuzo-rFPvTICt3ByDCKVr6gsUw0,3276
+ nlpertools-1.0.8.dist-info/WHEEL,sha256=PZUExdf71Ui_so67QXpySuHtCi3-J3wvF4ORK6k_S8U,91
+ nlpertools-1.0.8.dist-info/entry_points.txt,sha256=XEazQ4vUwJMoMAgAwk1Lq4PRQGklPkPBaFkiP0zN_JE,45
+ nlpertools-1.0.8.dist-info/top_level.txt,sha256=_4q4MIFvMr4cAUbhWKWYdRXIXsF4PJDg4BUsZvgk94s,29
+ nlpertools-1.0.8.dist-info/RECORD,,
nlpertools-1.0.8.dist-info/WHEEL CHANGED
@@ -1,5 +1,5 @@
  Wheel-Version: 1.0
- Generator: bdist_wheel (0.43.0)
+ Generator: setuptools (75.6.0)
  Root-Is-Purelib: true
  Tag: py3-none-any
 
nlpertools-1.0.8.dist-info/entry_points.txt ADDED
@@ -0,0 +1,2 @@
+ [console_scripts]
+ ncli = nlpertools.cli:main
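The new entry point means installing the wheel generates an `ncli` executable that calls `nlpertools.cli:main`. A minimal sketch of that equivalence; the argument shown is purely an example:

```python
# Sketch only: what the "ncli" console script resolves to after installation.
# Running `ncli --mac_address` behaves roughly like:
import sys
from nlpertools.cli import main

sys.argv = ["ncli", "--mac_address"]  # example argument
main()
```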