nlpertools 1.0.6.dev0__py3-none-any.whl → 1.0.8__py3-none-any.whl

This diff compares two publicly released versions of the package as they appear in their public registry. It is provided for informational purposes only.
nlpertools/__init__.py CHANGED
@@ -3,6 +3,7 @@
  # @Author : youshu.Ji
  from .algo.kmp import *
  from .data_structure.base_structure import *
+ from .draw import *
  from .dataprocess import *
  from .io.dir import *
  from .io.file import *
@@ -15,10 +16,8 @@ from .reminder import *
  from .utils_for_nlpertools import *
  from .wrapper import *
  from .monitor import *
+ from .cli import *
 
- import os
 
 
- DB_CONFIG_FILE = os.path.join(os.path.dirname(__file__),"default_db_config.yml")
-
- __version__ = '1.0.5'
+ __version__ = '1.0.8'
nlpertools/cli.py ADDED
@@ -0,0 +1,87 @@
+ import argparse
+ import os
+ import uuid
+ import sys
+
+ import pyotp
+
+ """
+ 如何Debug cli.py
+ """
+
+
+ def git_push():
+     """
+     针对国内提交github经常失败,自动提交
+     """
+     num = -1
+     while 1:
+         num += 1
+         print("retry num: {}".format(num))
+         info = os.system("git push --set-upstream origin main")
+         print(str(info))
+         if not str(info).startswith("fatal"):
+             print("scucess")
+             break
+
+
+ def git_pull():
+     """
+     针对国内提交github经常失败,自动提交
+     """
+     num = -1
+     while 1:
+         num += 1
+         print("retry num: {}".format(num))
+         info = os.system("git pull")
+         print(str(info))
+         if not str(info).startswith("fatal") and not str(info).startswith("error"):
+             print("scucess")
+             break
+
+
+ def get_mac_address():
+     mac = uuid.UUID(int=uuid.getnode()).hex[-12:]
+     mac_address = ":".join([mac[e:e + 2] for e in range(0, 11, 2)])
+     print("mac address 不一定准确")
+     print(mac_address)
+     return mac_address
+
+
+ def get_2af_value(key):
+     """
+     key应该是7位的
+     """
+     print(key)
+     totp = pyotp.TOTP(key)
+     print(totp.now())
+
+
+ def main():
+     parser = argparse.ArgumentParser(description="CLI tool for git operations and getting MAC address.")
+     parser.add_argument('--gitpush', action='store_true', help='Perform git push operation.')
+     parser.add_argument('--gitpull', action='store_true', help='Perform git push operation.')
+     parser.add_argument('--mac_address', action='store_true', help='Get the MAC address.')
+
+     parser.add_argument('--get_2fa', action='store_true', help='Get the 2fa value.')
+     parser.add_argument('--get_2fa_key', type=str, help='Get the 2fa value.')
+
+     args = parser.parse_args()
+
+     if args.gitpush:
+         git_push()
+     elif args.gitpull:
+         git_pull()
+     elif args.mac_address:
+         get_mac_address()
+     elif args.get_2fa:
+         if args.get_2fa_key:
+             get_2af_value(args.get_2fa_key)
+         else:
+             print("Please provide a key as an argument.")
+     else:
+         print("No operation specified. Use --gitpush or --get_mac_address.")
+
+
+ if __name__ == '__main__':
+     main()
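For reference, a minimal usage sketch (illustrative, not part of the published files): it assumes nlpertools 1.0.8 is installed, relies on the `ncli` console script declared in the new `entry_points.txt` at the end of this diff, and uses a placeholder base32 TOTP secret.

```python
# Illustrative sketch only: assumes nlpertools 1.0.8 is installed and that the
# "ncli" console script (entry_points.txt: ncli = nlpertools.cli:main) is on PATH.
# "JBSWY3DPEHPK3PXP" is a placeholder base32 secret, not a real 2FA key.
import subprocess

subprocess.run(["ncli", "--mac_address"], check=False)
subprocess.run(["ncli", "--get_2fa", "--get_2fa_key", "JBSWY3DPEHPK3PXP"], check=False)

# The same functions can also be called directly:
from nlpertools import cli

cli.get_mac_address()
cli.get_2af_value("JBSWY3DPEHPK3PXP")
```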
nlpertools/data_client.py CHANGED
@@ -1,3 +1,4 @@
+ #encoding=utf-8
  # !/usr/bin/python3.8
  # -*- coding: utf-8 -*-
  # @Author : youshu.Ji
@@ -5,9 +6,11 @@ import datetime
  import json
  import logging
 
- from . import DB_CONFIG_FILE
  from .io.file import read_yaml
  from .utils.package import *
+ import os
+
+ DB_CONFIG_FILE = os.path.join(os.path.dirname(__file__), "default_db_config.yml")
 
  # import aioredis
  # import happybase
@@ -28,21 +31,24 @@ class Neo4jOps(object):
  NEO4J_TIMEOUT = 0.3
  pass
 
+
  class SqliteOps(object):
- import sqlite3
- database_path = r'xx.db'
- conn = sqlite3.connect(database_path)
- c = conn.cursor()
- sql = "select name from sqlite_master where type='table' order by name"
- c.execute(sql)
- print(c.fetchall())
- sql = "select * from typecho_contents"
- c.execute(sql)
- res = c.fetchall()
- print(res[3])
-
- conn.commit()
- conn.close()
+ pass
+ # import sqlite3
+ # database_path = r'xx.db'
+ # conn = sqlite3.connect(database_path)
+ # c = conn.cursor()
+ # sql = "select name from sqlite_master where type='table' order by name"
+ # c.execute(sql)
+ # print(c.fetchall())
+ # sql = "select * from typecho_contents"
+ # c.execute(sql)
+ # res = c.fetchall()
+ # print(res[3])
+ #
+ # conn.commit()
+ # conn.close()
+
 
  class MysqlOps(object):
  import pandas as pd
@@ -116,6 +122,41 @@ class EsOps(object):
  print(f"批量保存数据: {_res}")
 
 
+ class MongoDB_BETA:
+ def __init__(self, host='localhost', port=27017, db_name=None, collection_name=None):
+ self.host = host
+ self.port = port
+ self.db_name = db_name
+ self.collection_name = collection_name
+ self.client = None
+ self.db = None
+ self.collection = None
+
+ def connect(self):
+ self.client = MongoClient(self.host, self.port)
+ self.db = self.client[self.db_name]
+ self.collection = self.db[self.collection_name]
+
+ def close(self):
+ if self.client:
+ self.client.close()
+
+ def insert_data(self, data):
+ if isinstance(data, list):
+ self.collection.insert_many(data)
+ else:
+ self.collection.insert_one(data)
+
+ def check_data_exists(self, query):
+ """
+ 检查某个数据是否存在于数据库中
+ :param query: 查询条件
+ :return: 布尔值,表示数据是否存在
+ """
+ return self.collection.count_documents(query) > 0
+
+
+
  class MongoOps(object):
  from pymongo import MongoClient
  def __init__(self, config=global_db_config["mongo"]):
@@ -348,8 +389,6 @@ class KafkaOps(object):
  print(recv)
 
 
-
-
  class MilvusOps(object):
  def __init__(self, config=global_db_config.milvus):
  from pymilvus import connections, Collection
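A short, hedged sketch of how the new `MongoDB_BETA` helper might be used; it assumes a MongoDB instance on localhost:27017 and that `MongoClient` resolves inside `data_client` (the class body references it without an explicit import in this hunk, so pymongo must be importable).

```python
# Usage sketch for the new MongoDB_BETA class (assumptions: a local mongod on
# localhost:27017, and pymongo installed so that MongoClient resolves).
from nlpertools.data_client import MongoDB_BETA

store = MongoDB_BETA(host="localhost", port=27017,
                     db_name="test_db", collection_name="docs")
store.connect()
store.insert_data({"_id": 1, "text": "hello"})        # single document -> insert_one
store.insert_data([{"text": "a"}, {"text": "b"}])     # list -> insert_many
print(store.check_data_exists({"_id": 1}))            # True if a matching document exists
store.close()
```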
nlpertools/dataprocess.py CHANGED
@@ -55,9 +55,9 @@ class Pattern:
  # 中文人名
  chinese_name_pattern = "(?:[\u4e00-\u9fa5·]{2,3})"
  # 英文人名
- english_name_pattern = "(^[a-zA-Z][a-zA-Z\s]{0,20}[a-zA-Z]$)"
+ english_name_pattern = r"(^[a-zA-Z][a-zA-Z\s]{0,20}[a-zA-Z]$)"
  # 纯数字
- pure_num_pattern = "\d+"
+ pure_num_pattern = r"\d+"
  # xxxx图/表 之类的表述
  pic_table_descript_pattern = ".{1,15}图"
 
@@ -66,20 +66,20 @@ class Pattern:
  hlink_pattern = (
  r"(https?|ftp|file)://[-A-Za-z0-9+&@#/%?=~_|!:,.;]+[-A-Za-z0-9+&@#/%=~_|]"
  )
- http_pattern = "(http|https):\/\/([\w.]+\/?)\S*/\S*"
+ http_pattern = r"(http|https):\/\/([\w.]+\/?)\S*/\S*"
  # 邮箱
- email_pattern = "[A-Za-z0-9\u4e00-\u9fa5]+@[a-zA-Z0-9_-]+(\.[a-zA-Z0-9_-]+)+"
+ email_pattern = r"[A-Za-z0-9\u4e00-\u9fa5]+@[a-zA-Z0-9_-]+(\.[a-zA-Z0-9_-]+)+"
  # html 可能过于严格了
- html_pattern = "<[\s\S]*?>"
+ html_pattern = r"<[\s\S]*?>"
  # 重复 “asdasdasdasd”
  repeat_pattern = "(.)\1+"
  # 日期
- day_time_pattern = "\d{1,4}(-)(1[0-2]|0?[1-9])\1(0?[1-9]|[1-2]\d|30|31)"
+ day_time_pattern = r"\d{1,4}(-)(1[0-2]|0?[1-9])\1(0?[1-9]|[1-2]\d|30|31)"
  # 小时
- hour_time_pattern = "(?:[01]\d|2[0-3]):[0-5]\d:[0-5]\d"
+ hour_time_pattern = r"(?:[01]\d|2[0-3]):[0-5]\d:[0-5]\d"
  # 股票
  stock_pattern = (
- "(s[hz]|S[HZ])(000[\d]{3}|002[\d]{3}|300[\d]{3}|600[\d]{3}|60[\d]{4})"
+ r"(s[hz]|S[HZ])(000[\d]{3}|002[\d]{3}|300[\d]{3}|600[\d]{3}|60[\d]{4})"
  )
 
  # 一般是需要替换的
@@ -91,7 +91,7 @@ class Pattern:
  # 微博视频等
  weibo_pattern = r"([\s]\w+(的微博视频)|#|【|】|转发微博)"
  # @
- at_pattern = "@\w+"
+ at_pattern = r"@\w+"
 
  # from https://github.com/bigscience-workshop/data-preparation pii
  year_patterns = [
@@ -116,7 +116,7 @@ class Pattern:
  ipv4_pattern = r'(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)(?:\.(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)){3}'
  ipv6_pattern = r'(?:[0-9a-fA-F]{1,4}:){7,7}[0-9a-fA-F]{1,4}|(?:[0-9a-fA-F]{1,4}:){1,7}:|(?:[0-9a-fA-F]{1,4}:){1,6}:[0-9a-fA-F]{1,4}|(?:[0-9a-fA-F]{1,4}:){1,5}(?::[0-9a-fA-F]{1,4}){1,2}|(?:[0-9a-fA-F]{1,4}:){1,4}(?::[0-9a-fA-F]{1,4}){1,3}|(?:[0-9a-fA-F]{1,4}:){1,3}(?::[0-9a-fA-F]{1,4}){1,4}|(?:[0-9a-fA-F]{1,4}:){1,2}(?::[0-9a-fA-F]{1,4}){1,5}|[0-9a-fA-F]{1,4}:(?:(?::[0-9a-fA-F]{1,4}){1,6})|:(?:(?::[0-9a-fA-F]{1,4}){1,7}|:)|fe80:(?::[0-9a-fA-F]{0,4}){0,4}%[0-9a-zA-Z]{1,}|::(?:ffff(?::0{1,4}){0,1}:){0,1}(?:(?:25[0-5]|(?:2[0-4]|1{0,1}[0-9]){0,1}[0-9])\.){3,3}(?:25[0-5]|(?:2[0-4]|1{0,1}[0-9]){0,1}[0-9])|(?:[0-9a-fA-F]{1,4}:){1,4}:(?:(?:25[0-5]|(?:2[0-4]|1{0,1}[0-9]){0,1}[0-9])\.){3,3}(25[0-5]|(?:2[0-4]|1{0,1}[0-9]){0,1}[0-9])'
  ip_pattern = r"(?:^|[\b\s@?,!;:\'\")(.\p{Han}])(" + r"|".join(
- [ipv4_pattern, ipv6_pattern]) + ")(?:$|[\s@,?!;:\'\"(.\p{Han}])"
+ [ipv4_pattern, ipv6_pattern]) + r")(?:$|[\s@,?!;:\'\"(.\p{Han}])"
 
  # https://regex101.com/r/EpA5B7/1
  email_line_pattern = r'''
@@ -466,7 +466,7 @@ class TextProcess(object):
  p = re.compile(pattern, re.S)
  text = p.sub("", text)
 
- dr = re.compile("@\w+", re.S)
+ dr = re.compile(r"@\w+", re.S)
  text = dr.sub("", text)
 
  return text
@@ -527,7 +527,7 @@ class TextProcess(object):
  text = re.sub(pattern, replace, text)
  return text
 
- def calc_proportion_zh(self,text):
+ def calc_proportion_zh(self, text):
  text = text.strip()
  # 如果是中国英文的情况,并且英文有空格分开
  if " " in text:
@@ -538,6 +538,8 @@ class TextProcess(object):
  chinese_count += 1
  else:
  pass
+
+
  class CopyFunc():
  # from https://github.com/lemon234071/clean-dialog
  def is_chinese_char(cp):
@@ -597,6 +599,20 @@ def convert_basic2fullwidth(sentence):
  new_sentence += char
  return new_sentence
 
+
+ def clean_illegal_chars_for_excel(df):
+ # openpyxl 库写入 Excel 文件时,有一些非法字符,需要删除
+ # 定义一个函数来移除字符串中的非法字符
+ def remove_illegal_chars(s):
+ if isinstance(s, str):
+ # 移除 ASCII 码在非法范围内的字符
+ return re.sub(r'[\x00-\x08\x0B\x0C\x0E-\x1F]', '', s)
+ return s
+
+ # 应用清理函数到数据框的每个元素
+ return df.map(remove_illegal_chars)
+
+
  if __name__ == "__main__":
  pattern_for_filter = [
  Pattern.redundancy_space_pattern,
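A small sketch of the new `clean_illegal_chars_for_excel` helper added above; it assumes pandas 2.1+ (where `DataFrame.map` is element-wise, as the diff uses it; older pandas exposes the same behaviour as `applymap`) and openpyxl for the final write.

```python
# Sketch: strip control characters that openpyxl rejects before writing .xlsx.
# Assumes pandas >= 2.1 (DataFrame.map is element-wise) and openpyxl installed.
import pandas as pd
from nlpertools.dataprocess import clean_illegal_chars_for_excel

df = pd.DataFrame({"text": ["ok", "bad\x03char", "tab\tis kept"]})
cleaned = clean_illegal_chars_for_excel(df)
print(cleaned.loc[1, "text"])        # "badchar": \x03 falls in the removed range
cleaned.to_excel("out.xlsx", index=False)
```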
nlpertools/draw/__init__.py ADDED
File without changes
nlpertools/draw/draw.py ADDED
@@ -0,0 +1,83 @@
+ #!/usr/bin/python3.8
+ # -*- coding: utf-8 -*-
+ # @Author : youshu.Ji
+ from ..utils.package import plt
+
+
+ def confused_matrix(confuse_matrix):
+     import seaborn as sns
+     sns.set()
+     f, ax = plt.subplots()
+     ticklabels = ["l1", "l2", "l31"]
+     sns.heatmap(confuse_matrix, annot=True, fmt=".3g", ax=ax, cmap='rainbow',
+                 xticklabels=ticklabels, yticklabels=ticklabels)  # 画热力图
+
+     ax.set_title('confusion matrix')  # 标题
+     ax.set_xlabel('predict')  # x轴
+     ax.set_ylabel('true')  # y轴
+     plt.show()
+
+     f.savefig('tmp.jpg', bbox_inches='tight')
+
+
+ def plot_histogram(data, bin_size):
+     """
+     画直方图,超过1000的统一按1000算
+     :param data:
+     :param bin_size:
+     :return:
+     """
+     import matplotlib.pyplot as plt
+     import numpy as np
+     import pandas as pd
+     from matplotlib.ticker import MaxNLocator
+     # 将超过1000的值改为1000
+     def process_lengths(data):
+         return [length if length <= 1000 else 1003 for length in data]
+
+     # 前闭后开
+     min_num, max_num = 0, 1000
+     # min_num, max_num = min(data), max(data)
+
+     plt.figure(figsize=(12, 8))
+     processed_data = process_lengths(data)
+     bins = np.arange(0, 1000 + 2 * bin_size, bin_size)
+     # 绘制直方图
+     n, new_bins, patches = plt.hist(processed_data, bins=bins, edgecolor='black', color='skyblue', alpha=0.7,
+                                     linewidth=0)
+
+     # 添加"∞"的标签
+     # bins会改变
+     plt.gca().set_xticks(bins)
+     plt.gca().set_xticklabels([str(i) for i in plt.xticks()[0][:-1]] + ["∞"])
+
+     mean_val = np.mean(data)
+     plt.axvline(mean_val, color='red', linestyle='dashed', linewidth=1)
+     plt.text(mean_val + bin_size / 10, max(n) * 0.9, f'Mean: {mean_val:.2f}', color='red')
+
+     # 添加标题和标签
+     plt.title('Module Line Number Distribution', fontsize=16, fontweight='bold')
+     plt.xlabel('module line number', fontsize=14)
+     plt.ylabel('frequency', fontsize=14)
+
+     # 添加网格
+     plt.grid(True, linestyle='--', alpha=0.6)
+
+     # 美化x轴和y轴的刻度
+     plt.xticks(fontsize=12)
+     plt.yticks(fontsize=12)
+
+     # 在每个柱状图上显示数值
+     for i in range(len(patches)):
+         plt.text(patches[i].get_x() + patches[i].get_width() / 2, patches[i].get_height(),
+                  str(int(n[i])), ha='center', va='bottom', fontsize=12)
+     plt.gca().yaxis.set_major_locator(MaxNLocator(integer=True))
+     # 显示图表
+     plt.show()
+
+
+ if __name__ == '__main__':
+     # 调整区间大小
+     bin_size = 50
+     # 示例模块长度数据
+     plot_histogram([1, 100, 999, 1000, 1002, 1100, 1150], bin_size)
nlpertools/draw/math_func.py ADDED
@@ -0,0 +1,33 @@
+ # 数学函数
+ def draw_log():
+     import matplotlib.pyplot as plt
+     import numpy as np
+     from matplotlib.ticker import MultipleLocator, FormatStrFormatter
+
+     # 生成一些数据
+     x = np.linspace(0.1, 10, 100)
+     # 默认log指的时loge
+     y = np.log(x)
+
+     # 创建一个新的图形和轴
+     fig, ax = plt.subplots()
+
+     # 绘制log图像
+     ax.plot(x, y)
+
+     # 设置图像标题和轴标签
+     ax.set_title("Logarithmic Function")
+     ax.set_xlabel("x")
+     ax.set_ylabel("log(x)")
+     # 设置横坐标的刻度间隔为1
+     ax.xaxis.set_major_locator(MultipleLocator(1))
+
+     # 设置横坐标的刻度格式
+     ax.xaxis.set_major_formatter(FormatStrFormatter("%.1f"))
+     # 添加x=1的虚线
+     ax.axvline(x=1, linestyle="--", color="gray")
+     # 添加y=1的虚线
+     ax.axhline(y=0, linestyle="--", color="gray")
+
+     # 显示图像
+     plt.show()
nlpertools/get_2fa.py ADDED
File without changes
nlpertools/io/dir.py CHANGED
@@ -10,7 +10,30 @@ def j_mkdir(name):
  os.makedirs(name, exist_ok=True)
 
 
- def get_filename(path) -> str:
+ def j_walk(name, suffix=None):
+ paths = []
+ for root, dirs, files in os.walk(name):
+ for file in files:
+ path = os.path.join(root, file)
+ if not (suffix and not path.endswith(suffix)):
+ paths.append(path)
+ return paths
+
+
+ def windows_to_wsl_path(windows_path):
+ # 转换驱动器号
+ if windows_path[1:3] == ':\\':
+ drive_letter = windows_path[0].lower()
+ path = windows_path[2:].replace('\\', '/')
+ wsl_path = f'/mnt/{drive_letter}{path}'
+ else:
+ # 如果路径不是以驱动器号开头,则直接替换路径分隔符
+ wsl_path = windows_path.replace('\\', '/').replace("'", "\'")
+
+ return wsl_path
+
+
+ def get_filename(path, suffix=True) -> str:
  """
  返回路径最后的文件名
  :param path:
@@ -18,11 +41,20 @@ def get_filename(path) -> str:
  """
  # path = r'***/**/***.txt'
  filename = os.path.split(path)[-1]
+ if not suffix:
+ filename = filename.split('.')[0]
  return filename
 
 
  def j_listdir(dir_name, including_dir=True):
- # yield
+ filenames = os.listdir(dir_name)
+ if including_dir:
+ return [os.path.join(dir_name, filename) for filename in filenames]
+ else:
+ return list(filenames)
+
+
+ def j_listdir_yield(dir_name, including_dir=True):
  filenames = os.listdir(dir_name)
  for filename in filenames:
  if including_dir:
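A brief sketch of the new directory helpers added above; the paths are illustrative only.

```python
# Illustrative usage of the new helpers in nlpertools/io/dir.py.
from nlpertools.io.dir import j_walk, windows_to_wsl_path, get_filename

txt_files = j_walk("./data", suffix=".txt")            # recursive walk, optional suffix filter
print(windows_to_wsl_path(r"C:\Users\me\file.txt"))    # /mnt/c/Users/me/file.txt
print(get_filename("data/report.final.txt", suffix=False))  # "report" (everything after the first dot is dropped)
```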
nlpertools/io/file.py CHANGED
@@ -5,7 +5,6 @@ import codecs
  import json
  import pickle
  import random
- import time
  from itertools import (takewhile, repeat)
  import pandas as pd
  # import omegaconf
@@ -15,10 +14,16 @@ from ..utils.package import *
  LARGE_FILE_THRESHOLD = 1e5
 
 
+ def safe_filename(filename: str) -> str:
+ for char in ['\\', '/', ':', '*', '?', '"', '<', '>', '|']:
+ filename = filename.replace(char, '_')
+ return filename
+
+
  def read_yaml(path, omega=False):
  if omega:
  return omegaconf.OmegaConf.load(path)
- return yaml.load(codecs.open(path), Loader=yaml.FullLoader)
+ return yaml.load(codecs.open(path, encoding='utf-8'), Loader=yaml.FullLoader)
 
 
  def _merge_file(filelist, save_filename, shuffle=False):
@@ -52,7 +57,7 @@ load_from_json
 
 
  # 读txt文件 一次全读完 返回list 去换行
- def readtxt_list_all_strip(path, encoding='utf-8'):
+ def readtxt_list_all_strip(path, encoding='utf-8') -> list:
  file_line_num = iter_count(path)
  lines = []
  with codecs.open(path, 'r', encoding) as r:
@@ -67,7 +72,7 @@ def readtxt_list_all_strip(path, encoding='utf-8'):
 
 
  # 读txt 一次读一行 最后返回list
- def readtxt_list_each(path):
+ def readtxt_list_each(path) -> list:
  lines = []
  with codecs.open(path, 'r', 'utf-8') as r:
  line = r.readline()
@@ -77,7 +82,7 @@ def readtxt_list_each(path):
  return lines
 
 
- def readtxt_list_each_strip(path):
+ def readtxt_list_each_strip(path) -> list:
  """
  yield方法
  """
@@ -89,14 +94,14 @@ def readtxt_list_each_strip(path):
 
 
  # 读txt文件 一次全读完 返回list
- def readtxt_list_all(path):
+ def readtxt_list_all(path) -> list:
  with codecs.open(path, 'r', 'utf-8') as r:
  lines = r.readlines()
  return lines
 
 
  # 读byte文件 读成一条string
- def readtxt_byte(path, encoding="utf-8"):
+ def readtxt_byte(path, encoding="utf-8") -> str:
  with codecs.open(path, 'rb') as r:
  lines = r.read()
  lines = lines.decode(encoding)
@@ -104,7 +109,7 @@ def readtxt_byte(path, encoding="utf-8"):
 
 
  # 读txt文件 读成一条string
- def readtxt_string(path, encoding="utf-8"):
+ def readtxt_string(path, encoding="utf-8") -> str:
  with codecs.open(path, 'r', encoding) as r:
  lines = r.read()
  return lines.replace('\r', '')
@@ -261,6 +266,7 @@ def save_to_mongo():
  """
  pass
 
+
  def load_from_mongo():
  pass
 
@@ -274,4 +280,4 @@ def unmerge_cells_df(df) -> pd.DataFrame:
  else:
  values.append(i)
  df[column] = values
- return df
+ return df
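A quick sketch of the new `safe_filename` helper together with the existing list writer; the file name below is illustrative.

```python
# Sketch: characters that are invalid in Windows file names become underscores.
from nlpertools.io.file import safe_filename, writetxt_w_list

name = safe_filename('results: "run 1" <draft?>.txt')
print(name)   # results_ _run 1_ _draft__.txt
writetxt_w_list(["line 1", "line 2"], name)
```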
nlpertools/ml.py CHANGED
@@ -18,9 +18,8 @@ from .utils.package import *
 
 
  def calc_llm_train_activation_memory(
- model_name, sequence_length, batch_size, hidden_dim, lay_number, attention_heads_num, gpu_num=1
+ model_name, sequence_length, batch_size, hidden_dim, lay_number, attention_heads_num, gpu_num=1
  ):
-
  """
  return bytes
 
@@ -33,18 +32,18 @@ def calc_llm_train_activation_memory(
  # FFN
  # Layer Norm
  r1 = (
- sequence_length
- * batch_size
- * hidden_dim
- * lay_number
- * (34 + 5 * attention_heads_num * sequence_length / hidden_dim)
+ sequence_length
+ * batch_size
+ * hidden_dim
+ * lay_number
+ * (34 + 5 * attention_heads_num * sequence_length / hidden_dim)
  )
  # reference2
  r2 = (
- lay_number*(2 * sequence_length * attention_heads_num + 16 * hidden_dim)
- * sequence_length
- * batch_size
- / gpu_num
+ lay_number * (2 * sequence_length * attention_heads_num + 16 * hidden_dim)
+ * sequence_length
+ * batch_size
+ / gpu_num
  )
  print(r1)
  print(r2)
@@ -80,7 +79,7 @@ class DataStructure:
  }
  ner_input_example = "这句话一共有两个实体分别为大象和老鼠。"
  ner_label_example = (
- list("OOOOOOOOOOOOO") + ["B-s", "I-s"] + ["O"] + ["B-o", "I-o"] + ["O"]
+ list("OOOOOOOOOOOOO") + ["B-s", "I-s"] + ["O"] + ["B-o", "I-o"] + ["O"]
  )
 
 
@@ -135,7 +134,7 @@ class STEM(object):
  if each_srl:
  args = []
  for arg in each_srl:
- args.extend(seg[arg[1] : arg[2] + 1])
+ args.extend(seg[arg[1]: arg[2] + 1])
  # 添加上谓词
  args.insert(each_srl[0][2] - each_srl[0][1] + 1, seg[wdx])
  events.append(args)
@@ -174,7 +173,7 @@ def subject_object_labeling(spo_list, text):
  q_list_length = len(q_list)
  k_list_length = len(k_list)
  for idx in range(k_list_length - q_list_length + 1):
- t = [q == k for q, k in zip(q_list, k_list[idx : idx + q_list_length])]
+ t = [q == k for q, k in zip(q_list, k_list[idx: idx + q_list_length])]
  # print(idx, t)
  if all(t):
  # print(idx)
@@ -187,8 +186,8 @@ def subject_object_labeling(spo_list, text):
  if len(spo) == 2:
  labeling_list[idx_start + 1] = "I-" + spo_type
  elif len(spo) >= 3:
- labeling_list[idx_start + 1 : idx_start + len(spo)] = ["I-" + spo_type] * (
- len(spo) - 1
+ labeling_list[idx_start + 1: idx_start + len(spo)] = ["I-" + spo_type] * (
+ len(spo) - 1
  )
  else:
  pass
@@ -239,12 +238,12 @@ def convert_crf_format_10_fold(corpus, objdir_path):
  split_position = int(len(corpus) / 10)
  for k in range(0, 10):
  if k == 9:
- dev_set = corpus[k * split_position :]
+ dev_set = corpus[k * split_position:]
  train_set = corpus[: k * split_position]
  else:
- dev_set = corpus[k * split_position : (k + 1) * split_position]
+ dev_set = corpus[k * split_position: (k + 1) * split_position]
  train_set = (
- corpus[: k * split_position] + corpus[(k + 1) * split_position :]
+ corpus[: k * split_position] + corpus[(k + 1) * split_position:]
  )
  writetxt_w_list(
  train_set, os.path.join(objdir_path, "train{}.txt".format(k + 1))
@@ -292,12 +291,41 @@ def kfold_txt(corpus, path, k=9, is_shuffle=True):
  if is_shuffle:
  random.shuffle(corpus)
  split_position = int(len(corpus) / 10)
- train_set, dev_set = corpus[: k * split_position], corpus[k * split_position :]
+ train_set, dev_set = corpus[: k * split_position], corpus[k * split_position:]
  writetxt_w_list(train_set, os.path.join(path, "train.tsv"), num_lf=1)
  writetxt_w_list(dev_set, os.path.join(path, "test.tsv"), num_lf=1)
  writetxt_w_list(dev_set, os.path.join(path, "dev.tsv"), num_lf=1)
 
 
+ def sample():
+ import pandas as pd
+ from sklearn.model_selection import StratifiedShuffleSplit
+
+ # 假设 df 是你的 DataFrame
+
+ df = pd.DataFrame({
+ "count_line": [i for i in range(100)],
+ "x": [i for i in range(100)],
+ "y": [i // 10 for i in range(100)],
+ })
+ print(df)
+ # count_line 是用于分层抽样的字段
+
+ # 创建 StratifiedShuffleSplit 对象,设置测试集比例为 0.1
+ split = StratifiedShuffleSplit(n_splits=1, test_size=0.1, random_state=42)
+
+ # 获取训练集和测试集的索引
+ train_index, test_index = next(split.split(df, df['y']))
+
+ # 根据索引划分训练集和测试集
+ train_df = df.loc[train_index]
+ test_df = df.loc[test_index]
+
+ # 打印训练集和测试集的行数
+ print("训练集行数:", len(train_df))
+ print("测试集行数:", len(test_df))
+
+
  def kfold_df(df, save_dir=None):
  """
  划分train test val集, 写为windows可读的csv。
@@ -389,7 +417,7 @@ def split_sentence(sentence, language="chinese", cross_line=True):
  for idx, char in enumerate(sentence):
  if idx == len(sentence) - 1:
  if char in split_signs:
- sentences.append(sentence[start_idx : idx + 1].strip())
+ sentences.append(sentence[start_idx: idx + 1].strip())
  start_idx = idx + 1
  else:
  sentences.append(sentence[start_idx:].strip())
@@ -399,10 +427,10 @@ def split_sentence(sentence, language="chinese", cross_line=True):
  if idx < len(sentence) - 2:
  # 处理。”。
  if sentence[idx + 2] not in split_signs:
- sentences.append(sentence[start_idx : idx + 2].strip())
+ sentences.append(sentence[start_idx: idx + 2].strip())
  start_idx = idx + 2
  elif sentence[idx + 1] not in split_signs:
- sentences.append(sentence[start_idx : idx + 1].strip())
+ sentences.append(sentence[start_idx: idx + 1].strip())
  start_idx = idx + 1
  return sentences
 
@@ -480,4 +508,4 @@ if __name__ == "__main__":
  attention_heads_num=32,
  gpu_num=1
  )
- print(res, "G")
+ print(res, "G")
nlpertools/other.py CHANGED
@@ -5,10 +5,13 @@ import itertools
  import os
  import re
  import string
+ import subprocess
+ import threading
  from concurrent.futures import ThreadPoolExecutor
  from functools import reduce
  import math
  import datetime
+ import difflib
  import psutil
  from .io.file import writetxt_w_list, writetxt_a
  # import numpy as np
@@ -27,6 +30,134 @@ ENGLISH_PUNCTUATION = list(',.;:\'"!?<>()')
  OTHER_PUNCTUATION = list('!@#$%^&*')
 
 
+ def get_diff_parts(str1, str2):
+ # 创建一个 SequenceMatcher 对象
+ matcher = difflib.SequenceMatcher(None, str1, str2)
+
+ # 获取差异部分
+ diff_parts = []
+ for tag, i1, i2, j1, j2 in matcher.get_opcodes():
+ if tag == 'replace' or tag == 'delete' or tag == 'insert':
+ diff_parts.append((tag, str1[i1:i2], str2[j1:j2]))
+
+ return diff_parts
+
+
+ def run_cmd_with_timeout(cmd, timeout):
+ """
+ https://juejin.cn/post/7391703459803086848
+ """
+ process = subprocess.Popen(cmd, shell=True, encoding="utf-8", errors="ignore", stdout=subprocess.PIPE,
+ stderr=subprocess.PIPE)
+ res = [None]
+
+ def target():
+ try:
+ ans = process.communicate()
+ res[0] = ans
+ except subprocess.TimeoutExpired:
+ process.kill()
+ process.communicate()
+
+ thread = threading.Thread(target=target)
+ thread.start()
+ thread.join(timeout)
+ if thread.is_alive():
+ print(f"Terminating {cmd}")
+ process.terminate()
+ thread.join()
+ print("Terminated successfully")
+ return False, f"{cmd} is running over {timeout}s"
+ if process.returncode == 0:
+ # res[0][0] 是output
+ return True, res[0][0]
+ else:
+ return False, res[0][0]
+
+
+ def print_three_line_table(df):
+ # TODO 这里需要添加可以支持excel里变红的功能
+ import webbrowser
+
+ # import pandas as pd
+ # data = {'from_pc': ['valid_data', 'illegal_char', 'more_data'],
+ # 'rom_pc': ['another_valid_data', 'illegal_char', 'data']}
+ # df = pd.DataFrame(data)
+
+ # 将 DataFrame 转换为 HTML 表格
+ html_table = df.to_html(index=False)
+ html_table = html_table.replace('border="1"', 'border="0"')
+
+ first_line_px = str(2)
+ second_line_px = str(1)
+ third_line_px = str(2)
+ # 定义三线表的 CSS 样式
+ # // thead 表头
+ # // tr 行
+ # // td 单元格
+ head = """<!DOCTYPE html>
+ <html lang="zh">
+ <head>
+ <meta charset="UTF-8">
+ <title>页面标题</title>
+ </head>"""
+ style = """
+ <style>
+
+ table {
+ border-collapse: collapse;
+ }
+
+ tr, td, th {
+ text-align: center; /* 水平居中文本 */
+ vertical-align: middle; /* 垂直居中文本 */
+ }
+ thead tr {
+ border-top: (first_line_px)px solid black;
+ border-bottom: (second_line_px)px solid black;
+ }
+
+ thead th {
+ border-bottom: (second_line_px)px solid black;
+ }
+
+ tbody tr td {
+ border-bottom: 0px solid black;
+ }
+
+ tbody tr:last-child td {
+ border-bottom: (third_line_px)px solid black;
+ }
+ </style>"""
+ style = style.replace("(first_line_px)", first_line_px).replace("(second_line_px)", second_line_px).replace(
+ "(third_line_px)", third_line_px)
+ # 将 CSS 样式和 HTML 表格结合起来
+ html = f"{style}{html_table}"
+ print(html)
+ temp_file_path = "temp.html"
+ # 将 HTML 保存到文件中
+ with open(temp_file_path, "w") as f:
+ f.write(html)
+ webbrowser.open('file://' + os.path.realpath(temp_file_path))
+
+
+ def jprint(obj, depth=0):
+ if isinstance(obj, dict):
+ sep = "-" * (10 - depth * 3)
+ for k, v in obj.items():
+ print(depth * "|", sep, k, sep)
+ jprint(v)
+ elif isinstance(obj, list):
+ for v in obj:
+ jprint(v, depth + 1)
+ else:
+ print(obj)
+
+
+ def print_split(sign="=", num=20):
+ print(sign * num)
+
+
  def seed_everything():
  import torch
  # seed everything
@@ -82,21 +213,6 @@ def convert_np_to_py(obj):
  return obj
 
 
- def git_push():
- """
- 针对国内提交github经常失败,自动提交
- """
- num = -1
- while 1:
- num += 1
- print("retry num: {}".format(num))
- info = os.system("git push --set-upstream origin main")
- print(str(info))
- if not str(info).startswith("fatal"):
- print("scucess")
- break
-
-
  def snake_to_camel(s: str) -> str:
  """
  author: u
@@ -235,24 +351,19 @@ def stress_test(func, ipts):
  return results
 
 
- def get_substring_loc(text, subtext):
- res = re.finditer(
- subtext.replace('\\', '\\\\').replace('?', '\?').replace('(', '\(').replace(')', '\)').replace(']',
- '\]').replace(
- '[', '\[').replace('+', '\+'), text)
- l, r = [i for i in res][0].regs[0]
- return l, r
-
-
  def squeeze_list(high_dim_list):
  return list(itertools.chain.from_iterable(high_dim_list))
 
 
  def unsqueeze_list(flatten_list, each_element_len):
+ # 该函数是错的,被split_list替代了
  two_dim_list = [flatten_list[i * each_element_len:(i + 1) * each_element_len] for i in
  range(len(flatten_list) // each_element_len)]
  return two_dim_list
 
+ def split_list(input_list, chunk_size):
+ # 使用列表推导式将列表分割成二维数组
+ return [input_list[i:i + chunk_size] for i in range(0, len(input_list), chunk_size)]
 
  def auto_close():
  """
nlpertools-1.0.6.dev0.dist-info/METADATA → nlpertools-1.0.8.dist-info/METADATA CHANGED
@@ -1,6 +1,6 @@
  Metadata-Version: 2.1
  Name: nlpertools
- Version: 1.0.6.dev0
+ Version: 1.0.8
  Summary: A small package about small basic IO operation when coding
  Home-page: https://github.com/lvzii/nlpertools
  Author: youshuJi
@@ -12,6 +12,11 @@ Classifier: Operating System :: OS Independent
  Requires-Python: >=3.6
  Description-Content-Type: text/markdown
  License-File: LICENSE
+ Requires-Dist: numpy
+ Requires-Dist: pandas
+ Requires-Dist: psutil
+ Provides-Extra: torch
+ Requires-Dist: torch; extra == "torch"
 
  <div align="center">
  <h4 align="center">
@@ -23,9 +28,6 @@ License-File: LICENSE
  </div>
 
 
- # 当前版本
-
- 1.0.5
 
  # 说明
 
@@ -75,9 +77,9 @@ https://nlpertools.readthedocs.io/en/latest/
  def __init__(self, IPT_MODEL_PATH):
  self.ltp = LTP(IPT_MODEL_PATH)
  ```
- 通过pyinstrument判断,超过1s的包即采用这种方式
+ 通过`pyinstrument`判断,超过1s的包即采用这种方式
  - 2s+ happybase、seaborn、torch、jieba
- - 1s+
+ - 1s+ /
  - 0.5s+ pandas elasticsearch transformers xgboost nltk mongo
 
 
@@ -85,6 +87,8 @@ https://nlpertools.readthedocs.io/en/latest/
 
  - [readthedoc 检查文档构建状况](https://readthedocs.org/projects/nlpertools/builds)
 
+ - [打包发布指南](https://juejin.cn/post/7369413136224878644)
+
  - 发布版本需要加tag
 
  ## 开发哲学
@@ -106,6 +110,23 @@ b = nlpertools.io.file.readtxt_list_all_strip('res.txt')
  ```
 
  ```bash
- # 监控gpu显存
- python -m nlpertools
+ # 生成pypi双因素认证的实时密钥(需要提供key)
+ python -m nlpertools.get_2fa your_key
+
+ ## git
+ python nlpertools.cli --git_push
+ python nlpertools.cli --git_pull
+
+ # 以下功能被nvitop替代,不推荐使用
+ ## 监控gpu显存
+ python -m nlpertools.monitor.gpu
+ ## 监控cpu
+ python -m nlpertools.monitor.memory
  ```
+
+ ## 一些常用项目
+
+ nvitop
+
+ ydata-profiling
+
nlpertools-1.0.6.dev0.dist-info/RECORD → nlpertools-1.0.8.dist-info/RECORD CHANGED
@@ -1,12 +1,14 @@
- nlpertools/__init__.py,sha256=MTicDqOofy0loriBCpRcUdy8yQ9j5c3dFrwCVWWa8ic,536
- nlpertools/data_client.py,sha256=83jv7r7CsL8FYt7fJy_8ZKNG0XfAYiU8UPTYUURx4m8,13534
- nlpertools/dataprocess.py,sha256=z3nLWvWbGVi8N2mmOm70itgUhb_vhQYGVWWpVMwziNg,22658
+ nlpertools/__init__.py,sha256=h7JJEN_JRn3iKcqIcaFgYtAjP90XiT1KILrm8utoHvQ,483
+ nlpertools/cli.py,sha256=xDl_tWl9pfqQ3PUdd7oesvgM2FVqnaw8dFFliEX5c4Y,2203
+ nlpertools/data_client.py,sha256=esX8lUQrTui4uVkqPfhpHVok7Eq6ywpuemKjLeqoglc,14674
+ nlpertools/dataprocess.py,sha256=v1mobuYN7I3dT6xIKlNOHVtcg31YtjF6FwNPTxeBFFY,23153
  nlpertools/default_db_config.yml,sha256=E1K9k_xzXVlsf-HJQh8kyHXHYuvTpD12jD4Hfe5rUk8,606
- nlpertools/ml.py,sha256=n_WZMCAuD4KaaeYixq0RRiHiU1RuYLMV3dkGV7k4OaE,16798
+ nlpertools/get_2fa.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+ nlpertools/ml.py,sha256=z-0ep9svAyzcS2n7Lsyqo65VEQRGzWKFMLdZofCv1LQ,17716
  nlpertools/movie.py,sha256=rkyOnAXdsbWfMSbi1sE1VNRT7f66Hp9BnZsN_58Afmw,897
  nlpertools/nlpertools_config.yml,sha256=ksXejxFs7pxR47tNAsrN88_4gvq9PCA2ZMO07H-dJXY,26
  nlpertools/open_api.py,sha256=uyTY00OUlM57Cn0Wm0yZXcIS8vAszy9rKnDMBEWfWJM,1744
- nlpertools/other.py,sha256=TuMx0B4qL_0kIJu469k94gE5NsncCE-IEo3ejt3gH9A,11583
+ nlpertools/other.py,sha256=CeUea17Oe5MV_r-CmeYdAhdj5kWLvmxoDDgRc56o7bE,14704
  nlpertools/pic.py,sha256=13aaFJh3USGYGs4Y9tAKTvWjmdQR4YDjl3LlIhJheOA,9906
  nlpertools/plugin.py,sha256=LB7j9GdoQi6TITddH-6EglHlOa0WIHLUT7X5vb_aIZY,1168
  nlpertools/reminder.py,sha256=wiXwZQmxMck5vY3EvG8_oakP3FAdjGTikAIOiTPUQrs,2977
@@ -22,9 +24,12 @@ nlpertools/algo/template.py,sha256=9vsHr4g3jZZ5KVU_2I9i97o8asRXq-8pSaCXIv0sHeM,2
  nlpertools/algo/union.py,sha256=0l7lGZbw1qIfW1z5TE8Oo3tybL1bKIP5rzpa5ZT-vLQ,249
  nlpertools/data_structure/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
  nlpertools/data_structure/base_structure.py,sha256=gVUvJZ5jsCAswRETTpMwcEjLKoageWiTuCKNEwIWKWk,2641
+ nlpertools/draw/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+ nlpertools/draw/draw.py,sha256=PgdG7unpCtbbQdYISODTYMV7p10GwWDh9czeURkG0x4,2629
+ nlpertools/draw/math_func.py,sha256=0NQ22Dfi9DFG6Bg_hXnCT27w65-dqpOOIgZX7oUIW-Q,881
  nlpertools/io/__init__.py,sha256=YMuKtC2Ddh5dL5MvXjyUKYOOuqzFYUhBPFaP2kyFG9I,68
- nlpertools/io/dir.py,sha256=cK65qSZ9Tu5HrNrDiNyx0PEPrPldRSq34LpCWLz9WHc,1272
- nlpertools/io/file.py,sha256=tALfmzFRWztYpsmmBNvz-U6DXBe6cxef9j3_KexWdRM,6970
+ nlpertools/io/dir.py,sha256=p7J34qUxYCqKSO5DQMhL8FxFcHDrwn_1lIxNl0klasU,2267
+ nlpertools/io/file.py,sha256=CsFdluEczuz3fonbeZi9dHPasL1Hm18JL3Aux2ziQMU,7198
  nlpertools/monitor/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
  nlpertools/monitor/gpu.py,sha256=M59O6i0hlew7AzXZlaVZqbZA5IR93OhBY2WI0-T_HtY,531
  nlpertools/monitor/memory.py,sha256=9t6q9BC8VVx4o3G4sBCn7IoQRx272zMPjSnL3yvTBAQ,657
@@ -36,8 +41,9 @@ nlpertools/utils/package.py,sha256=wLg_M8j7Y6ReRjWHWCWoZJHrzEwuAr9TyG2jvb7OQCo,3
  nlpertools/utils/package_v1.py,sha256=sqgFb-zbTdMd5ziJLY6YUPqR49qUNZjxBH35DnyR5Wg,3542
  nlpertools/utils/package_v2.py,sha256=WOcsguWfUd4XSAfmPgCtL8HtUbqJ6GRSMHb0OsB47r0,3932
  nlpertools_helper/__init__.py,sha256=obxRUdZDctvcvK_iA1Dx2HmQFMlMzJto-xDPryq1lJ0,198
- nlpertools-1.0.6.dev0.dist-info/LICENSE,sha256=SBcMozykvTbZJ--MqSiKUmHLLROdnr25V70xCQgEwqw,11331
- nlpertools-1.0.6.dev0.dist-info/METADATA,sha256=VLFgFPh2o0YeWXEEoPIinO_rn6--mhUFU4vBASPPoNc,2772
- nlpertools-1.0.6.dev0.dist-info/WHEEL,sha256=GJ7t_kWBFywbagK5eo9IoUwLW6oyOeTKmQ-9iHFVNxQ,92
- nlpertools-1.0.6.dev0.dist-info/top_level.txt,sha256=_4q4MIFvMr4cAUbhWKWYdRXIXsF4PJDg4BUsZvgk94s,29
- nlpertools-1.0.6.dev0.dist-info/RECORD,,
+ nlpertools-1.0.8.dist-info/LICENSE,sha256=SBcMozykvTbZJ--MqSiKUmHLLROdnr25V70xCQgEwqw,11331
+ nlpertools-1.0.8.dist-info/METADATA,sha256=v2doRda1amZbXXfIYuzo-rFPvTICt3ByDCKVr6gsUw0,3276
+ nlpertools-1.0.8.dist-info/WHEEL,sha256=PZUExdf71Ui_so67QXpySuHtCi3-J3wvF4ORK6k_S8U,91
+ nlpertools-1.0.8.dist-info/entry_points.txt,sha256=XEazQ4vUwJMoMAgAwk1Lq4PRQGklPkPBaFkiP0zN_JE,45
+ nlpertools-1.0.8.dist-info/top_level.txt,sha256=_4q4MIFvMr4cAUbhWKWYdRXIXsF4PJDg4BUsZvgk94s,29
+ nlpertools-1.0.8.dist-info/RECORD,,
nlpertools-1.0.6.dev0.dist-info/WHEEL → nlpertools-1.0.8.dist-info/WHEEL CHANGED
@@ -1,5 +1,5 @@
  Wheel-Version: 1.0
- Generator: bdist_wheel (0.43.0)
+ Generator: setuptools (75.6.0)
  Root-Is-Purelib: true
  Tag: py3-none-any
 
nlpertools-1.0.8.dist-info/entry_points.txt ADDED
@@ -0,0 +1,2 @@
+ [console_scripts]
+ ncli = nlpertools.cli:main