nlpertools 1.0.5__py3-none-any.whl → 1.0.6.dev0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (43) hide show
  1. nlpertools/__init__.py +24 -20
  2. nlpertools/algo/ac.py +18 -0
  3. nlpertools/algo/bit_ops.py +28 -0
  4. nlpertools/algo/kmp.py +94 -55
  5. nlpertools/algo/num_ops.py +12 -0
  6. nlpertools/algo/template.py +116 -0
  7. nlpertools/algo/union.py +13 -0
  8. nlpertools/data_client.py +387 -257
  9. nlpertools/data_structure/base_structure.py +109 -13
  10. nlpertools/dataprocess.py +611 -3
  11. nlpertools/default_db_config.yml +41 -0
  12. nlpertools/io/__init__.py +3 -3
  13. nlpertools/io/dir.py +54 -36
  14. nlpertools/io/file.py +277 -222
  15. nlpertools/ml.py +483 -460
  16. nlpertools/monitor/__init__.py +0 -0
  17. nlpertools/monitor/gpu.py +18 -0
  18. nlpertools/monitor/memory.py +24 -0
  19. nlpertools/movie.py +36 -0
  20. nlpertools/nlpertools_config.yml +1 -0
  21. nlpertools/{openApi.py → open_api.py} +65 -65
  22. nlpertools/other.py +364 -249
  23. nlpertools/pic.py +288 -0
  24. nlpertools/plugin.py +43 -43
  25. nlpertools/reminder.py +98 -87
  26. nlpertools/utils/__init__.py +3 -3
  27. nlpertools/utils/lazy.py +727 -0
  28. nlpertools/utils/log_util.py +20 -0
  29. nlpertools/utils/package.py +89 -76
  30. nlpertools/utils/package_v1.py +94 -0
  31. nlpertools/utils/package_v2.py +117 -0
  32. nlpertools/utils_for_nlpertools.py +93 -93
  33. nlpertools/vector_index_demo.py +108 -0
  34. nlpertools/wrapper.py +161 -96
  35. {nlpertools-1.0.5.dist-info → nlpertools-1.0.6.dev0.dist-info}/LICENSE +200 -200
  36. nlpertools-1.0.6.dev0.dist-info/METADATA +111 -0
  37. nlpertools-1.0.6.dev0.dist-info/RECORD +43 -0
  38. {nlpertools-1.0.5.dist-info → nlpertools-1.0.6.dev0.dist-info}/WHEEL +1 -1
  39. nlpertools-1.0.6.dev0.dist-info/top_level.txt +2 -0
  40. nlpertools_helper/__init__.py +10 -0
  41. nlpertools-1.0.5.dist-info/METADATA +0 -85
  42. nlpertools-1.0.5.dist-info/RECORD +0 -25
  43. nlpertools-1.0.5.dist-info/top_level.txt +0 -1
nlpertools/pic.py ADDED
@@ -0,0 +1,288 @@
1
+ #!/usr/bin/python3.8
2
+ # -*- coding: utf-8 -*-
3
+ # @Author : youshu.Ji
4
+ from io import BytesIO
5
+
6
+
7
def convert_pic_dpi(path, rate=0.1, output_path="test.jpg"):
    """Shrink the image at ``path`` by ``rate`` and save it to ``output_path``.

    Despite the name, no DPI metadata is touched: the pixel width and height
    are each multiplied by ``rate`` and truncated to an integer.

    Args:
        path: Location of the source image, passed to ``PIL.Image.open``.
        rate: Scale factor applied to both dimensions (default ``0.1``).
        output_path: Where the resized image is written (default
            ``"test.jpg"``, the name the function always used before).
    """
    from PIL import Image

    # The context manager releases the source file as soon as the resized
    # copy exists; ``resize`` returns an independent image.
    with Image.open(path) as img:
        w, h = img.size
        # Clamp to one pixel so very small inputs never ask for a 0-sized image.
        new_size = (max(1, int(w * rate)), max(1, int(h * rate)))
        resized = img.resize(new_size)
    resized.save(output_path)
15
+
16
+
17
def image2binary(image):
    """Encode an image object as JPEG and return the encoded bytes.

    Args:
        image: A ``PIL.Image``-like object exposing ``save(fp, format=...)``.
            If it has a ``mode`` attribute that the JPEG writer does not
            accept (for example ``"RGBA"`` or ``"P"``), it is converted to
            ``"RGB"`` first so the save does not fail.

    Returns:
        bytes: The complete JPEG data, ready to be sent over the network or
        written to a file opened in binary mode.
    """
    # Modes Pillow's JPEG writer can store directly.
    jpeg_modes = ("1", "L", "RGB", "RGBX", "CMYK", "YCbCr")
    mode = getattr(image, "mode", None)
    if mode is not None and mode not in jpeg_modes:
        image = image.convert("RGB")

    buffered = BytesIO()
    image.save(buffered, format="JPEG")
    # getvalue() returns the whole buffer regardless of the stream position.
    return buffered.getvalue()
36
+
37
+
38
def invert_colors(image_path, output_path):
    """Convert an image to greyscale, invert it, and save the result.

    Args:
        image_path: Location of the source image.
        output_path: Where the inverted greyscale image is written.
    """
    from PIL import Image, ImageOps

    # Close the source file once the greyscale copy has been produced.
    with Image.open(image_path) as image:
        black_and_white = image.convert("L")

    # Swap black and white (and every grey level in between).
    inverted = ImageOps.invert(black_and_white)

    # Write the modified picture.
    inverted.save(output_path)
49
+
50
+
51
def pdf2pic(path):
    """Render every page of the PDF at ``path`` to a numbered JPEG file.

    Pages are rendered through ``pdf2image.convert_from_path`` with ``500``
    as the second argument and written to the current working directory as
    ``out1.jpg``, ``out2.jpg``, ... in page order.
    """
    from pdf2image import convert_from_path

    rendered_pages = convert_from_path(path, 500)

    # Numbering starts at 1 so file names match human page numbers.
    for index, rendered in enumerate(rendered_pages, start=1):
        rendered.save("out{}.jpg".format(index), "JPEG")
61
+
62
+
63
def concat_image(paths=None, output_path="图1.jpg", axis=1):
    """Join several images into one and save the result.

    Args:
        paths: Image files to join, in order. Defaults to ``out1.jpg`` ...
            ``out13.jpg``, the list the function always used before.
        output_path: Where the joined image is written (default ``"图1.jpg"``).
        axis: ``1`` joins side by side (default), ``0`` stacks top to bottom.
            The images must agree on the other dimensions, as required by
            ``numpy.concatenate``.

    Raises:
        ValueError: If ``paths`` is empty.
    """
    import numpy as np

    from PIL import Image

    if paths is None:
        paths = ["out{}.jpg".format(i) for i in range(1, 14)]

    arrays = []
    for image_path in paths:
        # np.array copies the pixel data, so the file can be closed right away.
        with Image.open(image_path) as picture:
            arrays.append(np.array(picture))

    if not arrays:
        raise ValueError("concat_image needs at least one image path")

    # One concatenate over the whole list instead of re-copying the growing
    # result on every iteration.
    merged = np.concatenate(arrays, axis=axis)

    # Save the joined picture.
    Image.fromarray(merged).save(output_path)
84
+
85
+
86
class DrawDesktopBackground:
    """Generators for 1920x1080 desktop wallpapers divided into labelled areas.

    Every generator loads ``arial.ttf`` through ``ImageFont.truetype`` and
    writes a single image file (``generated_image.png`` unless another
    ``output_path`` is given).
    """

    @staticmethod
    def generate_image(
        text1, text2, text3, text4, color1, color2, color3, color4,
        output_path="generated_image.png",
    ):
        """Draw four coloured quadrants, each with a centred white label.

        Chinese text is not supported (per the original author's note).
        Style reference: https://zhuanlan.zhihu.com/p/365624498
        Colour scheme 1: '#1a4b61', '#f47678', '#79a863', '#a8a8a8'

        Args:
            text1, text2, text3, text4: Labels for the top-left, top-right,
                bottom-left and bottom-right quadrants.
            color1, color2, color3, color4: Fill colours for the same
                quadrants, in the same order.
            output_path: File the image is saved to.

        Example:
            generate_image('Text 1', 'Text 2', 'Text 3', 'Text 4',
                           '#F0E68C', '#ADD8E6', '#98FB98', '#FFC0CB')
        """
        from PIL import Image, ImageDraw, ImageFont

        # Black canvas and a drawing handle on it.
        img = Image.new("RGB", (1920, 1080), color="black")
        draw = ImageDraw.Draw(img)

        font = ImageFont.truetype("arial.ttf", size=100)
        text_color = (255, 255, 255)

        # (rectangle, fill colour, text centre, text) for each quadrant.
        quadrants = (
            ((0, 0, 960, 540), color1, (480, 270), text1),
            ((960, 0, 1920, 540), color2, (1440, 270), text2),
            ((0, 540, 960, 1080), color3, (480, 810), text3),
            ((960, 540, 1920, 1080), color4, (1440, 810), text4),
        )

        # Rectangles first, in order: neighbours share their border pixels,
        # so the drawing order decides which colour wins there.
        for rect, fill, _center, _text in quadrants:
            draw.rectangle(rect, fill=fill)
        # Author's note: "style 3" can be achieved with an outline plus a crop:
        #   draw.rectangle(rect4, fill=color4, outline="white", width=2)
        #   img = img.crop(box=(2, 2, 1919, 1079))

        # anchor="mm" centres each label on the quadrant's middle point.
        for _rect, _fill, center, text in quadrants:
            draw.text(center, text, font=font, fill=text_color, anchor="mm")

        img.save(output_path)

    @staticmethod
    def generate_image_style_2(
        text1, text2, text3, text4, color1, color2, color3, color4,
        output_path="generated_image.png",
    ):
        """Draw four coloured quadrants with a small white tag on each outer side.

        Each tag is a white rectangle attached to the left or right edge of
        the canvas; its label is drawn in the colour of the quadrant it
        belongs to. Chinese text is not supported (per the original author's
        note). Style reference: https://zhuanlan.zhihu.com/p/365624498

        Args:
            text1, text2, text3, text4: Labels for the top-left, top-right,
                bottom-left and bottom-right tags.
            color1, color2, color3, color4: Quadrant fill colours, also used
                as the label colours, in the same order.
            output_path: File the image is saved to.

        Example:
            generate_image_style_2('OpenSource', 'Doing', 'Fixed', 'Tmp',
                                   "#1a4b61", "#a8a8a8", "#f47678", "#fad048")
        """
        from PIL import Image, ImageDraw, ImageFont

        bg_width, bg_height = 1920, 1080
        # All tag measurements below are design values scaled by this factor.
        rate = 0.5
        # Width and height of a tag.
        width = 400 * rate
        height = 100 * rate
        # Distance between a tag and the top/bottom edge (one unscaled tag height).
        margin = 100 * rate
        # Offset of the label from the tag's left and top edges.
        text_left_margin = 50 * rate
        text_up_margin = 12 * rate
        font_size = int(65 * rate)

        font = ImageFont.truetype("arial.ttf", size=font_size)

        img = Image.new("RGB", (bg_width, bg_height), color="white")
        draw = ImageDraw.Draw(img)

        # Quadrant backgrounds.
        draw.rectangle((0, 0, 960, 540), fill=color1)
        draw.rectangle((960, 0, 1920, 540), fill=color2)
        draw.rectangle((0, 540, 960, 1080), fill=color3)
        draw.rectangle((960, 540, 1920, 1080), fill=color4)

        # Top-left corner of each tag, with its label and label colour.
        left, right = 0, bg_width - width
        top, bottom = margin, bg_height - margin - height
        tags = (
            (left, top, text1, color1),
            (right, top, text2, color2),
            (left, bottom, text3, color3),
            (right, bottom, text4, color4),
        )

        for x, y, _text, _color in tags:
            draw.rectangle((x, y, x + width, y + height), fill="white")

        for x, y, text, color in tags:
            draw.text(
                (x + text_left_margin, y + text_up_margin),
                text,
                font=font,
                fill=color,
            )

        img.save(output_path)

    @staticmethod
    def generate_image_style_3(
        text1, text2, text3, bg_color, rec_color, text_color, pic,
        output_path="generated_image.png",
    ):
        """Draw three labelled panels plus a picture on a plain background.

        Layout: a tall panel on the left, a second panel to its right, a third
        panel below the second one that extends to the right/bottom margins,
        and ``pic`` resized to 600x600 and pasted next to the second panel.
        Style reference (Xiaohongshu): http://xhslink.com/f1JBTp

        Args:
            text1, text2, text3: Labels for panels 1, 2 and 3.
            bg_color: Canvas background colour.
            rec_color: Fill colour of the three panels.
            text_color: Colour of the labels.
            pic: Path (or file object) of the picture to paste.
            output_path: File the image is saved to.

        Example:
            generate_image_style_3('· OpenSource ·', '· Doing ·', '· Fixed ·',
                                   "#e8e8e8", "#dfdfdf", "#707070", "cat.jpg")
        """
        from PIL import Image, ImageDraw, ImageFont

        bg_width, bg_height = 1920, 1080

        rate = 0.5
        text_rec_distance = 20  # label offset from a panel's top-left corner
        font_size = int(50 * rate)
        font = ImageFont.truetype("arial.ttf", size=font_size)

        img = Image.new("RGB", (bg_width, bg_height), color=bg_color)
        draw = ImageDraw.Draw(img)

        margin_up = 60
        margin_left = margin_right = 50
        # Negative: the picture starts 50 px inside panel 2's right edge.
        rec_im_distance = -50
        rec_rec_distance = 45  # gap between neighbouring panels
        rec2_width = 600
        rec2_height = 500
        rec1_width, rec1_height = 600, bg_height - margin_up * 2

        rec1_x, rec1_y = margin_left, margin_up
        rec2_x, rec2_y = rec1_x + rec1_width + rec_rec_distance, rec1_y
        # Panel 3 has no size of its own; it stretches to the canvas margins.
        rec3_x, rec3_y = rec2_x, rec2_y + rec2_height + rec_rec_distance
        im_width = im_height = 600

        # Paste the picture first; the panels drawn afterwards cover the part
        # of it that overlaps panel 2.
        with Image.open(pic) as source_pic:
            im = source_pic.resize((im_width, im_height))
        img.paste(im, (rec2_x + rec2_width + rec_im_distance, margin_up))

        big_rect1 = (rec1_x, rec1_y, rec1_x + rec1_width, rec1_y + rec1_height)
        big_rect2 = (rec2_x, rec2_y, rec2_x + rec2_width, rec2_y + rec2_height)
        big_rect3 = (rec3_x, rec3_y, bg_width - margin_right, bg_height - margin_up)
        draw.rectangle(big_rect1, fill=rec_color)
        draw.rectangle(big_rect2, fill=rec_color)
        draw.rectangle(big_rect3, fill=rec_color)

        text_point1 = (rec1_x + text_rec_distance, rec1_y + text_rec_distance)
        text_point2 = (rec2_x + text_rec_distance, rec2_y + text_rec_distance)
        text_point3 = (rec3_x + text_rec_distance, rec3_y + text_rec_distance)

        draw.text(text_point1, text1, font=font, fill=text_color)
        draw.text(text_point2, text2, font=font, fill=text_color)
        draw.text(text_point3, text3, font=font, fill=text_color)

        img.save(output_path)

    @staticmethod
    def generate_from_pic():
        """Placeholder; not implemented yet.

        Author's plan: use layout recognition to find where the boxes are.
        """
        pass
nlpertools/plugin.py CHANGED
@@ -1,43 +1,43 @@
1
- #!/usr/bin/python3.8
2
- # -*- coding: utf-8 -*-
3
- # @Author : youshu.Ji
4
- import smtplib
5
- from email.mime.text import MIMEText
6
-
7
-
8
- class EmailClient(object):
9
- def __init__(self):
10
- self.mail_user = ""
11
- self.mail_pass = ""
12
- self.receiver = ""
13
-
14
- def sent_email(self, title, content):
15
- """
16
- # mail_user = 'xxx'
17
- # mail_pass = 'xxx'
18
- # receiver = 'xxx'
19
- # sent_email(mail_user, mail_pass, receiver)
20
- """
21
-
22
- # log info
23
- mail_host = 'smtp.qq.com'
24
- mail_user = self.mail_user
25
- mail_pass = self.mail_pass
26
- sender = mail_user
27
-
28
- # email info
29
- message = MIMEText(content, 'plain', 'utf-8')
30
- message['Subject'] = title
31
- message['From'] = sender
32
- message['To'] = self.receiver
33
-
34
- # log and send
35
- try:
36
- smtpObj = smtplib.SMTP()
37
- smtpObj.connect(mail_host, 25)
38
- smtpObj.login(mail_user, mail_pass)
39
- smtpObj.sendmail(sender, self.receiver, message.as_string())
40
- smtpObj.quit()
41
- print('send email succes')
42
- except smtplib.SMTPException as e:
43
- print('erro', e)
1
+ #!/usr/bin/python3.8
2
+ # -*- coding: utf-8 -*-
3
+ # @Author : youshu.Ji
4
+ import smtplib
5
+ from email.mime.text import MIMEText
6
+
7
+
8
class EmailClient(object):
    """Minimal plain-text e-mail sender that goes through ``smtp.qq.com``.

    Fill in ``mail_user``, ``mail_pass`` and ``receiver`` on the instance
    before calling :meth:`sent_email`.
    """

    def __init__(self):
        # Account used both to log in and as the sender address.
        self.mail_user = ""
        # Password (or authorization code) passed to the SMTP login.
        self.mail_pass = ""
        # Address the message is delivered to.
        self.receiver = ""

    def sent_email(self, title, content):
        """Send ``content`` as a UTF-8 plain-text mail with subject ``title``.

        SMTP-level failures are caught and printed rather than raised; other
        errors (for example a failed DNS lookup) still propagate.

        Usage::

            client = EmailClient()
            client.mail_user = 'xxx'
            client.mail_pass = 'xxx'
            client.receiver = 'xxx'
            client.sent_email(title, content)
        """
        # Login info.
        mail_host = 'smtp.qq.com'
        mail_user = self.mail_user
        mail_pass = self.mail_pass
        sender = mail_user

        # Message headers and body.
        message = MIMEText(content, 'plain', 'utf-8')
        message['Subject'] = title
        message['From'] = sender
        message['To'] = self.receiver

        # Log in and send. The context manager issues QUIT and closes the
        # socket even when login or sendmail raises, so no connection leaks.
        try:
            with smtplib.SMTP(mail_host, 25) as smtp_obj:
                smtp_obj.login(mail_user, mail_pass)
                smtp_obj.sendmail(sender, self.receiver, message.as_string())
            print('send email succes')
        except smtplib.SMTPException as e:
            print('erro', e)
nlpertools/reminder.py CHANGED
@@ -1,87 +1,98 @@
1
- #!/usr/bin/python3.8
2
- # -*- coding: utf-8 -*-
3
- # @Author : youshu.Ji
4
- from .utils.package import *
5
-
6
-
7
- def remind_assert():
8
- assert "train_extension" in ["csv", "json"], "`train_file` should be a csv or a json file."
9
-
10
-
11
- def remind_dir():
12
- reminder = "os.path.dirname(os.path.abspath(__file__))"
13
-
14
-
15
- def remind_me():
16
- reminder = "-> 数据获取 -> 数据清洗 -> dataclean -> 预标注 -> get_TexSmart -> 校对 -> 添加数据训练 -> 评价 -> 纠正标注数据"
17
-
18
-
19
- class PandasLookup:
20
- @staticmethod
21
- def merge_df(a, b):
22
- # example
23
- a = pd.DataFrame({
24
- "id": [1, 2]
25
- })
26
- b = pd.DataFrame({
27
- "id": [2, 3, 1],
28
- "content": ['b', 'c', 'a'],
29
- })
30
- merged = pd.DataFrame({
31
- "id": [1, 2],
32
- "content": ["a", 'b']
33
- })
34
- merged = a.merge(b, left_on='id', right_on='id', how='left')
35
- return merged
36
-
37
-
38
- class OtherLookup:
39
- def prometheus_demo(self):
40
- return
41
-
42
- """
43
- import prometheus_client
44
- from flask import Response, Flask, request
45
- from flask_restful import Api, Resource
46
- from prometheus_client.core import Counter
47
-
48
-
49
- def create_app():
50
- app = Flask(__name__)
51
- return app
52
-
53
-
54
- app = create_app()
55
- api = Api(app)
56
- requests_total = Counter("request_count", "Total request count of the host")
57
-
58
-
59
- class getMetrics(Resource):
60
- def post(self):
61
- return Response(prometheus_client.generate_latest(),
62
- mimetype="text/plain")
63
-
64
-
65
- class getInfo(Resource):
66
- def post(self):
67
- requests_total.inc()
68
- return {}
69
-
70
-
71
- api.add_resource(getInfo, '/test')
72
- api.add_resource(getMetrics, "/metrics")
73
- """
74
-
75
- def flask_download_demo(self):
76
- return
77
-
78
- """
79
- from flask import send_file, send_from_directory
80
- import os
81
-
82
- @app.route("/download/<filename>", methods=['GET'])
83
- def download_file(filename):
84
- # 需要知道2个参数, 第1个参数是本地目录的path, 第2个参数是文件名(带扩展名)
85
- directory = os.getcwd() # 假设在当前目录
86
- return send_from_directory(directory, filename, as_attachment=True)
87
- """
1
+ #!/usr/bin/python3.8
2
+ # -*- coding: utf-8 -*-
3
+ # @Author : youshu.Ji
4
+ from .utils.package import *
5
+
6
+
7
def remind_assert():
    """Reminder of the ``assert <condition>, <message>`` syntax.

    The condition tests the literal string ``"train_extension"`` against the
    allowed extensions, so calling this always fails the assertion (unless
    assertions are disabled).
    """
    allowed_extensions = ["csv", "json"]
    assert "train_extension" in allowed_extensions, "`train_file` should be a csv or a json file."
9
+
10
+
11
def remind_dir():
    """Reminder of the expression for the directory containing the current file.

    The expression is only stored in a local string; nothing is evaluated or
    returned.
    """
    snippet = "os.path.dirname(os.path.abspath(__file__))"
13
+
14
+
15
def remind_me():
    """Reminder of the data/annotation workflow steps.

    The steps are only stored in a local string; nothing is returned.
    """
    workflow = "-> 数据获取 -> 数据清洗 -> dataclean -> 预标注 -> get_TexSmart -> 校对 -> 添加数据训练 -> 评价 -> 纠正标注数据"
17
+
18
+
19
class PandasLookup:
    """Cheat-sheet of pandas recipes."""

    @staticmethod
    def merge_df(a, b):
        """Demonstrate a left merge of two frames on their ``id`` column.

        This is a worked example: the ``a`` and ``b`` arguments are ignored
        and replaced by the sample frames below.

        Returns:
            The merged sample frame, which has ``id == [1, 2]`` and
            ``content == ['a', 'b']``.
        """
        # Example inputs: ``b`` carries the content, in a different row order.
        a = pd.DataFrame({
            "id": [1, 2]
        })
        b = pd.DataFrame({
            "id": [2, 3, 1],
            "content": ['b', 'c', 'a'],
        })
        # how='left' keeps every row of ``a`` in its original order and looks
        # up the matching ``content`` in ``b``; id 3 has no partner and is dropped.
        return a.merge(b, left_on='id', right_on='id', how='left')
36
+
37
+
38
class OtherLookup:
    """Cheat-sheet of miscellaneous snippets (Prometheus, datasets, Flask)."""

    def prometheus_demo(self):
        """Reference snippet: exposing a Prometheus counter from a Flask app.

        Nothing is executed; the method returns ``None``. Snippet::

            import prometheus_client
            from flask import Response, Flask, request
            from flask_restful import Api, Resource
            from prometheus_client.core import Counter


            def create_app():
                app = Flask(__name__)
                return app


            app = create_app()
            api = Api(app)
            requests_total = Counter("request_count", "Total request count of the host")


            class getMetrics(Resource):
                def post(self):
                    return Response(prometheus_client.generate_latest(),
                                    mimetype="text/plain")


            class getInfo(Resource):
                def post(self):
                    requests_total.inc()
                    return {}


            api.add_resource(getInfo, '/test')
            api.add_resource(getMetrics, "/metrics")
        """
        return

    def load_two_dataset(self):
        """Interleave the English and French OSCAR streams and print samples.

        Prints the first two examples of the plain interleaving, then the
        first two of an interleaving sampled with probabilities 0.8/0.2 and
        ``seed=42``. The datasets are opened with ``streaming=True``.
        """
        from itertools import islice

        # load_dataset is imported here next to interleave_datasets so the
        # method does not depend on the module's star import providing it.
        from datasets import interleave_datasets, load_dataset

        en_dataset = load_dataset('oscar', "unshuffled_deduplicated_en", split='train', streaming=True)
        fr_dataset = load_dataset('oscar', "unshuffled_deduplicated_fr", split='train', streaming=True)
        multilingual_dataset = interleave_datasets([en_dataset, fr_dataset])
        print(list(islice(multilingual_dataset, 2)))
        multilingual_dataset_with_oversampling = interleave_datasets(
            [en_dataset, fr_dataset], probabilities=[0.8, 0.2], seed=42
        )
        print(list(islice(multilingual_dataset_with_oversampling, 2)))

    def flask_download_demo(self):
        """Reference snippet: serving a file download from Flask.

        Nothing is executed; the method returns ``None``. Snippet::

            from flask import send_file, send_from_directory
            import os

            @app.route("/download/<filename>", methods=['GET'])
            def download_file(filename):
                # Two arguments are needed: the local directory path and the
                # file name (including its extension).
                directory = os.getcwd()  # assume the current directory
                return send_from_directory(directory, filename, as_attachment=True)
        """
        return
@@ -1,3 +1,3 @@
1
- #!/usr/bin/python3.8
2
- # -*- coding: utf-8 -*-
3
- # @Author : youshu.Ji
1
+ #!/usr/bin/python3.8
2
+ # -*- coding: utf-8 -*-
3
+ # @Author : youshu.Ji