nlpertools 1.0.5__py3-none-any.whl → 1.0.6.dev0__py3-none-any.whl

Sign up to get free protection for your applications and to get access to all the features.
Files changed (43) hide show
  1. nlpertools/__init__.py +24 -20
  2. nlpertools/algo/ac.py +18 -0
  3. nlpertools/algo/bit_ops.py +28 -0
  4. nlpertools/algo/kmp.py +94 -55
  5. nlpertools/algo/num_ops.py +12 -0
  6. nlpertools/algo/template.py +116 -0
  7. nlpertools/algo/union.py +13 -0
  8. nlpertools/data_client.py +387 -257
  9. nlpertools/data_structure/base_structure.py +109 -13
  10. nlpertools/dataprocess.py +611 -3
  11. nlpertools/default_db_config.yml +41 -0
  12. nlpertools/io/__init__.py +3 -3
  13. nlpertools/io/dir.py +54 -36
  14. nlpertools/io/file.py +277 -222
  15. nlpertools/ml.py +483 -460
  16. nlpertools/monitor/__init__.py +0 -0
  17. nlpertools/monitor/gpu.py +18 -0
  18. nlpertools/monitor/memory.py +24 -0
  19. nlpertools/movie.py +36 -0
  20. nlpertools/nlpertools_config.yml +1 -0
  21. nlpertools/{openApi.py → open_api.py} +65 -65
  22. nlpertools/other.py +364 -249
  23. nlpertools/pic.py +288 -0
  24. nlpertools/plugin.py +43 -43
  25. nlpertools/reminder.py +98 -87
  26. nlpertools/utils/__init__.py +3 -3
  27. nlpertools/utils/lazy.py +727 -0
  28. nlpertools/utils/log_util.py +20 -0
  29. nlpertools/utils/package.py +89 -76
  30. nlpertools/utils/package_v1.py +94 -0
  31. nlpertools/utils/package_v2.py +117 -0
  32. nlpertools/utils_for_nlpertools.py +93 -93
  33. nlpertools/vector_index_demo.py +108 -0
  34. nlpertools/wrapper.py +161 -96
  35. {nlpertools-1.0.5.dist-info → nlpertools-1.0.6.dev0.dist-info}/LICENSE +200 -200
  36. nlpertools-1.0.6.dev0.dist-info/METADATA +111 -0
  37. nlpertools-1.0.6.dev0.dist-info/RECORD +43 -0
  38. {nlpertools-1.0.5.dist-info → nlpertools-1.0.6.dev0.dist-info}/WHEEL +1 -1
  39. nlpertools-1.0.6.dev0.dist-info/top_level.txt +2 -0
  40. nlpertools_helper/__init__.py +10 -0
  41. nlpertools-1.0.5.dist-info/METADATA +0 -85
  42. nlpertools-1.0.5.dist-info/RECORD +0 -25
  43. nlpertools-1.0.5.dist-info/top_level.txt +0 -1
nlpertools/pic.py ADDED
@@ -0,0 +1,288 @@
1
+ #!/usr/bin/python3.8
2
+ # -*- coding: utf-8 -*-
3
+ # @Author : youshu.Ji
4
+ from io import BytesIO
5
+
6
+
7
def convert_pic_dpi(path, rate=0.1, output_path="test.jpg"):
    """Shrink (or enlarge) an image by a uniform factor and save the result.

    Despite the name, no DPI metadata is modified: the pixel width and height
    are each multiplied by ``rate`` and the resized image is written out.

    Args:
        path: Location of the source image, passed to ``PIL.Image.open``.
        rate: Scale factor applied to both dimensions. Defaults to ``0.1``,
            the value that used to be hard-coded.
        output_path: Destination file. Defaults to ``"test.jpg"``, the name
            that used to be hard-coded.
    """
    from PIL import Image

    # The context manager releases the underlying file handle once the
    # resized copy has been written.
    with Image.open(path) as img:
        w, h = img.size
        # int() truncates, so a small source image could otherwise end up with
        # a 0-pixel side, which is not a valid resize target.
        new_size = (max(1, int(w * rate)), max(1, int(h * rate)))
        img.resize(new_size).save(output_path)
15
+
16
+
17
def image2binary(image, image_format="JPEG"):
    """Encode a PIL image into an in-memory byte string.

    The returned bytes can be sent over the network or written straight to a
    file opened in binary mode.

    Args:
        image: A ``PIL.Image.Image`` (anything exposing ``mode``,
            ``convert(mode)`` and ``save(fp, format=...)``).
        image_format: Encoder name forwarded to ``image.save``. Defaults to
            ``"JPEG"``, the format that used to be hard-coded.

    Returns:
        bytes: The complete encoded image.
    """
    # JPEG cannot store an alpha channel or a palette; flatten such images to
    # RGB first instead of letting the encoder fail on them.
    if image_format.upper() == "JPEG" and image.mode in ("RGBA", "LA", "P", "PA"):
        image = image.convert("RGB")

    with BytesIO() as buffered:
        image.save(buffered, format=image_format)
        # getvalue() returns the whole buffer regardless of the stream
        # position, so no seek is required before reading it.
        return buffered.getvalue()
36
+
37
+
38
def invert_colors(image_path, output_path):
    """Write a grayscale negative of an image.

    Args:
        image_path: Source image, opened with ``PIL.Image.open``.
        output_path: Where the inverted image is saved.
    """
    from PIL import Image, ImageOps

    # Reduce to single-channel 8-bit grayscale ("L") before inverting.
    grayscale = Image.open(image_path).convert("L")

    # Swap dark and light values, then persist the result.
    ImageOps.invert(grayscale).save(output_path)
49
+
50
+
51
def pdf2pic(path, dpi=500, output_template="out{}.jpg"):
    """Render every page of a PDF to its own JPEG file.

    Args:
        path: PDF file handed to ``pdf2image.convert_from_path``.
        dpi: Rendering resolution. Defaults to ``500``, the value that used to
            be hard-coded.
        output_template: ``str.format`` template that receives the 1-based
            page number. Defaults to ``"out{}.jpg"``, the naming that used to
            be hard-coded.
    """
    from pdf2image import convert_from_path

    pages = convert_from_path(path, dpi)

    # Pages are numbered from 1 so the first file is "out1.jpg".
    for num, page in enumerate(pages, start=1):
        page.save(output_template.format(num), "JPEG")
61
+
62
+
63
def concat_image(paths=None, output_path="图1.jpg", axis=1):
    """Stitch several images into one and save the result.

    Args:
        paths: Image files to join, in order. Defaults to ``out1.jpg`` ..
            ``out13.jpg`` (the list that used to be hard-coded; these are the
            default output names of ``pdf2pic``).
        output_path: Destination file. Defaults to ``"图1.jpg"``, the name that
            used to be hard-coded.
        axis: NumPy axis to concatenate along. ``1`` (default) places images
            side by side; ``0`` stacks them vertically.

    Raises:
        ValueError: If ``paths`` is empty.
    """
    import numpy as np

    from PIL import Image

    if paths is None:
        paths = ["out{}.jpg".format(i) for i in range(1, 14)]
    paths = list(paths)
    if not paths:
        raise ValueError("concat_image needs at least one image path")

    if len(paths) == 1:
        # Nothing to join: re-save the single image as-is, without a round
        # trip through a NumPy array.
        with Image.open(paths[0]) as img:
            img.save(output_path)
        return

    arrays = []
    for image_path in paths:
        with Image.open(image_path) as img:
            # np.array copies the pixel data, so the file can be closed.
            arrays.append(np.array(img))

    # One concatenate call over all arrays instead of growing the result
    # pairwise, which re-copies the accumulated image on every step.
    merged = np.concatenate(arrays, axis=axis)
    Image.fromarray(merged).save(output_path)
84
+
85
+
86
class DrawDesktopBackground:
    """Generate 1920x1080 wallpaper images made of labelled, coloured panels.

    Every generator is a static method that draws with Pillow and writes a
    single image file (``generated_image.png`` by default).
    """

    @staticmethod
    def generate_image(
        text1, text2, text3, text4, color1, color2, color3, color4,
        output_path="generated_image.png", font_path="arial.ttf",
    ):
        """Draw four equal quadrants, each with a centred white caption.

        The original author notes that Chinese text is not supported
        (presumably because of the default Arial font; pass a CJK-capable
        ``font_path`` to work around it -- TODO confirm).
        Style reference: https://zhuanlan.zhihu.com/p/365624498
        Sample palette: '#1a4b61', '#f47678', '#79a863', '#a8a8a8'

        Args:
            text1, text2, text3, text4: Captions for the top-left, top-right,
                bottom-left and bottom-right quadrants.
            color1, color2, color3, color4: Fill colours of those quadrants.
            output_path: Destination image file.
            font_path: TrueType font handed to ``ImageFont.truetype``.
        """
        from PIL import Image, ImageDraw, ImageFont

        # Black canvas; the quadrants cover it completely.
        img = Image.new("RGB", (1920, 1080), color="black")
        draw = ImageDraw.Draw(img)
        font = ImageFont.truetype(font_path, size=100)
        text_color = (255, 255, 255)

        # (rectangle, fill colour, caption, caption centre) per quadrant.
        quadrants = (
            ((0, 0, 960, 540), color1, text1, (480, 270)),
            ((960, 0, 1920, 540), color2, text2, (1440, 270)),
            ((0, 540, 960, 1080), color3, text3, (480, 810)),
            ((960, 540, 1920, 1080), color4, text4, (1440, 810)),
        )

        # All rectangles are painted before any caption so that a wide caption
        # is never covered by a neighbouring quadrant.
        for rect, fill, _, _ in quadrants:
            draw.rectangle(rect, fill=fill)
        # A bordered-and-cropped variant ("style 3" in the reference) can be
        # obtained with:
        # draw.rectangle(rect4, fill=color4, outline="white", width=2)
        # img = img.crop(box=(2, 2, 1919, 1079))

        # anchor="mm" centres each caption on the given point.
        for _, _, text, center in quadrants:
            draw.text(center, text, font=font, fill=text_color, anchor="mm")

        img.save(output_path)

    # generate_image('Text 1', 'Text 2', 'Text 3', 'Text 4', '#F0E68C', '#ADD8E6', '#98FB98', '#FFC0CB')

    @staticmethod
    def generate_image_style_2(
        text1, text2, text3, text4, color1, color2, color3, color4,
        output_path="generated_image.png", font_path="arial.ttf",
    ):
        """Draw four coloured quadrants, each with a small white label box.

        The label boxes hug the left/right edges of the canvas, one box-height
        away from the top or bottom edge, and the caption inside each box is
        drawn in that quadrant's own colour.

        The original author notes that Chinese text is not supported
        (presumably because of the default Arial font -- TODO confirm).
        Style reference: https://zhuanlan.zhihu.com/p/365624498

        Args:
            text1, text2, text3, text4: Captions for the top-left, top-right,
                bottom-left and bottom-right quadrants.
            color1, color2, color3, color4: Fill colours of those quadrants
                (also used as the caption colours).
            output_path: Destination image file.
            font_path: TrueType font handed to ``ImageFont.truetype``.
        """
        from PIL import Image, ImageDraw, ImageFont

        bg_width, bg_height = 1920, 1080

        # All label measurements below are design values scaled by `rate`.
        rate = 0.5
        # Width and height of a label box.
        width, height = 400 * rate, 100 * rate
        # Distance between a label box and the top/bottom edge (one box-height).
        margin = height
        # Horizontal and vertical offset of the caption inside its box.
        text_left_margin = 50 * rate
        text_up_margin = 12 * rate
        font_size = int(65 * rate)

        font = ImageFont.truetype(font_path, size=font_size)

        # White canvas; the quadrants cover it completely.
        img = Image.new("RGB", (bg_width, bg_height), color="white")
        draw = ImageDraw.Draw(img)

        left, right = 0, bg_width - width
        top, bottom = margin, bg_height - margin - height

        # (quadrant rectangle, colour, caption, label-box top-left corner).
        panels = (
            ((0, 0, 960, 540), color1, text1, (left, top)),
            ((960, 0, 1920, 540), color2, text2, (right, top)),
            ((0, 540, 960, 1080), color3, text3, (left, bottom)),
            ((960, 540, 1920, 1080), color4, text4, (right, bottom)),
        )

        # Paint in three passes (quadrants, label boxes, captions) to keep the
        # same layering as drawing each group in turn.
        for rect, fill, _, _ in panels:
            draw.rectangle(rect, fill=fill)
        for _, _, _, (x, y) in panels:
            draw.rectangle((x, y, x + width, y + height), fill="white")
        for _, fill, text, (x, y) in panels:
            draw.text(
                (x + text_left_margin, y + text_up_margin),
                text,
                font=font,
                fill=fill,
            )

        img.save(output_path)

    # generate_image('OpenSource', 'Doing', 'Fixed', 'Tmp',
    # "#1a4b61", "#a8a8a8", "#f47678", "#fad048")

    @staticmethod
    def generate_image_style_3(
        text1, text2, text3, bg_color, rec_color, text_color, pic,
        output_path="generated_image.png", font_path="arial.ttf",
    ):
        """Draw three text panels plus a picture on a plain background.

        Layout: a tall panel on the left, a second panel to its right, a third
        panel below the second one that extends to the right/bottom margins,
        and ``pic`` resized to 600x600 next to the second panel.
        Style reference (Xiaohongshu): http://xhslink.com/f1JBTp

        Args:
            text1, text2, text3: Captions placed at the top-left of panels
                1, 2 and 3.
            bg_color: Canvas colour.
            rec_color: Fill colour shared by the three panels.
            text_color: Caption colour.
            pic: Path (or file object) of the picture to insert.
            output_path: Destination image file.
            font_path: TrueType font handed to ``ImageFont.truetype``.
        """
        from PIL import Image, ImageDraw, ImageFont

        bg_width, bg_height = 1920, 1080

        rate = 0.5
        font_size = int(50 * rate)
        # Offset of each caption from the top-left corner of its panel.
        text_rec_distance = 20

        font = ImageFont.truetype(font_path, size=font_size)

        img = Image.new("RGB", (bg_width, bg_height), color=bg_color)
        draw = ImageDraw.Draw(img)

        margin_up = 60
        margin_left = margin_right = 50
        # Negative gap: the picture starts 50px inside panel 2.
        rec_im_distance = -50
        rec_rec_distance = 45
        rec1_width, rec1_height = 600, bg_height - margin_up * 2
        rec2_width, rec2_height = 600, 500
        im_width = im_height = 600

        rec1_x, rec1_y = margin_left, margin_up
        rec2_x, rec2_y = rec1_x + rec1_width + rec_rec_distance, rec1_y
        # Panel 3 has no explicit size: it stretches to the right and bottom
        # margins of the canvas.
        rec3_x, rec3_y = rec2_x, rec2_y + rec2_height + rec_rec_distance

        # The picture is pasted BEFORE the panels are drawn, so panel 2 covers
        # the strip where the two overlap.
        with Image.open(pic) as im:
            img.paste(
                im.resize((im_width, im_height)),
                (rec2_x + rec2_width + rec_im_distance, margin_up),
            )

        # (panel rectangle, caption) pairs.
        panels = (
            ((rec1_x, rec1_y, rec1_x + rec1_width, rec1_y + rec1_height), text1),
            ((rec2_x, rec2_y, rec2_x + rec2_width, rec2_y + rec2_height), text2),
            ((rec3_x, rec3_y, bg_width - margin_right, bg_height - margin_up), text3),
        )
        for rect, _ in panels:
            draw.rectangle(rect, fill=rec_color)
        for rect, text in panels:
            draw.text(
                (rect[0] + text_rec_distance, rect[1] + text_rec_distance),
                text,
                font=font,
                fill=text_color,
            )

        img.save(output_path)

    # generate_image_style_3('· OpenSource ·', '· Doing ·', '· Fixed ·',
    # "#e8e8e8", "#dfdfdf", "#707070", "cat.jpg")

    @staticmethod
    def generate_from_pic():
        """Placeholder, not implemented yet."""
        # Idea noted by the author: use layout recognition to locate the
        # boxes in an existing picture.
        pass
nlpertools/plugin.py CHANGED
@@ -1,43 +1,43 @@
1
- #!/usr/bin/python3.8
2
- # -*- coding: utf-8 -*-
3
- # @Author : youshu.Ji
4
- import smtplib
5
- from email.mime.text import MIMEText
6
-
7
-
8
- class EmailClient(object):
9
- def __init__(self):
10
- self.mail_user = ""
11
- self.mail_pass = ""
12
- self.receiver = ""
13
-
14
- def sent_email(self, title, content):
15
- """
16
- # mail_user = 'xxx'
17
- # mail_pass = 'xxx'
18
- # receiver = 'xxx'
19
- # sent_email(mail_user, mail_pass, receiver)
20
- """
21
-
22
- # log info
23
- mail_host = 'smtp.qq.com'
24
- mail_user = self.mail_user
25
- mail_pass = self.mail_pass
26
- sender = mail_user
27
-
28
- # email info
29
- message = MIMEText(content, 'plain', 'utf-8')
30
- message['Subject'] = title
31
- message['From'] = sender
32
- message['To'] = self.receiver
33
-
34
- # log and send
35
- try:
36
- smtpObj = smtplib.SMTP()
37
- smtpObj.connect(mail_host, 25)
38
- smtpObj.login(mail_user, mail_pass)
39
- smtpObj.sendmail(sender, self.receiver, message.as_string())
40
- smtpObj.quit()
41
- print('send email succes')
42
- except smtplib.SMTPException as e:
43
- print('erro', e)
1
+ #!/usr/bin/python3.8
2
+ # -*- coding: utf-8 -*-
3
+ # @Author : youshu.Ji
4
+ import smtplib
5
+ from email.mime.text import MIMEText
6
+
7
+
8
class EmailClient(object):
    """Minimal plain-text e-mail sender that talks to ``smtp.qq.com``.

    The three attributes start out empty and must be filled in by the caller
    before ``sent_email`` is used::

        client = EmailClient()
        client.mail_user = 'xxx'
        client.mail_pass = 'xxx'
        client.receiver = 'xxx'
        client.sent_email(title, content)
    """

    def __init__(self):
        # Account used both to log in and as the From address.
        self.mail_user = ""
        # Password (or authorisation code) for that account.
        self.mail_pass = ""
        # Destination address.
        self.receiver = ""

    def sent_email(self, title, content):
        """Send ``content`` as a UTF-8 plain-text mail with subject ``title``.

        SMTP-level failures are caught and printed rather than raised; other
        errors (e.g. ``OSError`` when the host is unreachable) propagate.

        Args:
            title: Subject line.
            content: Message body.
        """
        # Login info.
        mail_host = 'smtp.qq.com'
        mail_user = self.mail_user
        mail_pass = self.mail_pass
        sender = mail_user

        # E-mail info.
        message = MIMEText(content, 'plain', 'utf-8')
        message['Subject'] = title
        message['From'] = sender
        message['To'] = self.receiver

        # Log in and send.
        # NOTE(review): port 25 without STARTTLS sends the credentials in
        # clear text; consider smtplib.SMTP_SSL on port 465 -- confirm with
        # the mail provider before changing.
        try:
            # The context manager issues QUIT and closes the socket even when
            # login() or sendmail() raises, so the connection no longer leaks.
            with smtplib.SMTP() as smtp_obj:
                smtp_obj.connect(mail_host, 25)
                smtp_obj.login(mail_user, mail_pass)
                smtp_obj.sendmail(sender, self.receiver, message.as_string())
            print('send email succes')
        except smtplib.SMTPException as e:
            print('erro', e)
nlpertools/reminder.py CHANGED
@@ -1,87 +1,98 @@
1
- #!/usr/bin/python3.8
2
- # -*- coding: utf-8 -*-
3
- # @Author : youshu.Ji
4
- from .utils.package import *
5
-
6
-
7
- def remind_assert():
8
- assert "train_extension" in ["csv", "json"], "`train_file` should be a csv or a json file."
9
-
10
-
11
- def remind_dir():
12
- reminder = "os.path.dirname(os.path.abspath(__file__))"
13
-
14
-
15
- def remind_me():
16
- reminder = "-> 数据获取 -> 数据清洗 -> dataclean -> 预标注 -> get_TexSmart -> 校对 -> 添加数据训练 -> 评价 -> 纠正标注数据"
17
-
18
-
19
- class PandasLookup:
20
- @staticmethod
21
- def merge_df(a, b):
22
- # example
23
- a = pd.DataFrame({
24
- "id": [1, 2]
25
- })
26
- b = pd.DataFrame({
27
- "id": [2, 3, 1],
28
- "content": ['b', 'c', 'a'],
29
- })
30
- merged = pd.DataFrame({
31
- "id": [1, 2],
32
- "content": ["a", 'b']
33
- })
34
- merged = a.merge(b, left_on='id', right_on='id', how='left')
35
- return merged
36
-
37
-
38
- class OtherLookup:
39
- def prometheus_demo(self):
40
- return
41
-
42
- """
43
- import prometheus_client
44
- from flask import Response, Flask, request
45
- from flask_restful import Api, Resource
46
- from prometheus_client.core import Counter
47
-
48
-
49
- def create_app():
50
- app = Flask(__name__)
51
- return app
52
-
53
-
54
- app = create_app()
55
- api = Api(app)
56
- requests_total = Counter("request_count", "Total request count of the host")
57
-
58
-
59
- class getMetrics(Resource):
60
- def post(self):
61
- return Response(prometheus_client.generate_latest(),
62
- mimetype="text/plain")
63
-
64
-
65
- class getInfo(Resource):
66
- def post(self):
67
- requests_total.inc()
68
- return {}
69
-
70
-
71
- api.add_resource(getInfo, '/test')
72
- api.add_resource(getMetrics, "/metrics")
73
- """
74
-
75
- def flask_download_demo(self):
76
- return
77
-
78
- """
79
- from flask import send_file, send_from_directory
80
- import os
81
-
82
- @app.route("/download/<filename>", methods=['GET'])
83
- def download_file(filename):
84
- # 需要知道2个参数, 第1个参数是本地目录的path, 第2个参数是文件名(带扩展名)
85
- directory = os.getcwd() # 假设在当前目录
86
- return send_from_directory(directory, filename, as_attachment=True)
87
- """
1
+ #!/usr/bin/python3.8
2
+ # -*- coding: utf-8 -*-
3
+ # @Author : youshu.Ji
4
+ from .utils.package import *
5
+
6
+
7
def remind_assert():
    """Cheat-sheet entry showing the ``assert <condition>, <message>`` form.

    The left operand is the literal string ``"train_extension"``, which is
    not one of the listed extensions, so calling this function always raises
    ``AssertionError`` with the message below.
    """
    allowed_extensions = ["csv", "json"]
    assert "train_extension" in allowed_extensions, "`train_file` should be a csv or a json file."
9
+
10
+
11
def remind_dir():
    """Cheat-sheet entry: expression for the directory of the current file.

    The snippet is only stored in a local variable; nothing is evaluated and
    the function returns ``None``.
    """
    snippet = "os.path.dirname(os.path.abspath(__file__))"
13
+
14
+
15
def remind_me():
    """Cheat-sheet entry: the author's data-annotation workflow as one string.

    The text is only stored in a local variable; the function returns
    ``None``.
    """
    workflow = "-> 数据获取 -> 数据清洗 -> dataclean -> 预标注 -> get_TexSmart -> 校对 -> 添加数据训练 -> 评价 -> 纠正标注数据"
17
+
18
+
19
class PandasLookup:
    """Quick-reference recipes for common pandas operations."""

    @staticmethod
    def merge_df(a=None, b=None):
        """Left-join ``b`` onto ``a`` using their shared ``id`` column.

        Previously both arguments were overwritten by the built-in example
        frames, so caller data was silently ignored. Now the example frames
        are used only for an argument that is ``None``.

        Built-in example::

            a:       id = [1, 2]
            b:       id = [2, 3, 1], content = ['b', 'c', 'a']
            result:  id = [1, 2],    content = ['a', 'b']

        Args:
            a: Left frame with an ``id`` column, or ``None`` for the example.
            b: Right frame with an ``id`` column, or ``None`` for the example.

        Returns:
            The merged frame: every row of ``a``, with the columns of ``b``
            attached where the ids match.
        """
        if a is None:
            a = pd.DataFrame({
                "id": [1, 2]
            })
        if b is None:
            b = pd.DataFrame({
                "id": [2, 3, 1],
                "content": ['b', 'c', 'a'],
            })
        # how='left' keeps the row order and row set of `a`.
        merged = a.merge(b, left_on='id', right_on='id', how='left')
        return merged
36
+
37
+
38
class OtherLookup:
    """Quick-reference snippets for assorted libraries.

    The ``*_demo`` methods do nothing and return ``None``; the reference code
    for each one is kept in the bare string literal that follows it.
    """

    def prometheus_demo(self):
        """No-op; see the Prometheus + Flask snippet in the string below."""
        return

    # Reference snippet for prometheus_demo: expose a request counter and a
    # /metrics endpoint with flask_restful.
    """
    import prometheus_client
    from flask import Response, Flask, request
    from flask_restful import Api, Resource
    from prometheus_client.core import Counter


    def create_app():
        app = Flask(__name__)
        return app


    app = create_app()
    api = Api(app)
    requests_total = Counter("request_count", "Total request count of the host")


    class getMetrics(Resource):
        def post(self):
            return Response(prometheus_client.generate_latest(),
                            mimetype="text/plain")


    class getInfo(Resource):
        def post(self):
            requests_total.inc()
            return {}


    api.add_resource(getInfo, '/test')
    api.add_resource(getMetrics, "/metrics")
    """

    def load_two_dataset(self):
        """Demo: stream two OSCAR subsets and interleave them.

        Prints the first two examples of a plain interleave, then the first
        two examples of an interleave sampled with probabilities 0.8 / 0.2
        and a fixed seed.
        """
        from itertools import islice

        # load_dataset is imported here next to interleave_datasets so the
        # method does not depend on the module's star import providing it.
        from datasets import interleave_datasets, load_dataset

        en_dataset = load_dataset('oscar', "unshuffled_deduplicated_en", split='train', streaming=True)
        fr_dataset = load_dataset('oscar', "unshuffled_deduplicated_fr", split='train', streaming=True)

        multilingual_dataset = interleave_datasets([en_dataset, fr_dataset])
        print(list(islice(multilingual_dataset, 2)))

        multilingual_dataset_with_oversampling = interleave_datasets(
            [en_dataset, fr_dataset],
            probabilities=[0.8, 0.2],
            seed=42,
        )
        print(list(islice(multilingual_dataset_with_oversampling, 2)))

    def flask_download_demo(self):
        """No-op; see the Flask file-download snippet in the string below."""
        return

    # Reference snippet for flask_download_demo: serve a file from the
    # current working directory as an attachment.
    """
    from flask import send_file, send_from_directory
    import os

    @app.route("/download/<filename>", methods=['GET'])
    def download_file(filename):
        # 需要知道2个参数, 第1个参数是本地目录的path, 第2个参数是文件名(带扩展名)
        directory = os.getcwd()  # 假设在当前目录
        return send_from_directory(directory, filename, as_attachment=True)
    """
@@ -1,3 +1,3 @@
1
- #!/usr/bin/python3.8
2
- # -*- coding: utf-8 -*-
3
- # @Author : youshu.Ji
1
+ #!/usr/bin/python3.8
2
+ # -*- coding: utf-8 -*-
3
+ # @Author : youshu.Ji