PyPI - nlpertools - Versions diffs - 1.0.5__py3-none-any.whl → 1.0.6.dev0__py3-none-any.whl - Mend

nlpertools 1.0.5 (py3-none-any.whl) → 1.0.6.dev0 (py3-none-any.whl)

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (43) hide show

nlpertools/__init__.py +24 -20
nlpertools/algo/ac.py +18 -0
nlpertools/algo/bit_ops.py +28 -0
nlpertools/algo/kmp.py +94 -55
nlpertools/algo/num_ops.py +12 -0
nlpertools/algo/template.py +116 -0
nlpertools/algo/union.py +13 -0
nlpertools/data_client.py +387 -257
nlpertools/data_structure/base_structure.py +109 -13
nlpertools/dataprocess.py +611 -3
nlpertools/default_db_config.yml +41 -0
nlpertools/io/__init__.py +3 -3
nlpertools/io/dir.py +54 -36
nlpertools/io/file.py +277 -222
nlpertools/ml.py +483 -460
nlpertools/monitor/__init__.py +0 -0
nlpertools/monitor/gpu.py +18 -0
nlpertools/monitor/memory.py +24 -0
nlpertools/movie.py +36 -0
nlpertools/nlpertools_config.yml +1 -0
nlpertools/{openApi.py → open_api.py} +65 -65
nlpertools/other.py +364 -249
nlpertools/pic.py +288 -0
nlpertools/plugin.py +43 -43
nlpertools/reminder.py +98 -87
nlpertools/utils/__init__.py +3 -3
nlpertools/utils/lazy.py +727 -0
nlpertools/utils/log_util.py +20 -0
nlpertools/utils/package.py +89 -76
nlpertools/utils/package_v1.py +94 -0
nlpertools/utils/package_v2.py +117 -0
nlpertools/utils_for_nlpertools.py +93 -93
nlpertools/vector_index_demo.py +108 -0
nlpertools/wrapper.py +161 -96
{nlpertools-1.0.5.dist-info → nlpertools-1.0.6.dev0.dist-info}/LICENSE +200 -200
nlpertools-1.0.6.dev0.dist-info/METADATA +111 -0
nlpertools-1.0.6.dev0.dist-info/RECORD +43 -0
{nlpertools-1.0.5.dist-info → nlpertools-1.0.6.dev0.dist-info}/WHEEL +1 -1
nlpertools-1.0.6.dev0.dist-info/top_level.txt +2 -0
nlpertools_helper/__init__.py +10 -0
nlpertools-1.0.5.dist-info/METADATA +0 -85
nlpertools-1.0.5.dist-info/RECORD +0 -25
nlpertools-1.0.5.dist-info/top_level.txt +0 -1

nlpertools/pic.py ADDED Viewed

@@ -0,0 +1,288 @@
+#!/usr/bin/python3.8
+# -*- coding: utf-8 -*-
+# @Author  : youshu.Ji
+from io import BytesIO
+def convert_pic_dpi(path, rate=0.1, output_path="test.jpg"):
+    """Rescale an image by a constant factor and save the result.
+
+    Despite the name, this changes the pixel dimensions of the image; it
+    does not edit any DPI metadata.
+
+    Args:
+        path: Path of the image to read.
+        rate: Factor applied to both width and height (default 0.1).
+        output_path: File the resized image is written to
+            (default "test.jpg" in the current working directory).
+
+    Raises:
+        ValueError: If ``rate`` is not positive.
+    """
+    from PIL import Image
+    if rate <= 0:
+        raise ValueError("rate must be positive")
+    # The with-block closes the source file handle once the resize is done.
+    with Image.open(path) as img:
+        w, h = img.size
+        # Clamp to 1 px so a very small source never yields a zero-sized image.
+        new_size = (max(1, int(w * rate)), max(1, int(h * rate)))
+        resized = img.resize(new_size)
+    resized.save(output_path)
+def image2binary(image):
+    """Encode a PIL image as JPEG and return the encoded bytes.
+
+    Args:
+        image: A ``PIL.Image.Image`` instance.
+
+    Returns:
+        bytes: The complete JPEG file content, ready to be sent over the
+        network or written to disk in binary mode.
+    """
+    # JPEG has neither an alpha channel nor a palette, so flatten those
+    # modes to RGB first instead of letting the encoder fail.
+    if getattr(image, "mode", None) in ("RGBA", "LA", "P", "PA"):
+        image = image.convert("RGB")
+    # Encode into an in-memory buffer rather than a file on disk.
+    buffered = BytesIO()
+    image.save(buffered, format="JPEG")
+    # getvalue() returns the whole buffer regardless of the stream position.
+    return buffered.getvalue()
+def invert_colors(image_path, output_path):
+    """Save a grayscale, colour-inverted copy of an image.
+
+    Args:
+        image_path: Path of the image to read.
+        output_path: Path the inverted image is written to.
+    """
+    from PIL import Image, ImageOps
+    # Convert to mode "L" first, then swap dark and light values.
+    grayscale = Image.open(image_path).convert("L")
+    ImageOps.invert(grayscale).save(output_path)
+def pdf2pic(path):
+    """Render each page of a PDF to a JPEG file in the working directory.
+
+    Pages are written as ``out1.jpg``, ``out2.jpg``, ... in page order.
+
+    Args:
+        path: Path of the PDF file to convert.
+    """
+    from pdf2image import convert_from_path
+    # NOTE(review): 500 is presumably the render DPI (second positional
+    # argument of convert_from_path) - confirm against pdf2image's docs.
+    rendered_pages = convert_from_path(path, 500)
+    for page_no, rendered in enumerate(rendered_pages, start=1):
+        rendered.save("out{}.jpg".format(page_no), "JPEG")
+def concat_image(paths=None, output_path="图1.jpg", axis=1):
+    """Stitch several images into a single image and save it.
+
+    Args:
+        paths: Image paths in stitching order. Defaults to ``out1.jpg`` ..
+            ``out13.jpg``, the naming scheme ``pdf2pic`` writes.
+        output_path: File the stitched image is written to.
+        axis: ``1`` places the images side by side (horizontal);
+            ``0`` stacks them top to bottom (vertical).
+
+    Raises:
+        ValueError: If ``paths`` is empty.
+    """
+    import numpy as np
+    from PIL import Image
+    if paths is None:
+        paths = ["out{}.jpg".format(i) for i in range(1, 14)]
+    paths = list(paths)
+    if not paths:
+        raise ValueError("paths must contain at least one image")
+    arrays = []
+    for image_path in paths:
+        # Close each file as soon as its pixels have been copied out.
+        with Image.open(image_path) as opened:
+            arrays.append(np.array(opened))
+    # One concatenate over all arrays instead of re-copying the growing
+    # result once per image.
+    stitched = np.concatenate(arrays, axis=axis)
+    Image.fromarray(stitched).save(output_path)
+class DrawDesktopBackground:
+    """Generate 1920x1080 desktop wallpapers divided into labelled zones.
+
+    Each generator saves a PNG to ``output_path`` (default
+    ``generated_image.png`` in the working directory) and renders its labels
+    with the TrueType font at ``font_path`` (default ``arial.ttf``). The
+    original author notes that Chinese text is not supported by the first
+    two styles.
+    """
+    @staticmethod
+    def generate_image(text1, text2, text3, text4, color1, color2, color3, color4,
+                       output_path="generated_image.png", font_path="arial.ttf"):
+        """Draw four coloured quadrants, each with a centred white label.
+
+        Style reference: https://zhuanlan.zhihu.com/p/365624498
+        Sample palette: '#1a4b61', '#f47678', '#79a863', '#a8a8a8'
+
+        Args:
+            text1, text2, text3, text4: Labels for the top-left, top-right,
+                bottom-left and bottom-right quadrants.
+            color1, color2, color3, color4: Fill colours for the same
+                quadrants, in the same order.
+            output_path: File the image is written to.
+            font_path: TrueType font file used for the labels.
+
+        Example:
+            generate_image('Text 1', 'Text 2', 'Text 3', 'Text 4',
+                           '#F0E68C', '#ADD8E6', '#98FB98', '#FFC0CB')
+        """
+        from PIL import Image, ImageDraw, ImageFont
+        img = Image.new("RGB", (1920, 1080), color="black")
+        draw = ImageDraw.Draw(img)
+        font = ImageFont.truetype(font_path, size=100)
+        text_color = (255, 255, 255)
+        # (rectangle, fill colour, label, label centre) per quadrant.
+        quadrants = (
+            ((0, 0, 960, 540), color1, text1, (480, 270)),
+            ((960, 0, 1920, 540), color2, text2, (1440, 270)),
+            ((0, 540, 960, 1080), color3, text3, (480, 810)),
+            ((960, 540, 1920, 1080), color4, text4, (1440, 810)),
+        )
+        # All rectangles are drawn before any text, as in the original order.
+        for rect, fill, _label, _centre in quadrants:
+            draw.rectangle(rect, fill=fill)
+        # Author's hint: a bordered variant can be made with
+        # draw.rectangle(rect, fill=..., outline="white", width=2) followed by
+        # img = img.crop(box=(2, 2, 1919, 1079)).
+        for _rect, _fill, label, centre in quadrants:
+            # anchor="mm" centres the text on the given point.
+            draw.text(centre, label, font=font, fill=text_color, anchor="mm")
+        img.save(output_path)
+    @staticmethod
+    def generate_image_style_2(
+        text1, text2, text3, text4, color1, color2, color3, color4,
+        output_path="generated_image.png", font_path="arial.ttf",
+    ):
+        """Draw four coloured quadrants with a white label box near each corner.
+
+        Each label is drawn inside its white box, in the colour of the
+        quadrant that contains the box.
+
+        Style reference: https://zhuanlan.zhihu.com/p/365624498
+
+        Args:
+            text1, text2, text3, text4: Labels for the top-left, top-right,
+                bottom-left and bottom-right quadrants.
+            color1, color2, color3, color4: Fill colours for the same
+                quadrants; also used as the colour of the matching label.
+            output_path: File the image is written to.
+            font_path: TrueType font file used for the labels.
+
+        Example:
+            generate_image_style_2('OpenSource', 'Doing', 'Fixed', 'Tmp',
+                                   "#1a4b61", "#a8a8a8", "#f47678", "#fad048")
+        """
+        from PIL import Image, ImageDraw, ImageFont
+        bg_width, bg_height = 1920, 1080
+        # Design sizes below are scaled by this factor.
+        rate = 0.5
+        # Width and height of the white label box.
+        width, height = 400 * rate, 100 * rate
+        # Gap between a label box and the top/bottom edge (same as its height).
+        margin = height
+        # Offset of the text from the left and top edges of its label box.
+        text_left_margin = 50 * rate
+        text_up_margin = 12 * rate
+        font_size = int(65 * rate)
+        font = ImageFont.truetype(font_path, size=font_size)
+        img = Image.new("RGB", (bg_width, bg_height), color="white")
+        draw = ImageDraw.Draw(img)
+        # Top-left corner coordinates of the label boxes.
+        left, right = 0, bg_width - width
+        top, bottom = margin, bg_height - margin - height
+        # (quadrant rectangle, colour, label, label-box origin) per quadrant.
+        zones = (
+            ((0, 0, 960, 540), color1, text1, (left, top)),
+            ((960, 0, 1920, 540), color2, text2, (right, top)),
+            ((0, 540, 960, 1080), color3, text3, (left, bottom)),
+            ((960, 540, 1920, 1080), color4, text4, (right, bottom)),
+        )
+        # Same layering as the original: quadrants, then boxes, then text.
+        for rect, fill, _label, _origin in zones:
+            draw.rectangle(rect, fill=fill)
+        for _rect, _fill, _label, (x, y) in zones:
+            draw.rectangle((x, y, x + width, y + height), fill="white")
+        for _rect, fill, label, (x, y) in zones:
+            draw.text(
+                (x + text_left_margin, y + text_up_margin),
+                label,
+                font=font,
+                fill=fill,
+            )
+        img.save(output_path)
+    @staticmethod
+    def generate_image_style_3(
+        text1, text2, text3, bg_color, rec_color, text_color, pic,
+        output_path="generated_image.png", font_path="arial.ttf",
+    ):
+        """Draw three labelled panels and a picture on a plain background.
+
+        Panel 1 is a tall column on the left, panel 2 sits to its right at
+        the top, and panel 3 fills the space below panel 2 up to the right
+        margin. The picture is resized to 600x600 and pasted to the right of
+        panel 2.
+
+        Style reference (Xiaohongshu): http://xhslink.com/f1JBTp
+
+        Args:
+            text1, text2, text3: Labels drawn in the top-left corner of
+                panels 1, 2 and 3.
+            bg_color: Background colour of the whole image.
+            rec_color: Fill colour of the three panels.
+            text_color: Colour of the labels.
+            pic: Path (or file object) of the picture to insert.
+            output_path: File the image is written to.
+            font_path: TrueType font file used for the labels.
+
+        Example:
+            generate_image_style_3('· OpenSource ·', '· Doing ·', '· Fixed ·',
+                                   "#e8e8e8", "#dfdfdf", "#707070", "cat.jpg")
+        """
+        from PIL import Image, ImageDraw, ImageFont
+        bg_width, bg_height = 1920, 1080
+        rate = 0.5
+        font_size = int(50 * rate)
+        # Offset of each label from the top-left corner of its panel.
+        text_rec_distance = 20
+        font = ImageFont.truetype(font_path, size=font_size)
+        img = Image.new("RGB", (bg_width, bg_height), color=bg_color)
+        draw = ImageDraw.Draw(img)
+        margin_up = 60
+        margin_left = margin_right = 50
+        # Negative: the picture starts 50 px inside panel 2's right edge.
+        rec_im_distance = -50
+        # Gap between neighbouring panels.
+        rec_rec_distance = 45
+        rec1_width, rec1_height = 600, bg_height - margin_up * 2
+        rec2_width, rec2_height = 600, 500
+        rec1_x, rec1_y = margin_left, margin_up
+        rec2_x, rec2_y = rec1_x + rec1_width + rec_rec_distance, rec1_y
+        rec3_x, rec3_y = rec2_x, rec2_y + rec2_height + rec_rec_distance
+        im_width = im_height = 600
+        # Paste the picture first; the panels drawn afterwards partly cover
+        # it where they overlap. The with-block closes the picture file.
+        with Image.open(pic) as source:
+            picture = source.resize((im_width, im_height))
+        img.paste(picture, (rec2_x + rec2_width + rec_im_distance, margin_up))
+        # (panel rectangle, label) in drawing order.
+        panels = (
+            ((rec1_x, rec1_y, rec1_x + rec1_width, rec1_y + rec1_height), text1),
+            ((rec2_x, rec2_y, rec2_x + rec2_width, rec2_y + rec2_height), text2),
+            ((rec3_x, rec3_y, bg_width - margin_right, bg_height - margin_up), text3),
+        )
+        for rect, _label in panels:
+            draw.rectangle(rect, fill=rec_color)
+        for rect, label in panels:
+            draw.text(
+                (rect[0] + text_rec_distance, rect[1] + text_rec_distance),
+                label,
+                font=font,
+                fill=text_color,
+            )
+        img.save(output_path)
+    @staticmethod
+    def generate_from_pic():
+        """Placeholder (not implemented).
+
+        Author's note: locate the boxes in an existing picture through
+        layout recognition.
+        """
+        pass

nlpertools/plugin.py CHANGED Viewed

@@ -1,43 +1,43 @@
-#!/usr/bin/python3.8
-# -*- coding: utf-8 -*-
-# @Author  : youshu.Ji
-import smtplib
-from email.mime.text import MIMEText
-class EmailClient(object):
-    def __init__(self):
-        self.mail_user = ""
-        self.mail_pass = ""
-        self.receiver = ""
-    def sent_email(self, title, content):
-        """
-        # mail_user = 'xxx'
-        # mail_pass = 'xxx'
-        # receiver = 'xxx'
-        # sent_email(mail_user, mail_pass, receiver)
-        """
-        # log info
-        mail_host = 'smtp.qq.com'
-        mail_user = self.mail_user
-        mail_pass = self.mail_pass
-        sender = mail_user
-        # email info
-        message = MIMEText(content, 'plain', 'utf-8')
-        message['Subject'] = title
-        message['From'] = sender
-        message['To'] = self.receiver
-        # log and send
-        try:
-            smtpObj = smtplib.SMTP()
-            smtpObj.connect(mail_host, 25)
-            smtpObj.login(mail_user, mail_pass)
-            smtpObj.sendmail(sender, self.receiver, message.as_string())
-            smtpObj.quit()
-            print('send email succes')
-        except smtplib.SMTPException as e:
-            print('erro', e)
+#!/usr/bin/python3.8
+# -*- coding: utf-8 -*-
+# @Author  : youshu.Ji
+import smtplib
+from email.mime.text import MIMEText
+class EmailClient(object):
+    """Minimal plain-text mail sender that talks to ``smtp.qq.com`` on port 25.
+
+    Set ``mail_user``, ``mail_pass`` and ``receiver`` on the instance before
+    calling ``sent_email``.
+    """
+    def __init__(self):
+        # SMTP login name; also used as the From address.
+        self.mail_user = ""
+        # SMTP login secret.
+        self.mail_pass = ""
+        # Address the message is delivered to.
+        self.receiver = ""
+    def sent_email(self, title, content):
+        """Send one UTF-8 plain-text e-mail.
+
+        Usage:
+            client = EmailClient()
+            client.mail_user = 'xxx'
+            client.mail_pass = 'xxx'
+            client.receiver = 'xxx'
+            client.sent_email('title', 'content')
+
+        Args:
+            title: Subject line of the message.
+            content: Body text of the message.
+
+        Returns:
+            bool: True when the message was handed to the server, False when
+            an ``smtplib.SMTPException`` occurred (the error is printed).
+        """
+        # Server and login info.
+        mail_host = 'smtp.qq.com'
+        mail_user = self.mail_user
+        mail_pass = self.mail_pass
+        sender = mail_user
+        # Message headers and body.
+        message = MIMEText(content, 'plain', 'utf-8')
+        message['Subject'] = title
+        message['From'] = sender
+        message['To'] = self.receiver
+        try:
+            # The with-block sends QUIT and closes the socket even when
+            # login or sendmail raises, so the connection is never leaked.
+            with smtplib.SMTP(mail_host, 25) as smtp_obj:
+                smtp_obj.login(mail_user, mail_pass)
+                smtp_obj.sendmail(sender, self.receiver, message.as_string())
+        except smtplib.SMTPException as e:
+            print('erro', e)
+            return False
+        print('send email succes')
+        return True

nlpertools/reminder.py CHANGED Viewed

@@ -1,87 +1,98 @@
-#!/usr/bin/python3.8
-# -*- coding: utf-8 -*-
-# @Author  : youshu.Ji
-from .utils.package import *
-def remind_assert():
-    assert "train_extension" in ["csv", "json"], "`train_file` should be a csv or a json file."
-def remind_dir():
-    reminder = "os.path.dirname(os.path.abspath(__file__))"
-def remind_me():
-    reminder = "-> 数据获取 -> 数据清洗  -> dataclean -> 预标注  -> get_TexSmart -> 校对 -> 添加数据训练 -> 评价 -> 纠正标注数据"
-class PandasLookup:
-    @staticmethod
-    def merge_df(a, b):
-        # example
-        a = pd.DataFrame({
-            "id": [1, 2]
-        })
-        b = pd.DataFrame({
-            "id": [2, 3, 1],
-            "content": ['b', 'c', 'a'],
-        })
-        merged = pd.DataFrame({
-            "id": [1, 2],
-            "content": ["a", 'b']
-        })
-        merged = a.merge(b, left_on='id', right_on='id', how='left')
-        return merged
-class OtherLookup:
-    def prometheus_demo(self):
-        return
-    """
-    import prometheus_client
-    from flask import Response, Flask, request
-    from flask_restful import Api, Resource
-    from prometheus_client.core import Counter
-    def create_app():
-        app = Flask(__name__)
-        return app
-    app = create_app()
-    api = Api(app)
-    requests_total = Counter("request_count", "Total request count of the host")
-    class getMetrics(Resource):
-        def post(self):
-            return Response(prometheus_client.generate_latest(),
-                            mimetype="text/plain")
-    class getInfo(Resource):
-        def post(self):
-            requests_total.inc()
-            return {}
-    api.add_resource(getInfo, '/test')
-    api.add_resource(getMetrics, "/metrics")
-    """
-    def flask_download_demo(self):
-        return
-    """
-    from flask import send_file, send_from_directory
-    import os
-    @app.route("/download/<filename>", methods=['GET'])
-    def download_file(filename):
-        # 需要知道2个参数, 第1个参数是本地目录的path, 第2个参数是文件名(带扩展名)
-        directory = os.getcwd()  # 假设在当前目录
-        return send_from_directory(directory, filename, as_attachment=True)
-    """
+#!/usr/bin/python3.8
+# -*- coding: utf-8 -*-
+# @Author  : youshu.Ji
+from .utils.package import *
+def remind_assert():
+    """Reminder of the ``assert <condition>, <message>`` idiom.
+
+    The condition tests the literal string "train_extension", which is not
+    in the list, so calling this always raises ``AssertionError`` carrying
+    the message below.
+    """
+    allowed_extensions = ["csv", "json"]
+    assert "train_extension" in allowed_extensions, "`train_file` should be a csv or a json file."
+def remind_dir():
+    """Return the expression for the directory of the current source file.
+
+    Returns:
+        str: The reminder text (previously built and then discarded).
+    """
+    reminder = "os.path.dirname(os.path.abspath(__file__))"
+    return reminder
+def remind_me():
+    """Return the author's checklist for a data-annotation workflow.
+
+    In English the steps read: data acquisition -> data cleaning ->
+    dataclean -> pre-annotation -> get_TexSmart -> proofreading -> add data
+    and train -> evaluate -> correct the annotated data.
+
+    Returns:
+        str: The reminder text (previously built and then discarded).
+    """
+    reminder = "-> 数据获取 -> 数据清洗  -> dataclean -> 预标注  -> get_TexSmart -> 校对 -> 添加数据训练 -> 评价 -> 纠正标注数据"
+    return reminder
+class PandasLookup:
+    """Cheat-sheet of pandas recipes."""
+    @staticmethod
+    def merge_df(a=None, b=None):
+        """Left-join ``b`` onto ``a`` using the ``id`` column of both frames.
+
+        Arguments left as ``None`` fall back to the built-in example data:
+
+            a:       id = [1, 2]
+            b:       id = [2, 3, 1], content = ['b', 'c', 'a']
+            result:  id = [1, 2],    content = ['a', 'b']
+
+        Args:
+            a: Left DataFrame with an ``id`` column, or None for the example.
+            b: Right DataFrame with an ``id`` column, or None for the example.
+
+        Returns:
+            pandas.DataFrame: Every row of ``a`` with the matching columns
+            of ``b`` attached.
+        """
+        if a is None:
+            a = pd.DataFrame({
+                "id": [1, 2]
+            })
+        if b is None:
+            b = pd.DataFrame({
+                "id": [2, 3, 1],
+                "content": ['b', 'c', 'a'],
+            })
+        return a.merge(b, left_on='id', right_on='id', how='left')
+class OtherLookup:
+    """Cheat-sheet of assorted snippets.
+
+    The ``*_demo`` methods do nothing when called. Their reference code is
+    kept in the bare string that directly follows each method, which Python
+    evaluates and discards at class-creation time.
+    """
+    def prometheus_demo(self):
+        """No-op; see the snippet string after this method."""
+        return
+    # Reference snippet for prometheus_demo.
+    """
+    import prometheus_client
+    from flask import Response, Flask, request
+    from flask_restful import Api, Resource
+    from prometheus_client.core import Counter
+    def create_app():
+        app = Flask(__name__)
+        return app
+    app = create_app()
+    api = Api(app)
+    requests_total = Counter("request_count", "Total request count of the host")
+    class getMetrics(Resource):
+        def post(self):
+            return Response(prometheus_client.generate_latest(),
+                            mimetype="text/plain")
+    class getInfo(Resource):
+        def post(self):
+            requests_total.inc()
+            return {}
+    api.add_resource(getInfo, '/test')
+    api.add_resource(getMetrics, "/metrics")
+    """
+    def flask_download_demo(self):
+        """No-op; see the snippet string after this method."""
+        return
+    # Reference snippet for flask_download_demo.
+    """
+    from flask import send_file, send_from_directory
+    import os
+    @app.route("/download/<filename>", methods=['GET'])
+    def download_file(filename):
+        # 需要知道2个参数, 第1个参数是本地目录的path, 第2个参数是文件名(带扩展名)
+        directory = os.getcwd()  # 假设在当前目录
+        return send_from_directory(directory, filename, as_attachment=True)
+    """
+    def load_two_dataset(self):
+        """Demo: interleave two streaming datasets with the ``datasets`` library.
+
+        Opens the English and French ``oscar`` subsets in streaming mode,
+        prints the first two examples of their interleaving, then prints the
+        first two examples of an interleaving sampled with probabilities
+        0.8 / 0.2 and seed 42.
+        """
+        from itertools import islice
+        # load_dataset is imported here explicitly; the original body used
+        # it without importing it in this function.
+        from datasets import interleave_datasets, load_dataset
+        en_dataset = load_dataset('oscar', "unshuffled_deduplicated_en", split='train', streaming=True)
+        fr_dataset = load_dataset('oscar', "unshuffled_deduplicated_fr", split='train', streaming=True)
+        multilingual_dataset = interleave_datasets([en_dataset, fr_dataset])
+        print(list(islice(multilingual_dataset, 2)))
+        multilingual_dataset_with_oversampling = interleave_datasets(
+            [en_dataset, fr_dataset],
+            probabilities=[0.8, 0.2],
+            seed=42,
+        )
+        print(list(islice(multilingual_dataset_with_oversampling, 2)))

nlpertools/utils/__init__.py CHANGED Viewed

@@ -1,3 +1,3 @@
-#!/usr/bin/python3.8
-# -*- coding: utf-8 -*-
-# @Author  : youshu.Ji
+#!/usr/bin/python3.8
+# -*- coding: utf-8 -*-
+# @Author  : youshu.Ji

nlpertools 1.0.5__py3-none-any.whl → 1.0.6.dev0__py3-none-any.whl

nlpertools 1.0.5 (py3-none-any.whl) → 1.0.6.dev0 (py3-none-any.whl)