PyPI - nlpertools - Versions diffs - 1.0.8__tar.gz → 1.0.10__tar.gz - Mend

nlpertools 1.0.8tar.gz → 1.0.10tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (62) hide show

{nlpertools-1.0.8/src/nlpertools.egg-info → nlpertools-1.0.10}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
-Metadata-Version: 2.1
+Metadata-Version: 2.4
 Name: nlpertools
-Version: 1.0.8
+Version: 1.0.10
 Summary: A small package about small basic IO operation when coding
 Home-page: https://github.com/lvzii/nlpertools
 Author: youshuJi
@@ -15,8 +15,12 @@ License-File: LICENSE
 Requires-Dist: numpy
 Requires-Dist: pandas
 Requires-Dist: psutil
+Requires-Dist: openai
 Provides-Extra: torch
 Requires-Dist: torch; extra == "torch"
+Dynamic: license-file
+Dynamic: provides-extra
+Dynamic: requires-dist
 <div align="center">
   <h4 align="center">
@@ -35,7 +39,7 @@ Requires-Dist: torch; extra == "torch"
 它解决了什么问题：
-- 很多函数是记不住的， ~~每次写每次都要搜~~ 每次都要问大模型 ，例如pandas排序
+- 很多函数是记不住的， 每次写都要~~搜~~问大模型 ，例如pandas排序
 - 刷题的时候，树结构的题目很难调试
@@ -48,6 +52,23 @@ nlpertools
 ```
+# 最常用/喜欢的功能（使用示例）
+```python
+# 读txt, json文件
+import nlpertools
+txt_data = nlpertools.readtxt_list_all_strip('res.txt')
+json_data = nlpertools.load_from_json('res.json')
+```
+```bash
+## git, 连接github不稳定的时候非常有用
+ncli git pull
+# 生成pypi双因素认证的实时密钥(需要提供key)
+ncli --get_2fa --get_2fa_key your_key
+```
 # 安装
 Install the latest release version
@@ -99,30 +120,7 @@ https://nlpertools.readthedocs.io/en/latest/
 一些可能需要配置才能用的函数，写上示例
-## 使用示例
-```python
-import nlpertools
-a = nlpertools.readtxt_list_all_strip('res.txt')
-# 或
-b = nlpertools.io.file.readtxt_list_all_strip('res.txt')
-```
-```bash
-# 生成pypi双因素认证的实时密钥(需要提供key)
-python -m nlpertools.get_2fa your_key
-## git
-python nlpertools.cli --git_push
-python nlpertools.cli --git_pull
-# 以下功能被nvitop替代，不推荐使用
-## 监控gpu显存
-python -m nlpertools.monitor.gpu
-## 监控cpu
-python -m  nlpertools.monitor.memory
-```
 ## 一些常用项目
@@ -130,3 +128,7 @@ nvitop
 ydata-profiling
+## 贡献
+https://github.com/bigscience-workshop/data-preparation

{nlpertools-1.0.8 → nlpertools-1.0.10}/README.md RENAMED Viewed

@@ -15,7 +15,7 @@
 它解决了什么问题：
-- 很多函数是记不住的， ~~每次写每次都要搜~~ 每次都要问大模型 ，例如pandas排序
+- 很多函数是记不住的， 每次写都要~~搜~~问大模型 ，例如pandas排序
 - 刷题的时候，树结构的题目很难调试
@@ -28,6 +28,23 @@ nlpertools
 ```
+# 最常用/喜欢的功能（使用示例）
+```python
+# 读txt, json文件
+import nlpertools
+txt_data = nlpertools.readtxt_list_all_strip('res.txt')
+json_data = nlpertools.load_from_json('res.json')
+```
+```bash
+## git, 连接github不稳定的时候非常有用
+ncli git pull
+# 生成pypi双因素认证的实时密钥(需要提供key)
+ncli --get_2fa --get_2fa_key your_key
+```
 # 安装
 Install the latest release version
@@ -79,30 +96,7 @@ https://nlpertools.readthedocs.io/en/latest/
 一些可能需要配置才能用的函数，写上示例
-## 使用示例
-```python
-import nlpertools
-a = nlpertools.readtxt_list_all_strip('res.txt')
-# 或
-b = nlpertools.io.file.readtxt_list_all_strip('res.txt')
-```
-```bash
-# 生成pypi双因素认证的实时密钥(需要提供key)
-python -m nlpertools.get_2fa your_key
-## git
-python nlpertools.cli --git_push
-python nlpertools.cli --git_pull
-# 以下功能被nvitop替代，不推荐使用
-## 监控gpu显存
-python -m nlpertools.monitor.gpu
-## 监控cpu
-python -m  nlpertools.monitor.memory
-```
 ## 一些常用项目
@@ -110,3 +104,7 @@ nvitop
 ydata-profiling
+## 贡献
+https://github.com/bigscience-workshop/data-preparation

{nlpertools-1.0.8 → nlpertools-1.0.10}/setup.py RENAMED Viewed

@@ -7,19 +7,17 @@ from setuptools import setup
 def get_version():
+    """
+    Read the package version from src/nlpertools/__init__.py.
+    The path is relative to the current working directory.
+    :return: the string assigned to __version__ (single- or double-quoted)
+    :raises ValueError: if no __version__ assignment is found
+    """
     with open(os.path.join("src", "nlpertools", "__init__.py"), "r", encoding="utf-8") as f:
         file_content = f.read()
-        pattern = r"{}\W*=\W*\'([^\"]+)\'".format("__version__")
-        (version,) = re.findall(pattern, file_content)
-        return version
+        # Only whitespace may sit around "="; either quote style is accepted.
+        match = re.search(r"__version__\s*=\s*[\"']([^\"']+)[\"']", file_content)
+        if match is None:
+            raise ValueError("Could not find __version__ in __init__.py")
+        return match.group(1)
 def main():
     setup(
         # https://juejin.cn/post/7369349560421040128
-        install_requires=[
-            "numpy",
-            "pandas",
-            "psutil"
-        ],
+        install_requires=["numpy", "pandas", "psutil", "openai"],
         extras_require={
             "torch": ["torch"],
         },
@@ -28,9 +26,9 @@ def main():
             "console_scripts": [
                 "ncli=nlpertools.cli:main",
             ]
-        }
+        },
     )
-if __name__ == '__main__':
+if __name__ == "__main__":
     main()

{nlpertools-1.0.8 → nlpertools-1.0.10}/src/nlpertools/__init__.py RENAMED Viewed

@@ -17,7 +17,7 @@ from .utils_for_nlpertools import *
 from .wrapper import *
 from .monitor import *
 from .cli import *
+from .llm import *
-__version__ = '1.0.8'
+__version__ = "1.0.10"

nlpertools-1.0.10/src/nlpertools/cli.py ADDED Viewed

@@ -0,0 +1,125 @@
+import argparse
+import os
+import uuid
+import sys
+from .dataprocess import startwith
+def run_git_command(command, max_retries=None):
+    """
+    Run a shell command repeatedly until it exits successfully.
+    :param command: command line handed to os.system
+    :param max_retries: maximum number of retries after the first attempt; None retries forever
+    :return: True once the command exits with status 0, False if max_retries is exhausted
+    """
+    print(command)
+    num = -1
+    while True:
+        num += 1
+        print(f"retry num: {num}")
+        info = os.system(command)
+        print(str(info))
+        # os.system returns an integer status and 0 is the only success value.
+        # On POSIX it is an encoded wait status (exit code 1 shows up as 256),
+        # so matching string prefixes such as "1" or "128" misreads failures.
+        if info == 0:
+            print("success")
+            print(f"success info : ##{info}##")
+            return True
+        if max_retries is not None and num >= max_retries:
+            print(f"giving up after {num} retries, last status: {info}")
+            return False
+def get_mac_address():
+    """
+    Print and return the hardware address reported by uuid.getnode().
+    :return: the address as six colon-separated two-digit hex groups
+    """
+    # The last 12 hex digits of the UUID hold the 48-bit node value.
+    hex_digits = uuid.UUID(int=uuid.getnode()).hex[-12:]
+    octets = []
+    for start in range(0, 12, 2):
+        octets.append(hex_digits[start:start + 2])
+    mac_address = ":".join(octets)
+    print("mac address 不一定准确")
+    print(mac_address)
+    return mac_address
+def get_2af_value(key):
+    """
+    Print and return the current TOTP code for a two-factor secret.
+    :param key: secret handed to pyotp.TOTP (the original note says it should be 7 characters long; not verified here)
+    :return: the one-time code produced by totp.now()
+    """
+    # Imported inside the function, so pyotp is only needed when this command is used.
+    import pyotp
+    # The secret is deliberately not echoed: printing it leaks the 2FA seed
+    # into terminal scrollback and logs.
+    totp = pyotp.TOTP(key)
+    code = totp.now()
+    print(code)
+    return code
+def start_gpu_usage_notify_server():
+    """
+    Serve GET /notify on 0.0.0.0:5000 with a count derived from nvidia-smi.
+    The response body is the number of nvidia-smi memory.used rows that begin
+    with "0" (presumably GPUs with no memory in use; confirm against real output).
+    """
+    from flask import Flask
+    app = Flask(__name__)
+    @app.route("/notify", methods=["GET"])
+    def notify():
+        # The first line of the CSV output is skipped as the header.
+        raw_output = os.popen("nvidia-smi --query-gpu=memory.used --format=csv").read()
+        rows = raw_output.split("\n")[1:]
+        res = sum(1 for row in rows if row.startswith("0"))
+        print(res)
+        return str(res), 200
+    app.run(host="0.0.0.0", port=5000)
+def start_gpu_usage_notify_client():
+    """
+    Poll the GPU notify server once per second and show a desktop notification
+    whenever it reports a count greater than zero.
+    Runs until interrupted; any failure inside one poll is printed and the loop continues.
+    """
+    import requests
+    from plyer import notification
+    import time
+    SERVER_URL = 'http://127.0.0.1:5000/notify'  # notify endpoint of the server
+    REQUEST_TIMEOUT = 5  # seconds; without a timeout one stalled connection blocks the loop forever
+    def notify(text):
+        # Send a desktop notification through plyer
+        notification.notify(
+            title='远程通知',
+            message=text,
+            timeout=10  # keep the notification on screen for 10 seconds
+        )
+    while True:
+        try:
+            response = requests.get(SERVER_URL, timeout=REQUEST_TIMEOUT)
+            if response.status_code == 200:
+                num = int(response.text)
+                if num > 0:
+                    notify(f"服务器有{num}张卡")
+                print(f"服务器有{num}张卡")
+            else:
+                print("服务器没有新通知")
+        except Exception as e:
+            # Best-effort poller: report the problem and try again on the next tick.
+            print(f"与服务器连接失败: {e}")
+        time.sleep(1)
+def main():
+    """
+    Entry point of the `ncli` console script.
+    Exactly one action runs per invocation, checked in this order: positional
+    command words, --mac_address, --monitor_gpu_cli, --monitor_gpu_ser, --get_2fa.
+    """
+    parser = argparse.ArgumentParser(description="CLI tool for git operations and other functions.")
+    # The positional words are joined and executed verbatim, so the command
+    # must be spelled out in full ("git pull", not just "pull").
+    parser.add_argument('git_command', nargs='*', help='Full command to retry until it succeeds (e.g., git pull)')
+    parser.add_argument('--mac_address', action='store_true', help='Get the MAC address.')
+    parser.add_argument('--get_2fa', action='store_true', help='Get the 2fa value.')
+    parser.add_argument('--get_2fa_key', type=str, help='Secret key used with --get_2fa.')
+    parser.add_argument('--monitor_gpu_cli', action='store_true', help='monitor gpu cli')
+    parser.add_argument('--monitor_gpu_ser', action='store_true', help='monitor gpu ser')
+    args = parser.parse_args()
+    if args.git_command:
+        git_cmd = " ".join(args.git_command)
+        run_git_command(git_cmd)
+    elif args.mac_address:
+        get_mac_address()
+    elif args.monitor_gpu_cli:
+        start_gpu_usage_notify_client()
+    elif args.monitor_gpu_ser:
+        start_gpu_usage_notify_server()
+    elif args.get_2fa:
+        if args.get_2fa_key:
+            get_2af_value(args.get_2fa_key)
+        else:
+            print("Please provide a key as an argument.")
+    else:
+        print("No operation specified.")
+if __name__ == '__main__':
+    main()

{nlpertools-1.0.8 → nlpertools-1.0.10}/src/nlpertools/dataprocess.py RENAMED Viewed

@@ -19,6 +19,18 @@ other_special_characters = (
     "」﴾》"
 )
+def startwith(text: str, pattern_list: list) -> bool:
+    """
+    Tell whether text begins with at least one of the given prefixes.
+    :param text: string to test
+    :param pattern_list: candidate prefixes, tried in order
+    :return: True on the first matching prefix, False if none match or the list is empty
+    """
+    return any(text.startswith(prefix) for prefix in pattern_list)
 class Pattern:
     """

{nlpertools-1.0.8 → nlpertools-1.0.10}/src/nlpertools/draw/draw.py RENAMED Viewed

@@ -20,7 +20,7 @@ def confused_matrix(confuse_matrix):
     f.savefig('tmp.jpg', bbox_inches='tight')
-def plot_histogram(data, bin_size):
+def plot_histogram(data, bin_size, max_bin):
     """
     画直方图，超过1000的统一按1000算
     :param data:
@@ -33,15 +33,15 @@ def plot_histogram(data, bin_size):
     from matplotlib.ticker import MaxNLocator
     # 将超过1000的值改为1000
     def process_lengths(data):
-        return [length if length <= 1000 else 1003 for length in data]
+        return [length if length <= max_bin else max_bin + 3 for length in data]
     # 前闭后开
-    min_num, max_num = 0, 1000
+    # min_num, max_num = 0, 1000
     # min_num, max_num = min(data), max(data)
     plt.figure(figsize=(12, 8))
     processed_data = process_lengths(data)
-    bins = np.arange(0, 1000 + 2 * bin_size, bin_size)
+    bins = np.arange(0, max_bin + 2 * bin_size, bin_size)
     # 绘制直方图
     n, new_bins, patches = plt.hist(processed_data, bins=bins, edgecolor='black', color='skyblue', alpha=0.7,
                                     linewidth=0)
@@ -60,10 +60,8 @@ def plot_histogram(data, bin_size):
     plt.xlabel('module line number', fontsize=14)
     plt.ylabel('frequency', fontsize=14)
-    # 添加网格
     plt.grid(True, linestyle='--', alpha=0.6)
-    # 美化x轴和y轴的刻度
     plt.xticks(fontsize=12)
     plt.yticks(fontsize=12)
@@ -80,4 +78,4 @@ if __name__ == '__main__':
     # 调整区间大小
     bin_size = 50
     # 示例模块长度数据
-    plot_histogram([1, 100, 999, 1000, 1002, 1100, 1150], bin_size)
+    plot_histogram([1, 100, 999, 1000, 1002, 1100, 1150], bin_size, max_bin=1000)

{nlpertools-1.0.8 → nlpertools-1.0.10}/src/nlpertools/io/dir.py RENAMED Viewed

@@ -46,7 +46,7 @@ def get_filename(path, suffix=True) -> str:
     return filename
-def j_listdir(dir_name, including_dir=True):
+def listdir(dir_name, including_dir=True):
     filenames = os.listdir(dir_name)
     if including_dir:
         return [os.path.join(dir_name, filename) for filename in filenames]
@@ -54,7 +54,7 @@ def j_listdir(dir_name, including_dir=True):
         return list(filenames)
-def j_listdir_yield(dir_name, including_dir=True):
+def listdir_yield(dir_name, including_dir=True):
     filenames = os.listdir(dir_name)
     for filename in filenames:
         if including_dir:

{nlpertools-1.0.8 → nlpertools-1.0.10}/src/nlpertools/io/file.py RENAMED Viewed

@@ -241,12 +241,12 @@ def load_from_jsonl(path):
         return corpus
-def pickle_save(data, path):
+def save_pkl(data, path):
+    """Serialize data to path with pickle.dump; the file is opened in binary write mode."""
     with open(path, 'wb') as f:
         pickle.dump(data, f)
-def pickle_load(path):
+def load_pkl(path):
+    """
+    Load and return the object pickled at path.
+    NOTE(review): pickle.load can execute arbitrary code while unpickling; only call this on trusted files.
+    """
     with open(path, 'rb') as f:
         data = pickle.load(f)
     return data

nlpertools-1.0.10/src/nlpertools/llm/call_llm_once.py ADDED Viewed

@@ -0,0 +1,30 @@
+import os
+from typing import Optional, Union
+from openai import OpenAI
+# The SDK exports `OpenAI`; `Openai` does not exist and fails at import time.
+# The misspelled name is kept as an alias so annotations that still use it resolve.
+from openai import OpenAI as Openai
+from tqdm import tqdm
+from ..io.file import read_yaml, readtxt_string
+"""
+从你当前的项目里找到.key文件 获取url和key
+"""
+def call_once(
+    client: "OpenAI", input: Optional[Union[str, list]], model_name: str = "qwen3-0626-e4", max_tokens: int = 8192
+) -> str:
+    """
+    Send one chat-completion request and return the reply text.
+    :param client: OpenAI client used for the request
+    :param input: a prompt string (sent as a single user message) or a ready-made list of chat messages
+    :param model_name: model name passed to the API
+    :param max_tokens: maximum number of output tokens
+    :return: content of the first choice in the response
+    :raises TypeError: if input is neither a str nor a list
+    """
+    if isinstance(input, str):
+        message = [{"role": "user", "content": input}]
+    elif isinstance(input, list):
+        message = input
+    else:
+        # Without this branch `message` would be unbound and the call below
+        # would fail with an unrelated UnboundLocalError.
+        raise TypeError(f"input must be a str or a list of messages, got {type(input).__name__}")
+    response = client.chat.completions.create(model=model_name, messages=message, max_tokens=max_tokens)
+    return response.choices[0].message.content

nlpertools-1.0.10/src/nlpertools/llm/infer.py ADDED Viewed

@@ -0,0 +1,74 @@
+import os
+from tqdm import tqdm
+from openai import OpenAI
+import concurrent.futures
+INFER_PARAS = {
+    "temperature": 0.7,
+    "infer_times": 1,
+    "max_tokens": 8192,
+    "top_p": 0.95,
+    "top_k": 40,
+    "repetition_penalty": 1.0,
+}
+def parse_infer_data(infer_data: list):
+    """
+    Normalise inference inputs into a list of conversations.
+    :param infer_data: list of prompt strings, or list of conversations (each a list of message dicts)
+    :return: list of conversations; each prompt string becomes a one-message user conversation
+    :raises TypeError: if the first element is neither a str nor a list
+    """
+    if not infer_data:
+        return []
+    first = infer_data[0]
+    if isinstance(first, str):
+        # Each prompt must be its own conversation (a list of messages):
+        # the callers send every element as one `messages` payload.
+        return [[{"role": "user", "content": prompt}] for prompt in infer_data]
+    if isinstance(first, list):
+        return infer_data
+    raise TypeError(f"infer_data items must be str or list, got {type(first).__name__}")
+def common_api_infer_func(model_name, infer_data: list, infer_paras, client: OpenAI):
+    """
+    Run chat-completion requests for every input on a pool of 16 threads.
+    :param model_name: model name passed to the API
+    :param infer_data: list of prompts or conversations, normalised by parse_infer_data
+    :param infer_paras: sampling parameters; "infer_times" (default 1) is the number of requests per input
+    :param client: OpenAI client used for the requests
+    :return: one list per input, in input order, each holding infer_times dicts of the form {"text": ...}
+    """
+    messages = parse_infer_data(infer_data)
+    infer_times = infer_paras.get("infer_times", 1)
+    # "infer_times" is a local loop count, not an API argument.
+    request_kwargs = {key: value for key, value in infer_paras.items() if key != "infer_times"}
+    # top_k / repetition_penalty are not keyword arguments of chat.completions.create,
+    # so they travel in extra_body (assumes the serving backend accepts them; confirm per deployment).
+    extra_body = dict(request_kwargs.pop("extra_body", None) or {})
+    for key in ("top_k", "repetition_penalty"):
+        if key in request_kwargs:
+            extra_body[key] = request_kwargs.pop(key)
+    if extra_body:
+        request_kwargs["extra_body"] = extra_body
+    def get_response(conversation):
+        responses = []
+        for _ in range(infer_times):
+            response = client.chat.completions.create(model=model_name, messages=conversation, **request_kwargs)
+            responses.append({"text": response.choices[0].message.content})
+        return responses
+    with concurrent.futures.ThreadPoolExecutor(16) as executor:
+        futures = [executor.submit(get_response, conversation) for conversation in messages]
+        # Collect in submission order so results[i] belongs to infer_data[i];
+        # as_completed would return them in completion order.
+        results = [future.result() for future in futures]
+    return results
+def common_vllm_infer_func(model_path, infer_data: list, infer_paras: dict):
+    """
+    Run offline batch chat inference with vLLM.
+    :param model_path: passed to vllm.LLM as `model`
+    :param infer_data: list of prompts or conversations, normalised by parse_infer_data
+    :param infer_paras: reads "temperature" (default 0.7), "infer_times" (default 1, completions
+        sampled per input) and "max_tokens" (default 8192)
+    :return: one list per input, each holding a {"text": ...} dict for every sampled completion
+    """
+    messages = parse_infer_data(infer_data)
+    from vllm import LLM, SamplingParams
+    temperature = infer_paras.get("temperature", 0.7)
+    infer_times = infer_paras.get("infer_times", 1)
+    max_tokens = infer_paras.get("max_tokens", 8192)
+    # One tensor-parallel shard per visible GPU; an unset or empty
+    # CUDA_VISIBLE_DEVICES falls back to one shard instead of raising KeyError.
+    visible_devices = os.environ.get("CUDA_VISIBLE_DEVICES", "")
+    vllm_card_num = len([device for device in visible_devices.split(",") if device.strip()]) or 1
+    llm = LLM(model=model_path, tensor_parallel_size=vllm_card_num, trust_remote_code=True, gpu_memory_utilization=0.85)
+    sampling_params = SamplingParams(
+        temperature=temperature,
+        n=infer_times,
+        max_tokens=max_tokens,
+    )
+    outputs = llm.chat(messages, sampling_params=sampling_params, use_tqdm=True)
+    return [[{"text": completion.text} for completion in output.outputs] for output in outputs]

nlpertools-1.0.10/src/nlpertools/llm/price.py ADDED Viewed

@@ -0,0 +1,13 @@
+def estimate_cost(input_token_num, output_token_num, example_num=1, input_price=1, output_price=4):
+    """
+    Estimate the cost of a batch of LLM calls, print it and return it.
+    :param input_token_num: input tokens per example
+    :param output_token_num: output tokens per example
+    :param example_num: number of examples
+    :param input_price: price per 1M input tokens
+    :param output_price: price per 1M output tokens
+    :return: estimated cost
+    """
+    per_example = input_token_num * input_price + output_token_num * output_price
+    price = per_example * example_num / 1000000
+    print(f"Estimated cost: {price:.2f} 元")
+    return price

{nlpertools-1.0.8 → nlpertools-1.0.10}/src/nlpertools/ml.py RENAMED Viewed

@@ -2,9 +2,11 @@
 import codecs
 import os
 import random
+import itertools
 from .io.dir import j_mkdir
 from .io.file import readtxt_list_all_strip, writetxt_w_list, save_to_csv
 # import numpy as np
 # import seaborn as sns
 # import torch
@@ -17,8 +19,44 @@ from .io.file import readtxt_list_all_strip, writetxt_w_list, save_to_csv
 from .utils.package import *
+def estimate_pass_at_k(num_samples: list, num_correct: list, k):
+    """
+    Estimate pass@k for each problem and return the values as a numpy array.
+    Copied from https://huggingface.co/spaces/evaluate-metric/code_eval/blob/main/code_eval.py
+    :param num_samples: samples drawn per problem; a single int applies the same count to every problem
+    :param num_correct: correct samples per problem
+    :param k: the k in pass@k
+    Note: whenever n - c < k (always the case when n < k) the estimate is 1.0, even with zero correct samples.
+    """
+    def estimator(n: int, c: int, k: int) -> float:
+        """Compute 1 - comb(n - c, k) / comb(n, k)."""
+        if n - c < k:
+            return 1.0
+        return 1.0 - np.prod(1.0 - k / np.arange(n - c + 1, n + 1))
+    if not isinstance(num_samples, int):
+        assert len(num_samples) == len(num_correct)
+        sample_counts = iter(num_samples)
+    else:
+        sample_counts = itertools.repeat(num_samples, len(num_correct))
+    scores = []
+    for n, c in zip(sample_counts, num_correct):
+        scores.append(estimator(int(n), int(c), k))
+    return np.array(scores)
+def estimate_pass_at_k_fixed(num_samples: list, num_correct: list, k):
+    """
+    pass@k with sample counts below k raised to k before estimating.
+    Without this, a problem with fewer than k samples always scores 1.0, even with zero correct samples.
+    :param num_samples: samples per problem, or a single int applied to every problem
+    :param num_correct: correct samples per problem
+    :param k: the k in pass@k
+    :return: numpy array of per-problem estimates
+    """
+    if isinstance(num_samples, int):
+        # estimate_pass_at_k accepts a bare int, so accept it here too instead of failing on iteration.
+        num_samples = max(num_samples, k)
+    else:
+        num_samples = [k if i < k else i for i in num_samples]
+    return estimate_pass_at_k(num_samples, num_correct, k)
+def estimate_pass_at_k_return_num(num_samples: list, num_correct: list, k):
+    """Return the mean pass@k over all problems as a percentage rounded to two decimals."""
+    per_problem = estimate_pass_at_k(num_samples, num_correct, k)
+    percentage = per_problem.mean() * 100
+    return round(percentage, 2)
 def calc_llm_train_activation_memory(
-        model_name, sequence_length, batch_size, hidden_dim, lay_number, attention_heads_num, gpu_num=1
+    model_name, sequence_length, batch_size, hidden_dim, lay_number, attention_heads_num, gpu_num=1
 ):
     """
     return bytes
@@ -32,18 +70,19 @@ def calc_llm_train_activation_memory(
     # FFN
     # Layer Norm
     r1 = (
-            sequence_length
-            * batch_size
-            * hidden_dim
-            * lay_number
-            * (34 + 5 * attention_heads_num * sequence_length / hidden_dim)
+        sequence_length
+        * batch_size
+        * hidden_dim
+        * lay_number
+        * (34 + 5 * attention_heads_num * sequence_length / hidden_dim)
     )
     # reference2
     r2 = (
-            lay_number * (2 * sequence_length * attention_heads_num + 16 * hidden_dim)
-            * sequence_length
-            * batch_size
-            / gpu_num
+        lay_number
+        * (2 * sequence_length * attention_heads_num + 16 * hidden_dim)
+        * sequence_length
+        * batch_size
+        / gpu_num
     )
     print(r1)
     print(r2)
@@ -78,9 +117,7 @@ class DataStructure:
         "source": "baidu",
     }
     ner_input_example = "这句话一共有两个实体分别为大象和老鼠。"
-    ner_label_example = (
-            list("OOOOOOOOOOOOO") + ["B-s", "I-s"] + ["O"] + ["B-o", "I-o"] + ["O"]
-    )
+    ner_label_example = list("OOOOOOOOOOOOO") + ["B-s", "I-s"] + ["O"] + ["B-o", "I-o"] + ["O"]
 def text_jaccard(ipt1, ipt2, ipt_level="char", sim_level="char"):
@@ -134,7 +171,7 @@ class STEM(object):
             if each_srl:
                 args = []
                 for arg in each_srl:
-                    args.extend(seg[arg[1]: arg[2] + 1])
+                    args.extend(seg[arg[1] : arg[2] + 1])
                 # 添加上谓词
                 args.insert(each_srl[0][2] - each_srl[0][1] + 1, seg[wdx])
                 events.append(args)
@@ -173,7 +210,7 @@ def subject_object_labeling(spo_list, text):
         q_list_length = len(q_list)
         k_list_length = len(k_list)
         for idx in range(k_list_length - q_list_length + 1):
-            t = [q == k for q, k in zip(q_list, k_list[idx: idx + q_list_length])]
+            t = [q == k for q, k in zip(q_list, k_list[idx : idx + q_list_length])]
             # print(idx, t)
             if all(t):
                 # print(idx)
@@ -186,9 +223,7 @@ def subject_object_labeling(spo_list, text):
         if len(spo) == 2:
             labeling_list[idx_start + 1] = "I-" + spo_type
         elif len(spo) >= 3:
-            labeling_list[idx_start + 1: idx_start + len(spo)] = ["I-" + spo_type] * (
-                    len(spo) - 1
-            )
+            labeling_list[idx_start + 1 : idx_start + len(spo)] = ["I-" + spo_type] * (len(spo) - 1)
         else:
             pass
@@ -197,7 +232,7 @@ def subject_object_labeling(spo_list, text):
     # count = 0
     for predicate, spo_list_form in spo_predicate_dict.items():
         if predicate in text:
-            for (spo_subject, spo_object) in spo_list_form:
+            for spo_subject, spo_object in spo_list_form:
                 # if predicate not in spo_subject and predicate not in spo_object:
                 _labeling_type(spo_subject, "SUB")
                 _labeling_type(spo_object, "OBJ")
@@ -219,10 +254,7 @@ def label(text, labels):
     :return:
     """
     train_sequence = "\n".join(
-        [
-            "\t".join(i) if i[0] != " " else "[null]\t{}".format(i[1])
-            for i in zip(list(text), labels)
-        ]
+        ["\t".join(i) if i[0] != " " else "[null]\t{}".format(i[1]) for i in zip(list(text), labels)]
     )
     return train_sequence
@@ -238,16 +270,12 @@ def convert_crf_format_10_fold(corpus, objdir_path):
     split_position = int(len(corpus) / 10)
     for k in range(0, 10):
         if k == 9:
-            dev_set = corpus[k * split_position:]
+            dev_set = corpus[k * split_position :]
             train_set = corpus[: k * split_position]
         else:
-            dev_set = corpus[k * split_position: (k + 1) * split_position]
-            train_set = (
-                    corpus[: k * split_position] + corpus[(k + 1) * split_position:]
-            )
-        writetxt_w_list(
-            train_set, os.path.join(objdir_path, "train{}.txt".format(k + 1))
-        )
+            dev_set = corpus[k * split_position : (k + 1) * split_position]
+            train_set = corpus[: k * split_position] + corpus[(k + 1) * split_position :]
+        writetxt_w_list(train_set, os.path.join(objdir_path, "train{}.txt".format(k + 1)))
         writetxt_w_list(dev_set, os.path.join(objdir_path, "test{}.txt".format(k + 1)))
         writetxt_w_list(dev_set, os.path.join(objdir_path, "dev{}.txt".format(k + 1)))
@@ -283,31 +311,19 @@ def read_seq_res(path, labels):
     return text, raw_label, predict_label
-def kfold_txt(corpus, path, k=9, is_shuffle=True):
-    """
-    k是10份中训练集占了几份
-    """
-    j_mkdir(path)
-    if is_shuffle:
-        random.shuffle(corpus)
-    split_position = int(len(corpus) / 10)
-    train_set, dev_set = corpus[: k * split_position], corpus[k * split_position:]
-    writetxt_w_list(train_set, os.path.join(path, "train.tsv"), num_lf=1)
-    writetxt_w_list(dev_set, os.path.join(path, "test.tsv"), num_lf=1)
-    writetxt_w_list(dev_set, os.path.join(path, "dev.tsv"), num_lf=1)
 def sample():
     import pandas as pd
     from sklearn.model_selection import StratifiedShuffleSplit
     # 假设 df 是你的 DataFrame
-    df = pd.DataFrame({
-        "count_line": [i for i in range(100)],
-        "x": [i for i in range(100)],
-        "y": [i // 10 for i in range(100)],
-    })
+    df = pd.DataFrame(
+        {
+            "count_line": [i for i in range(100)],
+            "x": [i for i in range(100)],
+            "y": [i // 10 for i in range(100)],
+        }
+    )
     print(df)
     # count_line 是用于分层抽样的字段
@@ -315,7 +331,7 @@ def sample():
     split = StratifiedShuffleSplit(n_splits=1, test_size=0.1, random_state=42)
     # 获取训练集和测试集的索引
-    train_index, test_index = next(split.split(df, df['y']))
+    train_index, test_index = next(split.split(df, df["y"]))
     # 根据索引划分训练集和测试集
     train_df = df.loc[train_index]
@@ -326,6 +342,27 @@ def sample():
     print("测试集行数：", len(test_df))
+def kfold_txt(corpus, path, k=9, is_shuffle=True):
+    """
+    Split corpus into a train part and a held-out part and write them as TSV files under path.
+    :param corpus: list of lines to split
+    :param path: output directory, created through j_mkdir
+    :param k: how many tenths of the corpus go to the training set
+    :param is_shuffle: shuffle before splitting
+    Writes train.tsv plus test.tsv and dev.tsv; the last two receive the same held-out lines.
+    """
+    j_mkdir(path)
+    # Work on a copy so the caller's list is not reordered in place by the shuffle.
+    corpus = list(corpus)
+    if is_shuffle:
+        random.shuffle(corpus)
+    split_position = int(len(corpus) / 10)
+    train_set, dev_set = corpus[: k * split_position], corpus[k * split_position :]
+    writetxt_w_list(train_set, os.path.join(path, "train.tsv"), num_lf=1)
+    writetxt_w_list(dev_set, os.path.join(path, "test.tsv"), num_lf=1)
+    writetxt_w_list(dev_set, os.path.join(path, "dev.tsv"), num_lf=1)
+def kfold_list(list_data):
+    """
+    Placeholder: not implemented, always returns None.
+    The original note points at sklearn.model_selection.train_test_split, presumably as the intended basis.
+    """
+    pass
 def kfold_df(df, save_dir=None):
     """
     划分train test val集， 写为windows可读的csv。
@@ -338,9 +375,7 @@ def kfold_df(df, save_dir=None):
     train_idx, test_and_val_idx = KFold(n_splits=8, shuffle=True).split(df).__next__()
     df_test_and_val = df.iloc[test_and_val_idx]
-    test_idx, val_idx = (
-        KFold(n_splits=2, shuffle=True).split(df_test_and_val).__next__()
-    )
+    test_idx, val_idx = KFold(n_splits=2, shuffle=True).split(df_test_and_val).__next__()
     df_train = df.iloc[train_idx]
     df_val = df.iloc[val_idx]
     df_test = df.iloc[test_idx]
@@ -417,7 +452,7 @@ def split_sentence(sentence, language="chinese", cross_line=True):
     for idx, char in enumerate(sentence):
         if idx == len(sentence) - 1:
             if char in split_signs:
-                sentences.append(sentence[start_idx: idx + 1].strip())
+                sentences.append(sentence[start_idx : idx + 1].strip())
                 start_idx = idx + 1
             else:
                 sentences.append(sentence[start_idx:].strip())
@@ -427,10 +462,10 @@ def split_sentence(sentence, language="chinese", cross_line=True):
                     if idx < len(sentence) - 2:
                         # 处理。”。
                         if sentence[idx + 2] not in split_signs:
-                            sentences.append(sentence[start_idx: idx + 2].strip())
+                            sentences.append(sentence[start_idx : idx + 2].strip())
                             start_idx = idx + 2
                 elif sentence[idx + 1] not in split_signs:
-                    sentences.append(sentence[start_idx: idx + 1].strip())
+                    sentences.append(sentence[start_idx : idx + 1].strip())
                     start_idx = idx + 1
     return sentences
@@ -506,6 +541,6 @@ if __name__ == "__main__":
         hidden_dim=4096,
         lay_number=28,
         attention_heads_num=32,
-        gpu_num=1
+        gpu_num=1,
     )
     print(res, "G")

{nlpertools-1.0.8 → nlpertools-1.0.10}/src/nlpertools/other.py RENAMED Viewed

@@ -30,6 +30,21 @@ ENGLISH_PUNCTUATION = list(',.;:\'"!?<>()')
 OTHER_PUNCTUATION = list('!@#$%^&*')
+def setup_logging(log_file):
+    """
+    Configure the root logger to write INFO-and-above records to a file.
+    Args:
+        log_file (str): Path to the log file.
+    """
+    config = {
+        "filename": log_file,
+        "level": logging.INFO,
+        "format": '%(asctime)s - %(levelname)s - %(message)s',
+        "datefmt": '%Y-%m-%d %H:%M:%S',
+    }
+    logging.basicConfig(**config)
 def get_diff_parts(str1, str2):
     # 创建一个 SequenceMatcher 对象
     matcher = difflib.SequenceMatcher(None, str1, str2)
@@ -154,8 +169,11 @@ def jprint(obj, depth=0):
         print(obj)
-def print_split(sign="=", num=20):
-    print(sign * num)
+def print_split(sign="=", num=20, char: str = None):
+    """
+    Print a separator line.
+    :param sign: character (or string) the line is built from
+    :param num: total number of repetitions of sign
+    :param char: optional label printed between two half-length runs of sign
+    """
+    if char:
+        # Parenthesised on purpose: `sign * num // 2` parses as `(sign * num) // 2`,
+        # a str // int TypeError.
+        half = sign * (num // 2)
+        print(half, char, half)
+    else:
+        print(sign * num)
 def seed_everything():
@@ -361,10 +379,12 @@ def unsqueeze_list(flatten_list, each_element_len):
                     range(len(flatten_list) // each_element_len)]
     return two_dim_list
 def split_list(input_list, chunk_size):
     # 使用列表推导式将列表分割成二维数组
     return [input_list[i:i + chunk_size] for i in range(0, len(input_list), chunk_size)]
 def auto_close():
     """
     针对企业微信15分钟会显示离开的机制，假装自己还在上班

nlpertools-1.0.10/src/nlpertools/template/__init__.py ADDED Viewed

File without changes

{nlpertools-1.0.8 → nlpertools-1.0.10/src/nlpertools.egg-info}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
-Metadata-Version: 2.1
+Metadata-Version: 2.4
 Name: nlpertools
-Version: 1.0.8
+Version: 1.0.10
 Summary: A small package about small basic IO operation when coding
 Home-page: https://github.com/lvzii/nlpertools
 Author: youshuJi
@@ -15,8 +15,12 @@ License-File: LICENSE
 Requires-Dist: numpy
 Requires-Dist: pandas
 Requires-Dist: psutil
+Requires-Dist: openai
 Provides-Extra: torch
 Requires-Dist: torch; extra == "torch"
+Dynamic: license-file
+Dynamic: provides-extra
+Dynamic: requires-dist
 <div align="center">
   <h4 align="center">
@@ -35,7 +39,7 @@ Requires-Dist: torch; extra == "torch"
 它解决了什么问题：
-- 很多函数是记不住的， ~~每次写每次都要搜~~ 每次都要问大模型 ，例如pandas排序
+- 很多函数是记不住的， 每次写都要~~搜~~问大模型 ，例如pandas排序
 - 刷题的时候，树结构的题目很难调试
@@ -48,6 +52,23 @@ nlpertools
 ```
+# 最常用/喜欢的功能（使用示例）
+```python
+# 读txt, json文件
+import nlpertools
+txt_data = nlpertools.readtxt_list_all_strip('res.txt')
+json_data = nlpertools.load_from_json('res.json')
+```
+```bash
+## git, 连接github不稳定的时候非常有用
+ncli git pull
+# 生成pypi双因素认证的实时密钥(需要提供key)
+ncli --get_2fa --get_2fa_key your_key
+```
 # 安装
 Install the latest release version
@@ -99,30 +120,7 @@ https://nlpertools.readthedocs.io/en/latest/
 一些可能需要配置才能用的函数，写上示例
-## 使用示例
-```python
-import nlpertools
-a = nlpertools.readtxt_list_all_strip('res.txt')
-# 或
-b = nlpertools.io.file.readtxt_list_all_strip('res.txt')
-```
-```bash
-# 生成pypi双因素认证的实时密钥(需要提供key)
-python -m nlpertools.get_2fa your_key
-## git
-python nlpertools.cli --git_push
-python nlpertools.cli --git_pull
-# 以下功能被nvitop替代，不推荐使用
-## 监控gpu显存
-python -m nlpertools.monitor.gpu
-## 监控cpu
-python -m  nlpertools.monitor.memory
-```
 ## 一些常用项目
@@ -130,3 +128,7 @@ nvitop
 ydata-profiling
+## 贡献
+https://github.com/bigscience-workshop/data-preparation

{nlpertools-1.0.8 → nlpertools-1.0.10}/src/nlpertools.egg-info/SOURCES.txt RENAMED Viewed

@@ -41,6 +41,10 @@ src/nlpertools/draw/math_func.py
 src/nlpertools/io/__init__.py
 src/nlpertools/io/dir.py
 src/nlpertools/io/file.py
+src/nlpertools/llm/__init__.py
+src/nlpertools/llm/call_llm_once.py
+src/nlpertools/llm/infer.py
+src/nlpertools/llm/price.py
 src/nlpertools/monitor/__init__.py
 src/nlpertools/monitor/gpu.py
 src/nlpertools/monitor/memory.py

{nlpertools-1.0.8 → nlpertools-1.0.10}/src/nlpertools.egg-info/requires.txt RENAMED Viewed

@@ -1,6 +1,7 @@
 numpy
 pandas
 psutil
+openai
 [torch]
 torch

nlpertools-1.0.8/src/nlpertools/cli.py DELETED Viewed

@@ -1,87 +0,0 @@
-import argparse
-import os
-import uuid
-import sys
-import pyotp
-"""
-如何Debug cli.py
-"""
-def git_push():
-    """
-    针对国内提交github经常失败，自动提交
-    """
-    num = -1
-    while 1:
-        num += 1
-        print("retry num: {}".format(num))
-        info = os.system("git push --set-upstream origin main")
-        print(str(info))
-        if not str(info).startswith("fatal"):
-            print("scucess")
-            break
-def git_pull():
-    """
-    针对国内提交github经常失败，自动提交
-    """
-    num = -1
-    while 1:
-        num += 1
-        print("retry num: {}".format(num))
-        info = os.system("git pull")
-        print(str(info))
-        if not str(info).startswith("fatal") and not str(info).startswith("error"):
-            print("scucess")
-            break
-def get_mac_address():
-    mac = uuid.UUID(int=uuid.getnode()).hex[-12:]
-    mac_address = ":".join([mac[e:e + 2] for e in range(0, 11, 2)])
-    print("mac address 不一定准确")
-    print(mac_address)
-    return mac_address
-def get_2af_value(key):
-    """
-    key应该是7位的
-    """
-    print(key)
-    totp = pyotp.TOTP(key)
-    print(totp.now())
-def main():
-    parser = argparse.ArgumentParser(description="CLI tool for git operations and getting MAC address.")
-    parser.add_argument('--gitpush', action='store_true', help='Perform git push operation.')
-    parser.add_argument('--gitpull', action='store_true', help='Perform git push operation.')
-    parser.add_argument('--mac_address', action='store_true', help='Get the MAC address.')
-    parser.add_argument('--get_2fa', action='store_true', help='Get the 2fa value.')
-    parser.add_argument('--get_2fa_key', type=str, help='Get the 2fa value.')
-    args = parser.parse_args()
-    if args.gitpush:
-        git_push()
-    elif args.gitpull:
-        git_pull()
-    elif args.mac_address:
-        get_mac_address()
-    elif args.get_2fa:
-        if args.get_2fa_key:
-            get_2af_value(args.get_2fa_key)
-        else:
-            print("Please provide a key as an argument.")
-    else:
-        print("No operation specified. Use --gitpush or --get_mac_address.")
-if __name__ == '__main__':
-    main()

{nlpertools-1.0.8 → nlpertools-1.0.10}/LICENSE RENAMED Viewed

File without changes

{nlpertools-1.0.8 → nlpertools-1.0.10}/pyproject.toml RENAMED Viewed

File without changes

{nlpertools-1.0.8 → nlpertools-1.0.10}/setup.cfg RENAMED Viewed

File without changes

{nlpertools-1.0.8 → nlpertools-1.0.10}/src/nlpertools/algo/__init__.py RENAMED Viewed

File without changes

{nlpertools-1.0.8 → nlpertools-1.0.10}/src/nlpertools/algo/ac.py RENAMED Viewed

File without changes

{nlpertools-1.0.8 → nlpertools-1.0.10}/src/nlpertools/algo/bit_ops.py RENAMED Viewed

File without changes

{nlpertools-1.0.8 → nlpertools-1.0.10}/src/nlpertools/algo/kmp.py RENAMED Viewed

File without changes

{nlpertools-1.0.8 → nlpertools-1.0.10}/src/nlpertools/algo/num_ops.py RENAMED Viewed

File without changes

{nlpertools-1.0.8 → nlpertools-1.0.10}/src/nlpertools/algo/template.py RENAMED Viewed

File without changes

{nlpertools-1.0.8 → nlpertools-1.0.10}/src/nlpertools/algo/union.py RENAMED Viewed

File without changes

{nlpertools-1.0.8 → nlpertools-1.0.10}/src/nlpertools/data_client.py RENAMED Viewed

File without changes

{nlpertools-1.0.8 → nlpertools-1.0.10}/src/nlpertools/data_structure/__init__.py RENAMED Viewed

File without changes

{nlpertools-1.0.8 → nlpertools-1.0.10}/src/nlpertools/data_structure/base_structure.py RENAMED Viewed

File without changes

{nlpertools-1.0.8 → nlpertools-1.0.10}/src/nlpertools/default_db_config.yml RENAMED Viewed

File without changes

{nlpertools-1.0.8 → nlpertools-1.0.10}/src/nlpertools/draw/__init__.py RENAMED Viewed

File without changes

{nlpertools-1.0.8 → nlpertools-1.0.10}/src/nlpertools/draw/math_func.py RENAMED Viewed

File without changes

{nlpertools-1.0.8 → nlpertools-1.0.10}/src/nlpertools/get_2fa.py RENAMED Viewed

File without changes

{nlpertools-1.0.8 → nlpertools-1.0.10}/src/nlpertools/io/__init__.py RENAMED Viewed

File without changes

{nlpertools-1.0.8/src/nlpertools/monitor → nlpertools-1.0.10/src/nlpertools/llm}/__init__.py RENAMED Viewed

File without changes

{nlpertools-1.0.8/src/nlpertools/template → nlpertools-1.0.10/src/nlpertools/monitor}/__init__.py RENAMED Viewed

File without changes

{nlpertools-1.0.8 → nlpertools-1.0.10}/src/nlpertools/monitor/gpu.py RENAMED Viewed

File without changes

{nlpertools-1.0.8 → nlpertools-1.0.10}/src/nlpertools/monitor/memory.py RENAMED Viewed

File without changes

{nlpertools-1.0.8 → nlpertools-1.0.10}/src/nlpertools/movie.py RENAMED Viewed

File without changes

{nlpertools-1.0.8 → nlpertools-1.0.10}/src/nlpertools/nlpertools_config.yml RENAMED Viewed

File without changes

{nlpertools-1.0.8 → nlpertools-1.0.10}/src/nlpertools/open_api.py RENAMED Viewed

File without changes

{nlpertools-1.0.8 → nlpertools-1.0.10}/src/nlpertools/pic.py RENAMED Viewed

File without changes

{nlpertools-1.0.8 → nlpertools-1.0.10}/src/nlpertools/plugin.py RENAMED Viewed

File without changes

{nlpertools-1.0.8 → nlpertools-1.0.10}/src/nlpertools/reminder.py RENAMED Viewed

File without changes

{nlpertools-1.0.8 → nlpertools-1.0.10}/src/nlpertools/utils/__init__.py RENAMED Viewed

File without changes

{nlpertools-1.0.8 → nlpertools-1.0.10}/src/nlpertools/utils/lazy.py RENAMED Viewed

File without changes

{nlpertools-1.0.8 → nlpertools-1.0.10}/src/nlpertools/utils/log_util.py RENAMED Viewed

File without changes

{nlpertools-1.0.8 → nlpertools-1.0.10}/src/nlpertools/utils/package.py RENAMED Viewed

File without changes

{nlpertools-1.0.8 → nlpertools-1.0.10}/src/nlpertools/utils/package_v1.py RENAMED Viewed

File without changes

{nlpertools-1.0.8 → nlpertools-1.0.10}/src/nlpertools/utils/package_v2.py RENAMED Viewed

File without changes

{nlpertools-1.0.8 → nlpertools-1.0.10}/src/nlpertools/utils_for_nlpertools.py RENAMED Viewed

File without changes

{nlpertools-1.0.8 → nlpertools-1.0.10}/src/nlpertools/vector_index_demo.py RENAMED Viewed

File without changes

{nlpertools-1.0.8 → nlpertools-1.0.10}/src/nlpertools/wrapper.py RENAMED Viewed

File without changes

{nlpertools-1.0.8 → nlpertools-1.0.10}/src/nlpertools.egg-info/dependency_links.txt RENAMED Viewed

File without changes

{nlpertools-1.0.8 → nlpertools-1.0.10}/src/nlpertools.egg-info/entry_points.txt RENAMED Viewed

File without changes

{nlpertools-1.0.8 → nlpertools-1.0.10}/src/nlpertools.egg-info/top_level.txt RENAMED Viewed

File without changes

{nlpertools-1.0.8 → nlpertools-1.0.10}/src/nlpertools_helper/__init__.py RENAMED Viewed

File without changes

{nlpertools-1.0.8 → nlpertools-1.0.10}/tests/test_kmp.py RENAMED Viewed

File without changes

{nlpertools-1.0.8 → nlpertools-1.0.10}/tests/test_path_exists.py RENAMED Viewed

File without changes

nlpertools 1.0.8__tar.gz → 1.0.10__tar.gz

nlpertools 1.0.8.tar.gz → 1.0.10.tar.gz