pymecli 0.2.5__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
utils/logger.py ADDED
@@ -0,0 +1,26 @@
1
+ import logging
2
+
3
+
4
def get_logger(name, level=logging.DEBUG, fmt=logging.Formatter("%(message)s")):
    """Return the logger called ``name``, configuring it locally when needed.

    When the root logger carries an explicit level that equals ``level``, the
    named logger is handed back untouched. Otherwise the named logger is set
    to ``level``, gets a ``StreamHandler`` using ``fmt`` (unless one of its
    handlers already is a ``StreamHandler`` instance), and stops propagating
    records to its parents.

    :param name: logger name passed to ``logging.getLogger``
    :param level: level applied to the logger and to a newly added handler
    :param fmt: ``logging.Formatter`` installed on a newly added handler
    :return: the ``logging.Logger`` registered under ``name``
    """
    target = logging.getLogger(name)

    # logging.NOTSET is 0, so the raw root level already tells us whether the
    # root logger has been given an explicit level.
    configured_root_level = logging.getLogger().level
    if configured_root_level != logging.NOTSET and configured_root_level == level:
        # Root already runs at the requested level: make no local changes.
        return target

    # Otherwise configure this logger with the requested level.
    target.setLevel(level)

    # Make sure there is an output destination: add a StreamHandler only if
    # the logger does not have one yet.
    has_stream_handler = False
    for existing in target.handlers:
        if isinstance(existing, logging.StreamHandler):
            has_stream_handler = True
            break

    if not has_stream_handler:
        stream = logging.StreamHandler()
        stream.setLevel(level)
        stream.setFormatter(fmt)
        target.addHandler(stream)

    # Do not propagate to parent loggers, so parent handlers can neither
    # filter these records nor print them a second time.
    target.propagate = False
    return target
utils/mysql.py ADDED
@@ -0,0 +1,79 @@
1
+ import os
2
+
3
+ import pandas as pd
4
+ from dotenv import load_dotenv
5
+ from sqlalchemy import Engine, create_engine, text
6
+
7
+ from utils.pd import deduplicated
8
+
9
+
10
def get_database_engine(env_path: str) -> Engine:
    """Create a MySQL engine from ``UNI_CLI_MYSQL_*`` settings and verify it.

    The settings are loaded from the dotenv file at ``env_path`` and read from
    the process environment. The connection URL is assembled with
    ``URL.create`` so that special characters in the user name or password
    (``@``, ``/``, ``:`` ...) are escaped instead of corrupting the URL.

    :param env_path: path of the dotenv file handed to ``load_dotenv``
    :return: an ``Engine`` on which a ``SELECT 1`` probe has succeeded
    :raises ValueError: if the host, user or database variable is not set, or
        if the port variable is set but is not an integer
    :raises Exception: whatever the probe connection raises; the error is
        printed and then re-raised unchanged
    """
    # Imported locally so this function does not depend on a change to the
    # module-level import list; sqlalchemy is already a dependency here.
    from sqlalchemy.engine import URL

    load_dotenv(env_path)
    host = os.getenv("UNI_CLI_MYSQL_HOST")
    port = os.getenv("UNI_CLI_MYSQL_PORT")
    user = os.getenv("UNI_CLI_MYSQL_USER")
    password = os.getenv("UNI_CLI_MYSQL_PASSWORD")
    database = os.getenv("UNI_CLI_MYSQL_DATABASE")

    # Fail early with a clear message rather than trying to connect with the
    # literal string "None" as host, user or database.
    required = {
        "UNI_CLI_MYSQL_HOST": host,
        "UNI_CLI_MYSQL_USER": user,
        "UNI_CLI_MYSQL_DATABASE": database,
    }
    missing = [key for key, value in required.items() if not value]
    if missing:
        raise ValueError(
            "Missing required environment variable(s): " + ", ".join(missing)
        )

    url = URL.create(
        "mysql+pymysql",
        username=user,
        password=password,
        host=host,
        # An unset port lets the driver fall back to its default port.
        port=int(port) if port else None,
        database=database,
    )
    engine = create_engine(url)

    # Engines connect lazily; run a trivial query so that bad settings are
    # reported here instead of at first use.
    try:
        with engine.connect() as connection:
            connection.execute(text("SELECT 1"))
    except Exception as e:
        print(f"数据库连接失败: {str(e)}")
        raise

    return engine
31
+
32
+
33
def mysql_to_csv(
    engine: Engine,
    csv_path: str,
    table: str,
    query: str,
    update_status: int,
    d_column_names: list[str],
    pd_dtype: dict | None = None,
    del_column_names: list[str] | None = None,
) -> int:
    """Append the rows selected by ``query`` to a CSV file and mark them in MySQL.

    Steps: run ``query``, remember the values of the ``id`` column, drop
    ``del_column_names``, append the remaining columns to ``csv_path``,
    de-duplicate the CSV on ``d_column_names`` (keeping the newest row), and
    finally set ``up_status = update_status`` on the exported ids in ``table``.

    :param engine: SQLAlchemy engine used for both the query and the update
    :param csv_path: CSV file to append to; the header is written only when
        the file does not exist yet
    :param table: table to update. It is interpolated into the SQL text
        because identifiers cannot be bound as parameters, so it must come
        from trusted code, never from user input
    :param query: SELECT statement; its result must contain an ``id`` column
    :param update_status: value written to ``up_status`` for exported rows
    :param d_column_names: columns that identify duplicate CSV rows
    :param pd_dtype: optional dtype mapping passed to pandas when reading
    :param del_column_names: columns removed before writing; ``None`` means
        ``["id"]``
    :return: number of rows matched by the UPDATE, or 0 if the query returned
        no rows
    """
    # Imported locally so this function does not depend on a change to the
    # module-level import list; sqlalchemy is already a dependency here.
    from sqlalchemy import bindparam

    # None stands in for the default so no list object is shared across calls.
    if del_column_names is None:
        del_column_names = ["id"]

    # Fetch the data.
    data_frame = pd.read_sql(query, engine, dtype=pd_dtype)
    # Remember the ids before the column is (by default) dropped.
    ids = data_frame["id"].tolist()
    data_frame = data_frame.drop(columns=del_column_names)

    # Append to the CSV file, writing the header only for a new file.
    data_frame.to_csv(
        csv_path,
        mode="a",
        header=not os.path.exists(csv_path),
        index=False,
        encoding="utf-8",
    )
    # De-duplicate the CSV, keeping the most recently appended rows.
    deduplicated(csv_path, d_column_names, "last", pd_dtype)

    if not ids:
        return 0

    # Bind the ids as an expanding parameter instead of pasting them into the
    # SQL string; SQLAlchemy renders one placeholder per list element.
    update_query = text(
        f"UPDATE {table} SET up_status = :status WHERE id IN :ids"
    ).bindparams(bindparam("ids", expanding=True))

    # engine.begin() commits on success and rolls back on error.
    with engine.begin() as connection:
        result = connection.execute(
            update_query,
            {"status": update_status, "ids": ids},
        )
        # Read the count while the connection is still open.
        updated_rows = result.rowcount

    return updated_rows
utils/pd.py ADDED
@@ -0,0 +1,20 @@
1
+ import os
2
+ from typing import Literal
3
+
4
+ import pandas as pd
5
+ from pandas import DataFrame
6
+
7
+
8
+ def deduplicated(
9
+ file_path: str,
10
+ column_names: list[str],
11
+ keep: Literal["first", "last"] = "last",
12
+ pd_dtype: dict | None = None,
13
+ ):
14
+ if os.path.exists(file_path):
15
+ existing_df: DataFrame = pd.read_csv(
16
+ file_path, encoding="utf-8", dtype=pd_dtype
17
+ )
18
+ existing_df.drop_duplicates(subset=column_names, keep=keep, inplace=True)
19
+ existing_df.to_csv(file_path, index=False, encoding="utf-8")
20
+ return existing_df
utils/sleep.py ADDED
@@ -0,0 +1,16 @@
1
+ import time
2
+ from datetime import datetime, timedelta
3
+
4
+
5
def interruptible_sleep(seconds: float) -> bool:
    """Sleep for ``seconds`` in short slices so Ctrl-C is handled promptly.

    The deadline is tracked with ``time.monotonic`` so system clock
    adjustments cannot shorten or lengthen the wait, and the final slice is
    trimmed to the remaining time so the call does not overshoot (a request
    for 0.1 s no longer blocks for a full second).

    :param seconds: how long to sleep; zero or negative returns immediately
    :return: ``True`` if the full duration elapsed, ``False`` if a
        ``KeyboardInterrupt`` arrived while sleeping
    """
    deadline = time.monotonic() + seconds
    try:
        while True:
            remaining = deadline - time.monotonic()
            if remaining <= 0:
                return True
            # Wake at least once per second to check the deadline, but never
            # sleep past it.
            time.sleep(min(1.0, remaining))
    except KeyboardInterrupt:
        return False
utils/text.py ADDED
@@ -0,0 +1,33 @@
1
+ import unicodedata
2
+
3
+
4
def normalize_text(text):
    """Apply Unicode NFKC normalization to each character of ``text``.

    This maps full-width characters to their half-width forms; NFKC also
    folds other compatibility characters (for example ligatures and circled
    digits) to their plain equivalents.

    Normalization is deliberately done one character at a time: a base
    character followed by a separate combining mark is left as two code
    points rather than being composed, as whole-string NFKC would do.

    :param text: string (or any iterable of single characters) to normalize
    :return: the normalized string
    """
    # join builds the result in one pass instead of growing a string with +=.
    return "".join(unicodedata.normalize("NFKC", char) for char in text)
14
+
15
+
16
def match_text(text: str, target_list: list[str]):
    """Find the first entry of ``target_list`` that ``text`` starts with.

    Leading and trailing whitespace is stripped from ``text`` first.
    Candidates are tried in list order and the first hit wins, even if a
    later entry would match a longer prefix.

    :param text: string to inspect
    :param target_list: candidate prefixes, in priority order
    :return: ``(prefix, rest)`` where ``rest`` is the stripped text after the
        prefix, or ``(None, "")`` when the text is blank or nothing matches
    """
    stripped = text.strip()

    if stripped:
        hit = next(
            (candidate for candidate in target_list if stripped.startswith(candidate)),
            None,
        )
        if hit is not None:
            return hit, stripped[len(hit):]

    # Blank input and "no candidate matched" share the same result.
    return None, ""
utils/toml.py ADDED
@@ -0,0 +1,6 @@
1
+ import toml
2
+
3
+
4
def read_toml(fp: str):
    """Parse the TOML file at ``fp`` and return what ``toml.load`` produces.

    :param fp: path of the file, opened in text mode as UTF-8
    :return: the parsed document as returned by ``toml.load``
    """
    with open(fp, mode="r", encoding="utf-8") as toml_file:
        parsed = toml.load(toml_file)
    return parsed