mdbq 3.6.12__py3-none-any.whl → 3.6.14__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- mdbq/aggregation/optimize_data.py +7 -5
- mdbq/aggregation/query_data.py +41 -39
- mdbq/mysql/mysql.py +121 -119
- mdbq/mysql/s_query.py +72 -73
- mdbq/redis/getredis.py +1 -1
- {mdbq-3.6.12.dist-info → mdbq-3.6.14.dist-info}/METADATA +1 -1
- {mdbq-3.6.12.dist-info → mdbq-3.6.14.dist-info}/RECORD +9 -9
- {mdbq-3.6.12.dist-info → mdbq-3.6.14.dist-info}/WHEEL +0 -0
- {mdbq-3.6.12.dist-info → mdbq-3.6.14.dist-info}/top_level.txt +0 -0
mdbq/mysql/s_query.py
CHANGED
@@ -13,11 +13,13 @@ import os
|
|
13
13
|
import calendar
|
14
14
|
from mdbq.dataframe import converter
|
15
15
|
from decimal import Decimal
|
16
|
+
import logging
|
16
17
|
|
17
18
|
warnings.filterwarnings('ignore')
|
18
19
|
"""
|
19
20
|
程序专门用来下载数据库数据, 并返回 df, 不做清洗数据操作;
|
20
21
|
"""
|
22
|
+
logger = logging.getLogger(__name__)
|
21
23
|
|
22
24
|
|
23
25
|
class QueryDatas:
|
@@ -44,86 +46,83 @@ class QueryDatas:
|
|
44
46
|
connection = pymysql.connect(**self.config) # 重新连接数据库
|
45
47
|
with connection.cursor() as cursor:
|
46
48
|
sql = f"SELECT 更新时间 FROM {table_name} WHERE {condition}"
|
47
|
-
#
|
49
|
+
# logger.info(sql)
|
48
50
|
cursor.execute(sql)
|
49
51
|
columns = cursor.fetchall()
|
50
52
|
return columns
|
51
53
|
|
52
|
-
def data_to_df(self, db_name, table_name, start_date, end_date, projection: dict=
|
54
|
+
def data_to_df(self, db_name, table_name, start_date, end_date, projection: dict = None):
|
53
55
|
"""
|
54
|
-
|
56
|
+
从数据库表获取数据到DataFrame,支持列筛选和日期范围过滤
|
57
|
+
Args:
|
58
|
+
db_name: 数据库名
|
59
|
+
table_name: 表名
|
60
|
+
start_date: 起始日期(包含)
|
61
|
+
end_date: 结束日期(包含)
|
62
|
+
projection: 列筛选字典,e.g. {'日期': 1, '场景名字': 1}
|
55
63
|
"""
|
56
|
-
|
57
|
-
|
58
|
-
|
59
|
-
|
60
|
-
|
61
|
-
|
62
|
-
|
63
|
-
|
64
|
-
|
65
|
-
|
66
|
-
if self.check_infos(db_name, table_name) == False:
|
64
|
+
# 初始化默认参数
|
65
|
+
projection = projection or {}
|
66
|
+
df = pd.DataFrame()
|
67
|
+
# 日期处理
|
68
|
+
start_date = pd.to_datetime(start_date or '1970-01-01').strftime('%Y-%m-%d')
|
69
|
+
end_date = pd.to_datetime(end_date or datetime.datetime.today()).strftime('%Y-%m-%d')
|
70
|
+
|
71
|
+
# 前置检查
|
72
|
+
if not self.check_infos(db_name, table_name):
|
67
73
|
return df
|
68
74
|
|
69
|
-
|
70
|
-
|
75
|
+
# 配置数据库连接
|
76
|
+
self.config['database'] = db_name
|
77
|
+
connection = None
|
71
78
|
|
72
|
-
|
73
|
-
|
74
|
-
|
75
|
-
|
76
|
-
|
77
|
-
|
79
|
+
try:
|
80
|
+
connection = pymysql.connect(**self.config)
|
81
|
+
with connection.cursor() as cursor:
|
82
|
+
# 获取表结构(排除id列)
|
83
|
+
cursor.execute(
|
84
|
+
"""SELECT COLUMN_NAME
|
85
|
+
FROM information_schema.columns
|
86
|
+
WHERE table_schema = %s AND table_name = %s""",
|
87
|
+
(db_name, table_name)
|
88
|
+
)
|
89
|
+
cols_exist = {col['COLUMN_NAME'] for col in cursor.fetchall()} - {'id'}
|
90
|
+
|
91
|
+
# 处理列选择
|
92
|
+
selected_columns = []
|
93
|
+
if projection:
|
94
|
+
selected_columns = [k for k, v in projection.items() if v and k in cols_exist]
|
95
|
+
if not selected_columns:
|
96
|
+
logger.info("Warning: Projection 参数不匹配任何数据库字段")
|
97
|
+
return df
|
98
|
+
else:
|
99
|
+
selected_columns = list(cols_exist)
|
100
|
+
# 构建基础SQL
|
101
|
+
quoted_columns = [f'`{col}`' for col in selected_columns]
|
102
|
+
base_sql = f"SELECT {', '.join(quoted_columns)} FROM `{db_name}`.`{table_name}`"
|
103
|
+
|
104
|
+
# 添加日期条件
|
105
|
+
if '日期' in cols_exist:
|
106
|
+
base_sql += f" WHERE 日期 BETWEEN '{start_date}' AND '{end_date}'"
|
107
|
+
|
108
|
+
# 执行查询
|
109
|
+
cursor.execute(base_sql)
|
110
|
+
result = cursor.fetchall()
|
111
|
+
|
112
|
+
# 处理结果集
|
113
|
+
if result:
|
114
|
+
df = pd.DataFrame(result, columns=[desc[0] for desc in cursor.description])
|
115
|
+
# 类型转换优化
|
116
|
+
decimal_cols = [col for col in df.columns if df[col].apply(lambda x: isinstance(x, Decimal)).any()]
|
117
|
+
df[decimal_cols] = df[decimal_cols].astype(float)
|
78
118
|
|
79
|
-
|
80
|
-
|
81
|
-
|
82
|
-
|
83
|
-
|
84
|
-
columns_in.append(key) # 提取值为 1 的键并清理不在数据表的键
|
85
|
-
columns_in = [f"`{item}`" for item in columns_in]
|
86
|
-
if not columns_in:
|
87
|
-
print(f'传递的参数 projection,在数据库中没有找到匹配的列,请检查 projection: {projection}')
|
88
|
-
return df
|
89
|
-
columns_in = ', '.join(columns_in)
|
90
|
-
if '日期' in cols_exist: # 不论是否指定, 只要数据表有日期,则执行
|
91
|
-
sql = (f"SELECT {columns_in} FROM `{db_name}`.`{table_name}` "
|
92
|
-
f"WHERE {'日期'} BETWEEN '{start_date}' AND '{end_date}'")
|
93
|
-
else: # 数据表没有日期列时,返回指定列的所有数据
|
94
|
-
sql = f"SELECT {columns_in} FROM `{db_name}`.`{table_name}`"
|
95
|
-
else: # 没有指定获取列时
|
96
|
-
if '日期' in cols_exist: # 但数据表有日期,仍然执行
|
97
|
-
cols_exist = [f"`{item}`" for item in cols_exist]
|
98
|
-
columns_in = ', '.join(cols_exist)
|
99
|
-
sql = (f"SELECT {columns_in} FROM `{db_name}`.`{table_name}` "
|
100
|
-
f"WHERE {'日期'} BETWEEN '{start_date}' AND '{end_date}'")
|
101
|
-
else: # 没有指定获取列,且数据表也没有日期列,则返回全部列的全部数据
|
102
|
-
all_col = ', '.join([f"`{item}`" for item in cols_exist if item != 'id'])
|
103
|
-
sql = f"SELECT %s FROM `%s`.`%s`" % (all_col, db_name, table_name)
|
104
|
-
# print(sql)
|
105
|
-
cursor.execute(sql)
|
106
|
-
rows = cursor.fetchall() # 获取查询结果
|
107
|
-
columns = [desc[0] for desc in cursor.description]
|
108
|
-
df = pd.DataFrame(rows, columns=columns) # 转为 df
|
109
|
-
# 使用applymap将每个Decimal转换为float
|
110
|
-
df_float = df.applymap(lambda x: float(x) if isinstance(x, Decimal) else x)
|
111
|
-
|
112
|
-
if 'id' in df.columns.tolist():
|
113
|
-
df.pop('id') # 默认不返回 id 列
|
114
|
-
if len(df) == 0:
|
115
|
-
print(f's_query.py -> data_to_df -> database: {db_name}, table: {table_name} 查询的数据为空1')
|
116
|
-
connection.close()
|
117
|
-
return df
|
119
|
+
except Exception as e:
|
120
|
+
logger.info(f"Database operation failed: {str(e)}")
|
121
|
+
finally:
|
122
|
+
if connection:
|
123
|
+
connection.close()
|
118
124
|
|
119
|
-
|
120
|
-
# print(f'database: {db_name}, table: {table_name} 查询的数据为空2')
|
121
|
-
# return pd.DataFrame()
|
122
|
-
# cv = converter.DataFrameConverter()
|
123
|
-
# df = cv.convert_df_cols(df)
|
124
|
-
# if 'id' in df.columns.tolist():
|
125
|
-
# df.pop('id') # 默认不返回 id 列
|
126
|
-
# return df
|
125
|
+
return df
|
127
126
|
|
128
127
|
def columns_to_list(self, db_name, table_name, columns_name) -> list:
|
129
128
|
"""
|
@@ -179,7 +178,7 @@ class QueryDatas:
|
|
179
178
|
cursor.execute(f"SHOW DATABASES LIKE '{db_name}'") # 检查数据库是否存在
|
180
179
|
database_exists = cursor.fetchone()
|
181
180
|
if not database_exists:
|
182
|
-
|
181
|
+
logger.info(f"Database <{db_name}>: 数据库不存在")
|
183
182
|
return False
|
184
183
|
finally:
|
185
184
|
connection.close() # 这里要断开连接
|
@@ -192,11 +191,11 @@ class QueryDatas:
|
|
192
191
|
sql = f"SHOW TABLES LIKE '{table_name}'"
|
193
192
|
cursor.execute(sql)
|
194
193
|
if not cursor.fetchone():
|
195
|
-
|
194
|
+
logger.info(f'{db_name} -> <{table_name}>: 表不存在')
|
196
195
|
return False
|
197
196
|
return True
|
198
197
|
except Exception as e:
|
199
|
-
|
198
|
+
logger.info(e)
|
200
199
|
return False
|
201
200
|
finally:
|
202
201
|
connection.close() # 断开连接
|
@@ -209,4 +208,4 @@ if __name__ == '__main__':
|
|
209
208
|
|
210
209
|
q = QueryDatas(username, password, host, port)
|
211
210
|
res = q.columns_to_list(db_name='视频数据', table_name='bilibili视频', columns_name=['视频bv号', '下载进度'])
|
212
|
-
|
211
|
+
logger.info(res)
|
mdbq/redis/getredis.py
CHANGED
@@ -45,7 +45,6 @@ logging.basicConfig(level=logging.INFO, format='%(asctime)s | %(levelname)s | %(
|
|
45
45
|
logger = logging.getLogger(__name__)
|
46
46
|
|
47
47
|
# 创建一个文件处理器,用于将日志写入文件
|
48
|
-
# file_handler = logging.FileHandler(os.path.join(D_PATH, 'logfile', 'redis.log'))
|
49
48
|
if not os.path.isdir(os.path.join(D_PATH, 'logfile')):
|
50
49
|
os.makedirs(os.path.join(D_PATH, 'logfile'))
|
51
50
|
log_file = os.path.join(D_PATH, 'logfile', 'redis.log')
|
@@ -358,6 +357,7 @@ class RedisDataHash(object):
|
|
358
357
|
return self.get_from_mysql(db_name, table_name, set_year, start_date, end_date, projection)
|
359
358
|
|
360
359
|
filtered_df = self._filter_by_date_range(cache_data, start_dt, end_dt)
|
360
|
+
|
361
361
|
if not filtered_df.empty:
|
362
362
|
if '日期' in filtered_df.columns.tolist():
|
363
363
|
exsit_min_date = filtered_df['日期'].min()
|
@@ -3,8 +3,8 @@ mdbq/__version__.py,sha256=y9Mp_8x0BCZSHsdLT_q5tX9wZwd5QgqrSIENLrb6vXA,62
|
|
3
3
|
mdbq/aggregation/__init__.py,sha256=EeDqX2Aml6SPx8363J-v1lz0EcZtgwIBYyCJV6CcEDU,40
|
4
4
|
mdbq/aggregation/aggregation.py,sha256=-yzApnlqSN2L0E1YMu5ml-W827qpKQvWPCOI7jj2kzY,80264
|
5
5
|
mdbq/aggregation/datashow.py,sha256=1AYSIDkdUx-4as1Ax2rPj0cExM9d-qFMrFYLAaPHNuk,54962
|
6
|
-
mdbq/aggregation/optimize_data.py,sha256=
|
7
|
-
mdbq/aggregation/query_data.py,sha256=
|
6
|
+
mdbq/aggregation/optimize_data.py,sha256=87Dc2RQTF-wqp8Gct_e0S_xwiNlzsEmM82mwuHPZKlc,3152
|
7
|
+
mdbq/aggregation/query_data.py,sha256=7g1Z2qyU8dgnOHdpN5ltr7BX8ZFRrbVmFpk6s9045Pw,193363
|
8
8
|
mdbq/bdup/__init__.py,sha256=AkhsGk81SkG1c8FqDH5tRq-8MZmFobVbN60DTyukYTY,28
|
9
9
|
mdbq/bdup/bdup.py,sha256=LAV0TgnQpc-LB-YuJthxb0U42_VkPidzQzAagan46lU,4234
|
10
10
|
mdbq/config/__init__.py,sha256=jso1oHcy6cJEfa7udS_9uO5X6kZLoPBF8l3wCYmr5dM,18
|
@@ -18,10 +18,10 @@ mdbq/log/mylogger.py,sha256=oaT7Bp-Hb9jZt52seP3ISUuxVcI19s4UiqTeouScBO0,3258
|
|
18
18
|
mdbq/mongo/__init__.py,sha256=SILt7xMtQIQl_m-ik9WLtJSXIVf424iYgCfE_tnQFbw,13
|
19
19
|
mdbq/mongo/mongo.py,sha256=M9DUeUCMPDngkwn9-ui0uTiFrvfNU1kLs22s5SmoNm0,31899
|
20
20
|
mdbq/mysql/__init__.py,sha256=A_DPJyAoEvTSFojiI2e94zP0FKtCkkwKP1kYUCSyQzo,11
|
21
|
-
mdbq/mysql/mysql.py,sha256=
|
21
|
+
mdbq/mysql/mysql.py,sha256=1HoOvSZKPHc3Bz1YI_A_5x3JmGvjUN4suCM9oHCF-t4,100465
|
22
22
|
mdbq/mysql/mysql_bak.py,sha256=_jFo2_OC1BNm5wEmoYiBG_TcuNNA2xUWKNhMBfgDiAM,99699
|
23
23
|
mdbq/mysql/recheck_mysql.py,sha256=ppBTfBLgkRWirMVZ31e_ZPULiGPJU7K3PP9G6QBZ3QI,8605
|
24
|
-
mdbq/mysql/s_query.py,sha256=
|
24
|
+
mdbq/mysql/s_query.py,sha256=CL2Ayo2sL11RbLnh9nE-GXA-NpA815-rrlFo24TipKY,8792
|
25
25
|
mdbq/mysql/year_month_day.py,sha256=VgewoE2pJxK7ErjfviL_SMTN77ki8GVbTUcao3vFUCE,1523
|
26
26
|
mdbq/other/__init__.py,sha256=jso1oHcy6cJEfa7udS_9uO5X6kZLoPBF8l3wCYmr5dM,18
|
27
27
|
mdbq/other/download_sku_picture.py,sha256=GdphR7Q3psXXVuZoyJ4u_6OWn_rWlcbT0iJ-1zPT6O0,45368
|
@@ -34,11 +34,11 @@ mdbq/pbix/pbix_refresh.py,sha256=JUjKW3bNEyoMVfVfo77UhguvS5AWkixvVhDbw4_MHco,239
|
|
34
34
|
mdbq/pbix/refresh_all.py,sha256=OBT9EewSZ0aRS9vL_FflVn74d4l2G00wzHiikCC4TC0,5926
|
35
35
|
mdbq/pbix/refresh_all_old.py,sha256=_pq3WSQ728GPtEG5pfsZI2uTJhU8D6ra-htIk1JXYzw,7192
|
36
36
|
mdbq/redis/__init__.py,sha256=YtgBlVSMDphtpwYX248wGge1x-Ex_mMufz4-8W0XRmA,12
|
37
|
-
mdbq/redis/getredis.py,sha256=
|
37
|
+
mdbq/redis/getredis.py,sha256=4rYk9lMRvvlpY7cV3VNQcSnbDWlZIsZZ-tSq--YqfSQ,26638
|
38
38
|
mdbq/redis/getredis_优化hash.py,sha256=q7omKJCPw_6Zr_r6WwTv4RGSXzZzpLPkIaqJ22svJhE,29104
|
39
39
|
mdbq/spider/__init__.py,sha256=RBMFXGy_jd1HXZhngB2T2XTvJqki8P_Fr-pBcwijnew,18
|
40
40
|
mdbq/spider/aikucun.py,sha256=v7VO5gtEXR6_4Q6ujbTyu1FHu7TXHcwSQ6hIO249YH0,22208
|
41
|
-
mdbq-3.6.
|
42
|
-
mdbq-3.6.
|
43
|
-
mdbq-3.6.
|
44
|
-
mdbq-3.6.
|
41
|
+
mdbq-3.6.14.dist-info/METADATA,sha256=6nLgVL5HWxyW9-EBob6LnpGnfncM3l8rOL4nKQtqtbg,244
|
42
|
+
mdbq-3.6.14.dist-info/WHEEL,sha256=cpQTJ5IWu9CdaPViMhC9YzF8gZuS5-vlfoFihTBC86A,91
|
43
|
+
mdbq-3.6.14.dist-info/top_level.txt,sha256=2FQ-uLnCSB-OwFiWntzmwosW3X2Xqsg0ewh1axsaylA,5
|
44
|
+
mdbq-3.6.14.dist-info/RECORD,,
|
File without changes
|
File without changes
|