mdbq 3.6.12__py3-none-any.whl → 3.6.14__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
mdbq/mysql/s_query.py CHANGED
@@ -13,11 +13,13 @@ import os
13
13
  import calendar
14
14
  from mdbq.dataframe import converter
15
15
  from decimal import Decimal
16
+ import logging
16
17
 
17
18
  warnings.filterwarnings('ignore')
18
19
  """
19
20
  程序专门用来下载数据库数据, 并返回 df, 不做清洗数据操作;
20
21
  """
22
+ logger = logging.getLogger(__name__)
21
23
 
22
24
 
23
25
  class QueryDatas:
@@ -44,86 +46,83 @@ class QueryDatas:
44
46
  connection = pymysql.connect(**self.config) # 重新连接数据库
45
47
  with connection.cursor() as cursor:
46
48
  sql = f"SELECT 更新时间 FROM {table_name} WHERE {condition}"
47
- # print(sql)
49
+ # logger.info(sql)
48
50
  cursor.execute(sql)
49
51
  columns = cursor.fetchall()
50
52
  return columns
51
53
 
52
- def data_to_df(self, db_name, table_name, start_date, end_date, projection: dict=[]):
54
+ def data_to_df(self, db_name, table_name, start_date, end_date, projection: dict = None):
53
55
  """
54
- projection = {'日期': 1, '场景名字': 1,}
56
+ 从数据库表获取数据到DataFrame,支持列筛选和日期范围过滤
57
+ Args:
58
+ db_name: 数据库名
59
+ table_name: 表名
60
+ start_date: 起始日期(包含)
61
+ end_date: 结束日期(包含)
62
+ projection: 列筛选字典,e.g. {'日期': 1, '场景名字': 1}
55
63
  """
56
- if start_date:
57
- start_date = pd.to_datetime(start_date).strftime('%Y-%m-%d')
58
- else:
59
- start_date = '1970-01-01'
60
- if end_date:
61
- end_date = pd.to_datetime(end_date).strftime('%Y-%m-%d')
62
- else:
63
- end_date = datetime.datetime.today().strftime('%Y-%m-%d')
64
- df = pd.DataFrame() # 初始化df
65
-
66
- if self.check_infos(db_name, table_name) == False:
64
+ # 初始化默认参数
65
+ projection = projection or {}
66
+ df = pd.DataFrame()
67
+ # 日期处理
68
+ start_date = pd.to_datetime(start_date or '1970-01-01').strftime('%Y-%m-%d')
69
+ end_date = pd.to_datetime(end_date or datetime.datetime.today()).strftime('%Y-%m-%d')
70
+
71
+ # 前置检查
72
+ if not self.check_infos(db_name, table_name):
67
73
  return df
68
74
 
69
- self.config.update({'database': db_name})
70
- connection = pymysql.connect(**self.config) # 重新连接数据库
75
+ # 配置数据库连接
76
+ self.config['database'] = db_name
77
+ connection = None
71
78
 
72
- with connection.cursor() as cursor:
73
- # 3. 获取数据表的所有列信息
74
- sql = 'SELECT `COLUMN_NAME` FROM information_schema.columns WHERE table_schema = %s AND table_name = %s'
75
- cursor.execute(sql, (db_name, {table_name}))
76
- columns = cursor.fetchall()
77
- cols_exist = [col['COLUMN_NAME'] for col in columns] # 数据表的所有列, 返回 list
79
+ try:
80
+ connection = pymysql.connect(**self.config)
81
+ with connection.cursor() as cursor:
82
+ # 获取表结构(排除id列)
83
+ cursor.execute(
84
+ """SELECT COLUMN_NAME
85
+ FROM information_schema.columns
86
+ WHERE table_schema = %s AND table_name = %s""",
87
+ (db_name, table_name)
88
+ )
89
+ cols_exist = {col['COLUMN_NAME'] for col in cursor.fetchall()} - {'id'}
90
+
91
+ # 处理列选择
92
+ selected_columns = []
93
+ if projection:
94
+ selected_columns = [k for k, v in projection.items() if v and k in cols_exist]
95
+ if not selected_columns:
96
+ logger.info("Warning: Projection 参数不匹配任何数据库字段")
97
+ return df
98
+ else:
99
+ selected_columns = list(cols_exist)
100
+ # 构建基础SQL
101
+ quoted_columns = [f'`{col}`' for col in selected_columns]
102
+ base_sql = f"SELECT {', '.join(quoted_columns)} FROM `{db_name}`.`{table_name}`"
103
+
104
+ # 添加日期条件
105
+ if '日期' in cols_exist:
106
+ base_sql += f" WHERE 日期 BETWEEN '{start_date}' AND '{end_date}'"
107
+
108
+ # 执行查询
109
+ cursor.execute(base_sql)
110
+ result = cursor.fetchall()
111
+
112
+ # 处理结果集
113
+ if result:
114
+ df = pd.DataFrame(result, columns=[desc[0] for desc in cursor.description])
115
+ # 类型转换优化
116
+ decimal_cols = [col for col in df.columns if df[col].apply(lambda x: isinstance(x, Decimal)).any()]
117
+ df[decimal_cols] = df[decimal_cols].astype(float)
78
118
 
79
- # 4. 构建 SQL 查询语句
80
- if projection: # 获取指定列
81
- columns_in = []
82
- for key, value in projection.items():
83
- if value == 1 and key in cols_exist:
84
- columns_in.append(key) # 提取值为 1 的键并清理不在数据表的键
85
- columns_in = [f"`{item}`" for item in columns_in]
86
- if not columns_in:
87
- print(f'传递的参数 projection,在数据库中没有找到匹配的列,请检查 projection: {projection}')
88
- return df
89
- columns_in = ', '.join(columns_in)
90
- if '日期' in cols_exist: # 不论是否指定, 只要数据表有日期,则执行
91
- sql = (f"SELECT {columns_in} FROM `{db_name}`.`{table_name}` "
92
- f"WHERE {'日期'} BETWEEN '{start_date}' AND '{end_date}'")
93
- else: # 数据表没有日期列时,返回指定列的所有数据
94
- sql = f"SELECT {columns_in} FROM `{db_name}`.`{table_name}`"
95
- else: # 没有指定获取列时
96
- if '日期' in cols_exist: # 但数据表有日期,仍然执行
97
- cols_exist = [f"`{item}`" for item in cols_exist]
98
- columns_in = ', '.join(cols_exist)
99
- sql = (f"SELECT {columns_in} FROM `{db_name}`.`{table_name}` "
100
- f"WHERE {'日期'} BETWEEN '{start_date}' AND '{end_date}'")
101
- else: # 没有指定获取列,且数据表也没有日期列,则返回全部列的全部数据
102
- all_col = ', '.join([f"`{item}`" for item in cols_exist if item != 'id'])
103
- sql = f"SELECT %s FROM `%s`.`%s`" % (all_col, db_name, table_name)
104
- # print(sql)
105
- cursor.execute(sql)
106
- rows = cursor.fetchall() # 获取查询结果
107
- columns = [desc[0] for desc in cursor.description]
108
- df = pd.DataFrame(rows, columns=columns) # 转为 df
109
- # 使用applymap将每个Decimal转换为float
110
- df_float = df.applymap(lambda x: float(x) if isinstance(x, Decimal) else x)
111
-
112
- if 'id' in df.columns.tolist():
113
- df.pop('id') # 默认不返回 id 列
114
- if len(df) == 0:
115
- print(f's_query.py -> data_to_df -> database: {db_name}, table: {table_name} 查询的数据为空1')
116
- connection.close()
117
- return df
119
+ except Exception as e:
120
+ logger.info(f"Database operation failed: {str(e)}")
121
+ finally:
122
+ if connection:
123
+ connection.close()
118
124
 
119
- # if len(df) == 0:
120
- # print(f'database: {db_name}, table: {table_name} 查询的数据为空2')
121
- # return pd.DataFrame()
122
- # cv = converter.DataFrameConverter()
123
- # df = cv.convert_df_cols(df)
124
- # if 'id' in df.columns.tolist():
125
- # df.pop('id') # 默认不返回 id 列
126
- # return df
125
+ return df
127
126
 
128
127
  def columns_to_list(self, db_name, table_name, columns_name) -> list:
129
128
  """
@@ -179,7 +178,7 @@ class QueryDatas:
179
178
  cursor.execute(f"SHOW DATABASES LIKE '{db_name}'") # 检查数据库是否存在
180
179
  database_exists = cursor.fetchone()
181
180
  if not database_exists:
182
- print(f"Database <{db_name}>: 数据库不存在")
181
+ logger.info(f"Database <{db_name}>: 数据库不存在")
183
182
  return False
184
183
  finally:
185
184
  connection.close() # 这里要断开连接
@@ -192,11 +191,11 @@ class QueryDatas:
192
191
  sql = f"SHOW TABLES LIKE '{table_name}'"
193
192
  cursor.execute(sql)
194
193
  if not cursor.fetchone():
195
- print(f'{db_name} -> <{table_name}>: 表不存在')
194
+ logger.info(f'{db_name} -> <{table_name}>: 表不存在')
196
195
  return False
197
196
  return True
198
197
  except Exception as e:
199
- print(e)
198
+ logger.info(e)
200
199
  return False
201
200
  finally:
202
201
  connection.close() # 断开连接
@@ -209,4 +208,4 @@ if __name__ == '__main__':
209
208
 
210
209
  q = QueryDatas(username, password, host, port)
211
210
  res = q.columns_to_list(db_name='视频数据', table_name='bilibili视频', columns_name=['视频bv号', '下载进度'])
212
- print(res)
211
+ logger.info(res)
mdbq/redis/getredis.py CHANGED
@@ -45,7 +45,6 @@ logging.basicConfig(level=logging.INFO, format='%(asctime)s | %(levelname)s | %(
45
45
  logger = logging.getLogger(__name__)
46
46
 
47
47
  # 创建一个文件处理器,用于将日志写入文件
48
- # file_handler = logging.FileHandler(os.path.join(D_PATH, 'logfile', 'redis.log'))
49
48
  if not os.path.isdir(os.path.join(D_PATH, 'logfile')):
50
49
  os.makedirs(os.path.join(D_PATH, 'logfile'))
51
50
  log_file = os.path.join(D_PATH, 'logfile', 'redis.log')
@@ -358,6 +357,7 @@ class RedisDataHash(object):
358
357
  return self.get_from_mysql(db_name, table_name, set_year, start_date, end_date, projection)
359
358
 
360
359
  filtered_df = self._filter_by_date_range(cache_data, start_dt, end_dt)
360
+
361
361
  if not filtered_df.empty:
362
362
  if '日期' in filtered_df.columns.tolist():
363
363
  exsit_min_date = filtered_df['日期'].min()
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: mdbq
3
- Version: 3.6.12
3
+ Version: 3.6.14
4
4
  Home-page: https://pypi.org/project/mdbq
5
5
  Author: xigua,
6
6
  Author-email: 2587125111@qq.com
@@ -3,8 +3,8 @@ mdbq/__version__.py,sha256=y9Mp_8x0BCZSHsdLT_q5tX9wZwd5QgqrSIENLrb6vXA,62
3
3
  mdbq/aggregation/__init__.py,sha256=EeDqX2Aml6SPx8363J-v1lz0EcZtgwIBYyCJV6CcEDU,40
4
4
  mdbq/aggregation/aggregation.py,sha256=-yzApnlqSN2L0E1YMu5ml-W827qpKQvWPCOI7jj2kzY,80264
5
5
  mdbq/aggregation/datashow.py,sha256=1AYSIDkdUx-4as1Ax2rPj0cExM9d-qFMrFYLAaPHNuk,54962
6
- mdbq/aggregation/optimize_data.py,sha256=RXIv7cACCgYyehAxMjUYi_S7rVyjIwXKWMaM3nduGtA,3068
7
- mdbq/aggregation/query_data.py,sha256=KrQqe9qzrKAs3s6QNoQZuY8_U36I852dFPiIcv1QIi0,193075
6
+ mdbq/aggregation/optimize_data.py,sha256=87Dc2RQTF-wqp8Gct_e0S_xwiNlzsEmM82mwuHPZKlc,3152
7
+ mdbq/aggregation/query_data.py,sha256=7g1Z2qyU8dgnOHdpN5ltr7BX8ZFRrbVmFpk6s9045Pw,193363
8
8
  mdbq/bdup/__init__.py,sha256=AkhsGk81SkG1c8FqDH5tRq-8MZmFobVbN60DTyukYTY,28
9
9
  mdbq/bdup/bdup.py,sha256=LAV0TgnQpc-LB-YuJthxb0U42_VkPidzQzAagan46lU,4234
10
10
  mdbq/config/__init__.py,sha256=jso1oHcy6cJEfa7udS_9uO5X6kZLoPBF8l3wCYmr5dM,18
@@ -18,10 +18,10 @@ mdbq/log/mylogger.py,sha256=oaT7Bp-Hb9jZt52seP3ISUuxVcI19s4UiqTeouScBO0,3258
18
18
  mdbq/mongo/__init__.py,sha256=SILt7xMtQIQl_m-ik9WLtJSXIVf424iYgCfE_tnQFbw,13
19
19
  mdbq/mongo/mongo.py,sha256=M9DUeUCMPDngkwn9-ui0uTiFrvfNU1kLs22s5SmoNm0,31899
20
20
  mdbq/mysql/__init__.py,sha256=A_DPJyAoEvTSFojiI2e94zP0FKtCkkwKP1kYUCSyQzo,11
21
- mdbq/mysql/mysql.py,sha256=_jFo2_OC1BNm5wEmoYiBG_TcuNNA2xUWKNhMBfgDiAM,99699
21
+ mdbq/mysql/mysql.py,sha256=1HoOvSZKPHc3Bz1YI_A_5x3JmGvjUN4suCM9oHCF-t4,100465
22
22
  mdbq/mysql/mysql_bak.py,sha256=_jFo2_OC1BNm5wEmoYiBG_TcuNNA2xUWKNhMBfgDiAM,99699
23
23
  mdbq/mysql/recheck_mysql.py,sha256=ppBTfBLgkRWirMVZ31e_ZPULiGPJU7K3PP9G6QBZ3QI,8605
24
- mdbq/mysql/s_query.py,sha256=rUyemVsjFM7OYG_o-DYGy18aRCbMsDlpzNGNS8Un7dg,9722
24
+ mdbq/mysql/s_query.py,sha256=CL2Ayo2sL11RbLnh9nE-GXA-NpA815-rrlFo24TipKY,8792
25
25
  mdbq/mysql/year_month_day.py,sha256=VgewoE2pJxK7ErjfviL_SMTN77ki8GVbTUcao3vFUCE,1523
26
26
  mdbq/other/__init__.py,sha256=jso1oHcy6cJEfa7udS_9uO5X6kZLoPBF8l3wCYmr5dM,18
27
27
  mdbq/other/download_sku_picture.py,sha256=GdphR7Q3psXXVuZoyJ4u_6OWn_rWlcbT0iJ-1zPT6O0,45368
@@ -34,11 +34,11 @@ mdbq/pbix/pbix_refresh.py,sha256=JUjKW3bNEyoMVfVfo77UhguvS5AWkixvVhDbw4_MHco,239
34
34
  mdbq/pbix/refresh_all.py,sha256=OBT9EewSZ0aRS9vL_FflVn74d4l2G00wzHiikCC4TC0,5926
35
35
  mdbq/pbix/refresh_all_old.py,sha256=_pq3WSQ728GPtEG5pfsZI2uTJhU8D6ra-htIk1JXYzw,7192
36
36
  mdbq/redis/__init__.py,sha256=YtgBlVSMDphtpwYX248wGge1x-Ex_mMufz4-8W0XRmA,12
37
- mdbq/redis/getredis.py,sha256=DKahNJeO3W3RZ-u6LsVbbGLi-CK-dZ8y3UV9dxk8YM8,26720
37
+ mdbq/redis/getredis.py,sha256=4rYk9lMRvvlpY7cV3VNQcSnbDWlZIsZZ-tSq--YqfSQ,26638
38
38
  mdbq/redis/getredis_优化hash.py,sha256=q7omKJCPw_6Zr_r6WwTv4RGSXzZzpLPkIaqJ22svJhE,29104
39
39
  mdbq/spider/__init__.py,sha256=RBMFXGy_jd1HXZhngB2T2XTvJqki8P_Fr-pBcwijnew,18
40
40
  mdbq/spider/aikucun.py,sha256=v7VO5gtEXR6_4Q6ujbTyu1FHu7TXHcwSQ6hIO249YH0,22208
41
- mdbq-3.6.12.dist-info/METADATA,sha256=La5gbnCv5fzKJ69TGAZ6HZPgeF_bc8-jJnM0D1r06Fs,244
42
- mdbq-3.6.12.dist-info/WHEEL,sha256=cpQTJ5IWu9CdaPViMhC9YzF8gZuS5-vlfoFihTBC86A,91
43
- mdbq-3.6.12.dist-info/top_level.txt,sha256=2FQ-uLnCSB-OwFiWntzmwosW3X2Xqsg0ewh1axsaylA,5
44
- mdbq-3.6.12.dist-info/RECORD,,
41
+ mdbq-3.6.14.dist-info/METADATA,sha256=6nLgVL5HWxyW9-EBob6LnpGnfncM3l8rOL4nKQtqtbg,244
42
+ mdbq-3.6.14.dist-info/WHEEL,sha256=cpQTJ5IWu9CdaPViMhC9YzF8gZuS5-vlfoFihTBC86A,91
43
+ mdbq-3.6.14.dist-info/top_level.txt,sha256=2FQ-uLnCSB-OwFiWntzmwosW3X2Xqsg0ewh1axsaylA,5
44
+ mdbq-3.6.14.dist-info/RECORD,,
File without changes