mdbq 3.10.7__py3-none-any.whl → 3.10.9__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
mdbq/mysql/s_query.py CHANGED
@@ -1,14 +1,11 @@
1
1
  # -*- coding:utf-8 -*-
2
2
  import datetime
3
- import re
4
- import time
5
3
  import warnings
6
4
  import pymysql
7
- import numpy as np
8
5
  import pandas as pd
9
- import os
10
6
  from decimal import Decimal
11
7
  import logging
8
+ from contextlib import closing
12
9
 
13
10
  warnings.filterwarnings('ignore')
14
11
  """
@@ -18,7 +15,20 @@ logger = logging.getLogger(__name__)
18
15
 
19
16
 
20
17
  class QueryDatas:
18
+ """
19
+ 数据库查询工具类。
20
+ 用于连接MySQL数据库,支持表结构检查、条件查询、数据导出为DataFrame、列名和类型获取等功能。
21
+ """
22
+
21
23
  def __init__(self, username: str, password: str, host: str, port: int, charset: str = 'utf8mb4'):
24
+ """
25
+ 初始化数据库连接配置。
26
+ :param username: 数据库用户名
27
+ :param password: 数据库密码
28
+ :param host: 数据库主机
29
+ :param port: 数据库端口
30
+ :param charset: 字符集,默认utf8mb4
31
+ """
22
32
  self.username = username
23
33
  self.password = password
24
34
  self.host = host
@@ -32,175 +42,181 @@ class QueryDatas:
32
42
  'cursorclass': pymysql.cursors.DictCursor,
33
43
  }
34
44
 
35
- def check_condition(self, db_name, table_name, condition):
36
- """ 按指定条件查询数据库,并返回 """
37
- if self.check_infos(db_name, table_name) == False:
38
- return
39
-
45
+ def check_condition(self, db_name, table_name, condition, columns='更新时间'):
46
+ """
47
+ 按指定条件查询数据库表,返回满足条件的指定字段数据。
48
+ :param db_name: 数据库名
49
+ :param table_name: 表名
50
+ :param condition: SQL条件字符串(不含WHERE)
51
+ :param columns: 查询字段字符串或以逗号分隔的字段名,默认'更新时间'
52
+ :return: 查询结果列表或None
53
+ """
54
+ if not self.check_infos(db_name, table_name):
55
+ return None
40
56
  self.config.update({'database': db_name})
41
- connection = pymysql.connect(**self.config) # 重新连接数据库
42
- with connection.cursor() as cursor:
43
- sql = f"SELECT 更新时间 FROM {table_name} WHERE {condition}"
44
- # logger.info(sql)
45
- cursor.execute(sql)
46
- columns = cursor.fetchall()
47
- return columns
57
+ try:
58
+ with closing(pymysql.connect(**self.config)) as connection:
59
+ with closing(connection.cursor()) as cursor:
60
+ sql = f"SELECT {columns} FROM `{table_name}` WHERE {condition}"
61
+ logger.debug(f"check_condition SQL: {sql}")
62
+ cursor.execute(sql)
63
+ result = cursor.fetchall()
64
+ return result
65
+ except Exception as e:
66
+ logger.error(f"check_condition error: {e}")
67
+ return None
48
68
 
49
- def data_to_df(self, db_name, table_name, start_date, end_date, projection: dict = None):
69
+ def data_to_df(self, db_name, table_name, start_date, end_date, projection: dict = None, limit: int = None):
50
70
  """
51
- 从数据库表获取数据到DataFrame,支持列筛选和日期范围过滤
52
- Args:
53
- db_name: 数据库名
54
- table_name: 表名
55
- start_date: 起始日期(包含)
56
- end_date: 结束日期(包含)
57
- projection: 列筛选字典,e.g. {'日期': 1, '场景名字': 1}
71
+ 从数据库表获取数据到DataFrame,支持列筛选、日期范围过滤和行数限制。
72
+ :param db_name: 数据库名
73
+ :param table_name: 表名
74
+ :param start_date: 起始日期(包含)
75
+ :param end_date: 结束日期(包含)
76
+ :param projection: 列筛选字典,e.g. {'日期': 1, '场景名字': 1}
77
+ :param limit: 限制返回的最大行数
78
+ :return: 查询结果的DataFrame
58
79
  """
59
- # 初始化默认参数
60
80
  projection = projection or {}
61
81
  df = pd.DataFrame()
62
- # 日期处理
63
- start_date = pd.to_datetime(start_date or '1970-01-01').strftime('%Y-%m-%d')
64
- end_date = pd.to_datetime(end_date or datetime.datetime.today()).strftime('%Y-%m-%d')
65
-
66
- # 前置检查
82
+ try:
83
+ start_date = pd.to_datetime(start_date or '1970-01-01').strftime('%Y-%m-%d')
84
+ end_date = pd.to_datetime(end_date or datetime.datetime.today()).strftime('%Y-%m-%d')
85
+ except Exception as e:
86
+ logger.error(f"日期格式错误: {e}")
87
+ return df
67
88
  if not self.check_infos(db_name, table_name):
68
89
  return df
69
-
70
- # 配置数据库连接
71
90
  self.config['database'] = db_name
72
- connection = None
73
-
74
91
  try:
75
- connection = pymysql.connect(**self.config)
76
- with connection.cursor() as cursor:
77
- # 获取表结构(排除id列)
78
- cursor.execute(
79
- """SELECT COLUMN_NAME
80
- FROM information_schema.columns
81
- WHERE table_schema = %s AND table_name = %s""",
82
- (db_name, table_name)
83
- )
84
- cols_exist = {col['COLUMN_NAME'] for col in cursor.fetchall()} - {'id'}
85
-
86
- # 处理列选择
87
- selected_columns = []
88
- if projection:
89
- selected_columns = [k for k, v in projection.items() if v and k in cols_exist]
92
+ with closing(pymysql.connect(**self.config)) as connection:
93
+ with closing(connection.cursor()) as cursor:
94
+ cursor.execute(
95
+ """SELECT COLUMN_NAME FROM information_schema.columns WHERE table_schema = %s AND table_name = %s""",
96
+ (db_name, table_name)
97
+ )
98
+ cols_exist = {col['COLUMN_NAME'] for col in cursor.fetchall()} - {'id'}
99
+ if projection:
100
+ selected_columns = [k for k, v in projection.items() if v and k in cols_exist]
101
+ if not selected_columns:
102
+ logger.info("Warning: Projection 参数不匹配任何数据库字段")
103
+ return df
104
+ else:
105
+ selected_columns = list(cols_exist)
90
106
  if not selected_columns:
91
- logger.info("Warning: Projection 参数不匹配任何数据库字段")
107
+ logger.info("未找到可用字段")
92
108
  return df
93
- else:
94
- selected_columns = list(cols_exist)
95
- # 构建基础SQL
96
- quoted_columns = [f'`{col}`' for col in selected_columns]
97
- base_sql = f"SELECT {', '.join(quoted_columns)} FROM `{db_name}`.`{table_name}`"
98
-
99
- # 添加日期条件
100
- if '日期' in cols_exist:
101
- base_sql += f" WHERE 日期 BETWEEN '{start_date}' AND '{end_date}'"
102
-
103
- # 执行查询
104
- cursor.execute(base_sql)
105
- result = cursor.fetchall()
106
-
107
- # 处理结果集
108
- if result:
109
- df = pd.DataFrame(result, columns=[desc[0] for desc in cursor.description])
110
- # 类型转换优化
111
- decimal_cols = [col for col in df.columns if df[col].apply(lambda x: isinstance(x, Decimal)).any()]
112
- df[decimal_cols] = df[decimal_cols].astype(float)
113
-
109
+ quoted_columns = [f'`{col}`' for col in selected_columns]
110
+ base_sql = f"SELECT {', '.join(quoted_columns)} FROM `{db_name}`.`{table_name}`"
111
+ params = []
112
+ if '日期' in cols_exist:
113
+ base_sql += f" WHERE 日期 BETWEEN %s AND %s"
114
+ params.extend([start_date, end_date])
115
+ if limit is not None and isinstance(limit, int) and limit > 0:
116
+ base_sql += f" LIMIT %s"
117
+ params.append(limit)
118
+ logger.debug(f"data_to_df SQL: {base_sql}, params: {params}")
119
+ cursor.execute(base_sql, tuple(params))
120
+ result = cursor.fetchall()
121
+ if result:
122
+ df = pd.DataFrame(result)
123
+ for col in df.columns:
124
+ if df[col].apply(lambda x: isinstance(x, Decimal)).any():
125
+ df[col] = df[col].astype(float)
114
126
  except Exception as e:
115
- logger.error(f"Database operation failed: {str(e)}")
116
- finally:
117
- if connection:
118
- connection.close()
119
-
127
+ logger.error(f"data_to_df error: {e}")
120
128
  return df
121
129
 
122
- def columns_to_list(self, db_name, table_name, columns_name) -> list:
130
+ def columns_to_list(self, db_name, table_name, columns_name, where: str = None) -> list:
123
131
  """
124
- 获取数据表的指定列, 返回列表
125
- [{'视频bv号': 'BV1Dm4y1S7BU', '下载进度': 1}, {'视频bv号': 'BV1ov411c7US', '下载进度': 1}]
132
+ 获取数据表的指定列, 支持where条件筛选, 返回列表字典。
133
+ :param db_name: 数据库名
134
+ :param table_name: 表名
135
+ :param columns_name: 需要获取的列名列表
136
+ :param where: 可选,SQL条件字符串(不含WHERE)
137
+ :return: [{列1:值, 列2:值, ...}, ...]
126
138
  """
127
- if self.check_infos(db_name, table_name) == False: # 检查传入的数据库和数据表是否存在
139
+ if not self.check_infos(db_name, table_name):
128
140
  return []
129
-
130
141
  self.config.update({'database': db_name})
131
- connection = pymysql.connect(**self.config) # 重新连接数据库
132
- with connection.cursor() as cursor:
133
- # 3. 获取数据表的所有列信息
134
- sql = 'SELECT COLUMN_NAME FROM information_schema.columns WHERE table_schema = %s AND table_name = %s'
135
- cursor.execute(sql, (db_name, {table_name}))
136
- columns = cursor.fetchall()
137
- cols_exist = [col['COLUMN_NAME'] for col in columns] # 数据表的所有列, 返回 list
138
- columns_name = [item for item in columns_name if item in cols_exist]
139
- if len(columns_name) == 0:
140
- return []
141
- columns_in = ', '.join(columns_name)
142
- sql = (f"SELECT {columns_in} FROM {db_name}.{table_name} ")
143
- cursor.execute(sql)
144
- column_values = cursor.fetchall() # 返回指定列,结果是[dict, dict, dict, ...]
145
- # column_values = [item[column_name] for item in column_values] # 提取字典的值, 组成列表
146
- connection.close()
147
- return column_values
142
+ try:
143
+ with closing(pymysql.connect(**self.config)) as connection:
144
+ with closing(connection.cursor()) as cursor:
145
+ sql = 'SELECT COLUMN_NAME FROM information_schema.columns WHERE table_schema = %s AND table_name = %s'
146
+ cursor.execute(sql, (db_name, table_name))
147
+ cols_exist = [col['COLUMN_NAME'] for col in cursor.fetchall()]
148
+ columns_name = [item for item in columns_name if item in cols_exist]
149
+ if not columns_name:
150
+ logger.info("columns_to_list: 未找到匹配的列名")
151
+ return []
152
+ columns_in = ', '.join([f'`{col}`' for col in columns_name])
153
+ sql = f"SELECT {columns_in} FROM `{db_name}`.`{table_name}`"
154
+ if where:
155
+ sql += f" WHERE {where}"
156
+ logger.debug(f"columns_to_list SQL: {sql}")
157
+ cursor.execute(sql)
158
+ column_values = cursor.fetchall()
159
+ return column_values
160
+ except Exception as e:
161
+ logger.error(f"columns_to_list error: {e}")
162
+ return []
148
163
 
149
- def dtypes_to_list(self, db_name, table_name) -> list:
164
+ def dtypes_to_list(self, db_name, table_name, columns_name=None) -> list:
150
165
  """
151
- 获取数据表的指定列, 返回列表
152
- [{'视频bv号': 'BV1Dm4y1S7BU', '下载进度': 1}, {'视频bv号': 'BV1ov411c7US', '下载进度': 1}]
166
+ 获取数据表的列名和类型, 支持只返回部分字段类型。
167
+ :param db_name: 数据库名
168
+ :param table_name: 表名
169
+ :param columns_name: 可选,字段名列表,仅返回这些字段的类型
170
+ :return: [{'COLUMN_NAME': ..., 'COLUMN_TYPE': ...}, ...]
153
171
  """
154
- if self.check_infos(db_name, table_name) == False: # 检查传入的数据库和数据表是否存在
172
+ if not self.check_infos(db_name, table_name):
155
173
  return []
156
-
157
174
  self.config.update({'database': db_name})
158
- connection = pymysql.connect(**self.config) # 重新连接数据库
159
- with connection.cursor() as cursor:
160
- # 3. 获取数据表的所有列信息
161
- sql = 'SELECT COLUMN_NAME, COLUMN_TYPE FROM information_schema.columns WHERE table_schema = %s AND table_name = %s'
162
- cursor.execute(sql, (db_name, {table_name}))
163
- column_name_and_type = cursor.fetchall()
164
- connection.close()
165
- return column_name_and_type
175
+ try:
176
+ with closing(pymysql.connect(**self.config)) as connection:
177
+ with closing(connection.cursor()) as cursor:
178
+ sql = 'SELECT COLUMN_NAME, COLUMN_TYPE FROM information_schema.columns WHERE table_schema = %s AND table_name = %s'
179
+ cursor.execute(sql, (db_name, table_name))
180
+ column_name_and_type = cursor.fetchall()
181
+ if columns_name:
182
+ columns_name = set(columns_name)
183
+ column_name_and_type = [row for row in column_name_and_type if row['COLUMN_NAME'] in columns_name]
184
+ return column_name_and_type
185
+ except Exception as e:
186
+ logger.error(f"dtypes_to_list error: {e}")
187
+ return []
166
188
 
167
189
  def check_infos(self, db_name, table_name) -> bool:
168
- """ 检查数据库、数据表是否存在 """
169
- connection = pymysql.connect(**self.config) # 连接数据库
190
+ """
191
+ 检查数据库和数据表是否存在。
192
+ :param db_name: 数据库名
193
+ :param table_name: 表名
194
+ :return: 存在返回True,否则False
195
+ """
170
196
  try:
171
- with connection.cursor() as cursor:
172
- # 1. 检查数据库是否存在
173
- cursor.execute(f"SHOW DATABASES LIKE '{db_name}'") # 检查数据库是否存在
174
- database_exists = cursor.fetchone()
175
- if not database_exists:
176
- logger.info(f"Database <{db_name}>: 数据库不存在")
177
- return False
178
- finally:
179
- connection.close() # 这里要断开连接
180
-
181
- self.config.update({'database': db_name}) # 添加更新 config 字段
182
- connection = pymysql.connect(**self.config) # 重新连接数据库
197
+ with closing(pymysql.connect(**self.config)) as connection:
198
+ with closing(connection.cursor()) as cursor:
199
+ cursor.execute(f"SHOW DATABASES LIKE %s", (db_name,))
200
+ database_exists = cursor.fetchone()
201
+ if not database_exists:
202
+ logger.info(f"Database <{db_name}>: 数据库不存在")
203
+ return False
204
+ except Exception as e:
205
+ logger.error(f"check_infos-db error: {e}")
206
+ return False
207
+ self.config.update({'database': db_name})
183
208
  try:
184
- with connection.cursor() as cursor:
185
- # 2. 查询表是否存在
186
- sql = f"SHOW TABLES LIKE '{table_name}'"
187
- cursor.execute(sql)
188
- if not cursor.fetchone():
189
- logger.info(f'{db_name} -> <{table_name}>: 表不存在')
190
- return False
191
- return True
209
+ with closing(pymysql.connect(**self.config)) as connection:
210
+ with closing(connection.cursor()) as cursor:
211
+ cursor.execute(f"SHOW TABLES LIKE %s", (table_name,))
212
+ if not cursor.fetchone():
213
+ logger.info(f'{db_name} -> <{table_name}>: 表不存在')
214
+ return False
215
+ return True
192
216
  except Exception as e:
193
- logger.error(e)
217
+ logger.error(f"check_infos-table error: {e}")
194
218
  return False
195
- finally:
196
- connection.close() # 断开连接
197
219
 
198
220
 
199
221
  if __name__ == '__main__':
200
- conf = ConfigTxt()
201
- data = conf.config_datas['Windows']['xigua_lx']['mysql']['remoto']
202
- username, password, host, port = data['username'], data['password'], data['host'], data['port']
203
-
204
- q = QueryDatas(username, password, host, port)
205
- res = q.columns_to_list(db_name='视频数据', table_name='bilibili视频', columns_name=['视频bv号', '下载进度'])
206
- logger.info(res)
222
+ pass