mdbq 3.7.5__py3-none-any.whl → 3.7.6__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (45) hide show
  1. mdbq/__init__.py +1 -0
  2. mdbq/__version__.py +3 -0
  3. mdbq/aggregation/__init__.py +4 -0
  4. mdbq/aggregation/aggregation_bak.py +1438 -0
  5. mdbq/aggregation/datashow_bak.py +1264 -0
  6. mdbq/aggregation/optimize_data.py +76 -0
  7. mdbq/aggregation/query_data.py +3869 -0
  8. mdbq/bdup/__init__.py +5 -0
  9. mdbq/bdup/bdup.py +111 -0
  10. mdbq/config/__init__.py +4 -0
  11. mdbq/config/default.py +131 -0
  12. mdbq/config/myconfig.py +32 -0
  13. mdbq/config/products.py +159 -0
  14. mdbq/config/set_support.py +25 -0
  15. mdbq/dataframe/__init__.py +4 -0
  16. mdbq/dataframe/converter.py +107 -0
  17. mdbq/log/__init__.py +4 -0
  18. mdbq/log/mylogger.py +66 -0
  19. mdbq/log/spider_logging.py +55 -0
  20. mdbq/mongo/__init__.py +4 -0
  21. mdbq/mongo/mongo.py +729 -0
  22. mdbq/mysql/__init__.py +4 -0
  23. mdbq/mysql/mysql.py +1784 -0
  24. mdbq/mysql/s_query.py +211 -0
  25. mdbq/mysql/year_month_day.py +38 -0
  26. mdbq/other/__init__.py +4 -0
  27. mdbq/other/download_sku_picture.py +985 -0
  28. mdbq/other/porxy.py +115 -0
  29. mdbq/other/pov_city.py +405 -0
  30. mdbq/other/sku_picture_bak.py +1081 -0
  31. mdbq/other/ua_sj.py +222 -0
  32. mdbq/pbix/__init__.py +4 -0
  33. mdbq/pbix/pbix_refresh.py +70 -0
  34. mdbq/pbix/refresh_all.py +158 -0
  35. mdbq/pbix/refresh_all_old.py +177 -0
  36. mdbq/redis/__init__.py +4 -0
  37. mdbq/redis/getredis.py +642 -0
  38. mdbq/spider/__init__.py +4 -0
  39. mdbq/spider/aikucun.py +494 -0
  40. {mdbq-3.7.5.dist-info → mdbq-3.7.6.dist-info}/METADATA +1 -1
  41. mdbq-3.7.6.dist-info/RECORD +43 -0
  42. mdbq-3.7.6.dist-info/top_level.txt +1 -0
  43. mdbq-3.7.5.dist-info/RECORD +0 -4
  44. mdbq-3.7.5.dist-info/top_level.txt +0 -1
  45. {mdbq-3.7.5.dist-info → mdbq-3.7.6.dist-info}/WHEEL +0 -0
mdbq/mysql/s_query.py ADDED
@@ -0,0 +1,211 @@
1
+ # -*- coding:utf-8 -*-
2
+ import datetime
3
+ import platform
4
+ import re
5
+ import time
6
+ from functools import wraps
7
+ import warnings
8
+ import pymysql
9
+ import numpy as np
10
+ import pandas as pd
11
+ from sqlalchemy import create_engine
12
+ import os
13
+ import calendar
14
+ from mdbq.dataframe import converter
15
+ from decimal import Decimal
16
+ import logging
17
+
18
# Module-level logger, named after this module.
logger = logging.getLogger(__name__)
# Install a blanket 'ignore' filter for warnings.
warnings.filterwarnings('ignore')
# Translation of the note string below: this program is dedicated to
# downloading data from the database and returning it as a df; it does not
# perform any data cleaning.
"""
程序专门用来下载数据库数据, 并返回 df, 不做清洗数据操作;
"""
23
+
24
+
25
class QueryDatas:
    """Read-only helper that downloads data from MySQL through pymysql.

    Every public method first verifies that the database and table exist
    (see ``check_infos``), opens a short-lived connection, and closes it
    again before returning. Rows come back as produced by
    ``pymysql.cursors.DictCursor`` (one dict per row) or as a DataFrame.
    """

    def __init__(self, username: str, password: str, host: str, port: int, charset: str = 'utf8mb4'):
        """Store the credentials and build the base pymysql connection config.

        Args:
            username: MySQL user name.
            password: MySQL password.
            host: Server host.
            port: Server port; converted with ``int()`` for the config.
            charset: Connection charset, ``utf8mb4`` by default.
        """
        self.username = username
        self.password = password
        self.host = host
        self.port = port
        self.config = {
            'host': self.host,
            'port': int(self.port),
            'user': self.username,
            'password': self.password,
            'charset': charset,  # utf8mb4 can store four-byte UTF-8 characters
            'cursorclass': pymysql.cursors.DictCursor,
        }

    def check_condition(self, db_name, table_name, condition):
        """Return the ``更新时间`` column of the rows matching ``condition``.

        Args:
            db_name: Database name.
            table_name: Table name (inserted into the SQL text as given).
            condition: Raw SQL placed after ``WHERE``.

        Returns:
            The result of ``cursor.fetchall()``, or ``None`` when the
            database or table does not exist.
        """
        if not self.check_infos(db_name, table_name):
            return None

        self.config.update({'database': db_name})
        connection = pymysql.connect(**self.config)
        try:
            with connection.cursor() as cursor:
                # NOTE: ``table_name`` and ``condition`` are spliced into the
                # statement verbatim, so they must come from trusted code and
                # never from user input.
                sql = f"SELECT 更新时间 FROM {table_name} WHERE {condition}"
                cursor.execute(sql)
                return cursor.fetchall()
        finally:
            # The connection used to be left open; always release it.
            connection.close()

    def data_to_df(self, db_name, table_name, start_date, end_date, projection: dict = None):
        """Load a table into a DataFrame with optional column/date filtering.

        Args:
            db_name: Database name.
            table_name: Table name.
            start_date: First date to include; falsy means ``1970-01-01``.
            end_date: Last date to include; falsy means today.
            projection: Column selector, e.g. ``{'日期': 1, '场景名字': 1}``.
                Only keys with a truthy value that exist in the table are
                selected. Empty or ``None`` selects every column except
                ``id``.

        Returns:
            A DataFrame with the selected rows. An empty DataFrame is
            returned when the database/table is missing, when the projection
            matches no column, when the query yields no rows, or when the
            database operation fails (the error is logged).
        """
        projection = projection or {}
        df = pd.DataFrame()
        # Normalise both bounds to YYYY-MM-DD strings.
        start_date = pd.to_datetime(start_date or '1970-01-01').strftime('%Y-%m-%d')
        end_date = pd.to_datetime(end_date or datetime.datetime.today()).strftime('%Y-%m-%d')

        if not self.check_infos(db_name, table_name):
            return df

        self.config['database'] = db_name
        connection = None

        try:
            connection = pymysql.connect(**self.config)
            with connection.cursor() as cursor:
                # Read the table's columns in their declared order so that
                # the resulting DataFrame has a deterministic column order.
                cursor.execute(
                    """SELECT COLUMN_NAME
                    FROM information_schema.columns
                    WHERE table_schema = %s AND table_name = %s
                    ORDER BY ORDINAL_POSITION""",
                    (db_name, table_name)
                )
                cols_exist = [
                    col['COLUMN_NAME'] for col in cursor.fetchall()
                    if col['COLUMN_NAME'] != 'id'  # the id column is never exported
                ]

                if projection:
                    selected_columns = [k for k, v in projection.items() if v and k in cols_exist]
                    if not selected_columns:
                        logger.info("Warning: Projection 参数不匹配任何数据库字段")
                        return df
                else:
                    selected_columns = cols_exist
                if not selected_columns:
                    # Nothing to select (e.g. the table only has an id column).
                    return df

                quoted_columns = [f'`{col}`' for col in selected_columns]
                base_sql = f"SELECT {', '.join(quoted_columns)} FROM `{db_name}`.`{table_name}`"

                # The bounds are strftime-normalised above, so embedding them
                # is safe; the statement is executed without an args tuple so
                # that a literal '%' in a column name is not treated as a
                # format placeholder.
                if '日期' in cols_exist:
                    base_sql += f" WHERE `日期` BETWEEN '{start_date}' AND '{end_date}'"

                cursor.execute(base_sql)
                result = cursor.fetchall()

                if result:
                    df = pd.DataFrame(result, columns=[desc[0] for desc in cursor.description])
                    # Convert columns holding Decimal values to float.
                    decimal_cols = [
                        col for col in df.columns
                        if df[col].apply(lambda x: isinstance(x, Decimal)).any()
                    ]
                    if decimal_cols:
                        df[decimal_cols] = df[decimal_cols].astype(float)

        except Exception as e:
            # Best effort: report the failure and fall through to return
            # whatever DataFrame was built so far (normally empty).
            logger.error(f"Database operation failed: {str(e)}")
        finally:
            if connection:
                connection.close()

        return df

    def columns_to_list(self, db_name, table_name, columns_name) -> list:
        """Fetch the given columns of a table as a list of row dicts.

        Example result:
        ``[{'视频bv号': 'BV1Dm4y1S7BU', '下载进度': 1}, {'视频bv号': 'BV1ov411c7US', '下载进度': 1}]``

        Args:
            db_name: Database name.
            table_name: Table name.
            columns_name: Iterable of column names; names that do not exist
                in the table are dropped.

        Returns:
            One dict per row, or ``[]`` when the database/table is missing
            or none of the requested columns exist.
        """
        if not self.check_infos(db_name, table_name):
            return []

        self.config.update({'database': db_name})
        connection = pymysql.connect(**self.config)
        try:
            with connection.cursor() as cursor:
                sql = 'SELECT COLUMN_NAME FROM information_schema.columns WHERE table_schema = %s AND table_name = %s'
                # Pass the table name itself (previously wrapped in a set).
                cursor.execute(sql, (db_name, table_name))
                cols_exist = {col['COLUMN_NAME'] for col in cursor.fetchall()}
                columns_name = [item for item in columns_name if item in cols_exist]
                if not columns_name:
                    return []
                # Backtick-quote identifiers, consistent with data_to_df.
                columns_in = ', '.join(f'`{col}`' for col in columns_name)
                cursor.execute(f"SELECT {columns_in} FROM `{db_name}`.`{table_name}`")
                return list(cursor.fetchall())
        finally:
            # Runs on every exit path, including the early return above.
            connection.close()

    def dtypes_to_list(self, db_name, table_name) -> list:
        """Fetch the name and declared type of every column of a table.

        Args:
            db_name: Database name.
            table_name: Table name.

        Returns:
            A list of dicts with the keys ``COLUMN_NAME`` and
            ``COLUMN_TYPE``, or ``[]`` when the database/table is missing.
        """
        if not self.check_infos(db_name, table_name):
            return []

        self.config.update({'database': db_name})
        connection = pymysql.connect(**self.config)
        try:
            with connection.cursor() as cursor:
                sql = 'SELECT COLUMN_NAME, COLUMN_TYPE FROM information_schema.columns WHERE table_schema = %s AND table_name = %s'
                # Pass the table name itself (previously wrapped in a set).
                cursor.execute(sql, (db_name, table_name))
                return list(cursor.fetchall())
        finally:
            connection.close()

    def check_infos(self, db_name, table_name) -> bool:
        """Check that both the database and the table exist.

        On success ``self.config['database']`` is set to ``db_name``.

        Args:
            db_name: Database name (matched with ``SHOW DATABASES LIKE``).
            table_name: Table name (matched with ``SHOW TABLES LIKE``).

        Returns:
            ``True`` when both exist; ``False`` when either is missing or
            when the table lookup raises.
        """
        # 1. Database check. Connect without a default schema so that a
        #    'database' entry left in self.config by an earlier call cannot
        #    make this connection fail.
        server_config = {k: v for k, v in self.config.items() if k != 'database'}
        connection = pymysql.connect(**server_config)
        try:
            with connection.cursor() as cursor:
                cursor.execute("SHOW DATABASES LIKE %s", (db_name,))
                if not cursor.fetchone():
                    logger.info(f"Database <{db_name}>: 数据库不存在")
                    return False
        finally:
            connection.close()

        # 2. Table check, connected to the now-verified database.
        self.config.update({'database': db_name})
        connection = pymysql.connect(**self.config)
        try:
            with connection.cursor() as cursor:
                cursor.execute("SHOW TABLES LIKE %s", (table_name,))
                if not cursor.fetchone():
                    logger.info(f'{db_name} -> <{table_name}>: 表不存在')
                    return False
                return True
        except Exception as e:
            logger.error(e)
            return False
        finally:
            connection.close()
202
+
203
+
204
if __name__ == '__main__':
    # Manual smoke test: read two columns of one table and log the rows.
    # NOTE(review): ConfigTxt is neither defined nor imported in the visible
    # part of this module - confirm where it should come from before running.
    conf = ConfigTxt()
    data = conf.config_datas['Windows']['xigua_lx']['mysql']['remoto']
    username = data['username']
    password = data['password']
    host = data['host']
    port = data['port']

    q = QueryDatas(username, password, host, port)
    res = q.columns_to_list(
        db_name='视频数据',
        table_name='bilibili视频',
        columns_name=['视频bv号', '下载进度'],
    )
    logger.info(res)
@@ -0,0 +1,38 @@
1
+ # -*- coding:utf-8 -*-
2
+ import warnings
3
+ import pandas as pd
4
+ import calendar
5
+
6
+ warnings.filterwarnings('ignore')
7
+
8
+
9
+ def year_month_day(start_date, end_date):
10
+ """
11
+ 使用date_range函数和DataFrame来获取从start_date至end_date之间的所有年月日
12
+ calendar.monthrange: 获取当月第一个工作日的星期值(0,6) 以及当月天数
13
+ 返回值: [{'起始日期': '2025-05-01', '结束日期': '2025-05-31'}, {'起始日期': '2025-06-01', '结束日期': '2025-06-30'}]
14
+ """
15
+ # 替换年月日中的日, 以便即使传入当月日期也有返回值
16
+ try:
17
+ start_date = f'{pd.to_datetime(start_date).year}-{pd.to_datetime(start_date).month}-01'
18
+ except Exception as e:
19
+ print(e)
20
+ return []
21
+ # 使用pandas的date_range创建一个日期范围,频率为'MS'代表每月开始
22
+ date_range = pd.date_range(start=start_date, end=end_date, freq='MS')
23
+ # 转换格式
24
+ year_months = date_range.strftime('%Y-%m').drop_duplicates().sort_values()
25
+
26
+ results = []
27
+ for year_month in year_months:
28
+ year = re.findall(r'(\d{4})', year_month)[0]
29
+ month = re.findall(r'\d{4}-(\d{2})', year_month)[0]
30
+ s, d = calendar.monthrange(int(year), int(month))
31
+ results.append({'起始日期': f'{year_month}-01', '结束日期': f'{year_month}-{d}'})
32
+
33
+ return results # start_date至end_date之间的所有年月日
34
+
35
+
36
if __name__ == '__main__':
    # Demo run: print the month boundaries from May to August 2025.
    results = year_month_day(
        start_date='2025-05-01',
        end_date='2025-08-01',
    )
    print(results)
mdbq/other/__init__.py ADDED
@@ -0,0 +1,4 @@
1
+
2
+
3
+
4
+ # 配置文件