mdbq 3.7.4__py3-none-any.whl → 3.7.5__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {mdbq-3.7.4.dist-info → mdbq-3.7.5.dist-info}/METADATA +1 -1
- mdbq-3.7.5.dist-info/RECORD +4 -0
- {mdbq-3.7.4.dist-info → mdbq-3.7.5.dist-info}/WHEEL +1 -1
- mdbq-3.7.5.dist-info/top_level.txt +1 -0
- mdbq/__init__.py +0 -1
- mdbq/__version__.py +0 -3
- mdbq/aggregation/__init__.py +0 -4
- mdbq/aggregation/aggregation_bak.py +0 -1438
- mdbq/aggregation/datashow_bak.py +0 -1264
- mdbq/aggregation/optimize_data.py +0 -76
- mdbq/aggregation/query_data.py +0 -3869
- mdbq/bdup/__init__.py +0 -5
- mdbq/bdup/bdup.py +0 -111
- mdbq/config/__init__.py +0 -4
- mdbq/config/default.py +0 -131
- mdbq/config/myconfig.py +0 -32
- mdbq/config/products.py +0 -159
- mdbq/config/set_support.py +0 -22
- mdbq/dataframe/__init__.py +0 -4
- mdbq/dataframe/converter.py +0 -107
- mdbq/log/__init__.py +0 -4
- mdbq/log/mylogger.py +0 -66
- mdbq/log/spider_logging.py +0 -55
- mdbq/mongo/__init__.py +0 -4
- mdbq/mongo/mongo.py +0 -729
- mdbq/mysql/__init__.py +0 -4
- mdbq/mysql/mysql.py +0 -1784
- mdbq/mysql/s_query.py +0 -211
- mdbq/mysql/year_month_day.py +0 -38
- mdbq/other/__init__.py +0 -4
- mdbq/other/download_sku_picture.py +0 -985
- mdbq/other/porxy.py +0 -115
- mdbq/other/pov_city.py +0 -405
- mdbq/other/sku_picture_bak.py +0 -1081
- mdbq/other/ua_sj.py +0 -222
- mdbq/pbix/__init__.py +0 -4
- mdbq/pbix/pbix_refresh.py +0 -70
- mdbq/pbix/refresh_all.py +0 -158
- mdbq/pbix/refresh_all_old.py +0 -177
- mdbq/redis/__init__.py +0 -4
- mdbq/redis/getredis.py +0 -642
- mdbq/spider/__init__.py +0 -4
- mdbq/spider/aikucun.py +0 -494
- mdbq-3.7.4.dist-info/RECORD +0 -43
- mdbq-3.7.4.dist-info/top_level.txt +0 -1
mdbq/mysql/s_query.py
DELETED
@@ -1,211 +0,0 @@
|
|
1
|
-
# -*- coding:utf-8 -*-
|
2
|
-
import datetime
|
3
|
-
import platform
|
4
|
-
import re
|
5
|
-
import time
|
6
|
-
from functools import wraps
|
7
|
-
import warnings
|
8
|
-
import pymysql
|
9
|
-
import numpy as np
|
10
|
-
import pandas as pd
|
11
|
-
from sqlalchemy import create_engine
|
12
|
-
import os
|
13
|
-
import calendar
|
14
|
-
from mdbq.dataframe import converter
|
15
|
-
from decimal import Decimal
|
16
|
-
import logging
|
17
|
-
|
18
|
-
warnings.filterwarnings('ignore')
|
19
|
-
"""
|
20
|
-
程序专门用来下载数据库数据, 并返回 df, 不做清洗数据操作;
|
21
|
-
"""
|
22
|
-
logger = logging.getLogger(__name__)
|
23
|
-
|
24
|
-
|
25
|
-
class QueryDatas:
    """Read-only helper that pulls data out of MySQL through pymysql.

    The class only downloads data; it performs no cleaning. Every public
    method opens its own short-lived connection and closes it before
    returning, including on early returns and on errors.
    """

    def __init__(self, username: str, password: str, host: str, port: int, charset: str = 'utf8mb4'):
        """Store the credentials and build the keyword arguments for pymysql.connect."""
        self.username = username
        self.password = password
        self.host = host
        self.port = port
        self.config = {
            'host': self.host,
            'port': int(self.port),
            'user': self.username,
            'password': self.password,
            'charset': charset,  # utf8mb4 can store four-byte UTF-8 characters
            'cursorclass': pymysql.cursors.DictCursor,
        }

    @staticmethod
    def _quote_identifier(name) -> str:
        """Wrap an identifier in backticks, doubling any backtick it contains."""
        return '`' + str(name).replace('`', '``') + '`'

    def check_condition(self, db_name, table_name, condition):
        """Return the `更新时间` values of the rows that match ``condition``.

        Args:
            db_name: database name.
            table_name: table name.
            condition: raw SQL placed after WHERE.
                NOTE(review): this text is executed verbatim and must never
                come from untrusted input; the signature leaves no way to
                parameterise it.

        Returns:
            The rows returned by ``cursor.fetchall()``, or None when the
            database or the table does not exist.
        """
        if not self.check_infos(db_name, table_name):
            return None

        self.config.update({'database': db_name})
        connection = pymysql.connect(**self.config)
        try:
            with connection.cursor() as cursor:
                sql = f"SELECT `更新时间` FROM {self._quote_identifier(table_name)} WHERE {condition}"
                cursor.execute(sql)
                return cursor.fetchall()
        finally:
            # The connection used to be left open on every call.
            connection.close()

    def data_to_df(self, db_name, table_name, start_date, end_date, projection: dict = None):
        """Load a table into a DataFrame with optional column and date filtering.

        Args:
            db_name: database name.
            table_name: table name.
            start_date: first day to include; a falsy value means 1970-01-01.
            end_date: last day to include; a falsy value means today.
            projection: column filter, e.g. {'日期': 1, '场景名字': 1}. Keys
                with a truthy value that exist in the table are selected.
                When empty, every column except ``id`` is selected.

        Returns:
            The resulting DataFrame. It is empty when the database or table
            is missing, when the projection matches no column, when the query
            returns no rows, or when the query fails (the error is logged).
        """
        projection = projection or {}
        df = pd.DataFrame()
        start_date = pd.to_datetime(start_date or '1970-01-01').strftime('%Y-%m-%d')
        end_date = pd.to_datetime(end_date or datetime.datetime.today()).strftime('%Y-%m-%d')

        if not self.check_infos(db_name, table_name):
            return df

        self.config['database'] = db_name
        connection = None

        try:
            connection = pymysql.connect(**self.config)
            with connection.cursor() as cursor:
                # Read the columns in table order so the DataFrame layout is
                # the same on every run (a set has no stable order).
                cursor.execute(
                    """SELECT COLUMN_NAME
                    FROM information_schema.columns
                    WHERE table_schema = %s AND table_name = %s
                    ORDER BY ORDINAL_POSITION""",
                    (db_name, table_name)
                )
                table_columns = [
                    row['COLUMN_NAME'] for row in cursor.fetchall()
                    if row['COLUMN_NAME'] != 'id'
                ]
                cols_exist = set(table_columns)

                if projection:
                    selected_columns = [k for k, v in projection.items() if v and k in cols_exist]
                    if not selected_columns:
                        logger.info("Warning: Projection 参数不匹配任何数据库字段")
                        return df
                else:
                    selected_columns = table_columns
                    if not selected_columns:
                        # Nothing but `id`: "SELECT  FROM ..." would be invalid SQL.
                        return df

                quoted_columns = ', '.join(self._quote_identifier(col) for col in selected_columns)
                base_sql = (
                    f"SELECT {quoted_columns} "
                    f"FROM {self._quote_identifier(db_name)}.{self._quote_identifier(table_name)}"
                )

                if '日期' in cols_exist:
                    # Both dates are strftime output, so inlining them is safe.
                    # The statement is executed without args on purpose: with
                    # args, pymysql %-formats the query and a column name
                    # containing '%' would break it.
                    base_sql += f" WHERE `日期` BETWEEN '{start_date}' AND '{end_date}'"

                cursor.execute(base_sql)
                result = cursor.fetchall()

                if result:
                    df = pd.DataFrame(result, columns=[desc[0] for desc in cursor.description])
                    # Convert DECIMAL columns to float.
                    decimal_cols = [
                        col for col in df.columns
                        if df[col].apply(lambda x: isinstance(x, Decimal)).any()
                    ]
                    if decimal_cols:
                        df[decimal_cols] = df[decimal_cols].astype(float)

        except Exception as e:
            logger.error(f"Database operation failed: {str(e)}")
        finally:
            if connection:
                connection.close()

        return df

    def columns_to_list(self, db_name, table_name, columns_name) -> list:
        """Fetch the requested columns of a table as a list of row dicts.

        Names in ``columns_name`` that are not columns of the table are
        ignored. Example result:
        [{'视频bv号': 'BV1Dm4y1S7BU', '下载进度': 1}, {'视频bv号': 'BV1ov411c7US', '下载进度': 1}]

        Returns:
            The rows from ``cursor.fetchall()``, or [] when the database or
            table is missing or none of the requested columns exist.
        """
        if not self.check_infos(db_name, table_name):
            return []

        self.config.update({'database': db_name})
        connection = pymysql.connect(**self.config)
        try:
            with connection.cursor() as cursor:
                sql = 'SELECT COLUMN_NAME FROM information_schema.columns WHERE table_schema = %s AND table_name = %s'
                # Pass the table name itself, not a one-element set.
                cursor.execute(sql, (db_name, table_name))
                cols_exist = {col['COLUMN_NAME'] for col in cursor.fetchall()}
                wanted = [item for item in columns_name if item in cols_exist]
                if not wanted:
                    return []
                columns_in = ', '.join(self._quote_identifier(item) for item in wanted)
                sql = (
                    f"SELECT {columns_in} "
                    f"FROM {self._quote_identifier(db_name)}.{self._quote_identifier(table_name)}"
                )
                cursor.execute(sql)
                return cursor.fetchall()  # [dict, dict, dict, ...]
        finally:
            # Runs on the early return above and on errors as well.
            connection.close()

    def dtypes_to_list(self, db_name, table_name) -> list:
        """Fetch the name and MySQL type of every column in a table.

        Returns:
            Rows with the keys COLUMN_NAME and COLUMN_TYPE as returned by
            ``cursor.fetchall()``, or [] when the database or table is
            missing.
        """
        if not self.check_infos(db_name, table_name):
            return []

        self.config.update({'database': db_name})
        connection = pymysql.connect(**self.config)
        try:
            with connection.cursor() as cursor:
                sql = 'SELECT COLUMN_NAME, COLUMN_TYPE FROM information_schema.columns WHERE table_schema = %s AND table_name = %s'
                cursor.execute(sql, (db_name, table_name))
                return cursor.fetchall()
        finally:
            connection.close()

    def check_infos(self, db_name, table_name) -> bool:
        """Check that both the database and the table exist.

        Names are compared through information_schema with bound parameters,
        so ``_`` and ``%`` in a name are taken literally instead of acting as
        LIKE wildcards. Once the database is known to exist, it is stored in
        ``self.config['database']``.

        Returns:
            True when both exist; False when either is missing or the table
            lookup fails. Errors while connecting or while looking up the
            database propagate to the caller.
        """
        connection = pymysql.connect(**self.config)
        try:
            with connection.cursor() as cursor:
                # 1. Does the database exist?
                cursor.execute(
                    "SELECT 1 FROM information_schema.schemata WHERE schema_name = %s LIMIT 1",
                    (db_name,)
                )
                if not cursor.fetchone():
                    logger.info(f"Database <{db_name}>: 数据库不存在")
                    return False

            self.config.update({'database': db_name})

            try:
                with connection.cursor() as cursor:
                    # 2. Does the table exist?
                    cursor.execute(
                        "SELECT 1 FROM information_schema.tables "
                        "WHERE table_schema = %s AND table_name = %s LIMIT 1",
                        (db_name, table_name)
                    )
                    if not cursor.fetchone():
                        logger.info(f'{db_name} -> <{table_name}>: 表不存在')
                        return False
                    return True
            except Exception as e:
                logger.error(e)
                return False
        finally:
            connection.close()
|
202
|
-
|
203
|
-
|
204
|
-
if __name__ == '__main__':
    # Manual smoke test against a real MySQL server.
    # The previous version built its settings from `ConfigTxt`, which this
    # module neither defines nor imports, so it raised NameError before any
    # query ran. Connection settings now come from the environment.
    # Without a configured handler/level the logger.info() below is dropped.
    logging.basicConfig(level=logging.INFO)

    q = QueryDatas(
        username=os.environ['MYSQL_USERNAME'],
        password=os.environ['MYSQL_PASSWORD'],
        host=os.environ.get('MYSQL_HOST', '127.0.0.1'),
        port=int(os.environ.get('MYSQL_PORT', '3306')),
    )
    res = q.columns_to_list(db_name='视频数据', table_name='bilibili视频', columns_name=['视频bv号', '下载进度'])
    logger.info(res)
|
mdbq/mysql/year_month_day.py
DELETED
@@ -1,38 +0,0 @@
|
|
1
|
-
# -*- coding:utf-8 -*-
|
2
|
-
import warnings
|
3
|
-
import pandas as pd
|
4
|
-
import calendar
|
5
|
-
|
6
|
-
warnings.filterwarnings('ignore')
|
7
|
-
|
8
|
-
|
9
|
-
def year_month_day(start_date, end_date):
    """
    Split the span between start_date and end_date into calendar months.

    start_date is first moved back to the 1st of its month, so a range that
    begins mid-month still yields that month. Every month whose first day is
    not later than end_date is reported with its full first and last day,
    even when end_date falls in the middle of that month.

    Args:
        start_date: anything pandas.to_datetime can parse.
        end_date: anything pandas.date_range accepts as ``end``.

    Returns:
        A list of dicts, one per month, e.g.
        [{'起始日期': '2025-05-01', '结束日期': '2025-05-31'}, {'起始日期': '2025-06-01', '结束日期': '2025-06-30'}]
        The list is empty when start_date cannot be parsed (the error is
        printed) or when its month starts after end_date.
    """
    # Replace the day with 01 so a mid-month start date still returns its month.
    try:
        parsed_start = pd.to_datetime(start_date)
        month_floor = f'{parsed_start.year:04d}-{parsed_start.month:02d}-01'
    except Exception as e:
        print(e)
        return []

    # freq='MS' produces one timestamp per month start, already sorted and unique.
    month_starts = pd.date_range(start=month_floor, end=end_date, freq='MS')

    results = []
    for month_start in month_starts:
        # Year and month come straight from the timestamp; the earlier regex
        # parsing relied on `re`, which this module never imported.
        # calendar.monthrange returns (weekday of the 1st, days in the month).
        _, days_in_month = calendar.monthrange(month_start.year, month_start.month)
        year_month = f'{month_start.year:04d}-{month_start.month:02d}'
        results.append({'起始日期': f'{year_month}-01', '结束日期': f'{year_month}-{days_in_month}'})

    return results  # every month between start_date and end_date
|
34
|
-
|
35
|
-
|
36
|
-
if __name__ == '__main__':
    # Demo run: print the month windows covering 2025-05-01 through 2025-08-01.
    demo_range = {'start_date': '2025-05-01', 'end_date': '2025-08-01'}
    print(year_month_day(**demo_range))
|
mdbq/other/__init__.py
DELETED