mdbq 0.0.5__py3-none-any.whl → 0.0.6__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- mdbq/__version__.py +3 -0
- mdbq/aggregation/aggregation.py +17 -26
- mdbq/aggregation/query_data.py +307 -0
- mdbq/mysql/s_query.py +165 -0
- mdbq/other/{xigua_porxy.py → porxy.py} +0 -3
- {mdbq-0.0.5.dist-info → mdbq-0.0.6.dist-info}/METADATA +1 -1
- {mdbq-0.0.5.dist-info → mdbq-0.0.6.dist-info}/RECORD +9 -6
- {mdbq-0.0.5.dist-info → mdbq-0.0.6.dist-info}/WHEEL +0 -0
- {mdbq-0.0.5.dist-info → mdbq-0.0.6.dist-info}/top_level.txt +0 -0
mdbq/__version__.py
ADDED
mdbq/aggregation/aggregation.py
CHANGED
@@ -960,7 +960,22 @@ def main():
|
|
960
960
|
# print(d.datas)
|
961
961
|
|
962
962
|
|
963
|
+
def update_dtypte(
        file='/Users/xigua/数据中心/原始文件2/月数据/流量来源/【生意参谋平台】无线店铺流量来源-2023-04-01_2023-04-30.csv',
        db_name='生意参谋数据2',
        collection_name='店铺来源_月数据',
):
    """Write the dtype information of one CSV file to the dtypes JSON file.

    The arguments default to the values that used to be hard-coded, so a
    bare ``update_dtypte()`` call behaves as before.

    Args:
        file: Path of the CSV file to read (expected to be UTF-8 with BOM).
        db_name: Database name the dtypes are registered under.
        collection_name: Collection/table name the dtypes are registered under.
    """
    df = pd.read_csv(file, encoding='utf-8_sig', header=0, na_filter=False)
    d = DataTypes()
    d.read_dtypes(
        df=df,
        db_name=db_name,
        collection_name=collection_name,
        is_file_dtype=False,  # do not give priority to dtypes already stored in the file
    )
    d.dtypes_to_file()
|
975
|
+
|
976
|
+
|
963
977
|
def upload():
|
978
|
+
""" 上传一个文件夹到数据库 """
|
964
979
|
path = '/Users/xigua/数据中心/原始文件2/生意参谋/客户_客户概况_画像'
|
965
980
|
db_name = '生意参谋数据2'
|
966
981
|
collection_name = '客户_客户概况_画像'
|
@@ -995,7 +1010,7 @@ def upload():
|
|
995
1010
|
# print(dtypes)
|
996
1011
|
for root, dirs, files in os.walk(path, topdown=False):
|
997
1012
|
for name in files:
|
998
|
-
if '~$' in name or '.DS' in name or '.localized' in name or '
|
1013
|
+
if '~$' in name or '.DS' in name or '.localized' in name or 'baidu' in name:
|
999
1014
|
continue
|
1000
1015
|
if name.endswith('.csv'):
|
1001
1016
|
# print(name)
|
@@ -1005,7 +1020,7 @@ def upload():
|
|
1005
1020
|
continue
|
1006
1021
|
for col in df.columns.tolist():
|
1007
1022
|
df[col] = df[col].apply(lambda x: re.sub('[="]', '', str(x)) if '="' in str(x) else x)
|
1008
|
-
df.replace(to_replace=['--'], value='', regex=False, inplace=True)
|
1023
|
+
# df.replace(to_replace=['--'], value='', regex=False, inplace=True)
|
1009
1024
|
df = dt.convert_df_cols(df=df)
|
1010
1025
|
try:
|
1011
1026
|
df = df.astype(dtypes)
|
@@ -1032,29 +1047,5 @@ if __name__ == '__main__':
|
|
1032
1047
|
username, password, host, port = get_myconf.select_config_values(target_service='aliyun', database='mongodb')
|
1033
1048
|
print(username, password, host, port)
|
1034
1049
|
|
1035
|
-
d = DatabaseUpdate(path='/Users/xigua/Downloads')
|
1036
|
-
d.upload_df(service_databases=[{'home_lx': 'mongodb'},])
|
1037
|
-
|
1038
1050
|
# main()
|
1039
1051
|
# upload()
|
1040
|
-
# path = '/Users/xigua/数据中心/原始文件2/月数据/流量来源-自助取数-月数据'
|
1041
|
-
# for root, dirs, files in os.walk(path, topdown=False):
|
1042
|
-
# for name in files:
|
1043
|
-
# if name.endswith('.csv') and 'baidu' not in name:
|
1044
|
-
# with open(os.path.join(root, name), 'rb') as f:
|
1045
|
-
# f1 = f.read()
|
1046
|
-
# encod = chardet.detect(f1).get('encoding')
|
1047
|
-
# print(name, encod)
|
1048
|
-
# # df = pd.read_csv(os.path.join(root, name), encoding=encod, header=0, na_filter=False)
|
1049
|
-
# # df.to_csv(os.path.join(root, name), index=False, encoding='utf-8_sig', header=True)
|
1050
|
-
|
1051
|
-
# file = '/Users/xigua/数据中心/原始文件2/月数据/流量来源/【生意参谋平台】无线店铺流量来源-2023-04-01_2023-04-30.csv'
|
1052
|
-
# df = pd.read_csv(file, encoding='utf-8_sig', header=0, na_filter=False)
|
1053
|
-
# d = DataTypes()
|
1054
|
-
# d.read_dtypes(
|
1055
|
-
# df=df,
|
1056
|
-
# db_name='生意参谋数据2',
|
1057
|
-
# collection_name='店铺来源_月数据',
|
1058
|
-
# is_file_dtype=False, # 关闭文件优先
|
1059
|
-
# )
|
1060
|
-
# d.dtypes_to_file()
|
@@ -0,0 +1,307 @@
|
|
1
|
+
# -*- coding: UTF-8 -*-
|
2
|
+
from mdbq.mongo import mongo
|
3
|
+
from mdbq.mysql import s_query
|
4
|
+
from mdbq.config import get_myconf
|
5
|
+
import datetime
|
6
|
+
from dateutil.relativedelta import relativedelta
|
7
|
+
import pandas as pd
|
8
|
+
import numpy as np
|
9
|
+
import platform
|
10
|
+
import getpass
|
11
|
+
import json
|
12
|
+
import os
|
13
|
+
|
14
|
+
|
15
|
+
class _ExportMixin:
    """Date-window and file-export helpers shared by the query classes.

    Classes using this mixin must set ``self.output`` (the base export
    directory) before calling any ``as_*`` method.
    """

    @staticmethod
    def _default_output():
        """Return the platform-specific default export directory."""
        system = platform.system()
        if system == 'Darwin':
            return os.path.join('/Users', getpass.getuser(), '数据中心/数据库导出')
        if system == 'Windows':
            return 'C:\\同步空间\\BaiduSyncdisk\\数据库导出'
        return '数据中心/数据库导出'

    @staticmethod
    def days_data(days, end_date=None):
        """Return ``(start, end)`` timestamps covering the last ``days`` days.

        ``end_date`` may be a datetime or anything ``pd.to_datetime`` parses;
        when omitted, the current time is used.
        """
        end_date = pd.to_datetime(end_date) if end_date else datetime.datetime.now()
        start_date = end_date - datetime.timedelta(days=days)
        return pd.to_datetime(start_date), pd.to_datetime(end_date)

    @staticmethod
    def months_data(num=0, end_date=None):
        """Return ``(start, end)`` timestamps covering the last ``num`` months.

        ``start`` is the first day of the month ``num`` months before
        ``end_date``; ``num=0`` therefore means "the current month so far".
        """
        end_date = pd.to_datetime(end_date) if end_date else datetime.datetime.now()
        anchor = end_date - relativedelta(months=num)  # same day, ``num`` months earlier
        start_date = f'{anchor.year}-{anchor.month}-01'  # snap to the first day of that month
        return pd.to_datetime(start_date), pd.to_datetime(end_date)

    def _export_dir(self, path=None):
        """Resolve ``path`` below ``self.output`` and make sure it exists."""
        target = os.path.join(self.output, path) if path else self.output
        os.makedirs(target, exist_ok=True)
        return target

    @staticmethod
    def _sort_in_place(df, filename, st_ascend, ascend):
        """Sort ``df`` in place when sort arguments were supplied.

        ``ascend=False`` is a valid request (descending order); only a
        missing/empty ``st_ascend`` or a ``None``/empty ``ascend`` means
        "do not sort". Invalid sort arguments are reported and ignored.
        """
        if not st_ascend or (ascend is not False and not ascend):
            return
        try:
            df.sort_values(st_ascend, ascending=ascend, ignore_index=True, inplace=True)
        except (KeyError, ValueError, TypeError):
            print(f'{filename}: sort_values排序参数错误!')

    def as_csv(self, df, filename, path=None, encoding='utf-8_sig',
               index=False, header=True, st_ascend=None, ascend=None, freq=None):
        """Export ``df`` as CSV.

        Args:
            df: DataFrame to export (sorted in place when sorting is requested).
            filename: File name without extension.
            path: Optional sub-folder below ``self.output``.
            encoding, index, header: Passed through to ``DataFrame.to_csv``.
            st_ascend: Column(s) to sort by.
            ascend: Sort direction(s) for ``st_ascend``.
            freq: When given, split the data on the ``日期`` column with this
                pandas frequency and write one file per period into a
                ``filename`` sub-folder (``'M'`` or ``'Y'``).
        """
        target_dir = self._export_dir(path)
        self._sort_in_place(df, filename, st_ascend, ascend)
        if not freq:
            df.to_csv(os.path.join(target_dir, filename + '.csv'),
                      encoding=encoding, index=index, header=header)
            return None
        if '日期' not in df.columns:
            print(f'{filename}: 数据缺少日期列,无法按日期分组')
            return None

        group_dir = os.path.join(target_dir, filename)
        os.makedirs(group_dir, exist_ok=True)
        for period, part in df.groupby(pd.Grouper(key='日期', freq=freq)):
            # 'ME'/'YE' are the newer pandas spellings of month-/year-end.
            if freq in ('M', 'ME'):
                suffix = period.strftime('%Y-%m')
            elif freq in ('Y', 'YE'):
                suffix = period.strftime('%Y年')
            else:
                # NOTE(review): every period shares this suffix, so later
                # periods overwrite earlier ones for other frequencies.
                suffix = '_未分类'
            # Each group is re-sorted on its own copy so the export order
            # matches the requested order without touching the source frame.
            part = part.copy()
            self._sort_in_place(part, filename, st_ascend, ascend)
            part.to_csv(os.path.join(group_dir, f'{filename}{suffix}.csv'),
                        encoding=encoding, index=index, header=header)
        return None

    def as_json(self, df, filename, path=None, orient='records', force_ascii=False,
                st_ascend=None, ascend=None):
        """Export ``df`` as ``filename.json`` below ``self.output``/``path``."""
        target_dir = self._export_dir(path)
        self._sort_in_place(df, filename, st_ascend, ascend)
        df.to_json(os.path.join(target_dir, filename + '.json'),
                   orient=orient, force_ascii=force_ascii)

    def as_excel(self, df, filename, path=None, index=False, header=True, engine='openpyxl',
                 freeze_panes=(1, 0), st_ascend=None, ascend=None):
        """Export ``df`` as ``filename.xlsx`` below ``self.output``/``path``."""
        target_dir = self._export_dir(path)
        self._sort_in_place(df, filename, st_ascend, ascend)
        df.to_excel(os.path.join(target_dir, filename + '.xlsx'),
                    index=index, header=header, engine=engine, freeze_panes=freeze_panes)


class MongoDatasQuery(_ExportMixin):
    """
    Download data from MongoDB.
    self.output: default export directory
    self.is_maximize: whether to keep the maximum conversion figures
    """
    def __init__(self, target_service):
        # target_service: which server the data is downloaded from
        self.is_maximize = True
        self.output = self._default_output()

        # Build the downloader
        username, password, host, port = get_myconf.select_config_values(
            target_service=target_service, database='mongodb')
        self.download = mongo.DownMongo(
            username=username, password=password, host=host, port=port, save_path=None)

    def tg_wxt(self):
        """Return the promotion report of the last month aggregated per item.

        Reads ``天猫数据2`` / ``推广数据_宝贝主体报表`` from last month's first day
        until now, renames the columns, and collapses rows that share date,
        scene, item id, spend, impressions and clicks.
        """
        self.download.start_date, self.download.end_date = self.months_data(num=1)
        projection = dict.fromkeys([
            '日期', '场景名字', '主体id', '花费', '展现量', '点击量',
            '总购物车数', '总成交笔数', '总成交金额',
            '自然流量曝光量', '直接成交笔数', '直接成交金额',
        ], 1)
        df = self.download.data_to_df(
            db_name='天猫数据2', collection_name='推广数据_宝贝主体报表', projection=projection)
        df = df.rename(columns={
            '场景名字': '营销场景',
            '主体id': '商品id',
            '总购物车数': '加购量',
            '总成交笔数': '成交笔数',
            '总成交金额': '成交金额',
        })
        # Fill gaps before casting: casting a missing value to int raises.
        df = df.fillna(0)
        df = df.astype({
            '花费': float,
            '展现量': int,
            '点击量': int,
            '加购量': int,
            '成交笔数': int,
            '成交金额': float,
            '自然流量曝光量': int,
            '直接成交笔数': int,
            '直接成交金额': float,
        }, errors='raise')

        reducer = 'max' if self.is_maximize else 'min'
        df = df.groupby(['日期', '营销场景', '商品id', '花费', '展现量', '点击量'], as_index=False).agg(
            **{
                '加购量': ('加购量', reducer),
                '成交笔数': ('成交笔数', reducer),
                '成交金额': ('成交金额', reducer),
                '自然流量曝光量': ('自然流量曝光量', reducer),
                # NOTE(review): the two direct-deal columns have always used
                # max in both modes; confirm whether min was intended when
                # is_maximize is False.
                '直接成交笔数': ('直接成交笔数', 'max'),
                '直接成交金额': ('直接成交金额', 'max'),
            }
        )
        df.insert(loc=1, column='推广渠道', value='万相台无界版')  # insert the channel column
        return df


class MysqlDatasQuery(_ExportMixin):
    """
    Download data from MySQL.
    self.output: default export directory
    self.is_maximize: whether to keep the maximum conversion figures
    """
    def __init__(self, target_service):
        # target_service: which server the data is downloaded from
        self.is_maximize = True
        self.output = self._default_output()
        self.months = 1  # months to download: 0 = current month, 1 = from the 1st of last month

        # Build the downloader
        username, password, host, port = get_myconf.select_config_values(
            target_service=target_service, database='mysql')
        self.download = s_query.QueryDatas(username=username, password=password, host=host, port=port)

    def tg_wxt(self):
        """Return ``天猫数据2`` / ``推广数据_宝贝主体报表`` rows for the last ``self.months`` months."""
        start_date, end_date = self.months_data(num=self.months)
        return self.download.data_to_df(
            db_name='天猫数据2', tabel_name='推广数据_宝贝主体报表',
            start_date=start_date, end_date=end_date)
|
297
|
+
|
298
|
+
|
299
|
+
def main():
    """Print this month's promotion report read from the 'company' MySQL server."""
    query = MysqlDatasQuery(target_service='company')
    query.months = 0  # current month only
    report = query.tg_wxt()
    print(report)


if __name__ == '__main__':
    main()
|
mdbq/mysql/s_query.py
ADDED
@@ -0,0 +1,165 @@
|
|
1
|
+
# -*- coding:utf-8 -*-
|
2
|
+
import datetime
|
3
|
+
import platform
|
4
|
+
import re
|
5
|
+
import time
|
6
|
+
from functools import wraps
|
7
|
+
import warnings
|
8
|
+
import pymysql
|
9
|
+
import numpy as np
|
10
|
+
import pandas as pd
|
11
|
+
from sqlalchemy import create_engine
|
12
|
+
import os
|
13
|
+
import calendar
|
14
|
+
from mdbq.config import get_myconf
|
15
|
+
|
16
|
+
warnings.filterwarnings('ignore')
|
17
|
+
|
18
|
+
|
19
|
+
class QueryDatas:
    """Read rows of a MySQL table into a pandas DataFrame."""

    def __init__(self, username: str, password: str, host: str, port: int, charset: str = 'utf8mb4'):
        self.username = username
        self.password = password
        self.host = host
        self.port = port
        self.config = {
            'host': self.host,
            'port': self.port,
            'user': self.username,
            'password': self.password,
            'charset': charset,  # utf8mb4 can store four-byte UTF-8 characters
            'cursorclass': pymysql.cursors.DictCursor,
        }

    @staticmethod
    def _quote_identifier(name):
        """Return ``name`` as a backtick-quoted MySQL identifier."""
        return '`' + str(name).replace('`', '``') + '`'

    @classmethod
    def _build_select(cls, db_name, tabel_name, columns, date_range=None):
        """Build the SELECT statement and its parameters.

        Args:
            db_name: Database name.
            tabel_name: Table name.
            columns: Column names to select; empty means ``*``.
            date_range: Optional ``(start, end)`` pair; when given, rows are
                restricted to ``日期 BETWEEN start AND end``.

        Returns:
            ``(sql, params)`` where ``params`` is ``None`` without a date filter.
        """
        select_list = ', '.join(cls._quote_identifier(col) for col in columns) if columns else '*'
        sql = (f'SELECT {select_list} '
               f'FROM {cls._quote_identifier(db_name)}.{cls._quote_identifier(tabel_name)}')
        if date_range is None:
            return sql, None
        return sql + ' WHERE `日期` BETWEEN %s AND %s', tuple(date_range)

    def data_to_df(self, db_name, tabel_name, start_date, end_date, projection=None):
        """Query ``db_name.tabel_name`` and return the rows as a DataFrame.

        Args:
            db_name: Database to read from.
            tabel_name: Table to read from.
            start_date, end_date: Inclusive date window; applied to the
                ``日期`` column when that column is requested, or when no
                projection is given and the table has such a column.
            projection: Optional list of column names. Names that do not
                exist in the table are ignored; when none remain, all
                columns are returned. The caller's list is never modified.

        Returns:
            The matching rows, or an empty DataFrame when the database or
            table does not exist or the query fails (a message is printed).
        """
        start_date = pd.to_datetime(start_date).strftime('%Y-%m-%d')
        end_date = pd.to_datetime(end_date).strftime('%Y-%m-%d')
        df = pd.DataFrame()
        requested = list(projection) if projection else []

        # One connection without a default database is enough: every
        # statement below names the database explicitly, so self.config
        # stays untouched between calls.
        connection = pymysql.connect(**self.config)
        try:
            with connection.cursor() as cursor:
                cursor.execute(
                    'SELECT 1 FROM information_schema.schemata WHERE schema_name = %s',
                    (db_name,))
                if cursor.fetchone() is None:
                    print(f"Database <{db_name}>: 数据库不存在")
                    return df

                cursor.execute(
                    'SELECT COLUMN_NAME FROM information_schema.columns '
                    'WHERE table_schema = %s AND table_name = %s',
                    (db_name, tabel_name))
                cols_exist = [row['COLUMN_NAME'] for row in cursor.fetchall()]
                if not cols_exist:
                    print(f'{db_name} -> <{tabel_name}>: 表不存在')
                    return df

                # Keep only requested columns that really exist (order preserved).
                columns = [col for col in requested if col in cols_exist]
                filter_by_date = '日期' in columns or (not columns and '日期' in cols_exist)
                sql, params = self._build_select(
                    db_name, tabel_name, columns,
                    date_range=(start_date, end_date) if filter_by_date else None)
                cursor.execute(sql, params)
                rows = cursor.fetchall()
                names = [desc[0] for desc in cursor.description]
                df = pd.DataFrame(rows, columns=names)
        except pymysql.MySQLError as e:
            print(f'{e} {db_name} -> <{tabel_name}>: 查询失败')
            return df
        finally:
            connection.close()

        if len(df) == 0:
            print(f'database: {db_name}, table: {tabel_name} 查询的数据为空')
        return df
|
111
|
+
|
112
|
+
|
113
|
+
def year_month_day(start_date, end_date):
    """
    List every calendar month between ``start_date`` and ``end_date``.

    ``start_date`` is snapped to the first day of its month, so a window
    that lies inside a single month still yields that month.

    Args:
        start_date: Anything ``pd.to_datetime`` can parse.
        end_date: Anything ``pd.to_datetime`` can parse (e.g. ``'today'``).

    Returns:
        A list of ``{'起始日期': 'YYYY-MM-01', '结束日期': 'YYYY-MM-<last day>'}``
        dicts in chronological order; ``[]`` when either date cannot be
        parsed (the error is printed) or the window is empty.
    """
    try:
        first = pd.to_datetime(start_date)
        last = pd.to_datetime(end_date)
        # 'MS' = month start: one entry per month whose first day is <= end.
        month_starts = pd.date_range(start=f'{first.year}-{first.month}-01', end=last, freq='MS')
    except (ValueError, TypeError, AttributeError) as e:
        print(e)
        return []

    results = []
    for month_start in month_starts:
        # monthrange returns (weekday of the 1st, number of days in the month)
        _, days_in_month = calendar.monthrange(month_start.year, month_start.month)
        year_month = month_start.strftime('%Y-%m')
        results.append({'起始日期': f'{year_month}-01', '结束日期': f'{year_month}-{days_in_month}'})
    return results
|
137
|
+
|
138
|
+
|
139
|
+
def download_datas(tabel_name, save_path, start_date):
    """Export one table of ``市场数据2`` to monthly CSV files.

    For every calendar month from ``start_date`` until today the rows of
    ``tabel_name`` are queried from the 'company' MySQL server and, when
    any exist, written to ``save_path`` as
    ``<tabel_name>_<month start>_<month end>.csv``.

    Args:
        tabel_name: Table to export.
        save_path: Existing directory the CSV files are written to.
        start_date: First date of the export window.
    """
    username, password, host, port = get_myconf.select_config_values(target_service='company', database='mysql')
    print(host, port)  # the password is deliberately not echoed
    m = QueryDatas(username=username, password=password, host=host, port=port)
    for result in year_month_day(start_date=start_date, end_date='today'):
        month_start = result['起始日期']
        month_end = result['结束日期']
        df = m.data_to_df(db_name='市场数据2', tabel_name=tabel_name, start_date=month_start, end_date=month_end)
        if len(df) == 0:
            continue
        path = os.path.join(save_path, f'{tabel_name}_{month_start}_{month_end}.csv')
        # Keep only the date part: drop everything from the first space on.
        df['日期'] = df['日期'].apply(lambda x: re.sub(' .*', '', str(x)))
        df.to_csv(path, index=False, encoding='utf-8_sig', header=True)
|
156
|
+
|
157
|
+
|
158
|
+
if __name__ == '__main__':
    # Manual smoke test: print one table slice from the 'company' MySQL server.
    credentials = get_myconf.select_config_values(target_service='company', database='mysql')
    username, password, host, port = credentials
    qd = QueryDatas(username=username, password=password, host=host, port=port)
    df = qd.data_to_df(
        db_name='市场数据2',
        tabel_name='市场排行_店铺',
        start_date='2024-08-13',
        end_date='2024-08-31',
    )
    print(df)
|
@@ -1,6 +1,8 @@
|
|
1
1
|
mdbq/__init__.py,sha256=Il5Q9ATdX8yXqVxtP_nYqUhExzxPC_qk_WXQ_4h0exg,16
|
2
|
+
mdbq/__version__.py,sha256=y9Mp_8x0BCZSHsdLT_q5tX9wZwd5QgqrSIENLrb6vXA,62
|
2
3
|
mdbq/aggregation/__init__.py,sha256=EeDqX2Aml6SPx8363J-v1lz0EcZtgwIBYyCJV6CcEDU,40
|
3
|
-
mdbq/aggregation/aggregation.py,sha256=
|
4
|
+
mdbq/aggregation/aggregation.py,sha256=RHQa2rs4fimRvJzluujErg6I8fn7s9q1-kwC2bPZohE,59439
|
5
|
+
mdbq/aggregation/query_data.py,sha256=avbMc36kCuyTjLOXMzIEIKSc0x227c1t-Ydf0vdkViM,13756
|
4
6
|
mdbq/bdup/__init__.py,sha256=AkhsGk81SkG1c8FqDH5tRq-8MZmFobVbN60DTyukYTY,28
|
5
7
|
mdbq/bdup/bdup.py,sha256=LAV0TgnQpc-LB-YuJthxb0U42_VkPidzQzAagan46lU,4234
|
6
8
|
mdbq/clean/__init__.py,sha256=A1d6x3L27j4NtLgiFV5TANwEkLuaDfPHDQNrPBbNWtU,41
|
@@ -16,15 +18,16 @@ mdbq/mongo/__init__.py,sha256=SILt7xMtQIQl_m-ik9WLtJSXIVf424iYgCfE_tnQFbw,13
|
|
16
18
|
mdbq/mongo/mongo.py,sha256=hF93-kP2lxK4WY1KCdBBszLQ_I7W0mQQxZ7t4qU2w3A,32930
|
17
19
|
mdbq/mysql/__init__.py,sha256=A_DPJyAoEvTSFojiI2e94zP0FKtCkkwKP1kYUCSyQzo,11
|
18
20
|
mdbq/mysql/mysql.py,sha256=H9onFYKSYRjdXghK_29Aj7vgvUgDHexJjIECrdxLbE0,29925
|
21
|
+
mdbq/mysql/s_query.py,sha256=P0QNwJL3ytyN75c8Qny1xfxrOUI4ks-FuRghNsyMWic,7409
|
19
22
|
mdbq/other/__init__.py,sha256=jso1oHcy6cJEfa7udS_9uO5X6kZLoPBF8l3wCYmr5dM,18
|
23
|
+
mdbq/other/porxy.py,sha256=UHfgEyXugogvXgsG68a7QouUCKaohTKKkI4RN-kYSdQ,4961
|
20
24
|
mdbq/other/pov_city.py,sha256=AEOmCOzOwyjHi9LLZWPKi6DUuSC-_M163664I52u9qw,21050
|
21
25
|
mdbq/other/ua_sj.py,sha256=JuVYzc_5QZ9s_oQSrTHVKkQv4S_7-CWx4oIKOARn_9U,22178
|
22
|
-
mdbq/other/xigua_porxy.py,sha256=zTOxsdkdDAyGfHWPUm_7WIztjrGExONAwvPzTaC7Rho,5007
|
23
26
|
mdbq/pbix/__init__.py,sha256=Trtfaynu9RjoTyLLYBN2xdRxTvm_zhCniUkVTAYwcjo,24
|
24
27
|
mdbq/pbix/pbix_refresh.py,sha256=JUjKW3bNEyoMVfVfo77UhguvS5AWkixvVhDbw4_MHco,2396
|
25
28
|
mdbq/pbix/refresh_all.py,sha256=wulHs4rivf4Mi0Pii2QR5Nk9-TBcvSwnCB_WH9QULKE,5939
|
26
29
|
mdbq/spider/__init__.py,sha256=RBMFXGy_jd1HXZhngB2T2XTvJqki8P_Fr-pBcwijnew,18
|
27
|
-
mdbq-0.0.
|
28
|
-
mdbq-0.0.
|
29
|
-
mdbq-0.0.
|
30
|
-
mdbq-0.0.
|
30
|
+
mdbq-0.0.6.dist-info/METADATA,sha256=G_RkhGDJMmXpNdrECY4LBT0f6HF9sHpfU-LYxNUzWPk,245
|
31
|
+
mdbq-0.0.6.dist-info/WHEEL,sha256=y4mX-SOX4fYIkonsAGA5N0Oy-8_gI4FXw5HNI1xqvWg,91
|
32
|
+
mdbq-0.0.6.dist-info/top_level.txt,sha256=2FQ-uLnCSB-OwFiWntzmwosW3X2Xqsg0ewh1axsaylA,5
|
33
|
+
mdbq-0.0.6.dist-info/RECORD,,
|
File without changes
|
File without changes
|