mdbq 0.0.7__py3-none-any.whl → 0.0.9__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- mdbq/dataframe/__init__.py +4 -0
- mdbq/dataframe/converter.py +57 -0
- mdbq/mongo/mongo.py +11 -53
- mdbq/mysql/mysql.py +3 -37
- {mdbq-0.0.7.dist-info → mdbq-0.0.9.dist-info}/METADATA +1 -1
- {mdbq-0.0.7.dist-info → mdbq-0.0.9.dist-info}/RECORD +8 -9
- mdbq/aggregation/__init__.py +0 -4
- mdbq/aggregation/aggregation.py +0 -1051
- mdbq/aggregation/query_data.py +0 -266
- {mdbq-0.0.7.dist-info → mdbq-0.0.9.dist-info}/WHEEL +0 -0
- {mdbq-0.0.7.dist-info → mdbq-0.0.9.dist-info}/top_level.txt +0 -0
mdbq/aggregation/query_data.py
DELETED
@@ -1,266 +0,0 @@
|
|
1
|
-
# -*- coding: UTF-8 -*-
|
2
|
-
from mdbq.mongo import mongo
|
3
|
-
from mdbq.mysql import s_query
|
4
|
-
from mdbq.config import get_myconf
|
5
|
-
import datetime
|
6
|
-
from dateutil.relativedelta import relativedelta
|
7
|
-
import pandas as pd
|
8
|
-
import numpy as np
|
9
|
-
import platform
|
10
|
-
import getpass
|
11
|
-
import json
|
12
|
-
import os
|
13
|
-
|
14
|
-
|
15
|
-
class MongoDatasQuery:
    """Download report data from a MongoDB server as DataFrames.

    Attributes:
        months: How many months of data to download. 0 means the current
            month, 1 means from the 1st of last month until now.
        download: The ``mongo.DownMongo`` client used for every query.
    """

    # Fields requested from the item-level promotion report; handed to
    # ``data_to_df`` as its ``projection`` argument.
    _TG_WXT_FIELDS = (
        '日期',
        '场景名字',
        '主体id',
        '花费',
        '展现量',
        '点击量',
        '总购物车数',
        '总成交笔数',
        '总成交金额',
        '自然流量曝光量',
        '直接成交笔数',
        '直接成交金额',
    )

    def __init__(self, target_service):
        """Create the download client.

        Args:
            target_service: Name of the server to download from; used to
                look up the MongoDB credentials in ``get_myconf``.
        """
        self.months = 0
        username, password, host, port = get_myconf.select_config_values(
            target_service=target_service,
            database='mongodb',
        )
        self.download = mongo.DownMongo(
            username=username,
            password=password,
            host=host,
            port=port,
            save_path=None,
        )

    def tg_wxt(self):
        """Return the '推广数据_宝贝主体报表' collection of '天猫数据2'.

        The date window is derived from ``self.months`` and stored on the
        download client before the query is issued.
        """
        self.download.start_date, self.download.end_date = self.months_data(num=self.months)
        projection = {field: 1 for field in self._TG_WXT_FIELDS}
        return self.download.data_to_df(
            db_name='天猫数据2',
            collection_name='推广数据_宝贝主体报表',
            projection=projection,
        )

    @staticmethod
    def days_data(days, end_date=None):
        """Return ``(start, end)`` timestamps covering the last ``days`` days.

        Args:
            days: Length of the window in days.
            end_date: End of the window; any value ``pd.to_datetime``
                accepts (datetime, date, ISO string, ...). Falsy values
                mean "now".

        Returns:
            A ``(start, end)`` tuple of ``pd.Timestamp``.
        """
        if not end_date:  # None, '' and other falsy values all mean "now"
            end_date = datetime.datetime.now()
        # Normalise first so that string inputs support date arithmetic.
        end = pd.to_datetime(end_date)
        return end - pd.Timedelta(days=days), end

    @staticmethod
    def months_data(num=0, end_date=None):
        """Return ``(start, end)`` timestamps covering the last ``num`` months.

        ``start`` is the first day of the month that lies ``num`` months
        before ``end``; ``num=0`` therefore selects the current month.

        Args:
            num: Number of months to go back.
            end_date: End of the window; any value ``pd.to_datetime``
                accepts. Falsy values mean "now".

        Returns:
            A ``(start, end)`` tuple of ``pd.Timestamp``.
        """
        if not end_date:  # None, '' and other falsy values all mean "now"
            end_date = datetime.datetime.now()
        end = pd.to_datetime(end_date)
        # Count months since year 0 so the year rolls over correctly.
        year, month_index = divmod(end.year * 12 + (end.month - 1) - num, 12)
        start = pd.Timestamp(year=year, month=month_index + 1, day=1)
        return start, end
|
67
|
-
|
68
|
-
|
69
|
-
class MysqlDatasQuery:
    """Download report data from a MySQL server as DataFrames.

    Attributes:
        months: How many months of data to download. 0 means the current
            month, 1 means from the 1st of last month until now.
        download: The ``s_query.QueryDatas`` client used for every query.
    """

    # Fields requested from the item-level promotion report; handed to
    # ``data_to_df`` as its ``projection`` argument.
    _TG_WXT_FIELDS = (
        '日期',
        '场景名字',
        '主体id',
        '花费',
        '展现量',
        '点击量',
        '总购物车数',
        '总成交笔数',
        '总成交金额',
        '自然流量曝光量',
        '直接成交笔数',
        '直接成交金额',
    )

    def __init__(self, target_service):
        """Create the download client.

        Args:
            target_service: Name of the server to download from; used to
                look up the MySQL credentials in ``get_myconf``.
        """
        self.months = 0
        username, password, host, port = get_myconf.select_config_values(
            target_service=target_service,
            database='mysql',
        )
        self.download = s_query.QueryDatas(
            username=username,
            password=password,
            host=host,
            port=port,
        )

    def tg_wxt(self):
        """Return the '推广数据_宝贝主体报表' table of '天猫数据2'.

        The date window passed to the query is derived from ``self.months``.
        """
        start_date, end_date = self.months_data(num=self.months)
        projection = {field: 1 for field in self._TG_WXT_FIELDS}
        # NOTE: ``tabel_name`` (sic) is the keyword this call has always used;
        # it is kept unchanged because it belongs to the s_query API.
        return self.download.data_to_df(
            db_name='天猫数据2',
            tabel_name='推广数据_宝贝主体报表',
            start_date=start_date,
            end_date=end_date,
            projection=projection,
        )

    @staticmethod
    def months_data(num=0, end_date=None):
        """Return ``(start, end)`` timestamps covering the last ``num`` months.

        ``start`` is the first day of the month that lies ``num`` months
        before ``end``; ``num=0`` therefore selects the current month.

        Args:
            num: Number of months to go back.
            end_date: End of the window; any value ``pd.to_datetime``
                accepts (datetime, date, ISO string, ...). Falsy values
                mean "now".

        Returns:
            A ``(start, end)`` tuple of ``pd.Timestamp``.
        """
        if not end_date:  # None, '' and other falsy values all mean "now"
            end_date = datetime.datetime.now()
        # Normalise first so that string inputs support date arithmetic.
        end = pd.to_datetime(end_date)
        # Count months since year 0 so the year rolls over correctly.
        year, month_index = divmod(end.year * 12 + (end.month - 1) - num, 12)
        start = pd.Timestamp(year=year, month=month_index + 1, day=1)
        return start, end
|
113
|
-
|
114
|
-
|
115
|
-
class GroupBy:
    """Aggregate downloaded report data and export it to CSV, JSON or Excel.

    Attributes:
        output: Base export directory; every exporter writes below it.
    """

    # Source column -> output column for the item-level promotion report.
    _COLUMN_RENAMES = {
        '场景名字': '营销场景',
        '主体id': '商品id',
        '总购物车数': '加购量',
        '总成交笔数': '成交笔数',
        '总成交金额': '成交金额',
    }
    # Target dtype of each metric column (names after renaming).
    _METRIC_DTYPES = {
        '花费': float,
        '展现量': int,
        '点击量': int,
        '加购量': int,
        '成交笔数': int,
        '成交金额': float,
        '自然流量曝光量': int,
        '直接成交笔数': int,
        '直接成交金额': float,
    }
    # Rows that agree on all of these columns are collapsed into one row.
    _GROUP_KEYS = ['日期', '营销场景', '商品id', '花费', '展现量', '点击量']
    # Columns reduced with max/min inside each group.
    _AGG_COLUMNS = ['加购量', '成交笔数', '成交金额', '自然流量曝光量', '直接成交笔数', '直接成交金额']
    # Accepted ``freq`` values -> period alias used for splitting exports.
    _PERIOD_ALIASES = {'M': 'M', 'ME': 'M', 'Y': 'Y', 'YE': 'Y'}

    def __init__(self):
        """Select the platform-specific default export directory."""
        system = platform.system()
        if system == 'Darwin':
            self.output = os.path.join('/Users', getpass.getuser(), '数据中心/数据库导出')
        elif system == 'Windows':
            self.output = os.path.join('C:\\同步空间\\BaiduSyncdisk\\数据库导出')
        else:
            self.output = os.path.join('数据中心/数据库导出')

    def groupby(self, df, tabel_name, is_maximize=True):
        """Deduplicate a report by collapsing rows that share the group keys.

        Only tables whose name contains '宝贝主体报表' are processed; any
        other table is returned unchanged. The input frame is never
        modified.

        Args:
            df: Raw report data.
            tabel_name: Name of the table the data came from.
            is_maximize: Keep the largest value of every conversion metric
                within a group when True, the smallest when False.

        Returns:
            The aggregated DataFrame, with a '推广渠道' column inserted at
            position 1.

        Raises:
            KeyError: If an expected column is missing.
            ValueError: If a metric value cannot be converted to a number.
        """
        if '宝贝主体报表' not in tabel_name:
            return df

        # rename() returns a new frame, so the caller's data stays untouched.
        df = df.rename(columns=self._COLUMN_RENAMES)
        metrics = list(self._METRIC_DTYPES)
        # Missing metrics must become 0 *before* the integer cast, because
        # NaN cannot be converted to int.
        df[metrics] = df[metrics].fillna(0)
        df = df.astype(self._METRIC_DTYPES, errors='raise')
        df = df.fillna(0)

        # String reducer names avoid the deprecated np.max / np.min callables.
        # Every metric uses the same reducer, including the two "direct deal"
        # columns in the minimising case.
        reducer = 'max' if is_maximize else 'min'
        df = df.groupby(self._GROUP_KEYS, as_index=False).agg(
            **{column: (column, reducer) for column in self._AGG_COLUMNS}
        )
        df.insert(loc=1, column='推广渠道', value='万相台无界版')
        return df

    def as_csv(self, df, filename, path=None, encoding='utf-8_sig',
               index=False, header=True, st_ascend=None, ascend=None, freq=None):
        """Write ``df`` as CSV below the export directory.

        Args:
            df: Data to export; nothing is written when it has no rows.
            filename: File name without extension.
            path: Optional sub-folder of ``self.output``.
            encoding: Text encoding of the file.
            index: Whether to write the index.
            header: Whether to write the header row.
            st_ascend: Sort columns, e.g. ``['column1', 'column2']``.
            ascend: Sort direction(s), e.g. ``[True, False]`` or one bool.
            freq: ``'M'`` or ``'Y'`` (``'ME'`` / ``'YE'`` are also accepted)
                to write one file per month / year of the '日期' column into
                a sub-folder named after ``filename``. Periods without rows
                produce no file. Any other truthy value writes all rows to
                a single ``<filename>_未分类.csv`` in that sub-folder.
        """
        if len(df) == 0:
            return
        path = self._export_dir(path)
        df = self._sorted(df, filename, st_ascend, ascend)

        if not freq:
            df.to_csv(os.path.join(path, filename + '.csv'), encoding=encoding, index=index, header=header)
            return

        if '日期' not in df.columns:
            print(f'{filename}: 数据缺少日期列,无法按日期分组')
            return

        folder = os.path.join(path, filename)
        os.makedirs(folder, exist_ok=True)
        for label, part in self._split_by_period(df, freq):
            # Re-sort each part so every file starts from a clean 0..n index.
            part = self._sorted(part, filename, st_ascend, ascend)
            part.to_csv(
                os.path.join(folder, f'{filename}{label}.csv'),
                encoding=encoding, index=index, header=header,
            )

    def as_json(self, df, filename, path=None, orient='records', force_ascii=False, st_ascend=None, ascend=None):
        """Write ``df`` as ``<filename>.json`` below the export directory.

        Args:
            df: Data to export; nothing is written when it has no rows.
            filename: File name without extension.
            path: Optional sub-folder of ``self.output``.
            orient: Passed to ``DataFrame.to_json``.
            force_ascii: Passed to ``DataFrame.to_json``.
            st_ascend: Sort columns.
            ascend: Sort direction(s).
        """
        if len(df) == 0:
            return
        path = self._export_dir(path)
        df = self._sorted(df, filename, st_ascend, ascend)
        df.to_json(os.path.join(path, filename + '.json'), orient=orient, force_ascii=force_ascii)

    def as_excel(self, df, filename, path=None, index=False, header=True, engine='openpyxl',
                 freeze_panes=(1, 0), st_ascend=None, ascend=None):
        """Write ``df`` as ``<filename>.xlsx`` below the export directory.

        Args:
            df: Data to export; nothing is written when it has no rows.
            filename: File name without extension.
            path: Optional sub-folder of ``self.output``.
            index: Whether to write the index.
            header: Whether to write the header row.
            engine: Excel writer engine passed to ``DataFrame.to_excel``.
            freeze_panes: Passed to ``DataFrame.to_excel``.
            st_ascend: Sort columns.
            ascend: Sort direction(s).
        """
        if len(df) == 0:
            return
        path = self._export_dir(path)
        df = self._sorted(df, filename, st_ascend, ascend)
        df.to_excel(
            os.path.join(path, filename + '.xlsx'),
            index=index, header=header, engine=engine, freeze_panes=freeze_panes,
        )

    def _export_dir(self, path):
        """Return the directory to write into, creating it when missing."""
        target = os.path.join(self.output, path) if path else self.output
        os.makedirs(target, exist_ok=True)
        return target

    @staticmethod
    def _sorted(df, filename, st_ascend, ascend):
        """Return ``df`` sorted as requested, or ``df`` itself when it cannot be sorted."""
        # ``ascend=False`` is a valid request for descending order, so only a
        # missing (None) or empty direction disables sorting.
        no_direction = ascend is None or (isinstance(ascend, (list, tuple)) and not ascend)
        if not st_ascend or no_direction:
            return df
        try:
            return df.sort_values(st_ascend, ascending=ascend, ignore_index=True)
        except Exception:
            # Best effort: a bad sort specification must not block the export.
            print(f'{filename}: sort_values排序参数错误!')
            return df

    @classmethod
    def _split_by_period(cls, df, freq):
        """Yield ``(file_label, part)`` pairs, one per month or year of '日期'."""
        alias = cls._PERIOD_ALIASES.get(freq)
        if alias is None:
            # Unsupported frequency: there is only one label available, so
            # keep all rows together rather than overwriting one file with
            # each group in turn.
            yield '_未分类', df
            return
        periods = pd.to_datetime(df['日期']).dt.to_period(alias)
        for period, part in df.groupby(periods):
            if alias == 'M':
                yield f'{period.year}-{period.month:02d}', part
            else:
                yield f'{period.year}年', part
|
253
|
-
|
254
|
-
|
255
|
-
def main():
    """Download, aggregate and export the item-level promotion report.

    Data for the current month is read from the MySQL server configured as
    'home_lx', aggregated keeping the largest metric values, and written as
    ``test.csv`` to the default export directory.
    """
    query = MysqlDatasQuery(target_service='home_lx')
    query.months = 0  # 0 = current month only
    raw = query.tg_wxt()  # fetch the report from the database as a DataFrame

    exporter = GroupBy()
    aggregated = exporter.groupby(
        df=raw,
        tabel_name='推广数据_宝贝主体报表',
        is_maximize=True,
    )
    exporter.as_csv(df=aggregated, filename='test')


if __name__ == '__main__':
    main()
|
File without changes
|
File without changes
|