mdbq 3.7.14__py3-none-any.whl → 3.7.16__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- mdbq/aggregation/optimize_data.py +11 -1
- mdbq/aggregation/query_data.py +13 -1
- mdbq/config/default.py +0 -8
- mdbq/config/products.py +12 -1
- mdbq/log/spider_logging.py +0 -9
- mdbq/other/download_sku_picture.py +13 -1
- mdbq/redis/getredis.py +12 -1
- mdbq/spider/aikucun.py +12 -1
- {mdbq-3.7.14.dist-info → mdbq-3.7.16.dist-info}/METADATA +1 -1
- {mdbq-3.7.14.dist-info → mdbq-3.7.16.dist-info}/RECORD +12 -13
- {mdbq-3.7.14.dist-info → mdbq-3.7.16.dist-info}/WHEEL +1 -1
- mdbq/aggregation/datashow_bak.py +0 -1264
- {mdbq-3.7.14.dist-info → mdbq-3.7.16.dist-info}/top_level.txt +0 -0
mdbq/aggregation/datashow_bak.py
DELETED
@@ -1,1264 +0,0 @@
|
|
1
|
-
# -*- coding: UTF-8 –*-
|
2
|
-
import decimal
|
3
|
-
import os
|
4
|
-
import re
|
5
|
-
import socket
|
6
|
-
import platform
|
7
|
-
import getpass
|
8
|
-
import datetime
|
9
|
-
import time
|
10
|
-
from mdbq.config import myconfig
|
11
|
-
from mdbq.mysql import mysql
|
12
|
-
from mdbq.mysql import s_query
|
13
|
-
from mdbq.other import ua_sj
|
14
|
-
import pandas as pd
|
15
|
-
import numpy as np
|
16
|
-
import plotly.express as px
|
17
|
-
import plotly.graph_objects as go
|
18
|
-
from plotly.subplots import make_subplots
|
19
|
-
import tkinter as tk
|
20
|
-
import requests
|
21
|
-
from io import BytesIO
|
22
|
-
from PIL import Image
|
23
|
-
import base64
|
24
|
-
import matplotlib.pyplot as plt
|
25
|
-
from matplotlib import rcParams
|
26
|
-
|
27
|
-
from sqlalchemy.sql.functions import count
|
28
|
-
|
29
|
-
# Per-platform defaults: the host address, a matplotlib font that can render
# Chinese text (avoids garbled glyphs), and the download directory.
if platform.system() == 'Windows':
    ip_address, D_PATH = '192.168.1.117', os.path.join(f'C:\\Users\\{getpass.getuser()}\\Downloads')
    rcParams['font.sans-serif'] = ['SimHei']
elif platform.system() == 'Linux':
    ip_address, D_PATH = '127.0.0.1', 'Downloads'
    rcParams['font.sans-serif'] = ['Arial Unicode MS']
    # D_PATH is relative here, so it is created on demand.
    if not os.path.exists(D_PATH):
        os.makedirs(D_PATH)
else:
    ip_address, D_PATH = '127.0.0.1', os.path.join(f'/Users/{getpass.getuser()}/Downloads')
    rcParams['font.sans-serif'] = ['Arial Unicode MS']

# Port constant and the directory the generated HTML reports are written to.
PORT, DIRECTORY = 5050, os.path.join(D_PATH, 'http_server')

# Keep the '-' sign on axes from being mis-rendered with the fonts chosen above.
rcParams['axes.unicode_minus'] = False
|
48
|
-
# Placeholder engines with empty credentials; both names are rebound below once
# the real configuration has been read.
m_engine = mysql.MysqlUpload(username='', password='', host='', port=0, charset='utf8mb4')
company_engine = mysql.MysqlUpload(username='', password='', host='', port=0, charset='utf8mb4')

# NOTE: the module-level username/password/host/port left behind by the LAST
# unpacking in each branch are read later by DataShow.getdata, so the order of
# the two sections inside each branch matters.
if socket.gethostname() in ('company', 'Mac2.local'):
    conf = myconfig.main()

    # Remote xigua_lx database -> m_engine.
    conf_data = conf['Windows']['xigua_lx']['mysql']['remoto']
    username, password, host, port = (conf_data[key] for key in ('username', 'password', 'host', 'port'))
    m_engine = mysql.MysqlUpload(username=username, password=password, host=host, port=port, charset='utf8mb4')

    # Local company database -> company_engine (these credentials stay in the globals).
    conf_data = conf['Windows']['company']['mysql']['local']
    username, password, host, port = (conf_data[key] for key in ('username', 'password', 'host', 'port'))
    company_engine = mysql.MysqlUpload(username=username, password=password, host=host, port=port, charset='utf8mb4')

    targe_host = 'company'
else:
    conf = myconfig.main()

    # Remote company database -> company_engine.
    conf_data = conf['Windows']['company']['mysql']['remoto']
    username, password, host, port = (conf_data[key] for key in ('username', 'password', 'host', 'port'))
    company_engine = mysql.MysqlUpload(username=username, password=password, host=host, port=port, charset='utf8mb4')

    # Local xigua_lx database -> m_engine (these credentials stay in the globals).
    conf_data = conf['Windows']['xigua_lx']['mysql']['local']
    username, password, host, port = (conf_data[key] for key in ('username', 'password', 'host', 'port'))
    m_engine = mysql.MysqlUpload(username=username, password=password, host=host, port=port, charset='utf8mb4')

    targe_host = 'xigua_lx'
|
96
|
-
|
97
|
-
|
98
|
-
# def getdata():
|
99
|
-
# download = s_query.QueryDatas(username=username, password=password, host=host, port=port)
|
100
|
-
# start_date, end_date = '2024-01-01', '2024-12-20'
|
101
|
-
# projection = {
|
102
|
-
# '日期': 1,
|
103
|
-
# '三级来源': 1,
|
104
|
-
# '访客数': 1,
|
105
|
-
# }
|
106
|
-
# __res = []
|
107
|
-
# for year in range(2024, datetime.datetime.today().year + 1):
|
108
|
-
# df = download.data_to_df(
|
109
|
-
# db_name='聚合数据',
|
110
|
-
# table_name=f'店铺流量来源构成',
|
111
|
-
# start_date=start_date,
|
112
|
-
# end_date=end_date,
|
113
|
-
# projection=projection,
|
114
|
-
# )
|
115
|
-
# __res.append(df)
|
116
|
-
# df = pd.concat(__res, ignore_index=True)
|
117
|
-
# return df
|
118
|
-
|
119
|
-
|
120
|
-
class DataShow:
|
121
|
-
def __init__(self):
    """
    Prepare the output directory, read the screen size and set the default
    reporting window (15 days ago through yesterday).
    """
    # Output directory for the generated HTML files.
    self.path = os.path.join(D_PATH, 'http_server')
    os.makedirs(self.path, exist_ok=True)

    # A throw-away Tk root is the only way used here to query the screen size.
    root = tk.Tk()
    self.screen_width, self.screen_height = root.winfo_screenwidth(), root.winfo_screenheight()
    root.destroy()

    date_fmt = '%Y-%m-%d'
    self.today = datetime.date.today()
    self.start_date = (self.today - datetime.timedelta(days=15)).strftime(date_fmt)
    self.end_date = (self.today - datetime.timedelta(days=1)).strftime(date_fmt)
|
132
|
-
|
133
|
-
def getdata(self, db_name, table_name, pro_list, start_date=None, end_date=None):
    """
    Download one table from MySQL as a DataFrame.

    The connection uses the module-level username/password/host/port.

    The previous implementation wrapped the query in
    ``for year in range(2024, current_year + 1)`` without ever using ``year``,
    so the identical query ran once per year and the copies were concatenated.
    From 2025 on that duplicated every row. The query now runs exactly once.

    :param db_name: database name
    :param table_name: table name
    :param pro_list: column names to request; each is sent as ``{name: 1}`` in the projection
    :param start_date: lower date bound; defaults to '2000-01-01'
    :param end_date: upper date bound; defaults to today
    :return: DataFrame with a fresh 0..n-1 index
    """
    download = s_query.QueryDatas(username=username, password=password, host=host, port=port)
    if not start_date:
        # When extracting from the database this must not fall back to self.start_date.
        start_date = '2000-01-01'
    if not end_date:
        end_date = self.today.strftime('%Y-%m-%d')
    projection = {column: 1 for column in pro_list}
    df = download.data_to_df(
        db_name=db_name,
        table_name=table_name,
        start_date=start_date,
        end_date=end_date,
        projection=projection,
    )
    # Same post-processing the old code applied: a new frame with a reset index.
    return pd.concat([df], ignore_index=True)
|
153
|
-
|
154
|
-
def pov_city(self, db_name='生意经3', filename='销售地域分布', start_date=None, end_date=None, percent=None):
    """
    Plot the province / city sales distribution and save it as an HTML file.

    The figure is a 2x3 plotly grid: three pies on the first row (sales by city
    tier, net sales by province, GMV by province) and stacked sales/refund bars
    on the second row (top 15 provinces, top 15 cities).

    Changes from the previous version:
      * ``percent=None`` used to reach ``x < percent`` and ``percent * 100``
        and raise TypeError; it now defaults to 0.016, the threshold already
        hard-coded for the net-sales pie.
      * Missing chart slots are no longer skipped with a bare ``except:``.
      * Aggregations use the string 'sum' instead of the deprecated ``np.sum``
        alias, and frames are no longer mutated in place through slices.

    :param db_name: database holding the yearly 地域分析_城市_<year> / 地域分析_省份_<year> tables
    :param filename: output file name (without extension) inside self.path
    :param start_date: start of the window; defaults to self.start_date
    :param end_date: end of the window; defaults to today
    :param percent: share (0-1) below which a province is relabelled '其他' in the GMV pie; defaults to 0.016
    :return: None
    """
    if not start_date:
        start_date = self.start_date
    if not end_date:
        end_date = self.today.strftime('%Y-%m-%d')
    if percent is None:
        percent = 0.016

    shop = '万里马官方旗舰店'
    year = datetime.datetime.today().year
    totals = {'销售额': ('销售额', 'sum'), '退款额': ('退款额', 'sum')}

    # City -> city-tier lookup table.
    pov_set = self.getdata(
        db_name='属性设置3',
        table_name='城市等级',
        pro_list=[],
        start_date=start_date,
        end_date=end_date
    )

    # City-level sales of the flagship shop; cities without positive sales are dropped.
    df_city = self.getdata(
        db_name=db_name,
        table_name=f'地域分析_城市_{year}',
        pro_list=['日期', '店铺名称', '城市', '销售额', '退款额'],
        start_date=start_date,
        end_date=end_date
    )
    df_city = df_city[df_city['店铺名称'] == shop]
    df_city = df_city.groupby(['店铺名称', '城市'], as_index=False).agg(**totals)
    df_city = df_city[df_city['销售额'] > 0]

    # Province-level sales of the flagship shop.
    df_pov = self.getdata(
        db_name=db_name,
        table_name=f'地域分析_省份_{year}',
        pro_list=['日期', '店铺名称', '省份', '销售额', '退款额'],
        start_date=start_date,
        end_date=end_date
    )
    df_pov = df_pov[df_pov['店铺名称'] == shop]
    df_pov = df_pov.groupby(['店铺名称', '省份'], as_index=False).agg(**totals)
    df_pov = df_pov.drop_duplicates(subset='省份', keep='last', ignore_index=True)

    # df_pov2: GMV pie (GMV = sales + refunds); small provinces become '其他'.
    df_pov['gmv销售'] = df_pov['销售额'] + df_pov['退款额']
    df_pov = df_pov.sort_values(['gmv销售'], ascending=[False], ignore_index=True)
    df_pov2 = df_pov.copy()
    gmv_total = df_pov2['gmv销售'].sum()
    df_pov2.loc[df_pov2['gmv销售'] / gmv_total < percent, '省份'] = '其他'

    # df_pov3: net-sales pie.
    # NOTE(review): this pie keeps its fixed 0.016 threshold and ignores
    # `percent`, exactly as before - confirm whether it should follow `percent`.
    df_pov = df_pov.sort_values(['销售额'], ascending=[False], ignore_index=True)
    df_pov3 = df_pov.copy()
    sales_total = df_pov3['销售额'].sum()
    df_pov3.loc[df_pov3['销售额'] / sales_total < 0.016, '省份'] = '其他'

    # df_pov1: stacked bar of the 15 provinces with the highest sales.
    df_pov1 = df_pov.head(15)
    percentages = df_pov1['gmv销售'] / df_pov1['gmv销售'].sum() * 100
    bar_list = [(
        '省份销售/退款',
        df_pov1['省份'].tolist(),
        df_pov1['销售额'].tolist(),
        percentages,
        df_pov1['退款额'].tolist(),
    )]

    # Attach the city tier to every city and total the sales per tier.
    pov_set = pov_set[['城市等级', '城市']].drop_duplicates(subset='城市', keep='last', ignore_index=True)
    df_city = pd.merge(df_city, pov_set, on='城市', how='left')
    df_level = df_city.groupby(['店铺名称', '城市等级'], as_index=False).agg(**totals)
    pie_list = [
        ('按城市等级', df_level['城市等级'].tolist(), df_level['销售额'].tolist()),
        ('净销售 top省份', df_pov3['省份'].tolist(), df_pov3['销售额'].tolist()),
        ('GMV top省份', df_pov2['省份'].tolist(), df_pov2['gmv销售'].tolist())
    ]

    # df_city1: stacked bar of the 15 cities with the highest sales.
    df_city = df_city.drop_duplicates(subset='城市', keep='last', ignore_index=True)
    df_city['gmv销售'] = df_city['销售额'] + df_city['退款额']
    df_city = df_city.sort_values(['销售额'], ascending=[False], ignore_index=True)
    df_city = df_city[df_city['城市'] != '其他']
    # Share of ALL listed cities' GMV, trimmed to the 15 bars that are drawn.
    percentages = (df_city['gmv销售'] / df_city['gmv销售'].sum() * 100).head(15)
    df_city1 = df_city.head(15)
    bar_list.append((
        '城市销售/退款',
        df_city1['城市'].tolist(),
        df_city1['销售额'].tolist(),
        percentages,
        df_city1['退款额'].tolist(),
    ))

    specs = [
        [{"type": "pie"} for _ in range(3)],
        [{"type": "bar"} for _ in range(3)],
    ]
    fig = make_subplots(rows=2, cols=3, specs=specs)

    # Row 1: pies.
    for slot, (title, labels, values) in enumerate(pie_list):
        fig.add_trace(
            go.Pie(
                labels=labels,
                values=values,
                name=title,
                textinfo='label+percent'
            ),
            row=1,
            col=slot + 1,
        )
        fig.add_annotation(
            text=title,
            x=0.14 + 0.355 * slot,
            y=0.99,
            xref='paper',  # relative to the whole figure area
            yref='paper',
            showarrow=True,
            align="left",
            font=dict(size=14)
        )

    # Row 2: sales bar plus refund bar per slot (stacked through barmode below).
    # Only two of the three slots are populated; the third stays empty.
    for slot, (title, labels, values, percentages, refunds) in enumerate(bar_list):
        fig.add_trace(
            go.Bar(
                x=labels,
                y=values,
                name='销售额',
                orientation='v',  # vertical bars
                width=0.55,  # maximum bar width
            ),
            row=2,
            col=slot + 1,
        )
        fig.add_trace(
            go.Bar(
                x=labels,
                y=refunds,
                name='退款额',
                orientation='v',
                text=percentages.map('{:.1f}%'.format),  # percentage label
                textposition='outside',  # label sits outside the bar
                width=0.55,
            ),
            row=2,
            col=slot + 1,
        )
        fig.add_annotation(
            text=title,
            x=0.14 + 0.355 * slot,
            y=0.99 - 0.58,
            xref='paper',
            yref='paper',
            showarrow=True,
            align="left",
            font=dict(size=14)
        )

    fig.update_layout(
        title_text='销售地域分布',
        margin=dict(
            l=100,  # left margin
            r=100,
            t=80,  # top margin
            b=80,
        ),
        legend=dict(
            orientation='v',  # 'h' = horizontal, 'v' = vertical
            font=dict(
                size=12  # legend font size
            )
        ),
        barmode='stack',  # stack / group / overlay / relative
    )
    fig.add_annotation(
        text=f'统计时间周期: {start_date}~{end_date} tips: 饼图剔除了销售<{percent * 100}%的数据',
        x=0.5,
        y=-0.09,
        xref='paper',
        yref='paper',
        showarrow=False,
        align="left",
        font=dict(size=12),
    )
    fig.write_html(os.path.join(self.path, f'{filename}.html'))
|
355
|
-
|
356
|
-
|
357
|
-
def dpll(self, db_name='聚合数据', table_name='店铺流量来源构成', pro_list=None, filename='店铺流量来源'):
    """
    Plot shop traffic sources as a 2x3 grid of pies and save it as HTML.

    Row 1 shows second-level sources and row 2 third-level sources, each for
    the last 1, 7 and 30 days. Slices below 1% of the visitors are left out.

    Changes from the previous version:
      * f-strings no longer reuse the enclosing quote character, which is a
        SyntaxError before Python 3.12.
      * The third-level relabelling is vectorised, so a look-back window with
        no rows no longer goes through the row-wise apply-and-assign.
      * Returns early when the shop/category filter leaves nothing, instead of
        failing on ``NaT.strftime``.
      * A zero visitor total no longer causes a division by zero.

    :param db_name: database name
    :param table_name: table name
    :param pro_list: columns to fetch; defaults to the columns this chart needs
    :param filename: output file name (without extension) inside self.path
    :return: None
    """
    if not pro_list:
        pro_list = ['日期', '店铺名称', '类别', '来源构成', '二级来源', '三级来源', '访客数']
    df = self.getdata(db_name=db_name, table_name=table_name, pro_list=pro_list, start_date='2024-11-01', end_date=self.end_date)
    if len(df) == 0:
        print(f'数据不能为空: {table_name}')
        return
    df['日期'] = pd.to_datetime(df['日期'])
    df = df[
        (df['店铺名称'] == '万里马官方旗舰店') &
        (df['类别'] == '非全站推广期') &
        (df['来源构成'] == '商品流量')
    ]
    if len(df) == 0:
        print(f'数据不能为空: {table_name}')
        return
    today = datetime.date.today()
    max_date = df['日期'].max().strftime('%Y-%m-%d')

    data_list = []
    for days in [1, 7, 30]:
        cutoff = pd.to_datetime(today - datetime.timedelta(days=days))
        df_linshi = df[df['日期'] >= cutoff]

        # Third-level sources. A '汇总' (total) row is dropped when its
        # second-level source also has detail rows; otherwise it stands in for
        # the missing detail under the second-level name.
        df_linshi3 = df_linshi[df_linshi['二级来源'] != '汇总'].copy()
        th_list = df_linshi3.groupby(['日期', '店铺名称', '类别', '来源构成', '二级来源']).size()
        th_list = th_list.reset_index()
        th_list = th_list[th_list[0] > 1]
        th_list = th_list['二级来源'].tolist()
        is_total = df_linshi3['三级来源'] == '汇总'
        has_detail = df_linshi3['二级来源'].isin(th_list)
        df_linshi3.loc[is_total & has_detail, '三级来源'] = ''
        df_linshi3.loc[is_total & ~has_detail, '三级来源'] = df_linshi3.loc[is_total & ~has_detail, '二级来源']
        df_linshi3 = df_linshi3[df_linshi3['三级来源'] != '']
        df_linshi3 = df_linshi3.groupby(['三级来源'], as_index=False).agg(**{'访客数': ('访客数', 'sum')})

        # Second-level sources: only their '汇总' rows.
        df_linshi2 = df_linshi[(df_linshi['二级来源'] != '汇总') & (df_linshi['三级来源'] == '汇总')]
        df_linshi2 = df_linshi2.groupby(['二级来源'], as_index=False).agg(**{'访客数': ('访客数', 'sum')})
        data_list.append({'来源类型': '三级来源', '统计周期': days, '数据主体': df_linshi3})
        data_list.append({'来源类型': '二级来源', '统计周期': days, '数据主体': df_linshi2})

    # Both rows of the grid hold pies.
    specs = [[{"type": "pie"} for _ in range(3)] for _ in range(2)]
    fig = make_subplots(rows=2, cols=3, specs=specs)

    # Placement per source type: grid row, title x origin/step, title y, arrow.
    placement = {
        '二级来源': dict(row=1, x0=0.14, step=0.355, y=0.98, showarrow=True),
        '三级来源': dict(row=2, x0=0.12, step=0.39, y=-0.12, showarrow=False),
    }
    used_columns = {'二级来源': 0, '三级来源': 0}
    threshold_percentage = 1  # minimum share (in %) for a slice to be drawn

    for item in data_list:
        source_type = item['来源类型']
        labels = item['数据主体'][source_type].tolist()
        values = item['数据主体']['访客数'].tolist()
        total = sum(values)
        # Keep only slices at or above the threshold; nothing qualifies when total is 0.
        kept = [
            (label, value) for label, value in zip(labels, values)
            if total and (value / total) * 100 >= threshold_percentage
        ]
        where = placement[source_type]
        column = used_columns[source_type]
        fig.add_trace(
            go.Pie(
                labels=[label for label, _ in kept],
                values=[value for _, value in kept],
                name=source_type,
                textinfo='label+percent'
            ),
            row=where['row'],
            col=column + 1,
        )
        period = item['统计周期']
        fig.add_annotation(
            text=f'{source_type} 最近{period}天',
            x=where['x0'] + where['step'] * column,
            y=where['y'],
            xref='paper',  # relative to the whole figure area
            yref='paper',
            showarrow=where['showarrow'],
            align="left",
            font=dict(size=14),
        )
        used_columns[source_type] = column + 1

    fig.update_layout(
        title_text='店铺流量来源',
        margin=dict(
            l=100,  # left margin
            r=100,
            t=100,  # top margin
            b=100,
        ),
        legend=dict(
            orientation='v',  # 'h' = horizontal, 'v' = vertical
            font=dict(
                size=12  # legend font size
            )
        )
    )
    fig.add_annotation(
        text=f'最近数据日期: {max_date}',
        x=0.5,
        y=-0.25,
        xref='paper',
        yref='paper',
        showarrow=False,
        align="left",
        font=dict(size=12),
    )
    fig.write_html(os.path.join(self.path, f'{filename}.html'))
|
500
|
-
|
501
|
-
def tg(self, db_name='聚合数据', table_name='多店推广场景_按日聚合', pro_list=None, filename='多店推广场景', days=None, start_date=None, end_date=None):
    """
    Plot daily promotion spend per shop and save it as HTML.

    One column per shop, ordered by total spend: a line chart of spend per
    marketing scene on row 1 and a spend-share pie on row 2.

    Changes from the previous version:
      * The ROI annotation f-string no longer reuses the enclosing quote
        character, which is a SyntaxError before Python 3.12.
      * Returns early when the date filter leaves no rows, instead of failing
        on ``NaT.strftime`` or a zero-column subplot grid.
      * ROI is reported as 0 when the spend is 0 instead of dividing by zero.

    :param db_name: database name
    :param table_name: table name
    :param pro_list: columns to fetch; defaults to the columns this chart needs
    :param filename: output file name (without extension) inside self.path
    :param days: look-back window in days, used only when no start/end dates are given
    :param start_date: if given together with end_date, days is ignored; if nothing is given, days = 7
    :param end_date: see start_date
    :return: None
    """
    if not pro_list:
        pro_list = ['日期', '店铺名称', '营销场景', '花费', '成交金额']
    df = self.getdata(db_name=db_name, table_name=table_name, pro_list=pro_list)
    if len(df) == 0:
        print(f'数据不能为空: {table_name}')
        return
    df['日期'] = pd.to_datetime(df['日期'])
    today = datetime.date.today()

    if start_date and end_date:
        df = df[(df['日期'] >= pd.to_datetime(start_date)) & (df['日期'] <= pd.to_datetime(end_date))]
    else:
        window = days if days else 7
        df = df[df['日期'] >= pd.to_datetime(today - datetime.timedelta(days=window))]
    if len(df) == 0:
        print(f'数据不能为空: {table_name}')
        return

    df = df.groupby(['日期', '店铺名称', '营销场景'], as_index=False).agg(**{'花费': ('花费', 'sum'), '成交金额': ('成交金额', 'sum')})
    max_date = df['日期'].max().strftime('%Y-%m-%d')
    min_date = df['日期'].min().strftime('%Y-%m-%d')

    # One frame per shop, highest total spend first.
    df_other = df.groupby(['店铺名称'], as_index=False).agg(**{'花费': ('花费', 'sum')})
    df_other = df_other.sort_values('花费', ascending=False)
    data_list = [df[df['店铺名称'] == shopname] for shopname in df_other['店铺名称'].tolist()]
    if not data_list:
        print(f'数据不能为空: {table_name}')
        return

    # Row 1 holds line charts ("xy"), row 2 holds pies.
    specs = [
        [{"type": "xy"} for _ in data_list],
        [{"type": "pie"} for _ in data_list],
    ]
    fig = make_subplots(rows=2, cols=len(data_list), specs=specs)
    for column, shop_df in enumerate(data_list, start=1):
        shop = shop_df['店铺名称'].tolist()[0]
        # Row 1: one spend line per marketing scene.
        for scence in shop_df['营销场景'].unique():
            df_inside = shop_df[shop_df['营销场景'] == scence]
            fig.add_trace(go.Scatter(x=df_inside['日期'].tolist(), y=df_inside['花费'].tolist(), mode='lines', name=f'{scence}_{shop}'), row=1, col=column)
        # Row 2: spend share per marketing scene.
        by_scene = shop_df.groupby(['营销场景'], as_index=False).agg(**{'花费': ('花费', 'sum')})
        fig.add_trace(go.Pie(labels=by_scene['营销场景'].tolist(), values=by_scene['花费'].tolist(), name=shop, textinfo='label+percent'), row=2, col=column)
        fig.add_annotation(
            text=shop,
            x=0.01 + 0.395 * (column - 1),
            y=1.04,
            xref='paper',  # relative to the whole figure area
            yref='paper',
            showarrow=False,
            align="left",
            font=dict(size=16),
        )

    fig.add_annotation(
        text=f'统计范围: {min_date} ~ {max_date}',
        x=0.5,
        y=-0.15,
        xref='paper',
        yref='paper',
        showarrow=False,
        align="left",
        font=dict(size=14),
    )
    fig.update_layout(
        title_text='多店推广花费_按日聚合',
        xaxis_title='日期',
        yaxis_title='花费',
        margin=dict(
            l=100,  # left margin
            r=100,
            t=100,  # top margin
            b=150,
        ),
    )

    # Total spend and ROI per shop, printed next to the shop title.
    for column, item in enumerate(data_list, start=1):
        spend = item['花费'].sum()
        roi = round(item['成交金额'].sum() / spend, 2) if spend else 0
        fig.add_annotation(
            text=f'合计: {int(spend)}元 / roi: {roi}',
            x=0.15 + 0.425 * (column - 1),
            y=1.04,
            xref='paper',
            yref='paper',
            showarrow=False,
            align="left",
            font=dict(size=16),
        )
    fig.write_html(os.path.join(self.path, f'{filename}.html'))
|
630
|
-
|
631
|
-
def item_crowd(self, db_name='商品人群画像2', table_list=None, pro_list=None, filename='商品人群画像', item_id=None, lab='全部渠道', option='商详浏览', d_str='近30天', last_date=None):
    """
    Draw the audience profile of one product as up to six bar charts (2x3 grid)
    and save the figure as HTML.

    Each table in table_list is one profile dimension. For every dimension the
    most recent date is plotted, with bars sorted from highest to lowest.

    :param db_name: database name
    :param table_list: profile tables to read; defaults to seven built-in dimensions
    :param pro_list: columns to fetch; defaults to the columns this chart needs
    :param filename: output file name (without extension) inside self.path
    :param item_id: product id; defaults to 696017020186
    :param lab: value required in the 洞察类型 column
    :param option: value required in the 行为类型 column
    :param d_str: value required in the 统计周期 column
    :param last_date: accepted but overwritten inside the loop before it is read
    :return: None
    """
    pro_list = pro_list or ['日期', '店铺名称', '洞察类型', '行为类型', '商品id', '统计周期', '标签名称', '标签人群数量']
    table_list = table_list or [
        '消费能力等级',
        '用户年龄',
        '月均消费金额',
        '大快消策略人群',
        '店铺潜新老客',
        '城市等级',
        '用户职业',
    ]
    item_id = item_id or 696017020186

    # Load every dimension and narrow it to the requested product and filters.
    dict_list = {}
    for table_name in table_list:
        frame = self.getdata(db_name=db_name, table_name=table_name, pro_list=pro_list)
        if not len(frame):
            print(f'{table_name}: 数据长度不能为 0')
            continue
        frame['日期'] = pd.to_datetime(frame['日期'])
        frame['商品id'] = frame['商品id'].astype('int64')
        frame = frame[frame['商品id'] == int(item_id)]
        keep = (
            ~frame['标签名称'].str.contains('unknown', case=False) &
            (frame['洞察类型'] == lab) &
            (frame['行为类型'] == option) &
            (frame['统计周期'] == d_str)
        )
        dict_list[table_name] = frame[keep]

    fig = make_subplots(rows=2, cols=3)
    count = 0  # grid slot index; an empty dimension still consumes its slot
    sv_date = {}
    for table_name, frame in dict_list.items():
        if not len(frame):
            count += 1
            continue
        if count > 5:  # only six slots exist
            break
        last_date = frame['日期'].max()
        sv_date[table_name] = last_date.strftime('%Y-%m-%d')
        # Sort first so bars run from highest to lowest; labels and values
        # must come from the same sorted frame.
        latest = frame[frame['日期'] == last_date].sort_values(
            ['标签人群数量'], ascending=[False], ignore_index=True)
        percentages = latest['标签人群数量'] / latest['标签人群数量'].sum() * 100
        grid_row, grid_col = divmod(count, 3)
        fig.add_trace(
            go.Bar(
                x=latest['标签名称'].tolist(),
                y=latest['标签人群数量'].tolist(),
                name=table_name,
                orientation='v',  # vertical bars
                text=percentages.map('{:.1f}%'.format),  # percentage label
                textposition='outside',  # label sits outside the bar
                width=0.55  # maximum bar width
            ),
            row=grid_row + 1,
            col=grid_col + 1,
        )
        fig.add_annotation(
            text=f'{table_name}',
            x=0.01 + 0.385 * grid_col,
            y=1.04 - 0.59 * grid_row,
            xref='paper',  # relative to the whole figure area
            yref='paper',
            showarrow=False,
            align="left",
            font=dict(size=15),
        )
        count += 1

    fig.update_layout(
        title_text=f'{db_name} 商品id: {item_id}',
        xaxis_title='标签',
        yaxis_title='人群数量',
        margin=dict(
            l=100,  # left margin
            r=100,
            t=100,  # top margin
            b=100,
        ),
    )
    fig.add_annotation(
        text=f'统计范围: {lab}/{option} {d_str}',
        x=0.5,
        y=-0.1,
        xref='paper',
        yref='paper',
        showarrow=False,
        align="left",
        font=dict(size=14),
    )
    # Latest data date per dimension, printed without dict punctuation.
    fig.add_annotation(
        text=re.sub('[{}\',]', '', str(sv_date)),
        x=0.5,
        y=-0.135,
        xref='paper',
        yref='paper',
        showarrow=False,
        align="left",
        font=dict(size=12),
    )
    fig.write_html(os.path.join(self.path, f'{filename}.html'))
|
755
|
-
|
756
|
-
def crowd(self, db_name='人群画像2', table_list=None, pro_list=None, filename='达摩盘人群画像', crowd_id=None, last_date=None):
    """
    Draw the profile of one audience (crowd) as up to six bar charts (2x3 grid)
    and save the figure as HTML.

    Each table in table_list is one profile dimension. For every dimension the
    most recent date is plotted. Labels containing "unknown" are left out of
    the bars and their size is reported in a footnote.

    Changes from the previous version:
      * The footnote f-strings no longer contain same-type quotes and a
        backslash inside the expression, which is a SyntaxError before
        Python 3.12.
      * The crowd name comes from the last dimension that actually has rows.
        It used to be read from whatever ``df`` the last loop iteration left
        behind (IndexError on an empty frame, NameError with no tables).
      * The unknown count is 0 when a dimension has no "unknown" label. It
        used to stay an empty DataFrame and fail in ``int(v)``.

    :param db_name: database name
    :param table_list: profile tables to read; defaults to seven built-in dimensions
    :param pro_list: columns to fetch; defaults to the columns this chart needs
    :param filename: output file name (without extension) inside self.path
    :param crowd_id: audience id; defaults to 40457369
    :param last_date: accepted but overwritten inside the loop before it is read
    :return: None
    """
    if not pro_list:
        pro_list = ['日期', '店铺名称', '人群id', '人群名称', '标签名称', '标签人群数量']
    if not table_list:
        table_list = [
            '消费能力等级',
            '用户年龄',
            '月均消费金额',
            '大快消策略人群',
            '店铺潜新老客',
            '城市等级',
            '用户职业',
        ]
    if not crowd_id:
        crowd_id = 40457369

    dict_list = {}
    crowd_name = ''
    for table_name in table_list:
        df = self.getdata(db_name=db_name, table_name=table_name, pro_list=pro_list)
        if len(df) == 0:
            print(f'{table_name}: 数据长度不能为 0')
            continue
        df['日期'] = pd.to_datetime(df['日期'])
        df['人群id'] = df['人群id'].astype('int64')
        df = df[df['人群id'] == int(crowd_id)]
        df = df[df['店铺名称'] == '万里马官方旗舰店']
        dict_list[table_name] = df
        if len(df) > 0:
            # Any row will do for the name; the last populated dimension wins.
            crowd_name = df['人群名称'].tolist()[0]

    fig = make_subplots(rows=2, cols=3)
    count = 0  # grid slot index; an empty dimension still consumes its slot
    sv_date = {}
    unknown_dict = {}
    for table_name, df in dict_list.items():
        if len(df) == 0:
            count += 1
            continue
        if count > 5:  # only six slots exist
            break
        last_date = df['日期'].max()
        df = df[df['日期'] == last_date]
        is_unknown = df['标签名称'].str.contains('unknown', case=False)
        unknown_rows = df[is_unknown]
        # Size of the "unknown" group, 0 when the dimension has none.
        unknown = unknown_rows['标签人群数量'].tolist()[0] if len(unknown_rows) > 0 else 0

        # Sort first so bars run from highest to lowest; labels and values
        # must come from the same sorted frame.
        df = df[~is_unknown].sort_values(['标签人群数量'], ascending=[False], ignore_index=True)
        labels = df['标签名称'].tolist()
        values = df['标签人群数量'].tolist()
        crowd_sum = df['标签人群数量'].values.sum()
        sv_date[table_name] = crowd_sum
        unknown_dict[table_name] = unknown
        percentages = df['标签人群数量'] / df['标签人群数量'].sum() * 100
        bar = go.Bar(
            x=labels,
            y=values,
            name=table_name,
            orientation='v',  # vertical bars
            text=percentages.map('{:.1f}%'.format),  # percentage label
            textposition='outside',  # label sits outside the bar
            width=0.55  # maximum bar width
        )
        grid_row, grid_col = divmod(count, 3)
        fig.add_trace(
            bar,
            row=grid_row + 1,
            col=grid_col + 1,
        )
        fig.add_annotation(
            text=f'{table_name} 人群数量: {crowd_sum}',
            x=0.01 + 0.42 * grid_col,
            y=1.04 - 0.59 * grid_row,
            xref='paper',  # relative to the whole figure area
            yref='paper',
            showarrow=False,
            align="left",
            font=dict(size=15),
        )
        count += 1

    fig.update_layout(
        title_text=f'达摩盘人群画像 人群id: {crowd_id} / 人群名字: 【{crowd_name}】',
        xaxis_title='标签',
        yaxis_title='人群数量',
        margin=dict(
            l=100,  # left margin
            r=100,
            t=100,  # top margin
            b=100,
        ),
    )

    # Render both summaries without dict punctuation. The text is built outside
    # the f-strings so the file also parses on Python versions before 3.12.
    res = {k: int(v) for k, v in sv_date.items()}
    unknown_res = {k: int(v) for k, v in unknown_dict.items()}
    res_text = re.sub('[{}\',]', '', str(res))
    unknown_text = re.sub('[{}\',]', '', str(unknown_res))

    fig.add_annotation(
        text=f'分析人群数量: {res_text}',
        x=0.5,
        y=-0.1,
        xref='paper',
        yref='paper',
        showarrow=False,
        align="left",
        font=dict(size=12),
    )
    fig.add_annotation(
        text=f'与官方统计存在差异,官方计算中包含未知人群,数量为: {unknown_text},未知人群占比越大,同官方差异越大',
        x=0.5,
        y=-0.135,
        xref='paper',
        yref='paper',
        showarrow=False,
        align="left",
        font=dict(size=12),
    )
    fig.write_html(os.path.join(self.path, f'{filename}.html'))
|
894
|
-
|
895
|
-
def item_show(self, db_name='聚合数据', table_list=None, pro_list=None, filename='商品数据', start_date=None, end_date=None):
    """Write an HTML report of the top 100 items ranked by promotion spend.

    Promotion rows are read from the '天猫_主体报表' table of ``db_name`` via
    ``self.getdata``, summed per (店铺名称, 商品id), enriched with the item's
    white-background image from '属性设置3'.'商品sku属性', and rendered into
    ``<self.path>/<filename>.html``.  A pie chart of the top 10 items by spend
    is also saved under ``<self.path>/temp_images`` if it is not already there.

    Args:
        db_name: Database holding the promotion report table.
        table_list: Unused; kept so existing callers keep working.
        pro_list: Columns to read; defaults to the columns this report needs.
        filename: Output file name, without the ``.html`` suffix.
        start_date: Forwarded to ``self.getdata`` for the promotion query.
        end_date: Forwarded to ``self.getdata`` for both queries.

    Returns:
        None.  Prints a message and returns early when no promotion rows
        were found.
    """
    if not pro_list:
        pro_list = ['日期', '店铺名称', '营销场景', '商品id', '花费', '点击量', '加购量', '成交笔数', '成交金额']
    table_name = '天猫_主体报表'
    df = self.getdata(
        db_name=db_name,
        table_name=table_name,
        pro_list=pro_list,
        start_date=start_date,
        end_date=end_date
    )
    # Bail out before touching the image table: nothing to report on.
    if len(df) == 0:
        print(f'数据不能为空: {table_name}')
        return

    df_set = self.getdata(
        db_name='属性设置3',
        table_name='商品sku属性',
        pro_list=['商品id', '白底图'],
        start_date='2020-01-01',
        end_date=end_date
    )
    # '0' marks a missing image.  Build a new frame instead of mutating the
    # filtered slice in place, which triggers SettingWithCopyWarning.
    df_set = df_set[df_set['白底图'] != '0'].drop_duplicates(
        subset='商品id', keep='last', ignore_index=True)

    df['日期'] = pd.to_datetime(df['日期'])
    min_date = df['日期'].min().strftime('%Y-%m-%d')
    max_date = df['日期'].max().strftime('%Y-%m-%d')

    metrics = ['花费', '点击量', '加购量', '成交笔数', '成交金额']
    # The string alias 'sum' avoids passing np.sum to agg(), which newer
    # pandas releases flag as deprecated.
    df = df.groupby(['店铺名称', '商品id'], as_index=False).agg(
        **{metric: (metric, 'sum') for metric in metrics})

    cost_sum = df['花费'].sum()
    # Guard the share against a zero total, and keep the f-strings free of
    # nested same-type quotes so the module parses before Python 3.12.
    df['花费占比'] = [
        f'{round(cost / cost_sum * 100, 1)}%' if cost_sum else '0%'
        for cost in df['花费']
    ]
    df['roi投产'] = [
        f'{round(sales / cost, 2)}' if cost > 0 else 0
        for sales, cost in zip(df['成交金额'], df['花费'])
    ]

    df = pd.merge(df, df_set, on='商品id', how='left')
    df = df.sort_values('花费', ascending=False, ignore_index=True).head(100).copy()
    df.insert(0, '序号', range(1, len(df) + 1))  # 1-based rank column

    # Directory for generated images.
    temp_dir = os.path.join(self.path, 'temp_images')
    os.makedirs(temp_dir, exist_ok=True)

    top10 = df.head(10)
    pic_title1 = '商品花费占比'
    img_file1 = os.path.join(temp_dir, f'{pic_title1}.png')
    # NOTE(review): an existing chart is never refreshed, and the template
    # below only references it inside an HTML comment.
    if not os.path.isfile(img_file1):
        fig, ax = plt.subplots()
        ax.pie(top10['花费'], labels=top10['商品id'], autopct='%1.1f%%', startangle=140)
        ax.set_title(pic_title1)
        ax.axis('equal')  # keep the pie circular
        fig.savefig(img_file1)
        plt.close(fig)

    def convert_image_to_html(image_url_or_base64):
        """Return an <img> tag for a local file path, URL or data URI."""
        # Items without a match in df_set carry NaN after the left merge;
        # os.path.isfile() raises TypeError on a float.
        if not isinstance(image_url_or_base64, str) or not image_url_or_base64:
            return ''
        if os.path.isfile(image_url_or_base64):
            # Local file: embed its bytes as a Base64 data URI.
            with open(image_url_or_base64, "rb") as image_file:
                encoded_string = base64.b64encode(image_file.read()).decode('utf-8')
            return f'<img class="img" src="data:image/jpeg;base64,{encoded_string}" alt="Image">'
        # Otherwise treat the value as an online URL or a ready-made data URI.
        return f'<img class="img" src="{image_url_or_base64}" alt="Image">'

    df['Image_HTML'] = df['白底图'].apply(convert_image_to_html)

    # Page template.  Fixed relative to the previous version: unterminated
    # Content-Type attribute, invalid `margin-down`, unitless padding, a
    # stray `*/` left by a nested CSS comment (the effective img rules are
    # unchanged), <caption>/<div> nested inside <thead>, missing <body>.
    html_template = """
    <!DOCTYPE html>
    <html lang="zh-CN">
    <head>
        <meta charset="UTF-8">
        <meta http-equiv="Content-Type" content="text/html; charset=UTF-8">
        <meta name="viewport" content="width=device-width, initial-scale=1.0">
        <title>商品推广数据</title>
        <style>
            body {
                font-family: Arial, Helvetica, sans-serif;
                line-height: 1.6;
                margin: 0;
                padding: 0;
                background-color: #f4f4f4;
            }
            .centered-table {
                margin-top: 20px;
                margin-bottom: 100px;
                margin-left: auto;
                margin-right: auto;
                border-collapse: collapse; /* 可选,用于合并表格边框 */
                width: 60%; /* 设置表格宽度为父容器宽度的50%,或者你可以使用固定宽度 */
            }
            thead th {
                background-color: #f2f2f2; /* 设置表头背景颜色 */
                font-size: 16px; /* 增大表头字体 */
                font-weight: bold; /* 加粗表头字体 */
                text-align: center; /* 设置表头文本居中 */
            }
            caption {
                caption-side: top; /* 标题显示在表格上方 */
                font-size: 24px; /* 设置标题字体大小 */
                font-weight: bold; /* 设置标题字体加粗 */
                text-align: center; /* 设置标题文本居中 */
                margin-bottom: 20px; /* 为标题和表格之间添加间距 */
            }
            td, th {
                border: 1px solid #ddd; /* 单元格边框 */
                line-height: 1em; /* 设置行高为2倍的当前字体大小 */
                padding: 5px 5px; /* 设置左右边距,内边距增加单元格的整体高度 */
                text-align: center; /* 设置文本对齐方式 */
            }
            img {
                width: 80px; /* 设置图片宽度 */
                height: auto; /* 高度自动调整以保持宽高比 */
                /* 如果需要垂直居中且图片是块级元素,则可以使用以下样式(但通常不是必需的,因为图片默认是内联元素) */
                /* text-align: center; 水平居中(适用于内联或块级子元素) */
                display: block;
                margin: 0 auto;
            }
            button {
                border: none;
                padding: 8px 12px;
                font-size: 14px;
                cursor: pointer;
            }
            .centered-text {
                position: fixed; /* 固定定位 */
                bottom: 15px; /* 距离页面顶部10px(可根据需要调整) */
                right: calc(25vw - 420px); /* 距离页面右侧1/4宽度减去文本自身的宽度和可能的边距(这里假设文本宽度和边距共10px,实际情况需根据文本样式调整) */
                /* 如果文本宽度未知或可变,可以只使用25vw并接受可能的溢出 */
                /* right: 25vw; */ /* 直接使用25vw定位,不考虑文本宽度 */
                padding: 3px 10px; /* 可选的文本内边距 */
                background-color: rgba(255, 255, 255, 0.8); /* 可选的背景色和透明度 */
                box-shadow: 0 0 10px rgba(0, 0, 0, 0.1); /* 可选的阴影效果 */
            }

            .image-container {
                position: absolute; /* 使用绝对定位 */
                width: 15%; /* 设置图片宽度 */
                left: 10px; /* 距离页面左侧20px */
                top: 50%; /* 距离页面顶部50% */
                transform: translateY(-50%); /* 向上移动自身高度的一半,以实现垂直居中 */
            }
            .image-container img {
                width: 20%; /* 设置图片宽度 */
                height: auto; /* 高度自动调整以保持宽高比 */
                /* 如果需要垂直居中且图片是块级元素,则可以使用以下样式(但通常不是必需的,因为图片默认是内联元素)*/
                display: flex;
                flex-direction: column;
                align-items: flex-start;
            }
            .button1 {
                border: none;
                padding: 8px 12px;
                font-size: 14px;
                cursor: pointer;
                position: absolute; /* 使用绝对定位 */
                left: 5%; /* 距离页面左侧20px */
                top: 10%; /* 距离页面顶部50% */
            }
            .button2 {
                border: none;
                padding: 8px 12px;
                font-size: 14px;
                cursor: pointer;
                position: absolute; /* 使用绝对定位 */
                left: 5%; /* 距离页面左侧20px */
                top: 17%; /* 距离页面顶部50% */
            }
            .button3 {
                border: none;
                padding: 8px 12px;
                font-size: 14px;
                cursor: pointer;
                position: absolute; /* 使用绝对定位 */
                left: 5%; /* 距离页面左侧20px */
                top: 24%; /* 距离页面顶部50% */
            }
            .button4 {
                border: none;
                padding: 8px 12px;
                font-size: 14px;
                cursor: pointer;
                position: absolute; /* 使用绝对定位 */
                left: 5%; /* 距离页面左侧20px */
                top: 31%; /* 距离页面顶部50% */
            }
            .button5 {
                border: none;
                padding: 8px 12px;
                font-size: 14px;
                cursor: pointer;
                position: absolute; /* 使用绝对定位 */
                left: 5%; /* 距离页面左侧20px */
                top: 38%; /* 距离页面顶部50% */
            }
        </style>
    </head>
    <body>

    <div class="div-button">
        <!-- 创建一个按钮 -->
        <button id="button1" class="button1">多店推广场景</button>
        <button id="button2" class="button2">店铺流量来源</button>
        <button id="button3" class="button3">达摩盘人群画像</button>
        <button id="button4" class="button4">商品人群画像</button>
        <button id="button5" class="button5">销售地域分布</button>
    </div>
    <script>
        // 获取按钮元素
        var tg = document.getElementById('button1');
        var dpll = document.getElementById('button2');
        var dmp1 = document.getElementById('button3');
        var dmp2 = document.getElementById('button4');
        var syj = document.getElementById('button5');
        tg.addEventListener('click', function() {
            window.open('{local_file1}', '_blank');
        });
        dpll.addEventListener('click', function() {
            window.open('{local_file2}', '_blank');
        });
        dmp1.addEventListener('click', function() {
            window.open('{local_file3}', '_blank');
        });
        dmp2.addEventListener('click', function() {
            window.open('{local_file4}', '_blank');
        });
        syj.addEventListener('click', function() {
            window.open('{local_file5}', '_blank');
        });
    </script>

    <p class="centered-text">统计周期</p>
    <!--
    <img class="image-container" src="{img_file1}" alt="图片">
    -->
    <table class="centered-table">
        <caption>天猫商品推广数据</caption>
        <thead>
            <tr>
                <th>序号</th>
                <th>商品</th>
                <th>店铺名称</th>
                <th>商品id</th>
                <th>花费</th>
                <th>花费占比</th>
                <th>点击量</th>
                <th>加购量</th>
                <th>成交笔数</th>
                <th>成交金额</th>
                <th>roi投产</th>
            </tr>
        </thead>
        <tbody>
            {rows}
        </tbody>
    </table>
    </body>
    </html>
    """

    report_columns = ['序号', 'Image_HTML', '店铺名称', '商品id', '花费', '花费占比',
                      '点击量', '加购量', '成交笔数', '成交金额', 'roi投产']
    rows = []
    for _, row in df.iterrows():
        cells = ''.join(f'<td>{row[column]}</td>' for column in report_columns)
        rows.append(f'<tr>{cells}</tr>')

    # Fill the fixed placeholders first and the table rows last, so row
    # data that happens to contain a placeholder token is left untouched.
    final_html = html_template.replace('统计周期', f'统计周期: {min_date} ~ {max_date}')
    linked_reports = {
        '{local_file1}': '多店推广场景.html',
        '{local_file2}': '店铺流量来源.html',
        '{local_file3}': '达摩盘人群画像.html',
        '{local_file4}': '商品人群画像.html',
        '{local_file5}': '销售地域分布.html',
    }
    for placeholder, target in linked_reports.items():
        final_html = final_html.replace(placeholder, target)
    final_html = final_html.replace('{rows}', ''.join(rows))

    file = os.path.join(self.path, f'{filename}.html')
    with open(file, 'w', encoding='utf-8') as f:
        f.write(final_html)
|
1209
|
-
|
1210
|
-
|
1211
|
-
def main():
    """Generate the Tmall item promotion report starting from 2024-12-01."""
    report_options = {
        'db_name': '聚合数据',
        'table_list': None,
        'pro_list': None,
        'filename': '天猫商品推广数据',
        'start_date': '2024-12-01',
        'end_date': None,
    }
    ds = DataShow()
    ds.item_show(**report_options)

    # The remaining reports are disabled; uncomment to generate them.
    #
    # # Shop traffic sources
    # ds.dpll()
    #
    # # Multi-shop aggregated promotion data
    # ds.tg(
    #     days=15,
    #     # start_date='2024-11-01',
    #     # end_date='2024-11-30',
    # )
    #
    # # Item crowd profile
    # item_id_list = [
    #     839148235697,
    # ]
    # for item_id in item_id_list:
    #     ds.item_crowd(
    #         item_id=item_id,
    #         lab='全部渠道',
    #         option='商详浏览',
    #         last_date=None,
    #         d_str='近30天',
    #     )
    #
    # # DMP (达摩盘) crowd profile
    # crowid_list = [
    #     40457166,
    # ]
    # for crowid in crowid_list:
    #     ds.crowd(
    #         crowd_id=crowid,
    #         last_date=None,
    #     )
    #
    # # Sales distribution by region
    # ds.pov_city(
    #     db_name='生意经3',
    #     filename='销售地域分布',
    #     start_date='2024-12-01',
    #     end_date=None,
    #     percent=0.015,
    # )
|
1261
|
-
|
1262
|
-
|
1263
|
-
# Script entry point: build the report only when this file is run directly,
# not when it is imported.
if __name__ == "__main__":
    main()
|