mdbq 3.3.14__py3-none-any.whl → 3.3.15__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
mdbq/aggregation/datashow.py
CHANGED
@@ -118,7 +118,7 @@ class DataShow:
|
|
118
118
|
df = pd.concat(__res, ignore_index=True)
|
119
119
|
return df
|
120
120
|
|
121
|
-
def
|
121
|
+
def dpll_bak(self, db_name='聚合数据', table_name='店铺流量来源构成', pro_list=None, filename='店铺流量来源'):
|
122
122
|
if not pro_list:
|
123
123
|
pro_list = ['日期', '三级来源', '访客数']
|
124
124
|
df = self.getdata(db_name=db_name, table_name=table_name, pro_list=pro_list, start_date='2024-11-01', end_date=self.end_date)
|
@@ -131,7 +131,7 @@ class DataShow:
|
|
131
131
|
|
132
132
|
def st_date(num=1):
|
133
133
|
return pd.to_datetime(today - datetime.timedelta(days=num))
|
134
|
-
|
134
|
+
max_date = df['日期'].max().strftime('%Y-%m-%d')
|
135
135
|
df1 = df[df['日期'] >= st_date(1)]
|
136
136
|
df2 = df[df['日期'] >= st_date(7)]
|
137
137
|
df3 = df[df['日期'] >= st_date(30)]
|
@@ -184,7 +184,7 @@ class DataShow:
|
|
184
184
|
fig.add_annotation(
|
185
185
|
text=f'最近{pie_title[i]}天',
|
186
186
|
x=0.15 + 0.35 * (i - 1),
|
187
|
-
y=0.
|
187
|
+
y=0.98,
|
188
188
|
xref='paper', # # 相对于整个图表区域
|
189
189
|
yref='paper',
|
190
190
|
showarrow=True, # 显示箭头
|
@@ -193,7 +193,7 @@ class DataShow:
|
|
193
193
|
)
|
194
194
|
i += 1
|
195
195
|
fig.update_layout(
|
196
|
-
title_text='店铺流量来源',
|
196
|
+
title_text=f'店铺流量来源 最近一天: {max_date}',
|
197
197
|
xaxis_title='X Axis',
|
198
198
|
yaxis_title='Y Axis',
|
199
199
|
# width=self.screen_width // 1.4,
|
@@ -212,6 +212,140 @@ class DataShow:
|
|
212
212
|
fig = make_sub(data_list=data_list, num=3)
|
213
213
|
fig.write_html(os.path.join(self.path, f'{filename}.html'))
|
214
214
|
|
215
|
+
def dpll(self, db_name='聚合数据', table_name='店铺流量来源构成', pro_list=None, filename='店铺流量来源'):
|
216
|
+
if not pro_list:
|
217
|
+
pro_list = ['日期', '店铺名称', '类别', '来源构成', '二级来源', '三级来源', '访客数']
|
218
|
+
df = self.getdata(db_name=db_name, table_name=table_name, pro_list=pro_list, start_date='2024-11-01', end_date=self.end_date)
|
219
|
+
if len(df) == 0:
|
220
|
+
print(f'数据不能为空: {table_name}')
|
221
|
+
return
|
222
|
+
df['日期'] = pd.to_datetime(df['日期'])
|
223
|
+
df = df[
|
224
|
+
(df['店铺名称'] == '万里马官方旗舰店') &
|
225
|
+
(df['类别'] == '非全站推广期') &
|
226
|
+
(df['来源构成'] == '商品流量')
|
227
|
+
]
|
228
|
+
today = datetime.date.today()
|
229
|
+
|
230
|
+
def st_date(num=1):
|
231
|
+
return pd.to_datetime(today - datetime.timedelta(days=num))
|
232
|
+
max_date = df['日期'].max().strftime('%Y-%m-%d')
|
233
|
+
|
234
|
+
data_list = []
|
235
|
+
for days in [1, 7, 30]:
|
236
|
+
df_linshi = df[df['日期'] >= st_date(num=days)]
|
237
|
+
# 统计三级来源
|
238
|
+
df_linshi3 = df_linshi[df_linshi['二级来源'] != '汇总']
|
239
|
+
th_list = df_linshi3.groupby(['日期', '店铺名称', '类别', '来源构成', '二级来源']).size()
|
240
|
+
th_list = th_list.reset_index()
|
241
|
+
th_list = th_list[th_list[0] > 1]
|
242
|
+
th_list = th_list['二级来源'].tolist()
|
243
|
+
df_linshi3['三级来源'] = df_linshi3.apply(lambda x: x['三级来源'] if x['三级来源'] != '汇总' else '' if x['三级来源'] == '汇总' and x['二级来源'] in th_list else x['二级来源'], axis=1)
|
244
|
+
df_linshi3 = df_linshi3[df_linshi3['三级来源'] != '']
|
245
|
+
df_linshi3 = df_linshi3.groupby(['三级来源'], as_index=False).agg(**{'访客数': ('访客数', np.sum)})
|
246
|
+
|
247
|
+
df_linshi2 = df_linshi[(df_linshi['二级来源'] != '汇总') & (df_linshi['三级来源'] == '汇总')]
|
248
|
+
df_linshi2 = df_linshi2.groupby(['二级来源'], as_index=False).agg(**{'访客数': ('访客数', np.sum)})
|
249
|
+
data_list.append({'来源类型': '三级来源', '统计周期': days, '数据主体': df_linshi3})
|
250
|
+
data_list.append({'来源类型': '二级来源', '统计周期': days, '数据主体': df_linshi2})
|
251
|
+
# print(data_list)
|
252
|
+
t_p1 = []
|
253
|
+
for i in range(3):
|
254
|
+
t_p1.extend([{"type": "pie"}]) # 折线图类型
|
255
|
+
t_p2 = []
|
256
|
+
for i in range(3):
|
257
|
+
t_p2.extend([{"type": "pie"}]) # 饼图类型
|
258
|
+
specs = [t_p1, t_p2]
|
259
|
+
fig = make_subplots(rows=2, cols=3, specs=specs)
|
260
|
+
|
261
|
+
count1 = 0
|
262
|
+
count2 = 0
|
263
|
+
for item in data_list:
|
264
|
+
labels = item['数据主体'][item['来源类型']].tolist()
|
265
|
+
values = item['数据主体']['访客数'].tolist()
|
266
|
+
# 计算每个扇区的百分比,并找出哪些扇区应该被保留
|
267
|
+
total = sum(values)
|
268
|
+
# 计算每个扇区的百分比,并找出哪些扇区应该被保留
|
269
|
+
threshold_percentage = 1 # 阈值百分比
|
270
|
+
filtered_indices = [i for i, value in enumerate(values) if
|
271
|
+
(value / total) * 100 >= threshold_percentage]
|
272
|
+
# 提取被保留的扇区的标签和值
|
273
|
+
filtered_labels = [labels[i] for i in filtered_indices]
|
274
|
+
filtered_values = [values[i] for i in filtered_indices]
|
275
|
+
if item['来源类型'] == '二级来源':
|
276
|
+
# 添加饼图
|
277
|
+
fig.add_trace(
|
278
|
+
go.Pie(
|
279
|
+
labels=filtered_labels,
|
280
|
+
values=filtered_values,
|
281
|
+
name=item['来源类型'],
|
282
|
+
textinfo='label+percent'
|
283
|
+
),
|
284
|
+
row=1,
|
285
|
+
col=count1+1,
|
286
|
+
)
|
287
|
+
x = 0.14 + 0.355 * (count1)
|
288
|
+
y = 0.98
|
289
|
+
fig.add_annotation(
|
290
|
+
text=f'{item['来源类型']} 最近{item['统计周期']}天',
|
291
|
+
x=x,
|
292
|
+
y=y,
|
293
|
+
xref='paper', # # 相对于整个图表区域
|
294
|
+
yref='paper',
|
295
|
+
showarrow=True, # 显示箭头
|
296
|
+
align="left", # 文本对齐方式
|
297
|
+
font=dict(size=14),
|
298
|
+
)
|
299
|
+
count1 += 1
|
300
|
+
else:
|
301
|
+
# 添加饼图
|
302
|
+
fig.add_trace(
|
303
|
+
go.Pie(
|
304
|
+
labels=filtered_labels,
|
305
|
+
values=filtered_values,
|
306
|
+
name=item['来源类型'],
|
307
|
+
textinfo='label+percent'
|
308
|
+
),
|
309
|
+
row=2,
|
310
|
+
col=count2+1,
|
311
|
+
)
|
312
|
+
x = 0.12 + 0.39 * (count2 % 3)
|
313
|
+
y = -0.12
|
314
|
+
fig.add_annotation(
|
315
|
+
text=f'{item['来源类型']} 最近{item['统计周期']}天',
|
316
|
+
x=x,
|
317
|
+
y=y,
|
318
|
+
xref='paper', # # 相对于整个图表区域
|
319
|
+
yref='paper',
|
320
|
+
showarrow=False, # 显示箭头
|
321
|
+
align="left", # 文本对齐方式
|
322
|
+
font=dict(size=14),
|
323
|
+
)
|
324
|
+
count2 += 1
|
325
|
+
fig.update_layout(
|
326
|
+
title_text=f'店铺流量来源 最近数据: {max_date}',
|
327
|
+
# xaxis_title='X Axis',
|
328
|
+
# yaxis_title='Y Axis',
|
329
|
+
# width=self.screen_width // 1.4,
|
330
|
+
# height=self.screen_width // 2,
|
331
|
+
margin=dict(
|
332
|
+
l=100, # 左边距
|
333
|
+
r=100,
|
334
|
+
t=100, # 上边距
|
335
|
+
b=100,
|
336
|
+
),
|
337
|
+
legend=dict(
|
338
|
+
# title='Legend Title', # 图例标题
|
339
|
+
orientation='v', # 图例方向('h' 表示水平,'v' 表示垂直)
|
340
|
+
# x=0.5, # 图例在图表中的 x 位置(0 到 1 的比例)
|
341
|
+
# y=1.02, # 图例在图表中的 y 位置(稍微超出顶部以避免遮挡数据)
|
342
|
+
font=dict(
|
343
|
+
size=12 # 图例字体大小
|
344
|
+
)
|
345
|
+
)
|
346
|
+
)
|
347
|
+
fig.write_html(os.path.join(self.path, f'{filename}.html'))
|
348
|
+
|
215
349
|
def tg(self, db_name='聚合数据', table_name='多店推广场景_按日聚合', pro_list=None, filename='多店推广场景', days=None, start_date=None, end_date=None):
|
216
350
|
"""
|
217
351
|
:param db_name:
|
@@ -243,6 +377,8 @@ class DataShow:
|
|
243
377
|
df = df[df['日期'] >= st_date(num=7)]
|
244
378
|
|
245
379
|
df = df.groupby(['日期', '店铺名称', '营销场景'], as_index=False).agg(**{'花费': ('花费', np.sum), '成交金额': ('成交金额', np.sum)})
|
380
|
+
max_date = df['日期'].max().strftime('%Y-%m-%d')
|
381
|
+
min_date = df['日期'].min().strftime('%Y-%m-%d')
|
246
382
|
df_other = df.groupby(['店铺名称'], as_index=False).agg(**{'花费': ('花费', np.sum)})
|
247
383
|
df_other = df_other.sort_values('花费', ascending=False)
|
248
384
|
data_list = []
|
@@ -300,8 +436,18 @@ class DataShow:
|
|
300
436
|
return fig
|
301
437
|
|
302
438
|
fig = make_sub(data_list=data_list)
|
439
|
+
fig.add_annotation(
|
440
|
+
text=f'统计范围: {min_date} ~ {max_date}',
|
441
|
+
x=0.5,
|
442
|
+
y=-0.15,
|
443
|
+
xref='paper', # # 相对于整个图表区域
|
444
|
+
yref='paper',
|
445
|
+
showarrow=False, # 显示箭头
|
446
|
+
align="left", # 文本对齐方式
|
447
|
+
font=dict(size=16),
|
448
|
+
)
|
303
449
|
fig.update_layout(
|
304
|
-
title_text='多店推广花费_按日聚合',
|
450
|
+
title_text=f'多店推广花费_按日聚合',
|
305
451
|
xaxis_title='日期',
|
306
452
|
yaxis_title='花费',
|
307
453
|
# width=self.screen_width // 1.4,
|
@@ -330,43 +476,69 @@ class DataShow:
|
|
330
476
|
count += 1
|
331
477
|
fig.write_html(os.path.join(self.path, f'{filename}.html'))
|
332
478
|
|
333
|
-
def item_crowd(self, db_name='商品人群画像2', table_list=None, pro_list=None, filename='商品人群画像', item_id=None):
|
479
|
+
def item_crowd(self, db_name='商品人群画像2', table_list=None, pro_list=None, filename='商品人群画像', item_id=None, lab='全部渠道', option='商详浏览', d_str='近30天', last_date=None):
|
480
|
+
# item_ids = [696017020186, 714066010148, 830890472575]
|
334
481
|
if not pro_list:
|
335
482
|
pro_list = ['日期', '店铺名称', '洞察类型', '行为类型', '商品id', '统计周期', '标签名称', '标签人群数量']
|
336
483
|
if not table_list:
|
337
|
-
table_list = [
|
484
|
+
table_list = [
|
485
|
+
'消费能力等级',
|
486
|
+
'用户年龄',
|
487
|
+
'月均消费金额',
|
488
|
+
'大快消策略人群',
|
489
|
+
'店铺潜新老客',
|
490
|
+
'城市等级',
|
491
|
+
'用户职业',
|
492
|
+
]
|
493
|
+
if not item_id:
|
494
|
+
item_id = 696017020186
|
495
|
+
dict_list = {}
|
338
496
|
for table_name in table_list:
|
339
497
|
df = self.getdata(db_name=db_name, table_name=table_name, pro_list=pro_list)
|
340
|
-
|
341
|
-
|
342
|
-
|
498
|
+
if len(df) == 0:
|
499
|
+
print(f'{table_name}: 数据长度不能为 0')
|
500
|
+
continue
|
501
|
+
df['日期'] = pd.to_datetime(df['日期'])
|
502
|
+
df['商品id'] = df['商品id'].astype('int64')
|
343
503
|
df = df[df['商品id'] == int(item_id)]
|
344
|
-
|
345
|
-
|
346
|
-
|
347
|
-
|
348
|
-
|
349
|
-
|
504
|
+
last_date = df['日期'].max()
|
505
|
+
# 对数据进行筛选
|
506
|
+
df = df[
|
507
|
+
(df['日期'] == last_date) &
|
508
|
+
~df['标签名称'].str.contains('unknown', case=False) &
|
509
|
+
(df['洞察类型'] == lab) &
|
510
|
+
(df['行为类型'] == option) &
|
511
|
+
(df['统计周期'] == d_str)
|
512
|
+
]
|
513
|
+
dict_list.update({table_name: df})
|
350
514
|
|
351
515
|
fig = make_subplots(rows=2, cols=3)
|
352
516
|
# 在每个子图中绘制柱形图
|
353
|
-
|
354
|
-
|
355
|
-
|
356
|
-
|
357
|
-
|
358
|
-
|
517
|
+
count = 0
|
518
|
+
for table_name, df in dict_list.items():
|
519
|
+
if len(df) == 0:
|
520
|
+
count += 1
|
521
|
+
continue
|
522
|
+
# print(count, table_name)
|
523
|
+
if count > 5:
|
524
|
+
break
|
525
|
+
labels = df['标签名称'].tolist()
|
526
|
+
values = df['标签人群数量'].tolist()
|
527
|
+
df['Percentage'] = df['标签人群数量'] / df['标签人群数量'].sum() * 100
|
528
|
+
percentages = df['Percentage']
|
359
529
|
bar = go.Bar(
|
360
530
|
x=labels,
|
361
531
|
y=values,
|
362
|
-
name=
|
532
|
+
name=table_name,
|
363
533
|
text=percentages.map('{:.2f}%'.format), # 设置要显示的文本(百分比)
|
364
534
|
# textposition = 'outside', # 设置文本位置在柱形图外部
|
365
535
|
)
|
536
|
+
row = count // 3 + 1
|
537
|
+
col = count % 3 + 1
|
366
538
|
fig.add_trace(
|
367
539
|
bar,
|
368
|
-
row=
|
369
|
-
col=
|
540
|
+
row=row,
|
541
|
+
col=col,
|
370
542
|
)
|
371
543
|
if count < 3:
|
372
544
|
x = 0.01 + 0.395 * (count)
|
@@ -375,7 +547,7 @@ class DataShow:
|
|
375
547
|
x = 0.01 + 0.395 * (count % 3)
|
376
548
|
y = 1.04 - 0.59 * (count // 3)
|
377
549
|
fig.add_annotation(
|
378
|
-
text=
|
550
|
+
text=table_name,
|
379
551
|
x=x,
|
380
552
|
y=y,
|
381
553
|
xref='paper', # # 相对于整个图表区域
|
@@ -384,10 +556,10 @@ class DataShow:
|
|
384
556
|
align="left", # 文本对齐方式
|
385
557
|
font=dict(size=16),
|
386
558
|
)
|
387
|
-
|
388
|
-
|
559
|
+
count += 1
|
560
|
+
|
389
561
|
fig.update_layout(
|
390
|
-
title_text=db_name,
|
562
|
+
title_text=f'{db_name} 商品id: {item_id}',
|
391
563
|
xaxis_title='标签',
|
392
564
|
yaxis_title='人群数量',
|
393
565
|
# width=self.screen_width // 1.4,
|
@@ -396,23 +568,20 @@ class DataShow:
|
|
396
568
|
l=100, # 左边距
|
397
569
|
r=100,
|
398
570
|
t=100, # 上边距
|
399
|
-
b=
|
571
|
+
b=100,
|
400
572
|
),
|
401
573
|
# legend=dict(orientation="h")
|
402
574
|
)
|
403
|
-
|
404
|
-
|
405
|
-
|
406
|
-
|
407
|
-
|
408
|
-
|
409
|
-
|
410
|
-
|
411
|
-
|
412
|
-
|
413
|
-
font=dict(size=16),
|
414
|
-
)
|
415
|
-
count += 1
|
575
|
+
fig.add_annotation(
|
576
|
+
text=f'数据日期: {last_date.strftime('%Y-%m-%d')} 统计范围: {lab}/{option} {d_str}',
|
577
|
+
x=0.5,
|
578
|
+
y=-0.1,
|
579
|
+
xref='paper', # # 相对于整个图表区域
|
580
|
+
yref='paper',
|
581
|
+
showarrow=False, # 显示箭头
|
582
|
+
align="left", # 文本对齐方式
|
583
|
+
font=dict(size=14),
|
584
|
+
)
|
416
585
|
fig.write_html(os.path.join(self.path, f'{filename}.html'))
|
417
586
|
|
418
587
|
|
@@ -421,11 +590,15 @@ def main():
|
|
421
590
|
# ds.dpll()
|
422
591
|
# ds.tg(
|
423
592
|
# days=15,
|
424
|
-
# # start_date='2024-
|
425
|
-
# # end_date='2024-
|
593
|
+
# # start_date='2024-11-01',
|
594
|
+
# # end_date='2024-11-30',
|
426
595
|
# )
|
427
596
|
ds.item_crowd(
|
428
|
-
|
597
|
+
item_id=839148235697,
|
598
|
+
lab='全部渠道',
|
599
|
+
option='商详浏览',
|
600
|
+
last_date=None,
|
601
|
+
d_str='近30天',
|
429
602
|
)
|
430
603
|
|
431
604
|
|
@@ -2,7 +2,7 @@ mdbq/__init__.py,sha256=Il5Q9ATdX8yXqVxtP_nYqUhExzxPC_qk_WXQ_4h0exg,16
|
|
2
2
|
mdbq/__version__.py,sha256=y9Mp_8x0BCZSHsdLT_q5tX9wZwd5QgqrSIENLrb6vXA,62
|
3
3
|
mdbq/aggregation/__init__.py,sha256=EeDqX2Aml6SPx8363J-v1lz0EcZtgwIBYyCJV6CcEDU,40
|
4
4
|
mdbq/aggregation/aggregation.py,sha256=-yzApnlqSN2L0E1YMu5ml-W827qpKQvWPCOI7jj2kzY,80264
|
5
|
-
mdbq/aggregation/datashow.py,sha256=
|
5
|
+
mdbq/aggregation/datashow.py,sha256=sNZYwhPraF3KmcYdBVKvVaVoN-jGjh7ALRRLpanzQ6w,25673
|
6
6
|
mdbq/aggregation/optimize_data.py,sha256=RXIv7cACCgYyehAxMjUYi_S7rVyjIwXKWMaM3nduGtA,3068
|
7
7
|
mdbq/aggregation/query_data.py,sha256=4Fd4dMGi6Cu-KgNTf1OBNYe8InjvpMA5JALxCwvsHyw,173841
|
8
8
|
mdbq/bdup/__init__.py,sha256=AkhsGk81SkG1c8FqDH5tRq-8MZmFobVbN60DTyukYTY,28
|
@@ -34,7 +34,7 @@ mdbq/pbix/refresh_all.py,sha256=OBT9EewSZ0aRS9vL_FflVn74d4l2G00wzHiikCC4TC0,5926
|
|
34
34
|
mdbq/pbix/refresh_all_old.py,sha256=_pq3WSQ728GPtEG5pfsZI2uTJhU8D6ra-htIk1JXYzw,7192
|
35
35
|
mdbq/spider/__init__.py,sha256=RBMFXGy_jd1HXZhngB2T2XTvJqki8P_Fr-pBcwijnew,18
|
36
36
|
mdbq/spider/aikucun.py,sha256=eAIITxnbbxsR_EoohJ78CRw2dEdfSHOltfpxBrh0cvc,22207
|
37
|
-
mdbq-3.3.
|
38
|
-
mdbq-3.3.
|
39
|
-
mdbq-3.3.
|
40
|
-
mdbq-3.3.
|
37
|
+
mdbq-3.3.15.dist-info/METADATA,sha256=ZoivLw_LLapTkSRtAZGP2xvD8jpaBpX53MrIGKJ_LkQ,244
|
38
|
+
mdbq-3.3.15.dist-info/WHEEL,sha256=eOLhNAGa2EW3wWl_TU484h7q1UNgy0JXjjoqKoxAAQc,92
|
39
|
+
mdbq-3.3.15.dist-info/top_level.txt,sha256=2FQ-uLnCSB-OwFiWntzmwosW3X2Xqsg0ewh1axsaylA,5
|
40
|
+
mdbq-3.3.15.dist-info/RECORD,,
|
File without changes
|
File without changes
|