mdbq 3.4.1__py3-none-any.whl → 3.4.2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- mdbq/aggregation/datashow.py +124 -85
- mdbq/aggregation/query_data.py +78 -1
- {mdbq-3.4.1.dist-info → mdbq-3.4.2.dist-info}/METADATA +1 -1
- {mdbq-3.4.1.dist-info → mdbq-3.4.2.dist-info}/RECORD +6 -6
- {mdbq-3.4.1.dist-info → mdbq-3.4.2.dist-info}/WHEEL +0 -0
- {mdbq-3.4.1.dist-info → mdbq-3.4.2.dist-info}/top_level.txt +0 -0
mdbq/aggregation/datashow.py
CHANGED
@@ -1,4 +1,5 @@
|
|
1
1
|
# -*- coding: UTF-8 –*-
|
2
|
+
import decimal
|
2
3
|
import os
|
3
4
|
import re
|
4
5
|
import socket
|
@@ -152,21 +153,6 @@ class DataShow:
|
|
152
153
|
**{'销售额': ('销售额', np.sum), '退款额': ('退款额', np.sum)})
|
153
154
|
df_city = df_city[df_city['销售额'] > 0]
|
154
155
|
|
155
|
-
# 将城市等级添加到df
|
156
|
-
pov_set = pov_set[['城市等级', '城市']]
|
157
|
-
pov_set.drop_duplicates(subset='城市', keep='last', inplace=True, ignore_index=True)
|
158
|
-
df_city = pd.merge(df_city, pov_set, left_on=['城市'], right_on=['城市'], how='left')
|
159
|
-
df_level = df_city.groupby(['店铺名称', '城市等级'], as_index=False).agg(
|
160
|
-
**{'销售额': ('销售额', np.sum), '退款额': ('退款额', np.sum)})
|
161
|
-
data_list = [('销售 按城市等级', df_level['城市等级'].tolist(), df_level['销售额'].tolist())]
|
162
|
-
if percentage:
|
163
|
-
print(df_city['销售额'].sum())
|
164
|
-
return
|
165
|
-
df_city1 = df_city[df_city['销售额'] > int(percentage)]
|
166
|
-
data_list += ('销售额top城市', df_city1['城市'].tolist(), df_city1['销售额'].tolist())
|
167
|
-
df_city2 = df_city[df_city['退款额'] > int(percentage)]
|
168
|
-
data_list += ('退款额top城市', df_city2['城市'].tolist(), df_city2['退款额'].tolist())
|
169
|
-
|
170
156
|
# 省份
|
171
157
|
pro_list = ['日期', '店铺名称', '省份', '销售额', '退款额']
|
172
158
|
year = datetime.datetime.today().year
|
@@ -181,46 +167,85 @@ class DataShow:
|
|
181
167
|
# print(df_pov[df_pov['省份'] == '广东'])
|
182
168
|
df_pov = df_pov.groupby(['店铺名称', '省份'], as_index=False).agg(
|
183
169
|
**{'销售额': ('销售额', np.sum), '退款额': ('退款额', np.sum)})
|
184
|
-
|
185
|
-
|
186
|
-
|
187
|
-
|
188
|
-
|
170
|
+
df_pov.drop_duplicates(subset='省份', keep='last', inplace=True, ignore_index=True)
|
171
|
+
df_pov.sort_values(['销售额'], ascending=[False], ignore_index=True, inplace=True)
|
172
|
+
df_pov = df_pov[df_pov['省份'] != '其他']
|
173
|
+
percentages = df_pov['销售额'] / df_pov['销售额'].sum() * 100
|
174
|
+
df_pov1 = df_pov.head(10)
|
175
|
+
data_list = [('销售 top省份', df_pov1['省份'].tolist(), df_pov1['销售额'].tolist(), percentages)]
|
189
176
|
|
190
|
-
|
191
|
-
|
192
|
-
|
177
|
+
# 将城市等级添加到df
|
178
|
+
pov_set = pov_set[['城市等级', '城市']]
|
179
|
+
pov_set.drop_duplicates(subset='城市', keep='last', inplace=True, ignore_index=True)
|
180
|
+
df_city = pd.merge(df_city, pov_set, left_on=['城市'], right_on=['城市'], how='left')
|
181
|
+
df_level = df_city.groupby(['店铺名称', '城市等级'], as_index=False).agg(
|
182
|
+
**{'销售额': ('销售额', np.sum), '退款额': ('退款额', np.sum)})
|
183
|
+
city_level_list = [('按城市等级', df_level['城市等级'].tolist(), df_level['销售额'].tolist())]
|
184
|
+
df_city.drop_duplicates(subset='城市', keep='last', inplace=True, ignore_index=True)
|
185
|
+
df_city.sort_values(['销售额'], ascending=[False], ignore_index=True, inplace=True)
|
186
|
+
df_city = df_city[df_city['城市'] != '其他']
|
187
|
+
percentages = df_city['销售额'] / df_city['销售额'].sum() * 100
|
188
|
+
df_city1 = df_city.head(10)
|
189
|
+
data_list += [('销售 top城市', df_city1['城市'].tolist(), df_city1['销售额'].tolist(), percentages)]
|
190
|
+
|
191
|
+
# 退款 top 城市
|
192
|
+
df_city.sort_values(['退款额'], ascending=[False], ignore_index=True, inplace=True)
|
193
|
+
percentages = df_city['退款额'] / df_city['退款额'].sum() * 100
|
194
|
+
df_city2 = df_city.head(10)
|
195
|
+
data_list += [('退款 top城市', df_city2['城市'].tolist(), df_city2['退款额'].tolist(), percentages)]
|
196
|
+
|
197
|
+
t_p1 = [{"type": "pie"}]
|
198
|
+
for i in range(2):
|
199
|
+
t_p1.extend([{"type": "bar"}]) # 折线图类型
|
193
200
|
t_p2 = []
|
194
201
|
for i in range(3):
|
195
|
-
t_p2.extend([{"type": "
|
202
|
+
t_p2.extend([{"type": "bar"}]) # 饼图类型
|
196
203
|
specs = [t_p1, t_p2]
|
197
204
|
fig = make_subplots(rows=2, cols=3, specs=specs)
|
198
|
-
|
199
|
-
|
205
|
+
title, labels, values = city_level_list[0]
|
206
|
+
# 添加饼图
|
207
|
+
fig.add_trace(
|
208
|
+
go.Pie(
|
209
|
+
labels=labels,
|
210
|
+
values=values,
|
211
|
+
name=title,
|
212
|
+
textinfo='label+percent'
|
213
|
+
),
|
214
|
+
row=1,
|
215
|
+
col=1,
|
216
|
+
)
|
217
|
+
x = 0.14
|
218
|
+
y = 1
|
219
|
+
fig.add_annotation(
|
220
|
+
text=title,
|
221
|
+
x=x,
|
222
|
+
y=y,
|
223
|
+
xref='paper', # # 相对于整个图表区域
|
224
|
+
yref='paper',
|
225
|
+
showarrow=True, # 显示箭头
|
226
|
+
align="left", # 文本对齐方式
|
227
|
+
font=dict(size=14),
|
228
|
+
)
|
229
|
+
row = 1
|
230
|
+
col = 1
|
200
231
|
for item in data_list:
|
201
|
-
title, labels, values = item
|
202
|
-
|
203
|
-
|
204
|
-
|
205
|
-
|
206
|
-
|
207
|
-
|
208
|
-
|
209
|
-
|
210
|
-
|
211
|
-
# 添加饼图
|
232
|
+
title, labels, values, percentages = item
|
233
|
+
bar = go.Bar(
|
234
|
+
x=labels,
|
235
|
+
y=values,
|
236
|
+
name=title,
|
237
|
+
orientation='v', # 垂直柱形图
|
238
|
+
text=percentages.map('{:.2f}%'.format), # 设置要显示的文本(百分比)
|
239
|
+
textposition = 'outside', # 设置文本位置在柱形图外部
|
240
|
+
width=0.55 # 调整柱子最大宽度
|
241
|
+
)
|
212
242
|
fig.add_trace(
|
213
|
-
|
214
|
-
labels=filtered_labels,
|
215
|
-
values=filtered_values,
|
216
|
-
name=title,
|
217
|
-
textinfo='label+percent'
|
218
|
-
),
|
243
|
+
bar,
|
219
244
|
row=row // 3 + 1,
|
220
245
|
col=col % 3 + 1,
|
221
246
|
)
|
222
247
|
x = 0.14 + 0.355 * (row % 3)
|
223
|
-
y = 1
|
248
|
+
y = 1 - 0.575 * (row // 3)
|
224
249
|
fig.add_annotation(
|
225
250
|
text=title,
|
226
251
|
x=x,
|
@@ -233,12 +258,9 @@ class DataShow:
|
|
233
258
|
)
|
234
259
|
row += 1
|
235
260
|
col += 1
|
261
|
+
|
236
262
|
fig.update_layout(
|
237
263
|
title_text=f'销售地域分布',
|
238
|
-
# xaxis_title='X Axis',
|
239
|
-
# yaxis_title='Y Axis',
|
240
|
-
# width=self.screen_width // 1.4,
|
241
|
-
# height=self.screen_width // 2,
|
242
264
|
margin=dict(
|
243
265
|
l=100, # 左边距
|
244
266
|
r=100,
|
@@ -246,15 +268,22 @@ class DataShow:
|
|
246
268
|
b=100,
|
247
269
|
),
|
248
270
|
legend=dict(
|
249
|
-
# title='Legend Title', # 图例标题
|
250
271
|
orientation='v', # 图例方向('h' 表示水平,'v' 表示垂直)
|
251
|
-
# x=0.5, # 图例在图表中的 x 位置(0 到 1 的比例)
|
252
|
-
# y=1.02, # 图例在图表中的 y 位置(稍微超出顶部以避免遮挡数据)
|
253
272
|
font=dict(
|
254
273
|
size=12 # 图例字体大小
|
255
274
|
)
|
256
275
|
)
|
257
276
|
)
|
277
|
+
fig.add_annotation(
|
278
|
+
text=f'统计时间周期: {start_date}~{end_date}',
|
279
|
+
x=0.5,
|
280
|
+
y=-0.1,
|
281
|
+
xref='paper', # # 相对于整个图表区域
|
282
|
+
yref='paper',
|
283
|
+
showarrow=False, # 显示箭头
|
284
|
+
align="left", # 文本对齐方式
|
285
|
+
font=dict(size=12),
|
286
|
+
)
|
258
287
|
fig.write_html(os.path.join(self.path, f'{filename}.html'))
|
259
288
|
|
260
289
|
|
@@ -369,7 +398,7 @@ class DataShow:
|
|
369
398
|
)
|
370
399
|
count2 += 1
|
371
400
|
fig.update_layout(
|
372
|
-
title_text=f'店铺流量来源
|
401
|
+
title_text=f'店铺流量来源',
|
373
402
|
# xaxis_title='X Axis',
|
374
403
|
# yaxis_title='Y Axis',
|
375
404
|
# width=self.screen_width // 1.4,
|
@@ -390,6 +419,16 @@ class DataShow:
|
|
390
419
|
)
|
391
420
|
)
|
392
421
|
)
|
422
|
+
fig.add_annotation(
|
423
|
+
text=f'最近数据日期: {max_date}',
|
424
|
+
x=0.5,
|
425
|
+
y=-0.25,
|
426
|
+
xref='paper', # # 相对于整个图表区域
|
427
|
+
yref='paper',
|
428
|
+
showarrow=False, # 显示箭头
|
429
|
+
align="left", # 文本对齐方式
|
430
|
+
font=dict(size=12),
|
431
|
+
)
|
393
432
|
fig.write_html(os.path.join(self.path, f'{filename}.html'))
|
394
433
|
|
395
434
|
def tg(self, db_name='聚合数据', table_name='多店推广场景_按日聚合', pro_list=None, filename='多店推广场景', days=None, start_date=None, end_date=None):
|
@@ -790,44 +829,44 @@ class DataShow:
|
|
790
829
|
def main():
|
791
830
|
ds = DataShow()
|
792
831
|
|
793
|
-
#
|
794
|
-
|
795
|
-
#
|
796
|
-
|
797
|
-
|
798
|
-
|
799
|
-
|
800
|
-
|
801
|
-
|
802
|
-
#
|
803
|
-
|
804
|
-
|
805
|
-
|
806
|
-
|
807
|
-
|
808
|
-
|
809
|
-
|
810
|
-
|
811
|
-
|
812
|
-
|
813
|
-
|
814
|
-
|
815
|
-
#
|
816
|
-
|
817
|
-
|
818
|
-
|
819
|
-
|
820
|
-
|
821
|
-
|
822
|
-
|
823
|
-
|
832
|
+
# 店铺流量来源
|
833
|
+
ds.dpll()
|
834
|
+
# 多店聚合推广数据
|
835
|
+
ds.tg(
|
836
|
+
days=15,
|
837
|
+
# start_date='2024-11-01',
|
838
|
+
# end_date='2024-11-30',
|
839
|
+
)
|
840
|
+
|
841
|
+
# 商品人群画像
|
842
|
+
item_id_list = [
|
843
|
+
839148235697,
|
844
|
+
]
|
845
|
+
for item_id in item_id_list:
|
846
|
+
ds.item_crowd(
|
847
|
+
item_id=item_id,
|
848
|
+
lab='全部渠道',
|
849
|
+
option='商详浏览',
|
850
|
+
last_date=None,
|
851
|
+
d_str='近30天',
|
852
|
+
)
|
853
|
+
|
854
|
+
# 达摩盘人群画像
|
855
|
+
crowid_list = [
|
856
|
+
40457166,
|
857
|
+
]
|
858
|
+
for crowid in crowid_list:
|
859
|
+
ds.crowd(
|
860
|
+
crowd_id=crowid,
|
861
|
+
last_date=None,
|
862
|
+
)
|
824
863
|
|
825
864
|
ds.pov_city(
|
826
865
|
db_name='生意经3',
|
827
866
|
filename='销售地域分布',
|
828
|
-
start_date='2024-
|
867
|
+
start_date='2024-12-01',
|
829
868
|
end_date='2024-12-11',
|
830
|
-
percentage=
|
869
|
+
percentage=0.02,
|
831
870
|
)
|
832
871
|
|
833
872
|
if __name__ == '__main__':
|
mdbq/aggregation/query_data.py
CHANGED
@@ -1193,6 +1193,83 @@ class MysqlDatasQuery:
|
|
1193
1193
|
)
|
1194
1194
|
return True
|
1195
1195
|
|
1196
|
+
def item_up(self, db_name='聚合数据', table_name='淘宝店铺货品'):
|
1197
|
+
start_date, end_date = self.months_data(num=self.months)
|
1198
|
+
projection = {}
|
1199
|
+
df_set = self.download.data_to_df(
|
1200
|
+
db_name='属性设置3',
|
1201
|
+
table_name=f'货品年份基准',
|
1202
|
+
start_date=start_date,
|
1203
|
+
end_date=end_date,
|
1204
|
+
projection={'商品id':1, '上市年份':1},
|
1205
|
+
)
|
1206
|
+
df = self.download.data_to_df(
|
1207
|
+
db_name='市场数据3',
|
1208
|
+
table_name=f'淘宝店铺数据',
|
1209
|
+
start_date=start_date,
|
1210
|
+
end_date=end_date,
|
1211
|
+
projection=projection,
|
1212
|
+
)
|
1213
|
+
df['日期'] = pd.to_datetime(df['日期'], format='%Y-%m-%d', errors='ignore') # 转换日期列
|
1214
|
+
df = df[df['日期'] == pd.to_datetime('2024-12-12')]
|
1215
|
+
|
1216
|
+
df_set['商品id'] = df_set['商品id'].astype('int64')
|
1217
|
+
df['商品id'] = df['商品id'].astype('int64')
|
1218
|
+
df_set.sort_values('商品id', ascending=False, ignore_index=True, inplace=True)
|
1219
|
+
|
1220
|
+
def check_year(item_id):
|
1221
|
+
for item in df_set.to_dict(orient='records'):
|
1222
|
+
if item_id > item['商品id']:
|
1223
|
+
return item['上市年份']
|
1224
|
+
|
1225
|
+
df['上市年份'] = df['商品id'].apply(lambda x: check_year(x))
|
1226
|
+
p = df.pop('上市年份')
|
1227
|
+
df.insert(loc=5, column='上市年份', value=p)
|
1228
|
+
now = datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S")
|
1229
|
+
print(f'{now} 正在更新: mysql ({host}:{port}) {db_name}/{table_name}')
|
1230
|
+
set_typ = {
|
1231
|
+
'日期': 'date',
|
1232
|
+
'店铺id': 'bigint',
|
1233
|
+
'店铺名称': 'varchar(255)',
|
1234
|
+
'商家id': 'bigint',
|
1235
|
+
'商品id': 'bigint',
|
1236
|
+
'商品标题': 'varchar(255)',
|
1237
|
+
'商品链接': 'varchar(255)',
|
1238
|
+
'商品图片': 'varchar(255)',
|
1239
|
+
'销量': 'varchar(50)',
|
1240
|
+
'页面价': 'int',
|
1241
|
+
'data_sku': 'varchar(1000)',
|
1242
|
+
'更新时间': 'timestamp',
|
1243
|
+
'上市年份': 'varchar(50)',
|
1244
|
+
}
|
1245
|
+
m_engine.df_to_mysql(
|
1246
|
+
df=df,
|
1247
|
+
db_name=db_name,
|
1248
|
+
table_name=table_name,
|
1249
|
+
# icm_update=['日期', '一级来源', '二级来源', '三级来源', '访客数'], # 增量更新, 在聚合数据中使用,其他不要用
|
1250
|
+
move_insert=True, # 先删除,再插入
|
1251
|
+
df_sql=False, # 值为 True 时使用 df.to_sql 函数上传整个表, 不会排重
|
1252
|
+
drop_duplicates=False, # 值为 True 时检查重复数据再插入,反之直接上传,会比较慢
|
1253
|
+
count=None,
|
1254
|
+
filename=None, # 用来追踪处理进度
|
1255
|
+
reset_id=True, # 是否重置自增列
|
1256
|
+
set_typ=set_typ,
|
1257
|
+
)
|
1258
|
+
company_engine.df_to_mysql(
|
1259
|
+
df=df,
|
1260
|
+
db_name=db_name,
|
1261
|
+
table_name=table_name,
|
1262
|
+
# icm_update=['日期', '一级来源', '二级来源', '三级来源', '访客数'], # 增量更新, 在聚合数据中使用,其他不要用
|
1263
|
+
move_insert=True, # 先删除,再插入
|
1264
|
+
df_sql=False, # 值为 True 时使用 df.to_sql 函数上传整个表, 不会排重
|
1265
|
+
drop_duplicates=False, # 值为 True 时检查重复数据再插入,反之直接上传,会比较慢
|
1266
|
+
count=None,
|
1267
|
+
filename=None, # 用来追踪处理进度
|
1268
|
+
reset_id=True, # 是否重置自增列
|
1269
|
+
set_typ=set_typ,
|
1270
|
+
)
|
1271
|
+
|
1272
|
+
|
1196
1273
|
def spph(self, db_name='聚合数据', table_name='天猫_商品排行'):
|
1197
1274
|
""" """
|
1198
1275
|
start_date, end_date = self.months_data(num=self.months)
|
@@ -3840,7 +3917,7 @@ if __name__ == '__main__':
|
|
3840
3917
|
sdq = MysqlDatasQuery() # 实例化数据处理类
|
3841
3918
|
sdq.months = 1 # 设置数据周期, 1 表示近 2 个月
|
3842
3919
|
sdq.update_service = True # 调试时加,true: 将数据写入 mysql 服务器
|
3843
|
-
sdq.
|
3920
|
+
sdq.item_up()
|
3844
3921
|
|
3845
3922
|
# string = '30-34岁,35-39岁,40-49岁'
|
3846
3923
|
# d = "~".join(re.findall(r'(\d+)\D.*\D(\d+)岁', string)[0])
|
@@ -2,9 +2,9 @@ mdbq/__init__.py,sha256=Il5Q9ATdX8yXqVxtP_nYqUhExzxPC_qk_WXQ_4h0exg,16
|
|
2
2
|
mdbq/__version__.py,sha256=y9Mp_8x0BCZSHsdLT_q5tX9wZwd5QgqrSIENLrb6vXA,62
|
3
3
|
mdbq/aggregation/__init__.py,sha256=EeDqX2Aml6SPx8363J-v1lz0EcZtgwIBYyCJV6CcEDU,40
|
4
4
|
mdbq/aggregation/aggregation.py,sha256=-yzApnlqSN2L0E1YMu5ml-W827qpKQvWPCOI7jj2kzY,80264
|
5
|
-
mdbq/aggregation/datashow.py,sha256=
|
5
|
+
mdbq/aggregation/datashow.py,sha256=k4gUYldnmi_iZJrM7wNtjeenXJl82hUoYcPu6iIL3PU,35864
|
6
6
|
mdbq/aggregation/optimize_data.py,sha256=RXIv7cACCgYyehAxMjUYi_S7rVyjIwXKWMaM3nduGtA,3068
|
7
|
-
mdbq/aggregation/query_data.py,sha256=
|
7
|
+
mdbq/aggregation/query_data.py,sha256=9NALeHTP9tblOEPyntLBRtdroLG_qN9qWi34Hg4rXFM,178891
|
8
8
|
mdbq/bdup/__init__.py,sha256=AkhsGk81SkG1c8FqDH5tRq-8MZmFobVbN60DTyukYTY,28
|
9
9
|
mdbq/bdup/bdup.py,sha256=LAV0TgnQpc-LB-YuJthxb0U42_VkPidzQzAagan46lU,4234
|
10
10
|
mdbq/config/__init__.py,sha256=jso1oHcy6cJEfa7udS_9uO5X6kZLoPBF8l3wCYmr5dM,18
|
@@ -34,7 +34,7 @@ mdbq/pbix/refresh_all.py,sha256=OBT9EewSZ0aRS9vL_FflVn74d4l2G00wzHiikCC4TC0,5926
|
|
34
34
|
mdbq/pbix/refresh_all_old.py,sha256=_pq3WSQ728GPtEG5pfsZI2uTJhU8D6ra-htIk1JXYzw,7192
|
35
35
|
mdbq/spider/__init__.py,sha256=RBMFXGy_jd1HXZhngB2T2XTvJqki8P_Fr-pBcwijnew,18
|
36
36
|
mdbq/spider/aikucun.py,sha256=v7VO5gtEXR6_4Q6ujbTyu1FHu7TXHcwSQ6hIO249YH0,22208
|
37
|
-
mdbq-3.4.
|
38
|
-
mdbq-3.4.
|
39
|
-
mdbq-3.4.
|
40
|
-
mdbq-3.4.
|
37
|
+
mdbq-3.4.2.dist-info/METADATA,sha256=I2lVjMi-WsvegW9ZCQcR4UV8wg4g1A9-mzgVFQ_H7x4,243
|
38
|
+
mdbq-3.4.2.dist-info/WHEEL,sha256=eOLhNAGa2EW3wWl_TU484h7q1UNgy0JXjjoqKoxAAQc,92
|
39
|
+
mdbq-3.4.2.dist-info/top_level.txt,sha256=2FQ-uLnCSB-OwFiWntzmwosW3X2Xqsg0ewh1axsaylA,5
|
40
|
+
mdbq-3.4.2.dist-info/RECORD,,
|
File without changes
|
File without changes
|