mdbq 3.0.9__py3-none-any.whl → 3.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- mdbq/aggregation/query_data.py +71 -31
- mdbq/mysql/mysql.py +1 -0
- {mdbq-3.0.9.dist-info → mdbq-3.1.0.dist-info}/METADATA +1 -1
- {mdbq-3.0.9.dist-info → mdbq-3.1.0.dist-info}/RECORD +6 -6
- {mdbq-3.0.9.dist-info → mdbq-3.1.0.dist-info}/WHEEL +0 -0
- {mdbq-3.0.9.dist-info → mdbq-3.1.0.dist-info}/top_level.txt +0 -0
mdbq/aggregation/query_data.py
CHANGED
@@ -16,6 +16,7 @@ import getpass
|
|
16
16
|
import json
|
17
17
|
import os
|
18
18
|
import time
|
19
|
+
import calendar
|
19
20
|
|
20
21
|
"""
|
21
22
|
|
@@ -134,7 +135,6 @@ class MysqlDatasQuery:
|
|
134
135
|
'直接成交金额': ('直接成交金额', np.max)
|
135
136
|
}
|
136
137
|
)
|
137
|
-
df.to_csv('/Users/xigua/Downloads/zhuti.csv', index=False, header=True, encoding='utf-8_sig')
|
138
138
|
df.insert(loc=1, column='推广渠道', value='万相台无界版') # df中插入新列
|
139
139
|
set_typ = {
|
140
140
|
'日期': 'date',
|
@@ -990,12 +990,15 @@ class MysqlDatasQuery:
|
|
990
990
|
)
|
991
991
|
return True
|
992
992
|
|
993
|
-
@try_except
|
993
|
+
# @try_except
|
994
994
|
def dplyd(self, db_name='聚合数据', table_name='店铺流量来源构成'):
|
995
995
|
""" 新旧版取的字段是一样的 """
|
996
996
|
start_date, end_date = self.months_data(num=self.months)
|
997
997
|
projection = {
|
998
998
|
'日期': 1,
|
999
|
+
'店铺名称': 1,
|
1000
|
+
'类别': 1,
|
1001
|
+
'来源构成': 1,
|
999
1002
|
'一级来源': 1,
|
1000
1003
|
'二级来源': 1,
|
1001
1004
|
'三级来源': 1,
|
@@ -1004,7 +1007,9 @@ class MysqlDatasQuery:
|
|
1004
1007
|
'支付买家数': 1,
|
1005
1008
|
'支付转化率': 1,
|
1006
1009
|
'加购人数': 1,
|
1007
|
-
'
|
1010
|
+
'加购件数': 1,
|
1011
|
+
'下单买家数': 1,
|
1012
|
+
'关注店铺人数': 1,
|
1008
1013
|
}
|
1009
1014
|
df = self.download.data_to_df(
|
1010
1015
|
db_name='生意参谋3',
|
@@ -1013,33 +1018,50 @@ class MysqlDatasQuery:
|
|
1013
1018
|
end_date=end_date,
|
1014
1019
|
projection=projection,
|
1015
1020
|
)
|
1021
|
+
df.drop_duplicates(subset=['日期', '店铺名称', '类别', '来源构成', '一级来源', '二级来源', '三级来源', '访客数'], keep='last', inplace=True, ignore_index=True)
|
1016
1022
|
# 包含三级来源名称和预设索引值列
|
1017
1023
|
# 截取 从上月1日 至 今天的花费数据, 推广款式按此数据从高到低排序(商品图+排序)
|
1018
|
-
|
1019
|
-
|
1020
|
-
|
1024
|
+
last_month, ii = get_day_of_month(1)
|
1025
|
+
df['日期'] = pd.to_datetime(df['日期'], format='%Y-%m-%d', errors='ignore') # 转换日期列
|
1026
|
+
df_visitor3 = df[df['日期'] >= pd.to_datetime(last_month)]
|
1027
|
+
df_visitor3 = df_visitor3[(df_visitor3['三级来源'] != '汇总') & (df_visitor3['三级来源'] != '0')]
|
1021
1028
|
df_visitor3 = df_visitor3.groupby(['三级来源'], as_index=False).agg({'访客数': 'sum'})
|
1022
1029
|
df_visitor3.sort_values('访客数', ascending=False, ignore_index=True, inplace=True)
|
1023
1030
|
df_visitor3.reset_index(inplace=True)
|
1024
1031
|
df_visitor3['index'] = df_visitor3['index'] + 100
|
1025
|
-
df_visitor3.rename(columns={'index': '
|
1026
|
-
df_visitor3 = df_visitor3[['三级来源', '
|
1032
|
+
df_visitor3.rename(columns={'index': '三级来源索引'}, inplace=True)
|
1033
|
+
df_visitor3 = df_visitor3[['三级来源', '三级来源索引']]
|
1027
1034
|
|
1028
1035
|
# 包含二级来源名称和预设索引值列
|
1029
|
-
df_visitor2 = df
|
1030
|
-
df_visitor2 = df_visitor2[
|
1031
|
-
# df_visitor2 = df_visitor2[(df_visitor2['日期'] >= f'{year_my}-{last_month.month}-01')]
|
1036
|
+
df_visitor2 = df[df['日期'] >= pd.to_datetime(last_month)]
|
1037
|
+
df_visitor2 = df_visitor2[(df_visitor2['二级来源'] != '汇总') & (df_visitor2['二级来源'] != '0')]
|
1032
1038
|
df_visitor2 = df_visitor2.groupby(['二级来源'], as_index=False).agg({'访客数': 'sum'})
|
1033
1039
|
df_visitor2.sort_values('访客数', ascending=False, ignore_index=True, inplace=True)
|
1034
1040
|
df_visitor2.reset_index(inplace=True)
|
1035
1041
|
df_visitor2['index'] = df_visitor2['index'] + 100
|
1036
|
-
df_visitor2.rename(columns={'index': '
|
1037
|
-
df_visitor2 = df_visitor2[['二级来源', '
|
1038
|
-
|
1042
|
+
df_visitor2.rename(columns={'index': '二级来源索引'}, inplace=True)
|
1043
|
+
df_visitor2 = df_visitor2[['二级来源', '二级来源索引']]
|
1044
|
+
|
1045
|
+
# 包含一级来源名称和预设索引值列
|
1046
|
+
df_visitor1 = df[df['日期'] >= pd.to_datetime(last_month)]
|
1047
|
+
df_visitor1 = df_visitor1[(df_visitor1['一级来源'] != '汇总') & (df_visitor1['一级来源'] != '0')]
|
1048
|
+
df_visitor1 = df_visitor1.groupby(['一级来源'], as_index=False).agg({'访客数': 'sum'})
|
1049
|
+
df_visitor1.sort_values('访客数', ascending=False, ignore_index=True, inplace=True)
|
1050
|
+
df_visitor1.reset_index(inplace=True)
|
1051
|
+
df_visitor1['index'] = df_visitor1['index'] + 100
|
1052
|
+
df_visitor1.rename(columns={'index': '一级来源索引'}, inplace=True)
|
1053
|
+
df_visitor1 = df_visitor1[['一级来源', '一级来源索引']]
|
1054
|
+
|
1055
|
+
df = pd.merge(df, df_visitor1, how='left', left_on='一级来源', right_on='一级来源')
|
1039
1056
|
df = pd.merge(df, df_visitor2, how='left', left_on='二级来源', right_on='二级来源')
|
1040
1057
|
df = pd.merge(df, df_visitor3, how='left', left_on='三级来源', right_on='三级来源')
|
1058
|
+
for col in ['一级来源索引', '二级来源索引', '三级来源索引']:
|
1059
|
+
df[col] = df[col].apply(lambda x: 1000 if str(x) == 'nan' else x)
|
1041
1060
|
set_typ = {
|
1042
1061
|
'日期': 'date',
|
1062
|
+
'店铺名称': 'varchar(100)',
|
1063
|
+
'类别': 'varchar(100)',
|
1064
|
+
'来源构成': 'varchar(100)',
|
1043
1065
|
'一级来源': 'varchar(100)',
|
1044
1066
|
'二级来源': 'varchar(100)',
|
1045
1067
|
'三级来源': 'varchar(100)',
|
@@ -1048,10 +1070,11 @@ class MysqlDatasQuery:
|
|
1048
1070
|
'支付买家数': 'int',
|
1049
1071
|
'支付转化率': 'decimal(10,4)',
|
1050
1072
|
'加购人数': 'int',
|
1051
|
-
'
|
1052
|
-
'
|
1053
|
-
'
|
1073
|
+
'一级来源索引': 'smallint',
|
1074
|
+
'二级来源索引': 'smallint',
|
1075
|
+
'三级来源索引': 'smallint',
|
1054
1076
|
}
|
1077
|
+
# df.to_csv('/Users/xigua/Downloads/ll.csv', index=False, header=True, encoding='utf-8_sig')
|
1055
1078
|
min_date = df['日期'].min()
|
1056
1079
|
max_date = df['日期'].max()
|
1057
1080
|
now = datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S")
|
@@ -1068,7 +1091,6 @@ class MysqlDatasQuery:
|
|
1068
1091
|
filename=None, # 用来追踪处理进度
|
1069
1092
|
reset_id=False, # 是否重置自增列
|
1070
1093
|
set_typ=set_typ,
|
1071
|
-
|
1072
1094
|
)
|
1073
1095
|
return True
|
1074
1096
|
|
@@ -2925,6 +2947,19 @@ class MysqlDatasQuery:
|
|
2925
2947
|
return True
|
2926
2948
|
|
2927
2949
|
|
2950
|
+
def get_day_of_month(num):
|
2951
|
+
"""
|
2952
|
+
num: 获取n月以前的第一天和最后一天, num=0时, 返回当月第一天和最后一天
|
2953
|
+
"""
|
2954
|
+
_today = datetime.date.today()
|
2955
|
+
months_ago = _today - relativedelta(months=num) # n 月以前的今天
|
2956
|
+
_, _lastDay = calendar.monthrange(months_ago.year, months_ago.month) # 返回月的第一天的星期和当月总天数
|
2957
|
+
_firstDay = datetime.date(months_ago.year, months_ago.month, day=1).strftime('%Y-%m-%d')
|
2958
|
+
_lastDay = datetime.date(months_ago.year, months_ago.month, day=_lastDay).strftime('%Y-%m-%d')
|
2959
|
+
|
2960
|
+
return _firstDay, _lastDay
|
2961
|
+
|
2962
|
+
|
2928
2963
|
def date_table():
|
2929
2964
|
"""
|
2930
2965
|
生成 pbix 使用的日期表
|
@@ -3056,17 +3091,22 @@ def query_(months=1, less_dict=[]):
|
|
3056
3091
|
|
3057
3092
|
|
3058
3093
|
if __name__ == '__main__':
|
3059
|
-
main(
|
3060
|
-
#
|
3061
|
-
#
|
3062
|
-
# system = platform.system() # 本机系统
|
3063
|
-
# host_name = socket.gethostname() # 本机名
|
3064
|
-
# conf = myconfig.main()
|
3065
|
-
# db_list = conf[system][host_name]['mysql']['数据库集']
|
3066
|
-
# # 4. 清理聚合数据
|
3067
|
-
# optimize_data.op_data(
|
3068
|
-
# db_name_lists=['聚合数据'],
|
3069
|
-
# days=3650,
|
3070
|
-
# is_mongo=False,
|
3071
|
-
# is_mysql=True,
|
3094
|
+
# main(
|
3095
|
+
# days=150, # 清理聚合数据的日期长度
|
3096
|
+
# months=3 # 生成聚合数据的长度
|
3072
3097
|
# )
|
3098
|
+
|
3099
|
+
query_(months=3)
|
3100
|
+
|
3101
|
+
system = platform.system() # 本机系统
|
3102
|
+
host_name = socket.gethostname() # 本机名
|
3103
|
+
conf = myconfig.main()
|
3104
|
+
db_list = conf[system][host_name]['mysql']['数据库集']
|
3105
|
+
# 4. 清理聚合数据
|
3106
|
+
optimize_data.op_data(
|
3107
|
+
db_name_lists=['聚合数据'],
|
3108
|
+
days=3650,
|
3109
|
+
is_mongo=False,
|
3110
|
+
is_mysql=True,
|
3111
|
+
)
|
3112
|
+
|
mdbq/mysql/mysql.py
CHANGED
@@ -495,6 +495,7 @@ class MysqlUpload:
|
|
495
495
|
col_exist = [item['COLUMN_NAME'] for item in cursor.fetchall()]
|
496
496
|
cols = df.columns.tolist()
|
497
497
|
col_not_exist = [col for col in cols if col not in col_exist]
|
498
|
+
|
498
499
|
# 检查列,不存在则新建列
|
499
500
|
if col_not_exist: # 数据表中不存在的列
|
500
501
|
for col in col_not_exist:
|
@@ -5,7 +5,7 @@ mdbq/aggregation/aggregation.py,sha256=vgswBnIvXPmll6M1DGoCWozmlM6jdQDSHPdD4NJgq
|
|
5
5
|
mdbq/aggregation/df_types.py,sha256=U9i3q2eRPTDY8qAPTw7irzu-Tlg4CIySW9uYro81wdk,8125
|
6
6
|
mdbq/aggregation/mysql_types.py,sha256=YTGyrF9vcRgfkQbpT-e-JdJ7c7VF1dDHgyx9YZRES8w,10934
|
7
7
|
mdbq/aggregation/optimize_data.py,sha256=79uwiM2WqNNFxGpE2wKz742PRq-ZGgFjdOV0vgptHdY,3513
|
8
|
-
mdbq/aggregation/query_data.py,sha256=
|
8
|
+
mdbq/aggregation/query_data.py,sha256=rDnxnIL_ORszTzIJa-XinaVgedCdVSzXjM4CdbJOiV4,138540
|
9
9
|
mdbq/aggregation/query_data_bak.py,sha256=r1FU0C4zjXln7oVSrRkElh4Ehl-9mYhGcq57jLbViUA,104071
|
10
10
|
mdbq/bdup/__init__.py,sha256=AkhsGk81SkG1c8FqDH5tRq-8MZmFobVbN60DTyukYTY,28
|
11
11
|
mdbq/bdup/bdup.py,sha256=LAV0TgnQpc-LB-YuJthxb0U42_VkPidzQzAagan46lU,4234
|
@@ -27,7 +27,7 @@ mdbq/log/mylogger.py,sha256=oaT7Bp-Hb9jZt52seP3ISUuxVcI19s4UiqTeouScBO0,3258
|
|
27
27
|
mdbq/mongo/__init__.py,sha256=SILt7xMtQIQl_m-ik9WLtJSXIVf424iYgCfE_tnQFbw,13
|
28
28
|
mdbq/mongo/mongo.py,sha256=v9qvrp6p1ZRWuPpbSilqveiE0FEcZF7U5xUPI0RN4xs,31880
|
29
29
|
mdbq/mysql/__init__.py,sha256=A_DPJyAoEvTSFojiI2e94zP0FKtCkkwKP1kYUCSyQzo,11
|
30
|
-
mdbq/mysql/mysql.py,sha256=
|
30
|
+
mdbq/mysql/mysql.py,sha256=NHCtz65fslci9sR_0o6063pjI3GgGJ1sOOu_H-Ltt-A,60456
|
31
31
|
mdbq/mysql/recheck_mysql.py,sha256=rgTpvDMWYTyEn7UQdlig-pdXDluTgiU8JG6lkMh8DV0,8665
|
32
32
|
mdbq/mysql/s_query.py,sha256=fnXncwSmA7CB0ELn1a-YxYZDrYkC2Bcgnj2J4dcQ8X8,8481
|
33
33
|
mdbq/mysql/year_month_day.py,sha256=VgewoE2pJxK7ErjfviL_SMTN77ki8GVbTUcao3vFUCE,1523
|
@@ -44,7 +44,7 @@ mdbq/req_post/__init__.py,sha256=jso1oHcy6cJEfa7udS_9uO5X6kZLoPBF8l3wCYmr5dM,18
|
|
44
44
|
mdbq/req_post/req_tb.py,sha256=qg7pet73IgKGmCwxaeUyImJIoeK_pBQT9BBKD7fkBNg,36160
|
45
45
|
mdbq/spider/__init__.py,sha256=RBMFXGy_jd1HXZhngB2T2XTvJqki8P_Fr-pBcwijnew,18
|
46
46
|
mdbq/spider/aikucun.py,sha256=01qJo_Di5Kmi2lG5_HKb0OI283b1-Pgqh-nnA0pX4TY,19038
|
47
|
-
mdbq-3.0.
|
48
|
-
mdbq-3.0.
|
49
|
-
mdbq-3.0.
|
50
|
-
mdbq-3.0.
|
47
|
+
mdbq-3.1.0.dist-info/METADATA,sha256=d0y0FnoOHOwMLCAfoP1JD1dhVbWy0s_yXsbjqaWWCcI,243
|
48
|
+
mdbq-3.1.0.dist-info/WHEEL,sha256=eOLhNAGa2EW3wWl_TU484h7q1UNgy0JXjjoqKoxAAQc,92
|
49
|
+
mdbq-3.1.0.dist-info/top_level.txt,sha256=2FQ-uLnCSB-OwFiWntzmwosW3X2Xqsg0ewh1axsaylA,5
|
50
|
+
mdbq-3.1.0.dist-info/RECORD,,
|
File without changes
|
File without changes
|