mdbq 3.0.9__py3-none-any.whl → 3.1.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- mdbq/aggregation/query_data.py +73 -33
- mdbq/mysql/mysql.py +7 -2
- {mdbq-3.0.9.dist-info → mdbq-3.1.1.dist-info}/METADATA +1 -1
- {mdbq-3.0.9.dist-info → mdbq-3.1.1.dist-info}/RECORD +6 -6
- {mdbq-3.0.9.dist-info → mdbq-3.1.1.dist-info}/WHEEL +0 -0
- {mdbq-3.0.9.dist-info → mdbq-3.1.1.dist-info}/top_level.txt +0 -0
mdbq/aggregation/query_data.py
CHANGED
@@ -16,6 +16,7 @@ import getpass
 import json
 import os
 import time
+import calendar
 
 """
 
@@ -134,7 +135,6 @@ class MysqlDatasQuery:
             '直接成交金额': ('直接成交金额', np.max)
             }
         )
-        df.to_csv('/Users/xigua/Downloads/zhuti.csv', index=False, header=True, encoding='utf-8_sig')
         df.insert(loc=1, column='推广渠道', value='万相台无界版')  # df中插入新列
         set_typ = {
             '日期': 'date',
@@ -442,9 +442,9 @@ class MysqlDatasQuery:
         dir_file = f'\\\\192.168.1.198\\时尚事业部\\01.运营部\\0-电商周报-每周五更新\\分类配置文件.xlsx'
         dir_file2 = '/Volumes/时尚事业部/01.运营部/0-电商周报-每周五更新/分类配置文件.xlsx'
         if platform.system() == 'Windows':
-            dir_file3 = 'C:\\同步空间\\BaiduSyncdisk\\原始文件
+            dir_file3 = 'C:\\同步空间\\BaiduSyncdisk\\原始文件3\\分类配置文件.xlsx'
         else:
-            dir_file3 = '/Users/xigua/数据中心/原始文件
+            dir_file3 = '/Users/xigua/数据中心/原始文件3/分类配置文件.xlsx'
         if not os.path.isfile(dir_file):
             dir_file = dir_file2
         if not os.path.isfile(dir_file):
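The only change in this hunk is pointing the two dir_file3 fallbacks at the renamed 原始文件3 folder; the selection logic itself is untouched. As a rough sketch of that logic (paths copied from the hunk; the "first existing file wins" reading is an interpretation of the chained os.path.isfile checks, not code from the package):

import os
import platform

candidates = [
    r'\\192.168.1.198\时尚事业部\01.运营部\0-电商周报-每周五更新\分类配置文件.xlsx',
    '/Volumes/时尚事业部/01.运营部/0-电商周报-每周五更新/分类配置文件.xlsx',
    ('C:\\同步空间\\BaiduSyncdisk\\原始文件3\\分类配置文件.xlsx'
     if platform.system() == 'Windows'
     else '/Users/xigua/数据中心/原始文件3/分类配置文件.xlsx'),
]
# fall through to the platform-specific local path when neither share is reachable
dir_file = next((p for p in candidates if os.path.isfile(p)), candidates[-1])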
@@ -990,12 +990,15 @@ class MysqlDatasQuery:
         )
         return True
 
-    @try_except
+    # @try_except
     def dplyd(self, db_name='聚合数据', table_name='店铺流量来源构成'):
         """ 新旧版取的字段是一样的 """
         start_date, end_date = self.months_data(num=self.months)
         projection = {
             '日期': 1,
+            '店铺名称': 1,
+            '类别': 1,
+            '来源构成': 1,
             '一级来源': 1,
             '二级来源': 1,
             '三级来源': 1,
@@ -1004,7 +1007,9 @@ class MysqlDatasQuery:
             '支付买家数': 1,
             '支付转化率': 1,
             '加购人数': 1,
-            '
+            '加购件数': 1,
+            '下单买家数': 1,
+            '关注店铺人数': 1,
         }
         df = self.download.data_to_df(
             db_name='生意参谋3',
@@ -1013,33 +1018,50 @@ class MysqlDatasQuery:
             end_date=end_date,
             projection=projection,
         )
+        df.drop_duplicates(subset=['日期', '店铺名称', '类别', '来源构成', '一级来源', '二级来源', '三级来源', '访客数'], keep='last', inplace=True, ignore_index=True)
         # 包含三级来源名称和预设索引值列
         # 截取 从上月1日 至 今天的花费数据, 推广款式按此数据从高到低排序(商品图+排序)
-
-
-
+        last_month, ii = get_day_of_month(1)
+        df['日期'] = pd.to_datetime(df['日期'], format='%Y-%m-%d', errors='ignore')  # 转换日期列
+        df_visitor3 = df[df['日期'] >= pd.to_datetime(last_month)]
+        df_visitor3 = df_visitor3[(df_visitor3['三级来源'] != '汇总') & (df_visitor3['三级来源'] != '0')]
         df_visitor3 = df_visitor3.groupby(['三级来源'], as_index=False).agg({'访客数': 'sum'})
         df_visitor3.sort_values('访客数', ascending=False, ignore_index=True, inplace=True)
         df_visitor3.reset_index(inplace=True)
         df_visitor3['index'] = df_visitor3['index'] + 100
-        df_visitor3.rename(columns={'index': '
-        df_visitor3 = df_visitor3[['三级来源', '
+        df_visitor3.rename(columns={'index': '三级来源索引'}, inplace=True)
+        df_visitor3 = df_visitor3[['三级来源', '三级来源索引']]
 
         # 包含二级来源名称和预设索引值列
-        df_visitor2 = df
-        df_visitor2 = df_visitor2[
-        # df_visitor2 = df_visitor2[(df_visitor2['日期'] >= f'{year_my}-{last_month.month}-01')]
+        df_visitor2 = df[df['日期'] >= pd.to_datetime(last_month)]
+        df_visitor2 = df_visitor2[(df_visitor2['二级来源'] != '汇总') & (df_visitor2['二级来源'] != '0')]
         df_visitor2 = df_visitor2.groupby(['二级来源'], as_index=False).agg({'访客数': 'sum'})
         df_visitor2.sort_values('访客数', ascending=False, ignore_index=True, inplace=True)
         df_visitor2.reset_index(inplace=True)
         df_visitor2['index'] = df_visitor2['index'] + 100
-        df_visitor2.rename(columns={'index': '
-        df_visitor2 = df_visitor2[['二级来源', '
-
+        df_visitor2.rename(columns={'index': '二级来源索引'}, inplace=True)
+        df_visitor2 = df_visitor2[['二级来源', '二级来源索引']]
+
+        # 包含一级来源名称和预设索引值列
+        df_visitor1 = df[df['日期'] >= pd.to_datetime(last_month)]
+        df_visitor1 = df_visitor1[(df_visitor1['一级来源'] != '汇总') & (df_visitor1['一级来源'] != '0')]
+        df_visitor1 = df_visitor1.groupby(['一级来源'], as_index=False).agg({'访客数': 'sum'})
+        df_visitor1.sort_values('访客数', ascending=False, ignore_index=True, inplace=True)
+        df_visitor1.reset_index(inplace=True)
+        df_visitor1['index'] = df_visitor1['index'] + 100
+        df_visitor1.rename(columns={'index': '一级来源索引'}, inplace=True)
+        df_visitor1 = df_visitor1[['一级来源', '一级来源索引']]
+
+        df = pd.merge(df, df_visitor1, how='left', left_on='一级来源', right_on='一级来源')
         df = pd.merge(df, df_visitor2, how='left', left_on='二级来源', right_on='二级来源')
         df = pd.merge(df, df_visitor3, how='left', left_on='三级来源', right_on='三级来源')
+        for col in ['一级来源索引', '二级来源索引', '三级来源索引']:
+            df[col] = df[col].apply(lambda x: 1000 if str(x) == 'nan' else x)
         set_typ = {
             '日期': 'date',
+            '店铺名称': 'varchar(100)',
+            '类别': 'varchar(100)',
+            '来源构成': 'varchar(100)',
             '一级来源': 'varchar(100)',
             '二级来源': 'varchar(100)',
             '三级来源': 'varchar(100)',
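The three new blocks for 一级来源 / 二级来源 / 三级来源 repeat the same pattern: keep rows since the first day of last month, drop the 汇总/'0' buckets, sum 访客数 per source, sort descending, and turn the row position into an index column starting at 100; after the merge, sources missing from that window fall back to 1000. A condensed sketch of that pattern, with a hypothetical rank_sources helper factored out (not part of the package):

import pandas as pd

def rank_sources(df, level_col, since):
    """Hypothetical helper mirroring the repeated blocks in the hunk above."""
    sub = df[df['日期'] >= pd.to_datetime(since)]
    sub = sub[(sub[level_col] != '汇总') & (sub[level_col] != '0')]
    sub = sub.groupby([level_col], as_index=False).agg({'访客数': 'sum'})
    sub.sort_values('访客数', ascending=False, ignore_index=True, inplace=True)
    sub.reset_index(inplace=True)          # row position becomes the 'index' column
    sub['index'] = sub['index'] + 100      # preset indexes start at 100
    sub.rename(columns={'index': f'{level_col}索引'}, inplace=True)
    return sub[[level_col, f'{level_col}索引']]

# df = ...  # the frame returned by data_to_df, as in the hunk above
# for level in ['一级来源', '二级来源', '三级来源']:
#     df = pd.merge(df, rank_sources(df, level, last_month), how='left', on=level)
#     df[f'{level}索引'] = df[f'{level}索引'].fillna(1000)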
@@ -1048,10 +1070,11 @@ class MysqlDatasQuery:
             '支付买家数': 'int',
             '支付转化率': 'decimal(10,4)',
             '加购人数': 'int',
-            '
-            '
-            '
+            '一级来源索引': 'smallint',
+            '二级来源索引': 'smallint',
+            '三级来源索引': 'smallint',
         }
+        # df.to_csv('/Users/xigua/Downloads/ll.csv', index=False, header=True, encoding='utf-8_sig')
         min_date = df['日期'].min()
         max_date = df['日期'].max()
         now = datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S")
@@ -1068,7 +1091,6 @@ class MysqlDatasQuery:
             filename=None,  # 用来追踪处理进度
             reset_id=False,  # 是否重置自增列
             set_typ=set_typ,
-
         )
         return True
 
@@ -2925,6 +2947,19 @@ class MysqlDatasQuery:
         return True
 
 
+def get_day_of_month(num):
+    """
+    num: 获取n月以前的第一天和最后一天, num=0时, 返回当月第一天和最后一天
+    """
+    _today = datetime.date.today()
+    months_ago = _today - relativedelta(months=num)  # n 月以前的今天
+    _, _lastDay = calendar.monthrange(months_ago.year, months_ago.month)  # 返回月的第一天的星期和当月总天数
+    _firstDay = datetime.date(months_ago.year, months_ago.month, day=1).strftime('%Y-%m-%d')
+    _lastDay = datetime.date(months_ago.year, months_ago.month, day=_lastDay).strftime('%Y-%m-%d')
+
+    return _firstDay, _lastDay
+
+
 def date_table():
     """
     生成 pbix 使用的日期表
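For reference, the new get_day_of_month() helper returns the first and last day of the month n months back as 'YYYY-MM-DD' strings (it relies on the calendar import added at the top of the file and on relativedelta, which the module presumably already imports). A usage sketch with purely illustrative dates:

# assuming today is 2024-11-15:
first_day, last_day = get_day_of_month(1)   # ('2024-10-01', '2024-10-31')
first_day, last_day = get_day_of_month(0)   # ('2024-11-01', '2024-11-30')
# dplyd() keeps only the first value:
last_month, ii = get_day_of_month(1)        # '2024-10-01' is used to filter df['日期']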
@@ -3056,17 +3091,22 @@ def query_(months=1, less_dict=[]):
 
 
 if __name__ == '__main__':
-    main(
-    #
-    #
-    # system = platform.system()  # 本机系统
-    # host_name = socket.gethostname()  # 本机名
-    # conf = myconfig.main()
-    # db_list = conf[system][host_name]['mysql']['数据库集']
-    # # 4. 清理聚合数据
-    # optimize_data.op_data(
-    #     db_name_lists=['聚合数据'],
-    #     days=3650,
-    #     is_mongo=False,
-    #     is_mysql=True,
+    # main(
+    #     days=150,  # 清理聚合数据的日期长度
+    #     months=3  # 生成聚合数据的长度
     # )
+
+    query_(months=3)
+
+    system = platform.system()  # 本机系统
+    host_name = socket.gethostname()  # 本机名
+    conf = myconfig.main()
+    db_list = conf[system][host_name]['mysql']['数据库集']
+    # 4. 清理聚合数据
+    optimize_data.op_data(
+        db_name_lists=['聚合数据'],
+        days=3650,
+        is_mongo=False,
+        is_mysql=True,
+    )
+
mdbq/mysql/mysql.py
CHANGED
@@ -428,7 +428,7 @@ class MysqlUpload:
 
     # @try_except
     def df_to_mysql(self, df, db_name, table_name, set_typ=None, icm_update=[], move_insert=False, df_sql=False, drop_duplicates=False,
-                    filename=None, count=None, reset_id=False):
+                    filename=None, count=None, reset_id=False, allow_not_null=False):
         """
         db_name: 数据库名
         table_name: 表名
@@ -440,6 +440,7 @@ class MysqlUpload:
         icm_update: 增量更新, 在聚合数据中使用,原始文件不要使用,设置此参数时需将 drop_duplicates 改为 False
                     使用增量更新: 必须确保 icm_update 传进来的列必须是数据表中唯一主键,值不会发生变化,不会重复,否则可能产生错乱覆盖情况
         filename: 用来追踪处理进度,传这个参数是方便定位产生错误的文件
+        allow_not_null: 创建允许插入空值的列,正常情况下不允许空值
         """
         self.filename = filename
         if isinstance(df, pd.DataFrame):
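A hedged call sketch for the extended signature; the uploader construction and the database/table names below are placeholders, not values taken from this diff:

# uploader = MysqlUpload(...)  # constructed the same way as elsewhere in the project
# uploader.df_to_mysql(
#     df=df,
#     db_name='聚合数据',
#     table_name='店铺流量来源构成',
#     set_typ=set_typ,
#     allow_not_null=False,  # new flag; False keeps the previous NOT NULL behaviour
# )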
@@ -495,11 +496,15 @@ class MysqlUpload:
             col_exist = [item['COLUMN_NAME'] for item in cursor.fetchall()]
             cols = df.columns.tolist()
             col_not_exist = [col for col in cols if col not in col_exist]
+
             # 检查列,不存在则新建列
             if col_not_exist:  # 数据表中不存在的列
                 for col in col_not_exist:
                     # 创建列,需转义
-
+                    if allow_not_null:
+                        sql = f"ALTER TABLE `{table_name}` ADD COLUMN `{col}` {dtypes[col]};"
+                    else:
+                        sql = f"ALTER TABLE `{table_name}` ADD COLUMN `{col}` {dtypes[col]} NOT NULL;"
                     cursor.execute(sql)
                     print(f"添加列: {col}({dtypes[col]})")  # 添加列并指定数据类型
 
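The branch above only changes the generated DDL for newly added columns. A minimal sketch of the two statements it produces (table, column and type values are illustrative only, not taken from the package):

table_name, col, dtype = '店铺流量来源构成', '访客数', 'int'  # illustrative values
sql_nullable = f"ALTER TABLE `{table_name}` ADD COLUMN `{col}` {dtype};"          # allow_not_null=True
sql_strict = f"ALTER TABLE `{table_name}` ADD COLUMN `{col}` {dtype} NOT NULL;"   # allow_not_null=False (default, previous behaviour)
print(sql_nullable)
print(sql_strict)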
{mdbq-3.0.9.dist-info → mdbq-3.1.1.dist-info}/RECORD
CHANGED
@@ -5,7 +5,7 @@ mdbq/aggregation/aggregation.py,sha256=vgswBnIvXPmll6M1DGoCWozmlM6jdQDSHPdD4NJgq
 mdbq/aggregation/df_types.py,sha256=U9i3q2eRPTDY8qAPTw7irzu-Tlg4CIySW9uYro81wdk,8125
 mdbq/aggregation/mysql_types.py,sha256=YTGyrF9vcRgfkQbpT-e-JdJ7c7VF1dDHgyx9YZRES8w,10934
 mdbq/aggregation/optimize_data.py,sha256=79uwiM2WqNNFxGpE2wKz742PRq-ZGgFjdOV0vgptHdY,3513
-mdbq/aggregation/query_data.py,sha256=
+mdbq/aggregation/query_data.py,sha256=L8JtjWpvowbauRgvXn6ukPIbUvcpgdSgatEU6vaZPRA,138540
 mdbq/aggregation/query_data_bak.py,sha256=r1FU0C4zjXln7oVSrRkElh4Ehl-9mYhGcq57jLbViUA,104071
 mdbq/bdup/__init__.py,sha256=AkhsGk81SkG1c8FqDH5tRq-8MZmFobVbN60DTyukYTY,28
 mdbq/bdup/bdup.py,sha256=LAV0TgnQpc-LB-YuJthxb0U42_VkPidzQzAagan46lU,4234
@@ -27,7 +27,7 @@ mdbq/log/mylogger.py,sha256=oaT7Bp-Hb9jZt52seP3ISUuxVcI19s4UiqTeouScBO0,3258
 mdbq/mongo/__init__.py,sha256=SILt7xMtQIQl_m-ik9WLtJSXIVf424iYgCfE_tnQFbw,13
 mdbq/mongo/mongo.py,sha256=v9qvrp6p1ZRWuPpbSilqveiE0FEcZF7U5xUPI0RN4xs,31880
 mdbq/mysql/__init__.py,sha256=A_DPJyAoEvTSFojiI2e94zP0FKtCkkwKP1kYUCSyQzo,11
-mdbq/mysql/mysql.py,sha256=
+mdbq/mysql/mysql.py,sha256=wtBrjNL3CcF75OR1Uj_n0haZsM7tIZ1t2_QQPUWrtlw,60729
 mdbq/mysql/recheck_mysql.py,sha256=rgTpvDMWYTyEn7UQdlig-pdXDluTgiU8JG6lkMh8DV0,8665
 mdbq/mysql/s_query.py,sha256=fnXncwSmA7CB0ELn1a-YxYZDrYkC2Bcgnj2J4dcQ8X8,8481
 mdbq/mysql/year_month_day.py,sha256=VgewoE2pJxK7ErjfviL_SMTN77ki8GVbTUcao3vFUCE,1523
@@ -44,7 +44,7 @@ mdbq/req_post/__init__.py,sha256=jso1oHcy6cJEfa7udS_9uO5X6kZLoPBF8l3wCYmr5dM,18
 mdbq/req_post/req_tb.py,sha256=qg7pet73IgKGmCwxaeUyImJIoeK_pBQT9BBKD7fkBNg,36160
 mdbq/spider/__init__.py,sha256=RBMFXGy_jd1HXZhngB2T2XTvJqki8P_Fr-pBcwijnew,18
 mdbq/spider/aikucun.py,sha256=01qJo_Di5Kmi2lG5_HKb0OI283b1-Pgqh-nnA0pX4TY,19038
-mdbq-3.
-mdbq-3.
-mdbq-3.
-mdbq-3.
+mdbq-3.1.1.dist-info/METADATA,sha256=aSbFBYgdB_LrvXNt4bwau6i801bUxK5FudTEP51bAx8,243
+mdbq-3.1.1.dist-info/WHEEL,sha256=eOLhNAGa2EW3wWl_TU484h7q1UNgy0JXjjoqKoxAAQc,92
+mdbq-3.1.1.dist-info/top_level.txt,sha256=2FQ-uLnCSB-OwFiWntzmwosW3X2Xqsg0ewh1axsaylA,5
+mdbq-3.1.1.dist-info/RECORD,,
{mdbq-3.0.9.dist-info → mdbq-3.1.1.dist-info}/WHEEL
File without changes
{mdbq-3.0.9.dist-info → mdbq-3.1.1.dist-info}/top_level.txt
File without changes