mdbq 3.1.8__py3-none-any.whl → 3.2.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- mdbq/aggregation/aggregation.py +60 -18
- mdbq/aggregation/query_data.py +273 -173
- mdbq/mysql/mysql.py +58 -39
- mdbq/mysql/s_query.py +14 -11
- {mdbq-3.1.8.dist-info → mdbq-3.2.0.dist-info}/METADATA +1 -1
- {mdbq-3.1.8.dist-info → mdbq-3.2.0.dist-info}/RECORD +8 -8
- {mdbq-3.1.8.dist-info → mdbq-3.2.0.dist-info}/WHEEL +0 -0
- {mdbq-3.1.8.dist-info → mdbq-3.2.0.dist-info}/top_level.txt +0 -0
mdbq/aggregation/aggregation.py
CHANGED
@@ -1213,38 +1213,80 @@ def one_file_to_mysql(file, db_name, table_name):
|
|
1213
1213
|
)
|
1214
1214
|
|
1215
1215
|
|
1216
|
-
def
|
1217
|
-
|
1218
|
-
|
1216
|
+
def cut_as_year_month(as_month=False):
|
1217
|
+
"""
|
1218
|
+
将表格数据按年划分
|
1219
|
+
as_month: true 按月重新保存
|
1220
|
+
"""
|
1221
|
+
file_name = '达摩盘_人群报表'
|
1222
|
+
path = r'/Users/xigua/Downloads/数据库导出'
|
1219
1223
|
|
1220
1224
|
for root, dirs, files in os.walk(path, topdown=False):
|
1221
1225
|
for name in files:
|
1222
1226
|
if name.endswith('.csv') and 'baidu' not in name and '~' not in name:
|
1223
|
-
|
1224
|
-
|
1225
|
-
|
1226
|
-
# print(name)
|
1227
|
-
if len(df) == 0:
|
1227
|
+
pattern = re.findall(r'\d{4}.csv|\d{4}-\d{2}.csv', name)
|
1228
|
+
if pattern:
|
1228
1229
|
continue
|
1229
|
-
|
1230
|
-
|
1231
|
-
df.
|
1230
|
+
if file_name not in name:
|
1231
|
+
continue
|
1232
|
+
# df = pd.read_excel(os.path.join(root, name), header=0)
|
1233
|
+
df_before = pd.read_csv(os.path.join(root, name), encoding='utf-8_sig', header=0, na_filter=False)
|
1234
|
+
df_before['日期'] = pd.to_datetime(df_before['日期'], format='%Y-%m-%d', errors='ignore')
|
1235
|
+
max_date = df_before['日期'].max(skipna=True).year
|
1236
|
+
min_date = df_before['日期'].min(skipna=True).year
|
1237
|
+
for year in range(min_date, max_date+1):
|
1238
|
+
df = df_before[(df_before['日期'] >= f'{year}-01-01') & (df_before['日期'] <= f'{year}-12-31')]
|
1239
|
+
if as_month:
|
1240
|
+
for month in range(1, 13):
|
1241
|
+
if month < 10:
|
1242
|
+
month = f'0{month}'
|
1243
|
+
for n in range(31, 27, -1):
|
1244
|
+
try:
|
1245
|
+
end_day = pd.to_datetime(f'{year}-{month}-{n}')
|
1246
|
+
break
|
1247
|
+
except:
|
1248
|
+
continue
|
1249
|
+
st_day = pd.to_datetime(f'{year}-{month}-01')
|
1250
|
+
df_month = df[(df['日期'] >= st_day) & (df['日期'] <= end_day)]
|
1251
|
+
if len(df_month) == 0:
|
1252
|
+
continue
|
1253
|
+
df_month.sort_values('日期', ascending=True, ignore_index=True, inplace=True)
|
1254
|
+
df_month = df_month.reset_index(drop=True)
|
1255
|
+
df_month = df_month.reset_index(drop=False)
|
1256
|
+
df_month.pop('id')
|
1257
|
+
df_month.rename(columns={'index': 'id'}, inplace=True)
|
1258
|
+
df_month['id'] = df_month['id'].apply(lambda x: x + 1)
|
1259
|
+
new_name = f'{os.path.splitext(name)[0]}_{year}_{month}.csv'
|
1260
|
+
print(new_name)
|
1261
|
+
df_month.to_csv(os.path.join(root, new_name), encoding='utf-8_sig', index=False, header=True)
|
1262
|
+
else:
|
1263
|
+
df.sort_values('日期', ascending=True, ignore_index=True, inplace=True)
|
1264
|
+
df = df.reset_index(drop=True)
|
1265
|
+
df = df.reset_index(drop=False)
|
1266
|
+
df.pop('id')
|
1267
|
+
df.rename(columns={'index': 'id'}, inplace=True)
|
1268
|
+
df['id'] = df['id'].apply(lambda x: x + 1)
|
1269
|
+
new_name = f'{os.path.splitext(name)[0]}_{year}.csv'
|
1270
|
+
print(new_name)
|
1271
|
+
df.to_csv(os.path.join(root, new_name), encoding='utf-8_sig', index=False, header=True)
|
1232
1272
|
|
1233
1273
|
|
1234
1274
|
if __name__ == '__main__':
|
1275
|
+
cut_as_year_month(as_month=False)
|
1276
|
+
|
1235
1277
|
# username = 'root'
|
1236
1278
|
# password = ''
|
1237
1279
|
# host = ''
|
1238
1280
|
# port = ''
|
1239
1281
|
#
|
1240
|
-
# 上传 1 个文件到数据库
|
1241
|
-
one_file_to_mysql(
|
1242
|
-
|
1243
|
-
|
1244
|
-
|
1245
|
-
)
|
1282
|
+
# # 上传 1 个文件到数据库
|
1283
|
+
# one_file_to_mysql(
|
1284
|
+
# file=r'/Users/xigua/Downloads/批量SKU导出-批量任务.xlsx',
|
1285
|
+
# db_name='属性设置3',
|
1286
|
+
# table_name='京东商品属性',
|
1287
|
+
# )
|
1288
|
+
|
1246
1289
|
|
1247
|
-
# test()
|
1248
1290
|
# col = 1
|
1249
1291
|
# if col:
|
1250
1292
|
# # 上传一个目录到指定数据库
|
mdbq/aggregation/query_data.py
CHANGED
@@ -85,13 +85,17 @@ class MysqlDatasQuery:
|
|
85
85
|
'直接成交金额': 1,
|
86
86
|
'店铺名称': 1,
|
87
87
|
}
|
88
|
-
|
89
|
-
|
90
|
-
|
91
|
-
|
92
|
-
|
93
|
-
|
94
|
-
|
88
|
+
__res = []
|
89
|
+
for year in range(2024, datetime.datetime.today().year+1):
|
90
|
+
df = self.download.data_to_df(
|
91
|
+
db_name='推广数据2',
|
92
|
+
table_name=f'主体报表_{year}',
|
93
|
+
start_date=start_date,
|
94
|
+
end_date=end_date,
|
95
|
+
projection=projection,
|
96
|
+
)
|
97
|
+
__res.append(df)
|
98
|
+
df = pd.concat(__res, ignore_index=True)
|
95
99
|
df.rename(columns={
|
96
100
|
'场景名字': '营销场景',
|
97
101
|
'主体id': '商品id',
|
@@ -233,7 +237,6 @@ class MysqlDatasQuery:
|
|
233
237
|
filename=None, # 用来追踪处理进度
|
234
238
|
reset_id=False, # 是否重置自增列
|
235
239
|
set_typ=set_typ,
|
236
|
-
|
237
240
|
)
|
238
241
|
return True
|
239
242
|
|
@@ -254,13 +257,17 @@ class MysqlDatasQuery:
|
|
254
257
|
'退货量_发货后': 1,
|
255
258
|
'店铺名称': 1,
|
256
259
|
}
|
257
|
-
|
258
|
-
|
259
|
-
|
260
|
-
|
261
|
-
|
262
|
-
|
263
|
-
|
260
|
+
__res = []
|
261
|
+
for year in range(2024, datetime.datetime.today().year + 1):
|
262
|
+
df = self.download.data_to_df(
|
263
|
+
db_name='生意经3',
|
264
|
+
table_name=f'宝贝指标_{year}',
|
265
|
+
start_date=start_date,
|
266
|
+
end_date=end_date,
|
267
|
+
projection=projection,
|
268
|
+
)
|
269
|
+
__res.append(df)
|
270
|
+
df = pd.concat(__res, ignore_index=True)
|
264
271
|
df['宝贝id'] = df['宝贝id'].astype(str)
|
265
272
|
df = df.groupby(['日期', '店铺名称', '宝贝id', '行业类目'], as_index=False).agg(
|
266
273
|
**{
|
@@ -322,7 +329,6 @@ class MysqlDatasQuery:
|
|
322
329
|
filename=None, # 用来追踪处理进度
|
323
330
|
reset_id=False, # 是否重置自增列
|
324
331
|
set_typ=set_typ,
|
325
|
-
|
326
332
|
)
|
327
333
|
return True
|
328
334
|
|
@@ -344,13 +350,17 @@ class MysqlDatasQuery:
|
|
344
350
|
'人群名字': 1,
|
345
351
|
'店铺名称': 1,
|
346
352
|
}
|
347
|
-
|
348
|
-
|
349
|
-
|
350
|
-
|
351
|
-
|
352
|
-
|
353
|
-
|
353
|
+
__res = []
|
354
|
+
for year in range(2024, datetime.datetime.today().year + 1):
|
355
|
+
df = self.download.data_to_df(
|
356
|
+
db_name='推广数据2',
|
357
|
+
table_name=f'人群报表_{year}',
|
358
|
+
start_date=start_date,
|
359
|
+
end_date=end_date,
|
360
|
+
projection=projection,
|
361
|
+
)
|
362
|
+
__res.append(df)
|
363
|
+
df = pd.concat(__res, ignore_index=True)
|
354
364
|
df.rename(columns={
|
355
365
|
'场景名字': '营销场景',
|
356
366
|
'主体id': '商品id',
|
@@ -509,7 +519,6 @@ class MysqlDatasQuery:
|
|
509
519
|
filename=None, # 用来追踪处理进度
|
510
520
|
reset_id=False, # 是否重置自增列
|
511
521
|
set_typ=set_typ,
|
512
|
-
|
513
522
|
)
|
514
523
|
return True
|
515
524
|
|
@@ -532,13 +541,17 @@ class MysqlDatasQuery:
|
|
532
541
|
'直接成交金额': 1,
|
533
542
|
'店铺名称': 1,
|
534
543
|
}
|
535
|
-
|
536
|
-
|
537
|
-
|
538
|
-
|
539
|
-
|
540
|
-
|
541
|
-
|
544
|
+
__res = []
|
545
|
+
for year in range(2024, datetime.datetime.today().year + 1):
|
546
|
+
df = self.download.data_to_df(
|
547
|
+
db_name='推广数据2',
|
548
|
+
table_name=f'关键词报表_{year}',
|
549
|
+
start_date=start_date,
|
550
|
+
end_date=end_date,
|
551
|
+
projection=projection,
|
552
|
+
)
|
553
|
+
__res.append(df)
|
554
|
+
df = pd.concat(__res, ignore_index=True)
|
542
555
|
df.rename(columns={
|
543
556
|
'场景名字': '营销场景',
|
544
557
|
'宝贝id': '商品id',
|
@@ -642,7 +655,6 @@ class MysqlDatasQuery:
|
|
642
655
|
filename=None, # 用来追踪处理进度
|
643
656
|
reset_id=False, # 是否重置自增列
|
644
657
|
set_typ=set_typ,
|
645
|
-
|
646
658
|
)
|
647
659
|
return True
|
648
660
|
|
@@ -666,13 +678,17 @@ class MysqlDatasQuery:
|
|
666
678
|
'直接成交金额': 1,
|
667
679
|
'店铺名称': 1,
|
668
680
|
}
|
669
|
-
|
670
|
-
|
671
|
-
|
672
|
-
|
673
|
-
|
674
|
-
|
675
|
-
|
681
|
+
__res = []
|
682
|
+
for year in range(2024, datetime.datetime.today().year + 1):
|
683
|
+
df = self.download.data_to_df(
|
684
|
+
db_name='推广数据2',
|
685
|
+
table_name=f'超级直播报表_人群_{year}',
|
686
|
+
start_date=start_date,
|
687
|
+
end_date=end_date,
|
688
|
+
projection=projection,
|
689
|
+
)
|
690
|
+
__res.append(df)
|
691
|
+
df = pd.concat(__res, ignore_index=True)
|
676
692
|
df.rename(columns={
|
677
693
|
'观看次数': '观看次数',
|
678
694
|
'总购物车数': '加购量',
|
@@ -763,7 +779,6 @@ class MysqlDatasQuery:
|
|
763
779
|
filename=None, # 用来追踪处理进度
|
764
780
|
reset_id=False, # 是否重置自增列
|
765
781
|
set_typ=set_typ,
|
766
|
-
|
767
782
|
)
|
768
783
|
return True
|
769
784
|
|
@@ -785,13 +800,17 @@ class MysqlDatasQuery:
|
|
785
800
|
# '成交访客数': 1
|
786
801
|
'店铺名称': 1,
|
787
802
|
}
|
788
|
-
|
789
|
-
|
790
|
-
|
791
|
-
|
792
|
-
|
793
|
-
|
794
|
-
|
803
|
+
__res = []
|
804
|
+
for year in range(2024, datetime.datetime.today().year + 1):
|
805
|
+
df = self.download.data_to_df(
|
806
|
+
db_name='推广数据2',
|
807
|
+
table_name=f'品销宝_{year}',
|
808
|
+
start_date=start_date,
|
809
|
+
end_date=end_date,
|
810
|
+
projection=projection,
|
811
|
+
)
|
812
|
+
__res.append(df)
|
813
|
+
df = pd.concat(__res, ignore_index=True)
|
795
814
|
df = df[df['报表类型'] == '账户']
|
796
815
|
df.fillna(value=0, inplace=True)
|
797
816
|
df.rename(columns={
|
@@ -873,18 +892,20 @@ class MysqlDatasQuery:
|
|
873
892
|
filename=None, # 用来追踪处理进度
|
874
893
|
reset_id=False, # 是否重置自增列
|
875
894
|
set_typ=set_typ,
|
876
|
-
|
877
895
|
)
|
878
896
|
return True
|
879
897
|
|
880
898
|
@try_except
|
881
899
|
def idbm(self, db_name='聚合数据', table_name='商品id编码表'):
|
882
900
|
""" 用生意经日数据制作商品 id 和编码对照表 """
|
883
|
-
|
884
|
-
|
885
|
-
|
886
|
-
|
887
|
-
|
901
|
+
year = datetime.datetime.today().year
|
902
|
+
data_values = []
|
903
|
+
for year in range(2022, year+1):
|
904
|
+
data_values += self.download.columns_to_list(
|
905
|
+
db_name='生意经3',
|
906
|
+
table_name=f'宝贝指标_{year}',
|
907
|
+
columns_name=['宝贝id', '商家编码', '行业类目'],
|
908
|
+
)
|
888
909
|
df = pd.DataFrame(data=data_values)
|
889
910
|
df['宝贝id'] = df['宝贝id'].astype(str)
|
890
911
|
df.drop_duplicates(subset='宝贝id', keep='last', inplace=True, ignore_index=True)
|
@@ -898,7 +919,7 @@ class MysqlDatasQuery:
|
|
898
919
|
df['一级类目'] = df['行业类目']
|
899
920
|
df.drop('行业类目', axis=1, inplace=True)
|
900
921
|
df.sort_values('宝贝id', ascending=False, inplace=True)
|
901
|
-
df = df[(df['宝贝id'] != '973') & (df['宝贝id'] !=
|
922
|
+
df = df[(df['宝贝id'] != '973') & (df['宝贝id'] != 973) & (df['宝贝id'] != '0')]
|
902
923
|
set_typ = {
|
903
924
|
'宝贝id': 'bigint',
|
904
925
|
'商家编码': 'varchar(100)',
|
@@ -928,7 +949,6 @@ class MysqlDatasQuery:
|
|
928
949
|
filename=None, # 用来追踪处理进度
|
929
950
|
reset_id=False, # 是否重置自增列
|
930
951
|
set_typ=set_typ,
|
931
|
-
|
932
952
|
)
|
933
953
|
return True
|
934
954
|
|
@@ -1011,13 +1031,17 @@ class MysqlDatasQuery:
|
|
1011
1031
|
'下单买家数': 1,
|
1012
1032
|
'关注店铺人数': 1,
|
1013
1033
|
}
|
1014
|
-
|
1015
|
-
|
1016
|
-
|
1017
|
-
|
1018
|
-
|
1019
|
-
|
1020
|
-
|
1034
|
+
__res = []
|
1035
|
+
for year in range(2024, datetime.datetime.today().year+1):
|
1036
|
+
df = self.download.data_to_df(
|
1037
|
+
db_name='生意参谋3',
|
1038
|
+
table_name=f'店铺流量来源构成_{year}',
|
1039
|
+
start_date=start_date,
|
1040
|
+
end_date=end_date,
|
1041
|
+
projection=projection,
|
1042
|
+
)
|
1043
|
+
__res.append(df)
|
1044
|
+
df = pd.concat(__res, ignore_index=True)
|
1021
1045
|
df.drop_duplicates(subset=['日期', '店铺名称', '类别', '来源构成', '一级来源', '二级来源', '三级来源', '访客数'], keep='last', inplace=True, ignore_index=True)
|
1022
1046
|
# 包含三级来源名称和预设索引值列
|
1023
1047
|
# 截取 从上月1日 至 今天的花费数据, 推广款式按此数据从高到低排序(商品图+排序)
|
@@ -1162,13 +1186,17 @@ class MysqlDatasQuery:
|
|
1162
1186
|
'spu_id': 1,
|
1163
1187
|
'店铺名称':1,
|
1164
1188
|
}
|
1165
|
-
|
1166
|
-
|
1167
|
-
|
1168
|
-
|
1169
|
-
|
1170
|
-
|
1171
|
-
|
1189
|
+
__res = []
|
1190
|
+
for year in range(2024, datetime.datetime.today().year + 1):
|
1191
|
+
df = self.download.data_to_df(
|
1192
|
+
db_name='京东数据3',
|
1193
|
+
table_name=f'推广数据_京准通_{year}',
|
1194
|
+
start_date=start_date,
|
1195
|
+
end_date=end_date,
|
1196
|
+
projection=projection,
|
1197
|
+
)
|
1198
|
+
__res.append(df)
|
1199
|
+
df = pd.concat(__res, ignore_index=True)
|
1172
1200
|
df = df.groupby(
|
1173
1201
|
['日期', '店铺名称', '产品线', '触发sku_id', '跟单sku_id', 'spu_id', '花费', '展现数', '点击数'],
|
1174
1202
|
as_index=False).agg(
|
@@ -1373,13 +1401,17 @@ class MysqlDatasQuery:
|
|
1373
1401
|
'商品关注数': 1,
|
1374
1402
|
'店铺关注数': 1,
|
1375
1403
|
}
|
1376
|
-
|
1377
|
-
|
1378
|
-
|
1379
|
-
|
1380
|
-
|
1381
|
-
|
1382
|
-
|
1404
|
+
__res = []
|
1405
|
+
for year in range(2024, datetime.datetime.today().year + 1):
|
1406
|
+
df = self.download.data_to_df(
|
1407
|
+
db_name='京东数据3',
|
1408
|
+
table_name=f'推广数据_关键词报表_{year}',
|
1409
|
+
start_date=start_date,
|
1410
|
+
end_date=end_date,
|
1411
|
+
projection=projection,
|
1412
|
+
)
|
1413
|
+
__res.append(df)
|
1414
|
+
df = pd.concat(__res, ignore_index=True)
|
1383
1415
|
df_lin = df[['计划id', '推广计划']]
|
1384
1416
|
df_lin.drop_duplicates(subset=['计划id'], keep='last', inplace=True, ignore_index=True)
|
1385
1417
|
df = df.groupby(
|
@@ -1461,13 +1493,17 @@ class MysqlDatasQuery:
|
|
1461
1493
|
'加购商品件数': 1,
|
1462
1494
|
'加购人数': 1,
|
1463
1495
|
}
|
1464
|
-
|
1465
|
-
|
1466
|
-
|
1467
|
-
|
1468
|
-
|
1469
|
-
|
1470
|
-
|
1496
|
+
__res = []
|
1497
|
+
for year in range(2024, datetime.datetime.today().year + 1):
|
1498
|
+
df = self.download.data_to_df(
|
1499
|
+
db_name='京东数据3',
|
1500
|
+
table_name=f'京东商智_sku_商品明细_{year}',
|
1501
|
+
start_date=start_date,
|
1502
|
+
end_date=end_date,
|
1503
|
+
projection=projection,
|
1504
|
+
)
|
1505
|
+
__res.append(df)
|
1506
|
+
df = pd.concat(__res, ignore_index=True)
|
1471
1507
|
df = df[df['商品id'] != '合计']
|
1472
1508
|
df = df.groupby(['日期', '店铺名称', '商品id', '货号', '访客数', '成交客户数', '加购商品件数', '加购人数'],
|
1473
1509
|
as_index=False).agg(
|
@@ -1531,13 +1567,17 @@ class MysqlDatasQuery:
|
|
1531
1567
|
'加购商品件数': 1,
|
1532
1568
|
'加购人数': 1,
|
1533
1569
|
}
|
1534
|
-
|
1535
|
-
|
1536
|
-
|
1537
|
-
|
1538
|
-
|
1539
|
-
|
1540
|
-
|
1570
|
+
__res = []
|
1571
|
+
for year in range(2024, datetime.datetime.today().year + 1):
|
1572
|
+
df = self.download.data_to_df(
|
1573
|
+
db_name='京东数据3',
|
1574
|
+
table_name=f'京东商智_spu_商品明细_{year}',
|
1575
|
+
start_date=start_date,
|
1576
|
+
end_date=end_date,
|
1577
|
+
projection=projection,
|
1578
|
+
)
|
1579
|
+
__res.append(df)
|
1580
|
+
df = pd.concat(__res, ignore_index=True)
|
1541
1581
|
df = df[df['商品id'] != '合计']
|
1542
1582
|
df = df.groupby(['日期', '店铺名称', '商品id', '货号', '访客数', '成交客户数', '加购商品件数', '加购人数'],
|
1543
1583
|
as_index=False).agg(
|
@@ -1606,13 +1646,17 @@ class MysqlDatasQuery:
|
|
1606
1646
|
'客单价': 1,
|
1607
1647
|
'uv价值': 1,
|
1608
1648
|
}
|
1609
|
-
|
1610
|
-
|
1611
|
-
|
1612
|
-
|
1613
|
-
|
1614
|
-
|
1615
|
-
|
1649
|
+
__res = []
|
1650
|
+
for year in range(2024, datetime.datetime.today().year+1):
|
1651
|
+
df = self.download.data_to_df(
|
1652
|
+
db_name='生意参谋3',
|
1653
|
+
table_name=f'手淘搜索_本店引流词_{year}',
|
1654
|
+
start_date=start_date,
|
1655
|
+
end_date=end_date,
|
1656
|
+
projection=projection,
|
1657
|
+
)
|
1658
|
+
__res.append(df)
|
1659
|
+
df = pd.concat(__res, ignore_index=True)
|
1616
1660
|
df = df.groupby(
|
1617
1661
|
['日期', '店铺名称', '词类型', '搜索词'],
|
1618
1662
|
as_index=False).agg(
|
@@ -1823,13 +1867,17 @@ class MysqlDatasQuery:
|
|
1823
1867
|
'总成交金额': 1,
|
1824
1868
|
'店铺名称': 1,
|
1825
1869
|
}
|
1826
|
-
|
1827
|
-
|
1828
|
-
|
1829
|
-
|
1830
|
-
|
1831
|
-
|
1832
|
-
|
1870
|
+
__res = []
|
1871
|
+
for year in range(2024, datetime.datetime.today().year + 1):
|
1872
|
+
df_tm = self.download.data_to_df(
|
1873
|
+
db_name='推广数据2',
|
1874
|
+
table_name=f'营销场景报表_{year}',
|
1875
|
+
start_date=start_date,
|
1876
|
+
end_date=end_date,
|
1877
|
+
projection=projection,
|
1878
|
+
)
|
1879
|
+
__res.append(df_tm)
|
1880
|
+
df_tm = pd.concat(__res, ignore_index=True)
|
1833
1881
|
if len(df_tm) > 0:
|
1834
1882
|
df_tm.rename(columns={'场景名字': '营销场景'}, inplace=True)
|
1835
1883
|
df_tm = df_tm.groupby(
|
@@ -1843,14 +1891,17 @@ class MysqlDatasQuery:
|
|
1843
1891
|
'成交金额': ('总成交金额', np.max)
|
1844
1892
|
}
|
1845
1893
|
)
|
1846
|
-
|
1847
|
-
|
1848
|
-
|
1849
|
-
|
1850
|
-
|
1851
|
-
|
1852
|
-
|
1853
|
-
|
1894
|
+
__res = []
|
1895
|
+
for year in range(2024, datetime.datetime.today().year + 1):
|
1896
|
+
df_tb = self.download.data_to_df(
|
1897
|
+
db_name='推广数据_淘宝店',
|
1898
|
+
table_name=f'营销场景报表_{year}',
|
1899
|
+
start_date=start_date,
|
1900
|
+
end_date=end_date,
|
1901
|
+
projection=projection,
|
1902
|
+
)
|
1903
|
+
__res.append(df_tb)
|
1904
|
+
df_tb = pd.concat(__res, ignore_index=True)
|
1854
1905
|
if len(df_tb) > 0:
|
1855
1906
|
df_tb.rename(columns={'场景名字': '营销场景'}, inplace=True)
|
1856
1907
|
df_tb = df_tb.groupby(
|
@@ -1879,13 +1930,17 @@ class MysqlDatasQuery:
|
|
1879
1930
|
'总成交金额': 1,
|
1880
1931
|
'店铺名称': 1,
|
1881
1932
|
}
|
1882
|
-
|
1883
|
-
|
1884
|
-
|
1885
|
-
|
1886
|
-
|
1887
|
-
|
1888
|
-
|
1933
|
+
__res = []
|
1934
|
+
for year in range(2024, datetime.datetime.today().year + 1):
|
1935
|
+
df_tb_qzt = self.download.data_to_df(
|
1936
|
+
db_name='推广数据_淘宝店',
|
1937
|
+
table_name=f'全站推广报表_{year}',
|
1938
|
+
start_date=start_date,
|
1939
|
+
end_date=end_date,
|
1940
|
+
projection=projection,
|
1941
|
+
)
|
1942
|
+
__res.append(df_tb_qzt)
|
1943
|
+
df_tb_qzt = pd.concat(__res, ignore_index=True)
|
1889
1944
|
if len(df_tb_qzt) > 0:
|
1890
1945
|
# 这一步是排重
|
1891
1946
|
df_tb_qzt = df_tb_qzt.groupby(
|
@@ -1925,13 +1980,17 @@ class MysqlDatasQuery:
|
|
1925
1980
|
'成交金额': 1,
|
1926
1981
|
'店铺名称': 1,
|
1927
1982
|
}
|
1928
|
-
|
1929
|
-
|
1930
|
-
|
1931
|
-
|
1932
|
-
|
1933
|
-
|
1934
|
-
|
1983
|
+
__res = []
|
1984
|
+
for year in range(2024, datetime.datetime.today().year + 1):
|
1985
|
+
df_tm_pxb = self.download.data_to_df(
|
1986
|
+
db_name='推广数据2',
|
1987
|
+
table_name=f'品销宝_{year}',
|
1988
|
+
start_date=start_date,
|
1989
|
+
end_date=end_date,
|
1990
|
+
projection=projection,
|
1991
|
+
)
|
1992
|
+
__res.append(df_tm_pxb)
|
1993
|
+
df_tm_pxb = pd.concat(__res, ignore_index=True)
|
1935
1994
|
if len(df_tm_pxb) > 0:
|
1936
1995
|
df_tm_pxb = df_tm_pxb[df_tm_pxb['报表类型'] == '账户']
|
1937
1996
|
df_tm_pxb = df_tm_pxb.groupby(
|
@@ -1961,13 +2020,17 @@ class MysqlDatasQuery:
|
|
1961
2020
|
'总成交金额': 1,
|
1962
2021
|
'店铺名称': 1,
|
1963
2022
|
}
|
1964
|
-
|
1965
|
-
|
1966
|
-
|
1967
|
-
|
1968
|
-
|
1969
|
-
|
1970
|
-
|
2023
|
+
__res = []
|
2024
|
+
for year in range(2024, datetime.datetime.today().year + 1):
|
2025
|
+
df_tm_living = self.download.data_to_df(
|
2026
|
+
db_name='推广数据2',
|
2027
|
+
table_name=f'超级直播报表_人群_{year}',
|
2028
|
+
start_date=start_date,
|
2029
|
+
end_date=pd.to_datetime('2024-04-16'), # 只可以取此日期之前的数据
|
2030
|
+
projection=projection,
|
2031
|
+
)
|
2032
|
+
__res.append(df_tm_living)
|
2033
|
+
df_tm_living = pd.concat(__res, ignore_index=True)
|
1971
2034
|
if len(df_tm_living) > 0:
|
1972
2035
|
df_tm_living.rename(columns={'场景名字': '营销场景'}, inplace=True)
|
1973
2036
|
df_tm_living = df_tm_living.groupby(
|
@@ -1999,13 +2062,17 @@ class MysqlDatasQuery:
|
|
1999
2062
|
'spu_id': 1,
|
2000
2063
|
'店铺名称': 1,
|
2001
2064
|
}
|
2002
|
-
|
2003
|
-
|
2004
|
-
|
2005
|
-
|
2006
|
-
|
2007
|
-
|
2008
|
-
|
2065
|
+
__res = []
|
2066
|
+
for year in range(2024, datetime.datetime.today().year + 1):
|
2067
|
+
df_jd = self.download.data_to_df(
|
2068
|
+
db_name='京东数据3',
|
2069
|
+
table_name=f'推广数据_京准通_{year}',
|
2070
|
+
start_date=start_date,
|
2071
|
+
end_date=end_date,
|
2072
|
+
projection=projection,
|
2073
|
+
)
|
2074
|
+
__res.append(df_jd)
|
2075
|
+
df_jd = pd.concat(__res, ignore_index=True)
|
2009
2076
|
if len(df_jd) > 0:
|
2010
2077
|
df_jd = df_jd.groupby(['日期', '店铺名称', '产品线', '触发sku_id', '跟单sku_id', 'spu_id', '花费', '展现数', '点击数'],
|
2011
2078
|
as_index=False).agg(
|
@@ -2244,7 +2311,7 @@ class MysqlDatasQuery:
|
|
2244
2311
|
)
|
2245
2312
|
return True
|
2246
2313
|
|
2247
|
-
@try_except
|
2314
|
+
# @try_except
|
2248
2315
|
def dmp_crowd(self, db_name='聚合数据', table_name='达摩盘_人群报表'):
|
2249
2316
|
start_date, end_date = self.months_data(num=self.months)
|
2250
2317
|
projection = {
|
@@ -2267,14 +2334,19 @@ class MysqlDatasQuery:
|
|
2267
2334
|
df_crowd.drop_duplicates(subset=['人群id',], keep='last', inplace=True, ignore_index=True)
|
2268
2335
|
df_crowd.pop('日期')
|
2269
2336
|
df_crowd = df_crowd.astype({'人群id': 'int64'}, errors='ignore')
|
2337
|
+
|
2270
2338
|
projection = {}
|
2271
|
-
|
2272
|
-
|
2273
|
-
|
2274
|
-
|
2275
|
-
|
2276
|
-
|
2277
|
-
|
2339
|
+
__res = []
|
2340
|
+
for year in range(2024, datetime.datetime.today().year + 1):
|
2341
|
+
df_dmp = self.download.data_to_df(
|
2342
|
+
db_name='达摩盘3',
|
2343
|
+
table_name=f'dmp人群报表_{year}',
|
2344
|
+
start_date=start_date,
|
2345
|
+
end_date=end_date,
|
2346
|
+
projection=projection,
|
2347
|
+
)
|
2348
|
+
__res.append(df_dmp)
|
2349
|
+
df_dmp = pd.concat(__res, ignore_index=True)
|
2278
2350
|
df_dmp = df_dmp.astype({'人群id': 'int64'}, errors='ignore')
|
2279
2351
|
df_dmp.sort_values('日期', ascending=True, ignore_index=True, inplace=True)
|
2280
2352
|
df_dmp.drop_duplicates(subset=['日期', '人群id', '消耗_元'], keep='last', inplace=True, ignore_index=True)
|
@@ -2286,8 +2358,8 @@ class MysqlDatasQuery:
|
|
2286
2358
|
# breakpoint()
|
2287
2359
|
df.rename(columns={'消耗_元': '消耗'}, inplace=True)
|
2288
2360
|
set_typ = {
|
2289
|
-
'店铺名称': 'varchar(100)',
|
2290
2361
|
'日期': 'date',
|
2362
|
+
'店铺名称': 'varchar(100)',
|
2291
2363
|
'人群id': 'bigint',
|
2292
2364
|
'人群名称': 'varchar(255)',
|
2293
2365
|
'营销渠道': 'varchar(100)',
|
@@ -2348,7 +2420,6 @@ class MysqlDatasQuery:
|
|
2348
2420
|
filename=None, # 用来追踪处理进度
|
2349
2421
|
reset_id=False, # 是否重置自增列
|
2350
2422
|
set_typ=set_typ,
|
2351
|
-
|
2352
2423
|
)
|
2353
2424
|
return True
|
2354
2425
|
|
@@ -2759,7 +2830,6 @@ class MysqlDatasQuery:
|
|
2759
2830
|
filename=None, # 用来追踪处理进度
|
2760
2831
|
reset_id=False, # 是否重置自增列
|
2761
2832
|
set_typ=set_typ,
|
2762
|
-
|
2763
2833
|
)
|
2764
2834
|
return True
|
2765
2835
|
|
@@ -2870,7 +2940,6 @@ class MysqlDatasQuery:
|
|
2870
2940
|
filename=None, # 用来追踪处理进度
|
2871
2941
|
reset_id=False, # 是否重置自增列
|
2872
2942
|
set_typ=set_typ,
|
2873
|
-
|
2874
2943
|
)
|
2875
2944
|
return True
|
2876
2945
|
|
@@ -2926,8 +2995,8 @@ class MysqlDatasQuery:
|
|
2926
2995
|
}
|
2927
2996
|
if not self.update_service:
|
2928
2997
|
return
|
2929
|
-
min_date = df['日期'].min()
|
2930
|
-
max_date = df['日期'].max()
|
2998
|
+
min_date = df['日期'].min().strftime("%Y-%m-%d")
|
2999
|
+
max_date = df['日期'].max().strftime("%Y-%m-%d")
|
2931
3000
|
now = datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S")
|
2932
3001
|
print(f'{now} 正在更新: mysql ({host}:{port}) {db_name}/{table_name} -> {min_date}~{max_date}')
|
2933
3002
|
m_engine.df_to_mysql(
|
@@ -2942,7 +3011,6 @@ class MysqlDatasQuery:
|
|
2942
3011
|
filename=None, # 用来追踪处理进度
|
2943
3012
|
reset_id=False, # 是否重置自增列
|
2944
3013
|
set_typ=set_typ,
|
2945
|
-
|
2946
3014
|
)
|
2947
3015
|
return True
|
2948
3016
|
|
@@ -2964,7 +3032,7 @@ def date_table():
|
|
2964
3032
|
"""
|
2965
3033
|
生成 pbix 使用的日期表
|
2966
3034
|
"""
|
2967
|
-
start_date = '2022-01-
|
3035
|
+
start_date = '2022-01-07' # 日期表的起始日期
|
2968
3036
|
yesterday = time.strftime('%Y-%m-%d', time.localtime(time.time() - 86400))
|
2969
3037
|
dic = pd.date_range(start=start_date, end=yesterday)
|
2970
3038
|
df = pd.DataFrame(dic, columns=['日期'])
|
@@ -2983,6 +3051,34 @@ def date_table():
|
|
2983
3051
|
df['年月'] = df.apply(lambda x: x['年'] + x['月'], axis=1)
|
2984
3052
|
df['月日'] = df.apply(lambda x: x['月'] + x['日'] + '日', axis=1)
|
2985
3053
|
df['第n周'] = df['日期'].apply(lambda x: x.strftime('第%W周'))
|
3054
|
+
|
3055
|
+
# 重构 df,添加 1 列,从周五~下周四作为 1 周 汇总
|
3056
|
+
df['日期'] = pd.to_datetime(df['日期'], format='%Y-%m-%d', errors='ignore') # 转换日期列
|
3057
|
+
grouped = df.groupby(pd.Grouper(key='日期', freq='7D'))
|
3058
|
+
__res = []
|
3059
|
+
num = 1
|
3060
|
+
for name, group in grouped:
|
3061
|
+
if num > 52:
|
3062
|
+
num = 1
|
3063
|
+
# print(f'Group: {name}')
|
3064
|
+
group['第n周_new'] = f'第{num}周'
|
3065
|
+
num += 1
|
3066
|
+
__res.append(group)
|
3067
|
+
# print(group)
|
3068
|
+
# break
|
3069
|
+
df = pd.concat(__res, ignore_index=True)
|
3070
|
+
# df['日期'] = df['日期'].apply(lambda x: pd.to_datetime(x))
|
3071
|
+
df['weekname'] = df['日期'].dt.day_name()
|
3072
|
+
dict_dt = {
|
3073
|
+
'Monday': '星期一',
|
3074
|
+
'Tuesday': '星期二',
|
3075
|
+
'Wednesday': '星期三',
|
3076
|
+
'Thursday': '星期四',
|
3077
|
+
'Friday': '星期五',
|
3078
|
+
'Saturday': '星期六',
|
3079
|
+
'Sunday': '星期日',
|
3080
|
+
}
|
3081
|
+
df['星期'] = df['weekname'].apply(lambda x: dict_dt[x])
|
2986
3082
|
df['索引'] = p
|
2987
3083
|
df['月索引'] = mon
|
2988
3084
|
df.sort_values('日期', ascending=False, ignore_index=True, inplace=True)
|
@@ -3001,6 +3097,9 @@ def date_table():
|
|
3001
3097
|
'年月': 'varchar(50)',
|
3002
3098
|
'月日': 'varchar(50)',
|
3003
3099
|
'第n周': 'varchar(50)',
|
3100
|
+
'第n周_new': 'varchar(50)',
|
3101
|
+
'星期': 'varchar(50)',
|
3102
|
+
'weekname': 'varchar(50)',
|
3004
3103
|
'索引': 'int',
|
3005
3104
|
'月索引': 'int',
|
3006
3105
|
}
|
@@ -3091,22 +3190,23 @@ def query_(months=1, less_dict=[]):
|
|
3091
3190
|
|
3092
3191
|
|
3093
3192
|
if __name__ == '__main__':
|
3094
|
-
|
3095
|
-
|
3096
|
-
|
3097
|
-
# )
|
3098
|
-
|
3099
|
-
query_(months=3)
|
3100
|
-
|
3101
|
-
system = platform.system() # 本机系统
|
3102
|
-
host_name = socket.gethostname() # 本机名
|
3103
|
-
conf = myconfig.main()
|
3104
|
-
db_list = conf[system][host_name]['mysql']['数据库集']
|
3105
|
-
# 4. 清理聚合数据
|
3106
|
-
optimize_data.op_data(
|
3107
|
-
db_name_lists=['聚合数据'],
|
3108
|
-
days=3650,
|
3109
|
-
is_mongo=False,
|
3110
|
-
is_mysql=True,
|
3193
|
+
main(
|
3194
|
+
days=150, # 清理聚合数据的日期长度
|
3195
|
+
months=3 # 生成聚合数据的长度
|
3111
3196
|
)
|
3112
3197
|
|
3198
|
+
# query_(months=3)
|
3199
|
+
|
3200
|
+
# system = platform.system() # 本机系统
|
3201
|
+
# host_name = socket.gethostname() # 本机名
|
3202
|
+
# conf = myconfig.main()
|
3203
|
+
# db_list = conf[system][host_name]['mysql']['数据库集']
|
3204
|
+
# # 4. 清理聚合数据
|
3205
|
+
# optimize_data.op_data(
|
3206
|
+
# # db_name_lists=['聚合数据'],
|
3207
|
+
# db_name_lists=db_list,
|
3208
|
+
# days=3650,
|
3209
|
+
# is_mongo=False,
|
3210
|
+
# is_mysql=True,
|
3211
|
+
# )
|
3212
|
+
|
mdbq/mysql/mysql.py
CHANGED
@@ -17,23 +17,12 @@ from mdbq.config import myconfig
|
|
17
17
|
warnings.filterwarnings('ignore')
|
18
18
|
"""
|
19
19
|
建表流程:
|
20
|
-
|
21
|
-
1. 每个表手动上传一个文件建表
|
22
|
-
2. 全部建表完成,建议所有表的数据类型,有问题的在数据库修改
|
23
|
-
3. 清空所有数据表,仅保留列信息
|
24
|
-
4. 下载所有数据表的 dtypes 信息到 json 文件
|
25
|
-
5. 之后可以正常上传数据
|
20
|
+
|
26
21
|
|
27
22
|
建表规范:
|
28
23
|
1. 数据库和数据表名如果有字母,必须使用小写,大写在建库后会自动变小写,再次上传数据会找不到数据库(macos和linux都有这种情况)
|
29
24
|
2. 无论是数据库/表/列名还是值,尽量避免特殊字符或者表情符号,数据库/表/列名尽量都使用 `列名` 转义,避免错误
|
30
25
|
3. 小数必须使用 decimal, 禁止 float 和 double, 因为计算精度差异,后续需要聚合数据时会引发很多问题
|
31
|
-
4. 日期类型暂时全部用 DATETIME,使用 DATE 在后续可能会重复插入不能排重,因为 df 进来的数据, 日期是带时间的,而数据库中日期不含时间
|
32
|
-
5. 目前小数自动适配类型转换,对于文本或者大数全部用 mediumtext, 因为部分表涉及爬虫数据,进来的字符长度未知,暂时统一 mediumtext 避免入库失败
|
33
|
-
|
34
|
-
|
35
|
-
|
36
|
-
|
37
26
|
|
38
27
|
"""
|
39
28
|
|
@@ -127,7 +116,7 @@ class MysqlUpload:
|
|
127
116
|
return wrapper
|
128
117
|
|
129
118
|
@try_except
|
130
|
-
def dict_to_mysql(self, db_name, table_name, dict_data, icm_update=None, main_key=None, unique_main_key=None, index_length=100, set_typ=None, allow_not_null=False):
|
119
|
+
def dict_to_mysql(self, db_name, table_name, dict_data, icm_update=None, main_key=None, unique_main_key=None, index_length=100, set_typ=None, allow_not_null=False, cut_data=None):
|
131
120
|
"""
|
132
121
|
插入字典数据
|
133
122
|
dict_data: 字典
|
@@ -142,6 +131,21 @@ class MysqlUpload:
|
|
142
131
|
main_key = []
|
143
132
|
if not unique_main_key:
|
144
133
|
unique_main_key = []
|
134
|
+
|
135
|
+
if cut_data:
|
136
|
+
if '日期' in dict_data.keys():
|
137
|
+
try:
|
138
|
+
__y = pd.to_datetime(dict_data['日期']).strftime('%Y')
|
139
|
+
__y_m = pd.to_datetime(dict_data['日期']).strftime('%Y-%m')
|
140
|
+
if str(cut_data).lower() == 'year':
|
141
|
+
table_name = f'{table_name}_{__y}'
|
142
|
+
elif str(cut_data).lower() == 'month':
|
143
|
+
table_name = f'{table_name}_{__y_m}'
|
144
|
+
else:
|
145
|
+
print(f'参数不正确,cut_data应为 year 或 month ')
|
146
|
+
except Exception as e:
|
147
|
+
print(f'{table_name} 将数据按年/月保存(cut_data),但在转换日期时报错 -> {e}')
|
148
|
+
|
145
149
|
connection = pymysql.connect(**self.config) # 连接数据库
|
146
150
|
with connection.cursor() as cursor:
|
147
151
|
cursor.execute(f"SHOW DATABASES LIKE '{db_name}'") # 检查数据库是否存在
|
@@ -201,9 +205,9 @@ class MysqlUpload:
|
|
201
205
|
cursor.execute(sql)
|
202
206
|
if col in unique_main_key:
|
203
207
|
if dtypes[col] == 'mediumtext':
|
204
|
-
sql = f"ALTER TABLE {table_name} ADD UNIQUE (`{col}`({index_length}))"
|
208
|
+
sql = f"ALTER TABLE `{table_name}` ADD UNIQUE (`{col}`({index_length}))"
|
205
209
|
else:
|
206
|
-
sql = f"ALTER TABLE {table_name} ADD UNIQUE (`{col}`)"
|
210
|
+
sql = f"ALTER TABLE `{table_name}` ADD UNIQUE (`{col}`)"
|
207
211
|
cursor.execute(sql)
|
208
212
|
# if col in main_key or col in unique_main_key:
|
209
213
|
# sql = f"SHOW INDEXES FROM `{table_name}` WHERE `Column_name` = %s"
|
@@ -438,7 +442,7 @@ class MysqlUpload:
|
|
438
442
|
|
439
443
|
@try_except
|
440
444
|
def df_to_mysql(self, df, db_name, table_name, set_typ=None, icm_update=[], move_insert=False, df_sql=False, drop_duplicates=False,
|
441
|
-
filename=None, count=None, reset_id=False, allow_not_null=False):
|
445
|
+
filename=None, count=None, reset_id=False, allow_not_null=False, cut_data=None):
|
442
446
|
"""
|
443
447
|
db_name: 数据库名
|
444
448
|
table_name: 表名
|
@@ -464,6 +468,20 @@ class MysqlUpload:
|
|
464
468
|
print(f'{db_name} 不能为 None')
|
465
469
|
return
|
466
470
|
|
471
|
+
if cut_data:
|
472
|
+
if '日期' in df.columns.tolist():
|
473
|
+
try:
|
474
|
+
df['日期'] = pd.to_datetime(df['日期'], format='%Y-%m-%d', errors='ignore')
|
475
|
+
min_year = df['日期'].min(skipna=True).year
|
476
|
+
min_month = df['日期'].min(skipna=True).month
|
477
|
+
if str(cut_data).lower() == 'year':
|
478
|
+
table_name = f'{table_name}_{min_year}'
|
479
|
+
elif str(cut_data).lower() == 'month':
|
480
|
+
table_name = f'{table_name}_{min_year}_{min_month}'
|
481
|
+
else:
|
482
|
+
print(f'参数不正确,cut_data应为 year 或 month ')
|
483
|
+
except Exception as e:
|
484
|
+
print(f'{table_name} 将数据按年/月保存(cut_data),但在转换日期时报错 -> {e}')
|
467
485
|
# 清理 dataframe 非法值,并转换获取数据类型
|
468
486
|
dtypes, df = self.convert_df_dtypes(df)
|
469
487
|
if set_typ:
|
@@ -584,13 +602,13 @@ class MysqlUpload:
|
|
584
602
|
if reset_id:
|
585
603
|
pass
|
586
604
|
# try:
|
587
|
-
# cursor.execute(f"SHOW COLUMNS FROM {table_name} LIKE 'id'")
|
605
|
+
# cursor.execute(f"SHOW COLUMNS FROM `{table_name}` LIKE 'id'")
|
588
606
|
# result = cursor.fetchone()
|
589
607
|
# if result:
|
590
|
-
# cursor.execute(f"ALTER TABLE {table_name} DROP COLUMN id;") # 删除 id 列
|
608
|
+
# cursor.execute(f"ALTER TABLE `{table_name}` DROP COLUMN id;") # 删除 id 列
|
591
609
|
# cursor.execute(
|
592
|
-
# f"ALTER TABLE {table_name} ADD column id INT AUTO_INCREMENT PRIMARY KEY FIRST;")
|
593
|
-
# cursor.execute(f"ALTER TABLE {table_name} AUTO_INCREMENT = 1") # 设置自增从 1 开始
|
610
|
+
# f"ALTER TABLE `{table_name}` ADD column id INT AUTO_INCREMENT PRIMARY KEY FIRST;")
|
611
|
+
# cursor.execute(f"ALTER TABLE `{table_name}` AUTO_INCREMENT = 1") # 设置自增从 1 开始
|
594
612
|
# except Exception as e:
|
595
613
|
# print(f'{e}')
|
596
614
|
# connection.rollback()
|
@@ -689,13 +707,13 @@ class MysqlUpload:
|
|
689
707
|
|
690
708
|
# # 6. 重置自增列
|
691
709
|
# try:
|
692
|
-
# cursor.execute(f"SHOW COLUMNS FROM {table_name} LIKE 'id'")
|
710
|
+
# cursor.execute(f"SHOW COLUMNS FROM `{table_name}` LIKE 'id'")
|
693
711
|
# result = cursor.fetchone()
|
694
712
|
# if result:
|
695
|
-
# cursor.execute(f"ALTER TABLE {table_name} DROP COLUMN id;") # 删除 id 列
|
713
|
+
# cursor.execute(f"ALTER TABLE `{table_name}` DROP COLUMN id;") # 删除 id 列
|
696
714
|
# cursor.execute(
|
697
|
-
# f"ALTER TABLE {table_name} ADD column id INT AUTO_INCREMENT PRIMARY KEY FIRST;")
|
698
|
-
# cursor.execute(f"ALTER TABLE {table_name} AUTO_INCREMENT = 1") # 设置自增从 1 开始
|
715
|
+
# f"ALTER TABLE `{table_name}` ADD column id INT AUTO_INCREMENT PRIMARY KEY FIRST;")
|
716
|
+
# cursor.execute(f"ALTER TABLE `{table_name}` AUTO_INCREMENT = 1") # 设置自增从 1 开始
|
699
717
|
# except Exception as e:
|
700
718
|
# print(f'{table_name}, -> {e}')
|
701
719
|
# connection.rollback()
|
@@ -732,7 +750,7 @@ class MysqlUpload:
|
|
732
750
|
try:
|
733
751
|
with connection.cursor() as cursor:
|
734
752
|
# 获取指定日期范围的数据
|
735
|
-
sql = f"SELECT * FROM {db_name}
|
753
|
+
sql = f"SELECT * FROM `{db_name}`.`{table_name}` WHERE `{date_name}` BETWEEN '%s' AND '%s'" % (start_date, end_date)
|
736
754
|
cursor.execute(sql)
|
737
755
|
rows = cursor.fetchall() # 获取查询结果
|
738
756
|
columns = [desc[0] for desc in cursor.description]
|
@@ -892,7 +910,8 @@ class OptimizeDatas:
|
|
892
910
|
self.config.update({'database': self.db_name}) # 添加更新 config 字段
|
893
911
|
self.connection = pymysql.connect(**self.config)
|
894
912
|
with self.connection.cursor() as cursor:
|
895
|
-
sql = f"SELECT 1 FROM {table_name} LIMIT 1"
|
913
|
+
sql = f"SELECT 1 FROM `{table_name}` LIMIT 1"
|
914
|
+
# print(sql)
|
896
915
|
cursor.execute(sql)
|
897
916
|
result = cursor.fetchone()
|
898
917
|
if not result:
|
@@ -900,7 +919,7 @@ class OptimizeDatas:
|
|
900
919
|
print(f'{now}数据表: {table_name}, 数据长度为 0')
|
901
920
|
continue # 检查数据表是否为空
|
902
921
|
|
903
|
-
cursor.execute(f"SHOW FULL COLUMNS FROM {table_name}") # 查询数据表的列信息
|
922
|
+
cursor.execute(f"SHOW FULL COLUMNS FROM `{table_name}`") # 查询数据表的列信息
|
904
923
|
columns = cursor.fetchall()
|
905
924
|
date_exist = False
|
906
925
|
for col in columns: # 遍历列信息,检查是否存在类型为日期的列
|
@@ -908,8 +927,8 @@ class OptimizeDatas:
|
|
908
927
|
date_exist = True
|
909
928
|
break
|
910
929
|
if date_exist: # 存在日期列
|
911
|
-
sql_max = f"SELECT MAX(日期) AS max_date FROM {table_name}"
|
912
|
-
sql_min = f"SELECT MIN(日期) AS min_date FROM {table_name}"
|
930
|
+
sql_max = f"SELECT MAX(日期) AS max_date FROM `{table_name}`"
|
931
|
+
sql_min = f"SELECT MIN(日期) AS min_date FROM `{table_name}`"
|
913
932
|
cursor.execute(sql_max)
|
914
933
|
max_result = cursor.fetchone()
|
915
934
|
cursor.execute(sql_min)
|
@@ -931,13 +950,13 @@ class OptimizeDatas:
|
|
931
950
|
|
932
951
|
# # 5. 重置自增列 (id 列)
|
933
952
|
# try:
|
934
|
-
# cursor.execute(f"SHOW COLUMNS FROM {table_name} LIKE 'id'")
|
953
|
+
# cursor.execute(f"SHOW COLUMNS FROM `{table_name}` LIKE 'id'")
|
935
954
|
# result = cursor.fetchone()
|
936
955
|
# if result:
|
937
|
-
# cursor.execute(f"ALTER TABLE {table_name} DROP COLUMN id;") # 删除 id 列
|
956
|
+
# cursor.execute(f"ALTER TABLE `{table_name}` DROP COLUMN id;") # 删除 id 列
|
938
957
|
# cursor.execute(
|
939
|
-
# f"ALTER TABLE {table_name} ADD column id INT AUTO_INCREMENT PRIMARY KEY FIRST;")
|
940
|
-
# cursor.execute(f"ALTER TABLE {table_name} AUTO_INCREMENT = 1") # 设置自增从 1 开始
|
958
|
+
# f"ALTER TABLE `{table_name}` ADD column id INT AUTO_INCREMENT PRIMARY KEY FIRST;")
|
959
|
+
# cursor.execute(f"ALTER TABLE `{table_name}` AUTO_INCREMENT = 1") # 设置自增从 1 开始
|
941
960
|
# except Exception as e:
|
942
961
|
# print(f'{e}')
|
943
962
|
# self.connection.rollback()
|
@@ -974,7 +993,7 @@ class OptimizeDatas:
|
|
974
993
|
with self.connection.cursor() as cursor:
|
975
994
|
placeholders = ', '.join(['%s'] * len(duplicate_id))
|
976
995
|
# 移除冗余数据
|
977
|
-
sql = f"DELETE FROM {table_name} WHERE id IN ({placeholders})"
|
996
|
+
sql = f"DELETE FROM `{table_name}` WHERE id IN ({placeholders})"
|
978
997
|
cursor.execute(sql, duplicate_id)
|
979
998
|
now = datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S ")
|
980
999
|
print(f"{now}{table_name} -> {date.strftime('%Y-%m-%d')} before: {len(datas)}, remove: {cursor.rowcount}")
|
@@ -985,7 +1004,7 @@ class OptimizeDatas:
|
|
985
1004
|
|
986
1005
|
def delete_duplicate2(self, table_name, except_key=['更新时间']):
|
987
1006
|
with self.connection.cursor() as cursor:
|
988
|
-
sql = f"SELECT * FROM {table_name}" # 如果不包含日期列,则获取全部数据
|
1007
|
+
sql = f"SELECT * FROM `{table_name}`" # 如果不包含日期列,则获取全部数据
|
989
1008
|
cursor.execute(sql)
|
990
1009
|
datas = cursor.fetchall()
|
991
1010
|
if not datas:
|
@@ -1012,7 +1031,7 @@ class OptimizeDatas:
|
|
1012
1031
|
with self.connection.cursor() as cursor:
|
1013
1032
|
placeholders = ', '.join(['%s'] * len(duplicate_id))
|
1014
1033
|
# 移除冗余数据
|
1015
|
-
sql = f"DELETE FROM {table_name} WHERE id IN ({placeholders})"
|
1034
|
+
sql = f"DELETE FROM `{table_name}` WHERE id IN ({placeholders})"
|
1016
1035
|
cursor.execute(sql, duplicate_id)
|
1017
1036
|
now = datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S ")
|
1018
1037
|
print(f"{now}{table_name} -> before: {len(datas)}, "
|
@@ -1064,7 +1083,7 @@ class OptimizeDatas:
|
|
1064
1083
|
connection = pymysql.connect(**self.config)
|
1065
1084
|
try:
|
1066
1085
|
with connection.cursor() as cursor:
|
1067
|
-
sql = f"SELECT * FROM {table_name} WHERE {'日期'} BETWEEN '%s' AND '%s'" % (date, date)
|
1086
|
+
sql = f"SELECT * FROM `{table_name}` WHERE {'日期'} BETWEEN '%s' AND '%s'" % (date, date)
|
1068
1087
|
cursor.execute(sql)
|
1069
1088
|
results = cursor.fetchall()
|
1070
1089
|
except Exception as e:
|
@@ -1096,14 +1115,14 @@ class OptimizeDatas:
|
|
1096
1115
|
self.config.update({'database': self.db_name}) # 添加更新 config 字段
|
1097
1116
|
self.connection = pymysql.connect(**self.config)
|
1098
1117
|
with self.connection.cursor() as cursor:
|
1099
|
-
cursor.execute(f"SHOW FULL COLUMNS FROM {table_name}") # 查询数据表的列信息
|
1118
|
+
cursor.execute(f"SHOW FULL COLUMNS FROM `{table_name}`") # 查询数据表的列信息
|
1100
1119
|
columns = cursor.fetchall()
|
1101
1120
|
columns = [{column['Field']: column['Type']} for column in columns]
|
1102
1121
|
for column in columns:
|
1103
1122
|
for key, value in column.items():
|
1104
1123
|
if key.endswith('_'):
|
1105
1124
|
new_name = re.sub(r'_+$', '', key)
|
1106
|
-
sql = f"ALTER TABLE {table_name} CHANGE COLUMN {key} {new_name} {value}"
|
1125
|
+
sql = f"ALTER TABLE `{table_name}` CHANGE COLUMN {key} {new_name} {value}"
|
1107
1126
|
cursor.execute(sql)
|
1108
1127
|
self.connection.commit()
|
1109
1128
|
if self.connection:
|
mdbq/mysql/s_query.py
CHANGED
@@ -60,7 +60,7 @@ class QueryDatas:
|
|
60
60
|
|
61
61
|
self.config.update({'database': db_name})
|
62
62
|
connection = pymysql.connect(**self.config) # 重新连接数据库
|
63
|
-
|
63
|
+
|
64
64
|
with connection.cursor() as cursor:
|
65
65
|
# 3. 获取数据表的所有列信息
|
66
66
|
sql = 'SELECT `COLUMN_NAME` FROM information_schema.columns WHERE table_schema = %s AND table_name = %s'
|
@@ -98,18 +98,21 @@ class QueryDatas:
|
|
98
98
|
rows = cursor.fetchall() # 获取查询结果
|
99
99
|
columns = [desc[0] for desc in cursor.description]
|
100
100
|
df = pd.DataFrame(rows, columns=columns) # 转为 df
|
101
|
-
|
102
|
-
|
103
|
-
|
104
|
-
|
101
|
+
if 'id' in df.columns.tolist():
|
102
|
+
df.pop('id') # 默认不返回 id 列
|
103
|
+
if len(df) == 0:
|
104
|
+
print(f'database: {db_name}, table: {table_name} 查询的数据为空1')
|
105
105
|
connection.close()
|
106
|
+
return df
|
106
107
|
|
107
|
-
if len(df) == 0:
|
108
|
-
|
109
|
-
|
110
|
-
cv = converter.DataFrameConverter()
|
111
|
-
df = cv.convert_df_cols(df)
|
112
|
-
|
108
|
+
# if len(df) == 0:
|
109
|
+
# print(f'database: {db_name}, table: {table_name} 查询的数据为空2')
|
110
|
+
# return pd.DataFrame()
|
111
|
+
# cv = converter.DataFrameConverter()
|
112
|
+
# df = cv.convert_df_cols(df)
|
113
|
+
# if 'id' in df.columns.tolist():
|
114
|
+
# df.pop('id') # 默认不返回 id 列
|
115
|
+
# return df
|
113
116
|
|
114
117
|
def columns_to_list(self, db_name, table_name, columns_name) -> list:
|
115
118
|
"""
|
@@ -1,11 +1,11 @@
|
|
1
1
|
mdbq/__init__.py,sha256=Il5Q9ATdX8yXqVxtP_nYqUhExzxPC_qk_WXQ_4h0exg,16
|
2
2
|
mdbq/__version__.py,sha256=y9Mp_8x0BCZSHsdLT_q5tX9wZwd5QgqrSIENLrb6vXA,62
|
3
3
|
mdbq/aggregation/__init__.py,sha256=EeDqX2Aml6SPx8363J-v1lz0EcZtgwIBYyCJV6CcEDU,40
|
4
|
-
mdbq/aggregation/aggregation.py,sha256=
|
4
|
+
mdbq/aggregation/aggregation.py,sha256=ltYntRjxeN9YX1uTR2_zoYEik2PVPD70xAF98d4TcAo,74732
|
5
5
|
mdbq/aggregation/df_types.py,sha256=U9i3q2eRPTDY8qAPTw7irzu-Tlg4CIySW9uYro81wdk,8125
|
6
6
|
mdbq/aggregation/mysql_types.py,sha256=YTGyrF9vcRgfkQbpT-e-JdJ7c7VF1dDHgyx9YZRES8w,10934
|
7
7
|
mdbq/aggregation/optimize_data.py,sha256=79uwiM2WqNNFxGpE2wKz742PRq-ZGgFjdOV0vgptHdY,3513
|
8
|
-
mdbq/aggregation/query_data.py,sha256=
|
8
|
+
mdbq/aggregation/query_data.py,sha256=E1Pngmf2zp7j2dVe7i1llIEtZxa9jtfB1A5deyfIaJU,143896
|
9
9
|
mdbq/aggregation/query_data_bak.py,sha256=r1FU0C4zjXln7oVSrRkElh4Ehl-9mYhGcq57jLbViUA,104071
|
10
10
|
mdbq/bdup/__init__.py,sha256=AkhsGk81SkG1c8FqDH5tRq-8MZmFobVbN60DTyukYTY,28
|
11
11
|
mdbq/bdup/bdup.py,sha256=LAV0TgnQpc-LB-YuJthxb0U42_VkPidzQzAagan46lU,4234
|
@@ -27,9 +27,9 @@ mdbq/log/mylogger.py,sha256=oaT7Bp-Hb9jZt52seP3ISUuxVcI19s4UiqTeouScBO0,3258
|
|
27
27
|
mdbq/mongo/__init__.py,sha256=SILt7xMtQIQl_m-ik9WLtJSXIVf424iYgCfE_tnQFbw,13
|
28
28
|
mdbq/mongo/mongo.py,sha256=v9qvrp6p1ZRWuPpbSilqveiE0FEcZF7U5xUPI0RN4xs,31880
|
29
29
|
mdbq/mysql/__init__.py,sha256=A_DPJyAoEvTSFojiI2e94zP0FKtCkkwKP1kYUCSyQzo,11
|
30
|
-
mdbq/mysql/mysql.py,sha256=
|
30
|
+
mdbq/mysql/mysql.py,sha256=Q4j0CTVnMXlrfFRd0yehcxNOMqLYztlx-848EIIet08,62681
|
31
31
|
mdbq/mysql/recheck_mysql.py,sha256=rgTpvDMWYTyEn7UQdlig-pdXDluTgiU8JG6lkMh8DV0,8665
|
32
|
-
mdbq/mysql/s_query.py,sha256=
|
32
|
+
mdbq/mysql/s_query.py,sha256=MbIprZ4yJDAZ9AahZPzl7hqS695Vs0P-AJNwAtA_EEc,9287
|
33
33
|
mdbq/mysql/year_month_day.py,sha256=VgewoE2pJxK7ErjfviL_SMTN77ki8GVbTUcao3vFUCE,1523
|
34
34
|
mdbq/other/__init__.py,sha256=jso1oHcy6cJEfa7udS_9uO5X6kZLoPBF8l3wCYmr5dM,18
|
35
35
|
mdbq/other/download_sku_picture.py,sha256=tlGh3oApJyH1vNva2PsMA-mdwl13tHdyLIOLO1FOyfo,45826
|
@@ -45,7 +45,7 @@ mdbq/req_post/__init__.py,sha256=jso1oHcy6cJEfa7udS_9uO5X6kZLoPBF8l3wCYmr5dM,18
|
|
45
45
|
mdbq/req_post/req_tb.py,sha256=qg7pet73IgKGmCwxaeUyImJIoeK_pBQT9BBKD7fkBNg,36160
|
46
46
|
mdbq/spider/__init__.py,sha256=RBMFXGy_jd1HXZhngB2T2XTvJqki8P_Fr-pBcwijnew,18
|
47
47
|
mdbq/spider/aikucun.py,sha256=48isoL6nEi_uniV-ja2HwYAI7O8D1i1goO4SzBwDGSU,19036
|
48
|
-
mdbq-3.
|
49
|
-
mdbq-3.
|
50
|
-
mdbq-3.
|
51
|
-
mdbq-3.
|
48
|
+
mdbq-3.2.0.dist-info/METADATA,sha256=1QsIsKB3o07nr8hpmMy4FLHSOoiskMwtGD0aRoaDThA,243
|
49
|
+
mdbq-3.2.0.dist-info/WHEEL,sha256=eOLhNAGa2EW3wWl_TU484h7q1UNgy0JXjjoqKoxAAQc,92
|
50
|
+
mdbq-3.2.0.dist-info/top_level.txt,sha256=2FQ-uLnCSB-OwFiWntzmwosW3X2Xqsg0ewh1axsaylA,5
|
51
|
+
mdbq-3.2.0.dist-info/RECORD,,
|
File without changes
|
File without changes
|