mdbq 3.1.8__py3-none-any.whl → 3.2.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1213,38 +1213,80 @@ def one_file_to_mysql(file, db_name, table_name):
      )
 
 
- def test():
-     path = r'/Users/xigua/Downloads/京准通'
-
+ def cut_as_year_month(as_month=False):
+     """
+     将表格数据按年划分
+     as_month: true 按月重新保存
+     """
+     file_name = '达摩盘_人群报表'
+     path = r'/Users/xigua/Downloads/数据库导出'
 
      for root, dirs, files in os.walk(path, topdown=False):
          for name in files:
              if name.endswith('.csv') and 'baidu' not in name and '~' not in name:
-                 # df = pd.read_excel(os.path.join(root, name), header=0)
-                 df = pd.read_csv(os.path.join(root, name), encoding='utf-8_sig', header=0, na_filter=False)
-
-                 # print(name)
-                 if len(df) == 0:
+                 pattern = re.findall(r'\d{4}.csv|\d{4}-\d{2}.csv', name)
+                 if pattern:
                      continue
-                 df['类目ID'] = df['类目ID'].apply(lambda x: 0 if str(x) == 'null' else x)
-                 df['类目名称'] = df['类目ID'].apply(lambda x: 0 if str(x) == 'null' else x)
-                 df.to_csv(os.path.join(root, name), encoding='utf-8_sig', index=False, header=True)
+                 if file_name not in name:
+                     continue
+                 # df = pd.read_excel(os.path.join(root, name), header=0)
+                 df_before = pd.read_csv(os.path.join(root, name), encoding='utf-8_sig', header=0, na_filter=False)
+                 df_before['日期'] = pd.to_datetime(df_before['日期'], format='%Y-%m-%d', errors='ignore')
+                 max_date = df_before['日期'].max(skipna=True).year
+                 min_date = df_before['日期'].min(skipna=True).year
+                 for year in range(min_date, max_date+1):
+                     df = df_before[(df_before['日期'] >= f'{year}-01-01') & (df_before['日期'] <= f'{year}-12-31')]
+                     if as_month:
+                         for month in range(1, 13):
+                             if month < 10:
+                                 month = f'0{month}'
+                             for n in range(31, 27, -1):
+                                 try:
+                                     end_day = pd.to_datetime(f'{year}-{month}-{n}')
+                                     break
+                                 except:
+                                     continue
+                             st_day = pd.to_datetime(f'{year}-{month}-01')
+                             df_month = df[(df['日期'] >= st_day) & (df['日期'] <= end_day)]
+                             if len(df_month) == 0:
+                                 continue
+                             df_month.sort_values('日期', ascending=True, ignore_index=True, inplace=True)
+                             df_month = df_month.reset_index(drop=True)
+                             df_month = df_month.reset_index(drop=False)
+                             df_month.pop('id')
+                             df_month.rename(columns={'index': 'id'}, inplace=True)
+                             df_month['id'] = df_month['id'].apply(lambda x: x + 1)
+                             new_name = f'{os.path.splitext(name)[0]}_{year}_{month}.csv'
+                             print(new_name)
+                             df_month.to_csv(os.path.join(root, new_name), encoding='utf-8_sig', index=False, header=True)
+                     else:
+                         df.sort_values('日期', ascending=True, ignore_index=True, inplace=True)
+                         df = df.reset_index(drop=True)
+                         df = df.reset_index(drop=False)
+                         df.pop('id')
+                         df.rename(columns={'index': 'id'}, inplace=True)
+                         df['id'] = df['id'].apply(lambda x: x + 1)
+                         new_name = f'{os.path.splitext(name)[0]}_{year}.csv'
+                         print(new_name)
+                         df.to_csv(os.path.join(root, new_name), encoding='utf-8_sig', index=False, header=True)
 
 
  if __name__ == '__main__':
+     cut_as_year_month(as_month=False)
+
      # username = 'root'
      # password = ''
      # host = ''
      # port = ''
      #
-     # 上传 1 个文件到数据库
-     one_file_to_mysql(
-         file=r'/Users/xigua/Downloads/批量SKU导出-批量任务.xlsx',
-         db_name='属性设置3',
-         table_name='京东商品属性',
-     )
+     # # 上传 1 个文件到数据库
+     # one_file_to_mysql(
+     #     file=r'/Users/xigua/Downloads/批量SKU导出-批量任务.xlsx',
+     #     db_name='属性设置3',
+     #     table_name='京东商品属性',
+     # )
+
 
-     # test()
      # col = 1
      # if col:
      #     # 上传一个目录到指定数据库
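Note on the hunk above: the new `cut_as_year_month` walks a local export directory and re-saves each matching CSV as one file per year, or per month when `as_month=True`, skipping files whose names already end in a `_YYYY`/`_YYYY-MM` suffix. The same partitioning idea in a minimal, self-contained sketch (the frame and values here are hypothetical, not part of the package):

    import pandas as pd

    # Hypothetical frame standing in for one exported CSV; '日期' is the date column.
    df = pd.DataFrame({'日期': pd.to_datetime(['2023-12-30', '2024-01-02']), '花费': [10, 20]})
    for year, part in df.groupby(df['日期'].dt.year):
        # One output file per calendar year found in the data.
        part.to_csv(f'达摩盘_人群报表_{year}.csv', index=False, encoding='utf-8-sig')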
@@ -85,13 +85,17 @@ class MysqlDatasQuery:
              '直接成交金额': 1,
              '店铺名称': 1,
          }
-         df = self.download.data_to_df(
-             db_name='推广数据2',
-             table_name='主体报表',
-             start_date=start_date,
-             end_date=end_date,
-             projection=projection,
-         )
+         __res = []
+         for year in range(2024, datetime.datetime.today().year+1):
+             df = self.download.data_to_df(
+                 db_name='推广数据2',
+                 table_name=f'主体报表_{year}',
+                 start_date=start_date,
+                 end_date=end_date,
+                 projection=projection,
+             )
+             __res.append(df)
+         df = pd.concat(__res, ignore_index=True)
          df.rename(columns={
              '场景名字': '营销场景',
              '主体id': '商品id',
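This hunk introduces the pattern 3.2.0 repeats throughout `MysqlDatasQuery`: source tables are now split by year (`主体报表_2024`, `主体报表_2025`, ...), so each reader loops from 2024 to the current year, queries every yearly table, and concatenates the pieces. A condensed sketch of the pattern, assuming only that `fetch` behaves like `self.download.data_to_df`:

    import datetime
    import pandas as pd

    def read_yearly_tables(fetch, table_name, first_year=2024, **kwargs):
        # One query per yearly partition, stitched back into a single frame.
        frames = [fetch(table_name=f'{table_name}_{year}', **kwargs)
                  for year in range(first_year, datetime.datetime.today().year + 1)]
        return pd.concat(frames, ignore_index=True)

The remaining `data_to_df` hunks below follow this template verbatim, differing only in the database name, the table prefix, and the variable the result lands in.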
@@ -233,7 +237,6 @@ class MysqlDatasQuery:
              filename=None, # 用来追踪处理进度
              reset_id=False, # 是否重置自增列
              set_typ=set_typ,
-
          )
          return True
 
@@ -254,13 +257,17 @@ class MysqlDatasQuery:
              '退货量_发货后': 1,
              '店铺名称': 1,
          }
-         df = self.download.data_to_df(
-             db_name='生意经3',
-             table_name='宝贝指标',
-             start_date=start_date,
-             end_date=end_date,
-             projection=projection,
-         )
+         __res = []
+         for year in range(2024, datetime.datetime.today().year + 1):
+             df = self.download.data_to_df(
+                 db_name='生意经3',
+                 table_name=f'宝贝指标_{year}',
+                 start_date=start_date,
+                 end_date=end_date,
+                 projection=projection,
+             )
+             __res.append(df)
+         df = pd.concat(__res, ignore_index=True)
          df['宝贝id'] = df['宝贝id'].astype(str)
          df = df.groupby(['日期', '店铺名称', '宝贝id', '行业类目'], as_index=False).agg(
              **{
@@ -322,7 +329,6 @@ class MysqlDatasQuery:
              filename=None, # 用来追踪处理进度
              reset_id=False, # 是否重置自增列
              set_typ=set_typ,
-
          )
          return True
 
@@ -344,13 +350,17 @@ class MysqlDatasQuery:
              '人群名字': 1,
              '店铺名称': 1,
          }
-         df = self.download.data_to_df(
-             db_name='推广数据2',
-             table_name='人群报表',
-             start_date=start_date,
-             end_date=end_date,
-             projection=projection,
-         )
+         __res = []
+         for year in range(2024, datetime.datetime.today().year + 1):
+             df = self.download.data_to_df(
+                 db_name='推广数据2',
+                 table_name=f'人群报表_{year}',
+                 start_date=start_date,
+                 end_date=end_date,
+                 projection=projection,
+             )
+             __res.append(df)
+         df = pd.concat(__res, ignore_index=True)
          df.rename(columns={
              '场景名字': '营销场景',
              '主体id': '商品id',
@@ -509,7 +519,6 @@ class MysqlDatasQuery:
              filename=None, # 用来追踪处理进度
              reset_id=False, # 是否重置自增列
              set_typ=set_typ,
-
          )
          return True
 
@@ -532,13 +541,17 @@ class MysqlDatasQuery:
              '直接成交金额': 1,
              '店铺名称': 1,
          }
-         df = self.download.data_to_df(
-             db_name='推广数据2',
-             table_name='关键词报表',
-             start_date=start_date,
-             end_date=end_date,
-             projection=projection,
-         )
+         __res = []
+         for year in range(2024, datetime.datetime.today().year + 1):
+             df = self.download.data_to_df(
+                 db_name='推广数据2',
+                 table_name=f'关键词报表_{year}',
+                 start_date=start_date,
+                 end_date=end_date,
+                 projection=projection,
+             )
+             __res.append(df)
+         df = pd.concat(__res, ignore_index=True)
          df.rename(columns={
              '场景名字': '营销场景',
              '宝贝id': '商品id',
@@ -642,7 +655,6 @@ class MysqlDatasQuery:
              filename=None, # 用来追踪处理进度
              reset_id=False, # 是否重置自增列
              set_typ=set_typ,
-
          )
          return True
 
@@ -666,13 +678,17 @@ class MysqlDatasQuery:
              '直接成交金额': 1,
              '店铺名称': 1,
          }
-         df = self.download.data_to_df(
-             db_name='推广数据2',
-             table_name='超级直播报表_人群',
-             start_date=start_date,
-             end_date=end_date,
-             projection=projection,
-         )
+         __res = []
+         for year in range(2024, datetime.datetime.today().year + 1):
+             df = self.download.data_to_df(
+                 db_name='推广数据2',
+                 table_name=f'超级直播报表_人群_{year}',
+                 start_date=start_date,
+                 end_date=end_date,
+                 projection=projection,
+             )
+             __res.append(df)
+         df = pd.concat(__res, ignore_index=True)
          df.rename(columns={
              '观看次数': '观看次数',
              '总购物车数': '加购量',
@@ -763,7 +779,6 @@ class MysqlDatasQuery:
              filename=None, # 用来追踪处理进度
              reset_id=False, # 是否重置自增列
              set_typ=set_typ,
-
          )
          return True
 
@@ -785,13 +800,17 @@ class MysqlDatasQuery:
              # '成交访客数': 1
              '店铺名称': 1,
          }
-         df = self.download.data_to_df(
-             db_name='推广数据2',
-             table_name='品销宝',
-             start_date=start_date,
-             end_date=end_date,
-             projection=projection,
-         )
+         __res = []
+         for year in range(2024, datetime.datetime.today().year + 1):
+             df = self.download.data_to_df(
+                 db_name='推广数据2',
+                 table_name=f'品销宝_{year}',
+                 start_date=start_date,
+                 end_date=end_date,
+                 projection=projection,
+             )
+             __res.append(df)
+         df = pd.concat(__res, ignore_index=True)
          df = df[df['报表类型'] == '账户']
          df.fillna(value=0, inplace=True)
          df.rename(columns={
@@ -873,18 +892,20 @@ class MysqlDatasQuery:
              filename=None, # 用来追踪处理进度
              reset_id=False, # 是否重置自增列
              set_typ=set_typ,
-
          )
          return True
 
      @try_except
      def idbm(self, db_name='聚合数据', table_name='商品id编码表'):
          """ 用生意经日数据制作商品 id 和编码对照表 """
-         data_values = self.download.columns_to_list(
-             db_name='生意经3',
-             table_name='宝贝指标',
-             columns_name=['宝贝id', '商家编码', '行业类目'],
-         )
+         year = datetime.datetime.today().year
+         data_values = []
+         for year in range(2022, year+1):
+             data_values += self.download.columns_to_list(
+                 db_name='生意经3',
+                 table_name=f'宝贝指标_{year}',
+                 columns_name=['宝贝id', '商家编码', '行业类目'],
+             )
          df = pd.DataFrame(data=data_values)
          df['宝贝id'] = df['宝贝id'].astype(str)
          df.drop_duplicates(subset='宝贝id', keep='last', inplace=True, ignore_index=True)
@@ -898,7 +919,7 @@ class MysqlDatasQuery:
          df['一级类目'] = df['行业类目']
          df.drop('行业类目', axis=1, inplace=True)
          df.sort_values('宝贝id', ascending=False, inplace=True)
-         df = df[(df['宝贝id'] != '973') & (df['宝贝id'] != '973')]
+         df = df[(df['宝贝id'] != '973') & (df['宝贝id'] != 973) & (df['宝贝id'] != '0')]
          set_typ = {
              '宝贝id': 'bigint',
              '商家编码': 'varchar(100)',
@@ -928,7 +949,6 @@ class MysqlDatasQuery:
              filename=None, # 用来追踪处理进度
              reset_id=False, # 是否重置自增列
              set_typ=set_typ,
-
          )
          return True
 
@@ -1011,13 +1031,17 @@ class MysqlDatasQuery:
              '下单买家数': 1,
              '关注店铺人数': 1,
          }
-         df = self.download.data_to_df(
-             db_name='生意参谋3',
-             table_name='店铺流量来源构成',
-             start_date=start_date,
-             end_date=end_date,
-             projection=projection,
-         )
+         __res = []
+         for year in range(2024, datetime.datetime.today().year+1):
+             df = self.download.data_to_df(
+                 db_name='生意参谋3',
+                 table_name=f'店铺流量来源构成_{year}',
+                 start_date=start_date,
+                 end_date=end_date,
+                 projection=projection,
+             )
+             __res.append(df)
+         df = pd.concat(__res, ignore_index=True)
          df.drop_duplicates(subset=['日期', '店铺名称', '类别', '来源构成', '一级来源', '二级来源', '三级来源', '访客数'], keep='last', inplace=True, ignore_index=True)
          # 包含三级来源名称和预设索引值列
          # 截取 从上月1日 至 今天的花费数据, 推广款式按此数据从高到低排序(商品图+排序)
@@ -1162,13 +1186,17 @@ class MysqlDatasQuery:
              'spu_id': 1,
              '店铺名称':1,
          }
-         df = self.download.data_to_df(
-             db_name='京东数据3',
-             table_name='推广数据_京准通',
-             start_date=start_date,
-             end_date=end_date,
-             projection=projection,
-         )
+         __res = []
+         for year in range(2024, datetime.datetime.today().year + 1):
+             df = self.download.data_to_df(
+                 db_name='京东数据3',
+                 table_name=f'推广数据_京准通_{year}',
+                 start_date=start_date,
+                 end_date=end_date,
+                 projection=projection,
+             )
+             __res.append(df)
+         df = pd.concat(__res, ignore_index=True)
          df = df.groupby(
              ['日期', '店铺名称', '产品线', '触发sku_id', '跟单sku_id', 'spu_id', '花费', '展现数', '点击数'],
              as_index=False).agg(
@@ -1373,13 +1401,17 @@ class MysqlDatasQuery:
              '商品关注数': 1,
              '店铺关注数': 1,
          }
-         df = self.download.data_to_df(
-             db_name='京东数据3',
-             table_name='推广数据_关键词报表',
-             start_date=start_date,
-             end_date=end_date,
-             projection=projection,
-         )
+         __res = []
+         for year in range(2024, datetime.datetime.today().year + 1):
+             df = self.download.data_to_df(
+                 db_name='京东数据3',
+                 table_name=f'推广数据_关键词报表_{year}',
+                 start_date=start_date,
+                 end_date=end_date,
+                 projection=projection,
+             )
+             __res.append(df)
+         df = pd.concat(__res, ignore_index=True)
          df_lin = df[['计划id', '推广计划']]
          df_lin.drop_duplicates(subset=['计划id'], keep='last', inplace=True, ignore_index=True)
          df = df.groupby(
@@ -1461,13 +1493,17 @@ class MysqlDatasQuery:
              '加购商品件数': 1,
              '加购人数': 1,
          }
-         df = self.download.data_to_df(
-             db_name='京东数据3',
-             table_name='京东商智_sku_商品明细',
-             start_date=start_date,
-             end_date=end_date,
-             projection=projection,
-         )
+         __res = []
+         for year in range(2024, datetime.datetime.today().year + 1):
+             df = self.download.data_to_df(
+                 db_name='京东数据3',
+                 table_name=f'京东商智_sku_商品明细_{year}',
+                 start_date=start_date,
+                 end_date=end_date,
+                 projection=projection,
+             )
+             __res.append(df)
+         df = pd.concat(__res, ignore_index=True)
          df = df[df['商品id'] != '合计']
          df = df.groupby(['日期', '店铺名称', '商品id', '货号', '访客数', '成交客户数', '加购商品件数', '加购人数'],
                          as_index=False).agg(
@@ -1531,13 +1567,17 @@ class MysqlDatasQuery:
              '加购商品件数': 1,
              '加购人数': 1,
          }
-         df = self.download.data_to_df(
-             db_name='京东数据3',
-             table_name='京东商智_spu_商品明细',
-             start_date=start_date,
-             end_date=end_date,
-             projection=projection,
-         )
+         __res = []
+         for year in range(2024, datetime.datetime.today().year + 1):
+             df = self.download.data_to_df(
+                 db_name='京东数据3',
+                 table_name=f'京东商智_spu_商品明细_{year}',
+                 start_date=start_date,
+                 end_date=end_date,
+                 projection=projection,
+             )
+             __res.append(df)
+         df = pd.concat(__res, ignore_index=True)
          df = df[df['商品id'] != '合计']
          df = df.groupby(['日期', '店铺名称', '商品id', '货号', '访客数', '成交客户数', '加购商品件数', '加购人数'],
                          as_index=False).agg(
@@ -1606,13 +1646,17 @@ class MysqlDatasQuery:
              '客单价': 1,
              'uv价值': 1,
          }
-         df = self.download.data_to_df(
-             db_name='生意参谋3',
-             table_name='手淘搜索_本店引流词',
-             start_date=start_date,
-             end_date=end_date,
-             projection=projection,
-         )
+         __res = []
+         for year in range(2024, datetime.datetime.today().year+1):
+             df = self.download.data_to_df(
+                 db_name='生意参谋3',
+                 table_name=f'手淘搜索_本店引流词_{year}',
+                 start_date=start_date,
+                 end_date=end_date,
+                 projection=projection,
+             )
+             __res.append(df)
+         df = pd.concat(__res, ignore_index=True)
          df = df.groupby(
              ['日期', '店铺名称', '词类型', '搜索词'],
              as_index=False).agg(
@@ -1823,13 +1867,17 @@ class MysqlDatasQuery:
              '总成交金额': 1,
              '店铺名称': 1,
          }
-         df_tm = self.download.data_to_df(
-             db_name='推广数据2',
-             table_name='营销场景报表',
-             start_date=start_date,
-             end_date=end_date,
-             projection=projection,
-         )
+         __res = []
+         for year in range(2024, datetime.datetime.today().year + 1):
+             df_tm = self.download.data_to_df(
+                 db_name='推广数据2',
+                 table_name=f'营销场景报表_{year}',
+                 start_date=start_date,
+                 end_date=end_date,
+                 projection=projection,
+             )
+             __res.append(df_tm)
+         df_tm = pd.concat(__res, ignore_index=True)
          if len(df_tm) > 0:
              df_tm.rename(columns={'场景名字': '营销场景'}, inplace=True)
              df_tm = df_tm.groupby(
@@ -1843,14 +1891,17 @@ class MysqlDatasQuery:
                  '成交金额': ('总成交金额', np.max)
              }
          )
-
-         df_tb = self.download.data_to_df(
-             db_name='推广数据_淘宝店',
-             table_name='营销场景报表',
-             start_date=start_date,
-             end_date=end_date,
-             projection=projection,
-         )
+         __res = []
+         for year in range(2024, datetime.datetime.today().year + 1):
+             df_tb = self.download.data_to_df(
+                 db_name='推广数据_淘宝店',
+                 table_name=f'营销场景报表_{year}',
+                 start_date=start_date,
+                 end_date=end_date,
+                 projection=projection,
+             )
+             __res.append(df_tb)
+         df_tb = pd.concat(__res, ignore_index=True)
          if len(df_tb) > 0:
              df_tb.rename(columns={'场景名字': '营销场景'}, inplace=True)
              df_tb = df_tb.groupby(
@@ -1879,13 +1930,17 @@ class MysqlDatasQuery:
              '总成交金额': 1,
              '店铺名称': 1,
          }
-         df_tb_qzt = self.download.data_to_df(
-             db_name='推广数据_淘宝店',
-             table_name='全站推广报表',
-             start_date=start_date,
-             end_date=end_date,
-             projection=projection,
-         )
+         __res = []
+         for year in range(2024, datetime.datetime.today().year + 1):
+             df_tb_qzt = self.download.data_to_df(
+                 db_name='推广数据_淘宝店',
+                 table_name=f'全站推广报表_{year}',
+                 start_date=start_date,
+                 end_date=end_date,
+                 projection=projection,
+             )
+             __res.append(df_tb_qzt)
+         df_tb_qzt = pd.concat(__res, ignore_index=True)
          if len(df_tb_qzt) > 0:
              # 这一步是排重
              df_tb_qzt = df_tb_qzt.groupby(
@@ -1925,13 +1980,17 @@ class MysqlDatasQuery:
              '成交金额': 1,
              '店铺名称': 1,
          }
-         df_tm_pxb = self.download.data_to_df(
-             db_name='推广数据2',
-             table_name='品销宝',
-             start_date=start_date,
-             end_date=end_date,
-             projection=projection,
-         )
+         __res = []
+         for year in range(2024, datetime.datetime.today().year + 1):
+             df_tm_pxb = self.download.data_to_df(
+                 db_name='推广数据2',
+                 table_name=f'品销宝_{year}',
+                 start_date=start_date,
+                 end_date=end_date,
+                 projection=projection,
+             )
+             __res.append(df_tm_pxb)
+         df_tm_pxb = pd.concat(__res, ignore_index=True)
          if len(df_tm_pxb) > 0:
              df_tm_pxb = df_tm_pxb[df_tm_pxb['报表类型'] == '账户']
              df_tm_pxb = df_tm_pxb.groupby(
@@ -1961,13 +2020,17 @@ class MysqlDatasQuery:
              '总成交金额': 1,
              '店铺名称': 1,
          }
-         df_tm_living = self.download.data_to_df(
-             db_name='推广数据2',
-             table_name='超级直播报表_人群',
-             start_date=start_date,
-             end_date=pd.to_datetime('2024-04-16'), # 只可以取此日期之前的数据
-             projection=projection,
-         )
+         __res = []
+         for year in range(2024, datetime.datetime.today().year + 1):
+             df_tm_living = self.download.data_to_df(
+                 db_name='推广数据2',
+                 table_name=f'超级直播报表_人群_{year}',
+                 start_date=start_date,
+                 end_date=pd.to_datetime('2024-04-16'), # 只可以取此日期之前的数据
+                 projection=projection,
+             )
+             __res.append(df_tm_living)
+         df_tm_living = pd.concat(__res, ignore_index=True)
          if len(df_tm_living) > 0:
              df_tm_living.rename(columns={'场景名字': '营销场景'}, inplace=True)
              df_tm_living = df_tm_living.groupby(
@@ -1999,13 +2062,17 @@ class MysqlDatasQuery:
              'spu_id': 1,
              '店铺名称': 1,
          }
-         df_jd = self.download.data_to_df(
-             db_name='京东数据3',
-             table_name='推广数据_京准通',
-             start_date=start_date,
-             end_date=end_date,
-             projection=projection,
-         )
+         __res = []
+         for year in range(2024, datetime.datetime.today().year + 1):
+             df_jd = self.download.data_to_df(
+                 db_name='京东数据3',
+                 table_name=f'推广数据_京准通_{year}',
+                 start_date=start_date,
+                 end_date=end_date,
+                 projection=projection,
+             )
+             __res.append(df_jd)
+         df_jd = pd.concat(__res, ignore_index=True)
          if len(df_jd) > 0:
              df_jd = df_jd.groupby(['日期', '店铺名称', '产品线', '触发sku_id', '跟单sku_id', 'spu_id', '花费', '展现数', '点击数'],
                                    as_index=False).agg(
@@ -2244,7 +2311,7 @@ class MysqlDatasQuery:
          )
          return True
 
-     @try_except
+     # @try_except
      def dmp_crowd(self, db_name='聚合数据', table_name='达摩盘_人群报表'):
          start_date, end_date = self.months_data(num=self.months)
          projection = {
@@ -2267,14 +2334,19 @@ class MysqlDatasQuery:
          df_crowd.drop_duplicates(subset=['人群id',], keep='last', inplace=True, ignore_index=True)
          df_crowd.pop('日期')
          df_crowd = df_crowd.astype({'人群id': 'int64'}, errors='ignore')
+
          projection = {}
-         df_dmp = self.download.data_to_df(
-             db_name='达摩盘3',
-             table_name='dmp人群报表',
-             start_date=start_date,
-             end_date=end_date,
-             projection=projection,
-         )
+         __res = []
+         for year in range(2024, datetime.datetime.today().year + 1):
+             df_dmp = self.download.data_to_df(
+                 db_name='达摩盘3',
+                 table_name=f'dmp人群报表_{year}',
+                 start_date=start_date,
+                 end_date=end_date,
+                 projection=projection,
+             )
+             __res.append(df_dmp)
+         df_dmp = pd.concat(__res, ignore_index=True)
          df_dmp = df_dmp.astype({'人群id': 'int64'}, errors='ignore')
          df_dmp.sort_values('日期', ascending=True, ignore_index=True, inplace=True)
          df_dmp.drop_duplicates(subset=['日期', '人群id', '消耗_元'], keep='last', inplace=True, ignore_index=True)
@@ -2286,8 +2358,8 @@ class MysqlDatasQuery:
          # breakpoint()
          df.rename(columns={'消耗_元': '消耗'}, inplace=True)
          set_typ = {
-             '店铺名称': 'varchar(100)',
              '日期': 'date',
+             '店铺名称': 'varchar(100)',
              '人群id': 'bigint',
              '人群名称': 'varchar(255)',
              '营销渠道': 'varchar(100)',
@@ -2348,7 +2420,6 @@ class MysqlDatasQuery:
              filename=None, # 用来追踪处理进度
              reset_id=False, # 是否重置自增列
              set_typ=set_typ,
-
          )
          return True
 
@@ -2759,7 +2830,6 @@ class MysqlDatasQuery:
              filename=None, # 用来追踪处理进度
              reset_id=False, # 是否重置自增列
              set_typ=set_typ,
-
          )
          return True
 
@@ -2870,7 +2940,6 @@ class MysqlDatasQuery:
              filename=None, # 用来追踪处理进度
              reset_id=False, # 是否重置自增列
              set_typ=set_typ,
-
          )
          return True
 
@@ -2926,8 +2995,8 @@ class MysqlDatasQuery:
          }
          if not self.update_service:
              return
-         min_date = df['日期'].min()
-         max_date = df['日期'].max()
+         min_date = df['日期'].min().strftime("%Y-%m-%d")
+         max_date = df['日期'].max().strftime("%Y-%m-%d")
          now = datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S")
          print(f'{now} 正在更新: mysql ({host}:{port}) {db_name}/{table_name} -> {min_date}~{max_date}')
          m_engine.df_to_mysql(
@@ -2942,7 +3011,6 @@ class MysqlDatasQuery:
              filename=None, # 用来追踪处理进度
              reset_id=False, # 是否重置自增列
              set_typ=set_typ,
-
          )
          return True
 
@@ -2964,7 +3032,7 @@ def date_table():
      """
      生成 pbix 使用的日期表
      """
-     start_date = '2022-01-01' # 日期表的起始日期
+     start_date = '2022-01-07' # 日期表的起始日期
      yesterday = time.strftime('%Y-%m-%d', time.localtime(time.time() - 86400))
      dic = pd.date_range(start=start_date, end=yesterday)
      df = pd.DataFrame(dic, columns=['日期'])
@@ -2983,6 +3051,34 @@ def date_table():
      df['年月'] = df.apply(lambda x: x['年'] + x['月'], axis=1)
      df['月日'] = df.apply(lambda x: x['月'] + x['日'] + '日', axis=1)
      df['第n周'] = df['日期'].apply(lambda x: x.strftime('第%W周'))
+
+     # 重构 df,添加 1 列,从周五~下周四作为 1 周 汇总
+     df['日期'] = pd.to_datetime(df['日期'], format='%Y-%m-%d', errors='ignore')  # 转换日期列
+     grouped = df.groupby(pd.Grouper(key='日期', freq='7D'))
+     __res = []
+     num = 1
+     for name, group in grouped:
+         if num > 52:
+             num = 1
+         # print(f'Group: {name}')
+         group['第n周_new'] = f'第{num}周'
+         num += 1
+         __res.append(group)
+         # print(group)
+         # break
+     df = pd.concat(__res, ignore_index=True)
+     # df['日期'] = df['日期'].apply(lambda x: pd.to_datetime(x))
+     df['weekname'] = df['日期'].dt.day_name()
+     dict_dt = {
+         'Monday': '星期一',
+         'Tuesday': '星期二',
+         'Wednesday': '星期三',
+         'Thursday': '星期四',
+         'Friday': '星期五',
+         'Saturday': '星期六',
+         'Sunday': '星期日',
+     }
+     df['星期'] = df['weekname'].apply(lambda x: dict_dt[x])
      df['索引'] = p
      df['月索引'] = mon
      df.sort_values('日期', ascending=False, ignore_index=True, inplace=True)
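The block added above derives a second week column: with the start date moved to 2022-01-07 (a Friday), `pd.Grouper(key='日期', freq='7D')` cuts the calendar into Friday-to-Thursday buckets, and the counter wraps back to 1 after week 52. A minimal sketch of the same bucketing, using only pandas:

    import pandas as pd

    df = pd.DataFrame({'日期': pd.date_range('2022-01-07', periods=21)})  # three full weeks
    # Iterating the Grouper yields (bucket start Timestamp, sub-frame) pairs.
    for i, (start, group) in enumerate(df.groupby(pd.Grouper(key='日期', freq='7D')), start=1):
        print(f'第{i}周: {start.date()} ~ {group["日期"].max().date()}')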
@@ -3001,6 +3097,9 @@ def date_table():
          '年月': 'varchar(50)',
          '月日': 'varchar(50)',
          '第n周': 'varchar(50)',
+         '第n周_new': 'varchar(50)',
+         '星期': 'varchar(50)',
+         'weekname': 'varchar(50)',
          '索引': 'int',
          '月索引': 'int',
      }
@@ -3091,22 +3190,23 @@ def query_(months=1, less_dict=[]):
 
 
  if __name__ == '__main__':
-     # main(
-     #     days=150, # 清理聚合数据的日期长度
-     #     months=3 # 生成聚合数据的长度
-     # )
-
-     query_(months=3)
-
-     system = platform.system() # 本机系统
-     host_name = socket.gethostname() # 本机名
-     conf = myconfig.main()
-     db_list = conf[system][host_name]['mysql']['数据库集']
-     # 4. 清理聚合数据
-     optimize_data.op_data(
-         db_name_lists=['聚合数据'],
-         days=3650,
-         is_mongo=False,
-         is_mysql=True,
+     main(
+         days=150, # 清理聚合数据的日期长度
+         months=3 # 生成聚合数据的长度
      )
 
+     # query_(months=3)
+
+     # system = platform.system() # 本机系统
+     # host_name = socket.gethostname() # 本机名
+     # conf = myconfig.main()
+     # db_list = conf[system][host_name]['mysql']['数据库集']
+     # # 4. 清理聚合数据
+     # optimize_data.op_data(
+     #     # db_name_lists=['聚合数据'],
+     #     db_name_lists=db_list,
+     #     days=3650,
+     #     is_mongo=False,
+     #     is_mysql=True,
+     # )
+
mdbq/mysql/mysql.py CHANGED
@@ -17,23 +17,12 @@ from mdbq.config import myconfig
  warnings.filterwarnings('ignore')
  """
  建表流程:
- 尽可能手动建表,再上传数据
- 1. 每个表手动上传一个文件建表
- 2. 全部建表完成,建议所有表的数据类型,有问题的在数据库修改
- 3. 清空所有数据表,仅保留列信息
- 4. 下载所有数据表的 dtypes 信息到 json 文件
- 5. 之后可以正常上传数据
+
 
  建表规范:
  1. 数据库和数据表名如果有字母,必须使用小写,大写在建库后会自动变小写,再次上传数据会找不到数据库(macos和linux都有这种情况)
  2. 无论是数据库/表/列名还是值,尽量避免特殊字符或者表情符号,数据库/表/列名尽量都使用 `列名` 转义,避免错误
  3. 小数必须使用 decimal, 禁止 float 和 double, 因为计算精度差异,后续需要聚合数据时会引发很多问题
- 4. 日期类型暂时全部用 DATETIME,使用 DATE 在后续可能会重复插入不能排重,因为 df 进来的数据, 日期是带时间的,而数据库中日期不含时间
- 5. 目前小数自动适配类型转换,对于文本或者大数全部用 mediumtext, 因为部分表涉及爬虫数据,进来的字符长度未知,暂时统一 mediumtext 避免入库失败
-
-
-
-
 
  """
 
@@ -127,7 +116,7 @@ class MysqlUpload:
          return wrapper
 
      @try_except
-     def dict_to_mysql(self, db_name, table_name, dict_data, icm_update=None, main_key=None, unique_main_key=None, index_length=100, set_typ=None, allow_not_null=False):
+     def dict_to_mysql(self, db_name, table_name, dict_data, icm_update=None, main_key=None, unique_main_key=None, index_length=100, set_typ=None, allow_not_null=False, cut_data=None):
          """
          插入字典数据
          dict_data: 字典
@@ -142,6 +131,21 @@ class MysqlUpload:
              main_key = []
          if not unique_main_key:
              unique_main_key = []
+
+         if cut_data:
+             if '日期' in dict_data.keys():
+                 try:
+                     __y = pd.to_datetime(dict_data['日期']).strftime('%Y')
+                     __y_m = pd.to_datetime(dict_data['日期']).strftime('%Y-%m')
+                     if str(cut_data).lower() == 'year':
+                         table_name = f'{table_name}_{__y}'
+                     elif str(cut_data).lower() == 'month':
+                         table_name = f'{table_name}_{__y_m}'
+                     else:
+                         print(f'参数不正确,cut_data应为 year 或 month ')
+                 except Exception as e:
+                     print(f'{table_name} 将数据按年/月保存(cut_data),但在转换日期时报错 -> {e}')
+
          connection = pymysql.connect(**self.config)  # 连接数据库
          with connection.cursor() as cursor:
              cursor.execute(f"SHOW DATABASES LIKE '{db_name}'")  # 检查数据库是否存在
@@ -201,9 +205,9 @@ class MysqlUpload:
                          cursor.execute(sql)
                      if col in unique_main_key:
                          if dtypes[col] == 'mediumtext':
-                             sql = f"ALTER TABLE {table_name} ADD UNIQUE (`{col}`({index_length}))"
+                             sql = f"ALTER TABLE `{table_name}` ADD UNIQUE (`{col}`({index_length}))"
                          else:
-                             sql = f"ALTER TABLE {table_name} ADD UNIQUE (`{col}`)"
+                             sql = f"ALTER TABLE `{table_name}` ADD UNIQUE (`{col}`)"
                          cursor.execute(sql)
                      # if col in main_key or col in unique_main_key:
                      #     sql = f"SHOW INDEXES FROM `{table_name}` WHERE `Column_name` = %s"
@@ -438,7 +442,7 @@ class MysqlUpload:
 
      @try_except
      def df_to_mysql(self, df, db_name, table_name, set_typ=None, icm_update=[], move_insert=False, df_sql=False, drop_duplicates=False,
-                     filename=None, count=None, reset_id=False, allow_not_null=False):
+                     filename=None, count=None, reset_id=False, allow_not_null=False, cut_data=None):
          """
          db_name: 数据库名
          table_name: 表名
@@ -464,6 +468,20 @@ class MysqlUpload:
              print(f'{db_name} 不能为 None')
              return
 
+         if cut_data:
+             if '日期' in df.columns.tolist():
+                 try:
+                     df['日期'] = pd.to_datetime(df['日期'], format='%Y-%m-%d', errors='ignore')
+                     min_year = df['日期'].min(skipna=True).year
+                     min_month = df['日期'].min(skipna=True).month
+                     if str(cut_data).lower() == 'year':
+                         table_name = f'{table_name}_{min_year}'
+                     elif str(cut_data).lower() == 'month':
+                         table_name = f'{table_name}_{min_year}_{min_month}'
+                     else:
+                         print(f'参数不正确,cut_data应为 year 或 month ')
+                 except Exception as e:
+                     print(f'{table_name} 将数据按年/月保存(cut_data),但在转换日期时报错 -> {e}')
          # 清理 dataframe 非法值,并转换获取数据类型
          dtypes, df = self.convert_df_dtypes(df)
          if set_typ:
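The DataFrame variant keys the suffix off the frame's minimum date, so a frame spanning two years still lands in a single table named after the earlier year. Note also that its month format differs from `dict_to_mysql` (`{table_name}_{min_year}_{min_month}`, e.g. `表_2024_5`, versus `表_2024-05`), which is visible in the two hunks above. A hedged usage sketch, with `uploader` standing in for a configured `MysqlUpload`:

    # Hypothetical call: a frame whose earliest 日期 is in 2024 is written to 主体报表_2024.
    uploader.df_to_mysql(df=df, db_name='推广数据2', table_name='主体报表', cut_data='year')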
@@ -584,13 +602,13 @@ class MysqlUpload:
                  if reset_id:
                      pass
                      # try:
-                     #     cursor.execute(f"SHOW COLUMNS FROM {table_name} LIKE 'id'")
+                     #     cursor.execute(f"SHOW COLUMNS FROM `{table_name}` LIKE 'id'")
                      #     result = cursor.fetchone()
                      #     if result:
-                     #         cursor.execute(f"ALTER TABLE {table_name} DROP COLUMN id;")  # 删除 id 列
+                     #         cursor.execute(f"ALTER TABLE `{table_name}` DROP COLUMN id;")  # 删除 id 列
                      #         cursor.execute(
-                     #             f"ALTER TABLE {table_name} ADD column id INT AUTO_INCREMENT PRIMARY KEY FIRST;")
-                     #         cursor.execute(f"ALTER TABLE {table_name} AUTO_INCREMENT = 1")  # 设置自增从 1 开始
+                     #             f"ALTER TABLE `{table_name}` ADD column id INT AUTO_INCREMENT PRIMARY KEY FIRST;")
+                     #         cursor.execute(f"ALTER TABLE `{table_name}` AUTO_INCREMENT = 1")  # 设置自增从 1 开始
                      # except Exception as e:
                      #     print(f'{e}')
                      #     connection.rollback()
@@ -689,13 +707,13 @@ class MysqlUpload:
 
          # # 6. 重置自增列
          # try:
-         #     cursor.execute(f"SHOW COLUMNS FROM {table_name} LIKE 'id'")
+         #     cursor.execute(f"SHOW COLUMNS FROM `{table_name}` LIKE 'id'")
          #     result = cursor.fetchone()
          #     if result:
-         #         cursor.execute(f"ALTER TABLE {table_name} DROP COLUMN id;")  # 删除 id 列
+         #         cursor.execute(f"ALTER TABLE `{table_name}` DROP COLUMN id;")  # 删除 id 列
          #         cursor.execute(
-         #             f"ALTER TABLE {table_name} ADD column id INT AUTO_INCREMENT PRIMARY KEY FIRST;")
-         #         cursor.execute(f"ALTER TABLE {table_name} AUTO_INCREMENT = 1")  # 设置自增从 1 开始
+         #             f"ALTER TABLE `{table_name}` ADD column id INT AUTO_INCREMENT PRIMARY KEY FIRST;")
+         #         cursor.execute(f"ALTER TABLE `{table_name}` AUTO_INCREMENT = 1")  # 设置自增从 1 开始
          # except Exception as e:
          #     print(f'{table_name}, -> {e}')
          #     connection.rollback()
@@ -732,7 +750,7 @@ class MysqlUpload:
          try:
              with connection.cursor() as cursor:
                  # 获取指定日期范围的数据
-                 sql = f"SELECT * FROM {db_name}.{table_name} WHERE {date_name} BETWEEN '%s' AND '%s'" % (start_date, end_date)
+                 sql = f"SELECT * FROM `{db_name}`.`{table_name}` WHERE `{date_name}` BETWEEN '%s' AND '%s'" % (start_date, end_date)
                  cursor.execute(sql)
                  rows = cursor.fetchall()  # 获取查询结果
                  columns = [desc[0] for desc in cursor.description]
@@ -892,7 +910,8 @@ class OptimizeDatas:
              self.config.update({'database': self.db_name})  # 添加更新 config 字段
              self.connection = pymysql.connect(**self.config)
              with self.connection.cursor() as cursor:
-                 sql = f"SELECT 1 FROM {table_name} LIMIT 1"
+                 sql = f"SELECT 1 FROM `{table_name}` LIMIT 1"
+                 # print(sql)
                  cursor.execute(sql)
                  result = cursor.fetchone()
                  if not result:
@@ -900,7 +919,7 @@ class OptimizeDatas:
                      print(f'{now}数据表: {table_name}, 数据长度为 0')
                      continue  # 检查数据表是否为空
 
-                 cursor.execute(f"SHOW FULL COLUMNS FROM {table_name}")  # 查询数据表的列信息
+                 cursor.execute(f"SHOW FULL COLUMNS FROM `{table_name}`")  # 查询数据表的列信息
                  columns = cursor.fetchall()
                  date_exist = False
                  for col in columns:  # 遍历列信息,检查是否存在类型为日期的列
@@ -908,8 +927,8 @@ class OptimizeDatas:
                          date_exist = True
                          break
                  if date_exist:  # 存在日期列
-                     sql_max = f"SELECT MAX(日期) AS max_date FROM {table_name}"
-                     sql_min = f"SELECT MIN(日期) AS min_date FROM {table_name}"
+                     sql_max = f"SELECT MAX(日期) AS max_date FROM `{table_name}`"
+                     sql_min = f"SELECT MIN(日期) AS min_date FROM `{table_name}`"
                      cursor.execute(sql_max)
                      max_result = cursor.fetchone()
                      cursor.execute(sql_min)
@@ -931,13 +950,13 @@ class OptimizeDatas:
 
          # # 5. 重置自增列 (id 列)
          # try:
-         #     cursor.execute(f"SHOW COLUMNS FROM {table_name} LIKE 'id'")
+         #     cursor.execute(f"SHOW COLUMNS FROM `{table_name}` LIKE 'id'")
          #     result = cursor.fetchone()
          #     if result:
-         #         cursor.execute(f"ALTER TABLE {table_name} DROP COLUMN id;")  # 删除 id 列
+         #         cursor.execute(f"ALTER TABLE `{table_name}` DROP COLUMN id;")  # 删除 id 列
          #         cursor.execute(
-         #             f"ALTER TABLE {table_name} ADD column id INT AUTO_INCREMENT PRIMARY KEY FIRST;")
-         #         cursor.execute(f"ALTER TABLE {table_name} AUTO_INCREMENT = 1")  # 设置自增从 1 开始
+         #             f"ALTER TABLE `{table_name}` ADD column id INT AUTO_INCREMENT PRIMARY KEY FIRST;")
+         #         cursor.execute(f"ALTER TABLE `{table_name}` AUTO_INCREMENT = 1")  # 设置自增从 1 开始
          # except Exception as e:
          #     print(f'{e}')
          #     self.connection.rollback()
@@ -974,7 +993,7 @@ class OptimizeDatas:
              with self.connection.cursor() as cursor:
                  placeholders = ', '.join(['%s'] * len(duplicate_id))
                  # 移除冗余数据
-                 sql = f"DELETE FROM {table_name} WHERE id IN ({placeholders})"
+                 sql = f"DELETE FROM `{table_name}` WHERE id IN ({placeholders})"
                  cursor.execute(sql, duplicate_id)
                  now = datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S ")
                  print(f"{now}{table_name} -> {date.strftime('%Y-%m-%d')} before: {len(datas)}, remove: {cursor.rowcount}")
@@ -985,7 +1004,7 @@ class OptimizeDatas:
 
      def delete_duplicate2(self, table_name, except_key=['更新时间']):
          with self.connection.cursor() as cursor:
-             sql = f"SELECT * FROM {table_name}"  # 如果不包含日期列,则获取全部数据
+             sql = f"SELECT * FROM `{table_name}`"  # 如果不包含日期列,则获取全部数据
              cursor.execute(sql)
              datas = cursor.fetchall()
              if not datas:
@@ -1012,7 +1031,7 @@ class OptimizeDatas:
              with self.connection.cursor() as cursor:
                  placeholders = ', '.join(['%s'] * len(duplicate_id))
                  # 移除冗余数据
-                 sql = f"DELETE FROM {table_name} WHERE id IN ({placeholders})"
+                 sql = f"DELETE FROM `{table_name}` WHERE id IN ({placeholders})"
                  cursor.execute(sql, duplicate_id)
                  now = datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S ")
                  print(f"{now}{table_name} -> before: {len(datas)}, "
@@ -1064,7 +1083,7 @@ class OptimizeDatas:
          connection = pymysql.connect(**self.config)
          try:
              with connection.cursor() as cursor:
-                 sql = f"SELECT * FROM {table_name} WHERE {'日期'} BETWEEN '%s' AND '%s'" % (date, date)
+                 sql = f"SELECT * FROM `{table_name}` WHERE {'日期'} BETWEEN '%s' AND '%s'" % (date, date)
                  cursor.execute(sql)
                  results = cursor.fetchall()
          except Exception as e:
@@ -1096,14 +1115,14 @@ class OptimizeDatas:
          self.config.update({'database': self.db_name})  # 添加更新 config 字段
          self.connection = pymysql.connect(**self.config)
          with self.connection.cursor() as cursor:
-             cursor.execute(f"SHOW FULL COLUMNS FROM {table_name}")  # 查询数据表的列信息
+             cursor.execute(f"SHOW FULL COLUMNS FROM `{table_name}`")  # 查询数据表的列信息
              columns = cursor.fetchall()
              columns = [{column['Field']: column['Type']} for column in columns]
              for column in columns:
                  for key, value in column.items():
                      if key.endswith('_'):
                          new_name = re.sub(r'_+$', '', key)
-                         sql = f"ALTER TABLE {table_name} CHANGE COLUMN {key} {new_name} {value}"
+                         sql = f"ALTER TABLE `{table_name}` CHANGE COLUMN {key} {new_name} {value}"
                          cursor.execute(sql)
          self.connection.commit()
          if self.connection:
mdbq/mysql/s_query.py CHANGED
@@ -60,7 +60,7 @@ class QueryDatas:
 
          self.config.update({'database': db_name})
          connection = pymysql.connect(**self.config)  # 重新连接数据库
-         # try:
+
          with connection.cursor() as cursor:
              # 3. 获取数据表的所有列信息
              sql = 'SELECT `COLUMN_NAME` FROM information_schema.columns WHERE table_schema = %s AND table_name = %s'
@@ -98,18 +98,21 @@ class QueryDatas:
              rows = cursor.fetchall()  # 获取查询结果
              columns = [desc[0] for desc in cursor.description]
              df = pd.DataFrame(rows, columns=columns)  # 转为 df
-         # except Exception as e:
-         #     print(f'{e}')
-             return df
-         # finally:
+             if 'id' in df.columns.tolist():
+                 df.pop('id')  # 默认不返回 id 列
+             if len(df) == 0:
+                 print(f'database: {db_name}, table: {table_name} 查询的数据为空1')
          connection.close()
+         return df
 
-         if len(df) == 0:
-             print(f'database: {db_name}, table: {table_name} 查询的数据为空')
-             return pd.DataFrame()
-         cv = converter.DataFrameConverter()
-         df = cv.convert_df_cols(df)
-         return df
+         # if len(df) == 0:
+         #     print(f'database: {db_name}, table: {table_name} 查询的数据为空2')
+         #     return pd.DataFrame()
+         # cv = converter.DataFrameConverter()
+         # df = cv.convert_df_cols(df)
+         # if 'id' in df.columns.tolist():
+         #     df.pop('id')  # 默认不返回 id 列
+         # return df
 
      def columns_to_list(self, db_name, table_name, columns_name) -> list:
          """
@@ -1,6 +1,6 @@
  Metadata-Version: 2.1
  Name: mdbq
- Version: 3.1.8
+ Version: 3.2.0
  Home-page: https://pypi.org/project/mdbq
  Author: xigua,
  Author-email: 2587125111@qq.com
@@ -1,11 +1,11 @@
  mdbq/__init__.py,sha256=Il5Q9ATdX8yXqVxtP_nYqUhExzxPC_qk_WXQ_4h0exg,16
  mdbq/__version__.py,sha256=y9Mp_8x0BCZSHsdLT_q5tX9wZwd5QgqrSIENLrb6vXA,62
  mdbq/aggregation/__init__.py,sha256=EeDqX2Aml6SPx8363J-v1lz0EcZtgwIBYyCJV6CcEDU,40
- mdbq/aggregation/aggregation.py,sha256=vgswBnIvXPmll6M1DGoCWozmlM6jdQDSHPdD4NJgqgg,72179
+ mdbq/aggregation/aggregation.py,sha256=ltYntRjxeN9YX1uTR2_zoYEik2PVPD70xAF98d4TcAo,74732
  mdbq/aggregation/df_types.py,sha256=U9i3q2eRPTDY8qAPTw7irzu-Tlg4CIySW9uYro81wdk,8125
  mdbq/aggregation/mysql_types.py,sha256=YTGyrF9vcRgfkQbpT-e-JdJ7c7VF1dDHgyx9YZRES8w,10934
  mdbq/aggregation/optimize_data.py,sha256=79uwiM2WqNNFxGpE2wKz742PRq-ZGgFjdOV0vgptHdY,3513
- mdbq/aggregation/query_data.py,sha256=L8JtjWpvowbauRgvXn6ukPIbUvcpgdSgatEU6vaZPRA,138540
+ mdbq/aggregation/query_data.py,sha256=E1Pngmf2zp7j2dVe7i1llIEtZxa9jtfB1A5deyfIaJU,143896
  mdbq/aggregation/query_data_bak.py,sha256=r1FU0C4zjXln7oVSrRkElh4Ehl-9mYhGcq57jLbViUA,104071
  mdbq/bdup/__init__.py,sha256=AkhsGk81SkG1c8FqDH5tRq-8MZmFobVbN60DTyukYTY,28
  mdbq/bdup/bdup.py,sha256=LAV0TgnQpc-LB-YuJthxb0U42_VkPidzQzAagan46lU,4234
@@ -27,9 +27,9 @@ mdbq/log/mylogger.py,sha256=oaT7Bp-Hb9jZt52seP3ISUuxVcI19s4UiqTeouScBO0,3258
  mdbq/mongo/__init__.py,sha256=SILt7xMtQIQl_m-ik9WLtJSXIVf424iYgCfE_tnQFbw,13
  mdbq/mongo/mongo.py,sha256=v9qvrp6p1ZRWuPpbSilqveiE0FEcZF7U5xUPI0RN4xs,31880
  mdbq/mysql/__init__.py,sha256=A_DPJyAoEvTSFojiI2e94zP0FKtCkkwKP1kYUCSyQzo,11
- mdbq/mysql/mysql.py,sha256=mIxlamozTv0smBRDCQF16ayqn4-OTmZD9QkNgnNftIw,61666
+ mdbq/mysql/mysql.py,sha256=Q4j0CTVnMXlrfFRd0yehcxNOMqLYztlx-848EIIet08,62681
  mdbq/mysql/recheck_mysql.py,sha256=rgTpvDMWYTyEn7UQdlig-pdXDluTgiU8JG6lkMh8DV0,8665
- mdbq/mysql/s_query.py,sha256=30WilUkm6lsiI5HfGphnnBSVbAXiNsgn2mEPd7AhKJs,9055
+ mdbq/mysql/s_query.py,sha256=MbIprZ4yJDAZ9AahZPzl7hqS695Vs0P-AJNwAtA_EEc,9287
  mdbq/mysql/year_month_day.py,sha256=VgewoE2pJxK7ErjfviL_SMTN77ki8GVbTUcao3vFUCE,1523
  mdbq/other/__init__.py,sha256=jso1oHcy6cJEfa7udS_9uO5X6kZLoPBF8l3wCYmr5dM,18
  mdbq/other/download_sku_picture.py,sha256=tlGh3oApJyH1vNva2PsMA-mdwl13tHdyLIOLO1FOyfo,45826
@@ -45,7 +45,7 @@ mdbq/req_post/__init__.py,sha256=jso1oHcy6cJEfa7udS_9uO5X6kZLoPBF8l3wCYmr5dM,18
  mdbq/req_post/req_tb.py,sha256=qg7pet73IgKGmCwxaeUyImJIoeK_pBQT9BBKD7fkBNg,36160
  mdbq/spider/__init__.py,sha256=RBMFXGy_jd1HXZhngB2T2XTvJqki8P_Fr-pBcwijnew,18
  mdbq/spider/aikucun.py,sha256=48isoL6nEi_uniV-ja2HwYAI7O8D1i1goO4SzBwDGSU,19036
- mdbq-3.1.8.dist-info/METADATA,sha256=3BkKptvgeELXXFhf9InK7NKm01QvWcYkPRNaQQra2qE,243
- mdbq-3.1.8.dist-info/WHEEL,sha256=eOLhNAGa2EW3wWl_TU484h7q1UNgy0JXjjoqKoxAAQc,92
- mdbq-3.1.8.dist-info/top_level.txt,sha256=2FQ-uLnCSB-OwFiWntzmwosW3X2Xqsg0ewh1axsaylA,5
- mdbq-3.1.8.dist-info/RECORD,,
+ mdbq-3.2.0.dist-info/METADATA,sha256=1QsIsKB3o07nr8hpmMy4FLHSOoiskMwtGD0aRoaDThA,243
+ mdbq-3.2.0.dist-info/WHEEL,sha256=eOLhNAGa2EW3wWl_TU484h7q1UNgy0JXjjoqKoxAAQc,92
+ mdbq-3.2.0.dist-info/top_level.txt,sha256=2FQ-uLnCSB-OwFiWntzmwosW3X2Xqsg0ewh1axsaylA,5
+ mdbq-3.2.0.dist-info/RECORD,,
File without changes