mdbq 1.6.4__py3-none-any.whl → 1.6.5__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
mdbq/clean/data_clean.py CHANGED
@@ -152,6 +152,7 @@ class DataClean:
152
152
  df = pd.read_csv(os.path.join(root, name), encoding=encoding, header=0, na_filter=False)
153
153
  if len(df) == 0:
154
154
  print(f'{name} 报表数据为空')
155
+ os.remove(os.path.join(root, name))
155
156
  continue
156
157
  pattern = re.findall(r'(.*_)\d{8}_\d{6}', name)
157
158
  shop_name = re.findall(r'\d{8}_\d{6}_(.*)\W', name)
@@ -180,6 +181,7 @@ class DataClean:
180
181
  df = pd.concat(df)
181
182
  if len(df) == 0:
182
183
  print(f'{name} 报表数据为空')
184
+ os.remove(os.path.join(root, name))
183
185
  continue
184
186
  new_name2 = os.path.splitext(name)[0] + '.csv'
185
187
  df['订单Id'] = df['订单Id'].apply(
@@ -198,6 +200,7 @@ class DataClean:
198
200
  df = pd.concat(df)
199
201
  if len(df) == 0:
200
202
  print(f'{name} 报表数据为空')
203
+ os.remove(os.path.join(root, name))
201
204
  continue
202
205
  new_name2 = os.path.splitext(name)[0] + '.csv'
203
206
  df['计划ID'] = df['计划ID'].apply(
@@ -217,6 +220,7 @@ class DataClean:
217
220
  df = pd.read_csv(os.path.join(root, name), encoding='utf-8_sig', header=1, na_filter=False)
218
221
  if len(df) == 0:
219
222
  print(f'{name} 报表数据为空')
223
+ os.remove(os.path.join(root, name))
220
224
  continue
221
225
  min_clm = df.min()['日期']
222
226
  max_clm = df.max()['日期']
@@ -247,6 +251,7 @@ class DataClean:
247
251
  df = pd.read_excel(os.path.join(root, name), header=5)
248
252
  if len(df) == 0:
249
253
  print(f'{name} 报表数据为空')
254
+ os.remove(os.path.join(root, name))
250
255
  continue
251
256
  df.replace(to_replace=['-'], value='', regex=False, inplace=True)
252
257
  if date01[0] != date02[0]:
@@ -299,6 +304,7 @@ class DataClean:
299
304
  df = pd.read_excel(os.path.join(root, name), header=4)
300
305
  if len(df) == 0:
301
306
  print(f'{name} 报表数据为空')
307
+ os.remove(os.path.join(root, name))
302
308
  continue
303
309
  df.replace(to_replace=['-'], value='', regex=False, inplace=True)
304
310
  df['商品ID'] = df['商品ID'].apply(
@@ -325,6 +331,7 @@ class DataClean:
325
331
  df = pd.read_excel(os.path.join(root, name), header=7)
326
332
  if len(df) == 0:
327
333
  print(f'{name} 报表数据为空')
334
+ os.remove(os.path.join(root, name))
328
335
  continue
329
336
  df.rename(columns={'统计日期': '日期'}, inplace=True)
330
337
  df['日期'] = pd.to_datetime(df['日期'], format='%Y-%m-%d', errors='ignore')
@@ -341,6 +348,7 @@ class DataClean:
341
348
  df = pd.read_excel(os.path.join(root, name), header=7)
342
349
  if len(df) == 0:
343
350
  print(f'{name} 报表数据为空')
351
+ os.remove(os.path.join(root, name))
344
352
  continue
345
353
  df.rename(columns={'统计日期': '日期'}, inplace=True)
346
354
  # 2024-2-19 官方更新了推广渠道来源名称,自助取数没有更新,这里强制更改
@@ -367,6 +375,7 @@ class DataClean:
367
375
  df = pd.read_excel(os.path.join(root, name), header=7)
368
376
  if len(df) == 0:
369
377
  print(f'{name} 报表数据为空')
378
+ os.remove(os.path.join(root, name))
370
379
  continue
371
380
  df.rename(columns={
372
381
  '统计日期': '日期',
@@ -390,6 +399,7 @@ class DataClean:
390
399
  df = pd.read_excel(os.path.join(root, name), header=7)
391
400
  if len(df) == 0:
392
401
  print(f'{name} 报表数据为空')
402
+ os.remove(os.path.join(root, name))
393
403
  continue
394
404
  df.rename(columns={'统计日期': '数据周期'}, inplace=True)
395
405
  # 2024-2-19 官方更新了推广渠道来源名称,自助取数没有更新,这里强制更改
@@ -422,6 +432,7 @@ class DataClean:
422
432
  df = pd.read_csv(os.path.join(root, name), encoding=encoding, header=0, na_filter=False)
423
433
  if len(df) == 0:
424
434
  print(f'{name} 报表数据为空')
435
+ os.remove(os.path.join(root, name))
425
436
  continue
426
437
  if '日期' in df.columns.tolist():
427
438
  df.pop('日期')
@@ -453,6 +464,7 @@ class DataClean:
453
464
  df = pd.read_csv(os.path.join(root, name), encoding=encoding, header=0, na_filter=False)
454
465
  if len(df) == 0:
455
466
  print(f'{name} 报表数据为空')
467
+ os.remove(os.path.join(root, name))
456
468
  continue
457
469
  df['日期'] = df['日期'].astype(str).apply(
458
470
  lambda x: '-'.join(re.findall(r'(\d{4})(\d{2})(\d{2})', x)[0]) if x else x)
@@ -484,6 +496,7 @@ class DataClean:
484
496
  df = pd.read_csv(os.path.join(root, name), encoding=encoding, header=0, na_filter=False)
485
497
  if len(df) == 0:
486
498
  print(f'{name} 报表数据为空')
499
+ os.remove(os.path.join(root, name))
487
500
  continue
488
501
  df['省'] = df['省份'].apply(lambda x: x if ' ├─ ' not in x and ' └─ ' not in x else None)
489
502
  df['城市'] = df[['省份', '省']].apply(lambda x: '汇总' if x['省'] else x['省份'], axis=1)
@@ -519,6 +532,7 @@ class DataClean:
519
532
  df = pd.read_csv(os.path.join(root, name), encoding='gb18030', header=0, na_filter=False)
520
533
  if len(df) == 0:
521
534
  print(f'{name} 报表数据为空')
535
+ os.remove(os.path.join(root, name))
522
536
  continue
523
537
  df.insert(loc=0, column='日期', value=date1)
524
538
  df.insert(loc=1, column='数据周期', value=date)
@@ -541,6 +555,7 @@ class DataClean:
541
555
  df = pd.read_excel(os.path.join(root, name), header=0)
542
556
  if len(df) == 0:
543
557
  print(f'{name} 报表数据为空')
558
+ os.remove(os.path.join(root, name))
544
559
  continue
545
560
  df.rename(columns={'场次ID': '场次id', '商品ID': '商品id'}, inplace=True)
546
561
  df.replace(to_replace=['-'], value='', regex=False, inplace=True)
@@ -574,6 +589,7 @@ class DataClean:
574
589
  df = pd.read_excel(os.path.join(root, name), header=0)
575
590
  if len(df) == 0:
576
591
  print(f'{name} 报表数据为空')
592
+ os.remove(os.path.join(root, name))
577
593
  continue
578
594
  df.replace(to_replace=['-'], value='', regex=False, inplace=True)
579
595
  df.rename(columns={'统计日期': '日期'}, inplace=True)
@@ -595,6 +611,7 @@ class DataClean:
595
611
  df = pd.read_excel(os.path.join(root, name), header=5)
596
612
  if len(df) == 0:
597
613
  print(f'{name} 报表数据为空')
614
+ os.remove(os.path.join(root, name))
598
615
  continue
599
616
  df.replace(to_replace=['-'], value='', regex=False, inplace=True)
600
617
  df.replace(to_replace=[','], value='', regex=True, inplace=True)
@@ -624,6 +641,7 @@ class DataClean:
624
641
  # print(sheet4)
625
642
  if len(df) == 0:
626
643
  print(f'{name} 报表数据为空')
644
+ os.remove(os.path.join(root, name))
627
645
  continue
628
646
  if len(df) < 1:
629
647
  print(f'{name} 跳过')
@@ -688,6 +706,7 @@ class DataClean:
688
706
  df = pd.read_excel(os.path.join(root, name), header=0)
689
707
  if len(df) == 0:
690
708
  print(f'{name} 报表数据为空')
709
+ os.remove(os.path.join(root, name))
691
710
  continue
692
711
  df.replace(to_replace=['-'], value='', regex=False, inplace=True)
693
712
  df.insert(loc=0, column='日期', value=new_date01)
@@ -724,6 +743,7 @@ class DataClean:
724
743
  df = pd.read_excel(os.path.join(root, name), header=0)
725
744
  if len(df) == 0:
726
745
  print(f'{name} 报表数据为空')
746
+ os.remove(os.path.join(root, name))
727
747
  continue
728
748
  if '10035975359247' in df['商品ID'].values or '10056642622343' in df['商品ID'].values:
729
749
  new_name = f'sku_{date1}_全部渠道_商品明细.csv'
@@ -756,6 +776,7 @@ class DataClean:
756
776
  df = pd.read_excel(os.path.join(root, name), header=0, engine='openpyxl')
757
777
  if len(df) == 0:
758
778
  print(f'{name} 报表数据为空')
779
+ os.remove(os.path.join(root, name))
759
780
  continue
760
781
  df.rename(columns={'商品的ID': 'skuid'}, inplace=True)
761
782
  df['skuid'] = df['skuid'].apply(lambda x: f'="{x}"' if x and '=' not in str(x) else x)
@@ -773,6 +794,7 @@ class DataClean:
773
794
  df = pd.read_excel(os.path.join(root, name), header=0)
774
795
  if len(df) == 0:
775
796
  print(f'{name} 报表数据为空')
797
+ os.remove(os.path.join(root, name))
776
798
  continue
777
799
  df.insert(0, '日期', date_in) # 插入新列
778
800
  df.rename(columns={'SKU': 'skuid'}, inplace=True)
@@ -792,6 +814,7 @@ class DataClean:
792
814
  df = pd.read_excel(os.path.join(root, name), header=0)
793
815
  if len(df) == 0:
794
816
  print(f'{name} 报表数据为空')
817
+ os.remove(os.path.join(root, name))
795
818
  continue
796
819
  df.replace(to_replace=[','], value='', regex=True, inplace=True)
797
820
  df.insert(loc=0, column='日期', value=start_date)
@@ -809,6 +832,7 @@ class DataClean:
809
832
  df = pd.read_excel(os.path.join(root, name), header=0)
810
833
  if len(df) == 0:
811
834
  print(f'{name} 报表数据为空')
835
+ os.remove(os.path.join(root, name))
812
836
  continue
813
837
  df['日期'] = df['日期'].apply(
814
838
  lambda x: '-'.join(re.findall(r'(\d{4})(\d{2})(\d{2})', str(x))[0])
@@ -836,6 +860,7 @@ class DataClean:
836
860
  df = pd.read_excel(os.path.join(root, name), header=0)
837
861
  if len(df) == 0:
838
862
  print(f'{name} 报表数据为空')
863
+ os.remove(os.path.join(root, name))
839
864
  continue
840
865
  df['日期'] = df['日期'].astype(str).apply(lambda x: f'{x[:4]}-{x[4:6]}-{x[6:8]}')
841
866
  df.insert(loc=0, column='类型', value='商家榜单')
@@ -852,6 +877,7 @@ class DataClean:
852
877
  df = pd.read_excel(os.path.join(root, name), header=0)
853
878
  if len(df) == 0:
854
879
  print(f'{name} 报表数据为空')
880
+ os.remove(os.path.join(root, name))
855
881
  continue
856
882
  d_time = datetime.datetime.today().strftime('%Y-%m-%d')
857
883
  df.insert(loc=0, column='日期', value=d_time)
@@ -871,6 +897,7 @@ class DataClean:
871
897
  df = pd.read_excel(os.path.join(root, name), header=0)
872
898
  if len(df) == 0:
873
899
  print(f'{name} 报表数据为空')
900
+ os.remove(os.path.join(root, name))
874
901
  continue
875
902
  d_time = datetime.datetime.today().strftime('%Y-%m-%d')
876
903
  df.insert(loc=0, column='日期', value=d_time)
@@ -886,6 +913,7 @@ class DataClean:
886
913
  df = pd.read_csv(os.path.join(root, name), encoding='utf-8_sig', header=0, na_filter=False)
887
914
  if len(df) == 0:
888
915
  print(f'{name} 报表数据为空')
916
+ os.remove(os.path.join(root, name))
889
917
  continue
890
918
  pic_list = df['日期'].tolist()
891
919
  pic = []
@@ -913,6 +941,7 @@ class DataClean:
913
941
  df = pd.read_csv(os.path.join(root, name), encoding='utf-8_sig', header=0, na_filter=False)
914
942
  if len(df) == 0:
915
943
  print(f'{name} 报表数据为空')
944
+ os.remove(os.path.join(root, name))
916
945
  continue
917
946
  pic_list = df['日期'].tolist()
918
947
  pic = []
@@ -942,6 +971,7 @@ class DataClean:
942
971
  df = pd.read_excel(os.path.join(root, name), header=0)
943
972
  if len(df) == 0:
944
973
  print(f'{name} 报表数据为空')
974
+ os.remove(os.path.join(root, name))
945
975
  continue
946
976
  df['摘要'] = df['摘要'].apply(lambda x: re.sub('\'', '', str(x)) if x else x)
947
977
  for col in ['原单号', '商品代码', '摘要']:
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: mdbq
3
- Version: 1.6.4
3
+ Version: 1.6.5
4
4
  Home-page: https://pypi.org/project/mdbsql
5
5
  Author: xigua,
6
6
  Author-email: 2587125111@qq.com
@@ -9,7 +9,7 @@ mdbq/aggregation/query_data.py,sha256=socYDytP4F7zLd1WRokKitQ0bNsK4TQgkO1GDmgi29
9
9
  mdbq/bdup/__init__.py,sha256=AkhsGk81SkG1c8FqDH5tRq-8MZmFobVbN60DTyukYTY,28
10
10
  mdbq/bdup/bdup.py,sha256=LAV0TgnQpc-LB-YuJthxb0U42_VkPidzQzAagan46lU,4234
11
11
  mdbq/clean/__init__.py,sha256=A1d6x3L27j4NtLgiFV5TANwEkLuaDfPHDQNrPBbNWtU,41
12
- mdbq/clean/data_clean.py,sha256=nkAy_KUnf6iX9nqUE588lebtWmk8Kelnwp_7g-wRfuE,89156
12
+ mdbq/clean/data_clean.py,sha256=M1rdgQWGkkOU6fSE4756Y5o5jK_YOV4wy7BBFkP_RCY,90960
13
13
  mdbq/company/__init__.py,sha256=qz8F_GsP_pMB5PblgJAUAMjasuZbOEp3qQOCB39E8f0,21
14
14
  mdbq/company/copysh.py,sha256=WCZ92vCJAy6_ZFeOxWL-U9gArIpyga4xts-s1wKsspY,17268
15
15
  mdbq/config/__init__.py,sha256=jso1oHcy6cJEfa7udS_9uO5X6kZLoPBF8l3wCYmr5dM,18
@@ -35,7 +35,7 @@ mdbq/pbix/__init__.py,sha256=Trtfaynu9RjoTyLLYBN2xdRxTvm_zhCniUkVTAYwcjo,24
35
35
  mdbq/pbix/pbix_refresh.py,sha256=JUjKW3bNEyoMVfVfo77UhguvS5AWkixvVhDbw4_MHco,2396
36
36
  mdbq/pbix/refresh_all.py,sha256=tgy762608HMaXWynbOURIf2UVMuSPybzrDXQnOOcnZU,6102
37
37
  mdbq/spider/__init__.py,sha256=RBMFXGy_jd1HXZhngB2T2XTvJqki8P_Fr-pBcwijnew,18
38
- mdbq-1.6.4.dist-info/METADATA,sha256=PLOLx_M9UUMqmD1qj1HvfJlaRbIzW5k0qB_xdBfq2WY,245
39
- mdbq-1.6.4.dist-info/WHEEL,sha256=eOLhNAGa2EW3wWl_TU484h7q1UNgy0JXjjoqKoxAAQc,92
40
- mdbq-1.6.4.dist-info/top_level.txt,sha256=2FQ-uLnCSB-OwFiWntzmwosW3X2Xqsg0ewh1axsaylA,5
41
- mdbq-1.6.4.dist-info/RECORD,,
38
+ mdbq-1.6.5.dist-info/METADATA,sha256=SAZM8eGb72ZrgsPg0SDcQzJYkqFPhWYYelrEHkAt9Bw,245
39
+ mdbq-1.6.5.dist-info/WHEEL,sha256=eOLhNAGa2EW3wWl_TU484h7q1UNgy0JXjjoqKoxAAQc,92
40
+ mdbq-1.6.5.dist-info/top_level.txt,sha256=2FQ-uLnCSB-OwFiWntzmwosW3X2Xqsg0ewh1axsaylA,5
41
+ mdbq-1.6.5.dist-info/RECORD,,
File without changes