mdbq 1.6.3__py3-none-any.whl → 1.6.5__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- mdbq/aggregation/aggregation.py +4 -4
- mdbq/clean/data_clean.py +30 -0
- {mdbq-1.6.3.dist-info → mdbq-1.6.5.dist-info}/METADATA +1 -1
- {mdbq-1.6.3.dist-info → mdbq-1.6.5.dist-info}/RECORD +6 -6
- {mdbq-1.6.3.dist-info → mdbq-1.6.5.dist-info}/WHEEL +0 -0
- {mdbq-1.6.3.dist-info → mdbq-1.6.5.dist-info}/top_level.txt +0 -0
mdbq/aggregation/aggregation.py
CHANGED
@@ -1064,11 +1064,11 @@ if __name__ == '__main__':
|
|
1064
1064
|
# target_service='company',
|
1065
1065
|
# database='mysql'
|
1066
1066
|
# )
|
1067
|
-
db_name = '
|
1068
|
-
table_name = '
|
1067
|
+
db_name = '市场数据2'
|
1068
|
+
table_name = '类目洞察_属性分析_商品发现'
|
1069
1069
|
upload_dir(
|
1070
|
-
path='/Users/xigua
|
1070
|
+
path='/Users/xigua/Downloads/类目洞察/属性分析/商品发现',
|
1071
1071
|
db_name=db_name,
|
1072
1072
|
collection_name=table_name,
|
1073
|
-
dbs={'mysql':
|
1073
|
+
dbs={'mysql': False, 'mongodb': False},
|
1074
1074
|
)
|
mdbq/clean/data_clean.py
CHANGED
@@ -152,6 +152,7 @@ class DataClean:
|
|
152
152
|
df = pd.read_csv(os.path.join(root, name), encoding=encoding, header=0, na_filter=False)
|
153
153
|
if len(df) == 0:
|
154
154
|
print(f'{name} 报表数据为空')
|
155
|
+
os.remove(os.path.join(root, name))
|
155
156
|
continue
|
156
157
|
pattern = re.findall(r'(.*_)\d{8}_\d{6}', name)
|
157
158
|
shop_name = re.findall(r'\d{8}_\d{6}_(.*)\W', name)
|
@@ -180,6 +181,7 @@ class DataClean:
|
|
180
181
|
df = pd.concat(df)
|
181
182
|
if len(df) == 0:
|
182
183
|
print(f'{name} 报表数据为空')
|
184
|
+
os.remove(os.path.join(root, name))
|
183
185
|
continue
|
184
186
|
new_name2 = os.path.splitext(name)[0] + '.csv'
|
185
187
|
df['订单Id'] = df['订单Id'].apply(
|
@@ -198,6 +200,7 @@ class DataClean:
|
|
198
200
|
df = pd.concat(df)
|
199
201
|
if len(df) == 0:
|
200
202
|
print(f'{name} 报表数据为空')
|
203
|
+
os.remove(os.path.join(root, name))
|
201
204
|
continue
|
202
205
|
new_name2 = os.path.splitext(name)[0] + '.csv'
|
203
206
|
df['计划ID'] = df['计划ID'].apply(
|
@@ -217,6 +220,7 @@ class DataClean:
|
|
217
220
|
df = pd.read_csv(os.path.join(root, name), encoding='utf-8_sig', header=1, na_filter=False)
|
218
221
|
if len(df) == 0:
|
219
222
|
print(f'{name} 报表数据为空')
|
223
|
+
os.remove(os.path.join(root, name))
|
220
224
|
continue
|
221
225
|
min_clm = df.min()['日期']
|
222
226
|
max_clm = df.max()['日期']
|
@@ -247,6 +251,7 @@ class DataClean:
|
|
247
251
|
df = pd.read_excel(os.path.join(root, name), header=5)
|
248
252
|
if len(df) == 0:
|
249
253
|
print(f'{name} 报表数据为空')
|
254
|
+
os.remove(os.path.join(root, name))
|
250
255
|
continue
|
251
256
|
df.replace(to_replace=['-'], value='', regex=False, inplace=True)
|
252
257
|
if date01[0] != date02[0]:
|
@@ -299,6 +304,7 @@ class DataClean:
|
|
299
304
|
df = pd.read_excel(os.path.join(root, name), header=4)
|
300
305
|
if len(df) == 0:
|
301
306
|
print(f'{name} 报表数据为空')
|
307
|
+
os.remove(os.path.join(root, name))
|
302
308
|
continue
|
303
309
|
df.replace(to_replace=['-'], value='', regex=False, inplace=True)
|
304
310
|
df['商品ID'] = df['商品ID'].apply(
|
@@ -325,6 +331,7 @@ class DataClean:
|
|
325
331
|
df = pd.read_excel(os.path.join(root, name), header=7)
|
326
332
|
if len(df) == 0:
|
327
333
|
print(f'{name} 报表数据为空')
|
334
|
+
os.remove(os.path.join(root, name))
|
328
335
|
continue
|
329
336
|
df.rename(columns={'统计日期': '日期'}, inplace=True)
|
330
337
|
df['日期'] = pd.to_datetime(df['日期'], format='%Y-%m-%d', errors='ignore')
|
@@ -341,6 +348,7 @@ class DataClean:
|
|
341
348
|
df = pd.read_excel(os.path.join(root, name), header=7)
|
342
349
|
if len(df) == 0:
|
343
350
|
print(f'{name} 报表数据为空')
|
351
|
+
os.remove(os.path.join(root, name))
|
344
352
|
continue
|
345
353
|
df.rename(columns={'统计日期': '日期'}, inplace=True)
|
346
354
|
# 2024-2-19 官方更新了推广渠道来源名称,自助取数没有更新,这里强制更改
|
@@ -367,6 +375,7 @@ class DataClean:
|
|
367
375
|
df = pd.read_excel(os.path.join(root, name), header=7)
|
368
376
|
if len(df) == 0:
|
369
377
|
print(f'{name} 报表数据为空')
|
378
|
+
os.remove(os.path.join(root, name))
|
370
379
|
continue
|
371
380
|
df.rename(columns={
|
372
381
|
'统计日期': '日期',
|
@@ -390,6 +399,7 @@ class DataClean:
|
|
390
399
|
df = pd.read_excel(os.path.join(root, name), header=7)
|
391
400
|
if len(df) == 0:
|
392
401
|
print(f'{name} 报表数据为空')
|
402
|
+
os.remove(os.path.join(root, name))
|
393
403
|
continue
|
394
404
|
df.rename(columns={'统计日期': '数据周期'}, inplace=True)
|
395
405
|
# 2024-2-19 官方更新了推广渠道来源名称,自助取数没有更新,这里强制更改
|
@@ -422,6 +432,7 @@ class DataClean:
|
|
422
432
|
df = pd.read_csv(os.path.join(root, name), encoding=encoding, header=0, na_filter=False)
|
423
433
|
if len(df) == 0:
|
424
434
|
print(f'{name} 报表数据为空')
|
435
|
+
os.remove(os.path.join(root, name))
|
425
436
|
continue
|
426
437
|
if '日期' in df.columns.tolist():
|
427
438
|
df.pop('日期')
|
@@ -453,6 +464,7 @@ class DataClean:
|
|
453
464
|
df = pd.read_csv(os.path.join(root, name), encoding=encoding, header=0, na_filter=False)
|
454
465
|
if len(df) == 0:
|
455
466
|
print(f'{name} 报表数据为空')
|
467
|
+
os.remove(os.path.join(root, name))
|
456
468
|
continue
|
457
469
|
df['日期'] = df['日期'].astype(str).apply(
|
458
470
|
lambda x: '-'.join(re.findall(r'(\d{4})(\d{2})(\d{2})', x)[0]) if x else x)
|
@@ -484,6 +496,7 @@ class DataClean:
|
|
484
496
|
df = pd.read_csv(os.path.join(root, name), encoding=encoding, header=0, na_filter=False)
|
485
497
|
if len(df) == 0:
|
486
498
|
print(f'{name} 报表数据为空')
|
499
|
+
os.remove(os.path.join(root, name))
|
487
500
|
continue
|
488
501
|
df['省'] = df['省份'].apply(lambda x: x if ' ├─ ' not in x and ' └─ ' not in x else None)
|
489
502
|
df['城市'] = df[['省份', '省']].apply(lambda x: '汇总' if x['省'] else x['省份'], axis=1)
|
@@ -519,6 +532,7 @@ class DataClean:
|
|
519
532
|
df = pd.read_csv(os.path.join(root, name), encoding='gb18030', header=0, na_filter=False)
|
520
533
|
if len(df) == 0:
|
521
534
|
print(f'{name} 报表数据为空')
|
535
|
+
os.remove(os.path.join(root, name))
|
522
536
|
continue
|
523
537
|
df.insert(loc=0, column='日期', value=date1)
|
524
538
|
df.insert(loc=1, column='数据周期', value=date)
|
@@ -541,6 +555,7 @@ class DataClean:
|
|
541
555
|
df = pd.read_excel(os.path.join(root, name), header=0)
|
542
556
|
if len(df) == 0:
|
543
557
|
print(f'{name} 报表数据为空')
|
558
|
+
os.remove(os.path.join(root, name))
|
544
559
|
continue
|
545
560
|
df.rename(columns={'场次ID': '场次id', '商品ID': '商品id'}, inplace=True)
|
546
561
|
df.replace(to_replace=['-'], value='', regex=False, inplace=True)
|
@@ -574,6 +589,7 @@ class DataClean:
|
|
574
589
|
df = pd.read_excel(os.path.join(root, name), header=0)
|
575
590
|
if len(df) == 0:
|
576
591
|
print(f'{name} 报表数据为空')
|
592
|
+
os.remove(os.path.join(root, name))
|
577
593
|
continue
|
578
594
|
df.replace(to_replace=['-'], value='', regex=False, inplace=True)
|
579
595
|
df.rename(columns={'统计日期': '日期'}, inplace=True)
|
@@ -595,6 +611,7 @@ class DataClean:
|
|
595
611
|
df = pd.read_excel(os.path.join(root, name), header=5)
|
596
612
|
if len(df) == 0:
|
597
613
|
print(f'{name} 报表数据为空')
|
614
|
+
os.remove(os.path.join(root, name))
|
598
615
|
continue
|
599
616
|
df.replace(to_replace=['-'], value='', regex=False, inplace=True)
|
600
617
|
df.replace(to_replace=[','], value='', regex=True, inplace=True)
|
@@ -624,6 +641,7 @@ class DataClean:
|
|
624
641
|
# print(sheet4)
|
625
642
|
if len(df) == 0:
|
626
643
|
print(f'{name} 报表数据为空')
|
644
|
+
os.remove(os.path.join(root, name))
|
627
645
|
continue
|
628
646
|
if len(df) < 1:
|
629
647
|
print(f'{name} 跳过')
|
@@ -688,6 +706,7 @@ class DataClean:
|
|
688
706
|
df = pd.read_excel(os.path.join(root, name), header=0)
|
689
707
|
if len(df) == 0:
|
690
708
|
print(f'{name} 报表数据为空')
|
709
|
+
os.remove(os.path.join(root, name))
|
691
710
|
continue
|
692
711
|
df.replace(to_replace=['-'], value='', regex=False, inplace=True)
|
693
712
|
df.insert(loc=0, column='日期', value=new_date01)
|
@@ -724,6 +743,7 @@ class DataClean:
|
|
724
743
|
df = pd.read_excel(os.path.join(root, name), header=0)
|
725
744
|
if len(df) == 0:
|
726
745
|
print(f'{name} 报表数据为空')
|
746
|
+
os.remove(os.path.join(root, name))
|
727
747
|
continue
|
728
748
|
if '10035975359247' in df['商品ID'].values or '10056642622343' in df['商品ID'].values:
|
729
749
|
new_name = f'sku_{date1}_全部渠道_商品明细.csv'
|
@@ -756,6 +776,7 @@ class DataClean:
|
|
756
776
|
df = pd.read_excel(os.path.join(root, name), header=0, engine='openpyxl')
|
757
777
|
if len(df) == 0:
|
758
778
|
print(f'{name} 报表数据为空')
|
779
|
+
os.remove(os.path.join(root, name))
|
759
780
|
continue
|
760
781
|
df.rename(columns={'商品的ID': 'skuid'}, inplace=True)
|
761
782
|
df['skuid'] = df['skuid'].apply(lambda x: f'="{x}"' if x and '=' not in str(x) else x)
|
@@ -773,6 +794,7 @@ class DataClean:
|
|
773
794
|
df = pd.read_excel(os.path.join(root, name), header=0)
|
774
795
|
if len(df) == 0:
|
775
796
|
print(f'{name} 报表数据为空')
|
797
|
+
os.remove(os.path.join(root, name))
|
776
798
|
continue
|
777
799
|
df.insert(0, '日期', date_in) # 插入新列
|
778
800
|
df.rename(columns={'SKU': 'skuid'}, inplace=True)
|
@@ -792,6 +814,7 @@ class DataClean:
|
|
792
814
|
df = pd.read_excel(os.path.join(root, name), header=0)
|
793
815
|
if len(df) == 0:
|
794
816
|
print(f'{name} 报表数据为空')
|
817
|
+
os.remove(os.path.join(root, name))
|
795
818
|
continue
|
796
819
|
df.replace(to_replace=[','], value='', regex=True, inplace=True)
|
797
820
|
df.insert(loc=0, column='日期', value=start_date)
|
@@ -809,6 +832,7 @@ class DataClean:
|
|
809
832
|
df = pd.read_excel(os.path.join(root, name), header=0)
|
810
833
|
if len(df) == 0:
|
811
834
|
print(f'{name} 报表数据为空')
|
835
|
+
os.remove(os.path.join(root, name))
|
812
836
|
continue
|
813
837
|
df['日期'] = df['日期'].apply(
|
814
838
|
lambda x: '-'.join(re.findall(r'(\d{4})(\d{2})(\d{2})', str(x))[0])
|
@@ -836,6 +860,7 @@ class DataClean:
|
|
836
860
|
df = pd.read_excel(os.path.join(root, name), header=0)
|
837
861
|
if len(df) == 0:
|
838
862
|
print(f'{name} 报表数据为空')
|
863
|
+
os.remove(os.path.join(root, name))
|
839
864
|
continue
|
840
865
|
df['日期'] = df['日期'].astype(str).apply(lambda x: f'{x[:4]}-{x[4:6]}-{x[6:8]}')
|
841
866
|
df.insert(loc=0, column='类型', value='商家榜单')
|
@@ -852,6 +877,7 @@ class DataClean:
|
|
852
877
|
df = pd.read_excel(os.path.join(root, name), header=0)
|
853
878
|
if len(df) == 0:
|
854
879
|
print(f'{name} 报表数据为空')
|
880
|
+
os.remove(os.path.join(root, name))
|
855
881
|
continue
|
856
882
|
d_time = datetime.datetime.today().strftime('%Y-%m-%d')
|
857
883
|
df.insert(loc=0, column='日期', value=d_time)
|
@@ -871,6 +897,7 @@ class DataClean:
|
|
871
897
|
df = pd.read_excel(os.path.join(root, name), header=0)
|
872
898
|
if len(df) == 0:
|
873
899
|
print(f'{name} 报表数据为空')
|
900
|
+
os.remove(os.path.join(root, name))
|
874
901
|
continue
|
875
902
|
d_time = datetime.datetime.today().strftime('%Y-%m-%d')
|
876
903
|
df.insert(loc=0, column='日期', value=d_time)
|
@@ -886,6 +913,7 @@ class DataClean:
|
|
886
913
|
df = pd.read_csv(os.path.join(root, name), encoding='utf-8_sig', header=0, na_filter=False)
|
887
914
|
if len(df) == 0:
|
888
915
|
print(f'{name} 报表数据为空')
|
916
|
+
os.remove(os.path.join(root, name))
|
889
917
|
continue
|
890
918
|
pic_list = df['日期'].tolist()
|
891
919
|
pic = []
|
@@ -913,6 +941,7 @@ class DataClean:
|
|
913
941
|
df = pd.read_csv(os.path.join(root, name), encoding='utf-8_sig', header=0, na_filter=False)
|
914
942
|
if len(df) == 0:
|
915
943
|
print(f'{name} 报表数据为空')
|
944
|
+
os.remove(os.path.join(root, name))
|
916
945
|
continue
|
917
946
|
pic_list = df['日期'].tolist()
|
918
947
|
pic = []
|
@@ -942,6 +971,7 @@ class DataClean:
|
|
942
971
|
df = pd.read_excel(os.path.join(root, name), header=0)
|
943
972
|
if len(df) == 0:
|
944
973
|
print(f'{name} 报表数据为空')
|
974
|
+
os.remove(os.path.join(root, name))
|
945
975
|
continue
|
946
976
|
df['摘要'] = df['摘要'].apply(lambda x: re.sub('\'', '', str(x)) if x else x)
|
947
977
|
for col in ['原单号', '商品代码', '摘要']:
|
@@ -1,7 +1,7 @@
|
|
1
1
|
mdbq/__init__.py,sha256=Il5Q9ATdX8yXqVxtP_nYqUhExzxPC_qk_WXQ_4h0exg,16
|
2
2
|
mdbq/__version__.py,sha256=y9Mp_8x0BCZSHsdLT_q5tX9wZwd5QgqrSIENLrb6vXA,62
|
3
3
|
mdbq/aggregation/__init__.py,sha256=EeDqX2Aml6SPx8363J-v1lz0EcZtgwIBYyCJV6CcEDU,40
|
4
|
-
mdbq/aggregation/aggregation.py,sha256=
|
4
|
+
mdbq/aggregation/aggregation.py,sha256=mPKSiLsJXBPbDYsTso0VmDybinewFRs3z6uiA5Gqsn8,61720
|
5
5
|
mdbq/aggregation/df_types.py,sha256=oQJS2IBU3_IO6GMgbssHuC2yCjNnbta0QPGrFOwNLnU,7591
|
6
6
|
mdbq/aggregation/mysql_types.py,sha256=DQYROALDiwjJzjhaJfIIdnsrNs11i5BORlj_v6bp67Y,11062
|
7
7
|
mdbq/aggregation/optimize_data.py,sha256=u2Kl_MFtZueXJ57ycy4H2OhXD431RctUYJYCl637uT0,4176
|
@@ -9,7 +9,7 @@ mdbq/aggregation/query_data.py,sha256=socYDytP4F7zLd1WRokKitQ0bNsK4TQgkO1GDmgi29
|
|
9
9
|
mdbq/bdup/__init__.py,sha256=AkhsGk81SkG1c8FqDH5tRq-8MZmFobVbN60DTyukYTY,28
|
10
10
|
mdbq/bdup/bdup.py,sha256=LAV0TgnQpc-LB-YuJthxb0U42_VkPidzQzAagan46lU,4234
|
11
11
|
mdbq/clean/__init__.py,sha256=A1d6x3L27j4NtLgiFV5TANwEkLuaDfPHDQNrPBbNWtU,41
|
12
|
-
mdbq/clean/data_clean.py,sha256=
|
12
|
+
mdbq/clean/data_clean.py,sha256=M1rdgQWGkkOU6fSE4756Y5o5jK_YOV4wy7BBFkP_RCY,90960
|
13
13
|
mdbq/company/__init__.py,sha256=qz8F_GsP_pMB5PblgJAUAMjasuZbOEp3qQOCB39E8f0,21
|
14
14
|
mdbq/company/copysh.py,sha256=WCZ92vCJAy6_ZFeOxWL-U9gArIpyga4xts-s1wKsspY,17268
|
15
15
|
mdbq/config/__init__.py,sha256=jso1oHcy6cJEfa7udS_9uO5X6kZLoPBF8l3wCYmr5dM,18
|
@@ -35,7 +35,7 @@ mdbq/pbix/__init__.py,sha256=Trtfaynu9RjoTyLLYBN2xdRxTvm_zhCniUkVTAYwcjo,24
|
|
35
35
|
mdbq/pbix/pbix_refresh.py,sha256=JUjKW3bNEyoMVfVfo77UhguvS5AWkixvVhDbw4_MHco,2396
|
36
36
|
mdbq/pbix/refresh_all.py,sha256=tgy762608HMaXWynbOURIf2UVMuSPybzrDXQnOOcnZU,6102
|
37
37
|
mdbq/spider/__init__.py,sha256=RBMFXGy_jd1HXZhngB2T2XTvJqki8P_Fr-pBcwijnew,18
|
38
|
-
mdbq-1.6.
|
39
|
-
mdbq-1.6.
|
40
|
-
mdbq-1.6.
|
41
|
-
mdbq-1.6.
|
38
|
+
mdbq-1.6.5.dist-info/METADATA,sha256=SAZM8eGb72ZrgsPg0SDcQzJYkqFPhWYYelrEHkAt9Bw,245
|
39
|
+
mdbq-1.6.5.dist-info/WHEEL,sha256=eOLhNAGa2EW3wWl_TU484h7q1UNgy0JXjjoqKoxAAQc,92
|
40
|
+
mdbq-1.6.5.dist-info/top_level.txt,sha256=2FQ-uLnCSB-OwFiWntzmwosW3X2Xqsg0ewh1axsaylA,5
|
41
|
+
mdbq-1.6.5.dist-info/RECORD,,
|
File without changes
|
File without changes
|