PyPI - mdbq - Versions diffs - 1.6.3__py3-none-any.whl → 1.6.5__py3-none-any.whl - Mend

mdbq 1.6.3-py3-none-any.whl → 1.6.5-py3-none-any.whl

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Files changed (6)

mdbq/aggregation/aggregation.py CHANGED Viewed

@@ -1064,11 +1064,11 @@ if __name__ == '__main__':
     #     target_service='company',
     #     database='mysql'
     # )
-    db_name = '推广数据2'
-    table_name = '超级直播'
+    db_name = '市场数据2'
+    table_name = '类目洞察_属性分析_商品发现'
     upload_dir(
-        path='/Users/xigua/数据中心/原始文件2/推广报表/超级直播',
+        path='/Users/xigua/Downloads/类目洞察/属性分析/商品发现',
         db_name=db_name,
         collection_name=table_name,
-        dbs={'mysql': True, 'mongodb': False},
+        dbs={'mysql': False, 'mongodb': False},
     )

mdbq/clean/data_clean.py CHANGED Viewed

@@ -152,6 +152,7 @@ class DataClean:
                     df = pd.read_csv(os.path.join(root, name), encoding=encoding, header=0, na_filter=False)
                     if len(df) == 0:
                         print(f'{name} 报表数据为空')
+                        os.remove(os.path.join(root, name))
                         continue
                     pattern = re.findall(r'(.*_)\d{8}_\d{6}', name)
                     shop_name = re.findall(r'\d{8}_\d{6}_(.*)\W', name)
@@ -180,6 +181,7 @@ class DataClean:
                     df = pd.concat(df)
                     if len(df) == 0:
                         print(f'{name} 报表数据为空')
+                        os.remove(os.path.join(root, name))
                         continue
                     new_name2 = os.path.splitext(name)[0] + '.csv'
                     df['订单Id'] = df['订单Id'].apply(
@@ -198,6 +200,7 @@ class DataClean:
                     df = pd.concat(df)
                     if len(df) == 0:
                         print(f'{name} 报表数据为空')
+                        os.remove(os.path.join(root, name))
                         continue
                     new_name2 = os.path.splitext(name)[0] + '.csv'
                     df['计划ID'] = df['计划ID'].apply(
@@ -217,6 +220,7 @@ class DataClean:
                     df = pd.read_csv(os.path.join(root, name), encoding='utf-8_sig', header=1, na_filter=False)
                     if len(df) == 0:
                         print(f'{name} 报表数据为空')
+                        os.remove(os.path.join(root, name))
                         continue
                     min_clm = df.min()['日期']
                     max_clm = df.max()['日期']
@@ -247,6 +251,7 @@ class DataClean:
                     df = pd.read_excel(os.path.join(root, name), header=5)
                     if len(df) == 0:
                         print(f'{name} 报表数据为空')
+                        os.remove(os.path.join(root, name))
                         continue
                     df.replace(to_replace=['-'], value='', regex=False, inplace=True)
                     if date01[0] != date02[0]:
@@ -299,6 +304,7 @@ class DataClean:
                     df = pd.read_excel(os.path.join(root, name), header=4)
                     if len(df) == 0:
                         print(f'{name} 报表数据为空')
+                        os.remove(os.path.join(root, name))
                         continue
                     df.replace(to_replace=['-'], value='', regex=False, inplace=True)
                     df['商品ID'] = df['商品ID'].apply(
@@ -325,6 +331,7 @@ class DataClean:
                     df = pd.read_excel(os.path.join(root, name), header=7)
                     if len(df) == 0:
                         print(f'{name} 报表数据为空')
+                        os.remove(os.path.join(root, name))
                         continue
                     df.rename(columns={'统计日期': '日期'}, inplace=True)
                     df['日期'] = pd.to_datetime(df['日期'], format='%Y-%m-%d', errors='ignore')
@@ -341,6 +348,7 @@ class DataClean:
                     df = pd.read_excel(os.path.join(root, name), header=7)
                     if len(df) == 0:
                         print(f'{name} 报表数据为空')
+                        os.remove(os.path.join(root, name))
                         continue
                     df.rename(columns={'统计日期': '日期'}, inplace=True)
                     # 2024-2-19 官方更新了推广渠道来源名称，自助取数没有更新，这里强制更改
@@ -367,6 +375,7 @@ class DataClean:
                     df = pd.read_excel(os.path.join(root, name), header=7)
                     if len(df) == 0:
                         print(f'{name} 报表数据为空')
+                        os.remove(os.path.join(root, name))
                         continue
                     df.rename(columns={
                         '统计日期': '日期',
@@ -390,6 +399,7 @@ class DataClean:
                     df = pd.read_excel(os.path.join(root, name), header=7)
                     if len(df) == 0:
                         print(f'{name} 报表数据为空')
+                        os.remove(os.path.join(root, name))
                         continue
                     df.rename(columns={'统计日期': '数据周期'}, inplace=True)
                     # 2024-2-19 官方更新了推广渠道来源名称，自助取数没有更新，这里强制更改
@@ -422,6 +432,7 @@ class DataClean:
                     df = pd.read_csv(os.path.join(root, name), encoding=encoding, header=0, na_filter=False)
                     if len(df) == 0:
                         print(f'{name} 报表数据为空')
+                        os.remove(os.path.join(root, name))
                         continue
                     if '日期' in df.columns.tolist():
                         df.pop('日期')
@@ -453,6 +464,7 @@ class DataClean:
                     df = pd.read_csv(os.path.join(root, name), encoding=encoding, header=0, na_filter=False)
                     if len(df) == 0:
                         print(f'{name} 报表数据为空')
+                        os.remove(os.path.join(root, name))
                         continue
                     df['日期'] = df['日期'].astype(str).apply(
                         lambda x: '-'.join(re.findall(r'(\d{4})(\d{2})(\d{2})', x)[0]) if x else x)
@@ -484,6 +496,7 @@ class DataClean:
                     df = pd.read_csv(os.path.join(root, name), encoding=encoding, header=0, na_filter=False)
                     if len(df) == 0:
                         print(f'{name} 报表数据为空')
+                        os.remove(os.path.join(root, name))
                         continue
                     df['省'] = df['省份'].apply(lambda x: x if ' ├─ ' not in x and ' └─ ' not in x else None)
                     df['城市'] = df[['省份', '省']].apply(lambda x: '汇总' if x['省'] else x['省份'], axis=1)
@@ -519,6 +532,7 @@ class DataClean:
                     df = pd.read_csv(os.path.join(root, name), encoding='gb18030', header=0, na_filter=False)
                     if len(df) == 0:
                         print(f'{name} 报表数据为空')
+                        os.remove(os.path.join(root, name))
                         continue
                     df.insert(loc=0, column='日期', value=date1)
                     df.insert(loc=1, column='数据周期', value=date)
@@ -541,6 +555,7 @@ class DataClean:
                     df = pd.read_excel(os.path.join(root, name), header=0)
                     if len(df) == 0:
                         print(f'{name} 报表数据为空')
+                        os.remove(os.path.join(root, name))
                         continue
                     df.rename(columns={'场次ID': '场次id', '商品ID': '商品id'}, inplace=True)
                     df.replace(to_replace=['-'], value='', regex=False, inplace=True)
@@ -574,6 +589,7 @@ class DataClean:
                     df = pd.read_excel(os.path.join(root, name), header=0)
                     if len(df) == 0:
                         print(f'{name} 报表数据为空')
+                        os.remove(os.path.join(root, name))
                         continue
                     df.replace(to_replace=['-'], value='', regex=False, inplace=True)
                     df.rename(columns={'统计日期': '日期'}, inplace=True)
@@ -595,6 +611,7 @@ class DataClean:
                     df = pd.read_excel(os.path.join(root, name), header=5)
                     if len(df) == 0:
                         print(f'{name} 报表数据为空')
+                        os.remove(os.path.join(root, name))
                         continue
                     df.replace(to_replace=['-'], value='', regex=False, inplace=True)
                     df.replace(to_replace=[','], value='', regex=True, inplace=True)
@@ -624,6 +641,7 @@ class DataClean:
                         # print(sheet4)
                         if len(df) == 0:
                             print(f'{name} 报表数据为空')
+                            os.remove(os.path.join(root, name))
                             continue
                         if len(df) < 1:
                             print(f'{name} 跳过')
@@ -688,6 +706,7 @@ class DataClean:
                     df = pd.read_excel(os.path.join(root, name), header=0)
                     if len(df) == 0:
                         print(f'{name} 报表数据为空')
+                        os.remove(os.path.join(root, name))
                         continue
                     df.replace(to_replace=['-'], value='', regex=False, inplace=True)
                     df.insert(loc=0, column='日期', value=new_date01)
@@ -724,6 +743,7 @@ class DataClean:
                     df = pd.read_excel(os.path.join(root, name), header=0)
                     if len(df) == 0:
                         print(f'{name} 报表数据为空')
+                        os.remove(os.path.join(root, name))
                         continue
                     if '10035975359247' in df['商品ID'].values or '10056642622343' in df['商品ID'].values:
                         new_name = f'sku_{date1}_全部渠道_商品明细.csv'
@@ -756,6 +776,7 @@ class DataClean:
                     df = pd.read_excel(os.path.join(root, name), header=0, engine='openpyxl')
                     if len(df) == 0:
                         print(f'{name} 报表数据为空')
+                        os.remove(os.path.join(root, name))
                         continue
                     df.rename(columns={'商品的ID': 'skuid'}, inplace=True)
                     df['skuid'] = df['skuid'].apply(lambda x: f'="{x}"' if x and '=' not in str(x) else x)
@@ -773,6 +794,7 @@ class DataClean:
                     df = pd.read_excel(os.path.join(root, name), header=0)
                     if len(df) == 0:
                         print(f'{name} 报表数据为空')
+                        os.remove(os.path.join(root, name))
                         continue
                     df.insert(0, '日期', date_in)  # 插入新列
                     df.rename(columns={'SKU': 'skuid'}, inplace=True)
@@ -792,6 +814,7 @@ class DataClean:
                     df = pd.read_excel(os.path.join(root, name), header=0)
                     if len(df) == 0:
                         print(f'{name} 报表数据为空')
+                        os.remove(os.path.join(root, name))
                         continue
                     df.replace(to_replace=[','], value='', regex=True, inplace=True)
                     df.insert(loc=0, column='日期', value=start_date)
@@ -809,6 +832,7 @@ class DataClean:
                     df = pd.read_excel(os.path.join(root, name), header=0)
                     if len(df) == 0:
                         print(f'{name} 报表数据为空')
+                        os.remove(os.path.join(root, name))
                         continue
                     df['日期'] = df['日期'].apply(
                         lambda x: '-'.join(re.findall(r'(\d{4})(\d{2})(\d{2})', str(x))[0])
@@ -836,6 +860,7 @@ class DataClean:
                     df = pd.read_excel(os.path.join(root, name), header=0)
                     if len(df) == 0:
                         print(f'{name} 报表数据为空')
+                        os.remove(os.path.join(root, name))
                         continue
                     df['日期'] = df['日期'].astype(str).apply(lambda x: f'{x[:4]}-{x[4:6]}-{x[6:8]}')
                     df.insert(loc=0, column='类型', value='商家榜单')
@@ -852,6 +877,7 @@ class DataClean:
                     df = pd.read_excel(os.path.join(root, name), header=0)
                     if len(df) == 0:
                         print(f'{name} 报表数据为空')
+                        os.remove(os.path.join(root, name))
                         continue
                     d_time = datetime.datetime.today().strftime('%Y-%m-%d')
                     df.insert(loc=0, column='日期', value=d_time)
@@ -871,6 +897,7 @@ class DataClean:
                     df = pd.read_excel(os.path.join(root, name), header=0)
                     if len(df) == 0:
                         print(f'{name} 报表数据为空')
+                        os.remove(os.path.join(root, name))
                         continue
                     d_time = datetime.datetime.today().strftime('%Y-%m-%d')
                     df.insert(loc=0, column='日期', value=d_time)
@@ -886,6 +913,7 @@ class DataClean:
                     df = pd.read_csv(os.path.join(root, name), encoding='utf-8_sig', header=0, na_filter=False)
                     if len(df) == 0:
                         print(f'{name} 报表数据为空')
+                        os.remove(os.path.join(root, name))
                         continue
                     pic_list = df['日期'].tolist()
                     pic = []
@@ -913,6 +941,7 @@ class DataClean:
                     df = pd.read_csv(os.path.join(root, name), encoding='utf-8_sig', header=0, na_filter=False)
                     if len(df) == 0:
                         print(f'{name} 报表数据为空')
+                        os.remove(os.path.join(root, name))
                         continue
                     pic_list = df['日期'].tolist()
                     pic = []
@@ -942,6 +971,7 @@ class DataClean:
                     df = pd.read_excel(os.path.join(root, name), header=0)
                     if len(df) == 0:
                         print(f'{name} 报表数据为空')
+                        os.remove(os.path.join(root, name))
                         continue
                     df['摘要'] = df['摘要'].apply(lambda x: re.sub('\'', '', str(x)) if x else x)
                     for col in ['原单号', '商品代码', '摘要']:

{mdbq-1.6.3.dist-info → mdbq-1.6.5.dist-info}/METADATA RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: mdbq
-Version: 1.6.3
+Version: 1.6.5
 Home-page: https://pypi.org/project/mdbsql
 Author: xigua,
 Author-email: 2587125111@qq.com

{mdbq-1.6.3.dist-info → mdbq-1.6.5.dist-info}/RECORD RENAMED Viewed

@@ -1,7 +1,7 @@
 mdbq/__init__.py,sha256=Il5Q9ATdX8yXqVxtP_nYqUhExzxPC_qk_WXQ_4h0exg,16
 mdbq/__version__.py,sha256=y9Mp_8x0BCZSHsdLT_q5tX9wZwd5QgqrSIENLrb6vXA,62
 mdbq/aggregation/__init__.py,sha256=EeDqX2Aml6SPx8363J-v1lz0EcZtgwIBYyCJV6CcEDU,40
-mdbq/aggregation/aggregation.py,sha256=mQRNn-S6oqvt9CyREllWMTtR2dYRsVgUe5gcdUCYH8U,61697
+mdbq/aggregation/aggregation.py,sha256=mPKSiLsJXBPbDYsTso0VmDybinewFRs3z6uiA5Gqsn8,61720
 mdbq/aggregation/df_types.py,sha256=oQJS2IBU3_IO6GMgbssHuC2yCjNnbta0QPGrFOwNLnU,7591
 mdbq/aggregation/mysql_types.py,sha256=DQYROALDiwjJzjhaJfIIdnsrNs11i5BORlj_v6bp67Y,11062
 mdbq/aggregation/optimize_data.py,sha256=u2Kl_MFtZueXJ57ycy4H2OhXD431RctUYJYCl637uT0,4176
@@ -9,7 +9,7 @@ mdbq/aggregation/query_data.py,sha256=socYDytP4F7zLd1WRokKitQ0bNsK4TQgkO1GDmgi29
 mdbq/bdup/__init__.py,sha256=AkhsGk81SkG1c8FqDH5tRq-8MZmFobVbN60DTyukYTY,28
 mdbq/bdup/bdup.py,sha256=LAV0TgnQpc-LB-YuJthxb0U42_VkPidzQzAagan46lU,4234
 mdbq/clean/__init__.py,sha256=A1d6x3L27j4NtLgiFV5TANwEkLuaDfPHDQNrPBbNWtU,41
-mdbq/clean/data_clean.py,sha256=nkAy_KUnf6iX9nqUE588lebtWmk8Kelnwp_7g-wRfuE,89156
+mdbq/clean/data_clean.py,sha256=M1rdgQWGkkOU6fSE4756Y5o5jK_YOV4wy7BBFkP_RCY,90960
 mdbq/company/__init__.py,sha256=qz8F_GsP_pMB5PblgJAUAMjasuZbOEp3qQOCB39E8f0,21
 mdbq/company/copysh.py,sha256=WCZ92vCJAy6_ZFeOxWL-U9gArIpyga4xts-s1wKsspY,17268
 mdbq/config/__init__.py,sha256=jso1oHcy6cJEfa7udS_9uO5X6kZLoPBF8l3wCYmr5dM,18
@@ -35,7 +35,7 @@ mdbq/pbix/__init__.py,sha256=Trtfaynu9RjoTyLLYBN2xdRxTvm_zhCniUkVTAYwcjo,24
 mdbq/pbix/pbix_refresh.py,sha256=JUjKW3bNEyoMVfVfo77UhguvS5AWkixvVhDbw4_MHco,2396
 mdbq/pbix/refresh_all.py,sha256=tgy762608HMaXWynbOURIf2UVMuSPybzrDXQnOOcnZU,6102
 mdbq/spider/__init__.py,sha256=RBMFXGy_jd1HXZhngB2T2XTvJqki8P_Fr-pBcwijnew,18
-mdbq-1.6.3.dist-info/METADATA,sha256=O-aJvN3jsjTj44Y9kK383iiNPIl_GI5_LdGI1wRmf88,245
-mdbq-1.6.3.dist-info/WHEEL,sha256=eOLhNAGa2EW3wWl_TU484h7q1UNgy0JXjjoqKoxAAQc,92
-mdbq-1.6.3.dist-info/top_level.txt,sha256=2FQ-uLnCSB-OwFiWntzmwosW3X2Xqsg0ewh1axsaylA,5
-mdbq-1.6.3.dist-info/RECORD,,
+mdbq-1.6.5.dist-info/METADATA,sha256=SAZM8eGb72ZrgsPg0SDcQzJYkqFPhWYYelrEHkAt9Bw,245
+mdbq-1.6.5.dist-info/WHEEL,sha256=eOLhNAGa2EW3wWl_TU484h7q1UNgy0JXjjoqKoxAAQc,92
+mdbq-1.6.5.dist-info/top_level.txt,sha256=2FQ-uLnCSB-OwFiWntzmwosW3X2Xqsg0ewh1axsaylA,5
+mdbq-1.6.5.dist-info/RECORD,,

{mdbq-1.6.3.dist-info → mdbq-1.6.5.dist-info}/WHEEL RENAMED Viewed

File without changes

{mdbq-1.6.3.dist-info → mdbq-1.6.5.dist-info}/top_level.txt RENAMED Viewed

File without changes

mdbq 1.6.3__py3-none-any.whl → 1.6.5__py3-none-any.whl

mdbq 1.6.3-py3-none-any.whl → 1.6.5-py3-none-any.whl