PyPI - mdbq - Version diff - 2.5.5.tar.gz → 2.5.7.tar.gz - Mend

mdbq-2.5.5.tar.gz → mdbq-2.5.7.tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (52)

{mdbq-2.5.5 → mdbq-2.5.7}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: mdbq
-Version: 2.5.5
+Version: 2.5.7
 Home-page: https://pypi.org/project/mdbq
 Author: xigua,
 Author-email: 2587125111@qq.com

{mdbq-2.5.5 → mdbq-2.5.7}/mdbq/clean/clean_upload.py RENAMED Viewed

@@ -7,7 +7,6 @@ import zipfile
 from pyzipper import PyZipFile
 import os
 import platform
-import pathlib
 import json
 from mdbq.mongo import mongo
 from mdbq.mysql import mysql
@@ -646,6 +645,52 @@ class DataClean:
                     }
                 )
+    def sp_scene_clean(self, path=None, is_except=[]):
+        if not path:
+            path = self.path
+        for root, dirs, files in os.walk(path, topdown=False):
+            for name in files:
+                if '~$' in name or '.DS' in name or '.localized' in name or '.jpg' in name or '.png' in name:
+                    continue
+                if 'py_xg' in name:
+                    continue
+                is_continue = False
+                if is_except:
+                    for item in is_except:
+                        if item in os.path.join(root, name):
+                            # print(name)
+                            is_continue = True
+                            break
+                if is_continue:  # 需要排除不做处理的文件或文件夹
+                    continue
+                if name.endswith('.xlsx') and '商品素材_' in name:
+                    shop_name = re.findall(r'_([\u4e00-\u9fffA-Za-z]+店)_', name)[0]
+                    df = pd.read_excel(os.path.join(root, name), header=0)
+                    df.insert(loc=1, column='店铺名称', value=shop_name)
+                    new_name = f'py_xg_{name}'
+                    df.to_excel(os.path.join(upload_path, new_name),
+                                index=False, header=True, engine='openpyxl', freeze_panes=(1, 0))
+                    if '官方旗舰店' in name:
+                        db_name = '属性设置3'
+                        collection_name = '商品素材_天猫'
+                    elif '官方企业店' in name:
+                        db_name = '属性设置3'
+                        collection_name = '商品素材_淘宝'
+                    os.remove(os.path.join(root, name))
+                # 将数据传入 self.datas 等待更新进数据库
+                if not db_name or not collection_name:
+                    print(f'db_name/collection_name 不能为空')
+                    continue
+                self.datas.append(
+                    {
+                        '数据库名': db_name,
+                        '集合名称': collection_name,
+                        '数据主体': df,
+                        '文件名': name,
+                    }
+                )
     """
     {文件分类}
     将已处理完的文件 分类移到原始文件夹下
@@ -665,7 +710,7 @@ class DataClean:
             _date = re.findall(r'(\d{4}-\d{2})-\d{2}', str(_name))
             if _date:
                 _date = _date[0]
-                t2 = pathlib.Path(t2, _date)  # 添加 年月分类
+                t2 = os.path.join(t2, _date)  # 添加 年月分类
                 if not os.path.exists(t2):
                     os.makedirs(t2, exist_ok=True)
         old_file = os.path.join(t2, _name)  # 检查目标位置是否已经存在该文件
@@ -711,16 +756,16 @@ class DataClean:
                 elif '天猫' in name and name.endswith('.csv') and 'order' in name:
                     t_path = os.path.join(self.source_path, '天猫_生意经', '订单数据')
                     bib(t_path, _as_month=False)
-                elif '淘宝' in name and name.endswith('.csv') and 'baobei' in name:
+                elif '淘宝' in name or '企业店' in name and name.endswith('.csv') and 'baobei' in name:
                     t_path = os.path.join(self.source_path, '淘宝_生意经', '宝贝指标')
                     bib(t_path, _as_month=True)
-                elif '淘宝' in name and name.endswith('.csv') and '省份城市分析' in name:
+                elif '淘宝' in name or '企业店' in name and name.endswith('.csv') and '省份城市分析' in name:
                     t_path = os.path.join(self.source_path, '淘宝_生意经', '省份城市分析')
                     bib(t_path, _as_month=True)
-                elif '淘宝' in name and name.endswith('.csv') and '店铺销售指标' in name:
+                elif '淘宝' in name or '企业店' in name and name.endswith('.csv') and '店铺销售指标' in name:
                     t_path = os.path.join(self.source_path, '淘宝_生意经', '店铺销售指标')
                     bib(t_path, _as_month=False)
-                elif '淘宝' in name and name.endswith('.csv') and 'order' in name:
+                elif '淘宝' in name or '企业店' in name and name.endswith('.csv') and 'order' in name:
                     t_path = os.path.join(self.source_path, '淘宝_生意经', '订单数据')
                     bib(t_path, _as_month=False)
@@ -850,7 +895,12 @@ class DataClean:
                     elif '定向人群' in name:
                         t_path = os.path.join(self.source_path, '天猫推广报表', '品销宝', '定向人群报表')
                         bib(t_path, _as_month=True)
+                elif name.endswith('xlsx') and '商品素材_万里马官方旗舰店' in name:
+                    t_path = os.path.join(self.source_path, '商品素材', '天猫')
+                    bib(t_path, _as_month=True)
+                elif name.endswith('xlsx') and '商品素材_万里马官方企业店' in name:
+                    t_path = os.path.join(self.source_path, '商品素材', '淘宝')
+                    bib(t_path, _as_month=True)
     # @try_except
     def move_tg_tb(self, path=None, is_except=[]):
@@ -1110,9 +1160,10 @@ def main(service_databases=None):
     )
     c.new_unzip(is_move=True)  # 解压文件， is_move 解压后是否删除原 zip 压缩文件
     c.tg_reports(is_except=['except'])  # 推广报表，天猫淘宝共同清洗
-    c.syj_reports_tm(is_except=['except'])  # 天猫=生意经
+    c.syj_reports_tm(is_except=['except'])  # 天猫生意经
     # c.syj_reports_tb(is_except=['except'])  # 淘宝生意经，不可以和天猫同时运行
     c.jd_reports(is_except=['except'])  # 清洗京东报表
+    c.sp_scene_clean(is_except=['except'])  # 商品素材
     c.upload_df(service_databases=service_databases)  # 上传数据库
     c.move_sjy(is_except=['临时文件',])  # 生意经，移到文件到原始文件夹
@@ -1161,6 +1212,49 @@ def main(service_databases=None):
     )
+def test():
+    # main_key = '单元报表'
+    path = f'/Users/xigua/数据中心/原始文件2/京东报表/JD流量来源13414124124'
+    for root, dirs, files in os.walk(path, topdown=False):
+        for name in files:
+            if '~$' in name or '.DS' in name or '.localized' in name or '.jpg' in name or '.png' in name:
+                continue
+            if 'py_xg' in name:
+                continue
+            if 'TM_旧表_字段' in root:
+                continue
+            if name.endswith('.csv'):
+                print(name)
+                df = pd.read_csv(os.path.join(root, name), encoding='utf-8_sig', header=0, na_filter=False)
+                # if '店铺名称' not in df.columns.tolist():
+                #     df.insert(loc=1, column='店铺名称', value='京东箱包旗舰店')
+                for item in df.columns.tolist():
+                    if '同比' in item or '环比' in item or '时间' in item:
+                        df.pop(item)
+                date_all = re.findall(r'_(\d{4}-\d{2}-\d{2})_\d{4}-\d{2}-\d{2}', name)[0]
+                date = re.findall(r'_(\d{4}-\d{2})-\d{2}', name)[0]
+                new_path = f'/Users/xigua/数据中心/原始文件3/京东报表/店铺来源_三级来源/{date}'
+                # new_path = os.path.join(new_path, date)  # 添加 年月分类
+                if not os.path.exists(new_path):
+                    os.makedirs(new_path, exist_ok=True)
+                # print(date_all)
+                new_name = f'py_xg_京东商智_店铺来源_三级来源_{date_all}.xlsx'
+                # print(os.path.join(new_path, new_name))
+                # breakpoint()
+                # df.to_csv(os.path.join(new_path, new_name), encoding='utf-8_sig', index=False, header=True)
+                try:
+                    df.to_excel(os.path.join(new_path, new_name),
+                                index=False, header=True, engine='openpyxl', freeze_panes=(1, 0))
+                except Exception as e:
+                    print(e)
 if __name__ == '__main__':
     main(
         service_databases = [
@@ -1171,3 +1265,13 @@ if __name__ == '__main__':
         ]
     )
+    # c = DataClean(
+    #     path=upload_path,  # 源文件目录，下载文件夹
+    #     source_path=source_path3,  # 原始文件保存目录
+    #     service_databases=[{'home_lx': 'mysql'},]
+    # )
+    # c.sp_scene_clean(is_except=['except'])  # 商品素材
+    # c.move_tg_tm(is_except=['临时文件', ])  # 天猫，移到文件到原始文件夹
+    # test()

{mdbq-2.5.5 → mdbq-2.5.7}/mdbq.egg-info/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: mdbq
-Version: 2.5.5
+Version: 2.5.7
 Home-page: https://pypi.org/project/mdbq
 Author: xigua,
 Author-email: 2587125111@qq.com

{mdbq-2.5.5 → mdbq-2.5.7}/setup.py RENAMED Viewed

@@ -3,7 +3,7 @@
 from setuptools import setup, find_packages
 setup(name='mdbq',
-      version='2.5.5',
+      version='2.5.7',
       author='xigua, ',
       author_email="2587125111@qq.com",
       url='https://pypi.org/project/mdbq',