PyPI - mdbq - Versions diffs - 2.6.0__tar.gz → 2.6.2__tar.gz - Mend

mdbq 2.6.0.tar.gz → 2.6.2.tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (52)

{mdbq-2.6.0 → mdbq-2.6.2}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: mdbq
-Version: 2.6.0
+Version: 2.6.2
 Home-page: https://pypi.org/project/mdbsql
 Author: xigua,
 Author-email: 2587125111@qq.com

{mdbq-2.6.0 → mdbq-2.6.2}/mdbq/aggregation/aggregation.py RENAMED Viewed

@@ -1301,7 +1301,7 @@ def test2():
 if __name__ == '__main__':
     username, password, host, port = get_myconf.select_config_values(target_service='nas', database='mysql')
     print(username, password, host, port)
-    file_dir(one_file=False, target_service='company')
+    # file_dir(one_file=False, target_service='company')
     # one_file_to_mysql(
     #     file='/Users/xigua/Downloads/爱库存_商品榜单_spu_2024-10-17_2024-10-17.csv',
     #     db_name='爱库存2',
@@ -1310,15 +1310,16 @@ if __name__ == '__main__':
     #     database='mysql'
     # )
-    # db_name = '推广数据2'
-    # table_name = '权益报表'
-    # upload_dir(
-    #     path='/Users/xigua/数据中心/原始文件2/推广报表/权益报表12313',
-    #     db_name=db_name,
-    #     collection_name=table_name,
-    #     dbs={'mysql': True, 'mongodb': False},
-    #     target_service='home_lx',
-    # )
+    # 上传一个目录到指定数据库
+    db_name = '天猫_推广数据3'
+    table_name = '主体报表'
+    upload_dir(
+        path='/Users/xigua/数据中心/原始文件3/天猫推广报表/主体报表',
+        db_name=db_name,
+        collection_name=table_name,
+        dbs={'mysql': True, 'mongodb': False},
+        target_service='company',
+    )
     # # 新版 数据分类

{mdbq-2.6.0 → mdbq-2.6.2}/mdbq/aggregation/query_data.py RENAMED Viewed

@@ -93,6 +93,18 @@ class MysqlDatasQuery:
         username, password, host, port = get_myconf.select_config_values(target_service=target_service, database='mysql')
         self.download = s_query.QueryDatas(username=username, password=password, host=host, port=port)
+    @staticmethod
+    def try_except(func):  # 在类内部定义一个异常处理方法
+        @wraps(func)
+        def wrapper(*args, **kwargs):
+            try:
+                return func(*args, **kwargs)
+            except Exception as e:
+                print(f'{func.__name__}, {e}')  # 将异常信息返回
+        return wrapper
+    @try_except
     def tg_wxt(self):
         start_date, end_date = self.months_data(num=self.months)
         projection = {
@@ -118,6 +130,7 @@ class MysqlDatasQuery:
         )
         return df
+    @try_except
     def syj(self):
         start_date, end_date = self.months_data(num=self.months)
         projection = {
@@ -142,6 +155,7 @@ class MysqlDatasQuery:
         )
         return df
+    @try_except
     def tg_rqbb(self):
         start_date, end_date = self.months_data(num=self.months)
         projection = {
@@ -167,6 +181,7 @@ class MysqlDatasQuery:
         )
         return df
+    @try_except
     def tg_gjc(self):
         start_date, end_date = self.months_data(num=self.months)
         projection = {
@@ -193,6 +208,7 @@ class MysqlDatasQuery:
         )
         return df
+    @try_except
     def tg_cjzb(self):
         start_date, end_date = self.months_data(num=self.months)
         projection = {
@@ -220,6 +236,7 @@ class MysqlDatasQuery:
         )
         return df
+    @try_except
     def pxb_zh(self):
         start_date, end_date = self.months_data(num=self.months)
         projection = {
@@ -245,6 +262,7 @@ class MysqlDatasQuery:
         )
         return df
+    @try_except
     def idbm(self):
         """ 用生意经日数据制作商品 id 和编码对照表 """
         data_values = self.download.columns_to_list(
@@ -255,6 +273,7 @@ class MysqlDatasQuery:
         df = pd.DataFrame(data=data_values)
         return df
+    @try_except
     def sp_picture(self):
         """ 用生意经日数据制作商品 id 和编码对照表 """
         data_values = self.download.columns_to_list(
@@ -265,6 +284,7 @@ class MysqlDatasQuery:
         df = pd.DataFrame(data=data_values)
         return df
+    @try_except
     def dplyd(self):
         """ 新旧版取的字段是一样的 """
         start_date, end_date = self.months_data(num=self.months)
@@ -288,6 +308,7 @@ class MysqlDatasQuery:
         )
         return df
+    @try_except
     def dplyd_old(self):
         start_date, end_date = self.months_data(num=self.months)
         projection = {
@@ -310,6 +331,7 @@ class MysqlDatasQuery:
         )
         return df
+    @try_except
     def sp_cost(self):
         """ 电商定价 """
         data_values = self.download.columns_to_list(
@@ -320,6 +342,7 @@ class MysqlDatasQuery:
         df = pd.DataFrame(data=data_values)
         return df
+    @try_except
     def jdjzt(self):
         start_date, end_date = self.months_data(num=self.months)
         projection = {
@@ -346,6 +369,8 @@ class MysqlDatasQuery:
             projection=projection,
         )
         return df
+    @try_except
     def jdqzyx(self):
         start_date, end_date = self.months_data(num=self.months)
         projection = {
@@ -368,6 +393,8 @@ class MysqlDatasQuery:
             projection=projection,
         )
         return df
+    @try_except
     def jd_gjc(self):
         start_date, end_date = self.months_data(num=self.months)
         projection = {
@@ -401,6 +428,8 @@ class MysqlDatasQuery:
             projection=projection,
         )
         return df
+    @try_except
     def sku_sales(self):
         start_date, end_date = self.months_data(num=self.months)
         projection = {
@@ -422,6 +451,8 @@ class MysqlDatasQuery:
             projection=projection,
         )
         return df
+    @try_except
     def spu_sales(self):
         start_date, end_date = self.months_data(num=self.months)
         projection = {
@@ -453,6 +484,7 @@ class MysqlDatasQuery:
         start_date = f'{start_date.year}-{start_date.month}-01'  # 替换为 n 月以前的第一天
         return pd.to_datetime(start_date), pd.to_datetime(end_date)
+    @try_except
     def tm_search(self):
         start_date, end_date = self.months_data(num=self.months)
         projection = {
@@ -476,6 +508,7 @@ class MysqlDatasQuery:
         )
         return df
+    @try_except
     def zb_ccfx(self):
         start_date, end_date = self.months_data(num=self.months)
         projection = {
@@ -520,6 +553,7 @@ class MysqlDatasQuery:
         )
         return df
+    @try_except
     def tg_by_day(self):
         """
         汇总各个店铺的推广数据，按日汇总
@@ -689,6 +723,7 @@ class MysqlDatasQuery:
         df = pd.concat(_datas, axis=0, ignore_index=True)
         return df
+    @try_except
     def aikucun_bd_spu(self):
         start_date, end_date = self.months_data(num=self.months)
         projection = {
@@ -736,6 +771,7 @@ class MysqlDatasQuery:
         )
         return df
+    @try_except
     def dmp_crowd(self):
         start_date, end_date = self.months_data(num=self.months)
         projection = {
@@ -748,7 +784,7 @@ class MysqlDatasQuery:
         }
         # projection = {}
         df_crowd = self.download.data_to_df(
-            db_name='达摩盘2',
+            db_name='达摩盘3',
             table_name='我的人群属性',
             start_date=start_date,
             end_date=end_date,
@@ -1497,6 +1533,7 @@ class GroupBy:
             print(f'<{table_name}>: Groupby 类尚未配置，数据为空')
             return pd.DataFrame({})
+    @try_except
     def ret_keyword(self, keyword, as_file=False):
         """ 推广关键词报表，关键词分类， """
         datas = [
@@ -1664,6 +1701,7 @@ class GroupBy:
                 break
         return result
+    @try_except
     def set_crowd(self, keyword, as_file=False):
         """ 推广人群报表，人群分类， """
         result_a = re.findall('_a$|_a_|_ai|^a_', str(keyword), re.IGNORECASE)
@@ -1699,6 +1737,7 @@ class GroupBy:
         if not is_res:
             return ''
+    @try_except
     def set_crowd2(self, keyword, as_file=False):
         """ 推广人群报表，人群分类， """
         datas = [
@@ -1796,7 +1835,7 @@ class GroupBy:
                 break
         return result
-    # @try_except
+    @try_except
     def performance(self, bb_tg=True):
          # print(self.data_tgyj)
         tg, syj, idbm, pic, cost = (
@@ -1832,6 +1871,8 @@ class GroupBy:
         df['毛利率'] = df.apply(lambda x: round((x['销售额'] - x['商品成本']) / x['销售额'], 4) if x['销售额'] > 0 else 0, axis=1)
         df['盈亏'] = df.apply(lambda x: x['商品毛利'] - x['花费'], axis=1)
         return df
+    @try_except
     def performance_concat(self, bb_tg=True):
         tg,  zb, pxb = self.data_tgyj['天猫汇总表调用'], self.data_tgyj['天猫_超级直播'], self.data_tgyj['天猫_品销宝账户报表']
         zb.rename(columns={
@@ -1880,6 +1921,7 @@ class GroupBy:
         )
         return df
+    @try_except
     def performance_jd(self, jd_tg=True):
         jdtg, sku_sales = self.data_jdtg['京东_京准通'], self.data_jdtg['京东_sku_商品明细']
         jdtg = jdtg.groupby(['日期', '跟单sku id'],
@@ -2285,6 +2327,6 @@ def main():
 if __name__ == '__main__':
-    data_aggregation(service_databases=[{'company': 'mysql'}], months=24, is_juhe=False)  # 正常的聚合所有数据
+    data_aggregation(service_databases=[{'company': 'mysql'}], months=0, is_juhe=False)  # 正常的聚合所有数据
     # data_aggregation_one(service_databases=[{'company': 'mysql'}], months=1)  # 单独聚合某一个数据库，具体库进函数编辑
     # optimize_data.op_data(service_databases=[{'company': 'mysql'}], days=3650)  # 立即启动对聚合数据的清理工作

{mdbq-2.6.0 → mdbq-2.6.2}/mdbq/clean/clean_upload.py RENAMED Viewed

@@ -89,15 +89,30 @@ class DataClean:
             path = self.path
         report_names = [
             {
-                '文件简称': '商品排行',  # 文件名中包含的字符
-                '数据库名': '天猫_生意参谋3',
+                '文件简称': '商品排行_',  # 文件名中包含的字符
+                '数据库名': '生意参谋3',
                 '集合名称': '商品排行',
             },
             {
-                '文件简称': '店铺来源_来源构成_万里马官方旗舰店',  # 文件名中包含的字符
-                '数据库名': '天猫_生意参谋3',
+                '文件简称': '店铺来源_来源构成_',  # 文件名中包含的字符
+                '数据库名': '生意参谋3',
                 '集合名称': '店铺流量来源构成',
             },
+            {
+                '文件简称': '商品类目属性_',  # 文件名中包含的字符
+                '数据库名': '生意参谋3',
+                '集合名称': '商品类目属性',
+            },
+            {
+                '文件简称': '商品主图视频_',  # 文件名中包含的字符
+                '数据库名': '生意参谋3',
+                '集合名称': '商品主图视频',
+            },
+            {
+                '文件简称': '商品sku属性_',  # 文件名中包含的字符
+                '数据库名': '生意参谋3',
+                '集合名称': '商品sku',
+            },
         ]
         for root, dirs, files in os.walk(path, topdown=False):
             for name in files:
@@ -126,7 +141,7 @@ class DataClean:
                         is_continue = True
                 if not is_continue:
                     continue
-                if name.endswith('.xls') and '商品排行_万里马官方旗舰店' in name:
+                if name.endswith('.xls') and '商品排行_' in name:
                     df = pd.read_excel(os.path.join(root, name), header=4)
                     if len(df) == 0:
                         print(f'{name} 报表数据为空')
@@ -135,15 +150,20 @@ class DataClean:
                     df.replace(to_replace=[','], value='', regex=True, inplace=True)
                     df.rename(columns={'统计日期': '日期', '商品ID': '商品id'}, inplace=True)
                     shop_name = re.findall(r'_([\u4e00-\u9fffA-Za-z]+店)', name)[0]
-                    df.insert(loc=1, column='店铺名称', value=shop_name)
+                    if '店铺名称' not in df.columns.tolist():
+                        df.insert(loc=1, column='店铺名称', value=shop_name)
                     new_name = f'py_xg_{os.path.splitext(name)[0]}.csv'
                     self.save_to_csv(df, root, new_name, encoding='utf-8_sig')
                     os.remove(os.path.join(root, name))
-                elif name.endswith('.csv') and '_来源构成_万里马官方旗舰店' in name:
+                elif name.endswith('.csv') and '_来源构成_' in name:
                     df = pd.read_csv(os.path.join(root, name), encoding='utf-8_sig', header=0, na_filter=False)
                     new_name = f'py_xg_{os.path.splitext(name)[0]}.csv'
                     self.save_to_csv(df, root, new_name, encoding='utf-8_sig')
                     os.remove(os.path.join(root, name))
+                elif name.endswith('.csv') and ('商品类目属性' in name or '商品主图视频' in name or '商品sku属性' in name):
+                    df = pd.read_csv(os.path.join(root, name), encoding='utf-8_sig', header=0, na_filter=False)
+                    new_name = f'py_xg_{os.path.splitext(name)[0]}.csv'
+                    os.rename(os.path.join(root, name), os.path.join(root, new_name))
                 # 将数据传入 self.datas 等待更新进数据库
                 if not db_name or not collection_name:
@@ -804,16 +824,13 @@ class DataClean:
                 if name.endswith('.xlsx') and '商品素材_' in name:
                     shop_name = re.findall(r'_([\u4e00-\u9fffA-Za-z]+店)_', name)[0]
                     df = pd.read_excel(os.path.join(root, name), header=0)
-                    df.insert(loc=1, column='店铺名称', value=shop_name)
+                    if '店铺名称' not in df.columns.tolist():
+                        df.insert(loc=1, column='店铺名称', value=shop_name)
                     new_name = f'py_xg_{name}'
                     df.to_excel(os.path.join(upload_path, new_name),
                                 index=False, header=True, engine='openpyxl', freeze_panes=(1, 0))
-                    if '官方旗舰店' in name:
-                        db_name = '属性设置3'
-                        collection_name = '商品素材_天猫'
-                    elif '官方企业店' in name:
-                        db_name = '属性设置3'
-                        collection_name = '商品素材_淘宝'
+                    db_name = '属性设置3'
+                    collection_name = '商品素材'
                     os.remove(os.path.join(root, name))
                 # 将数据传入 self.datas 等待更新进数据库
@@ -881,11 +898,15 @@ class DataClean:
                 if 'py_xg' not in name:  # 排除非目标文件
                     continue
-                if name.endswith('.csv') and '商品排行_万里马官方旗舰店' in name:
-                    t_path = os.path.join(self.source_path, '天猫_生意参谋', '商品排行')
+                if name.endswith('.csv') and '商品排行_' in name:
+                    t_path = os.path.join(self.source_path, '生意参谋', '商品排行')
+                    bib(t_path, _as_month=True)
+                elif name.endswith('.csv') and '店铺来源_来源构成_' in name:
+                    t_path = os.path.join(self.source_path, '生意参谋', '店铺流量来源')
                     bib(t_path, _as_month=True)
-                elif name.endswith('.csv') and '店铺来源_来源构成_万里马官方旗舰店' in name:
-                    t_path = os.path.join(self.source_path, '天猫_生意参谋', '店铺流量来源')
+                elif name.endswith('.csv') and (
+                        '商品类目属性' in name or '商品主图视频' in name or '商品sku属性' in name):
+                    t_path = os.path.join(self.source_path, '生意参谋', '商品属性')
                     bib(t_path, _as_month=True)
     def move_dmp(self, path=None, is_except=[]):
@@ -1097,10 +1118,10 @@ class DataClean:
                         t_path = os.path.join(self.source_path, '天猫推广报表', '品销宝', '定向人群报表')
                         bib(t_path, _as_month=True)
                 elif name.endswith('xlsx') and '商品素材_万里马官方旗舰店' in name:
-                    t_path = os.path.join(self.source_path, '商品素材', '天猫')
+                    t_path = os.path.join(self.source_path, '商品素材')
                     bib(t_path, _as_month=True)
                 elif name.endswith('xlsx') and '商品素材_万里马官方企业店' in name:
-                    t_path = os.path.join(self.source_path, '商品素材', '淘宝')
+                    t_path = os.path.join(self.source_path, '商品素材')
                     bib(t_path, _as_month=True)
     # @try_except
@@ -1366,6 +1387,7 @@ def main(service_databases=None, is_mysql=False):
     cn.dmp_tm(is_except=['except'])  # 达摩盘
     cn.tg_reports(is_except=['except'])  # 推广报表，天猫淘宝共同清洗
     cn.syj_reports_tm(is_except=['except'])  # 天猫生意经
+    # # 淘宝生意经，不可以和天猫同时运行
     # cn.syj_reports_tb(is_except=['except'])  # 淘宝生意经，不可以和天猫同时运行
     cn.jd_reports(is_except=['except'])  # 清洗京东报表
     cn.sp_scene_clean(is_except=['except'])  # 商品素材
@@ -1399,7 +1421,7 @@ def main(service_databases=None, is_mysql=False):
             '天猫_推广数据3',
             '淘宝_推广数据3',
             # '市场数据3',
-            '天猫_生意参谋3',
+            '生意参谋3',
             '天猫_生意经3',
             # '淘宝_生意经3',
         ],
@@ -1425,25 +1447,25 @@ def main(service_databases=None, is_mysql=False):
 def test():
     # main_key = '单元报表'
-    path = f'/Users/xigua/数据中心/原始文件2/生意参谋/商品排行qweqeqwe'
+    path = f'/Users/xigua/数据中心/原始文件3/天猫推广报表/主体报表'
     for root, dirs, files in os.walk(path, topdown=False):
         for name in files:
             if '~$' in name or '.DS' in name or '.localized' in name or '.jpg' in name or '.png' in name:
                 continue
-            if 'py_xg' in name:
-                continue
-            if 'TM_旧表_字段' in root:
-                continue
+            # if 'py_xg' in name:
+            #     continue
+            # if 'TM_旧表_字段' in root:
+            #     continue
             if name.endswith('.csv'):
                 print(name)
                 df = pd.read_csv(os.path.join(root, name), encoding='utf-8_sig', header=0, na_filter=False)
-                if '店铺名称' not in df.columns.tolist():
-                    df.insert(loc=1, column='店铺名称', value='万里马官方旗舰店')
-                df.replace(to_replace=['-'], value=0, regex=False, inplace=True)
-                df.replace(to_replace=[','], value='', regex=True, inplace=True)
-                if '统计日期' in df.columns.tolist() and '日期' not in df.columns.tolist():
-                    df.rename(columns={'统计日期': '日期', '商品ID': '商品id'}, inplace=True)
+                # if '店铺名称' not in df.columns.tolist():
+                #     df.insert(loc=1, column='店铺名称', value='万里马官方旗舰店')
+                # df.replace(to_replace=['-'], value=0, regex=False, inplace=True)
+                # df.replace(to_replace=[','], value='', regex=True, inplace=True)
+                # if '统计日期' in df.columns.tolist() and '日期' not in df.columns.tolist():
+                #     df.rename(columns={'统计日期': '日期', '商品ID': '商品id'}, inplace=True)
                 # shop_name = re.findall(r'_([\u4e00-\u9fffA-Za-z]+店)', name)[0]
                 # df.insert(loc=1, column='店铺名称', value=shop_name)
@@ -1478,7 +1500,7 @@ if __name__ == '__main__':
             # {'home_lx': 'mongodb'},
             # {'nas': 'mysql'},
         ],
-        is_mysql = True,
+        is_mysql = False,
     )
     # c = DataClean(

{mdbq-2.6.0 → mdbq-2.6.2}/mdbq/dataframe/converter.py RENAMED Viewed

@@ -47,7 +47,7 @@ class DataFrameConverter(object):
             try:
                 # 百分比在某些数据库中不兼容, 转换百分比为小数, # 转百分比的列不能含有中文或特殊字符
                 df[col] = df[col].apply(
-                    lambda x: float(float((str(x).rstrip("%"))) / 100) if re.findall(r'^\d+\.?\d*%', str(x)) else x)
+                    lambda x: float(float((str(x).rstrip("%"))) / 100) if re.findall(r'^\d+\.?\d*%$', str(x)) else x)
             except Exception as e:
                 print(f'留意错误信息: 位于列 -> {col} -> {e}')
@@ -94,5 +94,5 @@ if __name__ == '__main__':
     # print(df['a'].dtype)
     # print(df)
     pattern = '1540%'
-    pattern = re.findall(r'^\d+\.?\d*%', pattern)
+    pattern = re.findall(r'^\d+\.?\d*%$', pattern)
     print(pattern)

{mdbq-2.6.0 → mdbq-2.6.2}/mdbq.egg-info/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: mdbq
-Version: 2.6.0
+Version: 2.6.2
 Home-page: https://pypi.org/project/mdbsql
 Author: xigua,
 Author-email: 2587125111@qq.com

{mdbq-2.6.0 → mdbq-2.6.2}/setup.py RENAMED Viewed

@@ -3,7 +3,7 @@
 from setuptools import setup, find_packages
 setup(name='mdbq',
-      version='2.6.0',
+      version='2.6.2',
       author='xigua, ',
       author_email="2587125111@qq.com",
       url='https://pypi.org/project/mdbsql',