mdbq 2.6.5__tar.gz → 2.6.6__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (52)
  1. {mdbq-2.6.5 → mdbq-2.6.6}/PKG-INFO +1 -1
  2. {mdbq-2.6.5 → mdbq-2.6.6}/mdbq/aggregation/aggregation.py +1 -1
  3. {mdbq-2.6.5 → mdbq-2.6.6}/mdbq/aggregation/query_data.py +24 -24
  4. {mdbq-2.6.5 → mdbq-2.6.6}/mdbq/clean/clean_upload.py +9 -8
  5. {mdbq-2.6.5 → mdbq-2.6.6}/mdbq/dataframe/converter.py +4 -1
  6. {mdbq-2.6.5 → mdbq-2.6.6}/mdbq.egg-info/PKG-INFO +1 -1
  7. {mdbq-2.6.5 → mdbq-2.6.6}/setup.py +1 -1
  8. {mdbq-2.6.5 → mdbq-2.6.6}/README.txt +0 -0
  9. {mdbq-2.6.5 → mdbq-2.6.6}/mdbq/__init__.py +0 -0
  10. {mdbq-2.6.5 → mdbq-2.6.6}/mdbq/__version__.py +0 -0
  11. {mdbq-2.6.5 → mdbq-2.6.6}/mdbq/aggregation/__init__.py +0 -0
  12. {mdbq-2.6.5 → mdbq-2.6.6}/mdbq/aggregation/df_types.py +0 -0
  13. {mdbq-2.6.5 → mdbq-2.6.6}/mdbq/aggregation/mysql_types.py +0 -0
  14. {mdbq-2.6.5 → mdbq-2.6.6}/mdbq/aggregation/optimize_data.py +0 -0
  15. {mdbq-2.6.5 → mdbq-2.6.6}/mdbq/bdup/__init__.py +0 -0
  16. {mdbq-2.6.5 → mdbq-2.6.6}/mdbq/bdup/bdup.py +0 -0
  17. {mdbq-2.6.5 → mdbq-2.6.6}/mdbq/clean/__init__.py +0 -0
  18. {mdbq-2.6.5 → mdbq-2.6.6}/mdbq/clean/data_clean.py +0 -0
  19. {mdbq-2.6.5 → mdbq-2.6.6}/mdbq/company/__init__.py +0 -0
  20. {mdbq-2.6.5 → mdbq-2.6.6}/mdbq/company/copysh.py +0 -0
  21. {mdbq-2.6.5 → mdbq-2.6.6}/mdbq/company/home_sh.py +0 -0
  22. {mdbq-2.6.5 → mdbq-2.6.6}/mdbq/config/__init__.py +0 -0
  23. {mdbq-2.6.5 → mdbq-2.6.6}/mdbq/config/get_myconf.py +0 -0
  24. {mdbq-2.6.5 → mdbq-2.6.6}/mdbq/config/products.py +0 -0
  25. {mdbq-2.6.5 → mdbq-2.6.6}/mdbq/config/set_support.py +0 -0
  26. {mdbq-2.6.5 → mdbq-2.6.6}/mdbq/config/update_conf.py +0 -0
  27. {mdbq-2.6.5 → mdbq-2.6.6}/mdbq/dataframe/__init__.py +0 -0
  28. {mdbq-2.6.5 → mdbq-2.6.6}/mdbq/log/__init__.py +0 -0
  29. {mdbq-2.6.5 → mdbq-2.6.6}/mdbq/log/mylogger.py +0 -0
  30. {mdbq-2.6.5 → mdbq-2.6.6}/mdbq/mongo/__init__.py +0 -0
  31. {mdbq-2.6.5 → mdbq-2.6.6}/mdbq/mongo/mongo.py +0 -0
  32. {mdbq-2.6.5 → mdbq-2.6.6}/mdbq/mysql/__init__.py +0 -0
  33. {mdbq-2.6.5 → mdbq-2.6.6}/mdbq/mysql/mysql.py +0 -0
  34. {mdbq-2.6.5 → mdbq-2.6.6}/mdbq/mysql/s_query.py +0 -0
  35. {mdbq-2.6.5 → mdbq-2.6.6}/mdbq/mysql/year_month_day.py +0 -0
  36. {mdbq-2.6.5 → mdbq-2.6.6}/mdbq/other/__init__.py +0 -0
  37. {mdbq-2.6.5 → mdbq-2.6.6}/mdbq/other/porxy.py +0 -0
  38. {mdbq-2.6.5 → mdbq-2.6.6}/mdbq/other/pov_city.py +0 -0
  39. {mdbq-2.6.5 → mdbq-2.6.6}/mdbq/other/sku_picture.py +0 -0
  40. {mdbq-2.6.5 → mdbq-2.6.6}/mdbq/other/ua_sj.py +0 -0
  41. {mdbq-2.6.5 → mdbq-2.6.6}/mdbq/pbix/__init__.py +0 -0
  42. {mdbq-2.6.5 → mdbq-2.6.6}/mdbq/pbix/pbix_refresh.py +0 -0
  43. {mdbq-2.6.5 → mdbq-2.6.6}/mdbq/pbix/refresh_all.py +0 -0
  44. {mdbq-2.6.5 → mdbq-2.6.6}/mdbq/pbix/refresh_all_old.py +0 -0
  45. {mdbq-2.6.5 → mdbq-2.6.6}/mdbq/req_post/__init__.py +0 -0
  46. {mdbq-2.6.5 → mdbq-2.6.6}/mdbq/req_post/req_tb.py +0 -0
  47. {mdbq-2.6.5 → mdbq-2.6.6}/mdbq/spider/__init__.py +0 -0
  48. {mdbq-2.6.5 → mdbq-2.6.6}/mdbq/spider/aikucun.py +0 -0
  49. {mdbq-2.6.5 → mdbq-2.6.6}/mdbq.egg-info/SOURCES.txt +0 -0
  50. {mdbq-2.6.5 → mdbq-2.6.6}/mdbq.egg-info/dependency_links.txt +0 -0
  51. {mdbq-2.6.5 → mdbq-2.6.6}/mdbq.egg-info/top_level.txt +0 -0
  52. {mdbq-2.6.5 → mdbq-2.6.6}/setup.cfg +0 -0
{mdbq-2.6.5 → mdbq-2.6.6}/PKG-INFO
@@ -1,6 +1,6 @@
  Metadata-Version: 2.1
  Name: mdbq
- Version: 2.6.5
+ Version: 2.6.6
  Home-page: https://pypi.org/project/mdbsql
  Author: xigua,
  Author-email: 2587125111@qq.com
{mdbq-2.6.5 → mdbq-2.6.6}/mdbq/aggregation/aggregation.py
@@ -1329,7 +1329,7 @@ if __name__ == '__main__':
  db_name = '京东数据3'
  table_name = '京东商智_spu_商品明细'
  upload_dir(
- path='/Users/xigua/数据中心/原始文件3/京东报表/spu_商品明细',
+ path='/Users/xigua/数据中心/原始文件3/京东报表/spu_商品明细qwqw',
  db_name=db_name,
  collection_name=table_name,
  dbs={'mysql': True, 'mongodb': False},
{mdbq-2.6.5 → mdbq-2.6.6}/mdbq/aggregation/query_data.py
@@ -423,7 +423,6 @@ class MysqlDatasQuery:
  '总订单行': 1,
  '总订单金额': 1,
  '总加购数': 1,
- '下单新客数(去重)': 1,
  '领券数': 1,
  '商品关注数': 1,
  '店铺关注数': 1,
@@ -493,24 +492,26 @@ class MysqlDatasQuery:
  return pd.to_datetime(start_date), pd.to_datetime(end_date)

  @try_except
- def tm_search(self):
+ def se_search(self):
  start_date, end_date = self.months_data(num=self.months)
  projection = {
  '日期': 1,
- '关键词': 1,
+ '店铺名称': 1,
+ '搜索词': 1,
+ '词类行': 1,
  '访客数': 1,
+ '加购人数': 1,
+ '商品收藏人数': 1,
  '支付转化率': 1,
- '支付金额': 1,
- '下单金额': 1,
  '支付买家数': 1,
- '下单买家数': 1,
- '加购人数': 1,
+ '支付金额': 1,
  '新访客': 1,
- '店铺名称': 1,
+ '客单价': 1,
+ 'uv价值': 1,
  }
  df = self.download.data_to_df(
  db_name='生意参谋3',
- table_name='店铺来源_手淘搜索',
+ table_name='手淘搜索_本店引流词',
  start_date=start_date,
  end_date=end_date,
  projection=projection,
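
Note on the projection dicts above: they follow a MongoDB-style field selection, where each key is a column name and a value of 1 marks it for retrieval. The data_to_df implementation in mdbq.mysql.s_query is not part of this diff, so the following is only an illustrative assumption of how such a projection could be reduced to a column list:

    # Hypothetical helper -- data_to_df itself is not shown in this diff.
    def columns_from_projection(projection: dict) -> list:
        # Keep only the fields flagged with 1, preserving insertion order.
        return [col for col, keep in projection.items() if keep == 1]

    projection = {'日期': 1, '店铺名称': 1, '搜索词': 1, '访客数': 1}
    print(columns_from_projection(projection))  # ['日期', '店铺名称', '搜索词', '访客数']
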
@@ -1462,7 +1463,6 @@ class GroupBy:
  '总订单行': ('总订单行', np.max),
  '总订单金额': ('总订单金额', np.max),
  '总加购数': ('总加购数', np.max),
- '下单新客数': ('下单新客数(去重)', np.max),
  '领券数': ('领券数', np.max),
  '商品关注数': ('商品关注数', np.max),
  '店铺关注数': ('店铺关注数', np.max)
@@ -1476,17 +1476,16 @@ class GroupBy:
  return df
  elif '天猫店铺来源_手淘搜索' in table_name:
  df = df.groupby(
- ['日期', '关键词', '店铺名称'],
+ ['日期', '店铺名称', '搜索词'],
  as_index=False).agg(
  **{
  '访客数': ('访客数', np.max),
- '支付转化率': ('支付转化率', np.max),
+ '加购人数': ('加购人数', np.max),
  '支付金额': ('支付金额', np.max),
- '下单金额': ('下单金额', np.max),
+ '支付转化率': ('支付转化率', np.max),
  '支付买家数': ('支付买家数', np.max),
- '下单买家数': ('下单买家数', np.max),
- '加购人数': ('加购人数', np.max),
- '新访客': ('新访客', np.max),
+ '客单价': ('客单价', np.max),
+ 'uv价值': ('uv价值', np.max)
  }
  )
  return df
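
The agg(**{...}) call above uses pandas named aggregation: each dict key becomes an output column, and each (source column, function) tuple says what to compute for it. A minimal, self-contained sketch of the same pattern on the new 手淘搜索 columns (sample rows invented here purely for illustration):

    import numpy as np
    import pandas as pd

    df = pd.DataFrame({
        '日期': ['2024-01-01', '2024-01-01'],
        '店铺名称': ['旗舰店', '旗舰店'],
        '搜索词': ['连衣裙', '连衣裙'],
        '访客数': [120, 150],
        '支付金额': [880.0, 990.0],
    })

    out = df.groupby(['日期', '店铺名称', '搜索词'], as_index=False).agg(
        **{
            '访客数': ('访客数', np.max),    # output column <- (source column, reducer)
            '支付金额': ('支付金额', np.max),
        }
    )
    print(out)  # one row per (日期, 店铺名称, 搜索词) with the max of each metric
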
@@ -2077,6 +2076,7 @@ def data_aggregation(service_databases=[{}], months=1, is_juhe=True):
  2. 数据聚合清洗
  3. 统一回传数据库: <聚合数据> (不再导出为文件)
  公司台式机调用
+ months: 1+,写 0 表示当月数据,但在每月 1 号时可能会因为返回空数据出错
  """
  for service_database in service_databases:
  for service_name, database in service_database.items():
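
The docstring line added above documents the months parameter: 1 or more months of history, with 0 meaning the current month only, which can fail on the 1st of a month because the query window may return no rows. The months_data implementation is not included in this diff, so the following is only a hedged sketch of the kind of date-window computation implied:

    import datetime
    import pandas as pd

    def months_data(num: int = 1, today=None):
        # Assumed behaviour, NOT the actual mdbq implementation:
        # the window runs from the 1st of the month `num` months back up to today.
        today = pd.Timestamp(today or datetime.date.today())
        start_date = (today - pd.DateOffset(months=num)).replace(day=1)
        return pd.to_datetime(start_date), pd.to_datetime(today)

    # With num=0 on the 1st of a month the window collapses to a single day,
    # so tables without rows for that day come back empty.
    print(months_data(num=0, today=datetime.date(2024, 3, 1)))
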
@@ -2183,14 +2183,14 @@ def data_aggregation(service_databases=[{}], months=1, is_juhe=True):
  '数据库名': '聚合数据',
  '集合名': '天猫店铺来源_手淘搜索', # 暂缺
  '唯一主键': ['日期', '关键词', '访客数'],
- '数据主体': sdq.tm_search(),
- },
- {
- '数据库名': '聚合数据',
- '集合名': '生意参谋_直播场次分析', # 暂缺
- '唯一主键': ['场次id'],
- '数据主体': sdq.zb_ccfx(),
+ '数据主体': sdq.se_search(),
  },
+ # {
+ # '数据库名': '聚合数据',
+ # '集合名': '生意参谋_直播场次分析', # 暂缺
+ # '唯一主键': ['场次id'],
+ # '数据主体': sdq.zb_ccfx(),
+ # },
  {
  '数据库名': '聚合数据',
  '集合名': '多店推广场景_按日聚合',
@@ -2304,6 +2304,6 @@ def main():


  if __name__ == '__main__':
- data_aggregation(service_databases=[{'company': 'mysql'}], months=0, is_juhe=True) # 正常的聚合所有数据
+ data_aggregation(service_databases=[{'company': 'mysql'}], months=1, is_juhe=True) # 正常的聚合所有数据
  # data_aggregation_one(service_databases=[{'company': 'mysql'}], months=1) # 单独聚合某一个数据库,具体库进函数编辑
  # optimize_data.op_data(service_databases=[{'company': 'mysql'}], days=3650) # 立即启动对聚合数据的清理工作
{mdbq-2.6.5 → mdbq-2.6.6}/mdbq/clean/clean_upload.py
@@ -151,7 +151,7 @@ class DataClean:
  self.save_to_csv(df, root, new_name, encoding='utf-8_sig')
  os.remove(os.path.join(root, name))
  elif name.endswith('.xls') and '手淘搜索_本店引流词_' in name:
- df = pd.read_excel(os.path.join(root, name), header=5)
+ df = pd.read_excel(os.path.join(root, name), header=5, engine='xlrd')
  if len(df) == 0:
  print(f'{name} 报表数据不能为空')
  continue
@@ -382,7 +382,7 @@ class DataClean:
  sheets4 = ['账户', '推广计划', '推广单元', '创意', '品牌流量包', '定向人群'] # 品销宝
  file_name4 = os.path.splitext(name)[0] # 明星店铺报表
  for sheet4 in sheets4:
- df = pd.read_excel(os.path.join(root, name), sheet_name=sheet4, header=0, engine='openpyxl')
+ df = pd.read_excel(os.path.join(root, name), sheet_name=sheet4, header=0, engine='xlrd')
  if len(df) == 0:
  print(f'{name} 报表数据为空')
  os.remove(os.path.join(root, name))
@@ -765,11 +765,11 @@ class DataClean:
  continue

  if name.endswith('.xlsx') and '京东推广_' in name:
- df = pd.read_excel(os.path.join(root, name), header=0)
+ df = pd.read_excel(os.path.join(root, name), header=0, engine='openpyxl')
  new_name = f'py_xg_{name}'
  os.rename(os.path.join(root, name), os.path.join(root, new_name))
  elif name.endswith('.xlsx') and '京东商智_sku_商品明细' in name:
- df = pd.read_excel(os.path.join(root, name), header=0)
+ df = pd.read_excel(os.path.join(root, name), header=0, engine='openpyxl')
  df.replace(to_replace=['-'], value='', regex=False, inplace=True)
  pattern = re.findall(r'_(\d{4}-\d{2}-\d{2})', name)[0]
  df.insert(loc=0, column='日期', value=pattern)
@@ -780,7 +780,7 @@ class DataClean:
  index=False, header=True, engine='openpyxl', freeze_panes=(1, 0))
  os.remove(os.path.join(root, name))
  elif name.endswith('.xlsx') and '京东商智_spu_商品明细' in name:
- df = pd.read_excel(os.path.join(root, name), header=0)
+ df = pd.read_excel(os.path.join(root, name), header=0, engine='openpyxl')
  df.replace(to_replace=['-'], value='', regex=False, inplace=True)
  pattern = re.findall(r'_(\d{4}-\d{2}-\d{2})', name)[0]
  df.insert(loc=0, column='日期', value=pattern)
@@ -791,7 +791,7 @@ class DataClean:
  index=False, header=True, engine='openpyxl', freeze_panes=(1, 0))
  os.remove(os.path.join(root, name))
  elif name.endswith('.xlsx') and '京东商智_店铺来源_三级来源' in name:
- df = pd.read_excel(os.path.join(root, name), header=0)
+ df = pd.read_excel(os.path.join(root, name), header=0, engine='openpyxl')
  df.replace(to_replace=['-'], value='', regex=False, inplace=True)
  df.rename(columns={'时间': '日期'}, inplace=True)
  for col in df.columns.tolist():
@@ -870,7 +870,7 @@ class DataClean:

  if name.endswith('.xlsx') and '商品素材_' in name:
  shop_name = re.findall(r'_([\u4e00-\u9fffA-Za-z]+店)_', name)[0]
- df = pd.read_excel(os.path.join(root, name), header=0)
+ df = pd.read_excel(os.path.join(root, name), header=0, engine='xlrd')
  if '日期' not in df.columns.tolist():
  df.insert(loc=0, column='日期', value=datetime.datetime.today().strftime('%Y-%m-%d'))
  if '店铺名称' not in df.columns.tolist():
@@ -1276,7 +1276,7 @@ class DataClean:
  new_path = os.path.join(root, zip_name_1) # 拼接解压后的文件路径
  if os.path.isfile(new_path) and '全部渠道_商品明细' in new_path: # 是否存在和包内同名的文件
  # 专门处理京东文件, 已过期可删
- df = pd.read_excel(new_path)
+ df = pd.read_excel(new_path, engine='xlrd')
  try:
  pattern1 = re.findall(r'\d{8}_(\d{4})(\d{2})(\d{2})_全部渠道_商品明细',
  name)
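
Most of the clean_upload.py changes in this release pin pandas' read_excel engine explicitly instead of relying on auto-detection: xlrd for the legacy .xls exports and openpyxl for .xlsx files. A minimal sketch of selecting the engine from the file extension (a hypothetical helper, not part of the package, assuming both xlrd and openpyxl are installed):

    import os
    import pandas as pd

    def read_report(path: str, **kwargs) -> pd.DataFrame:
        # Hypothetical helper: pick the engine by extension rather than
        # letting pandas guess. xlrd handles .xls, openpyxl handles .xlsx.
        engine = 'xlrd' if os.path.splitext(path)[1].lower() == '.xls' else 'openpyxl'
        return pd.read_excel(path, engine=engine, **kwargs)

    # df = read_report('手淘搜索_本店引流词_2024-03-01.xls', header=5)
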
@@ -1564,3 +1564,4 @@ if __name__ == '__main__':


  # test()
+
{mdbq-2.6.5 → mdbq-2.6.6}/mdbq/dataframe/converter.py
@@ -37,6 +37,7 @@ class DataFrameConverter(object):
  df.replace(to_replace=['="'], value='', regex=True, inplace=True) # ="和"不可以放在一起清洗, 因为有: id=86785565
  df.replace(to_replace=['"'], value='', regex=True, inplace=True)
  cols = df.columns.tolist()
+
  df.reset_index(inplace=True, drop=True) # 重置索引,避免下面的 df.loc[0, col] 会出错

  for col in cols:
@@ -81,7 +82,9 @@ class DataFrameConverter(object):
  df[col] = df[col].apply(lambda x: pd.to_datetime(x))
  except:
  pass
- new_col = col.lower()
+ new_col = re.sub(r'[()()-,,$%&~^、* ]', '_', col.lower())
+ new_col = re.sub(r'_{2,}', '_', new_col)
+ new_col = re.sub(r'_+$', '', new_col)
  df.rename(columns={col: new_col}, inplace=True)
  df.fillna(0, inplace=True)
  return df
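
The converter.py change above replaces the bare col.lower() with a three-step column-name sanitisation: lowercase, replace the listed bracket/comma/symbol/space characters with underscores, collapse runs of underscores, and strip any trailing underscore. A small standalone check of that behaviour (the character class below is a simplified stand-in for the one in the diff, and the sample column names are invented for illustration):

    import re

    def clean_col(col: str) -> str:
        new_col = re.sub(r'[()（）,，$%&~^、* -]', '_', col.lower())  # punctuation/space -> _
        new_col = re.sub(r'_{2,}', '_', new_col)                      # collapse __ runs
        new_col = re.sub(r'_+$', '', new_col)                         # drop trailing _
        return new_col

    print(clean_col('下单新客数(去重)'))  # -> 下单新客数_去重
    print(clean_col('UV价值 (元)'))       # -> uv价值_元
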
{mdbq-2.6.5 → mdbq-2.6.6}/mdbq.egg-info/PKG-INFO
@@ -1,6 +1,6 @@
  Metadata-Version: 2.1
  Name: mdbq
- Version: 2.6.5
+ Version: 2.6.6
  Home-page: https://pypi.org/project/mdbsql
  Author: xigua,
  Author-email: 2587125111@qq.com
{mdbq-2.6.5 → mdbq-2.6.6}/setup.py
@@ -3,7 +3,7 @@
  from setuptools import setup, find_packages

  setup(name='mdbq',
- version='2.6.5',
+ version='2.6.6',
  author='xigua, ',
  author_email="2587125111@qq.com",
  url='https://pypi.org/project/mdbsql',