mdbq 2.5.5__tar.gz → 2.5.7__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {mdbq-2.5.5 → mdbq-2.5.7}/PKG-INFO +1 -1
- {mdbq-2.5.5 → mdbq-2.5.7}/mdbq/clean/clean_upload.py +112 -8
- {mdbq-2.5.5 → mdbq-2.5.7}/mdbq.egg-info/PKG-INFO +1 -1
- {mdbq-2.5.5 → mdbq-2.5.7}/setup.py +1 -1
- {mdbq-2.5.5 → mdbq-2.5.7}/README.txt +0 -0
- {mdbq-2.5.5 → mdbq-2.5.7}/mdbq/__init__.py +0 -0
- {mdbq-2.5.5 → mdbq-2.5.7}/mdbq/__version__.py +0 -0
- {mdbq-2.5.5 → mdbq-2.5.7}/mdbq/aggregation/__init__.py +0 -0
- {mdbq-2.5.5 → mdbq-2.5.7}/mdbq/aggregation/aggregation.py +0 -0
- {mdbq-2.5.5 → mdbq-2.5.7}/mdbq/aggregation/df_types.py +0 -0
- {mdbq-2.5.5 → mdbq-2.5.7}/mdbq/aggregation/mysql_types.py +0 -0
- {mdbq-2.5.5 → mdbq-2.5.7}/mdbq/aggregation/optimize_data.py +0 -0
- {mdbq-2.5.5 → mdbq-2.5.7}/mdbq/aggregation/query_data.py +0 -0
- {mdbq-2.5.5 → mdbq-2.5.7}/mdbq/bdup/__init__.py +0 -0
- {mdbq-2.5.5 → mdbq-2.5.7}/mdbq/bdup/bdup.py +0 -0
- {mdbq-2.5.5 → mdbq-2.5.7}/mdbq/clean/__init__.py +0 -0
- {mdbq-2.5.5 → mdbq-2.5.7}/mdbq/clean/data_clean.py +0 -0
- {mdbq-2.5.5 → mdbq-2.5.7}/mdbq/company/__init__.py +0 -0
- {mdbq-2.5.5 → mdbq-2.5.7}/mdbq/company/copysh.py +0 -0
- {mdbq-2.5.5 → mdbq-2.5.7}/mdbq/company/home_sh.py +0 -0
- {mdbq-2.5.5 → mdbq-2.5.7}/mdbq/config/__init__.py +0 -0
- {mdbq-2.5.5 → mdbq-2.5.7}/mdbq/config/get_myconf.py +0 -0
- {mdbq-2.5.5 → mdbq-2.5.7}/mdbq/config/products.py +0 -0
- {mdbq-2.5.5 → mdbq-2.5.7}/mdbq/config/set_support.py +0 -0
- {mdbq-2.5.5 → mdbq-2.5.7}/mdbq/config/update_conf.py +0 -0
- {mdbq-2.5.5 → mdbq-2.5.7}/mdbq/dataframe/__init__.py +0 -0
- {mdbq-2.5.5 → mdbq-2.5.7}/mdbq/dataframe/converter.py +0 -0
- {mdbq-2.5.5 → mdbq-2.5.7}/mdbq/log/__init__.py +0 -0
- {mdbq-2.5.5 → mdbq-2.5.7}/mdbq/log/mylogger.py +0 -0
- {mdbq-2.5.5 → mdbq-2.5.7}/mdbq/mongo/__init__.py +0 -0
- {mdbq-2.5.5 → mdbq-2.5.7}/mdbq/mongo/mongo.py +0 -0
- {mdbq-2.5.5 → mdbq-2.5.7}/mdbq/mysql/__init__.py +0 -0
- {mdbq-2.5.5 → mdbq-2.5.7}/mdbq/mysql/mysql.py +0 -0
- {mdbq-2.5.5 → mdbq-2.5.7}/mdbq/mysql/s_query.py +0 -0
- {mdbq-2.5.5 → mdbq-2.5.7}/mdbq/mysql/year_month_day.py +0 -0
- {mdbq-2.5.5 → mdbq-2.5.7}/mdbq/other/__init__.py +0 -0
- {mdbq-2.5.5 → mdbq-2.5.7}/mdbq/other/porxy.py +0 -0
- {mdbq-2.5.5 → mdbq-2.5.7}/mdbq/other/pov_city.py +0 -0
- {mdbq-2.5.5 → mdbq-2.5.7}/mdbq/other/sku_picture.py +0 -0
- {mdbq-2.5.5 → mdbq-2.5.7}/mdbq/other/ua_sj.py +0 -0
- {mdbq-2.5.5 → mdbq-2.5.7}/mdbq/pbix/__init__.py +0 -0
- {mdbq-2.5.5 → mdbq-2.5.7}/mdbq/pbix/pbix_refresh.py +0 -0
- {mdbq-2.5.5 → mdbq-2.5.7}/mdbq/pbix/refresh_all.py +0 -0
- {mdbq-2.5.5 → mdbq-2.5.7}/mdbq/pbix/refresh_all_old.py +0 -0
- {mdbq-2.5.5 → mdbq-2.5.7}/mdbq/req_post/__init__.py +0 -0
- {mdbq-2.5.5 → mdbq-2.5.7}/mdbq/req_post/req_tb.py +0 -0
- {mdbq-2.5.5 → mdbq-2.5.7}/mdbq/spider/__init__.py +0 -0
- {mdbq-2.5.5 → mdbq-2.5.7}/mdbq/spider/aikucun.py +0 -0
- {mdbq-2.5.5 → mdbq-2.5.7}/mdbq.egg-info/SOURCES.txt +0 -0
- {mdbq-2.5.5 → mdbq-2.5.7}/mdbq.egg-info/dependency_links.txt +0 -0
- {mdbq-2.5.5 → mdbq-2.5.7}/mdbq.egg-info/top_level.txt +0 -0
- {mdbq-2.5.5 → mdbq-2.5.7}/setup.cfg +0 -0
@@ -7,7 +7,6 @@ import zipfile
|
|
7
7
|
from pyzipper import PyZipFile
|
8
8
|
import os
|
9
9
|
import platform
|
10
|
-
import pathlib
|
11
10
|
import json
|
12
11
|
from mdbq.mongo import mongo
|
13
12
|
from mdbq.mysql import mysql
|
@@ -646,6 +645,52 @@ class DataClean:
|
|
646
645
|
}
|
647
646
|
)
|
648
647
|
|
648
|
+
def sp_scene_clean(self, path=None, is_except=None):
    """Clean "商品素材" workbooks found under *path* and queue them for upload.

    Walks *path* bottom-up. For every ``.xlsx`` file whose name contains
    ``商品素材_`` the shop name is extracted from the file name, inserted as
    a ``店铺名称`` column at position 1, the frame is written to
    ``upload_path`` under a ``py_xg_`` prefixed name, the source file is
    deleted, and a record describing the frame is appended to
    ``self.datas``.

    Args:
        path: Directory to scan. Falls back to ``self.path`` when falsy.
        is_except: Substrings; a file is ignored when any of them occurs in
            its full path. ``None`` or an empty list disables the filter.

    Files are skipped, and left untouched on disk, when the name carries no
    ``_<shop>店_`` segment or when it names neither ``官方旗舰店`` nor
    ``官方企业店``. Both checks run before anything is written or deleted,
    so an unrecognised file is never lost.
    """
    if not path:
        path = self.path
    excluded = is_except or []
    # Temp files, OS metadata, images and already-processed outputs.
    skip_markers = ('~$', '.DS', '.localized', '.jpg', '.png', 'py_xg')
    shop_pattern = re.compile(r'_([\u4e00-\u9fffA-Za-z]+店)_')

    for root, _dirs, files in os.walk(path, topdown=False):
        for name in files:
            if any(marker in name for marker in skip_markers):
                continue
            source_file = os.path.join(root, name)
            # Files or folders the caller asked to leave alone.
            if any(item in source_file for item in excluded):
                continue
            if not (name.endswith('.xlsx') and '商品素材_' in name):
                continue

            shop_names = shop_pattern.findall(name)
            if not shop_names:
                # Previously an IndexError; now the file is just skipped.
                print(f'{name}: 文件名中未找到店铺名称, 跳过')
                continue
            shop_name = shop_names[0]

            # Resolve the destination before touching the file system.
            db_name = None
            collection_name = None
            if '官方旗舰店' in name:
                db_name = '属性设置3'
                collection_name = '商品素材_天猫'
            elif '官方企业店' in name:
                db_name = '属性设置3'
                collection_name = '商品素材_淘宝'
            if not db_name or not collection_name:
                print(f'db_name/collection_name 不能为空')
                continue

            df = pd.read_excel(source_file, header=0)
            df.insert(loc=1, column='店铺名称', value=shop_name)
            new_name = f'py_xg_{name}'
            # NOTE(review): upload_path is not defined in this method;
            # presumably a module-level setting - confirm.
            df.to_excel(os.path.join(upload_path, new_name),
                        index=False, header=True, engine='openpyxl', freeze_panes=(1, 0))
            os.remove(source_file)

            # Hand the frame to self.datas; it waits there for the database update.
            self.datas.append(
                {
                    '数据库名': db_name,
                    '集合名称': collection_name,
                    '数据主体': df,
                    '文件名': name,
                }
            )
|
649
694
|
"""
|
650
695
|
{文件分类}
|
651
696
|
将已处理完的文件 分类移到原始文件夹下
|
@@ -665,7 +710,7 @@ class DataClean:
|
|
665
710
|
_date = re.findall(r'(\d{4}-\d{2})-\d{2}', str(_name))
|
666
711
|
if _date:
|
667
712
|
_date = _date[0]
|
668
|
-
t2 =
|
713
|
+
t2 = os.path.join(t2, _date) # 添加 年月分类
|
669
714
|
if not os.path.exists(t2):
|
670
715
|
os.makedirs(t2, exist_ok=True)
|
671
716
|
old_file = os.path.join(t2, _name) # 检查目标位置是否已经存在该文件
|
@@ -711,16 +756,16 @@ class DataClean:
|
|
711
756
|
elif '天猫' in name and name.endswith('.csv') and 'order' in name:
|
712
757
|
t_path = os.path.join(self.source_path, '天猫_生意经', '订单数据')
|
713
758
|
bib(t_path, _as_month=False)
|
714
|
-
elif '淘宝' in name and name.endswith('.csv') and 'baobei' in name:
|
759
|
+
elif '淘宝' in name or '企业店' in name and name.endswith('.csv') and 'baobei' in name:
|
715
760
|
t_path = os.path.join(self.source_path, '淘宝_生意经', '宝贝指标')
|
716
761
|
bib(t_path, _as_month=True)
|
717
|
-
elif '淘宝' in name and name.endswith('.csv') and '省份城市分析' in name:
|
762
|
+
elif '淘宝' in name or '企业店' in name and name.endswith('.csv') and '省份城市分析' in name:
|
718
763
|
t_path = os.path.join(self.source_path, '淘宝_生意经', '省份城市分析')
|
719
764
|
bib(t_path, _as_month=True)
|
720
|
-
elif '淘宝' in name and name.endswith('.csv') and '店铺销售指标' in name:
|
765
|
+
elif '淘宝' in name or '企业店' in name and name.endswith('.csv') and '店铺销售指标' in name:
|
721
766
|
t_path = os.path.join(self.source_path, '淘宝_生意经', '店铺销售指标')
|
722
767
|
bib(t_path, _as_month=False)
|
723
|
-
elif '淘宝' in name and name.endswith('.csv') and 'order' in name:
|
768
|
+
elif '淘宝' in name or '企业店' in name and name.endswith('.csv') and 'order' in name:
|
724
769
|
t_path = os.path.join(self.source_path, '淘宝_生意经', '订单数据')
|
725
770
|
bib(t_path, _as_month=False)
|
726
771
|
|
@@ -850,7 +895,12 @@ class DataClean:
|
|
850
895
|
elif '定向人群' in name:
|
851
896
|
t_path = os.path.join(self.source_path, '天猫推广报表', '品销宝', '定向人群报表')
|
852
897
|
bib(t_path, _as_month=True)
|
853
|
-
|
898
|
+
elif name.endswith('xlsx') and '商品素材_万里马官方旗舰店' in name:
|
899
|
+
t_path = os.path.join(self.source_path, '商品素材', '天猫')
|
900
|
+
bib(t_path, _as_month=True)
|
901
|
+
elif name.endswith('xlsx') and '商品素材_万里马官方企业店' in name:
|
902
|
+
t_path = os.path.join(self.source_path, '商品素材', '淘宝')
|
903
|
+
bib(t_path, _as_month=True)
|
854
904
|
|
855
905
|
# @try_except
|
856
906
|
def move_tg_tb(self, path=None, is_except=[]):
|
@@ -1110,9 +1160,10 @@ def main(service_databases=None):
|
|
1110
1160
|
)
|
1111
1161
|
c.new_unzip(is_move=True) # 解压文件, is_move 解压后是否删除原 zip 压缩文件
|
1112
1162
|
c.tg_reports(is_except=['except']) # 推广报表,天猫淘宝共同清洗
|
1113
|
-
c.syj_reports_tm(is_except=['except']) #
|
1163
|
+
c.syj_reports_tm(is_except=['except']) # 天猫生意经
|
1114
1164
|
# c.syj_reports_tb(is_except=['except']) # 淘宝生意经,不可以和天猫同时运行
|
1115
1165
|
c.jd_reports(is_except=['except']) # 清洗京东报表
|
1166
|
+
c.sp_scene_clean(is_except=['except']) # 商品素材
|
1116
1167
|
c.upload_df(service_databases=service_databases) # 上传数据库
|
1117
1168
|
|
1118
1169
|
c.move_sjy(is_except=['临时文件',]) # 生意经,移到文件到原始文件夹
|
@@ -1161,6 +1212,49 @@ def main(service_databases=None):
|
|
1161
1212
|
)
|
1162
1213
|
|
1163
1214
|
|
1215
|
+
def test(path=f'/Users/xigua/数据中心/原始文件2/京东报表/JD流量来源13414124124',
         new_root='/Users/xigua/数据中心/原始文件3/京东报表/店铺来源_三级来源'):
    """Convert CSV exports under *path* into monthly ``py_xg_`` workbooks.

    Walks *path* bottom-up. Every ``.csv`` file is loaded, columns whose
    name contains ``同比``, ``环比`` or ``时间`` are dropped, and the result
    is written as
    ``py_xg_京东商智_店铺来源_三级来源_<start date>.xlsx`` into
    ``<new_root>/<YYYY-MM>``. The start date and month are parsed from the
    file name.

    Args:
        path: Directory holding the source CSV files.
        new_root: Directory under which the per-month folders are created.

    Files are skipped when the name contains ``~$``, ``.DS``,
    ``.localized``, ``.jpg``, ``.png`` or ``py_xg``, when the directory
    path contains ``TM_旧表_字段``, or when the name carries no
    ``_YYYY-MM-DD_YYYY-MM-DD`` range. A failure while writing the workbook
    is printed and does not stop the run.
    """
    skip_markers = ('~$', '.DS', '.localized', '.jpg', '.png', 'py_xg')
    range_pattern = re.compile(r'_(\d{4}-\d{2}-\d{2})_\d{4}-\d{2}-\d{2}')
    month_pattern = re.compile(r'_(\d{4}-\d{2})-\d{2}')

    for root, _dirs, files in os.walk(path, topdown=False):
        for name in files:
            if any(marker in name for marker in skip_markers):
                continue
            if 'TM_旧表_字段' in root:
                continue
            if not name.endswith('.csv'):
                continue

            print(name)
            date_ranges = range_pattern.findall(name)
            if not date_ranges:
                # Previously an IndexError that aborted the whole walk.
                print(f'{name}: 文件名中未找到日期范围, 跳过')
                continue
            date_all = date_ranges[0]
            # A date-range match guarantees the month pattern matches too.
            date = month_pattern.findall(name)[0]

            df = pd.read_csv(os.path.join(root, name), encoding='utf-8_sig', header=0, na_filter=False)
            unwanted = [
                column for column in df.columns.tolist()
                if '同比' in column or '环比' in column or '时间' in column
            ]
            df = df.drop(columns=unwanted)

            # One folder per year-month.
            new_path = f'{new_root}/{date}'
            os.makedirs(new_path, exist_ok=True)

            new_name = f'py_xg_京东商智_店铺来源_三级来源_{date_all}.xlsx'
            try:
                df.to_excel(os.path.join(new_path, new_name),
                            index=False, header=True, engine='openpyxl', freeze_panes=(1, 0))
            except Exception as e:
                # Best effort: report the failure and move on to the next file.
                print(e)
|
1254
|
+
|
1255
|
+
|
1256
|
+
|
1257
|
+
|
1164
1258
|
if __name__ == '__main__':
|
1165
1259
|
main(
|
1166
1260
|
service_databases = [
|
@@ -1171,3 +1265,13 @@ if __name__ == '__main__':
|
|
1171
1265
|
]
|
1172
1266
|
)
|
1173
1267
|
|
1268
|
+
# c = DataClean(
|
1269
|
+
# path=upload_path, # 源文件目录,下载文件夹
|
1270
|
+
# source_path=source_path3, # 原始文件保存目录
|
1271
|
+
# service_databases=[{'home_lx': 'mysql'},]
|
1272
|
+
# )
|
1273
|
+
# c.sp_scene_clean(is_except=['except']) # 商品素材
|
1274
|
+
# c.move_tg_tm(is_except=['临时文件', ]) # 天猫,移到文件到原始文件夹
|
1275
|
+
|
1276
|
+
|
1277
|
+
# test()
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|