mdbq 2.7.0__tar.gz → 2.7.1__tar.gz
This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the contents of those package versions as they appear in their respective public registries.
- {mdbq-2.7.0 → mdbq-2.7.1}/PKG-INFO +2 -2
- {mdbq-2.7.0 → mdbq-2.7.1}/mdbq/aggregation/aggregation.py +33 -29
- {mdbq-2.7.0 → mdbq-2.7.1}/mdbq/aggregation/query_data.py +3 -3
- {mdbq-2.7.0 → mdbq-2.7.1}/mdbq/clean/clean_upload.py +25 -16
- {mdbq-2.7.0 → mdbq-2.7.1}/mdbq/company/copysh.py +1 -2
- {mdbq-2.7.0 → mdbq-2.7.1}/mdbq.egg-info/PKG-INFO +2 -2
- {mdbq-2.7.0 → mdbq-2.7.1}/setup.py +2 -2
- {mdbq-2.7.0 → mdbq-2.7.1}/README.txt +0 -0
- {mdbq-2.7.0 → mdbq-2.7.1}/mdbq/__init__.py +0 -0
- {mdbq-2.7.0 → mdbq-2.7.1}/mdbq/__version__.py +0 -0
- {mdbq-2.7.0 → mdbq-2.7.1}/mdbq/aggregation/__init__.py +0 -0
- {mdbq-2.7.0 → mdbq-2.7.1}/mdbq/aggregation/df_types.py +0 -0
- {mdbq-2.7.0 → mdbq-2.7.1}/mdbq/aggregation/mysql_types.py +0 -0
- {mdbq-2.7.0 → mdbq-2.7.1}/mdbq/aggregation/optimize_data.py +0 -0
- {mdbq-2.7.0 → mdbq-2.7.1}/mdbq/bdup/__init__.py +0 -0
- {mdbq-2.7.0 → mdbq-2.7.1}/mdbq/bdup/bdup.py +0 -0
- {mdbq-2.7.0 → mdbq-2.7.1}/mdbq/clean/__init__.py +0 -0
- {mdbq-2.7.0 → mdbq-2.7.1}/mdbq/clean/data_clean.py +0 -0
- {mdbq-2.7.0 → mdbq-2.7.1}/mdbq/company/__init__.py +0 -0
- {mdbq-2.7.0 → mdbq-2.7.1}/mdbq/company/copysh_bak.py +0 -0
- {mdbq-2.7.0 → mdbq-2.7.1}/mdbq/company/home_sh.py +0 -0
- {mdbq-2.7.0 → mdbq-2.7.1}/mdbq/config/__init__.py +0 -0
- {mdbq-2.7.0 → mdbq-2.7.1}/mdbq/config/get_myconf.py +0 -0
- {mdbq-2.7.0 → mdbq-2.7.1}/mdbq/config/products.py +0 -0
- {mdbq-2.7.0 → mdbq-2.7.1}/mdbq/config/set_support.py +0 -0
- {mdbq-2.7.0 → mdbq-2.7.1}/mdbq/config/update_conf.py +0 -0
- {mdbq-2.7.0 → mdbq-2.7.1}/mdbq/dataframe/__init__.py +0 -0
- {mdbq-2.7.0 → mdbq-2.7.1}/mdbq/dataframe/converter.py +0 -0
- {mdbq-2.7.0 → mdbq-2.7.1}/mdbq/log/__init__.py +0 -0
- {mdbq-2.7.0 → mdbq-2.7.1}/mdbq/log/mylogger.py +0 -0
- {mdbq-2.7.0 → mdbq-2.7.1}/mdbq/mongo/__init__.py +0 -0
- {mdbq-2.7.0 → mdbq-2.7.1}/mdbq/mongo/mongo.py +0 -0
- {mdbq-2.7.0 → mdbq-2.7.1}/mdbq/mysql/__init__.py +0 -0
- {mdbq-2.7.0 → mdbq-2.7.1}/mdbq/mysql/mysql.py +0 -0
- {mdbq-2.7.0 → mdbq-2.7.1}/mdbq/mysql/recheck_mysql.py +0 -0
- {mdbq-2.7.0 → mdbq-2.7.1}/mdbq/mysql/s_query.py +0 -0
- {mdbq-2.7.0 → mdbq-2.7.1}/mdbq/mysql/year_month_day.py +0 -0
- {mdbq-2.7.0 → mdbq-2.7.1}/mdbq/other/__init__.py +0 -0
- {mdbq-2.7.0 → mdbq-2.7.1}/mdbq/other/porxy.py +0 -0
- {mdbq-2.7.0 → mdbq-2.7.1}/mdbq/other/pov_city.py +0 -0
- {mdbq-2.7.0 → mdbq-2.7.1}/mdbq/other/sku_picture.py +0 -0
- {mdbq-2.7.0 → mdbq-2.7.1}/mdbq/other/ua_sj.py +0 -0
- {mdbq-2.7.0 → mdbq-2.7.1}/mdbq/pbix/__init__.py +0 -0
- {mdbq-2.7.0 → mdbq-2.7.1}/mdbq/pbix/pbix_refresh.py +0 -0
- {mdbq-2.7.0 → mdbq-2.7.1}/mdbq/pbix/refresh_all.py +0 -0
- {mdbq-2.7.0 → mdbq-2.7.1}/mdbq/pbix/refresh_all_old.py +0 -0
- {mdbq-2.7.0 → mdbq-2.7.1}/mdbq/req_post/__init__.py +0 -0
- {mdbq-2.7.0 → mdbq-2.7.1}/mdbq/req_post/req_tb.py +0 -0
- {mdbq-2.7.0 → mdbq-2.7.1}/mdbq/spider/__init__.py +0 -0
- {mdbq-2.7.0 → mdbq-2.7.1}/mdbq/spider/aikucun.py +0 -0
- {mdbq-2.7.0 → mdbq-2.7.1}/mdbq.egg-info/SOURCES.txt +0 -0
- {mdbq-2.7.0 → mdbq-2.7.1}/mdbq.egg-info/dependency_links.txt +0 -0
- {mdbq-2.7.0 → mdbq-2.7.1}/mdbq.egg-info/top_level.txt +0 -0
- {mdbq-2.7.0 → mdbq-2.7.1}/setup.cfg +0 -0
@@ -1,5 +1,7 @@
|
|
1
1
|
# -*- coding:utf-8 -*-
|
2
2
|
import warnings
|
3
|
+
from unittest.mock import inplace
|
4
|
+
|
3
5
|
import pandas as pd
|
4
6
|
import numpy as np
|
5
7
|
import chardet
|
@@ -1118,7 +1120,7 @@ class DatabaseUpdate:
|
|
1118
1120
|
def upload_dir(path, db_name, collection_name, dbs={'mysql': True, 'mongodb': True}, json_path=None, target_service='company'):
|
1119
1121
|
""" 上传一个文件夹到 mysql 或者 mongodb 数据库 """
|
1120
1122
|
if not os.path.isdir(path):
|
1121
|
-
print(f'{os.path.splitext(os.path.basename(__file__))[0]}.upload_dir:
|
1123
|
+
print(f'{os.path.splitext(os.path.basename(__file__))[0]}.upload_dir: 函数只接受文件夹路径,不是一个文件夹: {path}')
|
1122
1124
|
return
|
1123
1125
|
|
1124
1126
|
if dbs['mongodb']:
|
@@ -1288,29 +1290,40 @@ def file_dir(one_file=True, target_service='company'):
|
|
1288
1290
|
|
1289
1291
|
|
1290
1292
|
def test():
|
1291
|
-
path = '
|
1293
|
+
path = os.path.relpath(r'C:\Users\Administrator\Downloads\JD商品明细sku')
|
1292
1294
|
for root, dirs, files in os.walk(path, topdown=False):
|
1293
1295
|
for name in files:
|
1294
1296
|
if name.endswith('.csv') and 'baidu' not in name and '~' not in name:
|
1297
|
+
print(name)
|
1298
|
+
# df = pd.read_excel(os.path.join(root, name), header=0)
|
1295
1299
|
df = pd.read_csv(os.path.join(root, name), encoding='utf-8_sig', header=0, na_filter=False)
|
1296
|
-
|
1297
|
-
|
1300
|
+
cols = df.columns.tolist()
|
1301
|
+
if '店铺名称' not in cols:
|
1302
|
+
df.insert(loc=1, column='店铺名称', value='京东箱包旗舰店')
|
1303
|
+
if '曝光量' in cols:
|
1304
|
+
df.rename(columns={
|
1305
|
+
'曝光量': '搜索曝光量',
|
1306
|
+
'点击次数': '搜索点击次数',
|
1307
|
+
'点击率': '搜索点击率',
|
1308
|
+
}, inplace=True)
|
1309
|
+
if '取消金额' in cols:
|
1310
|
+
df.rename(columns={
|
1311
|
+
'取消金额': '取消及售后退款金额',
|
1312
|
+
'取消商品件数': '取消及售后退款件数',
|
1313
|
+
'取消单量': '取消及售后退款单量',
|
1314
|
+
}, inplace=True)
|
1315
|
+
if '取消及售后退款金额' not in cols:
|
1316
|
+
df['取消及售后退款金额'] = '0.0'
|
1317
|
+
df['取消及售后退款件数'] = 0
|
1318
|
+
df['取消及售后退款单量'] = 0
|
1298
1319
|
df.to_csv(os.path.join(root, name), encoding='utf-8_sig', index=False, header=True)
|
1320
|
+
# new_name = f'{os.path.splitext(name)[0]}.xlsx'
|
1321
|
+
# df.to_excel(os.path.join(root, name),
|
1322
|
+
# index=False, header=True, engine='openpyxl', freeze_panes=(1, 0))
|
1299
1323
|
# break
|
1300
1324
|
# break
|
1301
1325
|
|
1302
1326
|
|
1303
|
-
def test2():
|
1304
|
-
dp = DatabaseUpdate(path='/Users/xigua/Downloads')
|
1305
|
-
dp.new_unzip(is_move=True)
|
1306
|
-
dp.cleaning(is_move=False, ) # 清洗数据, 存入 self.datas
|
1307
|
-
dp.upload_df(service_databases=[
|
1308
|
-
# {'home_lx': 'mongodb'},
|
1309
|
-
{'company': 'mysql'},
|
1310
|
-
# {'nas': 'mysql'}
|
1311
|
-
], path=None, service_name=None)
|
1312
|
-
|
1313
|
-
|
1314
1327
|
if __name__ == '__main__':
|
1315
1328
|
username, password, host, port = get_myconf.select_config_values(target_service='nas', database='mysql')
|
1316
1329
|
print(username, password, host, port)
|
@@ -1326,24 +1339,15 @@ if __name__ == '__main__':
|
|
1326
1339
|
# )
|
1327
1340
|
|
1328
1341
|
# 上传一个目录到指定数据库
|
1329
|
-
db_name = '
|
1330
|
-
table_name = '
|
1342
|
+
db_name = '京东数据3'
|
1343
|
+
table_name = '京东商智_spu_商品明细'
|
1331
1344
|
upload_dir(
|
1332
|
-
path='
|
1345
|
+
path=os.path.relpath(r'C:\同步空间\BaiduSyncdisk\原始文件3\京东报表\京东商智_spu_商品明细'),
|
1333
1346
|
db_name=db_name,
|
1334
1347
|
collection_name=table_name,
|
1335
1348
|
dbs={'mysql': True, 'mongodb': False},
|
1336
|
-
target_service='
|
1349
|
+
target_service='home_lx',
|
1337
1350
|
)
|
1338
1351
|
|
1339
1352
|
|
1340
|
-
#
|
1341
|
-
# dp = DatabaseUpdate(path='/Users/xigua/Downloads')
|
1342
|
-
# dp.new_unzip(is_move=True)
|
1343
|
-
# dp.cleaning(is_move=False) # 清洗数据, 存入 self.datas, 不需要立即移除文件,仍保留文件到原始文件中
|
1344
|
-
# # 将 self.datas 更新至数据库
|
1345
|
-
# # dp.upload_df(service_databases=[
|
1346
|
-
# # # {'home_lx': 'mongodb'},
|
1347
|
-
# # {'company': 'mysql'},
|
1348
|
-
# # # {'nas': 'mysql'},
|
1349
|
-
# # ])
|
1353
|
+
# test()
|
@@ -2316,7 +2316,7 @@ def main():
|
|
2316
2316
|
if __name__ == '__main__':
|
2317
2317
|
data_aggregation(
|
2318
2318
|
service_databases=[{'company': 'mysql'}],
|
2319
|
-
months=
|
2320
|
-
is_juhe=
|
2321
|
-
# less_dict=['
|
2319
|
+
months=1,
|
2320
|
+
is_juhe=False, # 立即启动对聚合数据的清理工作
|
2321
|
+
# less_dict=['生意参谋_直播场次分析'], # 单独聚合某一个数据库
|
2322
2322
|
)
|
@@ -145,6 +145,7 @@ class DataClean:
|
|
145
145
|
df = pd.read_excel(os.path.join(root, name), header=4)
|
146
146
|
if len(df) == 0:
|
147
147
|
print(f'{name} 报表数据不能为空')
|
148
|
+
os.remove(os.path.join(root, name))
|
148
149
|
continue
|
149
150
|
df.replace(to_replace=['-'], value=0, regex=False, inplace=True)
|
150
151
|
df.replace(to_replace=[','], value='', regex=True, inplace=True)
|
@@ -159,6 +160,7 @@ class DataClean:
|
|
159
160
|
df = pd.read_excel(os.path.join(root, name), header=5, engine='xlrd')
|
160
161
|
if len(df) == 0:
|
161
162
|
print(f'{name} 报表数据不能为空')
|
163
|
+
os.remove(os.path.join(root, name))
|
162
164
|
continue
|
163
165
|
df.replace(to_replace=['-'], value=0, regex=False, inplace=True)
|
164
166
|
df.replace(to_replace=[','], value='', regex=True, inplace=True)
|
@@ -255,6 +257,7 @@ class DataClean:
|
|
255
257
|
df = pd.read_csv(os.path.join(root, name), encoding='utf-8_sig', header=0, na_filter=False)
|
256
258
|
if len(df) == 0:
|
257
259
|
print(f'{name} 报表数据为空')
|
260
|
+
os.remove(os.path.join(root, name))
|
258
261
|
continue
|
259
262
|
new_name = f'py_xg_{os.path.splitext(name)[0]}.csv'
|
260
263
|
self.save_to_csv(df, root, new_name, encoding='utf-8_sig')
|
@@ -263,6 +266,7 @@ class DataClean:
|
|
263
266
|
df = pd.read_csv(os.path.join(root, name), encoding='utf-8_sig', header=0, na_filter=False)
|
264
267
|
if len(df) == 0:
|
265
268
|
print(f'{name} 报表数据为空')
|
269
|
+
os.remove(os.path.join(root, name))
|
266
270
|
continue
|
267
271
|
for col in df.columns.tolist():
|
268
272
|
if '(' in col or ')' in col:
|
@@ -563,6 +567,7 @@ class DataClean:
|
|
563
567
|
name_st = re.findall(r'([\u4e00-\u9fa5]+)\(分日', name)
|
564
568
|
if not name_st:
|
565
569
|
print(f'{name} 正则提取文件名失败')
|
570
|
+
os.remove(os.path.join(root, name))
|
566
571
|
continue
|
567
572
|
encoding = self.get_encoding(file_path=os.path.join(root, name))
|
568
573
|
df = pd.read_csv(os.path.join(root, name), encoding=encoding, header=0, na_filter=False)
|
@@ -802,8 +807,9 @@ class DataClean:
|
|
802
807
|
if not is_continue:
|
803
808
|
continue
|
804
809
|
|
805
|
-
if name.endswith('.
|
806
|
-
df = pd.read_excel(os.path.join(root, name), header=0, engine='openpyxl')
|
810
|
+
if name.endswith('.csv') and '京东推广_' in name:
|
811
|
+
# df = pd.read_excel(os.path.join(root, name), header=0, engine='openpyxl')
|
812
|
+
df = pd.read_csv(os.path.join(root, name), encoding='utf-8_sig', header=0, na_filter=False)
|
807
813
|
new_name = f'py_xg_{name}'
|
808
814
|
os.rename(os.path.join(root, name), os.path.join(root, new_name))
|
809
815
|
elif name.endswith('.xlsx') and '京东商智_sku_商品明细' in name:
|
@@ -813,9 +819,10 @@ class DataClean:
|
|
813
819
|
df.insert(loc=0, column='日期', value=pattern)
|
814
820
|
df.insert(loc=1, column='店铺名称', value='京东箱包旗舰店')
|
815
821
|
df.fillna(0, inplace=True)
|
816
|
-
new_name = f'py_xg_{name}'
|
817
|
-
df.
|
818
|
-
|
822
|
+
new_name = f'py_xg_{os.path.splitext(name)[0]}.csv'
|
823
|
+
df.to_csv(os.path.join(root, new_name), encoding='utf-8_sig', index=False, header=True)
|
824
|
+
# df.to_excel(os.path.join(upload_path, new_name),
|
825
|
+
# index=False, header=True, engine='openpyxl', freeze_panes=(1, 0))
|
819
826
|
os.remove(os.path.join(root, name))
|
820
827
|
elif name.endswith('.xlsx') and '京东商智_spu_商品明细' in name:
|
821
828
|
df = pd.read_excel(os.path.join(root, name), header=0, engine='openpyxl')
|
@@ -824,9 +831,10 @@ class DataClean:
|
|
824
831
|
df.insert(loc=0, column='日期', value=pattern)
|
825
832
|
df.insert(loc=1, column='店铺名称', value='京东箱包旗舰店')
|
826
833
|
df.fillna(0, inplace=True)
|
827
|
-
new_name = f'py_xg_{name}'
|
828
|
-
df.
|
829
|
-
|
834
|
+
new_name = f'py_xg_{os.path.splitext(name)[0]}.csv'
|
835
|
+
df.to_csv(os.path.join(root, new_name), encoding='utf-8_sig', index=False, header=True)
|
836
|
+
# df.to_excel(os.path.join(upload_path, new_name),
|
837
|
+
# index=False, header=True, engine='openpyxl', freeze_panes=(1, 0))
|
830
838
|
os.remove(os.path.join(root, name))
|
831
839
|
elif name.endswith('.xlsx') and '京东商智_店铺来源_三级来源' in name:
|
832
840
|
df = pd.read_excel(os.path.join(root, name), header=0, engine='openpyxl')
|
@@ -836,9 +844,10 @@ class DataClean:
|
|
836
844
|
if '环比' in col or '同比' in col:
|
837
845
|
df.drop(col, axis=1, inplace=True)
|
838
846
|
df.fillna(0, inplace=True)
|
839
|
-
new_name = f'py_xg_{name}'
|
840
|
-
df.
|
841
|
-
|
847
|
+
new_name = f'py_xg_{os.path.splitext(name)[0]}.csv'
|
848
|
+
df.to_csv(os.path.join(root, new_name), encoding='utf-8_sig', index=False, header=True)
|
849
|
+
# df.to_excel(os.path.join(upload_path, new_name),
|
850
|
+
# index=False, header=True, engine='openpyxl', freeze_panes=(1, 0))
|
842
851
|
os.remove(os.path.join(root, name))
|
843
852
|
|
844
853
|
# 将数据传入 self.datas 等待更新进数据库
|
@@ -1119,10 +1128,10 @@ class DataClean:
|
|
1119
1128
|
continue
|
1120
1129
|
|
1121
1130
|
if name.endswith('.xlsx') and '京东商智_spu_商品明细' in name:
|
1122
|
-
t_path = os.path.join(self.source_path, '京东报表', '
|
1131
|
+
t_path = os.path.join(self.source_path, '京东报表', '京东商智_spu_商品明细')
|
1123
1132
|
bib(t_path, _as_month=True)
|
1124
1133
|
elif name.endswith('.xlsx') and '京东商智_sku_商品明细' in name:
|
1125
|
-
t_path = os.path.join(self.source_path, '京东报表', '
|
1134
|
+
t_path = os.path.join(self.source_path, '京东报表', '京东商智_sku_商品明细')
|
1126
1135
|
bib(t_path, _as_month=True)
|
1127
1136
|
elif name.endswith('.xlsx') and '京东推广_搜索词' in name:
|
1128
1137
|
t_path = os.path.join(self.source_path, '京东报表', '搜索词报表')
|
@@ -1587,7 +1596,7 @@ def main(service_databases=None, is_mysql=False):
|
|
1587
1596
|
cn.dmp_tm(is_except=['except']) # 达摩盘
|
1588
1597
|
cn.tg_reports(is_except=['except']) # 推广报表,天猫淘宝共同清洗
|
1589
1598
|
cn.syj_reports_tm(is_except=['except']) # 天猫生意经
|
1590
|
-
|
1599
|
+
# # 淘宝生意经,不可以和天猫同时运行
|
1591
1600
|
# cn.syj_reports_tb(is_except=['except']) # 淘宝生意经,不可以和天猫同时运行
|
1592
1601
|
cn.jd_reports(is_except=['except']) # 清洗京东报表
|
1593
1602
|
cn.sp_scene_clean(is_except=['except']) # 商品素材
|
@@ -1646,8 +1655,8 @@ def main(service_databases=None, is_mysql=False):
|
|
1646
1655
|
if __name__ == '__main__':
|
1647
1656
|
main(
|
1648
1657
|
service_databases = [
|
1649
|
-
{'company': 'mysql'},
|
1650
|
-
|
1658
|
+
# {'company': 'mysql'},
|
1659
|
+
{'home_lx': 'mysql'},
|
1651
1660
|
# {'home_lx': 'mongodb'},
|
1652
1661
|
# {'nas': 'mysql'},
|
1653
1662
|
],
|
@@ -321,7 +321,7 @@ def op_data(days: int =100):
|
|
321
321
|
# 清理所有非聚合数据的库
|
322
322
|
optimize_data.op_data(
|
323
323
|
db_name_lists=[
|
324
|
-
'京东数据
|
324
|
+
'京东数据2',
|
325
325
|
'属性设置3',
|
326
326
|
'推广数据2',
|
327
327
|
'推广数据_淘宝店',
|
@@ -367,7 +367,6 @@ def main():
|
|
367
367
|
op_data(days=100)
|
368
368
|
|
369
369
|
t.sleep_minutes = 5 # 同步前休眠时间
|
370
|
-
# 4. 同步共享文件
|
371
370
|
t.tb_file()
|
372
371
|
time.sleep(600) # 检测间隔
|
373
372
|
|
@@ -3,10 +3,10 @@
|
|
3
3
|
from setuptools import setup, find_packages
|
4
4
|
|
5
5
|
setup(name='mdbq',
|
6
|
-
version='2.7.
|
6
|
+
version='2.7.1',
|
7
7
|
author='xigua, ',
|
8
8
|
author_email="2587125111@qq.com",
|
9
|
-
url='https://pypi.org/project/
|
9
|
+
url='https://pypi.org/project/mdbq',
|
10
10
|
long_description='''
|
11
11
|
世界上最庄严的问题:我能做什么好事?
|
12
12
|
''',
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|