mdbq 2.7.0__py3-none-any.whl → 2.7.2__py3-none-any.whl
This diff shows the content changes between two publicly released versions of the package, as they appear in their respective public registries. It is provided for informational purposes only.
- mdbq/aggregation/aggregation.py +27 -33
- mdbq/aggregation/mysql_types.py +83 -90
- mdbq/aggregation/optimize_data.py +30 -52
- mdbq/aggregation/query_data.py +257 -296
- mdbq/clean/clean_upload.py +126 -201
- mdbq/company/copysh.py +1 -2
- mdbq/config/myconfig.py +30 -0
- mdbq/config/products.py +31 -34
- mdbq/mysql/mysql.py +12 -42
- mdbq/mysql/s_query.py +4 -3
- {mdbq-2.7.0.dist-info → mdbq-2.7.2.dist-info}/METADATA +2 -2
- {mdbq-2.7.0.dist-info → mdbq-2.7.2.dist-info}/RECORD +14 -13
- {mdbq-2.7.0.dist-info → mdbq-2.7.2.dist-info}/WHEEL +1 -1
- {mdbq-2.7.0.dist-info → mdbq-2.7.2.dist-info}/top_level.txt +0 -0
mdbq/clean/clean_upload.py
CHANGED
@@ -4,13 +4,14 @@ import pandas as pd
 from functools import wraps
 import chardet
 import zipfile
+import socket
 from pyzipper import PyZipFile
 import os
 import platform
 import json
 from mdbq.mongo import mongo
 from mdbq.mysql import mysql
-from mdbq.config import
+from mdbq.config import myconfig
 from mdbq.aggregation import df_types
 from mdbq.config import products
 from mdbq.aggregation import optimize_data
@@ -41,18 +42,31 @@ else:
     Share_Path = os.path.join('/Volumes/时尚事业部/01.运营部/天猫报表')  # 共享文件根目录

 upload_path = os.path.join(D_PATH, '数据上传中心')  # 此目录位于下载文件夹
-source_path = os.path.join(Data_Path, '原始文件2')  # 此目录保存下载并清洗过的文件,作为数据库备份
+# source_path = os.path.join(Data_Path, '原始文件2')  # 此目录保存下载并清洗过的文件,作为数据库备份
 source_path3 = os.path.join(Data_Path, '原始文件3')  # 此目录保存下载并清洗过的文件,作为数据库备份

+if socket.gethostname().lower() in ['xigua_lx', 'xigua1', 'macbook pro']:
+    conf = myconfig.main()
+    conf_data = conf['Windows']['xigua_lx']['mysql']['local']
+    username, password, host, port = conf_data['username'], conf_data['password'], conf_data['host'], conf_data['port']
+    service_database = {'xigua_lx': 'mysql'}
+elif socket.gethostname().lower() in ['company', 'Mac2.local']:
+    conf = myconfig.main()
+    conf_data = conf['Windows']['company']['mysql']['local']
+    username, password, host, port = conf_data['username'], conf_data['password'], conf_data['host'], conf_data['port']
+    service_database = {'company': 'mysql'}
+if not username:
+    print(f'找不到主机:')
+
+

 class DataClean:
     """ 数据分类 """

-    def __init__(self, path, source_path
+    def __init__(self, path, source_path):
         self.path = path  # 数据源位置,下载文件夹
         self.source_path = source_path  # 原始文件保存目录
         self.datas = []
-        self.service_databases = service_databases

     @staticmethod
     def try_except(func):  # 在类内部定义一个异常处理方法
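
The module-level block added above replaces the old `service_databases` argument plumbing: credentials are now chosen by hostname and read through `myconfig.main()`, which is expected to return a nested dict keyed as `conf['Windows'][<host>]['mysql']['local']`. A minimal sketch of a `my_config.txt` layout that would satisfy this lookup — all hostnames, credential values and database names below are placeholders, not values shipped with the package:

```python
import json
import socket

# Hypothetical my_config.txt content; only the key layout is implied by the diff,
# the values are placeholders.
example_config = {
    "Windows": {
        "xigua_lx": {
            "mysql": {
                "local": {"username": "user", "password": "***", "host": "127.0.0.1", "port": 3306},
                "数据库集": ["推广数据2", "生意参谋3", "聚合数据"],  # read later by main()
            }
        },
        "company": {
            "mysql": {
                "local": {"username": "user", "password": "***", "host": "192.168.1.100", "port": 3306},
            }
        },
    }
}

# Same selection logic as the new module-level code in clean_upload.py:
host_key = 'xigua_lx' if socket.gethostname().lower() in ['xigua_lx', 'xigua1', 'macbook pro'] else 'company'
local = example_config['Windows'][host_key]['mysql']['local']
username, password, host, port = local['username'], local['password'], local['host'], local['port']
print(json.dumps(local, ensure_ascii=False))
```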
@@ -145,6 +159,7 @@ class DataClean:
                 df = pd.read_excel(os.path.join(root, name), header=4)
                 if len(df) == 0:
                     print(f'{name} 报表数据不能为空')
+                    os.remove(os.path.join(root, name))
                     continue
                 df.replace(to_replace=['-'], value=0, regex=False, inplace=True)
                 df.replace(to_replace=[','], value='', regex=True, inplace=True)
@@ -159,6 +174,7 @@ class DataClean:
                 df = pd.read_excel(os.path.join(root, name), header=5, engine='xlrd')
                 if len(df) == 0:
                     print(f'{name} 报表数据不能为空')
+                    os.remove(os.path.join(root, name))
                     continue
                 df.replace(to_replace=['-'], value=0, regex=False, inplace=True)
                 df.replace(to_replace=[','], value='', regex=True, inplace=True)
@@ -255,6 +271,7 @@ class DataClean:
                 df = pd.read_csv(os.path.join(root, name), encoding='utf-8_sig', header=0, na_filter=False)
                 if len(df) == 0:
                     print(f'{name} 报表数据为空')
+                    os.remove(os.path.join(root, name))
                     continue
                 new_name = f'py_xg_{os.path.splitext(name)[0]}.csv'
                 self.save_to_csv(df, root, new_name, encoding='utf-8_sig')
@@ -263,6 +280,7 @@ class DataClean:
                 df = pd.read_csv(os.path.join(root, name), encoding='utf-8_sig', header=0, na_filter=False)
                 if len(df) == 0:
                     print(f'{name} 报表数据为空')
+                    os.remove(os.path.join(root, name))
                     continue
                 for col in df.columns.tolist():
                     if '(' in col or ')' in col:
@@ -563,6 +581,7 @@ class DataClean:
                 name_st = re.findall(r'([\u4e00-\u9fa5]+)\(分日', name)
                 if not name_st:
                     print(f'{name} 正则提取文件名失败')
+                    os.remove(os.path.join(root, name))
                     continue
                 encoding = self.get_encoding(file_path=os.path.join(root, name))
                 df = pd.read_csv(os.path.join(root, name), encoding=encoding, header=0, na_filter=False)
@@ -802,8 +821,9 @@ class DataClean:
                 if not is_continue:
                     continue

-                if name.endswith('.
-                    df = pd.read_excel(os.path.join(root, name), header=0, engine='openpyxl')
+                if name.endswith('.csv') and '京东推广_' in name:
+                    # df = pd.read_excel(os.path.join(root, name), header=0, engine='openpyxl')
+                    df = pd.read_csv(os.path.join(root, name), encoding='utf-8_sig', header=0, na_filter=False)
                     new_name = f'py_xg_{name}'
                     os.rename(os.path.join(root, name), os.path.join(root, new_name))
                 elif name.endswith('.xlsx') and '京东商智_sku_商品明细' in name:
@@ -813,9 +833,10 @@ class DataClean:
                     df.insert(loc=0, column='日期', value=pattern)
                     df.insert(loc=1, column='店铺名称', value='京东箱包旗舰店')
                     df.fillna(0, inplace=True)
-                    new_name = f'py_xg_{name}'
-                    df.
-
+                    new_name = f'py_xg_{os.path.splitext(name)[0]}.csv'
+                    df.to_csv(os.path.join(root, new_name), encoding='utf-8_sig', index=False, header=True)
+                    # df.to_excel(os.path.join(upload_path, new_name),
+                    #             index=False, header=True, engine='openpyxl', freeze_panes=(1, 0))
                     os.remove(os.path.join(root, name))
                 elif name.endswith('.xlsx') and '京东商智_spu_商品明细' in name:
                     df = pd.read_excel(os.path.join(root, name), header=0, engine='openpyxl')
@@ -824,9 +845,10 @@ class DataClean:
                     df.insert(loc=0, column='日期', value=pattern)
                     df.insert(loc=1, column='店铺名称', value='京东箱包旗舰店')
                     df.fillna(0, inplace=True)
-                    new_name = f'py_xg_{name}'
-                    df.
-
+                    new_name = f'py_xg_{os.path.splitext(name)[0]}.csv'
+                    df.to_csv(os.path.join(root, new_name), encoding='utf-8_sig', index=False, header=True)
+                    # df.to_excel(os.path.join(upload_path, new_name),
+                    #             index=False, header=True, engine='openpyxl', freeze_panes=(1, 0))
                     os.remove(os.path.join(root, name))
                 elif name.endswith('.xlsx') and '京东商智_店铺来源_三级来源' in name:
                     df = pd.read_excel(os.path.join(root, name), header=0, engine='openpyxl')
@@ -836,9 +858,10 @@ class DataClean:
                         if '环比' in col or '同比' in col:
                             df.drop(col, axis=1, inplace=True)
                     df.fillna(0, inplace=True)
-                    new_name = f'py_xg_{name}'
-                    df.
-
+                    new_name = f'py_xg_{os.path.splitext(name)[0]}.csv'
+                    df.to_csv(os.path.join(root, new_name), encoding='utf-8_sig', index=False, header=True)
+                    # df.to_excel(os.path.join(upload_path, new_name),
+                    #             index=False, header=True, engine='openpyxl', freeze_panes=(1, 0))
                     os.remove(os.path.join(root, name))

                 # 将数据传入 self.datas 等待更新进数据库
@@ -908,7 +931,7 @@ class DataClean:

                 if name.endswith('.xlsx') and '商品素材_' in name:
                     shop_name = re.findall(r'_([\u4e00-\u9fffA-Za-z]+店)_', name)[0]
-                    df = pd.read_excel(os.path.join(root, name), header=0, engine='
+                    df = pd.read_excel(os.path.join(root, name), header=0, engine='openpyxl')
                     if '日期' not in df.columns.tolist():
                         df.insert(loc=0, column='日期', value=datetime.datetime.today().strftime('%Y-%m-%d'))
                     if '店铺名称' not in df.columns.tolist():
@@ -998,7 +1021,7 @@ class DataClean:
                     t_path = os.path.join(self.source_path, '生意参谋', '商品属性')
                     bib(t_path, _as_month=True)
                 elif name.endswith('.csv') and '爱库存_商品榜单_' in name:
-                    t_path = os.path.join(self.source_path, '爱库存', 'spu
+                    t_path = os.path.join(self.source_path, '爱库存', '商品spu榜单')
                     bib(t_path, _as_month=True)
                 elif name.endswith('.csv') and '手淘搜索_本店引流词_' in name:
                     t_path = os.path.join(self.source_path, '生意参谋', '手淘搜索_本店引流词')
@@ -1040,7 +1063,6 @@ class DataClean:
                     t_path = os.path.join(self.source_path, '达摩盘', 'dmp人群报表')
                     bib(t_path, _as_month=True)

-
     # @try_except
     def move_sjy(self, path=None, is_except=[]):
         if not path:
@@ -1119,10 +1141,10 @@ class DataClean:
                     continue

                 if name.endswith('.xlsx') and '京东商智_spu_商品明细' in name:
-                    t_path = os.path.join(self.source_path, '京东报表', '
+                    t_path = os.path.join(self.source_path, '京东报表', '京东商智_spu_商品明细')
                     bib(t_path, _as_month=True)
                 elif name.endswith('.xlsx') and '京东商智_sku_商品明细' in name:
-                    t_path = os.path.join(self.source_path, '京东报表', '
+                    t_path = os.path.join(self.source_path, '京东报表', '京东商智_sku_商品明细')
                     bib(t_path, _as_month=True)
                 elif name.endswith('.xlsx') and '京东推广_搜索词' in name:
                     t_path = os.path.join(self.source_path, '京东报表', '搜索词报表')
@@ -1402,118 +1424,59 @@ class DataClean:
         if _new_root != _root or '__MACOSX' in _root:
             shutil.rmtree(_root)

-    def upload_df(self,
+    def upload_df(self, path=None):
         """
         将清洗后的 df 上传数据库, copysh.py 调用
         """
-        if not service_databases:
-            service_databases = self.service_databases
         df_to_json = df_types.DataTypes()  # json 文件, 包含数据的 dtypes 信息
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-                db_name=db_name,
-                table_name=collection_name,
-                move_insert=True,  # 先删除,再插入
-                df_sql=False,  # 值为 True 时使用 df.to_sql 函数上传整个表, 不会排重
-                drop_duplicates=False,  # 值为 True 时检查重复数据再插入,反之直接上传,会比较慢
-                filename=rt_filename,  # 用来追踪处理进度
-                service_database=service_database,  # 字典
-            )
-        df_to_json.as_json_file()  # 写入 json 文件, 包含数据的 dtypes 信息
-
-
-def test():
-    # main_key = '单元报表'
-    path = f'/Users/xigua/数据中心/原始文件3/天猫推广报表/主体报表'
-    for root, dirs, files in os.walk(path, topdown=False):
-        for name in files:
-            if '~$' in name or '.DS' in name or '.localized' in name or '.jpg' in name or '.png' in name:
-                continue
-            # if 'py_xg' in name:
-            #     continue
-            # if 'TM_旧表_字段' in root:
-            #     continue
-
-            if name.endswith('.csv'):
-                print(name)
-                df = pd.read_csv(os.path.join(root, name), encoding='utf-8_sig', header=0, na_filter=False)
-                # if '店铺名称' not in df.columns.tolist():
-                #     df.insert(loc=1, column='店铺名称', value='万里马官方旗舰店')
-                # df.replace(to_replace=['-'], value=0, regex=False, inplace=True)
-                # df.replace(to_replace=[','], value='', regex=True, inplace=True)
-                # if '统计日期' in df.columns.tolist() and '日期' not in df.columns.tolist():
-                #     df.rename(columns={'统计日期': '日期', '商品ID': '商品id'}, inplace=True)
-                # shop_name = re.findall(r'_([\u4e00-\u9fffA-Za-z]+店)', name)[0]
-                # df.insert(loc=1, column='店铺名称', value=shop_name)
-
-                date_all = re.findall(r'_(\d{4}-\d{2}-\d{2})_', name)[0]
-
-                date = re.findall(r'_(\d{4}-\d{2})-\d{2}', name)[0]
-
-                new_path = f'/Users/xigua/数据中心/原始文件3/天猫_生意参谋/商品排行/{date}'
-                # new_path = os.path.join(new_path, date)  # 添加 年月分类
-                if not os.path.exists(new_path):
-                    os.makedirs(new_path, exist_ok=True)
-                # print(date_all)
-
-                new_name = f'py_xg_商品排行_万里马官方旗舰店_{date_all}.csv'
-                # print(os.path.join(new_path, new_name))
-                # breakpoint()
-                df.to_csv(os.path.join(new_path, new_name), encoding='utf-8_sig', index=False, header=True)
-                # try:
-                #     df.to_excel(os.path.join(new_path, new_name),
-                #                 index=False, header=True, engine='openpyxl', freeze_panes=(1, 0))
-                # except Exception as e:
-                #     print(e)
-
-
-def date_table(service_databases=[{}]):
+
+        # d = mongo.UploadMongo(
+        #     username=username,
+        #     password=password,
+        #     host=host,
+        #     port=port,
+        #     drop_duplicates=False,
+        # )
+        # for data in self.datas:
+        #     db_name, collection_name, df = data['数据库名'], data['集合名称'], data['数据主体']
+        #     df_to_json.get_df_types(
+        #         df=df,
+        #         db_name=db_name,
+        #         collection_name=collection_name,
+        #         is_file_dtype=True,  # 默认本地文件优先: True
+        #     )
+        #     d.df_to_mongo(df=df, db_name=db_name, collection_name=collection_name)
+        # if d.client:
+        #     d.client.close()

+        m = mysql.MysqlUpload(
+            username=username,
+            password=password,
+            host=host,
+            port=port,
+        )
+        for data in self.datas:
+            df, db_name, collection_name, rt_filename = data['数据主体'], data['数据库名'], data['集合名称'], data['文件名']
+            df_to_json.get_df_types(
+                df=df,
+                db_name=db_name,
+                collection_name=collection_name,
+                is_file_dtype=True,  # 默认本地文件优先: True
+            )
+            m.df_to_mysql(
+                df=df,
+                db_name=db_name,
+                table_name=collection_name,
+                move_insert=True,  # 先删除,再插入
+                df_sql=False,  # 值为 True 时使用 df.to_sql 函数上传整个表, 不会排重
+                drop_duplicates=False,  # 值为 True 时检查重复数据再插入,反之直接上传,会比较慢
+                filename=rt_filename,  # 用来追踪处理进度
+                service_database=service_database,  # 字典
+            )
+        df_to_json.as_json_file()  # 写入 json 文件, 包含数据的 dtypes 信息
+
+
+def date_table():
     """
     生成 pbix 使用的日期表
     """
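
For reference, the rewritten `upload_df` above walks `self.datas` and unpacks four keys from each entry ('数据主体', '数据库名', '集合名称', '文件名') before handing the frame to `mysql.MysqlUpload.df_to_mysql`. A hedged sketch of the shape each queued entry takes — the DataFrame contents, database and table names here are illustrative placeholders, not values from the package:

```python
import pandas as pd

datas = []  # stands in for the DataClean instance's self.datas list

df = pd.DataFrame({'日期': ['2024-01-01'], '花费': [100.0]})  # placeholder cleaned data
datas.append({
    '数据库名': '推广数据2',                    # target MySQL database
    '集合名称': '主体报表',                      # target table (passed as table_name)
    '数据主体': df,                             # the cleaned DataFrame
    '文件名': 'py_xg_主体报表_2024-01-01.csv',   # passed as filename= to track progress
})
```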
@@ -1540,58 +1503,43 @@ def date_table(service_databases=[{}]):
     df['月索引'] = mon
     df.sort_values('日期', ascending=False, ignore_index=True, inplace=True)

-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-        move_insert=True,  # 先删除,再插入
-        df_sql=False,  # 值为 True 时使用 df.to_sql 函数上传整个表, 不会排重
-        drop_duplicates=False,  # 值为 True 时检查重复数据再插入,反之直接上传,会比较慢
-        filename=None,  # 用来追踪处理进度
-        service_database=service_database,  # 用来追踪处理进度
-        )
+    m = mysql.MysqlUpload(
+        username=username,
+        password=password,
+        host=host,
+        port=port,
+    )
+    m.df_to_mysql(
+        df=df,
+        db_name='聚合数据',
+        table_name='日期表',
+        move_insert=True,  # 先删除,再插入
+        df_sql=False,  # 值为 True 时使用 df.to_sql 函数上传整个表, 不会排重
+        drop_duplicates=False,  # 值为 True 时检查重复数据再插入,反之直接上传,会比较慢
+        filename=None,  # 用来追踪处理进度
+        service_database=service_database,  # 用来追踪处理进度
+    )


-def main(
+def main(is_mysql=False):
     """
     is_mysql: 调试时加,False: 是否后续的聚合数据
     """

-    if not service_databases:
-        service_databases = [
-            # {'home_lx': 'mongodb'},
-            {'home_lx': 'mysql'},
-            # {'company': 'mysql'},
-            # {'nas': 'mysql'},
-        ]
-
     cn = DataClean(
         path=upload_path,  # 源文件目录,下载文件夹
         source_path=source_path3,  # 原始文件保存目录
-        service_databases=service_databases
     )
     cn.new_unzip(is_move=True)  # 解压文件, is_move 解压后是否删除原 zip 压缩文件
     cn.sycm_tm(is_except=['except'])  # 天猫生意参谋
     cn.dmp_tm(is_except=['except'])  # 达摩盘
     cn.tg_reports(is_except=['except'])  # 推广报表,天猫淘宝共同清洗
     cn.syj_reports_tm(is_except=['except'])  # 天猫生意经
-
+    # # 淘宝生意经,不可以和天猫同时运行
     # cn.syj_reports_tb(is_except=['except'])  # 淘宝生意经,不可以和天猫同时运行
     cn.jd_reports(is_except=['except'])  # 清洗京东报表
     cn.sp_scene_clean(is_except=['except'])  # 商品素材
-    cn.upload_df(
+    cn.upload_df()  # 上传数据库

     cn.move_sycm(is_except=['临时文件', ])  # 生意参谋,移到文件到原始文件夹
     cn.move_dmp(is_except=['临时文件', ])  # 达摩盘
@@ -1604,64 +1552,41 @@ def main(service_databases=None, is_mysql=False):
         return

     # 更新日期表
-    date_table(
-    # 更新货品年份基准表, 属性设置
+    date_table()
+    # 更新货品年份基准表, 属性设置 3 - 货品年份基准
     p = products.Products()
-    p.to_mysql(
+    p.to_mysql()

+    conf = myconfig.main()
+    data = conf['Windows']['xigua_lx']['mysql']['local']
+    db_list = conf['Windows']['xigua_lx']['mysql']['数据库集']
+    db_list = [item for item in db_list if item != '聚合数据']
     # 清理所有非聚合数据的库
     optimize_data.op_data(
-        db_name_lists=
-
-            '属性设置3',
-            '推广数据2',
-            '生意参谋3',
-            '推广数据_淘宝店',
-            '爱库存2'
-            '生意参谋3',
-            '生意经2',
-            # '聚合数据',  # 不在这里清理聚合数据, 还未开始聚合呢
-            '达摩盘3',
-        ],
-        days=100,
+        db_name_lists=db_list,
+        days=5,
         is_mongo=True,
         is_mysql=True,
-        service_databases=service_databases
     )

     # 数据聚合
-    query_data.data_aggregation(
+    query_data.data_aggregation(months=3)
     time.sleep(60)

     # 清理聚合数据, mongodb 中没有聚合数据,所以只需要清理 mysql 即可
     optimize_data.op_data(
         db_name_lists=['聚合数据'],
         days=3650,
-        service_databases=service_databases,
        is_mongo=False,
        is_mysql=True,
    )


 if __name__ == '__main__':
-    main(
-        service_databases = [
-            {'company': 'mysql'},
-            # {'home_lx': 'mysql'},
-            # {'home_lx': 'mongodb'},
-            # {'nas': 'mysql'},
-        ],
-        is_mysql = False,  # 清理聚合数据
-    )
-    # date_table(service_databases=[{'company': 'mysql'}])
-    # c = DataClean(
-    #     path=upload_path,  # 源文件目录,下载文件夹
-    #     source_path=source_path3,  # 原始文件保存目录
-    #     service_databases=[{'home_lx': 'mysql'},]
-    # )
-    # c.sp_scene_clean(is_except=['except'])  # 商品素材
-    # c.move_tg_tm(is_except=['临时文件', ])  # 天猫,移到文件到原始文件夹
-
-
-    # test()
+    main(is_mysql=True)

+    # conf = myconfig.main()
+    # data = conf['Windows']['xigua_lx']['mysql']['local']
+    # db_list = conf['Windows']['xigua_lx']['mysql']['数据库集']
+    # db_list = [item for item in db_list if item != '聚合数据']
+    # print(db_list)
mdbq/company/copysh.py
CHANGED
@@ -321,7 +321,7 @@ def op_data(days: int =100):
     # 清理所有非聚合数据的库
     optimize_data.op_data(
         db_name_lists=[
-            '京东数据
+            '京东数据2',
            '属性设置3',
            '推广数据2',
            '推广数据_淘宝店',
@@ -367,7 +367,6 @@ def main():
        op_data(days=100)

        t.sleep_minutes = 5  # 同步前休眠时间
-        # 4. 同步共享文件
        t.tb_file()
        time.sleep(600)  # 检测间隔

mdbq/config/myconfig.py
ADDED
@@ -0,0 +1,30 @@
+# -*- coding: UTF-8 –*-
+import os
+import json
+from mdbq.config import set_support
+
+
+
+def main():
+    support_path = set_support.SetSupport(dirname='support').dirname
+    file = os.path.join(support_path, 'my_config.txt')
+    if not os.path.isfile(file):
+        print(f'缺少配置文件,无法读取配置文件: {file}')
+        return
+    with open(file, 'r', encoding='utf-8') as f:
+        config_datas = json.load(f)
+    return config_datas
+
+
+def write_back(datas):
+    """ 将数据写回本地 """
+    support_path = set_support.SetSupport(dirname='support').dirname
+    file = os.path.join(support_path, 'my_config.txt')
+    with open(file, 'w+', encoding='utf-8') as f:
+        json.dump(datas, f, ensure_ascii=False, sort_keys=False, indent=4)
+
+
+
+if __name__ == '__main__':
+    d = main()
+    print(d)
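
The new module is a small JSON wrapper around `support/my_config.txt`. A short usage sketch based only on the two functions it exports; the key edited below is a placeholder:

```python
from mdbq.config import myconfig

conf = myconfig.main()              # parsed dict, or None when my_config.txt is missing
if conf:
    conf.setdefault('Windows', {})  # placeholder in-memory edit for illustration
    myconfig.write_back(conf)       # rewrites the file with ensure_ascii=False, indent=4
```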
mdbq/config/products.py
CHANGED
@@ -3,15 +3,28 @@ import json
 import os
 import platform
 import getpass
+import socket
 import pandas as pd
 from mdbq.mysql import mysql
-from mdbq.config import
+from mdbq.config import myconfig
 from numpy.ma.core import product

 """
 天猫货品年份基准对照
 用于聚合数据,通过此数据表进一步可确定商品上架年月
 """
+if socket.gethostname().lower() in ['xigua_lx', 'xigua1', 'macbook pro']:
+    conf = myconfig.main()
+    data = conf['Windows']['xigua_lx']['mysql']['local']
+    username, password, host, port = data['username'], data['password'], data['host'], data['port']
+    service_database = {'xigua_lx': 'mysql'}
+elif socket.gethostname().lower() in ['company', 'Mac2.local']:
+    conf = myconfig.main()
+    data = conf['Windows']['company']['mysql']['local']
+    username, password, host, port = data['username'], data['password'], data['host'], data['port']
+    service_database = {'company': 'mysql'}
+if not username:
+    print(f'找不到主机:')


 class Products:
@@ -107,31 +120,25 @@
         ]
         self.datas += my_datas

-    def to_mysql(self
+    def to_mysql(self):
         self.update_my_datas()
         df = pd.DataFrame(self.datas)
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-            move_insert = False,
-            df_sql=False,  # 值为 True 时使用 df.to_sql 函数上传整个表, 不会排重
-            drop_duplicates=True,  # 值为 True 时检查重复数据再插入,反之直接上传,会比较慢
-            icm_update=[],
-            service_database=service_database,  # 用来追踪处理进度
-        )
+        m = mysql.MysqlUpload(
+            username=username,
+            password=password,
+            host=host,
+            port=port,
+        )
+        m.df_to_mysql(
+            df=df,
+            db_name='属性设置3',
+            table_name='货品年份基准',
+            move_insert = False,
+            df_sql=False,  # 值为 True 时使用 df.to_sql 函数上传整个表, 不会排重
+            drop_duplicates=True,  # 值为 True 时检查重复数据再插入,反之直接上传,会比较慢
+            icm_update=[],
+            service_database=service_database,  # 用来追踪处理进度
+        )

     def market_date(self, product_id: int):
         try:
@@ -153,13 +160,3 @@ if __name__ == '__main__':
     p = Products()
     year = p.market_date(product_id=product_id)
     print(f'{product_id}: {year}')
-
-    p.to_mysql(service_databases=[
-        {
-            'home_lx': 'mysql'
-        },
-        {
-            'company': 'mysql'
-        }
-    ]
-    )
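
With credentials now resolved at import time, `Products.to_mysql()` takes no arguments, matching how clean_upload.main() calls it. A minimal usage sketch; the product id below is a made-up placeholder:

```python
from mdbq.config import products

product_id = 123456789  # placeholder id for illustration
p = products.Products()
p.to_mysql()  # uploads the 货品年份基准 table into the 属性设置3 database
year = p.market_date(product_id=product_id)  # reference listing year for the product
print(f'{product_id}: {year}')
```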