mdbq 2.7.1__py3-none-any.whl → 2.7.3__py3-none-any.whl
This diff compares the contents of two publicly released versions of the package, as published to one of the supported registries. It is provided for informational purposes only and reflects the packages exactly as they appear in their respective public registries.
- mdbq/aggregation/aggregation.py +31 -30
- mdbq/aggregation/mysql_types.py +83 -90
- mdbq/aggregation/optimize_data.py +31 -52
- mdbq/aggregation/query_data.py +256 -295
- mdbq/clean/clean_upload.py +106 -194
- mdbq/config/myconfig.py +30 -0
- mdbq/config/products.py +32 -34
- mdbq/mysql/mysql.py +12 -42
- mdbq/mysql/s_query.py +4 -3
- {mdbq-2.7.1.dist-info → mdbq-2.7.3.dist-info}/METADATA +1 -1
- {mdbq-2.7.1.dist-info → mdbq-2.7.3.dist-info}/RECORD +13 -12
- {mdbq-2.7.1.dist-info → mdbq-2.7.3.dist-info}/WHEEL +1 -1
- {mdbq-2.7.1.dist-info → mdbq-2.7.3.dist-info}/top_level.txt +0 -0
mdbq/clean/clean_upload.py
CHANGED
@@ -4,13 +4,14 @@ import pandas as pd
 from functools import wraps
 import chardet
 import zipfile
+import socket
 from pyzipper import PyZipFile
 import os
 import platform
 import json
 from mdbq.mongo import mongo
 from mdbq.mysql import mysql
-from mdbq.config import
+from mdbq.config import myconfig
 from mdbq.aggregation import df_types
 from mdbq.config import products
 from mdbq.aggregation import optimize_data
@@ -41,18 +42,32 @@ else:
     Share_Path = os.path.join('/Volumes/时尚事业部/01.运营部/天猫报表')  # 共享文件根目录
 
 upload_path = os.path.join(D_PATH, '数据上传中心')  # 此目录位于下载文件夹
-source_path = os.path.join(Data_Path, '原始文件2')  # 此目录保存下载并清洗过的文件,作为数据库备份
+# source_path = os.path.join(Data_Path, '原始文件2')  # 此目录保存下载并清洗过的文件,作为数据库备份
 source_path3 = os.path.join(Data_Path, '原始文件3')  # 此目录保存下载并清洗过的文件,作为数据库备份
 
+username, password, host, port, service_database = None, None, None, None, None,
+if socket.gethostname().lower() in ['xigua_lx', 'xigua1', 'macbook pro']:
+    conf = myconfig.main()
+    conf_data = conf['Windows']['xigua_lx']['mysql']['local']
+    username, password, host, port = conf_data['username'], conf_data['password'], conf_data['host'], conf_data['port']
+    service_database = {'xigua_lx': 'mysql'}
+elif socket.gethostname().lower() in ['company', 'mac2.local']:
+    conf = myconfig.main()
+    conf_data = conf['Windows']['company']['mysql']['local']
+    username, password, host, port = conf_data['username'], conf_data['password'], conf_data['host'], conf_data['port']
+    service_database = {'company': 'mysql'}
+if not username:
+    print(f'找不到主机:')
+
+
 
 class DataClean:
     """ 数据分类 """
 
-    def __init__(self, path, source_path
+    def __init__(self, path, source_path):
         self.path = path  # 数据源位置,下载文件夹
         self.source_path = source_path  # 原始文件保存目录
         self.datas = []
-        self.service_databases = service_databases
 
     @staticmethod
     def try_except(func):  # 在类内部定义一个异常处理方法
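The new module-level block above resolves MySQL credentials from the hostname, one if/elif branch per machine. As a minimal sketch, the same dispatch can be expressed as a lookup table; the load_conf stub below is a hypothetical stand-in for myconfig.main(), and only the nested layout visible in the diff (conf['Windows'][key]['mysql']['local']) is assumed:

    import socket

    def load_conf():
        # Hypothetical stand-in for myconfig.main(), which loads support/my_config.txt as JSON.
        return {'Windows': {'xigua_lx': {'mysql': {'local': {
            'username': 'user', 'password': 'pw', 'host': '127.0.0.1', 'port': 3306}}}}}

    # One entry per known hostname instead of one branch per machine.
    HOST_KEYS = {
        'xigua_lx': 'xigua_lx', 'xigua1': 'xigua_lx', 'macbook pro': 'xigua_lx',
        'company': 'company', 'mac2.local': 'company',
    }

    def resolve_credentials():
        key = HOST_KEYS.get(socket.gethostname().lower())
        if key is None:
            return None  # unknown host; mirrors the `if not username` fallthrough above
        local = load_conf()['Windows'][key]['mysql']['local']
        return local['username'], local['password'], local['host'], local['port'], {key: 'mysql'}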
@@ -264,6 +279,7 @@ class DataClean:
                     os.remove(os.path.join(root, name))
                 elif name.endswith('.csv') and 'dmp人群报表_' in name:
                     df = pd.read_csv(os.path.join(root, name), encoding='utf-8_sig', header=0, na_filter=False)
+                    df = df[df['日期'] != '']
                     if len(df) == 0:
                         print(f'{name} 报表数据为空')
                         os.remove(os.path.join(root, name))
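The added df = df[df['日期'] != ''] line matters because the CSV is read with na_filter=False: blank cells arrive as empty strings rather than NaN, so a report containing only a header and blank rows would otherwise have len(df) > 0 and escape the removal branch. A self-contained illustration:

    import io
    import pandas as pd

    csv = '日期,人群名称\n,\n,\n'  # header plus two blank data rows
    df = pd.read_csv(io.StringIO(csv), header=0, na_filter=False)
    print(len(df))               # 2: blanks survive as '' when na_filter=False
    df = df[df['日期'] != '']     # the filter added in 2.7.3
    print(len(df))               # 0: the empty-report branch now triggers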
@@ -917,7 +933,7 @@ class DataClean:
 
                 if name.endswith('.xlsx') and '商品素材_' in name:
                     shop_name = re.findall(r'_([\u4e00-\u9fffA-Za-z]+店)_', name)[0]
-                    df = pd.read_excel(os.path.join(root, name), header=0, engine='
+                    df = pd.read_excel(os.path.join(root, name), header=0, engine='openpyxl')
                     if '日期' not in df.columns.tolist():
                         df.insert(loc=0, column='日期', value=datetime.datetime.today().strftime('%Y-%m-%d'))
                     if '店铺名称' not in df.columns.tolist():
@@ -1007,7 +1023,7 @@ class DataClean:
                     t_path = os.path.join(self.source_path, '生意参谋', '商品属性')
                     bib(t_path, _as_month=True)
                 elif name.endswith('.csv') and '爱库存_商品榜单_' in name:
-                    t_path = os.path.join(self.source_path, '爱库存', 'spu
+                    t_path = os.path.join(self.source_path, '爱库存', '商品spu榜单')
                     bib(t_path, _as_month=True)
                 elif name.endswith('.csv') and '手淘搜索_本店引流词_' in name:
                     t_path = os.path.join(self.source_path, '生意参谋', '手淘搜索_本店引流词')
@@ -1049,7 +1065,6 @@ class DataClean:
                     t_path = os.path.join(self.source_path, '达摩盘', 'dmp人群报表')
                     bib(t_path, _as_month=True)
 
-
     # @try_except
     def move_sjy(self, path=None, is_except=[]):
         if not path:
@@ -1127,22 +1142,22 @@ class DataClean:
                 if 'py_xg' not in name:  # 排除非目标文件
                     continue
 
-                if name.endswith('.
+                if name.endswith('.csv') and '京东商智_spu_商品明细' in name:
                     t_path = os.path.join(self.source_path, '京东报表', '京东商智_spu_商品明细')
                     bib(t_path, _as_month=True)
-                elif name.endswith('.
+                elif name.endswith('.csv') and '京东商智_sku_商品明细' in name:
                     t_path = os.path.join(self.source_path, '京东报表', '京东商智_sku_商品明细')
                     bib(t_path, _as_month=True)
-                elif name.endswith('.
+                elif name.endswith('.csv') and '京东推广_搜索词' in name:
                     t_path = os.path.join(self.source_path, '京东报表', '搜索词报表')
                     bib(t_path, _as_month=True)
-                elif name.endswith('.
+                elif name.endswith('.csv') and '京东推广_点击成交' in name:
                     t_path = os.path.join(self.source_path, '京东报表', '推广报表')
                     bib(t_path, _as_month=True)
-                elif name.endswith('.
+                elif name.endswith('.csv') and '京东推广_关键词点击' in name:
                     t_path = os.path.join(self.source_path, '京东报表', '关键词报表')
                     bib(t_path, _as_month=True)
-                elif name.endswith('.
+                elif name.endswith('.csv') and '京东商智_店铺来源_三级来源' in name:
                     t_path = os.path.join(self.source_path, '京东报表', '店铺来源_三级来源')
                     bib(t_path, _as_month=True)
 
@@ -1411,118 +1426,59 @@ class DataClean:
         if _new_root != _root or '__MACOSX' in _root:
             shutil.rmtree(_root)
 
-    def upload_df(self,
+    def upload_df(self, path=None):
         """
         将清洗后的 df 上传数据库, copysh.py 调用
         """
-        if not service_databases:
-            service_databases = self.service_databases
         df_to_json = df_types.DataTypes()  # json 文件, 包含数据的 dtypes 信息
-        [... 48 removed lines not rendered in this diff view ...]
-            db_name=db_name,
-            table_name=collection_name,
-            move_insert=True,  # 先删除,再插入
-            df_sql=False,  # 值为 True 时使用 df.to_sql 函数上传整个表, 不会排重
-            drop_duplicates=False,  # 值为 True 时检查重复数据再插入,反之直接上传,会比较慢
-            filename=rt_filename,  # 用来追踪处理进度
-            service_database=service_database,  # 字典
-        )
-        df_to_json.as_json_file()  # 写入 json 文件, 包含数据的 dtypes 信息
-
-
-def test():
-    # main_key = '单元报表'
-    path = f'/Users/xigua/数据中心/原始文件3/天猫推广报表/主体报表'
-    for root, dirs, files in os.walk(path, topdown=False):
-        for name in files:
-            if '~$' in name or '.DS' in name or '.localized' in name or '.jpg' in name or '.png' in name:
-                continue
-            # if 'py_xg' in name:
-            #     continue
-            # if 'TM_旧表_字段' in root:
-            #     continue
-
-            if name.endswith('.csv'):
-                print(name)
-                df = pd.read_csv(os.path.join(root, name), encoding='utf-8_sig', header=0, na_filter=False)
-                # if '店铺名称' not in df.columns.tolist():
-                #     df.insert(loc=1, column='店铺名称', value='万里马官方旗舰店')
-                # df.replace(to_replace=['-'], value=0, regex=False, inplace=True)
-                # df.replace(to_replace=[','], value='', regex=True, inplace=True)
-                # if '统计日期' in df.columns.tolist() and '日期' not in df.columns.tolist():
-                #     df.rename(columns={'统计日期': '日期', '商品ID': '商品id'}, inplace=True)
-                #     shop_name = re.findall(r'_([\u4e00-\u9fffA-Za-z]+店)', name)[0]
-                #     df.insert(loc=1, column='店铺名称', value=shop_name)
-
-                date_all = re.findall(r'_(\d{4}-\d{2}-\d{2})_', name)[0]
-
-                date = re.findall(r'_(\d{4}-\d{2})-\d{2}', name)[0]
-
-                new_path = f'/Users/xigua/数据中心/原始文件3/天猫_生意参谋/商品排行/{date}'
-                # new_path = os.path.join(new_path, date)  # 添加 年月分类
-                if not os.path.exists(new_path):
-                    os.makedirs(new_path, exist_ok=True)
-                # print(date_all)
-
-                new_name = f'py_xg_商品排行_万里马官方旗舰店_{date_all}.csv'
-                # print(os.path.join(new_path, new_name))
-                # breakpoint()
-                df.to_csv(os.path.join(new_path, new_name), encoding='utf-8_sig', index=False, header=True)
-                # try:
-                #     df.to_excel(os.path.join(new_path, new_name),
-                #                 index=False, header=True, engine='openpyxl', freeze_panes=(1, 0))
-                # except Exception as e:
-                #     print(e)
-
-
-def date_table(service_databases=[{}]):
+
+        # d = mongo.UploadMongo(
+        #     username=username,
+        #     password=password,
+        #     host=host,
+        #     port=port,
+        #     drop_duplicates=False,
+        # )
+        # for data in self.datas:
+        #     db_name, collection_name, df = data['数据库名'], data['集合名称'], data['数据主体']
+        #     df_to_json.get_df_types(
+        #         df=df,
+        #         db_name=db_name,
+        #         collection_name=collection_name,
+        #         is_file_dtype=True,  # 默认本地文件优先: True
+        #     )
+        #     d.df_to_mongo(df=df, db_name=db_name, collection_name=collection_name)
+        # if d.client:
+        #     d.client.close()
+
+        m = mysql.MysqlUpload(
+            username=username,
+            password=password,
+            host=host,
+            port=port,
+        )
+        for data in self.datas:
+            df, db_name, collection_name, rt_filename = data['数据主体'], data['数据库名'], data['集合名称'], data['文件名']
+            df_to_json.get_df_types(
+                df=df,
+                db_name=db_name,
+                collection_name=collection_name,
+                is_file_dtype=True,  # 默认本地文件优先: True
+            )
+            m.df_to_mysql(
+                df=df,
+                db_name=db_name,
+                table_name=collection_name,
+                move_insert=True,  # 先删除,再插入
+                df_sql=False,  # 值为 True 时使用 df.to_sql 函数上传整个表, 不会排重
+                drop_duplicates=False,  # 值为 True 时检查重复数据再插入,反之直接上传,会比较慢
+                filename=rt_filename,  # 用来追踪处理进度
+                service_database=service_database,  # 字典
+            )
+        df_to_json.as_json_file()  # 写入 json 文件, 包含数据的 dtypes 信息
+
+
+def date_table():
     """
     生成 pbix 使用的日期表
     """
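As the destructuring above shows, the rewritten upload_df expects each entry of self.datas to be a dict keyed '数据主体', '数据库名', '集合名称' and '文件名'. A hedged sketch of the same calling pattern against a single standalone record; the credentials and names below are placeholders, and a reachable MySQL instance is assumed:

    import pandas as pd
    from mdbq.mysql import mysql

    # Placeholder credentials; the package resolves these via myconfig at import time.
    username, password, host, port = 'user', 'pw', '127.0.0.1', 3306
    service_database = {'xigua_lx': 'mysql'}

    record = {
        '数据库名': '生意参谋3',  # illustrative target database
        '集合名称': '示例表',     # illustrative target table
        '数据主体': pd.DataFrame({'日期': ['2024-01-01'], '销售额': [100]}),
        '文件名': 'py_xg_示例_2024-01-01.csv',
    }
    m = mysql.MysqlUpload(username=username, password=password, host=host, port=port)
    m.df_to_mysql(
        df=record['数据主体'],
        db_name=record['数据库名'],
        table_name=record['集合名称'],
        move_insert=True,            # delete-then-insert, as upload_df does
        filename=record['文件名'],    # used only to report progress
        service_database=service_database,
    )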
@@ -1549,47 +1505,32 @@ def date_table(service_databases=[{}]):
     df['月索引'] = mon
     df.sort_values('日期', ascending=False, ignore_index=True, inplace=True)
 
-    [... 16 removed lines not rendered in this diff view ...]
-        move_insert=True,  # 先删除,再插入
-        df_sql=False,  # 值为 True 时使用 df.to_sql 函数上传整个表, 不会排重
-        drop_duplicates=False,  # 值为 True 时检查重复数据再插入,反之直接上传,会比较慢
-        filename=None,  # 用来追踪处理进度
-        service_database=service_database,  # 用来追踪处理进度
-    )
+    m = mysql.MysqlUpload(
+        username=username,
+        password=password,
+        host=host,
+        port=port,
+    )
+    m.df_to_mysql(
+        df=df,
+        db_name='聚合数据',
+        table_name='日期表',
+        move_insert=True,  # 先删除,再插入
+        df_sql=False,  # 值为 True 时使用 df.to_sql 函数上传整个表, 不会排重
+        drop_duplicates=False,  # 值为 True 时检查重复数据再插入,反之直接上传,会比较慢
+        filename=None,  # 用来追踪处理进度
+        service_database=service_database,  # 用来追踪处理进度
+    )
 
 
-def main(
+def main(is_mysql=False):
     """
     is_mysql: 调试时加,False: 是否后续的聚合数据
    """
 
-    if not service_databases:
-        service_databases = [
-            # {'home_lx': 'mongodb'},
-            {'home_lx': 'mysql'},
-            # {'company': 'mysql'},
-            # {'nas': 'mysql'},
-        ]
-
     cn = DataClean(
         path=upload_path,  # 源文件目录,下载文件夹
         source_path=source_path3,  # 原始文件保存目录
-        service_databases=service_databases
     )
     cn.new_unzip(is_move=True)  # 解压文件, is_move 解压后是否删除原 zip 压缩文件
     cn.sycm_tm(is_except=['except'])  # 天猫生意参谋
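For orientation, the date dimension that date_table uploads can be sketched with pandas. Only the columns visible in this hunk ('日期', '月索引') are used, and the 月索引 encoding below is an assumption, not the package's formula:

    import pandas as pd

    dates = pd.date_range('2024-01-01', '2024-12-31', freq='D')
    df = pd.DataFrame({'日期': dates})
    df['月索引'] = df['日期'].dt.year * 100 + df['日期'].dt.month  # assumed encoding, e.g. 202401
    df.sort_values('日期', ascending=False, ignore_index=True, inplace=True)
    print(df.head(3))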
@@ -1600,7 +1541,7 @@ def main(service_databases=None, is_mysql=False):
     # cn.syj_reports_tb(is_except=['except'])  # 淘宝生意经,不可以和天猫同时运行
     cn.jd_reports(is_except=['except'])  # 清洗京东报表
     cn.sp_scene_clean(is_except=['except'])  # 商品素材
-    cn.upload_df(
+    cn.upload_df()  # 上传数据库
 
     cn.move_sycm(is_except=['临时文件', ])  # 生意参谋,移到文件到原始文件夹
     cn.move_dmp(is_except=['临时文件', ])  # 达摩盘
@@ -1613,64 +1554,35 @@ def main(service_databases=None, is_mysql=False):
         return
 
     # 更新日期表
-    date_table(
-    # 更新货品年份基准表, 属性设置
+    date_table()
+    # 更新货品年份基准表, 属性设置 3 - 货品年份基准
     p = products.Products()
-    p.to_mysql(
+    p.to_mysql()
 
+    conf = myconfig.main()
+    data = conf['Windows']['xigua_lx']['mysql']['local']
+    db_list = conf['Windows']['xigua_lx']['mysql']['数据库集']
+    db_list = [item for item in db_list if item != '聚合数据']
     # 清理所有非聚合数据的库
     optimize_data.op_data(
-        db_name_lists=
-
-            '属性设置3',
-            '推广数据2',
-            '生意参谋3',
-            '推广数据_淘宝店',
-            '爱库存2'
-            '生意参谋3',
-            '生意经2',
-            # '聚合数据',  # 不在这里清理聚合数据, 还未开始聚合呢
-            '达摩盘3',
-        ],
-        days=100,
+        db_name_lists=db_list,
+        days=5,
         is_mongo=True,
         is_mysql=True,
-        service_databases=service_databases
     )
 
     # 数据聚合
-    query_data.data_aggregation(
+    query_data.data_aggregation(months=3)
     time.sleep(60)
 
     # 清理聚合数据, mongodb 中没有聚合数据,所以只需要清理 mysql 即可
     optimize_data.op_data(
         db_name_lists=['聚合数据'],
         days=3650,
-        service_databases=service_databases,
         is_mongo=False,
         is_mysql=True,
     )
 
 
 if __name__ == '__main__':
-    main(
-        service_databases = [
-            # {'company': 'mysql'},
-            {'home_lx': 'mysql'},
-            # {'home_lx': 'mongodb'},
-            # {'nas': 'mysql'},
-        ],
-        is_mysql = False,  # 清理聚合数据
-    )
-    # date_table(service_databases=[{'company': 'mysql'}])
-    # c = DataClean(
-    #     path=upload_path,  # 源文件目录,下载文件夹
-    #     source_path=source_path3,  # 原始文件保存目录
-    #     service_databases=[{'home_lx': 'mysql'},]
-    # )
-    # c.sp_scene_clean(is_except=['except'])  # 商品素材
-    # c.move_tg_tm(is_except=['临时文件', ])  # 天猫,移到文件到原始文件夹
-
-
-    # test()
-
+    main(is_mysql=False)
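With the service_databases plumbing removed, driving the pipeline reduces to parameterless calls. A usage sketch, assuming a my_config.txt entry matching the current hostname (see the myconfig module added below):

    from mdbq.clean import clean_upload

    clean_upload.main(is_mysql=False)  # clean and upload; skip the later optimize/aggregate steps
    # clean_upload.date_table()        # regenerate only the pbix date table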
mdbq/config/myconfig.py
ADDED
@@ -0,0 +1,30 @@
+# -*- coding: UTF-8 –*-
+import os
+import json
+from mdbq.config import set_support
+
+
+
+def main():
+    support_path = set_support.SetSupport(dirname='support').dirname
+    file = os.path.join(support_path, 'my_config.txt')
+    if not os.path.isfile(file):
+        print(f'缺少配置文件,无法读取配置文件: {file}')
+        return
+    with open(file, 'r', encoding='utf-8') as f:
+        config_datas = json.load(f)
+    return config_datas
+
+
+def write_back(datas):
+    """ 将数据写回本地 """
+    support_path = set_support.SetSupport(dirname='support').dirname
+    file = os.path.join(support_path, 'my_config.txt')
+    with open(file, 'w+', encoding='utf-8') as f:
+        json.dump(datas, f, ensure_ascii=False, sort_keys=False, indent=4)
+
+
+
+if __name__ == '__main__':
+    d = main()
+    print(d)
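A round-trip usage sketch for the new module. The nested layout accessed below mirrors the keys referenced elsewhere in this release (conf['Windows'][host]['mysql']['local']); the full schema of my_config.txt is not part of this diff:

    from mdbq.config import myconfig

    conf = myconfig.main()         # returns None if support/my_config.txt is missing
    if conf:
        local = conf['Windows']['xigua_lx']['mysql']['local']
        print(local['host'], local['port'])
        local['port'] = 3307       # edit in memory...
        myconfig.write_back(conf)  # ...then persist back to my_config.txt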
mdbq/config/products.py
CHANGED
@@ -3,15 +3,29 @@ import json
 import os
 import platform
 import getpass
+import socket
 import pandas as pd
 from mdbq.mysql import mysql
-from mdbq.config import
+from mdbq.config import myconfig
 from numpy.ma.core import product
 
 """
 天猫货品年份基准对照
 用于聚合数据,通过此数据表进一步可确定商品上架年月
 """
+username, password, host, port, service_database = None, None, None, None, None,
+if socket.gethostname().lower() in ['xigua_lx', 'xigua1', 'macbook pro']:
+    conf = myconfig.main()
+    data = conf['Windows']['xigua_lx']['mysql']['local']
+    username, password, host, port = data['username'], data['password'], data['host'], data['port']
+    service_database = {'xigua_lx': 'mysql'}
+elif socket.gethostname().lower() in ['company', 'mac2.local']:
+    conf = myconfig.main()
+    data = conf['Windows']['company']['mysql']['local']
+    username, password, host, port = data['username'], data['password'], data['host'], data['port']
+    service_database = {'company': 'mysql'}
+if not username:
+    print(f'找不到主机:')
 
 
 class Products:
@@ -107,31 +121,25 @@ class Products:
         ]
         self.datas += my_datas
 
-    def to_mysql(self
+    def to_mysql(self):
         self.update_my_datas()
         df = pd.DataFrame(self.datas)
-        [... 16 removed lines not rendered in this diff view ...]
-            move_insert = False,
-            df_sql=False,  # 值为 True 时使用 df.to_sql 函数上传整个表, 不会排重
-            drop_duplicates=True,  # 值为 True 时检查重复数据再插入,反之直接上传,会比较慢
-            icm_update=[],
-            service_database=service_database,  # 用来追踪处理进度
-        )
+        m = mysql.MysqlUpload(
+            username=username,
+            password=password,
+            host=host,
+            port=port,
+        )
+        m.df_to_mysql(
+            df=df,
+            db_name='属性设置3',
+            table_name='货品年份基准',
+            move_insert = False,
+            df_sql=False,  # 值为 True 时使用 df.to_sql 函数上传整个表, 不会排重
+            drop_duplicates=True,  # 值为 True 时检查重复数据再插入,反之直接上传,会比较慢
+            icm_update=[],
+            service_database=service_database,  # 用来追踪处理进度
+        )
 
     def market_date(self, product_id: int):
         try:
@@ -153,13 +161,3 @@ if __name__ == '__main__':
     p = Products()
     year = p.market_date(product_id=product_id)
     print(f'{product_id}: {year}')
-
-    p.to_mysql(service_databases=[
-        {
-            'home_lx': 'mysql'
-        },
-        {
-            'company': 'mysql'
-        }
-    ]
-    )
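With the signature change, uploading the baseline table no longer takes a service_databases argument; usage reduces to the sketch below (the product id is hypothetical):

    from mdbq.config import products

    p = products.Products()
    p.to_mysql()  # credentials are resolved via myconfig at import time
    print(p.market_date(product_id=696770049216))  # hypothetical id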
mdbq/mysql/mysql.py
CHANGED
@@ -12,7 +12,7 @@ import pandas as pd
 from sqlalchemy import create_engine
 import os
 import calendar
-from mdbq.config import
+from mdbq.config import myconfig
 from mdbq.config import set_support
 from mdbq.dataframe import converter
 from mdbq.aggregation import mysql_types
@@ -49,7 +49,7 @@ class MysqlUpload:
         self.port = port
         self.config = {
             'host': self.host,
-            'port': self.port,
+            'port': int(self.port),
             'user': self.username,
             'password': self.password,
             'charset': charset,  # utf8mb4 支持存储四字节的UTF-8字符集
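The int() cast guards against configs that hand the port back as a string; my_config.txt is parsed as JSON, where '3306' is a plausible stored value, and the connection config expects an integer port. A minimal illustration of the normalization:

    config = {'host': '127.0.0.1', 'port': '3306'}  # port deserialized as a string
    config['port'] = int(config['port'])            # normalize before connecting
    assert isinstance(config['port'], int)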
@@ -69,7 +69,7 @@ class MysqlUpload:
         return wrapper
 
     @try_except
-    def df_to_mysql(self, df, table_name, db_name='远程数据源', icm_update=[], service_database={'
+    def df_to_mysql(self, df, table_name, db_name='远程数据源', icm_update=[], service_database={'xigua_lx': 'mysql'}, move_insert=False, df_sql=False, drop_duplicates=False, filename=None, count=None, json_path=None, reset_id=False):
         """
         将 df 写入数据库
         db_name: 数据库名称
@@ -185,13 +185,13 @@ class MysqlUpload:
 
                 # print(cl, db_n, tb_n)
                 # 返回这些结果的目的是等添加完列再写 json 文件才能读到 types 信息
-                # ⚠️ mysql_all_dtypes 函数默认只读取
+                # ⚠️ mysql_all_dtypes 函数默认只读取 xigua_lx 的数据库信息,不会读取其他系统
                 if cl and db_n and tb_n:
-                    mysql_types.mysql_all_dtypes(
+                    mysql_types.mysql_all_dtypes(db_name=db_name, table_name=table_name)  # 更新一个表的 dtypes
                 elif cl and db_n:
-                    mysql_types.mysql_all_dtypes(
+                    mysql_types.mysql_all_dtypes(db_name=db_name)  # 更新一个数据库的 dtypes
                 elif cl:
-                    mysql_types.mysql_all_dtypes(
+                    mysql_types.mysql_all_dtypes()  # 更新所有数据库所有数据表的 dtypes 信息到本地 json
 
                 # 4. 更新插入数据
                 now = datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S ")
@@ -555,7 +555,7 @@ class OptimizeDatas:
         self.charset = charset
         self.config = {
             'host': self.host,
-            'port': self.port,
+            'port': int(self.port),
             'user': self.username,
             'password': self.password,
             'charset': self.charset,  # utf8mb4 支持存储四字节的UTF-8字符集
@@ -864,38 +864,8 @@ def year_month_day_bak(start_date, end_date):
     return results  # start_date至end_date之间的所有年月日
 
 
-def download_datas_bak(table_name, save_path, start_date):
-    username, password, host, port = get_myconf.select_config_values(target_service='home_lx', database='mysql')
-    print(username, password, host, port)
-    m = MysqlUpload(username=username, password=password, host=host, port=port)
-    m.port = port
-    results = year_month_day(start_date=start_date, end_date='today')
-    # print(results)
-    for result in results:
-        start_date = result['起始日期']
-        end_date = result['结束日期']
-        # print(start_date, end_date)
-        df = m.read_mysql(db_name='天猫数据1', table_name=table_name, start_date=start_date, end_date=end_date)
-        if len(df) == 0:
-            continue
-        path = os.path.join(save_path, f'{table_name}_{str(start_date)}_{str(end_date)}.csv')
-        df['日期'] = df['日期'].apply(lambda x: re.sub(' .*', '', str(x)))
-        df.to_csv(path, index=False, encoding='utf-8_sig', header=True)
-
-
 if __name__ == '__main__':
-
-
-
-
-    # print(df)
-    m = MysqlUpload(username=username, password=password, host=host, port=port)
-    m.df_to_mysql(
-        df=df,
-        db_name='test',
-        table_name='测试数据',
-        drop_duplicates=True,
-        # service_name=service_name,
-        # service_databases=service_databases,
-    )
-
+    conf = myconfig.main()
+    data = conf['Windows']['xigua_lx']['mysql']['local']
+    username, password, host, port = data['username'], data['password'], data['host'], data['port']
+    print(username, password, host, port)