mdbq 2.9.4__py3-none-any.whl → 2.9.5__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- mdbq/aggregation/aggregation.py +15 -12
- mdbq/aggregation/query_data.py +40 -17
- mdbq/config/products.py +0 -10
- mdbq/mysql/mysql.py +105 -163
- mdbq/mysql/recheck_mysql.py +1 -1
- mdbq/req_post/req_tb.py +1 -1
- {mdbq-2.9.4.dist-info → mdbq-2.9.5.dist-info}/METADATA +1 -1
- {mdbq-2.9.4.dist-info → mdbq-2.9.5.dist-info}/RECORD +10 -12
- mdbq/company/copysh_bak.py +0 -417
- mdbq/company/home_sh.py +0 -386
- {mdbq-2.9.4.dist-info → mdbq-2.9.5.dist-info}/WHEEL +0 -0
- {mdbq-2.9.4.dist-info → mdbq-2.9.5.dist-info}/top_level.txt +0 -0
mdbq/company/home_sh.py
DELETED
@@ -1,386 +0,0 @@
|
|
1
|
-
# -*- coding: UTF-8 -*-
|
2
|
-
import os
|
3
|
-
import platform
|
4
|
-
import warnings
|
5
|
-
import getpass
|
6
|
-
import sys
|
7
|
-
import configparser
|
8
|
-
import datetime
|
9
|
-
import shutil
|
10
|
-
import time
|
11
|
-
import re
|
12
|
-
import socket
|
13
|
-
from dateutil.utils import today
|
14
|
-
from mdbq.bdup import bdup
|
15
|
-
from mdbq.aggregation import aggregation
|
16
|
-
from mdbq.aggregation import query_data
|
17
|
-
from mdbq.aggregation import optimize_data
|
18
|
-
from mdbq.config import update_conf
|
19
|
-
from mdbq.config import get_myconf
|
20
|
-
from mdbq.config import set_support
|
21
|
-
from mdbq.config import products
|
22
|
-
from mdbq.mysql import mysql
|
23
|
-
if platform.system() == 'Windows':
|
24
|
-
from mdbq.pbix import refresh_all
|
25
|
-
warnings.filterwarnings('ignore')
|
26
|
-
"""
|
27
|
-
除公司台式机外,其他主机执行下载更新任务
|
28
|
-
"""
|
29
|
-
|
30
|
-
|
31
|
-
class TbFiles:
    """Periodically mirror the local "pandas数据源" tree to the shared drive.

    Paths are loaded from the ``.home.conf`` file in the support directory
    (created with platform defaults when missing or unusable).  A sync is
    triggered when the newest modification time under the data-source tree
    changes, and runs at most once per calendar day.
    """

    def __init__(self):
        support_path = set_support.SetSupport(dirname='support').dirname

        self.my_conf = os.path.join(support_path, '.home.conf')
        self.path1 = os.path.join(support_path, 'tb_list.txt')  # run_py files to publish
        self.path2 = os.path.join(support_path, 'cp_list.txt')  # data-source entries to sync
        self.d_path = None
        self.data_path = None
        self.share_path = None
        self.before_max_time = []  # newest-mtime stamps that were already handled
        self.sleep_minutes = 30
        self.tomorrow = datetime.date.today()  # earliest date on which the next sync may run

    @staticmethod
    def _stamp():
        """Return the current local time formatted for log lines."""
        return datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S')

    def check_change(self):
        """Return the newest file modification time under the data-source tree.

        Returns:
            The newest mtime formatted as ``%Y%m%d%H%M%S``, or ``None`` when
            the directory does not exist or contains no eligible files.
        """
        source_path = os.path.join(self.data_path, 'pandas数据源')
        if not os.path.exists(source_path):
            return None

        ignored_marks = ('~$', 'baiduyun', 'Icon', 'xunlei')
        results = []
        for root, _dirs, files in os.walk(source_path, topdown=False):
            for name in files:
                # Temp/lock files and sync-client artefacts must not trigger a sync.
                if name.startswith('.') or any(mark in name for mark in ignored_marks):
                    continue
                mtime = os.stat(os.path.join(root, name)).st_mtime
                results.append(datetime.datetime.fromtimestamp(mtime))

        # An empty tree used to crash on max([]); report "nothing found" instead.
        if not results:
            return None
        return max(results).strftime('%Y%m%d%H%M%S')

    def check_conf(self):
        """Load the three directories from the config file, re-initialising it on any problem."""
        if not os.path.isfile(self.my_conf):
            self.set_conf()  # create the config file
            print('因缺少配置文件, 已自动初始化')

        config = configparser.ConfigParser()
        try:
            config.read(self.my_conf, 'UTF-8')
            self.d_path = config.get('database', 'd_path')
            self.data_path = config.get('database', 'data_path')
            self.share_path = config.get('database', 'share_path')
            if self.d_path is None or self.data_path is None or self.share_path is None:
                self.set_conf()
                print('配置文件部分值不完整, 已自动初始化')
            if not (os.path.exists(self.d_path)
                    and os.path.exists(self.data_path)
                    and os.path.exists(self.share_path)):
                self.set_conf()
                print('配置文件异常(可能跨系统), 已自动初始化')
        except Exception as e:
            print(e)
            print('配置文件部分值缺失, 已自动初始化')
            self.set_conf()

        # This method runs on every poll; avoid growing sys.path without bound.
        if self.share_path not in sys.path:
            sys.path.append(self.share_path)

    def set_conf(self):
        """Set platform-default directories and write them to the config file."""
        system = platform.system()
        if system == 'Windows':
            self.d_path = os.path.join('C:\\Users', getpass.getuser(), 'Downloads')
            self.data_path = os.path.join('C:\\同步空间', 'BaiduSyncdisk')
            self.share_path = os.path.join('\\\\192.168.1.198', '时尚事业部\\01.运营部\\天猫报表')  # share root
        elif system == 'Darwin':
            self.d_path = os.path.join('/Users', getpass.getuser(), 'Downloads')
            self.data_path = os.path.join('/Users', getpass.getuser(), '数据中心')
            self.share_path = os.path.join('/Volumes/时尚事业部/01.运营部/天猫报表')  # share root
        else:
            self.d_path = 'Downloads'
            self.data_path = os.path.join(getpass.getuser(), '数据中心')
            self.share_path = os.path.join('/Volumes/时尚事业部/01.运营部/天猫报表')  # share root

        # Fall back to the alternative share name when the primary one is not reachable.
        if not os.path.exists(self.share_path):
            self.share_path = self.share_path.replace('时尚事业部', '时尚事业部-1')

        with open(self.my_conf, 'w+', encoding='utf-8') as f:
            f.write('[database]\n')
            f.write('# 配置文件\n')
            f.write('# home_sh.py ,当不是使用公司台式机 下载百度云文件夹进行任务更新时,读取这个配置文件\n')
            f.write('# 下载目录\n')
            f.write(f'd_path = {self.d_path}\n\n')
            f.write('# 数据中心目录\n')
            f.write(f'data_path = {self.data_path}\n\n')
            f.write('# 共享目录\n')
            f.write(f'share_path = {self.share_path}\n\n')
            f.write('# 用于触发下载百度云文件,更新至本机数据库\n')
            f.write('home_record = False\n\n')
        print('目录初始化!')

    @staticmethod
    def _read_entries(path):
        """Return the stripped, non-empty, non-comment lines of the text file *path*."""
        with open(path, 'r', encoding='utf-8') as f:
            stripped = [line.strip() for line in f]
        return [line for line in stripped if line and not line.startswith('#')]

    @staticmethod
    def _remove_matching(folder, marks, report_failure=True):
        """Delete entries of *folder* whose name contains any of *marks* (best effort)."""
        if not os.path.isdir(folder):
            return
        for entry in os.listdir(folder):
            if not any(mark in entry for mark in marks):
                continue
            victim = os.path.join(folder, entry)
            try:
                os.remove(victim)
                print(f'移除: {victim}')
            except Exception as e:
                print(e)
                if report_failure:
                    print(f'移除失败:{victim}')

    def _publish_run_py(self, run_py_dir, names):
        """Copy the listed run_py files to the share, falling back to a "副本_" copy."""
        for file_1 in names:
            s = os.path.join(run_py_dir, file_1)
            t = os.path.join(self.share_path, file_1)
            try:
                shutil.copy2(s, t)
                print(f'{self._stamp()} 复制: {s}')
            except Exception as e:
                print(e)
                # The direct copy failed; publish under a "副本_" name instead.
                s1 = os.path.join(run_py_dir, f'副本_{file_1}')
                t1 = os.path.join(self.share_path, f'副本_{file_1}')
                try:
                    shutil.copy2(s, s1)   # create the local copy
                    shutil.copy2(s1, t1)  # publish the copy
                    print(f'{self._stamp()} 已创建副本 -->> {s1}')
                except Exception as e2:
                    # The fallback is best effort too; do not abort the whole sync.
                    print(e2)

    def _copy_logged(self, src, dst):
        """Copy one file with metadata, logging success or the error."""
        try:
            shutil.copy2(src, dst)
            print(f'{self._stamp()} 复制文件: {src}')
        except Exception as e:
            print(e)

    def _sync_data(self, source_path, target_path, entries, recent_hours):
        """Copy recently modified csv/xlsx/pbix/xls files of *entries* to the share."""
        suffixes = ('csv', 'xlsx', 'pbix', 'xls')
        max_age = recent_hours * 3600
        now_time = time.time()
        for filenames in entries:
            src = os.path.join(source_path, filenames)  # file or folder
            dst = os.path.join(target_path, filenames)  # file or folder
            if os.path.isdir(src):
                for root, _dirs, files in os.walk(src, topdown=False):
                    for name in files:
                        if '~$' in name or 'DS_Store' in name:
                            continue
                        if not name.endswith(suffixes):
                            continue
                        new_src = os.path.join(root, name)
                        # Mirror the file's position below src under dst.
                        dst_file = os.path.join(dst, os.path.relpath(new_src, src))
                        os.makedirs(os.path.dirname(os.path.abspath(dst_file)), exist_ok=True)
                        if now_time - os.stat(new_src).st_mtime < max_age:
                            self._copy_logged(new_src, dst_file)
            elif os.path.isfile(src) and 'DS_Store' not in src:
                if src.endswith(suffixes) and now_time - os.stat(src).st_mtime < max_age:
                    # Ensure the *destination* parent exists (the source's always does).
                    os.makedirs(os.path.dirname(os.path.abspath(dst)), exist_ok=True)
                    self._copy_logged(src, dst)
            else:
                print(f'{src} 所需同步的文件不存在,请检查:pd_list参数')

    def _refresh_excel_reports(self):
        """Refresh every .xlsx workbook in the share's "EXCEL报表" folder (Windows only)."""
        excel_path = os.path.join(self.share_path, 'EXCEL报表')
        files = os.listdir(excel_path)
        r = refresh_all.RefreshAll()
        for file in files:
            if file.endswith('.xlsx'):
                r.refresh_excel2(excel_file=os.path.join(excel_path, file))
                # NOTE(review): pause placement reconstructed from a flattened
                # listing; confirm it belongs after each refresh.
                time.sleep(10)

    def tb_file(self):
        """Run one detection pass and, if the data source changed, sync it to the share."""
        self.check_conf()

        now_max_time = self.check_change()
        if now_max_time in self.before_max_time:
            return  # nothing changed since the last pass
        self.before_max_time = [now_max_time]  # keep only the latest stamp

        now = self._stamp()
        if not self.check_upload_mysql():
            print('检测到源文件修改, 但今日已经同步过, 不再同步')
            return
        print(f'{now} pandas数据源文件修改, 触发同步 ({self.sleep_minutes}分钟后开始)')

        if not os.path.exists(self.data_path):
            print(f'{self.data_path}: 本地目录不存在或配置文件异常, 无法同步此目录')
            return None
        if not os.path.exists(self.share_path):
            print(f'{self.share_path}: 本机未连接共享或配置文件异常, 无法同步')
            return None

        time.sleep(self.sleep_minutes * 60)  # wait before starting the sync
        recent_time = 48  # only files modified within this many hours are copied

        tb_list, pd_list = [], []
        try:
            tb_list = self._read_entries(self.path1)
            pd_list = self._read_entries(self.path2)
        except Exception as e:
            print(e)

        source_path = os.path.join(self.data_path, 'pandas数据源')
        target_path = os.path.join(self.share_path, 'pandas数据源')
        os.makedirs(target_path, exist_ok=True)

        # Remove stale copies and stray temp files left on the share and locally.
        run_py_dir = os.path.join(self.data_path, '自动0备份', 'py', '数据更新', 'run_py')
        self._remove_matching(self.share_path, ('副本_', 'con'))
        self._remove_matching(target_path, ('.DS', 'con'), report_failure=False)
        self._remove_matching(run_py_dir, ('副本_',))

        print(f'{self._stamp()} 正在同步文件...')
        self._publish_run_py(run_py_dir, tb_list)
        self._sync_data(source_path, target_path, pd_list, recent_time)

        if platform.system() == 'Windows':
            self._refresh_excel_reports()

        # Store as a list: a bare str/None here would break the `in` test above.
        self.before_max_time = [self.check_change()]

        print(f'{self._stamp()} 同步完成!')

    def check_upload_mysql(self):
        """Return True at most once per calendar day.

        Uses ``>=`` so that a day without any trigger does not leave the
        gate permanently closed.
        """
        today = datetime.date.today()
        if today >= self.tomorrow:
            self.tomorrow = today + datetime.timedelta(days=1)
            return True
        return False
|
283
|
-
|
284
|
-
|
285
|
-
class UpdateMysql:
    """Reads the ``home_record`` flag that decides whether a database update should run."""

    def __init__(self):
        support_dir = set_support.SetSupport(dirname='support').dirname
        self.my_conf = os.path.join(support_dir, '.home.conf')
        self.ch_record = False
        self.d_path = None

    def check_date(self):
        """Read ``home_record`` from the config file.

        Returns:
            A ``(flag, d_path)`` tuple. ``flag`` is True only when the stored
            value is ``true`` (case-insensitive); any other value yields False.
            ``d_path`` is the download directory set after a successful read.
        """
        parser = configparser.ConfigParser()
        try:
            parser.read(self.my_conf, 'UTF-8')
            self.ch_record = parser.get('database', 'home_record').lower()
            self.d_path = f'/Users/{getpass.getuser()}/Downloads'
        except Exception as e:
            print(e)

        # Anything other than the two literal values is reported and treated as "no update".
        if self.ch_record not in ('true', 'false'):
            print(f'配置可能有误: {self.ch_record}, home_record 值应为: true 或 false')
            return False, self.d_path
        return self.ch_record == 'true', self.d_path
|
308
|
-
|
309
|
-
|
310
|
-
def op_data(days: int =100):
    """Clean the raw databases, rebuild the aggregated data, then clean that too.

    Does nothing when the host name is 'company'.

    Args:
        days: passed through as ``days`` to the clean-up of the raw databases.
    """
    if socket.gethostname() == 'company':
        return

    # The aggregated database is left out of this first pass on purpose:
    # aggregation has not run yet at this point.
    raw_databases = [
        '京东数据2',
        '推广数据2',
        '市场数据2',
        '生意参谋2',
        '生意经2',
        '属性设置2',
    ]
    optimize_data.op_data(db_name_lists=raw_databases, days=days)

    # Aggregate, pause, then clean the aggregated database.
    query_data.data_aggregation(service_databases=[{'home_lx': 'mysql'}], months=3)
    time.sleep(60)
    optimize_data.op_data(db_name_lists=['聚合数据'], days=3650)
|
352
|
-
|
353
|
-
|
354
|
-
def main():
    """Poll forever: run a database update when flagged, then sync files to the share.

    Each iteration reads ``home_record``; when it is true the day's folder
    is downloaded, processed and uploaded to the 'home_lx' MySQL service,
    the flag is reset to 'False', and clean-up/aggregation runs.  The file
    sync check then runs and the loop sleeps for 600 seconds.
    """
    t = TbFiles()
    u = UpdateMysql()
    while True:
        res, d_path = u.check_date()  # the home_record flag decides whether to update
        if res:
            # Take one date snapshot so the month and day segments of the
            # remote path cannot disagree across midnight.
            today = datetime.date.today()
            upload_path = f'windows/{today.strftime("%Y-%m")}/{today}'
            b = bdup.BaiDu()
            b.download_dir(local_path=d_path, remote_path=upload_path)

            dp = aggregation.DatabaseUpdate(path=d_path)
            dp.new_unzip(is_move=True)
            dp.cleaning(is_move=True, is_except=[])  # removes the files it downloaded itself
            dp.upload_df(service_databases=[{'home_lx': 'mysql'}])
            # The date table is not governed by the days parameter, so it is updated separately.
            dp.date_table(service_databases=[{'home_lx': 'mysql'}])
            # Uploads 主推商品.csv from the support folder.
            dp.other_table(service_databases=[{'home_lx': 'mysql'}])

            # Mark the update as done by setting home_record to False in .home.conf.
            w = update_conf.UpdateConf()
            w.update_config(filename='.home.conf', option='home_record', new_value='False')
            time.sleep(60)
            op_data(days=100)  # clean-up and aggregation

        t.sleep_minutes = 5  # delay before a triggered sync starts
        t.tb_file()
        time.sleep(600)  # polling interval
|
380
|
-
|
381
|
-
|
382
|
-
# Script entry point: start the polling loop when this file is executed directly.
if __name__ == "__main__":
    main()
    # Disabled: aggregate the data for the 'company' service and clean the result.
    # query_data.data_aggregation(service_databases=[{'company': 'mysql'}], months=1)
|
386
|
-
|
File without changes
|
File without changes
|