mdbq 3.7.4__py3-none-any.whl → 3.7.5__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {mdbq-3.7.4.dist-info → mdbq-3.7.5.dist-info}/METADATA +1 -1
- mdbq-3.7.5.dist-info/RECORD +4 -0
- {mdbq-3.7.4.dist-info → mdbq-3.7.5.dist-info}/WHEEL +1 -1
- mdbq-3.7.5.dist-info/top_level.txt +1 -0
- mdbq/__init__.py +0 -1
- mdbq/__version__.py +0 -3
- mdbq/aggregation/__init__.py +0 -4
- mdbq/aggregation/aggregation_bak.py +0 -1438
- mdbq/aggregation/datashow_bak.py +0 -1264
- mdbq/aggregation/optimize_data.py +0 -76
- mdbq/aggregation/query_data.py +0 -3869
- mdbq/bdup/__init__.py +0 -5
- mdbq/bdup/bdup.py +0 -111
- mdbq/config/__init__.py +0 -4
- mdbq/config/default.py +0 -131
- mdbq/config/myconfig.py +0 -32
- mdbq/config/products.py +0 -159
- mdbq/config/set_support.py +0 -22
- mdbq/dataframe/__init__.py +0 -4
- mdbq/dataframe/converter.py +0 -107
- mdbq/log/__init__.py +0 -4
- mdbq/log/mylogger.py +0 -66
- mdbq/log/spider_logging.py +0 -55
- mdbq/mongo/__init__.py +0 -4
- mdbq/mongo/mongo.py +0 -729
- mdbq/mysql/__init__.py +0 -4
- mdbq/mysql/mysql.py +0 -1784
- mdbq/mysql/s_query.py +0 -211
- mdbq/mysql/year_month_day.py +0 -38
- mdbq/other/__init__.py +0 -4
- mdbq/other/download_sku_picture.py +0 -985
- mdbq/other/porxy.py +0 -115
- mdbq/other/pov_city.py +0 -405
- mdbq/other/sku_picture_bak.py +0 -1081
- mdbq/other/ua_sj.py +0 -222
- mdbq/pbix/__init__.py +0 -4
- mdbq/pbix/pbix_refresh.py +0 -70
- mdbq/pbix/refresh_all.py +0 -158
- mdbq/pbix/refresh_all_old.py +0 -177
- mdbq/redis/__init__.py +0 -4
- mdbq/redis/getredis.py +0 -642
- mdbq/spider/__init__.py +0 -4
- mdbq/spider/aikucun.py +0 -494
- mdbq-3.7.4.dist-info/RECORD +0 -43
- mdbq-3.7.4.dist-info/top_level.txt +0 -1
mdbq/bdup/bdup.py
DELETED
@@ -1,111 +0,0 @@
|
|
1
|
-
# -*- coding: UTF-8 -*-
|
2
|
-
import os
|
3
|
-
import platform
|
4
|
-
import subprocess
|
5
|
-
from concurrent.futures import ThreadPoolExecutor
|
6
|
-
from bypy import ByPy
|
7
|
-
|
8
|
-
|
9
|
-
class BaiDu:
    """
    Small wrapper around ``bypy`` for moving folders to and from Baidu Netdisk.

    Remote (cloud) paths must use forward slashes. Every method normalises
    backslashes in ``remote_path``, because a Windows-style remote path makes
    the upload fail (the remote file cannot be created).
    """

    # OS housekeeping names that are never uploaded and are always removed remotely.
    _JUNK_NAMES = ('.DS_Store', '.localized', 'desktop.ini', '$RECYCLE.BIN', 'Icon')

    def __init__(self):
        self.local_path = None
        self.remote_path = None
        self.skip: list = []  # extra names upload_file() must not upload
        self.delete_remote_files: list = []  # extra names delete_files() removes remotely
        self.bp = ByPy()
        self.count = 0  # files handed to up_one_file() so far
        self.total = 0  # files discovered by upload_file() so far

    def upload_dir(self, local_path, remote_path):
        """
        Upload a whole folder, then remove the unwanted remote files.

        :param local_path: local folder to upload
        :param remote_path: destination folder in the cloud (backslashes are normalised)
        :return: None; prints a message and returns early when ``local_path`` does not exist
        """
        self.local_path = local_path
        self.remote_path = remote_path.replace('\\', '/')
        if not os.path.exists(self.local_path):
            print(f'{self.local_path}: 本地目录不存在,没有什么可传的')
            return

        if platform.system() == 'Windows':
            self.bp.upload(localpath=self.local_path, remotepath=self.remote_path)
        else:
            # Argument list instead of a shell string: paths containing quotes or
            # shell metacharacters are passed through verbatim.
            command = ['bypy', 'upload', self.local_path, self.remote_path, '--on-dup', 'skip']
            try:
                subprocess.run(command)
            except Exception as e:  # best effort, e.g. the bypy executable is missing
                print(e)
        # Clean up here so the paths used are the ones just uploaded to.
        self.delete_files()

    def upload_file(self, local_path, remote_path, processes=False):
        """
        Upload a folder file by file, skipping names listed in ``self.skip``.

        :param local_path: local folder to upload
        :param remote_path: destination folder in the cloud (backslashes are normalised)
        :param processes: when truthy, upload through a thread pool instead of sequentially
        :raises TypeError: if ``self.skip`` is not a list
        """
        if not isinstance(self.skip, list):
            raise TypeError('skip must be a list')
        # Combine into a local list so repeated calls do not keep growing self.skip.
        skipped = self.skip + list(self._JUNK_NAMES)
        self.local_path = local_path
        self.remote_path = remote_path.replace('\\', '/')
        if not os.path.exists(self.local_path):
            print(f'{self.local_path}: 本地目录不存在,没有什么可传的')
            return

        uploads = []
        for entry in os.listdir(self.local_path):
            if entry in skipped:  # skip whole top-level files/folders
                continue
            entry_path = os.path.join(self.local_path, entry)
            if os.path.isfile(entry_path):
                uploads.append({entry_path: self._remote_for(entry_path)})
            elif os.path.isdir(entry_path):
                for root, _dirs, files in os.walk(entry_path, topdown=False):
                    for name in files:
                        if name in skipped:  # skip junk files inside sub-folders
                            continue
                        file_path = os.path.join(root, name)
                        uploads.append({file_path: self._remote_for(file_path)})
        self.total += len(uploads)

        if processes:
            with ThreadPoolExecutor() as executor:
                executor.map(self.up_one_file, uploads)
        else:
            for item in uploads:
                self.up_one_file(file_dict=item)

    def _remote_for(self, local_file):
        """Map a file under ``self.local_path`` to its forward-slash path under ``self.remote_path``."""
        relative = os.path.relpath(local_file, self.local_path)
        return os.path.join(self.remote_path, relative).replace('\\', '/')

    def up_one_file(self, file_dict: dict):
        """
        Upload every ``{local_path: remote_path}`` pair in ``file_dict``.

        :raises TypeError: if ``file_dict`` is not a dict
        """
        if not isinstance(file_dict, dict):
            raise TypeError('file_dict must be a dict')
        for k, v in file_dict.items():
            self.count += 1
            print(f'上传: {self.count}/{self.total} {k}')
            self.bp.upload(localpath=k, remotepath=v)

    def delete_files(self):
        """Remove the unwanted files located directly under ``self.remote_path``."""
        base = self.remote_path.replace('\\', '/')
        # Work on a copy so self.delete_remote_files is not extended on every call.
        targets = list(self.delete_remote_files)
        targets += [name for name in self._JUNK_NAMES if name not in targets]
        for delete_file in targets:
            self.bp.remove(remotepath=f'{base}/{delete_file}')

    def download_dir(self, local_path, remote_path):
        """Download a remote folder into ``local_path``, creating the folder when missing."""
        self.local_path = local_path
        self.remote_path = remote_path.replace('\\', '/')
        if not os.path.exists(self.local_path):
            os.makedirs(self.local_path)

        self.bp.download(localpath=f'{self.local_path}', remotepath=self.remote_path)
|
107
|
-
|
108
|
-
|
109
|
-
if __name__ == '__main__':
    # Manual check: list the contents of the remote root through bypy.
    ByPy().list()
|
mdbq/config/__init__.py
DELETED
mdbq/config/default.py
DELETED
@@ -1,131 +0,0 @@
|
|
1
|
-
# -*- coding: UTF-8 -*-
|
2
|
-
import os
|
3
|
-
import json
|
4
|
-
import platform
|
5
|
-
import getpass
|
6
|
-
import socket
|
7
|
-
import logging
|
8
|
-
from mdbq.mysql import mysql
|
9
|
-
|
10
|
-
# Location of the "support" folder holding my_config.txt, chosen per operating system.
# The final fallback (neither Windows nor macOS) is a relative path that is not
# actually used and could be removed.
support_path = (
    r'C:\数据中心2\support' if platform.system() == 'Windows'
    else f'/Users/{getpass.getuser()}/数据中心2/support' if platform.system() == 'Darwin'
    else '数据中心2/support'
)
logger = logging.getLogger(__name__)
|
17
|
-
|
18
|
-
|
19
|
-
def get_mysql_engine(platform='Windows', hostname='xigua_lx', sql='mysql', local='remoto', config_file=None):
    """
    Build a MySQL uploader from the JSON config file.

    The config is indexed as ``conf[platform][hostname][sql][local]`` and must
    provide ``username``, ``password``, ``host`` and ``port``.

    :param platform: first-level config key (note: shadows the ``platform`` module inside this function)
    :param hostname: second-level config key
    :param sql: third-level config key
    :param local: fourth-level config key; forced to ``'local'`` when this machine's hostname is ``xigua_lx``
    :param config_file: path of the JSON config; defaults to ``my_config.txt`` under ``support_path``
    :return: ``(engine, username, password, host, port)``; a tuple of five ``None``
             values when the config file is missing, so callers that unpack the
             result keep working
    """
    if not config_file:
        config_file = os.path.join(support_path, 'my_config.txt')
    if not os.path.isfile(config_file):
        print(f'缺少配置文件,无法读取配置文件: {config_file}')
        # Same arity as the success path: callers unpack five values.
        return None, None, None, None, None

    if socket.gethostname() == 'xigua_lx':
        local = 'local'

    with open(config_file, 'r', encoding='utf-8') as f:
        conf = json.load(f)
    conf_data = conf[platform][hostname][sql][local]
    username, password, host, port = conf_data['username'], conf_data['password'], conf_data['host'], conf_data['port']
    _engine = mysql.MysqlUpload(username=username, password=password, host=host, port=port, charset='utf8mb4')
    return _engine, username, password, host, port
|
35
|
-
|
36
|
-
|
37
|
-
def return_host(conf_data):
    """
    Pull the connection settings out of one config entry.

    :param conf_data: mapping with the keys ``username``, ``password``, ``host`` and ``port``
    :return: tuple ``(username, password, host, port)``
    """
    wanted = ('username', 'password', 'host', 'port')
    return tuple(conf_data[field] for field in wanted)
|
44
|
-
|
45
|
-
|
46
|
-
def return_one_engine(conf_data):
    """
    Create one ``mysql.MysqlUpload`` from a config entry.

    :param conf_data: mapping accepted by ``return_host``
    :return: the uploader, created with charset ``utf8mb4``
    """
    credentials = dict(zip(('username', 'password', 'host', 'port'), return_host(conf_data)))
    return mysql.MysqlUpload(charset='utf8mb4', **credentials)
|
52
|
-
|
53
|
-
|
54
|
-
def get_hostname(platform, hostname, sql, local):
    """
    Read ``my_config.txt`` under ``support_path`` and return one host's settings.

    The four arguments are used, in order, as nested keys into the JSON config.

    :return: ``(username, password, host, port)`` as produced by ``return_host``
    """
    with open(os.path.join(support_path, 'my_config.txt'), 'r', encoding='utf-8') as handle:
        node = json.load(handle)
    # Walk down the nested config one level per argument.
    for key in (platform, hostname, sql, local):
        node = node[key]
    return return_host(node)
|
63
|
-
|
64
|
-
|
65
|
-
def get_engine_bak():
    """
    Legacy helper, scheduled for removal.

    Builds three uploaders (``xigua_lx``, ``company``, ``xigua_ts``) from
    ``my_config.txt``. Which machine is addressed through its ``local`` entry
    depends on the hostname this code runs on; every other machine uses its
    ``remoto`` entry.

    :return: ``(m_engine, company_engine, ts_engine, (username, password, host, port))``,
             or ``None`` when the support directory does not exist
    """
    config_file = os.path.join(support_path, 'my_config.txt')
    if not os.path.isdir(support_path):
        # The original message referenced an undefined name and raised NameError here.
        print(f'缺少配置文件,无法读取配置文件: {config_file}')
        return

    with open(config_file, 'r', encoding='utf-8') as f:
        conf = json.load(f)

    this_host = socket.gethostname()
    on_company = this_host in ('company', 'Mac2.local')
    on_xigua_lx = this_host in ('xigua_lx', 'xigua1', 'MacBookPro')

    windows = conf['Windows']
    lx_conf = windows['xigua_lx']['mysql']['local' if on_xigua_lx else 'remoto']
    company_conf = windows['company']['mysql']['local' if on_company else 'remoto']
    ts_conf = windows['xigua_ts']['mysql']['remoto']

    m_engine = return_one_engine(conf_data=lx_conf)
    company_engine = return_one_engine(conf_data=company_conf)
    ts_engine = return_one_engine(conf_data=ts_conf)
    # The returned credentials are the company ones on the company machines and
    # the xigua_lx ones everywhere else.
    username, password, host, port = return_host(company_conf if on_company else lx_conf)

    return m_engine, company_engine, ts_engine, (username, password, host, port)
|
104
|
-
|
105
|
-
|
106
|
-
def write_back(datas):
    """
    Write ``datas`` back to ``my_config.txt`` under ``support_path`` as JSON.

    :param datas: JSON-serialisable object
    :return: None; prints a message and writes nothing when the support directory is missing
    """
    # Resolve the path first: the message below needs it. The original read
    # ``file`` before assigning it and raised UnboundLocalError in this branch.
    file = os.path.join(support_path, 'my_config.txt')
    if not os.path.isdir(support_path):
        print(f'缺少配置文件,无法读取配置文件: {file}')
        return
    with open(file, 'w', encoding='utf-8') as f:
        json.dump(datas, f, ensure_ascii=False, sort_keys=False, indent=4)
|
114
|
-
|
115
|
-
|
116
|
-
|
117
|
-
if __name__ == '__main__':
    # Manual smoke test: resolve one host's credentials, then build its engine.
    _lookup = dict(platform='Windows', hostname='xigua_lx', sql='mysql', local='remoto')
    username, password, host, port = get_hostname(**_lookup)
    res = get_mysql_engine(**_lookup)
    print(res)
|
mdbq/config/myconfig.py
DELETED
@@ -1,32 +0,0 @@
|
|
1
|
-
# -*- coding: UTF-8 -*-
|
2
|
-
import os
|
3
|
-
import json
|
4
|
-
from mdbq.config import set_support
|
5
|
-
"""
|
6
|
-
用来读取本地配置文件
|
7
|
-
"""
|
8
|
-
|
9
|
-
|
10
|
-
def main():
    """
    Load the local ``my_config.txt`` from the support folder.

    :return: the parsed JSON content, or ``None`` (after printing a message)
             when the file does not exist
    """
    file = os.path.join(set_support.SetSupport(dirname='support').dirname, 'my_config.txt')
    if os.path.isfile(file):
        with open(file, 'r', encoding='utf-8') as handle:
            return json.load(handle)
    print(f'缺少配置文件,无法读取配置文件: {file}')
    return None
|
19
|
-
|
20
|
-
|
21
|
-
def write_back(datas):
    """Serialise ``datas`` as JSON into ``my_config.txt`` inside the support folder."""
    target = os.path.join(set_support.SetSupport(dirname='support').dirname, 'my_config.txt')
    with open(target, 'w+', encoding='utf-8') as handle:
        json.dump(datas, handle, ensure_ascii=False, sort_keys=False, indent=4)
|
27
|
-
|
28
|
-
|
29
|
-
|
30
|
-
if __name__ == '__main__':
    # Manual check: print the parsed configuration.
    print(main())
|
mdbq/config/products.py
DELETED
@@ -1,159 +0,0 @@
|
|
1
|
-
# -*- coding: UTF-8 -*-
|
2
|
-
import json
|
3
|
-
import os
|
4
|
-
import platform
|
5
|
-
import getpass
|
6
|
-
import socket
|
7
|
-
import pandas as pd
|
8
|
-
from mdbq.mysql import mysql
|
9
|
-
from mdbq.config import default
|
10
|
-
from numpy.ma.core import product
|
11
|
-
|
12
|
-
"""
|
13
|
-
天猫货品年份基准对照
|
14
|
-
用于聚合数据,通过此数据表进一步可确定商品上架年月
|
15
|
-
"""
|
16
|
-
# Resolve the default MySQL host once at import time.
# get_mysql_engine may return a bare None when the config file is missing;
# normalise that to five None values so the unpacking below cannot raise.
_engine_info = default.get_mysql_engine(platform='Windows', hostname='xigua_lx', sql='mysql', local='remoto', config_file=None)
if _engine_info is None:
    _engine_info = (None, None, None, None, None)
m_engine, username, password, host, port = _engine_info
if not username:
    print(f'找不到主机1:')
|
19
|
-
|
20
|
-
|
21
|
-
class Products:
    """
    Tmall product-id baseline used to estimate when a product was launched.

    Each baseline row pairs a product-id threshold with a launch label. A
    product is assigned the label of the first row whose threshold is smaller
    than its id.
    """

    # (product-id threshold, launch label), ordered from newest to oldest.
    # market_date() depends on this descending order.
    _TMALL_BASELINE = (
        ('848929365673', '2024年11月'),
        ('840499705810', '2024年10月'),
        ('830789689032', '2024年9月'),
        ('822020840000', '2024年8月'),
        ('811000000000', '2024年7月'),
        ('800000000000', '2024年6月'),
        ('791359643000', '2024年5月'),
        ('778971448000', '2024年4月'),
        ('770576016820', '2024年3月'),
        ('766115058400', '2024年2月'),
        ('759478591187', '2024年1月'),
        ('752770183000', '2023年12月'),
        ('745123890000', '2023年11月'),
        ('741000000000', '2023年10月'),
        ('736841920000', '2023年9月'),
        ('730800000000', '2023年8月'),
        ('726939636835', '2023年7月'),
        ('721366048631', '2023年6月'),
        ('716130443004', '2023年5月'),
        ('709824308589', '2023年4月'),
        ('705440027804', '2023年3月'),
        ('701096067973', '2023年2月'),
        ('696017000000', '2023年1月'),
        ('666510000000', '2022年货品'),
        ('636010000000', '2021年货品'),
        ('610485872286', '2020年货品'),
        ('585066000000', '2019年货品'),
        ('563237000000', '2018年货品'),
        ('100', '历史悠久'),
    )

    def __init__(self):
        self.datas = []

    def update_my_datas(self):
        """
        Append the built-in baseline rows to ``self.datas``.

        Rows whose product id is already present are skipped, so calling this
        repeatedly (``market_date`` and ``to_mysql`` both do) no longer
        duplicates the baseline.
        """
        known_ids = {item.get('商品id') for item in self.datas}
        self.datas += [
            {'平台': '天猫', '商品id': product_id, '上市年份': label}
            for product_id, label in self._TMALL_BASELINE
            if product_id not in known_ids
        ]

    def to_mysql(self):
        """Upload the baseline rows to table ``货品年份基准`` in database ``属性设置3``."""
        self.update_my_datas()
        df = pd.DataFrame(self.datas)
        m_engine = mysql.MysqlUpload(
            username=username,
            password=password,
            host=host,
            port=port,
        )
        m_engine.insert_many_dict(
            db_name='属性设置3',
            table_name='货品年份基准',
            dict_data_list=df.to_dict(orient='records'),
            unique_main_key=['商品id'],
            set_typ={
                '商品id': 'bigint',
                '平台': 'varchar(100)',
                '上市年份': 'varchar(100)',
            },
        )

    def market_date(self, product_id: int):
        """
        Return the launch label for ``product_id``.

        :param product_id: product id as an int or a numeric string
        :return: the label of the first row in ``self.datas`` whose threshold is
                 strictly smaller than ``product_id``; ``None`` when the id is
                 not numeric or no row matches
        """
        try:
            product_id = int(product_id)
        except (TypeError, ValueError):
            return None
        self.update_my_datas()
        for item in self.datas:
            if product_id > int(item['商品id']):
                return item['上市年份']
        return None
|
149
|
-
|
150
|
-
|
151
|
-
def main():
    """Placeholder entry point; it intentionally does nothing."""
    return None
|
153
|
-
|
154
|
-
|
155
|
-
if __name__ == '__main__':
    # Manual check: look up the launch label of one sample product id.
    product_id = '696017020186'
    year = Products().market_date(product_id=product_id)
    print(f'{product_id}: {year}')
|
mdbq/config/set_support.py
DELETED
@@ -1,22 +0,0 @@
|
|
1
|
-
# -*- coding: UTF-8 -*-
|
2
|
-
import platform
|
3
|
-
import getpass
|
4
|
-
|
5
|
-
"""
|
6
|
-
专门用来设置 support 文件夹路径
|
7
|
-
support 文件夹包含很多配置类文件,是程序必不可少的依赖
|
8
|
-
"""
|
9
|
-
class SetSupport:
    """
    Resolve the path of the ``support`` folder for the current operating system.

    The ``dirname`` argument is stored first but is then replaced on every
    platform, so the resulting ``self.dirname`` depends only on the OS.
    """

    def __init__(self, dirname):
        self.dirname = dirname
        system = platform.system()
        if system == 'Darwin':
            self.dirname = f'/Users/{getpass.getuser()}/数据中心2/support'
        elif system == 'Windows':
            self.dirname = 'C:\\数据中心2\\support'
        else:
            # Relative fallback; not actually used and could be removed.
            self.dirname = '数据中心2/support'
|
18
|
-
|
19
|
-
|
20
|
-
if __name__ == '__main__':
    # Manual check: show the support folder resolved for this machine.
    print(SetSupport(dirname='support').dirname)
|
mdbq/dataframe/__init__.py
DELETED
mdbq/dataframe/converter.py
DELETED
@@ -1,107 +0,0 @@
|
|
1
|
-
# -*- coding:utf-8 -*-
|
2
|
-
import pandas as pd
|
3
|
-
import numpy as np
|
4
|
-
from decimal import Decimal
|
5
|
-
import re
|
6
|
-
|
7
|
-
|
8
|
-
class DataFrameConverter(object):
    """Clean a DataFrame and coerce its columns to database-friendly types."""

    def __init__(self, df=None):
        # None instead of a DataFrame default: a default object would be shared
        # by every instance.
        self.df = pd.DataFrame({}) if df is None else df

    @staticmethod
    def _clean_column_name(col):
        """Lower-case ``col`` and turn punctuation into single underscores."""
        new_col = col.lower()
        # No count/flags argument: the original passed re.IGNORECASE positionally,
        # where it was taken as count=2 and only the first two matches were replaced.
        new_col = re.sub(r'[()\-,,&~^、 ()\"\'“”=·/。》《><!!`]', '_', new_col)
        new_col = new_col.replace(')', '')
        new_col = re.sub(r'_{2,}', '_', new_col)
        new_col = re.sub(r'_+$', '', new_col)
        return new_col

    def convert_df_cols(self, df=None):
        """
        Remove illegal values and convert column types.

        Per column: percentages become fractions, object columns are tried as
        int and then float, floats are rounded to 6 decimals, date-looking
        columns become datetimes, and the column name is normalised. A column
        named ``id`` (any case) is dropped so it cannot clash with an existing
        primary key on insert.

        :param df: frame to convert; when omitted or empty, ``self.df`` is used.
                   The frame passed in is modified in place by the cleaning steps.
        :return: the converted DataFrame, or ``None`` when there is no data
        """
        if df is None or len(df) == 0:
            df = self.df
        if df is None or len(df) == 0:
            return

        df.replace([np.inf, -np.inf], '0', inplace=True)  # clear infinities
        df.replace(to_replace=['\\N', '', 'nan', 'NAN'], value='0', regex=False, inplace=True)
        # '="' and '"' are stripped in two passes; a single combined pattern
        # would also damage values such as id=86785565.
        df.replace(to_replace=['="'], value='', regex=True, inplace=True)
        df.replace(to_replace=['"'], value='', regex=True, inplace=True)
        cols = df.columns.tolist()

        # Reset the index so df.loc[0, col] below always addresses the first row.
        df.reset_index(inplace=True, drop=True)

        for col in cols:
            if col.lower() == 'id':
                df.pop(col)
                continue

            try:
                # Some databases reject percentages, so store them as fractions.
                df[col] = df[col].apply(
                    lambda x: float(float((str(x).rstrip("%"))) / 100) if re.findall(r'^\d+\.?\d*%$', str(x)) else x)
            except Exception as e:
                print(f'留意错误信息: 位于列 -> {col} -> {e}')

            if col.lower().endswith('roi') or col.endswith(('占比', '率', '同比', '环比', '产出比')):
                df = df.astype({col: 'float64'}, errors='raise')

            if df[col].dtype == 'object':
                # Values containing "_" are left alone because int() would read
                # them as digit separators.
                try:
                    # np.int64 rather than int: very large values could otherwise
                    # become uint64 and fail on insert.
                    df[col] = df[col].apply(
                        lambda x: np.int64(str(x)) if '_' not in str(x) and '.' not in str(x) else x)
                except Exception:
                    pass  # best effort: leave the column unchanged
            if df[col].dtype == 'object':
                try:
                    df[col] = df[col].apply(lambda x: float(x) if '.' in str(x) and '_' not in str(x) else x)
                except Exception:
                    pass  # best effort: leave the column unchanged
            if df[col].dtype == 'float' or df[col].dtype == 'float64':
                df[col] = df[col].fillna(0.0).apply(lambda x: round(x, 6))

            # The first row decides whether the column looks like a date.
            value = df.loc[0, col]
            if value:
                res = re.match(r'\d{4}-\d{2}-\d{2}|\d{4}-\d{2}-\d{2} |\d{4}-\d{2}-\d{2} \d{2}:\d{2}:\d{2}'
                               r'|\d{4}/\d{1}/\d{1}|\d{4}/\d{1}/\d{2}|\d{4}/\d{2}/\d{1}|\d{4}/\d{2}/\d{2}', str(value))
                if res:
                    try:
                        df[col] = df[col].apply(lambda x: pd.to_datetime(x))
                    except Exception:
                        pass  # best effort: keep the original values
            df.rename(columns={col: self._clean_column_name(col)}, inplace=True)
        df.fillna(0, inplace=True)
        return df
|
97
|
-
|
98
|
-
|
99
|
-
if __name__ == '__main__':
    # Example usage:
    #   df = pd.DataFrame(np.random.randn(5, 3), columns=['a', 'b', 'c'])
    #   df = DataFrameConverter().convert_df_cols(df)
    # Manual check of the percentage pattern used by convert_df_cols.
    pattern = re.findall(r'^\d+\.?\d*%$', '1540%')
    print(pattern)
|
mdbq/log/__init__.py
DELETED
mdbq/log/mylogger.py
DELETED
@@ -1,66 +0,0 @@
|
|
1
|
-
import logging
|
2
|
-
from logging import Logger
|
3
|
-
from logging import handlers
|
4
|
-
|
5
|
-
|
6
|
-
class MyLogger(Logger):
    """A ``logging.Logger`` subclass that wires up its own handlers on construction."""

    def __init__(self, logger_name, level='INFO', is_stream_handler=True, file=None, debug_file=None,
                 max_bytes=False, back_count=10, when=None):
        """
        :param logger_name: name of the logger
        :param level: log level (DEBUG, INFO, WARNING, ERROR or CRITICAL)
        :param is_stream_handler: when true (the default), also log to the console
        :param file: file name to log to; ``None`` (the default) disables it
        :param debug_file: file name for detailed debug output; ``None`` disables it.
                           Avoid combining it with ``file``, records would be written twice
        :param when: intended for time-based rotation; accepted but currently has no effect
        :param max_bytes: when truthy, also log to a size-rotated file of at most this many bytes
        :param back_count: number of rotated files to keep
        """
        super().__init__(logger_name, level)
        date_format = "%Y-%m-%d %H:%M:%S"

        # Console output.
        if is_stream_handler:
            console = logging.StreamHandler()
            console.setFormatter(
                logging.Formatter("%(asctime)s %(levelname)s %(name)s: %(message)s", datefmt=date_format))
            self.addHandler(console)

        # Plain file output, then the more detailed debug file output.
        file_targets = (
            (file, "%(asctime)s %(name)s: %(message)s"),
            (debug_file, "%(asctime)s %(levelname)s %(name)s %(funcName)s: %(message)s"),
        )
        for path, layout in file_targets:
            if path:
                file_handler = logging.FileHandler(path, mode='a', encoding='utf-8')
                file_handler.setFormatter(logging.Formatter(layout, datefmt=date_format))
                self.addHandler(file_handler)

        # Size-based rotation into a fixed file name.
        if max_bytes:
            rotating = handlers.RotatingFileHandler(filename='日志_分割.txt', encoding='utf-8',
                                                    maxBytes=max_bytes, backupCount=back_count)
            rotating.setLevel(level)
            rotating.setFormatter(
                logging.Formatter(fmt='%(asctime)s %(name)s: %(message)s', datefmt=date_format))
            self.addHandler(rotating)

        # Time-based rotation (``when``) is not implemented.
|
63
|
-
|
64
|
-
|
65
|
-
if __name__ == '__main__':
|
66
|
-
pass
|