mdbq 3.7.4__py3-none-any.whl → 3.7.5__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (45) hide show
  1. {mdbq-3.7.4.dist-info → mdbq-3.7.5.dist-info}/METADATA +1 -1
  2. mdbq-3.7.5.dist-info/RECORD +4 -0
  3. {mdbq-3.7.4.dist-info → mdbq-3.7.5.dist-info}/WHEEL +1 -1
  4. mdbq-3.7.5.dist-info/top_level.txt +1 -0
  5. mdbq/__init__.py +0 -1
  6. mdbq/__version__.py +0 -3
  7. mdbq/aggregation/__init__.py +0 -4
  8. mdbq/aggregation/aggregation_bak.py +0 -1438
  9. mdbq/aggregation/datashow_bak.py +0 -1264
  10. mdbq/aggregation/optimize_data.py +0 -76
  11. mdbq/aggregation/query_data.py +0 -3869
  12. mdbq/bdup/__init__.py +0 -5
  13. mdbq/bdup/bdup.py +0 -111
  14. mdbq/config/__init__.py +0 -4
  15. mdbq/config/default.py +0 -131
  16. mdbq/config/myconfig.py +0 -32
  17. mdbq/config/products.py +0 -159
  18. mdbq/config/set_support.py +0 -22
  19. mdbq/dataframe/__init__.py +0 -4
  20. mdbq/dataframe/converter.py +0 -107
  21. mdbq/log/__init__.py +0 -4
  22. mdbq/log/mylogger.py +0 -66
  23. mdbq/log/spider_logging.py +0 -55
  24. mdbq/mongo/__init__.py +0 -4
  25. mdbq/mongo/mongo.py +0 -729
  26. mdbq/mysql/__init__.py +0 -4
  27. mdbq/mysql/mysql.py +0 -1784
  28. mdbq/mysql/s_query.py +0 -211
  29. mdbq/mysql/year_month_day.py +0 -38
  30. mdbq/other/__init__.py +0 -4
  31. mdbq/other/download_sku_picture.py +0 -985
  32. mdbq/other/porxy.py +0 -115
  33. mdbq/other/pov_city.py +0 -405
  34. mdbq/other/sku_picture_bak.py +0 -1081
  35. mdbq/other/ua_sj.py +0 -222
  36. mdbq/pbix/__init__.py +0 -4
  37. mdbq/pbix/pbix_refresh.py +0 -70
  38. mdbq/pbix/refresh_all.py +0 -158
  39. mdbq/pbix/refresh_all_old.py +0 -177
  40. mdbq/redis/__init__.py +0 -4
  41. mdbq/redis/getredis.py +0 -642
  42. mdbq/spider/__init__.py +0 -4
  43. mdbq/spider/aikucun.py +0 -494
  44. mdbq-3.7.4.dist-info/RECORD +0 -43
  45. mdbq-3.7.4.dist-info/top_level.txt +0 -1
mdbq/bdup/__init__.py DELETED
@@ -1,5 +0,0 @@
1
-
2
-
3
-
4
- # 百度云数据处理
5
-
mdbq/bdup/bdup.py DELETED
@@ -1,111 +0,0 @@
1
- # -*- coding: UTF-8 -*-
2
- import os
3
- import platform
4
- import subprocess
5
- from concurrent.futures import ThreadPoolExecutor
6
- from bypy import ByPy
7
-
8
-
9
class BaiDu:
    """Upload to, download from and clean up Baidu cloud storage through ``bypy``.

    Remote (cloud) paths must use forward slashes. Every method therefore
    normalises backslashes in the remote path it receives; a Windows-style
    remote path makes uploads fail because the file cannot be created on the
    cloud side.
    """

    # File names that are never uploaded and that are always purged remotely.
    _SYSTEM_FILES = ('.DS_Store', '.localized', 'desktop.ini', '$RECYCLE.BIN', 'Icon')

    def __init__(self):
        self.local_path = None
        self.remote_path = None
        self.skip: list = []  # extra file/folder names upload_file() must skip
        self.delete_remote_files: list = []  # extra remote names delete_files() removes
        self.bp = ByPy()
        self.count = 0  # files handed to bypy so far
        self.total = 0  # files scheduled for upload so far

    def upload_dir(self, local_path, remote_path):
        """Upload a whole folder, then remove unwanted files on the remote side.

        The files removed afterwards are the built-in system files plus
        whatever is listed in ``self.delete_remote_files``.

        :param local_path: local folder to upload
        :param remote_path: destination folder in the cloud
        :return: None
        """
        self.local_path = local_path
        self.remote_path = remote_path.replace('\\', '/')
        if not os.path.exists(self.local_path):
            print(f'{self.local_path}: 本地目录不存在,没有什么可传的')
            return

        if platform.system() == 'Windows':
            self.bp.upload(localpath=self.local_path, remotepath=self.remote_path)
        else:
            # Argument list instead of a shell string: paths containing quotes
            # or shell metacharacters can no longer break or hijack the command.
            command = ['bypy', 'upload', self.local_path, self.remote_path, '--on-dup', 'skip']
            try:
                subprocess.run(command)
            except (OSError, ValueError) as e:
                print(e)
        # Clean up from inside this method so the remote path is guaranteed to match.
        self.delete_files()

    def upload_file(self, local_path, remote_path, processes=False):
        """Upload a folder file by file, skipping unwanted names.

        Names in ``self.skip`` (plus the built-in system files) are skipped
        among the direct children of ``local_path`` and among the files of
        every sub-folder. ``self.skip`` itself is left untouched.

        :param local_path: local folder to upload
        :param remote_path: destination folder in the cloud
        :param processes: when true, upload through a thread pool
        :return: None
        :raises TypeError: if ``self.skip`` is not a list
        """
        if not isinstance(self.skip, list):
            raise TypeError('skip must be a list')
        skip = [*self.skip, *self._SYSTEM_FILES]
        self.local_path = local_path
        self.remote_path = remote_path.replace('\\', '/')
        if not os.path.exists(self.local_path):
            print(f'{self.local_path}: 本地目录不存在,没有什么可传的')
            return

        pending = []
        for entry in os.listdir(self.local_path):
            if entry in skip:
                continue
            local_p = os.path.join(self.local_path, entry)
            if os.path.isfile(local_p):
                rt_path = os.path.join(self.remote_path, entry).replace('\\', '/')
                pending.append({local_p: rt_path})
            elif os.path.isdir(local_p):
                for root, _dirs, files in os.walk(local_p, topdown=False):
                    for name in files:
                        if name in skip:
                            continue
                        lc_path = os.path.join(root, name)
                        # Map by relative path; a plain str.replace() breaks when
                        # local_path ends with a separator or recurs in the path.
                        relative = os.path.relpath(lc_path, self.local_path)
                        rt_path = os.path.join(self.remote_path, relative).replace('\\', '/')
                        pending.append({lc_path: rt_path})
        self.total += len(pending)

        if processes:
            # max_workers is left at the executor's default.
            # NOTE(review): self.count is updated without a lock, so progress
            # numbers printed from worker threads may be approximate.
            with ThreadPoolExecutor() as executor:
                executor.map(self.up_one_file, pending)
        else:
            for item in pending:
                self.up_one_file(file_dict=item)

    def up_one_file(self, file_dict: dict):
        """Upload every ``{local path: remote path}`` pair in ``file_dict``.

        :raises TypeError: if ``file_dict`` is not a dict
        """
        if not isinstance(file_dict, dict):
            raise TypeError('file_dict must be a dict')
        for k, v in file_dict.items():
            self.count += 1
            print(f'上传: {self.count}/{self.total} {k}')
            self.bp.upload(localpath=k, remotepath=v)

    def delete_files(self):
        """Remove unwanted files located directly under ``self.remote_path``.

        ``self.delete_remote_files`` is read but not modified; duplicates are
        dropped so each remote name is removed once per call.
        """
        targets = dict.fromkeys([*self.delete_remote_files, *self._SYSTEM_FILES])
        remote_root = self.remote_path.replace('\\', '/')
        for delete_file in targets:
            self.bp.remove(remotepath=f'{remote_root}/{delete_file}')

    def download_dir(self, local_path, remote_path):
        """Download a remote folder into ``local_path``, creating it if needed."""
        self.local_path = local_path
        self.remote_path = remote_path.replace('\\', '/')
        if not os.path.exists(self.local_path):
            os.makedirs(self.local_path, exist_ok=True)

        self.bp.download(localpath=f'{self.local_path}', remotepath=self.remote_path)
107
-
108
-
109
if __name__ == '__main__':
    # Manual smoke test: list the contents of the cloud root.
    bp = ByPy()
    bp.list()
mdbq/config/__init__.py DELETED
@@ -1,4 +0,0 @@
1
-
2
-
3
-
4
- # 配置文件
mdbq/config/default.py DELETED
@@ -1,131 +0,0 @@
1
- # -*- coding: UTF-8 -*-
2
- import os
3
- import json
4
- import platform
5
- import getpass
6
- import socket
7
- import logging
8
- from mdbq.mysql import mysql
9
-
10
# Pick the folder that holds the configuration files for the current OS.
_system_name = platform.system()
if _system_name == 'Darwin':
    support_path = f'/Users/{getpass.getuser()}/数据中心2/support'
elif _system_name == 'Windows':
    support_path = r'C:\数据中心2\support'
else:
    support_path = '数据中心2/support'  # fallback; marked as unused by the original author
logger = logging.getLogger(__name__)
17
-
18
-
19
def get_mysql_engine(platform='Windows', hostname='xigua_lx', sql='mysql', local='remoto', config_file=None):
    """Build a MySQL uploader from the JSON config file.

    The config is indexed as ``conf[platform][hostname][sql][local]`` and the
    selected entry must provide ``username``, ``password``, ``host`` and
    ``port``. When this machine's host name is ``xigua_lx`` the ``local``
    argument is overridden with ``'local'``.

    Note: the ``platform`` parameter shadows the ``platform`` module inside
    this function.

    :param platform: first-level key of the config
    :param hostname: second-level key of the config
    :param sql: third-level key of the config
    :param local: fourth-level key of the config
    :param config_file: path of the JSON config; defaults to ``my_config.txt``
        inside ``support_path``
    :return: ``(engine, username, password, host, port)``. When the config
        file does not exist, a tuple of five ``None`` values is returned so
        callers that unpack the result keep working.
    """
    if not config_file:
        config_file = os.path.join(support_path, 'my_config.txt')
    if not os.path.isfile(config_file):
        print(f'缺少配置文件,无法读取配置文件: {config_file}')
        # Same shape as the success path; a bare None made unpacking callers crash.
        return None, None, None, None, None

    if socket.gethostname() == 'xigua_lx':
        local = 'local'

    with open(config_file, 'r', encoding='utf-8') as f:
        conf = json.load(f)
    conf_data = conf[platform][hostname][sql][local]
    username, password, host, port = (
        conf_data['username'], conf_data['password'], conf_data['host'], conf_data['port'])
    _engine = mysql.MysqlUpload(username=username, password=password, host=host, port=port, charset='utf8mb4')
    return _engine, username, password, host, port
35
-
36
-
37
def return_host(conf_data):
    """Extract the connection settings from one config entry.

    :param conf_data: mapping with the keys ``username``, ``password``,
        ``host`` and ``port``
    :return: ``(username, password, host, port)``
    """
    username = conf_data['username']
    password = conf_data['password']
    host = conf_data['host']
    port = conf_data['port']
    return username, password, host, port
44
-
45
-
46
def return_one_engine(conf_data):
    """Return one ``mysql.MysqlUpload`` engine built from a config entry."""
    credentials = dict(zip(('username', 'password', 'host', 'port'), return_host(conf_data)))
    return mysql.MysqlUpload(charset='utf8mb4', **credentials)
52
-
53
-
54
def get_hostname(platform, hostname, sql, local):
    """Return ``(username, password, host, port)`` for one configured host.

    The four arguments are the successive keys used to index the JSON stored
    in ``my_config.txt`` under ``support_path``.
    """
    with open(os.path.join(support_path, 'my_config.txt'), 'r', encoding='utf-8') as f:
        entry = json.load(f)[platform][hostname][sql][local]
    return return_host(entry)
63
-
64
-
65
def get_engine_bak():
    """Build the three MySQL engines used by the project (marked for removal).

    Which entry is treated as ``local`` depends on this machine's host name:

    * ``company`` / ``Mac2.local``: the company server is local.
    * ``xigua_lx`` / ``xigua1`` / ``MacBookPro``: the ``xigua_lx`` server is local.
    * anything else: every server is reached remotely.

    :return: ``(m_engine, company_engine, ts_engine, (username, password, host, port))``.
        The credential tuple comes from the company entry in the first case
        and from the ``xigua_lx`` entry otherwise. Returns ``None`` when
        ``support_path`` is not a directory.
    """
    config_file = os.path.join(support_path, 'my_config.txt')
    if not os.path.isdir(support_path):
        # The original message interpolated an undefined name and raised NameError.
        print(f'缺少配置文件,无法读取配置文件: {config_file}')
        return

    with open(config_file, 'r', encoding='utf-8') as f:
        conf = json.load(f)

    this_host = socket.gethostname()
    hosts = conf['Windows']
    if this_host in ('company', 'Mac2.local'):
        m_conf = hosts['xigua_lx']['mysql']['remoto']
        company_conf = hosts['company']['mysql']['local']
        primary_conf = company_conf
    elif this_host in ('xigua_lx', 'xigua1', 'MacBookPro'):
        m_conf = hosts['xigua_lx']['mysql']['local']
        company_conf = hosts['company']['mysql']['remoto']
        primary_conf = m_conf
    else:
        m_conf = hosts['xigua_lx']['mysql']['remoto']
        company_conf = hosts['company']['mysql']['remoto']
        primary_conf = m_conf
    ts_conf = hosts['xigua_ts']['mysql']['remoto']

    m_engine = return_one_engine(conf_data=m_conf)
    company_engine = return_one_engine(conf_data=company_conf)
    ts_engine = return_one_engine(conf_data=ts_conf)
    username, password, host, port = return_host(primary_conf)
    return m_engine, company_engine, ts_engine, (username, password, host, port)
104
-
105
-
106
def write_back(datas):
    """Write ``datas`` back to ``my_config.txt`` as indented JSON.

    Nothing is written when ``support_path`` is not an existing directory;
    a message naming the config file is printed instead.

    :param datas: JSON-serialisable object to store
    :return: None
    """
    # Resolve the path first: the guard below reports it, and the original
    # read the variable before assigning it (UnboundLocalError).
    file = os.path.join(support_path, 'my_config.txt')
    if not os.path.isdir(support_path):
        print(f'缺少配置文件,无法读取配置文件: {file}')
        return
    with open(file, 'w+', encoding='utf-8') as f:
        json.dump(datas, f, ensure_ascii=False, sort_keys=False, indent=4)
114
-
115
-
116
-
117
if __name__ == '__main__':
    # Manual check: resolve the credentials and the engine for one host.
    _target = dict(platform='Windows', hostname='xigua_lx', sql='mysql', local='remoto')
    username, password, host, port = get_hostname(**_target)
    res = get_mysql_engine(**_target)
    print(res)
mdbq/config/myconfig.py DELETED
@@ -1,32 +0,0 @@
1
- # -*- coding: UTF-8 -*-
2
- import os
3
- import json
4
- from mdbq.config import set_support
5
- """
6
- 用来读取本地配置文件
7
- """
8
-
9
-
10
def main():
    """Read the local ``my_config.txt`` and return its parsed JSON content.

    Prints a message and returns ``None`` when the file does not exist.
    """
    config_path = os.path.join(set_support.SetSupport(dirname='support').dirname, 'my_config.txt')
    if os.path.isfile(config_path):
        with open(config_path, 'r', encoding='utf-8') as f:
            return json.load(f)
    print(f'缺少配置文件,无法读取配置文件: {config_path}')
    return None
19
-
20
-
21
def write_back(datas):
    """Write ``datas`` back to the local ``my_config.txt`` as indented JSON."""
    support_dir = set_support.SetSupport(dirname='support').dirname
    target = os.path.join(support_dir, 'my_config.txt')
    with open(target, 'w+', encoding='utf-8') as handle:
        json.dump(datas, handle, ensure_ascii=False, sort_keys=False, indent=4)
27
-
28
-
29
-
30
if __name__ == '__main__':
    # Manual check: print the parsed configuration.
    d = main()
    print(d)
mdbq/config/products.py DELETED
@@ -1,159 +0,0 @@
1
- # -*- coding: UTF-8 -*-
2
- import json
3
- import os
4
- import platform
5
- import getpass
6
- import socket
7
- import pandas as pd
8
- from mdbq.mysql import mysql
9
- from mdbq.config import default
10
- from numpy.ma.core import product
11
-
12
- """
13
- 天猫货品年份基准对照
14
- 用于聚合数据,通过此数据表进一步可确定商品上架年月
15
- """
16
# Resolve the default MySQL connection once, at import time.
# get_mysql_engine() can return None when the config file is missing, so
# fall back to five None values instead of failing to unpack.
_engine_info = default.get_mysql_engine(
    platform='Windows', hostname='xigua_lx', sql='mysql', local='remoto', config_file=None)
m_engine, username, password, host, port = _engine_info or (None,) * 5
if not username:
    print(f'找不到主机1:')
19
-
20
-
21
class Products:
    """Baseline table mapping Tmall product ids to the period a product launched.

    Each baseline row holds a product-id threshold and a launch label.
    ``market_date`` returns the label of the first row whose threshold is
    smaller than the given id, so it relies on the rows being stored from the
    highest threshold to the lowest.
    """

    _PLATFORM = '天猫'
    # (product-id threshold, launch label), highest threshold first.
    _BASELINE = (
        ('848929365673', '2024年11月'),
        ('840499705810', '2024年10月'),
        ('830789689032', '2024年9月'),
        ('822020840000', '2024年8月'),
        ('811000000000', '2024年7月'),
        ('800000000000', '2024年6月'),
        ('791359643000', '2024年5月'),
        ('778971448000', '2024年4月'),
        ('770576016820', '2024年3月'),
        ('766115058400', '2024年2月'),
        ('759478591187', '2024年1月'),
        ('752770183000', '2023年12月'),
        ('745123890000', '2023年11月'),
        ('741000000000', '2023年10月'),
        ('736841920000', '2023年9月'),
        ('730800000000', '2023年8月'),
        ('726939636835', '2023年7月'),
        ('721366048631', '2023年6月'),
        ('716130443004', '2023年5月'),
        ('709824308589', '2023年4月'),
        ('705440027804', '2023年3月'),
        ('701096067973', '2023年2月'),
        ('696017000000', '2023年1月'),
        ('666510000000', '2022年货品'),
        ('636010000000', '2021年货品'),
        ('610485872286', '2020年货品'),
        ('585066000000', '2019年货品'),
        ('563237000000', '2018年货品'),
        ('100', '历史悠久'),
    )

    def __init__(self):
        self.datas = []

    def update_my_datas(self):
        """Add the built-in baseline rows to ``self.datas``.

        Rows already present are not added again, so repeated calls (every
        ``market_date`` / ``to_mysql`` call triggers one) no longer make the
        list grow or produce duplicate uploads.
        """
        for product_id, label in self._BASELINE:
            row = {'平台': self._PLATFORM, '商品id': product_id, '上市年份': label}
            if row not in self.datas:
                self.datas.append(row)

    def to_mysql(self):
        """Upload the baseline table to MySQL, keyed by product id.

        Uses the module-level ``username``, ``password``, ``host`` and ``port``.
        """
        self.update_my_datas()
        df = pd.DataFrame(self.datas)
        m_engine = mysql.MysqlUpload(
            username=username,
            password=password,
            host=host,
            port=port,
        )
        m_engine.insert_many_dict(
            db_name='属性设置3',
            table_name='货品年份基准',
            dict_data_list=df.to_dict(orient='records'),
            unique_main_key=['商品id'],
            set_typ={
                '商品id': 'bigint',
                '平台': 'varchar(100)',
                '上市年份': 'varchar(100)',
            },
        )

    def market_date(self, product_id: int):
        """Return the launch label for ``product_id``.

        :param product_id: product id, as an int or anything ``int()`` accepts
        :return: the label of the first baseline row whose threshold is
            strictly smaller than ``product_id``; ``None`` when the id cannot
            be converted to an int or no row matches
        """
        try:
            product_id = int(product_id)
        except (TypeError, ValueError):
            return None
        self.update_my_datas()
        return next(
            (item['上市年份'] for item in self.datas if product_id > int(item['商品id'])),
            None,
        )
149
-
150
-
151
def main():
    """Placeholder entry point; it does nothing and returns ``None``."""
    return None
153
-
154
-
155
if __name__ == '__main__':
    # Manual check: look up the launch period of one sample product id.
    product_id = '696017020186'
    year = Products().market_date(product_id=product_id)
    print(f'{product_id}: {year}')
mdbq/config/set_support.py DELETED
@@ -1,22 +0,0 @@
1
- # -*- coding: UTF-8 -*-
2
- import platform
3
- import getpass
4
-
5
- """
6
- 专门用来设置 support 文件夹路径
7
- support 文件夹包含很多配置类文件,是程序必不可少的依赖
8
- """
9
class SetSupport:
    """Resolve the path of the ``support`` folder for the current OS.

    The ``dirname`` argument is stored first but is then always replaced by
    an OS-specific path, so it does not influence the final value.
    """

    def __init__(self, dirname):
        self.dirname = dirname
        current_os = platform.system()
        if current_os == 'Darwin':
            self.dirname = f'/Users/{getpass.getuser()}/数据中心2/support'
        elif current_os == 'Windows':
            self.dirname = 'C:\\数据中心2\\support'
        else:
            self.dirname = '数据中心2/support'  # fallback; marked as unused by the original author
18
-
19
-
20
if __name__ == '__main__':
    # Manual check: print the resolved support folder.
    s = SetSupport(dirname='support').dirname
    print(s)
mdbq/dataframe/__init__.py DELETED
@@ -1,4 +0,0 @@
1
-
2
-
3
-
4
- # dataframe 优化
mdbq/dataframe/converter.py DELETED
@@ -1,107 +0,0 @@
1
- # -*- coding:utf-8 -*-
2
- import pandas as pd
3
- import numpy as np
4
- from decimal import Decimal
5
- import re
6
-
7
-
8
class DataFrameConverter(object):
    """Clean a DataFrame and coerce its columns before a database insert."""

    # A whole-cell percentage such as "12.5%".
    _PERCENT = re.compile(r'^\d+\.?\d*%$')
    # Date-like prefixes: "2024-01-02", "2024-01-02 03:04:05", "2024/1/2", ...
    _DATE_LIKE = re.compile(
        r'\d{4}-\d{2}-\d{2}|\d{4}-\d{2}-\d{2} |\d{4}-\d{2}-\d{2} \d{2}:\d{2}:\d{2}'
        r'|\d{4}/\d{1}/\d{1}|\d{4}/\d{1}/\d{2}|\d{4}/\d{2}/\d{1}|\d{4}/\d{2}/\d{2}')
    # Characters replaced by "_" in column names.
    _NAME_JUNK = re.compile(r'[()\-,,&~^、 ()\"\'“”=·/。》《><!!`]')
    # Column-name suffixes that mark a ratio column, forced to float64.
    _RATIO_SUFFIXES = ('占比', '率', '同比', '环比', '产出比')

    def __init__(self, df=None):
        """
        :param df: DataFrame used by ``convert_df_cols`` when it is called
            without one; defaults to a fresh empty DataFrame
        """
        # A new frame per instance: a DataFrame default argument is created
        # once and shared by every instance.
        self.df = pd.DataFrame({}) if df is None else df

    def convert_df_cols(self, df=None):
        """Clean invalid values and convert object columns to numeric/date types.

        Steps, in order:

        * replace ``inf``/``-inf`` and the placeholders ``\\N``, ``''``,
          ``nan``, ``NAN`` with ``'0'``; strip ``="`` and ``"``;
        * drop any column named ``id`` (case-insensitive), because the frame
          must not carry an ``id`` that could clash with the table's primary key;
        * turn whole-cell percentages into fractions;
        * cast ratio-like columns to ``float64``;
        * try int, then float, conversion of object columns; round floats to
          six decimals;
        * parse a column as datetime when its first value looks like a date;
        * normalise the column name (lower case, special characters to ``_``);
        * fill the remaining missing values with ``0``.

        The frame passed in is modified in place for most of these steps.

        :param df: frame to convert; ``self.df`` is used when it is omitted or empty
        :return: the converted DataFrame, or ``None`` when there is no data
        """
        if df is None or len(df) == 0:
            df = self.df
        if len(df) == 0:
            return

        df.replace([np.inf, -np.inf], '0', inplace=True)
        df.replace(to_replace=['\\N', '', 'nan', 'NAN'], value='0', regex=False, inplace=True)
        # '="' and '"' are stripped in two passes, never as one pattern,
        # because values such as id=86785565 exist.
        df.replace(to_replace=['="'], value='', regex=True, inplace=True)
        df.replace(to_replace=['"'], value='', regex=True, inplace=True)
        cols = df.columns.tolist()

        # Make sure label 0 exists for the df.loc[0, col] probe below.
        df.reset_index(inplace=True, drop=True)

        for col in cols:
            if col.lower() == 'id':
                df.pop(col)
                continue

            try:
                # Percentages are not portable across databases; store fractions.
                df[col] = df[col].apply(self._percent_to_fraction)
            except Exception as e:
                print(f'留意错误信息: 位于列 -> {col} -> {e}')

            if col.endswith(self._RATIO_SUFFIXES) or col.lower().endswith('roi'):
                df = df.astype({col: 'float64'}, errors='raise')

            if df[col].dtype == 'object':
                try:
                    # np.int64 rather than int(): very large values could end
                    # up as uint64 and break the database insert. Values
                    # containing "_" are skipped because the number parsers
                    # would misread them.
                    df[col] = df[col].apply(
                        lambda x: np.int64(str(x)) if '_' not in str(x) and '.' not in str(x) else x)
                except (ValueError, TypeError, OverflowError):
                    pass  # not an integer column; leave it unchanged
            if df[col].dtype == 'object':
                try:
                    df[col] = df[col].apply(
                        lambda x: float(x) if '.' in str(x) and '_' not in str(x) else x)
                except (ValueError, TypeError, OverflowError):
                    pass  # not a float column; leave it unchanged
            if df[col].dtype == 'float' or df[col].dtype == 'float64':
                df[col] = df[col].fillna(0.0).apply(lambda x: round(x, 6))

            # Only the first row is inspected to decide whether this is a date column.
            value = df.loc[0, col]
            if value and self._DATE_LIKE.match(str(value)):
                try:
                    df[col] = df[col].apply(lambda x: pd.to_datetime(x))
                except (ValueError, TypeError, OverflowError):
                    pass  # mixed content; keep the original values

            df.rename(columns={col: self._clean_column_name(col)}, inplace=True)
        df.fillna(0, inplace=True)
        return df

    @classmethod
    def _percent_to_fraction(cls, x):
        """Return ``x`` / 100 as a float when ``x`` is a percentage string, else ``x``."""
        text = str(x)
        if cls._PERCENT.match(text):
            return float(float(text.rstrip("%")) / 100)
        return x

    @classmethod
    def _clean_column_name(cls, col):
        """Lower-case ``col`` and replace every special character with ``_``.

        Runs of underscores are collapsed and trailing ones removed. The
        original passed ``re.IGNORECASE`` where ``re.sub`` expects ``count``,
        which limited the replacement to the first two matches.
        """
        new_col = cls._NAME_JUNK.sub('_', col.lower())
        new_col = new_col.replace(')', '')
        new_col = re.sub(r'_{2,}', '_', new_col)
        return re.sub(r'_+$', '', new_col)
97
-
98
-
99
if __name__ == '__main__':
    # Manual check of the percentage pattern used by the converter.
    pattern = re.findall(r'^\d+\.?\d*%$', '1540%')
    print(pattern)
mdbq/log/__init__.py DELETED
@@ -1,4 +0,0 @@
1
-
2
-
3
-
4
- # 日志器
mdbq/log/mylogger.py DELETED
@@ -1,66 +0,0 @@
1
- import logging
2
- from logging import Logger
3
- from logging import handlers
4
-
5
-
6
class MyLogger(Logger):
    """A ``Logger`` subclass that wires up its own handlers on construction."""

    def __init__(self, logger_name, level='INFO', is_stream_handler=True, file=None, debug_file=None,
                 max_bytes=False, back_count=10, when=None):
        """
        :param logger_name: name of the logger
        :param level: log level: DEBUG, INFO, WARNING, ERROR or CRITICAL
        :param is_stream_handler: when true (default), also log to the console
        :param file: file name to log to; ``None`` (default) disables it
        :param debug_file: file name for detailed debug records; ``None``
            (default) disables it. Avoid combining it with ``file``, since
            records would then be written twice
        :param max_bytes: when truthy, rotate ``日志_分割.txt`` at this size
        :param back_count: number of rotated files to keep
        :param when: reserved for time-based rotation; currently has no effect
        """
        super().__init__(logger_name, level)
        date_format = "%Y-%m-%d %H:%M:%S"

        def attach(handler, fmt):
            # Give the handler its formatter, then register it on this logger.
            handler.setFormatter(logging.Formatter(fmt, datefmt=date_format))
            self.addHandler(handler)

        # Console output.
        if is_stream_handler:
            attach(logging.StreamHandler(), "%(asctime)s %(levelname)s %(name)s: %(message)s")

        # Plain log file.
        if file:
            attach(logging.FileHandler(file, mode='a', encoding='utf-8'),
                   "%(asctime)s %(name)s: %(message)s")

        # Detailed debug log file (adds the level and the calling function).
        if debug_file:
            attach(logging.FileHandler(debug_file, mode='a', encoding='utf-8'),
                   "%(asctime)s %(levelname)s %(name)s %(funcName)s: %(message)s")

        # Size-based rotation.
        if max_bytes:
            rotating = handlers.RotatingFileHandler(filename='日志_分割.txt', encoding='utf-8',
                                                    maxBytes=max_bytes, backupCount=back_count)
            rotating.setLevel(level)
            attach(rotating, '%(asctime)s %(name)s: %(message)s')

        # Time-based rotation is not implemented.
        if when:
            pass
63
-
64
-
65
if __name__ == '__main__':
    # Nothing to run when the module is executed directly.
    pass