mdbq 3.6.16__py3-none-any.whl → 3.6.17__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- mdbq/redis/getredis.py +2 -2
- {mdbq-3.6.16.dist-info → mdbq-3.6.17.dist-info}/METADATA +1 -1
- {mdbq-3.6.16.dist-info → mdbq-3.6.17.dist-info}/RECORD +5 -8
- mdbq/mysql/mysql_bak.py +0 -1808
- mdbq/mysql/recheck_mysql.py +0 -201
- mdbq/redis/getredis_/344/274/230/345/214/226hash.py +0 -710
- {mdbq-3.6.16.dist-info → mdbq-3.6.17.dist-info}/WHEEL +0 -0
- {mdbq-3.6.16.dist-info → mdbq-3.6.17.dist-info}/top_level.txt +0 -0
mdbq/mysql/recheck_mysql.py
DELETED
@@ -1,201 +0,0 @@
|
|
1
|
-
# -*- coding: UTF-8 -*-
|
2
|
-
import os
|
3
|
-
import time
|
4
|
-
import pandas as pd
|
5
|
-
import warnings
|
6
|
-
import datetime
|
7
|
-
from dateutil.relativedelta import relativedelta
|
8
|
-
from mdbq.config import get_myconf
|
9
|
-
from mdbq.mysql import mysql
|
10
|
-
from mdbq.mysql import s_query
|
11
|
-
|
12
|
-
""" 这是一个临时文件, 用来修改原始文件中大量 csv 文件中的场景名字(万相台报表) """
|
13
|
-
warnings.filterwarnings('ignore')
|
14
|
-
|
15
|
-
|
16
|
-
# Legacy second-level scenes and the current scene each one maps to, stored as
# (legacy id, legacy name, current id, current name).
_SCENE_TABLE = (
    ('436', '全站推广', '436', '全站推广'),
    ('407', '粉丝快', '372', '人群推广'),
    ('114', '货品加速', '376', '货品运营'),
    ('372', '精准人群推广', '372', '人群推广'),
    ('371', '关键词推广', '371', '关键词推广'),
    ('361', '全店智投', '361', '全店智投'),
    ('183', '超级短视频', '183', '超级短视频'),
    ('154', '活动加速', '154', '活动加速'),
    ('133', '会员快', '372', '人群推广'),
    ('108', '超级直播', '108', '超级直播'),
    ('105', '上新快', '105', '上新快'),
    ('78', '拉新快', '372', '人群推广'),
)
# Keys of the dict handed back to callers, in the same order as a table row.
_SCENE_KEYS = ('原二级场景id', '原二级场景名字', '场景id', '场景名字')


def id_account_rpt(id_rpt):
    """Look up a legacy scene by id or name and return its full mapping.

    The lookup only works from old to new: several legacy scenes collapse
    into the same current scene (e.g. 粉丝快 -> 人群推广 and
    精准人群推广 -> 人群推广), so a current name cannot be mapped back.

    Args:
        id_rpt: Legacy second-level scene id or legacy scene name. Spreadsheet
            style wrappers such as ``="407"`` are unwrapped, surrounding
            whitespace is ignored, and numeric ids (``407`` or ``407.0``) are
            accepted as well as strings.

    Returns:
        A new dict with the keys '原二级场景id', '原二级场景名字', '场景id'
        and '场景名字', or None when nothing matches.
    """
    # A numeric column holding missing values is read as float; 407.0 must
    # still resolve to the id '407'.
    if isinstance(id_rpt, float) and id_rpt.is_integer():
        id_rpt = int(id_rpt)
    key = str(id_rpt).replace('="', '').replace('"', '').strip()

    for row in _SCENE_TABLE:
        if key in row[:2]:  # matches either the legacy id or the legacy name
            # Build a fresh dict so callers can never mutate the shared table.
            return dict(zip(_SCENE_KEYS, row))
    return None
|
104
|
-
|
105
|
-
|
106
|
-
class ReCheckMysql:
    """Re-download a report table, correct its scene names and upload it again."""

    def __init__(self, target_service):
        """Create the query client used to download data.

        Args:
            target_service: Service name passed to
                ``get_myconf.select_config_values`` to obtain the MySQL
                credentials for the download connection.
        """
        username, password, host, port = get_myconf.select_config_values(
            target_service=target_service,
            database='mysql',
        )
        self.download = s_query.QueryDatas(username=username, password=password, host=host, port=port)
        self.months = 1  # how many months back to read; 0 means the current month only

    @staticmethod
    def months_data(num=0, end_date=None):
        """Return the date window covering the last ``num`` months.

        Args:
            num: Number of months to go back; 0 selects the current month.
            end_date: End of the window. Anything ``pandas.to_datetime``
                accepts (datetime, date, string); defaults to now.

        Returns:
            ``(start_date, end_date)`` as pandas Timestamps, where
            ``start_date`` is midnight on the first day of the month ``num``
            months before ``end_date``.
        """
        if not end_date:
            end_date = datetime.datetime.now()
        end_date = pd.to_datetime(end_date)
        # Only the month matters for the start of the window, so shift the
        # monthly period and take its first instant.
        start_date = (end_date.to_period('M') - num).to_timestamp()
        return start_date, end_date

    def recheck_cols(self, db_name, table_name, service_name='company'):
        """Rewrite the '场景名字' column of a table from its '场景id' column.

        Rows whose id is not known to ``id_account_rpt`` keep their current
        name. The corrected frame is uploaded back to the same table.

        Args:
            db_name: Database to read from and write back to.
            table_name: Table to read from and write back to.
            service_name: Service whose credentials are used for the upload.
        """
        start_date, end_date = self.months_data(num=self.months)
        df = self.download.data_to_df(
            db_name=db_name,
            table_name=table_name,
            start_date=start_date,
            end_date=end_date,
            projection={},
        )
        if df is None or len(df) == 0:
            # Nothing was downloaded, so there is nothing to correct or upload.
            return

        def _current_scene_name(row):
            # id_account_rpt returns the whole mapping dict; only the scene
            # name belongs in the column.
            matched = id_account_rpt(row['场景id'])
            return matched['场景名字'] if matched else row['场景名字']

        df['场景名字'] = df.apply(_current_scene_name, axis=1)

        username, password, host, port = get_myconf.select_config_values(
            target_service=service_name,
            database='mysql',
        )
        m = mysql.MysqlUpload(
            username=username,
            password=password,
            host=host,
            port=port,
        )
        m.df_to_mysql(
            df=df,
            db_name=db_name,
            table_name=table_name,
            move_insert=True,  # delete first, then insert
            df_sql=False,  # True uploads the whole table through df.to_sql without de-duplication
            drop_duplicates=False,  # True checks for duplicates before inserting (slower); False uploads directly
            count=None,
            filename='',  # used to track processing progress
        )
|
156
|
-
|
157
|
-
|
158
|
-
def recheck_csv(path=''):
    """Rewrite the scene columns of every report CSV found under ``path``.

    For each ``.csv`` file that has a '场景ID' column:

    1. ``="..."`` wrappers are stripped from the id columns.
    2. Files that only have '场景名字' / '场景ID' get those columns renamed to
       '原二级场景名字' / '原二级场景ID'.
    3. '原二级场景名字', '场景名字' and '场景ID' are recomputed from
       '原二级场景ID' through ``id_account_rpt``; rows with an unknown id keep
       their legacy name and get empty current name and id.
    4. The file is overwritten in place.

    NOTE(review): the nesting of steps 3-4 relative to step 2 was
    reconstructed from a rendering without indentation; confirm that files
    which already carry the legacy columns are meant to be rewritten too.

    Args:
        path: Root directory to walk. The default empty string walks nothing.
    """
    skip_markers = ('~', 'baidu', 'Ds_', 'xunlei')
    for root, _dirs, files in os.walk(path, topdown=False):
        for name in files:
            if not name.endswith('.csv') or any(marker in name for marker in skip_markers):
                continue
            csv_path = os.path.join(root, name)
            df = pd.read_csv(csv_path, encoding='utf-8_sig', header=0, na_filter=False)
            columns = set(df.columns)
            if '场景ID' not in columns:
                continue

            # Strip the ="..." wrappers. Assign the result back instead of
            # calling replace(inplace=True) on df[col]: that acts on a
            # temporary Series and is a no-op under pandas Copy-on-Write.
            for id_col, name_col in (('原二级场景ID', '原二级场景名字'), ('场景ID', '场景名字')):
                if id_col in columns and name_col in columns:
                    cleaned = df[id_col].replace(to_replace='="', value='', regex=True)
                    df[id_col] = cleaned.replace(to_replace='"', value='', regex=True)

            # Older exports only have the scene columns; treat them as the
            # legacy ones. Skip the rename if it would duplicate a column.
            if '场景名字' in columns and not columns & {'原二级场景名字', '原二级场景ID'}:
                df = df.rename(columns={
                    '场景名字': '原二级场景名字',
                    '场景ID': '原二级场景ID',
                })

            if not {'原二级场景ID', '原二级场景名字'} <= set(df.columns):
                # No legacy columns to map from; leave the file untouched.
                continue

            # One lookup per row; str() lets ids parsed as integers match.
            matches = [id_account_rpt(str(value)) for value in df['原二级场景ID']]
            # Correct the legacy name from the id, keeping it when unknown.
            df['原二级场景名字'] = [
                hit['原二级场景名字'] if hit else current
                for hit, current in zip(matches, df['原二级场景名字'])
            ]
            # Derive the current scene name and id from the legacy id.
            df['场景名字'] = [hit['场景名字'] if hit else '' for hit in matches]
            df['场景ID'] = [hit['场景id'] if hit else '' for hit in matches]
            print(name)
            df.to_csv(csv_path, index=False, header=True, encoding='utf-8_sig')
|
190
|
-
|
191
|
-
|
192
|
-
if __name__ == '__main__':
    # Database pass, currently disabled; kept as a usage example:
    # r = ReCheckMysql(target_service='company')
    # r.months = 100
    # r.recheck_cols(db_name='推广数据2', table_name='营销场景报表', service_name='company')

    # Only the CSV pass runs when the module is executed as a script.
    recheck_csv()
|