mdbq 2.6.8__py3-none-any.whl → 2.7.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- mdbq/aggregation/aggregation.py +4 -4
- mdbq/aggregation/query_data.py +3 -3
- mdbq/clean/clean_upload.py +3 -3
- mdbq/company/copysh.py +2 -1
- mdbq/mysql/recheck_mysql.py +201 -0
- {mdbq-2.6.8.dist-info → mdbq-2.7.0.dist-info}/METADATA +1 -1
- {mdbq-2.6.8.dist-info → mdbq-2.7.0.dist-info}/RECORD +9 -8
- {mdbq-2.6.8.dist-info → mdbq-2.7.0.dist-info}/WHEEL +0 -0
- {mdbq-2.6.8.dist-info → mdbq-2.7.0.dist-info}/top_level.txt +0 -0
mdbq/aggregation/aggregation.py
CHANGED
@@ -1118,7 +1118,7 @@ class DatabaseUpdate:
|
|
1118
1118
|
def upload_dir(path, db_name, collection_name, dbs={'mysql': True, 'mongodb': True}, json_path=None, target_service='company'):
|
1119
1119
|
""" 上传一个文件夹到 mysql 或者 mongodb 数据库 """
|
1120
1120
|
if not os.path.isdir(path):
|
1121
|
-
print(f'{os.path.splitext(os.path.basename(__file__))[0]}.upload_dir:
|
1121
|
+
print(f'{os.path.splitext(os.path.basename(__file__))[0]}.upload_dir: 路径不存在或错误: {path}')
|
1122
1122
|
return
|
1123
1123
|
|
1124
1124
|
if dbs['mongodb']:
|
@@ -1326,10 +1326,10 @@ if __name__ == '__main__':
|
|
1326
1326
|
# )
|
1327
1327
|
|
1328
1328
|
# 上传一个目录到指定数据库
|
1329
|
-
db_name = '
|
1330
|
-
table_name = '
|
1329
|
+
db_name = '推广数据2'
|
1330
|
+
table_name = '营销场景报表'
|
1331
1331
|
upload_dir(
|
1332
|
-
path='/Users/xigua/数据中心/原始文件3
|
1332
|
+
path='/Users/xigua/数据中心/原始文件3/天猫推广报表/营销场景报表',
|
1333
1333
|
db_name=db_name,
|
1334
1334
|
collection_name=table_name,
|
1335
1335
|
dbs={'mysql': True, 'mongodb': False},
|
mdbq/aggregation/query_data.py
CHANGED
@@ -2316,7 +2316,7 @@ def main():
|
|
2316
2316
|
if __name__ == '__main__':
|
2317
2317
|
data_aggregation(
|
2318
2318
|
service_databases=[{'company': 'mysql'}],
|
2319
|
-
months=
|
2320
|
-
is_juhe=
|
2321
|
-
# less_dict=['
|
2319
|
+
months=12,
|
2320
|
+
is_juhe=True, # 立即启动对聚合数据的清理工作
|
2321
|
+
# less_dict=['营销场景报表'], # 单独聚合某一个数据库
|
2322
2322
|
)
|
mdbq/clean/clean_upload.py
CHANGED
@@ -468,7 +468,7 @@ class DataClean:
|
|
468
468
|
{
|
469
469
|
'文件简称': 'order',
|
470
470
|
'数据库名': '生意经2',
|
471
|
-
'集合名称': '
|
471
|
+
'集合名称': '订单数据',
|
472
472
|
},
|
473
473
|
{
|
474
474
|
'文件简称': '省份城市分析',
|
@@ -609,7 +609,7 @@ class DataClean:
|
|
609
609
|
{
|
610
610
|
'文件简称': 'order',
|
611
611
|
'数据库名': '淘宝_生意经3',
|
612
|
-
'集合名称': '
|
612
|
+
'集合名称': '订单数据',
|
613
613
|
},
|
614
614
|
{
|
615
615
|
'文件简称': '省份城市分析',
|
@@ -1587,7 +1587,7 @@ def main(service_databases=None, is_mysql=False):
|
|
1587
1587
|
cn.dmp_tm(is_except=['except']) # 达摩盘
|
1588
1588
|
cn.tg_reports(is_except=['except']) # 推广报表,天猫淘宝共同清洗
|
1589
1589
|
cn.syj_reports_tm(is_except=['except']) # 天猫生意经
|
1590
|
-
|
1590
|
+
""" 淘宝生意经,不可以和天猫同时运行 """
|
1591
1591
|
# cn.syj_reports_tb(is_except=['except']) # 淘宝生意经,不可以和天猫同时运行
|
1592
1592
|
cn.jd_reports(is_except=['except']) # 清洗京东报表
|
1593
1593
|
cn.sp_scene_clean(is_except=['except']) # 商品素材
|
mdbq/company/copysh.py
CHANGED
@@ -321,7 +321,7 @@ def op_data(days: int =100):
|
|
321
321
|
# 清理所有非聚合数据的库
|
322
322
|
optimize_data.op_data(
|
323
323
|
db_name_lists=[
|
324
|
-
'京东数据
|
324
|
+
'京东数据3',
|
325
325
|
'属性设置3',
|
326
326
|
'推广数据2',
|
327
327
|
'推广数据_淘宝店',
|
@@ -367,6 +367,7 @@ def main():
|
|
367
367
|
op_data(days=100)
|
368
368
|
|
369
369
|
t.sleep_minutes = 5 # 同步前休眠时间
|
370
|
+
# 4. 同步共享文件
|
370
371
|
t.tb_file()
|
371
372
|
time.sleep(600) # 检测间隔
|
372
373
|
|
@@ -0,0 +1,201 @@
|
|
1
|
+
# -*- coding: UTF-8 -*-
|
2
|
+
import os
|
3
|
+
import time
|
4
|
+
import pandas as pd
|
5
|
+
import warnings
|
6
|
+
import datetime
|
7
|
+
from dateutil.relativedelta import relativedelta
|
8
|
+
from mdbq.config import get_myconf
|
9
|
+
from mdbq.mysql import mysql
|
10
|
+
from mdbq.mysql import s_query
|
11
|
+
|
12
|
+
""" 这是一个临时文件, 用来修改原始文件中大量 csv 文件中的场景名字(万相台报表) """
|
13
|
+
warnings.filterwarnings('ignore')
|
14
|
+
|
15
|
+
|
16
|
+
# Lookup table from a legacy second-level scene (id / name) to the current
# scene (id / name). Several legacy scenes collapse into the same current one,
# which is why the mapping can only be applied in the old -> new direction.
_ID_ACCOUNT_RPT = (
    {'原二级场景id': '436', '原二级场景名字': '全站推广', '场景id': '436', '场景名字': '全站推广'},
    {'原二级场景id': '407', '原二级场景名字': '粉丝快', '场景id': '372', '场景名字': '人群推广'},
    {'原二级场景id': '114', '原二级场景名字': '货品加速', '场景id': '376', '场景名字': '货品运营'},
    {'原二级场景id': '372', '原二级场景名字': '精准人群推广', '场景id': '372', '场景名字': '人群推广'},
    {'原二级场景id': '371', '原二级场景名字': '关键词推广', '场景id': '371', '场景名字': '关键词推广'},
    {'原二级场景id': '361', '原二级场景名字': '全店智投', '场景id': '361', '场景名字': '全店智投'},
    {'原二级场景id': '183', '原二级场景名字': '超级短视频', '场景id': '183', '场景名字': '超级短视频'},
    {'原二级场景id': '154', '原二级场景名字': '活动加速', '场景id': '154', '场景名字': '活动加速'},
    {'原二级场景id': '133', '原二级场景名字': '会员快', '场景id': '372', '场景名字': '人群推广'},
    {'原二级场景id': '108', '原二级场景名字': '超级直播', '场景id': '108', '场景名字': '超级直播'},
    {'原二级场景id': '105', '原二级场景名字': '上新快', '场景id': '105', '场景名字': '上新快'},
    {'原二级场景id': '78', '原二级场景名字': '拉新快', '场景id': '372', '场景名字': '人群推广'},
)


def id_account_rpt(id_rpt):
    """
    Look up the scene mapping for a legacy second-level scene id or name.

    The mapping only works from old to new, e.g. 粉丝快 -> 人群推广 and
    精准人群推广 -> 人群推广. The reverse direction is not supported because
    one new scene (人群推广) corresponds to several legacy scenes.

    Args:
        id_rpt: legacy scene id or legacy scene name. Any type is accepted and
            compared by its string form, so the int 436 matches the id '436'.
            Values wrapped as ="372" have the wrapper removed first.

    Returns:
        A dict with the keys 原二级场景id, 原二级场景名字, 场景id and 场景名字,
        or None when nothing matches.
    """
    # Always compare as text: the ids in the table are strings, so a numeric
    # id would otherwise never match and the row would be silently skipped.
    key = str(id_rpt)
    if '="' in key:
        key = key.replace('="', '').replace('"', '')

    for entry in _ID_ACCOUNT_RPT:
        if key in (entry['原二级场景id'], entry['原二级场景名字']):
            # Hand back a copy so callers cannot mutate the shared table.
            return dict(entry)
    return None
|
104
|
+
|
105
|
+
|
106
|
+
class ReCheckMysql:
    """
    Rewrite the 场景名字 column of a MySQL table from its 场景id column.

    Rows are downloaded through s_query.QueryDatas, corrected in memory with
    id_account_rpt(), and written back through mysql.MysqlUpload.
    """

    def __init__(self, target_service):
        """
        Args:
            target_service: service name passed to
                get_myconf.select_config_values() to obtain the credentials
                of the MySQL server that rows are read from.
        """
        username, password, host, port = get_myconf.select_config_values(
            target_service=target_service,
            database='mysql',
        )
        self.download = s_query.QueryDatas(username=username, password=password, host=host, port=port)
        self.months = 1  # read the last `months` months of data; 0 means the current month only

    @staticmethod
    def months_data(num=0, end_date=None):
        """
        Build the date window covering the last `num` months.

        Args:
            num: how many months to go back; 0 means the current month only.
            end_date: end of the window; defaults to the current time.

        Returns:
            (start, end) as pandas timestamps, where start is the first day
            of the month that lies `num` months before `end_date`.
        """
        if not end_date:
            end_date = datetime.datetime.now()
        start_date = end_date - relativedelta(months=num)  # same day, `num` months earlier
        start_date = f'{start_date.year}-{start_date.month}-01'  # snap to the first day of that month
        return pd.to_datetime(start_date), pd.to_datetime(end_date)

    def recheck_cols(self, db_name, table_name, service_name='company'):
        """
        Correct 场景名字 for every row of `db_name`.`table_name` in the window.

        The window is months_data(num=self.months). For each row the scene
        name is replaced by the one id_account_rpt() maps its 场景id to; rows
        whose id is not in the mapping keep their current name.

        Args:
            db_name: database to read from and write back to.
            table_name: table to read from and write back to.
            service_name: service name used to look up the upload credentials.
        """
        start_date, end_date = self.months_data(num=self.months)
        df = self.download.data_to_df(
            db_name=db_name,
            table_name=table_name,
            start_date=start_date,
            end_date=end_date,
            projection={},
        )
        if df.empty:
            # Nothing to correct, so there is no reason to touch the table.
            print(f'{db_name} -> {table_name}: 未读取到数据, 跳过')
            return

        def _corrected_name(scene_id, current_name):
            # Look the id up once per row. Only the mapped name is stored in
            # the column (not the whole mapping dict).
            matched = id_account_rpt(scene_id)
            return matched['场景名字'] if matched else current_name

        df['场景名字'] = [
            _corrected_name(scene_id, current_name)
            for scene_id, current_name in zip(df['场景id'], df['场景名字'])
        ]

        username, password, host, port = get_myconf.select_config_values(
            target_service=service_name,
            database='mysql',
        )
        m = mysql.MysqlUpload(
            username=username,
            password=password,
            host=host,
            port=port,
        )
        m.df_to_mysql(
            df=df,
            db_name=db_name,
            table_name=table_name,
            move_insert=True,  # delete first, then insert
            df_sql=False,  # when True, df.to_sql uploads the whole table without de-duplication
            drop_duplicates=False,  # when True, check for duplicates before inserting (slower)
            filename='',  # used to track processing progress
            # NOTE(review): hard-coded to 'company' even when service_name
            # differs -- confirm whether this should follow service_name.
            service_database={'company': 'mysql'},
        )
|
156
|
+
|
157
|
+
|
158
|
+
def recheck_csv(path='/Users/xigua/数据中心/原始文件2/推广报表34324234'):
    """
    Normalise the scene columns of every report CSV found under `path`.

    Each matching file is rewritten in place. After processing it holds:
      * 原二级场景ID / 原二级场景名字: the legacy scene, with the name
        corrected from the id when the id is known to id_account_rpt();
      * 场景ID / 场景名字: the current scene derived from the legacy id, or
        an empty string when the id is unknown.

    Files are skipped when their name contains '~', 'baidu', 'Ds_' or
    'xunlei', when they are not .csv files, when they have no 场景ID column,
    or when the legacy columns are still missing after the rename step.

    Args:
        path: root directory to walk; defaults to the original hard-coded
            location.
    """
    skip_marks = ('~', 'baidu', 'Ds_', 'xunlei')

    def _strip_wrapper(series):
        # Remove the ="..." wrapper around ids. Assigning the result back
        # (instead of inplace=True on df[col]) keeps working when pandas
        # Copy-on-Write is active.
        cleaned = series.replace(to_replace='="', value='', regex=True)
        return cleaned.replace(to_replace='"', value='', regex=True)

    for root, dirs, files in os.walk(path, topdown=False):
        for name in files:
            if any(mark in name for mark in skip_marks):
                continue
            if not name.endswith('.csv'):
                continue

            file_path = os.path.join(root, name)
            df = pd.read_csv(file_path, encoding='utf-8_sig', header=0, na_filter=False)
            columns = df.columns.tolist()
            if '场景ID' not in columns:
                continue

            if '原二级场景名字' in columns and '原二级场景ID' in columns:
                df['原二级场景ID'] = _strip_wrapper(df['原二级场景ID'])
            if '场景名字' in columns:
                df['场景ID'] = _strip_wrapper(df['场景ID'])
                if '原二级场景名字' not in columns:
                    # Old-format file: its scene columns hold the legacy scene.
                    df = df.rename(columns={
                        '场景名字': '原二级场景名字',
                        '场景ID': '原二级场景ID',
                    })

            if '原二级场景ID' not in df.columns or '原二级场景名字' not in df.columns:
                # Without the legacy columns there is nothing to map from.
                continue

            # One lookup per row, reused for all three derived columns.
            matches = [id_account_rpt(scene_id) for scene_id in df['原二级场景ID']]
            # Correct the legacy scene name from its id.
            df['原二级场景名字'] = [
                match['原二级场景名字'] if match else old_name
                for match, old_name in zip(matches, df['原二级场景名字'])
            ]
            # Derive the current scene name and id from the legacy id.
            df['场景名字'] = [match['场景名字'] if match else '' for match in matches]
            df['场景ID'] = [match['场景id'] if match else '' for match in matches]

            print(name)
            df.to_csv(file_path, index=False, header=True, encoding='utf-8_sig')
|
190
|
+
|
191
|
+
|
192
|
+
if __name__ == '__main__':
    # Alternative entry point, kept for reference: correct the scene names
    # stored in MySQL instead of the CSV files.
    # r = ReCheckMysql(target_service='company')
    # r.months = 100
    # r.recheck_cols(
    #     db_name='推广数据2',
    #     table_name='营销场景报表',
    #     service_name='company',
    # )

    # Default action when run as a script: rewrite the report CSV files.
    recheck_csv()
|
@@ -1,18 +1,18 @@
|
|
1
1
|
mdbq/__init__.py,sha256=Il5Q9ATdX8yXqVxtP_nYqUhExzxPC_qk_WXQ_4h0exg,16
|
2
2
|
mdbq/__version__.py,sha256=y9Mp_8x0BCZSHsdLT_q5tX9wZwd5QgqrSIENLrb6vXA,62
|
3
3
|
mdbq/aggregation/__init__.py,sha256=EeDqX2Aml6SPx8363J-v1lz0EcZtgwIBYyCJV6CcEDU,40
|
4
|
-
mdbq/aggregation/aggregation.py,sha256=
|
4
|
+
mdbq/aggregation/aggregation.py,sha256=v_5VM-InaDDvDNjAy_b8xsc38-vf78GkqoXjoe6MZ8U,76569
|
5
5
|
mdbq/aggregation/df_types.py,sha256=U9i3q2eRPTDY8qAPTw7irzu-Tlg4CIySW9uYro81wdk,8125
|
6
6
|
mdbq/aggregation/mysql_types.py,sha256=DQYROALDiwjJzjhaJfIIdnsrNs11i5BORlj_v6bp67Y,11062
|
7
7
|
mdbq/aggregation/optimize_data.py,sha256=gdScrgTAb6RbXHZy1LitX7lggMGn1GTLhkYSgztfwew,4903
|
8
|
-
mdbq/aggregation/query_data.py,sha256=
|
8
|
+
mdbq/aggregation/query_data.py,sha256=gqvKDgg3jrfCcI1VudrnQLJgKHUThZVTqS1zOQ5wgMk,102766
|
9
9
|
mdbq/bdup/__init__.py,sha256=AkhsGk81SkG1c8FqDH5tRq-8MZmFobVbN60DTyukYTY,28
|
10
10
|
mdbq/bdup/bdup.py,sha256=LAV0TgnQpc-LB-YuJthxb0U42_VkPidzQzAagan46lU,4234
|
11
11
|
mdbq/clean/__init__.py,sha256=A1d6x3L27j4NtLgiFV5TANwEkLuaDfPHDQNrPBbNWtU,41
|
12
|
-
mdbq/clean/clean_upload.py,sha256=
|
12
|
+
mdbq/clean/clean_upload.py,sha256=X5WcWm7kkGZDMpk8p0vMq-SFIcrSL1DmVCYWbxYmLVI,86644
|
13
13
|
mdbq/clean/data_clean.py,sha256=ucfslhqXVZoH2QaXHSAWDky0GhIvH9f4GeNaHg4SrFE,104790
|
14
14
|
mdbq/company/__init__.py,sha256=qz8F_GsP_pMB5PblgJAUAMjasuZbOEp3qQOCB39E8f0,21
|
15
|
-
mdbq/company/copysh.py,sha256=
|
15
|
+
mdbq/company/copysh.py,sha256=3ZYm_rTE8nXcbgZlyHsa0y-RAkZ8vcmwkxMy_Jj4F2k,17574
|
16
16
|
mdbq/company/copysh_bak.py,sha256=NvlXCBZBcO2GIT5nLRYYqhOyHWM1-1RE7DHvgbj6jmQ,19723
|
17
17
|
mdbq/company/home_sh.py,sha256=42CZ2tZIXHLl2mOl2gk2fZnjH2IHh1VJ1s3qHABjonY,18021
|
18
18
|
mdbq/config/__init__.py,sha256=jso1oHcy6cJEfa7udS_9uO5X6kZLoPBF8l3wCYmr5dM,18
|
@@ -28,6 +28,7 @@ mdbq/mongo/__init__.py,sha256=SILt7xMtQIQl_m-ik9WLtJSXIVf424iYgCfE_tnQFbw,13
|
|
28
28
|
mdbq/mongo/mongo.py,sha256=v9qvrp6p1ZRWuPpbSilqveiE0FEcZF7U5xUPI0RN4xs,31880
|
29
29
|
mdbq/mysql/__init__.py,sha256=A_DPJyAoEvTSFojiI2e94zP0FKtCkkwKP1kYUCSyQzo,11
|
30
30
|
mdbq/mysql/mysql.py,sha256=9IIyKYU81SXglz6GqVTz0-kCE2dhFuwpQAhUym-yjuc,47135
|
31
|
+
mdbq/mysql/recheck_mysql.py,sha256=jHQSlQy0PlQ_EYICQv_2nairUX3t6OIwPtSELKIpjkY,8702
|
31
32
|
mdbq/mysql/s_query.py,sha256=37GGHzRpycfUjsYEoQgDpdEs9JwjW-LxFXnGwwP2b2Q,8403
|
32
33
|
mdbq/mysql/year_month_day.py,sha256=VgewoE2pJxK7ErjfviL_SMTN77ki8GVbTUcao3vFUCE,1523
|
33
34
|
mdbq/other/__init__.py,sha256=jso1oHcy6cJEfa7udS_9uO5X6kZLoPBF8l3wCYmr5dM,18
|
@@ -43,7 +44,7 @@ mdbq/req_post/__init__.py,sha256=jso1oHcy6cJEfa7udS_9uO5X6kZLoPBF8l3wCYmr5dM,18
|
|
43
44
|
mdbq/req_post/req_tb.py,sha256=PexWSCPJNM6Tv0ol4lAWIhlOwsAr_frnjtcdSHCFiek,36179
|
44
45
|
mdbq/spider/__init__.py,sha256=RBMFXGy_jd1HXZhngB2T2XTvJqki8P_Fr-pBcwijnew,18
|
45
46
|
mdbq/spider/aikucun.py,sha256=4Y5zd64hZUFtll8AdpUc2napDas-La-A6XzAhb2mLv0,17157
|
46
|
-
mdbq-2.
|
47
|
-
mdbq-2.
|
48
|
-
mdbq-2.
|
49
|
-
mdbq-2.
|
47
|
+
mdbq-2.7.0.dist-info/METADATA,sha256=i_0WznHsXfCR0sToIhC5S4mv3hv1qMUOFFdws6FocOg,245
|
48
|
+
mdbq-2.7.0.dist-info/WHEEL,sha256=eOLhNAGa2EW3wWl_TU484h7q1UNgy0JXjjoqKoxAAQc,92
|
49
|
+
mdbq-2.7.0.dist-info/top_level.txt,sha256=2FQ-uLnCSB-OwFiWntzmwosW3X2Xqsg0ewh1axsaylA,5
|
50
|
+
mdbq-2.7.0.dist-info/RECORD,,
|
File without changes
|
File without changes
|