mdbq 2.6.8__py3-none-any.whl → 2.6.9__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1329,7 +1329,7 @@ if __name__ == '__main__':
1329
1329
  db_name = '生意经2'
1330
1330
  table_name = '省份城市分析'
1331
1331
  upload_dir(
1332
- path='/Users/xigua/数据中心/原始文件3/天猫_生意经/省份城市分析',
1332
+ path='/Users/xigua/数据中心/原始文件2/生意经/地域分布',
1333
1333
  db_name=db_name,
1334
1334
  collection_name=table_name,
1335
1335
  dbs={'mysql': True, 'mongodb': False},
@@ -468,7 +468,7 @@ class DataClean:
468
468
  {
469
469
  '文件简称': 'order',
470
470
  '数据库名': '生意经2',
471
- '集合名称': '订单指标',
471
+ '集合名称': '订单数据',
472
472
  },
473
473
  {
474
474
  '文件简称': '省份城市分析',
@@ -609,7 +609,7 @@ class DataClean:
609
609
  {
610
610
  '文件简称': 'order',
611
611
  '数据库名': '淘宝_生意经3',
612
- '集合名称': '订单指标',
612
+ '集合名称': '订单数据',
613
613
  },
614
614
  {
615
615
  '文件简称': '省份城市分析',
@@ -0,0 +1,201 @@
1
+ # -*- coding: UTF-8 -*-
2
+ import os
3
+ import time
4
+ import pandas as pd
5
+ import warnings
6
+ import datetime
7
+ from dateutil.relativedelta import relativedelta
8
+ from mdbq.config import get_myconf
9
+ from mdbq.mysql import mysql
10
+ from mdbq.mysql import s_query
11
+
12
+ """ 这是一个临时文件, 用来修改原始文件中大量 csv 文件中的场景名字(万相台报表) """
13
+ warnings.filterwarnings('ignore')
14
+
15
+
16
# Column order of every row in _SCENE_ROWS; these are also the keys of the
# dict returned by id_account_rpt().
_SCENE_FIELDS = ('原二级场景id', '原二级场景名字', '场景id', '场景名字')

# (old scene id, old scene name, new scene id, new scene name)
_SCENE_ROWS = (
    ('436', '全站推广', '436', '全站推广'),
    ('407', '粉丝快', '372', '人群推广'),
    ('114', '货品加速', '376', '货品运营'),
    ('372', '精准人群推广', '372', '人群推广'),
    ('371', '关键词推广', '371', '关键词推广'),
    ('361', '全店智投', '361', '全店智投'),
    ('183', '超级短视频', '183', '超级短视频'),
    ('154', '活动加速', '154', '活动加速'),
    ('133', '会员快', '372', '人群推广'),
    ('108', '超级直播', '108', '超级直播'),
    ('105', '上新快', '105', '上新快'),
    ('78', '拉新快', '372', '人群推广'),
)


def _build_scene_index():
    """Index the scene table by old id and by old name (first row wins)."""
    index = {}
    for row in _SCENE_ROWS:
        record = dict(zip(_SCENE_FIELDS, row))
        index.setdefault(record['原二级场景id'], record)
        index.setdefault(record['原二级场景名字'], record)
    return index


_SCENE_INDEX = _build_scene_index()


def id_account_rpt(id_rpt):
    """Map an old second-level scene id or name to its full mapping record.

    The mapping only goes from old to new (for example 粉丝快 -> 人群推广 and
    精准人群推广 -> 人群推广).  Going from new to old is not supported because
    one new scene corresponds to several old ones.

    Args:
        id_rpt: The old scene id or old scene name.  Values exported as
            ``="436"`` have the ``="`` / ``"`` wrapper removed.  Integer and
            integral float ids (``436``, ``436.0``) are accepted as well and
            compared by their decimal text.

    Returns:
        A new dict with the keys ``原二级场景id``, ``原二级场景名字``,
        ``场景id`` and ``场景名字``, or ``None`` when nothing matches.
    """
    # 436.0 (an id column that was parsed as float) must compare equal to '436'.
    if isinstance(id_rpt, float) and id_rpt.is_integer():
        id_rpt = int(id_rpt)
    key = str(id_rpt)
    if '="' in key:
        key = key.replace('="', '').replace('"', '')
    key = key.strip()

    record = _SCENE_INDEX.get(key)
    # Hand out a copy so callers cannot corrupt the shared table.
    return dict(record) if record is not None else None
104
+
105
+
106
class ReCheckMysql:
    """Re-derive the ``场景名字`` column of a MySQL table and upload the result.

    Rows are downloaded through ``s_query.QueryDatas`` and written back with
    ``mysql.MysqlUpload``; connection settings come from ``get_myconf``.
    """

    def __init__(self, target_service):
        """Create the downloader for ``target_service``.

        Args:
            target_service: Service name passed to
                ``get_myconf.select_config_values`` to look up the MySQL
                credentials.
        """
        username, password, host, port = get_myconf.select_config_values(
            target_service=target_service,
            database='mysql',
        )
        self.download = s_query.QueryDatas(username=username, password=password, host=host, port=port)
        # How many months back to read; 0 means the current month only.
        self.months = 1

    @staticmethod
    def months_data(num=0, end_date=None):
        """Return the date window covering the last ``num`` months.

        Args:
            num: Number of months to go back; 0 selects the current month.
            end_date: End of the window.  Anything ``pandas.to_datetime``
                accepts (datetime, Timestamp, date string).  Defaults to now.

        Returns:
            ``(start_date, end_date)`` as pandas Timestamps, where
            ``start_date`` is the first day of the month ``num`` months
            before ``end_date``.
        """
        if not end_date:
            end_date = datetime.datetime.now()
        end_date = pd.to_datetime(end_date)
        # Count months since year 0 so that crossing a year boundary is plain
        # integer arithmetic; the day of month is irrelevant because the
        # window always starts on the 1st.
        month_index = end_date.year * 12 + (end_date.month - 1) - int(num)
        year, month_offset = divmod(month_index, 12)
        start_date = pd.Timestamp(year=year, month=month_offset + 1, day=1)
        return start_date, end_date

    def recheck_cols(self, db_name, table_name, service_name='company'):
        """Rewrite ``场景名字`` from ``场景id`` and upload the table again.

        Args:
            db_name: Database to read from and write back to.
            table_name: Table to read from and write back to.
            service_name: Service whose MySQL credentials are used for the
                upload.
        """
        start_date, end_date = self.months_data(num=self.months)
        df = self.download.data_to_df(
            db_name=db_name,
            table_name=table_name,
            start_date=start_date,
            end_date=end_date,
            projection={},
        )
        # Nothing to fix, and nothing that should trigger the delete-then-insert
        # upload below.
        if df is None or len(df) == 0:
            return

        def _scene_name(scene_id, current_name):
            """Return the mapped scene name, or the current one if unmapped."""
            matched = id_account_rpt(scene_id)
            # The lookup returns the whole mapping record; only its name
            # belongs in the column.
            return matched['场景名字'] if matched else current_name

        df['场景名字'] = [
            _scene_name(scene_id, current_name)
            for scene_id, current_name in zip(df['场景id'], df['场景名字'])
        ]

        username, password, host, port = get_myconf.select_config_values(
            target_service=service_name,
            database='mysql',
        )
        m = mysql.MysqlUpload(
            username=username,
            password=password,
            host=host,
            port=port,
        )
        # NOTE(review): service_database is fixed to 'company' even when
        # service_name differs - confirm whether it should follow service_name.
        m.df_to_mysql(
            df=df,
            db_name=db_name,
            table_name=table_name,
            move_insert=True,  # delete first, then insert
            df_sql=False,  # True uploads the whole table via df.to_sql without de-duplication
            drop_duplicates=False,  # True checks for duplicates before inserting (slower)
            filename='',  # used to track processing progress
            service_database={'company': 'mysql'},  # dict
        )
156
+
157
+
158
def recheck_csv(path='/Users/xigua/数据中心/原始文件2/推广报表34324234'):
    """Rewrite the scene columns of every report CSV below ``path`` in place.

    For each ``.csv`` file that has a ``场景ID`` column:

    * the ``="..."`` wrapper is removed from the id columns;
    * files that only have ``场景名字`` / ``场景ID`` get those columns renamed
      to ``原二级场景名字`` / ``原二级场景ID``;
    * ``原二级场景名字`` is corrected from the old id, and ``场景名字`` /
      ``场景ID`` are filled with the new scene name / id (empty string when
      the old id is unknown).

    Files whose name contains ``~``, ``baidu``, ``Ds_`` or ``xunlei`` are
    ignored, as are files that end up without both ``原二级场景ID`` and
    ``原二级场景名字``.

    Args:
        path: Root directory that is walked recursively.
    """
    skip_markers = ('~', 'baidu', 'Ds_', 'xunlei')
    old_cols = {'原二级场景名字', '原二级场景ID'}
    new_cols = {'场景名字', '场景ID'}

    def _strip_wrapper(series):
        """Return the column as text without the ``="`` / ``"`` export wrapper."""
        return (
            series.astype(str)
            .str.replace('="', '', regex=False)
            .str.replace('"', '', regex=False)
        )

    for root, dirs, files in os.walk(path, topdown=False):
        for name in files:
            if any(marker in name for marker in skip_markers):
                continue
            if not name.endswith('.csv'):
                continue

            file_path = os.path.join(root, name)
            df = pd.read_csv(file_path, encoding='utf-8_sig', header=0, na_filter=False)
            columns = set(df.columns)
            if '场景ID' not in columns:
                continue

            # Assign the cleaned column back explicitly: an in-place replace on
            # df[col] is not guaranteed to write through to df.
            if old_cols <= columns:
                df['原二级场景ID'] = _strip_wrapper(df['原二级场景ID'])
            if new_cols <= columns:
                df['场景ID'] = _strip_wrapper(df['场景ID'])

            # Older exports only carry the scene under the new column names;
            # treat those values as the old scene.
            if new_cols <= columns and not (old_cols & columns):
                df.rename(columns={
                    '场景名字': '原二级场景名字',
                    '场景ID': '原二级场景ID',
                }, inplace=True)

            # Without both old columns there is nothing to map from.
            if not old_cols <= set(df.columns):
                continue

            # One lookup per row, reused for all three derived columns.
            matches = [id_account_rpt(old_id) for old_id in df['原二级场景ID']]
            # Correct the old scene name from its id.
            df['原二级场景名字'] = [
                match['原二级场景名字'] if match else current
                for match, current in zip(matches, df['原二级场景名字'])
            ]
            # New scene name derived from the old id.
            df['场景名字'] = [match['场景名字'] if match else '' for match in matches]
            # New scene id derived from the old id.
            df['场景ID'] = [match['场景id'] if match else '' for match in matches]

            print(name)
            df.to_csv(file_path, index=False, header=True, encoding='utf-8_sig')
190
+
191
+
192
if __name__ == '__main__':
    # Default action when run as a script: fix the scene columns of the CSV
    # exports on disk.
    #
    # To repair the MySQL table instead, the following can be used:
    #     r = ReCheckMysql(target_service='company')
    #     r.months = 100
    #     r.recheck_cols(
    #         db_name='推广数据2',
    #         table_name='营销场景报表',
    #         service_name='company',
    #     )
    recheck_csv()
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: mdbq
3
- Version: 2.6.8
3
+ Version: 2.6.9
4
4
  Home-page: https://pypi.org/project/mdbsql
5
5
  Author: xigua,
6
6
  Author-email: 2587125111@qq.com
@@ -1,7 +1,7 @@
1
1
  mdbq/__init__.py,sha256=Il5Q9ATdX8yXqVxtP_nYqUhExzxPC_qk_WXQ_4h0exg,16
2
2
  mdbq/__version__.py,sha256=y9Mp_8x0BCZSHsdLT_q5tX9wZwd5QgqrSIENLrb6vXA,62
3
3
  mdbq/aggregation/__init__.py,sha256=EeDqX2Aml6SPx8363J-v1lz0EcZtgwIBYyCJV6CcEDU,40
4
- mdbq/aggregation/aggregation.py,sha256=fnXBRxATlaCohx_dzAIewVlPI0d8L-2QY6wth9ENCwA,76594
4
+ mdbq/aggregation/aggregation.py,sha256=MdxBP0z3ITMoUPeRU_VF9CLMSy9Dc3EAfjV59fYgPT0,76581
5
5
  mdbq/aggregation/df_types.py,sha256=U9i3q2eRPTDY8qAPTw7irzu-Tlg4CIySW9uYro81wdk,8125
6
6
  mdbq/aggregation/mysql_types.py,sha256=DQYROALDiwjJzjhaJfIIdnsrNs11i5BORlj_v6bp67Y,11062
7
7
  mdbq/aggregation/optimize_data.py,sha256=gdScrgTAb6RbXHZy1LitX7lggMGn1GTLhkYSgztfwew,4903
@@ -9,7 +9,7 @@ mdbq/aggregation/query_data.py,sha256=m7Y2xSazPYKvy51yPK6n_Izsv5cjV83oHsiNc7N4fy
9
9
  mdbq/bdup/__init__.py,sha256=AkhsGk81SkG1c8FqDH5tRq-8MZmFobVbN60DTyukYTY,28
10
10
  mdbq/bdup/bdup.py,sha256=LAV0TgnQpc-LB-YuJthxb0U42_VkPidzQzAagan46lU,4234
11
11
  mdbq/clean/__init__.py,sha256=A1d6x3L27j4NtLgiFV5TANwEkLuaDfPHDQNrPBbNWtU,41
12
- mdbq/clean/clean_upload.py,sha256=4DNoSQBUYyn6OsdAP4WJoqWneReeHlvmctXyS5dQvIU,86640
12
+ mdbq/clean/clean_upload.py,sha256=cTFh63leDQvEfyRkpTcTNWJdUBHbxAdnwCtK8JOAEhg,86640
13
13
  mdbq/clean/data_clean.py,sha256=ucfslhqXVZoH2QaXHSAWDky0GhIvH9f4GeNaHg4SrFE,104790
14
14
  mdbq/company/__init__.py,sha256=qz8F_GsP_pMB5PblgJAUAMjasuZbOEp3qQOCB39E8f0,21
15
15
  mdbq/company/copysh.py,sha256=sisL5eo3D5HGGYvRw46xGqnqFaI3SxfBnoa-Y7zknus,17541
@@ -28,6 +28,7 @@ mdbq/mongo/__init__.py,sha256=SILt7xMtQIQl_m-ik9WLtJSXIVf424iYgCfE_tnQFbw,13
28
28
  mdbq/mongo/mongo.py,sha256=v9qvrp6p1ZRWuPpbSilqveiE0FEcZF7U5xUPI0RN4xs,31880
29
29
  mdbq/mysql/__init__.py,sha256=A_DPJyAoEvTSFojiI2e94zP0FKtCkkwKP1kYUCSyQzo,11
30
30
  mdbq/mysql/mysql.py,sha256=9IIyKYU81SXglz6GqVTz0-kCE2dhFuwpQAhUym-yjuc,47135
31
+ mdbq/mysql/recheck_mysql.py,sha256=jHQSlQy0PlQ_EYICQv_2nairUX3t6OIwPtSELKIpjkY,8702
31
32
  mdbq/mysql/s_query.py,sha256=37GGHzRpycfUjsYEoQgDpdEs9JwjW-LxFXnGwwP2b2Q,8403
32
33
  mdbq/mysql/year_month_day.py,sha256=VgewoE2pJxK7ErjfviL_SMTN77ki8GVbTUcao3vFUCE,1523
33
34
  mdbq/other/__init__.py,sha256=jso1oHcy6cJEfa7udS_9uO5X6kZLoPBF8l3wCYmr5dM,18
@@ -43,7 +44,7 @@ mdbq/req_post/__init__.py,sha256=jso1oHcy6cJEfa7udS_9uO5X6kZLoPBF8l3wCYmr5dM,18
43
44
  mdbq/req_post/req_tb.py,sha256=PexWSCPJNM6Tv0ol4lAWIhlOwsAr_frnjtcdSHCFiek,36179
44
45
  mdbq/spider/__init__.py,sha256=RBMFXGy_jd1HXZhngB2T2XTvJqki8P_Fr-pBcwijnew,18
45
46
  mdbq/spider/aikucun.py,sha256=4Y5zd64hZUFtll8AdpUc2napDas-La-A6XzAhb2mLv0,17157
46
- mdbq-2.6.8.dist-info/METADATA,sha256=0f19adUwFUrRTcAT5TvVboqz7L0X2CKv0x5acWdpIYw,245
47
- mdbq-2.6.8.dist-info/WHEEL,sha256=eOLhNAGa2EW3wWl_TU484h7q1UNgy0JXjjoqKoxAAQc,92
48
- mdbq-2.6.8.dist-info/top_level.txt,sha256=2FQ-uLnCSB-OwFiWntzmwosW3X2Xqsg0ewh1axsaylA,5
49
- mdbq-2.6.8.dist-info/RECORD,,
47
+ mdbq-2.6.9.dist-info/METADATA,sha256=t5tQ6w5J_gAE9GygCcXJGQ1U3Aa0vEPq4DksLDc8v_8,245
48
+ mdbq-2.6.9.dist-info/WHEEL,sha256=eOLhNAGa2EW3wWl_TU484h7q1UNgy0JXjjoqKoxAAQc,92
49
+ mdbq-2.6.9.dist-info/top_level.txt,sha256=2FQ-uLnCSB-OwFiWntzmwosW3X2Xqsg0ewh1axsaylA,5
50
+ mdbq-2.6.9.dist-info/RECORD,,
File without changes