mdbq 2.6.8__py3-none-any.whl → 2.7.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1118,7 +1118,7 @@ class DatabaseUpdate:
1118
1118
  def upload_dir(path, db_name, collection_name, dbs={'mysql': True, 'mongodb': True}, json_path=None, target_service='company'):
1119
1119
  """ 上传一个文件夹到 mysql 或者 mongodb 数据库 """
1120
1120
  if not os.path.isdir(path):
1121
- print(f'{os.path.splitext(os.path.basename(__file__))[0]}.upload_dir: 函数只接受文件夹路径,不是一个文件夹: {path}')
1121
+ print(f'{os.path.splitext(os.path.basename(__file__))[0]}.upload_dir: 路径不存在或错误: {path}')
1122
1122
  return
1123
1123
 
1124
1124
  if dbs['mongodb']:
@@ -1326,10 +1326,10 @@ if __name__ == '__main__':
1326
1326
  # )
1327
1327
 
1328
1328
  # 上传一个目录到指定数据库
1329
- db_name = '生意经2'
1330
- table_name = '省份城市分析'
1329
+ db_name = '推广数据2'
1330
+ table_name = '营销场景报表'
1331
1331
  upload_dir(
1332
- path='/Users/xigua/数据中心/原始文件3/天猫_生意经/省份城市分析',
1332
+ path='/Users/xigua/数据中心/原始文件3/天猫推广报表/营销场景报表',
1333
1333
  db_name=db_name,
1334
1334
  collection_name=table_name,
1335
1335
  dbs={'mysql': True, 'mongodb': False},
@@ -2316,7 +2316,7 @@ def main():
2316
2316
  if __name__ == '__main__':
2317
2317
  data_aggregation(
2318
2318
  service_databases=[{'company': 'mysql'}],
2319
- months=1,
2320
- is_juhe=False, # 立即启动对聚合数据的清理工作
2321
- # less_dict=['生意参谋_直播场次分析'], # 单独聚合某一个数据库
2319
+ months=12,
2320
+ is_juhe=True, # 立即启动对聚合数据的清理工作
2321
+ # less_dict=['营销场景报表'], # 单独聚合某一个数据库
2322
2322
  )
@@ -468,7 +468,7 @@ class DataClean:
468
468
  {
469
469
  '文件简称': 'order',
470
470
  '数据库名': '生意经2',
471
- '集合名称': '订单指标',
471
+ '集合名称': '订单数据',
472
472
  },
473
473
  {
474
474
  '文件简称': '省份城市分析',
@@ -609,7 +609,7 @@ class DataClean:
609
609
  {
610
610
  '文件简称': 'order',
611
611
  '数据库名': '淘宝_生意经3',
612
- '集合名称': '订单指标',
612
+ '集合名称': '订单数据',
613
613
  },
614
614
  {
615
615
  '文件简称': '省份城市分析',
@@ -1587,7 +1587,7 @@ def main(service_databases=None, is_mysql=False):
1587
1587
  cn.dmp_tm(is_except=['except']) # 达摩盘
1588
1588
  cn.tg_reports(is_except=['except']) # 推广报表,天猫淘宝共同清洗
1589
1589
  cn.syj_reports_tm(is_except=['except']) # 天猫生意经
1590
- # # 淘宝生意经,不可以和天猫同时运行
1590
+ """ 淘宝生意经,不可以和天猫同时运行 """
1591
1591
  # cn.syj_reports_tb(is_except=['except']) # 淘宝生意经,不可以和天猫同时运行
1592
1592
  cn.jd_reports(is_except=['except']) # 清洗京东报表
1593
1593
  cn.sp_scene_clean(is_except=['except']) # 商品素材
mdbq/company/copysh.py CHANGED
@@ -321,7 +321,7 @@ def op_data(days: int =100):
321
321
  # 清理所有非聚合数据的库
322
322
  optimize_data.op_data(
323
323
  db_name_lists=[
324
- '京东数据2',
324
+ '京东数据3',
325
325
  '属性设置3',
326
326
  '推广数据2',
327
327
  '推广数据_淘宝店',
@@ -367,6 +367,7 @@ def main():
367
367
  op_data(days=100)
368
368
 
369
369
  t.sleep_minutes = 5 # 同步前休眠时间
370
+ # 4. 同步共享文件
370
371
  t.tb_file()
371
372
  time.sleep(600) # 检测间隔
372
373
 
@@ -0,0 +1,201 @@
1
+ # -*- coding: UTF-8 –*-
2
+ import os
3
+ import time
4
+ import pandas as pd
5
+ import warnings
6
+ import datetime
7
+ from dateutil.relativedelta import relativedelta
8
+ from mdbq.config import get_myconf
9
+ from mdbq.mysql import mysql
10
+ from mdbq.mysql import s_query
11
+
12
+ """ 这是一个临时文件, 用来修改原始文件中大量 csv 文件中的场景名字(万相台报表) """
13
+ warnings.filterwarnings('ignore')
14
+
15
+
16
# Legacy ("original second-level") scene id/name -> current scene id/name.
# Several legacy scenes collapse into one current scene, so this table can
# only be used in the legacy -> current direction.
_ID_ACCOUNT_RPT = (
    {'原二级场景id': '436', '原二级场景名字': '全站推广', '场景id': '436', '场景名字': '全站推广'},
    {'原二级场景id': '407', '原二级场景名字': '粉丝快', '场景id': '372', '场景名字': '人群推广'},
    {'原二级场景id': '114', '原二级场景名字': '货品加速', '场景id': '376', '场景名字': '货品运营'},
    {'原二级场景id': '372', '原二级场景名字': '精准人群推广', '场景id': '372', '场景名字': '人群推广'},
    {'原二级场景id': '371', '原二级场景名字': '关键词推广', '场景id': '371', '场景名字': '关键词推广'},
    {'原二级场景id': '361', '原二级场景名字': '全店智投', '场景id': '361', '场景名字': '全店智投'},
    {'原二级场景id': '183', '原二级场景名字': '超级短视频', '场景id': '183', '场景名字': '超级短视频'},
    {'原二级场景id': '154', '原二级场景名字': '活动加速', '场景id': '154', '场景名字': '活动加速'},
    {'原二级场景id': '133', '原二级场景名字': '会员快', '场景id': '372', '场景名字': '人群推广'},
    {'原二级场景id': '108', '原二级场景名字': '超级直播', '场景id': '108', '场景名字': '超级直播'},
    {'原二级场景id': '105', '原二级场景名字': '上新快', '场景id': '105', '场景名字': '上新快'},
    {'原二级场景id': '78', '原二级场景名字': '拉新快', '场景id': '372', '场景名字': '人群推广'},
)


def id_account_rpt(id_rpt):
    """Look up the scene mapping for a legacy scene id or legacy scene name.

    The lookup only works from legacy to current values, e.g.
    '粉丝快' -> '人群推广' and '精准人群推广' -> '人群推广'.  The reverse
    direction is ambiguous ('人群推广' corresponds to several legacy
    scenes) and is therefore not supported.

    Args:
        id_rpt: Legacy scene id or legacy scene name.  Accepts strings
            (optionally wrapped as ``="436"`` or ``"436"``, with or without
            surrounding whitespace) as well as numeric ids such as ``436``
            or ``436.0``.

    Returns:
        A new dict with the keys '原二级场景id', '原二级场景名字', '场景id'
        and '场景名字', or ``None`` when nothing matches.
    """
    # Columns parsed as numbers arrive as int/float; 436.0 must match '436'.
    if isinstance(id_rpt, float) and id_rpt.is_integer():
        id_rpt = int(id_rpt)
    # Always normalise to text so numeric ids compare equal to the table's
    # string ids, and drop the ="..." wrapper found in exported reports.
    key = str(id_rpt).replace('="', '').replace('"', '').strip()

    for data in _ID_ACCOUNT_RPT:
        if key in (data['原二级场景id'], data['原二级场景名字']):
            # Hand out a copy so callers cannot corrupt the shared table.
            return dict(data)
    return None
104
+
105
+
106
class ReCheckMysql:
    """Re-map the scene names of report rows already stored in MySQL.

    Rows are read through ``s_query.QueryDatas``, the '场景名字' column is
    corrected from the '场景id' column via ``id_account_rpt``, and the result
    is written back with ``mysql.MysqlUpload.df_to_mysql``.
    """

    def __init__(self, target_service):
        """Create the query helper for ``target_service``.

        Args:
            target_service: Service name passed to
                ``get_myconf.select_config_values`` to obtain the MySQL
                credentials used for reading.
        """
        username, password, host, port = get_myconf.select_config_values(
            target_service=target_service,
            database='mysql',
        )
        self.download = s_query.QueryDatas(username=username, password=password, host=host, port=port)
        # Read the last ``months`` months of data; 0 means the current month only.
        self.months = 1

    @staticmethod
    def months_data(num=0, end_date=None):
        """Return the date window covering the last ``num`` months.

        Args:
            num: Number of months to go back; 0 selects the current month.
            end_date: End of the window.  Anything ``pd.to_datetime``
                accepts; defaults to the current local time.

        Returns:
            ``(start_date, end_date)`` as pandas Timestamps, where
            ``start_date`` is the first day of the month ``num`` months
            before ``end_date``.
        """
        if not end_date:
            end_date = datetime.datetime.now()
        end_date = pd.to_datetime(end_date)
        # Count months since year 0 so that crossing a year boundary is a
        # plain integer subtraction; only year and month matter because the
        # start is always snapped to the first day of the month.
        year, month_index = divmod(end_date.year * 12 + end_date.month - 1 - num, 12)
        start_date = pd.Timestamp(year=year, month=month_index + 1, day=1)
        return start_date, end_date

    @staticmethod
    def _scene_name(row):
        """Return the corrected scene name for one row, or the existing one."""
        matched = id_account_rpt(row['场景id'])
        return matched['场景名字'] if matched else row['场景名字']

    def recheck_cols(self, db_name, table_name, service_name='company'):
        """Rewrite '场景名字' in ``db_name.table_name`` from the '场景id' column.

        Args:
            db_name: Database to read from and write back to.
            table_name: Table to read from and write back to.
            service_name: Service name passed to
                ``get_myconf.select_config_values`` to obtain the MySQL
                credentials used for writing.
        """
        start_date, end_date = self.months_data(num=self.months)
        df = self.download.data_to_df(
            db_name=db_name,
            table_name=table_name,
            start_date=start_date,
            end_date=end_date,
            projection={},
        )
        if df is None or len(df) == 0:
            # Nothing to fix, and a row-wise apply on an empty frame does not
            # yield a column that can be assigned back.
            print(f'{db_name}.{table_name}: no rows between {start_date} and {end_date}, skipped')
            return

        # Replace the scene name with the mapped one; rows whose id has no
        # mapping keep their current name.  Only the name string is stored,
        # not the whole mapping dict.
        df['场景名字'] = df.apply(self._scene_name, axis=1)

        username, password, host, port = get_myconf.select_config_values(
            target_service=service_name,
            database='mysql',
        )
        m = mysql.MysqlUpload(
            username=username,
            password=password,
            host=host,
            port=port,
        )
        # NOTE(review): service_database is fixed to 'company' even when
        # service_name differs -- confirm whether it should follow service_name.
        m.df_to_mysql(
            df=df,
            db_name=db_name,
            table_name=table_name,
            move_insert=True,  # delete first, then insert
            df_sql=False,  # True uploads the whole table via df.to_sql without de-duplication
            drop_duplicates=False,  # True checks for duplicates before inserting (slower)
            filename='',  # used to track progress
            service_database={'company': 'mysql'},
        )
156
+
157
+
158
def _strip_formula_quotes(series):
    """Return ``series`` as text with the ``="..."`` wrapper and quotes removed."""
    text = series.astype(str)
    return text.str.replace('="', '', regex=False).str.replace('"', '', regex=False)


def recheck_csv(path='/Users/xigua/数据中心/原始文件2/推广报表34324234'):
    """Rewrite the scene columns of every report csv found under ``path``.

    Each csv that has a '场景ID' column is updated in place:

    * files that only carry '场景名字'/'场景ID' get those columns renamed to
      '原二级场景名字'/'原二级场景ID';
    * '原二级场景名字' is corrected from '原二级场景ID' where a mapping exists;
    * '场景名字' and '场景ID' are (re)generated from the mapping and set to
      an empty string where the legacy id is unknown.

    Files whose names contain '~', 'baidu', 'Ds_' or 'xunlei', and files
    without a usable legacy id/name column pair, are left untouched.

    Args:
        path: Directory that is walked recursively.
    """
    if not os.path.isdir(path):
        print(f'recheck_csv: not a directory, nothing to do: {path}')
        return

    legacy_cols = {'原二级场景名字', '原二级场景ID'}
    skip_markers = ('~', 'baidu', 'Ds_', 'xunlei')

    for root, _dirs, files in os.walk(path, topdown=False):
        for name in files:
            if any(marker in name for marker in skip_markers):
                continue
            if not name.endswith('.csv'):
                continue

            file_path = os.path.join(root, name)
            df = pd.read_csv(file_path, encoding='utf-8_sig', header=0, na_filter=False)
            columns = set(df.columns)
            if '场景ID' not in columns:
                continue

            # Assign the cleaned column back instead of calling
            # ``df[col].replace(..., inplace=True)``: the chained in-place
            # form is not guaranteed to modify ``df``.
            if legacy_cols <= columns:
                df['原二级场景ID'] = _strip_formula_quotes(df['原二级场景ID'])
            if '场景名字' in columns:
                df['场景ID'] = _strip_formula_quotes(df['场景ID'])

            # Old-format files only have the current-name columns, which
            # actually hold the legacy values: move them to the legacy names.
            if '场景名字' in columns and not (columns & legacy_cols):
                df.rename(columns={
                    '场景名字': '原二级场景名字',
                    '场景ID': '原二级场景ID',
                }, inplace=True)

            if not legacy_cols <= set(df.columns):
                # Without both legacy columns there is nothing to map from.
                print(f'{name}: skipped, missing 原二级场景ID/原二级场景名字')
                continue

            # Resolve every row once and reuse the result for all three columns.
            matches = [id_account_rpt(value) for value in df['原二级场景ID']]
            # Correct the legacy scene name from the legacy id.
            df['原二级场景名字'] = [
                match['原二级场景名字'] if match else current
                for match, current in zip(matches, df['原二级场景名字'])
            ]
            # Derive the current scene name from the legacy id.
            df['场景名字'] = [match['场景名字'] if match else '' for match in matches]
            # Derive the current scene id from the legacy id.
            df['场景ID'] = [match['场景id'] if match else '' for match in matches]

            print(name)
            df.to_csv(file_path, index=False, header=True, encoding='utf-8_sig')
190
+
191
+
192
if __name__ == '__main__':
    # Disabled alternative: fix the scene names of rows already stored in MySQL.
    # r = ReCheckMysql(target_service='company')
    # r.months = 100
    # r.recheck_cols(
    #     db_name='推广数据2',
    #     table_name='营销场景报表',
    #     service_name='company',
    # )

    # Default action: rewrite the scene columns of the raw csv files on disk.
    recheck_csv()
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: mdbq
3
- Version: 2.6.8
3
+ Version: 2.7.0
4
4
  Home-page: https://pypi.org/project/mdbsql
5
5
  Author: xigua,
6
6
  Author-email: 2587125111@qq.com
@@ -1,18 +1,18 @@
1
1
  mdbq/__init__.py,sha256=Il5Q9ATdX8yXqVxtP_nYqUhExzxPC_qk_WXQ_4h0exg,16
2
2
  mdbq/__version__.py,sha256=y9Mp_8x0BCZSHsdLT_q5tX9wZwd5QgqrSIENLrb6vXA,62
3
3
  mdbq/aggregation/__init__.py,sha256=EeDqX2Aml6SPx8363J-v1lz0EcZtgwIBYyCJV6CcEDU,40
4
- mdbq/aggregation/aggregation.py,sha256=fnXBRxATlaCohx_dzAIewVlPI0d8L-2QY6wth9ENCwA,76594
4
+ mdbq/aggregation/aggregation.py,sha256=v_5VM-InaDDvDNjAy_b8xsc38-vf78GkqoXjoe6MZ8U,76569
5
5
  mdbq/aggregation/df_types.py,sha256=U9i3q2eRPTDY8qAPTw7irzu-Tlg4CIySW9uYro81wdk,8125
6
6
  mdbq/aggregation/mysql_types.py,sha256=DQYROALDiwjJzjhaJfIIdnsrNs11i5BORlj_v6bp67Y,11062
7
7
  mdbq/aggregation/optimize_data.py,sha256=gdScrgTAb6RbXHZy1LitX7lggMGn1GTLhkYSgztfwew,4903
8
- mdbq/aggregation/query_data.py,sha256=m7Y2xSazPYKvy51yPK6n_Izsv5cjV83oHsiNc7N4fyA,102779
8
+ mdbq/aggregation/query_data.py,sha256=gqvKDgg3jrfCcI1VudrnQLJgKHUThZVTqS1zOQ5wgMk,102766
9
9
  mdbq/bdup/__init__.py,sha256=AkhsGk81SkG1c8FqDH5tRq-8MZmFobVbN60DTyukYTY,28
10
10
  mdbq/bdup/bdup.py,sha256=LAV0TgnQpc-LB-YuJthxb0U42_VkPidzQzAagan46lU,4234
11
11
  mdbq/clean/__init__.py,sha256=A1d6x3L27j4NtLgiFV5TANwEkLuaDfPHDQNrPBbNWtU,41
12
- mdbq/clean/clean_upload.py,sha256=4DNoSQBUYyn6OsdAP4WJoqWneReeHlvmctXyS5dQvIU,86640
12
+ mdbq/clean/clean_upload.py,sha256=X5WcWm7kkGZDMpk8p0vMq-SFIcrSL1DmVCYWbxYmLVI,86644
13
13
  mdbq/clean/data_clean.py,sha256=ucfslhqXVZoH2QaXHSAWDky0GhIvH9f4GeNaHg4SrFE,104790
14
14
  mdbq/company/__init__.py,sha256=qz8F_GsP_pMB5PblgJAUAMjasuZbOEp3qQOCB39E8f0,21
15
- mdbq/company/copysh.py,sha256=sisL5eo3D5HGGYvRw46xGqnqFaI3SxfBnoa-Y7zknus,17541
15
+ mdbq/company/copysh.py,sha256=3ZYm_rTE8nXcbgZlyHsa0y-RAkZ8vcmwkxMy_Jj4F2k,17574
16
16
  mdbq/company/copysh_bak.py,sha256=NvlXCBZBcO2GIT5nLRYYqhOyHWM1-1RE7DHvgbj6jmQ,19723
17
17
  mdbq/company/home_sh.py,sha256=42CZ2tZIXHLl2mOl2gk2fZnjH2IHh1VJ1s3qHABjonY,18021
18
18
  mdbq/config/__init__.py,sha256=jso1oHcy6cJEfa7udS_9uO5X6kZLoPBF8l3wCYmr5dM,18
@@ -28,6 +28,7 @@ mdbq/mongo/__init__.py,sha256=SILt7xMtQIQl_m-ik9WLtJSXIVf424iYgCfE_tnQFbw,13
28
28
  mdbq/mongo/mongo.py,sha256=v9qvrp6p1ZRWuPpbSilqveiE0FEcZF7U5xUPI0RN4xs,31880
29
29
  mdbq/mysql/__init__.py,sha256=A_DPJyAoEvTSFojiI2e94zP0FKtCkkwKP1kYUCSyQzo,11
30
30
  mdbq/mysql/mysql.py,sha256=9IIyKYU81SXglz6GqVTz0-kCE2dhFuwpQAhUym-yjuc,47135
31
+ mdbq/mysql/recheck_mysql.py,sha256=jHQSlQy0PlQ_EYICQv_2nairUX3t6OIwPtSELKIpjkY,8702
31
32
  mdbq/mysql/s_query.py,sha256=37GGHzRpycfUjsYEoQgDpdEs9JwjW-LxFXnGwwP2b2Q,8403
32
33
  mdbq/mysql/year_month_day.py,sha256=VgewoE2pJxK7ErjfviL_SMTN77ki8GVbTUcao3vFUCE,1523
33
34
  mdbq/other/__init__.py,sha256=jso1oHcy6cJEfa7udS_9uO5X6kZLoPBF8l3wCYmr5dM,18
@@ -43,7 +44,7 @@ mdbq/req_post/__init__.py,sha256=jso1oHcy6cJEfa7udS_9uO5X6kZLoPBF8l3wCYmr5dM,18
43
44
  mdbq/req_post/req_tb.py,sha256=PexWSCPJNM6Tv0ol4lAWIhlOwsAr_frnjtcdSHCFiek,36179
44
45
  mdbq/spider/__init__.py,sha256=RBMFXGy_jd1HXZhngB2T2XTvJqki8P_Fr-pBcwijnew,18
45
46
  mdbq/spider/aikucun.py,sha256=4Y5zd64hZUFtll8AdpUc2napDas-La-A6XzAhb2mLv0,17157
46
- mdbq-2.6.8.dist-info/METADATA,sha256=0f19adUwFUrRTcAT5TvVboqz7L0X2CKv0x5acWdpIYw,245
47
- mdbq-2.6.8.dist-info/WHEEL,sha256=eOLhNAGa2EW3wWl_TU484h7q1UNgy0JXjjoqKoxAAQc,92
48
- mdbq-2.6.8.dist-info/top_level.txt,sha256=2FQ-uLnCSB-OwFiWntzmwosW3X2Xqsg0ewh1axsaylA,5
49
- mdbq-2.6.8.dist-info/RECORD,,
47
+ mdbq-2.7.0.dist-info/METADATA,sha256=i_0WznHsXfCR0sToIhC5S4mv3hv1qMUOFFdws6FocOg,245
48
+ mdbq-2.7.0.dist-info/WHEEL,sha256=eOLhNAGa2EW3wWl_TU484h7q1UNgy0JXjjoqKoxAAQc,92
49
+ mdbq-2.7.0.dist-info/top_level.txt,sha256=2FQ-uLnCSB-OwFiWntzmwosW3X2Xqsg0ewh1axsaylA,5
50
+ mdbq-2.7.0.dist-info/RECORD,,
File without changes