mdbq 1.9.8__tar.gz → 2.0.0__tar.gz
This diff compares the contents of two publicly available versions of a package, as released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those versions as they appear in their respective public registries.
- {mdbq-1.9.8 → mdbq-2.0.0}/PKG-INFO +1 -1
- {mdbq-1.9.8 → mdbq-2.0.0}/mdbq/aggregation/aggregation.py +30 -27
- {mdbq-1.9.8 → mdbq-2.0.0}/mdbq/clean/data_clean.py +13 -2
- {mdbq-1.9.8 → mdbq-2.0.0}/mdbq/config/get_myconf.py +1 -1
- {mdbq-1.9.8 → mdbq-2.0.0}/mdbq.egg-info/PKG-INFO +1 -1
- {mdbq-1.9.8 → mdbq-2.0.0}/setup.py +1 -1
- {mdbq-1.9.8 → mdbq-2.0.0}/README.txt +0 -0
- {mdbq-1.9.8 → mdbq-2.0.0}/mdbq/__init__.py +0 -0
- {mdbq-1.9.8 → mdbq-2.0.0}/mdbq/__version__.py +0 -0
- {mdbq-1.9.8 → mdbq-2.0.0}/mdbq/aggregation/__init__.py +0 -0
- {mdbq-1.9.8 → mdbq-2.0.0}/mdbq/aggregation/df_types.py +0 -0
- {mdbq-1.9.8 → mdbq-2.0.0}/mdbq/aggregation/mysql_types.py +0 -0
- {mdbq-1.9.8 → mdbq-2.0.0}/mdbq/aggregation/optimize_data.py +0 -0
- {mdbq-1.9.8 → mdbq-2.0.0}/mdbq/aggregation/query_data.py +0 -0
- {mdbq-1.9.8 → mdbq-2.0.0}/mdbq/bdup/__init__.py +0 -0
- {mdbq-1.9.8 → mdbq-2.0.0}/mdbq/bdup/bdup.py +0 -0
- {mdbq-1.9.8 → mdbq-2.0.0}/mdbq/clean/__init__.py +0 -0
- {mdbq-1.9.8 → mdbq-2.0.0}/mdbq/company/__init__.py +0 -0
- {mdbq-1.9.8 → mdbq-2.0.0}/mdbq/company/copysh.py +0 -0
- {mdbq-1.9.8 → mdbq-2.0.0}/mdbq/config/__init__.py +0 -0
- {mdbq-1.9.8 → mdbq-2.0.0}/mdbq/config/products.py +0 -0
- {mdbq-1.9.8 → mdbq-2.0.0}/mdbq/config/set_support.py +0 -0
- {mdbq-1.9.8 → mdbq-2.0.0}/mdbq/config/update_conf.py +0 -0
- {mdbq-1.9.8 → mdbq-2.0.0}/mdbq/dataframe/__init__.py +0 -0
- {mdbq-1.9.8 → mdbq-2.0.0}/mdbq/dataframe/converter.py +0 -0
- {mdbq-1.9.8 → mdbq-2.0.0}/mdbq/log/__init__.py +0 -0
- {mdbq-1.9.8 → mdbq-2.0.0}/mdbq/log/mylogger.py +0 -0
- {mdbq-1.9.8 → mdbq-2.0.0}/mdbq/mongo/__init__.py +0 -0
- {mdbq-1.9.8 → mdbq-2.0.0}/mdbq/mongo/mongo.py +0 -0
- {mdbq-1.9.8 → mdbq-2.0.0}/mdbq/mysql/__init__.py +0 -0
- {mdbq-1.9.8 → mdbq-2.0.0}/mdbq/mysql/mysql.py +0 -0
- {mdbq-1.9.8 → mdbq-2.0.0}/mdbq/mysql/s_query.py +0 -0
- {mdbq-1.9.8 → mdbq-2.0.0}/mdbq/mysql/year_month_day.py +0 -0
- {mdbq-1.9.8 → mdbq-2.0.0}/mdbq/other/__init__.py +0 -0
- {mdbq-1.9.8 → mdbq-2.0.0}/mdbq/other/porxy.py +0 -0
- {mdbq-1.9.8 → mdbq-2.0.0}/mdbq/other/pov_city.py +0 -0
- {mdbq-1.9.8 → mdbq-2.0.0}/mdbq/other/sku_picture.py +0 -0
- {mdbq-1.9.8 → mdbq-2.0.0}/mdbq/other/ua_sj.py +0 -0
- {mdbq-1.9.8 → mdbq-2.0.0}/mdbq/pbix/__init__.py +0 -0
- {mdbq-1.9.8 → mdbq-2.0.0}/mdbq/pbix/pbix_refresh.py +0 -0
- {mdbq-1.9.8 → mdbq-2.0.0}/mdbq/pbix/refresh_all.py +0 -0
- {mdbq-1.9.8 → mdbq-2.0.0}/mdbq/spider/__init__.py +0 -0
- {mdbq-1.9.8 → mdbq-2.0.0}/mdbq.egg-info/SOURCES.txt +0 -0
- {mdbq-1.9.8 → mdbq-2.0.0}/mdbq.egg-info/dependency_links.txt +0 -0
- {mdbq-1.9.8 → mdbq-2.0.0}/mdbq.egg-info/top_level.txt +0 -0
- {mdbq-1.9.8 → mdbq-2.0.0}/setup.cfg +0 -0
@@ -1055,7 +1055,7 @@ class DatabaseUpdate:
|
|
1055
1055
|
)
|
1056
1056
|
|
1057
1057
|
|
1058
|
-
def upload_dir(path, db_name, collection_name, dbs={'mysql': True, 'mongodb': True}, json_path=None):
|
1058
|
+
def upload_dir(path, db_name, collection_name, dbs={'mysql': True, 'mongodb': True}, json_path=None, target_service='company'):
|
1059
1059
|
""" 上传一个文件夹到 mysql 或者 mongodb 数据库 """
|
1060
1060
|
if not os.path.isdir(path):
|
1061
1061
|
print(f'{os.path.splitext(os.path.basename(__file__))[0]}.upload_dir: 函数只接受文件夹路径,不是一个文件夹: {path}')
|
@@ -1063,7 +1063,7 @@ def upload_dir(path, db_name, collection_name, dbs={'mysql': True, 'mongodb': Tr
|
|
1063
1063
|
|
1064
1064
|
if dbs['mongodb']:
|
1065
1065
|
username, password, host, port = get_myconf.select_config_values(
|
1066
|
-
target_service=
|
1066
|
+
target_service=target_service,
|
1067
1067
|
database='mongodb',
|
1068
1068
|
)
|
1069
1069
|
d = mongo.UploadMongo(
|
@@ -1076,7 +1076,7 @@ def upload_dir(path, db_name, collection_name, dbs={'mysql': True, 'mongodb': Tr
|
|
1076
1076
|
|
1077
1077
|
if dbs['mysql']:
|
1078
1078
|
username, password, host, port = get_myconf.select_config_values(
|
1079
|
-
target_service=
|
1079
|
+
target_service=target_service,
|
1080
1080
|
database='mysql',
|
1081
1081
|
)
|
1082
1082
|
m = mysql.MysqlUpload(
|
@@ -1107,10 +1107,11 @@ def upload_dir(path, db_name, collection_name, dbs={'mysql': True, 'mongodb': Tr
|
|
1107
1107
|
for root, dirs, files in os.walk(path, topdown=False):
|
1108
1108
|
for name in files:
|
1109
1109
|
count += 1
|
1110
|
-
i =
|
1110
|
+
i = 1 # 用来统计当前处理文件进度
|
1111
1111
|
for root, dirs, files in os.walk(path, topdown=False):
|
1112
1112
|
for name in files:
|
1113
1113
|
if '~$' in name or '.DS' in name or '.localized' in name or 'baidu' in name:
|
1114
|
+
i += 1
|
1114
1115
|
continue
|
1115
1116
|
if name.endswith('.csv'):
|
1116
1117
|
try:
|
@@ -1161,7 +1162,7 @@ def one_file_to_mysql(file, db_name, table_name, target_service, database):
|
|
1161
1162
|
m.df_to_mysql(df=df, db_name=db_name, table_name=table_name, filename=filename, move_insert=False, df_sql=True, drop_duplicates=False,)
|
1162
1163
|
|
1163
1164
|
|
1164
|
-
def file_dir(one_file=True):
|
1165
|
+
def file_dir(one_file=True, target_service='company'):
|
1165
1166
|
"""
|
1166
1167
|
按照文件记录对照表上传数据
|
1167
1168
|
批量上传数据库
|
@@ -1195,7 +1196,7 @@ def file_dir(one_file=True):
|
|
1195
1196
|
file=real_path,
|
1196
1197
|
db_name=db_name,
|
1197
1198
|
table_name=table_name,
|
1198
|
-
target_service=
|
1199
|
+
target_service=target_service,
|
1199
1200
|
database='mysql'
|
1200
1201
|
)
|
1201
1202
|
else: # 上传全部文件夹
|
@@ -1203,7 +1204,8 @@ def file_dir(one_file=True):
|
|
1203
1204
|
path=os.path.join(path, sub_path),
|
1204
1205
|
db_name = db_name,
|
1205
1206
|
collection_name = table_name,
|
1206
|
-
dbs={'mysql': True, 'mongodb':
|
1207
|
+
dbs={'mysql': True, 'mongodb': False},
|
1208
|
+
target_service=target_service,
|
1207
1209
|
)
|
1208
1210
|
data.update({'入库进度': 1}) # 更新进度为已上传
|
1209
1211
|
# 将进度信息写回文件
|
@@ -1238,7 +1240,7 @@ def test2():
|
|
1238
1240
|
if __name__ == '__main__':
|
1239
1241
|
username, password, host, port = get_myconf.select_config_values(target_service='nas', database='mysql')
|
1240
1242
|
print(username, password, host, port)
|
1241
|
-
# file_dir(one_file=False)
|
1243
|
+
# file_dir(one_file=False, target_service='home_lx')
|
1242
1244
|
# one_file_to_mysql(
|
1243
1245
|
# file='/Users/xigua/数据中心/原始文件2/京东报表/JD推广_全站营销报表/2024-08/万里马箱包推广1_营销概况_全站营销_2024-08-19_2024-09-02.csv',
|
1244
1246
|
# db_name='京东数据2',
|
@@ -1247,23 +1249,24 @@ if __name__ == '__main__':
|
|
1247
1249
|
# database='mysql'
|
1248
1250
|
# )
|
1249
1251
|
|
1250
|
-
|
1251
|
-
|
1252
|
-
|
1253
|
-
|
1254
|
-
|
1255
|
-
|
1256
|
-
|
1257
|
-
|
1258
|
-
|
1252
|
+
db_name = '推广数据2'
|
1253
|
+
table_name = '权益报表'
|
1254
|
+
upload_dir(
|
1255
|
+
path='/Users/xigua/数据中心/原始文件2/推广报表/权益报表12313',
|
1256
|
+
db_name=db_name,
|
1257
|
+
collection_name=table_name,
|
1258
|
+
dbs={'mysql': True, 'mongodb': False},
|
1259
|
+
target_service='home_lx',
|
1260
|
+
)
|
1259
1261
|
|
1260
|
-
|
1261
|
-
|
1262
|
-
dp
|
1263
|
-
dp.
|
1264
|
-
#
|
1265
|
-
|
1266
|
-
|
1267
|
-
|
1268
|
-
|
1269
|
-
|
1262
|
+
|
1263
|
+
# # 新版 数据分类
|
1264
|
+
# dp = DatabaseUpdate(path='/Users/xigua/Downloads')
|
1265
|
+
# dp.new_unzip(is_move=True)
|
1266
|
+
# dp.cleaning(is_move=False) # 清洗数据, 存入 self.datas, 不需要立即移除文件,仍保留文件到原始文件中
|
1267
|
+
# # 将 self.datas 更新至数据库
|
1268
|
+
# dp.upload_df(service_databases=[
|
1269
|
+
# # {'home_lx': 'mongodb'},
|
1270
|
+
# {'company': 'mysql'},
|
1271
|
+
# # {'nas': 'mysql'},
|
1272
|
+
# ])
|
@@ -116,7 +116,15 @@ class DataClean:
|
|
116
116
|
|
117
117
|
df.replace(to_replace=['\\N'], value=0, regex=False, inplace=True) # 替换掉特殊字符
|
118
118
|
df.fillna(0, inplace=True)
|
119
|
-
col_ids = [
|
119
|
+
col_ids = [
|
120
|
+
# '场景ID', # 2024.10.5 改为不加 =""
|
121
|
+
'计划ID',
|
122
|
+
'单元ID',
|
123
|
+
'主体ID',
|
124
|
+
'宝贝ID',
|
125
|
+
'词ID/词包ID',
|
126
|
+
'创意ID',
|
127
|
+
]
|
120
128
|
sb = df.columns.tolist()
|
121
129
|
if '日期' not in sb:
|
122
130
|
print(f'{name} 注意:该报表不包含分日数据,数据不会保存,请重新下载!')
|
@@ -162,7 +170,10 @@ class DataClean:
|
|
162
170
|
shop_name = shop_name[0]
|
163
171
|
else:
|
164
172
|
shop_name = ''
|
165
|
-
cols = [
|
173
|
+
cols = [
|
174
|
+
# '场景ID', # 2024.10.5 改为不加 =""
|
175
|
+
'计划ID',
|
176
|
+
]
|
166
177
|
for col in cols:
|
167
178
|
df[col] = df[col].apply(lambda x: f'="{x}"' if x and '=' not in str(x) else x)
|
168
179
|
df.replace(to_replace=['\\N'], value=0, regex=False, inplace=True) # 替换掉特殊字符
|
@@ -62,7 +62,7 @@ def select_config_values(target_service, database, path=None):
|
|
62
62
|
if socket.gethostname() == 'xigua_lx':
|
63
63
|
# 本机自身运行使用 127.0.0.1
|
64
64
|
options = ['username_mysql_lx_nw', 'password_mysql_lx_nw', 'host_bd', 'port_mysql_lx_nw',]
|
65
|
-
elif socket.gethostname() == 'xigua1' or socket.gethostname() == '
|
65
|
+
elif socket.gethostname() == 'xigua1' or socket.gethostname() == 'MacBookPro':
|
66
66
|
# 内网地址:正在运行的是 家里笔记本或者台式机,或者 MacBook Pro
|
67
67
|
options = ['username_mysql_lx_nw', 'password_mysql_lx_nw', 'host_mysql_lx_nw', 'port_mysql_lx_nw',]
|
68
68
|
else:
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|