mdbq 2.0.2__tar.gz → 2.0.4__tar.gz
This diff shows the content of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
- {mdbq-2.0.2 → mdbq-2.0.4}/PKG-INFO +1 -1
- {mdbq-2.0.2 → mdbq-2.0.4}/mdbq/aggregation/aggregation.py +14 -6
- {mdbq-2.0.2 → mdbq-2.0.4}/mdbq/aggregation/query_data.py +1 -1
- {mdbq-2.0.2 → mdbq-2.0.4}/mdbq/clean/data_clean.py +20 -5
- {mdbq-2.0.2 → mdbq-2.0.4}/mdbq/config/products.py +3 -0
- {mdbq-2.0.2 → mdbq-2.0.4}/mdbq.egg-info/PKG-INFO +1 -1
- {mdbq-2.0.2 → mdbq-2.0.4}/setup.py +1 -1
- {mdbq-2.0.2 → mdbq-2.0.4}/README.txt +0 -0
- {mdbq-2.0.2 → mdbq-2.0.4}/mdbq/__init__.py +0 -0
- {mdbq-2.0.2 → mdbq-2.0.4}/mdbq/__version__.py +0 -0
- {mdbq-2.0.2 → mdbq-2.0.4}/mdbq/aggregation/__init__.py +0 -0
- {mdbq-2.0.2 → mdbq-2.0.4}/mdbq/aggregation/df_types.py +0 -0
- {mdbq-2.0.2 → mdbq-2.0.4}/mdbq/aggregation/mysql_types.py +0 -0
- {mdbq-2.0.2 → mdbq-2.0.4}/mdbq/aggregation/optimize_data.py +0 -0
- {mdbq-2.0.2 → mdbq-2.0.4}/mdbq/bdup/__init__.py +0 -0
- {mdbq-2.0.2 → mdbq-2.0.4}/mdbq/bdup/bdup.py +0 -0
- {mdbq-2.0.2 → mdbq-2.0.4}/mdbq/clean/__init__.py +0 -0
- {mdbq-2.0.2 → mdbq-2.0.4}/mdbq/company/__init__.py +0 -0
- {mdbq-2.0.2 → mdbq-2.0.4}/mdbq/company/copysh.py +0 -0
- {mdbq-2.0.2 → mdbq-2.0.4}/mdbq/config/__init__.py +0 -0
- {mdbq-2.0.2 → mdbq-2.0.4}/mdbq/config/get_myconf.py +0 -0
- {mdbq-2.0.2 → mdbq-2.0.4}/mdbq/config/set_support.py +0 -0
- {mdbq-2.0.2 → mdbq-2.0.4}/mdbq/config/update_conf.py +0 -0
- {mdbq-2.0.2 → mdbq-2.0.4}/mdbq/dataframe/__init__.py +0 -0
- {mdbq-2.0.2 → mdbq-2.0.4}/mdbq/dataframe/converter.py +0 -0
- {mdbq-2.0.2 → mdbq-2.0.4}/mdbq/log/__init__.py +0 -0
- {mdbq-2.0.2 → mdbq-2.0.4}/mdbq/log/mylogger.py +0 -0
- {mdbq-2.0.2 → mdbq-2.0.4}/mdbq/mongo/__init__.py +0 -0
- {mdbq-2.0.2 → mdbq-2.0.4}/mdbq/mongo/mongo.py +0 -0
- {mdbq-2.0.2 → mdbq-2.0.4}/mdbq/mysql/__init__.py +0 -0
- {mdbq-2.0.2 → mdbq-2.0.4}/mdbq/mysql/mysql.py +0 -0
- {mdbq-2.0.2 → mdbq-2.0.4}/mdbq/mysql/s_query.py +0 -0
- {mdbq-2.0.2 → mdbq-2.0.4}/mdbq/mysql/year_month_day.py +0 -0
- {mdbq-2.0.2 → mdbq-2.0.4}/mdbq/other/__init__.py +0 -0
- {mdbq-2.0.2 → mdbq-2.0.4}/mdbq/other/porxy.py +0 -0
- {mdbq-2.0.2 → mdbq-2.0.4}/mdbq/other/pov_city.py +0 -0
- {mdbq-2.0.2 → mdbq-2.0.4}/mdbq/other/sku_picture.py +0 -0
- {mdbq-2.0.2 → mdbq-2.0.4}/mdbq/other/ua_sj.py +0 -0
- {mdbq-2.0.2 → mdbq-2.0.4}/mdbq/pbix/__init__.py +0 -0
- {mdbq-2.0.2 → mdbq-2.0.4}/mdbq/pbix/pbix_refresh.py +0 -0
- {mdbq-2.0.2 → mdbq-2.0.4}/mdbq/pbix/refresh_all.py +0 -0
- {mdbq-2.0.2 → mdbq-2.0.4}/mdbq/spider/__init__.py +0 -0
- {mdbq-2.0.2 → mdbq-2.0.4}/mdbq.egg-info/SOURCES.txt +0 -0
- {mdbq-2.0.2 → mdbq-2.0.4}/mdbq.egg-info/dependency_links.txt +0 -0
- {mdbq-2.0.2 → mdbq-2.0.4}/mdbq.egg-info/top_level.txt +0 -0
- {mdbq-2.0.2 → mdbq-2.0.4}/setup.cfg +0 -0
@@ -320,6 +320,14 @@ class DatabaseUpdate:
|
|
320
320
|
)
|
321
321
|
df['日期'] = df['数据周期'].apply(lambda x: re.findall('(.*) ~', x)[0])
|
322
322
|
check_remove_file = True
|
323
|
+
elif name.endswith('.csv') and '分天数据-计划_活动类型-推广概览-数据汇总' in name:
|
324
|
+
df = pd.read_csv(os.path.join(root, name), encoding=encoding, header=0, na_filter=False)
|
325
|
+
df['日期'].replace(to_replace=['\\t'], value='', regex=True, inplace=True)
|
326
|
+
df['日期'] = pd.to_datetime(df['日期'], format='%Y-%m-%d', errors='ignore')
|
327
|
+
# min_clm = str(df['日期'].min()).split(' ')[0]
|
328
|
+
# max_clm = str(df['日期'].max()).split(' ')[0]
|
329
|
+
# new_name = f'淘宝联盟_分天数据_计划_活动类型_推广概览_数据汇总_{min_clm}_{max_clm}'
|
330
|
+
check_remove_file = True
|
323
331
|
elif name.endswith('.csv') and 'baobei' in name:
|
324
332
|
# 生意经宝贝指标日数据
|
325
333
|
date = re.findall(r's-(\d{4})(\d{2})(\d{2})\.', str(name))
|
@@ -1244,7 +1252,7 @@ def test2():
|
|
1244
1252
|
if __name__ == '__main__':
|
1245
1253
|
username, password, host, port = get_myconf.select_config_values(target_service='nas', database='mysql')
|
1246
1254
|
print(username, password, host, port)
|
1247
|
-
file_dir(one_file=False, target_service='
|
1255
|
+
file_dir(one_file=False, target_service='company')
|
1248
1256
|
# one_file_to_mysql(
|
1249
1257
|
# file='/Users/xigua/数据中心/原始文件2/京东报表/JD推广_全站营销报表/2024-08/万里马箱包推广1_营销概况_全站营销_2024-08-19_2024-09-02.csv',
|
1250
1258
|
# db_name='京东数据2',
|
@@ -1269,8 +1277,8 @@ if __name__ == '__main__':
|
|
1269
1277
|
# dp.new_unzip(is_move=True)
|
1270
1278
|
# dp.cleaning(is_move=False) # 清洗数据, 存入 self.datas, 不需要立即移除文件,仍保留文件到原始文件中
|
1271
1279
|
# # 将 self.datas 更新至数据库
|
1272
|
-
# dp.upload_df(service_databases=[
|
1273
|
-
# # {'home_lx': 'mongodb'},
|
1274
|
-
# {'company': 'mysql'},
|
1275
|
-
# # {'nas': 'mysql'},
|
1276
|
-
# ])
|
1280
|
+
# # dp.upload_df(service_databases=[
|
1281
|
+
# # # {'home_lx': 'mongodb'},
|
1282
|
+
# # {'company': 'mysql'},
|
1283
|
+
# # # {'nas': 'mysql'},
|
1284
|
+
# # ])
|
@@ -1535,7 +1535,7 @@ def main():
|
|
1535
1535
|
|
1536
1536
|
|
1537
1537
|
if __name__ == '__main__':
|
1538
|
-
data_aggregation(service_databases=[{'
|
1538
|
+
data_aggregation(service_databases=[{'company': 'mysql'}], months=7) # 正常的聚合所有数据
|
1539
1539
|
# data_aggregation_one(service_databases=[{'company': 'mysql'}], months=1) # 单独聚合某一个数据库,具体库进函数编辑
|
1540
1540
|
# optimize_data.op_data(service_databases=[{'company': 'mysql'}], days=3650) # 立即启动对聚合数据的清理工作
|
1541
1541
|
|
@@ -272,11 +272,14 @@ class DataClean:
|
|
272
272
|
df.insert(loc=0, column='数据周期', value=data_lis)
|
273
273
|
df.insert(loc=0, column='日期', value=date01[0])
|
274
274
|
# 2024-2-19 官方更新了推广渠道来源名称
|
275
|
+
# df['三级来源'] = df['三级来源'].apply(
|
276
|
+
# lambda x: '精准人群推广' if x == '精准人群推广(原引力魔方)'
|
277
|
+
# else '关键词推广' if x == '关键词推广(原直通车)'
|
278
|
+
# else '智能场景' if x == '智能场景(原万相台)'
|
279
|
+
# else x
|
280
|
+
# )
|
275
281
|
df['三级来源'] = df['三级来源'].apply(
|
276
|
-
lambda x: '精准人群推广' if x == '精准人群推广(原引力魔方)'
|
277
|
-
else '关键词推广' if x == '关键词推广(原直通车)'
|
278
|
-
else '智能场景' if x == '智能场景(原万相台)'
|
279
|
-
else x
|
282
|
+
lambda x: re.sub('(.*)', '', str(x) if x else x)
|
280
283
|
)
|
281
284
|
# df = df[df['访客数'] != '0']
|
282
285
|
df['日期'] = pd.to_datetime(df['日期'], format='%Y-%m-%d', errors='ignore')
|
@@ -474,6 +477,15 @@ class DataClean:
|
|
474
477
|
new_name = re.sub(r' ?(\(\d+\))', '',new_name)
|
475
478
|
self.save_to_csv(df, root, new_name) # mysql 可能改变 df 列名,所以在上传 mysql 前保存 csv
|
476
479
|
os.remove(os.path.join(root, name))
|
480
|
+
elif name.endswith('.csv') and '分天数据-计划_活动类型-推广概览-数据汇总' in name:
|
481
|
+
df = pd.read_csv(os.path.join(root, name), encoding=encoding, header=0, na_filter=False)
|
482
|
+
df['日期'].replace(to_replace=['\\t'], value='', regex=True, inplace=True)
|
483
|
+
df['日期'] = pd.to_datetime(df['日期'], format='%Y-%m-%d', errors='ignore')
|
484
|
+
min_clm = str(df['日期'].min()).split(' ')[0]
|
485
|
+
max_clm = str(df['日期'].max()).split(' ')[0]
|
486
|
+
new_name = f'淘宝联盟_分天数据_计划_活动类型_推广概览_数据汇总_{min_clm}_{max_clm}'
|
487
|
+
self.save_to_csv(df, root, new_name) # mysql 可能改变 df 列名,所以在上传 mysql 前保存 csv
|
488
|
+
os.remove(os.path.join(root, name))
|
477
489
|
elif name.endswith('.csv') and 'baobei' in name:
|
478
490
|
# 生意经宝贝指标日数据
|
479
491
|
# print(name)
|
@@ -1121,7 +1133,7 @@ class DataClean:
|
|
1121
1133
|
bib(t_path)
|
1122
1134
|
elif '_新版' in name:
|
1123
1135
|
t_path = str(pathlib.Path(self.source_path, '生意参谋/流量来源'))
|
1124
|
-
bib(t_path)
|
1136
|
+
bib(t_path, _as_month=True)
|
1125
1137
|
else:
|
1126
1138
|
t_path = str(pathlib.Path(self.source_path, '生意参谋/流量来源_旧版'))
|
1127
1139
|
bib(t_path, _as_month=True)
|
@@ -1143,6 +1155,9 @@ class DataClean:
|
|
1143
1155
|
elif name.endswith('.csv') and '参谋店铺流量来源(月)' in name:
|
1144
1156
|
t_path = str(pathlib.Path(self.source_path, '月数据/流量来源-自助取数-月数据'))
|
1145
1157
|
bib(t_path, _as_month=True)
|
1158
|
+
elif name.endswith('.csv') and '淘宝联盟_分天数据_计划_活动类型_推广概览_数据汇总' in name:
|
1159
|
+
t_path = str(pathlib.Path(self.source_path, '月数据/淘宝联盟'))
|
1160
|
+
bib(t_path, _as_month=False)
|
1146
1161
|
elif name.endswith('.csv') and '竞店分析' in name and '来源分析-入店来源' in name:
|
1147
1162
|
t_path = str(pathlib.Path(self.source_path, '市场数据/竞店分析/来源分析/入店来源'))
|
1148
1163
|
bib(t_path, _as_month=False)
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|