mdbq 2.0.2__py3-none-any.whl → 2.0.3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- mdbq/aggregation/aggregation.py +14 -6
- mdbq/clean/data_clean.py +12 -0
- {mdbq-2.0.2.dist-info → mdbq-2.0.3.dist-info}/METADATA +1 -1
- {mdbq-2.0.2.dist-info → mdbq-2.0.3.dist-info}/RECORD +6 -6
- {mdbq-2.0.2.dist-info → mdbq-2.0.3.dist-info}/WHEEL +0 -0
- {mdbq-2.0.2.dist-info → mdbq-2.0.3.dist-info}/top_level.txt +0 -0
mdbq/aggregation/aggregation.py
CHANGED
@@ -320,6 +320,14 @@ class DatabaseUpdate:
|
|
320
320
|
)
|
321
321
|
df['日期'] = df['数据周期'].apply(lambda x: re.findall('(.*) ~', x)[0])
|
322
322
|
check_remove_file = True
|
323
|
+
elif name.endswith('.csv') and '分天数据-计划_活动类型-推广概览-数据汇总' in name:
|
324
|
+
df = pd.read_csv(os.path.join(root, name), encoding=encoding, header=0, na_filter=False)
|
325
|
+
df['日期'].replace(to_replace=['\\t'], value='', regex=True, inplace=True)
|
326
|
+
df['日期'] = pd.to_datetime(df['日期'], format='%Y-%m-%d', errors='ignore')
|
327
|
+
# min_clm = str(df['日期'].min()).split(' ')[0]
|
328
|
+
# max_clm = str(df['日期'].max()).split(' ')[0]
|
329
|
+
# new_name = f'淘宝联盟_分天数据_计划_活动类型_推广概览_数据汇总_{min_clm}_{max_clm}'
|
330
|
+
check_remove_file = True
|
323
331
|
elif name.endswith('.csv') and 'baobei' in name:
|
324
332
|
# 生意经宝贝指标日数据
|
325
333
|
date = re.findall(r's-(\d{4})(\d{2})(\d{2})\.', str(name))
|
@@ -1244,7 +1252,7 @@ def test2():
|
|
1244
1252
|
if __name__ == '__main__':
|
1245
1253
|
username, password, host, port = get_myconf.select_config_values(target_service='nas', database='mysql')
|
1246
1254
|
print(username, password, host, port)
|
1247
|
-
file_dir(one_file=False, target_service='home_lx')
|
1255
|
+
# file_dir(one_file=False, target_service='home_lx')
|
1248
1256
|
# one_file_to_mysql(
|
1249
1257
|
# file='/Users/xigua/数据中心/原始文件2/京东报表/JD推广_全站营销报表/2024-08/万里马箱包推广1_营销概况_全站营销_2024-08-19_2024-09-02.csv',
|
1250
1258
|
# db_name='京东数据2',
|
@@ -1264,11 +1272,11 @@ if __name__ == '__main__':
|
|
1264
1272
|
# )
|
1265
1273
|
|
1266
1274
|
|
1267
|
-
#
|
1268
|
-
|
1269
|
-
|
1270
|
-
|
1271
|
-
#
|
1275
|
+
# 新版 数据分类
|
1276
|
+
dp = DatabaseUpdate(path='/Users/xigua/Downloads')
|
1277
|
+
dp.new_unzip(is_move=True)
|
1278
|
+
dp.cleaning(is_move=False) # 清洗数据, 存入 self.datas, 不需要立即移除文件,仍保留文件到原始文件中
|
1279
|
+
# 将 self.datas 更新至数据库
|
1272
1280
|
# dp.upload_df(service_databases=[
|
1273
1281
|
# # {'home_lx': 'mongodb'},
|
1274
1282
|
# {'company': 'mysql'},
|
mdbq/clean/data_clean.py
CHANGED
@@ -474,6 +474,15 @@ class DataClean:
|
|
474
474
|
new_name = re.sub(r' ?(\(\d+\))', '',new_name)
|
475
475
|
self.save_to_csv(df, root, new_name) # mysql 可能改变 df 列名,所以在上传 mysql 前保存 csv
|
476
476
|
os.remove(os.path.join(root, name))
|
477
|
+
elif name.endswith('.csv') and '分天数据-计划_活动类型-推广概览-数据汇总' in name:
|
478
|
+
df = pd.read_csv(os.path.join(root, name), encoding=encoding, header=0, na_filter=False)
|
479
|
+
df['日期'].replace(to_replace=['\\t'], value='', regex=True, inplace=True)
|
480
|
+
df['日期'] = pd.to_datetime(df['日期'], format='%Y-%m-%d', errors='ignore')
|
481
|
+
min_clm = str(df['日期'].min()).split(' ')[0]
|
482
|
+
max_clm = str(df['日期'].max()).split(' ')[0]
|
483
|
+
new_name = f'淘宝联盟_分天数据_计划_活动类型_推广概览_数据汇总_{min_clm}_{max_clm}'
|
484
|
+
self.save_to_csv(df, root, new_name) # mysql 可能改变 df 列名,所以在上传 mysql 前保存 csv
|
485
|
+
os.remove(os.path.join(root, name))
|
477
486
|
elif name.endswith('.csv') and 'baobei' in name:
|
478
487
|
# 生意经宝贝指标日数据
|
479
488
|
# print(name)
|
@@ -1143,6 +1152,9 @@ class DataClean:
|
|
1143
1152
|
elif name.endswith('.csv') and '参谋店铺流量来源(月)' in name:
|
1144
1153
|
t_path = str(pathlib.Path(self.source_path, '月数据/流量来源-自助取数-月数据'))
|
1145
1154
|
bib(t_path, _as_month=True)
|
1155
|
+
elif name.endswith('.csv') and '淘宝联盟_分天数据_计划_活动类型_推广概览_数据汇总' in name:
|
1156
|
+
t_path = str(pathlib.Path(self.source_path, '月数据/淘宝联盟'))
|
1157
|
+
bib(t_path, _as_month=False)
|
1146
1158
|
elif name.endswith('.csv') and '竞店分析' in name and '来源分析-入店来源' in name:
|
1147
1159
|
t_path = str(pathlib.Path(self.source_path, '市场数据/竞店分析/来源分析/入店来源'))
|
1148
1160
|
bib(t_path, _as_month=False)
|
@@ -1,7 +1,7 @@
|
|
1
1
|
mdbq/__init__.py,sha256=Il5Q9ATdX8yXqVxtP_nYqUhExzxPC_qk_WXQ_4h0exg,16
|
2
2
|
mdbq/__version__.py,sha256=y9Mp_8x0BCZSHsdLT_q5tX9wZwd5QgqrSIENLrb6vXA,62
|
3
3
|
mdbq/aggregation/__init__.py,sha256=EeDqX2Aml6SPx8363J-v1lz0EcZtgwIBYyCJV6CcEDU,40
|
4
|
-
mdbq/aggregation/aggregation.py,sha256=
|
4
|
+
mdbq/aggregation/aggregation.py,sha256=ysHgpGoO2aXme0l1G9fqKMzWnvtTeqPwy07n4CNpdDA,73922
|
5
5
|
mdbq/aggregation/df_types.py,sha256=U9i3q2eRPTDY8qAPTw7irzu-Tlg4CIySW9uYro81wdk,8125
|
6
6
|
mdbq/aggregation/mysql_types.py,sha256=DQYROALDiwjJzjhaJfIIdnsrNs11i5BORlj_v6bp67Y,11062
|
7
7
|
mdbq/aggregation/optimize_data.py,sha256=u2Kl_MFtZueXJ57ycy4H2OhXD431RctUYJYCl637uT0,4176
|
@@ -9,7 +9,7 @@ mdbq/aggregation/query_data.py,sha256=ixvfjEai-zIN5fUiVg9nSFrzjiBfJv6QC9_mLTvFfg
|
|
9
9
|
mdbq/bdup/__init__.py,sha256=AkhsGk81SkG1c8FqDH5tRq-8MZmFobVbN60DTyukYTY,28
|
10
10
|
mdbq/bdup/bdup.py,sha256=LAV0TgnQpc-LB-YuJthxb0U42_VkPidzQzAagan46lU,4234
|
11
11
|
mdbq/clean/__init__.py,sha256=A1d6x3L27j4NtLgiFV5TANwEkLuaDfPHDQNrPBbNWtU,41
|
12
|
-
mdbq/clean/data_clean.py,sha256=
|
12
|
+
mdbq/clean/data_clean.py,sha256=kKPVvKq2WVUI9qLF7U-jW1IEJtBE9_a6rwJwpR9hfhI,102556
|
13
13
|
mdbq/company/__init__.py,sha256=qz8F_GsP_pMB5PblgJAUAMjasuZbOEp3qQOCB39E8f0,21
|
14
14
|
mdbq/company/copysh.py,sha256=VUaaJPXPYPHWwnkdK77PWz_dAXZyEmYBA9Df1yROHAc,17764
|
15
15
|
mdbq/config/__init__.py,sha256=jso1oHcy6cJEfa7udS_9uO5X6kZLoPBF8l3wCYmr5dM,18
|
@@ -36,7 +36,7 @@ mdbq/pbix/__init__.py,sha256=Trtfaynu9RjoTyLLYBN2xdRxTvm_zhCniUkVTAYwcjo,24
|
|
36
36
|
mdbq/pbix/pbix_refresh.py,sha256=JUjKW3bNEyoMVfVfo77UhguvS5AWkixvVhDbw4_MHco,2396
|
37
37
|
mdbq/pbix/refresh_all.py,sha256=0uAnBKCd5cx5FLTkawN1GV9yi87rfyMgYal5LABtumQ,7186
|
38
38
|
mdbq/spider/__init__.py,sha256=RBMFXGy_jd1HXZhngB2T2XTvJqki8P_Fr-pBcwijnew,18
|
39
|
-
mdbq-2.0.
|
40
|
-
mdbq-2.0.
|
41
|
-
mdbq-2.0.
|
42
|
-
mdbq-2.0.
|
39
|
+
mdbq-2.0.3.dist-info/METADATA,sha256=keQRDwBmHcrEjRS5EuqnKz9tfVGRUzntuD3Cmum4dy8,245
|
40
|
+
mdbq-2.0.3.dist-info/WHEEL,sha256=eOLhNAGa2EW3wWl_TU484h7q1UNgy0JXjjoqKoxAAQc,92
|
41
|
+
mdbq-2.0.3.dist-info/top_level.txt,sha256=2FQ-uLnCSB-OwFiWntzmwosW3X2Xqsg0ewh1axsaylA,5
|
42
|
+
mdbq-2.0.3.dist-info/RECORD,,
|
File without changes
|
File without changes
|