mdbq 2.0.3__py3-none-any.whl → 2.0.5__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- mdbq/aggregation/aggregation.py +32 -13
- mdbq/aggregation/query_data.py +1 -1
- mdbq/clean/data_clean.py +19 -6
- mdbq/config/products.py +3 -0
- {mdbq-2.0.3.dist-info → mdbq-2.0.5.dist-info}/METADATA +1 -1
- {mdbq-2.0.3.dist-info → mdbq-2.0.5.dist-info}/RECORD +8 -8
- {mdbq-2.0.3.dist-info → mdbq-2.0.5.dist-info}/WHEEL +0 -0
- {mdbq-2.0.3.dist-info → mdbq-2.0.5.dist-info}/top_level.txt +0 -0
mdbq/aggregation/aggregation.py
CHANGED
@@ -39,10 +39,11 @@ class DatabaseUpdate:
|
|
39
39
|
self.datas: list = [] # 带更新进数据库的数据集合
|
40
40
|
self.start_date = '2022-01-01' # 日期表的起始日期
|
41
41
|
|
42
|
-
def cleaning(self, is_move=True):
|
42
|
+
def cleaning(self, is_move=True, is_except=[]):
|
43
43
|
"""
|
44
44
|
数据清洗, 返回包含 数据库名, 集合名称, 和 df 主体
|
45
45
|
修改 cleaning 时,要同步 support 下的 标题对照表.csv
|
46
|
+
is_except: 需要排除不做处理的文件或文件夹
|
46
47
|
"""
|
47
48
|
if not os.path.exists(self.path):
|
48
49
|
print(f'1.1.0 初始化时传入了不存在的目录: {self.path}')
|
@@ -62,6 +63,16 @@ class DatabaseUpdate:
|
|
62
63
|
check_remove_file = False # 设置这个参数的目的: 避免误删其他文件, 不是本程序数据清洗覆盖的文件不做干预
|
63
64
|
if '~$' in name or '.DS' in name or '.localized' in name or '.ini' in name or '$RECYCLE.BIN' in name or 'Icon' in name:
|
64
65
|
continue
|
66
|
+
is_continue = False
|
67
|
+
if is_except:
|
68
|
+
for item in is_except:
|
69
|
+
if item in os.path.join(root, name):
|
70
|
+
# print(name)
|
71
|
+
is_continue = True
|
72
|
+
break
|
73
|
+
if is_continue: # 需要排除不做处理的文件或文件夹
|
74
|
+
continue
|
75
|
+
|
65
76
|
db_name = None # 初始化/重置变量,避免进入下一个循环
|
66
77
|
collection_name = None
|
67
78
|
for data in datas: # 根据标题对照表适配 db_name 和 collection_name
|
@@ -755,7 +766,15 @@ class DatabaseUpdate:
|
|
755
766
|
for name in files:
|
756
767
|
if '~$' in name or '.DS' in name or '.localized' in name or '.jpg' in name or '.png' in name:
|
757
768
|
continue
|
758
|
-
|
769
|
+
is_continue = False
|
770
|
+
if is_except:
|
771
|
+
for item in is_except:
|
772
|
+
if item in os.path.join(root, name):
|
773
|
+
# print(name)
|
774
|
+
is_continue = True
|
775
|
+
break
|
776
|
+
if is_continue: # 需要排除不做处理的文件或文件夹
|
777
|
+
continue
|
759
778
|
db_name = None # 初始化/重置变量,避免进入下一个循环
|
760
779
|
collection_name = None
|
761
780
|
for data in datas: # 根据标题对照表适配 db_name 和 collection_name
|
@@ -1252,7 +1271,7 @@ def test2():
|
|
1252
1271
|
if __name__ == '__main__':
|
1253
1272
|
username, password, host, port = get_myconf.select_config_values(target_service='nas', database='mysql')
|
1254
1273
|
print(username, password, host, port)
|
1255
|
-
|
1274
|
+
file_dir(one_file=False, target_service='company')
|
1256
1275
|
# one_file_to_mysql(
|
1257
1276
|
# file='/Users/xigua/数据中心/原始文件2/京东报表/JD推广_全站营销报表/2024-08/万里马箱包推广1_营销概况_全站营销_2024-08-19_2024-09-02.csv',
|
1258
1277
|
# db_name='京东数据2',
|
@@ -1272,13 +1291,13 @@ if __name__ == '__main__':
|
|
1272
1291
|
# )
|
1273
1292
|
|
1274
1293
|
|
1275
|
-
# 新版 数据分类
|
1276
|
-
dp = DatabaseUpdate(path='/Users/xigua/Downloads')
|
1277
|
-
dp.new_unzip(is_move=True)
|
1278
|
-
dp.cleaning(is_move=False) # 清洗数据, 存入 self.datas, 不需要立即移除文件,仍保留文件到原始文件中
|
1279
|
-
# 将 self.datas 更新至数据库
|
1280
|
-
# dp.upload_df(service_databases=[
|
1281
|
-
# # {'home_lx': 'mongodb'},
|
1282
|
-
# {'company': 'mysql'},
|
1283
|
-
# # {'nas': 'mysql'},
|
1284
|
-
# ])
|
1294
|
+
# # 新版 数据分类
|
1295
|
+
# dp = DatabaseUpdate(path='/Users/xigua/Downloads')
|
1296
|
+
# dp.new_unzip(is_move=True)
|
1297
|
+
# dp.cleaning(is_move=False) # 清洗数据, 存入 self.datas, 不需要立即移除文件,仍保留文件到原始文件中
|
1298
|
+
# # 将 self.datas 更新至数据库
|
1299
|
+
# # dp.upload_df(service_databases=[
|
1300
|
+
# # # {'home_lx': 'mongodb'},
|
1301
|
+
# # {'company': 'mysql'},
|
1302
|
+
# # # {'nas': 'mysql'},
|
1303
|
+
# # ])
|
mdbq/aggregation/query_data.py
CHANGED
@@ -1535,7 +1535,7 @@ def main():
|
|
1535
1535
|
|
1536
1536
|
|
1537
1537
|
if __name__ == '__main__':
|
1538
|
-
data_aggregation(service_databases=[{'
|
1538
|
+
data_aggregation(service_databases=[{'company': 'mysql'}], months=7) # 正常的聚合所有数据
|
1539
1539
|
# data_aggregation_one(service_databases=[{'company': 'mysql'}], months=1) # 单独聚合某一个数据库,具体库进函数编辑
|
1540
1540
|
# optimize_data.op_data(service_databases=[{'company': 'mysql'}], days=3650) # 立即启动对聚合数据的清理工作
|
1541
1541
|
|
mdbq/clean/data_clean.py
CHANGED
@@ -67,7 +67,7 @@ class DataClean:
|
|
67
67
|
_df.to_csv(os.path.join(_save_paths, filenames), encoding=encoding, index=False, header=True)
|
68
68
|
|
69
69
|
# @try_except
|
70
|
-
def change_and_sort(self, path=None):
|
70
|
+
def change_and_sort(self, path=None, is_except=[]):
|
71
71
|
"""数据转换"""
|
72
72
|
if not path:
|
73
73
|
path = self.path
|
@@ -87,6 +87,16 @@ class DataClean:
|
|
87
87
|
if '~$' in name or '.DS' in name or '.localized' in name or '.jpg' in name or '.png' in name:
|
88
88
|
continue
|
89
89
|
|
90
|
+
is_continue = False
|
91
|
+
if is_except:
|
92
|
+
for item in is_except:
|
93
|
+
if item in os.path.join(root, name):
|
94
|
+
# print(name)
|
95
|
+
is_continue = True
|
96
|
+
break
|
97
|
+
if is_continue: # 需要排除不做处理的文件或文件夹
|
98
|
+
continue
|
99
|
+
|
90
100
|
try:
|
91
101
|
encoding = self.get_encoding(file_path=pathlib.Path(root, name))
|
92
102
|
# ----------------- 推广报表 分割线 -----------------
|
@@ -272,11 +282,14 @@ class DataClean:
|
|
272
282
|
df.insert(loc=0, column='数据周期', value=data_lis)
|
273
283
|
df.insert(loc=0, column='日期', value=date01[0])
|
274
284
|
# 2024-2-19 官方更新了推广渠道来源名称
|
285
|
+
# df['三级来源'] = df['三级来源'].apply(
|
286
|
+
# lambda x: '精准人群推广' if x == '精准人群推广(原引力魔方)'
|
287
|
+
# else '关键词推广' if x == '关键词推广(原直通车)'
|
288
|
+
# else '智能场景' if x == '智能场景(原万相台)'
|
289
|
+
# else x
|
290
|
+
# )
|
275
291
|
df['三级来源'] = df['三级来源'].apply(
|
276
|
-
lambda x: '精准人群推广' if x == '精准人群推广(原引力魔方)'
|
277
|
-
else '关键词推广' if x == '关键词推广(原直通车)'
|
278
|
-
else '智能场景' if x == '智能场景(原万相台)'
|
279
|
-
else x
|
292
|
+
lambda x: re.sub('(.*)', '', str(x) if x else x)
|
280
293
|
)
|
281
294
|
# df = df[df['访客数'] != '0']
|
282
295
|
df['日期'] = pd.to_datetime(df['日期'], format='%Y-%m-%d', errors='ignore')
|
@@ -1130,7 +1143,7 @@ class DataClean:
|
|
1130
1143
|
bib(t_path)
|
1131
1144
|
elif '_新版' in name:
|
1132
1145
|
t_path = str(pathlib.Path(self.source_path, '生意参谋/流量来源'))
|
1133
|
-
bib(t_path)
|
1146
|
+
bib(t_path, _as_month=True)
|
1134
1147
|
else:
|
1135
1148
|
t_path = str(pathlib.Path(self.source_path, '生意参谋/流量来源_旧版'))
|
1136
1149
|
bib(t_path, _as_month=True)
|
mdbq/config/products.py
CHANGED
@@ -1,20 +1,20 @@
|
|
1
1
|
mdbq/__init__.py,sha256=Il5Q9ATdX8yXqVxtP_nYqUhExzxPC_qk_WXQ_4h0exg,16
|
2
2
|
mdbq/__version__.py,sha256=y9Mp_8x0BCZSHsdLT_q5tX9wZwd5QgqrSIENLrb6vXA,62
|
3
3
|
mdbq/aggregation/__init__.py,sha256=EeDqX2Aml6SPx8363J-v1lz0EcZtgwIBYyCJV6CcEDU,40
|
4
|
-
mdbq/aggregation/aggregation.py,sha256=
|
4
|
+
mdbq/aggregation/aggregation.py,sha256=TiSMZHa9F_f6iMptzCVdukWhCzXzpcYIh3lN61P-i94,74825
|
5
5
|
mdbq/aggregation/df_types.py,sha256=U9i3q2eRPTDY8qAPTw7irzu-Tlg4CIySW9uYro81wdk,8125
|
6
6
|
mdbq/aggregation/mysql_types.py,sha256=DQYROALDiwjJzjhaJfIIdnsrNs11i5BORlj_v6bp67Y,11062
|
7
7
|
mdbq/aggregation/optimize_data.py,sha256=u2Kl_MFtZueXJ57ycy4H2OhXD431RctUYJYCl637uT0,4176
|
8
|
-
mdbq/aggregation/query_data.py,sha256=
|
8
|
+
mdbq/aggregation/query_data.py,sha256=qBNjGTxaQl6rg2-_jlJKGz_sop9UVgoNj5z75XGl_iQ,72379
|
9
9
|
mdbq/bdup/__init__.py,sha256=AkhsGk81SkG1c8FqDH5tRq-8MZmFobVbN60DTyukYTY,28
|
10
10
|
mdbq/bdup/bdup.py,sha256=LAV0TgnQpc-LB-YuJthxb0U42_VkPidzQzAagan46lU,4234
|
11
11
|
mdbq/clean/__init__.py,sha256=A1d6x3L27j4NtLgiFV5TANwEkLuaDfPHDQNrPBbNWtU,41
|
12
|
-
mdbq/clean/data_clean.py,sha256=
|
12
|
+
mdbq/clean/data_clean.py,sha256=hyhLsX5UEmj2ROVScQMRdR52vUuuLE5uSG5QJ60gtQU,103176
|
13
13
|
mdbq/company/__init__.py,sha256=qz8F_GsP_pMB5PblgJAUAMjasuZbOEp3qQOCB39E8f0,21
|
14
14
|
mdbq/company/copysh.py,sha256=VUaaJPXPYPHWwnkdK77PWz_dAXZyEmYBA9Df1yROHAc,17764
|
15
15
|
mdbq/config/__init__.py,sha256=jso1oHcy6cJEfa7udS_9uO5X6kZLoPBF8l3wCYmr5dM,18
|
16
16
|
mdbq/config/get_myconf.py,sha256=cmNvsyoNa0RbZ9FOTjSd3jyyGwkxjUo0phvdHbGlrms,6010
|
17
|
-
mdbq/config/products.py,sha256=
|
17
|
+
mdbq/config/products.py,sha256=L1uhzdbqTprQg_rekKt0ucgpeIuMvi3H2v48_GZWPuY,5803
|
18
18
|
mdbq/config/set_support.py,sha256=xkZCX6y9Bq1ppBpJAofld4B2YtchA7fl0eT3dx3CrSI,777
|
19
19
|
mdbq/config/update_conf.py,sha256=taL3ZqKgiVWwUrDFuaYhim9a72Hm4BHRhhDscJTziR8,4535
|
20
20
|
mdbq/dataframe/__init__.py,sha256=2HtCN8AdRj53teXDqzysC1h8aPL-mMFy561ESmhehGQ,22
|
@@ -36,7 +36,7 @@ mdbq/pbix/__init__.py,sha256=Trtfaynu9RjoTyLLYBN2xdRxTvm_zhCniUkVTAYwcjo,24
|
|
36
36
|
mdbq/pbix/pbix_refresh.py,sha256=JUjKW3bNEyoMVfVfo77UhguvS5AWkixvVhDbw4_MHco,2396
|
37
37
|
mdbq/pbix/refresh_all.py,sha256=0uAnBKCd5cx5FLTkawN1GV9yi87rfyMgYal5LABtumQ,7186
|
38
38
|
mdbq/spider/__init__.py,sha256=RBMFXGy_jd1HXZhngB2T2XTvJqki8P_Fr-pBcwijnew,18
|
39
|
-
mdbq-2.0.
|
40
|
-
mdbq-2.0.3.dist-info/WHEEL,sha256=eOLhNAGa2EW3wWl_TU484h7q1UNgy0JXjjoqKoxAAQc,92
|
41
|
-
mdbq-2.0.3.dist-info/top_level.txt,sha256=2FQ-uLnCSB-OwFiWntzmwosW3X2Xqsg0ewh1axsaylA,5
|
42
|
-
mdbq-2.0.3.dist-info/RECORD,,
|
39
|
+
mdbq-2.0.5.dist-info/METADATA,sha256=q3s1z7iCeWS4qXY4yzg05F7K_JUDYhIp1H5Zlo-uYV4,245
|
40
|
+
mdbq-2.0.5.dist-info/WHEEL,sha256=eOLhNAGa2EW3wWl_TU484h7q1UNgy0JXjjoqKoxAAQc,92
|
41
|
+
mdbq-2.0.5.dist-info/top_level.txt,sha256=2FQ-uLnCSB-OwFiWntzmwosW3X2Xqsg0ewh1axsaylA,5
|
42
|
+
mdbq-2.0.5.dist-info/RECORD,,
|
File without changes
|
File without changes
|