mdbq 2.0.4__py3-none-any.whl → 2.0.5__py3-none-any.whl
This diff compares the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those package versions as they appear in their respective public registries.
- mdbq/aggregation/aggregation.py +21 -2
- mdbq/clean/data_clean.py +11 -1
- {mdbq-2.0.4.dist-info → mdbq-2.0.5.dist-info}/METADATA +1 -1
- {mdbq-2.0.4.dist-info → mdbq-2.0.5.dist-info}/RECORD +6 -6
- {mdbq-2.0.4.dist-info → mdbq-2.0.5.dist-info}/WHEEL +0 -0
- {mdbq-2.0.4.dist-info → mdbq-2.0.5.dist-info}/top_level.txt +0 -0
mdbq/aggregation/aggregation.py
CHANGED
@@ -39,10 +39,11 @@ class DatabaseUpdate:
|
|
39
39
|
self.datas: list = [] # 带更新进数据库的数据集合
|
40
40
|
self.start_date = '2022-01-01' # 日期表的起始日期
|
41
41
|
|
42
|
-
def cleaning(self, is_move=True):
|
42
|
+
def cleaning(self, is_move=True, is_except=[]):
|
43
43
|
"""
|
44
44
|
数据清洗, 返回包含 数据库名, 集合名称, 和 df 主体
|
45
45
|
修改 cleaning 时,要同步 support 下的 标题对照表.csv
|
46
|
+
is_except: 需要排除不做处理的文件或文件夹
|
46
47
|
"""
|
47
48
|
if not os.path.exists(self.path):
|
48
49
|
print(f'1.1.0 初始化时传入了不存在的目录: {self.path}')
|
@@ -62,6 +63,16 @@ class DatabaseUpdate:
|
|
62
63
|
check_remove_file = False # 设置这个参数的目的: 避免误删其他文件, 不是本程序数据清洗覆盖的文件不做干预
|
63
64
|
if '~$' in name or '.DS' in name or '.localized' in name or '.ini' in name or '$RECYCLE.BIN' in name or 'Icon' in name:
|
64
65
|
continue
|
66
|
+
is_continue = False
|
67
|
+
if is_except:
|
68
|
+
for item in is_except:
|
69
|
+
if item in os.path.join(root, name):
|
70
|
+
# print(name)
|
71
|
+
is_continue = True
|
72
|
+
break
|
73
|
+
if is_continue: # 需要排除不做处理的文件或文件夹
|
74
|
+
continue
|
75
|
+
|
65
76
|
db_name = None # 初始化/重置变量,避免进入下一个循环
|
66
77
|
collection_name = None
|
67
78
|
for data in datas: # 根据标题对照表适配 db_name 和 collection_name
|
@@ -755,7 +766,15 @@ class DatabaseUpdate:
|
|
755
766
|
for name in files:
|
756
767
|
if '~$' in name or '.DS' in name or '.localized' in name or '.jpg' in name or '.png' in name:
|
757
768
|
continue
|
758
|
-
|
769
|
+
is_continue = False
|
770
|
+
if is_except:
|
771
|
+
for item in is_except:
|
772
|
+
if item in os.path.join(root, name):
|
773
|
+
# print(name)
|
774
|
+
is_continue = True
|
775
|
+
break
|
776
|
+
if is_continue: # 需要排除不做处理的文件或文件夹
|
777
|
+
continue
|
759
778
|
db_name = None # 初始化/重置变量,避免进入下一个循环
|
760
779
|
collection_name = None
|
761
780
|
for data in datas: # 根据标题对照表适配 db_name 和 collection_name
|
mdbq/clean/data_clean.py
CHANGED
@@ -67,7 +67,7 @@ class DataClean:
|
|
67
67
|
_df.to_csv(os.path.join(_save_paths, filenames), encoding=encoding, index=False, header=True)
|
68
68
|
|
69
69
|
# @try_except
|
70
|
-
def change_and_sort(self, path=None):
|
70
|
+
def change_and_sort(self, path=None, is_except=[]):
|
71
71
|
"""数据转换"""
|
72
72
|
if not path:
|
73
73
|
path = self.path
|
@@ -87,6 +87,16 @@ class DataClean:
|
|
87
87
|
if '~$' in name or '.DS' in name or '.localized' in name or '.jpg' in name or '.png' in name:
|
88
88
|
continue
|
89
89
|
|
90
|
+
is_continue = False
|
91
|
+
if is_except:
|
92
|
+
for item in is_except:
|
93
|
+
if item in os.path.join(root, name):
|
94
|
+
# print(name)
|
95
|
+
is_continue = True
|
96
|
+
break
|
97
|
+
if is_continue: # 需要排除不做处理的文件或文件夹
|
98
|
+
continue
|
99
|
+
|
90
100
|
try:
|
91
101
|
encoding = self.get_encoding(file_path=pathlib.Path(root, name))
|
92
102
|
# ----------------- 推广报表 分割线 -----------------
|
@@ -1,7 +1,7 @@
|
|
1
1
|
mdbq/__init__.py,sha256=Il5Q9ATdX8yXqVxtP_nYqUhExzxPC_qk_WXQ_4h0exg,16
|
2
2
|
mdbq/__version__.py,sha256=y9Mp_8x0BCZSHsdLT_q5tX9wZwd5QgqrSIENLrb6vXA,62
|
3
3
|
mdbq/aggregation/__init__.py,sha256=EeDqX2Aml6SPx8363J-v1lz0EcZtgwIBYyCJV6CcEDU,40
|
4
|
-
mdbq/aggregation/aggregation.py,sha256=
|
4
|
+
mdbq/aggregation/aggregation.py,sha256=TiSMZHa9F_f6iMptzCVdukWhCzXzpcYIh3lN61P-i94,74825
|
5
5
|
mdbq/aggregation/df_types.py,sha256=U9i3q2eRPTDY8qAPTw7irzu-Tlg4CIySW9uYro81wdk,8125
|
6
6
|
mdbq/aggregation/mysql_types.py,sha256=DQYROALDiwjJzjhaJfIIdnsrNs11i5BORlj_v6bp67Y,11062
|
7
7
|
mdbq/aggregation/optimize_data.py,sha256=u2Kl_MFtZueXJ57ycy4H2OhXD431RctUYJYCl637uT0,4176
|
@@ -9,7 +9,7 @@ mdbq/aggregation/query_data.py,sha256=qBNjGTxaQl6rg2-_jlJKGz_sop9UVgoNj5z75XGl_i
|
|
9
9
|
mdbq/bdup/__init__.py,sha256=AkhsGk81SkG1c8FqDH5tRq-8MZmFobVbN60DTyukYTY,28
|
10
10
|
mdbq/bdup/bdup.py,sha256=LAV0TgnQpc-LB-YuJthxb0U42_VkPidzQzAagan46lU,4234
|
11
11
|
mdbq/clean/__init__.py,sha256=A1d6x3L27j4NtLgiFV5TANwEkLuaDfPHDQNrPBbNWtU,41
|
12
|
-
mdbq/clean/data_clean.py,sha256=
|
12
|
+
mdbq/clean/data_clean.py,sha256=hyhLsX5UEmj2ROVScQMRdR52vUuuLE5uSG5QJ60gtQU,103176
|
13
13
|
mdbq/company/__init__.py,sha256=qz8F_GsP_pMB5PblgJAUAMjasuZbOEp3qQOCB39E8f0,21
|
14
14
|
mdbq/company/copysh.py,sha256=VUaaJPXPYPHWwnkdK77PWz_dAXZyEmYBA9Df1yROHAc,17764
|
15
15
|
mdbq/config/__init__.py,sha256=jso1oHcy6cJEfa7udS_9uO5X6kZLoPBF8l3wCYmr5dM,18
|
@@ -36,7 +36,7 @@ mdbq/pbix/__init__.py,sha256=Trtfaynu9RjoTyLLYBN2xdRxTvm_zhCniUkVTAYwcjo,24
|
|
36
36
|
mdbq/pbix/pbix_refresh.py,sha256=JUjKW3bNEyoMVfVfo77UhguvS5AWkixvVhDbw4_MHco,2396
|
37
37
|
mdbq/pbix/refresh_all.py,sha256=0uAnBKCd5cx5FLTkawN1GV9yi87rfyMgYal5LABtumQ,7186
|
38
38
|
mdbq/spider/__init__.py,sha256=RBMFXGy_jd1HXZhngB2T2XTvJqki8P_Fr-pBcwijnew,18
|
39
|
-
mdbq-2.0.
|
40
|
-
mdbq-2.0.
|
41
|
-
mdbq-2.0.
|
42
|
-
mdbq-2.0.
|
39
|
+
mdbq-2.0.5.dist-info/METADATA,sha256=q3s1z7iCeWS4qXY4yzg05F7K_JUDYhIp1H5Zlo-uYV4,245
|
40
|
+
mdbq-2.0.5.dist-info/WHEEL,sha256=eOLhNAGa2EW3wWl_TU484h7q1UNgy0JXjjoqKoxAAQc,92
|
41
|
+
mdbq-2.0.5.dist-info/top_level.txt,sha256=2FQ-uLnCSB-OwFiWntzmwosW3X2Xqsg0ewh1axsaylA,5
|
42
|
+
mdbq-2.0.5.dist-info/RECORD,,
|
File without changes
|
File without changes
|