mdbq 1.7.1__py3-none-any.whl → 1.7.2__py3-none-any.whl
This diff shows the changes between two publicly available versions of a package, as released to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in their respective public registries.
- mdbq/aggregation/aggregation.py +20 -5
- mdbq/clean/data_clean.py +17 -0
- {mdbq-1.7.1.dist-info → mdbq-1.7.2.dist-info}/METADATA +1 -1
- {mdbq-1.7.1.dist-info → mdbq-1.7.2.dist-info}/RECORD +6 -6
- {mdbq-1.7.1.dist-info → mdbq-1.7.2.dist-info}/WHEEL +0 -0
- {mdbq-1.7.1.dist-info → mdbq-1.7.2.dist-info}/top_level.txt +0 -0
mdbq/aggregation/aggregation.py
CHANGED
@@ -530,6 +530,17 @@ class DatabaseUpdate:
|
|
530
530
|
df.drop("'当前时间'", axis=1, inplace=True)
|
531
531
|
df.rename(columns={'全站ROI': '全站roi'}, inplace=True)
|
532
532
|
df.insert(loc=1, column='产品线', value='全站营销')
|
533
|
+
elif name.endswith('.csv') and '关键词点击成交报表_pbix同步_勿删改' in name:
|
534
|
+
df = pd.read_csv(os.path.join(root, name), encoding='utf-8_sig', header=0, na_filter=False)
|
535
|
+
for col in df.columns.tolist():
|
536
|
+
if '(' in col:
|
537
|
+
new_col = re.sub('[()]', '_', col)
|
538
|
+
new_col = new_col.strip('_')
|
539
|
+
df.rename(columns={col: new_col}, inplace=True)
|
540
|
+
df['日期'] = df['日期'].apply(lambda x: f'{str(x)[:4]}-{str(x)[4:6]}-{str(x)[6:8]}')
|
541
|
+
df['日期'] = pd.to_datetime(df['日期'], format='%Y-%m-%d', errors='ignore')
|
542
|
+
# min_clm = str(df['日期'].min()).split(' ')[0]
|
543
|
+
# max_clm = str(df['日期'].max()).split(' ')[0]
|
533
544
|
|
534
545
|
# 商品素材,必须保持放在最后处理
|
535
546
|
elif name.endswith('xlsx'):
|
@@ -960,7 +971,9 @@ def upload_dir(path, db_name, collection_name, dbs={'mysql': True, 'mongodb': Tr
|
|
960
971
|
d.df_to_mongo(df=df, db_name=db_name, collection_name=collection_name)
|
961
972
|
if dbs['mysql']: # drop_duplicates: 值为 True 时检查重复数据再插入
|
962
973
|
m.df_to_mysql(df=df, db_name=db_name, table_name=collection_name,
|
963
|
-
|
974
|
+
df_sql = True,
|
975
|
+
drop_duplicates=False,
|
976
|
+
filename=name, count=f'{i}/{count}')
|
964
977
|
# nas.df_to_mysql(df=df, db_name=db_name, table_name=collection_name, drop_duplicates=True,)
|
965
978
|
except Exception as e:
|
966
979
|
print(name, e)
|
@@ -1068,11 +1081,13 @@ if __name__ == '__main__':
|
|
1068
1081
|
# target_service='company',
|
1069
1082
|
# database='mysql'
|
1070
1083
|
# )
|
1071
|
-
|
1072
|
-
|
1084
|
+
|
1085
|
+
db_name = '京东数据2'
|
1086
|
+
table_name = '推广数据_关键词报表'
|
1073
1087
|
upload_dir(
|
1074
|
-
path='/Users/xigua
|
1088
|
+
path='/Users/xigua/数据中心/原始文件2/京东报表/JD推广_关键词报表',
|
1075
1089
|
db_name=db_name,
|
1076
1090
|
collection_name=table_name,
|
1077
|
-
dbs={'mysql':
|
1091
|
+
dbs={'mysql': True, 'mongodb': False},
|
1078
1092
|
)
|
1093
|
+
|
mdbq/clean/data_clean.py
CHANGED
@@ -683,6 +683,20 @@ class DataClean:
|
|
683
683
|
m.df_to_mysql(df=df, db_name='天猫数据1', tabel_name='万相台_人群洞察')
|
684
684
|
|
685
685
|
# ----------------------- 京东数据处理分界线 -----------------------
|
686
|
+
elif name.endswith('.csv') and '关键词点击成交报表_pbix同步_勿删改' in name:
|
687
|
+
df = pd.read_csv(os.path.join(root, name), encoding='utf-8_sig', header=0, na_filter=False)
|
688
|
+
for col in df.columns.tolist():
|
689
|
+
if '(' in col:
|
690
|
+
new_col = re.sub('[()]', '_', col)
|
691
|
+
new_col = new_col.strip('_')
|
692
|
+
df.rename(columns={col: new_col}, inplace=True)
|
693
|
+
df['日期'] = df['日期'].apply(lambda x: f'{str(x)[:4]}-{str(x)[4:6]}-{str(x)[6:8]}')
|
694
|
+
df['日期'] = pd.to_datetime(df['日期'], format='%Y-%m-%d', errors='ignore')
|
695
|
+
min_clm = str(df['日期'].min()).split(' ')[0]
|
696
|
+
max_clm = str(df['日期'].max()).split(' ')[0]
|
697
|
+
new_name = f'京东推广关键词点击成交报表_{min_clm}_{max_clm}.csv'
|
698
|
+
self.save_to_csv(df, root, new_name)
|
699
|
+
os.remove(os.path.join(root, name))
|
686
700
|
elif name.endswith('.csv') and '营销概况_全站营销' in name:
|
687
701
|
df = pd.read_csv(os.path.join(root, name), encoding='utf-8_sig', header=1, na_filter=False)
|
688
702
|
df = df[(df['日期'] != '日期') & (df['日期'] != '汇总') & (df['日期'] != '0') & (df['花费'] != '0') & (df['花费'] != '0.00')]
|
@@ -1199,6 +1213,9 @@ class DataClean:
|
|
1199
1213
|
elif name.endswith('.csv') and '营销概况_全站营销' in name:
|
1200
1214
|
t_path = str(pathlib.Path(self.source_path, '京东报表/JD推广_全站营销报表'))
|
1201
1215
|
bib(t_path, _as_month=True)
|
1216
|
+
elif name.endswith('.csv') and '京东推广关键词点击成交报表' in name:
|
1217
|
+
t_path = str(pathlib.Path(self.source_path, '京东报表/JD推广_关键词报表'))
|
1218
|
+
bib(t_path, _as_month=True)
|
1202
1219
|
# 京东分界线 ------- 结束标记
|
1203
1220
|
|
1204
1221
|
def attribute(self, path=None, _str='商品素材导出', ):
|
@@ -1,7 +1,7 @@
|
|
1
1
|
mdbq/__init__.py,sha256=Il5Q9ATdX8yXqVxtP_nYqUhExzxPC_qk_WXQ_4h0exg,16
|
2
2
|
mdbq/__version__.py,sha256=y9Mp_8x0BCZSHsdLT_q5tX9wZwd5QgqrSIENLrb6vXA,62
|
3
3
|
mdbq/aggregation/__init__.py,sha256=EeDqX2Aml6SPx8363J-v1lz0EcZtgwIBYyCJV6CcEDU,40
|
4
|
-
mdbq/aggregation/aggregation.py,sha256=
|
4
|
+
mdbq/aggregation/aggregation.py,sha256=cydwGivXyoLzvDrbhn8BR-rBRLnY8VjJK9R16pVkR4s,62979
|
5
5
|
mdbq/aggregation/df_types.py,sha256=oQJS2IBU3_IO6GMgbssHuC2yCjNnbta0QPGrFOwNLnU,7591
|
6
6
|
mdbq/aggregation/mysql_types.py,sha256=DQYROALDiwjJzjhaJfIIdnsrNs11i5BORlj_v6bp67Y,11062
|
7
7
|
mdbq/aggregation/optimize_data.py,sha256=u2Kl_MFtZueXJ57ycy4H2OhXD431RctUYJYCl637uT0,4176
|
@@ -9,7 +9,7 @@ mdbq/aggregation/query_data.py,sha256=N7y9bzmoK3hnurpA2hbYSJ6IMznj3D7NzmrlQo5gGg
|
|
9
9
|
mdbq/bdup/__init__.py,sha256=AkhsGk81SkG1c8FqDH5tRq-8MZmFobVbN60DTyukYTY,28
|
10
10
|
mdbq/bdup/bdup.py,sha256=LAV0TgnQpc-LB-YuJthxb0U42_VkPidzQzAagan46lU,4234
|
11
11
|
mdbq/clean/__init__.py,sha256=A1d6x3L27j4NtLgiFV5TANwEkLuaDfPHDQNrPBbNWtU,41
|
12
|
-
mdbq/clean/data_clean.py,sha256=
|
12
|
+
mdbq/clean/data_clean.py,sha256=dVycc_wyXH96hJvAXw-lVAZV-3eCEGD-o04oHxAecHs,92545
|
13
13
|
mdbq/company/__init__.py,sha256=qz8F_GsP_pMB5PblgJAUAMjasuZbOEp3qQOCB39E8f0,21
|
14
14
|
mdbq/company/copysh.py,sha256=WCZ92vCJAy6_ZFeOxWL-U9gArIpyga4xts-s1wKsspY,17268
|
15
15
|
mdbq/config/__init__.py,sha256=jso1oHcy6cJEfa7udS_9uO5X6kZLoPBF8l3wCYmr5dM,18
|
@@ -35,7 +35,7 @@ mdbq/pbix/__init__.py,sha256=Trtfaynu9RjoTyLLYBN2xdRxTvm_zhCniUkVTAYwcjo,24
|
|
35
35
|
mdbq/pbix/pbix_refresh.py,sha256=JUjKW3bNEyoMVfVfo77UhguvS5AWkixvVhDbw4_MHco,2396
|
36
36
|
mdbq/pbix/refresh_all.py,sha256=tgy762608HMaXWynbOURIf2UVMuSPybzrDXQnOOcnZU,6102
|
37
37
|
mdbq/spider/__init__.py,sha256=RBMFXGy_jd1HXZhngB2T2XTvJqki8P_Fr-pBcwijnew,18
|
38
|
-
mdbq-1.7.
|
39
|
-
mdbq-1.7.
|
40
|
-
mdbq-1.7.
|
41
|
-
mdbq-1.7.
|
38
|
+
mdbq-1.7.2.dist-info/METADATA,sha256=oY3Kodabng2WfHmpryLIpCIry7d6w4Bt-F5KwwIzNfs,245
|
39
|
+
mdbq-1.7.2.dist-info/WHEEL,sha256=eOLhNAGa2EW3wWl_TU484h7q1UNgy0JXjjoqKoxAAQc,92
|
40
|
+
mdbq-1.7.2.dist-info/top_level.txt,sha256=2FQ-uLnCSB-OwFiWntzmwosW3X2Xqsg0ewh1axsaylA,5
|
41
|
+
mdbq-1.7.2.dist-info/RECORD,,
|
File without changes
|
File without changes
|