mdbq-2.6.9-py3-none-any.whl → mdbq-2.7.1-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- mdbq/aggregation/aggregation.py +32 -28
- mdbq/clean/clean_upload.py +24 -15
- {mdbq-2.6.9.dist-info → mdbq-2.7.1.dist-info}/METADATA +2 -2
- {mdbq-2.6.9.dist-info → mdbq-2.7.1.dist-info}/RECORD +6 -6
- {mdbq-2.6.9.dist-info → mdbq-2.7.1.dist-info}/WHEEL +1 -1
- {mdbq-2.6.9.dist-info → mdbq-2.7.1.dist-info}/top_level.txt +0 -0
mdbq/aggregation/aggregation.py
CHANGED
@@ -1,5 +1,7 @@
 # -*- coding:utf-8 -*-
 import warnings
+from unittest.mock import inplace
+
 import pandas as pd
 import numpy as np
 import chardet
@@ -1288,29 +1290,40 @@ def file_dir(one_file=True, target_service='company'):
 
 
 def test():
-    path = '
+    path = os.path.relpath(r'C:\Users\Administrator\Downloads\JD商品明细sku')
     for root, dirs, files in os.walk(path, topdown=False):
         for name in files:
             if name.endswith('.csv') and 'baidu' not in name and '~' not in name:
+                print(name)
+                # df = pd.read_excel(os.path.join(root, name), header=0)
                 df = pd.read_csv(os.path.join(root, name), encoding='utf-8_sig', header=0, na_filter=False)
-
-
+                cols = df.columns.tolist()
+                if '店铺名称' not in cols:
+                    df.insert(loc=1, column='店铺名称', value='京东箱包旗舰店')
+                if '曝光量' in cols:
+                    df.rename(columns={
+                        '曝光量': '搜索曝光量',
+                        '点击次数': '搜索点击次数',
+                        '点击率': '搜索点击率',
+                    }, inplace=True)
+                if '取消金额' in cols:
+                    df.rename(columns={
+                        '取消金额': '取消及售后退款金额',
+                        '取消商品件数': '取消及售后退款件数',
+                        '取消单量': '取消及售后退款单量',
+                    }, inplace=True)
+                if '取消及售后退款金额' not in cols:
+                    df['取消及售后退款金额'] = '0.0'
+                    df['取消及售后退款件数'] = 0
+                    df['取消及售后退款单量'] = 0
                 df.to_csv(os.path.join(root, name), encoding='utf-8_sig', index=False, header=True)
+                # new_name = f'{os.path.splitext(name)[0]}.xlsx'
+                # df.to_excel(os.path.join(root, name),
+                #             index=False, header=True, engine='openpyxl', freeze_panes=(1, 0))
         # break
     # break
 
 
-def test2():
-    dp = DatabaseUpdate(path='/Users/xigua/Downloads')
-    dp.new_unzip(is_move=True)
-    dp.cleaning(is_move=False, )  # 清洗数据, 存入 self.datas
-    dp.upload_df(service_databases=[
-        # {'home_lx': 'mongodb'},
-        {'company': 'mysql'},
-        # {'nas': 'mysql'}
-    ], path=None, service_name=None)
-
-
 if __name__ == '__main__':
     username, password, host, port = get_myconf.select_config_values(target_service='nas', database='mysql')
     print(username, password, host, port)
@@ -1326,24 +1339,15 @@ if __name__ == '__main__':
     # )
 
     # 上传一个目录到指定数据库
-    db_name = '
-    table_name = '
+    db_name = '京东数据3'
+    table_name = '京东商智_spu_商品明细'
     upload_dir(
-        path='
+        path=os.path.relpath(r'C:\同步空间\BaiduSyncdisk\原始文件3\京东报表\京东商智_spu_商品明细'),
         db_name=db_name,
         collection_name=table_name,
         dbs={'mysql': True, 'mongodb': False},
-        target_service='
+        target_service='home_lx',
     )
 
 
-    #
-    # dp = DatabaseUpdate(path='/Users/xigua/Downloads')
-    # dp.new_unzip(is_move=True)
-    # dp.cleaning(is_move=False)  # 清洗数据, 存入 self.datas, 不需要立即移除文件,仍保留文件到原始文件中
-    # # 将 self.datas 更新至数据库
-    # # dp.upload_df(service_databases=[
-    # #     # {'home_lx': 'mongodb'},
-    # #     {'company': 'mysql'},
-    # #     # {'nas': 'mysql'},
-    # # ])
+    # test()
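The rewritten test() helper in the hunk above walks a folder of JD (京东) CSV exports and normalizes their headers in place: it inserts a 店铺名称 column when missing, renames bare metric names such as 曝光量 to their 搜索-prefixed equivalents, and backfills the 取消及售后退款 columns before re-saving. A minimal standalone sketch of that pattern follows (the 取消金额 rename in the diff has the same shape); pandas is assumed to be installed, and normalize_jd_csv plus its shop_name default are illustrative names only, not part of mdbq.

    import os
    import pandas as pd

    def normalize_jd_csv(file_path, shop_name='京东箱包旗舰店'):
        # na_filter=False keeps empty cells as '' instead of NaN, as in the diff.
        df = pd.read_csv(file_path, encoding='utf-8_sig', header=0, na_filter=False)
        cols = df.columns.tolist()
        if '店铺名称' not in cols:
            # Insert the shop-name column right after the first column.
            df.insert(loc=1, column='店铺名称', value=shop_name)
        if '曝光量' in cols:
            # Rename bare metric names to their 搜索-prefixed equivalents.
            df.rename(columns={
                '曝光量': '搜索曝光量',
                '点击次数': '搜索点击次数',
                '点击率': '搜索点击率',
            }, inplace=True)
        if '取消及售后退款金额' not in df.columns:
            # Backfill refund/cancellation columns that older exports do not carry.
            df['取消及售后退款金额'] = '0.0'
            df['取消及售后退款件数'] = 0
            df['取消及售后退款单量'] = 0
        # Overwrite the source file with the normalized version.
        df.to_csv(file_path, encoding='utf-8_sig', index=False, header=True)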
mdbq/clean/clean_upload.py
CHANGED
@@ -145,6 +145,7 @@ class DataClean:
                     df = pd.read_excel(os.path.join(root, name), header=4)
                     if len(df) == 0:
                         print(f'{name} 报表数据不能为空')
+                        os.remove(os.path.join(root, name))
                         continue
                     df.replace(to_replace=['-'], value=0, regex=False, inplace=True)
                     df.replace(to_replace=[','], value='', regex=True, inplace=True)
@@ -159,6 +160,7 @@ class DataClean:
                     df = pd.read_excel(os.path.join(root, name), header=5, engine='xlrd')
                     if len(df) == 0:
                         print(f'{name} 报表数据不能为空')
+                        os.remove(os.path.join(root, name))
                         continue
                     df.replace(to_replace=['-'], value=0, regex=False, inplace=True)
                     df.replace(to_replace=[','], value='', regex=True, inplace=True)
@@ -255,6 +257,7 @@ class DataClean:
                     df = pd.read_csv(os.path.join(root, name), encoding='utf-8_sig', header=0, na_filter=False)
                     if len(df) == 0:
                         print(f'{name} 报表数据为空')
+                        os.remove(os.path.join(root, name))
                         continue
                     new_name = f'py_xg_{os.path.splitext(name)[0]}.csv'
                     self.save_to_csv(df, root, new_name, encoding='utf-8_sig')
@@ -263,6 +266,7 @@ class DataClean:
                     df = pd.read_csv(os.path.join(root, name), encoding='utf-8_sig', header=0, na_filter=False)
                     if len(df) == 0:
                         print(f'{name} 报表数据为空')
+                        os.remove(os.path.join(root, name))
                         continue
                     for col in df.columns.tolist():
                         if '(' in col or ')' in col:
@@ -563,6 +567,7 @@ class DataClean:
                     name_st = re.findall(r'([\u4e00-\u9fa5]+)\(分日', name)
                     if not name_st:
                         print(f'{name} 正则提取文件名失败')
+                        os.remove(os.path.join(root, name))
                         continue
                     encoding = self.get_encoding(file_path=os.path.join(root, name))
                     df = pd.read_csv(os.path.join(root, name), encoding=encoding, header=0, na_filter=False)
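The clean_upload.py hunks above all make the same small change: when a downloaded report reads back empty, or its filename cannot be parsed, the file is now deleted on the spot rather than merely skipped, presumably so it is not picked up again on later scans. A minimal sketch of that guard is below; pandas is assumed to be installed, and clean_empty_reports is an illustrative name, not an mdbq function.

    import os
    import pandas as pd

    def clean_empty_reports(path):
        # Visit subdirectories first (topdown=False), matching the walk order used in the diff.
        for root, dirs, files in os.walk(path, topdown=False):
            for name in files:
                if not name.endswith('.csv'):
                    continue
                df = pd.read_csv(os.path.join(root, name), encoding='utf-8_sig',
                                 header=0, na_filter=False)
                if len(df) == 0:
                    # Empty report: print a notice and delete it so it is not re-processed.
                    print(f'{name} 报表数据为空')
                    os.remove(os.path.join(root, name))
                    continue
                # ... normal cleaning would continue here for non-empty reports ...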
@@ -802,8 +807,9 @@ class DataClean:
                 if not is_continue:
                     continue
 
-                if name.endswith('.
-                    df = pd.read_excel(os.path.join(root, name), header=0, engine='openpyxl')
+                if name.endswith('.csv') and '京东推广_' in name:
+                    # df = pd.read_excel(os.path.join(root, name), header=0, engine='openpyxl')
+                    df = pd.read_csv(os.path.join(root, name), encoding='utf-8_sig', header=0, na_filter=False)
                     new_name = f'py_xg_{name}'
                     os.rename(os.path.join(root, name), os.path.join(root, new_name))
                 elif name.endswith('.xlsx') and '京东商智_sku_商品明细' in name:
@@ -813,9 +819,10 @@ class DataClean:
                     df.insert(loc=0, column='日期', value=pattern)
                     df.insert(loc=1, column='店铺名称', value='京东箱包旗舰店')
                     df.fillna(0, inplace=True)
-                    new_name = f'py_xg_{name}'
-                    df.
-
+                    new_name = f'py_xg_{os.path.splitext(name)[0]}.csv'
+                    df.to_csv(os.path.join(root, new_name), encoding='utf-8_sig', index=False, header=True)
+                    # df.to_excel(os.path.join(upload_path, new_name),
+                    #             index=False, header=True, engine='openpyxl', freeze_panes=(1, 0))
                     os.remove(os.path.join(root, name))
                 elif name.endswith('.xlsx') and '京东商智_spu_商品明细' in name:
                     df = pd.read_excel(os.path.join(root, name), header=0, engine='openpyxl')
@@ -824,9 +831,10 @@ class DataClean:
                     df.insert(loc=0, column='日期', value=pattern)
                     df.insert(loc=1, column='店铺名称', value='京东箱包旗舰店')
                     df.fillna(0, inplace=True)
-                    new_name = f'py_xg_{name}'
-                    df.
-
+                    new_name = f'py_xg_{os.path.splitext(name)[0]}.csv'
+                    df.to_csv(os.path.join(root, new_name), encoding='utf-8_sig', index=False, header=True)
+                    # df.to_excel(os.path.join(upload_path, new_name),
+                    #             index=False, header=True, engine='openpyxl', freeze_panes=(1, 0))
                     os.remove(os.path.join(root, name))
                 elif name.endswith('.xlsx') and '京东商智_店铺来源_三级来源' in name:
                     df = pd.read_excel(os.path.join(root, name), header=0, engine='openpyxl')
@@ -836,9 +844,10 @@ class DataClean:
                         if '环比' in col or '同比' in col:
                             df.drop(col, axis=1, inplace=True)
                     df.fillna(0, inplace=True)
-                    new_name = f'py_xg_{name}'
-                    df.
-
+                    new_name = f'py_xg_{os.path.splitext(name)[0]}.csv'
+                    df.to_csv(os.path.join(root, new_name), encoding='utf-8_sig', index=False, header=True)
+                    # df.to_excel(os.path.join(upload_path, new_name),
+                    #             index=False, header=True, engine='openpyxl', freeze_panes=(1, 0))
                     os.remove(os.path.join(root, name))
 
                 # 将数据传入 self.datas 等待更新进数据库
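The three 京东商智 hunks above switch the re-save step from Excel to CSV: the cleaned frame is written as a UTF-8 (BOM) .csv with a py_xg_ prefix and the original .xlsx is then removed. A minimal sketch of that conversion, assuming pandas and openpyxl are available (convert_report_to_csv is an illustrative name, not part of mdbq):

    import os
    import pandas as pd

    def convert_report_to_csv(root, name):
        # Load the source workbook with the openpyxl backend and fill blanks with 0, as in the diff.
        df = pd.read_excel(os.path.join(root, name), header=0, engine='openpyxl')
        df.fillna(0, inplace=True)
        # Keep the original stem, add the py_xg_ prefix, and switch the extension to .csv.
        new_name = f'py_xg_{os.path.splitext(name)[0]}.csv'
        df.to_csv(os.path.join(root, new_name), encoding='utf-8_sig', index=False, header=True)
        # Drop the source .xlsx once the CSV has been written.
        os.remove(os.path.join(root, name))
        return new_name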
@@ -1119,10 +1128,10 @@ class DataClean:
                     continue
 
                 if name.endswith('.xlsx') and '京东商智_spu_商品明细' in name:
-                    t_path = os.path.join(self.source_path, '京东报表', '
+                    t_path = os.path.join(self.source_path, '京东报表', '京东商智_spu_商品明细')
                     bib(t_path, _as_month=True)
                 elif name.endswith('.xlsx') and '京东商智_sku_商品明细' in name:
-                    t_path = os.path.join(self.source_path, '京东报表', '
+                    t_path = os.path.join(self.source_path, '京东报表', '京东商智_sku_商品明细')
                     bib(t_path, _as_month=True)
                 elif name.endswith('.xlsx') and '京东推广_搜索词' in name:
                     t_path = os.path.join(self.source_path, '京东报表', '搜索词报表')
@@ -1646,8 +1655,8 @@ def main(service_databases=None, is_mysql=False):
 if __name__ == '__main__':
     main(
         service_databases = [
-            {'company': 'mysql'},
-
+            # {'company': 'mysql'},
+            {'home_lx': 'mysql'},
             # {'home_lx': 'mongodb'},
             # {'nas': 'mysql'},
         ],
{mdbq-2.6.9.dist-info → mdbq-2.7.1.dist-info}/RECORD
CHANGED
@@ -1,7 +1,7 @@
 mdbq/__init__.py,sha256=Il5Q9ATdX8yXqVxtP_nYqUhExzxPC_qk_WXQ_4h0exg,16
 mdbq/__version__.py,sha256=y9Mp_8x0BCZSHsdLT_q5tX9wZwd5QgqrSIENLrb6vXA,62
 mdbq/aggregation/__init__.py,sha256=EeDqX2Aml6SPx8363J-v1lz0EcZtgwIBYyCJV6CcEDU,40
-mdbq/aggregation/aggregation.py,sha256=
+mdbq/aggregation/aggregation.py,sha256=wR_rHSu3srNBZPKng-7c3L_FKAuj6cL7GVwTCOAleH4,77125
 mdbq/aggregation/df_types.py,sha256=U9i3q2eRPTDY8qAPTw7irzu-Tlg4CIySW9uYro81wdk,8125
 mdbq/aggregation/mysql_types.py,sha256=DQYROALDiwjJzjhaJfIIdnsrNs11i5BORlj_v6bp67Y,11062
 mdbq/aggregation/optimize_data.py,sha256=gdScrgTAb6RbXHZy1LitX7lggMGn1GTLhkYSgztfwew,4903
@@ -9,7 +9,7 @@ mdbq/aggregation/query_data.py,sha256=m7Y2xSazPYKvy51yPK6n_Izsv5cjV83oHsiNc7N4fy
 mdbq/bdup/__init__.py,sha256=AkhsGk81SkG1c8FqDH5tRq-8MZmFobVbN60DTyukYTY,28
 mdbq/bdup/bdup.py,sha256=LAV0TgnQpc-LB-YuJthxb0U42_VkPidzQzAagan46lU,4234
 mdbq/clean/__init__.py,sha256=A1d6x3L27j4NtLgiFV5TANwEkLuaDfPHDQNrPBbNWtU,41
-mdbq/clean/clean_upload.py,sha256=
+mdbq/clean/clean_upload.py,sha256=_weFInJnBNZxqErIBHt_10SoMLLT5PIV_j_6n84Q_Y8,87490
 mdbq/clean/data_clean.py,sha256=ucfslhqXVZoH2QaXHSAWDky0GhIvH9f4GeNaHg4SrFE,104790
 mdbq/company/__init__.py,sha256=qz8F_GsP_pMB5PblgJAUAMjasuZbOEp3qQOCB39E8f0,21
 mdbq/company/copysh.py,sha256=sisL5eo3D5HGGYvRw46xGqnqFaI3SxfBnoa-Y7zknus,17541
@@ -44,7 +44,7 @@ mdbq/req_post/__init__.py,sha256=jso1oHcy6cJEfa7udS_9uO5X6kZLoPBF8l3wCYmr5dM,18
 mdbq/req_post/req_tb.py,sha256=PexWSCPJNM6Tv0ol4lAWIhlOwsAr_frnjtcdSHCFiek,36179
 mdbq/spider/__init__.py,sha256=RBMFXGy_jd1HXZhngB2T2XTvJqki8P_Fr-pBcwijnew,18
 mdbq/spider/aikucun.py,sha256=4Y5zd64hZUFtll8AdpUc2napDas-La-A6XzAhb2mLv0,17157
-mdbq-2.
-mdbq-2.
-mdbq-2.
-mdbq-2.
+mdbq-2.7.1.dist-info/METADATA,sha256=Dtp6f3EYkLh9ML8akrYeEZ0h6qzcdL1XkYfv2CHkHnM,243
+mdbq-2.7.1.dist-info/WHEEL,sha256=cpQTJ5IWu9CdaPViMhC9YzF8gZuS5-vlfoFihTBC86A,91
+mdbq-2.7.1.dist-info/top_level.txt,sha256=2FQ-uLnCSB-OwFiWntzmwosW3X2Xqsg0ewh1axsaylA,5
+mdbq-2.7.1.dist-info/RECORD,,
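For context on the RECORD hunks above: each entry has the form path,sha256=<digest>,<size>, where the digest is the urlsafe base64-encoded SHA-256 of the file with trailing padding stripped and the size is in bytes, so these lines change whenever a file's contents change. A minimal sketch of how such an entry is computed (record_entry is an illustrative helper, not part of mdbq):

    import base64
    import hashlib
    import os

    def record_entry(path):
        # Hash the file and encode the digest the way wheel RECORD files do:
        # urlsafe base64 with the trailing '=' padding stripped.
        with open(path, 'rb') as f:
            digest = hashlib.sha256(f.read()).digest()
        b64 = base64.urlsafe_b64encode(digest).rstrip(b'=').decode('ascii')
        return f'{path},sha256={b64},{os.path.getsize(path)}'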
{mdbq-2.6.9.dist-info → mdbq-2.7.1.dist-info}/top_level.txt
File without changes