mdbq 2.5.6__py3-none-any.whl → 2.5.8__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- mdbq/clean/clean_upload.py +47 -2
- mdbq/dataframe/converter.py +3 -3
- {mdbq-2.5.6.dist-info → mdbq-2.5.8.dist-info}/METADATA +1 -1
- {mdbq-2.5.6.dist-info → mdbq-2.5.8.dist-info}/RECORD +6 -6
- {mdbq-2.5.6.dist-info → mdbq-2.5.8.dist-info}/WHEEL +0 -0
- {mdbq-2.5.6.dist-info → mdbq-2.5.8.dist-info}/top_level.txt +0 -0
mdbq/clean/clean_upload.py
CHANGED
@@ -7,7 +7,6 @@ import zipfile
|
|
7
7
|
from pyzipper import PyZipFile
|
8
8
|
import os
|
9
9
|
import platform
|
10
|
-
import pathlib
|
11
10
|
import json
|
12
11
|
from mdbq.mongo import mongo
|
13
12
|
from mdbq.mysql import mysql
|
@@ -711,7 +710,7 @@ class DataClean:
|
|
711
710
|
_date = re.findall(r'(\d{4}-\d{2})-\d{2}', str(_name))
|
712
711
|
if _date:
|
713
712
|
_date = _date[0]
|
714
|
-
t2 =
|
713
|
+
t2 = os.path.join(t2, _date) # 添加 年月分类
|
715
714
|
if not os.path.exists(t2):
|
716
715
|
os.makedirs(t2, exist_ok=True)
|
717
716
|
old_file = os.path.join(t2, _name) # 检查目标位置是否已经存在该文件
|
@@ -1213,6 +1212,49 @@ def main(service_databases=None):
|
|
1213
1212
|
)
|
1214
1213
|
|
1215
1214
|
|
1215
|
+
def test():
|
1216
|
+
# main_key = '单元报表'
|
1217
|
+
path = f'/Users/xigua/数据中心/原始文件2/京东报表/JD流量来源13414124124'
|
1218
|
+
for root, dirs, files in os.walk(path, topdown=False):
|
1219
|
+
for name in files:
|
1220
|
+
if '~$' in name or '.DS' in name or '.localized' in name or '.jpg' in name or '.png' in name:
|
1221
|
+
continue
|
1222
|
+
if 'py_xg' in name:
|
1223
|
+
continue
|
1224
|
+
if 'TM_旧表_字段' in root:
|
1225
|
+
continue
|
1226
|
+
|
1227
|
+
if name.endswith('.csv'):
|
1228
|
+
print(name)
|
1229
|
+
df = pd.read_csv(os.path.join(root, name), encoding='utf-8_sig', header=0, na_filter=False)
|
1230
|
+
# if '店铺名称' not in df.columns.tolist():
|
1231
|
+
# df.insert(loc=1, column='店铺名称', value='京东箱包旗舰店')
|
1232
|
+
for item in df.columns.tolist():
|
1233
|
+
if '同比' in item or '环比' in item or '时间' in item:
|
1234
|
+
df.pop(item)
|
1235
|
+
date_all = re.findall(r'_(\d{4}-\d{2}-\d{2})_\d{4}-\d{2}-\d{2}', name)[0]
|
1236
|
+
|
1237
|
+
date = re.findall(r'_(\d{4}-\d{2})-\d{2}', name)[0]
|
1238
|
+
|
1239
|
+
new_path = f'/Users/xigua/数据中心/原始文件3/京东报表/店铺来源_三级来源/{date}'
|
1240
|
+
# new_path = os.path.join(new_path, date) # 添加 年月分类
|
1241
|
+
if not os.path.exists(new_path):
|
1242
|
+
os.makedirs(new_path, exist_ok=True)
|
1243
|
+
# print(date_all)
|
1244
|
+
|
1245
|
+
new_name = f'py_xg_京东商智_店铺来源_三级来源_{date_all}.xlsx'
|
1246
|
+
# print(os.path.join(new_path, new_name))
|
1247
|
+
# breakpoint()
|
1248
|
+
# df.to_csv(os.path.join(new_path, new_name), encoding='utf-8_sig', index=False, header=True)
|
1249
|
+
try:
|
1250
|
+
df.to_excel(os.path.join(new_path, new_name),
|
1251
|
+
index=False, header=True, engine='openpyxl', freeze_panes=(1, 0))
|
1252
|
+
except Exception as e:
|
1253
|
+
print(e)
|
1254
|
+
|
1255
|
+
|
1256
|
+
|
1257
|
+
|
1216
1258
|
if __name__ == '__main__':
|
1217
1259
|
main(
|
1218
1260
|
service_databases = [
|
@@ -1230,3 +1272,6 @@ if __name__ == '__main__':
|
|
1230
1272
|
# )
|
1231
1273
|
# c.sp_scene_clean(is_except=['except']) # 商品素材
|
1232
1274
|
# c.move_tg_tm(is_except=['临时文件', ]) # 天猫,移到文件到原始文件夹
|
1275
|
+
|
1276
|
+
|
1277
|
+
# test()
|
mdbq/dataframe/converter.py
CHANGED
@@ -47,7 +47,7 @@ class DataFrameConverter(object):
|
|
47
47
|
try:
|
48
48
|
# 百分比在某些数据库中不兼容, 转换百分比为小数, # 转百分比的列不能含有中文或特殊字符
|
49
49
|
df[col] = df[col].apply(
|
50
|
-
lambda x: float(float((str(x).rstrip("%"))) / 100) if re.findall(r'^\d
|
50
|
+
lambda x: float(float((str(x).rstrip("%"))) / 100) if re.findall(r'^\d+\.?\d*%', str(x)) else x)
|
51
51
|
except Exception as e:
|
52
52
|
print(f'留意错误信息: 位于列 -> {col} -> {e}')
|
53
53
|
|
@@ -93,6 +93,6 @@ if __name__ == '__main__':
|
|
93
93
|
# df = converter.convert_df_cols(df)
|
94
94
|
# print(df['a'].dtype)
|
95
95
|
# print(df)
|
96
|
-
pattern = '
|
97
|
-
pattern = re.
|
96
|
+
pattern = '1540%'
|
97
|
+
pattern = re.findall(r'^\d+\.?\d*%', pattern)
|
98
98
|
print(pattern)
|
@@ -9,7 +9,7 @@ mdbq/aggregation/query_data.py,sha256=WKe42Xq1Gi-ELuIT0k2jh3X4-R7heb0ub3Mj3yuCRA
|
|
9
9
|
mdbq/bdup/__init__.py,sha256=AkhsGk81SkG1c8FqDH5tRq-8MZmFobVbN60DTyukYTY,28
|
10
10
|
mdbq/bdup/bdup.py,sha256=LAV0TgnQpc-LB-YuJthxb0U42_VkPidzQzAagan46lU,4234
|
11
11
|
mdbq/clean/__init__.py,sha256=A1d6x3L27j4NtLgiFV5TANwEkLuaDfPHDQNrPBbNWtU,41
|
12
|
-
mdbq/clean/clean_upload.py,sha256=
|
12
|
+
mdbq/clean/clean_upload.py,sha256=mkJvqW0ewhYelNsAKw_6ajV8eggzRFVgTYaPPYFt3Ak,66478
|
13
13
|
mdbq/clean/data_clean.py,sha256=ucfslhqXVZoH2QaXHSAWDky0GhIvH9f4GeNaHg4SrFE,104790
|
14
14
|
mdbq/company/__init__.py,sha256=qz8F_GsP_pMB5PblgJAUAMjasuZbOEp3qQOCB39E8f0,21
|
15
15
|
mdbq/company/copysh.py,sha256=NvlXCBZBcO2GIT5nLRYYqhOyHWM1-1RE7DHvgbj6jmQ,19723
|
@@ -20,7 +20,7 @@ mdbq/config/products.py,sha256=hN9UMkM6j76HYMulTYdtr3mOhh9QdpvvrLH14a_mbFY,5980
|
|
20
20
|
mdbq/config/set_support.py,sha256=xkZCX6y9Bq1ppBpJAofld4B2YtchA7fl0eT3dx3CrSI,777
|
21
21
|
mdbq/config/update_conf.py,sha256=taL3ZqKgiVWwUrDFuaYhim9a72Hm4BHRhhDscJTziR8,4535
|
22
22
|
mdbq/dataframe/__init__.py,sha256=2HtCN8AdRj53teXDqzysC1h8aPL-mMFy561ESmhehGQ,22
|
23
|
-
mdbq/dataframe/converter.py,sha256=
|
23
|
+
mdbq/dataframe/converter.py,sha256=SJLZ96f6QBxnPcaaKDi3UOLNk0b7O6aWLAyDOMiSy80,4312
|
24
24
|
mdbq/log/__init__.py,sha256=Mpbrav0s0ifLL7lVDAuePEi1hJKiSHhxcv1byBKDl5E,15
|
25
25
|
mdbq/log/mylogger.py,sha256=oaT7Bp-Hb9jZt52seP3ISUuxVcI19s4UiqTeouScBO0,3258
|
26
26
|
mdbq/mongo/__init__.py,sha256=SILt7xMtQIQl_m-ik9WLtJSXIVf424iYgCfE_tnQFbw,13
|
@@ -42,7 +42,7 @@ mdbq/req_post/__init__.py,sha256=jso1oHcy6cJEfa7udS_9uO5X6kZLoPBF8l3wCYmr5dM,18
|
|
42
42
|
mdbq/req_post/req_tb.py,sha256=PexWSCPJNM6Tv0ol4lAWIhlOwsAr_frnjtcdSHCFiek,36179
|
43
43
|
mdbq/spider/__init__.py,sha256=RBMFXGy_jd1HXZhngB2T2XTvJqki8P_Fr-pBcwijnew,18
|
44
44
|
mdbq/spider/aikucun.py,sha256=KdihSB3q44jsXUQAldfWRVfCSrEw2MNbM-_BhP_29g4,14448
|
45
|
-
mdbq-2.5.
|
46
|
-
mdbq-2.5.
|
47
|
-
mdbq-2.5.
|
48
|
-
mdbq-2.5.
|
45
|
+
mdbq-2.5.8.dist-info/METADATA,sha256=nu5MMMIj05LwXRKsmJwUs1e95SopXUec2w3znVPII7s,243
|
46
|
+
mdbq-2.5.8.dist-info/WHEEL,sha256=cpQTJ5IWu9CdaPViMhC9YzF8gZuS5-vlfoFihTBC86A,91
|
47
|
+
mdbq-2.5.8.dist-info/top_level.txt,sha256=2FQ-uLnCSB-OwFiWntzmwosW3X2Xqsg0ewh1axsaylA,5
|
48
|
+
mdbq-2.5.8.dist-info/RECORD,,
|
File without changes
|
File without changes
|