mdbq-2.6.5-py3-none-any.whl → mdbq-2.6.6-py3-none-any.whl
This diff shows the changes between two publicly available versions of this package, as released to one of the supported registries. It is provided for informational purposes only and reflects the package contents exactly as they appear in their respective public registries.
- mdbq/aggregation/aggregation.py +1 -1
- mdbq/aggregation/query_data.py +24 -24
- mdbq/clean/clean_upload.py +9 -8
- mdbq/dataframe/converter.py +4 -1
- {mdbq-2.6.5.dist-info → mdbq-2.6.6.dist-info}/METADATA +1 -1
- {mdbq-2.6.5.dist-info → mdbq-2.6.6.dist-info}/RECORD +8 -8
- {mdbq-2.6.5.dist-info → mdbq-2.6.6.dist-info}/WHEEL +0 -0
- {mdbq-2.6.5.dist-info → mdbq-2.6.6.dist-info}/top_level.txt +0 -0
mdbq/aggregation/aggregation.py
CHANGED
@@ -1329,7 +1329,7 @@ if __name__ == '__main__':
|
|
1329
1329
|
db_name = '京东数据3'
|
1330
1330
|
table_name = '京东商智_spu_商品明细'
|
1331
1331
|
upload_dir(
|
1332
|
-
path='/Users/xigua/数据中心/原始文件3/京东报表/spu_商品明细',
|
1332
|
+
path='/Users/xigua/数据中心/原始文件3/京东报表/spu_商品明细qwqw',
|
1333
1333
|
db_name=db_name,
|
1334
1334
|
collection_name=table_name,
|
1335
1335
|
dbs={'mysql': True, 'mongodb': False},
|
mdbq/aggregation/query_data.py
CHANGED
@@ -423,7 +423,6 @@ class MysqlDatasQuery:
|
|
423
423
|
'总订单行': 1,
|
424
424
|
'总订单金额': 1,
|
425
425
|
'总加购数': 1,
|
426
|
-
'下单新客数(去重)': 1,
|
427
426
|
'领券数': 1,
|
428
427
|
'商品关注数': 1,
|
429
428
|
'店铺关注数': 1,
|
@@ -493,24 +492,26 @@ class MysqlDatasQuery:
|
|
493
492
|
return pd.to_datetime(start_date), pd.to_datetime(end_date)
|
494
493
|
|
495
494
|
@try_except
|
496
|
-
def
|
495
|
+
def se_search(self):
|
497
496
|
start_date, end_date = self.months_data(num=self.months)
|
498
497
|
projection = {
|
499
498
|
'日期': 1,
|
500
|
-
'
|
499
|
+
'店铺名称': 1,
|
500
|
+
'搜索词': 1,
|
501
|
+
'词类行': 1,
|
501
502
|
'访客数': 1,
|
503
|
+
'加购人数': 1,
|
504
|
+
'商品收藏人数': 1,
|
502
505
|
'支付转化率': 1,
|
503
|
-
'支付金额': 1,
|
504
|
-
'下单金额': 1,
|
505
506
|
'支付买家数': 1,
|
506
|
-
'
|
507
|
-
'加购人数': 1,
|
507
|
+
'支付金额': 1,
|
508
508
|
'新访客': 1,
|
509
|
-
'
|
509
|
+
'客单价': 1,
|
510
|
+
'uv价值': 1,
|
510
511
|
}
|
511
512
|
df = self.download.data_to_df(
|
512
513
|
db_name='生意参谋3',
|
513
|
-
table_name='
|
514
|
+
table_name='手淘搜索_本店引流词',
|
514
515
|
start_date=start_date,
|
515
516
|
end_date=end_date,
|
516
517
|
projection=projection,
|
@@ -1462,7 +1463,6 @@ class GroupBy:
|
|
1462
1463
|
'总订单行': ('总订单行', np.max),
|
1463
1464
|
'总订单金额': ('总订单金额', np.max),
|
1464
1465
|
'总加购数': ('总加购数', np.max),
|
1465
|
-
'下单新客数': ('下单新客数(去重)', np.max),
|
1466
1466
|
'领券数': ('领券数', np.max),
|
1467
1467
|
'商品关注数': ('商品关注数', np.max),
|
1468
1468
|
'店铺关注数': ('店铺关注数', np.max)
|
@@ -1476,17 +1476,16 @@ class GroupBy:
|
|
1476
1476
|
return df
|
1477
1477
|
elif '天猫店铺来源_手淘搜索' in table_name:
|
1478
1478
|
df = df.groupby(
|
1479
|
-
['日期', '
|
1479
|
+
['日期', '店铺名称', '搜索词'],
|
1480
1480
|
as_index=False).agg(
|
1481
1481
|
**{
|
1482
1482
|
'访客数': ('访客数', np.max),
|
1483
|
-
'
|
1483
|
+
'加购人数': ('加购人数', np.max),
|
1484
1484
|
'支付金额': ('支付金额', np.max),
|
1485
|
-
'
|
1485
|
+
'支付转化率': ('支付转化率', np.max),
|
1486
1486
|
'支付买家数': ('支付买家数', np.max),
|
1487
|
-
'
|
1488
|
-
'
|
1489
|
-
'新访客': ('新访客', np.max),
|
1487
|
+
'客单价': ('客单价', np.max),
|
1488
|
+
'uv价值': ('uv价值', np.max)
|
1490
1489
|
}
|
1491
1490
|
)
|
1492
1491
|
return df
|
@@ -2077,6 +2076,7 @@ def data_aggregation(service_databases=[{}], months=1, is_juhe=True):
|
|
2077
2076
|
2. 数据聚合清洗
|
2078
2077
|
3. 统一回传数据库: <聚合数据> (不再导出为文件)
|
2079
2078
|
公司台式机调用
|
2079
|
+
months: 1+,写 0 表示当月数据,但在每月 1 号时可能会因为返回空数据出错
|
2080
2080
|
"""
|
2081
2081
|
for service_database in service_databases:
|
2082
2082
|
for service_name, database in service_database.items():
|
@@ -2183,14 +2183,14 @@ def data_aggregation(service_databases=[{}], months=1, is_juhe=True):
|
|
2183
2183
|
'数据库名': '聚合数据',
|
2184
2184
|
'集合名': '天猫店铺来源_手淘搜索', # 暂缺
|
2185
2185
|
'唯一主键': ['日期', '关键词', '访客数'],
|
2186
|
-
'数据主体': sdq.
|
2187
|
-
},
|
2188
|
-
{
|
2189
|
-
'数据库名': '聚合数据',
|
2190
|
-
'集合名': '生意参谋_直播场次分析', # 暂缺
|
2191
|
-
'唯一主键': ['场次id'],
|
2192
|
-
'数据主体': sdq.zb_ccfx(),
|
2186
|
+
'数据主体': sdq.se_search(),
|
2193
2187
|
},
|
2188
|
+
# {
|
2189
|
+
# '数据库名': '聚合数据',
|
2190
|
+
# '集合名': '生意参谋_直播场次分析', # 暂缺
|
2191
|
+
# '唯一主键': ['场次id'],
|
2192
|
+
# '数据主体': sdq.zb_ccfx(),
|
2193
|
+
# },
|
2194
2194
|
{
|
2195
2195
|
'数据库名': '聚合数据',
|
2196
2196
|
'集合名': '多店推广场景_按日聚合',
|
@@ -2304,6 +2304,6 @@ def main():
|
|
2304
2304
|
|
2305
2305
|
|
2306
2306
|
if __name__ == '__main__':
|
2307
|
-
data_aggregation(service_databases=[{'company': 'mysql'}], months=
|
2307
|
+
data_aggregation(service_databases=[{'company': 'mysql'}], months=1, is_juhe=True) # 正常的聚合所有数据
|
2308
2308
|
# data_aggregation_one(service_databases=[{'company': 'mysql'}], months=1) # 单独聚合某一个数据库,具体库进函数编辑
|
2309
2309
|
# optimize_data.op_data(service_databases=[{'company': 'mysql'}], days=3650) # 立即启动对聚合数据的清理工作
|
mdbq/clean/clean_upload.py
CHANGED
@@ -151,7 +151,7 @@ class DataClean:
|
|
151
151
|
self.save_to_csv(df, root, new_name, encoding='utf-8_sig')
|
152
152
|
os.remove(os.path.join(root, name))
|
153
153
|
elif name.endswith('.xls') and '手淘搜索_本店引流词_' in name:
|
154
|
-
df = pd.read_excel(os.path.join(root, name), header=5)
|
154
|
+
df = pd.read_excel(os.path.join(root, name), header=5, engine='xlrd')
|
155
155
|
if len(df) == 0:
|
156
156
|
print(f'{name} 报表数据不能为空')
|
157
157
|
continue
|
@@ -382,7 +382,7 @@ class DataClean:
|
|
382
382
|
sheets4 = ['账户', '推广计划', '推广单元', '创意', '品牌流量包', '定向人群'] # 品销宝
|
383
383
|
file_name4 = os.path.splitext(name)[0] # 明星店铺报表
|
384
384
|
for sheet4 in sheets4:
|
385
|
-
df = pd.read_excel(os.path.join(root, name), sheet_name=sheet4, header=0, engine='
|
385
|
+
df = pd.read_excel(os.path.join(root, name), sheet_name=sheet4, header=0, engine='xlrd')
|
386
386
|
if len(df) == 0:
|
387
387
|
print(f'{name} 报表数据为空')
|
388
388
|
os.remove(os.path.join(root, name))
|
@@ -765,11 +765,11 @@ class DataClean:
|
|
765
765
|
continue
|
766
766
|
|
767
767
|
if name.endswith('.xlsx') and '京东推广_' in name:
|
768
|
-
df = pd.read_excel(os.path.join(root, name), header=0)
|
768
|
+
df = pd.read_excel(os.path.join(root, name), header=0, engine='openpyxl')
|
769
769
|
new_name = f'py_xg_{name}'
|
770
770
|
os.rename(os.path.join(root, name), os.path.join(root, new_name))
|
771
771
|
elif name.endswith('.xlsx') and '京东商智_sku_商品明细' in name:
|
772
|
-
df = pd.read_excel(os.path.join(root, name), header=0)
|
772
|
+
df = pd.read_excel(os.path.join(root, name), header=0, engine='openpyxl')
|
773
773
|
df.replace(to_replace=['-'], value='', regex=False, inplace=True)
|
774
774
|
pattern = re.findall(r'_(\d{4}-\d{2}-\d{2})', name)[0]
|
775
775
|
df.insert(loc=0, column='日期', value=pattern)
|
@@ -780,7 +780,7 @@ class DataClean:
|
|
780
780
|
index=False, header=True, engine='openpyxl', freeze_panes=(1, 0))
|
781
781
|
os.remove(os.path.join(root, name))
|
782
782
|
elif name.endswith('.xlsx') and '京东商智_spu_商品明细' in name:
|
783
|
-
df = pd.read_excel(os.path.join(root, name), header=0)
|
783
|
+
df = pd.read_excel(os.path.join(root, name), header=0, engine='openpyxl')
|
784
784
|
df.replace(to_replace=['-'], value='', regex=False, inplace=True)
|
785
785
|
pattern = re.findall(r'_(\d{4}-\d{2}-\d{2})', name)[0]
|
786
786
|
df.insert(loc=0, column='日期', value=pattern)
|
@@ -791,7 +791,7 @@ class DataClean:
|
|
791
791
|
index=False, header=True, engine='openpyxl', freeze_panes=(1, 0))
|
792
792
|
os.remove(os.path.join(root, name))
|
793
793
|
elif name.endswith('.xlsx') and '京东商智_店铺来源_三级来源' in name:
|
794
|
-
df = pd.read_excel(os.path.join(root, name), header=0)
|
794
|
+
df = pd.read_excel(os.path.join(root, name), header=0, engine='openpyxl')
|
795
795
|
df.replace(to_replace=['-'], value='', regex=False, inplace=True)
|
796
796
|
df.rename(columns={'时间': '日期'}, inplace=True)
|
797
797
|
for col in df.columns.tolist():
|
@@ -870,7 +870,7 @@ class DataClean:
|
|
870
870
|
|
871
871
|
if name.endswith('.xlsx') and '商品素材_' in name:
|
872
872
|
shop_name = re.findall(r'_([\u4e00-\u9fffA-Za-z]+店)_', name)[0]
|
873
|
-
df = pd.read_excel(os.path.join(root, name), header=0)
|
873
|
+
df = pd.read_excel(os.path.join(root, name), header=0, engine='xlrd')
|
874
874
|
if '日期' not in df.columns.tolist():
|
875
875
|
df.insert(loc=0, column='日期', value=datetime.datetime.today().strftime('%Y-%m-%d'))
|
876
876
|
if '店铺名称' not in df.columns.tolist():
|
@@ -1276,7 +1276,7 @@ class DataClean:
|
|
1276
1276
|
new_path = os.path.join(root, zip_name_1) # 拼接解压后的文件路径
|
1277
1277
|
if os.path.isfile(new_path) and '全部渠道_商品明细' in new_path: # 是否存在和包内同名的文件
|
1278
1278
|
# 专门处理京东文件, 已过期可删
|
1279
|
-
df = pd.read_excel(new_path)
|
1279
|
+
df = pd.read_excel(new_path, engine='xlrd')
|
1280
1280
|
try:
|
1281
1281
|
pattern1 = re.findall(r'\d{8}_(\d{4})(\d{2})(\d{2})_全部渠道_商品明细',
|
1282
1282
|
name)
|
@@ -1564,3 +1564,4 @@ if __name__ == '__main__':
|
|
1564
1564
|
|
1565
1565
|
|
1566
1566
|
# test()
|
1567
|
+
|
mdbq/dataframe/converter.py
CHANGED
@@ -37,6 +37,7 @@ class DataFrameConverter(object):
|
|
37
37
|
df.replace(to_replace=['="'], value='', regex=True, inplace=True) # ="和"不可以放在一起清洗, 因为有: id=86785565
|
38
38
|
df.replace(to_replace=['"'], value='', regex=True, inplace=True)
|
39
39
|
cols = df.columns.tolist()
|
40
|
+
|
40
41
|
df.reset_index(inplace=True, drop=True) # 重置索引,避免下面的 df.loc[0, col] 会出错
|
41
42
|
|
42
43
|
for col in cols:
|
@@ -81,7 +82,9 @@ class DataFrameConverter(object):
|
|
81
82
|
df[col] = df[col].apply(lambda x: pd.to_datetime(x))
|
82
83
|
except:
|
83
84
|
pass
|
84
|
-
new_col = col.lower()
|
85
|
+
new_col = re.sub(r'[()()-,,$%&~^、* ]', '_', col.lower())
|
86
|
+
new_col = re.sub(r'_{2,}', '_', new_col)
|
87
|
+
new_col = re.sub(r'_+$', '', new_col)
|
85
88
|
df.rename(columns={col: new_col}, inplace=True)
|
86
89
|
df.fillna(0, inplace=True)
|
87
90
|
return df
|
@@ -1,15 +1,15 @@
|
|
1
1
|
mdbq/__init__.py,sha256=Il5Q9ATdX8yXqVxtP_nYqUhExzxPC_qk_WXQ_4h0exg,16
|
2
2
|
mdbq/__version__.py,sha256=y9Mp_8x0BCZSHsdLT_q5tX9wZwd5QgqrSIENLrb6vXA,62
|
3
3
|
mdbq/aggregation/__init__.py,sha256=EeDqX2Aml6SPx8363J-v1lz0EcZtgwIBYyCJV6CcEDU,40
|
4
|
-
mdbq/aggregation/aggregation.py,sha256=
|
4
|
+
mdbq/aggregation/aggregation.py,sha256=aAAYq3-I4dqqXFFGwznihDl9ELajfi1NTIFdPFJ0Z_0,76606
|
5
5
|
mdbq/aggregation/df_types.py,sha256=U9i3q2eRPTDY8qAPTw7irzu-Tlg4CIySW9uYro81wdk,8125
|
6
6
|
mdbq/aggregation/mysql_types.py,sha256=DQYROALDiwjJzjhaJfIIdnsrNs11i5BORlj_v6bp67Y,11062
|
7
7
|
mdbq/aggregation/optimize_data.py,sha256=gdScrgTAb6RbXHZy1LitX7lggMGn1GTLhkYSgztfwew,4903
|
8
|
-
mdbq/aggregation/query_data.py,sha256=
|
8
|
+
mdbq/aggregation/query_data.py,sha256=Fmh7Z5IRbDhoRZLXcNbln2zLFwhiuOwfj2cm_3A5QhU,102588
|
9
9
|
mdbq/bdup/__init__.py,sha256=AkhsGk81SkG1c8FqDH5tRq-8MZmFobVbN60DTyukYTY,28
|
10
10
|
mdbq/bdup/bdup.py,sha256=LAV0TgnQpc-LB-YuJthxb0U42_VkPidzQzAagan46lU,4234
|
11
11
|
mdbq/clean/__init__.py,sha256=A1d6x3L27j4NtLgiFV5TANwEkLuaDfPHDQNrPBbNWtU,41
|
12
|
-
mdbq/clean/clean_upload.py,sha256=
|
12
|
+
mdbq/clean/clean_upload.py,sha256=_lb4fdFh_VNM6112HuqIF7cFv_GSyiO2_XnugXUxLDg,81266
|
13
13
|
mdbq/clean/data_clean.py,sha256=ucfslhqXVZoH2QaXHSAWDky0GhIvH9f4GeNaHg4SrFE,104790
|
14
14
|
mdbq/company/__init__.py,sha256=qz8F_GsP_pMB5PblgJAUAMjasuZbOEp3qQOCB39E8f0,21
|
15
15
|
mdbq/company/copysh.py,sha256=NvlXCBZBcO2GIT5nLRYYqhOyHWM1-1RE7DHvgbj6jmQ,19723
|
@@ -20,7 +20,7 @@ mdbq/config/products.py,sha256=hN9UMkM6j76HYMulTYdtr3mOhh9QdpvvrLH14a_mbFY,5980
|
|
20
20
|
mdbq/config/set_support.py,sha256=xkZCX6y9Bq1ppBpJAofld4B2YtchA7fl0eT3dx3CrSI,777
|
21
21
|
mdbq/config/update_conf.py,sha256=taL3ZqKgiVWwUrDFuaYhim9a72Hm4BHRhhDscJTziR8,4535
|
22
22
|
mdbq/dataframe/__init__.py,sha256=2HtCN8AdRj53teXDqzysC1h8aPL-mMFy561ESmhehGQ,22
|
23
|
-
mdbq/dataframe/converter.py,sha256=
|
23
|
+
mdbq/dataframe/converter.py,sha256=3n3_FKBxv7bFWeRcmv9CfiApFXuvvbRwZxTwR-SLGzU,4461
|
24
24
|
mdbq/log/__init__.py,sha256=Mpbrav0s0ifLL7lVDAuePEi1hJKiSHhxcv1byBKDl5E,15
|
25
25
|
mdbq/log/mylogger.py,sha256=oaT7Bp-Hb9jZt52seP3ISUuxVcI19s4UiqTeouScBO0,3258
|
26
26
|
mdbq/mongo/__init__.py,sha256=SILt7xMtQIQl_m-ik9WLtJSXIVf424iYgCfE_tnQFbw,13
|
@@ -42,7 +42,7 @@ mdbq/req_post/__init__.py,sha256=jso1oHcy6cJEfa7udS_9uO5X6kZLoPBF8l3wCYmr5dM,18
|
|
42
42
|
mdbq/req_post/req_tb.py,sha256=PexWSCPJNM6Tv0ol4lAWIhlOwsAr_frnjtcdSHCFiek,36179
|
43
43
|
mdbq/spider/__init__.py,sha256=RBMFXGy_jd1HXZhngB2T2XTvJqki8P_Fr-pBcwijnew,18
|
44
44
|
mdbq/spider/aikucun.py,sha256=4Y5zd64hZUFtll8AdpUc2napDas-La-A6XzAhb2mLv0,17157
|
45
|
-
mdbq-2.6.
|
46
|
-
mdbq-2.6.
|
47
|
-
mdbq-2.6.
|
48
|
-
mdbq-2.6.
|
45
|
+
mdbq-2.6.6.dist-info/METADATA,sha256=tXG5JRRfO8htraf6iL6Tz7EjxAPw_tTVc6xgrYPyIzg,245
|
46
|
+
mdbq-2.6.6.dist-info/WHEEL,sha256=eOLhNAGa2EW3wWl_TU484h7q1UNgy0JXjjoqKoxAAQc,92
|
47
|
+
mdbq-2.6.6.dist-info/top_level.txt,sha256=2FQ-uLnCSB-OwFiWntzmwosW3X2Xqsg0ewh1axsaylA,5
|
48
|
+
mdbq-2.6.6.dist-info/RECORD,,
|
File without changes
|
File without changes
|