mdbq 2.3.6__py3-none-any.whl → 2.3.7__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- mdbq/aggregation/query_data.py +166 -1
- mdbq/company/copysh.py +7 -2
- {mdbq-2.3.6.dist-info → mdbq-2.3.7.dist-info}/METADATA +1 -1
- {mdbq-2.3.6.dist-info → mdbq-2.3.7.dist-info}/RECORD +6 -6
- {mdbq-2.3.6.dist-info → mdbq-2.3.7.dist-info}/WHEEL +0 -0
- {mdbq-2.3.6.dist-info → mdbq-2.3.7.dist-info}/top_level.txt +0 -0
mdbq/aggregation/query_data.py
CHANGED
@@ -1,5 +1,6 @@
|
|
1
1
|
# -*- coding: UTF-8 –*-
|
2
2
|
import re
|
3
|
+
from unittest.mock import inplace
|
3
4
|
|
4
5
|
from mdbq.mongo import mongo
|
5
6
|
from mdbq.mysql import mysql
|
@@ -1003,6 +1004,23 @@ class GroupBy:
|
|
1003
1004
|
df.insert(loc=1, column='推广渠道', value='万相台无界版') # df中插入新列
|
1004
1005
|
df['是否品牌词'] = df['词名字/词包名字'].str.contains('万里马|wanlima', regex=True)
|
1005
1006
|
df['是否品牌词'] = df['是否品牌词'].apply(lambda x: '品牌词' if x else '')
|
1007
|
+
dir_file = f'\\\\192.168.1.198\\时尚事业部\\01.运营部\\0-电商周报-每周五更新\\分类配置文件.xlsx'
|
1008
|
+
dir_file2 = '/Volumes/时尚事业部/01.运营部/0-电商周报-每周五更新/分类配置文件.xlsx'
|
1009
|
+
if not os.path.isfile(dir_file):
|
1010
|
+
dir_file = dir_file2
|
1011
|
+
if os.path.isfile(dir_file):
|
1012
|
+
df_fl = pd.read_excel(dir_file, sheet_name='关键词分类', header=0)
|
1013
|
+
df_fl.rename(columns={'分类1': '词分类'}, inplace=True)
|
1014
|
+
df_fl = df_fl[['关键词', '词分类']]
|
1015
|
+
df = pd.merge(df, df_fl, left_on=['词名字/词包名字'], right_on=['关键词'], how='left')
|
1016
|
+
df.pop('关键词')
|
1017
|
+
df['词分类'].fillna('', inplace=True)
|
1018
|
+
if '词分类' in df.columns.tolist():
|
1019
|
+
df['词分类'] = df.apply(lambda x: self.ret_keyword(keyword=str(x['词名字/词包名字']), as_file=False) if x['词分类'] == '' else x['词分类'], axis=1)
|
1020
|
+
else:
|
1021
|
+
df['词分类'] = df['词名字/词包名字'].apply(lambda x: self.ret_keyword(keyword=str(x), as_file=False))
|
1022
|
+
# df.to_csv('/Users/xigua/Downloads/test.csv', index=False, header=True, encoding='utf-8_sig')
|
1023
|
+
# breakpoint()
|
1006
1024
|
return df
|
1007
1025
|
elif '天猫_超级直播' in table_name:
|
1008
1026
|
df.rename(columns={
|
@@ -1389,6 +1407,153 @@ class GroupBy:
|
|
1389
1407
|
print(f'<{table_name}>: Groupby 类尚未配置,数据为空')
|
1390
1408
|
return pd.DataFrame({})
|
1391
1409
|
|
1410
|
+
def ret_keyword(self, keyword, as_file=False):
    """Return the category label of the first catalogue term found in *keyword*.

    The catalogue below is scanned in order (category by category, term by
    term); the first term that occurs anywhere in ``str(keyword)``,
    ignoring case, decides the result. Because matching is by substring,
    earlier categories shadow later ones (e.g. '皮' in '材质' wins over
    '皮带' in '款式').

    Args:
        keyword: Value to classify; it is converted with ``str()`` first.
        as_file: When true, the catalogue is also written as JSON to
            ``<self.output>/分类配置.json`` before classifying.

    Returns:
        The matching '类别' string, or '' when no term matches.
    """
    datas = [
        {
            '类别': '品牌词',
            '值': [
                '万里马', 'wanlima', 'fion', '菲安妮', '迪桑娜', 'dissona',
                'hr', 'vh', 'songmont', 'vanessahogan', 'dilaks', 'khdesign',
                'peco', 'giimmii', 'cassile', 'grotto', 'why', 'roulis',
                'lesschic', 'amazing song', 'mytaste', 'bagtree', '红谷',
                'hongu',
            ],
        },
        {
            '类别': '智选',
            '值': ['智选'],
        },
        {
            '类别': '年份',
            # '20' is a marker, not a literal term: it is matched as a
            # four-digit year starting with 20 (see the loop below).
            '值': ['20'],
        },
        {
            '类别': '材质',
            '值': ['皮', '牛仔', '丹宁', '帆布'],
        },
        {
            '类别': '季节',
            '值': ['春', '夏', '秋', '冬'],
        },
        {
            '类别': '款式',
            '值': [
                '水桶', '托特', '腋下', '小方', '通用款', '手拿', '马鞍', '链条',
                '菜篮', 'hobo', '波士顿', '凯莉', '饺子', '盒子', '牛角', '公文',
                '月牙', '单肩', '枕头', '斜挎', '手提', '手拎', '拎手', '斜肩',
                '棒球', '饺包', '保龄球', '戴妃', '半月', '弯月', '法棍', '流浪',
                '拎包', '中式', '手挽', '皮带', '眼镜', '斜跨', '律师', '斜背',
            ],
        },
        {
            '类别': '品类词',
            '值': [
                '老花', '包包', '通勤', '高级感', '轻奢', '包', '新款', '小众',
                '爆款', '工作', '精致', '奢侈', '袋',
            ],
        },
    ]

    if as_file:
        # ensure_ascii=False writes the Chinese text verbatim, so the file
        # encoding is pinned instead of depending on the platform locale.
        # The debugging breakpoint() that used to follow the dump was
        # removed: it would halt any non-interactive caller.
        with open(os.path.join(self.output, '分类配置.json'), 'w', encoding='utf-8') as f:
            json.dump(datas, f, ensure_ascii=False, sort_keys=False, indent=4)

    text = str(keyword)
    for data in datas:
        for item in data['值']:
            if item == '20':
                pattern = r'20\d\d'
            else:
                # Terms are literals; escape them so a future entry that
                # contains regex metacharacters is still matched verbatim.
                pattern = re.escape(item)
            if re.search(pattern, text, re.IGNORECASE):
                return data['类别']
    return ''
|
1556
|
+
|
1392
1557
|
# @try_except
|
1393
1558
|
def performance(self, bb_tg=True):
|
1394
1559
|
# print(self.data_tgyj)
|
@@ -1875,6 +2040,6 @@ def main():
|
|
1875
2040
|
|
1876
2041
|
|
1877
2042
|
if __name__ == '__main__':
|
1878
|
-
data_aggregation(service_databases=[{'company': 'mysql'}], months=
|
2043
|
+
data_aggregation(service_databases=[{'company': 'mysql'}], months=0) # 正常的聚合所有数据
|
1879
2044
|
# data_aggregation_one(service_databases=[{'company': 'mysql'}], months=1) # 单独聚合某一个数据库,具体库进函数编辑
|
1880
2045
|
# optimize_data.op_data(service_databases=[{'company': 'mysql'}], days=3650) # 立即启动对聚合数据的清理工作
|
mdbq/company/copysh.py
CHANGED
@@ -265,8 +265,13 @@ class TbFiles:
|
|
265
265
|
time.sleep(5)
|
266
266
|
|
267
267
|
# 临时加的
|
268
|
-
excel_file = f'\\\\192.168.1.198\\时尚事业部\\01.运营部\\0-电商周报-每周五更新\\
|
269
|
-
|
268
|
+
# excel_file = f'\\\\192.168.1.198\\时尚事业部\\01.运营部\\0-电商周报-每周五更新\\0-WLM_运营周报-1012输出.xlsx'
|
269
|
+
dir_files = f'\\\\192.168.1.198\\时尚事业部\\01.运营部\\0-电商周报-每周五更新'
|
270
|
+
files = os.listdir(dir_files)
|
271
|
+
for file in files:
|
272
|
+
if file.endswith('.xlsx') and '0-WLM_运营周报' in file and '~' not in file and 'baidu' not in file:
|
273
|
+
excel_file = os.path.join(dir_files, file)
|
274
|
+
r.refresh_excel(file=excel_file)
|
270
275
|
|
271
276
|
self.before_max_time = self.check_change() # 重置值, 避免重复同步
|
272
277
|
|
@@ -5,13 +5,13 @@ mdbq/aggregation/aggregation.py,sha256=CS_gMBwPKQo7uId8BrsYNTjtCZKzRVV7gT4PfE2Q4
|
|
5
5
|
mdbq/aggregation/df_types.py,sha256=U9i3q2eRPTDY8qAPTw7irzu-Tlg4CIySW9uYro81wdk,8125
|
6
6
|
mdbq/aggregation/mysql_types.py,sha256=DQYROALDiwjJzjhaJfIIdnsrNs11i5BORlj_v6bp67Y,11062
|
7
7
|
mdbq/aggregation/optimize_data.py,sha256=Wis40oL04M7E1pkvgNPjyVFAUe-zgjimjIVAikxYY8Y,4418
|
8
|
-
mdbq/aggregation/query_data.py,sha256
|
8
|
+
mdbq/aggregation/query_data.py,sha256=sOsDzZM5C2w82SgCNbJGKYIvHNDmTb-NSQYGdJYjP10,93287
|
9
9
|
mdbq/bdup/__init__.py,sha256=AkhsGk81SkG1c8FqDH5tRq-8MZmFobVbN60DTyukYTY,28
|
10
10
|
mdbq/bdup/bdup.py,sha256=LAV0TgnQpc-LB-YuJthxb0U42_VkPidzQzAagan46lU,4234
|
11
11
|
mdbq/clean/__init__.py,sha256=A1d6x3L27j4NtLgiFV5TANwEkLuaDfPHDQNrPBbNWtU,41
|
12
12
|
mdbq/clean/data_clean.py,sha256=QvbM_mRHvUbwvJu0UpJu4SUQNxLWsFl7QApoq6cPiVU,104788
|
13
13
|
mdbq/company/__init__.py,sha256=qz8F_GsP_pMB5PblgJAUAMjasuZbOEp3qQOCB39E8f0,21
|
14
|
-
mdbq/company/copysh.py,sha256=
|
14
|
+
mdbq/company/copysh.py,sha256=8LL9uPkY0L3XGxTM8G7tROaQX3YzDq5vaE8U1FLe_n0,19714
|
15
15
|
mdbq/company/home_sh.py,sha256=42CZ2tZIXHLl2mOl2gk2fZnjH2IHh1VJ1s3qHABjonY,18021
|
16
16
|
mdbq/config/__init__.py,sha256=jso1oHcy6cJEfa7udS_9uO5X6kZLoPBF8l3wCYmr5dM,18
|
17
17
|
mdbq/config/get_myconf.py,sha256=cmNvsyoNa0RbZ9FOTjSd3jyyGwkxjUo0phvdHbGlrms,6010
|
@@ -41,7 +41,7 @@ mdbq/req_post/__init__.py,sha256=jso1oHcy6cJEfa7udS_9uO5X6kZLoPBF8l3wCYmr5dM,18
|
|
41
41
|
mdbq/req_post/req_tb.py,sha256=0gVmmVewIpsJVEXkEEW0UeSSPqE9iAyLpMS0P4qjlX0,35270
|
42
42
|
mdbq/spider/__init__.py,sha256=RBMFXGy_jd1HXZhngB2T2XTvJqki8P_Fr-pBcwijnew,18
|
43
43
|
mdbq/spider/aikucun.py,sha256=fszwS5jml5-S98u5FurBKVS2L4O-0wXFdU6CsGqbWrE,14104
|
44
|
-
mdbq-2.3.
|
45
|
-
mdbq-2.3.
|
46
|
-
mdbq-2.3.
|
47
|
-
mdbq-2.3.
|
44
|
+
mdbq-2.3.7.dist-info/METADATA,sha256=q_SXglctvCzavKdB__xkwQQI4v22pXvtB6cc1YDl8Jk,245
|
45
|
+
mdbq-2.3.7.dist-info/WHEEL,sha256=eOLhNAGa2EW3wWl_TU484h7q1UNgy0JXjjoqKoxAAQc,92
|
46
|
+
mdbq-2.3.7.dist-info/top_level.txt,sha256=2FQ-uLnCSB-OwFiWntzmwosW3X2Xqsg0ewh1axsaylA,5
|
47
|
+
mdbq-2.3.7.dist-info/RECORD,,
|
File without changes
|
File without changes
|