mdbq 2.0.9__py3-none-any.whl → 2.1.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- mdbq/aggregation/aggregation.py +1 -0
- mdbq/company/home_sh.py +6 -3
- mdbq/other/sku_picture.py +141 -20
- {mdbq-2.0.9.dist-info → mdbq-2.1.1.dist-info}/METADATA +1 -1
- {mdbq-2.0.9.dist-info → mdbq-2.1.1.dist-info}/RECORD +7 -7
- {mdbq-2.0.9.dist-info → mdbq-2.1.1.dist-info}/WHEEL +0 -0
- {mdbq-2.0.9.dist-info → mdbq-2.1.1.dist-info}/top_level.txt +0 -0
mdbq/aggregation/aggregation.py
CHANGED
@@ -1084,6 +1084,7 @@ class DatabaseUpdate:
|
|
1084
1084
|
if '预算' in col:
|
1085
1085
|
df.rename(columns={col: '预算占比'}, inplace=True)
|
1086
1086
|
df = df[['商品id', '商家编码', '预算占比']]
|
1087
|
+
df['日期'] = datetime.datetime.now().strftime('%Y-%m-%d')
|
1087
1088
|
for service_database in service_databases:
|
1088
1089
|
for service_name, database in service_database.items():
|
1089
1090
|
username, password, host, port = get_myconf.select_config_values(
|
mdbq/company/home_sh.py
CHANGED
@@ -23,11 +23,14 @@ from mdbq.mysql import mysql
|
|
23
23
|
if platform.system() == 'Windows':
|
24
24
|
from mdbq.pbix import refresh_all
|
25
25
|
warnings.filterwarnings('ignore')
|
26
|
+
"""
|
27
|
+
除公司台式机外,其他主机执行下载更新任务
|
28
|
+
"""
|
26
29
|
|
27
30
|
|
28
31
|
class TbFiles:
|
29
32
|
"""
|
30
|
-
|
33
|
+
用于定时同步pandas数据源文件到共享
|
31
34
|
"""
|
32
35
|
def __init__(self):
|
33
36
|
|
@@ -108,7 +111,7 @@ class TbFiles:
|
|
108
111
|
f.write(f'data_path = {self.data_path}\n\n')
|
109
112
|
f.write('# 共享目录\n')
|
110
113
|
f.write(f'share_path = {self.share_path}\n\n')
|
111
|
-
f.write('#
|
114
|
+
f.write('# 用于触发下载百度云文件,更新至本机数据库\n')
|
112
115
|
f.write(f'home_record = False\n\n')
|
113
116
|
print('目录初始化!')
|
114
117
|
|
@@ -287,7 +290,7 @@ class UpdateMysql:
|
|
287
290
|
self.d_path = None
|
288
291
|
|
289
292
|
def check_date(self):
|
290
|
-
"""
|
293
|
+
""" 检查文件中的 home_record 值,决定是否执行更新"""
|
291
294
|
config = configparser.ConfigParser() # 初始化configparser类
|
292
295
|
try:
|
293
296
|
config.read(self.my_conf, 'UTF-8')
|
mdbq/other/sku_picture.py
CHANGED
@@ -31,11 +31,13 @@ from openpyxl.utils import get_column_letter
|
|
31
31
|
warnings.filterwarnings('ignore')
|
32
32
|
|
33
33
|
if platform.system() == 'Windows':
|
34
|
-
|
35
|
-
|
34
|
+
D_PATH = os.path.normpath(f'C:\\Users\\{getpass.getuser()}\\Downloads')
|
35
|
+
Share_Path = os.path.normpath(r'\\192.168.1.198\时尚事业部\01.运营部\天猫报表') # 共享文件根目录
|
36
36
|
elif platform.system() == 'Darwin':
|
37
|
-
|
37
|
+
D_PATH = os.path.normpath(f'/Users/{getpass.getuser()}/Downloads')
|
38
|
+
Share_Path = os.path.normpath('/Volumes/时尚事业部/01.运营部/天猫报表') # 共享文件根目录
|
38
39
|
else:
|
40
|
+
D_PATH = 'Downloads'
|
39
41
|
Share_Path = ''
|
40
42
|
|
41
43
|
|
@@ -85,9 +87,25 @@ class LoadAccount:
|
|
85
87
|
option.add_experimental_option('prefs', prefs)
|
86
88
|
option.add_experimental_option('excludeSwitches', ['enable-automation']) # 实验性参数, 左上角小字
|
87
89
|
if platform.system() == 'Windows':
|
88
|
-
|
90
|
+
# 设置 chrome 和 chromedriver 启动路径
|
91
|
+
chrome_path = os.path.join(f'C:\\Users\\{getpass.getuser()}', 'chrome\\chrome_win64\\chrome.exe')
|
92
|
+
chromedriver_path = os.path.join(f'C:\\Users\\{getpass.getuser()}', 'chrome\\chromedriver.exe')
|
93
|
+
# os.environ["webdriver.chrome.driver"] = chrome_path
|
94
|
+
option.binary_location = chrome_path # windows 设置此参数有效
|
95
|
+
service = Service(chromedriver_path)
|
96
|
+
# service = Service(str(pathlib.Path(f'C:\\Users\\{getpass.getuser()}\\chromedriver.exe'))) # 旧路径
|
97
|
+
elif platform.system() == 'Darwin':
|
98
|
+
chrome_path = '/usr/local/chrome/Google Chrome for Testing.app'
|
99
|
+
chromedriver_path = '/usr/local/chrome/chromedriver'
|
100
|
+
os.environ["webdriver.chrome.driver"] = chrome_path
|
101
|
+
# option.binary_location = chrome_path # Macos 设置此参数报错
|
102
|
+
service = Service(chromedriver_path)
|
89
103
|
else:
|
90
|
-
|
104
|
+
chrome_path = '/usr/local/chrome/Google Chrome for Testing.app'
|
105
|
+
chromedriver_path = '/usr/local/chrome/chromedriver'
|
106
|
+
os.environ["webdriver.chrome.driver"] = chrome_path
|
107
|
+
# option.binary_location = chrome_path # macos 设置此参数报错
|
108
|
+
service = Service(chromedriver_path)
|
91
109
|
_driver = webdriver.Chrome(options=option, service=service, ) # 创建Chrome驱动程序实例
|
92
110
|
_driver.maximize_window() # 窗口最大化 方便后续加载数据
|
93
111
|
|
@@ -355,8 +373,9 @@ class SkuPicture:
|
|
355
373
|
self.urls = []
|
356
374
|
self.datas = [] # 从单品页面获取数据,存储这部分数据,作为中转
|
357
375
|
self.df = pd.DataFrame()
|
376
|
+
self.xlsx_save_path = os.path.join(D_PATH, '商品id_编码_图片_临时文件.xlsx')
|
358
377
|
|
359
|
-
def each_page(self):
|
378
|
+
def each_page(self, as_local_file=True):
|
360
379
|
wait = WebDriverWait(self.driver, timeout=15)
|
361
380
|
num = len(self.urls)
|
362
381
|
i = 0
|
@@ -432,13 +451,13 @@ class SkuPicture:
|
|
432
451
|
'sku编码': sku_spbm,
|
433
452
|
'商家编码': data['商家编码'],
|
434
453
|
'推荐卖点': desc,
|
435
|
-
'
|
454
|
+
'获取与下载': '已获取',
|
436
455
|
'类目': leimu,
|
437
456
|
'更新时间': datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S')
|
438
457
|
}
|
439
458
|
)
|
440
459
|
except Exception as e:
|
441
|
-
|
460
|
+
print('报错信息: ', e, '-'*10, data)
|
442
461
|
pass
|
443
462
|
i += 1
|
444
463
|
# if i > 3:
|
@@ -448,15 +467,16 @@ class SkuPicture:
|
|
448
467
|
results = []
|
449
468
|
for data in self.datas:
|
450
469
|
try:
|
451
|
-
|
452
|
-
results.append(
|
470
|
+
_df = pd.DataFrame.from_dict(data, orient='columns')
|
471
|
+
results.append(_df)
|
453
472
|
except:
|
454
473
|
pass
|
455
474
|
|
456
|
-
|
457
|
-
|
458
|
-
|
459
|
-
self.df =
|
475
|
+
self.df = pd.concat(results) # 更新 df
|
476
|
+
self.df = self.df[self.df['sku图片链接'] != '0']
|
477
|
+
if results and as_local_file:
|
478
|
+
self.df.to_excel(self.xlsx_save_path, index=False, header=True, engine='openpyxl',
|
479
|
+
freeze_panes=(1, 0))
|
460
480
|
|
461
481
|
def read_df(self):
|
462
482
|
path = os.path.join(self.path, self.filename)
|
@@ -486,6 +506,7 @@ class DownloadPicture():
|
|
486
506
|
if not os.path.exists(self.save_path):
|
487
507
|
os.mkdir(self.save_path)
|
488
508
|
self.local_file = ''
|
509
|
+
self.finish_download = []
|
489
510
|
|
490
511
|
def get_df_from_service(self):
|
491
512
|
start_date, end_date = self.months_data(num=self.months)
|
@@ -535,15 +556,15 @@ class DownloadPicture():
|
|
535
556
|
i += 1
|
536
557
|
time.sleep(0.5)
|
537
558
|
|
538
|
-
def download_data_from_local(self):
|
559
|
+
def download_data_from_local(self, col_name='sku图片链接'):
|
539
560
|
if not os.path.exists(self.save_path):
|
540
561
|
os.mkdir(self.save_path)
|
541
562
|
dict_data = self.df.to_dict('records')
|
542
563
|
num = len(dict_data)
|
543
564
|
i = 0
|
544
565
|
for data in dict_data:
|
545
|
-
url = data[
|
546
|
-
self.filename = f'{data['商品id']}_{data['商家编码']}.jpg'
|
566
|
+
url = data[col_name]
|
567
|
+
self.filename = f'{data['商品id']}_{data['商家编码']}_{data['sku编码']}.jpg'
|
547
568
|
if os.path.isfile(os.path.join(self.save_path, self.filename)):
|
548
569
|
i += 1
|
549
570
|
continue
|
@@ -557,6 +578,7 @@ class DownloadPicture():
|
|
557
578
|
# 保存图片到本地文件夹
|
558
579
|
with open(os.path.join(self.save_path, self.filename), 'wb') as f:
|
559
580
|
f.write(res.content)
|
581
|
+
self.finish_download.append(data['sku编码'])
|
560
582
|
i += 1
|
561
583
|
time.sleep(0.5)
|
562
584
|
|
@@ -761,12 +783,111 @@ def main3():
|
|
761
783
|
p.insert_data()
|
762
784
|
|
763
785
|
|
764
|
-
def
|
765
|
-
"""
|
786
|
+
def get_sp_id(service_name='company', database='mysql', db_name='属性设置2', table_name='商品素材下载记录', col_name='sku图片链接'):
|
787
|
+
""" 从数据库中获取商品id信息 """
|
788
|
+
# 实例化一个下载类
|
789
|
+
username, password, host, port = get_myconf.select_config_values(target_service=service_name, database=database)
|
790
|
+
download = s_query.QueryDatas(username=username, password=password, host=host, port=port)
|
791
|
+
projection = {
|
792
|
+
'宝贝id': 1,
|
793
|
+
'商家编码': 1,
|
794
|
+
}
|
795
|
+
df = download.data_to_df(
|
796
|
+
db_name='生意经2',
|
797
|
+
table_name='宝贝指标',
|
798
|
+
start_date='2019-01-01',
|
799
|
+
end_date='2099-12-31',
|
800
|
+
projection=projection,
|
801
|
+
)
|
802
|
+
df.rename(columns={'宝贝id': '商品id'}, inplace=True)
|
803
|
+
df.drop_duplicates(subset='商品id', keep='last', inplace=True, ignore_index=True)
|
804
|
+
df = df.head(2)
|
805
|
+
|
806
|
+
projection = {
|
807
|
+
'商品id': 1,
|
808
|
+
'商家编码': 1,
|
809
|
+
}
|
810
|
+
df_new = download.data_to_df(
|
811
|
+
db_name='属性设置2',
|
812
|
+
table_name='商品素材下载记录',
|
813
|
+
start_date='2019-01-01',
|
814
|
+
end_date='2099-12-31',
|
815
|
+
projection=projection,
|
816
|
+
)
|
817
|
+
df_new.drop_duplicates(subset='商品id', keep='last', inplace=True, ignore_index=True)
|
818
|
+
# 使用merge获取交集
|
819
|
+
df = pd.merge(df, df_new, left_on=['商品id'], right_on=['商品id'], how='left')
|
820
|
+
df.rename(columns={'商家编码_x': '商家编码'}, inplace=True)
|
821
|
+
df.pop('商家编码_y')
|
822
|
+
urls = df.to_dict('records')
|
823
|
+
|
824
|
+
_driver = LoadAccount() # 账号域不同, 要重新实例化
|
825
|
+
tb_driver2 = _driver.load_account(shop_name='万里马官方旗舰店')
|
826
|
+
if tb_driver2:
|
827
|
+
s = SkuPicture(driver=tb_driver2)
|
828
|
+
s.urls = urls
|
829
|
+
s.each_page(as_local_file=True) # 根据 urls 获取每个商品数据并更新 df
|
830
|
+
tb_driver2.quit()
|
831
|
+
|
832
|
+
# 回传数据库
|
833
|
+
username, password, host, port = get_myconf.select_config_values(target_service=service_name, database=database)
|
834
|
+
m = mysql.MysqlUpload(username=username, password=password, host=host, port=port)
|
835
|
+
m.df_to_mysql(
|
836
|
+
df=s.df,
|
837
|
+
db_name=db_name,
|
838
|
+
table_name=table_name,
|
839
|
+
move_insert=True, # 先删除,再插入
|
840
|
+
df_sql=False,
|
841
|
+
drop_duplicates=False,
|
842
|
+
icm_update=[],
|
843
|
+
service_database={service_name: database},
|
844
|
+
) # 3. 回传数据库
|
845
|
+
|
846
|
+
# 从数据库中读取数据,并下载素材到本地
|
847
|
+
|
848
|
+
# 留空,必须留空
|
849
|
+
projection = {
|
850
|
+
# '商品id': 1,
|
851
|
+
# '商家编码': 1,
|
852
|
+
# 'sku编码': 1,
|
853
|
+
# col_name: 1,
|
854
|
+
# '获取与下载': 1,
|
855
|
+
}
|
856
|
+
df = download.data_to_df(
|
857
|
+
db_name=db_name,
|
858
|
+
table_name=table_name,
|
859
|
+
start_date='2019-01-01',
|
860
|
+
end_date='2099-12-31',
|
861
|
+
projection=projection,
|
862
|
+
)
|
863
|
+
df = df[df['获取与下载'] != '已下载']
|
864
|
+
|
865
|
+
# 实例化一个下载器类
|
866
|
+
d = DownloadPicture(service_name=service_name)
|
867
|
+
d.save_path = os.path.join(D_PATH, col_name) # 下载图片到本地时的存储位置
|
868
|
+
d.filename = f'{db_name}_{table_name}.xlsx'
|
869
|
+
d.df = df
|
870
|
+
d.download_data_from_local(col_name=col_name)
|
871
|
+
df['获取与下载'] = df.apply(lambda x: '已下载' if x['sku编码'] in d.finish_download else x['获取与下载'], axis=1)
|
872
|
+
|
873
|
+
# 回传数据库
|
874
|
+
username, password, host, port = get_myconf.select_config_values(target_service=service_name, database=database)
|
875
|
+
m = mysql.MysqlUpload(username=username, password=password, host=host, port=port)
|
876
|
+
m.df_to_mysql(
|
877
|
+
df=df,
|
878
|
+
db_name=db_name,
|
879
|
+
table_name=table_name,
|
880
|
+
move_insert=True, # 先删除,再插入
|
881
|
+
df_sql=False,
|
882
|
+
drop_duplicates=False,
|
883
|
+
icm_update=[],
|
884
|
+
service_database={service_name: database},
|
885
|
+
) # 3. 回传数据库
|
766
886
|
|
767
887
|
|
768
888
|
|
769
889
|
if __name__ == '__main__':
|
770
890
|
# main(service_name='home_lx', database='mysql')
|
771
|
-
main2(service_name='home_lx', database='mysql')
|
891
|
+
# main2(service_name='home_lx', database='mysql')
|
772
892
|
# main3()
|
893
|
+
get_sp_id(service_name='company', database='mysql', db_name='属性设置2', table_name='商品素材下载记录')
|
@@ -1,7 +1,7 @@
|
|
1
1
|
mdbq/__init__.py,sha256=Il5Q9ATdX8yXqVxtP_nYqUhExzxPC_qk_WXQ_4h0exg,16
|
2
2
|
mdbq/__version__.py,sha256=y9Mp_8x0BCZSHsdLT_q5tX9wZwd5QgqrSIENLrb6vXA,62
|
3
3
|
mdbq/aggregation/__init__.py,sha256=EeDqX2Aml6SPx8363J-v1lz0EcZtgwIBYyCJV6CcEDU,40
|
4
|
-
mdbq/aggregation/aggregation.py,sha256=
|
4
|
+
mdbq/aggregation/aggregation.py,sha256=us2FJjLCYlyttATHc0xYRo1ZvcC4E7lB_JI034jI6h4,75810
|
5
5
|
mdbq/aggregation/df_types.py,sha256=U9i3q2eRPTDY8qAPTw7irzu-Tlg4CIySW9uYro81wdk,8125
|
6
6
|
mdbq/aggregation/mysql_types.py,sha256=DQYROALDiwjJzjhaJfIIdnsrNs11i5BORlj_v6bp67Y,11062
|
7
7
|
mdbq/aggregation/optimize_data.py,sha256=Wis40oL04M7E1pkvgNPjyVFAUe-zgjimjIVAikxYY8Y,4418
|
@@ -12,7 +12,7 @@ mdbq/clean/__init__.py,sha256=A1d6x3L27j4NtLgiFV5TANwEkLuaDfPHDQNrPBbNWtU,41
|
|
12
12
|
mdbq/clean/data_clean.py,sha256=ZJOTT0sNWP85x4pJl39UZBwN4Bnf8Ao-iVOJNiiRPEE,103996
|
13
13
|
mdbq/company/__init__.py,sha256=qz8F_GsP_pMB5PblgJAUAMjasuZbOEp3qQOCB39E8f0,21
|
14
14
|
mdbq/company/copysh.py,sha256=GaFl7lZ5pENys0W2N8SOjUdA0uz5v_u-aDZMLZADito,17778
|
15
|
-
mdbq/company/home_sh.py,sha256=
|
15
|
+
mdbq/company/home_sh.py,sha256=42CZ2tZIXHLl2mOl2gk2fZnjH2IHh1VJ1s3qHABjonY,18021
|
16
16
|
mdbq/config/__init__.py,sha256=jso1oHcy6cJEfa7udS_9uO5X6kZLoPBF8l3wCYmr5dM,18
|
17
17
|
mdbq/config/get_myconf.py,sha256=cmNvsyoNa0RbZ9FOTjSd3jyyGwkxjUo0phvdHbGlrms,6010
|
18
18
|
mdbq/config/products.py,sha256=L1uhzdbqTprQg_rekKt0ucgpeIuMvi3H2v48_GZWPuY,5803
|
@@ -31,13 +31,13 @@ mdbq/mysql/year_month_day.py,sha256=VgewoE2pJxK7ErjfviL_SMTN77ki8GVbTUcao3vFUCE,
|
|
31
31
|
mdbq/other/__init__.py,sha256=jso1oHcy6cJEfa7udS_9uO5X6kZLoPBF8l3wCYmr5dM,18
|
32
32
|
mdbq/other/porxy.py,sha256=UHfgEyXugogvXgsG68a7QouUCKaohTKKkI4RN-kYSdQ,4961
|
33
33
|
mdbq/other/pov_city.py,sha256=AEOmCOzOwyjHi9LLZWPKi6DUuSC-_M163664I52u9qw,21050
|
34
|
-
mdbq/other/sku_picture.py,sha256=
|
34
|
+
mdbq/other/sku_picture.py,sha256=lYzm2L4SlfWq09ddr_41JjP5UcpksZpnRQYOoXNXA-k,41279
|
35
35
|
mdbq/other/ua_sj.py,sha256=JuVYzc_5QZ9s_oQSrTHVKkQv4S_7-CWx4oIKOARn_9U,22178
|
36
36
|
mdbq/pbix/__init__.py,sha256=Trtfaynu9RjoTyLLYBN2xdRxTvm_zhCniUkVTAYwcjo,24
|
37
37
|
mdbq/pbix/pbix_refresh.py,sha256=JUjKW3bNEyoMVfVfo77UhguvS5AWkixvVhDbw4_MHco,2396
|
38
38
|
mdbq/pbix/refresh_all.py,sha256=0uAnBKCd5cx5FLTkawN1GV9yi87rfyMgYal5LABtumQ,7186
|
39
39
|
mdbq/spider/__init__.py,sha256=RBMFXGy_jd1HXZhngB2T2XTvJqki8P_Fr-pBcwijnew,18
|
40
|
-
mdbq-2.
|
41
|
-
mdbq-2.
|
42
|
-
mdbq-2.
|
43
|
-
mdbq-2.
|
40
|
+
mdbq-2.1.1.dist-info/METADATA,sha256=TGqdUDMFAQNf7fe5e3DPcL7MCfwm7UgIuAYvZoDGhdA,245
|
41
|
+
mdbq-2.1.1.dist-info/WHEEL,sha256=eOLhNAGa2EW3wWl_TU484h7q1UNgy0JXjjoqKoxAAQc,92
|
42
|
+
mdbq-2.1.1.dist-info/top_level.txt,sha256=2FQ-uLnCSB-OwFiWntzmwosW3X2Xqsg0ewh1axsaylA,5
|
43
|
+
mdbq-2.1.1.dist-info/RECORD,,
|
File without changes
|
File without changes
|