mdbq 2.1.0__py3-none-any.whl → 2.1.1__py3-none-any.whl

This diff shows the changes between two publicly released versions of the package, as they appear in their public registry. It is provided for informational purposes only.
mdbq/other/sku_picture.py CHANGED
@@ -31,11 +31,13 @@ from openpyxl.utils import get_column_letter
 warnings.filterwarnings('ignore')
 
 if platform.system() == 'Windows':
-    Share_Path = os.path.join(r'\\192.168.1.198\时尚事业部\01.运营部\天猫报表')  # shared-file root directory
-    # Share_Path = os.path.join(r'\\192.168.1.198\时尚事业部\01.运营部\天猫报表')  # shared-file root directory
+    D_PATH = os.path.normpath(f'C:\\Users\\{getpass.getuser()}\\Downloads')
+    Share_Path = os.path.normpath(r'\\192.168.1.198\时尚事业部\01.运营部\天猫报表')  # shared-file root directory
 elif platform.system() == 'Darwin':
-    Share_Path = os.path.join('/Volumes/时尚事业部/01.运营部/天猫报表')  # shared-file root directory
+    D_PATH = os.path.normpath(f'/Users/{getpass.getuser()}/Downloads')
+    Share_Path = os.path.normpath('/Volumes/时尚事业部/01.运营部/天猫报表')  # shared-file root directory
 else:
+    D_PATH = 'Downloads'
     Share_Path = ''
 
 
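Both branches switch these constant paths from os.path.join to os.path.normpath. The diff gives no rationale, but a plausible reading is separator normalization: normpath collapses redundant separators and '.' components and, on Windows, converts forward slashes to backslashes, none of which join does on its own. A quick illustration:

    import os

    # On Windows this prints 'C:\Users\me\Downloads' (slash normalized to backslash);
    # on POSIX, backslashes are not separators, so the string is left unchanged.
    print(os.path.normpath('C:\\Users\\me/Downloads'))
    # Redundant '//' and '.' components are collapsed on any platform.
    print(os.path.normpath('/Volumes//时尚事业部/./01.运营部'))  # /Volumes/时尚事业部/01.运营部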
@@ -85,9 +87,25 @@ class LoadAccount:
     option.add_experimental_option('prefs', prefs)
     option.add_experimental_option('excludeSwitches', ['enable-automation'])  # experimental option; hides the automation notice in the top-left corner
     if platform.system() == 'Windows':
-        service = Service(os.path.join(f'C:\\Users\\{getpass.getuser()}\\chromedriver.exe'))
+        # set the chrome and chromedriver launch paths
+        chrome_path = os.path.join(f'C:\\Users\\{getpass.getuser()}', 'chrome\\chrome_win64\\chrome.exe')
+        chromedriver_path = os.path.join(f'C:\\Users\\{getpass.getuser()}', 'chrome\\chromedriver.exe')
+        # os.environ["webdriver.chrome.driver"] = chrome_path
+        option.binary_location = chrome_path  # setting this parameter works on Windows
+        service = Service(chromedriver_path)
+        # service = Service(str(pathlib.Path(f'C:\\Users\\{getpass.getuser()}\\chromedriver.exe')))  # old path
+    elif platform.system() == 'Darwin':
+        chrome_path = '/usr/local/chrome/Google Chrome for Testing.app'
+        chromedriver_path = '/usr/local/chrome/chromedriver'
+        os.environ["webdriver.chrome.driver"] = chrome_path
+        # option.binary_location = chrome_path  # setting this parameter raises an error on macOS
+        service = Service(chromedriver_path)
     else:
-        service = Service('/usr/local/bin/chromedriver')
+        chrome_path = '/usr/local/chrome/Google Chrome for Testing.app'
+        chromedriver_path = '/usr/local/chrome/chromedriver'
+        os.environ["webdriver.chrome.driver"] = chrome_path
+        # option.binary_location = chrome_path  # setting this parameter raises an error on macOS
+        service = Service(chromedriver_path)
     _driver = webdriver.Chrome(options=option, service=service, )  # create the Chrome driver instance
     _driver.maximize_window()  # maximize the window to ease later data loading
 
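The net effect of this hunk: instead of relying on a chromedriver in the user's home directory (Windows) or /usr/local/bin (elsewhere), 2.1.1 points Selenium at a pinned Chrome for Testing build plus a matching chromedriver. The commented-out binary_location on macOS likely fails because a .app bundle path is not the executable itself (that lives under Contents/MacOS/ inside the bundle), though the diff does not say so. A minimal, self-contained sketch of the same pattern, assuming the binaries are unpacked at the paths shown in the diff:

    import getpass
    import os
    import platform
    from selenium import webdriver
    from selenium.webdriver.chrome.options import Options
    from selenium.webdriver.chrome.service import Service

    option = Options()
    if platform.system() == 'Windows':
        base = f'C:\\Users\\{getpass.getuser()}'
        option.binary_location = os.path.join(base, 'chrome\\chrome_win64\\chrome.exe')  # pinned browser
        service = Service(os.path.join(base, 'chrome\\chromedriver.exe'))  # matching driver version
    else:
        # per the diff, macOS and the fallback branch only point at the driver
        service = Service('/usr/local/chrome/chromedriver')

    driver = webdriver.Chrome(options=option, service=service)
    driver.quit()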
@@ -355,8 +373,9 @@ class SkuPicture:
         self.urls = []
         self.datas = []  # data scraped from each product page, stored here as a staging buffer
         self.df = pd.DataFrame()
+        self.xlsx_save_path = os.path.join(D_PATH, '商品id_编码_图片_临时文件.xlsx')
 
-    def each_page(self):
+    def each_page(self, as_local_file=True):
         wait = WebDriverWait(self.driver, timeout=15)
         num = len(self.urls)
         i = 0
@@ -432,13 +451,13 @@ class SkuPicture:
                         'sku编码': sku_spbm,
                         '商家编码': data['商家编码'],
                         '推荐卖点': desc,
-                        '是否新增': data['是否新增'],
+                        '获取与下载': '已获取',
                         '类目': leimu,
                         '更新时间': datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S')
                     }
                 )
             except Exception as e:
-                # print(e)
+                print('报错信息: ', e, '-'*10, data)
                 pass
             i += 1
             # if i > 3:
@@ -448,15 +467,16 @@ class SkuPicture:
         results = []
         for data in self.datas:
             try:
-                df = pd.DataFrame.from_dict(data, orient='columns')
-                results.append(df)
+                _df = pd.DataFrame.from_dict(data, orient='columns')
+                results.append(_df)
             except:
                 pass
 
-        if results:
-            self.df = pd.concat(results)
-            self.df.to_csv('C:\\Users\\Administrator\\Downloads\\商品id_编码_图片_临时文件.csv', encoding='utf-8_sig', index=False, header=True)
-            self.df = self.df[df['sku图片链接'] != '0']
+        self.df = pd.concat(results)  # update df
+        self.df = self.df[self.df['sku图片链接'] != '0']
+        if results and as_local_file:
+            self.df.to_excel(self.xlsx_save_path, index=False, header=True, engine='openpyxl',
+                             freeze_panes=(1, 0))
 
     def read_df(self):
         path = os.path.join(self.path, self.filename)
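Two things change here: the loop variable becomes _df, which fixes a latent bug in 2.1.0 (the old filter indexed self.df with the loop's df mask), and the temp file moves from a hard-coded C:\Users\Administrator CSV to an xlsx under the per-user D_PATH. A small sketch of the new export call, with placeholder data (column values are illustrative):

    import pandas as pd

    df = pd.DataFrame({'商品id': [101, 102], 'sku图片链接': ['https://example.com/a.jpg', '0']})
    df = df[df['sku图片链接'] != '0']  # drop rows with no usable image link
    # freeze_panes=(1, 0) keeps the header row visible while scrolling
    df.to_excel('商品id_编码_图片_临时文件.xlsx', index=False, header=True,
                engine='openpyxl', freeze_panes=(1, 0))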
@@ -486,6 +506,7 @@ class DownloadPicture():
         if not os.path.exists(self.save_path):
             os.mkdir(self.save_path)
         self.local_file = ''
+        self.finish_download = []
 
     def get_df_from_service(self):
         start_date, end_date = self.months_data(num=self.months)
@@ -535,15 +556,15 @@ class DownloadPicture():
             i += 1
             time.sleep(0.5)
 
-    def download_data_from_local(self):
+    def download_data_from_local(self, col_name='sku图片链接'):
         if not os.path.exists(self.save_path):
             os.mkdir(self.save_path)
         dict_data = self.df.to_dict('records')
         num = len(dict_data)
         i = 0
         for data in dict_data:
-            url = data['商品图片']
-            self.filename = f'{data['商品id']}_{data['商家编码']}.jpg'
+            url = data[col_name]
+            self.filename = f'{data['商品id']}_{data['商家编码']}_{data['sku编码']}.jpg'
             if os.path.isfile(os.path.join(self.save_path, self.filename)):
                 i += 1
                 continue
@@ -557,6 +578,7 @@ class DownloadPicture():
             # save the image to the local folder
             with open(os.path.join(self.save_path, self.filename), 'wb') as f:
                 f.write(res.content)
+            self.finish_download.append(data['sku编码'])
             i += 1
             time.sleep(0.5)
 
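Each successfully written image now records its sku编码 in self.finish_download, which the new get_sp_id flow (below) uses to flip the status column. Note also that the nested single quotes in the f-string above (f'{data['商品id']}...') only parse on Python 3.12+; the condensed sketch below uses double quotes for the outer string so it also runs on older interpreters. Paths and column names are placeholders:

    import os
    import time
    import requests

    save_path = os.path.join('Downloads', 'sku图片链接')
    os.makedirs(save_path, exist_ok=True)
    records: list[dict] = []  # e.g. df.to_dict('records')
    finish_download = []
    for data in records:
        filename = f"{data['商品id']}_{data['商家编码']}_{data['sku编码']}.jpg"
        target = os.path.join(save_path, filename)
        if os.path.isfile(target):  # skip files already fetched on a previous run
            continue
        res = requests.get(data['sku图片链接'], timeout=30)
        with open(target, 'wb') as f:
            f.write(res.content)
        finish_download.append(data['sku编码'])  # remember what succeeded
        time.sleep(0.5)  # throttle requests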
@@ -761,12 +783,111 @@ def main3():
     p.insert_data()
 
 
-def main4():
-    """ read image links from a file and download them locally """
+def get_sp_id(service_name='company', database='mysql', db_name='属性设置2', table_name='商品素材下载记录', col_name='sku图片链接'):
+    """ fetch product id information from the database """
+    # instantiate a download class
+    username, password, host, port = get_myconf.select_config_values(target_service=service_name, database=database)
+    download = s_query.QueryDatas(username=username, password=password, host=host, port=port)
+    projection = {
+        '宝贝id': 1,
+        '商家编码': 1,
+    }
+    df = download.data_to_df(
+        db_name='生意经2',
+        table_name='宝贝指标',
+        start_date='2019-01-01',
+        end_date='2099-12-31',
+        projection=projection,
+    )
+    df.rename(columns={'宝贝id': '商品id'}, inplace=True)
+    df.drop_duplicates(subset='商品id', keep='last', inplace=True, ignore_index=True)
+    df = df.head(2)
+
+    projection = {
+        '商品id': 1,
+        '商家编码': 1,
+    }
+    df_new = download.data_to_df(
+        db_name='属性设置2',
+        table_name='商品素材下载记录',
+        start_date='2019-01-01',
+        end_date='2099-12-31',
+        projection=projection,
+    )
+    df_new.drop_duplicates(subset='商品id', keep='last', inplace=True, ignore_index=True)
+    # use merge to take the intersection
+    df = pd.merge(df, df_new, left_on=['商品id'], right_on=['商品id'], how='left')
+    df.rename(columns={'商家编码_x': '商家编码'}, inplace=True)
+    df.pop('商家编码_y')
+    urls = df.to_dict('records')
+
+    _driver = LoadAccount()  # different account domain, so re-instantiate
+    tb_driver2 = _driver.load_account(shop_name='万里马官方旗舰店')
+    if tb_driver2:
+        s = SkuPicture(driver=tb_driver2)
+        s.urls = urls
+        s.each_page(as_local_file=True)  # fetch each product's data from urls and update df
+        tb_driver2.quit()
+
+    # push the data back to the database
+    username, password, host, port = get_myconf.select_config_values(target_service=service_name, database=database)
+    m = mysql.MysqlUpload(username=username, password=password, host=host, port=port)
+    m.df_to_mysql(
+        df=s.df,
+        db_name=db_name,
+        table_name=table_name,
+        move_insert=True,  # delete first, then insert
+        df_sql=False,
+        drop_duplicates=False,
+        icm_update=[],
+        service_database={service_name: database},
+    )  # 3. push back to the database
+
+    # read the data back from the database and download the assets locally
+
+    # leave empty; it must stay empty
+    projection = {
+        # '商品id': 1,
+        # '商家编码': 1,
+        # 'sku编码': 1,
+        # col_name: 1,
+        # '获取与下载': 1,
+    }
+    df = download.data_to_df(
+        db_name=db_name,
+        table_name=table_name,
+        start_date='2019-01-01',
+        end_date='2099-12-31',
+        projection=projection,
+    )
+    df = df[df['获取与下载'] != '已下载']
+
+    # instantiate a downloader class
+    d = DownloadPicture(service_name=service_name)
+    d.save_path = os.path.join(D_PATH, col_name)  # local save location for downloaded images
+    d.filename = f'{db_name}_{table_name}.xlsx'
+    d.df = df
+    d.download_data_from_local(col_name=col_name)
+    df['获取与下载'] = df.apply(lambda x: '已下载' if x['sku编码'] in d.finish_download else x['获取与下载'], axis=1)
+
+    # push the data back to the database
+    username, password, host, port = get_myconf.select_config_values(target_service=service_name, database=database)
+    m = mysql.MysqlUpload(username=username, password=password, host=host, port=port)
+    m.df_to_mysql(
+        df=df,
+        db_name=db_name,
+        table_name=table_name,
+        move_insert=True,  # delete first, then insert
+        df_sql=False,
+        drop_duplicates=False,
+        icm_update=[],
+        service_database={service_name: database},
+    )  # 3. push back to the database
 
 
 
 if __name__ == '__main__':
     # main(service_name='home_lx', database='mysql')
-    main2(service_name='home_lx', database='mysql')
+    # main2(service_name='home_lx', database='mysql')
     # main3()
+    get_sp_id(service_name='company', database='mysql', db_name='属性设置2', table_name='商品素材下载记录')
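Taken together, get_sp_id makes the pipeline resumable through the new 获取与下载 status column: each_page stamps scraped rows as 已获取 ("fetched"), the downloader appends each finished sku编码 to finish_download, and rows are flipped to 已下载 ("downloaded") before being written back, so a re-run only touches what is still missing. A self-contained sketch of that status flip, with dummy data:

    import pandas as pd

    df = pd.DataFrame({'sku编码': ['a1', 'b2', 'c3'],
                       '获取与下载': ['已获取', '已获取', '已下载']})
    df = df[df['获取与下载'] != '已下载']  # retry only rows not yet downloaded
    finish_download = ['a1']  # filled in by the download loop
    df['获取与下载'] = df.apply(
        lambda x: '已下载' if x['sku编码'] in finish_download else x['获取与下载'],
        axis=1)
    print(df)  # a1 -> 已下载, b2 stays 已获取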
mdbq-2.1.0.dist-info/METADATA → mdbq-2.1.1.dist-info/METADATA RENAMED
@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: mdbq
-Version: 2.1.0
+Version: 2.1.1
 Home-page: https://pypi.org/project/mdbsql
 Author: xigua,
 Author-email: 2587125111@qq.com
mdbq-2.1.0.dist-info/RECORD → mdbq-2.1.1.dist-info/RECORD RENAMED
@@ -31,13 +31,13 @@ mdbq/mysql/year_month_day.py,sha256=VgewoE2pJxK7ErjfviL_SMTN77ki8GVbTUcao3vFUCE,
 mdbq/other/__init__.py,sha256=jso1oHcy6cJEfa7udS_9uO5X6kZLoPBF8l3wCYmr5dM,18
 mdbq/other/porxy.py,sha256=UHfgEyXugogvXgsG68a7QouUCKaohTKKkI4RN-kYSdQ,4961
 mdbq/other/pov_city.py,sha256=AEOmCOzOwyjHi9LLZWPKi6DUuSC-_M163664I52u9qw,21050
-mdbq/other/sku_picture.py,sha256=Et8gpfAEqe7J9Z1TQSSPNuZ1OytesUztnFHMoxCfNv0,35866
+mdbq/other/sku_picture.py,sha256=lYzm2L4SlfWq09ddr_41JjP5UcpksZpnRQYOoXNXA-k,41279
 mdbq/other/ua_sj.py,sha256=JuVYzc_5QZ9s_oQSrTHVKkQv4S_7-CWx4oIKOARn_9U,22178
 mdbq/pbix/__init__.py,sha256=Trtfaynu9RjoTyLLYBN2xdRxTvm_zhCniUkVTAYwcjo,24
 mdbq/pbix/pbix_refresh.py,sha256=JUjKW3bNEyoMVfVfo77UhguvS5AWkixvVhDbw4_MHco,2396
 mdbq/pbix/refresh_all.py,sha256=0uAnBKCd5cx5FLTkawN1GV9yi87rfyMgYal5LABtumQ,7186
 mdbq/spider/__init__.py,sha256=RBMFXGy_jd1HXZhngB2T2XTvJqki8P_Fr-pBcwijnew,18
-mdbq-2.1.0.dist-info/METADATA,sha256=PLAlshZgZWvzK4MTDfuF9NwPk9oPj9E5bfhPOBD8lBQ,245
-mdbq-2.1.0.dist-info/WHEEL,sha256=eOLhNAGa2EW3wWl_TU484h7q1UNgy0JXjjoqKoxAAQc,92
-mdbq-2.1.0.dist-info/top_level.txt,sha256=2FQ-uLnCSB-OwFiWntzmwosW3X2Xqsg0ewh1axsaylA,5
-mdbq-2.1.0.dist-info/RECORD,,
+mdbq-2.1.1.dist-info/METADATA,sha256=TGqdUDMFAQNf7fe5e3DPcL7MCfwm7UgIuAYvZoDGhdA,245
+mdbq-2.1.1.dist-info/WHEEL,sha256=eOLhNAGa2EW3wWl_TU484h7q1UNgy0JXjjoqKoxAAQc,92
+mdbq-2.1.1.dist-info/top_level.txt,sha256=2FQ-uLnCSB-OwFiWntzmwosW3X2Xqsg0ewh1axsaylA,5
+mdbq-2.1.1.dist-info/RECORD,,