mdbq 3.7.8__py3-none-any.whl → 3.7.9__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,1081 +0,0 @@
1
- # -*- coding:utf-8 -*-
2
- import datetime
3
- import getpass
4
- import json
5
- import os
6
- import platform
7
- import random
8
- from dateutil.relativedelta import relativedelta
9
- import re
10
- import time
11
- import warnings
12
- import pandas as pd
13
- from lxml import etree
14
- from rich.pretty import pretty_repr
15
- from selenium import webdriver
16
- from selenium.webdriver.support.wait import WebDriverWait
17
- from selenium.webdriver.common.by import By
18
- from selenium.webdriver.support import expected_conditions as EC
19
- from selenium.webdriver.chrome.service import Service
20
- from mdbq.config import set_support
21
- from mdbq.config import get_myconf
22
- from mdbq.mysql import mysql
23
- from mdbq.mysql import s_query
24
- from mdbq.other import ua_sj
25
- import requests
26
-
27
- import io
28
- from openpyxl import load_workbook
29
- from openpyxl.drawing.image import Image
30
- from openpyxl.utils import get_column_letter
31
- from setuptools.sandbox import save_path
32
-
33
- warnings.filterwarnings('ignore')
34
- """
35
- 用来下载商品spu/sku图片素材
36
- """
37
-
38
-
39
- if platform.system() == 'Windows':
40
- D_PATH = os.path.normpath(f'C:\\Users\\{getpass.getuser()}\\Downloads')
41
- Share_Path = os.path.normpath(r'\\192.168.1.198\时尚事业部\01.运营部\天猫报表') # 共享文件根目录
42
- elif platform.system() == 'Darwin':
43
- D_PATH = os.path.normpath(f'/Users/{getpass.getuser()}/Downloads')
44
- Share_Path = os.path.normpath('/Volumes/时尚事业部/01.运营部/天猫报表') # 共享文件根目录
45
- else:
46
- D_PATH = 'Downloads'
47
- Share_Path = ''
48
-
49
-
50
- class LoadAccount:
51
- """ 如果需要获取 cookie 需要注释无界面模式 """
52
-
53
- def __init__(self):
54
- self.url = 'https://login.taobao.com/' # 默认登录淘宝
55
- self.cookie_path = os.path.join(set_support.SetSupport(dirname='support').dirname, 'cookies')
56
-
57
- def __call__(self, *args, **kwargs):
58
- self.check_cookie() # 检测cookie有效期, 但不阻断任务
59
-
60
- def load_account(self, shop_name):
61
- option = webdriver.ChromeOptions()
62
- # option.add_argument("--headless") # 设置无界面模式
63
- # 调整chrome启动配置
64
- option.add_argument("--disable-gpu")
65
- option.add_argument("--no-sandbox")
66
- option.add_argument("--disable-dev-shm-usage")
67
- option.add_experimental_option("excludeSwitches", ["enable-automation"])
68
- option.add_experimental_option('excludeSwitches', ['enable-logging']) # 禁止日志输出,减少控制台干扰
69
- option.add_experimental_option("useAutomationExtension", False)
70
- option.add_argument('--ignore-ssl-error') # 忽略ssl错误
71
- prefs = {
72
- 'profile.default_content_settings.popups': 0, # 禁止弹出所有窗口
73
- "browser.download.manager. showAlertOnComplete": False, # 下载完成后不显示下载完成提示框
74
- "profile.default_content_setting_values.automatic_downloads": 1, # 允许自动下载多个文件
75
- }
76
-
77
- option.add_experimental_option('perfLoggingPrefs', {
78
- 'enableNetwork': True,
79
- 'enablePage': False,
80
- })
81
- option.set_capability("goog:loggingPrefs", {
82
- 'browser': 'ALL',
83
- 'performance': 'ALL',
84
- })
85
- option.set_capability("goog:perfLoggingPrefs", {
86
- 'enableNetwork': True,
87
- 'enablePage': False,
88
- 'enableTimeline': False
89
- })
90
-
91
-
92
-
93
- option.add_experimental_option('prefs', prefs)
94
- option.add_experimental_option('excludeSwitches', ['enable-automation']) # 实验性参数, 左上角小字
95
- if platform.system() == 'Windows':
96
- # 设置 chrome 和 chromedriver 启动路径
97
- chrome_path = os.path.join(f'C:\\Users\\{getpass.getuser()}', 'chrome\\chrome_win64\\chrome.exe')
98
- chromedriver_path = os.path.join(f'C:\\Users\\{getpass.getuser()}', 'chrome\\chromedriver.exe')
99
- # os.environ["webdriver.chrome.driver"] = chrome_path
100
- option.binary_location = chrome_path # windows 设置此参数有效
101
- service = Service(chromedriver_path)
102
- # service = Service(str(pathlib.Path(f'C:\\Users\\{getpass.getuser()}\\chromedriver.exe'))) # 旧路径
103
- elif platform.system() == 'Darwin':
104
- chrome_path = '/usr/local/chrome/Google Chrome for Testing.app'
105
- chromedriver_path = '/usr/local/chrome/chromedriver'
106
- os.environ["webdriver.chrome.driver"] = chrome_path
107
- # option.binary_location = chrome_path # Macos 设置此参数报错
108
- service = Service(chromedriver_path)
109
- else:
110
- chrome_path = '/usr/local/chrome/Google Chrome for Testing.app'
111
- chromedriver_path = '/usr/local/chrome/chromedriver'
112
- os.environ["webdriver.chrome.driver"] = chrome_path
113
- # option.binary_location = chrome_path # macos 设置此参数报错
114
- service = Service(chromedriver_path)
115
- _driver = webdriver.Chrome(options=option, service=service, ) # 创建Chrome驱动程序实例
116
- _driver.maximize_window() # 窗口最大化 方便后续加载数据
117
-
118
- if 'jd' in shop_name: # 切换为京东
119
- self.url = 'https://shop.jd.com/jdm/home/'
120
- # 登录
121
- _driver.get(self.url)
122
- _driver.delete_all_cookies() # 首先清除浏览器打开已有的cookies
123
- name_lists = os.listdir(self.cookie_path) # cookie 放在主目录下的 cookies 文件夹
124
- for name in name_lists:
125
- if shop_name in name and name.endswith('.txt') and '~' not in name and '.DS' not in name:
126
- with open(os.path.join(self.cookie_path, name), 'r') as f:
127
- cookies_list = json.load(f) # 使用json读取cookies 注意读取的是文件 所以用load而不是loads
128
- for cookie in cookies_list:
129
- _driver.add_cookie(cookie) # 添加cookies信息
130
- break
131
- # 以上从get url开始的操作要即时完成,不能进入time.sleep,否则登录失败
132
- if 'jd' in shop_name:
133
- return _driver
134
- else:
135
- _driver.refresh()
136
- time.sleep(random.uniform(5, 8))
137
- html = etree.HTML(_driver.page_source)
138
- user_name = html.xpath('//div[@class="site-nav-user"]/a/text()')
139
- if user_name: # 1877西门吹风
140
- print(f'当前账号:{user_name} 登录成功')
141
- return _driver
142
-
143
- elements = _driver.find_elements(
144
- By.XPATH, '//*[id="login-error"]/div')
145
- if elements: # 您已登录,子账号不能访问.... 其实已经处于登录状态
146
- if self.other(_driver):
147
- return _driver
148
- elements = _driver.find_elements(
149
- By.XPATH, '//div[@class="captcha-tips"]/div[@class="warnning-text"]')
150
- if elements: # 滑块验证,但其实已经处于登录状态
151
- if self.other(_driver):
152
- return _driver
153
- wait = WebDriverWait(_driver, timeout=15)
154
- try:
155
- button = wait.until(
156
- EC.element_to_be_clickable(
157
- (By.XPATH, '//button[@class="fm-button fm-submit " and @type="submit"]')
158
- )
159
- ) # 快速进入按钮
160
- _driver.execute_script("arguments[0].click();", button) # 点击登录
161
- time.sleep(3)
162
- except:
163
- # 店铺账号
164
- try:
165
- wait.until(
166
- EC.presence_of_element_located(
167
- (By.XPATH, '//*[@id="icestark-container"]/div[1]/div/div[1]/img')))
168
- html = etree.HTML(_driver.page_source)
169
- user_name = html.xpath('//div[@class="UserArea--shopName--3Z5NVbD"]/text()')
170
- print(f'当前账号:{user_name} 登录成功')
171
- return _driver
172
- except:
173
- print(f'{shop_name} -> {self.url} 尝试跨页登录1')
174
- # self.other(_driver)
175
-
176
- # 店铺账号, 有时候刷新cookies后系统会自动登录,不需要手动点击登录,因此多加一次判断
177
- try:
178
- wait.until(
179
- EC.presence_of_element_located((By.XPATH, '//*[@id="icestark-container"]/div[1]/div/div[1]/img')))
180
- html = etree.HTML(_driver.page_source)
181
- user_name = html.xpath('//div[@class="UserArea--shopName--3Z5NVbD"]/text()')
182
- print(f'当前账号:{user_name} 登录成功')
183
- except:
184
- print(f'{shop_name} -> {self.url} 尝试跨页登录2')
185
- self.other(_driver)
186
- return _driver
187
-
188
- @staticmethod
189
- def other(_driver):
190
- """ 淘宝账号不知为何刷新cookies后不跳转, """
191
- _driver.get('https://myseller.taobao.com')
192
- time.sleep(3)
193
- try:
194
- wait = WebDriverWait(_driver, timeout=15)
195
- wait.until(EC.presence_of_element_located((By.XPATH, '//div[contains(@class, "UserArea--shopName")]')))
196
- print('登录成功')
197
- return True
198
- except Exception as e:
199
- print(e)
200
- print('登录失败')
201
- _driver.quit()
202
- return False
203
-
204
- def d_new_cookies(self, _driver, _shopname):
205
- """ 负责检查并刷新 cookies 文件"""
206
- try:
207
- _file = os.path.join(self.cookie_path, f'cookie_{_shopname}.txt')
208
- _c = os.stat(_file).st_mtime # 读取文件的元信息 >>>文件修改时间
209
- _c_time = datetime.datetime.fromtimestamp(_c) # 格式化修改时间
210
- _today = datetime.datetime.today()
211
- if (_today - _c_time).total_seconds() > 170000:
212
- with open(_file, 'w') as f:
213
- # 将cookies保存为json格式
214
- cookies_list = _driver.get_cookies()
215
- for cookie in cookies_list:
216
- # 该字段有问题所以删除就可以
217
- if 'expiry' in cookie:
218
- del cookie['expiry']
219
- if 'domain' in cookie and '万里马官方' in _shopname: # 仅仅是天猫淘宝需要修改此值, 京东别改
220
- cookie['domain'] = '.taobao.com'
221
- cookies_list = json.dumps(cookies_list)
222
- f.write(cookies_list)
223
- # print(f'cookie已保存: {_file}')
224
- except Exception as e:
225
- print(e)
226
-
227
- def check_cookie(self):
228
- """
229
- 检查cookies,如果过期则重新获取
230
- still_get: 设置该参数立即更新cookie, 不论是否过期
231
- """
232
- if not os.path.exists(self.cookie_path):
233
- print(f'没有找到cookies文件: {self.cookie_path}')
234
- return False
235
- files = os.listdir(self.cookie_path)
236
- cook = []
237
- for file in files:
238
- if file.endswith('txt') and 'cookie_' in file:
239
- cook.append(file)
240
- c_ = os.stat(os.path.join(self.cookie_path, file)).st_mtime # 读取文件的元信息 >>>文件修改时间
241
- c_time_ = datetime.datetime.fromtimestamp(c_) # 格式化修改时间
242
- today = datetime.datetime.today()
243
- if (today - c_time_).total_seconds() > 864000:
244
- # 超过时间重新获取cookies
245
- print(f' {file}cookie已过期,请重新获取cookies')
246
- return None
247
-
248
- def tb_cookie(self, _url='https://login.taobao.com/'):
249
- """
250
- 本函数需要谨慎调用,不要弄错账号以免cookies混乱
251
- 扫码获取cookies,下载到cookies文件夹
252
- is_wlm_cookie: 单独创建一个wlm的cookies,保存在上层目录,用于日常数据下载,其他淘宝爬虫不要调用
253
- c_account:设置为True时,检测店铺账号,False检测非店铺账号
254
- """
255
- option = webdriver.ChromeOptions() # 浏览器启动选项
256
- option.headless = True # False指定为无界面模式
257
- # 调整chrome启动配置
258
- option.add_argument("--disable-gpu")
259
- option.add_argument("--no-sandbox")
260
- option.add_argument("--disable-dev-shm-usage")
261
- option.add_experimental_option("excludeSwitches", ["enable-automation"])
262
- option.add_experimental_option("useAutomationExtension", False)
263
- if platform.system() == 'Windows':
264
- service = Service(os.path.join(f'C:\\Users\\{getpass.getuser()}\\chromedriver.exe'))
265
- else:
266
- service = Service('/usr/local/bin/chromedriver')
267
- _driver = webdriver.Chrome(service=service, options=option) # 创建Chrome驱动程序实例
268
- # 登录
269
- _driver.get(_url)
270
- time.sleep(1)
271
- _driver.maximize_window() # 窗口最大化 方便后续加载数据
272
- wait = WebDriverWait(_driver, timeout=120) # 等待登录二维码
273
- wait.until(EC.element_to_be_clickable(
274
- (By.XPATH, '//div[@class="qrcode-login"]/div/div[@class="qrcode-img"]')))
275
-
276
- user_name = None
277
- for i in range(10):
278
- d_time = datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S')
279
- print(f'{d_time} 当前验证:等待非店账号扫码,请尽快扫码...')
280
- wait = WebDriverWait(_driver, timeout=10) # 等待扫码登录后的页面, 左上角加载的一张图片
281
- try: # 非店铺账号
282
- wait.until(
283
- EC.presence_of_element_located((By.XPATH, '//*[@id="J_SiteNavLogin"]/div[1]/div/a')))
284
- html = etree.HTML(_driver.page_source)
285
- user_name = html.xpath('//*[@id="J_SiteNavLogin"]/div[1]/div/a/text()')
286
- break
287
- except:
288
- d_time = datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S')
289
- print(f'{d_time} 当前验证:等待店铺账号扫码...')
290
- wait = WebDriverWait(_driver, timeout=15)
291
- try: # 等待左上角的牵牛图标
292
- wait.until(
293
- EC.presence_of_element_located(
294
- (By.XPATH, '//*[@id="icestark-container"]/div[1]/div/div[1]/img')))
295
- html = etree.HTML(_driver.page_source) # 登录店铺名称
296
- user_name = html.xpath('//div[contains(@class, "UserArea--shopName")]/text()')
297
- break
298
- except:
299
- d_time = datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S')
300
- print(f'{d_time} {_url} 第 {i + 1}/10 次等待登录超时,正在重试')
301
- if i > 8:
302
- return None
303
- d_time = datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S')
304
- print(f'{d_time} 登录成功,正在获取cookie...')
305
- time.sleep(1)
306
- sp_id = ['649844025963', '732863024183', '640779963378', '677330842517']
307
- sp_id = random.choice(sp_id)
308
- _driver.get(f'https://detail.tmall.com/item.htm?id={sp_id}')
309
- time.sleep(3)
310
- if user_name:
311
- user_name = user_name[0]
312
- user_name = re.sub(':', '_', user_name) # 删除用户名中的冒号
313
- else:
314
- user_name = ''
315
-
316
- if not os.path.exists(self.cookie_path):
317
- os.makedirs(self.cookie_path)
318
- _file = os.path.join(self.cookie_path, f'cookie_{user_name}.txt')
319
- with open(_file, 'w') as f:
320
- # 将cookies保存为json格式
321
- cookies_list = _driver.get_cookies()
322
- for cookie in cookies_list:
323
- # 该字段有问题所以删除就可以
324
- if 'expiry' in cookie:
325
- del cookie['expiry']
326
- if 'domain' in cookie:
327
- cookie['domain'] = '.taobao.com'
328
- cookies_list = json.dumps(cookies_list)
329
- f.write(cookies_list)
330
- print(f'cookie已保存: {_file}')
331
- _driver.quit()
332
-
333
- def jd_cookie(self, _url='https://shop.jd.com/jdm/home/'):
334
- option = webdriver.ChromeOptions() # 浏览器启动选项
335
- option.headless = True # False指定为无界面模式
336
- if platform.system() == 'Windows':
337
- service = Service(os.path.join(f'C:\\Users\\{getpass.getuser()}\\chromedriver.exe'))
338
- else:
339
- service = Service('/usr/local/bin/chromedriver')
340
- _driver = webdriver.Chrome(service=service, options=option) # 创建Chrome驱动程序实例
341
- # 登录
342
- _driver.get(_url)
343
- time.sleep(1)
344
- _driver.maximize_window() # 窗口最大化 方便后续加载数据
345
- print('等待登录京东商家后台...')
346
- wait = WebDriverWait(_driver, timeout=300)
347
- try:
348
- wait.until(
349
- EC.presence_of_element_located((By.XPATH, '//span[text()="京准通"]')))
350
- except:
351
- print('等待京东登录超时!')
352
- d_time = datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S')
353
- print(f'{d_time} 登录成功,正在获取cookie...')
354
- time.sleep(3)
355
- # d_time = datetime.datetime.now().strftime('%Y%m%d%H%M%S')
356
-
357
- if not os.path.exists(self.cookie_path):
358
- os.makedirs(self.cookie_path)
359
- _file = os.path.join(self.cookie_path, 'cookie_jd.txt')
360
- with open(_file, 'w') as f:
361
- # 将cookies保存为json格式
362
- cookies_list = _driver.get_cookies()
363
- for cookie in cookies_list:
364
- # 该字段有问题所以删除就可以
365
- if 'expiry' in cookie:
366
- del cookie['expiry']
367
- cookies_list = json.dumps(cookies_list)
368
- f.write(cookies_list)
369
- print(f'cookie已保存: {_file}')
370
- time.sleep(1)
371
- _driver.quit()
372
-
373
-
374
- class SkuPicture:
375
- def __init__(self, driver):
376
- self.driver = driver
377
- self.path = os.path.join(Share_Path, '其他文件')
378
- self.filename = '商品id编码表.xlsx'
379
- self.urls = []
380
- self.datas = [] # 从单品页面获取数据,存储这部分数据,作为中转
381
- self.df = pd.DataFrame()
382
- self.xlsx_save_path = os.path.join(D_PATH, '商品id_编码_图片_临时文件.xlsx')
383
-
384
- def each_page(self, as_local_file=True):
385
- wait = WebDriverWait(self.driver, timeout=15)
386
- num = len(self.urls)
387
- i = 0
388
- for data in self.urls:
389
- url = f'https://sell.publish.tmall.com/tmall/publish.htm?id={data['商品id']}'
390
- print(f'当前任务: {i}/{num} {url}')
391
- try:
392
- self.driver.get(url)
393
- time.sleep(3)
394
- # elements = self.driver.find_elements(
395
- # By.XPATH, '//h2[text()="很抱歉,您查看的商品找不到了!"]')
396
- # if len(elements) > 0:
397
- # continue
398
- wait.until(EC.presence_of_element_located((By.XPATH, '//tr[@class="sku-table-row"]')))
399
- html = etree.HTML(self.driver.page_source)
400
- imgs = html.xpath('//img[contains(@class, "img-block")]/@src')
401
- imgs = [f'https:{item}' for item in imgs if 'http' not in item]
402
- titles = html.xpath('//img[contains(@class, "img-block")]/../span/@title')
403
- # img = html.xpath('//tr[@class="sku-table-row"]/td/div/div/div/img[@class="img-block"]/@src')
404
- sku_price = html.xpath(
405
- '//tr[@class="sku-table-row"]/td[contains(@class, "sell-sku-cell-money")]//input/@value')
406
- desc = html.xpath(
407
- '//tr[@class="sku-table-row"]/td[contains(@class, "sell-sku-cell-skuIndividualCom")]//em/@title')
408
- sales = html.xpath(
409
- '//tr[@class="sku-table-row"]/td[contains(@class, "sell-sku-cell-number")]//input/@value')
410
- sku_spbm = html.xpath(
411
- '//tr[@class="sku-table-row"]/td[contains(@class, "sell-sku-cell-input") and contains(@id, "skuOuterId")]//input/@value')
412
- leimu = html.xpath(
413
- '//h2[@id="text-catpath"]/div/text()')
414
- sp_titles = html.xpath(
415
- '//div[@class="tm-title normal"]/span/span/input/@value')
416
-
417
- if sp_titles:
418
- sp_titles = sp_titles[0]
419
- else:
420
- sp_titles = ''
421
- if leimu:
422
- leimu = re.sub('>>', '_', leimu[0])
423
- leimu = re.sub('当前类目:', '', leimu)
424
- else:
425
- leimu = ''
426
- if not titles:
427
- titles = ''
428
- if not imgs:
429
- imgs = ''
430
- if not sales:
431
- sales = ''
432
- if not sku_price:
433
- sku_price = ''
434
- if not sku_spbm:
435
- sku_spbm = ''
436
- if not desc:
437
- desc = ''
438
-
439
- # print(sp_titles)
440
- # print(titles)
441
- # print(imgs)
442
- # print(sales)
443
- # print(sku_price)
444
- # print(sku_spbm)
445
- # print(desc)
446
- # print(leimu)
447
- self.datas.append(
448
- {
449
- '日期': datetime.date.today(),
450
- '商品id': data['商品id'],
451
- '商品标题': sp_titles,
452
- '商品链接': f'https://detail.tmall.com/item.htm?id={data['商品id']}',
453
- 'sku名称': titles,
454
- 'sku图片链接': imgs,
455
- '库存数量': sales,
456
- '价格': sku_price,
457
- 'sku编码': sku_spbm,
458
- '商家编码': data['商家编码'],
459
- '推荐卖点': desc,
460
- '获取与下载': '已获取',
461
- '类目': leimu,
462
- '更新时间': datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S')
463
- }
464
- )
465
- except Exception as e:
466
- print('报错信息: ', e, '-'*10, data)
467
- pass
468
- i += 1
469
- # if i > 3:
470
- # break
471
- time.sleep(1)
472
-
473
- results = []
474
- for data in self.datas:
475
- try:
476
- _df = pd.DataFrame.from_dict(data, orient='columns')
477
- results.append(_df)
478
- except:
479
- pass
480
-
481
- self.df = pd.concat(results) # 更新 df
482
- self.df = self.df[self.df['sku图片链接'] != '0']
483
- if results and as_local_file:
484
- self.df.to_excel(self.xlsx_save_path, index=False, header=True, engine='openpyxl',
485
- freeze_panes=(1, 0))
486
-
487
- def read_df(self):
488
- path = os.path.join(self.path, self.filename)
489
- df = pd.read_excel(path, header=0)
490
- df = df[['商品id', '商家编码', '是否新增']]
491
- df['是否新增'].fillna(0, inplace=True)
492
- df = df.astype({'是否新增': int})
493
- df = df[df['是否新增'] == 1]
494
- self.urls = df.to_dict('records')
495
-
496
-
497
- class DownloadPicture():
498
- """
499
- 从数据库中下载数据
500
- """
501
- def __init__(self, service_name):
502
- # target_service 从哪个服务器下载数据
503
- self.months = 0 # 下载几个月数据, 0 表示当月, 1 是上月 1 号至今
504
- # 实例化一个下载类
505
- username, password, host, port = get_myconf.select_config_values(target_service=service_name,
506
- database='mysql')
507
- self.download = s_query.QueryDatas(username=username, password=password, host=host, port=port)
508
- self.df = pd.DataFrame()
509
- self.headers = {'User-Agent': ua_sj.get_ua()}
510
- self.save_path = None
511
- self.filename = ''
512
- self.local_file = ''
513
- self.finish_download = []
514
- self.finish_d2 = []
515
-
516
- def get_df_from_service(self):
517
- start_date, end_date = self.months_data(num=self.months)
518
- projection = {
519
- '商品id': 1,
520
- '商家编码': 1,
521
- 'sku编码': 1,
522
- 'sku名称': 1,
523
- 'sku图片链接': 1
524
- }
525
- self.df = self.download.data_to_df(
526
- db_name='属性设置2',
527
- table_name='天猫商品sku信息',
528
- start_date=start_date,
529
- end_date=end_date,
530
- projection=projection,
531
- )
532
-
533
- def get_df_from_local(self):
534
- if not os.path.isfile(self.local_file):
535
- return
536
- self.df = pd.read_excel(self.local_file, header=0, engine='openpyxl')
537
-
538
- def download_data(self):
539
- # if not os.path.exists(self.save_path):
540
- # os.mkdir(self.save_path)
541
- dict_data = self.df.to_dict('records')
542
- num = len(dict_data)
543
- i = 0
544
- for data in dict_data:
545
- url = data['sku图片链接']
546
- sku_name = re.sub('/', '_', data['sku名称'])
547
- self.filename = f'{data['商品id']}_{data['商家编码']}_{data['sku编码']}_{sku_name}.jpg'
548
- if os.path.isfile(os.path.join(self.save_path, self.filename)):
549
- i += 1
550
- continue
551
- if 'https' not in url:
552
- i += 1
553
- continue
554
-
555
- print(f'正在下载: {i}/{num}, {data['sku编码']}')
556
- self.headers.update({'User-Agent': ua_sj.get_ua()})
557
- res = requests.get(url, headers=self.headers) # 下载图片到内存
558
- # 保存图片到本地文件夹
559
- with open(os.path.join(self.save_path, self.filename), 'wb') as f:
560
- f.write(res.content)
561
- i += 1
562
- time.sleep(0.5)
563
-
564
- def download_data_from_local(self, col_name='sku图片链接', save_path=os.path.join(D_PATH, 'sku图片链接')):
565
- self.save_path = save_path
566
- if not os.path.exists(self.save_path):
567
- os.mkdir(self.save_path)
568
- dict_data = self.df.to_dict('records')
569
- num = len(dict_data)
570
- i = 0
571
- for data in dict_data:
572
- url = data[col_name]
573
- self.filename = f'{data['商品id']}_{data['商家编码']}_{data['sku编码']}.jpg'
574
- if os.path.isfile(os.path.join(self.save_path, self.filename)):
575
- i += 1
576
- continue
577
- if 'https' not in url:
578
- i += 1
579
- continue
580
-
581
- print(f'正在下载: {i}/{num}, {data['商品id']}')
582
- self.headers.update({'User-Agent': ua_sj.get_ua()})
583
- res = requests.get(url, headers=self.headers) # 下载图片到内存
584
- # 保存图片到本地文件夹
585
- with open(os.path.join(self.save_path, self.filename), 'wb') as f:
586
- f.write(res.content)
587
- self.finish_download.append(data['sku编码'])
588
- i += 1
589
- time.sleep(0.5)
590
-
591
- def download_from_df(self, col_name='商品图片', save_path=os.path.join(D_PATH, '商品id_商家编码_图片')):
592
- self.save_path = save_path
593
- if not os.path.exists(self.save_path):
594
- os.mkdir(self.save_path)
595
- dict_data = self.df.to_dict('records')
596
- num = len(dict_data)
597
- i = 1
598
- for data in dict_data:
599
- url = data[col_name]
600
- # self.filename = f'{data['店铺名称']}_{data['商品id']}_{data['商家编码']}.jpg'
601
- self.filename = f'{data['商品id']}_{data['商家编码']}.jpg'
602
- # 清除特殊符号,避免无法创建文件
603
- self.filename = re.sub(r'[\\/\u4e00-\u9fa5‘’“”【】\[\]{}、,,~~!!]', '_', self.filename)
604
- if os.path.isfile(os.path.join(self.save_path, self.filename)):
605
- # self.finish_download.append(data['商品id'])
606
- self.finish_d2.append(
607
- {
608
- '完成时间': datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S'),
609
- '完成id': data['商品id'],
610
- }
611
- )
612
- i += 1
613
- continue
614
- if 'https' not in url:
615
- i += 1
616
- continue
617
-
618
- print(f'正在下载: {i}/{num}, {data['商品id']}')
619
- self.headers.update({'User-Agent': ua_sj.get_ua()})
620
- res = requests.get(url, headers=self.headers) # 下载图片到内存
621
- try:
622
- # 保存图片到本地文件夹
623
- with open(os.path.join(self.save_path, self.filename), 'wb') as f:
624
- f.write(res.content)
625
- # self.finish_download.append(data['商品id'])
626
- self.finish_d2.append(
627
- {
628
- '完成时间': datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S'),
629
- '完成id': data['商品id'],
630
- }
631
- )
632
- except Exception as e:
633
- print(f'{self.filename}: {e}')
634
- i += 1
635
- time.sleep(0.5)
636
-
637
- @staticmethod
638
- def months_data(num=0, end_date=None):
639
- """ 读取近 num 个月的数据, 0 表示读取当月的数据 """
640
- if not end_date:
641
- end_date = datetime.datetime.now()
642
- start_date = end_date - relativedelta(months=num) # n 月以前的今天
643
- start_date = f'{start_date.year}-{start_date.month}-01' # 替换为 n 月以前的第一天
644
- return pd.to_datetime(start_date), pd.to_datetime(end_date)
645
-
646
-
647
- class InsertPicture:
648
- def __init__(self):
649
- self.filename = 'test.xlsx'
650
- self.path = '/Users/xigua/Downloads'
651
- self.pic_datas = []
652
- self.header = 0 # sku 的标题栏起始行数
653
-
654
- def insert_data(self):
655
- self.get_filename()
656
-
657
- workbook = load_workbook(os.path.join(self.path, self.filename))
658
- sheet = workbook.active
659
- rows = sheet.max_row # 总行数
660
- columns = sheet.max_column # 总列数
661
- sheet.insert_cols(0, 1) # 在第0列开始插入1列空白列
662
- # sheet['A1'] = '商品图片'
663
-
664
- is_trange = False
665
- for col in range(1, columns+1): # 遍历每一列
666
- # if is_trange == True:
667
- # break
668
- for row in range(1, rows+1): # 遍历每一行
669
- # print(f'第{col}列, 第{row}行...')
670
- value = sheet.cell(row=row, column=col).value
671
- if value:
672
- for data in self.pic_datas:
673
- if str(value) == data['sku'] or (len(str(value)) > 16 and str(value) in data['sku']):
674
- # print(value, data['sku'])
675
- print(f'转换: 第{col}列, 第{row}行, sku: {data['sku']} ...')
676
- image_path = os.path.join(data['文件路径'], data['文件名称'])
677
- with open(image_path, 'rb') as f:
678
- img_data = f.read()
679
- img = Image(io.BytesIO(img_data))
680
- width, height = self.img_resize(img.width, img.height) # 等比例缩放图片
681
- col_letter = 'A'
682
- # col_letter = get_column_letter(col) # 将数字索引转换为列标签 A、B、C、D...
683
- sheet.column_dimensions[col_letter].width = 13
684
- sheet.row_dimensions[row].height = 80
685
- img.width = width
686
- img.height = height
687
- sheet.add_image(img, f'{col_letter}{row}')
688
- is_trange = True
689
-
690
- if is_trange == False: # 如果 sku 没有匹配到任何值,则使用 商家编码
691
- for col in range(1, columns + 1): # 遍历每一列
692
- # if is_trange == True:
693
- # break
694
- for row in range(1, rows + 1): # 遍历每一行
695
- # print(f'第{col}列, 第{row}行...')
696
- value = sheet.cell(row=row, column=col).value
697
- if value:
698
- for data in self.pic_datas:
699
- if str(value) == data['商家编码']:
700
- # print(value, data['sku'])
701
- print(f'转换: 第{col}列, 第{row}行, 商家编码: {data['商家编码']} ...')
702
- image_path = os.path.join(data['文件路径'], data['文件名称'])
703
- with open(image_path, 'rb') as f:
704
- img_data = f.read()
705
- img = Image(io.BytesIO(img_data))
706
- width, height = self.img_resize(img.width, img.height) # 等比例缩放图片
707
- col_letter = 'A'
708
- # col_letter = get_column_letter(col) # 将数字索引转换为列标签 A、B、C、D...
709
- sheet.column_dimensions[col_letter].width = 13
710
- sheet.row_dimensions[row].height = 80
711
- img.width = width
712
- img.height = height
713
- sheet.add_image(img, f'{col_letter}{row}')
714
- is_trange = True
715
- break # 商家编码只需要添加一次,所以必须 break,否则可能添加多个图片到某个单元格
716
-
717
- if is_trange == False: # 如果 sku 和商家编码都没有匹配到任何值,则使用 商品id
718
- for col in range(1, columns + 1): # 遍历每一列
719
- # if is_trange == True:
720
- # break
721
- for row in range(1, rows + 1): # 遍历每一行
722
- # print(f'第{col}列, 第{row}行...')
723
- value = sheet.cell(row=row, column=col).value
724
- if value:
725
- for data in self.pic_datas:
726
- if str(value) == data['商品id']:
727
- # print(value, data['sku'])
728
- print(f'转换: 第{col}列, 第{row}行, 商品id: {data['商品id']} ...')
729
- image_path = os.path.join(data['文件路径'], data['文件名称'])
730
- with open(image_path, 'rb') as f:
731
- img_data = f.read()
732
- img = Image(io.BytesIO(img_data))
733
- width, height = self.img_resize(img.width, img.height) # 等比例缩放图片
734
- col_letter = 'A'
735
- # col_letter = get_column_letter(col) # 将数字索引转换为列标签 A、B、C、D...
736
- sheet.column_dimensions[col_letter].width = 13
737
- sheet.row_dimensions[row].height = 80
738
- img.width = width
739
- img.height = height
740
- sheet.add_image(img, f'{col_letter}{row}')
741
- is_trange = True
742
- break # 商品id只需要添加一次,所以必须 break,否则可能添加多个图片到某个单元格
743
-
744
- if is_trange == False:
745
- print(f'{os.path.join(self.path, self.filename)}:\n'
746
- f'在该文件中没有找到匹配的 skuid/商品id/商家编码, 注意程序只会转换当前活动的 sheet, \n'
747
- f'1. 如果您确定文件中确实存在 skuid/商品id/商家编码, 可能是因为 sheet 不是活动状态, 请切换后再重新运行本程序。\n'
748
- f'2. 程序只能转换已经收录的商品图, 如果未被收录亦会转换失败, 请联系开发者添加。')
749
-
750
- workbook.save(os.path.join(self.path, f'ok_{self.filename}'))
751
-
752
- def img_resize(self, width, height, num=100):
753
- """
754
- 设置基础大小为 num, 并等比例缩放
755
- """
756
- if width > height:
757
- height = height * num // width
758
- width = num
759
- else:
760
- width = width * num // height
761
- height = num
762
- return width, height
763
-
764
- def get_filename(self):
765
- for root, dirs, files in os.walk(os.path.join(self.path, 'sku图片链接'), topdown=False):
766
- for name in files:
767
- if name.endswith('.jpg'):
768
- sku_id = re.findall(r'\d+_\d+_(\d+)_|\d+_\d+_(\d+-\d+)_|\d+_\d+_([A-Za-z]+\d+)_', name)
769
- sku_id = [item for item in sku_id[0] if item != '']
770
- sp_id = re.findall(r'^(\d+)_', name)
771
- spbm = re.findall(r'(\d{13})\d+', sku_id[0])
772
- if not spbm:
773
- spbm = ['0']
774
- self.pic_datas.append(
775
- {
776
- '文件路径': root,
777
- '文件名称': name,
778
- 'sku': sku_id[0],
779
- '商品id': sp_id[0],
780
- '商家编码': spbm[0],
781
- }
782
- )
783
-
784
-
785
- def main(service_name, database):
786
- """ 从 excel 获取商品id,通过爬虫获取 sku 图片数据并存入数据库 """
787
- if not os.path.exists(Share_Path):
788
- print(f'当前系统环境不支持')
789
- return
790
-
791
- _driver = LoadAccount() # 账号域不同, 要重新实例化
792
- # tb_driver2 = 1
793
- tb_driver2 = _driver.load_account(shop_name='万里马官方旗舰店')
794
- if tb_driver2:
795
- s = SkuPicture(driver=tb_driver2)
796
- s.path = os.path.join(Share_Path, '其他文件') # 本地 excel 的路径
797
- s.filename = '商品id编码表.xlsx'
798
- s.read_df() # 从本地文件中读取商品id,并更新 urls 参数
799
- s.each_page() # 根据 urls 获取每个商品数据并更新为 df
800
- tb_driver2.quit()
801
-
802
- # s.df.to_csv('/Users/xigua/Downloads/test.csv', encoding='utf-8_sig', index=False, header=True)
803
- username, password, host, port = get_myconf.select_config_values(target_service=service_name, database=database)
804
- m = mysql.MysqlUpload(username=username, password=password, host=host, port=port)
805
- m.df_to_mysql(
806
- df=s.df,
807
- db_name='属性设置2',
808
- table_name='天猫商品sku信息',
809
- move_insert=True, # 先删除,再插入
810
- # df_sql=True,
811
- # drop_duplicates=False,
812
- # icm_update=unique_key_list,
813
- service_database={service_name: database},
814
- ) # 3. 回传数据库
815
-
816
-
817
- def main2(service_name, database):
818
- """ 从数据库读取数据,并下载图片到本地 """
819
- d = DownloadPicture(service_name=service_name)
820
- # d.save_path = '/Users/xigua/Downloads/sku图片链接' # 下载图片到本地时的存储位置
821
- # d.get_df_from_service() # 从数据库读取数据
822
- # d.download_data()
823
-
824
- d.save_path = '/Users/xigua/Downloads/商品id_商家编码_图片' # 下载图片到本地时的存储位置
825
- d.local_file = '/Users/xigua/Downloads/商品id图片对照表.xlsx'
826
- d.get_df_from_local()
827
- d.download_data_from_local()
828
-
829
-
830
- def main3():
831
- """ """
832
- p = InsertPicture()
833
- p.filename = 'test.xlsx'
834
- # p.header = 1
835
- p.insert_data()
836
-
837
-
838
def download_sku(service_name='company', database='mysql', db_name='属性设置2', table_name='商品素材下载记录', col_name='sku图片链接'):
    """
    Scrape sku image links for products pulled from the database, record the
    links in `db_name`.`table_name`, then download the images that are not yet
    marked as downloaded and write the updated state back.

    Fix: the first write-back used ``s.df``, which was only defined when the
    browser login succeeded; when ``load_account`` returned a falsy driver the
    function crashed with NameError. That upload now lives inside the success
    branch, so a failed login simply skips the scrape step.
    """
    # Read the DB credentials once; the same values are reused for every
    # query and upload below (the original re-read them three times).
    username, password, host, port = get_myconf.select_config_values(target_service=service_name, database=database)
    download = s_query.QueryDatas(username=username, password=password, host=host, port=port)

    # Product ids (and codes) from the 生意经 metrics table.
    projection = {
        '宝贝id': 1,
        '商家编码': 1,
    }
    df = download.data_to_df(
        db_name='生意经2',
        table_name='宝贝指标',
        start_date='2019-01-01',
        end_date='2099-12-31',
        projection=projection,
    )
    df.rename(columns={'宝贝id': '商品id'}, inplace=True)
    df.drop_duplicates(subset='商品id', keep='last', inplace=True, ignore_index=True)
    # NOTE(review): limits the scrape to 2 products — looks like leftover
    # debug throttling; confirm before removing.
    df = df.head(2)

    # Ids already present in the tracking table, used to merge known codes in.
    projection = {
        '商品id': 1,
        '商家编码': 1,
    }
    df_new = download.data_to_df(
        db_name='属性设置2',
        table_name='商品素材下载记录',
        start_date='2019-01-01',
        end_date='2099-12-31',
        projection=projection,
    )
    df_new.drop_duplicates(subset='商品id', keep='last', inplace=True, ignore_index=True)
    # Left-merge so every product keeps one 商家编码 column.
    df = pd.merge(df, df_new, left_on=['商品id'], right_on=['商品id'], how='left')
    df.rename(columns={'商家编码_x': '商家编码'}, inplace=True)
    df.pop('商家编码_y')
    urls = df.to_dict('records')

    _driver = LoadAccount()  # different account domain, needs its own instance
    tb_driver2 = _driver.load_account(shop_name='万里马官方旗舰店')
    if tb_driver2:
        s = SkuPicture(driver=tb_driver2)
        s.urls = urls
        s.each_page(as_local_file=True)  # visit each url, collect sku data into s.df
        tb_driver2.quit()

        # Write the scraped links back — only when the scrape actually ran.
        m = mysql.MysqlUpload(username=username, password=password, host=host, port=port)
        m.df_to_mysql(
            df=s.df,
            db_name=db_name,
            table_name=table_name,
            move_insert=True,  # delete matching rows first, then insert
            df_sql=False,
            drop_duplicates=False,
            icm_update=[],
            service_database={service_name: database},
        )

    # Re-read the tracking table and download the pending images.
    # Keep the projection empty: all columns are needed downstream.
    projection = {}
    df = download.data_to_df(
        db_name=db_name,
        table_name=table_name,
        start_date='2019-01-01',
        end_date='2099-12-31',
        projection=projection,
    )
    df = df[df['获取与下载'] != '已下载']

    d = DownloadPicture(service_name=service_name)
    d.save_path = os.path.join(D_PATH, col_name)  # local folder for the images
    d.filename = f'{db_name}_{table_name}.xlsx'
    d.df = df
    d.download_data_from_local(col_name=col_name)
    # Mark everything the downloader finished as downloaded.
    df['获取与下载'] = df.apply(lambda x: '已下载' if x['sku编码'] in d.finish_download else x['获取与下载'], axis=1)

    # Write the updated download state back.
    m = mysql.MysqlUpload(username=username, password=password, host=host, port=port)
    m.df_to_mysql(
        df=df,
        db_name=db_name,
        table_name=table_name,
        move_insert=True,  # delete matching rows first, then insert
        df_sql=False,
        drop_duplicates=False,
        icm_update=[],
        service_database={service_name: database},
    )
938
-
939
-
940
def download_spu(service_name='company', database='mysql', db_name='属性设置2', table_name='商品spu素材下载记录', col_name='商品图片', save_path=os.path.join(D_PATH, '商品id_商家编码_图片')):
    """
    Download SPU (main product) images and track download state in MySQL.

    1. Pull rows from the material-export table (商品素材导出)
    2. Pull product codes from the id/code mapping table (商品id编码表);
       the mapping is merged from 生意经 data, so items not yet listed
       (or only just listed) may be missing their product code after the merge
    3. Write the merged result to the tracking table (created if absent)
    4. Re-read the tracking table, keeping only rows not yet downloaded
    5. Download the pending images with DownloadPicture
    6. Write the updated state back to the database
    """

    # 1. Material-export rows; one query client is reused for all reads below.
    username, password, host, port = get_myconf.select_config_values(target_service=service_name, database=database)
    download = s_query.QueryDatas(username=username, password=password, host=host, port=port)
    projection = {
        '店铺名称': 1,
        '商品id': 1,
        '商品标题': 1,
        '商品状态': 1,
        '商品白底图': 1,
        '方版场景图': 1,
        '日期':1,
    }
    df = download.data_to_df(
        db_name='属性设置2',
        table_name='商品素材导出',
        start_date='2019-01-01',
        end_date='2099-12-31',
        projection=projection,
    )
    df['商品id'] = df['商品id'].astype('int64')
    df['日期'] = df['日期'].astype('datetime64[ns]')
    # Keep rows that have at least one usable image ('0' means "no image").
    df = df[(df['商品白底图'] != '0') | (df['方版场景图'] != '0')]
    # White-background image takes priority over the square scene image.
    df['商品图片'] = df[['商品白底图', '方版场景图']].apply(
        lambda x: x['商品白底图'] if x['商品白底图'] != '0' else x['方版场景图'], axis=1)
    # # To prefer the scene image instead, swap the priority:
    # df['商品图片'] = df[['商品白底图', '方版场景图']].apply(
    #     lambda x: x['方版场景图'] if x['方版场景图'] != '0' else x['商品白底图'], axis=1)
    # Sort so drop_duplicates(keep='last') retains the newest row per product id.
    df.sort_values(by=['商品id', '日期'], ascending=[False, True], ignore_index=True, inplace=True)
    df.drop_duplicates(subset=['商品id'], keep='last', inplace=True, ignore_index=True)
    # df = df[['商品id', '商品图片', '日期']]
    df['商品图片'] = df['商品图片'].apply(lambda x: x if 'http' in x else None)  # keep only http links; assumes values are strings — TODO confirm
    df.dropna(how='all', subset=['商品图片'], axis=0, inplace=True)  # drop rows whose image link was invalid
    df['商品链接'] = df['商品id'].apply(
        lambda x: f'https://detail.tmall.com/item.htm?id={str(x)}' if x and '.com' not in str(x) else x)
    df.sort_values(by='商品id', ascending=False, ignore_index=True, inplace=True)  # descending by product id
    df['商品id'] = df['商品id'].astype(str)
    # df = df.head(2)

    # 2. Product-code mapping table (merged from 生意经 data; items not yet
    # listed, or only just listed, may have no code after the merge below).
    projection = {
        '宝贝id': 1,
        '商家编码': 1,
    }
    df_spbm = download.data_to_df(
        db_name='聚合数据',
        table_name='商品id编码表',
        start_date='2019-01-01',
        end_date='2099-12-31',
        projection=projection,
    )
    df_spbm.drop_duplicates(subset=['宝贝id'], keep='last', inplace=True, ignore_index=True)
    # Merge the product code onto the material rows.
    df = pd.merge(df, df_spbm, left_on=['商品id'], right_on=['宝贝id'], how='left')
    df.pop('宝贝id')
    df['获取与下载'] = '已获取'  # state column: '已获取' = fetched, '已下载' = downloaded
    df['时间'] = datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S')
    # df.to_csv(os.path.join(D_PATH, f'{col_name}.csv'), index=False, header=True, encoding='utf-8_sig')
    if '方版场景图' in df.columns.tolist():
        df['方版场景图'] = df['方版场景图'].astype(str)

    # 3. Write the merged result into the tracking table.
    username, password, host, port = get_myconf.select_config_values(target_service=service_name, database=database)
    m = mysql.MysqlUpload(username=username, password=password, host=host, port=port)
    m.df_to_mysql(
        df=df,
        db_name=db_name,
        table_name=table_name,
        move_insert=True,  # delete matching rows first, then insert
        df_sql=False,
        drop_duplicates=False,
        icm_update=[],
        service_database={service_name: database},
    )

    # 4. Re-read and keep only the rows not yet downloaded.
    projection = {}  # empty projection = select all columns
    df_before = download.data_to_df(
        db_name=db_name,
        table_name=table_name,
        start_date='2019-01-01',
        end_date='2099-12-31',
        projection=projection,
    )
    df = df_before[df_before['获取与下载'] != '已下载']

    if len(df) > 0:
        # 5. Download the pending images.
        d = DownloadPicture(service_name=service_name)
        d.save_path = save_path  # local folder for the downloaded images
        d.filename = f'{db_name}_{table_name}.xlsx'
        d.df = df
        d.download_from_df(col_name=col_name, save_path=save_path)
        # finish_id = [item['完成id'] for item in d.finish_d2]
        # df['获取与下载'] = df.apply(lambda x: '已下载' if x['商品id'] in finish_id else x['获取与下载'], axis=1)
        # Mark each finished id as downloaded and stamp its finish time.
        for item in d.finish_d2:
            df['获取与下载'] = df.apply(lambda x: '已下载' if x['商品id'] == item['完成id'] else x['获取与下载'], axis=1)
            df['时间'] = df.apply(lambda x: item['完成时间'] if x['商品id'] == item['完成id'] else x['时间'], axis=1)

        # 6. Write the updated state back.
        username, password, host, port = get_myconf.select_config_values(target_service=service_name, database=database)
        m = mysql.MysqlUpload(username=username, password=password, host=host, port=port)
        m.df_to_mysql(
            df=df,
            db_name=db_name,
            table_name=table_name,
            move_insert=True,  # delete matching rows first, then insert
            df_sql=False,
            drop_duplicates=False,
            icm_update=[],
            service_database={service_name: database},
            reset_id=True,
        )
1067
-
1068
-
1069
if __name__ == '__main__':
    # Entry point: only the SPU downloader runs by default; the other
    # pipelines are kept here commented out for manual use.
    # main(service_name='home_lx', database='mysql')
    # main2(service_name='home_lx', database='mysql')
    # main3()
    # download_sku(service_name='company', database='mysql', db_name='属性设置2', table_name='商品素材下载记录')
    download_spu(
        service_name='company',
        database='mysql',
        db_name='属性设置2',
        table_name='商品spu素材下载记录',
        col_name='商品图片',
        save_path=os.path.join(D_PATH, '1商品id_商家编码_图片'),
    )