mdbq 1.7.8__py3-none-any.whl → 1.8.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- mdbq/aggregation/query_data.py +44 -0
- mdbq/company/copysh.py +1 -1
- mdbq/other/sku_picture.py +487 -0
- mdbq/pbix/refresh_all.py +1 -1
- {mdbq-1.7.8.dist-info → mdbq-1.8.0.dist-info}/METADATA +1 -1
- {mdbq-1.7.8.dist-info → mdbq-1.8.0.dist-info}/RECORD +8 -7
- {mdbq-1.7.8.dist-info → mdbq-1.8.0.dist-info}/WHEEL +0 -0
- {mdbq-1.7.8.dist-info → mdbq-1.8.0.dist-info}/top_level.txt +0 -0
mdbq/aggregation/query_data.py
CHANGED
@@ -431,6 +431,28 @@ class MysqlDatasQuery:
|
|
431
431
|
start_date = f'{start_date.year}-{start_date.month}-01' # 替换为 n 月以前的第一天
|
432
432
|
return pd.to_datetime(start_date), pd.to_datetime(end_date)
|
433
433
|
|
434
|
+
def tm_search(self):
|
435
|
+
start_date, end_date = self.months_data(num=self.months)
|
436
|
+
projection = {
|
437
|
+
'日期': 1,
|
438
|
+
'关键词': 1,
|
439
|
+
'访客数': 1,
|
440
|
+
'支付转化率': 1,
|
441
|
+
'支付金额': 1,
|
442
|
+
'下单金额': 1,
|
443
|
+
'支付买家数': 1,
|
444
|
+
'下单买家数': 1,
|
445
|
+
'加购人数': 1,
|
446
|
+
'新访客': 1,
|
447
|
+
}
|
448
|
+
df = self.download.data_to_df(
|
449
|
+
db_name='生意参谋2',
|
450
|
+
table_name='店铺来源_手淘搜索',
|
451
|
+
start_date=start_date,
|
452
|
+
end_date=end_date,
|
453
|
+
projection=projection,
|
454
|
+
)
|
455
|
+
return df
|
434
456
|
|
435
457
|
class GroupBy:
|
436
458
|
"""
|
@@ -978,6 +1000,22 @@ class GroupBy:
|
|
978
1000
|
df['s_是否品牌词'] = df['搜索词'].str.contains('万里马|wanlima', regex=True)
|
979
1001
|
df['s_是否品牌词'] = df['s_是否品牌词'].apply(lambda x: '品牌词' if x else '')
|
980
1002
|
return df
|
1003
|
+
elif '天猫店铺来源_手淘搜索' in table_name:
|
1004
|
+
df = df.groupby(
|
1005
|
+
['日期', '关键词'],
|
1006
|
+
as_index=False).agg(
|
1007
|
+
**{
|
1008
|
+
'访客数': ('访客数', np.max),
|
1009
|
+
'支付转化率': ('支付转化率', np.max),
|
1010
|
+
'支付金额': ('支付金额', np.max),
|
1011
|
+
'下单金额': ('下单金额', np.max),
|
1012
|
+
'支付买家数': ('支付买家数', np.max),
|
1013
|
+
'下单买家数': ('下单买家数', np.max),
|
1014
|
+
'加购人数': ('加购人数', np.max),
|
1015
|
+
'新访客': ('新访客', np.max),
|
1016
|
+
}
|
1017
|
+
)
|
1018
|
+
return df
|
981
1019
|
else:
|
982
1020
|
print(f'<{table_name}>: Groupby 类尚未配置,数据为空')
|
983
1021
|
return pd.DataFrame({})
|
@@ -1340,6 +1378,12 @@ def data_aggregation(service_databases=[{}], months=1):
|
|
1340
1378
|
'唯一主键': ['日期', '报表类型', '推广渠道', '营销场景', '花费'],
|
1341
1379
|
'数据主体': sdq.pxb_zh(),
|
1342
1380
|
},
|
1381
|
+
{
|
1382
|
+
'数据库名': '聚合数据',
|
1383
|
+
'集合名': '天猫店铺来源_手淘搜索',
|
1384
|
+
'唯一主键': ['日期', '关键词', '访客数'],
|
1385
|
+
'数据主体': sdq.tm_search(),
|
1386
|
+
},
|
1343
1387
|
]
|
1344
1388
|
for items in data_dict: # 遍历返回结果
|
1345
1389
|
db_name, table_name, unique_key_list, df = items['数据库名'], items['集合名'], items['唯一主键'], items['数据主体']
|
mdbq/company/copysh.py
CHANGED
mdbq/other/sku_picture.py
ADDED
@@ -0,0 +1,487 @@
|
|
1
|
+
# -*- coding:utf-8 -*-
|
2
|
+
import datetime
|
3
|
+
import getpass
|
4
|
+
import json
|
5
|
+
import os
|
6
|
+
import platform
|
7
|
+
import random
|
8
|
+
import re
|
9
|
+
import time
|
10
|
+
import warnings
|
11
|
+
import pandas as pd
|
12
|
+
from lxml import etree
|
13
|
+
from selenium import webdriver
|
14
|
+
from selenium.webdriver.support.wait import WebDriverWait
|
15
|
+
from selenium.webdriver.common.by import By
|
16
|
+
from selenium.webdriver.support import expected_conditions as EC
|
17
|
+
from selenium.webdriver.chrome.service import Service
|
18
|
+
from mdbq.config import set_support
|
19
|
+
from mdbq.config import get_myconf
|
20
|
+
from mdbq.mysql import mysql
|
21
|
+
|
22
|
+
warnings.filterwarnings('ignore')
|
23
|
+
|
24
|
+
if platform.system() == 'Windows':
|
25
|
+
Share_Path = os.path.join(r'\\192.168.1.198\时尚事业部\01.运营部\天猫报表') # 共享文件根目录
|
26
|
+
elif platform.system() == 'Darwin':
|
27
|
+
Share_Path = os.path.join('/Volumes/时尚事业部/01.运营部/天猫报表') # 共享文件根目录
|
28
|
+
else:
|
29
|
+
Share_Path = ''
|
30
|
+
|
31
|
+
|
32
|
+
class LoadAccount:
|
33
|
+
""" 如果需要获取 cookie 需要注释无界面模式 """
|
34
|
+
|
35
|
+
def __init__(self):
|
36
|
+
self.url = 'https://login.taobao.com/' # 默认登录淘宝
|
37
|
+
self.cookie_path = os.path.join(set_support.SetSupport(dirname='support').dirname, 'cookies')
|
38
|
+
|
39
|
+
def __call__(self, *args, **kwargs):
|
40
|
+
self.check_cookie() # 检测cookie有效期, 但不阻断任务
|
41
|
+
|
42
|
+
def load_account(self, shop_name):
|
43
|
+
option = webdriver.ChromeOptions()
|
44
|
+
# option.add_argument("--headless") # 设置无界面模式
|
45
|
+
# 调整chrome启动配置
|
46
|
+
option.add_argument("--disable-gpu")
|
47
|
+
option.add_argument("--no-sandbox")
|
48
|
+
option.add_argument("--disable-dev-shm-usage")
|
49
|
+
option.add_experimental_option("excludeSwitches", ["enable-automation"])
|
50
|
+
option.add_experimental_option('excludeSwitches', ['enable-logging']) # 禁止日志输出,减少控制台干扰
|
51
|
+
option.add_experimental_option("useAutomationExtension", False)
|
52
|
+
option.add_argument('--ignore-ssl-error') # 忽略ssl错误
|
53
|
+
prefs = {
|
54
|
+
'profile.default_content_settings.popups': 0, # 禁止弹出所有窗口
|
55
|
+
"browser.download.manager. showAlertOnComplete": False, # 下载完成后不显示下载完成提示框
|
56
|
+
"profile.default_content_setting_values.automatic_downloads": 1, # 允许自动下载多个文件
|
57
|
+
}
|
58
|
+
|
59
|
+
option.add_experimental_option('perfLoggingPrefs', {
|
60
|
+
'enableNetwork': True,
|
61
|
+
'enablePage': False,
|
62
|
+
})
|
63
|
+
option.set_capability("goog:loggingPrefs", {
|
64
|
+
'browser': 'ALL',
|
65
|
+
'performance': 'ALL',
|
66
|
+
})
|
67
|
+
option.set_capability("goog:perfLoggingPrefs", {
|
68
|
+
'enableNetwork': True,
|
69
|
+
'enablePage': False,
|
70
|
+
'enableTimeline': False
|
71
|
+
})
|
72
|
+
|
73
|
+
|
74
|
+
|
75
|
+
option.add_experimental_option('prefs', prefs)
|
76
|
+
option.add_experimental_option('excludeSwitches', ['enable-automation']) # 实验性参数, 左上角小字
|
77
|
+
if platform.system() == 'Windows':
|
78
|
+
service = Service(os.path.join(f'C:\\Users\\{getpass.getuser()}\\chromedriver.exe'))
|
79
|
+
else:
|
80
|
+
service = Service('/usr/local/bin/chromedriver')
|
81
|
+
_driver = webdriver.Chrome(options=option, service=service, ) # 创建Chrome驱动程序实例
|
82
|
+
_driver.maximize_window() # 窗口最大化 方便后续加载数据
|
83
|
+
|
84
|
+
if 'jd' in shop_name: # 切换为京东
|
85
|
+
self.url = 'https://shop.jd.com/jdm/home/'
|
86
|
+
# 登录
|
87
|
+
_driver.get(self.url)
|
88
|
+
_driver.delete_all_cookies() # 首先清除浏览器打开已有的cookies
|
89
|
+
name_lists = os.listdir(self.cookie_path) # cookie 放在主目录下的 cookies 文件夹
|
90
|
+
for name in name_lists:
|
91
|
+
if shop_name in name and name.endswith('.txt') and '~' not in name and '.DS' not in name:
|
92
|
+
with open(os.path.join(self.cookie_path, name), 'r') as f:
|
93
|
+
cookies_list = json.load(f) # 使用json读取cookies 注意读取的是文件 所以用load而不是loads
|
94
|
+
for cookie in cookies_list:
|
95
|
+
_driver.add_cookie(cookie) # 添加cookies信息
|
96
|
+
break
|
97
|
+
# 以上从get url开始的操作要即时完成,不能进入time.sleep,否则登录失败
|
98
|
+
if 'jd' in shop_name:
|
99
|
+
return _driver
|
100
|
+
else:
|
101
|
+
_driver.refresh()
|
102
|
+
time.sleep(random.uniform(5, 8))
|
103
|
+
html = etree.HTML(_driver.page_source)
|
104
|
+
user_name = html.xpath('//div[@class="site-nav-user"]/a/text()')
|
105
|
+
if user_name: # 1877西门吹风
|
106
|
+
print(f'当前账号:{user_name} 登录成功')
|
107
|
+
return _driver
|
108
|
+
|
109
|
+
elements = _driver.find_elements(
|
110
|
+
By.XPATH, '//*[id="login-error"]/div')
|
111
|
+
if elements: # 您已登录,子账号不能访问.... 其实已经处于登录状态
|
112
|
+
if self.other(_driver):
|
113
|
+
return _driver
|
114
|
+
elements = _driver.find_elements(
|
115
|
+
By.XPATH, '//div[@class="captcha-tips"]/div[@class="warnning-text"]')
|
116
|
+
if elements: # 滑块验证,但其实已经处于登录状态
|
117
|
+
if self.other(_driver):
|
118
|
+
return _driver
|
119
|
+
wait = WebDriverWait(_driver, timeout=15)
|
120
|
+
try:
|
121
|
+
button = wait.until(
|
122
|
+
EC.element_to_be_clickable(
|
123
|
+
(By.XPATH, '//button[@class="fm-button fm-submit " and @type="submit"]')
|
124
|
+
)
|
125
|
+
) # 快速进入按钮
|
126
|
+
_driver.execute_script("arguments[0].click();", button) # 点击登录
|
127
|
+
time.sleep(3)
|
128
|
+
except:
|
129
|
+
# 店铺账号
|
130
|
+
try:
|
131
|
+
wait.until(
|
132
|
+
EC.presence_of_element_located(
|
133
|
+
(By.XPATH, '//*[@id="icestark-container"]/div[1]/div/div[1]/img')))
|
134
|
+
html = etree.HTML(_driver.page_source)
|
135
|
+
user_name = html.xpath('//div[@class="UserArea--shopName--3Z5NVbD"]/text()')
|
136
|
+
print(f'当前账号:{user_name} 登录成功')
|
137
|
+
return _driver
|
138
|
+
except:
|
139
|
+
print(f'{shop_name} -> {self.url} 尝试跨页登录1')
|
140
|
+
# self.other(_driver)
|
141
|
+
|
142
|
+
# 店铺账号, 有时候刷新cookies后系统会自动登录,不需要手动点击登录,因此多加一次判断
|
143
|
+
try:
|
144
|
+
wait.until(
|
145
|
+
EC.presence_of_element_located((By.XPATH, '//*[@id="icestark-container"]/div[1]/div/div[1]/img')))
|
146
|
+
html = etree.HTML(_driver.page_source)
|
147
|
+
user_name = html.xpath('//div[@class="UserArea--shopName--3Z5NVbD"]/text()')
|
148
|
+
print(f'当前账号:{user_name} 登录成功')
|
149
|
+
except:
|
150
|
+
print(f'{shop_name} -> {self.url} 尝试跨页登录2')
|
151
|
+
self.other(_driver)
|
152
|
+
return _driver
|
153
|
+
|
154
|
+
@staticmethod
|
155
|
+
def other(_driver):
|
156
|
+
""" 淘宝账号不知为何刷新cookies后不跳转, """
|
157
|
+
_driver.get('https://myseller.taobao.com')
|
158
|
+
time.sleep(3)
|
159
|
+
try:
|
160
|
+
wait = WebDriverWait(_driver, timeout=15)
|
161
|
+
wait.until(EC.presence_of_element_located((By.XPATH, '//div[contains(@class, "UserArea--shopName")]')))
|
162
|
+
print('登录成功')
|
163
|
+
return True
|
164
|
+
except Exception as e:
|
165
|
+
print(e)
|
166
|
+
print('登录失败')
|
167
|
+
_driver.quit()
|
168
|
+
return False
|
169
|
+
|
170
|
+
def d_new_cookies(self, _driver, _shopname):
|
171
|
+
""" 负责检查并刷新 cookies 文件"""
|
172
|
+
try:
|
173
|
+
_file = os.path.join(self.cookie_path, f'cookie_{_shopname}.txt')
|
174
|
+
_c = os.stat(_file).st_mtime # 读取文件的元信息 >>>文件修改时间
|
175
|
+
_c_time = datetime.datetime.fromtimestamp(_c) # 格式化修改时间
|
176
|
+
_today = datetime.datetime.today()
|
177
|
+
if (_today - _c_time).total_seconds() > 170000:
|
178
|
+
with open(_file, 'w') as f:
|
179
|
+
# 将cookies保存为json格式
|
180
|
+
cookies_list = _driver.get_cookies()
|
181
|
+
for cookie in cookies_list:
|
182
|
+
# 该字段有问题所以删除就可以
|
183
|
+
if 'expiry' in cookie:
|
184
|
+
del cookie['expiry']
|
185
|
+
if 'domain' in cookie and '万里马官方' in _shopname: # 仅仅是天猫淘宝需要修改此值, 京东别改
|
186
|
+
cookie['domain'] = '.taobao.com'
|
187
|
+
cookies_list = json.dumps(cookies_list)
|
188
|
+
f.write(cookies_list)
|
189
|
+
# print(f'cookie已保存: {_file}')
|
190
|
+
except Exception as e:
|
191
|
+
print(e)
|
192
|
+
|
193
|
+
def check_cookie(self):
|
194
|
+
"""
|
195
|
+
检查cookies,如果过期则重新获取
|
196
|
+
still_get: 设置该参数立即更新cookie, 不论是否过期
|
197
|
+
"""
|
198
|
+
if not os.path.exists(self.cookie_path):
|
199
|
+
print(f'没有找到cookies文件: {self.cookie_path}')
|
200
|
+
return False
|
201
|
+
files = os.listdir(self.cookie_path)
|
202
|
+
cook = []
|
203
|
+
for file in files:
|
204
|
+
if file.endswith('txt') and 'cookie_' in file:
|
205
|
+
cook.append(file)
|
206
|
+
c_ = os.stat(os.path.join(self.cookie_path, file)).st_mtime # 读取文件的元信息 >>>文件修改时间
|
207
|
+
c_time_ = datetime.datetime.fromtimestamp(c_) # 格式化修改时间
|
208
|
+
today = datetime.datetime.today()
|
209
|
+
if (today - c_time_).total_seconds() > 864000:
|
210
|
+
# 超过时间重新获取cookies
|
211
|
+
print(f' {file}cookie已过期,请重新获取cookies')
|
212
|
+
return None
|
213
|
+
|
214
|
+
def tb_cookie(self, _url='https://login.taobao.com/'):
|
215
|
+
"""
|
216
|
+
本函数需要谨慎调用,不要弄错账号以免cookies混乱
|
217
|
+
扫码获取cookies,下载到cookies文件夹
|
218
|
+
is_wlm_cookie: 单独创建一个wlm的cookies,保存在上层目录,用于日常数据下载,其他淘宝爬虫不要调用
|
219
|
+
c_account:设置为True时,检测店铺账号,False检测非店铺账号
|
220
|
+
"""
|
221
|
+
option = webdriver.ChromeOptions() # 浏览器启动选项
|
222
|
+
option.headless = True # False指定为无界面模式
|
223
|
+
# 调整chrome启动配置
|
224
|
+
option.add_argument("--disable-gpu")
|
225
|
+
option.add_argument("--no-sandbox")
|
226
|
+
option.add_argument("--disable-dev-shm-usage")
|
227
|
+
option.add_experimental_option("excludeSwitches", ["enable-automation"])
|
228
|
+
option.add_experimental_option("useAutomationExtension", False)
|
229
|
+
if platform.system() == 'Windows':
|
230
|
+
service = Service(os.path.join(f'C:\\Users\\{getpass.getuser()}\\chromedriver.exe'))
|
231
|
+
else:
|
232
|
+
service = Service('/usr/local/bin/chromedriver')
|
233
|
+
_driver = webdriver.Chrome(service=service, options=option) # 创建Chrome驱动程序实例
|
234
|
+
# 登录
|
235
|
+
_driver.get(_url)
|
236
|
+
time.sleep(1)
|
237
|
+
_driver.maximize_window() # 窗口最大化 方便后续加载数据
|
238
|
+
wait = WebDriverWait(_driver, timeout=120) # 等待登录二维码
|
239
|
+
wait.until(EC.element_to_be_clickable(
|
240
|
+
(By.XPATH, '//div[@class="qrcode-login"]/div/div[@class="qrcode-img"]')))
|
241
|
+
|
242
|
+
user_name = None
|
243
|
+
for i in range(10):
|
244
|
+
d_time = datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S')
|
245
|
+
print(f'{d_time} 当前验证:等待非店账号扫码,请尽快扫码...')
|
246
|
+
wait = WebDriverWait(_driver, timeout=10) # 等待扫码登录后的页面, 左上角加载的一张图片
|
247
|
+
try: # 非店铺账号
|
248
|
+
wait.until(
|
249
|
+
EC.presence_of_element_located((By.XPATH, '//*[@id="J_SiteNavLogin"]/div[1]/div/a')))
|
250
|
+
html = etree.HTML(_driver.page_source)
|
251
|
+
user_name = html.xpath('//*[@id="J_SiteNavLogin"]/div[1]/div/a/text()')
|
252
|
+
break
|
253
|
+
except:
|
254
|
+
d_time = datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S')
|
255
|
+
print(f'{d_time} 当前验证:等待店铺账号扫码...')
|
256
|
+
wait = WebDriverWait(_driver, timeout=15)
|
257
|
+
try: # 等待左上角的牵牛图标
|
258
|
+
wait.until(
|
259
|
+
EC.presence_of_element_located(
|
260
|
+
(By.XPATH, '//*[@id="icestark-container"]/div[1]/div/div[1]/img')))
|
261
|
+
html = etree.HTML(_driver.page_source) # 登录店铺名称
|
262
|
+
user_name = html.xpath('//div[contains(@class, "UserArea--shopName")]/text()')
|
263
|
+
break
|
264
|
+
except:
|
265
|
+
d_time = datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S')
|
266
|
+
print(f'{d_time} {_url} 第 {i + 1}/10 次等待登录超时,正在重试')
|
267
|
+
if i > 8:
|
268
|
+
return None
|
269
|
+
d_time = datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S')
|
270
|
+
print(f'{d_time} 登录成功,正在获取cookie...')
|
271
|
+
time.sleep(1)
|
272
|
+
sp_id = ['649844025963', '732863024183', '640779963378', '677330842517']
|
273
|
+
sp_id = random.choice(sp_id)
|
274
|
+
_driver.get(f'https://detail.tmall.com/item.htm?id={sp_id}')
|
275
|
+
time.sleep(3)
|
276
|
+
if user_name:
|
277
|
+
user_name = user_name[0]
|
278
|
+
user_name = re.sub(':', '_', user_name) # 删除用户名中的冒号
|
279
|
+
else:
|
280
|
+
user_name = ''
|
281
|
+
|
282
|
+
if not os.path.exists(self.cookie_path):
|
283
|
+
os.makedirs(self.cookie_path)
|
284
|
+
_file = os.path.join(self.cookie_path, f'cookie_{user_name}.txt')
|
285
|
+
with open(_file, 'w') as f:
|
286
|
+
# 将cookies保存为json格式
|
287
|
+
cookies_list = _driver.get_cookies()
|
288
|
+
for cookie in cookies_list:
|
289
|
+
# 该字段有问题所以删除就可以
|
290
|
+
if 'expiry' in cookie:
|
291
|
+
del cookie['expiry']
|
292
|
+
if 'domain' in cookie:
|
293
|
+
cookie['domain'] = '.taobao.com'
|
294
|
+
cookies_list = json.dumps(cookies_list)
|
295
|
+
f.write(cookies_list)
|
296
|
+
print(f'cookie已保存: {_file}')
|
297
|
+
_driver.quit()
|
298
|
+
|
299
|
+
def jd_cookie(self, _url='https://shop.jd.com/jdm/home/'):
|
300
|
+
option = webdriver.ChromeOptions() # 浏览器启动选项
|
301
|
+
option.headless = True # False指定为无界面模式
|
302
|
+
if platform.system() == 'Windows':
|
303
|
+
service = Service(os.path.join(f'C:\\Users\\{getpass.getuser()}\\chromedriver.exe'))
|
304
|
+
else:
|
305
|
+
service = Service('/usr/local/bin/chromedriver')
|
306
|
+
_driver = webdriver.Chrome(service=service, options=option) # 创建Chrome驱动程序实例
|
307
|
+
# 登录
|
308
|
+
_driver.get(_url)
|
309
|
+
time.sleep(1)
|
310
|
+
_driver.maximize_window() # 窗口最大化 方便后续加载数据
|
311
|
+
print('等待登录京东商家后台...')
|
312
|
+
wait = WebDriverWait(_driver, timeout=300)
|
313
|
+
try:
|
314
|
+
wait.until(
|
315
|
+
EC.presence_of_element_located((By.XPATH, '//span[text()="京准通"]')))
|
316
|
+
except:
|
317
|
+
print('等待京东登录超时!')
|
318
|
+
d_time = datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S')
|
319
|
+
print(f'{d_time} 登录成功,正在获取cookie...')
|
320
|
+
time.sleep(3)
|
321
|
+
# d_time = datetime.datetime.now().strftime('%Y%m%d%H%M%S')
|
322
|
+
|
323
|
+
if not os.path.exists(self.cookie_path):
|
324
|
+
os.makedirs(self.cookie_path)
|
325
|
+
_file = os.path.join(self.cookie_path, 'cookie_jd.txt')
|
326
|
+
with open(_file, 'w') as f:
|
327
|
+
# 将cookies保存为json格式
|
328
|
+
cookies_list = _driver.get_cookies()
|
329
|
+
for cookie in cookies_list:
|
330
|
+
# 该字段有问题所以删除就可以
|
331
|
+
if 'expiry' in cookie:
|
332
|
+
del cookie['expiry']
|
333
|
+
cookies_list = json.dumps(cookies_list)
|
334
|
+
f.write(cookies_list)
|
335
|
+
print(f'cookie已保存: {_file}')
|
336
|
+
time.sleep(1)
|
337
|
+
_driver.quit()
|
338
|
+
|
339
|
+
|
340
|
+
class SkuPicture:
|
341
|
+
def __init__(self, driver):
|
342
|
+
self.driver = driver
|
343
|
+
self.path = os.path.join(Share_Path, '其他文件')
|
344
|
+
self.filename = '商品id编码表.xlsx'
|
345
|
+
self.urls = []
|
346
|
+
self.datas = [] # 从单品页面获取数据,存储这部分数据,作为中转
|
347
|
+
self.df = pd.DataFrame()
|
348
|
+
|
349
|
+
def each_page(self):
|
350
|
+
wait = WebDriverWait(self.driver, timeout=15)
|
351
|
+
num = len(self.urls)
|
352
|
+
i = 0
|
353
|
+
for data in self.urls:
|
354
|
+
url = f'https://sell.publish.tmall.com/tmall/publish.htm?id={data['商品id']}'
|
355
|
+
print(f'当前任务: {i}/{num} {url}')
|
356
|
+
try:
|
357
|
+
self.driver.get(url)
|
358
|
+
time.sleep(3)
|
359
|
+
# elements = self.driver.find_elements(
|
360
|
+
# By.XPATH, '//h2[text()="很抱歉,您查看的商品找不到了!"]')
|
361
|
+
# if len(elements) > 0:
|
362
|
+
# continue
|
363
|
+
wait.until(EC.presence_of_element_located((By.XPATH, '//tr[@class="sku-table-row"]')))
|
364
|
+
html = etree.HTML(self.driver.page_source)
|
365
|
+
imgs = html.xpath('//img[contains(@class, "img-block")]/@src')
|
366
|
+
imgs = [f'https:{item}' for item in imgs if 'http' not in item]
|
367
|
+
titles = html.xpath('//img[contains(@class, "img-block")]/../span/@title')
|
368
|
+
# img = html.xpath('//tr[@class="sku-table-row"]/td/div/div/div/img[@class="img-block"]/@src')
|
369
|
+
sku_price = html.xpath(
|
370
|
+
'//tr[@class="sku-table-row"]/td[contains(@class, "sell-sku-cell-money")]//input/@value')
|
371
|
+
desc = html.xpath(
|
372
|
+
'//tr[@class="sku-table-row"]/td[contains(@class, "sell-sku-cell-skuIndividualCom")]//em/@title')
|
373
|
+
sales = html.xpath(
|
374
|
+
'//tr[@class="sku-table-row"]/td[contains(@class, "sell-sku-cell-number")]//input/@value')
|
375
|
+
sku_spbm = html.xpath(
|
376
|
+
'//tr[@class="sku-table-row"]/td[contains(@class, "sell-sku-cell-input") and contains(@id, "skuOuterId")]//input/@value')
|
377
|
+
leimu = html.xpath(
|
378
|
+
'//h2[@id="text-catpath"]/div/text()')
|
379
|
+
sp_titles = html.xpath(
|
380
|
+
'//div[@class="tm-title normal"]/span/span/input/@value')
|
381
|
+
|
382
|
+
if sp_titles:
|
383
|
+
sp_titles = sp_titles[0]
|
384
|
+
else:
|
385
|
+
sp_titles = ''
|
386
|
+
if leimu:
|
387
|
+
leimu = re.sub('>>', '_', leimu[0])
|
388
|
+
leimu = re.sub('当前类目:', '', leimu)
|
389
|
+
else:
|
390
|
+
leimu = ''
|
391
|
+
if not titles:
|
392
|
+
titles = ''
|
393
|
+
if not imgs:
|
394
|
+
imgs = ''
|
395
|
+
if not sales:
|
396
|
+
sales = ''
|
397
|
+
if not sku_price:
|
398
|
+
sku_price = ''
|
399
|
+
if not sku_spbm:
|
400
|
+
sku_spbm = ''
|
401
|
+
if not desc:
|
402
|
+
desc = ''
|
403
|
+
|
404
|
+
# print(sp_titles)
|
405
|
+
# print(titles)
|
406
|
+
# print(imgs)
|
407
|
+
# print(sales)
|
408
|
+
# print(sku_price)
|
409
|
+
# print(sku_spbm)
|
410
|
+
# print(desc)
|
411
|
+
# print(leimu)
|
412
|
+
self.datas.append(
|
413
|
+
{
|
414
|
+
'日期': datetime.date.today(),
|
415
|
+
'商品id': data['商品id'],
|
416
|
+
'商品标题': sp_titles,
|
417
|
+
'商品链接': f'https://detail.tmall.com/item.htm?id={data['商品id']}',
|
418
|
+
'sku名称': titles,
|
419
|
+
'sku图片链接': imgs,
|
420
|
+
'库存数量': sales,
|
421
|
+
'价格': sku_price,
|
422
|
+
'sku编码': sku_spbm,
|
423
|
+
'商家编码': data['商家编码'],
|
424
|
+
'推荐卖点': desc,
|
425
|
+
'是否新增': data['是否新增'],
|
426
|
+
'类目': leimu,
|
427
|
+
}
|
428
|
+
)
|
429
|
+
except Exception as e:
|
430
|
+
# print(e)
|
431
|
+
pass
|
432
|
+
i += 1
|
433
|
+
# if i > 3:
|
434
|
+
# break
|
435
|
+
time.sleep(1)
|
436
|
+
|
437
|
+
results = []
|
438
|
+
for data in self.datas:
|
439
|
+
try:
|
440
|
+
df = pd.DataFrame.from_dict(data, orient='columns')
|
441
|
+
results.append(df)
|
442
|
+
except:
|
443
|
+
pass
|
444
|
+
|
445
|
+
if results:
|
446
|
+
self.df = pd.concat(results)
|
447
|
+
|
448
|
+
def read_df(self):
|
449
|
+
path = os.path.join(self.path, self.filename)
|
450
|
+
df = pd.read_excel(path, header=0)
|
451
|
+
df = df[['商品id', '商家编码', '是否新增']]
|
452
|
+
df = df.astype({'是否新增': int})
|
453
|
+
df = df[df['是否新增'] == 1]
|
454
|
+
self.urls = df.to_dict('records')
|
455
|
+
|
456
|
+
|
457
|
+
def main(service_name, database):
|
458
|
+
if not os.path.exists(Share_Path):
|
459
|
+
print(f'当前系统环境不支持')
|
460
|
+
return
|
461
|
+
|
462
|
+
_driver = LoadAccount() # 账号域不同, 要重新实例化
|
463
|
+
# tb_driver2 = 1
|
464
|
+
tb_driver2 = _driver.load_account(shop_name='万里马官方旗舰店')
|
465
|
+
if tb_driver2:
|
466
|
+
s = SkuPicture(driver=tb_driver2)
|
467
|
+
s.read_df() # 从本地文件中读取商品id,并更新 urls 参数
|
468
|
+
s.each_page() # 根据 urls 获取每个商品数据并更新为 df
|
469
|
+
tb_driver2.quit()
|
470
|
+
|
471
|
+
# s.df.to_csv('/Users/xigua/Downloads/test.csv', encoding='utf-8_sig', index=False, header=True)
|
472
|
+
username, password, host, port = get_myconf.select_config_values(target_service=service_name, database=database)
|
473
|
+
m = mysql.MysqlUpload(username=username, password=password, host=host, port=port)
|
474
|
+
m.df_to_mysql(
|
475
|
+
df=s.df,
|
476
|
+
db_name='属性设置2',
|
477
|
+
table_name='天猫商品sku信息',
|
478
|
+
move_insert=True, # 先删除,再插入
|
479
|
+
# df_sql=True,
|
480
|
+
# drop_duplicates=False,
|
481
|
+
# icm_update=unique_key_list,
|
482
|
+
service_database={service_name: database},
|
483
|
+
) # 3. 回传数据库
|
484
|
+
|
485
|
+
|
486
|
+
if __name__ == '__main__':
|
487
|
+
main(service_name='company', database='mysql')
|
mdbq/pbix/refresh_all.py
CHANGED
@@ -63,7 +63,7 @@ class RefreshAll:
|
|
63
63
|
if filename.endswith('.xlsx'):
|
64
64
|
try:
|
65
65
|
print(f'正在刷新 >>>{filename}')
|
66
|
-
path = os.path.join(self.run_py_path, filename) # 拼接文件路径
|
66
|
+
path = os.path.join(top_path, self.run_py_path, filename) # 拼接文件路径
|
67
67
|
xlapp = win32com.client.Dispatch('Excel.Application') # 创建Excel程序App
|
68
68
|
xlapp.Visible = False # 窗口是否可见
|
69
69
|
xlapp.DisplayAlerts = False # 是否显示警告信息
|
{mdbq-1.7.8.dist-info → mdbq-1.8.0.dist-info}/RECORD
CHANGED
@@ -5,13 +5,13 @@ mdbq/aggregation/aggregation.py,sha256=sgsetJHK4fOcXvqQCVgJoSIwZQLMznVG3I-MqHlW_
|
|
5
5
|
mdbq/aggregation/df_types.py,sha256=oQJS2IBU3_IO6GMgbssHuC2yCjNnbta0QPGrFOwNLnU,7591
|
6
6
|
mdbq/aggregation/mysql_types.py,sha256=DQYROALDiwjJzjhaJfIIdnsrNs11i5BORlj_v6bp67Y,11062
|
7
7
|
mdbq/aggregation/optimize_data.py,sha256=u2Kl_MFtZueXJ57ycy4H2OhXD431RctUYJYCl637uT0,4176
|
8
|
-
mdbq/aggregation/query_data.py,sha256=
|
8
|
+
mdbq/aggregation/query_data.py,sha256=dzS1XvoJ0oEckrvIF-_uUALnPIRG4mOwG5ktr3LWsKY,70243
|
9
9
|
mdbq/bdup/__init__.py,sha256=AkhsGk81SkG1c8FqDH5tRq-8MZmFobVbN60DTyukYTY,28
|
10
10
|
mdbq/bdup/bdup.py,sha256=LAV0TgnQpc-LB-YuJthxb0U42_VkPidzQzAagan46lU,4234
|
11
11
|
mdbq/clean/__init__.py,sha256=A1d6x3L27j4NtLgiFV5TANwEkLuaDfPHDQNrPBbNWtU,41
|
12
12
|
mdbq/clean/data_clean.py,sha256=T0WYOKFwNZTNk3temKOw1K2H54kxu9QBJjlTbkMtxNk,94217
|
13
13
|
mdbq/company/__init__.py,sha256=qz8F_GsP_pMB5PblgJAUAMjasuZbOEp3qQOCB39E8f0,21
|
14
|
-
mdbq/company/copysh.py,sha256=
|
14
|
+
mdbq/company/copysh.py,sha256=tEWittsWE_ocp-ilUvbDQncIVaVgtjjja5M0WHjecE4,17753
|
15
15
|
mdbq/config/__init__.py,sha256=jso1oHcy6cJEfa7udS_9uO5X6kZLoPBF8l3wCYmr5dM,18
|
16
16
|
mdbq/config/get_myconf.py,sha256=-CFEW0dQh4OIwVgwK-cL0eVp1LN3PjJgN89d4P5TB9I,6011
|
17
17
|
mdbq/config/products.py,sha256=vIK8DJ-F3XXwvNPK-4OJq2tZITNlL6Sub8QBdoOng8U,5676
|
@@ -30,12 +30,13 @@ mdbq/mysql/year_month_day.py,sha256=VgewoE2pJxK7ErjfviL_SMTN77ki8GVbTUcao3vFUCE,
|
|
30
30
|
mdbq/other/__init__.py,sha256=jso1oHcy6cJEfa7udS_9uO5X6kZLoPBF8l3wCYmr5dM,18
|
31
31
|
mdbq/other/porxy.py,sha256=UHfgEyXugogvXgsG68a7QouUCKaohTKKkI4RN-kYSdQ,4961
|
32
32
|
mdbq/other/pov_city.py,sha256=AEOmCOzOwyjHi9LLZWPKi6DUuSC-_M163664I52u9qw,21050
|
33
|
+
mdbq/other/sku_picture.py,sha256=U9NG-laKns3g0FJsK-JwqlW1EQZRTEXvIsbdFNBR0Ro,22250
|
33
34
|
mdbq/other/ua_sj.py,sha256=JuVYzc_5QZ9s_oQSrTHVKkQv4S_7-CWx4oIKOARn_9U,22178
|
34
35
|
mdbq/pbix/__init__.py,sha256=Trtfaynu9RjoTyLLYBN2xdRxTvm_zhCniUkVTAYwcjo,24
|
35
36
|
mdbq/pbix/pbix_refresh.py,sha256=JUjKW3bNEyoMVfVfo77UhguvS5AWkixvVhDbw4_MHco,2396
|
36
|
-
mdbq/pbix/refresh_all.py,sha256=
|
37
|
+
mdbq/pbix/refresh_all.py,sha256=0uAnBKCd5cx5FLTkawN1GV9yi87rfyMgYal5LABtumQ,7186
|
37
38
|
mdbq/spider/__init__.py,sha256=RBMFXGy_jd1HXZhngB2T2XTvJqki8P_Fr-pBcwijnew,18
|
38
|
-
mdbq-1.
|
39
|
-
mdbq-1.
|
40
|
-
mdbq-1.
|
41
|
-
mdbq-1.
|
39
|
+
mdbq-1.8.0.dist-info/METADATA,sha256=hb11aMvPCn1u8Jtmn2BQaMfgkUloamu_-ri-FFpgpns,245
|
40
|
+
mdbq-1.8.0.dist-info/WHEEL,sha256=eOLhNAGa2EW3wWl_TU484h7q1UNgy0JXjjoqKoxAAQc,92
|
41
|
+
mdbq-1.8.0.dist-info/top_level.txt,sha256=2FQ-uLnCSB-OwFiWntzmwosW3X2Xqsg0ewh1axsaylA,5
|
42
|
+
mdbq-1.8.0.dist-info/RECORD,,
|
File without changes
|
File without changes
|