mdbq 3.8.1__py3-none-any.whl → 3.8.3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- mdbq/__version__.py +1 -3
- mdbq/aggregation/query_data.py +49 -3
- mdbq/mysql/mysql.py +143 -140
- mdbq/spider/aikucun.py +377 -385
- {mdbq-3.8.1.dist-info → mdbq-3.8.3.dist-info}/METADATA +1 -1
- {mdbq-3.8.1.dist-info → mdbq-3.8.3.dist-info}/RECORD +8 -9
- mdbq/aggregation/optimize_data_bak.py +0 -30
- {mdbq-3.8.1.dist-info → mdbq-3.8.3.dist-info}/WHEEL +0 -0
- {mdbq-3.8.1.dist-info → mdbq-3.8.3.dist-info}/top_level.txt +0 -0
mdbq/spider/aikucun.py
CHANGED
@@ -1,190 +1,75 @@
|
|
1
1
|
# -*- coding:utf-8 -*-
|
2
2
|
import datetime
|
3
|
-
import
|
3
|
+
import requests
|
4
4
|
import json
|
5
5
|
import os
|
6
6
|
import sys
|
7
|
-
import pathlib
|
8
|
-
import platform
|
9
7
|
import re
|
10
8
|
import time
|
11
9
|
import warnings
|
12
|
-
import
|
13
|
-
import
|
10
|
+
import platform
|
11
|
+
import getpass
|
14
12
|
from selenium import webdriver
|
15
13
|
from selenium.webdriver.support.wait import WebDriverWait
|
16
14
|
from selenium.webdriver.common.by import By
|
17
15
|
from selenium.webdriver.support import expected_conditions as EC
|
18
16
|
from selenium.webdriver.chrome.service import Service
|
19
|
-
|
20
|
-
|
21
|
-
from mdbq.
|
17
|
+
import pymysql
|
18
|
+
import pandas as pd
|
19
|
+
from mdbq.log import spider_logging
|
22
20
|
from mdbq.mysql import mysql
|
23
21
|
from mdbq.mysql import s_query
|
24
|
-
from mdbq.config import
|
25
|
-
|
26
|
-
warnings.filterwarnings('ignore')
|
22
|
+
from mdbq.config import config
|
23
|
+
from mdbq.other import ua_sj
|
27
24
|
|
28
25
|
|
29
|
-
|
30
|
-
|
31
|
-
D_PATH = str(pathlib.Path(f'C:\\Users\\{getpass.getuser()}\\Downloads'))
|
32
|
-
elif platform.system() == 'Linux':
|
33
|
-
D_PATH = os.path.join(os.path.realpath(os.path.dirname(sys.argv[0])), 'Downloads')
|
34
|
-
if not os.path.exists(D_PATH):
|
35
|
-
os.makedirs(D_PATH)
|
36
|
-
else:
|
37
|
-
D_PATH = str(pathlib.Path(f'/Users/{getpass.getuser()}/Downloads'))
|
38
|
-
upload_path = os.path.join(D_PATH, '数据上传中心', '爱库存') # 此目录位于下载文件夹
|
26
|
+
content = config.read_config(file_path=os.path.join(os.path.realpath(os.path.dirname(sys.argv[0])), 'spd.txt'))
|
27
|
+
username, password, host, port = content['username'], content['password'], content['host'], content['port']
|
39
28
|
|
40
|
-
|
41
|
-
m_engine, username, password, host, port = default.get_mysql_engine(platform='Windows', hostname=hostname, sql='mysql', local=local, config_file=None)
|
42
|
-
print(username, password, host, port)
|
29
|
+
m_engine = mysql.MysqlUpload(username=username, password=password, host=host, port=port, charset='utf8mb4')
|
43
30
|
# 实例化一个数据查询类,用来获取 cookies 表数据
|
44
31
|
download = s_query.QueryDatas(username=username, password=password, host=host, port=port)
|
32
|
+
logger = spider_logging.setup_logging()
|
45
33
|
|
46
34
|
|
47
|
-
def
|
48
|
-
|
49
|
-
|
50
|
-
_url = 'https://gray-merc.aikucun.com/index.html'
|
51
|
-
cookie_path = os.path.join(set_support.SetSupport(dirname='support').dirname, 'cookies')
|
52
|
-
filename_aikucun = 'cookie_aikucun.json'
|
53
|
-
print(_url)
|
54
|
-
|
55
|
-
option = webdriver.ChromeOptions() # 浏览器启动选项
|
56
|
-
option.headless = True # False指定为无界面模式
|
57
|
-
# 调整chrome启动配置
|
58
|
-
option.add_argument("--disable-gpu")
|
59
|
-
option.add_argument("--no-sandbox")
|
60
|
-
option.add_argument("--disable-dev-shm-usage")
|
61
|
-
option.add_experimental_option("excludeSwitches", ["enable-automation"])
|
62
|
-
option.add_experimental_option("useAutomationExtension", False)
|
63
|
-
# if platform.system() == 'Windows':
|
64
|
-
# service = Service(os.path.join(f'C:\\Users\\{getpass.getuser()}\\chromedriver.exe'))
|
65
|
-
# else:
|
66
|
-
# service = Service('/usr/local/bin/chromedriver')
|
67
|
-
if platform.system() == 'Windows':
|
68
|
-
# 设置Chrome的路径
|
69
|
-
chrome_path = os.path.join(f'C:\\Users\\{getpass.getuser()}', 'chrome\\chrome_win64\\chrome.exe')
|
70
|
-
chromedriver_path = os.path.join(f'C:\\Users\\{getpass.getuser()}', 'chrome\\chromedriver.exe')
|
71
|
-
# os.environ["webdriver.chrome.driver"] = chrome_path
|
72
|
-
option.binary_location = chrome_path # windows 设置此参数有效
|
73
|
-
service = Service(chromedriver_path)
|
74
|
-
# service = Service(str(pathlib.Path(f'C:\\Users\\{getpass.getuser()}\\chromedriver.exe'))) # 旧路径
|
75
|
-
else:
|
76
|
-
# 设置Chrome的路径
|
77
|
-
chrome_path = '/usr/local/chrome/Google Chrome for Testing.app'
|
78
|
-
chromedriver_path = '/usr/local/chrome/chromedriver'
|
79
|
-
os.environ["webdriver.chrome.driver"] = chrome_path
|
80
|
-
|
81
|
-
service = Service(chromedriver_path)
|
82
|
-
_driver = webdriver.Chrome(service=service, options=option) # 创建Chrome驱动程序实例
|
83
|
-
|
84
|
-
# 登录
|
85
|
-
_driver.get(_url)
|
86
|
-
time.sleep(0.1)
|
87
|
-
_driver.maximize_window() # 窗口最大化 方便后续加载数据
|
88
|
-
print(f'请登录并切换到百宝箱,再保存 cookies: \n https://treasurebox.aikucun.com/dashboard/commodity/ranking/merchant?LS=true&shopId=1814114991487782914&from=menu&v=0.1936043279838604')
|
89
|
-
wait = WebDriverWait(_driver, timeout=15)
|
90
|
-
input_box = wait.until(
|
91
|
-
EC.element_to_be_clickable(
|
92
|
-
(By.XPATH, '//input[@placeholder="请输入用户名"]'))) #
|
93
|
-
input_box.send_keys('广东万里马实业股份有限公司')
|
94
|
-
input_box = wait.until(
|
95
|
-
EC.element_to_be_clickable(
|
96
|
-
(By.XPATH, '//input[@placeholder="请输入密码"]'))) #
|
97
|
-
input_box.send_keys('wlm123$$$')
|
98
|
-
time.sleep(0.1)
|
99
|
-
elements = _driver.find_elements(
|
100
|
-
By.XPATH, '//button[@class="merchant_login_btn" and contains(text(), "登录")]')
|
101
|
-
_driver.execute_script("arguments[0].click();", elements[0])
|
102
|
-
for i in range(100):
|
35
|
+
def keep_connect(_db_name, _config, max_try: int=10):
|
36
|
+
attempts = 1
|
37
|
+
while attempts <= max_try:
|
103
38
|
try:
|
104
|
-
|
105
|
-
|
106
|
-
|
107
|
-
|
108
|
-
|
109
|
-
|
110
|
-
|
111
|
-
|
112
|
-
|
113
|
-
|
114
|
-
|
115
|
-
|
116
|
-
|
117
|
-
|
118
|
-
|
119
|
-
|
120
|
-
|
121
|
-
|
122
|
-
|
123
|
-
|
124
|
-
else:
|
125
|
-
new_cookies_list.append(cookie)
|
126
|
-
|
127
|
-
######### 新增 写入 mysql #########
|
128
|
-
set_typ = {
|
129
|
-
'日期': 'date',
|
130
|
-
'domain': 'varchar(100)',
|
131
|
-
'expiry': 'int',
|
132
|
-
'httpOnly': 'varchar(20)',
|
133
|
-
'name': 'varchar(50)',
|
134
|
-
'path': 'varchar(50)',
|
135
|
-
'sameSite': 'varchar(50)',
|
136
|
-
'secure': 'varchar(50)',
|
137
|
-
'value': 'text',
|
138
|
-
'更新时间': 'timestamp'
|
139
|
-
}
|
140
|
-
_cookies_list = []
|
141
|
-
for item in cookies_list:
|
142
|
-
new_dict = {'日期': datetime.datetime.today().strftime('%Y-%m-%d'), }
|
143
|
-
for k, v in item.items():
|
144
|
-
if v is None:
|
145
|
-
v = 'None'
|
146
|
-
new_dict.update({k: v})
|
147
|
-
if 'expiry' not in new_dict:
|
148
|
-
new_dict.update({'expiry': 0})
|
149
|
-
new_dict.update({'更新时间': datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S')})
|
150
|
-
_cookies_list.append(new_dict)
|
151
|
-
m_engine.insert_many_dict(
|
152
|
-
db_name='cookie文件',
|
153
|
-
table_name='main_aikucun',
|
154
|
-
dict_data_list=_cookies_list,
|
155
|
-
set_typ=set_typ,
|
156
|
-
allow_not_null=True, # 允许插入空值
|
157
|
-
)
|
158
|
-
#############################################
|
159
|
-
|
160
|
-
json_file = os.path.join(cookie_path, filename_aikucun)
|
161
|
-
with open(json_file, 'w', encoding='utf-8') as f:
|
162
|
-
json.dump(new_cookies_list, f, ensure_ascii=False, sort_keys=True, indent=4)
|
163
|
-
print(f'cookie已保存: {json_file}')
|
164
|
-
|
165
|
-
# _file = os.path.join(cookie_path, filename_aikucun)
|
166
|
-
# with open(_file, 'w') as f:
|
167
|
-
# # 将cookies保存为json格式
|
168
|
-
# cookies_list = _driver.get_cookies()
|
169
|
-
# # for cookie in cookies_list:
|
170
|
-
# # # 该字段有问题所以删除就可以
|
171
|
-
# # if 'expiry' in cookie:
|
172
|
-
# # del cookie['expiry']
|
173
|
-
# # # if 'domain' in cookie:
|
174
|
-
# # # cookie['domain'] = '.taobao.com'
|
175
|
-
# cookies_list = json.dumps(cookies_list)
|
176
|
-
# f.write(cookies_list)
|
177
|
-
# print(f'cookie已保存: {_file}')
|
178
|
-
_driver.quit()
|
39
|
+
connection = pymysql.connect(**_config) # 连接数据库
|
40
|
+
return connection
|
41
|
+
except Exception as e:
|
42
|
+
logger.error(f'{_db_name}: 连接失败,正在重试: {host}:{port} {attempts}/{max_try} {e}')
|
43
|
+
attempts += 1
|
44
|
+
time.sleep(30)
|
45
|
+
logger.error(f'{_db_name}: 连接失败,重试次数超限,当前设定次数: {max_try}')
|
46
|
+
return None
|
47
|
+
|
48
|
+
|
49
|
+
def dates_between(start_date, end_date) -> list:
|
50
|
+
""" 获取两个日期之间的所有日期, 返回 list """
|
51
|
+
start_date = pd.to_datetime(start_date)
|
52
|
+
end_date = pd.to_datetime(end_date)
|
53
|
+
dates = []
|
54
|
+
current_date = start_date
|
55
|
+
while current_date <= end_date:
|
56
|
+
dates.append(current_date.strftime('%Y-%m-%d'))
|
57
|
+
current_date += datetime.timedelta(days=1)
|
58
|
+
return dates
|
179
59
|
|
180
60
|
|
181
61
|
class AikuCun:
|
182
62
|
def __init__(self):
|
183
|
-
|
184
|
-
self.
|
185
|
-
self.
|
63
|
+
self.url = 'https://gray-merc.aikucun.com/index.html'
|
64
|
+
self.db_name = 'cookie文件'
|
65
|
+
self.table_name = 'main_aikucun'
|
66
|
+
self.shop_name = '万里马爱库存'
|
67
|
+
self.token = None
|
68
|
+
self.today = datetime.date.today()
|
69
|
+
self.start_date = (self.today - datetime.timedelta(days=7)).strftime('%Y-%m-%d')
|
70
|
+
self.end_date = (self.today - datetime.timedelta(days=1)).strftime('%Y-%m-%d')
|
186
71
|
|
187
|
-
def
|
72
|
+
def logining(self, shop_name='aikucun', headless=False):
|
188
73
|
option = webdriver.ChromeOptions()
|
189
74
|
if headless:
|
190
75
|
option.add_argument("--headless") # 设置无界面模式
|
@@ -219,8 +104,8 @@ class AikuCun:
|
|
219
104
|
option.add_experimental_option('prefs', prefs)
|
220
105
|
option.add_experimental_option('excludeSwitches', ['enable-automation']) # 实验性参数, 左上角小字
|
221
106
|
|
222
|
-
# 修改默认下载文件夹路径
|
223
|
-
option.add_experimental_option("prefs", {"download.default_directory": f'{upload_path}'})
|
107
|
+
# # 修改默认下载文件夹路径
|
108
|
+
# option.add_experimental_option("prefs", {"download.default_directory": f'{upload_path}'})
|
224
109
|
|
225
110
|
# # 通过excludeSwitches参数禁用默认的启动路径
|
226
111
|
# option.add_experimental_option('excludeSwitches', ['enable-automation'])
|
@@ -249,247 +134,354 @@ class AikuCun:
|
|
249
134
|
_driver.maximize_window() # 窗口最大化 方便后续加载数据
|
250
135
|
|
251
136
|
# 登录
|
252
|
-
_driver.get(self.
|
253
|
-
|
254
|
-
|
255
|
-
|
256
|
-
|
257
|
-
|
258
|
-
|
259
|
-
|
260
|
-
|
261
|
-
|
262
|
-
|
263
|
-
|
137
|
+
_driver.get(url='https://gray-merc.aikucun.com/index.html') # self.url 可能被修改,这里使用固定页面获取 sign
|
138
|
+
time.sleep(0.1)
|
139
|
+
_driver.maximize_window() # 窗口最大化 方便后续加载数据
|
140
|
+
wait = WebDriverWait(_driver, timeout=15)
|
141
|
+
input_box = wait.until(
|
142
|
+
EC.element_to_be_clickable(
|
143
|
+
(By.XPATH, '//input[@placeholder="请输入用户名"]'))) #
|
144
|
+
input_box.send_keys('广东万里马实业股份有限公司')
|
145
|
+
input_box = wait.until(
|
146
|
+
EC.element_to_be_clickable(
|
147
|
+
(By.XPATH, '//input[@placeholder="请输入密码"]'))) #
|
148
|
+
input_box.send_keys('wlm123$$$')
|
149
|
+
time.sleep(0.1)
|
150
|
+
elements = _driver.find_elements(
|
151
|
+
By.XPATH, '//button[@class="merchant_login_btn" and contains(text(), "登录")]')
|
152
|
+
_driver.execute_script("arguments[0].click();", elements[0])
|
153
|
+
for i in range(100):
|
154
|
+
try:
|
155
|
+
wait.until(
|
156
|
+
EC.element_to_be_clickable(
|
157
|
+
(By.XPATH, '//div[@class="user-info nav-user-slider"]')))
|
158
|
+
break
|
159
|
+
except:
|
160
|
+
time.sleep(5)
|
161
|
+
local_storage = _driver.execute_script("return window.localStorage;")
|
162
|
+
if 'token' in local_storage.keys():
|
163
|
+
self.token = {
|
164
|
+
'日期': datetime.datetime.today().strftime('%Y-%m-%d'),
|
165
|
+
'平台': '爱库存',
|
166
|
+
'店铺名称': self.shop_name,
|
167
|
+
'token': local_storage['token'],
|
168
|
+
'来源位置': 'localstorage',
|
169
|
+
'更新时间': datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S'),
|
170
|
+
}
|
171
|
+
time.sleep(5)
|
172
|
+
_driver.quit()
|
173
|
+
|
174
|
+
def save_token(self):
|
175
|
+
if not self.token:
|
176
|
+
print('self.token 不能为空')
|
177
|
+
return
|
178
|
+
set_typ = {
|
179
|
+
'日期': 'DATE',
|
180
|
+
'平台': 'varchar(50)',
|
181
|
+
'店铺名称': 'varchar(50)',
|
182
|
+
'token': 'varchar(255)',
|
183
|
+
'来源位置': 'varchar(50)',
|
184
|
+
'更新时间': 'timestamp'
|
185
|
+
}
|
186
|
+
# 更新至数据库记录
|
187
|
+
m_engine.dict_to_mysql(
|
188
|
+
db_name=self.db_name,
|
189
|
+
table_name=self.table_name,
|
190
|
+
dict_data=self.token,
|
191
|
+
unique_main_key=None,
|
192
|
+
icm_update=[], # 唯一组合键
|
193
|
+
main_key=None, # 指定索引列, 通常用日期列,默认会设置日期为索引
|
194
|
+
set_typ={}, # 指定数据类型
|
195
|
+
)
|
196
|
+
|
197
|
+
def get_data_from_bbx(self, start_date=None, end_date=None, item_type='spu', page_num=1, page_size=300):
|
198
|
+
if start_date:
|
199
|
+
self.start_date = start_date
|
200
|
+
if end_date:
|
201
|
+
self.end_date = end_date
|
202
|
+
date_list = dates_between(start_date=self.start_date, end_date=self.end_date)
|
203
|
+
|
264
204
|
df = download.data_to_df(
|
265
|
-
db_name=db_name,
|
266
|
-
table_name=table_name,
|
267
|
-
start_date='2025-
|
268
|
-
end_date='
|
205
|
+
db_name=self.db_name,
|
206
|
+
table_name=self.table_name,
|
207
|
+
start_date='2025-03-07',
|
208
|
+
end_date='2039-12-31',
|
269
209
|
projection={
|
270
|
-
'
|
271
|
-
'
|
272
|
-
'
|
273
|
-
'
|
274
|
-
'path': 1,
|
275
|
-
'sameSite': 1,
|
276
|
-
'secure': 1,
|
277
|
-
'value': 1,
|
210
|
+
'日期': 1,
|
211
|
+
'平台': 1,
|
212
|
+
'店铺名称': 1,
|
213
|
+
'token': 1,
|
278
214
|
'更新时间': 1
|
279
215
|
},
|
280
216
|
)
|
281
|
-
|
282
|
-
|
283
|
-
|
284
|
-
|
285
|
-
|
286
|
-
|
287
|
-
|
288
|
-
|
289
|
-
|
290
|
-
new_dict.update({k: v})
|
291
|
-
# _driver.add_cookie(new_dict) # 添加cookies信息
|
292
|
-
|
293
|
-
_driver.refresh()
|
294
|
-
time.sleep(3)
|
295
|
-
return _driver
|
296
|
-
|
297
|
-
def get_data(self, shop_name='aikucun', date_num=1, headless=True):
|
298
|
-
"""
|
299
|
-
date_num: 获取最近 N 天数据,0表示今天
|
300
|
-
所有数据都是逐日下载
|
301
|
-
"""
|
302
|
-
|
303
|
-
_driver = self.login(shop_name=shop_name, headless=headless)
|
304
|
-
|
305
|
-
_driver.get(self.sp_url)
|
306
|
-
time.sleep(3)
|
307
|
-
# breakpoint()
|
308
|
-
|
309
|
-
today = datetime.date.today()
|
310
|
-
for date_s in range(date_num):
|
311
|
-
new_date = today - datetime.timedelta(days=date_s) # 会用作文件名
|
312
|
-
now = datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S')
|
313
|
-
print(f'{now} 正在下载爱库存文件 {date_s+1}/{date_num}: {new_date}')
|
314
|
-
str_date = str(new_date)[2:]
|
315
|
-
wait = WebDriverWait(_driver, timeout=15) #
|
316
|
-
elements = _driver.find_elements(
|
317
|
-
By.XPATH, '//input[@placeholder="开始日期"]')
|
318
|
-
# _driver.execute_script("arguments[0].click();", elements[0]) # 点击
|
319
|
-
|
320
|
-
input_box = wait.until(
|
321
|
-
EC.element_to_be_clickable(
|
322
|
-
(By.XPATH, '//input[@placeholder="开始日期"]'))) #
|
323
|
-
|
324
|
-
# from selenium.webdriver.common.keys import Keys
|
325
|
-
for i in range(8):
|
326
|
-
input_box.send_keys(Keys.BACKSPACE)
|
327
|
-
input_box.send_keys(str_date)
|
328
|
-
time.sleep(1)
|
329
|
-
input_box = wait.until(
|
330
|
-
EC.element_to_be_clickable(
|
331
|
-
(By.XPATH, '//input[@placeholder="结束日期"]'))) # 文件名输入框
|
332
|
-
|
333
|
-
for i in range(8):
|
334
|
-
input_box.send_keys(Keys.BACKSPACE)
|
335
|
-
input_box.send_keys(str_date)
|
336
|
-
time.sleep(2)
|
337
|
-
input_box.send_keys(Keys.ENTER)
|
338
|
-
time.sleep(2)
|
339
|
-
wait.until(EC.presence_of_element_located((By.XPATH, '//button/span[contains(text(), "查询")]')))
|
340
|
-
elements = _driver.find_elements(
|
341
|
-
By.XPATH, '//button/span[contains(text(), "查询")]')
|
342
|
-
_driver.execute_script("arguments[0].click();", elements[0]) # 点击
|
343
|
-
time.sleep(5)
|
344
|
-
wait.until(EC.presence_of_element_located(
|
345
|
-
(By.XPATH,
|
346
|
-
'//button[@class="el-button el-button--primary el-button--small is-plain"]/span[contains(text(), "下载数据")]')))
|
347
|
-
|
348
|
-
elements = _driver.find_elements(
|
349
|
-
By.XPATH,
|
350
|
-
'//div[@class="ak-page-list__table-empty" and contains(text(), "暂无数据")]')
|
351
|
-
if elements:
|
352
|
-
print(f'cookies 可能已过期,无法下载')
|
353
|
-
_driver.quit()
|
217
|
+
if len(df) == 0:
|
218
|
+
self.logining()
|
219
|
+
self.save_token()
|
220
|
+
else:
|
221
|
+
# 仅保留最新日期的数据
|
222
|
+
idx = df.groupby(['平台', '店铺名称'])['更新时间'].idxmax()
|
223
|
+
df = df.loc[idx][['token']]
|
224
|
+
if len(df) == 0:
|
225
|
+
print(f'从数据库获取的 token 不能为空')
|
354
226
|
return
|
355
|
-
|
356
|
-
|
357
|
-
|
358
|
-
|
359
|
-
|
360
|
-
|
361
|
-
|
362
|
-
|
363
|
-
|
364
|
-
|
227
|
+
self.token = df.iloc[0, 0]
|
228
|
+
|
229
|
+
self.url = f'https://treasurebox.aikucun.com/api/web/merchant/treasure/commodity/{item_type}/list'
|
230
|
+
headers = {
|
231
|
+
'headers': ua_sj.get_ua(),
|
232
|
+
'referer': 'https://treasurebox.aikucun.com/dashboard/commodity/ranking/merchant',
|
233
|
+
'content-type': 'application/json;charset=UTF-8',
|
234
|
+
'origin': 'https://treasurebox.aikucun.com',
|
235
|
+
'system': 'merchant',
|
236
|
+
'token': self.token, # 从浏览器本地存储空间获取
|
237
|
+
}
|
238
|
+
num = 1
|
239
|
+
results = []
|
240
|
+
for date in date_list:
|
241
|
+
req_date = re.sub('-', '', date)
|
242
|
+
data = {
|
243
|
+
'beginDate': req_date,
|
244
|
+
'brandIds': [],
|
245
|
+
'cropId': '',
|
246
|
+
'cropName': '',
|
247
|
+
'ctgryOneIds': [],
|
248
|
+
'ctgryThreeIds': [],
|
249
|
+
'ctgryTwoIds': [],
|
250
|
+
'dimValue': '',
|
251
|
+
'endDate': req_date,
|
252
|
+
'merchantShopCode': '',
|
253
|
+
'orderByName': 'dealGmv',
|
254
|
+
'orderType': 'desc',
|
255
|
+
'pageNum': page_num,
|
256
|
+
'pageSize': page_size
|
257
|
+
}
|
258
|
+
|
259
|
+
res = requests.post(
|
260
|
+
url=self.url,
|
261
|
+
headers=headers,
|
262
|
+
# cookies=cookies,
|
263
|
+
data=json.dumps(data)
|
264
|
+
)
|
265
|
+
print(f'正在获取数据({num}/{len(date_list)}): {item_type}榜单 {date}')
|
266
|
+
# print(res.json())
|
267
|
+
if not res.json()['success']:
|
268
|
+
print('requests 请求不成功, success 返回值应为 True')
|
269
|
+
time.sleep(1)
|
270
|
+
continue
|
271
|
+
if not res.json()['data']['rows']:
|
272
|
+
print("请求获取的数据 ['data']['rows'] 不能为空")
|
273
|
+
time.sleep(1)
|
274
|
+
continue
|
275
|
+
results += [(date, res.json()['data']['rows'])]
|
276
|
+
num += 1
|
277
|
+
time.sleep(1)
|
278
|
+
if num % 32 == 0:
|
279
|
+
print("避免频繁请求, 正在休眠...")
|
280
|
+
# time.sleep(60)
|
281
|
+
|
282
|
+
return results
|
283
|
+
|
284
|
+
def insert_datas(self, data_list, db_name, table_name):
|
285
|
+
"""数据清洗"""
|
286
|
+
if not data_list:
|
287
|
+
return
|
288
|
+
chanel_name = {
|
289
|
+
'availableNum': '可售库存数',
|
290
|
+
'availableSkuCnt': '在架sku数',
|
291
|
+
'brandName': '品牌名',
|
292
|
+
'ctgryOneName': '一级类目名称',
|
293
|
+
'ctgryThreeName': '三级类目名称',
|
294
|
+
'ctgryTwoName': '二级类目名称',
|
295
|
+
'dealBuyerCnt': '支付人数_成交',
|
296
|
+
'dealBuyerCntRate': '成交率_成交',
|
297
|
+
'dealGmv': '成交gmv',
|
298
|
+
'dealIdolCnt': '销售爱豆人数',
|
299
|
+
'dealProductCnt': '销售量_成交',
|
300
|
+
'dealProductCntRate': '售罄率',
|
301
|
+
'dealSkuCnt': '成交sku数',
|
302
|
+
'dealTwoCnt': '订单数_成交',
|
303
|
+
'downSkuCnt': '可售sku数',
|
304
|
+
'etlInsertTime': '数据更新时间',
|
305
|
+
'forwardConfirmCnt': '转发爱豆人数',
|
306
|
+
'forwardConfirmNum': '转发次数',
|
307
|
+
'merStyleNo': '商品款号', # spu 榜单
|
308
|
+
'styleNo': '商品货号', # sku 榜单
|
309
|
+
'orderBuyerCnt': '支付人数_交易',
|
310
|
+
'orderBuyerCntRate': '成交率_交易',
|
311
|
+
'orderGmv': '下单gmv',
|
312
|
+
'orderProductCnt': '销售量_交易',
|
313
|
+
'orderSkuCnt': '下单sku数',
|
314
|
+
'orderTwoCnt': '订单数_交易',
|
315
|
+
'pictureUrl': '图片',
|
316
|
+
'pvNum': '浏览量',
|
317
|
+
'rn': '序号',
|
318
|
+
'spuId': 'spuid',
|
319
|
+
'spuName': '商品名称',
|
320
|
+
'supplyAmount': '供货额',
|
321
|
+
'supplyPerAmount': '供货价',
|
322
|
+
'uvNum': '访客量',
|
323
|
+
'colorName': '颜色',
|
324
|
+
'sizeName': '尺码',
|
325
|
+
'barCode': '条码', # sku榜单 款号 + 颜色编码
|
326
|
+
}
|
327
|
+
# 移除未翻译的列名
|
328
|
+
res_col = [item for item in chanel_name.keys() if chanel_name[item] == '']
|
329
|
+
for item in res_col:
|
330
|
+
del chanel_name[item]
|
331
|
+
|
332
|
+
_results = []
|
333
|
+
for item_ in data_list:
|
334
|
+
end_date, d_list = item_
|
335
|
+
for main_data_dict in d_list:
|
336
|
+
dict_data_before = {}
|
337
|
+
# 添加数据
|
338
|
+
dict_data_before.update({k: v for k, v in main_data_dict.items()})
|
339
|
+
# 初始化 dict_data
|
340
|
+
dict_data = {
|
341
|
+
'日期': end_date,
|
342
|
+
'平台': '爱库存',
|
343
|
+
'店铺名称': self.shop_name
|
344
|
+
}
|
345
|
+
for k, v in dict_data_before.items():
|
346
|
+
# 翻译键名
|
347
|
+
[dict_data.update({name_v: v}) for name_k, name_v in chanel_name.items() if k == name_k]
|
348
|
+
# 没有翻译的键值也要保留
|
349
|
+
not_in_rename = [item for item in dict_data_before.keys() if item not in chanel_name.keys()]
|
350
|
+
[dict_data.update({item: dict_data_before[item]}) for item in not_in_rename]
|
351
|
+
dict_data.update(
|
352
|
+
{
|
353
|
+
'更新时间': datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S')
|
354
|
+
}
|
355
|
+
)
|
356
|
+
new_dict_data = {}
|
357
|
+
for k, v in dict_data.items():
|
358
|
+
if v and str(v).lower() != 'none' and str(v) != 'null':
|
359
|
+
new_dict_data.update({k: v})
|
360
|
+
else:
|
361
|
+
new_dict_data.update({k: 0})
|
362
|
+
_results.append(new_dict_data)
|
365
363
|
set_typ = {
|
366
|
-
'
|
367
|
-
'
|
368
|
-
'
|
364
|
+
'可售库存数': 'INT',
|
365
|
+
'在架sku数': 'INT',
|
366
|
+
'品牌名': 'varchar(50)',
|
367
|
+
'一级类目名称': 'varchar(50)',
|
368
|
+
'三级类目名称': 'varchar(50)',
|
369
|
+
'二级类目名称': 'varchar(50)',
|
370
|
+
'支付人数_成交': 'INT',
|
371
|
+
'成交率_成交': 'decimal(10,4)',
|
372
|
+
'成交gmv': 'decimal(10,2)',
|
373
|
+
'销售爱豆人数': 'INT',
|
374
|
+
'销售量_成交': 'INT',
|
375
|
+
'售罄率': 'decimal(10,4)',
|
376
|
+
'成交sku数': 'INT',
|
377
|
+
'订单数_成交': 'INT',
|
378
|
+
'可售sku数': 'INT',
|
379
|
+
'数据更新时间': 'DATETIME',
|
380
|
+
'转发爱豆人数': 'INT',
|
381
|
+
'转发次数': 'INT',
|
382
|
+
'商品款号': 'varchar(50)',
|
383
|
+
'支付人数_交易': 'INT',
|
384
|
+
'成交率_交易': 'decimal(10,4)',
|
385
|
+
'下单gmv': 'decimal(10,2)',
|
386
|
+
'销售量_交易': 'INT',
|
387
|
+
'下单sku数': 'INT',
|
388
|
+
'订单数_交易': 'INT',
|
369
389
|
'图片': 'varchar(255)',
|
370
|
-
'
|
371
|
-
'
|
372
|
-
'
|
373
|
-
'
|
374
|
-
'
|
375
|
-
'
|
376
|
-
'
|
377
|
-
'
|
390
|
+
'浏览量': 'INT',
|
391
|
+
'序号': 'INT',
|
392
|
+
'spuId': 'varchar(50)',
|
393
|
+
'商品名称': 'varchar(50)',
|
394
|
+
'供货额': 'decimal(10,2)',
|
395
|
+
'供货价': 'decimal(10,2)',
|
396
|
+
'访客量': 'INT',
|
397
|
+
'颜色': 'varchar(50)',
|
398
|
+
'尺码': 'varchar(50)',
|
399
|
+
'货号': 'varchar(50)', # 款号 + 颜色编码
|
378
400
|
}
|
379
|
-
|
380
|
-
|
381
|
-
|
382
|
-
|
383
|
-
|
384
|
-
|
385
|
-
|
386
|
-
|
387
|
-
|
388
|
-
|
389
|
-
|
390
|
-
|
391
|
-
|
392
|
-
|
393
|
-
os.remove(os.path.join(root, name))
|
394
|
-
continue
|
395
|
-
df.insert(loc=0, column='日期', value=date) # df中插入新列
|
396
|
-
df.insert(loc=1, column='店铺名称', value='爱库存平台') # df中插入新列
|
397
|
-
df.rename(columns={'spuId': 'spu_id'}, inplace=True)
|
398
|
-
# df['数据更新时间'] = pd.to_datetime(df['数据更新时间'], format='%Y-%m-%d %H:%M:%S', errors='ignore')
|
399
|
-
# df['数据更新时间'] = df['数据更新时间'].apply(lambda x: re.sub(' ', ' ', str(x)) if x else x)
|
400
|
-
# print(df['数据更新时间'])
|
401
|
-
# breakpoint()
|
402
|
-
new_dict = {
|
403
|
-
'日期': '',
|
404
|
-
'店铺名称': '',
|
405
|
-
'序号': '',
|
406
|
-
'商品名称': '',
|
407
|
-
'spu_id': '',
|
408
|
-
'商品款号': '',
|
409
|
-
'一级类目名称': '',
|
410
|
-
'二级类目名称': '',
|
411
|
-
'三级类目名称': '',
|
412
|
-
'访客量': '',
|
413
|
-
'浏览量': '',
|
414
|
-
'下单gmv': '',
|
415
|
-
'成交gmv': '',
|
416
|
-
'支付人数_成交': '',
|
417
|
-
}
|
418
|
-
_results = []
|
419
|
-
for dict_data in df.to_dict(orient='records'):
|
420
|
-
new_dict.update(dict_data)
|
421
|
-
new_dict.update({'更新时间': datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S')})
|
422
|
-
_results.append(new_dict)
|
423
|
-
if _results:
|
424
|
-
m_engine.insert_many_dict(
|
425
|
-
db_name='爱库存2',
|
426
|
-
table_name='商品spu榜单',
|
427
|
-
dict_data_list=_results,
|
428
|
-
icm_update=['日期', '店铺名称', 'spu_id', '商品款号'],
|
429
|
-
unique_main_key=None,
|
430
|
-
set_typ=set_typ,
|
431
|
-
)
|
432
|
-
|
433
|
-
new_name = f'爱库存_商品榜单_spu_{date}_{date}.csv'
|
434
|
-
df.to_csv(os.path.join(root, new_name), encoding='utf-8_sig', index=False)
|
435
|
-
os.remove(os.path.join(root, name))
|
436
|
-
|
437
|
-
|
438
|
-
def akucun(headless=True, date_num=10):
|
439
|
-
akc = AikuCun()
|
440
|
-
akc.get_data(shop_name='aikucun', date_num=date_num, headless=headless) # 获取最近 N 天数据,0表示今天
|
441
|
-
|
442
|
-
|
443
|
-
class AikuCunNew:
|
444
|
-
|
445
|
-
def __init__(self, shop_name,):
|
446
|
-
self.shop_name = shop_name
|
447
|
-
self.today = datetime.date.today()
|
448
|
-
self.headers = {'User-Agent': ua_sj.get_ua()}
|
449
|
-
self.cookie_path = os.path.join(set_support.SetSupport(dirname='support').dirname, 'cookies')
|
450
|
-
self.cookies = {}
|
451
|
-
self.get_cookies() # 更新 self.cookies 的值
|
452
|
-
self.support_path = set_support.SetSupport(dirname='support').dirname
|
453
|
-
self.start_date = (self.today - datetime.timedelta(days=15)).strftime('%Y-%m-%d')
|
454
|
-
self.end_date = (self.today - datetime.timedelta(days=1)).strftime('%Y-%m-%d')
|
401
|
+
print(f'{self.shop_name} 正在更新数据库 {db_name} -> {table_name}...')
|
402
|
+
if 'spu' in table_name:
|
403
|
+
drop_dup = ['日期', '平台', '店铺名称', '商品款号', '访客量']
|
404
|
+
else:
|
405
|
+
drop_dup = ['日期', '平台', '店铺名称', '条码']
|
406
|
+
m_engine.insert_many_dict(
|
407
|
+
db_name=db_name,
|
408
|
+
table_name=table_name,
|
409
|
+
dict_data_list=_results,
|
410
|
+
icm_update=drop_dup, # 唯一组合键
|
411
|
+
# unique_main_key=['人群id'],
|
412
|
+
set_typ=set_typ,
|
413
|
+
allow_not_null=False, # 创建允许插入空值的列
|
414
|
+
)
|
455
415
|
|
456
|
-
def
|
457
|
-
|
458
|
-
|
459
|
-
|
460
|
-
|
461
|
-
|
462
|
-
|
463
|
-
|
464
|
-
|
416
|
+
def get_sign(self):
|
417
|
+
sign = 'bbcf5b9cf3d3b8ba9c22550dcba8a3ce97be766f'
|
418
|
+
current_timestamp_ms = '1741396070777'
|
419
|
+
# current_timestamp_ms = int(round(time.time() * 1000))
|
420
|
+
self.url = f'https://treasurebox.aikucun.com/api/web/merchant/treasure/commodity/sku/list?time={current_timestamp_ms}&sign={sign}'
|
421
|
+
headers = {
|
422
|
+
'headers': ua_sj.get_ua(),
|
423
|
+
'referer': 'https://treasurebox.aikucun.com/dashboard/commodity/ranking/merchant',
|
424
|
+
'content-type': 'application/json;charset=UTF-8',
|
425
|
+
'origin': 'https://treasurebox.aikucun.com',
|
426
|
+
# 'system': 'merchant',
|
427
|
+
# 'token': self.token, # 从浏览器本地存储空间获取
|
428
|
+
}
|
465
429
|
data = {
|
466
|
-
'
|
467
|
-
'
|
430
|
+
'beginDate': '20250307',
|
431
|
+
'brandIds': [],
|
432
|
+
'cropId': '',
|
433
|
+
'cropName': '',
|
434
|
+
'ctgryOneIds': [],
|
435
|
+
'ctgryThreeIds': [],
|
436
|
+
'ctgryTwoIds': [],
|
437
|
+
'dimValue': '',
|
438
|
+
'endDate': '20250307',
|
439
|
+
'merchantShopCode': '',
|
440
|
+
'orderByName': 'dealGmv',
|
441
|
+
'orderType': 'desc',
|
442
|
+
'pageNum': 1,
|
443
|
+
'pageSize': 10
|
468
444
|
}
|
469
445
|
res = requests.post(
|
470
|
-
url,
|
471
|
-
headers=
|
472
|
-
|
473
|
-
params=data
|
446
|
+
url=self.url,
|
447
|
+
headers=headers,
|
448
|
+
data=json.dumps(data)
|
474
449
|
)
|
475
|
-
print(res.
|
476
|
-
|
450
|
+
print(res.json())
|
477
451
|
|
478
452
|
|
479
|
-
|
480
|
-
|
481
|
-
|
482
|
-
|
483
|
-
|
484
|
-
|
453
|
+
def main(start_date, end_date, item_type=['spu']):
|
454
|
+
ak = AikuCun()
|
455
|
+
# ak.get_sign()
|
456
|
+
for type_ in item_type:
|
457
|
+
if type_ not in ['spu', 'sku']:
|
458
|
+
print(f'{item_type} 非法参数: {type_}')
|
459
|
+
continue
|
460
|
+
for i in range(2):
|
461
|
+
data_list = ak.get_data_from_bbx(
|
462
|
+
start_date=start_date,
|
463
|
+
end_date=end_date,
|
464
|
+
item_type=type_,
|
465
|
+
page_num=1,
|
466
|
+
page_size=300
|
467
|
+
)
|
468
|
+
if not data_list:
|
469
|
+
ak.logining()
|
470
|
+
ak.save_token()
|
471
|
+
else:
|
485
472
|
break
|
486
|
-
|
487
|
-
|
473
|
+
|
474
|
+
ak.insert_datas(
|
475
|
+
data_list=data_list,
|
476
|
+
db_name='爱库存2',
|
477
|
+
table_name=f'{type_}榜单'
|
478
|
+
)
|
488
479
|
|
489
480
|
|
490
|
-
if __name__ == '__main__':
|
491
|
-
# get_cookie_aikucun() # 登录并获取 cookies
|
492
|
-
akucun(date_num=30, headless=True) # 下载数据
|
493
481
|
|
494
|
-
|
495
|
-
|
482
|
+
if __name__ == '__main__':
|
483
|
+
main(
|
484
|
+
start_date='2025-03-06',
|
485
|
+
end_date='2025-03-06',
|
486
|
+
item_type=['spu', 'sku']
|
487
|
+
)
|