mdbq 4.0.50__py3-none-any.whl → 4.0.52__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- mdbq/__version__.py +1 -1
- mdbq/aggregation/query_data.py +2 -2
- {mdbq-4.0.50.dist-info → mdbq-4.0.52.dist-info}/METADATA +1 -1
- {mdbq-4.0.50.dist-info → mdbq-4.0.52.dist-info}/RECORD +6 -7
- mdbq/spider/aikucun.py +0 -530
- {mdbq-4.0.50.dist-info → mdbq-4.0.52.dist-info}/WHEEL +0 -0
- {mdbq-4.0.50.dist-info → mdbq-4.0.52.dist-info}/top_level.txt +0 -0
mdbq/__version__.py
CHANGED
@@ -1 +1 @@
|
|
1
|
-
VERSION = '4.0.50'
|
1
|
+
VERSION = '4.0.52'
|
mdbq/aggregation/query_data.py
CHANGED
@@ -2679,7 +2679,7 @@ class MysqlDatasQuery:
|
|
2679
2679
|
projection = {}
|
2680
2680
|
df = self.download_manager.data_to_df(
|
2681
2681
|
db_name='爱库存2',
|
2682
|
-
table_name='
|
2682
|
+
table_name='爱库存_spu榜单',
|
2683
2683
|
start_date=start_date,
|
2684
2684
|
end_date=end_date,
|
2685
2685
|
projection=projection,
|
@@ -3347,7 +3347,7 @@ def query2(months=1, download_manager=None):
|
|
3347
3347
|
sdq.tg_gjc(db_name='聚合数据', table_name='天猫_关键词报表')
|
3348
3348
|
sdq.jd_gjc(db_name='聚合数据', table_name='京东_关键词报表')
|
3349
3349
|
sdq.se_search(db_name='聚合数据', table_name='天猫店铺来源_手淘搜索')
|
3350
|
-
sdq.aikucun_bd_spu(db_name='聚合数据', table_name='爱库存_商品spu榜单')
|
3350
|
+
# sdq.aikucun_bd_spu(db_name='聚合数据', table_name='爱库存_商品spu榜单')
|
3351
3351
|
sdq.dmp_crowd(db_name='聚合数据', table_name='达摩盘_人群报表')
|
3352
3352
|
sdq.deeplink(db_name='聚合数据', table_name='达摩盘_deeplink人群洞察')
|
3353
3353
|
sdq.global_insights(db_name='聚合数据', table_name='全域洞察')
|
@@ -1,7 +1,7 @@
|
|
1
1
|
mdbq/__init__.py,sha256=Il5Q9ATdX8yXqVxtP_nYqUhExzxPC_qk_WXQ_4h0exg,16
|
2
|
-
mdbq/__version__.py,sha256=
|
2
|
+
mdbq/__version__.py,sha256=oRmzDEkmmyAlXwpbKoDeVNJzH0P6pghSjKc3gq_WNa4,18
|
3
3
|
mdbq/aggregation/__init__.py,sha256=EeDqX2Aml6SPx8363J-v1lz0EcZtgwIBYyCJV6CcEDU,40
|
4
|
-
mdbq/aggregation/query_data.py,sha256=
|
4
|
+
mdbq/aggregation/query_data.py,sha256=83-CR2jFFrMJMwkuZm43Kn7P3mjTvjP6i4Av6eEudT8,157236
|
5
5
|
mdbq/aggregation/set_typ_dict.py,sha256=Cax8i6My6xuTC8l-eB0AJ-kOZQwPRcZj-P9dhiiaTj0,21078
|
6
6
|
mdbq/log/__init__.py,sha256=Mpbrav0s0ifLL7lVDAuePEi1hJKiSHhxcv1byBKDl5E,15
|
7
7
|
mdbq/log/mylogger.py,sha256=kPe3wsQNaB1slfX-Z7VMqzZoMoqPfc7ylYXZDBeFzzI,24945
|
@@ -25,8 +25,7 @@ mdbq/pbix/refresh_all.py,sha256=OBT9EewSZ0aRS9vL_FflVn74d4l2G00wzHiikCC4TC0,5926
|
|
25
25
|
mdbq/redis/__init__.py,sha256=YtgBlVSMDphtpwYX248wGge1x-Ex_mMufz4-8W0XRmA,12
|
26
26
|
mdbq/redis/getredis.py,sha256=vpBuNc22uj9Vr-_Dh25_wpwWM1e-072EAAIBdB_IpL0,23494
|
27
27
|
mdbq/spider/__init__.py,sha256=RBMFXGy_jd1HXZhngB2T2XTvJqki8P_Fr-pBcwijnew,18
|
28
|
-
mdbq/
|
29
|
-
mdbq-4.0.
|
30
|
-
mdbq-4.0.
|
31
|
-
mdbq-4.0.
|
32
|
-
mdbq-4.0.50.dist-info/RECORD,,
|
28
|
+
mdbq-4.0.52.dist-info/METADATA,sha256=OLb8hSRm0jr5HU-n2MJ0JILsojpcmcbGBCJCaPdZHN4,364
|
29
|
+
mdbq-4.0.52.dist-info/WHEEL,sha256=jB7zZ3N9hIM9adW7qlTAyycLYW9npaWKLRzaoVcLKcM,91
|
30
|
+
mdbq-4.0.52.dist-info/top_level.txt,sha256=2FQ-uLnCSB-OwFiWntzmwosW3X2Xqsg0ewh1axsaylA,5
|
31
|
+
mdbq-4.0.52.dist-info/RECORD,,
|
mdbq/spider/aikucun.py
DELETED
@@ -1,530 +0,0 @@
|
|
1
|
-
# -*- coding:utf-8 -*-
|
2
|
-
import datetime
|
3
|
-
import requests
|
4
|
-
import json
|
5
|
-
import os
|
6
|
-
import re
|
7
|
-
import time
|
8
|
-
import platform
|
9
|
-
import getpass
|
10
|
-
from selenium import webdriver
|
11
|
-
from selenium.webdriver.support.wait import WebDriverWait
|
12
|
-
from selenium.webdriver.common.by import By
|
13
|
-
from selenium.webdriver.support import expected_conditions as EC
|
14
|
-
from selenium.webdriver.chrome.service import Service
|
15
|
-
import pymysql
|
16
|
-
from mdbq.mysql import uploader
|
17
|
-
from mdbq.mysql import s_query
|
18
|
-
from mdbq.myconf import myconf
|
19
|
-
from mdbq.other import ua_sj
|
20
|
-
from mdbq.other import otk
|
21
|
-
from mdbq.log import mylogger
|
22
|
-
|
23
|
-
dir_path = os.path.expanduser("~")
|
24
|
-
parser = myconf.ConfigParser()
|
25
|
-
host, port, username, password = parser.get_section_values(
|
26
|
-
file_path=os.path.join(dir_path, 'spd.txt'),
|
27
|
-
section='mysql',
|
28
|
-
keys=['host', 'port', 'username', 'password'],
|
29
|
-
)
|
30
|
-
|
31
|
-
# 实例化一个数据查询类,用来获取 cookies 表数据
|
32
|
-
logger = mylogger.MyLogger(
|
33
|
-
logging_mode='file',
|
34
|
-
log_level='info',
|
35
|
-
log_format='json',
|
36
|
-
max_log_size=50,
|
37
|
-
backup_count=5,
|
38
|
-
enable_async=False, # 是否启用异步日志
|
39
|
-
sample_rate=1, # 采样DEBUG/INFO日志
|
40
|
-
sensitive_fields=[], # 敏感字段过滤
|
41
|
-
enable_metrics=False, # 是否启用性能指标
|
42
|
-
)
|
43
|
-
|
44
|
-
|
45
|
-
def keep_connect(_db_name, _config, max_try: int=10):
|
46
|
-
attempts = 1
|
47
|
-
while attempts <= max_try:
|
48
|
-
try:
|
49
|
-
connection = pymysql.connect(**_config) # 连接数据库
|
50
|
-
return connection
|
51
|
-
except Exception as e:
|
52
|
-
logger.error('连接失败', {'数据库': _db_name, '主机': host, '端口': port, '重试次数': attempts, '最大重试次数': max_try, '错误信息': e})
|
53
|
-
attempts += 1
|
54
|
-
time.sleep(30)
|
55
|
-
logger.error('连接失败', {'数据库': _db_name, '主机': host, '端口': port, '重试次数': attempts, '最大重试次数': max_try})
|
56
|
-
return None
|
57
|
-
|
58
|
-
|
59
|
-
class AikuCun:
|
60
|
-
def __init__(self, uld_manager, download_manager):
|
61
|
-
self.url = 'https://gray-merc.aikucun.com/index.html'
|
62
|
-
self.db_name = 'cookie文件'
|
63
|
-
self.table_name = 'main_aikucun'
|
64
|
-
self.shop_name = '万里马爱库存'
|
65
|
-
self.token = None
|
66
|
-
self.today = datetime.date.today()
|
67
|
-
self.start_date = (self.today - datetime.timedelta(days=7)).strftime('%Y-%m-%d')
|
68
|
-
self.end_date = (self.today - datetime.timedelta(days=1)).strftime('%Y-%m-%d')
|
69
|
-
self.error_count = 0
|
70
|
-
self.uld = uld_manager
|
71
|
-
self.download = download_manager
|
72
|
-
|
73
|
-
def logining(self, shop_name='aikucun', headless=False):
|
74
|
-
option = webdriver.ChromeOptions()
|
75
|
-
if headless:
|
76
|
-
option.add_argument("--headless") # 设置无界面模式
|
77
|
-
# 调整chrome启动配置
|
78
|
-
option.add_argument("--disable-gpu")
|
79
|
-
option.add_argument("--no-sandbox")
|
80
|
-
option.add_argument("--disable-dev-shm-usage")
|
81
|
-
option.add_experimental_option("excludeSwitches", ["enable-automation"])
|
82
|
-
option.add_experimental_option('excludeSwitches', ['enable-logging']) # 禁止日志输出,减少控制台干扰
|
83
|
-
option.add_experimental_option("useAutomationExtension", False)
|
84
|
-
option.add_argument('--ignore-ssl-error') # 忽略ssl错误
|
85
|
-
prefs = {
|
86
|
-
'profile.default_content_settings.popups': 0, # 禁止弹出所有窗口
|
87
|
-
"browser.download.manager. showAlertOnComplete": False, # 下载完成后不显示下载完成提示框
|
88
|
-
"profile.default_content_setting_values.automatic_downloads": 1, # 允许自动下载多个文件
|
89
|
-
}
|
90
|
-
|
91
|
-
option.add_experimental_option('perfLoggingPrefs', {
|
92
|
-
'enableNetwork': True,
|
93
|
-
'enablePage': False,
|
94
|
-
})
|
95
|
-
option.set_capability("goog:loggingPrefs", {
|
96
|
-
'browser': 'ALL',
|
97
|
-
'performance': 'ALL',
|
98
|
-
})
|
99
|
-
option.set_capability("goog:perfLoggingPrefs", {
|
100
|
-
'enableNetwork': True,
|
101
|
-
'enablePage': False,
|
102
|
-
'enableTimeline': False
|
103
|
-
})
|
104
|
-
|
105
|
-
option.add_experimental_option('prefs', prefs)
|
106
|
-
option.add_experimental_option('excludeSwitches', ['enable-automation']) # 实验性参数, 左上角小字
|
107
|
-
|
108
|
-
# # 修改默认下载文件夹路径
|
109
|
-
# option.add_experimental_option("prefs", {"download.default_directory": f'{upload_path}'})
|
110
|
-
|
111
|
-
# # 通过excludeSwitches参数禁用默认的启动路径
|
112
|
-
# option.add_experimental_option('excludeSwitches', ['enable-automation'])
|
113
|
-
|
114
|
-
if platform.system() == 'Windows':
|
115
|
-
# 设置 chrome 和 chromedriver 启动路径
|
116
|
-
chrome_path = os.path.join(f'C:\\Users\\{getpass.getuser()}', 'chrome\\chrome_win64\\chrome.exe')
|
117
|
-
chromedriver_path = os.path.join(f'C:\\Users\\{getpass.getuser()}', 'chrome\\chromedriver.exe')
|
118
|
-
# os.environ["webdriver.chrome.driver"] = chrome_path
|
119
|
-
option.binary_location = chrome_path # windows 设置此参数有效
|
120
|
-
service = Service(chromedriver_path)
|
121
|
-
# service = Service(str(pathlib.Path(f'C:\\Users\\{getpass.getuser()}\\chromedriver.exe'))) # 旧路径
|
122
|
-
elif platform.system() == 'Darwin':
|
123
|
-
chrome_path = '/usr/local/chrome/Google Chrome for Testing.app'
|
124
|
-
chromedriver_path = '/usr/local/chrome/chromedriver'
|
125
|
-
os.environ["webdriver.chrome.driver"] = chrome_path
|
126
|
-
# option.binary_location = chrome_path # Macos 设置此参数报错
|
127
|
-
service = Service(chromedriver_path)
|
128
|
-
elif platform.system().lower() == 'linux':
|
129
|
-
# ubuntu
|
130
|
-
chrome_path = '/usr/bin/google-chrome'
|
131
|
-
chromedriver_path = '/usr/local/bin/chromedriver'
|
132
|
-
# option.binary_location = chrome_path # macOS 设置此参数有效
|
133
|
-
service = Service(chromedriver_path)
|
134
|
-
else:
|
135
|
-
chrome_path = '/usr/local/chrome/Google Chrome for Testing.app'
|
136
|
-
chromedriver_path = '/usr/local/chrome/chromedriver'
|
137
|
-
os.environ["webdriver.chrome.driver"] = chrome_path
|
138
|
-
# option.binary_location = chrome_path # macos 设置此参数报错
|
139
|
-
service = Service(chromedriver_path)
|
140
|
-
_driver = webdriver.Chrome(options=option, service=service) # 创建Chrome驱动程序实例
|
141
|
-
_driver.maximize_window() # 窗口最大化 方便后续加载数据
|
142
|
-
|
143
|
-
# 登录
|
144
|
-
_driver.get(url='https://gray-merc.aikucun.com/index.html') # self.url 可能被修改,这里使用固定页面获取 sign
|
145
|
-
time.sleep(0.1)
|
146
|
-
_driver.maximize_window() # 窗口最大化 方便后续加载数据
|
147
|
-
wait = WebDriverWait(_driver, timeout=15)
|
148
|
-
input_box = wait.until(
|
149
|
-
EC.element_to_be_clickable(
|
150
|
-
(By.XPATH, '//input[@placeholder="请输入用户名"]'))) #
|
151
|
-
input_box.send_keys('广东万里马实业股份有限公司')
|
152
|
-
input_box = wait.until(
|
153
|
-
EC.element_to_be_clickable(
|
154
|
-
(By.XPATH, '//input[@placeholder="请输入密码"]'))) #
|
155
|
-
input_box.send_keys('wlm123$$$')
|
156
|
-
time.sleep(0.1)
|
157
|
-
elements = _driver.find_elements(
|
158
|
-
By.XPATH, '//button[@class="merchant_login_btn" and contains(text(), "登录")]')
|
159
|
-
_driver.execute_script("arguments[0].click();", elements[0])
|
160
|
-
for i in range(100):
|
161
|
-
try:
|
162
|
-
wait.until(
|
163
|
-
EC.element_to_be_clickable(
|
164
|
-
(By.XPATH, '//div[@class="user-info nav-user-slider"]')))
|
165
|
-
break
|
166
|
-
except:
|
167
|
-
time.sleep(5)
|
168
|
-
local_storage = _driver.execute_script("return window.localStorage;")
|
169
|
-
if 'token' in local_storage.keys():
|
170
|
-
self.token = {
|
171
|
-
'日期': datetime.datetime.today().strftime('%Y-%m-%d'),
|
172
|
-
'平台': '爱库存',
|
173
|
-
'店铺名称': self.shop_name,
|
174
|
-
'token': local_storage['token'],
|
175
|
-
'来源位置': 'localstorage',
|
176
|
-
'更新时间': datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S'),
|
177
|
-
}
|
178
|
-
time.sleep(5)
|
179
|
-
_driver.quit()
|
180
|
-
|
181
|
-
def save_token(self):
|
182
|
-
if not self.token:
|
183
|
-
logger.error('self.token 不能为空')
|
184
|
-
return
|
185
|
-
set_typ = {
|
186
|
-
'日期': 'DATE',
|
187
|
-
'平台': 'varchar(50)',
|
188
|
-
'店铺名称': 'varchar(50)',
|
189
|
-
'token': 'varchar(255)',
|
190
|
-
'来源位置': 'varchar(50)',
|
191
|
-
'更新时间': 'timestamp'
|
192
|
-
}
|
193
|
-
# 更新至数据库记录
|
194
|
-
self.uld.upload_data(
|
195
|
-
db_name=self.db_name,
|
196
|
-
table_name=self.table_name,
|
197
|
-
data=self.token,
|
198
|
-
set_typ=set_typ,
|
199
|
-
primary_keys=[],
|
200
|
-
check_duplicate=False,
|
201
|
-
update_on_duplicate=False,
|
202
|
-
duplicate_columns=[],
|
203
|
-
allow_null=False,
|
204
|
-
partition_by=None,
|
205
|
-
partition_date_column='日期',
|
206
|
-
auto_create=True,
|
207
|
-
indexes=[],
|
208
|
-
transaction_mode='row', # 事务模式
|
209
|
-
)
|
210
|
-
|
211
|
-
def get_data_from_bbx(self, start_date=None, end_date=None, item_type='spu', page_num=1, page_size=300):
|
212
|
-
"""
|
213
|
-
这里获取的数据等同于"查询"按钮的数据, 没有"营销后供货额/供货价" 这2个字段, 如果通过下载按钮的报表则有两个字段
|
214
|
-
"""
|
215
|
-
if start_date:
|
216
|
-
self.start_date = start_date
|
217
|
-
if end_date:
|
218
|
-
self.end_date = end_date
|
219
|
-
date_list = otk.dates_between(start_date=self.start_date, end_date=self.end_date)
|
220
|
-
|
221
|
-
df = self.download.data_to_df(
|
222
|
-
db_name=self.db_name,
|
223
|
-
table_name=self.table_name,
|
224
|
-
start_date='2025-03-07',
|
225
|
-
end_date='2039-12-31',
|
226
|
-
projection={
|
227
|
-
'日期': 1,
|
228
|
-
'平台': 1,
|
229
|
-
'店铺名称': 1,
|
230
|
-
'token': 1,
|
231
|
-
'更新时间': 1
|
232
|
-
},
|
233
|
-
)
|
234
|
-
if len(df) == 0:
|
235
|
-
self.logining()
|
236
|
-
self.save_token()
|
237
|
-
else:
|
238
|
-
# 仅保留最新日期的数据
|
239
|
-
idx = df.groupby(['平台', '店铺名称'])['更新时间'].idxmax()
|
240
|
-
df = df.loc[idx][['token']]
|
241
|
-
if len(df) == 0:
|
242
|
-
logger.error(f'从数据库获取的 token 不能为空')
|
243
|
-
return
|
244
|
-
self.token = df.iloc[0, 0]
|
245
|
-
|
246
|
-
self.url = f'https://treasurebox.aikucun.com/api/web/merchant/treasure/commodity/{item_type}/list'
|
247
|
-
headers = {
|
248
|
-
'headers': ua_sj.get_ua(),
|
249
|
-
'referer': 'https://treasurebox.aikucun.com/dashboard/commodity/ranking/merchant',
|
250
|
-
'content-type': 'application/json;charset=UTF-8',
|
251
|
-
'origin': 'https://treasurebox.aikucun.com',
|
252
|
-
'system': 'merchant',
|
253
|
-
'token': self.token, # 从浏览器本地存储空间获取
|
254
|
-
}
|
255
|
-
num = 1
|
256
|
-
results = []
|
257
|
-
for date in date_list:
|
258
|
-
if self.error_count > 5:
|
259
|
-
logger.logger('已退出请求 -> self.error_count > 5')
|
260
|
-
break
|
261
|
-
req_date = re.sub('-', '', date)
|
262
|
-
data = {
|
263
|
-
'beginDate': req_date,
|
264
|
-
'brandIds': [],
|
265
|
-
'cropId': '',
|
266
|
-
'cropName': '',
|
267
|
-
'ctgryOneIds': [],
|
268
|
-
'ctgryThreeIds': [],
|
269
|
-
'ctgryTwoIds': [],
|
270
|
-
'dimValue': '',
|
271
|
-
'endDate': req_date,
|
272
|
-
'merchantShopCode': '',
|
273
|
-
'orderByName': 'dealGmv',
|
274
|
-
'orderType': 'desc',
|
275
|
-
'pageNum': page_num,
|
276
|
-
'pageSize': page_size
|
277
|
-
}
|
278
|
-
|
279
|
-
res = requests.post(
|
280
|
-
url=self.url,
|
281
|
-
headers=headers,
|
282
|
-
# cookies=cookies,
|
283
|
-
data=json.dumps(data)
|
284
|
-
)
|
285
|
-
logger.info('获取数据', {'进度': num/len(date_list), '日期': date, '榜单类型': item_type})
|
286
|
-
if not res.json().get('success', None):
|
287
|
-
logger.error('没有获取到数据, 请求不成功, 如果连续请求失败 > 5, 则需重新获取cookie后继续')
|
288
|
-
num += 1
|
289
|
-
self.error_count += 1
|
290
|
-
time.sleep(1)
|
291
|
-
continue
|
292
|
-
if not res.json().get('data', {}).get('rows', None):
|
293
|
-
logger.error("返回的数据字典异常, ['data']['rows'] 不能为空")
|
294
|
-
num += 1
|
295
|
-
self.error_count += 1
|
296
|
-
time.sleep(1)
|
297
|
-
continue
|
298
|
-
results += [(date, res.json()['data']['rows'])]
|
299
|
-
num += 1
|
300
|
-
time.sleep(1)
|
301
|
-
if num % 32 == 0:
|
302
|
-
logger.info("避免频繁请求, 正在休眠...")
|
303
|
-
# time.sleep(60)
|
304
|
-
|
305
|
-
return results
|
306
|
-
|
307
|
-
def insert_datas(self, data_list, db_name, table_name):
|
308
|
-
"""数据清洗"""
|
309
|
-
if not data_list:
|
310
|
-
return
|
311
|
-
chanel_name = {
|
312
|
-
'availableNum': '可售库存数',
|
313
|
-
'availableSkuCnt': '在架sku数',
|
314
|
-
'brandName': '品牌名',
|
315
|
-
'ctgryOneName': '一级类目名称',
|
316
|
-
'ctgryThreeName': '三级类目名称',
|
317
|
-
'ctgryTwoName': '二级类目名称',
|
318
|
-
'dealBuyerCnt': '支付人数_成交',
|
319
|
-
'dealBuyerCntRate': '成交率_成交',
|
320
|
-
'dealGmv': '成交gmv',
|
321
|
-
'dealIdolCnt': '销售爱豆人数',
|
322
|
-
'dealProductCnt': '销售量_成交',
|
323
|
-
'dealProductCntRate': '售罄率',
|
324
|
-
'dealSkuCnt': '成交sku数',
|
325
|
-
'dealTwoCnt': '订单数_成交',
|
326
|
-
'downSkuCnt': '可售sku数',
|
327
|
-
'etlInsertTime': '数据更新时间',
|
328
|
-
'forwardConfirmCnt': '转发爱豆人数',
|
329
|
-
'forwardConfirmNum': '转发次数',
|
330
|
-
'merStyleNo': '商品款号', # spu 榜单
|
331
|
-
'styleNo': '商品货号', # sku 榜单
|
332
|
-
'orderBuyerCnt': '支付人数_交易',
|
333
|
-
'orderBuyerCntRate': '成交率_交易',
|
334
|
-
'orderGmv': '下单gmv',
|
335
|
-
'orderProductCnt': '销售量_交易',
|
336
|
-
'orderSkuCnt': '下单sku数',
|
337
|
-
'orderTwoCnt': '订单数_交易',
|
338
|
-
'pictureUrl': '图片',
|
339
|
-
'pvNum': '浏览量',
|
340
|
-
'rn': '序号',
|
341
|
-
'spuId': 'spuid',
|
342
|
-
'spuName': '商品名称',
|
343
|
-
'supplyAmount': '供货额',
|
344
|
-
'supplyPerAmount': '供货价',
|
345
|
-
'uvNum': '访客量',
|
346
|
-
'colorName': '颜色',
|
347
|
-
'sizeName': '尺码',
|
348
|
-
'barCode': '条码', # sku榜单 款号 + 颜色编码
|
349
|
-
}
|
350
|
-
# 移除未翻译的列名
|
351
|
-
res_col = [item for item in chanel_name.keys() if chanel_name[item] == '']
|
352
|
-
for item in res_col:
|
353
|
-
del chanel_name[item]
|
354
|
-
|
355
|
-
_results = []
|
356
|
-
for item_ in data_list:
|
357
|
-
end_date, d_list = item_
|
358
|
-
for main_data_dict in d_list:
|
359
|
-
dict_data_before = {}
|
360
|
-
# 添加数据
|
361
|
-
dict_data_before.update({k: v for k, v in main_data_dict.items()})
|
362
|
-
# 初始化 dict_data
|
363
|
-
dict_data = {
|
364
|
-
'日期': end_date,
|
365
|
-
'平台': '爱库存',
|
366
|
-
'店铺名称': self.shop_name
|
367
|
-
}
|
368
|
-
for k, v in dict_data_before.items():
|
369
|
-
# 翻译键名
|
370
|
-
[dict_data.update({name_v: v}) for name_k, name_v in chanel_name.items() if k == name_k]
|
371
|
-
# 没有翻译的键值也要保留
|
372
|
-
not_in_rename = [item for item in dict_data_before.keys() if item not in chanel_name.keys()]
|
373
|
-
[dict_data.update({item: dict_data_before[item]}) for item in not_in_rename]
|
374
|
-
dict_data.update(
|
375
|
-
{
|
376
|
-
'更新时间': datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S')
|
377
|
-
}
|
378
|
-
)
|
379
|
-
new_dict_data = {}
|
380
|
-
for k, v in dict_data.items():
|
381
|
-
if v and str(v).lower() != 'none' and str(v) != 'null':
|
382
|
-
new_dict_data.update({k: v})
|
383
|
-
else:
|
384
|
-
new_dict_data.update({k: 0})
|
385
|
-
_results.append(new_dict_data)
|
386
|
-
set_typ = {
|
387
|
-
'可售库存数': 'INT',
|
388
|
-
'在架sku数': 'INT',
|
389
|
-
'品牌名': 'varchar(50)',
|
390
|
-
'一级类目名称': 'varchar(50)',
|
391
|
-
'三级类目名称': 'varchar(50)',
|
392
|
-
'二级类目名称': 'varchar(50)',
|
393
|
-
'支付人数_成交': 'INT',
|
394
|
-
'成交率_成交': 'decimal(10,4)',
|
395
|
-
'成交gmv': 'decimal(10,2)',
|
396
|
-
'销售爱豆人数': 'INT',
|
397
|
-
'销售量_成交': 'INT',
|
398
|
-
'售罄率': 'decimal(10,4)',
|
399
|
-
'成交sku数': 'INT',
|
400
|
-
'订单数_成交': 'INT',
|
401
|
-
'可售sku数': 'INT',
|
402
|
-
'数据更新时间': 'DATETIME',
|
403
|
-
'转发爱豆人数': 'INT',
|
404
|
-
'转发次数': 'INT',
|
405
|
-
'商品款号': 'varchar(50)',
|
406
|
-
'支付人数_交易': 'INT',
|
407
|
-
'成交率_交易': 'decimal(10,4)',
|
408
|
-
'下单gmv': 'decimal(10,2)',
|
409
|
-
'销售量_交易': 'INT',
|
410
|
-
'下单sku数': 'INT',
|
411
|
-
'订单数_交易': 'INT',
|
412
|
-
'图片': 'varchar(255)',
|
413
|
-
'浏览量': 'INT',
|
414
|
-
'序号': 'INT',
|
415
|
-
'spuid': 'varchar(50)',
|
416
|
-
'商品名称': 'varchar(50)',
|
417
|
-
'供货额': 'decimal(10,2)',
|
418
|
-
'供货价': 'decimal(10,2)',
|
419
|
-
'访客量': 'INT',
|
420
|
-
'颜色': 'varchar(50)',
|
421
|
-
'尺码': 'varchar(50)',
|
422
|
-
'货号': 'varchar(50)', # 款号 + 颜色编码
|
423
|
-
}
|
424
|
-
logger.info('更新数据库', {'店铺名称': self.shop_name, '库': db_name, '表': table_name})
|
425
|
-
if 'spu' in table_name:
|
426
|
-
drop_dup = ['日期', '平台', '店铺名称', '商品款号', '访客量']
|
427
|
-
else:
|
428
|
-
drop_dup = ['日期', '平台', '店铺名称', '条码']
|
429
|
-
self.uld.upload_data(
|
430
|
-
db_name=db_name,
|
431
|
-
table_name=table_name,
|
432
|
-
data=_results,
|
433
|
-
set_typ=set_typ, # 定义列和数据类型
|
434
|
-
primary_keys=[], # 创建唯一主键
|
435
|
-
check_duplicate=False, # 检查重复数据
|
436
|
-
update_on_duplicate=False, # 遇到重复时更新数据,默认 False 跳过
|
437
|
-
duplicate_columns=drop_dup, # 指定排重的组合键
|
438
|
-
allow_null=False, # 允许插入空值
|
439
|
-
partition_by=None, # 按年/月分表
|
440
|
-
partition_date_column='日期', # 用于分表的日期列名,默认为'日期'
|
441
|
-
auto_create=True, # 表不存在时自动创建, 默认参数不要更改
|
442
|
-
indexes=[], # 指定索引列
|
443
|
-
transaction_mode='row', # 事务模式
|
444
|
-
unique_keys=[drop_dup], # 唯一约束列表
|
445
|
-
)
|
446
|
-
|
447
|
-
def get_sign(self):
|
448
|
-
sign = 'bbcf5b9cf3d3b8ba9c22550dcba8a3ce97be766f'
|
449
|
-
current_timestamp_ms = '1741396070777'
|
450
|
-
# current_timestamp_ms = int(round(time.time() * 1000))
|
451
|
-
self.url = f'https://treasurebox.aikucun.com/api/web/merchant/treasure/commodity/sku/list?time={current_timestamp_ms}&sign={sign}'
|
452
|
-
headers = {
|
453
|
-
'headers': ua_sj.get_ua(),
|
454
|
-
'referer': 'https://treasurebox.aikucun.com/dashboard/commodity/ranking/merchant',
|
455
|
-
'content-type': 'application/json;charset=UTF-8',
|
456
|
-
'origin': 'https://treasurebox.aikucun.com',
|
457
|
-
# 'system': 'merchant',
|
458
|
-
# 'token': self.token, # 从浏览器本地存储空间获取
|
459
|
-
}
|
460
|
-
data = {
|
461
|
-
'beginDate': '20250307',
|
462
|
-
'brandIds': [],
|
463
|
-
'cropId': '',
|
464
|
-
'cropName': '',
|
465
|
-
'ctgryOneIds': [],
|
466
|
-
'ctgryThreeIds': [],
|
467
|
-
'ctgryTwoIds': [],
|
468
|
-
'dimValue': '',
|
469
|
-
'endDate': '20250307',
|
470
|
-
'merchantShopCode': '',
|
471
|
-
'orderByName': 'dealGmv',
|
472
|
-
'orderType': 'desc',
|
473
|
-
'pageNum': 1,
|
474
|
-
'pageSize': 10
|
475
|
-
}
|
476
|
-
res = requests.post(
|
477
|
-
url=self.url,
|
478
|
-
headers=headers,
|
479
|
-
data=json.dumps(data)
|
480
|
-
)
|
481
|
-
|
482
|
-
|
483
|
-
def main(start_date, end_date=None, item_type=['spu']):
|
484
|
-
db_config = {
|
485
|
-
'username': username,
|
486
|
-
'password': password,
|
487
|
-
'host': host,
|
488
|
-
'port': int(port),
|
489
|
-
'pool_size': 3
|
490
|
-
}
|
491
|
-
with uploader.MySQLUploader(**db_config) as uld:
|
492
|
-
with s_query.QueryDatas(**db_config) as download:
|
493
|
-
ak = AikuCun(uld_manager=uld, download_manager=download)
|
494
|
-
# ak.get_sign()
|
495
|
-
for type_ in item_type:
|
496
|
-
if type_ not in ['spu', 'sku']:
|
497
|
-
logger.error(f'{item_type} 非法参数: {type_}')
|
498
|
-
continue
|
499
|
-
for i in range(2):
|
500
|
-
data_list = ak.get_data_from_bbx(
|
501
|
-
start_date=start_date,
|
502
|
-
end_date=end_date,
|
503
|
-
item_type=type_,
|
504
|
-
page_num=1,
|
505
|
-
page_size=300
|
506
|
-
)
|
507
|
-
if not data_list:
|
508
|
-
ak.logining()
|
509
|
-
ak.save_token()
|
510
|
-
ak.error_count = 0 # 重置错误计数器
|
511
|
-
else:
|
512
|
-
break
|
513
|
-
|
514
|
-
ak.insert_datas(
|
515
|
-
data_list=data_list,
|
516
|
-
db_name='爱库存2',
|
517
|
-
table_name=f'{type_}榜单'
|
518
|
-
)
|
519
|
-
|
520
|
-
|
521
|
-
|
522
|
-
if __name__ == '__main__':
|
523
|
-
main(
|
524
|
-
start_date='2025-05-13',
|
525
|
-
# end_date='2025-04-28', # 不传则默认到今天
|
526
|
-
item_type=[
|
527
|
-
'spu',
|
528
|
-
'sku'
|
529
|
-
]
|
530
|
-
)
|
File without changes
|
File without changes
|