mdbq 3.7.26__py3-none-any.whl → 3.8.2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- mdbq/mysql/mysql.py +143 -140
- mdbq/other/download_sku_picture.py +1 -0
- mdbq/spider/aikucun.py +377 -385
- {mdbq-3.7.26.dist-info → mdbq-3.8.2.dist-info}/METADATA +1 -1
- {mdbq-3.7.26.dist-info → mdbq-3.8.2.dist-info}/RECORD +7 -7
- {mdbq-3.7.26.dist-info → mdbq-3.8.2.dist-info}/WHEEL +0 -0
- {mdbq-3.7.26.dist-info → mdbq-3.8.2.dist-info}/top_level.txt +0 -0
mdbq/mysql/mysql.py
CHANGED
@@ -290,32 +290,33 @@ class MysqlUpload:
|
|
290
290
|
cursor.execute(sql, dict_data['数据主体'])
|
291
291
|
|
292
292
|
if reset_id:
|
293
|
-
|
294
|
-
|
295
|
-
|
296
|
-
|
297
|
-
|
298
|
-
|
299
|
-
|
300
|
-
|
301
|
-
|
302
|
-
|
303
|
-
|
304
|
-
|
305
|
-
|
306
|
-
|
307
|
-
|
308
|
-
|
309
|
-
|
310
|
-
|
311
|
-
|
312
|
-
|
313
|
-
|
314
|
-
|
315
|
-
|
316
|
-
|
317
|
-
|
318
|
-
|
293
|
+
pass
|
294
|
+
# # 6. 重置自增列
|
295
|
+
# try:
|
296
|
+
# # 查询所有复合主键
|
297
|
+
# sql = (
|
298
|
+
# f"SELECT `COLUMN_NAME` AS `PrimaryKey` FROM `information_schema`.`COLUMNS` "
|
299
|
+
# f"WHERE `TABLE_SCHEMA` = '{db_name}'AND `TABLE_NAME` = '{table_name}' AND `COLUMN_KEY` = 'PRI';"
|
300
|
+
# )
|
301
|
+
# cursor.execute(sql)
|
302
|
+
# result = cursor.fetchall() # 复合主键数
|
303
|
+
# if len(result) <= 1: # 如果存在复合主键,则不能直接删除 id 键,其他主键可能不是唯一,会报错
|
304
|
+
# column_name = 'id'
|
305
|
+
# sql = (f'SELECT COLUMN_NAME FROM INFORMATION_SCHEMA.COLUMNS '
|
306
|
+
# f'WHERE TABLE_SCHEMA = %s AND TABLE_NAME = %s AND COLUMN_NAME = %s')
|
307
|
+
# cursor.execute(sql, (db_name, table_name, column_name))
|
308
|
+
# result = cursor.fetchone()
|
309
|
+
# if result:
|
310
|
+
# sql = f"ALTER TABLE `{table_name}` DROP COLUMN {column_name}" # 删除 id 列
|
311
|
+
# cursor.execute(sql)
|
312
|
+
# cursor.execute(
|
313
|
+
# f"ALTER TABLE `{table_name}` ADD column id INT AUTO_INCREMENT PRIMARY KEY FIRST;")
|
314
|
+
# cursor.execute(f"ALTER TABLE `{table_name}` AUTO_INCREMENT = 1") # 设置自增从 1 开始
|
315
|
+
# else:
|
316
|
+
# logger.info(f'{table_name} 存在复合主键: 存在复合主键: {[item['PrimaryKey'] for item in result]}, 无法重置自增id')
|
317
|
+
# except Exception as e:
|
318
|
+
# logger.error(f'333 {table_name} {e}')
|
319
|
+
# connection.rollback()
|
319
320
|
connection.commit()
|
320
321
|
|
321
322
|
@try_except
|
@@ -1014,35 +1015,36 @@ class MysqlUpload:
|
|
1014
1015
|
chunksize=1000
|
1015
1016
|
)
|
1016
1017
|
if reset_id:
|
1017
|
-
|
1018
|
-
|
1019
|
-
|
1020
|
-
|
1021
|
-
|
1022
|
-
|
1023
|
-
|
1024
|
-
|
1025
|
-
|
1026
|
-
|
1027
|
-
|
1028
|
-
|
1029
|
-
|
1030
|
-
|
1031
|
-
|
1032
|
-
|
1033
|
-
|
1034
|
-
|
1035
|
-
|
1036
|
-
|
1037
|
-
|
1038
|
-
|
1039
|
-
|
1040
|
-
|
1041
|
-
|
1042
|
-
|
1043
|
-
|
1044
|
-
|
1045
|
-
|
1018
|
+
pass
|
1019
|
+
# # 6. 重置自增列
|
1020
|
+
# try:
|
1021
|
+
# # 查询所有复合主键
|
1022
|
+
# sql = (
|
1023
|
+
# f"SELECT `COLUMN_NAME` AS `PrimaryKey` FROM `information_schema`.`COLUMNS` "
|
1024
|
+
# f"WHERE `TABLE_SCHEMA` = '{db_name}'AND `TABLE_NAME` = '{table_name}' AND `COLUMN_KEY` = 'PRI';"
|
1025
|
+
# )
|
1026
|
+
# cursor.execute(sql)
|
1027
|
+
# result = cursor.fetchall() # 复合主键数
|
1028
|
+
# if len(result) <= 1: # 如果存在复合主键,则不能直接删除 id 键,其他主键可能不是唯一,会报错
|
1029
|
+
# column_name = 'id'
|
1030
|
+
# sql = (f'SELECT COLUMN_NAME FROM INFORMATION_SCHEMA.COLUMNS '
|
1031
|
+
# f'WHERE TABLE_SCHEMA = %s AND TABLE_NAME = %s AND COLUMN_NAME = %s')
|
1032
|
+
# # cursor.execute(f"SHOW COLUMNS FROM `{table_name}` LIKE 'id'")
|
1033
|
+
# cursor.execute(sql, (db_name, table_name, column_name))
|
1034
|
+
# result = cursor.fetchone()
|
1035
|
+
# if result:
|
1036
|
+
# # cursor.execute(f"ALTER TABLE `{table_name}` DROP COLUMN id;") # 删除 id 列
|
1037
|
+
# sql = f"ALTER TABLE `{table_name}` DROP COLUMN {column_name}" # 删除 id 列
|
1038
|
+
# cursor.execute(sql)
|
1039
|
+
# cursor.execute(
|
1040
|
+
# f"ALTER TABLE `{table_name}` ADD column id INT AUTO_INCREMENT PRIMARY KEY FIRST;")
|
1041
|
+
# cursor.execute(f"ALTER TABLE `{table_name}` AUTO_INCREMENT = 1") # 设置自增从 1 开始
|
1042
|
+
# # logger.info(f'重置自增id')
|
1043
|
+
# else:
|
1044
|
+
# logger.info(f'{table_name} 存在复合主键: {[item['PrimaryKey'] for item in result]}, 无法重置自增id')
|
1045
|
+
# except Exception as e:
|
1046
|
+
# logger.error(f'333 {table_name} {e}')
|
1047
|
+
# connection.rollback()
|
1046
1048
|
connection.commit() # 提交事务
|
1047
1049
|
connection.close()
|
1048
1050
|
return
|
@@ -1070,36 +1072,36 @@ class MysqlUpload:
|
|
1070
1072
|
index=False,
|
1071
1073
|
chunksize=1000
|
1072
1074
|
)
|
1073
|
-
# 6. 重置自增列
|
1074
|
-
try:
|
1075
|
-
|
1076
|
-
|
1077
|
-
|
1078
|
-
|
1079
|
-
|
1080
|
-
|
1081
|
-
|
1082
|
-
|
1083
|
-
|
1084
|
-
|
1085
|
-
|
1086
|
-
|
1087
|
-
|
1088
|
-
|
1089
|
-
|
1090
|
-
|
1091
|
-
|
1092
|
-
|
1093
|
-
|
1094
|
-
|
1095
|
-
|
1096
|
-
|
1097
|
-
|
1098
|
-
|
1099
|
-
except Exception as e:
|
1100
|
-
|
1101
|
-
|
1102
|
-
connection.close()
|
1075
|
+
# # 6. 重置自增列
|
1076
|
+
# try:
|
1077
|
+
# # 查询所有复合主键
|
1078
|
+
# sql = (
|
1079
|
+
# f"SELECT `COLUMN_NAME` AS `PrimaryKey` FROM `information_schema`.`COLUMNS` "
|
1080
|
+
# f"WHERE `TABLE_SCHEMA` = '{db_name}'AND `TABLE_NAME` = '{table_name}' AND `COLUMN_KEY` = 'PRI';"
|
1081
|
+
# )
|
1082
|
+
# cursor.execute(sql)
|
1083
|
+
# result = cursor.fetchall() # 复合主键数
|
1084
|
+
# if len(result) <= 1: # 如果存在复合主键,则不能直接删除 id 键,其他主键可能不是唯一,会报错
|
1085
|
+
# column_name = 'id'
|
1086
|
+
# sql = (f'SELECT COLUMN_NAME FROM INFORMATION_SCHEMA.COLUMNS '
|
1087
|
+
# f'WHERE TABLE_SCHEMA = %s AND TABLE_NAME = %s AND COLUMN_NAME = %s')
|
1088
|
+
# # cursor.execute(f"SHOW COLUMNS FROM `{table_name}` LIKE 'id'")
|
1089
|
+
# cursor.execute(sql, (db_name, table_name, column_name))
|
1090
|
+
# result = cursor.fetchone()
|
1091
|
+
# if result:
|
1092
|
+
# # cursor.execute(f"ALTER TABLE `{table_name}` DROP COLUMN id;") # 删除 id 列
|
1093
|
+
# sql = f"ALTER TABLE `{table_name}` DROP COLUMN {column_name}" # 删除 id 列
|
1094
|
+
# cursor.execute(sql)
|
1095
|
+
# cursor.execute(
|
1096
|
+
# f"ALTER TABLE `{table_name}` ADD column id INT AUTO_INCREMENT PRIMARY KEY FIRST;")
|
1097
|
+
# cursor.execute(f"ALTER TABLE `{table_name}` AUTO_INCREMENT = 1") # 设置自增从 1 开始
|
1098
|
+
# # logger.info(f'重置自增id')
|
1099
|
+
# else:
|
1100
|
+
# logger.info(f'{table_name} 存在复合主键: {[item['PrimaryKey'] for item in result]}, 无法重置自增id')
|
1101
|
+
# except Exception as e:
|
1102
|
+
# logger.error(f'333 {table_name} {e}')
|
1103
|
+
# connection.rollback()
|
1104
|
+
# connection.close()
|
1103
1105
|
return
|
1104
1106
|
|
1105
1107
|
datas = df.to_dict(orient='records')
|
@@ -1189,35 +1191,36 @@ class MysqlUpload:
|
|
1189
1191
|
pass
|
1190
1192
|
|
1191
1193
|
if reset_id:
|
1192
|
-
|
1193
|
-
|
1194
|
-
|
1195
|
-
|
1196
|
-
|
1197
|
-
|
1198
|
-
|
1199
|
-
|
1200
|
-
|
1201
|
-
|
1202
|
-
|
1203
|
-
|
1204
|
-
|
1205
|
-
|
1206
|
-
|
1207
|
-
|
1208
|
-
|
1209
|
-
|
1210
|
-
|
1211
|
-
|
1212
|
-
|
1213
|
-
|
1214
|
-
|
1215
|
-
|
1216
|
-
|
1217
|
-
|
1218
|
-
|
1219
|
-
|
1220
|
-
|
1194
|
+
pass
|
1195
|
+
# # 6. 重置自增列
|
1196
|
+
# try:
|
1197
|
+
# # 查询所有复合主键
|
1198
|
+
# sql = (
|
1199
|
+
# f"SELECT `COLUMN_NAME` AS `PrimaryKey` FROM `information_schema`.`COLUMNS` "
|
1200
|
+
# f"WHERE `TABLE_SCHEMA` = '{db_name}'AND `TABLE_NAME` = '{table_name}' AND `COLUMN_KEY` = 'PRI';"
|
1201
|
+
# )
|
1202
|
+
# cursor.execute(sql)
|
1203
|
+
# result = cursor.fetchall() # 复合主键数
|
1204
|
+
# if len(result) <= 1: # 如果存在复合主键,则不能直接删除 id 键,其他主键可能不是唯一,会报错
|
1205
|
+
# column_name = 'id'
|
1206
|
+
# sql = (f'SELECT COLUMN_NAME FROM INFORMATION_SCHEMA.COLUMNS '
|
1207
|
+
# f'WHERE TABLE_SCHEMA = %s AND TABLE_NAME = %s AND COLUMN_NAME = %s')
|
1208
|
+
# # cursor.execute(f"SHOW COLUMNS FROM `{table_name}` LIKE 'id'")
|
1209
|
+
# cursor.execute(sql, (db_name, table_name, column_name))
|
1210
|
+
# result = cursor.fetchone()
|
1211
|
+
# if result:
|
1212
|
+
# # cursor.execute(f"ALTER TABLE `{table_name}` DROP COLUMN id;") # 删除 id 列
|
1213
|
+
# sql = f"ALTER TABLE `{table_name}` DROP COLUMN {column_name}" # 删除 id 列
|
1214
|
+
# cursor.execute(sql)
|
1215
|
+
# cursor.execute(
|
1216
|
+
# f"ALTER TABLE `{table_name}` ADD column id INT AUTO_INCREMENT PRIMARY KEY FIRST;")
|
1217
|
+
# cursor.execute(f"ALTER TABLE `{table_name}` AUTO_INCREMENT = 1") # 设置自增从 1 开始
|
1218
|
+
# # logger.info(f'重置自增id')
|
1219
|
+
# else:
|
1220
|
+
# logger.info(f'{table_name} 存在复合主键: {[item['PrimaryKey'] for item in result]}, 无法重置自增id')
|
1221
|
+
# except Exception as e:
|
1222
|
+
# logger.error(f'333 {table_name} {e}')
|
1223
|
+
# connection.rollback()
|
1221
1224
|
connection.commit() # 提交事务
|
1222
1225
|
connection.close()
|
1223
1226
|
|
@@ -1500,32 +1503,32 @@ class OptimizeDatas:
|
|
1500
1503
|
else: # 不存在日期列的情况
|
1501
1504
|
self.delete_duplicate2(table_name=table_name, except_key=except_key)
|
1502
1505
|
|
1503
|
-
# 6. 重置自增列
|
1504
|
-
try:
|
1505
|
-
|
1506
|
-
|
1507
|
-
|
1508
|
-
|
1509
|
-
|
1510
|
-
|
1511
|
-
|
1512
|
-
|
1513
|
-
|
1514
|
-
|
1515
|
-
|
1516
|
-
|
1517
|
-
|
1518
|
-
|
1519
|
-
|
1520
|
-
|
1521
|
-
|
1522
|
-
|
1523
|
-
|
1524
|
-
|
1525
|
-
|
1526
|
-
except Exception as e:
|
1527
|
-
|
1528
|
-
|
1506
|
+
# # 6. 重置自增列
|
1507
|
+
# try:
|
1508
|
+
# # 查询所有复合主键
|
1509
|
+
# sql = (
|
1510
|
+
# f"SELECT `COLUMN_NAME` AS `PrimaryKey` FROM `information_schema`.`COLUMNS` "
|
1511
|
+
# f"WHERE `TABLE_SCHEMA` = '{self.db_name}'AND `TABLE_NAME` = '{table_name}' AND `COLUMN_KEY` = 'PRI';"
|
1512
|
+
# )
|
1513
|
+
# cursor.execute(sql)
|
1514
|
+
# result = cursor.fetchall() # 复合主键数
|
1515
|
+
# if len(result) <= 1: # 如果存在复合主键,则不能直接删除 id 键,其他主键可能不是唯一,会报错
|
1516
|
+
# column_name = 'id'
|
1517
|
+
# sql = (f'SELECT COLUMN_NAME FROM INFORMATION_SCHEMA.COLUMNS '
|
1518
|
+
# f'WHERE TABLE_SCHEMA = %s AND TABLE_NAME = %s AND COLUMN_NAME = %s')
|
1519
|
+
# cursor.execute(sql, (self.db_name, table_name, column_name))
|
1520
|
+
# result = cursor.fetchone()
|
1521
|
+
# if result:
|
1522
|
+
# sql = f"ALTER TABLE `{table_name}` DROP COLUMN {column_name}" # 删除 id 列
|
1523
|
+
# cursor.execute(sql)
|
1524
|
+
# cursor.execute(
|
1525
|
+
# f"ALTER TABLE `{table_name}` ADD column id INT AUTO_INCREMENT PRIMARY KEY FIRST;")
|
1526
|
+
# cursor.execute(f"ALTER TABLE `{table_name}` AUTO_INCREMENT = 1") # 设置自增从 1 开始
|
1527
|
+
# else:
|
1528
|
+
# logger.info(f'{table_name} 存在复合主键: {[item['PrimaryKey'] for item in result]}, 无法重置自增id')
|
1529
|
+
# except Exception as e:
|
1530
|
+
# logger.error(f'333 {table_name} {e}')
|
1531
|
+
# self.connection.rollback()
|
1529
1532
|
self.connection.close()
|
1530
1533
|
logger.info(f'mysql({self.host}: {self.port}) {self.db_name} 数据库优化完成!')
|
1531
1534
|
|
mdbq/spider/aikucun.py
CHANGED
@@ -1,190 +1,75 @@
|
|
1
1
|
# -*- coding:utf-8 -*-
|
2
2
|
import datetime
|
3
|
-
import
|
3
|
+
import requests
|
4
4
|
import json
|
5
5
|
import os
|
6
6
|
import sys
|
7
|
-
import pathlib
|
8
|
-
import platform
|
9
7
|
import re
|
10
8
|
import time
|
11
9
|
import warnings
|
12
|
-
import
|
13
|
-
import
|
10
|
+
import platform
|
11
|
+
import getpass
|
14
12
|
from selenium import webdriver
|
15
13
|
from selenium.webdriver.support.wait import WebDriverWait
|
16
14
|
from selenium.webdriver.common.by import By
|
17
15
|
from selenium.webdriver.support import expected_conditions as EC
|
18
16
|
from selenium.webdriver.chrome.service import Service
|
19
|
-
|
20
|
-
|
21
|
-
from mdbq.
|
17
|
+
import pymysql
|
18
|
+
import pandas as pd
|
19
|
+
from mdbq.log import spider_logging
|
22
20
|
from mdbq.mysql import mysql
|
23
21
|
from mdbq.mysql import s_query
|
24
|
-
from mdbq.config import
|
25
|
-
|
26
|
-
warnings.filterwarnings('ignore')
|
22
|
+
from mdbq.config import config
|
23
|
+
from mdbq.other import ua_sj
|
27
24
|
|
28
25
|
|
29
|
-
|
30
|
-
|
31
|
-
D_PATH = str(pathlib.Path(f'C:\\Users\\{getpass.getuser()}\\Downloads'))
|
32
|
-
elif platform.system() == 'Linux':
|
33
|
-
D_PATH = os.path.join(os.path.realpath(os.path.dirname(sys.argv[0])), 'Downloads')
|
34
|
-
if not os.path.exists(D_PATH):
|
35
|
-
os.makedirs(D_PATH)
|
36
|
-
else:
|
37
|
-
D_PATH = str(pathlib.Path(f'/Users/{getpass.getuser()}/Downloads'))
|
38
|
-
upload_path = os.path.join(D_PATH, '数据上传中心', '爱库存') # 此目录位于下载文件夹
|
26
|
+
content = config.read_config(file_path=os.path.join(os.path.realpath(os.path.dirname(sys.argv[0])), 'spd.txt'))
|
27
|
+
username, password, host, port = content['username'], content['password'], content['host'], content['port']
|
39
28
|
|
40
|
-
|
41
|
-
m_engine, username, password, host, port = default.get_mysql_engine(platform='Windows', hostname=hostname, sql='mysql', local=local, config_file=None)
|
42
|
-
print(username, password, host, port)
|
29
|
+
m_engine = mysql.MysqlUpload(username=username, password=password, host=host, port=port, charset='utf8mb4')
|
43
30
|
# 实例化一个数据查询类,用来获取 cookies 表数据
|
44
31
|
download = s_query.QueryDatas(username=username, password=password, host=host, port=port)
|
32
|
+
logger = spider_logging.setup_logging()
|
45
33
|
|
46
34
|
|
47
|
-
def
|
48
|
-
|
49
|
-
|
50
|
-
_url = 'https://gray-merc.aikucun.com/index.html'
|
51
|
-
cookie_path = os.path.join(set_support.SetSupport(dirname='support').dirname, 'cookies')
|
52
|
-
filename_aikucun = 'cookie_aikucun.json'
|
53
|
-
print(_url)
|
54
|
-
|
55
|
-
option = webdriver.ChromeOptions() # 浏览器启动选项
|
56
|
-
option.headless = True # False指定为无界面模式
|
57
|
-
# 调整chrome启动配置
|
58
|
-
option.add_argument("--disable-gpu")
|
59
|
-
option.add_argument("--no-sandbox")
|
60
|
-
option.add_argument("--disable-dev-shm-usage")
|
61
|
-
option.add_experimental_option("excludeSwitches", ["enable-automation"])
|
62
|
-
option.add_experimental_option("useAutomationExtension", False)
|
63
|
-
# if platform.system() == 'Windows':
|
64
|
-
# service = Service(os.path.join(f'C:\\Users\\{getpass.getuser()}\\chromedriver.exe'))
|
65
|
-
# else:
|
66
|
-
# service = Service('/usr/local/bin/chromedriver')
|
67
|
-
if platform.system() == 'Windows':
|
68
|
-
# 设置Chrome的路径
|
69
|
-
chrome_path = os.path.join(f'C:\\Users\\{getpass.getuser()}', 'chrome\\chrome_win64\\chrome.exe')
|
70
|
-
chromedriver_path = os.path.join(f'C:\\Users\\{getpass.getuser()}', 'chrome\\chromedriver.exe')
|
71
|
-
# os.environ["webdriver.chrome.driver"] = chrome_path
|
72
|
-
option.binary_location = chrome_path # windows 设置此参数有效
|
73
|
-
service = Service(chromedriver_path)
|
74
|
-
# service = Service(str(pathlib.Path(f'C:\\Users\\{getpass.getuser()}\\chromedriver.exe'))) # 旧路径
|
75
|
-
else:
|
76
|
-
# 设置Chrome的路径
|
77
|
-
chrome_path = '/usr/local/chrome/Google Chrome for Testing.app'
|
78
|
-
chromedriver_path = '/usr/local/chrome/chromedriver'
|
79
|
-
os.environ["webdriver.chrome.driver"] = chrome_path
|
80
|
-
|
81
|
-
service = Service(chromedriver_path)
|
82
|
-
_driver = webdriver.Chrome(service=service, options=option) # 创建Chrome驱动程序实例
|
83
|
-
|
84
|
-
# 登录
|
85
|
-
_driver.get(_url)
|
86
|
-
time.sleep(0.1)
|
87
|
-
_driver.maximize_window() # 窗口最大化 方便后续加载数据
|
88
|
-
print(f'请登录并切换到百宝箱,再保存 cookies: \n https://treasurebox.aikucun.com/dashboard/commodity/ranking/merchant?LS=true&shopId=1814114991487782914&from=menu&v=0.1936043279838604')
|
89
|
-
wait = WebDriverWait(_driver, timeout=15)
|
90
|
-
input_box = wait.until(
|
91
|
-
EC.element_to_be_clickable(
|
92
|
-
(By.XPATH, '//input[@placeholder="请输入用户名"]'))) #
|
93
|
-
input_box.send_keys('广东万里马实业股份有限公司')
|
94
|
-
input_box = wait.until(
|
95
|
-
EC.element_to_be_clickable(
|
96
|
-
(By.XPATH, '//input[@placeholder="请输入密码"]'))) #
|
97
|
-
input_box.send_keys('wlm123$$$')
|
98
|
-
time.sleep(0.1)
|
99
|
-
elements = _driver.find_elements(
|
100
|
-
By.XPATH, '//button[@class="merchant_login_btn" and contains(text(), "登录")]')
|
101
|
-
_driver.execute_script("arguments[0].click();", elements[0])
|
102
|
-
for i in range(100):
|
35
|
+
def keep_connect(_db_name, _config, max_try: int=10):
    """Open a pymysql connection, retrying when the attempt fails.

    Args:
        _db_name: Label that appears in the log messages only.
        _config: Mapping unpacked as keyword arguments into ``pymysql.connect``.
        max_try: Maximum number of connection attempts.

    Returns:
        The object returned by ``pymysql.connect``, or ``None`` once every
        attempt has failed.
    """
    attempts = 1
    while attempts <= max_try:
        try:
            return pymysql.connect(**_config)  # connect to the database
        except Exception as e:
            # NOTE(review): host/port here are the module-level values, not
            # whatever _config carries -- confirm they always match.
            logger.error(f'{_db_name}: 连接失败,正在重试: {host}:{port} {attempts}/{max_try} {e}')
            # Wait only when another attempt will follow; pausing after the
            # final failure would just delay the "retries exhausted" report.
            if attempts < max_try:
                time.sleep(30)
            attempts += 1
    logger.error(f'{_db_name}: 连接失败,重试次数超限,当前设定次数: {max_try}')
    return None
|
47
|
+
|
48
|
+
|
49
|
+
def dates_between(start_date, end_date) -> list:
    """Return every day from start_date through end_date, both inclusive.

    Both bounds are parsed with ``pd.to_datetime``. The result is a list of
    'YYYY-MM-DD' strings in ascending order; it is empty when start_date
    falls after end_date.
    """
    first_day = pd.to_datetime(start_date)
    last_day = pd.to_datetime(end_date)
    one_day = datetime.timedelta(days=1)

    days = []
    cursor = first_day
    while cursor <= last_day:
        days.append(cursor.strftime('%Y-%m-%d'))
        cursor = cursor + one_day
    return days
|
179
59
|
|
180
60
|
|
181
61
|
class AikuCun:
|
182
62
|
def __init__(self):
|
183
|
-
|
184
|
-
self.
|
185
|
-
self.
|
63
|
+
self.url = 'https://gray-merc.aikucun.com/index.html'
|
64
|
+
self.db_name = 'cookie文件'
|
65
|
+
self.table_name = 'main_aikucun'
|
66
|
+
self.shop_name = '万里马爱库存'
|
67
|
+
self.token = None
|
68
|
+
self.today = datetime.date.today()
|
69
|
+
self.start_date = (self.today - datetime.timedelta(days=7)).strftime('%Y-%m-%d')
|
70
|
+
self.end_date = (self.today - datetime.timedelta(days=1)).strftime('%Y-%m-%d')
|
186
71
|
|
187
|
-
def
|
72
|
+
def logining(self, shop_name='aikucun', headless=False):
|
188
73
|
option = webdriver.ChromeOptions()
|
189
74
|
if headless:
|
190
75
|
option.add_argument("--headless") # 设置无界面模式
|
@@ -219,8 +104,8 @@ class AikuCun:
|
|
219
104
|
option.add_experimental_option('prefs', prefs)
|
220
105
|
option.add_experimental_option('excludeSwitches', ['enable-automation']) # 实验性参数, 左上角小字
|
221
106
|
|
222
|
-
# 修改默认下载文件夹路径
|
223
|
-
option.add_experimental_option("prefs", {"download.default_directory": f'{upload_path}'})
|
107
|
+
# # 修改默认下载文件夹路径
|
108
|
+
# option.add_experimental_option("prefs", {"download.default_directory": f'{upload_path}'})
|
224
109
|
|
225
110
|
# # 通过excludeSwitches参数禁用默认的启动路径
|
226
111
|
# option.add_experimental_option('excludeSwitches', ['enable-automation'])
|
@@ -249,247 +134,354 @@ class AikuCun:
|
|
249
134
|
_driver.maximize_window() # 窗口最大化 方便后续加载数据
|
250
135
|
|
251
136
|
# 登录
|
252
|
-
_driver.get(self.
|
253
|
-
|
254
|
-
|
255
|
-
|
256
|
-
|
257
|
-
|
258
|
-
|
259
|
-
|
260
|
-
|
261
|
-
|
262
|
-
|
263
|
-
|
137
|
+
_driver.get(url='https://gray-merc.aikucun.com/index.html') # self.url 可能被修改,这里使用固定页面获取 sign
|
138
|
+
time.sleep(0.1)
|
139
|
+
_driver.maximize_window() # 窗口最大化 方便后续加载数据
|
140
|
+
wait = WebDriverWait(_driver, timeout=15)
|
141
|
+
input_box = wait.until(
|
142
|
+
EC.element_to_be_clickable(
|
143
|
+
(By.XPATH, '//input[@placeholder="请输入用户名"]'))) #
|
144
|
+
input_box.send_keys('广东万里马实业股份有限公司')
|
145
|
+
input_box = wait.until(
|
146
|
+
EC.element_to_be_clickable(
|
147
|
+
(By.XPATH, '//input[@placeholder="请输入密码"]'))) #
|
148
|
+
input_box.send_keys('wlm123$$$')
|
149
|
+
time.sleep(0.1)
|
150
|
+
elements = _driver.find_elements(
|
151
|
+
By.XPATH, '//button[@class="merchant_login_btn" and contains(text(), "登录")]')
|
152
|
+
_driver.execute_script("arguments[0].click();", elements[0])
|
153
|
+
for i in range(100):
|
154
|
+
try:
|
155
|
+
wait.until(
|
156
|
+
EC.element_to_be_clickable(
|
157
|
+
(By.XPATH, '//div[@class="user-info nav-user-slider"]')))
|
158
|
+
break
|
159
|
+
except:
|
160
|
+
time.sleep(5)
|
161
|
+
local_storage = _driver.execute_script("return window.localStorage;")
|
162
|
+
if 'token' in local_storage.keys():
|
163
|
+
self.token = {
|
164
|
+
'日期': datetime.datetime.today().strftime('%Y-%m-%d'),
|
165
|
+
'平台': '爱库存',
|
166
|
+
'店铺名称': self.shop_name,
|
167
|
+
'token': local_storage['token'],
|
168
|
+
'来源位置': 'localstorage',
|
169
|
+
'更新时间': datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S'),
|
170
|
+
}
|
171
|
+
time.sleep(5)
|
172
|
+
_driver.quit()
|
173
|
+
|
174
|
+
def save_token(self):
    """Write the record held in ``self.token`` via ``m_engine.dict_to_mysql``.

    When ``self.token`` is falsy a message is printed and nothing is written.
    """
    if not self.token:
        print('self.token 不能为空')
        return

    # NOTE(review): this column-type mapping is assembled but never handed to
    # dict_to_mysql below, which receives an empty set_typ -- confirm whether
    # that is deliberate.
    column_types = {
        '日期': 'DATE',
        '平台': 'varchar(50)',
        '店铺名称': 'varchar(50)',
        'token': 'varchar(255)',
        '来源位置': 'varchar(50)',
        '更新时间': 'timestamp'
    }

    # Record the token in the database.
    target = dict(db_name=self.db_name, table_name=self.table_name)
    m_engine.dict_to_mysql(
        dict_data=self.token,
        unique_main_key=None,
        icm_update=[],  # unique combination key
        main_key=None,  # index column; original note: usually the date column, which is the default index
        set_typ={},  # explicit column types (left empty here)
        **target,
    )
|
196
|
+
|
197
|
+
def get_data_from_bbx(self, start_date=None, end_date=None, item_type='spu', page_num=1, page_size=300):
    """Request one page of the commodity ranking list for each day in a range.

    The token is read from the token table through ``download.data_to_df``;
    when that table yields no rows, ``logining`` and ``save_token`` are run
    to obtain and store a new one. One POST is then sent per day.

    Args:
        start_date: Optional first day; when truthy it replaces ``self.start_date``.
        end_date: Optional last day; when truthy it replaces ``self.end_date``.
        item_type: Path segment inserted into the request URL (``'spu'`` by default).
        page_num: Value sent as ``pageNum``; only this single page is requested.
        page_size: Value sent as ``pageSize``.

    Returns:
        A list of ``(date, rows)`` tuples for the days that returned rows, or
        ``None`` when no token could be obtained.
    """
    if start_date:
        self.start_date = start_date
    if end_date:
        self.end_date = end_date
    date_list = dates_between(start_date=self.start_date, end_date=self.end_date)

    df = download.data_to_df(
        db_name=self.db_name,
        table_name=self.table_name,
        start_date='2025-03-07',
        end_date='2039-12-31',
        projection={
            '日期': 1,
            '平台': 1,
            '店铺名称': 1,
            'token': 1,
            '更新时间': 1
        },
    )
    if len(df) == 0:
        # Nothing stored yet: log in through the browser and persist the record.
        self.logining()
        self.save_token()
        if not self.token:
            # Login produced no token (save_token has already reported it);
            # sending requests without one cannot succeed.
            return
        # logining() stores the whole record dict; the request header needs
        # only the token string inside it.
        token_value = self.token['token'] if isinstance(self.token, dict) else self.token
    else:
        # Keep only the most recently updated row per platform/shop.
        idx = df.groupby(['平台', '店铺名称'])['更新时间'].idxmax()
        df = df.loc[idx][['token']]
        if len(df) == 0:
            print('从数据库获取的 token 不能为空')
            return
        self.token = df.iloc[0, 0]
        token_value = self.token

    self.url = f'https://treasurebox.aikucun.com/api/web/merchant/treasure/commodity/{item_type}/list'
    headers = {
        # Previously sent under the literal header name 'headers', so the
        # generated user agent never reached the server as a User-Agent.
        'user-agent': ua_sj.get_ua(),
        'referer': 'https://treasurebox.aikucun.com/dashboard/commodity/ranking/merchant',
        'content-type': 'application/json;charset=UTF-8',
        'origin': 'https://treasurebox.aikucun.com',
        'system': 'merchant',
        'token': token_value,  # taken from the browser's local storage at login
    }
    results = []
    total = len(date_list)
    # num counts requests made, so the progress display advances even when a
    # day fails or comes back empty.
    for num, date in enumerate(date_list, start=1):
        req_date = re.sub('-', '', date)
        data = {
            'beginDate': req_date,
            'brandIds': [],
            'cropId': '',
            'cropName': '',
            'ctgryOneIds': [],
            'ctgryThreeIds': [],
            'ctgryTwoIds': [],
            'dimValue': '',
            'endDate': req_date,
            'merchantShopCode': '',
            'orderByName': 'dealGmv',
            'orderType': 'desc',
            'pageNum': page_num,
            'pageSize': page_size
        }

        res = requests.post(
            url=self.url,
            headers=headers,
            data=json.dumps(data)
        )
        print(f'正在获取数据({num}/{total}): {item_type}榜单 {date}')

        # Parse the body once; a non-JSON or unexpectedly shaped body is
        # treated the same as an unsuccessful response.
        try:
            payload = res.json()
        except ValueError:
            payload = None
        if not isinstance(payload, dict):
            payload = {}

        if not payload.get('success'):
            print('requests 请求不成功, success 返回值应为 True')
        else:
            rows = (payload.get('data') or {}).get('rows')
            if rows:
                results.append((date, rows))
            else:
                print("请求获取的数据 ['data']['rows'] 不能为空")

        time.sleep(1)  # one-second pause after every request
        if num % 32 == 0:
            # Message only: the longer pause it announces is disabled.
            print("避免频繁请求, 正在休眠...")
            # time.sleep(60)

    return results
|
283
|
+
|
284
|
+
def insert_datas(self, data_list, db_name, table_name):
    """Clean the ranking rows returned by the API and write them to MySQL.

    Args:
        data_list: Iterable of ``(date, rows)`` pairs; ``rows`` is a list of
            dicts keyed by the API field names.
        db_name: Name of the target database.
        table_name: Name of the target table. A name containing ``'spu'``
            selects the spu unique-key combination, any other name the sku one.

    Returns:
        None. Nothing is written when ``data_list`` is empty or when it
        contains no rows at all.
    """
    if not data_list:
        return

    # API field name -> database column name.
    chanel_name = {
        'availableNum': '可售库存数',
        'availableSkuCnt': '在架sku数',
        'brandName': '品牌名',
        'ctgryOneName': '一级类目名称',
        'ctgryThreeName': '三级类目名称',
        'ctgryTwoName': '二级类目名称',
        'dealBuyerCnt': '支付人数_成交',
        'dealBuyerCntRate': '成交率_成交',
        'dealGmv': '成交gmv',
        'dealIdolCnt': '销售爱豆人数',
        'dealProductCnt': '销售量_成交',
        'dealProductCntRate': '售罄率',
        'dealSkuCnt': '成交sku数',
        'dealTwoCnt': '订单数_成交',
        'downSkuCnt': '可售sku数',
        'etlInsertTime': '数据更新时间',
        'forwardConfirmCnt': '转发爱豆人数',
        'forwardConfirmNum': '转发次数',
        'merStyleNo': '商品款号',  # spu ranking
        'styleNo': '商品货号',  # sku ranking
        'orderBuyerCnt': '支付人数_交易',
        'orderBuyerCntRate': '成交率_交易',
        'orderGmv': '下单gmv',
        'orderProductCnt': '销售量_交易',
        'orderSkuCnt': '下单sku数',
        'orderTwoCnt': '订单数_交易',
        'pictureUrl': '图片',
        'pvNum': '浏览量',
        'rn': '序号',
        'spuId': 'spuid',
        'spuName': '商品名称',
        'supplyAmount': '供货额',
        'supplyPerAmount': '供货价',
        'uvNum': '访客量',
        'colorName': '颜色',
        'sizeName': '尺码',
        'barCode': '条码',  # sku ranking: style number + colour code
    }
    # Drop entries that have no translation yet.
    chanel_name = {k: v for k, v in chanel_name.items() if v != ''}

    _results = []
    for end_date, d_list in data_list:
        for main_data_dict in d_list:
            dict_data = {
                '日期': end_date,
                '平台': '爱库存',
                '店铺名称': self.shop_name,
            }
            # Translated fields first, then every untranslated field under
            # its original API name so that no value is lost.
            dict_data.update(
                {chanel_name[k]: v for k, v in main_data_dict.items() if k in chanel_name}
            )
            dict_data.update(
                {k: v for k, v in main_data_dict.items() if k not in chanel_name}
            )
            dict_data['更新时间'] = datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S')
            # Falsy values and the literal strings 'none' (any case) / 'null'
            # are stored as 0.
            _results.append({
                k: v if (v and str(v).lower() != 'none' and str(v) != 'null') else 0
                for k, v in dict_data.items()
            })

    if not _results:
        # Every (date, rows) pair was empty: there is nothing to insert.
        return

    # Column types, keyed by the column names produced above.
    set_typ = {
        '可售库存数': 'INT',
        '在架sku数': 'INT',
        '品牌名': 'varchar(50)',
        '一级类目名称': 'varchar(50)',
        '三级类目名称': 'varchar(50)',
        '二级类目名称': 'varchar(50)',
        '支付人数_成交': 'INT',
        '成交率_成交': 'decimal(10,4)',
        '成交gmv': 'decimal(10,2)',
        '销售爱豆人数': 'INT',
        '销售量_成交': 'INT',
        '售罄率': 'decimal(10,4)',
        '成交sku数': 'INT',
        '订单数_成交': 'INT',
        '可售sku数': 'INT',
        '数据更新时间': 'DATETIME',
        '转发爱豆人数': 'INT',
        '转发次数': 'INT',
        '商品款号': 'varchar(50)',
        '商品货号': 'varchar(50)',
        '支付人数_交易': 'INT',
        '成交率_交易': 'decimal(10,4)',
        '下单gmv': 'decimal(10,2)',
        '销售量_交易': 'INT',
        '下单sku数': 'INT',
        '订单数_交易': 'INT',
        '图片': 'varchar(255)',
        '浏览量': 'INT',
        '序号': 'INT',
        'spuid': 'varchar(50)',
        '商品名称': 'varchar(50)',
        '供货额': 'decimal(10,2)',
        '供货价': 'decimal(10,2)',
        '访客量': 'INT',
        '颜色': 'varchar(50)',
        '尺码': 'varchar(50)',
        '条码': 'varchar(50)',  # style number + colour code
    }
    print(f'{self.shop_name} 正在更新数据库 {db_name} -> {table_name}...')
    if 'spu' in table_name:
        drop_dup = ['日期', '平台', '店铺名称', '商品款号', '访客量']
    else:
        drop_dup = ['日期', '平台', '店铺名称', '条码']
    m_engine.insert_many_dict(
        db_name=db_name,
        table_name=table_name,
        dict_data_list=_results,
        icm_update=drop_dup,  # unique composite key
        set_typ=set_typ,
        # NOTE(review): the original comment here read "create columns that
        # allow NULL values" - confirm what this flag means in insert_many_dict.
        allow_not_null=False,
    )
|
455
415
|
|
456
|
-
def
|
457
|
-
|
458
|
-
|
459
|
-
|
460
|
-
|
461
|
-
|
462
|
-
|
463
|
-
|
464
|
-
|
416
|
+
def get_sign(self):
    """Send one signed sample request to the sku ranking endpoint and print the reply.

    The signature, the timestamp and the request dates are hard-coded sample
    values. As a side effect ``self.url`` is overwritten with the signed URL.

    Returns:
        None. The decoded JSON response is printed.
    """
    sign = 'bbcf5b9cf3d3b8ba9c22550dcba8a3ce97be766f'
    # NOTE(review): fixed sample timestamp; a live value would be
    # int(round(time.time() * 1000)) - confirm whether the sign depends on it.
    current_timestamp_ms = '1741396070777'
    self.url = f'https://treasurebox.aikucun.com/api/web/merchant/treasure/commodity/sku/list?time={current_timestamp_ms}&sign={sign}'
    headers = {
        # The user agent must be sent under the real header name; the
        # original used the key 'headers', which is not a User-Agent header.
        'User-Agent': ua_sj.get_ua(),
        'referer': 'https://treasurebox.aikucun.com/dashboard/commodity/ranking/merchant',
        'content-type': 'application/json;charset=UTF-8',
        'origin': 'https://treasurebox.aikucun.com',
    }
    data = {
        'beginDate': '20250307',
        'brandIds': [],
        'cropId': '',
        'cropName': '',
        'ctgryOneIds': [],
        'ctgryThreeIds': [],
        'ctgryTwoIds': [],
        'dimValue': '',
        'endDate': '20250307',
        'merchantShopCode': '',
        'orderByName': 'dealGmv',
        'orderType': 'desc',
        'pageNum': 1,
        'pageSize': 10,
    }
    res = requests.post(
        url=self.url,
        headers=headers,
        data=json.dumps(data),
        timeout=30,  # never block forever on an unresponsive server
    )
    print(res.json())
|
477
451
|
|
478
452
|
|
479
|
-
|
480
|
-
|
481
|
-
|
482
|
-
|
483
|
-
|
484
|
-
|
453
|
+
def main(start_date, end_date, item_type=None):
    """Download the requested ranking lists and store them in the database.

    Args:
        start_date: First day to fetch, passed through to ``get_data_from_bbx``.
        end_date: Last day to fetch, passed through to ``get_data_from_bbx``.
        item_type: Ranking kinds to fetch: an iterable of ``'spu'`` / ``'sku'``,
            or a single such string. Defaults to ``['spu']``.

    For each valid kind the download is attempted at most twice. After an
    empty result the account is logged in again and the token saved before
    the next attempt.
    """
    if item_type is None:
        item_type = ['spu']
    elif isinstance(item_type, str):
        # A bare string would otherwise be iterated character by character.
        item_type = [item_type]

    ak = AikuCun()
    for type_ in item_type:
        if type_ not in ('spu', 'sku'):
            print(f'{item_type} 非法参数: {type_}')
            continue
        data_list = None
        for _ in range(2):
            data_list = ak.get_data_from_bbx(
                start_date=start_date,
                end_date=end_date,
                item_type=type_,
                page_num=1,
                page_size=300,
            )
            if data_list:
                break
            # Empty result: log in again and save the token before retrying.
            ak.logining()
            ak.save_token()

        ak.insert_datas(
            data_list=data_list,
            db_name='爱库存2',
            table_name=f'{type_}榜单',
        )
|
488
479
|
|
489
480
|
|
490
|
-
if __name__ == '__main__':
|
491
|
-
# get_cookie_aikucun() # 登录并获取 cookies
|
492
|
-
akucun(date_num=30, headless=True) # 下载数据
|
493
481
|
|
494
|
-
|
495
|
-
|
482
|
+
if __name__ == '__main__':
    # Script entry point: fetch both the spu and the sku ranking for one day.
    main(start_date='2025-03-06', end_date='2025-03-06', item_type=['spu', 'sku'])
|
@@ -15,11 +15,11 @@ mdbq/log/mylogger.py,sha256=oaT7Bp-Hb9jZt52seP3ISUuxVcI19s4UiqTeouScBO0,3258
|
|
15
15
|
mdbq/log/spider_logging.py,sha256=59xe4Ckb7m-sBt3GYk8DC_hQg7-jnjBRq1o718r1Ry8,1676
|
16
16
|
mdbq/mongo/__init__.py,sha256=SILt7xMtQIQl_m-ik9WLtJSXIVf424iYgCfE_tnQFbw,13
|
17
17
|
mdbq/mysql/__init__.py,sha256=A_DPJyAoEvTSFojiI2e94zP0FKtCkkwKP1kYUCSyQzo,11
|
18
|
-
mdbq/mysql/mysql.py,sha256=
|
18
|
+
mdbq/mysql/mysql.py,sha256=tR6l4Zzn9j6zKaFcy0Ktw2oL8OoX3QB6jDoDp1l2fiM,95474
|
19
19
|
mdbq/mysql/s_query.py,sha256=09Dp7DrVXui6dAI6zFDfrsUOdjPblF_oYUpgqbZMhXg,8757
|
20
20
|
mdbq/mysql/year_month_day.py,sha256=VgewoE2pJxK7ErjfviL_SMTN77ki8GVbTUcao3vFUCE,1523
|
21
21
|
mdbq/other/__init__.py,sha256=jso1oHcy6cJEfa7udS_9uO5X6kZLoPBF8l3wCYmr5dM,18
|
22
|
-
mdbq/other/download_sku_picture.py,sha256
|
22
|
+
mdbq/other/download_sku_picture.py,sha256=xX3jU2Nu2R9CoveM7xevRstg1P40hvR1KL8wqmwt3O0,44809
|
23
23
|
mdbq/other/porxy.py,sha256=UHfgEyXugogvXgsG68a7QouUCKaohTKKkI4RN-kYSdQ,4961
|
24
24
|
mdbq/other/pov_city.py,sha256=AEOmCOzOwyjHi9LLZWPKi6DUuSC-_M163664I52u9qw,21050
|
25
25
|
mdbq/other/ua_sj.py,sha256=JuVYzc_5QZ9s_oQSrTHVKkQv4S_7-CWx4oIKOARn_9U,22178
|
@@ -30,8 +30,8 @@ mdbq/pbix/refresh_all_old.py,sha256=_pq3WSQ728GPtEG5pfsZI2uTJhU8D6ra-htIk1JXYzw,
|
|
30
30
|
mdbq/redis/__init__.py,sha256=YtgBlVSMDphtpwYX248wGge1x-Ex_mMufz4-8W0XRmA,12
|
31
31
|
mdbq/redis/getredis.py,sha256=1pTga2iINx0NV2ffl0D-aspZhrZMDQR8SpohAv1acoo,24076
|
32
32
|
mdbq/spider/__init__.py,sha256=RBMFXGy_jd1HXZhngB2T2XTvJqki8P_Fr-pBcwijnew,18
|
33
|
-
mdbq/spider/aikucun.py,sha256=
|
34
|
-
mdbq-3.
|
35
|
-
mdbq-3.
|
36
|
-
mdbq-3.
|
37
|
-
mdbq-3.
|
33
|
+
mdbq/spider/aikucun.py,sha256=GBZsCjsN3i1ZW9pAxeMAmb-y3yW3p3HJpjfrnnna5gg,19702
|
34
|
+
mdbq-3.8.2.dist-info/METADATA,sha256=pmdQf_CAgoh9rcPTtyIQwrifPpr_WtddcV1sBPhDdj8,363
|
35
|
+
mdbq-3.8.2.dist-info/WHEEL,sha256=jB7zZ3N9hIM9adW7qlTAyycLYW9npaWKLRzaoVcLKcM,91
|
36
|
+
mdbq-3.8.2.dist-info/top_level.txt,sha256=2FQ-uLnCSB-OwFiWntzmwosW3X2Xqsg0ewh1axsaylA,5
|
37
|
+
mdbq-3.8.2.dist-info/RECORD,,
|
File without changes
|
File without changes
|