mdbq 3.7.26__py3-none-any.whl → 3.8.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
mdbq/mysql/mysql.py CHANGED
@@ -290,32 +290,33 @@ class MysqlUpload:
290
290
  cursor.execute(sql, dict_data['数据主体'])
291
291
 
292
292
  if reset_id:
293
- # 6. 重置自增列
294
- try:
295
- # 查询所有复合主键
296
- sql = (
297
- f"SELECT `COLUMN_NAME` AS `PrimaryKey` FROM `information_schema`.`COLUMNS` "
298
- f"WHERE `TABLE_SCHEMA` = '{db_name}'AND `TABLE_NAME` = '{table_name}' AND `COLUMN_KEY` = 'PRI';"
299
- )
300
- cursor.execute(sql)
301
- result = cursor.fetchall() # 复合主键数
302
- if len(result) <= 1: # 如果存在复合主键,则不能直接删除 id 键,其他主键可能不是唯一,会报错
303
- column_name = 'id'
304
- sql = (f'SELECT COLUMN_NAME FROM INFORMATION_SCHEMA.COLUMNS '
305
- f'WHERE TABLE_SCHEMA = %s AND TABLE_NAME = %s AND COLUMN_NAME = %s')
306
- cursor.execute(sql, (db_name, table_name, column_name))
307
- result = cursor.fetchone()
308
- if result:
309
- sql = f"ALTER TABLE `{table_name}` DROP COLUMN {column_name}" # 删除 id 列
310
- cursor.execute(sql)
311
- cursor.execute(
312
- f"ALTER TABLE `{table_name}` ADD column id INT AUTO_INCREMENT PRIMARY KEY FIRST;")
313
- cursor.execute(f"ALTER TABLE `{table_name}` AUTO_INCREMENT = 1") # 设置自增从 1 开始
314
- else:
315
- logger.info(f'{table_name} 存在复合主键: 存在复合主键: {[item['PrimaryKey'] for item in result]}, 无法重置自增id')
316
- except Exception as e:
317
- logger.error(f'333 {table_name} {e}')
318
- connection.rollback()
293
+ pass
294
+ # # 6. 重置自增列
295
+ # try:
296
+ # # 查询所有复合主键
297
+ # sql = (
298
+ # f"SELECT `COLUMN_NAME` AS `PrimaryKey` FROM `information_schema`.`COLUMNS` "
299
+ # f"WHERE `TABLE_SCHEMA` = '{db_name}'AND `TABLE_NAME` = '{table_name}' AND `COLUMN_KEY` = 'PRI';"
300
+ # )
301
+ # cursor.execute(sql)
302
+ # result = cursor.fetchall() # 复合主键数
303
+ # if len(result) <= 1: # 如果存在复合主键,则不能直接删除 id 键,其他主键可能不是唯一,会报错
304
+ # column_name = 'id'
305
+ # sql = (f'SELECT COLUMN_NAME FROM INFORMATION_SCHEMA.COLUMNS '
306
+ # f'WHERE TABLE_SCHEMA = %s AND TABLE_NAME = %s AND COLUMN_NAME = %s')
307
+ # cursor.execute(sql, (db_name, table_name, column_name))
308
+ # result = cursor.fetchone()
309
+ # if result:
310
+ # sql = f"ALTER TABLE `{table_name}` DROP COLUMN {column_name}" # 删除 id 列
311
+ # cursor.execute(sql)
312
+ # cursor.execute(
313
+ # f"ALTER TABLE `{table_name}` ADD column id INT AUTO_INCREMENT PRIMARY KEY FIRST;")
314
+ # cursor.execute(f"ALTER TABLE `{table_name}` AUTO_INCREMENT = 1") # 设置自增从 1 开始
315
+ # else:
316
+ # logger.info(f'{table_name} 存在复合主键: 存在复合主键: {[item['PrimaryKey'] for item in result]}, 无法重置自增id')
317
+ # except Exception as e:
318
+ # logger.error(f'333 {table_name} {e}')
319
+ # connection.rollback()
319
320
  connection.commit()
320
321
 
321
322
  @try_except
@@ -1014,35 +1015,36 @@ class MysqlUpload:
1014
1015
  chunksize=1000
1015
1016
  )
1016
1017
  if reset_id:
1017
- # 6. 重置自增列
1018
- try:
1019
- # 查询所有复合主键
1020
- sql = (
1021
- f"SELECT `COLUMN_NAME` AS `PrimaryKey` FROM `information_schema`.`COLUMNS` "
1022
- f"WHERE `TABLE_SCHEMA` = '{db_name}'AND `TABLE_NAME` = '{table_name}' AND `COLUMN_KEY` = 'PRI';"
1023
- )
1024
- cursor.execute(sql)
1025
- result = cursor.fetchall() # 复合主键数
1026
- if len(result) <= 1: # 如果存在复合主键,则不能直接删除 id 键,其他主键可能不是唯一,会报错
1027
- column_name = 'id'
1028
- sql = (f'SELECT COLUMN_NAME FROM INFORMATION_SCHEMA.COLUMNS '
1029
- f'WHERE TABLE_SCHEMA = %s AND TABLE_NAME = %s AND COLUMN_NAME = %s')
1030
- # cursor.execute(f"SHOW COLUMNS FROM `{table_name}` LIKE 'id'")
1031
- cursor.execute(sql, (db_name, table_name, column_name))
1032
- result = cursor.fetchone()
1033
- if result:
1034
- # cursor.execute(f"ALTER TABLE `{table_name}` DROP COLUMN id;") # 删除 id 列
1035
- sql = f"ALTER TABLE `{table_name}` DROP COLUMN {column_name}" # 删除 id 列
1036
- cursor.execute(sql)
1037
- cursor.execute(
1038
- f"ALTER TABLE `{table_name}` ADD column id INT AUTO_INCREMENT PRIMARY KEY FIRST;")
1039
- cursor.execute(f"ALTER TABLE `{table_name}` AUTO_INCREMENT = 1") # 设置自增从 1 开始
1040
- # logger.info(f'重置自增id')
1041
- else:
1042
- logger.info(f'{table_name} 存在复合主键: {[item['PrimaryKey'] for item in result]}, 无法重置自增id')
1043
- except Exception as e:
1044
- logger.error(f'333 {table_name} {e}')
1045
- connection.rollback()
1018
+ pass
1019
+ # # 6. 重置自增列
1020
+ # try:
1021
+ # # 查询所有复合主键
1022
+ # sql = (
1023
+ # f"SELECT `COLUMN_NAME` AS `PrimaryKey` FROM `information_schema`.`COLUMNS` "
1024
+ # f"WHERE `TABLE_SCHEMA` = '{db_name}'AND `TABLE_NAME` = '{table_name}' AND `COLUMN_KEY` = 'PRI';"
1025
+ # )
1026
+ # cursor.execute(sql)
1027
+ # result = cursor.fetchall() # 复合主键数
1028
+ # if len(result) <= 1: # 如果存在复合主键,则不能直接删除 id 键,其他主键可能不是唯一,会报错
1029
+ # column_name = 'id'
1030
+ # sql = (f'SELECT COLUMN_NAME FROM INFORMATION_SCHEMA.COLUMNS '
1031
+ # f'WHERE TABLE_SCHEMA = %s AND TABLE_NAME = %s AND COLUMN_NAME = %s')
1032
+ # # cursor.execute(f"SHOW COLUMNS FROM `{table_name}` LIKE 'id'")
1033
+ # cursor.execute(sql, (db_name, table_name, column_name))
1034
+ # result = cursor.fetchone()
1035
+ # if result:
1036
+ # # cursor.execute(f"ALTER TABLE `{table_name}` DROP COLUMN id;") # 删除 id 列
1037
+ # sql = f"ALTER TABLE `{table_name}` DROP COLUMN {column_name}" # 删除 id 列
1038
+ # cursor.execute(sql)
1039
+ # cursor.execute(
1040
+ # f"ALTER TABLE `{table_name}` ADD column id INT AUTO_INCREMENT PRIMARY KEY FIRST;")
1041
+ # cursor.execute(f"ALTER TABLE `{table_name}` AUTO_INCREMENT = 1") # 设置自增从 1 开始
1042
+ # # logger.info(f'重置自增id')
1043
+ # else:
1044
+ # logger.info(f'{table_name} 存在复合主键: {[item['PrimaryKey'] for item in result]}, 无法重置自增id')
1045
+ # except Exception as e:
1046
+ # logger.error(f'333 {table_name} {e}')
1047
+ # connection.rollback()
1046
1048
  connection.commit() # 提交事务
1047
1049
  connection.close()
1048
1050
  return
@@ -1070,36 +1072,36 @@ class MysqlUpload:
1070
1072
  index=False,
1071
1073
  chunksize=1000
1072
1074
  )
1073
- # 6. 重置自增列
1074
- try:
1075
- # 查询所有复合主键
1076
- sql = (
1077
- f"SELECT `COLUMN_NAME` AS `PrimaryKey` FROM `information_schema`.`COLUMNS` "
1078
- f"WHERE `TABLE_SCHEMA` = '{db_name}'AND `TABLE_NAME` = '{table_name}' AND `COLUMN_KEY` = 'PRI';"
1079
- )
1080
- cursor.execute(sql)
1081
- result = cursor.fetchall() # 复合主键数
1082
- if len(result) <= 1: # 如果存在复合主键,则不能直接删除 id 键,其他主键可能不是唯一,会报错
1083
- column_name = 'id'
1084
- sql = (f'SELECT COLUMN_NAME FROM INFORMATION_SCHEMA.COLUMNS '
1085
- f'WHERE TABLE_SCHEMA = %s AND TABLE_NAME = %s AND COLUMN_NAME = %s')
1086
- # cursor.execute(f"SHOW COLUMNS FROM `{table_name}` LIKE 'id'")
1087
- cursor.execute(sql, (db_name, table_name, column_name))
1088
- result = cursor.fetchone()
1089
- if result:
1090
- # cursor.execute(f"ALTER TABLE `{table_name}` DROP COLUMN id;") # 删除 id 列
1091
- sql = f"ALTER TABLE `{table_name}` DROP COLUMN {column_name}" # 删除 id 列
1092
- cursor.execute(sql)
1093
- cursor.execute(
1094
- f"ALTER TABLE `{table_name}` ADD column id INT AUTO_INCREMENT PRIMARY KEY FIRST;")
1095
- cursor.execute(f"ALTER TABLE `{table_name}` AUTO_INCREMENT = 1") # 设置自增从 1 开始
1096
- # logger.info(f'重置自增id')
1097
- else:
1098
- logger.info(f'{table_name} 存在复合主键: {[item['PrimaryKey'] for item in result]}, 无法重置自增id')
1099
- except Exception as e:
1100
- logger.error(f'333 {table_name} {e}')
1101
- connection.rollback()
1102
- connection.close()
1075
+ # # 6. 重置自增列
1076
+ # try:
1077
+ # # 查询所有复合主键
1078
+ # sql = (
1079
+ # f"SELECT `COLUMN_NAME` AS `PrimaryKey` FROM `information_schema`.`COLUMNS` "
1080
+ # f"WHERE `TABLE_SCHEMA` = '{db_name}'AND `TABLE_NAME` = '{table_name}' AND `COLUMN_KEY` = 'PRI';"
1081
+ # )
1082
+ # cursor.execute(sql)
1083
+ # result = cursor.fetchall() # 复合主键数
1084
+ # if len(result) <= 1: # 如果存在复合主键,则不能直接删除 id 键,其他主键可能不是唯一,会报错
1085
+ # column_name = 'id'
1086
+ # sql = (f'SELECT COLUMN_NAME FROM INFORMATION_SCHEMA.COLUMNS '
1087
+ # f'WHERE TABLE_SCHEMA = %s AND TABLE_NAME = %s AND COLUMN_NAME = %s')
1088
+ # # cursor.execute(f"SHOW COLUMNS FROM `{table_name}` LIKE 'id'")
1089
+ # cursor.execute(sql, (db_name, table_name, column_name))
1090
+ # result = cursor.fetchone()
1091
+ # if result:
1092
+ # # cursor.execute(f"ALTER TABLE `{table_name}` DROP COLUMN id;") # 删除 id 列
1093
+ # sql = f"ALTER TABLE `{table_name}` DROP COLUMN {column_name}" # 删除 id 列
1094
+ # cursor.execute(sql)
1095
+ # cursor.execute(
1096
+ # f"ALTER TABLE `{table_name}` ADD column id INT AUTO_INCREMENT PRIMARY KEY FIRST;")
1097
+ # cursor.execute(f"ALTER TABLE `{table_name}` AUTO_INCREMENT = 1") # 设置自增从 1 开始
1098
+ # # logger.info(f'重置自增id')
1099
+ # else:
1100
+ # logger.info(f'{table_name} 存在复合主键: {[item['PrimaryKey'] for item in result]}, 无法重置自增id')
1101
+ # except Exception as e:
1102
+ # logger.error(f'333 {table_name} {e}')
1103
+ # connection.rollback()
1104
+ # connection.close()
1103
1105
  return
1104
1106
 
1105
1107
  datas = df.to_dict(orient='records')
@@ -1189,35 +1191,36 @@ class MysqlUpload:
1189
1191
  pass
1190
1192
 
1191
1193
  if reset_id:
1192
- # 6. 重置自增列
1193
- try:
1194
- # 查询所有复合主键
1195
- sql = (
1196
- f"SELECT `COLUMN_NAME` AS `PrimaryKey` FROM `information_schema`.`COLUMNS` "
1197
- f"WHERE `TABLE_SCHEMA` = '{db_name}'AND `TABLE_NAME` = '{table_name}' AND `COLUMN_KEY` = 'PRI';"
1198
- )
1199
- cursor.execute(sql)
1200
- result = cursor.fetchall() # 复合主键数
1201
- if len(result) <= 1: # 如果存在复合主键,则不能直接删除 id 键,其他主键可能不是唯一,会报错
1202
- column_name = 'id'
1203
- sql = (f'SELECT COLUMN_NAME FROM INFORMATION_SCHEMA.COLUMNS '
1204
- f'WHERE TABLE_SCHEMA = %s AND TABLE_NAME = %s AND COLUMN_NAME = %s')
1205
- # cursor.execute(f"SHOW COLUMNS FROM `{table_name}` LIKE 'id'")
1206
- cursor.execute(sql, (db_name, table_name, column_name))
1207
- result = cursor.fetchone()
1208
- if result:
1209
- # cursor.execute(f"ALTER TABLE `{table_name}` DROP COLUMN id;") # 删除 id 列
1210
- sql = f"ALTER TABLE `{table_name}` DROP COLUMN {column_name}" # 删除 id 列
1211
- cursor.execute(sql)
1212
- cursor.execute(
1213
- f"ALTER TABLE `{table_name}` ADD column id INT AUTO_INCREMENT PRIMARY KEY FIRST;")
1214
- cursor.execute(f"ALTER TABLE `{table_name}` AUTO_INCREMENT = 1") # 设置自增从 1 开始
1215
- # logger.info(f'重置自增id')
1216
- else:
1217
- logger.info(f'{table_name} 存在复合主键: {[item['PrimaryKey'] for item in result]}, 无法重置自增id')
1218
- except Exception as e:
1219
- logger.error(f'333 {table_name} {e}')
1220
- connection.rollback()
1194
+ pass
1195
+ # # 6. 重置自增列
1196
+ # try:
1197
+ # # 查询所有复合主键
1198
+ # sql = (
1199
+ # f"SELECT `COLUMN_NAME` AS `PrimaryKey` FROM `information_schema`.`COLUMNS` "
1200
+ # f"WHERE `TABLE_SCHEMA` = '{db_name}'AND `TABLE_NAME` = '{table_name}' AND `COLUMN_KEY` = 'PRI';"
1201
+ # )
1202
+ # cursor.execute(sql)
1203
+ # result = cursor.fetchall() # 复合主键数
1204
+ # if len(result) <= 1: # 如果存在复合主键,则不能直接删除 id 键,其他主键可能不是唯一,会报错
1205
+ # column_name = 'id'
1206
+ # sql = (f'SELECT COLUMN_NAME FROM INFORMATION_SCHEMA.COLUMNS '
1207
+ # f'WHERE TABLE_SCHEMA = %s AND TABLE_NAME = %s AND COLUMN_NAME = %s')
1208
+ # # cursor.execute(f"SHOW COLUMNS FROM `{table_name}` LIKE 'id'")
1209
+ # cursor.execute(sql, (db_name, table_name, column_name))
1210
+ # result = cursor.fetchone()
1211
+ # if result:
1212
+ # # cursor.execute(f"ALTER TABLE `{table_name}` DROP COLUMN id;") # 删除 id 列
1213
+ # sql = f"ALTER TABLE `{table_name}` DROP COLUMN {column_name}" # 删除 id 列
1214
+ # cursor.execute(sql)
1215
+ # cursor.execute(
1216
+ # f"ALTER TABLE `{table_name}` ADD column id INT AUTO_INCREMENT PRIMARY KEY FIRST;")
1217
+ # cursor.execute(f"ALTER TABLE `{table_name}` AUTO_INCREMENT = 1") # 设置自增从 1 开始
1218
+ # # logger.info(f'重置自增id')
1219
+ # else:
1220
+ # logger.info(f'{table_name} 存在复合主键: {[item['PrimaryKey'] for item in result]}, 无法重置自增id')
1221
+ # except Exception as e:
1222
+ # logger.error(f'333 {table_name} {e}')
1223
+ # connection.rollback()
1221
1224
  connection.commit() # 提交事务
1222
1225
  connection.close()
1223
1226
 
@@ -1500,32 +1503,32 @@ class OptimizeDatas:
1500
1503
  else: # 不存在日期列的情况
1501
1504
  self.delete_duplicate2(table_name=table_name, except_key=except_key)
1502
1505
 
1503
- # 6. 重置自增列
1504
- try:
1505
- # 查询所有复合主键
1506
- sql = (
1507
- f"SELECT `COLUMN_NAME` AS `PrimaryKey` FROM `information_schema`.`COLUMNS` "
1508
- f"WHERE `TABLE_SCHEMA` = '{self.db_name}'AND `TABLE_NAME` = '{table_name}' AND `COLUMN_KEY` = 'PRI';"
1509
- )
1510
- cursor.execute(sql)
1511
- result = cursor.fetchall() # 复合主键数
1512
- if len(result) <= 1: # 如果存在复合主键,则不能直接删除 id 键,其他主键可能不是唯一,会报错
1513
- column_name = 'id'
1514
- sql = (f'SELECT COLUMN_NAME FROM INFORMATION_SCHEMA.COLUMNS '
1515
- f'WHERE TABLE_SCHEMA = %s AND TABLE_NAME = %s AND COLUMN_NAME = %s')
1516
- cursor.execute(sql, (self.db_name, table_name, column_name))
1517
- result = cursor.fetchone()
1518
- if result:
1519
- sql = f"ALTER TABLE `{table_name}` DROP COLUMN {column_name}" # 删除 id 列
1520
- cursor.execute(sql)
1521
- cursor.execute(
1522
- f"ALTER TABLE `{table_name}` ADD column id INT AUTO_INCREMENT PRIMARY KEY FIRST;")
1523
- cursor.execute(f"ALTER TABLE `{table_name}` AUTO_INCREMENT = 1") # 设置自增从 1 开始
1524
- else:
1525
- logger.info(f'{table_name} 存在复合主键: {[item['PrimaryKey'] for item in result]}, 无法重置自增id')
1526
- except Exception as e:
1527
- logger.error(f'333 {table_name} {e}')
1528
- self.connection.rollback()
1506
+ # # 6. 重置自增列
1507
+ # try:
1508
+ # # 查询所有复合主键
1509
+ # sql = (
1510
+ # f"SELECT `COLUMN_NAME` AS `PrimaryKey` FROM `information_schema`.`COLUMNS` "
1511
+ # f"WHERE `TABLE_SCHEMA` = '{self.db_name}'AND `TABLE_NAME` = '{table_name}' AND `COLUMN_KEY` = 'PRI';"
1512
+ # )
1513
+ # cursor.execute(sql)
1514
+ # result = cursor.fetchall() # 复合主键数
1515
+ # if len(result) <= 1: # 如果存在复合主键,则不能直接删除 id 键,其他主键可能不是唯一,会报错
1516
+ # column_name = 'id'
1517
+ # sql = (f'SELECT COLUMN_NAME FROM INFORMATION_SCHEMA.COLUMNS '
1518
+ # f'WHERE TABLE_SCHEMA = %s AND TABLE_NAME = %s AND COLUMN_NAME = %s')
1519
+ # cursor.execute(sql, (self.db_name, table_name, column_name))
1520
+ # result = cursor.fetchone()
1521
+ # if result:
1522
+ # sql = f"ALTER TABLE `{table_name}` DROP COLUMN {column_name}" # 删除 id 列
1523
+ # cursor.execute(sql)
1524
+ # cursor.execute(
1525
+ # f"ALTER TABLE `{table_name}` ADD column id INT AUTO_INCREMENT PRIMARY KEY FIRST;")
1526
+ # cursor.execute(f"ALTER TABLE `{table_name}` AUTO_INCREMENT = 1") # 设置自增从 1 开始
1527
+ # else:
1528
+ # logger.info(f'{table_name} 存在复合主键: {[item['PrimaryKey'] for item in result]}, 无法重置自增id')
1529
+ # except Exception as e:
1530
+ # logger.error(f'333 {table_name} {e}')
1531
+ # self.connection.rollback()
1529
1532
  self.connection.close()
1530
1533
  logger.info(f'mysql({self.host}: {self.port}) {self.db_name} 数据库优化完成!')
1531
1534
 
@@ -3,6 +3,7 @@ import datetime
3
3
  import getpass
4
4
  import json
5
5
  import os
6
+ import sys
6
7
  import platform
7
8
  import random
8
9
  from dateutil.relativedelta import relativedelta
mdbq/spider/aikucun.py CHANGED
@@ -1,190 +1,75 @@
1
1
  # -*- coding:utf-8 -*-
2
2
  import datetime
3
- import getpass
3
+ import requests
4
4
  import json
5
5
  import os
6
6
  import sys
7
- import pathlib
8
- import platform
9
7
  import re
10
8
  import time
11
9
  import warnings
12
- import requests
13
- import pandas as pd
10
+ import platform
11
+ import getpass
14
12
  from selenium import webdriver
15
13
  from selenium.webdriver.support.wait import WebDriverWait
16
14
  from selenium.webdriver.common.by import By
17
15
  from selenium.webdriver.support import expected_conditions as EC
18
16
  from selenium.webdriver.chrome.service import Service
19
- from mdbq.config import set_support
20
- from selenium.webdriver.common.keys import Keys
21
- from mdbq.other import ua_sj
17
+ import pymysql
18
+ import pandas as pd
19
+ from mdbq.log import spider_logging
22
20
  from mdbq.mysql import mysql
23
21
  from mdbq.mysql import s_query
24
- from mdbq.config import default
25
-
26
- warnings.filterwarnings('ignore')
22
+ from mdbq.config import config
23
+ from mdbq.other import ua_sj
27
24
 
28
25
 
29
- if platform.system() == 'Windows':
30
- # windows版本
31
- D_PATH = str(pathlib.Path(f'C:\\Users\\{getpass.getuser()}\\Downloads'))
32
- elif platform.system() == 'Linux':
33
- D_PATH = os.path.join(os.path.realpath(os.path.dirname(sys.argv[0])), 'Downloads')
34
- if not os.path.exists(D_PATH):
35
- os.makedirs(D_PATH)
36
- else:
37
- D_PATH = str(pathlib.Path(f'/Users/{getpass.getuser()}/Downloads'))
38
- upload_path = os.path.join(D_PATH, '数据上传中心', '爱库存') # 此目录位于下载文件夹
26
+ content = config.read_config(file_path=os.path.join(os.path.realpath(os.path.dirname(sys.argv[0])), 'spd.txt'))
27
+ username, password, host, port = content['username'], content['password'], content['host'], content['port']
39
28
 
40
- targe_host, hostname, local = default.return_default_host()
41
- m_engine, username, password, host, port = default.get_mysql_engine(platform='Windows', hostname=hostname, sql='mysql', local=local, config_file=None)
42
- print(username, password, host, port)
29
+ m_engine = mysql.MysqlUpload(username=username, password=password, host=host, port=port, charset='utf8mb4')
43
30
  # 实例化一个数据查询类,用来获取 cookies 表数据
44
31
  download = s_query.QueryDatas(username=username, password=password, host=host, port=port)
32
+ logger = spider_logging.setup_logging()
45
33
 
46
34
 
47
- def get_cookie_aikucun():
48
- """
49
- """
50
- _url = 'https://gray-merc.aikucun.com/index.html'
51
- cookie_path = os.path.join(set_support.SetSupport(dirname='support').dirname, 'cookies')
52
- filename_aikucun = 'cookie_aikucun.json'
53
- print(_url)
54
-
55
- option = webdriver.ChromeOptions() # 浏览器启动选项
56
- option.headless = True # False指定为无界面模式
57
- # 调整chrome启动配置
58
- option.add_argument("--disable-gpu")
59
- option.add_argument("--no-sandbox")
60
- option.add_argument("--disable-dev-shm-usage")
61
- option.add_experimental_option("excludeSwitches", ["enable-automation"])
62
- option.add_experimental_option("useAutomationExtension", False)
63
- # if platform.system() == 'Windows':
64
- # service = Service(os.path.join(f'C:\\Users\\{getpass.getuser()}\\chromedriver.exe'))
65
- # else:
66
- # service = Service('/usr/local/bin/chromedriver')
67
- if platform.system() == 'Windows':
68
- # 设置Chrome的路径
69
- chrome_path = os.path.join(f'C:\\Users\\{getpass.getuser()}', 'chrome\\chrome_win64\\chrome.exe')
70
- chromedriver_path = os.path.join(f'C:\\Users\\{getpass.getuser()}', 'chrome\\chromedriver.exe')
71
- # os.environ["webdriver.chrome.driver"] = chrome_path
72
- option.binary_location = chrome_path # windows 设置此参数有效
73
- service = Service(chromedriver_path)
74
- # service = Service(str(pathlib.Path(f'C:\\Users\\{getpass.getuser()}\\chromedriver.exe'))) # 旧路径
75
- else:
76
- # 设置Chrome的路径
77
- chrome_path = '/usr/local/chrome/Google Chrome for Testing.app'
78
- chromedriver_path = '/usr/local/chrome/chromedriver'
79
- os.environ["webdriver.chrome.driver"] = chrome_path
80
-
81
- service = Service(chromedriver_path)
82
- _driver = webdriver.Chrome(service=service, options=option) # 创建Chrome驱动程序实例
83
-
84
- # 登录
85
- _driver.get(_url)
86
- time.sleep(0.1)
87
- _driver.maximize_window() # 窗口最大化 方便后续加载数据
88
- print(f'请登录并切换到百宝箱,再保存 cookies: \n https://treasurebox.aikucun.com/dashboard/commodity/ranking/merchant?LS=true&shopId=1814114991487782914&from=menu&v=0.1936043279838604')
89
- wait = WebDriverWait(_driver, timeout=15)
90
- input_box = wait.until(
91
- EC.element_to_be_clickable(
92
- (By.XPATH, '//input[@placeholder="请输入用户名"]'))) #
93
- input_box.send_keys('广东万里马实业股份有限公司')
94
- input_box = wait.until(
95
- EC.element_to_be_clickable(
96
- (By.XPATH, '//input[@placeholder="请输入密码"]'))) #
97
- input_box.send_keys('wlm123$$$')
98
- time.sleep(0.1)
99
- elements = _driver.find_elements(
100
- By.XPATH, '//button[@class="merchant_login_btn" and contains(text(), "登录")]')
101
- _driver.execute_script("arguments[0].click();", elements[0])
102
- for i in range(100):
35
+ def keep_connect(_db_name, _config, max_try: int=10):
36
+ attempts = 1
37
+ while attempts <= max_try:
103
38
  try:
104
- wait.until(
105
- EC.element_to_be_clickable(
106
- (By.XPATH, '//div[@class="user-info nav-user-slider"]')))
107
- _driver.get(' https://treasurebox.aikucun.com/dashboard/commodity/ranking/merchant?LS=true&shopId=1814114991487782914&from=menu&v=0.1936043279838604')
108
- time.sleep(3)
109
- break
110
- except:
111
- time.sleep(5)
112
-
113
- d_time = datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S')
114
- print(f'{d_time} 登录成功,正在获取cookie...')
115
- time.sleep(0.1)
116
-
117
- # 将cookies保存为json格式
118
- cookies_list = _driver.get_cookies()
119
- new_cookies_list = []
120
- for cookie in cookies_list:
121
- # 该字段有问题所以删除就可以
122
- if 'HWWAFSESTIME' in cookie:
123
- continue
124
- else:
125
- new_cookies_list.append(cookie)
126
-
127
- ######### 新增 写入 mysql #########
128
- set_typ = {
129
- '日期': 'date',
130
- 'domain': 'varchar(100)',
131
- 'expiry': 'int',
132
- 'httpOnly': 'varchar(20)',
133
- 'name': 'varchar(50)',
134
- 'path': 'varchar(50)',
135
- 'sameSite': 'varchar(50)',
136
- 'secure': 'varchar(50)',
137
- 'value': 'text',
138
- '更新时间': 'timestamp'
139
- }
140
- _cookies_list = []
141
- for item in cookies_list:
142
- new_dict = {'日期': datetime.datetime.today().strftime('%Y-%m-%d'), }
143
- for k, v in item.items():
144
- if v is None:
145
- v = 'None'
146
- new_dict.update({k: v})
147
- if 'expiry' not in new_dict:
148
- new_dict.update({'expiry': 0})
149
- new_dict.update({'更新时间': datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S')})
150
- _cookies_list.append(new_dict)
151
- m_engine.insert_many_dict(
152
- db_name='cookie文件',
153
- table_name='main_aikucun',
154
- dict_data_list=_cookies_list,
155
- set_typ=set_typ,
156
- allow_not_null=True, # 允许插入空值
157
- )
158
- #############################################
159
-
160
- json_file = os.path.join(cookie_path, filename_aikucun)
161
- with open(json_file, 'w', encoding='utf-8') as f:
162
- json.dump(new_cookies_list, f, ensure_ascii=False, sort_keys=True, indent=4)
163
- print(f'cookie已保存: {json_file}')
164
-
165
- # _file = os.path.join(cookie_path, filename_aikucun)
166
- # with open(_file, 'w') as f:
167
- # # 将cookies保存为json格式
168
- # cookies_list = _driver.get_cookies()
169
- # # for cookie in cookies_list:
170
- # # # 该字段有问题所以删除就可以
171
- # # if 'expiry' in cookie:
172
- # # del cookie['expiry']
173
- # # # if 'domain' in cookie:
174
- # # # cookie['domain'] = '.taobao.com'
175
- # cookies_list = json.dumps(cookies_list)
176
- # f.write(cookies_list)
177
- # print(f'cookie已保存: {_file}')
178
- _driver.quit()
39
+ connection = pymysql.connect(**_config) # 连接数据库
40
+ return connection
41
+ except Exception as e:
42
+ logger.error(f'{_db_name}: 连接失败,正在重试: {host}:{port} {attempts}/{max_try} {e}')
43
+ attempts += 1
44
+ time.sleep(30)
45
+ logger.error(f'{_db_name}: 连接失败,重试次数超限,当前设定次数: {max_try}')
46
+ return None
47
+
48
+
49
+ def dates_between(start_date, end_date) -> list:
50
+ """ 获取两个日期之间的所有日期, 返回 list """
51
+ start_date = pd.to_datetime(start_date)
52
+ end_date = pd.to_datetime(end_date)
53
+ dates = []
54
+ current_date = start_date
55
+ while current_date <= end_date:
56
+ dates.append(current_date.strftime('%Y-%m-%d'))
57
+ current_date += datetime.timedelta(days=1)
58
+ return dates
179
59
 
180
60
 
181
61
  class AikuCun:
182
62
  def __init__(self):
183
- # self.url = 'https://gray-merc.aikucun.com/index.html'
184
- self.sp_url = 'https://treasurebox.aikucun.com/dashboard/commodity/ranking/merchant?LS=true&shopId=1814114991487782914&from=menu&v=0.1936043279838604'
185
- self.cookie_path = os.path.join(set_support.SetSupport(dirname='support').dirname, 'cookies')
63
+ self.url = 'https://gray-merc.aikucun.com/index.html'
64
+ self.db_name = 'cookie文件'
65
+ self.table_name = 'main_aikucun'
66
+ self.shop_name = '万里马爱库存'
67
+ self.token = None
68
+ self.today = datetime.date.today()
69
+ self.start_date = (self.today - datetime.timedelta(days=7)).strftime('%Y-%m-%d')
70
+ self.end_date = (self.today - datetime.timedelta(days=1)).strftime('%Y-%m-%d')
186
71
 
187
- def login(self, shop_name='aikucun', headless=False):
72
+ def logining(self, shop_name='aikucun', headless=False):
188
73
  option = webdriver.ChromeOptions()
189
74
  if headless:
190
75
  option.add_argument("--headless") # 设置无界面模式
@@ -219,8 +104,8 @@ class AikuCun:
219
104
  option.add_experimental_option('prefs', prefs)
220
105
  option.add_experimental_option('excludeSwitches', ['enable-automation']) # 实验性参数, 左上角小字
221
106
 
222
- # 修改默认下载文件夹路径
223
- option.add_experimental_option("prefs", {"download.default_directory": f'{upload_path}'})
107
+ # # 修改默认下载文件夹路径
108
+ # option.add_experimental_option("prefs", {"download.default_directory": f'{upload_path}'})
224
109
 
225
110
  # # 通过excludeSwitches参数禁用默认的启动路径
226
111
  # option.add_experimental_option('excludeSwitches', ['enable-automation'])
@@ -249,247 +134,354 @@ class AikuCun:
249
134
  _driver.maximize_window() # 窗口最大化 方便后续加载数据
250
135
 
251
136
  # 登录
252
- _driver.get(self.sp_url)
253
- _driver.delete_all_cookies() # 首先清除浏览器打开已有的cookies
254
- name_lists = os.listdir(self.cookie_path) # cookie 放在主目录下的 cookies 文件夹
255
- for name in name_lists:
256
- if shop_name in name and name.endswith('.json') and '~' not in name and '.DS' not in name:
257
- with open(os.path.join(self.cookie_path, name), 'r') as f:
258
- cookies_list = json.load(f) # 使用json读取cookies 注意读取的是文件 所以用load而不是loads
259
- for cookie in cookies_list:
260
- _driver.add_cookie(cookie) # 添加cookies信息
261
- # print(cookie)
262
- db_name = 'cookie文件'
263
- table_name = f'main_{shop_name}'
137
+ _driver.get(url='https://gray-merc.aikucun.com/index.html') # self.url 可能被修改,这里使用固定页面获取 sign
138
+ time.sleep(0.1)
139
+ _driver.maximize_window() # 窗口最大化 方便后续加载数据
140
+ wait = WebDriverWait(_driver, timeout=15)
141
+ input_box = wait.until(
142
+ EC.element_to_be_clickable(
143
+ (By.XPATH, '//input[@placeholder="请输入用户名"]'))) #
144
+ input_box.send_keys('广东万里马实业股份有限公司')
145
+ input_box = wait.until(
146
+ EC.element_to_be_clickable(
147
+ (By.XPATH, '//input[@placeholder="请输入密码"]'))) #
148
+ input_box.send_keys('wlm123$$$')
149
+ time.sleep(0.1)
150
+ elements = _driver.find_elements(
151
+ By.XPATH, '//button[@class="merchant_login_btn" and contains(text(), "登录")]')
152
+ _driver.execute_script("arguments[0].click();", elements[0])
153
+ for i in range(100):
154
+ try:
155
+ wait.until(
156
+ EC.element_to_be_clickable(
157
+ (By.XPATH, '//div[@class="user-info nav-user-slider"]')))
158
+ break
159
+ except:
160
+ time.sleep(5)
161
+ local_storage = _driver.execute_script("return window.localStorage;")
162
+ if 'token' in local_storage.keys():
163
+ self.token = {
164
+ '日期': datetime.datetime.today().strftime('%Y-%m-%d'),
165
+ '平台': '爱库存',
166
+ '店铺名称': self.shop_name,
167
+ 'token': local_storage['token'],
168
+ '来源位置': 'localstorage',
169
+ '更新时间': datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S'),
170
+ }
171
+ time.sleep(5)
172
+ _driver.quit()
173
+
174
+ def save_token(self):
175
+ if not self.token:
176
+ print('self.token 不能为空')
177
+ return
178
+ set_typ = {
179
+ '日期': 'DATE',
180
+ '平台': 'varchar(50)',
181
+ '店铺名称': 'varchar(50)',
182
+ 'token': 'varchar(255)',
183
+ '来源位置': 'varchar(50)',
184
+ '更新时间': 'timestamp'
185
+ }
186
+ # 更新至数据库记录
187
+ m_engine.dict_to_mysql(
188
+ db_name=self.db_name,
189
+ table_name=self.table_name,
190
+ dict_data=self.token,
191
+ unique_main_key=None,
192
+ icm_update=[], # 唯一组合键
193
+ main_key=None, # 指定索引列, 通常用日期列,默认会设置日期为索引
194
+ set_typ={}, # 指定数据类型
195
+ )
196
+
197
+ def get_data_from_bbx(self, start_date=None, end_date=None, item_type='spu', page_num=1, page_size=300):
198
+ if start_date:
199
+ self.start_date = start_date
200
+ if end_date:
201
+ self.end_date = end_date
202
+ date_list = dates_between(start_date=self.start_date, end_date=self.end_date)
203
+
264
204
  df = download.data_to_df(
265
- db_name=db_name,
266
- table_name=table_name,
267
- start_date='2025-01-01',
268
- end_date='2030-12-11',
205
+ db_name=self.db_name,
206
+ table_name=self.table_name,
207
+ start_date='2025-03-07',
208
+ end_date='2039-12-31',
269
209
  projection={
270
- 'domain': 1,
271
- 'expiry': 1,
272
- 'httpOnly': 1,
273
- 'name': 1,
274
- 'path': 1,
275
- 'sameSite': 1,
276
- 'secure': 1,
277
- 'value': 1,
210
+ '日期': 1,
211
+ '平台': 1,
212
+ '店铺名称': 1,
213
+ 'token': 1,
278
214
  '更新时间': 1
279
215
  },
280
216
  )
281
- # 仅保留最新日期的数据
282
- idx = df.groupby('name')['更新时间'].idxmax()
283
- df = df.loc[idx]
284
- df.pop('更新时间')
285
- for item in df.to_dict('records'):
286
- new_dict = {}
287
- for k, v in item.items():
288
- if v == 'False':
289
- v = False
290
- new_dict.update({k: v})
291
- # _driver.add_cookie(new_dict) # 添加cookies信息
292
-
293
- _driver.refresh()
294
- time.sleep(3)
295
- return _driver
296
-
297
- def get_data(self, shop_name='aikucun', date_num=1, headless=True):
298
- """
299
- date_num: 获取最近 N 天数据,0表示今天
300
- 所有数据都是逐日下载
301
- """
302
-
303
- _driver = self.login(shop_name=shop_name, headless=headless)
304
-
305
- _driver.get(self.sp_url)
306
- time.sleep(3)
307
- # breakpoint()
308
-
309
- today = datetime.date.today()
310
- for date_s in range(date_num):
311
- new_date = today - datetime.timedelta(days=date_s) # 会用作文件名
312
- now = datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S')
313
- print(f'{now} 正在下载爱库存文件 {date_s+1}/{date_num}: {new_date}')
314
- str_date = str(new_date)[2:]
315
- wait = WebDriverWait(_driver, timeout=15) #
316
- elements = _driver.find_elements(
317
- By.XPATH, '//input[@placeholder="开始日期"]')
318
- # _driver.execute_script("arguments[0].click();", elements[0]) # 点击
319
-
320
- input_box = wait.until(
321
- EC.element_to_be_clickable(
322
- (By.XPATH, '//input[@placeholder="开始日期"]'))) #
323
-
324
- # from selenium.webdriver.common.keys import Keys
325
- for i in range(8):
326
- input_box.send_keys(Keys.BACKSPACE)
327
- input_box.send_keys(str_date)
328
- time.sleep(1)
329
- input_box = wait.until(
330
- EC.element_to_be_clickable(
331
- (By.XPATH, '//input[@placeholder="结束日期"]'))) # 文件名输入框
332
-
333
- for i in range(8):
334
- input_box.send_keys(Keys.BACKSPACE)
335
- input_box.send_keys(str_date)
336
- time.sleep(2)
337
- input_box.send_keys(Keys.ENTER)
338
- time.sleep(2)
339
- wait.until(EC.presence_of_element_located((By.XPATH, '//button/span[contains(text(), "查询")]')))
340
- elements = _driver.find_elements(
341
- By.XPATH, '//button/span[contains(text(), "查询")]')
342
- _driver.execute_script("arguments[0].click();", elements[0]) # 点击
343
- time.sleep(5)
344
- wait.until(EC.presence_of_element_located(
345
- (By.XPATH,
346
- '//button[@class="el-button el-button--primary el-button--small is-plain"]/span[contains(text(), "下载数据")]')))
347
-
348
- elements = _driver.find_elements(
349
- By.XPATH,
350
- '//div[@class="ak-page-list__table-empty" and contains(text(), "暂无数据")]')
351
- if elements:
352
- print(f'cookies 可能已过期,无法下载')
353
- _driver.quit()
217
+ if len(df) == 0:
218
+ self.logining()
219
+ self.save_token()
220
+ else:
221
+ # 仅保留最新日期的数据
222
+ idx = df.groupby(['平台', '店铺名称'])['更新时间'].idxmax()
223
+ df = df.loc[idx][['token']]
224
+ if len(df) == 0:
225
+ print(f'从数据库获取的 token 不能为空')
354
226
  return
355
-
356
- elements = _driver.find_elements(
357
- By.XPATH,
358
- '//button[@class="el-button el-button--primary el-button--small is-plain"]/span[contains(text(), "下载数据")]')
359
- _driver.execute_script("arguments[0].click();", elements[0]) # 点击
360
- time.sleep(5)
361
- self.clean_data(date=new_date) # 每下载一个文件,需要立即清洗数据
362
- _driver.quit()
363
-
364
- def clean_data(self, date):
227
+ self.token = df.iloc[0, 0]
228
+
229
+ self.url = f'https://treasurebox.aikucun.com/api/web/merchant/treasure/commodity/{item_type}/list'
230
+ headers = {
231
+ 'headers': ua_sj.get_ua(),
232
+ 'referer': 'https://treasurebox.aikucun.com/dashboard/commodity/ranking/merchant',
233
+ 'content-type': 'application/json;charset=UTF-8',
234
+ 'origin': 'https://treasurebox.aikucun.com',
235
+ 'system': 'merchant',
236
+ 'token': self.token, # 从浏览器本地存储空间获取
237
+ }
238
+ num = 1
239
+ results = []
240
+ for date in date_list:
241
+ req_date = re.sub('-', '', date)
242
+ data = {
243
+ 'beginDate': req_date,
244
+ 'brandIds': [],
245
+ 'cropId': '',
246
+ 'cropName': '',
247
+ 'ctgryOneIds': [],
248
+ 'ctgryThreeIds': [],
249
+ 'ctgryTwoIds': [],
250
+ 'dimValue': '',
251
+ 'endDate': req_date,
252
+ 'merchantShopCode': '',
253
+ 'orderByName': 'dealGmv',
254
+ 'orderType': 'desc',
255
+ 'pageNum': page_num,
256
+ 'pageSize': page_size
257
+ }
258
+
259
+ res = requests.post(
260
+ url=self.url,
261
+ headers=headers,
262
+ # cookies=cookies,
263
+ data=json.dumps(data)
264
+ )
265
+ print(f'正在获取数据({num}/{len(date_list)}): {item_type}榜单 {date}')
266
+ # print(res.json())
267
+ if not res.json()['success']:
268
+ print('requests 请求不成功, success 返回值应为 True')
269
+ time.sleep(1)
270
+ continue
271
+ if not res.json()['data']['rows']:
272
+ print("请求获取的数据 ['data']['rows'] 不能为空")
273
+ time.sleep(1)
274
+ continue
275
+ results += [(date, res.json()['data']['rows'])]
276
+ num += 1
277
+ time.sleep(1)
278
+ if num % 32 == 0:
279
+ print("避免频繁请求, 正在休眠...")
280
+ # time.sleep(60)
281
+
282
+ return results
283
+
284
+ def insert_datas(self, data_list, db_name, table_name):
285
+ """数据清洗"""
286
+ if not data_list:
287
+ return
288
+ chanel_name = {
289
+ 'availableNum': '可售库存数',
290
+ 'availableSkuCnt': '在架sku数',
291
+ 'brandName': '品牌名',
292
+ 'ctgryOneName': '一级类目名称',
293
+ 'ctgryThreeName': '三级类目名称',
294
+ 'ctgryTwoName': '二级类目名称',
295
+ 'dealBuyerCnt': '支付人数_成交',
296
+ 'dealBuyerCntRate': '成交率_成交',
297
+ 'dealGmv': '成交gmv',
298
+ 'dealIdolCnt': '销售爱豆人数',
299
+ 'dealProductCnt': '销售量_成交',
300
+ 'dealProductCntRate': '售罄率',
301
+ 'dealSkuCnt': '成交sku数',
302
+ 'dealTwoCnt': '订单数_成交',
303
+ 'downSkuCnt': '可售sku数',
304
+ 'etlInsertTime': '数据更新时间',
305
+ 'forwardConfirmCnt': '转发爱豆人数',
306
+ 'forwardConfirmNum': '转发次数',
307
+ 'merStyleNo': '商品款号', # spu 榜单
308
+ 'styleNo': '商品货号', # sku 榜单
309
+ 'orderBuyerCnt': '支付人数_交易',
310
+ 'orderBuyerCntRate': '成交率_交易',
311
+ 'orderGmv': '下单gmv',
312
+ 'orderProductCnt': '销售量_交易',
313
+ 'orderSkuCnt': '下单sku数',
314
+ 'orderTwoCnt': '订单数_交易',
315
+ 'pictureUrl': '图片',
316
+ 'pvNum': '浏览量',
317
+ 'rn': '序号',
318
+ 'spuId': 'spuid',
319
+ 'spuName': '商品名称',
320
+ 'supplyAmount': '供货额',
321
+ 'supplyPerAmount': '供货价',
322
+ 'uvNum': '访客量',
323
+ 'colorName': '颜色',
324
+ 'sizeName': '尺码',
325
+ 'barCode': '条码', # sku榜单 款号 + 颜色编码
326
+ }
327
+ # 移除未翻译的列名
328
+ res_col = [item for item in chanel_name.keys() if chanel_name[item] == '']
329
+ for item in res_col:
330
+ del chanel_name[item]
331
+
332
+ _results = []
333
+ for item_ in data_list:
334
+ end_date, d_list = item_
335
+ for main_data_dict in d_list:
336
+ dict_data_before = {}
337
+ # 添加数据
338
+ dict_data_before.update({k: v for k, v in main_data_dict.items()})
339
+ # 初始化 dict_data
340
+ dict_data = {
341
+ '日期': end_date,
342
+ '平台': '爱库存',
343
+ '店铺名称': self.shop_name
344
+ }
345
+ for k, v in dict_data_before.items():
346
+ # 翻译键名
347
+ [dict_data.update({name_v: v}) for name_k, name_v in chanel_name.items() if k == name_k]
348
+ # 没有翻译的键值也要保留
349
+ not_in_rename = [item for item in dict_data_before.keys() if item not in chanel_name.keys()]
350
+ [dict_data.update({item: dict_data_before[item]}) for item in not_in_rename]
351
+ dict_data.update(
352
+ {
353
+ '更新时间': datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S')
354
+ }
355
+ )
356
+ new_dict_data = {}
357
+ for k, v in dict_data.items():
358
+ if v and str(v).lower() != 'none' and str(v) != 'null':
359
+ new_dict_data.update({k: v})
360
+ else:
361
+ new_dict_data.update({k: 0})
362
+ _results.append(new_dict_data)
365
363
  set_typ = {
366
- '日期': 'date',
367
- '店铺名称': 'varchar(100)',
368
- 'spu_id': 'varchar(100)',
364
+ '可售库存数': 'INT',
365
+ '在架sku数': 'INT',
366
+ '品牌名': 'varchar(50)',
367
+ '一级类目名称': 'varchar(50)',
368
+ '三级类目名称': 'varchar(50)',
369
+ '二级类目名称': 'varchar(50)',
370
+ '支付人数_成交': 'INT',
371
+ '成交率_成交': 'decimal(10,4)',
372
+ '成交gmv': 'decimal(10,2)',
373
+ '销售爱豆人数': 'INT',
374
+ '销售量_成交': 'INT',
375
+ '售罄率': 'decimal(10,4)',
376
+ '成交sku数': 'INT',
377
+ '订单数_成交': 'INT',
378
+ '可售sku数': 'INT',
379
+ '数据更新时间': 'DATETIME',
380
+ '转发爱豆人数': 'INT',
381
+ '转发次数': 'INT',
382
+ '商品款号': 'varchar(50)',
383
+ '支付人数_交易': 'INT',
384
+ '成交率_交易': 'decimal(10,4)',
385
+ '下单gmv': 'decimal(10,2)',
386
+ '销售量_交易': 'INT',
387
+ '下单sku数': 'INT',
388
+ '订单数_交易': 'INT',
369
389
  '图片': 'varchar(255)',
370
- '序号': 'smallint',
371
- '商品名称': 'varchar(255)',
372
- '商品款号': 'varchar(255)',
373
- '一级类目名称': 'varchar(255)',
374
- '二级类目名称': 'varchar(255)',
375
- '三级类目名称': 'varchar(255)',
376
- '数据更新时间': 'timestamp',
377
- '更新时间': 'timestamp',
390
+ '浏览量': 'INT',
391
+ '序号': 'INT',
392
+ 'spuId': 'varchar(50)',
393
+ '商品名称': 'varchar(50)',
394
+ '供货额': 'decimal(10,2)',
395
+ '供货价': 'decimal(10,2)',
396
+ '访客量': 'INT',
397
+ '颜色': 'varchar(50)',
398
+ '尺码': 'varchar(50)',
399
+ '货号': 'varchar(50)', # 款号 + 颜色编码
378
400
  }
379
- for root, dirs, files in os.walk(upload_path, topdown=False):
380
- for name in files:
381
- if '~$' in name or 'DS_Store' in name:
382
- continue
383
- if name.endswith('csv'):
384
- pattern = re.findall('[\u4e00-\u9fff]+', name)
385
- if pattern:
386
- continue
387
- pattern = re.findall('^[0-9a-zA-Z_]{5,}-[0-9a-zA-Z_]+-[0-9a-zA-Z_]+-[0-9a-zA-Z_]+', name)
388
- if not pattern:
389
- continue
390
- df = pd.read_csv(os.path.join(root, name), encoding='gb2312', header=0, na_filter=False)
391
- if len(df) == 0:
392
- print(f'数据长度为 0 : {name}')
393
- os.remove(os.path.join(root, name))
394
- continue
395
- df.insert(loc=0, column='日期', value=date) # df中插入新列
396
- df.insert(loc=1, column='店铺名称', value='爱库存平台') # df中插入新列
397
- df.rename(columns={'spuId': 'spu_id'}, inplace=True)
398
- # df['数据更新时间'] = pd.to_datetime(df['数据更新时间'], format='%Y-%m-%d %H:%M:%S', errors='ignore')
399
- # df['数据更新时间'] = df['数据更新时间'].apply(lambda x: re.sub(' ', ' ', str(x)) if x else x)
400
- # print(df['数据更新时间'])
401
- # breakpoint()
402
- new_dict = {
403
- '日期': '',
404
- '店铺名称': '',
405
- '序号': '',
406
- '商品名称': '',
407
- 'spu_id': '',
408
- '商品款号': '',
409
- '一级类目名称': '',
410
- '二级类目名称': '',
411
- '三级类目名称': '',
412
- '访客量': '',
413
- '浏览量': '',
414
- '下单gmv': '',
415
- '成交gmv': '',
416
- '支付人数_成交': '',
417
- }
418
- _results = []
419
- for dict_data in df.to_dict(orient='records'):
420
- new_dict.update(dict_data)
421
- new_dict.update({'更新时间': datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S')})
422
- _results.append(new_dict)
423
- if _results:
424
- m_engine.insert_many_dict(
425
- db_name='爱库存2',
426
- table_name='商品spu榜单',
427
- dict_data_list=_results,
428
- icm_update=['日期', '店铺名称', 'spu_id', '商品款号'],
429
- unique_main_key=None,
430
- set_typ=set_typ,
431
- )
432
-
433
- new_name = f'爱库存_商品榜单_spu_{date}_{date}.csv'
434
- df.to_csv(os.path.join(root, new_name), encoding='utf-8_sig', index=False)
435
- os.remove(os.path.join(root, name))
436
-
437
-
438
- def akucun(headless=True, date_num=10):
439
- akc = AikuCun()
440
- akc.get_data(shop_name='aikucun', date_num=date_num, headless=headless) # 获取最近 N 天数据,0表示今天
441
-
442
-
443
- class AikuCunNew:
444
-
445
- def __init__(self, shop_name,):
446
- self.shop_name = shop_name
447
- self.today = datetime.date.today()
448
- self.headers = {'User-Agent': ua_sj.get_ua()}
449
- self.cookie_path = os.path.join(set_support.SetSupport(dirname='support').dirname, 'cookies')
450
- self.cookies = {}
451
- self.get_cookies() # 更新 self.cookies 的值
452
- self.support_path = set_support.SetSupport(dirname='support').dirname
453
- self.start_date = (self.today - datetime.timedelta(days=15)).strftime('%Y-%m-%d')
454
- self.end_date = (self.today - datetime.timedelta(days=1)).strftime('%Y-%m-%d')
401
+ print(f'{self.shop_name} 正在更新数据库 {db_name} -> {table_name}...')
402
+ if 'spu' in table_name:
403
+ drop_dup = ['日期', '平台', '店铺名称', '商品款号', '访客量']
404
+ else:
405
+ drop_dup = ['日期', '平台', '店铺名称', '条码']
406
+ m_engine.insert_many_dict(
407
+ db_name=db_name,
408
+ table_name=table_name,
409
+ dict_data_list=_results,
410
+ icm_update=drop_dup, # 唯一组合键
411
+ # unique_main_key=['人群id'],
412
+ set_typ=set_typ,
413
+ allow_not_null=False, # 创建允许插入空值的列
414
+ )
455
415
 
456
- def akc(self):
457
- """
458
- """
459
- start_date = self.start_date
460
- end_date = self.end_date
461
- url = 'https://treasurebox.aikucun.com/api/web/merchant/treasure/commodity/list/down?'
462
- self.headers.update({'Referer': 'https://treasurebox.aikucun.com/dashboard/commodity/ranking/merchant?LS=true&shopId=1814114991487782914&from=menu&v=0.1936043279838604'})
463
- now = datetime.datetime.now()
464
- timestamp_ms = round(time.mktime(now.timetuple()) * 1000 + now.microsecond / 1000)
416
+ def get_sign(self):
417
+ sign = 'bbcf5b9cf3d3b8ba9c22550dcba8a3ce97be766f'
418
+ current_timestamp_ms = '1741396070777'
419
+ # current_timestamp_ms = int(round(time.time() * 1000))
420
+ self.url = f'https://treasurebox.aikucun.com/api/web/merchant/treasure/commodity/sku/list?time={current_timestamp_ms}&sign={sign}'
421
+ headers = {
422
+ 'headers': ua_sj.get_ua(),
423
+ 'referer': 'https://treasurebox.aikucun.com/dashboard/commodity/ranking/merchant',
424
+ 'content-type': 'application/json;charset=UTF-8',
425
+ 'origin': 'https://treasurebox.aikucun.com',
426
+ # 'system': 'merchant',
427
+ # 'token': self.token, # 从浏览器本地存储空间获取
428
+ }
465
429
  data = {
466
- 'time': timestamp_ms,
467
- 'sign': '2DA6A7580C859B374AE830CAD78BB84B'
430
+ 'beginDate': '20250307',
431
+ 'brandIds': [],
432
+ 'cropId': '',
433
+ 'cropName': '',
434
+ 'ctgryOneIds': [],
435
+ 'ctgryThreeIds': [],
436
+ 'ctgryTwoIds': [],
437
+ 'dimValue': '',
438
+ 'endDate': '20250307',
439
+ 'merchantShopCode': '',
440
+ 'orderByName': 'dealGmv',
441
+ 'orderType': 'desc',
442
+ 'pageNum': 1,
443
+ 'pageSize': 10
468
444
  }
469
445
  res = requests.post(
470
- url,
471
- headers=self.headers,
472
- cookies=self.cookies,
473
- params=data
446
+ url=self.url,
447
+ headers=headers,
448
+ data=json.dumps(data)
474
449
  )
475
- print(res.text)
476
-
450
+ print(res.json())
477
451
 
478
452
 
479
- def get_cookies(self):
480
- files = os.listdir(self.cookie_path)
481
- for file in files:
482
- if self.shop_name in file and '~' not in file:
483
- with open(os.path.join(self.cookie_path, file), 'r') as f:
484
- cookies_data = json.load(f)
453
+ def main(start_date, end_date, item_type=['spu']):
454
+ ak = AikuCun()
455
+ # ak.get_sign()
456
+ for type_ in item_type:
457
+ if type_ not in ['spu', 'sku']:
458
+ print(f'{item_type} 非法参数: {type_}')
459
+ continue
460
+ for i in range(2):
461
+ data_list = ak.get_data_from_bbx(
462
+ start_date=start_date,
463
+ end_date=end_date,
464
+ item_type=type_,
465
+ page_num=1,
466
+ page_size=300
467
+ )
468
+ if not data_list:
469
+ ak.logining()
470
+ ak.save_token()
471
+ else:
485
472
  break
486
- for data in cookies_data:
487
- self.cookies.update({data['name']: data['value']})
473
+
474
+ ak.insert_datas(
475
+ data_list=data_list,
476
+ db_name='爱库存2',
477
+ table_name=f'{type_}榜单'
478
+ )
488
479
 
489
480
 
490
- if __name__ == '__main__':
491
- # get_cookie_aikucun() # 登录并获取 cookies
492
- akucun(date_num=30, headless=True) # 下载数据
493
481
 
494
- # a = AikuCunNew(shop_name='aikucun')
495
- # a.akc()
482
+ if __name__ == '__main__':
483
+ main(
484
+ start_date='2025-03-06',
485
+ end_date='2025-03-06',
486
+ item_type=['spu', 'sku']
487
+ )
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.2
2
2
  Name: mdbq
3
- Version: 3.7.26
3
+ Version: 3.8.2
4
4
  Home-page: https://pypi.org/project/mdbq
5
5
  Author: xigua,
6
6
  Author-email: 2587125111@qq.com
@@ -15,11 +15,11 @@ mdbq/log/mylogger.py,sha256=oaT7Bp-Hb9jZt52seP3ISUuxVcI19s4UiqTeouScBO0,3258
15
15
  mdbq/log/spider_logging.py,sha256=59xe4Ckb7m-sBt3GYk8DC_hQg7-jnjBRq1o718r1Ry8,1676
16
16
  mdbq/mongo/__init__.py,sha256=SILt7xMtQIQl_m-ik9WLtJSXIVf424iYgCfE_tnQFbw,13
17
17
  mdbq/mysql/__init__.py,sha256=A_DPJyAoEvTSFojiI2e94zP0FKtCkkwKP1kYUCSyQzo,11
18
- mdbq/mysql/mysql.py,sha256=UoZPVyrgDp5L8-i0jVptkal9G64oNrdhNwa-xpp8txo,95127
18
+ mdbq/mysql/mysql.py,sha256=tR6l4Zzn9j6zKaFcy0Ktw2oL8OoX3QB6jDoDp1l2fiM,95474
19
19
  mdbq/mysql/s_query.py,sha256=09Dp7DrVXui6dAI6zFDfrsUOdjPblF_oYUpgqbZMhXg,8757
20
20
  mdbq/mysql/year_month_day.py,sha256=VgewoE2pJxK7ErjfviL_SMTN77ki8GVbTUcao3vFUCE,1523
21
21
  mdbq/other/__init__.py,sha256=jso1oHcy6cJEfa7udS_9uO5X6kZLoPBF8l3wCYmr5dM,18
22
- mdbq/other/download_sku_picture.py,sha256=-rfWRmmsNjj0AyPZHT_xAaIaoMqcCyeppU6j81XFQYw,44798
22
+ mdbq/other/download_sku_picture.py,sha256=xX3jU2Nu2R9CoveM7xevRstg1P40hvR1KL8wqmwt3O0,44809
23
23
  mdbq/other/porxy.py,sha256=UHfgEyXugogvXgsG68a7QouUCKaohTKKkI4RN-kYSdQ,4961
24
24
  mdbq/other/pov_city.py,sha256=AEOmCOzOwyjHi9LLZWPKi6DUuSC-_M163664I52u9qw,21050
25
25
  mdbq/other/ua_sj.py,sha256=JuVYzc_5QZ9s_oQSrTHVKkQv4S_7-CWx4oIKOARn_9U,22178
@@ -30,8 +30,8 @@ mdbq/pbix/refresh_all_old.py,sha256=_pq3WSQ728GPtEG5pfsZI2uTJhU8D6ra-htIk1JXYzw,
30
30
  mdbq/redis/__init__.py,sha256=YtgBlVSMDphtpwYX248wGge1x-Ex_mMufz4-8W0XRmA,12
31
31
  mdbq/redis/getredis.py,sha256=1pTga2iINx0NV2ffl0D-aspZhrZMDQR8SpohAv1acoo,24076
32
32
  mdbq/spider/__init__.py,sha256=RBMFXGy_jd1HXZhngB2T2XTvJqki8P_Fr-pBcwijnew,18
33
- mdbq/spider/aikucun.py,sha256=o_QwFWbD6O2F56k6bwnpVV55EcdFCyes05ON7iu9TrA,21882
34
- mdbq-3.7.26.dist-info/METADATA,sha256=GtQ4jfKxKnY58OT5fNcdrJ2GTb32LBKQu6qzJqlLowI,364
35
- mdbq-3.7.26.dist-info/WHEEL,sha256=jB7zZ3N9hIM9adW7qlTAyycLYW9npaWKLRzaoVcLKcM,91
36
- mdbq-3.7.26.dist-info/top_level.txt,sha256=2FQ-uLnCSB-OwFiWntzmwosW3X2Xqsg0ewh1axsaylA,5
37
- mdbq-3.7.26.dist-info/RECORD,,
33
+ mdbq/spider/aikucun.py,sha256=GBZsCjsN3i1ZW9pAxeMAmb-y3yW3p3HJpjfrnnna5gg,19702
34
+ mdbq-3.8.2.dist-info/METADATA,sha256=pmdQf_CAgoh9rcPTtyIQwrifPpr_WtddcV1sBPhDdj8,363
35
+ mdbq-3.8.2.dist-info/WHEEL,sha256=jB7zZ3N9hIM9adW7qlTAyycLYW9npaWKLRzaoVcLKcM,91
36
+ mdbq-3.8.2.dist-info/top_level.txt,sha256=2FQ-uLnCSB-OwFiWntzmwosW3X2Xqsg0ewh1axsaylA,5
37
+ mdbq-3.8.2.dist-info/RECORD,,
File without changes