mdbq 4.0.10.tar.gz → 4.0.12.tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {mdbq-4.0.10 → mdbq-4.0.12}/PKG-INFO +1 -1
- mdbq-4.0.12/mdbq/__version__.py +1 -0
- {mdbq-4.0.10 → mdbq-4.0.12}/mdbq/aggregation/query_data.py +8 -4
- {mdbq-4.0.10 → mdbq-4.0.12}/mdbq/mysql/deduplicator.py +10 -3
- {mdbq-4.0.10 → mdbq-4.0.12}/mdbq/mysql/s_query.py +15 -11
- {mdbq-4.0.10 → mdbq-4.0.12}/mdbq/mysql/unique_.py +7 -3
- {mdbq-4.0.10 → mdbq-4.0.12}/mdbq/mysql/uploader.py +9 -3
- {mdbq-4.0.10 → mdbq-4.0.12}/mdbq/other/download_sku_picture.py +8 -5
- {mdbq-4.0.10 → mdbq-4.0.12}/mdbq/spider/aikucun.py +60 -50
- {mdbq-4.0.10 → mdbq-4.0.12}/mdbq.egg-info/PKG-INFO +1 -1
- {mdbq-4.0.10 → mdbq-4.0.12}/mdbq.egg-info/SOURCES.txt +0 -3
- mdbq-4.0.10/mdbq/__version__.py +0 -1
- mdbq-4.0.10/mdbq/config/config.py +0 -95
- mdbq-4.0.10/mdbq/log/spider_logging.py +0 -47
- mdbq-4.0.10/mdbq/other/__init__.py +0 -4
- {mdbq-4.0.10 → mdbq-4.0.12}/README.txt +0 -0
- {mdbq-4.0.10 → mdbq-4.0.12}/mdbq/__init__.py +0 -0
- {mdbq-4.0.10 → mdbq-4.0.12}/mdbq/aggregation/__init__.py +0 -0
- {mdbq-4.0.10 → mdbq-4.0.12}/mdbq/log/__init__.py +0 -0
- {mdbq-4.0.10 → mdbq-4.0.12}/mdbq/log/mylogger.py +0 -0
- {mdbq-4.0.10 → mdbq-4.0.12}/mdbq/mysql/__init__.py +0 -0
- {mdbq-4.0.10 → mdbq-4.0.12}/mdbq/mysql/mysql.py +0 -0
- {mdbq-4.0.10/mdbq/config → mdbq-4.0.12/mdbq/other}/__init__.py +0 -0
- {mdbq-4.0.10 → mdbq-4.0.12}/mdbq/other/otk.py +0 -0
- {mdbq-4.0.10 → mdbq-4.0.12}/mdbq/other/pov_city.py +0 -0
- {mdbq-4.0.10 → mdbq-4.0.12}/mdbq/other/ua_sj.py +0 -0
- {mdbq-4.0.10 → mdbq-4.0.12}/mdbq/pbix/__init__.py +0 -0
- {mdbq-4.0.10 → mdbq-4.0.12}/mdbq/pbix/pbix_refresh.py +0 -0
- {mdbq-4.0.10 → mdbq-4.0.12}/mdbq/pbix/refresh_all.py +0 -0
- {mdbq-4.0.10 → mdbq-4.0.12}/mdbq/redis/__init__.py +0 -0
- {mdbq-4.0.10 → mdbq-4.0.12}/mdbq/redis/getredis.py +0 -0
- {mdbq-4.0.10 → mdbq-4.0.12}/mdbq/spider/__init__.py +0 -0
- {mdbq-4.0.10 → mdbq-4.0.12}/mdbq.egg-info/dependency_links.txt +0 -0
- {mdbq-4.0.10 → mdbq-4.0.12}/mdbq.egg-info/top_level.txt +0 -0
- {mdbq-4.0.10 → mdbq-4.0.12}/setup.cfg +0 -0
- {mdbq-4.0.10 → mdbq-4.0.12}/setup.py +0 -0
mdbq-4.0.12/mdbq/__version__.py
@@ -0,0 +1 @@
+VERSION = '4.0.12'
{mdbq-4.0.10 → mdbq-4.0.12}/mdbq/aggregation/query_data.py
@@ -3,7 +3,7 @@ import re
 # from mdbq.mysql import mysql
 from mdbq.mysql import uploader
 from mdbq.mysql import s_query
-from mdbq.
+from mdbq.conf import conf
 from mdbq.log import mylogger
 import datetime
 from dateutil.relativedelta import relativedelta
@@ -18,9 +18,12 @@ from collections.abc import Mapping, Sequence
 import inspect
 
 dir_path = os.path.expanduser("~")
-
-
-
+parser = conf.ConfigParser()
+host, port, username, password = parser.get_section_values(
+    file_path=os.path.join(dir_path, 'spd.txt'),
+    section='mysql',
+    keys=['host', 'port', 'username', 'password'],
+)
 host = 'localhost'
 uld = uploader.MySQLUploader(username=username, password=password, host=host, port=int(port), pool_size=10)
 
@@ -3676,6 +3679,7 @@ def query3(months=1, download_manager=None):
 
 
 def main(months=3):
+    logger.info('数据聚合任务开始')
     # 1. 更新日期表 更新货品年份基准表, 属性设置 3 - 货品年份基准
     date_table()
     # 2. 数据聚合
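
The change repeated across every module in this release is the config plumbing: the deleted mdbq/config/config.py reader gives way to mdbq.conf.conf, whose ConfigParser.get_section_values() returns the values of the requested keys from one section, in order. The conf module itself is not included in this diff, so the sketch below is only inferred from the call sites, with Python's standard configparser standing in for the real implementation:

    # Hypothetical stand-in for mdbq.conf.conf, inferred from the call
    # sites in this diff; the real module is not shown here.
    import configparser
    import os

    class ConfigParser:
        def get_section_values(self, file_path, section, keys):
            # Read an INI-style file and return the values of `keys`
            # under `section`, preserving the order of `keys`.
            parser = configparser.ConfigParser()
            parser.read(file_path, encoding='utf-8')
            return tuple(parser.get(section, key) for key in keys)

    # Usage mirroring the hunk above:
    parser = ConfigParser()
    host, port, username, password = parser.get_section_values(
        file_path=os.path.join(os.path.expanduser("~"), 'spd.txt'),
        section='mysql',
        keys=['host', 'port', 'username', 'password'],
    )

Note that query_data.py immediately overwrites the parsed value with host = 'localhost', so only the port, username and password read from spd.txt actually take effect there.
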
{mdbq-4.0.10 → mdbq-4.0.12}/mdbq/mysql/deduplicator.py
@@ -6,6 +6,7 @@ import warnings
 import pymysql
 import os
 from mdbq.log import mylogger
+from mdbq.conf import conf
 from typing import List, Dict, Optional, Any, Tuple
 from dbutils.pooled_db import PooledDB
 import threading
@@ -1348,10 +1349,14 @@ class MySQLDeduplicator:
 
 
 def main():
-
+    logger.info('去重任务开始')
     dir_path = os.path.expanduser("~")
-
-
+    parser = conf.ConfigParser()
+    host, port, username, password = parser.get_section_values(
+        file_path=os.path.join(dir_path, 'spd.txt'),
+        section='mysql',
+        keys=['host', 'port', 'username', 'password'],
+    )
     # host = 'localhost'
 
     deduplicator = MySQLDeduplicator(
@@ -1401,6 +1406,8 @@ def main():
 
     # 关闭连接
     deduplicator.close()
+    logger.info('去重任务结束')
+
 
 if __name__ == '__main__':
     main()
{mdbq-4.0.10 → mdbq-4.0.12}/mdbq/mysql/s_query.py
@@ -7,7 +7,7 @@ from decimal import Decimal
 from contextlib import closing
 from mdbq.log import mylogger
 import os
-from mdbq.
+from mdbq.conf import conf
 from typing import Optional, Dict, List, Set, Tuple, Union, Any, Literal
 from dbutils.pooled_db import PooledDB
 import time
@@ -35,7 +35,7 @@ class QueryDatas:
     """
 
     def __init__(self, username: str, password: str, host: str, port: int, charset: str = 'utf8mb4',
-
+                 pool_size: int = 20, mincached: int = 2, maxcached: int = 5,
                  connect_timeout: int = 10, read_timeout: int = 30, write_timeout: int = 30,
                  max_retries: int = 3, retry_waiting_time: int = 5, collation: str = 'utf8mb4_0900_ai_ci') -> None:
         """
@@ -47,7 +47,7 @@ class QueryDatas:
            host: 数据库主机
            port: 数据库端口
            charset: 字符集,默认utf8mb4
-
+           pool_size: 最大活动连接数,默认20
            mincached: 最小缓存连接数,空闲连接数量,默认2
            maxcached: 最大缓存连接数,最大空闲连接数,默认5
            connect_timeout: 连接超时时间,默认10秒
@@ -87,14 +87,14 @@ class QueryDatas:
             'write_timeout': write_timeout,
             'autocommit': True
         }
-        self.pool = self._create_connection_pool(
+        self.pool = self._create_connection_pool(pool_size, mincached, maxcached)
 
-    def _create_connection_pool(self,
+    def _create_connection_pool(self, pool_size: int, mincached: int, maxcached: int) -> PooledDB:
         """
        创建数据库连接池
 
        Args:
-
+           pool_size: 最大连接数
           mincached: 最小缓存连接数
           maxcached: 最大缓存连接数
 
@@ -122,7 +122,7 @@ class QueryDatas:
         }
         pool_params = {
             'creator': pymysql,
-            'maxconnections':
+            'maxconnections': pool_size,
             'mincached': mincached,
             'maxcached': maxcached,
             'blocking': True,
@@ -133,7 +133,7 @@ class QueryDatas:
         try:
             pool = PooledDB(**pool_params, **connection_params)
             logger.debug('连接池创建成功', {
-                '连接池大小':
+                '连接池大小': pool_size,
                 '最小缓存': mincached,
                 '最大缓存': maxcached,
                 '主机': self.host,
@@ -717,7 +717,7 @@ class QueryDatas:
         if hasattr(self, 'pool') and self.pool is not None:
             try:
                 self.pool.close()
-                logger.
+                logger.debug('连接池已关闭', {
                     '主机': self.host,
                     '端口': self.port
                 })
@@ -949,8 +949,12 @@ class QueryDatas:
 
 def main():
     dir_path = os.path.expanduser("~")
-
-
+    parser = conf.ConfigParser()
+    host, port, username, password = parser.get_section_values(
+        file_path=os.path.join(dir_path, 'spd.txt'),
+        section='mysql',
+        keys=['host', 'port', 'username', 'password'],
+    )
     host = 'localhost'
 
     qd = QueryDatas(username=username, password=password, host=host, port=port)
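
The s_query.py changes thread pool_size, mincached and maxcached from __init__ into the PooledDB construction instead of leaving them hard-coded. For reference, a standalone sketch of how those three knobs map onto dbutils parameters; PooledDB and its arguments are real, but the connection values here are placeholders, not the package's config:

    import pymysql
    from dbutils.pooled_db import PooledDB

    pool = PooledDB(
        creator=pymysql,      # driver module used to open raw connections
        maxconnections=20,    # pool_size: hard cap on concurrently open connections
        mincached=2,          # idle connections opened eagerly at startup
        maxcached=5,          # idle connections kept after being returned
        blocking=True,        # wait for a free connection instead of raising
        host='127.0.0.1', port=3306, user='user', password='pass', charset='utf8mb4',
    )
    conn = pool.connection()  # borrow a connection from the pool
    conn.close()              # returns it to the pool rather than closing it
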
{mdbq-4.0.10 → mdbq-4.0.12}/mdbq/mysql/unique_.py
@@ -2,7 +2,7 @@ import re
 import pymysql
 from typing import List, Dict, Any, Tuple
 from mdbq.log import mylogger
-from mdbq.
+from mdbq.conf import conf
 from dbutils.pooled_db import PooledDB
 import os
 
@@ -274,8 +274,12 @@ class UniqueManager:
 
 def main():
     dir_path = os.path.expanduser("~")
-
-
+    parser = conf.ConfigParser()
+    host, port, username, password = parser.get_section_values(
+        file_path=os.path.join(dir_path, 'spd.txt'),
+        section='mysql',
+        keys=['host', 'port', 'username', 'password'],
+    )
     # host = 'localhost'
 
     my_databases = [
{mdbq-4.0.10 → mdbq-4.0.12}/mdbq/mysql/uploader.py
@@ -8,7 +8,7 @@ import pymysql
 import pandas as pd
 import os
 from mdbq.log import mylogger
-from mdbq.
+from mdbq.conf import conf
 from typing import Union, List, Dict, Optional, Any, Tuple, Set
 from dbutils.pooled_db import PooledDB
 import json
@@ -539,6 +539,8 @@ class MySQLUploader:
             is_nan = True
         elif str(value).lower() in ['nan', 'none']:
             is_nan = True
+        elif value == '':
+            is_nan = True
         if is_nan:
             if not allow_null:
                 if 'int' in column_type_lower:
@@ -1735,8 +1737,12 @@ class MySQLUploader:
 
 def main():
     dir_path = os.path.expanduser("~")
-
-
+    parser = conf.ConfigParser()
+    host, port, username, password = parser.get_section_values(
+        file_path=os.path.join(dir_path, 'spd.txt'),
+        section='mysql',
+        keys=['host', 'port', 'username', 'password'],
+    )
     host = 'localhost'
 
     uploader = MySQLUploader(
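
Besides the config swap, the one behavioral change in uploader.py is that empty strings now count as NaN during value validation, joining the existing 'nan'/'none' string checks. A minimal reduction of the kind of cascade involved; the earlier branches are only partially visible in the hunk, so their conditions here are assumptions, and the function name is illustrative:

    import math

    def is_nan_like(value):
        # Mirrors the visible checks around the hunk above, plus the new '' case.
        if value is None:                         # assumed earlier branch
            return True
        if isinstance(value, float) and math.isnan(value):  # assumed earlier branch
            return True
        if str(value).lower() in ['nan', 'none']:
            return True
        if value == '':                           # new in 4.0.12
            return True
        return False

    assert is_nan_like('') and is_nan_like('NaN') and not is_nan_like(0)
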
{mdbq-4.0.10 → mdbq-4.0.12}/mdbq/other/download_sku_picture.py
@@ -17,8 +17,7 @@ from selenium.webdriver.support.wait import WebDriverWait
 from selenium.webdriver.common.by import By
 from selenium.webdriver.support import expected_conditions as EC
 from selenium.webdriver.chrome.service import Service
-from mdbq.
-from mdbq.config import config
+from mdbq.conf import conf
 from mdbq.mysql import mysql
 from mdbq.mysql import s_query
 from mdbq.other import ua_sj
@@ -49,8 +48,12 @@ if not os.path.exists(upload_path): # 数据中心根目录
 
 dir_path = os.path.expanduser("~")
 config_file = os.path.join(dir_path, 'spd.txt')
-
-
+parser = conf.ConfigParser()
+host, port, username, password = parser.get_section_values(
+    file_path=config_file,
+    section='mysql',
+    keys=['host', 'port', 'username', 'password'],
+)
 m_engine = mysql.MysqlUpload(username=username, password=password, host=host, port=port, charset='utf8mb4')
 
 if not username:
@@ -62,7 +65,7 @@ class LoadAccount:
 
     def __init__(self):
         self.url = 'https://login.taobao.com/' # 默认登录淘宝
-        self.cookie_path =
+        self.cookie_path = None
 
     def __call__(self, *args, **kwargs):
         self.check_cookie() # 检测cookie有效期, 但不阻断任务
{mdbq-4.0.10 → mdbq-4.0.12}/mdbq/spider/aikucun.py
@@ -15,19 +15,20 @@ from selenium.webdriver.chrome.service import Service
 import pymysql
 from mdbq.mysql import uploader
 from mdbq.mysql import s_query
-from mdbq.
+from mdbq.conf import conf
 from mdbq.other import ua_sj
 from mdbq.other import otk
 from mdbq.log import mylogger
 
 dir_path = os.path.expanduser("~")
-
-
-
+parser = conf.ConfigParser()
+host, port, username, password = parser.get_section_values(
+    file_path=os.path.join(dir_path, 'spd.txt'),
+    section='mysql',
+    keys=['host', 'port', 'username', 'password'],
+)
 
-uld = uploader.MySQLUploader(username=username, password=password, host=host, port=int(port), pool_size=10)
 # 实例化一个数据查询类,用来获取 cookies 表数据
-download = s_query.QueryDatas(username=username, password=password, host=host, port=port)
 logger = mylogger.MyLogger(
     logging_mode='file',
     log_level='info',
@@ -48,15 +49,15 @@ def keep_connect(_db_name, _config, max_try: int=10):
             connection = pymysql.connect(**_config) # 连接数据库
             return connection
         except Exception as e:
-            logger.error(
+            logger.error('连接失败', {'数据库': _db_name, '主机': host, '端口': port, '重试次数': attempts, '最大重试次数': max_try, '错误信息': e})
             attempts += 1
             time.sleep(30)
-    logger.error(
+    logger.error('连接失败', {'数据库': _db_name, '主机': host, '端口': port, '重试次数': attempts, '最大重试次数': max_try})
     return None
 
 
 class AikuCun:
-    def __init__(self):
+    def __init__(self, uld_manager, download_manager):
         self.url = 'https://gray-merc.aikucun.com/index.html'
         self.db_name = 'cookie文件'
         self.table_name = 'main_aikucun'
@@ -66,6 +67,8 @@ class AikuCun:
         self.start_date = (self.today - datetime.timedelta(days=7)).strftime('%Y-%m-%d')
         self.end_date = (self.today - datetime.timedelta(days=1)).strftime('%Y-%m-%d')
         self.error_count = 0
+        self.uld = uld_manager
+        self.download = download_manager
 
     def logining(self, shop_name='aikucun', headless=False):
         option = webdriver.ChromeOptions()
@@ -171,7 +174,7 @@ class AikuCun:
 
     def save_token(self):
         if not self.token:
-
+            logger.error('self.token 不能为空')
             return
         set_typ = {
             '日期': 'DATE',
@@ -182,11 +185,11 @@ class AikuCun:
             '更新时间': 'timestamp'
         }
         # 更新至数据库记录
-        uld.upload_data(
+        self.uld.upload_data(
             db_name=self.db_name,
             table_name=self.table_name,
             data=self.token,
-            set_typ=
+            set_typ=set_typ,
             primary_keys=[],
             check_duplicate=False,
             update_on_duplicate=False,
@@ -209,7 +212,7 @@ class AikuCun:
         self.end_date = end_date
         date_list = otk.dates_between(start_date=self.start_date, end_date=self.end_date)
 
-        df = download.data_to_df(
+        df = self.download.data_to_df(
             db_name=self.db_name,
             table_name=self.table_name,
             start_date='2025-03-07',
@@ -230,7 +233,7 @@ class AikuCun:
         idx = df.groupby(['平台', '店铺名称'])['更新时间'].idxmax()
         df = df.loc[idx][['token']]
         if len(df) == 0:
-
+            logger.error(f'从数据库获取的 token 不能为空')
             return
         self.token = df.iloc[0, 0]
 
@@ -247,7 +250,7 @@ class AikuCun:
         results = []
         for date in date_list:
             if self.error_count > 5:
-
+                logger.logger('已退出请求 -> self.error_count > 5')
                 break
             req_date = re.sub('-', '', date)
             data = {
@@ -273,16 +276,15 @@ class AikuCun:
                 # cookies=cookies,
                 data=json.dumps(data)
             )
-
-            # print(res.json())
+            logger.info('获取数据', {'进度': num/len(date_list), '日期': date, '榜单类型': item_type})
             if not res.json().get('success', None):
-
+                logger.error('没有获取到数据, 请求不成功, 如果连续请求失败 > 5, 则需重新获取cookie后继续')
                 num += 1
                 self.error_count += 1
                 time.sleep(1)
                 continue
             if not res.json().get('data', {}).get('rows', None):
-
+                logger.error("返回的数据字典异常, ['data']['rows'] 不能为空")
                 num += 1
                 self.error_count += 1
                 time.sleep(1)
@@ -291,7 +293,7 @@ class AikuCun:
             num += 1
             time.sleep(1)
             if num % 32 == 0:
-
+                logger.info("避免频繁请求, 正在休眠...")
                 # time.sleep(60)
 
         return results
@@ -413,18 +415,18 @@ class AikuCun:
             '尺码': 'varchar(50)',
             '货号': 'varchar(50)', # 款号 + 颜色编码
         }
-
+        logger.info('更新数据库', {'店铺名称': self.shop_name, '库': db_name, '表': table_name})
         if 'spu' in table_name:
             drop_dup = ['日期', '平台', '店铺名称', '商品款号', '访客量']
         else:
             drop_dup = ['日期', '平台', '店铺名称', '条码']
-        uld.upload_data(
+        self.uld.upload_data(
             db_name=db_name,
             table_name=table_name,
             data=_results,
             set_typ=set_typ, # 定义列和数据类型
             primary_keys=[], # 创建唯一主键
-            check_duplicate=
+            check_duplicate=False, # 检查重复数据
             update_on_duplicate=False, # 遇到重复时更新数据,默认 False 跳过
             duplicate_columns=drop_dup, # 指定排重的组合键
             allow_null=False, # 允许插入空值
@@ -470,36 +472,44 @@ class AikuCun:
             headers=headers,
             data=json.dumps(data)
         )
-        print(res.json())
 
 
 def main(start_date, end_date=None, item_type=['spu']):
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+    db_config = {
+        'username': username,
+        'password': password,
+        'host': host,
+        'port': int(port),
+        'pool_size': 3
+    }
+    with uploader.MySQLUploader(**db_config) as uld:
+        with s_query.QueryDatas(**db_config) as download:
+            ak = AikuCun(uld_manager=uld, download_manager=download)
+            # ak.get_sign()
+            for type_ in item_type:
+                if type_ not in ['spu', 'sku']:
+                    logger.error(f'{item_type} 非法参数: {type_}')
+                    continue
+                for i in range(2):
+                    data_list = ak.get_data_from_bbx(
+                        start_date=start_date,
+                        end_date=end_date,
+                        item_type=type_,
+                        page_num=1,
+                        page_size=300
+                    )
+                    if not data_list:
+                        ak.logining()
+                        ak.save_token()
+                        ak.error_count = 0 # 重置错误计数器
+                    else:
+                        break
 
-
-
-
-
-
+                ak.insert_datas(
+                    data_list=data_list,
+                    db_name='爱库存2',
+                    table_name=f'{type_}榜单'
+                )
 
 
 
@@ -508,7 +518,7 @@ if __name__ == '__main__':
         start_date='2025-05-13',
         # end_date='2025-04-28', # 不传则默认到今天
         item_type=[
-
+            'spu',
             'sku'
         ]
     )
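
The rewritten main() acquires both database helpers through nested with blocks, which relies on MySQLUploader and QueryDatas supporting the context-manager protocol; their __enter__/__exit__ methods are not shown in this diff, so the shape assumed here is only a sketch:

    class PooledHelper:
        # Hypothetical stand-in for MySQLUploader / QueryDatas as used by
        # the new main(); only the protocol matters here.
        def __init__(self, **db_config):
            self.db_config = db_config  # pool creation elided

        def __enter__(self):
            return self                 # `as uld` binds the instance itself

        def __exit__(self, exc_type, exc, tb):
            self.close()                # release the pool, error or not
            return False                # do not swallow exceptions

        def close(self):
            print('pool closed')

    with PooledHelper(host='127.0.0.1', port=3306) as helper:
        pass                            # 'pool closed' printed on exit

Nesting the two blocks as main() does guarantees the query pool is released before the uploader pool, even if the crawl raises midway.
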
{mdbq-4.0.10 → mdbq-4.0.12}/mdbq.egg-info/SOURCES.txt
@@ -8,11 +8,8 @@ mdbq.egg-info/dependency_links.txt
 mdbq.egg-info/top_level.txt
 mdbq/aggregation/__init__.py
 mdbq/aggregation/query_data.py
-mdbq/config/__init__.py
-mdbq/config/config.py
 mdbq/log/__init__.py
 mdbq/log/mylogger.py
-mdbq/log/spider_logging.py
 mdbq/mysql/__init__.py
 mdbq/mysql/deduplicator.py
 mdbq/mysql/mysql.py
mdbq-4.0.10/mdbq/__version__.py
DELETED
@@ -1 +0,0 @@
-VERSION = '4.0.10'
mdbq-4.0.10/mdbq/config/config.py
DELETED
@@ -1,95 +0,0 @@
-import os.path
-import re
-
-
-def read_config(file_path):
-    """读取配置文件,返回字典"""
-    if not os.path.isfile(file_path):
-        print(f'配置文件不存在: {file_path}')
-        return
-    config = {}
-    with open(file_path, 'r', encoding='utf-8') as file:
-        for line in file:
-            stripped_line = line.strip()
-            # 跳过空行和注释行(以 # 或 // 开头)
-            if not stripped_line or stripped_line.startswith(('#', '//')):
-                continue
-            # 处理行内注释(# 或 // 前有空格)
-            comment_match = re.search(r'\s+[#//]', line)
-            if comment_match:
-                line = line[:comment_match.start()].strip()
-            else:
-                line = line.strip()
-            # 解析键值对
-            if '=' in line:
-                key, value = line.split('=', 1)
-                config[key.strip()] = value.strip()
-    return config
-
-
-def write_config(file_path, rewrite):
-    """
-    更新配置文件中的键值对,保留注释和其他内容,修复等号空格问题
-    示例:
-    write_config('spd.txt', {'is_spider': True})
-    """
-    # 读取所有行到内存
-    try:
-        with open(file_path, 'r', encoding='utf-8') as file:
-            lines = file.readlines()
-    except FileNotFoundError:
-        with open(file_path, 'w', encoding='utf-8') as file:
-            lines = []
-
-    new_lines = []
-    found_keys = set()
-
-    for line in lines:
-        stripped = line.strip()
-        if not stripped or stripped.startswith(('#', '//')):
-            new_lines.append(line)
-            continue
-
-        # 使用 partition 保留等号格式
-        key_part, sep, value_part = line.partition('=')
-        if not sep: # 没有等号的行直接保留
-            new_lines.append(line)
-            continue
-
-        key = key_part.strip()
-        if key in rewrite:
-            # 处理值部分和注释
-            comment_match = re.search(r'\s+([#//].*)$', value_part)
-            if comment_match:
-                comment = comment_match.group(0)
-                raw_value = value_part[:comment_match.start()].rstrip()
-            else:
-                comment = ''
-                raw_value = value_part.strip()
-
-            # 保留原值前导空格
-            leading_space = re.match(r'^(\s*)', value_part).group(1)
-            new_value = f"{leading_space}{rewrite[key]}{comment}"
-
-            # 构建新行(保留原等号格式)
-            new_line = f"{key_part}{sep}{new_value}\n"
-            new_lines.append(new_line)
-            found_keys.add(key)
-        else:
-            new_lines.append(line)
-
-    # 添加新键值对
-    for key in rewrite:
-        if key not in found_keys:
-            new_lines.append(f"{key} = {rewrite[key]}\n")
-
-    # 写入文件
-    with open(file_path, 'w', encoding='utf-8') as file:
-        file.writelines(new_lines)
-
-
-if __name__ == '__main__':
-    res = read_config('/Users/xigua/数据中心2/spider/spd.txt')
-    print(res)
-    # write_config('spd.txt', {'is_spider': False})
-
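
A format implication worth noting: the deleted read_config() parsed flat key = value lines with no notion of sections, while every new call site requests keys under section='mysql'. The spd.txt file itself has presumably moved to an INI-style layout; a hypothetical before/after with placeholder values:

    # spd.txt as the deleted read_config() expected it (flat key = value):
    host = 127.0.0.1
    port = 3306
    username = user
    password = pass

    # spd.txt as the new sectioned parser presumably expects it:
    [mysql]
    host = 127.0.0.1
    port = 3306
    username = user
    password = pass
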
mdbq-4.0.10/mdbq/log/spider_logging.py
DELETED
@@ -1,47 +0,0 @@
-import logging
-from logging.handlers import RotatingFileHandler
-import platform
-import os
-import sys
-import getpass
-
-
-def setup_logging(reMoveOldHandler=True, filename='spider_tg.log'):
-    """
-    reMoveOldHandler: 替换根日志记录器的所有现有处理器
-    """
-    dir_path = os.path.expanduser("~")
-    if not os.path.isdir(os.path.join(dir_path, 'logfile')):
-        os.makedirs(os.path.join(dir_path, 'logfile'))
-
-    log_file = os.path.join(dir_path, 'logfile', filename)
-    file_handler = RotatingFileHandler(
-        filename=log_file,
-        maxBytes=3*1024*1024, # 3MB
-        backupCount=10,
-        encoding='utf-8' # 明确指定编码(避免Windows乱码)
-    )
-    stream_handler = logging.StreamHandler() # 终端输出Handler
-    formatter = logging.Formatter(
-        fmt='[%(asctime)s] %(levelname)s %(message)s',
-        datefmt='%Y-%m-%d %H:%M:%S'
-    )
-    file_handler.setFormatter(formatter)
-    stream_handler.setFormatter(formatter) # 终端使用相同格式
-    file_handler.setLevel(logging.INFO)
-    stream_handler.setLevel(logging.INFO)
-
-    # 获取根日志记录器并添加Handler
-    logger = logging.getLogger()
-    if reMoveOldHandler:
-        # 移除根日志记录器的所有现有处理器
-        for handler in logger.handlers[:]: # 使用[:]来创建handlers列表的一个副本,因为我们在迭代时修改列表
-            logger.removeHandler(handler)
-    logger.addHandler(file_handler)
-    logger.addHandler(stream_handler)
-    logger.setLevel(logging.INFO) # 设置根日志级别
-    return logger
-
-
-if __name__ == '__main__':
-    pass
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|