mdbq 4.0.10.tar.gz → 4.0.12.tar.gz

This diff shows the contents of two publicly released package versions as they appear in their public registry. It is provided for informational purposes only.
Files changed (36)
  1. {mdbq-4.0.10 → mdbq-4.0.12}/PKG-INFO +1 -1
  2. mdbq-4.0.12/mdbq/__version__.py +1 -0
  3. {mdbq-4.0.10 → mdbq-4.0.12}/mdbq/aggregation/query_data.py +8 -4
  4. {mdbq-4.0.10 → mdbq-4.0.12}/mdbq/mysql/deduplicator.py +10 -3
  5. {mdbq-4.0.10 → mdbq-4.0.12}/mdbq/mysql/s_query.py +15 -11
  6. {mdbq-4.0.10 → mdbq-4.0.12}/mdbq/mysql/unique_.py +7 -3
  7. {mdbq-4.0.10 → mdbq-4.0.12}/mdbq/mysql/uploader.py +9 -3
  8. {mdbq-4.0.10 → mdbq-4.0.12}/mdbq/other/download_sku_picture.py +8 -5
  9. {mdbq-4.0.10 → mdbq-4.0.12}/mdbq/spider/aikucun.py +60 -50
  10. {mdbq-4.0.10 → mdbq-4.0.12}/mdbq.egg-info/PKG-INFO +1 -1
  11. {mdbq-4.0.10 → mdbq-4.0.12}/mdbq.egg-info/SOURCES.txt +0 -3
  12. mdbq-4.0.10/mdbq/__version__.py +0 -1
  13. mdbq-4.0.10/mdbq/config/config.py +0 -95
  14. mdbq-4.0.10/mdbq/log/spider_logging.py +0 -47
  15. mdbq-4.0.10/mdbq/other/__init__.py +0 -4
  16. {mdbq-4.0.10 → mdbq-4.0.12}/README.txt +0 -0
  17. {mdbq-4.0.10 → mdbq-4.0.12}/mdbq/__init__.py +0 -0
  18. {mdbq-4.0.10 → mdbq-4.0.12}/mdbq/aggregation/__init__.py +0 -0
  19. {mdbq-4.0.10 → mdbq-4.0.12}/mdbq/log/__init__.py +0 -0
  20. {mdbq-4.0.10 → mdbq-4.0.12}/mdbq/log/mylogger.py +0 -0
  21. {mdbq-4.0.10 → mdbq-4.0.12}/mdbq/mysql/__init__.py +0 -0
  22. {mdbq-4.0.10 → mdbq-4.0.12}/mdbq/mysql/mysql.py +0 -0
  23. {mdbq-4.0.10/mdbq/config → mdbq-4.0.12/mdbq/other}/__init__.py +0 -0
  24. {mdbq-4.0.10 → mdbq-4.0.12}/mdbq/other/otk.py +0 -0
  25. {mdbq-4.0.10 → mdbq-4.0.12}/mdbq/other/pov_city.py +0 -0
  26. {mdbq-4.0.10 → mdbq-4.0.12}/mdbq/other/ua_sj.py +0 -0
  27. {mdbq-4.0.10 → mdbq-4.0.12}/mdbq/pbix/__init__.py +0 -0
  28. {mdbq-4.0.10 → mdbq-4.0.12}/mdbq/pbix/pbix_refresh.py +0 -0
  29. {mdbq-4.0.10 → mdbq-4.0.12}/mdbq/pbix/refresh_all.py +0 -0
  30. {mdbq-4.0.10 → mdbq-4.0.12}/mdbq/redis/__init__.py +0 -0
  31. {mdbq-4.0.10 → mdbq-4.0.12}/mdbq/redis/getredis.py +0 -0
  32. {mdbq-4.0.10 → mdbq-4.0.12}/mdbq/spider/__init__.py +0 -0
  33. {mdbq-4.0.10 → mdbq-4.0.12}/mdbq.egg-info/dependency_links.txt +0 -0
  34. {mdbq-4.0.10 → mdbq-4.0.12}/mdbq.egg-info/top_level.txt +0 -0
  35. {mdbq-4.0.10 → mdbq-4.0.12}/setup.cfg +0 -0
  36. {mdbq-4.0.10 → mdbq-4.0.12}/setup.py +0 -0
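
The headline change in 4.0.12 is a configuration-loading migration: the hand-rolled key=value parser in mdbq/config/config.py and the helper mdbq/log/spider_logging.py are deleted, and every entry point now reads its MySQL credentials through mdbq.conf.conf.ConfigParser().get_section_values(...). The mdbq.conf module itself is not part of this diff, so the stand-in below is inferred purely from the call sites; a minimal sketch, assuming an INI-style file readable by the stdlib configparser:

    # Hypothetical stand-in for mdbq.conf.conf.ConfigParser -- inferred from
    # the call sites in this diff, not the real implementation.
    import configparser
    from typing import List, Tuple

    class ConfigParser:
        def get_section_values(self, file_path: str, section: str, keys: List[str]) -> Tuple[str, ...]:
            """Read `section` from an INI-style file and return the values
            of `keys`, in the order the keys are listed."""
            cp = configparser.ConfigParser()
            cp.read(file_path, encoding='utf-8')
            return tuple(cp[section][key] for key in keys)

Note that the return order follows the keys argument, which is why every call site below unpacks host, port, username, password from keys=['host', 'port', 'username', 'password']. Note also that values come back as strings; call sites that need an integer port still cast it (port=int(port)).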
{mdbq-4.0.10 → mdbq-4.0.12}/PKG-INFO
@@ -1,6 +1,6 @@
 Metadata-Version: 2.2
 Name: mdbq
-Version: 4.0.10
+Version: 4.0.12
 Home-page: https://pypi.org/project/mdbq
 Author: xigua,
 Author-email: 2587125111@qq.com
mdbq-4.0.12/mdbq/__version__.py
@@ -0,0 +1 @@
+VERSION = '4.0.12'
{mdbq-4.0.10 → mdbq-4.0.12}/mdbq/aggregation/query_data.py
@@ -3,7 +3,7 @@ import re
 # from mdbq.mysql import mysql
 from mdbq.mysql import uploader
 from mdbq.mysql import s_query
-from mdbq.config import config
+from mdbq.conf import conf
 from mdbq.log import mylogger
 import datetime
 from dateutil.relativedelta import relativedelta
@@ -18,9 +18,12 @@ from collections.abc import Mapping, Sequence
 import inspect
 
 dir_path = os.path.expanduser("~")
-config_file = os.path.join(dir_path, 'spd.txt')
-content = config.read_config(file_path=config_file)
-username, password, host, port = content['username'], content['password'], content['host'], content['port']
+parser = conf.ConfigParser()
+host, port, username, password = parser.get_section_values(
+    file_path=os.path.join(dir_path, 'spd.txt'),
+    section='mysql',
+    keys=['host', 'port', 'username', 'password'],
+)
 host = 'localhost'
 uld = uploader.MySQLUploader(username=username, password=password, host=host, port=int(port), pool_size=10)
 
@@ -3676,6 +3679,7 @@ def query3(months=1, download_manager=None):
 
 
 def main(months=3):
+    logger.info('数据聚合任务开始')
     # 1. 更新日期表 更新货品年份基准表, 属性设置 3 - 货品年份基准
    date_table()
     # 2. 数据聚合
{mdbq-4.0.10 → mdbq-4.0.12}/mdbq/mysql/deduplicator.py
@@ -6,6 +6,7 @@ import warnings
 import pymysql
 import os
 from mdbq.log import mylogger
+from mdbq.conf import conf
 from typing import List, Dict, Optional, Any, Tuple
 from dbutils.pooled_db import PooledDB
 import threading
@@ -1348,10 +1349,14 @@ class MySQLDeduplicator:
 
 
 def main():
-    from mdbq.config import config
+    logger.info('去重任务开始')
     dir_path = os.path.expanduser("~")
-    my_cont = config.read_config(file_path=os.path.join(dir_path, 'spd.txt'))
-    username, password, host, port = my_cont['username'], my_cont['password'], my_cont['host'], int(my_cont['port'])
+    parser = conf.ConfigParser()
+    host, port, username, password = parser.get_section_values(
+        file_path=os.path.join(dir_path, 'spd.txt'),
+        section='mysql',
+        keys=['host', 'port', 'username', 'password'],
+    )
     # host = 'localhost'
 
     deduplicator = MySQLDeduplicator(
@@ -1401,6 +1406,8 @@ def main():
 
     # 关闭连接
     deduplicator.close()
+    logger.info('去重任务结束')
+
 
 if __name__ == '__main__':
     main()
{mdbq-4.0.10 → mdbq-4.0.12}/mdbq/mysql/s_query.py
@@ -7,7 +7,7 @@ from decimal import Decimal
 from contextlib import closing
 from mdbq.log import mylogger
 import os
-from mdbq.config import config
+from mdbq.conf import conf
 from typing import Optional, Dict, List, Set, Tuple, Union, Any, Literal
 from dbutils.pooled_db import PooledDB
 import time
@@ -35,7 +35,7 @@ class QueryDatas:
     """
 
     def __init__(self, username: str, password: str, host: str, port: int, charset: str = 'utf8mb4',
-                 maxconnections: int = 20, mincached: int = 2, maxcached: int = 5,
+                 pool_size: int = 20, mincached: int = 2, maxcached: int = 5,
                  connect_timeout: int = 10, read_timeout: int = 30, write_timeout: int = 30,
                  max_retries: int = 3, retry_waiting_time: int = 5, collation: str = 'utf8mb4_0900_ai_ci') -> None:
         """
@@ -47,7 +47,7 @@ class QueryDatas:
             host: 数据库主机
             port: 数据库端口
             charset: 字符集,默认utf8mb4
-            maxconnections: 最大活动连接数,默认20
+            pool_size: 最大活动连接数,默认20
             mincached: 最小缓存连接数,空闲连接数量,默认2
             maxcached: 最大缓存连接数,最大空闲连接数,默认5
             connect_timeout: 连接超时时间,默认10秒
@@ -87,14 +87,14 @@ class QueryDatas:
             'write_timeout': write_timeout,
             'autocommit': True
         }
-        self.pool = self._create_connection_pool(maxconnections, mincached, maxcached)
+        self.pool = self._create_connection_pool(pool_size, mincached, maxcached)
 
-    def _create_connection_pool(self, maxconnections: int, mincached: int, maxcached: int) -> PooledDB:
+    def _create_connection_pool(self, pool_size: int, mincached: int, maxcached: int) -> PooledDB:
         """
         创建数据库连接池
 
         Args:
-            maxconnections: 最大连接数
+            pool_size: 最大连接数
             mincached: 最小缓存连接数
             maxcached: 最大缓存连接数
 
@@ -122,7 +122,7 @@ class QueryDatas:
         }
         pool_params = {
             'creator': pymysql,
-            'maxconnections': maxconnections,
+            'maxconnections': pool_size,
             'mincached': mincached,
             'maxcached': maxcached,
             'blocking': True,
@@ -133,7 +133,7 @@ class QueryDatas:
         try:
             pool = PooledDB(**pool_params, **connection_params)
             logger.debug('连接池创建成功', {
-                '连接池大小': maxconnections,
+                '连接池大小': pool_size,
                 '最小缓存': mincached,
                 '最大缓存': maxcached,
                 '主机': self.host,
@@ -717,7 +717,7 @@ class QueryDatas:
         if hasattr(self, 'pool') and self.pool is not None:
             try:
                 self.pool.close()
-                logger.info('连接池已关闭', {
+                logger.debug('连接池已关闭', {
                     '主机': self.host,
                     '端口': self.port
                 })
@@ -949,8 +949,12 @@ class QueryDatas:
 
 def main():
     dir_path = os.path.expanduser("~")
-    my_cont = config.read_config(file_path=os.path.join(dir_path, 'spd.txt'))
-    username, password, host, port = my_cont['username'], my_cont['password'], my_cont['host'], int(my_cont['port'])
+    parser = conf.ConfigParser()
+    host, port, username, password = parser.get_section_values(
+        file_path=os.path.join(dir_path, 'spd.txt'),
+        section='mysql',
+        keys=['host', 'port', 'username', 'password'],
+    )
     host = 'localhost'
 
     qd = QueryDatas(username=username, password=password, host=host, port=port)
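
Beyond the config migration, the visible change to QueryDatas is a public keyword rename: the constructor and _create_connection_pool now take pool_size instead of maxconnections, while the dictionary handed to DBUtils' PooledDB keeps PooledDB's own 'maxconnections' key. Callers written against 4.0.10 must update the keyword; a sketch of the new call shape (connection values are illustrative):

    # Illustrative values; the `pool_size` keyword is the point here.
    qd = QueryDatas(
        username='user', password='pass', host='127.0.0.1', port=3306,
        pool_size=20,  # was `maxconnections` in 4.0.10; the old keyword is no longer accepted and raises TypeError
    )

The remaining change in this file is cosmetic: the "连接池已关闭" message drops from info to debug level.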
{mdbq-4.0.10 → mdbq-4.0.12}/mdbq/mysql/unique_.py
@@ -2,7 +2,7 @@ import re
 import pymysql
 from typing import List, Dict, Any, Tuple
 from mdbq.log import mylogger
-from mdbq.config import config
+from mdbq.conf import conf
 from dbutils.pooled_db import PooledDB
 import os
 
@@ -274,8 +274,12 @@ class UniqueManager:
 
 def main():
     dir_path = os.path.expanduser("~")
-    my_cont = config.read_config(file_path=os.path.join(dir_path, 'spd.txt'))
-    username, password, host, port = my_cont['username'], my_cont['password'], my_cont['host'], int(my_cont['port'])
+    parser = conf.ConfigParser()
+    host, port, username, password = parser.get_section_values(
+        file_path=os.path.join(dir_path, 'spd.txt'),
+        section='mysql',
+        keys=['host', 'port', 'username', 'password'],
+    )
     # host = 'localhost'
 
     my_databases = [
{mdbq-4.0.10 → mdbq-4.0.12}/mdbq/mysql/uploader.py
@@ -8,7 +8,7 @@ import pymysql
 import pandas as pd
 import os
 from mdbq.log import mylogger
-from mdbq.config import config
+from mdbq.conf import conf
 from typing import Union, List, Dict, Optional, Any, Tuple, Set
 from dbutils.pooled_db import PooledDB
 import json
@@ -539,6 +539,8 @@ class MySQLUploader:
             is_nan = True
         elif str(value).lower() in ['nan', 'none']:
             is_nan = True
+        elif value == '':
+            is_nan = True
         if is_nan:
             if not allow_null:
                 if 'int' in column_type_lower:
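
In MySQLUploader's value validation, 4.0.12 adds exactly one branch: an empty string is now classified as a missing value alongside the NaN/None-like strings. Condensed into a standalone predicate (the helper name and the branch above the visible hunk are assumptions; only the two `elif` branches appear in this diff):

    def looks_missing(value) -> bool:
        # Hypothetical condensation of uploader.py's is_nan cascade.
        if value is None:                        # assumed earlier branch, not shown in the hunk
            return True
        elif str(value).lower() in ['nan', 'none']:
            return True
        elif value == '':                        # new in 4.0.12
            return True
        return False

The practical effect is that '' now goes through the same NULL/type-default handling as NaN instead of being inserted as an empty string.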
@@ -1735,8 +1737,12 @@ class MySQLUploader:
 
 def main():
     dir_path = os.path.expanduser("~")
-    my_cont = config.read_config(file_path=os.path.join(dir_path, 'spd.txt'))
-    username, password, host, port = my_cont['username'], my_cont['password'], my_cont['host'], int(my_cont['port'])
+    parser = conf.ConfigParser()
+    host, port, username, password = parser.get_section_values(
+        file_path=os.path.join(dir_path, 'spd.txt'),
+        section='mysql',
+        keys=['host', 'port', 'username', 'password'],
+    )
     host = 'localhost'
 
     uploader = MySQLUploader(
{mdbq-4.0.10 → mdbq-4.0.12}/mdbq/other/download_sku_picture.py
@@ -17,8 +17,7 @@ from selenium.webdriver.support.wait import WebDriverWait
 from selenium.webdriver.common.by import By
 from selenium.webdriver.support import expected_conditions as EC
 from selenium.webdriver.chrome.service import Service
-from mdbq.config import set_support
-from mdbq.config import config
+from mdbq.conf import conf
 from mdbq.mysql import mysql
 from mdbq.mysql import s_query
 from mdbq.other import ua_sj
@@ -49,8 +48,12 @@ if not os.path.exists(upload_path): # 数据中心根目录
 
 dir_path = os.path.expanduser("~")
 config_file = os.path.join(dir_path, 'spd.txt')
-content = config.read_config(file_path=config_file)
-username, password, host, port = content['username'], content['password'], content['host'], content['port']
+parser = conf.ConfigParser()
+host, port, username, password = parser.get_section_values(
+    file_path=config_file,
+    section='mysql',
+    keys=['host', 'port', 'username', 'password'],
+)
 m_engine = mysql.MysqlUpload(username=username, password=password, host=host, port=port, charset='utf8mb4')
 
 if not username:
@@ -62,7 +65,7 @@ class LoadAccount:
 
     def __init__(self):
         self.url = 'https://login.taobao.com/'  # 默认登录淘宝
-        self.cookie_path = os.path.join(set_support.SetSupport(dirname='support').dirname, 'cookies')
+        self.cookie_path = None
 
     def __call__(self, *args, **kwargs):
         self.check_cookie()  # 检测cookie有效期, 但不阻断任务
{mdbq-4.0.10 → mdbq-4.0.12}/mdbq/spider/aikucun.py
@@ -15,19 +15,20 @@ from selenium.webdriver.chrome.service import Service
 import pymysql
 from mdbq.mysql import uploader
 from mdbq.mysql import s_query
-from mdbq.config import config
+from mdbq.conf import conf
 from mdbq.other import ua_sj
 from mdbq.other import otk
 from mdbq.log import mylogger
 
 dir_path = os.path.expanduser("~")
-config_file = os.path.join(dir_path, 'spd.txt')
-content = config.read_config(file_path=config_file)
-username, password, host, port = content['username'], content['password'], content['host'], content['port']
+parser = conf.ConfigParser()
+host, port, username, password = parser.get_section_values(
+    file_path=os.path.join(dir_path, 'spd.txt'),
+    section='mysql',
+    keys=['host', 'port', 'username', 'password'],
+)
 
-uld = uploader.MySQLUploader(username=username, password=password, host=host, port=int(port), pool_size=10)
 # 实例化一个数据查询类,用来获取 cookies 表数据
-download = s_query.QueryDatas(username=username, password=password, host=host, port=port)
 logger = mylogger.MyLogger(
     logging_mode='file',
     log_level='info',
@@ -48,15 +49,15 @@ def keep_connect(_db_name, _config, max_try: int=10):
             connection = pymysql.connect(**_config)  # 连接数据库
             return connection
         except Exception as e:
-            logger.error(f'{_db_name}: 连接失败,正在重试: {host}:{port} {attempts}/{max_try} {e}')
+            logger.error('连接失败', {'数据库': _db_name, '主机': host, '端口': port, '重试次数': attempts, '最大重试次数': max_try, '错误信息': e})
             attempts += 1
             time.sleep(30)
-    logger.error(f'{_db_name}: 连接失败,重试次数超限,当前设定次数: {max_try}')
+    logger.error('连接失败', {'数据库': _db_name, '主机': host, '端口': port, '重试次数': attempts, '最大重试次数': max_try})
     return None
 
 
 class AikuCun:
-    def __init__(self):
+    def __init__(self, uld_manager, download_manager):
         self.url = 'https://gray-merc.aikucun.com/index.html'
         self.db_name = 'cookie文件'
         self.table_name = 'main_aikucun'
@@ -66,6 +67,8 @@ class AikuCun:
         self.start_date = (self.today - datetime.timedelta(days=7)).strftime('%Y-%m-%d')
         self.end_date = (self.today - datetime.timedelta(days=1)).strftime('%Y-%m-%d')
         self.error_count = 0
+        self.uld = uld_manager
+        self.download = download_manager
 
     def logining(self, shop_name='aikucun', headless=False):
         option = webdriver.ChromeOptions()
@@ -171,7 +174,7 @@ class AikuCun:
 
     def save_token(self):
         if not self.token:
-            print('self.token 不能为空')
+            logger.error('self.token 不能为空')
             return
         set_typ = {
             '日期': 'DATE',
@@ -182,11 +185,11 @@ class AikuCun:
             '更新时间': 'timestamp'
         }
         # 更新至数据库记录
-        uld.upload_data(
+        self.uld.upload_data(
             db_name=self.db_name,
             table_name=self.table_name,
             data=self.token,
-            set_typ={},
+            set_typ=set_typ,
             primary_keys=[],
             check_duplicate=False,
             update_on_duplicate=False,
@@ -209,7 +212,7 @@ class AikuCun:
         self.end_date = end_date
         date_list = otk.dates_between(start_date=self.start_date, end_date=self.end_date)
 
-        df = download.data_to_df(
+        df = self.download.data_to_df(
             db_name=self.db_name,
             table_name=self.table_name,
             start_date='2025-03-07',
@@ -230,7 +233,7 @@ class AikuCun:
         idx = df.groupby(['平台', '店铺名称'])['更新时间'].idxmax()
         df = df.loc[idx][['token']]
         if len(df) == 0:
-            print(f'从数据库获取的 token 不能为空')
+            logger.error(f'从数据库获取的 token 不能为空')
             return
         self.token = df.iloc[0, 0]
 
@@ -247,7 +250,7 @@ class AikuCun:
         results = []
         for date in date_list:
             if self.error_count > 5:
-                print('已退出请求 -> self.error_count > 5')
+                logger.logger('已退出请求 -> self.error_count > 5')
                 break
             req_date = re.sub('-', '', date)
             data = {
@@ -273,16 +276,15 @@ class AikuCun:
                 # cookies=cookies,
                 data=json.dumps(data)
             )
-            print(f'正在获取数据({num}/{len(date_list)}): {item_type}榜单 {date}')
-            # print(res.json())
+            logger.info('获取数据', {'进度': num/len(date_list), '日期': date, '榜单类型': item_type})
             if not res.json().get('success', None):
-                print('没有获取到数据, 请求不成功, 如果连续请求失败 > 5, 则需重新获取cookie后继续')
+                logger.error('没有获取到数据, 请求不成功, 如果连续请求失败 > 5, 则需重新获取cookie后继续')
                 num += 1
                 self.error_count += 1
                 time.sleep(1)
                 continue
             if not res.json().get('data', {}).get('rows', None):
-                print("返回的数据字典异常, ['data']['rows'] 不能为空")
+                logger.error("返回的数据字典异常, ['data']['rows'] 不能为空")
                 num += 1
                 self.error_count += 1
                 time.sleep(1)
@@ -291,7 +293,7 @@ class AikuCun:
             num += 1
             time.sleep(1)
             if num % 32 == 0:
-                print("避免频繁请求, 正在休眠...")
+                logger.info("避免频繁请求, 正在休眠...")
                 # time.sleep(60)
 
         return results
@@ -413,18 +415,18 @@ class AikuCun:
             '尺码': 'varchar(50)',
             '货号': 'varchar(50)',  # 款号 + 颜色编码
         }
-        print(f'{self.shop_name} 正在更新数据库 {db_name} -> {table_name}...')
+        logger.info('更新数据库', {'店铺名称': self.shop_name, '库': db_name, '表': table_name})
         if 'spu' in table_name:
             drop_dup = ['日期', '平台', '店铺名称', '商品款号', '访客量']
         else:
             drop_dup = ['日期', '平台', '店铺名称', '条码']
-        uld.upload_data(
+        self.uld.upload_data(
             db_name=db_name,
             table_name=table_name,
             data=_results,
             set_typ=set_typ,  # 定义列和数据类型
             primary_keys=[],  # 创建唯一主键
-            check_duplicate=True,  # 检查重复数据
+            check_duplicate=False,  # 检查重复数据
             update_on_duplicate=False,  # 遇到重复时更新数据,默认 False 跳过
             duplicate_columns=drop_dup,  # 指定排重的组合键
             allow_null=False,  # 允许插入空值
@@ -470,36 +472,44 @@ class AikuCun:
             headers=headers,
             data=json.dumps(data)
         )
-        print(res.json())
 
 
 def main(start_date, end_date=None, item_type=['spu']):
-    ak = AikuCun()
-    # ak.get_sign()
-    for type_ in item_type:
-        if type_ not in ['spu', 'sku']:
-            print(f'{item_type} 非法参数: {type_}')
-            continue
-        for i in range(2):
-            data_list = ak.get_data_from_bbx(
-                start_date=start_date,
-                end_date=end_date,
-                item_type=type_,
-                page_num=1,
-                page_size=300
-            )
-            if not data_list:
-                ak.logining()
-                ak.save_token()
-                ak.error_count = 0  # 重置错误计数器
-            else:
-                break
+    db_config = {
+        'username': username,
+        'password': password,
+        'host': host,
+        'port': int(port),
+        'pool_size': 3
+    }
+    with uploader.MySQLUploader(**db_config) as uld:
+        with s_query.QueryDatas(**db_config) as download:
+            ak = AikuCun(uld_manager=uld, download_manager=download)
+            # ak.get_sign()
+            for type_ in item_type:
+                if type_ not in ['spu', 'sku']:
+                    logger.error(f'{item_type} 非法参数: {type_}')
+                    continue
+                for i in range(2):
+                    data_list = ak.get_data_from_bbx(
+                        start_date=start_date,
+                        end_date=end_date,
+                        item_type=type_,
+                        page_num=1,
+                        page_size=300
+                    )
+                    if not data_list:
+                        ak.logining()
+                        ak.save_token()
+                        ak.error_count = 0  # 重置错误计数器
+                    else:
+                        break
 
-    ak.insert_datas(
-        data_list=data_list,
-        db_name='爱库存2',
-        table_name=f'{type_}榜单'
-    )
+                ak.insert_datas(
+                    data_list=data_list,
+                    db_name='爱库存2',
+                    table_name=f'{type_}榜单'
+                )
 
 
 
@@ -508,7 +518,7 @@ if __name__ == '__main__':
         start_date='2025-05-13',
         # end_date='2025-04-28', # 不传则默认到今天
         item_type=[
-            # 'spu',
+            'spu',
             'sku'
         ]
     )
{mdbq-4.0.10 → mdbq-4.0.12}/mdbq.egg-info/PKG-INFO
@@ -1,6 +1,6 @@
 Metadata-Version: 2.2
 Name: mdbq
-Version: 4.0.10
+Version: 4.0.12
 Home-page: https://pypi.org/project/mdbq
 Author: xigua,
 Author-email: 2587125111@qq.com
{mdbq-4.0.10 → mdbq-4.0.12}/mdbq.egg-info/SOURCES.txt
@@ -8,11 +8,8 @@ mdbq.egg-info/dependency_links.txt
 mdbq.egg-info/top_level.txt
 mdbq/aggregation/__init__.py
 mdbq/aggregation/query_data.py
-mdbq/config/__init__.py
-mdbq/config/config.py
 mdbq/log/__init__.py
 mdbq/log/mylogger.py
-mdbq/log/spider_logging.py
 mdbq/mysql/__init__.py
 mdbq/mysql/deduplicator.py
 mdbq/mysql/mysql.py
mdbq-4.0.10/mdbq/__version__.py
@@ -1 +0,0 @@
-VERSION = '4.0.10'
mdbq-4.0.10/mdbq/config/config.py
@@ -1,95 +0,0 @@
-import os.path
-import re
-
-
-def read_config(file_path):
-    """读取配置文件,返回字典"""
-    if not os.path.isfile(file_path):
-        print(f'配置文件不存在: {file_path}')
-        return
-    config = {}
-    with open(file_path, 'r', encoding='utf-8') as file:
-        for line in file:
-            stripped_line = line.strip()
-            # 跳过空行和注释行(以 # 或 // 开头)
-            if not stripped_line or stripped_line.startswith(('#', '//')):
-                continue
-            # 处理行内注释(# 或 // 前有空格)
-            comment_match = re.search(r'\s+[#//]', line)
-            if comment_match:
-                line = line[:comment_match.start()].strip()
-            else:
-                line = line.strip()
-            # 解析键值对
-            if '=' in line:
-                key, value = line.split('=', 1)
-                config[key.strip()] = value.strip()
-    return config
-
-
-def write_config(file_path, rewrite):
-    """
-    更新配置文件中的键值对,保留注释和其他内容,修复等号空格问题
-    示例:
-    write_config('spd.txt', {'is_spider': True})
-    """
-    # 读取所有行到内存
-    try:
-        with open(file_path, 'r', encoding='utf-8') as file:
-            lines = file.readlines()
-    except FileNotFoundError:
-        with open(file_path, 'w', encoding='utf-8') as file:
-            lines = []
-
-    new_lines = []
-    found_keys = set()
-
-    for line in lines:
-        stripped = line.strip()
-        if not stripped or stripped.startswith(('#', '//')):
-            new_lines.append(line)
-            continue
-
-        # 使用 partition 保留等号格式
-        key_part, sep, value_part = line.partition('=')
-        if not sep:  # 没有等号的行直接保留
-            new_lines.append(line)
-            continue
-
-        key = key_part.strip()
-        if key in rewrite:
-            # 处理值部分和注释
-            comment_match = re.search(r'\s+([#//].*)$', value_part)
-            if comment_match:
-                comment = comment_match.group(0)
-                raw_value = value_part[:comment_match.start()].rstrip()
-            else:
-                comment = ''
-                raw_value = value_part.strip()
-
-            # 保留原值前导空格
-            leading_space = re.match(r'^(\s*)', value_part).group(1)
-            new_value = f"{leading_space}{rewrite[key]}{comment}"
-
-            # 构建新行(保留原等号格式)
-            new_line = f"{key_part}{sep}{new_value}\n"
-            new_lines.append(new_line)
-            found_keys.add(key)
-        else:
-            new_lines.append(line)
-
-    # 添加新键值对
-    for key in rewrite:
-        if key not in found_keys:
-            new_lines.append(f"{key} = {rewrite[key]}\n")
-
-    # 写入文件
-    with open(file_path, 'w', encoding='utf-8') as file:
-        file.writelines(new_lines)
-
-
-if __name__ == '__main__':
-    res = read_config('/Users/xigua/数据中心2/spider/spd.txt')
-    print(res)
-    # write_config('spd.txt', {'is_spider': False})
-
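
The deleted parser above read flat `key = value` lines (with # or // comments) and had no notion of sections, while the replacement call sites all pass section='mysql'. An existing spd.txt therefore presumably needs a section header after upgrading; an illustrative before/after (the real file contents are not part of this diff, values are placeholders):

    # spd.txt as read by mdbq 4.0.10 (flat key = value):
    host = 192.168.1.100
    port = 3306
    username = root
    password = ******

    # spd.txt as the 4.0.12 call sites expect (INI-style section):
    [mysql]
    host = 192.168.1.100
    port = 3306
    username = root
    password = ******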
mdbq-4.0.10/mdbq/log/spider_logging.py
@@ -1,47 +0,0 @@
-import logging
-from logging.handlers import RotatingFileHandler
-import platform
-import os
-import sys
-import getpass
-
-
-def setup_logging(reMoveOldHandler=True, filename='spider_tg.log'):
-    """
-    reMoveOldHandler: 替换根日志记录器的所有现有处理器
-    """
-    dir_path = os.path.expanduser("~")
-    if not os.path.isdir(os.path.join(dir_path, 'logfile')):
-        os.makedirs(os.path.join(dir_path, 'logfile'))
-
-    log_file = os.path.join(dir_path, 'logfile', filename)
-    file_handler = RotatingFileHandler(
-        filename=log_file,
-        maxBytes=3*1024*1024,  # 3MB
-        backupCount=10,
-        encoding='utf-8'  # 明确指定编码(避免Windows乱码)
-    )
-    stream_handler = logging.StreamHandler()  # 终端输出Handler
-    formatter = logging.Formatter(
-        fmt='[%(asctime)s] %(levelname)s %(message)s',
-        datefmt='%Y-%m-%d %H:%M:%S'
-    )
-    file_handler.setFormatter(formatter)
-    stream_handler.setFormatter(formatter)  # 终端使用相同格式
-    file_handler.setLevel(logging.INFO)
-    stream_handler.setLevel(logging.INFO)
-
-    # 获取根日志记录器并添加Handler
-    logger = logging.getLogger()
-    if reMoveOldHandler:
-        # 移除根日志记录器的所有现有处理器
-        for handler in logger.handlers[:]:  # 使用[:]来创建handlers列表的一个副本,因为我们在迭代时修改列表
-            logger.removeHandler(handler)
-    logger.addHandler(file_handler)
-    logger.addHandler(stream_handler)
-    logger.setLevel(logging.INFO)  # 设置根日志级别
-    return logger
-
-
-if __name__ == '__main__':
-    pass
mdbq-4.0.10/mdbq/other/__init__.py
@@ -1,4 +0,0 @@
-
-
-
-# 配置文件