mdbq 4.0.10__py3-none-any.whl → 4.0.11__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
mdbq/__version__.py CHANGED
@@ -1 +1 @@
- VERSION = '4.0.10'
+ VERSION = '4.0.11'
mdbq/aggregation/query_data.py CHANGED
@@ -3676,6 +3676,7 @@ def query3(months=1, download_manager=None):
 
 
  def main(months=3):
+ logger.info('数据聚合任务开始')
  # 1. 更新日期表 更新货品年份基准表, 属性设置 3 - 货品年份基准
  date_table()
  # 2. 数据聚合
mdbq/mysql/deduplicator.py CHANGED
@@ -6,6 +6,7 @@ import warnings
  import pymysql
  import os
  from mdbq.log import mylogger
+ from mdbq.config import config
  from typing import List, Dict, Optional, Any, Tuple
  from dbutils.pooled_db import PooledDB
  import threading
@@ -1348,7 +1349,7 @@ class MySQLDeduplicator:
 
 
  def main():
- from mdbq.config import config
+ logger.info('去重任务开始')
  dir_path = os.path.expanduser("~")
  my_cont = config.read_config(file_path=os.path.join(dir_path, 'spd.txt'))
  username, password, host, port = my_cont['username'], my_cont['password'], my_cont['host'], int(my_cont['port'])
@@ -1401,6 +1402,8 @@ def main():
 
  # 关闭连接
  deduplicator.close()
+ logger.info('去重任务结束')
+
 
  if __name__ == '__main__':
  main()
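
The config import moves from inside main() to module scope, so the deduplicator's entry point now reads credentials through the shared helper at import time rather than on call. A minimal sketch of that flow, assuming only what the hunks above show (spd.txt in the home directory, the four connection keys); the helper function name is illustrative:

    # Sketch of the credential loading used by the new main(); file name and
    # dict keys come from the diff, load_db_credentials() itself is invented.
    import os
    from mdbq.config import config  # module-level import added in 4.0.11

    def load_db_credentials():
        dir_path = os.path.expanduser("~")
        cont = config.read_config(file_path=os.path.join(dir_path, 'spd.txt'))
        return cont['username'], cont['password'], cont['host'], int(cont['port'])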
mdbq/mysql/s_query.py CHANGED
@@ -35,7 +35,7 @@ class QueryDatas:
  """
 
  def __init__(self, username: str, password: str, host: str, port: int, charset: str = 'utf8mb4',
- maxconnections: int = 20, mincached: int = 2, maxcached: int = 5,
+ pool_size: int = 20, mincached: int = 2, maxcached: int = 5,
  connect_timeout: int = 10, read_timeout: int = 30, write_timeout: int = 30,
  max_retries: int = 3, retry_waiting_time: int = 5, collation: str = 'utf8mb4_0900_ai_ci') -> None:
  """
@@ -47,7 +47,7 @@ class QueryDatas:
  host: 数据库主机
  port: 数据库端口
  charset: 字符集,默认utf8mb4
- maxconnections: 最大活动连接数,默认20
+ pool_size: 最大活动连接数,默认20
  mincached: 最小缓存连接数,空闲连接数量,默认2
  maxcached: 最大缓存连接数,最大空闲连接数,默认5
  connect_timeout: 连接超时时间,默认10秒
@@ -87,14 +87,14 @@ class QueryDatas:
  'write_timeout': write_timeout,
  'autocommit': True
  }
- self.pool = self._create_connection_pool(maxconnections, mincached, maxcached)
+ self.pool = self._create_connection_pool(pool_size, mincached, maxcached)
 
- def _create_connection_pool(self, maxconnections: int, mincached: int, maxcached: int) -> PooledDB:
+ def _create_connection_pool(self, pool_size: int, mincached: int, maxcached: int) -> PooledDB:
  """
  创建数据库连接池
 
  Args:
- maxconnections: 最大连接数
+ pool_size: 最大连接数
  mincached: 最小缓存连接数
  maxcached: 最大缓存连接数
 
@@ -122,7 +122,7 @@ class QueryDatas:
  }
  pool_params = {
  'creator': pymysql,
- 'maxconnections': maxconnections,
+ 'maxconnections': pool_size,
  'mincached': mincached,
  'maxcached': maxcached,
  'blocking': True,
@@ -133,7 +133,7 @@ class QueryDatas:
  try:
  pool = PooledDB(**pool_params, **connection_params)
  logger.debug('连接池创建成功', {
- '连接池大小': maxconnections,
+ '连接池大小': pool_size,
  '最小缓存': mincached,
  '最大缓存': maxcached,
  '主机': self.host,
@@ -717,7 +717,7 @@ class QueryDatas:
  if hasattr(self, 'pool') and self.pool is not None:
  try:
  self.pool.close()
- logger.info('连接池已关闭', {
+ logger.debug('连接池已关闭', {
  '主机': self.host,
  '端口': self.port
  })
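
The rename of maxconnections to pool_size (still mapped onto PooledDB's maxconnections internally, per the hunk above) brings QueryDatas in line with MySQLUploader, so a single config dict can drive both constructors, as the reworked aikucun.main() below does. A sketch with placeholder credentials:

    # Placeholder credentials; in 4.0.10 QueryDatas took `maxconnections` instead.
    from mdbq.mysql import s_query, uploader

    db_config = {
        'username': 'user',
        'password': 'secret',
        'host': '127.0.0.1',
        'port': 3306,
        'pool_size': 3,  # keyword shared by both classes since 4.0.11
    }
    download = s_query.QueryDatas(**db_config)
    uld = uploader.MySQLUploader(**db_config)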
mdbq/mysql/uploader.py CHANGED
@@ -539,6 +539,8 @@ class MySQLUploader:
  is_nan = True
  elif str(value).lower() in ['nan', 'none']:
  is_nan = True
+ elif value == '':
+ is_nan = True
  if is_nan:
  if not allow_null:
  if 'int' in column_type_lower:
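
With the new branch, an empty string is normalized the same way as 'nan'/'none' before the null-handling logic runs, so '' in a non-nullable int column is now caught by the allow_null check too. A standalone sketch of the rule; the real code sets an is_nan flag inline rather than calling a helper, and the None check is an assumption about the context lines not shown:

    # Illustrative helper, not MySQLUploader's actual code: mirrors the order
    # of checks visible in the hunk above, including the new `value == ''` branch.
    def is_nan_like(value) -> bool:
        if value is None:                          # assumed earlier branch
            return True
        if str(value).lower() in ['nan', 'none']:
            return True
        if value == '':                            # new in 4.0.11
            return True
        return False

    assert is_nan_like('') and is_nan_like('NaN') and not is_nan_like(0)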
mdbq/spider/aikucun.py CHANGED
@@ -25,9 +25,7 @@ config_file = os.path.join(dir_path, 'spd.txt')
  content = config.read_config(file_path=config_file)
  username, password, host, port = content['username'], content['password'], content['host'], content['port']
 
- uld = uploader.MySQLUploader(username=username, password=password, host=host, port=int(port), pool_size=10)
  # 实例化一个数据查询类,用来获取 cookies 表数据
- download = s_query.QueryDatas(username=username, password=password, host=host, port=port)
  logger = mylogger.MyLogger(
  logging_mode='file',
  log_level='info',
@@ -48,15 +46,15 @@ def keep_connect(_db_name, _config, max_try: int=10):
  connection = pymysql.connect(**_config) # 连接数据库
  return connection
  except Exception as e:
- logger.error(f'{_db_name}: 连接失败,正在重试: {host}:{port} {attempts}/{max_try} {e}')
+ logger.error('连接失败', {'数据库': _db_name, '主机': host, '端口': port, '重试次数': attempts, '最大重试次数': max_try, '错误信息': e})
  attempts += 1
  time.sleep(30)
- logger.error(f'{_db_name}: 连接失败,重试次数超限,当前设定次数: {max_try}')
+ logger.error('连接失败', {'数据库': _db_name, '主机': host, '端口': port, '重试次数': attempts, '最大重试次数': max_try})
  return None
 
 
  class AikuCun:
- def __init__(self):
+ def __init__(self, uld_manager, download_manager):
  self.url = 'https://gray-merc.aikucun.com/index.html'
  self.db_name = 'cookie文件'
  self.table_name = 'main_aikucun'
@@ -66,6 +64,8 @@ class AikuCun:
  self.start_date = (self.today - datetime.timedelta(days=7)).strftime('%Y-%m-%d')
  self.end_date = (self.today - datetime.timedelta(days=1)).strftime('%Y-%m-%d')
  self.error_count = 0
+ self.uld = uld_manager
+ self.download = download_manager
 
  def logining(self, shop_name='aikucun', headless=False):
  option = webdriver.ChromeOptions()
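
With the module-level uld/download singletons removed (first hunk of this file), AikuCun now receives both managers through its constructor, so pool lifetime is controlled by the caller instead of by import side effects. A wiring sketch using only names from this diff; credentials are placeholders:

    # Wiring sketch; construction mirrors the new main() further below.
    from mdbq.mysql import s_query, uploader
    from mdbq.spider.aikucun import AikuCun

    db_config = {'username': 'user', 'password': 'secret',
                 'host': '127.0.0.1', 'port': 3306, 'pool_size': 3}
    with uploader.MySQLUploader(**db_config) as uld:
        with s_query.QueryDatas(**db_config) as download:
            ak = AikuCun(uld_manager=uld, download_manager=download)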
@@ -171,7 +171,7 @@ class AikuCun:
 
  def save_token(self):
  if not self.token:
- print('self.token 不能为空')
+ logger.error('self.token 不能为空')
  return
  set_typ = {
  '日期': 'DATE',
@@ -182,11 +182,11 @@ class AikuCun:
  '更新时间': 'timestamp'
  }
  # 更新至数据库记录
- uld.upload_data(
+ self.uld.upload_data(
  db_name=self.db_name,
  table_name=self.table_name,
  data=self.token,
- set_typ={},
+ set_typ=set_typ,
  primary_keys=[],
  check_duplicate=False,
  update_on_duplicate=False,
@@ -209,7 +209,7 @@ class AikuCun:
  self.end_date = end_date
  date_list = otk.dates_between(start_date=self.start_date, end_date=self.end_date)
 
- df = download.data_to_df(
+ df = self.download.data_to_df(
  db_name=self.db_name,
  table_name=self.table_name,
  start_date='2025-03-07',
@@ -230,7 +230,7 @@ class AikuCun:
  idx = df.groupby(['平台', '店铺名称'])['更新时间'].idxmax()
  df = df.loc[idx][['token']]
  if len(df) == 0:
- print(f'从数据库获取的 token 不能为空')
+ logger.error(f'从数据库获取的 token 不能为空')
  return
  self.token = df.iloc[0, 0]
@@ -247,7 +247,7 @@ class AikuCun:
  results = []
  for date in date_list:
  if self.error_count > 5:
- print('已退出请求 -> self.error_count > 5')
+ logger.logger('已退出请求 -> self.error_count > 5')
  break
  req_date = re.sub('-', '', date)
  data = {
@@ -273,16 +273,15 @@ class AikuCun:
  # cookies=cookies,
  data=json.dumps(data)
  )
- print(f'正在获取数据({num}/{len(date_list)}): {item_type}榜单 {date}')
- # print(res.json())
+ logger.info('获取数据', {'进度': num/len(date_list), '日期': date, '榜单类型': item_type})
  if not res.json().get('success', None):
- print('没有获取到数据, 请求不成功, 如果连续请求失败 > 5, 则需重新获取cookie后继续')
+ logger.error('没有获取到数据, 请求不成功, 如果连续请求失败 > 5, 则需重新获取cookie后继续')
  num += 1
  self.error_count += 1
  time.sleep(1)
  continue
  if not res.json().get('data', {}).get('rows', None):
- print("返回的数据字典异常, ['data']['rows'] 不能为空")
+ logger.error("返回的数据字典异常, ['data']['rows'] 不能为空")
  num += 1
  self.error_count += 1
  time.sleep(1)
@@ -291,7 +290,7 @@ class AikuCun:
  num += 1
  time.sleep(1)
  if num % 32 == 0:
- print("避免频繁请求, 正在休眠...")
+ logger.info("避免频繁请求, 正在休眠...")
  # time.sleep(60)
 
  return results
@@ -413,18 +412,18 @@ class AikuCun:
  '尺码': 'varchar(50)',
  '货号': 'varchar(50)', # 款号 + 颜色编码
  }
- print(f'{self.shop_name} 正在更新数据库 {db_name} -> {table_name}...')
+ logger.info('更新数据库', {'店铺名称': self.shop_name, '库': db_name, '表': table_name})
  if 'spu' in table_name:
  drop_dup = ['日期', '平台', '店铺名称', '商品款号', '访客量']
  else:
  drop_dup = ['日期', '平台', '店铺名称', '条码']
- uld.upload_data(
+ self.uld.upload_data(
  db_name=db_name,
  table_name=table_name,
  data=_results,
  set_typ=set_typ, # 定义列和数据类型
  primary_keys=[], # 创建唯一主键
- check_duplicate=True, # 检查重复数据
+ check_duplicate=False, # 检查重复数据
  update_on_duplicate=False, # 遇到重复时更新数据,默认 False 跳过
  duplicate_columns=drop_dup, # 指定排重的组合键
  allow_null=False, # 允许插入空值
@@ -470,36 +469,44 @@ class AikuCun:
  headers=headers,
  data=json.dumps(data)
  )
- print(res.json())
 
 
  def main(start_date, end_date=None, item_type=['spu']):
- ak = AikuCun()
- # ak.get_sign()
- for type_ in item_type:
- if type_ not in ['spu', 'sku']:
- print(f'{item_type} 非法参数: {type_}')
- continue
- for i in range(2):
- data_list = ak.get_data_from_bbx(
- start_date=start_date,
- end_date=end_date,
- item_type=type_,
- page_num=1,
- page_size=300
- )
- if not data_list:
- ak.logining()
- ak.save_token()
- ak.error_count = 0 # 重置错误计数器
- else:
- break
+ db_config = {
+ 'username': username,
+ 'password': password,
+ 'host': host,
+ 'port': int(port),
+ 'pool_size': 3
+ }
+ with uploader.MySQLUploader(**db_config) as uld:
+ with s_query.QueryDatas(**db_config) as download:
+ ak = AikuCun(uld_manager=uld, download_manager=download)
+ # ak.get_sign()
+ for type_ in item_type:
+ if type_ not in ['spu', 'sku']:
+ logger.error(f'{item_type} 非法参数: {type_}')
+ continue
+ for i in range(2):
+ data_list = ak.get_data_from_bbx(
+ start_date=start_date,
+ end_date=end_date,
+ item_type=type_,
+ page_num=1,
+ page_size=300
+ )
+ if not data_list:
+ ak.logining()
+ ak.save_token()
+ ak.error_count = 0 # 重置错误计数器
+ else:
+ break
 
- ak.insert_datas(
- data_list=data_list,
- db_name='爱库存2',
- table_name=f'{type_}榜单'
- )
+ ak.insert_datas(
+ data_list=data_list,
+ db_name='爱库存2',
+ table_name=f'{type_}榜单'
+ )
 
 
@@ -508,7 +515,7 @@ if __name__ == '__main__':
  start_date='2025-05-13',
  # end_date='2025-04-28', # 不传则默认到今天
  item_type=[
- # 'spu',
+ 'spu',
  'sku'
  ]
  )
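
Opening both pools in with blocks guarantees they are released even if the scrape loop raises. The equivalent manual cleanup would look roughly like this, assuming both classes expose close(); the logger.debug('连接池已关闭') change in s_query.py suggests this for QueryDatas, while for MySQLUploader it is an assumption not shown in this diff:

    # What the context managers in main() save us from writing; db_config as
    # in main() above, and close() on MySQLUploader is assumed.
    uld = uploader.MySQLUploader(**db_config)
    download = s_query.QueryDatas(**db_config)
    try:
        ak = AikuCun(uld_manager=uld, download_manager=download)
        # ... scrape and upload as in main() ...
    finally:
        download.close()
        uld.close()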
{mdbq-4.0.10.dist-info → mdbq-4.0.11.dist-info}/METADATA CHANGED
@@ -1,6 +1,6 @@
  Metadata-Version: 2.2
  Name: mdbq
- Version: 4.0.10
+ Version: 4.0.11
  Home-page: https://pypi.org/project/mdbq
  Author: xigua,
  Author-email: 2587125111@qq.com
{mdbq-4.0.10.dist-info → mdbq-4.0.11.dist-info}/RECORD CHANGED
@@ -1,18 +1,17 @@
  mdbq/__init__.py,sha256=Il5Q9ATdX8yXqVxtP_nYqUhExzxPC_qk_WXQ_4h0exg,16
- mdbq/__version__.py,sha256=DkNrzYKkaEbmtMXcPR8H0qa5y2WJne8kPSNBfTO6mFo,18
+ mdbq/__version__.py,sha256=PQJs_Lgx6OvamcsXbLCVuBAvLc7j2xwJDZEWigwyUy8,18
  mdbq/aggregation/__init__.py,sha256=EeDqX2Aml6SPx8363J-v1lz0EcZtgwIBYyCJV6CcEDU,40
- mdbq/aggregation/query_data.py,sha256=NdhsLJvt6NgZSMHpkMxnmEEXzDhUiR5tRwYwI-PfwIw,166732
+ mdbq/aggregation/query_data.py,sha256=SM8cS9lBKmhLBQdwJz-sRu9bl7w1HS0MEq10s6Tqf_0,166777
  mdbq/config/__init__.py,sha256=jso1oHcy6cJEfa7udS_9uO5X6kZLoPBF8l3wCYmr5dM,18
  mdbq/config/config.py,sha256=eaTfrfXQ65xLqjr5I8-HkZd_jEY1JkGinEgv3TSLeoQ,3170
  mdbq/log/__init__.py,sha256=Mpbrav0s0ifLL7lVDAuePEi1hJKiSHhxcv1byBKDl5E,15
  mdbq/log/mylogger.py,sha256=9w_o5mYB3FooIxobq_lSa6oCYTKIhPxDFox-jeLtUHI,21714
- mdbq/log/spider_logging.py,sha256=-ozWWEGm3HVv604ozs_OOvVwumjokmUPwbaodesUrPY,1664
  mdbq/mysql/__init__.py,sha256=A_DPJyAoEvTSFojiI2e94zP0FKtCkkwKP1kYUCSyQzo,11
- mdbq/mysql/deduplicator.py,sha256=8v3MC6TJ0YEiExWrTP9OXAxTYnL9XbpYL2vWaER1h2M,73099
+ mdbq/mysql/deduplicator.py,sha256=fS1dSs92vN15tuqmAKrUVdKk6z9dwW_Fe9WHMBYsy2U,73172
  mdbq/mysql/mysql.py,sha256=pDg771xBugCMSTWeskIFTi3pFLgaqgyG3smzf-86Wn8,56772
- mdbq/mysql/s_query.py,sha256=jGBdGPE4mtB06vccfaWIEWpSAqdY-nWc1s9bzCUh8Gg,42916
+ mdbq/mysql/s_query.py,sha256=RPC-KZVuqPlCSmpmtUmYAOJdxJT01i0DvlIbmum4MxM,42882
  mdbq/mysql/unique_.py,sha256=Wgqq_PjAAD757JTa10wjYaJgssZ_C_ypU6DW56jbuyw,21074
- mdbq/mysql/uploader.py,sha256=wNQE7UjCEyAKri9CnQXO7d6EVXCaYqFze2i2tcGAVpw,81001
+ mdbq/mysql/uploader.py,sha256=wX2gHhVQJwGErnjUbLnsljkZ8Yd3YK-HS3P7q8DizAA,81053
  mdbq/other/__init__.py,sha256=jso1oHcy6cJEfa7udS_9uO5X6kZLoPBF8l3wCYmr5dM,18
  mdbq/other/download_sku_picture.py,sha256=YU8DxKMXbdeE1OOKEA848WVp62jYHw5O4tXTjUdq9H0,44832
  mdbq/other/otk.py,sha256=iclBIFbQbhlqzUbcMMoePXBpcP1eZ06ZtjnhcA_EbmE,7241
@@ -24,8 +23,8 @@ mdbq/pbix/refresh_all.py,sha256=OBT9EewSZ0aRS9vL_FflVn74d4l2G00wzHiikCC4TC0,5926
  mdbq/redis/__init__.py,sha256=YtgBlVSMDphtpwYX248wGge1x-Ex_mMufz4-8W0XRmA,12
  mdbq/redis/getredis.py,sha256=vpBuNc22uj9Vr-_Dh25_wpwWM1e-072EAAIBdB_IpL0,23494
  mdbq/spider/__init__.py,sha256=RBMFXGy_jd1HXZhngB2T2XTvJqki8P_Fr-pBcwijnew,18
- mdbq/spider/aikucun.py,sha256=hPRzLQvFIF4ibN8aP3Dg_ru5meac90faPyzOB22cj-o,20965
- mdbq-4.0.10.dist-info/METADATA,sha256=AJXVA5kCyLJQiObIW13jNvJAWsXdXJRFN2xCCSdDO78,364
- mdbq-4.0.10.dist-info/WHEEL,sha256=jB7zZ3N9hIM9adW7qlTAyycLYW9npaWKLRzaoVcLKcM,91
- mdbq-4.0.10.dist-info/top_level.txt,sha256=2FQ-uLnCSB-OwFiWntzmwosW3X2Xqsg0ewh1axsaylA,5
- mdbq-4.0.10.dist-info/RECORD,,
+ mdbq/spider/aikucun.py,sha256=7oquQ2RIJr6B1xblQMfnmHzteOlvHA7dIcPRaAPfHBc,21546
+ mdbq-4.0.11.dist-info/METADATA,sha256=zZh35aA-suJ3B_v39Mw8V_O2GSdOLdylfNPl_E99uqQ,364
+ mdbq-4.0.11.dist-info/WHEEL,sha256=jB7zZ3N9hIM9adW7qlTAyycLYW9npaWKLRzaoVcLKcM,91
+ mdbq-4.0.11.dist-info/top_level.txt,sha256=2FQ-uLnCSB-OwFiWntzmwosW3X2Xqsg0ewh1axsaylA,5
+ mdbq-4.0.11.dist-info/RECORD,,
mdbq/log/spider_logging.py DELETED
@@ -1,47 +0,0 @@
- import logging
- from logging.handlers import RotatingFileHandler
- import platform
- import os
- import sys
- import getpass
-
-
- def setup_logging(reMoveOldHandler=True, filename='spider_tg.log'):
- """
- reMoveOldHandler: 替换根日志记录器的所有现有处理器
- """
- dir_path = os.path.expanduser("~")
- if not os.path.isdir(os.path.join(dir_path, 'logfile')):
- os.makedirs(os.path.join(dir_path, 'logfile'))
-
- log_file = os.path.join(dir_path, 'logfile', filename)
- file_handler = RotatingFileHandler(
- filename=log_file,
- maxBytes=3*1024*1024, # 3MB
- backupCount=10,
- encoding='utf-8' # 明确指定编码(避免Windows乱码)
- )
- stream_handler = logging.StreamHandler() # 终端输出Handler
- formatter = logging.Formatter(
- fmt='[%(asctime)s] %(levelname)s %(message)s',
- datefmt='%Y-%m-%d %H:%M:%S'
- )
- file_handler.setFormatter(formatter)
- stream_handler.setFormatter(formatter) # 终端使用相同格式
- file_handler.setLevel(logging.INFO)
- stream_handler.setLevel(logging.INFO)
-
- # 获取根日志记录器并添加Handler
- logger = logging.getLogger()
- if reMoveOldHandler:
- # 移除根日志记录器的所有现有处理器
- for handler in logger.handlers[:]: # 使用[:]来创建handlers列表的一个副本,因为我们在迭代时修改列表
- logger.removeHandler(handler)
- logger.addHandler(file_handler)
- logger.addHandler(stream_handler)
- logger.setLevel(logging.INFO) # 设置根日志级别
- return logger
-
-
- if __name__ == '__main__':
- pass