mdbq 4.0.38__py3-none-any.whl → 4.0.40__py3-none-any.whl

This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the package versions as they appear in their respective public registries.
mdbq/__version__.py CHANGED
@@ -1 +1 @@
1
- VERSION = '4.0.38'
1
+ VERSION = '4.0.40'
mdbq/log/mylogger.py CHANGED
@@ -103,7 +103,7 @@ class MyLogger:
103
103
  if log_file is None:
104
104
  self.log_file = os.path.join(log_path, f"{self.name}.log")
105
105
  else:
106
- self.log_file = os.path.join(log_path, log_file)
106
+ self.log_file = os.path.join(os.path.expanduser("~"), log_file)
107
107
  if not os.path.isdir(os.path.dirname(self.log_file)):
108
108
  os.makedirs(os.path.dirname(self.log_file))
109
109
  self.log_format = log_format
mdbq/mysql/uploader.py CHANGED
@@ -11,7 +11,6 @@ from mdbq.log import mylogger
11
11
  from mdbq.myconf import myconf
12
12
  from typing import Union, List, Dict, Optional, Any, Tuple, Set
13
13
  from dbutils.pooled_db import PooledDB
14
- import json
15
14
  import sys
16
15
  from decimal import Decimal, InvalidOperation
17
16
  import math
@@ -549,106 +548,70 @@ class MySQLUploader:
549
548
  """
550
549
  column_type_lower = column_type.lower() if column_type else ''
551
550
  # 统一判断None/NaN
552
- is_nan = False
553
- if value is None:
554
- is_nan = True
555
- elif isinstance(value, float) and math.isnan(value):
556
- is_nan = True
557
- elif str(value).lower() in ['nan', 'none']:
558
- is_nan = True
559
- elif value == '':
560
- is_nan = True
561
- if is_nan:
562
- if not allow_null:
563
- if 'int' in column_type_lower:
564
- logger.debug('字段值为None/NaN但不允许空值, 已填充为0', {
565
- '库': db_name, '表': table_name, '列': col_name, '字段类型': column_type
566
- })
567
- return 0
568
- elif any(t in column_type_lower for t in ['float', 'double', 'decimal']):
569
- logger.debug('字段值为None/NaN但不允许空值, 已填充为0.0', {
570
- '库': db_name, '表': table_name, '列': col_name, '字段类型': column_type
571
- })
572
- return 0.0
573
- elif 'date' in column_type_lower or 'time' in column_type_lower:
574
- if 'datetime' in column_type_lower or 'timestamp' in column_type_lower:
575
- default_date = '2000-01-01 00:00:00'
576
- else:
577
- default_date = '2000-01-01'
578
- logger.debug('字段值为None/NaN但不允许空值, 已填充为默认日期', {
579
- '库': db_name, '表': table_name, '列': col_name, '字段类型': column_type, '默认值': default_date
580
- })
581
- return default_date
582
- else:
583
- logger.debug('字段值为None/NaN但不允许空值, 已填充为none字符串', {
584
- '库': db_name, '表': table_name, '列': col_name, '字段类型': column_type
585
- })
586
- return 'none'
587
- return None
588
- try:
589
- if isinstance(value, str) and value.strip().endswith('%'):
590
- if re.match(r'^\d+(\.\d+)?%$', value.strip()):
591
- percent_str = value.strip().replace('%', '')
592
- percent_value = float(percent_str)
593
- decimal_value = percent_value / 100
594
- logger.debug('百分比字符串转小数', {'原始': value, '结果': decimal_value})
595
- return decimal_value
596
- else:
597
- logger.warning('百分比字符串不符合格式,跳过转换', {
598
- '库': db_name, '表': table_name, '列': col_name, '原始': value
599
- })
600
- elif 'int' in column_type_lower:
601
- if isinstance(value, str):
602
- value = value.replace(',', '').strip()
603
- try:
604
- return int(float(value))
605
- except ValueError:
606
- logger.error('字符串转整数失败', {
607
- '库': db_name, '表': table_name, '列': col_name, '值': value
608
- })
609
- raise ValueError(f"`{value}` -> 无法转为整数")
610
- return int(value) if value is not None else None
611
- elif any(t in column_type_lower for t in ['float', 'double', 'decimal']):
612
- if isinstance(value, str):
613
- value = value.replace(',', '')
614
- return float(value) if value is not None else None
615
- elif 'date' in column_type_lower or 'time' in column_type_lower:
616
- if isinstance(value, (datetime.datetime, pd.Timestamp)):
617
- return value.strftime('%Y-%m-%d %H:%M:%S')
618
- elif isinstance(value, str):
619
- try:
620
- return self._validate_datetime(value=value, date_type=False, no_log=False)
621
- except ValueError as e:
622
- logger.error('无效日期格式', {
623
- '库': db_name, '表': table_name, '列': col_name, '值': value, '错误': str(e)
624
- })
625
- raise ValueError(f"无效日期格式: `{value}` -> {str(e)}")
626
- return str(value)
627
- elif 'varchar' in column_type_lower:
628
- if isinstance(value, str):
629
- return value.replace('\\', '\\\\').replace("'", "\\'")
630
- else:
631
- return str(value)
632
- elif 'text' in column_type_lower:
633
- if isinstance(value, str):
634
- max_length = 65535
635
- if len(value) > max_length:
636
- logger.warning(f'TEXT字符串长度不允许超过 {max_length},已截断', {
637
- '库': db_name, '表': table_name, '列': col_name, '原始值': f'{value[:50]}...', '截断后值': f'{value[:50]}...'
638
- })
639
- value = value[:max_length]
640
- return value.replace('\\', '\\\\').replace("'", "\\'")
641
- else:
642
- return str(value)
643
- elif 'json' in column_type_lower:
644
- return json.dumps(value) if value is not None else None
551
+ if value == '' or pd.isna(value) or (isinstance(value, (float, Decimal)) and math.isinf(value)):
552
+ if allow_null:
553
+ return None
645
554
  else:
646
- return value
647
- except (ValueError, TypeError) as e:
648
- logger.error('数据类型转换异常', {
649
- '库': db_name, '表': table_name, '列': col_name, '值': value, '目标类型': column_type, '错误': str(e)
650
- })
651
- raise ValueError(f"转换异常 -> 无法将 `{value}` 的数据类型转为: `{column_type}` -> {str(e)}")
555
+ logger.error("该列不允许为空值", {"库": db_name, "表": table_name, "列": col_name, "值": value})
556
+ raise ValueError("该列不允许为空值")
557
+
558
+ original_value = value
559
+
560
+ # 日期时间类型验证
561
+ if 'datetime' in column_type_lower or 'timestamp' in column_type_lower:
562
+ return self._validate_datetime(value, date_type=False, no_log=True)
563
+ elif 'date' in column_type_lower:
564
+ return self._validate_datetime(value, date_type=True, no_log=True)
565
+ # 数值类型验证
566
+ elif 'int' in column_type_lower:
567
+ try:
568
+ return int(value)
569
+ except (ValueError, TypeError):
570
+ logger.error(f"值 `{value}` 无法转换为整数", {"库": db_name, "表": table_name, "列": col_name})
571
+ raise ValueError(f"值 `{value}` 无法转换为整数")
572
+ elif any(t in column_type_lower for t in ['decimal', 'float', 'double']):
573
+ # 百分比字符串处理
574
+ if isinstance(value, str) and '%' in value:
575
+ try:
576
+ # 仅当值是'xx.xx%'格式时才转换
577
+ if re.match(r'^-?\d+(\.\d+)?%$', value.strip()):
578
+ value = float(value.strip().replace('%', '')) / 100
579
+ else:
580
+ # 不符合格式的百分比字符串,保留原始值
581
+ logger.warning("百分比字符串不符合格式,跳过转换", {"库": db_name, "表": table_name, "列": col_name, "原始": original_value})
582
+ value = original_value
583
+ except (ValueError, TypeError):
584
+ logger.warning("百分比字符串转换失败,保留原始值", {"库": db_name, "表": table_name, "列": col_name, "原始": original_value})
585
+ value = original_value
586
+
587
+ try:
588
+ # 检查是否为Decimal类型
589
+ if 'decimal' in column_type_lower:
590
+ precision, scale = self._get_decimal_scale(column_type)
591
+ value_decimal = Decimal(str(value))
592
+ # 检查整数部分长度
593
+ if len(value_decimal.as_tuple().digits) - abs(value_decimal.as_tuple().exponent) > precision - scale:
594
+ raise ValueError(f"整数部分超出范围")
595
+ return value_decimal
596
+ else: # float/double
597
+ return float(value)
598
+ except (ValueError, TypeError, InvalidOperation) as e:
599
+ logger.error(f"值 `{value}` 无法转换为数值类型: {e}", {"库": db_name, "表": table_name, "列": col_name})
600
+ raise ValueError(f"值 `{value}` 无法转换为数值类型: {e}")
601
+ # 字符串类型验证
602
+ elif 'varchar' in column_type_lower:
603
+ str_value = str(value)
604
+ try:
605
+ max_len = int(re.search(r'\((\d+)\)', column_type).group(1))
606
+ if len(str_value.encode('utf-8')) > max_len:
607
+ logger.warning(f"列`{col_name}`的值`{str_value}`长度({len(str_value.encode('utf-8'))})超出varchar({max_len})限制,将进行截断", {"库": db_name, "表": table_name})
608
+ return str_value.encode('utf-8')[:max_len].decode('utf-8', 'ignore')
609
+ except (AttributeError, IndexError):
610
+ # 没有找到长度定义,不截断
611
+ pass
612
+ return str_value
613
+
614
+ return value
652
615
 
653
616
  @_execute_with_retry
654
617
  def _get_table_columns(self, db_name: str, table_name: str) -> Dict[str, str]:
@@ -967,6 +930,8 @@ class MySQLUploader:
967
930
  data = self.normalize_column_names(data)
968
931
 
969
932
  # set_typ的键清洗
933
+ if not set_typ:
934
+ set_typ = {}
970
935
  set_typ = {self._normalize_col(k): v for k, v in set_typ.items()}
971
936
 
972
937
  # 新实现:严格按set_typ顺序过滤,后补充data中有但set_typ没有的列
@@ -1017,7 +982,6 @@ class MySQLUploader:
1017
982
  })
1018
983
  raise ValueError(f"行:{row_idx}, 列:`{col_name}`-> 报错: {str(e)}")
1019
984
  prepared_data.append(prepared_row)
1020
-
1021
985
  return prepared_data, filtered_set_typ
1022
986
 
1023
987
  def upload_data(
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.2
2
2
  Name: mdbq
3
- Version: 4.0.38
3
+ Version: 4.0.40
4
4
  Home-page: https://pypi.org/project/mdbq
5
5
  Author: xigua,
6
6
  Author-email: 2587125111@qq.com
@@ -1,9 +1,9 @@
1
1
  mdbq/__init__.py,sha256=Il5Q9ATdX8yXqVxtP_nYqUhExzxPC_qk_WXQ_4h0exg,16
2
- mdbq/__version__.py,sha256=U1WMRZvqWGnnlhMX5A6NKB6YnAmRd4rS3atED7bLCQo,18
2
+ mdbq/__version__.py,sha256=YnJ4yS3LCjrBMdYJ8VXs6BM72GyNYWV1Zwk0iW2hB1k,18
3
3
  mdbq/aggregation/__init__.py,sha256=EeDqX2Aml6SPx8363J-v1lz0EcZtgwIBYyCJV6CcEDU,40
4
4
  mdbq/aggregation/query_data.py,sha256=WtTFMN78jn43Y-nBTPAXhAK56w3wDuv_cj4YtzzGbZk,169797
5
5
  mdbq/log/__init__.py,sha256=Mpbrav0s0ifLL7lVDAuePEi1hJKiSHhxcv1byBKDl5E,15
6
- mdbq/log/mylogger.py,sha256=iDhWkTY6I9T3IJuERWqiXKq1sNf0VuraSEq33ZxLqdw,24930
6
+ mdbq/log/mylogger.py,sha256=kPe3wsQNaB1slfX-Z7VMqzZoMoqPfc7ylYXZDBeFzzI,24945
7
7
  mdbq/myconf/__init__.py,sha256=jso1oHcy6cJEfa7udS_9uO5X6kZLoPBF8l3wCYmr5dM,18
8
8
  mdbq/myconf/myconf.py,sha256=rHvQCnQRKhQ49AZBke-Z4v28hyOLmHt4MylIuB0H6yA,33516
9
9
  mdbq/mysql/__init__.py,sha256=A_DPJyAoEvTSFojiI2e94zP0FKtCkkwKP1kYUCSyQzo,11
@@ -11,7 +11,7 @@ mdbq/mysql/deduplicator.py,sha256=AB3gL7ZwhcmzGHSu4UY4M6YZVPFZ2wlAN3BCcwAhegQ,73
11
11
  mdbq/mysql/mysql.py,sha256=pDg771xBugCMSTWeskIFTi3pFLgaqgyG3smzf-86Wn8,56772
12
12
  mdbq/mysql/s_query.py,sha256=1wJ3HVjHEF6FA-bVeeesRlsf73CZSvVTEQ51CF1OsE4,46786
13
13
  mdbq/mysql/unique_.py,sha256=MaztT-WIyEQUs-OOYY4pFulgHVcXR1BfCy3QUz0XM_U,21127
14
- mdbq/mysql/uploader.py,sha256=defQ4xCC3j8an9dWjFI3q_Fec0Irewe2FzBZqFL1GJM,84673
14
+ mdbq/mysql/uploader.py,sha256=FOroXUIsxJaMCqBeepUuymCpdhJsid4yiC_Rs1BT1sw,82823
15
15
  mdbq/other/__init__.py,sha256=jso1oHcy6cJEfa7udS_9uO5X6kZLoPBF8l3wCYmr5dM,18
16
16
  mdbq/other/download_sku_picture.py,sha256=X66sVdvVgzoNzmgVJyPtd7bjEvctEKtLPblEPF65EWc,46940
17
17
  mdbq/other/error_handler.py,sha256=4p5haAXSY-P78stp4Xwo_MwAngWYqyKj5ogWIuYXMeY,12631
@@ -25,7 +25,7 @@ mdbq/redis/__init__.py,sha256=YtgBlVSMDphtpwYX248wGge1x-Ex_mMufz4-8W0XRmA,12
25
25
  mdbq/redis/getredis.py,sha256=vpBuNc22uj9Vr-_Dh25_wpwWM1e-072EAAIBdB_IpL0,23494
26
26
  mdbq/spider/__init__.py,sha256=RBMFXGy_jd1HXZhngB2T2XTvJqki8P_Fr-pBcwijnew,18
27
27
  mdbq/spider/aikucun.py,sha256=XptHjGzbout9IYzWAOQUpMMV5qEgLTU8pL1ZGt8oNEA,21868
28
- mdbq-4.0.38.dist-info/METADATA,sha256=edMyLlQlfms58lylwf4zSHXWbqA8C9jFwJ-YHT-p_cs,364
29
- mdbq-4.0.38.dist-info/WHEEL,sha256=jB7zZ3N9hIM9adW7qlTAyycLYW9npaWKLRzaoVcLKcM,91
30
- mdbq-4.0.38.dist-info/top_level.txt,sha256=2FQ-uLnCSB-OwFiWntzmwosW3X2Xqsg0ewh1axsaylA,5
31
- mdbq-4.0.38.dist-info/RECORD,,
28
+ mdbq-4.0.40.dist-info/METADATA,sha256=FUd8oXQ4zlMFv9neapfYa24uQl84PHgt3S0aAEbEjGo,364
29
+ mdbq-4.0.40.dist-info/WHEEL,sha256=jB7zZ3N9hIM9adW7qlTAyycLYW9npaWKLRzaoVcLKcM,91
30
+ mdbq-4.0.40.dist-info/top_level.txt,sha256=2FQ-uLnCSB-OwFiWntzmwosW3X2Xqsg0ewh1axsaylA,5
31
+ mdbq-4.0.40.dist-info/RECORD,,
File without changes