mdbq 4.0.39__py3-none-any.whl → 4.0.41__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
mdbq/__version__.py CHANGED
@@ -1 +1 @@
1
- VERSION = '4.0.39'
1
+ VERSION = '4.0.41'
mdbq/mysql/uploader.py CHANGED
@@ -11,7 +11,6 @@ from mdbq.log import mylogger
11
11
  from mdbq.myconf import myconf
12
12
  from typing import Union, List, Dict, Optional, Any, Tuple, Set
13
13
  from dbutils.pooled_db import PooledDB
14
- import json
15
14
  import sys
16
15
  from decimal import Decimal, InvalidOperation
17
16
  import math
@@ -526,8 +525,7 @@ class MySQLUploader:
526
525
  '%Y-%m-%dT%H:%M:%S',
527
526
  '%Y-%m-%d %H:%M:%S.%f',
528
527
  '%Y/%-m/%-d', # 2023/1/8
529
- '%Y-%m-%-d', # 2023-01-8
530
- '%Y-%-m-%-d' # 2023-1-8
528
+ '%Y-%-m-%-d', # 2023-01-8
531
529
  ]
532
530
  for fmt in formats:
533
531
  try:
@@ -549,106 +547,70 @@ class MySQLUploader:
549
547
  """
550
548
  column_type_lower = column_type.lower() if column_type else ''
551
549
  # 统一判断None/NaN
552
- is_nan = False
553
- if value is None:
554
- is_nan = True
555
- elif isinstance(value, float) and math.isnan(value):
556
- is_nan = True
557
- elif str(value).lower() in ['nan', 'none']:
558
- is_nan = True
559
- elif value == '':
560
- is_nan = True
561
- if is_nan:
562
- if not allow_null:
563
- if 'int' in column_type_lower:
564
- logger.debug('字段值为None/NaN但不允许空值, 已填充为0', {
565
- '库': db_name, '表': table_name, '列': col_name, '字段类型': column_type
566
- })
567
- return 0
568
- elif any(t in column_type_lower for t in ['float', 'double', 'decimal']):
569
- logger.debug('字段值为None/NaN但不允许空值, 已填充为0.0', {
570
- '库': db_name, '表': table_name, '列': col_name, '字段类型': column_type
571
- })
572
- return 0.0
573
- elif 'date' in column_type_lower or 'time' in column_type_lower:
574
- if 'datetime' in column_type_lower or 'timestamp' in column_type_lower:
575
- default_date = '2000-01-01 00:00:00'
576
- else:
577
- default_date = '2000-01-01'
578
- logger.debug('字段值为None/NaN但不允许空值, 已填充为默认日期', {
579
- '库': db_name, '表': table_name, '列': col_name, '字段类型': column_type, '默认值': default_date
580
- })
581
- return default_date
582
- else:
583
- logger.debug('字段值为None/NaN但不允许空值, 已填充为none字符串', {
584
- '库': db_name, '表': table_name, '列': col_name, '字段类型': column_type
585
- })
586
- return 'none'
587
- return None
588
- try:
589
- if isinstance(value, str) and value.strip().endswith('%'):
590
- if re.match(r'^\d+(\.\d+)?%$', value.strip()):
591
- percent_str = value.strip().replace('%', '')
592
- percent_value = float(percent_str)
593
- decimal_value = percent_value / 100
594
- logger.debug('百分比字符串转小数', {'原始': value, '结果': decimal_value})
595
- return decimal_value
596
- else:
597
- logger.warning('百分比字符串不符合格式,跳过转换', {
598
- '库': db_name, '表': table_name, '列': col_name, '原始': value
599
- })
600
- elif 'int' in column_type_lower:
601
- if isinstance(value, str):
602
- value = value.replace(',', '').strip()
603
- try:
604
- return int(float(value))
605
- except ValueError:
606
- logger.error('字符串转整数失败', {
607
- '库': db_name, '表': table_name, '列': col_name, '值': value
608
- })
609
- raise ValueError(f"`{value}` -> 无法转为整数")
610
- return int(value) if value is not None else None
611
- elif any(t in column_type_lower for t in ['float', 'double', 'decimal']):
612
- if isinstance(value, str):
613
- value = value.replace(',', '')
614
- return float(value) if value is not None else None
615
- elif 'date' in column_type_lower or 'time' in column_type_lower:
616
- if isinstance(value, (datetime.datetime, pd.Timestamp)):
617
- return value.strftime('%Y-%m-%d %H:%M:%S')
618
- elif isinstance(value, str):
619
- try:
620
- return self._validate_datetime(value=value, date_type=False, no_log=False)
621
- except ValueError as e:
622
- logger.error('无效日期格式', {
623
- '库': db_name, '表': table_name, '列': col_name, '值': value, '错误': str(e)
624
- })
625
- raise ValueError(f"无效日期格式: `{value}` -> {str(e)}")
626
- return str(value)
627
- elif 'varchar' in column_type_lower:
628
- if isinstance(value, str):
629
- return value.replace('\\', '\\\\').replace("'", "\\'")
630
- else:
631
- return str(value)
632
- elif 'text' in column_type_lower:
633
- if isinstance(value, str):
634
- max_length = 65535
635
- if len(value) > max_length:
636
- logger.warning(f'TEXT字符串长度不允许超过 {max_length},已截断', {
637
- '库': db_name, '表': table_name, '列': col_name, '原始值': f'{value[:50]}...', '截断后值': f'{value[:50]}...'
638
- })
639
- value = value[:max_length]
640
- return value.replace('\\', '\\\\').replace("'", "\\'")
641
- else:
642
- return str(value)
643
- elif 'json' in column_type_lower:
644
- return json.dumps(value) if value is not None else None
550
+ if value == '' or pd.isna(value) or (isinstance(value, (float, Decimal)) and math.isinf(value)):
551
+ if allow_null:
552
+ return None
645
553
  else:
646
- return value
647
- except (ValueError, TypeError) as e:
648
- logger.error('数据类型转换异常', {
649
- '库': db_name, '表': table_name, '列': col_name, '值': value, '目标类型': column_type, '错误': str(e)
650
- })
651
- raise ValueError(f"转换异常 -> 无法将 `{value}` 的数据类型转为: `{column_type}` -> {str(e)}")
554
+ logger.error("该列不允许为空值", {"库": db_name, "表": table_name, "列": col_name, "值": value})
555
+ raise ValueError("该列不允许为空值")
556
+
557
+ original_value = value
558
+
559
+ # 日期时间类型验证
560
+ if 'datetime' in column_type_lower or 'timestamp' in column_type_lower:
561
+ return self._validate_datetime(value, date_type=False, no_log=True)
562
+ elif 'date' in column_type_lower:
563
+ return self._validate_datetime(value, date_type=True, no_log=True)
564
+ # 数值类型验证
565
+ elif 'int' in column_type_lower:
566
+ try:
567
+ return int(value)
568
+ except (ValueError, TypeError):
569
+ logger.error(f"值 `{value}` 无法转换为整数", {"库": db_name, "表": table_name, "列": col_name})
570
+ raise ValueError(f"值 `{value}` 无法转换为整数")
571
+ elif any(t in column_type_lower for t in ['decimal', 'float', 'double']):
572
+ # 百分比字符串处理
573
+ if isinstance(value, str) and '%' in value:
574
+ try:
575
+ # 仅当值是'xx.xx%'格式时才转换
576
+ if re.match(r'^-?\d+(\.\d+)?%$', value.strip()):
577
+ value = float(value.strip().replace('%', '')) / 100
578
+ else:
579
+ # 不符合格式的百分比字符串,保留原始值
580
+ logger.warning("百分比字符串不符合格式,跳过转换", {"库": db_name, "表": table_name, "列": col_name, "原始": original_value})
581
+ value = original_value
582
+ except (ValueError, TypeError):
583
+ logger.warning("百分比字符串转换失败,保留原始值", {"库": db_name, "表": table_name, "列": col_name, "原始": original_value})
584
+ value = original_value
585
+
586
+ try:
587
+ # 检查是否为Decimal类型
588
+ if 'decimal' in column_type_lower:
589
+ precision, scale = self._get_decimal_scale(column_type)
590
+ value_decimal = Decimal(str(value))
591
+ # 检查整数部分长度
592
+ if len(value_decimal.as_tuple().digits) - abs(value_decimal.as_tuple().exponent) > precision - scale:
593
+ raise ValueError(f"整数部分超出范围")
594
+ return value_decimal
595
+ else: # float/double
596
+ return float(value)
597
+ except (ValueError, TypeError, InvalidOperation) as e:
598
+ logger.error(f"值 `{value}` 无法转换为数值类型: {e}", {"库": db_name, "表": table_name, "列": col_name})
599
+ raise ValueError(f"值 `{value}` 无法转换为数值类型: {e}")
600
+ # 字符串类型验证
601
+ elif 'varchar' in column_type_lower:
602
+ str_value = str(value)
603
+ try:
604
+ max_len = int(re.search(r'\((\d+)\)', column_type).group(1))
605
+ if len(str_value.encode('utf-8')) > max_len:
606
+ logger.warning(f"列`{col_name}`的值`{str_value}`长度({len(str_value.encode('utf-8'))})超出varchar({max_len})限制,将进行截断", {"库": db_name, "表": table_name})
607
+ return str_value.encode('utf-8')[:max_len].decode('utf-8', 'ignore')
608
+ except (AttributeError, IndexError):
609
+ # 没有找到长度定义,不截断
610
+ pass
611
+ return str_value
612
+
613
+ return value
652
614
 
653
615
  @_execute_with_retry
654
616
  def _get_table_columns(self, db_name: str, table_name: str) -> Dict[str, str]:
@@ -853,8 +815,11 @@ class MySQLUploader:
853
815
 
854
816
  # 检查是否是百分比字符串
855
817
  if isinstance(value, str):
856
- if value.endswith('%'):
857
- return 'DECIMAL(10,4)' # 百分比统一使用DECIMAL(10,4)
818
+ if '%' in value:
819
+ if re.match(r'^-?\d+(\.\d+)?%$', value.strip()):
820
+ return 'DECIMAL(10, 4)' # 百分比转为小数,使用DECIMAL
821
+ else:
822
+ return 'VARCHAR(255)' # 不符合格式的百分比,视为字符串
858
823
 
859
824
  if isinstance(value, bool):
860
825
  return 'TINYINT(1)'
@@ -898,8 +863,8 @@ class MySQLUploader:
898
863
  return 'MEDIUMTEXT'
899
864
  else:
900
865
  return 'LONGTEXT'
901
- else:
902
- return 'VARCHAR(255)'
866
+
867
+ return 'VARCHAR(255)' # 默认字符串类型
903
868
 
904
869
  def normalize_column_names(self, data: Union[pd.DataFrame, List[Dict[str, Any]]]) -> Union[
905
870
  pd.DataFrame, List[Dict[str, Any]]]:
@@ -967,6 +932,8 @@ class MySQLUploader:
967
932
  data = self.normalize_column_names(data)
968
933
 
969
934
  # set_typ的键清洗
935
+ if not set_typ:
936
+ set_typ = {}
970
937
  set_typ = {self._normalize_col(k): v for k, v in set_typ.items()}
971
938
 
972
939
  # 新实现:严格按set_typ顺序过滤,后补充data中有但set_typ没有的列
@@ -1017,7 +984,6 @@ class MySQLUploader:
1017
984
  })
1018
985
  raise ValueError(f"行:{row_idx}, 列:`{col_name}`-> 报错: {str(e)}")
1019
986
  prepared_data.append(prepared_row)
1020
-
1021
987
  return prepared_data, filtered_set_typ
1022
988
 
1023
989
  def upload_data(
@@ -1353,7 +1319,7 @@ class MySQLUploader:
1353
1319
  for col in dup_cols:
1354
1320
  col_type = set_typ.get(col, '').lower()
1355
1321
  if col_type.startswith('decimal'):
1356
- scale = self._get_decimal_scale(col_type)
1322
+ _, scale = self._get_decimal_scale(col_type)
1357
1323
  conditions.append(f"ROUND(`{col}`, {scale}) = ROUND(%s, {scale})")
1358
1324
  else:
1359
1325
  conditions.append(f"`{col}` = %s")
@@ -1382,24 +1348,12 @@ class MySQLUploader:
1382
1348
  """
1383
1349
  return sql
1384
1350
 
1385
- def _get_decimal_scale(self, decimal_type: str) -> int:
1386
- """
1387
- 从DECIMAL类型定义中提取小数位数
1388
-
1389
- :param decimal_type: DECIMAL类型字符串,如'DECIMAL(10,4)'
1390
- :return: 小数位数
1391
- :raises: 无显式抛出异常,但解析失败时返回默认值2
1392
- """
1393
- try:
1394
- # 匹配DECIMAL类型中的精度和小数位数
1395
- match = re.match(r'decimal\((\d+),\s*(\d+)\)', decimal_type.lower())
1396
- if match:
1397
- return int(match.group(2))
1398
- except (ValueError, AttributeError, IndexError):
1399
- pass
1400
-
1401
- # 默认返回2位小数
1402
- return 2
1351
+ def _get_decimal_scale(self, decimal_type: str) -> Tuple[int, int]:
1352
+ """从DECIMAL类型字符串中提取精度和标度"""
1353
+ match = re.search(r'\((\d+)\s*,\s*(\d+)\)', decimal_type)
1354
+ if match:
1355
+ return int(match.group(1)), int(match.group(2))
1356
+ return 18, 2 # 默认值
1403
1357
 
1404
1358
  def _prepare_insert_sql(
1405
1359
  self,
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.2
2
2
  Name: mdbq
3
- Version: 4.0.39
3
+ Version: 4.0.41
4
4
  Home-page: https://pypi.org/project/mdbq
5
5
  Author: xigua,
6
6
  Author-email: 2587125111@qq.com
@@ -1,5 +1,5 @@
1
1
  mdbq/__init__.py,sha256=Il5Q9ATdX8yXqVxtP_nYqUhExzxPC_qk_WXQ_4h0exg,16
2
- mdbq/__version__.py,sha256=n_fdVNnrFSkcZj0OrtGWK_KfQ2bTndWqHA32sOOig7o,18
2
+ mdbq/__version__.py,sha256=X4VgvOxMoRU-YHIymW2NhCV8yy6pvwj9wlFGdVP8Di8,18
3
3
  mdbq/aggregation/__init__.py,sha256=EeDqX2Aml6SPx8363J-v1lz0EcZtgwIBYyCJV6CcEDU,40
4
4
  mdbq/aggregation/query_data.py,sha256=WtTFMN78jn43Y-nBTPAXhAK56w3wDuv_cj4YtzzGbZk,169797
5
5
  mdbq/log/__init__.py,sha256=Mpbrav0s0ifLL7lVDAuePEi1hJKiSHhxcv1byBKDl5E,15
@@ -11,7 +11,7 @@ mdbq/mysql/deduplicator.py,sha256=AB3gL7ZwhcmzGHSu4UY4M6YZVPFZ2wlAN3BCcwAhegQ,73
11
11
  mdbq/mysql/mysql.py,sha256=pDg771xBugCMSTWeskIFTi3pFLgaqgyG3smzf-86Wn8,56772
12
12
  mdbq/mysql/s_query.py,sha256=1wJ3HVjHEF6FA-bVeeesRlsf73CZSvVTEQ51CF1OsE4,46786
13
13
  mdbq/mysql/unique_.py,sha256=MaztT-WIyEQUs-OOYY4pFulgHVcXR1BfCy3QUz0XM_U,21127
14
- mdbq/mysql/uploader.py,sha256=defQ4xCC3j8an9dWjFI3q_Fec0Irewe2FzBZqFL1GJM,84673
14
+ mdbq/mysql/uploader.py,sha256=PQKFohU32hRyUW3sAgWigbLnBh4h9ZydF4RNC3wNAyo,82640
15
15
  mdbq/other/__init__.py,sha256=jso1oHcy6cJEfa7udS_9uO5X6kZLoPBF8l3wCYmr5dM,18
16
16
  mdbq/other/download_sku_picture.py,sha256=X66sVdvVgzoNzmgVJyPtd7bjEvctEKtLPblEPF65EWc,46940
17
17
  mdbq/other/error_handler.py,sha256=4p5haAXSY-P78stp4Xwo_MwAngWYqyKj5ogWIuYXMeY,12631
@@ -25,7 +25,7 @@ mdbq/redis/__init__.py,sha256=YtgBlVSMDphtpwYX248wGge1x-Ex_mMufz4-8W0XRmA,12
25
25
  mdbq/redis/getredis.py,sha256=vpBuNc22uj9Vr-_Dh25_wpwWM1e-072EAAIBdB_IpL0,23494
26
26
  mdbq/spider/__init__.py,sha256=RBMFXGy_jd1HXZhngB2T2XTvJqki8P_Fr-pBcwijnew,18
27
27
  mdbq/spider/aikucun.py,sha256=XptHjGzbout9IYzWAOQUpMMV5qEgLTU8pL1ZGt8oNEA,21868
28
- mdbq-4.0.39.dist-info/METADATA,sha256=9JtfqN1rvyf6K99crg26QGu--t9ksGomocg1VPyMHp8,364
29
- mdbq-4.0.39.dist-info/WHEEL,sha256=jB7zZ3N9hIM9adW7qlTAyycLYW9npaWKLRzaoVcLKcM,91
30
- mdbq-4.0.39.dist-info/top_level.txt,sha256=2FQ-uLnCSB-OwFiWntzmwosW3X2Xqsg0ewh1axsaylA,5
31
- mdbq-4.0.39.dist-info/RECORD,,
28
+ mdbq-4.0.41.dist-info/METADATA,sha256=FDyMAo_9iwFqvELSDiPqPM6GhkNj0htFROeZiZYNMcU,364
29
+ mdbq-4.0.41.dist-info/WHEEL,sha256=jB7zZ3N9hIM9adW7qlTAyycLYW9npaWKLRzaoVcLKcM,91
30
+ mdbq-4.0.41.dist-info/top_level.txt,sha256=2FQ-uLnCSB-OwFiWntzmwosW3X2Xqsg0ewh1axsaylA,5
31
+ mdbq-4.0.41.dist-info/RECORD,,
File without changes