mdbq 4.0.40__py3-none-any.whl → 4.0.42__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- mdbq/__version__.py +1 -1
- mdbq/mysql/uploader.py +81 -25
- {mdbq-4.0.40.dist-info → mdbq-4.0.42.dist-info}/METADATA +1 -1
- {mdbq-4.0.40.dist-info → mdbq-4.0.42.dist-info}/RECORD +6 -6
- {mdbq-4.0.40.dist-info → mdbq-4.0.42.dist-info}/WHEEL +0 -0
- {mdbq-4.0.40.dist-info → mdbq-4.0.42.dist-info}/top_level.txt +0 -0
mdbq/__version__.py
CHANGED
@@ -1 +1 @@
|
|
1
|
-
VERSION = '4.0.40'
|
1
|
+
VERSION = '4.0.42'
|
mdbq/mysql/uploader.py
CHANGED
@@ -516,6 +516,18 @@ class MySQLUploader:
|
|
516
516
|
:return: 标准化后的日期时间字符串或日期对象
|
517
517
|
:raises ValueError: 当日期格式无效时抛出
|
518
518
|
"""
|
519
|
+
# 处理 pandas Timestamp 对象
|
520
|
+
if hasattr(value, 'strftime'):
|
521
|
+
# 如果是 Timestamp 或 datetime 对象,直接格式化
|
522
|
+
if date_type:
|
523
|
+
return pd.to_datetime(value.strftime('%Y-%m-%d'))
|
524
|
+
else:
|
525
|
+
return value.strftime('%Y-%m-%d %H:%M:%S')
|
526
|
+
|
527
|
+
# 确保 value 是字符串
|
528
|
+
if not isinstance(value, str):
|
529
|
+
value = str(value)
|
530
|
+
|
519
531
|
formats = [
|
520
532
|
'%Y-%m-%d %H:%M:%S',
|
521
533
|
'%Y-%m-%d',
|
@@ -525,8 +537,7 @@ class MySQLUploader:
|
|
525
537
|
'%Y-%m-%dT%H:%M:%S',
|
526
538
|
'%Y-%m-%d %H:%M:%S.%f',
|
527
539
|
'%Y/%-m/%-d', # 2023/1/8
|
528
|
-
'%Y
|
529
|
-
'%Y-%-m-%-d' # 2023-1-8
|
540
|
+
'%Y-%-m-%-d', # 2023-01-8
|
530
541
|
]
|
531
542
|
for fmt in formats:
|
532
543
|
try:
|
@@ -565,6 +576,24 @@ class MySQLUploader:
|
|
565
576
|
# 数值类型验证
|
566
577
|
elif 'int' in column_type_lower:
|
567
578
|
try:
|
579
|
+
# 安全地处理各种数值类型
|
580
|
+
if hasattr(value, 'item') and callable(getattr(value, 'item', None)):
|
581
|
+
# numpy 标量类型
|
582
|
+
try:
|
583
|
+
value = value.item()
|
584
|
+
except (ValueError, TypeError):
|
585
|
+
# 如果不是标量,保持原值
|
586
|
+
pass
|
587
|
+
elif hasattr(value, 'value') and not isinstance(value, str):
|
588
|
+
# pandas 或其他有 value 属性的对象
|
589
|
+
try:
|
590
|
+
extracted_value = value.value
|
591
|
+
# 验证提取的值是数值类型
|
592
|
+
if isinstance(extracted_value, (int, float, str)) and str(extracted_value).replace('.', '').replace('-', '').isdigit():
|
593
|
+
value = extracted_value
|
594
|
+
except (ValueError, TypeError, AttributeError):
|
595
|
+
# 如果提取失败,保持原值
|
596
|
+
pass
|
568
597
|
return int(value)
|
569
598
|
except (ValueError, TypeError):
|
570
599
|
logger.error(f"值 `{value}` 无法转换为整数", {"库": db_name, "表": table_name, "列": col_name})
|
@@ -588,12 +617,48 @@ class MySQLUploader:
|
|
588
617
|
# 检查是否为Decimal类型
|
589
618
|
if 'decimal' in column_type_lower:
|
590
619
|
precision, scale = self._get_decimal_scale(column_type)
|
620
|
+
# 安全地处理各种数值类型
|
621
|
+
if hasattr(value, 'item') and callable(getattr(value, 'item', None)):
|
622
|
+
# numpy 标量类型
|
623
|
+
try:
|
624
|
+
value = value.item()
|
625
|
+
except (ValueError, TypeError):
|
626
|
+
# 如果不是标量,保持原值
|
627
|
+
pass
|
628
|
+
elif hasattr(value, 'value') and not isinstance(value, str):
|
629
|
+
# pandas 或其他有 value 属性的对象
|
630
|
+
try:
|
631
|
+
extracted_value = value.value
|
632
|
+
# 验证提取的值是数值类型
|
633
|
+
if isinstance(extracted_value, (int, float, str)) and str(extracted_value).replace('.', '').replace('-', '').replace('e', '').replace('E', '').isdigit():
|
634
|
+
value = extracted_value
|
635
|
+
except (ValueError, TypeError, AttributeError):
|
636
|
+
# 如果提取失败,保持原值
|
637
|
+
pass
|
591
638
|
value_decimal = Decimal(str(value))
|
592
639
|
# 检查整数部分长度
|
593
640
|
if len(value_decimal.as_tuple().digits) - abs(value_decimal.as_tuple().exponent) > precision - scale:
|
594
641
|
raise ValueError(f"整数部分超出范围")
|
595
642
|
return value_decimal
|
596
643
|
else: # float/double
|
644
|
+
# 安全地处理各种数值类型
|
645
|
+
if hasattr(value, 'item') and callable(getattr(value, 'item', None)):
|
646
|
+
# numpy 标量类型
|
647
|
+
try:
|
648
|
+
value = value.item()
|
649
|
+
except (ValueError, TypeError):
|
650
|
+
# 如果不是标量,保持原值
|
651
|
+
pass
|
652
|
+
elif hasattr(value, 'value') and not isinstance(value, str):
|
653
|
+
# pandas 或其他有 value 属性的对象
|
654
|
+
try:
|
655
|
+
extracted_value = value.value
|
656
|
+
# 验证提取的值是数值类型
|
657
|
+
if isinstance(extracted_value, (int, float, str)) and str(extracted_value).replace('.', '').replace('-', '').replace('e', '').replace('E', '').isdigit():
|
658
|
+
value = extracted_value
|
659
|
+
except (ValueError, TypeError, AttributeError):
|
660
|
+
# 如果提取失败,保持原值
|
661
|
+
pass
|
597
662
|
return float(value)
|
598
663
|
except (ValueError, TypeError, InvalidOperation) as e:
|
599
664
|
logger.error(f"值 `{value}` 无法转换为数值类型: {e}", {"库": db_name, "表": table_name, "列": col_name})
|
@@ -816,8 +881,11 @@ class MySQLUploader:
|
|
816
881
|
|
817
882
|
# 检查是否是百分比字符串
|
818
883
|
if isinstance(value, str):
|
819
|
-
if
|
820
|
-
|
884
|
+
if '%' in value:
|
885
|
+
if re.match(r'^-?\d+(\.\d+)?%$', value.strip()):
|
886
|
+
return 'DECIMAL(10, 4)' # 百分比转为小数,使用DECIMAL
|
887
|
+
else:
|
888
|
+
return 'VARCHAR(255)' # 不符合格式的百分比,视为字符串
|
821
889
|
|
822
890
|
if isinstance(value, bool):
|
823
891
|
return 'TINYINT(1)'
|
@@ -861,8 +929,8 @@ class MySQLUploader:
|
|
861
929
|
return 'MEDIUMTEXT'
|
862
930
|
else:
|
863
931
|
return 'LONGTEXT'
|
864
|
-
|
865
|
-
|
932
|
+
|
933
|
+
return 'VARCHAR(255)' # 默认字符串类型
|
866
934
|
|
867
935
|
def normalize_column_names(self, data: Union[pd.DataFrame, List[Dict[str, Any]]]) -> Union[
|
868
936
|
pd.DataFrame, List[Dict[str, Any]]]:
|
@@ -1317,7 +1385,7 @@ class MySQLUploader:
|
|
1317
1385
|
for col in dup_cols:
|
1318
1386
|
col_type = set_typ.get(col, '').lower()
|
1319
1387
|
if col_type.startswith('decimal'):
|
1320
|
-
scale = self._get_decimal_scale(col_type)
|
1388
|
+
_, scale = self._get_decimal_scale(col_type)
|
1321
1389
|
conditions.append(f"ROUND(`{col}`, {scale}) = ROUND(%s, {scale})")
|
1322
1390
|
else:
|
1323
1391
|
conditions.append(f"`{col}` = %s")
|
@@ -1346,24 +1414,12 @@ class MySQLUploader:
|
|
1346
1414
|
"""
|
1347
1415
|
return sql
|
1348
1416
|
|
1349
|
-
def _get_decimal_scale(self, decimal_type: str) -> int:
|
1350
|
-
"""
|
1351
|
-
|
1352
|
-
|
1353
|
-
|
1354
|
-
|
1355
|
-
:raises: 无显式抛出异常,但解析失败时返回默认值2
|
1356
|
-
"""
|
1357
|
-
try:
|
1358
|
-
# 匹配DECIMAL类型中的精度和小数位数
|
1359
|
-
match = re.match(r'decimal\((\d+),\s*(\d+)\)', decimal_type.lower())
|
1360
|
-
if match:
|
1361
|
-
return int(match.group(2))
|
1362
|
-
except (ValueError, AttributeError, IndexError):
|
1363
|
-
pass
|
1364
|
-
|
1365
|
-
# 默认返回2位小数
|
1366
|
-
return 2
|
1417
|
+
def _get_decimal_scale(self, decimal_type: str) -> Tuple[int, int]:
|
1418
|
+
"""从DECIMAL类型字符串中提取精度和标度"""
|
1419
|
+
match = re.search(r'\((\d+)\s*,\s*(\d+)\)', decimal_type)
|
1420
|
+
if match:
|
1421
|
+
return int(match.group(1)), int(match.group(2))
|
1422
|
+
return 18, 2 # 默认值
|
1367
1423
|
|
1368
1424
|
def _prepare_insert_sql(
|
1369
1425
|
self,
|
@@ -1,5 +1,5 @@
|
|
1
1
|
mdbq/__init__.py,sha256=Il5Q9ATdX8yXqVxtP_nYqUhExzxPC_qk_WXQ_4h0exg,16
|
2
|
-
mdbq/__version__.py,sha256=
|
2
|
+
mdbq/__version__.py,sha256=oNzbjnM_gPKVuYbTskX3n8fcwlgDsOEOgDiMJaHcsIU,18
|
3
3
|
mdbq/aggregation/__init__.py,sha256=EeDqX2Aml6SPx8363J-v1lz0EcZtgwIBYyCJV6CcEDU,40
|
4
4
|
mdbq/aggregation/query_data.py,sha256=WtTFMN78jn43Y-nBTPAXhAK56w3wDuv_cj4YtzzGbZk,169797
|
5
5
|
mdbq/log/__init__.py,sha256=Mpbrav0s0ifLL7lVDAuePEi1hJKiSHhxcv1byBKDl5E,15
|
@@ -11,7 +11,7 @@ mdbq/mysql/deduplicator.py,sha256=AB3gL7ZwhcmzGHSu4UY4M6YZVPFZ2wlAN3BCcwAhegQ,73
|
|
11
11
|
mdbq/mysql/mysql.py,sha256=pDg771xBugCMSTWeskIFTi3pFLgaqgyG3smzf-86Wn8,56772
|
12
12
|
mdbq/mysql/s_query.py,sha256=1wJ3HVjHEF6FA-bVeeesRlsf73CZSvVTEQ51CF1OsE4,46786
|
13
13
|
mdbq/mysql/unique_.py,sha256=MaztT-WIyEQUs-OOYY4pFulgHVcXR1BfCy3QUz0XM_U,21127
|
14
|
-
mdbq/mysql/uploader.py,sha256=
|
14
|
+
mdbq/mysql/uploader.py,sha256=d2ocnCTyBkAJ4LjFLOatb0VgkJ28g70fPXJropRlH1s,86370
|
15
15
|
mdbq/other/__init__.py,sha256=jso1oHcy6cJEfa7udS_9uO5X6kZLoPBF8l3wCYmr5dM,18
|
16
16
|
mdbq/other/download_sku_picture.py,sha256=X66sVdvVgzoNzmgVJyPtd7bjEvctEKtLPblEPF65EWc,46940
|
17
17
|
mdbq/other/error_handler.py,sha256=4p5haAXSY-P78stp4Xwo_MwAngWYqyKj5ogWIuYXMeY,12631
|
@@ -25,7 +25,7 @@ mdbq/redis/__init__.py,sha256=YtgBlVSMDphtpwYX248wGge1x-Ex_mMufz4-8W0XRmA,12
|
|
25
25
|
mdbq/redis/getredis.py,sha256=vpBuNc22uj9Vr-_Dh25_wpwWM1e-072EAAIBdB_IpL0,23494
|
26
26
|
mdbq/spider/__init__.py,sha256=RBMFXGy_jd1HXZhngB2T2XTvJqki8P_Fr-pBcwijnew,18
|
27
27
|
mdbq/spider/aikucun.py,sha256=XptHjGzbout9IYzWAOQUpMMV5qEgLTU8pL1ZGt8oNEA,21868
|
28
|
-
mdbq-4.0.
|
29
|
-
mdbq-4.0.40.dist-info/WHEEL,sha256=jB7zZ3N9hIM9adW7qlTAyycLYW9npaWKLRzaoVcLKcM,91
|
30
|
-
mdbq-4.0.40.dist-info/top_level.txt,sha256=2FQ-uLnCSB-OwFiWntzmwosW3X2Xqsg0ewh1axsaylA,5
|
31
|
-
mdbq-4.0.40.dist-info/RECORD,,
|
28
|
+
mdbq-4.0.42.dist-info/METADATA,sha256=XEoFYTnRfRqAv-rIatsbqCFRTXAP9vP0Zv6g-KzIOWU,364
|
29
|
+
mdbq-4.0.42.dist-info/WHEEL,sha256=jB7zZ3N9hIM9adW7qlTAyycLYW9npaWKLRzaoVcLKcM,91
|
30
|
+
mdbq-4.0.42.dist-info/top_level.txt,sha256=2FQ-uLnCSB-OwFiWntzmwosW3X2Xqsg0ewh1axsaylA,5
|
31
|
+
mdbq-4.0.42.dist-info/RECORD,,
|
File without changes
|
File without changes
|