mdbq 4.0.41__py3-none-any.whl → 4.0.43__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- mdbq/__version__.py +1 -1
- mdbq/aggregation/query_data.py +12 -7
- mdbq/mysql/uploader.py +115 -16
- {mdbq-4.0.41.dist-info → mdbq-4.0.43.dist-info}/METADATA +1 -1
- {mdbq-4.0.41.dist-info → mdbq-4.0.43.dist-info}/RECORD +7 -7
- {mdbq-4.0.41.dist-info → mdbq-4.0.43.dist-info}/WHEEL +0 -0
- {mdbq-4.0.41.dist-info → mdbq-4.0.43.dist-info}/top_level.txt +0 -0
mdbq/__version__.py
CHANGED
@@ -1 +1 @@
|
|
1
|
-
VERSION = '4.0.41'
|
1
|
+
VERSION = '4.0.43'
|
mdbq/aggregation/query_data.py
CHANGED
@@ -972,6 +972,7 @@ class MysqlDatasQuery:
|
|
972
972
|
'用户年龄': 'varchar(100)',
|
973
973
|
'人群分类': 'varchar(100)',
|
974
974
|
}
|
975
|
+
df.fillna(0, inplace=True)
|
975
976
|
return df, {
|
976
977
|
'db_name': db_name,
|
977
978
|
'table_name': table_name,
|
@@ -1066,7 +1067,7 @@ class MysqlDatasQuery:
|
|
1066
1067
|
)
|
1067
1068
|
df.insert(loc=1, column='推广渠道', value='万相台无界版') # df中插入新列
|
1068
1069
|
df['是否品牌词'] = df['词名字_词包名字'].str.contains('万里马|wanlima', regex=True)
|
1069
|
-
df['是否品牌词'] = df['是否品牌词'].apply(lambda x: '品牌词' if x else '')
|
1070
|
+
df['是否品牌词'] = df['是否品牌词'].apply(lambda x: '品牌词' if x else '-')
|
1070
1071
|
dir_file = f'\\\\192.168.1.198\\时尚事业部\\01.运营部\\0-电商周报-每周五更新\\分类配置文件.xlsx'
|
1071
1072
|
dir_file2 = '/Volumes/时尚事业部/01.运营部/0-电商周报-每周五更新/分类配置文件.xlsx'
|
1072
1073
|
if not os.path.isfile(dir_file):
|
@@ -1888,11 +1889,15 @@ class MysqlDatasQuery:
|
|
1888
1889
|
end_date=end_date,
|
1889
1890
|
projection=projection,
|
1890
1891
|
)
|
1891
|
-
|
1892
|
+
if 'spu_id' in df.columns:
|
1893
|
+
df = df.drop(columns=['spu_id']) # 删除原有 spu_id,避免冲突
|
1892
1894
|
df = pd.merge(df, df_sku, how='left', left_on='跟单sku_id', right_on='sku_id')
|
1893
|
-
df.
|
1894
|
-
|
1895
|
-
|
1895
|
+
df = df.drop(columns=['sku_id']) # 删除 merge 进来的 sku_id
|
1896
|
+
df['spu_id'] = df['spu_id'].fillna(0) # 填充 spu_id 空值
|
1897
|
+
# 调整 spu_id 到第3列
|
1898
|
+
cols = list(df.columns)
|
1899
|
+
cols.insert(3, cols.pop(cols.index('spu_id')))
|
1900
|
+
df = df[cols]
|
1896
1901
|
set_typ = {
|
1897
1902
|
'日期': 'date',
|
1898
1903
|
'店铺名称': 'varchar(100)',
|
@@ -2049,9 +2054,9 @@ class MysqlDatasQuery:
|
|
2049
2054
|
)
|
2050
2055
|
df = pd.merge(df, df_lin, how='left', left_on='计划id', right_on='计划id')
|
2051
2056
|
df['k_是否品牌词'] = df['关键词'].str.contains('万里马|wanlima', regex=True)
|
2052
|
-
df['k_是否品牌词'] = df['k_是否品牌词'].apply(lambda x: '品牌词' if x else '')
|
2057
|
+
df['k_是否品牌词'] = df['k_是否品牌词'].apply(lambda x: '品牌词' if x else '-')
|
2053
2058
|
df['s_是否品牌词'] = df['搜索词'].str.contains('万里马|wanlima', regex=True)
|
2054
|
-
df['s_是否品牌词'] = df['s_是否品牌词'].apply(lambda x: '品牌词' if x else '')
|
2059
|
+
df['s_是否品牌词'] = df['s_是否品牌词'].apply(lambda x: '品牌词' if x else '-')
|
2055
2060
|
set_typ = {
|
2056
2061
|
'日期': 'date',
|
2057
2062
|
'产品线': 'varchar(100)',
|
mdbq/mysql/uploader.py
CHANGED
@@ -516,6 +516,18 @@ class MySQLUploader:
|
|
516
516
|
:return: 标准化后的日期时间字符串或日期对象
|
517
517
|
:raises ValueError: 当日期格式无效时抛出
|
518
518
|
"""
|
519
|
+
# 处理 pandas Timestamp 对象
|
520
|
+
if hasattr(value, 'strftime'):
|
521
|
+
# 如果是 Timestamp 或 datetime 对象,直接格式化
|
522
|
+
if date_type:
|
523
|
+
return pd.to_datetime(value.strftime('%Y-%m-%d'))
|
524
|
+
else:
|
525
|
+
return value.strftime('%Y-%m-%d %H:%M:%S')
|
526
|
+
|
527
|
+
# 确保 value 是字符串
|
528
|
+
if not isinstance(value, str):
|
529
|
+
value = str(value)
|
530
|
+
|
519
531
|
formats = [
|
520
532
|
'%Y-%m-%d %H:%M:%S',
|
521
533
|
'%Y-%m-%d',
|
@@ -541,18 +553,112 @@ class MySQLUploader:
|
|
541
553
|
logger.error('无效的日期格式', {'值': value})
|
542
554
|
raise ValueError(f"无效的日期格式: `{value}`")
|
543
555
|
|
556
|
+
def _convert_to_int(self, value):
|
557
|
+
"""
|
558
|
+
尝试将value转换为int
|
559
|
+
"""
|
560
|
+
# 处理numpy/pandas标量
|
561
|
+
if hasattr(value, 'item') and callable(getattr(value, 'item', None)):
|
562
|
+
try:
|
563
|
+
value = value.item()
|
564
|
+
except Exception:
|
565
|
+
pass
|
566
|
+
elif hasattr(value, 'value') and not isinstance(value, str):
|
567
|
+
try:
|
568
|
+
extracted_value = value.value
|
569
|
+
if isinstance(extracted_value, (int, float, str)) and str(extracted_value).replace('.', '').replace('-', '').isdigit():
|
570
|
+
value = extracted_value
|
571
|
+
except Exception:
|
572
|
+
pass
|
573
|
+
try:
|
574
|
+
return int(value)
|
575
|
+
except (ValueError, TypeError):
|
576
|
+
try:
|
577
|
+
return int(float(value))
|
578
|
+
except (ValueError, TypeError):
|
579
|
+
raise
|
580
|
+
|
581
|
+
def _convert_to_float(self, value):
|
582
|
+
"""
|
583
|
+
尝试将value转换为float,兼容常见数值类型。
|
584
|
+
"""
|
585
|
+
if hasattr(value, 'item') and callable(getattr(value, 'item', None)):
|
586
|
+
try:
|
587
|
+
value = value.item()
|
588
|
+
except Exception:
|
589
|
+
pass
|
590
|
+
elif hasattr(value, 'value') and not isinstance(value, str):
|
591
|
+
try:
|
592
|
+
extracted_value = value.value
|
593
|
+
if isinstance(extracted_value, (int, float, str)) and str(extracted_value).replace('.', '').replace('-', '').replace('e', '').replace('E', '').isdigit():
|
594
|
+
value = extracted_value
|
595
|
+
except Exception:
|
596
|
+
pass
|
597
|
+
return float(value)
|
598
|
+
|
599
|
+
def _convert_to_decimal(self, value):
|
600
|
+
"""
|
601
|
+
尝试将value转换为Decimal,兼容常见数值类型。
|
602
|
+
"""
|
603
|
+
if hasattr(value, 'item') and callable(getattr(value, 'item', None)):
|
604
|
+
try:
|
605
|
+
value = value.item()
|
606
|
+
except Exception:
|
607
|
+
pass
|
608
|
+
elif hasattr(value, 'value') and not isinstance(value, str):
|
609
|
+
try:
|
610
|
+
extracted_value = value.value
|
611
|
+
if isinstance(extracted_value, (int, float, str)) and str(extracted_value).replace('.', '').replace('-', '').replace('e', '').replace('E', '').isdigit():
|
612
|
+
value = extracted_value
|
613
|
+
except Exception:
|
614
|
+
pass
|
615
|
+
return Decimal(str(value))
|
616
|
+
|
617
|
+
def _truncate_str(self, str_value, max_len):
|
618
|
+
"""
|
619
|
+
截断字符串到指定字节长度(utf-8)。
|
620
|
+
"""
|
621
|
+
return str_value.encode('utf-8')[:max_len].decode('utf-8', 'ignore')
|
622
|
+
|
544
623
|
def _validate_value(self, value: Any, column_type: str, allow_null: bool, db_name: str = None, table_name: str = None, col_name: str = None) -> Any:
|
545
624
|
"""
|
546
625
|
根据列类型验证并转换数据值
|
547
626
|
"""
|
548
627
|
column_type_lower = column_type.lower() if column_type else ''
|
549
628
|
# 统一判断None/NaN
|
629
|
+
if value == '':
|
630
|
+
if any(t in column_type_lower for t in ['varchar', 'text', 'char', 'mediumtext', 'longtext']):
|
631
|
+
return ""
|
550
632
|
if value == '' or pd.isna(value) or (isinstance(value, (float, Decimal)) and math.isinf(value)):
|
551
|
-
|
552
|
-
|
553
|
-
|
554
|
-
|
633
|
+
# 兜底填充值映射
|
634
|
+
fallback_map = {
|
635
|
+
'int': 0,
|
636
|
+
'bigint': 0,
|
637
|
+
'tinyint': 0,
|
638
|
+
'smallint': 0,
|
639
|
+
'mediumint': 0,
|
640
|
+
'decimal': 0.0,
|
641
|
+
'float': 0.0,
|
642
|
+
'double': 0.0,
|
643
|
+
'date': '1970-01-01',
|
644
|
+
'datetime': '1970-01-01 00:00:00',
|
645
|
+
'timestamp': '1970-01-01 00:00:00',
|
646
|
+
'json': '{}',
|
647
|
+
'varchar': 'none',
|
648
|
+
'text': 'none',
|
649
|
+
'char': 'none',
|
650
|
+
'mediumtext': 'none',
|
651
|
+
'longtext': 'none',
|
652
|
+
}
|
653
|
+
fallback = 'none'
|
654
|
+
for typ, val in fallback_map.items():
|
655
|
+
if typ in column_type_lower:
|
656
|
+
fallback = val
|
657
|
+
break
|
658
|
+
if not allow_null:
|
659
|
+
logger.warning("该列不允许为空值", {"库": db_name, "表": table_name, "allow_null": allow_null, "列": col_name, "值": value, "兜底值": fallback})
|
555
660
|
raise ValueError("该列不允许为空值")
|
661
|
+
return fallback
|
556
662
|
|
557
663
|
original_value = value
|
558
664
|
|
@@ -564,7 +670,7 @@ class MySQLUploader:
|
|
564
670
|
# 数值类型验证
|
565
671
|
elif 'int' in column_type_lower:
|
566
672
|
try:
|
567
|
-
return
|
673
|
+
return self._convert_to_int(value)
|
568
674
|
except (ValueError, TypeError):
|
569
675
|
logger.error(f"值 `{value}` 无法转换为整数", {"库": db_name, "表": table_name, "列": col_name})
|
570
676
|
raise ValueError(f"值 `{value}` 无法转换为整数")
|
@@ -572,28 +678,23 @@ class MySQLUploader:
|
|
572
678
|
# 百分比字符串处理
|
573
679
|
if isinstance(value, str) and '%' in value:
|
574
680
|
try:
|
575
|
-
# 仅当值是'xx.xx%'格式时才转换
|
576
681
|
if re.match(r'^-?\d+(\.\d+)?%$', value.strip()):
|
577
682
|
value = float(value.strip().replace('%', '')) / 100
|
578
683
|
else:
|
579
|
-
# 不符合格式的百分比字符串,保留原始值
|
580
684
|
logger.warning("百分比字符串不符合格式,跳过转换", {"库": db_name, "表": table_name, "列": col_name, "原始": original_value})
|
581
685
|
value = original_value
|
582
686
|
except (ValueError, TypeError):
|
583
687
|
logger.warning("百分比字符串转换失败,保留原始值", {"库": db_name, "表": table_name, "列": col_name, "原始": original_value})
|
584
688
|
value = original_value
|
585
|
-
|
586
689
|
try:
|
587
|
-
# 检查是否为Decimal类型
|
588
690
|
if 'decimal' in column_type_lower:
|
589
691
|
precision, scale = self._get_decimal_scale(column_type)
|
590
|
-
value_decimal =
|
591
|
-
# 检查整数部分长度
|
692
|
+
value_decimal = self._convert_to_decimal(value)
|
592
693
|
if len(value_decimal.as_tuple().digits) - abs(value_decimal.as_tuple().exponent) > precision - scale:
|
593
694
|
raise ValueError(f"整数部分超出范围")
|
594
695
|
return value_decimal
|
595
|
-
else:
|
596
|
-
return
|
696
|
+
else: # float/double
|
697
|
+
return self._convert_to_float(value)
|
597
698
|
except (ValueError, TypeError, InvalidOperation) as e:
|
598
699
|
logger.error(f"值 `{value}` 无法转换为数值类型: {e}", {"库": db_name, "表": table_name, "列": col_name})
|
599
700
|
raise ValueError(f"值 `{value}` 无法转换为数值类型: {e}")
|
@@ -604,12 +705,10 @@ class MySQLUploader:
|
|
604
705
|
max_len = int(re.search(r'\((\d+)\)', column_type).group(1))
|
605
706
|
if len(str_value.encode('utf-8')) > max_len:
|
606
707
|
logger.warning(f"列`{col_name}`的值`{str_value}`长度({len(str_value.encode('utf-8'))})超出varchar({max_len})限制,将进行截断", {"库": db_name, "表": table_name})
|
607
|
-
return
|
708
|
+
return self._truncate_str(str_value, max_len)
|
608
709
|
except (AttributeError, IndexError):
|
609
|
-
# 没有找到长度定义,不截断
|
610
710
|
pass
|
611
711
|
return str_value
|
612
|
-
|
613
712
|
return value
|
614
713
|
|
615
714
|
@_execute_with_retry
|
@@ -1,7 +1,7 @@
|
|
1
1
|
mdbq/__init__.py,sha256=Il5Q9ATdX8yXqVxtP_nYqUhExzxPC_qk_WXQ_4h0exg,16
|
2
|
-
mdbq/__version__.py,sha256=
|
2
|
+
mdbq/__version__.py,sha256=TAtw5Bwp1oD8rSk5e0bqF2jRzZmluFqjX_BvfIKAeJg,18
|
3
3
|
mdbq/aggregation/__init__.py,sha256=EeDqX2Aml6SPx8363J-v1lz0EcZtgwIBYyCJV6CcEDU,40
|
4
|
-
mdbq/aggregation/query_data.py,sha256=
|
4
|
+
mdbq/aggregation/query_data.py,sha256=_hm98oELIZvKmvanv_qpC3C9cw42up911z0o-_uo6qk,170018
|
5
5
|
mdbq/log/__init__.py,sha256=Mpbrav0s0ifLL7lVDAuePEi1hJKiSHhxcv1byBKDl5E,15
|
6
6
|
mdbq/log/mylogger.py,sha256=kPe3wsQNaB1slfX-Z7VMqzZoMoqPfc7ylYXZDBeFzzI,24945
|
7
7
|
mdbq/myconf/__init__.py,sha256=jso1oHcy6cJEfa7udS_9uO5X6kZLoPBF8l3wCYmr5dM,18
|
@@ -11,7 +11,7 @@ mdbq/mysql/deduplicator.py,sha256=AB3gL7ZwhcmzGHSu4UY4M6YZVPFZ2wlAN3BCcwAhegQ,73
|
|
11
11
|
mdbq/mysql/mysql.py,sha256=pDg771xBugCMSTWeskIFTi3pFLgaqgyG3smzf-86Wn8,56772
|
12
12
|
mdbq/mysql/s_query.py,sha256=1wJ3HVjHEF6FA-bVeeesRlsf73CZSvVTEQ51CF1OsE4,46786
|
13
13
|
mdbq/mysql/unique_.py,sha256=MaztT-WIyEQUs-OOYY4pFulgHVcXR1BfCy3QUz0XM_U,21127
|
14
|
-
mdbq/mysql/uploader.py,sha256=
|
14
|
+
mdbq/mysql/uploader.py,sha256=LPfYEj7ywoAynY2Nl9gg0IurgIWd_bTwkda2ifD1TeE,86443
|
15
15
|
mdbq/other/__init__.py,sha256=jso1oHcy6cJEfa7udS_9uO5X6kZLoPBF8l3wCYmr5dM,18
|
16
16
|
mdbq/other/download_sku_picture.py,sha256=X66sVdvVgzoNzmgVJyPtd7bjEvctEKtLPblEPF65EWc,46940
|
17
17
|
mdbq/other/error_handler.py,sha256=4p5haAXSY-P78stp4Xwo_MwAngWYqyKj5ogWIuYXMeY,12631
|
@@ -25,7 +25,7 @@ mdbq/redis/__init__.py,sha256=YtgBlVSMDphtpwYX248wGge1x-Ex_mMufz4-8W0XRmA,12
|
|
25
25
|
mdbq/redis/getredis.py,sha256=vpBuNc22uj9Vr-_Dh25_wpwWM1e-072EAAIBdB_IpL0,23494
|
26
26
|
mdbq/spider/__init__.py,sha256=RBMFXGy_jd1HXZhngB2T2XTvJqki8P_Fr-pBcwijnew,18
|
27
27
|
mdbq/spider/aikucun.py,sha256=XptHjGzbout9IYzWAOQUpMMV5qEgLTU8pL1ZGt8oNEA,21868
|
28
|
-
mdbq-4.0.
|
29
|
-
mdbq-4.0.
|
30
|
-
mdbq-4.0.
|
31
|
-
mdbq-4.0.
|
28
|
+
mdbq-4.0.43.dist-info/METADATA,sha256=QlDEJTpAJjJDiP86YJlM-LYDWDKyCAB7g9GzgiUAap4,364
|
29
|
+
mdbq-4.0.43.dist-info/WHEEL,sha256=jB7zZ3N9hIM9adW7qlTAyycLYW9npaWKLRzaoVcLKcM,91
|
30
|
+
mdbq-4.0.43.dist-info/top_level.txt,sha256=2FQ-uLnCSB-OwFiWntzmwosW3X2Xqsg0ewh1axsaylA,5
|
31
|
+
mdbq-4.0.43.dist-info/RECORD,,
|
File without changes
|
File without changes
|