mdbq 4.0.41__py3-none-any.whl → 4.0.43__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
mdbq/__version__.py CHANGED
@@ -1 +1 @@
1
- VERSION = '4.0.41'
1
+ VERSION = '4.0.43'
@@ -972,6 +972,7 @@ class MysqlDatasQuery:
972
972
  '用户年龄': 'varchar(100)',
973
973
  '人群分类': 'varchar(100)',
974
974
  }
975
+ df.fillna(0, inplace=True)
975
976
  return df, {
976
977
  'db_name': db_name,
977
978
  'table_name': table_name,
@@ -1066,7 +1067,7 @@ class MysqlDatasQuery:
1066
1067
  )
1067
1068
  df.insert(loc=1, column='推广渠道', value='万相台无界版') # df中插入新列
1068
1069
  df['是否品牌词'] = df['词名字_词包名字'].str.contains('万里马|wanlima', regex=True)
1069
- df['是否品牌词'] = df['是否品牌词'].apply(lambda x: '品牌词' if x else '')
1070
+ df['是否品牌词'] = df['是否品牌词'].apply(lambda x: '品牌词' if x else '-')
1070
1071
  dir_file = f'\\\\192.168.1.198\\时尚事业部\\01.运营部\\0-电商周报-每周五更新\\分类配置文件.xlsx'
1071
1072
  dir_file2 = '/Volumes/时尚事业部/01.运营部/0-电商周报-每周五更新/分类配置文件.xlsx'
1072
1073
  if not os.path.isfile(dir_file):
@@ -1888,11 +1889,15 @@ class MysqlDatasQuery:
1888
1889
  end_date=end_date,
1889
1890
  projection=projection,
1890
1891
  )
1891
- df.pop('spu_id') # 删除推广表的 spu id
1892
+ if 'spu_id' in df.columns:
1893
+ df = df.drop(columns=['spu_id']) # 删除原有 spu_id,避免冲突
1892
1894
  df = pd.merge(df, df_sku, how='left', left_on='跟单sku_id', right_on='sku_id')
1893
- df.pop('sku_id') # 删除聚合后合并进来的 sku id,实际使用 跟单sku_id
1894
- p = df.pop('spu_id')
1895
- df.insert(loc=3, column='spu_id', value=p)
1895
+ df = df.drop(columns=['sku_id']) # 删除 merge 进来的 sku_id
1896
+ df['spu_id'] = df['spu_id'].fillna(0) # 填充 spu_id 空值
1897
+ # 调整 spu_id 到第3列
1898
+ cols = list(df.columns)
1899
+ cols.insert(3, cols.pop(cols.index('spu_id')))
1900
+ df = df[cols]
1896
1901
  set_typ = {
1897
1902
  '日期': 'date',
1898
1903
  '店铺名称': 'varchar(100)',
@@ -2049,9 +2054,9 @@ class MysqlDatasQuery:
2049
2054
  )
2050
2055
  df = pd.merge(df, df_lin, how='left', left_on='计划id', right_on='计划id')
2051
2056
  df['k_是否品牌词'] = df['关键词'].str.contains('万里马|wanlima', regex=True)
2052
- df['k_是否品牌词'] = df['k_是否品牌词'].apply(lambda x: '品牌词' if x else '')
2057
+ df['k_是否品牌词'] = df['k_是否品牌词'].apply(lambda x: '品牌词' if x else '-')
2053
2058
  df['s_是否品牌词'] = df['搜索词'].str.contains('万里马|wanlima', regex=True)
2054
- df['s_是否品牌词'] = df['s_是否品牌词'].apply(lambda x: '品牌词' if x else '')
2059
+ df['s_是否品牌词'] = df['s_是否品牌词'].apply(lambda x: '品牌词' if x else '-')
2055
2060
  set_typ = {
2056
2061
  '日期': 'date',
2057
2062
  '产品线': 'varchar(100)',
mdbq/mysql/uploader.py CHANGED
@@ -516,6 +516,18 @@ class MySQLUploader:
516
516
  :return: 标准化后的日期时间字符串或日期对象
517
517
  :raises ValueError: 当日期格式无效时抛出
518
518
  """
519
+ # 处理 pandas Timestamp 对象
520
+ if hasattr(value, 'strftime'):
521
+ # 如果是 Timestamp 或 datetime 对象,直接格式化
522
+ if date_type:
523
+ return pd.to_datetime(value.strftime('%Y-%m-%d'))
524
+ else:
525
+ return value.strftime('%Y-%m-%d %H:%M:%S')
526
+
527
+ # 确保 value 是字符串
528
+ if not isinstance(value, str):
529
+ value = str(value)
530
+
519
531
  formats = [
520
532
  '%Y-%m-%d %H:%M:%S',
521
533
  '%Y-%m-%d',
@@ -541,18 +553,112 @@ class MySQLUploader:
541
553
  logger.error('无效的日期格式', {'值': value})
542
554
  raise ValueError(f"无效的日期格式: `{value}`")
543
555
 
556
+ def _convert_to_int(self, value):
557
+ """
558
+ 尝试将value转换为int
559
+ """
560
+ # 处理numpy/pandas标量
561
+ if hasattr(value, 'item') and callable(getattr(value, 'item', None)):
562
+ try:
563
+ value = value.item()
564
+ except Exception:
565
+ pass
566
+ elif hasattr(value, 'value') and not isinstance(value, str):
567
+ try:
568
+ extracted_value = value.value
569
+ if isinstance(extracted_value, (int, float, str)) and str(extracted_value).replace('.', '').replace('-', '').isdigit():
570
+ value = extracted_value
571
+ except Exception:
572
+ pass
573
+ try:
574
+ return int(value)
575
+ except (ValueError, TypeError):
576
+ try:
577
+ return int(float(value))
578
+ except (ValueError, TypeError):
579
+ raise
580
+
581
+ def _convert_to_float(self, value):
582
+ """
583
+ 尝试将value转换为float,兼容常见数值类型。
584
+ """
585
+ if hasattr(value, 'item') and callable(getattr(value, 'item', None)):
586
+ try:
587
+ value = value.item()
588
+ except Exception:
589
+ pass
590
+ elif hasattr(value, 'value') and not isinstance(value, str):
591
+ try:
592
+ extracted_value = value.value
593
+ if isinstance(extracted_value, (int, float, str)) and str(extracted_value).replace('.', '').replace('-', '').replace('e', '').replace('E', '').isdigit():
594
+ value = extracted_value
595
+ except Exception:
596
+ pass
597
+ return float(value)
598
+
599
+ def _convert_to_decimal(self, value):
600
+ """
601
+ 尝试将value转换为Decimal,兼容常见数值类型。
602
+ """
603
+ if hasattr(value, 'item') and callable(getattr(value, 'item', None)):
604
+ try:
605
+ value = value.item()
606
+ except Exception:
607
+ pass
608
+ elif hasattr(value, 'value') and not isinstance(value, str):
609
+ try:
610
+ extracted_value = value.value
611
+ if isinstance(extracted_value, (int, float, str)) and str(extracted_value).replace('.', '').replace('-', '').replace('e', '').replace('E', '').isdigit():
612
+ value = extracted_value
613
+ except Exception:
614
+ pass
615
+ return Decimal(str(value))
616
+
617
+ def _truncate_str(self, str_value, max_len):
618
+ """
619
+ 截断字符串到指定字节长度(utf-8)。
620
+ """
621
+ return str_value.encode('utf-8')[:max_len].decode('utf-8', 'ignore')
622
+
544
623
  def _validate_value(self, value: Any, column_type: str, allow_null: bool, db_name: str = None, table_name: str = None, col_name: str = None) -> Any:
545
624
  """
546
625
  根据列类型验证并转换数据值
547
626
  """
548
627
  column_type_lower = column_type.lower() if column_type else ''
549
628
  # 统一判断None/NaN
629
+ if value == '':
630
+ if any(t in column_type_lower for t in ['varchar', 'text', 'char', 'mediumtext', 'longtext']):
631
+ return ""
550
632
  if value == '' or pd.isna(value) or (isinstance(value, (float, Decimal)) and math.isinf(value)):
551
- if allow_null:
552
- return None
553
- else:
554
- logger.error("该列不允许为空值", {"库": db_name, "表": table_name, "列": col_name, "值": value})
633
+ # 兜底填充值映射
634
+ fallback_map = {
635
+ 'int': 0,
636
+ 'bigint': 0,
637
+ 'tinyint': 0,
638
+ 'smallint': 0,
639
+ 'mediumint': 0,
640
+ 'decimal': 0.0,
641
+ 'float': 0.0,
642
+ 'double': 0.0,
643
+ 'date': '1970-01-01',
644
+ 'datetime': '1970-01-01 00:00:00',
645
+ 'timestamp': '1970-01-01 00:00:00',
646
+ 'json': '{}',
647
+ 'varchar': 'none',
648
+ 'text': 'none',
649
+ 'char': 'none',
650
+ 'mediumtext': 'none',
651
+ 'longtext': 'none',
652
+ }
653
+ fallback = 'none'
654
+ for typ, val in fallback_map.items():
655
+ if typ in column_type_lower:
656
+ fallback = val
657
+ break
658
+ if not allow_null:
659
+ logger.warning("该列不允许为空值", {"库": db_name, "表": table_name, "allow_null": allow_null, "列": col_name, "值": value, "兜底值": fallback})
555
660
  raise ValueError("该列不允许为空值")
661
+ return fallback
556
662
 
557
663
  original_value = value
558
664
 
@@ -564,7 +670,7 @@ class MySQLUploader:
564
670
  # 数值类型验证
565
671
  elif 'int' in column_type_lower:
566
672
  try:
567
- return int(value)
673
+ return self._convert_to_int(value)
568
674
  except (ValueError, TypeError):
569
675
  logger.error(f"值 `{value}` 无法转换为整数", {"库": db_name, "表": table_name, "列": col_name})
570
676
  raise ValueError(f"值 `{value}` 无法转换为整数")
@@ -572,28 +678,23 @@ class MySQLUploader:
572
678
  # 百分比字符串处理
573
679
  if isinstance(value, str) and '%' in value:
574
680
  try:
575
- # 仅当值是'xx.xx%'格式时才转换
576
681
  if re.match(r'^-?\d+(\.\d+)?%$', value.strip()):
577
682
  value = float(value.strip().replace('%', '')) / 100
578
683
  else:
579
- # 不符合格式的百分比字符串,保留原始值
580
684
  logger.warning("百分比字符串不符合格式,跳过转换", {"库": db_name, "表": table_name, "列": col_name, "原始": original_value})
581
685
  value = original_value
582
686
  except (ValueError, TypeError):
583
687
  logger.warning("百分比字符串转换失败,保留原始值", {"库": db_name, "表": table_name, "列": col_name, "原始": original_value})
584
688
  value = original_value
585
-
586
689
  try:
587
- # 检查是否为Decimal类型
588
690
  if 'decimal' in column_type_lower:
589
691
  precision, scale = self._get_decimal_scale(column_type)
590
- value_decimal = Decimal(str(value))
591
- # 检查整数部分长度
692
+ value_decimal = self._convert_to_decimal(value)
592
693
  if len(value_decimal.as_tuple().digits) - abs(value_decimal.as_tuple().exponent) > precision - scale:
593
694
  raise ValueError(f"整数部分超出范围")
594
695
  return value_decimal
595
- else: # float/double
596
- return float(value)
696
+ else: # float/double
697
+ return self._convert_to_float(value)
597
698
  except (ValueError, TypeError, InvalidOperation) as e:
598
699
  logger.error(f"值 `{value}` 无法转换为数值类型: {e}", {"库": db_name, "表": table_name, "列": col_name})
599
700
  raise ValueError(f"值 `{value}` 无法转换为数值类型: {e}")
@@ -604,12 +705,10 @@ class MySQLUploader:
604
705
  max_len = int(re.search(r'\((\d+)\)', column_type).group(1))
605
706
  if len(str_value.encode('utf-8')) > max_len:
606
707
  logger.warning(f"列`{col_name}`的值`{str_value}`长度({len(str_value.encode('utf-8'))})超出varchar({max_len})限制,将进行截断", {"库": db_name, "表": table_name})
607
- return str_value.encode('utf-8')[:max_len].decode('utf-8', 'ignore')
708
+ return self._truncate_str(str_value, max_len)
608
709
  except (AttributeError, IndexError):
609
- # 没有找到长度定义,不截断
610
710
  pass
611
711
  return str_value
612
-
613
712
  return value
614
713
 
615
714
  @_execute_with_retry
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.2
2
2
  Name: mdbq
3
- Version: 4.0.41
3
+ Version: 4.0.43
4
4
  Home-page: https://pypi.org/project/mdbq
5
5
  Author: xigua,
6
6
  Author-email: 2587125111@qq.com
@@ -1,7 +1,7 @@
1
1
  mdbq/__init__.py,sha256=Il5Q9ATdX8yXqVxtP_nYqUhExzxPC_qk_WXQ_4h0exg,16
2
- mdbq/__version__.py,sha256=X4VgvOxMoRU-YHIymW2NhCV8yy6pvwj9wlFGdVP8Di8,18
2
+ mdbq/__version__.py,sha256=TAtw5Bwp1oD8rSk5e0bqF2jRzZmluFqjX_BvfIKAeJg,18
3
3
  mdbq/aggregation/__init__.py,sha256=EeDqX2Aml6SPx8363J-v1lz0EcZtgwIBYyCJV6CcEDU,40
4
- mdbq/aggregation/query_data.py,sha256=WtTFMN78jn43Y-nBTPAXhAK56w3wDuv_cj4YtzzGbZk,169797
4
+ mdbq/aggregation/query_data.py,sha256=_hm98oELIZvKmvanv_qpC3C9cw42up911z0o-_uo6qk,170018
5
5
  mdbq/log/__init__.py,sha256=Mpbrav0s0ifLL7lVDAuePEi1hJKiSHhxcv1byBKDl5E,15
6
6
  mdbq/log/mylogger.py,sha256=kPe3wsQNaB1slfX-Z7VMqzZoMoqPfc7ylYXZDBeFzzI,24945
7
7
  mdbq/myconf/__init__.py,sha256=jso1oHcy6cJEfa7udS_9uO5X6kZLoPBF8l3wCYmr5dM,18
@@ -11,7 +11,7 @@ mdbq/mysql/deduplicator.py,sha256=AB3gL7ZwhcmzGHSu4UY4M6YZVPFZ2wlAN3BCcwAhegQ,73
11
11
  mdbq/mysql/mysql.py,sha256=pDg771xBugCMSTWeskIFTi3pFLgaqgyG3smzf-86Wn8,56772
12
12
  mdbq/mysql/s_query.py,sha256=1wJ3HVjHEF6FA-bVeeesRlsf73CZSvVTEQ51CF1OsE4,46786
13
13
  mdbq/mysql/unique_.py,sha256=MaztT-WIyEQUs-OOYY4pFulgHVcXR1BfCy3QUz0XM_U,21127
14
- mdbq/mysql/uploader.py,sha256=PQKFohU32hRyUW3sAgWigbLnBh4h9ZydF4RNC3wNAyo,82640
14
+ mdbq/mysql/uploader.py,sha256=LPfYEj7ywoAynY2Nl9gg0IurgIWd_bTwkda2ifD1TeE,86443
15
15
  mdbq/other/__init__.py,sha256=jso1oHcy6cJEfa7udS_9uO5X6kZLoPBF8l3wCYmr5dM,18
16
16
  mdbq/other/download_sku_picture.py,sha256=X66sVdvVgzoNzmgVJyPtd7bjEvctEKtLPblEPF65EWc,46940
17
17
  mdbq/other/error_handler.py,sha256=4p5haAXSY-P78stp4Xwo_MwAngWYqyKj5ogWIuYXMeY,12631
@@ -25,7 +25,7 @@ mdbq/redis/__init__.py,sha256=YtgBlVSMDphtpwYX248wGge1x-Ex_mMufz4-8W0XRmA,12
25
25
  mdbq/redis/getredis.py,sha256=vpBuNc22uj9Vr-_Dh25_wpwWM1e-072EAAIBdB_IpL0,23494
26
26
  mdbq/spider/__init__.py,sha256=RBMFXGy_jd1HXZhngB2T2XTvJqki8P_Fr-pBcwijnew,18
27
27
  mdbq/spider/aikucun.py,sha256=XptHjGzbout9IYzWAOQUpMMV5qEgLTU8pL1ZGt8oNEA,21868
28
- mdbq-4.0.41.dist-info/METADATA,sha256=FDyMAo_9iwFqvELSDiPqPM6GhkNj0htFROeZiZYNMcU,364
29
- mdbq-4.0.41.dist-info/WHEEL,sha256=jB7zZ3N9hIM9adW7qlTAyycLYW9npaWKLRzaoVcLKcM,91
30
- mdbq-4.0.41.dist-info/top_level.txt,sha256=2FQ-uLnCSB-OwFiWntzmwosW3X2Xqsg0ewh1axsaylA,5
31
- mdbq-4.0.41.dist-info/RECORD,,
28
+ mdbq-4.0.43.dist-info/METADATA,sha256=QlDEJTpAJjJDiP86YJlM-LYDWDKyCAB7g9GzgiUAap4,364
29
+ mdbq-4.0.43.dist-info/WHEEL,sha256=jB7zZ3N9hIM9adW7qlTAyycLYW9npaWKLRzaoVcLKcM,91
30
+ mdbq-4.0.43.dist-info/top_level.txt,sha256=2FQ-uLnCSB-OwFiWntzmwosW3X2Xqsg0ewh1axsaylA,5
31
+ mdbq-4.0.43.dist-info/RECORD,,
File without changes