mdbq 4.0.42__py3-none-any.whl → 4.0.44__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
mdbq/__version__.py CHANGED
@@ -1 +1 @@
1
- VERSION = '4.0.42'
1
+ VERSION = '4.0.44'
@@ -972,6 +972,7 @@ class MysqlDatasQuery:
972
972
  '用户年龄': 'varchar(100)',
973
973
  '人群分类': 'varchar(100)',
974
974
  }
975
+ df.fillna(0, inplace=True)
975
976
  return df, {
976
977
  'db_name': db_name,
977
978
  'table_name': table_name,
@@ -1066,7 +1067,7 @@ class MysqlDatasQuery:
1066
1067
  )
1067
1068
  df.insert(loc=1, column='推广渠道', value='万相台无界版') # df中插入新列
1068
1069
  df['是否品牌词'] = df['词名字_词包名字'].str.contains('万里马|wanlima', regex=True)
1069
- df['是否品牌词'] = df['是否品牌词'].apply(lambda x: '品牌词' if x else '')
1070
+ df['是否品牌词'] = df['是否品牌词'].apply(lambda x: '品牌词' if x else '-')
1070
1071
  dir_file = f'\\\\192.168.1.198\\时尚事业部\\01.运营部\\0-电商周报-每周五更新\\分类配置文件.xlsx'
1071
1072
  dir_file2 = '/Volumes/时尚事业部/01.运营部/0-电商周报-每周五更新/分类配置文件.xlsx'
1072
1073
  if not os.path.isfile(dir_file):
@@ -1514,8 +1515,6 @@ class MysqlDatasQuery:
1514
1515
  projection=projection,
1515
1516
  )
1516
1517
  df['日期'] = pd.to_datetime(df['日期'], format='%Y-%m-%d', errors='ignore') # 转换日期列
1517
- df = df[df['日期'] == pd.to_datetime('2024-12-12')]
1518
-
1519
1518
  df_set['商品id'] = df_set['商品id'].astype('int64')
1520
1519
  df['商品id'] = df['商品id'].astype('int64')
1521
1520
  df_set.sort_values('商品id', ascending=False, ignore_index=True, inplace=True)
@@ -1888,11 +1887,15 @@ class MysqlDatasQuery:
1888
1887
  end_date=end_date,
1889
1888
  projection=projection,
1890
1889
  )
1891
- df.pop('spu_id') # 删除推广表的 spu id
1890
+ if 'spu_id' in df.columns:
1891
+ df = df.drop(columns=['spu_id']) # 删除原有 spu_id,避免冲突
1892
1892
  df = pd.merge(df, df_sku, how='left', left_on='跟单sku_id', right_on='sku_id')
1893
- df.pop('sku_id') # 删除聚合后合并进来的 sku id,实际使用 跟单sku_id
1894
- p = df.pop('spu_id')
1895
- df.insert(loc=3, column='spu_id', value=p)
1893
+ df = df.drop(columns=['sku_id']) # 删除 merge 进来的 sku_id
1894
+ df['spu_id'] = df['spu_id'].fillna(0) # 填充 spu_id 空值
1895
+ # 调整 spu_id 到第3列
1896
+ cols = list(df.columns)
1897
+ cols.insert(3, cols.pop(cols.index('spu_id')))
1898
+ df = df[cols]
1896
1899
  set_typ = {
1897
1900
  '日期': 'date',
1898
1901
  '店铺名称': 'varchar(100)',
@@ -2049,9 +2052,9 @@ class MysqlDatasQuery:
2049
2052
  )
2050
2053
  df = pd.merge(df, df_lin, how='left', left_on='计划id', right_on='计划id')
2051
2054
  df['k_是否品牌词'] = df['关键词'].str.contains('万里马|wanlima', regex=True)
2052
- df['k_是否品牌词'] = df['k_是否品牌词'].apply(lambda x: '品牌词' if x else '')
2055
+ df['k_是否品牌词'] = df['k_是否品牌词'].apply(lambda x: '品牌词' if x else '-')
2053
2056
  df['s_是否品牌词'] = df['搜索词'].str.contains('万里马|wanlima', regex=True)
2054
- df['s_是否品牌词'] = df['s_是否品牌词'].apply(lambda x: '品牌词' if x else '')
2057
+ df['s_是否品牌词'] = df['s_是否品牌词'].apply(lambda x: '品牌词' if x else '-')
2055
2058
  set_typ = {
2056
2059
  '日期': 'date',
2057
2060
  '产品线': 'varchar(100)',
@@ -3737,6 +3740,7 @@ def query3(months=1, download_manager=None):
3737
3740
  sdq = MysqlDatasQuery(download_manager=download_manager) # 实例化数据处理类
3738
3741
  sdq.months = months # 设置数据周期, 1 表示近 2 个月
3739
3742
  sdq.spph(db_name='聚合数据', table_name='天猫_商品排行')
3743
+ sdq.item_up(db_name='聚合数据', table_name='淘宝店铺货品')
3740
3744
 
3741
3745
 
3742
3746
  def main(months=3):
@@ -3774,4 +3778,4 @@ if __name__ == '__main__':
3774
3778
  )
3775
3779
  sdq = MysqlDatasQuery(download_manager=download_manager)
3776
3780
  sdq.months = 3
3777
- sdq.global_insights(db_name='聚合数据', table_name='全域洞察')
3781
+ sdq.item_up(db_name='聚合数据', table_name='淘宝店铺货品')
mdbq/mysql/uploader.py CHANGED
@@ -553,18 +553,112 @@ class MySQLUploader:
553
553
  logger.error('无效的日期格式', {'值': value})
554
554
  raise ValueError(f"无效的日期格式: `{value}`")
555
555
 
556
+ def _convert_to_int(self, value):
557
+ """
558
+ 尝试将value转换为int
559
+ """
560
+ # 处理numpy/pandas标量
561
+ if hasattr(value, 'item') and callable(getattr(value, 'item', None)):
562
+ try:
563
+ value = value.item()
564
+ except Exception:
565
+ pass
566
+ elif hasattr(value, 'value') and not isinstance(value, str):
567
+ try:
568
+ extracted_value = value.value
569
+ if isinstance(extracted_value, (int, float, str)) and str(extracted_value).replace('.', '').replace('-', '').isdigit():
570
+ value = extracted_value
571
+ except Exception:
572
+ pass
573
+ try:
574
+ return int(value)
575
+ except (ValueError, TypeError):
576
+ try:
577
+ return int(float(value))
578
+ except (ValueError, TypeError):
579
+ raise
580
+
581
+ def _convert_to_float(self, value):
582
+ """
583
+ 尝试将value转换为float,兼容常见数值类型。
584
+ """
585
+ if hasattr(value, 'item') and callable(getattr(value, 'item', None)):
586
+ try:
587
+ value = value.item()
588
+ except Exception:
589
+ pass
590
+ elif hasattr(value, 'value') and not isinstance(value, str):
591
+ try:
592
+ extracted_value = value.value
593
+ if isinstance(extracted_value, (int, float, str)) and str(extracted_value).replace('.', '').replace('-', '').replace('e', '').replace('E', '').isdigit():
594
+ value = extracted_value
595
+ except Exception:
596
+ pass
597
+ return float(value)
598
+
599
+ def _convert_to_decimal(self, value):
600
+ """
601
+ 尝试将value转换为Decimal,兼容常见数值类型。
602
+ """
603
+ if hasattr(value, 'item') and callable(getattr(value, 'item', None)):
604
+ try:
605
+ value = value.item()
606
+ except Exception:
607
+ pass
608
+ elif hasattr(value, 'value') and not isinstance(value, str):
609
+ try:
610
+ extracted_value = value.value
611
+ if isinstance(extracted_value, (int, float, str)) and str(extracted_value).replace('.', '').replace('-', '').replace('e', '').replace('E', '').isdigit():
612
+ value = extracted_value
613
+ except Exception:
614
+ pass
615
+ return Decimal(str(value))
616
+
617
+ def _truncate_str(self, str_value, max_len):
618
+ """
619
+ 截断字符串到指定字节长度(utf-8)。
620
+ """
621
+ return str_value.encode('utf-8')[:max_len].decode('utf-8', 'ignore')
622
+
556
623
  def _validate_value(self, value: Any, column_type: str, allow_null: bool, db_name: str = None, table_name: str = None, col_name: str = None) -> Any:
557
624
  """
558
625
  根据列类型验证并转换数据值
559
626
  """
560
627
  column_type_lower = column_type.lower() if column_type else ''
561
628
  # 统一判断None/NaN
629
+ if value == '':
630
+ if any(t in column_type_lower for t in ['varchar', 'text', 'char', 'mediumtext', 'longtext']):
631
+ return ""
562
632
  if value == '' or pd.isna(value) or (isinstance(value, (float, Decimal)) and math.isinf(value)):
563
- if allow_null:
564
- return None
565
- else:
566
- logger.error("该列不允许为空值", {"库": db_name, "表": table_name, "列": col_name, "值": value})
633
+ # 兜底填充值映射
634
+ fallback_map = {
635
+ 'int': 0,
636
+ 'bigint': 0,
637
+ 'tinyint': 0,
638
+ 'smallint': 0,
639
+ 'mediumint': 0,
640
+ 'decimal': 0.0,
641
+ 'float': 0.0,
642
+ 'double': 0.0,
643
+ 'date': '1970-01-01',
644
+ 'datetime': '1970-01-01 00:00:00',
645
+ 'timestamp': '1970-01-01 00:00:00',
646
+ 'json': '{}',
647
+ 'varchar': 'none',
648
+ 'text': 'none',
649
+ 'char': 'none',
650
+ 'mediumtext': 'none',
651
+ 'longtext': 'none',
652
+ }
653
+ fallback = 'none'
654
+ for typ, val in fallback_map.items():
655
+ if typ in column_type_lower:
656
+ fallback = val
657
+ break
658
+ if not allow_null:
659
+ logger.warning("该列不允许为空值", {"库": db_name, "表": table_name, "allow_null": allow_null, "列": col_name, "值": value, "兜底值": fallback})
567
660
  raise ValueError("该列不允许为空值")
661
+ return fallback
568
662
 
569
663
  original_value = value
570
664
 
@@ -576,25 +670,7 @@ class MySQLUploader:
576
670
  # 数值类型验证
577
671
  elif 'int' in column_type_lower:
578
672
  try:
579
- # 安全地处理各种数值类型
580
- if hasattr(value, 'item') and callable(getattr(value, 'item', None)):
581
- # numpy 标量类型
582
- try:
583
- value = value.item()
584
- except (ValueError, TypeError):
585
- # 如果不是标量,保持原值
586
- pass
587
- elif hasattr(value, 'value') and not isinstance(value, str):
588
- # pandas 或其他有 value 属性的对象
589
- try:
590
- extracted_value = value.value
591
- # 验证提取的值是数值类型
592
- if isinstance(extracted_value, (int, float, str)) and str(extracted_value).replace('.', '').replace('-', '').isdigit():
593
- value = extracted_value
594
- except (ValueError, TypeError, AttributeError):
595
- # 如果提取失败,保持原值
596
- pass
597
- return int(value)
673
+ return self._convert_to_int(value)
598
674
  except (ValueError, TypeError):
599
675
  logger.error(f"值 `{value}` 无法转换为整数", {"库": db_name, "表": table_name, "列": col_name})
600
676
  raise ValueError(f"值 `{value}` 无法转换为整数")
@@ -602,64 +678,23 @@ class MySQLUploader:
602
678
  # 百分比字符串处理
603
679
  if isinstance(value, str) and '%' in value:
604
680
  try:
605
- # 仅当值是'xx.xx%'格式时才转换
606
681
  if re.match(r'^-?\d+(\.\d+)?%$', value.strip()):
607
682
  value = float(value.strip().replace('%', '')) / 100
608
683
  else:
609
- # 不符合格式的百分比字符串,保留原始值
610
684
  logger.warning("百分比字符串不符合格式,跳过转换", {"库": db_name, "表": table_name, "列": col_name, "原始": original_value})
611
685
  value = original_value
612
686
  except (ValueError, TypeError):
613
687
  logger.warning("百分比字符串转换失败,保留原始值", {"库": db_name, "表": table_name, "列": col_name, "原始": original_value})
614
688
  value = original_value
615
-
616
689
  try:
617
- # 检查是否为Decimal类型
618
690
  if 'decimal' in column_type_lower:
619
691
  precision, scale = self._get_decimal_scale(column_type)
620
- # 安全地处理各种数值类型
621
- if hasattr(value, 'item') and callable(getattr(value, 'item', None)):
622
- # numpy 标量类型
623
- try:
624
- value = value.item()
625
- except (ValueError, TypeError):
626
- # 如果不是标量,保持原值
627
- pass
628
- elif hasattr(value, 'value') and not isinstance(value, str):
629
- # pandas 或其他有 value 属性的对象
630
- try:
631
- extracted_value = value.value
632
- # 验证提取的值是数值类型
633
- if isinstance(extracted_value, (int, float, str)) and str(extracted_value).replace('.', '').replace('-', '').replace('e', '').replace('E', '').isdigit():
634
- value = extracted_value
635
- except (ValueError, TypeError, AttributeError):
636
- # 如果提取失败,保持原值
637
- pass
638
- value_decimal = Decimal(str(value))
639
- # 检查整数部分长度
692
+ value_decimal = self._convert_to_decimal(value)
640
693
  if len(value_decimal.as_tuple().digits) - abs(value_decimal.as_tuple().exponent) > precision - scale:
641
694
  raise ValueError(f"整数部分超出范围")
642
695
  return value_decimal
643
- else: # float/double
644
- # 安全地处理各种数值类型
645
- if hasattr(value, 'item') and callable(getattr(value, 'item', None)):
646
- # numpy 标量类型
647
- try:
648
- value = value.item()
649
- except (ValueError, TypeError):
650
- # 如果不是标量,保持原值
651
- pass
652
- elif hasattr(value, 'value') and not isinstance(value, str):
653
- # pandas 或其他有 value 属性的对象
654
- try:
655
- extracted_value = value.value
656
- # 验证提取的值是数值类型
657
- if isinstance(extracted_value, (int, float, str)) and str(extracted_value).replace('.', '').replace('-', '').replace('e', '').replace('E', '').isdigit():
658
- value = extracted_value
659
- except (ValueError, TypeError, AttributeError):
660
- # 如果提取失败,保持原值
661
- pass
662
- return float(value)
696
+ else: # float/double
697
+ return self._convert_to_float(value)
663
698
  except (ValueError, TypeError, InvalidOperation) as e:
664
699
  logger.error(f"值 `{value}` 无法转换为数值类型: {e}", {"库": db_name, "表": table_name, "列": col_name})
665
700
  raise ValueError(f"值 `{value}` 无法转换为数值类型: {e}")
@@ -670,12 +705,10 @@ class MySQLUploader:
670
705
  max_len = int(re.search(r'\((\d+)\)', column_type).group(1))
671
706
  if len(str_value.encode('utf-8')) > max_len:
672
707
  logger.warning(f"列`{col_name}`的值`{str_value}`长度({len(str_value.encode('utf-8'))})超出varchar({max_len})限制,将进行截断", {"库": db_name, "表": table_name})
673
- return str_value.encode('utf-8')[:max_len].decode('utf-8', 'ignore')
708
+ return self._truncate_str(str_value, max_len)
674
709
  except (AttributeError, IndexError):
675
- # 没有找到长度定义,不截断
676
710
  pass
677
711
  return str_value
678
-
679
712
  return value
680
713
 
681
714
  @_execute_with_retry
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.2
2
2
  Name: mdbq
3
- Version: 4.0.42
3
+ Version: 4.0.44
4
4
  Home-page: https://pypi.org/project/mdbq
5
5
  Author: xigua,
6
6
  Author-email: 2587125111@qq.com
@@ -1,7 +1,7 @@
1
1
  mdbq/__init__.py,sha256=Il5Q9ATdX8yXqVxtP_nYqUhExzxPC_qk_WXQ_4h0exg,16
2
- mdbq/__version__.py,sha256=oNzbjnM_gPKVuYbTskX3n8fcwlgDsOEOgDiMJaHcsIU,18
2
+ mdbq/__version__.py,sha256=Yq_JgKwKONwVexEyE66trDbripXgbesAkvt1eQ___20,18
3
3
  mdbq/aggregation/__init__.py,sha256=EeDqX2Aml6SPx8363J-v1lz0EcZtgwIBYyCJV6CcEDU,40
4
- mdbq/aggregation/query_data.py,sha256=WtTFMN78jn43Y-nBTPAXhAK56w3wDuv_cj4YtzzGbZk,169797
4
+ mdbq/aggregation/query_data.py,sha256=ZWLJghNiEtyA4rvgUqMCLorY0R4-Likd6i4mVMuOni0,170025
5
5
  mdbq/log/__init__.py,sha256=Mpbrav0s0ifLL7lVDAuePEi1hJKiSHhxcv1byBKDl5E,15
6
6
  mdbq/log/mylogger.py,sha256=kPe3wsQNaB1slfX-Z7VMqzZoMoqPfc7ylYXZDBeFzzI,24945
7
7
  mdbq/myconf/__init__.py,sha256=jso1oHcy6cJEfa7udS_9uO5X6kZLoPBF8l3wCYmr5dM,18
@@ -11,7 +11,7 @@ mdbq/mysql/deduplicator.py,sha256=AB3gL7ZwhcmzGHSu4UY4M6YZVPFZ2wlAN3BCcwAhegQ,73
11
11
  mdbq/mysql/mysql.py,sha256=pDg771xBugCMSTWeskIFTi3pFLgaqgyG3smzf-86Wn8,56772
12
12
  mdbq/mysql/s_query.py,sha256=1wJ3HVjHEF6FA-bVeeesRlsf73CZSvVTEQ51CF1OsE4,46786
13
13
  mdbq/mysql/unique_.py,sha256=MaztT-WIyEQUs-OOYY4pFulgHVcXR1BfCy3QUz0XM_U,21127
14
- mdbq/mysql/uploader.py,sha256=d2ocnCTyBkAJ4LjFLOatb0VgkJ28g70fPXJropRlH1s,86370
14
+ mdbq/mysql/uploader.py,sha256=LPfYEj7ywoAynY2Nl9gg0IurgIWd_bTwkda2ifD1TeE,86443
15
15
  mdbq/other/__init__.py,sha256=jso1oHcy6cJEfa7udS_9uO5X6kZLoPBF8l3wCYmr5dM,18
16
16
  mdbq/other/download_sku_picture.py,sha256=X66sVdvVgzoNzmgVJyPtd7bjEvctEKtLPblEPF65EWc,46940
17
17
  mdbq/other/error_handler.py,sha256=4p5haAXSY-P78stp4Xwo_MwAngWYqyKj5ogWIuYXMeY,12631
@@ -25,7 +25,7 @@ mdbq/redis/__init__.py,sha256=YtgBlVSMDphtpwYX248wGge1x-Ex_mMufz4-8W0XRmA,12
25
25
  mdbq/redis/getredis.py,sha256=vpBuNc22uj9Vr-_Dh25_wpwWM1e-072EAAIBdB_IpL0,23494
26
26
  mdbq/spider/__init__.py,sha256=RBMFXGy_jd1HXZhngB2T2XTvJqki8P_Fr-pBcwijnew,18
27
27
  mdbq/spider/aikucun.py,sha256=XptHjGzbout9IYzWAOQUpMMV5qEgLTU8pL1ZGt8oNEA,21868
28
- mdbq-4.0.42.dist-info/METADATA,sha256=XEoFYTnRfRqAv-rIatsbqCFRTXAP9vP0Zv6g-KzIOWU,364
29
- mdbq-4.0.42.dist-info/WHEEL,sha256=jB7zZ3N9hIM9adW7qlTAyycLYW9npaWKLRzaoVcLKcM,91
30
- mdbq-4.0.42.dist-info/top_level.txt,sha256=2FQ-uLnCSB-OwFiWntzmwosW3X2Xqsg0ewh1axsaylA,5
31
- mdbq-4.0.42.dist-info/RECORD,,
28
+ mdbq-4.0.44.dist-info/METADATA,sha256=6BCrO5mef08KVeODcA3rgk-gOsNtI_8_CTh0mghCKeE,364
29
+ mdbq-4.0.44.dist-info/WHEEL,sha256=jB7zZ3N9hIM9adW7qlTAyycLYW9npaWKLRzaoVcLKcM,91
30
+ mdbq-4.0.44.dist-info/top_level.txt,sha256=2FQ-uLnCSB-OwFiWntzmwosW3X2Xqsg0ewh1axsaylA,5
31
+ mdbq-4.0.44.dist-info/RECORD,,
File without changes