mdbq 3.11.0__py3-none-any.whl → 3.11.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
mdbq/__version__.py CHANGED
@@ -1 +1 @@
1
- VERSION = '3.11.0'
1
+ VERSION = '3.11.2'
mdbq/mysql/uploader.py CHANGED
@@ -346,8 +346,9 @@ class MySQLUploader:
346
346
  logger.error('无效的标识符', {'标识符': identifier})
347
347
  raise ValueError(f"无效的标识符: `{identifier}`")
348
348
  if not self.case_sensitive:
349
-
350
- cleaned = re.sub(r'[^\w\u4e00-\u9fff$]', '_', identifier)
349
+ cleaned = re.sub(r'[^\w\u4e00-\u9fff$]', '_', identifier)
350
+ else:
351
+ cleaned = identifier
351
352
  cleaned = re.sub(r'_+', '_', cleaned).strip('_')
352
353
  if not cleaned:
353
354
  logger.error('无法清理异常标识符', {'原始标识符': identifier})
@@ -592,7 +593,10 @@ class MySQLUploader:
592
593
  with self._get_connection() as conn:
593
594
  with conn.cursor() as cursor:
594
595
  cursor.execute(sql, (db_name, table_name))
595
- set_typ = {row['COLUMN_NAME'].lower(): row['DATA_TYPE'] for row in cursor.fetchall()}
596
+ if self.case_sensitive:
597
+ set_typ = {row['COLUMN_NAME']: row['DATA_TYPE'] for row in cursor.fetchall()}
598
+ else:
599
+ set_typ = {row['COLUMN_NAME'].lower(): row['DATA_TYPE'] for row in cursor.fetchall()}
596
600
  logger.debug('获取表的列信息', {'库': db_name, '表': table_name, '列信息': set_typ})
597
601
  return set_typ
598
602
  except Exception as e:
@@ -726,17 +730,18 @@ class MySQLUploader:
726
730
  """
727
731
  1. pandas:规范化列名
728
732
  2. 字典列表:规范化每个字典的键
729
-
730
- 参数:
731
- data: 输入数据,支持两种类型:
732
- - pandas.DataFrame:将规范化其列名
733
- - List[Dict[str, Any]]:将规范化列表中每个字典的键
734
733
  """
735
734
  if isinstance(data, pd.DataFrame):
736
- data.columns = [self._validate_identifier(col) for col in data.columns]
735
+ if self.case_sensitive:
736
+ data.columns = [self._validate_identifier(col) for col in data.columns]
737
+ else:
738
+ data.columns = [self._validate_identifier(col).lower() for col in data.columns]
737
739
  return data
738
740
  elif isinstance(data, list):
739
- return [{self._validate_identifier(k): v for k, v in item.items()} for item in data]
741
+ if self.case_sensitive:
742
+ return [{self._validate_identifier(k): v for k, v in item.items()} for item in data]
743
+ else:
744
+ return [{self._validate_identifier(k).lower(): v for k, v in item.items()} for item in data]
740
745
  return data
741
746
 
742
747
  def _prepare_data(
@@ -747,18 +752,14 @@ class MySQLUploader:
747
752
  ) -> Tuple[List[Dict], Dict[str, str]]:
748
753
  """
749
754
  准备要上传的数据,验证并转换数据类型
750
-
751
- :param data: 输入数据,可以是字典、字典列表或DataFrame
752
- :param set_typ: 列名和数据类型字典 {列名: 数据类型}
753
- :param allow_null: 是否允许空值
754
- :return: 元组(准备好的数据列表, 过滤后的列类型字典)
755
- :raises ValueError: 当数据验证失败时抛出
756
755
  """
757
756
  # 统一数据格式为字典列表
758
757
  if isinstance(data, pd.DataFrame):
759
758
  try:
760
- # 将列名转为小写
761
- data.columns = [col.lower() for col in data.columns]
759
+ if self.case_sensitive:
760
+ data.columns = [col for col in data.columns]
761
+ else:
762
+ data.columns = [col.lower() for col in data.columns]
762
763
  data = data.replace({pd.NA: None}).to_dict('records')
763
764
  except Exception as e:
764
765
  logger.error('数据转字典时发生错误', {
@@ -768,10 +769,15 @@ class MySQLUploader:
768
769
  })
769
770
  raise ValueError(f"数据转字典时发生错误: {e}")
770
771
  elif isinstance(data, dict):
771
- data = [{k.lower(): v for k, v in data.items()}]
772
+ if self.case_sensitive:
773
+ data = [{k: v for k, v in data.items()}]
774
+ else:
775
+ data = [{k.lower(): v for k, v in data.items()}]
772
776
  elif isinstance(data, list) and all(isinstance(item, dict) for item in data):
773
- # 将列表中的每个字典键转为小写
774
- data = [{k.lower(): v for k, v in item.items()} for item in data]
777
+ if self.case_sensitive:
778
+ data = [{k: v for k, v in item.items()} for item in data]
779
+ else:
780
+ data = [{k.lower(): v for k, v in item.items()} for item in data]
775
781
  else:
776
782
  logger.error('数据结构必须是字典、列表、字典列表或dataframe', {
777
783
  'data': self._shorten_for_log(data),
@@ -782,8 +788,11 @@ class MySQLUploader:
782
788
  # 统一处理原始数据中列名的特殊字符
783
789
  data = self.normalize_column_names(data)
784
790
 
785
- # set_typ的键转为小写
786
- set_typ = {k.lower(): v for k, v in set_typ.items()}
791
+ # set_typ的键处理
792
+ if self.case_sensitive:
793
+ set_typ = {k: v for k, v in set_typ.items()}
794
+ else:
795
+ set_typ = {k.lower(): v for k, v in set_typ.items()}
787
796
 
788
797
  # 获取数据中实际存在的列名
789
798
  data_columns = set()
@@ -803,7 +812,6 @@ class MySQLUploader:
803
812
  filtered_set_typ[col] = inferred_type
804
813
  logger.debug(f"自动推断列 `{col}` 的数据类型为: `{inferred_type}`")
805
814
  else:
806
- # 没有样本值,使用默认类型
807
815
  filtered_set_typ[col] = 'VARCHAR(255)'
808
816
  logger.debug(f"列 `{col}` 使用默认数据类型: VARCHAR(255)")
809
817
 
@@ -812,9 +820,8 @@ class MySQLUploader:
812
820
  prepared_row = {}
813
821
  for col_name in filtered_set_typ:
814
822
  # 跳过id列,不允许外部传入id
815
- if col_name.lower() == 'id':
823
+ if (self.case_sensitive and col_name == 'id') or (not self.case_sensitive and col_name.lower() == 'id'):
816
824
  continue
817
-
818
825
  if col_name not in row:
819
826
  if not allow_null:
820
827
  error_msg = f"行号:{row_idx} -> 缺失列: `{col_name}`"
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.2
2
2
  Name: mdbq
3
- Version: 3.11.0
3
+ Version: 3.11.2
4
4
  Home-page: https://pypi.org/project/mdbq
5
5
  Author: xigua,
6
6
  Author-email: 2587125111@qq.com
@@ -1,5 +1,5 @@
1
1
  mdbq/__init__.py,sha256=Il5Q9ATdX8yXqVxtP_nYqUhExzxPC_qk_WXQ_4h0exg,16
2
- mdbq/__version__.py,sha256=vrerkwEDL5E5nSSdgLRAwkxPSJb14-hLfjbjj6J7G3I,18
2
+ mdbq/__version__.py,sha256=wv5KMeim2R9kZhUwwfGA21oZwJF5NDKfYM8gzWNjR-I,18
3
3
  mdbq/aggregation/__init__.py,sha256=EeDqX2Aml6SPx8363J-v1lz0EcZtgwIBYyCJV6CcEDU,40
4
4
  mdbq/aggregation/query_data.py,sha256=fdotW8qdAyDB13p7r3p6AGBkavcHnf6hIvSMtcS7vqE,179875
5
5
  mdbq/config/__init__.py,sha256=jso1oHcy6cJEfa7udS_9uO5X6kZLoPBF8l3wCYmr5dM,18
@@ -11,7 +11,7 @@ mdbq/mysql/__init__.py,sha256=A_DPJyAoEvTSFojiI2e94zP0FKtCkkwKP1kYUCSyQzo,11
11
11
  mdbq/mysql/deduplicator.py,sha256=ibmxpzenhPgT_ei61TjQB2ZxYs9ztkG_ygbLSa8RIlM,32990
12
12
  mdbq/mysql/mysql.py,sha256=Lfy9PsEdgmdRtcG_UUgegH3bFTJPhByTWkcAYl8G6m0,56788
13
13
  mdbq/mysql/s_query.py,sha256=dlnrVJ3-Vp1Suv9CNbPxyYSRqRJUHjOpF39tb2F-wBc,10190
14
- mdbq/mysql/uploader.py,sha256=6b1NXGtQnhpSWMXnw0ai07ejS1GzMUZWzcjG8G68pbY,66451
14
+ mdbq/mysql/uploader.py,sha256=Mufu39jNm0Y6LGUOwO-HZxYCE34nn8aVwp2MJPqopMc,66824
15
15
  mdbq/other/__init__.py,sha256=jso1oHcy6cJEfa7udS_9uO5X6kZLoPBF8l3wCYmr5dM,18
16
16
  mdbq/other/download_sku_picture.py,sha256=YU8DxKMXbdeE1OOKEA848WVp62jYHw5O4tXTjUdq9H0,44832
17
17
  mdbq/other/otk.py,sha256=iclBIFbQbhlqzUbcMMoePXBpcP1eZ06ZtjnhcA_EbmE,7241
@@ -24,7 +24,7 @@ mdbq/redis/__init__.py,sha256=YtgBlVSMDphtpwYX248wGge1x-Ex_mMufz4-8W0XRmA,12
24
24
  mdbq/redis/getredis.py,sha256=YHgCKO8mEsslwet33K5tGss-nrDDwPnOSlhA9iBu0jY,24078
25
25
  mdbq/spider/__init__.py,sha256=RBMFXGy_jd1HXZhngB2T2XTvJqki8P_Fr-pBcwijnew,18
26
26
  mdbq/spider/aikucun.py,sha256=YyPWa_nOH1zs8wgTDcgzn5w8szGKWPyWzmWMVIPkFnU,21638
27
- mdbq-3.11.0.dist-info/METADATA,sha256=go0OKEPPSfRYS3OQJ0A2bJP1FPNCbtAtp-LHZlrh9NM,364
28
- mdbq-3.11.0.dist-info/WHEEL,sha256=jB7zZ3N9hIM9adW7qlTAyycLYW9npaWKLRzaoVcLKcM,91
29
- mdbq-3.11.0.dist-info/top_level.txt,sha256=2FQ-uLnCSB-OwFiWntzmwosW3X2Xqsg0ewh1axsaylA,5
30
- mdbq-3.11.0.dist-info/RECORD,,
27
+ mdbq-3.11.2.dist-info/METADATA,sha256=nxrvJWSRWH09rrPuxsS6RVGTOm7W0tFw3dYXaDPDCd8,364
28
+ mdbq-3.11.2.dist-info/WHEEL,sha256=jB7zZ3N9hIM9adW7qlTAyycLYW9npaWKLRzaoVcLKcM,91
29
+ mdbq-3.11.2.dist-info/top_level.txt,sha256=2FQ-uLnCSB-OwFiWntzmwosW3X2Xqsg0ewh1axsaylA,5
30
+ mdbq-3.11.2.dist-info/RECORD,,
File without changes