mdbq 4.0.63__tar.gz → 4.0.65__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (33)
  1. {mdbq-4.0.63 → mdbq-4.0.65}/PKG-INFO +1 -1
  2. mdbq-4.0.65/mdbq/__version__.py +1 -0
  3. {mdbq-4.0.63 → mdbq-4.0.65}/mdbq/mysql/s_query.py +87 -1
  4. {mdbq-4.0.63 → mdbq-4.0.65}/mdbq/mysql/uploader.py +17 -11
  5. {mdbq-4.0.63 → mdbq-4.0.65}/mdbq.egg-info/PKG-INFO +1 -1
  6. mdbq-4.0.63/mdbq/__version__.py +0 -1
  7. {mdbq-4.0.63 → mdbq-4.0.65}/README.txt +0 -0
  8. {mdbq-4.0.63 → mdbq-4.0.65}/mdbq/__init__.py +0 -0
  9. {mdbq-4.0.63 → mdbq-4.0.65}/mdbq/log/__init__.py +0 -0
  10. {mdbq-4.0.63 → mdbq-4.0.65}/mdbq/log/mylogger.py +0 -0
  11. {mdbq-4.0.63 → mdbq-4.0.65}/mdbq/myconf/__init__.py +0 -0
  12. {mdbq-4.0.63 → mdbq-4.0.65}/mdbq/myconf/myconf.py +0 -0
  13. {mdbq-4.0.63 → mdbq-4.0.65}/mdbq/mysql/__init__.py +0 -0
  14. {mdbq-4.0.63 → mdbq-4.0.65}/mdbq/mysql/deduplicator.py +0 -0
  15. {mdbq-4.0.63 → mdbq-4.0.65}/mdbq/mysql/mysql.py +0 -0
  16. {mdbq-4.0.63 → mdbq-4.0.65}/mdbq/mysql/unique_.py +0 -0
  17. {mdbq-4.0.63 → mdbq-4.0.65}/mdbq/other/__init__.py +0 -0
  18. {mdbq-4.0.63 → mdbq-4.0.65}/mdbq/other/download_sku_picture.py +0 -0
  19. {mdbq-4.0.63 → mdbq-4.0.65}/mdbq/other/error_handler.py +0 -0
  20. {mdbq-4.0.63 → mdbq-4.0.65}/mdbq/other/otk.py +0 -0
  21. {mdbq-4.0.63 → mdbq-4.0.65}/mdbq/other/pov_city.py +0 -0
  22. {mdbq-4.0.63 → mdbq-4.0.65}/mdbq/other/ua_sj.py +0 -0
  23. {mdbq-4.0.63 → mdbq-4.0.65}/mdbq/pbix/__init__.py +0 -0
  24. {mdbq-4.0.63 → mdbq-4.0.65}/mdbq/pbix/pbix_refresh.py +0 -0
  25. {mdbq-4.0.63 → mdbq-4.0.65}/mdbq/pbix/refresh_all.py +0 -0
  26. {mdbq-4.0.63 → mdbq-4.0.65}/mdbq/redis/__init__.py +0 -0
  27. {mdbq-4.0.63 → mdbq-4.0.65}/mdbq/redis/getredis.py +0 -0
  28. {mdbq-4.0.63 → mdbq-4.0.65}/mdbq/spider/__init__.py +0 -0
  29. {mdbq-4.0.63 → mdbq-4.0.65}/mdbq.egg-info/SOURCES.txt +0 -0
  30. {mdbq-4.0.63 → mdbq-4.0.65}/mdbq.egg-info/dependency_links.txt +0 -0
  31. {mdbq-4.0.63 → mdbq-4.0.65}/mdbq.egg-info/top_level.txt +0 -0
  32. {mdbq-4.0.63 → mdbq-4.0.65}/setup.cfg +0 -0
  33. {mdbq-4.0.63 → mdbq-4.0.65}/setup.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.2
2
2
  Name: mdbq
3
- Version: 4.0.63
3
+ Version: 4.0.65
4
4
  Home-page: https://pypi.org/project/mdbq
5
5
  Author: xigua,
6
6
  Author-email: 2587125111@qq.com
@@ -0,0 +1 @@
1
+ VERSION = '4.0.65'
@@ -779,6 +779,74 @@ class QueryDatas:
779
779
  df[col] = df[col].astype(float)
780
780
  return df
781
781
 
782
+ def _convert_columns_to_lowercase(self, data: Union[pd.DataFrame, List[Dict[str, Any]]],
783
+ lower_col: Optional[List[str]],
784
+ return_format: str) -> Union[pd.DataFrame, List[Dict[str, Any]]]:
785
+ """
786
+ 将指定列的值转换为小写。
787
+
788
+ Args:
789
+ data: 原始数据,可以是DataFrame或列表字典
790
+ lower_col: 需要转换为小写的列名列表
791
+ return_format: 返回数据格式
792
+
793
+ Returns:
794
+ 转换后的数据
795
+ """
796
+ # 参数验证
797
+ if not lower_col or not data:
798
+ return data
799
+
800
+ # 确保 lower_col 是列表类型
801
+ if not isinstance(lower_col, list):
802
+ logger.warning('lower_col 参数类型错误,应为列表', {'传入类型': type(lower_col).__name__})
803
+ return data
804
+
805
+ try:
806
+ if return_format == 'df':
807
+ df = data.copy()
808
+ for col in lower_col:
809
+ if not isinstance(col, str):
810
+ logger.warning('列名必须是字符串类型', {'列名': col, '类型': type(col).__name__})
811
+ continue
812
+
813
+ if col in df.columns:
814
+ # 只对字符串类型的列进行小写转换
815
+ if df[col].dtype == 'object':
816
+ # 更安全的空值处理:保持 None/NaN 不变,只转换非空字符串
817
+ df[col] = df[col].apply(lambda x: str(x).lower() if pd.notna(x) and x is not None else x)
818
+ else:
819
+ logger.debug('列不是字符串类型,跳过小写转换', {'列名': col, '数据类型': df[col].dtype})
820
+ logger.debug('列转换为小写', {'列名': col, '数据类型': df[col].dtype})
821
+ else:
822
+ logger.debug('指定的列不存在,跳过小写转换', {'列名': col, '可用列': list(df.columns)})
823
+ return df
824
+ else: # list_dict格式
825
+ result = []
826
+ for row in data:
827
+ new_row = row.copy()
828
+ for col in lower_col:
829
+ if not isinstance(col, str):
830
+ logger.warning('列名必须是字符串类型', {'列名': col, '类型': type(col).__name__})
831
+ continue
832
+
833
+ if col in new_row and new_row[col] is not None:
834
+ # 确保值是字符串类型再转换为小写
835
+ try:
836
+ new_row[col] = str(new_row[col]).lower()
837
+ except Exception as e:
838
+ logger.debug('值转换为小写失败', {'列名': col, '值': new_row[col], '错误': str(e)})
839
+ # 保持原值不变
840
+ result.append(new_row)
841
+ return result
842
+ except Exception as e:
843
+ logger.warning('小写转换失败', {
844
+ '错误类型': type(e).__name__,
845
+ '错误信息': str(e),
846
+ '指定列': lower_col
847
+ })
848
+ return data
849
+
782
850
  def __enter__(self):
783
851
  """上下文管理器入口"""
784
852
  return self
@@ -831,7 +899,8 @@ class QueryDatas:
831
899
  limit: Optional[int] = None,
832
900
  page_size: Optional[int] = None,
833
901
  date_column: Optional[str] = None,
834
- return_format: Literal['df', 'list_dict'] = 'df'
902
+ return_format: Literal['df', 'list_dict'] = 'df',
903
+ lower_col: Optional[List[str]] = ['店铺名称']
835
904
  ) -> Union[pd.DataFrame, List[Dict[str, Any]]]:
836
905
  """
837
906
  从数据库表获取数据,支持列筛选、日期范围过滤和行数限制。
@@ -855,6 +924,7 @@ class QueryDatas:
855
924
  return_format: 返回数据格式
856
925
  - 'df': 返回pandas DataFrame(默认)
857
926
  - 'list_dict': 返回列表字典格式 [{列1:值, 列2:值, ...}, ...]
927
+ lower_col: 需要转换为小写的列名列表,默认['店铺名称']。如果为None或空列表,则不进行小写转换。
858
928
 
859
929
  Returns:
860
930
  根据return_format参数返回不同格式的数据:
@@ -874,6 +944,18 @@ class QueryDatas:
874
944
  logger.error('无效的return_format值', {'库': db_name, '表': table_name, '指定返回数据格式, 有效值应为: ': ', '.join(valid_formats)})
875
945
  return [] if return_format == 'list_dict' else pd.DataFrame()
876
946
 
947
+ # 验证lower_col参数
948
+ if lower_col is not None:
949
+ if not isinstance(lower_col, list):
950
+ logger.warning('lower_col 参数类型错误,应为列表,将使用默认值', {'传入类型': type(lower_col).__name__})
951
+ lower_col = ['店铺名称']
952
+ elif len(lower_col) > 0:
953
+ # 验证列表中的每个元素都是字符串
954
+ invalid_items = [item for item in lower_col if not isinstance(item, str)]
955
+ if invalid_items:
956
+ logger.warning('lower_col 列表包含非字符串元素,将过滤掉', {'无效元素': invalid_items})
957
+ lower_col = [item for item in lower_col if isinstance(item, str)]
958
+
877
959
  # 验证日期范围
878
960
  start_date, end_date = self._validate_date_range(start_date, end_date, db_name, table_name)
879
961
 
@@ -928,10 +1010,12 @@ class QueryDatas:
928
1010
 
929
1011
  if result:
930
1012
  if return_format == 'list_dict':
1013
+ result = self._convert_columns_to_lowercase(result, lower_col, return_format)
931
1014
  return result
932
1015
  else:
933
1016
  df = pd.DataFrame(result)
934
1017
  df = self._convert_decimal_columns(df)
1018
+ df = self._convert_columns_to_lowercase(df, lower_col, return_format)
935
1019
  return df
936
1020
  return [] if return_format == 'list_dict' else pd.DataFrame()
937
1021
 
@@ -975,11 +1059,13 @@ class QueryDatas:
975
1059
  })
976
1060
 
977
1061
  if return_format == 'list_dict':
1062
+ all_results = self._convert_columns_to_lowercase(all_results, lower_col, return_format)
978
1063
  return all_results
979
1064
  else:
980
1065
  df = pd.DataFrame(all_results)
981
1066
  if not df.empty:
982
1067
  df = self._convert_decimal_columns(df)
1068
+ df = self._convert_columns_to_lowercase(df, lower_col, return_format)
983
1069
  return df
984
1070
 
985
1071
  except Exception as e:
@@ -267,7 +267,7 @@ class MySQLUploader:
267
267
  :return: 存在返回True,否则返回False
268
268
  :raises: 可能抛出数据库相关异常
269
269
  """
270
- db_name = self._validate_identifier(db_name)
270
+ db_name = self._validate_identifier(db_name, is_database=True)
271
271
  sql = 'SELECT SCHEMA_NAME FROM INFORMATION_SCHEMA.SCHEMATA WHERE SCHEMA_NAME = %s'
272
272
  conn = None
273
273
  try:
@@ -289,7 +289,7 @@ class MySQLUploader:
289
289
  :param db_name: 要创建的数据库名称
290
290
  :raises: 可能抛出数据库相关异常
291
291
  """
292
- db_name = self._validate_identifier(db_name)
292
+ db_name = self._validate_identifier(db_name, is_database=True)
293
293
  sql = f'CREATE DATABASE IF NOT EXISTS `{db_name}` CHARACTER SET {self.charset} COLLATE {self.collation}'
294
294
  conn = None
295
295
  try:
@@ -327,11 +327,12 @@ class MySQLUploader:
327
327
  logger.error('分表方式无效', {'表': table_name, '分表方式': partition_by})
328
328
  raise ValueError("分表方式必须是 'year' 或 'month' 或 'None'")
329
329
 
330
- def _validate_identifier(self, identifier: str) -> str:
330
+ def _validate_identifier(self, identifier: str, is_database: bool = False) -> str:
331
331
  """
332
332
  验证并清理数据库标识符(表名、列名等)
333
333
 
334
334
  :param identifier: 要验证的标识符
335
+ :param is_database: 是否为数据库名,数据库名不能以数字开头
335
336
  :return: 清理后的安全标识符
336
337
  :raises ValueError: 当标识符无效时抛出
337
338
  """
@@ -349,9 +350,14 @@ class MySQLUploader:
349
350
  hash_suffix = hashlib.md5(identifier.encode('utf-8')).hexdigest()[:8]
350
351
  cleaned = f'unknown_col_{hash_suffix}'
351
352
 
352
- # 确保标识符不以数字开头(MySQL要求)
353
- if cleaned and cleaned[0].isdigit():
354
- cleaned = f'col_{cleaned}'
353
+ # 数据库名不能以数字开头(MySQL要求),但表名和列名可以
354
+ if is_database and cleaned and cleaned[0].isdigit():
355
+ cleaned = f'db_{cleaned}'
356
+ logger.warning('为数字开头的数据库名添加db_前缀', {
357
+ '原始标识符': identifier,
358
+ '清理后': cleaned
359
+ })
360
+
355
361
  mysql_keywords = {
356
362
  'select', 'insert', 'update', 'delete', 'from', 'where', 'and', 'or',
357
363
  'not', 'like', 'in', 'is', 'null', 'true', 'false', 'between'
@@ -379,7 +385,7 @@ class MySQLUploader:
379
385
  if time.time() - cached_time < self.metadata_cache_ttl:
380
386
  logger.debug('表存在缓存命中', {'库': db_name, '表': table_name, '存在': result})
381
387
  return result
382
- db_name = self._validate_identifier(db_name)
388
+ db_name = self._validate_identifier(db_name, is_database=True)
383
389
  table_name = self._validate_identifier(table_name)
384
390
  sql = """
385
391
  SELECT TABLE_NAME
@@ -413,7 +419,7 @@ class MySQLUploader:
413
419
  """
414
420
  创建数据表,优化索引创建方式
415
421
  """
416
- db_name = self._validate_identifier(db_name)
422
+ db_name = self._validate_identifier(db_name, is_database=True)
417
423
  table_name = self._validate_identifier(table_name)
418
424
  if not set_typ:
419
425
  logger.error('建表时未指定set_typ', {'库': db_name, '表': table_name})
@@ -859,7 +865,7 @@ class MySQLUploader:
859
865
  :return: 列名和数据类型字典 {列名: 数据类型}
860
866
  :raises: 可能抛出数据库相关异常
861
867
  """
862
- db_name = self._validate_identifier(db_name)
868
+ db_name = self._validate_identifier(db_name, is_database=True)
863
869
  table_name = self._validate_identifier(table_name)
864
870
  sql = """
865
871
  SELECT COLUMN_NAME, DATA_TYPE
@@ -885,7 +891,7 @@ class MySQLUploader:
885
891
  """
886
892
  确保某列有索引,如果没有则创建。
887
893
  """
888
- db_name = self._validate_identifier(db_name)
894
+ db_name = self._validate_identifier(db_name, is_database=True)
889
895
  table_name = self._validate_identifier(table_name)
890
896
  column = self._validate_identifier(column)
891
897
  # 检查索引是否已存在
@@ -914,7 +920,7 @@ class MySQLUploader:
914
920
  获取表中所有UNIQUE KEY的列组合(不含主键)。
915
921
  返回:[[col1, col2], ...]
916
922
  """
917
- db_name = self._validate_identifier(db_name)
923
+ db_name = self._validate_identifier(db_name, is_database=True)
918
924
  table_name = self._validate_identifier(table_name)
919
925
  sql = '''
920
926
  SELECT INDEX_NAME, COLUMN_NAME
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.2
2
2
  Name: mdbq
3
- Version: 4.0.63
3
+ Version: 4.0.65
4
4
  Home-page: https://pypi.org/project/mdbq
5
5
  Author: xigua,
6
6
  Author-email: 2587125111@qq.com
@@ -1 +0,0 @@
1
- VERSION = '4.0.63'
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes