mdbq 4.0.63__py3-none-any.whl → 4.0.65__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- mdbq/__version__.py +1 -1
- mdbq/mysql/s_query.py +87 -1
- mdbq/mysql/uploader.py +17 -11
- {mdbq-4.0.63.dist-info → mdbq-4.0.65.dist-info}/METADATA +1 -1
- {mdbq-4.0.63.dist-info → mdbq-4.0.65.dist-info}/RECORD +7 -7
- {mdbq-4.0.63.dist-info → mdbq-4.0.65.dist-info}/WHEEL +0 -0
- {mdbq-4.0.63.dist-info → mdbq-4.0.65.dist-info}/top_level.txt +0 -0
mdbq/__version__.py
CHANGED
@@ -1 +1 @@
|
|
1
|
-
VERSION = '4.0.63'
|
1
|
+
VERSION = '4.0.65'
|
mdbq/mysql/s_query.py
CHANGED
@@ -779,6 +779,74 @@ class QueryDatas:
|
|
779
779
|
df[col] = df[col].astype(float)
|
780
780
|
return df
|
781
781
|
|
782
|
+
def _convert_columns_to_lowercase(self, data: Union[pd.DataFrame, List[Dict[str, Any]]],
                                  lower_col: Optional[List[str]],
                                  return_format: str) -> Union[pd.DataFrame, List[Dict[str, Any]]]:
    """
    Lower-case the values of the given columns.

    The conversion is best-effort: on any unexpected failure a warning is
    logged and the original ``data`` object is returned unchanged. The input
    is never mutated; a copy (DataFrame copy / shallow row copies) is returned.

    Args:
        data: Query result, either a DataFrame or a list of row dicts.
        lower_col: Names of the columns whose values should be lower-cased.
            ``None`` or an empty list disables the conversion.
        return_format: Format requested by the caller ('df' or 'list_dict').
            Kept for interface compatibility; the branch taken is decided by
            the actual type of ``data`` so a mismatch cannot break the call.

    Returns:
        The converted data in the same container type as ``data``.
    """
    if not lower_col or data is None:
        return data

    if not isinstance(lower_col, list):
        logger.warning('lower_col 参数类型错误,应为列表', {'传入类型': type(lower_col).__name__})
        return data

    # A DataFrame has no unambiguous truth value (``not df`` raises
    # ValueError), so emptiness has to be tested per container type.
    is_frame = isinstance(data, pd.DataFrame)
    if (data.empty if is_frame else not data):
        return data

    # Validate the column names once instead of once per row.
    columns = []
    for col in lower_col:
        if isinstance(col, str):
            columns.append(col)
        else:
            logger.warning('列名必须是字符串类型', {'列名': col, '类型': type(col).__name__})

    def _lowered(value):
        # Missing scalars (None / NaN / NaT / NA) pass through untouched;
        # is_scalar guards pd.isna against list-like cells, for which it
        # would return an array with an ambiguous truth value.
        if pd.api.types.is_scalar(value) and pd.isna(value):
            return value
        return str(value).lower()

    try:
        if is_frame:
            frame = data.copy()
            for col in columns:
                if col not in frame.columns:
                    logger.debug('指定的列不存在,跳过小写转换', {'列名': col, '可用列': list(frame.columns)})
                    continue

                dtype = frame[col].dtype
                # Accept both the classic object dtype and pandas' dedicated
                # string dtypes; numeric/datetime columns are skipped.
                if not (pd.api.types.is_object_dtype(dtype) or pd.api.types.is_string_dtype(dtype)):
                    logger.debug('列不是字符串类型,跳过小写转换', {'列名': col, '数据类型': dtype})
                    continue

                frame[col] = frame[col].apply(_lowered)
                # Logged only when a conversion actually happened.
                logger.debug('列转换为小写', {'列名': col, '数据类型': frame[col].dtype})
            return frame

        # list-of-dicts format
        result = []
        for row in data:
            new_row = row.copy()
            for col in columns:
                if col not in new_row:
                    continue
                try:
                    new_row[col] = _lowered(new_row[col])
                except Exception as e:
                    # Keep the original value if it cannot be stringified.
                    logger.debug('值转换为小写失败', {'列名': col, '值': new_row[col], '错误': str(e)})
            result.append(new_row)
        return result
    except Exception as e:
        # Deliberate safety net: lower-casing must never fail the query.
        logger.warning('小写转换失败', {
            '错误类型': type(e).__name__,
            '错误信息': str(e),
            '指定列': lower_col
        })
        return data
|
849
|
+
|
782
850
|
def __enter__(self):
|
783
851
|
"""上下文管理器入口"""
|
784
852
|
return self
|
@@ -831,7 +899,8 @@ class QueryDatas:
|
|
831
899
|
limit: Optional[int] = None,
|
832
900
|
page_size: Optional[int] = None,
|
833
901
|
date_column: Optional[str] = None,
|
834
|
-
return_format: Literal['df', 'list_dict'] = 'df'
|
902
|
+
return_format: Literal['df', 'list_dict'] = 'df',
|
903
|
+
lower_col: Optional[List[str]] = ['店铺名称']
|
835
904
|
) -> Union[pd.DataFrame, List[Dict[str, Any]]]:
|
836
905
|
"""
|
837
906
|
从数据库表获取数据,支持列筛选、日期范围过滤和行数限制。
|
@@ -855,6 +924,7 @@ class QueryDatas:
|
|
855
924
|
return_format: 返回数据格式
|
856
925
|
- 'df': 返回pandas DataFrame(默认)
|
857
926
|
- 'list_dict': 返回列表字典格式 [{列1:值, 列2:值, ...}, ...]
|
927
|
+
lower_col: 需要转换为小写的列名列表,默认['店铺名称']。如果为None或空列表,则不进行小写转换。
|
858
928
|
|
859
929
|
Returns:
|
860
930
|
根据return_format参数返回不同格式的数据:
|
@@ -874,6 +944,18 @@ class QueryDatas:
|
|
874
944
|
logger.error('无效的return_format值', {'库': db_name, '表': table_name, '指定返回数据格式, 有效值应为: ': ', '.join(valid_formats)})
|
875
945
|
return [] if return_format == 'list_dict' else pd.DataFrame()
|
876
946
|
|
947
|
+
# 验证lower_col参数
|
948
|
+
if lower_col is not None:
|
949
|
+
if not isinstance(lower_col, list):
|
950
|
+
logger.warning('lower_col 参数类型错误,应为列表,将使用默认值', {'传入类型': type(lower_col).__name__})
|
951
|
+
lower_col = ['店铺名称']
|
952
|
+
elif len(lower_col) > 0:
|
953
|
+
# 验证列表中的每个元素都是字符串
|
954
|
+
invalid_items = [item for item in lower_col if not isinstance(item, str)]
|
955
|
+
if invalid_items:
|
956
|
+
logger.warning('lower_col 列表包含非字符串元素,将过滤掉', {'无效元素': invalid_items})
|
957
|
+
lower_col = [item for item in lower_col if isinstance(item, str)]
|
958
|
+
|
877
959
|
# 验证日期范围
|
878
960
|
start_date, end_date = self._validate_date_range(start_date, end_date, db_name, table_name)
|
879
961
|
|
@@ -928,10 +1010,12 @@ class QueryDatas:
|
|
928
1010
|
|
929
1011
|
if result:
|
930
1012
|
if return_format == 'list_dict':
|
1013
|
+
result = self._convert_columns_to_lowercase(result, lower_col, return_format)
|
931
1014
|
return result
|
932
1015
|
else:
|
933
1016
|
df = pd.DataFrame(result)
|
934
1017
|
df = self._convert_decimal_columns(df)
|
1018
|
+
df = self._convert_columns_to_lowercase(df, lower_col, return_format)
|
935
1019
|
return df
|
936
1020
|
return [] if return_format == 'list_dict' else pd.DataFrame()
|
937
1021
|
|
@@ -975,11 +1059,13 @@ class QueryDatas:
|
|
975
1059
|
})
|
976
1060
|
|
977
1061
|
if return_format == 'list_dict':
|
1062
|
+
all_results = self._convert_columns_to_lowercase(all_results, lower_col, return_format)
|
978
1063
|
return all_results
|
979
1064
|
else:
|
980
1065
|
df = pd.DataFrame(all_results)
|
981
1066
|
if not df.empty:
|
982
1067
|
df = self._convert_decimal_columns(df)
|
1068
|
+
df = self._convert_columns_to_lowercase(df, lower_col, return_format)
|
983
1069
|
return df
|
984
1070
|
|
985
1071
|
except Exception as e:
|
mdbq/mysql/uploader.py
CHANGED
@@ -267,7 +267,7 @@ class MySQLUploader:
|
|
267
267
|
:return: 存在返回True,否则返回False
|
268
268
|
:raises: 可能抛出数据库相关异常
|
269
269
|
"""
|
270
|
-
db_name = self._validate_identifier(db_name)
|
270
|
+
db_name = self._validate_identifier(db_name, is_database=True)
|
271
271
|
sql = 'SELECT SCHEMA_NAME FROM INFORMATION_SCHEMA.SCHEMATA WHERE SCHEMA_NAME = %s'
|
272
272
|
conn = None
|
273
273
|
try:
|
@@ -289,7 +289,7 @@ class MySQLUploader:
|
|
289
289
|
:param db_name: 要创建的数据库名称
|
290
290
|
:raises: 可能抛出数据库相关异常
|
291
291
|
"""
|
292
|
-
db_name = self._validate_identifier(db_name)
|
292
|
+
db_name = self._validate_identifier(db_name, is_database=True)
|
293
293
|
sql = f'CREATE DATABASE IF NOT EXISTS `{db_name}` CHARACTER SET {self.charset} COLLATE {self.collation}'
|
294
294
|
conn = None
|
295
295
|
try:
|
@@ -327,11 +327,12 @@ class MySQLUploader:
|
|
327
327
|
logger.error('分表方式无效', {'表': table_name, '分表方式': partition_by})
|
328
328
|
raise ValueError("分表方式必须是 'year' 或 'month' 或 'None'")
|
329
329
|
|
330
|
-
def _validate_identifier(self, identifier: str) -> str:
|
330
|
+
def _validate_identifier(self, identifier: str, is_database: bool = False) -> str:
|
331
331
|
"""
|
332
332
|
验证并清理数据库标识符(表名、列名等)
|
333
333
|
|
334
334
|
:param identifier: 要验证的标识符
|
335
|
+
:param is_database: 是否为数据库名,数据库名不能以数字开头
|
335
336
|
:return: 清理后的安全标识符
|
336
337
|
:raises ValueError: 当标识符无效时抛出
|
337
338
|
"""
|
@@ -349,9 +350,14 @@ class MySQLUploader:
|
|
349
350
|
hash_suffix = hashlib.md5(identifier.encode('utf-8')).hexdigest()[:8]
|
350
351
|
cleaned = f'unknown_col_{hash_suffix}'
|
351
352
|
|
352
|
-
#
|
353
|
-
if cleaned and cleaned[0].isdigit():
|
354
|
-
cleaned = f'
|
353
|
+
# 数据库名不能以数字开头(MySQL要求),但表名和列名可以
|
354
|
+
if is_database and cleaned and cleaned[0].isdigit():
|
355
|
+
cleaned = f'db_{cleaned}'
|
356
|
+
logger.warning('为数字开头的数据库名添加db_前缀', {
|
357
|
+
'原始标识符': identifier,
|
358
|
+
'清理后': cleaned
|
359
|
+
})
|
360
|
+
|
355
361
|
mysql_keywords = {
|
356
362
|
'select', 'insert', 'update', 'delete', 'from', 'where', 'and', 'or',
|
357
363
|
'not', 'like', 'in', 'is', 'null', 'true', 'false', 'between'
|
@@ -379,7 +385,7 @@ class MySQLUploader:
|
|
379
385
|
if time.time() - cached_time < self.metadata_cache_ttl:
|
380
386
|
logger.debug('表存在缓存命中', {'库': db_name, '表': table_name, '存在': result})
|
381
387
|
return result
|
382
|
-
db_name = self._validate_identifier(db_name)
|
388
|
+
db_name = self._validate_identifier(db_name, is_database=True)
|
383
389
|
table_name = self._validate_identifier(table_name)
|
384
390
|
sql = """
|
385
391
|
SELECT TABLE_NAME
|
@@ -413,7 +419,7 @@ class MySQLUploader:
|
|
413
419
|
"""
|
414
420
|
创建数据表,优化索引创建方式
|
415
421
|
"""
|
416
|
-
db_name = self._validate_identifier(db_name)
|
422
|
+
db_name = self._validate_identifier(db_name, is_database=True)
|
417
423
|
table_name = self._validate_identifier(table_name)
|
418
424
|
if not set_typ:
|
419
425
|
logger.error('建表时未指定set_typ', {'库': db_name, '表': table_name})
|
@@ -859,7 +865,7 @@ class MySQLUploader:
|
|
859
865
|
:return: 列名和数据类型字典 {列名: 数据类型}
|
860
866
|
:raises: 可能抛出数据库相关异常
|
861
867
|
"""
|
862
|
-
db_name = self._validate_identifier(db_name)
|
868
|
+
db_name = self._validate_identifier(db_name, is_database=True)
|
863
869
|
table_name = self._validate_identifier(table_name)
|
864
870
|
sql = """
|
865
871
|
SELECT COLUMN_NAME, DATA_TYPE
|
@@ -885,7 +891,7 @@ class MySQLUploader:
|
|
885
891
|
"""
|
886
892
|
确保某列有索引,如果没有则创建。
|
887
893
|
"""
|
888
|
-
db_name = self._validate_identifier(db_name)
|
894
|
+
db_name = self._validate_identifier(db_name, is_database=True)
|
889
895
|
table_name = self._validate_identifier(table_name)
|
890
896
|
column = self._validate_identifier(column)
|
891
897
|
# 检查索引是否已存在
|
@@ -914,7 +920,7 @@ class MySQLUploader:
|
|
914
920
|
获取表中所有UNIQUE KEY的列组合(不含主键)。
|
915
921
|
返回:[[col1, col2], ...]
|
916
922
|
"""
|
917
|
-
db_name = self._validate_identifier(db_name)
|
923
|
+
db_name = self._validate_identifier(db_name, is_database=True)
|
918
924
|
table_name = self._validate_identifier(table_name)
|
919
925
|
sql = '''
|
920
926
|
SELECT INDEX_NAME, COLUMN_NAME
|
@@ -1,5 +1,5 @@
|
|
1
1
|
mdbq/__init__.py,sha256=Il5Q9ATdX8yXqVxtP_nYqUhExzxPC_qk_WXQ_4h0exg,16
|
2
|
-
mdbq/__version__.py,sha256=
|
2
|
+
mdbq/__version__.py,sha256=1gP4SARV8VrJwdAE1ZRMMnoekF6Fe6v0VGYuInbT8h0,18
|
3
3
|
mdbq/log/__init__.py,sha256=Mpbrav0s0ifLL7lVDAuePEi1hJKiSHhxcv1byBKDl5E,15
|
4
4
|
mdbq/log/mylogger.py,sha256=kPe3wsQNaB1slfX-Z7VMqzZoMoqPfc7ylYXZDBeFzzI,24945
|
5
5
|
mdbq/myconf/__init__.py,sha256=jso1oHcy6cJEfa7udS_9uO5X6kZLoPBF8l3wCYmr5dM,18
|
@@ -7,9 +7,9 @@ mdbq/myconf/myconf.py,sha256=rHvQCnQRKhQ49AZBke-Z4v28hyOLmHt4MylIuB0H6yA,33516
|
|
7
7
|
mdbq/mysql/__init__.py,sha256=A_DPJyAoEvTSFojiI2e94zP0FKtCkkwKP1kYUCSyQzo,11
|
8
8
|
mdbq/mysql/deduplicator.py,sha256=AB3gL7ZwhcmzGHSu4UY4M6YZVPFZ2wlAN3BCcwAhegQ,73074
|
9
9
|
mdbq/mysql/mysql.py,sha256=pDg771xBugCMSTWeskIFTi3pFLgaqgyG3smzf-86Wn8,56772
|
10
|
-
mdbq/mysql/s_query.py,sha256=
|
10
|
+
mdbq/mysql/s_query.py,sha256=hptv4YxnkqPruJkgNX8UxRRhisvSEnn8HzDGDLmCQFw,50231
|
11
11
|
mdbq/mysql/unique_.py,sha256=MaztT-WIyEQUs-OOYY4pFulgHVcXR1BfCy3QUz0XM_U,21127
|
12
|
-
mdbq/mysql/uploader.py,sha256=
|
12
|
+
mdbq/mysql/uploader.py,sha256=68kJIrCnP2dJZ6ilb8MoFzuzGGWU_272WwPfaqt075A,112125
|
13
13
|
mdbq/other/__init__.py,sha256=jso1oHcy6cJEfa7udS_9uO5X6kZLoPBF8l3wCYmr5dM,18
|
14
14
|
mdbq/other/download_sku_picture.py,sha256=X66sVdvVgzoNzmgVJyPtd7bjEvctEKtLPblEPF65EWc,46940
|
15
15
|
mdbq/other/error_handler.py,sha256=4p5haAXSY-P78stp4Xwo_MwAngWYqyKj5ogWIuYXMeY,12631
|
@@ -22,7 +22,7 @@ mdbq/pbix/refresh_all.py,sha256=OBT9EewSZ0aRS9vL_FflVn74d4l2G00wzHiikCC4TC0,5926
|
|
22
22
|
mdbq/redis/__init__.py,sha256=YtgBlVSMDphtpwYX248wGge1x-Ex_mMufz4-8W0XRmA,12
|
23
23
|
mdbq/redis/getredis.py,sha256=vpBuNc22uj9Vr-_Dh25_wpwWM1e-072EAAIBdB_IpL0,23494
|
24
24
|
mdbq/spider/__init__.py,sha256=RBMFXGy_jd1HXZhngB2T2XTvJqki8P_Fr-pBcwijnew,18
|
25
|
-
mdbq-4.0.
|
26
|
-
mdbq-4.0.63.dist-info/WHEEL,sha256=jB7zZ3N9hIM9adW7qlTAyycLYW9npaWKLRzaoVcLKcM,91
|
27
|
-
mdbq-4.0.63.dist-info/top_level.txt,sha256=2FQ-uLnCSB-OwFiWntzmwosW3X2Xqsg0ewh1axsaylA,5
|
28
|
-
mdbq-4.0.63.dist-info/RECORD,,
|
25
|
+
mdbq-4.0.65.dist-info/METADATA,sha256=5tbI-X82C-RHIHkdxXt6thtOGZbDDhYmeil4N8uQWBQ,364
|
26
|
+
mdbq-4.0.65.dist-info/WHEEL,sha256=jB7zZ3N9hIM9adW7qlTAyycLYW9npaWKLRzaoVcLKcM,91
|
27
|
+
mdbq-4.0.65.dist-info/top_level.txt,sha256=2FQ-uLnCSB-OwFiWntzmwosW3X2Xqsg0ewh1axsaylA,5
|
28
|
+
mdbq-4.0.65.dist-info/RECORD,,
|
File without changes
|
File without changes
|