mdbq 3.11.0__py3-none-any.whl → 3.11.2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- mdbq/__version__.py +1 -1
- mdbq/mysql/uploader.py +33 -26
- {mdbq-3.11.0.dist-info → mdbq-3.11.2.dist-info}/METADATA +1 -1
- {mdbq-3.11.0.dist-info → mdbq-3.11.2.dist-info}/RECORD +6 -6
- {mdbq-3.11.0.dist-info → mdbq-3.11.2.dist-info}/WHEEL +0 -0
- {mdbq-3.11.0.dist-info → mdbq-3.11.2.dist-info}/top_level.txt +0 -0
mdbq/__version__.py
CHANGED
@@ -1 +1 @@
|
|
1
|
-
VERSION = '3.11.0'
|
1
|
+
VERSION = '3.11.2'
|
mdbq/mysql/uploader.py
CHANGED
@@ -346,8 +346,9 @@ class MySQLUploader:
|
|
346
346
|
logger.error('无效的标识符', {'标识符': identifier})
|
347
347
|
raise ValueError(f"无效的标识符: `{identifier}`")
|
348
348
|
if not self.case_sensitive:
|
349
|
-
|
350
|
-
|
349
|
+
cleaned = re.sub(r'[^\w\u4e00-\u9fff$]', '_', identifier)
|
350
|
+
else:
|
351
|
+
cleaned = identifier
|
351
352
|
cleaned = re.sub(r'_+', '_', cleaned).strip('_')
|
352
353
|
if not cleaned:
|
353
354
|
logger.error('无法清理异常标识符', {'原始标识符': identifier})
|
@@ -592,7 +593,10 @@ class MySQLUploader:
|
|
592
593
|
with self._get_connection() as conn:
|
593
594
|
with conn.cursor() as cursor:
|
594
595
|
cursor.execute(sql, (db_name, table_name))
|
595
|
-
|
596
|
+
if self.case_sensitive:
|
597
|
+
set_typ = {row['COLUMN_NAME']: row['DATA_TYPE'] for row in cursor.fetchall()}
|
598
|
+
else:
|
599
|
+
set_typ = {row['COLUMN_NAME'].lower(): row['DATA_TYPE'] for row in cursor.fetchall()}
|
596
600
|
logger.debug('获取表的列信息', {'库': db_name, '表': table_name, '列信息': set_typ})
|
597
601
|
return set_typ
|
598
602
|
except Exception as e:
|
@@ -726,17 +730,18 @@ class MySQLUploader:
|
|
726
730
|
"""
|
727
731
|
1. pandas:规范化列名
|
728
732
|
2. 字典列表:规范化每个字典的键
|
729
|
-
|
730
|
-
参数:
|
731
|
-
data: 输入数据,支持两种类型:
|
732
|
-
- pandas.DataFrame:将规范化其列名
|
733
|
-
- List[Dict[str, Any]]:将规范化列表中每个字典的键
|
734
733
|
"""
|
735
734
|
if isinstance(data, pd.DataFrame):
|
736
|
-
|
735
|
+
if self.case_sensitive:
|
736
|
+
data.columns = [self._validate_identifier(col) for col in data.columns]
|
737
|
+
else:
|
738
|
+
data.columns = [self._validate_identifier(col).lower() for col in data.columns]
|
737
739
|
return data
|
738
740
|
elif isinstance(data, list):
|
739
|
-
|
741
|
+
if self.case_sensitive:
|
742
|
+
return [{self._validate_identifier(k): v for k, v in item.items()} for item in data]
|
743
|
+
else:
|
744
|
+
return [{self._validate_identifier(k).lower(): v for k, v in item.items()} for item in data]
|
740
745
|
return data
|
741
746
|
|
742
747
|
def _prepare_data(
|
@@ -747,18 +752,14 @@ class MySQLUploader:
|
|
747
752
|
) -> Tuple[List[Dict], Dict[str, str]]:
|
748
753
|
"""
|
749
754
|
准备要上传的数据,验证并转换数据类型
|
750
|
-
|
751
|
-
:param data: 输入数据,可以是字典、字典列表或DataFrame
|
752
|
-
:param set_typ: 列名和数据类型字典 {列名: 数据类型}
|
753
|
-
:param allow_null: 是否允许空值
|
754
|
-
:return: 元组(准备好的数据列表, 过滤后的列类型字典)
|
755
|
-
:raises ValueError: 当数据验证失败时抛出
|
756
755
|
"""
|
757
756
|
# 统一数据格式为字典列表
|
758
757
|
if isinstance(data, pd.DataFrame):
|
759
758
|
try:
|
760
|
-
|
761
|
-
|
759
|
+
if self.case_sensitive:
|
760
|
+
data.columns = [col for col in data.columns]
|
761
|
+
else:
|
762
|
+
data.columns = [col.lower() for col in data.columns]
|
762
763
|
data = data.replace({pd.NA: None}).to_dict('records')
|
763
764
|
except Exception as e:
|
764
765
|
logger.error('数据转字典时发生错误', {
|
@@ -768,10 +769,15 @@ class MySQLUploader:
|
|
768
769
|
})
|
769
770
|
raise ValueError(f"数据转字典时发生错误: {e}")
|
770
771
|
elif isinstance(data, dict):
|
771
|
-
|
772
|
+
if self.case_sensitive:
|
773
|
+
data = [{k: v for k, v in data.items()}]
|
774
|
+
else:
|
775
|
+
data = [{k.lower(): v for k, v in data.items()}]
|
772
776
|
elif isinstance(data, list) and all(isinstance(item, dict) for item in data):
|
773
|
-
|
774
|
-
|
777
|
+
if self.case_sensitive:
|
778
|
+
data = [{k: v for k, v in item.items()} for item in data]
|
779
|
+
else:
|
780
|
+
data = [{k.lower(): v for k, v in item.items()} for item in data]
|
775
781
|
else:
|
776
782
|
logger.error('数据结构必须是字典、列表、字典列表或dataframe', {
|
777
783
|
'data': self._shorten_for_log(data),
|
@@ -782,8 +788,11 @@ class MySQLUploader:
|
|
782
788
|
# 统一处理原始数据中列名的特殊字符
|
783
789
|
data = self.normalize_column_names(data)
|
784
790
|
|
785
|
-
#
|
786
|
-
|
791
|
+
# set_typ的键处理
|
792
|
+
if self.case_sensitive:
|
793
|
+
set_typ = {k: v for k, v in set_typ.items()}
|
794
|
+
else:
|
795
|
+
set_typ = {k.lower(): v for k, v in set_typ.items()}
|
787
796
|
|
788
797
|
# 获取数据中实际存在的列名
|
789
798
|
data_columns = set()
|
@@ -803,7 +812,6 @@ class MySQLUploader:
|
|
803
812
|
filtered_set_typ[col] = inferred_type
|
804
813
|
logger.debug(f"自动推断列 `{col}` 的数据类型为: `{inferred_type}`")
|
805
814
|
else:
|
806
|
-
# 没有样本值,使用默认类型
|
807
815
|
filtered_set_typ[col] = 'VARCHAR(255)'
|
808
816
|
logger.debug(f"列 `{col}` 使用默认数据类型: VARCHAR(255)")
|
809
817
|
|
@@ -812,9 +820,8 @@ class MySQLUploader:
|
|
812
820
|
prepared_row = {}
|
813
821
|
for col_name in filtered_set_typ:
|
814
822
|
# 跳过id列,不允许外部传入id
|
815
|
-
if col_name.lower() == 'id':
|
823
|
+
if (self.case_sensitive and col_name == 'id') or (not self.case_sensitive and col_name.lower() == 'id'):
|
816
824
|
continue
|
817
|
-
|
818
825
|
if col_name not in row:
|
819
826
|
if not allow_null:
|
820
827
|
error_msg = f"行号:{row_idx} -> 缺失列: `{col_name}`"
|
@@ -1,5 +1,5 @@
|
|
1
1
|
mdbq/__init__.py,sha256=Il5Q9ATdX8yXqVxtP_nYqUhExzxPC_qk_WXQ_4h0exg,16
|
2
|
-
mdbq/__version__.py,sha256=
|
2
|
+
mdbq/__version__.py,sha256=wv5KMeim2R9kZhUwwfGA21oZwJF5NDKfYM8gzWNjR-I,18
|
3
3
|
mdbq/aggregation/__init__.py,sha256=EeDqX2Aml6SPx8363J-v1lz0EcZtgwIBYyCJV6CcEDU,40
|
4
4
|
mdbq/aggregation/query_data.py,sha256=fdotW8qdAyDB13p7r3p6AGBkavcHnf6hIvSMtcS7vqE,179875
|
5
5
|
mdbq/config/__init__.py,sha256=jso1oHcy6cJEfa7udS_9uO5X6kZLoPBF8l3wCYmr5dM,18
|
@@ -11,7 +11,7 @@ mdbq/mysql/__init__.py,sha256=A_DPJyAoEvTSFojiI2e94zP0FKtCkkwKP1kYUCSyQzo,11
|
|
11
11
|
mdbq/mysql/deduplicator.py,sha256=ibmxpzenhPgT_ei61TjQB2ZxYs9ztkG_ygbLSa8RIlM,32990
|
12
12
|
mdbq/mysql/mysql.py,sha256=Lfy9PsEdgmdRtcG_UUgegH3bFTJPhByTWkcAYl8G6m0,56788
|
13
13
|
mdbq/mysql/s_query.py,sha256=dlnrVJ3-Vp1Suv9CNbPxyYSRqRJUHjOpF39tb2F-wBc,10190
|
14
|
-
mdbq/mysql/uploader.py,sha256=
|
14
|
+
mdbq/mysql/uploader.py,sha256=Mufu39jNm0Y6LGUOwO-HZxYCE34nn8aVwp2MJPqopMc,66824
|
15
15
|
mdbq/other/__init__.py,sha256=jso1oHcy6cJEfa7udS_9uO5X6kZLoPBF8l3wCYmr5dM,18
|
16
16
|
mdbq/other/download_sku_picture.py,sha256=YU8DxKMXbdeE1OOKEA848WVp62jYHw5O4tXTjUdq9H0,44832
|
17
17
|
mdbq/other/otk.py,sha256=iclBIFbQbhlqzUbcMMoePXBpcP1eZ06ZtjnhcA_EbmE,7241
|
@@ -24,7 +24,7 @@ mdbq/redis/__init__.py,sha256=YtgBlVSMDphtpwYX248wGge1x-Ex_mMufz4-8W0XRmA,12
|
|
24
24
|
mdbq/redis/getredis.py,sha256=YHgCKO8mEsslwet33K5tGss-nrDDwPnOSlhA9iBu0jY,24078
|
25
25
|
mdbq/spider/__init__.py,sha256=RBMFXGy_jd1HXZhngB2T2XTvJqki8P_Fr-pBcwijnew,18
|
26
26
|
mdbq/spider/aikucun.py,sha256=YyPWa_nOH1zs8wgTDcgzn5w8szGKWPyWzmWMVIPkFnU,21638
|
27
|
-
mdbq-3.11.
|
28
|
-
mdbq-3.11.
|
29
|
-
mdbq-3.11.
|
30
|
-
mdbq-3.11.
|
27
|
+
mdbq-3.11.2.dist-info/METADATA,sha256=nxrvJWSRWH09rrPuxsS6RVGTOm7W0tFw3dYXaDPDCd8,364
|
28
|
+
mdbq-3.11.2.dist-info/WHEEL,sha256=jB7zZ3N9hIM9adW7qlTAyycLYW9npaWKLRzaoVcLKcM,91
|
29
|
+
mdbq-3.11.2.dist-info/top_level.txt,sha256=2FQ-uLnCSB-OwFiWntzmwosW3X2Xqsg0ewh1axsaylA,5
|
30
|
+
mdbq-3.11.2.dist-info/RECORD,,
|
File without changes
|
File without changes
|