mdbq 3.11.1__py3-none-any.whl → 3.11.2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- mdbq/__version__.py +1 -1
- mdbq/mysql/uploader.py +30 -24
- {mdbq-3.11.1.dist-info → mdbq-3.11.2.dist-info}/METADATA +1 -1
- {mdbq-3.11.1.dist-info → mdbq-3.11.2.dist-info}/RECORD +6 -6
- {mdbq-3.11.1.dist-info → mdbq-3.11.2.dist-info}/WHEEL +0 -0
- {mdbq-3.11.1.dist-info → mdbq-3.11.2.dist-info}/top_level.txt +0 -0
mdbq/__version__.py
CHANGED
@@ -1 +1 @@
|
|
1
|
-
VERSION = '3.11.1'
|
1
|
+
VERSION = '3.11.2'
|
mdbq/mysql/uploader.py
CHANGED
@@ -593,7 +593,10 @@ class MySQLUploader:
|
|
593
593
|
with self._get_connection() as conn:
|
594
594
|
with conn.cursor() as cursor:
|
595
595
|
cursor.execute(sql, (db_name, table_name))
|
596
|
-
|
596
|
+
if self.case_sensitive:
|
597
|
+
set_typ = {row['COLUMN_NAME']: row['DATA_TYPE'] for row in cursor.fetchall()}
|
598
|
+
else:
|
599
|
+
set_typ = {row['COLUMN_NAME'].lower(): row['DATA_TYPE'] for row in cursor.fetchall()}
|
597
600
|
logger.debug('获取表的列信息', {'库': db_name, '表': table_name, '列信息': set_typ})
|
598
601
|
return set_typ
|
599
602
|
except Exception as e:
|
@@ -727,17 +730,18 @@ class MySQLUploader:
|
|
727
730
|
"""
|
728
731
|
1. pandas:规范化列名
|
729
732
|
2. 字典列表:规范化每个字典的键
|
730
|
-
|
731
|
-
参数:
|
732
|
-
data: 输入数据,支持两种类型:
|
733
|
-
- pandas.DataFrame:将规范化其列名
|
734
|
-
- List[Dict[str, Any]]:将规范化列表中每个字典的键
|
735
733
|
"""
|
736
734
|
if isinstance(data, pd.DataFrame):
|
737
|
-
|
735
|
+
if self.case_sensitive:
|
736
|
+
data.columns = [self._validate_identifier(col) for col in data.columns]
|
737
|
+
else:
|
738
|
+
data.columns = [self._validate_identifier(col).lower() for col in data.columns]
|
738
739
|
return data
|
739
740
|
elif isinstance(data, list):
|
740
|
-
|
741
|
+
if self.case_sensitive:
|
742
|
+
return [{self._validate_identifier(k): v for k, v in item.items()} for item in data]
|
743
|
+
else:
|
744
|
+
return [{self._validate_identifier(k).lower(): v for k, v in item.items()} for item in data]
|
741
745
|
return data
|
742
746
|
|
743
747
|
def _prepare_data(
|
@@ -748,18 +752,14 @@ class MySQLUploader:
|
|
748
752
|
) -> Tuple[List[Dict], Dict[str, str]]:
|
749
753
|
"""
|
750
754
|
准备要上传的数据,验证并转换数据类型
|
751
|
-
|
752
|
-
:param data: 输入数据,可以是字典、字典列表或DataFrame
|
753
|
-
:param set_typ: 列名和数据类型字典 {列名: 数据类型}
|
754
|
-
:param allow_null: 是否允许空值
|
755
|
-
:return: 元组(准备好的数据列表, 过滤后的列类型字典)
|
756
|
-
:raises ValueError: 当数据验证失败时抛出
|
757
755
|
"""
|
758
756
|
# 统一数据格式为字典列表
|
759
757
|
if isinstance(data, pd.DataFrame):
|
760
758
|
try:
|
761
|
-
|
762
|
-
|
759
|
+
if self.case_sensitive:
|
760
|
+
data.columns = [col for col in data.columns]
|
761
|
+
else:
|
762
|
+
data.columns = [col.lower() for col in data.columns]
|
763
763
|
data = data.replace({pd.NA: None}).to_dict('records')
|
764
764
|
except Exception as e:
|
765
765
|
logger.error('数据转字典时发生错误', {
|
@@ -769,10 +769,15 @@ class MySQLUploader:
|
|
769
769
|
})
|
770
770
|
raise ValueError(f"数据转字典时发生错误: {e}")
|
771
771
|
elif isinstance(data, dict):
|
772
|
-
|
772
|
+
if self.case_sensitive:
|
773
|
+
data = [{k: v for k, v in data.items()}]
|
774
|
+
else:
|
775
|
+
data = [{k.lower(): v for k, v in data.items()}]
|
773
776
|
elif isinstance(data, list) and all(isinstance(item, dict) for item in data):
|
774
|
-
|
775
|
-
|
777
|
+
if self.case_sensitive:
|
778
|
+
data = [{k: v for k, v in item.items()} for item in data]
|
779
|
+
else:
|
780
|
+
data = [{k.lower(): v for k, v in item.items()} for item in data]
|
776
781
|
else:
|
777
782
|
logger.error('数据结构必须是字典、列表、字典列表或dataframe', {
|
778
783
|
'data': self._shorten_for_log(data),
|
@@ -783,8 +788,11 @@ class MySQLUploader:
|
|
783
788
|
# 统一处理原始数据中列名的特殊字符
|
784
789
|
data = self.normalize_column_names(data)
|
785
790
|
|
786
|
-
#
|
787
|
-
|
791
|
+
# set_typ的键处理
|
792
|
+
if self.case_sensitive:
|
793
|
+
set_typ = {k: v for k, v in set_typ.items()}
|
794
|
+
else:
|
795
|
+
set_typ = {k.lower(): v for k, v in set_typ.items()}
|
788
796
|
|
789
797
|
# 获取数据中实际存在的列名
|
790
798
|
data_columns = set()
|
@@ -804,7 +812,6 @@ class MySQLUploader:
|
|
804
812
|
filtered_set_typ[col] = inferred_type
|
805
813
|
logger.debug(f"自动推断列 `{col}` 的数据类型为: `{inferred_type}`")
|
806
814
|
else:
|
807
|
-
# 没有样本值,使用默认类型
|
808
815
|
filtered_set_typ[col] = 'VARCHAR(255)'
|
809
816
|
logger.debug(f"列 `{col}` 使用默认数据类型: VARCHAR(255)")
|
810
817
|
|
@@ -813,9 +820,8 @@ class MySQLUploader:
|
|
813
820
|
prepared_row = {}
|
814
821
|
for col_name in filtered_set_typ:
|
815
822
|
# 跳过id列,不允许外部传入id
|
816
|
-
if col_name.lower() == 'id':
|
823
|
+
if (self.case_sensitive and col_name == 'id') or (not self.case_sensitive and col_name.lower() == 'id'):
|
817
824
|
continue
|
818
|
-
|
819
825
|
if col_name not in row:
|
820
826
|
if not allow_null:
|
821
827
|
error_msg = f"行号:{row_idx} -> 缺失列: `{col_name}`"
|
@@ -1,5 +1,5 @@
|
|
1
1
|
mdbq/__init__.py,sha256=Il5Q9ATdX8yXqVxtP_nYqUhExzxPC_qk_WXQ_4h0exg,16
|
2
|
-
mdbq/__version__.py,sha256=
|
2
|
+
mdbq/__version__.py,sha256=wv5KMeim2R9kZhUwwfGA21oZwJF5NDKfYM8gzWNjR-I,18
|
3
3
|
mdbq/aggregation/__init__.py,sha256=EeDqX2Aml6SPx8363J-v1lz0EcZtgwIBYyCJV6CcEDU,40
|
4
4
|
mdbq/aggregation/query_data.py,sha256=fdotW8qdAyDB13p7r3p6AGBkavcHnf6hIvSMtcS7vqE,179875
|
5
5
|
mdbq/config/__init__.py,sha256=jso1oHcy6cJEfa7udS_9uO5X6kZLoPBF8l3wCYmr5dM,18
|
@@ -11,7 +11,7 @@ mdbq/mysql/__init__.py,sha256=A_DPJyAoEvTSFojiI2e94zP0FKtCkkwKP1kYUCSyQzo,11
|
|
11
11
|
mdbq/mysql/deduplicator.py,sha256=ibmxpzenhPgT_ei61TjQB2ZxYs9ztkG_ygbLSa8RIlM,32990
|
12
12
|
mdbq/mysql/mysql.py,sha256=Lfy9PsEdgmdRtcG_UUgegH3bFTJPhByTWkcAYl8G6m0,56788
|
13
13
|
mdbq/mysql/s_query.py,sha256=dlnrVJ3-Vp1Suv9CNbPxyYSRqRJUHjOpF39tb2F-wBc,10190
|
14
|
-
mdbq/mysql/uploader.py,sha256=
|
14
|
+
mdbq/mysql/uploader.py,sha256=Mufu39jNm0Y6LGUOwO-HZxYCE34nn8aVwp2MJPqopMc,66824
|
15
15
|
mdbq/other/__init__.py,sha256=jso1oHcy6cJEfa7udS_9uO5X6kZLoPBF8l3wCYmr5dM,18
|
16
16
|
mdbq/other/download_sku_picture.py,sha256=YU8DxKMXbdeE1OOKEA848WVp62jYHw5O4tXTjUdq9H0,44832
|
17
17
|
mdbq/other/otk.py,sha256=iclBIFbQbhlqzUbcMMoePXBpcP1eZ06ZtjnhcA_EbmE,7241
|
@@ -24,7 +24,7 @@ mdbq/redis/__init__.py,sha256=YtgBlVSMDphtpwYX248wGge1x-Ex_mMufz4-8W0XRmA,12
|
|
24
24
|
mdbq/redis/getredis.py,sha256=YHgCKO8mEsslwet33K5tGss-nrDDwPnOSlhA9iBu0jY,24078
|
25
25
|
mdbq/spider/__init__.py,sha256=RBMFXGy_jd1HXZhngB2T2XTvJqki8P_Fr-pBcwijnew,18
|
26
26
|
mdbq/spider/aikucun.py,sha256=YyPWa_nOH1zs8wgTDcgzn5w8szGKWPyWzmWMVIPkFnU,21638
|
27
|
-
mdbq-3.11.
|
28
|
-
mdbq-3.11.
|
29
|
-
mdbq-3.11.
|
30
|
-
mdbq-3.11.
|
27
|
+
mdbq-3.11.2.dist-info/METADATA,sha256=nxrvJWSRWH09rrPuxsS6RVGTOm7W0tFw3dYXaDPDCd8,364
|
28
|
+
mdbq-3.11.2.dist-info/WHEEL,sha256=jB7zZ3N9hIM9adW7qlTAyycLYW9npaWKLRzaoVcLKcM,91
|
29
|
+
mdbq-3.11.2.dist-info/top_level.txt,sha256=2FQ-uLnCSB-OwFiWntzmwosW3X2Xqsg0ewh1axsaylA,5
|
30
|
+
mdbq-3.11.2.dist-info/RECORD,,
|
File without changes
|
File without changes
|