mdbq 4.1.10__py3-none-any.whl → 4.1.11__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of mdbq might be problematic. Click here for more details.
- mdbq/__version__.py +1 -1
- mdbq/mysql/uploader.py +172 -26
- {mdbq-4.1.10.dist-info → mdbq-4.1.11.dist-info}/METADATA +2 -2
- {mdbq-4.1.10.dist-info → mdbq-4.1.11.dist-info}/RECORD +6 -6
- {mdbq-4.1.10.dist-info → mdbq-4.1.11.dist-info}/WHEEL +1 -1
- {mdbq-4.1.10.dist-info → mdbq-4.1.11.dist-info}/top_level.txt +0 -0
mdbq/__version__.py
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
VERSION = '4.1.10'
|
|
1
|
+
VERSION = '4.1.11'
|
mdbq/mysql/uploader.py
CHANGED
|
@@ -111,6 +111,7 @@ class MySQLUploader:
|
|
|
111
111
|
:param read_timeout: 读取超时(秒),默认为30
|
|
112
112
|
:param write_timeout: 写入超时(秒),默认为30
|
|
113
113
|
:param ssl: SSL配置字典,默认为None
|
|
114
|
+
:param auto_creat_missing_cols: 自动添加缺失列,默认为False,建议手动维护表结构
|
|
114
115
|
"""
|
|
115
116
|
self.username = username
|
|
116
117
|
self.password = password
|
|
@@ -134,6 +135,7 @@ class MySQLUploader:
|
|
|
134
135
|
self._table_metadata_cache = {}
|
|
135
136
|
self.metadata_cache_ttl = 300 # 5分钟缓存时间
|
|
136
137
|
self.pool = self._create_connection_pool() # 创建连接池
|
|
138
|
+
self.auto_creat_missing_cols = False # 自动添加缺失列,正常不要自动添加,建议手动维护表结构
|
|
137
139
|
|
|
138
140
|
def _create_connection_pool(self) -> PooledDB:
|
|
139
141
|
"""
|
|
@@ -695,15 +697,16 @@ class MySQLUploader:
|
|
|
695
697
|
break
|
|
696
698
|
|
|
697
699
|
if not allow_null:
|
|
698
|
-
|
|
699
|
-
|
|
700
|
-
|
|
701
|
-
|
|
702
|
-
|
|
703
|
-
|
|
704
|
-
|
|
705
|
-
|
|
706
|
-
|
|
700
|
+
# 注释掉,这里可能会产生大量日志
|
|
701
|
+
# logger.debug("该列不允许为空值", {
|
|
702
|
+
# "库": db_name,
|
|
703
|
+
# "表": table_name,
|
|
704
|
+
# "allow_null": allow_null,
|
|
705
|
+
# "列": col_name,
|
|
706
|
+
# "值": original_value,
|
|
707
|
+
# "兜底值": fallback
|
|
708
|
+
# })
|
|
709
|
+
return fallback # 直接返回兜底值
|
|
707
710
|
|
|
708
711
|
return None # 允许空值时返回None
|
|
709
712
|
|
|
@@ -799,7 +802,13 @@ class MySQLUploader:
|
|
|
799
802
|
|
|
800
803
|
# 统一处理空值
|
|
801
804
|
if is_empty_value:
|
|
802
|
-
|
|
805
|
+
fallback_value = self._get_fallback_value(column_type_lower, allow_null, db_name, table_name, col_name, value)
|
|
806
|
+
# 如果返回了兜底值(非None),直接返回,不再进行后续验证
|
|
807
|
+
# 因为兜底值已经是根据列类型设计的合适值
|
|
808
|
+
if fallback_value is not None:
|
|
809
|
+
return fallback_value
|
|
810
|
+
# 如果返回None(允许空值的情况),继续后续处理
|
|
811
|
+
return None
|
|
803
812
|
|
|
804
813
|
# JSON类型验证和转换
|
|
805
814
|
if 'json' in column_type_lower:
|
|
@@ -1072,14 +1081,41 @@ class MySQLUploader:
|
|
|
1072
1081
|
'列': self._shorten_for_log(table_columns),
|
|
1073
1082
|
})
|
|
1074
1083
|
raise ValueError(f"获取列失败 `{db_name}`.`{table_name}`")
|
|
1075
|
-
|
|
1076
|
-
|
|
1077
|
-
|
|
1084
|
+
# 检查并自动添加缺失的列
|
|
1085
|
+
missing_columns = [col for col in set_typ if col not in table_columns]
|
|
1086
|
+
if missing_columns:
|
|
1087
|
+
if not self.auto_creat_missing_cols:
|
|
1088
|
+
logger.error('列不存在且不支持自动添加,请手动维护表结构,并补齐缺失列', {
|
|
1078
1089
|
'库': db_name,
|
|
1079
1090
|
'表': table_name,
|
|
1080
|
-
'
|
|
1091
|
+
'缺失列数': len(missing_columns),
|
|
1092
|
+
'缺失列': missing_columns,
|
|
1081
1093
|
})
|
|
1082
|
-
raise ValueError(f"列不存在: `{
|
|
1094
|
+
raise ValueError(f"列不存在: `{missing_columns}` -> `{db_name}`.`{table_name}`")
|
|
1095
|
+
else:
|
|
1096
|
+
# 表有缺失列时报错,建议不允许自动添加,手动检查数据一致性,以免产生不必要的表错误
|
|
1097
|
+
# 自动添加缺失的列
|
|
1098
|
+
for col in missing_columns:
|
|
1099
|
+
try:
|
|
1100
|
+
self._add_column_to_table(db_name, table_name, col, set_typ[col], allow_null)
|
|
1101
|
+
logger.info('自动添加缺失列', {
|
|
1102
|
+
'库': db_name,
|
|
1103
|
+
'表': table_name,
|
|
1104
|
+
'列': col,
|
|
1105
|
+
'类型': set_typ[col]
|
|
1106
|
+
})
|
|
1107
|
+
except Exception as e:
|
|
1108
|
+
logger.error('添加列失败', {
|
|
1109
|
+
'库': db_name,
|
|
1110
|
+
'表': table_name,
|
|
1111
|
+
'列': col,
|
|
1112
|
+
'类型': set_typ[col],
|
|
1113
|
+
'错误': str(e)
|
|
1114
|
+
})
|
|
1115
|
+
raise ValueError(f"添加列失败: `{col}` -> `{db_name}`.`{table_name}`: {str(e)}")
|
|
1116
|
+
|
|
1117
|
+
# 重新获取表列信息
|
|
1118
|
+
table_columns = self._get_table_columns(db_name, table_name)
|
|
1083
1119
|
if date_column and date_column in table_columns:
|
|
1084
1120
|
try:
|
|
1085
1121
|
self._ensure_index(db_name, table_name, date_column)
|
|
@@ -1300,21 +1336,67 @@ class MySQLUploader:
|
|
|
1300
1336
|
prepared_row[col_name] = self._validate_value(None, filtered_set_typ[col_name], allow_null, db_name, table_name, col_name)
|
|
1301
1337
|
except ValueError as e:
|
|
1302
1338
|
if not allow_null:
|
|
1303
|
-
|
|
1304
|
-
|
|
1305
|
-
|
|
1306
|
-
|
|
1339
|
+
# 如果不允许空值但验证失败,尝试使用兜底值
|
|
1340
|
+
try:
|
|
1341
|
+
fallback_value = self._get_fallback_value(filtered_set_typ[col_name].lower(), allow_null, db_name, table_name, col_name, None)
|
|
1342
|
+
if fallback_value is not None:
|
|
1343
|
+
prepared_row[col_name] = fallback_value
|
|
1344
|
+
logger.warning(f"行号:{row_idx} -> 缺失列: `{col_name}`, 使用兜底值: {fallback_value}", {'row': self._shorten_for_log(row)})
|
|
1345
|
+
else:
|
|
1346
|
+
error_msg = f"行号:{row_idx} -> 缺失列: `{col_name}`, 且不允许空值"
|
|
1347
|
+
logger.error(error_msg, {'row': self._shorten_for_log(row)})
|
|
1348
|
+
raise ValueError(error_msg)
|
|
1349
|
+
except Exception:
|
|
1350
|
+
error_msg = f"行号:{row_idx} -> 缺失列: `{col_name}`, 且不允许空值"
|
|
1351
|
+
logger.error(error_msg, {'row': self._shorten_for_log(row)})
|
|
1352
|
+
raise ValueError(error_msg)
|
|
1353
|
+
else:
|
|
1354
|
+
prepared_row[col_name] = None
|
|
1307
1355
|
else:
|
|
1308
1356
|
try:
|
|
1309
1357
|
prepared_row[col_name] = self._validate_value(row[col_name], filtered_set_typ[col_name], allow_null, db_name, table_name, col_name)
|
|
1310
1358
|
except ValueError as e:
|
|
1311
|
-
|
|
1312
|
-
|
|
1313
|
-
|
|
1314
|
-
|
|
1315
|
-
|
|
1316
|
-
|
|
1317
|
-
|
|
1359
|
+
# 如果数据验证失败,检查是否为空值且不允许空值,尝试使用兜底值
|
|
1360
|
+
original_value = row[col_name]
|
|
1361
|
+
is_empty_original = (original_value is None or
|
|
1362
|
+
original_value == '' or
|
|
1363
|
+
(not isinstance(original_value, (list, dict)) and
|
|
1364
|
+
pd.isna(original_value) if hasattr(pd, 'isna') else False))
|
|
1365
|
+
|
|
1366
|
+
if is_empty_original and not allow_null:
|
|
1367
|
+
try:
|
|
1368
|
+
fallback_value = self._get_fallback_value(filtered_set_typ[col_name].lower(), allow_null, db_name, table_name, col_name, original_value)
|
|
1369
|
+
if fallback_value is not None:
|
|
1370
|
+
prepared_row[col_name] = fallback_value
|
|
1371
|
+
logger.warning(f"行:{row_idx}, 列:`{col_name}` -> 原值验证失败,使用兜底值: {fallback_value}", {
|
|
1372
|
+
'原值': original_value,
|
|
1373
|
+
'兜底值': fallback_value,
|
|
1374
|
+
'row': self._shorten_for_log(row)
|
|
1375
|
+
})
|
|
1376
|
+
else:
|
|
1377
|
+
logger.error('数据验证失败', {
|
|
1378
|
+
'列': col_name,
|
|
1379
|
+
'行': row_idx,
|
|
1380
|
+
'报错': str(e),
|
|
1381
|
+
'row': self._shorten_for_log(row),
|
|
1382
|
+
})
|
|
1383
|
+
raise ValueError(f"行:{row_idx}, 列:`{col_name}`-> 报错: {str(e)}")
|
|
1384
|
+
except Exception:
|
|
1385
|
+
logger.error('数据验证失败', {
|
|
1386
|
+
'列': col_name,
|
|
1387
|
+
'行': row_idx,
|
|
1388
|
+
'报错': str(e),
|
|
1389
|
+
'row': self._shorten_for_log(row),
|
|
1390
|
+
})
|
|
1391
|
+
raise ValueError(f"行:{row_idx}, 列:`{col_name}`-> 报错: {str(e)}")
|
|
1392
|
+
else:
|
|
1393
|
+
logger.error('数据验证失败', {
|
|
1394
|
+
'列': col_name,
|
|
1395
|
+
'行': row_idx,
|
|
1396
|
+
'报错': str(e),
|
|
1397
|
+
'row': self._shorten_for_log(row),
|
|
1398
|
+
})
|
|
1399
|
+
raise ValueError(f"行:{row_idx}, 列:`{col_name}`-> 报错: {str(e)}")
|
|
1318
1400
|
prepared_data.append(prepared_row)
|
|
1319
1401
|
return prepared_data, filtered_set_typ
|
|
1320
1402
|
|
|
@@ -2080,6 +2162,70 @@ class MySQLUploader:
|
|
|
2080
2162
|
except Exception as e:
|
|
2081
2163
|
logger.error('创建索引失败', {'库': db_name, '表': table_name, '列': column, '错误': str(e)})
|
|
2082
2164
|
raise
|
|
2165
|
+
|
|
2166
|
+
@_execute_with_retry
def _add_column_to_table(self, db_name: str, table_name: str, column: str, column_type: str, allow_null: bool = False):
    """
    Add a column to an existing table with ``ALTER TABLE ... ADD COLUMN``.

    The column is declared ``NULL`` or ``NOT NULL`` according to ``allow_null``.
    For ``NOT NULL`` columns a type-appropriate literal ``DEFAULT`` is appended
    when the type accepts one, so rows that already exist receive a sentinel
    value. Types that take no literal default here (TEXT/BLOB/JSON, TIMESTAMP,
    and anything unrecognised) are created without a ``DEFAULT`` clause.

    :param db_name: database name; passed through ``_validate_identifier``
    :param table_name: table name; passed through ``_validate_identifier``
    :param column: name of the column to add; passed through ``_validate_identifier``
    :param column_type: SQL type of the new column, e.g. ``VARCHAR(255)``
    :param allow_null: whether the column accepts NULL, defaults to False
    :raises Exception: any error raised while executing the statement is logged
        and re-raised unchanged
    """
    db_name = self._validate_identifier(db_name, is_database=True)
    table_name = self._validate_identifier(table_name)
    column = self._validate_identifier(column)

    null_constraint = "NULL" if allow_null else "NOT NULL"

    # Pick a DEFAULT clause for NOT NULL columns. Matching is by substring of
    # the lower-cased type, so e.g. 'int' also covers bigint/tinyint/smallint.
    default_value = ""
    if not allow_null:
        column_type_lower = column_type.lower()
        if any(t in column_type_lower for t in ('text', 'blob', 'json')):
            # MySQL rejects literal defaults on BLOB/TEXT/JSON columns
            # (error 1101), so emitting DEFAULT 'none' / DEFAULT '{}' made
            # the ALTER fail. Leave the clause out for these types.
            # NOTE(review): existing rows then get the server's implicit
            # value for the type - confirm that is acceptable.
            default_value = ""
        elif 'int' in column_type_lower:
            default_value = " DEFAULT 0"
        elif any(t in column_type_lower for t in ('decimal', 'float', 'double')):
            default_value = " DEFAULT 0.0"
        elif 'char' in column_type_lower:
            default_value = " DEFAULT 'none'"
        elif 'datetime' in column_type_lower:
            default_value = " DEFAULT '1970-01-01 00:00:00'"
        elif 'date' in column_type_lower:
            default_value = " DEFAULT '1970-01-01'"
        # TIMESTAMP deliberately gets no DEFAULT. The previous code tested for
        # 'timestamp' inside the 'date' branch, which could never match, so
        # this keeps the effective behaviour.
        # NOTE(review): '1970-01-01 00:00:00' is below TIMESTAMP's documented
        # minimum, so a different sentinel is needed if one is wanted here.

    # NOTE(review): column_type is interpolated verbatim (it cannot be a bound
    # parameter in DDL); it must come from trusted configuration.
    sql = f'ALTER TABLE `{db_name}`.`{table_name}` ADD COLUMN `{column}` {column_type} {null_constraint}{default_value}'

    conn = None
    try:
        with self._get_connection() as conn:
            with conn.cursor() as cursor:
                cursor.execute(sql)
            conn.commit()
        logger.debug('已为表添加列', {
            '库': db_name,
            '表': table_name,
            '列': column,
            '类型': column_type,
            '允许空值': allow_null
        })
    except Exception as e:
        logger.error('添加列失败', {
            '库': db_name,
            '表': table_name,
            '列': column,
            '类型': column_type,
            '错误': str(e),
            'SQL': sql
        })
        if conn is not None:
            # Best-effort rollback: the connection may already have been
            # released by the context manager, and a failure here must not
            # replace the original error that is re-raised below.
            try:
                conn.rollback()
            except Exception as rollback_error:
                logger.warning('回滚失败', {
                    '库': db_name,
                    '表': table_name,
                    '错误': str(rollback_error)
                })
        raise
|
|
2083
2229
|
|
|
2084
2230
|
def __enter__(self):
|
|
2085
2231
|
return self
|
|
@@ -1,5 +1,5 @@
|
|
|
1
1
|
mdbq/__init__.py,sha256=Il5Q9ATdX8yXqVxtP_nYqUhExzxPC_qk_WXQ_4h0exg,16
|
|
2
|
-
mdbq/__version__.py,sha256=
|
|
2
|
+
mdbq/__version__.py,sha256=u5gDacgvRqvefEX8ZuCoclKHVFPzAOlN3K-QB3BRCjk,18
|
|
3
3
|
mdbq/auth/__init__.py,sha256=pnPMAt63sh1B6kEvmutUuro46zVf2v2YDAG7q-jV_To,24
|
|
4
4
|
mdbq/auth/auth_backend.py,sha256=iLN7AqiSq7fQgFtNtge_TIlVOR1hrCSZXH6oId6uGX4,116924
|
|
5
5
|
mdbq/auth/crypto.py,sha256=fcZRFCnrKVVdWDUx_zds51ynFYwS9DBvJOrRQVldrfM,15931
|
|
@@ -15,7 +15,7 @@ mdbq/mysql/deduplicator.py,sha256=2fugLyKs_xkvYvoG0C0hRYbJ_w8-4oa1FJ_vavoD7Qo,73
|
|
|
15
15
|
mdbq/mysql/mysql.py,sha256=pDg771xBugCMSTWeskIFTi3pFLgaqgyG3smzf-86Wn8,56772
|
|
16
16
|
mdbq/mysql/s_query.py,sha256=N2xHJf2CiUXjXIVBemdst-wamIP3908EGAJOFG13fCU,50475
|
|
17
17
|
mdbq/mysql/unique_.py,sha256=MaztT-WIyEQUs-OOYY4pFulgHVcXR1BfCy3QUz0XM_U,21127
|
|
18
|
-
mdbq/mysql/uploader.py,sha256=
|
|
18
|
+
mdbq/mysql/uploader.py,sha256=ijeDZ_Z9krLjVltWzcM8JsHev9eBqB9CNcac_W2mWCg,125975
|
|
19
19
|
mdbq/other/__init__.py,sha256=jso1oHcy6cJEfa7udS_9uO5X6kZLoPBF8l3wCYmr5dM,18
|
|
20
20
|
mdbq/other/download_sku_picture.py,sha256=X66sVdvVgzoNzmgVJyPtd7bjEvctEKtLPblEPF65EWc,46940
|
|
21
21
|
mdbq/other/error_handler.py,sha256=4p5haAXSY-P78stp4Xwo_MwAngWYqyKj5ogWIuYXMeY,12631
|
|
@@ -35,7 +35,7 @@ mdbq/route/routes.py,sha256=QVGfTvDgu0CpcKCvk1ra74H8uojgqTLUav1fnVAqLEA,29433
|
|
|
35
35
|
mdbq/selenium/__init__.py,sha256=AKzeEceqZyvqn2dEDoJSzDQnbuENkJSHAlbHAD0u0ZI,10
|
|
36
36
|
mdbq/selenium/get_driver.py,sha256=1NTlVUE6QsyjTrVVVqTO2LOnYf578ccFWlWnvIXGtic,20903
|
|
37
37
|
mdbq/spider/__init__.py,sha256=RBMFXGy_jd1HXZhngB2T2XTvJqki8P_Fr-pBcwijnew,18
|
|
38
|
-
mdbq-4.1.
|
|
39
|
-
mdbq-4.1.
|
|
40
|
-
mdbq-4.1.
|
|
41
|
-
mdbq-4.1.10.dist-info/RECORD,,
|
|
38
|
+
mdbq-4.1.11.dist-info/METADATA,sha256=uDJZ-MeTKGJLduZ9Pyf-tN5kFgE9ZqYjakSJtrjzt_I,364
|
|
39
|
+
mdbq-4.1.11.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
|
|
40
|
+
mdbq-4.1.11.dist-info/top_level.txt,sha256=2FQ-uLnCSB-OwFiWntzmwosW3X2Xqsg0ewh1axsaylA,5
|
|
41
|
+
mdbq-4.1.11.dist-info/RECORD,,
|
|
File without changes
|