mdbq 3.10.1__py3-none-any.whl → 3.10.3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- mdbq/__version__.py +1 -1
- mdbq/mysql/uploader.py +225 -149
- {mdbq-3.10.1.dist-info → mdbq-3.10.3.dist-info}/METADATA +1 -1
- {mdbq-3.10.1.dist-info → mdbq-3.10.3.dist-info}/RECORD +6 -6
- {mdbq-3.10.1.dist-info → mdbq-3.10.3.dist-info}/WHEEL +0 -0
- {mdbq-3.10.1.dist-info → mdbq-3.10.3.dist-info}/top_level.txt +0 -0
mdbq/__version__.py
CHANGED
@@ -1 +1 @@
|
|
1
|
-
VERSION = '3.10.1'
|
1
|
+
VERSION = '3.10.3'
|
mdbq/mysql/uploader.py
CHANGED
@@ -303,8 +303,8 @@ class MySQLUploader:
|
|
303
303
|
return exists
|
304
304
|
except Exception as e:
|
305
305
|
logger.error(sys._getframe().f_code.co_name, {
|
306
|
+
'库': db_name,
|
306
307
|
'检查数据库是否存在时出错': str(e),
|
307
|
-
'库': db_name
|
308
308
|
})
|
309
309
|
raise
|
310
310
|
|
@@ -546,9 +546,9 @@ class MySQLUploader:
|
|
546
546
|
|
547
547
|
except Exception as e:
|
548
548
|
logger.error(sys._getframe().f_code.co_name, {
|
549
|
-
'建表失败': str(e),
|
550
549
|
'库': db_name,
|
551
550
|
'表': table_name,
|
551
|
+
'建表失败': str(e),
|
552
552
|
})
|
553
553
|
conn.rollback()
|
554
554
|
raise
|
@@ -678,9 +678,9 @@ class MySQLUploader:
|
|
678
678
|
return set_typ
|
679
679
|
except Exception as e:
|
680
680
|
logger.error(sys._getframe().f_code.co_name, {
|
681
|
-
'无法获取表列信息': str(e),
|
682
681
|
'库': db_name,
|
683
682
|
'表': table_name,
|
683
|
+
'无法获取表列信息': str(e),
|
684
684
|
})
|
685
685
|
raise
|
686
686
|
|
@@ -709,9 +709,9 @@ class MySQLUploader:
|
|
709
709
|
allow_null=allow_null)
|
710
710
|
else:
|
711
711
|
logger.error(sys._getframe().f_code.co_name, {
|
712
|
-
'数据表不存在': table_name,
|
713
712
|
'库': db_name,
|
714
713
|
'表': table_name,
|
714
|
+
'数据表不存在': table_name,
|
715
715
|
})
|
716
716
|
raise ValueError(f"数据表不存在: `{db_name}`.`{table_name}`")
|
717
717
|
|
@@ -719,9 +719,9 @@ class MySQLUploader:
|
|
719
719
|
table_columns = self._get_table_columns(db_name, table_name)
|
720
720
|
if not table_columns:
|
721
721
|
logger.error(sys._getframe().f_code.co_name, {
|
722
|
-
'获取列失败': table_columns,
|
723
722
|
'库': db_name,
|
724
723
|
'表': table_name,
|
724
|
+
'获取列失败': table_columns,
|
725
725
|
})
|
726
726
|
raise ValueError(f"获取列失败 `{db_name}`.`{table_name}`")
|
727
727
|
|
@@ -729,9 +729,9 @@ class MySQLUploader:
|
|
729
729
|
for col in set_typ:
|
730
730
|
if col not in table_columns:
|
731
731
|
logger.error(sys._getframe().f_code.co_name, {
|
732
|
-
'列不存在': f'{col} -> {table_columns}',
|
733
732
|
'库': db_name,
|
734
733
|
'表': table_name,
|
734
|
+
'列不存在': f'{col} -> {table_columns}',
|
735
735
|
})
|
736
736
|
raise ValueError(f"列不存在: `{col}` -> `{db_name}`.`{table_name}`")
|
737
737
|
|
@@ -960,7 +960,7 @@ class MySQLUploader:
|
|
960
960
|
默认值为 'batch'
|
961
961
|
:raises: 可能抛出各种验证和数据库相关异常
|
962
962
|
"""
|
963
|
-
upload_start = time.time()
|
963
|
+
# upload_start = time.time()
|
964
964
|
initial_row_count = len(data) if hasattr(data, '__len__') else 1
|
965
965
|
|
966
966
|
batch_id = f"batch_{int(time.time() * 1000)}"
|
@@ -972,7 +972,7 @@ class MySQLUploader:
|
|
972
972
|
'批次': batch_id,
|
973
973
|
'分表方式': partition_by,
|
974
974
|
'排重': check_duplicate,
|
975
|
-
'
|
975
|
+
'传入': len(data) if hasattr(data, '__len__') else 1,
|
976
976
|
'自动建表': auto_create
|
977
977
|
})
|
978
978
|
|
@@ -985,10 +985,10 @@ class MySQLUploader:
|
|
985
985
|
partition_by = str(partition_by).lower()
|
986
986
|
if partition_by not in ['year', 'month']:
|
987
987
|
logger.error(sys._getframe().f_code.co_name, {
|
988
|
-
'分表方式必须是 "year" 或 "month" 或 "None"': partition_by,
|
989
988
|
'库': db_name,
|
990
989
|
'表': table_name,
|
991
|
-
'批次': batch_id
|
990
|
+
'批次': batch_id,
|
991
|
+
'分表方式必须是 "year" 或 "month" 或 "None"': partition_by,
|
992
992
|
})
|
993
993
|
raise ValueError("分表方式必须是 'year' 或 'month' 或 'None'")
|
994
994
|
|
@@ -1029,10 +1029,10 @@ class MySQLUploader:
|
|
1029
1029
|
partitioned_data[part_table].append(row)
|
1030
1030
|
except Exception as e:
|
1031
1031
|
logger.error(sys._getframe().f_code.co_name, {
|
1032
|
-
'row_data': row,
|
1033
|
-
'分表处理失败': str(e),
|
1034
1032
|
'库': db_name,
|
1035
1033
|
'表': table_name,
|
1034
|
+
'row_data': row,
|
1035
|
+
'分表处理失败': str(e),
|
1036
1036
|
})
|
1037
1037
|
continue # 跳过当前行
|
1038
1038
|
|
@@ -1047,10 +1047,10 @@ class MySQLUploader:
|
|
1047
1047
|
)
|
1048
1048
|
except Exception as e:
|
1049
1049
|
logger.error(sys._getframe().f_code.co_name, {
|
1050
|
-
'分表': part_table,
|
1051
|
-
'分表上传失败': str(e),
|
1052
1050
|
'库': db_name,
|
1053
1051
|
'表': table_name,
|
1052
|
+
'分表': part_table,
|
1053
|
+
'分表上传失败': str(e),
|
1054
1054
|
})
|
1055
1055
|
continue # 跳过当前分表,继续处理其他分表
|
1056
1056
|
else:
|
@@ -1066,10 +1066,10 @@ class MySQLUploader:
|
|
1066
1066
|
|
1067
1067
|
except Exception as e:
|
1068
1068
|
logger.error(sys._getframe().f_code.co_name, {
|
1069
|
-
'上传过程发生全局错误': str(e),
|
1070
|
-
'error_type': type(e).__name__,
|
1071
1069
|
'库': db_name,
|
1072
1070
|
'表': table_name,
|
1071
|
+
'上传过程发生全局错误': str(e),
|
1072
|
+
'error_type': type(e).__name__,
|
1073
1073
|
})
|
1074
1074
|
finally:
|
1075
1075
|
logger.info("存储完成", {
|
@@ -1077,7 +1077,7 @@ class MySQLUploader:
|
|
1077
1077
|
'表': table_name,
|
1078
1078
|
'批次': batch_id,
|
1079
1079
|
'success': success_flag,
|
1080
|
-
'耗时': round(time.time() - upload_start, 2),
|
1080
|
+
# '耗时': round(time.time() - upload_start, 2),
|
1081
1081
|
'数据行': initial_row_count
|
1082
1082
|
})
|
1083
1083
|
|
@@ -1115,33 +1115,65 @@ class MySQLUploader:
|
|
1115
1115
|
if not data:
|
1116
1116
|
return
|
1117
1117
|
|
1118
|
+
# 验证事务模式
|
1119
|
+
transaction_mode = self._validate_transaction_mode(transaction_mode)
|
1120
|
+
|
1121
|
+
# 准备SQL语句
|
1122
|
+
sql = self._prepare_insert_sql(
|
1123
|
+
db_name, table_name, set_typ,
|
1124
|
+
check_duplicate, duplicate_columns,
|
1125
|
+
update_on_duplicate
|
1126
|
+
)
|
1127
|
+
|
1128
|
+
# 执行批量插入
|
1129
|
+
total_inserted, total_skipped, total_failed = self._execute_batch_insert(
|
1130
|
+
db_name, table_name, data, set_typ,
|
1131
|
+
sql, check_duplicate, duplicate_columns,
|
1132
|
+
batch_size, batch_id, transaction_mode
|
1133
|
+
)
|
1134
|
+
|
1135
|
+
logger.info('插入完成', {
|
1136
|
+
'库': db_name,
|
1137
|
+
'表': table_name,
|
1138
|
+
'完成总计': len(data),
|
1139
|
+
'插入': total_inserted,
|
1140
|
+
'跳过': total_skipped,
|
1141
|
+
'失败': total_failed,
|
1142
|
+
'事务提交模式': transaction_mode,
|
1143
|
+
})
|
1144
|
+
|
1145
|
+
def _validate_transaction_mode(self, mode: str) -> str:
|
1146
|
+
"""验证并标准化事务模式"""
|
1118
1147
|
valid_modes = ('row', 'batch', 'hybrid')
|
1119
|
-
if
|
1148
|
+
if mode.lower() not in valid_modes:
|
1120
1149
|
logger.error(sys._getframe().f_code.co_name, {
|
1121
|
-
'
|
1122
|
-
'表': table_name,
|
1123
|
-
'参数异常': f'transaction_mode -> {transaction_mode}',
|
1150
|
+
'参数异常': f'transaction_mode -> {mode}',
|
1124
1151
|
'可选值': valid_modes,
|
1125
1152
|
'自动使用默认模式': 'batch'
|
1126
1153
|
})
|
1127
|
-
|
1154
|
+
return 'batch'
|
1155
|
+
return mode.lower()
|
1128
1156
|
|
1129
|
-
|
1130
|
-
|
1157
|
+
def _prepare_insert_sql(
|
1158
|
+
self,
|
1159
|
+
db_name: str,
|
1160
|
+
table_name: str,
|
1161
|
+
set_typ: Dict[str, str],
|
1162
|
+
check_duplicate: bool,
|
1163
|
+
duplicate_columns: Optional[List[str]],
|
1164
|
+
update_on_duplicate: bool
|
1165
|
+
) -> str:
|
1166
|
+
"""准备插入SQL语句"""
|
1167
|
+
# 获取所有列名(排除 `id`、`更新时间` 列)
|
1168
|
+
all_columns = [col for col in set_typ.keys() if col.lower() not in ['id', '更新时间']]
|
1131
1169
|
safe_columns = [self._validate_identifier(col) for col in all_columns]
|
1132
1170
|
placeholders = ','.join(['%s'] * len(safe_columns))
|
1133
1171
|
|
1134
|
-
# 初始化统计变量
|
1135
|
-
total_inserted = 0
|
1136
|
-
total_skipped = 0
|
1137
|
-
total_failed = 0
|
1138
|
-
|
1139
|
-
# 构建基础SQL语句
|
1140
1172
|
if check_duplicate:
|
1141
1173
|
if not duplicate_columns:
|
1142
1174
|
duplicate_columns = all_columns
|
1143
1175
|
else:
|
1144
|
-
duplicate_columns = [col for col in duplicate_columns if col
|
1176
|
+
duplicate_columns = [col for col in duplicate_columns if col.lower() not in ['id', '更新时间']]
|
1145
1177
|
|
1146
1178
|
conditions = []
|
1147
1179
|
for col in duplicate_columns:
|
@@ -1158,14 +1190,14 @@ class MySQLUploader:
|
|
1158
1190
|
if update_on_duplicate:
|
1159
1191
|
# 更新模式 - 使用ON DUPLICATE KEY UPDATE语法
|
1160
1192
|
update_clause = ", ".join([f"`{col}` = VALUES(`{col}`)" for col in all_columns])
|
1161
|
-
|
1193
|
+
return f"""
|
1162
1194
|
INSERT INTO `{db_name}`.`{table_name}`
|
1163
1195
|
(`{'`,`'.join(safe_columns)}`)
|
1164
1196
|
VALUES ({placeholders})
|
1165
1197
|
ON DUPLICATE KEY UPDATE {update_clause}
|
1166
1198
|
"""
|
1167
1199
|
else:
|
1168
|
-
|
1200
|
+
return f"""INSERT INTO `{db_name}`.`{table_name}`
|
1169
1201
|
(`{'`,`'.join(safe_columns)}`)
|
1170
1202
|
SELECT {placeholders}
|
1171
1203
|
FROM DUAL
|
@@ -1175,147 +1207,190 @@ class MySQLUploader:
|
|
1175
1207
|
)
|
1176
1208
|
"""
|
1177
1209
|
else:
|
1178
|
-
|
1210
|
+
return f"""
|
1179
1211
|
INSERT INTO `{db_name}`.`{table_name}`
|
1180
1212
|
(`{'`,`'.join(safe_columns)}`)
|
1181
1213
|
VALUES ({placeholders})
|
1182
1214
|
"""
|
1183
1215
|
|
1184
|
-
|
1216
|
+
def _execute_batch_insert(
|
1217
|
+
self,
|
1218
|
+
db_name: str,
|
1219
|
+
table_name: str,
|
1220
|
+
data: List[Dict],
|
1221
|
+
set_typ: Dict[str, str],
|
1222
|
+
sql: str,
|
1223
|
+
check_duplicate: bool,
|
1224
|
+
duplicate_columns: Optional[List[str]],
|
1225
|
+
batch_size: int,
|
1226
|
+
batch_id: Optional[str],
|
1227
|
+
transaction_mode: str
|
1228
|
+
) -> Tuple[int, int, int]:
|
1229
|
+
"""执行批量插入操作"""
|
1230
|
+
# 获取所有列名(排除id列)
|
1231
|
+
all_columns = [col for col in set_typ.keys() if col.lower() != 'id']
|
1232
|
+
|
1233
|
+
total_inserted = 0
|
1234
|
+
total_skipped = 0
|
1235
|
+
total_failed = 0
|
1236
|
+
|
1185
1237
|
with self._get_connection() as conn:
|
1186
1238
|
with conn.cursor() as cursor:
|
1187
1239
|
for i in range(0, len(data), batch_size):
|
1188
1240
|
batch = data[i:i + batch_size]
|
1189
|
-
batch_inserted =
|
1190
|
-
|
1191
|
-
|
1192
|
-
|
1193
|
-
|
1194
|
-
# 批量模式特殊处理 - 尝试逐行插入但保持事务
|
1195
|
-
try:
|
1196
|
-
for row_idx, row in enumerate(batch, 1):
|
1197
|
-
try:
|
1198
|
-
# 准备参数
|
1199
|
-
row_values = [row.get(col) for col in all_columns]
|
1200
|
-
if check_duplicate and not update_on_duplicate:
|
1201
|
-
row_values += [row.get(col) for col in duplicate_columns]
|
1202
|
-
|
1203
|
-
cursor.execute(sql, row_values)
|
1204
|
-
|
1205
|
-
if check_duplicate:
|
1206
|
-
# 检查是否实际插入了行
|
1207
|
-
if cursor.rowcount > 0:
|
1208
|
-
batch_inserted += 1
|
1209
|
-
else:
|
1210
|
-
batch_skipped += 1
|
1211
|
-
else:
|
1212
|
-
batch_inserted += 1
|
1213
|
-
|
1214
|
-
except Exception as e:
|
1215
|
-
batch_failed += 1
|
1216
|
-
logger.error(sys._getframe().f_code.co_name, {
|
1217
|
-
'库': db_name,
|
1218
|
-
'表': table_name,
|
1219
|
-
'批次/当前行': f'{batch_id} {row_idx}/{len(batch)}',
|
1220
|
-
'error_type': type(e).__name__,
|
1221
|
-
'单行插入失败': str(e),
|
1222
|
-
'数据类型': set_typ,
|
1223
|
-
'是否排重': check_duplicate,
|
1224
|
-
'排重列': duplicate_columns,
|
1225
|
-
'事务提交模式': transaction_mode,
|
1226
|
-
'处理方式': '继续处理剩余行'
|
1227
|
-
})
|
1228
|
-
continue # 继续处理下一行
|
1229
|
-
|
1230
|
-
# 批量模式最后统一提交
|
1231
|
-
conn.commit()
|
1232
|
-
|
1233
|
-
except Exception as e:
|
1234
|
-
# 如果整个批量操作失败,回滚
|
1235
|
-
conn.rollback()
|
1236
|
-
batch_failed = len(batch) # 标记整个批次失败
|
1237
|
-
logger.error(sys._getframe().f_code.co_name, {
|
1238
|
-
'库': db_name,
|
1239
|
-
'表': table_name,
|
1240
|
-
'批次': f'{batch_id} {i+1}/{len(data)}',
|
1241
|
-
'error_type': type(e).__name__,
|
1242
|
-
'批量操作失败': str(e),
|
1243
|
-
'事务提交模式': transaction_mode,
|
1244
|
-
'处理方式': '整个批次回滚'
|
1245
|
-
})
|
1246
|
-
|
1247
|
-
else: # row 或 hybrid 模式
|
1248
|
-
for row_idx, row in enumerate(batch, 1):
|
1249
|
-
try:
|
1250
|
-
# 准备参数
|
1251
|
-
row_values = [row.get(col) for col in all_columns]
|
1252
|
-
if check_duplicate and not update_on_duplicate:
|
1253
|
-
row_values += [row.get(col) for col in duplicate_columns]
|
1254
|
-
|
1255
|
-
cursor.execute(sql, row_values)
|
1256
|
-
|
1257
|
-
if check_duplicate:
|
1258
|
-
# 检查是否实际插入了行
|
1259
|
-
if cursor.rowcount > 0:
|
1260
|
-
batch_inserted += 1
|
1261
|
-
else:
|
1262
|
-
batch_skipped += 1
|
1263
|
-
else:
|
1264
|
-
batch_inserted += 1
|
1265
|
-
|
1266
|
-
# 根据模式决定提交时机
|
1267
|
-
if transaction_mode == 'row':
|
1268
|
-
conn.commit() # 逐行提交
|
1269
|
-
elif transaction_mode == 'hybrid' and row_idx % 100 == 0:
|
1270
|
-
conn.commit() # 每100行提交一次
|
1271
|
-
|
1272
|
-
except Exception as e:
|
1273
|
-
conn.rollback()
|
1274
|
-
batch_failed += 1
|
1275
|
-
logger.error(sys._getframe().f_code.co_name, {
|
1276
|
-
'库': db_name,
|
1277
|
-
'表': table_name,
|
1278
|
-
'批次/当前行': f'{batch_id} {row_idx}/{len(batch)}',
|
1279
|
-
'error_type': type(e).__name__,
|
1280
|
-
'单行插入失败': str(e),
|
1281
|
-
'数据类型': set_typ,
|
1282
|
-
'是否排重': check_duplicate,
|
1283
|
-
'排重列': duplicate_columns,
|
1284
|
-
'事务提交模式': transaction_mode,
|
1285
|
-
})
|
1286
|
-
|
1287
|
-
# 混合模式最后统一提交
|
1288
|
-
if transaction_mode == 'hybrid':
|
1289
|
-
conn.commit()
|
1241
|
+
batch_inserted, batch_skipped, batch_failed = self._process_batch(
|
1242
|
+
conn, cursor, db_name, table_name, batch, all_columns,
|
1243
|
+
sql, check_duplicate, duplicate_columns, batch_id,
|
1244
|
+
transaction_mode, i, len(data)
|
1245
|
+
)
|
1290
1246
|
|
1291
1247
|
# 更新总统计
|
1292
1248
|
total_inserted += batch_inserted
|
1293
1249
|
total_skipped += batch_skipped
|
1294
1250
|
total_failed += batch_failed
|
1295
1251
|
|
1296
|
-
|
1252
|
+
return total_inserted, total_skipped, total_failed
|
1253
|
+
|
1254
|
+
def _process_batch(
|
1255
|
+
self,
|
1256
|
+
conn,
|
1257
|
+
cursor,
|
1258
|
+
db_name: str,
|
1259
|
+
table_name: str,
|
1260
|
+
batch: List[Dict],
|
1261
|
+
all_columns: List[str],
|
1262
|
+
sql: str,
|
1263
|
+
check_duplicate: bool,
|
1264
|
+
duplicate_columns: Optional[List[str]],
|
1265
|
+
batch_id: Optional[str],
|
1266
|
+
transaction_mode: str,
|
1267
|
+
batch_index: int,
|
1268
|
+
total_data_length: int
|
1269
|
+
) -> Tuple[int, int, int]:
|
1270
|
+
"""处理单个批次的数据插入"""
|
1271
|
+
batch_inserted = 0
|
1272
|
+
batch_skipped = 0
|
1273
|
+
batch_failed = 0
|
1274
|
+
|
1275
|
+
if transaction_mode == 'batch':
|
1276
|
+
# 批量模式特殊处理 - 尝试逐行插入但保持事务
|
1277
|
+
try:
|
1278
|
+
for row_idx, row in enumerate(batch, 1):
|
1279
|
+
result = self._process_single_row(
|
1280
|
+
cursor, row, all_columns, sql,
|
1281
|
+
check_duplicate, duplicate_columns
|
1282
|
+
)
|
1283
|
+
if result == 'inserted':
|
1284
|
+
batch_inserted += 1
|
1285
|
+
elif result == 'skipped':
|
1286
|
+
batch_skipped += 1
|
1287
|
+
else:
|
1288
|
+
batch_failed += 1
|
1289
|
+
|
1290
|
+
# 批量模式最后统一提交
|
1291
|
+
conn.commit()
|
1292
|
+
|
1293
|
+
except Exception as e:
|
1294
|
+
# 如果整个批量操作失败,回滚
|
1295
|
+
conn.rollback()
|
1296
|
+
batch_failed = len(batch) # 标记整个批次失败
|
1297
|
+
logger.error(sys._getframe().f_code.co_name, {
|
1298
|
+
'库': db_name,
|
1299
|
+
'表': table_name,
|
1300
|
+
'批次': f'{batch_id} {batch_index + 1}/{total_data_length}',
|
1301
|
+
'error_type': type(e).__name__,
|
1302
|
+
'批量操作失败': str(e),
|
1303
|
+
'事务提交模式': transaction_mode,
|
1304
|
+
'处理方式': '整个批次回滚'
|
1305
|
+
})
|
1306
|
+
|
1307
|
+
else: # row 或 hybrid 模式
|
1308
|
+
for row_idx, row in enumerate(batch, 1):
|
1309
|
+
try:
|
1310
|
+
result = self._process_single_row(
|
1311
|
+
cursor, row, all_columns, sql,
|
1312
|
+
check_duplicate, duplicate_columns
|
1313
|
+
)
|
1314
|
+
if result == 'inserted':
|
1315
|
+
batch_inserted += 1
|
1316
|
+
elif result == 'skipped':
|
1317
|
+
batch_skipped += 1
|
1318
|
+
else:
|
1319
|
+
batch_failed += 1
|
1320
|
+
|
1321
|
+
# 根据模式决定提交时机
|
1322
|
+
if transaction_mode == 'row':
|
1323
|
+
conn.commit() # 逐行提交
|
1324
|
+
elif transaction_mode == 'hybrid' and row_idx % 100 == 0:
|
1325
|
+
conn.commit() # 每100行提交一次
|
1326
|
+
|
1327
|
+
except Exception as e:
|
1328
|
+
conn.rollback()
|
1329
|
+
batch_failed += 1
|
1330
|
+
logger.error(sys._getframe().f_code.co_name, {
|
1297
1331
|
'库': db_name,
|
1298
1332
|
'表': table_name,
|
1299
|
-
'
|
1300
|
-
'
|
1301
|
-
'
|
1302
|
-
'
|
1303
|
-
'
|
1304
|
-
'
|
1305
|
-
'失败': batch_failed,
|
1333
|
+
'批次/当前行': f'{batch_id} {row_idx}/{len(batch)}',
|
1334
|
+
'error_type': type(e).__name__,
|
1335
|
+
'单行插入失败': str(e),
|
1336
|
+
'数据类型': set_typ,
|
1337
|
+
'是否排重': check_duplicate,
|
1338
|
+
'排重列': duplicate_columns,
|
1306
1339
|
'事务提交模式': transaction_mode,
|
1307
1340
|
})
|
1308
1341
|
|
1309
|
-
|
1342
|
+
# 混合模式最后统一提交
|
1343
|
+
if transaction_mode == 'hybrid':
|
1344
|
+
conn.commit()
|
1345
|
+
|
1346
|
+
logger.debug(sys._getframe().f_code.co_name, {
|
1310
1347
|
'库': db_name,
|
1311
1348
|
'表': table_name,
|
1312
|
-
'
|
1313
|
-
'
|
1314
|
-
'
|
1315
|
-
'
|
1349
|
+
'批次': batch_id,
|
1350
|
+
'批次处理完成': batch_index // len(batch) + 1,
|
1351
|
+
'总批次': (total_data_length + len(batch) - 1) // len(batch),
|
1352
|
+
'数据量': len(batch),
|
1353
|
+
'插入': batch_inserted,
|
1354
|
+
'跳过': batch_skipped,
|
1355
|
+
'失败': batch_failed,
|
1316
1356
|
'事务提交模式': transaction_mode,
|
1317
1357
|
})
|
1318
1358
|
|
1359
|
+
return batch_inserted, batch_skipped, batch_failed
|
1360
|
+
|
1361
|
+
def _process_single_row(
|
1362
|
+
self,
|
1363
|
+
cursor,
|
1364
|
+
row: Dict,
|
1365
|
+
all_columns: List[str],
|
1366
|
+
sql: str,
|
1367
|
+
check_duplicate: bool,
|
1368
|
+
duplicate_columns: Optional[List[str]]
|
1369
|
+
) -> str:
|
1370
|
+
"""处理单行数据插入"""
|
1371
|
+
try:
|
1372
|
+
# 准备参数
|
1373
|
+
row_values = [row.get(col) for col in all_columns]
|
1374
|
+
if check_duplicate:
|
1375
|
+
row_values += [row.get(col) for col in duplicate_columns]
|
1376
|
+
|
1377
|
+
cursor.execute(sql, row_values)
|
1378
|
+
|
1379
|
+
if check_duplicate:
|
1380
|
+
# 检查是否实际插入了行
|
1381
|
+
return 'inserted' if cursor.rowcount > 0 else 'skipped'
|
1382
|
+
return 'inserted'
|
1383
|
+
|
1384
|
+
except Exception as e:
|
1385
|
+
logger.error(sys._getframe().f_code.co_name, {
|
1386
|
+
'error_type': type(e).__name__,
|
1387
|
+
'单行插入失败': str(e),
|
1388
|
+
'是否排重': check_duplicate,
|
1389
|
+
'排重列': duplicate_columns,
|
1390
|
+
'处理方式': '继续处理剩余行'
|
1391
|
+
})
|
1392
|
+
return 'failed'
|
1393
|
+
|
1319
1394
|
def close(self):
|
1320
1395
|
"""
|
1321
1396
|
关闭连接池并清理资源
|
@@ -1385,6 +1460,7 @@ class MySQLUploader:
|
|
1385
1460
|
def __exit__(self, exc_type, exc_val, exc_tb):
|
1386
1461
|
self.close()
|
1387
1462
|
|
1463
|
+
|
1388
1464
|
def main():
|
1389
1465
|
uploader = MySQLUploader(
|
1390
1466
|
username='root',
|
@@ -1,5 +1,5 @@
|
|
1
1
|
mdbq/__init__.py,sha256=Il5Q9ATdX8yXqVxtP_nYqUhExzxPC_qk_WXQ_4h0exg,16
|
2
|
-
mdbq/__version__.py,sha256=
|
2
|
+
mdbq/__version__.py,sha256=S1pYeTgXo5MtZqzwck9ASp8x1pB1PZ33oC1NI7fY9dI,18
|
3
3
|
mdbq/aggregation/__init__.py,sha256=EeDqX2Aml6SPx8363J-v1lz0EcZtgwIBYyCJV6CcEDU,40
|
4
4
|
mdbq/aggregation/optimize.py,sha256=2oalzD9weZhDclUC22OLxYa8Zj7KnmsGUoUau_Jlyc4,19796
|
5
5
|
mdbq/aggregation/query_data.py,sha256=5_OzjGR5Sq00q-EgAYmSE5V9i4Solw9y4hkldl4mvt8,179808
|
@@ -12,7 +12,7 @@ mdbq/mysql/__init__.py,sha256=A_DPJyAoEvTSFojiI2e94zP0FKtCkkwKP1kYUCSyQzo,11
|
|
12
12
|
mdbq/mysql/deduplicator.py,sha256=brhX3eyE8-kn3nAYweKfBbAkXiNcyw_pL4CTyPqmPBg,21983
|
13
13
|
mdbq/mysql/mysql.py,sha256=Fzaqbjg5g3HdNl50jInIrdurdzcgR2CCzdKLVImD1-Q,55339
|
14
14
|
mdbq/mysql/s_query.py,sha256=X055aLRAgxVvueXx4NbfNjp6MyBI02_XBb1pTKw09L0,8660
|
15
|
-
mdbq/mysql/uploader.py,sha256=
|
15
|
+
mdbq/mysql/uploader.py,sha256=9wgFxsiFSAngdX2pWj57jElaspwqfPtadC-xQqvweUc,59066
|
16
16
|
mdbq/other/__init__.py,sha256=jso1oHcy6cJEfa7udS_9uO5X6kZLoPBF8l3wCYmr5dM,18
|
17
17
|
mdbq/other/download_sku_picture.py,sha256=YU8DxKMXbdeE1OOKEA848WVp62jYHw5O4tXTjUdq9H0,44832
|
18
18
|
mdbq/other/otk.py,sha256=iclBIFbQbhlqzUbcMMoePXBpcP1eZ06ZtjnhcA_EbmE,7241
|
@@ -25,7 +25,7 @@ mdbq/redis/__init__.py,sha256=YtgBlVSMDphtpwYX248wGge1x-Ex_mMufz4-8W0XRmA,12
|
|
25
25
|
mdbq/redis/getredis.py,sha256=Uk8-cOWT0JU1qRyIVqdbYokSLvkDIAfcokmYj1ebw8k,24104
|
26
26
|
mdbq/spider/__init__.py,sha256=RBMFXGy_jd1HXZhngB2T2XTvJqki8P_Fr-pBcwijnew,18
|
27
27
|
mdbq/spider/aikucun.py,sha256=YyPWa_nOH1zs8wgTDcgzn5w8szGKWPyWzmWMVIPkFnU,21638
|
28
|
-
mdbq-3.10.
|
29
|
-
mdbq-3.10.
|
30
|
-
mdbq-3.10.
|
31
|
-
mdbq-3.10.
|
28
|
+
mdbq-3.10.3.dist-info/METADATA,sha256=TM8JAb8gTTte7N0agKbaDWZ14bmkl66dgCxIbLTqCbc,364
|
29
|
+
mdbq-3.10.3.dist-info/WHEEL,sha256=jB7zZ3N9hIM9adW7qlTAyycLYW9npaWKLRzaoVcLKcM,91
|
30
|
+
mdbq-3.10.3.dist-info/top_level.txt,sha256=2FQ-uLnCSB-OwFiWntzmwosW3X2Xqsg0ewh1axsaylA,5
|
31
|
+
mdbq-3.10.3.dist-info/RECORD,,
|
File without changes
|
File without changes
|