mdbq 3.10.1__py3-none-any.whl → 3.10.2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- mdbq/__version__.py +1 -1
- mdbq/mysql/uploader.py +205 -129
- {mdbq-3.10.1.dist-info → mdbq-3.10.2.dist-info}/METADATA +1 -1
- {mdbq-3.10.1.dist-info → mdbq-3.10.2.dist-info}/RECORD +6 -6
- {mdbq-3.10.1.dist-info → mdbq-3.10.2.dist-info}/WHEEL +0 -0
- {mdbq-3.10.1.dist-info → mdbq-3.10.2.dist-info}/top_level.txt +0 -0
mdbq/__version__.py
CHANGED
@@ -1 +1 @@
|
|
1
|
-
VERSION = '3.10.1'
|
1
|
+
VERSION = '3.10.2'
|
mdbq/mysql/uploader.py
CHANGED
@@ -1115,28 +1115,60 @@ class MySQLUploader:
|
|
1115
1115
|
if not data:
|
1116
1116
|
return
|
1117
1117
|
|
1118
|
+
# 验证事务模式
|
1119
|
+
transaction_mode = self._validate_transaction_mode(transaction_mode)
|
1120
|
+
|
1121
|
+
# 准备SQL语句
|
1122
|
+
sql = self._prepare_insert_sql(
|
1123
|
+
db_name, table_name, set_typ,
|
1124
|
+
check_duplicate, duplicate_columns,
|
1125
|
+
update_on_duplicate
|
1126
|
+
)
|
1127
|
+
|
1128
|
+
# 执行批量插入
|
1129
|
+
total_inserted, total_skipped, total_failed = self._execute_batch_insert(
|
1130
|
+
db_name, table_name, data, set_typ,
|
1131
|
+
sql, check_duplicate, duplicate_columns,
|
1132
|
+
batch_size, batch_id, transaction_mode
|
1133
|
+
)
|
1134
|
+
|
1135
|
+
logger.info('插入完成', {
|
1136
|
+
'库': db_name,
|
1137
|
+
'表': table_name,
|
1138
|
+
'完成总计': len(data),
|
1139
|
+
'插入': total_inserted,
|
1140
|
+
'跳过': total_skipped,
|
1141
|
+
'失败': total_failed,
|
1142
|
+
'事务提交模式': transaction_mode,
|
1143
|
+
})
|
1144
|
+
|
1145
|
+
def _validate_transaction_mode(self, mode: str) -> str:
|
1146
|
+
"""验证并标准化事务模式"""
|
1118
1147
|
valid_modes = ('row', 'batch', 'hybrid')
|
1119
|
-
if
|
1148
|
+
if mode.lower() not in valid_modes:
|
1120
1149
|
logger.error(sys._getframe().f_code.co_name, {
|
1121
|
-
'
|
1122
|
-
'表': table_name,
|
1123
|
-
'参数异常': f'transaction_mode -> {transaction_mode}',
|
1150
|
+
'参数异常': f'transaction_mode -> {mode}',
|
1124
1151
|
'可选值': valid_modes,
|
1125
1152
|
'自动使用默认模式': 'batch'
|
1126
1153
|
})
|
1127
|
-
|
1154
|
+
return 'batch'
|
1155
|
+
return mode.lower()
|
1128
1156
|
|
1157
|
+
def _prepare_insert_sql(
|
1158
|
+
self,
|
1159
|
+
db_name: str,
|
1160
|
+
table_name: str,
|
1161
|
+
set_typ: Dict[str, str],
|
1162
|
+
check_duplicate: bool,
|
1163
|
+
duplicate_columns: Optional[List[str]],
|
1164
|
+
update_on_duplicate: bool
|
1165
|
+
) -> str:
|
1166
|
+
"""准备插入SQL语句"""
|
1129
1167
|
# 获取所有列名(排除id列)
|
1130
1168
|
all_columns = [col for col in set_typ.keys() if col.lower() != 'id']
|
1131
1169
|
safe_columns = [self._validate_identifier(col) for col in all_columns]
|
1132
1170
|
placeholders = ','.join(['%s'] * len(safe_columns))
|
1133
1171
|
|
1134
|
-
# 初始化统计变量
|
1135
|
-
total_inserted = 0
|
1136
|
-
total_skipped = 0
|
1137
|
-
total_failed = 0
|
1138
|
-
|
1139
|
-
# 构建基础SQL语句
|
1140
1172
|
if check_duplicate:
|
1141
1173
|
if not duplicate_columns:
|
1142
1174
|
duplicate_columns = all_columns
|
@@ -1158,14 +1190,14 @@ class MySQLUploader:
|
|
1158
1190
|
if update_on_duplicate:
|
1159
1191
|
# 更新模式 - 使用ON DUPLICATE KEY UPDATE语法
|
1160
1192
|
update_clause = ", ".join([f"`{col}` = VALUES(`{col}`)" for col in all_columns])
|
1161
|
-
|
1193
|
+
return f"""
|
1162
1194
|
INSERT INTO `{db_name}`.`{table_name}`
|
1163
1195
|
(`{'`,`'.join(safe_columns)}`)
|
1164
1196
|
VALUES ({placeholders})
|
1165
1197
|
ON DUPLICATE KEY UPDATE {update_clause}
|
1166
1198
|
"""
|
1167
1199
|
else:
|
1168
|
-
|
1200
|
+
return f"""INSERT INTO `{db_name}`.`{table_name}`
|
1169
1201
|
(`{'`,`'.join(safe_columns)}`)
|
1170
1202
|
SELECT {placeholders}
|
1171
1203
|
FROM DUAL
|
@@ -1175,147 +1207,190 @@ class MySQLUploader:
|
|
1175
1207
|
)
|
1176
1208
|
"""
|
1177
1209
|
else:
|
1178
|
-
|
1210
|
+
return f"""
|
1179
1211
|
INSERT INTO `{db_name}`.`{table_name}`
|
1180
1212
|
(`{'`,`'.join(safe_columns)}`)
|
1181
1213
|
VALUES ({placeholders})
|
1182
1214
|
"""
|
1183
1215
|
|
1184
|
-
|
1216
|
+
def _execute_batch_insert(
|
1217
|
+
self,
|
1218
|
+
db_name: str,
|
1219
|
+
table_name: str,
|
1220
|
+
data: List[Dict],
|
1221
|
+
set_typ: Dict[str, str],
|
1222
|
+
sql: str,
|
1223
|
+
check_duplicate: bool,
|
1224
|
+
duplicate_columns: Optional[List[str]],
|
1225
|
+
batch_size: int,
|
1226
|
+
batch_id: Optional[str],
|
1227
|
+
transaction_mode: str
|
1228
|
+
) -> Tuple[int, int, int]:
|
1229
|
+
"""执行批量插入操作"""
|
1230
|
+
# 获取所有列名(排除id列)
|
1231
|
+
all_columns = [col for col in set_typ.keys() if col.lower() != 'id']
|
1232
|
+
|
1233
|
+
total_inserted = 0
|
1234
|
+
total_skipped = 0
|
1235
|
+
total_failed = 0
|
1236
|
+
|
1185
1237
|
with self._get_connection() as conn:
|
1186
1238
|
with conn.cursor() as cursor:
|
1187
1239
|
for i in range(0, len(data), batch_size):
|
1188
1240
|
batch = data[i:i + batch_size]
|
1189
|
-
batch_inserted =
|
1190
|
-
|
1191
|
-
|
1192
|
-
|
1193
|
-
|
1194
|
-
# 批量模式特殊处理 - 尝试逐行插入但保持事务
|
1195
|
-
try:
|
1196
|
-
for row_idx, row in enumerate(batch, 1):
|
1197
|
-
try:
|
1198
|
-
# 准备参数
|
1199
|
-
row_values = [row.get(col) for col in all_columns]
|
1200
|
-
if check_duplicate and not update_on_duplicate:
|
1201
|
-
row_values += [row.get(col) for col in duplicate_columns]
|
1202
|
-
|
1203
|
-
cursor.execute(sql, row_values)
|
1204
|
-
|
1205
|
-
if check_duplicate:
|
1206
|
-
# 检查是否实际插入了行
|
1207
|
-
if cursor.rowcount > 0:
|
1208
|
-
batch_inserted += 1
|
1209
|
-
else:
|
1210
|
-
batch_skipped += 1
|
1211
|
-
else:
|
1212
|
-
batch_inserted += 1
|
1213
|
-
|
1214
|
-
except Exception as e:
|
1215
|
-
batch_failed += 1
|
1216
|
-
logger.error(sys._getframe().f_code.co_name, {
|
1217
|
-
'库': db_name,
|
1218
|
-
'表': table_name,
|
1219
|
-
'批次/当前行': f'{batch_id} {row_idx}/{len(batch)}',
|
1220
|
-
'error_type': type(e).__name__,
|
1221
|
-
'单行插入失败': str(e),
|
1222
|
-
'数据类型': set_typ,
|
1223
|
-
'是否排重': check_duplicate,
|
1224
|
-
'排重列': duplicate_columns,
|
1225
|
-
'事务提交模式': transaction_mode,
|
1226
|
-
'处理方式': '继续处理剩余行'
|
1227
|
-
})
|
1228
|
-
continue # 继续处理下一行
|
1229
|
-
|
1230
|
-
# 批量模式最后统一提交
|
1231
|
-
conn.commit()
|
1232
|
-
|
1233
|
-
except Exception as e:
|
1234
|
-
# 如果整个批量操作失败,回滚
|
1235
|
-
conn.rollback()
|
1236
|
-
batch_failed = len(batch) # 标记整个批次失败
|
1237
|
-
logger.error(sys._getframe().f_code.co_name, {
|
1238
|
-
'库': db_name,
|
1239
|
-
'表': table_name,
|
1240
|
-
'批次': f'{batch_id} {i+1}/{len(data)}',
|
1241
|
-
'error_type': type(e).__name__,
|
1242
|
-
'批量操作失败': str(e),
|
1243
|
-
'事务提交模式': transaction_mode,
|
1244
|
-
'处理方式': '整个批次回滚'
|
1245
|
-
})
|
1246
|
-
|
1247
|
-
else: # row 或 hybrid 模式
|
1248
|
-
for row_idx, row in enumerate(batch, 1):
|
1249
|
-
try:
|
1250
|
-
# 准备参数
|
1251
|
-
row_values = [row.get(col) for col in all_columns]
|
1252
|
-
if check_duplicate and not update_on_duplicate:
|
1253
|
-
row_values += [row.get(col) for col in duplicate_columns]
|
1254
|
-
|
1255
|
-
cursor.execute(sql, row_values)
|
1256
|
-
|
1257
|
-
if check_duplicate:
|
1258
|
-
# 检查是否实际插入了行
|
1259
|
-
if cursor.rowcount > 0:
|
1260
|
-
batch_inserted += 1
|
1261
|
-
else:
|
1262
|
-
batch_skipped += 1
|
1263
|
-
else:
|
1264
|
-
batch_inserted += 1
|
1265
|
-
|
1266
|
-
# 根据模式决定提交时机
|
1267
|
-
if transaction_mode == 'row':
|
1268
|
-
conn.commit() # 逐行提交
|
1269
|
-
elif transaction_mode == 'hybrid' and row_idx % 100 == 0:
|
1270
|
-
conn.commit() # 每100行提交一次
|
1271
|
-
|
1272
|
-
except Exception as e:
|
1273
|
-
conn.rollback()
|
1274
|
-
batch_failed += 1
|
1275
|
-
logger.error(sys._getframe().f_code.co_name, {
|
1276
|
-
'库': db_name,
|
1277
|
-
'表': table_name,
|
1278
|
-
'批次/当前行': f'{batch_id} {row_idx}/{len(batch)}',
|
1279
|
-
'error_type': type(e).__name__,
|
1280
|
-
'单行插入失败': str(e),
|
1281
|
-
'数据类型': set_typ,
|
1282
|
-
'是否排重': check_duplicate,
|
1283
|
-
'排重列': duplicate_columns,
|
1284
|
-
'事务提交模式': transaction_mode,
|
1285
|
-
})
|
1286
|
-
|
1287
|
-
# 混合模式最后统一提交
|
1288
|
-
if transaction_mode == 'hybrid':
|
1289
|
-
conn.commit()
|
1241
|
+
batch_inserted, batch_skipped, batch_failed = self._process_batch(
|
1242
|
+
conn, cursor, db_name, table_name, batch, all_columns,
|
1243
|
+
sql, check_duplicate, duplicate_columns, batch_id,
|
1244
|
+
transaction_mode, i, len(data)
|
1245
|
+
)
|
1290
1246
|
|
1291
1247
|
# 更新总统计
|
1292
1248
|
total_inserted += batch_inserted
|
1293
1249
|
total_skipped += batch_skipped
|
1294
1250
|
total_failed += batch_failed
|
1295
1251
|
|
1296
|
-
|
1252
|
+
return total_inserted, total_skipped, total_failed
|
1253
|
+
|
1254
|
+
def _process_batch(
|
1255
|
+
self,
|
1256
|
+
conn,
|
1257
|
+
cursor,
|
1258
|
+
db_name: str,
|
1259
|
+
table_name: str,
|
1260
|
+
batch: List[Dict],
|
1261
|
+
all_columns: List[str],
|
1262
|
+
sql: str,
|
1263
|
+
check_duplicate: bool,
|
1264
|
+
duplicate_columns: Optional[List[str]],
|
1265
|
+
batch_id: Optional[str],
|
1266
|
+
transaction_mode: str,
|
1267
|
+
batch_index: int,
|
1268
|
+
total_data_length: int
|
1269
|
+
) -> Tuple[int, int, int]:
|
1270
|
+
"""处理单个批次的数据插入"""
|
1271
|
+
batch_inserted = 0
|
1272
|
+
batch_skipped = 0
|
1273
|
+
batch_failed = 0
|
1274
|
+
|
1275
|
+
if transaction_mode == 'batch':
|
1276
|
+
# 批量模式特殊处理 - 尝试逐行插入但保持事务
|
1277
|
+
try:
|
1278
|
+
for row_idx, row in enumerate(batch, 1):
|
1279
|
+
result = self._process_single_row(
|
1280
|
+
cursor, row, all_columns, sql,
|
1281
|
+
check_duplicate, duplicate_columns
|
1282
|
+
)
|
1283
|
+
if result == 'inserted':
|
1284
|
+
batch_inserted += 1
|
1285
|
+
elif result == 'skipped':
|
1286
|
+
batch_skipped += 1
|
1287
|
+
else:
|
1288
|
+
batch_failed += 1
|
1289
|
+
|
1290
|
+
# 批量模式最后统一提交
|
1291
|
+
conn.commit()
|
1292
|
+
|
1293
|
+
except Exception as e:
|
1294
|
+
# 如果整个批量操作失败,回滚
|
1295
|
+
conn.rollback()
|
1296
|
+
batch_failed = len(batch) # 标记整个批次失败
|
1297
|
+
logger.error(sys._getframe().f_code.co_name, {
|
1298
|
+
'库': db_name,
|
1299
|
+
'表': table_name,
|
1300
|
+
'批次': f'{batch_id} {batch_index + 1}/{total_data_length}',
|
1301
|
+
'error_type': type(e).__name__,
|
1302
|
+
'批量操作失败': str(e),
|
1303
|
+
'事务提交模式': transaction_mode,
|
1304
|
+
'处理方式': '整个批次回滚'
|
1305
|
+
})
|
1306
|
+
|
1307
|
+
else: # row 或 hybrid 模式
|
1308
|
+
for row_idx, row in enumerate(batch, 1):
|
1309
|
+
try:
|
1310
|
+
result = self._process_single_row(
|
1311
|
+
cursor, row, all_columns, sql,
|
1312
|
+
check_duplicate, duplicate_columns
|
1313
|
+
)
|
1314
|
+
if result == 'inserted':
|
1315
|
+
batch_inserted += 1
|
1316
|
+
elif result == 'skipped':
|
1317
|
+
batch_skipped += 1
|
1318
|
+
else:
|
1319
|
+
batch_failed += 1
|
1320
|
+
|
1321
|
+
# 根据模式决定提交时机
|
1322
|
+
if transaction_mode == 'row':
|
1323
|
+
conn.commit() # 逐行提交
|
1324
|
+
elif transaction_mode == 'hybrid' and row_idx % 100 == 0:
|
1325
|
+
conn.commit() # 每100行提交一次
|
1326
|
+
|
1327
|
+
except Exception as e:
|
1328
|
+
conn.rollback()
|
1329
|
+
batch_failed += 1
|
1330
|
+
logger.error(sys._getframe().f_code.co_name, {
|
1297
1331
|
'库': db_name,
|
1298
1332
|
'表': table_name,
|
1299
|
-
'
|
1300
|
-
'
|
1301
|
-
'
|
1302
|
-
'
|
1303
|
-
'
|
1304
|
-
'
|
1305
|
-
'失败': batch_failed,
|
1333
|
+
'批次/当前行': f'{batch_id} {row_idx}/{len(batch)}',
|
1334
|
+
'error_type': type(e).__name__,
|
1335
|
+
'单行插入失败': str(e),
|
1336
|
+
'数据类型': set_typ,
|
1337
|
+
'是否排重': check_duplicate,
|
1338
|
+
'排重列': duplicate_columns,
|
1306
1339
|
'事务提交模式': transaction_mode,
|
1307
1340
|
})
|
1308
1341
|
|
1309
|
-
|
1342
|
+
# 混合模式最后统一提交
|
1343
|
+
if transaction_mode == 'hybrid':
|
1344
|
+
conn.commit()
|
1345
|
+
|
1346
|
+
logger.debug(sys._getframe().f_code.co_name, {
|
1310
1347
|
'库': db_name,
|
1311
1348
|
'表': table_name,
|
1312
|
-
'
|
1313
|
-
'
|
1314
|
-
'
|
1315
|
-
'
|
1349
|
+
'批次': batch_id,
|
1350
|
+
'批次处理完成': batch_index // len(batch) + 1,
|
1351
|
+
'总批次': (total_data_length + len(batch) - 1) // len(batch),
|
1352
|
+
'数据量': len(batch),
|
1353
|
+
'插入': batch_inserted,
|
1354
|
+
'跳过': batch_skipped,
|
1355
|
+
'失败': batch_failed,
|
1316
1356
|
'事务提交模式': transaction_mode,
|
1317
1357
|
})
|
1318
1358
|
|
1359
|
+
return batch_inserted, batch_skipped, batch_failed
|
1360
|
+
|
1361
|
+
def _process_single_row(
|
1362
|
+
self,
|
1363
|
+
cursor,
|
1364
|
+
row: Dict,
|
1365
|
+
all_columns: List[str],
|
1366
|
+
sql: str,
|
1367
|
+
check_duplicate: bool,
|
1368
|
+
duplicate_columns: Optional[List[str]]
|
1369
|
+
) -> str:
|
1370
|
+
"""处理单行数据插入"""
|
1371
|
+
try:
|
1372
|
+
# 准备参数
|
1373
|
+
row_values = [row.get(col) for col in all_columns]
|
1374
|
+
if check_duplicate:
|
1375
|
+
row_values += [row.get(col) for col in duplicate_columns]
|
1376
|
+
|
1377
|
+
cursor.execute(sql, row_values)
|
1378
|
+
|
1379
|
+
if check_duplicate:
|
1380
|
+
# 检查是否实际插入了行
|
1381
|
+
return 'inserted' if cursor.rowcount > 0 else 'skipped'
|
1382
|
+
return 'inserted'
|
1383
|
+
|
1384
|
+
except Exception as e:
|
1385
|
+
logger.error(sys._getframe().f_code.co_name, {
|
1386
|
+
'error_type': type(e).__name__,
|
1387
|
+
'单行插入失败': str(e),
|
1388
|
+
'是否排重': check_duplicate,
|
1389
|
+
'排重列': duplicate_columns,
|
1390
|
+
'处理方式': '继续处理剩余行'
|
1391
|
+
})
|
1392
|
+
return 'failed'
|
1393
|
+
|
1319
1394
|
def close(self):
|
1320
1395
|
"""
|
1321
1396
|
关闭连接池并清理资源
|
@@ -1385,6 +1460,7 @@ class MySQLUploader:
|
|
1385
1460
|
def __exit__(self, exc_type, exc_val, exc_tb):
|
1386
1461
|
self.close()
|
1387
1462
|
|
1463
|
+
|
1388
1464
|
def main():
|
1389
1465
|
uploader = MySQLUploader(
|
1390
1466
|
username='root',
|
@@ -1,5 +1,5 @@
|
|
1
1
|
mdbq/__init__.py,sha256=Il5Q9ATdX8yXqVxtP_nYqUhExzxPC_qk_WXQ_4h0exg,16
|
2
|
-
mdbq/__version__.py,sha256=
|
2
|
+
mdbq/__version__.py,sha256=tL5iFQ6j9Svg-3tbUuEZAgDFN3ipIhdJjFUPU6EHSRQ,18
|
3
3
|
mdbq/aggregation/__init__.py,sha256=EeDqX2Aml6SPx8363J-v1lz0EcZtgwIBYyCJV6CcEDU,40
|
4
4
|
mdbq/aggregation/optimize.py,sha256=2oalzD9weZhDclUC22OLxYa8Zj7KnmsGUoUau_Jlyc4,19796
|
5
5
|
mdbq/aggregation/query_data.py,sha256=5_OzjGR5Sq00q-EgAYmSE5V9i4Solw9y4hkldl4mvt8,179808
|
@@ -12,7 +12,7 @@ mdbq/mysql/__init__.py,sha256=A_DPJyAoEvTSFojiI2e94zP0FKtCkkwKP1kYUCSyQzo,11
|
|
12
12
|
mdbq/mysql/deduplicator.py,sha256=brhX3eyE8-kn3nAYweKfBbAkXiNcyw_pL4CTyPqmPBg,21983
|
13
13
|
mdbq/mysql/mysql.py,sha256=Fzaqbjg5g3HdNl50jInIrdurdzcgR2CCzdKLVImD1-Q,55339
|
14
14
|
mdbq/mysql/s_query.py,sha256=X055aLRAgxVvueXx4NbfNjp6MyBI02_XBb1pTKw09L0,8660
|
15
|
-
mdbq/mysql/uploader.py,sha256=
|
15
|
+
mdbq/mysql/uploader.py,sha256=XOSeGg74zN3qYFfWmLqr98H7tCj74dIMCS3C0cvS3kU,58994
|
16
16
|
mdbq/other/__init__.py,sha256=jso1oHcy6cJEfa7udS_9uO5X6kZLoPBF8l3wCYmr5dM,18
|
17
17
|
mdbq/other/download_sku_picture.py,sha256=YU8DxKMXbdeE1OOKEA848WVp62jYHw5O4tXTjUdq9H0,44832
|
18
18
|
mdbq/other/otk.py,sha256=iclBIFbQbhlqzUbcMMoePXBpcP1eZ06ZtjnhcA_EbmE,7241
|
@@ -25,7 +25,7 @@ mdbq/redis/__init__.py,sha256=YtgBlVSMDphtpwYX248wGge1x-Ex_mMufz4-8W0XRmA,12
|
|
25
25
|
mdbq/redis/getredis.py,sha256=Uk8-cOWT0JU1qRyIVqdbYokSLvkDIAfcokmYj1ebw8k,24104
|
26
26
|
mdbq/spider/__init__.py,sha256=RBMFXGy_jd1HXZhngB2T2XTvJqki8P_Fr-pBcwijnew,18
|
27
27
|
mdbq/spider/aikucun.py,sha256=YyPWa_nOH1zs8wgTDcgzn5w8szGKWPyWzmWMVIPkFnU,21638
|
28
|
-
mdbq-3.10.
|
29
|
-
mdbq-3.10.
|
30
|
-
mdbq-3.10.
|
31
|
-
mdbq-3.10.
|
28
|
+
mdbq-3.10.2.dist-info/METADATA,sha256=D9d_UixDPHEbrdRE1yjA4SHjo4tYoY60_R4cBGPF3ms,364
|
29
|
+
mdbq-3.10.2.dist-info/WHEEL,sha256=jB7zZ3N9hIM9adW7qlTAyycLYW9npaWKLRzaoVcLKcM,91
|
30
|
+
mdbq-3.10.2.dist-info/top_level.txt,sha256=2FQ-uLnCSB-OwFiWntzmwosW3X2Xqsg0ewh1axsaylA,5
|
31
|
+
mdbq-3.10.2.dist-info/RECORD,,
|
File without changes
|
File without changes
|