mdbq 3.10.0__py3-none-any.whl → 3.10.2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- mdbq/__version__.py +1 -1
- mdbq/mysql/uploader.py +205 -77
- {mdbq-3.10.0.dist-info → mdbq-3.10.2.dist-info}/METADATA +1 -1
- {mdbq-3.10.0.dist-info → mdbq-3.10.2.dist-info}/RECORD +6 -6
- {mdbq-3.10.0.dist-info → mdbq-3.10.2.dist-info}/WHEEL +0 -0
- {mdbq-3.10.0.dist-info → mdbq-3.10.2.dist-info}/top_level.txt +0 -0
mdbq/__version__.py
CHANGED
@@ -1 +1 @@
|
|
1
|
-
VERSION = '3.10.0'
|
1
|
+
VERSION = '3.10.2'
|
mdbq/mysql/uploader.py
CHANGED
@@ -1115,29 +1115,60 @@ class MySQLUploader:
|
|
1115
1115
|
if not data:
|
1116
1116
|
return
|
1117
1117
|
|
1118
|
+
# 验证事务模式
|
1119
|
+
transaction_mode = self._validate_transaction_mode(transaction_mode)
|
1120
|
+
|
1121
|
+
# 准备SQL语句
|
1122
|
+
sql = self._prepare_insert_sql(
|
1123
|
+
db_name, table_name, set_typ,
|
1124
|
+
check_duplicate, duplicate_columns,
|
1125
|
+
update_on_duplicate
|
1126
|
+
)
|
1127
|
+
|
1128
|
+
# 执行批量插入
|
1129
|
+
total_inserted, total_skipped, total_failed = self._execute_batch_insert(
|
1130
|
+
db_name, table_name, data, set_typ,
|
1131
|
+
sql, check_duplicate, duplicate_columns,
|
1132
|
+
batch_size, batch_id, transaction_mode
|
1133
|
+
)
|
1134
|
+
|
1135
|
+
logger.info('插入完成', {
|
1136
|
+
'库': db_name,
|
1137
|
+
'表': table_name,
|
1138
|
+
'完成总计': len(data),
|
1139
|
+
'插入': total_inserted,
|
1140
|
+
'跳过': total_skipped,
|
1141
|
+
'失败': total_failed,
|
1142
|
+
'事务提交模式': transaction_mode,
|
1143
|
+
})
|
1144
|
+
|
1145
|
+
def _validate_transaction_mode(self, mode: str) -> str:
|
1146
|
+
"""验证并标准化事务模式"""
|
1118
1147
|
valid_modes = ('row', 'batch', 'hybrid')
|
1119
|
-
if transaction_mode.lower() not in valid_modes:
|
1148
|
+
if mode.lower() not in valid_modes:
|
1120
1149
|
logger.error(sys._getframe().f_code.co_name, {
|
1121
|
-
'库': db_name,
|
1122
|
-
'表': table_name,
|
1123
|
-
'参数异常': f'transaction_mode -> {transaction_mode}',
|
1150
|
+
'参数异常': f'transaction_mode -> {mode}',
|
1124
1151
|
'可选值': valid_modes,
|
1125
1152
|
'自动使用默认模式': 'batch'
|
1126
1153
|
})
|
1127
|
-
|
1128
|
-
|
1154
|
+
return 'batch'
|
1155
|
+
return mode.lower()
|
1129
1156
|
|
1157
|
+
def _prepare_insert_sql(
|
1158
|
+
self,
|
1159
|
+
db_name: str,
|
1160
|
+
table_name: str,
|
1161
|
+
set_typ: Dict[str, str],
|
1162
|
+
check_duplicate: bool,
|
1163
|
+
duplicate_columns: Optional[List[str]],
|
1164
|
+
update_on_duplicate: bool
|
1165
|
+
) -> str:
|
1166
|
+
"""准备插入SQL语句"""
|
1130
1167
|
# 获取所有列名(排除id列)
|
1131
1168
|
all_columns = [col for col in set_typ.keys() if col.lower() != 'id']
|
1132
1169
|
safe_columns = [self._validate_identifier(col) for col in all_columns]
|
1133
1170
|
placeholders = ','.join(['%s'] * len(safe_columns))
|
1134
1171
|
|
1135
|
-
# 初始化统计变量
|
1136
|
-
total_inserted = 0
|
1137
|
-
total_skipped = 0
|
1138
|
-
total_failed = 0
|
1139
|
-
|
1140
|
-
# 构建基础SQL语句
|
1141
1172
|
if check_duplicate:
|
1142
1173
|
if not duplicate_columns:
|
1143
1174
|
duplicate_columns = all_columns
|
@@ -1159,14 +1190,14 @@ class MySQLUploader:
|
|
1159
1190
|
if update_on_duplicate:
|
1160
1191
|
# 更新模式 - 使用ON DUPLICATE KEY UPDATE语法
|
1161
1192
|
update_clause = ", ".join([f"`{col}` = VALUES(`{col}`)" for col in all_columns])
|
1162
|
-
|
1193
|
+
return f"""
|
1163
1194
|
INSERT INTO `{db_name}`.`{table_name}`
|
1164
1195
|
(`{'`,`'.join(safe_columns)}`)
|
1165
1196
|
VALUES ({placeholders})
|
1166
1197
|
ON DUPLICATE KEY UPDATE {update_clause}
|
1167
1198
|
"""
|
1168
1199
|
else:
|
1169
|
-
|
1200
|
+
return f"""INSERT INTO `{db_name}`.`{table_name}`
|
1170
1201
|
(`{'`,`'.join(safe_columns)}`)
|
1171
1202
|
SELECT {placeholders}
|
1172
1203
|
FROM DUAL
|
@@ -1176,94 +1207,190 @@ class MySQLUploader:
|
|
1176
1207
|
)
|
1177
1208
|
"""
|
1178
1209
|
else:
|
1179
|
-
|
1210
|
+
return f"""
|
1180
1211
|
INSERT INTO `{db_name}`.`{table_name}`
|
1181
1212
|
(`{'`,`'.join(safe_columns)}`)
|
1182
1213
|
VALUES ({placeholders})
|
1183
1214
|
"""
|
1184
1215
|
|
1185
|
-
|
1216
|
+
def _execute_batch_insert(
|
1217
|
+
self,
|
1218
|
+
db_name: str,
|
1219
|
+
table_name: str,
|
1220
|
+
data: List[Dict],
|
1221
|
+
set_typ: Dict[str, str],
|
1222
|
+
sql: str,
|
1223
|
+
check_duplicate: bool,
|
1224
|
+
duplicate_columns: Optional[List[str]],
|
1225
|
+
batch_size: int,
|
1226
|
+
batch_id: Optional[str],
|
1227
|
+
transaction_mode: str
|
1228
|
+
) -> Tuple[int, int, int]:
|
1229
|
+
"""执行批量插入操作"""
|
1230
|
+
# 获取所有列名(排除id列)
|
1231
|
+
all_columns = [col for col in set_typ.keys() if col.lower() != 'id']
|
1232
|
+
|
1233
|
+
total_inserted = 0
|
1234
|
+
total_skipped = 0
|
1235
|
+
total_failed = 0
|
1236
|
+
|
1186
1237
|
with self._get_connection() as conn:
|
1187
1238
|
with conn.cursor() as cursor:
|
1188
1239
|
for i in range(0, len(data), batch_size):
|
1189
1240
|
batch = data[i:i + batch_size]
|
1190
|
-
batch_inserted = 0
|
1191
|
-
|
1192
|
-
|
1193
|
-
|
1194
|
-
|
1195
|
-
try:
|
1196
|
-
# 准备参数
|
1197
|
-
row_values = [row.get(col) for col in all_columns]
|
1198
|
-
if check_duplicate and not update_on_duplicate:
|
1199
|
-
row_values += [row.get(col) for col in duplicate_columns]
|
1200
|
-
|
1201
|
-
cursor.execute(sql, row_values)
|
1202
|
-
|
1203
|
-
if check_duplicate:
|
1204
|
-
# 检查是否实际插入了行
|
1205
|
-
if cursor.rowcount > 0:
|
1206
|
-
batch_inserted += 1
|
1207
|
-
else:
|
1208
|
-
batch_skipped += 1
|
1209
|
-
else:
|
1210
|
-
batch_inserted += 1
|
1211
|
-
|
1212
|
-
# 根据模式决定提交时机
|
1213
|
-
if transaction_mode == 'row':
|
1214
|
-
conn.commit() # 逐行提交
|
1215
|
-
elif transaction_mode == 'hybrid' and row_idx % 100 == 0:
|
1216
|
-
conn.commit() # 每100行提交一次
|
1217
|
-
|
1218
|
-
except Exception as e:
|
1219
|
-
# if transaction_mode == 'row':
|
1220
|
-
# conn.rollback()
|
1221
|
-
conn.rollback()
|
1222
|
-
batch_failed += 1
|
1223
|
-
logger.error(sys._getframe().f_code.co_name, {
|
1224
|
-
'库': db_name,
|
1225
|
-
'表': table_name,
|
1226
|
-
'批次': batch_id,
|
1227
|
-
'error_type': type(e).__name__,
|
1228
|
-
'单行插入失败': str(e),
|
1229
|
-
'数据类型': set_typ,
|
1230
|
-
'是否排重': check_duplicate,
|
1231
|
-
'排重列': duplicate_columns,
|
1232
|
-
'事务提交模式': transaction_mode,
|
1233
|
-
})
|
1234
|
-
|
1235
|
-
# 批量模式最后统一提交
|
1236
|
-
if transaction_mode in ('batch', 'hybrid'):
|
1237
|
-
conn.commit()
|
1241
|
+
batch_inserted, batch_skipped, batch_failed = self._process_batch(
|
1242
|
+
conn, cursor, db_name, table_name, batch, all_columns,
|
1243
|
+
sql, check_duplicate, duplicate_columns, batch_id,
|
1244
|
+
transaction_mode, i, len(data)
|
1245
|
+
)
|
1238
1246
|
|
1239
1247
|
# 更新总统计
|
1240
1248
|
total_inserted += batch_inserted
|
1241
1249
|
total_skipped += batch_skipped
|
1242
1250
|
total_failed += batch_failed
|
1243
1251
|
|
1244
|
-
|
1252
|
+
return total_inserted, total_skipped, total_failed
|
1253
|
+
|
1254
|
+
def _process_batch(
|
1255
|
+
self,
|
1256
|
+
conn,
|
1257
|
+
cursor,
|
1258
|
+
db_name: str,
|
1259
|
+
table_name: str,
|
1260
|
+
batch: List[Dict],
|
1261
|
+
all_columns: List[str],
|
1262
|
+
sql: str,
|
1263
|
+
check_duplicate: bool,
|
1264
|
+
duplicate_columns: Optional[List[str]],
|
1265
|
+
batch_id: Optional[str],
|
1266
|
+
transaction_mode: str,
|
1267
|
+
batch_index: int,
|
1268
|
+
total_data_length: int
|
1269
|
+
) -> Tuple[int, int, int]:
|
1270
|
+
"""处理单个批次的数据插入"""
|
1271
|
+
batch_inserted = 0
|
1272
|
+
batch_skipped = 0
|
1273
|
+
batch_failed = 0
|
1274
|
+
|
1275
|
+
if transaction_mode == 'batch':
|
1276
|
+
# 批量模式特殊处理 - 尝试逐行插入但保持事务
|
1277
|
+
try:
|
1278
|
+
for row_idx, row in enumerate(batch, 1):
|
1279
|
+
result = self._process_single_row(
|
1280
|
+
cursor, row, all_columns, sql,
|
1281
|
+
check_duplicate, duplicate_columns
|
1282
|
+
)
|
1283
|
+
if result == 'inserted':
|
1284
|
+
batch_inserted += 1
|
1285
|
+
elif result == 'skipped':
|
1286
|
+
batch_skipped += 1
|
1287
|
+
else:
|
1288
|
+
batch_failed += 1
|
1289
|
+
|
1290
|
+
# 批量模式最后统一提交
|
1291
|
+
conn.commit()
|
1292
|
+
|
1293
|
+
except Exception as e:
|
1294
|
+
# 如果整个批量操作失败,回滚
|
1295
|
+
conn.rollback()
|
1296
|
+
batch_failed = len(batch) # 标记整个批次失败
|
1297
|
+
logger.error(sys._getframe().f_code.co_name, {
|
1298
|
+
'库': db_name,
|
1299
|
+
'表': table_name,
|
1300
|
+
'批次': f'{batch_id} {batch_index + 1}/{total_data_length}',
|
1301
|
+
'error_type': type(e).__name__,
|
1302
|
+
'批量操作失败': str(e),
|
1303
|
+
'事务提交模式': transaction_mode,
|
1304
|
+
'处理方式': '整个批次回滚'
|
1305
|
+
})
|
1306
|
+
|
1307
|
+
else: # row 或 hybrid 模式
|
1308
|
+
for row_idx, row in enumerate(batch, 1):
|
1309
|
+
try:
|
1310
|
+
result = self._process_single_row(
|
1311
|
+
cursor, row, all_columns, sql,
|
1312
|
+
check_duplicate, duplicate_columns
|
1313
|
+
)
|
1314
|
+
if result == 'inserted':
|
1315
|
+
batch_inserted += 1
|
1316
|
+
elif result == 'skipped':
|
1317
|
+
batch_skipped += 1
|
1318
|
+
else:
|
1319
|
+
batch_failed += 1
|
1320
|
+
|
1321
|
+
# 根据模式决定提交时机
|
1322
|
+
if transaction_mode == 'row':
|
1323
|
+
conn.commit() # 逐行提交
|
1324
|
+
elif transaction_mode == 'hybrid' and row_idx % 100 == 0:
|
1325
|
+
conn.commit() # 每100行提交一次
|
1326
|
+
|
1327
|
+
except Exception as e:
|
1328
|
+
conn.rollback()
|
1329
|
+
batch_failed += 1
|
1330
|
+
logger.error(sys._getframe().f_code.co_name, {
|
1245
1331
|
'库': db_name,
|
1246
1332
|
'表': table_name,
|
1247
|
-
'
|
1248
|
-
'
|
1249
|
-
'
|
1250
|
-
'
|
1251
|
-
'
|
1252
|
-
'
|
1253
|
-
'失败': batch_failed,
|
1333
|
+
'批次/当前行': f'{batch_id} {row_idx}/{len(batch)}',
|
1334
|
+
'error_type': type(e).__name__,
|
1335
|
+
'单行插入失败': str(e),
|
1336
|
+
'数据类型': set_typ,
|
1337
|
+
'是否排重': check_duplicate,
|
1338
|
+
'排重列': duplicate_columns,
|
1254
1339
|
'事务提交模式': transaction_mode,
|
1255
1340
|
})
|
1256
1341
|
|
1257
|
-
|
1342
|
+
# 混合模式最后统一提交
|
1343
|
+
if transaction_mode == 'hybrid':
|
1344
|
+
conn.commit()
|
1345
|
+
|
1346
|
+
logger.debug(sys._getframe().f_code.co_name, {
|
1258
1347
|
'库': db_name,
|
1259
1348
|
'表': table_name,
|
1260
|
-
'
|
1261
|
-
'
|
1262
|
-
'
|
1263
|
-
'
|
1349
|
+
'批次': batch_id,
|
1350
|
+
'批次处理完成': batch_index // len(batch) + 1,
|
1351
|
+
'总批次': (total_data_length + len(batch) - 1) // len(batch),
|
1352
|
+
'数据量': len(batch),
|
1353
|
+
'插入': batch_inserted,
|
1354
|
+
'跳过': batch_skipped,
|
1355
|
+
'失败': batch_failed,
|
1264
1356
|
'事务提交模式': transaction_mode,
|
1265
1357
|
})
|
1266
1358
|
|
1359
|
+
return batch_inserted, batch_skipped, batch_failed
|
1360
|
+
|
1361
|
+
def _process_single_row(
|
1362
|
+
self,
|
1363
|
+
cursor,
|
1364
|
+
row: Dict,
|
1365
|
+
all_columns: List[str],
|
1366
|
+
sql: str,
|
1367
|
+
check_duplicate: bool,
|
1368
|
+
duplicate_columns: Optional[List[str]]
|
1369
|
+
) -> str:
|
1370
|
+
"""处理单行数据插入"""
|
1371
|
+
try:
|
1372
|
+
# 准备参数
|
1373
|
+
row_values = [row.get(col) for col in all_columns]
|
1374
|
+
if check_duplicate:
|
1375
|
+
row_values += [row.get(col) for col in duplicate_columns]
|
1376
|
+
|
1377
|
+
cursor.execute(sql, row_values)
|
1378
|
+
|
1379
|
+
if check_duplicate:
|
1380
|
+
# 检查是否实际插入了行
|
1381
|
+
return 'inserted' if cursor.rowcount > 0 else 'skipped'
|
1382
|
+
return 'inserted'
|
1383
|
+
|
1384
|
+
except Exception as e:
|
1385
|
+
logger.error(sys._getframe().f_code.co_name, {
|
1386
|
+
'error_type': type(e).__name__,
|
1387
|
+
'单行插入失败': str(e),
|
1388
|
+
'是否排重': check_duplicate,
|
1389
|
+
'排重列': duplicate_columns,
|
1390
|
+
'处理方式': '继续处理剩余行'
|
1391
|
+
})
|
1392
|
+
return 'failed'
|
1393
|
+
|
1267
1394
|
def close(self):
|
1268
1395
|
"""
|
1269
1396
|
关闭连接池并清理资源
|
@@ -1333,6 +1460,7 @@ class MySQLUploader:
|
|
1333
1460
|
def __exit__(self, exc_type, exc_val, exc_tb):
|
1334
1461
|
self.close()
|
1335
1462
|
|
1463
|
+
|
1336
1464
|
def main():
|
1337
1465
|
uploader = MySQLUploader(
|
1338
1466
|
username='root',
|
@@ -1,5 +1,5 @@
|
|
1
1
|
mdbq/__init__.py,sha256=Il5Q9ATdX8yXqVxtP_nYqUhExzxPC_qk_WXQ_4h0exg,16
|
2
|
-
mdbq/__version__.py,sha256=
|
2
|
+
mdbq/__version__.py,sha256=tL5iFQ6j9Svg-3tbUuEZAgDFN3ipIhdJjFUPU6EHSRQ,18
|
3
3
|
mdbq/aggregation/__init__.py,sha256=EeDqX2Aml6SPx8363J-v1lz0EcZtgwIBYyCJV6CcEDU,40
|
4
4
|
mdbq/aggregation/optimize.py,sha256=2oalzD9weZhDclUC22OLxYa8Zj7KnmsGUoUau_Jlyc4,19796
|
5
5
|
mdbq/aggregation/query_data.py,sha256=5_OzjGR5Sq00q-EgAYmSE5V9i4Solw9y4hkldl4mvt8,179808
|
@@ -12,7 +12,7 @@ mdbq/mysql/__init__.py,sha256=A_DPJyAoEvTSFojiI2e94zP0FKtCkkwKP1kYUCSyQzo,11
|
|
12
12
|
mdbq/mysql/deduplicator.py,sha256=brhX3eyE8-kn3nAYweKfBbAkXiNcyw_pL4CTyPqmPBg,21983
|
13
13
|
mdbq/mysql/mysql.py,sha256=Fzaqbjg5g3HdNl50jInIrdurdzcgR2CCzdKLVImD1-Q,55339
|
14
14
|
mdbq/mysql/s_query.py,sha256=X055aLRAgxVvueXx4NbfNjp6MyBI02_XBb1pTKw09L0,8660
|
15
|
-
mdbq/mysql/uploader.py,sha256=
|
15
|
+
mdbq/mysql/uploader.py,sha256=XOSeGg74zN3qYFfWmLqr98H7tCj74dIMCS3C0cvS3kU,58994
|
16
16
|
mdbq/other/__init__.py,sha256=jso1oHcy6cJEfa7udS_9uO5X6kZLoPBF8l3wCYmr5dM,18
|
17
17
|
mdbq/other/download_sku_picture.py,sha256=YU8DxKMXbdeE1OOKEA848WVp62jYHw5O4tXTjUdq9H0,44832
|
18
18
|
mdbq/other/otk.py,sha256=iclBIFbQbhlqzUbcMMoePXBpcP1eZ06ZtjnhcA_EbmE,7241
|
@@ -25,7 +25,7 @@ mdbq/redis/__init__.py,sha256=YtgBlVSMDphtpwYX248wGge1x-Ex_mMufz4-8W0XRmA,12
|
|
25
25
|
mdbq/redis/getredis.py,sha256=Uk8-cOWT0JU1qRyIVqdbYokSLvkDIAfcokmYj1ebw8k,24104
|
26
26
|
mdbq/spider/__init__.py,sha256=RBMFXGy_jd1HXZhngB2T2XTvJqki8P_Fr-pBcwijnew,18
|
27
27
|
mdbq/spider/aikucun.py,sha256=YyPWa_nOH1zs8wgTDcgzn5w8szGKWPyWzmWMVIPkFnU,21638
|
28
|
-
mdbq-3.10.
|
29
|
-
mdbq-3.10.
|
30
|
-
mdbq-3.10.
|
31
|
-
mdbq-3.10.
|
28
|
+
mdbq-3.10.2.dist-info/METADATA,sha256=D9d_UixDPHEbrdRE1yjA4SHjo4tYoY60_R4cBGPF3ms,364
|
29
|
+
mdbq-3.10.2.dist-info/WHEEL,sha256=jB7zZ3N9hIM9adW7qlTAyycLYW9npaWKLRzaoVcLKcM,91
|
30
|
+
mdbq-3.10.2.dist-info/top_level.txt,sha256=2FQ-uLnCSB-OwFiWntzmwosW3X2Xqsg0ewh1axsaylA,5
|
31
|
+
mdbq-3.10.2.dist-info/RECORD,,
|
File without changes
|
File without changes
|