mdbq 3.10.1__py3-none-any.whl → 3.10.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
mdbq/__version__.py CHANGED
@@ -1 +1 @@
1
- VERSION = '3.10.1'
1
+ VERSION = '3.10.2'
mdbq/mysql/uploader.py CHANGED
@@ -1115,28 +1115,60 @@ class MySQLUploader:
1115
1115
  if not data:
1116
1116
  return
1117
1117
 
1118
+ # 验证事务模式
1119
+ transaction_mode = self._validate_transaction_mode(transaction_mode)
1120
+
1121
+ # 准备SQL语句
1122
+ sql = self._prepare_insert_sql(
1123
+ db_name, table_name, set_typ,
1124
+ check_duplicate, duplicate_columns,
1125
+ update_on_duplicate
1126
+ )
1127
+
1128
+ # 执行批量插入
1129
+ total_inserted, total_skipped, total_failed = self._execute_batch_insert(
1130
+ db_name, table_name, data, set_typ,
1131
+ sql, check_duplicate, duplicate_columns,
1132
+ batch_size, batch_id, transaction_mode
1133
+ )
1134
+
1135
+ logger.info('插入完成', {
1136
+ '库': db_name,
1137
+ '表': table_name,
1138
+ '完成总计': len(data),
1139
+ '插入': total_inserted,
1140
+ '跳过': total_skipped,
1141
+ '失败': total_failed,
1142
+ '事务提交模式': transaction_mode,
1143
+ })
1144
+
1145
+ def _validate_transaction_mode(self, mode: str) -> str:
1146
+ """验证并标准化事务模式"""
1118
1147
  valid_modes = ('row', 'batch', 'hybrid')
1119
- if transaction_mode.lower() not in valid_modes:
1148
+ if mode.lower() not in valid_modes:
1120
1149
  logger.error(sys._getframe().f_code.co_name, {
1121
- '': db_name,
1122
- '表': table_name,
1123
- '参数异常': f'transaction_mode -> {transaction_mode}',
1150
+ '参数异常': f'transaction_mode -> {mode}',
1124
1151
  '可选值': valid_modes,
1125
1152
  '自动使用默认模式': 'batch'
1126
1153
  })
1127
- transaction_mode = 'batch'
1154
+ return 'batch'
1155
+ return mode.lower()
1128
1156
 
1157
+ def _prepare_insert_sql(
1158
+ self,
1159
+ db_name: str,
1160
+ table_name: str,
1161
+ set_typ: Dict[str, str],
1162
+ check_duplicate: bool,
1163
+ duplicate_columns: Optional[List[str]],
1164
+ update_on_duplicate: bool
1165
+ ) -> str:
1166
+ """准备插入SQL语句"""
1129
1167
  # 获取所有列名(排除id列)
1130
1168
  all_columns = [col for col in set_typ.keys() if col.lower() != 'id']
1131
1169
  safe_columns = [self._validate_identifier(col) for col in all_columns]
1132
1170
  placeholders = ','.join(['%s'] * len(safe_columns))
1133
1171
 
1134
- # 初始化统计变量
1135
- total_inserted = 0
1136
- total_skipped = 0
1137
- total_failed = 0
1138
-
1139
- # 构建基础SQL语句
1140
1172
  if check_duplicate:
1141
1173
  if not duplicate_columns:
1142
1174
  duplicate_columns = all_columns
@@ -1158,14 +1190,14 @@ class MySQLUploader:
1158
1190
  if update_on_duplicate:
1159
1191
  # 更新模式 - 使用ON DUPLICATE KEY UPDATE语法
1160
1192
  update_clause = ", ".join([f"`{col}` = VALUES(`{col}`)" for col in all_columns])
1161
- sql = f"""
1193
+ return f"""
1162
1194
  INSERT INTO `{db_name}`.`{table_name}`
1163
1195
  (`{'`,`'.join(safe_columns)}`)
1164
1196
  VALUES ({placeholders})
1165
1197
  ON DUPLICATE KEY UPDATE {update_clause}
1166
1198
  """
1167
1199
  else:
1168
- sql = f"""INSERT INTO `{db_name}`.`{table_name}`
1200
+ return f"""INSERT INTO `{db_name}`.`{table_name}`
1169
1201
  (`{'`,`'.join(safe_columns)}`)
1170
1202
  SELECT {placeholders}
1171
1203
  FROM DUAL
@@ -1175,147 +1207,190 @@ class MySQLUploader:
1175
1207
  )
1176
1208
  """
1177
1209
  else:
1178
- sql = f"""
1210
+ return f"""
1179
1211
  INSERT INTO `{db_name}`.`{table_name}`
1180
1212
  (`{'`,`'.join(safe_columns)}`)
1181
1213
  VALUES ({placeholders})
1182
1214
  """
1183
1215
 
1184
- # 分批插入数据
1216
+ def _execute_batch_insert(
1217
+ self,
1218
+ db_name: str,
1219
+ table_name: str,
1220
+ data: List[Dict],
1221
+ set_typ: Dict[str, str],
1222
+ sql: str,
1223
+ check_duplicate: bool,
1224
+ duplicate_columns: Optional[List[str]],
1225
+ batch_size: int,
1226
+ batch_id: Optional[str],
1227
+ transaction_mode: str
1228
+ ) -> Tuple[int, int, int]:
1229
+ """执行批量插入操作"""
1230
+ # 获取所有列名(排除id列)
1231
+ all_columns = [col for col in set_typ.keys() if col.lower() != 'id']
1232
+
1233
+ total_inserted = 0
1234
+ total_skipped = 0
1235
+ total_failed = 0
1236
+
1185
1237
  with self._get_connection() as conn:
1186
1238
  with conn.cursor() as cursor:
1187
1239
  for i in range(0, len(data), batch_size):
1188
1240
  batch = data[i:i + batch_size]
1189
- batch_inserted = 0
1190
- batch_skipped = 0
1191
- batch_failed = 0
1192
-
1193
- if transaction_mode == 'batch':
1194
- # 批量模式特殊处理 - 尝试逐行插入但保持事务
1195
- try:
1196
- for row_idx, row in enumerate(batch, 1):
1197
- try:
1198
- # 准备参数
1199
- row_values = [row.get(col) for col in all_columns]
1200
- if check_duplicate and not update_on_duplicate:
1201
- row_values += [row.get(col) for col in duplicate_columns]
1202
-
1203
- cursor.execute(sql, row_values)
1204
-
1205
- if check_duplicate:
1206
- # 检查是否实际插入了行
1207
- if cursor.rowcount > 0:
1208
- batch_inserted += 1
1209
- else:
1210
- batch_skipped += 1
1211
- else:
1212
- batch_inserted += 1
1213
-
1214
- except Exception as e:
1215
- batch_failed += 1
1216
- logger.error(sys._getframe().f_code.co_name, {
1217
- '库': db_name,
1218
- '表': table_name,
1219
- '批次/当前行': f'{batch_id} {row_idx}/{len(batch)}',
1220
- 'error_type': type(e).__name__,
1221
- '单行插入失败': str(e),
1222
- '数据类型': set_typ,
1223
- '是否排重': check_duplicate,
1224
- '排重列': duplicate_columns,
1225
- '事务提交模式': transaction_mode,
1226
- '处理方式': '继续处理剩余行'
1227
- })
1228
- continue # 继续处理下一行
1229
-
1230
- # 批量模式最后统一提交
1231
- conn.commit()
1232
-
1233
- except Exception as e:
1234
- # 如果整个批量操作失败,回滚
1235
- conn.rollback()
1236
- batch_failed = len(batch) # 标记整个批次失败
1237
- logger.error(sys._getframe().f_code.co_name, {
1238
- '库': db_name,
1239
- '表': table_name,
1240
- '批次': f'{batch_id} {i+1}/{len(data)}',
1241
- 'error_type': type(e).__name__,
1242
- '批量操作失败': str(e),
1243
- '事务提交模式': transaction_mode,
1244
- '处理方式': '整个批次回滚'
1245
- })
1246
-
1247
- else: # row 或 hybrid 模式
1248
- for row_idx, row in enumerate(batch, 1):
1249
- try:
1250
- # 准备参数
1251
- row_values = [row.get(col) for col in all_columns]
1252
- if check_duplicate and not update_on_duplicate:
1253
- row_values += [row.get(col) for col in duplicate_columns]
1254
-
1255
- cursor.execute(sql, row_values)
1256
-
1257
- if check_duplicate:
1258
- # 检查是否实际插入了行
1259
- if cursor.rowcount > 0:
1260
- batch_inserted += 1
1261
- else:
1262
- batch_skipped += 1
1263
- else:
1264
- batch_inserted += 1
1265
-
1266
- # 根据模式决定提交时机
1267
- if transaction_mode == 'row':
1268
- conn.commit() # 逐行提交
1269
- elif transaction_mode == 'hybrid' and row_idx % 100 == 0:
1270
- conn.commit() # 每100行提交一次
1271
-
1272
- except Exception as e:
1273
- conn.rollback()
1274
- batch_failed += 1
1275
- logger.error(sys._getframe().f_code.co_name, {
1276
- '库': db_name,
1277
- '表': table_name,
1278
- '批次/当前行': f'{batch_id} {row_idx}/{len(batch)}',
1279
- 'error_type': type(e).__name__,
1280
- '单行插入失败': str(e),
1281
- '数据类型': set_typ,
1282
- '是否排重': check_duplicate,
1283
- '排重列': duplicate_columns,
1284
- '事务提交模式': transaction_mode,
1285
- })
1286
-
1287
- # 混合模式最后统一提交
1288
- if transaction_mode == 'hybrid':
1289
- conn.commit()
1241
+ batch_inserted, batch_skipped, batch_failed = self._process_batch(
1242
+ conn, cursor, db_name, table_name, batch, all_columns,
1243
+ sql, check_duplicate, duplicate_columns, batch_id,
1244
+ transaction_mode, i, len(data)
1245
+ )
1290
1246
 
1291
1247
  # 更新总统计
1292
1248
  total_inserted += batch_inserted
1293
1249
  total_skipped += batch_skipped
1294
1250
  total_failed += batch_failed
1295
1251
 
1296
- logger.debug(sys._getframe().f_code.co_name, {
1252
+ return total_inserted, total_skipped, total_failed
1253
+
1254
+ def _process_batch(
1255
+ self,
1256
+ conn,
1257
+ cursor,
1258
+ db_name: str,
1259
+ table_name: str,
1260
+ batch: List[Dict],
1261
+ all_columns: List[str],
1262
+ sql: str,
1263
+ check_duplicate: bool,
1264
+ duplicate_columns: Optional[List[str]],
1265
+ batch_id: Optional[str],
1266
+ transaction_mode: str,
1267
+ batch_index: int,
1268
+ total_data_length: int
1269
+ ) -> Tuple[int, int, int]:
1270
+ """处理单个批次的数据插入"""
1271
+ batch_inserted = 0
1272
+ batch_skipped = 0
1273
+ batch_failed = 0
1274
+
1275
+ if transaction_mode == 'batch':
1276
+ # 批量模式特殊处理 - 尝试逐行插入但保持事务
1277
+ try:
1278
+ for row_idx, row in enumerate(batch, 1):
1279
+ result = self._process_single_row(
1280
+ cursor, row, all_columns, sql,
1281
+ check_duplicate, duplicate_columns
1282
+ )
1283
+ if result == 'inserted':
1284
+ batch_inserted += 1
1285
+ elif result == 'skipped':
1286
+ batch_skipped += 1
1287
+ else:
1288
+ batch_failed += 1
1289
+
1290
+ # 批量模式最后统一提交
1291
+ conn.commit()
1292
+
1293
+ except Exception as e:
1294
+ # 如果整个批量操作失败,回滚
1295
+ conn.rollback()
1296
+ batch_failed = len(batch) # 标记整个批次失败
1297
+ logger.error(sys._getframe().f_code.co_name, {
1298
+ '库': db_name,
1299
+ '表': table_name,
1300
+ '批次': f'{batch_id} {batch_index + 1}/{total_data_length}',
1301
+ 'error_type': type(e).__name__,
1302
+ '批量操作失败': str(e),
1303
+ '事务提交模式': transaction_mode,
1304
+ '处理方式': '整个批次回滚'
1305
+ })
1306
+
1307
+ else: # row 或 hybrid 模式
1308
+ for row_idx, row in enumerate(batch, 1):
1309
+ try:
1310
+ result = self._process_single_row(
1311
+ cursor, row, all_columns, sql,
1312
+ check_duplicate, duplicate_columns
1313
+ )
1314
+ if result == 'inserted':
1315
+ batch_inserted += 1
1316
+ elif result == 'skipped':
1317
+ batch_skipped += 1
1318
+ else:
1319
+ batch_failed += 1
1320
+
1321
+ # 根据模式决定提交时机
1322
+ if transaction_mode == 'row':
1323
+ conn.commit() # 逐行提交
1324
+ elif transaction_mode == 'hybrid' and row_idx % 100 == 0:
1325
+ conn.commit() # 每100行提交一次
1326
+
1327
+ except Exception as e:
1328
+ conn.rollback()
1329
+ batch_failed += 1
1330
+ logger.error(sys._getframe().f_code.co_name, {
1297
1331
  '库': db_name,
1298
1332
  '表': table_name,
1299
- '批次': batch_id,
1300
- '批次处理完成': i // batch_size + 1,
1301
- '总批次': (len(data) + batch_size - 1) // batch_size,
1302
- '数据量': len(batch),
1303
- '插入': batch_inserted,
1304
- '跳过': batch_skipped,
1305
- '失败': batch_failed,
1333
+ '批次/当前行': f'{batch_id} {row_idx}/{len(batch)}',
1334
+ 'error_type': type(e).__name__,
1335
+ '单行插入失败': str(e),
1336
+ '数据类型': set_typ,
1337
+ '是否排重': check_duplicate,
1338
+ '排重列': duplicate_columns,
1306
1339
  '事务提交模式': transaction_mode,
1307
1340
  })
1308
1341
 
1309
- logger.info('插入完成', {
1342
+ # 混合模式最后统一提交
1343
+ if transaction_mode == 'hybrid':
1344
+ conn.commit()
1345
+
1346
+ logger.debug(sys._getframe().f_code.co_name, {
1310
1347
  '库': db_name,
1311
1348
  '表': table_name,
1312
- '完成总计': len(data),
1313
- '插入': total_inserted,
1314
- '跳过': total_skipped,
1315
- '失败': total_failed,
1349
+ '批次': batch_id,
1350
+ '批次处理完成': batch_index // len(batch) + 1,
1351
+ '总批次': (total_data_length + len(batch) - 1) // len(batch),
1352
+ '数据量': len(batch),
1353
+ '插入': batch_inserted,
1354
+ '跳过': batch_skipped,
1355
+ '失败': batch_failed,
1316
1356
  '事务提交模式': transaction_mode,
1317
1357
  })
1318
1358
 
1359
+ return batch_inserted, batch_skipped, batch_failed
1360
+
1361
+ def _process_single_row(
1362
+ self,
1363
+ cursor,
1364
+ row: Dict,
1365
+ all_columns: List[str],
1366
+ sql: str,
1367
+ check_duplicate: bool,
1368
+ duplicate_columns: Optional[List[str]]
1369
+ ) -> str:
1370
+ """处理单行数据插入"""
1371
+ try:
1372
+ # 准备参数
1373
+ row_values = [row.get(col) for col in all_columns]
1374
+ if check_duplicate:
1375
+ row_values += [row.get(col) for col in duplicate_columns]
1376
+
1377
+ cursor.execute(sql, row_values)
1378
+
1379
+ if check_duplicate:
1380
+ # 检查是否实际插入了行
1381
+ return 'inserted' if cursor.rowcount > 0 else 'skipped'
1382
+ return 'inserted'
1383
+
1384
+ except Exception as e:
1385
+ logger.error(sys._getframe().f_code.co_name, {
1386
+ 'error_type': type(e).__name__,
1387
+ '单行插入失败': str(e),
1388
+ '是否排重': check_duplicate,
1389
+ '排重列': duplicate_columns,
1390
+ '处理方式': '继续处理剩余行'
1391
+ })
1392
+ return 'failed'
1393
+
1319
1394
  def close(self):
1320
1395
  """
1321
1396
  关闭连接池并清理资源
@@ -1385,6 +1460,7 @@ class MySQLUploader:
1385
1460
  def __exit__(self, exc_type, exc_val, exc_tb):
1386
1461
  self.close()
1387
1462
 
1463
+
1388
1464
  def main():
1389
1465
  uploader = MySQLUploader(
1390
1466
  username='root',
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.2
2
2
  Name: mdbq
3
- Version: 3.10.1
3
+ Version: 3.10.2
4
4
  Home-page: https://pypi.org/project/mdbq
5
5
  Author: xigua,
6
6
  Author-email: 2587125111@qq.com
@@ -1,5 +1,5 @@
1
1
  mdbq/__init__.py,sha256=Il5Q9ATdX8yXqVxtP_nYqUhExzxPC_qk_WXQ_4h0exg,16
2
- mdbq/__version__.py,sha256=IV5Hh9sLBiZlaqbX0_vhJtOQad8a2sOKBbx_A0k8fik,18
2
+ mdbq/__version__.py,sha256=tL5iFQ6j9Svg-3tbUuEZAgDFN3ipIhdJjFUPU6EHSRQ,18
3
3
  mdbq/aggregation/__init__.py,sha256=EeDqX2Aml6SPx8363J-v1lz0EcZtgwIBYyCJV6CcEDU,40
4
4
  mdbq/aggregation/optimize.py,sha256=2oalzD9weZhDclUC22OLxYa8Zj7KnmsGUoUau_Jlyc4,19796
5
5
  mdbq/aggregation/query_data.py,sha256=5_OzjGR5Sq00q-EgAYmSE5V9i4Solw9y4hkldl4mvt8,179808
@@ -12,7 +12,7 @@ mdbq/mysql/__init__.py,sha256=A_DPJyAoEvTSFojiI2e94zP0FKtCkkwKP1kYUCSyQzo,11
12
12
  mdbq/mysql/deduplicator.py,sha256=brhX3eyE8-kn3nAYweKfBbAkXiNcyw_pL4CTyPqmPBg,21983
13
13
  mdbq/mysql/mysql.py,sha256=Fzaqbjg5g3HdNl50jInIrdurdzcgR2CCzdKLVImD1-Q,55339
14
14
  mdbq/mysql/s_query.py,sha256=X055aLRAgxVvueXx4NbfNjp6MyBI02_XBb1pTKw09L0,8660
15
- mdbq/mysql/uploader.py,sha256=Hvnc-Oqg0HloXTzu868k9Zh_aSMeVgfWgrqgeUWdJA0,58037
15
+ mdbq/mysql/uploader.py,sha256=XOSeGg74zN3qYFfWmLqr98H7tCj74dIMCS3C0cvS3kU,58994
16
16
  mdbq/other/__init__.py,sha256=jso1oHcy6cJEfa7udS_9uO5X6kZLoPBF8l3wCYmr5dM,18
17
17
  mdbq/other/download_sku_picture.py,sha256=YU8DxKMXbdeE1OOKEA848WVp62jYHw5O4tXTjUdq9H0,44832
18
18
  mdbq/other/otk.py,sha256=iclBIFbQbhlqzUbcMMoePXBpcP1eZ06ZtjnhcA_EbmE,7241
@@ -25,7 +25,7 @@ mdbq/redis/__init__.py,sha256=YtgBlVSMDphtpwYX248wGge1x-Ex_mMufz4-8W0XRmA,12
25
25
  mdbq/redis/getredis.py,sha256=Uk8-cOWT0JU1qRyIVqdbYokSLvkDIAfcokmYj1ebw8k,24104
26
26
  mdbq/spider/__init__.py,sha256=RBMFXGy_jd1HXZhngB2T2XTvJqki8P_Fr-pBcwijnew,18
27
27
  mdbq/spider/aikucun.py,sha256=YyPWa_nOH1zs8wgTDcgzn5w8szGKWPyWzmWMVIPkFnU,21638
28
- mdbq-3.10.1.dist-info/METADATA,sha256=CQR_RVCq58Ui1Llc2n_u3q0kzL8FJU_SKptHBVSswCM,364
29
- mdbq-3.10.1.dist-info/WHEEL,sha256=jB7zZ3N9hIM9adW7qlTAyycLYW9npaWKLRzaoVcLKcM,91
30
- mdbq-3.10.1.dist-info/top_level.txt,sha256=2FQ-uLnCSB-OwFiWntzmwosW3X2Xqsg0ewh1axsaylA,5
31
- mdbq-3.10.1.dist-info/RECORD,,
28
+ mdbq-3.10.2.dist-info/METADATA,sha256=D9d_UixDPHEbrdRE1yjA4SHjo4tYoY60_R4cBGPF3ms,364
29
+ mdbq-3.10.2.dist-info/WHEEL,sha256=jB7zZ3N9hIM9adW7qlTAyycLYW9npaWKLRzaoVcLKcM,91
30
+ mdbq-3.10.2.dist-info/top_level.txt,sha256=2FQ-uLnCSB-OwFiWntzmwosW3X2Xqsg0ewh1axsaylA,5
31
+ mdbq-3.10.2.dist-info/RECORD,,
File without changes