mdbq 3.10.0__py3-none-any.whl → 3.10.2__py3-none-any.whl

This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the contents of each package version as they appear in their respective public registries.
mdbq/__version__.py CHANGED
@@ -1 +1 @@
1
- VERSION = '3.10.0'
1
+ VERSION = '3.10.2'
mdbq/mysql/uploader.py CHANGED
@@ -1115,29 +1115,60 @@ class MySQLUploader:
1115
1115
  if not data:
1116
1116
  return
1117
1117
 
1118
+ # 验证事务模式
1119
+ transaction_mode = self._validate_transaction_mode(transaction_mode)
1120
+
1121
+ # 准备SQL语句
1122
+ sql = self._prepare_insert_sql(
1123
+ db_name, table_name, set_typ,
1124
+ check_duplicate, duplicate_columns,
1125
+ update_on_duplicate
1126
+ )
1127
+
1128
+ # 执行批量插入
1129
+ total_inserted, total_skipped, total_failed = self._execute_batch_insert(
1130
+ db_name, table_name, data, set_typ,
1131
+ sql, check_duplicate, duplicate_columns,
1132
+ batch_size, batch_id, transaction_mode
1133
+ )
1134
+
1135
+ logger.info('插入完成', {
1136
+ '库': db_name,
1137
+ '表': table_name,
1138
+ '完成总计': len(data),
1139
+ '插入': total_inserted,
1140
+ '跳过': total_skipped,
1141
+ '失败': total_failed,
1142
+ '事务提交模式': transaction_mode,
1143
+ })
1144
+
1145
+ def _validate_transaction_mode(self, mode: str) -> str:
1146
+ """验证并标准化事务模式"""
1118
1147
  valid_modes = ('row', 'batch', 'hybrid')
1119
- if transaction_mode.lower() not in valid_modes:
1148
+ if mode.lower() not in valid_modes:
1120
1149
  logger.error(sys._getframe().f_code.co_name, {
1121
- '': db_name,
1122
- '表': table_name,
1123
- '参数异常': f'transaction_mode -> {transaction_mode}',
1150
+ '参数异常': f'transaction_mode -> {mode}',
1124
1151
  '可选值': valid_modes,
1125
1152
  '自动使用默认模式': 'batch'
1126
1153
  })
1127
- transaction_mode = 'batch'
1128
- # return
1154
+ return 'batch'
1155
+ return mode.lower()
1129
1156
 
1157
+ def _prepare_insert_sql(
1158
+ self,
1159
+ db_name: str,
1160
+ table_name: str,
1161
+ set_typ: Dict[str, str],
1162
+ check_duplicate: bool,
1163
+ duplicate_columns: Optional[List[str]],
1164
+ update_on_duplicate: bool
1165
+ ) -> str:
1166
+ """准备插入SQL语句"""
1130
1167
  # 获取所有列名(排除id列)
1131
1168
  all_columns = [col for col in set_typ.keys() if col.lower() != 'id']
1132
1169
  safe_columns = [self._validate_identifier(col) for col in all_columns]
1133
1170
  placeholders = ','.join(['%s'] * len(safe_columns))
1134
1171
 
1135
- # 初始化统计变量
1136
- total_inserted = 0
1137
- total_skipped = 0
1138
- total_failed = 0
1139
-
1140
- # 构建基础SQL语句
1141
1172
  if check_duplicate:
1142
1173
  if not duplicate_columns:
1143
1174
  duplicate_columns = all_columns
@@ -1159,14 +1190,14 @@ class MySQLUploader:
1159
1190
  if update_on_duplicate:
1160
1191
  # 更新模式 - 使用ON DUPLICATE KEY UPDATE语法
1161
1192
  update_clause = ", ".join([f"`{col}` = VALUES(`{col}`)" for col in all_columns])
1162
- sql = f"""
1193
+ return f"""
1163
1194
  INSERT INTO `{db_name}`.`{table_name}`
1164
1195
  (`{'`,`'.join(safe_columns)}`)
1165
1196
  VALUES ({placeholders})
1166
1197
  ON DUPLICATE KEY UPDATE {update_clause}
1167
1198
  """
1168
1199
  else:
1169
- sql = f"""INSERT INTO `{db_name}`.`{table_name}`
1200
+ return f"""INSERT INTO `{db_name}`.`{table_name}`
1170
1201
  (`{'`,`'.join(safe_columns)}`)
1171
1202
  SELECT {placeholders}
1172
1203
  FROM DUAL
@@ -1176,94 +1207,190 @@ class MySQLUploader:
1176
1207
  )
1177
1208
  """
1178
1209
  else:
1179
- sql = f"""
1210
+ return f"""
1180
1211
  INSERT INTO `{db_name}`.`{table_name}`
1181
1212
  (`{'`,`'.join(safe_columns)}`)
1182
1213
  VALUES ({placeholders})
1183
1214
  """
1184
1215
 
1185
- # 分批插入数据
1216
+ def _execute_batch_insert(
1217
+ self,
1218
+ db_name: str,
1219
+ table_name: str,
1220
+ data: List[Dict],
1221
+ set_typ: Dict[str, str],
1222
+ sql: str,
1223
+ check_duplicate: bool,
1224
+ duplicate_columns: Optional[List[str]],
1225
+ batch_size: int,
1226
+ batch_id: Optional[str],
1227
+ transaction_mode: str
1228
+ ) -> Tuple[int, int, int]:
1229
+ """执行批量插入操作"""
1230
+ # 获取所有列名(排除id列)
1231
+ all_columns = [col for col in set_typ.keys() if col.lower() != 'id']
1232
+
1233
+ total_inserted = 0
1234
+ total_skipped = 0
1235
+ total_failed = 0
1236
+
1186
1237
  with self._get_connection() as conn:
1187
1238
  with conn.cursor() as cursor:
1188
1239
  for i in range(0, len(data), batch_size):
1189
1240
  batch = data[i:i + batch_size]
1190
- batch_inserted = 0
1191
- batch_skipped = 0
1192
- batch_failed = 0
1193
-
1194
- for row_idx, row in enumerate(batch, 1):
1195
- try:
1196
- # 准备参数
1197
- row_values = [row.get(col) for col in all_columns]
1198
- if check_duplicate and not update_on_duplicate:
1199
- row_values += [row.get(col) for col in duplicate_columns]
1200
-
1201
- cursor.execute(sql, row_values)
1202
-
1203
- if check_duplicate:
1204
- # 检查是否实际插入了行
1205
- if cursor.rowcount > 0:
1206
- batch_inserted += 1
1207
- else:
1208
- batch_skipped += 1
1209
- else:
1210
- batch_inserted += 1
1211
-
1212
- # 根据模式决定提交时机
1213
- if transaction_mode == 'row':
1214
- conn.commit() # 逐行提交
1215
- elif transaction_mode == 'hybrid' and row_idx % 100 == 0:
1216
- conn.commit() # 每100行提交一次
1217
-
1218
- except Exception as e:
1219
- # if transaction_mode == 'row':
1220
- # conn.rollback()
1221
- conn.rollback()
1222
- batch_failed += 1
1223
- logger.error(sys._getframe().f_code.co_name, {
1224
- '库': db_name,
1225
- '表': table_name,
1226
- '批次': batch_id,
1227
- 'error_type': type(e).__name__,
1228
- '单行插入失败': str(e),
1229
- '数据类型': set_typ,
1230
- '是否排重': check_duplicate,
1231
- '排重列': duplicate_columns,
1232
- '事务提交模式': transaction_mode,
1233
- })
1234
-
1235
- # 批量模式最后统一提交
1236
- if transaction_mode in ('batch', 'hybrid'):
1237
- conn.commit()
1241
+ batch_inserted, batch_skipped, batch_failed = self._process_batch(
1242
+ conn, cursor, db_name, table_name, batch, all_columns,
1243
+ sql, check_duplicate, duplicate_columns, batch_id,
1244
+ transaction_mode, i, len(data)
1245
+ )
1238
1246
 
1239
1247
  # 更新总统计
1240
1248
  total_inserted += batch_inserted
1241
1249
  total_skipped += batch_skipped
1242
1250
  total_failed += batch_failed
1243
1251
 
1244
- logger.debug(sys._getframe().f_code.co_name, {
1252
+ return total_inserted, total_skipped, total_failed
1253
+
1254
+ def _process_batch(
1255
+ self,
1256
+ conn,
1257
+ cursor,
1258
+ db_name: str,
1259
+ table_name: str,
1260
+ batch: List[Dict],
1261
+ all_columns: List[str],
1262
+ sql: str,
1263
+ check_duplicate: bool,
1264
+ duplicate_columns: Optional[List[str]],
1265
+ batch_id: Optional[str],
1266
+ transaction_mode: str,
1267
+ batch_index: int,
1268
+ total_data_length: int
1269
+ ) -> Tuple[int, int, int]:
1270
+ """处理单个批次的数据插入"""
1271
+ batch_inserted = 0
1272
+ batch_skipped = 0
1273
+ batch_failed = 0
1274
+
1275
+ if transaction_mode == 'batch':
1276
+ # 批量模式特殊处理 - 尝试逐行插入但保持事务
1277
+ try:
1278
+ for row_idx, row in enumerate(batch, 1):
1279
+ result = self._process_single_row(
1280
+ cursor, row, all_columns, sql,
1281
+ check_duplicate, duplicate_columns
1282
+ )
1283
+ if result == 'inserted':
1284
+ batch_inserted += 1
1285
+ elif result == 'skipped':
1286
+ batch_skipped += 1
1287
+ else:
1288
+ batch_failed += 1
1289
+
1290
+ # 批量模式最后统一提交
1291
+ conn.commit()
1292
+
1293
+ except Exception as e:
1294
+ # 如果整个批量操作失败,回滚
1295
+ conn.rollback()
1296
+ batch_failed = len(batch) # 标记整个批次失败
1297
+ logger.error(sys._getframe().f_code.co_name, {
1298
+ '库': db_name,
1299
+ '表': table_name,
1300
+ '批次': f'{batch_id} {batch_index + 1}/{total_data_length}',
1301
+ 'error_type': type(e).__name__,
1302
+ '批量操作失败': str(e),
1303
+ '事务提交模式': transaction_mode,
1304
+ '处理方式': '整个批次回滚'
1305
+ })
1306
+
1307
+ else: # row 或 hybrid 模式
1308
+ for row_idx, row in enumerate(batch, 1):
1309
+ try:
1310
+ result = self._process_single_row(
1311
+ cursor, row, all_columns, sql,
1312
+ check_duplicate, duplicate_columns
1313
+ )
1314
+ if result == 'inserted':
1315
+ batch_inserted += 1
1316
+ elif result == 'skipped':
1317
+ batch_skipped += 1
1318
+ else:
1319
+ batch_failed += 1
1320
+
1321
+ # 根据模式决定提交时机
1322
+ if transaction_mode == 'row':
1323
+ conn.commit() # 逐行提交
1324
+ elif transaction_mode == 'hybrid' and row_idx % 100 == 0:
1325
+ conn.commit() # 每100行提交一次
1326
+
1327
+ except Exception as e:
1328
+ conn.rollback()
1329
+ batch_failed += 1
1330
+ logger.error(sys._getframe().f_code.co_name, {
1245
1331
  '库': db_name,
1246
1332
  '表': table_name,
1247
- '批次': batch_id,
1248
- '批次处理完成': i // batch_size + 1,
1249
- '总批次': (len(data) + batch_size - 1) // batch_size,
1250
- '数据量': len(batch),
1251
- '插入': batch_inserted,
1252
- '跳过': batch_skipped,
1253
- '失败': batch_failed,
1333
+ '批次/当前行': f'{batch_id} {row_idx}/{len(batch)}',
1334
+ 'error_type': type(e).__name__,
1335
+ '单行插入失败': str(e),
1336
+ '数据类型': set_typ,
1337
+ '是否排重': check_duplicate,
1338
+ '排重列': duplicate_columns,
1254
1339
  '事务提交模式': transaction_mode,
1255
1340
  })
1256
1341
 
1257
- logger.info('插入完成', {
1342
+ # 混合模式最后统一提交
1343
+ if transaction_mode == 'hybrid':
1344
+ conn.commit()
1345
+
1346
+ logger.debug(sys._getframe().f_code.co_name, {
1258
1347
  '库': db_name,
1259
1348
  '表': table_name,
1260
- '完成总计': len(data),
1261
- '插入': total_inserted,
1262
- '跳过': total_skipped,
1263
- '失败': total_failed,
1349
+ '批次': batch_id,
1350
+ '批次处理完成': batch_index // len(batch) + 1,
1351
+ '总批次': (total_data_length + len(batch) - 1) // len(batch),
1352
+ '数据量': len(batch),
1353
+ '插入': batch_inserted,
1354
+ '跳过': batch_skipped,
1355
+ '失败': batch_failed,
1264
1356
  '事务提交模式': transaction_mode,
1265
1357
  })
1266
1358
 
1359
+ return batch_inserted, batch_skipped, batch_failed
1360
+
1361
+ def _process_single_row(
1362
+ self,
1363
+ cursor,
1364
+ row: Dict,
1365
+ all_columns: List[str],
1366
+ sql: str,
1367
+ check_duplicate: bool,
1368
+ duplicate_columns: Optional[List[str]]
1369
+ ) -> str:
1370
+ """处理单行数据插入"""
1371
+ try:
1372
+ # 准备参数
1373
+ row_values = [row.get(col) for col in all_columns]
1374
+ if check_duplicate:
1375
+ row_values += [row.get(col) for col in duplicate_columns]
1376
+
1377
+ cursor.execute(sql, row_values)
1378
+
1379
+ if check_duplicate:
1380
+ # 检查是否实际插入了行
1381
+ return 'inserted' if cursor.rowcount > 0 else 'skipped'
1382
+ return 'inserted'
1383
+
1384
+ except Exception as e:
1385
+ logger.error(sys._getframe().f_code.co_name, {
1386
+ 'error_type': type(e).__name__,
1387
+ '单行插入失败': str(e),
1388
+ '是否排重': check_duplicate,
1389
+ '排重列': duplicate_columns,
1390
+ '处理方式': '继续处理剩余行'
1391
+ })
1392
+ return 'failed'
1393
+
1267
1394
  def close(self):
1268
1395
  """
1269
1396
  关闭连接池并清理资源
@@ -1333,6 +1460,7 @@ class MySQLUploader:
1333
1460
  def __exit__(self, exc_type, exc_val, exc_tb):
1334
1461
  self.close()
1335
1462
 
1463
+
1336
1464
  def main():
1337
1465
  uploader = MySQLUploader(
1338
1466
  username='root',
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.2
2
2
  Name: mdbq
3
- Version: 3.10.0
3
+ Version: 3.10.2
4
4
  Home-page: https://pypi.org/project/mdbq
5
5
  Author: xigua,
6
6
  Author-email: 2587125111@qq.com
@@ -1,5 +1,5 @@
1
1
  mdbq/__init__.py,sha256=Il5Q9ATdX8yXqVxtP_nYqUhExzxPC_qk_WXQ_4h0exg,16
2
- mdbq/__version__.py,sha256=5KePxgTqsEgArDghwVLPBSZN4Cwjr4f6goTdfGdj5cE,18
2
+ mdbq/__version__.py,sha256=tL5iFQ6j9Svg-3tbUuEZAgDFN3ipIhdJjFUPU6EHSRQ,18
3
3
  mdbq/aggregation/__init__.py,sha256=EeDqX2Aml6SPx8363J-v1lz0EcZtgwIBYyCJV6CcEDU,40
4
4
  mdbq/aggregation/optimize.py,sha256=2oalzD9weZhDclUC22OLxYa8Zj7KnmsGUoUau_Jlyc4,19796
5
5
  mdbq/aggregation/query_data.py,sha256=5_OzjGR5Sq00q-EgAYmSE5V9i4Solw9y4hkldl4mvt8,179808
@@ -12,7 +12,7 @@ mdbq/mysql/__init__.py,sha256=A_DPJyAoEvTSFojiI2e94zP0FKtCkkwKP1kYUCSyQzo,11
12
12
  mdbq/mysql/deduplicator.py,sha256=brhX3eyE8-kn3nAYweKfBbAkXiNcyw_pL4CTyPqmPBg,21983
13
13
  mdbq/mysql/mysql.py,sha256=Fzaqbjg5g3HdNl50jInIrdurdzcgR2CCzdKLVImD1-Q,55339
14
14
  mdbq/mysql/s_query.py,sha256=X055aLRAgxVvueXx4NbfNjp6MyBI02_XBb1pTKw09L0,8660
15
- mdbq/mysql/uploader.py,sha256=oK9LD5ydDRk5FvyKzBxQTXmTgnSMlpn31gq1Ht9uJCE,54884
15
+ mdbq/mysql/uploader.py,sha256=XOSeGg74zN3qYFfWmLqr98H7tCj74dIMCS3C0cvS3kU,58994
16
16
  mdbq/other/__init__.py,sha256=jso1oHcy6cJEfa7udS_9uO5X6kZLoPBF8l3wCYmr5dM,18
17
17
  mdbq/other/download_sku_picture.py,sha256=YU8DxKMXbdeE1OOKEA848WVp62jYHw5O4tXTjUdq9H0,44832
18
18
  mdbq/other/otk.py,sha256=iclBIFbQbhlqzUbcMMoePXBpcP1eZ06ZtjnhcA_EbmE,7241
@@ -25,7 +25,7 @@ mdbq/redis/__init__.py,sha256=YtgBlVSMDphtpwYX248wGge1x-Ex_mMufz4-8W0XRmA,12
25
25
  mdbq/redis/getredis.py,sha256=Uk8-cOWT0JU1qRyIVqdbYokSLvkDIAfcokmYj1ebw8k,24104
26
26
  mdbq/spider/__init__.py,sha256=RBMFXGy_jd1HXZhngB2T2XTvJqki8P_Fr-pBcwijnew,18
27
27
  mdbq/spider/aikucun.py,sha256=YyPWa_nOH1zs8wgTDcgzn5w8szGKWPyWzmWMVIPkFnU,21638
28
- mdbq-3.10.0.dist-info/METADATA,sha256=I6UtECQJCo-8lp2QiudMEWPG_EHctYjesMdcDPUb2wg,364
29
- mdbq-3.10.0.dist-info/WHEEL,sha256=jB7zZ3N9hIM9adW7qlTAyycLYW9npaWKLRzaoVcLKcM,91
30
- mdbq-3.10.0.dist-info/top_level.txt,sha256=2FQ-uLnCSB-OwFiWntzmwosW3X2Xqsg0ewh1axsaylA,5
31
- mdbq-3.10.0.dist-info/RECORD,,
28
+ mdbq-3.10.2.dist-info/METADATA,sha256=D9d_UixDPHEbrdRE1yjA4SHjo4tYoY60_R4cBGPF3ms,364
29
+ mdbq-3.10.2.dist-info/WHEEL,sha256=jB7zZ3N9hIM9adW7qlTAyycLYW9npaWKLRzaoVcLKcM,91
30
+ mdbq-3.10.2.dist-info/top_level.txt,sha256=2FQ-uLnCSB-OwFiWntzmwosW3X2Xqsg0ewh1axsaylA,5
31
+ mdbq-3.10.2.dist-info/RECORD,,
File without changes