mdbq 3.9.18__tar.gz → 3.10.1__tar.gz

This diff shows the differences between two publicly available versions of a package, as released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those versions as they appear in their respective public registries.
Files changed (36)
  1. {mdbq-3.9.18 → mdbq-3.10.1}/PKG-INFO +1 -1
  2. mdbq-3.10.1/mdbq/__version__.py +1 -0
  3. {mdbq-3.9.18 → mdbq-3.10.1}/mdbq/mysql/uploader.py +123 -31
  4. {mdbq-3.9.18 → mdbq-3.10.1}/mdbq/spider/aikucun.py +55 -10
  5. {mdbq-3.9.18 → mdbq-3.10.1}/mdbq.egg-info/PKG-INFO +1 -1
  6. mdbq-3.9.18/mdbq/__version__.py +0 -1
  7. {mdbq-3.9.18 → mdbq-3.10.1}/README.txt +0 -0
  8. {mdbq-3.9.18 → mdbq-3.10.1}/mdbq/__init__.py +0 -0
  9. {mdbq-3.9.18 → mdbq-3.10.1}/mdbq/aggregation/__init__.py +0 -0
  10. {mdbq-3.9.18 → mdbq-3.10.1}/mdbq/aggregation/optimize.py +0 -0
  11. {mdbq-3.9.18 → mdbq-3.10.1}/mdbq/aggregation/query_data.py +0 -0
  12. {mdbq-3.9.18 → mdbq-3.10.1}/mdbq/config/__init__.py +0 -0
  13. {mdbq-3.9.18 → mdbq-3.10.1}/mdbq/config/config.py +0 -0
  14. {mdbq-3.9.18 → mdbq-3.10.1}/mdbq/log/__init__.py +0 -0
  15. {mdbq-3.9.18 → mdbq-3.10.1}/mdbq/log/mylogger.py +0 -0
  16. {mdbq-3.9.18 → mdbq-3.10.1}/mdbq/log/spider_logging.py +0 -0
  17. {mdbq-3.9.18 → mdbq-3.10.1}/mdbq/mysql/__init__.py +0 -0
  18. {mdbq-3.9.18 → mdbq-3.10.1}/mdbq/mysql/deduplicator.py +0 -0
  19. {mdbq-3.9.18 → mdbq-3.10.1}/mdbq/mysql/mysql.py +0 -0
  20. {mdbq-3.9.18 → mdbq-3.10.1}/mdbq/mysql/s_query.py +0 -0
  21. {mdbq-3.9.18 → mdbq-3.10.1}/mdbq/other/__init__.py +0 -0
  22. {mdbq-3.9.18 → mdbq-3.10.1}/mdbq/other/download_sku_picture.py +0 -0
  23. {mdbq-3.9.18 → mdbq-3.10.1}/mdbq/other/otk.py +0 -0
  24. {mdbq-3.9.18 → mdbq-3.10.1}/mdbq/other/pov_city.py +0 -0
  25. {mdbq-3.9.18 → mdbq-3.10.1}/mdbq/other/ua_sj.py +0 -0
  26. {mdbq-3.9.18 → mdbq-3.10.1}/mdbq/pbix/__init__.py +0 -0
  27. {mdbq-3.9.18 → mdbq-3.10.1}/mdbq/pbix/pbix_refresh.py +0 -0
  28. {mdbq-3.9.18 → mdbq-3.10.1}/mdbq/pbix/refresh_all.py +0 -0
  29. {mdbq-3.9.18 → mdbq-3.10.1}/mdbq/redis/__init__.py +0 -0
  30. {mdbq-3.9.18 → mdbq-3.10.1}/mdbq/redis/getredis.py +0 -0
  31. {mdbq-3.9.18 → mdbq-3.10.1}/mdbq/spider/__init__.py +0 -0
  32. {mdbq-3.9.18 → mdbq-3.10.1}/mdbq.egg-info/SOURCES.txt +0 -0
  33. {mdbq-3.9.18 → mdbq-3.10.1}/mdbq.egg-info/dependency_links.txt +0 -0
  34. {mdbq-3.9.18 → mdbq-3.10.1}/mdbq.egg-info/top_level.txt +0 -0
  35. {mdbq-3.9.18 → mdbq-3.10.1}/setup.cfg +0 -0
  36. {mdbq-3.9.18 → mdbq-3.10.1}/setup.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.2
2
2
  Name: mdbq
3
- Version: 3.9.18
3
+ Version: 3.10.1
4
4
  Home-page: https://pypi.org/project/mdbq
5
5
  Author: xigua,
6
6
  Author-email: 2587125111@qq.com
@@ -0,0 +1 @@
1
+ VERSION = '3.10.1'
@@ -698,7 +698,8 @@ class MySQLUploader:
698
698
  date_column: Optional[str],
699
699
  indexes: Optional[List[str]],
700
700
  batch_id: Optional[str] = None,
701
- update_on_duplicate: bool = False
701
+ update_on_duplicate: bool = False,
702
+ transaction_mode: str = "batch"
702
703
  ):
703
704
  """实际执行表上传的方法"""
704
705
  # 检查表是否存在
@@ -739,7 +740,8 @@ class MySQLUploader:
739
740
  db_name, table_name, data, set_typ,
740
741
  check_duplicate, duplicate_columns,
741
742
  batch_id=batch_id,
742
- update_on_duplicate=update_on_duplicate
743
+ update_on_duplicate=update_on_duplicate,
744
+ transaction_mode=transaction_mode
743
745
  )
744
746
 
745
747
  def _infer_data_type(self, value: Any) -> str:
@@ -932,7 +934,8 @@ class MySQLUploader:
932
934
  partition_date_column: str = '日期',
933
935
  auto_create: bool = True,
934
936
  indexes: Optional[List[str]] = None,
935
- update_on_duplicate: bool = False
937
+ update_on_duplicate: bool = False,
938
+ transaction_mode: str = "batch"
936
939
  ):
937
940
  """
938
941
  上传数据到数据库的主入口方法
@@ -950,6 +953,11 @@ class MySQLUploader:
950
953
  :param auto_create: 表不存在时是否自动创建,默认为True
951
954
  :param indexes: 需要创建索引的列列表,可选
952
955
  :param update_on_duplicate: 遇到重复数据时是否更新旧数据(默认为False)
956
+ :param transaction_mode: 事务提交模式,可选值:
957
+ - 'row' : 逐行提交事务(错误隔离性好)
958
+ - 'batch' : 整批提交事务(性能最优)
959
+ - 'hybrid' : 混合模式(每N行提交,平衡性能与安全性)
960
+ 默认值为 'batch'
953
961
  :raises: 可能抛出各种验证和数据库相关异常
954
962
  """
955
963
  upload_start = time.time()
@@ -1035,7 +1043,7 @@ class MySQLUploader:
1035
1043
  db_name, part_table, part_data, filtered_set_typ,
1036
1044
  primary_keys, check_duplicate, duplicate_columns,
1037
1045
  allow_null, auto_create, partition_date_column,
1038
- indexes, batch_id, update_on_duplicate
1046
+ indexes, batch_id, update_on_duplicate, transaction_mode
1039
1047
  )
1040
1048
  except Exception as e:
1041
1049
  logger.error(sys._getframe().f_code.co_name, {
@@ -1051,7 +1059,7 @@ class MySQLUploader:
1051
1059
  db_name, table_name, prepared_data, filtered_set_typ,
1052
1060
  primary_keys, check_duplicate, duplicate_columns,
1053
1061
  allow_null, auto_create, partition_date_column,
1054
- indexes, batch_id, update_on_duplicate
1062
+ indexes, batch_id, update_on_duplicate, transaction_mode
1055
1063
  )
1056
1064
 
1057
1065
  success_flag = True
@@ -1083,7 +1091,8 @@ class MySQLUploader:
1083
1091
  duplicate_columns: Optional[List[str]] = None,
1084
1092
  batch_size: int = 1000,
1085
1093
  batch_id: Optional[str] = None,
1086
- update_on_duplicate: bool = False
1094
+ update_on_duplicate: bool = False,
1095
+ transaction_mode: str = "batch"
1087
1096
  ):
1088
1097
  """
1089
1098
  实际执行数据插入的方法
@@ -1097,10 +1106,26 @@ class MySQLUploader:
1097
1106
  :param batch_size: 批量插入大小,默认为1000
1098
1107
  :param update_on_duplicate: 遇到重复数据时是否更新旧数据(默认为False)
1099
1108
  :param batch_id: 批次ID用于日志追踪,可选
1109
+ :param transaction_mode: 事务提交模式,可选值:
1110
+ - 'row' : 逐行提交事务(错误隔离性好)
1111
+ - 'batch' : 整批提交事务(性能最优)
1112
+ - 'hybrid' : 混合模式(每N行提交,平衡性能与安全性)
1113
+ 默认值为 'batch'
1100
1114
  """
1101
1115
  if not data:
1102
1116
  return
1103
1117
 
1118
+ valid_modes = ('row', 'batch', 'hybrid')
1119
+ if transaction_mode.lower() not in valid_modes:
1120
+ logger.error(sys._getframe().f_code.co_name, {
1121
+ '库': db_name,
1122
+ '表': table_name,
1123
+ '参数异常': f'transaction_mode -> {transaction_mode}',
1124
+ '可选值': valid_modes,
1125
+ '自动使用默认模式': 'batch'
1126
+ })
1127
+ transaction_mode = 'batch'
1128
+
1104
1129
  # 获取所有列名(排除id列)
1105
1130
  all_columns = [col for col in set_typ.keys() if col.lower() != 'id']
1106
1131
  safe_columns = [self._validate_identifier(col) for col in all_columns]
@@ -1165,40 +1190,104 @@ class MySQLUploader:
1165
1190
  batch_skipped = 0
1166
1191
  batch_failed = 0
1167
1192
 
1168
- for row in batch:
1193
+ if transaction_mode == 'batch':
1194
+ # 批量模式特殊处理 - 尝试逐行插入但保持事务
1169
1195
  try:
1170
- # 准备参数
1171
- row_values = [row.get(col) for col in all_columns]
1172
- if check_duplicate and not update_on_duplicate:
1173
- row_values += [row.get(col) for col in duplicate_columns]
1174
-
1175
- cursor.execute(sql, row_values)
1176
-
1177
- if check_duplicate:
1178
- # 检查是否实际插入了行
1179
- if cursor.rowcount > 0:
1180
- batch_inserted += 1
1181
- else:
1182
- batch_skipped += 1
1183
- else:
1184
- batch_inserted += 1
1185
-
1196
+ for row_idx, row in enumerate(batch, 1):
1197
+ try:
1198
+ # 准备参数
1199
+ row_values = [row.get(col) for col in all_columns]
1200
+ if check_duplicate and not update_on_duplicate:
1201
+ row_values += [row.get(col) for col in duplicate_columns]
1202
+
1203
+ cursor.execute(sql, row_values)
1204
+
1205
+ if check_duplicate:
1206
+ # 检查是否实际插入了行
1207
+ if cursor.rowcount > 0:
1208
+ batch_inserted += 1
1209
+ else:
1210
+ batch_skipped += 1
1211
+ else:
1212
+ batch_inserted += 1
1213
+
1214
+ except Exception as e:
1215
+ batch_failed += 1
1216
+ logger.error(sys._getframe().f_code.co_name, {
1217
+ '库': db_name,
1218
+ '表': table_name,
1219
+ '批次/当前行': f'{batch_id} {row_idx}/{len(batch)}',
1220
+ 'error_type': type(e).__name__,
1221
+ '单行插入失败': str(e),
1222
+ '数据类型': set_typ,
1223
+ '是否排重': check_duplicate,
1224
+ '排重列': duplicate_columns,
1225
+ '事务提交模式': transaction_mode,
1226
+ '处理方式': '继续处理剩余行'
1227
+ })
1228
+ continue # 继续处理下一行
1229
+
1230
+ # 批量模式最后统一提交
1186
1231
  conn.commit()
1187
1232
 
1188
1233
  except Exception as e:
1234
+ # 如果整个批量操作失败,回滚
1189
1235
  conn.rollback()
1190
- batch_failed += 1
1236
+ batch_failed = len(batch) # 标记整个批次失败
1191
1237
  logger.error(sys._getframe().f_code.co_name, {
1192
1238
  '库': db_name,
1193
1239
  '表': table_name,
1194
- '批次': batch_id,
1240
+ '批次': f'{batch_id} {i+1}/{len(data)}',
1195
1241
  'error_type': type(e).__name__,
1196
- '单行插入失败': str(e),
1197
- '数据类型': set_typ,
1198
- '是否排重': check_duplicate,
1199
- '排重列': duplicate_columns
1242
+ '批量操作失败': str(e),
1243
+ '事务提交模式': transaction_mode,
1244
+ '处理方式': '整个批次回滚'
1200
1245
  })
1201
1246
 
1247
+ else: # row 或 hybrid 模式
1248
+ for row_idx, row in enumerate(batch, 1):
1249
+ try:
1250
+ # 准备参数
1251
+ row_values = [row.get(col) for col in all_columns]
1252
+ if check_duplicate and not update_on_duplicate:
1253
+ row_values += [row.get(col) for col in duplicate_columns]
1254
+
1255
+ cursor.execute(sql, row_values)
1256
+
1257
+ if check_duplicate:
1258
+ # 检查是否实际插入了行
1259
+ if cursor.rowcount > 0:
1260
+ batch_inserted += 1
1261
+ else:
1262
+ batch_skipped += 1
1263
+ else:
1264
+ batch_inserted += 1
1265
+
1266
+ # 根据模式决定提交时机
1267
+ if transaction_mode == 'row':
1268
+ conn.commit() # 逐行提交
1269
+ elif transaction_mode == 'hybrid' and row_idx % 100 == 0:
1270
+ conn.commit() # 每100行提交一次
1271
+
1272
+ except Exception as e:
1273
+ conn.rollback()
1274
+ batch_failed += 1
1275
+ logger.error(sys._getframe().f_code.co_name, {
1276
+ '库': db_name,
1277
+ '表': table_name,
1278
+ '批次/当前行': f'{batch_id} {row_idx}/{len(batch)}',
1279
+ 'error_type': type(e).__name__,
1280
+ '单行插入失败': str(e),
1281
+ '数据类型': set_typ,
1282
+ '是否排重': check_duplicate,
1283
+ '排重列': duplicate_columns,
1284
+ '事务提交模式': transaction_mode,
1285
+ })
1286
+
1287
+ # 混合模式最后统一提交
1288
+ if transaction_mode == 'hybrid':
1289
+ conn.commit()
1290
+
1202
1291
  # 更新总统计
1203
1292
  total_inserted += batch_inserted
1204
1293
  total_skipped += batch_skipped
@@ -1213,7 +1302,8 @@ class MySQLUploader:
1213
1302
  '数据量': len(batch),
1214
1303
  '插入': batch_inserted,
1215
1304
  '跳过': batch_skipped,
1216
- '失败': batch_failed
1305
+ '失败': batch_failed,
1306
+ '事务提交模式': transaction_mode,
1217
1307
  })
1218
1308
 
1219
1309
  logger.info('插入完成', {
@@ -1222,7 +1312,8 @@ class MySQLUploader:
1222
1312
  '完成总计': len(data),
1223
1313
  '插入': total_inserted,
1224
1314
  '跳过': total_skipped,
1225
- '失败': total_failed
1315
+ '失败': total_failed,
1316
+ '事务提交模式': transaction_mode,
1226
1317
  })
1227
1318
 
1228
1319
  def close(self):
@@ -1333,6 +1424,7 @@ def main():
1333
1424
  partition_date_column='日期', # 用于分表的日期列名,默认为'日期'
1334
1425
  auto_create=True, # 表不存在时自动创建, 默认参数不要更改
1335
1426
  indexes=[], # 指定索引列
1427
+ transaction_mode='row', # 事务模式
1336
1428
  )
1337
1429
 
1338
1430
  uploader.close()
@@ -28,10 +28,23 @@ config_file = os.path.join(dir_path, 'spd.txt')
28
28
  content = config.read_config(file_path=config_file)
29
29
  username, password, host, port = content['username'], content['password'], content['host'], content['port']
30
30
 
31
- m_engine = mysql.MysqlUpload(username=username, password=password, host=host, port=port, charset='utf8mb4')
31
+ # m_engine = mysql.MysqlUpload(username=username, password=password, host=host, port=port, charset='utf8mb4')
32
+ uld = uploader.MySQLUploader(username=username, password=password, host=host, port=int(port), pool_size=10)
32
33
  # 实例化一个数据查询类,用来获取 cookies 表数据
33
34
  download = s_query.QueryDatas(username=username, password=password, host=host, port=port)
34
- logger = spider_logging.setup_logging()
35
+ logger = mylogger.MyLogger(
36
+ name='aikucun',
37
+ logging_mode='file',
38
+ log_level='info',
39
+ log_file='aikucun.log',
40
+ log_format='json',
41
+ max_log_size=50,
42
+ backup_count=5,
43
+ enable_async=False, # 是否启用异步日志
44
+ sample_rate=1, # 采样50%的DEBUG/INFO日志
45
+ sensitive_fields=[], # 敏感字段列表
46
+ enable_metrics=False, # 是否启用性能指标
47
+ )
35
48
 
36
49
 
37
50
  def keep_connect(_db_name, _config, max_try: int=10):
@@ -175,10 +188,26 @@ class AikuCun:
175
188
  '更新时间': 'timestamp'
176
189
  }
177
190
  # 更新至数据库记录
178
- m_engine.dict_to_mysql(
191
+ # m_engine.dict_to_mysql(
192
+ # db_name=self.db_name,
193
+ # table_name=self.table_name,
194
+ # dict_data=self.token,
195
+ # )
196
+ uld.upload_data(
179
197
  db_name=self.db_name,
180
198
  table_name=self.table_name,
181
- dict_data=self.token,
199
+ data=self.token,
200
+ set_typ={},
201
+ primary_keys=[],
202
+ check_duplicate=False,
203
+ update_on_duplicate=False,
204
+ duplicate_columns=[],
205
+ allow_null=False,
206
+ partition_by=None,
207
+ partition_date_column='日期',
208
+ auto_create=True,
209
+ indexes=[],
210
+ transaction_mode='row', # 事务模式
182
211
  )
183
212
 
184
213
  def get_data_from_bbx(self, start_date=None, end_date=None, item_type='spu', page_num=1, page_size=300):
@@ -400,14 +429,30 @@ class AikuCun:
400
429
  drop_dup = ['日期', '平台', '店铺名称', '商品款号', '访客量']
401
430
  else:
402
431
  drop_dup = ['日期', '平台', '店铺名称', '条码']
403
- m_engine.insert_many_dict(
432
+ # m_engine.insert_many_dict(
433
+ # db_name=db_name,
434
+ # table_name=table_name,
435
+ # dict_data_list=_results,
436
+ # icm_update=drop_dup, # 唯一组合键
437
+ # # unique_main_key=['人群id'],
438
+ # set_typ=set_typ,
439
+ # allow_not_null=False, # 创建允许插入空值的列
440
+ # )
441
+ uld.upload_data(
404
442
  db_name=db_name,
405
443
  table_name=table_name,
406
- dict_data_list=_results,
407
- icm_update=drop_dup, # 唯一组合键
408
- # unique_main_key=['人群id'],
409
- set_typ=set_typ,
410
- allow_not_null=False, # 创建允许插入空值的列
444
+ data=_results,
445
+ set_typ=set_typ, # 定义列和数据类型
446
+ primary_keys=[], # 创建唯一主键
447
+ check_duplicate=True, # 检查重复数据
448
+ update_on_duplicate=False, # 遇到重复时更新数据,默认 False 跳过
449
+ duplicate_columns=drop_dup, # 指定排重的组合键
450
+ allow_null=False, # 允许插入空值
451
+ partition_by=None, # 按年/月分表
452
+ partition_date_column='日期', # 用于分表的日期列名,默认为'日期'
453
+ auto_create=True, # 表不存在时自动创建, 默认参数不要更改
454
+ indexes=[], # 指定索引列
455
+ transaction_mode='row', # 事务模式
411
456
  )
412
457
 
413
458
  def get_sign(self):
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.2
2
2
  Name: mdbq
3
- Version: 3.9.18
3
+ Version: 3.10.1
4
4
  Home-page: https://pypi.org/project/mdbq
5
5
  Author: xigua,
6
6
  Author-email: 2587125111@qq.com
@@ -1 +0,0 @@
1
- VERSION = '3.9.18'
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes