mdbq 4.2.2__py3-none-any.whl → 4.2.3__py3-none-any.whl

This diff shows the changes between two publicly released versions of the package, as they appear in their public registry. It is provided for informational purposes only.

Potentially problematic release: this version of mdbq might be problematic.

mdbq/__version__.py CHANGED
@@ -1 +1 @@
- VERSION = '4.2.2'
+ VERSION = '4.2.3'
mdbq/mysql/uploader.py CHANGED
@@ -13,7 +13,6 @@ from decimal import Decimal, InvalidOperation
  import math
  import concurrent.futures
  import threading
- from queue import Queue
  import pymysql
  import pandas as pd
  import psutil
@@ -668,7 +667,8 @@ class TableManager:

  def create_table(self, db_name: str, table_name: str, columns: Dict[str, str],
                   primary_keys: Optional[List[str]] = None,
-                  unique_keys: Optional[List[List[str]]] = None):
+                  unique_keys: Optional[List[List[str]]] = None,
+                  allow_null: bool = False):
      """Create a table."""
      db_name = self._sanitize_identifier(db_name)
      table_name = self._sanitize_identifier(table_name)
@@ -684,7 +684,8 @@ class TableManager:
  if col_name.lower() in ['id', 'create_at', 'update_at']:
      continue
  safe_col_name = self._sanitize_identifier(col_name)
- column_defs.append(f"`{safe_col_name}` {col_type} NOT NULL")
+ null_constraint = "" if allow_null else " NOT NULL"
+ column_defs.append(f"`{safe_col_name}` {col_type}{null_constraint}")

  # Add the timestamp columns
  column_defs.append("`create_at` TIMESTAMP NOT NULL DEFAULT CURRENT_TIMESTAMP")
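
For illustration only (not part of the package diff): the new allow_null flag simply decides whether generated column definitions carry a NOT NULL constraint. A minimal standalone sketch of that logic, with a made-up helper name and sample columns:

    from typing import Dict, List

    def build_column_defs(columns: Dict[str, str], allow_null: bool = False) -> List[str]:
        """Sketch of the changed DDL logic: omit NOT NULL when allow_null is True."""
        column_defs = []
        for col_name, col_type in columns.items():
            if col_name.lower() in ['id', 'create_at', 'update_at']:
                continue  # system columns are handled separately in the real code
            null_constraint = "" if allow_null else " NOT NULL"
            column_defs.append(f"`{col_name}` {col_type}{null_constraint}")
        return column_defs

    print(build_column_defs({'shop_name': 'VARCHAR(255)', 'sales': 'INT'}))
    # ['`shop_name` VARCHAR(255) NOT NULL', '`sales` INT NOT NULL']
    print(build_column_defs({'shop_name': 'VARCHAR(255)', 'sales': 'INT'}, allow_null=True))
    # ['`shop_name` VARCHAR(255)', '`sales` INT']

The flag defaults to False, so existing callers keep getting NOT NULL columns.
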
@@ -785,6 +786,10 @@ class DataProcessor:
      chunk_size = min(chunk_size, 1000)  # shrink the chunk size when memory is tight

      if isinstance(data, pd.DataFrame):
+         # Normalize DataFrame column names to lowercase
+         data = data.copy()
+         data.columns = [col.lower() for col in data.columns]
+
          # For large DataFrames, use a more efficient chunking strategy
          if len(data) > 50000:
              # Large datasets use pandas-style chunked iteration
@@ -795,11 +800,23 @@ class DataProcessor:
          chunk = data.iloc[i:i + chunk_size]
          yield chunk.to_dict('records')
  elif isinstance(data, dict):
-     yield [data]
+     # Normalize dict keys to lowercase
+     normalized_dict = {}
+     for key, value in data.items():
+         normalized_dict[key.lower()] = value
+     yield [normalized_dict]
  elif isinstance(data, list):
      if all(isinstance(item, dict) for item in data):
-         for i in range(0, len(data), chunk_size):
-             yield data[i:i + chunk_size]
+         # Normalize the keys of every dict in the list to lowercase
+         normalized_data = []
+         for item in data:
+             normalized_item = {}
+             for key, value in item.items():
+                 normalized_item[key.lower()] = value
+             normalized_data.append(normalized_item)
+
+         for i in range(0, len(normalized_data), chunk_size):
+             yield normalized_data[i:i + chunk_size]
      else:
          raise ValueError("列表中必须全部是字典")
  else:
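
For illustration only: normalize_data now lowercases DataFrame column names and dict keys before chunking, which makes column matching case-insensitive downstream. A self-contained sketch of that normalization (the helper name lowercase_keys is made up):

    import pandas as pd

    def lowercase_keys(data):
        """Mirror the new key normalization for the three accepted input shapes."""
        if isinstance(data, pd.DataFrame):
            data = data.copy()
            data.columns = [col.lower() for col in data.columns]
            return data
        if isinstance(data, dict):
            return {key.lower(): value for key, value in data.items()}
        if isinstance(data, list) and all(isinstance(item, dict) for item in data):
            return [{key.lower(): value for key, value in item.items()} for item in data]
        raise ValueError("expected a DataFrame, a dict, or a list of dicts")

    print(lowercase_keys(pd.DataFrame({'Shop_Name': ['a'], 'SaleS': [1]})).columns.tolist())
    # ['shop_name', 'sales']
    print(lowercase_keys([{'Shop_Name': 'a', 'SaleS': 1}]))
    # [{'shop_name': 'a', 'sales': 1}]
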
@@ -938,9 +955,21 @@ class DataInserter:
  try:
      cursor.executemany(sql, batch_values)
      total_inserted += len(batch_values)
- except pymysql.err.IntegrityError:
-     total_skipped += len(batch_values)
-     logger.debug('批量插入唯一约束冲突,跳过', {'批次大小': len(batch_values)})
+ except pymysql.err.IntegrityError as e:
+     # The batch insert hit a unique-key conflict; fall back to row-by-row inserts
+     logger.debug('批量插入唯一约束冲突,尝试逐行插入', {'批次大小': len(batch_values)})
+
+     # Insert row by row so only the conflicting rows are skipped
+     for single_value in batch_values:
+         try:
+             cursor.execute(sql, single_value)
+             total_inserted += 1
+         except pymysql.err.IntegrityError:
+             total_skipped += 1
+             logger.debug('单行插入唯一约束冲突,跳过')
+         except Exception as single_e:
+             total_failed += 1
+             logger.error('单行插入失败', {'错误': str(single_e)})
  except Exception as e:
      logger.error('批量插入失败', {'错误': str(e), '批次大小': len(batch_values)})
      raise
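
For illustration only: the new conflict handling is "try executemany first, and on a unique-key conflict retry the batch row by row so only the conflicting rows are skipped"; 4.2.2 skipped the whole batch on the first conflict. A standalone sketch of that pattern against a plain pymysql cursor, where sql and batch_values are whatever the caller prepared:

    import pymysql

    def insert_batch_with_fallback(cursor, sql, batch_values):
        """Fast path: executemany. On IntegrityError, insert row by row and count outcomes."""
        inserted = skipped = failed = 0
        try:
            cursor.executemany(sql, batch_values)
            inserted += len(batch_values)
        except pymysql.err.IntegrityError:
            for row in batch_values:
                try:
                    cursor.execute(sql, row)
                    inserted += 1
                except pymysql.err.IntegrityError:
                    skipped += 1   # duplicate against a unique key: skip just this row
                except Exception:
                    failed += 1    # other per-row errors are counted instead of aborting the batch
        return inserted, skipped, failed
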
@@ -1077,12 +1106,6 @@ class MySQLUploader:
  }

  try:
-     # Compute the original data size
-     original_data_size = 0
-     if isinstance(data, (pd.DataFrame, list)):
-         original_data_size = len(data)
-     elif isinstance(data, dict):
-         original_data_size = 1

      # Normalize the data into a streaming iterator
      normalized_data = DataProcessor.normalize_data(data)
@@ -1096,6 +1119,9 @@ class MySQLUploader:
      normalized_data = DataProcessor.normalize_data(data)
      logger.debug('自动推断数据类型', {'类型映射': set_typ})

+     # Normalize the keys of set_typ to lowercase
+     set_typ = self.tran_set_typ_to_lower(set_typ)
+
      # Make sure the database exists
      self.table_mgr.ensure_database_exists(db_name)

@@ -1142,7 +1168,7 @@ class MySQLUploader:
  # Make sure the table exists
  if not self.table_mgr.table_exists(db_name, table_name):
      self.table_mgr.create_table(db_name, table_name, set_typ,
-                                 unique_keys=unique_keys)
+                                 unique_keys=unique_keys, allow_null=allow_null)
      result['tables_created'].append(f"{db_name}.{table_name}")
  else:
      # Table already exists; make sure the timestamp columns are present (keep the original primary-key structure)
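
For illustration only: the upload path now forwards the allow_null flag whenever it has to create a missing table. The tiny stub below (TableManagerStub, ensure_table, and all names and values are made up) just shows the forwarding pattern:

    class TableManagerStub:
        """Stand-in for TableManager that only records which tables 'exist'."""
        def __init__(self):
            self.existing = set()

        def table_exists(self, db_name, table_name):
            return (db_name, table_name) in self.existing

        def create_table(self, db_name, table_name, columns,
                         unique_keys=None, allow_null=False):
            print(f"CREATE {db_name}.{table_name} allow_null={allow_null}")
            self.existing.add((db_name, table_name))

    def ensure_table(table_mgr, db_name, table_name, set_typ, unique_keys, allow_null):
        # Same shape as the changed call sites: create the table only if it is missing,
        # passing allow_null through instead of relying on the old NOT NULL default.
        if not table_mgr.table_exists(db_name, table_name):
            table_mgr.create_table(db_name, table_name, set_typ,
                                   unique_keys=unique_keys, allow_null=allow_null)

    ensure_table(TableManagerStub(), 'demo_db', 'orders', {'sales': 'INT'}, [['sales']], True)
    # CREATE demo_db.orders allow_null=True
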
@@ -1275,7 +1301,7 @@ class MySQLUploader:
  if table_key not in created_tables:
      if not self.table_mgr.table_exists(db_name, partition_table_name):
          self.table_mgr.create_table(db_name, partition_table_name, set_typ,
-                                     unique_keys=unique_keys)
+                                     unique_keys=unique_keys, allow_null=allow_null)
          result['tables_created'].append(table_key)
      else:
          # Table already exists; make sure the timestamp columns are present (keep the original primary-key structure)
@@ -1358,7 +1384,10 @@ class MySQLUploader:
      set_typ = DataTypeInferrer.infer_types_from_data(first_chunk)
      normalized_data = DataProcessor.normalize_data(data, chunk_size=2000)
      logger.debug('自动推断数据类型', {'类型映射': set_typ})
-
+
+     # Normalize the keys of set_typ to lowercase
+     set_typ = self.tran_set_typ_to_lower(set_typ)
+
      # Make sure the database exists
      self.table_mgr.ensure_database_exists(db_name)

@@ -1391,7 +1420,7 @@ class MySQLUploader:
  if table_key not in created_tables_set:
      if not self.table_mgr.table_exists(db_name, partition_table_name):
          self.table_mgr.create_table(db_name, partition_table_name, set_typ,
-                                     unique_keys=unique_keys)
+                                     unique_keys=unique_keys, allow_null=allow_null)
          chunk_result['tables_created'].append(table_key)
      else:
          self.table_mgr.ensure_system_columns(db_name, partition_table_name)
@@ -1416,7 +1445,7 @@ class MySQLUploader:
  if table_key not in created_tables_set:
      if not self.table_mgr.table_exists(db_name, table_name):
          self.table_mgr.create_table(db_name, table_name, set_typ,
-                                     unique_keys=unique_keys)
+                                     unique_keys=unique_keys, allow_null=allow_null)
          chunk_result = {'tables_created': [table_key]}
      else:
          self.table_mgr.ensure_system_columns(db_name, table_name)
@@ -1480,6 +1509,16 @@ class MySQLUploader:
          result['success'] = False

      return result
+
+ def tran_set_typ_to_lower(self, set_typ: Dict[str, str]) -> Dict[str, str]:
+     if not isinstance(set_typ, dict):
+         return set_typ
+
+     set_typ_lower = {}
+     for key, value in set_typ.items():
+         set_typ_lower[key.lower()] = value
+
+     return set_typ_lower


  # Usage example
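
For illustration only: the new tran_set_typ_to_lower helper just lowercases the keys of the column-type mapping so they line up with the lowercased data keys. A standalone equivalent and a quick usage example:

    def tran_set_typ_to_lower(set_typ):
        """Module-level equivalent of the new method; non-dict input passes through unchanged."""
        if not isinstance(set_typ, dict):
            return set_typ
        return {key.lower(): value for key, value in set_typ.items()}

    print(tran_set_typ_to_lower({'Shop_Name': 'VARCHAR(255)', 'Sales': 'DECIMAL(12,2)'}))
    # {'shop_name': 'VARCHAR(255)', 'sales': 'DECIMAL(12,2)'}
    print(tran_set_typ_to_lower(None))  # None is returned as-is
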
mdbq-4.2.2.dist-info/METADATA → mdbq-4.2.3.dist-info/METADATA
@@ -1,6 +1,6 @@
- Metadata-Version: 2.2
+ Metadata-Version: 2.4
  Name: mdbq
- Version: 4.2.2
+ Version: 4.2.3
  Home-page: https://pypi.org/project/mdbq
  Author: xigua,
  Author-email: 2587125111@qq.com
mdbq-4.2.2.dist-info/RECORD → mdbq-4.2.3.dist-info/RECORD
@@ -1,5 +1,5 @@
  mdbq/__init__.py,sha256=Il5Q9ATdX8yXqVxtP_nYqUhExzxPC_qk_WXQ_4h0exg,16
- mdbq/__version__.py,sha256=gNDA6f7PmXcbqB0lTY4HIgD6dEB6SGywjhHa3HAyczA,17
+ mdbq/__version__.py,sha256=Q7DXB--Cx4H7Jq6-YWUEM7QiHlV2TIxeMIEtk2cEeP0,17
  mdbq/auth/__init__.py,sha256=pnPMAt63sh1B6kEvmutUuro46zVf2v2YDAG7q-jV_To,24
  mdbq/auth/auth_backend.py,sha256=iLN7AqiSq7fQgFtNtge_TIlVOR1hrCSZXH6oId6uGX4,116924
  mdbq/auth/crypto.py,sha256=fcZRFCnrKVVdWDUx_zds51ynFYwS9DBvJOrRQVldrfM,15931
@@ -15,7 +15,7 @@ mdbq/mysql/deduplicator.py,sha256=tzLIm9K9S0lGLlVTI0dDQVYpWX796XCuyufmw1lU26Y,73
  mdbq/mysql/mysql.py,sha256=pDg771xBugCMSTWeskIFTi3pFLgaqgyG3smzf-86Wn8,56772
  mdbq/mysql/s_query.py,sha256=N2xHJf2CiUXjXIVBemdst-wamIP3908EGAJOFG13fCU,50475
  mdbq/mysql/unique_.py,sha256=MaztT-WIyEQUs-OOYY4pFulgHVcXR1BfCy3QUz0XM_U,21127
- mdbq/mysql/uploader.py,sha256=2inrXu3PIlvowfm5_0U4Trx_mraApjII8g_5ycFbNJ0,60059
+ mdbq/mysql/uploader.py,sha256=zi7Y2VQTykChKl6qvQ2fY_0T2SjVBJP8Vqm6vRI3sao,62088
  mdbq/other/__init__.py,sha256=jso1oHcy6cJEfa7udS_9uO5X6kZLoPBF8l3wCYmr5dM,18
  mdbq/other/download_sku_picture.py,sha256=MJX47I9jTUMFzO1kyEH-onIzAGa6QpgfmghrmyYnEsc,45111
  mdbq/other/error_handler.py,sha256=4p5haAXSY-P78stp4Xwo_MwAngWYqyKj5ogWIuYXMeY,12631
@@ -35,7 +35,7 @@ mdbq/route/routes.py,sha256=QVGfTvDgu0CpcKCvk1ra74H8uojgqTLUav1fnVAqLEA,29433
  mdbq/selenium/__init__.py,sha256=AKzeEceqZyvqn2dEDoJSzDQnbuENkJSHAlbHAD0u0ZI,10
  mdbq/selenium/get_driver.py,sha256=1NTlVUE6QsyjTrVVVqTO2LOnYf578ccFWlWnvIXGtic,20903
  mdbq/spider/__init__.py,sha256=RBMFXGy_jd1HXZhngB2T2XTvJqki8P_Fr-pBcwijnew,18
- mdbq-4.2.2.dist-info/METADATA,sha256=vfhvk7DXQ267-NOPdqKJ_AWCWSEbWKdDjIf7bilbCXo,363
- mdbq-4.2.2.dist-info/WHEEL,sha256=jB7zZ3N9hIM9adW7qlTAyycLYW9npaWKLRzaoVcLKcM,91
- mdbq-4.2.2.dist-info/top_level.txt,sha256=2FQ-uLnCSB-OwFiWntzmwosW3X2Xqsg0ewh1axsaylA,5
- mdbq-4.2.2.dist-info/RECORD,,
+ mdbq-4.2.3.dist-info/METADATA,sha256=Kv4VfZBc5PDNFe8UFfBvVsJ33aj3jkfLUvmJjyO0HdU,363
+ mdbq-4.2.3.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
+ mdbq-4.2.3.dist-info/top_level.txt,sha256=2FQ-uLnCSB-OwFiWntzmwosW3X2Xqsg0ewh1axsaylA,5
+ mdbq-4.2.3.dist-info/RECORD,,
mdbq-4.2.2.dist-info/WHEEL → mdbq-4.2.3.dist-info/WHEEL
@@ -1,5 +1,5 @@
  Wheel-Version: 1.0
- Generator: setuptools (75.8.2)
+ Generator: setuptools (80.9.0)
  Root-Is-Purelib: true
  Tag: py3-none-any