mdbq-4.2.2.tar.gz → mdbq-4.2.3.tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {mdbq-4.2.2 → mdbq-4.2.3}/PKG-INFO +2 -2
- mdbq-4.2.3/mdbq/__version__.py +1 -0
- {mdbq-4.2.2 → mdbq-4.2.3}/mdbq/mysql/uploader.py +59 -20
- {mdbq-4.2.2 → mdbq-4.2.3}/mdbq.egg-info/PKG-INFO +2 -2
- mdbq-4.2.2/mdbq/__version__.py +0 -1
- {mdbq-4.2.2 → mdbq-4.2.3}/README.txt +0 -0
- {mdbq-4.2.2 → mdbq-4.2.3}/mdbq/__init__.py +0 -0
- {mdbq-4.2.2 → mdbq-4.2.3}/mdbq/auth/__init__.py +0 -0
- {mdbq-4.2.2 → mdbq-4.2.3}/mdbq/auth/auth_backend.py +0 -0
- {mdbq-4.2.2 → mdbq-4.2.3}/mdbq/auth/crypto.py +0 -0
- {mdbq-4.2.2 → mdbq-4.2.3}/mdbq/auth/rate_limiter.py +0 -0
- {mdbq-4.2.2 → mdbq-4.2.3}/mdbq/js/__init__.py +0 -0
- {mdbq-4.2.2 → mdbq-4.2.3}/mdbq/js/jc.py +0 -0
- {mdbq-4.2.2 → mdbq-4.2.3}/mdbq/log/__init__.py +0 -0
- {mdbq-4.2.2 → mdbq-4.2.3}/mdbq/log/mylogger.py +0 -0
- {mdbq-4.2.2 → mdbq-4.2.3}/mdbq/myconf/__init__.py +0 -0
- {mdbq-4.2.2 → mdbq-4.2.3}/mdbq/myconf/myconf.py +0 -0
- {mdbq-4.2.2 → mdbq-4.2.3}/mdbq/mysql/__init__.py +0 -0
- {mdbq-4.2.2 → mdbq-4.2.3}/mdbq/mysql/deduplicator.py +0 -0
- {mdbq-4.2.2 → mdbq-4.2.3}/mdbq/mysql/mysql.py +0 -0
- {mdbq-4.2.2 → mdbq-4.2.3}/mdbq/mysql/s_query.py +0 -0
- {mdbq-4.2.2 → mdbq-4.2.3}/mdbq/mysql/unique_.py +0 -0
- {mdbq-4.2.2 → mdbq-4.2.3}/mdbq/other/__init__.py +0 -0
- {mdbq-4.2.2 → mdbq-4.2.3}/mdbq/other/download_sku_picture.py +0 -0
- {mdbq-4.2.2 → mdbq-4.2.3}/mdbq/other/error_handler.py +0 -0
- {mdbq-4.2.2 → mdbq-4.2.3}/mdbq/other/otk.py +0 -0
- {mdbq-4.2.2 → mdbq-4.2.3}/mdbq/other/pov_city.py +0 -0
- {mdbq-4.2.2 → mdbq-4.2.3}/mdbq/other/ua_sj.py +0 -0
- {mdbq-4.2.2 → mdbq-4.2.3}/mdbq/pbix/__init__.py +0 -0
- {mdbq-4.2.2 → mdbq-4.2.3}/mdbq/pbix/pbix_refresh.py +0 -0
- {mdbq-4.2.2 → mdbq-4.2.3}/mdbq/pbix/refresh_all.py +0 -0
- {mdbq-4.2.2 → mdbq-4.2.3}/mdbq/redis/__init__.py +0 -0
- {mdbq-4.2.2 → mdbq-4.2.3}/mdbq/redis/getredis.py +0 -0
- {mdbq-4.2.2 → mdbq-4.2.3}/mdbq/redis/redis_cache.py +0 -0
- {mdbq-4.2.2 → mdbq-4.2.3}/mdbq/route/__init__.py +0 -0
- {mdbq-4.2.2 → mdbq-4.2.3}/mdbq/route/analytics.py +0 -0
- {mdbq-4.2.2 → mdbq-4.2.3}/mdbq/route/monitor.py +0 -0
- {mdbq-4.2.2 → mdbq-4.2.3}/mdbq/route/routes.py +0 -0
- {mdbq-4.2.2 → mdbq-4.2.3}/mdbq/selenium/__init__.py +0 -0
- {mdbq-4.2.2 → mdbq-4.2.3}/mdbq/selenium/get_driver.py +0 -0
- {mdbq-4.2.2 → mdbq-4.2.3}/mdbq/spider/__init__.py +0 -0
- {mdbq-4.2.2 → mdbq-4.2.3}/mdbq.egg-info/SOURCES.txt +0 -0
- {mdbq-4.2.2 → mdbq-4.2.3}/mdbq.egg-info/dependency_links.txt +0 -0
- {mdbq-4.2.2 → mdbq-4.2.3}/mdbq.egg-info/top_level.txt +0 -0
- {mdbq-4.2.2 → mdbq-4.2.3}/setup.cfg +0 -0
- {mdbq-4.2.2 → mdbq-4.2.3}/setup.py +0 -0
mdbq-4.2.3/mdbq/__version__.py ADDED
@@ -0,0 +1 @@
+VERSION = '4.2.3'
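The new module simply pins the release's version string. A minimal sketch of reading it programmatically (assuming the package layout shown in the file list above):

from mdbq.__version__ import VERSION

print(VERSION)  # '4.2.3'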
{mdbq-4.2.2 → mdbq-4.2.3}/mdbq/mysql/uploader.py
@@ -13,7 +13,6 @@ from decimal import Decimal, InvalidOperation
 import math
 import concurrent.futures
 import threading
-from queue import Queue
 import pymysql
 import pandas as pd
 import psutil
@@ -668,7 +667,8 @@ class TableManager:

     def create_table(self, db_name: str, table_name: str, columns: Dict[str, str],
                      primary_keys: Optional[List[str]] = None,
-                     unique_keys: Optional[List[List[str]]] = None):
+                     unique_keys: Optional[List[List[str]]] = None,
+                     allow_null: bool = False):
         """Create a table"""
         db_name = self._sanitize_identifier(db_name)
         table_name = self._sanitize_identifier(table_name)
@@ -684,7 +684,8 @@ class TableManager:
             if col_name.lower() in ['id', 'create_at', 'update_at']:
                 continue
             safe_col_name = self._sanitize_identifier(col_name)
-
+            null_constraint = "" if allow_null else " NOT NULL"
+            column_defs.append(f"`{safe_col_name}` {col_type}{null_constraint}")

         # Add timestamp columns
         column_defs.append("`create_at` TIMESTAMP NOT NULL DEFAULT CURRENT_TIMESTAMP")
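The effect of the new allow_null flag is easy to see in isolation. The sketch below mirrors the column-definition logic in the hunk above but is not the package's own code; the table and column names are invented for illustration:

def build_column_defs(columns, allow_null=False):
    # Every data column gets NOT NULL unless allow_null=True, as in the diff above
    defs = []
    for name, col_type in columns.items():
        null_constraint = "" if allow_null else " NOT NULL"
        defs.append(f"`{name}` {col_type}{null_constraint}")
    return defs

print(build_column_defs({"sku_id": "VARCHAR(64)", "price": "DECIMAL(10,2)"}))
# ['`sku_id` VARCHAR(64) NOT NULL', '`price` DECIMAL(10,2) NOT NULL']
print(build_column_defs({"sku_id": "VARCHAR(64)"}, allow_null=True))
# ['`sku_id` VARCHAR(64)']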
@@ -785,6 +786,10 @@ class DataProcessor:
             chunk_size = min(chunk_size, 1000)  # Shrink the chunk when memory is tight

         if isinstance(data, pd.DataFrame):
+            # Normalize all DataFrame column names to lowercase
+            data = data.copy()
+            data.columns = [col.lower() for col in data.columns]
+
             # For large DataFrames, use a more efficient chunking strategy
             if len(data) > 50000:
                 # Large datasets use pandas chunked reads
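The DataFrame branch now lowercases column names before chunking, so mixed-case input columns line up with the lowercase schema. A quick standalone illustration (column names invented):

import pandas as pd

df = pd.DataFrame({"Shop_Name": ["a"], "GMV": [1.0]})
df = df.copy()
df.columns = [col.lower() for col in df.columns]
print(list(df.columns))  # ['shop_name', 'gmv']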
@@ -795,11 +800,23 @@ class DataProcessor:
                 chunk = data.iloc[i:i + chunk_size]
                 yield chunk.to_dict('records')
         elif isinstance(data, dict):
-
+            # Normalize the dict keys to lowercase
+            normalized_dict = {}
+            for key, value in data.items():
+                normalized_dict[key.lower()] = value
+            yield [normalized_dict]
         elif isinstance(data, list):
             if all(isinstance(item, dict) for item in data):
-
-
+                # Normalize the keys of every dict in the list to lowercase
+                normalized_data = []
+                for item in data:
+                    normalized_item = {}
+                    for key, value in item.items():
+                        normalized_item[key.lower()] = value
+                    normalized_data.append(normalized_item)
+
+                for i in range(0, len(normalized_data), chunk_size):
+                    yield normalized_data[i:i + chunk_size]
             else:
                 raise ValueError("列表中必须全部是字典")
         else:
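The same lowercase normalization is applied to dict and list-of-dict inputs before they are yielded in chunks. A self-contained sketch of the equivalent behaviour (not the package's own function; sample data invented):

def normalize_records(data, chunk_size=2):
    # Lowercase every key, then yield fixed-size chunks, as in the diff above
    normalized = [{k.lower(): v for k, v in item.items()} for item in data]
    for i in range(0, len(normalized), chunk_size):
        yield normalized[i:i + chunk_size]

rows = [{"Shop": "a", "GMV": 1}, {"Shop": "b", "GMV": 2}, {"Shop": "c", "GMV": 3}]
for chunk in normalize_records(rows):
    print(chunk)
# [{'shop': 'a', 'gmv': 1}, {'shop': 'b', 'gmv': 2}]
# [{'shop': 'c', 'gmv': 3}]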
@@ -938,9 +955,21 @@ class DataInserter:
             try:
                 cursor.executemany(sql, batch_values)
                 total_inserted += len(batch_values)
-            except pymysql.err.IntegrityError:
-
-                logger.debug('
+            except pymysql.err.IntegrityError as e:
+                # Batch insert hit a unique-constraint conflict; fall back to row-by-row inserts
+                logger.debug('批量插入唯一约束冲突,尝试逐行插入', {'批次大小': len(batch_values)})
+
+                # Insert row by row so conflicts can be handled individually
+                for single_value in batch_values:
+                    try:
+                        cursor.execute(sql, single_value)
+                        total_inserted += 1
+                    except pymysql.err.IntegrityError:
+                        total_skipped += 1
+                        logger.debug('单行插入唯一约束冲突,跳过')
+                    except Exception as single_e:
+                        total_failed += 1
+                        logger.error('单行插入失败', {'错误': str(single_e)})
             except Exception as e:
                 logger.error('批量插入失败', {'错误': str(e), '批次大小': len(batch_values)})
                 raise
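The insert path no longer drops a whole batch on a unique-key conflict: it falls back to row-by-row inserts so only the conflicting rows are skipped. A minimal pymysql sketch of the same pattern (helper name, counters, and the calling context are illustrative, not the package's API):

import pymysql

def insert_with_fallback(cursor, sql, batch_values):
    inserted = skipped = failed = 0
    try:
        cursor.executemany(sql, batch_values)
        inserted += len(batch_values)
    except pymysql.err.IntegrityError:
        # The batch hit a duplicate key: retry one row at a time
        for row in batch_values:
            try:
                cursor.execute(sql, row)
                inserted += 1
            except pymysql.err.IntegrityError:
                skipped += 1   # duplicate row, skip it
            except Exception:
                failed += 1    # unrelated failure on this row
    return inserted, skipped, failed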
@@ -1077,12 +1106,6 @@ class MySQLUploader:
         }

         try:
-            # Compute the original data size
-            original_data_size = 0
-            if isinstance(data, (pd.DataFrame, list)):
-                original_data_size = len(data)
-            elif isinstance(data, dict):
-                original_data_size = 1

             # Normalize the data into a streaming iterator
             normalized_data = DataProcessor.normalize_data(data)
@@ -1096,6 +1119,9 @@ class MySQLUploader:
                 normalized_data = DataProcessor.normalize_data(data)
                 logger.debug('自动推断数据类型', {'类型映射': set_typ})

+            # Convert the keys of set_typ to lowercase
+            set_typ = self.tran_set_typ_to_lower(set_typ)
+
             # Ensure the database exists
             self.table_mgr.ensure_database_exists(db_name)

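Because data keys are now lowercased during normalization, the user-supplied type map has to be lowercased as well, otherwise a key such as 'Shop_Name' would never match its column. Roughly (sample keys invented):

set_typ = {"Shop_Name": "VARCHAR(64)", "GMV": "DECIMAL(10,2)"}
set_typ = {k.lower(): v for k, v in set_typ.items()}
print(set_typ)  # {'shop_name': 'VARCHAR(64)', 'gmv': 'DECIMAL(10,2)'}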
@@ -1142,7 +1168,7 @@ class MySQLUploader:
             # Ensure the table exists
             if not self.table_mgr.table_exists(db_name, table_name):
                 self.table_mgr.create_table(db_name, table_name, set_typ,
-                                             unique_keys=unique_keys)
+                                             unique_keys=unique_keys, allow_null=allow_null)
                 result['tables_created'].append(f"{db_name}.{table_name}")
             else:
                 # Table already exists; ensure the timestamp columns are present (keep the existing primary-key structure)
@@ -1275,7 +1301,7 @@ class MySQLUploader:
                 if table_key not in created_tables:
                     if not self.table_mgr.table_exists(db_name, partition_table_name):
                         self.table_mgr.create_table(db_name, partition_table_name, set_typ,
-                                                     unique_keys=unique_keys)
+                                                     unique_keys=unique_keys, allow_null=allow_null)
                         result['tables_created'].append(table_key)
                     else:
                         # Table already exists; ensure the timestamp columns are present (keep the existing primary-key structure)
@@ -1358,7 +1384,10 @@ class MySQLUploader:
                 set_typ = DataTypeInferrer.infer_types_from_data(first_chunk)
                 normalized_data = DataProcessor.normalize_data(data, chunk_size=2000)
                 logger.debug('自动推断数据类型', {'类型映射': set_typ})
-
+
+            # Convert the keys of set_typ to lowercase
+            set_typ = self.tran_set_typ_to_lower(set_typ)
+
             # Ensure the database exists
             self.table_mgr.ensure_database_exists(db_name)

@@ -1391,7 +1420,7 @@ class MySQLUploader:
                 if table_key not in created_tables_set:
                     if not self.table_mgr.table_exists(db_name, partition_table_name):
                         self.table_mgr.create_table(db_name, partition_table_name, set_typ,
-                                                     unique_keys=unique_keys)
+                                                     unique_keys=unique_keys, allow_null=allow_null)
                         chunk_result['tables_created'].append(table_key)
                     else:
                         self.table_mgr.ensure_system_columns(db_name, partition_table_name)
@@ -1416,7 +1445,7 @@ class MySQLUploader:
                 if table_key not in created_tables_set:
                     if not self.table_mgr.table_exists(db_name, table_name):
                         self.table_mgr.create_table(db_name, table_name, set_typ,
-                                                     unique_keys=unique_keys)
+                                                     unique_keys=unique_keys, allow_null=allow_null)
                         chunk_result = {'tables_created': [table_key]}
                     else:
                         self.table_mgr.ensure_system_columns(db_name, table_name)
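Taken together, the allow_null flag is threaded from the upload path into every create_table call, for regular and partition tables alike. A hypothetical call site might look like the following; the entry-point name upload_data and its parameter list are illustrative only, since this diff does not show the public method signature:

# Hypothetical usage; 'upload_data' and its parameters are assumptions, not confirmed by this diff.
uploader.upload_data(
    db_name="shop_db",
    table_name="orders",
    data=rows,
    set_typ={"order_id": "BIGINT", "pay_time": "DATETIME"},
    unique_keys=[["order_id"]],
    allow_null=True,  # newly created tables omit NOT NULL on data columns
)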
@@ -1480,6 +1509,16 @@ class MySQLUploader:
             result['success'] = False

         return result
+
+    def tran_set_typ_to_lower(self, set_typ: Dict[str, str]) -> Dict[str, str]:
+        if not isinstance(set_typ, dict):
+            return set_typ
+
+        set_typ_lower = {}
+        for key, value in set_typ.items():
+            set_typ_lower[key.lower()] = value
+
+        return set_typ_lower


 # Usage example
mdbq-4.2.2/mdbq/__version__.py DELETED
@@ -1 +0,0 @@
-VERSION = '4.2.2'