mdbq 4.2.7.tar.gz → 4.2.9.tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of mdbq might be problematic.
- {mdbq-4.2.7 → mdbq-4.2.9}/PKG-INFO +1 -1
- mdbq-4.2.9/mdbq/__version__.py +1 -0
- {mdbq-4.2.7 → mdbq-4.2.9}/mdbq/mysql/uploader.py +89 -197
- {mdbq-4.2.7 → mdbq-4.2.9}/mdbq.egg-info/PKG-INFO +1 -1
- mdbq-4.2.7/mdbq/__version__.py +0 -1
- {mdbq-4.2.7 → mdbq-4.2.9}/README.txt +0 -0
- {mdbq-4.2.7 → mdbq-4.2.9}/mdbq/__init__.py +0 -0
- {mdbq-4.2.7 → mdbq-4.2.9}/mdbq/auth/__init__.py +0 -0
- {mdbq-4.2.7 → mdbq-4.2.9}/mdbq/auth/auth_backend.py +0 -0
- {mdbq-4.2.7 → mdbq-4.2.9}/mdbq/auth/crypto.py +0 -0
- {mdbq-4.2.7 → mdbq-4.2.9}/mdbq/auth/rate_limiter.py +0 -0
- {mdbq-4.2.7 → mdbq-4.2.9}/mdbq/js/__init__.py +0 -0
- {mdbq-4.2.7 → mdbq-4.2.9}/mdbq/js/jc.py +0 -0
- {mdbq-4.2.7 → mdbq-4.2.9}/mdbq/log/__init__.py +0 -0
- {mdbq-4.2.7 → mdbq-4.2.9}/mdbq/log/mylogger.py +0 -0
- {mdbq-4.2.7 → mdbq-4.2.9}/mdbq/myconf/__init__.py +0 -0
- {mdbq-4.2.7 → mdbq-4.2.9}/mdbq/myconf/myconf.py +0 -0
- {mdbq-4.2.7 → mdbq-4.2.9}/mdbq/mysql/__init__.py +0 -0
- {mdbq-4.2.7 → mdbq-4.2.9}/mdbq/mysql/deduplicator.py +0 -0
- {mdbq-4.2.7 → mdbq-4.2.9}/mdbq/mysql/mysql.py +0 -0
- {mdbq-4.2.7 → mdbq-4.2.9}/mdbq/mysql/s_query.py +0 -0
- {mdbq-4.2.7 → mdbq-4.2.9}/mdbq/mysql/unique_.py +0 -0
- {mdbq-4.2.7 → mdbq-4.2.9}/mdbq/other/__init__.py +0 -0
- {mdbq-4.2.7 → mdbq-4.2.9}/mdbq/other/download_sku_picture.py +0 -0
- {mdbq-4.2.7 → mdbq-4.2.9}/mdbq/other/error_handler.py +0 -0
- {mdbq-4.2.7 → mdbq-4.2.9}/mdbq/other/otk.py +0 -0
- {mdbq-4.2.7 → mdbq-4.2.9}/mdbq/other/pov_city.py +0 -0
- {mdbq-4.2.7 → mdbq-4.2.9}/mdbq/other/ua_sj.py +0 -0
- {mdbq-4.2.7 → mdbq-4.2.9}/mdbq/pbix/__init__.py +0 -0
- {mdbq-4.2.7 → mdbq-4.2.9}/mdbq/pbix/pbix_refresh.py +0 -0
- {mdbq-4.2.7 → mdbq-4.2.9}/mdbq/pbix/refresh_all.py +0 -0
- {mdbq-4.2.7 → mdbq-4.2.9}/mdbq/redis/__init__.py +0 -0
- {mdbq-4.2.7 → mdbq-4.2.9}/mdbq/redis/getredis.py +0 -0
- {mdbq-4.2.7 → mdbq-4.2.9}/mdbq/redis/redis_cache.py +0 -0
- {mdbq-4.2.7 → mdbq-4.2.9}/mdbq/route/__init__.py +0 -0
- {mdbq-4.2.7 → mdbq-4.2.9}/mdbq/route/analytics.py +0 -0
- {mdbq-4.2.7 → mdbq-4.2.9}/mdbq/route/monitor.py +0 -0
- {mdbq-4.2.7 → mdbq-4.2.9}/mdbq/route/routes.py +0 -0
- {mdbq-4.2.7 → mdbq-4.2.9}/mdbq/selenium/__init__.py +0 -0
- {mdbq-4.2.7 → mdbq-4.2.9}/mdbq/selenium/get_driver.py +0 -0
- {mdbq-4.2.7 → mdbq-4.2.9}/mdbq/spider/__init__.py +0 -0
- {mdbq-4.2.7 → mdbq-4.2.9}/mdbq.egg-info/SOURCES.txt +0 -0
- {mdbq-4.2.7 → mdbq-4.2.9}/mdbq.egg-info/dependency_links.txt +0 -0
- {mdbq-4.2.7 → mdbq-4.2.9}/mdbq.egg-info/top_level.txt +0 -0
- {mdbq-4.2.7 → mdbq-4.2.9}/setup.cfg +0 -0
- {mdbq-4.2.7 → mdbq-4.2.9}/setup.py +0 -0
mdbq-4.2.9/mdbq/__version__.py ADDED

@@ -0,0 +1 @@
+VERSION = '4.2.9'
{mdbq-4.2.7 → mdbq-4.2.9}/mdbq/mysql/uploader.py

@@ -11,8 +11,6 @@ from typing import Union, List, Dict, Optional, Any, Tuple, Iterator
 from functools import wraps
 from decimal import Decimal, InvalidOperation
 import math
-import concurrent.futures
-import threading
 import pymysql
 import pandas as pd
 import psutil
@@ -64,6 +62,17 @@ class DatabaseConnectionManager:
             'autocommit': False
         }

+        # Set the session time zone to Beijing time to keep timestamps consistent
+        if 'init_command' not in self.config:
+            pool_params['init_command'] = "SET time_zone = '+08:00'"
+        else:
+            # If the user already set init_command, append the time zone setting
+            existing_commands = self.config['init_command']
+            if 'time_zone' not in existing_commands.lower():
+                pool_params['init_command'] = f"{existing_commands}; SET time_zone = '+08:00'"
+            else:
+                pool_params['init_command'] = existing_commands
+
         if self.config.get('ssl'):
             pool_params['ssl'] = self.config['ssl']

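The merge behavior above keeps any user-supplied init_command and only appends the time zone statement when one is not already present. A minimal standalone sketch of that logic, assuming config is the plain dict shown in the diff (the helper name is hypothetical):

def merge_init_command(config):
    # Return the init_command the pool should use
    existing = config.get('init_command')
    if existing is None:
        return "SET time_zone = '+08:00'"
    if 'time_zone' not in existing.lower():
        # Append the time zone setting to the user's command
        return f"{existing}; SET time_zone = '+08:00'"
    return existing

# merge_init_command({}) -> "SET time_zone = '+08:00'"
# merge_init_command({'init_command': "SET NAMES utf8mb4"})
# -> "SET NAMES utf8mb4; SET time_zone = '+08:00'"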
@@ -256,18 +265,26 @@ class DataTypeInferrer:
         # Sample the data for type inference
         sample_data = data[:sample_size] if len(data) > sample_size else data

+        # First collect all column names
+        all_columns = set()
+        for row in sample_data:
+            for col in row.keys():
+                if col.lower() not in ['id', 'create_at', 'update_at']:
+                    all_columns.add(col)
+
+        # Initialize the candidate-type list for every column
+        for col in all_columns:
+            type_candidates[col] = []
+
         for row in sample_data:
             for col, value in row.items():
                 # Skip system columns
                 if col.lower() in ['id', 'create_at', 'update_at']:
                     continue

-
-
-
-                if col not in type_candidates:
-                    type_candidates[col] = []
-                type_candidates[col].append(mysql_type)
+                # Infer a type even when the value is empty
+                mysql_type = DataTypeInferrer.infer_mysql_type(value)
+                type_candidates[col].append(mysql_type)

         # Pick the most suitable type for each column
         for col, types in type_candidates.items():
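The rewritten sampling pre-registers every business column before the value loop and, per the new comment, infers a type even when a value is empty, so columns that are missing from some sampled rows or hold only empty values still end up with a candidate list. A rough standalone sketch of the two-pass idea, with a stand-in infer_mysql_type rather than the library's real inference rules:

def infer_mysql_type_stub(value):
    # Stand-in inference, not mdbq's actual rules
    if isinstance(value, bool) or value is None:
        return 'varchar(255)'
    if isinstance(value, int):
        return 'bigint'
    if isinstance(value, float):
        return 'decimal(20,6)'
    return 'varchar(255)'

def collect_type_candidates(sample_data):
    system = {'id', 'create_at', 'update_at'}
    # Pass 1: register every business column seen anywhere in the sample
    candidates = {col: [] for row in sample_data for col in row if col.lower() not in system}
    # Pass 2: append one inferred type per value, even for empty values
    for row in sample_data:
        for col, value in row.items():
            if col.lower() in system:
                continue
            candidates[col].append(infer_mysql_type_stub(value))
    return candidates

# Rows with different key sets still yield candidates for every column:
# collect_type_candidates([{'a': 1}, {'b': 'x'}]) -> {'a': ['bigint'], 'b': ['varchar(255)']}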
@@ -673,6 +690,19 @@ class TableManager:
         db_name = self._sanitize_identifier(db_name)
         table_name = self._sanitize_identifier(table_name)

+        # Validate that columns is not empty
+        if not columns:
+            raise ValueError(f"Failed to create table: columns must not be empty. Database: {db_name}, table: {table_name}")
+
+        # Validate that every column referenced in unique_keys exists in columns
+        if unique_keys:
+            business_columns = {k.lower(): k for k in columns.keys() if k.lower() not in ['id', 'create_at', 'update_at']}
+            for i, uk in enumerate(unique_keys):
+                for col in uk:
+                    col_lower = col.lower()
+                    if col_lower not in business_columns and col not in columns:
+                        raise ValueError(f"Column '{col}' in the unique constraint does not exist in the table definition. Available columns: {list(business_columns.keys())}")
+
         # Build the column definitions
         column_defs = []

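These guards make create_table fail fast with an explicit message instead of emitting a CREATE TABLE statement that MySQL would reject (empty column list) or that silently indexes a nonexistent column. The check reads as a standalone predicate; a sketch under the same assumptions as the diff:

def validate_table_definition(columns, unique_keys):
    # Mirror of the new validation, as a standalone sketch
    if not columns:
        raise ValueError('columns must not be empty')
    system = {'id', 'create_at', 'update_at'}
    business = {k.lower(): k for k in columns if k.lower() not in system}
    for uk in unique_keys or []:
        for col in uk:
            if col.lower() not in business and col not in columns:
                raise ValueError(f"unique-key column '{col}' is not defined; available: {list(business)}")

# validate_table_definition({'shop': 'varchar(100)'}, [['shop_id']])  -> ValueError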
@@ -705,8 +735,15 @@ class TableManager:
         safe_uk_parts = []
         for col in filtered_uk:
             safe_col_name = self._sanitize_identifier(col)
-            # Check whether a prefix index is needed
-
+            # Check whether a prefix index is needed - prefer the original column name, then try lowercase
+            col_lower = col.lower()
+            if col in columns:
+                col_type = columns[col].lower()
+            elif col_lower in columns:
+                col_type = columns[col_lower].lower()
+            else:
+                col_type = 'varchar(255)'
+
             if 'varchar' in col_type:
                 # Extract the varchar length
                 match = re.search(r'varchar\((\d+)\)', col_type)
@@ -716,20 +753,11 @@ class TableManager:
                     if length > 191:
                         prefix_length = 191
                         safe_uk_parts.append(f"`{safe_col_name}`({prefix_length})")
-                        logger.debug('Applied prefix index', {
-                            'column': col,
-                            'original_length': length,
-                            'prefix_length': prefix_length
-                        })
                     else:
                         safe_uk_parts.append(f"`{safe_col_name}`")
                 else:
                     # If no length was specified, default to a prefix index
                     safe_uk_parts.append(f"`{safe_col_name}`(191)")
-                    logger.debug('Applied default prefix index', {
-                        'column': col,
-                        'prefix_length': 191
-                    })
             else:
                 # Non-varchar columns are kept as-is
                 safe_uk_parts.append(f"`{safe_col_name}`")
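The 191-character cap is what keeps a utf8mb4 unique index inside InnoDB's legacy 767-byte key-prefix limit: 191 characters × 4 bytes per character = 764 bytes. A standalone sketch of the per-column rule applied above (names are illustrative):

import re

def unique_key_part(col_name, col_type):
    t = col_type.lower()
    if 'varchar' in t:
        m = re.search(r'varchar\((\d+)\)', t)
        if m:
            # Only varchars longer than 191 need a prefix index
            return f"`{col_name}`(191)" if int(m.group(1)) > 191 else f"`{col_name}`"
        # No explicit length: default to a prefix index
        return f"`{col_name}`(191)"
    # Non-varchar columns are indexed in full
    return f"`{col_name}`"

# unique_key_part('title', 'varchar(500)') -> '`title`(191)'
# unique_key_part('qty', 'int')            -> '`qty`'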
@@ -749,9 +777,17 @@ class TableManager:

         with self.conn_mgr.get_connection() as conn:
             with conn.cursor() as cursor:
-
-
-
+                try:
+                    cursor.execute(sql)
+                    conn.commit()
+                    logger.debug('Table created', {'database': db_name, 'table': table_name})
+                except Exception as e:
+                    logger.error('Failed to create table', {
+                        'database': db_name,
+                        'table': table_name,
+                        'error': str(e)
+                    })
+                    raise

     def get_partition_table_name(self, base_name: str, date_value: str, partition_by: str) -> str:
         """Get the partitioned table name"""
@@ -795,8 +831,6 @@ class TableManager:
         return cleaned


-
-
 class DataProcessor:
     """Data processor"""

@@ -1063,6 +1097,12 @@ class MySQLUploader:
    - Automatic table creation, table partitioning, and data-type inference
    - Highly available connection-pool management and retry mechanism
    - Optimized streaming batch inserts
+   - Automatically sets the connection time zone to Beijing time (+08:00) for timestamp consistency
+
+   Time zone notes:
+   - Every database connection is automatically set to Beijing time (+08:00)
+   - The create_at and update_at columns use CURRENT_TIMESTAMP and record time in the connection's time zone
+   - Use the check_timezone_settings() method to verify the time zone configuration
    """

    def __init__(self, username: str, password: str, host: str = 'localhost',
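Independent of the check_timezone_settings() helper the docstring mentions, the effect of the +08:00 init_command can be confirmed directly with pymysql, which accepts an init_command argument; connection parameters here are placeholders:

import pymysql

conn = pymysql.connect(host='localhost', user='user', password='secret',
                       init_command="SET time_zone = '+08:00'")
try:
    with conn.cursor() as cursor:
        # The session variable and NOW() both reflect the connection time zone
        cursor.execute("SELECT @@session.time_zone, NOW()")
        print(cursor.fetchone())  # e.g. ('+08:00', datetime.datetime(...))
finally:
    conn.close()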
@@ -1152,21 +1192,35 @@ class MySQLUploader:
            normalized_data = DataProcessor.normalize_data(data)

            # Infer or validate the column types
-           if set_typ is None:
+           if set_typ is None or not set_typ:
                # Use the first chunk for type inference
                first_chunk = next(iter(normalized_data))
+
+               if not first_chunk:
+                   raise ValueError("Data is empty; cannot infer column types")
+
                set_typ = DataTypeInferrer.infer_types_from_data(first_chunk)
                # Recreate the iterator
                normalized_data = DataProcessor.normalize_data(data)
                logger.debug('Auto-inferred data types', {'type_map': set_typ})
+
+               # Validate the inference result
+               if not set_typ or not any(col for col in set_typ.keys() if col.lower() not in ['id', 'create_at', 'update_at']):
+                   raise ValueError(f"Type inference failed; no valid business columns. Result: {set_typ}")

            # Normalize the set_typ keys to lowercase
            set_typ = self.tran_set_typ_to_lower(set_typ)

+           # Final validation: make sure business columns are defined
+           business_columns = {k: v for k, v in set_typ.items() if k.lower() not in ['id', 'create_at', 'update_at']}
+           if not business_columns:
+               raise ValueError(f"No valid business-column definitions. set_typ: {set_typ}")
+
            # Make sure the database exists
            self.table_mgr.ensure_database_exists(db_name)

            # Handle the table-partitioning logic
+
            if partition_by:
                upload_result = self._handle_partitioned_upload(
                    db_name, table_name, normalized_data, set_typ,
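Together these guards mean an empty payload or an inference result containing only system columns now raises a ValueError up front instead of reaching table creation with nothing to define. For example (call shape assumed, since the enclosing method's signature is not shown in this hunk):

# Assumed call shape; a payload whose first chunk is empty now fails
# immediately with ValueError("Data is empty; cannot infer column types")
# instead of attempting CREATE TABLE with no columns.
uploader.upload_data(db_name='mydb', table_name='orders', data=[{}])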
@@ -1372,6 +1426,16 @@ class MySQLUploader:
                main_result['failed_rows'] += partition_result['failed_rows']
                main_result['tables_created'].extend(partition_result['tables_created'])

+    def tran_set_typ_to_lower(self, set_typ: Dict[str, str]) -> Dict[str, str]:
+        if not isinstance(set_typ, dict) or set_typ is None:
+            return {}
+
+        set_typ_lower = {}
+        for key, value in set_typ.items():
+            set_typ_lower[key.lower()] = value
+
+        return set_typ_lower
+
     def close(self):
         """Close the connection"""
         if self.conn_mgr:
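tran_set_typ_to_lower is unchanged in spirit from the version removed further down, but its guard now returns an empty dict for non-dict input instead of echoing the bad value back, so downstream code always receives a dict. Hypothetical usage (construction arguments are placeholders):

uploader = MySQLUploader(username='user', password='secret')
uploader.tran_set_typ_to_lower({'Shop': 'varchar(100)', 'SALES': 'decimal(12,2)'})
# -> {'shop': 'varchar(100)', 'sales': 'decimal(12,2)'}
uploader.tran_set_typ_to_lower(None)  # -> {}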
@@ -1389,178 +1453,6 @@ class MySQLUploader:
     def __exit__(self, exc_type, exc_val, exc_tb):
         self.close()

-    def upload_data_concurrent(self, db_name: str, table_name: str,
-                               data: Union[Dict, List[Dict], pd.DataFrame],
-                               set_typ: Optional[Dict[str, str]] = None,
-                               allow_null: bool = False,
-                               partition_by: Optional[str] = None,
-                               partition_date_column: str = '日期',
-                               update_on_duplicate: bool = False,
-                               unique_keys: Optional[List[List[str]]] = None,
-                               max_workers: int = 3) -> Dict[str, Any]:
-        """
-        Upload data to the MySQL database concurrently
-
-        :param max_workers: maximum number of concurrent worker threads
-        :return: upload result details
-        """
-        db_name = db_name.lower()
-        table_name = table_name.lower()
-
-        result = {
-            'success': False,
-            'inserted_rows': 0,
-            'skipped_rows': 0,
-            'failed_rows': 0,
-            'tables_created': []
-        }
-
-        try:
-            # Normalize the data into a streaming iterator
-            normalized_data = DataProcessor.normalize_data(data, chunk_size=2000)  # smaller chunks for concurrency
-
-            # Infer or validate the column types
-            if set_typ is None:
-                first_chunk = next(iter(normalized_data))
-                set_typ = DataTypeInferrer.infer_types_from_data(first_chunk)
-                normalized_data = DataProcessor.normalize_data(data, chunk_size=2000)
-                logger.debug('Auto-inferred data types', {'type_map': set_typ})
-
-            # Normalize the set_typ keys to lowercase
-            set_typ = self.tran_set_typ_to_lower(set_typ)
-
-            # Make sure the database exists
-            self.table_mgr.ensure_database_exists(db_name)
-
-            # Create a lock for thread-safe table creation
-            table_creation_lock = threading.Lock()
-            created_tables_set = set()
-
-            def process_chunk_worker(chunk_data):
-                """Worker thread function"""
-                try:
-                    if partition_by:
-                        # Partitioned-table handling
-                        partitioned_chunk = DataProcessor.partition_data_by_date(
-                            chunk_data, partition_date_column, partition_by
-                        )
-
-                        chunk_result = {
-                            'inserted_rows': 0,
-                            'skipped_rows': 0,
-                            'failed_rows': 0,
-                            'tables_created': []
-                        }
-
-                        for partition_suffix, partition_data in partitioned_chunk.items():
-                            partition_table_name = f"{table_name}_{partition_suffix}"
-                            table_key = f"{db_name}.{partition_table_name}"
-
-                            # Ensure the table exists (thread-safe)
-                            with table_creation_lock:
-                                if table_key not in created_tables_set:
-                                    if not self.table_mgr.table_exists(db_name, partition_table_name):
-                                        self.table_mgr.create_table(db_name, partition_table_name, set_typ,
-                                                                    unique_keys=unique_keys, allow_null=allow_null)
-                                        chunk_result['tables_created'].append(table_key)
-                                    else:
-                                        self.table_mgr.ensure_system_columns(db_name, partition_table_name)
-                                    created_tables_set.add(table_key)
-
-                            # Prepare and insert the data
-                            prepared_data = DataProcessor.prepare_data_for_insert(
-                                partition_data, set_typ, allow_null
-                            )
-
-                            inserted, skipped, failed = self.data_inserter.insert_data(
-                                db_name, partition_table_name, prepared_data, set_typ, update_on_duplicate
-                            )
-
-                            chunk_result['inserted_rows'] += inserted
-                            chunk_result['skipped_rows'] += skipped
-                            chunk_result['failed_rows'] += failed
-                    else:
-                        # Single-table handling
-                        table_key = f"{db_name}.{table_name}"
-                        with table_creation_lock:
-                            if table_key not in created_tables_set:
-                                if not self.table_mgr.table_exists(db_name, table_name):
-                                    self.table_mgr.create_table(db_name, table_name, set_typ,
-                                                                unique_keys=unique_keys, allow_null=allow_null)
-                                    chunk_result = {'tables_created': [table_key]}
-                                else:
-                                    self.table_mgr.ensure_system_columns(db_name, table_name)
-                                    chunk_result = {'tables_created': []}
-                                created_tables_set.add(table_key)
-                            else:
-                                chunk_result = {'tables_created': []}
-
-                        prepared_chunk = DataProcessor.prepare_data_for_insert(
-                            chunk_data, set_typ, allow_null
-                        )
-
-                        inserted, skipped, failed = self.data_inserter.insert_data(
-                            db_name, table_name, prepared_chunk, set_typ, update_on_duplicate
-                        )
-
-                        chunk_result.update({
-                            'inserted_rows': inserted,
-                            'skipped_rows': skipped,
-                            'failed_rows': failed
-                        })
-
-                    return chunk_result
-
-                except Exception as e:
-                    logger.error('Concurrent chunk processing failed', {'error': str(e)})
-                    return {
-                        'inserted_rows': 0,
-                        'skipped_rows': 0,
-                        'failed_rows': len(chunk_data) if chunk_data else 0,
-                        'tables_created': []
-                    }
-
-            # Run the concurrent processing on a thread pool
-            with concurrent.futures.ThreadPoolExecutor(max_workers=max_workers) as executor:
-                # Submit all tasks
-                future_to_chunk = {}
-                for chunk in normalized_data:
-                    if chunk:
-                        future = executor.submit(process_chunk_worker, chunk)
-                        future_to_chunk[future] = len(chunk)
-
-                # Collect the results
-                for future in concurrent.futures.as_completed(future_to_chunk):
-                    chunk_result = future.result()
-                    result['inserted_rows'] += chunk_result['inserted_rows']
-                    result['skipped_rows'] += chunk_result['skipped_rows']
-                    result['failed_rows'] += chunk_result['failed_rows']
-                    result['tables_created'].extend(chunk_result['tables_created'])
-
-            # De-duplicate tables_created
-            result['tables_created'] = list(set(result['tables_created']))
-            result['success'] = result['failed_rows'] == 0
-
-        except Exception as e:
-            logger.error('Concurrent data upload failed', {
-                'database': db_name,
-                'table': table_name,
-                'error': str(e)
-            })
-            result['success'] = False
-
-        return result
-
-    def tran_set_typ_to_lower(self, set_typ: Dict[str, str]) -> Dict[str, str]:
-        if not isinstance(set_typ, dict):
-            return set_typ
-
-        set_typ_lower = {}
-        for key, value in set_typ.items():
-            set_typ_lower[key.lower()] = value
-
-        return set_typ_lower
-

 # Usage example
 if __name__ == '__main__':
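Removing upload_data_concurrent (and with it the concurrent.futures and threading imports dropped at the top of the file) leaves the sequential upload path as the single entry point. A caller migrating off the removed method would drop max_workers and call the surviving upload method instead; the method name below is assumed, as it is not visible in this diff, and the parameter names mirror the removed signature:

# Hypothetical migration sketch
result = uploader.upload_data(
    db_name='mydb',
    table_name='orders',
    data=rows,
    partition_by='month',
    unique_keys=[['order_id']],
)
print(result['inserted_rows'], result['skipped_rows'], result['failed_rows'])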
mdbq-4.2.7/mdbq/__version__.py DELETED

@@ -1 +0,0 @@
-VERSION = '4.2.7'
The 41 remaining files listed above are unchanged.