mdbq 4.2.8.tar.gz → 4.2.9.tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of mdbq might be problematic.

Files changed (46)
  1. {mdbq-4.2.8 → mdbq-4.2.9}/PKG-INFO +1 -1
  2. mdbq-4.2.9/mdbq/__version__.py +1 -0
  3. {mdbq-4.2.8 → mdbq-4.2.9}/mdbq/mysql/uploader.py +72 -197
  4. {mdbq-4.2.8 → mdbq-4.2.9}/mdbq.egg-info/PKG-INFO +1 -1
  5. mdbq-4.2.8/mdbq/__version__.py +0 -1
  6. {mdbq-4.2.8 → mdbq-4.2.9}/README.txt +0 -0
  7. {mdbq-4.2.8 → mdbq-4.2.9}/mdbq/__init__.py +0 -0
  8. {mdbq-4.2.8 → mdbq-4.2.9}/mdbq/auth/__init__.py +0 -0
  9. {mdbq-4.2.8 → mdbq-4.2.9}/mdbq/auth/auth_backend.py +0 -0
  10. {mdbq-4.2.8 → mdbq-4.2.9}/mdbq/auth/crypto.py +0 -0
  11. {mdbq-4.2.8 → mdbq-4.2.9}/mdbq/auth/rate_limiter.py +0 -0
  12. {mdbq-4.2.8 → mdbq-4.2.9}/mdbq/js/__init__.py +0 -0
  13. {mdbq-4.2.8 → mdbq-4.2.9}/mdbq/js/jc.py +0 -0
  14. {mdbq-4.2.8 → mdbq-4.2.9}/mdbq/log/__init__.py +0 -0
  15. {mdbq-4.2.8 → mdbq-4.2.9}/mdbq/log/mylogger.py +0 -0
  16. {mdbq-4.2.8 → mdbq-4.2.9}/mdbq/myconf/__init__.py +0 -0
  17. {mdbq-4.2.8 → mdbq-4.2.9}/mdbq/myconf/myconf.py +0 -0
  18. {mdbq-4.2.8 → mdbq-4.2.9}/mdbq/mysql/__init__.py +0 -0
  19. {mdbq-4.2.8 → mdbq-4.2.9}/mdbq/mysql/deduplicator.py +0 -0
  20. {mdbq-4.2.8 → mdbq-4.2.9}/mdbq/mysql/mysql.py +0 -0
  21. {mdbq-4.2.8 → mdbq-4.2.9}/mdbq/mysql/s_query.py +0 -0
  22. {mdbq-4.2.8 → mdbq-4.2.9}/mdbq/mysql/unique_.py +0 -0
  23. {mdbq-4.2.8 → mdbq-4.2.9}/mdbq/other/__init__.py +0 -0
  24. {mdbq-4.2.8 → mdbq-4.2.9}/mdbq/other/download_sku_picture.py +0 -0
  25. {mdbq-4.2.8 → mdbq-4.2.9}/mdbq/other/error_handler.py +0 -0
  26. {mdbq-4.2.8 → mdbq-4.2.9}/mdbq/other/otk.py +0 -0
  27. {mdbq-4.2.8 → mdbq-4.2.9}/mdbq/other/pov_city.py +0 -0
  28. {mdbq-4.2.8 → mdbq-4.2.9}/mdbq/other/ua_sj.py +0 -0
  29. {mdbq-4.2.8 → mdbq-4.2.9}/mdbq/pbix/__init__.py +0 -0
  30. {mdbq-4.2.8 → mdbq-4.2.9}/mdbq/pbix/pbix_refresh.py +0 -0
  31. {mdbq-4.2.8 → mdbq-4.2.9}/mdbq/pbix/refresh_all.py +0 -0
  32. {mdbq-4.2.8 → mdbq-4.2.9}/mdbq/redis/__init__.py +0 -0
  33. {mdbq-4.2.8 → mdbq-4.2.9}/mdbq/redis/getredis.py +0 -0
  34. {mdbq-4.2.8 → mdbq-4.2.9}/mdbq/redis/redis_cache.py +0 -0
  35. {mdbq-4.2.8 → mdbq-4.2.9}/mdbq/route/__init__.py +0 -0
  36. {mdbq-4.2.8 → mdbq-4.2.9}/mdbq/route/analytics.py +0 -0
  37. {mdbq-4.2.8 → mdbq-4.2.9}/mdbq/route/monitor.py +0 -0
  38. {mdbq-4.2.8 → mdbq-4.2.9}/mdbq/route/routes.py +0 -0
  39. {mdbq-4.2.8 → mdbq-4.2.9}/mdbq/selenium/__init__.py +0 -0
  40. {mdbq-4.2.8 → mdbq-4.2.9}/mdbq/selenium/get_driver.py +0 -0
  41. {mdbq-4.2.8 → mdbq-4.2.9}/mdbq/spider/__init__.py +0 -0
  42. {mdbq-4.2.8 → mdbq-4.2.9}/mdbq.egg-info/SOURCES.txt +0 -0
  43. {mdbq-4.2.8 → mdbq-4.2.9}/mdbq.egg-info/dependency_links.txt +0 -0
  44. {mdbq-4.2.8 → mdbq-4.2.9}/mdbq.egg-info/top_level.txt +0 -0
  45. {mdbq-4.2.8 → mdbq-4.2.9}/setup.cfg +0 -0
  46. {mdbq-4.2.8 → mdbq-4.2.9}/setup.py +0 -0
@@ -1,6 +1,6 @@
  Metadata-Version: 2.2
  Name: mdbq
- Version: 4.2.8
+ Version: 4.2.9
  Home-page: https://pypi.org/project/mdbq
  Author: xigua,
  Author-email: 2587125111@qq.com
@@ -0,0 +1 @@
+ VERSION = '4.2.9'
@@ -11,8 +11,6 @@ from typing import Union, List, Dict, Optional, Any, Tuple, Iterator
  from functools import wraps
  from decimal import Decimal, InvalidOperation
  import math
- import concurrent.futures
- import threading
  import pymysql
  import pandas as pd
  import psutil
@@ -267,18 +265,26 @@ class DataTypeInferrer:
          # Sample the data for type inference
          sample_data = data[:sample_size] if len(data) > sample_size else data

+         # First, collect all column names
+         all_columns = set()
+         for row in sample_data:
+             for col in row.keys():
+                 if col.lower() not in ['id', 'create_at', 'update_at']:
+                     all_columns.add(col)
+
+         # Initialize a candidate-type list for each column
+         for col in all_columns:
+             type_candidates[col] = []
+
          for row in sample_data:
              for col, value in row.items():
                  # Skip system columns
                  if col.lower() in ['id', 'create_at', 'update_at']:
                      continue

-                 if value is not None and str(value).strip():
-                     mysql_type = DataTypeInferrer.infer_mysql_type(value)
-
-                     if col not in type_candidates:
-                         type_candidates[col] = []
-                     type_candidates[col].append(mysql_type)
+                 # Infer a type even when the value is empty
+                 mysql_type = DataTypeInferrer.infer_mysql_type(value)
+                 type_candidates[col].append(mysql_type)

          # Choose the most suitable type for each column
          for col, types in type_candidates.items():
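The practical effect of this hunk: in 4.2.8 a column whose sampled values were all empty never entered type_candidates and so received no inferred type; in 4.2.9 every business column is registered up front, and empty values still contribute a candidate. A minimal standalone sketch of the new two-pass approach (infer_mysql_type is stubbed here with an assumed fallback, since its body is outside this diff):

# Hedged sketch of the two-pass inference introduced above; the real
# DataTypeInferrer.infer_mysql_type is not shown in this diff, so a
# simplified stand-in is used.
SYSTEM_COLS = {'id', 'create_at', 'update_at'}

def infer_mysql_type(value):
    # Stand-in: assume integers map to bigint and everything else,
    # including empty values, falls back to varchar(255).
    return 'bigint' if isinstance(value, int) else 'varchar(255)'

def collect_type_candidates(sample_data):
    type_candidates = {}
    # Pass 1: register every business column, even if all its values are empty
    for row in sample_data:
        for col in row:
            if col.lower() not in SYSTEM_COLS:
                type_candidates.setdefault(col, [])
    # Pass 2: one candidate per value, empty values included
    for row in sample_data:
        for col, value in row.items():
            if col.lower() in SYSTEM_COLS:
                continue
            type_candidates[col].append(infer_mysql_type(value))
    return type_candidates

print(collect_type_candidates([{'remark': '', 'qty': 3}, {'remark': None, 'qty': 5}]))
# {'remark': ['varchar(255)', 'varchar(255)'], 'qty': ['bigint', 'bigint']}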
@@ -684,6 +690,19 @@ class TableManager:
          db_name = self._sanitize_identifier(db_name)
          table_name = self._sanitize_identifier(table_name)

+         # Validate that columns is not empty
+         if not columns:
+             raise ValueError(f"Failed to create table: columns must not be empty. Database: {db_name}, table: {table_name}")
+
+         # Validate that every column referenced in unique_keys exists in columns
+         if unique_keys:
+             business_columns = {k.lower(): k for k in columns.keys() if k.lower() not in ['id', 'create_at', 'update_at']}
+             for i, uk in enumerate(unique_keys):
+                 for col in uk:
+                     col_lower = col.lower()
+                     if col_lower not in business_columns and col not in columns:
+                         raise ValueError(f"Column '{col}' in a unique constraint does not exist in the table definition. Available columns: {list(business_columns.keys())}")
+
          # Build the column definitions
          column_defs = []

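To see what the new unique_keys check rejects, here is the same logic lifted into a standalone function; the column names below are made up for illustration:

# Standalone replica of the unique_keys validation added above.
def validate_unique_keys(columns, unique_keys):
    business_columns = {k.lower(): k for k in columns
                        if k.lower() not in ['id', 'create_at', 'update_at']}
    for uk in unique_keys or []:
        for col in uk:
            if col.lower() not in business_columns and col not in columns:
                raise ValueError(f"Column '{col}' in a unique constraint does not "
                                 f"exist. Available columns: {list(business_columns)}")

validate_unique_keys({'shop_name': 'varchar(100)'}, [['Shop_Name']])  # passes (case-insensitive)
try:
    validate_unique_keys({'shop_name': 'varchar(100)'}, [['order_id']])
except ValueError as e:
    print(e)  # Column 'order_id' in a unique constraint does not exist...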
@@ -716,8 +735,15 @@ class TableManager:
          safe_uk_parts = []
          for col in filtered_uk:
              safe_col_name = self._sanitize_identifier(col)
-             # Check whether a prefix index is needed
-             col_type = columns.get(col, 'varchar(255)').lower()
+             # Check whether a prefix index is needed - try the original column name first, then lowercase
+             col_lower = col.lower()
+             if col in columns:
+                 col_type = columns[col].lower()
+             elif col_lower in columns:
+                 col_type = columns[col_lower].lower()
+             else:
+                 col_type = 'varchar(255)'
+
              if 'varchar' in col_type:
                  # Extract the varchar length
                  match = re.search(r'varchar\((\d+)\)', col_type)
@@ -727,20 +753,11 @@ class TableManager:
                  if length > 191:
                      prefix_length = 191
                      safe_uk_parts.append(f"`{safe_col_name}`({prefix_length})")
-                     logger.debug('Applied prefix index', {
-                         'column': col,
-                         'original length': length,
-                         'prefix length': prefix_length
-                     })
                  else:
                      safe_uk_parts.append(f"`{safe_col_name}`")
              else:
                  # Default to a prefix index when no length is specified
                  safe_uk_parts.append(f"`{safe_col_name}`(191)")
-                 logger.debug('Applied default prefix index', {
-                     'column': col,
-                     'prefix length': 191
-                 })
          else:
              # Keep non-varchar fields as-is
              safe_uk_parts.append(f"`{safe_col_name}`")
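The 191 threshold matches the classic InnoDB single-column index limit of 767 bytes: with utf8mb4 at up to 4 bytes per character, 767 / 4 ≈ 191 characters. A standalone sketch of the clause this logic emits (column names are illustrative):

import re

# Hedged replica of the prefix-index decision above, for illustration only.
def unique_key_part(col_name, col_type):
    col_type = col_type.lower()
    if 'varchar' in col_type:
        match = re.search(r'varchar\((\d+)\)', col_type)
        if match:
            length = int(match.group(1))
            if length > 191:
                return f"`{col_name}`(191)"   # prefix index for long varchars
            return f"`{col_name}`"
        return f"`{col_name}`(191)"           # no length given: default prefix
    return f"`{col_name}`"                    # non-varchar columns stay as-is

print(unique_key_part('page_url', 'varchar(500)'))  # `page_url`(191)
print(unique_key_part('sku_id', 'varchar(64)'))     # `sku_id`
print(unique_key_part('shop_id', 'bigint'))         # `shop_id`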
@@ -760,9 +777,17 @@ class TableManager:

          with self.conn_mgr.get_connection() as conn:
              with conn.cursor() as cursor:
-                 cursor.execute(sql)
-                 conn.commit()
-                 logger.debug('Table created', {'database': db_name, 'table': table_name})
+                 try:
+                     cursor.execute(sql)
+                     conn.commit()
+                     logger.debug('Table created', {'database': db_name, 'table': table_name})
+                 except Exception as e:
+                     logger.error('Failed to create table', {
+                         'database': db_name,
+                         'table': table_name,
+                         'error': str(e)
+                     })
+                     raise

      def get_partition_table_name(self, base_name: str, date_value: str, partition_by: str) -> str:
          """Get the partitioned table name"""
@@ -806,8 +831,6 @@ class TableManager:
          return cleaned


-
-
  class DataProcessor:
      """Data processor"""

@@ -1169,21 +1192,35 @@ class MySQLUploader:
              normalized_data = DataProcessor.normalize_data(data)

              # Infer or validate column types
-             if set_typ is None:
+             if set_typ is None or not set_typ:
                  # Use the first chunk for type inference
                  first_chunk = next(iter(normalized_data))
+
+                 if not first_chunk:
+                     raise ValueError("Data is empty; cannot infer column types")
+
                  set_typ = DataTypeInferrer.infer_types_from_data(first_chunk)
                  # Recreate the iterator
                  normalized_data = DataProcessor.normalize_data(data)
                  logger.debug('Automatically inferred data types', {'type map': set_typ})
+
+                 # Validate the inference result
+                 if not set_typ or not any(col for col in set_typ.keys() if col.lower() not in ['id', 'create_at', 'update_at']):
+                     raise ValueError(f"Type inference failed; no valid business columns. Inference result: {set_typ}")

              # Convert the keys of set_typ to lowercase
              set_typ = self.tran_set_typ_to_lower(set_typ)

+             # Final validation: make sure business columns are defined
+             business_columns = {k: v for k, v in set_typ.items() if k.lower() not in ['id', 'create_at', 'update_at']}
+             if not business_columns:
+                 raise ValueError(f"No valid business column definitions. set_typ: {set_typ}")
+
              # Make sure the database exists
              self.table_mgr.ensure_database_exists(db_name)

              # Handle partitioned-table logic
+
              if partition_by:
                  upload_result = self._handle_partitioned_upload(
                      db_name, table_name, normalized_data, set_typ,
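These guards change how upload_data behaves when set_typ is omitted: an empty dict now also triggers inference, and empty input fails fast with a ValueError instead of producing a table with no business columns. A hedged usage sketch; the constructor keyword names below are placeholders, since MySQLUploader's signature is not part of this diff:

# Hypothetical wiring: MySQLUploader's constructor parameters are not shown in
# this diff, so the keyword names below are assumptions.
uploader = MySQLUploader(host='127.0.0.1', port=3306, username='user', password='pw')

result = uploader.upload_data(
    db_name='demo_db',
    table_name='demo_table',
    data=[{'日期': '2025-01-01', 'sales': 12.5}],
    set_typ={},              # empty dict now also triggers auto inference
    unique_keys=[['日期']],
)
# With 4.2.9, an empty first chunk raises ValueError instead of silently
# creating a table with only system columns.
print(result['success'], result['inserted_rows'])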
@@ -1389,6 +1426,16 @@ class MySQLUploader:
              main_result['failed_rows'] += partition_result['failed_rows']
              main_result['tables_created'].extend(partition_result['tables_created'])

+     def tran_set_typ_to_lower(self, set_typ: Dict[str, str]) -> Dict[str, str]:
+         if not isinstance(set_typ, dict) or set_typ is None:
+             return {}
+
+         set_typ_lower = {}
+         for key, value in set_typ.items():
+             set_typ_lower[key.lower()] = value
+
+         return set_typ_lower
+
      def close(self):
          """Close the connection"""
          if self.conn_mgr:
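The helper was moved up in the class and hardened: the 4.2.8 version (removed further down) returned a non-dict input unchanged, while the 4.2.9 version returns an empty dict, which the new business-column validation then rejects with a clear error. The trailing `set_typ is None` test is redundant, since None already fails the isinstance check. A standalone demo of the same logic:

from typing import Dict

# Same behavior as the method above, lifted out for a standalone demo.
def tran_set_typ_to_lower(set_typ: Dict[str, str]) -> Dict[str, str]:
    if not isinstance(set_typ, dict):  # also covers None
        return {}
    return {key.lower(): value for key, value in set_typ.items()}

print(tran_set_typ_to_lower({'日期': 'date', 'SKU_ID': 'varchar(64)'}))
# {'日期': 'date', 'sku_id': 'varchar(64)'}
print(tran_set_typ_to_lower(None))  # {}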
@@ -1406,178 +1453,6 @@ class MySQLUploader:
      def __exit__(self, exc_type, exc_val, exc_tb):
          self.close()

-     def upload_data_concurrent(self, db_name: str, table_name: str,
-                                data: Union[Dict, List[Dict], pd.DataFrame],
-                                set_typ: Optional[Dict[str, str]] = None,
-                                allow_null: bool = False,
-                                partition_by: Optional[str] = None,
-                                partition_date_column: str = '日期',
-                                update_on_duplicate: bool = False,
-                                unique_keys: Optional[List[List[str]]] = None,
-                                max_workers: int = 3) -> Dict[str, Any]:
-         """
-         Upload data to the MySQL database concurrently
-
-         :param max_workers: maximum number of concurrent worker threads
-         :return: detailed upload result
-         """
-         db_name = db_name.lower()
-         table_name = table_name.lower()
-
-         result = {
-             'success': False,
-             'inserted_rows': 0,
-             'skipped_rows': 0,
-             'failed_rows': 0,
-             'tables_created': []
-         }
-
-         try:
-             # Normalize the data into a streaming iterator
-             normalized_data = DataProcessor.normalize_data(data, chunk_size=2000)  # smaller chunks for concurrency
-
-             # Infer or validate column types
-             if set_typ is None:
-                 first_chunk = next(iter(normalized_data))
-                 set_typ = DataTypeInferrer.infer_types_from_data(first_chunk)
-                 normalized_data = DataProcessor.normalize_data(data, chunk_size=2000)
-                 logger.debug('Automatically inferred data types', {'type map': set_typ})
-
-             # Convert the keys of set_typ to lowercase
-             set_typ = self.tran_set_typ_to_lower(set_typ)
-
-             # Make sure the database exists
-             self.table_mgr.ensure_database_exists(db_name)
-
-             # Lock to make table creation thread-safe
-             table_creation_lock = threading.Lock()
-             created_tables_set = set()
-
-             def process_chunk_worker(chunk_data):
-                 """Worker thread function"""
-                 try:
-                     if partition_by:
-                         # Partitioned-table handling
-                         partitioned_chunk = DataProcessor.partition_data_by_date(
-                             chunk_data, partition_date_column, partition_by
-                         )
-
-                         chunk_result = {
-                             'inserted_rows': 0,
-                             'skipped_rows': 0,
-                             'failed_rows': 0,
-                             'tables_created': []
-                         }
-
-                         for partition_suffix, partition_data in partitioned_chunk.items():
-                             partition_table_name = f"{table_name}_{partition_suffix}"
-                             table_key = f"{db_name}.{partition_table_name}"
-
-                             # Make sure the table exists (thread-safe)
-                             with table_creation_lock:
-                                 if table_key not in created_tables_set:
-                                     if not self.table_mgr.table_exists(db_name, partition_table_name):
-                                         self.table_mgr.create_table(db_name, partition_table_name, set_typ,
-                                                                     unique_keys=unique_keys, allow_null=allow_null)
-                                         chunk_result['tables_created'].append(table_key)
-                                     else:
-                                         self.table_mgr.ensure_system_columns(db_name, partition_table_name)
-                                     created_tables_set.add(table_key)
-
-                             # Prepare and insert the data
-                             prepared_data = DataProcessor.prepare_data_for_insert(
-                                 partition_data, set_typ, allow_null
-                             )
-
-                             inserted, skipped, failed = self.data_inserter.insert_data(
-                                 db_name, partition_table_name, prepared_data, set_typ, update_on_duplicate
-                             )
-
-                             chunk_result['inserted_rows'] += inserted
-                             chunk_result['skipped_rows'] += skipped
-                             chunk_result['failed_rows'] += failed
-                     else:
-                         # Single-table handling
-                         table_key = f"{db_name}.{table_name}"
-                         with table_creation_lock:
-                             if table_key not in created_tables_set:
-                                 if not self.table_mgr.table_exists(db_name, table_name):
-                                     self.table_mgr.create_table(db_name, table_name, set_typ,
-                                                                 unique_keys=unique_keys, allow_null=allow_null)
-                                     chunk_result = {'tables_created': [table_key]}
-                                 else:
-                                     self.table_mgr.ensure_system_columns(db_name, table_name)
-                                     chunk_result = {'tables_created': []}
-                                 created_tables_set.add(table_key)
-                             else:
-                                 chunk_result = {'tables_created': []}
-
-                         prepared_chunk = DataProcessor.prepare_data_for_insert(
-                             chunk_data, set_typ, allow_null
-                         )
-
-                         inserted, skipped, failed = self.data_inserter.insert_data(
-                             db_name, table_name, prepared_chunk, set_typ, update_on_duplicate
-                         )
-
-                         chunk_result.update({
-                             'inserted_rows': inserted,
-                             'skipped_rows': skipped,
-                             'failed_rows': failed
-                         })
-
-                     return chunk_result
-
-                 except Exception as e:
-                     logger.error('Concurrent chunk processing failed', {'error': str(e)})
-                     return {
-                         'inserted_rows': 0,
-                         'skipped_rows': 0,
-                         'failed_rows': len(chunk_data) if chunk_data else 0,
-                         'tables_created': []
-                     }
-
-             # Run the chunks on a thread pool
-             with concurrent.futures.ThreadPoolExecutor(max_workers=max_workers) as executor:
-                 # Submit all tasks
-                 future_to_chunk = {}
-                 for chunk in normalized_data:
-                     if chunk:
-                         future = executor.submit(process_chunk_worker, chunk)
-                         future_to_chunk[future] = len(chunk)
-
-                 # Collect the results
-                 for future in concurrent.futures.as_completed(future_to_chunk):
-                     chunk_result = future.result()
-                     result['inserted_rows'] += chunk_result['inserted_rows']
-                     result['skipped_rows'] += chunk_result['skipped_rows']
-                     result['failed_rows'] += chunk_result['failed_rows']
-                     result['tables_created'].extend(chunk_result['tables_created'])
-
-             # De-duplicate tables_created
-             result['tables_created'] = list(set(result['tables_created']))
-             result['success'] = result['failed_rows'] == 0
-
-         except Exception as e:
-             logger.error('Concurrent data upload failed', {
-                 'database': db_name,
-                 'table': table_name,
-                 'error': str(e)
-             })
-             result['success'] = False
-
-         return result
-
-     def tran_set_typ_to_lower(self, set_typ: Dict[str, str]) -> Dict[str, str]:
-         if not isinstance(set_typ, dict):
-             return set_typ
-
-         set_typ_lower = {}
-         for key, value in set_typ.items():
-             set_typ_lower[key.lower()] = value
-
-         return set_typ_lower
-

  # Usage example
  if __name__ == '__main__':
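With upload_data_concurrent gone (along with the concurrent.futures and threading imports at the top of the file), 4.2.9 leaves only the sequential upload_data path. A caller who still wants parallelism can fan chunks out externally; this is a hedged sketch, not an mdbq API, and it assumes the chunks are independent and that the uploader tolerates concurrent calls the way the removed method's shared inserter did:

import concurrent.futures

# Hypothetical helper: replicates the removed method's fan-out from outside
# the library. `uploader` is an mdbq MySQLUploader; `chunks` is any iterable
# of row lists you have split yourself.
def upload_chunks_concurrently(uploader, db_name, table_name, chunks, max_workers=3):
    results = []
    with concurrent.futures.ThreadPoolExecutor(max_workers=max_workers) as executor:
        futures = [executor.submit(uploader.upload_data, db_name, table_name, chunk)
                   for chunk in chunks if chunk]
        for future in concurrent.futures.as_completed(futures):
            results.append(future.result())
    return results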
@@ -1,6 +1,6 @@
  Metadata-Version: 2.2
  Name: mdbq
- Version: 4.2.8
+ Version: 4.2.9
  Home-page: https://pypi.org/project/mdbq
  Author: xigua,
  Author-email: 2587125111@qq.com
@@ -1 +0,0 @@
- VERSION = '4.2.8'