mdbq-4.2.2.tar.gz → mdbq-4.2.3.tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {mdbq-4.2.2 → mdbq-4.2.3}/PKG-INFO +2 -2
- mdbq-4.2.3/mdbq/__version__.py +1 -0
- {mdbq-4.2.2 → mdbq-4.2.3}/mdbq/mysql/uploader.py +59 -20
- {mdbq-4.2.2 → mdbq-4.2.3}/mdbq.egg-info/PKG-INFO +2 -2
- mdbq-4.2.2/mdbq/__version__.py +0 -1
- {mdbq-4.2.2 → mdbq-4.2.3}/README.txt +0 -0
- {mdbq-4.2.2 → mdbq-4.2.3}/mdbq/__init__.py +0 -0
- {mdbq-4.2.2 → mdbq-4.2.3}/mdbq/auth/__init__.py +0 -0
- {mdbq-4.2.2 → mdbq-4.2.3}/mdbq/auth/auth_backend.py +0 -0
- {mdbq-4.2.2 → mdbq-4.2.3}/mdbq/auth/crypto.py +0 -0
- {mdbq-4.2.2 → mdbq-4.2.3}/mdbq/auth/rate_limiter.py +0 -0
- {mdbq-4.2.2 → mdbq-4.2.3}/mdbq/js/__init__.py +0 -0
- {mdbq-4.2.2 → mdbq-4.2.3}/mdbq/js/jc.py +0 -0
- {mdbq-4.2.2 → mdbq-4.2.3}/mdbq/log/__init__.py +0 -0
- {mdbq-4.2.2 → mdbq-4.2.3}/mdbq/log/mylogger.py +0 -0
- {mdbq-4.2.2 → mdbq-4.2.3}/mdbq/myconf/__init__.py +0 -0
- {mdbq-4.2.2 → mdbq-4.2.3}/mdbq/myconf/myconf.py +0 -0
- {mdbq-4.2.2 → mdbq-4.2.3}/mdbq/mysql/__init__.py +0 -0
- {mdbq-4.2.2 → mdbq-4.2.3}/mdbq/mysql/deduplicator.py +0 -0
- {mdbq-4.2.2 → mdbq-4.2.3}/mdbq/mysql/mysql.py +0 -0
- {mdbq-4.2.2 → mdbq-4.2.3}/mdbq/mysql/s_query.py +0 -0
- {mdbq-4.2.2 → mdbq-4.2.3}/mdbq/mysql/unique_.py +0 -0
- {mdbq-4.2.2 → mdbq-4.2.3}/mdbq/other/__init__.py +0 -0
- {mdbq-4.2.2 → mdbq-4.2.3}/mdbq/other/download_sku_picture.py +0 -0
- {mdbq-4.2.2 → mdbq-4.2.3}/mdbq/other/error_handler.py +0 -0
- {mdbq-4.2.2 → mdbq-4.2.3}/mdbq/other/otk.py +0 -0
- {mdbq-4.2.2 → mdbq-4.2.3}/mdbq/other/pov_city.py +0 -0
- {mdbq-4.2.2 → mdbq-4.2.3}/mdbq/other/ua_sj.py +0 -0
- {mdbq-4.2.2 → mdbq-4.2.3}/mdbq/pbix/__init__.py +0 -0
- {mdbq-4.2.2 → mdbq-4.2.3}/mdbq/pbix/pbix_refresh.py +0 -0
- {mdbq-4.2.2 → mdbq-4.2.3}/mdbq/pbix/refresh_all.py +0 -0
- {mdbq-4.2.2 → mdbq-4.2.3}/mdbq/redis/__init__.py +0 -0
- {mdbq-4.2.2 → mdbq-4.2.3}/mdbq/redis/getredis.py +0 -0
- {mdbq-4.2.2 → mdbq-4.2.3}/mdbq/redis/redis_cache.py +0 -0
- {mdbq-4.2.2 → mdbq-4.2.3}/mdbq/route/__init__.py +0 -0
- {mdbq-4.2.2 → mdbq-4.2.3}/mdbq/route/analytics.py +0 -0
- {mdbq-4.2.2 → mdbq-4.2.3}/mdbq/route/monitor.py +0 -0
- {mdbq-4.2.2 → mdbq-4.2.3}/mdbq/route/routes.py +0 -0
- {mdbq-4.2.2 → mdbq-4.2.3}/mdbq/selenium/__init__.py +0 -0
- {mdbq-4.2.2 → mdbq-4.2.3}/mdbq/selenium/get_driver.py +0 -0
- {mdbq-4.2.2 → mdbq-4.2.3}/mdbq/spider/__init__.py +0 -0
- {mdbq-4.2.2 → mdbq-4.2.3}/mdbq.egg-info/SOURCES.txt +0 -0
- {mdbq-4.2.2 → mdbq-4.2.3}/mdbq.egg-info/dependency_links.txt +0 -0
- {mdbq-4.2.2 → mdbq-4.2.3}/mdbq.egg-info/top_level.txt +0 -0
- {mdbq-4.2.2 → mdbq-4.2.3}/setup.cfg +0 -0
- {mdbq-4.2.2 → mdbq-4.2.3}/setup.py +0 -0
mdbq-4.2.3/mdbq/__version__.py ADDED
@@ -0,0 +1 @@
+VERSION = '4.2.3'
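The new module simply pins the release's version string. A minimal sketch of reading it programmatically (assuming the package layout shown in the file list above):

from mdbq.__version__ import VERSION

print(VERSION)  # '4.2.3'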
{mdbq-4.2.2 → mdbq-4.2.3}/mdbq/mysql/uploader.py
@@ -13,7 +13,6 @@ from decimal import Decimal, InvalidOperation
 import math
 import concurrent.futures
 import threading
-from queue import Queue
 import pymysql
 import pandas as pd
 import psutil
@@ -668,7 +667,8 @@ class TableManager:

     def create_table(self, db_name: str, table_name: str, columns: Dict[str, str],
                      primary_keys: Optional[List[str]] = None,
-                     unique_keys: Optional[List[List[str]]] = None):
+                     unique_keys: Optional[List[List[str]]] = None,
+                     allow_null: bool = False):
         """Create a table"""
         db_name = self._sanitize_identifier(db_name)
         table_name = self._sanitize_identifier(table_name)
@@ -684,7 +684,8 @@ class TableManager:
             if col_name.lower() in ['id', 'create_at', 'update_at']:
                 continue
             safe_col_name = self._sanitize_identifier(col_name)
-
+            null_constraint = "" if allow_null else " NOT NULL"
+            column_defs.append(f"`{safe_col_name}` {col_type}{null_constraint}")

         # Add timestamp columns
         column_defs.append("`create_at` TIMESTAMP NOT NULL DEFAULT CURRENT_TIMESTAMP")
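The effect of the new allow_null flag is easy to see in isolation. The sketch below mirrors the column-definition logic in the hunk above but is not the package's own code; the table and column names are invented for illustration:

def build_column_defs(columns, allow_null=False):
    # Every data column gets NOT NULL unless allow_null=True, as in the diff above
    defs = []
    for name, col_type in columns.items():
        null_constraint = "" if allow_null else " NOT NULL"
        defs.append(f"`{name}` {col_type}{null_constraint}")
    return defs

print(build_column_defs({"sku_id": "VARCHAR(64)", "price": "DECIMAL(10,2)"}))
# ['`sku_id` VARCHAR(64) NOT NULL', '`price` DECIMAL(10,2) NOT NULL']
print(build_column_defs({"sku_id": "VARCHAR(64)"}, allow_null=True))
# ['`sku_id` VARCHAR(64)']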
@@ -785,6 +786,10 @@ class DataProcessor:
             chunk_size = min(chunk_size, 1000)  # Shrink the chunk when memory is tight

         if isinstance(data, pd.DataFrame):
+            # Normalize all DataFrame column names to lowercase
+            data = data.copy()
+            data.columns = [col.lower() for col in data.columns]
+
             # For large DataFrames, use a more efficient chunking strategy
             if len(data) > 50000:
                 # Large datasets use pandas chunked reads
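The DataFrame branch now lowercases column names before chunking, so mixed-case input columns line up with the lowercase schema. A quick standalone illustration (column names invented):

import pandas as pd

df = pd.DataFrame({"Shop_Name": ["a"], "GMV": [1.0]})
df = df.copy()
df.columns = [col.lower() for col in df.columns]
print(list(df.columns))  # ['shop_name', 'gmv']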
@@ -795,11 +800,23 @@ class DataProcessor:
                 chunk = data.iloc[i:i + chunk_size]
                 yield chunk.to_dict('records')
         elif isinstance(data, dict):
-
+            # Normalize the dict keys to lowercase
+            normalized_dict = {}
+            for key, value in data.items():
+                normalized_dict[key.lower()] = value
+            yield [normalized_dict]
         elif isinstance(data, list):
             if all(isinstance(item, dict) for item in data):
-
-
+                # Normalize the keys of every dict in the list to lowercase
+                normalized_data = []
+                for item in data:
+                    normalized_item = {}
+                    for key, value in item.items():
+                        normalized_item[key.lower()] = value
+                    normalized_data.append(normalized_item)
+
+                for i in range(0, len(normalized_data), chunk_size):
+                    yield normalized_data[i:i + chunk_size]
             else:
                 raise ValueError("列表中必须全部是字典")
         else:
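The same lowercase normalization is applied to dict and list-of-dict inputs before they are yielded in chunks. A self-contained sketch of the equivalent behaviour (not the package's own function; sample data invented):

def normalize_records(data, chunk_size=2):
    # Lowercase every key, then yield fixed-size chunks, as in the diff above
    normalized = [{k.lower(): v for k, v in item.items()} for item in data]
    for i in range(0, len(normalized), chunk_size):
        yield normalized[i:i + chunk_size]

rows = [{"Shop": "a", "GMV": 1}, {"Shop": "b", "GMV": 2}, {"Shop": "c", "GMV": 3}]
for chunk in normalize_records(rows):
    print(chunk)
# [{'shop': 'a', 'gmv': 1}, {'shop': 'b', 'gmv': 2}]
# [{'shop': 'c', 'gmv': 3}]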
@@ -938,9 +955,21 @@ class DataInserter:
             try:
                 cursor.executemany(sql, batch_values)
                 total_inserted += len(batch_values)
-            except pymysql.err.IntegrityError:
-
-                logger.debug('
+            except pymysql.err.IntegrityError as e:
+                # Batch insert hit a unique-constraint conflict; fall back to row-by-row inserts
+                logger.debug('批量插入唯一约束冲突,尝试逐行插入', {'批次大小': len(batch_values)})
+
+                # Insert row by row so conflicts can be handled individually
+                for single_value in batch_values:
+                    try:
+                        cursor.execute(sql, single_value)
+                        total_inserted += 1
+                    except pymysql.err.IntegrityError:
+                        total_skipped += 1
+                        logger.debug('单行插入唯一约束冲突,跳过')
+                    except Exception as single_e:
+                        total_failed += 1
+                        logger.error('单行插入失败', {'错误': str(single_e)})
             except Exception as e:
                 logger.error('批量插入失败', {'错误': str(e), '批次大小': len(batch_values)})
                 raise
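The insert path no longer drops a whole batch on a unique-key conflict: it falls back to row-by-row inserts so only the conflicting rows are skipped. A minimal pymysql sketch of the same pattern (helper name, counters, and the calling context are illustrative, not the package's API):

import pymysql

def insert_with_fallback(cursor, sql, batch_values):
    inserted = skipped = failed = 0
    try:
        cursor.executemany(sql, batch_values)
        inserted += len(batch_values)
    except pymysql.err.IntegrityError:
        # The batch hit a duplicate key: retry one row at a time
        for row in batch_values:
            try:
                cursor.execute(sql, row)
                inserted += 1
            except pymysql.err.IntegrityError:
                skipped += 1   # duplicate row, skip it
            except Exception:
                failed += 1    # unrelated failure on this row
    return inserted, skipped, failed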
@@ -1077,12 +1106,6 @@ class MySQLUploader:
         }

         try:
-            # Compute the original data size
-            original_data_size = 0
-            if isinstance(data, (pd.DataFrame, list)):
-                original_data_size = len(data)
-            elif isinstance(data, dict):
-                original_data_size = 1

             # Normalize the data into a streaming iterator
             normalized_data = DataProcessor.normalize_data(data)
@@ -1096,6 +1119,9 @@ class MySQLUploader:
                 normalized_data = DataProcessor.normalize_data(data)
                 logger.debug('自动推断数据类型', {'类型映射': set_typ})

+            # Convert the keys of set_typ to lowercase
+            set_typ = self.tran_set_typ_to_lower(set_typ)
+
             # Ensure the database exists
             self.table_mgr.ensure_database_exists(db_name)

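Because data keys are now lowercased during normalization, the user-supplied type map has to be lowercased as well, otherwise a key such as 'Shop_Name' would never match its column. Roughly (sample keys invented):

set_typ = {"Shop_Name": "VARCHAR(64)", "GMV": "DECIMAL(10,2)"}
set_typ = {k.lower(): v for k, v in set_typ.items()}
print(set_typ)  # {'shop_name': 'VARCHAR(64)', 'gmv': 'DECIMAL(10,2)'}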
@@ -1142,7 +1168,7 @@ class MySQLUploader:
             # Ensure the table exists
             if not self.table_mgr.table_exists(db_name, table_name):
                 self.table_mgr.create_table(db_name, table_name, set_typ,
-                                             unique_keys=unique_keys)
+                                             unique_keys=unique_keys, allow_null=allow_null)
                 result['tables_created'].append(f"{db_name}.{table_name}")
             else:
                 # Table already exists; ensure the timestamp columns are present (keep the existing primary-key structure)
@@ -1275,7 +1301,7 @@ class MySQLUploader:
                 if table_key not in created_tables:
                     if not self.table_mgr.table_exists(db_name, partition_table_name):
                         self.table_mgr.create_table(db_name, partition_table_name, set_typ,
-                                                     unique_keys=unique_keys)
+                                                     unique_keys=unique_keys, allow_null=allow_null)
                         result['tables_created'].append(table_key)
                     else:
                         # Table already exists; ensure the timestamp columns are present (keep the existing primary-key structure)
@@ -1358,7 +1384,10 @@ class MySQLUploader:
                 set_typ = DataTypeInferrer.infer_types_from_data(first_chunk)
                 normalized_data = DataProcessor.normalize_data(data, chunk_size=2000)
                 logger.debug('自动推断数据类型', {'类型映射': set_typ})
-
+
+            # Convert the keys of set_typ to lowercase
+            set_typ = self.tran_set_typ_to_lower(set_typ)
+
             # Ensure the database exists
             self.table_mgr.ensure_database_exists(db_name)

@@ -1391,7 +1420,7 @@ class MySQLUploader:
                 if table_key not in created_tables_set:
                     if not self.table_mgr.table_exists(db_name, partition_table_name):
                         self.table_mgr.create_table(db_name, partition_table_name, set_typ,
-                                                     unique_keys=unique_keys)
+                                                     unique_keys=unique_keys, allow_null=allow_null)
                         chunk_result['tables_created'].append(table_key)
                     else:
                         self.table_mgr.ensure_system_columns(db_name, partition_table_name)
@@ -1416,7 +1445,7 @@ class MySQLUploader:
                 if table_key not in created_tables_set:
                     if not self.table_mgr.table_exists(db_name, table_name):
                         self.table_mgr.create_table(db_name, table_name, set_typ,
-                                                     unique_keys=unique_keys)
+                                                     unique_keys=unique_keys, allow_null=allow_null)
                         chunk_result = {'tables_created': [table_key]}
                     else:
                         self.table_mgr.ensure_system_columns(db_name, table_name)
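Taken together, the allow_null flag is threaded from the upload path into every create_table call, for regular and partition tables alike. A hypothetical call site might look like the following; the entry-point name upload_data and its parameter list are illustrative only, since this diff does not show the public method signature:

# Hypothetical usage; 'upload_data' and its parameters are assumptions, not confirmed by this diff.
uploader.upload_data(
    db_name="shop_db",
    table_name="orders",
    data=rows,
    set_typ={"order_id": "BIGINT", "pay_time": "DATETIME"},
    unique_keys=[["order_id"]],
    allow_null=True,  # newly created tables omit NOT NULL on data columns
)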
@@ -1480,6 +1509,16 @@ class MySQLUploader:
             result['success'] = False

         return result
+
+    def tran_set_typ_to_lower(self, set_typ: Dict[str, str]) -> Dict[str, str]:
+        if not isinstance(set_typ, dict):
+            return set_typ
+
+        set_typ_lower = {}
+        for key, value in set_typ.items():
+            set_typ_lower[key.lower()] = value
+
+        return set_typ_lower


 # Usage example
mdbq-4.2.2/mdbq/__version__.py DELETED
@@ -1 +0,0 @@
-VERSION = '4.2.2'