mdbq 4.2.8.tar.gz → 4.2.10.tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release: this version of mdbq might be problematic.
- {mdbq-4.2.8 → mdbq-4.2.10}/PKG-INFO +2 -2
- mdbq-4.2.10/mdbq/__version__.py +1 -0
- {mdbq-4.2.8 → mdbq-4.2.10}/mdbq/mysql/uploader.py +72 -197
- {mdbq-4.2.8 → mdbq-4.2.10}/mdbq/redis/redis_cache.py +283 -1
- {mdbq-4.2.8 → mdbq-4.2.10}/mdbq.egg-info/PKG-INFO +2 -2
- mdbq-4.2.8/mdbq/__version__.py +0 -1
- {mdbq-4.2.8 → mdbq-4.2.10}/README.txt +0 -0
- {mdbq-4.2.8 → mdbq-4.2.10}/mdbq/__init__.py +0 -0
- {mdbq-4.2.8 → mdbq-4.2.10}/mdbq/auth/__init__.py +0 -0
- {mdbq-4.2.8 → mdbq-4.2.10}/mdbq/auth/auth_backend.py +0 -0
- {mdbq-4.2.8 → mdbq-4.2.10}/mdbq/auth/crypto.py +0 -0
- {mdbq-4.2.8 → mdbq-4.2.10}/mdbq/auth/rate_limiter.py +0 -0
- {mdbq-4.2.8 → mdbq-4.2.10}/mdbq/js/__init__.py +0 -0
- {mdbq-4.2.8 → mdbq-4.2.10}/mdbq/js/jc.py +0 -0
- {mdbq-4.2.8 → mdbq-4.2.10}/mdbq/log/__init__.py +0 -0
- {mdbq-4.2.8 → mdbq-4.2.10}/mdbq/log/mylogger.py +0 -0
- {mdbq-4.2.8 → mdbq-4.2.10}/mdbq/myconf/__init__.py +0 -0
- {mdbq-4.2.8 → mdbq-4.2.10}/mdbq/myconf/myconf.py +0 -0
- {mdbq-4.2.8 → mdbq-4.2.10}/mdbq/mysql/__init__.py +0 -0
- {mdbq-4.2.8 → mdbq-4.2.10}/mdbq/mysql/deduplicator.py +0 -0
- {mdbq-4.2.8 → mdbq-4.2.10}/mdbq/mysql/mysql.py +0 -0
- {mdbq-4.2.8 → mdbq-4.2.10}/mdbq/mysql/s_query.py +0 -0
- {mdbq-4.2.8 → mdbq-4.2.10}/mdbq/mysql/unique_.py +0 -0
- {mdbq-4.2.8 → mdbq-4.2.10}/mdbq/other/__init__.py +0 -0
- {mdbq-4.2.8 → mdbq-4.2.10}/mdbq/other/download_sku_picture.py +0 -0
- {mdbq-4.2.8 → mdbq-4.2.10}/mdbq/other/error_handler.py +0 -0
- {mdbq-4.2.8 → mdbq-4.2.10}/mdbq/other/otk.py +0 -0
- {mdbq-4.2.8 → mdbq-4.2.10}/mdbq/other/pov_city.py +0 -0
- {mdbq-4.2.8 → mdbq-4.2.10}/mdbq/other/ua_sj.py +0 -0
- {mdbq-4.2.8 → mdbq-4.2.10}/mdbq/pbix/__init__.py +0 -0
- {mdbq-4.2.8 → mdbq-4.2.10}/mdbq/pbix/pbix_refresh.py +0 -0
- {mdbq-4.2.8 → mdbq-4.2.10}/mdbq/pbix/refresh_all.py +0 -0
- {mdbq-4.2.8 → mdbq-4.2.10}/mdbq/redis/__init__.py +0 -0
- {mdbq-4.2.8 → mdbq-4.2.10}/mdbq/redis/getredis.py +0 -0
- {mdbq-4.2.8 → mdbq-4.2.10}/mdbq/route/__init__.py +0 -0
- {mdbq-4.2.8 → mdbq-4.2.10}/mdbq/route/analytics.py +0 -0
- {mdbq-4.2.8 → mdbq-4.2.10}/mdbq/route/monitor.py +0 -0
- {mdbq-4.2.8 → mdbq-4.2.10}/mdbq/route/routes.py +0 -0
- {mdbq-4.2.8 → mdbq-4.2.10}/mdbq/selenium/__init__.py +0 -0
- {mdbq-4.2.8 → mdbq-4.2.10}/mdbq/selenium/get_driver.py +0 -0
- {mdbq-4.2.8 → mdbq-4.2.10}/mdbq/spider/__init__.py +0 -0
- {mdbq-4.2.8 → mdbq-4.2.10}/mdbq.egg-info/SOURCES.txt +0 -0
- {mdbq-4.2.8 → mdbq-4.2.10}/mdbq.egg-info/dependency_links.txt +0 -0
- {mdbq-4.2.8 → mdbq-4.2.10}/mdbq.egg-info/top_level.txt +0 -0
- {mdbq-4.2.8 → mdbq-4.2.10}/setup.cfg +0 -0
- {mdbq-4.2.8 → mdbq-4.2.10}/setup.py +0 -0
mdbq-4.2.10/mdbq/__version__.py (added)
@@ -0,0 +1 @@
+VERSION = '4.2.10'
{mdbq-4.2.8 → mdbq-4.2.10}/mdbq/mysql/uploader.py
@@ -11,8 +11,6 @@ from typing import Union, List, Dict, Optional, Any, Tuple, Iterator
 from functools import wraps
 from decimal import Decimal, InvalidOperation
 import math
-import concurrent.futures
-import threading
 import pymysql
 import pandas as pd
 import psutil
@@ -267,18 +265,26 @@ class DataTypeInferrer:
         # Sample the data for type inference
         sample_data = data[:sample_size] if len(data) > sample_size else data
 
+        # First collect every column name
+        all_columns = set()
+        for row in sample_data:
+            for col in row.keys():
+                if col.lower() not in ['id', 'create_at', 'update_at']:
+                    all_columns.add(col)
+
+        # Initialize a candidate-type list for every column
+        for col in all_columns:
+            type_candidates[col] = []
+
         for row in sample_data:
             for col, value in row.items():
                 # Skip system columns
                 if col.lower() in ['id', 'create_at', 'update_at']:
                     continue
 
-
-
-
-                if col not in type_candidates:
-                    type_candidates[col] = []
-                type_candidates[col].append(mysql_type)
+                # Infer a type even when the value is empty
+                mysql_type = DataTypeInferrer.infer_mysql_type(value)
+                type_candidates[col].append(mysql_type)
 
         # Pick the most suitable type for each column
         for col, types in type_candidates.items():
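Note: the rework above makes inference two-pass: column names are collected from the whole sample first, then a candidate type is appended for every value, so a column that is absent or empty in some rows still ends up typed. A minimal sketch of the pattern, with a simplified stand-in for infer_mysql_type (not mdbq's actual mapping):

    from collections import Counter

    def infer_mysql_type(value):
        # Stand-in mapping for illustration only
        if value is None or value == '':
            return 'varchar(255)'
        if isinstance(value, int):
            return 'bigint'
        if isinstance(value, float):
            return 'decimal(18,4)'
        return 'varchar(255)'

    rows = [{'price': 9.9, 'sku': 'A1'}, {'price': None}]  # second row has no 'sku'

    # Pass 1: collect every business column so sparse rows cannot hide one
    columns = {c for r in rows for c in r if c.lower() not in ('id', 'create_at', 'update_at')}
    candidates = {c: [] for c in columns}

    # Pass 2: append a candidate type for each value that is present
    for r in rows:
        for c, v in r.items():
            if c in candidates:
                candidates[c].append(infer_mysql_type(v))

    # Pick the most common candidate per column
    print({c: Counter(t).most_common(1)[0][0] for c, t in candidates.items()})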
@@ -684,6 +690,19 @@ class TableManager:
         db_name = self._sanitize_identifier(db_name)
         table_name = self._sanitize_identifier(table_name)
 
+        # Validate that columns is not empty
+        if not columns:
+            raise ValueError(f"创建表失败:columns不能为空。数据库: {db_name}, 表: {table_name}")
+
+        # Validate that every column in unique_keys exists in columns
+        if unique_keys:
+            business_columns = {k.lower(): k for k in columns.keys() if k.lower() not in ['id', 'create_at', 'update_at']}
+            for i, uk in enumerate(unique_keys):
+                for col in uk:
+                    col_lower = col.lower()
+                    if col_lower not in business_columns and col not in columns:
+                        raise ValueError(f"唯一约束中的列 '{col}' 不存在于表定义中。可用列: {list(business_columns.keys())}")
+
         # Build the column definitions
         column_defs = []
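With these checks a bad unique key fails fast instead of producing broken DDL later. Roughly (hypothetical TableManager instance; the create_table signature matches the calls visible elsewhere in this diff):

    columns = {'日期': 'date', 'shop': 'varchar(100)'}
    try:
        table_mgr.create_table('test_db', 'sales', columns,
                               unique_keys=[['日期', 'sku']])  # 'sku' is not defined
    except ValueError as e:
        print(e)  # reports that 'sku' is missing from the table definition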
@@ -716,8 +735,15 @@ class TableManager:
             safe_uk_parts = []
             for col in filtered_uk:
                 safe_col_name = self._sanitize_identifier(col)
-                # Check whether a prefix index is needed
-
+                # Check whether a prefix index is needed - prefer the original column name, then try lowercase
+                col_lower = col.lower()
+                if col in columns:
+                    col_type = columns[col].lower()
+                elif col_lower in columns:
+                    col_type = columns[col_lower].lower()
+                else:
+                    col_type = 'varchar(255)'
+
                 if 'varchar' in col_type:
                     # Extract the varchar length
                     match = re.search(r'varchar\((\d+)\)', col_type)
@@ -727,20 +753,11 @@
                         if length > 191:
                             prefix_length = 191
                             safe_uk_parts.append(f"`{safe_col_name}`({prefix_length})")
-                            logger.debug('应用前缀索引', {
-                                '列名': col,
-                                '原始长度': length,
-                                '前缀长度': prefix_length
-                            })
                         else:
                             safe_uk_parts.append(f"`{safe_col_name}`")
                     else:
                         # If no length was specified, default to a prefix index
                         safe_uk_parts.append(f"`{safe_col_name}`(191)")
-                        logger.debug('应用默认前缀索引', {
-                            '列名': col,
-                            '前缀长度': 191
-                        })
                 else:
                     # Non-varchar columns stay as-is
                     safe_uk_parts.append(f"`{safe_col_name}`")
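The 191 cap is the standard utf8mb4 workaround: at up to 4 bytes per character, 191 × 4 = 764 bytes stays under the 767-byte index-key limit of older InnoDB row formats. A small sketch of the DDL fragment this branch produces, with assumed column types:

    import re

    # Assumed column map, mirroring the prefix-index rule above
    columns = {'title': 'varchar(500)', 'code': 'varchar(50)', 'qty': 'int'}

    parts = []
    for col, col_type in columns.items():
        m = re.search(r'varchar\((\d+)\)', col_type)
        if m and int(m.group(1)) > 191:
            parts.append(f"`{col}`(191)")  # prefix index keeps the key under 767 bytes
        else:
            parts.append(f"`{col}`")

    print(f"UNIQUE KEY `uk_demo` ({', '.join(parts)})")
    # UNIQUE KEY `uk_demo` (`title`(191), `code`, `qty`)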
@@ -760,9 +777,17 @@ class TableManager:
 
         with self.conn_mgr.get_connection() as conn:
             with conn.cursor() as cursor:
-
-
-
+                try:
+                    cursor.execute(sql)
+                    conn.commit()
+                    logger.debug('表已创建', {'database': db_name, 'table': table_name})
+                except Exception as e:
+                    logger.error('创建表失败', {
+                        'database': db_name,
+                        'table': table_name,
+                        'error': str(e)
+                    })
+                    raise
 
     def get_partition_table_name(self, base_name: str, date_value: str, partition_by: str) -> str:
         """Get the partition table name"""
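The removed lines here were not captured by the extraction; what is visible is that create_table now logs the failing database and table and re-raises, so callers see the original pymysql error instead of a silent failure. A caller-side sketch (hypothetical handling):

    try:
        table_mgr.create_table('test_db', 'orders', {'日期': 'date'})
    except Exception as e:
        # e is the original error (bad identifier, missing grant, ...)
        print('DDL failed:', e)  # or route it to your own error handling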
@@ -806,8 +831,6 @@ class TableManager:
         return cleaned
 
 
-
-
 class DataProcessor:
     """Data processor"""
 
@@ -1169,21 +1192,35 @@ class MySQLUploader:
             normalized_data = DataProcessor.normalize_data(data)
 
             # Infer or validate column types
-            if set_typ is None:
+            if set_typ is None or not set_typ:
                 # Use the first chunk for type inference
                 first_chunk = next(iter(normalized_data))
+
+                if not first_chunk:
+                    raise ValueError("数据为空,无法推断列类型")
+
                 set_typ = DataTypeInferrer.infer_types_from_data(first_chunk)
                 # Recreate the iterator
                 normalized_data = DataProcessor.normalize_data(data)
                 logger.debug('自动推断数据类型', {'类型映射': set_typ})
+
+                # Validate the inference result
+                if not set_typ or not any(col for col in set_typ.keys() if col.lower() not in ['id', 'create_at', 'update_at']):
+                    raise ValueError(f"类型推断失败,无有效业务列。推断结果: {set_typ}")
 
             # Normalize the set_typ keys to lowercase
             set_typ = self.tran_set_typ_to_lower(set_typ)
 
+            # Final check: make sure business columns are defined
+            business_columns = {k: v for k, v in set_typ.items() if k.lower() not in ['id', 'create_at', 'update_at']}
+            if not business_columns:
+                raise ValueError(f"没有有效的业务列定义。set_typ: {set_typ}")
+
             # Make sure the database exists
             self.table_mgr.ensure_database_exists(db_name)
 
             # Handle the partitioned-table logic
+
             if partition_by:
                 upload_result = self._handle_partitioned_upload(
                     db_name, table_name, normalized_data, set_typ,
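Taken together, upload_data now rejects empty input at three points: an empty set_typ triggers inference, an empty first chunk raises immediately, and an inference result or set_typ with no business columns raises before any table is touched. A hedged usage sketch (assuming upload_data mirrors the parameter list of the concurrent variant removed below):

    uploader = MySQLUploader()  # connection settings omitted
    result = uploader.upload_data(
        db_name='test_db',
        table_name='sales',
        data=[{'日期': '2024-01-01', 'amount': 12.5}],
        set_typ=None,            # None and {} both trigger auto inference now
        unique_keys=[['日期']],
    )
    print(result['inserted_rows'], result['tables_created'])
    # uploader.upload_data('test_db', 'sales', data=[])  # would now fail fast during inference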
@@ -1389,6 +1426,16 @@ class MySQLUploader:
                 main_result['failed_rows'] += partition_result['failed_rows']
                 main_result['tables_created'].extend(partition_result['tables_created'])
 
+    def tran_set_typ_to_lower(self, set_typ: Dict[str, str]) -> Dict[str, str]:
+        if not isinstance(set_typ, dict) or set_typ is None:
+            return {}
+
+        set_typ_lower = {}
+        for key, value in set_typ.items():
+            set_typ_lower[key.lower()] = value
+
+        return set_typ_lower
+
     def close(self):
         """Close the connection"""
         if self.conn_mgr:
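The kept copy of tran_set_typ_to_lower returns {} for non-dict input instead of echoing it back (note that the `or set_typ is None` clause is redundant, since None already fails the isinstance check). One behavioral caveat worth knowing: keys differing only in case collapse into one entry, last writer wins:

    mapping = {'Shop': 'varchar(100)', 'SHOP': 'varchar(50)', '日期': 'date'}
    lowered = {k.lower(): v for k, v in mapping.items()}
    print(lowered)  # {'shop': 'varchar(50)', '日期': 'date'} -- 'SHOP' overwrote 'Shop'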
@@ -1406,178 +1453,6 @@ class MySQLUploader:
     def __exit__(self, exc_type, exc_val, exc_tb):
         self.close()
 
-    def upload_data_concurrent(self, db_name: str, table_name: str,
-                               data: Union[Dict, List[Dict], pd.DataFrame],
-                               set_typ: Optional[Dict[str, str]] = None,
-                               allow_null: bool = False,
-                               partition_by: Optional[str] = None,
-                               partition_date_column: str = '日期',
-                               update_on_duplicate: bool = False,
-                               unique_keys: Optional[List[List[str]]] = None,
-                               max_workers: int = 3) -> Dict[str, Any]:
-        """
-        Upload data to the MySQL database concurrently
-
-        :param max_workers: maximum number of worker threads
-        :return: upload result details
-        """
-        db_name = db_name.lower()
-        table_name = table_name.lower()
-
-        result = {
-            'success': False,
-            'inserted_rows': 0,
-            'skipped_rows': 0,
-            'failed_rows': 0,
-            'tables_created': []
-        }
-
-        try:
-            # Normalize the data into a streaming iterator
-            normalized_data = DataProcessor.normalize_data(data, chunk_size=2000)  # smaller chunks for concurrency
-
-            # Infer or validate column types
-            if set_typ is None:
-                first_chunk = next(iter(normalized_data))
-                set_typ = DataTypeInferrer.infer_types_from_data(first_chunk)
-                normalized_data = DataProcessor.normalize_data(data, chunk_size=2000)
-                logger.debug('自动推断数据类型', {'类型映射': set_typ})
-
-            # Normalize the set_typ keys to lowercase
-            set_typ = self.tran_set_typ_to_lower(set_typ)
-
-            # Make sure the database exists
-            self.table_mgr.ensure_database_exists(db_name)
-
-            # Lock for thread-safe table creation
-            table_creation_lock = threading.Lock()
-            created_tables_set = set()
-
-            def process_chunk_worker(chunk_data):
-                """Worker-thread function"""
-                try:
-                    if partition_by:
-                        # Partitioned-table handling
-                        partitioned_chunk = DataProcessor.partition_data_by_date(
-                            chunk_data, partition_date_column, partition_by
-                        )
-
-                        chunk_result = {
-                            'inserted_rows': 0,
-                            'skipped_rows': 0,
-                            'failed_rows': 0,
-                            'tables_created': []
-                        }
-
-                        for partition_suffix, partition_data in partitioned_chunk.items():
-                            partition_table_name = f"{table_name}_{partition_suffix}"
-                            table_key = f"{db_name}.{partition_table_name}"
-
-                            # Make sure the table exists (thread-safe)
-                            with table_creation_lock:
-                                if table_key not in created_tables_set:
-                                    if not self.table_mgr.table_exists(db_name, partition_table_name):
-                                        self.table_mgr.create_table(db_name, partition_table_name, set_typ,
-                                                                    unique_keys=unique_keys, allow_null=allow_null)
-                                        chunk_result['tables_created'].append(table_key)
-                                    else:
-                                        self.table_mgr.ensure_system_columns(db_name, partition_table_name)
-                                    created_tables_set.add(table_key)
-
-                            # Prepare and insert the data
-                            prepared_data = DataProcessor.prepare_data_for_insert(
-                                partition_data, set_typ, allow_null
-                            )
-
-                            inserted, skipped, failed = self.data_inserter.insert_data(
-                                db_name, partition_table_name, prepared_data, set_typ, update_on_duplicate
-                            )
-
-                            chunk_result['inserted_rows'] += inserted
-                            chunk_result['skipped_rows'] += skipped
-                            chunk_result['failed_rows'] += failed
-                    else:
-                        # Single-table handling
-                        table_key = f"{db_name}.{table_name}"
-                        with table_creation_lock:
-                            if table_key not in created_tables_set:
-                                if not self.table_mgr.table_exists(db_name, table_name):
-                                    self.table_mgr.create_table(db_name, table_name, set_typ,
-                                                                unique_keys=unique_keys, allow_null=allow_null)
-                                    chunk_result = {'tables_created': [table_key]}
-                                else:
-                                    self.table_mgr.ensure_system_columns(db_name, table_name)
-                                    chunk_result = {'tables_created': []}
-                                created_tables_set.add(table_key)
-                            else:
-                                chunk_result = {'tables_created': []}
-
-                        prepared_chunk = DataProcessor.prepare_data_for_insert(
-                            chunk_data, set_typ, allow_null
-                        )
-
-                        inserted, skipped, failed = self.data_inserter.insert_data(
-                            db_name, table_name, prepared_chunk, set_typ, update_on_duplicate
-                        )
-
-                        chunk_result.update({
-                            'inserted_rows': inserted,
-                            'skipped_rows': skipped,
-                            'failed_rows': failed
-                        })
-
-                    return chunk_result
-
-                except Exception as e:
-                    logger.error('并发处理chunk失败', {'错误': str(e)})
-                    return {
-                        'inserted_rows': 0,
-                        'skipped_rows': 0,
-                        'failed_rows': len(chunk_data) if chunk_data else 0,
-                        'tables_created': []
-                    }
-
-            # Run the chunks through a thread pool
-            with concurrent.futures.ThreadPoolExecutor(max_workers=max_workers) as executor:
-                # Submit all tasks
-                future_to_chunk = {}
-                for chunk in normalized_data:
-                    if chunk:
-                        future = executor.submit(process_chunk_worker, chunk)
-                        future_to_chunk[future] = len(chunk)
-
-                # Collect the results
-                for future in concurrent.futures.as_completed(future_to_chunk):
-                    chunk_result = future.result()
-                    result['inserted_rows'] += chunk_result['inserted_rows']
-                    result['skipped_rows'] += chunk_result['skipped_rows']
-                    result['failed_rows'] += chunk_result['failed_rows']
-                    result['tables_created'].extend(chunk_result['tables_created'])
-
-            # De-duplicate tables_created
-            result['tables_created'] = list(set(result['tables_created']))
-            result['success'] = result['failed_rows'] == 0
-
-        except Exception as e:
-            logger.error('并发数据上传失败', {
-                '数据库': db_name,
-                '表名': table_name,
-                '错误': str(e)
-            })
-            result['success'] = False
-
-        return result
-
-    def tran_set_typ_to_lower(self, set_typ: Dict[str, str]) -> Dict[str, str]:
-        if not isinstance(set_typ, dict):
-            return set_typ
-
-        set_typ_lower = {}
-        for key, value in set_typ.items():
-            set_typ_lower[key.lower()] = value
-
-        return set_typ_lower
-
 
 # Usage example
 if __name__ == '__main__':
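upload_data_concurrent is gone in 4.2.10, along with the concurrent.futures and threading imports removed at the top of the file. Callers that depended on it can fan chunks out over the remaining upload_data themselves; a hedged sketch, assuming upload_data is safe to call from multiple threads (verify before relying on this):

    from concurrent.futures import ThreadPoolExecutor, as_completed

    def upload_chunks(uploader, db_name, table_name, chunks, max_workers=3, **kwargs):
        # Aggregate per-chunk results the way the removed method did
        totals = {'inserted_rows': 0, 'skipped_rows': 0, 'failed_rows': 0}
        with ThreadPoolExecutor(max_workers=max_workers) as pool:
            futures = [pool.submit(uploader.upload_data, db_name, table_name, chunk, **kwargs)
                       for chunk in chunks if chunk]
            for fut in as_completed(futures):
                result = fut.result()
                for key in totals:
                    totals[key] += result.get(key, 0)
        return totals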
{mdbq-4.2.8 → mdbq-4.2.10}/mdbq/redis/redis_cache.py
@@ -879,4 +879,286 @@ class CacheManager:
 
 
 # Global cache-manager instance
-cache_manager = CacheManager()
+cache_manager = CacheManager()
+
+
+# ===== Decorator helpers =====
+
+def flask_redis_cache(cache_key_func=None, ttl=1200, namespace="default",
+                      data_validator=None, skip_cache_on_error=True):
+    """
+    Redis cache decorator for Flask route functions
+
+    Args:
+        cache_key_func: cache-key builder; receives the request data and returns the cache-key string.
+                        If None, the default key-generation strategy is used.
+        ttl: cache TTL in seconds, 20 minutes by default
+        namespace: cache namespace, "default" by default
+        data_validator: validation function deciding whether the data should be cached
+        skip_cache_on_error: skip caching when a cache operation fails, True by default
+
+    Usage:
+        @flask_redis_cache(
+            cache_key_func=lambda data: f"tables_{data.get('database', 'unknown')}",
+            ttl=1200,
+            namespace="sycm_tables"
+        )
+        def my_flask_route():
+            pass
+    """
+    def decorator(func):
+        import functools
+        import hashlib
+
+        @functools.wraps(func)
+        def wrapper(*args, **kwargs):
+            # Import Flask lazily to avoid a hard dependency
+            try:
+                from flask import request, jsonify
+            except ImportError:
+                # Without a Flask environment, just run the original function
+                return func(*args, **kwargs)
+
+            # Get the cache system
+            cache_system = cache_manager.get_cache()
+
+            # If the cache system is unavailable, run the original function
+            if not cache_system:
+                return func(*args, **kwargs)
+
+            try:
+                # Read the request data used to build the cache key
+                request_data = {}
+                if request.method == 'POST':
+                    try:
+                        request_data = request.get_json() or {}
+                    except Exception:
+                        request_data = {}
+                elif request.method == 'GET':
+                    request_data = dict(request.args)
+
+                # Build the cache key
+                if cache_key_func:
+                    cache_key = cache_key_func(request_data)
+                else:
+                    # Default key-generation strategy
+                    func_name = func.__name__
+                    # Serialize the request data and hash it
+                    data_str = str(sorted(request_data.items()))
+                    data_hash = hashlib.md5(data_str.encode()).hexdigest()[:8]
+                    cache_key = f"{func_name}_{data_hash}"
+
+                # Try the cache first
+                try:
+                    cached_result = cache_system.get(cache_key, namespace)
+                    if cached_result is not None:
+                        return jsonify(cached_result)
+                except Exception as e:
+                    if not skip_cache_on_error:
+                        raise
+
+                # Cache miss: run the original function
+                result = func(*args, **kwargs)
+
+                # If the result is a Flask Response object, extract its JSON for caching
+                if hasattr(result, 'get_json'):
+                    try:
+                        response_data = result.get_json()
+                        if response_data:
+                            # Use the safe cache write
+                            _safe_cache_set(
+                                cache_system=cache_system,
+                                cache_key=cache_key,
+                                response_data=response_data,
+                                ttl=ttl,
+                                namespace=namespace,
+                                data_validator=data_validator
+                            )
+                    except Exception as e:
+                        if not skip_cache_on_error:
+                            raise
+                elif isinstance(result, tuple) and len(result) == 2:
+                    # Handle (response, status_code) return values
+                    try:
+                        response_data, status_code = result
+                        if hasattr(response_data, 'get_json'):
+                            json_data = response_data.get_json()
+                        elif isinstance(response_data, dict):
+                            json_data = response_data
+                        else:
+                            json_data = None
+
+                        if json_data and status_code == 200:
+                            _safe_cache_set(
+                                cache_system=cache_system,
+                                cache_key=cache_key,
+                                response_data=json_data,
+                                ttl=ttl,
+                                namespace=namespace,
+                                data_validator=data_validator
+                            )
+                    except Exception as e:
+                        if not skip_cache_on_error:
+                            raise
+
+                return result
+
+            except Exception as e:
+                if not skip_cache_on_error:
+                    raise
+                return func(*args, **kwargs)
+
+        return wrapper
+    return decorator
+
+
+def function_redis_cache(cache_key_func=None, ttl=1800, namespace="default",
+                         skip_cache_on_error=True):
+    """
+    Redis cache decorator for ordinary functions
+
+    Args:
+        cache_key_func: cache-key builder; receives the function arguments and returns the cache-key string.
+                        If None, the default key-generation strategy is used.
+        ttl: cache TTL in seconds, 30 minutes by default
+        namespace: cache namespace, "default" by default
+        skip_cache_on_error: skip caching when a cache operation fails, True by default
+
+    Usage:
+        @function_redis_cache(
+            cache_key_func=lambda _key, shop_name: f"cookies_{_key}_{shop_name}",
+            ttl=1800,
+            namespace="cookies_cache"
+        )
+        def my_function(_key, shop_name):
+            pass
+    """
+    def decorator(func):
+        import functools
+        import inspect
+        import hashlib
+
+        @functools.wraps(func)
+        def wrapper(*args, **kwargs):
+            # Get the cache system
+            cache_system = cache_manager.get_cache()
+
+            # If the cache system is unavailable, run the original function
+            if not cache_system:
+                return func(*args, **kwargs)
+
+            try:
+                # Get the function signature and bound arguments
+                sig = inspect.signature(func)
+                bound_args = sig.bind(*args, **kwargs)
+                bound_args.apply_defaults()
+
+                # Build the cache key
+                if cache_key_func:
+                    cache_key = cache_key_func(*args, **kwargs)
+                else:
+                    # Default key-generation strategy
+                    func_name = func.__name__
+                    # Serialize the arguments and hash them
+                    args_str = str(args) + str(sorted(kwargs.items()))
+                    args_hash = hashlib.md5(args_str.encode()).hexdigest()[:8]
+                    cache_key = f"{func_name}_{args_hash}"
+
+                # Try the cache first
+                try:
+                    cached_result = cache_system.get(cache_key, namespace)
+                    if cached_result is not None:
+                        return cached_result
+                except Exception as e:
+                    if not skip_cache_on_error:
+                        raise
+
+                # Cache miss: run the original function
+                result = func(*args, **kwargs)
+
+                # Cache the result (non-empty results only)
+                if result is not None and result != {} and result != []:
+                    try:
+                        cache_system.set(cache_key, result, ttl=ttl, namespace=namespace)
+                    except Exception as e:
+                        if not skip_cache_on_error:
+                            raise
+
+                return result
+
+            except Exception as e:
+                if not skip_cache_on_error:
+                    raise
+                return func(*args, **kwargs)
+
+        return wrapper
+    return decorator
+
+
+def _safe_cache_set(cache_system, cache_key, response_data, ttl, namespace,
+                    data_validator=None):
+    """
+    Safe cache write: only writes when the data is valid.
+
+    Args:
+        cache_system: cache-system instance
+        cache_key: cache key
+        response_data: response data to cache
+        ttl: cache TTL
+        namespace: cache namespace
+        data_validator: validation function; returns True when the data is valid
+
+    Returns:
+        bool: whether the write succeeded
+    """
+    if not cache_system:
+        return False
+
+    # Default validation: inspect the response-data structure
+    if data_validator is None:
+        def default_validator(data):
+            if not isinstance(data, dict):
+                return False
+
+            # Looser validation that accepts several response formats
+            # Check the status field (several success formats are accepted)
+            status_ok = (
+                data.get('status') == 'success' or  # new format
+                data.get('code') == 0 or  # old format
+                data.get('code') == 200  # HTTP-status format
+            )
+
+            if not status_ok:
+                return False
+
+            # Check the payload (several data field names are accepted)
+            has_data_fields = (
+                'data' in data or  # standard data field
+                'logs' in data or  # update logs
+                'announcements' in data or  # announcements
+                'databases' in data or  # database list
+                'tables' in data or  # table list
+                'rows' in data or  # data rows
+                'message' in data  # a message alone counts as data
+            )
+
+            # Any data field present is treated as valid
+            return has_data_fields
+
+        data_validator = default_validator
+
+    # Validate the data
+    try:
+        is_valid = data_validator(response_data)
+    except Exception:
+        return False
+
+    if is_valid:
+        try:
+            cache_system.set(cache_key, response_data, ttl=ttl, namespace=namespace)
+            return True
+        except Exception:
+            return False
+    else:
+        return False
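Beyond the Usage blocks in the docstrings, data_validator is the main extension point: it decides whether a response is cached at all. A hedged example wiring a custom validator into flask_redis_cache (import path taken from this diff's file list):

    from flask import Flask, jsonify
    from mdbq.redis.redis_cache import flask_redis_cache

    app = Flask(__name__)

    def only_nonempty_rows(data):
        # Cache only successful responses that actually carry rows
        return data.get('status') == 'success' and bool(data.get('rows'))

    @app.route('/tables', methods=['POST'])
    @flask_redis_cache(
        cache_key_func=lambda d: f"tables_{d.get('database', 'unknown')}",
        ttl=600,
        namespace="sycm_tables",
        data_validator=only_nonempty_rows,
    )
    def list_tables():
        return jsonify({'status': 'success', 'rows': ['sales', 'orders']})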
mdbq-4.2.8/mdbq/__version__.py (deleted)
@@ -1 +0,0 @@
-VERSION = '4.2.8'