crawlo-1.4.6-py3-none-any.whl → crawlo-1.4.8-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release: this version of crawlo might be problematic.
- crawlo/__init__.py +2 -1
- crawlo/__version__.py +1 -1
- crawlo/cli.py +2 -2
- crawlo/commands/check.py +1 -1
- crawlo/commands/help.py +5 -3
- crawlo/commands/list.py +1 -1
- crawlo/commands/run.py +49 -11
- crawlo/commands/stats.py +1 -1
- crawlo/config.py +12 -4
- crawlo/config_validator.py +1 -1
- crawlo/core/engine.py +20 -7
- crawlo/core/processor.py +1 -1
- crawlo/core/scheduler.py +4 -5
- crawlo/crawler.py +51 -10
- crawlo/downloader/__init__.py +7 -3
- crawlo/downloader/aiohttp_downloader.py +18 -18
- crawlo/downloader/cffi_downloader.py +5 -2
- crawlo/downloader/httpx_downloader.py +9 -3
- crawlo/downloader/hybrid_downloader.py +2 -2
- crawlo/downloader/playwright_downloader.py +38 -15
- crawlo/downloader/selenium_downloader.py +16 -2
- crawlo/event.py +42 -8
- crawlo/exceptions.py +157 -24
- crawlo/extension/__init__.py +10 -9
- crawlo/extension/health_check.py +7 -7
- crawlo/extension/log_interval.py +6 -6
- crawlo/extension/log_stats.py +2 -2
- crawlo/extension/logging_extension.py +4 -12
- crawlo/extension/memory_monitor.py +5 -5
- crawlo/extension/performance_profiler.py +5 -5
- crawlo/extension/request_recorder.py +6 -6
- crawlo/factories/base.py +1 -1
- crawlo/factories/crawler.py +61 -60
- crawlo/factories/utils.py +135 -0
- crawlo/filters/__init__.py +19 -2
- crawlo/filters/aioredis_filter.py +133 -49
- crawlo/filters/memory_filter.py +6 -21
- crawlo/framework.py +22 -8
- crawlo/initialization/built_in.py +24 -67
- crawlo/initialization/core.py +65 -19
- crawlo/initialization/phases.py +83 -2
- crawlo/initialization/registry.py +5 -7
- crawlo/initialization/utils.py +49 -0
- crawlo/logging/__init__.py +6 -10
- crawlo/logging/config.py +106 -22
- crawlo/logging/factory.py +12 -8
- crawlo/logging/manager.py +19 -27
- crawlo/middleware/__init__.py +72 -9
- crawlo/middleware/default_header.py +2 -2
- crawlo/middleware/download_delay.py +2 -2
- crawlo/middleware/middleware_manager.py +6 -6
- crawlo/middleware/offsite.py +2 -2
- crawlo/middleware/proxy.py +2 -2
- crawlo/middleware/request_ignore.py +4 -4
- crawlo/middleware/response_code.py +2 -2
- crawlo/middleware/response_filter.py +2 -2
- crawlo/middleware/retry.py +1 -1
- crawlo/mode_manager.py +38 -4
- crawlo/network/request.py +54 -26
- crawlo/network/response.py +69 -135
- crawlo/pipelines/__init__.py +40 -9
- crawlo/pipelines/base_pipeline.py +452 -0
- crawlo/pipelines/bloom_dedup_pipeline.py +4 -5
- crawlo/pipelines/console_pipeline.py +2 -2
- crawlo/pipelines/csv_pipeline.py +4 -4
- crawlo/pipelines/database_dedup_pipeline.py +4 -5
- crawlo/pipelines/json_pipeline.py +4 -4
- crawlo/pipelines/memory_dedup_pipeline.py +4 -5
- crawlo/pipelines/mongo_pipeline.py +23 -14
- crawlo/pipelines/mysql_pipeline.py +31 -39
- crawlo/pipelines/pipeline_manager.py +8 -8
- crawlo/pipelines/redis_dedup_pipeline.py +13 -14
- crawlo/project.py +1 -1
- crawlo/queue/__init__.py +10 -0
- crawlo/queue/queue_manager.py +79 -13
- crawlo/queue/redis_priority_queue.py +196 -47
- crawlo/settings/default_settings.py +16 -6
- crawlo/spider/__init__.py +6 -5
- crawlo/stats_collector.py +2 -2
- crawlo/task_manager.py +1 -1
- crawlo/templates/crawlo.cfg.tmpl +3 -3
- crawlo/templates/project/__init__.py.tmpl +1 -3
- crawlo/templates/project/items.py.tmpl +2 -6
- crawlo/templates/project/middlewares.py.tmpl +1 -1
- crawlo/templates/project/pipelines.py.tmpl +1 -2
- crawlo/templates/project/settings.py.tmpl +12 -10
- crawlo/templates/project/settings_distributed.py.tmpl +14 -13
- crawlo/templates/project/settings_gentle.py.tmpl +21 -23
- crawlo/templates/project/settings_high_performance.py.tmpl +21 -23
- crawlo/templates/project/settings_minimal.py.tmpl +10 -8
- crawlo/templates/project/settings_simple.py.tmpl +21 -23
- crawlo/templates/run.py.tmpl +1 -1
- crawlo/templates/spider/spider.py.tmpl +4 -12
- crawlo/templates/spiders_init.py.tmpl +3 -8
- crawlo/tools/__init__.py +0 -103
- crawlo/tools/scenario_adapter.py +1 -1
- crawlo/utils/__init__.py +25 -1
- crawlo/utils/batch_processor.py +23 -6
- crawlo/utils/config_manager.py +442 -0
- crawlo/utils/controlled_spider_mixin.py +1 -1
- crawlo/utils/db_helper.py +1 -1
- crawlo/utils/encoding_helper.py +190 -0
- crawlo/utils/error_handler.py +2 -2
- crawlo/utils/large_scale_helper.py +1 -1
- crawlo/utils/leak_detector.py +335 -0
- crawlo/utils/mongo_connection_pool.py +157 -0
- crawlo/utils/mysql_connection_pool.py +197 -0
- crawlo/utils/performance_monitor.py +1 -1
- crawlo/utils/redis_checker.py +91 -0
- crawlo/utils/redis_connection_pool.py +260 -70
- crawlo/utils/redis_key_validator.py +1 -1
- crawlo/utils/request.py +24 -2
- crawlo/utils/request_serializer.py +1 -1
- crawlo/utils/resource_manager.py +337 -0
- crawlo/utils/response_helper.py +113 -0
- crawlo/utils/selector_helper.py +3 -2
- crawlo/utils/singleton.py +70 -0
- crawlo/utils/spider_loader.py +1 -1
- crawlo/utils/text_helper.py +1 -1
- crawlo-1.4.8.dist-info/METADATA +831 -0
- {crawlo-1.4.6.dist-info → crawlo-1.4.8.dist-info}/RECORD +131 -145
- tests/advanced_tools_example.py +10 -68
- tests/distributed_dedup_test.py +467 -0
- tests/monitor_redis_dedup.sh +72 -0
- tests/ofweek_scrapy/ofweek_scrapy/spiders/__init__.py +4 -4
- tests/simple_cli_test.py +55 -0
- tests/test_cli_arguments.py +119 -0
- tests/test_dedup_fix.py +10 -10
- crawlo/logging/async_handler.py +0 -181
- crawlo/logging/monitor.py +0 -153
- crawlo/logging/sampler.py +0 -167
- crawlo/tools/authenticated_proxy.py +0 -241
- crawlo/tools/data_formatter.py +0 -226
- crawlo/tools/data_validator.py +0 -181
- crawlo/tools/encoding_converter.py +0 -127
- crawlo/tools/network_diagnostic.py +0 -365
- crawlo/tools/request_tools.py +0 -83
- crawlo/tools/retry_mechanism.py +0 -224
- crawlo/utils/env_config.py +0 -143
- crawlo/utils/large_scale_config.py +0 -287
- crawlo/utils/log.py +0 -80
- crawlo/utils/system.py +0 -11
- crawlo/utils/tools.py +0 -5
- crawlo/utils/url.py +0 -40
- crawlo-1.4.6.dist-info/METADATA +0 -329
- tests/env_config_example.py +0 -134
- tests/ofweek_scrapy/ofweek_scrapy/spiders/ofweek_spider.py +0 -162
- tests/test_authenticated_proxy.py +0 -142
- tests/test_comprehensive.py +0 -147
- tests/test_dynamic_downloaders_proxy.py +0 -125
- tests/test_dynamic_proxy.py +0 -93
- tests/test_dynamic_proxy_config.py +0 -147
- tests/test_dynamic_proxy_real.py +0 -110
- tests/test_env_config.py +0 -122
- tests/test_framework_env_usage.py +0 -104
- tests/test_large_scale_config.py +0 -113
- tests/test_proxy_api.py +0 -265
- tests/test_real_scenario_proxy.py +0 -196
- tests/tools_example.py +0 -261
- {crawlo-1.4.6.dist-info → crawlo-1.4.8.dist-info}/WHEEL +0 -0
- {crawlo-1.4.6.dist-info → crawlo-1.4.8.dist-info}/entry_points.txt +0 -0
- {crawlo-1.4.6.dist-info → crawlo-1.4.8.dist-info}/top_level.txt +0 -0
crawlo/utils/mongo_connection_pool.py (new file)
@@ -0,0 +1,157 @@
+#!/usr/bin/python
+# -*- coding: UTF-8 -*-
+"""
+MongoDB connection pool manager
+===============================
+
+Provides a singleton MongoDB connection pool so that multiple spiders share
+a single pool instead of each creating its own, avoiding wasted resources.
+
+Features:
+1. Singleton pattern - a globally unique pool instance per configuration
+2. Thread safety - an async lock guards initialization
+3. Configuration isolation - different database configurations get separate pools
+4. Automatic cleanup - supports resource cleanup and reset
+"""
+
+import asyncio
+from typing import Dict, Optional, Any
+from motor.motor_asyncio import AsyncIOMotorClient
+from crawlo.logging import get_logger
+
+
+class MongoConnectionPoolManager:
+    """MongoDB connection pool manager (singleton)."""
+
+    _instances: Dict[str, 'MongoConnectionPoolManager'] = {}
+    _lock = asyncio.Lock()
+
+    def __init__(self, pool_key: str):
+        """
+        Initialize the pool manager.
+
+        Args:
+            pool_key: unique identifier for this pool
+        """
+        self.pool_key = pool_key
+        self.client: Optional[AsyncIOMotorClient] = None
+        self._client_lock = asyncio.Lock()
+        self._client_initialized = False
+        self._config: Dict[str, Any] = {}
+        self.logger = get_logger(f'MongoPool.{pool_key}')
+
+    @classmethod
+    async def get_client(
+        cls,
+        mongo_uri: str = 'mongodb://localhost:27017',
+        db_name: str = 'crawlo',
+        max_pool_size: int = 100,
+        min_pool_size: int = 10,
+        connect_timeout_ms: int = 5000,
+        socket_timeout_ms: int = 30000,
+        **kwargs
+    ) -> AsyncIOMotorClient:
+        """
+        Get a MongoDB client instance (singleton).
+
+        Args:
+            mongo_uri: MongoDB connection URI
+            db_name: database name
+            max_pool_size: maximum pool size
+            min_pool_size: minimum pool size
+            connect_timeout_ms: connect timeout in milliseconds
+            socket_timeout_ms: socket timeout in milliseconds
+            **kwargs: additional connection parameters
+
+        Returns:
+            The MongoDB client instance
+        """
+        # Build the unique pool key
+        pool_key = f"{mongo_uri}:{db_name}"
+
+        async with cls._lock:
+            if pool_key not in cls._instances:
+                instance = cls(pool_key)
+                instance._config = {
+                    'mongo_uri': mongo_uri,
+                    'db_name': db_name,
+                    'max_pool_size': max_pool_size,
+                    'min_pool_size': min_pool_size,
+                    'connect_timeout_ms': connect_timeout_ms,
+                    'socket_timeout_ms': socket_timeout_ms,
+                    **kwargs
+                }
+                cls._instances[pool_key] = instance
+                instance.logger.info(
+                    f"Created new MongoDB connection pool manager: {pool_key} "
+                    f"(minPoolSize={min_pool_size}, maxPoolSize={max_pool_size})"
+                )
+
+            instance = cls._instances[pool_key]
+            await instance._ensure_client()
+            return instance.client
+
+    async def _ensure_client(self):
+        """Ensure the client is initialized (thread-safe)."""
+        if self._client_initialized and self.client:
+            return
+
+        async with self._client_lock:
+            if not self._client_initialized:
+                try:
+                    self.client = AsyncIOMotorClient(
+                        self._config['mongo_uri'],
+                        maxPoolSize=self._config['max_pool_size'],
+                        minPoolSize=self._config['min_pool_size'],
+                        connectTimeoutMS=self._config['connect_timeout_ms'],
+                        socketTimeoutMS=self._config['socket_timeout_ms']
+                    )
+
+                    self._client_initialized = True
+                    self.logger.info(
+                        f"MongoDB client initialized: {self.pool_key} "
+                        f"(minPoolSize={self._config['min_pool_size']}, "
+                        f"maxPoolSize={self._config['max_pool_size']})"
+                    )
+                except Exception as e:
+                    self.logger.error(f"MongoDB client initialization failed: {e}")
+                    self._client_initialized = False
+                    self.client = None
+                    raise
+
+    @classmethod
+    async def close_all_clients(cls):
+        """Close all MongoDB clients."""
+        logger = get_logger('MongoPool')
+        logger.info(f"Closing all MongoDB clients ({len(cls._instances)} total)")
+
+        for pool_key, instance in cls._instances.items():
+            try:
+                if instance.client:
+                    logger.info(f"Closing MongoDB client: {pool_key}")
+                    instance.client.close()
+                    logger.info(f"MongoDB client closed: {pool_key}")
+            except Exception as e:
+                logger.error(f"Error while closing MongoDB client {pool_key}: {e}")
+
+        cls._instances.clear()
+        logger.info("All MongoDB clients closed")
+
+    @classmethod
+    def get_pool_stats(cls) -> Dict[str, Any]:
+        """Return statistics for all connection pools."""
+        stats = {
+            'total_pools': len(cls._instances),
+            'pools': {}
+        }
+
+        for pool_key, instance in cls._instances.items():
+            if instance.client:
+                stats['pools'][pool_key] = {
+                    'uri': instance._config.get('mongo_uri', 'unknown'),
+                    'db_name': instance._config.get('db_name', 'unknown'),
+                    'min_pool_size': instance._config.get('min_pool_size', 'unknown'),
+                    'max_pool_size': instance._config.get('max_pool_size', 'unknown')
+                }
+
+        return stats
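Usage note: a minimal sketch of how spider code might consume the new pool manager. The hook functions and the `items` collection below are illustrative assumptions, not part of this diff; only `MongoConnectionPoolManager` and its methods come from the added module.

    from crawlo.utils.mongo_connection_pool import MongoConnectionPoolManager

    async def open_spider():
        # Repeated calls with the same URI and db name return the same shared client.
        client = await MongoConnectionPoolManager.get_client(
            mongo_uri='mongodb://localhost:27017',
            db_name='crawlo',
        )
        # 'items' is a hypothetical collection used only for this sketch.
        await client['crawlo']['items'].insert_one({'status': 'ok'})

    async def shutdown():
        # Close every pooled client once, at process shutdown.
        await MongoConnectionPoolManager.close_all_clients()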
crawlo/utils/mysql_connection_pool.py (new file)
@@ -0,0 +1,197 @@
+#!/usr/bin/python
+# -*- coding: UTF-8 -*-
+"""
+MySQL connection pool manager
+=============================
+
+Provides a singleton MySQL connection pool so that multiple spiders share
+a single pool instead of each creating its own, avoiding wasted resources.
+
+Features:
+1. Singleton pattern - a globally unique pool instance per configuration
+2. Thread safety - an async lock guards initialization
+3. Configuration isolation - different database configurations get separate pools
+4. Automatic cleanup - supports resource cleanup and reset
+"""
+
+import asyncio
+import aiomysql
+from asyncmy import create_pool as asyncmy_create_pool
+from typing import Dict, Optional, Any
+from crawlo.logging import get_logger
+
+
+class MySQLConnectionPoolManager:
+    """MySQL connection pool manager (singleton)."""
+
+    _instances: Dict[str, 'MySQLConnectionPoolManager'] = {}
+    _lock = asyncio.Lock()
+
+    def __init__(self, pool_key: str):
+        """
+        Initialize the pool manager.
+
+        Args:
+            pool_key: unique identifier for this pool
+        """
+        self.pool_key = pool_key
+        self.pool = None
+        self._pool_lock = asyncio.Lock()
+        self._pool_initialized = False
+        self._config: Dict[str, Any] = {}
+        self._pool_type: str = 'asyncmy'  # asyncmy is the default
+        self.logger = get_logger(f'MySQLPool.{pool_key}')
+
+    @classmethod
+    async def get_pool(
+        cls,
+        pool_type: str = 'asyncmy',
+        host: str = 'localhost',
+        port: int = 3306,
+        user: str = 'root',
+        password: str = '',
+        db: str = 'crawlo',
+        minsize: int = 3,
+        maxsize: int = 10,
+        **kwargs
+    ):
+        """
+        Get a pool instance (singleton).
+
+        Args:
+            pool_type: pool type ('asyncmy' or 'aiomysql')
+            host: database host
+            port: database port
+            user: database user
+            password: database password
+            db: database name
+            minsize: minimum number of connections
+            maxsize: maximum number of connections
+            **kwargs: additional connection parameters
+
+        Returns:
+            The pool instance
+        """
+        # Build the unique pool key
+        pool_key = f"{pool_type}:{host}:{port}:{db}"
+
+        async with cls._lock:
+            if pool_key not in cls._instances:
+                instance = cls(pool_key)
+                instance._pool_type = pool_type
+                instance._config = {
+                    'host': host,
+                    'port': port,
+                    'user': user,
+                    'password': password,
+                    'db': db,
+                    'minsize': minsize,
+                    'maxsize': maxsize,
+                    **kwargs
+                }
+                cls._instances[pool_key] = instance
+                instance.logger.info(
+                    f"Created new connection pool manager: {pool_key} "
+                    f"(type={pool_type}, minsize={minsize}, maxsize={maxsize})"
+                )
+
+            instance = cls._instances[pool_key]
+            await instance._ensure_pool()
+            return instance.pool
+
+    async def _ensure_pool(self):
+        """Ensure the pool is initialized (thread-safe)."""
+        if self._pool_initialized:
+            # Check whether the pool is still valid
+            if self.pool and hasattr(self.pool, 'closed') and not self.pool.closed:
+                return
+            else:
+                self.logger.warning("Pool was initialized but is no longer valid; reinitializing")
+
+        async with self._pool_lock:
+            if not self._pool_initialized:
+                try:
+                    if self._pool_type == 'asyncmy':
+                        self.pool = await self._create_asyncmy_pool()
+                    elif self._pool_type == 'aiomysql':
+                        self.pool = await self._create_aiomysql_pool()
+                    else:
+                        raise ValueError(f"Unsupported pool type: {self._pool_type}")
+
+                    self._pool_initialized = True
+                    self.logger.info(
+                        f"Pool initialized: {self.pool_key} "
+                        f"(minsize={self._config['minsize']}, maxsize={self._config['maxsize']})"
+                    )
+                except Exception as e:
+                    self.logger.error(f"Pool initialization failed: {e}")
+                    self._pool_initialized = False
+                    self.pool = None
+                    raise
+
+    async def _create_asyncmy_pool(self):
+        """Create an asyncmy pool."""
+        return await asyncmy_create_pool(
+            host=self._config['host'],
+            port=self._config['port'],
+            user=self._config['user'],
+            password=self._config['password'],
+            db=self._config['db'],
+            minsize=self._config['minsize'],
+            maxsize=self._config['maxsize'],
+            echo=self._config.get('echo', False)
+        )
+
+    async def _create_aiomysql_pool(self):
+        """Create an aiomysql pool."""
+        return await aiomysql.create_pool(
+            host=self._config['host'],
+            port=self._config['port'],
+            user=self._config['user'],
+            password=self._config['password'],
+            db=self._config['db'],
+            minsize=self._config['minsize'],
+            maxsize=self._config['maxsize'],
+            cursorclass=aiomysql.DictCursor,
+            autocommit=False
+        )
+
+    @classmethod
+    async def close_all_pools(cls):
+        """Close all pools."""
+        logger = get_logger('MySQLPool')
+        logger.info(f"Closing all pools ({len(cls._instances)} total)")
+
+        for pool_key, instance in cls._instances.items():
+            try:
+                if instance.pool:
+                    logger.info(f"Closing pool: {pool_key}")
+                    instance.pool.close()
+                    await instance.pool.wait_closed()
+                    logger.info(f"Pool closed: {pool_key}")
+            except Exception as e:
+                logger.error(f"Error while closing pool {pool_key}: {e}")
+
+        cls._instances.clear()
+        logger.info("All pools closed")
+
+    @classmethod
+    def get_pool_stats(cls) -> Dict[str, Any]:
+        """Return statistics for all connection pools."""
+        stats = {
+            'total_pools': len(cls._instances),
+            'pools': {}
+        }
+
+        for pool_key, instance in cls._instances.items():
+            if instance.pool:
+                stats['pools'][pool_key] = {
+                    'type': instance._pool_type,
+                    'size': getattr(instance.pool, 'size', 'unknown'),
+                    'minsize': instance._config.get('minsize', 'unknown'),
+                    'maxsize': instance._config.get('maxsize', 'unknown'),
+                    'host': instance._config.get('host', 'unknown'),
+                    'db': instance._config.get('db', 'unknown')
+                }
+
+        return stats
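Usage note: a minimal sketch of acquiring the shared MySQL pool with the aiomysql backend. The `items` table and the credentials are placeholder assumptions for this sketch; only the `MySQLConnectionPoolManager` API comes from the added module.

    from crawlo.utils.mysql_connection_pool import MySQLConnectionPoolManager

    async def store_title(title: str):
        # One pool per (pool_type, host, port, db); repeated calls reuse it.
        pool = await MySQLConnectionPoolManager.get_pool(
            pool_type='aiomysql',
            host='localhost', user='root', password='secret', db='crawlo',
        )
        async with pool.acquire() as conn:
            async with conn.cursor() as cur:
                await cur.execute("INSERT INTO items (title) VALUES (%s)", (title,))
            await conn.commit()  # the aiomysql pool is created with autocommit=False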
crawlo/utils/redis_checker.py (new file)
@@ -0,0 +1,91 @@
+#!/usr/bin/python
+# -*- coding: UTF-8 -*-
+"""
+Redis availability checker
+==========================
+Provides a Redis connectivity check used for runtime decisions in auto mode.
+"""
+
+import asyncio
+import logging
+from typing import Optional
+import redis.asyncio as aioredis
+
+
+class RedisChecker:
+    """Redis availability checker."""
+
+    def __init__(self, redis_url: str, timeout: float = 5.0):
+        """
+        Initialize the checker.
+
+        Args:
+            redis_url: Redis connection URL
+            timeout: connection timeout in seconds
+        """
+        self.redis_url = redis_url
+        self.timeout = timeout
+        self.logger = logging.getLogger(self.__class__.__name__)
+
+    async def is_redis_available(self) -> bool:
+        """
+        Check whether Redis is reachable.
+
+        Returns:
+            True if Redis is available
+        """
+        try:
+            # Create the Redis connection
+            redis_client = aioredis.from_url(
+                self.redis_url,
+                socket_connect_timeout=self.timeout,
+                socket_timeout=self.timeout,
+                retry_on_timeout=False
+            )
+
+            # Try a PING command
+            await asyncio.wait_for(redis_client.ping(), timeout=self.timeout)
+
+            # Close the connection
+            await redis_client.close()
+
+            self.logger.debug(f"Redis connectivity check succeeded: {self.redis_url}")
+            return True
+
+        except asyncio.TimeoutError:
+            self.logger.warning(f"Redis connection timed out: {self.redis_url}")
+            return False
+
+        except Exception as e:
+            self.logger.warning(f"Redis connection failed: {self.redis_url} - {e}")
+            return False
+
+    @staticmethod
+    async def check_redis_availability(redis_url: str, timeout: float = 5.0) -> bool:
+        """
+        Convenience helper: check whether Redis is available.
+
+        Args:
+            redis_url: Redis connection URL
+            timeout: connection timeout in seconds
+
+        Returns:
+            True if Redis is available
+        """
+        checker = RedisChecker(redis_url, timeout)
+        return await checker.is_redis_available()
+
+
+# Convenience function
+async def is_redis_available(redis_url: str, timeout: float = 5.0) -> bool:
+    """
+    Check whether Redis is available.
+
+    Args:
+        redis_url: Redis connection URL
+        timeout: connection timeout in seconds
+
+    Returns:
+        True if Redis is available
+    """
+    return await RedisChecker.check_redis_availability(redis_url, timeout)
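Usage note: a minimal sketch of the runtime decision this checker enables. The fallback logic below is illustrative, not the framework's actual auto-mode code; only `is_redis_available` comes from the added module.

    import asyncio
    from crawlo.utils.redis_checker import is_redis_available

    async def pick_queue_backend() -> str:
        # Probe Redis with a short timeout before committing to the distributed queue.
        if await is_redis_available('redis://localhost:6379/0', timeout=2.0):
            return 'redis'
        return 'memory'  # fall back to the standalone in-memory queue

    print(asyncio.run(pick_queue_backend()))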