crawlo 1.2.1__py3-none-any.whl → 1.2.3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release: this version of crawlo has been flagged as potentially problematic.
- crawlo/__version__.py +1 -1
- crawlo/cli.py +16 -0
- crawlo/commands/check.py +69 -69
- crawlo/commands/genspider.py +25 -25
- crawlo/commands/help.py +4 -4
- crawlo/commands/list.py +23 -23
- crawlo/commands/run.py +34 -34
- crawlo/commands/startproject.py +35 -36
- crawlo/commands/stats.py +21 -21
- crawlo/commands/utils.py +4 -4
- crawlo/config.py +4 -4
- crawlo/config_validator.py +1 -2
- crawlo/data/__init__.py +6 -0
- crawlo/data/user_agents.py +108 -0
- crawlo/downloader/hybrid_downloader.py +0 -1
- crawlo/downloader/selenium_downloader.py +1 -1
- crawlo/extension/health_check.py +2 -2
- crawlo/items/fields.py +0 -1
- crawlo/middleware/offsite.py +0 -1
- crawlo/middleware/proxy.py +3 -2
- crawlo/middleware/request_ignore.py +0 -1
- crawlo/middleware/response_code.py +0 -1
- crawlo/middleware/response_filter.py +0 -1
- crawlo/middleware/retry.py +1 -1
- crawlo/mode_manager.py +3 -3
- crawlo/pipelines/database_dedup_pipeline.py +1 -3
- crawlo/pipelines/memory_dedup_pipeline.py +2 -2
- crawlo/pipelines/mysql_pipeline.py +4 -3
- crawlo/pipelines/redis_dedup_pipeline.py +2 -4
- crawlo/project.py +2 -2
- crawlo/subscriber.py +1 -2
- crawlo/templates/project/settings.py.tmpl +1 -3
- crawlo/templates/project/settings_distributed.py.tmpl +2 -0
- crawlo/utils/batch_processor.py +2 -3
- crawlo/utils/controlled_spider_mixin.py +1 -1
- crawlo/utils/enhanced_error_handler.py +3 -6
- crawlo/utils/env_config.py +1 -1
- crawlo/utils/error_handler.py +2 -4
- crawlo/utils/large_scale_helper.py +2 -1
- crawlo/utils/performance_monitor.py +5 -4
- crawlo/utils/redis_connection_pool.py +4 -4
- crawlo/utils/redis_key_validator.py +1 -2
- crawlo/utils/request_serializer.py +1 -2
- crawlo/utils/spider_loader.py +0 -1
- {crawlo-1.2.1.dist-info → crawlo-1.2.3.dist-info}/METADATA +1 -1
- {crawlo-1.2.1.dist-info → crawlo-1.2.3.dist-info}/RECORD +49 -47
- {crawlo-1.2.1.dist-info → crawlo-1.2.3.dist-info}/WHEEL +0 -0
- {crawlo-1.2.1.dist-info → crawlo-1.2.3.dist-info}/entry_points.txt +0 -0
- {crawlo-1.2.1.dist-info → crawlo-1.2.3.dist-info}/top_level.txt +0 -0
crawlo/data/user_agents.py
ADDED
@@ -0,0 +1,108 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+"""
+User-Agent list
+Contains User-Agent strings for a range of devices and browsers, used for crawler disguise
+"""
+
+# Desktop browser User-Agents
+DESKTOP_USER_AGENTS = [
+    # Chrome
+    "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/124.0.0.0 Safari/537.36",
+    "Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/124.0.0.0 Safari/537.36",
+    "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/124.0.0.0 Safari/537.36",
+    "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/124.0.0.0 Safari/537.36",
+
+    # Firefox
+    "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:125.0) Gecko/20100101 Firefox/125.0",
+    "Mozilla/5.0 (Macintosh; Intel Mac OS X 10.15; rv:125.0) Gecko/20100101 Firefox/125.0",
+    "Mozilla/5.0 (X11; Linux i686; rv:125.0) Gecko/20100101 Firefox/125.0",
+    "Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:125.0) Gecko/20100101 Firefox/125.0",
+
+    # Safari
+    "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/17.4 Safari/605.1.15",
+    "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/16.6 Safari/605.1.15",
+
+    # Edge
+    "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/124.0.0.0 Safari/537.36 Edg/124.0.2478.80",
+    "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/124.0.0.0 Safari/537.36 Edg/123.0.2420.40",
+
+    # Internet Explorer
+    "Mozilla/5.0 (Windows NT 10.0; WOW64; Trident/7.0; rv:11.0) like Gecko",
+]
+
+# Mobile device User-Agents
+MOBILE_USER_AGENTS = [
+    # iPhone
+    "Mozilla/5.0 (iPhone; CPU iPhone OS 17_4 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/17.4 Mobile/15E148 Safari/604.1",
+    "Mozilla/5.0 (iPhone; CPU iPhone OS 16_6 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/16.6 Mobile/15E148 Safari/604.1",
+
+    # iPad
+    "Mozilla/5.0 (iPad; CPU OS 17_4 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/17.4 Mobile/15E148 Safari/604.1",
+    "Mozilla/5.0 (iPad; CPU OS 16_6 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/16.6 Mobile/15E148 Safari/604.1",
+
+    # Android
+    "Mozilla/5.0 (Linux; Android 14; SM-S911B) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/124.0.6367.118 Mobile Safari/537.36",
+    "Mozilla/5.0 (Linux; Android 13; SM-S908B) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/124.0.6367.118 Mobile Safari/537.36",
+    "Mozilla/5.0 (Linux; Android 12; Pixel 6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/124.0.6367.118 Mobile Safari/537.36",
+    "Mozilla/5.0 (Linux; Android 11; Pixel 5) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/124.0.6367.118 Mobile Safari/537.36",
+
+    # Android tablets
+    "Mozilla/5.0 (Linux; Android 13; SM-X906C Build/TP1A.220624.014; wv) AppleWebKit/537.36 (KHTML, like Gecko) Version/4.0 Chrome/124.0.6367.118 Safari/537.36",
+]
+
+# Crawler/bot User-Agents (for testing)
+BOT_USER_AGENTS = [
+    "Mozilla/5.0 (compatible; Googlebot/2.1; +http://www.google.com/bot.html)",
+    "Mozilla/5.0 (compatible; Bingbot/2.0; +http://www.bing.com/bingbot.htm)",
+    "Mozilla/5.0 (compatible; Baiduspider/2.0; +http://www.baidu.com/search/spider.html)",
+    "Mozilla/5.0 (compatible; Yahoo! Slurp; http://help.yahoo.com/help/us/ysearch/slurp)",
+]
+
+# Combined list of all User-Agents
+ALL_USER_AGENTS = DESKTOP_USER_AGENTS + MOBILE_USER_AGENTS
+
+# User-Agents grouped by device type
+USER_AGENTS_BY_TYPE = {
+    "desktop": DESKTOP_USER_AGENTS,
+    "mobile": MOBILE_USER_AGENTS,
+    "bot": BOT_USER_AGENTS,
+    "all": ALL_USER_AGENTS,
+}
+
+def get_user_agents(device_type="all"):
+    """
+    Get the User-Agent list for the given device type.
+
+    Args:
+        device_type (str): device type, one of "desktop", "mobile", "bot", "all"
+
+    Returns:
+        list: list of User-Agent strings
+    """
+    return USER_AGENTS_BY_TYPE.get(device_type, ALL_USER_AGENTS)
+
+def get_random_user_agent(device_type="all"):
+    """
+    Get a random User-Agent.
+
+    Args:
+        device_type (str): device type, one of "desktop", "mobile", "bot", "all"
+
+    Returns:
+        str: a random User-Agent string
+    """
+    import random
+    user_agents = get_user_agents(device_type)
+    return random.choice(user_agents) if user_agents else ""
+
+# Export the commonly used User-Agent lists
+__all__ = [
+    "DESKTOP_USER_AGENTS",
+    "MOBILE_USER_AGENTS",
+    "BOT_USER_AGENTS",
+    "ALL_USER_AGENTS",
+    "USER_AGENTS_BY_TYPE",
+    "get_user_agents",
+    "get_random_user_agent"
+]
crawlo/extension/health_check.py
CHANGED
@@ -1,11 +1,11 @@
 #!/usr/bin/python
 # -*- coding:UTF-8 -*-
 import asyncio
+from datetime import datetime
 from typing import Any, Optional, Dict
-from datetime import datetime, timedelta
 
-from crawlo.utils.log import get_logger
 from crawlo.event import spider_opened, spider_closed, response_received, request_scheduled
+from crawlo.utils.log import get_logger
 
 
 class HealthCheckExtension:
crawlo/items/fields.py
CHANGED
crawlo/middleware/offsite.py
CHANGED
crawlo/middleware/proxy.py
CHANGED
@@ -1,10 +1,11 @@
 #!/usr/bin/python
 # -*- coding: UTF-8 -*-
+import time
 import asyncio
 import socket
-from typing import Optional, Dict, Any, Callable, Union, TYPE_CHECKING, List
 from urllib.parse import urlparse
-import time
+from typing import Optional, Dict, Any, Callable, Union, TYPE_CHECKING, List
+
 
 from crawlo import Request, Response
 from crawlo.exceptions import NotConfiguredError
crawlo/middleware/retry.py
CHANGED
crawlo/mode_manager.py
CHANGED
@@ -10,10 +10,10 @@
 2. distributed - distributed mode
 3. auto - automatic detection mode
 """
-
-from typing import Dict, Any, Optional
-from enum import Enum
 import os
+from enum import Enum
+from typing import Dict, Any, Optional
+
 from crawlo.utils.log import get_logger
 
 
crawlo/pipelines/database_dedup_pipeline.py
CHANGED
@@ -11,15 +11,13 @@
 - Broad applicability: supports multiple database backends
 - Extensible: supports custom table structures and fields
 """
-
 import hashlib
-from typing import Dict, Any, Optional
 import aiomysql
 
 from crawlo import Item
+from crawlo.exceptions import DropItem
 from crawlo.spider import Spider
 from crawlo.utils.log import get_logger
-from crawlo.exceptions import DropItem
 
 
 class DatabaseDedupPipeline:
crawlo/pipelines/memory_dedup_pipeline.py
CHANGED
@@ -13,12 +13,12 @@
 """
 
 import hashlib
-from typing import
+from typing import Set
 
 from crawlo import Item
+from crawlo.exceptions import DropItem
 from crawlo.spider import Spider
 from crawlo.utils.log import get_logger
-from crawlo.exceptions import DropItem
 
 
 class MemoryDedupPipeline:
crawlo/pipelines/mysql_pipeline.py
CHANGED
@@ -1,11 +1,12 @@
 # -*- coding: utf-8 -*-
 import asyncio
 import aiomysql
-from typing import Optional, List, Dict
 from asyncmy import create_pool
-from crawlo.utils.log import get_logger
+from typing import Optional, List, Dict
+
 from crawlo.exceptions import ItemDiscard
-from crawlo.utils.db_helper import make_insert_sql, make_batch_sql
+from crawlo.utils.db_helper import make_insert_sql, make_batch_sql
+from crawlo.utils.log import get_logger
 
 
 class AsyncmyMySQLPipeline:
crawlo/pipelines/redis_dedup_pipeline.py
CHANGED
@@ -11,15 +11,13 @@
 - Configurable: supports custom Redis connection parameters
 - Fault-tolerant: no data loss on network failures
 """
-
+import redis
 import hashlib
 from typing import Optional
 
-import redis
-
 from crawlo import Item
-from crawlo.exceptions import DropItem
 from crawlo.spider import Spider
+from crawlo.exceptions import DropItem
 from crawlo.utils.log import get_logger
 
 
crawlo/project.py
CHANGED
@@ -14,10 +14,10 @@ import sys
 import configparser
 from importlib import import_module
 from inspect import iscoroutinefunction
-from typing import Callable, Optional
+from typing import Callable, Optional
 
-from crawlo.utils.log import get_logger
 from crawlo.settings.setting_manager import SettingManager
+from crawlo.utils.log import get_logger
 
 logger = get_logger(__name__)
 
crawlo/subscriber.py
CHANGED
@@ -1,10 +1,9 @@
 #!/usr/bin/python
 # -*- coding:UTF-8 -*-
 import asyncio
-import weakref
 from collections import defaultdict
 from inspect import iscoroutinefunction
-from typing import Dict,
+from typing import Dict, Callable, Coroutine, Any, TypeAlias, List, Tuple
 
 
 class ReceiverTypeError(TypeError):
crawlo/templates/project/settings.py.tmpl
CHANGED
@@ -146,8 +146,6 @@ MONGO_USE_BATCH = False  # whether to enable batch inserts
 REQUEST_DIR = '.'
 
 # Automatically select the dedup pipeline based on the run mode
-# Standalone mode uses the in-memory dedup pipeline by default
-# Distributed mode uses the Redis dedup pipeline by default
 if RUN_MODE == 'distributed':
     # Distributed mode uses the Redis dedup pipeline by default
     DEFAULT_DEDUP_PIPELINE = 'crawlo.pipelines.redis_dedup_pipeline.RedisDedupPipeline'
@@ -157,7 +155,6 @@ else:
 
 # Dedup filter (the Redis filter is recommended for distributed projects)
 FILTER_CLASS = 'crawlo.filters.memory_filter.MemoryFilter'
-# FILTER_CLASS = 'crawlo.filters.aioredis_filter.AioRedisFilter'  # distributed dedup
 
 # --- Redis configuration (for distributed dedup and queues) ---
 REDIS_HOST = os.getenv('REDIS_HOST', '127.0.0.1')
@@ -190,6 +187,7 @@ MIDDLEWARES = [
     'crawlo.middleware.download_delay.DownloadDelayMiddleware',
     'crawlo.middleware.default_header.DefaultHeaderMiddleware',
     'crawlo.middleware.proxy.ProxyMiddleware',
+    'crawlo.middleware.offsite.OffsiteMiddleware',
 
     # === Response processing stage ===
     'crawlo.middleware.retry.RetryMiddleware',
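The two removed comments were redundant with the if/else that follows; the second hunk's context line confirms the else branch. Pieced together, the template's mode switch reads roughly as below (the standalone branch is inferred from the MemoryDedupPipeline module shown elsewhere in this diff, so its exact wording is an assumption):

    if RUN_MODE == 'distributed':
        # Distributed mode uses the Redis dedup pipeline by default
        DEFAULT_DEDUP_PIPELINE = 'crawlo.pipelines.redis_dedup_pipeline.RedisDedupPipeline'
    else:
        # Standalone mode uses the in-memory dedup pipeline by default (inferred)
        DEFAULT_DEDUP_PIPELINE = 'crawlo.pipelines.memory_dedup_pipeline.MemoryDedupPipeline'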
crawlo/templates/project/settings_distributed.py.tmpl
CHANGED
@@ -83,6 +83,8 @@ MIDDLEWARES = [
     'crawlo.middleware.download_delay.DownloadDelayMiddleware',
     'crawlo.middleware.default_header.DefaultHeaderMiddleware',
     'crawlo.middleware.proxy.ProxyMiddleware',
+    'crawlo.middleware.offsite.OffsiteMiddleware',
+
     'crawlo.middleware.retry.RetryMiddleware',
     'crawlo.middleware.response_code.ResponseCodeMiddleware',
     'crawlo.middleware.response_filter.ResponseFilterMiddleware',
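Both settings templates now register OffsiteMiddleware out of the box. A hedged spider sketch of what that enables (assumption: crawlo's OffsiteMiddleware follows the common Scrapy-style convention of dropping requests whose host falls outside the spider's allowed domains; the allowed_domains attribute is illustrative and not confirmed by this diff):

    from crawlo.spider import Spider

    class ExampleSpider(Spider):
        name = "example"
        # Hypothetical: with OffsiteMiddleware enabled above, requests to
        # hosts outside this list are expected to be filtered out.
        allowed_domains = ["example.com"]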
crawlo/utils/batch_processor.py
CHANGED
@@ -5,12 +5,11 @@
 Provides a unified interface and optimized implementations for batch operations
 """
 import asyncio
-import time
-from typing import List, Callable, Any, Optional, Dict
 from functools import wraps
+from typing import List, Callable, Any, Optional, Dict
 
-from crawlo.utils.log import get_logger
 from crawlo.utils.error_handler import ErrorHandler
+from crawlo.utils.log import get_logger
 
 
 class BatchProcessor:
crawlo/utils/enhanced_error_handler.py
CHANGED
@@ -1,16 +1,13 @@
 #!/usr/bin/python
 # -*- coding:UTF-8 -*-
 """
-
+Enhanced error handling utilities
 Provides more detailed and consistent error handling and logging
 """
 import traceback
-import logging
-import sys
-import os
-from typing import Optional, Callable, Any, Dict, List
-from functools import wraps
 from datetime import datetime
+from functools import wraps
+from typing import Optional, Callable, Any, Dict, List
 
 from crawlo.utils.log import get_logger
 
crawlo/utils/env_config.py
CHANGED
crawlo/utils/error_handler.py
CHANGED
@@ -4,13 +4,11 @@
 Unified error handling utilities
 Provides consistent error handling and logging
 """
-import traceback
-import logging
-from typing import Optional, Callable, Any
 from functools import wraps
+from typing import Callable, Any
 
-from crawlo.utils.log import get_logger
 from crawlo.utils.enhanced_error_handler import EnhancedErrorHandler, ErrorContext
+from crawlo.utils.log import get_logger
 
 
 class ErrorHandler:
crawlo/utils/performance_monitor.py
CHANGED
@@ -4,14 +4,15 @@
 Performance monitoring utilities
 Provides system performance monitoring and resource usage tracking
 """
-import time
-import psutil
 import asyncio
-
+import time
 from functools import wraps
+from typing import Dict, Any
+
+import psutil
 
-from crawlo.utils.log import get_logger
 from crawlo.utils.error_handler import ErrorHandler
+from crawlo.utils.log import get_logger
 
 
 class PerformanceMonitor:
crawlo/utils/redis_connection_pool.py
CHANGED
@@ -4,13 +4,13 @@
 Redis connection pool optimization utilities
 Provides optimized Redis connection pool management and configuration
 """
-import asyncio
-import redis.asyncio as aioredis
-from typing import Dict, Any, Optional, Union
 from contextlib import asynccontextmanager
+from typing import Dict, Any, Optional
+
+import redis.asyncio as aioredis
 
-from crawlo.utils.log import get_logger
 from crawlo.utils.error_handler import ErrorHandler
+from crawlo.utils.log import get_logger
 
 
 class OptimizedRedisConnectionPool:
crawlo/utils/spider_loader.py
CHANGED