crawlo 1.3.2__py3-none-any.whl → 1.3.4__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of crawlo might be problematic. Click here for more details.
- crawlo/__init__.py +24 -0
- crawlo/__version__.py +1 -1
- crawlo/commands/run.py +58 -32
- crawlo/core/__init__.py +44 -0
- crawlo/core/engine.py +119 -45
- crawlo/core/scheduler.py +4 -3
- crawlo/crawler.py +603 -1133
- crawlo/downloader/aiohttp_downloader.py +4 -2
- crawlo/extension/__init__.py +1 -1
- crawlo/extension/logging_extension.py +23 -7
- crawlo/factories/__init__.py +28 -0
- crawlo/factories/base.py +69 -0
- crawlo/factories/crawler.py +104 -0
- crawlo/factories/registry.py +85 -0
- crawlo/filters/aioredis_filter.py +25 -2
- crawlo/framework.py +292 -0
- crawlo/initialization/__init__.py +40 -0
- crawlo/initialization/built_in.py +426 -0
- crawlo/initialization/context.py +142 -0
- crawlo/initialization/core.py +194 -0
- crawlo/initialization/phases.py +149 -0
- crawlo/initialization/registry.py +146 -0
- crawlo/items/base.py +2 -1
- crawlo/logging/__init__.py +38 -0
- crawlo/logging/config.py +97 -0
- crawlo/logging/factory.py +129 -0
- crawlo/logging/manager.py +112 -0
- crawlo/middleware/middleware_manager.py +1 -1
- crawlo/middleware/offsite.py +1 -1
- crawlo/mode_manager.py +26 -1
- crawlo/pipelines/pipeline_manager.py +2 -1
- crawlo/project.py +76 -46
- crawlo/queue/pqueue.py +11 -5
- crawlo/queue/queue_manager.py +143 -19
- crawlo/queue/redis_priority_queue.py +69 -49
- crawlo/settings/default_settings.py +110 -14
- crawlo/settings/setting_manager.py +29 -13
- crawlo/spider/__init__.py +34 -16
- crawlo/stats_collector.py +17 -3
- crawlo/task_manager.py +112 -3
- crawlo/templates/project/settings.py.tmpl +103 -202
- crawlo/templates/project/settings_distributed.py.tmpl +122 -135
- crawlo/templates/project/settings_gentle.py.tmpl +149 -43
- crawlo/templates/project/settings_high_performance.py.tmpl +127 -90
- crawlo/templates/project/settings_minimal.py.tmpl +46 -15
- crawlo/templates/project/settings_simple.py.tmpl +138 -75
- crawlo/templates/project/spiders/__init__.py.tmpl +5 -1
- crawlo/templates/run.py.tmpl +10 -14
- crawlo/templates/spiders_init.py.tmpl +10 -0
- crawlo/tools/network_diagnostic.py +365 -0
- crawlo/utils/class_loader.py +26 -0
- crawlo/utils/error_handler.py +76 -35
- crawlo/utils/log.py +41 -144
- crawlo/utils/redis_connection_pool.py +43 -6
- crawlo/utils/request_serializer.py +8 -1
- {crawlo-1.3.2.dist-info → crawlo-1.3.4.dist-info}/METADATA +120 -14
- {crawlo-1.3.2.dist-info → crawlo-1.3.4.dist-info}/RECORD +104 -45
- tests/authenticated_proxy_example.py +2 -2
- tests/baidu_performance_test.py +109 -0
- tests/baidu_test.py +60 -0
- tests/comprehensive_framework_test.py +213 -0
- tests/comprehensive_test.py +82 -0
- tests/comprehensive_testing_summary.md +187 -0
- tests/debug_configure.py +70 -0
- tests/debug_framework_logger.py +85 -0
- tests/debug_log_levels.py +64 -0
- tests/distributed_test.py +67 -0
- tests/distributed_test_debug.py +77 -0
- tests/final_command_test_report.md +0 -0
- tests/final_comprehensive_test.py +152 -0
- tests/final_validation_test.py +183 -0
- tests/framework_performance_test.py +203 -0
- tests/optimized_performance_test.py +212 -0
- tests/performance_comparison.py +246 -0
- tests/queue_blocking_test.py +114 -0
- tests/queue_test.py +90 -0
- tests/scrapy_comparison/ofweek_scrapy.py +139 -0
- tests/scrapy_comparison/scrapy_test.py +134 -0
- tests/simple_command_test.py +120 -0
- tests/simple_crawlo_test.py +128 -0
- tests/simple_log_test.py +58 -0
- tests/simple_optimization_test.py +129 -0
- tests/simple_spider_test.py +50 -0
- tests/simple_test.py +48 -0
- tests/test_all_commands.py +231 -0
- tests/test_batch_processor.py +179 -0
- tests/test_component_factory.py +175 -0
- tests/test_controlled_spider_mixin.py +80 -0
- tests/test_enhanced_error_handler_comprehensive.py +246 -0
- tests/test_factories.py +253 -0
- tests/test_framework_logger.py +67 -0
- tests/test_framework_startup.py +65 -0
- tests/test_large_scale_config.py +113 -0
- tests/test_large_scale_helper.py +236 -0
- tests/test_mode_change.py +73 -0
- tests/test_mode_consistency.py +1 -1
- tests/test_performance_monitor.py +116 -0
- tests/test_queue_empty_check.py +42 -0
- tests/untested_features_report.md +139 -0
- tests/verify_debug.py +52 -0
- tests/verify_log_fix.py +112 -0
- tests/DOUBLE_CRAWLO_PREFIX_FIX_REPORT.md +0 -82
- {crawlo-1.3.2.dist-info → crawlo-1.3.4.dist-info}/WHEEL +0 -0
- {crawlo-1.3.2.dist-info → crawlo-1.3.4.dist-info}/entry_points.txt +0 -0
- {crawlo-1.3.2.dist-info → crawlo-1.3.4.dist-info}/top_level.txt +0 -0
|
@@ -0,0 +1,26 @@
|
|
|
1
|
+
# -*- coding: UTF-8 -*-
|
|
2
|
+
"""
|
|
3
|
+
类加载器工具模块
|
|
4
|
+
==============
|
|
5
|
+
提供动态类加载功能,避免循环依赖问题。
|
|
6
|
+
"""
|
|
7
|
+
import importlib
|
|
8
|
+
from typing import Any
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
def load_class(path: str) -> Any:
    """
    Dynamically load a class (or any other module attribute) by dotted path.

    Args:
        path: Fully qualified dotted path, e.g. 'package.module.ClassName'.
            Everything before the last dot is imported as a module; the
            last component is looked up on that module.

    Returns:
        The object found under the last path component.

    Raises:
        ImportError: If the path contains no dot, the module cannot be
            imported, or the module has no such attribute. The underlying
            exception is attached as ``__cause__``.
    """
    try:
        module_path, class_name = path.rsplit('.', 1)
        module = importlib.import_module(module_path)
        return getattr(module, class_name)
    except (ValueError, ImportError, AttributeError) as e:
        # Chain explicitly so the traceback reports the root failure as the
        # direct cause instead of "during handling ... another exception".
        raise ImportError(f"无法加载类 '{path}': {e}") from e
|
crawlo/utils/error_handler.py
CHANGED
|
@@ -4,22 +4,30 @@
|
|
|
4
4
|
统一错误处理工具
|
|
5
5
|
提供一致的错误处理和日志记录机制
|
|
6
6
|
"""
|
|
7
|
+
import time
|
|
8
|
+
import traceback
|
|
7
9
|
from functools import wraps
|
|
8
10
|
from typing import Callable, Any
|
|
9
11
|
|
|
10
|
-
from crawlo.utils.enhanced_error_handler import EnhancedErrorHandler, ErrorContext
|
|
11
12
|
from crawlo.utils.log import get_logger
|
|
12
13
|
|
|
13
14
|
|
|
14
15
|
class ErrorHandler:
|
|
15
|
-
"""
|
|
16
|
-
|
|
16
|
+
"""统一错误处理器(简化版,避免循环依赖)"""
|
|
17
|
+
|
|
17
18
|
def __init__(self, logger_name: str = __name__, log_level: str = 'ERROR'):
|
|
18
|
-
|
|
19
|
-
|
|
20
|
-
self.
|
|
21
|
-
|
|
22
|
-
|
|
19
|
+
# 延迟初始化logger避免循环依赖
|
|
20
|
+
self._logger = None
|
|
21
|
+
self.logger_name = logger_name
|
|
22
|
+
self.log_level = log_level
|
|
23
|
+
|
|
24
|
+
@property
|
|
25
|
+
def logger(self):
|
|
26
|
+
if self._logger is None:
|
|
27
|
+
self._logger = get_logger(self.logger_name)
|
|
28
|
+
return self._logger
|
|
29
|
+
|
|
30
|
+
def handle_error(self, exception: Exception, context: str = "",
|
|
23
31
|
raise_error: bool = True, log_error: bool = True) -> None:
|
|
24
32
|
"""
|
|
25
33
|
统一处理错误
|
|
@@ -30,14 +38,16 @@ class ErrorHandler:
|
|
|
30
38
|
raise_error: 是否重新抛出异常
|
|
31
39
|
log_error: 是否记录错误日志
|
|
32
40
|
"""
|
|
33
|
-
|
|
34
|
-
|
|
35
|
-
|
|
36
|
-
|
|
37
|
-
|
|
38
|
-
|
|
39
|
-
|
|
40
|
-
|
|
41
|
+
if log_error:
|
|
42
|
+
error_msg = f"Error in {context}: {str(exception)}" if context else str(exception)
|
|
43
|
+
self.logger.error(error_msg)
|
|
44
|
+
# 在DEBUG级别记录详细的堆栈跟踪
|
|
45
|
+
self.logger.debug(f"详细错误信息:\n{traceback.format_exc()}")
|
|
46
|
+
|
|
47
|
+
if raise_error:
|
|
48
|
+
raise exception
|
|
49
|
+
|
|
50
|
+
def safe_call(self, func: Callable, *args, default_return=None,
|
|
41
51
|
context: str = "", **kwargs) -> Any:
|
|
42
52
|
"""
|
|
43
53
|
安全调用函数,捕获并处理异常
|
|
@@ -52,13 +62,13 @@ class ErrorHandler:
|
|
|
52
62
|
Returns:
|
|
53
63
|
函数返回值或默认值
|
|
54
64
|
"""
|
|
55
|
-
|
|
56
|
-
|
|
57
|
-
|
|
58
|
-
context=
|
|
59
|
-
|
|
60
|
-
|
|
61
|
-
def retry_on_failure(self, max_retries: int = 3, delay: float = 1.0,
|
|
65
|
+
try:
|
|
66
|
+
return func(*args, **kwargs)
|
|
67
|
+
except Exception as e:
|
|
68
|
+
self.handle_error(e, context=context, raise_error=False)
|
|
69
|
+
return default_return
|
|
70
|
+
|
|
71
|
+
def retry_on_failure(self, max_retries: int = 3, delay: float = 1.0,
|
|
62
72
|
exceptions: tuple = (Exception,)):
|
|
63
73
|
"""
|
|
64
74
|
装饰器:失败时重试
|
|
@@ -68,16 +78,46 @@ class ErrorHandler:
|
|
|
68
78
|
delay: 重试间隔(秒)
|
|
69
79
|
exceptions: 需要重试的异常类型
|
|
70
80
|
"""
|
|
81
|
+
|
|
71
82
|
def decorator(func):
|
|
72
|
-
|
|
73
|
-
|
|
74
|
-
|
|
75
|
-
|
|
83
|
+
@wraps(func)
|
|
84
|
+
def wrapper(*args, **kwargs):
|
|
85
|
+
last_exception = None
|
|
86
|
+
for attempt in range(max_retries + 1):
|
|
87
|
+
try:
|
|
88
|
+
return func(*args, **kwargs)
|
|
89
|
+
except exceptions as e:
|
|
90
|
+
last_exception = e
|
|
91
|
+
if attempt < max_retries:
|
|
92
|
+
self.logger.warning(f"Attempt {attempt + 1} failed, retrying in {delay}s: {e}")
|
|
93
|
+
time.sleep(delay)
|
|
94
|
+
else:
|
|
95
|
+
self.logger.error(f"All {max_retries + 1} attempts failed")
|
|
96
|
+
raise e
|
|
97
|
+
return None
|
|
98
|
+
|
|
99
|
+
return wrapper
|
|
100
|
+
|
|
76
101
|
return decorator
|
|
77
102
|
|
|
78
103
|
|
|
79
|
-
#
|
|
80
|
-
|
|
104
|
+
# 全局错误处理器实例(延迟初始化)
|
|
105
|
+
_default_error_handler = None
|
|
106
|
+
|
|
107
|
+
|
|
108
|
+
def get_default_error_handler():
    """Return the module-wide ErrorHandler, creating it on first use."""
    global _default_error_handler
    handler = _default_error_handler
    if handler is None:
        # First call: build the shared instance and remember it.
        handler = _default_error_handler = ErrorHandler()
    return handler
|
|
114
|
+
|
|
115
|
+
|
|
116
|
+
# 为了向后兼容,保留老的接口
|
|
117
|
+
def __getattr__(name):
    """
    Module-level attribute hook kept for backward compatibility.

    Resolves ``default_error_handler`` lazily through
    ``get_default_error_handler()``; any other missing name raises
    AttributeError.
    """
    # NOTE(review): a module-level __getattr__ is only consulted for attribute
    # access on the module object (e.g. ``from ... import default_error_handler``),
    # not for bare global-name lookups inside this module. The handle_exception
    # wrappers reference ``default_error_handler`` as a bare name - confirm they
    # do not raise NameError at call time.
    if name != 'default_error_handler':
        raise AttributeError(f"module '{__name__}' has no attribute '{name}'")
    return get_default_error_handler()
|
|
81
121
|
|
|
82
122
|
|
|
83
123
|
def handle_exception(context: str = "", raise_error: bool = True, log_error: bool = True):
|
|
@@ -89,6 +129,7 @@ def handle_exception(context: str = "", raise_error: bool = True, log_error: boo
|
|
|
89
129
|
raise_error: 是否重新抛出异常
|
|
90
130
|
log_error: 是否记录错误日志
|
|
91
131
|
"""
|
|
132
|
+
|
|
92
133
|
def decorator(func):
|
|
93
134
|
@wraps(func)
|
|
94
135
|
async def async_wrapper(*args, **kwargs):
|
|
@@ -96,29 +137,29 @@ def handle_exception(context: str = "", raise_error: bool = True, log_error: boo
|
|
|
96
137
|
return await func(*args, **kwargs)
|
|
97
138
|
except Exception as e:
|
|
98
139
|
default_error_handler.handle_error(
|
|
99
|
-
e, context=f"{context} - {func.__name__}",
|
|
140
|
+
e, context=f"{context} - {func.__name__}",
|
|
100
141
|
raise_error=raise_error, log_error=log_error
|
|
101
142
|
)
|
|
102
143
|
if not raise_error:
|
|
103
144
|
return None
|
|
104
|
-
|
|
145
|
+
|
|
105
146
|
@wraps(func)
|
|
106
147
|
def sync_wrapper(*args, **kwargs):
|
|
107
148
|
try:
|
|
108
149
|
return func(*args, **kwargs)
|
|
109
150
|
except Exception as e:
|
|
110
151
|
default_error_handler.handle_error(
|
|
111
|
-
e, context=f"{context} - {func.__name__}",
|
|
152
|
+
e, context=f"{context} - {func.__name__}",
|
|
112
153
|
raise_error=raise_error, log_error=log_error
|
|
113
154
|
)
|
|
114
155
|
if not raise_error:
|
|
115
156
|
return None
|
|
116
|
-
|
|
157
|
+
|
|
117
158
|
# 根据函数是否为异步函数返回相应的包装器
|
|
118
159
|
import inspect
|
|
119
160
|
if inspect.iscoroutinefunction(func):
|
|
120
161
|
return async_wrapper
|
|
121
162
|
else:
|
|
122
163
|
return sync_wrapper
|
|
123
|
-
|
|
124
|
-
return decorator
|
|
164
|
+
|
|
165
|
+
return decorator
|
crawlo/utils/log.py
CHANGED
|
@@ -1,147 +1,44 @@
|
|
|
1
|
-
|
|
2
|
-
|
|
3
|
-
|
|
4
|
-
|
|
5
|
-
|
|
6
|
-
|
|
7
|
-
|
|
8
|
-
|
|
9
|
-
|
|
10
|
-
|
|
1
|
+
# ==================== 向后兼容的日志接口 ====================
|
|
2
|
+
# 主要功能已迁移到 crawlo.logging 模块
|
|
3
|
+
# 本文件仅保留最基本的兼容性接口
|
|
4
|
+
|
|
5
|
+
import logging
|
|
6
|
+
from typing import Optional
|
|
7
|
+
|
|
8
|
+
# 向后兼容:导入新的日志系统
|
|
9
|
+
try:
|
|
10
|
+
from crawlo.logging import get_logger as new_get_logger, configure_logging
|
|
11
|
+
_NEW_LOGGING_AVAILABLE = True
|
|
12
|
+
except ImportError:
|
|
13
|
+
_NEW_LOGGING_AVAILABLE = False
|
|
14
|
+
new_get_logger = None
|
|
15
|
+
configure_logging = None
|
|
11
16
|
|
|
12
17
|
LOG_FORMAT = '%(asctime)s - [%(name)s] - %(levelname)s: %(message)s'
|
|
13
18
|
|
|
14
|
-
|
|
15
|
-
|
|
16
|
-
"""
|
|
17
|
-
|
|
18
|
-
|
|
19
|
-
|
|
20
|
-
|
|
21
|
-
|
|
22
|
-
|
|
23
|
-
|
|
24
|
-
|
|
25
|
-
|
|
26
|
-
|
|
27
|
-
|
|
28
|
-
|
|
29
|
-
|
|
30
|
-
|
|
31
|
-
|
|
32
|
-
|
|
33
|
-
|
|
34
|
-
|
|
35
|
-
|
|
36
|
-
|
|
37
|
-
|
|
38
|
-
|
|
39
|
-
|
|
40
|
-
return INFO
|
|
41
|
-
if hasattr(level, 'get'): # 如 SettingManager 或 dict
|
|
42
|
-
lv = level.get('LOG_LEVEL')
|
|
43
|
-
if isinstance(lv, int):
|
|
44
|
-
return lv
|
|
45
|
-
if isinstance(lv, str):
|
|
46
|
-
level_value = getLevelName(lv.upper())
|
|
47
|
-
if isinstance(level_value, int):
|
|
48
|
-
return level_value
|
|
49
|
-
else:
|
|
50
|
-
return INFO
|
|
51
|
-
return INFO
|
|
52
|
-
|
|
53
|
-
@classmethod
|
|
54
|
-
def configure(cls, settings=None, **kwargs):
|
|
55
|
-
"""
|
|
56
|
-
使用 settings 对象或关键字参数配置日志
|
|
57
|
-
"""
|
|
58
|
-
# 优先使用 settings,否则用 kwargs
|
|
59
|
-
get_val = settings.get if hasattr(settings, 'get') else (lambda k, d=None: kwargs.get(k, d))
|
|
60
|
-
|
|
61
|
-
filename = get_val('LOG_FILE')
|
|
62
|
-
level = get_val('LOG_LEVEL', 'INFO') # 默认为INFO级别
|
|
63
|
-
file_level = get_val('LOG_FILE_LEVEL', level) # 默认继承LOG_LEVEL的值
|
|
64
|
-
# 根据项目规范,已完全移除LOG_CONSOLE_LEVEL支持,统一使用LOG_LEVEL控制控制台和文件的日志输出级别
|
|
65
|
-
log_format = get_val('LOG_FORMAT', LOG_FORMAT)
|
|
66
|
-
encoding = get_val('LOG_ENCODING', 'utf-8')
|
|
67
|
-
|
|
68
|
-
cls._default_filename = filename
|
|
69
|
-
cls._default_level = cls._to_level(level)
|
|
70
|
-
cls._default_file_level = cls._to_level(file_level)
|
|
71
|
-
# 控制台日志级别直接使用LOG_LEVEL的值,不再支持LOG_CONSOLE_LEVEL
|
|
72
|
-
cls._default_console_level = cls._default_level
|
|
73
|
-
cls._default_log_format = log_format
|
|
74
|
-
cls._default_encoding = encoding
|
|
75
|
-
|
|
76
|
-
cls._configured = True
|
|
77
|
-
|
|
78
|
-
@classmethod
|
|
79
|
-
def get_logger(cls, name='default', level=None, filename=None):
|
|
80
|
-
"""
|
|
81
|
-
获取logger实例
|
|
82
|
-
"""
|
|
83
|
-
# 确定最终参数
|
|
84
|
-
# 如果传入了level参数,则使用它,否则使用默认级别
|
|
85
|
-
if level is not None:
|
|
86
|
-
final_level = cls._to_level(level)
|
|
87
|
-
else:
|
|
88
|
-
# Logger级别设置为DEBUG(最低级别),由handler控制实际输出
|
|
89
|
-
final_level = DEBUG
|
|
90
|
-
|
|
91
|
-
final_filename = filename if filename is not None else cls._default_filename
|
|
92
|
-
|
|
93
|
-
# 安全的字符串化 key,避免任何 unhashable 类型
|
|
94
|
-
key_parts = [
|
|
95
|
-
name,
|
|
96
|
-
str(final_level),
|
|
97
|
-
final_filename or 'no_file',
|
|
98
|
-
]
|
|
99
|
-
key = '|'.join(key_parts) # 如 "my_spider|20|logs/app.log"
|
|
100
|
-
|
|
101
|
-
if key in cls.logger_cache:
|
|
102
|
-
# 更新logger级别
|
|
103
|
-
cls.logger_cache[key].setLevel(final_level)
|
|
104
|
-
return cls.logger_cache[key]
|
|
105
|
-
|
|
106
|
-
# 创建 logger
|
|
107
|
-
_logger = Logger(name=name)
|
|
108
|
-
_logger.setLevel(final_level)
|
|
109
|
-
|
|
110
|
-
formatter = Formatter(cls._default_log_format)
|
|
111
|
-
|
|
112
|
-
# 控制台
|
|
113
|
-
if cls._default_console_level is not False:
|
|
114
|
-
ch = StreamHandler()
|
|
115
|
-
ch.setFormatter(formatter)
|
|
116
|
-
ch.setLevel(cls._default_console_level)
|
|
117
|
-
_logger.addHandler(ch)
|
|
118
|
-
|
|
119
|
-
# 文件
|
|
120
|
-
if final_filename:
|
|
121
|
-
try:
|
|
122
|
-
log_dir = os.path.dirname(final_filename)
|
|
123
|
-
if log_dir and not os.path.exists(log_dir):
|
|
124
|
-
os.makedirs(log_dir, exist_ok=True)
|
|
125
|
-
|
|
126
|
-
# 使用普通文件处理器(移除日志轮转功能)
|
|
127
|
-
fh = FileHandler(final_filename, mode='a', encoding=cls._default_encoding)
|
|
128
|
-
|
|
129
|
-
fh.setFormatter(formatter)
|
|
130
|
-
fh.setLevel(cls._default_file_level)
|
|
131
|
-
_logger.addHandler(fh)
|
|
132
|
-
except (PermissionError, FileNotFoundError) as e:
|
|
133
|
-
print(f"[Logger] 无法创建日志文件 {final_filename}: {e}")
|
|
134
|
-
except Exception as e:
|
|
135
|
-
print(f"[Logger] 创建日志文件时发生未知错误 {final_filename}: {e}")
|
|
136
|
-
|
|
137
|
-
cls.logger_cache[key] = _logger
|
|
138
|
-
return _logger
|
|
139
|
-
|
|
140
|
-
@classmethod
|
|
141
|
-
def is_configured(cls):
|
|
142
|
-
"""检查日志系统是否已配置"""
|
|
143
|
-
return cls._configured
|
|
144
|
-
|
|
145
|
-
|
|
146
|
-
# 全局快捷函数
|
|
147
|
-
get_logger = LoggerManager.get_logger
|
|
19
|
+
# 向后兼容的日志函数
|
|
20
|
+
def get_logger(name: str = 'default', level: Optional[int] = None):
    """
    Return a logger by name - backward-compatible entry point.

    Args:
        name: Logger name.
        level: Level applied on the fallback path only; ``None`` means
            ``logging.INFO``. It is not forwarded to the new logging system.

    Returns:
        The logger from ``crawlo.logging`` when that import succeeded,
        otherwise a standard-library logger with a stream handler attached.
    """
    if _NEW_LOGGING_AVAILABLE and new_get_logger:
        # Delegate to the new logging system.
        return new_get_logger(name)

    # Fall back to plain Python logging.
    logger = logging.getLogger(name)
    if not logger.handlers:
        handler = logging.StreamHandler()
        handler.setFormatter(logging.Formatter(LOG_FORMAT))
        logger.addHandler(handler)
    # Explicit None check: ``level or logging.INFO`` would silently turn an
    # explicit level 0 (logging.NOTSET) into INFO.
    logger.setLevel(logging.INFO if level is None else level)
    return logger
|
|
35
|
+
|
|
36
|
+
# 兼容性函数
|
|
37
|
+
def get_component_logger(component_class, settings=None, level=None):
    """
    Return a logger named after a component - backward-compatible helper.

    The logger name is ``component_class.__name__`` when that attribute
    exists, otherwise ``str(component_class)``. ``settings`` and ``level``
    are accepted but not used by this function.
    """
    if not hasattr(component_class, '__name__'):
        return get_logger(str(component_class))
    return get_logger(component_class.__name__)
|
|
@@ -9,8 +9,9 @@ from typing import Dict, Any, Optional
|
|
|
9
9
|
|
|
10
10
|
import redis.asyncio as aioredis
|
|
11
11
|
|
|
12
|
-
|
|
13
|
-
from crawlo.utils.
|
|
12
|
+
# 延迟导入避免循环依赖
|
|
13
|
+
# from crawlo.utils.error_handler import ErrorHandler
|
|
14
|
+
# from crawlo.utils.log import get_logger
|
|
14
15
|
|
|
15
16
|
|
|
16
17
|
class OptimizedRedisConnectionPool:
|
|
@@ -31,8 +32,10 @@ class OptimizedRedisConnectionPool:
|
|
|
31
32
|
def __init__(self, redis_url: str, **kwargs):
|
|
32
33
|
self.redis_url = redis_url
|
|
33
34
|
self.config = {**self.DEFAULT_CONFIG, **kwargs}
|
|
34
|
-
|
|
35
|
-
|
|
35
|
+
|
|
36
|
+
# 延迟初始化logger和error_handler
|
|
37
|
+
self._logger = None
|
|
38
|
+
self._error_handler = None
|
|
36
39
|
|
|
37
40
|
# 连接池实例
|
|
38
41
|
self._connection_pool: Optional[aioredis.ConnectionPool] = None
|
|
@@ -50,6 +53,22 @@ class OptimizedRedisConnectionPool:
|
|
|
50
53
|
# 初始化连接池
|
|
51
54
|
self._initialize_pool()
|
|
52
55
|
|
|
56
|
+
@property
|
|
57
|
+
def logger(self):
|
|
58
|
+
"""延迟初始化logger"""
|
|
59
|
+
if self._logger is None:
|
|
60
|
+
from crawlo.utils.log import get_logger
|
|
61
|
+
self._logger = get_logger(self.__class__.__name__)
|
|
62
|
+
return self._logger
|
|
63
|
+
|
|
64
|
+
@property
|
|
65
|
+
def error_handler(self):
|
|
66
|
+
"""延迟初始化error_handler"""
|
|
67
|
+
if self._error_handler is None:
|
|
68
|
+
from crawlo.utils.error_handler import ErrorHandler
|
|
69
|
+
self._error_handler = ErrorHandler(self.__class__.__name__)
|
|
70
|
+
return self._error_handler
|
|
71
|
+
|
|
53
72
|
def _initialize_pool(self):
|
|
54
73
|
"""初始化连接池"""
|
|
55
74
|
try:
|
|
@@ -176,8 +195,26 @@ class RedisBatchOperationHelper:
|
|
|
176
195
|
def __init__(self, redis_client: aioredis.Redis, batch_size: int = 100):
|
|
177
196
|
self.redis_client = redis_client
|
|
178
197
|
self.batch_size = batch_size
|
|
179
|
-
|
|
180
|
-
|
|
198
|
+
|
|
199
|
+
# 延迟初始化logger和error_handler
|
|
200
|
+
self._logger = None
|
|
201
|
+
self._error_handler = None
|
|
202
|
+
|
|
203
|
+
@property
|
|
204
|
+
def logger(self):
|
|
205
|
+
"""延迟初始化logger"""
|
|
206
|
+
if self._logger is None:
|
|
207
|
+
from crawlo.utils.log import get_logger
|
|
208
|
+
self._logger = get_logger(self.__class__.__name__)
|
|
209
|
+
return self._logger
|
|
210
|
+
|
|
211
|
+
@property
|
|
212
|
+
def error_handler(self):
|
|
213
|
+
"""延迟初始化error_handler"""
|
|
214
|
+
if self._error_handler is None:
|
|
215
|
+
from crawlo.utils.error_handler import ErrorHandler
|
|
216
|
+
self._error_handler = ErrorHandler(self.__class__.__name__)
|
|
217
|
+
return self._error_handler
|
|
181
218
|
|
|
182
219
|
async def batch_execute(self, operations: list, batch_size: Optional[int] = None) -> list:
|
|
183
220
|
"""
|
|
@@ -15,7 +15,14 @@ class RequestSerializer:
|
|
|
15
15
|
"""Request 序列化工具类"""
|
|
16
16
|
|
|
17
17
|
def __init__(self):
|
|
18
|
-
|
|
18
|
+
# 延迟初始化logger避免循环依赖
|
|
19
|
+
self._logger = None
|
|
20
|
+
|
|
21
|
+
@property
|
|
22
|
+
def logger(self):
|
|
23
|
+
if self._logger is None:
|
|
24
|
+
self._logger = get_logger(self.__class__.__name__)
|
|
25
|
+
return self._logger
|
|
19
26
|
|
|
20
27
|
def prepare_for_serialization(self, request):
|
|
21
28
|
"""
|