crawlo 1.4.1__py3-none-any.whl → 1.4.3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of crawlo might be problematic. Click here for more details.
- crawlo/__init__.py +93 -93
- crawlo/__version__.py +1 -1
- crawlo/cli.py +75 -75
- crawlo/commands/__init__.py +14 -14
- crawlo/commands/check.py +594 -594
- crawlo/commands/genspider.py +151 -151
- crawlo/commands/help.py +138 -138
- crawlo/commands/list.py +155 -155
- crawlo/commands/run.py +341 -341
- crawlo/commands/startproject.py +436 -436
- crawlo/commands/stats.py +187 -187
- crawlo/commands/utils.py +196 -196
- crawlo/config.py +312 -312
- crawlo/config_validator.py +277 -277
- crawlo/core/__init__.py +52 -52
- crawlo/core/engine.py +438 -439
- crawlo/core/processor.py +47 -47
- crawlo/core/scheduler.py +291 -257
- crawlo/crawler.py +650 -650
- crawlo/data/__init__.py +5 -5
- crawlo/data/user_agents.py +194 -194
- crawlo/downloader/__init__.py +273 -273
- crawlo/downloader/aiohttp_downloader.py +233 -228
- crawlo/downloader/cffi_downloader.py +245 -245
- crawlo/downloader/httpx_downloader.py +259 -259
- crawlo/downloader/hybrid_downloader.py +212 -212
- crawlo/downloader/playwright_downloader.py +402 -402
- crawlo/downloader/selenium_downloader.py +472 -472
- crawlo/event.py +11 -11
- crawlo/exceptions.py +81 -81
- crawlo/extension/__init__.py +63 -63
- crawlo/extension/health_check.py +141 -141
- crawlo/extension/log_interval.py +94 -94
- crawlo/extension/log_stats.py +70 -70
- crawlo/extension/logging_extension.py +61 -61
- crawlo/extension/memory_monitor.py +104 -104
- crawlo/extension/performance_profiler.py +133 -133
- crawlo/extension/request_recorder.py +107 -107
- crawlo/factories/__init__.py +27 -27
- crawlo/factories/base.py +68 -68
- crawlo/factories/crawler.py +103 -103
- crawlo/factories/registry.py +84 -84
- crawlo/filters/__init__.py +154 -154
- crawlo/filters/aioredis_filter.py +257 -257
- crawlo/filters/memory_filter.py +269 -269
- crawlo/framework.py +292 -292
- crawlo/initialization/__init__.py +44 -44
- crawlo/initialization/built_in.py +425 -425
- crawlo/initialization/context.py +141 -141
- crawlo/initialization/core.py +193 -193
- crawlo/initialization/phases.py +148 -148
- crawlo/initialization/registry.py +145 -145
- crawlo/items/__init__.py +23 -23
- crawlo/items/base.py +23 -23
- crawlo/items/fields.py +52 -52
- crawlo/items/items.py +104 -104
- crawlo/logging/__init__.py +45 -37
- crawlo/logging/async_handler.py +181 -0
- crawlo/logging/config.py +196 -96
- crawlo/logging/factory.py +171 -128
- crawlo/logging/manager.py +111 -111
- crawlo/logging/monitor.py +153 -0
- crawlo/logging/sampler.py +167 -0
- crawlo/middleware/__init__.py +21 -21
- crawlo/middleware/default_header.py +132 -132
- crawlo/middleware/download_delay.py +104 -104
- crawlo/middleware/middleware_manager.py +135 -135
- crawlo/middleware/offsite.py +123 -123
- crawlo/middleware/proxy.py +386 -386
- crawlo/middleware/request_ignore.py +86 -86
- crawlo/middleware/response_code.py +150 -150
- crawlo/middleware/response_filter.py +136 -136
- crawlo/middleware/retry.py +124 -124
- crawlo/middleware/simple_proxy.py +65 -65
- crawlo/mode_manager.py +219 -219
- crawlo/network/__init__.py +21 -21
- crawlo/network/request.py +379 -379
- crawlo/network/response.py +359 -359
- crawlo/pipelines/__init__.py +21 -21
- crawlo/pipelines/bloom_dedup_pipeline.py +146 -146
- crawlo/pipelines/console_pipeline.py +39 -39
- crawlo/pipelines/csv_pipeline.py +316 -316
- crawlo/pipelines/database_dedup_pipeline.py +197 -197
- crawlo/pipelines/json_pipeline.py +218 -218
- crawlo/pipelines/memory_dedup_pipeline.py +105 -105
- crawlo/pipelines/mongo_pipeline.py +131 -131
- crawlo/pipelines/mysql_pipeline.py +325 -325
- crawlo/pipelines/pipeline_manager.py +100 -84
- crawlo/pipelines/redis_dedup_pipeline.py +156 -156
- crawlo/project.py +349 -338
- crawlo/queue/pqueue.py +42 -42
- crawlo/queue/queue_manager.py +526 -522
- crawlo/queue/redis_priority_queue.py +370 -367
- crawlo/settings/__init__.py +7 -7
- crawlo/settings/default_settings.py +284 -284
- crawlo/settings/setting_manager.py +219 -219
- crawlo/spider/__init__.py +657 -657
- crawlo/stats_collector.py +73 -73
- crawlo/subscriber.py +129 -129
- crawlo/task_manager.py +138 -138
- crawlo/templates/crawlo.cfg.tmpl +10 -10
- crawlo/templates/project/__init__.py.tmpl +3 -3
- crawlo/templates/project/items.py.tmpl +17 -17
- crawlo/templates/project/middlewares.py.tmpl +118 -118
- crawlo/templates/project/pipelines.py.tmpl +96 -96
- crawlo/templates/project/settings.py.tmpl +170 -170
- crawlo/templates/project/settings_distributed.py.tmpl +169 -169
- crawlo/templates/project/settings_gentle.py.tmpl +166 -166
- crawlo/templates/project/settings_high_performance.py.tmpl +167 -167
- crawlo/templates/project/settings_minimal.py.tmpl +65 -65
- crawlo/templates/project/settings_simple.py.tmpl +164 -164
- crawlo/templates/project/spiders/__init__.py.tmpl +9 -9
- crawlo/templates/run.py.tmpl +34 -34
- crawlo/templates/spider/spider.py.tmpl +143 -143
- crawlo/templates/spiders_init.py.tmpl +9 -9
- crawlo/tools/__init__.py +200 -200
- crawlo/tools/anti_crawler.py +268 -268
- crawlo/tools/authenticated_proxy.py +240 -240
- crawlo/tools/data_formatter.py +225 -225
- crawlo/tools/data_validator.py +180 -180
- crawlo/tools/date_tools.py +289 -289
- crawlo/tools/distributed_coordinator.py +384 -384
- crawlo/tools/encoding_converter.py +127 -127
- crawlo/tools/network_diagnostic.py +364 -364
- crawlo/tools/request_tools.py +82 -82
- crawlo/tools/retry_mechanism.py +224 -224
- crawlo/tools/scenario_adapter.py +262 -262
- crawlo/tools/text_cleaner.py +232 -232
- crawlo/utils/__init__.py +34 -34
- crawlo/utils/batch_processor.py +259 -259
- crawlo/utils/class_loader.py +25 -25
- crawlo/utils/controlled_spider_mixin.py +439 -439
- crawlo/utils/db_helper.py +343 -343
- crawlo/utils/enhanced_error_handler.py +356 -356
- crawlo/utils/env_config.py +142 -142
- crawlo/utils/error_handler.py +165 -165
- crawlo/utils/fingerprint.py +122 -122
- crawlo/utils/func_tools.py +82 -82
- crawlo/utils/large_scale_config.py +286 -286
- crawlo/utils/large_scale_helper.py +344 -344
- crawlo/utils/log.py +79 -79
- crawlo/utils/performance_monitor.py +285 -285
- crawlo/utils/queue_helper.py +175 -175
- crawlo/utils/redis_connection_pool.py +388 -388
- crawlo/utils/redis_key_validator.py +198 -198
- crawlo/utils/request.py +267 -267
- crawlo/utils/request_serializer.py +225 -225
- crawlo/utils/spider_loader.py +61 -61
- crawlo/utils/system.py +11 -11
- crawlo/utils/tools.py +4 -4
- crawlo/utils/url.py +39 -39
- crawlo-1.4.3.dist-info/METADATA +190 -0
- crawlo-1.4.3.dist-info/RECORD +326 -0
- examples/__init__.py +7 -7
- examples/test_project/__init__.py +7 -7
- examples/test_project/run.py +34 -34
- examples/test_project/test_project/__init__.py +3 -3
- examples/test_project/test_project/items.py +17 -17
- examples/test_project/test_project/middlewares.py +118 -118
- examples/test_project/test_project/pipelines.py +96 -96
- examples/test_project/test_project/settings.py +169 -169
- examples/test_project/test_project/spiders/__init__.py +9 -9
- examples/test_project/test_project/spiders/of_week_dis.py +143 -143
- tests/__init__.py +7 -7
- tests/advanced_tools_example.py +275 -275
- tests/authenticated_proxy_example.py +106 -106
- tests/baidu_performance_test.py +108 -108
- tests/baidu_test.py +59 -59
- tests/cleaners_example.py +160 -160
- tests/comprehensive_framework_test.py +212 -212
- tests/comprehensive_test.py +81 -81
- tests/comprehensive_testing_summary.md +186 -186
- tests/config_validation_demo.py +142 -142
- tests/controlled_spider_example.py +205 -205
- tests/date_tools_example.py +180 -180
- tests/debug_configure.py +69 -69
- tests/debug_framework_logger.py +84 -84
- tests/debug_log_config.py +126 -126
- tests/debug_log_levels.py +63 -63
- tests/debug_pipelines.py +66 -66
- tests/detailed_log_test.py +233 -233
- tests/distributed_test.py +66 -66
- tests/distributed_test_debug.py +76 -76
- tests/dynamic_loading_example.py +523 -523
- tests/dynamic_loading_test.py +104 -104
- tests/env_config_example.py +133 -133
- tests/error_handling_example.py +171 -171
- tests/final_comprehensive_test.py +151 -151
- tests/final_log_test.py +260 -260
- tests/final_validation_test.py +182 -182
- tests/fix_log_test.py +142 -142
- tests/framework_performance_test.py +202 -202
- tests/log_buffering_test.py +111 -111
- tests/log_generation_timing_test.py +153 -153
- tests/optimized_performance_test.py +211 -211
- tests/performance_comparison.py +245 -245
- tests/queue_blocking_test.py +113 -113
- tests/queue_test.py +89 -89
- tests/redis_key_validation_demo.py +130 -130
- tests/request_params_example.py +150 -150
- tests/response_improvements_example.py +144 -144
- tests/scrapy_comparison/ofweek_scrapy.py +138 -138
- tests/scrapy_comparison/scrapy_test.py +133 -133
- tests/simple_command_test.py +119 -119
- tests/simple_crawlo_test.py +127 -127
- tests/simple_log_test.py +57 -57
- tests/simple_log_test2.py +137 -137
- tests/simple_optimization_test.py +128 -128
- tests/simple_queue_type_test.py +41 -41
- tests/simple_spider_test.py +49 -49
- tests/simple_test.py +47 -47
- tests/spider_log_timing_test.py +177 -177
- tests/test_advanced_tools.py +148 -148
- tests/test_all_commands.py +230 -230
- tests/test_all_pipeline_fingerprints.py +133 -133
- tests/test_all_redis_key_configs.py +145 -145
- tests/test_authenticated_proxy.py +141 -141
- tests/test_batch_processor.py +178 -178
- tests/test_cleaners.py +54 -54
- tests/test_component_factory.py +174 -174
- tests/test_comprehensive.py +146 -146
- tests/test_config_consistency.py +80 -80
- tests/test_config_merge.py +152 -152
- tests/test_config_validator.py +182 -182
- tests/test_controlled_spider_mixin.py +79 -79
- tests/test_crawlo_proxy_integration.py +108 -108
- tests/test_date_tools.py +123 -123
- tests/test_dedup_fix.py +220 -220
- tests/test_dedup_pipeline_consistency.py +125 -0
- tests/test_default_header_middleware.py +313 -313
- tests/test_distributed.py +65 -65
- tests/test_double_crawlo_fix.py +204 -204
- tests/test_double_crawlo_fix_simple.py +124 -124
- tests/test_download_delay_middleware.py +221 -221
- tests/test_downloader_proxy_compatibility.py +268 -268
- tests/test_dynamic_downloaders_proxy.py +124 -124
- tests/test_dynamic_proxy.py +92 -92
- tests/test_dynamic_proxy_config.py +146 -146
- tests/test_dynamic_proxy_real.py +109 -109
- tests/test_edge_cases.py +303 -303
- tests/test_enhanced_error_handler.py +270 -270
- tests/test_enhanced_error_handler_comprehensive.py +245 -245
- tests/test_env_config.py +121 -121
- tests/test_error_handler_compatibility.py +112 -112
- tests/test_factories.py +252 -252
- tests/test_final_validation.py +153 -153
- tests/test_fingerprint_consistency.py +135 -135
- tests/test_fingerprint_simple.py +51 -51
- tests/test_framework_env_usage.py +103 -103
- tests/test_framework_logger.py +66 -66
- tests/test_framework_startup.py +64 -64
- tests/test_get_component_logger.py +83 -83
- tests/test_hash_performance.py +99 -99
- tests/test_integration.py +169 -169
- tests/test_item_dedup_redis_key.py +122 -122
- tests/test_large_scale_config.py +112 -112
- tests/test_large_scale_helper.py +235 -235
- tests/test_logging_enhancements.py +375 -0
- tests/test_logging_final.py +185 -0
- tests/test_logging_integration.py +313 -0
- tests/test_logging_system.py +282 -282
- tests/test_middleware_debug.py +142 -0
- tests/test_mode_change.py +72 -72
- tests/test_mode_consistency.py +51 -51
- tests/test_offsite_middleware.py +244 -244
- tests/test_offsite_middleware_simple.py +203 -203
- tests/test_parsel.py +29 -29
- tests/test_performance.py +327 -327
- tests/test_performance_monitor.py +115 -115
- tests/test_pipeline_fingerprint_consistency.py +86 -86
- tests/test_priority_behavior.py +212 -0
- tests/test_priority_consistency.py +152 -0
- tests/test_priority_consistency_fixed.py +250 -0
- tests/test_proxy_api.py +264 -264
- tests/test_proxy_health_check.py +32 -32
- tests/test_proxy_middleware.py +121 -121
- tests/test_proxy_middleware_enhanced.py +216 -216
- tests/test_proxy_middleware_integration.py +136 -136
- tests/test_proxy_middleware_refactored.py +184 -184
- tests/test_proxy_providers.py +56 -56
- tests/test_proxy_stats.py +19 -19
- tests/test_proxy_strategies.py +59 -59
- tests/test_queue_empty_check.py +41 -41
- tests/test_queue_manager_double_crawlo.py +173 -173
- tests/test_queue_manager_redis_key.py +179 -179
- tests/test_queue_naming.py +154 -154
- tests/test_queue_type.py +106 -106
- tests/test_queue_type_redis_config_consistency.py +131 -0
- tests/test_random_headers_default.py +323 -0
- tests/test_random_headers_necessity.py +309 -0
- tests/test_random_user_agent.py +72 -72
- tests/test_real_scenario_proxy.py +195 -195
- tests/test_redis_config.py +28 -28
- tests/test_redis_connection_pool.py +294 -294
- tests/test_redis_key_naming.py +181 -181
- tests/test_redis_key_validator.py +123 -123
- tests/test_redis_queue.py +224 -224
- tests/test_redis_queue_name_fix.py +175 -175
- tests/test_redis_queue_type_fallback.py +130 -0
- tests/test_request_ignore_middleware.py +182 -182
- tests/test_request_params.py +111 -111
- tests/test_request_serialization.py +70 -70
- tests/test_response_code_middleware.py +349 -349
- tests/test_response_filter_middleware.py +427 -427
- tests/test_response_improvements.py +152 -152
- tests/test_retry_middleware.py +334 -242
- tests/test_retry_middleware_realistic.py +274 -0
- tests/test_scheduler.py +252 -252
- tests/test_scheduler_config_update.py +133 -133
- tests/test_simple_response.py +61 -61
- tests/test_telecom_spider_redis_key.py +205 -205
- tests/test_template_content.py +87 -87
- tests/test_template_redis_key.py +134 -134
- tests/test_tools.py +159 -159
- tests/test_user_agent_randomness.py +177 -0
- tests/test_user_agents.py +96 -96
- tests/tools_example.py +260 -260
- tests/untested_features_report.md +138 -138
- tests/verify_debug.py +51 -51
- tests/verify_distributed.py +117 -117
- tests/verify_log_fix.py +111 -111
- crawlo-1.4.1.dist-info/METADATA +0 -1199
- crawlo-1.4.1.dist-info/RECORD +0 -309
- {crawlo-1.4.1.dist-info → crawlo-1.4.3.dist-info}/WHEEL +0 -0
- {crawlo-1.4.1.dist-info → crawlo-1.4.3.dist-info}/entry_points.txt +0 -0
- {crawlo-1.4.1.dist-info → crawlo-1.4.3.dist-info}/top_level.txt +0 -0
tests/detailed_log_test.py
CHANGED
|
@@ -1,234 +1,234 @@
|
|
|
1
|
-
#!/usr/bin/env python
|
|
2
|
-
# -*- coding: UTF-8 -*-
|
|
3
|
-
"""
|
|
4
|
-
详细的日志系统功能测试
|
|
5
|
-
"""
|
|
6
|
-
|
|
7
|
-
import os
|
|
8
|
-
import sys
|
|
9
|
-
import tempfile
|
|
10
|
-
import shutil
|
|
11
|
-
from pathlib import Path
|
|
12
|
-
|
|
13
|
-
# 添加项目根目录到Python路径
|
|
14
|
-
project_root = Path(__file__).parent.parent
|
|
15
|
-
sys.path.insert(0, str(project_root))
|
|
16
|
-
|
|
17
|
-
from crawlo.logging import configure_logging as configure, get_logger, LogManager
|
|
18
|
-
from crawlo.logging.config import LogConfig
|
|
19
|
-
|
|
20
|
-
|
|
21
|
-
def test_log_config_creation():
|
|
22
|
-
"""测试日志配置创建"""
|
|
23
|
-
print("=== 测试日志配置创建 ===")
|
|
24
|
-
|
|
25
|
-
# 重置日志管理器
|
|
26
|
-
LogManager().reset()
|
|
27
|
-
|
|
28
|
-
# 1. 测试通过关键字参数创建配置
|
|
29
|
-
print("1. 测试通过关键字参数创建配置...")
|
|
30
|
-
config = configure(
|
|
31
|
-
LOG_LEVEL='DEBUG',
|
|
32
|
-
LOG_FILE='test.log',
|
|
33
|
-
LOG_MAX_BYTES=1024,
|
|
34
|
-
LOG_BACKUP_COUNT=3
|
|
35
|
-
)
|
|
36
|
-
|
|
37
|
-
print(f" 配置级别: {config.level}")
|
|
38
|
-
print(f" 配置文件路径: {config.file_path}")
|
|
39
|
-
print(f" 轮转大小: {config.max_bytes}")
|
|
40
|
-
print(f" 备份数量: {config.backup_count}")
|
|
41
|
-
|
|
42
|
-
# 2. 测试通过字典创建配置
|
|
43
|
-
print("2. 测试通过字典创建配置...")
|
|
44
|
-
LogManager().reset()
|
|
45
|
-
config_dict = {
|
|
46
|
-
'LOG_LEVEL': 'WARNING',
|
|
47
|
-
'LOG_FILE': 'dict_test.log',
|
|
48
|
-
'LOG_CONSOLE_ENABLED': False
|
|
49
|
-
}
|
|
50
|
-
config = configure(**config_dict)
|
|
51
|
-
|
|
52
|
-
print(f" 配置级别: {config.level}")
|
|
53
|
-
print(f" 配置文件路径: {config.file_path}")
|
|
54
|
-
print(f" 控制台启用: {config.console_enabled}")
|
|
55
|
-
|
|
56
|
-
|
|
57
|
-
def test_logger_factory():
|
|
58
|
-
"""测试Logger工厂"""
|
|
59
|
-
print("\n=== 测试Logger工厂 ===")
|
|
60
|
-
|
|
61
|
-
# 重置并配置
|
|
62
|
-
LogManager().reset()
|
|
63
|
-
configure(LOG_LEVEL='INFO')
|
|
64
|
-
|
|
65
|
-
# 1. 测试获取Logger
|
|
66
|
-
print("1. 测试获取Logger...")
|
|
67
|
-
logger1 = get_logger('test.factory1')
|
|
68
|
-
logger2 = get_logger('test.factory2')
|
|
69
|
-
logger3 = get_logger('test.factory1') # 应该是同一个实例
|
|
70
|
-
|
|
71
|
-
print(f" Logger1 ID: {id(logger1)}")
|
|
72
|
-
print(f" Logger2 ID: {id(logger2)}")
|
|
73
|
-
print(f" Logger3 ID: {id(logger3)}")
|
|
74
|
-
print(f" Logger1和Logger3是同一对象: {logger1 is logger3}")
|
|
75
|
-
|
|
76
|
-
# 2. 测试Logger配置
|
|
77
|
-
print("2. 测试Logger配置...")
|
|
78
|
-
print(f" Logger1名称: {logger1.name}")
|
|
79
|
-
print(f" Logger1 handlers数量: {len(logger1.handlers)}")
|
|
80
|
-
|
|
81
|
-
for i, handler in enumerate(logger1.handlers):
|
|
82
|
-
print(f" Handler {i}: {type(handler).__name__}")
|
|
83
|
-
|
|
84
|
-
|
|
85
|
-
def test_file_and_console_handlers():
|
|
86
|
-
"""测试文件和控制台处理器"""
|
|
87
|
-
print("\n=== 测试文件和控制台处理器 ===")
|
|
88
|
-
|
|
89
|
-
# 创建临时目录
|
|
90
|
-
temp_dir = tempfile.mkdtemp()
|
|
91
|
-
log_file = os.path.join(temp_dir, 'handler_test.log')
|
|
92
|
-
|
|
93
|
-
try:
|
|
94
|
-
# 1. 测试文件和控制台都启用
|
|
95
|
-
print("1. 测试文件和控制台都启用...")
|
|
96
|
-
LogManager().reset()
|
|
97
|
-
configure(
|
|
98
|
-
LOG_LEVEL='INFO',
|
|
99
|
-
LOG_FILE=log_file,
|
|
100
|
-
LOG_CONSOLE_ENABLED=True,
|
|
101
|
-
LOG_FILE_ENABLED=True
|
|
102
|
-
)
|
|
103
|
-
|
|
104
|
-
logger = get_logger('test.handlers')
|
|
105
|
-
print(f" Handlers数量: {len(logger.handlers)}")
|
|
106
|
-
|
|
107
|
-
has_file_handler = False
|
|
108
|
-
has_console_handler = False
|
|
109
|
-
|
|
110
|
-
for handler in logger.handlers:
|
|
111
|
-
handler_type = type(handler).__name__
|
|
112
|
-
print(f" Handler类型: {handler_type}")
|
|
113
|
-
if 'FileHandler' in handler_type:
|
|
114
|
-
has_file_handler = True
|
|
115
|
-
print(f" 文件路径: {getattr(handler, 'baseFilename', 'N/A')}")
|
|
116
|
-
elif 'StreamHandler' in handler_type:
|
|
117
|
-
has_console_handler = True
|
|
118
|
-
|
|
119
|
-
print(f" 有文件处理器: {has_file_handler}")
|
|
120
|
-
print(f" 有控制台处理器: {has_console_handler}")
|
|
121
|
-
|
|
122
|
-
# 输出日志
|
|
123
|
-
logger.info("测试文件和控制台处理器")
|
|
124
|
-
|
|
125
|
-
# 检查文件是否存在
|
|
126
|
-
if os.path.exists(log_file):
|
|
127
|
-
with open(log_file, 'r', encoding='utf-8') as f:
|
|
128
|
-
content = f.read()
|
|
129
|
-
print(f" 文件内容行数: {len(content.splitlines())}")
|
|
130
|
-
else:
|
|
131
|
-
print(" 文件不存在!")
|
|
132
|
-
|
|
133
|
-
finally:
|
|
134
|
-
# 清理
|
|
135
|
-
shutil.rmtree(temp_dir, ignore_errors=True)
|
|
136
|
-
|
|
137
|
-
|
|
138
|
-
def test_log_levels():
|
|
139
|
-
"""测试日志级别"""
|
|
140
|
-
print("\n=== 测试日志级别 ===")
|
|
141
|
-
|
|
142
|
-
# 1. 测试默认级别
|
|
143
|
-
print("1. 测试默认级别...")
|
|
144
|
-
LogManager().reset()
|
|
145
|
-
configure(LOG_LEVEL='WARNING')
|
|
146
|
-
|
|
147
|
-
logger = get_logger('test.levels')
|
|
148
|
-
logger.debug("DEBUG消息 - 不应该显示")
|
|
149
|
-
logger.info("INFO消息 - 不应该显示")
|
|
150
|
-
logger.warning("WARNING消息 - 应该显示")
|
|
151
|
-
logger.error("ERROR消息 - 应该显示")
|
|
152
|
-
|
|
153
|
-
# 2. 测试模块特定级别
|
|
154
|
-
print("2. 测试模块特定级别...")
|
|
155
|
-
LogManager().reset()
|
|
156
|
-
configure(
|
|
157
|
-
LOG_LEVEL='ERROR',
|
|
158
|
-
LOG_LEVELS={
|
|
159
|
-
'test.debug_module': 'DEBUG',
|
|
160
|
-
'test.info_module': 'INFO'
|
|
161
|
-
}
|
|
162
|
-
)
|
|
163
|
-
|
|
164
|
-
# 默认模块(ERROR级别)
|
|
165
|
-
default_logger = get_logger('test.default')
|
|
166
|
-
default_logger.info("默认模块INFO消息 - 不应该显示")
|
|
167
|
-
default_logger.error("默认模块ERROR消息 - 应该显示")
|
|
168
|
-
|
|
169
|
-
# DEBUG模块(DEBUG级别)
|
|
170
|
-
debug_logger = get_logger('test.debug_module')
|
|
171
|
-
debug_logger.debug("DEBUG模块DEBUG消息 - 应该显示")
|
|
172
|
-
debug_logger.info("DEBUG模块INFO消息 - 应该显示")
|
|
173
|
-
|
|
174
|
-
# INFO模块(INFO级别)
|
|
175
|
-
info_logger = get_logger('test.info_module')
|
|
176
|
-
info_logger.debug("INFO模块DEBUG消息 - 不应该显示")
|
|
177
|
-
info_logger.info("INFO模块INFO消息 - 应该显示")
|
|
178
|
-
|
|
179
|
-
|
|
180
|
-
def test_log_config_validation():
|
|
181
|
-
"""测试日志配置验证"""
|
|
182
|
-
print("\n=== 测试日志配置验证 ===")
|
|
183
|
-
|
|
184
|
-
# 1. 测试有效配置
|
|
185
|
-
print("1. 测试有效配置...")
|
|
186
|
-
valid_config = LogConfig(level='INFO', file_path='valid.log')
|
|
187
|
-
is_valid = valid_config.validate()
|
|
188
|
-
print(f" 有效配置验证: {is_valid}")
|
|
189
|
-
|
|
190
|
-
# 2. 测试无效级别
|
|
191
|
-
print("2. 测试无效级别...")
|
|
192
|
-
invalid_config = LogConfig(level='INVALID', file_path='invalid.log')
|
|
193
|
-
is_valid = invalid_config.validate()
|
|
194
|
-
print(f" 无效级别验证: {is_valid}")
|
|
195
|
-
|
|
196
|
-
# 3. 测试目录创建
|
|
197
|
-
print("3. 测试目录创建...")
|
|
198
|
-
temp_dir = tempfile.mkdtemp()
|
|
199
|
-
nested_path = os.path.join(temp_dir, 'subdir', 'nested.log')
|
|
200
|
-
|
|
201
|
-
nested_config = LogConfig(level='INFO', file_path=nested_path)
|
|
202
|
-
is_valid = nested_config.validate()
|
|
203
|
-
print(f" 嵌套目录验证: {is_valid}")
|
|
204
|
-
print(f" 目录存在: {os.path.exists(os.path.dirname(nested_path))}")
|
|
205
|
-
|
|
206
|
-
# 清理
|
|
207
|
-
shutil.rmtree(temp_dir, ignore_errors=True)
|
|
208
|
-
|
|
209
|
-
|
|
210
|
-
def main():
|
|
211
|
-
"""主测试函数"""
|
|
212
|
-
print("开始详细测试Crawlo框架日志系统...")
|
|
213
|
-
|
|
214
|
-
try:
|
|
215
|
-
# 运行所有测试
|
|
216
|
-
test_log_config_creation()
|
|
217
|
-
test_logger_factory()
|
|
218
|
-
test_file_and_console_handlers()
|
|
219
|
-
test_log_levels()
|
|
220
|
-
test_log_config_validation()
|
|
221
|
-
|
|
222
|
-
print("\n=== 详细测试完成 ===")
|
|
223
|
-
|
|
224
|
-
except Exception as e:
|
|
225
|
-
print(f"\n测试过程中出现错误: {e}")
|
|
226
|
-
import traceback
|
|
227
|
-
traceback.print_exc()
|
|
228
|
-
return 1
|
|
229
|
-
|
|
230
|
-
return 0
|
|
231
|
-
|
|
232
|
-
|
|
233
|
-
if __name__ == '__main__':
|
|
1
|
+
#!/usr/bin/env python
|
|
2
|
+
# -*- coding: UTF-8 -*-
|
|
3
|
+
"""
|
|
4
|
+
详细的日志系统功能测试
|
|
5
|
+
"""
|
|
6
|
+
|
|
7
|
+
import os
|
|
8
|
+
import sys
|
|
9
|
+
import tempfile
|
|
10
|
+
import shutil
|
|
11
|
+
from pathlib import Path
|
|
12
|
+
|
|
13
|
+
# 添加项目根目录到Python路径
|
|
14
|
+
project_root = Path(__file__).parent.parent
|
|
15
|
+
sys.path.insert(0, str(project_root))
|
|
16
|
+
|
|
17
|
+
from crawlo.logging import configure_logging as configure, get_logger, LogManager
|
|
18
|
+
from crawlo.logging.config import LogConfig
|
|
19
|
+
|
|
20
|
+
|
|
21
|
+
def test_log_config_creation():
|
|
22
|
+
"""测试日志配置创建"""
|
|
23
|
+
print("=== 测试日志配置创建 ===")
|
|
24
|
+
|
|
25
|
+
# 重置日志管理器
|
|
26
|
+
LogManager().reset()
|
|
27
|
+
|
|
28
|
+
# 1. 测试通过关键字参数创建配置
|
|
29
|
+
print("1. 测试通过关键字参数创建配置...")
|
|
30
|
+
config = configure(
|
|
31
|
+
LOG_LEVEL='DEBUG',
|
|
32
|
+
LOG_FILE='test.log',
|
|
33
|
+
LOG_MAX_BYTES=1024,
|
|
34
|
+
LOG_BACKUP_COUNT=3
|
|
35
|
+
)
|
|
36
|
+
|
|
37
|
+
print(f" 配置级别: {config.level}")
|
|
38
|
+
print(f" 配置文件路径: {config.file_path}")
|
|
39
|
+
print(f" 轮转大小: {config.max_bytes}")
|
|
40
|
+
print(f" 备份数量: {config.backup_count}")
|
|
41
|
+
|
|
42
|
+
# 2. 测试通过字典创建配置
|
|
43
|
+
print("2. 测试通过字典创建配置...")
|
|
44
|
+
LogManager().reset()
|
|
45
|
+
config_dict = {
|
|
46
|
+
'LOG_LEVEL': 'WARNING',
|
|
47
|
+
'LOG_FILE': 'dict_test.log',
|
|
48
|
+
'LOG_CONSOLE_ENABLED': False
|
|
49
|
+
}
|
|
50
|
+
config = configure(**config_dict)
|
|
51
|
+
|
|
52
|
+
print(f" 配置级别: {config.level}")
|
|
53
|
+
print(f" 配置文件路径: {config.file_path}")
|
|
54
|
+
print(f" 控制台启用: {config.console_enabled}")
|
|
55
|
+
|
|
56
|
+
|
|
57
|
+
def test_logger_factory():
|
|
58
|
+
"""测试Logger工厂"""
|
|
59
|
+
print("\n=== 测试Logger工厂 ===")
|
|
60
|
+
|
|
61
|
+
# 重置并配置
|
|
62
|
+
LogManager().reset()
|
|
63
|
+
configure(LOG_LEVEL='INFO')
|
|
64
|
+
|
|
65
|
+
# 1. 测试获取Logger
|
|
66
|
+
print("1. 测试获取Logger...")
|
|
67
|
+
logger1 = get_logger('test.factory1')
|
|
68
|
+
logger2 = get_logger('test.factory2')
|
|
69
|
+
logger3 = get_logger('test.factory1') # 应该是同一个实例
|
|
70
|
+
|
|
71
|
+
print(f" Logger1 ID: {id(logger1)}")
|
|
72
|
+
print(f" Logger2 ID: {id(logger2)}")
|
|
73
|
+
print(f" Logger3 ID: {id(logger3)}")
|
|
74
|
+
print(f" Logger1和Logger3是同一对象: {logger1 is logger3}")
|
|
75
|
+
|
|
76
|
+
# 2. 测试Logger配置
|
|
77
|
+
print("2. 测试Logger配置...")
|
|
78
|
+
print(f" Logger1名称: {logger1.name}")
|
|
79
|
+
print(f" Logger1 handlers数量: {len(logger1.handlers)}")
|
|
80
|
+
|
|
81
|
+
for i, handler in enumerate(logger1.handlers):
|
|
82
|
+
print(f" Handler {i}: {type(handler).__name__}")
|
|
83
|
+
|
|
84
|
+
|
|
85
|
+
def test_file_and_console_handlers():
|
|
86
|
+
"""测试文件和控制台处理器"""
|
|
87
|
+
print("\n=== 测试文件和控制台处理器 ===")
|
|
88
|
+
|
|
89
|
+
# 创建临时目录
|
|
90
|
+
temp_dir = tempfile.mkdtemp()
|
|
91
|
+
log_file = os.path.join(temp_dir, 'handler_test.log')
|
|
92
|
+
|
|
93
|
+
try:
|
|
94
|
+
# 1. 测试文件和控制台都启用
|
|
95
|
+
print("1. 测试文件和控制台都启用...")
|
|
96
|
+
LogManager().reset()
|
|
97
|
+
configure(
|
|
98
|
+
LOG_LEVEL='INFO',
|
|
99
|
+
LOG_FILE=log_file,
|
|
100
|
+
LOG_CONSOLE_ENABLED=True,
|
|
101
|
+
LOG_FILE_ENABLED=True
|
|
102
|
+
)
|
|
103
|
+
|
|
104
|
+
logger = get_logger('test.handlers')
|
|
105
|
+
print(f" Handlers数量: {len(logger.handlers)}")
|
|
106
|
+
|
|
107
|
+
has_file_handler = False
|
|
108
|
+
has_console_handler = False
|
|
109
|
+
|
|
110
|
+
for handler in logger.handlers:
|
|
111
|
+
handler_type = type(handler).__name__
|
|
112
|
+
print(f" Handler类型: {handler_type}")
|
|
113
|
+
if 'FileHandler' in handler_type:
|
|
114
|
+
has_file_handler = True
|
|
115
|
+
print(f" 文件路径: {getattr(handler, 'baseFilename', 'N/A')}")
|
|
116
|
+
elif 'StreamHandler' in handler_type:
|
|
117
|
+
has_console_handler = True
|
|
118
|
+
|
|
119
|
+
print(f" 有文件处理器: {has_file_handler}")
|
|
120
|
+
print(f" 有控制台处理器: {has_console_handler}")
|
|
121
|
+
|
|
122
|
+
# 输出日志
|
|
123
|
+
logger.info("测试文件和控制台处理器")
|
|
124
|
+
|
|
125
|
+
# 检查文件是否存在
|
|
126
|
+
if os.path.exists(log_file):
|
|
127
|
+
with open(log_file, 'r', encoding='utf-8') as f:
|
|
128
|
+
content = f.read()
|
|
129
|
+
print(f" 文件内容行数: {len(content.splitlines())}")
|
|
130
|
+
else:
|
|
131
|
+
print(" 文件不存在!")
|
|
132
|
+
|
|
133
|
+
finally:
|
|
134
|
+
# 清理
|
|
135
|
+
shutil.rmtree(temp_dir, ignore_errors=True)
|
|
136
|
+
|
|
137
|
+
|
|
138
|
+
def test_log_levels():
|
|
139
|
+
"""测试日志级别"""
|
|
140
|
+
print("\n=== 测试日志级别 ===")
|
|
141
|
+
|
|
142
|
+
# 1. 测试默认级别
|
|
143
|
+
print("1. 测试默认级别...")
|
|
144
|
+
LogManager().reset()
|
|
145
|
+
configure(LOG_LEVEL='WARNING')
|
|
146
|
+
|
|
147
|
+
logger = get_logger('test.levels')
|
|
148
|
+
logger.debug("DEBUG消息 - 不应该显示")
|
|
149
|
+
logger.info("INFO消息 - 不应该显示")
|
|
150
|
+
logger.warning("WARNING消息 - 应该显示")
|
|
151
|
+
logger.error("ERROR消息 - 应该显示")
|
|
152
|
+
|
|
153
|
+
# 2. 测试模块特定级别
|
|
154
|
+
print("2. 测试模块特定级别...")
|
|
155
|
+
LogManager().reset()
|
|
156
|
+
configure(
|
|
157
|
+
LOG_LEVEL='ERROR',
|
|
158
|
+
LOG_LEVELS={
|
|
159
|
+
'test.debug_module': 'DEBUG',
|
|
160
|
+
'test.info_module': 'INFO'
|
|
161
|
+
}
|
|
162
|
+
)
|
|
163
|
+
|
|
164
|
+
# 默认模块(ERROR级别)
|
|
165
|
+
default_logger = get_logger('test.default')
|
|
166
|
+
default_logger.info("默认模块INFO消息 - 不应该显示")
|
|
167
|
+
default_logger.error("默认模块ERROR消息 - 应该显示")
|
|
168
|
+
|
|
169
|
+
# DEBUG模块(DEBUG级别)
|
|
170
|
+
debug_logger = get_logger('test.debug_module')
|
|
171
|
+
debug_logger.debug("DEBUG模块DEBUG消息 - 应该显示")
|
|
172
|
+
debug_logger.info("DEBUG模块INFO消息 - 应该显示")
|
|
173
|
+
|
|
174
|
+
# INFO模块(INFO级别)
|
|
175
|
+
info_logger = get_logger('test.info_module')
|
|
176
|
+
info_logger.debug("INFO模块DEBUG消息 - 不应该显示")
|
|
177
|
+
info_logger.info("INFO模块INFO消息 - 应该显示")
|
|
178
|
+
|
|
179
|
+
|
|
180
|
+
def test_log_config_validation():
|
|
181
|
+
"""测试日志配置验证"""
|
|
182
|
+
print("\n=== 测试日志配置验证 ===")
|
|
183
|
+
|
|
184
|
+
# 1. 测试有效配置
|
|
185
|
+
print("1. 测试有效配置...")
|
|
186
|
+
valid_config = LogConfig(level='INFO', file_path='valid.log')
|
|
187
|
+
is_valid = valid_config.validate()
|
|
188
|
+
print(f" 有效配置验证: {is_valid}")
|
|
189
|
+
|
|
190
|
+
# 2. 测试无效级别
|
|
191
|
+
print("2. 测试无效级别...")
|
|
192
|
+
invalid_config = LogConfig(level='INVALID', file_path='invalid.log')
|
|
193
|
+
is_valid = invalid_config.validate()
|
|
194
|
+
print(f" 无效级别验证: {is_valid}")
|
|
195
|
+
|
|
196
|
+
# 3. 测试目录创建
|
|
197
|
+
print("3. 测试目录创建...")
|
|
198
|
+
temp_dir = tempfile.mkdtemp()
|
|
199
|
+
nested_path = os.path.join(temp_dir, 'subdir', 'nested.log')
|
|
200
|
+
|
|
201
|
+
nested_config = LogConfig(level='INFO', file_path=nested_path)
|
|
202
|
+
is_valid = nested_config.validate()
|
|
203
|
+
print(f" 嵌套目录验证: {is_valid}")
|
|
204
|
+
print(f" 目录存在: {os.path.exists(os.path.dirname(nested_path))}")
|
|
205
|
+
|
|
206
|
+
# 清理
|
|
207
|
+
shutil.rmtree(temp_dir, ignore_errors=True)
|
|
208
|
+
|
|
209
|
+
|
|
210
|
+
def main():
|
|
211
|
+
"""主测试函数"""
|
|
212
|
+
print("开始详细测试Crawlo框架日志系统...")
|
|
213
|
+
|
|
214
|
+
try:
|
|
215
|
+
# 运行所有测试
|
|
216
|
+
test_log_config_creation()
|
|
217
|
+
test_logger_factory()
|
|
218
|
+
test_file_and_console_handlers()
|
|
219
|
+
test_log_levels()
|
|
220
|
+
test_log_config_validation()
|
|
221
|
+
|
|
222
|
+
print("\n=== 详细测试完成 ===")
|
|
223
|
+
|
|
224
|
+
except Exception as e:
|
|
225
|
+
print(f"\n测试过程中出现错误: {e}")
|
|
226
|
+
import traceback
|
|
227
|
+
traceback.print_exc()
|
|
228
|
+
return 1
|
|
229
|
+
|
|
230
|
+
return 0
|
|
231
|
+
|
|
232
|
+
|
|
233
|
+
if __name__ == '__main__':
|
|
234
234
|
sys.exit(main())
|
tests/distributed_test.py
CHANGED
|
@@ -1,67 +1,67 @@
|
|
|
1
|
-
#!/usr/bin/env python
|
|
2
|
-
# -*- coding: utf-8 -*-
|
|
3
|
-
"""
|
|
4
|
-
分布式模式测试脚本
|
|
5
|
-
"""
|
|
6
|
-
|
|
7
|
-
import sys
|
|
8
|
-
import os
|
|
9
|
-
import asyncio
|
|
10
|
-
sys.path.insert(0, os.path.join(os.path.dirname(__file__), '..'))
|
|
11
|
-
|
|
12
|
-
from crawlo.spider import Spider
|
|
13
|
-
from crawlo import Request
|
|
14
|
-
|
|
15
|
-
|
|
16
|
-
class DistributedTestSpider(Spider):
|
|
17
|
-
"""分布式测试爬虫"""
|
|
18
|
-
name = 'distributed_test_spider'
|
|
19
|
-
|
|
20
|
-
def start_requests(self):
|
|
21
|
-
"""发起测试请求"""
|
|
22
|
-
# 生成一些测试请求
|
|
23
|
-
for i in range(5):
|
|
24
|
-
yield Request(f'https://httpbin.org/get?page={i}', callback=self.parse)
|
|
25
|
-
|
|
26
|
-
def parse(self, response):
|
|
27
|
-
"""解析响应"""
|
|
28
|
-
print(f"成功获取响应: {response.url}")
|
|
29
|
-
print(f"状态码: {response.status_code}")
|
|
30
|
-
return []
|
|
31
|
-
|
|
32
|
-
|
|
33
|
-
async def test_distributed_mode():
|
|
34
|
-
"""测试分布式模式"""
|
|
35
|
-
print("开始测试分布式模式...")
|
|
36
|
-
|
|
37
|
-
# 初始化框架,设置为分布式模式
|
|
38
|
-
from crawlo.initialization import initialize_framework
|
|
39
|
-
custom_settings = {
|
|
40
|
-
'RUN_MODE': 'distributed',
|
|
41
|
-
'QUEUE_TYPE': 'redis',
|
|
42
|
-
'FILTER_CLASS': 'crawlo.filters.aioredis_filter.AioRedisFilter',
|
|
43
|
-
'REDIS_HOST': '127.0.0.1',
|
|
44
|
-
'REDIS_PORT': 6379,
|
|
45
|
-
'REDIS_DB': 15, # 使用测试数据库
|
|
46
|
-
'PROJECT_NAME': 'distributed_test'
|
|
47
|
-
}
|
|
48
|
-
settings = initialize_framework(custom_settings)
|
|
49
|
-
|
|
50
|
-
# 创建爬虫进程
|
|
51
|
-
from crawlo.crawler import CrawlerProcess
|
|
52
|
-
process = CrawlerProcess(settings=settings)
|
|
53
|
-
|
|
54
|
-
# 运行爬虫
|
|
55
|
-
await process.crawl(DistributedTestSpider)
|
|
56
|
-
|
|
57
|
-
print("分布式模式测试完成!")
|
|
58
|
-
|
|
59
|
-
|
|
60
|
-
def main():
|
|
61
|
-
"""主函数"""
|
|
62
|
-
print("开始分布式模式测试...")
|
|
63
|
-
asyncio.run(test_distributed_mode())
|
|
64
|
-
|
|
65
|
-
|
|
66
|
-
if __name__ == "__main__":
|
|
1
|
+
#!/usr/bin/env python
|
|
2
|
+
# -*- coding: utf-8 -*-
|
|
3
|
+
"""
|
|
4
|
+
分布式模式测试脚本
|
|
5
|
+
"""
|
|
6
|
+
|
|
7
|
+
import sys
|
|
8
|
+
import os
|
|
9
|
+
import asyncio
|
|
10
|
+
sys.path.insert(0, os.path.join(os.path.dirname(__file__), '..'))
|
|
11
|
+
|
|
12
|
+
from crawlo.spider import Spider
|
|
13
|
+
from crawlo import Request
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
class DistributedTestSpider(Spider):
|
|
17
|
+
"""分布式测试爬虫"""
|
|
18
|
+
name = 'distributed_test_spider'
|
|
19
|
+
|
|
20
|
+
def start_requests(self):
|
|
21
|
+
"""发起测试请求"""
|
|
22
|
+
# 生成一些测试请求
|
|
23
|
+
for i in range(5):
|
|
24
|
+
yield Request(f'https://httpbin.org/get?page={i}', callback=self.parse)
|
|
25
|
+
|
|
26
|
+
def parse(self, response):
|
|
27
|
+
"""解析响应"""
|
|
28
|
+
print(f"成功获取响应: {response.url}")
|
|
29
|
+
print(f"状态码: {response.status_code}")
|
|
30
|
+
return []
|
|
31
|
+
|
|
32
|
+
|
|
33
|
+
async def test_distributed_mode():
|
|
34
|
+
"""测试分布式模式"""
|
|
35
|
+
print("开始测试分布式模式...")
|
|
36
|
+
|
|
37
|
+
# 初始化框架,设置为分布式模式
|
|
38
|
+
from crawlo.initialization import initialize_framework
|
|
39
|
+
custom_settings = {
|
|
40
|
+
'RUN_MODE': 'distributed',
|
|
41
|
+
'QUEUE_TYPE': 'redis',
|
|
42
|
+
'FILTER_CLASS': 'crawlo.filters.aioredis_filter.AioRedisFilter',
|
|
43
|
+
'REDIS_HOST': '127.0.0.1',
|
|
44
|
+
'REDIS_PORT': 6379,
|
|
45
|
+
'REDIS_DB': 15, # 使用测试数据库
|
|
46
|
+
'PROJECT_NAME': 'distributed_test'
|
|
47
|
+
}
|
|
48
|
+
settings = initialize_framework(custom_settings)
|
|
49
|
+
|
|
50
|
+
# 创建爬虫进程
|
|
51
|
+
from crawlo.crawler import CrawlerProcess
|
|
52
|
+
process = CrawlerProcess(settings=settings)
|
|
53
|
+
|
|
54
|
+
# 运行爬虫
|
|
55
|
+
await process.crawl(DistributedTestSpider)
|
|
56
|
+
|
|
57
|
+
print("分布式模式测试完成!")
|
|
58
|
+
|
|
59
|
+
|
|
60
|
+
def main():
|
|
61
|
+
"""主函数"""
|
|
62
|
+
print("开始分布式模式测试...")
|
|
63
|
+
asyncio.run(test_distributed_mode())
|
|
64
|
+
|
|
65
|
+
|
|
66
|
+
if __name__ == "__main__":
|
|
67
67
|
main()
|