crawlo 1.4.1__py3-none-any.whl → 1.4.3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of crawlo might be problematic. Click here for more details.
- crawlo/__init__.py +93 -93
- crawlo/__version__.py +1 -1
- crawlo/cli.py +75 -75
- crawlo/commands/__init__.py +14 -14
- crawlo/commands/check.py +594 -594
- crawlo/commands/genspider.py +151 -151
- crawlo/commands/help.py +138 -138
- crawlo/commands/list.py +155 -155
- crawlo/commands/run.py +341 -341
- crawlo/commands/startproject.py +436 -436
- crawlo/commands/stats.py +187 -187
- crawlo/commands/utils.py +196 -196
- crawlo/config.py +312 -312
- crawlo/config_validator.py +277 -277
- crawlo/core/__init__.py +52 -52
- crawlo/core/engine.py +438 -439
- crawlo/core/processor.py +47 -47
- crawlo/core/scheduler.py +291 -257
- crawlo/crawler.py +650 -650
- crawlo/data/__init__.py +5 -5
- crawlo/data/user_agents.py +194 -194
- crawlo/downloader/__init__.py +273 -273
- crawlo/downloader/aiohttp_downloader.py +233 -228
- crawlo/downloader/cffi_downloader.py +245 -245
- crawlo/downloader/httpx_downloader.py +259 -259
- crawlo/downloader/hybrid_downloader.py +212 -212
- crawlo/downloader/playwright_downloader.py +402 -402
- crawlo/downloader/selenium_downloader.py +472 -472
- crawlo/event.py +11 -11
- crawlo/exceptions.py +81 -81
- crawlo/extension/__init__.py +63 -63
- crawlo/extension/health_check.py +141 -141
- crawlo/extension/log_interval.py +94 -94
- crawlo/extension/log_stats.py +70 -70
- crawlo/extension/logging_extension.py +61 -61
- crawlo/extension/memory_monitor.py +104 -104
- crawlo/extension/performance_profiler.py +133 -133
- crawlo/extension/request_recorder.py +107 -107
- crawlo/factories/__init__.py +27 -27
- crawlo/factories/base.py +68 -68
- crawlo/factories/crawler.py +103 -103
- crawlo/factories/registry.py +84 -84
- crawlo/filters/__init__.py +154 -154
- crawlo/filters/aioredis_filter.py +257 -257
- crawlo/filters/memory_filter.py +269 -269
- crawlo/framework.py +292 -292
- crawlo/initialization/__init__.py +44 -44
- crawlo/initialization/built_in.py +425 -425
- crawlo/initialization/context.py +141 -141
- crawlo/initialization/core.py +193 -193
- crawlo/initialization/phases.py +148 -148
- crawlo/initialization/registry.py +145 -145
- crawlo/items/__init__.py +23 -23
- crawlo/items/base.py +23 -23
- crawlo/items/fields.py +52 -52
- crawlo/items/items.py +104 -104
- crawlo/logging/__init__.py +45 -37
- crawlo/logging/async_handler.py +181 -0
- crawlo/logging/config.py +196 -96
- crawlo/logging/factory.py +171 -128
- crawlo/logging/manager.py +111 -111
- crawlo/logging/monitor.py +153 -0
- crawlo/logging/sampler.py +167 -0
- crawlo/middleware/__init__.py +21 -21
- crawlo/middleware/default_header.py +132 -132
- crawlo/middleware/download_delay.py +104 -104
- crawlo/middleware/middleware_manager.py +135 -135
- crawlo/middleware/offsite.py +123 -123
- crawlo/middleware/proxy.py +386 -386
- crawlo/middleware/request_ignore.py +86 -86
- crawlo/middleware/response_code.py +150 -150
- crawlo/middleware/response_filter.py +136 -136
- crawlo/middleware/retry.py +124 -124
- crawlo/middleware/simple_proxy.py +65 -65
- crawlo/mode_manager.py +219 -219
- crawlo/network/__init__.py +21 -21
- crawlo/network/request.py +379 -379
- crawlo/network/response.py +359 -359
- crawlo/pipelines/__init__.py +21 -21
- crawlo/pipelines/bloom_dedup_pipeline.py +146 -146
- crawlo/pipelines/console_pipeline.py +39 -39
- crawlo/pipelines/csv_pipeline.py +316 -316
- crawlo/pipelines/database_dedup_pipeline.py +197 -197
- crawlo/pipelines/json_pipeline.py +218 -218
- crawlo/pipelines/memory_dedup_pipeline.py +105 -105
- crawlo/pipelines/mongo_pipeline.py +131 -131
- crawlo/pipelines/mysql_pipeline.py +325 -325
- crawlo/pipelines/pipeline_manager.py +100 -84
- crawlo/pipelines/redis_dedup_pipeline.py +156 -156
- crawlo/project.py +349 -338
- crawlo/queue/pqueue.py +42 -42
- crawlo/queue/queue_manager.py +526 -522
- crawlo/queue/redis_priority_queue.py +370 -367
- crawlo/settings/__init__.py +7 -7
- crawlo/settings/default_settings.py +284 -284
- crawlo/settings/setting_manager.py +219 -219
- crawlo/spider/__init__.py +657 -657
- crawlo/stats_collector.py +73 -73
- crawlo/subscriber.py +129 -129
- crawlo/task_manager.py +138 -138
- crawlo/templates/crawlo.cfg.tmpl +10 -10
- crawlo/templates/project/__init__.py.tmpl +3 -3
- crawlo/templates/project/items.py.tmpl +17 -17
- crawlo/templates/project/middlewares.py.tmpl +118 -118
- crawlo/templates/project/pipelines.py.tmpl +96 -96
- crawlo/templates/project/settings.py.tmpl +170 -170
- crawlo/templates/project/settings_distributed.py.tmpl +169 -169
- crawlo/templates/project/settings_gentle.py.tmpl +166 -166
- crawlo/templates/project/settings_high_performance.py.tmpl +167 -167
- crawlo/templates/project/settings_minimal.py.tmpl +65 -65
- crawlo/templates/project/settings_simple.py.tmpl +164 -164
- crawlo/templates/project/spiders/__init__.py.tmpl +9 -9
- crawlo/templates/run.py.tmpl +34 -34
- crawlo/templates/spider/spider.py.tmpl +143 -143
- crawlo/templates/spiders_init.py.tmpl +9 -9
- crawlo/tools/__init__.py +200 -200
- crawlo/tools/anti_crawler.py +268 -268
- crawlo/tools/authenticated_proxy.py +240 -240
- crawlo/tools/data_formatter.py +225 -225
- crawlo/tools/data_validator.py +180 -180
- crawlo/tools/date_tools.py +289 -289
- crawlo/tools/distributed_coordinator.py +384 -384
- crawlo/tools/encoding_converter.py +127 -127
- crawlo/tools/network_diagnostic.py +364 -364
- crawlo/tools/request_tools.py +82 -82
- crawlo/tools/retry_mechanism.py +224 -224
- crawlo/tools/scenario_adapter.py +262 -262
- crawlo/tools/text_cleaner.py +232 -232
- crawlo/utils/__init__.py +34 -34
- crawlo/utils/batch_processor.py +259 -259
- crawlo/utils/class_loader.py +25 -25
- crawlo/utils/controlled_spider_mixin.py +439 -439
- crawlo/utils/db_helper.py +343 -343
- crawlo/utils/enhanced_error_handler.py +356 -356
- crawlo/utils/env_config.py +142 -142
- crawlo/utils/error_handler.py +165 -165
- crawlo/utils/fingerprint.py +122 -122
- crawlo/utils/func_tools.py +82 -82
- crawlo/utils/large_scale_config.py +286 -286
- crawlo/utils/large_scale_helper.py +344 -344
- crawlo/utils/log.py +79 -79
- crawlo/utils/performance_monitor.py +285 -285
- crawlo/utils/queue_helper.py +175 -175
- crawlo/utils/redis_connection_pool.py +388 -388
- crawlo/utils/redis_key_validator.py +198 -198
- crawlo/utils/request.py +267 -267
- crawlo/utils/request_serializer.py +225 -225
- crawlo/utils/spider_loader.py +61 -61
- crawlo/utils/system.py +11 -11
- crawlo/utils/tools.py +4 -4
- crawlo/utils/url.py +39 -39
- crawlo-1.4.3.dist-info/METADATA +190 -0
- crawlo-1.4.3.dist-info/RECORD +326 -0
- examples/__init__.py +7 -7
- examples/test_project/__init__.py +7 -7
- examples/test_project/run.py +34 -34
- examples/test_project/test_project/__init__.py +3 -3
- examples/test_project/test_project/items.py +17 -17
- examples/test_project/test_project/middlewares.py +118 -118
- examples/test_project/test_project/pipelines.py +96 -96
- examples/test_project/test_project/settings.py +169 -169
- examples/test_project/test_project/spiders/__init__.py +9 -9
- examples/test_project/test_project/spiders/of_week_dis.py +143 -143
- tests/__init__.py +7 -7
- tests/advanced_tools_example.py +275 -275
- tests/authenticated_proxy_example.py +106 -106
- tests/baidu_performance_test.py +108 -108
- tests/baidu_test.py +59 -59
- tests/cleaners_example.py +160 -160
- tests/comprehensive_framework_test.py +212 -212
- tests/comprehensive_test.py +81 -81
- tests/comprehensive_testing_summary.md +186 -186
- tests/config_validation_demo.py +142 -142
- tests/controlled_spider_example.py +205 -205
- tests/date_tools_example.py +180 -180
- tests/debug_configure.py +69 -69
- tests/debug_framework_logger.py +84 -84
- tests/debug_log_config.py +126 -126
- tests/debug_log_levels.py +63 -63
- tests/debug_pipelines.py +66 -66
- tests/detailed_log_test.py +233 -233
- tests/distributed_test.py +66 -66
- tests/distributed_test_debug.py +76 -76
- tests/dynamic_loading_example.py +523 -523
- tests/dynamic_loading_test.py +104 -104
- tests/env_config_example.py +133 -133
- tests/error_handling_example.py +171 -171
- tests/final_comprehensive_test.py +151 -151
- tests/final_log_test.py +260 -260
- tests/final_validation_test.py +182 -182
- tests/fix_log_test.py +142 -142
- tests/framework_performance_test.py +202 -202
- tests/log_buffering_test.py +111 -111
- tests/log_generation_timing_test.py +153 -153
- tests/optimized_performance_test.py +211 -211
- tests/performance_comparison.py +245 -245
- tests/queue_blocking_test.py +113 -113
- tests/queue_test.py +89 -89
- tests/redis_key_validation_demo.py +130 -130
- tests/request_params_example.py +150 -150
- tests/response_improvements_example.py +144 -144
- tests/scrapy_comparison/ofweek_scrapy.py +138 -138
- tests/scrapy_comparison/scrapy_test.py +133 -133
- tests/simple_command_test.py +119 -119
- tests/simple_crawlo_test.py +127 -127
- tests/simple_log_test.py +57 -57
- tests/simple_log_test2.py +137 -137
- tests/simple_optimization_test.py +128 -128
- tests/simple_queue_type_test.py +41 -41
- tests/simple_spider_test.py +49 -49
- tests/simple_test.py +47 -47
- tests/spider_log_timing_test.py +177 -177
- tests/test_advanced_tools.py +148 -148
- tests/test_all_commands.py +230 -230
- tests/test_all_pipeline_fingerprints.py +133 -133
- tests/test_all_redis_key_configs.py +145 -145
- tests/test_authenticated_proxy.py +141 -141
- tests/test_batch_processor.py +178 -178
- tests/test_cleaners.py +54 -54
- tests/test_component_factory.py +174 -174
- tests/test_comprehensive.py +146 -146
- tests/test_config_consistency.py +80 -80
- tests/test_config_merge.py +152 -152
- tests/test_config_validator.py +182 -182
- tests/test_controlled_spider_mixin.py +79 -79
- tests/test_crawlo_proxy_integration.py +108 -108
- tests/test_date_tools.py +123 -123
- tests/test_dedup_fix.py +220 -220
- tests/test_dedup_pipeline_consistency.py +125 -0
- tests/test_default_header_middleware.py +313 -313
- tests/test_distributed.py +65 -65
- tests/test_double_crawlo_fix.py +204 -204
- tests/test_double_crawlo_fix_simple.py +124 -124
- tests/test_download_delay_middleware.py +221 -221
- tests/test_downloader_proxy_compatibility.py +268 -268
- tests/test_dynamic_downloaders_proxy.py +124 -124
- tests/test_dynamic_proxy.py +92 -92
- tests/test_dynamic_proxy_config.py +146 -146
- tests/test_dynamic_proxy_real.py +109 -109
- tests/test_edge_cases.py +303 -303
- tests/test_enhanced_error_handler.py +270 -270
- tests/test_enhanced_error_handler_comprehensive.py +245 -245
- tests/test_env_config.py +121 -121
- tests/test_error_handler_compatibility.py +112 -112
- tests/test_factories.py +252 -252
- tests/test_final_validation.py +153 -153
- tests/test_fingerprint_consistency.py +135 -135
- tests/test_fingerprint_simple.py +51 -51
- tests/test_framework_env_usage.py +103 -103
- tests/test_framework_logger.py +66 -66
- tests/test_framework_startup.py +64 -64
- tests/test_get_component_logger.py +83 -83
- tests/test_hash_performance.py +99 -99
- tests/test_integration.py +169 -169
- tests/test_item_dedup_redis_key.py +122 -122
- tests/test_large_scale_config.py +112 -112
- tests/test_large_scale_helper.py +235 -235
- tests/test_logging_enhancements.py +375 -0
- tests/test_logging_final.py +185 -0
- tests/test_logging_integration.py +313 -0
- tests/test_logging_system.py +282 -282
- tests/test_middleware_debug.py +142 -0
- tests/test_mode_change.py +72 -72
- tests/test_mode_consistency.py +51 -51
- tests/test_offsite_middleware.py +244 -244
- tests/test_offsite_middleware_simple.py +203 -203
- tests/test_parsel.py +29 -29
- tests/test_performance.py +327 -327
- tests/test_performance_monitor.py +115 -115
- tests/test_pipeline_fingerprint_consistency.py +86 -86
- tests/test_priority_behavior.py +212 -0
- tests/test_priority_consistency.py +152 -0
- tests/test_priority_consistency_fixed.py +250 -0
- tests/test_proxy_api.py +264 -264
- tests/test_proxy_health_check.py +32 -32
- tests/test_proxy_middleware.py +121 -121
- tests/test_proxy_middleware_enhanced.py +216 -216
- tests/test_proxy_middleware_integration.py +136 -136
- tests/test_proxy_middleware_refactored.py +184 -184
- tests/test_proxy_providers.py +56 -56
- tests/test_proxy_stats.py +19 -19
- tests/test_proxy_strategies.py +59 -59
- tests/test_queue_empty_check.py +41 -41
- tests/test_queue_manager_double_crawlo.py +173 -173
- tests/test_queue_manager_redis_key.py +179 -179
- tests/test_queue_naming.py +154 -154
- tests/test_queue_type.py +106 -106
- tests/test_queue_type_redis_config_consistency.py +131 -0
- tests/test_random_headers_default.py +323 -0
- tests/test_random_headers_necessity.py +309 -0
- tests/test_random_user_agent.py +72 -72
- tests/test_real_scenario_proxy.py +195 -195
- tests/test_redis_config.py +28 -28
- tests/test_redis_connection_pool.py +294 -294
- tests/test_redis_key_naming.py +181 -181
- tests/test_redis_key_validator.py +123 -123
- tests/test_redis_queue.py +224 -224
- tests/test_redis_queue_name_fix.py +175 -175
- tests/test_redis_queue_type_fallback.py +130 -0
- tests/test_request_ignore_middleware.py +182 -182
- tests/test_request_params.py +111 -111
- tests/test_request_serialization.py +70 -70
- tests/test_response_code_middleware.py +349 -349
- tests/test_response_filter_middleware.py +427 -427
- tests/test_response_improvements.py +152 -152
- tests/test_retry_middleware.py +334 -242
- tests/test_retry_middleware_realistic.py +274 -0
- tests/test_scheduler.py +252 -252
- tests/test_scheduler_config_update.py +133 -133
- tests/test_simple_response.py +61 -61
- tests/test_telecom_spider_redis_key.py +205 -205
- tests/test_template_content.py +87 -87
- tests/test_template_redis_key.py +134 -134
- tests/test_tools.py +159 -159
- tests/test_user_agent_randomness.py +177 -0
- tests/test_user_agents.py +96 -96
- tests/tools_example.py +260 -260
- tests/untested_features_report.md +138 -138
- tests/verify_debug.py +51 -51
- tests/verify_distributed.py +117 -117
- tests/verify_log_fix.py +111 -111
- crawlo-1.4.1.dist-info/METADATA +0 -1199
- crawlo-1.4.1.dist-info/RECORD +0 -309
- {crawlo-1.4.1.dist-info → crawlo-1.4.3.dist-info}/WHEEL +0 -0
- {crawlo-1.4.1.dist-info → crawlo-1.4.3.dist-info}/entry_points.txt +0 -0
- {crawlo-1.4.1.dist-info → crawlo-1.4.3.dist-info}/top_level.txt +0 -0
tests/debug_log_config.py
CHANGED
|
@@ -1,127 +1,127 @@
|
|
|
1
|
-
#!/usr/bin/env python
|
|
2
|
-
# -*- coding: UTF-8 -*-
|
|
3
|
-
"""
|
|
4
|
-
调试日志配置问题
|
|
5
|
-
"""
|
|
6
|
-
|
|
7
|
-
import os
|
|
8
|
-
import sys
|
|
9
|
-
from pathlib import Path
|
|
10
|
-
|
|
11
|
-
# 添加项目根目录到Python路径
|
|
12
|
-
project_root = Path(__file__).parent.parent
|
|
13
|
-
sys.path.insert(0, str(project_root))
|
|
14
|
-
|
|
15
|
-
from crawlo.logging import configure_logging as configure, get_logger, LogManager
|
|
16
|
-
from crawlo.logging.config import LogConfig
|
|
17
|
-
|
|
18
|
-
|
|
19
|
-
def debug_log_configuration():
|
|
20
|
-
"""调试日志配置"""
|
|
21
|
-
print("=== 调试日志配置 ===")
|
|
22
|
-
|
|
23
|
-
# 重置配置
|
|
24
|
-
LogManager().reset()
|
|
25
|
-
|
|
26
|
-
# 1. 检查初始状态
|
|
27
|
-
print("1. 检查初始状态...")
|
|
28
|
-
print(f" 初始配置状态: {LogManager().is_configured}")
|
|
29
|
-
if LogManager().config:
|
|
30
|
-
print(f" 初始配置: {LogManager().config}")
|
|
31
|
-
|
|
32
|
-
# 2. 配置日志系统
|
|
33
|
-
print("2. 配置日志系统...")
|
|
34
|
-
config = configure(
|
|
35
|
-
LOG_LEVEL='DEBUG',
|
|
36
|
-
LOG_FILE='debug_test.log',
|
|
37
|
-
LOG_MAX_BYTES=1024,
|
|
38
|
-
LOG_BACKUP_COUNT=2,
|
|
39
|
-
LOG_CONSOLE_ENABLED=True,
|
|
40
|
-
LOG_FILE_ENABLED=True
|
|
41
|
-
)
|
|
42
|
-
|
|
43
|
-
print(f" 配置返回值: {config}")
|
|
44
|
-
print(f" 配置类型: {type(config)}")
|
|
45
|
-
print(f" 配置级别: {config.level}")
|
|
46
|
-
print(f" 配置文件路径: {config.file_path}")
|
|
47
|
-
print(f" 轮转大小: {config.max_bytes}")
|
|
48
|
-
print(f" 备份数量: {config.backup_count}")
|
|
49
|
-
print(f" 控制台启用: {config.console_enabled}")
|
|
50
|
-
print(f" 文件启用: {config.file_enabled}")
|
|
51
|
-
|
|
52
|
-
# 3. 检查管理器状态
|
|
53
|
-
print("3. 检查管理器状态...")
|
|
54
|
-
manager = LogManager()
|
|
55
|
-
print(f" 管理器配置状态: {manager.is_configured}")
|
|
56
|
-
if manager.config:
|
|
57
|
-
print(f" 管理器配置: {manager.config}")
|
|
58
|
-
print(f" 管理器配置文件路径: {manager.config.file_path}")
|
|
59
|
-
|
|
60
|
-
# 4. 测试Logger创建
|
|
61
|
-
print("4. 测试Logger创建...")
|
|
62
|
-
logger = get_logger('test.debug')
|
|
63
|
-
print(f" Logger handlers数量: {len(logger.handlers)}")
|
|
64
|
-
|
|
65
|
-
for i, handler in enumerate(logger.handlers):
|
|
66
|
-
handler_type = type(handler).__name__
|
|
67
|
-
print(f" Handler {i}: {handler_type}")
|
|
68
|
-
if hasattr(handler, 'baseFilename'):
|
|
69
|
-
print(f" 文件名: {handler.baseFilename}")
|
|
70
|
-
|
|
71
|
-
# 5. 测试日志输出
|
|
72
|
-
print("5. 测试日志输出...")
|
|
73
|
-
logger.info("调试测试消息")
|
|
74
|
-
|
|
75
|
-
|
|
76
|
-
def test_config_from_dict():
|
|
77
|
-
"""测试从字典创建配置"""
|
|
78
|
-
print("\n=== 测试从字典创建配置 ===")
|
|
79
|
-
|
|
80
|
-
LogManager().reset()
|
|
81
|
-
|
|
82
|
-
# 使用字典配置
|
|
83
|
-
config_dict = {
|
|
84
|
-
'level': 'DEBUG',
|
|
85
|
-
'file_path': 'dict_test.log',
|
|
86
|
-
'max_bytes': 1024,
|
|
87
|
-
'backup_count': 2,
|
|
88
|
-
'console_enabled': True,
|
|
89
|
-
'file_enabled': True
|
|
90
|
-
}
|
|
91
|
-
|
|
92
|
-
config = LogConfig.from_dict(config_dict)
|
|
93
|
-
print(f" 字典配置: {config}")
|
|
94
|
-
print(f" 验证结果: {config.validate()}")
|
|
95
|
-
|
|
96
|
-
# 应用配置
|
|
97
|
-
LogManager().configure(config)
|
|
98
|
-
|
|
99
|
-
logger = get_logger('test.dict')
|
|
100
|
-
print(f" Logger handlers数量: {len(logger.handlers)}")
|
|
101
|
-
|
|
102
|
-
for i, handler in enumerate(logger.handlers):
|
|
103
|
-
handler_type = type(handler).__name__
|
|
104
|
-
print(f" Handler {i}: {handler_type}")
|
|
105
|
-
|
|
106
|
-
|
|
107
|
-
def main():
|
|
108
|
-
"""主函数"""
|
|
109
|
-
print("开始调试日志配置问题...")
|
|
110
|
-
|
|
111
|
-
try:
|
|
112
|
-
debug_log_configuration()
|
|
113
|
-
test_config_from_dict()
|
|
114
|
-
|
|
115
|
-
print("\n=== 调试完成 ===")
|
|
116
|
-
|
|
117
|
-
except Exception as e:
|
|
118
|
-
print(f"\n调试过程中出现错误: {e}")
|
|
119
|
-
import traceback
|
|
120
|
-
traceback.print_exc()
|
|
121
|
-
return 1
|
|
122
|
-
|
|
123
|
-
return 0
|
|
124
|
-
|
|
125
|
-
|
|
126
|
-
if __name__ == '__main__':
|
|
1
|
+
#!/usr/bin/env python
|
|
2
|
+
# -*- coding: UTF-8 -*-
|
|
3
|
+
"""
|
|
4
|
+
调试日志配置问题
|
|
5
|
+
"""
|
|
6
|
+
|
|
7
|
+
import os
|
|
8
|
+
import sys
|
|
9
|
+
from pathlib import Path
|
|
10
|
+
|
|
11
|
+
# 添加项目根目录到Python路径
|
|
12
|
+
project_root = Path(__file__).parent.parent
|
|
13
|
+
sys.path.insert(0, str(project_root))
|
|
14
|
+
|
|
15
|
+
from crawlo.logging import configure_logging as configure, get_logger, LogManager
|
|
16
|
+
from crawlo.logging.config import LogConfig
|
|
17
|
+
|
|
18
|
+
|
|
19
|
+
def debug_log_configuration():
|
|
20
|
+
"""调试日志配置"""
|
|
21
|
+
print("=== 调试日志配置 ===")
|
|
22
|
+
|
|
23
|
+
# 重置配置
|
|
24
|
+
LogManager().reset()
|
|
25
|
+
|
|
26
|
+
# 1. 检查初始状态
|
|
27
|
+
print("1. 检查初始状态...")
|
|
28
|
+
print(f" 初始配置状态: {LogManager().is_configured}")
|
|
29
|
+
if LogManager().config:
|
|
30
|
+
print(f" 初始配置: {LogManager().config}")
|
|
31
|
+
|
|
32
|
+
# 2. 配置日志系统
|
|
33
|
+
print("2. 配置日志系统...")
|
|
34
|
+
config = configure(
|
|
35
|
+
LOG_LEVEL='DEBUG',
|
|
36
|
+
LOG_FILE='debug_test.log',
|
|
37
|
+
LOG_MAX_BYTES=1024,
|
|
38
|
+
LOG_BACKUP_COUNT=2,
|
|
39
|
+
LOG_CONSOLE_ENABLED=True,
|
|
40
|
+
LOG_FILE_ENABLED=True
|
|
41
|
+
)
|
|
42
|
+
|
|
43
|
+
print(f" 配置返回值: {config}")
|
|
44
|
+
print(f" 配置类型: {type(config)}")
|
|
45
|
+
print(f" 配置级别: {config.level}")
|
|
46
|
+
print(f" 配置文件路径: {config.file_path}")
|
|
47
|
+
print(f" 轮转大小: {config.max_bytes}")
|
|
48
|
+
print(f" 备份数量: {config.backup_count}")
|
|
49
|
+
print(f" 控制台启用: {config.console_enabled}")
|
|
50
|
+
print(f" 文件启用: {config.file_enabled}")
|
|
51
|
+
|
|
52
|
+
# 3. 检查管理器状态
|
|
53
|
+
print("3. 检查管理器状态...")
|
|
54
|
+
manager = LogManager()
|
|
55
|
+
print(f" 管理器配置状态: {manager.is_configured}")
|
|
56
|
+
if manager.config:
|
|
57
|
+
print(f" 管理器配置: {manager.config}")
|
|
58
|
+
print(f" 管理器配置文件路径: {manager.config.file_path}")
|
|
59
|
+
|
|
60
|
+
# 4. 测试Logger创建
|
|
61
|
+
print("4. 测试Logger创建...")
|
|
62
|
+
logger = get_logger('test.debug')
|
|
63
|
+
print(f" Logger handlers数量: {len(logger.handlers)}")
|
|
64
|
+
|
|
65
|
+
for i, handler in enumerate(logger.handlers):
|
|
66
|
+
handler_type = type(handler).__name__
|
|
67
|
+
print(f" Handler {i}: {handler_type}")
|
|
68
|
+
if hasattr(handler, 'baseFilename'):
|
|
69
|
+
print(f" 文件名: {handler.baseFilename}")
|
|
70
|
+
|
|
71
|
+
# 5. 测试日志输出
|
|
72
|
+
print("5. 测试日志输出...")
|
|
73
|
+
logger.info("调试测试消息")
|
|
74
|
+
|
|
75
|
+
|
|
76
|
+
def test_config_from_dict():
|
|
77
|
+
"""测试从字典创建配置"""
|
|
78
|
+
print("\n=== 测试从字典创建配置 ===")
|
|
79
|
+
|
|
80
|
+
LogManager().reset()
|
|
81
|
+
|
|
82
|
+
# 使用字典配置
|
|
83
|
+
config_dict = {
|
|
84
|
+
'level': 'DEBUG',
|
|
85
|
+
'file_path': 'dict_test.log',
|
|
86
|
+
'max_bytes': 1024,
|
|
87
|
+
'backup_count': 2,
|
|
88
|
+
'console_enabled': True,
|
|
89
|
+
'file_enabled': True
|
|
90
|
+
}
|
|
91
|
+
|
|
92
|
+
config = LogConfig.from_dict(config_dict)
|
|
93
|
+
print(f" 字典配置: {config}")
|
|
94
|
+
print(f" 验证结果: {config.validate()}")
|
|
95
|
+
|
|
96
|
+
# 应用配置
|
|
97
|
+
LogManager().configure(config)
|
|
98
|
+
|
|
99
|
+
logger = get_logger('test.dict')
|
|
100
|
+
print(f" Logger handlers数量: {len(logger.handlers)}")
|
|
101
|
+
|
|
102
|
+
for i, handler in enumerate(logger.handlers):
|
|
103
|
+
handler_type = type(handler).__name__
|
|
104
|
+
print(f" Handler {i}: {handler_type}")
|
|
105
|
+
|
|
106
|
+
|
|
107
|
+
def main():
|
|
108
|
+
"""主函数"""
|
|
109
|
+
print("开始调试日志配置问题...")
|
|
110
|
+
|
|
111
|
+
try:
|
|
112
|
+
debug_log_configuration()
|
|
113
|
+
test_config_from_dict()
|
|
114
|
+
|
|
115
|
+
print("\n=== 调试完成 ===")
|
|
116
|
+
|
|
117
|
+
except Exception as e:
|
|
118
|
+
print(f"\n调试过程中出现错误: {e}")
|
|
119
|
+
import traceback
|
|
120
|
+
traceback.print_exc()
|
|
121
|
+
return 1
|
|
122
|
+
|
|
123
|
+
return 0
|
|
124
|
+
|
|
125
|
+
|
|
126
|
+
if __name__ == '__main__':
|
|
127
127
|
sys.exit(main())
|
tests/debug_log_levels.py
CHANGED
|
@@ -1,64 +1,64 @@
|
|
|
1
|
-
#!/usr/bin/env python
|
|
2
|
-
# -*- coding: UTF-8 -*-
|
|
3
|
-
"""
|
|
4
|
-
调试日志级别配置脚本
|
|
5
|
-
"""
|
|
6
|
-
import sys
|
|
7
|
-
import os
|
|
8
|
-
sys.path.insert(0, '/')
|
|
9
|
-
|
|
10
|
-
from crawlo.initialization import initialize_framework
|
|
11
|
-
from crawlo.utils.log import LoggerManager, get_logger
|
|
12
|
-
import logging
|
|
13
|
-
|
|
14
|
-
def main():
|
|
15
|
-
print("=== 开始调试日志级别配置 ===")
|
|
16
|
-
|
|
17
|
-
# 初始化框架
|
|
18
|
-
print("1. 初始化框架...")
|
|
19
|
-
settings = initialize_framework()
|
|
20
|
-
|
|
21
|
-
# 打印配置信息
|
|
22
|
-
print(f"2. 配置信息:")
|
|
23
|
-
print(f" LOG_LEVEL: {settings.get('LOG_LEVEL')}")
|
|
24
|
-
print(f" LOG_FILE: {settings.get('LOG_FILE')}")
|
|
25
|
-
print(f" LoggerManager._default_level: {LoggerManager._default_level}")
|
|
26
|
-
print(f" LoggerManager._default_console_level: {LoggerManager._default_console_level}")
|
|
27
|
-
print(f" LoggerManager._default_file_level: {LoggerManager._default_file_level}")
|
|
28
|
-
|
|
29
|
-
# 测试不同组件的日志级别
|
|
30
|
-
components = [
|
|
31
|
-
'crawlo.framework',
|
|
32
|
-
'crawlo.crawler',
|
|
33
|
-
'QueueManager',
|
|
34
|
-
'Scheduler',
|
|
35
|
-
'AioHttpDownloader',
|
|
36
|
-
'MiddlewareManager',
|
|
37
|
-
'PipelineManager',
|
|
38
|
-
'ExtensionManager',
|
|
39
|
-
'of_week_standalone'
|
|
40
|
-
]
|
|
41
|
-
|
|
42
|
-
print("3. 组件日志级别测试:")
|
|
43
|
-
for component_name in components:
|
|
44
|
-
logger = get_logger(component_name)
|
|
45
|
-
print(f" {component_name}:")
|
|
46
|
-
print(f" logger.level: {logger.level} ({logging.getLevelName(logger.level)})")
|
|
47
|
-
|
|
48
|
-
for handler in logger.handlers:
|
|
49
|
-
handler_type = type(handler).__name__
|
|
50
|
-
print(f" {handler_type}.level: {handler.level} ({logging.getLevelName(handler.level)})")
|
|
51
|
-
|
|
52
|
-
# 实际测试日志输出
|
|
53
|
-
print("4. 测试日志输出:")
|
|
54
|
-
test_logger = get_logger('TestLogger')
|
|
55
|
-
|
|
56
|
-
print(" 控制台应该看到以下日志:")
|
|
57
|
-
test_logger.debug("这是DEBUG级别日志 - 控制台应该看不到")
|
|
58
|
-
test_logger.info("这是INFO级别日志 - 控制台应该能看到")
|
|
59
|
-
test_logger.warning("这是WARNING级别日志 - 控制台应该能看到")
|
|
60
|
-
|
|
61
|
-
print("=== 调试完成 ===")
|
|
62
|
-
|
|
63
|
-
if __name__ == '__main__':
|
|
1
|
+
#!/usr/bin/env python
|
|
2
|
+
# -*- coding: UTF-8 -*-
|
|
3
|
+
"""
|
|
4
|
+
调试日志级别配置脚本
|
|
5
|
+
"""
|
|
6
|
+
import sys
|
|
7
|
+
import os
|
|
8
|
+
sys.path.insert(0, '/')
|
|
9
|
+
|
|
10
|
+
from crawlo.initialization import initialize_framework
|
|
11
|
+
from crawlo.utils.log import LoggerManager, get_logger
|
|
12
|
+
import logging
|
|
13
|
+
|
|
14
|
+
def main():
|
|
15
|
+
print("=== 开始调试日志级别配置 ===")
|
|
16
|
+
|
|
17
|
+
# 初始化框架
|
|
18
|
+
print("1. 初始化框架...")
|
|
19
|
+
settings = initialize_framework()
|
|
20
|
+
|
|
21
|
+
# 打印配置信息
|
|
22
|
+
print(f"2. 配置信息:")
|
|
23
|
+
print(f" LOG_LEVEL: {settings.get('LOG_LEVEL')}")
|
|
24
|
+
print(f" LOG_FILE: {settings.get('LOG_FILE')}")
|
|
25
|
+
print(f" LoggerManager._default_level: {LoggerManager._default_level}")
|
|
26
|
+
print(f" LoggerManager._default_console_level: {LoggerManager._default_console_level}")
|
|
27
|
+
print(f" LoggerManager._default_file_level: {LoggerManager._default_file_level}")
|
|
28
|
+
|
|
29
|
+
# 测试不同组件的日志级别
|
|
30
|
+
components = [
|
|
31
|
+
'crawlo.framework',
|
|
32
|
+
'crawlo.crawler',
|
|
33
|
+
'QueueManager',
|
|
34
|
+
'Scheduler',
|
|
35
|
+
'AioHttpDownloader',
|
|
36
|
+
'MiddlewareManager',
|
|
37
|
+
'PipelineManager',
|
|
38
|
+
'ExtensionManager',
|
|
39
|
+
'of_week_standalone'
|
|
40
|
+
]
|
|
41
|
+
|
|
42
|
+
print("3. 组件日志级别测试:")
|
|
43
|
+
for component_name in components:
|
|
44
|
+
logger = get_logger(component_name)
|
|
45
|
+
print(f" {component_name}:")
|
|
46
|
+
print(f" logger.level: {logger.level} ({logging.getLevelName(logger.level)})")
|
|
47
|
+
|
|
48
|
+
for handler in logger.handlers:
|
|
49
|
+
handler_type = type(handler).__name__
|
|
50
|
+
print(f" {handler_type}.level: {handler.level} ({logging.getLevelName(handler.level)})")
|
|
51
|
+
|
|
52
|
+
# 实际测试日志输出
|
|
53
|
+
print("4. 测试日志输出:")
|
|
54
|
+
test_logger = get_logger('TestLogger')
|
|
55
|
+
|
|
56
|
+
print(" 控制台应该看到以下日志:")
|
|
57
|
+
test_logger.debug("这是DEBUG级别日志 - 控制台应该看不到")
|
|
58
|
+
test_logger.info("这是INFO级别日志 - 控制台应该能看到")
|
|
59
|
+
test_logger.warning("这是WARNING级别日志 - 控制台应该能看到")
|
|
60
|
+
|
|
61
|
+
print("=== 调试完成 ===")
|
|
62
|
+
|
|
63
|
+
if __name__ == '__main__':
|
|
64
64
|
main()
|
tests/debug_pipelines.py
CHANGED
|
@@ -1,67 +1,67 @@
|
|
|
1
|
-
#!/usr/bin/env python3
|
|
2
|
-
# -*- coding: utf-8 -*-
|
|
3
|
-
"""
|
|
4
|
-
调试管道配置
|
|
5
|
-
查看实际的管道配置合并结果
|
|
6
|
-
"""
|
|
7
|
-
|
|
8
|
-
import sys
|
|
9
|
-
import os
|
|
10
|
-
|
|
11
|
-
# 添加项目根目录到路径
|
|
12
|
-
sys.path.insert(0, os.path.join(os.path.dirname(__file__), '..'))
|
|
13
|
-
|
|
14
|
-
from crawlo.settings.setting_manager import SettingManager
|
|
15
|
-
|
|
16
|
-
def debug_pipelines():
|
|
17
|
-
"""调试管道配置"""
|
|
18
|
-
print("调试管道配置合并...")
|
|
19
|
-
print("=" * 50)
|
|
20
|
-
|
|
21
|
-
# 用户自定义配置
|
|
22
|
-
user_config = {
|
|
23
|
-
'PIPELINES': [
|
|
24
|
-
'myproject.pipelines.CustomPipeline',
|
|
25
|
-
]
|
|
26
|
-
}
|
|
27
|
-
|
|
28
|
-
settings = SettingManager(user_config)
|
|
29
|
-
|
|
30
|
-
# 获取合并后的管道列表
|
|
31
|
-
pipelines = settings.get('PIPELINES')
|
|
32
|
-
|
|
33
|
-
print("合并后的管道列表:")
|
|
34
|
-
for i, pipeline in enumerate(pipelines):
|
|
35
|
-
print(f" {i}: {pipeline}")
|
|
36
|
-
|
|
37
|
-
print()
|
|
38
|
-
print("默认去重管道:")
|
|
39
|
-
dedup_pipeline = settings.get('DEFAULT_DEDUP_PIPELINE')
|
|
40
|
-
print(f" {dedup_pipeline}")
|
|
41
|
-
|
|
42
|
-
print()
|
|
43
|
-
print("框架默认管道:")
|
|
44
|
-
default_pipelines = settings.get('PIPELINES', []) # 直接获取PIPELINES,它已经包含了默认管道
|
|
45
|
-
# 从合并后的管道中移除去重管道,得到框架默认管道
|
|
46
|
-
if dedup_pipeline:
|
|
47
|
-
default_pipelines_without_dedup = [p for p in default_pipelines if p != dedup_pipeline]
|
|
48
|
-
for i, pipeline in enumerate(default_pipelines_without_dedup):
|
|
49
|
-
print(f" {i}: {pipeline}")
|
|
50
|
-
else:
|
|
51
|
-
for i, pipeline in enumerate(default_pipelines):
|
|
52
|
-
print(f" {i}: {pipeline}")
|
|
53
|
-
|
|
54
|
-
print()
|
|
55
|
-
print("自定义管道:")
|
|
56
|
-
custom_pipelines = settings.get('PIPELINES')
|
|
57
|
-
# 从合并后的管道中移除默认管道,得到自定义管道
|
|
58
|
-
default_pipelines_list = [
|
|
59
|
-
'crawlo.pipelines.console_pipeline.ConsolePipeline',
|
|
60
|
-
'crawlo.pipelines.memory_dedup_pipeline.MemoryDedupPipeline'
|
|
61
|
-
]
|
|
62
|
-
custom_pipelines_list = [p for p in custom_pipelines if p not in default_pipelines_list]
|
|
63
|
-
for i, pipeline in enumerate(custom_pipelines_list):
|
|
64
|
-
print(f" {i}: {pipeline}")
|
|
65
|
-
|
|
66
|
-
if __name__ == "__main__":
|
|
1
|
+
#!/usr/bin/env python3
|
|
2
|
+
# -*- coding: utf-8 -*-
|
|
3
|
+
"""
|
|
4
|
+
调试管道配置
|
|
5
|
+
查看实际的管道配置合并结果
|
|
6
|
+
"""
|
|
7
|
+
|
|
8
|
+
import sys
|
|
9
|
+
import os
|
|
10
|
+
|
|
11
|
+
# 添加项目根目录到路径
|
|
12
|
+
sys.path.insert(0, os.path.join(os.path.dirname(__file__), '..'))
|
|
13
|
+
|
|
14
|
+
from crawlo.settings.setting_manager import SettingManager
|
|
15
|
+
|
|
16
|
+
def debug_pipelines():
|
|
17
|
+
"""调试管道配置"""
|
|
18
|
+
print("调试管道配置合并...")
|
|
19
|
+
print("=" * 50)
|
|
20
|
+
|
|
21
|
+
# 用户自定义配置
|
|
22
|
+
user_config = {
|
|
23
|
+
'PIPELINES': [
|
|
24
|
+
'myproject.pipelines.CustomPipeline',
|
|
25
|
+
]
|
|
26
|
+
}
|
|
27
|
+
|
|
28
|
+
settings = SettingManager(user_config)
|
|
29
|
+
|
|
30
|
+
# 获取合并后的管道列表
|
|
31
|
+
pipelines = settings.get('PIPELINES')
|
|
32
|
+
|
|
33
|
+
print("合并后的管道列表:")
|
|
34
|
+
for i, pipeline in enumerate(pipelines):
|
|
35
|
+
print(f" {i}: {pipeline}")
|
|
36
|
+
|
|
37
|
+
print()
|
|
38
|
+
print("默认去重管道:")
|
|
39
|
+
dedup_pipeline = settings.get('DEFAULT_DEDUP_PIPELINE')
|
|
40
|
+
print(f" {dedup_pipeline}")
|
|
41
|
+
|
|
42
|
+
print()
|
|
43
|
+
print("框架默认管道:")
|
|
44
|
+
default_pipelines = settings.get('PIPELINES', []) # 直接获取PIPELINES,它已经包含了默认管道
|
|
45
|
+
# 从合并后的管道中移除去重管道,得到框架默认管道
|
|
46
|
+
if dedup_pipeline:
|
|
47
|
+
default_pipelines_without_dedup = [p for p in default_pipelines if p != dedup_pipeline]
|
|
48
|
+
for i, pipeline in enumerate(default_pipelines_without_dedup):
|
|
49
|
+
print(f" {i}: {pipeline}")
|
|
50
|
+
else:
|
|
51
|
+
for i, pipeline in enumerate(default_pipelines):
|
|
52
|
+
print(f" {i}: {pipeline}")
|
|
53
|
+
|
|
54
|
+
print()
|
|
55
|
+
print("自定义管道:")
|
|
56
|
+
custom_pipelines = settings.get('PIPELINES')
|
|
57
|
+
# 从合并后的管道中移除默认管道,得到自定义管道
|
|
58
|
+
default_pipelines_list = [
|
|
59
|
+
'crawlo.pipelines.console_pipeline.ConsolePipeline',
|
|
60
|
+
'crawlo.pipelines.memory_dedup_pipeline.MemoryDedupPipeline'
|
|
61
|
+
]
|
|
62
|
+
custom_pipelines_list = [p for p in custom_pipelines if p not in default_pipelines_list]
|
|
63
|
+
for i, pipeline in enumerate(custom_pipelines_list):
|
|
64
|
+
print(f" {i}: {pipeline}")
|
|
65
|
+
|
|
66
|
+
if __name__ == "__main__":
|
|
67
67
|
debug_pipelines()
|