crawlo 1.4.6__py3-none-any.whl → 1.4.7__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release: this version of crawlo has been flagged for review — see the release advisory for details.
- crawlo/__init__.py +90 -89
- crawlo/__version__.py +1 -1
- crawlo/cli.py +75 -75
- crawlo/commands/__init__.py +14 -14
- crawlo/commands/check.py +594 -594
- crawlo/commands/genspider.py +186 -186
- crawlo/commands/help.py +140 -138
- crawlo/commands/list.py +155 -155
- crawlo/commands/run.py +379 -341
- crawlo/commands/startproject.py +460 -460
- crawlo/commands/stats.py +187 -187
- crawlo/commands/utils.py +196 -196
- crawlo/config.py +320 -312
- crawlo/config_validator.py +277 -277
- crawlo/core/__init__.py +52 -52
- crawlo/core/engine.py +451 -438
- crawlo/core/processor.py +47 -47
- crawlo/core/scheduler.py +290 -291
- crawlo/crawler.py +698 -657
- crawlo/data/__init__.py +5 -5
- crawlo/data/user_agents.py +194 -194
- crawlo/downloader/__init__.py +280 -276
- crawlo/downloader/aiohttp_downloader.py +233 -233
- crawlo/downloader/cffi_downloader.py +250 -247
- crawlo/downloader/httpx_downloader.py +265 -259
- crawlo/downloader/hybrid_downloader.py +212 -212
- crawlo/downloader/playwright_downloader.py +425 -402
- crawlo/downloader/selenium_downloader.py +486 -472
- crawlo/event.py +45 -11
- crawlo/exceptions.py +215 -82
- crawlo/extension/__init__.py +65 -64
- crawlo/extension/health_check.py +141 -141
- crawlo/extension/log_interval.py +94 -94
- crawlo/extension/log_stats.py +70 -70
- crawlo/extension/logging_extension.py +53 -61
- crawlo/extension/memory_monitor.py +104 -104
- crawlo/extension/performance_profiler.py +133 -133
- crawlo/extension/request_recorder.py +107 -107
- crawlo/factories/__init__.py +27 -27
- crawlo/factories/base.py +68 -68
- crawlo/factories/crawler.py +104 -103
- crawlo/factories/registry.py +84 -84
- crawlo/factories/utils.py +135 -0
- crawlo/filters/__init__.py +170 -153
- crawlo/filters/aioredis_filter.py +348 -264
- crawlo/filters/memory_filter.py +261 -276
- crawlo/framework.py +306 -292
- crawlo/initialization/__init__.py +44 -44
- crawlo/initialization/built_in.py +391 -434
- crawlo/initialization/context.py +141 -141
- crawlo/initialization/core.py +240 -194
- crawlo/initialization/phases.py +230 -149
- crawlo/initialization/registry.py +143 -145
- crawlo/initialization/utils.py +49 -0
- crawlo/interfaces.py +23 -23
- crawlo/items/__init__.py +23 -23
- crawlo/items/base.py +23 -23
- crawlo/items/fields.py +52 -52
- crawlo/items/items.py +104 -104
- crawlo/logging/__init__.py +42 -46
- crawlo/logging/config.py +277 -197
- crawlo/logging/factory.py +175 -171
- crawlo/logging/manager.py +104 -112
- crawlo/middleware/__init__.py +87 -24
- crawlo/middleware/default_header.py +132 -132
- crawlo/middleware/download_delay.py +104 -104
- crawlo/middleware/middleware_manager.py +142 -142
- crawlo/middleware/offsite.py +123 -123
- crawlo/middleware/proxy.py +209 -209
- crawlo/middleware/request_ignore.py +86 -86
- crawlo/middleware/response_code.py +150 -150
- crawlo/middleware/response_filter.py +136 -136
- crawlo/middleware/retry.py +124 -124
- crawlo/mode_manager.py +287 -253
- crawlo/network/__init__.py +21 -21
- crawlo/network/request.py +375 -379
- crawlo/network/response.py +569 -664
- crawlo/pipelines/__init__.py +53 -22
- crawlo/pipelines/base_pipeline.py +452 -0
- crawlo/pipelines/bloom_dedup_pipeline.py +146 -146
- crawlo/pipelines/console_pipeline.py +39 -39
- crawlo/pipelines/csv_pipeline.py +316 -316
- crawlo/pipelines/database_dedup_pipeline.py +197 -197
- crawlo/pipelines/json_pipeline.py +218 -218
- crawlo/pipelines/memory_dedup_pipeline.py +105 -105
- crawlo/pipelines/mongo_pipeline.py +140 -132
- crawlo/pipelines/mysql_pipeline.py +469 -476
- crawlo/pipelines/pipeline_manager.py +100 -100
- crawlo/pipelines/redis_dedup_pipeline.py +155 -156
- crawlo/project.py +347 -347
- crawlo/queue/__init__.py +10 -0
- crawlo/queue/pqueue.py +38 -38
- crawlo/queue/queue_manager.py +591 -525
- crawlo/queue/redis_priority_queue.py +519 -370
- crawlo/settings/__init__.py +7 -7
- crawlo/settings/default_settings.py +284 -277
- crawlo/settings/setting_manager.py +219 -219
- crawlo/spider/__init__.py +657 -657
- crawlo/stats_collector.py +81 -81
- crawlo/subscriber.py +129 -129
- crawlo/task_manager.py +138 -138
- crawlo/templates/crawlo.cfg.tmpl +10 -10
- crawlo/templates/project/__init__.py.tmpl +2 -4
- crawlo/templates/project/items.py.tmpl +13 -17
- crawlo/templates/project/middlewares.py.tmpl +38 -38
- crawlo/templates/project/pipelines.py.tmpl +35 -36
- crawlo/templates/project/settings.py.tmpl +109 -111
- crawlo/templates/project/settings_distributed.py.tmpl +156 -159
- crawlo/templates/project/settings_gentle.py.tmpl +170 -176
- crawlo/templates/project/settings_high_performance.py.tmpl +171 -177
- crawlo/templates/project/settings_minimal.py.tmpl +98 -100
- crawlo/templates/project/settings_simple.py.tmpl +168 -174
- crawlo/templates/project/spiders/__init__.py.tmpl +9 -9
- crawlo/templates/run.py.tmpl +23 -23
- crawlo/templates/spider/spider.py.tmpl +32 -40
- crawlo/templates/spiders_init.py.tmpl +5 -10
- crawlo/tools/__init__.py +86 -189
- crawlo/tools/date_tools.py +289 -289
- crawlo/tools/distributed_coordinator.py +384 -384
- crawlo/tools/scenario_adapter.py +262 -262
- crawlo/tools/text_cleaner.py +232 -232
- crawlo/utils/__init__.py +50 -50
- crawlo/utils/batch_processor.py +276 -259
- crawlo/utils/config_manager.py +442 -0
- crawlo/utils/controlled_spider_mixin.py +439 -439
- crawlo/utils/db_helper.py +250 -250
- crawlo/utils/error_handler.py +410 -410
- crawlo/utils/fingerprint.py +121 -121
- crawlo/utils/func_tools.py +82 -82
- crawlo/utils/large_scale_helper.py +344 -344
- crawlo/utils/leak_detector.py +335 -0
- crawlo/utils/log.py +79 -79
- crawlo/utils/misc.py +81 -81
- crawlo/utils/mongo_connection_pool.py +157 -0
- crawlo/utils/mysql_connection_pool.py +197 -0
- crawlo/utils/performance_monitor.py +285 -285
- crawlo/utils/queue_helper.py +175 -175
- crawlo/utils/redis_checker.py +91 -0
- crawlo/utils/redis_connection_pool.py +578 -388
- crawlo/utils/redis_key_validator.py +198 -198
- crawlo/utils/request.py +278 -256
- crawlo/utils/request_serializer.py +225 -225
- crawlo/utils/resource_manager.py +337 -0
- crawlo/utils/selector_helper.py +137 -137
- crawlo/utils/singleton.py +70 -0
- crawlo/utils/spider_loader.py +201 -201
- crawlo/utils/text_helper.py +94 -94
- crawlo/utils/{url.py → url_utils.py} +39 -39
- crawlo-1.4.7.dist-info/METADATA +689 -0
- crawlo-1.4.7.dist-info/RECORD +347 -0
- examples/__init__.py +7 -7
- tests/__init__.py +7 -7
- tests/advanced_tools_example.py +217 -275
- tests/authenticated_proxy_example.py +110 -110
- tests/baidu_performance_test.py +108 -108
- tests/baidu_test.py +59 -59
- tests/bug_check_test.py +250 -250
- tests/cleaners_example.py +160 -160
- tests/comprehensive_framework_test.py +212 -212
- tests/comprehensive_test.py +81 -81
- tests/comprehensive_testing_summary.md +186 -186
- tests/config_validation_demo.py +142 -142
- tests/controlled_spider_example.py +205 -205
- tests/date_tools_example.py +180 -180
- tests/debug_configure.py +69 -69
- tests/debug_framework_logger.py +84 -84
- tests/debug_log_config.py +126 -126
- tests/debug_log_levels.py +63 -63
- tests/debug_pipelines.py +66 -66
- tests/detailed_log_test.py +233 -233
- tests/direct_selector_helper_test.py +96 -96
- tests/distributed_dedup_test.py +467 -0
- tests/distributed_test.py +66 -66
- tests/distributed_test_debug.py +76 -76
- tests/dynamic_loading_example.py +523 -523
- tests/dynamic_loading_test.py +104 -104
- tests/error_handling_example.py +171 -171
- tests/explain_mysql_update_behavior.py +76 -76
- tests/final_comprehensive_test.py +151 -151
- tests/final_log_test.py +260 -260
- tests/final_validation_test.py +182 -182
- tests/fix_log_test.py +142 -142
- tests/framework_performance_test.py +202 -202
- tests/log_buffering_test.py +111 -111
- tests/log_generation_timing_test.py +153 -153
- tests/monitor_redis_dedup.sh +72 -0
- tests/ofweek_scrapy/ofweek_scrapy/items.py +12 -12
- tests/ofweek_scrapy/ofweek_scrapy/middlewares.py +100 -100
- tests/ofweek_scrapy/ofweek_scrapy/pipelines.py +13 -13
- tests/ofweek_scrapy/ofweek_scrapy/settings.py +84 -84
- tests/ofweek_scrapy/scrapy.cfg +11 -11
- tests/optimized_performance_test.py +211 -211
- tests/performance_comparison.py +244 -244
- tests/queue_blocking_test.py +113 -113
- tests/queue_test.py +89 -89
- tests/redis_key_validation_demo.py +130 -130
- tests/request_params_example.py +150 -150
- tests/response_improvements_example.py +144 -144
- tests/scrapy_comparison/ofweek_scrapy.py +138 -138
- tests/scrapy_comparison/scrapy_test.py +133 -133
- tests/simple_cli_test.py +55 -0
- tests/simple_command_test.py +119 -119
- tests/simple_crawlo_test.py +126 -126
- tests/simple_follow_test.py +38 -38
- tests/simple_log_test2.py +137 -137
- tests/simple_optimization_test.py +128 -128
- tests/simple_queue_type_test.py +41 -41
- tests/simple_response_selector_test.py +94 -94
- tests/simple_selector_helper_test.py +154 -154
- tests/simple_selector_test.py +207 -207
- tests/simple_spider_test.py +49 -49
- tests/simple_url_test.py +73 -73
- tests/simulate_mysql_update_test.py +139 -139
- tests/spider_log_timing_test.py +177 -177
- tests/test_advanced_tools.py +148 -148
- tests/test_all_commands.py +230 -230
- tests/test_all_pipeline_fingerprints.py +133 -133
- tests/test_all_redis_key_configs.py +145 -145
- tests/test_asyncmy_usage.py +56 -56
- tests/test_batch_processor.py +178 -178
- tests/test_cleaners.py +54 -54
- tests/test_cli_arguments.py +119 -0
- tests/test_component_factory.py +174 -174
- tests/test_config_consistency.py +80 -80
- tests/test_config_merge.py +152 -152
- tests/test_config_validator.py +182 -182
- tests/test_controlled_spider_mixin.py +79 -79
- tests/test_crawler_process_import.py +38 -38
- tests/test_crawler_process_spider_modules.py +47 -47
- tests/test_crawlo_proxy_integration.py +114 -114
- tests/test_date_tools.py +123 -123
- tests/test_dedup_fix.py +220 -220
- tests/test_dedup_pipeline_consistency.py +124 -124
- tests/test_default_header_middleware.py +313 -313
- tests/test_distributed.py +65 -65
- tests/test_double_crawlo_fix.py +204 -204
- tests/test_double_crawlo_fix_simple.py +124 -124
- tests/test_download_delay_middleware.py +221 -221
- tests/test_downloader_proxy_compatibility.py +272 -272
- tests/test_edge_cases.py +305 -305
- tests/test_encoding_core.py +56 -56
- tests/test_encoding_detection.py +126 -126
- tests/test_enhanced_error_handler.py +270 -270
- tests/test_enhanced_error_handler_comprehensive.py +245 -245
- tests/test_error_handler_compatibility.py +112 -112
- tests/test_factories.py +252 -252
- tests/test_factory_compatibility.py +196 -196
- tests/test_final_validation.py +153 -153
- tests/test_fingerprint_consistency.py +135 -135
- tests/test_fingerprint_simple.py +51 -51
- tests/test_get_component_logger.py +83 -83
- tests/test_hash_performance.py +99 -99
- tests/test_integration.py +169 -169
- tests/test_item_dedup_redis_key.py +122 -122
- tests/test_large_scale_helper.py +235 -235
- tests/test_logging_enhancements.py +374 -374
- tests/test_logging_final.py +184 -184
- tests/test_logging_integration.py +312 -312
- tests/test_logging_system.py +282 -282
- tests/test_middleware_debug.py +141 -141
- tests/test_mode_consistency.py +51 -51
- tests/test_multi_directory.py +67 -67
- tests/test_multiple_spider_modules.py +80 -80
- tests/test_mysql_pipeline_config.py +164 -164
- tests/test_mysql_pipeline_error.py +98 -98
- tests/test_mysql_pipeline_init_log.py +82 -82
- tests/test_mysql_pipeline_integration.py +132 -132
- tests/test_mysql_pipeline_refactor.py +143 -143
- tests/test_mysql_pipeline_refactor_simple.py +85 -85
- tests/test_mysql_pipeline_robustness.py +195 -195
- tests/test_mysql_pipeline_types.py +88 -88
- tests/test_mysql_update_columns.py +93 -93
- tests/test_offsite_middleware.py +244 -244
- tests/test_offsite_middleware_simple.py +203 -203
- tests/test_optimized_selector_naming.py +100 -100
- tests/test_parsel.py +29 -29
- tests/test_performance.py +327 -327
- tests/test_performance_monitor.py +115 -115
- tests/test_pipeline_fingerprint_consistency.py +86 -86
- tests/test_priority_behavior.py +211 -211
- tests/test_priority_consistency.py +151 -151
- tests/test_priority_consistency_fixed.py +249 -249
- tests/test_proxy_health_check.py +32 -32
- tests/test_proxy_middleware.py +217 -217
- tests/test_proxy_middleware_enhanced.py +212 -212
- tests/test_proxy_middleware_integration.py +142 -142
- tests/test_proxy_middleware_refactored.py +207 -207
- tests/test_proxy_only.py +83 -83
- tests/test_proxy_providers.py +56 -56
- tests/test_proxy_stats.py +19 -19
- tests/test_proxy_strategies.py +59 -59
- tests/test_proxy_with_downloader.py +152 -152
- tests/test_queue_empty_check.py +41 -41
- tests/test_queue_manager_double_crawlo.py +173 -173
- tests/test_queue_manager_redis_key.py +179 -179
- tests/test_queue_naming.py +154 -154
- tests/test_queue_type.py +106 -106
- tests/test_queue_type_redis_config_consistency.py +130 -130
- tests/test_random_headers_default.py +322 -322
- tests/test_random_headers_necessity.py +308 -308
- tests/test_random_user_agent.py +72 -72
- tests/test_redis_config.py +28 -28
- tests/test_redis_connection_pool.py +294 -294
- tests/test_redis_key_naming.py +181 -181
- tests/test_redis_key_validator.py +123 -123
- tests/test_redis_queue.py +224 -224
- tests/test_redis_queue_name_fix.py +175 -175
- tests/test_redis_queue_type_fallback.py +129 -129
- tests/test_request_ignore_middleware.py +182 -182
- tests/test_request_params.py +111 -111
- tests/test_request_serialization.py +70 -70
- tests/test_response_code_middleware.py +349 -349
- tests/test_response_filter_middleware.py +427 -427
- tests/test_response_follow.py +104 -104
- tests/test_response_improvements.py +152 -152
- tests/test_response_selector_methods.py +92 -92
- tests/test_response_url_methods.py +70 -70
- tests/test_response_urljoin.py +86 -86
- tests/test_retry_middleware.py +333 -333
- tests/test_retry_middleware_realistic.py +273 -273
- tests/test_scheduler.py +252 -252
- tests/test_scheduler_config_update.py +133 -133
- tests/test_scrapy_style_encoding.py +112 -112
- tests/test_selector_helper.py +100 -100
- tests/test_selector_optimizations.py +146 -146
- tests/test_simple_response.py +61 -61
- tests/test_spider_loader.py +49 -49
- tests/test_spider_loader_comprehensive.py +69 -69
- tests/test_spider_modules.py +84 -84
- tests/test_spiders/test_spider.py +9 -9
- tests/test_telecom_spider_redis_key.py +205 -205
- tests/test_template_content.py +87 -87
- tests/test_template_redis_key.py +134 -134
- tests/test_tools.py +159 -159
- tests/test_user_agent_randomness.py +176 -176
- tests/test_user_agents.py +96 -96
- tests/untested_features_report.md +138 -138
- tests/verify_debug.py +51 -51
- tests/verify_distributed.py +117 -117
- tests/verify_log_fix.py +111 -111
- tests/verify_mysql_warnings.py +109 -109
- crawlo/logging/async_handler.py +0 -181
- crawlo/logging/monitor.py +0 -153
- crawlo/logging/sampler.py +0 -167
- crawlo/tools/authenticated_proxy.py +0 -241
- crawlo/tools/data_formatter.py +0 -226
- crawlo/tools/data_validator.py +0 -181
- crawlo/tools/encoding_converter.py +0 -127
- crawlo/tools/network_diagnostic.py +0 -365
- crawlo/tools/request_tools.py +0 -83
- crawlo/tools/retry_mechanism.py +0 -224
- crawlo/utils/env_config.py +0 -143
- crawlo/utils/large_scale_config.py +0 -287
- crawlo/utils/system.py +0 -11
- crawlo/utils/tools.py +0 -5
- crawlo-1.4.6.dist-info/METADATA +0 -329
- crawlo-1.4.6.dist-info/RECORD +0 -361
- tests/env_config_example.py +0 -134
- tests/ofweek_scrapy/ofweek_scrapy/spiders/ofweek_spider.py +0 -162
- tests/test_authenticated_proxy.py +0 -142
- tests/test_comprehensive.py +0 -147
- tests/test_dynamic_downloaders_proxy.py +0 -125
- tests/test_dynamic_proxy.py +0 -93
- tests/test_dynamic_proxy_config.py +0 -147
- tests/test_dynamic_proxy_real.py +0 -110
- tests/test_env_config.py +0 -122
- tests/test_framework_env_usage.py +0 -104
- tests/test_large_scale_config.py +0 -113
- tests/test_proxy_api.py +0 -265
- tests/test_real_scenario_proxy.py +0 -196
- tests/tools_example.py +0 -261
- {crawlo-1.4.6.dist-info → crawlo-1.4.7.dist-info}/WHEEL +0 -0
- {crawlo-1.4.6.dist-info → crawlo-1.4.7.dist-info}/entry_points.txt +0 -0
- {crawlo-1.4.6.dist-info → crawlo-1.4.7.dist-info}/top_level.txt +0 -0
|
@@ -0,0 +1,119 @@
|
|
|
1
|
+
#!/usr/bin/env python3
|
|
2
|
+
# -*- coding: utf-8 -*-
|
|
3
|
+
"""
|
|
4
|
+
测试CLI参数解析
|
|
5
|
+
验证crawlo run命令是否正确解析--log-level、--config和--concurrency参数
|
|
6
|
+
"""
|
|
7
|
+
|
|
8
|
+
import sys
|
|
9
|
+
import os
|
|
10
|
+
import unittest
|
|
11
|
+
from unittest.mock import patch, MagicMock
|
|
12
|
+
|
|
13
|
+
# 添加项目根目录到Python路径
|
|
14
|
+
sys.path.insert(0, os.path.join(os.path.dirname(__file__), '..'))
|
|
15
|
+
|
|
16
|
+
from crawlo.commands.run import main as run_main
|
|
17
|
+
|
|
18
|
+
|
|
19
|
+
class TestCLIArguments(unittest.TestCase):
    """Tests for ``crawlo run`` CLI argument parsing.

    Verifies that --log-level and --concurrency are forwarded to
    ``initialize_framework``, whether the settings travel as the first
    positional argument or as the ``custom_settings`` keyword.
    """

    def setUp(self):
        """Snapshot sys.path so each test starts from a clean copy."""
        self.original_path = sys.path[:]

    def tearDown(self):
        """Undo any sys.path mutation performed during the test."""
        sys.path = self.original_path[:]

    def _prepare_mocks(self, mock_crawler_process, mock_initialize, mock_find_project):
        """Configure the patched collaborators to simulate a project with a
        single registered spider named 'test_spider'.

        Returns the mocked CrawlerProcess instance for further stubbing.
        """
        mock_find_project.return_value = os.path.join(os.path.dirname(__file__), '..')
        mock_initialize.return_value = {}
        process = MagicMock()
        mock_crawler_process.return_value = process
        process.get_spider_names.return_value = ['test_spider']
        process.is_spider_registered.return_value = True
        process.get_spider_class.return_value = MagicMock(__name__='TestSpider')
        return process

    @staticmethod
    def _extract_settings(mock_initialize):
        """Return the settings mapping passed to ``initialize_framework``.

        Accepts either calling convention: settings as the first positional
        argument, or as the ``custom_settings`` keyword.  Returns None when
        neither was supplied; callers then skip their assertions, preserving
        the original lenient behaviour of these tests.
        """
        call_args = mock_initialize.call_args
        if call_args and call_args[0]:  # positional: initialize_framework(settings, ...)
            return call_args[0][0]
        if call_args and call_args[1]:  # keyword: initialize_framework(custom_settings=...)
            return call_args[1].get('custom_settings', {})
        return None

    @patch('crawlo.commands.run._find_project_root')
    @patch('crawlo.commands.run.initialize_framework')
    @patch('crawlo.commands.run.CrawlerProcess')
    def test_log_level_argument(self, mock_crawler_process, mock_initialize, mock_find_project):
        """--log-level should propagate as the LOG_LEVEL setting."""
        self._prepare_mocks(mock_crawler_process, mock_initialize, mock_find_project)

        run_main(['test_spider', '--log-level', 'DEBUG'])

        mock_initialize.assert_called_once()
        settings = self._extract_settings(mock_initialize)
        if settings is not None:
            self.assertEqual(settings.get('LOG_LEVEL'), 'DEBUG')

    @patch('crawlo.commands.run._find_project_root')
    @patch('crawlo.commands.run.initialize_framework')
    @patch('crawlo.commands.run.CrawlerProcess')
    def test_concurrency_argument(self, mock_crawler_process, mock_initialize, mock_find_project):
        """--concurrency should propagate as the CONCURRENCY setting (as int)."""
        self._prepare_mocks(mock_crawler_process, mock_initialize, mock_find_project)

        run_main(['test_spider', '--concurrency', '32'])

        mock_initialize.assert_called_once()
        settings = self._extract_settings(mock_initialize)
        if settings is not None:
            self.assertEqual(settings.get('CONCURRENCY'), 32)

    @patch('crawlo.commands.run._find_project_root')
    @patch('crawlo.commands.run.initialize_framework')
    @patch('crawlo.commands.run.CrawlerProcess')
    def test_combined_arguments(self, mock_crawler_process, mock_initialize, mock_find_project):
        """Both flags together should land in the same settings mapping."""
        self._prepare_mocks(mock_crawler_process, mock_initialize, mock_find_project)

        run_main(['test_spider', '--log-level', 'DEBUG', '--concurrency', '16'])

        mock_initialize.assert_called_once()
        settings = self._extract_settings(mock_initialize)
        if settings is not None:
            self.assertEqual(settings.get('LOG_LEVEL'), 'DEBUG')
            self.assertEqual(settings.get('CONCURRENCY'), 16)
|
+
|
|
118
|
+
# Allow direct execution: `python tests/test_cli_arguments.py` runs the suite.
if __name__ == '__main__':
    unittest.main()
|
tests/test_component_factory.py
CHANGED
|
@@ -1,175 +1,175 @@
|
|
|
1
|
-
#!/usr/bin/env python3
|
|
2
|
-
# -*- coding: utf-8 -*-
|
|
3
|
-
"""
|
|
4
|
-
组件工厂测试
|
|
5
|
-
"""
|
|
6
|
-
import sys
|
|
7
|
-
import os
|
|
8
|
-
import unittest
|
|
9
|
-
|
|
10
|
-
# 添加项目根目录到 Python 路径
|
|
11
|
-
sys.path.insert(0, os.path.join(os.path.dirname(__file__), '..'))
|
|
12
|
-
|
|
13
|
-
from crawlo.factories import (
|
|
14
|
-
ComponentRegistry,
|
|
15
|
-
ComponentFactory,
|
|
16
|
-
ComponentSpec,
|
|
17
|
-
DefaultComponentFactory,
|
|
18
|
-
CrawlerComponentFactory,
|
|
19
|
-
get_component_registry
|
|
20
|
-
)
|
|
21
|
-
from crawlo.factories.base import ComponentSpec
|
|
22
|
-
|
|
23
|
-
|
|
24
|
-
class TestComponent:
|
|
25
|
-
"""测试组件类"""
|
|
26
|
-
def __init__(self, name="test_component", value=42):
|
|
27
|
-
self.name = name
|
|
28
|
-
self.value = value
|
|
29
|
-
|
|
30
|
-
|
|
31
|
-
class TestComponentFactory(unittest.TestCase):
|
|
32
|
-
"""组件工厂测试类"""
|
|
33
|
-
|
|
34
|
-
def setUp(self):
|
|
35
|
-
"""测试前准备"""
|
|
36
|
-
self.registry = ComponentRegistry()
|
|
37
|
-
|
|
38
|
-
def test_component_spec_creation(self):
|
|
39
|
-
"""测试组件规范创建"""
|
|
40
|
-
def factory_func(**kwargs):
|
|
41
|
-
return TestComponent(**kwargs)
|
|
42
|
-
|
|
43
|
-
spec = ComponentSpec(
|
|
44
|
-
name="test_component",
|
|
45
|
-
component_type=TestComponent,
|
|
46
|
-
factory_func=factory_func,
|
|
47
|
-
dependencies=[],
|
|
48
|
-
singleton=False
|
|
49
|
-
)
|
|
50
|
-
|
|
51
|
-
self.assertEqual(spec.name, "test_component")
|
|
52
|
-
self.assertEqual(spec.component_type, TestComponent)
|
|
53
|
-
self.assertEqual(spec.dependencies, [])
|
|
54
|
-
self.assertFalse(spec.singleton)
|
|
55
|
-
|
|
56
|
-
def test_default_component_factory(self):
|
|
57
|
-
"""测试默认组件工厂"""
|
|
58
|
-
factory = DefaultComponentFactory()
|
|
59
|
-
|
|
60
|
-
def factory_func(**kwargs):
|
|
61
|
-
return TestComponent(**kwargs)
|
|
62
|
-
|
|
63
|
-
spec = ComponentSpec(
|
|
64
|
-
name="test_component",
|
|
65
|
-
component_type=TestComponent,
|
|
66
|
-
factory_func=factory_func
|
|
67
|
-
)
|
|
68
|
-
|
|
69
|
-
# 测试创建组件
|
|
70
|
-
component = factory.create(spec, name="created_component", value=100)
|
|
71
|
-
self.assertIsInstance(component, TestComponent)
|
|
72
|
-
self.assertEqual(component.name, "created_component")
|
|
73
|
-
self.assertEqual(component.value, 100)
|
|
74
|
-
|
|
75
|
-
# 测试单例模式
|
|
76
|
-
spec_singleton = ComponentSpec(
|
|
77
|
-
name="singleton_component",
|
|
78
|
-
component_type=TestComponent,
|
|
79
|
-
factory_func=factory_func,
|
|
80
|
-
singleton=True
|
|
81
|
-
)
|
|
82
|
-
|
|
83
|
-
component1 = factory.create(spec_singleton, name="singleton_1", value=200)
|
|
84
|
-
component2 = factory.create(spec_singleton, name="singleton_2", value=300)
|
|
85
|
-
|
|
86
|
-
# 单例应该返回相同的实例
|
|
87
|
-
self.assertIs(component1, component2)
|
|
88
|
-
self.assertEqual(component1.value, 200) # 应该是第一次创建时的值
|
|
89
|
-
|
|
90
|
-
def test_component_registry_registration(self):
|
|
91
|
-
"""测试组件注册表注册功能"""
|
|
92
|
-
def factory_func(**kwargs):
|
|
93
|
-
return TestComponent(**kwargs)
|
|
94
|
-
|
|
95
|
-
spec = ComponentSpec(
|
|
96
|
-
name="registered_component",
|
|
97
|
-
component_type=TestComponent,
|
|
98
|
-
factory_func=factory_func
|
|
99
|
-
)
|
|
100
|
-
|
|
101
|
-
# 注册组件规范
|
|
102
|
-
self.registry.register(spec)
|
|
103
|
-
|
|
104
|
-
# 验证注册
|
|
105
|
-
retrieved_spec = self.registry.get_spec("registered_component")
|
|
106
|
-
self.assertEqual(retrieved_spec, spec)
|
|
107
|
-
|
|
108
|
-
# 测试列出组件
|
|
109
|
-
components = self.registry.list_components()
|
|
110
|
-
self.assertIn("registered_component", components)
|
|
111
|
-
|
|
112
|
-
def test_component_registry_creation(self):
|
|
113
|
-
"""测试组件注册表创建功能"""
|
|
114
|
-
def factory_func(**kwargs):
|
|
115
|
-
return TestComponent(**kwargs)
|
|
116
|
-
|
|
117
|
-
spec = ComponentSpec(
|
|
118
|
-
name="creatable_component",
|
|
119
|
-
component_type=TestComponent,
|
|
120
|
-
factory_func=factory_func
|
|
121
|
-
)
|
|
122
|
-
|
|
123
|
-
# 注册组件规范
|
|
124
|
-
self.registry.register(spec)
|
|
125
|
-
|
|
126
|
-
# 创建组件
|
|
127
|
-
component = self.registry.create("creatable_component", name="created", value=500)
|
|
128
|
-
self.assertIsInstance(component, TestComponent)
|
|
129
|
-
self.assertEqual(component.name, "created")
|
|
130
|
-
self.assertEqual(component.value, 500)
|
|
131
|
-
|
|
132
|
-
def test_global_component_registry(self):
|
|
133
|
-
"""测试全局组件注册表"""
|
|
134
|
-
registry = get_component_registry()
|
|
135
|
-
self.assertIsInstance(registry, ComponentRegistry)
|
|
136
|
-
|
|
137
|
-
# 测试注册表是否包含预注册的组件
|
|
138
|
-
components = registry.list_components()
|
|
139
|
-
# 应该至少包含crawler组件
|
|
140
|
-
self.assertGreater(len(components), 0)
|
|
141
|
-
|
|
142
|
-
def test_crawler_component_factory(self):
|
|
143
|
-
"""测试Crawler组件工厂"""
|
|
144
|
-
factory = CrawlerComponentFactory()
|
|
145
|
-
|
|
146
|
-
# 测试支持检查
|
|
147
|
-
class MockEngine:
|
|
148
|
-
pass
|
|
149
|
-
|
|
150
|
-
self.assertTrue(factory.supports(MockEngine)) # 默认支持所有类型
|
|
151
|
-
|
|
152
|
-
# 测试创建功能(需要crawler依赖)
|
|
153
|
-
def mock_engine_factory(crawler=None, **kwargs):
|
|
154
|
-
if crawler is None:
|
|
155
|
-
raise ValueError("需要crawler实例")
|
|
156
|
-
return "mock_engine"
|
|
157
|
-
|
|
158
|
-
spec = ComponentSpec(
|
|
159
|
-
name="mock_engine",
|
|
160
|
-
component_type=type('MockEngine', (), {}),
|
|
161
|
-
factory_func=mock_engine_factory,
|
|
162
|
-
dependencies=['crawler']
|
|
163
|
-
)
|
|
164
|
-
|
|
165
|
-
# 测试缺少依赖时的错误处理
|
|
166
|
-
with self.assertRaises(ValueError):
|
|
167
|
-
factory.create(spec)
|
|
168
|
-
|
|
169
|
-
# 测试带依赖的创建
|
|
170
|
-
result = factory.create(spec, crawler="mock_crawler")
|
|
171
|
-
self.assertEqual(result, "mock_engine")
|
|
172
|
-
|
|
173
|
-
|
|
174
|
-
if __name__ == '__main__':
|
|
1
|
+
#!/usr/bin/env python3
|
|
2
|
+
# -*- coding: utf-8 -*-
|
|
3
|
+
"""
|
|
4
|
+
组件工厂测试
|
|
5
|
+
"""
|
|
6
|
+
import sys
|
|
7
|
+
import os
|
|
8
|
+
import unittest
|
|
9
|
+
|
|
10
|
+
# 添加项目根目录到 Python 路径
|
|
11
|
+
sys.path.insert(0, os.path.join(os.path.dirname(__file__), '..'))
|
|
12
|
+
|
|
13
|
+
from crawlo.factories import (
|
|
14
|
+
ComponentRegistry,
|
|
15
|
+
ComponentFactory,
|
|
16
|
+
ComponentSpec,
|
|
17
|
+
DefaultComponentFactory,
|
|
18
|
+
CrawlerComponentFactory,
|
|
19
|
+
get_component_registry
|
|
20
|
+
)
|
|
21
|
+
from crawlo.factories.base import ComponentSpec
|
|
22
|
+
|
|
23
|
+
|
|
24
|
+
class TestComponent:
    """Minimal component used as the factory target in the tests below."""

    def __init__(self, name="test_component", value=42):
        # Keep the constructor arguments verbatim so tests can assert on them.
        self.name, self.value = name, value
|
30
|
+
|
|
31
|
+
class TestComponentFactory(unittest.TestCase):
    """Exercises ComponentSpec, the factory classes, and the registry."""

    def setUp(self):
        # A fresh, isolated registry per test.
        self.registry = ComponentRegistry()

    def test_component_spec_creation(self):
        """ComponentSpec keeps every constructor argument as given."""
        def build(**kwargs):
            return TestComponent(**kwargs)

        spec = ComponentSpec(
            name="test_component",
            component_type=TestComponent,
            factory_func=build,
            dependencies=[],
            singleton=False,
        )

        self.assertEqual(spec.name, "test_component")
        self.assertEqual(spec.component_type, TestComponent)
        self.assertEqual(spec.dependencies, [])
        self.assertFalse(spec.singleton)

    def test_default_component_factory(self):
        """DefaultComponentFactory builds components and honours `singleton`."""
        factory = DefaultComponentFactory()

        def build(**kwargs):
            return TestComponent(**kwargs)

        plain = ComponentSpec(
            name="test_component",
            component_type=TestComponent,
            factory_func=build,
        )

        # A non-singleton spec yields a fresh instance with the given kwargs.
        made = factory.create(plain, name="created_component", value=100)
        self.assertIsInstance(made, TestComponent)
        self.assertEqual(made.name, "created_component")
        self.assertEqual(made.value, 100)

        shared = ComponentSpec(
            name="singleton_component",
            component_type=TestComponent,
            factory_func=build,
            singleton=True,
        )

        first = factory.create(shared, name="singleton_1", value=200)
        second = factory.create(shared, name="singleton_2", value=300)

        # A singleton spec must hand back the very same object both times,
        # frozen with the arguments of the first call.
        self.assertIs(first, second)
        self.assertEqual(first.value, 200)

    def test_component_registry_registration(self):
        """register() stores a spec retrievable via get_spec/list_components."""
        def build(**kwargs):
            return TestComponent(**kwargs)

        spec = ComponentSpec(
            name="registered_component",
            component_type=TestComponent,
            factory_func=build,
        )
        self.registry.register(spec)

        self.assertEqual(self.registry.get_spec("registered_component"), spec)
        self.assertIn("registered_component", self.registry.list_components())

    def test_component_registry_creation(self):
        """create() instantiates a registered component with given kwargs."""
        def build(**kwargs):
            return TestComponent(**kwargs)

        self.registry.register(ComponentSpec(
            name="creatable_component",
            component_type=TestComponent,
            factory_func=build,
        ))

        made = self.registry.create("creatable_component", name="created", value=500)
        self.assertIsInstance(made, TestComponent)
        self.assertEqual(made.name, "created")
        self.assertEqual(made.value, 500)

    def test_global_component_registry(self):
        """get_component_registry() yields a pre-populated ComponentRegistry."""
        registry = get_component_registry()
        self.assertIsInstance(registry, ComponentRegistry)
        # The global registry ships with at least one built-in component
        # (presumably the crawler itself).
        self.assertGreater(len(registry.list_components()), 0)

    def test_crawler_component_factory(self):
        """CrawlerComponentFactory enforces its declared 'crawler' dependency."""
        factory = CrawlerComponentFactory()

        class MockEngine:
            pass

        # supports() is permissive: any component type is accepted.
        self.assertTrue(factory.supports(MockEngine))

        def mock_engine_factory(crawler=None, **kwargs):
            if crawler is None:
                raise ValueError("需要crawler实例")
            return "mock_engine"

        spec = ComponentSpec(
            name="mock_engine",
            component_type=type('MockEngine', (), {}),
            factory_func=mock_engine_factory,
            dependencies=['crawler'],
        )

        # Without the crawler dependency the factory function raises.
        with self.assertRaises(ValueError):
            factory.create(spec)

        # With the dependency supplied, creation succeeds.
        self.assertEqual(factory.create(spec, crawler="mock_crawler"), "mock_engine")
|
|
172
|
+
|
|
173
|
+
|
|
174
|
+
if __name__ == '__main__':
|
|
175
175
|
unittest.main()
|
tests/test_config_consistency.py
CHANGED
|
@@ -1,81 +1,81 @@
|
|
|
1
|
-
#!/usr/bin/env python3
|
|
2
|
-
# -*- coding: utf-8 -*-
|
|
3
|
-
"""
|
|
4
|
-
测试配置一致性优化
|
|
5
|
-
"""
|
|
6
|
-
import asyncio
|
|
7
|
-
import sys
|
|
8
|
-
import os
|
|
9
|
-
|
|
10
|
-
# 添加项目根目录到路径
|
|
11
|
-
sys.path.insert(0, os.path.join(os.path.dirname(__file__), '..'))
|
|
12
|
-
|
|
13
|
-
from crawlo.project import get_settings
|
|
14
|
-
from crawlo.crawler import Crawler
|
|
15
|
-
from crawlo.spider import Spider
|
|
16
|
-
from crawlo.utils.log import get_logger
|
|
17
|
-
from crawlo import Request
|
|
18
|
-
|
|
19
|
-
|
|
20
|
-
class TestSpider(Spider):
|
|
21
|
-
name = "test_spider"
|
|
22
|
-
|
|
23
|
-
def start_requests(self):
|
|
24
|
-
yield Request("https://example.com")
|
|
25
|
-
|
|
26
|
-
|
|
27
|
-
async def test_config_consistency():
|
|
28
|
-
"""测试配置一致性优化"""
|
|
29
|
-
print("测试配置一致性优化...")
|
|
30
|
-
|
|
31
|
-
# 模拟单机模式配置但Redis可用的情况
|
|
32
|
-
custom_settings = {
|
|
33
|
-
'QUEUE_TYPE': 'auto', # 自动检测模式
|
|
34
|
-
'CONCURRENCY': 4,
|
|
35
|
-
'DOWNLOAD_DELAY': 1.0,
|
|
36
|
-
'LOG_LEVEL': 'INFO'
|
|
37
|
-
}
|
|
38
|
-
|
|
39
|
-
try:
|
|
40
|
-
# 获取配置
|
|
41
|
-
settings = get_settings(custom_settings)
|
|
42
|
-
|
|
43
|
-
# 创建爬虫实例
|
|
44
|
-
crawler = Crawler(TestSpider, settings)
|
|
45
|
-
|
|
46
|
-
# 启动爬虫(这会触发调度器初始化)
|
|
47
|
-
print("开始初始化爬虫...")
|
|
48
|
-
await crawler.crawl()
|
|
49
|
-
|
|
50
|
-
print("配置一致性测试完成")
|
|
51
|
-
|
|
52
|
-
except Exception as e:
|
|
53
|
-
print(f"测试失败: {e}")
|
|
54
|
-
import traceback
|
|
55
|
-
traceback.print_exc()
|
|
56
|
-
|
|
57
|
-
|
|
58
|
-
async def main():
|
|
59
|
-
"""主测试函数"""
|
|
60
|
-
print("开始测试配置一致性优化...")
|
|
61
|
-
print("=" * 50)
|
|
62
|
-
|
|
63
|
-
try:
|
|
64
|
-
await test_config_consistency()
|
|
65
|
-
|
|
66
|
-
print("=" * 50)
|
|
67
|
-
print("配置一致性优化测试完成!")
|
|
68
|
-
|
|
69
|
-
except Exception as e:
|
|
70
|
-
print("=" * 50)
|
|
71
|
-
print(f"测试失败: {e}")
|
|
72
|
-
import traceback
|
|
73
|
-
traceback.print_exc()
|
|
74
|
-
|
|
75
|
-
|
|
76
|
-
if __name__ == "__main__":
|
|
77
|
-
# 设置日志级别
|
|
78
|
-
import logging
|
|
79
|
-
logging.basicConfig(level=logging.INFO)
|
|
80
|
-
|
|
1
|
+
#!/usr/bin/env python3
|
|
2
|
+
# -*- coding: utf-8 -*-
|
|
3
|
+
"""
|
|
4
|
+
测试配置一致性优化
|
|
5
|
+
"""
|
|
6
|
+
import asyncio
|
|
7
|
+
import sys
|
|
8
|
+
import os
|
|
9
|
+
|
|
10
|
+
# 添加项目根目录到路径
|
|
11
|
+
sys.path.insert(0, os.path.join(os.path.dirname(__file__), '..'))
|
|
12
|
+
|
|
13
|
+
from crawlo.project import get_settings
|
|
14
|
+
from crawlo.crawler import Crawler
|
|
15
|
+
from crawlo.spider import Spider
|
|
16
|
+
from crawlo.utils.log import get_logger
|
|
17
|
+
from crawlo import Request
|
|
18
|
+
|
|
19
|
+
|
|
20
|
+
class TestSpider(Spider):
|
|
21
|
+
name = "test_spider"
|
|
22
|
+
|
|
23
|
+
def start_requests(self):
|
|
24
|
+
yield Request("https://example.com")
|
|
25
|
+
|
|
26
|
+
|
|
27
|
+
async def test_config_consistency():
|
|
28
|
+
"""测试配置一致性优化"""
|
|
29
|
+
print("测试配置一致性优化...")
|
|
30
|
+
|
|
31
|
+
# 模拟单机模式配置但Redis可用的情况
|
|
32
|
+
custom_settings = {
|
|
33
|
+
'QUEUE_TYPE': 'auto', # 自动检测模式
|
|
34
|
+
'CONCURRENCY': 4,
|
|
35
|
+
'DOWNLOAD_DELAY': 1.0,
|
|
36
|
+
'LOG_LEVEL': 'INFO'
|
|
37
|
+
}
|
|
38
|
+
|
|
39
|
+
try:
|
|
40
|
+
# 获取配置
|
|
41
|
+
settings = get_settings(custom_settings)
|
|
42
|
+
|
|
43
|
+
# 创建爬虫实例
|
|
44
|
+
crawler = Crawler(TestSpider, settings)
|
|
45
|
+
|
|
46
|
+
# 启动爬虫(这会触发调度器初始化)
|
|
47
|
+
print("开始初始化爬虫...")
|
|
48
|
+
await crawler.crawl()
|
|
49
|
+
|
|
50
|
+
print("配置一致性测试完成")
|
|
51
|
+
|
|
52
|
+
except Exception as e:
|
|
53
|
+
print(f"测试失败: {e}")
|
|
54
|
+
import traceback
|
|
55
|
+
traceback.print_exc()
|
|
56
|
+
|
|
57
|
+
|
|
58
|
+
async def main():
|
|
59
|
+
"""主测试函数"""
|
|
60
|
+
print("开始测试配置一致性优化...")
|
|
61
|
+
print("=" * 50)
|
|
62
|
+
|
|
63
|
+
try:
|
|
64
|
+
await test_config_consistency()
|
|
65
|
+
|
|
66
|
+
print("=" * 50)
|
|
67
|
+
print("配置一致性优化测试完成!")
|
|
68
|
+
|
|
69
|
+
except Exception as e:
|
|
70
|
+
print("=" * 50)
|
|
71
|
+
print(f"测试失败: {e}")
|
|
72
|
+
import traceback
|
|
73
|
+
traceback.print_exc()
|
|
74
|
+
|
|
75
|
+
|
|
76
|
+
if __name__ == "__main__":
|
|
77
|
+
# 设置日志级别
|
|
78
|
+
import logging
|
|
79
|
+
logging.basicConfig(level=logging.INFO)
|
|
80
|
+
|
|
81
81
|
asyncio.run(main())
|