crawlo 1.4.6__py3-none-any.whl → 1.4.7__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of crawlo might be problematic. Click here for more details.
- crawlo/__init__.py +90 -89
- crawlo/__version__.py +1 -1
- crawlo/cli.py +75 -75
- crawlo/commands/__init__.py +14 -14
- crawlo/commands/check.py +594 -594
- crawlo/commands/genspider.py +186 -186
- crawlo/commands/help.py +140 -138
- crawlo/commands/list.py +155 -155
- crawlo/commands/run.py +379 -341
- crawlo/commands/startproject.py +460 -460
- crawlo/commands/stats.py +187 -187
- crawlo/commands/utils.py +196 -196
- crawlo/config.py +320 -312
- crawlo/config_validator.py +277 -277
- crawlo/core/__init__.py +52 -52
- crawlo/core/engine.py +451 -438
- crawlo/core/processor.py +47 -47
- crawlo/core/scheduler.py +290 -291
- crawlo/crawler.py +698 -657
- crawlo/data/__init__.py +5 -5
- crawlo/data/user_agents.py +194 -194
- crawlo/downloader/__init__.py +280 -276
- crawlo/downloader/aiohttp_downloader.py +233 -233
- crawlo/downloader/cffi_downloader.py +250 -247
- crawlo/downloader/httpx_downloader.py +265 -259
- crawlo/downloader/hybrid_downloader.py +212 -212
- crawlo/downloader/playwright_downloader.py +425 -402
- crawlo/downloader/selenium_downloader.py +486 -472
- crawlo/event.py +45 -11
- crawlo/exceptions.py +215 -82
- crawlo/extension/__init__.py +65 -64
- crawlo/extension/health_check.py +141 -141
- crawlo/extension/log_interval.py +94 -94
- crawlo/extension/log_stats.py +70 -70
- crawlo/extension/logging_extension.py +53 -61
- crawlo/extension/memory_monitor.py +104 -104
- crawlo/extension/performance_profiler.py +133 -133
- crawlo/extension/request_recorder.py +107 -107
- crawlo/factories/__init__.py +27 -27
- crawlo/factories/base.py +68 -68
- crawlo/factories/crawler.py +104 -103
- crawlo/factories/registry.py +84 -84
- crawlo/factories/utils.py +135 -0
- crawlo/filters/__init__.py +170 -153
- crawlo/filters/aioredis_filter.py +348 -264
- crawlo/filters/memory_filter.py +261 -276
- crawlo/framework.py +306 -292
- crawlo/initialization/__init__.py +44 -44
- crawlo/initialization/built_in.py +391 -434
- crawlo/initialization/context.py +141 -141
- crawlo/initialization/core.py +240 -194
- crawlo/initialization/phases.py +230 -149
- crawlo/initialization/registry.py +143 -145
- crawlo/initialization/utils.py +49 -0
- crawlo/interfaces.py +23 -23
- crawlo/items/__init__.py +23 -23
- crawlo/items/base.py +23 -23
- crawlo/items/fields.py +52 -52
- crawlo/items/items.py +104 -104
- crawlo/logging/__init__.py +42 -46
- crawlo/logging/config.py +277 -197
- crawlo/logging/factory.py +175 -171
- crawlo/logging/manager.py +104 -112
- crawlo/middleware/__init__.py +87 -24
- crawlo/middleware/default_header.py +132 -132
- crawlo/middleware/download_delay.py +104 -104
- crawlo/middleware/middleware_manager.py +142 -142
- crawlo/middleware/offsite.py +123 -123
- crawlo/middleware/proxy.py +209 -209
- crawlo/middleware/request_ignore.py +86 -86
- crawlo/middleware/response_code.py +150 -150
- crawlo/middleware/response_filter.py +136 -136
- crawlo/middleware/retry.py +124 -124
- crawlo/mode_manager.py +287 -253
- crawlo/network/__init__.py +21 -21
- crawlo/network/request.py +375 -379
- crawlo/network/response.py +569 -664
- crawlo/pipelines/__init__.py +53 -22
- crawlo/pipelines/base_pipeline.py +452 -0
- crawlo/pipelines/bloom_dedup_pipeline.py +146 -146
- crawlo/pipelines/console_pipeline.py +39 -39
- crawlo/pipelines/csv_pipeline.py +316 -316
- crawlo/pipelines/database_dedup_pipeline.py +197 -197
- crawlo/pipelines/json_pipeline.py +218 -218
- crawlo/pipelines/memory_dedup_pipeline.py +105 -105
- crawlo/pipelines/mongo_pipeline.py +140 -132
- crawlo/pipelines/mysql_pipeline.py +469 -476
- crawlo/pipelines/pipeline_manager.py +100 -100
- crawlo/pipelines/redis_dedup_pipeline.py +155 -156
- crawlo/project.py +347 -347
- crawlo/queue/__init__.py +10 -0
- crawlo/queue/pqueue.py +38 -38
- crawlo/queue/queue_manager.py +591 -525
- crawlo/queue/redis_priority_queue.py +519 -370
- crawlo/settings/__init__.py +7 -7
- crawlo/settings/default_settings.py +284 -277
- crawlo/settings/setting_manager.py +219 -219
- crawlo/spider/__init__.py +657 -657
- crawlo/stats_collector.py +81 -81
- crawlo/subscriber.py +129 -129
- crawlo/task_manager.py +138 -138
- crawlo/templates/crawlo.cfg.tmpl +10 -10
- crawlo/templates/project/__init__.py.tmpl +2 -4
- crawlo/templates/project/items.py.tmpl +13 -17
- crawlo/templates/project/middlewares.py.tmpl +38 -38
- crawlo/templates/project/pipelines.py.tmpl +35 -36
- crawlo/templates/project/settings.py.tmpl +109 -111
- crawlo/templates/project/settings_distributed.py.tmpl +156 -159
- crawlo/templates/project/settings_gentle.py.tmpl +170 -176
- crawlo/templates/project/settings_high_performance.py.tmpl +171 -177
- crawlo/templates/project/settings_minimal.py.tmpl +98 -100
- crawlo/templates/project/settings_simple.py.tmpl +168 -174
- crawlo/templates/project/spiders/__init__.py.tmpl +9 -9
- crawlo/templates/run.py.tmpl +23 -23
- crawlo/templates/spider/spider.py.tmpl +32 -40
- crawlo/templates/spiders_init.py.tmpl +5 -10
- crawlo/tools/__init__.py +86 -189
- crawlo/tools/date_tools.py +289 -289
- crawlo/tools/distributed_coordinator.py +384 -384
- crawlo/tools/scenario_adapter.py +262 -262
- crawlo/tools/text_cleaner.py +232 -232
- crawlo/utils/__init__.py +50 -50
- crawlo/utils/batch_processor.py +276 -259
- crawlo/utils/config_manager.py +442 -0
- crawlo/utils/controlled_spider_mixin.py +439 -439
- crawlo/utils/db_helper.py +250 -250
- crawlo/utils/error_handler.py +410 -410
- crawlo/utils/fingerprint.py +121 -121
- crawlo/utils/func_tools.py +82 -82
- crawlo/utils/large_scale_helper.py +344 -344
- crawlo/utils/leak_detector.py +335 -0
- crawlo/utils/log.py +79 -79
- crawlo/utils/misc.py +81 -81
- crawlo/utils/mongo_connection_pool.py +157 -0
- crawlo/utils/mysql_connection_pool.py +197 -0
- crawlo/utils/performance_monitor.py +285 -285
- crawlo/utils/queue_helper.py +175 -175
- crawlo/utils/redis_checker.py +91 -0
- crawlo/utils/redis_connection_pool.py +578 -388
- crawlo/utils/redis_key_validator.py +198 -198
- crawlo/utils/request.py +278 -256
- crawlo/utils/request_serializer.py +225 -225
- crawlo/utils/resource_manager.py +337 -0
- crawlo/utils/selector_helper.py +137 -137
- crawlo/utils/singleton.py +70 -0
- crawlo/utils/spider_loader.py +201 -201
- crawlo/utils/text_helper.py +94 -94
- crawlo/utils/{url.py → url_utils.py} +39 -39
- crawlo-1.4.7.dist-info/METADATA +689 -0
- crawlo-1.4.7.dist-info/RECORD +347 -0
- examples/__init__.py +7 -7
- tests/__init__.py +7 -7
- tests/advanced_tools_example.py +217 -275
- tests/authenticated_proxy_example.py +110 -110
- tests/baidu_performance_test.py +108 -108
- tests/baidu_test.py +59 -59
- tests/bug_check_test.py +250 -250
- tests/cleaners_example.py +160 -160
- tests/comprehensive_framework_test.py +212 -212
- tests/comprehensive_test.py +81 -81
- tests/comprehensive_testing_summary.md +186 -186
- tests/config_validation_demo.py +142 -142
- tests/controlled_spider_example.py +205 -205
- tests/date_tools_example.py +180 -180
- tests/debug_configure.py +69 -69
- tests/debug_framework_logger.py +84 -84
- tests/debug_log_config.py +126 -126
- tests/debug_log_levels.py +63 -63
- tests/debug_pipelines.py +66 -66
- tests/detailed_log_test.py +233 -233
- tests/direct_selector_helper_test.py +96 -96
- tests/distributed_dedup_test.py +467 -0
- tests/distributed_test.py +66 -66
- tests/distributed_test_debug.py +76 -76
- tests/dynamic_loading_example.py +523 -523
- tests/dynamic_loading_test.py +104 -104
- tests/error_handling_example.py +171 -171
- tests/explain_mysql_update_behavior.py +76 -76
- tests/final_comprehensive_test.py +151 -151
- tests/final_log_test.py +260 -260
- tests/final_validation_test.py +182 -182
- tests/fix_log_test.py +142 -142
- tests/framework_performance_test.py +202 -202
- tests/log_buffering_test.py +111 -111
- tests/log_generation_timing_test.py +153 -153
- tests/monitor_redis_dedup.sh +72 -0
- tests/ofweek_scrapy/ofweek_scrapy/items.py +12 -12
- tests/ofweek_scrapy/ofweek_scrapy/middlewares.py +100 -100
- tests/ofweek_scrapy/ofweek_scrapy/pipelines.py +13 -13
- tests/ofweek_scrapy/ofweek_scrapy/settings.py +84 -84
- tests/ofweek_scrapy/scrapy.cfg +11 -11
- tests/optimized_performance_test.py +211 -211
- tests/performance_comparison.py +244 -244
- tests/queue_blocking_test.py +113 -113
- tests/queue_test.py +89 -89
- tests/redis_key_validation_demo.py +130 -130
- tests/request_params_example.py +150 -150
- tests/response_improvements_example.py +144 -144
- tests/scrapy_comparison/ofweek_scrapy.py +138 -138
- tests/scrapy_comparison/scrapy_test.py +133 -133
- tests/simple_cli_test.py +55 -0
- tests/simple_command_test.py +119 -119
- tests/simple_crawlo_test.py +126 -126
- tests/simple_follow_test.py +38 -38
- tests/simple_log_test2.py +137 -137
- tests/simple_optimization_test.py +128 -128
- tests/simple_queue_type_test.py +41 -41
- tests/simple_response_selector_test.py +94 -94
- tests/simple_selector_helper_test.py +154 -154
- tests/simple_selector_test.py +207 -207
- tests/simple_spider_test.py +49 -49
- tests/simple_url_test.py +73 -73
- tests/simulate_mysql_update_test.py +139 -139
- tests/spider_log_timing_test.py +177 -177
- tests/test_advanced_tools.py +148 -148
- tests/test_all_commands.py +230 -230
- tests/test_all_pipeline_fingerprints.py +133 -133
- tests/test_all_redis_key_configs.py +145 -145
- tests/test_asyncmy_usage.py +56 -56
- tests/test_batch_processor.py +178 -178
- tests/test_cleaners.py +54 -54
- tests/test_cli_arguments.py +119 -0
- tests/test_component_factory.py +174 -174
- tests/test_config_consistency.py +80 -80
- tests/test_config_merge.py +152 -152
- tests/test_config_validator.py +182 -182
- tests/test_controlled_spider_mixin.py +79 -79
- tests/test_crawler_process_import.py +38 -38
- tests/test_crawler_process_spider_modules.py +47 -47
- tests/test_crawlo_proxy_integration.py +114 -114
- tests/test_date_tools.py +123 -123
- tests/test_dedup_fix.py +220 -220
- tests/test_dedup_pipeline_consistency.py +124 -124
- tests/test_default_header_middleware.py +313 -313
- tests/test_distributed.py +65 -65
- tests/test_double_crawlo_fix.py +204 -204
- tests/test_double_crawlo_fix_simple.py +124 -124
- tests/test_download_delay_middleware.py +221 -221
- tests/test_downloader_proxy_compatibility.py +272 -272
- tests/test_edge_cases.py +305 -305
- tests/test_encoding_core.py +56 -56
- tests/test_encoding_detection.py +126 -126
- tests/test_enhanced_error_handler.py +270 -270
- tests/test_enhanced_error_handler_comprehensive.py +245 -245
- tests/test_error_handler_compatibility.py +112 -112
- tests/test_factories.py +252 -252
- tests/test_factory_compatibility.py +196 -196
- tests/test_final_validation.py +153 -153
- tests/test_fingerprint_consistency.py +135 -135
- tests/test_fingerprint_simple.py +51 -51
- tests/test_get_component_logger.py +83 -83
- tests/test_hash_performance.py +99 -99
- tests/test_integration.py +169 -169
- tests/test_item_dedup_redis_key.py +122 -122
- tests/test_large_scale_helper.py +235 -235
- tests/test_logging_enhancements.py +374 -374
- tests/test_logging_final.py +184 -184
- tests/test_logging_integration.py +312 -312
- tests/test_logging_system.py +282 -282
- tests/test_middleware_debug.py +141 -141
- tests/test_mode_consistency.py +51 -51
- tests/test_multi_directory.py +67 -67
- tests/test_multiple_spider_modules.py +80 -80
- tests/test_mysql_pipeline_config.py +164 -164
- tests/test_mysql_pipeline_error.py +98 -98
- tests/test_mysql_pipeline_init_log.py +82 -82
- tests/test_mysql_pipeline_integration.py +132 -132
- tests/test_mysql_pipeline_refactor.py +143 -143
- tests/test_mysql_pipeline_refactor_simple.py +85 -85
- tests/test_mysql_pipeline_robustness.py +195 -195
- tests/test_mysql_pipeline_types.py +88 -88
- tests/test_mysql_update_columns.py +93 -93
- tests/test_offsite_middleware.py +244 -244
- tests/test_offsite_middleware_simple.py +203 -203
- tests/test_optimized_selector_naming.py +100 -100
- tests/test_parsel.py +29 -29
- tests/test_performance.py +327 -327
- tests/test_performance_monitor.py +115 -115
- tests/test_pipeline_fingerprint_consistency.py +86 -86
- tests/test_priority_behavior.py +211 -211
- tests/test_priority_consistency.py +151 -151
- tests/test_priority_consistency_fixed.py +249 -249
- tests/test_proxy_health_check.py +32 -32
- tests/test_proxy_middleware.py +217 -217
- tests/test_proxy_middleware_enhanced.py +212 -212
- tests/test_proxy_middleware_integration.py +142 -142
- tests/test_proxy_middleware_refactored.py +207 -207
- tests/test_proxy_only.py +83 -83
- tests/test_proxy_providers.py +56 -56
- tests/test_proxy_stats.py +19 -19
- tests/test_proxy_strategies.py +59 -59
- tests/test_proxy_with_downloader.py +152 -152
- tests/test_queue_empty_check.py +41 -41
- tests/test_queue_manager_double_crawlo.py +173 -173
- tests/test_queue_manager_redis_key.py +179 -179
- tests/test_queue_naming.py +154 -154
- tests/test_queue_type.py +106 -106
- tests/test_queue_type_redis_config_consistency.py +130 -130
- tests/test_random_headers_default.py +322 -322
- tests/test_random_headers_necessity.py +308 -308
- tests/test_random_user_agent.py +72 -72
- tests/test_redis_config.py +28 -28
- tests/test_redis_connection_pool.py +294 -294
- tests/test_redis_key_naming.py +181 -181
- tests/test_redis_key_validator.py +123 -123
- tests/test_redis_queue.py +224 -224
- tests/test_redis_queue_name_fix.py +175 -175
- tests/test_redis_queue_type_fallback.py +129 -129
- tests/test_request_ignore_middleware.py +182 -182
- tests/test_request_params.py +111 -111
- tests/test_request_serialization.py +70 -70
- tests/test_response_code_middleware.py +349 -349
- tests/test_response_filter_middleware.py +427 -427
- tests/test_response_follow.py +104 -104
- tests/test_response_improvements.py +152 -152
- tests/test_response_selector_methods.py +92 -92
- tests/test_response_url_methods.py +70 -70
- tests/test_response_urljoin.py +86 -86
- tests/test_retry_middleware.py +333 -333
- tests/test_retry_middleware_realistic.py +273 -273
- tests/test_scheduler.py +252 -252
- tests/test_scheduler_config_update.py +133 -133
- tests/test_scrapy_style_encoding.py +112 -112
- tests/test_selector_helper.py +100 -100
- tests/test_selector_optimizations.py +146 -146
- tests/test_simple_response.py +61 -61
- tests/test_spider_loader.py +49 -49
- tests/test_spider_loader_comprehensive.py +69 -69
- tests/test_spider_modules.py +84 -84
- tests/test_spiders/test_spider.py +9 -9
- tests/test_telecom_spider_redis_key.py +205 -205
- tests/test_template_content.py +87 -87
- tests/test_template_redis_key.py +134 -134
- tests/test_tools.py +159 -159
- tests/test_user_agent_randomness.py +176 -176
- tests/test_user_agents.py +96 -96
- tests/untested_features_report.md +138 -138
- tests/verify_debug.py +51 -51
- tests/verify_distributed.py +117 -117
- tests/verify_log_fix.py +111 -111
- tests/verify_mysql_warnings.py +109 -109
- crawlo/logging/async_handler.py +0 -181
- crawlo/logging/monitor.py +0 -153
- crawlo/logging/sampler.py +0 -167
- crawlo/tools/authenticated_proxy.py +0 -241
- crawlo/tools/data_formatter.py +0 -226
- crawlo/tools/data_validator.py +0 -181
- crawlo/tools/encoding_converter.py +0 -127
- crawlo/tools/network_diagnostic.py +0 -365
- crawlo/tools/request_tools.py +0 -83
- crawlo/tools/retry_mechanism.py +0 -224
- crawlo/utils/env_config.py +0 -143
- crawlo/utils/large_scale_config.py +0 -287
- crawlo/utils/system.py +0 -11
- crawlo/utils/tools.py +0 -5
- crawlo-1.4.6.dist-info/METADATA +0 -329
- crawlo-1.4.6.dist-info/RECORD +0 -361
- tests/env_config_example.py +0 -134
- tests/ofweek_scrapy/ofweek_scrapy/spiders/ofweek_spider.py +0 -162
- tests/test_authenticated_proxy.py +0 -142
- tests/test_comprehensive.py +0 -147
- tests/test_dynamic_downloaders_proxy.py +0 -125
- tests/test_dynamic_proxy.py +0 -93
- tests/test_dynamic_proxy_config.py +0 -147
- tests/test_dynamic_proxy_real.py +0 -110
- tests/test_env_config.py +0 -122
- tests/test_framework_env_usage.py +0 -104
- tests/test_large_scale_config.py +0 -113
- tests/test_proxy_api.py +0 -265
- tests/test_real_scenario_proxy.py +0 -196
- tests/tools_example.py +0 -261
- {crawlo-1.4.6.dist-info → crawlo-1.4.7.dist-info}/WHEEL +0 -0
- {crawlo-1.4.6.dist-info → crawlo-1.4.7.dist-info}/entry_points.txt +0 -0
- {crawlo-1.4.6.dist-info → crawlo-1.4.7.dist-info}/top_level.txt +0 -0
tests/test_config_validator.py
CHANGED
|
@@ -1,183 +1,183 @@
|
|
|
1
|
-
#!/usr/bin/env python3
|
|
2
|
-
# -*- coding: utf-8 -*-
|
|
3
|
-
"""
|
|
4
|
-
配置验证器测试
|
|
5
|
-
测试 Crawlo 框架的配置验证功能
|
|
6
|
-
"""
|
|
7
|
-
|
|
8
|
-
import sys
|
|
9
|
-
import os
|
|
10
|
-
import unittest
|
|
11
|
-
|
|
12
|
-
# 添加项目根目录到路径
|
|
13
|
-
sys.path.insert(0, os.path.join(os.path.dirname(__file__), '..'))
|
|
14
|
-
|
|
15
|
-
from crawlo.config_validator import ConfigValidator
|
|
16
|
-
|
|
17
|
-
|
|
18
|
-
class TestConfigValidator(unittest.TestCase):
|
|
19
|
-
"""配置验证器测试类"""
|
|
20
|
-
|
|
21
|
-
def setUp(self):
|
|
22
|
-
"""测试初始化"""
|
|
23
|
-
self.validator = ConfigValidator()
|
|
24
|
-
|
|
25
|
-
def test_valid_standalone_config(self):
|
|
26
|
-
"""测试有效的单机模式配置"""
|
|
27
|
-
config = {
|
|
28
|
-
'PROJECT_NAME': 'test_project',
|
|
29
|
-
'QUEUE_TYPE': 'memory',
|
|
30
|
-
'CONCURRENCY': 8,
|
|
31
|
-
'DOWNLOAD_DELAY': 1.0,
|
|
32
|
-
'LOG_LEVEL': 'INFO',
|
|
33
|
-
'MIDDLEWARES': [
|
|
34
|
-
'crawlo.middleware.request_ignore.RequestIgnoreMiddleware',
|
|
35
|
-
'crawlo.middleware.download_delay.DownloadDelayMiddleware',
|
|
36
|
-
],
|
|
37
|
-
'PIPELINES': [
|
|
38
|
-
'crawlo.pipelines.console_pipeline.ConsolePipeline',
|
|
39
|
-
]
|
|
40
|
-
}
|
|
41
|
-
|
|
42
|
-
is_valid, errors, warnings = self.validator.validate(config)
|
|
43
|
-
self.assertTrue(is_valid)
|
|
44
|
-
self.assertEqual(len(errors), 0)
|
|
45
|
-
|
|
46
|
-
def test_valid_distributed_config(self):
|
|
47
|
-
"""测试有效的分布式模式配置"""
|
|
48
|
-
config = {
|
|
49
|
-
'PROJECT_NAME': 'test_project',
|
|
50
|
-
'QUEUE_TYPE': 'redis',
|
|
51
|
-
'CONCURRENCY': 16,
|
|
52
|
-
'DOWNLOAD_DELAY': 0.5,
|
|
53
|
-
'LOG_LEVEL': 'INFO',
|
|
54
|
-
'REDIS_HOST': '127.0.0.1',
|
|
55
|
-
'REDIS_PORT': 6379,
|
|
56
|
-
'REDIS_PASSWORD': '',
|
|
57
|
-
'SCHEDULER_QUEUE_NAME': 'crawlo:test_project:queue:requests',
|
|
58
|
-
'MIDDLEWARES': [
|
|
59
|
-
'crawlo.middleware.request_ignore.RequestIgnoreMiddleware',
|
|
60
|
-
'crawlo.middleware.download_delay.DownloadDelayMiddleware',
|
|
61
|
-
],
|
|
62
|
-
'PIPELINES': [
|
|
63
|
-
'crawlo.pipelines.console_pipeline.ConsolePipeline',
|
|
64
|
-
'crawlo.pipelines.redis_dedup_pipeline.RedisDedupPipeline',
|
|
65
|
-
],
|
|
66
|
-
'EXTENSIONS': [
|
|
67
|
-
'crawlo.extension.memory_monitor.MemoryMonitorExtension',
|
|
68
|
-
]
|
|
69
|
-
}
|
|
70
|
-
|
|
71
|
-
is_valid, errors, warnings = self.validator.validate(config)
|
|
72
|
-
self.assertTrue(is_valid)
|
|
73
|
-
self.assertEqual(len(errors), 0)
|
|
74
|
-
|
|
75
|
-
def test_invalid_project_name(self):
|
|
76
|
-
"""测试无效的项目名称"""
|
|
77
|
-
config = {
|
|
78
|
-
'PROJECT_NAME': '', # 空项目名称
|
|
79
|
-
'QUEUE_TYPE': 'memory',
|
|
80
|
-
'CONCURRENCY': 8,
|
|
81
|
-
'LOG_LEVEL': 'INFO'
|
|
82
|
-
}
|
|
83
|
-
|
|
84
|
-
is_valid, errors, warnings = self.validator.validate(config)
|
|
85
|
-
self.assertFalse(is_valid)
|
|
86
|
-
self.assertIn("PROJECT_NAME 必须是非空字符串", errors)
|
|
87
|
-
|
|
88
|
-
def test_invalid_queue_type(self):
|
|
89
|
-
"""测试无效的队列类型"""
|
|
90
|
-
config = {
|
|
91
|
-
'PROJECT_NAME': 'test_project',
|
|
92
|
-
'QUEUE_TYPE': 'invalid_type', # 无效队列类型
|
|
93
|
-
'CONCURRENCY': 8,
|
|
94
|
-
'LOG_LEVEL': 'INFO'
|
|
95
|
-
}
|
|
96
|
-
|
|
97
|
-
is_valid, errors, warnings = self.validator.validate(config)
|
|
98
|
-
self.assertFalse(is_valid)
|
|
99
|
-
self.assertIn("QUEUE_TYPE 必须是以下值之一: ['memory', 'redis', 'auto']", errors)
|
|
100
|
-
|
|
101
|
-
def test_invalid_concurrency(self):
|
|
102
|
-
"""测试无效的并发数"""
|
|
103
|
-
config = {
|
|
104
|
-
'PROJECT_NAME': 'test_project',
|
|
105
|
-
'QUEUE_TYPE': 'memory',
|
|
106
|
-
'CONCURRENCY': -1, # 负数并发数
|
|
107
|
-
'LOG_LEVEL': 'INFO'
|
|
108
|
-
}
|
|
109
|
-
|
|
110
|
-
is_valid, errors, warnings = self.validator.validate(config)
|
|
111
|
-
self.assertFalse(is_valid)
|
|
112
|
-
self.assertIn("CONCURRENCY 必须是正整数", errors)
|
|
113
|
-
|
|
114
|
-
def test_invalid_log_level(self):
|
|
115
|
-
"""测试无效的日志级别"""
|
|
116
|
-
config = {
|
|
117
|
-
'PROJECT_NAME': 'test_project',
|
|
118
|
-
'QUEUE_TYPE': 'memory',
|
|
119
|
-
'CONCURRENCY': 8,
|
|
120
|
-
'LOG_LEVEL': 'INVALID_LEVEL' # 无效日志级别
|
|
121
|
-
}
|
|
122
|
-
|
|
123
|
-
is_valid, errors, warnings = self.validator.validate(config)
|
|
124
|
-
self.assertFalse(is_valid)
|
|
125
|
-
self.assertIn("LOG_LEVEL 必须是以下值之一: ['DEBUG', 'INFO', 'WARNING', 'ERROR', 'CRITICAL']", errors)
|
|
126
|
-
|
|
127
|
-
def test_invalid_middleware_config(self):
|
|
128
|
-
"""测试无效的中间件配置"""
|
|
129
|
-
config = {
|
|
130
|
-
'PROJECT_NAME': 'test_project',
|
|
131
|
-
'QUEUE_TYPE': 'memory',
|
|
132
|
-
'CONCURRENCY': 8,
|
|
133
|
-
'LOG_LEVEL': 'INFO',
|
|
134
|
-
'MIDDLEWARES': 'not_a_list' # 不是列表
|
|
135
|
-
}
|
|
136
|
-
|
|
137
|
-
is_valid, errors, warnings = self.validator.validate(config)
|
|
138
|
-
self.assertFalse(is_valid)
|
|
139
|
-
self.assertIn("MIDDLEWARES 必须是列表", errors)
|
|
140
|
-
|
|
141
|
-
def test_invalid_pipeline_config(self):
|
|
142
|
-
"""测试无效的管道配置"""
|
|
143
|
-
config = {
|
|
144
|
-
'PROJECT_NAME': 'test_project',
|
|
145
|
-
'QUEUE_TYPE': 'memory',
|
|
146
|
-
'CONCURRENCY': 8,
|
|
147
|
-
'LOG_LEVEL': 'INFO',
|
|
148
|
-
'PIPELINES': 'not_a_list' # 不是列表
|
|
149
|
-
}
|
|
150
|
-
|
|
151
|
-
is_valid, errors, warnings = self.validator.validate(config)
|
|
152
|
-
self.assertFalse(is_valid)
|
|
153
|
-
self.assertIn("PIPELINES 必须是列表", errors)
|
|
154
|
-
|
|
155
|
-
def test_convenience_function(self):
|
|
156
|
-
"""测试便利函数"""
|
|
157
|
-
config = {
|
|
158
|
-
'PROJECT_NAME': 'test_project',
|
|
159
|
-
'QUEUE_TYPE': 'memory',
|
|
160
|
-
'CONCURRENCY': 8,
|
|
161
|
-
'LOG_LEVEL': 'INFO'
|
|
162
|
-
}
|
|
163
|
-
|
|
164
|
-
from crawlo.config_validator import validate_config
|
|
165
|
-
is_valid, errors, warnings = validate_config(config)
|
|
166
|
-
self.assertTrue(is_valid)
|
|
167
|
-
self.assertEqual(len(errors), 0)
|
|
168
|
-
|
|
169
|
-
|
|
170
|
-
def main():
|
|
171
|
-
"""主测试函数"""
|
|
172
|
-
print("开始配置验证器测试...")
|
|
173
|
-
print("=" * 50)
|
|
174
|
-
|
|
175
|
-
# 运行测试
|
|
176
|
-
unittest.main(argv=['first-arg-is-ignored'], exit=False, verbosity=2)
|
|
177
|
-
|
|
178
|
-
print("=" * 50)
|
|
179
|
-
print("配置验证器测试完成")
|
|
180
|
-
|
|
181
|
-
|
|
182
|
-
if __name__ == "__main__":
|
|
1
|
+
#!/usr/bin/env python3
|
|
2
|
+
# -*- coding: utf-8 -*-
|
|
3
|
+
"""
|
|
4
|
+
配置验证器测试
|
|
5
|
+
测试 Crawlo 框架的配置验证功能
|
|
6
|
+
"""
|
|
7
|
+
|
|
8
|
+
import sys
|
|
9
|
+
import os
|
|
10
|
+
import unittest
|
|
11
|
+
|
|
12
|
+
# 添加项目根目录到路径
|
|
13
|
+
sys.path.insert(0, os.path.join(os.path.dirname(__file__), '..'))
|
|
14
|
+
|
|
15
|
+
from crawlo.config_validator import ConfigValidator
|
|
16
|
+
|
|
17
|
+
|
|
18
|
+
class TestConfigValidator(unittest.TestCase):
|
|
19
|
+
"""配置验证器测试类"""
|
|
20
|
+
|
|
21
|
+
def setUp(self):
|
|
22
|
+
"""测试初始化"""
|
|
23
|
+
self.validator = ConfigValidator()
|
|
24
|
+
|
|
25
|
+
def test_valid_standalone_config(self):
|
|
26
|
+
"""测试有效的单机模式配置"""
|
|
27
|
+
config = {
|
|
28
|
+
'PROJECT_NAME': 'test_project',
|
|
29
|
+
'QUEUE_TYPE': 'memory',
|
|
30
|
+
'CONCURRENCY': 8,
|
|
31
|
+
'DOWNLOAD_DELAY': 1.0,
|
|
32
|
+
'LOG_LEVEL': 'INFO',
|
|
33
|
+
'MIDDLEWARES': [
|
|
34
|
+
'crawlo.middleware.request_ignore.RequestIgnoreMiddleware',
|
|
35
|
+
'crawlo.middleware.download_delay.DownloadDelayMiddleware',
|
|
36
|
+
],
|
|
37
|
+
'PIPELINES': [
|
|
38
|
+
'crawlo.pipelines.console_pipeline.ConsolePipeline',
|
|
39
|
+
]
|
|
40
|
+
}
|
|
41
|
+
|
|
42
|
+
is_valid, errors, warnings = self.validator.validate(config)
|
|
43
|
+
self.assertTrue(is_valid)
|
|
44
|
+
self.assertEqual(len(errors), 0)
|
|
45
|
+
|
|
46
|
+
def test_valid_distributed_config(self):
|
|
47
|
+
"""测试有效的分布式模式配置"""
|
|
48
|
+
config = {
|
|
49
|
+
'PROJECT_NAME': 'test_project',
|
|
50
|
+
'QUEUE_TYPE': 'redis',
|
|
51
|
+
'CONCURRENCY': 16,
|
|
52
|
+
'DOWNLOAD_DELAY': 0.5,
|
|
53
|
+
'LOG_LEVEL': 'INFO',
|
|
54
|
+
'REDIS_HOST': '127.0.0.1',
|
|
55
|
+
'REDIS_PORT': 6379,
|
|
56
|
+
'REDIS_PASSWORD': '',
|
|
57
|
+
'SCHEDULER_QUEUE_NAME': 'crawlo:test_project:queue:requests',
|
|
58
|
+
'MIDDLEWARES': [
|
|
59
|
+
'crawlo.middleware.request_ignore.RequestIgnoreMiddleware',
|
|
60
|
+
'crawlo.middleware.download_delay.DownloadDelayMiddleware',
|
|
61
|
+
],
|
|
62
|
+
'PIPELINES': [
|
|
63
|
+
'crawlo.pipelines.console_pipeline.ConsolePipeline',
|
|
64
|
+
'crawlo.pipelines.redis_dedup_pipeline.RedisDedupPipeline',
|
|
65
|
+
],
|
|
66
|
+
'EXTENSIONS': [
|
|
67
|
+
'crawlo.extension.memory_monitor.MemoryMonitorExtension',
|
|
68
|
+
]
|
|
69
|
+
}
|
|
70
|
+
|
|
71
|
+
is_valid, errors, warnings = self.validator.validate(config)
|
|
72
|
+
self.assertTrue(is_valid)
|
|
73
|
+
self.assertEqual(len(errors), 0)
|
|
74
|
+
|
|
75
|
+
def test_invalid_project_name(self):
|
|
76
|
+
"""测试无效的项目名称"""
|
|
77
|
+
config = {
|
|
78
|
+
'PROJECT_NAME': '', # 空项目名称
|
|
79
|
+
'QUEUE_TYPE': 'memory',
|
|
80
|
+
'CONCURRENCY': 8,
|
|
81
|
+
'LOG_LEVEL': 'INFO'
|
|
82
|
+
}
|
|
83
|
+
|
|
84
|
+
is_valid, errors, warnings = self.validator.validate(config)
|
|
85
|
+
self.assertFalse(is_valid)
|
|
86
|
+
self.assertIn("PROJECT_NAME 必须是非空字符串", errors)
|
|
87
|
+
|
|
88
|
+
def test_invalid_queue_type(self):
|
|
89
|
+
"""测试无效的队列类型"""
|
|
90
|
+
config = {
|
|
91
|
+
'PROJECT_NAME': 'test_project',
|
|
92
|
+
'QUEUE_TYPE': 'invalid_type', # 无效队列类型
|
|
93
|
+
'CONCURRENCY': 8,
|
|
94
|
+
'LOG_LEVEL': 'INFO'
|
|
95
|
+
}
|
|
96
|
+
|
|
97
|
+
is_valid, errors, warnings = self.validator.validate(config)
|
|
98
|
+
self.assertFalse(is_valid)
|
|
99
|
+
self.assertIn("QUEUE_TYPE 必须是以下值之一: ['memory', 'redis', 'auto']", errors)
|
|
100
|
+
|
|
101
|
+
def test_invalid_concurrency(self):
|
|
102
|
+
"""测试无效的并发数"""
|
|
103
|
+
config = {
|
|
104
|
+
'PROJECT_NAME': 'test_project',
|
|
105
|
+
'QUEUE_TYPE': 'memory',
|
|
106
|
+
'CONCURRENCY': -1, # 负数并发数
|
|
107
|
+
'LOG_LEVEL': 'INFO'
|
|
108
|
+
}
|
|
109
|
+
|
|
110
|
+
is_valid, errors, warnings = self.validator.validate(config)
|
|
111
|
+
self.assertFalse(is_valid)
|
|
112
|
+
self.assertIn("CONCURRENCY 必须是正整数", errors)
|
|
113
|
+
|
|
114
|
+
def test_invalid_log_level(self):
|
|
115
|
+
"""测试无效的日志级别"""
|
|
116
|
+
config = {
|
|
117
|
+
'PROJECT_NAME': 'test_project',
|
|
118
|
+
'QUEUE_TYPE': 'memory',
|
|
119
|
+
'CONCURRENCY': 8,
|
|
120
|
+
'LOG_LEVEL': 'INVALID_LEVEL' # 无效日志级别
|
|
121
|
+
}
|
|
122
|
+
|
|
123
|
+
is_valid, errors, warnings = self.validator.validate(config)
|
|
124
|
+
self.assertFalse(is_valid)
|
|
125
|
+
self.assertIn("LOG_LEVEL 必须是以下值之一: ['DEBUG', 'INFO', 'WARNING', 'ERROR', 'CRITICAL']", errors)
|
|
126
|
+
|
|
127
|
+
def test_invalid_middleware_config(self):
|
|
128
|
+
"""测试无效的中间件配置"""
|
|
129
|
+
config = {
|
|
130
|
+
'PROJECT_NAME': 'test_project',
|
|
131
|
+
'QUEUE_TYPE': 'memory',
|
|
132
|
+
'CONCURRENCY': 8,
|
|
133
|
+
'LOG_LEVEL': 'INFO',
|
|
134
|
+
'MIDDLEWARES': 'not_a_list' # 不是列表
|
|
135
|
+
}
|
|
136
|
+
|
|
137
|
+
is_valid, errors, warnings = self.validator.validate(config)
|
|
138
|
+
self.assertFalse(is_valid)
|
|
139
|
+
self.assertIn("MIDDLEWARES 必须是列表", errors)
|
|
140
|
+
|
|
141
|
+
def test_invalid_pipeline_config(self):
|
|
142
|
+
"""测试无效的管道配置"""
|
|
143
|
+
config = {
|
|
144
|
+
'PROJECT_NAME': 'test_project',
|
|
145
|
+
'QUEUE_TYPE': 'memory',
|
|
146
|
+
'CONCURRENCY': 8,
|
|
147
|
+
'LOG_LEVEL': 'INFO',
|
|
148
|
+
'PIPELINES': 'not_a_list' # 不是列表
|
|
149
|
+
}
|
|
150
|
+
|
|
151
|
+
is_valid, errors, warnings = self.validator.validate(config)
|
|
152
|
+
self.assertFalse(is_valid)
|
|
153
|
+
self.assertIn("PIPELINES 必须是列表", errors)
|
|
154
|
+
|
|
155
|
+
def test_convenience_function(self):
|
|
156
|
+
"""测试便利函数"""
|
|
157
|
+
config = {
|
|
158
|
+
'PROJECT_NAME': 'test_project',
|
|
159
|
+
'QUEUE_TYPE': 'memory',
|
|
160
|
+
'CONCURRENCY': 8,
|
|
161
|
+
'LOG_LEVEL': 'INFO'
|
|
162
|
+
}
|
|
163
|
+
|
|
164
|
+
from crawlo.config_validator import validate_config
|
|
165
|
+
is_valid, errors, warnings = validate_config(config)
|
|
166
|
+
self.assertTrue(is_valid)
|
|
167
|
+
self.assertEqual(len(errors), 0)
|
|
168
|
+
|
|
169
|
+
|
|
170
|
+
def main():
|
|
171
|
+
"""主测试函数"""
|
|
172
|
+
print("开始配置验证器测试...")
|
|
173
|
+
print("=" * 50)
|
|
174
|
+
|
|
175
|
+
# 运行测试
|
|
176
|
+
unittest.main(argv=['first-arg-is-ignored'], exit=False, verbosity=2)
|
|
177
|
+
|
|
178
|
+
print("=" * 50)
|
|
179
|
+
print("配置验证器测试完成")
|
|
180
|
+
|
|
181
|
+
|
|
182
|
+
if __name__ == "__main__":
|
|
183
183
|
main()
|
|
@@ -1,80 +1,80 @@
|
|
|
1
|
-
#!/usr/bin/env python3
|
|
2
|
-
# -*- coding: utf-8 -*-
|
|
3
|
-
"""
|
|
4
|
-
受控爬虫混入类测试
|
|
5
|
-
测试 ControlledRequestMixin, AsyncControlledRequestMixin
|
|
6
|
-
"""
|
|
7
|
-
import sys
|
|
8
|
-
import os
|
|
9
|
-
import unittest
|
|
10
|
-
from unittest.mock import Mock, patch, MagicMock
|
|
11
|
-
import asyncio
|
|
12
|
-
|
|
13
|
-
# 添加项目根目录到 Python 路径
|
|
14
|
-
sys.path.insert(0, os.path.join(os.path.dirname(__file__), '..'))
|
|
15
|
-
|
|
16
|
-
from crawlo.utils.controlled_spider_mixin import ControlledRequestMixin, AsyncControlledRequestMixin
|
|
17
|
-
|
|
18
|
-
|
|
19
|
-
class TestControlledRequestMixin(unittest.TestCase):
|
|
20
|
-
"""受控请求混入类测试"""
|
|
21
|
-
|
|
22
|
-
def setUp(self):
|
|
23
|
-
"""测试前准备"""
|
|
24
|
-
self.mixin = ControlledRequestMixin()
|
|
25
|
-
|
|
26
|
-
def test_mixin_initialization(self):
|
|
27
|
-
"""测试混入类初始化"""
|
|
28
|
-
self.assertEqual(self.mixin.max_pending_requests, 100)
|
|
29
|
-
self.assertEqual(self.mixin.batch_size, 50)
|
|
30
|
-
self.assertEqual(self.mixin.generation_interval, 0.1)
|
|
31
|
-
self.assertEqual(self.mixin.backpressure_threshold, 200)
|
|
32
|
-
|
|
33
|
-
def test_mixin_configuration(self):
|
|
34
|
-
"""测试混入类配置"""
|
|
35
|
-
# 修改配置
|
|
36
|
-
self.mixin.max_pending_requests = 200
|
|
37
|
-
self.mixin.batch_size = 100
|
|
38
|
-
self.mixin.generation_interval = 0.05
|
|
39
|
-
self.mixin.backpressure_threshold = 300
|
|
40
|
-
|
|
41
|
-
self.assertEqual(self.mixin.max_pending_requests, 200)
|
|
42
|
-
self.assertEqual(self.mixin.batch_size, 100)
|
|
43
|
-
self.assertEqual(self.mixin.generation_interval, 0.05)
|
|
44
|
-
self.assertEqual(self.mixin.backpressure_threshold, 300)
|
|
45
|
-
|
|
46
|
-
def test_get_generation_stats(self):
|
|
47
|
-
"""测试获取生成统计信息"""
|
|
48
|
-
stats = self.mixin.get_generation_stats()
|
|
49
|
-
self.assertIsInstance(stats, dict)
|
|
50
|
-
self.assertIn('generated', stats)
|
|
51
|
-
self.assertIn('skipped', stats)
|
|
52
|
-
self.assertIn('backpressure_events', stats)
|
|
53
|
-
self.assertIn('total_generated', stats)
|
|
54
|
-
self.assertIn('last_generation_time', stats)
|
|
55
|
-
|
|
56
|
-
|
|
57
|
-
class TestAsyncControlledRequestMixin(unittest.TestCase):
|
|
58
|
-
"""异步受控请求混入类测试"""
|
|
59
|
-
|
|
60
|
-
def setUp(self):
|
|
61
|
-
"""测试前准备"""
|
|
62
|
-
self.mixin = AsyncControlledRequestMixin()
|
|
63
|
-
|
|
64
|
-
def test_async_mixin_initialization(self):
|
|
65
|
-
"""测试异步混入类初始化"""
|
|
66
|
-
self.assertEqual(self.mixin.max_concurrent_generations, 10)
|
|
67
|
-
self.assertEqual(self.mixin.queue_monitor_interval, 1.0)
|
|
68
|
-
|
|
69
|
-
def test_async_mixin_configuration(self):
|
|
70
|
-
"""测试异步混入类配置"""
|
|
71
|
-
# 修改配置
|
|
72
|
-
self.mixin.max_concurrent_generations = 20
|
|
73
|
-
self.mixin.queue_monitor_interval = 0.5
|
|
74
|
-
|
|
75
|
-
self.assertEqual(self.mixin.max_concurrent_generations, 20)
|
|
76
|
-
self.assertEqual(self.mixin.queue_monitor_interval, 0.5)
|
|
77
|
-
|
|
78
|
-
|
|
79
|
-
if __name__ == '__main__':
|
|
1
|
+
#!/usr/bin/env python3
|
|
2
|
+
# -*- coding: utf-8 -*-
|
|
3
|
+
"""
|
|
4
|
+
受控爬虫混入类测试
|
|
5
|
+
测试 ControlledRequestMixin, AsyncControlledRequestMixin
|
|
6
|
+
"""
|
|
7
|
+
import sys
|
|
8
|
+
import os
|
|
9
|
+
import unittest
|
|
10
|
+
from unittest.mock import Mock, patch, MagicMock
|
|
11
|
+
import asyncio
|
|
12
|
+
|
|
13
|
+
# 添加项目根目录到 Python 路径
|
|
14
|
+
sys.path.insert(0, os.path.join(os.path.dirname(__file__), '..'))
|
|
15
|
+
|
|
16
|
+
from crawlo.utils.controlled_spider_mixin import ControlledRequestMixin, AsyncControlledRequestMixin
|
|
17
|
+
|
|
18
|
+
|
|
19
|
+
class TestControlledRequestMixin(unittest.TestCase):
|
|
20
|
+
"""受控请求混入类测试"""
|
|
21
|
+
|
|
22
|
+
def setUp(self):
|
|
23
|
+
"""测试前准备"""
|
|
24
|
+
self.mixin = ControlledRequestMixin()
|
|
25
|
+
|
|
26
|
+
def test_mixin_initialization(self):
|
|
27
|
+
"""测试混入类初始化"""
|
|
28
|
+
self.assertEqual(self.mixin.max_pending_requests, 100)
|
|
29
|
+
self.assertEqual(self.mixin.batch_size, 50)
|
|
30
|
+
self.assertEqual(self.mixin.generation_interval, 0.1)
|
|
31
|
+
self.assertEqual(self.mixin.backpressure_threshold, 200)
|
|
32
|
+
|
|
33
|
+
def test_mixin_configuration(self):
|
|
34
|
+
"""测试混入类配置"""
|
|
35
|
+
# 修改配置
|
|
36
|
+
self.mixin.max_pending_requests = 200
|
|
37
|
+
self.mixin.batch_size = 100
|
|
38
|
+
self.mixin.generation_interval = 0.05
|
|
39
|
+
self.mixin.backpressure_threshold = 300
|
|
40
|
+
|
|
41
|
+
self.assertEqual(self.mixin.max_pending_requests, 200)
|
|
42
|
+
self.assertEqual(self.mixin.batch_size, 100)
|
|
43
|
+
self.assertEqual(self.mixin.generation_interval, 0.05)
|
|
44
|
+
self.assertEqual(self.mixin.backpressure_threshold, 300)
|
|
45
|
+
|
|
46
|
+
def test_get_generation_stats(self):
|
|
47
|
+
"""测试获取生成统计信息"""
|
|
48
|
+
stats = self.mixin.get_generation_stats()
|
|
49
|
+
self.assertIsInstance(stats, dict)
|
|
50
|
+
self.assertIn('generated', stats)
|
|
51
|
+
self.assertIn('skipped', stats)
|
|
52
|
+
self.assertIn('backpressure_events', stats)
|
|
53
|
+
self.assertIn('total_generated', stats)
|
|
54
|
+
self.assertIn('last_generation_time', stats)
|
|
55
|
+
|
|
56
|
+
|
|
57
|
+
class TestAsyncControlledRequestMixin(unittest.TestCase):
|
|
58
|
+
"""异步受控请求混入类测试"""
|
|
59
|
+
|
|
60
|
+
def setUp(self):
|
|
61
|
+
"""测试前准备"""
|
|
62
|
+
self.mixin = AsyncControlledRequestMixin()
|
|
63
|
+
|
|
64
|
+
def test_async_mixin_initialization(self):
|
|
65
|
+
"""测试异步混入类初始化"""
|
|
66
|
+
self.assertEqual(self.mixin.max_concurrent_generations, 10)
|
|
67
|
+
self.assertEqual(self.mixin.queue_monitor_interval, 1.0)
|
|
68
|
+
|
|
69
|
+
def test_async_mixin_configuration(self):
|
|
70
|
+
"""测试异步混入类配置"""
|
|
71
|
+
# 修改配置
|
|
72
|
+
self.mixin.max_concurrent_generations = 20
|
|
73
|
+
self.mixin.queue_monitor_interval = 0.5
|
|
74
|
+
|
|
75
|
+
self.assertEqual(self.mixin.max_concurrent_generations, 20)
|
|
76
|
+
self.assertEqual(self.mixin.queue_monitor_interval, 0.5)
|
|
77
|
+
|
|
78
|
+
|
|
79
|
+
if __name__ == '__main__':
|
|
80
80
|
unittest.main()
|
|
@@ -1,39 +1,39 @@
|
|
|
1
|
-
#!/usr/bin/env python
|
|
2
|
-
# -*- coding: utf-8 -*-
|
|
3
|
-
|
|
4
|
-
"""
|
|
5
|
-
测试CrawlerProcess导入功能
|
|
6
|
-
"""
|
|
7
|
-
|
|
8
|
-
import sys
|
|
9
|
-
import os
|
|
10
|
-
sys.path.insert(0, os.path.join(os.path.dirname(__file__), '..'))
|
|
11
|
-
|
|
12
|
-
|
|
13
|
-
def test_crawler_process_import():
|
|
14
|
-
"""测试CrawlerProcess导入功能"""
|
|
15
|
-
print("测试CrawlerProcess导入功能...")
|
|
16
|
-
|
|
17
|
-
try:
|
|
18
|
-
# 测试直接从crawlo导入CrawlerProcess
|
|
19
|
-
from crawlo import CrawlerProcess
|
|
20
|
-
print(f" 成功从crawlo导入CrawlerProcess: {CrawlerProcess}")
|
|
21
|
-
|
|
22
|
-
# 测试创建实例
|
|
23
|
-
process = CrawlerProcess()
|
|
24
|
-
print(f" 成功创建CrawlerProcess实例: {process}")
|
|
25
|
-
|
|
26
|
-
print("CrawlerProcess导入测试通过!")
|
|
27
|
-
|
|
28
|
-
except ImportError as e:
|
|
29
|
-
print(f" 导入失败: {e}")
|
|
30
|
-
# 如果直接导入失败,尝试从crawler模块导入
|
|
31
|
-
try:
|
|
32
|
-
from crawlo.crawler import CrawlerProcess
|
|
33
|
-
print(f" 成功从crawlo.crawler导入CrawlerProcess: {CrawlerProcess}")
|
|
34
|
-
except ImportError as e2:
|
|
35
|
-
print(f" 从crawler模块导入也失败: {e2}")
|
|
36
|
-
|
|
37
|
-
|
|
38
|
-
if __name__ == '__main__':
|
|
1
|
+
#!/usr/bin/env python
|
|
2
|
+
# -*- coding: utf-8 -*-
|
|
3
|
+
|
|
4
|
+
"""
|
|
5
|
+
测试CrawlerProcess导入功能
|
|
6
|
+
"""
|
|
7
|
+
|
|
8
|
+
import sys
|
|
9
|
+
import os
|
|
10
|
+
sys.path.insert(0, os.path.join(os.path.dirname(__file__), '..'))
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
def test_crawler_process_import():
|
|
14
|
+
"""测试CrawlerProcess导入功能"""
|
|
15
|
+
print("测试CrawlerProcess导入功能...")
|
|
16
|
+
|
|
17
|
+
try:
|
|
18
|
+
# 测试直接从crawlo导入CrawlerProcess
|
|
19
|
+
from crawlo import CrawlerProcess
|
|
20
|
+
print(f" 成功从crawlo导入CrawlerProcess: {CrawlerProcess}")
|
|
21
|
+
|
|
22
|
+
# 测试创建实例
|
|
23
|
+
process = CrawlerProcess()
|
|
24
|
+
print(f" 成功创建CrawlerProcess实例: {process}")
|
|
25
|
+
|
|
26
|
+
print("CrawlerProcess导入测试通过!")
|
|
27
|
+
|
|
28
|
+
except ImportError as e:
|
|
29
|
+
print(f" 导入失败: {e}")
|
|
30
|
+
# 如果直接导入失败,尝试从crawler模块导入
|
|
31
|
+
try:
|
|
32
|
+
from crawlo.crawler import CrawlerProcess
|
|
33
|
+
print(f" 成功从crawlo.crawler导入CrawlerProcess: {CrawlerProcess}")
|
|
34
|
+
except ImportError as e2:
|
|
35
|
+
print(f" 从crawler模块导入也失败: {e2}")
|
|
36
|
+
|
|
37
|
+
|
|
38
|
+
if __name__ == '__main__':
|
|
39
39
|
test_crawler_process_import()
|