crawlo 1.3.5__py3-none-any.whl → 1.3.6__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of crawlo might be problematic. Click here for more details.
- crawlo/__init__.py +87 -87
- crawlo/__version__.py +1 -1
- crawlo/cli.py +75 -75
- crawlo/commands/__init__.py +14 -14
- crawlo/commands/check.py +594 -594
- crawlo/commands/genspider.py +151 -151
- crawlo/commands/help.py +138 -138
- crawlo/commands/list.py +155 -155
- crawlo/commands/run.py +341 -341
- crawlo/commands/startproject.py +436 -436
- crawlo/commands/stats.py +187 -187
- crawlo/commands/utils.py +196 -196
- crawlo/config.py +312 -312
- crawlo/config_validator.py +277 -277
- crawlo/core/__init__.py +45 -45
- crawlo/core/engine.py +439 -439
- crawlo/core/processor.py +40 -40
- crawlo/core/scheduler.py +257 -257
- crawlo/crawler.py +638 -638
- crawlo/data/__init__.py +5 -5
- crawlo/data/user_agents.py +194 -194
- crawlo/downloader/__init__.py +273 -273
- crawlo/downloader/aiohttp_downloader.py +228 -228
- crawlo/downloader/cffi_downloader.py +245 -245
- crawlo/downloader/httpx_downloader.py +259 -259
- crawlo/downloader/hybrid_downloader.py +212 -212
- crawlo/downloader/playwright_downloader.py +402 -402
- crawlo/downloader/selenium_downloader.py +472 -472
- crawlo/event.py +11 -11
- crawlo/exceptions.py +81 -81
- crawlo/extension/__init__.py +39 -39
- crawlo/extension/health_check.py +141 -141
- crawlo/extension/log_interval.py +57 -57
- crawlo/extension/log_stats.py +81 -81
- crawlo/extension/logging_extension.py +61 -61
- crawlo/extension/memory_monitor.py +104 -104
- crawlo/extension/performance_profiler.py +133 -133
- crawlo/extension/request_recorder.py +107 -107
- crawlo/factories/__init__.py +27 -27
- crawlo/factories/base.py +68 -68
- crawlo/factories/crawler.py +103 -103
- crawlo/factories/registry.py +84 -84
- crawlo/filters/__init__.py +154 -154
- crawlo/filters/aioredis_filter.py +257 -257
- crawlo/filters/memory_filter.py +269 -269
- crawlo/framework.py +292 -291
- crawlo/initialization/__init__.py +39 -39
- crawlo/initialization/built_in.py +425 -425
- crawlo/initialization/context.py +141 -141
- crawlo/initialization/core.py +193 -193
- crawlo/initialization/phases.py +148 -148
- crawlo/initialization/registry.py +145 -145
- crawlo/items/__init__.py +23 -23
- crawlo/items/base.py +23 -23
- crawlo/items/fields.py +52 -52
- crawlo/items/items.py +104 -104
- crawlo/logging/__init__.py +37 -37
- crawlo/logging/config.py +96 -96
- crawlo/logging/factory.py +128 -128
- crawlo/logging/manager.py +111 -111
- crawlo/middleware/__init__.py +21 -21
- crawlo/middleware/default_header.py +132 -132
- crawlo/middleware/download_delay.py +104 -104
- crawlo/middleware/middleware_manager.py +135 -135
- crawlo/middleware/offsite.py +123 -123
- crawlo/middleware/proxy.py +386 -386
- crawlo/middleware/request_ignore.py +86 -86
- crawlo/middleware/response_code.py +163 -163
- crawlo/middleware/response_filter.py +136 -136
- crawlo/middleware/retry.py +124 -124
- crawlo/middleware/simple_proxy.py +65 -65
- crawlo/mode_manager.py +212 -212
- crawlo/network/__init__.py +21 -21
- crawlo/network/request.py +379 -379
- crawlo/network/response.py +359 -359
- crawlo/pipelines/__init__.py +21 -21
- crawlo/pipelines/bloom_dedup_pipeline.py +156 -156
- crawlo/pipelines/console_pipeline.py +39 -39
- crawlo/pipelines/csv_pipeline.py +316 -316
- crawlo/pipelines/database_dedup_pipeline.py +222 -222
- crawlo/pipelines/json_pipeline.py +218 -218
- crawlo/pipelines/memory_dedup_pipeline.py +115 -115
- crawlo/pipelines/mongo_pipeline.py +131 -131
- crawlo/pipelines/mysql_pipeline.py +325 -325
- crawlo/pipelines/pipeline_manager.py +76 -76
- crawlo/pipelines/redis_dedup_pipeline.py +166 -166
- crawlo/project.py +327 -327
- crawlo/queue/pqueue.py +42 -42
- crawlo/queue/queue_manager.py +503 -503
- crawlo/queue/redis_priority_queue.py +326 -326
- crawlo/settings/__init__.py +7 -7
- crawlo/settings/default_settings.py +321 -321
- crawlo/settings/setting_manager.py +214 -214
- crawlo/spider/__init__.py +657 -657
- crawlo/stats_collector.py +73 -73
- crawlo/subscriber.py +129 -129
- crawlo/task_manager.py +138 -138
- crawlo/templates/crawlo.cfg.tmpl +10 -10
- crawlo/templates/project/__init__.py.tmpl +3 -3
- crawlo/templates/project/items.py.tmpl +17 -17
- crawlo/templates/project/middlewares.py.tmpl +118 -118
- crawlo/templates/project/pipelines.py.tmpl +96 -96
- crawlo/templates/project/settings.py.tmpl +167 -167
- crawlo/templates/project/settings_distributed.py.tmpl +166 -166
- crawlo/templates/project/settings_gentle.py.tmpl +166 -166
- crawlo/templates/project/settings_high_performance.py.tmpl +167 -167
- crawlo/templates/project/settings_minimal.py.tmpl +65 -65
- crawlo/templates/project/settings_simple.py.tmpl +164 -164
- crawlo/templates/project/spiders/__init__.py.tmpl +9 -9
- crawlo/templates/run.py.tmpl +34 -34
- crawlo/templates/spider/spider.py.tmpl +143 -143
- crawlo/templates/spiders_init.py.tmpl +9 -9
- crawlo/tools/__init__.py +200 -200
- crawlo/tools/anti_crawler.py +268 -268
- crawlo/tools/authenticated_proxy.py +240 -240
- crawlo/tools/data_formatter.py +225 -225
- crawlo/tools/data_validator.py +180 -180
- crawlo/tools/date_tools.py +289 -289
- crawlo/tools/distributed_coordinator.py +388 -388
- crawlo/tools/encoding_converter.py +127 -127
- crawlo/tools/network_diagnostic.py +364 -364
- crawlo/tools/request_tools.py +82 -82
- crawlo/tools/retry_mechanism.py +224 -224
- crawlo/tools/scenario_adapter.py +262 -262
- crawlo/tools/text_cleaner.py +232 -232
- crawlo/utils/__init__.py +34 -34
- crawlo/utils/batch_processor.py +259 -259
- crawlo/utils/class_loader.py +25 -25
- crawlo/utils/controlled_spider_mixin.py +439 -439
- crawlo/utils/db_helper.py +343 -343
- crawlo/utils/enhanced_error_handler.py +356 -356
- crawlo/utils/env_config.py +142 -142
- crawlo/utils/error_handler.py +165 -165
- crawlo/utils/func_tools.py +82 -82
- crawlo/utils/large_scale_config.py +286 -286
- crawlo/utils/large_scale_helper.py +344 -344
- crawlo/utils/log.py +79 -79
- crawlo/utils/performance_monitor.py +285 -285
- crawlo/utils/queue_helper.py +175 -175
- crawlo/utils/redis_connection_pool.py +388 -388
- crawlo/utils/redis_key_validator.py +198 -198
- crawlo/utils/request.py +267 -267
- crawlo/utils/request_serializer.py +225 -225
- crawlo/utils/spider_loader.py +61 -61
- crawlo/utils/system.py +11 -11
- crawlo/utils/tools.py +4 -4
- crawlo/utils/url.py +39 -39
- {crawlo-1.3.5.dist-info → crawlo-1.3.6.dist-info}/METADATA +1126 -1126
- crawlo-1.3.6.dist-info/RECORD +290 -0
- examples/__init__.py +7 -7
- tests/__init__.py +7 -7
- tests/advanced_tools_example.py +275 -275
- tests/authenticated_proxy_example.py +106 -106
- tests/baidu_performance_test.py +108 -108
- tests/baidu_test.py +59 -59
- tests/cleaners_example.py +160 -160
- tests/comprehensive_framework_test.py +212 -212
- tests/comprehensive_test.py +81 -81
- tests/comprehensive_testing_summary.md +186 -186
- tests/config_validation_demo.py +142 -142
- tests/controlled_spider_example.py +205 -205
- tests/date_tools_example.py +180 -180
- tests/debug_configure.py +69 -69
- tests/debug_framework_logger.py +84 -84
- tests/debug_log_config.py +126 -126
- tests/debug_log_levels.py +63 -63
- tests/debug_pipelines.py +66 -66
- tests/detailed_log_test.py +233 -233
- tests/distributed_test.py +66 -66
- tests/distributed_test_debug.py +76 -76
- tests/dynamic_loading_example.py +523 -523
- tests/dynamic_loading_test.py +104 -104
- tests/env_config_example.py +133 -133
- tests/error_handling_example.py +171 -171
- tests/final_comprehensive_test.py +151 -151
- tests/final_log_test.py +260 -260
- tests/final_validation_test.py +182 -182
- tests/fix_log_test.py +142 -142
- tests/framework_performance_test.py +202 -202
- tests/log_buffering_test.py +111 -111
- tests/log_generation_timing_test.py +153 -153
- tests/optimized_performance_test.py +211 -211
- tests/performance_comparison.py +245 -245
- tests/queue_blocking_test.py +113 -113
- tests/queue_test.py +89 -89
- tests/redis_key_validation_demo.py +130 -130
- tests/request_params_example.py +150 -150
- tests/response_improvements_example.py +144 -144
- tests/scrapy_comparison/ofweek_scrapy.py +138 -138
- tests/scrapy_comparison/scrapy_test.py +133 -133
- tests/simple_command_test.py +119 -119
- tests/simple_crawlo_test.py +127 -127
- tests/simple_log_test.py +57 -57
- tests/simple_log_test2.py +137 -137
- tests/simple_optimization_test.py +128 -128
- tests/simple_queue_type_test.py +42 -0
- tests/simple_spider_test.py +49 -49
- tests/simple_test.py +47 -47
- tests/spider_log_timing_test.py +177 -177
- tests/test_advanced_tools.py +148 -148
- tests/test_all_commands.py +230 -230
- tests/test_all_redis_key_configs.py +145 -145
- tests/test_authenticated_proxy.py +141 -141
- tests/test_batch_processor.py +178 -178
- tests/test_cleaners.py +54 -54
- tests/test_component_factory.py +174 -174
- tests/test_comprehensive.py +146 -146
- tests/test_config_consistency.py +80 -80
- tests/test_config_merge.py +152 -152
- tests/test_config_validator.py +182 -182
- tests/test_controlled_spider_mixin.py +79 -79
- tests/test_crawlo_proxy_integration.py +108 -108
- tests/test_date_tools.py +123 -123
- tests/test_default_header_middleware.py +158 -158
- tests/test_distributed.py +65 -65
- tests/test_double_crawlo_fix.py +207 -207
- tests/test_double_crawlo_fix_simple.py +124 -124
- tests/test_download_delay_middleware.py +221 -221
- tests/test_downloader_proxy_compatibility.py +268 -268
- tests/test_dynamic_downloaders_proxy.py +124 -124
- tests/test_dynamic_proxy.py +92 -92
- tests/test_dynamic_proxy_config.py +146 -146
- tests/test_dynamic_proxy_real.py +109 -109
- tests/test_edge_cases.py +303 -303
- tests/test_enhanced_error_handler.py +270 -270
- tests/test_enhanced_error_handler_comprehensive.py +245 -245
- tests/test_env_config.py +121 -121
- tests/test_error_handler_compatibility.py +112 -112
- tests/test_factories.py +252 -252
- tests/test_final_validation.py +153 -153
- tests/test_framework_env_usage.py +103 -103
- tests/test_framework_logger.py +66 -66
- tests/test_framework_startup.py +64 -64
- tests/test_get_component_logger.py +83 -83
- tests/test_integration.py +169 -169
- tests/test_item_dedup_redis_key.py +122 -122
- tests/test_large_scale_config.py +112 -112
- tests/test_large_scale_helper.py +235 -235
- tests/test_logging_system.py +282 -282
- tests/test_mode_change.py +72 -72
- tests/test_mode_consistency.py +51 -51
- tests/test_offsite_middleware.py +221 -221
- tests/test_parsel.py +29 -29
- tests/test_performance.py +327 -327
- tests/test_performance_monitor.py +115 -115
- tests/test_proxy_api.py +264 -264
- tests/test_proxy_health_check.py +32 -32
- tests/test_proxy_middleware.py +121 -121
- tests/test_proxy_middleware_enhanced.py +216 -216
- tests/test_proxy_middleware_integration.py +136 -136
- tests/test_proxy_middleware_refactored.py +184 -184
- tests/test_proxy_providers.py +56 -56
- tests/test_proxy_stats.py +19 -19
- tests/test_proxy_strategies.py +59 -59
- tests/test_queue_empty_check.py +41 -41
- tests/test_queue_manager_double_crawlo.py +173 -173
- tests/test_queue_manager_redis_key.py +176 -176
- tests/test_queue_type.py +107 -0
- tests/test_random_user_agent.py +72 -72
- tests/test_real_scenario_proxy.py +195 -195
- tests/test_redis_config.py +28 -28
- tests/test_redis_connection_pool.py +294 -294
- tests/test_redis_key_naming.py +181 -181
- tests/test_redis_key_validator.py +123 -123
- tests/test_redis_queue.py +224 -224
- tests/test_request_ignore_middleware.py +182 -182
- tests/test_request_params.py +111 -111
- tests/test_request_serialization.py +70 -70
- tests/test_response_code_middleware.py +349 -349
- tests/test_response_filter_middleware.py +427 -427
- tests/test_response_improvements.py +152 -152
- tests/test_retry_middleware.py +241 -241
- tests/test_scheduler.py +252 -252
- tests/test_scheduler_config_update.py +133 -133
- tests/test_simple_response.py +61 -61
- tests/test_telecom_spider_redis_key.py +205 -205
- tests/test_template_content.py +87 -87
- tests/test_template_redis_key.py +134 -134
- tests/test_tools.py +159 -159
- tests/test_user_agents.py +96 -96
- tests/tools_example.py +260 -260
- tests/untested_features_report.md +138 -138
- tests/verify_debug.py +51 -51
- tests/verify_distributed.py +117 -117
- tests/verify_log_fix.py +111 -111
- crawlo-1.3.5.dist-info/RECORD +0 -288
- {crawlo-1.3.5.dist-info → crawlo-1.3.6.dist-info}/WHEEL +0 -0
- {crawlo-1.3.5.dist-info → crawlo-1.3.6.dist-info}/entry_points.txt +0 -0
- {crawlo-1.3.5.dist-info → crawlo-1.3.6.dist-info}/top_level.txt +0 -0
tests/test_mode_change.py
CHANGED
|
@@ -1,73 +1,73 @@
|
|
|
1
|
-
#!/usr/bin/env python
|
|
2
|
-
# -*- coding: utf-8 -*-
|
|
3
|
-
"""
|
|
4
|
-
测试运行模式日志级别修改
|
|
5
|
-
"""
|
|
6
|
-
import sys
|
|
7
|
-
import os
|
|
8
|
-
sys.path.insert(0, '/')
|
|
9
|
-
|
|
10
|
-
def test_mode_log_level():
|
|
11
|
-
print("=== 测试运行模式日志级别修改 ===")
|
|
12
|
-
|
|
13
|
-
# 删除旧的日志文件
|
|
14
|
-
test_log_file = '/Users/oscar/projects/Crawlo/test_mode_change.log'
|
|
15
|
-
if os.path.exists(test_log_file):
|
|
16
|
-
os.remove(test_log_file)
|
|
17
|
-
|
|
18
|
-
# 准备测试设置
|
|
19
|
-
test_settings = {
|
|
20
|
-
'PROJECT_NAME': 'test_mode_change',
|
|
21
|
-
'LOG_LEVEL': 'INFO',
|
|
22
|
-
'LOG_FILE': test_log_file,
|
|
23
|
-
'RUN_MODE': 'standalone'
|
|
24
|
-
}
|
|
25
|
-
|
|
26
|
-
try:
|
|
27
|
-
# 初始化框架
|
|
28
|
-
from crawlo.core.framework_initializer import initialize_framework
|
|
29
|
-
settings = initialize_framework(test_settings)
|
|
30
|
-
|
|
31
|
-
print(f"设置初始化完成: {settings.get('PROJECT_NAME')}")
|
|
32
|
-
|
|
33
|
-
# 检查日志文件是否包含运行模式信息
|
|
34
|
-
if os.path.exists(test_log_file):
|
|
35
|
-
with open(test_log_file, 'r', encoding='utf-8') as f:
|
|
36
|
-
content = f.read()
|
|
37
|
-
print(f"日志文件内容长度: {len(content)} 字符")
|
|
38
|
-
|
|
39
|
-
# 检查是否还有INFO级别的运行模式信息
|
|
40
|
-
info_lines = [line for line in content.split('\n') if 'INFO' in line and '使用单机模式' in line]
|
|
41
|
-
debug_lines = [line for line in content.split('\n') if 'DEBUG' in line and '使用单机模式' in line]
|
|
42
|
-
|
|
43
|
-
if info_lines:
|
|
44
|
-
print("❌ 仍然发现INFO级别的运行模式信息:")
|
|
45
|
-
for line in info_lines:
|
|
46
|
-
print(f" {line}")
|
|
47
|
-
else:
|
|
48
|
-
print("✅ 没有发现INFO级别的运行模式信息")
|
|
49
|
-
|
|
50
|
-
if debug_lines:
|
|
51
|
-
print("✅ 发现DEBUG级别的运行模式信息:")
|
|
52
|
-
for line in debug_lines:
|
|
53
|
-
print(f" {line}")
|
|
54
|
-
else:
|
|
55
|
-
print("❌ 没有发现DEBUG级别的运行模式信息")
|
|
56
|
-
|
|
57
|
-
print("\n所有日志内容:")
|
|
58
|
-
lines = content.split('\n')
|
|
59
|
-
for i, line in enumerate(lines, 1):
|
|
60
|
-
if line.strip():
|
|
61
|
-
print(f"{i:3d}: {line}")
|
|
62
|
-
else:
|
|
63
|
-
print("❌ 日志文件未创建")
|
|
64
|
-
|
|
65
|
-
except Exception as e:
|
|
66
|
-
print(f"错误: {e}")
|
|
67
|
-
import traceback
|
|
68
|
-
traceback.print_exc()
|
|
69
|
-
|
|
70
|
-
print("=== 测试完成 ===")
|
|
71
|
-
|
|
72
|
-
if __name__ == "__main__":
|
|
1
|
+
#!/usr/bin/env python
|
|
2
|
+
# -*- coding: utf-8 -*-
|
|
3
|
+
"""
|
|
4
|
+
测试运行模式日志级别修改
|
|
5
|
+
"""
|
|
6
|
+
import sys
|
|
7
|
+
import os
|
|
8
|
+
sys.path.insert(0, '/')
|
|
9
|
+
|
|
10
|
+
def test_mode_log_level():
|
|
11
|
+
print("=== 测试运行模式日志级别修改 ===")
|
|
12
|
+
|
|
13
|
+
# 删除旧的日志文件
|
|
14
|
+
test_log_file = '/Users/oscar/projects/Crawlo/test_mode_change.log'
|
|
15
|
+
if os.path.exists(test_log_file):
|
|
16
|
+
os.remove(test_log_file)
|
|
17
|
+
|
|
18
|
+
# 准备测试设置
|
|
19
|
+
test_settings = {
|
|
20
|
+
'PROJECT_NAME': 'test_mode_change',
|
|
21
|
+
'LOG_LEVEL': 'INFO',
|
|
22
|
+
'LOG_FILE': test_log_file,
|
|
23
|
+
'RUN_MODE': 'standalone'
|
|
24
|
+
}
|
|
25
|
+
|
|
26
|
+
try:
|
|
27
|
+
# 初始化框架
|
|
28
|
+
from crawlo.core.framework_initializer import initialize_framework
|
|
29
|
+
settings = initialize_framework(test_settings)
|
|
30
|
+
|
|
31
|
+
print(f"设置初始化完成: {settings.get('PROJECT_NAME')}")
|
|
32
|
+
|
|
33
|
+
# 检查日志文件是否包含运行模式信息
|
|
34
|
+
if os.path.exists(test_log_file):
|
|
35
|
+
with open(test_log_file, 'r', encoding='utf-8') as f:
|
|
36
|
+
content = f.read()
|
|
37
|
+
print(f"日志文件内容长度: {len(content)} 字符")
|
|
38
|
+
|
|
39
|
+
# 检查是否还有INFO级别的运行模式信息
|
|
40
|
+
info_lines = [line for line in content.split('\n') if 'INFO' in line and '使用单机模式' in line]
|
|
41
|
+
debug_lines = [line for line in content.split('\n') if 'DEBUG' in line and '使用单机模式' in line]
|
|
42
|
+
|
|
43
|
+
if info_lines:
|
|
44
|
+
print("❌ 仍然发现INFO级别的运行模式信息:")
|
|
45
|
+
for line in info_lines:
|
|
46
|
+
print(f" {line}")
|
|
47
|
+
else:
|
|
48
|
+
print("✅ 没有发现INFO级别的运行模式信息")
|
|
49
|
+
|
|
50
|
+
if debug_lines:
|
|
51
|
+
print("✅ 发现DEBUG级别的运行模式信息:")
|
|
52
|
+
for line in debug_lines:
|
|
53
|
+
print(f" {line}")
|
|
54
|
+
else:
|
|
55
|
+
print("❌ 没有发现DEBUG级别的运行模式信息")
|
|
56
|
+
|
|
57
|
+
print("\n所有日志内容:")
|
|
58
|
+
lines = content.split('\n')
|
|
59
|
+
for i, line in enumerate(lines, 1):
|
|
60
|
+
if line.strip():
|
|
61
|
+
print(f"{i:3d}: {line}")
|
|
62
|
+
else:
|
|
63
|
+
print("❌ 日志文件未创建")
|
|
64
|
+
|
|
65
|
+
except Exception as e:
|
|
66
|
+
print(f"错误: {e}")
|
|
67
|
+
import traceback
|
|
68
|
+
traceback.print_exc()
|
|
69
|
+
|
|
70
|
+
print("=== 测试完成 ===")
|
|
71
|
+
|
|
72
|
+
if __name__ == "__main__":
|
|
73
73
|
test_mode_log_level()
|
tests/test_mode_consistency.py
CHANGED
|
@@ -1,52 +1,52 @@
|
|
|
1
|
-
#!/usr/bin/env python3
|
|
2
|
-
# -*- coding: utf-8 -*-
|
|
3
|
-
"""
|
|
4
|
-
测试模式一致性提示
|
|
5
|
-
"""
|
|
6
|
-
import asyncio
|
|
7
|
-
import sys
|
|
8
|
-
import os
|
|
9
|
-
|
|
10
|
-
# 添加项目根目录到路径
|
|
11
|
-
sys.path.insert(0, os.path.join(os.path.dirname(__file__), '..'))
|
|
12
|
-
|
|
13
|
-
from crawlo.crawler import CrawlerProcess
|
|
14
|
-
from crawlo.spider import Spider
|
|
15
|
-
from crawlo import Request
|
|
16
|
-
|
|
17
|
-
|
|
18
|
-
class TestSpider(Spider):
|
|
19
|
-
name = "test_mode_spider"
|
|
20
|
-
|
|
21
|
-
def start_requests(self):
|
|
22
|
-
yield Request("https://httpbin.org/get")
|
|
23
|
-
|
|
24
|
-
def parse(self, response):
|
|
25
|
-
yield {"url": response.url, "status": response.status_code} # 修复:使用status_code而不是status
|
|
26
|
-
|
|
27
|
-
|
|
28
|
-
async def test_mode_consistency():
|
|
29
|
-
"""测试模式一致性提示"""
|
|
30
|
-
print("测试模式一致性提示...")
|
|
31
|
-
|
|
32
|
-
try:
|
|
33
|
-
# 创建爬虫进程
|
|
34
|
-
process = CrawlerProcess()
|
|
35
|
-
|
|
36
|
-
# 添加爬虫
|
|
37
|
-
await process.crawl(TestSpider)
|
|
38
|
-
|
|
39
|
-
print("模式一致性测试完成")
|
|
40
|
-
|
|
41
|
-
except Exception as e:
|
|
42
|
-
print(f"测试失败: {e}")
|
|
43
|
-
import traceback
|
|
44
|
-
traceback.print_exc()
|
|
45
|
-
|
|
46
|
-
|
|
47
|
-
if __name__ == "__main__":
|
|
48
|
-
# 设置日志级别
|
|
49
|
-
import logging
|
|
50
|
-
logging.basicConfig(level=logging.INFO)
|
|
51
|
-
|
|
1
|
+
#!/usr/bin/env python3
|
|
2
|
+
# -*- coding: utf-8 -*-
|
|
3
|
+
"""
|
|
4
|
+
测试模式一致性提示
|
|
5
|
+
"""
|
|
6
|
+
import asyncio
|
|
7
|
+
import sys
|
|
8
|
+
import os
|
|
9
|
+
|
|
10
|
+
# 添加项目根目录到路径
|
|
11
|
+
sys.path.insert(0, os.path.join(os.path.dirname(__file__), '..'))
|
|
12
|
+
|
|
13
|
+
from crawlo.crawler import CrawlerProcess
|
|
14
|
+
from crawlo.spider import Spider
|
|
15
|
+
from crawlo import Request
|
|
16
|
+
|
|
17
|
+
|
|
18
|
+
class TestSpider(Spider):
|
|
19
|
+
name = "test_mode_spider"
|
|
20
|
+
|
|
21
|
+
def start_requests(self):
|
|
22
|
+
yield Request("https://httpbin.org/get")
|
|
23
|
+
|
|
24
|
+
def parse(self, response):
|
|
25
|
+
yield {"url": response.url, "status": response.status_code} # 修复:使用status_code而不是status
|
|
26
|
+
|
|
27
|
+
|
|
28
|
+
async def test_mode_consistency():
|
|
29
|
+
"""测试模式一致性提示"""
|
|
30
|
+
print("测试模式一致性提示...")
|
|
31
|
+
|
|
32
|
+
try:
|
|
33
|
+
# 创建爬虫进程
|
|
34
|
+
process = CrawlerProcess()
|
|
35
|
+
|
|
36
|
+
# 添加爬虫
|
|
37
|
+
await process.crawl(TestSpider)
|
|
38
|
+
|
|
39
|
+
print("模式一致性测试完成")
|
|
40
|
+
|
|
41
|
+
except Exception as e:
|
|
42
|
+
print(f"测试失败: {e}")
|
|
43
|
+
import traceback
|
|
44
|
+
traceback.print_exc()
|
|
45
|
+
|
|
46
|
+
|
|
47
|
+
if __name__ == "__main__":
|
|
48
|
+
# 设置日志级别
|
|
49
|
+
import logging
|
|
50
|
+
logging.basicConfig(level=logging.INFO)
|
|
51
|
+
|
|
52
52
|
asyncio.run(test_mode_consistency())
|