crawlo 1.4.5__py3-none-any.whl → 1.4.7__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release: this version of crawlo might be problematic.
- crawlo/__init__.py +90 -89
- crawlo/__version__.py +1 -1
- crawlo/cli.py +75 -75
- crawlo/commands/__init__.py +14 -14
- crawlo/commands/check.py +594 -594
- crawlo/commands/genspider.py +186 -186
- crawlo/commands/help.py +140 -138
- crawlo/commands/list.py +155 -155
- crawlo/commands/run.py +379 -341
- crawlo/commands/startproject.py +460 -460
- crawlo/commands/stats.py +187 -187
- crawlo/commands/utils.py +196 -196
- crawlo/config.py +320 -312
- crawlo/config_validator.py +277 -277
- crawlo/core/__init__.py +52 -52
- crawlo/core/engine.py +451 -438
- crawlo/core/processor.py +47 -47
- crawlo/core/scheduler.py +290 -291
- crawlo/crawler.py +698 -657
- crawlo/data/__init__.py +5 -5
- crawlo/data/user_agents.py +194 -194
- crawlo/downloader/__init__.py +280 -276
- crawlo/downloader/aiohttp_downloader.py +233 -233
- crawlo/downloader/cffi_downloader.py +250 -245
- crawlo/downloader/httpx_downloader.py +265 -259
- crawlo/downloader/hybrid_downloader.py +212 -212
- crawlo/downloader/playwright_downloader.py +425 -402
- crawlo/downloader/selenium_downloader.py +486 -472
- crawlo/event.py +45 -11
- crawlo/exceptions.py +215 -82
- crawlo/extension/__init__.py +65 -64
- crawlo/extension/health_check.py +141 -141
- crawlo/extension/log_interval.py +94 -94
- crawlo/extension/log_stats.py +70 -70
- crawlo/extension/logging_extension.py +53 -61
- crawlo/extension/memory_monitor.py +104 -104
- crawlo/extension/performance_profiler.py +133 -133
- crawlo/extension/request_recorder.py +107 -107
- crawlo/factories/__init__.py +27 -27
- crawlo/factories/base.py +68 -68
- crawlo/factories/crawler.py +104 -103
- crawlo/factories/registry.py +84 -84
- crawlo/factories/utils.py +135 -0
- crawlo/filters/__init__.py +170 -153
- crawlo/filters/aioredis_filter.py +348 -264
- crawlo/filters/memory_filter.py +261 -276
- crawlo/framework.py +306 -292
- crawlo/initialization/__init__.py +44 -44
- crawlo/initialization/built_in.py +391 -434
- crawlo/initialization/context.py +141 -141
- crawlo/initialization/core.py +240 -194
- crawlo/initialization/phases.py +230 -149
- crawlo/initialization/registry.py +143 -145
- crawlo/initialization/utils.py +49 -0
- crawlo/interfaces.py +23 -23
- crawlo/items/__init__.py +23 -23
- crawlo/items/base.py +23 -23
- crawlo/items/fields.py +52 -52
- crawlo/items/items.py +104 -104
- crawlo/logging/__init__.py +42 -46
- crawlo/logging/config.py +277 -197
- crawlo/logging/factory.py +175 -171
- crawlo/logging/manager.py +104 -112
- crawlo/middleware/__init__.py +87 -24
- crawlo/middleware/default_header.py +132 -132
- crawlo/middleware/download_delay.py +104 -104
- crawlo/middleware/middleware_manager.py +142 -142
- crawlo/middleware/offsite.py +123 -123
- crawlo/middleware/proxy.py +209 -386
- crawlo/middleware/request_ignore.py +86 -86
- crawlo/middleware/response_code.py +150 -150
- crawlo/middleware/response_filter.py +136 -136
- crawlo/middleware/retry.py +124 -124
- crawlo/mode_manager.py +287 -253
- crawlo/network/__init__.py +21 -21
- crawlo/network/request.py +375 -379
- crawlo/network/response.py +569 -664
- crawlo/pipelines/__init__.py +53 -22
- crawlo/pipelines/base_pipeline.py +452 -0
- crawlo/pipelines/bloom_dedup_pipeline.py +146 -146
- crawlo/pipelines/console_pipeline.py +39 -39
- crawlo/pipelines/csv_pipeline.py +316 -316
- crawlo/pipelines/database_dedup_pipeline.py +197 -197
- crawlo/pipelines/json_pipeline.py +218 -218
- crawlo/pipelines/memory_dedup_pipeline.py +105 -105
- crawlo/pipelines/mongo_pipeline.py +140 -132
- crawlo/pipelines/mysql_pipeline.py +470 -326
- crawlo/pipelines/pipeline_manager.py +100 -100
- crawlo/pipelines/redis_dedup_pipeline.py +155 -156
- crawlo/project.py +347 -347
- crawlo/queue/__init__.py +10 -0
- crawlo/queue/pqueue.py +38 -38
- crawlo/queue/queue_manager.py +591 -525
- crawlo/queue/redis_priority_queue.py +519 -370
- crawlo/settings/__init__.py +7 -7
- crawlo/settings/default_settings.py +285 -270
- crawlo/settings/setting_manager.py +219 -219
- crawlo/spider/__init__.py +657 -657
- crawlo/stats_collector.py +82 -73
- crawlo/subscriber.py +129 -129
- crawlo/task_manager.py +138 -138
- crawlo/templates/crawlo.cfg.tmpl +10 -10
- crawlo/templates/project/__init__.py.tmpl +2 -4
- crawlo/templates/project/items.py.tmpl +13 -17
- crawlo/templates/project/middlewares.py.tmpl +38 -38
- crawlo/templates/project/pipelines.py.tmpl +35 -36
- crawlo/templates/project/settings.py.tmpl +110 -157
- crawlo/templates/project/settings_distributed.py.tmpl +156 -161
- crawlo/templates/project/settings_gentle.py.tmpl +170 -171
- crawlo/templates/project/settings_high_performance.py.tmpl +171 -172
- crawlo/templates/project/settings_minimal.py.tmpl +99 -77
- crawlo/templates/project/settings_simple.py.tmpl +168 -169
- crawlo/templates/project/spiders/__init__.py.tmpl +9 -9
- crawlo/templates/run.py.tmpl +23 -30
- crawlo/templates/spider/spider.py.tmpl +33 -144
- crawlo/templates/spiders_init.py.tmpl +5 -10
- crawlo/tools/__init__.py +86 -189
- crawlo/tools/date_tools.py +289 -289
- crawlo/tools/distributed_coordinator.py +384 -384
- crawlo/tools/scenario_adapter.py +262 -262
- crawlo/tools/text_cleaner.py +232 -232
- crawlo/utils/__init__.py +50 -50
- crawlo/utils/batch_processor.py +276 -259
- crawlo/utils/config_manager.py +442 -0
- crawlo/utils/controlled_spider_mixin.py +439 -439
- crawlo/utils/db_helper.py +250 -244
- crawlo/utils/error_handler.py +410 -410
- crawlo/utils/fingerprint.py +121 -121
- crawlo/utils/func_tools.py +82 -82
- crawlo/utils/large_scale_helper.py +344 -344
- crawlo/utils/leak_detector.py +335 -0
- crawlo/utils/log.py +79 -79
- crawlo/utils/misc.py +81 -81
- crawlo/utils/mongo_connection_pool.py +157 -0
- crawlo/utils/mysql_connection_pool.py +197 -0
- crawlo/utils/performance_monitor.py +285 -285
- crawlo/utils/queue_helper.py +175 -175
- crawlo/utils/redis_checker.py +91 -0
- crawlo/utils/redis_connection_pool.py +578 -388
- crawlo/utils/redis_key_validator.py +198 -198
- crawlo/utils/request.py +278 -256
- crawlo/utils/request_serializer.py +225 -225
- crawlo/utils/resource_manager.py +337 -0
- crawlo/utils/selector_helper.py +137 -137
- crawlo/utils/singleton.py +70 -0
- crawlo/utils/spider_loader.py +201 -201
- crawlo/utils/text_helper.py +94 -94
- crawlo/utils/{url.py → url_utils.py} +39 -39
- crawlo-1.4.7.dist-info/METADATA +689 -0
- crawlo-1.4.7.dist-info/RECORD +347 -0
- examples/__init__.py +7 -7
- tests/__init__.py +7 -7
- tests/advanced_tools_example.py +217 -275
- tests/authenticated_proxy_example.py +110 -106
- tests/baidu_performance_test.py +108 -108
- tests/baidu_test.py +59 -59
- tests/bug_check_test.py +250 -250
- tests/cleaners_example.py +160 -160
- tests/comprehensive_framework_test.py +212 -212
- tests/comprehensive_test.py +81 -81
- tests/comprehensive_testing_summary.md +186 -186
- tests/config_validation_demo.py +142 -142
- tests/controlled_spider_example.py +205 -205
- tests/date_tools_example.py +180 -180
- tests/debug_configure.py +69 -69
- tests/debug_framework_logger.py +84 -84
- tests/debug_log_config.py +126 -126
- tests/debug_log_levels.py +63 -63
- tests/debug_pipelines.py +66 -66
- tests/detailed_log_test.py +233 -233
- tests/direct_selector_helper_test.py +96 -96
- tests/distributed_dedup_test.py +467 -0
- tests/distributed_test.py +66 -66
- tests/distributed_test_debug.py +76 -76
- tests/dynamic_loading_example.py +523 -523
- tests/dynamic_loading_test.py +104 -104
- tests/error_handling_example.py +171 -171
- tests/explain_mysql_update_behavior.py +77 -0
- tests/final_comprehensive_test.py +151 -151
- tests/final_log_test.py +260 -260
- tests/final_validation_test.py +182 -182
- tests/fix_log_test.py +142 -142
- tests/framework_performance_test.py +202 -202
- tests/log_buffering_test.py +111 -111
- tests/log_generation_timing_test.py +153 -153
- tests/monitor_redis_dedup.sh +72 -0
- tests/ofweek_scrapy/ofweek_scrapy/items.py +12 -12
- tests/ofweek_scrapy/ofweek_scrapy/middlewares.py +100 -100
- tests/ofweek_scrapy/ofweek_scrapy/pipelines.py +13 -13
- tests/ofweek_scrapy/ofweek_scrapy/settings.py +84 -84
- tests/ofweek_scrapy/scrapy.cfg +11 -11
- tests/optimized_performance_test.py +211 -211
- tests/performance_comparison.py +244 -244
- tests/queue_blocking_test.py +113 -113
- tests/queue_test.py +89 -89
- tests/redis_key_validation_demo.py +130 -130
- tests/request_params_example.py +150 -150
- tests/response_improvements_example.py +144 -144
- tests/scrapy_comparison/ofweek_scrapy.py +138 -138
- tests/scrapy_comparison/scrapy_test.py +133 -133
- tests/simple_cli_test.py +55 -0
- tests/simple_command_test.py +119 -119
- tests/simple_crawlo_test.py +126 -126
- tests/simple_follow_test.py +38 -38
- tests/simple_log_test2.py +137 -137
- tests/simple_optimization_test.py +128 -128
- tests/simple_queue_type_test.py +41 -41
- tests/simple_response_selector_test.py +94 -94
- tests/simple_selector_helper_test.py +154 -154
- tests/simple_selector_test.py +207 -207
- tests/simple_spider_test.py +49 -49
- tests/simple_url_test.py +73 -73
- tests/simulate_mysql_update_test.py +140 -0
- tests/spider_log_timing_test.py +177 -177
- tests/test_advanced_tools.py +148 -148
- tests/test_all_commands.py +230 -230
- tests/test_all_pipeline_fingerprints.py +133 -133
- tests/test_all_redis_key_configs.py +145 -145
- tests/test_asyncmy_usage.py +57 -0
- tests/test_batch_processor.py +178 -178
- tests/test_cleaners.py +54 -54
- tests/test_cli_arguments.py +119 -0
- tests/test_component_factory.py +174 -174
- tests/test_config_consistency.py +80 -80
- tests/test_config_merge.py +152 -152
- tests/test_config_validator.py +182 -182
- tests/test_controlled_spider_mixin.py +79 -79
- tests/test_crawler_process_import.py +38 -38
- tests/test_crawler_process_spider_modules.py +47 -47
- tests/test_crawlo_proxy_integration.py +114 -108
- tests/test_date_tools.py +123 -123
- tests/test_dedup_fix.py +220 -220
- tests/test_dedup_pipeline_consistency.py +124 -124
- tests/test_default_header_middleware.py +313 -313
- tests/test_distributed.py +65 -65
- tests/test_double_crawlo_fix.py +204 -204
- tests/test_double_crawlo_fix_simple.py +124 -124
- tests/test_download_delay_middleware.py +221 -221
- tests/test_downloader_proxy_compatibility.py +272 -268
- tests/test_edge_cases.py +305 -305
- tests/test_encoding_core.py +56 -56
- tests/test_encoding_detection.py +126 -126
- tests/test_enhanced_error_handler.py +270 -270
- tests/test_enhanced_error_handler_comprehensive.py +245 -245
- tests/test_error_handler_compatibility.py +112 -112
- tests/test_factories.py +252 -252
- tests/test_factory_compatibility.py +196 -196
- tests/test_final_validation.py +153 -153
- tests/test_fingerprint_consistency.py +135 -135
- tests/test_fingerprint_simple.py +51 -51
- tests/test_get_component_logger.py +83 -83
- tests/test_hash_performance.py +99 -99
- tests/test_integration.py +169 -169
- tests/test_item_dedup_redis_key.py +122 -122
- tests/test_large_scale_helper.py +235 -235
- tests/test_logging_enhancements.py +374 -374
- tests/test_logging_final.py +184 -184
- tests/test_logging_integration.py +312 -312
- tests/test_logging_system.py +282 -282
- tests/test_middleware_debug.py +141 -141
- tests/test_mode_consistency.py +51 -51
- tests/test_multi_directory.py +67 -67
- tests/test_multiple_spider_modules.py +80 -80
- tests/test_mysql_pipeline_config.py +165 -0
- tests/test_mysql_pipeline_error.py +99 -0
- tests/test_mysql_pipeline_init_log.py +83 -0
- tests/test_mysql_pipeline_integration.py +133 -0
- tests/test_mysql_pipeline_refactor.py +144 -0
- tests/test_mysql_pipeline_refactor_simple.py +86 -0
- tests/test_mysql_pipeline_robustness.py +196 -0
- tests/test_mysql_pipeline_types.py +89 -0
- tests/test_mysql_update_columns.py +94 -0
- tests/test_offsite_middleware.py +244 -244
- tests/test_offsite_middleware_simple.py +203 -203
- tests/test_optimized_selector_naming.py +100 -100
- tests/test_parsel.py +29 -29
- tests/test_performance.py +327 -327
- tests/test_performance_monitor.py +115 -115
- tests/test_pipeline_fingerprint_consistency.py +86 -86
- tests/test_priority_behavior.py +211 -211
- tests/test_priority_consistency.py +151 -151
- tests/test_priority_consistency_fixed.py +249 -249
- tests/test_proxy_health_check.py +32 -32
- tests/test_proxy_middleware.py +217 -121
- tests/test_proxy_middleware_enhanced.py +212 -216
- tests/test_proxy_middleware_integration.py +142 -137
- tests/test_proxy_middleware_refactored.py +207 -184
- tests/test_proxy_only.py +84 -0
- tests/test_proxy_providers.py +56 -56
- tests/test_proxy_stats.py +19 -19
- tests/test_proxy_strategies.py +59 -59
- tests/test_proxy_with_downloader.py +153 -0
- tests/test_queue_empty_check.py +41 -41
- tests/test_queue_manager_double_crawlo.py +173 -173
- tests/test_queue_manager_redis_key.py +179 -179
- tests/test_queue_naming.py +154 -154
- tests/test_queue_type.py +106 -106
- tests/test_queue_type_redis_config_consistency.py +130 -130
- tests/test_random_headers_default.py +322 -322
- tests/test_random_headers_necessity.py +308 -308
- tests/test_random_user_agent.py +72 -72
- tests/test_redis_config.py +28 -28
- tests/test_redis_connection_pool.py +294 -294
- tests/test_redis_key_naming.py +181 -181
- tests/test_redis_key_validator.py +123 -123
- tests/test_redis_queue.py +224 -224
- tests/test_redis_queue_name_fix.py +175 -175
- tests/test_redis_queue_type_fallback.py +129 -129
- tests/test_request_ignore_middleware.py +182 -182
- tests/test_request_params.py +111 -111
- tests/test_request_serialization.py +70 -70
- tests/test_response_code_middleware.py +349 -349
- tests/test_response_filter_middleware.py +427 -427
- tests/test_response_follow.py +104 -104
- tests/test_response_improvements.py +152 -152
- tests/test_response_selector_methods.py +92 -92
- tests/test_response_url_methods.py +70 -70
- tests/test_response_urljoin.py +86 -86
- tests/test_retry_middleware.py +333 -333
- tests/test_retry_middleware_realistic.py +273 -273
- tests/test_scheduler.py +252 -252
- tests/test_scheduler_config_update.py +133 -133
- tests/test_scrapy_style_encoding.py +112 -112
- tests/test_selector_helper.py +100 -100
- tests/test_selector_optimizations.py +146 -146
- tests/test_simple_response.py +61 -61
- tests/test_spider_loader.py +49 -49
- tests/test_spider_loader_comprehensive.py +69 -69
- tests/test_spider_modules.py +84 -84
- tests/test_spiders/test_spider.py +9 -9
- tests/test_telecom_spider_redis_key.py +205 -205
- tests/test_template_content.py +87 -87
- tests/test_template_redis_key.py +134 -134
- tests/test_tools.py +159 -159
- tests/test_user_agent_randomness.py +176 -176
- tests/test_user_agents.py +96 -96
- tests/untested_features_report.md +138 -138
- tests/verify_debug.py +51 -51
- tests/verify_distributed.py +117 -117
- tests/verify_log_fix.py +111 -111
- tests/verify_mysql_warnings.py +110 -0
- crawlo/logging/async_handler.py +0 -181
- crawlo/logging/monitor.py +0 -153
- crawlo/logging/sampler.py +0 -167
- crawlo/middleware/simple_proxy.py +0 -65
- crawlo/tools/authenticated_proxy.py +0 -241
- crawlo/tools/data_formatter.py +0 -226
- crawlo/tools/data_validator.py +0 -181
- crawlo/tools/encoding_converter.py +0 -127
- crawlo/tools/network_diagnostic.py +0 -365
- crawlo/tools/request_tools.py +0 -83
- crawlo/tools/retry_mechanism.py +0 -224
- crawlo/utils/env_config.py +0 -143
- crawlo/utils/large_scale_config.py +0 -287
- crawlo/utils/system.py +0 -11
- crawlo/utils/tools.py +0 -5
- crawlo-1.4.5.dist-info/METADATA +0 -329
- crawlo-1.4.5.dist-info/RECORD +0 -347
- tests/env_config_example.py +0 -134
- tests/ofweek_scrapy/ofweek_scrapy/spiders/ofweek_spider.py +0 -162
- tests/test_authenticated_proxy.py +0 -142
- tests/test_comprehensive.py +0 -147
- tests/test_dynamic_downloaders_proxy.py +0 -125
- tests/test_dynamic_proxy.py +0 -93
- tests/test_dynamic_proxy_config.py +0 -147
- tests/test_dynamic_proxy_real.py +0 -110
- tests/test_env_config.py +0 -122
- tests/test_framework_env_usage.py +0 -104
- tests/test_large_scale_config.py +0 -113
- tests/test_proxy_api.py +0 -265
- tests/test_real_scenario_proxy.py +0 -196
- tests/tools_example.py +0 -261
- {crawlo-1.4.5.dist-info → crawlo-1.4.7.dist-info}/WHEEL +0 -0
- {crawlo-1.4.5.dist-info → crawlo-1.4.7.dist-info}/entry_points.txt +0 -0
- {crawlo-1.4.5.dist-info → crawlo-1.4.7.dist-info}/top_level.txt +0 -0
crawlo/middleware/__init__.py
CHANGED

@@ -1,24 +1,87 @@
 #!/usr/bin/python
 # -*- coding:UTF-8 -*-
-from typing import TYPE_CHECKING
+from typing import TYPE_CHECKING, Optional, Union
 
 if TYPE_CHECKING:
     from crawlo import Request, Response
 
 
-class BaseMiddleware
+class BaseMiddleware:
+    """Middleware base class.
+
+    Defines the standard middleware interface; every custom middleware should inherit from it.
+
+    Middleware processing flow:
+    1. process_request: runs before a request is sent
+    2. process_response: runs after a response is received
+    3. process_exception: runs when an exception is raised
+    """
+
+    def process_request(
+        self,
+        request: 'Request',
+        spider
+    ) -> Optional[Union['Request', 'Response']]:
+        """Process a request.
+
+        Args:
+            request: the request about to be processed
+            spider: the current spider instance
+
+        Returns:
+            None: continue processing
+            Request: replace the original request
+            Response: skip the download and return this response directly
+        """
+        pass
+
+    def process_response(
+        self,
+        request: 'Request',
+        response: 'Response',
+        spider
+    ) -> Union['Request', 'Response']:
+        """Process a response.
+
+        Args:
+            request: the original request
+            response: the received response
+            spider: the current spider instance
+
+        Returns:
+            Request: re-schedule the request
+            Response: return the (possibly modified) response
+        """
+        return response
+
+    def process_exception(
+        self,
+        request: 'Request',
+        exp: Exception,
+        spider
+    ) -> Optional[Union['Request', 'Response']]:
+        """Process an exception.
+
+        Args:
+            request: the request that raised the exception
+            exp: the caught exception
+            spider: the current spider instance
+
+        Returns:
+            None: keep propagating the exception
+            Request: re-schedule the request
+            Response: return a response instead
+        """
+        pass
+
+    @classmethod
+    def create_instance(cls, crawler):
+        """Create a middleware instance.
+
+        Args:
+            crawler: the Crawler instance, carrying settings and other configuration
+
+        Returns:
+            A middleware instance
+        """
+        return cls()
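The rewritten BaseMiddleware is an ordinary Python class, so a custom middleware only needs to subclass it and override the hooks it cares about. A minimal sketch against the interface above; TraceHeaderMiddleware and the TRACE_ID setting are hypothetical examples, not part of the package:

from crawlo.middleware import BaseMiddleware


class TraceHeaderMiddleware(BaseMiddleware):
    """Hypothetical middleware that stamps every outgoing request with a trace id."""

    def __init__(self, trace_id):
        self.trace_id = trace_id

    @classmethod
    def create_instance(cls, crawler):
        # Mirror create_instance above: pull configuration off crawler.settings.
        # TRACE_ID is an assumed project setting, not one defined by crawlo.
        return cls(trace_id=crawler.settings.get('TRACE_ID'))

    def process_request(self, request, spider):
        # Mutate the request in place and return None so processing continues.
        request.headers['X-Trace-Id'] = self.trace_id
        return None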
crawlo/middleware/default_header.py
CHANGED

@@ -1,132 +1,132 @@
 #!/usr/bin/python
 # -*- coding:UTF-8 -*-
 """
 DefaultHeaderMiddleware
 Adds default request headers to every request, with support for random User-Agent rotation and similar features.
 """
 
 import random
-from crawlo.
+from crawlo.logging import get_logger
 from crawlo.exceptions import NotConfiguredError
 # Import the User-Agent data
 from crawlo.data.user_agents import get_user_agents
 
 
 class DefaultHeaderMiddleware(object):
     """
     DefaultHeaderMiddleware
     Adds default request headers (including User-Agent) to every request, with support for random rotation.
     """
 
     def __init__(self, settings, log_level):
         """
         Initialize the middleware.
         """
-        self.logger = get_logger(self.__class__.__name__
+        self.logger = get_logger(self.__class__.__name__)
 
         # Default request headers
         self.headers = settings.get_dict('DEFAULT_REQUEST_HEADERS', {})
 
         # User-Agent setting
         self.user_agent = settings.get('USER_AGENT')
 
         # Random User-Agent list
         self.user_agents = settings.get_list('USER_AGENTS', [])
 
         # Random header settings
         self.random_headers = settings.get_dict('RANDOM_HEADERS', {})
 
         # Randomness switch
         self.randomness = settings.get_bool("RANDOMNESS", False)
 
         # Whether random User-Agent is enabled
         self.random_user_agent_enabled = settings.get_bool("RANDOM_USER_AGENT_ENABLED", False)
 
         # User-Agent device type
         self.user_agent_device_type = settings.get("USER_AGENT_DEVICE_TYPE", "all")
 
         # Disable the middleware when no default headers, User-Agent, or random features are configured
         if not self.headers and not self.user_agent and not self.user_agents and not self.random_headers:
             raise NotConfiguredError(
                 "DEFAULT_REQUEST_HEADERS, USER_AGENT and the random header settings are all unset; DefaultHeaderMiddleware is disabled")
 
         # If a User-Agent is configured, fold it into the default headers
         if self.user_agent:
             self.headers.setdefault('User-Agent', self.user_agent)
 
         # If random User-Agent is enabled but no list was supplied, use the built-in list
         if self.random_user_agent_enabled and not self.user_agents:
             self.user_agents = get_user_agents(self.user_agent_device_type)
 
         self.logger.debug(f"DefaultHeaderMiddleware enabled [default headers={len(self.headers)}, "
                           f"User-Agent list={len(self.user_agents)}, "
                           f"random headers={len(self.random_headers)}, "
                           f"randomness={'on' if self.randomness else 'off'}]")
 
     @classmethod
     def create_instance(cls, crawler):
         """
         Create the middleware instance.
         """
         o = cls(
             settings=crawler.settings,
             log_level=crawler.settings.get('LOG_LEVEL')
         )
         return o
 
     def _get_random_user_agent(self):
         """
         Pick a random User-Agent.
         """
         if self.user_agents:
             return random.choice(self.user_agents)
         return None
 
     def _apply_random_headers(self, request):
         """
         Apply the random request headers.
         """
         if not self.random_headers:
             return
 
         for header_name, header_values in self.random_headers.items():
             # If header_values is a list, pick one value at random
             if isinstance(header_values, (list, tuple)):
                 header_value = random.choice(header_values)
             else:
                 header_value = header_values
 
             # Only add the header when the request does not already carry it
             if header_name not in request.headers:
                 request.headers[header_name] = header_value
                 self.logger.debug(f"Added random header for {request.url}: {header_name}={header_value[:50]}...")
 
     def process_request(self, request, _spider):
         """
         Process the request and add the default headers.
         """
         # Add the default request headers
         if self.headers:
             added_headers = []
             for key, value in self.headers.items():
                 # Only add the header when the request does not already carry it
                 if key not in request.headers:
                     request.headers[key] = value
                     added_headers.append(key)
 
             # Log the added headers (debug level only)
             if added_headers and self.logger.isEnabledFor(10):  # DEBUG level
                 self.logger.debug(f"Added {len(added_headers)} default headers for {request.url}: {added_headers}")
 
         # Random User-Agent handling
         if self.random_user_agent_enabled and 'User-Agent' not in request.headers:
             random_ua = self._get_random_user_agent()
             if random_ua:
                 request.headers['User-Agent'] = random_ua
                 self.logger.debug(f"Set random User-Agent for {request.url}: {random_ua[:50]}...")
 
         # Random request header handling
         if self.randomness:
             self._apply_random_headers(request)
 
         return None
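Every knob DefaultHeaderMiddleware reads comes out of the project settings. A sketch of settings that would exercise the code paths above, assuming a typical crawlo settings.py; the concrete values are illustrative only:

# Hypothetical project settings exercising the options read in __init__ above.
DEFAULT_REQUEST_HEADERS = {
    'Accept': 'text/html,application/xhtml+xml',
    'Accept-Language': 'en-US,en;q=0.9',
}
USER_AGENT = 'Mozilla/5.0 (compatible; ExampleBot/1.0)'  # folded into the defaults via setdefault
RANDOM_USER_AGENT_ENABLED = True   # with no USER_AGENTS list, falls back to crawlo.data.user_agents
USER_AGENT_DEVICE_TYPE = 'all'     # device filter passed through to get_user_agents()
RANDOMNESS = True                  # turns on _apply_random_headers()
RANDOM_HEADERS = {
    # a list means one value is chosen per request; a plain string is used as-is
    'Accept-Encoding': ['gzip, deflate', 'gzip, deflate, br'],
}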
crawlo/middleware/download_delay.py
CHANGED

@@ -1,105 +1,105 @@
 #!/usr/bin/python
 # -*- coding:UTF-8 -*-
 """
 DownloadDelayMiddleware
 Controls the delay between requests, supporting both fixed and random delays.
 """
 
 from asyncio import sleep
 from random import uniform
-from crawlo.
+from crawlo.logging import get_logger
 from crawlo.exceptions import NotConfiguredError
 
 
 class DownloadDelayMiddleware(object):
     """
     DownloadDelayMiddleware
     Controls the delay between requests, supporting both fixed and random delays.
 
     Features:
     - fixed delay
     - random delay
     - detailed log output
     - delay statistics
     """
 
     def __init__(self, settings, log_level, stats=None):
         """
         Initialize the middleware.
 
         Args:
             settings: the settings manager
             log_level: the log level
             stats: the stats collector (optional)
         """
         self.delay = settings.get_float("DOWNLOAD_DELAY")
         if not self.delay:
             raise NotConfiguredError("DOWNLOAD_DELAY not set or is zero")
 
         self.randomness = settings.get_bool("RANDOMNESS", False)
 
         # Read the random-range setting defensively
         random_range = settings.get_list("RANDOM_RANGE")
         if len(random_range) >= 2:
             try:
                 self.floor = float(random_range[0])
                 self.upper = float(random_range[1])
             except (ValueError, TypeError):
                 # Fall back to the defaults if the setting is invalid
                 self.floor, self.upper = 0.5, 1.5
         else:
             # Fall back to the defaults if the setting is incomplete
             self.floor, self.upper = 0.5, 1.5
 
-        self.logger = get_logger(self.__class__.__name__
+        self.logger = get_logger(self.__class__.__name__)
         self.stats = stats
 
     @classmethod
     def create_instance(cls, crawler):
         """
         Create the middleware instance.
 
         Args:
             crawler: the crawler instance
 
         Returns:
             DownloadDelayMiddleware: the middleware instance
         """
         o = cls(
             settings=crawler.settings,
             log_level=crawler.settings.get('LOG_LEVEL'),
             stats=getattr(crawler, 'stats', None)
         )
         return o
 
     async def process_request(self, _request, _spider):
         """
         Process the request and apply the delay.
 
         Args:
             _request: the request object
             _spider: the spider instance
         """
         if self.randomness:
             # Compute a random delay
             delay_time = uniform(self.delay * self.floor, self.delay * self.upper)
             await sleep(delay_time)
 
             # Record statistics
             if self.stats:
                 self.stats.inc_value('download_delay/random_count')
                 self.stats.inc_value('download_delay/random_total_time', delay_time)
 
             # Log it
             self.logger.debug(f"Applied random delay: {delay_time:.2f}s (range: {self.delay * self.floor:.2f} - {self.delay * self.upper:.2f})")
         else:
             # Apply the fixed delay
             await sleep(self.delay)
 
             # Record statistics
             if self.stats:
                 self.stats.inc_value('download_delay/fixed_count')
                 self.stats.inc_value('download_delay/fixed_total_time', self.delay)
 
             # Log it
             self.logger.debug(f"Applied fixed delay: {self.delay:.2f}s")
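With RANDOMNESS enabled, the effective delay is uniform(DOWNLOAD_DELAY * floor, DOWNLOAD_DELAY * upper); otherwise the fixed DOWNLOAD_DELAY applies. A sketch of the relevant settings, with illustrative values only:

# Hypothetical settings for the delay middleware above.
DOWNLOAD_DELAY = 2.0        # base delay in seconds; unset or zero raises NotConfiguredError
RANDOMNESS = True           # False applies the fixed 2.0 s delay instead
RANDOM_RANGE = [0.5, 1.5]   # multipliers: effective delay = uniform(1.0, 3.0) seconds;
                            # malformed or missing values fall back to (0.5, 1.5)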