crawlo 1.1.6__py3-none-any.whl → 1.1.9__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of crawlo might be problematic; see the registry's security advisory page for more details.
- crawlo/__init__.py +61 -61
- crawlo/__version__.py +1 -1
- crawlo/cleaners/__init__.py +60 -60
- crawlo/cleaners/data_formatter.py +225 -225
- crawlo/cleaners/encoding_converter.py +125 -125
- crawlo/cleaners/text_cleaner.py +232 -232
- crawlo/cli.py +65 -40
- crawlo/commands/__init__.py +14 -13
- crawlo/commands/check.py +594 -594
- crawlo/commands/genspider.py +151 -151
- crawlo/commands/help.py +133 -0
- crawlo/commands/list.py +155 -155
- crawlo/commands/run.py +292 -292
- crawlo/commands/startproject.py +418 -418
- crawlo/commands/stats.py +188 -188
- crawlo/commands/utils.py +186 -186
- crawlo/config.py +312 -312
- crawlo/config_validator.py +252 -252
- crawlo/core/__init__.py +2 -2
- crawlo/core/engine.py +345 -345
- crawlo/core/processor.py +40 -40
- crawlo/core/scheduler.py +136 -136
- crawlo/crawler.py +1027 -1027
- crawlo/downloader/__init__.py +266 -266
- crawlo/downloader/aiohttp_downloader.py +220 -220
- crawlo/downloader/cffi_downloader.py +256 -256
- crawlo/downloader/httpx_downloader.py +259 -259
- crawlo/downloader/hybrid_downloader.py +213 -213
- crawlo/downloader/playwright_downloader.py +402 -402
- crawlo/downloader/selenium_downloader.py +472 -472
- crawlo/event.py +11 -11
- crawlo/exceptions.py +81 -81
- crawlo/extension/__init__.py +37 -37
- crawlo/extension/health_check.py +141 -141
- crawlo/extension/log_interval.py +57 -57
- crawlo/extension/log_stats.py +81 -81
- crawlo/extension/logging_extension.py +43 -43
- crawlo/extension/memory_monitor.py +104 -104
- crawlo/extension/performance_profiler.py +133 -133
- crawlo/extension/request_recorder.py +107 -107
- crawlo/filters/__init__.py +154 -154
- crawlo/filters/aioredis_filter.py +280 -280
- crawlo/filters/memory_filter.py +269 -269
- crawlo/items/__init__.py +23 -23
- crawlo/items/base.py +21 -21
- crawlo/items/fields.py +53 -53
- crawlo/items/items.py +104 -104
- crawlo/middleware/__init__.py +21 -21
- crawlo/middleware/default_header.py +32 -32
- crawlo/middleware/download_delay.py +28 -28
- crawlo/middleware/middleware_manager.py +135 -135
- crawlo/middleware/proxy.py +272 -272
- crawlo/middleware/request_ignore.py +30 -30
- crawlo/middleware/response_code.py +18 -18
- crawlo/middleware/response_filter.py +26 -26
- crawlo/middleware/retry.py +124 -124
- crawlo/mode_manager.py +211 -211
- crawlo/network/__init__.py +21 -21
- crawlo/network/request.py +338 -338
- crawlo/network/response.py +359 -359
- crawlo/pipelines/__init__.py +21 -21
- crawlo/pipelines/bloom_dedup_pipeline.py +156 -156
- crawlo/pipelines/console_pipeline.py +39 -39
- crawlo/pipelines/csv_pipeline.py +316 -316
- crawlo/pipelines/database_dedup_pipeline.py +224 -224
- crawlo/pipelines/json_pipeline.py +218 -218
- crawlo/pipelines/memory_dedup_pipeline.py +115 -115
- crawlo/pipelines/mongo_pipeline.py +131 -131
- crawlo/pipelines/mysql_pipeline.py +316 -316
- crawlo/pipelines/pipeline_manager.py +61 -61
- crawlo/pipelines/redis_dedup_pipeline.py +167 -167
- crawlo/project.py +187 -187
- crawlo/queue/pqueue.py +37 -37
- crawlo/queue/queue_manager.py +334 -334
- crawlo/queue/redis_priority_queue.py +298 -298
- crawlo/settings/__init__.py +7 -7
- crawlo/settings/default_settings.py +219 -219
- crawlo/settings/setting_manager.py +122 -122
- crawlo/spider/__init__.py +639 -639
- crawlo/stats_collector.py +59 -59
- crawlo/subscriber.py +130 -130
- crawlo/task_manager.py +30 -30
- crawlo/templates/crawlo.cfg.tmpl +10 -10
- crawlo/templates/project/__init__.py.tmpl +3 -3
- crawlo/templates/project/items.py.tmpl +17 -17
- crawlo/templates/project/middlewares.py.tmpl +109 -110
- crawlo/templates/project/pipelines.py.tmpl +96 -97
- crawlo/templates/project/run.py.tmpl +46 -252
- crawlo/templates/project/settings.py.tmpl +326 -326
- crawlo/templates/project/settings_distributed.py.tmpl +119 -119
- crawlo/templates/project/settings_gentle.py.tmpl +94 -94
- crawlo/templates/project/settings_high_performance.py.tmpl +151 -151
- crawlo/templates/project/settings_simple.py.tmpl +68 -68
- crawlo/templates/project/spiders/__init__.py.tmpl +5 -5
- crawlo/templates/spider/spider.py.tmpl +141 -141
- crawlo/tools/__init__.py +182 -182
- crawlo/tools/anti_crawler.py +268 -268
- crawlo/tools/authenticated_proxy.py +240 -240
- crawlo/tools/data_validator.py +180 -180
- crawlo/tools/date_tools.py +35 -35
- crawlo/tools/distributed_coordinator.py +386 -386
- crawlo/tools/retry_mechanism.py +220 -220
- crawlo/tools/scenario_adapter.py +262 -262
- crawlo/utils/__init__.py +35 -35
- crawlo/utils/batch_processor.py +260 -260
- crawlo/utils/controlled_spider_mixin.py +439 -439
- crawlo/utils/date_tools.py +290 -290
- crawlo/utils/db_helper.py +343 -343
- crawlo/utils/enhanced_error_handler.py +359 -359
- crawlo/utils/env_config.py +105 -105
- crawlo/utils/error_handler.py +125 -125
- crawlo/utils/func_tools.py +82 -82
- crawlo/utils/large_scale_config.py +286 -286
- crawlo/utils/large_scale_helper.py +343 -343
- crawlo/utils/log.py +128 -128
- crawlo/utils/performance_monitor.py +284 -284
- crawlo/utils/queue_helper.py +175 -175
- crawlo/utils/redis_connection_pool.py +334 -334
- crawlo/utils/redis_key_validator.py +199 -199
- crawlo/utils/request.py +267 -267
- crawlo/utils/request_serializer.py +219 -219
- crawlo/utils/spider_loader.py +62 -62
- crawlo/utils/system.py +11 -11
- crawlo/utils/tools.py +4 -4
- crawlo/utils/url.py +39 -39
- {crawlo-1.1.6.dist-info → crawlo-1.1.9.dist-info}/METADATA +626 -401
- crawlo-1.1.9.dist-info/RECORD +190 -0
- examples/__init__.py +7 -7
- tests/DOUBLE_CRAWLO_PREFIX_FIX_REPORT.md +81 -81
- tests/__init__.py +7 -7
- tests/advanced_tools_example.py +275 -275
- tests/authenticated_proxy_example.py +236 -236
- tests/cleaners_example.py +160 -160
- tests/config_validation_demo.py +102 -102
- tests/controlled_spider_example.py +205 -205
- tests/date_tools_example.py +180 -180
- tests/dynamic_loading_example.py +523 -523
- tests/dynamic_loading_test.py +104 -104
- tests/env_config_example.py +133 -133
- tests/error_handling_example.py +171 -171
- tests/redis_key_validation_demo.py +130 -130
- tests/response_improvements_example.py +144 -144
- tests/test_advanced_tools.py +148 -148
- tests/test_all_redis_key_configs.py +145 -145
- tests/test_authenticated_proxy.py +141 -141
- tests/test_cleaners.py +54 -54
- tests/test_comprehensive.py +146 -146
- tests/test_config_validator.py +193 -193
- tests/test_date_tools.py +123 -123
- tests/test_double_crawlo_fix.py +207 -207
- tests/test_double_crawlo_fix_simple.py +124 -124
- tests/test_dynamic_downloaders_proxy.py +124 -124
- tests/test_dynamic_proxy.py +92 -92
- tests/test_dynamic_proxy_config.py +146 -146
- tests/test_dynamic_proxy_real.py +109 -109
- tests/test_edge_cases.py +303 -303
- tests/test_enhanced_error_handler.py +270 -270
- tests/test_env_config.py +121 -121
- tests/test_error_handler_compatibility.py +112 -112
- tests/test_final_validation.py +153 -153
- tests/test_framework_env_usage.py +103 -103
- tests/test_integration.py +356 -356
- tests/test_item_dedup_redis_key.py +122 -122
- tests/test_parsel.py +29 -29
- tests/test_performance.py +327 -327
- tests/test_proxy_health_check.py +32 -32
- tests/test_proxy_middleware_integration.py +136 -136
- tests/test_proxy_providers.py +56 -56
- tests/test_proxy_stats.py +19 -19
- tests/test_proxy_strategies.py +59 -59
- tests/test_queue_manager_double_crawlo.py +230 -230
- tests/test_queue_manager_redis_key.py +176 -176
- tests/test_redis_config.py +28 -28
- tests/test_redis_connection_pool.py +294 -294
- tests/test_redis_key_naming.py +181 -181
- tests/test_redis_key_validator.py +123 -123
- tests/test_redis_queue.py +224 -224
- tests/test_request_serialization.py +70 -70
- tests/test_response_improvements.py +152 -152
- tests/test_scheduler.py +241 -241
- tests/test_simple_response.py +61 -61
- tests/test_telecom_spider_redis_key.py +205 -205
- tests/test_template_content.py +87 -87
- tests/test_template_redis_key.py +134 -134
- tests/test_tools.py +153 -153
- tests/tools_example.py +257 -257
- crawlo-1.1.6.dist-info/RECORD +0 -189
- {crawlo-1.1.6.dist-info → crawlo-1.1.9.dist-info}/WHEEL +0 -0
- {crawlo-1.1.6.dist-info → crawlo-1.1.9.dist-info}/entry_points.txt +0 -0
- {crawlo-1.1.6.dist-info → crawlo-1.1.9.dist-info}/top_level.txt +0 -0
|
@@ -1,135 +1,135 @@
|
|
|
1
|
-
#!/usr/bin/python
|
|
2
|
-
# -*- coding:UTF-8 -*-
|
|
3
|
-
from pprint import pformat
|
|
4
|
-
from types import MethodType
|
|
5
|
-
from asyncio import create_task
|
|
6
|
-
from collections import defaultdict
|
|
7
|
-
from typing import List, Dict, Callable, Optional
|
|
8
|
-
|
|
9
|
-
from crawlo import Request, Response
|
|
10
|
-
from crawlo.utils.log import get_logger
|
|
11
|
-
from crawlo.project import load_class
|
|
12
|
-
from crawlo.middleware import BaseMiddleware
|
|
13
|
-
from crawlo.project import common_call
|
|
14
|
-
from crawlo.event import ignore_request, response_received
|
|
15
|
-
from crawlo.exceptions import MiddlewareInitError, InvalidOutputError, RequestMethodError, IgnoreRequestError, \
|
|
16
|
-
NotConfiguredError
|
|
17
|
-
|
|
18
|
-
|
|
19
|
-
class MiddlewareManager:
    """Load the configured middlewares and route every download through
    their ``process_request`` / ``process_response`` / ``process_exception``
    hooks, in Scrapy-like chain order.
    """

    def __init__(self, crawler):
        self.crawler = crawler
        self.logger = get_logger(self.__class__.__name__, crawler.settings.get('LOG_LEVEL'))
        # Instantiated middleware objects, in declaration order.
        self.middlewares: List = []
        # Hook name -> list of bound hook methods, in declaration order.
        self.methods: Dict[str, List[MethodType]] = defaultdict(list)
        middlewares = self.crawler.settings.get_list('MIDDLEWARES')
        self._add_middleware(middlewares)
        self._add_method()

        self.download_method: Callable = crawler.engine.downloader.download
        self._stats = crawler.stats

    async def _process_request(self, request: Request):
        """Run ``process_request`` hooks; the first non-None result short-circuits
        the chain, otherwise fall through to the real downloader."""
        for method in self.methods['process_request']:
            result = await common_call(method, request, self.crawler.spider)
            if result is None:
                # Hook declined to act; try the next middleware.
                continue
            if isinstance(result, (Request, Response)):
                return result
            raise InvalidOutputError(
                f"{method.__self__.__class__.__name__}. must return None or Request or Response, got {type(result).__name__}"
            )
        return await self.download_method(request)

    async def _process_response(self, request: Request, response: Response):
        """Run ``process_response`` hooks in reverse declaration order; a hook
        may replace the response or substitute a new Request."""
        for method in reversed(self.methods['process_response']):
            try:
                response = await common_call(method, request, response, self.crawler.spider)
            except IgnoreRequestError as exp:
                # NOTE(review): fire-and-forget task; the notification is not awaited
                # and `response` keeps its previous value for the checks below.
                create_task(self.crawler.subscriber.notify(ignore_request, exp, request, self.crawler.spider))
            if isinstance(response, Request):
                # A middleware asked to retry/redirect with a new request.
                return response
            if isinstance(response, Response):
                continue
            raise InvalidOutputError(
                f"{method.__self__.__class__.__name__}. must return Request or Response, got {type(response).__name__}"
            )
        return response

    async def _process_exception(self, request: Request, exp: Exception):
        """Offer the exception to ``process_exception`` hooks; re-raise if no
        middleware handled it (the for-else branch)."""
        for method in self.methods['process_exception']:
            response = await common_call(method, request, exp, self.crawler.spider)
            if response is None:
                continue
            if isinstance(response, (Request, Response)):
                return response
            if response:
                # NOTE(review): any other truthy return swallows the exception and
                # yields None to the caller; only falsy non-None values reach the
                # InvalidOutputError below — confirm this asymmetry is intended.
                break
            raise InvalidOutputError(
                f"{method.__self__.__class__.__name__}. must return None or Request or Response, got {type(response).__name__}"
            )
        else:
            raise exp

    async def download(self, request) -> Optional[Response]:
        """ called in the download method. """
        try:
            response = await self._process_request(request)
        except KeyError:
            # NOTE(review): this also catches KeyErrors raised inside middleware
            # code, not only unsupported-method lookups — confirm acceptable.
            raise RequestMethodError(f"{request.method.lower()} is not supported")
        except IgnoreRequestError as exp:
            create_task(self.crawler.subscriber.notify(ignore_request, exp, request, self.crawler.spider))
            response = await self._process_exception(request, exp)
        except Exception as exp:
            self._stats.inc_value(f'download_error/{exp.__class__.__name__}')
            response = await self._process_exception(request, exp)
        else:
            create_task(self.crawler.subscriber.notify(response_received, response, self.crawler.spider))
            # self.crawler.stats.inc_value('response_received_count')
        if isinstance(response, Response):
            response = await self._process_response(request, response)
        if isinstance(response, Request):
            # NOTE(review): this enqueues the *original* request, not the Request
            # returned by process_response — looks like a bug; confirm intent.
            await self.crawler.engine.enqueue_request(request)
            return None
        return response

    @classmethod
    def create_instance(cls, *args, **kwargs):
        """Factory hook mirroring the middleware convention."""
        return cls(*args, **kwargs)

    def _add_middleware(self, middlewares):
        # Keep only middlewares that instantiate successfully (NotConfiguredError
        # silently disables one).
        enabled_middlewares = [m for m in middlewares if self._validate_middleware(m)]
        if enabled_middlewares:
            self.logger.info(f'enabled middleware:\n {pformat(enabled_middlewares)}')

    def _validate_middleware(self, middleware):
        """Import, validate and instantiate one middleware by dotted path.

        Returns True when the instance was added, False when the middleware
        opted out via NotConfiguredError. Raises MiddlewareInitError when the
        class lacks a ``create_instance`` factory.
        """
        middleware_cls = load_class(middleware)
        if not hasattr(middleware_cls, 'create_instance'):
            raise MiddlewareInitError(
                f"Middleware init failed, must inherit from `BaseMiddleware` or have a `create_instance` method"
            )
        try:
            instance = middleware_cls.create_instance(self.crawler)
            self.middlewares.append(instance)
            return True
        except NotConfiguredError:
            return False

    def _add_method(self):
        # Register only hooks that are actually overridden (not the BaseMiddleware
        # no-op defaults).
        for middleware in self.middlewares:
            if hasattr(middleware, 'process_request'):
                if self._validate_middleware_method(method_name='process_request', middleware=middleware):
                    self.methods['process_request'].append(middleware.process_request)
            if hasattr(middleware, 'process_response'):
                if self._validate_middleware_method(method_name='process_response', middleware=middleware):
                    self.methods['process_response'].append(middleware.process_response)
            if hasattr(middleware, 'process_exception'):
                if self._validate_middleware_method(method_name='process_exception', middleware=middleware):
                    self.methods['process_exception'].append(middleware.process_exception)

    @staticmethod
    def _validate_middleware_method(method_name, middleware) -> bool:
        # Compare against the class attribute so bound-method identity does not
        # interfere; True only when the middleware overrides the base hook.
        method = getattr(type(middleware), method_name)
        base_method = getattr(BaseMiddleware, method_name)
        return False if method == base_method else True
|
1
|
+
#!/usr/bin/python
|
|
2
|
+
# -*- coding:UTF-8 -*-
|
|
3
|
+
from pprint import pformat
|
|
4
|
+
from types import MethodType
|
|
5
|
+
from asyncio import create_task
|
|
6
|
+
from collections import defaultdict
|
|
7
|
+
from typing import List, Dict, Callable, Optional
|
|
8
|
+
|
|
9
|
+
from crawlo import Request, Response
|
|
10
|
+
from crawlo.utils.log import get_logger
|
|
11
|
+
from crawlo.project import load_class
|
|
12
|
+
from crawlo.middleware import BaseMiddleware
|
|
13
|
+
from crawlo.project import common_call
|
|
14
|
+
from crawlo.event import ignore_request, response_received
|
|
15
|
+
from crawlo.exceptions import MiddlewareInitError, InvalidOutputError, RequestMethodError, IgnoreRequestError, \
|
|
16
|
+
NotConfiguredError
|
|
17
|
+
|
|
18
|
+
|
|
19
|
+
class MiddlewareManager:
    """Load the configured middlewares and route every download through
    their ``process_request`` / ``process_response`` / ``process_exception``
    hooks, in Scrapy-like chain order.
    """

    def __init__(self, crawler):
        self.crawler = crawler
        self.logger = get_logger(self.__class__.__name__, crawler.settings.get('LOG_LEVEL'))
        # Instantiated middleware objects, in declaration order.
        self.middlewares: List = []
        # Hook name -> list of bound hook methods, in declaration order.
        self.methods: Dict[str, List[MethodType]] = defaultdict(list)
        middlewares = self.crawler.settings.get_list('MIDDLEWARES')
        self._add_middleware(middlewares)
        self._add_method()

        self.download_method: Callable = crawler.engine.downloader.download
        self._stats = crawler.stats

    async def _process_request(self, request: Request):
        """Run ``process_request`` hooks; the first non-None result short-circuits
        the chain, otherwise fall through to the real downloader."""
        for method in self.methods['process_request']:
            result = await common_call(method, request, self.crawler.spider)
            if result is None:
                # Hook declined to act; try the next middleware.
                continue
            if isinstance(result, (Request, Response)):
                return result
            raise InvalidOutputError(
                f"{method.__self__.__class__.__name__}. must return None or Request or Response, got {type(result).__name__}"
            )
        return await self.download_method(request)

    async def _process_response(self, request: Request, response: Response):
        """Run ``process_response`` hooks in reverse declaration order; a hook
        may replace the response or substitute a new Request."""
        for method in reversed(self.methods['process_response']):
            try:
                response = await common_call(method, request, response, self.crawler.spider)
            except IgnoreRequestError as exp:
                # NOTE: fire-and-forget; the task is not referenced, so it may be
                # garbage-collected before running — TODO keep a reference.
                create_task(self.crawler.subscriber.notify(ignore_request, exp, request, self.crawler.spider))
            if isinstance(response, Request):
                # A middleware asked to retry/redirect with a new request.
                return response
            if isinstance(response, Response):
                continue
            raise InvalidOutputError(
                f"{method.__self__.__class__.__name__}. must return Request or Response, got {type(response).__name__}"
            )
        return response

    async def _process_exception(self, request: Request, exp: Exception):
        """Offer the exception to ``process_exception`` hooks; re-raise if no
        middleware handled it (the for-else branch)."""
        for method in self.methods['process_exception']:
            response = await common_call(method, request, exp, self.crawler.spider)
            if response is None:
                continue
            if isinstance(response, (Request, Response)):
                return response
            if response:
                # Any other truthy return swallows the exception (yields None).
                break
            raise InvalidOutputError(
                f"{method.__self__.__class__.__name__}. must return None or Request or Response, got {type(response).__name__}"
            )
        else:
            raise exp

    async def download(self, request) -> Optional[Response]:
        """ called in the download method. """
        try:
            response = await self._process_request(request)
        except KeyError:
            # NOTE: this also catches KeyErrors raised inside middleware code,
            # not only unsupported-method lookups in the downloader.
            raise RequestMethodError(f"{request.method.lower()} is not supported")
        except IgnoreRequestError as exp:
            create_task(self.crawler.subscriber.notify(ignore_request, exp, request, self.crawler.spider))
            response = await self._process_exception(request, exp)
        except Exception as exp:
            self._stats.inc_value(f'download_error/{exp.__class__.__name__}')
            response = await self._process_exception(request, exp)
        else:
            create_task(self.crawler.subscriber.notify(response_received, response, self.crawler.spider))
            # self.crawler.stats.inc_value('response_received_count')
        if isinstance(response, Response):
            response = await self._process_response(request, response)
        if isinstance(response, Request):
            # Schedule the replacement request produced by a middleware.
            # FIX: previously the *original* request was re-enqueued here,
            # discarding the middleware's replacement and risking an endless
            # download/replace loop on the same URL.
            await self.crawler.engine.enqueue_request(response)
            return None
        return response

    @classmethod
    def create_instance(cls, *args, **kwargs):
        """Factory hook mirroring the middleware convention."""
        return cls(*args, **kwargs)

    def _add_middleware(self, middlewares):
        # Keep only middlewares that instantiate successfully (NotConfiguredError
        # silently disables one).
        enabled_middlewares = [m for m in middlewares if self._validate_middleware(m)]
        if enabled_middlewares:
            self.logger.info(f'enabled middleware:\n {pformat(enabled_middlewares)}')

    def _validate_middleware(self, middleware):
        """Import, validate and instantiate one middleware by dotted path.

        Returns True when the instance was added, False when the middleware
        opted out via NotConfiguredError. Raises MiddlewareInitError when the
        class lacks a ``create_instance`` factory.
        """
        middleware_cls = load_class(middleware)
        if not hasattr(middleware_cls, 'create_instance'):
            raise MiddlewareInitError(
                f"Middleware init failed, must inherit from `BaseMiddleware` or have a `create_instance` method"
            )
        try:
            instance = middleware_cls.create_instance(self.crawler)
            self.middlewares.append(instance)
            return True
        except NotConfiguredError:
            return False

    def _add_method(self):
        # Register only hooks that are actually overridden (not the BaseMiddleware
        # no-op defaults).
        for middleware in self.middlewares:
            if hasattr(middleware, 'process_request'):
                if self._validate_middleware_method(method_name='process_request', middleware=middleware):
                    self.methods['process_request'].append(middleware.process_request)
            if hasattr(middleware, 'process_response'):
                if self._validate_middleware_method(method_name='process_response', middleware=middleware):
                    self.methods['process_response'].append(middleware.process_response)
            if hasattr(middleware, 'process_exception'):
                if self._validate_middleware_method(method_name='process_exception', middleware=middleware):
                    self.methods['process_exception'].append(middleware.process_exception)

    @staticmethod
    def _validate_middleware_method(method_name, middleware) -> bool:
        # Compare against the class attribute so bound-method identity does not
        # interfere; True only when the middleware overrides the base hook.
        method = getattr(type(middleware), method_name)
        base_method = getattr(BaseMiddleware, method_name)
        return False if method == base_method else True