crawlo 1.4.6__py3-none-any.whl → 1.4.8__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of crawlo might be problematic (see the registry page for details).

Files changed (162)
  1. crawlo/__init__.py +2 -1
  2. crawlo/__version__.py +1 -1
  3. crawlo/cli.py +2 -2
  4. crawlo/commands/check.py +1 -1
  5. crawlo/commands/help.py +5 -3
  6. crawlo/commands/list.py +1 -1
  7. crawlo/commands/run.py +49 -11
  8. crawlo/commands/stats.py +1 -1
  9. crawlo/config.py +12 -4
  10. crawlo/config_validator.py +1 -1
  11. crawlo/core/engine.py +20 -7
  12. crawlo/core/processor.py +1 -1
  13. crawlo/core/scheduler.py +4 -5
  14. crawlo/crawler.py +51 -10
  15. crawlo/downloader/__init__.py +7 -3
  16. crawlo/downloader/aiohttp_downloader.py +18 -18
  17. crawlo/downloader/cffi_downloader.py +5 -2
  18. crawlo/downloader/httpx_downloader.py +9 -3
  19. crawlo/downloader/hybrid_downloader.py +2 -2
  20. crawlo/downloader/playwright_downloader.py +38 -15
  21. crawlo/downloader/selenium_downloader.py +16 -2
  22. crawlo/event.py +42 -8
  23. crawlo/exceptions.py +157 -24
  24. crawlo/extension/__init__.py +10 -9
  25. crawlo/extension/health_check.py +7 -7
  26. crawlo/extension/log_interval.py +6 -6
  27. crawlo/extension/log_stats.py +2 -2
  28. crawlo/extension/logging_extension.py +4 -12
  29. crawlo/extension/memory_monitor.py +5 -5
  30. crawlo/extension/performance_profiler.py +5 -5
  31. crawlo/extension/request_recorder.py +6 -6
  32. crawlo/factories/base.py +1 -1
  33. crawlo/factories/crawler.py +61 -60
  34. crawlo/factories/utils.py +135 -0
  35. crawlo/filters/__init__.py +19 -2
  36. crawlo/filters/aioredis_filter.py +133 -49
  37. crawlo/filters/memory_filter.py +6 -21
  38. crawlo/framework.py +22 -8
  39. crawlo/initialization/built_in.py +24 -67
  40. crawlo/initialization/core.py +65 -19
  41. crawlo/initialization/phases.py +83 -2
  42. crawlo/initialization/registry.py +5 -7
  43. crawlo/initialization/utils.py +49 -0
  44. crawlo/logging/__init__.py +6 -10
  45. crawlo/logging/config.py +106 -22
  46. crawlo/logging/factory.py +12 -8
  47. crawlo/logging/manager.py +19 -27
  48. crawlo/middleware/__init__.py +72 -9
  49. crawlo/middleware/default_header.py +2 -2
  50. crawlo/middleware/download_delay.py +2 -2
  51. crawlo/middleware/middleware_manager.py +6 -6
  52. crawlo/middleware/offsite.py +2 -2
  53. crawlo/middleware/proxy.py +2 -2
  54. crawlo/middleware/request_ignore.py +4 -4
  55. crawlo/middleware/response_code.py +2 -2
  56. crawlo/middleware/response_filter.py +2 -2
  57. crawlo/middleware/retry.py +1 -1
  58. crawlo/mode_manager.py +38 -4
  59. crawlo/network/request.py +54 -26
  60. crawlo/network/response.py +69 -135
  61. crawlo/pipelines/__init__.py +40 -9
  62. crawlo/pipelines/base_pipeline.py +452 -0
  63. crawlo/pipelines/bloom_dedup_pipeline.py +4 -5
  64. crawlo/pipelines/console_pipeline.py +2 -2
  65. crawlo/pipelines/csv_pipeline.py +4 -4
  66. crawlo/pipelines/database_dedup_pipeline.py +4 -5
  67. crawlo/pipelines/json_pipeline.py +4 -4
  68. crawlo/pipelines/memory_dedup_pipeline.py +4 -5
  69. crawlo/pipelines/mongo_pipeline.py +23 -14
  70. crawlo/pipelines/mysql_pipeline.py +31 -39
  71. crawlo/pipelines/pipeline_manager.py +8 -8
  72. crawlo/pipelines/redis_dedup_pipeline.py +13 -14
  73. crawlo/project.py +1 -1
  74. crawlo/queue/__init__.py +10 -0
  75. crawlo/queue/queue_manager.py +79 -13
  76. crawlo/queue/redis_priority_queue.py +196 -47
  77. crawlo/settings/default_settings.py +16 -6
  78. crawlo/spider/__init__.py +6 -5
  79. crawlo/stats_collector.py +2 -2
  80. crawlo/task_manager.py +1 -1
  81. crawlo/templates/crawlo.cfg.tmpl +3 -3
  82. crawlo/templates/project/__init__.py.tmpl +1 -3
  83. crawlo/templates/project/items.py.tmpl +2 -6
  84. crawlo/templates/project/middlewares.py.tmpl +1 -1
  85. crawlo/templates/project/pipelines.py.tmpl +1 -2
  86. crawlo/templates/project/settings.py.tmpl +12 -10
  87. crawlo/templates/project/settings_distributed.py.tmpl +14 -13
  88. crawlo/templates/project/settings_gentle.py.tmpl +21 -23
  89. crawlo/templates/project/settings_high_performance.py.tmpl +21 -23
  90. crawlo/templates/project/settings_minimal.py.tmpl +10 -8
  91. crawlo/templates/project/settings_simple.py.tmpl +21 -23
  92. crawlo/templates/run.py.tmpl +1 -1
  93. crawlo/templates/spider/spider.py.tmpl +4 -12
  94. crawlo/templates/spiders_init.py.tmpl +3 -8
  95. crawlo/tools/__init__.py +0 -103
  96. crawlo/tools/scenario_adapter.py +1 -1
  97. crawlo/utils/__init__.py +25 -1
  98. crawlo/utils/batch_processor.py +23 -6
  99. crawlo/utils/config_manager.py +442 -0
  100. crawlo/utils/controlled_spider_mixin.py +1 -1
  101. crawlo/utils/db_helper.py +1 -1
  102. crawlo/utils/encoding_helper.py +190 -0
  103. crawlo/utils/error_handler.py +2 -2
  104. crawlo/utils/large_scale_helper.py +1 -1
  105. crawlo/utils/leak_detector.py +335 -0
  106. crawlo/utils/mongo_connection_pool.py +157 -0
  107. crawlo/utils/mysql_connection_pool.py +197 -0
  108. crawlo/utils/performance_monitor.py +1 -1
  109. crawlo/utils/redis_checker.py +91 -0
  110. crawlo/utils/redis_connection_pool.py +260 -70
  111. crawlo/utils/redis_key_validator.py +1 -1
  112. crawlo/utils/request.py +24 -2
  113. crawlo/utils/request_serializer.py +1 -1
  114. crawlo/utils/resource_manager.py +337 -0
  115. crawlo/utils/response_helper.py +113 -0
  116. crawlo/utils/selector_helper.py +3 -2
  117. crawlo/utils/singleton.py +70 -0
  118. crawlo/utils/spider_loader.py +1 -1
  119. crawlo/utils/text_helper.py +1 -1
  120. crawlo-1.4.8.dist-info/METADATA +831 -0
  121. {crawlo-1.4.6.dist-info → crawlo-1.4.8.dist-info}/RECORD +131 -145
  122. tests/advanced_tools_example.py +10 -68
  123. tests/distributed_dedup_test.py +467 -0
  124. tests/monitor_redis_dedup.sh +72 -0
  125. tests/ofweek_scrapy/ofweek_scrapy/spiders/__init__.py +4 -4
  126. tests/simple_cli_test.py +55 -0
  127. tests/test_cli_arguments.py +119 -0
  128. tests/test_dedup_fix.py +10 -10
  129. crawlo/logging/async_handler.py +0 -181
  130. crawlo/logging/monitor.py +0 -153
  131. crawlo/logging/sampler.py +0 -167
  132. crawlo/tools/authenticated_proxy.py +0 -241
  133. crawlo/tools/data_formatter.py +0 -226
  134. crawlo/tools/data_validator.py +0 -181
  135. crawlo/tools/encoding_converter.py +0 -127
  136. crawlo/tools/network_diagnostic.py +0 -365
  137. crawlo/tools/request_tools.py +0 -83
  138. crawlo/tools/retry_mechanism.py +0 -224
  139. crawlo/utils/env_config.py +0 -143
  140. crawlo/utils/large_scale_config.py +0 -287
  141. crawlo/utils/log.py +0 -80
  142. crawlo/utils/system.py +0 -11
  143. crawlo/utils/tools.py +0 -5
  144. crawlo/utils/url.py +0 -40
  145. crawlo-1.4.6.dist-info/METADATA +0 -329
  146. tests/env_config_example.py +0 -134
  147. tests/ofweek_scrapy/ofweek_scrapy/spiders/ofweek_spider.py +0 -162
  148. tests/test_authenticated_proxy.py +0 -142
  149. tests/test_comprehensive.py +0 -147
  150. tests/test_dynamic_downloaders_proxy.py +0 -125
  151. tests/test_dynamic_proxy.py +0 -93
  152. tests/test_dynamic_proxy_config.py +0 -147
  153. tests/test_dynamic_proxy_real.py +0 -110
  154. tests/test_env_config.py +0 -122
  155. tests/test_framework_env_usage.py +0 -104
  156. tests/test_large_scale_config.py +0 -113
  157. tests/test_proxy_api.py +0 -265
  158. tests/test_real_scenario_proxy.py +0 -196
  159. tests/tools_example.py +0 -261
  160. {crawlo-1.4.6.dist-info → crawlo-1.4.8.dist-info}/WHEEL +0 -0
  161. {crawlo-1.4.6.dist-info → crawlo-1.4.8.dist-info}/entry_points.txt +0 -0
  162. {crawlo-1.4.6.dist-info → crawlo-1.4.8.dist-info}/top_level.txt +0 -0
crawlo/extension/memory_monitor.py CHANGED
@@ -4,9 +4,9 @@ import asyncio
  import psutil
  from typing import Any, Optional

- from crawlo.utils.log import get_logger
+ from crawlo.logging import get_logger
  from crawlo.utils.error_handler import ErrorHandler
- from crawlo.event import spider_opened, spider_closed
+ from crawlo.event import CrawlerEvent


  class MemoryMonitorExtension:
@@ -19,7 +19,7 @@ class MemoryMonitorExtension:
  self.task: Optional[asyncio.Task] = None
  self.process = psutil.Process()
  self.settings = crawler.settings
- self.logger = get_logger(self.__class__.__name__, crawler.settings.get('LOG_LEVEL'))
+ self.logger = get_logger(self.__class__.__name__)
  self.error_handler = ErrorHandler(self.__class__.__name__, crawler.settings.get('LOG_LEVEL'))

  # 获取配置参数
@@ -35,8 +35,8 @@ class MemoryMonitorExtension:
  raise NotConfigured("MemoryMonitorExtension: MEMORY_MONITOR_ENABLED is False")

  o = cls(crawler)
- crawler.subscriber.subscribe(o.spider_opened, event=spider_opened)
- crawler.subscriber.subscribe(o.spider_closed, event=spider_closed)
+ crawler.subscriber.subscribe(o.spider_opened, event=CrawlerEvent.SPIDER_OPENED)
+ crawler.subscriber.subscribe(o.spider_closed, event=CrawlerEvent.SPIDER_CLOSED)
  return o

  async def spider_opened(self) -> None:
crawlo/extension/performance_profiler.py CHANGED
@@ -7,9 +7,9 @@ import asyncio
  import cProfile
  from typing import Any, Optional

- from crawlo.utils.log import get_logger
+ from crawlo.logging import get_logger
  from crawlo.utils.error_handler import ErrorHandler
- from crawlo.event import spider_opened, spider_closed
+ from crawlo.event import CrawlerEvent


  class PerformanceProfilerExtension:
@@ -20,7 +20,7 @@ class PerformanceProfilerExtension:

  def __init__(self, crawler: Any):
  self.settings = crawler.settings
- self.logger = get_logger(self.__class__.__name__, crawler.settings.get('LOG_LEVEL'))
+ self.logger = get_logger(self.__class__.__name__)
  self.error_handler = ErrorHandler(self.__class__.__name__, crawler.settings.get('LOG_LEVEL'))

  # 获取配置参数
@@ -44,8 +44,8 @@

  o = cls(crawler)
  if o.enabled:
- crawler.subscriber.subscribe(o.spider_opened, event=spider_opened)
- crawler.subscriber.subscribe(o.spider_closed, event=spider_closed)
+ crawler.subscriber.subscribe(o.spider_opened, event=CrawlerEvent.SPIDER_OPENED)
+ crawler.subscriber.subscribe(o.spider_closed, event=CrawlerEvent.SPIDER_CLOSED)
  return o

  async def spider_opened(self) -> None:
crawlo/extension/request_recorder.py CHANGED
@@ -5,8 +5,8 @@ import json
  from typing import Any
  from datetime import datetime

- from crawlo import event
- from crawlo.utils.log import get_logger
+ from crawlo.event import CrawlerEvent
+ from crawlo.logging import get_logger


  class RequestRecorderExtension:
@@ -17,7 +17,7 @@ class RequestRecorderExtension:

  def __init__(self, crawler: Any):
  self.settings = crawler.settings
- self.logger = get_logger(self.__class__.__name__, crawler.settings.get('LOG_LEVEL'))
+ self.logger = get_logger(self.__class__.__name__)

  # 获取配置参数
  self.enabled = self.settings.get_bool('REQUEST_RECORDER_ENABLED', False)
@@ -40,9 +40,9 @@

  o = cls(crawler)
  if o.enabled:
- crawler.subscriber.subscribe(o.request_scheduled, event=event.request_scheduled)
- crawler.subscriber.subscribe(o.response_received, event=event.response_received)
- crawler.subscriber.subscribe(o.spider_closed, event=event.spider_closed)
+ crawler.subscriber.subscribe(o.request_scheduled, event=CrawlerEvent.REQUEST_SCHEDULED)
+ crawler.subscriber.subscribe(o.response_received, event=CrawlerEvent.RESPONSE_RECEIVED)
+ crawler.subscriber.subscribe(o.spider_closed, event=CrawlerEvent.SPIDER_CLOSED)
  return o

  async def request_scheduled(self, request: Any, spider: Any) -> None:
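
Taken together, the three extension hunks above show the same migration: get_logger() is now imported from crawlo.logging and takes only the logger name, and event subscriptions use the CrawlerEvent enum instead of module-level event objects. A minimal sketch of a custom extension written against this 1.4.8-style API (the MyTimingExtension class and the create_instance hook name are illustrative assumptions, not taken from this diff):

    from crawlo.logging import get_logger
    from crawlo.event import CrawlerEvent


    class MyTimingExtension:
        """Hypothetical extension using the new logging and event APIs."""

        def __init__(self, crawler):
            self.settings = crawler.settings
            # LOG_LEVEL is no longer passed to get_logger(); the logging module handles it.
            self.logger = get_logger(self.__class__.__name__)

        @classmethod
        def create_instance(cls, crawler):  # assumed hook name, mirroring the hunks above
            o = cls(crawler)
            # Events are enum members now, not module-level objects.
            crawler.subscriber.subscribe(o.spider_opened, event=CrawlerEvent.SPIDER_OPENED)
            crawler.subscriber.subscribe(o.spider_closed, event=CrawlerEvent.SPIDER_CLOSED)
            return o

        async def spider_opened(self):
            self.logger.info("spider opened")

        async def spider_closed(self):
            self.logger.info("spider closed")
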
crawlo/factories/base.py CHANGED
@@ -6,7 +6,7 @@

  from abc import ABC, abstractmethod
  from dataclasses import dataclass
- from typing import Type, Any, Dict, Optional, Callable
+ from typing import Type, Any, Dict, Callable


  @dataclass
crawlo/factories/crawler.py CHANGED
@@ -31,73 +31,74 @@ class CrawlerComponentFactory(ComponentFactory):
  return component_type.__name__ in supported_types


+ # Engine组件
+ def create_engine(crawler, **kwargs):
+ from crawlo.core.engine import Engine
+ return Engine(crawler)
+
+ # Scheduler组件
+ def create_scheduler(crawler, **kwargs):
+ from crawlo.core.scheduler import Scheduler
+ return Scheduler.create_instance(crawler)
+
+ # StatsCollector组件
+ def create_stats(crawler, **kwargs):
+ from crawlo.stats_collector import StatsCollector
+ return StatsCollector(crawler)
+
+ # Subscriber组件
+ def create_subscriber(**kwargs):
+ from crawlo.subscriber import Subscriber
+ return Subscriber()
+
+ # ExtensionManager组件
+ def create_extension_manager(crawler, **kwargs):
+ from crawlo.extension import ExtensionManager
+ return ExtensionManager.create_instance(crawler)
+
  def register_crawler_components():
  """注册Crawler相关组件"""
- registry = get_component_registry()
+ from .utils import register_components

  # 注册工厂
+ registry = get_component_registry()
  registry.register_factory(CrawlerComponentFactory())

- # 注册组件规范
-
- # Engine组件
- def create_engine(crawler, **kwargs):
- from crawlo.core.engine import Engine
- return Engine(crawler)
-
- registry.register(ComponentSpec(
- name='engine',
- component_type=type('Engine', (), {}),
- factory_func=create_engine,
- dependencies=['crawler']
- ))
-
- # Scheduler组件
- def create_scheduler(crawler, **kwargs):
- from crawlo.core.scheduler import Scheduler
- return Scheduler.create_instance(crawler)
-
- registry.register(ComponentSpec(
- name='scheduler',
- component_type=type('Scheduler', (), {}),
- factory_func=create_scheduler,
- dependencies=['crawler']
- ))
-
- # StatsCollector组件
- def create_stats(crawler, **kwargs):
- from crawlo.stats_collector import StatsCollector
- return StatsCollector(crawler)
-
- registry.register(ComponentSpec(
- name='stats',
- component_type=type('StatsCollector', (), {}),
- factory_func=create_stats,
- dependencies=['crawler']
- ))
-
- # Subscriber组件
- def create_subscriber(**kwargs):
- from crawlo.subscriber import Subscriber
- return Subscriber()
-
- registry.register(ComponentSpec(
- name='subscriber',
- component_type=type('Subscriber', (), {}),
- factory_func=create_subscriber
- ))
-
- # ExtensionManager组件
- def create_extension_manager(crawler, **kwargs):
- from crawlo.extension import ExtensionManager
- return ExtensionManager.create_instance(crawler)
+ # 批量注册组件
+ component_list = [
+ {
+ 'name': 'engine',
+ 'component_type': 'Engine',
+ 'factory_func': create_engine,
+ 'dependencies': ['crawler']
+ },
+ {
+ 'name': 'scheduler',
+ 'component_type': 'Scheduler',
+ 'factory_func': create_scheduler,
+ 'dependencies': ['crawler']
+ },
+ {
+ 'name': 'stats',
+ 'component_type': 'StatsCollector',
+ 'factory_func': create_stats,
+ 'dependencies': ['crawler']
+ },
+ {
+ 'name': 'subscriber',
+ 'component_type': 'Subscriber',
+ 'factory_func': create_subscriber,
+ 'dependencies': []
+ },
+ {
+ 'name': 'extension_manager',
+ 'component_type': 'ExtensionManager',
+ 'factory_func': create_extension_manager,
+ 'dependencies': ['crawler']
+ }
+ ]

- registry.register(ComponentSpec(
- name='extension_manager',
- component_type=type('ExtensionManager', (), {}),
- factory_func=create_extension_manager,
- dependencies=['crawler']
- ))
+ register_components(component_list)


  # 自动注册
crawlo/factories/utils.py ADDED
@@ -0,0 +1,135 @@
+ #!/usr/bin/python
+ # -*- coding: UTF-8 -*-
+ """
+ 工厂工具模块 - 提供通用的组件注册和创建工具
+ """
+
+ from typing import Any, Callable, List, Optional, Type, Union
+ from .base import ComponentSpec
+ from .registry import get_component_registry
+
+
+ def register_component(
+ name: str,
+ component_type: Union[Type, str],
+ factory_func: Callable[..., Any],
+ dependencies: Optional[List[str]] = None,
+ singleton: bool = False,
+ config_key: Optional[str] = None
+ ) -> None:
+ """
+ 注册组件的便捷函数
+
+ Args:
+ name: 组件名称
+ component_type: 组件类型
+ factory_func: 工厂函数
+ dependencies: 依赖列表
+ singleton: 是否单例
+ config_key: 配置键名
+ """
+ registry = get_component_registry()
+
+ # 如果component_type是字符串,创建一个动态类型
+ if isinstance(component_type, str):
+ component_type = type(component_type, (), {})
+
+ spec_kwargs = {
+ 'name': name,
+ 'component_type': component_type,
+ 'factory_func': factory_func,
+ 'dependencies': dependencies or [],
+ 'singleton': singleton
+ }
+
+ # 只有当config_key不为None时才添加
+ if config_key is not None:
+ spec_kwargs['config_key'] = config_key
+
+ spec = ComponentSpec(**spec_kwargs)
+
+ registry.register(spec)
+
+
+ def register_components(component_list: List[dict]) -> None:
+ """
+ 批量注册组件
+
+ Args:
+ component_list: 组件定义列表,每个元素是一个包含组件信息的字典
+ """
+ for component_info in component_list:
+ register_component(**component_info)
+
+
+ def create_component_factory(
+ component_name: str,
+ module_path: str,
+ class_name: str,
+ dependencies: Optional[List[str]] = None,
+ singleton: bool = False
+ ) -> Callable[..., Any]:
+ """
+ 创建组件工厂函数的便捷函数
+
+ Args:
+ component_name: 组件名称(用于错误信息)
+ module_path: 模块路径
+ class_name: 类名
+ dependencies: 依赖列表
+ singleton: 是否单例
+
+ Returns:
+ 工厂函数
+ """
+ def factory_func(*args, **kwargs):
+ try:
+ # 动态导入模块
+ module = __import__(module_path, fromlist=[class_name])
+ component_class = getattr(module, class_name)
+
+ # 检查是否需要调用create_instance方法
+ if hasattr(component_class, 'create_instance'):
+ return component_class.create_instance(*args, **kwargs)
+ else:
+ return component_class(*args, **kwargs)
+ except Exception as e:
+ raise RuntimeError(f"Failed to create {component_name}: {e}")
+
+ return factory_func
+
+
+ def create_crawler_component_factory(
+ component_name: str,
+ module_path: str,
+ class_name: str
+ ) -> Callable[..., Any]:
+ """
+ 创建需要crawler依赖的组件工厂函数
+
+ Args:
+ component_name: 组件名称
+ module_path: 模块路径
+ class_name: 类名
+
+ Returns:
+ 工厂函数
+ """
+ def factory_func(crawler=None, **kwargs):
+ if crawler is None:
+ raise ValueError(f"Crawler instance required for component {component_name}")
+
+ try:
+ # 动态导入模块
+ module = __import__(module_path, fromlist=[class_name])
+ component_class = getattr(module, class_name)
+
+ # 检查是否需要调用create_instance方法
+ if hasattr(component_class, 'create_instance'):
+ return component_class.create_instance(crawler, **kwargs)
+ else:
+ return component_class(crawler, **kwargs)
+ except Exception as e:
+ raise RuntimeError(f"Failed to create {component_name}: {e}")
+
+ return factory_func
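
The new module factors out the ComponentSpec boilerplate that register_crawler_components() previously repeated for every built-in component. A rough usage sketch for wiring in a project-specific component with these helpers (the 'metrics' name, module path, and class name are hypothetical, not part of crawlo):

    from crawlo.factories.utils import register_component, create_crawler_component_factory

    # Hypothetical third-party component registration using the new helpers.
    register_component(
        name='metrics',                                # illustrative component name
        component_type='MetricsCollector',             # a string is turned into a dynamic type
        factory_func=create_crawler_component_factory(
            component_name='metrics',
            module_path='myproject.metrics',           # assumed project module
            class_name='MetricsCollector',
        ),
        dependencies=['crawler'],
        singleton=True,
    )
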
crawlo/filters/__init__.py CHANGED
@@ -17,7 +17,7 @@ Crawlo过滤器模块
  from abc import ABC, abstractmethod
  from typing import Optional

- from crawlo.utils.request import request_fingerprint
+ from crawlo.utils.fingerprint import FingerprintGenerator


  class BaseFilter(ABC):
@@ -46,6 +46,23 @@ class BaseFilter(ABC):
  def create_instance(cls, *args, **kwargs) -> 'BaseFilter':
  return cls(*args, **kwargs)

+ def _get_fingerprint(self, request) -> str:
+ """
+ 获取请求指纹(内部辅助方法)
+
+ 使用统一的 FingerprintGenerator 生成请求指纹。
+ 子类可以直接调用此方法,避免重复实现。
+
+ :param request: 请求对象
+ :return: 请求指纹字符串
+ """
+ return FingerprintGenerator.request_fingerprint(
+ request.method,
+ request.url,
+ request.body or b'',
+ dict(request.headers) if hasattr(request, 'headers') else {}
+ )
+
  def requested(self, request) -> bool:
  """
  检查请求是否重复(主要接口)
@@ -54,7 +71,7 @@ class BaseFilter(ABC):
  :return: True 表示重复,False 表示新请求
  """
  self._request_count += 1
- fp = request_fingerprint(request)
+ fp = self._get_fingerprint(request)

  if fp in self:
  self._duplicate_count += 1
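
With _get_fingerprint() pulled up into BaseFilter, the requested() flow is: compute the fingerprint via FingerprintGenerator, then check membership with "fp in self". A rough sketch of a custom in-memory filter under that contract; the add_fingerprint() hook name and constructor signature are assumptions, so check BaseFilter for the actual abstract interface:

    from crawlo.filters import BaseFilter


    class SetBackedFilter(BaseFilter):
        """Illustrative filter keeping fingerprints in a plain set."""

        def __init__(self, *args, **kwargs):
            super().__init__(*args, **kwargs)
            self._seen = set()

        def __contains__(self, fp):
            # BaseFilter.requested() calls self._get_fingerprint(request)
            # and then evaluates "fp in self" against this method.
            return fp in self._seen

        def add_fingerprint(self, fp):
            # Assumed storage hook; the real abstract method name may differ.
            self._seen.add(fp)
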