crawlo 1.0.3__py3-none-any.whl → 1.0.4__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release: this version of crawlo has been flagged as potentially problematic.
- crawlo/__version__.py +1 -1
- crawlo/filters/aioredis_filter.py +52 -60
- crawlo/settings/default_settings.py +2 -1
- {crawlo-1.0.3.dist-info → crawlo-1.0.4.dist-info}/METADATA +1 -1
- {crawlo-1.0.3.dist-info → crawlo-1.0.4.dist-info}/RECORD +9 -10
- tests/baidu_spider/settings.py +3 -1
- crawlo/filters/redis_filter.py +0 -120
- {crawlo-1.0.3.dist-info → crawlo-1.0.4.dist-info}/WHEEL +0 -0
- {crawlo-1.0.3.dist-info → crawlo-1.0.4.dist-info}/entry_points.txt +0 -0
- {crawlo-1.0.3.dist-info → crawlo-1.0.4.dist-info}/top_level.txt +0 -0
crawlo/__version__.py
CHANGED

@@ -1 +1 @@
-__version__ = "1.0.3"
+__version__ = "1.0.4"

crawlo/filters/aioredis_filter.py
CHANGED

@@ -9,7 +9,7 @@ from crawlo.utils.request import request_fingerprint
 
 
 class AioRedisFilter(BaseFilter):
-    """
+    """基于Redis集合实现的异步请求去重过滤器(支持分布式爬虫),提供TTL和清理控制"""
 
     def __init__(
             self,
@@ -18,7 +18,8 @@ class AioRedisFilter(BaseFilter):
             stats: dict,
             debug: bool,
             log_level: str,
-            cleanup_fp: bool = False
+            cleanup_fp: bool = False,
+            ttl: Optional[int] = None  # None表示持久化,>0表示过期时间(秒)
     ):
         """初始化过滤器"""
         self.logger = get_logger(self.__class__.__name__, log_level)
@@ -27,12 +28,19 @@
         self.redis_key = redis_key
         self.redis = client
         self.cleanup_fp = cleanup_fp
+        self.ttl = ttl
 
     @classmethod
     def create_instance(cls, crawler) -> 'BaseFilter':
         """从爬虫配置创建过滤器实例"""
         redis_url = crawler.settings.get('REDIS_URL', 'redis://localhost:6379')
-        decode_responses = crawler.settings.get_bool('DECODE_RESPONSES', False)
+        decode_responses = crawler.settings.get_bool('DECODE_RESPONSES', False)
+        ttl_setting = crawler.settings.get_int('REDIS_TTL')
+
+        # 处理TTL设置
+        ttl = None
+        if ttl_setting is not None:
+            ttl = max(0, int(ttl_setting)) if ttl_setting > 0 else None
 
         try:
             redis_client = aioredis.from_url(
@@ -42,110 +50,93 @@
                 encoding='utf-8'
             )
         except Exception as e:
-            raise RuntimeError(f"Redis
+            raise RuntimeError(f"Redis连接失败: {redis_url} - {str(e)}")
 
         return cls(
             redis_key=f"{crawler.settings.get('PROJECT_NAME', 'default')}:{crawler.settings.get('REDIS_KEY', 'request_fingerprints')}",
             client=redis_client,
             stats=crawler.stats,
             cleanup_fp=crawler.settings.get_bool('CLEANUP_FP', False),
+            ttl=ttl,
             debug=crawler.settings.get_bool('FILTER_DEBUG', False),
             log_level=crawler.settings.get('LOG_LEVEL', 'INFO')
         )
 
     async def requested(self, request: Request) -> bool:
-        """
-        检查请求是否重复
-        """
+        """检查请求是否已存在"""
         try:
-            fp = request_fingerprint(request)
-            self.logger.debug(f"Checking fingerprint: {fp}")
-
-            # 确保fp是字符串类型
-            if not isinstance(fp, str):
-                fp = str(fp)
-
-            # 检查Redis连接状态
-            if not self.redis:
-                raise RuntimeError("Redis client is not initialized")
+            fp = str(request_fingerprint(request))
 
-            #
-
-            self.
+            # 1. 检查指纹是否存在
+            pipe = self.redis.pipeline()
+            pipe.sismember(self.redis_key, fp)  # 不单独 await
+            exists = (await pipe.execute())[0]  # 执行并获取结果
 
-            if
-            if self.debug:
-                self.logger.debug(f"Filtered duplicate request: {fp}")
+            if exists:  # 如果已存在,返回 True
                 return True
 
-            #
-
+            # 2. 如果不存在,添加指纹并设置 TTL
+            pipe = self.redis.pipeline()
+            pipe.sadd(self.redis_key, fp)  # 不单独 await
+            if self.ttl and self.ttl > 0:
+                pipe.expire(self.redis_key, self.ttl)  # 不单独 await
+            await pipe.execute()  # 一次性执行所有命令
 
-
-            if result == 1:
-                self.logger.debug(f"Added new fingerprint: {fp}")
-            else:
-                self.logger.warning(f"Failed to add fingerprint: {fp}")
-
-            return False
+            return False  # 表示是新请求
 
         except Exception as e:
-            self.logger.error(f"
-            # 可以选择抛出异常或返回False(不过滤)
+            self.logger.error(f"请求检查失败: {getattr(request, 'url', '未知URL')}")
             raise
 
     async def add_fingerprint(self, fp: str) -> bool:
-        """
+        """添加新指纹到Redis集合"""
         try:
-
-
+            fp = str(fp)
+            added = await self.redis.sadd(self.redis_key, fp)
+
+            if self.ttl and self.ttl > 0:
+                await self.redis.expire(self.redis_key, self.ttl)
 
-
-            if self.debug:
-                self.logger.debug(f"Added fingerprint {fp}, result: {result}")
-            return result == 1
+            return added == 1
         except Exception as e:
-            self.logger.error(
+            self.logger.error("添加指纹失败")
             raise
 
     async def get_stats(self) -> dict:
-        """
+        """获取过滤器统计信息"""
         try:
             count = await self.redis.scard(self.redis_key)
-
-            '
-            '
-
+            stats = {
+                '指纹总数': count,
+                'Redis键名': self.redis_key,
+                'TTL配置': f"{self.ttl}秒" if self.ttl else "持久化"
             }
+            stats.update(self.stats)
+            return stats
         except Exception as e:
-            self.logger.error(
+            self.logger.error("获取统计信息失败")
             return self.stats
 
     async def clear_all(self) -> int:
         """清空所有指纹数据"""
         try:
             deleted = await self.redis.delete(self.redis_key)
-            self.logger.info(f"
+            self.logger.info(f"已清除指纹数: {deleted}")
             return deleted
         except Exception as e:
-            self.logger.error(
+            self.logger.error("清空指纹失败")
             raise
 
     async def closed(self, reason: Optional[str] = None) -> None:
-        """
+        """爬虫关闭时的清理操作"""
         try:
             if self.cleanup_fp:
                 deleted = await self.redis.delete(self.redis_key)
-                self.logger.info(
-                    f"Cleaned {deleted} fingerprints from {self.redis_key} "
-                    f"(reason: {reason or 'manual'})"
-                )
+                self.logger.info(f"爬虫关闭清理: 已删除{deleted}个指纹")
             else:
-                # 显示统计信息
                 count = await self.redis.scard(self.redis_key)
-                self.
-
-                self.logger.warning(f"Close operation failed: {e}")
+                ttl_info = f"{self.ttl}秒" if self.ttl else "持久化"
+                self.logger.info(f"保留指纹数: {count} (TTL: {ttl_info})")
         finally:
             await self._close_redis()
 
@@ -154,5 +145,6 @@ class AioRedisFilter(BaseFilter):
         try:
             if hasattr(self.redis, 'close'):
                 await self.redis.close()
+                self.logger.debug("Redis连接已关闭")
         except Exception as e:
-            self.logger.warning(f"Redis
+            self.logger.warning(f"Redis关闭时出错:{e}")
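The rewritten requested() above batches its Redis commands through pipelines: one round trip to test membership of the fingerprint set and, for new fingerprints, a second pipeline that adds the member and refreshes the set's TTL. Below is a minimal standalone sketch of the same flow, assuming redis.asyncio as the client behind the aioredis alias used in the diff; dedup_key, ttl and the fingerprint value are illustrative, not part of the package.

```python
import asyncio
from typing import Optional

import redis.asyncio as aioredis


async def is_duplicate(client: aioredis.Redis, dedup_key: str, fp: str,
                       ttl: Optional[int] = None) -> bool:
    """Return True if fp was seen before; otherwise record it (refreshing the TTL)."""
    # Round trip 1: membership test, issued through a pipeline as in the diff.
    pipe = client.pipeline()
    pipe.sismember(dedup_key, fp)
    exists = (await pipe.execute())[0]
    if exists:
        return True

    # Round trip 2: add the new fingerprint; a positive TTL re-arms expiry of the whole set.
    pipe = client.pipeline()
    pipe.sadd(dedup_key, fp)
    if ttl and ttl > 0:
        pipe.expire(dedup_key, ttl)
    await pipe.execute()
    return False


async def main():
    client = aioredis.from_url("redis://localhost:6379", decode_responses=True)
    print(await is_duplicate(client, "default:request_fingerprints", "abc123"))  # False (new)
    print(await is_duplicate(client, "default:request_fingerprints", "abc123"))  # True (duplicate)
    await client.close()


asyncio.run(main())
```

Note that SADD alone already reports whether the member was new, so the check-then-add split shown here (and in the diff) is a design choice rather than a requirement.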
crawlo/settings/default_settings.py
CHANGED

@@ -84,7 +84,8 @@ FILTER_DEBUG = True
 FILTER_CLASS = 'crawlo.filters.memory_filter.MemoryFilter'
 
 # redis filter
-
+REDIS_TTL = 0
+CLEANUP_FP = 0
 DECODE_RESPONSES = True
 REDIS_KEY = 'request_fingerprint'
 REDIS_HOST = os.getenv('REDIS_HOST', '127.0.0.1')

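The new REDIS_TTL default of 0 is interpreted by create_instance (shown earlier): the setting is read with get_int and only values greater than zero become an actual expiry, so 0 keeps the fingerprint set persistent. A small sketch of that rule, mirroring the logic in the diff; resolve_ttl is an illustrative helper name, not part of the package.

```python
from typing import Optional


def resolve_ttl(ttl_setting: Optional[int]) -> Optional[int]:
    """Mirror the TTL handling in create_instance: only positive values expire the key."""
    if ttl_setting is None:
        return None
    return max(0, int(ttl_setting)) if ttl_setting > 0 else None


assert resolve_ttl(0) is None      # REDIS_TTL = 0 (the new default): fingerprints persist
assert resolve_ttl(3600) == 3600   # positive value: the fingerprint set expires after an hour
assert resolve_ttl(-1) is None     # non-positive values also mean "no expiry"
```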
{crawlo-1.0.3.dist-info → crawlo-1.0.4.dist-info}/RECORD
CHANGED

@@ -1,5 +1,5 @@
 crawlo/__init__.py,sha256=XOWXajnhT2HVql5cycwGkQ0MS85bpQnFdM7tl0Fusik,327
-crawlo/__version__.py,sha256=
+crawlo/__version__.py,sha256=acuR_XSJzp4OrQ5T8-Ac5gYe48mUwObuwjRmisFmZ7k,22
 crawlo/crawler.py,sha256=rqKjMLDU6qlm2D2gIhkezF5jFOCz0TgYyq-nS7MEFMU,9237
 crawlo/event.py,sha256=7-y6HNv_EIJSYQNzsj0mVK-Gg4ON3wdQeMdQjfFJPlw,313
 crawlo/exceptions.py,sha256=7dtEJBxb9yvmMJe6MQyDB0LuV9que1J_jQN4QYeyO4g,916
@@ -18,9 +18,8 @@ crawlo/extension/__init__.py,sha256=LPy9XyCu089k6L6oVENIi_imr75AEuY8QTtSJjRioiw,
 crawlo/extension/log_interval.py,sha256=S-hSoiz9GdmgHrac4vDQ52fleoBcH-kzdPUD8YRAons,1922
 crawlo/extension/log_stats.py,sha256=WeSnOoSKB8pI_xmcGdh906XnF1xwo6fgJnf_prElwwI,1742
 crawlo/filters/__init__.py,sha256=BCZl86BHiTfDGRe_b1TlNSr6pfNbMKTu0Uq0j4gX_1Q,977
-crawlo/filters/aioredis_filter.py,sha256=
+crawlo/filters/aioredis_filter.py,sha256=MJT74BeVZTjdExKEzdrWKc7WPXFss1k-txc7E54H77E,5522
 crawlo/filters/memory_filter.py,sha256=bs2WUe7CdHiXgr344vzDqMfBv1b3RwXJMnwxpDb64Pw,6639
-crawlo/filters/redis_filter.py,sha256=W3Wam4Qdd1mZPyue3N9pYkaF72HUcXd38iHOsHHDfEg,4092
 crawlo/items/__init__.py,sha256=JUw4wZX50DidJuCMLkP41ik_wTKum2b8iDxm7EbRRds,2063
 crawlo/items/items.py,sha256=00TdAYChF5Rbbgm6a6d-GCxkx4gXP-rA-_Q7u33BuFI,3990
 crawlo/middleware/__init__.py,sha256=ldaGFNbiJnK9Fx12Vdf9fDNfzXxoETtShp5r-vodtw0,549
@@ -41,7 +40,7 @@ crawlo/pipelines/mysql_batch_pipline.py,sha256=g111iuPTRyKr0q4PHTJYIfsYAFf8CCuyY
 crawlo/pipelines/mysql_pipeline.py,sha256=ZlRWwZLewG9SBLBZ1wWNZ8yAj5xWWitb7BKRSrqEWtI,7857
 crawlo/pipelines/pipeline_manager.py,sha256=JIoX5D-oDfUT7VJrb5m355wi43SChb4nNb09z_0F4_g,2118
 crawlo/settings/__init__.py,sha256=xsukVKn_h2Hopm1Nj-bXkhbfyS62QTTvJi7fhZUwR9M,123
-crawlo/settings/default_settings.py,sha256=
+crawlo/settings/default_settings.py,sha256=zNMVMo_9s1DGr1TiPzwZjSmxuD4qj_JT_oCCmkoMfjs,2579
 crawlo/settings/setting_manager.py,sha256=SxKB1aCWh4OySM_bH9cYng9I3PAmrSP-Q8XOZEWEwbI,2899
 crawlo/spider/__init__.py,sha256=pP_TChnozpHeuS87Bs-Sj31hb0R7glYN3K6BsRw4FOA,905
 crawlo/templates/item_template.tmpl,sha256=bo0cjaFOT1jMrtLjXs6z7Mhwev-s3037suD4BL2_ji4,351
@@ -69,12 +68,12 @@ tests/baidu_spider/middleware.py,sha256=I71ZMmWTiDBFq4t2zfTE7IIXCqwaaeQ1DvKGW70q
 tests/baidu_spider/pipeline.py,sha256=TUK_LnrU818UYmCn2_gKeNaTZjaj9qjrlndRLsR4wf0,1437
 tests/baidu_spider/request_fingerprints.txt,sha256=TJAuFJZZ_uvYExfruA9bEsIiArz86vxe95QoF2lbnfE,585
 tests/baidu_spider/run.py,sha256=YVe9qwn-2XBRRoZdUnwPRrWlBO5YAmKnyLRI3RpfogE,646
-tests/baidu_spider/settings.py,sha256=
+tests/baidu_spider/settings.py,sha256=EenFOFgupwnn7HIySKSHBgP9--qxxkiWgIi2NDltXRw,2811
 tests/baidu_spider/spiders/__init__.py,sha256=eJ_ih4GiGfwQzPILeouy1Hnc4BrPz0KNPYlLHYvrvoc,123
 tests/baidu_spider/spiders/bai_du.py,sha256=pw4WccbmBR07CuSqCgm_7x9SH63FDJS_sXSaN5Ew5Tw,1589
 tests/baidu_spider/spiders/sina.py,sha256=BKQGJiCS8aiZ2f27C99WcK90QQJwgUY-vS4fUaQSdIQ,2456
-crawlo-1.0.
-crawlo-1.0.
-crawlo-1.0.
-crawlo-1.0.
-crawlo-1.0.
+crawlo-1.0.4.dist-info/METADATA,sha256=dzEuRJVuBVSeKTQeEvOXRhfRcyjhcZqJFlPWivAZ9UE,1743
+crawlo-1.0.4.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
+crawlo-1.0.4.dist-info/entry_points.txt,sha256=GD9PBhKQN83EaxPYtz7NhcGeZeh3bdr2jWbTixOs-lw,59
+crawlo-1.0.4.dist-info/top_level.txt,sha256=bKtfejkszFTNHm7Z6aqtt0AUG8DdeNeL4AoZsg4XdZY,13
+crawlo-1.0.4.dist-info/RECORD,,

tests/baidu_spider/settings.py
CHANGED

@@ -72,7 +72,9 @@ DEFAULT_HEADERS = {
 Mongo_Params = ''
 MONGODB_DB = 'news'
 
-
+REDIS_TTL = 0
+CLEANUP_FP = False
 
 FILTER_CLASS = 'crawlo.filters.aioredis_filter.AioRedisFilter'
+# FILTER_CLASS = 'crawlo.filters.redis_filter.RedisFilter'
 # FILTER_CLASS = 'crawlo.filters.memory_filter.MemoryFileFilter'

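For context, the Redis key under which these fingerprints land is assembled by create_instance from PROJECT_NAME and REDIS_KEY, so a project settings file like the one above (together with the REDIS_TTL / CLEANUP_FP values just added) determines where and for how long fingerprints are kept. A tiny illustration of that key composition, using hypothetical setting values:

```python
# Hypothetical values; the real ones live in the project's settings module.
PROJECT_NAME = 'baidu_spider'
REDIS_KEY = 'request_fingerprint'

# create_instance builds the storage key as f"{PROJECT_NAME}:{REDIS_KEY}"
# (falling back to 'default' and 'request_fingerprints' when unset).
redis_key = f"{PROJECT_NAME}:{REDIS_KEY}"
print(redis_key)  # -> baidu_spider:request_fingerprint
```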
crawlo/filters/redis_filter.py
DELETED

@@ -1,120 +0,0 @@
-#!/usr/bin/python
-# -*- coding:UTF-8 -*-
-import redis
-
-from crawlo import Request
-from crawlo.filters import BaseFilter
-from crawlo.utils.log import get_logger
-from crawlo.utils.request import request_fingerprint
-
-
-class RedisFilter(BaseFilter):
-    """使用Redis集合实现的同步请求去重过滤器"""
-
-    def __init__(
-            self,
-            redis_key: str,
-            client: redis.Redis,
-            stats: dict,
-            debug: bool,
-            log_level: str,
-            save_fp: bool
-    ):
-        """
-        初始化过滤器
-
-        :param redis_key: Redis存储键名
-        :param client: redis客户端实例
-        :param stats: 统计字典
-        :param debug: 是否启用调试模式
-        :param log_level: 日志级别
-        :param save_fp: 是否保留指纹数据
-        """
-        self.logger = get_logger(self.__class__.__name__, log_level)
-        super().__init__(self.logger, stats, debug)
-
-        self.redis_key = redis_key
-        self.redis = client
-        self.save_fp = save_fp
-
-    @classmethod
-    def create_instance(cls, crawler) -> 'BaseFilter':
-        """工厂方法创建实例"""
-        redis_url = crawler.settings.get('REDIS_URL', 'redis://localhost:6379')
-        decode_responses = crawler.settings.get_bool('DECODE_RESPONSES', True)
-
-        try:
-            # 添加连接池配置
-            redis_client = redis.from_url(
-                redis_url,
-                decode_responses=decode_responses,
-                socket_timeout=5,  # 超时设置
-                socket_connect_timeout=5,
-                max_connections=20  # 连接池大小
-            )
-            # 测试连接是否有效
-            redis_client.ping()
-        except redis.RedisError as e:
-            raise RuntimeError(f"Redis连接失败: {str(e)}")
-
-        return cls(
-            redis_key=f"{crawler.settings.get('PROJECT_NAME')}:{crawler.settings.get('REDIS_KEY', 'request_fingerprints')}",
-            client=redis_client,
-            stats=crawler.stats,
-            save_fp=crawler.settings.get_bool('SAVE_FP', False),
-            debug=crawler.settings.get_bool('FILTER_DEBUG', False),
-            log_level=crawler.settings.get('LOG_LEVEL', 'INFO')
-        )
-
-    def requested(self, request: Request) -> bool:
-        """
-        检查请求是否已存在
-
-        :param request: 请求对象
-        :return: 是否重复
-        """
-        fp = request_fingerprint(request)
-        try:
-            if self.redis.sismember(self.redis_key, fp):
-                self.logger.debug(f"重复请求: {fp}")
-                return True
-
-            self.add_fingerprint(fp)
-            return False
-        except redis.RedisError as e:
-            self.logger.error(f"Redis操作失败: {str(e)}")
-            raise
-
-    def add_fingerprint(self, fp: str) -> None:
-        """添加指纹到Redis集合"""
-        try:
-            self.redis.sadd(self.redis_key, fp)
-            self.logger.debug(f"新增指纹: {fp}")
-        except redis.RedisError as e:
-            self.logger.error(f"指纹添加失败: {str(e)}")
-            raise
-
-    def __contains__(self, item) -> bool:
-        """支持 in 操作符检查 (必须返回bool类型)"""
-        try:
-            # 显式将redis返回的0/1转换为bool
-            return bool(self.redis.sismember(self.redis_key, item))
-        except redis.RedisError as e:
-            self.logger.error(f"Redis查询失败: {str(e)}")
-            raise
-
-    def close(self) -> None:
-        """同步清理方法(注意不是异步的closed)"""
-        if not self.save_fp:
-            try:
-                count = self.redis.delete(self.redis_key)
-                self.logger.info(f"已清理Redis键 {self.redis_key}, 删除数量: {count}")
-            except redis.RedisError as e:
-                self.logger.error(f"清理失败: {str(e)}")
-            finally:
-                # 同步客户端需要手动关闭连接池
-                self.redis.close()
-
-    async def closed(self):
-        """兼容异步接口的同步实现"""
-        self.close()

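With the synchronous RedisFilter gone, the only Redis-backed filter shipped in 1.0.4 is AioRedisFilter, whose requested() and closed() are coroutines. A rough migration sketch for code that called the sync filter directly; the surrounding function and variable names are illustrative, only the added await reflects the package's API.

```python
from typing import Optional

from crawlo import Request


async def maybe_schedule(request: Request, request_filter) -> Optional[Request]:
    # Old sync RedisFilter (deleted above): `if request_filter.requested(request): ...`
    # New AioRedisFilter: the same check is a coroutine and must be awaited.
    if await request_filter.requested(request):
        return None    # duplicate request: drop it
    return request     # new request: let it through
```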
{crawlo-1.0.3.dist-info → crawlo-1.0.4.dist-info}/WHEEL
File without changes

{crawlo-1.0.3.dist-info → crawlo-1.0.4.dist-info}/entry_points.txt
File without changes

{crawlo-1.0.3.dist-info → crawlo-1.0.4.dist-info}/top_level.txt
File without changes