cobweb-launcher 1.2.57__tar.gz → 1.2.58__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of cobweb-launcher might be problematic. Click here for more details.
- {cobweb-launcher-1.2.57/cobweb_launcher.egg-info → cobweb-launcher-1.2.58}/PKG-INFO +1 -1
- cobweb-launcher-1.2.58/cobweb/db/redis_db.py +150 -0
- cobweb-launcher-1.2.57/cobweb/db/redis_db.py → cobweb-launcher-1.2.58/cobweb/db/redis_db_new.py +38 -50
- {cobweb-launcher-1.2.57 → cobweb-launcher-1.2.58}/cobweb/launchers/launcher_pro.py +6 -4
- {cobweb-launcher-1.2.57 → cobweb-launcher-1.2.58/cobweb_launcher.egg-info}/PKG-INFO +1 -1
- {cobweb-launcher-1.2.57 → cobweb-launcher-1.2.58}/cobweb_launcher.egg-info/SOURCES.txt +1 -0
- {cobweb-launcher-1.2.57 → cobweb-launcher-1.2.58}/setup.py +1 -1
- {cobweb-launcher-1.2.57 → cobweb-launcher-1.2.58}/LICENSE +0 -0
- {cobweb-launcher-1.2.57 → cobweb-launcher-1.2.58}/README.md +0 -0
- {cobweb-launcher-1.2.57 → cobweb-launcher-1.2.58}/cobweb/__init__.py +0 -0
- {cobweb-launcher-1.2.57 → cobweb-launcher-1.2.58}/cobweb/base/__init__.py +0 -0
- {cobweb-launcher-1.2.57 → cobweb-launcher-1.2.58}/cobweb/base/common_queue.py +0 -0
- {cobweb-launcher-1.2.57 → cobweb-launcher-1.2.58}/cobweb/base/decorators.py +0 -0
- {cobweb-launcher-1.2.57 → cobweb-launcher-1.2.58}/cobweb/base/item.py +0 -0
- {cobweb-launcher-1.2.57 → cobweb-launcher-1.2.58}/cobweb/base/log.py +0 -0
- {cobweb-launcher-1.2.57 → cobweb-launcher-1.2.58}/cobweb/base/request.py +0 -0
- {cobweb-launcher-1.2.57 → cobweb-launcher-1.2.58}/cobweb/base/response.py +0 -0
- {cobweb-launcher-1.2.57 → cobweb-launcher-1.2.58}/cobweb/base/seed.py +0 -0
- {cobweb-launcher-1.2.57 → cobweb-launcher-1.2.58}/cobweb/constant.py +0 -0
- {cobweb-launcher-1.2.57 → cobweb-launcher-1.2.58}/cobweb/crawlers/__init__.py +0 -0
- {cobweb-launcher-1.2.57 → cobweb-launcher-1.2.58}/cobweb/crawlers/base_crawler.py +0 -0
- {cobweb-launcher-1.2.57 → cobweb-launcher-1.2.58}/cobweb/crawlers/crawler.py +0 -0
- {cobweb-launcher-1.2.57 → cobweb-launcher-1.2.58}/cobweb/crawlers/file_crawler.py +0 -0
- {cobweb-launcher-1.2.57 → cobweb-launcher-1.2.58}/cobweb/db/__init__.py +0 -0
- {cobweb-launcher-1.2.57 → cobweb-launcher-1.2.58}/cobweb/db/api_db.py +0 -0
- {cobweb-launcher-1.2.57 → cobweb-launcher-1.2.58}/cobweb/exceptions/__init__.py +0 -0
- {cobweb-launcher-1.2.57 → cobweb-launcher-1.2.58}/cobweb/exceptions/oss_db_exception.py +0 -0
- {cobweb-launcher-1.2.57 → cobweb-launcher-1.2.58}/cobweb/launchers/__init__.py +0 -0
- {cobweb-launcher-1.2.57 → cobweb-launcher-1.2.58}/cobweb/launchers/launcher.py +0 -0
- {cobweb-launcher-1.2.57 → cobweb-launcher-1.2.58}/cobweb/launchers/launcher_air.py +0 -0
- {cobweb-launcher-1.2.57 → cobweb-launcher-1.2.58}/cobweb/launchers/launcher_api.py +0 -0
- {cobweb-launcher-1.2.57 → cobweb-launcher-1.2.58}/cobweb/pipelines/__init__.py +0 -0
- {cobweb-launcher-1.2.57 → cobweb-launcher-1.2.58}/cobweb/pipelines/pipeline.py +0 -0
- {cobweb-launcher-1.2.57 → cobweb-launcher-1.2.58}/cobweb/pipelines/pipeline_console.py +0 -0
- {cobweb-launcher-1.2.57 → cobweb-launcher-1.2.58}/cobweb/pipelines/pipeline_loghub.py +0 -0
- {cobweb-launcher-1.2.57 → cobweb-launcher-1.2.58}/cobweb/setting.py +0 -0
- {cobweb-launcher-1.2.57 → cobweb-launcher-1.2.58}/cobweb/utils/__init__.py +0 -0
- {cobweb-launcher-1.2.57 → cobweb-launcher-1.2.58}/cobweb/utils/bloom.py +0 -0
- {cobweb-launcher-1.2.57 → cobweb-launcher-1.2.58}/cobweb/utils/dotting.py +0 -0
- {cobweb-launcher-1.2.57 → cobweb-launcher-1.2.58}/cobweb/utils/oss.py +0 -0
- {cobweb-launcher-1.2.57 → cobweb-launcher-1.2.58}/cobweb/utils/tools.py +0 -0
- {cobweb-launcher-1.2.57 → cobweb-launcher-1.2.58}/cobweb_launcher.egg-info/dependency_links.txt +0 -0
- {cobweb-launcher-1.2.57 → cobweb-launcher-1.2.58}/cobweb_launcher.egg-info/requires.txt +0 -0
- {cobweb-launcher-1.2.57 → cobweb-launcher-1.2.58}/cobweb_launcher.egg-info/top_level.txt +0 -0
- {cobweb-launcher-1.2.57 → cobweb-launcher-1.2.58}/setup.cfg +0 -0
- {cobweb-launcher-1.2.57 → cobweb-launcher-1.2.58}/test/test.py +0 -0
|
@@ -0,0 +1,150 @@
|
|
|
1
|
+
import redis
|
|
2
|
+
import time
|
|
3
|
+
from cobweb import setting
|
|
4
|
+
from redis.exceptions import ConnectionError, TimeoutError
|
|
5
|
+
|
|
6
|
+
|
|
7
|
+
class RedisClient:
    """Wrapper around ``redis.Redis`` that retries on connection failures.

    Every command is routed through :meth:`execute_command`, which pings the
    server, reconnects if the ping fails, and retries the command up to
    ``max_retries`` times, sleeping ``retry_delay`` seconds between attempts.

    Connection settings come from the keyword arguments, or from
    ``setting.REDIS_CONFIG`` when no keyword arguments are given. The mapping
    must contain ``host``, ``password``, ``port`` and ``db``.
    """

    def __init__(self, **kwargs):
        """Store the connection settings and open the first connection.

        Raises:
            KeyError: if a required connection setting is missing.
            Exception: if Redis cannot be reached within ``max_retries``
                attempts (raised by :meth:`connect`).
        """
        redis_config = kwargs or setting.REDIS_CONFIG
        self.host = redis_config['host']
        self.password = redis_config['password']
        self.port = redis_config['port']
        self.db = redis_config['db']

        self.max_retries = 5  # attempts before giving up
        self.retry_delay = 5  # seconds slept between attempts
        self.client = None
        self.connect()

    def connect(self):
        """Create a new client and verify it with ``PING``, retrying on failure.

        Raises:
            Exception: after ``max_retries`` failed attempts; the last
                connection error is attached as ``__cause__``.
        """
        retries = 0
        while retries < self.max_retries:
            try:
                self.client = redis.Redis(
                    host=self.host,
                    port=self.port,
                    password=self.password,
                    db=self.db,
                    socket_timeout=5,  # timeout for socket reads/writes
                    socket_connect_timeout=5,  # timeout for establishing the connection
                )
                # Verify that the connection actually works.
                self.client.ping()
                return
            except (ConnectionError, TimeoutError) as e:
                retries += 1
                if retries >= self.max_retries:
                    raise Exception("达到最大重试次数,无法连接 Redis") from e
                time.sleep(self.retry_delay)

    def is_connected(self):
        """Return True if a client exists and answers ``PING``."""
        if self.client is None:
            return False
        try:
            self.client.ping()
            return True
        except (ConnectionError, TimeoutError):
            return False

    def reconnect(self):
        """Replace the current client with a freshly connected one."""
        self.connect()

    def _run_with_retry(self, operation, failure_message):
        """Call ``operation()`` until it succeeds or retries are exhausted.

        ``operation`` must look up ``self.client`` when it is called, because
        a reconnect replaces the client object between attempts.
        """
        retries = 0
        while retries < self.max_retries:
            try:
                # Ping first so a dropped connection is rebuilt before the
                # real command is sent.
                if not self.is_connected():
                    self.reconnect()
                return operation()
            except (ConnectionError, TimeoutError) as e:
                retries += 1
                if retries >= self.max_retries:
                    raise Exception(failure_message) from e
                time.sleep(self.retry_delay)

    def execute_command(self, command, *args, **kwargs):
        """Run the client method named ``command`` with retry/reconnect.

        Returns:
            Whatever the underlying client method returns.

        Raises:
            Exception: after ``max_retries`` connection/timeout failures; the
                last error is attached as ``__cause__``.
        """
        return self._run_with_retry(
            lambda: getattr(self.client, command)(*args, **kwargs),
            "达到最大重试次数,无法执行命令",
        )

    def get(self, name):
        """Return the value stored at ``name``."""
        return self.execute_command("get", name)

    def incrby(self, name, value):
        """Increment ``name`` by ``value`` and return the client's reply."""
        return self.execute_command("incrby", name, value)

    def setnx(self, name, value=""):
        """Set ``name`` only if it does not exist; return the client's reply."""
        return self.execute_command("setnx", name, value)

    def setex(self, name, t, value=""):
        """Set ``name`` to ``value`` with expiry ``t``; return the client's reply."""
        return self.execute_command("setex", name, t, value)

    def expire(self, name, t, nx: bool = False, xx: bool = False, gt: bool = False, lt: bool = False):
        """Set a timeout of ``t`` on ``name`` and return the client's reply.

        Only the flags that are set are forwarded, so the call also works
        with client versions whose ``expire`` accepts just ``(name, time)``.
        """
        flags = {
            flag: True
            for flag, enabled in (("nx", nx), ("xx", xx), ("gt", gt), ("lt", lt))
            if enabled
        }
        return self.execute_command("expire", name, t, **flags)

    def ttl(self, name):
        """Return the remaining time to live of ``name``."""
        return self.execute_command("ttl", name)

    def delete(self, name):
        """Delete ``name`` and return the client's reply."""
        return self.execute_command("delete", name)

    def exists(self, *name) -> int:
        """Return the client's ``exists`` reply for the given key names."""
        return self.execute_command("exists", *name)

    def sadd(self, name, value):
        """Add ``value`` to the set ``name``."""
        return self.execute_command("sadd", name, value)

    def zcard(self, name) -> int:
        """Return the client's ``zcard`` reply for the sorted set ``name``."""
        return self.execute_command("zcard", name)

    def zadd(self, name, item: dict, **kwargs):
        """Add the ``item`` mapping to the sorted set ``name``."""
        return self.execute_command("zadd", name, item, **kwargs)

    def zrem(self, name, *value):
        """Remove the given members from the sorted set ``name``."""
        return self.execute_command("zrem", name, *value)

    def zcount(self, name, _min, _max):
        """Return the client's ``zcount`` reply for the range ``_min``..``_max``."""
        return self.execute_command("zcount", name, _min, _max)

    def lua(self, script: str, keys: list = None, args: list = None):
        """Evaluate ``script`` with ``EVAL``, passing ``keys`` then ``args``."""
        keys = keys or []
        args = args or []
        keys_count = len(keys)
        return self.execute_command("eval", script, keys_count, *keys, *args)

    def lua_sha(self, sha1: str, keys: list = None, args: list = None):
        """Evaluate the script identified by ``sha1`` with ``EVALSHA``."""
        keys = keys or []
        args = args or []
        keys_count = len(keys)
        return self.execute_command("evalsha", sha1, keys_count, *keys, *args)

    def execute_lua(self, lua_script: str, keys: list, *args):
        """Register ``lua_script`` on the client and run it with retry.

        Registration and invocation both happen inside the retry loop, so a
        connection failure while the script runs is retried like any other
        command, and each attempt binds the script to the current client.

        Raises:
            Exception: after ``max_retries`` connection/timeout failures.
        """
        def run_script():
            script = self.client.register_script(lua_script)
            return script(keys=keys, args=args)

        return self._run_with_retry(run_script, "达到最大重试次数,无法执行命令")
|
cobweb-launcher-1.2.57/cobweb/db/redis_db.py → cobweb-launcher-1.2.58/cobweb/db/redis_db_new.py
RENAMED
|
@@ -1,3 +1,21 @@
|
|
|
1
|
+
|
|
2
|
+
|
|
3
|
+
|
|
4
|
+
# 示例用法
|
|
5
|
+
# if __name__ == "__main__":
|
|
6
|
+
# redis_client = RedisClient(
|
|
7
|
+
# host="r-j6c1t3etiefpmz7cwdpd.redis.rds.aliyuncs.com", port=6379,
|
|
8
|
+
# password="SpiderLinux666", db=0
|
|
9
|
+
# )
|
|
10
|
+
#
|
|
11
|
+
# # 执行 Redis 命令
|
|
12
|
+
# try:
|
|
13
|
+
# ss = redis_client.get("host_speed_control:bepls.com")
|
|
14
|
+
# print(f"获取的值: {ss}")
|
|
15
|
+
# except Exception as e:
|
|
16
|
+
# print(f"操作失败: {e}")
|
|
17
|
+
|
|
18
|
+
|
|
1
19
|
import redis
|
|
2
20
|
from cobweb import setting
|
|
3
21
|
|
|
@@ -6,86 +24,60 @@ class RedisDB:
|
|
|
6
24
|
|
|
7
25
|
def __init__(self, **kwargs):
|
|
8
26
|
redis_config = kwargs or setting.REDIS_CONFIG
|
|
9
|
-
|
|
10
|
-
|
|
11
|
-
def get_connection(self):
|
|
12
|
-
return redis.StrictRedis(connection_pool=self.pool)
|
|
13
|
-
# self._client = redis.Redis(connection_pool=pool)
|
|
14
|
-
|
|
15
|
-
def get(self, name):
|
|
16
|
-
with self.get_connection() as client:
|
|
17
|
-
return client.get(name)
|
|
18
|
-
|
|
19
|
-
def incrby(self, name, value):
|
|
20
|
-
with self.get_connection() as client:
|
|
21
|
-
client.incrby(name, value)
|
|
27
|
+
pool = redis.ConnectionPool(**redis_config)
|
|
28
|
+
self._client = redis.Redis(connection_pool=pool)
|
|
22
29
|
|
|
23
30
|
def setnx(self, name, value=""):
|
|
24
|
-
|
|
25
|
-
client.setnx(name, value)
|
|
31
|
+
return self._client.setnx(name, value)
|
|
26
32
|
|
|
27
33
|
def setex(self, name, t, value=""):
|
|
28
|
-
|
|
29
|
-
client.setex(name, t, value)
|
|
34
|
+
return self._client.setex(name, t, value)
|
|
30
35
|
|
|
31
36
|
def expire(self, name, t, nx: bool = False, xx: bool = False, gt: bool = False, lt: bool = False):
|
|
32
|
-
|
|
33
|
-
client.expire(name, t, nx, xx, gt, lt)
|
|
37
|
+
return self._client.expire(name, t, nx, xx, gt, lt)
|
|
34
38
|
|
|
35
39
|
def ttl(self, name):
|
|
36
|
-
|
|
37
|
-
return client.ttl(name)
|
|
40
|
+
return self._client.ttl(name)
|
|
38
41
|
|
|
39
42
|
def delete(self, name):
|
|
40
|
-
|
|
41
|
-
return client.delete(name)
|
|
43
|
+
return self._client.delete(name)
|
|
42
44
|
|
|
43
45
|
def exists(self, *name) -> bool:
|
|
44
|
-
|
|
45
|
-
return client.exists(*name)
|
|
46
|
+
return self._client.exists(*name)
|
|
46
47
|
|
|
47
48
|
def sadd(self, name, value):
|
|
48
|
-
|
|
49
|
-
return client.sadd(name, value)
|
|
49
|
+
return self._client.sadd(name, value)
|
|
50
50
|
|
|
51
51
|
def zcard(self, name) -> bool:
|
|
52
|
-
|
|
53
|
-
return client.zcard(name)
|
|
52
|
+
return self._client.zcard(name)
|
|
54
53
|
|
|
55
54
|
def zadd(self, name, item: dict, **kwargs):
|
|
56
|
-
|
|
57
|
-
return client.zadd(name, item, **kwargs)
|
|
55
|
+
return self._client.zadd(name, item, **kwargs)
|
|
58
56
|
|
|
59
57
|
def zrem(self, name, *value):
|
|
60
|
-
|
|
61
|
-
return client.zrem(name, *value)
|
|
58
|
+
return self._client.zrem(name, *value)
|
|
62
59
|
|
|
63
60
|
def zcount(self, name, _min, _max):
|
|
64
|
-
|
|
65
|
-
return client.zcount(name, _min, _max)
|
|
61
|
+
return self._client.zcount(name, _min, _max)
|
|
66
62
|
|
|
67
63
|
# def zrangebyscore(self, name, _min, _max, start, num, withscores: bool = False, *args):
|
|
68
|
-
#
|
|
69
|
-
# return client.zrangebyscore(name, _min, _max, start, num, withscores, *args)
|
|
64
|
+
# return self._client.zrangebyscore(name, _min, _max, start, num, withscores, *args)
|
|
70
65
|
|
|
71
66
|
def lua(self, script: str, keys: list = None, args: list = None):
|
|
72
67
|
keys = keys or []
|
|
73
68
|
args = args or []
|
|
74
69
|
keys_count = len(keys)
|
|
75
|
-
|
|
76
|
-
return client.eval(script, keys_count, *keys, *args)
|
|
70
|
+
return self._client.eval(script, keys_count, *keys, *args)
|
|
77
71
|
|
|
78
72
|
def lua_sha(self, sha1: str, keys: list = None, args: list = None):
|
|
79
73
|
keys = keys or []
|
|
80
74
|
args = args or []
|
|
81
75
|
keys_count = len(keys)
|
|
82
|
-
|
|
83
|
-
return client.evalsha(sha1, keys_count, *keys, *args)
|
|
76
|
+
return self._client.evalsha(sha1, keys_count, *keys, *args)
|
|
84
77
|
|
|
85
78
|
def execute_lua(self, lua_script: str, keys: list, *args):
|
|
86
|
-
|
|
87
|
-
|
|
88
|
-
return execute(keys=keys, args=args)
|
|
79
|
+
execute = self._client.register_script(lua_script)
|
|
80
|
+
return execute(keys=keys, args=args)
|
|
89
81
|
|
|
90
82
|
def lock(self, key, t=15) -> bool:
|
|
91
83
|
lua_script = """
|
|
@@ -112,7 +104,7 @@ class RedisDB:
|
|
|
112
104
|
else
|
|
113
105
|
members = redis.call('zrangebyscore', KEYS[1], min, max, 'WITHSCORES', 'limit', start, count)
|
|
114
106
|
end
|
|
115
|
-
|
|
107
|
+
|
|
116
108
|
local result = {}
|
|
117
109
|
|
|
118
110
|
for i = 1, #members, 2 do
|
|
@@ -124,7 +116,7 @@ class RedisDB:
|
|
|
124
116
|
else
|
|
125
117
|
originPriority = math.floor(members[i+1])
|
|
126
118
|
end
|
|
127
|
-
|
|
119
|
+
|
|
128
120
|
if ( score + 0 >= 1000 ) then
|
|
129
121
|
priority = -score - originPriority / 1000
|
|
130
122
|
elseif ( score + 0 == 0 ) then
|
|
@@ -143,10 +135,6 @@ class RedisDB:
|
|
|
143
135
|
members = self.execute_lua(lua_script, [key], _min, _max, start, count, score)
|
|
144
136
|
return [(members[i].decode(), int(members[i + 1])) for i in range(0, len(members), 2)]
|
|
145
137
|
|
|
146
|
-
# def get_member(self):
|
|
147
|
-
# with self.get_connection() as client:
|
|
148
|
-
# pipeline = client.pipeline()
|
|
149
|
-
|
|
150
138
|
def done(self, keys: list, *args) -> list:
|
|
151
139
|
lua_script = """
|
|
152
140
|
for i, member in ipairs(ARGV) do
|
|
@@ -1,6 +1,8 @@
|
|
|
1
1
|
import time
|
|
2
2
|
import threading
|
|
3
3
|
|
|
4
|
+
from h5py.h5pl import insert
|
|
5
|
+
|
|
4
6
|
from cobweb.db import RedisDB
|
|
5
7
|
from cobweb.base import Seed, logger
|
|
6
8
|
from cobweb.constant import LogTemplate
|
|
@@ -23,9 +25,9 @@ class LauncherPro(Launcher):
|
|
|
23
25
|
self._reset_lock_key = "lock:reset:%s_%s" % (project, task)
|
|
24
26
|
|
|
25
27
|
# self._bf_key = "bloom_%s_%s" % (project, task)
|
|
26
|
-
|
|
28
|
+
|
|
27
29
|
self._db = RedisDB()
|
|
28
|
-
|
|
30
|
+
|
|
29
31
|
# self._bf = BloomFilter(self._bf_key)
|
|
30
32
|
|
|
31
33
|
self._heartbeat_start_event = threading.Event()
|
|
@@ -46,9 +48,9 @@ class LauncherPro(Launcher):
|
|
|
46
48
|
spider_speed = self._db.get(self._speed_control_key)
|
|
47
49
|
if int(spider_speed or 0) > self._spider_max_count:
|
|
48
50
|
expire_time = self._db.ttl(self._speed_control_key)
|
|
49
|
-
if expire_time
|
|
51
|
+
if expire_time <= -1:
|
|
50
52
|
self._db.delete(self._speed_control_key)
|
|
51
|
-
|
|
53
|
+
elif isinstance(expire_time, int):
|
|
52
54
|
logger.info(f"Too fast! Please wait {expire_time} seconds...")
|
|
53
55
|
time.sleep(expire_time / 2)
|
|
54
56
|
return None
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{cobweb-launcher-1.2.57 → cobweb-launcher-1.2.58}/cobweb_launcher.egg-info/dependency_links.txt
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|