cobweb-launcher 0.0.1__py3-none-any.whl
File manifest for the cobweb-launcher 0.0.1 wheel (path, lines added, lines removed):
- cobweb/__init__.py +2 -0
- cobweb/base/__init__.py +0 -0
- cobweb/base/bbb.py +187 -0
- cobweb/base/config.py +164 -0
- cobweb/base/decorators.py +95 -0
- cobweb/base/hash_table.py +60 -0
- cobweb/base/interface.py +44 -0
- cobweb/base/log.py +96 -0
- cobweb/base/queue_tmp.py +60 -0
- cobweb/base/request.py +62 -0
- cobweb/base/task.py +38 -0
- cobweb/base/utils.py +15 -0
- cobweb/db/__init__.py +0 -0
- cobweb/db/base/__init__.py +0 -0
- cobweb/db/base/client_db.py +1 -0
- cobweb/db/base/oss_db.py +116 -0
- cobweb/db/base/redis_db.py +214 -0
- cobweb/db/base/redis_dbv3.py +231 -0
- cobweb/db/scheduler/__init__.py +0 -0
- cobweb/db/scheduler/default.py +8 -0
- cobweb/db/scheduler/textfile.py +29 -0
- cobweb/db/storer/__init__.py +0 -0
- cobweb/db/storer/console.py +10 -0
- cobweb/db/storer/loghub.py +55 -0
- cobweb/db/storer/redis.py +16 -0
- cobweb/db/storer/textfile.py +16 -0
- cobweb/distributed/__init__.py +0 -0
- cobweb/distributed/launcher.py +194 -0
- cobweb/distributed/models.py +140 -0
- cobweb/single/__init__.py +0 -0
- cobweb/single/models.py +104 -0
- cobweb/single/nest.py +153 -0
- cobweb_launcher-0.0.1.dist-info/LICENSE +21 -0
- cobweb_launcher-0.0.1.dist-info/METADATA +45 -0
- cobweb_launcher-0.0.1.dist-info/RECORD +37 -0
- cobweb_launcher-0.0.1.dist-info/WHEEL +5 -0
- cobweb_launcher-0.0.1.dist-info/top_level.txt +1 -0
cobweb/base/queue_tmp.py
ADDED
@@ -0,0 +1,60 @@
|
|
1
|
+
from typing import Iterable
|
2
|
+
|
3
|
+
# from pympler import asizeof
|
4
|
+
from collections import deque
|
5
|
+
|
6
|
+
|
7
|
+
class Queue:
    """In-memory multi-queue manager backed by ``collections.deque``.

    Each named queue is stored as an instance attribute, so ``queue_names``
    simply reflects ``self.__dict__``.  A default ``_seed_queue`` is always
    created.  ``push``/``pop`` implement FIFO by default.
    """

    def __init__(self):
        # Default queue used by push_seed/pop_seed.
        self._seed_queue = deque()

    @property
    def queue_names(self):
        """Names of all queues currently held (including ``_seed_queue``)."""
        return tuple(self.__dict__.keys())

    @property
    def used_memory(self):
        """Approximate total memory of this object and its queues.

        Fix: ``pympler`` was commented out at module level, so this always
        raised NameError; import it lazily so the rest of the class works
        without the optional dependency.
        """
        from pympler import asizeof
        return asizeof.asizeof(self)

    def create_queue(self, queue_name: str):
        """Register a new, empty deque under ``queue_name``."""
        self.__setattr__(queue_name, deque())

    def push_seed(self, seed):
        """Append ``seed`` to the default seed queue."""
        self.push("_seed_queue", seed)

    def pop_seed(self):
        """Pop the oldest seed from the default seed queue (FIFO)."""
        return self.pop("_seed_queue")

    def push(self, queue_name: str, data, left: bool = False):
        """Add ``data`` to queue ``queue_name``.

        Iterables (except str/bytes, which are treated as single items) are
        pushed element-wise.  ``left=True`` pushes to the left end.

        Fixes: the left/right branches were inverted (``extend`` ran when
        ``left=True``), and a plain string was exploded char-by-char
        because ``str`` is ``Iterable``.
        Unknown queue names are logged and ignored (best-effort, as before).
        """
        try:
            if not data:
                return None
            queue = self.__getattribute__(queue_name)
            if isinstance(data, Iterable) and not isinstance(data, (str, bytes)):
                queue.extendleft(data) if left else queue.extend(data)
            else:
                queue.appendleft(data) if left else queue.append(data)
        except AttributeError as e:
            print(e)

    def pop(self, queue_name: str, left: bool = True):
        """Remove and return one item from ``queue_name``.

        ``left=True`` (default) pops the left end, giving FIFO order
        against the default ``push``.  Fix: the branches were inverted
        (``pop()`` from the right ran when ``left=True``).
        Returns None when the queue is empty or does not exist.
        """
        try:
            queue = self.__getattribute__(queue_name)
            return queue.popleft() if left else queue.pop()
        except IndexError as e:
            print(e)
            return None
        except AttributeError as e:
            print(e)
            return None
51
|
+
|
52
|
+
|
53
|
+
# qqueue = Queue()
|
54
|
+
# # qqueue.create_queue("test")
|
55
|
+
# print(qqueue.queue_names)
|
56
|
+
# qqueue.push("task_queue", "key")
|
57
|
+
# print(qqueue.used_memory)
|
58
|
+
# c = qqueue.pop("task_queue")
|
59
|
+
# print(c)
|
60
|
+
|
cobweb/base/request.py
ADDED
@@ -0,0 +1,62 @@
|
|
1
|
+
import random
|
2
|
+
from typing import Union
|
3
|
+
|
4
|
+
import requests
|
5
|
+
|
6
|
+
|
7
|
+
class Request:
    """Placeholder request wrapper; currently holds no state.

    The module-level helpers below (``gen_user_agent``, ``config``,
    ``request``) do the actual work — this class looks reserved for a
    future object-oriented API.  NOTE(review): confirm it is still needed.
    """

    def __init__(self):
        pass
|
11
|
+
|
12
|
+
|
13
|
+
def gen_user_agent(platform: str = 'android', redis_client=None):
    """Generate a randomized mobile browser User-Agent string.

    :param platform: 'android' or 'iphone'; any other value yields ''.
    :param redis_client: optional redis connection; when given, a random
        Android device model is drawn from the '(md)set_android_model' set.
    :return: a User-Agent string, or '' for unknown platforms.
    """

    def rand_version(bounds, sep='.'):
        # One random integer per (low, high) pair, joined with `sep`.
        return sep.join(str(random.randint(low, high)) for low, high in bounds)

    if platform == 'android':
        os_version = rand_version(((4, 10), (0, 9), (0, 9)))
        model = ''
        if redis_client:
            model = redis_client.srandmember('(md)set_android_model').decode() or ''
        webkit_version = rand_version(((450, 550), (0, 100), (0, 100)))
        version = rand_version(((3, 6), (0, 9), (0, 9)))
        chrome_version = rand_version(((50, 88), (0, 9), (1000, 5000), (0, 1000)))
        return (
            f'Mozilla/5.0 (Linux; U; Android {os_version}; zh-cn; {model} Build/{model}) '
            f'AppleWebKit/{webkit_version} (KHTML, like Gecko) Version/{version} '
            f'Chrome/{chrome_version} Mobile Safari/{webkit_version}'
        )

    if platform == 'iphone':
        os_version = rand_version(((5, 13), (0, 9), (0, 9)), sep='_')
        webkit_version = rand_version(((550, 650), (0, 100), (0, 100)))
        version = rand_version(((4, 13), (0, 9), (0, 9)))
        return (
            f'Mozilla/5.0 (iPhone; CPU iPhone OS {os_version} like Mac OS X) '
            f'AppleWebKit/{webkit_version} (KHTML, like Gecko) Version/{version} '
            f'Mobile Safari/{webkit_version}'
        )

    return ''
|
29
|
+
|
30
|
+
|
31
|
+
def config(
        url,
        method: str = "GET",
        headers: dict = None,
        proxies: dict = None,
        cookies: dict = None,
        params: dict = None,
        timeout: int = None,
        stream: bool = False,
        data: Union[dict, str, tuple] = None,
) -> dict:
    """Assemble the keyword arguments for a ``requests.request`` call.

    Ensures a user-agent header is always present: missing headers get a
    minimal default, and headers lacking a (case-insensitive) user-agent
    key have a random one injected via ``gen_user_agent``.
    The timeout defaults to 3 seconds when not supplied.
    """
    if not headers:
        headers = {"accept": "*/*", "user-agent": gen_user_agent()}
    else:
        has_user_agent = any(name.lower() == "user-agent" for name in headers)
        if not has_user_agent:
            headers["user-agent"] = gen_user_agent()

    return dict(
        method=method,
        url=url,
        data=data,
        params=params,
        cookies=cookies,
        headers=headers,
        proxies=proxies,
        stream=stream,
        timeout=timeout if timeout else 3,
    )
|
59
|
+
|
60
|
+
|
61
|
+
def request(**kwargs):
    """Pass-through wrapper: forward all keyword arguments (as produced by
    ``config``) to ``requests.request`` and return the Response."""
    return requests.request(**kwargs)
|
cobweb/base/task.py
ADDED
@@ -0,0 +1,38 @@
|
|
1
|
+
from config import info
|
2
|
+
|
3
|
+
|
4
|
+
class Task:
    """Configuration bundle describing one crawl task."""

    def __init__(
            self,
            project=None,
            task_name=None,
            start_seed=None,
            spider_num=None,
            # queue_length=None,
            max_retries=None,
            scheduler_info=None,
            storer_info=None,
            redis_info=None
    ):
        """
        :param project: project name; defaults to "test".
        :param task_name: task name; defaults to "spider".
        :param start_seed: initial seed(s) to crawl.
        :param spider_num: number of spider workers; defaults to 1.
        :param max_retries: retry limit; defaults to 5.
        :param scheduler_info: scheduler database description.
        :param storer_info: Union(list, DataInfo/namedtuple); each element
            must carry 3 values (database type, table name, field names).
        :param redis_info: redis connection settings.
        """
        self.project = "test" if not project else project
        self.task_name = "spider" if not task_name else task_name
        self.start_seed = start_seed
        self.spider_num = 1 if not spider_num else spider_num
        self.max_retries = 5 if not max_retries else max_retries
        # `info` (imported from config) normalises each raw description;
        # the tag selects the flavour: 0=redis, 1=scheduler, 2=storer.
        self.redis_info = info(redis_info, tag=0)
        self.scheduler_info = info(scheduler_info, tag=1)
        self.storer_info = info(storer_info, tag=2)
|
cobweb/base/utils.py
ADDED
@@ -0,0 +1,15 @@
|
|
1
|
+
import sys
|
2
|
+
|
3
|
+
|
4
|
+
def struct_table_name(table_name):
    """Encode a table name for safe embedding in key/attribute names:
    '.' becomes '__p__' and ':' becomes '__c__'."""
    encoded = table_name
    for char, token in ((".", "__p__"), (":", "__c__")):
        encoded = encoded.replace(char, token)
    return encoded
|
6
|
+
|
7
|
+
|
8
|
+
def restore_table_name(table_name):
    """Inverse of ``struct_table_name``: decode '__p__' back to '.' and
    '__c__' back to ':'."""
    decoded = table_name
    for token, char in (("__p__", "."), ("__c__", ":")):
        decoded = decoded.replace(token, char)
    return decoded
|
10
|
+
|
11
|
+
|
12
|
+
def struct_queue_name(db_name, table_name):
    """Build the attribute name of the queue buffering rows bound for
    (db_name, table_name); interned so repeated lookups share one object."""
    queue_name = "__{}_{}_queue__".format(db_name, table_name)
    return sys.intern(queue_name)
|
14
|
+
|
15
|
+
|
cobweb/db/__init__.py
ADDED
File without changes
|
File without changes
|
@@ -0,0 +1 @@
|
|
1
|
+
|
cobweb/db/base/oss_db.py
ADDED
@@ -0,0 +1,116 @@
|
|
1
|
+
import oss2
|
2
|
+
from typing import Union
|
3
|
+
from oss2.models import PartInfo
|
4
|
+
from requests import Response
|
5
|
+
from base.log import log
|
6
|
+
|
7
|
+
|
8
|
+
class OssDB:
    """Aliyun OSS uploader that accepts raw bytes or a streaming
    requests.Response and chooses single vs. multipart upload by size.
    """

    def __init__(
            self,
            bucket_name,
            endpoint,
            access_key,
            secret_key,
            chunk_size,
            min_size
    ):
        """
        :param chunk_size: multipart part size in bytes; falsy -> 1 MiB.
        :param min_size: smallest payload worth uploading; falsy -> 1 KiB.
        """
        self.auth = oss2.Auth(
            access_key_id=access_key,
            access_key_secret=secret_key
        )
        self.bucket = oss2.Bucket(
            auth=self.auth,
            endpoint=endpoint,
            bucket_name=bucket_name
        )
        self.chunk_size = chunk_size or 1024 ** 2
        self.min_size = min_size or 1024

    @staticmethod
    def format_upload_len(length):
        """Render a byte count in human-readable form ('512 B', '2.0 KB').

        Fixes: the old loop tried units smallest-first, so anything >=1 KiB
        was always reported in KB and 'TB' was unreachable; sizes below
        1 KiB fell through and returned None.

        :raises ValueError: if ``length`` is falsy.
        """
        if not length:
            raise ValueError("Length cannot be None or 0")

        units = ["KB", "MB", "GB", "TB"]
        # Largest unit first so 5 MiB reports as '5.0 MB', not '5120.0 KB'.
        for i in range(len(units) - 1, -1, -1):
            num = length / 1024 ** (i + 1)
            if num >= 1:
                return f"{round(num, 2)} {units[i]}"
        return f"{length} B"

    def assemble(self, ready_data, part_data):
        """Append ``part_data`` to the pending buffer.

        :return: (remaining_buffer, chunk) — ``chunk`` is exactly
            ``self.chunk_size`` bytes once the buffer reaches that size,
            otherwise None.
        """
        upload_data = None
        ready_data = ready_data + part_data
        if len(ready_data) >= self.chunk_size:
            upload_data = ready_data[:self.chunk_size]
            ready_data = ready_data[self.chunk_size:]

        return ready_data, upload_data

    def iter_data(self, data):
        """Yield ``data`` in chunks of at most ``self.chunk_size`` bytes.

        bytes are sliced in place; a requests.Response is streamed via
        ``iter_content``.  Any other type yields nothing.
        """
        if isinstance(data, bytes):
            for i in range(0, len(data), self.chunk_size):
                yield data[i:i + self.chunk_size]
        elif isinstance(data, Response):
            for part_data in data.iter_content(self.chunk_size):
                yield part_data

    def upload_split(
            self, oss_path: str,
            data: "Union[bytes, Response]",
            timeout: int = 300,
    ):
        """Upload ``data`` to ``oss_path``; True on success, False on error.

        Payloads spanning more than one chunk use multipart upload; a
        single sub-chunk payload larger than ``min_size`` uses put_object;
        anything smaller is skipped (logged, still returns True).

        Fixes vs. the original:
          * the multipart upload is completed even when the payload is an
            exact multiple of chunk_size (ready_data empty) — previously it
            was left dangling and the object never assembled;
          * abort_multipart_upload is only attempted when an upload_id
            exists, and its own failure no longer masks the real error.
        """
        parts = []
        upload_id = None
        ready_data = b""
        upload_data_len = 0
        headers = {"Expires": str(timeout * 1000)}
        try:
            upload_id = self.bucket.init_multipart_upload(oss_path).upload_id
            for part_data in self.iter_data(data):
                upload_data_len += len(part_data)
                ready_data, upload_data = self.assemble(ready_data, part_data)
                if upload_data:
                    part_index = len(parts) + 1
                    upload_info = self.bucket.upload_part(
                        oss_path, upload_id, part_index, upload_data
                    )
                    parts.append(PartInfo(part_index, upload_info.etag))

            format_upload = self.format_upload_len(upload_data_len)

            if parts:
                if ready_data:
                    # Trailing partial chunk becomes the final part.
                    part_index = len(parts) + 1
                    upload_info = self.bucket.upload_part(
                        oss_path, upload_id, part_index, ready_data
                    )
                    parts.append(PartInfo(part_index, upload_info.etag))
                self.bucket.complete_multipart_upload(
                    oss_path, upload_id, parts
                )
                log.info(
                    f"split upload, file path: {oss_path}"
                    f", file size: {format_upload}"
                )

            elif len(ready_data) > self.min_size:
                self.bucket.put_object(oss_path, ready_data, headers)
                log.info(
                    f"upload file, file path: {oss_path}"
                    f", file size: {format_upload}"
                )

            else:
                log.info(
                    f"file size smaller than min size! "
                    f"file size: {format_upload}"
                )
            status = True
        except Exception as e:
            if upload_id:
                try:
                    self.bucket.abort_multipart_upload(oss_path, upload_id, headers)
                except Exception:
                    log.exception("abort multipart upload failed")
            log.exception("upload file exception: " + str(e))
            status = False

        return status
|
116
|
+
|
@@ -0,0 +1,214 @@
|
|
1
|
+
import time
|
2
|
+
import redis
|
3
|
+
from base.bbb import Seed
|
4
|
+
|
5
|
+
|
6
|
+
class RedisDB:
    """Redis-backed seed store coordinating a distributed crawl task.

    All keys are namespaced as ``{project}:{task_name}:...``:
      * ``spider_key``   zset of seeds; score >= 0 means ready to crawl,
        a negative score marks a seed handed out to a spider (see get_seed).
      * ``storer_key``   per-storer zset (``%s`` filled with the storer id)
        of seeds whose results have been stored.
      * ``succeed_key`` / ``failed_key``   sets of finished seed strings.
      * ``heartbeat_key`` and the three ``*_lock`` string keys coordinate
        the cooperating worker processes.
    """

    def __init__(
            self,
            project: str,
            task_name: str,
            host=None,
            port=None,
            username=None,
            password=None,
            db=0
    ):
        pool = redis.ConnectionPool(
            host=host,
            port=port,
            username=username,
            password=password,
            db=db
        )
        self.heartbeat_key = f"{project}:{task_name}:heartbeat"  # redis type string
        self.spider_key = f"{project}:{task_name}:seed_info:spider"  # redis type zset, .format(priority)
        self.storer_key = f"{project}:{task_name}:seed_info:storer:%s"  # redis type set,
        self.failed_key = f"{project}:{task_name}:seed_info:failed"  # redis type set, .format(priority)
        self.succeed_key = f"{project}:{task_name}:seed_info:succeed"  # redis type set, .format(priority)
        self.update_lock = f"{project}:{task_name}:update_seed_lock"  # redis type string
        self.check_lock = f"{project}:{task_name}:check_seed_lock"  # redis type string
        self.scheduler_lock = f"{project}:{task_name}:scheduler_lock"  # redis type string
        self.client = redis.Redis(connection_pool=pool)

    # pass!
    def _get_lock(self, key, t=15, timeout=3, sleep_time=0.1):
        """Best-effort distributed lock via SETNX.

        Spins every ``sleep_time`` seconds for up to ``timeout`` seconds
        trying to create ``key``; on success arms a ``t``-second TTL and
        returns True.  On failure: if the existing lock has no TTL
        (``ttl == -1`` — presumably the holder died between SETNX and
        EXPIRE; re-checked 3 times over 1.5s) it is re-armed with a TTL so
        it can expire and be retaken later.  Returns False when not
        acquired.
        """
        begin_time = int(time.time())
        while True:
            if self.client.setnx(key, ""):
                # Acquired: arm the TTL so a crashed holder cannot keep it.
                self.client.expire(key, t)
                return True
            if int(time.time()) - begin_time > timeout:
                break
            time.sleep(sleep_time)

        if self.client.ttl(key) == -1:
            # Lock exists but never got a TTL; confirm before re-arming.
            delete_status = True
            for _ in range(3):
                if self.client.ttl(key) != -1:
                    delete_status = False
                    break
                time.sleep(0.5)
            if delete_status:
                self.client.expire(key, t)
            return False
        else:
            ttl = self.client.ttl(key)
            print("ttl: " + str(ttl))
            return False

    # pass!
    def _deal_seed(self, seeds, is_add: bool):
        """Write seeds into the spider zset, scored by their priority.

        ``is_add=True`` inserts new members only (NX); ``is_add=False``
        updates existing members only (XX).  Accepts one seed or a list;
        non-Seed items are wrapped in ``Seed``.
        """
        if not seeds:
            return None

        if not isinstance(seeds, list):
            seeds = [seeds]

        item_info = dict()

        for seed in seeds:
            if not isinstance(seed, Seed):
                seed = Seed(seed)
            item_info[seed.format_seed] = seed._priority

        if item_info:
            self.client.zadd(self.spider_key, mapping=item_info, nx=is_add, xx=not is_add)

    # pass!
    def add_seed(self, seeds):
        # Insert-only (NX): never clobbers the score of an in-flight seed.
        self._deal_seed(seeds, is_add=True)

    def reset_seed(self, seeds):
        # Update-only (XX): rewrites the score of seeds already queued.
        self._deal_seed(seeds, is_add=False)

    # pass!
    def del_seed(self, seeds, spider_status: bool = True):
        """Finish seeds: record them in the succeed (``spider_status=True``)
        or failed set and remove them from the spider zset."""
        if not seeds:
            return None

        if not isinstance(seeds, list):
            seeds = [seeds]

        seeds = [seed if isinstance(seed, Seed) else Seed(seed) for seed in seeds]

        if seeds:
            redis_key = self.succeed_key if spider_status else self.failed_key
            self.client.sadd(redis_key, *(str(seed) for seed in seeds))
            self.client.zrem(self.spider_key, *(seed.format_seed for seed in seeds))

    # pass!
    def set_storer(self, key, seeds):
        """Mark seeds as stored by storer ``key``: zset scored by the
        negated current timestamp."""
        if not seeds:
            return None

        if not isinstance(seeds, list):
            seeds = [seeds]

        item_info = dict()
        score = -int(time.time())
        for seed in seeds:
            if not isinstance(seed, Seed):
                seed = Seed(seed)
            item_info[seed.format_seed] = score

        if item_info:
            self.client.zadd(self.storer_key % key, mapping=item_info)
            print("zadd storer key", len(item_info.keys()))

    # pass!
    def get_seed(self, length: int = 200):
        """Atomically hand out up to ``length`` ready seeds.

        Under ``update_lock``: reads members with score >= 0 (ready) and
        rewrites each score to ``-(now + priority/1000)`` — the negative
        value marks the seed in-flight while the 3 decimal places preserve
        the original priority (recovered in check_spider_queue).  Returns a
        list of Seed objects; returns None implicitly when the lock could
        not be acquired.
        """
        cs = time.time()

        if self._get_lock(key=self.update_lock):

            update_item, result = {}, []

            version = int(time.time())

            items = self.client.zrangebyscore(self.spider_key, min=0, max="+inf", start=0, num=length, withscores=True)

            for value, priority in items:
                score = -(version + int(priority) / 1000)
                seed = Seed(value, priority=priority, version=version)
                update_item[value] = score
                result.append(seed)

            print("\nset seeds into queue time: " + str(time.time() - cs))
            if result:
                self.client.zadd(self.spider_key, mapping=update_item, xx=True)

            self.client.delete(self.update_lock)
            print("push seeds into queue time: " + str(time.time() - cs))
            return result

    # pass!
    def check_spider_queue(self, stop, storer_num):
        """Housekeeping loop; runs until the ``stop`` event is set.

        Under ``check_lock``: seeds present in all ``storer_num`` storer
        zsets are promoted to the succeed set and purged everywhere; stale
        in-flight seeds (negative score older than the cutoff) get their
        ready score restored from the fractional part written by get_seed.
        """
        while not stop.is_set():
            # Original note (translated): poll for the check lock, retrying
            # for up to 600s with 3s sleeps; once acquired the lock gets a
            # short TTL.
            if self._get_lock(key=self.check_lock, t=5, timeout=600, sleep_time=3):
                heartbeat = True if self.client.exists(self.heartbeat_key) else False
                # Original note (translated): after a restart (no heartbeat)
                # reset every score, otherwise only touch seeds handed out
                # more than 10 minutes ago.
                score = -int(time.time()) + 600 if heartbeat else "-inf"

                keys = self.client.keys(self.storer_key % "*")
                if len(keys) == storer_num:
                    # A seed is fully done once every storer recorded it:
                    # intersect all storer zsets, then drain in batches.
                    intersection_key = self.storer_key % "intersection"
                    self.client.delete(intersection_key)
                    self.client.zinterstore(intersection_key, keys)
                    while True:
                        members = self.client.zrange(intersection_key, 0, 1999)
                        if not members:
                            break
                        for key in keys:
                            self.client.zrem(key, *members)
                        self.client.sadd(self.succeed_key, *members)
                        self.client.zrem(self.spider_key, *members)
                        self.client.zrem(intersection_key, *members)
                        print("succeed spider data ...")

                    # Drop stale per-storer entries older than the cutoff.
                    for key in keys:
                        self.client.zremrangebyscore(key, min=score, max="(0")

                # Re-queue stale in-flight seeds: the 3 decimal places of
                # the negative score hold the original priority.
                while True:
                    items = self.client.zrangebyscore(self.spider_key, min=score, max="(0", start=0, num=5000, withscores=True)
                    if not items:
                        break
                    reset_items = {}
                    for value, priority in items:
                        reset_score = "{:.3f}".format(priority).split(".")[1]
                        reset_items[value] = int(reset_score)
                    if reset_items:
                        self.client.zadd(self.spider_key, mapping=reset_items, xx=True)

                if not heartbeat:
                    self.client.setex(self.heartbeat_key, 15, "")

                self.client.delete(self.check_lock)

            time.sleep(3)

    # pass!
    def set_heartbeat(self, stop):
        """Keep the heartbeat key alive (5s TTL, refreshed every 3s) until
        the ``stop`` event is set."""
        time.sleep(5)
        while not stop.is_set():
            self.client.setex(self.heartbeat_key, 5, "")
            time.sleep(3)

    # # pass!
    # def heartbeat(self):
    #     """
    #     Return the remaining TTL of the heartbeat key.
    #     """
    #     return self.client.ttl(self.heartbeat_key)

    # pass!
    def spider_queue_length(self):
        # Total seeds in the spider zset, ready and in-flight alike.
        return self.client.zcard(self.spider_key)

    # pass!
    def ready_seed_length(self):
        # Only seeds with a non-negative score are ready to be handed out.
        return self.client.zcount(self.spider_key, min=0, max="+inf")

    def get_scheduler_lock(self):
        # Convenience wrapper around _get_lock for the scheduler's lock.
        return self._get_lock(self.scheduler_lock)
|