cobweb_launcher-0.0.1-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- cobweb/__init__.py +2 -0
- cobweb/base/__init__.py +0 -0
- cobweb/base/bbb.py +187 -0
- cobweb/base/config.py +164 -0
- cobweb/base/decorators.py +95 -0
- cobweb/base/hash_table.py +60 -0
- cobweb/base/interface.py +44 -0
- cobweb/base/log.py +96 -0
- cobweb/base/queue_tmp.py +60 -0
- cobweb/base/request.py +62 -0
- cobweb/base/task.py +38 -0
- cobweb/base/utils.py +15 -0
- cobweb/db/__init__.py +0 -0
- cobweb/db/base/__init__.py +0 -0
- cobweb/db/base/client_db.py +1 -0
- cobweb/db/base/oss_db.py +116 -0
- cobweb/db/base/redis_db.py +214 -0
- cobweb/db/base/redis_dbv3.py +231 -0
- cobweb/db/scheduler/__init__.py +0 -0
- cobweb/db/scheduler/default.py +8 -0
- cobweb/db/scheduler/textfile.py +29 -0
- cobweb/db/storer/__init__.py +0 -0
- cobweb/db/storer/console.py +10 -0
- cobweb/db/storer/loghub.py +55 -0
- cobweb/db/storer/redis.py +16 -0
- cobweb/db/storer/textfile.py +16 -0
- cobweb/distributed/__init__.py +0 -0
- cobweb/distributed/launcher.py +194 -0
- cobweb/distributed/models.py +140 -0
- cobweb/single/__init__.py +0 -0
- cobweb/single/models.py +104 -0
- cobweb/single/nest.py +153 -0
- cobweb_launcher-0.0.1.dist-info/LICENSE +21 -0
- cobweb_launcher-0.0.1.dist-info/METADATA +45 -0
- cobweb_launcher-0.0.1.dist-info/RECORD +37 -0
- cobweb_launcher-0.0.1.dist-info/WHEEL +5 -0
- cobweb_launcher-0.0.1.dist-info/top_level.txt +1 -0
cobweb/base/queue_tmp.py
ADDED
@@ -0,0 +1,60 @@
from typing import Iterable

from pympler import asizeof  # needed by used_memory
from collections import deque


class Queue:

    def __init__(self):
        self._seed_queue = deque()

    @property
    def queue_names(self):
        return tuple(self.__dict__.keys())

    @property
    def used_memory(self):
        return asizeof.asizeof(self)

    def create_queue(self, queue_name: str):
        self.__setattr__(queue_name, deque())

    def push_seed(self, seed):
        self.push("_seed_queue", seed)

    def pop_seed(self):
        return self.pop("_seed_queue")

    def push(self, queue_name: str, data, left: bool = False):
        # left=True pushes onto the left end of the deque.
        try:
            if not data:
                return None
            queue = self.__getattribute__(queue_name)
            # Strings are iterable but must be treated as single items,
            # otherwise they would be pushed character by character.
            if isinstance(data, Iterable) and not isinstance(data, (str, bytes)):
                queue.extendleft(data) if left else queue.extend(data)
            else:
                queue.appendleft(data) if left else queue.append(data)
        except AttributeError as e:
            print(e)

    def pop(self, queue_name: str, left: bool = True):
        # left=True pops from the left end of the deque; returns None when empty.
        try:
            queue = self.__getattribute__(queue_name)
            return queue.popleft() if left else queue.pop()
        except IndexError as e:
            print(e)
            return None
        except AttributeError as e:
            print(e)
            return None


# qqueue = Queue()
# # qqueue.create_queue("test")
# print(qqueue.queue_names)
# qqueue.push("task_queue", "key")
# print(qqueue.used_memory)
# c = qqueue.pop("task_queue")
# print(c)

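For orientation, a minimal usage sketch of the Queue class above (names are illustrative; assumes pympler is installed for used_memory):

    from cobweb.base.queue_tmp import Queue

    q = Queue()
    q.create_queue("task_queue")                   # adds a new deque attribute
    q.push("task_queue", ["a", "b", "c"])          # non-string iterables are extended item by item
    q.push_seed({"url": "https://example.com"})    # single items are appended
    print(q.queue_names)                           # ('_seed_queue', 'task_queue')
    print(q.pop("task_queue"))                     # pops one item, or returns None if empty
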
cobweb/base/request.py
ADDED
@@ -0,0 +1,62 @@
import random
from typing import Union

import requests


class Request:

    def __init__(self):
        pass


def gen_user_agent(platform: str = 'android', redis_client=None):
    user_agent = ''
    if platform == 'android':
        os_version = f'{random.randint(4, 10)}.{random.randint(0, 9)}.{random.randint(0, 9)}'
        model = (redis_client and redis_client.srandmember('(md)set_android_model').decode()) or ''
        webkit_version = f'{random.randint(450, 550)}.{random.randint(0, 100)}.{random.randint(0, 100)}'
        version = f'{random.randint(3, 6)}.{random.randint(0, 9)}.{random.randint(0, 9)}'
        chrome_version = f'{random.randint(50, 88)}.{random.randint(0, 9)}.{random.randint(1000, 5000)}.{random.randint(0, 1000)}'
        user_agent = f'Mozilla/5.0 (Linux; U; Android {os_version}; zh-cn; {model} Build/{model}) AppleWebKit/{webkit_version} (KHTML, like Gecko) Version/{version} Chrome/{chrome_version} Mobile Safari/{webkit_version}'
    elif platform == 'iphone':
        os_version = f'{random.randint(5, 13)}_{random.randint(0, 9)}_{random.randint(0, 9)}'
        webkit_version = f'{random.randint(550, 650)}.{random.randint(0, 100)}.{random.randint(0, 100)}'
        version = f'{random.randint(4, 13)}.{random.randint(0, 9)}.{random.randint(0, 9)}'
        user_agent = f'Mozilla/5.0 (iPhone; CPU iPhone OS {os_version} like Mac OS X) AppleWebKit/{webkit_version} (KHTML, like Gecko) Version/{version} Mobile Safari/{webkit_version}'

    return user_agent


def config(
        url,
        method: str = "GET",
        headers: dict = None,
        proxies: dict = None,
        cookies: dict = None,
        params: dict = None,
        timeout: int = None,
        stream: bool = False,
        data: Union[dict, str, tuple] = None,
) -> dict:
    if not headers:
        headers = {"accept": "*/*", "user-agent": gen_user_agent()}

    elif "user-agent" not in [key.lower() for key in headers.keys()]:
        headers["user-agent"] = gen_user_agent()

    return {
        "method": method,
        "url": url,
        "data": data,
        "params": params,
        "cookies": cookies,
        "headers": headers,
        "proxies": proxies,
        "stream": stream,
        "timeout": timeout or 3,
    }


def request(**kwargs):
    return requests.request(**kwargs)

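A hypothetical usage sketch of the request helpers above (URL and timeout are illustrative):

    from cobweb.base import request as req

    kwargs = req.config("https://example.com/api", method="GET", timeout=10)
    resp = req.request(**kwargs)    # thin wrapper around requests.request
    print(resp.status_code)
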
cobweb/base/task.py
ADDED
@@ -0,0 +1,38 @@
from config import info


class Task:

    def __init__(
            self,
            project=None,
            task_name=None,
            start_seed=None,
            spider_num=None,
            # queue_length=None,
            max_retries=None,
            scheduler_info=None,
            storer_info=None,
            redis_info=None
    ):
        """

        :param project:
        :param task_name:
        :param start_seed:
        :param spider_num:
        # :param queue_length:
        :param scheduler_info:
        :param storer_info: Union(list, DataInfo/namedtuple); each item must contain three values (database type, table name, field names)
        """
        self.project = project or "test"
        self.task_name = task_name or "spider"
        self.start_seed = start_seed
        self.spider_num = spider_num or 1
        self.max_retries = max_retries or 5
        # self.redis_info = RedisInfo(**(redis_info or dict()))
        self.redis_info = info(redis_info, tag=0)
        # self.scheduler_info = SchedulerDB.info(scheduler_info)
        self.scheduler_info = info(scheduler_info, tag=1)
        # self.storer_info = StorerDB.info(storer_info)
        self.storer_info = info(storer_info, tag=2)

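A hypothetical Task construction (all values are illustrative; the storer_info item follows the (database type, table name, field names) convention described in the docstring):

    task = Task(
        project="demo_project",
        task_name="news_spider",
        start_seed={"url": "https://example.com"},
        spider_num=2,
        storer_info=("console", "demo_table", "url,title,content"),
    )
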
cobweb/base/utils.py
ADDED
@@ -0,0 +1,15 @@
import sys


def struct_table_name(table_name):
    return table_name.replace(".", "__p__").replace(":", "__c__")


def restore_table_name(table_name):
    return table_name.replace("__p__", ".").replace("__c__", ":")


def struct_queue_name(db_name, table_name):
    return sys.intern(f"__{db_name}_{table_name}_queue__")

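A short round-trip sketch of the helpers above:

    name = struct_table_name("project.db:table")    # 'project__p__db__c__table'
    assert restore_table_name(name) == "project.db:table"
    print(struct_queue_name("redis", name))         # '__redis_project__p__db__c__table_queue__'
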
cobweb/db/__init__.py
ADDED
File without changes

cobweb/db/base/__init__.py
ADDED
File without changes

cobweb/db/base/client_db.py
ADDED
@@ -0,0 +1 @@

cobweb/db/base/oss_db.py
ADDED
@@ -0,0 +1,116 @@
import oss2
from typing import Union
from oss2.models import PartInfo
from requests import Response
from base.log import log


class OssDB:

    def __init__(
            self,
            bucket_name,
            endpoint,
            access_key,
            secret_key,
            chunk_size,
            min_size
    ):
        self.auth = oss2.Auth(
            access_key_id=access_key,
            access_key_secret=secret_key
        )
        self.bucket = oss2.Bucket(
            auth=self.auth,
            endpoint=endpoint,
            bucket_name=bucket_name
        )
        self.chunk_size = chunk_size or 1024 ** 2
        self.min_size = min_size or 1024

    @staticmethod
    def format_upload_len(length):
        if not length:
            raise ValueError("Length cannot be None or 0")

        units = ["KB", "MB", "GB", "TB"]
        for i in range(3):
            num = length / 1024 ** (i + 1)
            if num >= 1:
                return f"{round(num, 2)} {units[i]}"

    def assemble(self, ready_data, part_data):
        upload_data = None
        ready_data = ready_data + part_data
        if len(ready_data) >= self.chunk_size:
            upload_data = ready_data[:self.chunk_size]
            ready_data = ready_data[self.chunk_size:]

        return ready_data, upload_data

    def iter_data(self, data):
        if isinstance(data, Response):
            for part_data in data.iter_content(self.chunk_size):
                yield part_data
        if isinstance(data, bytes):
            for i in range(0, len(data), self.chunk_size):
                yield data[i:i + self.chunk_size]

    def upload_split(
            self, oss_path: str,
            data: Union[bytes, Response],
            timeout: int = 300,
    ):
        parts = []
        upload_id = None
        ready_data = b""
        upload_data_len = 0
        headers = {"Expires": str(timeout * 1000)}
        try:
            upload_id = self.bucket.init_multipart_upload(oss_path).upload_id
            for part_data in self.iter_data(data):
                upload_data_len += len(part_data)
                ready_data, upload_data = self.assemble(ready_data, part_data)
                if upload_data:
                    part_index = len(parts) + 1
                    upload_info = self.bucket.upload_part(
                        oss_path, upload_id, part_index, upload_data
                    )
                    parts.append(PartInfo(part_index, upload_info.etag))

            format_upload = self.format_upload_len(upload_data_len)

            if parts and ready_data:
                part_index = len(parts) + 1
                upload_info = self.bucket.upload_part(
                    oss_path, upload_id, part_index, ready_data
                )
                parts.append(PartInfo(part_index, upload_info.etag))
                self.bucket.complete_multipart_upload(
                    oss_path, upload_id, parts
                )
                log.info(
                    f"split upload, file path: {oss_path}"
                    f", file size: {format_upload}"
                )

            elif len(ready_data) > self.min_size:
                self.bucket.put_object(oss_path, ready_data, headers)
                log.info(
                    f"upload file, file path: {oss_path}"
                    f", file size: {format_upload}"
                )

            else:
                log.info(
                    f"file size smaller than min size! "
                    f"file size: {format_upload}"
                )
            status = True
        except Exception as e:
            self.bucket.abort_multipart_upload(oss_path, upload_id, headers)
            log.exception("upload file exception: " + str(e))
            status = False

        return status

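A hypothetical usage sketch of OssDB.upload_split (bucket name, endpoint, and credentials are placeholders):

    oss = OssDB(
        bucket_name="my-bucket",
        endpoint="https://oss-cn-hangzhou.aliyuncs.com",
        access_key="<access_key>",
        secret_key="<secret_key>",
        chunk_size=1024 ** 2,   # 1 MB multipart chunks
        min_size=1024,          # skip uploads smaller than 1 KB
    )
    ok = oss.upload_split("path/to/object.bin", b"payload bytes")
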
cobweb/db/base/redis_db.py
ADDED
@@ -0,0 +1,214 @@
import time
import redis
from base.bbb import Seed


class RedisDB:

    def __init__(
            self,
            project: str,
            task_name: str,
            host=None,
            port=None,
            username=None,
            password=None,
            db=0
    ):
        pool = redis.ConnectionPool(
            host=host,
            port=port,
            username=username,
            password=password,
            db=db
        )
        self.heartbeat_key = f"{project}:{task_name}:heartbeat"  # redis type string
        self.spider_key = f"{project}:{task_name}:seed_info:spider"  # redis type zset, .format(priority)
        self.storer_key = f"{project}:{task_name}:seed_info:storer:%s"  # redis type set,
        self.failed_key = f"{project}:{task_name}:seed_info:failed"  # redis type set, .format(priority)
        self.succeed_key = f"{project}:{task_name}:seed_info:succeed"  # redis type set, .format(priority)
        self.update_lock = f"{project}:{task_name}:update_seed_lock"  # redis type string
        self.check_lock = f"{project}:{task_name}:check_seed_lock"  # redis type string
        self.scheduler_lock = f"{project}:{task_name}:scheduler_lock"  # redis type string
        self.client = redis.Redis(connection_pool=pool)

    # pass!
    def _get_lock(self, key, t=15, timeout=3, sleep_time=0.1):
        begin_time = int(time.time())
        while True:
            if self.client.setnx(key, ""):
                self.client.expire(key, t)
                return True
            if int(time.time()) - begin_time > timeout:
                break
            time.sleep(sleep_time)

        if self.client.ttl(key) == -1:
            delete_status = True
            for _ in range(3):
                if self.client.ttl(key) != -1:
                    delete_status = False
                    break
                time.sleep(0.5)
            if delete_status:
                self.client.expire(key, t)
            return False
        else:
            ttl = self.client.ttl(key)
            print("ttl: " + str(ttl))
            return False

    # pass!
    def _deal_seed(self, seeds, is_add: bool):
        if not seeds:
            return None

        if not isinstance(seeds, list):
            seeds = [seeds]

        item_info = dict()

        for seed in seeds:
            if not isinstance(seed, Seed):
                seed = Seed(seed)
            item_info[seed.format_seed] = seed._priority

        if item_info:
            self.client.zadd(self.spider_key, mapping=item_info, nx=is_add, xx=not is_add)

    # pass!
    def add_seed(self, seeds):
        self._deal_seed(seeds, is_add=True)

    def reset_seed(self, seeds):
        self._deal_seed(seeds, is_add=False)

    # pass!
    def del_seed(self, seeds, spider_status: bool = True):
        if not seeds:
            return None

        if not isinstance(seeds, list):
            seeds = [seeds]

        seeds = [seed if isinstance(seed, Seed) else Seed(seed) for seed in seeds]

        if seeds:
            redis_key = self.succeed_key if spider_status else self.failed_key
            self.client.sadd(redis_key, *(str(seed) for seed in seeds))
            self.client.zrem(self.spider_key, *(seed.format_seed for seed in seeds))

    # pass!
    def set_storer(self, key, seeds):
        if not seeds:
            return None

        if not isinstance(seeds, list):
            seeds = [seeds]

        item_info = dict()
        score = -int(time.time())
        for seed in seeds:
            if not isinstance(seed, Seed):
                seed = Seed(seed)
            item_info[seed.format_seed] = score

        if item_info:
            self.client.zadd(self.storer_key % key, mapping=item_info)
            print("zadd storer key", len(item_info.keys()))

    # pass!
    def get_seed(self, length: int = 200):
        cs = time.time()

        if self._get_lock(key=self.update_lock):

            update_item, result = {}, []

            version = int(time.time())

            items = self.client.zrangebyscore(self.spider_key, min=0, max="+inf", start=0, num=length, withscores=True)

            for value, priority in items:
                score = -(version + int(priority) / 1000)
                seed = Seed(value, priority=priority, version=version)
                update_item[value] = score
                result.append(seed)

            print("\nset seeds into queue time: " + str(time.time() - cs))
            if result:
                self.client.zadd(self.spider_key, mapping=update_item, xx=True)

            self.client.delete(self.update_lock)
            print("push seeds into queue time: " + str(time.time() - cs))
            return result

    # pass!
    def check_spider_queue(self, stop, storer_num):
        while not stop.is_set():
            # Poll for the check lock, retrying for up to 600s with 3s sleeps;
            # once acquired, the lock is given a short TTL.
            if self._get_lock(key=self.check_lock, t=5, timeout=600, sleep_time=3):
                heartbeat = True if self.client.exists(self.heartbeat_key) else False
                # If the heartbeat is missing (restart), reset all scores;
                # otherwise only handle scores older than 10 minutes.
                score = -int(time.time()) + 600 if heartbeat else "-inf"

                keys = self.client.keys(self.storer_key % "*")
                if len(keys) == storer_num:
                    intersection_key = self.storer_key % "intersection"
                    self.client.delete(intersection_key)
                    self.client.zinterstore(intersection_key, keys)
                    while True:
                        members = self.client.zrange(intersection_key, 0, 1999)
                        if not members:
                            break
                        for key in keys:
                            self.client.zrem(key, *members)
                        self.client.sadd(self.succeed_key, *members)
                        self.client.zrem(self.spider_key, *members)
                        self.client.zrem(intersection_key, *members)
                        print("succeed spider data ...")

                    for key in keys:
                        self.client.zremrangebyscore(key, min=score, max="(0")

                while True:
                    items = self.client.zrangebyscore(self.spider_key, min=score, max="(0", start=0, num=5000, withscores=True)
                    if not items:
                        break
                    reset_items = {}
                    for value, priority in items:
                        reset_score = "{:.3f}".format(priority).split(".")[1]
                        reset_items[value] = int(reset_score)
                    if reset_items:
                        self.client.zadd(self.spider_key, mapping=reset_items, xx=True)

                if not heartbeat:
                    self.client.setex(self.heartbeat_key, 15, "")

                self.client.delete(self.check_lock)
            time.sleep(3)

    # pass!
    def set_heartbeat(self, stop):
        time.sleep(5)
        while not stop.is_set():
            self.client.setex(self.heartbeat_key, 5, "")
            time.sleep(3)

    # # pass!
    # def heartbeat(self):
    #     """
    #     Return the remaining TTL of the heartbeat key.
    #     """
    #     return self.client.ttl(self.heartbeat_key)

    # pass!
    def spider_queue_length(self):
        return self.client.zcard(self.spider_key)

    # pass!
    def ready_seed_length(self):
        return self.client.zcount(self.spider_key, min=0, max="+inf")

    def get_scheduler_lock(self):
        return self._get_lock(self.scheduler_lock)

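A hypothetical end-to-end sketch of the RedisDB seed lifecycle (connection parameters and the seed payload are placeholders; Seed is defined in cobweb/base/bbb.py):

    db = RedisDB("test", "spider", host="127.0.0.1", port=6379)

    db.add_seed({"url": "https://example.com"})    # register a ready seed (score >= 0)
    print(db.ready_seed_length())                  # seeds waiting to be fetched

    seeds = db.get_seed(length=10)                 # claim up to 10 seeds under the update lock
    for seed in seeds:
        ...                                        # crawl, then report the outcome
        db.del_seed(seed, spider_status=True)      # move the seed to the succeed set
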