cobweb-launcher 3.1.8__py3-none-any.whl → 3.1.9__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- cobweb/launchers/launcher.py +1 -1
- cobweb/schedulers/scheduler_with_redis.py +1 -2
- cobweb/utils/__init__.py +1 -1
- cobweb/utils/oss.py +113 -113
- {cobweb_launcher-3.1.8.dist-info → cobweb_launcher-3.1.9.dist-info}/METADATA +1 -1
- {cobweb_launcher-3.1.8.dist-info → cobweb_launcher-3.1.9.dist-info}/RECORD +9 -11
- cobweb/schedulers/launcher_air.py +0 -93
- cobweb/schedulers/launcher_api.py +0 -225
- {cobweb_launcher-3.1.8.dist-info → cobweb_launcher-3.1.9.dist-info}/LICENSE +0 -0
- {cobweb_launcher-3.1.8.dist-info → cobweb_launcher-3.1.9.dist-info}/WHEEL +0 -0
- {cobweb_launcher-3.1.8.dist-info → cobweb_launcher-3.1.9.dist-info}/top_level.txt +0 -0
cobweb/launchers/launcher.py
CHANGED
@@ -147,7 +147,6 @@ class RedisScheduler(Scheduler):
|
|
147
147
|
elif todo_count:
|
148
148
|
logger.info(f"Recovery {self.task} task run!todo seeds count: {todo_count}, queue length: {all_count}")
|
149
149
|
self.pause.clear()
|
150
|
-
# self.execute()
|
151
150
|
else:
|
152
151
|
logger.info("pause! waiting for resume...")
|
153
152
|
else:
|
@@ -173,5 +172,5 @@ class RedisScheduler(Scheduler):
|
|
173
172
|
|
174
173
|
time.sleep(30)
|
175
174
|
|
176
|
-
logger.info("Done!
|
175
|
+
logger.info("Scheduler Done!")
|
177
176
|
|
cobweb/utils/__init__.py
CHANGED
cobweb/utils/oss.py
CHANGED
@@ -1,113 +1,113 @@
|
|
1
|
-
|
2
|
-
from cobweb import setting
|
3
|
-
from requests import Response
|
4
|
-
from oss2 import Auth, Bucket, models, PartIterator
|
5
|
-
from cobweb.exceptions import oss_db_exception
|
6
|
-
from cobweb.utils.decorators import decorator_oss_db
|
7
|
-
|
8
|
-
|
9
|
-
class OssUtil:
|
10
|
-
|
11
|
-
|
12
|
-
|
13
|
-
|
14
|
-
|
15
|
-
|
16
|
-
|
17
|
-
|
18
|
-
|
19
|
-
|
20
|
-
|
21
|
-
|
22
|
-
|
23
|
-
|
24
|
-
|
25
|
-
|
26
|
-
|
27
|
-
|
28
|
-
|
29
|
-
|
30
|
-
|
31
|
-
|
32
|
-
|
33
|
-
|
34
|
-
|
35
|
-
|
36
|
-
|
37
|
-
|
38
|
-
|
39
|
-
|
40
|
-
|
41
|
-
|
42
|
-
|
43
|
-
|
44
|
-
|
45
|
-
|
46
|
-
|
47
|
-
|
48
|
-
|
49
|
-
|
50
|
-
|
51
|
-
|
52
|
-
|
53
|
-
|
54
|
-
|
55
|
-
|
56
|
-
|
57
|
-
|
58
|
-
|
59
|
-
|
60
|
-
|
61
|
-
|
62
|
-
|
63
|
-
|
64
|
-
|
65
|
-
|
66
|
-
|
67
|
-
|
68
|
-
|
69
|
-
|
70
|
-
|
71
|
-
|
72
|
-
|
73
|
-
|
74
|
-
|
75
|
-
|
76
|
-
|
77
|
-
|
78
|
-
|
79
|
-
|
80
|
-
|
81
|
-
|
82
|
-
|
83
|
-
|
84
|
-
|
85
|
-
|
86
|
-
|
87
|
-
|
88
|
-
|
89
|
-
|
90
|
-
|
91
|
-
|
92
|
-
|
93
|
-
|
94
|
-
|
95
|
-
|
96
|
-
|
97
|
-
|
98
|
-
|
99
|
-
|
100
|
-
|
101
|
-
|
102
|
-
|
103
|
-
|
104
|
-
|
105
|
-
|
106
|
-
|
107
|
-
|
108
|
-
|
109
|
-
|
110
|
-
|
111
|
-
|
112
|
-
|
113
|
-
|
1
|
+
#
|
2
|
+
# from cobweb import setting
|
3
|
+
# from requests import Response
|
4
|
+
# from oss2 import Auth, Bucket, models, PartIterator
|
5
|
+
# from cobweb.exceptions import oss_db_exception
|
6
|
+
# from cobweb.utils.decorators import decorator_oss_db
|
7
|
+
#
|
8
|
+
#
|
9
|
+
# class OssUtil:
|
10
|
+
#
|
11
|
+
# def __init__(
|
12
|
+
# self,
|
13
|
+
# bucket=None,
|
14
|
+
# endpoint=None,
|
15
|
+
# access_key=None,
|
16
|
+
# secret_key=None,
|
17
|
+
# chunk_size=None,
|
18
|
+
# min_upload_size=None,
|
19
|
+
# **kwargs
|
20
|
+
# ):
|
21
|
+
# self.bucket = bucket or setting.OSS_BUCKET
|
22
|
+
# self.endpoint = endpoint or setting.OSS_ENDPOINT
|
23
|
+
# self.chunk_size = int(chunk_size or setting.OSS_CHUNK_SIZE)
|
24
|
+
# self.min_upload_size = int(min_upload_size or setting.OSS_MIN_UPLOAD_SIZE)
|
25
|
+
#
|
26
|
+
# self.failed_count = 0
|
27
|
+
# self._kw = kwargs
|
28
|
+
#
|
29
|
+
# self._auth = Auth(
|
30
|
+
# access_key_id=access_key or setting.OSS_ACCESS_KEY,
|
31
|
+
# access_key_secret=secret_key or setting.OSS_SECRET_KEY
|
32
|
+
# )
|
33
|
+
# self._client = Bucket(
|
34
|
+
# auth=self._auth,
|
35
|
+
# endpoint=self.endpoint,
|
36
|
+
# bucket_name=self.bucket,
|
37
|
+
# **self._kw
|
38
|
+
# )
|
39
|
+
#
|
40
|
+
# def failed(self):
|
41
|
+
# self.failed_count += 1
|
42
|
+
# if self.failed_count >= 5:
|
43
|
+
# self._client = Bucket(
|
44
|
+
# auth=self._auth,
|
45
|
+
# endpoint=self.endpoint,
|
46
|
+
# bucket_name=self.bucket,
|
47
|
+
# **self._kw
|
48
|
+
# )
|
49
|
+
#
|
50
|
+
# def exists(self, key: str) -> bool:
|
51
|
+
# try:
|
52
|
+
# result = self._client.object_exists(key)
|
53
|
+
# self.failed_count = 0
|
54
|
+
# return result
|
55
|
+
# except Exception as e:
|
56
|
+
# self.failed()
|
57
|
+
# raise e
|
58
|
+
#
|
59
|
+
# def head(self, key: str) -> models.HeadObjectResult:
|
60
|
+
# return self._client.head_object(key)
|
61
|
+
#
|
62
|
+
# @decorator_oss_db(exception=oss_db_exception.OssDBInitPartError)
|
63
|
+
# def init_part(self, key) -> models.InitMultipartUploadResult:
|
64
|
+
# """初始化分片上传"""
|
65
|
+
# return self._client.init_multipart_upload(key)
|
66
|
+
#
|
67
|
+
# @decorator_oss_db(exception=oss_db_exception.OssDBPutObjError)
|
68
|
+
# def put(self, key, data) -> models.PutObjectResult:
|
69
|
+
# """文件上传"""
|
70
|
+
# return self._client.put_object(key, data)
|
71
|
+
#
|
72
|
+
# @decorator_oss_db(exception=oss_db_exception.OssDBPutPartError)
|
73
|
+
# def put_part(self, key, upload_id, position, data) -> models.PutObjectResult:
|
74
|
+
# """分片上传"""
|
75
|
+
# return self._client.upload_part(key, upload_id, position, data)
|
76
|
+
#
|
77
|
+
# def list_part(self, key, upload_id): # -> List[models.ListPartsResult]:
|
78
|
+
# """获取分片列表"""
|
79
|
+
# return [part_info for part_info in PartIterator(self._client, key, upload_id)]
|
80
|
+
#
|
81
|
+
# @decorator_oss_db(exception=oss_db_exception.OssDBMergeError)
|
82
|
+
# def merge(self, key, upload_id, parts=None) -> models.PutObjectResult:
|
83
|
+
# """合并分片"""
|
84
|
+
# headers = None if parts else {"x-oss-complete-all": "yes"}
|
85
|
+
# return self._client.complete_multipart_upload(key, upload_id, parts, headers=headers)
|
86
|
+
#
|
87
|
+
# @decorator_oss_db(exception=oss_db_exception.OssDBAppendObjError)
|
88
|
+
# def append(self, key, position, data) -> models.AppendObjectResult:
|
89
|
+
# """追加上传"""
|
90
|
+
# return self._client.append_object(key, position, data)
|
91
|
+
#
|
92
|
+
# def iter_data(self, data, chunk_size=None):
|
93
|
+
# chunk_size = chunk_size or self.chunk_size
|
94
|
+
# if isinstance(data, Response):
|
95
|
+
# for part_data in data.iter_content(chunk_size):
|
96
|
+
# yield part_data
|
97
|
+
# if isinstance(data, bytes):
|
98
|
+
# for i in range(0, len(data), chunk_size):
|
99
|
+
# yield data[i:i + chunk_size]
|
100
|
+
#
|
101
|
+
# def assemble(self, ready_data, data, chunk_size=None):
|
102
|
+
# upload_data = b""
|
103
|
+
# ready_data = ready_data + data
|
104
|
+
# chunk_size = chunk_size or self.chunk_size
|
105
|
+
# if len(ready_data) >= chunk_size:
|
106
|
+
# upload_data = ready_data[:chunk_size]
|
107
|
+
# ready_data = ready_data[chunk_size:]
|
108
|
+
# return ready_data, upload_data
|
109
|
+
#
|
110
|
+
# def content_length(self, key: str) -> int:
|
111
|
+
# head = self.head(key)
|
112
|
+
# return head.content_length
|
113
|
+
#
|
@@ -17,25 +17,23 @@ cobweb/exceptions/__init__.py,sha256=E9SHnJBbhD7fOgPFMswqyOf8SKRDrI_i25L0bSpohvk
|
|
17
17
|
cobweb/exceptions/oss_db_exception.py,sha256=iP_AImjNHT3-Iv49zCFQ3rdLnlvuHa3h2BXApgrOYpA,636
|
18
18
|
cobweb/launchers/__init__.py,sha256=6_v2jd2sgj6YnOB1nPKiYBskuXVb5xpQnq2YaDGJgQ8,100
|
19
19
|
cobweb/launchers/distributor.py,sha256=yZJ7d8EXZGukRr8SzcxXZu1z37VxKqBWE1apNW9T9uE,6429
|
20
|
-
cobweb/launchers/launcher.py,sha256=
|
20
|
+
cobweb/launchers/launcher.py,sha256=HJJgVIh2vAfG2wnU_yHEN_v1tT_qpz2F4JmkcBoZvCE,5344
|
21
21
|
cobweb/launchers/uploader.py,sha256=ePQsGxoFw3Le4DgkoDGDqxrv8lGaYKCnGq2nuaWba9o,1771
|
22
22
|
cobweb/pipelines/__init__.py,sha256=zSUsGtx6smbs2iXBXvYynReKSgky-3gjqaAtKVnA_OU,105
|
23
23
|
cobweb/pipelines/pipeline.py,sha256=qwoOYMhlAB-MnEmMNpNeauTHoRTOr2wyBDYS4MF6B1c,261
|
24
24
|
cobweb/pipelines/pipeline_console.py,sha256=NEh-4zhuVAQOqwXLsqeb-rcNZ9_KXFUpL3otUTL5qBs,754
|
25
25
|
cobweb/pipelines/pipeline_loghub.py,sha256=xZ6D55BGdiM71WUv83jyLGbEyUwhBHLJRZoXthBxxTs,1019
|
26
26
|
cobweb/schedulers/__init__.py,sha256=LEya11fdAv0X28YzbQTeC1LQZ156Fj4cyEMGqQHUWW0,49
|
27
|
-
cobweb/schedulers/launcher_air.py,sha256=qdcxq41I9zN5snEpMUUNEQNTtUiZM1Hw_3N9zu4PuAs,3058
|
28
|
-
cobweb/schedulers/launcher_api.py,sha256=3-A6k3Igvi-xnvP9M_3NkJKUweDJ_pY10ZrHClteD-g,8628
|
29
27
|
cobweb/schedulers/scheduler.py,sha256=mN9XvaOCzNnBWQfzslTIM860ZGq2gyLtxpbVUd0Slqs,2240
|
30
|
-
cobweb/schedulers/scheduler_with_redis.py,sha256=
|
31
|
-
cobweb/utils/__init__.py,sha256=
|
28
|
+
cobweb/schedulers/scheduler_with_redis.py,sha256=EUsewtCDxgtgNgv2ogQus56GNv9IujkNLcpki-LPQzs,6343
|
29
|
+
cobweb/utils/__init__.py,sha256=yqIU08RcBRa2JZDr1PsrnXl473Ni5JqhagWIQvyGsc8,147
|
32
30
|
cobweb/utils/bloom.py,sha256=vng-YbKgh9HbtpAWYf_nkUSbfVTOj40aqUUejRYlsCU,1752
|
33
31
|
cobweb/utils/decorators.py,sha256=066JCY_RNMr2mXkhEv8XTtOOKkv9CFiBm0ZNCcC-2ag,1131
|
34
32
|
cobweb/utils/dotting.py,sha256=x34WryNKu_o54EzVwAZByagjMTXF0ZP0VYKdjfYEdSM,1833
|
35
|
-
cobweb/utils/oss.py,sha256=
|
33
|
+
cobweb/utils/oss.py,sha256=wmToIIVNO8nCQVRmreVaZejk01aCWS35e1NV6cr0yGI,4192
|
36
34
|
cobweb/utils/tools.py,sha256=5JEaaAwYoV9Sdla2UBIJn6faUBuXmxUMagm9ck6FVqs,1253
|
37
|
-
cobweb_launcher-3.1.
|
38
|
-
cobweb_launcher-3.1.
|
39
|
-
cobweb_launcher-3.1.
|
40
|
-
cobweb_launcher-3.1.
|
41
|
-
cobweb_launcher-3.1.
|
35
|
+
cobweb_launcher-3.1.9.dist-info/LICENSE,sha256=z1rxSIGOyzcSb3orZxFPxzx-0C1vTocmswqBNxpKfEk,1063
|
36
|
+
cobweb_launcher-3.1.9.dist-info/METADATA,sha256=eCmqW-nWkaL_ez6C7aTCw7u-gcTsYd3Y6b3IK6I0akY,6509
|
37
|
+
cobweb_launcher-3.1.9.dist-info/WHEEL,sha256=ewwEueio1C2XeHTvT17n8dZUJgOvyCWCt0WVNLClP9o,92
|
38
|
+
cobweb_launcher-3.1.9.dist-info/top_level.txt,sha256=4GETBGNsKqiCUezmT-mJn7tjhcDlu7nLIV5gGgHBW4I,7
|
39
|
+
cobweb_launcher-3.1.9.dist-info/RECORD,,
|
@@ -1,93 +0,0 @@
|
|
1
|
-
import time
|
2
|
-
|
3
|
-
from cobweb.base import logger
|
4
|
-
from cobweb.constant import LogTemplate
|
5
|
-
from .launcher import Launcher, check_pause
|
6
|
-
|
7
|
-
|
8
|
-
class LauncherAir(Launcher):
|
9
|
-
|
10
|
-
# def _scheduler(self):
|
11
|
-
# if self.start_seeds:
|
12
|
-
# self.__LAUNCHER_QUEUE__['todo'].push(self.start_seeds)
|
13
|
-
|
14
|
-
@check_pause
|
15
|
-
def _insert(self):
|
16
|
-
new_seeds = {}
|
17
|
-
del_seeds = set()
|
18
|
-
status = self.__LAUNCHER_QUEUE__['new'].length < self._new_queue_max_size
|
19
|
-
for _ in range(self._new_queue_max_size):
|
20
|
-
seed_tuple = self.__LAUNCHER_QUEUE__['new'].pop()
|
21
|
-
if not seed_tuple:
|
22
|
-
break
|
23
|
-
seed, new_seed = seed_tuple
|
24
|
-
new_seeds[new_seed.to_string] = new_seed.params.priority
|
25
|
-
del_seeds.add(seed.to_string)
|
26
|
-
if new_seeds:
|
27
|
-
self.__LAUNCHER_QUEUE__['todo'].push(new_seeds)
|
28
|
-
if del_seeds:
|
29
|
-
self.__LAUNCHER_QUEUE__['done'].push(del_seeds)
|
30
|
-
if status:
|
31
|
-
time.sleep(self._new_queue_wait_seconds)
|
32
|
-
|
33
|
-
@check_pause
|
34
|
-
def _delete(self):
|
35
|
-
seeds = []
|
36
|
-
status = self.__LAUNCHER_QUEUE__['done'].length < self._done_queue_max_size
|
37
|
-
|
38
|
-
for _ in range(self._done_queue_max_size):
|
39
|
-
seed = self.__LAUNCHER_QUEUE__['done'].pop()
|
40
|
-
if not seed:
|
41
|
-
break
|
42
|
-
seeds.append(seed.to_string)
|
43
|
-
|
44
|
-
if seeds:
|
45
|
-
self._remove_doing_seeds(seeds)
|
46
|
-
|
47
|
-
if status:
|
48
|
-
time.sleep(self._done_queue_wait_seconds)
|
49
|
-
|
50
|
-
def _polling(self):
|
51
|
-
|
52
|
-
check_emtpy_times = 0
|
53
|
-
|
54
|
-
while not self._stop.is_set():
|
55
|
-
|
56
|
-
queue_not_empty_count = 0
|
57
|
-
pooling_wait_seconds = 30
|
58
|
-
|
59
|
-
for q in self.__LAUNCHER_QUEUE__.values():
|
60
|
-
if q.length != 0:
|
61
|
-
queue_not_empty_count += 1
|
62
|
-
|
63
|
-
if queue_not_empty_count == 0:
|
64
|
-
pooling_wait_seconds = 3
|
65
|
-
if self._pause.is_set():
|
66
|
-
check_emtpy_times = 0
|
67
|
-
if not self._task_model:
|
68
|
-
logger.info("Done! Ready to close thread...")
|
69
|
-
self._stop.set()
|
70
|
-
elif check_emtpy_times > 2:
|
71
|
-
self.__DOING__ = {}
|
72
|
-
self._pause.set()
|
73
|
-
else:
|
74
|
-
logger.info(
|
75
|
-
"check whether the task is complete, "
|
76
|
-
f"reset times {3 - check_emtpy_times}"
|
77
|
-
)
|
78
|
-
check_emtpy_times += 1
|
79
|
-
elif self._pause.is_set():
|
80
|
-
self._pause.clear()
|
81
|
-
self._execute()
|
82
|
-
else:
|
83
|
-
logger.info(LogTemplate.launcher_air_polling.format(
|
84
|
-
task=self.task,
|
85
|
-
doing_len=len(self.__DOING__.keys()),
|
86
|
-
todo_len=self.__LAUNCHER_QUEUE__['todo'].length,
|
87
|
-
done_len=self.__LAUNCHER_QUEUE__['done'].length,
|
88
|
-
upload_len=self.__LAUNCHER_QUEUE__['upload'].length,
|
89
|
-
))
|
90
|
-
|
91
|
-
time.sleep(pooling_wait_seconds)
|
92
|
-
|
93
|
-
|
@@ -1,225 +0,0 @@
|
|
1
|
-
import time
|
2
|
-
import threading
|
3
|
-
|
4
|
-
from cobweb.db import ApiDB
|
5
|
-
from cobweb.base import Seed, logger
|
6
|
-
from cobweb.constant import DealModel, LogTemplate
|
7
|
-
from .launcher import Launcher, check_pause
|
8
|
-
|
9
|
-
|
10
|
-
class LauncherApi(Launcher):
|
11
|
-
|
12
|
-
def __init__(self, task, project, custom_setting=None, **kwargs):
|
13
|
-
super().__init__(task, project, custom_setting, **kwargs)
|
14
|
-
self._db = ApiDB()
|
15
|
-
|
16
|
-
self._todo_key = "{%s:%s}:todo" % (project, task)
|
17
|
-
self._done_key = "{%s:%s}:done" % (project, task)
|
18
|
-
self._fail_key = "{%s:%s}:fail" % (project, task)
|
19
|
-
self._heartbeat_key = "heartbeat:%s_%s" % (project, task)
|
20
|
-
|
21
|
-
self._statistics_done_key = "statistics:%s:%s:done" % (project, task)
|
22
|
-
self._statistics_fail_key = "statistics:%s:%s:fail" % (project, task)
|
23
|
-
self._speed_control_key = "speed_control:%s_%s" % (project, task)
|
24
|
-
|
25
|
-
self._reset_lock_key = "lock:reset:%s_%s" % (project, task)
|
26
|
-
|
27
|
-
# self._bf_key = "bloom_%s_%s" % (project, task)
|
28
|
-
# self._bf = BloomFilter(self._bf_key)
|
29
|
-
|
30
|
-
self._heartbeat_start_event = threading.Event()
|
31
|
-
self._redis_queue_empty_event = threading.Event()
|
32
|
-
|
33
|
-
@property
|
34
|
-
def heartbeat(self):
|
35
|
-
return self._db.exists(self._heartbeat_key)
|
36
|
-
|
37
|
-
def statistics(self, key, count):
|
38
|
-
if not self._task_model and not self._db.exists(key):
|
39
|
-
self._db.setex(key, 86400 * 30, int(count))
|
40
|
-
else:
|
41
|
-
self._db.incrby(key, count)
|
42
|
-
|
43
|
-
def _get_seed(self) -> Seed:
|
44
|
-
"""
|
45
|
-
从队列中获取种子(频控)
|
46
|
-
设置时间窗口为self._time_window(秒),判断在该窗口内的采集量是否满足阈值(self._spider_max_speed)
|
47
|
-
:return: True -> 种子, False -> None
|
48
|
-
"""
|
49
|
-
if (self._speed_control and self.__LAUNCHER_QUEUE__["todo"].length and
|
50
|
-
not self._db.auto_incr(self._speed_control_key, t=self._time_window, limit=self._spider_max_count)):
|
51
|
-
expire_time = self._db.ttl(self._speed_control_key)
|
52
|
-
if isinstance(expire_time, int) and expire_time <= -1:
|
53
|
-
self._db.delete(self._speed_control_key)
|
54
|
-
elif isinstance(expire_time, int):
|
55
|
-
logger.info(f"Too fast! Please wait {expire_time} seconds...")
|
56
|
-
time.sleep(expire_time / 2)
|
57
|
-
return None
|
58
|
-
seed = self.__LAUNCHER_QUEUE__["todo"].pop()
|
59
|
-
return seed
|
60
|
-
|
61
|
-
@check_pause
|
62
|
-
def _execute_heartbeat(self):
|
63
|
-
if self._heartbeat_start_event.is_set():
|
64
|
-
self._db.setex(self._heartbeat_key, 5)
|
65
|
-
time.sleep(3)
|
66
|
-
|
67
|
-
@check_pause
|
68
|
-
def _reset(self):
|
69
|
-
"""
|
70
|
-
检查过期种子,重新添加到redis缓存中
|
71
|
-
"""
|
72
|
-
reset_wait_seconds = 30
|
73
|
-
if self._db.lock(self._reset_lock_key, t=120):
|
74
|
-
|
75
|
-
_min = -int(time.time()) + self._seed_reset_seconds \
|
76
|
-
if self.heartbeat else "-inf"
|
77
|
-
|
78
|
-
self._db.members(self._todo_key, 0, _min=_min, _max="(0")
|
79
|
-
|
80
|
-
if not self.heartbeat:
|
81
|
-
self._heartbeat_start_event.set()
|
82
|
-
|
83
|
-
self._db.delete(self._reset_lock_key)
|
84
|
-
|
85
|
-
time.sleep(reset_wait_seconds)
|
86
|
-
|
87
|
-
@check_pause
|
88
|
-
def _scheduler(self):
|
89
|
-
"""
|
90
|
-
调度任务,获取redis队列种子,同时添加到doing字典中
|
91
|
-
"""
|
92
|
-
if not self._db.zcount(self._todo_key, 0, "(1000"):
|
93
|
-
time.sleep(self._scheduler_wait_seconds)
|
94
|
-
elif self.__LAUNCHER_QUEUE__['todo'].length >= self._todo_queue_size:
|
95
|
-
time.sleep(self._todo_queue_full_wait_seconds)
|
96
|
-
else:
|
97
|
-
members = self._db.members(
|
98
|
-
self._todo_key, int(time.time()),
|
99
|
-
count=self._todo_queue_size,
|
100
|
-
_min=0, _max="(1000"
|
101
|
-
)
|
102
|
-
for member, priority in members:
|
103
|
-
seed = Seed(member, priority=priority)
|
104
|
-
self.__LAUNCHER_QUEUE__['todo'].push(seed)
|
105
|
-
self.__DOING__[seed.to_string] = seed.params.priority
|
106
|
-
|
107
|
-
@check_pause
|
108
|
-
def _insert(self):
|
109
|
-
"""
|
110
|
-
添加新种子到redis队列中
|
111
|
-
"""
|
112
|
-
new_seeds = {}
|
113
|
-
del_seeds = set()
|
114
|
-
status = self.__LAUNCHER_QUEUE__['new'].length < self._new_queue_max_size
|
115
|
-
for _ in range(self._new_queue_max_size):
|
116
|
-
seed_tuple = self.__LAUNCHER_QUEUE__['new'].pop()
|
117
|
-
if not seed_tuple:
|
118
|
-
break
|
119
|
-
seed, new_seed = seed_tuple
|
120
|
-
new_seeds[new_seed.to_string] = new_seed.params.priority
|
121
|
-
del_seeds.add(seed)
|
122
|
-
if new_seeds:
|
123
|
-
self._db.zadd(self._todo_key, new_seeds, nx=True)
|
124
|
-
if del_seeds:
|
125
|
-
self.__LAUNCHER_QUEUE__['done'].push(list(del_seeds))
|
126
|
-
if status:
|
127
|
-
time.sleep(self._new_queue_wait_seconds)
|
128
|
-
|
129
|
-
@check_pause
|
130
|
-
def _refresh(self):
|
131
|
-
"""
|
132
|
-
刷新doing种子过期时间,防止reset重新消费
|
133
|
-
"""
|
134
|
-
if self.__DOING__:
|
135
|
-
refresh_time = int(time.time())
|
136
|
-
seeds = {k:-refresh_time - v / 1000 for k, v in self.__DOING__.items()}
|
137
|
-
self._db.zadd(self._todo_key, item=seeds, xx=True)
|
138
|
-
time.sleep(15)
|
139
|
-
|
140
|
-
@check_pause
|
141
|
-
def _delete(self):
|
142
|
-
"""
|
143
|
-
删除队列种子,根据状态添加至成功或失败队列,移除doing字典种子索引
|
144
|
-
"""
|
145
|
-
# seed_info = {"count": 0, "failed": [], "succeed": [], "common": []}
|
146
|
-
|
147
|
-
seed_list = []
|
148
|
-
status = self.__LAUNCHER_QUEUE__['done'].length < self._done_queue_max_size
|
149
|
-
|
150
|
-
for _ in range(self._done_queue_max_size):
|
151
|
-
seed = self.__LAUNCHER_QUEUE__['done'].pop()
|
152
|
-
if not seed:
|
153
|
-
break
|
154
|
-
seed_list.append(seed.to_string)
|
155
|
-
|
156
|
-
if seed_list:
|
157
|
-
|
158
|
-
self._db.zrem(self._todo_key, *seed_list)
|
159
|
-
self._remove_doing_seeds(seed_list)
|
160
|
-
|
161
|
-
if status:
|
162
|
-
time.sleep(self._done_queue_wait_seconds)
|
163
|
-
|
164
|
-
def _polling(self):
|
165
|
-
wait_scheduler_execute = True
|
166
|
-
check_emtpy_times = 0
|
167
|
-
while not self._stop.is_set():
|
168
|
-
queue_not_empty_count = 0
|
169
|
-
pooling_wait_seconds = 30
|
170
|
-
|
171
|
-
for q in self.__LAUNCHER_QUEUE__.values():
|
172
|
-
if q.length != 0:
|
173
|
-
queue_not_empty_count += 1
|
174
|
-
wait_scheduler_execute = False
|
175
|
-
|
176
|
-
if queue_not_empty_count == 0:
|
177
|
-
pooling_wait_seconds = 3
|
178
|
-
if self._pause.is_set():
|
179
|
-
check_emtpy_times = 0
|
180
|
-
if not self._task_model and (
|
181
|
-
not wait_scheduler_execute or
|
182
|
-
int(time.time()) - self._app_time > self._before_scheduler_wait_seconds
|
183
|
-
):
|
184
|
-
logger.info("Done! ready to close thread...")
|
185
|
-
self._stop.set()
|
186
|
-
|
187
|
-
elif self._db.zcount(self._todo_key, _min=0, _max="(1000"):
|
188
|
-
logger.info(f"Recovery {self.task} task run!")
|
189
|
-
self._pause.clear()
|
190
|
-
self._execute()
|
191
|
-
else:
|
192
|
-
logger.info("pause! waiting for resume...")
|
193
|
-
elif check_emtpy_times > 2:
|
194
|
-
self.__DOING__ = {}
|
195
|
-
seed_count = self._db.zcard(self._todo_key)
|
196
|
-
logger.info(f"队列剩余种子数:{seed_count}")
|
197
|
-
if not seed_count:
|
198
|
-
logger.info("Done! pause set...")
|
199
|
-
self._pause.set()
|
200
|
-
else:
|
201
|
-
self._pause.clear()
|
202
|
-
else:
|
203
|
-
logger.info(
|
204
|
-
"check whether the task is complete, "
|
205
|
-
f"reset times {3 - check_emtpy_times}"
|
206
|
-
)
|
207
|
-
check_emtpy_times += 1
|
208
|
-
else:
|
209
|
-
if self._pause.is_set():
|
210
|
-
self._pause.clear()
|
211
|
-
logger.info(LogTemplate.launcher_pro_polling.format(
|
212
|
-
task=self.task,
|
213
|
-
doing_len=len(self.__DOING__.keys()),
|
214
|
-
todo_len=self.__LAUNCHER_QUEUE__['todo'].length,
|
215
|
-
done_len=self.__LAUNCHER_QUEUE__['done'].length,
|
216
|
-
redis_seed_count=self._db.zcount(self._todo_key, "-inf", "+inf"),
|
217
|
-
redis_todo_len=self._db.zcount(self._todo_key, 0, "(1000"),
|
218
|
-
redis_doing_len=self._db.zcount(self._todo_key, "-inf", "(0"),
|
219
|
-
upload_len=self.__LAUNCHER_QUEUE__['upload'].length,
|
220
|
-
))
|
221
|
-
|
222
|
-
time.sleep(pooling_wait_seconds)
|
223
|
-
|
224
|
-
logger.info("Done! Ready to close thread...")
|
225
|
-
|
File without changes
|
File without changes
|
File without changes
|