cobweb-launcher 3.1.8__py3-none-any.whl → 3.1.9__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -128,7 +128,7 @@ class Launcher:
128
128
  self.__WORKER_THREAD__[name] = worker_thread
129
129
  worker_thread.start()
130
130
  time.sleep(3)
131
- logger.info("monitor thread close ...")
131
+ logger.info("main thread close!")
132
132
 
133
133
  def start(self):
134
134
  self._pause.is_set()
@@ -147,7 +147,6 @@ class RedisScheduler(Scheduler):
147
147
  elif todo_count:
148
148
  logger.info(f"Recovery {self.task} task run!todo seeds count: {todo_count}, queue length: {all_count}")
149
149
  self.pause.clear()
150
- # self.execute()
151
150
  else:
152
151
  logger.info("pause! waiting for resume...")
153
152
  else:
@@ -173,5 +172,5 @@ class RedisScheduler(Scheduler):
173
172
 
174
173
  time.sleep(30)
175
174
 
176
- logger.info("Done! Ready to close thread...")
175
+ logger.info("Scheduler Done!")
177
176
 
cobweb/utils/__init__.py CHANGED
@@ -1,4 +1,4 @@
1
- from .oss import OssUtil
1
+ # from .oss import OssUtil
2
2
  from .tools import *
3
3
  from .bloom import BloomFilter
4
4
  from .dotting import LoghubDot
cobweb/utils/oss.py CHANGED
@@ -1,113 +1,113 @@
1
-
2
- from cobweb import setting
3
- from requests import Response
4
- from oss2 import Auth, Bucket, models, PartIterator
5
- from cobweb.exceptions import oss_db_exception
6
- from cobweb.utils.decorators import decorator_oss_db
7
-
8
-
9
- class OssUtil:
10
-
11
- def __init__(
12
- self,
13
- bucket=None,
14
- endpoint=None,
15
- access_key=None,
16
- secret_key=None,
17
- chunk_size=None,
18
- min_upload_size=None,
19
- **kwargs
20
- ):
21
- self.bucket = bucket or setting.OSS_BUCKET
22
- self.endpoint = endpoint or setting.OSS_ENDPOINT
23
- self.chunk_size = int(chunk_size or setting.OSS_CHUNK_SIZE)
24
- self.min_upload_size = int(min_upload_size or setting.OSS_MIN_UPLOAD_SIZE)
25
-
26
- self.failed_count = 0
27
- self._kw = kwargs
28
-
29
- self._auth = Auth(
30
- access_key_id=access_key or setting.OSS_ACCESS_KEY,
31
- access_key_secret=secret_key or setting.OSS_SECRET_KEY
32
- )
33
- self._client = Bucket(
34
- auth=self._auth,
35
- endpoint=self.endpoint,
36
- bucket_name=self.bucket,
37
- **self._kw
38
- )
39
-
40
- def failed(self):
41
- self.failed_count += 1
42
- if self.failed_count >= 5:
43
- self._client = Bucket(
44
- auth=self._auth,
45
- endpoint=self.endpoint,
46
- bucket_name=self.bucket,
47
- **self._kw
48
- )
49
-
50
- def exists(self, key: str) -> bool:
51
- try:
52
- result = self._client.object_exists(key)
53
- self.failed_count = 0
54
- return result
55
- except Exception as e:
56
- self.failed()
57
- raise e
58
-
59
- def head(self, key: str) -> models.HeadObjectResult:
60
- return self._client.head_object(key)
61
-
62
- @decorator_oss_db(exception=oss_db_exception.OssDBInitPartError)
63
- def init_part(self, key) -> models.InitMultipartUploadResult:
64
- """初始化分片上传"""
65
- return self._client.init_multipart_upload(key)
66
-
67
- @decorator_oss_db(exception=oss_db_exception.OssDBPutObjError)
68
- def put(self, key, data) -> models.PutObjectResult:
69
- """文件上传"""
70
- return self._client.put_object(key, data)
71
-
72
- @decorator_oss_db(exception=oss_db_exception.OssDBPutPartError)
73
- def put_part(self, key, upload_id, position, data) -> models.PutObjectResult:
74
- """分片上传"""
75
- return self._client.upload_part(key, upload_id, position, data)
76
-
77
- def list_part(self, key, upload_id): # -> List[models.ListPartsResult]:
78
- """获取分片列表"""
79
- return [part_info for part_info in PartIterator(self._client, key, upload_id)]
80
-
81
- @decorator_oss_db(exception=oss_db_exception.OssDBMergeError)
82
- def merge(self, key, upload_id, parts=None) -> models.PutObjectResult:
83
- """合并分片"""
84
- headers = None if parts else {"x-oss-complete-all": "yes"}
85
- return self._client.complete_multipart_upload(key, upload_id, parts, headers=headers)
86
-
87
- @decorator_oss_db(exception=oss_db_exception.OssDBAppendObjError)
88
- def append(self, key, position, data) -> models.AppendObjectResult:
89
- """追加上传"""
90
- return self._client.append_object(key, position, data)
91
-
92
- def iter_data(self, data, chunk_size=None):
93
- chunk_size = chunk_size or self.chunk_size
94
- if isinstance(data, Response):
95
- for part_data in data.iter_content(chunk_size):
96
- yield part_data
97
- if isinstance(data, bytes):
98
- for i in range(0, len(data), chunk_size):
99
- yield data[i:i + chunk_size]
100
-
101
- def assemble(self, ready_data, data, chunk_size=None):
102
- upload_data = b""
103
- ready_data = ready_data + data
104
- chunk_size = chunk_size or self.chunk_size
105
- if len(ready_data) >= chunk_size:
106
- upload_data = ready_data[:chunk_size]
107
- ready_data = ready_data[chunk_size:]
108
- return ready_data, upload_data
109
-
110
- def content_length(self, key: str) -> int:
111
- head = self.head(key)
112
- return head.content_length
113
-
1
+ #
2
+ # from cobweb import setting
3
+ # from requests import Response
4
+ # from oss2 import Auth, Bucket, models, PartIterator
5
+ # from cobweb.exceptions import oss_db_exception
6
+ # from cobweb.utils.decorators import decorator_oss_db
7
+ #
8
+ #
9
+ # class OssUtil:
10
+ #
11
+ # def __init__(
12
+ # self,
13
+ # bucket=None,
14
+ # endpoint=None,
15
+ # access_key=None,
16
+ # secret_key=None,
17
+ # chunk_size=None,
18
+ # min_upload_size=None,
19
+ # **kwargs
20
+ # ):
21
+ # self.bucket = bucket or setting.OSS_BUCKET
22
+ # self.endpoint = endpoint or setting.OSS_ENDPOINT
23
+ # self.chunk_size = int(chunk_size or setting.OSS_CHUNK_SIZE)
24
+ # self.min_upload_size = int(min_upload_size or setting.OSS_MIN_UPLOAD_SIZE)
25
+ #
26
+ # self.failed_count = 0
27
+ # self._kw = kwargs
28
+ #
29
+ # self._auth = Auth(
30
+ # access_key_id=access_key or setting.OSS_ACCESS_KEY,
31
+ # access_key_secret=secret_key or setting.OSS_SECRET_KEY
32
+ # )
33
+ # self._client = Bucket(
34
+ # auth=self._auth,
35
+ # endpoint=self.endpoint,
36
+ # bucket_name=self.bucket,
37
+ # **self._kw
38
+ # )
39
+ #
40
+ # def failed(self):
41
+ # self.failed_count += 1
42
+ # if self.failed_count >= 5:
43
+ # self._client = Bucket(
44
+ # auth=self._auth,
45
+ # endpoint=self.endpoint,
46
+ # bucket_name=self.bucket,
47
+ # **self._kw
48
+ # )
49
+ #
50
+ # def exists(self, key: str) -> bool:
51
+ # try:
52
+ # result = self._client.object_exists(key)
53
+ # self.failed_count = 0
54
+ # return result
55
+ # except Exception as e:
56
+ # self.failed()
57
+ # raise e
58
+ #
59
+ # def head(self, key: str) -> models.HeadObjectResult:
60
+ # return self._client.head_object(key)
61
+ #
62
+ # @decorator_oss_db(exception=oss_db_exception.OssDBInitPartError)
63
+ # def init_part(self, key) -> models.InitMultipartUploadResult:
64
+ # """初始化分片上传"""
65
+ # return self._client.init_multipart_upload(key)
66
+ #
67
+ # @decorator_oss_db(exception=oss_db_exception.OssDBPutObjError)
68
+ # def put(self, key, data) -> models.PutObjectResult:
69
+ # """文件上传"""
70
+ # return self._client.put_object(key, data)
71
+ #
72
+ # @decorator_oss_db(exception=oss_db_exception.OssDBPutPartError)
73
+ # def put_part(self, key, upload_id, position, data) -> models.PutObjectResult:
74
+ # """分片上传"""
75
+ # return self._client.upload_part(key, upload_id, position, data)
76
+ #
77
+ # def list_part(self, key, upload_id): # -> List[models.ListPartsResult]:
78
+ # """获取分片列表"""
79
+ # return [part_info for part_info in PartIterator(self._client, key, upload_id)]
80
+ #
81
+ # @decorator_oss_db(exception=oss_db_exception.OssDBMergeError)
82
+ # def merge(self, key, upload_id, parts=None) -> models.PutObjectResult:
83
+ # """合并分片"""
84
+ # headers = None if parts else {"x-oss-complete-all": "yes"}
85
+ # return self._client.complete_multipart_upload(key, upload_id, parts, headers=headers)
86
+ #
87
+ # @decorator_oss_db(exception=oss_db_exception.OssDBAppendObjError)
88
+ # def append(self, key, position, data) -> models.AppendObjectResult:
89
+ # """追加上传"""
90
+ # return self._client.append_object(key, position, data)
91
+ #
92
+ # def iter_data(self, data, chunk_size=None):
93
+ # chunk_size = chunk_size or self.chunk_size
94
+ # if isinstance(data, Response):
95
+ # for part_data in data.iter_content(chunk_size):
96
+ # yield part_data
97
+ # if isinstance(data, bytes):
98
+ # for i in range(0, len(data), chunk_size):
99
+ # yield data[i:i + chunk_size]
100
+ #
101
+ # def assemble(self, ready_data, data, chunk_size=None):
102
+ # upload_data = b""
103
+ # ready_data = ready_data + data
104
+ # chunk_size = chunk_size or self.chunk_size
105
+ # if len(ready_data) >= chunk_size:
106
+ # upload_data = ready_data[:chunk_size]
107
+ # ready_data = ready_data[chunk_size:]
108
+ # return ready_data, upload_data
109
+ #
110
+ # def content_length(self, key: str) -> int:
111
+ # head = self.head(key)
112
+ # return head.content_length
113
+ #
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: cobweb-launcher
3
- Version: 3.1.8
3
+ Version: 3.1.9
4
4
  Summary: spider_hole
5
5
  Home-page: https://github.com/Juannie-PP/cobweb
6
6
  Author: Juannie-PP
@@ -17,25 +17,23 @@ cobweb/exceptions/__init__.py,sha256=E9SHnJBbhD7fOgPFMswqyOf8SKRDrI_i25L0bSpohvk
17
17
  cobweb/exceptions/oss_db_exception.py,sha256=iP_AImjNHT3-Iv49zCFQ3rdLnlvuHa3h2BXApgrOYpA,636
18
18
  cobweb/launchers/__init__.py,sha256=6_v2jd2sgj6YnOB1nPKiYBskuXVb5xpQnq2YaDGJgQ8,100
19
19
  cobweb/launchers/distributor.py,sha256=yZJ7d8EXZGukRr8SzcxXZu1z37VxKqBWE1apNW9T9uE,6429
20
- cobweb/launchers/launcher.py,sha256=zP0l-5QXxoZauh2bnIuHUJ6L5QI0bbgXvhEN49Ff43g,5350
20
+ cobweb/launchers/launcher.py,sha256=HJJgVIh2vAfG2wnU_yHEN_v1tT_qpz2F4JmkcBoZvCE,5344
21
21
  cobweb/launchers/uploader.py,sha256=ePQsGxoFw3Le4DgkoDGDqxrv8lGaYKCnGq2nuaWba9o,1771
22
22
  cobweb/pipelines/__init__.py,sha256=zSUsGtx6smbs2iXBXvYynReKSgky-3gjqaAtKVnA_OU,105
23
23
  cobweb/pipelines/pipeline.py,sha256=qwoOYMhlAB-MnEmMNpNeauTHoRTOr2wyBDYS4MF6B1c,261
24
24
  cobweb/pipelines/pipeline_console.py,sha256=NEh-4zhuVAQOqwXLsqeb-rcNZ9_KXFUpL3otUTL5qBs,754
25
25
  cobweb/pipelines/pipeline_loghub.py,sha256=xZ6D55BGdiM71WUv83jyLGbEyUwhBHLJRZoXthBxxTs,1019
26
26
  cobweb/schedulers/__init__.py,sha256=LEya11fdAv0X28YzbQTeC1LQZ156Fj4cyEMGqQHUWW0,49
27
- cobweb/schedulers/launcher_air.py,sha256=qdcxq41I9zN5snEpMUUNEQNTtUiZM1Hw_3N9zu4PuAs,3058
28
- cobweb/schedulers/launcher_api.py,sha256=3-A6k3Igvi-xnvP9M_3NkJKUweDJ_pY10ZrHClteD-g,8628
29
27
  cobweb/schedulers/scheduler.py,sha256=mN9XvaOCzNnBWQfzslTIM860ZGq2gyLtxpbVUd0Slqs,2240
30
- cobweb/schedulers/scheduler_with_redis.py,sha256=Qq8iMxbDWJpRml76NfcZyyB8q7tM2Et0YrOcF7nwcP0,6399
31
- cobweb/utils/__init__.py,sha256=8Bu5iZrIOUMS4jv4hi0inRPtscf6MK0ZFa7gQ7ZFoqw,145
28
+ cobweb/schedulers/scheduler_with_redis.py,sha256=EUsewtCDxgtgNgv2ogQus56GNv9IujkNLcpki-LPQzs,6343
29
+ cobweb/utils/__init__.py,sha256=yqIU08RcBRa2JZDr1PsrnXl473Ni5JqhagWIQvyGsc8,147
32
30
  cobweb/utils/bloom.py,sha256=vng-YbKgh9HbtpAWYf_nkUSbfVTOj40aqUUejRYlsCU,1752
33
31
  cobweb/utils/decorators.py,sha256=066JCY_RNMr2mXkhEv8XTtOOKkv9CFiBm0ZNCcC-2ag,1131
34
32
  cobweb/utils/dotting.py,sha256=x34WryNKu_o54EzVwAZByagjMTXF0ZP0VYKdjfYEdSM,1833
35
- cobweb/utils/oss.py,sha256=ktfwMGnq5FMzOkUUS3nNXI7pTdPNinudH3YRJ0nMhoU,3985
33
+ cobweb/utils/oss.py,sha256=wmToIIVNO8nCQVRmreVaZejk01aCWS35e1NV6cr0yGI,4192
36
34
  cobweb/utils/tools.py,sha256=5JEaaAwYoV9Sdla2UBIJn6faUBuXmxUMagm9ck6FVqs,1253
37
- cobweb_launcher-3.1.8.dist-info/LICENSE,sha256=z1rxSIGOyzcSb3orZxFPxzx-0C1vTocmswqBNxpKfEk,1063
38
- cobweb_launcher-3.1.8.dist-info/METADATA,sha256=dHvMv0I_533y0KCp5RSVT87mqbznPgWakYtN3OeN3KQ,6509
39
- cobweb_launcher-3.1.8.dist-info/WHEEL,sha256=ewwEueio1C2XeHTvT17n8dZUJgOvyCWCt0WVNLClP9o,92
40
- cobweb_launcher-3.1.8.dist-info/top_level.txt,sha256=4GETBGNsKqiCUezmT-mJn7tjhcDlu7nLIV5gGgHBW4I,7
41
- cobweb_launcher-3.1.8.dist-info/RECORD,,
35
+ cobweb_launcher-3.1.9.dist-info/LICENSE,sha256=z1rxSIGOyzcSb3orZxFPxzx-0C1vTocmswqBNxpKfEk,1063
36
+ cobweb_launcher-3.1.9.dist-info/METADATA,sha256=eCmqW-nWkaL_ez6C7aTCw7u-gcTsYd3Y6b3IK6I0akY,6509
37
+ cobweb_launcher-3.1.9.dist-info/WHEEL,sha256=ewwEueio1C2XeHTvT17n8dZUJgOvyCWCt0WVNLClP9o,92
38
+ cobweb_launcher-3.1.9.dist-info/top_level.txt,sha256=4GETBGNsKqiCUezmT-mJn7tjhcDlu7nLIV5gGgHBW4I,7
39
+ cobweb_launcher-3.1.9.dist-info/RECORD,,
@@ -1,93 +0,0 @@
1
- import time
2
-
3
- from cobweb.base import logger
4
- from cobweb.constant import LogTemplate
5
- from .launcher import Launcher, check_pause
6
-
7
-
8
- class LauncherAir(Launcher):
9
-
10
- # def _scheduler(self):
11
- # if self.start_seeds:
12
- # self.__LAUNCHER_QUEUE__['todo'].push(self.start_seeds)
13
-
14
- @check_pause
15
- def _insert(self):
16
- new_seeds = {}
17
- del_seeds = set()
18
- status = self.__LAUNCHER_QUEUE__['new'].length < self._new_queue_max_size
19
- for _ in range(self._new_queue_max_size):
20
- seed_tuple = self.__LAUNCHER_QUEUE__['new'].pop()
21
- if not seed_tuple:
22
- break
23
- seed, new_seed = seed_tuple
24
- new_seeds[new_seed.to_string] = new_seed.params.priority
25
- del_seeds.add(seed.to_string)
26
- if new_seeds:
27
- self.__LAUNCHER_QUEUE__['todo'].push(new_seeds)
28
- if del_seeds:
29
- self.__LAUNCHER_QUEUE__['done'].push(del_seeds)
30
- if status:
31
- time.sleep(self._new_queue_wait_seconds)
32
-
33
- @check_pause
34
- def _delete(self):
35
- seeds = []
36
- status = self.__LAUNCHER_QUEUE__['done'].length < self._done_queue_max_size
37
-
38
- for _ in range(self._done_queue_max_size):
39
- seed = self.__LAUNCHER_QUEUE__['done'].pop()
40
- if not seed:
41
- break
42
- seeds.append(seed.to_string)
43
-
44
- if seeds:
45
- self._remove_doing_seeds(seeds)
46
-
47
- if status:
48
- time.sleep(self._done_queue_wait_seconds)
49
-
50
- def _polling(self):
51
-
52
- check_emtpy_times = 0
53
-
54
- while not self._stop.is_set():
55
-
56
- queue_not_empty_count = 0
57
- pooling_wait_seconds = 30
58
-
59
- for q in self.__LAUNCHER_QUEUE__.values():
60
- if q.length != 0:
61
- queue_not_empty_count += 1
62
-
63
- if queue_not_empty_count == 0:
64
- pooling_wait_seconds = 3
65
- if self._pause.is_set():
66
- check_emtpy_times = 0
67
- if not self._task_model:
68
- logger.info("Done! Ready to close thread...")
69
- self._stop.set()
70
- elif check_emtpy_times > 2:
71
- self.__DOING__ = {}
72
- self._pause.set()
73
- else:
74
- logger.info(
75
- "check whether the task is complete, "
76
- f"reset times {3 - check_emtpy_times}"
77
- )
78
- check_emtpy_times += 1
79
- elif self._pause.is_set():
80
- self._pause.clear()
81
- self._execute()
82
- else:
83
- logger.info(LogTemplate.launcher_air_polling.format(
84
- task=self.task,
85
- doing_len=len(self.__DOING__.keys()),
86
- todo_len=self.__LAUNCHER_QUEUE__['todo'].length,
87
- done_len=self.__LAUNCHER_QUEUE__['done'].length,
88
- upload_len=self.__LAUNCHER_QUEUE__['upload'].length,
89
- ))
90
-
91
- time.sleep(pooling_wait_seconds)
92
-
93
-
@@ -1,225 +0,0 @@
1
- import time
2
- import threading
3
-
4
- from cobweb.db import ApiDB
5
- from cobweb.base import Seed, logger
6
- from cobweb.constant import DealModel, LogTemplate
7
- from .launcher import Launcher, check_pause
8
-
9
-
10
- class LauncherApi(Launcher):
11
-
12
- def __init__(self, task, project, custom_setting=None, **kwargs):
13
- super().__init__(task, project, custom_setting, **kwargs)
14
- self._db = ApiDB()
15
-
16
- self._todo_key = "{%s:%s}:todo" % (project, task)
17
- self._done_key = "{%s:%s}:done" % (project, task)
18
- self._fail_key = "{%s:%s}:fail" % (project, task)
19
- self._heartbeat_key = "heartbeat:%s_%s" % (project, task)
20
-
21
- self._statistics_done_key = "statistics:%s:%s:done" % (project, task)
22
- self._statistics_fail_key = "statistics:%s:%s:fail" % (project, task)
23
- self._speed_control_key = "speed_control:%s_%s" % (project, task)
24
-
25
- self._reset_lock_key = "lock:reset:%s_%s" % (project, task)
26
-
27
- # self._bf_key = "bloom_%s_%s" % (project, task)
28
- # self._bf = BloomFilter(self._bf_key)
29
-
30
- self._heartbeat_start_event = threading.Event()
31
- self._redis_queue_empty_event = threading.Event()
32
-
33
- @property
34
- def heartbeat(self):
35
- return self._db.exists(self._heartbeat_key)
36
-
37
- def statistics(self, key, count):
38
- if not self._task_model and not self._db.exists(key):
39
- self._db.setex(key, 86400 * 30, int(count))
40
- else:
41
- self._db.incrby(key, count)
42
-
43
- def _get_seed(self) -> Seed:
44
- """
45
- 从队列中获取种子(频控)
46
- 设置时间窗口为self._time_window(秒),判断在该窗口内的采集量是否满足阈值(self._spider_max_speed)
47
- :return: True -> 种子, False -> None
48
- """
49
- if (self._speed_control and self.__LAUNCHER_QUEUE__["todo"].length and
50
- not self._db.auto_incr(self._speed_control_key, t=self._time_window, limit=self._spider_max_count)):
51
- expire_time = self._db.ttl(self._speed_control_key)
52
- if isinstance(expire_time, int) and expire_time <= -1:
53
- self._db.delete(self._speed_control_key)
54
- elif isinstance(expire_time, int):
55
- logger.info(f"Too fast! Please wait {expire_time} seconds...")
56
- time.sleep(expire_time / 2)
57
- return None
58
- seed = self.__LAUNCHER_QUEUE__["todo"].pop()
59
- return seed
60
-
61
- @check_pause
62
- def _execute_heartbeat(self):
63
- if self._heartbeat_start_event.is_set():
64
- self._db.setex(self._heartbeat_key, 5)
65
- time.sleep(3)
66
-
67
- @check_pause
68
- def _reset(self):
69
- """
70
- 检查过期种子,重新添加到redis缓存中
71
- """
72
- reset_wait_seconds = 30
73
- if self._db.lock(self._reset_lock_key, t=120):
74
-
75
- _min = -int(time.time()) + self._seed_reset_seconds \
76
- if self.heartbeat else "-inf"
77
-
78
- self._db.members(self._todo_key, 0, _min=_min, _max="(0")
79
-
80
- if not self.heartbeat:
81
- self._heartbeat_start_event.set()
82
-
83
- self._db.delete(self._reset_lock_key)
84
-
85
- time.sleep(reset_wait_seconds)
86
-
87
- @check_pause
88
- def _scheduler(self):
89
- """
90
- 调度任务,获取redis队列种子,同时添加到doing字典中
91
- """
92
- if not self._db.zcount(self._todo_key, 0, "(1000"):
93
- time.sleep(self._scheduler_wait_seconds)
94
- elif self.__LAUNCHER_QUEUE__['todo'].length >= self._todo_queue_size:
95
- time.sleep(self._todo_queue_full_wait_seconds)
96
- else:
97
- members = self._db.members(
98
- self._todo_key, int(time.time()),
99
- count=self._todo_queue_size,
100
- _min=0, _max="(1000"
101
- )
102
- for member, priority in members:
103
- seed = Seed(member, priority=priority)
104
- self.__LAUNCHER_QUEUE__['todo'].push(seed)
105
- self.__DOING__[seed.to_string] = seed.params.priority
106
-
107
- @check_pause
108
- def _insert(self):
109
- """
110
- 添加新种子到redis队列中
111
- """
112
- new_seeds = {}
113
- del_seeds = set()
114
- status = self.__LAUNCHER_QUEUE__['new'].length < self._new_queue_max_size
115
- for _ in range(self._new_queue_max_size):
116
- seed_tuple = self.__LAUNCHER_QUEUE__['new'].pop()
117
- if not seed_tuple:
118
- break
119
- seed, new_seed = seed_tuple
120
- new_seeds[new_seed.to_string] = new_seed.params.priority
121
- del_seeds.add(seed)
122
- if new_seeds:
123
- self._db.zadd(self._todo_key, new_seeds, nx=True)
124
- if del_seeds:
125
- self.__LAUNCHER_QUEUE__['done'].push(list(del_seeds))
126
- if status:
127
- time.sleep(self._new_queue_wait_seconds)
128
-
129
- @check_pause
130
- def _refresh(self):
131
- """
132
- 刷新doing种子过期时间,防止reset重新消费
133
- """
134
- if self.__DOING__:
135
- refresh_time = int(time.time())
136
- seeds = {k:-refresh_time - v / 1000 for k, v in self.__DOING__.items()}
137
- self._db.zadd(self._todo_key, item=seeds, xx=True)
138
- time.sleep(15)
139
-
140
- @check_pause
141
- def _delete(self):
142
- """
143
- 删除队列种子,根据状态添加至成功或失败队列,移除doing字典种子索引
144
- """
145
- # seed_info = {"count": 0, "failed": [], "succeed": [], "common": []}
146
-
147
- seed_list = []
148
- status = self.__LAUNCHER_QUEUE__['done'].length < self._done_queue_max_size
149
-
150
- for _ in range(self._done_queue_max_size):
151
- seed = self.__LAUNCHER_QUEUE__['done'].pop()
152
- if not seed:
153
- break
154
- seed_list.append(seed.to_string)
155
-
156
- if seed_list:
157
-
158
- self._db.zrem(self._todo_key, *seed_list)
159
- self._remove_doing_seeds(seed_list)
160
-
161
- if status:
162
- time.sleep(self._done_queue_wait_seconds)
163
-
164
- def _polling(self):
165
- wait_scheduler_execute = True
166
- check_emtpy_times = 0
167
- while not self._stop.is_set():
168
- queue_not_empty_count = 0
169
- pooling_wait_seconds = 30
170
-
171
- for q in self.__LAUNCHER_QUEUE__.values():
172
- if q.length != 0:
173
- queue_not_empty_count += 1
174
- wait_scheduler_execute = False
175
-
176
- if queue_not_empty_count == 0:
177
- pooling_wait_seconds = 3
178
- if self._pause.is_set():
179
- check_emtpy_times = 0
180
- if not self._task_model and (
181
- not wait_scheduler_execute or
182
- int(time.time()) - self._app_time > self._before_scheduler_wait_seconds
183
- ):
184
- logger.info("Done! ready to close thread...")
185
- self._stop.set()
186
-
187
- elif self._db.zcount(self._todo_key, _min=0, _max="(1000"):
188
- logger.info(f"Recovery {self.task} task run!")
189
- self._pause.clear()
190
- self._execute()
191
- else:
192
- logger.info("pause! waiting for resume...")
193
- elif check_emtpy_times > 2:
194
- self.__DOING__ = {}
195
- seed_count = self._db.zcard(self._todo_key)
196
- logger.info(f"队列剩余种子数:{seed_count}")
197
- if not seed_count:
198
- logger.info("Done! pause set...")
199
- self._pause.set()
200
- else:
201
- self._pause.clear()
202
- else:
203
- logger.info(
204
- "check whether the task is complete, "
205
- f"reset times {3 - check_emtpy_times}"
206
- )
207
- check_emtpy_times += 1
208
- else:
209
- if self._pause.is_set():
210
- self._pause.clear()
211
- logger.info(LogTemplate.launcher_pro_polling.format(
212
- task=self.task,
213
- doing_len=len(self.__DOING__.keys()),
214
- todo_len=self.__LAUNCHER_QUEUE__['todo'].length,
215
- done_len=self.__LAUNCHER_QUEUE__['done'].length,
216
- redis_seed_count=self._db.zcount(self._todo_key, "-inf", "+inf"),
217
- redis_todo_len=self._db.zcount(self._todo_key, 0, "(1000"),
218
- redis_doing_len=self._db.zcount(self._todo_key, "-inf", "(0"),
219
- upload_len=self.__LAUNCHER_QUEUE__['upload'].length,
220
- ))
221
-
222
- time.sleep(pooling_wait_seconds)
223
-
224
- logger.info("Done! Ready to close thread...")
225
-