cobweb-launcher 1.3.15__py3-none-any.whl → 3.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (36)
  1. cobweb/__init__.py +1 -1
  2. cobweb/base/__init__.py +4 -149
  3. cobweb/base/common_queue.py +0 -13
  4. cobweb/base/request.py +2 -14
  5. cobweb/base/seed.py +16 -12
  6. cobweb/constant.py +0 -16
  7. cobweb/crawlers/crawler.py +3 -85
  8. cobweb/db/redis_db.py +109 -52
  9. cobweb/launchers/__init__.py +8 -2
  10. cobweb/launchers/distributor.py +171 -0
  11. cobweb/launchers/launcher.py +87 -131
  12. cobweb/launchers/uploader.py +65 -0
  13. cobweb/pipelines/pipeline.py +3 -36
  14. cobweb/schedulers/__init__.py +1 -3
  15. cobweb/schedulers/launcher_air.py +93 -0
  16. cobweb/schedulers/launcher_api.py +225 -0
  17. cobweb/schedulers/scheduler.py +85 -0
  18. cobweb/schedulers/scheduler_with_redis.py +177 -0
  19. cobweb/setting.py +15 -32
  20. cobweb/utils/__init__.py +2 -1
  21. cobweb/utils/decorators.py +43 -0
  22. cobweb/utils/dotting.py +55 -0
  23. cobweb/utils/oss.py +28 -9
  24. {cobweb_launcher-1.3.15.dist-info → cobweb_launcher-3.1.0.dist-info}/METADATA +1 -1
  25. cobweb_launcher-3.1.0.dist-info/RECORD +41 -0
  26. cobweb/base/basic.py +0 -297
  27. cobweb/base/dotting.py +0 -35
  28. cobweb/launchers/launcher_air.py +0 -88
  29. cobweb/launchers/launcher_api.py +0 -89
  30. cobweb/launchers/launcher_pro.py +0 -88
  31. cobweb/schedulers/scheduler_api.py +0 -72
  32. cobweb/schedulers/scheduler_redis.py +0 -72
  33. cobweb_launcher-1.3.15.dist-info/RECORD +0 -40
  34. {cobweb_launcher-1.3.15.dist-info → cobweb_launcher-3.1.0.dist-info}/LICENSE +0 -0
  35. {cobweb_launcher-1.3.15.dist-info → cobweb_launcher-3.1.0.dist-info}/WHEEL +0 -0
  36. {cobweb_launcher-1.3.15.dist-info → cobweb_launcher-3.1.0.dist-info}/top_level.txt +0 -0
cobweb/launchers/distributor.py
@@ -0,0 +1,171 @@
+import time
+import threading
+import traceback
+from inspect import isgenerator
+from typing import Callable
+from urllib.parse import urlparse
+from requests import Response as Res
+
+from cobweb import setting
+from cobweb.constant import DealModel, LogTemplate
+from cobweb.base import (
+    Seed,
+    Queue,
+    BaseItem,
+    Request,
+    Response,
+    logger
+)
+from cobweb.utils import LoghubDot, check_pause
+
+
+class Distributor(threading.Thread):
+
+    def __init__(
+        self,
+        task: str,
+        project: str,
+        new: Queue,
+        todo: Queue,
+        done: Queue,
+        upload: Queue,
+        register: Callable,
+        stop: threading.Event,
+        pause: threading.Event,
+        SpiderCrawler
+    ):
+        super().__init__()
+        self.task = task
+        self.project = project
+        self.stop = stop
+        self.pause = pause
+
+        self.new = new
+        self.todo = todo
+        self.done = done
+        self.upload = upload
+        self.register = register
+
+        self.time_sleep = setting.SPIDER_TIME_SLEEP
+        self.thread_num = setting.SPIDER_THREAD_NUM
+        self.max_retries = setting.SPIDER_MAX_RETRIES
+        self.record_failed = setting.RECORD_FAILED_SPIDER
+        self.loghub_dot = LoghubDot()  # todo: decouple
+
+        self.Crawler = SpiderCrawler
+
+        logger.debug(f"Distribute instance attrs: {self.__dict__}")
+
+    def distribute(self, item, seed, _id: int):
+        if isinstance(item, Request):
+            seed.params.start_time = time.time()
+            self.process(item=seed, seed=seed, callback=self.Crawler.download, _id=1)
+        elif isinstance(item, Response):
+            if _id == 2:
+                raise TypeError("parse function can't yield a Response instance")
+            dot = isinstance(item.response, Res)
+            self.spider_logging(seed, item, dot=dot)
+            self.process(item=seed, seed=seed, callback=self.Crawler.parse, _id=2)
+        elif isinstance(item, BaseItem):
+            self.upload.push(item)
+        elif isinstance(item, Seed):
+            self.new.push((seed, item), direct_insertion=True)
+        elif isinstance(item, str) and item == DealModel.poll:
+            self.todo.push(seed)
+        elif isinstance(item, str) and item == DealModel.done:
+            self.done.push(seed)
+        elif isinstance(item, str) and item == DealModel.fail:
+            seed.params.retry += 1
+            if seed.params.retry < self.max_retries:
+                self.todo.push(seed)
+            else:
+                if record_failed := self.record_failed:
+                    try:
+                        response = Response(seed, "failed", max_retries=True)
+                        self.process(response, seed, self.Crawler.parse, _id=2)
+                    except:
+                        record_failed = False
+                if not record_failed:
+                    self.done.push(seed)
+        else:
+            raise TypeError("yield value type error!")
+
+    def process(self, item, seed, callback, _id: int):
+        result_iterators = callback(item)
+        if not isgenerator(result_iterators):
+            raise TypeError(f"{callback.__name__} function isn't a generator!")
+        for result_item in result_iterators:
+            self.distribute(result_item, seed, _id)
+
+    @check_pause
+    def spider(self):
+        if seed := self.todo.pop():
+            try:
+                self.process(item=seed, seed=seed, callback=self.Crawler.request, _id=0)
+            except Exception as e:
+                url, status = seed.url, e.__class__.__name__
+                msg = ''.join(traceback.format_exception(type(e), e, e.__traceback__))
+                if getattr(e, "response", None) and isinstance(e.response, Res):
+                    url = e.response.request.url
+                    status = e.response.status_code
+                self.spider_logging(seed, None, error=True, url=url, status=status, msg=msg)
+                self.distribute(DealModel.fail, seed, _id=-1)
+
+    def spider_logging(
+        self, seed,
+        item: Response = None,
+        error: bool = False,
+        dot: bool = True,
+        **kwargs
+    ):
+        detail_log_info = LogTemplate.log_info(seed.to_dict)
+        if error:
+            url = kwargs.get("url")
+            msg = kwargs.get("msg")
+            status = kwargs.get("status")
+            if dot:
+                self.loghub_dot.build(
+                    topic=urlparse(url).netloc,
+                    data_size=-1, cost_time=-1,
+                    status=status, url=url,
+                    seed=seed.to_string,
+                    proxy_type=seed.params.proxy_type,
+                    proxy=seed.params.proxy,
+                    project=self.project,
+                    task=self.task, msg=msg,
+                )
+            logger.info(LogTemplate.download_exception.format(
+                detail=detail_log_info,
+                retry=seed.params.retry,
+                priority=seed.params.priority,
+                seed_version=seed.params.seed_version,
+                identifier=seed.identifier or "",
+                exception=msg
+            ))
+        else:
+            logger.info(LogTemplate.download_info.format(
+                detail=detail_log_info,
+                retry=seed.params.retry,
+                priority=seed.params.priority,
+                seed_version=seed.params.seed_version,
+                identifier=seed.identifier or "",
+                status=item.response,
+                response=LogTemplate.log_info(item.to_dict)
+            ))
+            if dot:
+                end_time = time.time()
+                stime = seed.params.start_time
+                cost_time = end_time - stime if stime else -1
+                topic = urlparse(item.response.request.url).netloc
+                data_size = int(item.response.headers.get("content-length", 0))
+                self.loghub_dot.build(
+                    topic=topic, data_size=data_size, cost_time=cost_time,
+                    status=200, seed=seed.to_string, url=item.response.url,
+                    proxy=seed.params.proxy, proxy_type=seed.params.proxy_type,
+                    project=self.project, task=self.task,
+                )
+
+    def run(self):
+        self.register(self.loghub_dot.build_run, tag="LoghubDot")
+        for _ in range(self.thread_num):
+            self.register(self.spider, tag="Distributor")
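For orientation: distribute routes each yielded value by type. A Request re-enters download, a Response re-enters parse, a BaseItem goes to the upload queue, a Seed to the new queue, and the DealModel strings mark poll, done, or fail. Below is a minimal sketch of generator callbacks that satisfy this contract; the class name and method bodies are hypothetical, and only the yielded types and the Request/Response signatures (taken from the Launcher docstrings later in this diff) come from the source.

import requests
from cobweb.base import Seed, Request, Response
from cobweb.constant import DealModel

class SketchCrawler:

    @staticmethod
    def request(seed: Seed):
        # _id=0: each yielded Request is dispatched to download()
        yield Request(seed.url, seed)

    @staticmethod
    def download(item):
        # _id=1: each yielded Response is logged, then dispatched to parse()
        yield Response(item.seed, requests.get(item.seed.url))

    @staticmethod
    def parse(item):
        # _id=2: yield BaseItems to upload, Seeds to re-queue, or a DealModel
        # string; done finishes the seed, fail retries it up to max_retries
        yield DealModel.done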
cobweb/launchers/launcher.py
@@ -1,26 +1,34 @@
 import time
+import uuid
 import inspect
 import threading
 import importlib
 
-from cobweb.constant import LogTemplate
-from cobweb.utils import dynamic_load_class
-from cobweb.base import TaskQueue, Decorators, logger
 from cobweb import setting
+from cobweb.base import Seed, Queue, logger
+from cobweb.utils.tools import dynamic_load_class
+from cobweb.launchers import Distributor, Uploader
 
 
-class Launcher(threading.Thread):
+class Launcher:
 
-    __CUSTOM_FUNC__ = {}
+    _NEW_QUEUE_ = Queue()
+    _TODO_QUEUE_ = Queue()
+    _DONE_QUEUE_ = Queue()
+    _UPLOAD_QUEUE_ = Queue()
+
+    __WORKER_THREAD__ = dict()
+    __REGISTER_FUNC__ = dict()
 
     def __init__(self, task, project, custom_setting=None, **kwargs):
         super().__init__()
+
         self.task = task
         self.project = project
-        self.custom_func = dict()
-        self.app_time = int(time.time())
 
-        self.check_emtpy_times = 0
+        self._app_time = int(time.time())
+        self._stop = threading.Event()  # stop event
+        self._pause = threading.Event()  # pause event
 
         _setting = dict()
 
@@ -41,40 +49,12 @@ class Launcher(threading.Thread):
         for k, v in _setting.items():
             setattr(setting, k.upper(), v)
 
-        self.before_scheduler_wait_seconds = setting.BEFORE_SCHEDULER_WAIT_SECONDS
-
-        self.scheduling_wait_time = setting.SCHEDULING_WAIT_TIME
-        self.inserting_wait_time = setting.INSERTING_WAIT_TIME
-        self.removing_wait_time = setting.REMOVING_WAIT_TIME
-        self.seed_reset_seconds = setting.SEED_RESET_SECONDS
-
-        self.scheduling_size = setting.SCHEDULING_SIZE
-        self.inserting_size = setting.INSERTING_SIZE
-        self.removing_size = setting.REMOVING_SIZE
-
-        self.todo_queue_size = setting.TODO_QUEUE_SIZE
-        self.seed_queue_size = setting.SEED_QUEUE_SIZE
-        self.request_queue_size = setting.REQUEST_QUEUE_SIZE
-        self.download_queue_size = setting.DOWNLOAD_QUEUE_SIZE
-        self.response_queue_size = setting.RESPONSE_QUEUE_SIZE
-        self.upload_queue_size = setting.UPLOAD_QUEUE_SIZE
-        self.delete_queue_size = setting.DELETE_QUEUE_SIZE
-        self.done_queue_size = setting.DONE_QUEUE_SIZE
-        self.spider_max_retries = setting.SPIDER_MAX_RETRIES
-
-        self.spider_thread_num = setting.SPIDER_THREAD_NUM
+        self._done_model = setting.DONE_MODEL
+        self._task_model = setting.TASK_MODEL
 
-        self.task_model = setting.TASK_MODEL
-
-        self.stop = threading.Event()  # stop event
-        self.pause = threading.Event()  # pause event
-
-        self.crawler_path = setting.CRAWLER
-        self.pipeline_path = setting.PIPELINE
-
-        self._thread_info = {}
-
-        self._task_info = dict(todo={}, download={})
+        self.Scheduler = dynamic_load_class(setting.SCHEDULER)
+        self.SpiderCrawler = dynamic_load_class(setting.CRAWLER)
+        self.SpiderPipeline = dynamic_load_class(setting.PIPELINE)
 
     @property
     def request(self):
@@ -88,7 +68,7 @@
             yield Request(seed.url, seed)
         """
         def decorator(func):
-            self.custom_func['request'] = func
+            self.SpiderCrawler.request = func
         return decorator
 
     @property
@@ -103,7 +83,7 @@
             yield Response(item.seed, response)
         """
        def decorator(func):
-            self.custom_func['download'] = func
+            self.SpiderCrawler.download = func
         return decorator
 
     @property
@@ -118,94 +98,70 @@
             yield xxxItem(seed, **kwargs)
         """
         def decorator(func):
-            self.custom_func['parse'] = func
+            self.SpiderCrawler.parse = func
         return decorator
 
-    def remove_working_items(self, key, items):
-        for item in items:
-            self._task_info[key].pop(item, None)
-
-    def add_working_item(self, key, member, priority):
-        self._task_info[key][member] = priority
-
-    def check_alive(self):
-        while not self.stop.is_set():
-            if not self.pause.is_set():
-                for name, thread_info in self._thread_info.items():
-                    instance = thread_info['instance']
-                    if not instance.is_alive():
-                        instance = threading.Thread(name=name, target=thread_info['func'], args=())
-                        self._thread_info[name] = dict(instance=instance, func=thread_info['func'])
-                        instance.start()
-            time.sleep(1)
-
-    def _add_thread(self, func, num=1, obj=None, name=None, args=()):
-        obj = obj or self
-        name = obj.__class__.__name__ + ":" + (name or func.__name__)
-        for i in range(num):
-            func_name = name + "_" + str(i) if num > 1 else name
-            instance = threading.Thread(name=func_name, target=func, args=())
-            self._thread_info[func_name] = dict(instance=instance, func=func)
-            instance.start()
-
-    @Decorators.stop
-    def _polling(self):
-        time.sleep(10)
-        if self.pause.is_set():
-            run_time = int(time.time()) - self.app_time
-            if not self.task_model and run_time > self.before_scheduler_wait_seconds:
-                logger.info("Done! ready to close thread...")
-                self.stop.set()
-            elif TaskQueue.TODO.length or TaskQueue.DOWNLOAD.length:
-                logger.info(f"Recovery {self.task} task run!")
-                self.check_emtpy_times = 0
-                self.pause.clear()
-            else:
-                logger.info("pause! waiting for resume...")
-        elif TaskQueue.is_empty() and self.check_emtpy_times > 2:
-            logger.info("pause! waiting for resume...")
-            self.doing_seeds = {}
-            self._task_info['todo'] = {}
-            self._task_info['download'] = {}
-            self.pause.set()
-        elif TaskQueue.is_empty():
-            logger.info(
-                "check whether the task is complete, "
-                f"reset times {3 - self.check_emtpy_times}"
-            )
-            self.check_emtpy_times += 1
-        else:
-            logger.info(LogTemplate.launcher_polling.format(
-                task=self.task,
-                memory_todo_count=len(self._task_info["todo"]),
-                memory_download_count=len(self._task_info["download"]),
-                todo_queue_len=TaskQueue.TODO.length,
-                delete_queue_len=TaskQueue.DELETE.length,
-                request_queue_len=TaskQueue.REQUEST.length,
-                response_queue_len=TaskQueue.RESPONSE.length,
-                done_queue_len=TaskQueue.DONE.length,
-                upload_queue_len=TaskQueue.UPLOAD.length,
-                seed_queue_len=TaskQueue.SEED.length,
-                download_queue_len=TaskQueue.DOWNLOAD.length
-            ))
-
-    def run(self):
-        Crawler = dynamic_load_class(self.crawler_path)
-        Pipeline = dynamic_load_class(self.pipeline_path)
-
-        crawler = Crawler(stop=self.stop, pause=self.pause, custom_func=self.custom_func)
-        pipeline = Pipeline(stop=self.stop, pause=self.pause)
-
-        self._add_thread(obj=crawler, func=crawler.build_request_item)
-        self._add_thread(obj=crawler, func=crawler.build_download_item, num=self.spider_thread_num)
-        self._add_thread(obj=crawler, func=crawler.build_parse_item)
-        self._add_thread(obj=pipeline, func=pipeline.run)
-
-        self._add_thread(func=self._polling)
-
-        self._init_schedule_thread()
-        self.check_alive()
-
-    def _init_schedule_thread(self):
-        ...
+    def start_seeds(self, seeds: list):
+        seed_list = [Seed(seed) for seed in seeds]
+        self._TODO_QUEUE_.push(seed_list)
+        return seed_list
+
+    def _register(self, func, tag: str = "launcher"):
+        name = tag + ":" + func.__name__ + "_" + str(uuid.uuid4())
+        self.__REGISTER_FUNC__[name] = func
+        if not self.__WORKER_THREAD__.get(name):
+            worker_thread = threading.Thread(name=name, target=func)
+            self.__WORKER_THREAD__[name] = worker_thread
+            worker_thread.start()
+
+    def _monitor(self):
+        while not self._stop.is_set():
+            if self._pause.is_set():
+                time.sleep(15)
+                continue
+            for name, worker_thread in self.__WORKER_THREAD__.items():
+                if not worker_thread.is_alive():
+                    logger.info(f"{name} thread is dead. Restarting...")
+                    func = self.__REGISTER_FUNC__[name]
+                    worker_thread = threading.Thread(name=name, target=func)
+                    self.__WORKER_THREAD__[name] = worker_thread
+                    worker_thread.start()
+            time.sleep(3)
+
+    def start(self):
+        self._pause.is_set()
+
+        self.Scheduler(
+            task=self.task,
+            project=self.project,
+            stop=self._stop,
+            pause=self._pause,
+            new=self._NEW_QUEUE_,
+            todo=self._TODO_QUEUE_,
+            done=self._DONE_QUEUE_,
+            upload=self._UPLOAD_QUEUE_,
+            register=self._register
+        ).start()
+
+        Distributor(
+            task=self.task,
+            project=self.project,
+            new=self._NEW_QUEUE_,
+            todo=self._TODO_QUEUE_,
+            done=self._DONE_QUEUE_,
+            upload=self._UPLOAD_QUEUE_,
+            register=self._register,
+            stop=self._stop, pause=self._pause,
+            SpiderCrawler=self.SpiderCrawler
+        ).start()
+
+        Uploader(
+            stop=self._stop, pause=self._pause,
+            done=self._DONE_QUEUE_,
+            upload=self._UPLOAD_QUEUE_,
+            register=self._register,
+            SpiderPipeline=self.SpiderPipeline
+        ).start()
+
+        self._monitor()
 
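Taken together with the Distributor and Uploader hunks, the reworked Launcher wires Scheduler, Distributor, and Uploader through four shared queues and supervises their worker threads. A hypothetical end-to-end usage sketch follows; the subclass name and its import path are assumptions (a LauncherAir is added later in this diff), while the decorator properties, start_seeds(), and start() are from the code above.

import requests
from cobweb.base import Request, Response
from cobweb.constant import DealModel
from cobweb.launchers import LauncherAir  # assumed concrete subclass and path

app = LauncherAir(task="demo_task", project="demo_project")

@app.request
def make_request(seed):
    yield Request(seed.url, seed)  # contract from the request docstring

@app.download
def download(item):
    yield Response(item.seed, requests.get(item.seed.url))

@app.parse
def parse(item):
    yield DealModel.done  # a real spider would yield BaseItem subclasses

app.start_seeds(["https://example.com"])  # seeds land on _TODO_QUEUE_
app.start()  # starts Scheduler, Distributor, Uploader, then monitors threads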
cobweb/launchers/uploader.py
@@ -0,0 +1,65 @@
+import time
+import threading
+from typing import Callable
+from cobweb import setting
+from cobweb.base import Queue, logger
+from cobweb.utils import check_pause
+
+
+class Uploader(threading.Thread):
+
+    def __init__(
+        self,
+        stop: threading.Event,
+        pause: threading.Event,
+        upload: Queue, done: Queue,
+        register: Callable,
+        SpiderPipeline
+    ):
+        super().__init__()
+        self.stop = stop
+        self.pause = pause
+
+        self.done = done
+        self.upload = upload
+        self.register = register
+
+        self.upload_size = setting.UPLOAD_QUEUE_MAX_SIZE
+        self.wait_seconds = setting.UPLOAD_QUEUE_WAIT_SECONDS
+
+        self.Pipeline = SpiderPipeline
+
+        logger.debug(f"Uploader instance attrs: {self.__dict__}")
+
+    @check_pause
+    def upload(self):
+        if not self.upload.length:
+            time.sleep(self.wait_seconds)
+            return
+        if self.upload.length < self.upload_size:
+            time.sleep(self.wait_seconds)
+        data_info, seeds = {}, []
+        try:
+            for _ in range(self.upload_size):
+                item = self.upload.pop()
+                if not item:
+                    break
+                seeds.append(item.seed)
+                data = self.Pipeline.build(item)
+                data_info.setdefault(item.table, []).append(data)
+            for table, datas in data_info.items():
+                try:
+                    self.Pipeline.upload(table, datas)
+                except Exception as e:
+                    logger.info(e)
+        except Exception as e:
+            logger.info(e)
+        if seeds:
+            self.done.push(seeds)
+
+        logger.info("upload pipeline close!")
+
+    def run(self):
+        self.register(self.upload, tag="Uploader")
+
+
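Like the Distributor, the Uploader never spawns threads itself: run() hands its worker function to the register callback, and Launcher._monitor revives any registered thread that dies. A standalone sketch of that register/monitor pattern, independent of cobweb (all names here are illustrative):

import threading
import time

workers, funcs = {}, {}

def register(func, tag="worker"):
    # start a named worker thread and remember its function for restarts
    name = f"{tag}:{func.__name__}"
    funcs[name] = func
    thread = threading.Thread(name=name, target=func, daemon=True)
    workers[name] = thread
    thread.start()

def monitor(stop: threading.Event):
    # supervisor loop: revive any worker whose thread has died
    while not stop.is_set():
        for name, thread in workers.items():
            if not thread.is_alive():
                thread = threading.Thread(name=name, target=funcs[name], daemon=True)
                workers[name] = thread
                thread.start()
        time.sleep(3)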
cobweb/pipelines/pipeline.py
@@ -1,48 +1,15 @@
-import time
-import threading
-
 from abc import ABC, abstractmethod
-from cobweb.base import BaseItem, TaskQueue, logger, Decorators
-from cobweb import setting
+from cobweb.base import BaseItem
 
 
 class Pipeline(ABC):
 
-    def __init__(
-        self,
-        stop: threading.Event,
-        pause: threading.Event,
-    ):
-        super().__init__()
-        self.stop = stop
-        self.pause = pause
-        self.upload_queue_size = setting.UPLOAD_QUEUE_SIZE
-        self.upload_wait_time = setting.UPLOAD_WAIT_TIME
-
     @abstractmethod
     def build(self, item: BaseItem) -> dict:
-        ...
+        pass
 
     @abstractmethod
     def upload(self, table: str, data: list) -> bool:
-        ...
+        pass
 
-    @Decorators.pause
-    def run(self):
-        data_info, seeds = {}, []
-        thread_sleep = self.upload_wait_time if TaskQueue.UPLOAD.length < self.upload_queue_size else 0.1
-        try:
-            while (item := TaskQueue.UPLOAD.pop()) and len(seeds) <= self.upload_queue_size:
-                data = self.build(item)
-                data_info.setdefault(item.table, []).append(data)
-                seeds.append(item.seed)
-            for table, datas in data_info.items():
-                self.upload(table, datas)
-        except Exception as e:
-            logger.info(e)
-            seeds = None
-            # todo: retry
-        finally:
-            TaskQueue.DONE.push(seeds)
 
-            time.sleep(thread_sleep)
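With run() and the threading plumbing removed, a Pipeline is now a passive pair of hooks driven by the Uploader: build() turns one item into a row, upload() flushes one table's batch. A minimal hypothetical implementation (the class and its field choices are illustrative; only the abstract signatures and item.seed/item.table come from this diff):

from cobweb.base import BaseItem
from cobweb.pipelines.pipeline import Pipeline

class ConsolePipeline(Pipeline):

    def build(self, item: BaseItem) -> dict:
        # flatten an item into a storable row; the Uploader batches these
        # rows per item.table before calling upload()
        return {"seed": item.seed.to_string, "data": vars(item)}

    def upload(self, table: str, data: list) -> bool:
        print(f"{table}: flushing {len(data)} rows")
        return True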
cobweb/schedulers/__init__.py
@@ -1,3 +1 @@
-from .scheduler_redis import RedisScheduler
-from .scheduler_api import ApiScheduler
-
+from .scheduler_with_redis import RedisScheduler
cobweb/schedulers/launcher_air.py
@@ -0,0 +1,93 @@
+import time
+
+from cobweb.base import logger
+from cobweb.constant import LogTemplate
+from .launcher import Launcher, check_pause
+
+
+class LauncherAir(Launcher):
+
+    # def _scheduler(self):
+    #     if self.start_seeds:
+    #         self.__LAUNCHER_QUEUE__['todo'].push(self.start_seeds)
+
+    @check_pause
+    def _insert(self):
+        new_seeds = {}
+        del_seeds = set()
+        status = self.__LAUNCHER_QUEUE__['new'].length < self._new_queue_max_size
+        for _ in range(self._new_queue_max_size):
+            seed_tuple = self.__LAUNCHER_QUEUE__['new'].pop()
+            if not seed_tuple:
+                break
+            seed, new_seed = seed_tuple
+            new_seeds[new_seed.to_string] = new_seed.params.priority
+            del_seeds.add(seed.to_string)
+        if new_seeds:
+            self.__LAUNCHER_QUEUE__['todo'].push(new_seeds)
+        if del_seeds:
+            self.__LAUNCHER_QUEUE__['done'].push(del_seeds)
+        if status:
+            time.sleep(self._new_queue_wait_seconds)
+
+    @check_pause
+    def _delete(self):
+        seeds = []
+        status = self.__LAUNCHER_QUEUE__['done'].length < self._done_queue_max_size
+
+        for _ in range(self._done_queue_max_size):
+            seed = self.__LAUNCHER_QUEUE__['done'].pop()
+            if not seed:
+                break
+            seeds.append(seed.to_string)
+
+        if seeds:
+            self._remove_doing_seeds(seeds)
+
+        if status:
+            time.sleep(self._done_queue_wait_seconds)
+
+    def _polling(self):
+
+        check_emtpy_times = 0
+
+        while not self._stop.is_set():
+
+            queue_not_empty_count = 0
+            pooling_wait_seconds = 30
+
+            for q in self.__LAUNCHER_QUEUE__.values():
+                if q.length != 0:
+                    queue_not_empty_count += 1
+
+            if queue_not_empty_count == 0:
+                pooling_wait_seconds = 3
+                if self._pause.is_set():
+                    check_emtpy_times = 0
+                    if not self._task_model:
+                        logger.info("Done! Ready to close thread...")
+                        self._stop.set()
+                elif check_emtpy_times > 2:
+                    self.__DOING__ = {}
+                    self._pause.set()
+                else:
+                    logger.info(
+                        "check whether the task is complete, "
+                        f"reset times {3 - check_emtpy_times}"
+                    )
+                    check_emtpy_times += 1
+            elif self._pause.is_set():
+                self._pause.clear()
+                self._execute()
+            else:
+                logger.info(LogTemplate.launcher_air_polling.format(
+                    task=self.task,
+                    doing_len=len(self.__DOING__.keys()),
+                    todo_len=self.__LAUNCHER_QUEUE__['todo'].length,
+                    done_len=self.__LAUNCHER_QUEUE__['done'].length,
+                    upload_len=self.__LAUNCHER_QUEUE__['upload'].length,
+                ))
+
+            time.sleep(pooling_wait_seconds)
+
+