cobweb-launcher 1.2.1__py3-none-any.whl → 1.2.3__py3-none-any.whl

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of cobweb-launcher might be problematic. Click here for more details.

@@ -65,6 +65,7 @@ class Launcher(threading.Thread):
65
65
  self._Crawler = dynamic_load_class(setting.CRAWLER)
66
66
  self._Pipeline = dynamic_load_class(setting.PIPELINE)
67
67
 
68
+ self._before_scheduler_wait_seconds = setting.BEFORE_SCHEDULER_WAIT_SECONDS
68
69
  self._scheduler_wait_seconds = setting.SCHEDULER_WAIT_SECONDS
69
70
  self._todo_queue_full_wait_seconds = setting.TODO_QUEUE_FULL_WAIT_SECONDS
70
71
  self._new_queue_wait_seconds = setting.NEW_QUEUE_WAIT_SECONDS
@@ -83,7 +84,6 @@ class Launcher(threading.Thread):
83
84
  self._done_model = setting.DONE_MODEL
84
85
  self._task_model = setting.TASK_MODEL
85
86
 
86
- # self._upload_queue = Queue()
87
87
 
88
88
  @property
89
89
  def start_seeds(self):
@@ -141,7 +141,7 @@ class Launcher(threading.Thread):
141
141
  def _execute(self):
142
142
  for func_name in self.__LAUNCHER_FUNC__:
143
143
  threading.Thread(name=func_name, target=getattr(self, func_name)).start()
144
- time.sleep(2)
144
+ time.sleep(1)
145
145
 
146
146
  def run(self):
147
147
  threading.Thread(target=self._execute_heartbeat).start()
@@ -16,6 +16,8 @@ class LauncherPro(Launcher):
16
16
  self._fail_key = "{%s:%s}:fail" % (project, task)
17
17
  self._heartbeat_key = "heartbeat:%s_%s" % (project, task)
18
18
  self._reset_lock_key = "lock:reset:%s_%s" % (project, task)
19
+ self._statistics_done_key = "{%s:%s}:statistics:done" % (project, task)
20
+ self._statistics_fail_key = "{%s:%s}:statistics:fail" % (project, task)
19
21
  self._db = RedisDB()
20
22
 
21
23
  self._heartbeat_start_event = threading.Event()
@@ -25,6 +27,12 @@ class LauncherPro(Launcher):
25
27
  def heartbeat(self):
26
28
  return self._db.exists(self._heartbeat_key)
27
29
 
30
+ def statistics(self, key, count, expire_time):
31
+ if self._db.exists(key):
32
+ self._db._client.incrby(key, count)
33
+ else:
34
+ self._db.setex(key, expire_time, int(count))
35
+
28
36
  def _execute_heartbeat(self):
29
37
  while not self._stop.is_set():
30
38
  if self._heartbeat_start_event.is_set():
@@ -124,19 +132,23 @@ class LauncherPro(Launcher):
124
132
  else:
125
133
  seeds.append(seed.to_string)
126
134
  if seeds:
127
- self._db.zrem(self._todo_key, *seeds)
135
+ count = self._db.zrem(self._todo_key, *seeds)
136
+ self.statistics(self._statistics_done_key, count, 86400 * 30)
128
137
  self._remove_doing_seeds(seeds)
129
138
  if s_seeds:
130
- self._db.done([self._todo_key, self._done_key], *s_seeds)
139
+ count = self._db.done([self._todo_key, self._done_key], *s_seeds)
140
+ self.statistics(self._statistics_done_key, count, 86400 * 30)
131
141
  self._remove_doing_seeds(s_seeds)
132
142
  if f_seeds:
133
- self._db.done([self._todo_key, self._fail_key], *f_seeds)
143
+ count = self._db.done([self._todo_key, self._fail_key], *f_seeds)
144
+ self.statistics(self._statistics_fail_key, count, 86400 * 30)
134
145
  self._remove_doing_seeds(f_seeds)
135
146
 
136
147
  if status:
137
148
  time.sleep(self._done_queue_wait_seconds)
138
149
 
139
150
  def _polling(self):
151
+ wait_scheduler_execute = True
140
152
  check_emtpy_times = 0
141
153
  while not self._stop.is_set():
142
154
  queue_not_empty_count = 0
@@ -145,14 +157,19 @@ class LauncherPro(Launcher):
145
157
  for q in self.__LAUNCHER_QUEUE__.values():
146
158
  if q.length != 0:
147
159
  queue_not_empty_count += 1
160
+ wait_scheduler_execute = False
148
161
 
149
162
  if queue_not_empty_count == 0:
150
163
  pooling_wait_seconds = 3
151
164
  if self._pause.is_set():
152
165
  check_emtpy_times = 0
153
- if not self._task_model:
166
+ if not self._task_model and (
167
+ not wait_scheduler_execute or
168
+ int(time.time()) - self._app_time > self._before_scheduler_wait_seconds
169
+ ):
154
170
  logger.info("Done! ready to close thread...")
155
171
  self._stop.set()
172
+
156
173
  elif self._db.zcount(self._todo_key, _min=0, _max="(1000"):
157
174
  logger.info(f"Recovery {self.task} task run!")
158
175
  self._pause.clear()
@@ -161,16 +178,14 @@ class LauncherPro(Launcher):
161
178
  logger.info("pause! waiting for resume...")
162
179
  elif check_emtpy_times > 2:
163
180
  self.__DOING__ = {}
164
- self._pause.set()
181
+ if not self._db.zcount(self._todo_key, _min="-inf", _max="(1000"):
182
+ self._pause.set()
165
183
  else:
166
184
  logger.info(
167
185
  "check whether the task is complete, "
168
186
  f"reset times {3 - check_emtpy_times}"
169
187
  )
170
188
  check_emtpy_times += 1
171
- # elif self._pause.is_set():
172
- # self._pause.clear()
173
- # self._execute()
174
189
  else:
175
190
  logger.info(LogTemplate.launcher_pro_polling.format(
176
191
  task=self.task,
@@ -184,36 +199,6 @@ class LauncherPro(Launcher):
184
199
  ))
185
200
 
186
201
  time.sleep(pooling_wait_seconds)
187
- # if self._pause.is_set():
188
- # self._pause.clear()
189
- # self._execute()
190
- #
191
- # elif queue_not_empty_count == 0:
192
- # pooling_wait_seconds = 5
193
- # check_emtpy_times += 1
194
- # else:
195
- # check_emtpy_times = 0
196
- #
197
- # if not self._db.zcount(self._todo, _min=0, _max="(1000") and check_emtpy_times > 2:
198
- # check_emtpy_times = 0
199
- # self.__DOING__ = {}
200
- # self._pause.set()
201
- #
202
- # time.sleep(pooling_wait_seconds)
203
- #
204
- # if not self._pause.is_set():
205
- # logger.info(LogTemplate.launcher_pro_polling.format(
206
- # task=self.task,
207
- # doing_len=len(self.__DOING__.keys()),
208
- # todo_len=self.__LAUNCHER_QUEUE__['todo'].length,
209
- # done_len=self.__LAUNCHER_QUEUE__['done'].length,
210
- # redis_seed_count=self._db.zcount(self._todo, "-inf", "+inf"),
211
- # redis_todo_len=self._db.zcount(self._todo, 0, "(1000"),
212
- # redis_doing_len=self._db.zcount(self._todo, "-inf", "(0"),
213
- # upload_len=self.__LAUNCHER_QUEUE__['upload'].length,
214
- # ))
215
- # elif not self._task_model:
216
- # self._stop.set()
217
202
 
218
203
  logger.info("Done! Ready to close thread...")
219
204
 
cobweb/setting.py CHANGED
@@ -26,6 +26,9 @@ OSS_SECRET_KEY = os.getenv("OSS_SECRET_KEY")
26
26
  OSS_CHUNK_SIZE = 10 * 1024 ** 2
27
27
  OSS_MIN_UPLOAD_SIZE = 1024
28
28
 
29
+ # message
30
+ MESSAGE = ""
31
+
29
32
 
30
33
  # 采集器选择
31
34
  CRAWLER = "cobweb.crawlers.Crawler"
@@ -35,6 +38,8 @@ PIPELINE = "cobweb.pipelines.pipeline_console.Console"
35
38
 
36
39
 
37
40
  # Launcher 等待时间
41
+
42
+ BEFORE_SCHEDULER_WAIT_SECONDS = 60 # 调度前等待时间,只作用于单次任务
38
43
  SCHEDULER_WAIT_SECONDS = 15 # 调度等待时间
39
44
  TODO_QUEUE_FULL_WAIT_SECONDS = 5 # todo队列已满时等待时间
40
45
  NEW_QUEUE_WAIT_SECONDS = 30 # new队列等待时间
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: cobweb-launcher
3
- Version: 1.2.1
3
+ Version: 1.2.3
4
4
  Summary: spider_hole
5
5
  Home-page: https://github.com/Juannie-PP/cobweb
6
6
  Author: Juannie-PP
@@ -1,6 +1,6 @@
1
1
  cobweb/__init__.py,sha256=uMHyf4Fekbyw2xBCbkA8R0LwCpBJf5p_7pWbh60ZWYk,83
2
2
  cobweb/constant.py,sha256=zy3XYsc1qp2B76_Fn_hVQ8eGHlPBd3OFlZK2cryE6FY,2839
3
- cobweb/setting.py,sha256=zOO1cA_zQd4Q0CzY_tdSfdo-10L4QIVpm4382wbP5BQ,1906
3
+ cobweb/setting.py,sha256=_t3LMSpxUNR4dVD9Tox22W9omHPvjeWABFzpgkEOoH0,2016
4
4
  cobweb/base/__init__.py,sha256=4gwWWQ0Q8cYG9cD7Lwf4XMqRGc5M_mapS3IczR6zeCE,222
5
5
  cobweb/base/common_queue.py,sha256=W7PPZZFl52j3Mc916T0imHj7oAUelA6aKJwW-FecDPE,872
6
6
  cobweb/base/decorators.py,sha256=wDCaQ94aAZGxks9Ljc0aXq6omDXT1_yzFy83ZW6VbVI,930
@@ -18,9 +18,9 @@ cobweb/db/redis_db.py,sha256=NNI2QkRV1hEZI-z-COEncXt88z3pZN6wusKlcQzc8V4,4304
18
18
  cobweb/exceptions/__init__.py,sha256=E9SHnJBbhD7fOgPFMswqyOf8SKRDrI_i25L0bSpohvk,32
19
19
  cobweb/exceptions/oss_db_exception.py,sha256=iP_AImjNHT3-Iv49zCFQ3rdLnlvuHa3h2BXApgrOYpA,636
20
20
  cobweb/launchers/__init__.py,sha256=af0Y6wrGX8SQZ7w7XL2sOtREjCT3dwad-uCc3nIontY,76
21
- cobweb/launchers/launcher.py,sha256=_oPoFNXp3Rrf5eK2tcHtwHZi2pKbuNPBf_VRvj9dJUc,5379
21
+ cobweb/launchers/launcher.py,sha256=W2CdS6-tYL0queyT51g__EoQfwsBxE0Z4wEQzuONe6Q,5424
22
22
  cobweb/launchers/launcher_air.py,sha256=zHVEJqQCxYU1WDnqQzzEHbEXasR1GmKevujQkCfFt5o,2947
23
- cobweb/launchers/launcher_pro.py,sha256=046x07CKK26qc69hOAoDPNVkAXAiFwmR0hz2EsDh_tc,8859
23
+ cobweb/launchers/launcher_pro.py,sha256=uu_5S9rzgv0psDr8tYijuWcPQUSiVCcrzWtd6QDPiYY,8349
24
24
  cobweb/pipelines/__init__.py,sha256=zSUsGtx6smbs2iXBXvYynReKSgky-3gjqaAtKVnA_OU,105
25
25
  cobweb/pipelines/base_pipeline.py,sha256=fYnWf79GmhufXpcnMa3te18SbmnVeYLwxfyo-zLd9CY,1577
26
26
  cobweb/pipelines/loghub_pipeline.py,sha256=cjPO6w6UJ0jNw2fVvdX0BCdlm58T7dmYXlxzXOBpvfY,1027
@@ -30,8 +30,8 @@ cobweb/pipelines/pipeline_loghub.py,sha256=xZ6D55BGdiM71WUv83jyLGbEyUwhBHLJRZoXt
30
30
  cobweb/utils/__init__.py,sha256=JTE4sBfHnKHhD6w9Auk0MIT7O9BMOamCeryhlHNx3Zg,47
31
31
  cobweb/utils/oss.py,sha256=gyt8-UB07tVphZLQXMOf-JTJwU-mWq8KZkOXKkAf3uk,3513
32
32
  cobweb/utils/tools.py,sha256=5JEaaAwYoV9Sdla2UBIJn6faUBuXmxUMagm9ck6FVqs,1253
33
- cobweb_launcher-1.2.1.dist-info/LICENSE,sha256=z1rxSIGOyzcSb3orZxFPxzx-0C1vTocmswqBNxpKfEk,1063
34
- cobweb_launcher-1.2.1.dist-info/METADATA,sha256=M-72J_GDC29Vz2KGFB9nZCqHlK8YCYhVioR_qwOnjrs,6489
35
- cobweb_launcher-1.2.1.dist-info/WHEEL,sha256=ewwEueio1C2XeHTvT17n8dZUJgOvyCWCt0WVNLClP9o,92
36
- cobweb_launcher-1.2.1.dist-info/top_level.txt,sha256=4GETBGNsKqiCUezmT-mJn7tjhcDlu7nLIV5gGgHBW4I,7
37
- cobweb_launcher-1.2.1.dist-info/RECORD,,
33
+ cobweb_launcher-1.2.3.dist-info/LICENSE,sha256=z1rxSIGOyzcSb3orZxFPxzx-0C1vTocmswqBNxpKfEk,1063
34
+ cobweb_launcher-1.2.3.dist-info/METADATA,sha256=YzkV8mWy2OVcj36NSo3N_Im-r8dvSfu5-K2_B43VcuA,6489
35
+ cobweb_launcher-1.2.3.dist-info/WHEEL,sha256=ewwEueio1C2XeHTvT17n8dZUJgOvyCWCt0WVNLClP9o,92
36
+ cobweb_launcher-1.2.3.dist-info/top_level.txt,sha256=4GETBGNsKqiCUezmT-mJn7tjhcDlu7nLIV5gGgHBW4I,7
37
+ cobweb_launcher-1.2.3.dist-info/RECORD,,