cobweb-launcher 1.2.1__tar.gz → 1.2.3__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of cobweb-launcher might be problematic. Click here for more details.
- {cobweb-launcher-1.2.1/cobweb_launcher.egg-info → cobweb-launcher-1.2.3}/PKG-INFO +1 -1
- {cobweb-launcher-1.2.1 → cobweb-launcher-1.2.3}/cobweb/launchers/launcher.py +2 -2
- {cobweb-launcher-1.2.1 → cobweb-launcher-1.2.3}/cobweb/launchers/launcher_pro.py +23 -38
- {cobweb-launcher-1.2.1 → cobweb-launcher-1.2.3}/cobweb/setting.py +5 -0
- {cobweb-launcher-1.2.1 → cobweb-launcher-1.2.3/cobweb_launcher.egg-info}/PKG-INFO +1 -1
- {cobweb-launcher-1.2.1 → cobweb-launcher-1.2.3}/setup.py +1 -1
- {cobweb-launcher-1.2.1 → cobweb-launcher-1.2.3}/LICENSE +0 -0
- {cobweb-launcher-1.2.1 → cobweb-launcher-1.2.3}/README.md +0 -0
- {cobweb-launcher-1.2.1 → cobweb-launcher-1.2.3}/cobweb/__init__.py +0 -0
- {cobweb-launcher-1.2.1 → cobweb-launcher-1.2.3}/cobweb/base/__init__.py +0 -0
- {cobweb-launcher-1.2.1 → cobweb-launcher-1.2.3}/cobweb/base/common_queue.py +0 -0
- {cobweb-launcher-1.2.1 → cobweb-launcher-1.2.3}/cobweb/base/decorators.py +0 -0
- {cobweb-launcher-1.2.1 → cobweb-launcher-1.2.3}/cobweb/base/item.py +0 -0
- {cobweb-launcher-1.2.1 → cobweb-launcher-1.2.3}/cobweb/base/log.py +0 -0
- {cobweb-launcher-1.2.1 → cobweb-launcher-1.2.3}/cobweb/base/request.py +0 -0
- {cobweb-launcher-1.2.1 → cobweb-launcher-1.2.3}/cobweb/base/response.py +0 -0
- {cobweb-launcher-1.2.1 → cobweb-launcher-1.2.3}/cobweb/base/seed.py +0 -0
- {cobweb-launcher-1.2.1 → cobweb-launcher-1.2.3}/cobweb/constant.py +0 -0
- {cobweb-launcher-1.2.1 → cobweb-launcher-1.2.3}/cobweb/crawlers/__init__.py +0 -0
- {cobweb-launcher-1.2.1 → cobweb-launcher-1.2.3}/cobweb/crawlers/crawler.py +0 -0
- {cobweb-launcher-1.2.1 → cobweb-launcher-1.2.3}/cobweb/db/__init__.py +0 -0
- {cobweb-launcher-1.2.1 → cobweb-launcher-1.2.3}/cobweb/db/redis_db.py +0 -0
- {cobweb-launcher-1.2.1 → cobweb-launcher-1.2.3}/cobweb/exceptions/__init__.py +0 -0
- {cobweb-launcher-1.2.1 → cobweb-launcher-1.2.3}/cobweb/exceptions/oss_db_exception.py +0 -0
- {cobweb-launcher-1.2.1 → cobweb-launcher-1.2.3}/cobweb/launchers/__init__.py +0 -0
- {cobweb-launcher-1.2.1 → cobweb-launcher-1.2.3}/cobweb/launchers/launcher_air.py +0 -0
- {cobweb-launcher-1.2.1 → cobweb-launcher-1.2.3}/cobweb/pipelines/__init__.py +0 -0
- {cobweb-launcher-1.2.1 → cobweb-launcher-1.2.3}/cobweb/pipelines/pipeline.py +0 -0
- {cobweb-launcher-1.2.1 → cobweb-launcher-1.2.3}/cobweb/pipelines/pipeline_console.py +0 -0
- {cobweb-launcher-1.2.1 → cobweb-launcher-1.2.3}/cobweb/pipelines/pipeline_loghub.py +0 -0
- {cobweb-launcher-1.2.1 → cobweb-launcher-1.2.3}/cobweb/utils/__init__.py +0 -0
- {cobweb-launcher-1.2.1 → cobweb-launcher-1.2.3}/cobweb/utils/oss.py +0 -0
- {cobweb-launcher-1.2.1 → cobweb-launcher-1.2.3}/cobweb/utils/tools.py +0 -0
- {cobweb-launcher-1.2.1 → cobweb-launcher-1.2.3}/cobweb_launcher.egg-info/SOURCES.txt +0 -0
- {cobweb-launcher-1.2.1 → cobweb-launcher-1.2.3}/cobweb_launcher.egg-info/dependency_links.txt +0 -0
- {cobweb-launcher-1.2.1 → cobweb-launcher-1.2.3}/cobweb_launcher.egg-info/requires.txt +0 -0
- {cobweb-launcher-1.2.1 → cobweb-launcher-1.2.3}/cobweb_launcher.egg-info/top_level.txt +0 -0
- {cobweb-launcher-1.2.1 → cobweb-launcher-1.2.3}/setup.cfg +0 -0
|
@@ -65,6 +65,7 @@ class Launcher(threading.Thread):
|
|
|
65
65
|
self._Crawler = dynamic_load_class(setting.CRAWLER)
|
|
66
66
|
self._Pipeline = dynamic_load_class(setting.PIPELINE)
|
|
67
67
|
|
|
68
|
+
self._before_scheduler_wait_seconds = setting.BEFORE_SCHEDULER_WAIT_SECONDS
|
|
68
69
|
self._scheduler_wait_seconds = setting.SCHEDULER_WAIT_SECONDS
|
|
69
70
|
self._todo_queue_full_wait_seconds = setting.TODO_QUEUE_FULL_WAIT_SECONDS
|
|
70
71
|
self._new_queue_wait_seconds = setting.NEW_QUEUE_WAIT_SECONDS
|
|
@@ -83,7 +84,6 @@ class Launcher(threading.Thread):
|
|
|
83
84
|
self._done_model = setting.DONE_MODEL
|
|
84
85
|
self._task_model = setting.TASK_MODEL
|
|
85
86
|
|
|
86
|
-
# self._upload_queue = Queue()
|
|
87
87
|
|
|
88
88
|
@property
|
|
89
89
|
def start_seeds(self):
|
|
@@ -141,7 +141,7 @@ class Launcher(threading.Thread):
|
|
|
141
141
|
def _execute(self):
|
|
142
142
|
for func_name in self.__LAUNCHER_FUNC__:
|
|
143
143
|
threading.Thread(name=func_name, target=getattr(self, func_name)).start()
|
|
144
|
-
time.sleep(
|
|
144
|
+
time.sleep(1)
|
|
145
145
|
|
|
146
146
|
def run(self):
|
|
147
147
|
threading.Thread(target=self._execute_heartbeat).start()
|
|
@@ -16,6 +16,8 @@ class LauncherPro(Launcher):
|
|
|
16
16
|
self._fail_key = "{%s:%s}:fail" % (project, task)
|
|
17
17
|
self._heartbeat_key = "heartbeat:%s_%s" % (project, task)
|
|
18
18
|
self._reset_lock_key = "lock:reset:%s_%s" % (project, task)
|
|
19
|
+
self._statistics_done_key = "{%s:%s}:statistics:done" % (project, task)
|
|
20
|
+
self._statistics_fail_key = "{%s:%s}:statistics:fail" % (project, task)
|
|
19
21
|
self._db = RedisDB()
|
|
20
22
|
|
|
21
23
|
self._heartbeat_start_event = threading.Event()
|
|
@@ -25,6 +27,12 @@ class LauncherPro(Launcher):
|
|
|
25
27
|
def heartbeat(self):
|
|
26
28
|
return self._db.exists(self._heartbeat_key)
|
|
27
29
|
|
|
30
|
+
def statistics(self, key, count, expire_time):
|
|
31
|
+
if self._db.exists(key):
|
|
32
|
+
self._db._client.incrby(key, count)
|
|
33
|
+
else:
|
|
34
|
+
self._db.setex(key, expire_time, int(count))
|
|
35
|
+
|
|
28
36
|
def _execute_heartbeat(self):
|
|
29
37
|
while not self._stop.is_set():
|
|
30
38
|
if self._heartbeat_start_event.is_set():
|
|
@@ -124,19 +132,23 @@ class LauncherPro(Launcher):
|
|
|
124
132
|
else:
|
|
125
133
|
seeds.append(seed.to_string)
|
|
126
134
|
if seeds:
|
|
127
|
-
self._db.zrem(self._todo_key, *seeds)
|
|
135
|
+
count = self._db.zrem(self._todo_key, *seeds)
|
|
136
|
+
self.statistics(self._statistics_done_key, count, 86400 * 30)
|
|
128
137
|
self._remove_doing_seeds(seeds)
|
|
129
138
|
if s_seeds:
|
|
130
|
-
self._db.done([self._todo_key, self._done_key], *s_seeds)
|
|
139
|
+
count = self._db.done([self._todo_key, self._done_key], *s_seeds)
|
|
140
|
+
self.statistics(self._statistics_done_key, count, 86400 * 30)
|
|
131
141
|
self._remove_doing_seeds(s_seeds)
|
|
132
142
|
if f_seeds:
|
|
133
|
-
self._db.done([self._todo_key, self._fail_key], *f_seeds)
|
|
143
|
+
count = self._db.done([self._todo_key, self._fail_key], *f_seeds)
|
|
144
|
+
self.statistics(self._statistics_fail_key, count, 86400 * 30)
|
|
134
145
|
self._remove_doing_seeds(f_seeds)
|
|
135
146
|
|
|
136
147
|
if status:
|
|
137
148
|
time.sleep(self._done_queue_wait_seconds)
|
|
138
149
|
|
|
139
150
|
def _polling(self):
|
|
151
|
+
wait_scheduler_execute = True
|
|
140
152
|
check_emtpy_times = 0
|
|
141
153
|
while not self._stop.is_set():
|
|
142
154
|
queue_not_empty_count = 0
|
|
@@ -145,14 +157,19 @@ class LauncherPro(Launcher):
|
|
|
145
157
|
for q in self.__LAUNCHER_QUEUE__.values():
|
|
146
158
|
if q.length != 0:
|
|
147
159
|
queue_not_empty_count += 1
|
|
160
|
+
wait_scheduler_execute = False
|
|
148
161
|
|
|
149
162
|
if queue_not_empty_count == 0:
|
|
150
163
|
pooling_wait_seconds = 3
|
|
151
164
|
if self._pause.is_set():
|
|
152
165
|
check_emtpy_times = 0
|
|
153
|
-
if not self._task_model:
|
|
166
|
+
if not self._task_model and (
|
|
167
|
+
not wait_scheduler_execute or
|
|
168
|
+
int(time.time()) - self._app_time > self._before_scheduler_wait_seconds
|
|
169
|
+
):
|
|
154
170
|
logger.info("Done! ready to close thread...")
|
|
155
171
|
self._stop.set()
|
|
172
|
+
|
|
156
173
|
elif self._db.zcount(self._todo_key, _min=0, _max="(1000"):
|
|
157
174
|
logger.info(f"Recovery {self.task} task run!")
|
|
158
175
|
self._pause.clear()
|
|
@@ -161,16 +178,14 @@ class LauncherPro(Launcher):
|
|
|
161
178
|
logger.info("pause! waiting for resume...")
|
|
162
179
|
elif check_emtpy_times > 2:
|
|
163
180
|
self.__DOING__ = {}
|
|
164
|
-
self.
|
|
181
|
+
if not self._db.zcount(self._todo_key, _min="-inf", _max="(1000"):
|
|
182
|
+
self._pause.set()
|
|
165
183
|
else:
|
|
166
184
|
logger.info(
|
|
167
185
|
"check whether the task is complete, "
|
|
168
186
|
f"reset times {3 - check_emtpy_times}"
|
|
169
187
|
)
|
|
170
188
|
check_emtpy_times += 1
|
|
171
|
-
# elif self._pause.is_set():
|
|
172
|
-
# self._pause.clear()
|
|
173
|
-
# self._execute()
|
|
174
189
|
else:
|
|
175
190
|
logger.info(LogTemplate.launcher_pro_polling.format(
|
|
176
191
|
task=self.task,
|
|
@@ -184,36 +199,6 @@ class LauncherPro(Launcher):
|
|
|
184
199
|
))
|
|
185
200
|
|
|
186
201
|
time.sleep(pooling_wait_seconds)
|
|
187
|
-
# if self._pause.is_set():
|
|
188
|
-
# self._pause.clear()
|
|
189
|
-
# self._execute()
|
|
190
|
-
#
|
|
191
|
-
# elif queue_not_empty_count == 0:
|
|
192
|
-
# pooling_wait_seconds = 5
|
|
193
|
-
# check_emtpy_times += 1
|
|
194
|
-
# else:
|
|
195
|
-
# check_emtpy_times = 0
|
|
196
|
-
#
|
|
197
|
-
# if not self._db.zcount(self._todo, _min=0, _max="(1000") and check_emtpy_times > 2:
|
|
198
|
-
# check_emtpy_times = 0
|
|
199
|
-
# self.__DOING__ = {}
|
|
200
|
-
# self._pause.set()
|
|
201
|
-
#
|
|
202
|
-
# time.sleep(pooling_wait_seconds)
|
|
203
|
-
#
|
|
204
|
-
# if not self._pause.is_set():
|
|
205
|
-
# logger.info(LogTemplate.launcher_pro_polling.format(
|
|
206
|
-
# task=self.task,
|
|
207
|
-
# doing_len=len(self.__DOING__.keys()),
|
|
208
|
-
# todo_len=self.__LAUNCHER_QUEUE__['todo'].length,
|
|
209
|
-
# done_len=self.__LAUNCHER_QUEUE__['done'].length,
|
|
210
|
-
# redis_seed_count=self._db.zcount(self._todo, "-inf", "+inf"),
|
|
211
|
-
# redis_todo_len=self._db.zcount(self._todo, 0, "(1000"),
|
|
212
|
-
# redis_doing_len=self._db.zcount(self._todo, "-inf", "(0"),
|
|
213
|
-
# upload_len=self.__LAUNCHER_QUEUE__['upload'].length,
|
|
214
|
-
# ))
|
|
215
|
-
# elif not self._task_model:
|
|
216
|
-
# self._stop.set()
|
|
217
202
|
|
|
218
203
|
logger.info("Done! Ready to close thread...")
|
|
219
204
|
|
|
@@ -26,6 +26,9 @@ OSS_SECRET_KEY = os.getenv("OSS_SECRET_KEY")
|
|
|
26
26
|
OSS_CHUNK_SIZE = 10 * 1024 ** 2
|
|
27
27
|
OSS_MIN_UPLOAD_SIZE = 1024
|
|
28
28
|
|
|
29
|
+
# message
|
|
30
|
+
MESSAGE = ""
|
|
31
|
+
|
|
29
32
|
|
|
30
33
|
# 采集器选择
|
|
31
34
|
CRAWLER = "cobweb.crawlers.Crawler"
|
|
@@ -35,6 +38,8 @@ PIPELINE = "cobweb.pipelines.pipeline_console.Console"
|
|
|
35
38
|
|
|
36
39
|
|
|
37
40
|
# Launcher 等待时间
|
|
41
|
+
|
|
42
|
+
BEFORE_SCHEDULER_WAIT_SECONDS = 60 # 调度前等待时间,只作用于单次任务
|
|
38
43
|
SCHEDULER_WAIT_SECONDS = 15 # 调度等待时间
|
|
39
44
|
TODO_QUEUE_FULL_WAIT_SECONDS = 5 # todo队列已满时等待时间
|
|
40
45
|
NEW_QUEUE_WAIT_SECONDS = 30 # new队列等待时间
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{cobweb-launcher-1.2.1 → cobweb-launcher-1.2.3}/cobweb_launcher.egg-info/dependency_links.txt
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|