cobweb-launcher 1.2.5__tar.gz → 1.2.7__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of cobweb-launcher might be problematic. Click here for more details.
- {cobweb-launcher-1.2.5/cobweb_launcher.egg-info → cobweb-launcher-1.2.7}/PKG-INFO +1 -1
- {cobweb-launcher-1.2.5 → cobweb-launcher-1.2.7}/cobweb/crawlers/crawler.py +1 -1
- {cobweb-launcher-1.2.5 → cobweb-launcher-1.2.7}/cobweb/launchers/launcher.py +4 -3
- {cobweb-launcher-1.2.5 → cobweb-launcher-1.2.7}/cobweb/launchers/launcher_pro.py +7 -11
- {cobweb-launcher-1.2.5 → cobweb-launcher-1.2.7}/cobweb/setting.py +2 -5
- {cobweb-launcher-1.2.5 → cobweb-launcher-1.2.7/cobweb_launcher.egg-info}/PKG-INFO +1 -1
- {cobweb-launcher-1.2.5 → cobweb-launcher-1.2.7}/setup.py +1 -1
- {cobweb-launcher-1.2.5 → cobweb-launcher-1.2.7}/LICENSE +0 -0
- {cobweb-launcher-1.2.5 → cobweb-launcher-1.2.7}/README.md +0 -0
- {cobweb-launcher-1.2.5 → cobweb-launcher-1.2.7}/cobweb/__init__.py +0 -0
- {cobweb-launcher-1.2.5 → cobweb-launcher-1.2.7}/cobweb/base/__init__.py +0 -0
- {cobweb-launcher-1.2.5 → cobweb-launcher-1.2.7}/cobweb/base/common_queue.py +0 -0
- {cobweb-launcher-1.2.5 → cobweb-launcher-1.2.7}/cobweb/base/decorators.py +0 -0
- {cobweb-launcher-1.2.5 → cobweb-launcher-1.2.7}/cobweb/base/item.py +0 -0
- {cobweb-launcher-1.2.5 → cobweb-launcher-1.2.7}/cobweb/base/log.py +0 -0
- {cobweb-launcher-1.2.5 → cobweb-launcher-1.2.7}/cobweb/base/request.py +0 -0
- {cobweb-launcher-1.2.5 → cobweb-launcher-1.2.7}/cobweb/base/response.py +0 -0
- {cobweb-launcher-1.2.5 → cobweb-launcher-1.2.7}/cobweb/base/seed.py +0 -0
- {cobweb-launcher-1.2.5 → cobweb-launcher-1.2.7}/cobweb/constant.py +0 -0
- {cobweb-launcher-1.2.5 → cobweb-launcher-1.2.7}/cobweb/crawlers/__init__.py +0 -0
- {cobweb-launcher-1.2.5 → cobweb-launcher-1.2.7}/cobweb/db/__init__.py +0 -0
- {cobweb-launcher-1.2.5 → cobweb-launcher-1.2.7}/cobweb/db/redis_db.py +0 -0
- {cobweb-launcher-1.2.5 → cobweb-launcher-1.2.7}/cobweb/exceptions/__init__.py +0 -0
- {cobweb-launcher-1.2.5 → cobweb-launcher-1.2.7}/cobweb/exceptions/oss_db_exception.py +0 -0
- {cobweb-launcher-1.2.5 → cobweb-launcher-1.2.7}/cobweb/launchers/__init__.py +0 -0
- {cobweb-launcher-1.2.5 → cobweb-launcher-1.2.7}/cobweb/launchers/launcher_air.py +0 -0
- {cobweb-launcher-1.2.5 → cobweb-launcher-1.2.7}/cobweb/pipelines/__init__.py +0 -0
- {cobweb-launcher-1.2.5 → cobweb-launcher-1.2.7}/cobweb/pipelines/pipeline.py +0 -0
- {cobweb-launcher-1.2.5 → cobweb-launcher-1.2.7}/cobweb/pipelines/pipeline_console.py +0 -0
- {cobweb-launcher-1.2.5 → cobweb-launcher-1.2.7}/cobweb/pipelines/pipeline_loghub.py +0 -0
- {cobweb-launcher-1.2.5 → cobweb-launcher-1.2.7}/cobweb/utils/__init__.py +0 -0
- {cobweb-launcher-1.2.5 → cobweb-launcher-1.2.7}/cobweb/utils/oss.py +0 -0
- {cobweb-launcher-1.2.5 → cobweb-launcher-1.2.7}/cobweb/utils/tools.py +0 -0
- {cobweb-launcher-1.2.5 → cobweb-launcher-1.2.7}/cobweb_launcher.egg-info/SOURCES.txt +0 -0
- {cobweb-launcher-1.2.5 → cobweb-launcher-1.2.7}/cobweb_launcher.egg-info/dependency_links.txt +0 -0
- {cobweb-launcher-1.2.5 → cobweb-launcher-1.2.7}/cobweb_launcher.egg-info/requires.txt +0 -0
- {cobweb-launcher-1.2.5 → cobweb-launcher-1.2.7}/cobweb_launcher.egg-info/top_level.txt +0 -0
- {cobweb-launcher-1.2.5 → cobweb-launcher-1.2.7}/setup.cfg +0 -0
|
@@ -4,7 +4,7 @@ import threading
|
|
|
4
4
|
import importlib
|
|
5
5
|
|
|
6
6
|
from cobweb import setting
|
|
7
|
-
from cobweb.base import Seed, Queue
|
|
7
|
+
from cobweb.base import Seed, Queue, logger
|
|
8
8
|
from cobweb.utils.tools import dynamic_load_class
|
|
9
9
|
|
|
10
10
|
|
|
@@ -79,8 +79,8 @@ class Launcher(threading.Thread):
|
|
|
79
79
|
self._done_queue_max_size = setting.DONE_QUEUE_MAX_SIZE
|
|
80
80
|
self._upload_queue_max_size = setting.UPLOAD_QUEUE_MAX_SIZE
|
|
81
81
|
|
|
82
|
-
self.
|
|
83
|
-
self.
|
|
82
|
+
self._spider_max_retries = setting.SPIDER_MAX_RETRIES
|
|
83
|
+
self._spider_thread_num = setting.SPIDER_THREAD_NUM
|
|
84
84
|
|
|
85
85
|
self._done_model = setting.DONE_MODEL
|
|
86
86
|
self._task_model = setting.TASK_MODEL
|
|
@@ -138,6 +138,7 @@ class Launcher(threading.Thread):
|
|
|
138
138
|
def _remove_doing_seeds(self, seeds):
|
|
139
139
|
for seed in seeds:
|
|
140
140
|
self.__DOING__.pop(seed, None)
|
|
141
|
+
logger.info("remove %s seeds from __DOING__" % len(seeds))
|
|
141
142
|
|
|
142
143
|
def _execute(self):
|
|
143
144
|
for func_name in self.__LAUNCHER_FUNC__:
|
|
@@ -36,31 +36,27 @@ class LauncherPro(Launcher):
|
|
|
36
36
|
def _execute_heartbeat(self):
|
|
37
37
|
while not self._stop.is_set():
|
|
38
38
|
if self._heartbeat_start_event.is_set():
|
|
39
|
-
self._db.setex(self._heartbeat_key,
|
|
40
|
-
time.sleep(
|
|
39
|
+
self._db.setex(self._heartbeat_key, 5)
|
|
40
|
+
time.sleep(3)
|
|
41
41
|
|
|
42
42
|
def _reset(self):
|
|
43
43
|
"""
|
|
44
44
|
检查过期种子,重新添加到redis缓存中
|
|
45
45
|
"""
|
|
46
|
-
init = True
|
|
47
46
|
while not self._pause.is_set():
|
|
48
47
|
reset_wait_seconds = 30
|
|
49
|
-
start_reset_time = int(time.time())
|
|
50
48
|
if self._db.lock(self._reset_lock_key, t=120):
|
|
51
|
-
if not self.heartbeat:
|
|
52
|
-
self._heartbeat_start_event.set()
|
|
53
49
|
|
|
54
50
|
_min = -int(time.time()) + self._seed_reset_seconds \
|
|
55
|
-
if self.heartbeat
|
|
51
|
+
if self.heartbeat else "-inf"
|
|
56
52
|
|
|
57
53
|
self._db.members(self._todo_key, 0, _min=_min, _max="(0")
|
|
58
54
|
self._db.delete(self._reset_lock_key)
|
|
59
55
|
|
|
60
|
-
|
|
61
|
-
|
|
56
|
+
if not self.heartbeat:
|
|
57
|
+
self._heartbeat_start_event.set()
|
|
58
|
+
|
|
62
59
|
time.sleep(reset_wait_seconds)
|
|
63
|
-
init = False
|
|
64
60
|
|
|
65
61
|
def _scheduler(self):
|
|
66
62
|
"""
|
|
@@ -111,7 +107,7 @@ class LauncherPro(Launcher):
|
|
|
111
107
|
refresh_time = int(time.time())
|
|
112
108
|
seeds = {k:-refresh_time - v / 1000 for k, v in self.__DOING__.items()}
|
|
113
109
|
self._db.zadd(self._todo_key, item=seeds, xx=True)
|
|
114
|
-
time.sleep(
|
|
110
|
+
time.sleep(15)
|
|
115
111
|
|
|
116
112
|
def _delete(self):
|
|
117
113
|
"""
|
|
@@ -26,9 +26,6 @@ OSS_SECRET_KEY = os.getenv("OSS_SECRET_KEY")
|
|
|
26
26
|
OSS_CHUNK_SIZE = 10 * 1024 ** 2
|
|
27
27
|
OSS_MIN_UPLOAD_SIZE = 1024
|
|
28
28
|
|
|
29
|
-
# message
|
|
30
|
-
MESSAGE = ""
|
|
31
|
-
|
|
32
29
|
|
|
33
30
|
# 采集器选择
|
|
34
31
|
CRAWLER = "cobweb.crawlers.Crawler"
|
|
@@ -43,9 +40,9 @@ BEFORE_SCHEDULER_WAIT_SECONDS = 60 # 调度前等待时间,只作用于单次
|
|
|
43
40
|
SCHEDULER_WAIT_SECONDS = 15 # 调度等待时间
|
|
44
41
|
TODO_QUEUE_FULL_WAIT_SECONDS = 5 # todo队列已满时等待时间
|
|
45
42
|
NEW_QUEUE_WAIT_SECONDS = 30 # new队列等待时间
|
|
46
|
-
DONE_QUEUE_WAIT_SECONDS =
|
|
43
|
+
DONE_QUEUE_WAIT_SECONDS = 5 # done队列等待时间
|
|
47
44
|
UPLOAD_QUEUE_WAIT_SECONDS = 15 # upload队列等待时间
|
|
48
|
-
SEED_RESET_SECONDS =
|
|
45
|
+
SEED_RESET_SECONDS = 30 # 种子重制时间
|
|
49
46
|
|
|
50
47
|
|
|
51
48
|
# Launcher 队列长度
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{cobweb-launcher-1.2.5 → cobweb-launcher-1.2.7}/cobweb_launcher.egg-info/dependency_links.txt
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|