cobweb-launcher 1.3.9__tar.gz → 1.3.10__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {cobweb-launcher-1.3.9/cobweb_launcher.egg-info → cobweb-launcher-1.3.10}/PKG-INFO +1 -1
- {cobweb-launcher-1.3.9 → cobweb-launcher-1.3.10}/cobweb/base/__init__.py +1 -1
- {cobweb-launcher-1.3.9 → cobweb-launcher-1.3.10}/cobweb/launchers/launcher.py +23 -24
- {cobweb-launcher-1.3.9 → cobweb-launcher-1.3.10}/cobweb/schedulers/scheduler_api.py +4 -3
- {cobweb-launcher-1.3.9 → cobweb-launcher-1.3.10}/cobweb/schedulers/scheduler_redis.py +4 -3
- {cobweb-launcher-1.3.9 → cobweb-launcher-1.3.10/cobweb_launcher.egg-info}/PKG-INFO +1 -1
- {cobweb-launcher-1.3.9 → cobweb-launcher-1.3.10}/setup.py +1 -1
- {cobweb-launcher-1.3.9 → cobweb-launcher-1.3.10}/LICENSE +0 -0
- {cobweb-launcher-1.3.9 → cobweb-launcher-1.3.10}/README.md +0 -0
- {cobweb-launcher-1.3.9 → cobweb-launcher-1.3.10}/cobweb/__init__.py +0 -0
- {cobweb-launcher-1.3.9 → cobweb-launcher-1.3.10}/cobweb/base/basic.py +0 -0
- {cobweb-launcher-1.3.9 → cobweb-launcher-1.3.10}/cobweb/base/common_queue.py +0 -0
- {cobweb-launcher-1.3.9 → cobweb-launcher-1.3.10}/cobweb/base/dotting.py +0 -0
- {cobweb-launcher-1.3.9 → cobweb-launcher-1.3.10}/cobweb/base/item.py +0 -0
- {cobweb-launcher-1.3.9 → cobweb-launcher-1.3.10}/cobweb/base/log.py +0 -0
- {cobweb-launcher-1.3.9 → cobweb-launcher-1.3.10}/cobweb/base/request.py +0 -0
- {cobweb-launcher-1.3.9 → cobweb-launcher-1.3.10}/cobweb/base/response.py +0 -0
- {cobweb-launcher-1.3.9 → cobweb-launcher-1.3.10}/cobweb/base/seed.py +0 -0
- {cobweb-launcher-1.3.9 → cobweb-launcher-1.3.10}/cobweb/constant.py +0 -0
- {cobweb-launcher-1.3.9 → cobweb-launcher-1.3.10}/cobweb/crawlers/__init__.py +0 -0
- {cobweb-launcher-1.3.9 → cobweb-launcher-1.3.10}/cobweb/crawlers/crawler.py +0 -0
- {cobweb-launcher-1.3.9 → cobweb-launcher-1.3.10}/cobweb/db/__init__.py +0 -0
- {cobweb-launcher-1.3.9 → cobweb-launcher-1.3.10}/cobweb/db/api_db.py +0 -0
- {cobweb-launcher-1.3.9 → cobweb-launcher-1.3.10}/cobweb/db/redis_db.py +0 -0
- {cobweb-launcher-1.3.9 → cobweb-launcher-1.3.10}/cobweb/exceptions/__init__.py +0 -0
- {cobweb-launcher-1.3.9 → cobweb-launcher-1.3.10}/cobweb/exceptions/oss_db_exception.py +0 -0
- {cobweb-launcher-1.3.9 → cobweb-launcher-1.3.10}/cobweb/launchers/__init__.py +0 -0
- {cobweb-launcher-1.3.9 → cobweb-launcher-1.3.10}/cobweb/launchers/launcher_air.py +0 -0
- {cobweb-launcher-1.3.9 → cobweb-launcher-1.3.10}/cobweb/launchers/launcher_api.py +0 -0
- {cobweb-launcher-1.3.9 → cobweb-launcher-1.3.10}/cobweb/launchers/launcher_pro.py +0 -0
- {cobweb-launcher-1.3.9 → cobweb-launcher-1.3.10}/cobweb/pipelines/__init__.py +0 -0
- {cobweb-launcher-1.3.9 → cobweb-launcher-1.3.10}/cobweb/pipelines/pipeline.py +0 -0
- {cobweb-launcher-1.3.9 → cobweb-launcher-1.3.10}/cobweb/pipelines/pipeline_console.py +0 -0
- {cobweb-launcher-1.3.9 → cobweb-launcher-1.3.10}/cobweb/pipelines/pipeline_loghub.py +0 -0
- {cobweb-launcher-1.3.9 → cobweb-launcher-1.3.10}/cobweb/schedulers/__init__.py +0 -0
- {cobweb-launcher-1.3.9 → cobweb-launcher-1.3.10}/cobweb/setting.py +0 -0
- {cobweb-launcher-1.3.9 → cobweb-launcher-1.3.10}/cobweb/utils/__init__.py +0 -0
- {cobweb-launcher-1.3.9 → cobweb-launcher-1.3.10}/cobweb/utils/bloom.py +0 -0
- {cobweb-launcher-1.3.9 → cobweb-launcher-1.3.10}/cobweb/utils/oss.py +0 -0
- {cobweb-launcher-1.3.9 → cobweb-launcher-1.3.10}/cobweb/utils/tools.py +0 -0
- {cobweb-launcher-1.3.9 → cobweb-launcher-1.3.10}/cobweb_launcher.egg-info/SOURCES.txt +0 -0
- {cobweb-launcher-1.3.9 → cobweb-launcher-1.3.10}/cobweb_launcher.egg-info/dependency_links.txt +0 -0
- {cobweb-launcher-1.3.9 → cobweb-launcher-1.3.10}/cobweb_launcher.egg-info/requires.txt +0 -0
- {cobweb-launcher-1.3.9 → cobweb-launcher-1.3.10}/cobweb_launcher.egg-info/top_level.txt +0 -0
- {cobweb-launcher-1.3.9 → cobweb-launcher-1.3.10}/setup.cfg +0 -0
- {cobweb-launcher-1.3.9 → cobweb-launcher-1.3.10}/test/test.py +0 -0
@@ -59,7 +59,7 @@ class TaskQueue:
|
|
59
59
|
else:
|
60
60
|
raise TypeError(f"{crawler_func.__name__} function return type isn't supported")
|
61
61
|
TaskQueue.DOT.build(
|
62
|
-
topic=f"{os.getenv('PROJECT')}:{os.getenv('TASK')}",
|
62
|
+
topic=f"{os.getenv('PROJECT')}:{os.getenv('TASK')}:{tk.__class__.__name__}",
|
63
63
|
cost_time=round(time.time() - start_time, 2),
|
64
64
|
**tk.to_dict
|
65
65
|
)
|
@@ -144,30 +144,30 @@ class Launcher(threading.Thread):
|
|
144
144
|
|
145
145
|
@Decorators.stop
|
146
146
|
def _polling(self):
|
147
|
-
|
148
|
-
|
149
|
-
|
150
|
-
|
151
|
-
|
152
|
-
|
153
|
-
|
154
|
-
|
155
|
-
|
156
|
-
|
157
|
-
else:
|
158
|
-
logger.info("pause! waiting for resume...")
|
159
|
-
elif self.check_emtpy_times > 2:
|
160
|
-
logger.info("pause! waiting for resume...")
|
161
|
-
self.doing_seeds = {}
|
162
|
-
self._task_info['todo'] = {}
|
163
|
-
self._task_info['download'] = {}
|
164
|
-
self.pause.set()
|
147
|
+
time.sleep(10)
|
148
|
+
if self.pause.is_set():
|
149
|
+
run_time = int(time.time()) - self.app_time
|
150
|
+
if not self.task_model and run_time > self.before_scheduler_wait_seconds:
|
151
|
+
logger.info("Done! ready to close thread...")
|
152
|
+
self.stop.set()
|
153
|
+
elif TaskQueue.TODO.length or TaskQueue.DOWNLOAD.length:
|
154
|
+
logger.info(f"Recovery {self.task} task run!")
|
155
|
+
self.check_emtpy_times = 0
|
156
|
+
self.pause.clear()
|
165
157
|
else:
|
166
|
-
logger.info(
|
167
|
-
|
168
|
-
|
169
|
-
|
170
|
-
|
158
|
+
logger.info("pause! waiting for resume...")
|
159
|
+
elif TaskQueue.is_empty() and self.check_emtpy_times > 2:
|
160
|
+
logger.info("pause! waiting for resume...")
|
161
|
+
self.doing_seeds = {}
|
162
|
+
self._task_info['todo'] = {}
|
163
|
+
self._task_info['download'] = {}
|
164
|
+
self.pause.set()
|
165
|
+
elif TaskQueue.is_empty():
|
166
|
+
logger.info(
|
167
|
+
"check whether the task is complete, "
|
168
|
+
f"reset times {3 - self.check_emtpy_times}"
|
169
|
+
)
|
170
|
+
self.check_emtpy_times += 1
|
171
171
|
else:
|
172
172
|
logger.info(LogTemplate.launcher_polling.format(
|
173
173
|
task=self.task,
|
@@ -182,7 +182,6 @@ class Launcher(threading.Thread):
|
|
182
182
|
seed_queue_len=TaskQueue.SEED.length,
|
183
183
|
download_queue_len=TaskQueue.DOWNLOAD.length
|
184
184
|
))
|
185
|
-
time.sleep(10)
|
186
185
|
|
187
186
|
def run(self):
|
188
187
|
Crawler = dynamic_load_class(self.crawler_path)
|
@@ -33,7 +33,7 @@ class ApiScheduler:
|
|
33
33
|
members = self._db.members(key, source, count=count, _min=0, _max="(1000")
|
34
34
|
for member, priority in members:
|
35
35
|
# seed = Seed(member, priority=priority)
|
36
|
-
yield member
|
36
|
+
yield member, priority
|
37
37
|
|
38
38
|
def insert(self, key, items):
|
39
39
|
if items:
|
@@ -48,7 +48,8 @@ class ApiScheduler:
|
|
48
48
|
_min = reset_time - int(time.time()) if self.heartbeat else "-inf"
|
49
49
|
|
50
50
|
for key in keys:
|
51
|
-
self._db.
|
51
|
+
if self._db.exists(key):
|
52
|
+
self._db.members(key, 0, _min=_min, _max="(0")
|
52
53
|
|
53
54
|
if not self.heartbeat:
|
54
55
|
self.working.set()
|
@@ -58,7 +59,7 @@ class ApiScheduler:
|
|
58
59
|
|
59
60
|
def refresh(self, key, items: dict[str, int]):
|
60
61
|
refresh_time = int(time.time())
|
61
|
-
its = {k: -refresh_time - v / 1000 for k, v in items}
|
62
|
+
its = {k: -refresh_time - v / 1000 for k, v in items.items()}
|
62
63
|
if its:
|
63
64
|
self._db.zadd(key, item=its, xx=True)
|
64
65
|
|
@@ -33,7 +33,7 @@ class RedisScheduler:
|
|
33
33
|
members = self._db.members(key, source, count=count, _min=0, _max="(1000")
|
34
34
|
for member, priority in members:
|
35
35
|
# seed = Seed(member, priority=priority)
|
36
|
-
yield member
|
36
|
+
yield member, priority
|
37
37
|
|
38
38
|
def insert(self, key, items):
|
39
39
|
if items:
|
@@ -48,7 +48,8 @@ class RedisScheduler:
|
|
48
48
|
_min = reset_time - int(time.time()) if self.heartbeat else "-inf"
|
49
49
|
|
50
50
|
for key in keys:
|
51
|
-
self._db.
|
51
|
+
if self._db.exists(key):
|
52
|
+
self._db.members(key, 0, _min=_min, _max="(0")
|
52
53
|
|
53
54
|
if not self.heartbeat:
|
54
55
|
self.working.set()
|
@@ -58,7 +59,7 @@ class RedisScheduler:
|
|
58
59
|
|
59
60
|
def refresh(self, key, items: dict[str, int]):
|
60
61
|
refresh_time = int(time.time())
|
61
|
-
its = {k: -refresh_time - v / 1000 for k, v in items}
|
62
|
+
its = {k: -refresh_time - v / 1000 for k, v in items.items()}
|
62
63
|
if its:
|
63
64
|
self._db.zadd(key, item=its, xx=True)
|
64
65
|
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
{cobweb-launcher-1.3.9 → cobweb-launcher-1.3.10}/cobweb_launcher.egg-info/dependency_links.txt
RENAMED
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|