cobweb-launcher 0.0.18__tar.gz → 0.0.20__tar.gz
This diff compares the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
- {cobweb-launcher-0.0.18 → cobweb-launcher-0.0.20}/PKG-INFO +1 -1
- {cobweb-launcher-0.0.18 → cobweb-launcher-0.0.20}/cobweb/db/redis_db.py +9 -2
- {cobweb-launcher-0.0.18 → cobweb-launcher-0.0.20}/cobweb/distributed/launcher.py +9 -26
- {cobweb-launcher-0.0.18 → cobweb-launcher-0.0.20}/cobweb/distributed/models.py +1 -1
- {cobweb-launcher-0.0.18 → cobweb-launcher-0.0.20}/cobweb_launcher.egg-info/PKG-INFO +1 -1
- {cobweb-launcher-0.0.18 → cobweb-launcher-0.0.20}/setup.py +1 -1
- {cobweb-launcher-0.0.18 → cobweb-launcher-0.0.20}/LICENSE +0 -0
- {cobweb-launcher-0.0.18 → cobweb-launcher-0.0.20}/README.md +0 -0
- {cobweb-launcher-0.0.18 → cobweb-launcher-0.0.20}/cobweb/__init__.py +0 -0
- {cobweb-launcher-0.0.18 → cobweb-launcher-0.0.20}/cobweb/bbb.py +0 -0
- {cobweb-launcher-0.0.18 → cobweb-launcher-0.0.20}/cobweb/db/__init__.py +0 -0
- {cobweb-launcher-0.0.18 → cobweb-launcher-0.0.20}/cobweb/db/oss_db.py +0 -0
- {cobweb-launcher-0.0.18 → cobweb-launcher-0.0.20}/cobweb/db/scheduler/__init__.py +0 -0
- {cobweb-launcher-0.0.18 → cobweb-launcher-0.0.20}/cobweb/db/scheduler/default.py +0 -0
- {cobweb-launcher-0.0.18 → cobweb-launcher-0.0.20}/cobweb/db/scheduler/textfile.py +0 -0
- {cobweb-launcher-0.0.18 → cobweb-launcher-0.0.20}/cobweb/db/storer/__init__.py +0 -0
- {cobweb-launcher-0.0.18 → cobweb-launcher-0.0.20}/cobweb/db/storer/console.py +0 -0
- {cobweb-launcher-0.0.18 → cobweb-launcher-0.0.20}/cobweb/db/storer/loghub.py +0 -0
- {cobweb-launcher-0.0.18 → cobweb-launcher-0.0.20}/cobweb/db/storer/redis.py +0 -0
- {cobweb-launcher-0.0.18 → cobweb-launcher-0.0.20}/cobweb/db/storer/textfile.py +0 -0
- {cobweb-launcher-0.0.18 → cobweb-launcher-0.0.20}/cobweb/decorators.py +0 -0
- {cobweb-launcher-0.0.18 → cobweb-launcher-0.0.20}/cobweb/distributed/__init__.py +0 -0
- {cobweb-launcher-0.0.18 → cobweb-launcher-0.0.20}/cobweb/interface.py +0 -0
- {cobweb-launcher-0.0.18 → cobweb-launcher-0.0.20}/cobweb/log.py +0 -0
- {cobweb-launcher-0.0.18 → cobweb-launcher-0.0.20}/cobweb/single/__init__.py +0 -0
- {cobweb-launcher-0.0.18 → cobweb-launcher-0.0.20}/cobweb/single/models.py +0 -0
- {cobweb-launcher-0.0.18 → cobweb-launcher-0.0.20}/cobweb/single/nest.py +0 -0
- {cobweb-launcher-0.0.18 → cobweb-launcher-0.0.20}/cobweb/task.py +0 -0
- {cobweb-launcher-0.0.18 → cobweb-launcher-0.0.20}/cobweb/utils.py +0 -0
- {cobweb-launcher-0.0.18 → cobweb-launcher-0.0.20}/cobweb_launcher.egg-info/SOURCES.txt +0 -0
- {cobweb-launcher-0.0.18 → cobweb-launcher-0.0.20}/cobweb_launcher.egg-info/dependency_links.txt +0 -0
- {cobweb-launcher-0.0.18 → cobweb-launcher-0.0.20}/cobweb_launcher.egg-info/requires.txt +0 -0
- {cobweb-launcher-0.0.18 → cobweb-launcher-0.0.20}/cobweb_launcher.egg-info/top_level.txt +0 -0
- {cobweb-launcher-0.0.18 → cobweb-launcher-0.0.20}/setup.cfg +0 -0
@@ -8,6 +8,7 @@ class RedisDB:
|
|
8
8
|
|
9
9
|
def __init__(
|
10
10
|
self,
|
11
|
+
model: int,
|
11
12
|
project: str,
|
12
13
|
task_name: str,
|
13
14
|
config: dict
|
@@ -22,6 +23,7 @@ class RedisDB:
|
|
22
23
|
self.check_lock = f"{project}:{task_name}:check_seed_lock" # redis type string
|
23
24
|
self.scheduler_lock = f"{project}:{task_name}:scheduler_lock" # redis type string
|
24
25
|
self.client = redis.Redis(connection_pool=pool)
|
26
|
+
self.model = model
|
25
27
|
|
26
28
|
@check_redis_status
|
27
29
|
def _get_lock(self, key, t=15, timeout=3, sleep_time=0.1):
|
@@ -86,8 +88,13 @@ class RedisDB:
|
|
86
88
|
seeds = [seed if isinstance(seed, Seed) else Seed(seed) for seed in seeds]
|
87
89
|
|
88
90
|
if seeds:
|
89
|
-
redis_key = self.succeed_key if spider_status else self.failed_key
|
90
|
-
|
91
|
+
# redis_key = self.succeed_key if spider_status else self.failed_key
|
92
|
+
redis_key = None
|
93
|
+
if isinstance(self.model, int):
|
94
|
+
if self.model == 2 and spider_status:
|
95
|
+
redis_key = self.succeed_key
|
96
|
+
if redis_key:
|
97
|
+
self.client.sadd(redis_key, *(str(seed) for seed in seeds))
|
91
98
|
self.client.zrem(self.spider_key, *(seed.format_seed for seed in seeds))
|
92
99
|
|
93
100
|
@check_redis_status
|
@@ -6,29 +6,7 @@ from importlib import import_module
|
|
6
6
|
from cobweb import log, Queue, DBItem, RedisDB, OssDB, StorerInterface
|
7
7
|
from cobweb.utils import struct_queue_name, restore_table_name
|
8
8
|
from .models import Scheduler, Spider, Storer
|
9
|
-
|
10
|
-
|
11
|
-
# def start_seeds(seeds):
|
12
|
-
# if not seeds:
|
13
|
-
# return None
|
14
|
-
# if any(isinstance(seeds, t) for t in (list, tuple)):
|
15
|
-
# return [Seed(seed) for seed in seeds]
|
16
|
-
# elif any(isinstance(seeds, t) for t in (str, dict)):
|
17
|
-
# return Seed(seeds)
|
18
|
-
|
19
|
-
|
20
|
-
# def parse_storer_info(storer_info):
|
21
|
-
# storer_data = {}
|
22
|
-
# storer_info_list = []
|
23
|
-
# if storer_info.__class__.__name__ == 'StorerInfo':
|
24
|
-
# storer_info_list.append(storer_info)
|
25
|
-
# elif any(isinstance(storer_info, t) for t in (list, tuple)):
|
26
|
-
# storer_info_list = storer_info
|
27
|
-
# for info in storer_info_list:
|
28
|
-
# db_name = info.DB.__name__
|
29
|
-
# storer_data.setdefault(db_name, {"StorerDB": info.DB, "db_args_list": []})
|
30
|
-
# storer_data[db_name]["db_args_list"].append(info[1:])
|
31
|
-
# return storer_data
|
9
|
+
|
32
10
|
|
33
11
|
def get_scheduler_db(db):
|
34
12
|
if isinstance(db, str):
|
@@ -87,7 +65,10 @@ def check(model, stop, last, spider, scheduler, storer_list, ready_seed_length,
|
|
87
65
|
redis_spider_seed_length = spider_queue_length()
|
88
66
|
memory_seed_queue_length = scheduler.queue.length
|
89
67
|
storer_upload_queue_list = []
|
90
|
-
|
68
|
+
for storer in storer_list:
|
69
|
+
storer_upload_queue_list.append(
|
70
|
+
f"{storer.__class__.__name__} storer queue length: {storer.queue.length}"
|
71
|
+
)
|
91
72
|
if (
|
92
73
|
scheduler.stop and
|
93
74
|
not redis_ready_seed_length and
|
@@ -99,6 +80,7 @@ def check(model, stop, last, spider, scheduler, storer_list, ready_seed_length,
|
|
99
80
|
last.set()
|
100
81
|
time.sleep(3)
|
101
82
|
storer_queue_empty = True
|
83
|
+
storer_upload_queue_list = []
|
102
84
|
for storer in storer_list:
|
103
85
|
if storer.queue.length:
|
104
86
|
storer_queue_empty = False
|
@@ -116,7 +98,8 @@ def check(model, stop, last, spider, scheduler, storer_list, ready_seed_length,
|
|
116
98
|
|
117
99
|
last.clear()
|
118
100
|
|
119
|
-
storer_upload_queue_length_info = "\n ".join(
|
101
|
+
storer_upload_queue_length_info = "\n ".join(
|
102
|
+
storer_upload_queue_list) if storer_upload_queue_list else "None"
|
120
103
|
log.info(spider_info.format(
|
121
104
|
status,
|
122
105
|
running_spider_thread_num,
|
@@ -158,7 +141,7 @@ def launcher(task):
|
|
158
141
|
stop = threading.Event()
|
159
142
|
|
160
143
|
# 初始化redis信息
|
161
|
-
redis_db = RedisDB(task.project, task.task_name, task.redis_info)
|
144
|
+
redis_db = RedisDB(task.model, task.project, task.task_name, task.redis_info)
|
162
145
|
|
163
146
|
log.info("初始化cobweb!")
|
164
147
|
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
{cobweb-launcher-0.0.18 → cobweb-launcher-0.0.20}/cobweb_launcher.egg-info/dependency_links.txt
RENAMED
File without changes
|
File without changes
|
File without changes
|
File without changes
|