cobweb-launcher 0.0.14__tar.gz → 0.0.16__tar.gz
Sign up to get free protection for your applications and to get access to all the features.
- {cobweb-launcher-0.0.14 → cobweb-launcher-0.0.16}/PKG-INFO +1 -1
- {cobweb-launcher-0.0.14 → cobweb-launcher-0.0.16}/cobweb/bbb.py +6 -2
- {cobweb-launcher-0.0.14 → cobweb-launcher-0.0.16}/cobweb/distributed/launcher.py +27 -17
- {cobweb-launcher-0.0.14 → cobweb-launcher-0.0.16}/cobweb/task.py +3 -0
- {cobweb-launcher-0.0.14 → cobweb-launcher-0.0.16}/cobweb_launcher.egg-info/PKG-INFO +1 -1
- {cobweb-launcher-0.0.14 → cobweb-launcher-0.0.16}/setup.py +1 -1
- {cobweb-launcher-0.0.14 → cobweb-launcher-0.0.16}/LICENSE +0 -0
- {cobweb-launcher-0.0.14 → cobweb-launcher-0.0.16}/README.md +0 -0
- {cobweb-launcher-0.0.14 → cobweb-launcher-0.0.16}/cobweb/__init__.py +0 -0
- {cobweb-launcher-0.0.14 → cobweb-launcher-0.0.16}/cobweb/db/__init__.py +0 -0
- {cobweb-launcher-0.0.14 → cobweb-launcher-0.0.16}/cobweb/db/oss_db.py +0 -0
- {cobweb-launcher-0.0.14 → cobweb-launcher-0.0.16}/cobweb/db/redis_db.py +0 -0
- {cobweb-launcher-0.0.14 → cobweb-launcher-0.0.16}/cobweb/db/scheduler/__init__.py +0 -0
- {cobweb-launcher-0.0.14 → cobweb-launcher-0.0.16}/cobweb/db/scheduler/default.py +0 -0
- {cobweb-launcher-0.0.14 → cobweb-launcher-0.0.16}/cobweb/db/scheduler/textfile.py +0 -0
- {cobweb-launcher-0.0.14 → cobweb-launcher-0.0.16}/cobweb/db/storer/__init__.py +0 -0
- {cobweb-launcher-0.0.14 → cobweb-launcher-0.0.16}/cobweb/db/storer/console.py +0 -0
- {cobweb-launcher-0.0.14 → cobweb-launcher-0.0.16}/cobweb/db/storer/loghub.py +0 -0
- {cobweb-launcher-0.0.14 → cobweb-launcher-0.0.16}/cobweb/db/storer/redis.py +0 -0
- {cobweb-launcher-0.0.14 → cobweb-launcher-0.0.16}/cobweb/db/storer/textfile.py +0 -0
- {cobweb-launcher-0.0.14 → cobweb-launcher-0.0.16}/cobweb/decorators.py +0 -0
- {cobweb-launcher-0.0.14 → cobweb-launcher-0.0.16}/cobweb/distributed/__init__.py +0 -0
- {cobweb-launcher-0.0.14 → cobweb-launcher-0.0.16}/cobweb/distributed/models.py +0 -0
- {cobweb-launcher-0.0.14 → cobweb-launcher-0.0.16}/cobweb/interface.py +0 -0
- {cobweb-launcher-0.0.14 → cobweb-launcher-0.0.16}/cobweb/log.py +0 -0
- {cobweb-launcher-0.0.14 → cobweb-launcher-0.0.16}/cobweb/single/__init__.py +0 -0
- {cobweb-launcher-0.0.14 → cobweb-launcher-0.0.16}/cobweb/single/models.py +0 -0
- {cobweb-launcher-0.0.14 → cobweb-launcher-0.0.16}/cobweb/single/nest.py +0 -0
- {cobweb-launcher-0.0.14 → cobweb-launcher-0.0.16}/cobweb/utils.py +0 -0
- {cobweb-launcher-0.0.14 → cobweb-launcher-0.0.16}/cobweb_launcher.egg-info/SOURCES.txt +0 -0
- {cobweb-launcher-0.0.14 → cobweb-launcher-0.0.16}/cobweb_launcher.egg-info/dependency_links.txt +0 -0
- {cobweb-launcher-0.0.14 → cobweb-launcher-0.0.16}/cobweb_launcher.egg-info/requires.txt +0 -0
- {cobweb-launcher-0.0.14 → cobweb-launcher-0.0.16}/cobweb_launcher.egg-info/top_level.txt +0 -0
- {cobweb-launcher-0.0.14 → cobweb-launcher-0.0.16}/setup.cfg +0 -0
@@ -114,12 +114,16 @@ class Seed:
|
|
114
114
|
return f'{self.__class__.__name__}({", ".join(chars)})'
|
115
115
|
|
116
116
|
@property
|
117
|
-
def
|
117
|
+
def dict_seed(self):
|
118
118
|
seed = self.__dict__.copy()
|
119
119
|
del seed["_priority"]
|
120
120
|
del seed["_version"]
|
121
121
|
del seed["_retry"]
|
122
|
-
return
|
122
|
+
return seed
|
123
|
+
|
124
|
+
@property
|
125
|
+
def format_seed(self):
|
126
|
+
return json.dumps(self.dict_seed, ensure_ascii=False)
|
123
127
|
|
124
128
|
|
125
129
|
class DBItem:
|
@@ -68,10 +68,11 @@ def get_storer_db(db):
|
|
68
68
|
raise TypeError()
|
69
69
|
|
70
70
|
|
71
|
-
def check(stop, last, spider, scheduler, storer_list, ready_seed_length, spider_queue_length):
|
71
|
+
def check(model, stop, last, spider, scheduler, storer_list, ready_seed_length, spider_queue_length):
|
72
|
+
log.info("run check thread after 30 seconds...")
|
72
73
|
time.sleep(30)
|
73
74
|
spider_info = """
|
74
|
-
|
75
|
+
------------------- check: %s ------------------
|
75
76
|
running_spider_thread_num: {0}
|
76
77
|
redis_ready_seed_length: {1}
|
77
78
|
redis_spider_seed_length: {2}
|
@@ -80,20 +81,12 @@ def check(stop, last, spider, scheduler, storer_list, ready_seed_length, spider_
|
|
80
81
|
{4}
|
81
82
|
----------------------- end -----------------------"""
|
82
83
|
while True:
|
83
|
-
|
84
|
+
status = "running"
|
84
85
|
running_spider_thread_num = spider.spider_in_progress.length
|
85
86
|
redis_ready_seed_length = ready_seed_length()
|
86
87
|
redis_spider_seed_length = spider_queue_length()
|
87
88
|
memory_seed_queue_length = scheduler.queue.length
|
88
89
|
storer_upload_queue_list = []
|
89
|
-
storer_queue_empty = True
|
90
|
-
for storer in storer_list:
|
91
|
-
if storer.queue.length:
|
92
|
-
storer_queue_empty = False
|
93
|
-
storer_upload_queue_list.append(
|
94
|
-
f"{storer.__class__.__name__}: {storer.queue.length}"
|
95
|
-
)
|
96
|
-
storer_upload_queue_length_info = "\n ".join(storer_upload_queue_list)
|
97
90
|
|
98
91
|
if (
|
99
92
|
scheduler.stop and
|
@@ -101,14 +94,31 @@ def check(stop, last, spider, scheduler, storer_list, ready_seed_length, spider_
|
|
101
94
|
not memory_seed_queue_length and
|
102
95
|
not running_spider_thread_num
|
103
96
|
):
|
104
|
-
|
97
|
+
if not model:
|
98
|
+
log.info("spider is done?")
|
105
99
|
last.set()
|
100
|
+
time.sleep(3)
|
101
|
+
storer_queue_empty = True
|
102
|
+
for storer in storer_list:
|
103
|
+
if storer.queue.length:
|
104
|
+
storer_queue_empty = False
|
105
|
+
storer_upload_queue_list.append(
|
106
|
+
f"{storer.__class__.__name__} storer queue length: {storer.queue.length}"
|
107
|
+
)
|
106
108
|
if storer_queue_empty and not redis_spider_seed_length:
|
107
|
-
|
108
|
-
|
109
|
-
|
109
|
+
if model:
|
110
|
+
log.info("waiting for push seeds...")
|
111
|
+
status = "waiting"
|
112
|
+
time.sleep(30)
|
113
|
+
else:
|
114
|
+
log.info("spider done!")
|
115
|
+
break
|
116
|
+
|
117
|
+
last.clear()
|
110
118
|
|
119
|
+
storer_upload_queue_length_info = "\n ".join(storer_upload_queue_list)
|
111
120
|
log.info(spider_info.format(
|
121
|
+
status,
|
112
122
|
running_spider_thread_num,
|
113
123
|
redis_ready_seed_length,
|
114
124
|
redis_spider_seed_length,
|
@@ -182,7 +192,7 @@ def launcher(task):
|
|
182
192
|
storer_info_list = [storer_info_list]
|
183
193
|
|
184
194
|
# new item
|
185
|
-
item = type("Item", (object,), {"redis_client": redis_db})()
|
195
|
+
item = type("Item", (object,), {"redis_client": redis_db.client})()
|
186
196
|
|
187
197
|
if task.oss_config:
|
188
198
|
item.oss = OssDB(**task.oss_config)
|
@@ -289,7 +299,7 @@ def launcher(task):
|
|
289
299
|
# name="check_spider",
|
290
300
|
target=check,
|
291
301
|
args=(
|
292
|
-
stop, last, spider,
|
302
|
+
task.model, stop, last, spider,
|
293
303
|
scheduler, storer_list,
|
294
304
|
redis_db.ready_seed_length,
|
295
305
|
redis_db.spider_queue_length,
|
@@ -5,6 +5,7 @@ class Task:
|
|
5
5
|
|
6
6
|
def __init__(
|
7
7
|
self,
|
8
|
+
model=None,
|
8
9
|
seeds=None,
|
9
10
|
project=None,
|
10
11
|
task_name=None,
|
@@ -30,6 +31,8 @@ class Task:
|
|
30
31
|
:param storer_queue_length:
|
31
32
|
:param scheduler_queue_length:
|
32
33
|
"""
|
34
|
+
self.model = model
|
35
|
+
|
33
36
|
self.seeds = struct_start_seeds(seeds)
|
34
37
|
self.project = project or "test"
|
35
38
|
self.task_name = task_name or "spider"
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
{cobweb-launcher-0.0.14 → cobweb-launcher-0.0.16}/cobweb_launcher.egg-info/dependency_links.txt
RENAMED
File without changes
|
File without changes
|
File without changes
|
File without changes
|