cobweb-launcher: cobweb_launcher-0.0.15-py3-none-any.whl → cobweb_launcher-0.0.16-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- cobweb/distributed/launcher.py +27 -17
- cobweb/task.py +3 -0
- {cobweb_launcher-0.0.15.dist-info → cobweb_launcher-0.0.16.dist-info}/METADATA +1 -1
- {cobweb_launcher-0.0.15.dist-info → cobweb_launcher-0.0.16.dist-info}/RECORD +7 -7
- {cobweb_launcher-0.0.15.dist-info → cobweb_launcher-0.0.16.dist-info}/LICENSE +0 -0
- {cobweb_launcher-0.0.15.dist-info → cobweb_launcher-0.0.16.dist-info}/WHEEL +0 -0
- {cobweb_launcher-0.0.15.dist-info → cobweb_launcher-0.0.16.dist-info}/top_level.txt +0 -0
cobweb/distributed/launcher.py
CHANGED
@@ -68,10 +68,11 @@ def get_storer_db(db):
|
|
68
68
|
raise TypeError()
|
69
69
|
|
70
70
|
|
71
|
-
def check(stop, last, spider, scheduler, storer_list, ready_seed_length, spider_queue_length):
|
71
|
+
def check(model, stop, last, spider, scheduler, storer_list, ready_seed_length, spider_queue_length):
|
72
|
+
log.info("run check thread after 30 seconds...")
|
72
73
|
time.sleep(30)
|
73
74
|
spider_info = """
|
74
|
-
|
75
|
+
------------------- check: %s ------------------
|
75
76
|
running_spider_thread_num: {0}
|
76
77
|
redis_ready_seed_length: {1}
|
77
78
|
redis_spider_seed_length: {2}
|
@@ -80,20 +81,12 @@ def check(stop, last, spider, scheduler, storer_list, ready_seed_length, spider_
|
|
80
81
|
{4}
|
81
82
|
----------------------- end -----------------------"""
|
82
83
|
while True:
|
83
|
-
|
84
|
+
status = "running"
|
84
85
|
running_spider_thread_num = spider.spider_in_progress.length
|
85
86
|
redis_ready_seed_length = ready_seed_length()
|
86
87
|
redis_spider_seed_length = spider_queue_length()
|
87
88
|
memory_seed_queue_length = scheduler.queue.length
|
88
89
|
storer_upload_queue_list = []
|
89
|
-
storer_queue_empty = True
|
90
|
-
for storer in storer_list:
|
91
|
-
if storer.queue.length:
|
92
|
-
storer_queue_empty = False
|
93
|
-
storer_upload_queue_list.append(
|
94
|
-
f"{storer.__class__.__name__}: {storer.queue.length}"
|
95
|
-
)
|
96
|
-
storer_upload_queue_length_info = "\n ".join(storer_upload_queue_list)
|
97
90
|
|
98
91
|
if (
|
99
92
|
scheduler.stop and
|
@@ -101,14 +94,31 @@ def check(stop, last, spider, scheduler, storer_list, ready_seed_length, spider_
|
|
101
94
|
not memory_seed_queue_length and
|
102
95
|
not running_spider_thread_num
|
103
96
|
):
|
104
|
-
|
97
|
+
if not model:
|
98
|
+
log.info("spider is done?")
|
105
99
|
last.set()
|
100
|
+
time.sleep(3)
|
101
|
+
storer_queue_empty = True
|
102
|
+
for storer in storer_list:
|
103
|
+
if storer.queue.length:
|
104
|
+
storer_queue_empty = False
|
105
|
+
storer_upload_queue_list.append(
|
106
|
+
f"{storer.__class__.__name__} storer queue length: {storer.queue.length}"
|
107
|
+
)
|
106
108
|
if storer_queue_empty and not redis_spider_seed_length:
|
107
|
-
|
108
|
-
|
109
|
-
|
109
|
+
if model:
|
110
|
+
log.info("waiting for push seeds...")
|
111
|
+
status = "waiting"
|
112
|
+
time.sleep(30)
|
113
|
+
else:
|
114
|
+
log.info("spider done!")
|
115
|
+
break
|
116
|
+
|
117
|
+
last.clear()
|
110
118
|
|
119
|
+
storer_upload_queue_length_info = "\n ".join(storer_upload_queue_list)
|
111
120
|
log.info(spider_info.format(
|
121
|
+
status,
|
112
122
|
running_spider_thread_num,
|
113
123
|
redis_ready_seed_length,
|
114
124
|
redis_spider_seed_length,
|
@@ -182,7 +192,7 @@ def launcher(task):
|
|
182
192
|
storer_info_list = [storer_info_list]
|
183
193
|
|
184
194
|
# new item
|
185
|
-
item = type("Item", (object,), {"redis_client": redis_db})()
|
195
|
+
item = type("Item", (object,), {"redis_client": redis_db.client})()
|
186
196
|
|
187
197
|
if task.oss_config:
|
188
198
|
item.oss = OssDB(**task.oss_config)
|
@@ -289,7 +299,7 @@ def launcher(task):
|
|
289
299
|
# name="check_spider",
|
290
300
|
target=check,
|
291
301
|
args=(
|
292
|
-
stop, last, spider,
|
302
|
+
task.model, stop, last, spider,
|
293
303
|
scheduler, storer_list,
|
294
304
|
redis_db.ready_seed_length,
|
295
305
|
redis_db.spider_queue_length,
|
cobweb/task.py
CHANGED
@@ -5,6 +5,7 @@ class Task:
|
|
5
5
|
|
6
6
|
def __init__(
|
7
7
|
self,
|
8
|
+
model=None,
|
8
9
|
seeds=None,
|
9
10
|
project=None,
|
10
11
|
task_name=None,
|
@@ -30,6 +31,8 @@ class Task:
|
|
30
31
|
:param storer_queue_length:
|
31
32
|
:param scheduler_queue_length:
|
32
33
|
"""
|
34
|
+
self.model = model
|
35
|
+
|
33
36
|
self.seeds = struct_start_seeds(seeds)
|
34
37
|
self.project = project or "test"
|
35
38
|
self.task_name = task_name or "spider"
|
@@ -3,7 +3,7 @@ cobweb/bbb.py,sha256=Sckof5zgzWEz2tIjs6xxoLkrL7wUdogPw3AetlXIDUo,5684
|
|
3
3
|
cobweb/decorators.py,sha256=8KPSKL8xsiXOLv-kckkaDtK8LXM8d5gaRriGpuEgOQk,320
|
4
4
|
cobweb/interface.py,sha256=um_k2AAQl1HTOvfUlq914DjkpfZVwt2m1B65EpPKrmE,802
|
5
5
|
cobweb/log.py,sha256=Gb3_y4IzTo5pJohTggBCU9rK6-ZN3hgTOHkoXHyN6CU,2384
|
6
|
-
cobweb/task.py,sha256=
|
6
|
+
cobweb/task.py,sha256=0zPomzJOm374cpds0qHMZwX0y2qK_Gi_WnmOVcWPIEM,1401
|
7
7
|
cobweb/utils.py,sha256=3iQRn2s4fY-5ClrVo46RO9OhziRAyOn5hGJOQGjMMwQ,2176
|
8
8
|
cobweb/db/__init__.py,sha256=4m9lqmxZCRbaih3Z3rl_BT0GugMd0dkOIgu_P9aeC84,63
|
9
9
|
cobweb/db/oss_db.py,sha256=pPpWPeOGMVz9X5aqD3c_9zMwTvs6dncE9JgBCmKc3v0,4150
|
@@ -17,13 +17,13 @@ cobweb/db/storer/loghub.py,sha256=4VqZacXWhidzINHXQu2_-E0HOBRCcc86f6LkKfnXD5I,17
|
|
17
17
|
cobweb/db/storer/redis.py,sha256=7Q2XEQwBL6X_M1uvxzzuSBt6iw9piKw-_FWKm2INZDQ,412
|
18
18
|
cobweb/db/storer/textfile.py,sha256=3mDHMvF6Sh5fn3IHzWQxyTUd45V-zUoH8vY3EoRlMx0,415
|
19
19
|
cobweb/distributed/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
20
|
-
cobweb/distributed/launcher.py,sha256=
|
20
|
+
cobweb/distributed/launcher.py,sha256=32iFDWo59bzUok2jGuGvCe03J5PdwsF2wGpJLXBYFSw,10687
|
21
21
|
cobweb/distributed/models.py,sha256=wnqumVPEX6ENVNcvqo3WvALyb_pXVd1BdT0QS0xpzbQ,4412
|
22
22
|
cobweb/single/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
23
23
|
cobweb/single/models.py,sha256=lu8teNWnWcUwZFra8XmqyhzOAf3UyuEztwBr1Ne6pUs,2898
|
24
24
|
cobweb/single/nest.py,sha256=mL8q9a5BjtoeUyzXCIVw_vyUsNY8ltbvQpYIIpZEDFU,5012
|
25
|
-
cobweb_launcher-0.0.
|
26
|
-
cobweb_launcher-0.0.
|
27
|
-
cobweb_launcher-0.0.
|
28
|
-
cobweb_launcher-0.0.
|
29
|
-
cobweb_launcher-0.0.
|
25
|
+
cobweb_launcher-0.0.16.dist-info/LICENSE,sha256=z1rxSIGOyzcSb3orZxFPxzx-0C1vTocmswqBNxpKfEk,1063
|
26
|
+
cobweb_launcher-0.0.16.dist-info/METADATA,sha256=ad7tP97rdRX3pVo6zNm6JXPSrCL1399uLnuZRGeTdvI,1226
|
27
|
+
cobweb_launcher-0.0.16.dist-info/WHEEL,sha256=pkctZYzUS4AYVn6dJ-7367OJZivF2e8RA9b_ZBjif18,92
|
28
|
+
cobweb_launcher-0.0.16.dist-info/top_level.txt,sha256=4GETBGNsKqiCUezmT-mJn7tjhcDlu7nLIV5gGgHBW4I,7
|
29
|
+
cobweb_launcher-0.0.16.dist-info/RECORD,,
|
File without changes
|
File without changes
|
File without changes
|