cobweb-launcher 0.0.17__py3-none-any.whl → 0.0.19__py3-none-any.whl
Sign up to get free protection for your applications and to get access to all the features.
- cobweb/db/oss_db.py +4 -2
- cobweb/db/redis_db.py +9 -2
- cobweb/distributed/launcher.py +8 -25
- cobweb/distributed/models.py +3 -1
- {cobweb_launcher-0.0.17.dist-info → cobweb_launcher-0.0.19.dist-info}/METADATA +1 -1
- {cobweb_launcher-0.0.17.dist-info → cobweb_launcher-0.0.19.dist-info}/RECORD +9 -9
- {cobweb_launcher-0.0.17.dist-info → cobweb_launcher-0.0.19.dist-info}/LICENSE +0 -0
- {cobweb_launcher-0.0.17.dist-info → cobweb_launcher-0.0.19.dist-info}/WHEEL +0 -0
- {cobweb_launcher-0.0.17.dist-info → cobweb_launcher-0.0.19.dist-info}/top_level.txt +0 -0
cobweb/db/oss_db.py
CHANGED
@@ -37,8 +37,8 @@ class OssDB:
|
|
37
37
|
|
38
38
|
units = ["KB", "MB", "GB", "TB"]
|
39
39
|
for i in range(3):
|
40
|
-
num = length / 1024 ** (i + 1)
|
41
|
-
if num
|
40
|
+
num = length / (1024 ** (i + 1))
|
41
|
+
if num <= 1024:
|
42
42
|
return f"{round(num, 2)} {units[i]}"
|
43
43
|
|
44
44
|
def assemble(self, ready_data, part_data):
|
@@ -110,6 +110,8 @@ class OssDB:
|
|
110
110
|
f"file size: {format_upload}"
|
111
111
|
)
|
112
112
|
status = True
|
113
|
+
except ValueError as e:
|
114
|
+
log.exception(str(e))
|
113
115
|
except oss2.exceptions.RequestError as e:
|
114
116
|
self.bucket = oss2.Bucket(
|
115
117
|
auth=self.auth,
|
cobweb/db/redis_db.py
CHANGED
@@ -8,6 +8,7 @@ class RedisDB:
|
|
8
8
|
|
9
9
|
def __init__(
|
10
10
|
self,
|
11
|
+
model: int,
|
11
12
|
project: str,
|
12
13
|
task_name: str,
|
13
14
|
config: dict
|
@@ -22,6 +23,7 @@ class RedisDB:
|
|
22
23
|
self.check_lock = f"{project}:{task_name}:check_seed_lock" # redis type string
|
23
24
|
self.scheduler_lock = f"{project}:{task_name}:scheduler_lock" # redis type string
|
24
25
|
self.client = redis.Redis(connection_pool=pool)
|
26
|
+
self.model = model
|
25
27
|
|
26
28
|
@check_redis_status
|
27
29
|
def _get_lock(self, key, t=15, timeout=3, sleep_time=0.1):
|
@@ -86,8 +88,13 @@ class RedisDB:
|
|
86
88
|
seeds = [seed if isinstance(seed, Seed) else Seed(seed) for seed in seeds]
|
87
89
|
|
88
90
|
if seeds:
|
89
|
-
redis_key = self.succeed_key if spider_status else self.failed_key
|
90
|
-
|
91
|
+
# redis_key = self.succeed_key if spider_status else self.failed_key
|
92
|
+
redis_key = None
|
93
|
+
if isinstance(self.model, int):
|
94
|
+
if self.model == 2 and spider_status:
|
95
|
+
redis_key = self.succeed_key
|
96
|
+
if redis_key:
|
97
|
+
self.client.sadd(redis_key, *(str(seed) for seed in seeds))
|
91
98
|
self.client.zrem(self.spider_key, *(seed.format_seed for seed in seeds))
|
92
99
|
|
93
100
|
@check_redis_status
|
cobweb/distributed/launcher.py
CHANGED
@@ -6,29 +6,7 @@ from importlib import import_module
|
|
6
6
|
from cobweb import log, Queue, DBItem, RedisDB, OssDB, StorerInterface
|
7
7
|
from cobweb.utils import struct_queue_name, restore_table_name
|
8
8
|
from .models import Scheduler, Spider, Storer
|
9
|
-
|
10
|
-
|
11
|
-
# def start_seeds(seeds):
|
12
|
-
# if not seeds:
|
13
|
-
# return None
|
14
|
-
# if any(isinstance(seeds, t) for t in (list, tuple)):
|
15
|
-
# return [Seed(seed) for seed in seeds]
|
16
|
-
# elif any(isinstance(seeds, t) for t in (str, dict)):
|
17
|
-
# return Seed(seeds)
|
18
|
-
|
19
|
-
|
20
|
-
# def parse_storer_info(storer_info):
|
21
|
-
# storer_data = {}
|
22
|
-
# storer_info_list = []
|
23
|
-
# if storer_info.__class__.__name__ == 'StorerInfo':
|
24
|
-
# storer_info_list.append(storer_info)
|
25
|
-
# elif any(isinstance(storer_info, t) for t in (list, tuple)):
|
26
|
-
# storer_info_list = storer_info
|
27
|
-
# for info in storer_info_list:
|
28
|
-
# db_name = info.DB.__name__
|
29
|
-
# storer_data.setdefault(db_name, {"StorerDB": info.DB, "db_args_list": []})
|
30
|
-
# storer_data[db_name]["db_args_list"].append(info[1:])
|
31
|
-
# return storer_data
|
9
|
+
|
32
10
|
|
33
11
|
def get_scheduler_db(db):
|
34
12
|
if isinstance(db, str):
|
@@ -87,7 +65,10 @@ def check(model, stop, last, spider, scheduler, storer_list, ready_seed_length,
|
|
87
65
|
redis_spider_seed_length = spider_queue_length()
|
88
66
|
memory_seed_queue_length = scheduler.queue.length
|
89
67
|
storer_upload_queue_list = []
|
90
|
-
|
68
|
+
for storer in storer_list:
|
69
|
+
storer_upload_queue_list.append(
|
70
|
+
f"{storer.__class__.__name__} storer queue length: {storer.queue.length}"
|
71
|
+
)
|
91
72
|
if (
|
92
73
|
scheduler.stop and
|
93
74
|
not redis_ready_seed_length and
|
@@ -99,6 +80,7 @@ def check(model, stop, last, spider, scheduler, storer_list, ready_seed_length,
|
|
99
80
|
last.set()
|
100
81
|
time.sleep(3)
|
101
82
|
storer_queue_empty = True
|
83
|
+
storer_upload_queue_list = []
|
102
84
|
for storer in storer_list:
|
103
85
|
if storer.queue.length:
|
104
86
|
storer_queue_empty = False
|
@@ -116,7 +98,8 @@ def check(model, stop, last, spider, scheduler, storer_list, ready_seed_length,
|
|
116
98
|
|
117
99
|
last.clear()
|
118
100
|
|
119
|
-
storer_upload_queue_length_info = "\n ".join(
|
101
|
+
storer_upload_queue_length_info = "\n ".join(
|
102
|
+
storer_upload_queue_list) if storer_upload_queue_list else "None"
|
120
103
|
log.info(spider_info.format(
|
121
104
|
status,
|
122
105
|
running_spider_thread_num,
|
cobweb/distributed/models.py
CHANGED
@@ -60,7 +60,7 @@ class Spider:
|
|
60
60
|
del_seed(seed, spider_status=False)
|
61
61
|
continue
|
62
62
|
try:
|
63
|
-
self.spider_in_progress.push(1)
|
63
|
+
self.spider_in_progress.push(1, direct_insertion=True)
|
64
64
|
# log.info("spider seed: " + str(seed))
|
65
65
|
status = None
|
66
66
|
store_queue = None
|
@@ -76,6 +76,8 @@ class Spider:
|
|
76
76
|
self.queue.push([s if isinstance(s, Seed) else Seed(s) for s in it])
|
77
77
|
elif isinstance(it, bool):
|
78
78
|
status = it
|
79
|
+
elif it is None:
|
80
|
+
status = False
|
79
81
|
|
80
82
|
if store_queue and store_data:
|
81
83
|
store_data.append(seed)
|
@@ -6,8 +6,8 @@ cobweb/log.py,sha256=Gb3_y4IzTo5pJohTggBCU9rK6-ZN3hgTOHkoXHyN6CU,2384
|
|
6
6
|
cobweb/task.py,sha256=0zPomzJOm374cpds0qHMZwX0y2qK_Gi_WnmOVcWPIEM,1401
|
7
7
|
cobweb/utils.py,sha256=3iQRn2s4fY-5ClrVo46RO9OhziRAyOn5hGJOQGjMMwQ,2176
|
8
8
|
cobweb/db/__init__.py,sha256=4m9lqmxZCRbaih3Z3rl_BT0GugMd0dkOIgu_P9aeC84,63
|
9
|
-
cobweb/db/oss_db.py,sha256=
|
10
|
-
cobweb/db/redis_db.py,sha256=
|
9
|
+
cobweb/db/oss_db.py,sha256=lFGNuH3tdIMsohVXQ_fTZPyBfS2oxYNmFNuQ-ZBQgm0,4221
|
10
|
+
cobweb/db/redis_db.py,sha256=OvJMgGKn20h3NKiTzAkYci9yBFIidlyAJP1bme2FDgI,7936
|
11
11
|
cobweb/db/scheduler/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
12
12
|
cobweb/db/scheduler/default.py,sha256=OxmFX7OvMEhKEq-NF7A8I9cA4V4qWw5vayS-yIbng0A,114
|
13
13
|
cobweb/db/scheduler/textfile.py,sha256=atRDeNT-e5toNvyGsCXAxL1FJi77uSYktdCzH_hXGo8,821
|
@@ -17,13 +17,13 @@ cobweb/db/storer/loghub.py,sha256=4VqZacXWhidzINHXQu2_-E0HOBRCcc86f6LkKfnXD5I,17
|
|
17
17
|
cobweb/db/storer/redis.py,sha256=7Q2XEQwBL6X_M1uvxzzuSBt6iw9piKw-_FWKm2INZDQ,412
|
18
18
|
cobweb/db/storer/textfile.py,sha256=3mDHMvF6Sh5fn3IHzWQxyTUd45V-zUoH8vY3EoRlMx0,415
|
19
19
|
cobweb/distributed/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
20
|
-
cobweb/distributed/launcher.py,sha256=
|
21
|
-
cobweb/distributed/models.py,sha256=
|
20
|
+
cobweb/distributed/launcher.py,sha256=CgsrAZX6m0BM91uw085fw5oecGi2gHgXjnNGejar1gM,10116
|
21
|
+
cobweb/distributed/models.py,sha256=5rRy16BHBq4EwWZxHcH_hJ6GJ0jEZ9hechSlZxzvdFQ,4513
|
22
22
|
cobweb/single/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
23
23
|
cobweb/single/models.py,sha256=lu8teNWnWcUwZFra8XmqyhzOAf3UyuEztwBr1Ne6pUs,2898
|
24
24
|
cobweb/single/nest.py,sha256=mL8q9a5BjtoeUyzXCIVw_vyUsNY8ltbvQpYIIpZEDFU,5012
|
25
|
-
cobweb_launcher-0.0.
|
26
|
-
cobweb_launcher-0.0.
|
27
|
-
cobweb_launcher-0.0.
|
28
|
-
cobweb_launcher-0.0.
|
29
|
-
cobweb_launcher-0.0.
|
25
|
+
cobweb_launcher-0.0.19.dist-info/LICENSE,sha256=z1rxSIGOyzcSb3orZxFPxzx-0C1vTocmswqBNxpKfEk,1063
|
26
|
+
cobweb_launcher-0.0.19.dist-info/METADATA,sha256=s81yOSvm3whrUbKiii7wwNvbbKY8oYvSNagLrQnU_9o,1226
|
27
|
+
cobweb_launcher-0.0.19.dist-info/WHEEL,sha256=pkctZYzUS4AYVn6dJ-7367OJZivF2e8RA9b_ZBjif18,92
|
28
|
+
cobweb_launcher-0.0.19.dist-info/top_level.txt,sha256=4GETBGNsKqiCUezmT-mJn7tjhcDlu7nLIV5gGgHBW4I,7
|
29
|
+
cobweb_launcher-0.0.19.dist-info/RECORD,,
|
File without changes
|
File without changes
|
File without changes
|