cobweb-launcher 0.0.17__py3-none-any.whl → 0.0.19__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- cobweb/db/oss_db.py +4 -2
- cobweb/db/redis_db.py +9 -2
- cobweb/distributed/launcher.py +8 -25
- cobweb/distributed/models.py +3 -1
- {cobweb_launcher-0.0.17.dist-info → cobweb_launcher-0.0.19.dist-info}/METADATA +1 -1
- {cobweb_launcher-0.0.17.dist-info → cobweb_launcher-0.0.19.dist-info}/RECORD +9 -9
- {cobweb_launcher-0.0.17.dist-info → cobweb_launcher-0.0.19.dist-info}/LICENSE +0 -0
- {cobweb_launcher-0.0.17.dist-info → cobweb_launcher-0.0.19.dist-info}/WHEEL +0 -0
- {cobweb_launcher-0.0.17.dist-info → cobweb_launcher-0.0.19.dist-info}/top_level.txt +0 -0
cobweb/db/oss_db.py
CHANGED
@@ -37,8 +37,8 @@ class OssDB:
|
|
37
37
|
|
38
38
|
units = ["KB", "MB", "GB", "TB"]
|
39
39
|
for i in range(3):
|
40
|
-
num = length / 1024 ** (i + 1)
|
41
|
-
if num
|
40
|
+
num = length / (1024 ** (i + 1))
|
41
|
+
if num <= 1024:
|
42
42
|
return f"{round(num, 2)} {units[i]}"
|
43
43
|
|
44
44
|
def assemble(self, ready_data, part_data):
|
@@ -110,6 +110,8 @@ class OssDB:
|
|
110
110
|
f"file size: {format_upload}"
|
111
111
|
)
|
112
112
|
status = True
|
113
|
+
except ValueError as e:
|
114
|
+
log.exception(str(e))
|
113
115
|
except oss2.exceptions.RequestError as e:
|
114
116
|
self.bucket = oss2.Bucket(
|
115
117
|
auth=self.auth,
|
cobweb/db/redis_db.py
CHANGED
@@ -8,6 +8,7 @@ class RedisDB:
|
|
8
8
|
|
9
9
|
def __init__(
|
10
10
|
self,
|
11
|
+
model: int,
|
11
12
|
project: str,
|
12
13
|
task_name: str,
|
13
14
|
config: dict
|
@@ -22,6 +23,7 @@ class RedisDB:
|
|
22
23
|
self.check_lock = f"{project}:{task_name}:check_seed_lock" # redis type string
|
23
24
|
self.scheduler_lock = f"{project}:{task_name}:scheduler_lock" # redis type string
|
24
25
|
self.client = redis.Redis(connection_pool=pool)
|
26
|
+
self.model = model
|
25
27
|
|
26
28
|
@check_redis_status
|
27
29
|
def _get_lock(self, key, t=15, timeout=3, sleep_time=0.1):
|
@@ -86,8 +88,13 @@ class RedisDB:
|
|
86
88
|
seeds = [seed if isinstance(seed, Seed) else Seed(seed) for seed in seeds]
|
87
89
|
|
88
90
|
if seeds:
|
89
|
-
redis_key = self.succeed_key if spider_status else self.failed_key
|
90
|
-
|
91
|
+
# redis_key = self.succeed_key if spider_status else self.failed_key
|
92
|
+
redis_key = None
|
93
|
+
if isinstance(self.model, int):
|
94
|
+
if self.model == 2 and spider_status:
|
95
|
+
redis_key = self.succeed_key
|
96
|
+
if redis_key:
|
97
|
+
self.client.sadd(redis_key, *(str(seed) for seed in seeds))
|
91
98
|
self.client.zrem(self.spider_key, *(seed.format_seed for seed in seeds))
|
92
99
|
|
93
100
|
@check_redis_status
|
cobweb/distributed/launcher.py
CHANGED
@@ -6,29 +6,7 @@ from importlib import import_module
|
|
6
6
|
from cobweb import log, Queue, DBItem, RedisDB, OssDB, StorerInterface
|
7
7
|
from cobweb.utils import struct_queue_name, restore_table_name
|
8
8
|
from .models import Scheduler, Spider, Storer
|
9
|
-
|
10
|
-
|
11
|
-
# def start_seeds(seeds):
|
12
|
-
# if not seeds:
|
13
|
-
# return None
|
14
|
-
# if any(isinstance(seeds, t) for t in (list, tuple)):
|
15
|
-
# return [Seed(seed) for seed in seeds]
|
16
|
-
# elif any(isinstance(seeds, t) for t in (str, dict)):
|
17
|
-
# return Seed(seeds)
|
18
|
-
|
19
|
-
|
20
|
-
# def parse_storer_info(storer_info):
|
21
|
-
# storer_data = {}
|
22
|
-
# storer_info_list = []
|
23
|
-
# if storer_info.__class__.__name__ == 'StorerInfo':
|
24
|
-
# storer_info_list.append(storer_info)
|
25
|
-
# elif any(isinstance(storer_info, t) for t in (list, tuple)):
|
26
|
-
# storer_info_list = storer_info
|
27
|
-
# for info in storer_info_list:
|
28
|
-
# db_name = info.DB.__name__
|
29
|
-
# storer_data.setdefault(db_name, {"StorerDB": info.DB, "db_args_list": []})
|
30
|
-
# storer_data[db_name]["db_args_list"].append(info[1:])
|
31
|
-
# return storer_data
|
9
|
+
|
32
10
|
|
33
11
|
def get_scheduler_db(db):
|
34
12
|
if isinstance(db, str):
|
@@ -87,7 +65,10 @@ def check(model, stop, last, spider, scheduler, storer_list, ready_seed_length,
|
|
87
65
|
redis_spider_seed_length = spider_queue_length()
|
88
66
|
memory_seed_queue_length = scheduler.queue.length
|
89
67
|
storer_upload_queue_list = []
|
90
|
-
|
68
|
+
for storer in storer_list:
|
69
|
+
storer_upload_queue_list.append(
|
70
|
+
f"{storer.__class__.__name__} storer queue length: {storer.queue.length}"
|
71
|
+
)
|
91
72
|
if (
|
92
73
|
scheduler.stop and
|
93
74
|
not redis_ready_seed_length and
|
@@ -99,6 +80,7 @@ def check(model, stop, last, spider, scheduler, storer_list, ready_seed_length,
|
|
99
80
|
last.set()
|
100
81
|
time.sleep(3)
|
101
82
|
storer_queue_empty = True
|
83
|
+
storer_upload_queue_list = []
|
102
84
|
for storer in storer_list:
|
103
85
|
if storer.queue.length:
|
104
86
|
storer_queue_empty = False
|
@@ -116,7 +98,8 @@ def check(model, stop, last, spider, scheduler, storer_list, ready_seed_length,
|
|
116
98
|
|
117
99
|
last.clear()
|
118
100
|
|
119
|
-
storer_upload_queue_length_info = "\n ".join(
|
101
|
+
storer_upload_queue_length_info = "\n ".join(
|
102
|
+
storer_upload_queue_list) if storer_upload_queue_list else "None"
|
120
103
|
log.info(spider_info.format(
|
121
104
|
status,
|
122
105
|
running_spider_thread_num,
|
cobweb/distributed/models.py
CHANGED
@@ -60,7 +60,7 @@ class Spider:
|
|
60
60
|
del_seed(seed, spider_status=False)
|
61
61
|
continue
|
62
62
|
try:
|
63
|
-
self.spider_in_progress.push(1)
|
63
|
+
self.spider_in_progress.push(1, direct_insertion=True)
|
64
64
|
# log.info("spider seed: " + str(seed))
|
65
65
|
status = None
|
66
66
|
store_queue = None
|
@@ -76,6 +76,8 @@ class Spider:
|
|
76
76
|
self.queue.push([s if isinstance(s, Seed) else Seed(s) for s in it])
|
77
77
|
elif isinstance(it, bool):
|
78
78
|
status = it
|
79
|
+
elif it is None:
|
80
|
+
status = False
|
79
81
|
|
80
82
|
if store_queue and store_data:
|
81
83
|
store_data.append(seed)
|
@@ -6,8 +6,8 @@ cobweb/log.py,sha256=Gb3_y4IzTo5pJohTggBCU9rK6-ZN3hgTOHkoXHyN6CU,2384
|
|
6
6
|
cobweb/task.py,sha256=0zPomzJOm374cpds0qHMZwX0y2qK_Gi_WnmOVcWPIEM,1401
|
7
7
|
cobweb/utils.py,sha256=3iQRn2s4fY-5ClrVo46RO9OhziRAyOn5hGJOQGjMMwQ,2176
|
8
8
|
cobweb/db/__init__.py,sha256=4m9lqmxZCRbaih3Z3rl_BT0GugMd0dkOIgu_P9aeC84,63
|
9
|
-
cobweb/db/oss_db.py,sha256=
|
10
|
-
cobweb/db/redis_db.py,sha256=
|
9
|
+
cobweb/db/oss_db.py,sha256=lFGNuH3tdIMsohVXQ_fTZPyBfS2oxYNmFNuQ-ZBQgm0,4221
|
10
|
+
cobweb/db/redis_db.py,sha256=OvJMgGKn20h3NKiTzAkYci9yBFIidlyAJP1bme2FDgI,7936
|
11
11
|
cobweb/db/scheduler/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
12
12
|
cobweb/db/scheduler/default.py,sha256=OxmFX7OvMEhKEq-NF7A8I9cA4V4qWw5vayS-yIbng0A,114
|
13
13
|
cobweb/db/scheduler/textfile.py,sha256=atRDeNT-e5toNvyGsCXAxL1FJi77uSYktdCzH_hXGo8,821
|
@@ -17,13 +17,13 @@ cobweb/db/storer/loghub.py,sha256=4VqZacXWhidzINHXQu2_-E0HOBRCcc86f6LkKfnXD5I,17
|
|
17
17
|
cobweb/db/storer/redis.py,sha256=7Q2XEQwBL6X_M1uvxzzuSBt6iw9piKw-_FWKm2INZDQ,412
|
18
18
|
cobweb/db/storer/textfile.py,sha256=3mDHMvF6Sh5fn3IHzWQxyTUd45V-zUoH8vY3EoRlMx0,415
|
19
19
|
cobweb/distributed/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
20
|
-
cobweb/distributed/launcher.py,sha256=
|
21
|
-
cobweb/distributed/models.py,sha256=
|
20
|
+
cobweb/distributed/launcher.py,sha256=CgsrAZX6m0BM91uw085fw5oecGi2gHgXjnNGejar1gM,10116
|
21
|
+
cobweb/distributed/models.py,sha256=5rRy16BHBq4EwWZxHcH_hJ6GJ0jEZ9hechSlZxzvdFQ,4513
|
22
22
|
cobweb/single/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
23
23
|
cobweb/single/models.py,sha256=lu8teNWnWcUwZFra8XmqyhzOAf3UyuEztwBr1Ne6pUs,2898
|
24
24
|
cobweb/single/nest.py,sha256=mL8q9a5BjtoeUyzXCIVw_vyUsNY8ltbvQpYIIpZEDFU,5012
|
25
|
-
cobweb_launcher-0.0.
|
26
|
-
cobweb_launcher-0.0.
|
27
|
-
cobweb_launcher-0.0.
|
28
|
-
cobweb_launcher-0.0.
|
29
|
-
cobweb_launcher-0.0.
|
25
|
+
cobweb_launcher-0.0.19.dist-info/LICENSE,sha256=z1rxSIGOyzcSb3orZxFPxzx-0C1vTocmswqBNxpKfEk,1063
|
26
|
+
cobweb_launcher-0.0.19.dist-info/METADATA,sha256=s81yOSvm3whrUbKiii7wwNvbbKY8oYvSNagLrQnU_9o,1226
|
27
|
+
cobweb_launcher-0.0.19.dist-info/WHEEL,sha256=pkctZYzUS4AYVn6dJ-7367OJZivF2e8RA9b_ZBjif18,92
|
28
|
+
cobweb_launcher-0.0.19.dist-info/top_level.txt,sha256=4GETBGNsKqiCUezmT-mJn7tjhcDlu7nLIV5gGgHBW4I,7
|
29
|
+
cobweb_launcher-0.0.19.dist-info/RECORD,,
|
File without changes
|
File without changes
|
File without changes
|