cobweb-launcher 0.0.17__py3-none-any.whl → 0.0.19__py3-none-any.whl

Sign up to get free protection for your applications and to get access to all the features.
cobweb/db/oss_db.py CHANGED
@@ -37,8 +37,8 @@ class OssDB:
37
37
 
38
38
  units = ["KB", "MB", "GB", "TB"]
39
39
  for i in range(3):
40
- num = length / 1024 ** (i + 1)
41
- if num >= 1:
40
+ num = length / (1024 ** (i + 1))
41
+ if num <= 1024:
42
42
  return f"{round(num, 2)} {units[i]}"
43
43
 
44
44
  def assemble(self, ready_data, part_data):
@@ -110,6 +110,8 @@ class OssDB:
110
110
  f"file size: {format_upload}"
111
111
  )
112
112
  status = True
113
+ except ValueError as e:
114
+ log.exception(str(e))
113
115
  except oss2.exceptions.RequestError as e:
114
116
  self.bucket = oss2.Bucket(
115
117
  auth=self.auth,
cobweb/db/redis_db.py CHANGED
@@ -8,6 +8,7 @@ class RedisDB:
8
8
 
9
9
  def __init__(
10
10
  self,
11
+ model: int,
11
12
  project: str,
12
13
  task_name: str,
13
14
  config: dict
@@ -22,6 +23,7 @@ class RedisDB:
22
23
  self.check_lock = f"{project}:{task_name}:check_seed_lock" # redis type string
23
24
  self.scheduler_lock = f"{project}:{task_name}:scheduler_lock" # redis type string
24
25
  self.client = redis.Redis(connection_pool=pool)
26
+ self.model = model
25
27
 
26
28
  @check_redis_status
27
29
  def _get_lock(self, key, t=15, timeout=3, sleep_time=0.1):
@@ -86,8 +88,13 @@ class RedisDB:
86
88
  seeds = [seed if isinstance(seed, Seed) else Seed(seed) for seed in seeds]
87
89
 
88
90
  if seeds:
89
- redis_key = self.succeed_key if spider_status else self.failed_key
90
- self.client.sadd(redis_key, *(str(seed) for seed in seeds))
91
+ # redis_key = self.succeed_key if spider_status else self.failed_key
92
+ redis_key = None
93
+ if isinstance(self.model, int):
94
+ if self.model == 2 and spider_status:
95
+ redis_key = self.succeed_key
96
+ if redis_key:
97
+ self.client.sadd(redis_key, *(str(seed) for seed in seeds))
91
98
  self.client.zrem(self.spider_key, *(seed.format_seed for seed in seeds))
92
99
 
93
100
  @check_redis_status
@@ -6,29 +6,7 @@ from importlib import import_module
6
6
  from cobweb import log, Queue, DBItem, RedisDB, OssDB, StorerInterface
7
7
  from cobweb.utils import struct_queue_name, restore_table_name
8
8
  from .models import Scheduler, Spider, Storer
9
- # from collections import namedtuple
10
-
11
- # def start_seeds(seeds):
12
- # if not seeds:
13
- # return None
14
- # if any(isinstance(seeds, t) for t in (list, tuple)):
15
- # return [Seed(seed) for seed in seeds]
16
- # elif any(isinstance(seeds, t) for t in (str, dict)):
17
- # return Seed(seeds)
18
-
19
-
20
- # def parse_storer_info(storer_info):
21
- # storer_data = {}
22
- # storer_info_list = []
23
- # if storer_info.__class__.__name__ == 'StorerInfo':
24
- # storer_info_list.append(storer_info)
25
- # elif any(isinstance(storer_info, t) for t in (list, tuple)):
26
- # storer_info_list = storer_info
27
- # for info in storer_info_list:
28
- # db_name = info.DB.__name__
29
- # storer_data.setdefault(db_name, {"StorerDB": info.DB, "db_args_list": []})
30
- # storer_data[db_name]["db_args_list"].append(info[1:])
31
- # return storer_data
9
+
32
10
 
33
11
  def get_scheduler_db(db):
34
12
  if isinstance(db, str):
@@ -87,7 +65,10 @@ def check(model, stop, last, spider, scheduler, storer_list, ready_seed_length,
87
65
  redis_spider_seed_length = spider_queue_length()
88
66
  memory_seed_queue_length = scheduler.queue.length
89
67
  storer_upload_queue_list = []
90
-
68
+ for storer in storer_list:
69
+ storer_upload_queue_list.append(
70
+ f"{storer.__class__.__name__} storer queue length: {storer.queue.length}"
71
+ )
91
72
  if (
92
73
  scheduler.stop and
93
74
  not redis_ready_seed_length and
@@ -99,6 +80,7 @@ def check(model, stop, last, spider, scheduler, storer_list, ready_seed_length,
99
80
  last.set()
100
81
  time.sleep(3)
101
82
  storer_queue_empty = True
83
+ storer_upload_queue_list = []
102
84
  for storer in storer_list:
103
85
  if storer.queue.length:
104
86
  storer_queue_empty = False
@@ -116,7 +98,8 @@ def check(model, stop, last, spider, scheduler, storer_list, ready_seed_length,
116
98
 
117
99
  last.clear()
118
100
 
119
- storer_upload_queue_length_info = "\n ".join(storer_upload_queue_list)
101
+ storer_upload_queue_length_info = "\n ".join(
102
+ storer_upload_queue_list) if storer_upload_queue_list else "None"
120
103
  log.info(spider_info.format(
121
104
  status,
122
105
  running_spider_thread_num,
@@ -60,7 +60,7 @@ class Spider:
60
60
  del_seed(seed, spider_status=False)
61
61
  continue
62
62
  try:
63
- self.spider_in_progress.push(1)
63
+ self.spider_in_progress.push(1, direct_insertion=True)
64
64
  # log.info("spider seed: " + str(seed))
65
65
  status = None
66
66
  store_queue = None
@@ -76,6 +76,8 @@ class Spider:
76
76
  self.queue.push([s if isinstance(s, Seed) else Seed(s) for s in it])
77
77
  elif isinstance(it, bool):
78
78
  status = it
79
+ elif it is None:
80
+ status = False
79
81
 
80
82
  if store_queue and store_data:
81
83
  store_data.append(seed)
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: cobweb-launcher
3
- Version: 0.0.17
3
+ Version: 0.0.19
4
4
  Summary: spider_hole
5
5
  Home-page: https://github.com/Juannie-PP/cobweb
6
6
  Author: Juannie-PP
@@ -6,8 +6,8 @@ cobweb/log.py,sha256=Gb3_y4IzTo5pJohTggBCU9rK6-ZN3hgTOHkoXHyN6CU,2384
6
6
  cobweb/task.py,sha256=0zPomzJOm374cpds0qHMZwX0y2qK_Gi_WnmOVcWPIEM,1401
7
7
  cobweb/utils.py,sha256=3iQRn2s4fY-5ClrVo46RO9OhziRAyOn5hGJOQGjMMwQ,2176
8
8
  cobweb/db/__init__.py,sha256=4m9lqmxZCRbaih3Z3rl_BT0GugMd0dkOIgu_P9aeC84,63
9
- cobweb/db/oss_db.py,sha256=pPpWPeOGMVz9X5aqD3c_9zMwTvs6dncE9JgBCmKc3v0,4150
10
- cobweb/db/redis_db.py,sha256=rpNvhEZJyHgjfmLeXid0sag4PW5LEreuGXolB_oYHX4,7677
9
+ cobweb/db/oss_db.py,sha256=lFGNuH3tdIMsohVXQ_fTZPyBfS2oxYNmFNuQ-ZBQgm0,4221
10
+ cobweb/db/redis_db.py,sha256=OvJMgGKn20h3NKiTzAkYci9yBFIidlyAJP1bme2FDgI,7936
11
11
  cobweb/db/scheduler/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
12
12
  cobweb/db/scheduler/default.py,sha256=OxmFX7OvMEhKEq-NF7A8I9cA4V4qWw5vayS-yIbng0A,114
13
13
  cobweb/db/scheduler/textfile.py,sha256=atRDeNT-e5toNvyGsCXAxL1FJi77uSYktdCzH_hXGo8,821
@@ -17,13 +17,13 @@ cobweb/db/storer/loghub.py,sha256=4VqZacXWhidzINHXQu2_-E0HOBRCcc86f6LkKfnXD5I,17
17
17
  cobweb/db/storer/redis.py,sha256=7Q2XEQwBL6X_M1uvxzzuSBt6iw9piKw-_FWKm2INZDQ,412
18
18
  cobweb/db/storer/textfile.py,sha256=3mDHMvF6Sh5fn3IHzWQxyTUd45V-zUoH8vY3EoRlMx0,415
19
19
  cobweb/distributed/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
20
- cobweb/distributed/launcher.py,sha256=nD4uV8Bi1ymEgxFqRBxNb8Z5DJ_cnpG0riWq2RjzoBE,10688
21
- cobweb/distributed/models.py,sha256=eYD3yzOLGTI_LrILhzlYBBtwxU2x3KlETFGqVT7_66g,4414
20
+ cobweb/distributed/launcher.py,sha256=CgsrAZX6m0BM91uw085fw5oecGi2gHgXjnNGejar1gM,10116
21
+ cobweb/distributed/models.py,sha256=5rRy16BHBq4EwWZxHcH_hJ6GJ0jEZ9hechSlZxzvdFQ,4513
22
22
  cobweb/single/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
23
23
  cobweb/single/models.py,sha256=lu8teNWnWcUwZFra8XmqyhzOAf3UyuEztwBr1Ne6pUs,2898
24
24
  cobweb/single/nest.py,sha256=mL8q9a5BjtoeUyzXCIVw_vyUsNY8ltbvQpYIIpZEDFU,5012
25
- cobweb_launcher-0.0.17.dist-info/LICENSE,sha256=z1rxSIGOyzcSb3orZxFPxzx-0C1vTocmswqBNxpKfEk,1063
26
- cobweb_launcher-0.0.17.dist-info/METADATA,sha256=N7hIcJCHqWpxfzlZDBb7CQzDC-PuUA1iaOqtfpme6oY,1226
27
- cobweb_launcher-0.0.17.dist-info/WHEEL,sha256=pkctZYzUS4AYVn6dJ-7367OJZivF2e8RA9b_ZBjif18,92
28
- cobweb_launcher-0.0.17.dist-info/top_level.txt,sha256=4GETBGNsKqiCUezmT-mJn7tjhcDlu7nLIV5gGgHBW4I,7
29
- cobweb_launcher-0.0.17.dist-info/RECORD,,
25
+ cobweb_launcher-0.0.19.dist-info/LICENSE,sha256=z1rxSIGOyzcSb3orZxFPxzx-0C1vTocmswqBNxpKfEk,1063
26
+ cobweb_launcher-0.0.19.dist-info/METADATA,sha256=s81yOSvm3whrUbKiii7wwNvbbKY8oYvSNagLrQnU_9o,1226
27
+ cobweb_launcher-0.0.19.dist-info/WHEEL,sha256=pkctZYzUS4AYVn6dJ-7367OJZivF2e8RA9b_ZBjif18,92
28
+ cobweb_launcher-0.0.19.dist-info/top_level.txt,sha256=4GETBGNsKqiCUezmT-mJn7tjhcDlu7nLIV5gGgHBW4I,7
29
+ cobweb_launcher-0.0.19.dist-info/RECORD,,