cobweb-launcher 0.0.17__py3-none-any.whl → 0.0.19__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
cobweb/db/oss_db.py CHANGED
@@ -37,8 +37,8 @@ class OssDB:
37
37
 
38
38
  units = ["KB", "MB", "GB", "TB"]
39
39
  for i in range(3):
40
- num = length / 1024 ** (i + 1)
41
- if num >= 1:
40
+ num = length / (1024 ** (i + 1))
41
+ if num <= 1024:
42
42
  return f"{round(num, 2)} {units[i]}"
43
43
 
44
44
  def assemble(self, ready_data, part_data):
@@ -110,6 +110,8 @@ class OssDB:
110
110
  f"file size: {format_upload}"
111
111
  )
112
112
  status = True
113
+ except ValueError as e:
114
+ log.exception(str(e))
113
115
  except oss2.exceptions.RequestError as e:
114
116
  self.bucket = oss2.Bucket(
115
117
  auth=self.auth,
cobweb/db/redis_db.py CHANGED
@@ -8,6 +8,7 @@ class RedisDB:
8
8
 
9
9
  def __init__(
10
10
  self,
11
+ model: int,
11
12
  project: str,
12
13
  task_name: str,
13
14
  config: dict
@@ -22,6 +23,7 @@ class RedisDB:
22
23
  self.check_lock = f"{project}:{task_name}:check_seed_lock" # redis type string
23
24
  self.scheduler_lock = f"{project}:{task_name}:scheduler_lock" # redis type string
24
25
  self.client = redis.Redis(connection_pool=pool)
26
+ self.model = model
25
27
 
26
28
  @check_redis_status
27
29
  def _get_lock(self, key, t=15, timeout=3, sleep_time=0.1):
@@ -86,8 +88,13 @@ class RedisDB:
86
88
  seeds = [seed if isinstance(seed, Seed) else Seed(seed) for seed in seeds]
87
89
 
88
90
  if seeds:
89
- redis_key = self.succeed_key if spider_status else self.failed_key
90
- self.client.sadd(redis_key, *(str(seed) for seed in seeds))
91
+ # redis_key = self.succeed_key if spider_status else self.failed_key
92
+ redis_key = None
93
+ if isinstance(self.model, int):
94
+ if self.model == 2 and spider_status:
95
+ redis_key = self.succeed_key
96
+ if redis_key:
97
+ self.client.sadd(redis_key, *(str(seed) for seed in seeds))
91
98
  self.client.zrem(self.spider_key, *(seed.format_seed for seed in seeds))
92
99
 
93
100
  @check_redis_status
@@ -6,29 +6,7 @@ from importlib import import_module
6
6
  from cobweb import log, Queue, DBItem, RedisDB, OssDB, StorerInterface
7
7
  from cobweb.utils import struct_queue_name, restore_table_name
8
8
  from .models import Scheduler, Spider, Storer
9
- # from collections import namedtuple
10
-
11
- # def start_seeds(seeds):
12
- # if not seeds:
13
- # return None
14
- # if any(isinstance(seeds, t) for t in (list, tuple)):
15
- # return [Seed(seed) for seed in seeds]
16
- # elif any(isinstance(seeds, t) for t in (str, dict)):
17
- # return Seed(seeds)
18
-
19
-
20
- # def parse_storer_info(storer_info):
21
- # storer_data = {}
22
- # storer_info_list = []
23
- # if storer_info.__class__.__name__ == 'StorerInfo':
24
- # storer_info_list.append(storer_info)
25
- # elif any(isinstance(storer_info, t) for t in (list, tuple)):
26
- # storer_info_list = storer_info
27
- # for info in storer_info_list:
28
- # db_name = info.DB.__name__
29
- # storer_data.setdefault(db_name, {"StorerDB": info.DB, "db_args_list": []})
30
- # storer_data[db_name]["db_args_list"].append(info[1:])
31
- # return storer_data
9
+
32
10
 
33
11
  def get_scheduler_db(db):
34
12
  if isinstance(db, str):
@@ -87,7 +65,10 @@ def check(model, stop, last, spider, scheduler, storer_list, ready_seed_length,
87
65
  redis_spider_seed_length = spider_queue_length()
88
66
  memory_seed_queue_length = scheduler.queue.length
89
67
  storer_upload_queue_list = []
90
-
68
+ for storer in storer_list:
69
+ storer_upload_queue_list.append(
70
+ f"{storer.__class__.__name__} storer queue length: {storer.queue.length}"
71
+ )
91
72
  if (
92
73
  scheduler.stop and
93
74
  not redis_ready_seed_length and
@@ -99,6 +80,7 @@ def check(model, stop, last, spider, scheduler, storer_list, ready_seed_length,
99
80
  last.set()
100
81
  time.sleep(3)
101
82
  storer_queue_empty = True
83
+ storer_upload_queue_list = []
102
84
  for storer in storer_list:
103
85
  if storer.queue.length:
104
86
  storer_queue_empty = False
@@ -116,7 +98,8 @@ def check(model, stop, last, spider, scheduler, storer_list, ready_seed_length,
116
98
 
117
99
  last.clear()
118
100
 
119
- storer_upload_queue_length_info = "\n ".join(storer_upload_queue_list)
101
+ storer_upload_queue_length_info = "\n ".join(
102
+ storer_upload_queue_list) if storer_upload_queue_list else "None"
120
103
  log.info(spider_info.format(
121
104
  status,
122
105
  running_spider_thread_num,
@@ -60,7 +60,7 @@ class Spider:
60
60
  del_seed(seed, spider_status=False)
61
61
  continue
62
62
  try:
63
- self.spider_in_progress.push(1)
63
+ self.spider_in_progress.push(1, direct_insertion=True)
64
64
  # log.info("spider seed: " + str(seed))
65
65
  status = None
66
66
  store_queue = None
@@ -76,6 +76,8 @@ class Spider:
76
76
  self.queue.push([s if isinstance(s, Seed) else Seed(s) for s in it])
77
77
  elif isinstance(it, bool):
78
78
  status = it
79
+ elif it is None:
80
+ status = False
79
81
 
80
82
  if store_queue and store_data:
81
83
  store_data.append(seed)
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: cobweb-launcher
3
- Version: 0.0.17
3
+ Version: 0.0.19
4
4
  Summary: spider_hole
5
5
  Home-page: https://github.com/Juannie-PP/cobweb
6
6
  Author: Juannie-PP
@@ -6,8 +6,8 @@ cobweb/log.py,sha256=Gb3_y4IzTo5pJohTggBCU9rK6-ZN3hgTOHkoXHyN6CU,2384
6
6
  cobweb/task.py,sha256=0zPomzJOm374cpds0qHMZwX0y2qK_Gi_WnmOVcWPIEM,1401
7
7
  cobweb/utils.py,sha256=3iQRn2s4fY-5ClrVo46RO9OhziRAyOn5hGJOQGjMMwQ,2176
8
8
  cobweb/db/__init__.py,sha256=4m9lqmxZCRbaih3Z3rl_BT0GugMd0dkOIgu_P9aeC84,63
9
- cobweb/db/oss_db.py,sha256=pPpWPeOGMVz9X5aqD3c_9zMwTvs6dncE9JgBCmKc3v0,4150
10
- cobweb/db/redis_db.py,sha256=rpNvhEZJyHgjfmLeXid0sag4PW5LEreuGXolB_oYHX4,7677
9
+ cobweb/db/oss_db.py,sha256=lFGNuH3tdIMsohVXQ_fTZPyBfS2oxYNmFNuQ-ZBQgm0,4221
10
+ cobweb/db/redis_db.py,sha256=OvJMgGKn20h3NKiTzAkYci9yBFIidlyAJP1bme2FDgI,7936
11
11
  cobweb/db/scheduler/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
12
12
  cobweb/db/scheduler/default.py,sha256=OxmFX7OvMEhKEq-NF7A8I9cA4V4qWw5vayS-yIbng0A,114
13
13
  cobweb/db/scheduler/textfile.py,sha256=atRDeNT-e5toNvyGsCXAxL1FJi77uSYktdCzH_hXGo8,821
@@ -17,13 +17,13 @@ cobweb/db/storer/loghub.py,sha256=4VqZacXWhidzINHXQu2_-E0HOBRCcc86f6LkKfnXD5I,17
17
17
  cobweb/db/storer/redis.py,sha256=7Q2XEQwBL6X_M1uvxzzuSBt6iw9piKw-_FWKm2INZDQ,412
18
18
  cobweb/db/storer/textfile.py,sha256=3mDHMvF6Sh5fn3IHzWQxyTUd45V-zUoH8vY3EoRlMx0,415
19
19
  cobweb/distributed/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
20
- cobweb/distributed/launcher.py,sha256=nD4uV8Bi1ymEgxFqRBxNb8Z5DJ_cnpG0riWq2RjzoBE,10688
21
- cobweb/distributed/models.py,sha256=eYD3yzOLGTI_LrILhzlYBBtwxU2x3KlETFGqVT7_66g,4414
20
+ cobweb/distributed/launcher.py,sha256=CgsrAZX6m0BM91uw085fw5oecGi2gHgXjnNGejar1gM,10116
21
+ cobweb/distributed/models.py,sha256=5rRy16BHBq4EwWZxHcH_hJ6GJ0jEZ9hechSlZxzvdFQ,4513
22
22
  cobweb/single/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
23
23
  cobweb/single/models.py,sha256=lu8teNWnWcUwZFra8XmqyhzOAf3UyuEztwBr1Ne6pUs,2898
24
24
  cobweb/single/nest.py,sha256=mL8q9a5BjtoeUyzXCIVw_vyUsNY8ltbvQpYIIpZEDFU,5012
25
- cobweb_launcher-0.0.17.dist-info/LICENSE,sha256=z1rxSIGOyzcSb3orZxFPxzx-0C1vTocmswqBNxpKfEk,1063
26
- cobweb_launcher-0.0.17.dist-info/METADATA,sha256=N7hIcJCHqWpxfzlZDBb7CQzDC-PuUA1iaOqtfpme6oY,1226
27
- cobweb_launcher-0.0.17.dist-info/WHEEL,sha256=pkctZYzUS4AYVn6dJ-7367OJZivF2e8RA9b_ZBjif18,92
28
- cobweb_launcher-0.0.17.dist-info/top_level.txt,sha256=4GETBGNsKqiCUezmT-mJn7tjhcDlu7nLIV5gGgHBW4I,7
29
- cobweb_launcher-0.0.17.dist-info/RECORD,,
25
+ cobweb_launcher-0.0.19.dist-info/LICENSE,sha256=z1rxSIGOyzcSb3orZxFPxzx-0C1vTocmswqBNxpKfEk,1063
26
+ cobweb_launcher-0.0.19.dist-info/METADATA,sha256=s81yOSvm3whrUbKiii7wwNvbbKY8oYvSNagLrQnU_9o,1226
27
+ cobweb_launcher-0.0.19.dist-info/WHEEL,sha256=pkctZYzUS4AYVn6dJ-7367OJZivF2e8RA9b_ZBjif18,92
28
+ cobweb_launcher-0.0.19.dist-info/top_level.txt,sha256=4GETBGNsKqiCUezmT-mJn7tjhcDlu7nLIV5gGgHBW4I,7
29
+ cobweb_launcher-0.0.19.dist-info/RECORD,,