cobweb-launcher 0.0.17__tar.gz → 0.0.19__tar.gz

Sign up to get free protection for your applications and to get access to all the features.
Files changed (34)
  1. {cobweb-launcher-0.0.17 → cobweb-launcher-0.0.19}/PKG-INFO +1 -1
  2. {cobweb-launcher-0.0.17 → cobweb-launcher-0.0.19}/cobweb/db/oss_db.py +4 -2
  3. {cobweb-launcher-0.0.17 → cobweb-launcher-0.0.19}/cobweb/db/redis_db.py +9 -2
  4. {cobweb-launcher-0.0.17 → cobweb-launcher-0.0.19}/cobweb/distributed/launcher.py +8 -25
  5. {cobweb-launcher-0.0.17 → cobweb-launcher-0.0.19}/cobweb/distributed/models.py +3 -1
  6. {cobweb-launcher-0.0.17 → cobweb-launcher-0.0.19}/cobweb_launcher.egg-info/PKG-INFO +1 -1
  7. {cobweb-launcher-0.0.17 → cobweb-launcher-0.0.19}/setup.py +1 -1
  8. {cobweb-launcher-0.0.17 → cobweb-launcher-0.0.19}/LICENSE +0 -0
  9. {cobweb-launcher-0.0.17 → cobweb-launcher-0.0.19}/README.md +0 -0
  10. {cobweb-launcher-0.0.17 → cobweb-launcher-0.0.19}/cobweb/__init__.py +0 -0
  11. {cobweb-launcher-0.0.17 → cobweb-launcher-0.0.19}/cobweb/bbb.py +0 -0
  12. {cobweb-launcher-0.0.17 → cobweb-launcher-0.0.19}/cobweb/db/__init__.py +0 -0
  13. {cobweb-launcher-0.0.17 → cobweb-launcher-0.0.19}/cobweb/db/scheduler/__init__.py +0 -0
  14. {cobweb-launcher-0.0.17 → cobweb-launcher-0.0.19}/cobweb/db/scheduler/default.py +0 -0
  15. {cobweb-launcher-0.0.17 → cobweb-launcher-0.0.19}/cobweb/db/scheduler/textfile.py +0 -0
  16. {cobweb-launcher-0.0.17 → cobweb-launcher-0.0.19}/cobweb/db/storer/__init__.py +0 -0
  17. {cobweb-launcher-0.0.17 → cobweb-launcher-0.0.19}/cobweb/db/storer/console.py +0 -0
  18. {cobweb-launcher-0.0.17 → cobweb-launcher-0.0.19}/cobweb/db/storer/loghub.py +0 -0
  19. {cobweb-launcher-0.0.17 → cobweb-launcher-0.0.19}/cobweb/db/storer/redis.py +0 -0
  20. {cobweb-launcher-0.0.17 → cobweb-launcher-0.0.19}/cobweb/db/storer/textfile.py +0 -0
  21. {cobweb-launcher-0.0.17 → cobweb-launcher-0.0.19}/cobweb/decorators.py +0 -0
  22. {cobweb-launcher-0.0.17 → cobweb-launcher-0.0.19}/cobweb/distributed/__init__.py +0 -0
  23. {cobweb-launcher-0.0.17 → cobweb-launcher-0.0.19}/cobweb/interface.py +0 -0
  24. {cobweb-launcher-0.0.17 → cobweb-launcher-0.0.19}/cobweb/log.py +0 -0
  25. {cobweb-launcher-0.0.17 → cobweb-launcher-0.0.19}/cobweb/single/__init__.py +0 -0
  26. {cobweb-launcher-0.0.17 → cobweb-launcher-0.0.19}/cobweb/single/models.py +0 -0
  27. {cobweb-launcher-0.0.17 → cobweb-launcher-0.0.19}/cobweb/single/nest.py +0 -0
  28. {cobweb-launcher-0.0.17 → cobweb-launcher-0.0.19}/cobweb/task.py +0 -0
  29. {cobweb-launcher-0.0.17 → cobweb-launcher-0.0.19}/cobweb/utils.py +0 -0
  30. {cobweb-launcher-0.0.17 → cobweb-launcher-0.0.19}/cobweb_launcher.egg-info/SOURCES.txt +0 -0
  31. {cobweb-launcher-0.0.17 → cobweb-launcher-0.0.19}/cobweb_launcher.egg-info/dependency_links.txt +0 -0
  32. {cobweb-launcher-0.0.17 → cobweb-launcher-0.0.19}/cobweb_launcher.egg-info/requires.txt +0 -0
  33. {cobweb-launcher-0.0.17 → cobweb-launcher-0.0.19}/cobweb_launcher.egg-info/top_level.txt +0 -0
  34. {cobweb-launcher-0.0.17 → cobweb-launcher-0.0.19}/setup.cfg +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: cobweb-launcher
3
- Version: 0.0.17
3
+ Version: 0.0.19
4
4
  Summary: spider_hole
5
5
  Home-page: https://github.com/Juannie-PP/cobweb
6
6
  Author: Juannie-PP
@@ -37,8 +37,8 @@ class OssDB:
37
37
 
38
38
  units = ["KB", "MB", "GB", "TB"]
39
39
  for i in range(3):
40
- num = length / 1024 ** (i + 1)
41
- if num >= 1:
40
+ num = length / (1024 ** (i + 1))
41
+ if num <= 1024:
42
42
  return f"{round(num, 2)} {units[i]}"
43
43
 
44
44
  def assemble(self, ready_data, part_data):
@@ -110,6 +110,8 @@ class OssDB:
110
110
  f"file size: {format_upload}"
111
111
  )
112
112
  status = True
113
+ except ValueError as e:
114
+ log.exception(str(e))
113
115
  except oss2.exceptions.RequestError as e:
114
116
  self.bucket = oss2.Bucket(
115
117
  auth=self.auth,
@@ -8,6 +8,7 @@ class RedisDB:
8
8
 
9
9
  def __init__(
10
10
  self,
11
+ model: int,
11
12
  project: str,
12
13
  task_name: str,
13
14
  config: dict
@@ -22,6 +23,7 @@ class RedisDB:
22
23
  self.check_lock = f"{project}:{task_name}:check_seed_lock" # redis type string
23
24
  self.scheduler_lock = f"{project}:{task_name}:scheduler_lock" # redis type string
24
25
  self.client = redis.Redis(connection_pool=pool)
26
+ self.model = model
25
27
 
26
28
  @check_redis_status
27
29
  def _get_lock(self, key, t=15, timeout=3, sleep_time=0.1):
@@ -86,8 +88,13 @@ class RedisDB:
86
88
  seeds = [seed if isinstance(seed, Seed) else Seed(seed) for seed in seeds]
87
89
 
88
90
  if seeds:
89
- redis_key = self.succeed_key if spider_status else self.failed_key
90
- self.client.sadd(redis_key, *(str(seed) for seed in seeds))
91
+ # redis_key = self.succeed_key if spider_status else self.failed_key
92
+ redis_key = None
93
+ if isinstance(self.model, int):
94
+ if self.model == 2 and spider_status:
95
+ redis_key = self.succeed_key
96
+ if redis_key:
97
+ self.client.sadd(redis_key, *(str(seed) for seed in seeds))
91
98
  self.client.zrem(self.spider_key, *(seed.format_seed for seed in seeds))
92
99
 
93
100
  @check_redis_status
@@ -6,29 +6,7 @@ from importlib import import_module
6
6
  from cobweb import log, Queue, DBItem, RedisDB, OssDB, StorerInterface
7
7
  from cobweb.utils import struct_queue_name, restore_table_name
8
8
  from .models import Scheduler, Spider, Storer
9
- # from collections import namedtuple
10
-
11
- # def start_seeds(seeds):
12
- # if not seeds:
13
- # return None
14
- # if any(isinstance(seeds, t) for t in (list, tuple)):
15
- # return [Seed(seed) for seed in seeds]
16
- # elif any(isinstance(seeds, t) for t in (str, dict)):
17
- # return Seed(seeds)
18
-
19
-
20
- # def parse_storer_info(storer_info):
21
- # storer_data = {}
22
- # storer_info_list = []
23
- # if storer_info.__class__.__name__ == 'StorerInfo':
24
- # storer_info_list.append(storer_info)
25
- # elif any(isinstance(storer_info, t) for t in (list, tuple)):
26
- # storer_info_list = storer_info
27
- # for info in storer_info_list:
28
- # db_name = info.DB.__name__
29
- # storer_data.setdefault(db_name, {"StorerDB": info.DB, "db_args_list": []})
30
- # storer_data[db_name]["db_args_list"].append(info[1:])
31
- # return storer_data
9
+
32
10
 
33
11
  def get_scheduler_db(db):
34
12
  if isinstance(db, str):
@@ -87,7 +65,10 @@ def check(model, stop, last, spider, scheduler, storer_list, ready_seed_length,
87
65
  redis_spider_seed_length = spider_queue_length()
88
66
  memory_seed_queue_length = scheduler.queue.length
89
67
  storer_upload_queue_list = []
90
-
68
+ for storer in storer_list:
69
+ storer_upload_queue_list.append(
70
+ f"{storer.__class__.__name__} storer queue length: {storer.queue.length}"
71
+ )
91
72
  if (
92
73
  scheduler.stop and
93
74
  not redis_ready_seed_length and
@@ -99,6 +80,7 @@ def check(model, stop, last, spider, scheduler, storer_list, ready_seed_length,
99
80
  last.set()
100
81
  time.sleep(3)
101
82
  storer_queue_empty = True
83
+ storer_upload_queue_list = []
102
84
  for storer in storer_list:
103
85
  if storer.queue.length:
104
86
  storer_queue_empty = False
@@ -116,7 +98,8 @@ def check(model, stop, last, spider, scheduler, storer_list, ready_seed_length,
116
98
 
117
99
  last.clear()
118
100
 
119
- storer_upload_queue_length_info = "\n ".join(storer_upload_queue_list)
101
+ storer_upload_queue_length_info = "\n ".join(
102
+ storer_upload_queue_list) if storer_upload_queue_list else "None"
120
103
  log.info(spider_info.format(
121
104
  status,
122
105
  running_spider_thread_num,
@@ -60,7 +60,7 @@ class Spider:
60
60
  del_seed(seed, spider_status=False)
61
61
  continue
62
62
  try:
63
- self.spider_in_progress.push(1)
63
+ self.spider_in_progress.push(1, direct_insertion=True)
64
64
  # log.info("spider seed: " + str(seed))
65
65
  status = None
66
66
  store_queue = None
@@ -76,6 +76,8 @@ class Spider:
76
76
  self.queue.push([s if isinstance(s, Seed) else Seed(s) for s in it])
77
77
  elif isinstance(it, bool):
78
78
  status = it
79
+ elif it is None:
80
+ status = False
79
81
 
80
82
  if store_queue and store_data:
81
83
  store_data.append(seed)
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: cobweb-launcher
3
- Version: 0.0.17
3
+ Version: 0.0.19
4
4
  Summary: spider_hole
5
5
  Home-page: https://github.com/Juannie-PP/cobweb
6
6
  Author: Juannie-PP
@@ -5,7 +5,7 @@ with open("README.md", "r", encoding="utf-8") as fh:
5
5
 
6
6
  setup(
7
7
  name="cobweb-launcher",
8
- version="0.0.17",
8
+ version="0.0.19",
9
9
  packages=find_packages(),
10
10
  url="https://github.com/Juannie-PP/cobweb",
11
11
  license="MIT",