cobweb-launcher 0.0.18__tar.gz → 0.0.20__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (34)
  1. {cobweb-launcher-0.0.18 → cobweb-launcher-0.0.20}/PKG-INFO +1 -1
  2. {cobweb-launcher-0.0.18 → cobweb-launcher-0.0.20}/cobweb/db/redis_db.py +9 -2
  3. {cobweb-launcher-0.0.18 → cobweb-launcher-0.0.20}/cobweb/distributed/launcher.py +9 -26
  4. {cobweb-launcher-0.0.18 → cobweb-launcher-0.0.20}/cobweb/distributed/models.py +1 -1
  5. {cobweb-launcher-0.0.18 → cobweb-launcher-0.0.20}/cobweb_launcher.egg-info/PKG-INFO +1 -1
  6. {cobweb-launcher-0.0.18 → cobweb-launcher-0.0.20}/setup.py +1 -1
  7. {cobweb-launcher-0.0.18 → cobweb-launcher-0.0.20}/LICENSE +0 -0
  8. {cobweb-launcher-0.0.18 → cobweb-launcher-0.0.20}/README.md +0 -0
  9. {cobweb-launcher-0.0.18 → cobweb-launcher-0.0.20}/cobweb/__init__.py +0 -0
  10. {cobweb-launcher-0.0.18 → cobweb-launcher-0.0.20}/cobweb/bbb.py +0 -0
  11. {cobweb-launcher-0.0.18 → cobweb-launcher-0.0.20}/cobweb/db/__init__.py +0 -0
  12. {cobweb-launcher-0.0.18 → cobweb-launcher-0.0.20}/cobweb/db/oss_db.py +0 -0
  13. {cobweb-launcher-0.0.18 → cobweb-launcher-0.0.20}/cobweb/db/scheduler/__init__.py +0 -0
  14. {cobweb-launcher-0.0.18 → cobweb-launcher-0.0.20}/cobweb/db/scheduler/default.py +0 -0
  15. {cobweb-launcher-0.0.18 → cobweb-launcher-0.0.20}/cobweb/db/scheduler/textfile.py +0 -0
  16. {cobweb-launcher-0.0.18 → cobweb-launcher-0.0.20}/cobweb/db/storer/__init__.py +0 -0
  17. {cobweb-launcher-0.0.18 → cobweb-launcher-0.0.20}/cobweb/db/storer/console.py +0 -0
  18. {cobweb-launcher-0.0.18 → cobweb-launcher-0.0.20}/cobweb/db/storer/loghub.py +0 -0
  19. {cobweb-launcher-0.0.18 → cobweb-launcher-0.0.20}/cobweb/db/storer/redis.py +0 -0
  20. {cobweb-launcher-0.0.18 → cobweb-launcher-0.0.20}/cobweb/db/storer/textfile.py +0 -0
  21. {cobweb-launcher-0.0.18 → cobweb-launcher-0.0.20}/cobweb/decorators.py +0 -0
  22. {cobweb-launcher-0.0.18 → cobweb-launcher-0.0.20}/cobweb/distributed/__init__.py +0 -0
  23. {cobweb-launcher-0.0.18 → cobweb-launcher-0.0.20}/cobweb/interface.py +0 -0
  24. {cobweb-launcher-0.0.18 → cobweb-launcher-0.0.20}/cobweb/log.py +0 -0
  25. {cobweb-launcher-0.0.18 → cobweb-launcher-0.0.20}/cobweb/single/__init__.py +0 -0
  26. {cobweb-launcher-0.0.18 → cobweb-launcher-0.0.20}/cobweb/single/models.py +0 -0
  27. {cobweb-launcher-0.0.18 → cobweb-launcher-0.0.20}/cobweb/single/nest.py +0 -0
  28. {cobweb-launcher-0.0.18 → cobweb-launcher-0.0.20}/cobweb/task.py +0 -0
  29. {cobweb-launcher-0.0.18 → cobweb-launcher-0.0.20}/cobweb/utils.py +0 -0
  30. {cobweb-launcher-0.0.18 → cobweb-launcher-0.0.20}/cobweb_launcher.egg-info/SOURCES.txt +0 -0
  31. {cobweb-launcher-0.0.18 → cobweb-launcher-0.0.20}/cobweb_launcher.egg-info/dependency_links.txt +0 -0
  32. {cobweb-launcher-0.0.18 → cobweb-launcher-0.0.20}/cobweb_launcher.egg-info/requires.txt +0 -0
  33. {cobweb-launcher-0.0.18 → cobweb-launcher-0.0.20}/cobweb_launcher.egg-info/top_level.txt +0 -0
  34. {cobweb-launcher-0.0.18 → cobweb-launcher-0.0.20}/setup.cfg +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: cobweb-launcher
3
- Version: 0.0.18
3
+ Version: 0.0.20
4
4
  Summary: spider_hole
5
5
  Home-page: https://github.com/Juannie-PP/cobweb
6
6
  Author: Juannie-PP
@@ -8,6 +8,7 @@ class RedisDB:
8
8
 
9
9
  def __init__(
10
10
  self,
11
+ model: int,
11
12
  project: str,
12
13
  task_name: str,
13
14
  config: dict
@@ -22,6 +23,7 @@ class RedisDB:
22
23
  self.check_lock = f"{project}:{task_name}:check_seed_lock" # redis type string
23
24
  self.scheduler_lock = f"{project}:{task_name}:scheduler_lock" # redis type string
24
25
  self.client = redis.Redis(connection_pool=pool)
26
+ self.model = model
25
27
 
26
28
  @check_redis_status
27
29
  def _get_lock(self, key, t=15, timeout=3, sleep_time=0.1):
@@ -86,8 +88,13 @@ class RedisDB:
86
88
  seeds = [seed if isinstance(seed, Seed) else Seed(seed) for seed in seeds]
87
89
 
88
90
  if seeds:
89
- redis_key = self.succeed_key if spider_status else self.failed_key
90
- self.client.sadd(redis_key, *(str(seed) for seed in seeds))
91
+ # redis_key = self.succeed_key if spider_status else self.failed_key
92
+ redis_key = None
93
+ if isinstance(self.model, int):
94
+ if self.model == 2 and spider_status:
95
+ redis_key = self.succeed_key
96
+ if redis_key:
97
+ self.client.sadd(redis_key, *(str(seed) for seed in seeds))
91
98
  self.client.zrem(self.spider_key, *(seed.format_seed for seed in seeds))
92
99
 
93
100
  @check_redis_status
@@ -6,29 +6,7 @@ from importlib import import_module
6
6
  from cobweb import log, Queue, DBItem, RedisDB, OssDB, StorerInterface
7
7
  from cobweb.utils import struct_queue_name, restore_table_name
8
8
  from .models import Scheduler, Spider, Storer
9
- # from collections import namedtuple
10
-
11
- # def start_seeds(seeds):
12
- # if not seeds:
13
- # return None
14
- # if any(isinstance(seeds, t) for t in (list, tuple)):
15
- # return [Seed(seed) for seed in seeds]
16
- # elif any(isinstance(seeds, t) for t in (str, dict)):
17
- # return Seed(seeds)
18
-
19
-
20
- # def parse_storer_info(storer_info):
21
- # storer_data = {}
22
- # storer_info_list = []
23
- # if storer_info.__class__.__name__ == 'StorerInfo':
24
- # storer_info_list.append(storer_info)
25
- # elif any(isinstance(storer_info, t) for t in (list, tuple)):
26
- # storer_info_list = storer_info
27
- # for info in storer_info_list:
28
- # db_name = info.DB.__name__
29
- # storer_data.setdefault(db_name, {"StorerDB": info.DB, "db_args_list": []})
30
- # storer_data[db_name]["db_args_list"].append(info[1:])
31
- # return storer_data
9
+
32
10
 
33
11
  def get_scheduler_db(db):
34
12
  if isinstance(db, str):
@@ -87,7 +65,10 @@ def check(model, stop, last, spider, scheduler, storer_list, ready_seed_length,
87
65
  redis_spider_seed_length = spider_queue_length()
88
66
  memory_seed_queue_length = scheduler.queue.length
89
67
  storer_upload_queue_list = []
90
-
68
+ for storer in storer_list:
69
+ storer_upload_queue_list.append(
70
+ f"{storer.__class__.__name__} storer queue length: {storer.queue.length}"
71
+ )
91
72
  if (
92
73
  scheduler.stop and
93
74
  not redis_ready_seed_length and
@@ -99,6 +80,7 @@ def check(model, stop, last, spider, scheduler, storer_list, ready_seed_length,
99
80
  last.set()
100
81
  time.sleep(3)
101
82
  storer_queue_empty = True
83
+ storer_upload_queue_list = []
102
84
  for storer in storer_list:
103
85
  if storer.queue.length:
104
86
  storer_queue_empty = False
@@ -116,7 +98,8 @@ def check(model, stop, last, spider, scheduler, storer_list, ready_seed_length,
116
98
 
117
99
  last.clear()
118
100
 
119
- storer_upload_queue_length_info = "\n ".join(storer_upload_queue_list)
101
+ storer_upload_queue_length_info = "\n ".join(
102
+ storer_upload_queue_list) if storer_upload_queue_list else "None"
120
103
  log.info(spider_info.format(
121
104
  status,
122
105
  running_spider_thread_num,
@@ -158,7 +141,7 @@ def launcher(task):
158
141
  stop = threading.Event()
159
142
 
160
143
  # 初始化redis信息
161
- redis_db = RedisDB(task.project, task.task_name, task.redis_info)
144
+ redis_db = RedisDB(task.model, task.project, task.task_name, task.redis_info)
162
145
 
163
146
  log.info("初始化cobweb!")
164
147
 
@@ -60,7 +60,7 @@ class Spider:
60
60
  del_seed(seed, spider_status=False)
61
61
  continue
62
62
  try:
63
- self.spider_in_progress.push(1)
63
+ self.spider_in_progress.push(1, direct_insertion=True)
64
64
  # log.info("spider seed: " + str(seed))
65
65
  status = None
66
66
  store_queue = None
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: cobweb-launcher
3
- Version: 0.0.18
3
+ Version: 0.0.20
4
4
  Summary: spider_hole
5
5
  Home-page: https://github.com/Juannie-PP/cobweb
6
6
  Author: Juannie-PP
@@ -5,7 +5,7 @@ with open("README.md", "r", encoding="utf-8") as fh:
5
5
 
6
6
  setup(
7
7
  name="cobweb-launcher",
8
- version="0.0.18",
8
+ version="0.0.20",
9
9
  packages=find_packages(),
10
10
  url="https://github.com/Juannie-PP/cobweb",
11
11
  license="MIT",