cobweb-launcher 0.0.14__tar.gz → 0.0.16__tar.gz

Sign up to get free protection for your applications and to get access to all the features.
Files changed (34)
  1. {cobweb-launcher-0.0.14 → cobweb-launcher-0.0.16}/PKG-INFO +1 -1
  2. {cobweb-launcher-0.0.14 → cobweb-launcher-0.0.16}/cobweb/bbb.py +6 -2
  3. {cobweb-launcher-0.0.14 → cobweb-launcher-0.0.16}/cobweb/distributed/launcher.py +27 -17
  4. {cobweb-launcher-0.0.14 → cobweb-launcher-0.0.16}/cobweb/task.py +3 -0
  5. {cobweb-launcher-0.0.14 → cobweb-launcher-0.0.16}/cobweb_launcher.egg-info/PKG-INFO +1 -1
  6. {cobweb-launcher-0.0.14 → cobweb-launcher-0.0.16}/setup.py +1 -1
  7. {cobweb-launcher-0.0.14 → cobweb-launcher-0.0.16}/LICENSE +0 -0
  8. {cobweb-launcher-0.0.14 → cobweb-launcher-0.0.16}/README.md +0 -0
  9. {cobweb-launcher-0.0.14 → cobweb-launcher-0.0.16}/cobweb/__init__.py +0 -0
  10. {cobweb-launcher-0.0.14 → cobweb-launcher-0.0.16}/cobweb/db/__init__.py +0 -0
  11. {cobweb-launcher-0.0.14 → cobweb-launcher-0.0.16}/cobweb/db/oss_db.py +0 -0
  12. {cobweb-launcher-0.0.14 → cobweb-launcher-0.0.16}/cobweb/db/redis_db.py +0 -0
  13. {cobweb-launcher-0.0.14 → cobweb-launcher-0.0.16}/cobweb/db/scheduler/__init__.py +0 -0
  14. {cobweb-launcher-0.0.14 → cobweb-launcher-0.0.16}/cobweb/db/scheduler/default.py +0 -0
  15. {cobweb-launcher-0.0.14 → cobweb-launcher-0.0.16}/cobweb/db/scheduler/textfile.py +0 -0
  16. {cobweb-launcher-0.0.14 → cobweb-launcher-0.0.16}/cobweb/db/storer/__init__.py +0 -0
  17. {cobweb-launcher-0.0.14 → cobweb-launcher-0.0.16}/cobweb/db/storer/console.py +0 -0
  18. {cobweb-launcher-0.0.14 → cobweb-launcher-0.0.16}/cobweb/db/storer/loghub.py +0 -0
  19. {cobweb-launcher-0.0.14 → cobweb-launcher-0.0.16}/cobweb/db/storer/redis.py +0 -0
  20. {cobweb-launcher-0.0.14 → cobweb-launcher-0.0.16}/cobweb/db/storer/textfile.py +0 -0
  21. {cobweb-launcher-0.0.14 → cobweb-launcher-0.0.16}/cobweb/decorators.py +0 -0
  22. {cobweb-launcher-0.0.14 → cobweb-launcher-0.0.16}/cobweb/distributed/__init__.py +0 -0
  23. {cobweb-launcher-0.0.14 → cobweb-launcher-0.0.16}/cobweb/distributed/models.py +0 -0
  24. {cobweb-launcher-0.0.14 → cobweb-launcher-0.0.16}/cobweb/interface.py +0 -0
  25. {cobweb-launcher-0.0.14 → cobweb-launcher-0.0.16}/cobweb/log.py +0 -0
  26. {cobweb-launcher-0.0.14 → cobweb-launcher-0.0.16}/cobweb/single/__init__.py +0 -0
  27. {cobweb-launcher-0.0.14 → cobweb-launcher-0.0.16}/cobweb/single/models.py +0 -0
  28. {cobweb-launcher-0.0.14 → cobweb-launcher-0.0.16}/cobweb/single/nest.py +0 -0
  29. {cobweb-launcher-0.0.14 → cobweb-launcher-0.0.16}/cobweb/utils.py +0 -0
  30. {cobweb-launcher-0.0.14 → cobweb-launcher-0.0.16}/cobweb_launcher.egg-info/SOURCES.txt +0 -0
  31. {cobweb-launcher-0.0.14 → cobweb-launcher-0.0.16}/cobweb_launcher.egg-info/dependency_links.txt +0 -0
  32. {cobweb-launcher-0.0.14 → cobweb-launcher-0.0.16}/cobweb_launcher.egg-info/requires.txt +0 -0
  33. {cobweb-launcher-0.0.14 → cobweb-launcher-0.0.16}/cobweb_launcher.egg-info/top_level.txt +0 -0
  34. {cobweb-launcher-0.0.14 → cobweb-launcher-0.0.16}/setup.cfg +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: cobweb-launcher
3
- Version: 0.0.14
3
+ Version: 0.0.16
4
4
  Summary: spider_hole
5
5
  Home-page: https://github.com/Juannie-PP/cobweb
6
6
  Author: Juannie-PP
@@ -114,12 +114,16 @@ class Seed:
114
114
  return f'{self.__class__.__name__}({", ".join(chars)})'
115
115
 
116
116
  @property
117
- def format_seed(self, json_str=True):
117
+ def dict_seed(self):
118
118
  seed = self.__dict__.copy()
119
119
  del seed["_priority"]
120
120
  del seed["_version"]
121
121
  del seed["_retry"]
122
- return json.dumps(seed, ensure_ascii=False) if json_str else seed
122
+ return seed
123
+
124
+ @property
125
+ def format_seed(self):
126
+ return json.dumps(self.dict_seed, ensure_ascii=False)
123
127
 
124
128
 
125
129
  class DBItem:
@@ -68,10 +68,11 @@ def get_storer_db(db):
68
68
  raise TypeError()
69
69
 
70
70
 
71
- def check(stop, last, spider, scheduler, storer_list, ready_seed_length, spider_queue_length):
71
+ def check(model, stop, last, spider, scheduler, storer_list, ready_seed_length, spider_queue_length):
72
+ log.info("run check thread after 30 seconds...")
72
73
  time.sleep(30)
73
74
  spider_info = """
74
- ----------------------- check -----------------------
75
+ ------------------- check: %s ------------------
75
76
  running_spider_thread_num: {0}
76
77
  redis_ready_seed_length: {1}
77
78
  redis_spider_seed_length: {2}
@@ -80,20 +81,12 @@ def check(stop, last, spider, scheduler, storer_list, ready_seed_length, spider_
80
81
  {4}
81
82
  ----------------------- end -----------------------"""
82
83
  while True:
83
-
84
+ status = "running"
84
85
  running_spider_thread_num = spider.spider_in_progress.length
85
86
  redis_ready_seed_length = ready_seed_length()
86
87
  redis_spider_seed_length = spider_queue_length()
87
88
  memory_seed_queue_length = scheduler.queue.length
88
89
  storer_upload_queue_list = []
89
- storer_queue_empty = True
90
- for storer in storer_list:
91
- if storer.queue.length:
92
- storer_queue_empty = False
93
- storer_upload_queue_list.append(
94
- f"{storer.__class__.__name__}: {storer.queue.length}"
95
- )
96
- storer_upload_queue_length_info = "\n ".join(storer_upload_queue_list)
97
90
 
98
91
  if (
99
92
  scheduler.stop and
@@ -101,14 +94,31 @@ def check(stop, last, spider, scheduler, storer_list, ready_seed_length, spider_
101
94
  not memory_seed_queue_length and
102
95
  not running_spider_thread_num
103
96
  ):
104
- log.info("spider is done?")
97
+ if not model:
98
+ log.info("spider is done?")
105
99
  last.set()
100
+ time.sleep(3)
101
+ storer_queue_empty = True
102
+ for storer in storer_list:
103
+ if storer.queue.length:
104
+ storer_queue_empty = False
105
+ storer_upload_queue_list.append(
106
+ f"{storer.__class__.__name__} storer queue length: {storer.queue.length}"
107
+ )
106
108
  if storer_queue_empty and not redis_spider_seed_length:
107
- log.info("spider done!")
108
- break
109
- last.clear()
109
+ if model:
110
+ log.info("waiting for push seeds...")
111
+ status = "waiting"
112
+ time.sleep(30)
113
+ else:
114
+ log.info("spider done!")
115
+ break
116
+
117
+ last.clear()
110
118
 
119
+ storer_upload_queue_length_info = "\n ".join(storer_upload_queue_list)
111
120
  log.info(spider_info.format(
121
+ status,
112
122
  running_spider_thread_num,
113
123
  redis_ready_seed_length,
114
124
  redis_spider_seed_length,
@@ -182,7 +192,7 @@ def launcher(task):
182
192
  storer_info_list = [storer_info_list]
183
193
 
184
194
  # new item
185
- item = type("Item", (object,), {"redis_client": redis_db})()
195
+ item = type("Item", (object,), {"redis_client": redis_db.client})()
186
196
 
187
197
  if task.oss_config:
188
198
  item.oss = OssDB(**task.oss_config)
@@ -289,7 +299,7 @@ def launcher(task):
289
299
  # name="check_spider",
290
300
  target=check,
291
301
  args=(
292
- stop, last, spider,
302
+ task.model, stop, last, spider,
293
303
  scheduler, storer_list,
294
304
  redis_db.ready_seed_length,
295
305
  redis_db.spider_queue_length,
@@ -5,6 +5,7 @@ class Task:
5
5
 
6
6
  def __init__(
7
7
  self,
8
+ model=None,
8
9
  seeds=None,
9
10
  project=None,
10
11
  task_name=None,
@@ -30,6 +31,8 @@ class Task:
30
31
  :param storer_queue_length:
31
32
  :param scheduler_queue_length:
32
33
  """
34
+ self.model = model
35
+
33
36
  self.seeds = struct_start_seeds(seeds)
34
37
  self.project = project or "test"
35
38
  self.task_name = task_name or "spider"
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: cobweb-launcher
3
- Version: 0.0.14
3
+ Version: 0.0.16
4
4
  Summary: spider_hole
5
5
  Home-page: https://github.com/Juannie-PP/cobweb
6
6
  Author: Juannie-PP
@@ -5,7 +5,7 @@ with open("README.md", "r", encoding="utf-8") as fh:
5
5
 
6
6
  setup(
7
7
  name="cobweb-launcher",
8
- version="0.0.14",
8
+ version="0.0.16",
9
9
  packages=find_packages(),
10
10
  url="https://github.com/Juannie-PP/cobweb",
11
11
  license="MIT",