cobweb-launcher 0.0.14__tar.gz → 0.0.16__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (34)
  1. {cobweb-launcher-0.0.14 → cobweb-launcher-0.0.16}/PKG-INFO +1 -1
  2. {cobweb-launcher-0.0.14 → cobweb-launcher-0.0.16}/cobweb/bbb.py +6 -2
  3. {cobweb-launcher-0.0.14 → cobweb-launcher-0.0.16}/cobweb/distributed/launcher.py +27 -17
  4. {cobweb-launcher-0.0.14 → cobweb-launcher-0.0.16}/cobweb/task.py +3 -0
  5. {cobweb-launcher-0.0.14 → cobweb-launcher-0.0.16}/cobweb_launcher.egg-info/PKG-INFO +1 -1
  6. {cobweb-launcher-0.0.14 → cobweb-launcher-0.0.16}/setup.py +1 -1
  7. {cobweb-launcher-0.0.14 → cobweb-launcher-0.0.16}/LICENSE +0 -0
  8. {cobweb-launcher-0.0.14 → cobweb-launcher-0.0.16}/README.md +0 -0
  9. {cobweb-launcher-0.0.14 → cobweb-launcher-0.0.16}/cobweb/__init__.py +0 -0
  10. {cobweb-launcher-0.0.14 → cobweb-launcher-0.0.16}/cobweb/db/__init__.py +0 -0
  11. {cobweb-launcher-0.0.14 → cobweb-launcher-0.0.16}/cobweb/db/oss_db.py +0 -0
  12. {cobweb-launcher-0.0.14 → cobweb-launcher-0.0.16}/cobweb/db/redis_db.py +0 -0
  13. {cobweb-launcher-0.0.14 → cobweb-launcher-0.0.16}/cobweb/db/scheduler/__init__.py +0 -0
  14. {cobweb-launcher-0.0.14 → cobweb-launcher-0.0.16}/cobweb/db/scheduler/default.py +0 -0
  15. {cobweb-launcher-0.0.14 → cobweb-launcher-0.0.16}/cobweb/db/scheduler/textfile.py +0 -0
  16. {cobweb-launcher-0.0.14 → cobweb-launcher-0.0.16}/cobweb/db/storer/__init__.py +0 -0
  17. {cobweb-launcher-0.0.14 → cobweb-launcher-0.0.16}/cobweb/db/storer/console.py +0 -0
  18. {cobweb-launcher-0.0.14 → cobweb-launcher-0.0.16}/cobweb/db/storer/loghub.py +0 -0
  19. {cobweb-launcher-0.0.14 → cobweb-launcher-0.0.16}/cobweb/db/storer/redis.py +0 -0
  20. {cobweb-launcher-0.0.14 → cobweb-launcher-0.0.16}/cobweb/db/storer/textfile.py +0 -0
  21. {cobweb-launcher-0.0.14 → cobweb-launcher-0.0.16}/cobweb/decorators.py +0 -0
  22. {cobweb-launcher-0.0.14 → cobweb-launcher-0.0.16}/cobweb/distributed/__init__.py +0 -0
  23. {cobweb-launcher-0.0.14 → cobweb-launcher-0.0.16}/cobweb/distributed/models.py +0 -0
  24. {cobweb-launcher-0.0.14 → cobweb-launcher-0.0.16}/cobweb/interface.py +0 -0
  25. {cobweb-launcher-0.0.14 → cobweb-launcher-0.0.16}/cobweb/log.py +0 -0
  26. {cobweb-launcher-0.0.14 → cobweb-launcher-0.0.16}/cobweb/single/__init__.py +0 -0
  27. {cobweb-launcher-0.0.14 → cobweb-launcher-0.0.16}/cobweb/single/models.py +0 -0
  28. {cobweb-launcher-0.0.14 → cobweb-launcher-0.0.16}/cobweb/single/nest.py +0 -0
  29. {cobweb-launcher-0.0.14 → cobweb-launcher-0.0.16}/cobweb/utils.py +0 -0
  30. {cobweb-launcher-0.0.14 → cobweb-launcher-0.0.16}/cobweb_launcher.egg-info/SOURCES.txt +0 -0
  31. {cobweb-launcher-0.0.14 → cobweb-launcher-0.0.16}/cobweb_launcher.egg-info/dependency_links.txt +0 -0
  32. {cobweb-launcher-0.0.14 → cobweb-launcher-0.0.16}/cobweb_launcher.egg-info/requires.txt +0 -0
  33. {cobweb-launcher-0.0.14 → cobweb-launcher-0.0.16}/cobweb_launcher.egg-info/top_level.txt +0 -0
  34. {cobweb-launcher-0.0.14 → cobweb-launcher-0.0.16}/setup.cfg +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: cobweb-launcher
3
- Version: 0.0.14
3
+ Version: 0.0.16
4
4
  Summary: spider_hole
5
5
  Home-page: https://github.com/Juannie-PP/cobweb
6
6
  Author: Juannie-PP
@@ -114,12 +114,16 @@ class Seed:
114
114
  return f'{self.__class__.__name__}({", ".join(chars)})'
115
115
 
116
116
  @property
117
- def format_seed(self, json_str=True):
117
+ def dict_seed(self):
118
118
  seed = self.__dict__.copy()
119
119
  del seed["_priority"]
120
120
  del seed["_version"]
121
121
  del seed["_retry"]
122
- return json.dumps(seed, ensure_ascii=False) if json_str else seed
122
+ return seed
123
+
124
+ @property
125
+ def format_seed(self):
126
+ return json.dumps(self.dict_seed, ensure_ascii=False)
123
127
 
124
128
 
125
129
  class DBItem:
@@ -68,10 +68,11 @@ def get_storer_db(db):
68
68
  raise TypeError()
69
69
 
70
70
 
71
- def check(stop, last, spider, scheduler, storer_list, ready_seed_length, spider_queue_length):
71
+ def check(model, stop, last, spider, scheduler, storer_list, ready_seed_length, spider_queue_length):
72
+ log.info("run check thread after 30 seconds...")
72
73
  time.sleep(30)
73
74
  spider_info = """
74
- ----------------------- check -----------------------
75
+ ------------------- check: %s ------------------
75
76
  running_spider_thread_num: {0}
76
77
  redis_ready_seed_length: {1}
77
78
  redis_spider_seed_length: {2}
@@ -80,20 +81,12 @@ def check(stop, last, spider, scheduler, storer_list, ready_seed_length, spider_
80
81
  {4}
81
82
  ----------------------- end -----------------------"""
82
83
  while True:
83
-
84
+ status = "running"
84
85
  running_spider_thread_num = spider.spider_in_progress.length
85
86
  redis_ready_seed_length = ready_seed_length()
86
87
  redis_spider_seed_length = spider_queue_length()
87
88
  memory_seed_queue_length = scheduler.queue.length
88
89
  storer_upload_queue_list = []
89
- storer_queue_empty = True
90
- for storer in storer_list:
91
- if storer.queue.length:
92
- storer_queue_empty = False
93
- storer_upload_queue_list.append(
94
- f"{storer.__class__.__name__}: {storer.queue.length}"
95
- )
96
- storer_upload_queue_length_info = "\n ".join(storer_upload_queue_list)
97
90
 
98
91
  if (
99
92
  scheduler.stop and
@@ -101,14 +94,31 @@ def check(stop, last, spider, scheduler, storer_list, ready_seed_length, spider_
101
94
  not memory_seed_queue_length and
102
95
  not running_spider_thread_num
103
96
  ):
104
- log.info("spider is done?")
97
+ if not model:
98
+ log.info("spider is done?")
105
99
  last.set()
100
+ time.sleep(3)
101
+ storer_queue_empty = True
102
+ for storer in storer_list:
103
+ if storer.queue.length:
104
+ storer_queue_empty = False
105
+ storer_upload_queue_list.append(
106
+ f"{storer.__class__.__name__} storer queue length: {storer.queue.length}"
107
+ )
106
108
  if storer_queue_empty and not redis_spider_seed_length:
107
- log.info("spider done!")
108
- break
109
- last.clear()
109
+ if model:
110
+ log.info("waiting for push seeds...")
111
+ status = "waiting"
112
+ time.sleep(30)
113
+ else:
114
+ log.info("spider done!")
115
+ break
116
+
117
+ last.clear()
110
118
 
119
+ storer_upload_queue_length_info = "\n ".join(storer_upload_queue_list)
111
120
  log.info(spider_info.format(
121
+ status,
112
122
  running_spider_thread_num,
113
123
  redis_ready_seed_length,
114
124
  redis_spider_seed_length,
@@ -182,7 +192,7 @@ def launcher(task):
182
192
  storer_info_list = [storer_info_list]
183
193
 
184
194
  # new item
185
- item = type("Item", (object,), {"redis_client": redis_db})()
195
+ item = type("Item", (object,), {"redis_client": redis_db.client})()
186
196
 
187
197
  if task.oss_config:
188
198
  item.oss = OssDB(**task.oss_config)
@@ -289,7 +299,7 @@ def launcher(task):
289
299
  # name="check_spider",
290
300
  target=check,
291
301
  args=(
292
- stop, last, spider,
302
+ task.model, stop, last, spider,
293
303
  scheduler, storer_list,
294
304
  redis_db.ready_seed_length,
295
305
  redis_db.spider_queue_length,
@@ -5,6 +5,7 @@ class Task:
5
5
 
6
6
  def __init__(
7
7
  self,
8
+ model=None,
8
9
  seeds=None,
9
10
  project=None,
10
11
  task_name=None,
@@ -30,6 +31,8 @@ class Task:
30
31
  :param storer_queue_length:
31
32
  :param scheduler_queue_length:
32
33
  """
34
+ self.model = model
35
+
33
36
  self.seeds = struct_start_seeds(seeds)
34
37
  self.project = project or "test"
35
38
  self.task_name = task_name or "spider"
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: cobweb-launcher
3
- Version: 0.0.14
3
+ Version: 0.0.16
4
4
  Summary: spider_hole
5
5
  Home-page: https://github.com/Juannie-PP/cobweb
6
6
  Author: Juannie-PP
@@ -5,7 +5,7 @@ with open("README.md", "r", encoding="utf-8") as fh:
5
5
 
6
6
  setup(
7
7
  name="cobweb-launcher",
8
- version="0.0.14",
8
+ version="0.0.16",
9
9
  packages=find_packages(),
10
10
  url="https://github.com/Juannie-PP/cobweb",
11
11
  license="MIT",