cobweb-launcher 1.2.61__tar.gz → 1.2.63__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (46):
  1. {cobweb-launcher-1.2.61/cobweb_launcher.egg-info → cobweb-launcher-1.2.63}/PKG-INFO +1 -1
  2. {cobweb-launcher-1.2.61 → cobweb-launcher-1.2.63}/cobweb/crawlers/crawler.py +1 -1
  3. {cobweb-launcher-1.2.61 → cobweb-launcher-1.2.63}/cobweb/launchers/launcher.py +2 -2
  4. {cobweb-launcher-1.2.61 → cobweb-launcher-1.2.63}/cobweb/launchers/launcher_air.py +11 -6
  5. {cobweb-launcher-1.2.61 → cobweb-launcher-1.2.63}/cobweb/launchers/launcher_api.py +17 -8
  6. {cobweb-launcher-1.2.61 → cobweb-launcher-1.2.63}/cobweb/launchers/launcher_pro.py +13 -7
  7. {cobweb-launcher-1.2.61 → cobweb-launcher-1.2.63/cobweb_launcher.egg-info}/PKG-INFO +1 -1
  8. {cobweb-launcher-1.2.61 → cobweb-launcher-1.2.63}/setup.py +1 -1
  9. {cobweb-launcher-1.2.61 → cobweb-launcher-1.2.63}/LICENSE +0 -0
  10. {cobweb-launcher-1.2.61 → cobweb-launcher-1.2.63}/README.md +0 -0
  11. {cobweb-launcher-1.2.61 → cobweb-launcher-1.2.63}/cobweb/__init__.py +0 -0
  12. {cobweb-launcher-1.2.61 → cobweb-launcher-1.2.63}/cobweb/base/__init__.py +0 -0
  13. {cobweb-launcher-1.2.61 → cobweb-launcher-1.2.63}/cobweb/base/common_queue.py +0 -0
  14. {cobweb-launcher-1.2.61 → cobweb-launcher-1.2.63}/cobweb/base/decorators.py +0 -0
  15. {cobweb-launcher-1.2.61 → cobweb-launcher-1.2.63}/cobweb/base/item.py +0 -0
  16. {cobweb-launcher-1.2.61 → cobweb-launcher-1.2.63}/cobweb/base/log.py +0 -0
  17. {cobweb-launcher-1.2.61 → cobweb-launcher-1.2.63}/cobweb/base/request.py +0 -0
  18. {cobweb-launcher-1.2.61 → cobweb-launcher-1.2.63}/cobweb/base/response.py +0 -0
  19. {cobweb-launcher-1.2.61 → cobweb-launcher-1.2.63}/cobweb/base/seed.py +0 -0
  20. {cobweb-launcher-1.2.61 → cobweb-launcher-1.2.63}/cobweb/constant.py +0 -0
  21. {cobweb-launcher-1.2.61 → cobweb-launcher-1.2.63}/cobweb/crawlers/__init__.py +0 -0
  22. {cobweb-launcher-1.2.61 → cobweb-launcher-1.2.63}/cobweb/crawlers/base_crawler.py +0 -0
  23. {cobweb-launcher-1.2.61 → cobweb-launcher-1.2.63}/cobweb/crawlers/file_crawler.py +0 -0
  24. {cobweb-launcher-1.2.61 → cobweb-launcher-1.2.63}/cobweb/db/__init__.py +0 -0
  25. {cobweb-launcher-1.2.61 → cobweb-launcher-1.2.63}/cobweb/db/api_db.py +0 -0
  26. {cobweb-launcher-1.2.61 → cobweb-launcher-1.2.63}/cobweb/db/redis_db.py +0 -0
  27. {cobweb-launcher-1.2.61 → cobweb-launcher-1.2.63}/cobweb/db/redis_db_new.py +0 -0
  28. {cobweb-launcher-1.2.61 → cobweb-launcher-1.2.63}/cobweb/exceptions/__init__.py +0 -0
  29. {cobweb-launcher-1.2.61 → cobweb-launcher-1.2.63}/cobweb/exceptions/oss_db_exception.py +0 -0
  30. {cobweb-launcher-1.2.61 → cobweb-launcher-1.2.63}/cobweb/launchers/__init__.py +0 -0
  31. {cobweb-launcher-1.2.61 → cobweb-launcher-1.2.63}/cobweb/pipelines/__init__.py +0 -0
  32. {cobweb-launcher-1.2.61 → cobweb-launcher-1.2.63}/cobweb/pipelines/pipeline.py +0 -0
  33. {cobweb-launcher-1.2.61 → cobweb-launcher-1.2.63}/cobweb/pipelines/pipeline_console.py +0 -0
  34. {cobweb-launcher-1.2.61 → cobweb-launcher-1.2.63}/cobweb/pipelines/pipeline_loghub.py +0 -0
  35. {cobweb-launcher-1.2.61 → cobweb-launcher-1.2.63}/cobweb/setting.py +0 -0
  36. {cobweb-launcher-1.2.61 → cobweb-launcher-1.2.63}/cobweb/utils/__init__.py +0 -0
  37. {cobweb-launcher-1.2.61 → cobweb-launcher-1.2.63}/cobweb/utils/bloom.py +0 -0
  38. {cobweb-launcher-1.2.61 → cobweb-launcher-1.2.63}/cobweb/utils/dotting.py +0 -0
  39. {cobweb-launcher-1.2.61 → cobweb-launcher-1.2.63}/cobweb/utils/oss.py +0 -0
  40. {cobweb-launcher-1.2.61 → cobweb-launcher-1.2.63}/cobweb/utils/tools.py +0 -0
  41. {cobweb-launcher-1.2.61 → cobweb-launcher-1.2.63}/cobweb_launcher.egg-info/SOURCES.txt +0 -0
  42. {cobweb-launcher-1.2.61 → cobweb-launcher-1.2.63}/cobweb_launcher.egg-info/dependency_links.txt +0 -0
  43. {cobweb-launcher-1.2.61 → cobweb-launcher-1.2.63}/cobweb_launcher.egg-info/requires.txt +0 -0
  44. {cobweb-launcher-1.2.61 → cobweb-launcher-1.2.63}/cobweb_launcher.egg-info/top_level.txt +0 -0
  45. {cobweb-launcher-1.2.61 → cobweb-launcher-1.2.63}/setup.cfg +0 -0
  46. {cobweb-launcher-1.2.61 → cobweb-launcher-1.2.63}/test/test.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: cobweb-launcher
3
- Version: 1.2.61
3
+ Version: 1.2.63
4
4
  Summary: spider_hole
5
5
  Home-page: https://github.com/Juannie-PP/cobweb
6
6
  Author: Juannie-PP
@@ -83,7 +83,7 @@ class Crawler(threading.Thread):
83
83
  if isinstance(item, BaseItem):
84
84
  self._upload_data(item)
85
85
  elif isinstance(item, Seed):
86
- self._add_seed(item)
86
+ self._add_seed((seed, item))
87
87
  elif isinstance(item, str) and item == DealModel.poll:
88
88
  self._set_seed(seed)
89
89
  elif isinstance(item, str) and item == DealModel.done:
@@ -173,8 +173,8 @@ class Launcher(threading.Thread):
173
173
  def _upload_data(self, data, **kwargs):
174
174
  self.__LAUNCHER_QUEUE__["upload"].push(data, **kwargs)
175
175
 
176
- def _add_seed(self, seed, **kwargs):
177
- self.__LAUNCHER_QUEUE__["new"].push(seed, **kwargs)
176
+ def _add_seed(self, seeds, **kwargs):
177
+ self.__LAUNCHER_QUEUE__["new"].push(seeds, direct_insertion=True, **kwargs)
178
178
 
179
179
  def _delete_seed(self, seed, **kwargs):
180
180
  self.__LAUNCHER_QUEUE__["done"].push(seed, **kwargs)
@@ -13,15 +13,20 @@ class LauncherAir(Launcher):
13
13
 
14
14
  @check_pause
15
15
  def _insert(self):
16
- seeds = {}
16
+ new_seeds = {}
17
+ del_seeds = set()
17
18
  status = self.__LAUNCHER_QUEUE__['new'].length < self._new_queue_max_size
18
19
  for _ in range(self._new_queue_max_size):
19
- seed = self.__LAUNCHER_QUEUE__['new'].pop()
20
- if not seed:
20
+ seed_tuple = self.__LAUNCHER_QUEUE__['new'].pop()
21
+ if not seed_tuple:
21
22
  break
22
- seeds[seed.to_string] = seed.params.priority
23
- if seeds:
24
- self.__LAUNCHER_QUEUE__['todo'].push(seeds)
23
+ seed, new_seed = seed_tuple
24
+ new_seeds[new_seed.to_string] = new_seed.params.priority
25
+ del_seeds.add(seed.to_string)
26
+ if new_seeds:
27
+ self.__LAUNCHER_QUEUE__['todo'].push(new_seeds)
28
+ if del_seeds:
29
+ self.__LAUNCHER_QUEUE__['done'].push(del_seeds)
25
30
  if status:
26
31
  time.sleep(self._new_queue_wait_seconds)
27
32
 
@@ -49,8 +49,11 @@ class LauncherApi(Launcher):
49
49
  if (self._speed_control and self.__LAUNCHER_QUEUE__["todo"].length and
50
50
  not self._db.auto_incr(self._speed_control_key, t=self._time_window, limit=self._spider_max_count)):
51
51
  expire_time = self._db.ttl(self._speed_control_key)
52
- logger.info(f"Too fast! Please wait {expire_time} seconds...")
53
- time.sleep(expire_time / 2)
52
+ if isinstance(expire_time, int) and expire_time <= -1:
53
+ self._db.delete(self._speed_control_key)
54
+ elif isinstance(expire_time, int):
55
+ logger.info(f"Too fast! Please wait {expire_time} seconds...")
56
+ time.sleep(expire_time / 2)
54
57
  return None
55
58
  seed = self.__LAUNCHER_QUEUE__["todo"].pop()
56
59
  return seed
@@ -106,14 +109,20 @@ class LauncherApi(Launcher):
106
109
  """
107
110
  添加新种子到redis队列中
108
111
  """
109
- seeds = {}
112
+ new_seeds = {}
113
+ del_seeds = set()
110
114
  status = self.__LAUNCHER_QUEUE__['new'].length < self._new_queue_max_size
111
115
  for _ in range(self._new_queue_max_size):
112
- seed = self.__LAUNCHER_QUEUE__['new'].pop()
113
- if seed:
114
- seeds[seed.to_string] = seed.params.priority
115
- if seeds:
116
- self._db.zadd(self._todo_key, seeds, nx=True)
116
+ seed_tuple = self.__LAUNCHER_QUEUE__['new'].pop()
117
+ if not seed_tuple:
118
+ break
119
+ seed, new_seed = seed_tuple
120
+ new_seeds[new_seed.to_string] = new_seed.params.priority
121
+ del_seeds.add(seed.to_string)
122
+ if new_seeds:
123
+ self._db.zadd(self._todo_key, new_seeds, nx=True)
124
+ if del_seeds:
125
+ self.__LAUNCHER_QUEUE__['done'].push(del_seeds)
117
126
  if status:
118
127
  time.sleep(self._new_queue_wait_seconds)
119
128
 
@@ -47,7 +47,7 @@ class LauncherPro(Launcher):
47
47
  spider_speed = self._db.get(self._speed_control_key)
48
48
  if int(spider_speed or 0) > self._spider_max_count:
49
49
  expire_time = self._db.ttl(self._speed_control_key)
50
- if expire_time <= -1:
50
+ if isinstance(expire_time, int) and expire_time <= -1:
51
51
  self._db.delete(self._speed_control_key)
52
52
  elif isinstance(expire_time, int):
53
53
  logger.info(f"Too fast! Please wait {expire_time} seconds...")
@@ -108,14 +108,20 @@ class LauncherPro(Launcher):
108
108
  """
109
109
  添加新种子到redis队列中
110
110
  """
111
- seeds = {}
111
+ new_seeds = {}
112
+ del_seeds = set()
112
113
  status = self.__LAUNCHER_QUEUE__['new'].length < self._new_queue_max_size
113
114
  for _ in range(self._new_queue_max_size):
114
- seed = self.__LAUNCHER_QUEUE__['new'].pop()
115
- if seed:
116
- seeds[seed.to_string] = seed.params.priority
117
- if seeds:
118
- self._db.zadd(self._todo_key, seeds, nx=True)
115
+ seed_tuple = self.__LAUNCHER_QUEUE__['new'].pop()
116
+ if not seed_tuple:
117
+ break
118
+ seed, new_seed = seed_tuple
119
+ new_seeds[new_seed.to_string] = new_seed.params.priority
120
+ del_seeds.add(seed.to_string)
121
+ if new_seeds:
122
+ self._db.zadd(self._todo_key, new_seeds, nx=True)
123
+ if del_seeds:
124
+ self.__LAUNCHER_QUEUE__['done'].push(del_seeds)
119
125
  if status:
120
126
  time.sleep(self._new_queue_wait_seconds)
121
127
 
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: cobweb-launcher
3
- Version: 1.2.61
3
+ Version: 1.2.63
4
4
  Summary: spider_hole
5
5
  Home-page: https://github.com/Juannie-PP/cobweb
6
6
  Author: Juannie-PP
@@ -5,7 +5,7 @@ with open("README.md", "r", encoding="utf-8") as fh:
5
5
 
6
6
  setup(
7
7
  name="cobweb-launcher",
8
- version="1.2.61",
8
+ version="1.2.63",
9
9
  packages=find_packages(),
10
10
  url="https://github.com/Juannie-PP/cobweb",
11
11
  license="MIT",