cobweb-launcher 1.2.61__tar.gz → 1.2.63__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {cobweb-launcher-1.2.61/cobweb_launcher.egg-info → cobweb-launcher-1.2.63}/PKG-INFO +1 -1
- {cobweb-launcher-1.2.61 → cobweb-launcher-1.2.63}/cobweb/crawlers/crawler.py +1 -1
- {cobweb-launcher-1.2.61 → cobweb-launcher-1.2.63}/cobweb/launchers/launcher.py +2 -2
- {cobweb-launcher-1.2.61 → cobweb-launcher-1.2.63}/cobweb/launchers/launcher_air.py +11 -6
- {cobweb-launcher-1.2.61 → cobweb-launcher-1.2.63}/cobweb/launchers/launcher_api.py +17 -8
- {cobweb-launcher-1.2.61 → cobweb-launcher-1.2.63}/cobweb/launchers/launcher_pro.py +13 -7
- {cobweb-launcher-1.2.61 → cobweb-launcher-1.2.63/cobweb_launcher.egg-info}/PKG-INFO +1 -1
- {cobweb-launcher-1.2.61 → cobweb-launcher-1.2.63}/setup.py +1 -1
- {cobweb-launcher-1.2.61 → cobweb-launcher-1.2.63}/LICENSE +0 -0
- {cobweb-launcher-1.2.61 → cobweb-launcher-1.2.63}/README.md +0 -0
- {cobweb-launcher-1.2.61 → cobweb-launcher-1.2.63}/cobweb/__init__.py +0 -0
- {cobweb-launcher-1.2.61 → cobweb-launcher-1.2.63}/cobweb/base/__init__.py +0 -0
- {cobweb-launcher-1.2.61 → cobweb-launcher-1.2.63}/cobweb/base/common_queue.py +0 -0
- {cobweb-launcher-1.2.61 → cobweb-launcher-1.2.63}/cobweb/base/decorators.py +0 -0
- {cobweb-launcher-1.2.61 → cobweb-launcher-1.2.63}/cobweb/base/item.py +0 -0
- {cobweb-launcher-1.2.61 → cobweb-launcher-1.2.63}/cobweb/base/log.py +0 -0
- {cobweb-launcher-1.2.61 → cobweb-launcher-1.2.63}/cobweb/base/request.py +0 -0
- {cobweb-launcher-1.2.61 → cobweb-launcher-1.2.63}/cobweb/base/response.py +0 -0
- {cobweb-launcher-1.2.61 → cobweb-launcher-1.2.63}/cobweb/base/seed.py +0 -0
- {cobweb-launcher-1.2.61 → cobweb-launcher-1.2.63}/cobweb/constant.py +0 -0
- {cobweb-launcher-1.2.61 → cobweb-launcher-1.2.63}/cobweb/crawlers/__init__.py +0 -0
- {cobweb-launcher-1.2.61 → cobweb-launcher-1.2.63}/cobweb/crawlers/base_crawler.py +0 -0
- {cobweb-launcher-1.2.61 → cobweb-launcher-1.2.63}/cobweb/crawlers/file_crawler.py +0 -0
- {cobweb-launcher-1.2.61 → cobweb-launcher-1.2.63}/cobweb/db/__init__.py +0 -0
- {cobweb-launcher-1.2.61 → cobweb-launcher-1.2.63}/cobweb/db/api_db.py +0 -0
- {cobweb-launcher-1.2.61 → cobweb-launcher-1.2.63}/cobweb/db/redis_db.py +0 -0
- {cobweb-launcher-1.2.61 → cobweb-launcher-1.2.63}/cobweb/db/redis_db_new.py +0 -0
- {cobweb-launcher-1.2.61 → cobweb-launcher-1.2.63}/cobweb/exceptions/__init__.py +0 -0
- {cobweb-launcher-1.2.61 → cobweb-launcher-1.2.63}/cobweb/exceptions/oss_db_exception.py +0 -0
- {cobweb-launcher-1.2.61 → cobweb-launcher-1.2.63}/cobweb/launchers/__init__.py +0 -0
- {cobweb-launcher-1.2.61 → cobweb-launcher-1.2.63}/cobweb/pipelines/__init__.py +0 -0
- {cobweb-launcher-1.2.61 → cobweb-launcher-1.2.63}/cobweb/pipelines/pipeline.py +0 -0
- {cobweb-launcher-1.2.61 → cobweb-launcher-1.2.63}/cobweb/pipelines/pipeline_console.py +0 -0
- {cobweb-launcher-1.2.61 → cobweb-launcher-1.2.63}/cobweb/pipelines/pipeline_loghub.py +0 -0
- {cobweb-launcher-1.2.61 → cobweb-launcher-1.2.63}/cobweb/setting.py +0 -0
- {cobweb-launcher-1.2.61 → cobweb-launcher-1.2.63}/cobweb/utils/__init__.py +0 -0
- {cobweb-launcher-1.2.61 → cobweb-launcher-1.2.63}/cobweb/utils/bloom.py +0 -0
- {cobweb-launcher-1.2.61 → cobweb-launcher-1.2.63}/cobweb/utils/dotting.py +0 -0
- {cobweb-launcher-1.2.61 → cobweb-launcher-1.2.63}/cobweb/utils/oss.py +0 -0
- {cobweb-launcher-1.2.61 → cobweb-launcher-1.2.63}/cobweb/utils/tools.py +0 -0
- {cobweb-launcher-1.2.61 → cobweb-launcher-1.2.63}/cobweb_launcher.egg-info/SOURCES.txt +0 -0
- {cobweb-launcher-1.2.61 → cobweb-launcher-1.2.63}/cobweb_launcher.egg-info/dependency_links.txt +0 -0
- {cobweb-launcher-1.2.61 → cobweb-launcher-1.2.63}/cobweb_launcher.egg-info/requires.txt +0 -0
- {cobweb-launcher-1.2.61 → cobweb-launcher-1.2.63}/cobweb_launcher.egg-info/top_level.txt +0 -0
- {cobweb-launcher-1.2.61 → cobweb-launcher-1.2.63}/setup.cfg +0 -0
- {cobweb-launcher-1.2.61 → cobweb-launcher-1.2.63}/test/test.py +0 -0
@@ -83,7 +83,7 @@ class Crawler(threading.Thread):
|
|
83
83
|
if isinstance(item, BaseItem):
|
84
84
|
self._upload_data(item)
|
85
85
|
elif isinstance(item, Seed):
|
86
|
-
self._add_seed(item)
|
86
|
+
self._add_seed((seed, item))
|
87
87
|
elif isinstance(item, str) and item == DealModel.poll:
|
88
88
|
self._set_seed(seed)
|
89
89
|
elif isinstance(item, str) and item == DealModel.done:
|
@@ -173,8 +173,8 @@ class Launcher(threading.Thread):
|
|
173
173
|
def _upload_data(self, data, **kwargs):
|
174
174
|
self.__LAUNCHER_QUEUE__["upload"].push(data, **kwargs)
|
175
175
|
|
176
|
-
def _add_seed(self,
|
177
|
-
self.__LAUNCHER_QUEUE__["new"].push(
|
176
|
+
def _add_seed(self, seeds, **kwargs):
|
177
|
+
self.__LAUNCHER_QUEUE__["new"].push(seeds, direct_insertion=True, **kwargs)
|
178
178
|
|
179
179
|
def _delete_seed(self, seed, **kwargs):
|
180
180
|
self.__LAUNCHER_QUEUE__["done"].push(seed, **kwargs)
|
@@ -13,15 +13,20 @@ class LauncherAir(Launcher):
|
|
13
13
|
|
14
14
|
@check_pause
|
15
15
|
def _insert(self):
|
16
|
-
|
16
|
+
new_seeds = {}
|
17
|
+
del_seeds = set()
|
17
18
|
status = self.__LAUNCHER_QUEUE__['new'].length < self._new_queue_max_size
|
18
19
|
for _ in range(self._new_queue_max_size):
|
19
|
-
|
20
|
-
if not
|
20
|
+
seed_tuple = self.__LAUNCHER_QUEUE__['new'].pop()
|
21
|
+
if not seed_tuple:
|
21
22
|
break
|
22
|
-
|
23
|
-
|
24
|
-
|
23
|
+
seed, new_seed = seed_tuple
|
24
|
+
new_seeds[new_seed.to_string] = new_seed.params.priority
|
25
|
+
del_seeds.add(seed.to_string)
|
26
|
+
if new_seeds:
|
27
|
+
self.__LAUNCHER_QUEUE__['todo'].push(new_seeds)
|
28
|
+
if del_seeds:
|
29
|
+
self.__LAUNCHER_QUEUE__['done'].push(del_seeds)
|
25
30
|
if status:
|
26
31
|
time.sleep(self._new_queue_wait_seconds)
|
27
32
|
|
@@ -49,8 +49,11 @@ class LauncherApi(Launcher):
|
|
49
49
|
if (self._speed_control and self.__LAUNCHER_QUEUE__["todo"].length and
|
50
50
|
not self._db.auto_incr(self._speed_control_key, t=self._time_window, limit=self._spider_max_count)):
|
51
51
|
expire_time = self._db.ttl(self._speed_control_key)
|
52
|
-
|
53
|
-
|
52
|
+
if isinstance(expire_time, int) and expire_time <= -1:
|
53
|
+
self._db.delete(self._speed_control_key)
|
54
|
+
elif isinstance(expire_time, int):
|
55
|
+
logger.info(f"Too fast! Please wait {expire_time} seconds...")
|
56
|
+
time.sleep(expire_time / 2)
|
54
57
|
return None
|
55
58
|
seed = self.__LAUNCHER_QUEUE__["todo"].pop()
|
56
59
|
return seed
|
@@ -106,14 +109,20 @@ class LauncherApi(Launcher):
|
|
106
109
|
"""
|
107
110
|
添加新种子到redis队列中
|
108
111
|
"""
|
109
|
-
|
112
|
+
new_seeds = {}
|
113
|
+
del_seeds = set()
|
110
114
|
status = self.__LAUNCHER_QUEUE__['new'].length < self._new_queue_max_size
|
111
115
|
for _ in range(self._new_queue_max_size):
|
112
|
-
|
113
|
-
if
|
114
|
-
|
115
|
-
|
116
|
-
|
116
|
+
seed_tuple = self.__LAUNCHER_QUEUE__['new'].pop()
|
117
|
+
if not seed_tuple:
|
118
|
+
break
|
119
|
+
seed, new_seed = seed_tuple
|
120
|
+
new_seeds[new_seed.to_string] = new_seed.params.priority
|
121
|
+
del_seeds.add(seed.to_string)
|
122
|
+
if new_seeds:
|
123
|
+
self._db.zadd(self._todo_key, new_seeds, nx=True)
|
124
|
+
if del_seeds:
|
125
|
+
self.__LAUNCHER_QUEUE__['done'].push(del_seeds)
|
117
126
|
if status:
|
118
127
|
time.sleep(self._new_queue_wait_seconds)
|
119
128
|
|
@@ -47,7 +47,7 @@ class LauncherPro(Launcher):
|
|
47
47
|
spider_speed = self._db.get(self._speed_control_key)
|
48
48
|
if int(spider_speed or 0) > self._spider_max_count:
|
49
49
|
expire_time = self._db.ttl(self._speed_control_key)
|
50
|
-
if expire_time <= -1:
|
50
|
+
if isinstance(expire_time, int) and expire_time <= -1:
|
51
51
|
self._db.delete(self._speed_control_key)
|
52
52
|
elif isinstance(expire_time, int):
|
53
53
|
logger.info(f"Too fast! Please wait {expire_time} seconds...")
|
@@ -108,14 +108,20 @@ class LauncherPro(Launcher):
|
|
108
108
|
"""
|
109
109
|
添加新种子到redis队列中
|
110
110
|
"""
|
111
|
-
|
111
|
+
new_seeds = {}
|
112
|
+
del_seeds = set()
|
112
113
|
status = self.__LAUNCHER_QUEUE__['new'].length < self._new_queue_max_size
|
113
114
|
for _ in range(self._new_queue_max_size):
|
114
|
-
|
115
|
-
if
|
116
|
-
|
117
|
-
|
118
|
-
|
115
|
+
seed_tuple = self.__LAUNCHER_QUEUE__['new'].pop()
|
116
|
+
if not seed_tuple:
|
117
|
+
break
|
118
|
+
seed, new_seed = seed_tuple
|
119
|
+
new_seeds[new_seed.to_string] = new_seed.params.priority
|
120
|
+
del_seeds.add(seed.to_string)
|
121
|
+
if new_seeds:
|
122
|
+
self._db.zadd(self._todo_key, new_seeds, nx=True)
|
123
|
+
if del_seeds:
|
124
|
+
self.__LAUNCHER_QUEUE__['done'].push(del_seeds)
|
119
125
|
if status:
|
120
126
|
time.sleep(self._new_queue_wait_seconds)
|
121
127
|
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
{cobweb-launcher-1.2.61 → cobweb-launcher-1.2.63}/cobweb_launcher.egg-info/dependency_links.txt
RENAMED
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|