cobweb-launcher 1.2.50__tar.gz → 1.2.52__tar.gz
This diff shows the content of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of cobweb-launcher might be problematic. Click here for more details.
- {cobweb-launcher-1.2.50/cobweb_launcher.egg-info → cobweb-launcher-1.2.52}/PKG-INFO +1 -1
- {cobweb-launcher-1.2.50 → cobweb-launcher-1.2.52}/cobweb/db/redis_db.py +40 -18
- {cobweb-launcher-1.2.50 → cobweb-launcher-1.2.52}/cobweb/launchers/launcher.py +1 -0
- {cobweb-launcher-1.2.50 → cobweb-launcher-1.2.52}/cobweb/launchers/launcher_api.py +1 -1
- {cobweb-launcher-1.2.50 → cobweb-launcher-1.2.52}/cobweb/launchers/launcher_pro.py +12 -11
- {cobweb-launcher-1.2.50 → cobweb-launcher-1.2.52}/cobweb/setting.py +2 -0
- {cobweb-launcher-1.2.50 → cobweb-launcher-1.2.52/cobweb_launcher.egg-info}/PKG-INFO +1 -1
- {cobweb-launcher-1.2.50 → cobweb-launcher-1.2.52}/setup.py +1 -1
- {cobweb-launcher-1.2.50 → cobweb-launcher-1.2.52}/LICENSE +0 -0
- {cobweb-launcher-1.2.50 → cobweb-launcher-1.2.52}/README.md +0 -0
- {cobweb-launcher-1.2.50 → cobweb-launcher-1.2.52}/cobweb/__init__.py +0 -0
- {cobweb-launcher-1.2.50 → cobweb-launcher-1.2.52}/cobweb/base/__init__.py +0 -0
- {cobweb-launcher-1.2.50 → cobweb-launcher-1.2.52}/cobweb/base/common_queue.py +0 -0
- {cobweb-launcher-1.2.50 → cobweb-launcher-1.2.52}/cobweb/base/decorators.py +0 -0
- {cobweb-launcher-1.2.50 → cobweb-launcher-1.2.52}/cobweb/base/item.py +0 -0
- {cobweb-launcher-1.2.50 → cobweb-launcher-1.2.52}/cobweb/base/log.py +0 -0
- {cobweb-launcher-1.2.50 → cobweb-launcher-1.2.52}/cobweb/base/request.py +0 -0
- {cobweb-launcher-1.2.50 → cobweb-launcher-1.2.52}/cobweb/base/response.py +0 -0
- {cobweb-launcher-1.2.50 → cobweb-launcher-1.2.52}/cobweb/base/seed.py +0 -0
- {cobweb-launcher-1.2.50 → cobweb-launcher-1.2.52}/cobweb/constant.py +0 -0
- {cobweb-launcher-1.2.50 → cobweb-launcher-1.2.52}/cobweb/crawlers/__init__.py +0 -0
- {cobweb-launcher-1.2.50 → cobweb-launcher-1.2.52}/cobweb/crawlers/base_crawler.py +0 -0
- {cobweb-launcher-1.2.50 → cobweb-launcher-1.2.52}/cobweb/crawlers/crawler.py +0 -0
- {cobweb-launcher-1.2.50 → cobweb-launcher-1.2.52}/cobweb/crawlers/file_crawler.py +0 -0
- {cobweb-launcher-1.2.50 → cobweb-launcher-1.2.52}/cobweb/db/__init__.py +0 -0
- {cobweb-launcher-1.2.50 → cobweb-launcher-1.2.52}/cobweb/db/api_db.py +0 -0
- {cobweb-launcher-1.2.50 → cobweb-launcher-1.2.52}/cobweb/exceptions/__init__.py +0 -0
- {cobweb-launcher-1.2.50 → cobweb-launcher-1.2.52}/cobweb/exceptions/oss_db_exception.py +0 -0
- {cobweb-launcher-1.2.50 → cobweb-launcher-1.2.52}/cobweb/launchers/__init__.py +0 -0
- {cobweb-launcher-1.2.50 → cobweb-launcher-1.2.52}/cobweb/launchers/launcher_air.py +0 -0
- {cobweb-launcher-1.2.50 → cobweb-launcher-1.2.52}/cobweb/pipelines/__init__.py +0 -0
- {cobweb-launcher-1.2.50 → cobweb-launcher-1.2.52}/cobweb/pipelines/pipeline.py +0 -0
- {cobweb-launcher-1.2.50 → cobweb-launcher-1.2.52}/cobweb/pipelines/pipeline_console.py +0 -0
- {cobweb-launcher-1.2.50 → cobweb-launcher-1.2.52}/cobweb/pipelines/pipeline_loghub.py +0 -0
- {cobweb-launcher-1.2.50 → cobweb-launcher-1.2.52}/cobweb/utils/__init__.py +0 -0
- {cobweb-launcher-1.2.50 → cobweb-launcher-1.2.52}/cobweb/utils/bloom.py +0 -0
- {cobweb-launcher-1.2.50 → cobweb-launcher-1.2.52}/cobweb/utils/dotting.py +0 -0
- {cobweb-launcher-1.2.50 → cobweb-launcher-1.2.52}/cobweb/utils/oss.py +0 -0
- {cobweb-launcher-1.2.50 → cobweb-launcher-1.2.52}/cobweb/utils/tools.py +0 -0
- {cobweb-launcher-1.2.50 → cobweb-launcher-1.2.52}/cobweb_launcher.egg-info/SOURCES.txt +0 -0
- {cobweb-launcher-1.2.50 → cobweb-launcher-1.2.52}/cobweb_launcher.egg-info/dependency_links.txt +0 -0
- {cobweb-launcher-1.2.50 → cobweb-launcher-1.2.52}/cobweb_launcher.egg-info/requires.txt +0 -0
- {cobweb-launcher-1.2.50 → cobweb-launcher-1.2.52}/cobweb_launcher.egg-info/top_level.txt +0 -0
- {cobweb-launcher-1.2.50 → cobweb-launcher-1.2.52}/setup.cfg +0 -0
- {cobweb-launcher-1.2.50 → cobweb-launcher-1.2.52}/test/test.py +0 -0
|
@@ -6,60 +6,82 @@ class RedisDB:
|
|
|
6
6
|
|
|
7
7
|
def __init__(self, **kwargs):
|
|
8
8
|
redis_config = kwargs or setting.REDIS_CONFIG
|
|
9
|
-
pool = redis.ConnectionPool(**redis_config)
|
|
10
|
-
self._client = redis.Redis(connection_pool=pool)
|
|
9
|
+
# pool = redis.ConnectionPool(**redis_config)
|
|
10
|
+
self.pool = redis.ConnectionPool(
|
|
11
|
+
max_connections=25,
|
|
12
|
+
**redis_config
|
|
13
|
+
)
|
|
14
|
+
|
|
15
|
+
def get_connection(self):
|
|
16
|
+
return redis.StrictRedis(connection_pool=self.pool)
|
|
17
|
+
# self._client = redis.Redis(connection_pool=pool)
|
|
11
18
|
|
|
12
19
|
def setnx(self, name, value=""):
|
|
13
|
-
|
|
20
|
+
with self.get_connection() as client:
|
|
21
|
+
client.setnx(name, value)
|
|
14
22
|
|
|
15
23
|
def setex(self, name, t, value=""):
|
|
16
|
-
|
|
24
|
+
with self.get_connection() as client:
|
|
25
|
+
client.setex(name, t, value)
|
|
17
26
|
|
|
18
27
|
def expire(self, name, t, nx: bool = False, xx: bool = False, gt: bool = False, lt: bool = False):
|
|
19
|
-
|
|
28
|
+
with self.get_connection() as client:
|
|
29
|
+
client.expire(name, t, nx, xx, gt, lt)
|
|
20
30
|
|
|
21
31
|
def ttl(self, name):
|
|
22
|
-
|
|
32
|
+
with self.get_connection() as client:
|
|
33
|
+
return client.ttl(name)
|
|
23
34
|
|
|
24
35
|
def delete(self, name):
|
|
25
|
-
|
|
36
|
+
with self.get_connection() as client:
|
|
37
|
+
return client.delete(name)
|
|
26
38
|
|
|
27
39
|
def exists(self, *name) -> bool:
|
|
28
|
-
|
|
40
|
+
with self.get_connection() as client:
|
|
41
|
+
return client.exists(*name)
|
|
29
42
|
|
|
30
43
|
def sadd(self, name, value):
|
|
31
|
-
|
|
44
|
+
with self.get_connection() as client:
|
|
45
|
+
return client.sadd(name, value)
|
|
32
46
|
|
|
33
47
|
def zcard(self, name) -> bool:
|
|
34
|
-
|
|
48
|
+
with self.get_connection() as client:
|
|
49
|
+
return client.zcard(name)
|
|
35
50
|
|
|
36
51
|
def zadd(self, name, item: dict, **kwargs):
|
|
37
|
-
|
|
52
|
+
with self.get_connection() as client:
|
|
53
|
+
return client.zadd(name, item, **kwargs)
|
|
38
54
|
|
|
39
55
|
def zrem(self, name, *value):
|
|
40
|
-
|
|
56
|
+
with self.get_connection() as client:
|
|
57
|
+
return client.zrem(name, *value)
|
|
41
58
|
|
|
42
59
|
def zcount(self, name, _min, _max):
|
|
43
|
-
|
|
60
|
+
with self.get_connection() as client:
|
|
61
|
+
return client.zcount(name, _min, _max)
|
|
44
62
|
|
|
45
63
|
# def zrangebyscore(self, name, _min, _max, start, num, withscores: bool = False, *args):
|
|
46
|
-
#
|
|
64
|
+
# with self.get_connection() as client:
|
|
65
|
+
# return client.zrangebyscore(name, _min, _max, start, num, withscores, *args)
|
|
47
66
|
|
|
48
67
|
def lua(self, script: str, keys: list = None, args: list = None):
|
|
49
68
|
keys = keys or []
|
|
50
69
|
args = args or []
|
|
51
70
|
keys_count = len(keys)
|
|
52
|
-
|
|
71
|
+
with self.get_connection() as client:
|
|
72
|
+
return client.eval(script, keys_count, *keys, *args)
|
|
53
73
|
|
|
54
74
|
def lua_sha(self, sha1: str, keys: list = None, args: list = None):
|
|
55
75
|
keys = keys or []
|
|
56
76
|
args = args or []
|
|
57
77
|
keys_count = len(keys)
|
|
58
|
-
|
|
78
|
+
with self.get_connection() as client:
|
|
79
|
+
return client.evalsha(sha1, keys_count, *keys, *args)
|
|
59
80
|
|
|
60
81
|
def execute_lua(self, lua_script: str, keys: list, *args):
|
|
61
|
-
|
|
62
|
-
|
|
82
|
+
with self.get_connection() as client:
|
|
83
|
+
execute = client.register_script(lua_script)
|
|
84
|
+
return execute(keys=keys, args=args)
|
|
63
85
|
|
|
64
86
|
def lock(self, key, t=15) -> bool:
|
|
65
87
|
lua_script = """
|
|
@@ -100,6 +100,7 @@ class Launcher(threading.Thread):
|
|
|
100
100
|
self._spider_time_sleep = setting.SPIDER_TIME_SLEEP
|
|
101
101
|
self._spider_max_count = setting.SPIDER_MAX_COUNT
|
|
102
102
|
self._time_window = setting.TIME_WINDOW
|
|
103
|
+
self._speed_control = setting.SPEED_CONTROL
|
|
103
104
|
|
|
104
105
|
self._done_model = setting.DONE_MODEL
|
|
105
106
|
self._task_model = setting.TASK_MODEL
|
|
@@ -46,7 +46,7 @@ class LauncherApi(Launcher):
|
|
|
46
46
|
设置时间窗口为self._time_window(秒),判断在该窗口内的采集量是否满足阈值(self._spider_max_speed)
|
|
47
47
|
:return: True -> 种子, False -> None
|
|
48
48
|
"""
|
|
49
|
-
if (self.__LAUNCHER_QUEUE__["todo"].length and
|
|
49
|
+
if (self._speed_control and self.__LAUNCHER_QUEUE__["todo"].length and
|
|
50
50
|
not self._db.auto_incr(self._speed_control_key, t=self._time_window, limit=self._spider_max_count)):
|
|
51
51
|
expire_time = self._db.ttl(self._speed_control_key)
|
|
52
52
|
logger.info(f"Too fast! Please wait {expire_time} seconds...")
|
|
@@ -3,7 +3,7 @@ import threading
|
|
|
3
3
|
|
|
4
4
|
from cobweb.db import RedisDB
|
|
5
5
|
from cobweb.base import Seed, logger
|
|
6
|
-
from cobweb.utils import BloomFilter
|
|
6
|
+
# from cobweb.utils import BloomFilter
|
|
7
7
|
from cobweb.constant import DealModel, LogTemplate
|
|
8
8
|
from .launcher import Launcher, check_pause
|
|
9
9
|
|
|
@@ -43,17 +43,18 @@ class LauncherPro(Launcher):
|
|
|
43
43
|
self._db._client.incrby(key, count)
|
|
44
44
|
|
|
45
45
|
def _get_seed(self) -> Seed:
|
|
46
|
-
|
|
47
|
-
|
|
48
|
-
|
|
49
|
-
|
|
50
|
-
|
|
51
|
-
|
|
52
|
-
|
|
53
|
-
|
|
54
|
-
|
|
46
|
+
if self._speed_control:
|
|
47
|
+
spider_speed = self._db._client.get(self._speed_control_key)
|
|
48
|
+
if int(spider_speed or 0) > self._spider_max_count:
|
|
49
|
+
expire_time = self._db.ttl(self._speed_control_key)
|
|
50
|
+
if expire_time == -1:
|
|
51
|
+
self._db.delete(self._speed_control_key)
|
|
52
|
+
else:
|
|
53
|
+
logger.info(f"Too fast! Please wait {expire_time} seconds...")
|
|
54
|
+
time.sleep(expire_time / 2)
|
|
55
|
+
return None
|
|
55
56
|
seed = self.__LAUNCHER_QUEUE__["todo"].pop()
|
|
56
|
-
if seed and not self._db.lock(self._speed_control_key, t=self._time_window):
|
|
57
|
+
if self._speed_control and seed and not self._db.lock(self._speed_control_key, t=self._time_window):
|
|
57
58
|
self._db._client.incrby(self._speed_control_key, 1)
|
|
58
59
|
return seed
|
|
59
60
|
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{cobweb-launcher-1.2.50 → cobweb-launcher-1.2.52}/cobweb_launcher.egg-info/dependency_links.txt
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|