cobweb-launcher 1.2.50__py3-none-any.whl → 1.2.52__py3-none-any.whl

This diff shows the differences between two publicly available versions of this package, as released to one of the supported registries. It is provided for informational purposes only and reflects the changes between the package versions exactly as they appear in their respective public registries.

Potentially problematic release.


This version of cobweb-launcher might be problematic. Click here for more details.

cobweb/db/redis_db.py CHANGED
@@ -6,60 +6,82 @@ class RedisDB:
6
6
 
7
7
  def __init__(self, **kwargs):
8
8
  redis_config = kwargs or setting.REDIS_CONFIG
9
- pool = redis.ConnectionPool(**redis_config)
10
- self._client = redis.Redis(connection_pool=pool)
9
+ # pool = redis.ConnectionPool(**redis_config)
10
+ self.pool = redis.ConnectionPool(
11
+ max_connections=25,
12
+ **redis_config
13
+ )
14
+
15
+ def get_connection(self):
16
+ return redis.StrictRedis(connection_pool=self.pool)
17
+ # self._client = redis.Redis(connection_pool=pool)
11
18
 
12
19
  def setnx(self, name, value=""):
13
- return self._client.setnx(name, value)
20
+ with self.get_connection() as client:
21
+ client.setnx(name, value)
14
22
 
15
23
  def setex(self, name, t, value=""):
16
- return self._client.setex(name, t, value)
24
+ with self.get_connection() as client:
25
+ client.setex(name, t, value)
17
26
 
18
27
  def expire(self, name, t, nx: bool = False, xx: bool = False, gt: bool = False, lt: bool = False):
19
- return self._client.expire(name, t, nx, xx, gt, lt)
28
+ with self.get_connection() as client:
29
+ client.expire(name, t, nx, xx, gt, lt)
20
30
 
21
31
  def ttl(self, name):
22
- return self._client.ttl(name)
32
+ with self.get_connection() as client:
33
+ return client.ttl(name)
23
34
 
24
35
  def delete(self, name):
25
- return self._client.delete(name)
36
+ with self.get_connection() as client:
37
+ return client.delete(name)
26
38
 
27
39
  def exists(self, *name) -> bool:
28
- return self._client.exists(*name)
40
+ with self.get_connection() as client:
41
+ return client.exists(*name)
29
42
 
30
43
  def sadd(self, name, value):
31
- return self._client.sadd(name, value)
44
+ with self.get_connection() as client:
45
+ return client.sadd(name, value)
32
46
 
33
47
  def zcard(self, name) -> bool:
34
- return self._client.zcard(name)
48
+ with self.get_connection() as client:
49
+ return client.zcard(name)
35
50
 
36
51
  def zadd(self, name, item: dict, **kwargs):
37
- return self._client.zadd(name, item, **kwargs)
52
+ with self.get_connection() as client:
53
+ return client.zadd(name, item, **kwargs)
38
54
 
39
55
  def zrem(self, name, *value):
40
- return self._client.zrem(name, *value)
56
+ with self.get_connection() as client:
57
+ return client.zrem(name, *value)
41
58
 
42
59
  def zcount(self, name, _min, _max):
43
- return self._client.zcount(name, _min, _max)
60
+ with self.get_connection() as client:
61
+ return client.zcount(name, _min, _max)
44
62
 
45
63
  # def zrangebyscore(self, name, _min, _max, start, num, withscores: bool = False, *args):
46
- # return self._client.zrangebyscore(name, _min, _max, start, num, withscores, *args)
64
+ # with self.get_connection() as client:
65
+ # return client.zrangebyscore(name, _min, _max, start, num, withscores, *args)
47
66
 
48
67
  def lua(self, script: str, keys: list = None, args: list = None):
49
68
  keys = keys or []
50
69
  args = args or []
51
70
  keys_count = len(keys)
52
- return self._client.eval(script, keys_count, *keys, *args)
71
+ with self.get_connection() as client:
72
+ return client.eval(script, keys_count, *keys, *args)
53
73
 
54
74
  def lua_sha(self, sha1: str, keys: list = None, args: list = None):
55
75
  keys = keys or []
56
76
  args = args or []
57
77
  keys_count = len(keys)
58
- return self._client.evalsha(sha1, keys_count, *keys, *args)
78
+ with self.get_connection() as client:
79
+ return client.evalsha(sha1, keys_count, *keys, *args)
59
80
 
60
81
  def execute_lua(self, lua_script: str, keys: list, *args):
61
- execute = self._client.register_script(lua_script)
62
- return execute(keys=keys, args=args)
82
+ with self.get_connection() as client:
83
+ execute = client.register_script(lua_script)
84
+ return execute(keys=keys, args=args)
63
85
 
64
86
  def lock(self, key, t=15) -> bool:
65
87
  lua_script = """
@@ -100,6 +100,7 @@ class Launcher(threading.Thread):
100
100
  self._spider_time_sleep = setting.SPIDER_TIME_SLEEP
101
101
  self._spider_max_count = setting.SPIDER_MAX_COUNT
102
102
  self._time_window = setting.TIME_WINDOW
103
+ self._speed_control = setting.SPEED_CONTROL
103
104
 
104
105
  self._done_model = setting.DONE_MODEL
105
106
  self._task_model = setting.TASK_MODEL
@@ -46,7 +46,7 @@ class LauncherApi(Launcher):
46
46
  设置时间窗口为self._time_window(秒),判断在该窗口内的采集量是否满足阈值(self._spider_max_speed)
47
47
  :return: True -> 种子, False -> None
48
48
  """
49
- if (self.__LAUNCHER_QUEUE__["todo"].length and
49
+ if (self._speed_control and self.__LAUNCHER_QUEUE__["todo"].length and
50
50
  not self._db.auto_incr(self._speed_control_key, t=self._time_window, limit=self._spider_max_count)):
51
51
  expire_time = self._db.ttl(self._speed_control_key)
52
52
  logger.info(f"Too fast! Please wait {expire_time} seconds...")
@@ -3,7 +3,7 @@ import threading
3
3
 
4
4
  from cobweb.db import RedisDB
5
5
  from cobweb.base import Seed, logger
6
- from cobweb.utils import BloomFilter
6
+ # from cobweb.utils import BloomFilter
7
7
  from cobweb.constant import DealModel, LogTemplate
8
8
  from .launcher import Launcher, check_pause
9
9
 
@@ -43,17 +43,18 @@ class LauncherPro(Launcher):
43
43
  self._db._client.incrby(key, count)
44
44
 
45
45
  def _get_seed(self) -> Seed:
46
- spider_speed = self._db._client.get(self._speed_control_key)
47
- if int(spider_speed or 0) > self._spider_max_count:
48
- expire_time = self._db.ttl(self._speed_control_key)
49
- if expire_time == -1:
50
- self._db.delete(self._speed_control_key)
51
- else:
52
- logger.info(f"Too fast! Please wait {expire_time} seconds...")
53
- time.sleep(expire_time / 2)
54
- return None
46
+ if self._speed_control:
47
+ spider_speed = self._db._client.get(self._speed_control_key)
48
+ if int(spider_speed or 0) > self._spider_max_count:
49
+ expire_time = self._db.ttl(self._speed_control_key)
50
+ if expire_time == -1:
51
+ self._db.delete(self._speed_control_key)
52
+ else:
53
+ logger.info(f"Too fast! Please wait {expire_time} seconds...")
54
+ time.sleep(expire_time / 2)
55
+ return None
55
56
  seed = self.__LAUNCHER_QUEUE__["todo"].pop()
56
- if seed and not self._db.lock(self._speed_control_key, t=self._time_window):
57
+ if self._speed_control and seed and not self._db.lock(self._speed_control_key, t=self._time_window):
57
58
  self._db._client.incrby(self._speed_control_key, 1)
58
59
  return seed
59
60
 
cobweb/setting.py CHANGED
@@ -65,6 +65,8 @@ TIME_WINDOW = 60 # 频控固定时间窗口(秒)
65
65
  # 任务模式
66
66
  TASK_MODEL = 0 # 0:单次,1:常驻
67
67
 
68
+ # 流控措施
69
+ SPEED_CONTROL = 1 # 0:关闭,1:开启
68
70
 
69
71
  # bloom过滤器
70
72
  CAPACITY = 100000000
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: cobweb-launcher
3
- Version: 1.2.50
3
+ Version: 1.2.52
4
4
  Summary: spider_hole
5
5
  Home-page: https://github.com/Juannie-PP/cobweb
6
6
  Author: Juannie-PP
@@ -1,6 +1,6 @@
1
1
  cobweb/__init__.py,sha256=CBd2oByCfc5EmH2dCZYVHkxXYZG-oWrLyTtZU5sEoP0,96
2
2
  cobweb/constant.py,sha256=zy3XYsc1qp2B76_Fn_hVQ8eGHlPBd3OFlZK2cryE6FY,2839
3
- cobweb/setting.py,sha256=47HZsw40HLpsmOmvij1lyQALPQQCN_tWlKZ0wbn2MtM,2216
3
+ cobweb/setting.py,sha256=JHRrVQL22iidkNe8seQTuxK3hlTdtnvb6VBVygpMs5I,2272
4
4
  cobweb/base/__init__.py,sha256=4gwWWQ0Q8cYG9cD7Lwf4XMqRGc5M_mapS3IczR6zeCE,222
5
5
  cobweb/base/basic.py,sha256=Z56SSLB3I2IGHWCCcSy0Qbfzj8Qbg_po3gP32q1jh4k,7741
6
6
  cobweb/base/common_queue.py,sha256=W7PPZZFl52j3Mc916T0imHj7oAUelA6aKJwW-FecDPE,872
@@ -17,14 +17,14 @@ cobweb/crawlers/crawler.py,sha256=pePDGroD6JJAht5QTU51L7MkFIY4ob9TKpznZ2wUmsw,89
17
17
  cobweb/crawlers/file_crawler.py,sha256=2Sjbdgxzqd41WykKUQE3QQlGai3T8k-pmHNmPlTchjQ,4454
18
18
  cobweb/db/__init__.py,sha256=uZwSkd105EAwYo95oZQXAfofUKHVIAZZIPpNMy-hm2Q,56
19
19
  cobweb/db/api_db.py,sha256=bDc5dJQxq4z04h70KUTHd0OqUOEY7Cm3wcNJZtTvJIM,3015
20
- cobweb/db/redis_db.py,sha256=fumNZJiio-uQqRcSrymx8eJ1PqsdOwITe_Y-9JOXxrQ,4298
20
+ cobweb/db/redis_db.py,sha256=mEWTBWSBXOhf3qxU9fdXYrhNJ44O8pS9tvqEAvZNhoI,5156
21
21
  cobweb/exceptions/__init__.py,sha256=E9SHnJBbhD7fOgPFMswqyOf8SKRDrI_i25L0bSpohvk,32
22
22
  cobweb/exceptions/oss_db_exception.py,sha256=iP_AImjNHT3-Iv49zCFQ3rdLnlvuHa3h2BXApgrOYpA,636
23
23
  cobweb/launchers/__init__.py,sha256=qMuVlQcjErVK67HyKFZEsXf_rfZD5ODjx1QucSCKMOM,114
24
- cobweb/launchers/launcher.py,sha256=sPts-xlgxoeIfl1fn1XR2XVZxLzt7He9xrYDfTHRAGo,7029
24
+ cobweb/launchers/launcher.py,sha256=_qC3EnD_QxQRP_RBNHHJ8HLfn_0FED1jUF_lyas5Hss,7081
25
25
  cobweb/launchers/launcher_air.py,sha256=KAk_M8F3029cXYe7m4nn3Nzyi89lbxJ2cqZjqW8iZ0E,2832
26
- cobweb/launchers/launcher_api.py,sha256=YFqCTRvKn6icBLWTR1VxkU0WEIte2F7fv_LgPkifqdo,7885
27
- cobweb/launchers/launcher_pro.py,sha256=B5FdxvuENRL3XrMl74ENdP1uNgnZOaYCUUfBfM0t3io,7842
26
+ cobweb/launchers/launcher_api.py,sha256=7K7Kl3dk7Ung9iRBwhiMrALEJywcR66ie5RIkLQEM-Y,7909
27
+ cobweb/launchers/launcher_pro.py,sha256=QMumHUTi6IudO5mw9SnwlmP8k_BJ50cpNjYjoQv-_YY,7936
28
28
  cobweb/pipelines/__init__.py,sha256=zSUsGtx6smbs2iXBXvYynReKSgky-3gjqaAtKVnA_OU,105
29
29
  cobweb/pipelines/pipeline.py,sha256=4TJLX0sUHRxYndF5A4Vs5btUGI-wigkOcFvhTW1hLXI,2009
30
30
  cobweb/pipelines/pipeline_console.py,sha256=NEh-4zhuVAQOqwXLsqeb-rcNZ9_KXFUpL3otUTL5qBs,754
@@ -37,8 +37,8 @@ cobweb/utils/bloom.py,sha256=vng-YbKgh9HbtpAWYf_nkUSbfVTOj40aqUUejRYlsCU,1752
37
37
  cobweb/utils/dotting.py,sha256=mVICaa26R-dQ4JGmPK-kkR6QjX38QiRewXZnGb2DCIc,1784
38
38
  cobweb/utils/oss.py,sha256=gyt8-UB07tVphZLQXMOf-JTJwU-mWq8KZkOXKkAf3uk,3513
39
39
  cobweb/utils/tools.py,sha256=5JEaaAwYoV9Sdla2UBIJn6faUBuXmxUMagm9ck6FVqs,1253
40
- cobweb_launcher-1.2.50.dist-info/LICENSE,sha256=z1rxSIGOyzcSb3orZxFPxzx-0C1vTocmswqBNxpKfEk,1063
41
- cobweb_launcher-1.2.50.dist-info/METADATA,sha256=uOO7AE9213AnlsYh-hhW2lCT_3-v9vaA7gmgiC1diuA,6510
42
- cobweb_launcher-1.2.50.dist-info/WHEEL,sha256=ewwEueio1C2XeHTvT17n8dZUJgOvyCWCt0WVNLClP9o,92
43
- cobweb_launcher-1.2.50.dist-info/top_level.txt,sha256=4GETBGNsKqiCUezmT-mJn7tjhcDlu7nLIV5gGgHBW4I,7
44
- cobweb_launcher-1.2.50.dist-info/RECORD,,
40
+ cobweb_launcher-1.2.52.dist-info/LICENSE,sha256=z1rxSIGOyzcSb3orZxFPxzx-0C1vTocmswqBNxpKfEk,1063
41
+ cobweb_launcher-1.2.52.dist-info/METADATA,sha256=lUniniJ1G9qDV9zA4XtI2p5bZPwDkMI6fKPSGnWjb_g,6510
42
+ cobweb_launcher-1.2.52.dist-info/WHEEL,sha256=ewwEueio1C2XeHTvT17n8dZUJgOvyCWCt0WVNLClP9o,92
43
+ cobweb_launcher-1.2.52.dist-info/top_level.txt,sha256=4GETBGNsKqiCUezmT-mJn7tjhcDlu7nLIV5gGgHBW4I,7
44
+ cobweb_launcher-1.2.52.dist-info/RECORD,,