cobweb-launcher 0.0.4__py3-none-any.whl → 0.0.6__py3-none-any.whl

Sign up to get free protection for your applications and to get access to all the features.
@@ -1,231 +0,0 @@
1
- import json
2
- import random
3
- import time
4
- import redis
5
- from datetime import datetime
6
- from base.bbb import Seed
7
-
8
-
9
class RedisDB:
    """Redis-backed seed queue for one ``project`` / ``task_name`` pair.

    All keys share the prefix ``{project}:{task_name}:``.  Seeds move between:

    * ``ready_key``   (zset)  -- seeds waiting to be crawled, scored by priority
    * ``spider_key``  (hash)  -- seeds handed out to a worker; the field is
      ``"<version>_<priority>_<sid>"`` and the value is the raw seed
    * ``failed_key`` / ``succeed_key`` (sets) -- finished seeds
    * ``store_key``   (set, ``%s`` placeholder filled by ``add_store_sid``)

    The ``*_lock`` and ``heartbeat_key`` keys are plain strings.
    """

    def __init__(
            self,
            project: str,
            task_name: str,
            host=None,
            port=None,
            username=None,
            password=None,
            db=0
    ):
        """Build the key names and a pooled redis client.

        Args:
            project: First component of every key name.
            task_name: Second component of every key name.
            host, port, username, password, db: Forwarded unchanged to
                ``redis.ConnectionPool``.
        """
        pool = redis.ConnectionPool(
            host=host,
            port=port,
            username=username,
            password=password,
            db=db
        )
        self.heartbeat_key = f"{project}:{task_name}:heartbeat"  # string
        self.ready_key = f"{project}:{task_name}:seed_info:ready"  # zset
        self.spider_key = f"{project}:{task_name}:seed_info:spider"  # hash
        self.store_key = f"{project}:{task_name}:seed_info:store:%s"  # set
        self.failed_key = f"{project}:{task_name}:seed_info:failed"  # set
        self.succeed_key = f"{project}:{task_name}:seed_info:succeed"  # set
        self.update_lock = f"{project}:{task_name}:update_seed_lock"  # string
        self.check_lock = f"{project}:{task_name}:check_seed_lock"  # string
        self.scheduler_lock = f"{project}:{task_name}:scheduler_lock"  # string
        self.client = redis.Redis(connection_pool=pool)

    @staticmethod
    def _to_text(raw):
        """Return ``raw`` as ``str``, decoding it when the client handed back bytes."""
        if isinstance(raw, (bytes, bytearray)):
            return raw.decode()
        return raw

    @staticmethod
    def _version_to_seconds(version):
        """Convert the version part of a spider-hash field to epoch seconds.

        ``get_seeds`` writes the version as a float number of microseconds;
        second- and millisecond-based values are also accepted so that fields
        written in those units are still interpreted correctly.  The unit is
        inferred from the magnitude.  An unparsable version yields ``0.0`` so
        the entry is treated as stale instead of aborting the whole check.
        """
        try:
            stamp = float(version)
        except (TypeError, ValueError):
            return 0.0
        if stamp >= 1e14:  # microseconds since the epoch
            return stamp / 1e6
        if stamp >= 1e11:  # milliseconds since the epoch
            return stamp / 1e3
        return stamp

    def set_heartbeat(self, t=3):
        """Set the heartbeat key's time-to-live to ``t`` seconds."""
        self.client.expire(self.heartbeat_key, t)

    def heartbeat(self):
        """Return the remaining time-to-live of the heartbeat key."""
        return self.client.ttl(self.heartbeat_key)

    def iterate_hash(self, key, count=1000, match=None):
        """Yield ``(field, value)`` string pairs of hash ``key`` via HSCAN.

        Args:
            key: Name of the hash to scan.
            count: ``COUNT`` hint passed to HSCAN.
            match: Optional glob pattern passed as ``MATCH``.

        The scan only ends when the server returns cursor ``0``.  An empty
        page is not the end of the iteration: with ``MATCH`` the server may
        return pages without any element while the cursor is still non-zero.
        """
        cursor = 0
        while True:
            # Use HSCAN to walk the hash page by page.
            cursor, data = self.client.hscan(key, cursor=cursor, match=match, count=count)
            for field, value in data.items():
                yield self._to_text(field), self._to_text(value)
            if int(cursor) == 0:
                return

    def get_lock(self, key, t=15, timeout=3, sleep_time=0.1):
        """Try to take the lock ``key``; return ``True`` on success.

        Args:
            key: Redis key used as the lock.
            t: Lifetime of the lock in seconds.
            timeout: How long to keep retrying, in seconds.
            sleep_time: Pause between attempts, in seconds.

        Returns:
            ``True`` if the lock was acquired, ``False`` after ``timeout``.
        """
        deadline = time.monotonic() + timeout
        while True:
            # SET NX EX creates the key and its expiry in one command, so a
            # crash can no longer leave a lock behind that never expires.
            if self.client.set(key, "", nx=True, ex=t):
                return True
            if time.monotonic() > deadline:
                break
            time.sleep(sleep_time)

        if self.client.ttl(key) == -1:
            # The lock exists without an expiry.  Give its holder a moment to
            # set one; if it never does, put an expiry on it ourselves so the
            # lock cannot stay stuck forever.
            for _ in range(3):
                if self.client.ttl(key) != -1:
                    break
                time.sleep(0.5)
            else:
                self.client.expire(key, t)
        else:
            print("ttl: " + str(self.client.ttl(key)))
        return False

    def execute_update(
            self,
            set_info,
            del_info,
            status: int = 0
    ):
        """Move seeds between structures inside one MULTI/EXEC transaction.

        Args:
            set_info: Entries to add to the destination structure.
            del_info: Members/fields to remove from the source structure.
            status: ``0`` ready zset -> spider hash, ``1`` spider hash ->
                ready zset, ``2`` spider hash -> failed set, ``3`` spider
                hash -> succeed set.  Any other value is ignored.

        Empty ``set_info`` / ``del_info`` are skipped, because the client
        rejects commands with an empty mapping or argument list.
        """
        if status not in (0, 1, 2, 3):
            return None
        if not set_info and not del_info:
            return None

        pipe = self.client.pipeline()
        pipe.multi()

        if status == 0:
            if set_info:
                pipe.hset(self.spider_key, mapping=set_info)
            if del_info:
                pipe.zrem(self.ready_key, *del_info)
        else:
            if set_info:
                if status == 1:
                    pipe.zadd(self.ready_key, mapping=set_info)
                elif status == 2:
                    pipe.sadd(self.failed_key, *set_info)
                else:
                    pipe.sadd(self.succeed_key, *set_info)
            if del_info:
                pipe.hdel(self.spider_key, *del_info)
        pipe.execute()

    @property
    def seed_count(self):
        """Number of seeds currently in the ready zset."""
        return self.client.zcard(self.ready_key)

    def deal_seeds(self, sids, status: bool):
        """Move the spider-hash entries of ``sids`` to a result set.

        Args:
            sids: One seed id or an iterable of seed ids.
            status: Truthy moves the entries to the failed set, falsy moves
                them to the succeed set.
        """
        sids = [sids] if isinstance(sids, str) else list(sids)
        status = 2 if status else 3
        del_list, fail_set = [], set()
        for sid in sids:
            for field, value in self.iterate_hash(self.spider_key, match=f"*{sid}"):
                # maxsplit keeps a sid that itself contains "_" in one piece.
                _, priority, _sid = field.split("_", 2)
                if sid != _sid:
                    continue
                seed = Seed(value, priority=priority)
                del_list.append(field)
                fail_set.add(seed.format_seed)
        if del_list:
            self.execute_update(fail_set, del_list, status=status)
        print("retry seeds, sids: {}".format(json.dumps(sids)))

    def set_seeds(self, seeds):
        """Add one ``Seed`` or a list/tuple of seeds to the ready zset.

        Each seed is stored under ``seed.format_seed`` with ``seed.priority``
        as its score.  Nothing is sent when there is nothing to add.
        """
        if isinstance(seeds, (list, tuple)):
            item_info = {seed.format_seed: seed.priority for seed in seeds}
        elif isinstance(seeds, Seed):
            item_info = {seeds.format_seed: seeds.priority}
        else:
            item_info = {}
        if item_info:
            self.client.zadd(self.ready_key, mapping=item_info)

    def get_seeds(self, length: int = 1000):
        """Fetch up to ``length`` seeds from redis and mark them as in progress.

        The seeds are read from the ready zset, copied into the spider hash
        under ``"<version>_<priority>_<sid>"`` and removed from the ready
        zset, all while holding ``update_lock``.

        Returns:
            The list of ``Seed`` objects taken; an empty list when the lock
            could not be acquired or no seed is ready.
        """
        cs = time.time()
        result = []
        if not self.get_lock(key=self.update_lock):
            return result

        try:
            set_dict, del_list = {}, []
            version = time.time() * 1e6  # microseconds, see _version_to_seconds

            items = self.client.zrangebyscore(
                self.ready_key, min=0, max="+inf", start=0, num=length, withscores=True
            )
            for value, priority in items:
                seed = Seed(value, priority=priority, version=version)
                pty = "{:03d}".format(int(priority))
                set_dict[f"{version}_{pty}_{seed.sid}"] = value
                del_list.append(value)
                result.append(seed)

            print("\nset seeds into queue time: " + str(time.time() - cs))
            if result:
                self.execute_update(set_dict, del_list)
        finally:
            # Release the lock even when something above raised.
            self.client.delete(self.update_lock)
        print("push seeds into queue time: " + str(time.time() - cs))
        return result

    def check_spider_hash(self):
        """Put spider-hash entries back into the ready zset.

        While the heartbeat key exists, only entries whose version is at
        least 600 seconds old are requeued; without a heartbeat every entry
        is requeued and a heartbeat key with a 15 second lifetime is created.
        ``check_lock`` is not released here; it is left to expire.
        """
        cs = time.time()
        if not self.get_lock(key=self.check_lock, t=60, timeout=600, sleep_time=60):
            return

        set_dict, del_list = {}, []
        # HSCAN rejects COUNT 0, so never ask for fewer than one element.
        count = max(self.client.hlen(self.spider_key), 1)
        heartbeat = bool(self.client.exists(self.heartbeat_key))
        now = int(time.time())
        for field, value in self.iterate_hash(key=self.spider_key, count=count):
            version, priority, _sid = field.split("_", 2)
            if heartbeat and self._version_to_seconds(version) + 600 > now:
                continue
            set_dict[value] = priority
            del_list.append(field)

            if len(del_list) >= 1000:
                # Flush in batches and keep the lock alive meanwhile.
                self.client.expire(self.check_lock, 60)
                self.execute_update(set_dict, del_list, status=1)
                set_dict, del_list = {}, []

        if set_dict and del_list:
            self.execute_update(set_dict, del_list, status=1)

        print("init seeds time: " + str(time.time() - cs))
        if not heartbeat:
            self.client.setnx(self.heartbeat_key, "")
            self.set_heartbeat(t=15)

    def add_store_sid(self, key, data):
        """Add ``data`` to the store set named by ``key``.

        Args:
            key: Value substituted for ``%s`` in ``store_key``.
            data: One member or an iterable of members.  A single string is
                added as one member rather than character by character.
        """
        if isinstance(data, (str, bytes)):
            data = [data]
        if not data:
            return
        redis_key = self.store_key % key
        self.client.sadd(redis_key, *data)
214
-
215
-
216
# Scratch output produced when the module is loaded: prints a timestamp-derived
# integer, a random offset, the current epoch time and their combination.
current_time = datetime.now()
# Format the date/time as month, day, hour, minute, second and microsecond digits.
formatted_time = current_time.strftime("%m%d%H%M%S%f")
c = int(formatted_time)
print(c)
d = 0.9 * random.random() + 200
print(d)
print(time.time())
print(d / 1000 + c)
225
- # for _ in range(100):
226
- # redis_db.get_seeds(1000)
227
- # redis_db.get_seeds(1000)
228
- # redis_db.check_spider_hash()
229
- # redis_db.retry_seeds(["dc895aee47f8fc39c479f7cac6025879"])
230
- # "1705996980_200_dc895aee47f8fc39c479f7cac6025879"
231
-
@@ -1,37 +0,0 @@
1
- cobweb/__init__.py,sha256=cuMlo5UdlfMRnMf6TfMHL8-FIw83zCiCUtfOrrG6gog,356
2
- cobweb/base/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
3
- cobweb/base/bbb.py,sha256=iX2Xn5AukRussL8xHKTGFfgODWTMaFA3JnvQWd02XDQ,5615
4
- cobweb/base/config.py,sha256=qZY26tziCeENdopGyVSTCyd_8B30S37GbWx0-_g7EiY,5357
5
- cobweb/base/decorators.py,sha256=gb2puZLRHv_348ZTIAFLhKuJWKZ6a9_tzduCS8p1-UQ,3089
6
- cobweb/base/hash_table.py,sha256=-EPHRMNOHHbdjxZJUuDxghUfwrbMA05sqrldHAgrIco,1885
7
- cobweb/base/interface.py,sha256=LM6C0eh-d1b2CxjtiHKfP8I3XhhlYQR5r-3MD6TMIc4,1037
8
- cobweb/base/log.py,sha256=Gb3_y4IzTo5pJohTggBCU9rK6-ZN3hgTOHkoXHyN6CU,2384
9
- cobweb/base/queue_tmp.py,sha256=NS4qBHKq2o-R78Jpv5xp7TtOHIMg8y0livilTVK49M8,1527
10
- cobweb/base/request.py,sha256=dHTR7qMHbIIW4ggpTAg4io1TBAYH77teYU4bmcWPXH0,2318
11
- cobweb/base/task.py,sha256=ztgNh4_tgy95pe3REBfMLKkwf7HaShvp-fdRIWJiJXo,1230
12
- cobweb/base/utils.py,sha256=NSSgCBE4u1yTpXZrjg8RIepYedo4ZdM38rhDObVfRhI,325
13
- cobweb/db/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
14
- cobweb/db/base/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
15
- cobweb/db/base/client_db.py,sha256=AbpHGcgLb-kRsJGnwFEktk7uzpZOCcBY74-YBdrKVGs,1
16
- cobweb/db/base/oss_db.py,sha256=LYRsh26-Fttc6sjc9oOVsCd93jZSja6dtRc8G2Em-1E,3812
17
- cobweb/db/base/redis_db.py,sha256=2-YMrTpiLNGwA7_bt62HYKevVeJr1Y_n88JKjPg1V3s,7636
18
- cobweb/db/base/redis_dbv3.py,sha256=u-Tmexl0nrYVVRCCbxAjcH6fyRx7CP4J7iW4BdO7q98,8354
19
- cobweb/db/scheduler/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
20
- cobweb/db/scheduler/default.py,sha256=m-zzC2cbHGxplEW5OoB9Vj3nJm30Xl0sAQ94wpPb7Yw,122
21
- cobweb/db/scheduler/textfile.py,sha256=EiOxV8h99ouIr2HvmpM9B90QY3hqGNPMeQEnps_RG-c,869
22
- cobweb/db/storer/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
23
- cobweb/db/storer/console.py,sha256=QbHnZ3ou0AR6c35iVy2hYEbKlYYTnFaD8-bUODeNt14,210
24
- cobweb/db/storer/loghub.py,sha256=Pb0OwVteIllYjL2cIyBlc1WWau1PHn9K1rlAGrr0M3k,1815
25
- cobweb/db/storer/redis.py,sha256=jK_RirqgSaV4aIWSuySIm5f1ZfZiULqFj2kman2H-Qw,440
26
- cobweb/db/storer/textfile.py,sha256=yAvtbPkScjZ298H25kWsI0MDg2JuI2Im4m2qmPEUNTM,443
27
- cobweb/distributed/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
28
- cobweb/distributed/launcher.py,sha256=IV2jd1hLyt1YyhyN3leSMrtbN5APsmvqblr2NXy18xg,6163
29
- cobweb/distributed/models.py,sha256=7ypYQaiHP91LbPE0u5Lb-9LDazg74UW7KacK4-ai1tM,4438
30
- cobweb/single/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
31
- cobweb/single/models.py,sha256=lu8teNWnWcUwZFra8XmqyhzOAf3UyuEztwBr1Ne6pUs,2898
32
- cobweb/single/nest.py,sha256=mL8q9a5BjtoeUyzXCIVw_vyUsNY8ltbvQpYIIpZEDFU,5012
33
- cobweb_launcher-0.0.4.dist-info/LICENSE,sha256=z1rxSIGOyzcSb3orZxFPxzx-0C1vTocmswqBNxpKfEk,1063
34
- cobweb_launcher-0.0.4.dist-info/METADATA,sha256=4JShM9pxXi1aouOY1OFrEuW7yrTM1K35GUJLKQpr78w,1225
35
- cobweb_launcher-0.0.4.dist-info/WHEEL,sha256=pkctZYzUS4AYVn6dJ-7367OJZivF2e8RA9b_ZBjif18,92
36
- cobweb_launcher-0.0.4.dist-info/top_level.txt,sha256=4GETBGNsKqiCUezmT-mJn7tjhcDlu7nLIV5gGgHBW4I,7
37
- cobweb_launcher-0.0.4.dist-info/RECORD,,
File without changes