cobweb-launcher 1.3.15__py3-none-any.whl → 3.1.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- cobweb/__init__.py +1 -1
- cobweb/base/__init__.py +4 -149
- cobweb/base/common_queue.py +0 -13
- cobweb/base/request.py +2 -14
- cobweb/base/seed.py +16 -12
- cobweb/constant.py +0 -16
- cobweb/crawlers/crawler.py +3 -85
- cobweb/db/redis_db.py +109 -52
- cobweb/launchers/__init__.py +8 -2
- cobweb/launchers/distributor.py +171 -0
- cobweb/launchers/launcher.py +87 -131
- cobweb/launchers/uploader.py +65 -0
- cobweb/pipelines/pipeline.py +3 -36
- cobweb/schedulers/__init__.py +1 -3
- cobweb/schedulers/launcher_air.py +93 -0
- cobweb/schedulers/launcher_api.py +225 -0
- cobweb/schedulers/scheduler.py +85 -0
- cobweb/schedulers/scheduler_with_redis.py +177 -0
- cobweb/setting.py +15 -32
- cobweb/utils/__init__.py +2 -1
- cobweb/utils/decorators.py +43 -0
- cobweb/utils/dotting.py +55 -0
- cobweb/utils/oss.py +28 -9
- {cobweb_launcher-1.3.15.dist-info → cobweb_launcher-3.1.1.dist-info}/METADATA +1 -1
- cobweb_launcher-3.1.1.dist-info/RECORD +41 -0
- cobweb/base/basic.py +0 -297
- cobweb/base/dotting.py +0 -35
- cobweb/launchers/launcher_air.py +0 -88
- cobweb/launchers/launcher_api.py +0 -89
- cobweb/launchers/launcher_pro.py +0 -88
- cobweb/schedulers/scheduler_api.py +0 -72
- cobweb/schedulers/scheduler_redis.py +0 -72
- cobweb_launcher-1.3.15.dist-info/RECORD +0 -40
- {cobweb_launcher-1.3.15.dist-info → cobweb_launcher-3.1.1.dist-info}/LICENSE +0 -0
- {cobweb_launcher-1.3.15.dist-info → cobweb_launcher-3.1.1.dist-info}/WHEEL +0 -0
- {cobweb_launcher-1.3.15.dist-info → cobweb_launcher-3.1.1.dist-info}/top_level.txt +0 -0
cobweb/launchers/launcher_pro.py
DELETED
@@ -1,88 +0,0 @@
|
|
1
|
-
import time
|
2
|
-
|
3
|
-
from cobweb.base import TaskQueue, Decorators, Seed, Request
|
4
|
-
from cobweb.schedulers import RedisScheduler
|
5
|
-
from .launcher import Launcher
|
6
|
-
|
7
|
-
|
8
|
-
class LauncherPro(Launcher):
|
9
|
-
|
10
|
-
def __init__(self, task, project, custom_setting=None, **kwargs):
|
11
|
-
super().__init__(task, project, custom_setting, **kwargs)
|
12
|
-
self._redis_download = "{%s:%s}:download" % (project, task)
|
13
|
-
self._redis_todo = "{%s:%s}:todo" % (project, task)
|
14
|
-
self._scheduler = RedisScheduler(task, project)
|
15
|
-
|
16
|
-
@Decorators.stop
|
17
|
-
def _schedule(self):
|
18
|
-
thread_sleep = self.scheduling_wait_time
|
19
|
-
for q, key, size, item_info, Cls in [
|
20
|
-
(TaskQueue.TODO, self._redis_todo, self.todo_queue_size, self._task_info["todo"], Seed),
|
21
|
-
(TaskQueue.DOWNLOAD, self._redis_download, self.download_queue_size, self._task_info["download"], Request),
|
22
|
-
]:
|
23
|
-
if q.length < size:
|
24
|
-
for member, priority in self._scheduler.schedule(key, self.scheduling_size):
|
25
|
-
q.push(Cls(member, priority=priority))
|
26
|
-
self.add_working_item(key.split(":")[-1], member, priority)
|
27
|
-
thread_sleep = 0.1
|
28
|
-
time.sleep(thread_sleep)
|
29
|
-
|
30
|
-
@Decorators.stop
|
31
|
-
def _heartbeat(self):
|
32
|
-
if self._scheduler.working.is_set():
|
33
|
-
self._scheduler.set_heartbeat()
|
34
|
-
time.sleep(3)
|
35
|
-
|
36
|
-
@Decorators.stop
|
37
|
-
def _reset(self):
|
38
|
-
self._scheduler.reset(
|
39
|
-
keys=[self._redis_todo, self._redis_download],
|
40
|
-
reset_time=self.seed_reset_seconds
|
41
|
-
)
|
42
|
-
time.sleep(30)
|
43
|
-
|
44
|
-
@Decorators.pause
|
45
|
-
def _insert(self):
|
46
|
-
thread_sleep = 0.1
|
47
|
-
for q, key, size in [
|
48
|
-
(TaskQueue.SEED, self._redis_todo, self.seed_queue_size),
|
49
|
-
(TaskQueue.REQUEST, self._redis_download, self.request_queue_size),
|
50
|
-
]:
|
51
|
-
item_info = {}
|
52
|
-
while (item := q.pop()) and len(item_info.keys()) < self.inserting_size:
|
53
|
-
item_info[item.seed] = item.params.priority
|
54
|
-
if q.length >= size:
|
55
|
-
thread_sleep = self.inserting_wait_time
|
56
|
-
self._scheduler.insert(key, item_info)
|
57
|
-
time.sleep(thread_sleep)
|
58
|
-
|
59
|
-
@Decorators.pause
|
60
|
-
def _refresh(self):
|
61
|
-
self._scheduler.refresh(self._redis_todo, self._task_info["todo"])
|
62
|
-
self._scheduler.refresh(self._redis_download, self._task_info["download"])
|
63
|
-
time.sleep(10)
|
64
|
-
|
65
|
-
@Decorators.pause
|
66
|
-
def _remove(self):
|
67
|
-
thread_sleep = self.removing_wait_time
|
68
|
-
for q, key, size in [
|
69
|
-
(TaskQueue.DELETE, self._redis_todo, self.delete_queue_size),
|
70
|
-
(TaskQueue.DONE, self._redis_download, self.done_queue_size),
|
71
|
-
]:
|
72
|
-
items = []
|
73
|
-
while (item := q.pop()) and len(items) < self.removing_size:
|
74
|
-
items.append(item)
|
75
|
-
self._scheduler.delete(key, items)
|
76
|
-
self.remove_working_items(key.split(":")[-1], items)
|
77
|
-
if q.length >= size:
|
78
|
-
thread_sleep = 0.1
|
79
|
-
time.sleep(thread_sleep)
|
80
|
-
|
81
|
-
def _init_schedule_thread(self):
|
82
|
-
self._add_thread(func=self._heartbeat)
|
83
|
-
self._add_thread(func=self._reset)
|
84
|
-
self._add_thread(func=self._refresh)
|
85
|
-
self._add_thread(func=self._schedule)
|
86
|
-
self._add_thread(func=self._insert)
|
87
|
-
self._add_thread(func=self._remove)
|
88
|
-
# self._add_thread(func=self._polling)
|
cobweb/schedulers/scheduler_api.py
DELETED
@@ -1,72 +0,0 @@
|
|
1
|
-
import threading
|
2
|
-
import time
|
3
|
-
|
4
|
-
# from cobweb.base import Seed
|
5
|
-
from cobweb.db import ApiDB
|
6
|
-
|
7
|
-
|
8
|
-
class ApiScheduler:
|
9
|
-
|
10
|
-
def __init__(self, task, project, scheduler_wait_seconds=30):
|
11
|
-
self._todo_key = "{%s:%s}:todo" % (project, task)
|
12
|
-
self._download_key = "{%s:%s}:download" % (project, task)
|
13
|
-
self._heartbeat_key = "heartbeat:%s_%s" % (project, task)
|
14
|
-
self._speed_control_key = "speed_control:%s_%s" % (project, task)
|
15
|
-
self._reset_lock_key = "lock:reset:%s_%s" % (project, task)
|
16
|
-
self._db = ApiDB()
|
17
|
-
|
18
|
-
self.scheduler_wait_seconds = scheduler_wait_seconds
|
19
|
-
self.working = threading.Event()
|
20
|
-
|
21
|
-
@property
|
22
|
-
def heartbeat(self):
|
23
|
-
return self._db.exists(self._heartbeat_key)
|
24
|
-
|
25
|
-
def set_heartbeat(self):
|
26
|
-
return self._db.setex(self._heartbeat_key, 5)
|
27
|
-
|
28
|
-
def schedule(self, key, count):
|
29
|
-
if not self._db.zcount(key, 0, "(1000"):
|
30
|
-
time.sleep(self.scheduler_wait_seconds)
|
31
|
-
else:
|
32
|
-
source = int(time.time())
|
33
|
-
members = self._db.members(key, source, count=count, _min=0, _max="(1000")
|
34
|
-
for member, priority in members:
|
35
|
-
# seed = Seed(member, priority=priority)
|
36
|
-
yield member, priority
|
37
|
-
|
38
|
-
def insert(self, key, items):
|
39
|
-
if items:
|
40
|
-
self._db.zadd(key, items, nx=True)
|
41
|
-
|
42
|
-
def reset(self, keys, reset_time=30):
|
43
|
-
if self._db.lock(self._reset_lock_key, t=120):
|
44
|
-
|
45
|
-
if isinstance(keys, str):
|
46
|
-
keys = [keys]
|
47
|
-
|
48
|
-
_min = reset_time - int(time.time()) if self.heartbeat else "-inf"
|
49
|
-
|
50
|
-
for key in keys:
|
51
|
-
if self._db.exists(key):
|
52
|
-
self._db.members(key, 0, _min=_min, _max="(0")
|
53
|
-
|
54
|
-
if not self.heartbeat:
|
55
|
-
self.working.set()
|
56
|
-
time.sleep(10)
|
57
|
-
|
58
|
-
self._db.delete(self._reset_lock_key)
|
59
|
-
|
60
|
-
def refresh(self, key, items: dict[str, int]):
|
61
|
-
refresh_time = int(time.time())
|
62
|
-
its = {k: -refresh_time - v / 1000 for k, v in items.items()}
|
63
|
-
if its:
|
64
|
-
self._db.zadd(key, item=its, xx=True)
|
65
|
-
|
66
|
-
def delete(self, key, values):
|
67
|
-
if values:
|
68
|
-
self._db.zrem(key, *values)
|
69
|
-
|
70
|
-
|
71
|
-
|
72
|
-
|
cobweb/schedulers/scheduler_redis.py
DELETED
@@ -1,72 +0,0 @@
|
|
1
|
-
import threading
|
2
|
-
import time
|
3
|
-
|
4
|
-
# from cobweb.base import Seed
|
5
|
-
from cobweb.db import RedisDB
|
6
|
-
|
7
|
-
|
8
|
-
class RedisScheduler:
|
9
|
-
|
10
|
-
def __init__(self, task, project, scheduler_wait_seconds=30):
|
11
|
-
self._todo_key = "{%s:%s}:todo" % (project, task)
|
12
|
-
self._download_key = "{%s:%s}:download" % (project, task)
|
13
|
-
self._heartbeat_key = "heartbeat:%s_%s" % (project, task)
|
14
|
-
self._speed_control_key = "speed_control:%s_%s" % (project, task)
|
15
|
-
self._reset_lock_key = "lock:reset:%s_%s" % (project, task)
|
16
|
-
self._db = RedisDB()
|
17
|
-
|
18
|
-
self.scheduler_wait_seconds = scheduler_wait_seconds
|
19
|
-
self.working = threading.Event()
|
20
|
-
|
21
|
-
@property
|
22
|
-
def heartbeat(self):
|
23
|
-
return self._db.exists(self._heartbeat_key)
|
24
|
-
|
25
|
-
def set_heartbeat(self):
|
26
|
-
return self._db.setex(self._heartbeat_key, 5)
|
27
|
-
|
28
|
-
def schedule(self, key, count):
|
29
|
-
if not self._db.zcount(key, 0, "(1000"):
|
30
|
-
time.sleep(self.scheduler_wait_seconds)
|
31
|
-
else:
|
32
|
-
source = int(time.time())
|
33
|
-
members = self._db.members(key, source, count=count, _min=0, _max="(1000")
|
34
|
-
for member, priority in members:
|
35
|
-
# seed = Seed(member, priority=priority)
|
36
|
-
yield member, priority
|
37
|
-
|
38
|
-
def insert(self, key, items):
|
39
|
-
if items:
|
40
|
-
self._db.zadd(key, items, nx=True)
|
41
|
-
|
42
|
-
def reset(self, keys, reset_time=30):
|
43
|
-
if self._db.lock(self._reset_lock_key, t=120):
|
44
|
-
|
45
|
-
if isinstance(keys, str):
|
46
|
-
keys = [keys]
|
47
|
-
|
48
|
-
_min = reset_time - int(time.time()) if self.heartbeat else "-inf"
|
49
|
-
|
50
|
-
for key in keys:
|
51
|
-
if self._db.exists(key):
|
52
|
-
self._db.members(key, 0, _min=_min, _max="(0")
|
53
|
-
|
54
|
-
if not self.heartbeat:
|
55
|
-
self.working.set()
|
56
|
-
time.sleep(10)
|
57
|
-
|
58
|
-
self._db.delete(self._reset_lock_key)
|
59
|
-
|
60
|
-
def refresh(self, key, items: dict[str, int]):
|
61
|
-
refresh_time = int(time.time())
|
62
|
-
its = {k: -refresh_time - v / 1000 for k, v in items.items()}
|
63
|
-
if its:
|
64
|
-
self._db.zadd(key, item=its, xx=True)
|
65
|
-
|
66
|
-
def delete(self, key, values):
|
67
|
-
if values:
|
68
|
-
self._db.zrem(key, *values)
|
69
|
-
|
70
|
-
|
71
|
-
|
72
|
-
|
cobweb_launcher-1.3.15.dist-info/RECORD
DELETED
@@ -1,40 +0,0 @@
|
|
1
|
-
cobweb/__init__.py,sha256=oaEfsGUuGP0s39UbFRwrnsjMUeuB6QvQIAwStKFyUTk,83
|
2
|
-
cobweb/constant.py,sha256=eofONAntk9O6S-cb4KbYGYHL_u7nBlOqqFOw_HzJHAU,3588
|
3
|
-
cobweb/setting.py,sha256=pY6LKsgWI3164GiGA1z_y26LVf5-3mpiEgmm86mKRdY,3135
|
4
|
-
cobweb/base/__init__.py,sha256=CgNg7BK8uPICSWrLI9Bi6vNQaquBNY2H31TrDo9-fTI,5245
|
5
|
-
cobweb/base/basic.py,sha256=Z56SSLB3I2IGHWCCcSy0Qbfzj8Qbg_po3gP32q1jh4k,7741
|
6
|
-
cobweb/base/common_queue.py,sha256=Gor7sR3h1hlZWaI0XcNAbf0S15Ftjr3DFRWNTGL13uU,1137
|
7
|
-
cobweb/base/dotting.py,sha256=lfFXXqnVP__hxlW3qH5Bnuq69KtnFaQLbcz1M8e2Ajg,1239
|
8
|
-
cobweb/base/item.py,sha256=hYheVTV2Bozp4iciJpE2ZwBIXkaqBg4QQkRccP8yoVk,1049
|
9
|
-
cobweb/base/log.py,sha256=L01hXdk3L2qEm9X1FOXQ9VmWIoHSELe0cyZvrdAN61A,2003
|
10
|
-
cobweb/base/request.py,sha256=acGm3OzxsPed5VUTk7D9eeHZPMh7KUNQRUv44G5znZg,2659
|
11
|
-
cobweb/base/response.py,sha256=eB1DWMXFCpn3cJ3yzgCRU1WeZAdayGDohRgdjdMUFN4,406
|
12
|
-
cobweb/base/seed.py,sha256=PN5J4gKPEXylwyQeSGOBfauxHktxFr7RJe8nVX1hBw4,2987
|
13
|
-
cobweb/crawlers/__init__.py,sha256=msvkB9mTpsgyj8JfNMsmwAcpy5kWk_2NrO1Adw2Hkw0,29
|
14
|
-
cobweb/crawlers/crawler.py,sha256=A7_qVGXHiULk7PsYIxmYvVCyILdWy4Er8_qmbVDzFzE,3950
|
15
|
-
cobweb/db/__init__.py,sha256=uZwSkd105EAwYo95oZQXAfofUKHVIAZZIPpNMy-hm2Q,56
|
16
|
-
cobweb/db/api_db.py,sha256=bDc5dJQxq4z04h70KUTHd0OqUOEY7Cm3wcNJZtTvJIM,3015
|
17
|
-
cobweb/db/redis_db.py,sha256=FvMzckJtmhwKhZqKoS23iXmJti5P2dnMVD5rJ__5LUw,5139
|
18
|
-
cobweb/exceptions/__init__.py,sha256=E9SHnJBbhD7fOgPFMswqyOf8SKRDrI_i25L0bSpohvk,32
|
19
|
-
cobweb/exceptions/oss_db_exception.py,sha256=iP_AImjNHT3-Iv49zCFQ3rdLnlvuHa3h2BXApgrOYpA,636
|
20
|
-
cobweb/launchers/__init__.py,sha256=m_XNG2bWuMbirPt3d0_s-Ezl1xycfUxeqZnwq_kkfuo,116
|
21
|
-
cobweb/launchers/launcher.py,sha256=KB7aL38T3uMh1s78HyHLcS0DS8ovx7VuW6JHn5ooec8,7807
|
22
|
-
cobweb/launchers/launcher_air.py,sha256=yPr395HVIIHAq6lqRcYJu7c0KkfO9V8O-2sn0hC96p0,2990
|
23
|
-
cobweb/launchers/launcher_api.py,sha256=52JZwNGVNeOiI6xbY3v1Vzb2OzoRjsFJQDgbbYvPbJI,3429
|
24
|
-
cobweb/launchers/launcher_pro.py,sha256=Kl64CQLcUwW9FeaKFg0GIK51fLZaWkoQlQ346zEvJEE,3398
|
25
|
-
cobweb/pipelines/__init__.py,sha256=zSUsGtx6smbs2iXBXvYynReKSgky-3gjqaAtKVnA_OU,105
|
26
|
-
cobweb/pipelines/pipeline.py,sha256=FHY7ZHNZgx-AdbHt1MKHm-w0aigxvyXFV10T0NMdrOE,1381
|
27
|
-
cobweb/pipelines/pipeline_console.py,sha256=NEh-4zhuVAQOqwXLsqeb-rcNZ9_KXFUpL3otUTL5qBs,754
|
28
|
-
cobweb/pipelines/pipeline_loghub.py,sha256=xZ6D55BGdiM71WUv83jyLGbEyUwhBHLJRZoXthBxxTs,1019
|
29
|
-
cobweb/schedulers/__init__.py,sha256=y7Lv_7b0zfTl0OhIONb_8u1K1C9gVlBA-xz_XG_kI9g,85
|
30
|
-
cobweb/schedulers/scheduler_api.py,sha256=pFEdS1H4zuzxwMhCV-G7CoLz-rEOPv4EVo3xZUXTyDo,2199
|
31
|
-
cobweb/schedulers/scheduler_redis.py,sha256=E5fjc3nNld8GbUhUGT7uY4smRejj2J2ZIzp2g6lhxFM,2205
|
32
|
-
cobweb/utils/__init__.py,sha256=YvD4mIDBd9jmGA6WJBcwkgDU2jRFNBCEbarZCSUBAHE,114
|
33
|
-
cobweb/utils/bloom.py,sha256=vng-YbKgh9HbtpAWYf_nkUSbfVTOj40aqUUejRYlsCU,1752
|
34
|
-
cobweb/utils/oss.py,sha256=6Qlhdde7CcwD69bBe2rGWHY3-aptG9NXB_DZLhjgDRQ,3553
|
35
|
-
cobweb/utils/tools.py,sha256=5JEaaAwYoV9Sdla2UBIJn6faUBuXmxUMagm9ck6FVqs,1253
|
36
|
-
cobweb_launcher-1.3.15.dist-info/LICENSE,sha256=z1rxSIGOyzcSb3orZxFPxzx-0C1vTocmswqBNxpKfEk,1063
|
37
|
-
cobweb_launcher-1.3.15.dist-info/METADATA,sha256=UYoDn5Rc_AO9u7-mX-OsNBLu9rS7SjflmJegslVefQE,6510
|
38
|
-
cobweb_launcher-1.3.15.dist-info/WHEEL,sha256=ewwEueio1C2XeHTvT17n8dZUJgOvyCWCt0WVNLClP9o,92
|
39
|
-
cobweb_launcher-1.3.15.dist-info/top_level.txt,sha256=4GETBGNsKqiCUezmT-mJn7tjhcDlu7nLIV5gGgHBW4I,7
|
40
|
-
cobweb_launcher-1.3.15.dist-info/RECORD,,
|
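{cobweb_launcher-1.3.15.dist-info → cobweb_launcher-3.1.1.dist-info}/LICENSE
File without changes
|
{cobweb_launcher-1.3.15.dist-info → cobweb_launcher-3.1.1.dist-info}/WHEEL
File without changes
|
{cobweb_launcher-1.3.15.dist-info → cobweb_launcher-3.1.1.dist-info}/top_level.txt
File without changes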
|